pyo3_introspection/
introspection.rs

1use crate::model::{Class, Function, Module};
2use anyhow::{bail, Context, Result};
3use goblin::elf::Elf;
4use goblin::mach::symbols::N_SECT;
5use goblin::mach::{Mach, MachO, SingleArch};
6use goblin::pe::PE;
7use goblin::Object;
8use serde::Deserialize;
9use std::collections::HashMap;
10use std::fs;
11use std::path::Path;
12
13/// Introspect a cdylib built with PyO3 and returns the definition of a Python module.
14///
15/// This function currently supports the ELF (most *nix including Linux), Match-O (macOS) and PE (Windows) formats.
16pub fn introspect_cdylib(library_path: impl AsRef<Path>, main_module_name: &str) -> Result<Module> {
17    let chunks = find_introspection_chunks_in_binary_object(library_path.as_ref())?;
18    parse_chunks(&chunks, main_module_name)
19}
20
21/// Parses the introspection chunks found in the binary
22fn parse_chunks(chunks: &[Chunk], main_module_name: &str) -> Result<Module> {
23    let chunks_by_id = chunks
24        .iter()
25        .map(|c| {
26            (
27                match c {
28                    Chunk::Module { id, .. } => id,
29                    Chunk::Class { id, .. } => id,
30                    Chunk::Function { id, .. } => id,
31                },
32                c,
33            )
34        })
35        .collect::<HashMap<_, _>>();
36    // We look for the root chunk
37    for chunk in chunks {
38        if let Chunk::Module {
39            name,
40            members,
41            id: _,
42        } = chunk
43        {
44            if name == main_module_name {
45                return parse_module(name, members, &chunks_by_id);
46            }
47        }
48    }
49    bail!("No module named {main_module_name} found")
50}
51
52fn parse_module(
53    name: &str,
54    members: &[String],
55    chunks_by_id: &HashMap<&String, &Chunk>,
56) -> Result<Module> {
57    let mut modules = Vec::new();
58    let mut classes = Vec::new();
59    let mut functions = Vec::new();
60    for member in members {
61        if let Some(chunk) = chunks_by_id.get(member) {
62            match chunk {
63                Chunk::Module {
64                    name,
65                    members,
66                    id: _,
67                } => {
68                    modules.push(parse_module(name, members, chunks_by_id)?);
69                }
70                Chunk::Class { name, id: _ } => classes.push(Class { name: name.into() }),
71                Chunk::Function { name, id: _ } => functions.push(Function { name: name.into() }),
72            }
73        }
74    }
75    Ok(Module {
76        name: name.into(),
77        modules,
78        classes,
79        functions,
80    })
81}
82
83fn find_introspection_chunks_in_binary_object(path: &Path) -> Result<Vec<Chunk>> {
84    let library_content =
85        fs::read(path).with_context(|| format!("Failed to read {}", path.display()))?;
86    match Object::parse(&library_content)
87        .context("The built library is not valid or not supported by our binary parser")?
88    {
89        Object::Elf(elf) => find_introspection_chunks_in_elf(&elf, &library_content),
90        Object::Mach(Mach::Binary(macho)) => {
91            find_introspection_chunks_in_macho(&macho, &library_content)
92        }
93        Object::Mach(Mach::Fat(multi_arch)) => {
94            for arch in &multi_arch {
95                match arch? {
96                    SingleArch::MachO(macho) => {
97                        return find_introspection_chunks_in_macho(&macho, &library_content)
98                    }
99                    SingleArch::Archive(_) => (),
100                }
101            }
102            bail!("No Mach-o chunk found in the multi-arch Mach-o container")
103        }
104        Object::PE(pe) => find_introspection_chunks_in_pe(&pe, &library_content),
105        _ => {
106            bail!("Only ELF, Mach-o and PE containers can be introspected")
107        }
108    }
109}
110
111fn find_introspection_chunks_in_elf(elf: &Elf<'_>, library_content: &[u8]) -> Result<Vec<Chunk>> {
112    let mut chunks = Vec::new();
113    for sym in &elf.syms {
114        if is_introspection_symbol(elf.strtab.get_at(sym.st_name).unwrap_or_default()) {
115            let section_header = &elf.section_headers[sym.st_shndx];
116            let data_offset = sym.st_value + section_header.sh_offset - section_header.sh_addr;
117            chunks.push(read_symbol_value_with_ptr_and_len(
118                &library_content[usize::try_from(data_offset).context("File offset overflow")?..],
119                0,
120                library_content,
121                elf.is_64,
122            )?);
123        }
124    }
125    Ok(chunks)
126}
127
128fn find_introspection_chunks_in_macho(
129    macho: &MachO<'_>,
130    library_content: &[u8],
131) -> Result<Vec<Chunk>> {
132    if !macho.little_endian {
133        bail!("Only little endian Mach-o binaries are supported");
134    }
135
136    let sections = macho
137        .segments
138        .sections()
139        .flatten()
140        .map(|t| t.map(|s| s.0))
141        .collect::<Result<Vec<_>, _>>()?;
142    let mut chunks = Vec::new();
143    for (name, nlist) in macho.symbols().flatten() {
144        if nlist.is_global() && nlist.get_type() == N_SECT && is_introspection_symbol(name) {
145            let section = &sections[nlist.n_sect];
146            let data_offset = nlist.n_value + u64::from(section.offset) - section.addr;
147            chunks.push(read_symbol_value_with_ptr_and_len(
148                &library_content[usize::try_from(data_offset).context("File offset overflow")?..],
149                0,
150                library_content,
151                macho.is_64,
152            )?);
153        }
154    }
155    Ok(chunks)
156}
157
158fn find_introspection_chunks_in_pe(pe: &PE<'_>, library_content: &[u8]) -> Result<Vec<Chunk>> {
159    let rdata_data_section = pe
160        .sections
161        .iter()
162        .find(|section| section.name().unwrap_or_default() == ".rdata")
163        .context("No .rdata section found")?;
164    let rdata_shift = pe.image_base
165        + usize::try_from(rdata_data_section.virtual_address)
166            .context(".rdata virtual_address overflow")?
167        - usize::try_from(rdata_data_section.pointer_to_raw_data)
168            .context(".rdata pointer_to_raw_data overflow")?;
169
170    let mut chunks = Vec::new();
171    for export in &pe.exports {
172        if is_introspection_symbol(export.name.unwrap_or_default()) {
173            chunks.push(read_symbol_value_with_ptr_and_len(
174                &library_content[export.offset.context("No symbol offset")?..],
175                rdata_shift,
176                library_content,
177                pe.is_64,
178            )?);
179        }
180    }
181    Ok(chunks)
182}
183
184fn read_symbol_value_with_ptr_and_len(
185    value_slice: &[u8],
186    shift: usize,
187    full_library_content: &[u8],
188    is_64: bool,
189) -> Result<Chunk> {
190    let (ptr, len) = if is_64 {
191        let (ptr, len) = value_slice[..16].split_at(8);
192        let ptr = usize::try_from(u64::from_le_bytes(
193            ptr.try_into().context("Too short symbol value")?,
194        ))
195        .context("Pointer overflow")?;
196        let len = usize::try_from(u64::from_le_bytes(
197            len.try_into().context("Too short symbol value")?,
198        ))
199        .context("Length overflow")?;
200        (ptr, len)
201    } else {
202        let (ptr, len) = value_slice[..8].split_at(4);
203        let ptr = usize::try_from(u32::from_le_bytes(
204            ptr.try_into().context("Too short symbol value")?,
205        ))
206        .context("Pointer overflow")?;
207        let len = usize::try_from(u32::from_le_bytes(
208            len.try_into().context("Too short symbol value")?,
209        ))
210        .context("Length overflow")?;
211        (ptr, len)
212    };
213    let chunk = &full_library_content[ptr - shift..ptr - shift + len];
214    serde_json::from_slice(chunk).with_context(|| {
215        format!(
216            "Failed to parse introspection chunk: '{}'",
217            String::from_utf8_lossy(chunk)
218        )
219    })
220}
221
/// Tells whether `name` is the name of a symbol holding an introspection chunk.
///
/// A single leading underscore, if present, is ignored before checking for the
/// `PYO3_INTROSPECTION_0_` prefix.
fn is_introspection_symbol(name: &str) -> bool {
    let unprefixed = match name.strip_prefix('_') {
        Some(rest) => rest,
        None => name,
    };
    unprefixed.starts_with("PYO3_INTROSPECTION_0_")
}
227
228#[derive(Deserialize)]
229#[serde(tag = "type", rename_all = "lowercase")]
230enum Chunk {
231    Module {
232        id: String,
233        name: String,
234        members: Vec<String>,
235    },
236    Class {
237        id: String,
238        name: String,
239    },
240    Function {
241        id: String,
242        name: String,
243    },
244}
⚠️ Internal Docs ⚠️ Not Public API 👉 Official Docs Here