pyo3_introspection/
introspection.rs

1use crate::model::{Argument, Arguments, Class, Function, Module, VariableLengthArgument};
2use anyhow::{bail, ensure, Context, Result};
3use goblin::elf::Elf;
4use goblin::mach::load_command::CommandVariant;
5use goblin::mach::symbols::{NO_SECT, N_SECT};
6use goblin::mach::{Mach, MachO, SingleArch};
7use goblin::pe::PE;
8use goblin::Object;
9use serde::Deserialize;
10use std::collections::HashMap;
11use std::fs;
12use std::path::Path;
13
14/// Introspect a cdylib built with PyO3 and returns the definition of a Python module.
15///
16/// This function currently supports the ELF (most *nix including Linux), Match-O (macOS) and PE (Windows) formats.
17pub fn introspect_cdylib(library_path: impl AsRef<Path>, main_module_name: &str) -> Result<Module> {
18    let chunks = find_introspection_chunks_in_binary_object(library_path.as_ref())?;
19    parse_chunks(&chunks, main_module_name)
20}
21
22/// Parses the introspection chunks found in the binary
23fn parse_chunks(chunks: &[Chunk], main_module_name: &str) -> Result<Module> {
24    let chunks_by_id = chunks
25        .iter()
26        .map(|c| {
27            (
28                match c {
29                    Chunk::Module { id, .. } => id,
30                    Chunk::Class { id, .. } => id,
31                    Chunk::Function { id, .. } => id,
32                },
33                c,
34            )
35        })
36        .collect::<HashMap<_, _>>();
37    // We look for the root chunk
38    for chunk in chunks {
39        if let Chunk::Module {
40            name,
41            members,
42            id: _,
43        } = chunk
44        {
45            if name == main_module_name {
46                return convert_module(name, members, &chunks_by_id);
47            }
48        }
49    }
50    bail!("No module named {main_module_name} found")
51}
52
53fn convert_module(
54    name: &str,
55    members: &[String],
56    chunks_by_id: &HashMap<&String, &Chunk>,
57) -> Result<Module> {
58    let mut modules = Vec::new();
59    let mut classes = Vec::new();
60    let mut functions = Vec::new();
61    for member in members {
62        if let Some(chunk) = chunks_by_id.get(member) {
63            match chunk {
64                Chunk::Module {
65                    name,
66                    members,
67                    id: _,
68                } => {
69                    modules.push(convert_module(name, members, chunks_by_id)?);
70                }
71                Chunk::Class { name, id: _ } => classes.push(Class { name: name.into() }),
72                Chunk::Function {
73                    name,
74                    id: _,
75                    arguments,
76                } => functions.push(Function {
77                    name: name.into(),
78                    arguments: Arguments {
79                        positional_only_arguments: arguments
80                            .posonlyargs
81                            .iter()
82                            .map(convert_argument)
83                            .collect(),
84                        arguments: arguments.args.iter().map(convert_argument).collect(),
85                        vararg: arguments
86                            .vararg
87                            .as_ref()
88                            .map(convert_variable_length_argument),
89                        keyword_only_arguments: arguments
90                            .kwonlyargs
91                            .iter()
92                            .map(convert_argument)
93                            .collect(),
94                        kwarg: arguments
95                            .kwarg
96                            .as_ref()
97                            .map(convert_variable_length_argument),
98                    },
99                }),
100            }
101        }
102    }
103    Ok(Module {
104        name: name.into(),
105        modules,
106        classes,
107        functions,
108    })
109}
110
111fn convert_argument(arg: &ChunkArgument) -> Argument {
112    Argument {
113        name: arg.name.clone(),
114        default_value: arg.default.clone(),
115    }
116}
117
118fn convert_variable_length_argument(arg: &ChunkArgument) -> VariableLengthArgument {
119    VariableLengthArgument {
120        name: arg.name.clone(),
121    }
122}
123
124fn find_introspection_chunks_in_binary_object(path: &Path) -> Result<Vec<Chunk>> {
125    let library_content =
126        fs::read(path).with_context(|| format!("Failed to read {}", path.display()))?;
127    match Object::parse(&library_content)
128        .context("The built library is not valid or not supported by our binary parser")?
129    {
130        Object::Elf(elf) => find_introspection_chunks_in_elf(&elf, &library_content),
131        Object::Mach(Mach::Binary(macho)) => {
132            find_introspection_chunks_in_macho(&macho, &library_content)
133        }
134        Object::Mach(Mach::Fat(multi_arch)) => {
135            for arch in &multi_arch {
136                match arch? {
137                    SingleArch::MachO(macho) => {
138                        return find_introspection_chunks_in_macho(&macho, &library_content)
139                    }
140                    SingleArch::Archive(_) => (),
141                }
142            }
143            bail!("No Mach-o chunk found in the multi-arch Mach-o container")
144        }
145        Object::PE(pe) => find_introspection_chunks_in_pe(&pe, &library_content),
146        _ => {
147            bail!("Only ELF, Mach-o and PE containers can be introspected")
148        }
149    }
150}
151
152fn find_introspection_chunks_in_elf(elf: &Elf<'_>, library_content: &[u8]) -> Result<Vec<Chunk>> {
153    let mut chunks = Vec::new();
154    for sym in &elf.syms {
155        if is_introspection_symbol(elf.strtab.get_at(sym.st_name).unwrap_or_default()) {
156            let section_header = &elf.section_headers[sym.st_shndx];
157            let data_offset = sym.st_value + section_header.sh_offset - section_header.sh_addr;
158            chunks.push(read_symbol_value_with_ptr_and_len(
159                &library_content[usize::try_from(data_offset).context("File offset overflow")?..],
160                0,
161                library_content,
162                elf.is_64,
163            )?);
164        }
165    }
166    Ok(chunks)
167}
168
169fn find_introspection_chunks_in_macho(
170    macho: &MachO<'_>,
171    library_content: &[u8],
172) -> Result<Vec<Chunk>> {
173    if !macho.little_endian {
174        bail!("Only little endian Mach-o binaries are supported");
175    }
176    ensure!(
177        !macho.load_commands.iter().any(|command| {
178            matches!(command.command, CommandVariant::DyldChainedFixups(_))
179        }),
180        "Mach-O binaries with fixup chains are not supported yet, to avoid using fixup chains, use `--codegen=link-arg=-no_fixup_chains` option."
181    );
182
183    let sections = macho
184        .segments
185        .sections()
186        .flatten()
187        .map(|t| t.map(|s| s.0))
188        .collect::<Result<Vec<_>, _>>()?;
189    let mut chunks = Vec::new();
190    for symbol in macho.symbols() {
191        let (name, nlist) = symbol?;
192        if nlist.is_global()
193            && nlist.get_type() == N_SECT
194            && nlist.n_sect != NO_SECT as usize
195            && is_introspection_symbol(name)
196        {
197            let section = &sections[nlist.n_sect - 1]; // Sections are counted from 1
198            let data_offset = nlist.n_value + u64::from(section.offset) - section.addr;
199            chunks.push(read_symbol_value_with_ptr_and_len(
200                &library_content[usize::try_from(data_offset).context("File offset overflow")?..],
201                0,
202                library_content,
203                macho.is_64,
204            )?);
205        }
206    }
207    Ok(chunks)
208}
209
210fn find_introspection_chunks_in_pe(pe: &PE<'_>, library_content: &[u8]) -> Result<Vec<Chunk>> {
211    let rdata_data_section = pe
212        .sections
213        .iter()
214        .find(|section| section.name().unwrap_or_default() == ".rdata")
215        .context("No .rdata section found")?;
216    let rdata_shift = pe.image_base
217        + usize::try_from(rdata_data_section.virtual_address)
218            .context(".rdata virtual_address overflow")?
219        - usize::try_from(rdata_data_section.pointer_to_raw_data)
220            .context(".rdata pointer_to_raw_data overflow")?;
221
222    let mut chunks = Vec::new();
223    for export in &pe.exports {
224        if is_introspection_symbol(export.name.unwrap_or_default()) {
225            chunks.push(read_symbol_value_with_ptr_and_len(
226                &library_content[export.offset.context("No symbol offset")?..],
227                rdata_shift,
228                library_content,
229                pe.is_64,
230            )?);
231        }
232    }
233    Ok(chunks)
234}
235
236fn read_symbol_value_with_ptr_and_len(
237    value_slice: &[u8],
238    shift: usize,
239    full_library_content: &[u8],
240    is_64: bool,
241) -> Result<Chunk> {
242    let (ptr, len) = if is_64 {
243        let (ptr, len) = value_slice[..16].split_at(8);
244        let ptr = usize::try_from(u64::from_le_bytes(
245            ptr.try_into().context("Too short symbol value")?,
246        ))
247        .context("Pointer overflow")?;
248        let len = usize::try_from(u64::from_le_bytes(
249            len.try_into().context("Too short symbol value")?,
250        ))
251        .context("Length overflow")?;
252        (ptr, len)
253    } else {
254        let (ptr, len) = value_slice[..8].split_at(4);
255        let ptr = usize::try_from(u32::from_le_bytes(
256            ptr.try_into().context("Too short symbol value")?,
257        ))
258        .context("Pointer overflow")?;
259        let len = usize::try_from(u32::from_le_bytes(
260            len.try_into().context("Too short symbol value")?,
261        ))
262        .context("Length overflow")?;
263        (ptr, len)
264    };
265    let chunk = &full_library_content[ptr - shift..ptr - shift + len];
266    serde_json::from_slice(chunk).with_context(|| {
267        format!(
268            "Failed to parse introspection chunk: '{}'",
269            String::from_utf8_lossy(chunk)
270        )
271    })
272}
273
/// Tells whether `name` is the name of an introspection symbol.
///
/// A single leading `_`, if present, is ignored before checking that the name starts with
/// `PYO3_INTROSPECTION_0_`.
fn is_introspection_symbol(name: &str) -> bool {
    const MARKER: &str = "PYO3_INTROSPECTION_0_";
    let unprefixed = match name.strip_prefix('_') {
        Some(rest) => rest,
        None => name,
    };
    unprefixed.starts_with(MARKER)
}
279
280#[derive(Deserialize)]
281#[serde(tag = "type", rename_all = "lowercase")]
282enum Chunk {
283    Module {
284        id: String,
285        name: String,
286        members: Vec<String>,
287    },
288    Class {
289        id: String,
290        name: String,
291    },
292    Function {
293        id: String,
294        name: String,
295        arguments: ChunkArguments,
296    },
297}
298
299#[derive(Deserialize)]
300struct ChunkArguments {
301    #[serde(default)]
302    posonlyargs: Vec<ChunkArgument>,
303    #[serde(default)]
304    args: Vec<ChunkArgument>,
305    #[serde(default)]
306    vararg: Option<ChunkArgument>,
307    #[serde(default)]
308    kwonlyargs: Vec<ChunkArgument>,
309    #[serde(default)]
310    kwarg: Option<ChunkArgument>,
311}
312
313#[derive(Deserialize)]
314struct ChunkArgument {
315    name: String,
316    #[serde(default)]
317    default: Option<String>,
318}
⚠️ Internal Docs ⚠️ Not Public API 👉 Official Docs Here