use crate::model::{Argument, Arguments, Class, Const, Function, Module, VariableLengthArgument};
use anyhow::{bail, ensure, Context, Result};
use goblin::elf::Elf;
use goblin::mach::load_command::CommandVariant;
use goblin::mach::symbols::{NO_SECT, N_SECT};
use goblin::mach::{Mach, MachO, SingleArch};
use goblin::pe::PE;
use goblin::Object;
use serde::Deserialize;
use std::cmp::Ordering;
use std::collections::HashMap;
use std::fs;
use std::path::Path;

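/// Parses the given cdylib (a compiled Python extension module) and returns the description of
/// the Python module named `main_module_name` embedded in it as introspection data.
///
/// Minimal usage sketch (the library path and module name below are illustrative):
/// ```ignore
/// let module = introspect_cdylib("target/release/libexample.so", "example")?;
/// println!("{} top-level functions", module.functions.len());
/// ```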
pub fn introspect_cdylib(library_path: impl AsRef<Path>, main_module_name: &str) -> Result<Module> {
    let chunks = find_introspection_chunks_in_binary_object(library_path.as_ref())?;
    parse_chunks(&chunks, main_module_name)
}

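/// Assembles the flat list of introspection chunks into a [`Module`] tree, starting from the
/// module chunk named `main_module_name`.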
fn parse_chunks(chunks: &[Chunk], main_module_name: &str) -> Result<Module> {
    let mut chunks_by_id = HashMap::<&str, &Chunk>::new();
    let mut chunks_by_parent = HashMap::<&str, Vec<&Chunk>>::new();
    for chunk in chunks {
        if let Some(id) = match chunk {
            Chunk::Module { id, .. } => Some(id),
            Chunk::Class { id, .. } => Some(id),
            Chunk::Function { id, .. } => id.as_ref(),
        } {
            chunks_by_id.insert(id, chunk);
        }
        if let Some(parent) = match chunk {
            Chunk::Module { .. } | Chunk::Class { .. } => None,
            Chunk::Function { parent, .. } => parent.as_ref(),
        } {
            chunks_by_parent.entry(parent).or_default().push(chunk);
        }
    }
    for chunk in chunks {
        if let Chunk::Module {
            name,
            members,
            consts,
            id: _,
        } = chunk
        {
            if name == main_module_name {
                return convert_module(name, members, consts, &chunks_by_id, &chunks_by_parent);
            }
        }
    }
    bail!("No module named {main_module_name} found")
}

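/// Converts a module chunk into a [`Module`], resolving its `members` ids against the chunk index.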
fn convert_module(
    name: &str,
    members: &[String],
    consts: &[ConstChunk],
    chunks_by_id: &HashMap<&str, &Chunk>,
    chunks_by_parent: &HashMap<&str, Vec<&Chunk>>,
) -> Result<Module> {
    let (modules, classes, functions) = convert_members(
        &members
            .iter()
            .filter_map(|id| chunks_by_id.get(id.as_str()).copied())
            .collect::<Vec<_>>(),
        chunks_by_id,
        chunks_by_parent,
    )?;

    Ok(Module {
        name: name.into(),
        modules,
        classes,
        functions,
        consts: consts
            .iter()
            .map(|c| Const {
                name: c.name.clone(),
                value: c.value.clone(),
            })
            .collect(),
    })
}

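/// Splits a list of member chunks into the modules, classes and functions they describe.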
fn convert_members(
    chunks: &[&Chunk],
    chunks_by_id: &HashMap<&str, &Chunk>,
    chunks_by_parent: &HashMap<&str, Vec<&Chunk>>,
) -> Result<(Vec<Module>, Vec<Class>, Vec<Function>)> {
    let mut modules = Vec::new();
    let mut classes = Vec::new();
    let mut functions = Vec::new();
    for chunk in chunks {
        match chunk {
            Chunk::Module {
                name,
                members,
                consts,
                id: _,
            } => {
                modules.push(convert_module(
                    name,
                    members,
                    consts,
                    chunks_by_id,
                    chunks_by_parent,
                )?);
            }
            Chunk::Class { name, id } => {
                classes.push(convert_class(id, name, chunks_by_id, chunks_by_parent)?)
            }
            Chunk::Function {
                name,
                id: _,
                arguments,
                parent: _,
                decorators,
                returns,
            } => functions.push(convert_function(name, arguments, decorators, returns)),
        }
    }
    Ok((modules, classes, functions))
}

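/// Converts a class chunk into a [`Class`], collecting its methods from the chunks whose
/// `parent` is the class id.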
fn convert_class(
    id: &str,
    name: &str,
    chunks_by_id: &HashMap<&str, &Chunk>,
    chunks_by_parent: &HashMap<&str, Vec<&Chunk>>,
) -> Result<Class> {
    let (nested_modules, nested_classes, mut methods) = convert_members(
        chunks_by_parent
            .get(&id)
            .map(Vec::as_slice)
            .unwrap_or_default(),
        chunks_by_id,
        chunks_by_parent,
    )?;
    ensure!(
        nested_modules.is_empty(),
        "Classes cannot contain nested modules"
    );
    ensure!(
        nested_classes.is_empty(),
        "Nested classes are not supported yet"
    );
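    // Sort methods by name so the output is deterministic; when names collide (e.g. a property
    // getter and its setter), the method decorated with `property` is listed first.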
    methods.sort_by(|l, r| match l.name.cmp(&r.name) {
        Ordering::Equal => {
            if l.decorators.iter().any(|d| d == "property") {
                Ordering::Less
            } else if r.decorators.iter().any(|d| d == "property") {
                Ordering::Greater
            } else {
                l.decorators.cmp(&r.decorators)
            }
        }
        o => o,
    });
    Ok(Class {
        name: name.into(),
        methods,
    })
}

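/// Converts a function chunk into a [`Function`], mapping its argument lists onto [`Arguments`].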
fn convert_function(
    name: &str,
    arguments: &ChunkArguments,
    decorators: &[String],
    returns: &Option<String>,
) -> Function {
    Function {
        name: name.into(),
        decorators: decorators.to_vec(),
        arguments: Arguments {
            positional_only_arguments: arguments.posonlyargs.iter().map(convert_argument).collect(),
            arguments: arguments.args.iter().map(convert_argument).collect(),
            vararg: arguments
                .vararg
                .as_ref()
                .map(convert_variable_length_argument),
            keyword_only_arguments: arguments.kwonlyargs.iter().map(convert_argument).collect(),
            kwarg: arguments
                .kwarg
                .as_ref()
                .map(convert_variable_length_argument),
        },
        returns: returns.clone(),
    }
}

fn convert_argument(arg: &ChunkArgument) -> Argument {
    Argument {
        name: arg.name.clone(),
        default_value: arg.default.clone(),
        annotation: arg.annotation.clone(),
    }
}

fn convert_variable_length_argument(arg: &ChunkArgument) -> VariableLengthArgument {
    VariableLengthArgument {
        name: arg.name.clone(),
    }
}

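/// Reads the library from disk and dispatches to the format-specific chunk extraction
/// (ELF, Mach-O or PE) depending on what goblin detects.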
fn find_introspection_chunks_in_binary_object(path: &Path) -> Result<Vec<Chunk>> {
    let library_content =
        fs::read(path).with_context(|| format!("Failed to read {}", path.display()))?;
    match Object::parse(&library_content)
        .context("The built library is not valid or not supported by our binary parser")?
    {
        Object::Elf(elf) => find_introspection_chunks_in_elf(&elf, &library_content),
        Object::Mach(Mach::Binary(macho)) => {
            find_introspection_chunks_in_macho(&macho, &library_content)
        }
        Object::Mach(Mach::Fat(multi_arch)) => {
            for arch in &multi_arch {
                match arch? {
                    SingleArch::MachO(macho) => {
                        return find_introspection_chunks_in_macho(&macho, &library_content)
                    }
                    SingleArch::Archive(_) => (),
                }
            }
            bail!("No Mach-O binary found in the multi-arch Mach-O container")
        }
        Object::PE(pe) => find_introspection_chunks_in_pe(&pe, &library_content),
        _ => {
            bail!("Only ELF, Mach-O and PE binaries can be introspected")
        }
    }
}

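/// Walks the ELF symbol table and decodes every introspection symbol, translating its virtual
/// address into a file offset through the containing section header.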
fn find_introspection_chunks_in_elf(elf: &Elf<'_>, library_content: &[u8]) -> Result<Vec<Chunk>> {
    let mut chunks = Vec::new();
    for sym in &elf.syms {
        if is_introspection_symbol(elf.strtab.get_at(sym.st_name).unwrap_or_default()) {
            let section_header = &elf.section_headers[sym.st_shndx];
            let data_offset = sym.st_value + section_header.sh_offset - section_header.sh_addr;
            chunks.push(read_symbol_value_with_ptr_and_len(
                &library_content[usize::try_from(data_offset).context("File offset overflow")?..],
                0,
                library_content,
                elf.is_64,
            )?);
        }
    }
    Ok(chunks)
}

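/// Walks the Mach-O symbol table and decodes every introspection symbol, translating its address
/// into a file offset through the section it belongs to.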
fn find_introspection_chunks_in_macho(
    macho: &MachO<'_>,
    library_content: &[u8],
) -> Result<Vec<Chunk>> {
    if !macho.little_endian {
        bail!("Only little-endian Mach-O binaries are supported");
    }
    ensure!(
        !macho.load_commands.iter().any(|command| {
            matches!(command.command, CommandVariant::DyldChainedFixups(_))
        }),
        "Mach-O binaries with fixup chains are not supported yet. To avoid fixup chains, build with the `--codegen=link-arg=-no_fixup_chains` option."
    );

    let sections = macho
        .segments
        .sections()
        .flatten()
        .map(|t| t.map(|s| s.0))
        .collect::<Result<Vec<_>, _>>()?;
    let mut chunks = Vec::new();
    for symbol in macho.symbols() {
        let (name, nlist) = symbol?;
        if nlist.is_global()
            && nlist.get_type() == N_SECT
            && nlist.n_sect != NO_SECT as usize
            && is_introspection_symbol(name)
        {
            let section = &sections[nlist.n_sect - 1];
            let data_offset = nlist.n_value + u64::from(section.offset) - section.addr;
            chunks.push(read_symbol_value_with_ptr_and_len(
                &library_content[usize::try_from(data_offset).context("File offset overflow")?..],
                0,
                library_content,
                macho.is_64,
            )?);
        }
    }
    Ok(chunks)
}

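/// Walks the PE export table and decodes every introspection export, using the `.rdata` section
/// layout to translate virtual addresses into file offsets.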
fn find_introspection_chunks_in_pe(pe: &PE<'_>, library_content: &[u8]) -> Result<Vec<Chunk>> {
    let rdata_data_section = pe
        .sections
        .iter()
        .find(|section| section.name().unwrap_or_default() == ".rdata")
        .context("No .rdata section found")?;
    let rdata_shift = pe.image_base
        + usize::try_from(rdata_data_section.virtual_address)
            .context(".rdata virtual_address overflow")?
        - usize::try_from(rdata_data_section.pointer_to_raw_data)
            .context(".rdata pointer_to_raw_data overflow")?;

    let mut chunks = Vec::new();
    for export in &pe.exports {
        if is_introspection_symbol(export.name.unwrap_or_default()) {
            chunks.push(read_symbol_value_with_ptr_and_len(
                &library_content[export.offset.context("No symbol offset")?..],
                rdata_shift,
                library_content,
                pe.is_64,
            )?);
        }
    }
    Ok(chunks)
}

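/// Decodes a symbol value laid out as a (pointer, length) pair, resolves the pointer (minus
/// `shift`) inside the library file and parses the pointed-to bytes as a JSON introspection chunk.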
fn read_symbol_value_with_ptr_and_len(
    value_slice: &[u8],
    shift: usize,
    full_library_content: &[u8],
    is_64: bool,
) -> Result<Chunk> {
    let (ptr, len) = if is_64 {
        let (ptr, len) = value_slice[..16].split_at(8);
        let ptr = usize::try_from(u64::from_le_bytes(
            ptr.try_into().context("Too short symbol value")?,
        ))
        .context("Pointer overflow")?;
        let len = usize::try_from(u64::from_le_bytes(
            len.try_into().context("Too short symbol value")?,
        ))
        .context("Length overflow")?;
        (ptr, len)
    } else {
        let (ptr, len) = value_slice[..8].split_at(4);
        let ptr = usize::try_from(u32::from_le_bytes(
            ptr.try_into().context("Too short symbol value")?,
        ))
        .context("Pointer overflow")?;
        let len = usize::try_from(u32::from_le_bytes(
            len.try_into().context("Too short symbol value")?,
        ))
        .context("Length overflow")?;
        (ptr, len)
    };
    let chunk = &full_library_content[ptr - shift..ptr - shift + len];
    serde_json::from_slice(chunk).with_context(|| {
        format!(
            "Failed to parse introspection chunk: '{}'",
            String::from_utf8_lossy(chunk)
        )
    })
}

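/// Returns `true` for `PYO3_INTROSPECTION_0_*` symbols, ignoring the leading underscore that some
/// platforms prepend to symbol names.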
fn is_introspection_symbol(name: &str) -> bool {
    name.strip_prefix('_')
        .unwrap_or(name)
        .starts_with("PYO3_INTROSPECTION_0_")
}

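/// An introspection chunk deserialized from the JSON embedded in the binary; the `type` field
/// selects the variant.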
#[derive(Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
enum Chunk {
    Module {
        id: String,
        name: String,
        members: Vec<String>,
        consts: Vec<ConstChunk>,
    },
    Class {
        id: String,
        name: String,
    },
    Function {
        #[serde(default)]
        id: Option<String>,
        name: String,
        arguments: Box<ChunkArguments>,
        #[serde(default)]
        parent: Option<String>,
        #[serde(default)]
        decorators: Vec<String>,
        #[serde(default)]
        returns: Option<String>,
    },
}

#[derive(Deserialize)]
struct ConstChunk {
    name: String,
    value: String,
}

#[derive(Deserialize)]
struct ChunkArguments {
    #[serde(default)]
    posonlyargs: Vec<ChunkArgument>,
    #[serde(default)]
    args: Vec<ChunkArgument>,
    #[serde(default)]
    vararg: Option<ChunkArgument>,
    #[serde(default)]
    kwonlyargs: Vec<ChunkArgument>,
    #[serde(default)]
    kwarg: Option<ChunkArgument>,
}

#[derive(Deserialize)]
struct ChunkArgument {
    name: String,
    #[serde(default)]
    default: Option<String>,
    #[serde(default)]
    annotation: Option<String>,
}