1use crate::model::{Argument, Arguments, Class, Function, Module, VariableLengthArgument};
2use anyhow::{bail, ensure, Context, Result};
3use goblin::elf::Elf;
4use goblin::mach::load_command::CommandVariant;
5use goblin::mach::symbols::{NO_SECT, N_SECT};
6use goblin::mach::{Mach, MachO, SingleArch};
7use goblin::pe::PE;
8use goblin::Object;
9use serde::Deserialize;
10use std::collections::HashMap;
11use std::fs;
12use std::path::Path;
13
14pub fn introspect_cdylib(library_path: impl AsRef<Path>, main_module_name: &str) -> Result<Module> {
18 let chunks = find_introspection_chunks_in_binary_object(library_path.as_ref())?;
19 parse_chunks(&chunks, main_module_name)
20}
21
22fn parse_chunks(chunks: &[Chunk], main_module_name: &str) -> Result<Module> {
24 let chunks_by_id = chunks
25 .iter()
26 .map(|c| {
27 (
28 match c {
29 Chunk::Module { id, .. } => id,
30 Chunk::Class { id, .. } => id,
31 Chunk::Function { id, .. } => id,
32 },
33 c,
34 )
35 })
36 .collect::<HashMap<_, _>>();
37 for chunk in chunks {
39 if let Chunk::Module {
40 name,
41 members,
42 id: _,
43 } = chunk
44 {
45 if name == main_module_name {
46 return convert_module(name, members, &chunks_by_id);
47 }
48 }
49 }
50 bail!("No module named {main_module_name} found")
51}
52
53fn convert_module(
54 name: &str,
55 members: &[String],
56 chunks_by_id: &HashMap<&String, &Chunk>,
57) -> Result<Module> {
58 let mut modules = Vec::new();
59 let mut classes = Vec::new();
60 let mut functions = Vec::new();
61 for member in members {
62 if let Some(chunk) = chunks_by_id.get(member) {
63 match chunk {
64 Chunk::Module {
65 name,
66 members,
67 id: _,
68 } => {
69 modules.push(convert_module(name, members, chunks_by_id)?);
70 }
71 Chunk::Class { name, id: _ } => classes.push(Class { name: name.into() }),
72 Chunk::Function {
73 name,
74 id: _,
75 arguments,
76 } => functions.push(Function {
77 name: name.into(),
78 arguments: Arguments {
79 positional_only_arguments: arguments
80 .posonlyargs
81 .iter()
82 .map(convert_argument)
83 .collect(),
84 arguments: arguments.args.iter().map(convert_argument).collect(),
85 vararg: arguments
86 .vararg
87 .as_ref()
88 .map(convert_variable_length_argument),
89 keyword_only_arguments: arguments
90 .kwonlyargs
91 .iter()
92 .map(convert_argument)
93 .collect(),
94 kwarg: arguments
95 .kwarg
96 .as_ref()
97 .map(convert_variable_length_argument),
98 },
99 }),
100 }
101 }
102 }
103 Ok(Module {
104 name: name.into(),
105 modules,
106 classes,
107 functions,
108 })
109}
110
111fn convert_argument(arg: &ChunkArgument) -> Argument {
112 Argument {
113 name: arg.name.clone(),
114 default_value: arg.default.clone(),
115 }
116}
117
118fn convert_variable_length_argument(arg: &ChunkArgument) -> VariableLengthArgument {
119 VariableLengthArgument {
120 name: arg.name.clone(),
121 }
122}
123
124fn find_introspection_chunks_in_binary_object(path: &Path) -> Result<Vec<Chunk>> {
125 let library_content =
126 fs::read(path).with_context(|| format!("Failed to read {}", path.display()))?;
127 match Object::parse(&library_content)
128 .context("The built library is not valid or not supported by our binary parser")?
129 {
130 Object::Elf(elf) => find_introspection_chunks_in_elf(&elf, &library_content),
131 Object::Mach(Mach::Binary(macho)) => {
132 find_introspection_chunks_in_macho(&macho, &library_content)
133 }
134 Object::Mach(Mach::Fat(multi_arch)) => {
135 for arch in &multi_arch {
136 match arch? {
137 SingleArch::MachO(macho) => {
138 return find_introspection_chunks_in_macho(&macho, &library_content)
139 }
140 SingleArch::Archive(_) => (),
141 }
142 }
143 bail!("No Mach-o chunk found in the multi-arch Mach-o container")
144 }
145 Object::PE(pe) => find_introspection_chunks_in_pe(&pe, &library_content),
146 _ => {
147 bail!("Only ELF, Mach-o and PE containers can be introspected")
148 }
149 }
150}
151
152fn find_introspection_chunks_in_elf(elf: &Elf<'_>, library_content: &[u8]) -> Result<Vec<Chunk>> {
153 let mut chunks = Vec::new();
154 for sym in &elf.syms {
155 if is_introspection_symbol(elf.strtab.get_at(sym.st_name).unwrap_or_default()) {
156 let section_header = &elf.section_headers[sym.st_shndx];
157 let data_offset = sym.st_value + section_header.sh_offset - section_header.sh_addr;
158 chunks.push(read_symbol_value_with_ptr_and_len(
159 &library_content[usize::try_from(data_offset).context("File offset overflow")?..],
160 0,
161 library_content,
162 elf.is_64,
163 )?);
164 }
165 }
166 Ok(chunks)
167}
168
169fn find_introspection_chunks_in_macho(
170 macho: &MachO<'_>,
171 library_content: &[u8],
172) -> Result<Vec<Chunk>> {
173 if !macho.little_endian {
174 bail!("Only little endian Mach-o binaries are supported");
175 }
176 ensure!(
177 !macho.load_commands.iter().any(|command| {
178 matches!(command.command, CommandVariant::DyldChainedFixups(_))
179 }),
180 "Mach-O binaries with fixup chains are not supported yet, to avoid using fixup chains, use `--codegen=link-arg=-no_fixup_chains` option."
181 );
182
183 let sections = macho
184 .segments
185 .sections()
186 .flatten()
187 .map(|t| t.map(|s| s.0))
188 .collect::<Result<Vec<_>, _>>()?;
189 let mut chunks = Vec::new();
190 for symbol in macho.symbols() {
191 let (name, nlist) = symbol?;
192 if nlist.is_global()
193 && nlist.get_type() == N_SECT
194 && nlist.n_sect != NO_SECT as usize
195 && is_introspection_symbol(name)
196 {
197 let section = §ions[nlist.n_sect - 1]; let data_offset = nlist.n_value + u64::from(section.offset) - section.addr;
199 chunks.push(read_symbol_value_with_ptr_and_len(
200 &library_content[usize::try_from(data_offset).context("File offset overflow")?..],
201 0,
202 library_content,
203 macho.is_64,
204 )?);
205 }
206 }
207 Ok(chunks)
208}
209
210fn find_introspection_chunks_in_pe(pe: &PE<'_>, library_content: &[u8]) -> Result<Vec<Chunk>> {
211 let rdata_data_section = pe
212 .sections
213 .iter()
214 .find(|section| section.name().unwrap_or_default() == ".rdata")
215 .context("No .rdata section found")?;
216 let rdata_shift = pe.image_base
217 + usize::try_from(rdata_data_section.virtual_address)
218 .context(".rdata virtual_address overflow")?
219 - usize::try_from(rdata_data_section.pointer_to_raw_data)
220 .context(".rdata pointer_to_raw_data overflow")?;
221
222 let mut chunks = Vec::new();
223 for export in &pe.exports {
224 if is_introspection_symbol(export.name.unwrap_or_default()) {
225 chunks.push(read_symbol_value_with_ptr_and_len(
226 &library_content[export.offset.context("No symbol offset")?..],
227 rdata_shift,
228 library_content,
229 pe.is_64,
230 )?);
231 }
232 }
233 Ok(chunks)
234}
235
236fn read_symbol_value_with_ptr_and_len(
237 value_slice: &[u8],
238 shift: usize,
239 full_library_content: &[u8],
240 is_64: bool,
241) -> Result<Chunk> {
242 let (ptr, len) = if is_64 {
243 let (ptr, len) = value_slice[..16].split_at(8);
244 let ptr = usize::try_from(u64::from_le_bytes(
245 ptr.try_into().context("Too short symbol value")?,
246 ))
247 .context("Pointer overflow")?;
248 let len = usize::try_from(u64::from_le_bytes(
249 len.try_into().context("Too short symbol value")?,
250 ))
251 .context("Length overflow")?;
252 (ptr, len)
253 } else {
254 let (ptr, len) = value_slice[..8].split_at(4);
255 let ptr = usize::try_from(u32::from_le_bytes(
256 ptr.try_into().context("Too short symbol value")?,
257 ))
258 .context("Pointer overflow")?;
259 let len = usize::try_from(u32::from_le_bytes(
260 len.try_into().context("Too short symbol value")?,
261 ))
262 .context("Length overflow")?;
263 (ptr, len)
264 };
265 let chunk = &full_library_content[ptr - shift..ptr - shift + len];
266 serde_json::from_slice(chunk).with_context(|| {
267 format!(
268 "Failed to parse introspection chunk: '{}'",
269 String::from_utf8_lossy(chunk)
270 )
271 })
272}
273
/// Tells whether `name` is the name of an introspection symbol.
///
/// A single leading underscore, if present, is ignored before checking for
/// the `PYO3_INTROSPECTION_0_` prefix.
fn is_introspection_symbol(name: &str) -> bool {
    const MARKER: &str = "PYO3_INTROSPECTION_0_";
    let unprefixed = match name.strip_prefix('_') {
        Some(rest) => rest,
        None => name,
    };
    unprefixed.starts_with(MARKER)
}
279
/// One introspection record, deserialized from the JSON embedded in the library.
///
/// The `type` field of the JSON object selects the variant, using the
/// lowercase variant name (`module`, `class` or `function`).
#[derive(Deserialize)]
#[serde(tag = "type", rename_all = "lowercase")]
enum Chunk {
    /// A module together with the ids of the chunks it contains.
    Module {
        /// Identifier under which this chunk is indexed and referenced.
        id: String,
        /// Name of the module.
        name: String,
        /// Ids of the chunks that are members of this module.
        members: Vec<String>,
    },
    /// A class.
    Class {
        /// Identifier under which this chunk is indexed and referenced.
        id: String,
        /// Name of the class.
        name: String,
    },
    /// A function and the description of its arguments.
    Function {
        /// Identifier under which this chunk is indexed and referenced.
        id: String,
        /// Name of the function.
        name: String,
        /// Arguments accepted by the function.
        arguments: ChunkArguments,
    },
}
298
/// Arguments of a function chunk, grouped by kind.
///
/// Every field is optional in the JSON: a missing list deserializes to an
/// empty list and a missing single argument to `None`.
#[derive(Deserialize)]
struct ChunkArguments {
    /// Positional-only arguments.
    #[serde(default)]
    posonlyargs: Vec<ChunkArgument>,
    /// Regular arguments.
    #[serde(default)]
    args: Vec<ChunkArgument>,
    /// Variable-length positional argument, if any.
    #[serde(default)]
    vararg: Option<ChunkArgument>,
    /// Keyword-only arguments.
    #[serde(default)]
    kwonlyargs: Vec<ChunkArgument>,
    /// Variable-length keyword argument, if any.
    #[serde(default)]
    kwarg: Option<ChunkArgument>,
}
312
/// A single argument of a function chunk.
#[derive(Deserialize)]
struct ChunkArgument {
    /// Name of the argument.
    name: String,
    /// Default value of the argument as a string, `None` when the JSON has no
    /// `default` field.
    #[serde(default)]
    default: Option<String>,
}