1use crate::model::{Class, Function, Module};
2use anyhow::{bail, Context, Result};
3use goblin::elf::Elf;
4use goblin::mach::symbols::N_SECT;
5use goblin::mach::{Mach, MachO, SingleArch};
6use goblin::pe::PE;
7use goblin::Object;
8use serde::Deserialize;
9use std::collections::HashMap;
10use std::fs;
11use std::path::Path;
12
13pub fn introspect_cdylib(library_path: impl AsRef<Path>, main_module_name: &str) -> Result<Module> {
17 let chunks = find_introspection_chunks_in_binary_object(library_path.as_ref())?;
18 parse_chunks(&chunks, main_module_name)
19}
20
21fn parse_chunks(chunks: &[Chunk], main_module_name: &str) -> Result<Module> {
23 let chunks_by_id = chunks
24 .iter()
25 .map(|c| {
26 (
27 match c {
28 Chunk::Module { id, .. } => id,
29 Chunk::Class { id, .. } => id,
30 Chunk::Function { id, .. } => id,
31 },
32 c,
33 )
34 })
35 .collect::<HashMap<_, _>>();
36 for chunk in chunks {
38 if let Chunk::Module {
39 name,
40 members,
41 id: _,
42 } = chunk
43 {
44 if name == main_module_name {
45 return parse_module(name, members, &chunks_by_id);
46 }
47 }
48 }
49 bail!("No module named {main_module_name} found")
50}
51
52fn parse_module(
53 name: &str,
54 members: &[String],
55 chunks_by_id: &HashMap<&String, &Chunk>,
56) -> Result<Module> {
57 let mut modules = Vec::new();
58 let mut classes = Vec::new();
59 let mut functions = Vec::new();
60 for member in members {
61 if let Some(chunk) = chunks_by_id.get(member) {
62 match chunk {
63 Chunk::Module {
64 name,
65 members,
66 id: _,
67 } => {
68 modules.push(parse_module(name, members, chunks_by_id)?);
69 }
70 Chunk::Class { name, id: _ } => classes.push(Class { name: name.into() }),
71 Chunk::Function { name, id: _ } => functions.push(Function { name: name.into() }),
72 }
73 }
74 }
75 Ok(Module {
76 name: name.into(),
77 modules,
78 classes,
79 functions,
80 })
81}
82
83fn find_introspection_chunks_in_binary_object(path: &Path) -> Result<Vec<Chunk>> {
84 let library_content =
85 fs::read(path).with_context(|| format!("Failed to read {}", path.display()))?;
86 match Object::parse(&library_content)
87 .context("The built library is not valid or not supported by our binary parser")?
88 {
89 Object::Elf(elf) => find_introspection_chunks_in_elf(&elf, &library_content),
90 Object::Mach(Mach::Binary(macho)) => {
91 find_introspection_chunks_in_macho(&macho, &library_content)
92 }
93 Object::Mach(Mach::Fat(multi_arch)) => {
94 for arch in &multi_arch {
95 match arch? {
96 SingleArch::MachO(macho) => {
97 return find_introspection_chunks_in_macho(&macho, &library_content)
98 }
99 SingleArch::Archive(_) => (),
100 }
101 }
102 bail!("No Mach-o chunk found in the multi-arch Mach-o container")
103 }
104 Object::PE(pe) => find_introspection_chunks_in_pe(&pe, &library_content),
105 _ => {
106 bail!("Only ELF, Mach-o and PE containers can be introspected")
107 }
108 }
109}
110
111fn find_introspection_chunks_in_elf(elf: &Elf<'_>, library_content: &[u8]) -> Result<Vec<Chunk>> {
112 let mut chunks = Vec::new();
113 for sym in &elf.syms {
114 if is_introspection_symbol(elf.strtab.get_at(sym.st_name).unwrap_or_default()) {
115 let section_header = &elf.section_headers[sym.st_shndx];
116 let data_offset = sym.st_value + section_header.sh_offset - section_header.sh_addr;
117 chunks.push(read_symbol_value_with_ptr_and_len(
118 &library_content[usize::try_from(data_offset).context("File offset overflow")?..],
119 0,
120 library_content,
121 elf.is_64,
122 )?);
123 }
124 }
125 Ok(chunks)
126}
127
128fn find_introspection_chunks_in_macho(
129 macho: &MachO<'_>,
130 library_content: &[u8],
131) -> Result<Vec<Chunk>> {
132 if !macho.little_endian {
133 bail!("Only little endian Mach-o binaries are supported");
134 }
135
136 let sections = macho
137 .segments
138 .sections()
139 .flatten()
140 .map(|t| t.map(|s| s.0))
141 .collect::<Result<Vec<_>, _>>()?;
142 let mut chunks = Vec::new();
143 for (name, nlist) in macho.symbols().flatten() {
144 if nlist.is_global() && nlist.get_type() == N_SECT && is_introspection_symbol(name) {
145 let section = §ions[nlist.n_sect];
146 let data_offset = nlist.n_value + u64::from(section.offset) - section.addr;
147 chunks.push(read_symbol_value_with_ptr_and_len(
148 &library_content[usize::try_from(data_offset).context("File offset overflow")?..],
149 0,
150 library_content,
151 macho.is_64,
152 )?);
153 }
154 }
155 Ok(chunks)
156}
157
158fn find_introspection_chunks_in_pe(pe: &PE<'_>, library_content: &[u8]) -> Result<Vec<Chunk>> {
159 let rdata_data_section = pe
160 .sections
161 .iter()
162 .find(|section| section.name().unwrap_or_default() == ".rdata")
163 .context("No .rdata section found")?;
164 let rdata_shift = pe.image_base
165 + usize::try_from(rdata_data_section.virtual_address)
166 .context(".rdata virtual_address overflow")?
167 - usize::try_from(rdata_data_section.pointer_to_raw_data)
168 .context(".rdata pointer_to_raw_data overflow")?;
169
170 let mut chunks = Vec::new();
171 for export in &pe.exports {
172 if is_introspection_symbol(export.name.unwrap_or_default()) {
173 chunks.push(read_symbol_value_with_ptr_and_len(
174 &library_content[export.offset.context("No symbol offset")?..],
175 rdata_shift,
176 library_content,
177 pe.is_64,
178 )?);
179 }
180 }
181 Ok(chunks)
182}
183
184fn read_symbol_value_with_ptr_and_len(
185 value_slice: &[u8],
186 shift: usize,
187 full_library_content: &[u8],
188 is_64: bool,
189) -> Result<Chunk> {
190 let (ptr, len) = if is_64 {
191 let (ptr, len) = value_slice[..16].split_at(8);
192 let ptr = usize::try_from(u64::from_le_bytes(
193 ptr.try_into().context("Too short symbol value")?,
194 ))
195 .context("Pointer overflow")?;
196 let len = usize::try_from(u64::from_le_bytes(
197 len.try_into().context("Too short symbol value")?,
198 ))
199 .context("Length overflow")?;
200 (ptr, len)
201 } else {
202 let (ptr, len) = value_slice[..8].split_at(4);
203 let ptr = usize::try_from(u32::from_le_bytes(
204 ptr.try_into().context("Too short symbol value")?,
205 ))
206 .context("Pointer overflow")?;
207 let len = usize::try_from(u32::from_le_bytes(
208 len.try_into().context("Too short symbol value")?,
209 ))
210 .context("Length overflow")?;
211 (ptr, len)
212 };
213 let chunk = &full_library_content[ptr - shift..ptr - shift + len];
214 serde_json::from_slice(chunk).with_context(|| {
215 format!(
216 "Failed to parse introspection chunk: '{}'",
217 String::from_utf8_lossy(chunk)
218 )
219 })
220}
221
/// Returns `true` when `name`, after removing at most one leading underscore, starts with the
/// introspection symbol prefix `PYO3_INTROSPECTION_0_`.
fn is_introspection_symbol(name: &str) -> bool {
    const PREFIX: &str = "PYO3_INTROSPECTION_0_";
    let unprefixed = match name.strip_prefix('_') {
        Some(rest) => rest,
        None => name,
    };
    unprefixed.starts_with(PREFIX)
}
227
228#[derive(Deserialize)]
229#[serde(tag = "type", rename_all = "lowercase")]
230enum Chunk {
231 Module {
232 id: String,
233 name: String,
234 members: Vec<String>,
235 },
236 Class {
237 id: String,
238 name: String,
239 },
240 Function {
241 id: String,
242 name: String,
243 },
244}