pyo3/types/bytes.rs
1use crate::byteswriter::PyBytesWriter;
2use crate::ffi_ptr_ext::FfiPtrExt;
3use crate::instance::{Borrowed, Bound};
4use crate::{ffi, Py, PyAny, PyResult, Python};
5use std::io::Write;
6use std::ops::Index;
7use std::slice::SliceIndex;
8use std::str;
9
/// Represents a Python `bytes` object.
///
/// This type is immutable.
///
/// Values of this type are accessed via PyO3's smart pointers, e.g. as
/// [`Py<PyBytes>`][crate::Py] or [`Bound<'py, PyBytes>`][Bound].
///
/// For APIs available on `bytes` objects, see the [`PyBytesMethods`] trait which is implemented for
/// [`Bound<'py, PyBytes>`][Bound].
///
/// # Equality
///
/// For convenience, [`Bound<'py, PyBytes>`][Bound] implements [`PartialEq<[u8]>`][PartialEq] to allow comparing the
/// data in the Python bytes to a Rust `[u8]` byte slice.
///
/// This is not always the most appropriate way to compare Python bytes, as Python bytes subclasses
/// may have different equality semantics. In situations where subclasses overriding equality might
/// be relevant, use [`PyAnyMethods::eq`](crate::types::any::PyAnyMethods::eq), at the cost of the
/// additional overhead of a Python method call.
///
/// ```rust
/// # use pyo3::prelude::*;
/// use pyo3::types::PyBytes;
///
/// # Python::attach(|py| {
/// let py_bytes = PyBytes::new(py, b"foo".as_slice());
/// // via PartialEq<[u8]>
/// assert_eq!(py_bytes, b"foo".as_slice());
///
/// // via Python equality
/// let other = PyBytes::new(py, b"foo".as_slice());
/// assert!(py_bytes.as_any().eq(other).unwrap());
///
/// // Note that `eq` will convert its argument to Python using `IntoPyObject`.
/// // Byte collections are specialized, so that the following slice will indeed
/// // convert into a `bytes` object and not a `list`:
/// assert!(py_bytes.as_any().eq(b"foo".as_slice()).unwrap());
/// # });
/// ```
// `repr(transparent)` gives this wrapper the same layout as its single `PyAny` field.
#[repr(transparent)]
pub struct PyBytes(PyAny);
51
// Declares `PyBytes` as a native type: `ffi::PyBytes_Type` is its static type object, it is
// named `bytes` in the `builtins` module, and `ffi::PyBytes_Check` is its type-check function.
// NOTE(review): the exact set of impls this expands to is defined by the macro, which is not
// visible in this file.
pyobject_native_type_core!(PyBytes, pyobject_native_static_type_object!(ffi::PyBytes_Type), "builtins", "bytes", #checkfunction=ffi::PyBytes_Check);
53
54impl PyBytes {
55 /// Creates a new Python bytestring object.
56 /// The bytestring is initialized by copying the data from the `&[u8]`.
57 ///
58 /// Panics if out of memory.
59 pub fn new<'p>(py: Python<'p>, s: &[u8]) -> Bound<'p, PyBytes> {
60 let ptr = s.as_ptr().cast();
61 let len = s.len() as ffi::Py_ssize_t;
62 unsafe {
63 ffi::PyBytes_FromStringAndSize(ptr, len)
64 .assume_owned(py)
65 .cast_into_unchecked()
66 }
67 }
68
69 /// Creates a new Python `bytes` object with an `init` closure to write its contents.
70 /// Before calling `init` the bytes' contents are zero-initialised.
71 /// * If Python raises a MemoryError on the allocation, `new_with` will return
72 /// it inside `Err`.
73 /// * If `init` returns `Err(e)`, `new_with` will return `Err(e)`.
74 /// * If `init` returns `Ok(())`, `new_with` will return `Ok(&PyBytes)`.
75 ///
76 /// # Examples
77 ///
78 /// ```
79 /// use pyo3::{prelude::*, types::PyBytes};
80 ///
81 /// # fn main() -> PyResult<()> {
82 /// Python::attach(|py| -> PyResult<()> {
83 /// let py_bytes = PyBytes::new_with(py, 10, |bytes: &mut [u8]| {
84 /// bytes.copy_from_slice(b"Hello Rust");
85 /// Ok(())
86 /// })?;
87 /// let bytes: &[u8] = py_bytes.extract()?;
88 /// assert_eq!(bytes, b"Hello Rust");
89 /// Ok(())
90 /// })
91 /// # }
92 /// ```
93 #[inline]
94 pub fn new_with<F>(py: Python<'_>, len: usize, init: F) -> PyResult<Bound<'_, PyBytes>>
95 where
96 F: FnOnce(&mut [u8]) -> PyResult<()>,
97 {
98 unsafe {
99 let pyptr = ffi::PyBytes_FromStringAndSize(std::ptr::null(), len as ffi::Py_ssize_t);
100 // Check for an allocation error and return it
101 let pybytes = pyptr.assume_owned_or_err(py)?.cast_into_unchecked();
102 let buffer: *mut u8 = ffi::PyBytes_AsString(pyptr).cast();
103 debug_assert!(!buffer.is_null());
104 // Zero-initialise the uninitialised bytestring
105 std::ptr::write_bytes(buffer, 0u8, len);
106 // (Further) Initialise the bytestring in init
107 // If init returns an Err, pypybytearray will automatically deallocate the buffer
108 init(std::slice::from_raw_parts_mut(buffer, len)).map(|_| pybytes)
109 }
110 }
111
112 /// Creates a new Python `bytes` object using a writer closure.
113 ///
114 /// This function allocates a Python `bytes` object with at least `reserved_capacity` bytes of capacity,
115 /// then provides a mutable writer to the closure `write`. The closure can write any number of bytes,
116 /// even more than the reserved capacity; the buffer will grow dynamically as needed.
117 ///
118 /// If `reserved_capacity` is 0, the buffer will start empty and grow as the writer writes data.
119 ///
120 /// After the closure returns, the resulting bytes object contains the written data.
121 ///
122 /// # Example
123 ///
124 /// ```
125 /// use pyo3::{prelude::*, types::PyBytes};
126 /// use std::io::Write;
127 ///
128 /// # fn main() -> PyResult<()> {
129 /// Python::attach(|py| -> PyResult<()> {
130 /// let py_bytes = PyBytes::new_with_writer(py, 0, |writer| {
131 /// writer.write_all(b"hello world")?;
132 /// Ok(())
133 /// })?;
134 /// assert_eq!(py_bytes.as_bytes(), b"hello world");
135 /// Ok(())
136 /// })
137 /// # }
138 /// ```
139 #[inline]
140 pub fn new_with_writer<F>(
141 py: Python<'_>,
142 reserved_capacity: usize,
143 write: F,
144 ) -> PyResult<Bound<'_, PyBytes>>
145 where
146 F: FnOnce(&mut dyn Write) -> PyResult<()>,
147 {
148 let mut writer = PyBytesWriter::with_capacity(py, reserved_capacity)?;
149 write(&mut writer)?;
150 writer.try_into()
151 }
152
153 /// Creates a new Python byte string object from a raw pointer and length.
154 ///
155 /// Panics if out of memory.
156 ///
157 /// # Safety
158 ///
159 /// This function dereferences the raw pointer `ptr` as the
160 /// leading pointer of a slice of length `len`. [As with
161 /// `std::slice::from_raw_parts`, this is
162 /// unsafe](https://doc.rust-lang.org/std/slice/fn.from_raw_parts.html#safety).
163 pub unsafe fn from_ptr(py: Python<'_>, ptr: *const u8, len: usize) -> Bound<'_, PyBytes> {
164 unsafe {
165 ffi::PyBytes_FromStringAndSize(ptr.cast(), len as isize)
166 .assume_owned(py)
167 .cast_into_unchecked()
168 }
169 }
170}
171
/// Implementation of functionality for [`PyBytes`].
///
/// These methods are defined for the `Bound<'py, PyBytes>` smart pointer, so to use method call
/// syntax these methods are separated into a trait, because stable Rust does not yet support
/// `arbitrary_self_types`.
#[doc(alias = "PyBytes")]
pub trait PyBytesMethods<'py>: crate::sealed::Sealed {
    /// Gets the contents of the Python `bytes` object as a byte slice.
    ///
    /// The returned slice borrows from `self`; no data is copied.
    fn as_bytes(&self) -> &[u8];
}
182
183impl<'py> PyBytesMethods<'py> for Bound<'py, PyBytes> {
184 #[inline]
185 fn as_bytes(&self) -> &[u8] {
186 self.as_borrowed().as_bytes()
187 }
188}
189
190impl<'a> Borrowed<'a, '_, PyBytes> {
191 /// Gets the Python string as a byte slice.
192 #[allow(clippy::wrong_self_convention)]
193 pub(crate) fn as_bytes(self) -> &'a [u8] {
194 unsafe {
195 let buffer = ffi::PyBytes_AsString(self.as_ptr()) as *const u8;
196 let length = ffi::PyBytes_Size(self.as_ptr()) as usize;
197 debug_assert!(!buffer.is_null());
198 std::slice::from_raw_parts(buffer, length)
199 }
200 }
201}
202
203impl Py<PyBytes> {
204 /// Gets the Python bytes as a byte slice. Because Python bytes are
205 /// immutable, the result may be used for as long as the reference to
206 /// `self` is held, including when the GIL is released.
207 pub fn as_bytes<'a>(&'a self, py: Python<'_>) -> &'a [u8] {
208 self.bind_borrowed(py).as_bytes()
209 }
210}
211
212/// This is the same way [Vec] is indexed.
213impl<I: SliceIndex<[u8]>> Index<I> for Bound<'_, PyBytes> {
214 type Output = I::Output;
215
216 fn index(&self, index: I) -> &Self::Output {
217 &self.as_bytes()[index]
218 }
219}
220
221/// Compares whether the Python bytes object is equal to the [u8].
222///
223/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
224impl PartialEq<[u8]> for Bound<'_, PyBytes> {
225 #[inline]
226 fn eq(&self, other: &[u8]) -> bool {
227 self.as_borrowed() == *other
228 }
229}
230
231/// Compares whether the Python bytes object is equal to the [u8].
232///
233/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
234impl PartialEq<&'_ [u8]> for Bound<'_, PyBytes> {
235 #[inline]
236 fn eq(&self, other: &&[u8]) -> bool {
237 self.as_borrowed() == **other
238 }
239}
240
241/// Compares whether the Python bytes object is equal to the [u8].
242///
243/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
244impl PartialEq<Bound<'_, PyBytes>> for [u8] {
245 #[inline]
246 fn eq(&self, other: &Bound<'_, PyBytes>) -> bool {
247 *self == other.as_borrowed()
248 }
249}
250
251/// Compares whether the Python bytes object is equal to the [u8].
252///
253/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
254impl PartialEq<&'_ Bound<'_, PyBytes>> for [u8] {
255 #[inline]
256 fn eq(&self, other: &&Bound<'_, PyBytes>) -> bool {
257 *self == other.as_borrowed()
258 }
259}
260
261/// Compares whether the Python bytes object is equal to the [u8].
262///
263/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
264impl PartialEq<Bound<'_, PyBytes>> for &'_ [u8] {
265 #[inline]
266 fn eq(&self, other: &Bound<'_, PyBytes>) -> bool {
267 **self == other.as_borrowed()
268 }
269}
270
271/// Compares whether the Python bytes object is equal to the [u8].
272///
273/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
274impl PartialEq<[u8]> for &'_ Bound<'_, PyBytes> {
275 #[inline]
276 fn eq(&self, other: &[u8]) -> bool {
277 self.as_borrowed() == other
278 }
279}
280
281/// Compares whether the Python bytes object is equal to the [u8].
282///
283/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
284impl PartialEq<[u8]> for Borrowed<'_, '_, PyBytes> {
285 #[inline]
286 fn eq(&self, other: &[u8]) -> bool {
287 self.as_bytes() == other
288 }
289}
290
291/// Compares whether the Python bytes object is equal to the [u8].
292///
293/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
294impl PartialEq<&[u8]> for Borrowed<'_, '_, PyBytes> {
295 #[inline]
296 fn eq(&self, other: &&[u8]) -> bool {
297 *self == **other
298 }
299}
300
301/// Compares whether the Python bytes object is equal to the [u8].
302///
303/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
304impl PartialEq<Borrowed<'_, '_, PyBytes>> for [u8] {
305 #[inline]
306 fn eq(&self, other: &Borrowed<'_, '_, PyBytes>) -> bool {
307 other == self
308 }
309}
310
311/// Compares whether the Python bytes object is equal to the [u8].
312///
313/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
314impl PartialEq<Borrowed<'_, '_, PyBytes>> for &'_ [u8] {
315 #[inline]
316 fn eq(&self, other: &Borrowed<'_, '_, PyBytes>) -> bool {
317 other == self
318 }
319}
320
321impl<'a> AsRef<[u8]> for Borrowed<'a, '_, PyBytes> {
322 #[inline]
323 fn as_ref(&self) -> &'a [u8] {
324 self.as_bytes()
325 }
326}
327
328impl AsRef<[u8]> for Bound<'_, PyBytes> {
329 #[inline]
330 fn as_ref(&self) -> &[u8] {
331 self.as_bytes()
332 }
333}
334
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::PyAnyMethods as _;

    /// Integer indexing yields the byte at that position.
    #[test]
    fn test_bytes_index() {
        Python::attach(|py| {
            let hello = PyBytes::new(py, b"Hello World");
            assert_eq!(hello[1], b'e');
        });
    }

    /// Indexing works both on the bound object and through a reference to it.
    #[test]
    fn test_bound_bytes_index() {
        Python::attach(|py| {
            let owned = PyBytes::new(py, b"Hello World");
            assert_eq!(owned[1], b'e');

            let by_ref = &owned;
            assert_eq!(by_ref[1], b'e');
        });
    }

    /// Data written by the `init` closure ends up in the resulting bytes object.
    #[test]
    fn test_bytes_new_with() -> PyResult<()> {
        Python::attach(|py| -> PyResult<()> {
            let filled = PyBytes::new_with(py, 10, |buf: &mut [u8]| {
                buf.copy_from_slice(b"Hello Rust");
                Ok(())
            })?;
            let contents: &[u8] = filled.extract()?;
            assert_eq!(contents, b"Hello Rust");
            Ok(())
        })
    }

    /// A closure that writes nothing leaves the buffer zero-filled.
    #[test]
    fn test_bytes_new_with_zero_initialised() -> PyResult<()> {
        Python::attach(|py| -> PyResult<()> {
            let untouched = PyBytes::new_with(py, 10, |_buf: &mut [u8]| Ok(()))?;
            let contents: &[u8] = untouched.extract()?;
            assert_eq!(contents, &[0; 10]);
            Ok(())
        })
    }

    /// An error returned by the `init` closure is propagated to the caller unchanged.
    #[test]
    fn test_bytes_new_with_error() {
        use crate::exceptions::PyValueError;
        Python::attach(|py| {
            let outcome = PyBytes::new_with(py, 10, |_buf: &mut [u8]| {
                Err(PyValueError::new_err("Hello Crustaceans!"))
            });
            assert!(outcome.is_err());
            let err = outcome.err().unwrap();
            assert!(err.is_instance_of::<PyValueError>(py));
        });
    }

    /// Exercises every `PartialEq` combination between bytes handles and Rust byte slices.
    #[test]
    fn test_comparisons() {
        Python::attach(|py| {
            let b = b"hello, world".as_slice();
            let py_bytes = PyBytes::new(py, b);

            assert_eq!(py_bytes, b"hello, world".as_slice());

            // `Bound` against `&[u8]`, in both operand orders.
            assert_eq!(py_bytes, b);
            assert_eq!(&py_bytes, b);
            assert_eq!(b, py_bytes);
            assert_eq!(b, &py_bytes);

            // `Bound` against `[u8]`, in both operand orders.
            assert_eq!(py_bytes, *b);
            assert_eq!(&py_bytes, *b);
            assert_eq!(*b, py_bytes);
            assert_eq!(*b, &py_bytes);

            let borrowed = py_bytes.as_borrowed();

            // `Borrowed` against `&[u8]`, in both operand orders.
            assert_eq!(borrowed, b);
            assert_eq!(&borrowed, b);
            assert_eq!(b, borrowed);
            assert_eq!(b, &borrowed);

            // `Borrowed` against `[u8]`, in both operand orders.
            assert_eq!(borrowed, *b);
            assert_eq!(*b, borrowed);
        })
    }

    /// The function and macro-style C accessors return the same data pointer.
    #[test]
    #[cfg(not(Py_LIMITED_API))]
    fn test_as_string() {
        Python::attach(|py| {
            let b = b"hello, world".as_slice();
            let py_bytes = PyBytes::new(py, b);
            unsafe {
                let via_function =
                    ffi::PyBytes_AsString(py_bytes.as_ptr()) as *const std::ffi::c_char;
                let via_macro =
                    ffi::PyBytes_AS_STRING(py_bytes.as_ptr()) as *const std::ffi::c_char;
                assert_eq!(via_function, via_macro);
            }
        })
    }

    /// `AsRef<[u8]>` returns the contents for both `Bound` and `Borrowed` handles.
    #[test]
    fn test_as_ref_slice() {
        Python::attach(|py| {
            let b = b"hello, world";
            let py_bytes = PyBytes::new(py, b);

            let from_bound: &[u8] = py_bytes.as_ref();
            assert_eq!(from_bound, b);

            let borrowed = py_bytes.as_borrowed();
            let from_borrowed: &[u8] = borrowed.as_ref();
            assert_eq!(from_borrowed, b);
        })
    }

    /// Bytes written through the writer closure become the contents of the object.
    #[test]
    fn test_with_writer() {
        Python::attach(|py| {
            let written =
                PyBytes::new_with_writer(py, 0, |writer| Ok(writer.write_all(b"hallo")?))
                    .unwrap();

            assert_eq!(written.as_bytes(), b"hallo");
        })
    }
}