Skip to main content

pyo3/types/
bytes.rs

1use crate::byteswriter::PyBytesWriter;
2use crate::ffi_ptr_ext::FfiPtrExt;
3use crate::instance::{Borrowed, Bound};
4use crate::{ffi, Py, PyAny, PyResult, Python};
5use std::io::Write;
6use std::ops::Index;
7use std::slice::SliceIndex;
8use std::str;
9
10/// Represents a Python `bytes` object.
11///
12/// This type is immutable.
13///
14/// Values of this type are accessed via PyO3's smart pointers, e.g. as
15/// [`Py<PyBytes>`][crate::Py] or [`Bound<'py, PyBytes>`][Bound].
16///
17/// For APIs available on `bytes` objects, see the [`PyBytesMethods`] trait which is implemented for
18/// [`Bound<'py, PyBytes>`][Bound].
19///
20/// # Equality
21///
22/// For convenience, [`Bound<'py, PyBytes>`][Bound] implements [`PartialEq<[u8]>`][PartialEq] to allow comparing the
23/// data in the Python bytes to a Rust `[u8]` byte slice.
24///
25/// This is not always the most appropriate way to compare Python bytes, as Python bytes subclasses
26/// may have different equality semantics. In situations where subclasses overriding equality might
27/// be relevant, use [`PyAnyMethods::eq`](crate::types::any::PyAnyMethods::eq), at cost of the
28/// additional overhead of a Python method call.
29///
30/// ```rust
31/// # use pyo3::prelude::*;
32/// use pyo3::types::PyBytes;
33///
34/// # Python::attach(|py| {
35/// let py_bytes = PyBytes::new(py, b"foo".as_slice());
36/// // via PartialEq<[u8]>
37/// assert_eq!(py_bytes, b"foo".as_slice());
38///
39/// // via Python equality
40/// let other = PyBytes::new(py, b"foo".as_slice());
41/// assert!(py_bytes.as_any().eq(other).unwrap());
42///
43/// // Note that `eq` will convert its argument to Python using `IntoPyObject`.
44/// // Byte collections are specialized, so that the following slice will indeed
45/// // convert into a `bytes` object and not a `list`:
46/// assert!(py_bytes.as_any().eq(b"foo".as_slice()).unwrap());
47/// # });
48/// ```
49#[repr(transparent)]
50pub struct PyBytes(PyAny);
51
52pyobject_native_type_core!(PyBytes, pyobject_native_static_type_object!(ffi::PyBytes_Type), "builtins", "bytes", #checkfunction=ffi::PyBytes_Check);
53
54impl PyBytes {
55    /// Creates a new Python bytestring object.
56    /// The bytestring is initialized by copying the data from the `&[u8]`.
57    ///
58    /// Panics if out of memory.
59    pub fn new<'p>(py: Python<'p>, s: &[u8]) -> Bound<'p, PyBytes> {
60        let ptr = s.as_ptr().cast();
61        let len = s.len() as ffi::Py_ssize_t;
62        unsafe {
63            ffi::PyBytes_FromStringAndSize(ptr, len)
64                .assume_owned(py)
65                .cast_into_unchecked()
66        }
67    }
68
69    /// Creates a new Python `bytes` object with an `init` closure to write its contents.
70    /// Before calling `init` the bytes' contents are zero-initialised.
71    /// * If Python raises a MemoryError on the allocation, `new_with` will return
72    ///   it inside `Err`.
73    /// * If `init` returns `Err(e)`, `new_with` will return `Err(e)`.
74    /// * If `init` returns `Ok(())`, `new_with` will return `Ok(&PyBytes)`.
75    ///
76    /// # Examples
77    ///
78    /// ```
79    /// use pyo3::{prelude::*, types::PyBytes};
80    ///
81    /// # fn main() -> PyResult<()> {
82    /// Python::attach(|py| -> PyResult<()> {
83    ///     let py_bytes = PyBytes::new_with(py, 10, |bytes: &mut [u8]| {
84    ///         bytes.copy_from_slice(b"Hello Rust");
85    ///         Ok(())
86    ///     })?;
87    ///     let bytes: &[u8] = py_bytes.extract()?;
88    ///     assert_eq!(bytes, b"Hello Rust");
89    ///     Ok(())
90    /// })
91    /// # }
92    /// ```
93    #[inline]
94    pub fn new_with<F>(py: Python<'_>, len: usize, init: F) -> PyResult<Bound<'_, PyBytes>>
95    where
96        F: FnOnce(&mut [u8]) -> PyResult<()>,
97    {
98        unsafe {
99            let pyptr = ffi::PyBytes_FromStringAndSize(std::ptr::null(), len as ffi::Py_ssize_t);
100            // Check for an allocation error and return it
101            let pybytes = pyptr.assume_owned_or_err(py)?.cast_into_unchecked();
102            let buffer: *mut u8 = ffi::PyBytes_AsString(pyptr).cast();
103            debug_assert!(!buffer.is_null());
104            // Zero-initialise the uninitialised bytestring
105            std::ptr::write_bytes(buffer, 0u8, len);
106            // (Further) Initialise the bytestring in init
107            // If init returns an Err, pypybytearray will automatically deallocate the buffer
108            init(std::slice::from_raw_parts_mut(buffer, len)).map(|_| pybytes)
109        }
110    }
111
112    /// Creates a new Python `bytes` object using a writer closure.
113    ///
114    /// This function allocates a Python `bytes` object with at least `reserved_capacity` bytes of capacity,
115    /// then provides a mutable writer to the closure `write`. The closure can write any number of bytes,
116    /// even more than the reserved capacity; the buffer will grow dynamically as needed.
117    ///
118    /// If `reserved_capacity` is 0, the buffer will start empty and grow as the writer writes data.
119    ///
120    /// After the closure returns, the resulting bytes object contains the written data.
121    ///
122    /// # Example
123    ///
124    /// ```
125    /// use pyo3::{prelude::*, types::PyBytes};
126    /// use std::io::Write;
127    ///
128    /// # fn main() -> PyResult<()> {
129    /// Python::attach(|py| -> PyResult<()> {
130    ///     let py_bytes = PyBytes::new_with_writer(py, 0, |writer| {
131    ///         writer.write_all(b"hello world")?;
132    ///         Ok(())
133    ///     })?;
134    ///     assert_eq!(py_bytes.as_bytes(), b"hello world");
135    ///     Ok(())
136    /// })
137    /// # }
138    /// ```
139    #[inline]
140    pub fn new_with_writer<F>(
141        py: Python<'_>,
142        reserved_capacity: usize,
143        write: F,
144    ) -> PyResult<Bound<'_, PyBytes>>
145    where
146        F: FnOnce(&mut dyn Write) -> PyResult<()>,
147    {
148        let mut writer = PyBytesWriter::with_capacity(py, reserved_capacity)?;
149        write(&mut writer)?;
150        writer.try_into()
151    }
152
153    /// Creates a new Python byte string object from a raw pointer and length.
154    ///
155    /// Panics if out of memory.
156    ///
157    /// # Safety
158    ///
159    /// This function dereferences the raw pointer `ptr` as the
160    /// leading pointer of a slice of length `len`. [As with
161    /// `std::slice::from_raw_parts`, this is
162    /// unsafe](https://doc.rust-lang.org/std/slice/fn.from_raw_parts.html#safety).
163    pub unsafe fn from_ptr(py: Python<'_>, ptr: *const u8, len: usize) -> Bound<'_, PyBytes> {
164        unsafe {
165            ffi::PyBytes_FromStringAndSize(ptr.cast(), len as isize)
166                .assume_owned(py)
167                .cast_into_unchecked()
168        }
169    }
170}
171
172/// Implementation of functionality for [`PyBytes`].
173///
174/// These methods are defined for the `Bound<'py, PyBytes>` smart pointer, so to use method call
175/// syntax these methods are separated into a trait, because stable Rust does not yet support
176/// `arbitrary_self_types`.
177#[doc(alias = "PyBytes")]
178pub trait PyBytesMethods<'py>: crate::sealed::Sealed {
179    /// Gets the Python string as a byte slice.
180    fn as_bytes(&self) -> &[u8];
181}
182
183impl<'py> PyBytesMethods<'py> for Bound<'py, PyBytes> {
184    #[inline]
185    fn as_bytes(&self) -> &[u8] {
186        self.as_borrowed().as_bytes()
187    }
188}
189
190impl<'a> Borrowed<'a, '_, PyBytes> {
191    /// Gets the Python string as a byte slice.
192    #[allow(clippy::wrong_self_convention)]
193    pub(crate) fn as_bytes(self) -> &'a [u8] {
194        unsafe {
195            let buffer = ffi::PyBytes_AsString(self.as_ptr()) as *const u8;
196            let length = ffi::PyBytes_Size(self.as_ptr()) as usize;
197            debug_assert!(!buffer.is_null());
198            std::slice::from_raw_parts(buffer, length)
199        }
200    }
201}
202
203impl Py<PyBytes> {
204    /// Gets the Python bytes as a byte slice. Because Python bytes are
205    /// immutable, the result may be used for as long as the reference to
206    /// `self` is held, including when the GIL is released.
207    pub fn as_bytes<'a>(&'a self, py: Python<'_>) -> &'a [u8] {
208        self.bind_borrowed(py).as_bytes()
209    }
210}
211
212/// This is the same way [Vec] is indexed.
213impl<I: SliceIndex<[u8]>> Index<I> for Bound<'_, PyBytes> {
214    type Output = I::Output;
215
216    fn index(&self, index: I) -> &Self::Output {
217        &self.as_bytes()[index]
218    }
219}
220
221/// Compares whether the Python bytes object is equal to the [u8].
222///
223/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
224impl PartialEq<[u8]> for Bound<'_, PyBytes> {
225    #[inline]
226    fn eq(&self, other: &[u8]) -> bool {
227        self.as_borrowed() == *other
228    }
229}
230
231/// Compares whether the Python bytes object is equal to the [u8].
232///
233/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
234impl PartialEq<&'_ [u8]> for Bound<'_, PyBytes> {
235    #[inline]
236    fn eq(&self, other: &&[u8]) -> bool {
237        self.as_borrowed() == **other
238    }
239}
240
241/// Compares whether the Python bytes object is equal to the [u8].
242///
243/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
244impl PartialEq<Bound<'_, PyBytes>> for [u8] {
245    #[inline]
246    fn eq(&self, other: &Bound<'_, PyBytes>) -> bool {
247        *self == other.as_borrowed()
248    }
249}
250
251/// Compares whether the Python bytes object is equal to the [u8].
252///
253/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
254impl PartialEq<&'_ Bound<'_, PyBytes>> for [u8] {
255    #[inline]
256    fn eq(&self, other: &&Bound<'_, PyBytes>) -> bool {
257        *self == other.as_borrowed()
258    }
259}
260
261/// Compares whether the Python bytes object is equal to the [u8].
262///
263/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
264impl PartialEq<Bound<'_, PyBytes>> for &'_ [u8] {
265    #[inline]
266    fn eq(&self, other: &Bound<'_, PyBytes>) -> bool {
267        **self == other.as_borrowed()
268    }
269}
270
271/// Compares whether the Python bytes object is equal to the [u8].
272///
273/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
274impl PartialEq<[u8]> for &'_ Bound<'_, PyBytes> {
275    #[inline]
276    fn eq(&self, other: &[u8]) -> bool {
277        self.as_borrowed() == other
278    }
279}
280
281/// Compares whether the Python bytes object is equal to the [u8].
282///
283/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
284impl PartialEq<[u8]> for Borrowed<'_, '_, PyBytes> {
285    #[inline]
286    fn eq(&self, other: &[u8]) -> bool {
287        self.as_bytes() == other
288    }
289}
290
291/// Compares whether the Python bytes object is equal to the [u8].
292///
293/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
294impl PartialEq<&[u8]> for Borrowed<'_, '_, PyBytes> {
295    #[inline]
296    fn eq(&self, other: &&[u8]) -> bool {
297        *self == **other
298    }
299}
300
301/// Compares whether the Python bytes object is equal to the [u8].
302///
303/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
304impl PartialEq<Borrowed<'_, '_, PyBytes>> for [u8] {
305    #[inline]
306    fn eq(&self, other: &Borrowed<'_, '_, PyBytes>) -> bool {
307        other == self
308    }
309}
310
311/// Compares whether the Python bytes object is equal to the [u8].
312///
313/// In some cases Python equality might be more appropriate; see the note on [`PyBytes`].
314impl PartialEq<Borrowed<'_, '_, PyBytes>> for &'_ [u8] {
315    #[inline]
316    fn eq(&self, other: &Borrowed<'_, '_, PyBytes>) -> bool {
317        other == self
318    }
319}
320
321impl<'a> AsRef<[u8]> for Borrowed<'a, '_, PyBytes> {
322    #[inline]
323    fn as_ref(&self) -> &'a [u8] {
324        self.as_bytes()
325    }
326}
327
328impl AsRef<[u8]> for Bound<'_, PyBytes> {
329    #[inline]
330    fn as_ref(&self) -> &[u8] {
331        self.as_bytes()
332    }
333}
334
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::PyAnyMethods as _;

    // Indexing a `Bound<PyBytes>` yields the byte at that position.
    #[test]
    fn test_bytes_index() {
        Python::attach(|py| {
            let greeting = PyBytes::new(py, b"Hello World");
            assert_eq!(greeting[1], b'e');
        });
    }

    // Indexing works both on the `Bound` itself and through a reference to it.
    #[test]
    fn test_bound_bytes_index() {
        Python::attach(|py| {
            let greeting = PyBytes::new(py, b"Hello World");
            assert_eq!(greeting[1], b'e');

            let greeting_ref = &greeting;
            assert_eq!(greeting_ref[1], b'e');
        });
    }

    // `new_with` hands the closure a buffer whose contents end up in the object.
    #[test]
    fn test_bytes_new_with() -> PyResult<()> {
        Python::attach(|py| -> PyResult<()> {
            let created = PyBytes::new_with(py, 10, |buf: &mut [u8]| {
                buf.copy_from_slice(b"Hello Rust");
                Ok(())
            })?;
            let contents: &[u8] = created.extract()?;
            assert_eq!(contents, b"Hello Rust");
            Ok(())
        })
    }

    // A closure that writes nothing leaves the buffer zero-filled.
    #[test]
    fn test_bytes_new_with_zero_initialised() -> PyResult<()> {
        Python::attach(|py| -> PyResult<()> {
            let created = PyBytes::new_with(py, 10, |_buf: &mut [u8]| Ok(()))?;
            let contents: &[u8] = created.extract()?;
            assert_eq!(contents, &[0; 10]);
            Ok(())
        })
    }

    // An error returned by the closure is passed back to the caller unchanged.
    #[test]
    fn test_bytes_new_with_error() {
        use crate::exceptions::PyValueError;
        Python::attach(|py| {
            let outcome = PyBytes::new_with(py, 10, |_buf: &mut [u8]| {
                Err(PyValueError::new_err("Hello Crustaceans!"))
            });
            assert!(outcome.is_err());
            let error = outcome.err().unwrap();
            assert!(error.is_instance_of::<PyValueError>(py));
        });
    }

    // Exercises every `PartialEq` combination between bytes objects and byte slices.
    #[test]
    fn test_comparisons() {
        Python::attach(|py| {
            let b = b"hello, world".as_slice();
            let py_bytes = PyBytes::new(py, b);

            assert_eq!(py_bytes, b"hello, world".as_slice());

            // `Bound` against `&[u8]`
            assert_eq!(py_bytes, b);
            assert_eq!(&py_bytes, b);
            assert_eq!(b, py_bytes);
            assert_eq!(b, &py_bytes);

            // `Bound` against `[u8]`
            assert_eq!(py_bytes, *b);
            assert_eq!(&py_bytes, *b);
            assert_eq!(*b, py_bytes);
            assert_eq!(*b, &py_bytes);

            let py_borrowed = py_bytes.as_borrowed();

            // `Borrowed` against `&[u8]`
            assert_eq!(py_borrowed, b);
            assert_eq!(&py_borrowed, b);
            assert_eq!(b, py_borrowed);
            assert_eq!(b, &py_borrowed);

            // `Borrowed` against `[u8]`
            assert_eq!(py_borrowed, *b);
            assert_eq!(*b, py_borrowed);
        })
    }

    // The function and macro-style accessors must return the same data pointer.
    #[test]
    #[cfg(not(Py_LIMITED_API))]
    fn test_as_string() {
        Python::attach(|py| {
            let payload = b"hello, world".as_slice();
            let py_bytes = PyBytes::new(py, payload);
            let object = py_bytes.as_ptr();
            unsafe {
                assert_eq!(
                    ffi::PyBytes_AsString(object) as *const std::ffi::c_char,
                    ffi::PyBytes_AS_STRING(object) as *const std::ffi::c_char
                );
            }
        })
    }

    // `AsRef<[u8]>` is available on both `Bound` and `Borrowed`.
    #[test]
    fn test_as_ref_slice() {
        Python::attach(|py| {
            let payload = b"hello, world";
            let py_bytes = PyBytes::new(py, payload);

            let from_bound: &[u8] = py_bytes.as_ref();
            assert_eq!(from_bound, payload);

            let borrowed = py_bytes.as_borrowed();
            let from_borrowed: &[u8] = borrowed.as_ref();
            assert_eq!(from_borrowed, payload);
        })
    }

    // Data written through the writer closure becomes the contents of the object.
    #[test]
    fn test_with_writer() {
        Python::attach(|py| {
            let written = PyBytes::new_with_writer(py, 0, |writer| {
                writer.write_all(b"hallo")?;
                Ok(())
            })
            .unwrap();

            assert_eq!(written.as_bytes(), b"hallo");
        })
    }
}
⚠️ Internal Docs ⚠️ Not Public API 👉 Official Docs Here