Skip to main content

pyo3/types/
bytearray.rs

1use crate::err::{PyErr, PyResult};
2use crate::ffi_ptr_ext::FfiPtrExt;
3use crate::instance::{Borrowed, Bound};
4use crate::py_result_ext::PyResultExt;
5use crate::sync::critical_section::with_critical_section;
6use crate::{ffi, PyAny, Python};
7#[cfg(RustPython)]
8use crate::{
9    sync::PyOnceLock,
10    types::{PyType, PyTypeMethods},
11    Py,
12};
13use core::slice;
14
/// Represents a Python `bytearray`.
///
/// Values of this type are accessed via PyO3's smart pointers, e.g. as
/// [`Py<PyByteArray>`][crate::Py] or [`Bound<'py, PyByteArray>`][Bound].
///
/// For APIs available on `bytearray` objects, see the [`PyByteArrayMethods`] trait which is implemented for
/// [`Bound<'py, PyByteArray>`][Bound].
// NOTE(review): `repr(transparent)` gives this newtype the same layout as the wrapped `PyAny`;
// presumably the pointer casts used elsewhere in the crate rely on that - confirm before changing.
#[repr(transparent)]
pub struct PyByteArray(PyAny);
24
// Invokes `pyobject_native_type_core!` for `PyByteArray` with module "builtins", type name
// "bytearray" and `ffi::PyByteArray_Check` as the check function. On every target except
// RustPython the type object is taken from the static `ffi::PyByteArray_Type`.
#[cfg(not(RustPython))]
pyobject_native_type_core!(PyByteArray, pyobject_native_static_type_object!(ffi::PyByteArray_Type), "builtins", "bytearray", #checkfunction=ffi::PyByteArray_Check);

// RustPython variant of the invocation above: instead of a static type object, the type is
// obtained by importing `builtins.bytearray` through a `PyOnceLock` held in a `static`.
// The `unwrap` means a failed import panics.
#[cfg(RustPython)]
pyobject_native_type_core!(
    PyByteArray,
    |py| {
        static TYPE: PyOnceLock<Py<PyType>> = PyOnceLock::new();
        TYPE.import(py, "builtins", "bytearray").unwrap().as_type_ptr()
    },
    "builtins",
    "bytearray",
    #checkfunction=ffi::PyByteArray_Check
);
39
40impl PyByteArray {
41    /// Creates a new Python bytearray object.
42    ///
43    /// The byte string is initialized by copying the data from the `&[u8]`.
44    pub fn new<'py>(py: Python<'py>, src: &[u8]) -> Bound<'py, PyByteArray> {
45        let ptr = src.as_ptr().cast();
46        let len = src.len() as ffi::Py_ssize_t;
47        unsafe {
48            ffi::PyByteArray_FromStringAndSize(ptr, len)
49                .assume_owned(py)
50                .cast_into_unchecked()
51        }
52    }
53
54    /// Creates a new Python `bytearray` object with an `init` closure to write its contents.
55    /// Before calling `init` the bytearray is zero-initialised.
56    /// * If Python raises a MemoryError on the allocation, `new_with` will return
57    ///   it inside `Err`.
58    /// * If `init` returns `Err(e)`, `new_with` will return `Err(e)`.
59    /// * If `init` returns `Ok(())`, `new_with` will return `Ok(&PyByteArray)`.
60    ///
61    /// # Examples
62    ///
63    /// ```
64    /// use pyo3::{prelude::*, types::PyByteArray};
65    ///
66    /// # fn main() -> PyResult<()> {
67    /// Python::attach(|py| -> PyResult<()> {
68    ///     let py_bytearray = PyByteArray::new_with(py, 10, |bytes: &mut [u8]| {
69    ///         bytes.copy_from_slice(b"Hello Rust");
70    ///         Ok(())
71    ///     })?;
72    ///     let bytearray: &[u8] = unsafe { py_bytearray.as_bytes() };
73    ///     assert_eq!(bytearray, b"Hello Rust");
74    ///     Ok(())
75    /// })
76    /// # }
77    /// ```
78    pub fn new_with<F>(py: Python<'_>, len: usize, init: F) -> PyResult<Bound<'_, PyByteArray>>
79    where
80        F: FnOnce(&mut [u8]) -> PyResult<()>,
81    {
82        unsafe {
83            // Allocate buffer and check for an error
84            let pybytearray: Bound<'_, Self> =
85                ffi::PyByteArray_FromStringAndSize(core::ptr::null(), len as ffi::Py_ssize_t)
86                    .assume_owned_or_err(py)?
87                    .cast_into_unchecked();
88
89            let buffer: *mut u8 = ffi::PyByteArray_AsString(pybytearray.as_ptr()).cast();
90            debug_assert!(!buffer.is_null());
91            // Zero-initialise the uninitialised bytearray
92            core::ptr::write_bytes(buffer, 0u8, len);
93            // (Further) Initialise the bytearray in init
94            // If init returns an Err, pypybytearray will automatically deallocate the buffer
95            init(core::slice::from_raw_parts_mut(buffer, len)).map(|_| pybytearray)
96        }
97    }
98
99    /// Creates a new Python `bytearray` object from another Python object that
100    /// implements the buffer protocol.
101    pub fn from<'py>(src: &Bound<'py, PyAny>) -> PyResult<Bound<'py, PyByteArray>> {
102        unsafe {
103            ffi::PyByteArray_FromObject(src.as_ptr())
104                .assume_owned_or_err(src.py())
105                .cast_into_unchecked()
106        }
107    }
108}
109
/// Implementation of functionality for [`PyByteArray`].
///
/// These methods are defined for the `Bound<'py, PyByteArray>` smart pointer, so to use method call
/// syntax these methods are separated into a trait, because stable Rust does not yet support
/// `arbitrary_self_types`.
#[doc(alias = "PyByteArray")]
pub trait PyByteArrayMethods<'py>: crate::sealed::Sealed {
    /// Gets the length of the bytearray.
    fn len(&self) -> usize;

    /// Checks if the bytearray is empty.
    fn is_empty(&self) -> bool;

    /// Gets the start of the buffer containing the contents of the bytearray.
    ///
    /// Obtaining the pointer is safe; dereferencing it is not. The pointer is invalidated by
    /// [`PyByteArrayMethods::resize`].
    ///
    /// # Safety
    ///
    /// See the safety requirements of [`PyByteArrayMethods::as_bytes`] and [`PyByteArrayMethods::as_bytes_mut`].
    fn data(&self) -> *mut u8;

    /// Extracts a slice of the `bytearray`'s entire buffer.
    ///
    /// # Safety
    ///
    /// Mutation of the `bytearray` invalidates the slice. If it is used afterwards, the behavior is
    /// undefined.
    ///
    /// These mutations may occur in Python code as well as from Rust:
    /// - Calling methods like [`PyByteArrayMethods::as_bytes_mut`] and [`PyByteArrayMethods::resize`] will
    ///   invalidate the slice.
    /// - Actions like dropping objects or raising exceptions can invoke `__del__` methods or signal
    ///   handlers, which may execute arbitrary Python code. This means that if Python code has a
    ///   reference to the `bytearray` you cannot safely use the vast majority of PyO3's API whilst
    ///   using the slice.
    ///
    /// As a result, this slice should only be used for short-lived operations without executing any
    /// Python code, such as copying into a Vec.
    /// For free-threaded Python support see also [`with_critical_section`].
    ///
    /// # Examples
    ///
    /// ```rust
    /// use pyo3::prelude::*;
    /// use pyo3::exceptions::PyRuntimeError;
    /// use pyo3::sync::critical_section::with_critical_section;
    /// use pyo3::types::PyByteArray;
    ///
    /// #[pyfunction]
    /// fn a_valid_function(bytes: &Bound<'_, PyByteArray>) -> PyResult<()> {
    ///     let section = with_critical_section(bytes, || {
    ///         // SAFETY: We promise to not let the interpreter regain control over the bytearray
    ///         // or invoke any PyO3 APIs while using the slice.
    ///         let slice = unsafe { bytes.as_bytes() };
    ///
    ///         // Copy only a section of `bytes` while avoiding
    ///         // `to_vec` which copies the entire thing.
    ///         slice.get(6..11)
    ///             .map(Vec::from)
    ///             .ok_or_else(|| PyRuntimeError::new_err("input is not long enough"))
    ///     })?;
    ///
    ///     // Now we can do things with `section` and call PyO3 APIs again.
    ///     // ...
    ///     # assert_eq!(&section, b"world");
    ///
    ///     Ok(())
    /// }
    /// # fn main() -> PyResult<()> {
    /// #     Python::attach(|py| -> PyResult<()> {
    /// #         let fun = wrap_pyfunction!(a_valid_function, py)?;
    /// #         let locals = pyo3::types::PyDict::new(py);
    /// #         locals.set_item("a_valid_function", fun)?;
    /// #
    /// #         py.run(cr#"b = bytearray(b"hello world")
    /// # a_valid_function(b)
    /// #
    /// # try:
    /// #     a_valid_function(bytearray())
    /// # except RuntimeError as e:
    /// #     assert str(e) == 'input is not long enough'"#,
    /// #             None,
    /// #             Some(&locals),
    /// #         )?;
    /// #
    /// #         Ok(())
    /// #     })
    /// # }
    /// ```
    ///
    /// # Incorrect usage
    ///
    /// The following `bug` function is unsound ⚠️
    ///
    /// ```rust,no_run
    /// # use pyo3::prelude::*;
    /// # use pyo3::types::PyByteArray;
    ///
    /// # #[allow(dead_code)]
    /// #[pyfunction]
    /// fn bug(py: Python<'_>, bytes: &Bound<'_, PyByteArray>) {
    ///     // No critical section is being used.
    ///     // This means that for free-threaded Python another thread could be modifying the
    ///     // bytearray concurrently and thus invalidate `slice` any time.
    ///     let slice = unsafe { bytes.as_bytes() };
    ///
    ///     // This explicitly yields control back to the Python interpreter...
    ///     // ...but it's not always this obvious. Many things do this implicitly.
    ///     py.detach(|| {
    ///         // Python code could be mutating through its handle to `bytes`,
    ///         // which makes reading it a data race, which is undefined behavior.
    ///         println!("{:?}", slice[0]);
    ///     });
    ///
    ///     // Python code might have mutated it, so we can not rely on the slice
    ///     // remaining valid. As such this is also undefined behavior.
    ///     println!("{:?}", slice[0]);
    /// }
    /// ```
    unsafe fn as_bytes(&self) -> &[u8];

    /// Extracts a mutable slice of the `bytearray`'s entire buffer.
    ///
    /// # Safety
    ///
    /// Any other accesses of the `bytearray`'s buffer invalidate the slice. If it is used
    /// afterwards, the behavior is undefined. The safety requirements of [`PyByteArrayMethods::as_bytes`]
    /// apply to this function as well.
    #[expect(clippy::mut_from_ref)]
    unsafe fn as_bytes_mut(&self) -> &mut [u8];

    /// Copies the contents of the bytearray to a Rust vector.
    ///
    /// The returned vector is independent of the bytearray: later changes to one are not
    /// visible in the other, as the example below shows.
    ///
    /// # Examples
    ///
    /// ```
    /// # use pyo3::prelude::*;
    /// # use pyo3::types::PyByteArray;
    /// # Python::attach(|py| {
    /// let bytearray = PyByteArray::new(py, b"Hello World.");
    /// let mut copied_message = bytearray.to_vec();
    /// assert_eq!(b"Hello World.", copied_message.as_slice());
    ///
    /// copied_message[11] = b'!';
    /// assert_eq!(b"Hello World!", copied_message.as_slice());
    ///
    /// pyo3::py_run!(py, bytearray, "assert bytearray == b'Hello World.'");
    /// # });
    /// ```
    fn to_vec(&self) -> Vec<u8>;

    /// Resizes the bytearray object to the new length `len`.
    ///
    /// Note that this will invalidate any pointers obtained by [`PyByteArrayMethods::data`], as well as
    /// any (unsafe) slices obtained from [`PyByteArrayMethods::as_bytes`] and [`PyByteArrayMethods::as_bytes_mut`].
    ///
    /// # Errors
    ///
    /// Returns the Python exception raised by the interpreter if the resize fails.
    fn resize(&self, len: usize) -> PyResult<()>;
}
266
267impl<'py> PyByteArrayMethods<'py> for Bound<'py, PyByteArray> {
268    #[inline]
269    fn len(&self) -> usize {
270        // non-negative Py_ssize_t should always fit into Rust usize
271        unsafe { ffi::PyByteArray_Size(self.as_ptr()) as usize }
272    }
273
274    fn is_empty(&self) -> bool {
275        self.len() == 0
276    }
277
278    fn data(&self) -> *mut u8 {
279        self.as_borrowed().data()
280    }
281
282    unsafe fn as_bytes(&self) -> &[u8] {
283        unsafe { self.as_borrowed().as_bytes() }
284    }
285
286    unsafe fn as_bytes_mut(&self) -> &mut [u8] {
287        unsafe { self.as_borrowed().as_bytes_mut() }
288    }
289
290    fn to_vec(&self) -> Vec<u8> {
291        with_critical_section(self, || {
292            // SAFETY:
293            //  * `self` is a `Bound` object, which guarantees that the Python GIL is held.
294            //  * For free-threaded Python, a critical section is used in lieu of the GIL.
295            //  * We don't interact with the interpreter
296            //  * We don't mutate the underlying slice
297            unsafe { self.as_bytes() }.to_vec()
298        })
299    }
300
301    fn resize(&self, len: usize) -> PyResult<()> {
302        unsafe {
303            let result = ffi::PyByteArray_Resize(self.as_ptr(), len as ffi::Py_ssize_t);
304            if result == 0 {
305                Ok(())
306            } else {
307                Err(PyErr::fetch(self.py()))
308            }
309        }
310    }
311}
312
313impl<'a> Borrowed<'a, '_, PyByteArray> {
314    fn data(&self) -> *mut u8 {
315        unsafe { ffi::PyByteArray_AsString(self.as_ptr()).cast() }
316    }
317
318    pub(crate) unsafe fn as_bytes(self) -> &'a [u8] {
319        unsafe { slice::from_raw_parts(self.data(), self.len()) }
320    }
321
322    unsafe fn as_bytes_mut(self) -> &'a mut [u8] {
323        unsafe { slice::from_raw_parts_mut(self.data(), self.len()) }
324    }
325}
326
327impl<'py> TryFrom<&Bound<'py, PyAny>> for Bound<'py, PyByteArray> {
328    type Error = crate::PyErr;
329
330    /// Creates a new Python `bytearray` object from another Python object that
331    /// implements the buffer protocol.
332    fn try_from(value: &Bound<'py, PyAny>) -> Result<Self, Self::Error> {
333        PyByteArray::from(value)
334    }
335}
336
/// Unit tests for `PyByteArray` construction, accessors, resizing and critical-section safety.
#[cfg(test)]
mod tests {
    use crate::types::{PyAnyMethods, PyByteArray, PyByteArrayMethods};
    use crate::{exceptions, Bound, Py, PyAny, Python};

    /// `len` reports the number of bytes copied in by `new`.
    #[test]
    fn test_len() {
        Python::attach(|py| {
            let src = b"Hello Python";
            let bytearray = PyByteArray::new(py, src);
            assert_eq!(src.len(), bytearray.len());
        });
    }

    /// `as_bytes` exposes the stored contents and starts at the `data` pointer.
    #[test]
    fn test_as_bytes() {
        Python::attach(|py| {
            let src = b"Hello Python";
            let bytearray = PyByteArray::new(py, src);

            let slice = unsafe { bytearray.as_bytes() };
            assert_eq!(src, slice);
            assert_eq!(bytearray.data() as *const _, slice.as_ptr());
        });
    }

    /// Writes through `as_bytes_mut` are visible from the Python side.
    #[test]
    fn test_as_bytes_mut() {
        Python::attach(|py| {
            let src = b"Hello Python";
            let bytearray = PyByteArray::new(py, src);

            let slice = unsafe { bytearray.as_bytes_mut() };
            assert_eq!(src, slice);
            assert_eq!(bytearray.data(), slice.as_mut_ptr());

            slice[0..5].copy_from_slice(b"Hi...");

            assert_eq!(bytearray.str().unwrap(), "bytearray(b'Hi... Python')");
        });
    }

    /// `to_vec` returns a copy of the contents.
    #[test]
    fn test_to_vec() {
        Python::attach(|py| {
            let src = b"Hello Python";
            let bytearray = PyByteArray::new(py, src);

            let vec = bytearray.to_vec();
            assert_eq!(src, vec.as_slice());
        });
    }

    /// `from` accepts an existing bytearray as its buffer-protocol source.
    #[test]
    fn test_from() {
        Python::attach(|py| {
            let src = b"Hello Python";
            let bytearray = PyByteArray::new(py, src);

            let ba: Py<PyAny> = bytearray.into();
            let bytearray = PyByteArray::from(ba.bind(py)).unwrap();

            assert_eq!(src, unsafe { bytearray.as_bytes() });
        });
    }

    /// `from` surfaces a `TypeError` for an object without buffer support (`None`).
    #[test]
    fn test_from_err() {
        Python::attach(|py| {
            if let Err(err) = PyByteArray::from(py.None().bind(py)) {
                assert!(err.is_instance_of::<exceptions::PyTypeError>(py));
            } else {
                panic!("error");
            }
        });
    }

    /// The `TryFrom` impl behaves like `PyByteArray::from`.
    #[test]
    fn test_try_from() {
        Python::attach(|py| {
            let src = b"Hello Python";
            let bytearray: &Bound<'_, PyAny> = &PyByteArray::new(py, src);
            let bytearray: Bound<'_, PyByteArray> = TryInto::try_into(bytearray).unwrap();

            assert_eq!(src, unsafe { bytearray.as_bytes() });
        });
    }

    /// `resize` changes the reported length.
    #[test]
    fn test_resize() {
        Python::attach(|py| {
            let src = b"Hello Python";
            let bytearray = PyByteArray::new(py, src);

            bytearray.resize(20).unwrap();
            assert_eq!(20, bytearray.len());
        });
    }

    /// `new_with` hands the closure a buffer whose writes end up in the bytearray.
    #[test]
    fn test_byte_array_new_with() -> super::PyResult<()> {
        Python::attach(|py| -> super::PyResult<()> {
            let py_bytearray = PyByteArray::new_with(py, 10, |b: &mut [u8]| {
                b.copy_from_slice(b"Hello Rust");
                Ok(())
            })?;
            let bytearray: &[u8] = unsafe { py_bytearray.as_bytes() };
            assert_eq!(bytearray, b"Hello Rust");
            Ok(())
        })
    }

    /// A closure that writes nothing leaves the buffer zero-filled.
    #[test]
    fn test_byte_array_new_with_zero_initialised() -> super::PyResult<()> {
        Python::attach(|py| -> super::PyResult<()> {
            let py_bytearray = PyByteArray::new_with(py, 10, |_b: &mut [u8]| Ok(()))?;
            let bytearray: &[u8] = unsafe { py_bytearray.as_bytes() };
            assert_eq!(bytearray, &[0; 10]);
            Ok(())
        })
    }

    /// An error returned by the closure is propagated unchanged by `new_with`.
    #[test]
    fn test_byte_array_new_with_error() {
        use crate::exceptions::PyValueError;
        Python::attach(|py| {
            let py_bytearray_result = PyByteArray::new_with(py, 10, |_b: &mut [u8]| {
                Err(PyValueError::new_err("Hello Crustaceans!"))
            });
            assert!(py_bytearray_result.is_err());
            assert!(py_bytearray_result
                .err()
                .unwrap()
                .is_instance_of::<PyValueError>(py));
        })
    }

    // * wasm has no threading support
    // * CPython 3.13t is unsound => test fails
    /// Runs one thread that keeps extending and replacing a shared bytearray while a second
    /// thread reads it inside a critical section and checks that the bytes are consistent.
    #[cfg(all(
        not(target_family = "wasm"),
        any(Py_3_14, not(all(Py_3_13, Py_GIL_DISABLED)))
    ))]
    #[test]
    fn test_data_integrity_in_critical_section() {
        use crate::instance::Py;
        use crate::sync::{critical_section::with_critical_section, MutexExt};

        use core::sync::atomic::{AtomicBool, Ordering};
        use core::time::Duration;
        use std::sync::Mutex;
        use std::thread;
        use std::thread::ScopedJoinHandle;

        const SIZE: usize = 1_000_000;
        const DATA_VALUE: u8 = 42;

        // Builds a bytearray of `size` bytes, each set to `value`.
        fn make_byte_array(py: Python<'_>, size: usize, value: u8) -> Bound<'_, PyByteArray> {
            PyByteArray::new_with(py, size, |b| {
                b.fill(value);
                Ok(())
            })
            .unwrap()
        }

        let data: Mutex<Py<PyByteArray>> = Mutex::new(Python::attach(|py| {
            make_byte_array(py, SIZE, DATA_VALUE).unbind()
        }));

        // Returns a new bound reference to the bytearray currently stored in `data`.
        fn get_data<'py>(
            data: &Mutex<Py<PyByteArray>>,
            py: Python<'py>,
        ) -> Bound<'py, PyByteArray> {
            data.lock_py_attached(py).unwrap().bind(py).clone()
        }

        // Replaces the bytearray stored in `data` with `new`.
        fn set_data(data: &Mutex<Py<PyByteArray>>, new: Bound<'_, PyByteArray>) {
            let py = new.py();
            *data.lock_py_attached(py).unwrap() = new.unbind()
        }

        let running = AtomicBool::new(true);
        let extending = AtomicBool::new(false);

        // continuously extends and resets the bytearray in data
        let worker1 = || {
            let mut rounds = 0;
            while running.load(Ordering::SeqCst) && rounds < 50 {
                Python::attach(|py| {
                    let byte_array = get_data(&data, py);
                    extending.store(true, Ordering::SeqCst);
                    byte_array
                        .call_method("extend", (&byte_array,), None)
                        .unwrap();
                    extending.store(false, Ordering::SeqCst);
                    set_data(&data, make_byte_array(py, SIZE, DATA_VALUE));
                    rounds += 1;
                });
            }
        };

        // continuously checks the integrity of bytearray in data
        let worker2 = || {
            while running.load(Ordering::SeqCst) {
                if !extending.load(Ordering::SeqCst) {
                    // wait until we have a chance to read inconsistent state
                    continue;
                }
                Python::attach(|py| {
                    let read = get_data(&data, py);
                    if read.len() == SIZE {
                        // extend is still not done => wait even more
                        return;
                    }
                    with_critical_section(&read, || {
                        // SAFETY: we are in a critical section
                        // This is the whole point of the test: make sure that a
                        // critical section is sufficient to ensure that the data
                        // read is consistent.
                        unsafe {
                            let bytes = read.as_bytes();
                            // Check both ends of the buffer once each. The head check used
                            // to sit inside the tail check's closure and was re-run for
                            // every tail element.
                            let tail_intact =
                                bytes.iter().rev().take(50).all(|v| *v == DATA_VALUE);
                            let head_intact = bytes.iter().take(50).all(|v| *v == DATA_VALUE);
                            assert!(tail_intact && head_intact);
                        }
                    });
                });
            }
        };

        thread::scope(|s| {
            let mut handle1 = Some(s.spawn(worker1));
            let mut handle2 = Some(s.spawn(worker2));
            let mut handles = [&mut handle1, &mut handle2];

            // Poll for up to one second; stop early as soon as either worker finishes and
            // re-raise its panic, if any.
            let t0 = std::time::Instant::now();
            while t0.elapsed() < Duration::from_secs(1) {
                for handle in &mut handles {
                    if handle
                        .as_ref()
                        .map(ScopedJoinHandle::is_finished)
                        .unwrap_or(false)
                    {
                        let res = handle.take().unwrap().join();
                        if res.is_err() {
                            // Let the other worker leave its loop before the panic propagates.
                            running.store(false, Ordering::SeqCst);
                        }
                        res.unwrap();
                    }
                }
                if handles.iter().any(|handle| handle.is_none()) {
                    break;
                }
            }
            running.store(false, Ordering::SeqCst);
            for handle in &mut handles {
                if let Some(handle) = handle.take() {
                    handle.join().unwrap()
                }
            }
        });
    }
}
⚠️ Internal Docs ⚠️ Not Public API 👉 Official Docs Here