pyo3/types/bytearray.rs
1use crate::err::{PyErr, PyResult};
2use crate::ffi_ptr_ext::FfiPtrExt;
3use crate::instance::{Borrowed, Bound};
4use crate::py_result_ext::PyResultExt;
5use crate::sync::critical_section::with_critical_section;
6use crate::{ffi, PyAny, Python};
7#[cfg(RustPython)]
8use crate::{
9 sync::PyOnceLock,
10 types::{PyType, PyTypeMethods},
11 Py,
12};
13use core::slice;
14
15/// Represents a Python `bytearray`.
16///
17/// Values of this type are accessed via PyO3's smart pointers, e.g. as
18/// [`Py<PyByteArray>`][crate::Py] or [`Bound<'py, PyByteArray>`][Bound].
19///
20/// For APIs available on `bytearray` objects, see the [`PyByteArrayMethods`] trait which is implemented for
21/// [`Bound<'py, PyByteArray>`][Bound].
22#[repr(transparent)]
23pub struct PyByteArray(PyAny);
24
25#[cfg(not(RustPython))]
26pyobject_native_type_core!(PyByteArray, pyobject_native_static_type_object!(ffi::PyByteArray_Type), "builtins", "bytearray", #checkfunction=ffi::PyByteArray_Check);
27
28#[cfg(RustPython)]
29pyobject_native_type_core!(
30 PyByteArray,
31 |py| {
32 static TYPE: PyOnceLock<Py<PyType>> = PyOnceLock::new();
33 TYPE.import(py, "builtins", "bytearray").unwrap().as_type_ptr()
34 },
35 "builtins",
36 "bytearray",
37 #checkfunction=ffi::PyByteArray_Check
38);
39
40impl PyByteArray {
41 /// Creates a new Python bytearray object.
42 ///
43 /// The byte string is initialized by copying the data from the `&[u8]`.
44 pub fn new<'py>(py: Python<'py>, src: &[u8]) -> Bound<'py, PyByteArray> {
45 let ptr = src.as_ptr().cast();
46 let len = src.len() as ffi::Py_ssize_t;
47 unsafe {
48 ffi::PyByteArray_FromStringAndSize(ptr, len)
49 .assume_owned(py)
50 .cast_into_unchecked()
51 }
52 }
53
54 /// Creates a new Python `bytearray` object with an `init` closure to write its contents.
55 /// Before calling `init` the bytearray is zero-initialised.
56 /// * If Python raises a MemoryError on the allocation, `new_with` will return
57 /// it inside `Err`.
58 /// * If `init` returns `Err(e)`, `new_with` will return `Err(e)`.
59 /// * If `init` returns `Ok(())`, `new_with` will return `Ok(&PyByteArray)`.
60 ///
61 /// # Examples
62 ///
63 /// ```
64 /// use pyo3::{prelude::*, types::PyByteArray};
65 ///
66 /// # fn main() -> PyResult<()> {
67 /// Python::attach(|py| -> PyResult<()> {
68 /// let py_bytearray = PyByteArray::new_with(py, 10, |bytes: &mut [u8]| {
69 /// bytes.copy_from_slice(b"Hello Rust");
70 /// Ok(())
71 /// })?;
72 /// let bytearray: &[u8] = unsafe { py_bytearray.as_bytes() };
73 /// assert_eq!(bytearray, b"Hello Rust");
74 /// Ok(())
75 /// })
76 /// # }
77 /// ```
78 pub fn new_with<F>(py: Python<'_>, len: usize, init: F) -> PyResult<Bound<'_, PyByteArray>>
79 where
80 F: FnOnce(&mut [u8]) -> PyResult<()>,
81 {
82 unsafe {
83 // Allocate buffer and check for an error
84 let pybytearray: Bound<'_, Self> =
85 ffi::PyByteArray_FromStringAndSize(core::ptr::null(), len as ffi::Py_ssize_t)
86 .assume_owned_or_err(py)?
87 .cast_into_unchecked();
88
89 let buffer: *mut u8 = ffi::PyByteArray_AsString(pybytearray.as_ptr()).cast();
90 debug_assert!(!buffer.is_null());
91 // Zero-initialise the uninitialised bytearray
92 core::ptr::write_bytes(buffer, 0u8, len);
93 // (Further) Initialise the bytearray in init
94 // If init returns an Err, pypybytearray will automatically deallocate the buffer
95 init(core::slice::from_raw_parts_mut(buffer, len)).map(|_| pybytearray)
96 }
97 }
98
99 /// Creates a new Python `bytearray` object from another Python object that
100 /// implements the buffer protocol.
101 pub fn from<'py>(src: &Bound<'py, PyAny>) -> PyResult<Bound<'py, PyByteArray>> {
102 unsafe {
103 ffi::PyByteArray_FromObject(src.as_ptr())
104 .assume_owned_or_err(src.py())
105 .cast_into_unchecked()
106 }
107 }
108}
109
110/// Implementation of functionality for [`PyByteArray`].
111///
112/// These methods are defined for the `Bound<'py, PyByteArray>` smart pointer, so to use method call
113/// syntax these methods are separated into a trait, because stable Rust does not yet support
114/// `arbitrary_self_types`.
115#[doc(alias = "PyByteArray")]
116pub trait PyByteArrayMethods<'py>: crate::sealed::Sealed {
117 /// Gets the length of the bytearray.
118 fn len(&self) -> usize;
119
120 /// Checks if the bytearray is empty.
121 fn is_empty(&self) -> bool;
122
123 /// Gets the start of the buffer containing the contents of the bytearray.
124 ///
125 /// # Safety
126 ///
127 /// See the safety requirements of [`PyByteArrayMethods::as_bytes`] and [`PyByteArrayMethods::as_bytes_mut`].
128 fn data(&self) -> *mut u8;
129
130 /// Extracts a slice of the `ByteArray`'s entire buffer.
131 ///
132 /// # Safety
133 ///
134 /// Mutation of the `bytearray` invalidates the slice. If it is used afterwards, the behavior is
135 /// undefined.
136 ///
137 /// These mutations may occur in Python code as well as from Rust:
138 /// - Calling methods like [`PyByteArrayMethods::as_bytes_mut`] and [`PyByteArrayMethods::resize`] will
139 /// invalidate the slice.
140 /// - Actions like dropping objects or raising exceptions can invoke `__del__`methods or signal
141 /// handlers, which may execute arbitrary Python code. This means that if Python code has a
142 /// reference to the `bytearray` you cannot safely use the vast majority of PyO3's API whilst
143 /// using the slice.
144 ///
145 /// As a result, this slice should only be used for short-lived operations without executing any
146 /// Python code, such as copying into a Vec.
147 /// For free-threaded Python support see also [`with_critical_section`].
148 ///
149 /// # Examples
150 ///
151 /// ```rust
152 /// use pyo3::prelude::*;
153 /// use pyo3::exceptions::PyRuntimeError;
154 /// use pyo3::sync::critical_section::with_critical_section;
155 /// use pyo3::types::PyByteArray;
156 ///
157 /// #[pyfunction]
158 /// fn a_valid_function(bytes: &Bound<'_, PyByteArray>) -> PyResult<()> {
159 /// let section = with_critical_section(bytes, || {
160 /// // SAFETY: We promise to not let the interpreter regain control over the bytearray
161 /// // or invoke any PyO3 APIs while using the slice.
162 /// let slice = unsafe { bytes.as_bytes() };
163 ///
164 /// // Copy only a section of `bytes` while avoiding
165 /// // `to_vec` which copies the entire thing.
166 /// slice.get(6..11)
167 /// .map(Vec::from)
168 /// .ok_or_else(|| PyRuntimeError::new_err("input is not long enough"))
169 /// })?;
170 ///
171 /// // Now we can do things with `section` and call PyO3 APIs again.
172 /// // ...
173 /// # assert_eq!(§ion, b"world");
174 ///
175 /// Ok(())
176 /// }
177 /// # fn main() -> PyResult<()> {
178 /// # Python::attach(|py| -> PyResult<()> {
179 /// # let fun = wrap_pyfunction!(a_valid_function, py)?;
180 /// # let locals = pyo3::types::PyDict::new(py);
181 /// # locals.set_item("a_valid_function", fun)?;
182 /// #
183 /// # py.run(cr#"b = bytearray(b"hello world")
184 /// # a_valid_function(b)
185 /// #
186 /// # try:
187 /// # a_valid_function(bytearray())
188 /// # except RuntimeError as e:
189 /// # assert str(e) == 'input is not long enough'"#,
190 /// # None,
191 /// # Some(&locals),
192 /// # )?;
193 /// #
194 /// # Ok(())
195 /// # })
196 /// # }
197 /// ```
198 ///
199 /// # Incorrect usage
200 ///
201 /// The following `bug` function is unsound ⚠️
202 ///
203 /// ```rust,no_run
204 /// # use pyo3::prelude::*;
205 /// # use pyo3::types::PyByteArray;
206 ///
207 /// # #[allow(dead_code)]
208 /// #[pyfunction]
209 /// fn bug(py: Python<'_>, bytes: &Bound<'_, PyByteArray>) {
210 /// // No critical section is being used.
211 /// // This means that for free-threaded Python another thread could be modifying the
212 /// // bytearray concurrently and thus invalidate `slice` any time.
213 /// let slice = unsafe { bytes.as_bytes() };
214 ///
215 /// // This explicitly yields control back to the Python interpreter...
216 /// // ...but it's not always this obvious. Many things do this implicitly.
217 /// py.detach(|| {
218 /// // Python code could be mutating through its handle to `bytes`,
219 /// // which makes reading it a data race, which is undefined behavior.
220 /// println!("{:?}", slice[0]);
221 /// });
222 ///
223 /// // Python code might have mutated it, so we can not rely on the slice
224 /// // remaining valid. As such this is also undefined behavior.
225 /// println!("{:?}", slice[0]);
226 /// }
227 /// ```
228 unsafe fn as_bytes(&self) -> &[u8];
229
230 /// Extracts a mutable slice of the `ByteArray`'s entire buffer.
231 ///
232 /// # Safety
233 ///
234 /// Any other accesses of the `bytearray`'s buffer invalidate the slice. If it is used
235 /// afterwards, the behavior is undefined. The safety requirements of [`PyByteArrayMethods::as_bytes`]
236 /// apply to this function as well.
237 #[expect(clippy::mut_from_ref)]
238 unsafe fn as_bytes_mut(&self) -> &mut [u8];
239
240 /// Copies the contents of the bytearray to a Rust vector.
241 ///
242 /// # Examples
243 ///
244 /// ```
245 /// # use pyo3::prelude::*;
246 /// # use pyo3::types::PyByteArray;
247 /// # Python::attach(|py| {
248 /// let bytearray = PyByteArray::new(py, b"Hello World.");
249 /// let mut copied_message = bytearray.to_vec();
250 /// assert_eq!(b"Hello World.", copied_message.as_slice());
251 ///
252 /// copied_message[11] = b'!';
253 /// assert_eq!(b"Hello World!", copied_message.as_slice());
254 ///
255 /// pyo3::py_run!(py, bytearray, "assert bytearray == b'Hello World.'");
256 /// # });
257 /// ```
258 fn to_vec(&self) -> Vec<u8>;
259
260 /// Resizes the bytearray object to the new length `len`.
261 ///
262 /// Note that this will invalidate any pointers obtained by [PyByteArrayMethods::data], as well as
263 /// any (unsafe) slices obtained from [PyByteArrayMethods::as_bytes] and [PyByteArrayMethods::as_bytes_mut].
264 fn resize(&self, len: usize) -> PyResult<()>;
265}
266
267impl<'py> PyByteArrayMethods<'py> for Bound<'py, PyByteArray> {
268 #[inline]
269 fn len(&self) -> usize {
270 // non-negative Py_ssize_t should always fit into Rust usize
271 unsafe { ffi::PyByteArray_Size(self.as_ptr()) as usize }
272 }
273
274 fn is_empty(&self) -> bool {
275 self.len() == 0
276 }
277
278 fn data(&self) -> *mut u8 {
279 self.as_borrowed().data()
280 }
281
282 unsafe fn as_bytes(&self) -> &[u8] {
283 unsafe { self.as_borrowed().as_bytes() }
284 }
285
286 unsafe fn as_bytes_mut(&self) -> &mut [u8] {
287 unsafe { self.as_borrowed().as_bytes_mut() }
288 }
289
290 fn to_vec(&self) -> Vec<u8> {
291 with_critical_section(self, || {
292 // SAFETY:
293 // * `self` is a `Bound` object, which guarantees that the Python GIL is held.
294 // * For free-threaded Python, a critical section is used in lieu of the GIL.
295 // * We don't interact with the interpreter
296 // * We don't mutate the underlying slice
297 unsafe { self.as_bytes() }.to_vec()
298 })
299 }
300
301 fn resize(&self, len: usize) -> PyResult<()> {
302 unsafe {
303 let result = ffi::PyByteArray_Resize(self.as_ptr(), len as ffi::Py_ssize_t);
304 if result == 0 {
305 Ok(())
306 } else {
307 Err(PyErr::fetch(self.py()))
308 }
309 }
310 }
311}
312
313impl<'a> Borrowed<'a, '_, PyByteArray> {
314 fn data(&self) -> *mut u8 {
315 unsafe { ffi::PyByteArray_AsString(self.as_ptr()).cast() }
316 }
317
318 pub(crate) unsafe fn as_bytes(self) -> &'a [u8] {
319 unsafe { slice::from_raw_parts(self.data(), self.len()) }
320 }
321
322 unsafe fn as_bytes_mut(self) -> &'a mut [u8] {
323 unsafe { slice::from_raw_parts_mut(self.data(), self.len()) }
324 }
325}
326
327impl<'py> TryFrom<&Bound<'py, PyAny>> for Bound<'py, PyByteArray> {
328 type Error = crate::PyErr;
329
330 /// Creates a new Python `bytearray` object from another Python object that
331 /// implements the buffer protocol.
332 fn try_from(value: &Bound<'py, PyAny>) -> Result<Self, Self::Error> {
333 PyByteArray::from(value)
334 }
335}
336
#[cfg(test)]
mod tests {
    use crate::types::{PyAnyMethods, PyByteArray, PyByteArrayMethods};
    use crate::{exceptions, Bound, Py, PyAny, Python};

    /// `len` reports the number of bytes the bytearray was created from.
    #[test]
    fn test_len() {
        Python::attach(|py| {
            let src = b"Hello Python";
            let bytearray = PyByteArray::new(py, src);
            assert_eq!(src.len(), bytearray.len());
        });
    }

    /// `as_bytes` exposes the contents and starts at the pointer returned by `data`.
    #[test]
    fn test_as_bytes() {
        Python::attach(|py| {
            let src = b"Hello Python";
            let bytearray = PyByteArray::new(py, src);

            let slice = unsafe { bytearray.as_bytes() };
            assert_eq!(src, slice);
            assert_eq!(bytearray.data() as *const _, slice.as_ptr());
        });
    }

    /// Writes through `as_bytes_mut` are visible from the Python side.
    #[test]
    fn test_as_bytes_mut() {
        Python::attach(|py| {
            let src = b"Hello Python";
            let bytearray = PyByteArray::new(py, src);

            let slice = unsafe { bytearray.as_bytes_mut() };
            assert_eq!(src, slice);
            assert_eq!(bytearray.data(), slice.as_mut_ptr());

            slice[0..5].copy_from_slice(b"Hi...");

            assert_eq!(bytearray.str().unwrap(), "bytearray(b'Hi... Python')");
        });
    }

    /// `to_vec` copies the full contents.
    #[test]
    fn test_to_vec() {
        Python::attach(|py| {
            let src = b"Hello Python";
            let bytearray = PyByteArray::new(py, src);

            let vec = bytearray.to_vec();
            assert_eq!(src, vec.as_slice());
        });
    }

    /// `PyByteArray::from` accepts an object implementing the buffer protocol.
    #[test]
    fn test_from() {
        Python::attach(|py| {
            let src = b"Hello Python";
            let bytearray = PyByteArray::new(py, src);

            let ba: Py<PyAny> = bytearray.into();
            let bytearray = PyByteArray::from(ba.bind(py)).unwrap();

            assert_eq!(src, unsafe { bytearray.as_bytes() });
        });
    }

    /// `PyByteArray::from` reports a `TypeError` for `None`.
    #[test]
    fn test_from_err() {
        Python::attach(|py| {
            if let Err(err) = PyByteArray::from(py.None().bind(py)) {
                assert!(err.is_instance_of::<exceptions::PyTypeError>(py));
            } else {
                panic!("error");
            }
        });
    }

    /// The `TryFrom` impl behaves like `PyByteArray::from`.
    #[test]
    fn test_try_from() {
        Python::attach(|py| {
            let src = b"Hello Python";
            let bytearray: &Bound<'_, PyAny> = &PyByteArray::new(py, src);
            let bytearray: Bound<'_, PyByteArray> = TryInto::try_into(bytearray).unwrap();

            assert_eq!(src, unsafe { bytearray.as_bytes() });
        });
    }

    /// `resize` changes the reported length.
    #[test]
    fn test_resize() {
        Python::attach(|py| {
            let src = b"Hello Python";
            let bytearray = PyByteArray::new(py, src);

            bytearray.resize(20).unwrap();
            assert_eq!(20, bytearray.len());
        });
    }

    /// `new_with` hands the closure a buffer whose writes end up in the bytearray.
    #[test]
    fn test_byte_array_new_with() -> super::PyResult<()> {
        Python::attach(|py| -> super::PyResult<()> {
            let py_bytearray = PyByteArray::new_with(py, 10, |b: &mut [u8]| {
                b.copy_from_slice(b"Hello Rust");
                Ok(())
            })?;
            let bytearray: &[u8] = unsafe { py_bytearray.as_bytes() };
            assert_eq!(bytearray, b"Hello Rust");
            Ok(())
        })
    }

    /// A closure that writes nothing leaves the buffer zeroed.
    #[test]
    fn test_byte_array_new_with_zero_initialised() -> super::PyResult<()> {
        Python::attach(|py| -> super::PyResult<()> {
            let py_bytearray = PyByteArray::new_with(py, 10, |_b: &mut [u8]| Ok(()))?;
            let bytearray: &[u8] = unsafe { py_bytearray.as_bytes() };
            assert_eq!(bytearray, &[0; 10]);
            Ok(())
        })
    }

    /// An error returned by the closure is propagated by `new_with`.
    #[test]
    fn test_byte_array_new_with_error() {
        use crate::exceptions::PyValueError;
        Python::attach(|py| {
            let py_bytearray_result = PyByteArray::new_with(py, 10, |_b: &mut [u8]| {
                Err(PyValueError::new_err("Hello Crustaceans!"))
            });
            assert!(py_bytearray_result.is_err());
            assert!(py_bytearray_result
                .err()
                .unwrap()
                .is_instance_of::<PyValueError>(py));
        })
    }

    // * wasm has no threading support
    // * CPython 3.13t is unsound => test fails
    #[cfg(all(
        not(target_family = "wasm"),
        any(Py_3_14, not(all(Py_3_13, Py_GIL_DISABLED)))
    ))]
    #[test]
    fn test_data_integrity_in_critical_section() {
        use crate::instance::Py;
        use crate::sync::{critical_section::with_critical_section, MutexExt};

        use core::sync::atomic::{AtomicBool, Ordering};
        use core::time::Duration;
        use std::sync::Mutex;
        use std::thread;
        use std::thread::ScopedJoinHandle;

        const SIZE: usize = 1_000_000;
        const DATA_VALUE: u8 = 42;

        fn make_byte_array(py: Python<'_>, size: usize, value: u8) -> Bound<'_, PyByteArray> {
            PyByteArray::new_with(py, size, |b| {
                b.fill(value);
                Ok(())
            })
            .unwrap()
        }

        let data: Mutex<Py<PyByteArray>> = Mutex::new(Python::attach(|py| {
            make_byte_array(py, SIZE, DATA_VALUE).unbind()
        }));

        fn get_data<'py>(
            data: &Mutex<Py<PyByteArray>>,
            py: Python<'py>,
        ) -> Bound<'py, PyByteArray> {
            data.lock_py_attached(py).unwrap().bind(py).clone()
        }

        fn set_data(data: &Mutex<Py<PyByteArray>>, new: Bound<'_, PyByteArray>) {
            let py = new.py();
            *data.lock_py_attached(py).unwrap() = new.unbind()
        }

        let running = AtomicBool::new(true);
        let extending = AtomicBool::new(false);

        // continuously extends and resets the bytearray in data
        let worker1 = || {
            let mut rounds = 0;
            while running.load(Ordering::SeqCst) && rounds < 50 {
                Python::attach(|py| {
                    let byte_array = get_data(&data, py);
                    extending.store(true, Ordering::SeqCst);
                    byte_array
                        .call_method("extend", (&byte_array,), None)
                        .unwrap();
                    extending.store(false, Ordering::SeqCst);
                    set_data(&data, make_byte_array(py, SIZE, DATA_VALUE));
                    rounds += 1;
                });
            }
        };

        // continuously checks the integrity of bytearray in data
        let worker2 = || {
            while running.load(Ordering::SeqCst) {
                if !extending.load(Ordering::SeqCst) {
                    // wait until we have a chance to read inconsistent state
                    continue;
                }
                Python::attach(|py| {
                    let read = get_data(&data, py);
                    if read.len() == SIZE {
                        // extend is still not done => wait even more
                        return;
                    }
                    with_critical_section(&read, || {
                        // SAFETY: we are in a critical section
                        // This is the whole point of the test: make sure that a
                        // critical section is sufficient to ensure that the data
                        // read is consistent.
                        unsafe {
                            let bytes = read.as_bytes();
                            // Check both ends of the buffer independently; the head check
                            // does not depend on the tail element being inspected.
                            let tail_intact =
                                bytes.iter().rev().take(50).all(|v| *v == DATA_VALUE);
                            let head_intact = bytes.iter().take(50).all(|v| *v == DATA_VALUE);
                            assert!(tail_intact && head_intact);
                        }
                    });
                });
            }
        };

        thread::scope(|s| {
            let mut handle1 = Some(s.spawn(worker1));
            let mut handle2 = Some(s.spawn(worker2));
            let mut handles = [&mut handle1, &mut handle2];

            // Poll for up to one second, surfacing a worker panic as soon as it finishes.
            let t0 = std::time::Instant::now();
            while t0.elapsed() < Duration::from_secs(1) {
                for handle in &mut handles {
                    if handle
                        .as_ref()
                        .map(ScopedJoinHandle::is_finished)
                        .unwrap_or(false)
                    {
                        let res = handle.take().unwrap().join();
                        if res.is_err() {
                            // Stop the other worker before propagating the panic.
                            running.store(false, Ordering::SeqCst);
                        }
                        res.unwrap();
                    }
                }
                if handles.iter().any(|handle| handle.is_none()) {
                    break;
                }
            }
            running.store(false, Ordering::SeqCst);
            for handle in &mut handles {
                if let Some(handle) = handle.take() {
                    handle.join().unwrap()
                }
            }
        });
    }
}
598}