@@ -33,14 +33,23 @@ cdef extern from b"bitshuffle.h":
33
33
int block_size) nogil
34
34
int bshuf_decompress_lz4(void * A, void * B, int size, int elem_size,
35
35
int block_size) nogil
36
+ IF ZSTD_SUPPORT:
37
+ int bshuf_compress_zstd_bound(int size, int elem_size, int block_size)
38
+ int bshuf_compress_zstd(void * A, void * B, int size, int elem_size,
39
+ int block_size, const int comp_lvl) nogil
40
+ int bshuf_decompress_zstd(void * A, void * B, int size, int elem_size,
41
+ int block_size) nogil
36
42
int BSHUF_VERSION_MAJOR
37
43
int BSHUF_VERSION_MINOR
38
44
int BSHUF_VERSION_POINT
39
45
46
# Human-readable library version, built from the integer constants exposed by
# the C header. Plain %-formatting is used on purpose: the placeholders are
# printf-style, so ``str.format`` would leave them unsubstituted.
__version__ = "%d.%d.%d" % (BSHUF_VERSION_MAJOR, BSHUF_VERSION_MINOR,
                            BSHUF_VERSION_POINT)

# Compile-time switch: lets Python callers check whether this extension was
# built with the ZSTD routines.
IF ZSTD_SUPPORT:
    __zstd__ = True
ELSE:
    __zstd__ = False
44
53
45
54
# Prototypes from bitshuffle.c
46
55
cdef extern int bshuf_copy(void * A, void * B, int size, int elem_size)
@@ -451,3 +460,110 @@ def decompress_lz4(np.ndarray arr not None, shape, dtype, int block_size=0):
451
460
return out
452
461
453
462
463
IF ZSTD_SUPPORT:
    @cython.boundscheck(False)
    @cython.wraparound(False)
    def compress_zstd(np.ndarray arr not None, int block_size=0, int comp_lvl=1):
        """Bitshuffle then compress an array using ZSTD.

        Parameters
        ----------
        arr : numpy array
            Data to be processed. Must be C-contiguous.
        block_size : positive integer
            Block size in number of elements. By default, block size is chosen
            automatically.
        comp_lvl : positive integer
            Compression level applied by ZSTD.

        Returns
        -------
        out : array with np.uint8 data type
            Buffer holding compressed data.

        Raises
        ------
        ValueError
            If *arr* is not C-contiguous.
        RuntimeError
            If the underlying C routine reports a negative error code. The
            code is also passed as the second exception argument.

        """

        cdef int ii, size, itemsize, count = 0
        if not arr.flags['C_CONTIGUOUS']:
            msg = "Input array must be C-contiguous."
            raise ValueError(msg)
        size = arr.size
        dtype = arr.dtype
        itemsize = dtype.itemsize

        # Allocate for the worst case reported by the C library; the result is
        # trimmed to the actual compressed length before returning.
        max_out_size = bshuf_compress_zstd_bound(size, itemsize, block_size)

        cdef np.ndarray out
        out = np.empty(max_out_size, dtype=np.uint8)

        # Flat byte views give typed buffers whose first-element address can
        # be handed to the C routine as raw pointers.
        cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat
        arr_flat = arr.view(np.uint8).ravel()
        cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat
        out_flat = out.view(np.uint8).ravel()
        cdef void * arr_ptr = <void *> &arr_flat[0]
        cdef void * out_ptr = <void *> &out_flat[0]
        with nogil:
            # REPEATC is defined elsewhere in this module; the call is simply
            # repeated that many times (presumably for benchmarking - confirm).
            for ii in range(REPEATC):
                count = bshuf_compress_zstd(arr_ptr, out_ptr, size, itemsize,
                                            block_size, comp_lvl)
        if count < 0:
            msg = "Failed. Error code %d."
            excp = RuntimeError(msg % count, count)
            raise excp
        return out[:count]

    @cython.boundscheck(False)
    @cython.wraparound(False)
    def decompress_zstd(np.ndarray arr not None, shape, dtype, int block_size=0):
        """Decompress a buffer using ZSTD then bitunshuffle it yielding an array.

        Parameters
        ----------
        arr : numpy array
            Input data to be decompressed. Must be C-contiguous.
        shape : tuple of integers
            Shape of the output (decompressed array). Must match the shape of the
            original data array before compression.
        dtype : numpy dtype
            Datatype of the output array. Must match the data type of the original
            data array before compression. Anything accepted by ``np.dtype``
            may be given.
        block_size : positive integer
            Block size in number of elements. Must match value used for
            compression.

        Returns
        -------
        out : numpy array with shape *shape* and data type *dtype*
            Decompressed data.

        Raises
        ------
        ValueError
            If *arr* is not C-contiguous.
        RuntimeError
            If the underlying C routine reports a negative error code, or if
            the count it returns differs from ``arr.size``.

        """

        cdef int ii, size, itemsize, count = 0
        if not arr.flags['C_CONTIGUOUS']:
            msg = "Input array must be C-contiguous."
            raise ValueError(msg)
        size = np.prod(shape)
        # Normalize so scalar types (e.g. np.float32) and dtype strings work
        # too; an existing np.dtype instance passes through unchanged.
        dtype = np.dtype(dtype)
        itemsize = dtype.itemsize

        cdef np.ndarray out
        out = np.empty(tuple(shape), dtype=dtype)

        # Flat byte views give typed buffers whose first-element address can
        # be handed to the C routine as raw pointers.
        cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] arr_flat
        arr_flat = arr.view(np.uint8).ravel()
        cdef np.ndarray[dtype=np.uint8_t, ndim=1, mode="c"] out_flat
        out_flat = out.view(np.uint8).ravel()
        cdef void * arr_ptr = <void *> &arr_flat[0]
        cdef void * out_ptr = <void *> &out_flat[0]
        with nogil:
            # REPEATC is defined elsewhere in this module; the call is simply
            # repeated that many times (presumably for benchmarking - confirm).
            for ii in range(REPEATC):
                count = bshuf_decompress_zstd(arr_ptr, out_ptr, size, itemsize,
                                              block_size)
        if count < 0:
            msg = "Failed. Error code %d."
            excp = RuntimeError(msg % count, count)
            raise excp
        # The value returned by the C routine is compared against the input
        # buffer's element count; a mismatch is reported as an error.
        if count != arr.size:
            msg = "Decompressed different number of bytes than input buffer size."
            msg += "Input buffer %d, decompressed %d." % (arr.size, count)
            raise RuntimeError(msg, count)
        return out
0 commit comments