# Source code for numcodecs.bitround

import numpy as np


from .abc import Codec
from .compat import ensure_ndarray_like, ndarray_copy

# Width, in bits, of the explicitly stored mantissa (significand) of each
# supported IEEE 754 floating-point type, keyed by dtype name. This is the
# upper bound on how many mantissa bits can be kept for that type.
# https://en.wikipedia.org/wiki/IEEE_754
max_bits = dict(float16=10, float32=23, float64=52)


class BitRound(Codec):
    """Floating-point bit rounding codec

    Drops a specified number of bits from the floating point mantissa,
    leaving an array more amenable to compression. The number of bits to keep
    should be determined by an information analysis of the data to be
    compressed. The approach is based on the paper by Klöwer et al. 2021
    (https://www.nature.com/articles/s43588-021-00156-2). See
    https://github.com/zarr-developers/numcodecs/issues/298 for discussion
    and the original implementation in Julia referred to at
    https://github.com/milankl/BitInformation.jl

    Parameters
    ----------
    keepbits: int
        The number of bits of the mantissa to keep. The range allowed
        depends on the dtype input data. If keepbits is equal to the maximum
        allowed for the data type, this is equivalent to no transform.

    Raises
    ------
    ValueError
        If ``keepbits`` is negative.
    """

    codec_id = 'bitround'

    def __init__(self, keepbits: int):
        if keepbits < 0:
            raise ValueError("keepbits must be zero or positive")
        self.keepbits = keepbits

    def encode(self, buf):
        """Create int array by rounding floating-point data

        The itemsize will be preserved, but the output should be much more
        compressible. The input buffer is never modified: rounding is done on
        a copy of the data.

        Parameters
        ----------
        buf
            Buffer of 16-, 32- or 64-bit floating-point values, in either
            byte order.

        Returns
        -------
        Integer array with the same itemsize and byte order as the input,
        holding the rounded bit patterns. When ``keepbits`` equals the
        mantissa width of the dtype there is nothing to round and the input
        array itself is returned unchanged (still as floats).

        Raises
        ------
        TypeError
            If the data is not a float array of at most 64 bits.
        ValueError
            If ``keepbits`` exceeds the mantissa width of the dtype.
        """
        a = ensure_ndarray_like(buf)
        if not a.dtype.kind == "f" or a.dtype.itemsize > 8:
            raise TypeError("Only float arrays (16-64bit) can be bit-rounded")
        # ``dtype.name`` is independent of byte order ("float32" for both
        # "<f4" and ">f4"), whereas ``str(dtype)`` is not, so this lookup
        # also works for non-native-endian input.
        bits = max_bits[a.dtype.name]
        if self.keepbits == bits:
            return a
        if self.keepbits > bits:
            raise ValueError("Keepbits too large for given dtype")
        # cast float to int type of same width (preserve endianness)
        a_int_dtype = np.dtype(a.dtype.str.replace("f", "i"))
        all_set = np.array(-1, dtype=a_int_dtype)
        # Round a private copy of the bit patterns so that the caller's
        # buffer is left untouched (and read-only buffers are accepted).
        b = a.view(a_int_dtype).copy()
        maskbits = bits - self.keepbits
        # Ones in every bit that is kept, zeros in the ``maskbits`` low bits.
        mask = (all_set >> maskbits) << maskbits
        # Round to nearest with ties to even: add just under half a quantum
        # plus the lowest kept bit, then clear the dropped bits.
        half_quantum1 = (1 << (maskbits - 1)) - 1
        b += ((b >> maskbits) & 1) + half_quantum1
        b &= mask
        return b

    def decode(self, buf, out=None):
        """Remake floats from ints

        As with ``encode``, preserves itemsize.

        Parameters
        ----------
        buf
            Buffer produced by ``encode``.
        out
            Optional destination, passed through to ``ndarray_copy``.

        Returns
        -------
        The result of ``ndarray_copy`` applied to the float view of ``buf``.
        """
        buf = ensure_ndarray_like(buf)
        # Cast back from `int` to `float` type (noop if a `float`ing type
        # buffer is provided)
        dt = np.dtype(buf.dtype.str.replace("i", "f"))
        data = buf.view(dt)
        return ndarray_copy(data, out)