import numpy as np
from .abc import Codec
from .compat import ensure_ndarray, ndarray_copy
[docs]class Delta(Codec):
"""Codec to encode data as the difference between adjacent values.
Parameters
----------
dtype : dtype
Data type to use for decoded data.
astype : dtype, optional
Data type to use for encoded data.
Notes
-----
If `astype` is an integer data type, please ensure that it is
sufficiently large to store encoded values. No checks are made and data
may become corrupted due to integer overflow if `astype` is too small.
Note also that the encoded data for each chunk includes the absolute
value of the first element in the chunk, and so the encoded data type in
general needs to be large enough to store absolute values from the array.
Examples
--------
>>> import numcodecs
>>> import numpy as np
>>> x = np.arange(100, 120, 2, dtype='i8')
>>> codec = numcodecs.Delta(dtype='i8', astype='i1')
>>> y = codec.encode(x)
>>> y
array([100, 2, 2, 2, 2, 2, 2, 2, 2, 2], dtype=int8)
>>> z = codec.decode(y)
>>> z
array([100, 102, 104, 106, 108, 110, 112, 114, 116, 118])
"""
codec_id = 'delta'
def __init__(self, dtype, astype=None):
self.dtype = np.dtype(dtype)
if astype is None:
self.astype = self.dtype
else:
self.astype = np.dtype(astype)
if self.dtype == object or self.astype == object:
raise ValueError('object arrays are not supported')
[docs] def encode(self, buf):
# normalise input
arr = ensure_ndarray(buf).view(self.dtype)
# flatten to simplify implementation
arr = arr.reshape(-1, order='A')
# setup encoded output
enc = np.empty_like(arr, dtype=self.astype)
# set first element
enc[0] = arr[0]
# compute differences
enc[1:] = np.diff(arr)
return enc
[docs] def decode(self, buf, out=None):
# normalise input
enc = ensure_ndarray(buf).view(self.astype)
# flatten to simplify implementation
enc = enc.reshape(-1, order='A')
# setup decoded output
dec = np.empty_like(enc, dtype=self.dtype)
# decode differences
np.cumsum(enc, out=dec)
# handle output
out = ndarray_copy(dec, out)
return out
[docs] def get_config(self):
# override to handle encoding dtypes
return dict(
id=self.codec_id,
dtype=self.dtype.str,
astype=self.astype.str
)
def __repr__(self):
r = '{}(dtype={!r}'.format(type(self).__name__, self.dtype.str)
if self.astype != self.dtype:
r += ', astype=%r' % self.astype.str
r += ')'
return r