Source code for numcodecs.pcodec
from typing import Literal
from numcodecs.abc import Codec
from numcodecs.compat import ensure_contiguous_ndarray
from pcodec import ChunkConfig, DeltaSpec, ModeSpec, PagingSpec, standalone
# Default cap on how many numbers go into a single page (2**18 == 262144);
# used as the default value of PCodec's ``equal_pages_up_to`` parameter.
DEFAULT_MAX_PAGE_N = 2**18
[docs]
class PCodec(Codec):
"""
PCodec (or pco, pronounced "pico") losslessly compresses and decompresses
numerical sequences with high compression ratio and fast speed.
See `PCodec Repo <https://github.com/mwlon/pcodec>`_ for more information.
PCodec supports only the following numerical dtypes: uint16, uint32, uint64,
int16, int32, int64, float16, float32, and float64.
Parameters
----------
level : int
A compression level from 0-12, where 12 take the longest and compresses
the most.
mode_spec : {"auto", "classic"}
Configures whether Pcodec should try to infer the best "mode" or
structure of the data (e.g. approximate multiples of 0.1) to improve
compression ratio, or skip this step and just use the numbers as-is
(Classic mode). Note that the "try*" specs are not currently supported.
delta_spec : {"auto", "none", "try_consecutive", "try_lookback"}
Configures the delta encoding strategy. By default, uses "auto" which
will try to infer the best encoding order.
paging_spec : {"equal_pages_up_to"}
Configures the paging strategy. Only "equal_pages_up_to" is currently
supported.
delta_encoding_order : int or None
Explicit delta encoding level from 0-7. Only valid if delta_spec is
"try_consecutive" or "auto" (to support backwards compatibility with
older versions of this codec).
equal_pages_up_to : int
Divide the chunk into equal pages of up to this many numbers.
"""
codec_id = "pcodec"
def __init__(
self,
level: int = 8,
*,
mode_spec: Literal["auto", "classic"] = "auto",
delta_spec: Literal["auto", "none", "try_consecutive", "try_lookback"] = "auto",
paging_spec: Literal["equal_pages_up_to"] = "equal_pages_up_to",
delta_encoding_order: int | None = None,
equal_pages_up_to: int = DEFAULT_MAX_PAGE_N,
):
# note that we use `level` instead of `compression_level` to
# match other codecs
self.level = level
self.mode_spec = mode_spec
self.delta_spec = delta_spec
self.paging_spec = paging_spec
self.delta_encoding_order = delta_encoding_order
self.equal_pages_up_to = equal_pages_up_to
def _get_chunk_config(self):
match self.mode_spec:
case "auto":
mode_spec = ModeSpec.auto()
case "classic":
mode_spec = ModeSpec.classic()
case _:
raise ValueError(f"mode_spec {self.mode_spec} is not supported")
if self.delta_encoding_order is not None and self.delta_spec == "auto":
# backwards compat for before delta_spec was introduced
delta_spec = DeltaSpec.try_consecutive(self.delta_encoding_order)
elif self.delta_encoding_order is not None and self.delta_spec != "try_consecutive":
raise ValueError(
"delta_encoding_order can only be set for delta_spec='try_consecutive'"
)
else:
match self.delta_spec:
case "auto":
delta_spec = DeltaSpec.auto()
case "none":
delta_spec = DeltaSpec.none()
case "try_consecutive":
delta_spec = DeltaSpec.try_consecutive(self.delta_encoding_order)
case "try_lookback":
delta_spec = DeltaSpec.try_lookback()
case _:
raise ValueError(f"delta_spec {self.delta_spec} is not supported")
match self.paging_spec:
case "equal_pages_up_to":
paging_spec = PagingSpec.equal_pages_up_to(self.equal_pages_up_to)
case _:
raise ValueError(f"paging_spec {self.paging_spec} is not supported")
return ChunkConfig(
compression_level=self.level,
delta_spec=delta_spec,
mode_spec=mode_spec,
paging_spec=paging_spec,
)
[docs]
def encode(self, buf):
buf = ensure_contiguous_ndarray(buf)
config = self._get_chunk_config()
return standalone.simple_compress(buf, config)
[docs]
def decode(self, buf, out=None):
if out is not None:
out = ensure_contiguous_ndarray(out)
standalone.simple_decompress_into(buf, out)
return out
else:
return standalone.simple_decompress(buf)