This commit is contained in:
Jonas Zeunert
2024-08-16 21:57:55 +02:00
parent adeb5c5ec7
commit 4309a2d185
1696 changed files with 279655 additions and 0 deletions

View File

@@ -0,0 +1,76 @@
# Copyright (c) 2008 - 2024, Ilan Schnell; All Rights Reserved
"""
This package defines an object type which can efficiently represent
a bitarray. Bitarrays are sequence types and behave very much like lists.
Please find a description of this package at:
https://github.com/ilanschnell/bitarray
Author: Ilan Schnell
"""
from __future__ import absolute_import
from bitarray._bitarray import (bitarray, decodetree, _sysinfo,
_bitarray_reconstructor,
get_default_endian, _set_default_endian,
__version__)
__all__ = ['bitarray', 'frozenbitarray', 'decodetree', 'bits2bytes']
class frozenbitarray(bitarray):
    """frozenbitarray(initializer=0, /, endian='big', buffer=None) -> \
frozenbitarray

Return a `frozenbitarray` object.  Initialized the same way a `bitarray`
object is initialized.  A `frozenbitarray` is immutable and hashable,
and may therefore be used as a dictionary key.
"""

    def __init__(self, *args, **kwargs):
        # The arguments are accepted but not used here; the only work done
        # at this point is switching the new object to its frozen state.
        self._freeze()

    def __repr__(self):
        base_repr = bitarray.__repr__(self)
        return 'frozen' + base_repr

    def __hash__(self):
        "Return hash(self)."
        # Hash a big-endian copy, so that the result does not depend on the
        # bit-endianness of this particular object.
        big = bitarray(self, 'big')
        key = (len(big), big.tobytes())
        return hash(key)

    # Strictly speaking, none of the overrides below are required: each of
    # these methods would raise a TypeError on read-only memory anyway.
    # They are overridden only to give a more specific error message.
    def __delitem__(self, *args, **kwargs):
        ""  # no docstring
        raise TypeError("frozenbitarray is immutable")

    # item assignment
    __setitem__ = __delitem__
    # mutating methods
    append = bytereverse = clear = encode = extend = fill = __delitem__
    frombytes = fromfile = insert = invert = pack = pop = __delitem__
    remove = reverse = setall = sort = __delitem__
    # in-place operators
    __iadd__ = __imul__ = __delitem__
    __iand__ = __ior__ = __ixor__ = __delitem__
    __ilshift__ = __irshift__ = __delitem__
def bits2bytes(__n):
    """bits2bytes(n, /) -> int

Return the number of bytes necessary to store n bits.

Raises `TypeError` when n is not an integer, and `ValueError` when n is
negative.
"""
    import sys

    # `long` only exists on Python 2, so it must only be looked up there
    if sys.version_info[0] == 2:
        integer_types = (int, long)
    else:
        integer_types = int

    if not isinstance(__n, integer_types):
        raise TypeError("integer expected")
    if __n < 0:
        raise ValueError("non-negative integer expected")

    # round up: a partially used byte still counts as one whole byte
    whole, rest = divmod(__n, 8)
    return whole + (1 if rest else 0)
def test(verbosity=1):
    """test(verbosity=1) -> TextTestResult

Run self-test, and return unittest.runner.TextTestResult object.
"""
    # imported lazily, so the test-suite is only loaded when it is run
    from bitarray import test_bitarray as suite

    result = suite.run(verbosity=verbosity)
    return result

View File

@@ -0,0 +1,149 @@
# Copyright (c) 2021 - 2024, Ilan Schnell; All Rights Reserved
#
# This stub, as well as util.pyi, are tested with Python 3.9 and mypy 0.950
from collections.abc import Iterable, Iterator, Sequence
from unittest.runner import TextTestResult
from typing import Any, BinaryIO, Dict, Union, overload
CodeDict = Dict[Any, bitarray]
BytesLike = Union[bytes, Iterable[int]]
# Type stub for `decodetree`.  An instance is created from a code
# dictionary (`CodeDict`) and is accepted by `bitarray.decode()` and
# `bitarray.iterdecode()` in place of the dictionary itself.
class decodetree:
    def __init__(self, code: CodeDict) -> None: ...
    def complete(self) -> bool: ...
    def nodes(self) -> int: ...
    # gives back a code dictionary of the same type that __init__ accepts
    def todict(self) -> CodeDict: ...
# Type stub for the `bitarray` class.  A default written as `= ...` marks the
# parameter as optional without stating the actual default value.
class bitarray:
    def __init__(self,
                 initializer: Union[int, str, Iterable[int], None] = ...,
                 endian: Union[str, None] = ...,
                 buffer: Any = ...) -> None: ...

    def all(self) -> bool: ...
    def any(self) -> bool: ...
    def append(self, value: int) -> None: ...
    def buffer_info(self) -> tuple: ...
    def bytereverse(self,
                    start: int = ...,
                    stop: int = ...) -> None: ...
    def clear(self) -> None: ...
    def copy(self) -> bitarray: ...
    def count(self,
              sub_bitarray: Union[bitarray, int] = ...,
              start: int = ...,
              stop: int = ...,
              step: int = ...) -> int: ...
    def decode(self, code: Union[CodeDict, decodetree]) -> list: ...
    def encode(self, code: CodeDict, x: Iterable) -> None: ...
    def endian(self) -> str: ...
    def extend(self, x: Union[str, Iterable[int]]) -> None: ...
    def fill(self) -> int: ...
    def find(self,
             sub_bitarray: Union[bitarray, int],
             start: int = ...,
             stop: int = ...,
             right: int = ...) -> int: ...
    def frombytes(self, a: BytesLike) -> None: ...
    def fromfile(self, f: BinaryIO, n: int = ...) -> None: ...
    def index(self,
              sub_bitarray: Union[bitarray, int],
              start: int = ...,
              stop: int = ...,
              right: int = ...) -> int: ...
    def insert(self, i: int, value: int) -> None: ...
    def invert(self, i: int = ...) -> None: ...
    def iterdecode(self,
                   code: Union[CodeDict, decodetree]) -> Iterator: ...
    def itersearch(self,
                   sub_bitarray: Union[bitarray, int],
                   start: int = ...,
                   stop: int = ...,
                   right: int = ...) -> Iterator[int]: ...
    def pack(self, b: BytesLike) -> None: ...
    def pop(self, i: int = ...) -> int: ...
    def remove(self, value: int) -> None: ...
    def reverse(self) -> None: ...
    def search(self, sub_bitarray: Union[bitarray, int],
               limit: int = ...) -> list[int]: ...
    def setall(self, value: int) -> None: ...
    # `reverse` is optional at runtime (sort(reverse=False)), so it needs a
    # default here as well; otherwise a plain `a.sort()` is flagged
    def sort(self, reverse: int = ...) -> None: ...
    def to01(self) -> str: ...
    def tobytes(self) -> bytes: ...
    def tofile(self, f: BinaryIO) -> None: ...
    def tolist(self) -> list[int]: ...
    def unpack(self,
               zero: bytes = ...,
               one: bytes = ...) -> bytes: ...

    def __len__(self) -> int: ...
    def __iter__(self) -> Iterator[int]: ...

    # an integer index yields a single bit, a slice or sequence of indices
    # yields a new bitarray
    @overload
    def __getitem__(self, i: int) -> int: ...
    @overload
    def __getitem__(self, s: Union[slice, Sequence]) -> bitarray: ...

    # any kind of index may be assigned a single bit value, only a slice or
    # sequence of indices may be assigned a bitarray
    @overload
    def __setitem__(self, i: Union[int, slice, Sequence], o: int) -> None: ...
    @overload
    def __setitem__(self, s: Union[slice, Sequence], o: bitarray) -> None: ...

    def __delitem__(self, i: Union[int, slice, Sequence]) -> None: ...

    def __add__(self, other: bitarray) -> bitarray: ...
    def __iadd__(self, other: bitarray) -> bitarray: ...
    def __mul__(self, n: int) -> bitarray: ...
    def __imul__(self, n: int) -> bitarray: ...
    def __rmul__(self, n: int) -> bitarray: ...

    def __ge__(self, other: bitarray) -> bool: ...
    def __gt__(self, other: bitarray) -> bool: ...
    def __le__(self, other: bitarray) -> bool: ...
    def __lt__(self, other: bitarray) -> bool: ...

    def __and__(self, other: bitarray) -> bitarray: ...
    def __or__(self, other: bitarray) -> bitarray: ...
    def __xor__(self, other: bitarray) -> bitarray: ...
    def __iand__(self, other: bitarray) -> bitarray: ...
    def __ior__(self, other: bitarray) -> bitarray: ...
    def __ixor__(self, other: bitarray) -> bitarray: ...
    def __invert__(self) -> bitarray: ...
    def __lshift__(self, n: int) -> bitarray: ...
    def __rshift__(self, n: int) -> bitarray: ...
    def __ilshift__(self, n: int) -> bitarray: ...
    def __irshift__(self, n: int) -> bitarray: ...

    # data descriptors
    @property
    def nbytes(self) -> int: ...
    @property
    def padbits(self) -> int: ...
    @property
    def readonly(self) -> bool: ...
# Type stub for `frozenbitarray`: everything is inherited from `bitarray`,
# only `__hash__` is declared in addition.
class frozenbitarray(bitarray):
    def __hash__(self) -> int: ...
# module-level attributes and functions
__version__: str
def bits2bytes(n: int) -> int: ...
def get_default_endian() -> str: ...
def test(verbosity: int = ...) -> TextTestResult: ...
# names with a leading underscore are private helpers of the package
def _set_default_endian(endian: str) -> None: ...
def _sysinfo() -> tuple: ...
def _bitarray_reconstructor(cls: type,
                            buffer: bytes,
                            endian: str,
                            padbits: int,
                            readonly: int) -> bitarray: ...

View File

@@ -0,0 +1,336 @@
/*
Copyright (c) 2008 - 2024, Ilan Schnell; All Rights Reserved
bitarray is published under the PSF license.
Author: Ilan Schnell
*/
#define BITARRAY_VERSION "2.9.2"
#ifdef STDC_HEADERS
# include <stddef.h>
#else
# ifdef HAVE_SYS_TYPES_H
# include <sys/types.h> /* For size_t */
# endif
#endif
/* Compatibility with Visual Studio 2013 and older which don't support
the inline keyword in C (only in C++): use __inline instead.
(copied from pythoncapi_compat.h) */
#if (defined(_MSC_VER) && _MSC_VER < 1900 \
&& !defined(__cplusplus) && !defined(inline))
#define inline __inline
#endif
#ifdef _MSC_VER
#include <intrin.h> /* For _byteswap_uint64() */
#endif
/* --- definitions specific to Python --- */
/* Py_UNREACHABLE was introduced in Python 3.7.  When it is missing, fall
   back to abort(), such that reaching such a code path still terminates. */
#ifndef Py_UNREACHABLE
#define Py_UNREACHABLE() abort()
#endif
/* IS_PY3K is 1 when compiling against Python 3 and 0 otherwise.
   BYTES_SIZE_FMT is the format unit for a bytes buffer plus its size:
   "y#" on Python 3, "s#" on Python 2. */
#if PY_MAJOR_VERSION >= 3
#define IS_PY3K 1
#define BYTES_SIZE_FMT "y#"
#else
#define IS_PY3K 0
/* the Py_MIN and Py_MAX macros were introduced in Python 3.3 */
#define Py_MIN(x, y) (((x) > (y)) ? (y) : (x))
#define Py_MAX(x, y) (((x) > (y)) ? (x) : (y))
/* wrap PySlice_GetIndicesEx() such that the slice argument is cast to
   PySliceObject * */
#define PySlice_GetIndicesEx(slice, len, start, stop, step, slicelength) \
    PySlice_GetIndicesEx(((PySliceObject *) slice),                      \
                         (len), (start), (stop), (step), (slicelength))
/* on Python 2, PyLong_FromLong is mapped onto PyInt_FromLong */
#define PyLong_FromLong PyInt_FromLong
#define BYTES_SIZE_FMT "s#"
#endif
/* --- bitarrayobject --- */
/* .ob_size is buffer size (in bytes), not the number of elements.
The number of elements (bits) is .nbits. */
/* Object layout of a bitarray.  The buffer size in bytes is kept in
   .ob_size (see assert_nbits), while the number of bits is kept in .nbits. */
typedef struct {
    PyObject_VAR_HEAD
    char *ob_item;              /* buffer */
    Py_ssize_t allocated;       /* allocated buffer size (in bytes) */
    Py_ssize_t nbits;           /* length of bitarray, i.e. elements */
    int endian;                 /* bit-endianness of bitarray, either
                                   ENDIAN_LITTLE or ENDIAN_BIG */
    int ob_exports;             /* how many buffer exports */
    PyObject *weakreflist;      /* list of weak references */
    Py_buffer *buffer;          /* used when importing a buffer */
    int readonly;               /* buffer is readonly - when set, setbit()
                                   must not be called (it asserts this) and
                                   set_padbits() leaves the buffer alone */
} bitarrayobject;
/* --- bit-endianness --- */
/* The numeric values also serve as the first index into bitmask_table
   and ones_table (row 0: little endian, row 1: big endian). */
#define ENDIAN_LITTLE 0
#define ENDIAN_BIG 1
/* IS_BE() evaluates to 0 or 1, and is used as a table index as well */
#define IS_LE(self) ((self)->endian == ENDIAN_LITTLE)
#define IS_BE(self) ((self)->endian == ENDIAN_BIG)
/* endianness as string */
#define ENDIAN_STR(endian) ((endian) == ENDIAN_LITTLE ? "little" : "big")
/* number of pad bits, i.e. bits in the buffer beyond .nbits */
#define PADBITS(self) (8 * Py_SIZE(self) - (self)->nbits)
/* number of bytes necessary to store given bits (rounded up) */
#define BYTES(bits) (((bits) + 7) >> 3)
/* Mask selecting bit i within its byte: bit (i % 8) counted from the least
   significant end for little endian, from the most significant end for
   big endian.
   we're not using bitmask_table here, as it is actually slower */
#define BITMASK(self, i) (((char) 1) << ((self)->endian == ENDIAN_LITTLE ? \
    ((i) % 8) : (7 - (i) % 8)))
/* buffer as uint64 array */
#define WBUFF(self) ((uint64_t *) (self)->ob_item)
/* assert that .nbits is in agreement with .ob_size */
#define assert_nbits(self) assert(BYTES((self)->nbits) == Py_SIZE(self))
/* Assert that the buffer exists and that byte index j is in range, that
   is 0 <= j < Py_SIZE(self).  Both macro parameters are parenthesized, such
   that expression arguments (e.g. a cast) expand correctly. */
#define assert_byte_in_range(self, j) \
    assert((self)->ob_item && 0 <= (j) && (j) < Py_SIZE(self))
/* ------------ low level access to bits in bitarrayobject ------------- */
/* Return bit i of the bitarray, as 0 or 1.
   The index must be in range: 0 <= i < self->nbits */
static inline int
getbit(bitarrayobject *self, Py_ssize_t i)
{
    char byte;

    assert_nbits(self);
    assert(0 <= i && i < self->nbits);

    byte = self->ob_item[i >> 3];       /* byte which contains bit i */
    return (byte & BITMASK(self, i)) != 0;
}
/* Set bit i of the bitarray: to 1 when vi is non-zero, to 0 otherwise.
   The index must be in range, and the buffer must not be readonly. */
static inline void
setbit(bitarrayobject *self, Py_ssize_t i, int vi)
{
    char *byte;
    char bit;

    assert_nbits(self);
    assert(0 <= i && i < self->nbits);
    assert(self->readonly == 0);

    byte = self->ob_item + (i >> 3);    /* byte which contains bit i */
    bit = BITMASK(self, i);
    if (vi) {
        *byte |= bit;
    }
    else {
        *byte &= ~bit;
    }
}
/* mask with only bit i (0..7) set is: bitmask_table[endian][i]
   (the same values the BITMASK macro computes) */
static const char bitmask_table[2][8] = {
    {0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80},   /* little endian */
    {0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01},   /* big endian */
};
/* character with n leading ones is: ones_table[endian][n]
   Used by zlc() and set_padbits() to keep the first n (1..7) bits of the
   last byte and clear the pad bits. */
static const char ones_table[2][8] = {
    {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f},   /* little endian */
    {0x00, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe},   /* big endian */
};
/* zlc = zeroed last char
   Return the last byte of the buffer with its pad bits cleared.  When the
   length of the bitarray is a multiple of 8 (this includes the empty
   bitarray), there are no pad bits and 0 is returned. */
static inline char
zlc(bitarrayobject *self)
{
    const int r = self->nbits % 8;     /* index into mask table */
    char last;

    if (r == 0)
        return 0;

    last = self->ob_item[Py_SIZE(self) - 1];
    return last & ones_table[IS_BE(self)][r];
}
/* zlw = zeroed last word
   Return a uint64_t word which holds the last (up to 63) bits of the
   buffer that do not fill a complete 64-bit word.  Bytes missing to
   complete the word, as well as pad bits, are zero.
   When the length of the bitarray is a multiple of 64 (this includes the
   empty bitarray), 0 is returned. */
static inline uint64_t
zlw(bitarrayobject *self)
{
    const Py_ssize_t nbits = self->nbits;
    const Py_ssize_t nw = 8 * (nbits / 64);  /* bytes in complete words */
    const int nr = (nbits % 64) / 8;         /* complete remaining bytes */
    uint64_t res = 0;
    char *dst = (char *) &res;               /* byte-wise view of result */

    assert(nw + nr == nbits / 8 && nw + nr <= Py_SIZE(self));

    /* copy the complete bytes, then add the partial byte (if any) with
       its pad bits cleared */
    memcpy(dst, self->ob_item + nw, (size_t) nr);
    if (nbits % 8 != 0)
        dst[nr] = zlc(self);

    assert(nbits % 64 || res == 0);
    return res;
}
/* Clear the pad bits in the last byte of the buffer.  Nothing is done when
   the buffer is readonly or when there are no pad bits.
   self->nbits is unchanged. */
static inline void
set_padbits(bitarrayobject *self)
{
    const int r = self->nbits % 8;     /* index into mask table */

    if (self->readonly || r == 0)
        return;

    self->ob_item[Py_SIZE(self) - 1] &= ones_table[IS_BE(self)][r];
}
/* population count - number of 1's in uint64
   Uses the compiler builtin on clang and GCC, and a portable bit-parallel
   computation otherwise. */
static inline int
popcnt_64(uint64_t x)
{
#if (defined(__clang__) || defined(__GNUC__))
    return __builtin_popcountll(x);
#else
    /* https://en.wikipedia.org/wiki/Hamming_weight popcount64c */
    const uint64_t m1 = 0x5555555555555555;
    const uint64_t m2 = 0x3333333333333333;
    const uint64_t m4 = 0x0f0f0f0f0f0f0f0f;
    const uint64_t h01 = 0x0101010101010101;
    /* each 2-bit field becomes the count of its two bits */
    x -= (x >> 1) & m1;
    /* each 4-bit field becomes the count of its four bits */
    x = (x & m2) + ((x >> 2) & m2);
    /* each byte becomes the count of its eight bits */
    x = (x + (x >> 4)) & m4;
    /* the multiplication sums all bytes into the most significant byte */
    return (x * h01) >> 56;
#endif
}
/* Return word with its byte order reversed, using a compiler intrinsic.
   As a side effect, this definition sets the macro HAVE_BUILTIN_BSWAP64 to
   1 when an intrinsic is available, and to 0 otherwise.  In the latter
   case the function body is Py_UNREACHABLE(), so it must not be called -
   presumably callers check HAVE_BUILTIN_BSWAP64 first (not visible here). */
static inline uint64_t
builtin_bswap64(uint64_t word)
{
#if (defined(__clang__) || \
      (defined(__GNUC__) \
        && ((__GNUC__ >= 5) || (__GNUC__ == 4) && (__GNUC_MINOR__ >= 3))))
    /* __builtin_bswap64() is available since GCC 4.3. */
# define HAVE_BUILTIN_BSWAP64 1
    return __builtin_bswap64(word);
#elif defined(_MSC_VER)
    /* _byteswap_uint64() is declared in <intrin.h> */
# define HAVE_BUILTIN_BSWAP64 1
    return _byteswap_uint64(word);
#else
# define HAVE_BUILTIN_BSWAP64 0
    Py_UNREACHABLE();
#endif
}
/* Return the distance [0..3] in bytes from p to the next aligned address,
   where aligned means that the address is a multiple of 4.  The result is 0
   when p is aligned already.
   While on modern compilers uint64_t pointers may be misaligned, it may
   cause problems on older ones.  Moreover, it may lead to slowdown (even
   on modern compilers). */
static inline int
to_aligned(void *p)
{
    const int r = ((uintptr_t) p) % 4;

    return (4 - r) % 4;
}
/* population count of the n words starting at uint64_t pointer w
   (n must not be negative, and w has to be aligned, see to_aligned) */
static inline Py_ssize_t
popcnt_words(uint64_t *w, Py_ssize_t n)
{
    Py_ssize_t total = 0;
    Py_ssize_t k;

    assert(n >= 0 && to_aligned((void *) w) == 0);
    for (k = 0; k < n; k++)
        total += popcnt_64(w[k]);

    return total;
}
/* Adjust the index *i in a manner consistent with the handling of normal
   slices: a negative index counts from the end, and an index which is out
   of range is clipped.  The clipping limits depend on the sign of step. */
static inline void
adjust_index(Py_ssize_t length, Py_ssize_t *i, Py_ssize_t step)
{
    Py_ssize_t k = *i;

    if (k < 0) {
        k += length;
        if (k < 0) {            /* still before the first element */
            if (step < 0)
                k = -1;
            else
                k = 0;
        }
    }
    else if (k >= length) {     /* beyond the last element */
        if (step < 0)
            k = length - 1;
        else
            k = length;
    }
    *i = k;
}
/* same as PySlice_AdjustIndices() which was introduced in Python 3.6.1
   Adjust *start and *stop in place for a sequence of given length, and
   return the number of elements in the resulting slice.  step must not
   be 0. */
static inline Py_ssize_t
adjust_indices(Py_ssize_t length, Py_ssize_t *start, Py_ssize_t *stop,
               Py_ssize_t step)
{
#if PY_VERSION_HEX > 0x03060100
    /* the C API function is available: delegate to it */
    return PySlice_AdjustIndices(length, start, stop, step);
#else
    assert(step != 0);
    adjust_index(length, start, step);
    adjust_index(length, stop, step);
    /*
      a / b does integer division.  If either a or b is negative, the result
      depends on the compiler (rounding can go toward 0 or negative infinity).
      Therefore, we are careful that both a and b are always positive.
    */
    if (step < 0) {
        if (*stop < *start)
            return (*start - *stop - 1) / (-step) + 1;
    }
    else {
        if (*start < *stop)
            return (*stop - *start - 1) / step + 1;
    }
    /* empty slice */
    return 0;
#endif
}
/* Rewrite the slice parameters in place such that step is positive.  The
   slice then selects the same elements in ascending order, which produces
   simpler loops over elements when their order is irrelevant.
   A slice whose step is positive already is left unchanged. */
static inline void
adjust_step_positive(Py_ssize_t slicelength,
                     Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step)
{
    if (*step < 0) {
        const Py_ssize_t first = *start;   /* highest selected index */

        *stop = first + 1;
        /* lowest selected index, as *step is still negative here */
        *start = first + *step * (slicelength - 1);
        *step = -(*step);
    }
    assert(*start >= 0 && *stop >= 0 && *step > 0 && slicelength >= 0);
    /* slicelength == 0 implies stop <= start */
    assert(slicelength != 0 || *stop <= *start);
    /* step == 1 and slicelength != 0 implies stop - start == slicelength */
    assert(*step != 1 || slicelength == 0 || *stop - *start == slicelength);
}
/* Convert the Python object value to a bit (0 or 1), and store it at
   address vi.  Return 1 on success.  On failure, return 0 with an exception
   set: either the one raised by the conversion to an integer, or a
   ValueError when the integer is neither 0 nor 1. */
static inline int
conv_pybit(PyObject *value, int *vi)
{
    const Py_ssize_t n = PyNumber_AsSsize_t(value, NULL);

    if (n == -1 && PyErr_Occurred())
        return 0;

    if (n == 0 || n == 1) {
        *vi = (int) n;
        return 1;
    }
    PyErr_Format(PyExc_ValueError, "bit must be 0 or 1, got %zd", n);
    return 0;
}
/* Check that bitarrays a and b have equal length and equal bit-endianness.
   Return 0 when they do.  Otherwise, set a ValueError and return -1.  The
   length is checked first. */
static inline int
ensure_eq_size_endian(bitarrayobject *a, bitarrayobject *b)
{
    const char *msg = NULL;

    if (a->nbits != b->nbits)
        msg = "bitarrays of equal length expected";
    else if (a->endian != b->endian)
        msg = "bitarrays of equal bit-endianness expected";

    if (msg == NULL)
        return 0;

    PyErr_SetString(PyExc_ValueError, msg);
    return -1;
}

View File

@@ -0,0 +1,577 @@
// Header file providing new C API functions to old Python versions.
//
// File distributed under the Zero Clause BSD (0BSD) license.
// Copyright Contributors to the pythoncapi_compat project.
//
// Homepage:
// https://github.com/python/pythoncapi_compat
//
// Latest version:
// https://raw.githubusercontent.com/python/pythoncapi_compat/master/pythoncapi_compat.h
//
// SPDX-License-Identifier: 0BSD
#ifndef PYTHONCAPI_COMPAT
#define PYTHONCAPI_COMPAT
#ifdef __cplusplus
extern "C" {
#endif
#include <Python.h>
#include "frameobject.h" // PyFrameObject, PyFrame_GetBack()
// Compatibility with Visual Studio 2013 and older which don't support
// the inline keyword in C (only in C++): use __inline instead.
// PYCAPI_COMPAT_STATIC_INLINE(TYPE) is the declaration prefix of every
// function defined in this header.
#if (defined(_MSC_VER) && _MSC_VER < 1900 \
     && !defined(__cplusplus) && !defined(inline))
# define PYCAPI_COMPAT_STATIC_INLINE(TYPE) static __inline TYPE
#else
# define PYCAPI_COMPAT_STATIC_INLINE(TYPE) static inline TYPE
#endif
// Plain C-style cast, only defined when Python does not provide _Py_CAST.
#ifndef _Py_CAST
# define _Py_CAST(type, expr) ((type)(expr))
#endif
// _Py_NULL is defined as nullptr on C++11 and newer,
// otherwise it is defined as NULL.
#ifndef _Py_NULL
# if defined(__cplusplus) && __cplusplus >= 201103
# define _Py_NULL nullptr
# else
# define _Py_NULL NULL
# endif
#endif
// Cast argument to PyObject* type.
#ifndef _PyObject_CAST
# define _PyObject_CAST(op) _Py_CAST(PyObject*, op)
#endif
// bpo-42262 added Py_NewRef() to Python 3.10.0a3
// Backport: increment the reference count of obj and return obj.
// obj must not be NULL, as Py_INCREF() is applied unconditionally.
#if PY_VERSION_HEX < 0x030A00A3 && !defined(Py_NewRef)
PYCAPI_COMPAT_STATIC_INLINE(PyObject*)
_Py_NewRef(PyObject *obj)
{
    Py_INCREF(obj);
    return obj;
}
// the macro casts its argument to PyObject* before calling the function
#define Py_NewRef(obj) _Py_NewRef(_PyObject_CAST(obj))
#endif
// bpo-42262 added Py_XNewRef() to Python 3.10.0a3
// Backport: same as Py_NewRef(), except that Py_XINCREF() is used, so obj
// may be NULL (NULL is then returned unchanged).
#if PY_VERSION_HEX < 0x030A00A3 && !defined(Py_XNewRef)
PYCAPI_COMPAT_STATIC_INLINE(PyObject*)
_Py_XNewRef(PyObject *obj)
{
    Py_XINCREF(obj);
    return obj;
}
// the macro casts its argument to PyObject* before calling the function
#define Py_XNewRef(obj) _Py_XNewRef(_PyObject_CAST(obj))
#endif
// bpo-39573 added Py_SET_REFCNT() to Python 3.9.0a4
// Backport: store refcnt directly into the ob_refcnt field of ob.
#if PY_VERSION_HEX < 0x030900A4 && !defined(Py_SET_REFCNT)
PYCAPI_COMPAT_STATIC_INLINE(void)
_Py_SET_REFCNT(PyObject *ob, Py_ssize_t refcnt)
{
    ob->ob_refcnt = refcnt;
}
#define Py_SET_REFCNT(ob, refcnt) _Py_SET_REFCNT(_PyObject_CAST(ob), refcnt)
#endif
// Py_SETREF() and Py_XSETREF() were added to Python 3.5.2.
// It is excluded from the limited C API.
// Both macros replace dst by src and then release the old value of dst.
// The old value is saved in a temporary first, such that dst already holds
// src by the time the old value is decref'ed.  dst is accessed through a
// pointer, such that the dst expression is evaluated only once.
#if (PY_VERSION_HEX < 0x03050200 && !defined(Py_SETREF)) && !defined(Py_LIMITED_API)
#define Py_SETREF(dst, src) \
    do { \
        PyObject **_tmp_dst_ptr = _Py_CAST(PyObject**, &(dst)); \
        PyObject *_tmp_dst = (*_tmp_dst_ptr); \
        *_tmp_dst_ptr = _PyObject_CAST(src); \
        Py_DECREF(_tmp_dst); \
    } while (0)
// same as Py_SETREF(), but the old value of dst may be NULL
#define Py_XSETREF(dst, src) \
    do { \
        PyObject **_tmp_dst_ptr = _Py_CAST(PyObject**, &(dst)); \
        PyObject *_tmp_dst = (*_tmp_dst_ptr); \
        *_tmp_dst_ptr = _PyObject_CAST(src); \
        Py_XDECREF(_tmp_dst); \
    } while (0)
#endif
// bpo-43753 added Py_Is(), Py_IsNone(), Py_IsTrue() and Py_IsFalse()
// to Python 3.10.0b1.
// Py_Is() is a pointer comparison (object identity); the other three
// compare against the Py_None, Py_True and Py_False singletons.
#if PY_VERSION_HEX < 0x030A00B1 && !defined(Py_Is)
# define Py_Is(x, y) ((x) == (y))
#endif
#if PY_VERSION_HEX < 0x030A00B1 && !defined(Py_IsNone)
# define Py_IsNone(x) Py_Is(x, Py_None)
#endif
#if PY_VERSION_HEX < 0x030A00B1 && !defined(Py_IsTrue)
# define Py_IsTrue(x) Py_Is(x, Py_True)
#endif
#if PY_VERSION_HEX < 0x030A00B1 && !defined(Py_IsFalse)
# define Py_IsFalse(x) Py_Is(x, Py_False)
#endif
// bpo-39573 added Py_SET_TYPE() to Python 3.9.0a4
// Backport: store type directly into the ob_type field of ob.
#if PY_VERSION_HEX < 0x030900A4 && !defined(Py_SET_TYPE)
PYCAPI_COMPAT_STATIC_INLINE(void)
_Py_SET_TYPE(PyObject *ob, PyTypeObject *type)
{
    ob->ob_type = type;
}
#define Py_SET_TYPE(ob, type) _Py_SET_TYPE(_PyObject_CAST(ob), type)
#endif
// bpo-39573 added Py_SET_SIZE() to Python 3.9.0a4
// Backport: store size directly into the ob_size field of ob.
#if PY_VERSION_HEX < 0x030900A4 && !defined(Py_SET_SIZE)
PYCAPI_COMPAT_STATIC_INLINE(void)
_Py_SET_SIZE(PyVarObject *ob, Py_ssize_t size)
{
    ob->ob_size = size;
}
// the macro casts its argument to PyVarObject*
#define Py_SET_SIZE(ob, size) _Py_SET_SIZE((PyVarObject*)(ob), size)
#endif
// bpo-40421 added PyFrame_GetCode() to Python 3.9.0b1
// Backport (also used on PyPy): return a new strong reference to the code
// object of frame.  Neither frame nor its f_code may be NULL.
#if PY_VERSION_HEX < 0x030900B1 || defined(PYPY_VERSION)
PYCAPI_COMPAT_STATIC_INLINE(PyCodeObject*)
PyFrame_GetCode(PyFrameObject *frame)
{
    assert(frame != _Py_NULL);
    assert(frame->f_code != _Py_NULL);
    return _Py_CAST(PyCodeObject*, Py_NewRef(frame->f_code));
}
#endif
// Return the code object of frame as a borrowed reference: the strong
// reference obtained from PyFrame_GetCode() is released right away.
// NOTE(review): the returned pointer is only valid while something else
// (presumably the frame) keeps the code object alive.
PYCAPI_COMPAT_STATIC_INLINE(PyCodeObject*)
_PyFrame_GetCodeBorrow(PyFrameObject *frame)
{
    PyCodeObject *code = PyFrame_GetCode(frame);
    Py_DECREF(code);
    return code;
}
// bpo-40421 added PyFrame_GetBack() to Python 3.9.0b1
// Backport: return a new strong reference to frame->f_back.  Py_XNewRef()
// is used, so NULL is returned when f_back is NULL.
#if PY_VERSION_HEX < 0x030900B1 && !defined(PYPY_VERSION)
PYCAPI_COMPAT_STATIC_INLINE(PyFrameObject*)
PyFrame_GetBack(PyFrameObject *frame)
{
    assert(frame != _Py_NULL);
    return _Py_CAST(PyFrameObject*, Py_XNewRef(frame->f_back));
}
#endif
// Return the result of PyFrame_GetBack() as a borrowed reference (or
// NULL): the strong reference is released right away.
// NOTE(review): the returned pointer is only valid while something else
// keeps that frame alive.
#if !defined(PYPY_VERSION)
PYCAPI_COMPAT_STATIC_INLINE(PyFrameObject*)
_PyFrame_GetBackBorrow(PyFrameObject *frame)
{
    PyFrameObject *back = PyFrame_GetBack(frame);
    Py_XDECREF(back);
    return back;
}
#endif
// bpo-40421 added PyFrame_GetLocals() to Python 3.11.0a7
// Backport: refresh frame->f_locals from the fast locals, and return a new
// strong reference to it.  Returns NULL when
// PyFrame_FastToLocalsWithError() fails.
#if PY_VERSION_HEX < 0x030B00A7 && !defined(PYPY_VERSION)
PYCAPI_COMPAT_STATIC_INLINE(PyObject*)
PyFrame_GetLocals(PyFrameObject *frame)
{
#if PY_VERSION_HEX >= 0x030400B1
    if (PyFrame_FastToLocalsWithError(frame) < 0) {
        return NULL;
    }
#else
    // older versions only have the variant which cannot report failure
    PyFrame_FastToLocals(frame);
#endif
    return Py_NewRef(frame->f_locals);
}
#endif
// bpo-40421 added PyFrame_GetGlobals() to Python 3.11.0a7
// Backport: return a new strong reference to frame->f_globals.
#if PY_VERSION_HEX < 0x030B00A7 && !defined(PYPY_VERSION)
PYCAPI_COMPAT_STATIC_INLINE(PyObject*)
PyFrame_GetGlobals(PyFrameObject *frame)
{
    return Py_NewRef(frame->f_globals);
}
#endif
// bpo-40421 added PyFrame_GetBuiltins() to Python 3.11.0a7
// Backport: return a new strong reference to frame->f_builtins.
#if PY_VERSION_HEX < 0x030B00A7 && !defined(PYPY_VERSION)
PYCAPI_COMPAT_STATIC_INLINE(PyObject*)
PyFrame_GetBuiltins(PyFrameObject *frame)
{
    return Py_NewRef(frame->f_builtins);
}
#endif
// bpo-40421 added PyFrame_GetLasti() to Python 3.11.0b1
// Backport: return the last instruction of frame as an offset in bytes,
// or -1 when f_lasti is negative.
#if PY_VERSION_HEX < 0x030B00B1 && !defined(PYPY_VERSION)
PYCAPI_COMPAT_STATIC_INLINE(int)
PyFrame_GetLasti(PyFrameObject *frame)
{
    const int lasti = frame->f_lasti;
#if PY_VERSION_HEX >= 0x030A00A7
    // bpo-27129: Since Python 3.10.0a7, f_lasti is an instruction offset,
    // not a bytes offset anymore. Python uses 16-bit "wordcode" (2 bytes)
    // instructions.
    return (lasti < 0) ? -1 : lasti * 2;
#else
    return lasti;
#endif
}
#endif
// gh-91248 added PyFrame_GetVar() to Python 3.12.0a2
// Backport: look up name in the locals of frame, and return a new strong
// reference to the value.  Returns NULL with an exception set when the
// locals cannot be obtained, when the lookup fails, or when name is not
// present (NameError).
#if PY_VERSION_HEX < 0x030C00A2 && !defined(PYPY_VERSION)
PYCAPI_COMPAT_STATIC_INLINE(PyObject*)
PyFrame_GetVar(PyFrameObject *frame, PyObject *name)
{
    PyObject *locals, *value;
    locals = PyFrame_GetLocals(frame);
    if (locals == NULL) {
        return NULL;
    }
#if PY_VERSION_HEX >= 0x03000000
    value = PyDict_GetItemWithError(locals, name);
#else
    value = PyDict_GetItem(locals, name);
#endif
    // value is a borrowed reference into locals.
    // NOTE(review): this relies on locals staying alive after the DECREF
    // below (presumably the frame still references it) - confirm.
    Py_DECREF(locals);
    if (value == NULL) {
        // either the lookup itself raised, or the name is missing
        if (PyErr_Occurred()) {
            return NULL;
        }
#if PY_VERSION_HEX >= 0x03000000
        PyErr_Format(PyExc_NameError, "variable %R does not exist", name);
#else
        PyErr_SetString(PyExc_NameError, "variable does not exist");
#endif
        return NULL;
    }
    return Py_NewRef(value);
}
#endif
// gh-91248 added PyFrame_GetVarString() to Python 3.12.0a2
// Backport: same as PyFrame_GetVar(), but the variable name is given as a
// C string.  Returns NULL when the name cannot be converted to a Python
// string, or when PyFrame_GetVar() fails.
#if PY_VERSION_HEX < 0x030C00A2 && !defined(PYPY_VERSION)
PYCAPI_COMPAT_STATIC_INLINE(PyObject*)
PyFrame_GetVarString(PyFrameObject *frame, const char *name)
{
    PyObject *result = NULL;
    PyObject *py_name = PyUnicode_FromString(name);

    if (py_name != NULL) {
        result = PyFrame_GetVar(frame, py_name);
        // the temporary name object is no longer needed
        Py_DECREF(py_name);
    }
    return result;
}
#endif
// bpo-39947 added PyThreadState_GetInterpreter() to Python 3.9.0a5
// Backport (also used on PyPy): return the interp field of tstate, which
// must not be NULL.
#if PY_VERSION_HEX < 0x030900A5 || defined(PYPY_VERSION)
PYCAPI_COMPAT_STATIC_INLINE(PyInterpreterState *)
PyThreadState_GetInterpreter(PyThreadState *tstate)
{
    assert(tstate != _Py_NULL);
    return tstate->interp;
}
#endif
// bpo-40429 added PyThreadState_GetFrame() to Python 3.9.0b1
// Backport: return a new strong reference to tstate->frame.  Py_XNewRef()
// is used, so NULL is returned when the frame field is NULL.
#if PY_VERSION_HEX < 0x030900B1 && !defined(PYPY_VERSION)
PYCAPI_COMPAT_STATIC_INLINE(PyFrameObject*)
PyThreadState_GetFrame(PyThreadState *tstate)
{
    assert(tstate != _Py_NULL);
    return _Py_CAST(PyFrameObject *, Py_XNewRef(tstate->frame));
}
#endif
// Return the result of PyThreadState_GetFrame() as a borrowed reference
// (or NULL): the strong reference is released right away.
// NOTE(review): the returned pointer is only valid while something else
// keeps that frame alive.
#if !defined(PYPY_VERSION)
PYCAPI_COMPAT_STATIC_INLINE(PyFrameObject*)
_PyThreadState_GetFrameBorrow(PyThreadState *tstate)
{
    PyFrameObject *frame = PyThreadState_GetFrame(tstate);
    Py_XDECREF(frame);
    return frame;
}
#endif
// bpo-39947 added PyInterpreterState_Get() to Python 3.9.0a5
// Backport (also used on PyPy): return the interpreter of the current
// thread state.  Py_FatalError() is called when there is no current thread
// state, or when it has no interpreter.
#if PY_VERSION_HEX < 0x030900A5 || defined(PYPY_VERSION)
PYCAPI_COMPAT_STATIC_INLINE(PyInterpreterState*)
PyInterpreterState_Get(void)
{
    PyThreadState *tstate = PyThreadState_GET();
    PyInterpreterState *interp;

    if (tstate == _Py_NULL) {
        Py_FatalError("GIL released (tstate is NULL)");
    }

    interp = tstate->interp;
    if (interp == _Py_NULL) {
        Py_FatalError("no current interpreter");
    }
    return interp;
}
#endif
// bpo-39947 added PyThreadState_GetID() to Python 3.9.0a6
// Backport: return the id field of tstate, which must not be NULL.  Only
// provided for Python 3.7.0a1 and newer (see the version check below).
#if 0x030700A1 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x030900A6 && !defined(PYPY_VERSION)
PYCAPI_COMPAT_STATIC_INLINE(uint64_t)
PyThreadState_GetID(PyThreadState *tstate)
{
    assert(tstate != _Py_NULL);
    return tstate->id;
}
#endif
// bpo-43760 added PyThreadState_EnterTracing() to Python 3.11.0a2
// Backport: increment the tracing counter of tstate and clear its
// use_tracing flag.
#if PY_VERSION_HEX < 0x030B00A2 && !defined(PYPY_VERSION)
PYCAPI_COMPAT_STATIC_INLINE(void)
PyThreadState_EnterTracing(PyThreadState *tstate)
{
    tstate->tracing++;
    // from Python 3.10.0a1 on, the flag is accessed through tstate->cframe
#if PY_VERSION_HEX >= 0x030A00A1
    tstate->cframe->use_tracing = 0;
#else
    tstate->use_tracing = 0;
#endif
}
#endif
// bpo-43760 added PyThreadState_LeaveTracing() to Python 3.11.0a2
// Backport: decrement the tracing counter of tstate, and set its
// use_tracing flag again when a trace or profile function is installed.
#if PY_VERSION_HEX < 0x030B00A2 && !defined(PYPY_VERSION)
PYCAPI_COMPAT_STATIC_INLINE(void)
PyThreadState_LeaveTracing(PyThreadState *tstate)
{
    // the flag is 1 when either a trace or a profile function is set
    int use_tracing = (tstate->c_tracefunc != _Py_NULL
                       || tstate->c_profilefunc != _Py_NULL);
    tstate->tracing--;
    // from Python 3.10.0a1 on, the flag is accessed through tstate->cframe
#if PY_VERSION_HEX >= 0x030A00A1
    tstate->cframe->use_tracing = use_tracing;
#else
    tstate->use_tracing = use_tracing;
#endif
}
#endif
// bpo-37194 added PyObject_CallNoArgs() to Python 3.9.0a1
// PyObject_CallNoArgs() added to PyPy 3.9.16-v7.3.11
// Backport: call func without arguments, by means of
// PyObject_CallFunctionObjArgs() with an empty argument list.
#if !defined(PyObject_CallNoArgs) && PY_VERSION_HEX < 0x030900A1
PYCAPI_COMPAT_STATIC_INLINE(PyObject*)
PyObject_CallNoArgs(PyObject *func)
{
    return PyObject_CallFunctionObjArgs(func, NULL);
}
#endif
// bpo-39245 made PyObject_CallOneArg() public (previously called
// _PyObject_CallOneArg) in Python 3.9.0a4
// PyObject_CallOneArg() added to PyPy 3.9.16-v7.3.11
// Backport: call func with the single positional argument arg.
#if !defined(PyObject_CallOneArg) && PY_VERSION_HEX < 0x030900A4
PYCAPI_COMPAT_STATIC_INLINE(PyObject*)
PyObject_CallOneArg(PyObject *func, PyObject *arg)
{
    // the trailing NULL terminates the variable argument list
    return PyObject_CallFunctionObjArgs(func, arg, NULL);
}
#endif
// bpo-1635741 added PyModule_AddObjectRef() to Python 3.10.0a3
// Backport: add value to module under name, without changing the
// reference held by the caller, whether the call succeeds or fails.
// Returns the result of PyModule_AddObject().
#if PY_VERSION_HEX < 0x030A00A3
PYCAPI_COMPAT_STATIC_INLINE(int)
PyModule_AddObjectRef(PyObject *module, const char *name, PyObject *value)
{
    int status;

    // extra reference, which is handed over to PyModule_AddObject()
    Py_XINCREF(value);
    status = PyModule_AddObject(module, name, value);
    if (status >= 0) {
        return status;
    }
    // failure: release the extra reference taken above
    Py_XDECREF(value);
    return status;
}
#endif
// bpo-40024 added PyModule_AddType() to Python 3.9.0a5
// Backport: make type ready, and add it to module under its short name,
// i.e. the part of tp_name which follows the last dot.  Returns -1 when
// PyType_Ready() fails, and the result of PyModule_AddObjectRef()
// otherwise.
#if PY_VERSION_HEX < 0x030900A5
PYCAPI_COMPAT_STATIC_INLINE(int)
PyModule_AddType(PyObject *module, PyTypeObject *type)
{
    const char *full_name, *last_dot, *short_name;

    if (PyType_Ready(type) < 0) {
        return -1;
    }

    // inline _PyType_Name()
    full_name = type->tp_name;
    assert(full_name != _Py_NULL);
    last_dot = strrchr(full_name, '.');
    short_name = (last_dot == _Py_NULL) ? full_name : last_dot + 1;

    return PyModule_AddObjectRef(module, short_name, _PyObject_CAST(type));
}
#endif
// bpo-40241 added PyObject_GC_IsTracked() to Python 3.9.0a6.
// bpo-4688 added _PyObject_GC_IS_TRACKED() to Python 2.7.0a2.
// Backport: non-zero when obj is a GC object (PyObject_IS_GC) which is
// tracked at the moment.
#if PY_VERSION_HEX < 0x030900A6 && !defined(PYPY_VERSION)
PYCAPI_COMPAT_STATIC_INLINE(int)
PyObject_GC_IsTracked(PyObject* obj)
{
    return (PyObject_IS_GC(obj) && _PyObject_GC_IS_TRACKED(obj));
}
#endif
// bpo-40241 added PyObject_GC_IsFinalized() to Python 3.9.0a6.
// bpo-18112 added _PyGCHead_FINALIZED() to Python 3.4.0 final.
// Backport: non-zero when obj is a GC object (PyObject_IS_GC) whose GC
// header is flagged as finalized.
#if PY_VERSION_HEX < 0x030900A6 && PY_VERSION_HEX >= 0x030400F0 && !defined(PYPY_VERSION)
PYCAPI_COMPAT_STATIC_INLINE(int)
PyObject_GC_IsFinalized(PyObject *obj)
{
    // the GC header is taken to be located directly in front of the object
    PyGC_Head *gc = _Py_CAST(PyGC_Head*, obj) - 1;
    return (PyObject_IS_GC(obj) && _PyGCHead_FINALIZED(gc));
}
#endif
// bpo-39573 added Py_IS_TYPE() to Python 3.9.0a4
// Backport: non-zero when the type of ob is exactly type (pointer
// comparison, so subtypes do not match).
#if PY_VERSION_HEX < 0x030900A4 && !defined(Py_IS_TYPE)
PYCAPI_COMPAT_STATIC_INLINE(int)
_Py_IS_TYPE(PyObject *ob, PyTypeObject *type) {
    return Py_TYPE(ob) == type;
}
#define Py_IS_TYPE(ob, type) _Py_IS_TYPE(_PyObject_CAST(ob), type)
#endif
// bpo-46906 added PyFloat_Pack2() and PyFloat_Unpack2() to Python 3.11a7.
// bpo-11734 added _PyFloat_Pack2() and _PyFloat_Unpack2() to Python 3.6.0b1.
// Python 3.11a2 moved _PyFloat_Pack2() and _PyFloat_Unpack2() to the internal
// C API: Python 3.11a2-3.11a6 versions are not supported.
// The wrappers forward to the private functions; the only difference is
// that the buffer is passed as (const) char* instead of unsigned char*.
#if 0x030600B1 <= PY_VERSION_HEX && PY_VERSION_HEX <= 0x030B00A1 && !defined(PYPY_VERSION)
PYCAPI_COMPAT_STATIC_INLINE(int)
PyFloat_Pack2(double x, char *p, int le)
{ return _PyFloat_Pack2(x, (unsigned char*)p, le); }
PYCAPI_COMPAT_STATIC_INLINE(double)
PyFloat_Unpack2(const char *p, int le)
{ return _PyFloat_Unpack2((const unsigned char *)p, le); }
#endif
// bpo-46906 added PyFloat_Pack4(), PyFloat_Pack8(), PyFloat_Unpack4() and
// PyFloat_Unpack8() to Python 3.11a7.
// Python 3.11a2 moved _PyFloat_Pack4(), _PyFloat_Pack8(), _PyFloat_Unpack4()
// and _PyFloat_Unpack8() to the internal C API: Python 3.11a2-3.11a6 versions
// are not supported.
// The wrappers forward to the private functions; the only difference is
// that the buffer is passed as (const) char* instead of unsigned char*.
#if PY_VERSION_HEX <= 0x030B00A1 && !defined(PYPY_VERSION)
PYCAPI_COMPAT_STATIC_INLINE(int)
PyFloat_Pack4(double x, char *p, int le)
{ return _PyFloat_Pack4(x, (unsigned char*)p, le); }
PYCAPI_COMPAT_STATIC_INLINE(int)
PyFloat_Pack8(double x, char *p, int le)
{ return _PyFloat_Pack8(x, (unsigned char*)p, le); }
PYCAPI_COMPAT_STATIC_INLINE(double)
PyFloat_Unpack4(const char *p, int le)
{ return _PyFloat_Unpack4((const unsigned char *)p, le); }
PYCAPI_COMPAT_STATIC_INLINE(double)
PyFloat_Unpack8(const char *p, int le)
{ return _PyFloat_Unpack8((const unsigned char *)p, le); }
#endif
// gh-92154 added PyCode_GetCode() to Python 3.11.0b1
// Backport: return a new strong reference to code->co_code.
#if PY_VERSION_HEX < 0x030B00B1 && !defined(PYPY_VERSION)
PYCAPI_COMPAT_STATIC_INLINE(PyObject*)
PyCode_GetCode(PyCodeObject *code)
{
    return Py_NewRef(code->co_code);
}
#endif
// gh-95008 added PyCode_GetVarnames() to Python 3.11.0rc1
// Backport: return a new strong reference to code->co_varnames.
#if PY_VERSION_HEX < 0x030B00C1 && !defined(PYPY_VERSION)
PYCAPI_COMPAT_STATIC_INLINE(PyObject*)
PyCode_GetVarnames(PyCodeObject *code)
{
    return Py_NewRef(code->co_varnames);
}
#endif
// gh-95008 added PyCode_GetFreevars() to Python 3.11.0rc1
// Backport: return a new strong reference to code->co_freevars.
#if PY_VERSION_HEX < 0x030B00C1 && !defined(PYPY_VERSION)
PYCAPI_COMPAT_STATIC_INLINE(PyObject*)
PyCode_GetFreevars(PyCodeObject *code)
{
    return Py_NewRef(code->co_freevars);
}
#endif
// gh-95008 added PyCode_GetCellvars() to Python 3.11.0rc1
// Backport: return a new strong reference to code->co_cellvars.
#if PY_VERSION_HEX < 0x030B00C1 && !defined(PYPY_VERSION)
PYCAPI_COMPAT_STATIC_INLINE(PyObject*)
PyCode_GetCellvars(PyCodeObject *code)
{
    return Py_NewRef(code->co_cellvars);
}
#endif
// Py_UNUSED() was added to Python 3.4.0b2.
// Backport: rename the parameter to _unused_<name>; on GCC and clang it is
// marked with the "unused" attribute in addition.
#if PY_VERSION_HEX < 0x030400B2 && !defined(Py_UNUSED)
# if defined(__GNUC__) || defined(__clang__)
# define Py_UNUSED(name) _unused_ ## name __attribute__((unused))
# else
# define Py_UNUSED(name) _unused_ ## name
# endif
#endif
#ifdef __cplusplus
}
#endif
#endif // PYTHONCAPI_COMPAT

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,423 @@
# Copyright (c) 2019 - 2024, Ilan Schnell; All Rights Reserved
# bitarray is published under the PSF license.
#
# Author: Ilan Schnell
"""
Useful utilities for working with bitarrays.
"""
from __future__ import absolute_import
import os
import sys
from bitarray import bitarray, bits2bytes
from bitarray._util import (
zeros, ones, count_n, parity,
count_and, count_or, count_xor, any_and, subset,
_correspond_all,
serialize, deserialize,
ba2hex, hex2ba,
ba2base, base2ba,
sc_encode, sc_decode,
vl_encode, vl_decode,
canonical_decode,
)
# Public names of this module: the helpers imported from the C extension
# `bitarray._util` above, plus the pure-Python functions defined below.
__all__ = [
    'zeros', 'ones', 'urandom',
    'pprint', 'make_endian', 'rindex', 'strip', 'count_n',
    'parity', 'count_and', 'count_or', 'count_xor', 'any_and', 'subset',
    'intervals',
    'ba2hex', 'hex2ba',
    'ba2base', 'base2ba',
    'ba2int', 'int2ba',
    'serialize', 'deserialize',
    'sc_encode', 'sc_decode',
    'vl_encode', 'vl_decode',
    'huffman_code', 'canonical_huffman', 'canonical_decode',
]
# True when running under Python 2; selects the hex-string based integer
# conversion paths in ba2int() and int2ba() below.
_is_py2 = bool(sys.version_info[0] == 2)
def urandom(__length, endian=None):
    """urandom(length, /, endian=None) -> bitarray

    Return a bitarray holding `length` random bits, with the randomness
    taken from `os.urandom`.
    """
    result = bitarray(0, endian)
    # os.urandom() only produces whole bytes: fetch enough bytes to cover
    # `length` bits, then cut off the surplus bits at the end
    random_bytes = os.urandom(bits2bytes(__length))
    result.frombytes(random_bytes)
    del result[__length:]
    return result
def rindex(__a, __sub_bitarray=1, __start=0, __stop=sys.maxsize):
    """rindex(bitarray, sub_bitarray=1, start=0, stop=<end>, /) -> int

    Return rightmost (highest) index where sub_bitarray (or item - defaults
    to 1) is found in bitarray (`a`), such that sub_bitarray is contained
    within `a[start:stop]`.
    Raises `ValueError` when the sub_bitarray is not present, and `TypeError`
    when the first argument is not a bitarray.

    Deprecated: emits a `DeprecationWarning` on every call - use the
    `.index(..., right=True)` method instead.
    """
    from warnings import warn

    # stacklevel=2 attributes the warning to the code calling rindex(), not
    # to this module.  With stacklevel=1 the default warning filters (which
    # only show DeprecationWarning triggered from __main__) would always
    # hide it, and the reported location would be useless to the user.
    warn("rindex() is deprecated and will be removed in bitarray 3.0 - "
         "use .index(..., right=True) method instead.",
         DeprecationWarning, stacklevel=2)

    if not isinstance(__a, bitarray):
        raise TypeError("bitarray expected, got '%s'" % type(__a).__name__)

    return __a.index(__sub_bitarray, __start, __stop, right=True)
def pprint(__a, stream=None, group=8, indent=4, width=80):
    """pprint(bitarray, /, stream=None, group=8, indent=4, width=80)

    Write a formatted representation of the object to `stream` (which
    defaults to `sys.stdout`).  By default, elements are grouped in bytes
    (8 elements), and 8 bytes (64 elements) per line.
    Non-bitarray objects are printed by the standard library
    function `pprint.pprint()`.

    `group` is the number of elements per space separated group (>= 1),
    `indent` the number of spaces in front of each line of multi-line
    output (>= 0) and `width` the targeted line width (> indent).
    A `ValueError` is raised when one of these constraints is violated.
    """
    if stream is None:
        stream = sys.stdout

    if not isinstance(__a, bitarray):
        import pprint as _pprint
        _pprint.pprint(__a, stream=stream, indent=indent, width=width)
        return

    group = int(group)
    if group < 1:
        raise ValueError('group must be >= 1')
    indent = int(indent)
    if indent < 0:
        raise ValueError('indent must be >= 0')
    width = int(width)
    if width <= indent:
        raise ValueError('width must be > %d (indent)' % indent)

    gpl = (width - indent) // (group + 1)  # groups per line
    epl = group * gpl                      # elements per line
    if epl == 0:
        # Not even a single group fits on a line, so break lines inside
        # groups instead.  The available room (width - indent - 2) may be
        # zero or negative for very narrow widths; clamp it to one element
        # per line, as a zero here would make the modulo operations below
        # raise ZeroDivisionError.
        epl = max(1, width - indent - 2)

    type_name = type(__a).__name__
    # here 4 is len("'()'")
    multiline = len(type_name) + 4 + len(__a) + len(__a) // group >= width
    if multiline:
        quotes = "'''"
    elif __a:
        quotes = "'"
    else:
        # empty bitarray is shown without any quotes, e.g. "bitarray()"
        quotes = ""

    stream.write("%s(%s" % (type_name, quotes))
    for i, b in enumerate(__a):
        if multiline and i % epl == 0:
            # start a new (indented) line
            stream.write('\n%s' % (indent * ' '))
        if i % group == 0 and i % epl != 0:
            # separate groups, except at the beginning of a line
            stream.write(' ')
        stream.write(str(b))

    if multiline:
        stream.write('\n')

    stream.write("%s)\n" % quotes)
    stream.flush()
def make_endian(__a, endian):
    """make_endian(bitarray, /, endian) -> bitarray

    When the endianness of the given bitarray is different from `endian`,
    return a new bitarray, with endianness `endian` and the same elements
    as the original bitarray.
    Otherwise (endianness is already `endian`) the original bitarray is
    returned unchanged.
    Raises `TypeError` when the first argument is not a bitarray.

    Deprecated: emits a `DeprecationWarning` on every call - use
    `bitarray(..., endian=...)` instead.
    """
    from warnings import warn

    # stacklevel=2 attributes the warning to the code calling make_endian(),
    # not to this module.  With stacklevel=1 the default warning filters
    # (which only show DeprecationWarning triggered from __main__) would
    # always hide it, and the reported location would be useless to the user.
    warn("make_endian() is deprecated and will be removed in bitarray 3.0 - "
         "use bitarray(..., endian=...) instead",
         DeprecationWarning, stacklevel=2)

    if not isinstance(__a, bitarray):
        raise TypeError("bitarray expected, got '%s'" % type(__a).__name__)

    if __a.endian() == endian:
        return __a

    return bitarray(__a, endian)
def strip(__a, mode='right'):
    """strip(bitarray, /, mode='right') -> bitarray

    Return a new bitarray with zeros stripped from left, right or both ends.
    Allowed values for mode are the strings: `left`, `right`, `both`

    Raises `TypeError` when `mode` is not a string, and `ValueError` when it
    is a string other than the three allowed ones.
    """
    if not isinstance(mode, str):
        # report the type of the offending argument (mode), not of the
        # bitarray
        raise TypeError("str expected for mode, got '%s'" %
                        type(mode).__name__)
    if mode not in ('left', 'right', 'both'):
        raise ValueError("mode must be 'left', 'right' or 'both', got %r" %
                         mode)

    # index of the first 1 - only needed when stripping on the left
    start = None if mode == 'right' else __a.find(1)
    if start == -1:
        # no 1 at all: everything is stripped
        return __a[:0]
    # one past the last 1 - only needed when stripping on the right; when no
    # 1 is present, find() gives -1, such that stop is 0 (empty result)
    stop = None if mode == 'left' else __a.find(1, right=1) + 1
    return __a[start:stop]
def intervals(__a):
    """intervals(bitarray, /) -> iterator

    Iterate over the maximal runs of equal bits.  Each run is reported as
    a tuple `(value, start, stop)`; runs come in order and are never
    empty (`stop - start > 0`).  An empty input yields nothing.
    """
    try:
        current = __a[0]  # bit value of the run being measured
    except IndexError:
        return

    total = len(__a)
    begin = 0
    while begin < total:
        # the run ends where the opposite value shows up next, or at the
        # very end when it does not show up anymore
        try:
            end = __a.index(not current, begin)
        except ValueError:
            end = total
        yield int(current), begin, end
        # the following run starts where this one ended, with flipped value
        current = not current
        begin = end
def ba2int(__a, signed=False):
    """ba2int(bitarray, /, signed=False) -> int

    Interpret the bits of the given (non-empty) bitarray as an integer,
    honoring its bit-endianness.  With `signed` true, the bits are read as
    a two's complement number.
    Raises `TypeError` for non-bitarray input and `ValueError` for an empty
    bitarray.
    """
    if not isinstance(__a, bitarray):
        raise TypeError("bitarray expected, got '%s'" % type(__a).__name__)

    nbits = len(__a)
    if nbits == 0:
        raise ValueError("non-empty bitarray expected")

    endianness = __a.endian()
    little = bool(endianness == 'little')

    npad = __a.padbits
    if npad:
        # fill up to whole bytes with zeros on the most significant side,
        # which does not alter the value
        filler = zeros(npad, endianness)
        __a = (__a + filler) if little else (filler + __a)

    if _is_py2:
        # no int.from_bytes() on Python 2: go through a hex string
        big = bitarray(__a, 'big')
        if little:
            big.reverse()
        value = int(ba2hex(big), 16)
    else:  # py3
        value = int.from_bytes(__a.tobytes(), byteorder=endianness)

    # two's complement: a set sign bit means the value is negative
    if signed and value >= 1 << (nbits - 1):
        value -= 1 << nbits

    return value
def int2ba(__i, length=None, endian=None, signed=False):
    """int2ba(int, /, length=None, endian=None, signed=False) -> bitarray

    Turn the given integer into a bitarray of the requested endianness.
    Without `length`, the result carries no leading (big-endian) or
    trailing (little-endian) zeros; with `length`, the result has exactly
    that many bits, and `OverflowError` is raised when the integer does
    not fit.  `signed` selects two's complement representation, and can
    only be used together with `length`.
    """
    int_types = (int, long) if _is_py2 else int
    if not isinstance(__i, int_types):
        raise TypeError("int expected, got '%s'" % type(__i).__name__)

    if length is None:
        if signed:
            raise TypeError("signed requires length")
    else:
        if not isinstance(length, int):
            raise TypeError("int expected for length")
        if length <= 0:
            raise ValueError("length must be > 0")

    if __i == 0:
        # there are special cases for 0 which we'd rather not deal with below
        return zeros(length or 1, endian)

    if signed:
        limit = 1 << (length - 1)
        if __i < -limit or __i >= limit:
            raise OverflowError("signed integer not in range(%d, %d), "
                                "got %d" % (-limit, limit, __i))
        if __i < 0:
            # map negative numbers onto their two's complement bit pattern
            __i += 1 << length
    else:  # unsigned
        if __i < 0:
            raise OverflowError("unsigned integer not positive, got %d" % __i)
        if length and __i >= (1 << length):
            raise OverflowError("unsigned integer not in range(0, %d), "
                                "got %d" % (1 << length, __i))

    result = bitarray(0, endian)
    little = bool(result.endian() == 'little')
    if _is_py2:
        # no int.to_bytes() on Python 2: go through a hex string
        digits = hex(__i)[2:].rstrip('L')
        result.extend(hex2ba(digits, 'big'))
        if little:
            result.reverse()
    else:  # py3
        nbytes = bits2bytes(__i.bit_length())
        result.frombytes(__i.to_bytes(nbytes, byteorder=result.endian()))

    if length is None:
        # drop the zeros on the most significant side
        return strip(result, 'right' if little else 'left')

    # bring the result to exactly `length` bits, by dropping or adding bits
    # on the most significant side
    nbits = len(result)
    if nbits > length:
        result = result[:length] if little else result[-length:]
    elif nbits < length:
        filler = zeros(length - nbits, result.endian())
        result = (result + filler) if little else (filler + result)

    assert len(result) == length
    return result
# ------------------------------ Huffman coding -----------------------------
def _huffman_tree(__freq_map):
    """_huffman_tree(dict, /) -> Node

    Build the Huffman tree for a dict which maps symbols to frequencies,
    and return the root node of the tree.
    """
    from heapq import heappush, heappop

    class Node(object):
        """
        Tree node.  Leaf nodes carry a 'symbol' attribute, while inner nodes
        carry a 'child' attribute (tuple of the two children) instead.
        Every node has a 'freq' attribute.
        """
        def __lt__(self, other):
            # lets heapq order the nodes by frequency
            return self.freq < other.freq

    queue = []
    # one leaf per symbol goes into the priority queue
    for symbol, frequency in __freq_map.items():
        node = Node()
        node.symbol = symbol
        node.freq = frequency
        heappush(queue, node)

    # merge the two least frequent nodes into a new inner node, until a
    # single node is left over
    while len(queue) > 1:
        lowest = heappop(queue)
        second = heappop(queue)
        merged = Node()
        merged.child = lowest, second
        merged.freq = lowest.freq + second.freq
        heappush(queue, merged)

    # what remains is the root of the Huffman tree
    return queue[0]
def huffman_code(__freq_map, endian=None):
    """huffman_code(dict, /, endian=None) -> dict

    Compute the Huffman code for a frequency map (a dict mapping symbols
    to their frequency).  The result is a dict mapping each symbol to its
    code, a bitarray of the given endianness.  Symbols need not be strings;
    any hashable object (such as `None`) may be used.
    Raises `TypeError` when the argument is not a dict, and `ValueError`
    when it contains no symbols.
    """
    if not isinstance(__freq_map, dict):
        raise TypeError("dict expected, got '%s'" % type(__freq_map).__name__)

    zero = bitarray('0', endian)
    one = bitarray('1', endian)

    nsymbols = len(__freq_map)
    if nsymbols == 0:
        raise ValueError("cannot create Huffman code with no symbols")
    if nsymbols == 1:
        # A lone symbol could in principle be coded with zero bits, but
        # the .encode() and .decode() methods need codes of at least one
        # bit.  The symbol is therefore assigned the single bit 0.  This
        # code is incomplete: a received 1 bit has no meaning and results
        # in an error.
        return {list(__freq_map)[0]: zero}

    codes = {}

    def assign(node, bits):
        # `bits` is the path from the root to `node` (0: first child,
        # 1: second child)
        try:
            symbol = node.symbol
        except AttributeError:  # inner node: descend into both children
            first, second = node.child
            assign(first, bits + zero)
            assign(second, bits + one)
        else:  # leaf: the path is the code of its symbol
            codes[symbol] = bits

    assign(_huffman_tree(__freq_map), bitarray(0, endian))
    return codes
def canonical_huffman(__freq_map):
    """canonical_huffman(dict, /) -> tuple

    Compute the canonical Huffman code for a frequency map (a dict mapping
    symbols to their frequency).  The returned tuple consists of:

    0. the canonical Huffman code as a dict mapping symbols to bitarrays
    1. a list containing the number of symbols of each code length
    2. a list of symbols in canonical order

    Note: the two lists may be used as input for `canonical_decode()`.
    Raises `TypeError` when the argument is not a dict, and `ValueError`
    when it contains no symbols.
    """
    if not isinstance(__freq_map, dict):
        raise TypeError("dict expected, got '%s'" % type(__freq_map).__name__)

    nsymbols = len(__freq_map)
    if nsymbols == 0:
        raise ValueError("cannot create Huffman code with no symbols")
    if nsymbols == 1:
        # A lone symbol is assigned the one-bit code 0, as codes need to be
        # at least one bit long.
        only = list(__freq_map)[0]
        return {only: bitarray('0', 'big')}, [0, 1], [only]

    depths = {}  # symbol -> length of its code

    def measure(node, depth=0):
        # walk the Huffman tree; only the depth at which each symbol is
        # found matters here, not the actual path
        try:
            symbol = node.symbol
        except AttributeError:  # inner node: descend into both children
            first, second = node.child
            measure(first, depth + 1)
            measure(second, depth + 1)
        else:  # leaf
            depths[symbol] = depth

    measure(_huffman_tree(__freq_map))

    # canonical order: by code length first, then by the symbol itself
    ordered = sorted(depths.items(), key=lambda pair: (pair[1], pair[0]))
    longest = ordered[-1][1]  # sorted by length, so the last one is longest

    codedict = {}
    count = [0] * (longest + 1)
    code = 0
    previous = None  # code length of the preceding symbol
    for symbol, nbits in ordered:
        if previous is not None:
            # next code: increment, then append zeros when the code
            # length grows
            code = (code + 1) << (nbits - previous)
        codedict[symbol] = int2ba(code, nbits, 'big')
        count[nbits] += 1
        previous = nbits

    return codedict, count, [symbol for symbol, _ in ordered]

View File

@@ -0,0 +1,71 @@
# Copyright (c) 2021 - 2024, Ilan Schnell; All Rights Reserved
from collections import Counter
from collections.abc import Iterable, Iterator, Sequence
from typing import Any, AnyStr, BinaryIO, Optional, TextIO, Union

from bitarray import bitarray, BytesLike, CodeDict
# Frequency map accepted by the Huffman helpers: symbols mapped to their
# (integer or float) frequency.
FreqMap = Union[Counter[int], dict[Any, Union[int, float]]]


def zeros(length: int, endian: Optional[str] = ...) -> bitarray: ...
def ones(length: int, endian: Optional[str] = ...) -> bitarray: ...

def urandom(length: int, endian: Optional[str] = ...) -> bitarray: ...
# pprint() writes `str` objects to the stream, so a text stream is required;
# None (the default) selects sys.stdout.
def pprint(a: Any, stream: Optional[TextIO] = ...,
           group: int = ...,
           indent: int = ...,
           width: int = ...) -> None: ...

def make_endian(a: bitarray, endian: str) -> bitarray: ...
def rindex(a: bitarray,
           sub_bitarray: Union[bitarray, int] = ...,
           start: int = ...,
           stop: int = ...) -> int: ...

def strip(a: bitarray, mode: str = ...) -> bitarray: ...

def count_n(a: bitarray,
            n: int,
            value: int = ...) -> int: ...

def parity(a: bitarray) -> int: ...
def count_and(a: bitarray, b: bitarray) -> int: ...
def count_or(a: bitarray, b: bitarray) -> int: ...
def count_xor(a: bitarray, b: bitarray) -> int: ...
def any_and(a: bitarray, b: bitarray) -> bool: ...
def subset(a: bitarray, b: bitarray) -> bool: ...
def _correspond_all(a: bitarray, b: bitarray) -> tuple: ...

# yields (value, start, stop) tuples
def intervals(a: bitarray) -> Iterator[tuple[int, int, int]]: ...

def ba2hex(a: bitarray) -> str: ...
def hex2ba(s: AnyStr, endian: Optional[str] = ...) -> bitarray: ...
def ba2base(n: int, a: bitarray) -> str: ...
def base2ba(n: int,
            s: AnyStr,
            endian: Optional[str] = ...) -> bitarray: ...

def ba2int(a: bitarray, signed: int = ...) -> int: ...
# `length` and `endian` default to None at runtime
def int2ba(i: int,
           length: Optional[int] = ...,
           endian: Optional[str] = ...,
           signed: int = ...) -> bitarray: ...

def serialize(a: bitarray) -> bytes: ...
def deserialize(b: BytesLike) -> bitarray: ...
def sc_encode(a: bitarray) -> bytes: ...
def sc_decode(stream: BytesLike) -> bitarray: ...
def vl_encode(a: bitarray) -> bytes: ...
def vl_decode(stream: BytesLike,
              endian: Optional[str] = ...) -> bitarray: ...

def _huffman_tree(freq_map: FreqMap) -> Any: ...
def huffman_code(freq_map: FreqMap,
                 endian: Optional[str] = ...) -> CodeDict: ...
def canonical_huffman(freq_map: FreqMap) -> tuple[CodeDict, list, list]: ...
def canonical_decode(a: bitarray,
                     count: Sequence[int],
                     symbol: Iterable[Any]) -> Iterator: ...