Module astrapy.data_types.data_api_vector

Expand source code
# Copyright DataStax, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import struct
from collections import UserList
from dataclasses import dataclass
from typing import TYPE_CHECKING, Iterator

if TYPE_CHECKING:
    FloatList = UserList[float]
else:
    FloatList = UserList

# Floats are always encoded big-endian with 4 bytes per float.
ENDIANNESS_CHAR = ">"
BYTES_PER_FLOAT = 4


def floats_to_bytes(float_list: list[float], n: int | None = None) -> bytes:
    """
    Convert a list of floats into a binary blob according to the Data API's conventions,
    suitable for working with the "vector" table column type.

    Args:
        float_list: a list of n float numbers to convert.
        n: the number of components. If not provided, it is determined automatically.

    Returns:
        a bytes object expressing the input list of floats in binary-encoded form.
    """

    _n = len(float_list) if n is None else n
    fmt = f"{ENDIANNESS_CHAR}{'f' * _n}"
    return struct.pack(fmt, *float_list)


def bytes_to_floats(byte_blob: bytes, n: int | None = None) -> list[float]:
    """
    Convert a binary blob into a list of floats according to the Data API's conventions.

    Args:
        byte_blob: binary object encoding a list of floats.
        n: the number of components of the resulting list. If not provided,
            it is determined automatically.

    Returns:
        a list of floats, of the same contents as the input binary-encoded sequence.
    """

    _n = len(byte_blob) // BYTES_PER_FLOAT if n is None else n
    fmt = f"{ENDIANNESS_CHAR}{'f' * _n}"
    return list(struct.unpack(fmt, byte_blob))


@dataclass
class DataAPIVector(FloatList):
    r"""
    A class wrapping a list of float numbers to be treated as a "vector" within the
    Data API. This class has the same functionalities as the underlying `list[float]`,
    plus it can be used to signal the Data API that a certain list of numbers can
    be encoded as a binary object (which improves on the performance and bandwidth of
    the write operations to the Data API).

    Attributes:
        data: a list of float numbers, the underlying content of the vector
        n: the number of components, i.e. the length of the list.

    Example:
        >>> from astrapy.data_types import DataAPIVector
        >>>
        >>> v1 = DataAPIVector([0.1, -0.2, 0.3])
        >>> print(v1.to_bytes())
        b'=\xcc\xcc\xcd\xbeL\xcc\xcd>\x99\x99\x9a'
        >>> DataAPIVector.from_bytes(b"=\xcc\xcc\xcd\xbeL\xcc\xcd>\x99\x99\x9a")
        DataAPIVector([0.10000000149011612, -0.20000000298023224, 0.30000001192092896])
        >>> for i, x in enumerate(v1):
        ...     print(f"component {i} => {x}")
        ...
        component 0 => 0.1
        component 1 => -0.2
        component 2 => 0.3
    """

    data: list[float]
    n: int

    def __init__(self, vector: list[float] = []) -> None:
        self.data = vector
        self.n = len(self.data)

    def __iter__(self) -> Iterator[float]:
        return iter(self.data)

    def __hash__(self) -> int:
        return hash(tuple(self.data))

    def __repr__(self) -> str:
        if self.n < 5:
            return f"{self.__class__.__name__}({self.data})"
        else:
            data_start = f"[{', '.join(str(x) for x in self.data[:3])} ...]"
            return f"{self.__class__.__name__}({data_start}, n={self.n})"

    def __str__(self) -> str:
        if self.n < 5:
            return str(self.data)
        else:
            return f"[{', '.join(str(x) for x in self.data[:3])} ...]"

    def to_bytes(self) -> bytes:
        """
        Convert the vector into its binary blob (`bytes`) representation, according
        to the Data API convention (including endianness).

        Returns:
            a `bytes` object, expressing the vector values in a lossless way.
        """

        return floats_to_bytes(self.data, self.n)

    @staticmethod
    def from_bytes(byte_blob: bytes) -> DataAPIVector:
        """
        Create a DataAPIVector from a binary blob, decoding its contents according
        to the Data API convention (including endianness).

        Args:
            byte_blob: a binary sequence, encoding a vector of floats as specified
            by the Data API convention.

        Returns:
            a DataAPIVector corresponding to the provided blob.
        """

        return DataAPIVector(bytes_to_floats(byte_blob))

Functions

def bytes_to_floats(byte_blob: bytes, n: int | None = None) ‑> list[float]

Convert a binary blob into a list of floats according to the Data API's conventions.

Args

byte_blob
binary object encoding a list of floats.
n
the number of components of the resulting list. If not provided, it is determined automatically.

Returns

a list of floats, of the same contents as the input binary-encoded sequence.

Expand source code
def bytes_to_floats(byte_blob: bytes, n: int | None = None) -> list[float]:
    """
    Convert a binary blob into a list of floats according to the Data API's conventions.

    Args:
        byte_blob: binary object encoding a list of floats.
        n: the number of components of the resulting list. If not provided,
            it is determined automatically.

    Returns:
        a list of floats, of the same contents as the input binary-encoded sequence.
    """

    _n = len(byte_blob) // BYTES_PER_FLOAT if n is None else n
    fmt = f"{ENDIANNESS_CHAR}{'f' * _n}"
    return list(struct.unpack(fmt, byte_blob))
def floats_to_bytes(float_list: list[float], n: int | None = None) ‑> bytes

Convert a list of floats into a binary blob according to the Data API's conventions, suitable for working with the "vector" table column type.

Args

float_list
a list of n float numbers to convert.
n
the number of components. If not provided, it is determined automatically.

Returns

a bytes object expressing the input list of floats in binary-encoded form.

Expand source code
def floats_to_bytes(float_list: list[float], n: int | None = None) -> bytes:
    """
    Convert a list of floats into a binary blob according to the Data API's conventions,
    suitable for working with the "vector" table column type.

    Args:
        float_list: a list of n float numbers to convert.
        n: the number of components. If not provided, it is determined automatically.

    Returns:
        a bytes object expressing the input list of floats in binary-encoded form.
    """

    _n = len(float_list) if n is None else n
    fmt = f"{ENDIANNESS_CHAR}{'f' * _n}"
    return struct.pack(fmt, *float_list)

Classes

class DataAPIVector (vector: list[float] = [])

A class wrapping a list of float numbers to be treated as a "vector" within the Data API. This class has the same functionalities as the underlying list[float], plus it can be used to signal the Data API that a certain list of numbers can be encoded as a binary object (which improves on the performance and bandwidth of the write operations to the Data API).

Attributes

data
a list of float numbers, the underlying content of the vector
n
the number of components, i.e. the length of the list.

Example

>>> from astrapy.data_types import DataAPIVector
>>>
>>> v1 = DataAPIVector([0.1, -0.2, 0.3])
>>> print(v1.to_bytes())
b'=\xcc\xcc\xcd\xbeL\xcc\xcd>\x99\x99\x9a'
>>> DataAPIVector.from_bytes(b"=\xcc\xcc\xcd\xbeL\xcc\xcd>\x99\x99\x9a")
DataAPIVector([0.10000000149011612, -0.20000000298023224, 0.30000001192092896])
>>> for i, x in enumerate(v1):
...     print(f"component {i} => {x}")
...
component 0 => 0.1
component 1 => -0.2
component 2 => 0.3
Expand source code
@dataclass
class DataAPIVector(FloatList):
    r"""
    A class wrapping a list of float numbers to be treated as a "vector" within the
    Data API. This class has the same functionalities as the underlying `list[float]`,
    plus it can be used to signal the Data API that a certain list of numbers can
    be encoded as a binary object (which improves on the performance and bandwidth of
    the write operations to the Data API).

    Attributes:
        data: a list of float numbers, the underlying content of the vector
        n: the number of components, i.e. the length of the list.

    Example:
        >>> from astrapy.data_types import DataAPIVector
        >>>
        >>> v1 = DataAPIVector([0.1, -0.2, 0.3])
        >>> print(v1.to_bytes())
        b'=\xcc\xcc\xcd\xbeL\xcc\xcd>\x99\x99\x9a'
        >>> DataAPIVector.from_bytes(b"=\xcc\xcc\xcd\xbeL\xcc\xcd>\x99\x99\x9a")
        DataAPIVector([0.10000000149011612, -0.20000000298023224, 0.30000001192092896])
        >>> for i, x in enumerate(v1):
        ...     print(f"component {i} => {x}")
        ...
        component 0 => 0.1
        component 1 => -0.2
        component 2 => 0.3
    """

    data: list[float]
    n: int

    def __init__(self, vector: list[float] = []) -> None:
        self.data = vector
        self.n = len(self.data)

    def __iter__(self) -> Iterator[float]:
        return iter(self.data)

    def __hash__(self) -> int:
        return hash(tuple(self.data))

    def __repr__(self) -> str:
        if self.n < 5:
            return f"{self.__class__.__name__}({self.data})"
        else:
            data_start = f"[{', '.join(str(x) for x in self.data[:3])} ...]"
            return f"{self.__class__.__name__}({data_start}, n={self.n})"

    def __str__(self) -> str:
        if self.n < 5:
            return str(self.data)
        else:
            return f"[{', '.join(str(x) for x in self.data[:3])} ...]"

    def to_bytes(self) -> bytes:
        """
        Convert the vector into its binary blob (`bytes`) representation, according
        to the Data API convention (including endianness).

        Returns:
            a `bytes` object, expressing the vector values in a lossless way.
        """

        return floats_to_bytes(self.data, self.n)

    @staticmethod
    def from_bytes(byte_blob: bytes) -> DataAPIVector:
        """
        Create a DataAPIVector from a binary blob, decoding its contents according
        to the Data API convention (including endianness).

        Args:
            byte_blob: a binary sequence, encoding a vector of floats as specified
            by the Data API convention.

        Returns:
            a DataAPIVector corresponding to the provided blob.
        """

        return DataAPIVector(bytes_to_floats(byte_blob))

Ancestors

  • collections.UserList
  • collections.abc.MutableSequence
  • collections.abc.Sequence
  • collections.abc.Reversible
  • collections.abc.Collection
  • collections.abc.Sized
  • collections.abc.Iterable
  • collections.abc.Container

Class variables

var data : list[float]
var n : int

Static methods

def from_bytes(byte_blob: bytes) ‑> DataAPIVector

Create a DataAPIVector from a binary blob, decoding its contents according to the Data API convention (including endianness).

Args

byte_blob
a binary sequence, encoding a vector of floats as specified

by the Data API convention.

Returns

a DataAPIVector corresponding to the provided blob.

Expand source code
@staticmethod
def from_bytes(byte_blob: bytes) -> DataAPIVector:
    """
    Create a DataAPIVector from a binary blob, decoding its contents according
    to the Data API convention (including endianness).

    Args:
        byte_blob: a binary sequence, encoding a vector of floats as specified
        by the Data API convention.

    Returns:
        a DataAPIVector corresponding to the provided blob.
    """

    return DataAPIVector(bytes_to_floats(byte_blob))

Methods

def to_bytes(self) ‑> bytes

Convert the vector into its binary blob (bytes) representation, according to the Data API convention (including endianness).

Returns

a bytes object, expressing the vector values in a lossless way.

Expand source code
def to_bytes(self) -> bytes:
    """
    Convert the vector into its binary blob (`bytes`) representation, according
    to the Data API convention (including endianness).

    Returns:
        a `bytes` object, expressing the vector values in a lossless way.
    """

    return floats_to_bytes(self.data, self.n)