Module `astrapy.data.info.table_descriptor.table_creation`

Expand source code

# Copyright DataStax, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

from dataclasses import dataclass
from typing import Any

from astrapy.data.info.table_descriptor.table_columns import (
    TableColumnTypeDescriptor,
    TableKeyValuedColumnTypeDescriptor,
    TablePrimaryKeyDescriptor,
    TableScalarColumnTypeDescriptor,
    TableValuedColumnTypeDescriptor,
    TableVectorColumnTypeDescriptor,
)
from astrapy.data.info.vectorize import VectorServiceOptions
from astrapy.data.utils.table_types import (
    ColumnType,
)
from astrapy.utils.parsing import _warn_residual_keys


@dataclass
class CreateTableDefinition:
    """
    A structure expressing the definition ("schema") of a table to be created through
    the Data API. This object is passed as the `definition` parameter to the database
    `create_table` method.

    See the Data API specifications for detailed specification and allowed values.

    Instances of this object can be created in three ways: using a fluent interface,
    passing a fully-formed definition to the class constructor, or coercing an
    appropriately-shaped plain dictionary into this class.

    Attributes:
        columns: a map from column names to their type definition object.
        primary_key: a specification of the primary key for the table.

    Example:
        >>> from astrapy.constants import SortMode
        >>> from astrapy.info import (
        ...     CreateTableDefinition,
        ...     TablePrimaryKeyDescriptor,
        ...     ColumnType,
        ...     TableScalarColumnTypeDescriptor,
        ...     TableValuedColumnType,
        ...     TableValuedColumnTypeDescriptor,
        ...     TableVectorColumnTypeDescriptor,
        ... )
        >>>
        >>> # Create a table definition with the fluent interface:
        >>> table_definition = (
        ...     CreateTableDefinition.builder()
        ...     .add_column("match_id", ColumnType.TEXT)
        ...     .add_column("round", ColumnType.INT)
        ...     .add_vector_column("m_vector", dimension=3)
        ...     .add_column("score", ColumnType.INT)
        ...     .add_column("when", ColumnType.TIMESTAMP)
        ...     .add_column("winner", ColumnType.TEXT)
        ...     .add_set_column("fighters", ColumnType.UUID)
        ...     .add_partition_by(["match_id"])
        ...     .add_partition_sort({"round": SortMode.ASCENDING})
        ...     .build()
        ... )
        >>>
        >>> # Create a table definition passing everything to the constructor:
        >>> table_definition_1 = CreateTableDefinition(
        ...     columns={
        ...         "match_id": TableScalarColumnTypeDescriptor(
        ...             ColumnType.TEXT,
        ...         ),
        ...         "round": TableScalarColumnTypeDescriptor(
        ...             ColumnType.INT,
        ...         ),
        ...         "m_vector": TableVectorColumnTypeDescriptor(
        ...             column_type="vector", dimension=3
        ...         ),
        ...         "score": TableScalarColumnTypeDescriptor(
        ...             ColumnType.INT,
        ...         ),
        ...         "when": TableScalarColumnTypeDescriptor(
        ...             ColumnType.TIMESTAMP,
        ...         ),
        ...         "winner": TableScalarColumnTypeDescriptor(
        ...             ColumnType.TEXT,
        ...         ),
        ...         "fighters": TableValuedColumnTypeDescriptor(
        ...             column_type=TableValuedColumnType.SET,
        ...             value_type=ColumnType.UUID,
        ...         ),
        ...     },
        ...     primary_key=TablePrimaryKeyDescriptor(
        ...         partition_by=["match_id"],
        ...         partition_sort={"round": SortMode.ASCENDING},
        ...     ),
        ... )
        >>>
        >>> # Coerce a dictionary into a table definition:
        >>> table_definition_2_dict = {
        ...     "columns": {
        ...         "match_id": {"type": "text"},
        ...         "round": {"type": "int"},
        ...         "m_vector": {"type": "vector", "dimension": 3},
        ...         "score": {"type": "int"},
        ...         "when": {"type": "timestamp"},
        ...         "winner": {"type": "text"},
        ...         "fighters": {"type": "set", "valueType": "uuid"},
        ...     },
        ...     "primaryKey": {
        ...         "partitionBy": ["match_id"],
        ...         "partitionSort": {"round": 1},
        ...     },
        ... }
        >>> table_definition_2 = CreateTableDefinition.coerce(
        ...     table_definition_2_dict
        ... )
        >>>
        >>> # The three created objects are exactly identical:
        >>> table_definition_2 == table_definition_1
        True
        >>> table_definition_2 == table_definition
        True
    """

    columns: dict[str, TableColumnTypeDescriptor]
    primary_key: TablePrimaryKeyDescriptor

    def __repr__(self) -> str:
        """
        Return a compact representation listing only the column names
        (comma-separated, without their types) and the primary key.
        """

        column_names = ",".join(self.columns)
        return (
            f"{self.__class__.__name__}("
            f"columns=[{column_names}], primary_key={self.primary_key})"
        )

    def as_dict(self) -> dict[str, Any]:
        """
        Recast this object into a dictionary.

        Returns:
            a dictionary with a "columns" entry (column name to the `as_dict()`
            form of its descriptor) and a "primaryKey" entry (the `as_dict()`
            form of the primary key). Top-level entries whose value is None
            are omitted.
        """

        candidate_items = {
            "columns": {
                col_name: col_descriptor.as_dict()
                for col_name, col_descriptor in self.columns.items()
            },
            "primaryKey": self.primary_key.as_dict(),
        }
        return {
            key: value for key, value in candidate_items.items() if value is not None
        }

    @classmethod
    def _from_dict(cls, raw_dict: dict[str, Any]) -> CreateTableDefinition:
        """
        Create an instance of CreateTableDefinition from a dictionary
        such as one from the Data API.

        Args:
            raw_dict: a dictionary with a "columns" entry (column name to a
                column description) and a "primaryKey" entry. Any other key is
                handed to `_warn_residual_keys` (presumably flagged with
                a warning rather than raising - confirm against that helper).

        Returns:
            a CreateTableDefinition whose columns and primary key are obtained
            by coercing the corresponding entries of the input.

        Raises:
            KeyError: if the "columns" or "primaryKey" entry is missing.
        """

        _warn_residual_keys(cls, raw_dict, {"columns", "primaryKey"})
        coerced_columns = {
            col_name: TableColumnTypeDescriptor.coerce(col_spec)
            for col_name, col_spec in raw_dict["columns"].items()
        }
        return CreateTableDefinition(
            columns=coerced_columns,
            primary_key=TablePrimaryKeyDescriptor.coerce(raw_dict["primaryKey"]),
        )

    @classmethod
    def coerce(
        cls, raw_input: CreateTableDefinition | dict[str, Any]
    ) -> CreateTableDefinition:
        """
        Normalize the input, whether an object already or a plain dictionary
        of the right structure, into a CreateTableDefinition.

        Args:
            raw_input: a CreateTableDefinition, which is returned as is (the very
                same object, not a copy), or a dictionary to be parsed.

        Returns:
            a CreateTableDefinition corresponding to the input.
        """

        if isinstance(raw_input, CreateTableDefinition):
            return raw_input
        return cls._from_dict(raw_input)

    @staticmethod
    def builder() -> CreateTableDefinition:
        """
        Create an "empty" builder for constructing a table definition through
        a fluent interface. The resulting object has no columns and no primary key,
        traits that are to be added progressively with the corresponding methods.

        Since it describes a "table with no columns at all", the result of
        this method alone is not an acceptable table definition for running a table
        creation method on a Database.

        See the class docstring for a full example on using the fluent interface.

        Returns:
            a CreateTableDefinition formally describing a table with no columns.
        """

        return CreateTableDefinition(
            columns={},
            primary_key=TablePrimaryKeyDescriptor(
                partition_by=[],
                partition_sort={},
            ),
        )

    def _with_column(
        self, column_name: str, descriptor: TableColumnTypeDescriptor
    ) -> CreateTableDefinition:
        """
        Return a new table definition equal to this one except that
        `column_name` is mapped to `descriptor` (added, or replaced if the
        name is already present). The primary key object is carried over as is.
        Shared implementation for all of the `add_*_column` fluent methods.
        """

        return CreateTableDefinition(
            columns={**self.columns, column_name: descriptor},
            primary_key=self.primary_key,
        )

    def add_scalar_column(
        self, column_name: str, column_type: str | ColumnType
    ) -> CreateTableDefinition:
        """
        Return a new table definition object with an added column
        of a scalar type (i.e. not a list, set or other composite type).
        This method is for use within the fluent interface for progressively
        building a complete table definition.

        See the class docstring for a full example on using the fluent interface.

        Args:
            column_name: the name of the new column to add to the definition.
            column_type: a string, or a `ColumnType` value, defining
                the scalar type for the column.

        Returns:
            a CreateTableDefinition obtained by adding (or replacing) the desired
            column to this table definition.
        """

        return self._with_column(
            column_name,
            TableScalarColumnTypeDescriptor(
                column_type=ColumnType.coerce(column_type)
            ),
        )

    def add_column(
        self, column_name: str, column_type: str | ColumnType
    ) -> CreateTableDefinition:
        """
        Return a new table definition object with an added column
        of a scalar type (i.e. not a list, set or other composite type).
        This method is for use within the fluent interface for progressively
        building a complete table definition.

        This method is an alias for `add_scalar_column`.

        See the class docstring for a full example on using the fluent interface.

        Args:
            column_name: the name of the new column to add to the definition.
            column_type: a string, or a `ColumnType` value, defining
                the scalar type for the column.

        Returns:
            a CreateTableDefinition obtained by adding (or replacing) the desired
            column to this table definition.
        """

        return self.add_scalar_column(column_name=column_name, column_type=column_type)

    def add_set_column(
        self, column_name: str, value_type: str | ColumnType
    ) -> CreateTableDefinition:
        """
        Return a new table definition object with an added column
        of 'set' type. This method is for use within the
        fluent interface for progressively building a complete table definition.

        See the class docstring for a full example on using the fluent interface.

        Args:
            column_name: the name of the new column to add to the definition.
            value_type: a string, or a `ColumnType` value, defining
                the data type for the items in the set.

        Returns:
            a CreateTableDefinition obtained by adding (or replacing) the desired
            column to this table definition.
        """

        return self._with_column(
            column_name,
            TableValuedColumnTypeDescriptor(column_type="set", value_type=value_type),
        )

    def add_list_column(
        self, column_name: str, value_type: str | ColumnType
    ) -> CreateTableDefinition:
        """
        Return a new table definition object with an added column
        of 'list' type. This method is for use within the
        fluent interface for progressively building a complete table definition.

        See the class docstring for a full example on using the fluent interface.

        Args:
            column_name: the name of the new column to add to the definition.
            value_type: a string, or a `ColumnType` value, defining
                the data type for the items in the list.

        Returns:
            a CreateTableDefinition obtained by adding (or replacing) the desired
            column to this table definition.
        """

        return self._with_column(
            column_name,
            TableValuedColumnTypeDescriptor(column_type="list", value_type=value_type),
        )

    def add_map_column(
        self,
        column_name: str,
        key_type: str | ColumnType,
        value_type: str | ColumnType,
    ) -> CreateTableDefinition:
        """
        Return a new table definition object with an added column
        of 'map' type. This method is for use within the
        fluent interface for progressively building a complete table definition.

        See the class docstring for a full example on using the fluent interface.

        Args:
            column_name: the name of the new column to add to the definition.
            key_type: a string, or a `ColumnType` value, defining
                the data type for the keys in the map.
            value_type: a string, or a `ColumnType` value, defining
                the data type for the values in the map.

        Returns:
            a CreateTableDefinition obtained by adding (or replacing) the desired
            column to this table definition.
        """

        return self._with_column(
            column_name,
            TableKeyValuedColumnTypeDescriptor(
                column_type="map", key_type=key_type, value_type=value_type
            ),
        )

    def add_vector_column(
        self,
        column_name: str,
        *,
        dimension: int | None = None,
        service: VectorServiceOptions | dict[str, Any] | None = None,
    ) -> CreateTableDefinition:
        """
        Return a new table definition object with an added column
        of 'vector' type. This method is for use within the
        fluent interface for progressively building a complete table definition.

        See the class docstring for a full example on using the fluent interface.

        Args:
            column_name: the name of the new column to add to the definition.
            dimension: the dimensionality of the vector, i.e. the number of components
                each vector in this column will have. If a `service` parameter is
                supplied and the vectorize model allows for it, the dimension may be
                left unspecified to have the API set a default value.
                The Data API will raise an error if a table creation is attempted with
                a vector column for which neither a service nor the dimension are given.
            service: a `VectorServiceOptions` object, or an equivalent plain dictionary,
                defining the server-side embedding service associated to the column,
                if desired.

        Returns:
            a CreateTableDefinition obtained by adding (or replacing) the desired
            column to this table definition.
        """

        return self._with_column(
            column_name,
            TableVectorColumnTypeDescriptor(
                column_type="vector",
                dimension=dimension,
                service=VectorServiceOptions.coerce(service),
            ),
        )

    def add_partition_by(
        self, partition_columns: list[str] | tuple[str, ...] | str
    ) -> CreateTableDefinition:
        """
        Return a new table definition object with one or more added `partition_by`
        columns. This method is for use within the
        fluent interface for progressively building a complete table definition.

        See the class docstring for a full example on using the fluent interface.

        Successive calls append the requested columns at the end of the pre-existing
        `partition_by` list. In other words, these two patterns are equivalent:
        (1) X.add_partition_by(["col1", "col2"])
        (2) X.add_partition_by(["col1"]).add_partition_by("col2")

        Note that no deduplication is applied to the overall
        result: the caller should take care of not supplying the same column name
        more than once.

        Args:
            partition_columns: a list (or tuple) of column names (strings) to be
                added to the full table partition key. A single string is also
                accepted and is treated as a one-item list.

        Returns:
            a CreateTableDefinition obtained by enriching the `partition_by`
            of this table definition as requested.
        """

        # A bare string must be appended as one name, never split into characters;
        # lists and tuples contribute each of their items.
        if isinstance(partition_columns, (list, tuple)):
            added_columns = list(partition_columns)
        else:
            added_columns = [partition_columns]

        return CreateTableDefinition(
            columns=self.columns,
            primary_key=TablePrimaryKeyDescriptor(
                partition_by=self.primary_key.partition_by + added_columns,
                partition_sort=self.primary_key.partition_sort,
            ),
        )

    def add_partition_sort(
        self, partition_sort: dict[str, int]
    ) -> CreateTableDefinition:
        """
        Return a new table definition object with one or more added `partition_sort`
        column specifications. This method is for use within the
        fluent interface for progressively building a complete table definition.

        See the class docstring for a full example on using the fluent interface.

        Successive calls append (or replace) the requested columns at the end of
        the pre-existing `partition_sort` dictionary. In other words, these two
        patterns are equivalent:
        (1) X.add_partition_sort({"c1": 1, "c2": -1})
        (2) X.add_partition_sort({"c1": 1}).add_partition_sort({"c2": -1})

        Args:
            partition_sort: a dictionary mapping column names to their sort mode
                (ascending/descending, i.e. 1/-1. See also
                `astrapy.constants.SortMode`).

        Returns:
            a CreateTableDefinition obtained by enriching the `partition_sort`
            of this table definition as requested.
        """

        # Entries supplied now override pre-existing ones with the same column name.
        merged_sort = {**self.primary_key.partition_sort, **partition_sort}
        return CreateTableDefinition(
            columns=self.columns,
            primary_key=TablePrimaryKeyDescriptor(
                partition_by=self.primary_key.partition_by,
                partition_sort=merged_sort,
            ),
        )

    def build(self) -> CreateTableDefinition:
        """
        The final step in the fluent (builder) interface. Calling this method
        finalizes the definition that has been built so far and makes it into a
        table definition ready for use in e.g. table creation.

        Note that this step may be automatically invoked by the receiving methods:
        however it is a good practice - and also adds to the readability of the code -
        to call it explicitly.

        See the class docstring for a full example on using the fluent interface.

        Returns:
            a CreateTableDefinition obtained by finalizing the definition being
                built so far.
        """

        return self

Classes

class CreateTableDefinition (columns: dict[str, TableColumnTypeDescriptor], primary_key: TablePrimaryKeyDescriptor)

A structure expressing the definition ("schema") of a table to be created through the Data API. This object is passed as the `definition` parameter to the database `create_table` method.

See the Data API specifications for detailed specification and allowed values.

Instances of this object can be created in three ways: using a fluent interface, passing a fully-formed definition to the class constructor, or coercing an appropriately-shaped plain dictionary into this class.

Attributes

columns: a map from column names to their type definition object.
primary_key: a specification of the primary key for the table.

Example

>>> from astrapy.constants import SortMode
>>> from astrapy.info import (
...     CreateTableDefinition,
...     TablePrimaryKeyDescriptor,
...     ColumnType,
...     TableScalarColumnTypeDescriptor,
...     TableValuedColumnType,
...     TableValuedColumnTypeDescriptor,
...     TableVectorColumnTypeDescriptor,
... )
>>>
>>> # Create a table definition with the fluent interface:
>>> table_definition = (
...     CreateTableDefinition.builder()
...     .add_column("match_id", ColumnType.TEXT)
...     .add_column("round", ColumnType.INT)
...     .add_vector_column("m_vector", dimension=3)
...     .add_column("score", ColumnType.INT)
...     .add_column("when", ColumnType.TIMESTAMP)
...     .add_column("winner", ColumnType.TEXT)
...     .add_set_column("fighters", ColumnType.UUID)
...     .add_partition_by(["match_id"])
...     .add_partition_sort({"round": SortMode.ASCENDING})
...     .build()
... )
>>>
>>> # Create a table definition passing everything to the constructor:
>>> table_definition_1 = CreateTableDefinition(
...     columns={
...         "match_id": TableScalarColumnTypeDescriptor(
...             ColumnType.TEXT,
...         ),
...         "round": TableScalarColumnTypeDescriptor(
...             ColumnType.INT,
...         ),
...         "m_vector": TableVectorColumnTypeDescriptor(
...             column_type="vector", dimension=3
...         ),
...         "score": TableScalarColumnTypeDescriptor(
...             ColumnType.INT,
...         ),
...         "when": TableScalarColumnTypeDescriptor(
...             ColumnType.TIMESTAMP,
...         ),
...         "winner": TableScalarColumnTypeDescriptor(
...             ColumnType.TEXT,
...         ),
...         "fighters": TableValuedColumnTypeDescriptor(
...             column_type=TableValuedColumnType.SET,
...             value_type=ColumnType.UUID,
...         ),
...     },
...     primary_key=TablePrimaryKeyDescriptor(
...         partition_by=["match_id"],
...         partition_sort={"round": SortMode.ASCENDING},
...     ),
... )
>>>
>>> # Coerce a dictionary into a table definition:
>>> table_definition_2_dict = {
...     "columns": {
...         "match_id": {"type": "text"},
...         "round": {"type": "int"},
...         "m_vector": {"type": "vector", "dimension": 3},
...         "score": {"type": "int"},
...         "when": {"type": "timestamp"},
...         "winner": {"type": "text"},
...         "fighters": {"type": "set", "valueType": "uuid"},
...     },
...     "primaryKey": {
...         "partitionBy": ["match_id"],
...         "partitionSort": {"round": 1},
...     },
... }
>>> table_definition_2 = CreateTableDefinition.coerce(
...     table_definition_2_dict
... )
>>>
>>> # The three created objects are exactly identical:
>>> table_definition_2 == table_definition_1
True
>>> table_definition_2 == table_definition
True

Expand source code

@dataclass
class CreateTableDefinition:
    """
    A structure expressing the definition ("schema") of a table to be created through
    the Data API. This object is passed as the `definition` parameter to the database
    `create_table` method.

    See the Data API specifications for detailed specification and allowed values.

    Instances of this object can be created in three ways: using a fluent interface,
    passing a fully-formed definition to the class constructor, or coercing an
    appropriately-shaped plain dictionary into this class.

    Attributes:
        columns: a map from column names to their type definition object.
        primary_key: a specification of the primary key for the table.

    Example:
        >>> from astrapy.constants import SortMode
        >>> from astrapy.info import (
        ...     CreateTableDefinition,
        ...     TablePrimaryKeyDescriptor,
        ...     ColumnType,
        ...     TableScalarColumnTypeDescriptor,
        ...     TableValuedColumnType,
        ...     TableValuedColumnTypeDescriptor,
        ...     TableVectorColumnTypeDescriptor,
        ... )
        >>>
        >>> # Create a table definition with the fluent interface:
        >>> table_definition = (
        ...     CreateTableDefinition.builder()
        ...     .add_column("match_id", ColumnType.TEXT)
        ...     .add_column("round", ColumnType.INT)
        ...     .add_vector_column("m_vector", dimension=3)
        ...     .add_column("score", ColumnType.INT)
        ...     .add_column("when", ColumnType.TIMESTAMP)
        ...     .add_column("winner", ColumnType.TEXT)
        ...     .add_set_column("fighters", ColumnType.UUID)
        ...     .add_partition_by(["match_id"])
        ...     .add_partition_sort({"round": SortMode.ASCENDING})
        ...     .build()
        ... )
        >>>
        >>> # Create a table definition passing everything to the constructor:
        >>> table_definition_1 = CreateTableDefinition(
        ...     columns={
        ...         "match_id": TableScalarColumnTypeDescriptor(
        ...             ColumnType.TEXT,
        ...         ),
        ...         "round": TableScalarColumnTypeDescriptor(
        ...             ColumnType.INT,
        ...         ),
        ...         "m_vector": TableVectorColumnTypeDescriptor(
        ...             column_type="vector", dimension=3
        ...         ),
        ...         "score": TableScalarColumnTypeDescriptor(
        ...             ColumnType.INT,
        ...         ),
        ...         "when": TableScalarColumnTypeDescriptor(
        ...             ColumnType.TIMESTAMP,
        ...         ),
        ...         "winner": TableScalarColumnTypeDescriptor(
        ...             ColumnType.TEXT,
        ...         ),
        ...         "fighters": TableValuedColumnTypeDescriptor(
        ...             column_type=TableValuedColumnType.SET,
        ...             value_type=ColumnType.UUID,
        ...         ),
        ...     },
        ...     primary_key=TablePrimaryKeyDescriptor(
        ...         partition_by=["match_id"],
        ...         partition_sort={"round": SortMode.ASCENDING},
        ...     ),
        ... )
        >>>
        >>> # Coerce a dictionary into a table definition:
        >>> table_definition_2_dict = {
        ...     "columns": {
        ...         "match_id": {"type": "text"},
        ...         "round": {"type": "int"},
        ...         "m_vector": {"type": "vector", "dimension": 3},
        ...         "score": {"type": "int"},
        ...         "when": {"type": "timestamp"},
        ...         "winner": {"type": "text"},
        ...         "fighters": {"type": "set", "valueType": "uuid"},
        ...     },
        ...     "primaryKey": {
        ...         "partitionBy": ["match_id"],
        ...         "partitionSort": {"round": 1},
        ...     },
        ... }
        >>> table_definition_2 = CreateTableDefinition.coerce(
        ...     table_definition_2_dict
        ... )
        >>>
        >>> # The three created objects are exactly identical:
        >>> table_definition_2 == table_definition_1
        True
        >>> table_definition_2 == table_definition
        True
    """

    columns: dict[str, TableColumnTypeDescriptor]
    primary_key: TablePrimaryKeyDescriptor

    def __repr__(self) -> str:
        not_null_pieces = [
            pc
            for pc in [
                f"columns=[{','.join(self.columns.keys())}]",
                f"primary_key={self.primary_key}",
            ]
            if pc is not None
        ]
        return f"{self.__class__.__name__}({', '.join(not_null_pieces)})"

    def as_dict(self) -> dict[str, Any]:
        """Recast this object into a dictionary."""

        return {
            k: v
            for k, v in {
                "columns": {
                    col_n: col_v.as_dict() for col_n, col_v in self.columns.items()
                },
                "primaryKey": self.primary_key.as_dict(),
            }.items()
            if v is not None
        }

    @classmethod
    def _from_dict(cls, raw_dict: dict[str, Any]) -> CreateTableDefinition:
        """
        Create an instance of CreateTableDefinition from a dictionary
        such as one from the Data API.
        """

        _warn_residual_keys(cls, raw_dict, {"columns", "primaryKey"})
        return CreateTableDefinition(
            columns={
                col_n: TableColumnTypeDescriptor.coerce(col_v)
                for col_n, col_v in raw_dict["columns"].items()
            },
            primary_key=TablePrimaryKeyDescriptor.coerce(raw_dict["primaryKey"]),
        )

    @classmethod
    def coerce(
        cls, raw_input: CreateTableDefinition | dict[str, Any]
    ) -> CreateTableDefinition:
        """
        Normalize the input, whether an object already or a plain dictionary
        of the right structure, into a CreateTableDefinition.
        """

        if isinstance(raw_input, CreateTableDefinition):
            return raw_input
        else:
            return cls._from_dict(raw_input)

    @staticmethod
    def builder() -> CreateTableDefinition:
        """
        Create an "empty" builder for constructing a table definition through
        a fluent interface. The resulting object has no columns and no primary key,
        traits that are to be added progressively with the corresponding methods.

        Since it describes a "table with no columns at all", the result of
        this method alone is not an acceptable table definition for running a table
        creation method on a Database.

        See the class docstring for a full example on using the fluent interface.

        Returns:
            a CreateTableDefinition formally describing a table with no columns.
        """

        return CreateTableDefinition(
            columns={},
            primary_key=TablePrimaryKeyDescriptor(
                partition_by=[],
                partition_sort={},
            ),
        )

    def add_scalar_column(
        self, column_name: str, column_type: str | ColumnType
    ) -> CreateTableDefinition:
        """
        Return a new table definition object with an added column
        of a scalar type (i.e. not a list, set or other composite type).
        This method is for use within the fluent interface for progressively
        building a complete table definition.

        See the class docstring for a full example on using the fluent interface.

        Args:
            column_name: the name of the new column to add to the definition.
            column_type: a string, or a `ColumnType` value, defining
                the scalar type for the column.

        Returns:
            a CreateTableDefinition obtained by adding (or replacing) the desired
            column to this table definition.
        """

        return CreateTableDefinition(
            columns={
                **self.columns,
                **{
                    column_name: TableScalarColumnTypeDescriptor(
                        column_type=ColumnType.coerce(column_type)
                    )
                },
            },
            primary_key=self.primary_key,
        )

    def add_column(
        self, column_name: str, column_type: str | ColumnType
    ) -> CreateTableDefinition:
        """
        Alias for `add_scalar_column`: derive a table definition that
        additionally declares a scalar column (as opposed to a list, set or
        other composite type).

        The arguments are forwarded, by keyword and unchanged, to
        `add_scalar_column`, whose result is returned as is.

        See the class docstring for a full example on using the fluent interface.

        Args:
            column_name: the name of the scalar column being defined.
            column_type: the scalar type of the column, given as a string
                or as a `ColumnType` value.

        Returns:
            the CreateTableDefinition returned by `add_scalar_column`, i.e. this
            definition with the desired column added (or replaced).
        """

        enriched_definition = self.add_scalar_column(
            column_name=column_name,
            column_type=column_type,
        )
        return enriched_definition

    def add_set_column(
        self, column_name: str, value_type: str | ColumnType
    ) -> CreateTableDefinition:
        """
        Fluent-interface step: derive a table definition that additionally
        declares a column of 'set' type.

        This definition is not modified: its columns are copied into a new
        mapping, where `column_name` is then set (overwriting a column already
        present under that name, if any).

        See the class docstring for a full example on using the fluent interface.

        Args:
            column_name: the name of the set column being defined.
            value_type: the data type of the items in the set, given
                as a string or as a `ColumnType` value.

        Returns:
            a new CreateTableDefinition with the set column added (or replaced)
            and the primary key of this definition.
        """

        # Copy first, so that the receiver's mapping is never touched.
        updated_columns = {**self.columns}
        updated_columns[column_name] = TableValuedColumnTypeDescriptor(
            column_type="set",
            value_type=value_type,
        )
        return CreateTableDefinition(
            columns=updated_columns,
            primary_key=self.primary_key,
        )

    def add_list_column(
        self, column_name: str, value_type: str | ColumnType
    ) -> CreateTableDefinition:
        """
        Fluent-interface step: derive a table definition that additionally
        declares a column of 'list' type.

        This definition is not modified: its columns are copied into a new
        mapping, where `column_name` is then set (overwriting a column already
        present under that name, if any).

        See the class docstring for a full example on using the fluent interface.

        Args:
            column_name: the name of the list column being defined.
            value_type: the data type of the items in the list, given
                as a string or as a `ColumnType` value.

        Returns:
            a new CreateTableDefinition with the list column added (or replaced)
            and the primary key of this definition.
        """

        # Copy first, so that the receiver's mapping is never touched.
        updated_columns = {**self.columns}
        updated_columns[column_name] = TableValuedColumnTypeDescriptor(
            column_type="list",
            value_type=value_type,
        )
        return CreateTableDefinition(
            columns=updated_columns,
            primary_key=self.primary_key,
        )

    def add_map_column(
        self,
        column_name: str,
        key_type: str | ColumnType,
        value_type: str | ColumnType,
    ) -> CreateTableDefinition:
        """
        Fluent-interface step: derive a table definition that additionally
        declares a column of 'map' type.

        This definition is not modified: its columns are copied into a new
        mapping, where `column_name` is then set (overwriting a column already
        present under that name, if any).

        See the class docstring for a full example on using the fluent interface.

        Args:
            column_name: the name of the map column being defined.
            key_type: the data type of the keys in the map, given
                as a string or as a `ColumnType` value.
            value_type: the data type of the values in the map, given
                as a string or as a `ColumnType` value.

        Returns:
            a new CreateTableDefinition with the map column added (or replaced)
            and the primary key of this definition.
        """

        # Copy first, so that the receiver's mapping is never touched.
        updated_columns = {**self.columns}
        updated_columns[column_name] = TableKeyValuedColumnTypeDescriptor(
            column_type="map",
            key_type=key_type,
            value_type=value_type,
        )
        return CreateTableDefinition(
            columns=updated_columns,
            primary_key=self.primary_key,
        )

    def add_vector_column(
        self,
        column_name: str,
        *,
        dimension: int | None = None,
        service: VectorServiceOptions | dict[str, Any] | None = None,
    ) -> CreateTableDefinition:
        """
        Fluent-interface step: derive a table definition that additionally
        declares a column of 'vector' type.

        This definition is not modified: its columns are copied into a new
        mapping, where `column_name` is then set (overwriting a column already
        present under that name, if any).

        See the class docstring for a full example on using the fluent interface.

        Args:
            column_name: the name of the vector column being defined.
            dimension: the number of components of each vector stored in the
                column. When a `service` is supplied and the vectorize model
                allows for it, this can be omitted to let the API pick a default.
                The Data API will raise an error if a table creation is attempted
                with a vector column having neither a service nor a dimension.
            service: the server-side embedding service to associate to the
                column, if desired, given as a `VectorServiceOptions` object
                or an equivalent plain dictionary (it goes through
                `VectorServiceOptions.coerce`).

        Returns:
            a new CreateTableDefinition with the vector column added (or
            replaced) and the primary key of this definition.
        """

        # Copy first, so that the receiver's mapping is never touched.
        updated_columns = {**self.columns}
        updated_columns[column_name] = TableVectorColumnTypeDescriptor(
            column_type="vector",
            dimension=dimension,
            service=VectorServiceOptions.coerce(service),
        )
        return CreateTableDefinition(
            columns=updated_columns,
            primary_key=self.primary_key,
        )

    def add_partition_by(
        self, partition_columns: list[str] | str
    ) -> CreateTableDefinition:
        """
        Return a new table definition object with one or more added `partition_by`
        columns. This method is for use within the
        fluent interface for progressively building a complete table definition.

        See the class docstring for a full example on using the fluent interface.

        Successive calls append the requested columns at the end of the pre-existing
        `partition_by` list. In other words, these two patterns are equivalent:
        (1) X.add_partition_by(["col1", "col2"])
        (2) X.add_partition_by(["col1"]).add_partition_by("col2")

        Note that no deduplication is applied to the overall
        result: the caller should take care of not supplying the same column name
        more than once.

        Args:
            partition_columns: a list of column names (strings) to be added to the
                full table partition key. A single string (not a list) is also
                accepted and is treated as one column name. A tuple of column
                names is handled the same way as a list.

        Returns:
            a CreateTableDefinition obtained by enriching the `partition_by`
            of this table definition as requested. The `columns` and the
            `partition_sort` of this definition are passed along unchanged.
        """

        # Lists and tuples are expanded into their items; anything else
        # (notably a bare string) counts as one single column name. Without
        # the tuple case, a tuple would end up nested as a single element
        # of `partition_by`.
        if isinstance(partition_columns, (list, tuple)):
            _partition_columns = list(partition_columns)
        else:
            _partition_columns = [partition_columns]

        return CreateTableDefinition(
            columns=self.columns,
            primary_key=TablePrimaryKeyDescriptor(
                # list concatenation builds a new list: the receiver is untouched
                partition_by=self.primary_key.partition_by + _partition_columns,
                partition_sort=self.primary_key.partition_sort,
            ),
        )

    def add_partition_sort(
        self, partition_sort: dict[str, int]
    ) -> CreateTableDefinition:
        """
        Fluent-interface step: derive a table definition whose `partition_sort`
        is enriched with one or more column specifications.

        See the class docstring for a full example on using the fluent interface.

        The supplied entries are merged on top of the pre-existing
        `partition_sort` dictionary: new columns are appended, while a column
        already present gets its sort mode replaced. As a consequence, these
        two patterns are equivalent:
        (1) X.add_partition_sort({"c1": 1, "c2": -1})
        (2) X.add_partition_sort({"c1": 1}).add_partition_sort({"c2": -1})

        Args:
            partition_sort: a dictionary mapping column names to their sort mode
                (ascending/descending, i.e. 1/-1).
                See also `astrapy.constants.SortMode`.

        Returns:
            a new CreateTableDefinition with the merged `partition_sort`, and
            the `columns` and `partition_by` of this definition.
        """

        current_key = self.primary_key
        enriched_key = TablePrimaryKeyDescriptor(
            partition_by=current_key.partition_by,
            # later entries win: the supplied modes override pre-existing ones
            partition_sort={**current_key.partition_sort, **partition_sort},
        )
        return CreateTableDefinition(
            columns=self.columns,
            primary_key=enriched_key,
        )

    def build(self) -> CreateTableDefinition:
        """
        Conclude the fluent (builder) interface, handing out the definition
        accumulated so far as a table definition ready for use,
        e.g. in a table creation.

        Receiving methods may take care of this step automatically. Calling it
        explicitly is nevertheless a good practice, which also makes
        the code more readable.

        See the class docstring for a full example on using the fluent interface.

        Returns:
            the finalized CreateTableDefinition. No copy is made: the returned
            object is this very instance.
        """

        finalized_definition = self
        return finalized_definition

Class variables

var columns : dict[str, TableColumnTypeDescriptor]
var primary_key : TablePrimaryKeyDescriptor

Static methods

def builder() ‑> CreateTableDefinition

Create an "empty" builder for constructing a table definition through a fluent interface. The resulting object has no columns and no primary key, traits that are to be added progressively with the corresponding methods.

Since it describes a "table with no columns at all", the result of this method alone is not an acceptable table definition for running a table creation method on a Database.

See the class docstring for a full example on using the fluent interface.

Returns

a CreateTableDefinition formally describing a table with no columns.

Expand source code

@staticmethod
def builder() -> CreateTableDefinition:
    """
    Entry point of the fluent interface: create an "empty" table definition,
    to be progressively enriched with the column and primary-key methods.

    The returned object has an empty `columns` mapping and a primary key
    with empty `partition_by` and `partition_sort`. Describing a "table with
    no columns at all", it is not, by itself, an acceptable table definition
    for running a table creation method on a Database.

    See the class docstring for a full example on using the fluent interface.

    Returns:
        a CreateTableDefinition formally describing a table with no columns.
    """

    empty_primary_key = TablePrimaryKeyDescriptor(
        partition_by=[],
        partition_sort={},
    )
    return CreateTableDefinition(columns={}, primary_key=empty_primary_key)

def coerce(raw_input: CreateTableDefinition | dict[str, Any]) ‑> CreateTableDefinition

Normalize the input, whether an object already or a plain dictionary of the right structure, into a CreateTableDefinition.

Expand source code

@classmethod
def coerce(
    cls, raw_input: CreateTableDefinition | dict[str, Any]
) -> CreateTableDefinition:
    """
    Normalize the input into a CreateTableDefinition.

    Args:
        raw_input: either a CreateTableDefinition already, or a plain
            dictionary of the right structure.

    Returns:
        the input itself (not a copy) if it is a CreateTableDefinition;
        otherwise, the result of `cls._from_dict` applied to the input.
    """

    # Anything that is not a ready-made definition is parsed as a dictionary.
    if not isinstance(raw_input, CreateTableDefinition):
        return cls._from_dict(raw_input)
    return raw_input

Methods

def add_column(self, column_name: str, column_type: str | ColumnType) ‑> CreateTableDefinition

Return a new table definition object with an added column of a scalar type (i.e. not a list, set or other composite type). This method is for use within the fluent interface for progressively building a complete table definition.

This method is an alias for add_scalar_column.

See the class docstring for a full example on using the fluent interface.

Args

column_name: the name of the new column to add to the definition.
column_type: a string, or a ColumnType value, defining the scalar type for the column.

Returns

a CreateTableDefinition obtained by adding (or replacing) the desired column to this table definition.

Expand source code

def add_column(
    self, column_name: str, column_type: str | ColumnType
) -> CreateTableDefinition:
    """
    Alias for `add_scalar_column`: derive a table definition that
    additionally declares a scalar column (as opposed to a list, set or
    other composite type).

    The arguments are forwarded, by keyword and unchanged, to
    `add_scalar_column`, whose result is returned as is.

    See the class docstring for a full example on using the fluent interface.

    Args:
        column_name: the name of the scalar column being defined.
        column_type: the scalar type of the column, given as a string
            or as a `ColumnType` value.

    Returns:
        the CreateTableDefinition returned by `add_scalar_column`, i.e. this
        definition with the desired column added (or replaced).
    """

    enriched_definition = self.add_scalar_column(
        column_name=column_name,
        column_type=column_type,
    )
    return enriched_definition

def add_list_column(self, column_name: str, value_type: str | ColumnType) ‑> CreateTableDefinition

Return a new table definition object with an added column of 'list' type. This method is for use within the fluent interface for progressively building a complete table definition.

See the class docstring for a full example on using the fluent interface.

Args

column_name: the name of the new column to add to the definition.
value_type: a string, or a ColumnType value, defining the data type for the items in the list.

Returns

a CreateTableDefinition obtained by adding (or replacing) the desired column to this table definition.

Expand source code

def add_list_column(
    self, column_name: str, value_type: str | ColumnType
) -> CreateTableDefinition:
    """
    Fluent-interface step: derive a table definition that additionally
    declares a column of 'list' type.

    This definition is not modified: its columns are copied into a new
    mapping, where `column_name` is then set (overwriting a column already
    present under that name, if any).

    See the class docstring for a full example on using the fluent interface.

    Args:
        column_name: the name of the list column being defined.
        value_type: the data type of the items in the list, given
            as a string or as a `ColumnType` value.

    Returns:
        a new CreateTableDefinition with the list column added (or replaced)
        and the primary key of this definition.
    """

    # Copy first, so that the receiver's mapping is never touched.
    updated_columns = {**self.columns}
    updated_columns[column_name] = TableValuedColumnTypeDescriptor(
        column_type="list",
        value_type=value_type,
    )
    return CreateTableDefinition(
        columns=updated_columns,
        primary_key=self.primary_key,
    )

def add_map_column(self, column_name: str, key_type: str | ColumnType, value_type: str | ColumnType) ‑> CreateTableDefinition

Return a new table definition object with an added column of 'map' type. This method is for use within the fluent interface for progressively building a complete table definition.

See the class docstring for a full example on using the fluent interface.

Args

column_name: the name of the new column to add to the definition.
key_type: a string, or a ColumnType value, defining the data type for the keys in the map.
value_type: a string, or a ColumnType value, defining the data type for the values in the map.

Returns

a CreateTableDefinition obtained by adding (or replacing) the desired column to this table definition.

Expand source code

def add_map_column(
    self,
    column_name: str,
    key_type: str | ColumnType,
    value_type: str | ColumnType,
) -> CreateTableDefinition:
    """
    Fluent-interface step: derive a table definition that additionally
    declares a column of 'map' type.

    This definition is not modified: its columns are copied into a new
    mapping, where `column_name` is then set (overwriting a column already
    present under that name, if any).

    See the class docstring for a full example on using the fluent interface.

    Args:
        column_name: the name of the map column being defined.
        key_type: the data type of the keys in the map, given
            as a string or as a `ColumnType` value.
        value_type: the data type of the values in the map, given
            as a string or as a `ColumnType` value.

    Returns:
        a new CreateTableDefinition with the map column added (or replaced)
        and the primary key of this definition.
    """

    # Copy first, so that the receiver's mapping is never touched.
    updated_columns = {**self.columns}
    updated_columns[column_name] = TableKeyValuedColumnTypeDescriptor(
        column_type="map",
        key_type=key_type,
        value_type=value_type,
    )
    return CreateTableDefinition(
        columns=updated_columns,
        primary_key=self.primary_key,
    )

def add_partition_by(self, partition_columns: list[str] | str) ‑> CreateTableDefinition

Return a new table definition object with one or more added partition_by columns. This method is for use within the fluent interface for progressively building a complete table definition.

See the class docstring for a full example on using the fluent interface.

Successive calls append the requested columns at the end of the pre-existing partition_by list. In other words, these two patterns are equivalent: (1) X.add_partition_by(["col1", "col2"]) (2) X.add_partition_by(["col1"]).add_partition_by("col2")

Note that no deduplication is applied to the overall result: the caller should take care of not supplying the same column name more than once.

Args

partition_columns: a list of column names (strings) to be added to the full table partition key. A single string (not a list) is also accepted.

Returns

a CreateTableDefinition obtained by enriching the partition_by of this table definition as requested.

Expand source code

def add_partition_by(
    self, partition_columns: list[str] | str
) -> CreateTableDefinition:
    """
    Return a new table definition object with one or more added `partition_by`
    columns. This method is for use within the
    fluent interface for progressively building a complete table definition.

    See the class docstring for a full example on using the fluent interface.

    Successive calls append the requested columns at the end of the pre-existing
    `partition_by` list. In other words, these two patterns are equivalent:
    (1) X.add_partition_by(["col1", "col2"])
    (2) X.add_partition_by(["col1"]).add_partition_by("col2")

    Note that no deduplication is applied to the overall
    result: the caller should take care of not supplying the same column name
    more than once.

    Args:
        partition_columns: a list of column names (strings) to be added to the
            full table partition key. A single string (not a list) is also
            accepted and is treated as one column name. A tuple of column
            names is handled the same way as a list.

    Returns:
        a CreateTableDefinition obtained by enriching the `partition_by`
        of this table definition as requested. The `columns` and the
        `partition_sort` of this definition are passed along unchanged.
    """

    # Lists and tuples are expanded into their items; anything else
    # (notably a bare string) counts as one single column name. Without
    # the tuple case, a tuple would end up nested as a single element
    # of `partition_by`.
    if isinstance(partition_columns, (list, tuple)):
        _partition_columns = list(partition_columns)
    else:
        _partition_columns = [partition_columns]

    return CreateTableDefinition(
        columns=self.columns,
        primary_key=TablePrimaryKeyDescriptor(
            # list concatenation builds a new list: the receiver is untouched
            partition_by=self.primary_key.partition_by + _partition_columns,
            partition_sort=self.primary_key.partition_sort,
        ),
    )

def add_partition_sort(self, partition_sort: dict[str, int]) ‑> CreateTableDefinition

Return a new table definition object with one or more added partition_sort column specifications. This method is for use within the fluent interface for progressively building a complete table definition.

See the class docstring for a full example on using the fluent interface.

Successive calls append (or replace) the requested columns at the end of the pre-existing partition_sort dictionary. In other words, these two patterns are equivalent: (1) X.add_partition_sort({"c1": 1, "c2": -1}) (2) X.add_partition_sort({"c1": 1}).add_partition_sort({"c2": -1})

Args

partition_sort: a dictionary mapping column names to their sort mode (ascending/descending, i.e. 1/-1). See also SortMode.

Returns

a CreateTableDefinition obtained by enriching the partition_sort of this table definition as requested.

Expand source code

def add_partition_sort(
    self, partition_sort: dict[str, int]
) -> CreateTableDefinition:
    """
    Fluent-interface step: derive a table definition whose `partition_sort`
    is enriched with one or more column specifications.

    See the class docstring for a full example on using the fluent interface.

    The supplied entries are merged on top of the pre-existing
    `partition_sort` dictionary: new columns are appended, while a column
    already present gets its sort mode replaced. As a consequence, these
    two patterns are equivalent:
    (1) X.add_partition_sort({"c1": 1, "c2": -1})
    (2) X.add_partition_sort({"c1": 1}).add_partition_sort({"c2": -1})

    Args:
        partition_sort: a dictionary mapping column names to their sort mode
            (ascending/descending, i.e. 1/-1).
            See also `astrapy.constants.SortMode`.

    Returns:
        a new CreateTableDefinition with the merged `partition_sort`, and
        the `columns` and `partition_by` of this definition.
    """

    current_key = self.primary_key
    enriched_key = TablePrimaryKeyDescriptor(
        partition_by=current_key.partition_by,
        # later entries win: the supplied modes override pre-existing ones
        partition_sort={**current_key.partition_sort, **partition_sort},
    )
    return CreateTableDefinition(
        columns=self.columns,
        primary_key=enriched_key,
    )

def add_scalar_column(self, column_name: str, column_type: str | ColumnType) ‑> CreateTableDefinition

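Return a new table definition object with an added column of a scalar type (i.e. not a list, set or other composite type). This method is for use within the fluent interface for progressively building a complete table definition.

See the class docstring for a full example on using the fluent interface.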

Args

column_name: the name of the new column to add to the definition.
column_type: a string, or a ColumnType value, defining the scalar type for the column.

Returns

a CreateTableDefinition obtained by adding (or replacing) the desired column to this table definition.

Expand source code

def add_scalar_column(
    self, column_name: str, column_type: str | ColumnType
) -> CreateTableDefinition:
    """
    Fluent-interface step: derive a table definition that additionally
    declares a scalar column (as opposed to a list, set or other
    composite type).

    This definition is not modified: its columns are copied into a new
    mapping, where `column_name` is then set (overwriting a column already
    present under that name, if any).

    See the class docstring for a full example on using the fluent interface.

    Args:
        column_name: the name of the scalar column being defined.
        column_type: the scalar type of the column, given as a string
            or as a `ColumnType` value (it goes through `ColumnType.coerce`).

    Returns:
        a new CreateTableDefinition with the scalar column added (or
        replaced) and the primary key of this definition.
    """

    # Copy first, so that the receiver's mapping is never touched.
    updated_columns = {**self.columns}
    updated_columns[column_name] = TableScalarColumnTypeDescriptor(
        column_type=ColumnType.coerce(column_type)
    )
    return CreateTableDefinition(
        columns=updated_columns,
        primary_key=self.primary_key,
    )

def add_set_column(self, column_name: str, value_type: str | ColumnType) ‑> CreateTableDefinition

Return a new table definition object with an added column of 'set' type. This method is for use within the fluent interface for progressively building a complete table definition.

See the class docstring for a full example on using the fluent interface.

Args

column_name: the name of the new column to add to the definition.
value_type: a string, or a ColumnType value, defining the data type for the items in the set.

Returns

a CreateTableDefinition obtained by adding (or replacing) the desired column to this table definition.

Expand source code

def add_set_column(
    self, column_name: str, value_type: str | ColumnType
) -> CreateTableDefinition:
    """
    Fluent-interface step: derive a table definition that additionally
    declares a column of 'set' type.

    This definition is not modified: its columns are copied into a new
    mapping, where `column_name` is then set (overwriting a column already
    present under that name, if any).

    See the class docstring for a full example on using the fluent interface.

    Args:
        column_name: the name of the set column being defined.
        value_type: the data type of the items in the set, given
            as a string or as a `ColumnType` value.

    Returns:
        a new CreateTableDefinition with the set column added (or replaced)
        and the primary key of this definition.
    """

    # Copy first, so that the receiver's mapping is never touched.
    updated_columns = {**self.columns}
    updated_columns[column_name] = TableValuedColumnTypeDescriptor(
        column_type="set",
        value_type=value_type,
    )
    return CreateTableDefinition(
        columns=updated_columns,
        primary_key=self.primary_key,
    )

def add_vector_column(self, column_name: str, *, dimension: int | None = None, service: VectorServiceOptions | dict[str, Any] | None = None) ‑> CreateTableDefinition

Return a new table definition object with an added column of 'vector' type. This method is for use within the fluent interface for progressively building a complete table definition.

See the class docstring for a full example on using the fluent interface.

Args

column_name: the name of the new column to add to the definition.
dimension: the dimensionality of the vector, i.e. the number of components each vector in this column will have. If a service parameter is supplied and the vectorize model allows for it, the dimension may be left unspecified to have the API set a default value. The Data API will raise an error if a table creation is attempted with a vector column for which neither a service nor the dimension are given.
service: a VectorServiceOptions object, or an equivalent plain dictionary, defining the server-side embedding service associated to the column, if desired.

Returns

a CreateTableDefinition obtained by adding (or replacing) the desired column to this table definition.

Expand source code

def add_vector_column(
    self,
    column_name: str,
    *,
    dimension: int | None = None,
    service: VectorServiceOptions | dict[str, Any] | None = None,
) -> CreateTableDefinition:
    """
    Fluent-interface step: derive a table definition that additionally
    declares a column of 'vector' type.

    This definition is not modified: its columns are copied into a new
    mapping, where `column_name` is then set (overwriting a column already
    present under that name, if any).

    See the class docstring for a full example on using the fluent interface.

    Args:
        column_name: the name of the vector column being defined.
        dimension: the number of components of each vector stored in the
            column. When a `service` is supplied and the vectorize model
            allows for it, this can be omitted to let the API pick a default.
            The Data API will raise an error if a table creation is attempted
            with a vector column having neither a service nor a dimension.
        service: the server-side embedding service to associate to the
            column, if desired, given as a `VectorServiceOptions` object
            or an equivalent plain dictionary (it goes through
            `VectorServiceOptions.coerce`).

    Returns:
        a new CreateTableDefinition with the vector column added (or
        replaced) and the primary key of this definition.
    """

    # Copy first, so that the receiver's mapping is never touched.
    updated_columns = {**self.columns}
    updated_columns[column_name] = TableVectorColumnTypeDescriptor(
        column_type="vector",
        dimension=dimension,
        service=VectorServiceOptions.coerce(service),
    )
    return CreateTableDefinition(
        columns=updated_columns,
        primary_key=self.primary_key,
    )

def as_dict(self) ‑> dict[str, typing.Any]

Recast this object into a dictionary.

Expand source code

def as_dict(self) -> dict[str, Any]:
    """
    Recast this object into a dictionary.

    Returns:
        a dictionary with a "columns" entry (column names mapped to the
        `as_dict()` form of their descriptor) and a "primaryKey" entry
        (the `as_dict()` form of the primary key). An entry whose value
        is None is left out.
    """

    columns_dict = {
        name: descriptor.as_dict() for name, descriptor in self.columns.items()
    }
    candidate_entries = {
        "columns": columns_dict,
        "primaryKey": self.primary_key.as_dict(),
    }
    return {
        key: value for key, value in candidate_entries.items() if value is not None
    }

def build(self) ‑> CreateTableDefinition

The final step in the fluent (builder) interface. Calling this method finalizes the definition that has been built so far and makes it into a table definition ready for use in e.g. table creation.

Note that this step may be automatically invoked by the receiving methods: however it is a good practice - and also adds to the readability of the code - to call it explicitly.

See the class docstring for a full example on using the fluent interface.

Returns

a CreateTableDefinition obtained by finalizing the definition being built so far.

Expand source code

def build(self) -> CreateTableDefinition:
    """
    Conclude the fluent (builder) interface, handing out the definition
    accumulated so far as a table definition ready for use,
    e.g. in a table creation.

    Receiving methods may take care of this step automatically. Calling it
    explicitly is nevertheless a good practice, which also makes
    the code more readable.

    See the class docstring for a full example on using the fluent interface.

    Returns:
        the finalized CreateTableDefinition. No copy is made: the returned
        object is this very instance.
    """

    finalized_definition = self
    return finalized_definition