Module astrapy.data.info.table_descriptor.table_creation

Expand source code
# Copyright DataStax, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

from dataclasses import dataclass
from typing import Any

from astrapy.data.info.table_descriptor.table_columns import (
    TableColumnTypeDescriptor,
    TableKeyValuedColumnTypeDescriptor,
    TablePrimaryKeyDescriptor,
    TableScalarColumnTypeDescriptor,
    TableValuedColumnTypeDescriptor,
    TableVectorColumnTypeDescriptor,
)
from astrapy.data.info.vectorize import VectorServiceOptions
from astrapy.data.utils.table_types import (
    ColumnType,
)
from astrapy.utils.parsing import _warn_residual_keys


@dataclass
class CreateTableDefinition:
    """
    A structure expressing the definition ("schema") of a table to be created through
    the Data API. This object is passed as the `definition` parameter to the database
    `create_table` method.

    See the Data API specifications for detailed specification and allowed values.

    Instances of this object can be created in three ways: using a fluent interface,
    passing a fully-formed definition to the class constructor, or coercing an
    appropriately-shaped plain dictionary into this class.

    Attributes:
        columns: a map from column names to their type definition object.
        primary_key: a specification of the primary key for the table.

    Example:
        >>> from astrapy.constants import SortMode
        >>> from astrapy.info import (
        ...     CreateTableDefinition,
        ...     TablePrimaryKeyDescriptor,
        ...     ColumnType,
        ...     TableScalarColumnTypeDescriptor,
        ...     TableValuedColumnType,
        ...     TableValuedColumnTypeDescriptor,
        ...     TableVectorColumnTypeDescriptor,
        ... )
        >>>
        >>> # Create a table definition with the fluent interface:
        >>> table_definition = (
        ...     CreateTableDefinition.builder()
        ...     .add_column("match_id", ColumnType.TEXT)
        ...     .add_column("round", ColumnType.INT)
        ...     .add_vector_column("m_vector", dimension=3)
        ...     .add_column("score", ColumnType.INT)
        ...     .add_column("when", ColumnType.TIMESTAMP)
        ...     .add_column("winner", ColumnType.TEXT)
        ...     .add_set_column("fighters", ColumnType.UUID)
        ...     .add_partition_by(["match_id"])
        ...     .add_partition_sort({"round": SortMode.ASCENDING})
        ...     .build()
        ... )
        >>>
        >>> # Create a table definition passing everything to the constructor:
        >>> table_definition_1 = CreateTableDefinition(
        ...     columns={
        ...         "match_id": TableScalarColumnTypeDescriptor(
        ...             ColumnType.TEXT,
        ...         ),
        ...         "round": TableScalarColumnTypeDescriptor(
        ...             ColumnType.INT,
        ...         ),
        ...         "m_vector": TableVectorColumnTypeDescriptor(
        ...             column_type="vector", dimension=3
        ...         ),
        ...         "score": TableScalarColumnTypeDescriptor(
        ...             ColumnType.INT,
        ...         ),
        ...         "when": TableScalarColumnTypeDescriptor(
        ...             ColumnType.TIMESTAMP,
        ...         ),
        ...         "winner": TableScalarColumnTypeDescriptor(
        ...             ColumnType.TEXT,
        ...         ),
        ...         "fighters": TableValuedColumnTypeDescriptor(
        ...             column_type=TableValuedColumnType.SET,
        ...             value_type=ColumnType.UUID,
        ...         ),
        ...     },
        ...     primary_key=TablePrimaryKeyDescriptor(
        ...         partition_by=["match_id"],
        ...         partition_sort={"round": SortMode.ASCENDING},
        ...     ),
        ... )
        >>>
        >>> # Coerce a dictionary into a table definition:
        >>> table_definition_2_dict = {
        ...     "columns": {
        ...         "match_id": {"type": "text"},
        ...         "round": {"type": "int"},
        ...         "m_vector": {"type": "vector", "dimension": 3},
        ...         "score": {"type": "int"},
        ...         "when": {"type": "timestamp"},
        ...         "winner": {"type": "text"},
        ...         "fighters": {"type": "set", "valueType": "uuid"},
        ...     },
        ...     "primaryKey": {
        ...         "partitionBy": ["match_id"],
        ...         "partitionSort": {"round": 1},
        ...     },
        ... }
        >>> table_definition_2 = CreateTableDefinition.coerce(
        ...     table_definition_2_dict
        ... )
        >>>
        >>> # The three created objects are exactly identical:
        >>> table_definition_2 == table_definition_1
        True
        >>> table_definition_2 == table_definition
        True
    """

    columns: dict[str, TableColumnTypeDescriptor]
    primary_key: TablePrimaryKeyDescriptor

    def __repr__(self) -> str:
        not_null_pieces = [
            pc
            for pc in [
                f"columns=[{','.join(self.columns.keys())}]",
                f"primary_key={self.primary_key}",
            ]
            if pc is not None
        ]
        return f"{self.__class__.__name__}({', '.join(not_null_pieces)})"

    def as_dict(self) -> dict[str, Any]:
        """Recast this object into a dictionary."""

        return {
            k: v
            for k, v in {
                "columns": {
                    col_n: col_v.as_dict() for col_n, col_v in self.columns.items()
                },
                "primaryKey": self.primary_key.as_dict(),
            }.items()
            if v is not None
        }

    @classmethod
    def _from_dict(cls, raw_dict: dict[str, Any]) -> CreateTableDefinition:
        """
        Build a CreateTableDefinition out of a plain dictionary,
        for instance one coming from the Data API.

        The dictionary must have a "columns" and a "primaryKey" entry.
        `_warn_residual_keys` is invoked with these two names as the expected
        keys (presumably to flag any unexpected additional key).
        """

        _warn_residual_keys(cls, raw_dict, {"columns", "primaryKey"})

        parsed_columns = {}
        for column_name, raw_column in raw_dict["columns"].items():
            parsed_columns[column_name] = TableColumnTypeDescriptor.coerce(raw_column)
        parsed_primary_key = TablePrimaryKeyDescriptor.coerce(raw_dict["primaryKey"])

        return CreateTableDefinition(
            columns=parsed_columns,
            primary_key=parsed_primary_key,
        )

    @classmethod
    def coerce(
        cls, raw_input: CreateTableDefinition | dict[str, Any]
    ) -> CreateTableDefinition:
        """
        Normalize the input, whether an object already or a plain dictionary
        of the right structure, into a CreateTableDefinition.
        """

        if isinstance(raw_input, CreateTableDefinition):
            return raw_input
        else:
            return cls._from_dict(raw_input)

    @staticmethod
    def builder() -> CreateTableDefinition:
        """
        Start the fluent interface by returning an "empty" table definition:
        one with no columns, an empty `partition_by` list and an empty
        `partition_sort` dictionary. Columns and primary key are meant to be
        added afterwards, one step at a time, with the `add_*` methods.

        The object returned here describes a "table with no columns at all",
        so by itself it is not an acceptable definition for a table creation
        method on a Database.

        See the class docstring for a full example on using the fluent interface.

        Returns:
            a CreateTableDefinition formally describing a table with no columns.
        """

        empty_primary_key = TablePrimaryKeyDescriptor(
            partition_by=[],
            partition_sort={},
        )
        return CreateTableDefinition(columns={}, primary_key=empty_primary_key)

    def add_scalar_column(
        self, column_name: str, column_type: str | ColumnType
    ) -> CreateTableDefinition:
        """
        Produce a new table definition featuring an additional column
        of a scalar type (as opposed to lists, sets and other composite types).
        Part of the fluent interface for building a table definition
        step by step.

        See the class docstring for a full example on using the fluent interface.

        Args:
            column_name: the name of the column being added.
            column_type: the scalar type of the column, given as
                a string or as a `ColumnType` value.

        Returns:
            the CreateTableDefinition resulting from adding the column to this
            definition (an existing column by the same name gets replaced).
        """

        scalar_descriptor = TableScalarColumnTypeDescriptor(
            column_type=ColumnType.coerce(column_type)
        )
        # a new dict is created: the columns of this object are left untouched
        return CreateTableDefinition(
            columns={**self.columns, column_name: scalar_descriptor},
            primary_key=self.primary_key,
        )

    def add_column(
        self, column_name: str, column_type: str | ColumnType
    ) -> CreateTableDefinition:
        """
        Shorthand for `add_scalar_column`: produce a new table definition
        featuring an additional column of a scalar type (as opposed to lists,
        sets and other composite types). Part of the fluent interface for
        building a table definition step by step.

        See the class docstring for a full example on using the fluent interface.

        Args:
            column_name: the name of the column being added.
            column_type: the scalar type of the column, given as
                a string or as a `ColumnType` value.

        Returns:
            the CreateTableDefinition resulting from adding the column to this
            definition (an existing column by the same name gets replaced).
        """

        # plain delegation: this method is just a shorter name
        enriched_definition = self.add_scalar_column(
            column_name=column_name,
            column_type=column_type,
        )
        return enriched_definition

    def add_set_column(
        self, column_name: str, value_type: str | ColumnType
    ) -> CreateTableDefinition:
        """
        Produce a new table definition featuring an additional column
        of 'set' type. Part of the fluent interface for building
        a table definition step by step.

        See the class docstring for a full example on using the fluent interface.

        Args:
            column_name: the name of the column being added.
            value_type: the data type of the items in the set, given as
                a string or as a `ColumnType` value.

        Returns:
            the CreateTableDefinition resulting from adding the column to this
            definition (an existing column by the same name gets replaced).
        """

        set_descriptor = TableValuedColumnTypeDescriptor(
            column_type="set",
            value_type=value_type,
        )
        # a new dict is created: the columns of this object are left untouched
        return CreateTableDefinition(
            columns={**self.columns, column_name: set_descriptor},
            primary_key=self.primary_key,
        )

    def add_list_column(
        self, column_name: str, value_type: str | ColumnType
    ) -> CreateTableDefinition:
        """
        Produce a new table definition featuring an additional column
        of 'list' type. Part of the fluent interface for building
        a table definition step by step.

        See the class docstring for a full example on using the fluent interface.

        Args:
            column_name: the name of the column being added.
            value_type: the data type of the items in the list, given as
                a string or as a `ColumnType` value.

        Returns:
            the CreateTableDefinition resulting from adding the column to this
            definition (an existing column by the same name gets replaced).
        """

        list_descriptor = TableValuedColumnTypeDescriptor(
            column_type="list",
            value_type=value_type,
        )
        # a new dict is created: the columns of this object are left untouched
        return CreateTableDefinition(
            columns={**self.columns, column_name: list_descriptor},
            primary_key=self.primary_key,
        )

    def add_map_column(
        self,
        column_name: str,
        key_type: str | ColumnType,
        value_type: str | ColumnType,
    ) -> CreateTableDefinition:
        """
        Produce a new table definition featuring an additional column
        of 'map' type. Part of the fluent interface for building
        a table definition step by step.

        See the class docstring for a full example on using the fluent interface.

        Args:
            column_name: the name of the column being added.
            key_type: the data type of the keys in the map, given as
                a string or as a `ColumnType` value.
            value_type: the data type of the values in the map, given as
                a string or as a `ColumnType` value.

        Returns:
            the CreateTableDefinition resulting from adding the column to this
            definition (an existing column by the same name gets replaced).
        """

        map_descriptor = TableKeyValuedColumnTypeDescriptor(
            column_type="map",
            key_type=key_type,
            value_type=value_type,
        )
        # a new dict is created: the columns of this object are left untouched
        return CreateTableDefinition(
            columns={**self.columns, column_name: map_descriptor},
            primary_key=self.primary_key,
        )

    def add_vector_column(
        self,
        column_name: str,
        *,
        dimension: int | None = None,
        service: VectorServiceOptions | dict[str, Any] | None = None,
    ) -> CreateTableDefinition:
        """
        Produce a new table definition featuring an additional column
        of 'vector' type. Part of the fluent interface for building
        a table definition step by step.

        See the class docstring for a full example on using the fluent interface.

        Args:
            column_name: the name of the column being added.
            dimension: the number of components each vector in this column
                will have. It may be left unspecified when a `service` is given
                and the vectorize model allows for it, in which case the API
                sets a default value. The Data API will raise an error if
                a table creation is attempted with a vector column for which
                neither a service nor the dimension are given.
            service: the server-side embedding service to associate to the
                column, if desired, expressed as a `VectorServiceOptions` object
                or as an equivalent plain dictionary. The value is passed
                through `VectorServiceOptions.coerce`.

        Returns:
            the CreateTableDefinition resulting from adding the column to this
            definition (an existing column by the same name gets replaced).
        """

        vector_descriptor = TableVectorColumnTypeDescriptor(
            column_type="vector",
            dimension=dimension,
            service=VectorServiceOptions.coerce(service),
        )
        # a new dict is created: the columns of this object are left untouched
        return CreateTableDefinition(
            columns={**self.columns, column_name: vector_descriptor},
            primary_key=self.primary_key,
        )

    def add_partition_by(
        self, partition_columns: list[str] | str
    ) -> CreateTableDefinition:
        """
        Produce a new table definition whose `partition_by` has one or more
        columns added. Part of the fluent interface for building
        a table definition step by step.

        See the class docstring for a full example on using the fluent interface.

        The requested columns are appended after those already found in
        `partition_by`, which makes the following two forms equivalent:
        (1) X.add_partition_by(["col1", "col2"])
        (2) X.add_partition_by(["col1"]).add_partition_by("col2")

        The resulting list is not deduplicated in any way: it is up to
        the caller to avoid supplying the same column name more than once.

        Args:
            partition_columns: the column names (strings) to add to the table
                partition key, as a list. A single string, standing for
                a single column, is accepted as well.

        Returns:
            the CreateTableDefinition resulting from extending
            the `partition_by` of this table definition as requested.
        """

        # anything that is not a list is treated as a single column name
        if isinstance(partition_columns, list):
            columns_to_add = partition_columns
        else:
            columns_to_add = [partition_columns]

        extended_primary_key = TablePrimaryKeyDescriptor(
            partition_by=self.primary_key.partition_by + columns_to_add,
            partition_sort=self.primary_key.partition_sort,
        )
        return CreateTableDefinition(
            columns=self.columns,
            primary_key=extended_primary_key,
        )

    def add_partition_sort(
        self, partition_sort: dict[str, int]
    ) -> CreateTableDefinition:
        """
        Produce a new table definition whose `partition_sort` has one or more
        column specifications added. Part of the fluent interface for building
        a table definition step by step.

        See the class docstring for a full example on using the fluent interface.

        The requested entries are merged into the `partition_sort` dictionary
        already present: new columns come after the existing ones, while a column
        already present gets its sort mode replaced. This makes the following
        two forms equivalent:
        (1) X.add_partition_sort({"c1": 1, "c2": -1})
        (2) X.add_partition_sort({"c1": 1}).add_partition_sort({"c2": -1})

        Args:
            partition_sort: a dictionary mapping column names to their sort mode
                (ascending/descending, i.e. 1/-1. See also
                `astrapy.constants.SortMode`).

        Returns:
            the CreateTableDefinition resulting from extending
            the `partition_sort` of this table definition as requested.
        """

        # entries from the argument take precedence over the pre-existing ones
        merged_partition_sort = {
            **self.primary_key.partition_sort,
            **partition_sort,
        }
        extended_primary_key = TablePrimaryKeyDescriptor(
            partition_by=self.primary_key.partition_by,
            partition_sort=merged_partition_sort,
        )
        return CreateTableDefinition(
            columns=self.columns,
            primary_key=extended_primary_key,
        )

    def build(self) -> CreateTableDefinition:
        """
        The final step in the fluent (builder) interface. Calling this method
        finalizes the definition that has been built so far and makes it into a
        table definition ready for use in e.g. table creation.

        In this implementation no transformation takes place: the method returns
        the very same object it is called on.

        Note that this step may be automatically invoked by the receiving methods:
        however it is a good practice - and also adds to the readability of the code -
        to call it explicitly.

        See the class docstring for a full example on using the fluent interface.

        Returns:
            a CreateTableDefinition obtained by finalizing the definition being
                built so far (i.e. this object itself).
        """

        # the builder and the finished definition are the same class: nothing to do
        return self

Classes

class CreateTableDefinition (columns: dict[str, TableColumnTypeDescriptor], primary_key: TablePrimaryKeyDescriptor)

A structure expressing the definition ("schema") of a table to be created through the Data API. This object is passed as the definition parameter to the database create_table method.

See the Data API specifications for detailed specification and allowed values.

Instances of this object can be created in three ways: using a fluent interface, passing a fully-formed definition to the class constructor, or coercing an appropriately-shaped plain dictionary into this class.

Attributes

columns
a map from column names to their type definition object.
primary_key
a specification of the primary key for the table.

Example

>>> from astrapy.constants import SortMode
>>> from astrapy.info import (
...     CreateTableDefinition,
...     TablePrimaryKeyDescriptor,
...     ColumnType,
...     TableScalarColumnTypeDescriptor,
...     TableValuedColumnType,
...     TableValuedColumnTypeDescriptor,
...     TableVectorColumnTypeDescriptor,
... )
>>>
>>> # Create a table definition with the fluent interface:
>>> table_definition = (
...     CreateTableDefinition.builder()
...     .add_column("match_id", ColumnType.TEXT)
...     .add_column("round", ColumnType.INT)
...     .add_vector_column("m_vector", dimension=3)
...     .add_column("score", ColumnType.INT)
...     .add_column("when", ColumnType.TIMESTAMP)
...     .add_column("winner", ColumnType.TEXT)
...     .add_set_column("fighters", ColumnType.UUID)
...     .add_partition_by(["match_id"])
...     .add_partition_sort({"round": SortMode.ASCENDING})
...     .build()
... )
>>>
>>> # Create a table definition passing everything to the constructor:
>>> table_definition_1 = CreateTableDefinition(
...     columns={
...         "match_id": TableScalarColumnTypeDescriptor(
...             ColumnType.TEXT,
...         ),
...         "round": TableScalarColumnTypeDescriptor(
...             ColumnType.INT,
...         ),
...         "m_vector": TableVectorColumnTypeDescriptor(
...             column_type="vector", dimension=3
...         ),
...         "score": TableScalarColumnTypeDescriptor(
...             ColumnType.INT,
...         ),
...         "when": TableScalarColumnTypeDescriptor(
...             ColumnType.TIMESTAMP,
...         ),
...         "winner": TableScalarColumnTypeDescriptor(
...             ColumnType.TEXT,
...         ),
...         "fighters": TableValuedColumnTypeDescriptor(
...             column_type=TableValuedColumnType.SET,
...             value_type=ColumnType.UUID,
...         ),
...     },
...     primary_key=TablePrimaryKeyDescriptor(
...         partition_by=["match_id"],
...         partition_sort={"round": SortMode.ASCENDING},
...     ),
... )
>>>
>>> # Coerce a dictionary into a table definition:
>>> table_definition_2_dict = {
...     "columns": {
...         "match_id": {"type": "text"},
...         "round": {"type": "int"},
...         "m_vector": {"type": "vector", "dimension": 3},
...         "score": {"type": "int"},
...         "when": {"type": "timestamp"},
...         "winner": {"type": "text"},
...         "fighters": {"type": "set", "valueType": "uuid"},
...     },
...     "primaryKey": {
...         "partitionBy": ["match_id"],
...         "partitionSort": {"round": 1},
...     },
... }
>>> table_definition_2 = CreateTableDefinition.coerce(
...     table_definition_2_dict
... )
>>>
>>> # The three created objects are exactly identical:
>>> table_definition_2 == table_definition_1
True
>>> table_definition_2 == table_definition
True
Expand source code
@dataclass
class CreateTableDefinition:
    """
    A structure expressing the definition ("schema") of a table to be created through
    the Data API. This object is passed as the `definition` parameter to the database
    `create_table` method.

    See the Data API specifications for detailed specification and allowed values.

    Instances of this object can be created in three ways: using a fluent interface,
    passing a fully-formed definition to the class constructor, or coercing an
    appropriately-shaped plain dictionary into this class.

    Attributes:
        columns: a map from column names to their type definition object.
        primary_key: a specification of the primary key for the table.

    Example:
        >>> from astrapy.constants import SortMode
        >>> from astrapy.info import (
        ...     CreateTableDefinition,
        ...     TablePrimaryKeyDescriptor,
        ...     ColumnType,
        ...     TableScalarColumnTypeDescriptor,
        ...     TableValuedColumnType,
        ...     TableValuedColumnTypeDescriptor,
        ...     TableVectorColumnTypeDescriptor,
        ... )
        >>>
        >>> # Create a table definition with the fluent interface:
        >>> table_definition = (
        ...     CreateTableDefinition.builder()
        ...     .add_column("match_id", ColumnType.TEXT)
        ...     .add_column("round", ColumnType.INT)
        ...     .add_vector_column("m_vector", dimension=3)
        ...     .add_column("score", ColumnType.INT)
        ...     .add_column("when", ColumnType.TIMESTAMP)
        ...     .add_column("winner", ColumnType.TEXT)
        ...     .add_set_column("fighters", ColumnType.UUID)
        ...     .add_partition_by(["match_id"])
        ...     .add_partition_sort({"round": SortMode.ASCENDING})
        ...     .build()
        ... )
        >>>
        >>> # Create a table definition passing everything to the constructor:
        >>> table_definition_1 = CreateTableDefinition(
        ...     columns={
        ...         "match_id": TableScalarColumnTypeDescriptor(
        ...             ColumnType.TEXT,
        ...         ),
        ...         "round": TableScalarColumnTypeDescriptor(
        ...             ColumnType.INT,
        ...         ),
        ...         "m_vector": TableVectorColumnTypeDescriptor(
        ...             column_type="vector", dimension=3
        ...         ),
        ...         "score": TableScalarColumnTypeDescriptor(
        ...             ColumnType.INT,
        ...         ),
        ...         "when": TableScalarColumnTypeDescriptor(
        ...             ColumnType.TIMESTAMP,
        ...         ),
        ...         "winner": TableScalarColumnTypeDescriptor(
        ...             ColumnType.TEXT,
        ...         ),
        ...         "fighters": TableValuedColumnTypeDescriptor(
        ...             column_type=TableValuedColumnType.SET,
        ...             value_type=ColumnType.UUID,
        ...         ),
        ...     },
        ...     primary_key=TablePrimaryKeyDescriptor(
        ...         partition_by=["match_id"],
        ...         partition_sort={"round": SortMode.ASCENDING},
        ...     ),
        ... )
        >>>
        >>> # Coerce a dictionary into a table definition:
        >>> table_definition_2_dict = {
        ...     "columns": {
        ...         "match_id": {"type": "text"},
        ...         "round": {"type": "int"},
        ...         "m_vector": {"type": "vector", "dimension": 3},
        ...         "score": {"type": "int"},
        ...         "when": {"type": "timestamp"},
        ...         "winner": {"type": "text"},
        ...         "fighters": {"type": "set", "valueType": "uuid"},
        ...     },
        ...     "primaryKey": {
        ...         "partitionBy": ["match_id"],
        ...         "partitionSort": {"round": 1},
        ...     },
        ... }
        >>> table_definition_2 = CreateTableDefinition.coerce(
        ...     table_definition_2_dict
        ... )
        >>>
        >>> # The three created objects are exactly identical:
        >>> table_definition_2 == table_definition_1
        True
        >>> table_definition_2 == table_definition
        True
    """

    columns: dict[str, TableColumnTypeDescriptor]
    primary_key: TablePrimaryKeyDescriptor

    def __repr__(self) -> str:
        not_null_pieces = [
            pc
            for pc in [
                f"columns=[{','.join(self.columns.keys())}]",
                f"primary_key={self.primary_key}",
            ]
            if pc is not None
        ]
        return f"{self.__class__.__name__}({', '.join(not_null_pieces)})"

    def as_dict(self) -> dict[str, Any]:
        """Recast this object into a dictionary."""

        return {
            k: v
            for k, v in {
                "columns": {
                    col_n: col_v.as_dict() for col_n, col_v in self.columns.items()
                },
                "primaryKey": self.primary_key.as_dict(),
            }.items()
            if v is not None
        }

    @classmethod
    def _from_dict(cls, raw_dict: dict[str, Any]) -> CreateTableDefinition:
        """
        Build a CreateTableDefinition out of a plain dictionary,
        for instance one coming from the Data API.

        The dictionary must have a "columns" and a "primaryKey" entry.
        `_warn_residual_keys` is invoked with these two names as the expected
        keys (presumably to flag any unexpected additional key).
        """

        _warn_residual_keys(cls, raw_dict, {"columns", "primaryKey"})

        parsed_columns = {}
        for column_name, raw_column in raw_dict["columns"].items():
            parsed_columns[column_name] = TableColumnTypeDescriptor.coerce(raw_column)
        parsed_primary_key = TablePrimaryKeyDescriptor.coerce(raw_dict["primaryKey"])

        return CreateTableDefinition(
            columns=parsed_columns,
            primary_key=parsed_primary_key,
        )

    @classmethod
    def coerce(
        cls, raw_input: CreateTableDefinition | dict[str, Any]
    ) -> CreateTableDefinition:
        """
        Normalize the input, whether an object already or a plain dictionary
        of the right structure, into a CreateTableDefinition.
        """

        if isinstance(raw_input, CreateTableDefinition):
            return raw_input
        else:
            return cls._from_dict(raw_input)

    @staticmethod
    def builder() -> CreateTableDefinition:
        """
        Start the fluent interface by returning an "empty" table definition:
        one with no columns, an empty `partition_by` list and an empty
        `partition_sort` dictionary. Columns and primary key are meant to be
        added afterwards, one step at a time, with the `add_*` methods.

        The object returned here describes a "table with no columns at all",
        so by itself it is not an acceptable definition for a table creation
        method on a Database.

        See the class docstring for a full example on using the fluent interface.

        Returns:
            a CreateTableDefinition formally describing a table with no columns.
        """

        empty_primary_key = TablePrimaryKeyDescriptor(
            partition_by=[],
            partition_sort={},
        )
        return CreateTableDefinition(columns={}, primary_key=empty_primary_key)

    def add_scalar_column(
        self, column_name: str, column_type: str | ColumnType
    ) -> CreateTableDefinition:
        """
        Produce a new table definition featuring an additional column
        of a scalar type (as opposed to lists, sets and other composite types).
        Part of the fluent interface for building a table definition
        step by step.

        See the class docstring for a full example on using the fluent interface.

        Args:
            column_name: the name of the column being added.
            column_type: the scalar type of the column, given as
                a string or as a `ColumnType` value.

        Returns:
            the CreateTableDefinition resulting from adding the column to this
            definition (an existing column by the same name gets replaced).
        """

        scalar_descriptor = TableScalarColumnTypeDescriptor(
            column_type=ColumnType.coerce(column_type)
        )
        # a new dict is created: the columns of this object are left untouched
        return CreateTableDefinition(
            columns={**self.columns, column_name: scalar_descriptor},
            primary_key=self.primary_key,
        )

    def add_column(
        self, column_name: str, column_type: str | ColumnType
    ) -> CreateTableDefinition:
        """
        Shorthand for `add_scalar_column`: produce a new table definition
        featuring an additional column of a scalar type (as opposed to lists,
        sets and other composite types). Part of the fluent interface for
        building a table definition step by step.

        See the class docstring for a full example on using the fluent interface.

        Args:
            column_name: the name of the column being added.
            column_type: the scalar type of the column, given as
                a string or as a `ColumnType` value.

        Returns:
            the CreateTableDefinition resulting from adding the column to this
            definition (an existing column by the same name gets replaced).
        """

        # plain delegation: this method is just a shorter name
        enriched_definition = self.add_scalar_column(
            column_name=column_name,
            column_type=column_type,
        )
        return enriched_definition

    def add_set_column(
        self, column_name: str, value_type: str | ColumnType
    ) -> CreateTableDefinition:
        """
        Produce a copy of this table definition enriched with one column
        of 'set' type. This is one of the steps of the fluent interface
        for progressively building a complete table definition.

        See the class docstring for a full example on using the fluent interface.

        Args:
            column_name: the name of the column being added.
            value_type: the data type of the items in the set, given either
                as a string or as a `ColumnType` value.

        Returns:
            a new CreateTableDefinition with the same primary key as this one
            and the same columns plus the requested one. A pre-existing column
            with the same name is replaced.
        """

        set_descriptor = TableValuedColumnTypeDescriptor(
            column_type="set",
            value_type=value_type,
        )
        return CreateTableDefinition(
            columns={**self.columns, column_name: set_descriptor},
            primary_key=self.primary_key,
        )

    def add_list_column(
        self, column_name: str, value_type: str | ColumnType
    ) -> CreateTableDefinition:
        """
        Produce a copy of this table definition enriched with one column
        of 'list' type. This is one of the steps of the fluent interface
        for progressively building a complete table definition.

        See the class docstring for a full example on using the fluent interface.

        Args:
            column_name: the name of the column being added.
            value_type: the data type of the items in the list, given either
                as a string or as a `ColumnType` value.

        Returns:
            a new CreateTableDefinition with the same primary key as this one
            and the same columns plus the requested one. A pre-existing column
            with the same name is replaced.
        """

        list_descriptor = TableValuedColumnTypeDescriptor(
            column_type="list",
            value_type=value_type,
        )
        return CreateTableDefinition(
            columns={**self.columns, column_name: list_descriptor},
            primary_key=self.primary_key,
        )

    def add_map_column(
        self,
        column_name: str,
        key_type: str | ColumnType,
        value_type: str | ColumnType,
    ) -> CreateTableDefinition:
        """
        Produce a copy of this table definition enriched with one column
        of 'map' type. This is one of the steps of the fluent interface
        for progressively building a complete table definition.

        See the class docstring for a full example on using the fluent interface.

        Args:
            column_name: the name of the column being added.
            key_type: the data type of the keys in the map, given either
                as a string or as a `ColumnType` value.
            value_type: the data type of the values in the map, given either
                as a string or as a `ColumnType` value.

        Returns:
            a new CreateTableDefinition with the same primary key as this one
            and the same columns plus the requested one. A pre-existing column
            with the same name is replaced.
        """

        map_descriptor = TableKeyValuedColumnTypeDescriptor(
            column_type="map",
            key_type=key_type,
            value_type=value_type,
        )
        return CreateTableDefinition(
            columns={**self.columns, column_name: map_descriptor},
            primary_key=self.primary_key,
        )

    def add_vector_column(
        self,
        column_name: str,
        *,
        dimension: int | None = None,
        service: VectorServiceOptions | dict[str, Any] | None = None,
    ) -> CreateTableDefinition:
        """
        Produce a copy of this table definition enriched with one column
        of 'vector' type. This is one of the steps of the fluent interface
        for progressively building a complete table definition.

        See the class docstring for a full example on using the fluent interface.

        Args:
            column_name: the name of the column being added.
            dimension: the number of components each vector stored in this column
                will have. It may be left unspecified when a `service` is given
                and the vectorize model allows for it, in which case the API sets
                a default value. The Data API will raise an error if a table
                creation is attempted with a vector column for which neither
                a service nor the dimension are given.
            service: the server-side embedding service to associate to the column,
                if desired, expressed as a `VectorServiceOptions` object or as
                an equivalent plain dictionary (it is normalized through
                `VectorServiceOptions.coerce`).

        Returns:
            a new CreateTableDefinition with the same primary key as this one
            and the same columns plus the requested one. A pre-existing column
            with the same name is replaced.
        """

        vector_descriptor = TableVectorColumnTypeDescriptor(
            column_type="vector",
            dimension=dimension,
            service=VectorServiceOptions.coerce(service),
        )
        return CreateTableDefinition(
            columns={**self.columns, column_name: vector_descriptor},
            primary_key=self.primary_key,
        )

    def add_partition_by(
        self, partition_columns: list[str] | str
    ) -> CreateTableDefinition:
        """
        Produce a copy of this table definition whose `partition_by` has been
        extended with one or more columns. This is one of the steps of the
        fluent interface for progressively building a complete table definition.

        See the class docstring for a full example on using the fluent interface.

        The requested columns are appended after those already in `partition_by`,
        so that these two patterns are equivalent:
        (1) X.add_partition_by(["col1", "col2"])
        (2) X.add_partition_by(["col1"]).add_partition_by("col2")

        No deduplication takes place on the resulting list: it is up to the
        caller to avoid supplying the same column name more than once.

        Args:
            partition_columns: the column names (strings) to append to the table
                partition key, as a list. A single string (not a list) is also
                accepted and is treated as a one-item list.

        Returns:
            a new CreateTableDefinition with the same columns and `partition_sort`
            as this one, and with the `partition_by` extended as requested.
        """

        # Anything that is not a list is wrapped as a single-item list.
        if isinstance(partition_columns, list):
            extra_columns = partition_columns
        else:
            extra_columns = [partition_columns]

        extended_primary_key = TablePrimaryKeyDescriptor(
            partition_by=self.primary_key.partition_by + extra_columns,
            partition_sort=self.primary_key.partition_sort,
        )
        return CreateTableDefinition(
            columns=self.columns,
            primary_key=extended_primary_key,
        )

    def add_partition_sort(
        self, partition_sort: dict[str, int]
    ) -> CreateTableDefinition:
        """
        Produce a copy of this table definition whose `partition_sort` has been
        enriched with one or more column specifications. This is one of the steps
        of the fluent interface for progressively building a complete table
        definition.

        See the class docstring for a full example on using the fluent interface.

        The supplied entries are merged into the pre-existing `partition_sort`
        dictionary: new column names are added after the existing ones, while
        a column name already present gets its sort mode overwritten.
        As a consequence, these two patterns are equivalent:
        (1) X.add_partition_sort({"c1": 1, "c2": -1})
        (2) X.add_partition_sort({"c1": 1}).add_partition_sort({"c2": -1})

        Args:
            partition_sort: a dictionary mapping column names to their sort mode
                (ascending/descending, i.e. 1/-1. See also
                `astrapy.constants.SortMode`).

        Returns:
            a new CreateTableDefinition with the same columns and `partition_by`
            as this one, and with the `partition_sort` enriched as requested.
        """

        current_key = self.primary_key
        merged_sort = {**current_key.partition_sort, **partition_sort}
        return CreateTableDefinition(
            columns=self.columns,
            primary_key=TablePrimaryKeyDescriptor(
                partition_by=current_key.partition_by,
                partition_sort=merged_sort,
            ),
        )

    def build(self) -> CreateTableDefinition:
        """
        Conclude the fluent (builder) interface, yielding the table definition
        that has been assembled so far, ready for use in e.g. table creation.

        Note that this step may be automatically invoked by the receiving methods:
        however it is a good practice - and also adds to the readability of the code -
        to call it explicitly.

        See the class docstring for a full example on using the fluent interface.

        Returns:
            this very CreateTableDefinition object, unchanged.
        """

        finalized_definition = self
        return finalized_definition

Class variables

var columns : dict[str, TableColumnTypeDescriptor]
var primary_key : TablePrimaryKeyDescriptor

Static methods

def builder() ‑> CreateTableDefinition

Create an "empty" builder for constructing a table definition through a fluent interface. The resulting object has no columns and no primary key, traits that are to be added progressively with the corresponding methods.

Since it describes a "table with no columns at all", the result of this method alone is not an acceptable table definition for running a table creation method on a Database.

See the class docstring for a full example on using the fluent interface.

Returns

a CreateTableDefinition formally describing a table with no columns.

Expand source code
@staticmethod
def builder() -> CreateTableDefinition:
    """
    Start the fluent interface by returning an "empty" table definition:
    one with no columns, an empty `partition_by` and an empty `partition_sort`.
    Columns and primary key are meant to be added progressively by calling
    the corresponding methods on the returned object.

    Being the description of a "table with no columns at all", the result of
    this method alone is not an acceptable table definition for running a table
    creation method on a Database.

    See the class docstring for a full example on using the fluent interface.

    Returns:
        a CreateTableDefinition formally describing a table with no columns.
    """

    empty_primary_key = TablePrimaryKeyDescriptor(
        partition_by=[],
        partition_sort={},
    )
    return CreateTableDefinition(columns={}, primary_key=empty_primary_key)
def coerce(raw_input: CreateTableDefinition | dict[str, Any]) ‑> CreateTableDefinition

Normalize the input, whether an object already or a plain dictionary of the right structure, into a CreateTableDefinition.

Expand source code
@classmethod
def coerce(
    cls, raw_input: CreateTableDefinition | dict[str, Any]
) -> CreateTableDefinition:
    """
    Turn the input into a CreateTableDefinition.

    An input that is a CreateTableDefinition already is returned as is;
    anything else is handed to `_from_dict`, hence it is expected to be
    a plain dictionary of the right structure.
    """

    if not isinstance(raw_input, CreateTableDefinition):
        return cls._from_dict(raw_input)
    return raw_input

Methods

def add_column(self, column_name: str, column_type: str | ColumnType) ‑> CreateTableDefinition

Return a new table definition object with an added column of a scalar type (i.e. not a list, set or other composite type). This method is for use within the fluent interface for progressively building a complete table definition.

This method is an alias for add_scalar_column.

See the class docstring for a full example on using the fluent interface.

Args

column_name
the name of the new column to add to the definition.
column_type
a string, or a ColumnType value, defining the scalar type for the column.

Returns

a CreateTableDefinition obtained by adding (or replacing) the desired column to this table definition.

Expand source code
def add_column(
    self, column_name: str, column_type: str | ColumnType
) -> CreateTableDefinition:
    """
    Alias for `add_scalar_column`: produce a copy of this table definition
    enriched with one column of a scalar type (i.e. not a list, set or
    other composite type). This is one of the steps of the fluent interface
    for progressively building a complete table definition.

    See the class docstring for a full example on using the fluent interface.

    Args:
        column_name: the name of the column being added.
        column_type: the scalar type of the column, given either as a string
            or as a `ColumnType` value.

    Returns:
        the CreateTableDefinition returned by `add_scalar_column` for the
        same arguments, i.e. this definition with the requested column
        added (or replaced).
    """

    # Pure delegation: both arguments are forwarded by keyword, unchanged.
    extended_definition = self.add_scalar_column(
        column_name=column_name,
        column_type=column_type,
    )
    return extended_definition
def add_list_column(self, column_name: str, value_type: str | ColumnType) ‑> CreateTableDefinition

Return a new table definition object with an added column of 'list' type. This method is for use within the fluent interface for progressively building a complete table definition.

See the class docstring for a full example on using the fluent interface.

Args

column_name
the name of the new column to add to the definition.
value_type
a string, or a ColumnType value, defining the data type for the items in the list.

Returns

a CreateTableDefinition obtained by adding (or replacing) the desired column to this table definition.

Expand source code
def add_list_column(
    self, column_name: str, value_type: str | ColumnType
) -> CreateTableDefinition:
    """
    Produce a copy of this table definition enriched with one column
    of 'list' type. This is one of the steps of the fluent interface
    for progressively building a complete table definition.

    See the class docstring for a full example on using the fluent interface.

    Args:
        column_name: the name of the column being added.
        value_type: the data type of the items in the list, given either
            as a string or as a `ColumnType` value.

    Returns:
        a new CreateTableDefinition with the same primary key as this one
        and the same columns plus the requested one. A pre-existing column
        with the same name is replaced.
    """

    list_descriptor = TableValuedColumnTypeDescriptor(
        column_type="list",
        value_type=value_type,
    )
    return CreateTableDefinition(
        columns={**self.columns, column_name: list_descriptor},
        primary_key=self.primary_key,
    )
def add_map_column(self, column_name: str, key_type: str | ColumnType, value_type: str | ColumnType) ‑> CreateTableDefinition

Return a new table definition object with an added column of 'map' type. This method is for use within the fluent interface for progressively building a complete table definition.

See the class docstring for a full example on using the fluent interface.

Args

column_name
the name of the new column to add to the definition.
key_type
a string, or a ColumnType value, defining the data type for the keys in the map.
value_type
a string, or a ColumnType value, defining the data type for the values in the map.

Returns

a CreateTableDefinition obtained by adding (or replacing) the desired column to this table definition.

Expand source code
def add_map_column(
    self,
    column_name: str,
    key_type: str | ColumnType,
    value_type: str | ColumnType,
) -> CreateTableDefinition:
    """
    Produce a copy of this table definition enriched with one column
    of 'map' type. This is one of the steps of the fluent interface
    for progressively building a complete table definition.

    See the class docstring for a full example on using the fluent interface.

    Args:
        column_name: the name of the column being added.
        key_type: the data type of the keys in the map, given either
            as a string or as a `ColumnType` value.
        value_type: the data type of the values in the map, given either
            as a string or as a `ColumnType` value.

    Returns:
        a new CreateTableDefinition with the same primary key as this one
        and the same columns plus the requested one. A pre-existing column
        with the same name is replaced.
    """

    map_descriptor = TableKeyValuedColumnTypeDescriptor(
        column_type="map",
        key_type=key_type,
        value_type=value_type,
    )
    return CreateTableDefinition(
        columns={**self.columns, column_name: map_descriptor},
        primary_key=self.primary_key,
    )
def add_partition_by(self, partition_columns: list[str] | str) ‑> CreateTableDefinition

Return a new table definition object with one or more added partition_by columns. This method is for use within the fluent interface for progressively building a complete table definition.

See the class docstring for a full example on using the fluent interface.

Successive calls append the requested columns at the end of the pre-existing partition_by list. In other words, these two patterns are equivalent: (1) X.add_partition_by(["col1", "col2"]) (2) X.add_partition_by(["col1"]).add_partition_by("col2")

Note that no deduplication is applied to the overall result: the caller should take care of not supplying the same column name more than once.

Args

partition_columns
a list of column names (strings) to be added to the full table partition key. A single string (not a list) is also accepted.

Returns

a CreateTableDefinition obtained by enriching the partition_by of this table definition as requested.

Expand source code
def add_partition_by(
    self, partition_columns: list[str] | str
) -> CreateTableDefinition:
    """
    Produce a copy of this table definition whose `partition_by` has been
    extended with one or more columns. This is one of the steps of the
    fluent interface for progressively building a complete table definition.

    See the class docstring for a full example on using the fluent interface.

    The requested columns are appended after those already in `partition_by`,
    so that these two patterns are equivalent:
    (1) X.add_partition_by(["col1", "col2"])
    (2) X.add_partition_by(["col1"]).add_partition_by("col2")

    No deduplication takes place on the resulting list: it is up to the
    caller to avoid supplying the same column name more than once.

    Args:
        partition_columns: the column names (strings) to append to the table
            partition key, as a list. A single string (not a list) is also
            accepted and is treated as a one-item list.

    Returns:
        a new CreateTableDefinition with the same columns and `partition_sort`
        as this one, and with the `partition_by` extended as requested.
    """

    # Anything that is not a list is wrapped as a single-item list.
    if isinstance(partition_columns, list):
        extra_columns = partition_columns
    else:
        extra_columns = [partition_columns]

    extended_primary_key = TablePrimaryKeyDescriptor(
        partition_by=self.primary_key.partition_by + extra_columns,
        partition_sort=self.primary_key.partition_sort,
    )
    return CreateTableDefinition(
        columns=self.columns,
        primary_key=extended_primary_key,
    )
def add_partition_sort(self, partition_sort: dict[str, int]) ‑> CreateTableDefinition

Return a new table definition object with one or more added partition_sort column specifications. This method is for use within the fluent interface for progressively building a complete table definition.

See the class docstring for a full example on using the fluent interface.

Successive calls append (or replace) the requested columns at the end of the pre-existing partition_sort dictionary. In other words, these two patterns are equivalent: (1) X.add_partition_sort({"c1": 1, "c2": -1}) (2) X.add_partition_sort({"c1": 1}).add_partition_sort({"c2": -1})

Args

partition_sort
a dictionary mapping column names to their sort mode (ascending/descending, i.e. 1/-1. See also SortMode).

Returns

a CreateTableDefinition obtained by enriching the partition_sort of this table definition as requested.

Expand source code
def add_partition_sort(
    self, partition_sort: dict[str, int]
) -> CreateTableDefinition:
    """
    Produce a copy of this table definition whose `partition_sort` has been
    enriched with one or more column specifications. This is one of the steps
    of the fluent interface for progressively building a complete table
    definition.

    See the class docstring for a full example on using the fluent interface.

    The supplied entries are merged into the pre-existing `partition_sort`
    dictionary: new column names are added after the existing ones, while
    a column name already present gets its sort mode overwritten.
    As a consequence, these two patterns are equivalent:
    (1) X.add_partition_sort({"c1": 1, "c2": -1})
    (2) X.add_partition_sort({"c1": 1}).add_partition_sort({"c2": -1})

    Args:
        partition_sort: a dictionary mapping column names to their sort mode
            (ascending/descending, i.e. 1/-1. See also
            `astrapy.constants.SortMode`).

    Returns:
        a new CreateTableDefinition with the same columns and `partition_by`
        as this one, and with the `partition_sort` enriched as requested.
    """

    current_key = self.primary_key
    merged_sort = {**current_key.partition_sort, **partition_sort}
    return CreateTableDefinition(
        columns=self.columns,
        primary_key=TablePrimaryKeyDescriptor(
            partition_by=current_key.partition_by,
            partition_sort=merged_sort,
        ),
    )
def add_scalar_column(self, column_name: str, column_type: str | ColumnType) ‑> CreateTableDefinition

Return a new table definition object with an added column of a scalar type (i.e. not a list, set or other composite type). This method is for use within the fluent interface for progressively building a complete table definition.

See the class docstring for a full example on using the fluent interface.

Args

column_name
the name of the new column to add to the definition.
column_type
a string, or a ColumnType value, defining the scalar type for the column.

Returns

a CreateTableDefinition obtained by adding (or replacing) the desired column to this table definition.

Expand source code
def add_scalar_column(
    self, column_name: str, column_type: str | ColumnType
) -> CreateTableDefinition:
    """
    Produce a copy of this table definition enriched with one column
    of a scalar type (i.e. not a list, set or other composite type).
    This is one of the steps of the fluent interface for progressively
    building a complete table definition.

    See the class docstring for a full example on using the fluent interface.

    Args:
        column_name: the name of the column being added.
        column_type: the scalar type of the column, given either as a string
            or as a `ColumnType` value (it is normalized through
            `ColumnType.coerce`).

    Returns:
        a new CreateTableDefinition with the same primary key as this one
        and the same columns plus the requested one. A pre-existing column
        with the same name is replaced.
    """

    scalar_descriptor = TableScalarColumnTypeDescriptor(
        column_type=ColumnType.coerce(column_type)
    )
    return CreateTableDefinition(
        columns={**self.columns, column_name: scalar_descriptor},
        primary_key=self.primary_key,
    )
def add_set_column(self, column_name: str, value_type: str | ColumnType) ‑> CreateTableDefinition

Return a new table definition object with an added column of 'set' type. This method is for use within the fluent interface for progressively building a complete table definition.

See the class docstring for a full example on using the fluent interface.

Args

column_name
the name of the new column to add to the definition.
value_type
a string, or a ColumnType value, defining the data type for the items in the set.

Returns

a CreateTableDefinition obtained by adding (or replacing) the desired column to this table definition.

Expand source code
def add_set_column(
    self, column_name: str, value_type: str | ColumnType
) -> CreateTableDefinition:
    """
    Produce a copy of this table definition enriched with one column
    of 'set' type. This is one of the steps of the fluent interface
    for progressively building a complete table definition.

    See the class docstring for a full example on using the fluent interface.

    Args:
        column_name: the name of the column being added.
        value_type: the data type of the items in the set, given either
            as a string or as a `ColumnType` value.

    Returns:
        a new CreateTableDefinition with the same primary key as this one
        and the same columns plus the requested one. A pre-existing column
        with the same name is replaced.
    """

    set_descriptor = TableValuedColumnTypeDescriptor(
        column_type="set",
        value_type=value_type,
    )
    return CreateTableDefinition(
        columns={**self.columns, column_name: set_descriptor},
        primary_key=self.primary_key,
    )
def add_vector_column(self, column_name: str, *, dimension: int | None = None, service: VectorServiceOptions | dict[str, Any] | None = None) ‑> CreateTableDefinition

Return a new table definition object with an added column of 'vector' type. This method is for use within the fluent interface for progressively building a complete table definition.

See the class docstring for a full example on using the fluent interface.

Args

column_name
the name of the new column to add to the definition.
dimension
the dimensionality of the vector, i.e. the number of components each vector in this column will have. If a service parameter is supplied and the vectorize model allows for it, the dimension may be left unspecified to have the API set a default value. The Data API will raise an error if a table creation is attempted with a vector column for which neither a service nor the dimension are given.
service
a VectorServiceOptions object, or an equivalent plain dictionary, defining the server-side embedding service associated to the column, if desired.

Returns

a CreateTableDefinition obtained by adding (or replacing) the desired column to this table definition.

Expand source code
def add_vector_column(
    self,
    column_name: str,
    *,
    dimension: int | None = None,
    service: VectorServiceOptions | dict[str, Any] | None = None,
) -> CreateTableDefinition:
    """
    Produce a copy of this table definition enriched with one column
    of 'vector' type. This is one of the steps of the fluent interface
    for progressively building a complete table definition.

    See the class docstring for a full example on using the fluent interface.

    Args:
        column_name: the name of the column being added.
        dimension: the number of components each vector stored in this column
            will have. It may be left unspecified when a `service` is given
            and the vectorize model allows for it, in which case the API sets
            a default value. The Data API will raise an error if a table
            creation is attempted with a vector column for which neither
            a service nor the dimension are given.
        service: the server-side embedding service to associate to the column,
            if desired, expressed as a `VectorServiceOptions` object or as
            an equivalent plain dictionary (it is normalized through
            `VectorServiceOptions.coerce`).

    Returns:
        a new CreateTableDefinition with the same primary key as this one
        and the same columns plus the requested one. A pre-existing column
        with the same name is replaced.
    """

    vector_descriptor = TableVectorColumnTypeDescriptor(
        column_type="vector",
        dimension=dimension,
        service=VectorServiceOptions.coerce(service),
    )
    return CreateTableDefinition(
        columns={**self.columns, column_name: vector_descriptor},
        primary_key=self.primary_key,
    )
def as_dict(self) ‑> dict[str, typing.Any]

Recast this object into a dictionary.

Expand source code
def as_dict(self) -> dict[str, Any]:
    """
    Recast this object into a dictionary.

    The result has a "columns" entry, mapping each column name to the
    `as_dict()` of its descriptor, and a "primaryKey" entry holding the
    `as_dict()` of the primary key. Entries whose value is None are left out.
    """

    columns_payload = {
        column_name: column_descriptor.as_dict()
        for column_name, column_descriptor in self.columns.items()
    }
    full_payload = {
        "columns": columns_payload,
        "primaryKey": self.primary_key.as_dict(),
    }
    return {
        entry_key: entry_value
        for entry_key, entry_value in full_payload.items()
        if entry_value is not None
    }
def build(self) ‑> CreateTableDefinition

The final step in the fluent (builder) interface. Calling this method finalizes the definition that has been built so far and makes it into a table definition ready for use in e.g. table creation.

Note that this step may be automatically invoked by the receiving methods: however it is a good practice - and also adds to the readability of the code - to call it explicitly.

See the class docstring for a full example on using the fluent interface.

Returns

a CreateTableDefinition obtained by finalizing the definition being built so far.

Expand source code
def build(self) -> CreateTableDefinition:
    """
    Conclude the fluent (builder) interface, yielding the table definition
    that has been assembled so far, ready for use in e.g. table creation.

    Note that this step may be automatically invoked by the receiving methods:
    however it is a good practice - and also adds to the readability of the code -
    to call it explicitly.

    See the class docstring for a full example on using the fluent interface.

    Returns:
        this very CreateTableDefinition object, unchanged.
    """

    finalized_definition = self
    return finalized_definition