Module astrapy.data.info.collection_descriptor

Expand source code
# Copyright DataStax, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

from dataclasses import dataclass
from typing import Any

from astrapy.data.info.database_info import AstraDBDatabaseInfo
from astrapy.data.info.vectorize import VectorServiceOptions
from astrapy.utils.parsing import _warn_residual_keys

# Indexing modes accepted by `CollectionDefinition.set_indexing`; the
# user-provided mode is lowercased before being checked against this set.
INDEXING_ALLOWED_MODES = {"allow", "deny"}


@dataclass
class CollectionInfo:
    """
    Identifying information for a collection, bundled together with the
    information about the database that hosts it.

    Attributes:
        database_info: the AstraDBDatabaseInfo object describing the database
            the collection belongs to.
        keyspace: name of the keyspace containing the collection.
        name: name of the collection. It is unique within its keyspace
            (the uniqueness spans both tables and collections).
        full_name: database-wide identifier of the collection, formatted
            as "keyspace.collection_name".
    """

    database_info: AstraDBDatabaseInfo
    keyspace: str
    name: str
    full_name: str


@dataclass
class CollectionDefaultIDOptions:
    """
    Model of the "defaultId" section of the options of a collection.
    The allowed values are listed in the Data API specifications.

    Attributes:
        default_id_type: the kind of identifier the Data API generates for
            documents inserted without an explicit `_id` field. It can be
            any of `DefaultIdType.UUID`, `DefaultIdType.OBJECTID`,
            `DefaultIdType.UUIDV6`, `DefaultIdType.UUIDV7` and
            `DefaultIdType.DEFAULT`.
    """

    default_id_type: str

    def as_dict(self) -> dict[str, Any]:
        """Express this object as a plain dictionary (with a "type" key)."""

        dict_form: dict[str, Any] = {}
        dict_form["type"] = self.default_id_type
        return dict_form

    @staticmethod
    def _from_dict(
        raw_dict: dict[str, Any] | None,
    ) -> CollectionDefaultIDOptions | None:
        """
        Build a CollectionDefaultIDOptions out of a dictionary, such as the
        one found in a Data API response. A None input is passed through
        as None; otherwise the dictionary must have a "type" key.
        """

        # guard clause: no "defaultId" section means no object to build
        if raw_dict is None:
            return None
        return CollectionDefaultIDOptions(default_id_type=raw_dict["type"])


@dataclass
class CollectionVectorOptions:
    """
    Model of the "vector" section of the options of a collection.
    The allowed values are listed in the Data API specifications.

    Attributes:
        dimension: optional positive integer, the number of components of
            each vector (i.e. the dimensionality of the vector space).
        metric: optional similarity metric for vector search. If provided,
            it is a (string) value among `VectorMetric.DOT_PRODUCT`,
            `VectorMetric.EUCLIDEAN` and `VectorMetric.COSINE`.
        source_model: a hint that lets the vector index tune itself for
            optimal performance with a given embedding model. See the
            Data API documentation for the allowed values. Defaults to "other".
        service: optional VectorServiceOptions object, for collections
            configured with a vectorize service (server-side embedding
            computation).
    """

    dimension: int | None = None
    metric: str | None = None
    source_model: str | None = None
    service: VectorServiceOptions | None = None

    def as_dict(self) -> dict[str, Any]:
        """Express this object as a plain dictionary, omitting unset items."""

        service_dict = self.service.as_dict() if self.service is not None else None
        all_items = {
            "dimension": self.dimension,
            "metric": self.metric,
            "service": service_dict,
            "sourceModel": self.source_model,
        }
        # only None is dropped: falsy-but-set values (e.g. 0) are retained
        return {key: value for key, value in all_items.items() if value is not None}

    @staticmethod
    def _from_dict(raw_dict: dict[str, Any] | None) -> CollectionVectorOptions | None:
        """
        Build a CollectionVectorOptions out of a dictionary, such as the
        one found in a Data API response. A None input is passed through
        as None; missing keys result in None attributes.
        """

        # guard clause: no "vector" section means no object to build
        if raw_dict is None:
            return None
        return CollectionVectorOptions(
            dimension=raw_dict.get("dimension"),
            metric=raw_dict.get("metric"),
            source_model=raw_dict.get("sourceModel"),
            service=VectorServiceOptions._from_dict(raw_dict.get("service")),
        )


@dataclass
class CollectionDefinition:
    """
    A structure expressing the options of a collection.
    See the Data API specifications for detailed specification and allowed values.

    Instances of this object can be created in three ways: using a fluent interface,
    passing a fully-formed definition to the class constructor, or coercing an
    appropriately-shaped plain dictionary into this class.
    See the examples below and the Table documentation for more details.

    Attributes left to None are omitted from the dictionary returned by `as_dict`
    (and so are sub-dictionaries that turn out to be empty).

    Attributes:
        vector: an optional CollectionVectorOptions object.
        indexing: an optional dictionary with the "indexing" collection properties.
            It has the form `{"deny": [...]}` or `{"allow": [...]}`, where the
            value is a list of document paths (or just `["*"]`) to exclude from,
            or include in, collection indexing, respectively.
        default_id: an optional CollectionDefaultIDOptions object (see).

    Example:
        >>> from astrapy.constants import VectorMetric
        >>> from astrapy.info import CollectionDefinition, CollectionVectorOptions
        >>>
        >>> # Create a collection definition with the fluent interface:
        >>> collection_definition = (
        ...     CollectionDefinition.builder()
        ...     .set_vector_dimension(3)
        ...     .set_vector_metric(VectorMetric.DOT_PRODUCT)
        ...     .set_indexing("deny", ["annotations", "logs"])
        ...     .build()
        ... )
        >>>
        >>> # Create a collection definition passing everything to the constructor:
        >>> collection_definition_1 = CollectionDefinition(
        ...     vector=CollectionVectorOptions(
        ...         dimension=3,
        ...         metric=VectorMetric.DOT_PRODUCT,
        ...     ),
        ...     indexing={"deny": ["annotations", "logs"]},
        ... )
        >>>
        >>> # Coerce a dictionary into a collection definition:
        >>> collection_definition_2_dict = {
        ...     "indexing": {"deny": ["annotations", "logs"]},
        ...     "vector": {
        ...         "dimension": 3,
        ...         "metric": VectorMetric.DOT_PRODUCT,
        ...     },
        ... }
        >>> collection_definition_2 = CollectionDefinition.coerce(
        ...     collection_definition_2_dict
        ... )
        >>>
        >>> # The three created objects are exactly identical:
        >>> collection_definition_2 == collection_definition_1
        True
        >>> collection_definition_2 == collection_definition
        True
    """

    # Each setting defaults to None, meaning "not specified".
    vector: CollectionVectorOptions | None = None
    indexing: dict[str, Any] | None = None
    default_id: CollectionDefaultIDOptions | None = None

    def __repr__(self) -> str:
        not_null_pieces = [
            pc
            for pc in [
                None if self.vector is None else f"vector={self.vector.__repr__()}",
                (
                    None
                    if self.indexing is None
                    else f"indexing={self.indexing.__repr__()}"
                ),
                (
                    None
                    if self.default_id is None
                    else f"default_id={self.default_id.__repr__()}"
                ),
            ]
            if pc is not None
        ]
        return f"{self.__class__.__name__}({', '.join(not_null_pieces)})"

    def as_dict(self) -> dict[str, Any]:
        """Recast this object into a dictionary."""

        return {
            k: v
            for k, v in {
                "vector": None if self.vector is None else self.vector.as_dict(),
                "indexing": self.indexing,
                "defaultId": (
                    None if self.default_id is None else self.default_id.as_dict()
                ),
            }.items()
            if v is not None
            if v != {}
        }

    @classmethod
    def _from_dict(cls, raw_dict: dict[str, Any]) -> CollectionDefinition:
        """
        Build a CollectionDefinition out of a dictionary, such as the one
        found in a Data API response.

        The input is first handed to `_warn_residual_keys`, along with the
        set of expected keys ("vector", "indexing", "defaultId").
        """

        expected_keys = {"vector", "indexing", "defaultId"}
        _warn_residual_keys(cls, raw_dict, expected_keys)
        vector_options = CollectionVectorOptions._from_dict(raw_dict.get("vector"))
        indexing_options = raw_dict.get("indexing")
        default_id_options = CollectionDefaultIDOptions._from_dict(
            raw_dict.get("defaultId")
        )
        return CollectionDefinition(
            vector=vector_options,
            indexing=indexing_options,
            default_id=default_id_options,
        )

    @classmethod
    def coerce(
        cls, raw_input: CollectionDefinition | dict[str, Any]
    ) -> CollectionDefinition:
        """
        Normalize the input, whether an object already or a plain dictionary
        of the right structure, into a CollectionDefinition.
        """

        if isinstance(raw_input, CollectionDefinition):
            return raw_input
        else:
            return cls._from_dict(raw_input)

    @staticmethod
    def builder() -> CollectionDefinition:
        """
        Create an "empty" builder for constructing a collection definition through
        a fluent interface. The resulting object has no defined properties,
        traits that can be added progressively with the corresponding methods.

        See the class docstring for a full example on using the fluent interface.

        Returns:
            a CollectionDefinition for the simplest possible creatable collection.
        """

        return CollectionDefinition()

    def set_indexing(
        self, indexing_mode: str | None, indexing_target: list[str] | None = None
    ) -> CollectionDefinition:
        """
        Return a new collection definition object with a new indexing setting.
        The indexing can be set to something (fully overwriting any pre-existing
        configuration), or removed entirely. This method is for use within the
        fluent interface for progressively building a complete collection definition.

        See the class docstring for a full example on using the fluent interface.

        Args:
            indexing_mode: one of "allow" or "deny" to configure indexing, or None
                in case one wants to remove the setting.
            indexing_target: a list of the document paths covered by the allow/deny
                prescription. Passing this parameter when `indexing_mode` is None
                results in an error.

        Returns:
            a CollectionDefinition obtained by adding (or replacing) the desired
            indexing setting to this collection definition.
        """

        if indexing_mode is None:
            if indexing_target is not None:
                raise ValueError("Cannot pass an indexing target if unsetting indexing")
            return CollectionDefinition(
                vector=self.vector,
                indexing=None,
                default_id=self.default_id,
            )
        _i_mode = indexing_mode.lower()
        if _i_mode not in INDEXING_ALLOWED_MODES:
            msg = (
                f"Unknown indexing mode: '{indexing_mode}'. "
                f"Allowed values are: {', '.join(INDEXING_ALLOWED_MODES)}."
            )
            raise ValueError(msg)
        _i_target: list[str] = indexing_target or []
        return CollectionDefinition(
            vector=self.vector,
            indexing={indexing_mode: indexing_target},
            default_id=self.default_id,
        )

    def set_default_id(self, default_id_type: str | None) -> CollectionDefinition:
        """
        Return a new collection definition object with a new setting for the
        collection 'default ID type'. This method is for use within the
        fluent interface for progressively building a complete collection definition.

        See the class docstring for a full example on using the fluent interface.

        Args:
            default_id_type: one of the values of `astrapy.constants.DefaultIdType`
                (or the equivalent string) to set a default ID type for a collection;
                alternatively, None to remove the corresponding configuration.

        Returns:
            a CollectionDefinition obtained by adding (or replacing) the desired
            default ID type setting to this collection definition.
        """

        if default_id_type is None:
            return CollectionDefinition(
                vector=self.vector,
                indexing=self.indexing,
                default_id=None,
            )

        return CollectionDefinition(
            vector=self.vector,
            indexing=self.indexing,
            default_id=CollectionDefaultIDOptions(
                default_id_type=default_id_type,
            ),
        )

    def set_vector_dimension(self, dimension: int | None) -> CollectionDefinition:
        """
        Return a new collection definition object where the vector dimension
        of the collection is set as requested. This method is part of the
        fluent interface for progressively building a complete collection
        definition.

        See the class docstring for a full example on using the fluent interface.

        Args:
            dimension: an integer, the number of components of vectors in the
                collection. Setting even just one vector-related property makes
                the described collection a "vector collection".
                Providing None removes this setting.

        Returns:
            a CollectionDefinition obtained by adding (or replacing) the desired
            vector-related setting to this collection definition.
        """

        # start from the current vector options, or from blank ones if missing
        current_vector = self.vector or CollectionVectorOptions()
        updated_vector = CollectionVectorOptions(
            dimension=dimension,
            metric=current_vector.metric,
            source_model=current_vector.source_model,
            service=current_vector.service,
        )
        return CollectionDefinition(
            vector=updated_vector,
            indexing=self.indexing,
            default_id=self.default_id,
        )

    def set_vector_metric(self, metric: str | None) -> CollectionDefinition:
        """
        Return a new collection definition object where the vector similarity
        metric of the collection is set as requested. This method is part of
        the fluent interface for progressively building a complete collection
        definition.

        See the class docstring for a full example on using the fluent interface.

        Args:
            metric: a value of those in `astrapy.constants.VectorMetric`, or an
                equivalent string such as "dot_product", used for vector search
                within the collection. Setting even just one vector-related property
                makes the described collection a "vector collection".
                Providing None removes this setting.

        Returns:
            a CollectionDefinition obtained by adding (or replacing) the desired
            vector-related setting to this collection definition.
        """

        # start from the current vector options, or from blank ones if missing
        current_vector = self.vector or CollectionVectorOptions()
        updated_vector = CollectionVectorOptions(
            dimension=current_vector.dimension,
            metric=metric,
            source_model=current_vector.source_model,
            service=current_vector.service,
        )
        return CollectionDefinition(
            vector=updated_vector,
            indexing=self.indexing,
            default_id=self.default_id,
        )

    def set_vector_source_model(self, source_model: str | None) -> CollectionDefinition:
        """
        Return a new collection definition object where the vector
        'source model' parameter of the collection is set as requested.
        This method is part of the fluent interface for progressively
        building a complete collection definition.

        See the class docstring for a full example on using the fluent interface.

        Args:
            source_model: an optional string setting for the vector index, to help
                it pick the set of parameters best suited to a specific embedding model.
                See the Data API documentation for more details.
                Setting even just one vector-related property makes the described
                collection a "vector collection". Providing None
                removes this setting - the Data API will use its defaults.

        Returns:
            a CollectionDefinition obtained by adding (or replacing) the desired
            vector-related setting to this collection definition.
        """

        # start from the current vector options, or from blank ones if missing
        current_vector = self.vector or CollectionVectorOptions()
        updated_vector = CollectionVectorOptions(
            dimension=current_vector.dimension,
            metric=current_vector.metric,
            source_model=source_model,
            service=current_vector.service,
        )
        return CollectionDefinition(
            vector=updated_vector,
            indexing=self.indexing,
            default_id=self.default_id,
        )

    def set_vector_service(
        self,
        provider: str | VectorServiceOptions | None,
        model_name: str | None = None,
        *,
        authentication: dict[str, Any] | None = None,
        parameters: dict[str, Any] | None = None,
    ) -> CollectionDefinition:
        """
        Return a new collection definition object where the vectorize
        (i.e. server-side embeddings) service of the collection is set as
        requested, or removed. This method is part of the fluent interface
        for progressively building a complete collection definition.

        See the class docstring for a full example on using the fluent interface.

        Args:
            provider: this can be (1) a whole `VectorServiceOptions` object encoding
                all desired properties for a vectorize service; or (2) it can be None,
                to signify removal of the entire vectorize setting; alternatively,
                (3) it can be a string, the vectorize provider name as seen in the
                response from the database's `find_embedding_providers` method. In the
                latter case, the other parameters should also be provided as needed.
                See the examples below for an illustration of these usage patterns.
            model_name: a string, the name of the vectorize model to use (must be
                compatible with the chosen provider).
            authentication: a dictionary with the required authentication information
                if the vectorize makes use of secrets (API Keys) stored in the database
                Key Management System. See the Data API for more information on
                storing an API Key secret in one's Astra DB account.
            parameters: a free-form key-value mapping providing additional,
                model-dependent configuration settings. The allowed parameters for
                a given model are specified in the response of the Database
                `find_embedding_providers` method.

        Returns:
            a CollectionDefinition obtained by adding (or replacing) the desired
            vector-related setting to this collection definition.

        Raises:
            ValueError: if any of `model_name`, `authentication`, `parameters`
                is passed while `provider` is a `VectorServiceOptions` or None.

        Example:
            >>> from astrapy.info import CollectionDefinition, VectorServiceOptions
            >>>
            >>> zero = CollectionDefinition.builder()
            >>>
            >>> svc1 = zero.set_vector_service(
            ...     "myProvider",
            ...     "myModelName",
            ...     parameters={"p": "z"},
            ... )
            >>> print(svc1.build().as_dict())
            {'vector': {'service': {'provider': 'myProvider', 'modelName': 'myModelName', 'parameters': {'p': 'z'}}}}
            >>>
            >>> myVecSvcOpt = VectorServiceOptions(
            ...     provider="myProvider",
            ...     model_name="myModelName",
            ...     parameters={"p": "z"},
            ... )
            >>> svc2 = zero.set_vector_service(myVecSvcOpt).build()
            >>> print(svc2.as_dict())
            {'vector': {'service': {'provider': 'myProvider', 'modelName': 'myModelName', 'parameters': {'p': 'z'}}}}
            >>>
            >>> reset = svc1.set_vector_service(None).build()
            >>> print(reset.as_dict())
            {}
        """

        current_vector = self.vector or CollectionVectorOptions()
        # True as soon as one of the "piecewise" service settings is passed
        extras_given = not (
            model_name is None and authentication is None and parameters is None
        )

        new_service: VectorServiceOptions | None
        if isinstance(provider, VectorServiceOptions):
            # case (1): a ready-made service object, to be used as is
            if extras_given:
                raise ValueError(
                    "Parameters 'model_name', 'authentication' and 'parameters' "
                    "cannot be passed when setting a VectorServiceOptions directly."
                )
            new_service = provider
        elif provider is None:
            # case (2): removal of the service setting
            if extras_given:
                raise ValueError(
                    "Parameters 'model_name', 'authentication' and 'parameters' "
                    "cannot be passed when unsetting the vector service."
                )
            new_service = None
        else:
            # case (3): the service is assembled from its individual settings
            new_service = VectorServiceOptions(
                provider=provider,
                model_name=model_name,
                authentication=authentication,
                parameters=parameters,
            )

        updated_vector = CollectionVectorOptions(
            dimension=current_vector.dimension,
            metric=current_vector.metric,
            source_model=current_vector.source_model,
            service=new_service,
        )
        return CollectionDefinition(
            vector=updated_vector,
            indexing=self.indexing,
            default_id=self.default_id,
        )

    def build(self) -> CollectionDefinition:
        """
        The final step in the fluent (builder) interface. Calling this method
        finalizes the definition that has been built so far and makes it into a
        collection definition ready for use in e.g. table creation.

        Note that this step may be automatically invoked by the receiving methods:
        however it is a good practice - and also adds to the readability of the code -
        to call it explicitly.

        See the class docstring for a full example on using the fluent interface.

        Returns:
            a CollectionDefinition obtained by finalizing the definition being
                built so far.
        """

        return self


@dataclass
class CollectionDescriptor:
    """
    A structure expressing the full description of a collection as the Data API
    returns it, i.e. its name and its definition.

    Attributes:
        name: the name of the collection.
        definition: a CollectionDefinition instance.
        raw_descriptor: the raw response from the Data API. This attribute is
            not taken into account when comparing two descriptors for equality
            and is not part of the `as_dict` output.

    Note:
        although the API format has the collection settings in a field called
        "options" (both in payloads and in responses, consistently), the corresponding
        attribute of this object is called `definition` to keep consistency with the
        TableDescriptor class and the attribute's data type (`CollectionDefinition`).
        As a consequence, when coercing a plain dictionary into this class, care must
        be taken that the plain dictionary key be "options", exactly as it appears
        in a response from the API.
    """

    name: str
    definition: CollectionDefinition
    # Set to the whole source dictionary when the object is created by `_from_dict`.
    raw_descriptor: dict[str, Any] | None

    def __repr__(self) -> str:
        not_null_pieces = [
            pc
            for pc in [
                f"name={self.name.__repr__()}",
                f"definition={self.definition.__repr__()}",
                None if self.raw_descriptor is None else "raw_descriptor=...",
            ]
            if pc is not None
        ]
        return f"{self.__class__.__name__}({', '.join(not_null_pieces)})"

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, CollectionDescriptor):
            return self.name == other.name and self.definition == other.definition
        else:
            return False

    def as_dict(self) -> dict[str, Any]:
        """
        Recast this object into a dictionary.
        Empty `definition` will not be returned at all.
        """

        return {
            k: v
            for k, v in {
                "name": self.name,
                "options": self.definition.as_dict(),
            }.items()
            if v
        }

    @classmethod
    def _from_dict(cls, raw_dict: dict[str, Any]) -> CollectionDescriptor:
        """
        Build a CollectionDescriptor out of a dictionary, such as the one
        found in a Data API response. The input dictionary itself is retained
        as the `raw_descriptor` attribute.

        The input is first handed to `_warn_residual_keys`, along with the
        set of expected keys ("name", "options").
        """

        expected_keys = {"name", "options"}
        _warn_residual_keys(cls, raw_dict, expected_keys)
        collection_name = raw_dict["name"]
        # a missing (or empty/null) "options" is treated as an empty definition
        raw_options = raw_dict.get("options") or {}
        return CollectionDescriptor(
            name=collection_name,
            definition=CollectionDefinition._from_dict(raw_options),
            raw_descriptor=raw_dict,
        )

    @classmethod
    def coerce(
        cls, raw_input: CollectionDescriptor | dict[str, Any]
    ) -> CollectionDescriptor:
        """
        Normalize the input, whether an object already or a plain dictionary
        of the right structure, into a CollectionDescriptor.
        """

        if isinstance(raw_input, CollectionDescriptor):
            return raw_input
        else:
            return cls._from_dict(raw_input)

Classes

class CollectionDefaultIDOptions (default_id_type: str)

The "defaultId" component of the collection options. See the Data API specifications for allowed values.

Attributes

default_id_type
this setting determines what type of IDs the Data API will generate when inserting documents that do not specify their _id field explicitly. Can be set to any of the values DefaultIdType.UUID, DefaultIdType.OBJECTID, DefaultIdType.UUIDV6, DefaultIdType.UUIDV7, DefaultIdType.DEFAULT.
Expand source code
@dataclass
class CollectionDefaultIDOptions:
    """
    Model of the "defaultId" section of the options of a collection.
    The allowed values are listed in the Data API specifications.

    Attributes:
        default_id_type: the kind of identifier the Data API generates for
            documents inserted without an explicit `_id` field. It can be
            any of `DefaultIdType.UUID`, `DefaultIdType.OBJECTID`,
            `DefaultIdType.UUIDV6`, `DefaultIdType.UUIDV7` and
            `DefaultIdType.DEFAULT`.
    """

    default_id_type: str

    def as_dict(self) -> dict[str, Any]:
        """Express this object as a plain dictionary (with a "type" key)."""

        dict_form: dict[str, Any] = {}
        dict_form["type"] = self.default_id_type
        return dict_form

    @staticmethod
    def _from_dict(
        raw_dict: dict[str, Any] | None,
    ) -> CollectionDefaultIDOptions | None:
        """
        Build a CollectionDefaultIDOptions out of a dictionary, such as the
        one found in a Data API response. A None input is passed through
        as None; otherwise the dictionary must have a "type" key.
        """

        # guard clause: no "defaultId" section means no object to build
        if raw_dict is None:
            return None
        return CollectionDefaultIDOptions(default_id_type=raw_dict["type"])

Class variables

var default_id_type : str

Methods

def as_dict(self) ‑> dict[str, typing.Any]

Recast this object into a dictionary.

Expand source code
def as_dict(self) -> dict[str, Any]:
    """Express this object as a plain dictionary (with a "type" key)."""

    dict_form: dict[str, Any] = {"type": self.default_id_type}
    return dict_form
class CollectionDefinition (vector: CollectionVectorOptions | None = None, indexing: dict[str, Any] | None = None, default_id: CollectionDefaultIDOptions | None = None)

A structure expressing the options of a collection. See the Data API specifications for detailed specification and allowed values.

Instances of this object can be created in three ways: using a fluent interface, passing a fully-formed definition to the class constructor, or coercing an appropriately-shaped plain dictionary into this class. See the examples below and the Table documentation for more details.

Attributes

vector
an optional CollectionVectorOptions object.
indexing
an optional dictionary with the "indexing" collection properties. It has the form {"deny": [...]} or {"allow": [...]}, where the value is a list of document paths (or alternatively just ["*"]) to exclude from, or include in, collection indexing, respectively.
default_id
an optional CollectionDefaultIDOptions object (see).

Example

>>> from astrapy.constants import VectorMetric
>>> from astrapy.info import CollectionDefinition, CollectionVectorOptions
>>>
>>> # Create a collection definition with the fluent interface:
>>> collection_definition = (
...     CollectionDefinition.builder()
...     .set_vector_dimension(3)
...     .set_vector_metric(VectorMetric.DOT_PRODUCT)
...     .set_indexing("deny", ["annotations", "logs"])
...     .build()
... )
>>>
>>> # Create a collection definition passing everything to the constructor:
>>> collection_definition_1 = CollectionDefinition(
...     vector=CollectionVectorOptions(
...         dimension=3,
...         metric=VectorMetric.DOT_PRODUCT,
...     ),
...     indexing={"deny": ["annotations", "logs"]},
... )
>>>
>>> # Coerce a dictionary into a collection definition:
>>> collection_definition_2_dict = {
...     "indexing": {"deny": ["annotations", "logs"]},
...     "vector": {
...         "dimension": 3,
...         "metric": VectorMetric.DOT_PRODUCT,
...     },
... }
>>> collection_definition_2 = CollectionDefinition.coerce(
...     collection_definition_2_dict
... )
>>>
>>> # The three created objects are exactly identical:
>>> collection_definition_2 == collection_definition_1
True
>>> collection_definition_2 == collection_definition
True
Expand source code
@dataclass
class CollectionDefinition:
    """
    A structure expressing the options of a collection.
    See the Data API specifications for detailed specification and allowed values.

    Instances of this object can be created in three ways: using a fluent interface,
    passing a fully-formed definition to the class constructor, or coercing an
    appropriately-shaped plain dictionary into this class.
    See the examples below and the Table documentation for more details.

    All `set_*` methods of the fluent interface leave the object they are called
    on untouched and return a new CollectionDefinition instead.

    Attributes:
        vector: an optional CollectionVectorOptions object.
        indexing: an optional dictionary with the "indexing" collection properties.
            This is in the form of a dictionary such as `{"deny": [...]}`
            or `{"allow": [...]}`, with a list of document paths, or alternatively
            just `["*"]`, to exclude from/include in collection indexing,
            respectively.
        default_id: an optional CollectionDefaultIDOptions object (see).

    Example:
        >>> from astrapy.constants import VectorMetric
        >>> from astrapy.info import CollectionDefinition, CollectionVectorOptions
        >>>
        >>> # Create a collection definition with the fluent interface:
        >>> collection_definition = (
        ...     CollectionDefinition.builder()
        ...     .set_vector_dimension(3)
        ...     .set_vector_metric(VectorMetric.DOT_PRODUCT)
        ...     .set_indexing("deny", ["annotations", "logs"])
        ...     .build()
        ... )
        >>>
        >>> # Create a collection definition passing everything to the constructor:
        >>> collection_definition_1 = CollectionDefinition(
        ...     vector=CollectionVectorOptions(
        ...         dimension=3,
        ...         metric=VectorMetric.DOT_PRODUCT,
        ...     ),
        ...     indexing={"deny": ["annotations", "logs"]},
        ... )
        >>>
        >>> # Coerce a dictionary into a collection definition:
        >>> collection_definition_2_dict = {
        ...     "indexing": {"deny": ["annotations", "logs"]},
        ...     "vector": {
        ...         "dimension": 3,
        ...         "metric": VectorMetric.DOT_PRODUCT,
        ...     },
        ... }
        >>> collection_definition_2 = CollectionDefinition.coerce(
        ...     collection_definition_2_dict
        ... )
        >>>
        >>> # The three created objects are exactly identical:
        >>> collection_definition_2 == collection_definition_1
        True
        >>> collection_definition_2 == collection_definition
        True
    """

    vector: CollectionVectorOptions | None = None
    indexing: dict[str, Any] | None = None
    default_id: CollectionDefaultIDOptions | None = None

    def __repr__(self) -> str:
        """Return a compact representation showing only the attributes that are set."""

        not_null_pieces = [
            pc
            for pc in [
                None if self.vector is None else f"vector={self.vector!r}",
                None if self.indexing is None else f"indexing={self.indexing!r}",
                None if self.default_id is None else f"default_id={self.default_id!r}",
            ]
            if pc is not None
        ]
        return f"{self.__class__.__name__}({', '.join(not_null_pieces)})"

    def as_dict(self) -> dict[str, Any]:
        """
        Recast this object into a dictionary.

        Entries whose value would be None or an empty dictionary are left out.
        """

        return {
            k: v
            for k, v in {
                "vector": None if self.vector is None else self.vector.as_dict(),
                "indexing": self.indexing,
                "defaultId": (
                    None if self.default_id is None else self.default_id.as_dict()
                ),
            }.items()
            if v is not None and v != {}
        }

    @classmethod
    def _from_dict(cls, raw_dict: dict[str, Any]) -> CollectionDefinition:
        """
        Create an instance of CollectionDefinition from a dictionary
        such as one from the Data API.

        Only the "vector", "indexing" and "defaultId" keys are read; the full
        dictionary is handed to `_warn_residual_keys` together with that set
        of known keys.
        """

        _warn_residual_keys(cls, raw_dict, {"vector", "indexing", "defaultId"})
        return CollectionDefinition(
            vector=CollectionVectorOptions._from_dict(raw_dict.get("vector")),
            indexing=raw_dict.get("indexing"),
            default_id=CollectionDefaultIDOptions._from_dict(raw_dict.get("defaultId")),
        )

    @classmethod
    def coerce(
        cls, raw_input: CollectionDefinition | dict[str, Any]
    ) -> CollectionDefinition:
        """
        Normalize the input, whether an object already or a plain dictionary
        of the right structure, into a CollectionDefinition.

        An input that is already a CollectionDefinition is returned as is
        (the same object, not a copy).
        """

        if isinstance(raw_input, CollectionDefinition):
            return raw_input
        else:
            return cls._from_dict(raw_input)

    @staticmethod
    def builder() -> CollectionDefinition:
        """
        Create an "empty" builder for constructing a collection definition through
        a fluent interface. The resulting object has no defined properties,
        traits that can be added progressively with the corresponding methods.

        See the class docstring for a full example on using the fluent interface.

        Returns:
            a CollectionDefinition for the simplest possible creatable collection.
        """

        return CollectionDefinition()

    def set_indexing(
        self, indexing_mode: str | None, indexing_target: list[str] | None = None
    ) -> CollectionDefinition:
        """
        Return a new collection definition object with a new indexing setting.
        The indexing can be set to something (fully overwriting any pre-existing
        configuration), or removed entirely. This method is for use within the
        fluent interface for progressively building a complete collection definition.

        See the class docstring for a full example on using the fluent interface.

        Args:
            indexing_mode: one of "allow" or "deny" to configure indexing, or None
                in case one wants to remove the setting. The value is matched
                case-insensitively and stored in its lowercase form.
            indexing_target: a list of the document paths covered by the allow/deny
                prescription. If omitted while a mode is given, an empty list
                is stored. Passing this parameter when `indexing_mode` is None
                results in an error.

        Returns:
            a CollectionDefinition obtained by adding (or replacing) the desired
            indexing setting to this collection definition.

        Raises:
            ValueError: if `indexing_mode` is not an allowed mode, or if a target
                is passed while unsetting the indexing.
        """

        if indexing_mode is None:
            if indexing_target is not None:
                raise ValueError("Cannot pass an indexing target if unsetting indexing")
            return CollectionDefinition(
                vector=self.vector,
                indexing=None,
                default_id=self.default_id,
            )
        _i_mode = indexing_mode.lower()
        if _i_mode not in INDEXING_ALLOWED_MODES:
            # sorted(): the allowed modes are a set, whose iteration order is
            # not guaranteed; sorting keeps the message deterministic.
            msg = (
                f"Unknown indexing mode: '{indexing_mode}'. "
                f"Allowed values are: {', '.join(sorted(INDEXING_ALLOWED_MODES))}."
            )
            raise ValueError(msg)
        _i_target: list[str] = indexing_target or []
        # Store the normalized mode/target: the mode that was validated is the
        # lowercase one, and a missing target must not end up as a None value.
        return CollectionDefinition(
            vector=self.vector,
            indexing={_i_mode: _i_target},
            default_id=self.default_id,
        )

    def set_default_id(self, default_id_type: str | None) -> CollectionDefinition:
        """
        Return a new collection definition object with a new setting for the
        collection 'default ID type'. This method is for use within the
        fluent interface for progressively building a complete collection definition.

        See the class docstring for a full example on using the fluent interface.

        Args:
            default_id_type: one of the values of `astrapy.constants.DefaultIdType`
                (or the equivalent string) to set a default ID type for a collection;
                alternatively, None to remove the corresponding configuration.

        Returns:
            a CollectionDefinition obtained by adding (or replacing) the desired
            default ID type setting to this collection definition.
        """

        if default_id_type is None:
            return CollectionDefinition(
                vector=self.vector,
                indexing=self.indexing,
                default_id=None,
            )

        return CollectionDefinition(
            vector=self.vector,
            indexing=self.indexing,
            default_id=CollectionDefaultIDOptions(
                default_id_type=default_id_type,
            ),
        )

    def set_vector_dimension(self, dimension: int | None) -> CollectionDefinition:
        """
        Return a new collection definition object with a new setting for the
        collection's vector dimension. This method is for use within the
        fluent interface for progressively building a complete collection definition.

        See the class docstring for a full example on using the fluent interface.

        Args:
            dimension: an integer, the number of components of vectors in the
                collection. Setting even just one vector-related property makes
                the described collection a "vector collection".
                Providing None removes this setting.

        Returns:
            a CollectionDefinition obtained by adding (or replacing) the desired
            vector-related setting to this collection definition.
        """

        # Start from the current vector options (or blank ones) so that the
        # other vector-related settings are carried over unchanged.
        _vector_options = self.vector or CollectionVectorOptions()
        return CollectionDefinition(
            vector=CollectionVectorOptions(
                dimension=dimension,
                metric=_vector_options.metric,
                source_model=_vector_options.source_model,
                service=_vector_options.service,
            ),
            indexing=self.indexing,
            default_id=self.default_id,
        )

    def set_vector_metric(self, metric: str | None) -> CollectionDefinition:
        """
        Return a new collection definition object with a new setting for the
        collection's vector similarity metric. This method is for use within the
        fluent interface for progressively building a complete collection definition.

        See the class docstring for a full example on using the fluent interface.

        Args:
            metric: a value of those in `astrapy.constants.VectorMetric`, or an
                equivalent string such as "dot_product", used for vector search
                within the collection. Setting even just one vector-related property
                makes the described collection a "vector collection".
                Providing None removes this setting.

        Returns:
            a CollectionDefinition obtained by adding (or replacing) the desired
            vector-related setting to this collection definition.
        """

        # Start from the current vector options (or blank ones) so that the
        # other vector-related settings are carried over unchanged.
        _vector_options = self.vector or CollectionVectorOptions()
        return CollectionDefinition(
            vector=CollectionVectorOptions(
                dimension=_vector_options.dimension,
                metric=metric,
                source_model=_vector_options.source_model,
                service=_vector_options.service,
            ),
            indexing=self.indexing,
            default_id=self.default_id,
        )

    def set_vector_source_model(self, source_model: str | None) -> CollectionDefinition:
        """
        Return a new collection definition object with a new setting for the
        collection's vector 'source model' parameter. This method is for use within the
        fluent interface for progressively building a complete collection definition.

        See the class docstring for a full example on using the fluent interface.

        Args:
            source_model: an optional string setting for the vector index, to help
                it pick the set of parameters best suited to a specific embedding model.
                See the Data API documentation for more details.
                Setting even just one vector-related property makes the described
                collection a "vector collection". Providing None
                removes this setting - the Data API will use its defaults.

        Returns:
            a CollectionDefinition obtained by adding (or replacing) the desired
            vector-related setting to this collection definition.
        """

        # Start from the current vector options (or blank ones) so that the
        # other vector-related settings are carried over unchanged.
        _vector_options = self.vector or CollectionVectorOptions()
        return CollectionDefinition(
            vector=CollectionVectorOptions(
                dimension=_vector_options.dimension,
                metric=_vector_options.metric,
                source_model=source_model,
                service=_vector_options.service,
            ),
            indexing=self.indexing,
            default_id=self.default_id,
        )

    def set_vector_service(
        self,
        provider: str | VectorServiceOptions | None,
        model_name: str | None = None,
        *,
        authentication: dict[str, Any] | None = None,
        parameters: dict[str, Any] | None = None,
    ) -> CollectionDefinition:
        """
        Return a new collection definition object with a new setting for the
        collection's vectorize (i.e. server-side embeddings) service.
        This method is for use within the fluent interface for progressively
        building a complete collection definition.

        See the class docstring for a full example on using the fluent interface.

        Args:
            provider: this can be (1) a whole `VectorServiceOptions` object encoding
                all desired properties for a vectorize service; or (2) it can be None,
                to signify removal of the entire vectorize setting; alternatively,
                (3) it can be a string, the vectorize provider name as seen in the
                response from the database's `find_embedding_providers` method. In the
                latter case, the other parameters should also be provided as needed.
                See the examples below for an illustration of these usage patterns.
            model_name: a string, the name of the vectorize model to use (must be
                compatible with the chosen provider).
            authentication: a dictionary with the required authentication information
                if the vectorize makes use of secrets (API Keys) stored in the database
                Key Management System. See the Data API for more information on
                storing an API Key secret in one's Astra DB account.
            parameters: a free-form key-value mapping providing additional,
                model-dependent configuration settings. The allowed parameters for
                a given model are specified in the response of the Database
                `find_embedding_providers` method.

        Returns:
            a CollectionDefinition obtained by adding (or replacing) the desired
            vector-related setting to this collection definition.

        Raises:
            ValueError: if any of `model_name`, `authentication` or `parameters`
                is passed together with a `VectorServiceOptions` object or with
                a None provider.

        Example:
            >>> from astrapy.info import CollectionDefinition, VectorServiceOptions
            >>>
            >>> zero = CollectionDefinition.builder()
            >>>
            >>> svc1 = zero.set_vector_service(
            ...     "myProvider",
            ...     "myModelName",
            ...     parameters={"p": "z"},
            ... )
            >>> print(svc1.build().as_dict())
            {'vector': {'service': {'provider': 'myProvider', 'modelName': 'myModelName', 'parameters': {'p': 'z'}}}}
            >>>
            >>> myVecSvcOpt = VectorServiceOptions(
            ...     provider="myProvider",
            ...     model_name="myModelName",
            ...     parameters={"p": "z"},
            ... )
            >>> svc2 = zero.set_vector_service(myVecSvcOpt).build()
            >>> print(svc2.as_dict())
            {'vector': {'service': {'provider': 'myProvider', 'modelName': 'myModelName', 'parameters': {'p': 'z'}}}}
            >>>
            >>> reset = svc1.set_vector_service(None).build()
            >>> print(reset.as_dict())
            {}
        """

        _vector_options = self.vector or CollectionVectorOptions()
        # The detail arguments only make sense when `provider` is a plain string.
        _has_details = (
            model_name is not None
            or authentication is not None
            or parameters is not None
        )
        new_service: VectorServiceOptions | None
        if isinstance(provider, VectorServiceOptions):
            if _has_details:
                msg = (
                    "Parameters 'model_name', 'authentication' and 'parameters' "
                    "cannot be passed when setting a VectorServiceOptions directly."
                )
                raise ValueError(msg)
            new_service = provider
        elif provider is None:
            if _has_details:
                msg = (
                    "Parameters 'model_name', 'authentication' and 'parameters' "
                    "cannot be passed when unsetting the vector service."
                )
                raise ValueError(msg)
            new_service = None
        else:
            new_service = VectorServiceOptions(
                provider=provider,
                model_name=model_name,
                authentication=authentication,
                parameters=parameters,
            )
        return CollectionDefinition(
            vector=CollectionVectorOptions(
                dimension=_vector_options.dimension,
                metric=_vector_options.metric,
                source_model=_vector_options.source_model,
                service=new_service,
            ),
            indexing=self.indexing,
            default_id=self.default_id,
        )

    def build(self) -> CollectionDefinition:
        """
        The final step in the fluent (builder) interface. Calling this method
        finalizes the definition that has been built so far and makes it into a
        collection definition ready for use in e.g. collection creation.

        Note that this step may be automatically invoked by the receiving methods:
        however it is a good practice - and also adds to the readability of the code -
        to call it explicitly.

        See the class docstring for a full example on using the fluent interface.

        Returns:
            a CollectionDefinition obtained by finalizing the definition being
                built so far.
        """

        return self

Class variables

var default_id : CollectionDefaultIDOptions | None
var indexing : dict[str, typing.Any] | None
var vector : CollectionVectorOptions | None

Static methods

def builder() ‑> CollectionDefinition

Create an "empty" builder for constructing a collection definition through a fluent interface. The resulting object has no defined properties, traits that can be added progressively with the corresponding methods.

See the class docstring for a full example on using the fluent interface.

Returns

a CollectionDefinition for the simplest possible creatable collection.

Expand source code
@staticmethod
def builder() -> CollectionDefinition:
    """
    Entry point of the fluent interface: hand out a blank collection definition.

    The object is created without passing any argument, so none of its
    properties is set; the `set_*` methods can then be chained on it to
    add the desired traits one at a time.

    See the class docstring for a full example on using the fluent interface.

    Returns:
        a CollectionDefinition for the simplest possible creatable collection.
    """

    blank_definition = CollectionDefinition()
    return blank_definition
def coerce(raw_input: CollectionDefinition | dict[str, Any]) ‑> CollectionDefinition

Normalize the input, whether an object already or a plain dictionary of the right structure, into a CollectionDefinition.

Expand source code
@classmethod
def coerce(
    cls, raw_input: CollectionDefinition | dict[str, Any]
) -> CollectionDefinition:
    """
    Turn the input into a CollectionDefinition, accepting either an object
    of that class already or a plain dictionary of the right structure.

    An input that is already a CollectionDefinition is handed back unchanged
    (the same object); anything else is passed to `_from_dict`.
    """

    already_a_definition = isinstance(raw_input, CollectionDefinition)
    return raw_input if already_a_definition else cls._from_dict(raw_input)

Methods

def as_dict(self) ‑> dict[str, typing.Any]

Recast this object into a dictionary.

Expand source code
def as_dict(self) -> dict[str, Any]:
    """
    Express this object as a plain dictionary.

    The result can have the keys "vector", "indexing" and "defaultId";
    a key is omitted when its value would be None or an empty dictionary.
    """

    vector_part = None if self.vector is None else self.vector.as_dict()
    default_id_part = None if self.default_id is None else self.default_id.as_dict()
    candidates = (
        ("vector", vector_part),
        ("indexing", self.indexing),
        ("defaultId", default_id_part),
    )

    result: dict[str, Any] = {}
    for key, value in candidates:
        # Skip unset entries as well as entries that came out empty.
        if value is not None and value != {}:
            result[key] = value
    return result
def build(self) ‑> CollectionDefinition

The final step in the fluent (builder) interface. Calling this method finalizes the definition that has been built so far and makes it into a collection definition ready for use in e.g. collection creation.

Note that this step may be automatically invoked by the receiving methods: however it is a good practice - and also adds to the readability of the code - to call it explicitly.

See the class docstring for a full example on using the fluent interface.

Returns

a CollectionDefinition obtained by finalizing the definition being built so far.

Expand source code
def build(self) -> CollectionDefinition:
    """
    Conclude a fluent (builder) interface chain. The definition assembled
    so far is returned as the finished collection definition, ready to be
    used, for example, when creating a collection.

    The receiving methods may invoke this step automatically; calling it
    explicitly is nevertheless good practice and makes the code more readable.

    See the class docstring for a full example on using the fluent interface.

    Returns:
        a CollectionDefinition obtained by finalizing the definition being
            built so far.
    """

    # Nothing needs converting: this very object is the finished definition.
    finalized_definition = self
    return finalized_definition
def set_default_id(self, default_id_type: str | None) ‑> CollectionDefinition

Return a new collection definition object with a new setting for the collection 'default ID type'. This method is for use within the fluent interface for progressively building a complete collection definition.

See the class docstring for a full example on using the fluent interface.

Args

default_id_type
one of the values of DefaultIdType (or the equivalent string) to set a default ID type for a collection; alternatively, None to remove the corresponding configuration.

Returns

a CollectionDefinition obtained by adding (or replacing) the desired default ID type setting to this collection definition.

Expand source code
def set_default_id(self, default_id_type: str | None) -> CollectionDefinition:
    """
    Produce a new collection definition differing from this one only in its
    'default ID type' setting. Meant for use within the fluent interface
    for progressively building a complete collection definition.

    See the class docstring for a full example on using the fluent interface.

    Args:
        default_id_type: one of the values of `astrapy.constants.DefaultIdType`
            (or the equivalent string) to set a default ID type for a collection;
            alternatively, None to remove the corresponding configuration.

    Returns:
        a CollectionDefinition obtained by adding (or replacing) the desired
        default ID type setting to this collection definition.
    """

    new_default_id: CollectionDefaultIDOptions | None
    if default_id_type is not None:
        new_default_id = CollectionDefaultIDOptions(default_id_type=default_id_type)
    else:
        # A None type means the default-ID configuration is dropped altogether.
        new_default_id = None

    return CollectionDefinition(
        vector=self.vector,
        indexing=self.indexing,
        default_id=new_default_id,
    )
def set_indexing(self, indexing_mode: str | None, indexing_target: list[str] | None = None) ‑> CollectionDefinition

Return a new collection definition object with a new indexing setting. The indexing can be set to something (fully overwriting any pre-existing configuration), or removed entirely. This method is for use within the fluent interface for progressively building a complete collection definition.

See the class docstring for a full example on using the fluent interface.

Args

indexing_mode
one of "allow" or "deny" to configure indexing, or None in case one wants to remove the setting.
indexing_target
a list of the document paths covered by the allow/deny prescription. Passing this parameter when indexing_mode is None results in an error.

Returns

a CollectionDefinition obtained by adding (or replacing) the desired indexing setting to this collection definition.

Expand source code
def set_indexing(
    self, indexing_mode: str | None, indexing_target: list[str] | None = None
) -> CollectionDefinition:
    """
    Return a new collection definition object with a new indexing setting.
    The indexing can be set to something (fully overwriting any pre-existing
    configuration), or removed entirely. This method is for use within the
    fluent interface for progressively building a complete collection definition.

    See the class docstring for a full example on using the fluent interface.

    Args:
        indexing_mode: one of "allow" or "deny" to configure indexing, or None
            in case one wants to remove the setting. The value is matched
            case-insensitively and stored in its lowercase form.
        indexing_target: a list of the document paths covered by the allow/deny
            prescription. If omitted while a mode is given, an empty list
            is stored. Passing this parameter when `indexing_mode` is None
            results in an error.

    Returns:
        a CollectionDefinition obtained by adding (or replacing) the desired
        indexing setting to this collection definition.

    Raises:
        ValueError: if `indexing_mode` is not an allowed mode, or if a target
            is passed while unsetting the indexing.
    """

    if indexing_mode is None:
        if indexing_target is not None:
            raise ValueError("Cannot pass an indexing target if unsetting indexing")
        return CollectionDefinition(
            vector=self.vector,
            indexing=None,
            default_id=self.default_id,
        )
    _i_mode = indexing_mode.lower()
    if _i_mode not in INDEXING_ALLOWED_MODES:
        # sorted(): the allowed modes are a set, whose iteration order is
        # not guaranteed; sorting keeps the message deterministic.
        msg = (
            f"Unknown indexing mode: '{indexing_mode}'. "
            f"Allowed values are: {', '.join(sorted(INDEXING_ALLOWED_MODES))}."
        )
        raise ValueError(msg)
    _i_target: list[str] = indexing_target or []
    # Store the normalized mode/target: the mode that was validated is the
    # lowercase one, and a missing target must not end up as a None value.
    return CollectionDefinition(
        vector=self.vector,
        indexing={_i_mode: _i_target},
        default_id=self.default_id,
    )
def set_vector_dimension(self, dimension: int | None) ‑> CollectionDefinition

Return a new collection definition object with a new setting for the collection's vector dimension. This method is for use within the fluent interface for progressively building a complete collection definition.

See the class docstring for a full example on using the fluent interface.

Args

dimension
an integer, the number of components of vectors in the collection. Setting even just one vector-related property makes the described collection a "vector collection". Providing None removes this setting.

Returns

a CollectionDefinition obtained by adding (or replacing) the desired vector-related setting to this collection definition.

Expand source code
def set_vector_dimension(self, dimension: int | None) -> CollectionDefinition:
    """
    Produce a new collection definition differing from this one only in the
    collection's vector dimension. Meant for use within the fluent interface
    for progressively building a complete collection definition.

    See the class docstring for a full example on using the fluent interface.

    Args:
        dimension: an integer, the number of components of vectors in the
            collection. Setting even just one vector-related property makes
            the described collection a "vector collection".
            Providing None removes this setting.

    Returns:
        a CollectionDefinition obtained by adding (or replacing) the desired
        vector-related setting to this collection definition.
    """

    # Fall back to blank vector options when none are set yet, so that the
    # remaining vector settings can be copied over uniformly.
    current_options = self.vector or CollectionVectorOptions()
    updated_options = CollectionVectorOptions(
        dimension=dimension,
        metric=current_options.metric,
        source_model=current_options.source_model,
        service=current_options.service,
    )
    return CollectionDefinition(
        vector=updated_options,
        indexing=self.indexing,
        default_id=self.default_id,
    )
def set_vector_metric(self, metric: str | None) ‑> CollectionDefinition

Return a new collection definition object with a new setting for the collection's vector similarity metric. This method is for use within the fluent interface for progressively building a complete collection definition.

See the class docstring for a full example on using the fluent interface.

Args

metric
a value of those in VectorMetric, or an equivalent string such as "dot_product", used for vector search within the collection. Setting even just one vector-related property makes the described collection a "vector collection". Providing None removes this setting.

Returns

a CollectionDefinition obtained by adding (or replacing) the desired vector-related setting to this collection definition.

Expand source code
def set_vector_metric(self, metric: str | None) -> CollectionDefinition:
    """
    Produce a new collection definition differing from this one only in the
    collection's vector similarity metric. Meant for use within the fluent
    interface for progressively building a complete collection definition.

    See the class docstring for a full example on using the fluent interface.

    Args:
        metric: a value of those in `astrapy.constants.VectorMetric`, or an
            equivalent string such as "dot_product", used for vector search
            within the collection. Setting even just one vector-related property
            makes the described collection a "vector collection".
            Providing None removes this setting.

    Returns:
        a CollectionDefinition obtained by adding (or replacing) the desired
        vector-related setting to this collection definition.
    """

    # Fall back to blank vector options when none are set yet, so that the
    # remaining vector settings can be copied over uniformly.
    current_options = self.vector or CollectionVectorOptions()
    updated_options = CollectionVectorOptions(
        dimension=current_options.dimension,
        metric=metric,
        source_model=current_options.source_model,
        service=current_options.service,
    )
    return CollectionDefinition(
        vector=updated_options,
        indexing=self.indexing,
        default_id=self.default_id,
    )
def set_vector_service(self, provider: str | VectorServiceOptions | None, model_name: str | None = None, *, authentication: dict[str, Any] | None = None, parameters: dict[str, Any] | None = None) ‑> CollectionDefinition

Return a new collection definition object with a new setting for the collection's vectorize (i.e. server-side embeddings) service. This method is for use within the fluent interface for progressively building a complete collection definition.

See the class docstring for a full example on using the fluent interface.

Args

provider
this can be (1) a whole VectorServiceOptions object encoding all desired properties for a vectorize service; or (2) it can be None, to signify removal of the entire vectorize setting; alternatively, (3) it can be a string, the vectorize provider name as seen in the response from the database's find_embedding_providers method. In the last case, the other parameters should also be provided as needed. See the examples below for an illustration of these usage patterns.
model_name
a string, the name of the vectorize model to use (must be compatible with the chosen provider).
authentication
a dictionary with the required authentication information if the vectorize makes use of secrets (API Keys) stored in the database Key Management System. See the Data API for more information on storing an API Key secret in one's Astra DB account.
parameters
a free-form key-value mapping providing additional, model-dependent configuration settings. The allowed parameters for a given model are specified in the response of the Database find_embedding_providers method.

Returns

a CollectionDefinition obtained by adding (or replacing) the desired vector-related setting to this collection definition.

Example

>>> from astrapy.info import CollectionDefinition, VectorServiceOptions
>>>
>>> zero = CollectionDefinition.builder()
>>>
>>> svc1 = zero.set_vector_service(
...     "myProvider",
...     "myModelName",
...     parameters={"p": "z"},
... )
>>> print(svc1.build().as_dict())
{'vector': {'service': {'provider': 'myProvider', 'modelName': 'myModelName', 'parameters': {'p': 'z'}}}}
>>>
>>> myVecSvcOpt = VectorServiceOptions(
...     provider="myProvider",
...     model_name="myModelName",
...     parameters={"p": "z"},
... )
>>> svc2 = zero.set_vector_service(myVecSvcOpt).build()
>>> print(svc2.as_dict())
{'vector': {'service': {'provider': 'myProvider', 'modelName': 'myModelName', 'parameters': {'p': 'z'}}}}
>>>
>>> reset = svc1.set_vector_service(None).build()
>>> print(reset.as_dict())
{}
Expand source code
def set_vector_service(
    self,
    provider: str | VectorServiceOptions | None,
    model_name: str | None = None,
    *,
    authentication: dict[str, Any] | None = None,
    parameters: dict[str, Any] | None = None,
) -> CollectionDefinition:
    """
    Build a new collection definition that differs from this one only in the
    vectorize (i.e. server-side embeddings) service setting.
    This is one of the steps offered by the fluent interface for assembling
    a complete collection definition incrementally.

    See the class docstring for a full example on using the fluent interface.

    Args:
        provider: one of three things: (1) a whole `VectorServiceOptions`
            object encoding all desired properties for a vectorize service;
            (2) None, to signify removal of the entire vectorize setting;
            (3) a string, the vectorize provider name as seen in the response
            from the database's `find_embedding_providers` method. In the
            last case, the other parameters should also be provided as needed.
            See the examples below for an illustration of these usage patterns.
        model_name: a string, the name of the vectorize model to use (must be
            compatible with the chosen provider).
        authentication: a dictionary with the required authentication information
            if the vectorize makes use of secrets (API Keys) stored in the database
            Key Management System. See the Data API for more information on
            storing an API Key secret in one's Astra DB account.
        parameters: a free-form key-value mapping providing additional,
            model-dependent configuration settings. The allowed parameters for
            a given model are specified in the response of the Database
            `find_embedding_providers` method.

    Returns:
        a CollectionDefinition where the vectorize service has been added,
        replaced or removed, with all other settings carried over unchanged.

    Raises:
        ValueError: if any of `model_name`, `authentication`, `parameters`
            is supplied while `provider` is a `VectorServiceOptions` or None.

    Example:
        >>> from astrapy.info import CollectionDefinition, VectorServiceOptions
        >>>
        >>> zero = CollectionDefinition.builder()
        >>>
        >>> svc1 = zero.set_vector_service(
        ...     "myProvider",
        ...     "myModelName",
        ...     parameters={"p": "z"},
        ... )
        >>> print(svc1.build().as_dict())
        {'vector': {'service': {'provider': 'myProvider', 'modelName': 'myModelName', 'parameters': {'p': 'z'}}}}
        >>>
        >>> myVecSvcOpt = VectorServiceOptions(
        ...     provider="myProvider",
        ...     model_name="myModelName",
        ...     parameters={"p": "z"},
        ... )
        >>> svc2 = zero.set_vector_service(myVecSvcOpt).build()
        >>> print(svc2.as_dict())
        {'vector': {'service': {'provider': 'myProvider', 'modelName': 'myModelName', 'parameters': {'p': 'z'}}}}
        >>>
        >>> reset = svc1.set_vector_service(None).build()
        >>> print(reset.as_dict())
        {}
    """

    current_vector = self.vector or CollectionVectorOptions()
    # True as soon as one of the settings that only make sense alongside
    # a provider name (third usage pattern) has been supplied.
    has_extra_settings = not (
        model_name is None and authentication is None and parameters is None
    )

    resolved_service: VectorServiceOptions | None
    if isinstance(provider, VectorServiceOptions):
        if has_extra_settings:
            raise ValueError(
                "Parameters 'model_name', 'authentication' and 'parameters' "
                "cannot be passed when setting a VectorServiceOptions directly."
            )
        resolved_service = provider
    elif provider is None:
        if has_extra_settings:
            raise ValueError(
                "Parameters 'model_name', 'authentication' and 'parameters' "
                "cannot be passed when unsetting the vector service."
            )
        resolved_service = None
    else:
        resolved_service = VectorServiceOptions(
            provider=provider,
            model_name=model_name,
            authentication=authentication,
            parameters=parameters,
        )

    # Only the service changes; every other setting is carried over.
    return CollectionDefinition(
        vector=CollectionVectorOptions(
            dimension=current_vector.dimension,
            metric=current_vector.metric,
            source_model=current_vector.source_model,
            service=resolved_service,
        ),
        indexing=self.indexing,
        default_id=self.default_id,
    )
def set_vector_source_model(self, source_model: str | None) ‑> CollectionDefinition

Return a new collection definition object with a new setting for the collection's vector 'source model' parameter. This method is for use within the fluent interface for progressively building a complete collection definition.

See the class docstring for a full example on using the fluent interface.

Args

source_model
an optional string setting for the vector index, to help it pick the set of parameters best suited to a specific embedding model. See the Data API documentation for more details. Setting even just one vector-related property makes the described collection a "vector collection". Providing None removes this setting - the Data API will use its defaults.

Returns

a CollectionDefinition obtained by adding (or replacing) the desired vector-related setting to this collection definition.

Expand source code
def set_vector_source_model(self, source_model: str | None) -> CollectionDefinition:
    """
    Build a new collection definition that differs from this one only in the
    vector 'source model' parameter. This is one of the steps offered by the
    fluent interface for assembling a complete collection definition
    incrementally.

    See the class docstring for a full example on using the fluent interface.

    Args:
        source_model: an optional string setting for the vector index, to help
            it pick the set of parameters best suited to a specific embedding model.
            See the Data API documentation for more details.
            Setting even just one vector-related property makes the described
            collection a "vector collection". Passing None
            removes this setting - the Data API will use its defaults.

    Returns:
        a CollectionDefinition where the source model has been added
        (or replaced), with all other settings carried over unchanged.
    """

    # Use the vector options already present, or blank ones if none are set.
    current_vector = self.vector or CollectionVectorOptions()
    updated_vector = CollectionVectorOptions(
        dimension=current_vector.dimension,
        metric=current_vector.metric,
        source_model=source_model,
        service=current_vector.service,
    )
    return CollectionDefinition(
        vector=updated_vector,
        indexing=self.indexing,
        default_id=self.default_id,
    )
class CollectionDescriptor (name: str, definition: CollectionDefinition, raw_descriptor: dict[str, Any] | None)

A structure expressing full description of a collection as the Data API returns it, i.e. its name and its definition.

Attributes

name
the name of the collection.
definition
a CollectionDefinition instance.
raw_descriptor
the raw response from the Data API.

Note

Although the API format has the collection settings in a field called "options" (both in payloads and in responses, consistently), the corresponding attribute of this object is called definition to keep consistency with the TableDescriptor class and the attribute's data type (CollectionDefinition). As a consequence, when coercing a plain dictionary into this class, care must be taken that the plain dictionary key be "options", as a response from the API would have it.

Expand source code
@dataclass
class CollectionDescriptor:
    """
    The full description of a collection in the form the Data API returns it:
    the collection name paired with its definition.

    Attributes:
        name: the name of the collection.
        definition: a CollectionDefinition instance.
        raw_descriptor: the raw response from the Data API.

    Note:
        the API format keeps the collection settings in a field called
        "options" (both in payloads and in responses, consistently), whereas the
        corresponding attribute of this object is called `definition`, for
        consistency with the TableDescriptor class and with the attribute's data
        type (`CollectionDefinition`). Therefore, a plain dictionary being
        coerced into this class must use the key "options", as a response
        from the API would.
    """

    name: str
    definition: CollectionDefinition
    raw_descriptor: dict[str, Any] | None

    def __repr__(self) -> str:
        # The raw descriptor is abbreviated, and left out entirely when absent.
        shown_parts = [
            f"name={self.name!r}",
            f"definition={self.definition!r}",
        ]
        if self.raw_descriptor is not None:
            shown_parts.append("raw_descriptor=...")
        return f"{self.__class__.__name__}({', '.join(shown_parts)})"

    def __eq__(self, other: Any) -> bool:
        # Only name and definition are compared: the raw descriptor is ignored.
        if not isinstance(other, CollectionDescriptor):
            return False
        return self.name == other.name and self.definition == other.definition

    def as_dict(self) -> dict[str, Any]:
        """
        Express this object as a plain dictionary, using the API key "options"
        for the definition. Entries whose value is falsy (notably, an empty
        definition) are left out of the result.
        """

        full_mapping = {
            "name": self.name,
            "options": self.definition.as_dict(),
        }
        return {key: value for key, value in full_mapping.items() if value}

    @classmethod
    def _from_dict(cls, raw_dict: dict[str, Any]) -> CollectionDescriptor:
        """
        Build a CollectionDescriptor out of a dictionary, such as one
        returned by the Data API. The input dictionary is also retained
        as the `raw_descriptor` of the result.
        """

        _warn_residual_keys(cls, raw_dict, {"name", "options"})
        collection_name = raw_dict["name"]
        # A missing (or empty) "options" entry is parsed as an empty dictionary.
        options_dict = raw_dict.get("options") or {}
        return CollectionDescriptor(
            name=collection_name,
            definition=CollectionDefinition._from_dict(options_dict),
            raw_descriptor=raw_dict,
        )

    @classmethod
    def coerce(
        cls, raw_input: CollectionDescriptor | dict[str, Any]
    ) -> CollectionDescriptor:
        """
        Turn the input into a CollectionDescriptor: an object of this class
        is returned as is, while anything else is parsed as a plain dictionary
        of the right structure.
        """

        already_descriptor = isinstance(raw_input, CollectionDescriptor)
        return raw_input if already_descriptor else cls._from_dict(raw_input)

Class variables

var definition : CollectionDefinition
var name : str
var raw_descriptor : dict[str, typing.Any] | None

Static methods

def coerce(raw_input: CollectionDescriptor | dict[str, Any]) ‑> CollectionDescriptor

Normalize the input, whether an object already or a plain dictionary of the right structure, into a CollectionDescriptor.

Expand source code
@classmethod
def coerce(
    cls, raw_input: CollectionDescriptor | dict[str, Any]
) -> CollectionDescriptor:
    """
    Turn the input into a CollectionDescriptor: an object of this class
    is returned as is, while anything else is parsed as a plain dictionary
    of the right structure.
    """

    already_descriptor = isinstance(raw_input, CollectionDescriptor)
    return raw_input if already_descriptor else cls._from_dict(raw_input)

Methods

def as_dict(self) ‑> dict[str, typing.Any]

Recast this object into a dictionary. Empty definition will not be returned at all.

Expand source code
def as_dict(self) -> dict[str, Any]:
    """
    Express this object as a plain dictionary, using the API key "options"
    for the definition. Entries whose value is falsy (notably, an empty
    definition) are left out of the result.
    """

    full_mapping = {
        "name": self.name,
        "options": self.definition.as_dict(),
    }
    return {key: value for key, value in full_mapping.items() if value}
class CollectionInfo (database_info: AstraDBDatabaseInfo, keyspace: str, name: str, full_name: str)

Represents the identifying information for a collection, including the information about the database the collection belongs to.

Attributes

database_info
an AstraDBDatabaseInfo instance for the underlying database.
keyspace
the keyspace where the collection is located.
name
collection name. Unique within a keyspace (across tables/collections).
full_name
identifier for the collection within the database, in the form "keyspace.collection_name".
Expand source code
@dataclass
class CollectionInfo:
    """
    Identifying information for a collection, together with the information
    about the database the collection belongs to.

    This is a plain data holder: it declares the fields listed below
    and defines no methods of its own.

    Attributes:
        database_info: an AstraDBDatabaseInfo instance for the underlying database.
        keyspace: the keyspace where the collection is located.
        name: collection name. Unique within a keyspace (across tables/collections).
        full_name: identifier for the collection within the database,
            in the form "keyspace.collection_name".
    """

    database_info: AstraDBDatabaseInfo
    keyspace: str
    name: str
    full_name: str

Class variables

var database_info : AstraDBDatabaseInfo
var full_name : str
var keyspace : str
var name : str
class CollectionVectorOptions (dimension: int | None = None, metric: str | None = None, source_model: str | None = None, service: VectorServiceOptions | None = None)

The "vector" component of the collection options. See the Data API specifications for allowed values.

Attributes

dimension
an optional positive integer, the dimensionality of the vector space (i.e. the number of components in each vector).
metric
an optional choice of similarity metric to use in vector search. It must be a (string) value among VectorMetric.DOT_PRODUCT, VectorMetric.EUCLIDEAN and VectorMetric.COSINE.
source_model
based on this value, the vector index can tune itself so as to achieve optimal performance for a given embedding model. See the Data API documentation for the allowed values. Defaults to "other".
service
an optional VectorServiceOptions object in case a vectorize service is configured to achieve server-side embedding computation on the collection.
Expand source code
@dataclass
class CollectionVectorOptions:
    """
    The "vector" part of the options of a collection.
    See the Data API specifications for allowed values.

    Attributes:
        dimension: an optional positive integer, the dimensionality
            of the vector space (i.e. the number of components in each vector).
        metric: an optional choice of similarity metric to use in vector search.
            It must be a (string) value among `VectorMetric.DOT_PRODUCT`,
            `VectorMetric.EUCLIDEAN` and `VectorMetric.COSINE`.
        source_model: based on this value, the vector index can tune itself so as
            to achieve optimal performance for a given embedding model. See the
            Data API documentation for the allowed values. Defaults to "other".
        service: an optional VectorServiceOptions object in case a vectorize
            service is configured to achieve server-side embedding computation
            on the collection.
    """

    dimension: int | None = None
    metric: str | None = None
    source_model: str | None = None
    service: VectorServiceOptions | None = None

    def as_dict(self) -> dict[str, Any]:
        """
        Express this object as a plain dictionary with the API key names.
        Settings that are None are left out of the result.
        """

        all_settings = {
            "dimension": self.dimension,
            "metric": self.metric,
            "service": self.service.as_dict() if self.service is not None else None,
            "sourceModel": self.source_model,
        }
        return {
            key: value for key, value in all_settings.items() if value is not None
        }

    @staticmethod
    def _from_dict(raw_dict: dict[str, Any] | None) -> CollectionVectorOptions | None:
        """
        Build a CollectionVectorOptions out of a dictionary, such as one
        returned by the Data API. A None input results in None.
        """

        if raw_dict is None:
            return None
        return CollectionVectorOptions(
            dimension=raw_dict.get("dimension"),
            metric=raw_dict.get("metric"),
            source_model=raw_dict.get("sourceModel"),
            service=VectorServiceOptions._from_dict(raw_dict.get("service")),
        )

Class variables

var dimension : int | None
var metric : str | None
var service : VectorServiceOptions | None
var source_model : str | None

Methods

def as_dict(self) ‑> dict[str, typing.Any]

Recast this object into a dictionary.

Expand source code
def as_dict(self) -> dict[str, Any]:
    """
    Express this object as a plain dictionary with the API key names.
    Settings that are None are left out of the result.
    """

    all_settings = {
        "dimension": self.dimension,
        "metric": self.metric,
        "service": self.service.as_dict() if self.service is not None else None,
        "sourceModel": self.source_model,
    }
    return {
        key: value for key, value in all_settings.items() if value is not None
    }