Package astrapy

Expand source code
# Copyright DataStax, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import importlib.metadata
import os

import toml


def get_version() -> str:
    """Return the version string of the astrapy package.

    The version is first looked up in the installed-package metadata
    (Poetry writes it there on installation). If the package is not
    installed — e.g. when running from a source checkout — fall back to
    reading the version from the pyproject.toml file one directory above
    this module. If that also fails, return the string "unknown".

    Returns:
        the package version, or "unknown" if it cannot be determined.
    """
    try:
        # Installed case: Poetry records the version in the package metadata.
        return importlib.metadata.version(__package__)

    # If the package is not installed, we can still get the version from the pyproject.toml file
    except importlib.metadata.PackageNotFoundError:
        # Get the path to the pyproject.toml file
        dir_path = os.path.dirname(os.path.realpath(__file__))
        pyproject_path = os.path.join(dir_path, "..", "pyproject.toml")

        # Read the pyproject.toml file and get the version from the poetry section
        try:
            with open(pyproject_path, encoding="utf-8") as pyproject:
                file_contents = pyproject.read()
                pyproject_data = toml.loads(file_contents)

            # Return the version from the [tool.poetry] section
            return str(pyproject_data["tool"]["poetry"]["version"])

        # A missing file, a missing key, or a malformed TOML document all
        # degrade to "unknown" rather than crashing at import time
        # (previously a TomlDecodeError would propagate out of the import).
        except (FileNotFoundError, KeyError, toml.TomlDecodeError):
            return "unknown"


# Resolve the package version once, at import time, and expose it as the
# conventional module-level __version__ attribute.
__version__: str = get_version()


from astrapy import api_options  # noqa: E402, F401
from astrapy.admin import (  # noqa: E402
    AstraDBAdmin,
    AstraDBDatabaseAdmin,
    DataAPIDatabaseAdmin,
)
from astrapy.client import DataAPIClient  # noqa: E402
from astrapy.collection import AsyncCollection, Collection  # noqa: E402

# A circular-import issue requires this to happen at the end of this module:
from astrapy.database import AsyncDatabase, Database  # noqa: E402
from astrapy.table import AsyncTable, Table  # noqa: E402

# Explicit public API of the package: the names re-exported at top level
# (usable as `from astrapy import ...`), plus the version string.
__all__ = [
    "AstraDBAdmin",
    "AstraDBDatabaseAdmin",
    "AsyncCollection",
    "AsyncDatabase",
    "AsyncTable",
    "Collection",
    "Database",
    "DataAPIClient",
    "DataAPIDatabaseAdmin",
    "Table",
    "__version__",
]


# Documentation-tool (pdoc) directive: exclude the listed (internal)
# submodules from the generated documentation.
__pdoc__ = {
    "ids": False,
    "settings": False,
}

Sub-modules

astrapy.admin
astrapy.api_options
astrapy.authentication
astrapy.client
astrapy.collection
astrapy.constants
astrapy.cursors
astrapy.data
astrapy.data_types
astrapy.database
astrapy.exceptions
astrapy.info
astrapy.results
astrapy.table
astrapy.utils

Classes

class AstraDBAdmin (*, api_options: FullAPIOptions)

An "admin" object, able to perform administrative tasks at the databases level, such as creating, listing or dropping databases.

This class is not meant for direct instantiation by the user, rather it is obtained by invoking methods such as get_admin of DataAPIClient.

Args

api_options
a complete specification of the API Options for this instance.

Example

>>> from astrapy import DataAPIClient
>>> my_client = DataAPIClient("AstraCS:...")
>>> my_astra_db_admin = my_client.get_admin()
>>> database_list = my_astra_db_admin.list_databases()
>>> len(database_list)
3
>>> database_list[2].id
'01234567-...'
>>> my_db_admin = my_astra_db_admin.get_database_admin("01234567-...")
>>> my_db_admin.list_keyspaces()
['default_keyspace', 'staging_keyspace']

Note

a more powerful token may be required than the one sufficient for working in the Database, Collection and Table classes. Check the provided token if "Unauthorized" errors are encountered.

Expand source code
class AstraDBAdmin:
    """
    An "admin" object, able to perform administrative tasks at the databases
    level, such as creating, listing or dropping databases.

    This class is not meant for direct instantiation by the user, rather
    it is obtained by invoking methods such as `get_admin`
    of AstraDBClient.

    Args:
        api_options: a complete specification of the API Options for this instance.

    Example:
        >>> from astrapy import DataAPIClient
        >>> my_client = DataAPIClient("AstraCS:...")
        >>> my_astra_db_admin = my_client.get_admin()
        >>> database_list = my_astra_db_admin.list_databases()
        >>> len(database_list)
        3
        >>> database_list[2].id
        '01234567-...'
        >>> my_db_admin = my_astra_db_admin.get_database_admin("01234567-...")
        >>> my_db_admin.list_keyspaces()
        ['default_keyspace', 'staging_keyspace']

    Note:
        a more powerful token may be required than the one sufficient for working
        in the Database, Collection and Table classes. Check the provided token
        if "Unauthorized" errors are encountered.
    """

    def __init__(
        self,
        *,
        api_options: FullAPIOptions,
    ) -> None:
        """Initialize the admin from a complete API-options specification.

        Raises:
            InvalidEnvironmentException: if the options designate an
                environment other than Astra DB.
        """
        if api_options.environment not in Environment.astra_db_values:
            raise InvalidEnvironmentException(
                "Environments outside of Astra DB are not supported."
            )

        self.api_options = api_options
        # Assemble the headers used on DevOps API calls: the auth header
        # (when a token is configured) followed by any admin-level extras.
        auth_headers: dict[str, str | None] = {}
        if self.api_options.token:
            token_string = self.api_options.token.get_token()
            auth_headers = {
                DEFAULT_DEV_OPS_AUTH_HEADER: f"{DEFAULT_DEV_OPS_AUTH_PREFIX}{token_string}",
            }
        self._dev_ops_commander_headers: dict[str, str | None] = {
            **auth_headers,
            **self.api_options.admin_additional_headers,
        }
        self._dev_ops_api_commander = self._get_dev_ops_api_commander()

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}({self.api_options})"

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, AstraDBAdmin):
            return all([self.api_options == other.api_options])
        else:
            return False

    def _get_dev_ops_api_commander(self) -> APICommander:
        """Build an APICommander wired to this object's DevOps API settings."""
        url_options = self.api_options.dev_ops_api_url_options
        # Compose the base path from the (possibly absent) API version and
        # the fixed "databases" segment, dropping None and empty pieces.
        path_components: list[str] = []
        for raw_component in (url_options.dev_ops_api_version, "databases"):
            if raw_component is None:
                continue
            stripped = raw_component.strip("/")
            if stripped:
                path_components.append(stripped)
        return APICommander(
            api_endpoint=url_options.dev_ops_url,
            path="/".join(path_components),
            headers=self._dev_ops_commander_headers,
            callers=self.api_options.callers,
            dev_ops_api=True,
            redacted_header_names=self.api_options.redacted_header_names,
        )

    def _copy(
        self,
        *,
        token: str | TokenProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> AstraDBAdmin:
        """Return a new AstraDBAdmin with this one's options, optionally overridden.

        Precedence, lowest to highest: current options, `api_options`, `token`.
        """
        effective_options = self.api_options.with_override(
            api_options
        ).with_override(APIOptions(token=token))
        return AstraDBAdmin(api_options=effective_options)

    def with_options(
        self,
        *,
        token: str | TokenProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> AstraDBAdmin:
        """
        Create a clone of this AstraDBAdmin with selected attributes changed.

        Args:
            token: an Access Token to the database. Example: `"AstraCS:xyz..."`.
                Either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            api_options: any additional options for the clone, in the form of
                an APIOptions instance (only the needed attributes have to be
                set). Should the same setting also arrive through a named
                parameter, the named parameter takes precedence.

        Returns:
            a new AstraDBAdmin instance.

        Example:
            >>> different_auth_astra_db_admin = my_astra_db_admin.with_options(
            ...     token="AstraCS:xyz...",
            ... )
        """

        # All the work (override precedence included) happens in _copy.
        return self._copy(token=token, api_options=api_options)

    def list_databases(
        self,
        *,
        include: str | None = None,
        provider: str | None = None,
        page_size: int | None = None,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[AstraDBAdminDatabaseInfo]:
        """
        Get the list of databases, as obtained with a request to the DevOps API.

        Args:
            include: a filter on what databases are to be returned. As per
                DevOps API, defaults to "nonterminated". Pass "all" to include
                the already terminated databases.
            provider: a filter on the cloud provider for the databases.
                As per DevOps API, defaults to "ALL". Pass e.g. "AWS" to
                restrict the results.
            page_size: number of results per page from the DevOps API.
            database_admin_timeout_ms: a timeout, in milliseconds, imposed on
                each individual underlying API request. If not provided, this
                object's defaults apply. (Though listing very many databases
                may entail multiple DevOps API requests, almost always this
                method amounts to a single request; no check is made on the
                overall completion time.)
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.

        Returns:
            A list of AstraDBAdminDatabaseInfo objects.

        Example:
            >>> database_list = my_astra_db_admin.list_databases()
            >>> len(database_list)
            3
            >>> database_list[2].id
            '01234567-...'
            >>> database_list[2].status
            'ACTIVE'
            >>> database_list[2].info.region
            'eu-west-1'
        """

        selected_timeout_ms, selected_label = _select_singlereq_timeout_da(
            timeout_options=self.api_options.timeout_options,
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        timeout_context = _TimeoutContext(
            request_ms=selected_timeout_ms, label=selected_label
        )
        return self._list_databases_ctx(
            include=include,
            provider=provider,
            page_size=page_size,
            timeout_context=timeout_context,
        )

    def _list_databases_ctx(
        self,
        *,
        include: str | None,
        provider: str | None,
        page_size: int | None,
        timeout_context: _TimeoutContext,
    ) -> list[AstraDBAdminDatabaseInfo]:
        # version of the method, but with timeouts made into a _TimeoutContext
        logger.info("getting databases (DevOps API)")
        # Query parameters for the first page; "limit" is always present
        # (page_size or the library default), which the pagination loop
        # below relies on.
        request_params_0 = {
            k: v
            for k, v in {
                "include": include,
                "provider": provider,
                "limit": page_size or DEV_OPS_DEFAULT_DATABASES_PAGE_SIZE,
            }.items()
            if v is not None
        }
        responses: list[dict[str, Any]] = []
        logger.info("request 0, getting databases (DevOps API)")
        response_0 = self._dev_ops_api_commander.request(
            http_method=HttpMethod.GET,
            request_params=request_params_0,
            timeout_context=timeout_context,
        )
        # The endpoint is expected to return a JSON list of database dicts;
        # anything else is a malformed response.
        if not isinstance(response_0, list):
            raise DevOpsAPIException(
                "Faulty response from get-databases DevOps API command.",
            )
        logger.info("finished request 0, getting databases (DevOps API)")
        responses += [response_0]
        # Keep paginating while the latest page came back full: the next page
        # is requested with "starting_after" set to the last database id seen.
        while len(responses[-1]) >= request_params_0["limit"]:
            if "id" not in responses[-1][-1]:
                raise DevOpsAPIException(
                    "Faulty response from get-databases DevOps API command.",
                )
            last_received_db_id = responses[-1][-1]["id"]
            request_params_n = {
                **request_params_0,
                **{"starting_after": last_received_db_id},
            }
            logger.info(
                "request %s, getting databases (DevOps API)",
                len(responses),
            )
            response_n = self._dev_ops_api_commander.request(
                http_method=HttpMethod.GET,
                request_params=request_params_n,
                timeout_context=timeout_context,
            )
            logger.info(
                "finished request %s, getting databases (DevOps API)",
                len(responses),
            )
            if not isinstance(response_n, list):
                raise DevOpsAPIException(
                    "Faulty response from get-databases DevOps API command.",
                )
            responses += [response_n]

        logger.info("finished getting databases (DevOps API)")
        # Flatten all pages into a single list of AstraDBAdminDatabaseInfo.
        return [
            _recast_as_admin_database_info(
                db_dict,
                environment=self.api_options.environment,
            )
            for response in responses
            for db_dict in response
        ]

    async def async_list_databases(
        self,
        *,
        include: str | None = None,
        provider: str | None = None,
        page_size: int | None = None,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[AstraDBAdminDatabaseInfo]:
        """
        Get the list of databases, as obtained with a request to the DevOps API.
        Async version of the method, for use in an asyncio context.

        Args:
            include: a filter on what databases are to be returned. As per
                DevOps API, defaults to "nonterminated". Pass "all" to include
                the already terminated databases.
            provider: a filter on the cloud provider for the databases.
                As per DevOps API, defaults to "ALL". Pass e.g. "AWS" to
                restrict the results.
            page_size: number of results per page from the DevOps API.
            database_admin_timeout_ms: a timeout, in milliseconds, imposed on
                each individual underlying API request. If not provided, this
                object's defaults apply. (Though listing very many databases
                may entail multiple DevOps API requests, almost always this
                method amounts to a single request; no check is made on the
                overall completion time.)
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.

        Returns:
            A list of AstraDBAdminDatabaseInfo objects.

        Example:
            >>> async def check_if_db_exists(db_id: str) -> bool:
            ...     db_list = await my_astra_db_admin.async_list_databases()
            ...     return any(db.id == db_id for db in db_list)
            ...
            >>> asyncio.run(check_if_db_exists("01234567-..."))
            True
            >>> asyncio.run(check_if_db_exists("xyz"))
            False
        """

        selected_timeout_ms, selected_label = _select_singlereq_timeout_da(
            timeout_options=self.api_options.timeout_options,
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        timeout_context = _TimeoutContext(
            request_ms=selected_timeout_ms, label=selected_label
        )
        return await self._async_list_databases_ctx(
            include=include,
            provider=provider,
            page_size=page_size,
            timeout_context=timeout_context,
        )

    async def _async_list_databases_ctx(
        self,
        *,
        include: str | None,
        provider: str | None,
        page_size: int | None,
        timeout_context: _TimeoutContext,
    ) -> list[AstraDBAdminDatabaseInfo]:
        # version of the method, but with timeouts made into a _TimeoutContext
        logger.info("getting databases (DevOps API), async")
        # Query parameters for the first page; "limit" is always present
        # (page_size or the library default), which the pagination loop
        # below relies on.
        request_params_0 = {
            k: v
            for k, v in {
                "include": include,
                "provider": provider,
                "limit": page_size or DEV_OPS_DEFAULT_DATABASES_PAGE_SIZE,
            }.items()
            if v is not None
        }
        responses: list[dict[str, Any]] = []
        logger.info("request 0, getting databases (DevOps API), async")
        response_0 = await self._dev_ops_api_commander.async_request(
            http_method=HttpMethod.GET,
            request_params=request_params_0,
            timeout_context=timeout_context,
        )
        # The endpoint is expected to return a JSON list of database dicts;
        # anything else is a malformed response.
        if not isinstance(response_0, list):
            raise DevOpsAPIException(
                "Faulty response from get-databases DevOps API command.",
            )
        logger.info("finished request 0, getting databases (DevOps API), async")
        responses += [response_0]
        # Keep paginating while the latest page came back full: the next page
        # is requested with "starting_after" set to the last database id seen.
        while len(responses[-1]) >= request_params_0["limit"]:
            if "id" not in responses[-1][-1]:
                raise DevOpsAPIException(
                    "Faulty response from get-databases DevOps API command.",
                )
            last_received_db_id = responses[-1][-1]["id"]
            request_params_n = {
                **request_params_0,
                **{"starting_after": last_received_db_id},
            }
            logger.info(
                # Fixed: this message was missing the ", async" suffix carried
                # by every other log line in this (async) method.
                "request %s, getting databases (DevOps API), async",
                len(responses),
            )
            response_n = await self._dev_ops_api_commander.async_request(
                http_method=HttpMethod.GET,
                request_params=request_params_n,
                timeout_context=timeout_context,
            )
            logger.info(
                "finished request %s, getting databases (DevOps API), async",
                len(responses),
            )
            if not isinstance(response_n, list):
                raise DevOpsAPIException(
                    "Faulty response from get-databases DevOps API command.",
                )
            responses += [response_n]

        logger.info("finished getting databases (DevOps API), async")
        # Flatten all pages into a single list of AstraDBAdminDatabaseInfo.
        return [
            _recast_as_admin_database_info(
                db_dict,
                environment=self.api_options.environment,
            )
            for response in responses
            for db_dict in response
        ]

    def database_info(
        self,
        id: str,
        *,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> AstraDBAdminDatabaseInfo:
        """
        Get the full information on a given database, through a request
        to the DevOps API.

        Args:
            id: the ID of the target database, e.g.
                "01234567-89ab-cdef-0123-456789abcdef".
            database_admin_timeout_ms: a timeout, in milliseconds, to impose
                on the underlying DevOps API request. If not provided, this
                object's defaults apply. (A single API request is issued,
                hence all timeout parameters are treated the same.)
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.

        Returns:
            An AstraDBAdminDatabaseInfo object.

        Example:
            >>> details_of_my_db = my_astra_db_admin.database_info("01234567-...")
            >>> details_of_my_db.id
            '01234567-...'
            >>> details_of_my_db.status
            'ACTIVE'
            >>> details_of_my_db.info.region
            'eu-west-1'
        """

        selected_timeout_ms, selected_label = _select_singlereq_timeout_da(
            timeout_options=self.api_options.timeout_options,
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        timeout_context = _TimeoutContext(
            request_ms=selected_timeout_ms, label=selected_label
        )
        return self._database_info_ctx(
            id=id,
            timeout_context=timeout_context,
        )

    def _database_info_ctx(
        self,
        id: str,
        *,
        timeout_context: _TimeoutContext,
    ) -> AstraDBAdminDatabaseInfo:
        """Single-database info fetch; timeouts arrive as a _TimeoutContext."""
        logger.info(f"getting database info for '{id}' (DevOps API)")
        raw_info = self._dev_ops_api_commander.request(
            http_method=HttpMethod.GET,
            additional_path=id,
            timeout_context=timeout_context,
        )
        logger.info(f"finished getting database info for '{id}' (DevOps API)")
        # Recast the raw DevOps API dict into the library's info object.
        return _recast_as_admin_database_info(
            raw_info,
            environment=self.api_options.environment,
        )

    async def async_database_info(
        self,
        id: str,
        *,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> AstraDBAdminDatabaseInfo:
        """
        Get the full information on a given database, through a request to
        the DevOps API. This is an awaitable method suitable for use within
        an asyncio event loop.

        Args:
            id: the ID of the target database, e.g.
                "01234567-89ab-cdef-0123-456789abcdef".
            database_admin_timeout_ms: a timeout, in milliseconds, to impose
                on the underlying DevOps API request. If not provided, this
                object's defaults apply. (A single API request is issued,
                hence all timeout parameters are treated the same.)
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.

        Returns:
            An AstraDBAdminDatabaseInfo object.

        Example:
            >>> async def check_if_db_active(db_id: str) -> bool:
            ...     db_info = await my_astra_db_admin.async_database_info(db_id)
            ...     return db_info.status == "ACTIVE"
            ...
            >>> asyncio.run(check_if_db_active("01234567-..."))
            True
        """

        selected_timeout_ms, selected_label = _select_singlereq_timeout_da(
            timeout_options=self.api_options.timeout_options,
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        timeout_context = _TimeoutContext(
            request_ms=selected_timeout_ms, label=selected_label
        )
        return await self._async_database_info_ctx(
            id=id,
            timeout_context=timeout_context,
        )

    async def _async_database_info_ctx(
        self,
        id: str,
        *,
        timeout_context: _TimeoutContext,
    ) -> AstraDBAdminDatabaseInfo:
        """Async single-database info fetch; timeouts arrive as a _TimeoutContext."""
        logger.info(f"getting database info for '{id}' (DevOps API), async")
        raw_info = await self._dev_ops_api_commander.async_request(
            http_method=HttpMethod.GET,
            additional_path=id,
            timeout_context=timeout_context,
        )
        logger.info(f"finished getting database info for '{id}' (DevOps API), async")
        # Recast the raw DevOps API dict into the library's info object.
        return _recast_as_admin_database_info(
            raw_info,
            environment=self.api_options.environment,
        )

    def create_database(
        self,
        name: str,
        *,
        cloud_provider: str,
        region: str,
        keyspace: str | None = None,
        wait_until_active: bool = True,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
        token: str | TokenProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AstraDBDatabaseAdmin:
        """
        Create a database as requested, optionally waiting for it to be ready.

        Args:
            name: the desired name for the database.
            cloud_provider: one of 'aws', 'gcp' or 'azure'.
            region: any of the available cloud regions.
            keyspace: name for the one keyspace the database starts with.
                If omitted, DevOps API will use its default.
            wait_until_active: if True (default), the method returns only after
                the newly-created database is in ACTIVE state (a few minutes,
                usually). If False, it will return right after issuing the
                creation request to the DevOps API, and it will be responsibility
                of the caller to check the database status before working with it.
            database_admin_timeout_ms: a timeout, in milliseconds, for the whole
                requested operation to complete. This is used only
                if `wait_until_active` is true, i.e. if the method call must
                wait and keep querying the DevOps API for the status of the
                newly-created database.
            request_timeout_ms: a timeout, in milliseconds, for
                each underlying DevOps API HTTP request.
            timeout_ms: an alias for *both* the `request_timeout_ms` and
                `database_admin_timeout_ms` timeout parameters. In practice,
                regardless of `wait_until_active`, this parameter dictates an
                overall timeout on this method call.
            token: if supplied, is passed to the returned Database instead of
                the one set for this object.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults inherited from the AstraDBAdmin.
                This allows for a deeper configuration of the database admin, e.g.
                concerning timeouts; if this is passed together with
                the named timeout parameters, the latter will take precedence
                in their respective settings.

        Returns:
            An AstraDBDatabaseAdmin instance.

        Note: a timeout event is no guarantee at all that the
        creation request has not reached the API server and is not going
        to be, in fact, honored.

        Example:
            >>> my_new_db_admin = my_astra_db_admin.create_database(
            ...     "new_database",
            ...     cloud_provider="aws",
            ...     region="ap-south-1",
            ... )
            >>> my_new_db = my_new_db_admin.get_database()
            >>> my_coll = my_new_db.create_collection(
            ...     "movies",
            ...     definition=(
            ...         CollectionDefinition.builder()
            ...         .set_vector_dimension(2)
            ...         .build()
            ...     )
            ... )
            >>> my_coll.insert_one({"title": "The Title", "$vector": [0.1, 0.2]})
        """

        # Overall-operation timeout: explicit argument, then the `timeout_ms`
        # alias, then this object's configured default.
        _database_admin_timeout_ms, _da_label = _first_valid_timeout(
            (database_admin_timeout_ms, "database_admin_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.database_admin_timeout_ms,
                "database_admin_timeout_ms",
            ),
        )
        # Per-HTTP-request timeout, resolved with the same precedence rule.
        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        # DevOps API creation payload; None-valued entries (e.g. an omitted
        # keyspace) are dropped so the API applies its own defaults.
        cd_payload = {
            k: v
            for k, v in {
                "name": name,
                "tier": "serverless",
                "cloudProvider": cloud_provider,
                "region": region,
                "capacityUnits": 1,
                "dbType": "vector",
                "keyspace": keyspace,
            }.items()
            if v is not None
        }
        # The manager tracks the overall deadline across the creation request
        # and any subsequent status-polling requests.
        timeout_manager = MultiCallTimeoutManager(
            overall_timeout_ms=_database_admin_timeout_ms,
            dev_ops_api=True,
            timeout_label=_da_label,
        )
        logger.info(
            f"creating database {name}/({cloud_provider}, {region}) (DevOps API)"
        )
        cd_raw_response = self._dev_ops_api_commander.raw_request(
            http_method=HttpMethod.POST,
            payload=cd_payload,
            timeout_context=timeout_manager.remaining_timeout(
                cap_time_ms=_request_timeout_ms,
                cap_timeout_label=_rt_label,
            ),
        )
        # Anything other than HTTP 201 Created is treated as a failure.
        if cd_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_CREATED:
            raise DevOpsAPIException(
                f"DB creation ('{name}') failed: API returned HTTP "
                f"{cd_raw_response.status_code} instead of "
                f"{DEV_OPS_RESPONSE_HTTP_CREATED} - Created."
            )
        # The new database's ID is communicated via the "Location" header.
        new_database_id = cd_raw_response.headers["Location"]
        logger.info(
            "DevOps API returned from creating database "
            f"{name}/({cloud_provider}, {region})"
        )
        if wait_until_active:
            # Poll the database status at fixed intervals until it leaves
            # the PENDING/INITIALIZING states; ending up in any state other
            # than ACTIVE is an error.
            last_status_seen = DEV_OPS_DATABASE_STATUS_PENDING
            while last_status_seen in {
                DEV_OPS_DATABASE_STATUS_PENDING,
                DEV_OPS_DATABASE_STATUS_INITIALIZING,
            }:
                logger.info(f"sleeping to poll for status of '{new_database_id}'")
                time.sleep(DEV_OPS_DATABASE_POLL_INTERVAL_S)
                last_db_info = self._database_info_ctx(
                    id=new_database_id,
                    timeout_context=timeout_manager.remaining_timeout(
                        cap_time_ms=_request_timeout_ms,
                        cap_timeout_label=_rt_label,
                    ),
                )
                last_status_seen = last_db_info.status
            if last_status_seen != DEV_OPS_DATABASE_STATUS_ACTIVE:
                raise DevOpsAPIException(
                    f"Database {name} entered unexpected status {last_status_seen} after PENDING"
                )
        # return the database instance
        logger.info(
            f"finished creating database '{new_database_id}' = "
            f"{name}/({cloud_provider}, {region}) (DevOps API)"
        )
        # Spawn-options precedence: the named `token` overrides
        # `spawn_api_options`, which overrides this admin's own options.
        _final_api_options = self.api_options.with_override(
            spawn_api_options
        ).with_override(APIOptions(token=token))
        return AstraDBDatabaseAdmin.from_astra_db_admin(
            api_endpoint=build_api_endpoint(
                environment=self.api_options.environment,
                database_id=new_database_id,
                region=region,
            ),
            astra_db_admin=self,
            spawn_api_options=_final_api_options,
        )

    async def async_create_database(
        self,
        name: str,
        *,
        cloud_provider: str,
        region: str,
        keyspace: str | None = None,
        wait_until_active: bool = True,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
        token: str | TokenProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AstraDBDatabaseAdmin:
        """
        Create a database as requested, optionally waiting for it to be ready.
        This is an awaitable method suitable for use within an asyncio event loop.

        Args:
            name: the desired name for the database.
            cloud_provider: one of 'aws', 'gcp' or 'azure'.
            region: any of the available cloud regions.
            keyspace: name for the one keyspace the database starts with.
                If omitted, DevOps API will use its default.
            wait_until_active: if True (default), the method returns only after
                the newly-created database is in ACTIVE state (a few minutes,
                usually). If False, it will return right after issuing the
                creation request to the DevOps API, and it will be responsibility
                of the caller to check the database status before working with it.
            database_admin_timeout_ms: a timeout, in milliseconds, for the whole
                requested operation to complete. This is used only
                if `wait_until_active` is true, i.e. if the method call must
                wait and keep querying the DevOps API for the status of the
                newly-created database.
            request_timeout_ms: a timeout, in milliseconds, for
                each underlying DevOps API HTTP request.
            timeout_ms: an alias for *both* the `request_timeout_ms` and
                `database_admin_timeout_ms` timeout parameters. In practice,
                regardless of `wait_until_active`, this parameter dictates an
                overall timeout on this method call.
            token: if supplied, is passed to the returned Database instead of
                the one set for this object.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults inherited from the AstraDBAdmin.
                This allows for a deeper configuration of the database admin, e.g.
                concerning timeouts; if this is passed together with
                the named timeout parameters, the latter will take precedence
                in their respective settings.

        Returns:
            An AstraDBDatabaseAdmin instance.

        Note: a timeout event is no guarantee at all that the
        creation request has not reached the API server and is not going
        to be, in fact, honored.

        Example:
            >>> asyncio.run(
            ...     my_astra_db_admin.async_create_database(
            ...         "new_database",
            ...         cloud_provider="aws",
            ...         region="ap-south-1",
            ...     )
            ... )
            AstraDBDatabaseAdmin(id=...)
        """

        # Resolve the effective timeouts: explicit per-call parameters win over
        # the `timeout_ms` alias, which in turn wins over this object's defaults.
        _database_admin_timeout_ms, _da_label = _first_valid_timeout(
            (database_admin_timeout_ms, "database_admin_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.database_admin_timeout_ms,
                "database_admin_timeout_ms",
            ),
        )
        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        # Assemble the creation payload, dropping entries left unset
        # (e.g. `keyspace`) so that the DevOps API applies its own defaults.
        cd_payload = {
            k: v
            for k, v in {
                "name": name,
                "tier": "serverless",
                "cloudProvider": cloud_provider,
                "region": region,
                "capacityUnits": 1,
                "dbType": "vector",
                "keyspace": keyspace,
            }.items()
            if v is not None
        }
        # One overall time budget shared by the creation request and any
        # subsequent status-polling requests; each single request is further
        # capped by `_request_timeout_ms`.
        timeout_manager = MultiCallTimeoutManager(
            overall_timeout_ms=_database_admin_timeout_ms,
            dev_ops_api=True,
            timeout_label=_da_label,
        )
        logger.info(
            f"creating database {name}/({cloud_provider}, {region}) "
            "(DevOps API), async"
        )
        cd_raw_response = await self._dev_ops_api_commander.async_raw_request(
            http_method=HttpMethod.POST,
            payload=cd_payload,
            timeout_context=timeout_manager.remaining_timeout(
                cap_time_ms=_request_timeout_ms,
                cap_timeout_label=_rt_label,
            ),
        )
        if cd_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_CREATED:
            raise DevOpsAPIException(
                f"DB creation ('{name}') failed: API returned HTTP "
                f"{cd_raw_response.status_code} instead of "
                f"{DEV_OPS_RESPONSE_HTTP_CREATED} - Created."
            )
        # The DevOps API communicates the new database's ID through
        # the "Location" response header.
        new_database_id = cd_raw_response.headers["Location"]
        logger.info(
            "DevOps API returned from creating database "
            f"{name}/({cloud_provider}, {region}), async"
        )
        if wait_until_active:
            # Poll the database status until it leaves the transient
            # PENDING/INITIALIZING states (then it should be ACTIVE).
            last_status_seen = DEV_OPS_DATABASE_STATUS_PENDING
            while last_status_seen in {
                DEV_OPS_DATABASE_STATUS_PENDING,
                DEV_OPS_DATABASE_STATUS_INITIALIZING,
            }:
                logger.info(
                    f"sleeping to poll for status of '{new_database_id}', async"
                )
                await asyncio.sleep(DEV_OPS_DATABASE_POLL_INTERVAL_S)
                last_db_info = await self._async_database_info_ctx(
                    id=new_database_id,
                    timeout_context=timeout_manager.remaining_timeout(
                        cap_time_ms=_request_timeout_ms,
                        cap_timeout_label=_rt_label,
                    ),
                )
                last_status_seen = last_db_info.status
            if last_status_seen != DEV_OPS_DATABASE_STATUS_ACTIVE:
                raise DevOpsAPIException(
                    f"Database {name} entered unexpected status "
                    f"{last_status_seen} after PENDING"
                )
        # return the database instance
        logger.info(
            f"finished creating database '{new_database_id}' = "
            f"{name}/({cloud_provider}, {region}) (DevOps API), async"
        )
        # Per-call options (token, spawn_api_options) take precedence
        # over this object's own api_options for the spawned admin.
        _final_api_options = self.api_options.with_override(
            spawn_api_options
        ).with_override(APIOptions(token=token))
        return AstraDBDatabaseAdmin.from_astra_db_admin(
            api_endpoint=build_api_endpoint(
                environment=self.api_options.environment,
                database_id=new_database_id,
                region=region,
            ),
            astra_db_admin=self,
            spawn_api_options=_final_api_options,
        )

    def drop_database(
        self,
        id: str,
        *,
        wait_until_active: bool = True,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Drop a database, i.e. delete it completely and permanently with all its data.

        Args:
            id: The ID of the database to drop, e.g.
                "01234567-89ab-cdef-0123-456789abcdef".
            wait_until_active: if True (default), the method returns only after
                the database has actually been deleted (generally a few minutes).
                If False, it will return right after issuing the
                drop request to the DevOps API, and it will be responsibility
                of the caller to check the database status/availability
                after that, if desired.
            database_admin_timeout_ms: a timeout, in milliseconds, for the whole
                requested operation to complete. This is used only
                if `wait_until_active` is true, i.e. if the method call must
                wait and keep querying the DevOps API for the status of the
                newly-deleted database.
            request_timeout_ms: a timeout, in milliseconds, for
                each underlying DevOps API HTTP request.
            timeout_ms: an alias for *both* the `request_timeout_ms` and
                `database_admin_timeout_ms` timeout parameters. In practice,
                regardless of `wait_until_active`, this parameter dictates an
                overall timeout on this method call.

        Raises:
            DevOpsAPIException: if the API does not accept the termination
                request, or if the database settles in an unexpected status.

        Note: a timeout event is no guarantee at all that the
        deletion request has not reached the API server and is not going
        to be, in fact, honored.

        Example:
            >>> database_list_pre = my_astra_db_admin.list_databases()
            >>> len(database_list_pre)
            3
            >>> my_astra_db_admin.drop_database("01234567-...")
            >>> database_list_post = my_astra_db_admin.list_databases()
            >>> len(database_list_post)
            2
        """

        # Resolve the effective timeouts: explicit per-call parameters win over
        # the `timeout_ms` alias, which in turn wins over this object's defaults.
        _database_admin_timeout_ms, _da_label = _first_valid_timeout(
            (database_admin_timeout_ms, "database_admin_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.database_admin_timeout_ms,
                "database_admin_timeout_ms",
            ),
        )
        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        # One overall time budget shared by the terminate request and any
        # subsequent polling requests; each request is capped individually.
        timeout_manager = MultiCallTimeoutManager(
            overall_timeout_ms=_database_admin_timeout_ms,
            dev_ops_api=True,
            timeout_label=_da_label,
        )
        logger.info(f"dropping database '{id}' (DevOps API)")
        te_raw_response = self._dev_ops_api_commander.raw_request(
            http_method=HttpMethod.POST,
            additional_path=f"{id}/terminate",
            timeout_context=timeout_manager.remaining_timeout(
                cap_time_ms=_request_timeout_ms,
                cap_timeout_label=_rt_label,
            ),
        )
        if te_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_ACCEPTED:
            # fixed: HTTP 202 is "Accepted" (the previous message said "Created")
            raise DevOpsAPIException(
                f"DB deletion ('{id}') failed: API returned HTTP "
                f"{te_raw_response.status_code} instead of "
                f"{DEV_OPS_RESPONSE_HTTP_ACCEPTED} - Accepted."
            )
        logger.info(f"DevOps API returned from dropping database '{id}'")
        if wait_until_active:
            last_status_seen: str | None = DEV_OPS_DATABASE_STATUS_TERMINATING
            _db_name: str | None = None
            while last_status_seen == DEV_OPS_DATABASE_STATUS_TERMINATING:
                logger.info(f"sleeping to poll for status of '{id}'")
                time.sleep(DEV_OPS_DATABASE_POLL_INTERVAL_S)
                # scan the full database listing for this ID: its absence
                # from the list means the deletion has completed.
                detected_databases = [
                    a_db_info
                    for a_db_info in self._list_databases_ctx(
                        include=None,
                        provider=None,
                        page_size=None,
                        timeout_context=timeout_manager.remaining_timeout(
                            cap_time_ms=_request_timeout_ms,
                            cap_timeout_label=_rt_label,
                        ),
                    )
                    if a_db_info.id == id
                ]
                if detected_databases:
                    last_status_seen = detected_databases[0].status
                    _db_name = detected_databases[0].name
                else:
                    # database no longer listed: deletion complete
                    last_status_seen = None
            if last_status_seen is not None:
                # fixed: this loop waits on TERMINATING (message said "PENDING")
                _name_desc = f" ({_db_name})" if _db_name else ""
                raise DevOpsAPIException(
                    f"Database {id}{_name_desc} entered unexpected status "
                    f"{last_status_seen} after TERMINATING"
                )
        logger.info(f"finished dropping database '{id}' (DevOps API)")

    async def async_drop_database(
        self,
        id: str,
        *,
        wait_until_active: bool = True,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Drop a database, i.e. delete it completely and permanently with all its data.
        Async version of the method, for use in an asyncio context.

        Args:
            id: The ID of the database to drop, e.g.
                "01234567-89ab-cdef-0123-456789abcdef".
            wait_until_active: if True (default), the method returns only after
                the database has actually been deleted (generally a few minutes).
                If False, it will return right after issuing the
                drop request to the DevOps API, and it will be responsibility
                of the caller to check the database status/availability
                after that, if desired.
            database_admin_timeout_ms: a timeout, in milliseconds, for the whole
                requested operation to complete. This is used only
                if `wait_until_active` is true, i.e. if the method call must
                wait and keep querying the DevOps API for the status of the
                newly-deleted database.
            request_timeout_ms: a timeout, in milliseconds, for
                each underlying DevOps API HTTP request.
            timeout_ms: an alias for *both* the `request_timeout_ms` and
                `database_admin_timeout_ms` timeout parameters. In practice,
                regardless of `wait_until_active`, this parameter dictates an
                overall timeout on this method call.

        Raises:
            DevOpsAPIException: if the API does not accept the termination
                request, or if the database settles in an unexpected status.

        Note: a timeout event is no guarantee at all that the
        deletion request has not reached the API server and is not going
        to be, in fact, honored.

        Example:
            >>> asyncio.run(
            ...     my_astra_db_admin.async_drop_database("01234567-...")
            ... )
        """

        # Resolve the effective timeouts: explicit per-call parameters win over
        # the `timeout_ms` alias, which in turn wins over this object's defaults.
        _database_admin_timeout_ms, _da_label = _first_valid_timeout(
            (database_admin_timeout_ms, "database_admin_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.database_admin_timeout_ms,
                "database_admin_timeout_ms",
            ),
        )
        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        # One overall time budget shared by the terminate request and any
        # subsequent polling requests; each request is capped individually.
        timeout_manager = MultiCallTimeoutManager(
            overall_timeout_ms=_database_admin_timeout_ms,
            dev_ops_api=True,
            timeout_label=_da_label,
        )
        logger.info(f"dropping database '{id}' (DevOps API), async")
        te_raw_response = await self._dev_ops_api_commander.async_raw_request(
            http_method=HttpMethod.POST,
            additional_path=f"{id}/terminate",
            timeout_context=timeout_manager.remaining_timeout(
                cap_time_ms=_request_timeout_ms,
                cap_timeout_label=_rt_label,
            ),
        )
        if te_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_ACCEPTED:
            # fixed: HTTP 202 is "Accepted" (the previous message said "Created")
            raise DevOpsAPIException(
                f"DB deletion ('{id}') failed: API returned HTTP "
                f"{te_raw_response.status_code} instead of "
                f"{DEV_OPS_RESPONSE_HTTP_ACCEPTED} - Accepted."
            )
        logger.info(f"DevOps API returned from dropping database '{id}', async")
        if wait_until_active:
            last_status_seen: str | None = DEV_OPS_DATABASE_STATUS_TERMINATING
            _db_name: str | None = None
            while last_status_seen == DEV_OPS_DATABASE_STATUS_TERMINATING:
                logger.info(f"sleeping to poll for status of '{id}', async")
                await asyncio.sleep(DEV_OPS_DATABASE_POLL_INTERVAL_S)
                # scan the full database listing for this ID: its absence
                # from the list means the deletion has completed.
                detected_databases = [
                    a_db_info
                    for a_db_info in await self._async_list_databases_ctx(
                        include=None,
                        provider=None,
                        page_size=None,
                        timeout_context=timeout_manager.remaining_timeout(
                            cap_time_ms=_request_timeout_ms,
                            cap_timeout_label=_rt_label,
                        ),
                    )
                    if a_db_info.id == id
                ]
                if detected_databases:
                    last_status_seen = detected_databases[0].status
                    _db_name = detected_databases[0].name
                else:
                    # database no longer listed: deletion complete
                    last_status_seen = None
            if last_status_seen is not None:
                # fixed: this loop waits on TERMINATING (message said "PENDING")
                _name_desc = f" ({_db_name})" if _db_name else ""
                raise DevOpsAPIException(
                    f"Database {id}{_name_desc} entered unexpected status "
                    f"{last_status_seen} after TERMINATING"
                )
        logger.info(f"finished dropping database '{id}' (DevOps API), async")

    def get_database_admin(
        self,
        api_endpoint_or_id: str | None = None,
        *,
        api_endpoint: str | None = None,
        id: str | None = None,
        region: str | None = None,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
        token: str | TokenProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AstraDBDatabaseAdmin:
        """
        Spawn an AstraDBDatabaseAdmin for admin-level work on one database.

        The target database can be designated either by its API Endpoint or
        by its ID (optionally with a region); the two ways are mutually
        exclusive. No DevOps API operation is performed by this call itself
        (except, possibly, a single request to determine the region when
        an ID is given without one): the database must already exist.

        Args:
            api_endpoint_or_id: positional parameter standing for either
                `api_endpoint` or `id`. Supplying it alongside the
                corresponding keyword parameter is an error.
            api_endpoint: the API Endpoint for the target database
                (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`).
            id: the target database ID, as an alternative to the API Endpoint.
            region: region the database resides in. Usable only together
                with `id`; if absent, a DevOps API request determines it.
            database_admin_timeout_ms: a timeout, in milliseconds, for the
                possible DevOps API request resolving the region. Falls back
                to this object's defaults if unspecified. (A single request
                at most is issued, so all timeout parameters coincide.)
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.
            token: if supplied, replaces this object's token for the spawned
                admin. Either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            spawn_api_options: a complete or partial APIOptions specification
                overriding the defaults inherited from this AstraDBAdmin.
                Named timeout parameters, when also given, take precedence
                over the corresponding settings in here.

        Returns:
            An AstraDBDatabaseAdmin instance representing the requested database.

        Example:
            >>> my_db_admin = my_astra_db_admin.get_database_admin("01234567-...")
            >>> my_db_admin.list_keyspaces()
            ['default_keyspace']
            >>> my_db_admin.create_keyspace("that_other_one")
            >>> my_db_admin.list_keyspaces()
            ['default_keyspace', 'that_other_one']

        Note:
            For actual creation of a database, see the `create_database` method.
        """

        resolved_admin_timeout_ms, _ = _select_singlereq_timeout_da(
            timeout_options=self.api_options.timeout_options,
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # disambiguate the positional argument into endpoint-or-ID form:
        chosen_endpoint, chosen_id = check_id_endpoint_parg_kwargs(
            p_arg=api_endpoint_or_id, api_endpoint=api_endpoint, id=id
        )
        # layered options: object defaults < spawn_api_options < explicit token
        admin_api_options = self.api_options.with_override(
            spawn_api_options
        ).with_override(APIOptions(token=token))

        # endpoint-based designation: region is not a legal companion parameter
        if chosen_endpoint is not None:
            if region is not None:
                raise ValueError(
                    "Parameter `region` not supported with an API endpoint."
                )
            return AstraDBDatabaseAdmin.from_astra_db_admin(
                api_endpoint=chosen_endpoint,
                astra_db_admin=self,
                spawn_api_options=admin_api_options,
            )

        # ID-based designation: resolve the region if not provided, then
        # assemble the endpoint from its components.
        if chosen_id is None:
            raise ValueError("Either `api_endpoint` or `id` must be supplied.")
        resolved_region = normalize_region_for_id(
            database_id=chosen_id,
            environment=self.api_options.environment,
            region_param=region,
            request_timeout_ms=resolved_admin_timeout_ms,
            api_options=self.api_options,
        )
        return AstraDBDatabaseAdmin.from_astra_db_admin(
            api_endpoint=build_api_endpoint(
                environment=self.api_options.environment,
                database_id=chosen_id,
                region=resolved_region,
            ),
            astra_db_admin=self,
            spawn_api_options=admin_api_options,
        )

    def get_database(
        self,
        api_endpoint_or_id: str | None = None,
        *,
        api_endpoint: str | None = None,
        keyspace: str | None = None,
        id: str | None = None,
        region: str | None = None,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
        token: str | TokenProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> Database:
        """
        Spawn a Database instance for data-level work (such as
        creating/managing collections) on one specific database.

        The target database can be designated either by its API Endpoint or
        by its ID (optionally with a region); the two ways are mutually
        exclusive. The database must already exist: this call does not
        create it. When `keyspace` is not given, a DevOps API request is
        issued to find the database's default keyspace; likewise, a region
        lookup may be issued when an ID is given without a region.

        Args:
            api_endpoint_or_id: positional parameter standing for either
                `api_endpoint` or `id`. Supplying it alongside the
                corresponding keyword parameter is an error.
            api_endpoint: the API Endpoint for the target database
                (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`).
            keyspace: the keyspace the resulting Database will primarily
                work on. If unspecified, it is read from the DevOps API.
            id: the target database ID, as an alternative to the API Endpoint.
            region: region the database resides in. Usable only together
                with `id`; if absent, a DevOps API request determines it.
            database_admin_timeout_ms: a timeout, in milliseconds, for the
                possible DevOps API requests mentioned above. Falls back
                to this object's defaults if unspecified. (A single request
                at most per lookup is issued, so all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.
            token: if supplied, replaces this object's token for the spawned
                Database. Either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            spawn_api_options: a complete or partial APIOptions specification
                overriding the defaults inherited from this AstraDBAdmin.
                Named timeout parameters, when also given, take precedence
                over the corresponding settings in here.

        Returns:
            A Database object ready to be used.

        Example:
            >>> my_db = my_astra_db_admin.get_database(
            ...     "https://<ID>-<REGION>.apps.astra.datastax.com",
            ...     keyspace="my_prod_keyspace",
            ... )
            >>> coll = my_db.create_collection(
            ...     "movies",
            ...     definition=(
            ...         CollectionDefinition.builder()
            ...         .set_vector_dimension(2)
            ...         .build()
            ...     )
            ... )
            >>> my_coll.insert_one({"title": "The Title", "$vector": [0.3, 0.4]})
        """

        resolved_admin_timeout_ms, _ = _select_singlereq_timeout_da(
            timeout_options=self.api_options.timeout_options,
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # disambiguate the positional argument into endpoint-or-ID form:
        chosen_endpoint, chosen_id = check_id_endpoint_parg_kwargs(
            p_arg=api_endpoint_or_id, api_endpoint=api_endpoint, id=id
        )
        # lazy importing here to avoid circular dependency
        from astrapy import Database

        # layered options: object defaults < spawn_api_options < explicit token
        db_api_options = self.api_options.with_override(
            spawn_api_options
        ).with_override(
            APIOptions(token=token),
        )

        def _default_keyspace_of(database_id: str) -> str | None:
            # query the DevOps API for the database's default keyspace,
            # tolerating missing pieces in the raw response payload.
            raw_db_info = (
                self.database_info(
                    database_id,
                    timeout_ms=resolved_admin_timeout_ms,
                ).raw
                or {}
            )
            return (raw_db_info.get("info") or {}).get(
                "keyspace", DEFAULT_ASTRA_DB_KEYSPACE
            )

        # endpoint-based designation: region is not a legal companion parameter
        if chosen_endpoint is not None:
            if region is not None:
                raise ValueError(
                    "Parameter `region` not supported with an API endpoint."
                )
            if keyspace:
                chosen_keyspace = keyspace
            else:
                parsed_endpoint = parse_api_endpoint(chosen_endpoint)
                if parsed_endpoint is None:
                    raise ValueError(
                        api_endpoint_parsing_error_message(chosen_endpoint)
                    )
                chosen_keyspace = _default_keyspace_of(parsed_endpoint.database_id)
            return Database(
                api_endpoint=chosen_endpoint,
                keyspace=chosen_keyspace,
                api_options=db_api_options,
            )

        # ID-based designation: resolve the region if not provided, then
        # assemble the endpoint from its components.
        if chosen_id is None:
            raise ValueError("Either `api_endpoint` or `id` must be supplied.")
        resolved_region = normalize_region_for_id(
            database_id=chosen_id,
            environment=self.api_options.environment,
            region_param=region,
            request_timeout_ms=resolved_admin_timeout_ms,
            api_options=self.api_options,
        )
        chosen_keyspace = keyspace if keyspace else _default_keyspace_of(chosen_id)
        return Database(
            api_endpoint=build_api_endpoint(
                environment=self.api_options.environment,
                database_id=chosen_id,
                region=resolved_region,
            ),
            keyspace=chosen_keyspace,
            api_options=db_api_options,
        )

    def get_async_database(
        self,
        api_endpoint_or_id: str | None = None,
        *,
        api_endpoint: str | None = None,
        keyspace: str | None = None,
        id: str | None = None,
        region: str | None = None,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
        token: str | TokenProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncDatabase:
        """
        Spawn an AsyncDatabase instance for data-level work (such as
        creating/managing collections) on one specific database.

        This is the asyncio counterpart of `get_database`: all parameters
        have the same meaning, and the resulting object is the async
        conversion of the Database that method would return.

        Args:
            api_endpoint_or_id: positional parameter standing for either
                `api_endpoint` or `id`. Supplying it alongside the
                corresponding keyword parameter is an error.
            api_endpoint: the API Endpoint for the target database
                (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`).
            keyspace: the keyspace the resulting AsyncDatabase will primarily
                work on. If unspecified, it is read from the DevOps API.
            id: the target database ID, as an alternative to the API Endpoint.
            region: region the database resides in. Usable only together
                with `id`; if absent, a DevOps API request determines it.
            database_admin_timeout_ms: a timeout, in milliseconds, for the
                possible DevOps API requests mentioned above. Falls back
                to this object's defaults if unspecified.
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.
            token: if supplied, replaces this object's token for the spawned
                AsyncDatabase. Either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            spawn_api_options: a complete or partial APIOptions specification
                overriding the defaults inherited from this AstraDBAdmin.
                Named timeout parameters, when also given, take precedence
                over the corresponding settings in here.

        Returns:
            An AsyncDatabase object ready to be used.

        Example:
            >>> async def create_use_collection(
            ...     admin: AstraDBAdmin,
            ...     api_endpoint: str,
            ...     keyspace: str,
            ... ) -> None:
            ...     my_async_db = admin.get_async_database(
            ...         api_endpoint,
            ...         keyspace=keyspace,
            ...     )
            ...     a_coll = await my_async_db.create_collection(
            ...         "movies",
            ...         definition=(
            ...             CollectionDefinition.builder()
            ...             .set_vector_dimension(2)
            ...             .build()
            ...         )
            ...     )
            ...     await a_coll.insert_one(
            ...         {"title": "The Title", "$vector": [0.3, 0.4]}
            ...     )
            ...
            >>> asyncio.run(create_use_collection(
            ...     my_admin,
            ...     "https://<ID>-<REGION>.apps.astra.datastax.com",
            ...     "default_keyspace",
            ... ))
            >>>
        """

        # delegate all the work to the synchronous builder, then convert
        sync_database = self.get_database(
            api_endpoint_or_id=api_endpoint_or_id,
            api_endpoint=api_endpoint,
            keyspace=keyspace,
            id=id,
            region=region,
            token=token,
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
            spawn_api_options=spawn_api_options,
        )
        return sync_database.to_async()

Methods

async def async_create_database(self, name: str, *, cloud_provider: str, region: str, keyspace: str | None = None, wait_until_active: bool = True, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None, token: str | TokenProvider | UnsetType = (unset), spawn_api_options: APIOptions | UnsetType = (unset)) ‑> AstraDBDatabaseAdmin

Create a database as requested, optionally waiting for it to be ready. This is an awaitable method suitable for use within an asyncio event loop.

Args

name
the desired name for the database.
cloud_provider
one of 'aws', 'gcp' or 'azure'.
region
any of the available cloud regions.
keyspace
name for the one keyspace the database starts with. If omitted, DevOps API will use its default.
wait_until_active
if True (default), the method returns only after the newly-created database is in ACTIVE state (a few minutes, usually). If False, it will return right after issuing the creation request to the DevOps API, and it will be responsibility of the caller to check the database status before working with it.
database_admin_timeout_ms
a timeout, in milliseconds, for the whole requested operation to complete. This is used only if wait_until_active is true, i.e. if the method call must wait and keep querying the DevOps API for the status of the newly-created database.
request_timeout_ms
a timeout, in milliseconds, for each underlying DevOps API HTTP request.
timeout_ms
an alias for both the request_timeout_ms and database_admin_timeout_ms timeout parameters. In practice, regardless of wait_until_active, this parameter dictates an overall timeout on this method call.
token
if supplied, is passed to the returned Database instead of the one set for this object. This can be either a literal token string or a subclass of TokenProvider.
spawn_api_options
a specification - complete or partial - of the API Options to override the defaults inherited from the AstraDBAdmin. This allows for a deeper configuration of the database admin, e.g. concerning timeouts; if this is passed together with the named timeout parameters, the latter will take precedence in their respective settings.

Returns

An AstraDBDatabaseAdmin instance. Note: a timeout event is no guarantee at all that the creation request has not reached the API server and is not going to be, in fact, honored.

Example

>>> asyncio.run(
...     my_astra_db_admin.async_create_database(
...         "new_database",
...         cloud_provider="aws",
...         region="ap-south-1",
...     )
... )
AstraDBDatabaseAdmin(id=...)
Expand source code
async def async_create_database(
    self,
    name: str,
    *,
    cloud_provider: str,
    region: str,
    keyspace: str | None = None,
    wait_until_active: bool = True,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
    token: str | TokenProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> AstraDBDatabaseAdmin:
    """
    Create a database as requested, optionally waiting for it to be ready.
    This is an awaitable method suitable for use within an asyncio event loop.

    Args:
        name: the desired name for the database.
        cloud_provider: one of 'aws', 'gcp' or 'azure'.
        region: any of the available cloud regions.
        keyspace: name for the one keyspace the database starts with.
            If omitted, DevOps API will use its default.
        wait_until_active: if True (default), the method returns only after
            the newly-created database is in ACTIVE state (a few minutes,
            usually). If False, it will return right after issuing the
            creation request to the DevOps API, and it will be the
            responsibility of the caller to check the database status
            before working with it.
        database_admin_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation to complete. This is used only
            if `wait_until_active` is true, i.e. if the method call must
            wait and keep querying the DevOps API for the status of the
            newly-created database.
        request_timeout_ms: a timeout, in milliseconds, for
            each underlying DevOps API HTTP request.
        timeout_ms: an alias for *both* the `request_timeout_ms` and
            `database_admin_timeout_ms` timeout parameters. In practice,
            regardless of `wait_until_active`, this parameter dictates an
            overall timeout on this method call.
        token: if supplied, is passed to the returned Database instead of
            the one set for this object.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        spawn_api_options: a specification - complete or partial - of the
            API Options to override the defaults inherited from the AstraDBAdmin.
            This allows for a deeper configuration of the database admin, e.g.
            concerning timeouts; if this is passed together with
            the named timeout parameters, the latter will take precedence
            in their respective settings.

    Returns:
        An AstraDBDatabaseAdmin instance.

    Note: a timeout event is no guarantee at all that the
    creation request has not reached the API server and is not going
    to be, in fact, honored.

    Example:
        >>> asyncio.run(
        ...     my_astra_db_admin.async_create_database(
        ...         "new_database",
        ...         cloud_provider="aws",
        ...         region="ap-south-1",
        ...     )
        ... )
        AstraDBDatabaseAdmin(id=...)
    """

    # Resolve the effective overall timeout: an explicit per-call value
    # wins over the `timeout_ms` alias, which wins over this object's
    # configured defaults. The label travels along for error reporting.
    _database_admin_timeout_ms, _da_label = _first_valid_timeout(
        (database_admin_timeout_ms, "database_admin_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (
            self.api_options.timeout_options.database_admin_timeout_ms,
            "database_admin_timeout_ms",
        ),
    )
    # Same precedence rule for the per-HTTP-request timeout.
    _request_timeout_ms, _rt_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    # Build the DevOps API creation payload, dropping unset (None) entries
    # (e.g. an omitted `keyspace`) so the API applies its own defaults.
    cd_payload = {
        k: v
        for k, v in {
            "name": name,
            "tier": "serverless",
            "cloudProvider": cloud_provider,
            "region": region,
            "capacityUnits": 1,
            "dbType": "vector",
            "keyspace": keyspace,
        }.items()
        if v is not None
    }
    # One overall deadline spanning the creation request plus any
    # subsequent status polling; each individual request is additionally
    # capped by `_request_timeout_ms`.
    timeout_manager = MultiCallTimeoutManager(
        overall_timeout_ms=_database_admin_timeout_ms,
        dev_ops_api=True,
        timeout_label=_da_label,
    )
    logger.info(
        f"creating database {name}/({cloud_provider}, {region}) "
        "(DevOps API), async"
    )
    cd_raw_response = await self._dev_ops_api_commander.async_raw_request(
        http_method=HttpMethod.POST,
        payload=cd_payload,
        timeout_context=timeout_manager.remaining_timeout(
            cap_time_ms=_request_timeout_ms,
            cap_timeout_label=_rt_label,
        ),
    )
    # A successful creation must come back as HTTP 201 - Created.
    if cd_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_CREATED:
        raise DevOpsAPIException(
            f"DB creation ('{name}') failed: API returned HTTP "
            f"{cd_raw_response.status_code} instead of "
            f"{DEV_OPS_RESPONSE_HTTP_CREATED} - Created."
        )
    # The new database's ID is conveyed in the response's Location header.
    new_database_id = cd_raw_response.headers["Location"]
    logger.info(
        "DevOps API returned from creating database "
        f"{name}/({cloud_provider}, {region}), async"
    )
    if wait_until_active:
        # Poll the DevOps API until the database leaves the transient
        # PENDING/INITIALIZING states; anything other than ACTIVE at that
        # point is treated as a failure.
        last_status_seen = DEV_OPS_DATABASE_STATUS_PENDING
        while last_status_seen in {
            DEV_OPS_DATABASE_STATUS_PENDING,
            DEV_OPS_DATABASE_STATUS_INITIALIZING,
        }:
            logger.info(
                f"sleeping to poll for status of '{new_database_id}', async"
            )
            await asyncio.sleep(DEV_OPS_DATABASE_POLL_INTERVAL_S)
            last_db_info = await self._async_database_info_ctx(
                id=new_database_id,
                timeout_context=timeout_manager.remaining_timeout(
                    cap_time_ms=_request_timeout_ms,
                    cap_timeout_label=_rt_label,
                ),
            )
            last_status_seen = last_db_info.status
        if last_status_seen != DEV_OPS_DATABASE_STATUS_ACTIVE:
            raise DevOpsAPIException(
                f"Database {name} entered unexpected status "
                f"{last_status_seen} after PENDING"
            )
    # return the database instance
    logger.info(
        f"finished creating database '{new_database_id}' = "
        f"{name}/({cloud_provider}, {region}) (DevOps API), async"
    )
    # Layer the overrides: spawn_api_options first, then an explicit token
    # (if any) on top, so the token argument has the final say.
    _final_api_options = self.api_options.with_override(
        spawn_api_options
    ).with_override(APIOptions(token=token))
    return AstraDBDatabaseAdmin.from_astra_db_admin(
        api_endpoint=build_api_endpoint(
            environment=self.api_options.environment,
            database_id=new_database_id,
            region=region,
        ),
        astra_db_admin=self,
        spawn_api_options=_final_api_options,
    )
async def async_database_info(self, id: str, *, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> AstraDBAdminDatabaseInfo

Get the full information on a given database, through a request to the DevOps API. This is an awaitable method suitable for use within an asyncio event loop.

Args

id
the ID of the target database, e.g. "01234567-89ab-cdef-0123-456789abcdef".
database_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying DevOps API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for database_admin_timeout_ms.
timeout_ms
an alias for database_admin_timeout_ms.

Returns

An AstraDBAdminDatabaseInfo object.

Example

>>> async def check_if_db_active(db_id: str) -> bool:
...     db_info = await my_astra_db_admin.async_database_info(db_id)
...     return db_info.status == "ACTIVE"
...
>>> asyncio.run(check_if_db_active("01234567-..."))
True
Expand source code
async def async_database_info(
    self,
    id: str,
    *,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> AstraDBAdminDatabaseInfo:
    """
    Fetch the full information on a given database with a DevOps API request.
    This is an awaitable method suitable for use within an asyncio event loop.

    Args:
        id: the ID of the target database, e.g.
            "01234567-89ab-cdef-0123-456789abcdef".
        database_admin_timeout_ms: a timeout, in milliseconds, imposed on the
            underlying DevOps API request; this object's defaults apply if
            not provided. (A single API request is issued by this method,
            hence all timeout parameters are treated the same.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.

    Returns:
        An AstraDBAdminDatabaseInfo object.

    Example:
        >>> async def check_if_db_active(db_id: str) -> bool:
        ...     db_info = await my_astra_db_admin.async_database_info(db_id)
        ...     return db_info.status == "ACTIVE"
        ...
        >>> asyncio.run(check_if_db_active("01234567-..."))
        True
    """

    # The three interchangeable timeout parameters collapse into a single
    # effective value (plus its label, used in timeout error messages).
    chosen_timeout_ms, chosen_label = _select_singlereq_timeout_da(
        timeout_options=self.api_options.timeout_options,
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    request_context = _TimeoutContext(
        request_ms=chosen_timeout_ms,
        label=chosen_label,
    )
    # Delegate the actual DevOps API call to the internal context-based method.
    return await self._async_database_info_ctx(
        id=id,
        timeout_context=request_context,
    )
async def async_drop_database(self, id: str, *, wait_until_active: bool = True, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Drop a database, i.e. delete it completely and permanently with all its data. Async version of the method, for use in an asyncio context.

Args

id
The ID of the database to drop, e.g. "01234567-89ab-cdef-0123-456789abcdef".
wait_until_active
if True (default), the method returns only after the database has actually been deleted (generally a few minutes). If False, it will return right after issuing the drop request to the DevOps API, and it will be responsibility of the caller to check the database status/availability after that, if desired.
database_admin_timeout_ms
a timeout, in milliseconds, for the whole requested operation to complete. This is used only if wait_until_active is true, i.e. if the method call must wait and keep querying the DevOps API for the status of the newly-deleted database.
request_timeout_ms
a timeout, in milliseconds, for each underlying DevOps API HTTP request.
timeout_ms
an alias for both the request_timeout_ms and database_admin_timeout_ms timeout parameters. In practice, regardless of wait_until_active, this parameter dictates an overall timeout on this method call.

Note: a timeout event is no guarantee at all that the deletion request has not reached the API server and is not going to be, in fact, honored.

Example

>>> asyncio.run(
...     my_astra_db_admin.async_drop_database("01234567-...")
... )
Expand source code
async def async_drop_database(
    self,
    id: str,
    *,
    wait_until_active: bool = True,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Drop a database, i.e. delete it completely and permanently with all its data.
    Async version of the method, for use in an asyncio context.

    Args:
        id: The ID of the database to drop, e.g.
            "01234567-89ab-cdef-0123-456789abcdef".
        wait_until_active: if True (default), the method returns only after
            the database has actually been deleted (generally a few minutes).
            If False, it will return right after issuing the
            drop request to the DevOps API, and it will be the
            responsibility of the caller to check the database
            status/availability after that, if desired.
        database_admin_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation to complete. This is used only
            if `wait_until_active` is true, i.e. if the method call must
            wait and keep querying the DevOps API for the status of the
            newly-deleted database.
        request_timeout_ms: a timeout, in milliseconds, for
            each underlying DevOps API HTTP request.
        timeout_ms: an alias for *both* the `request_timeout_ms` and
            `database_admin_timeout_ms` timeout parameters. In practice,
            regardless of `wait_until_active`, this parameter dictates an
            overall timeout on this method call.

    Note: a timeout event is no guarantee at all that the
    deletion request has not reached the API server and is not going
    to be, in fact, honored.

    Example:
        >>> asyncio.run(
        ...     my_astra_db_admin.async_drop_database("01234567-...")
        ... )
    """

    # Resolve the effective overall timeout: an explicit per-call value
    # wins over the `timeout_ms` alias, which wins over this object's
    # configured defaults. The label travels along for error reporting.
    _database_admin_timeout_ms, _da_label = _first_valid_timeout(
        (database_admin_timeout_ms, "database_admin_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (
            self.api_options.timeout_options.database_admin_timeout_ms,
            "database_admin_timeout_ms",
        ),
    )
    # Same precedence rule for the per-HTTP-request timeout.
    _request_timeout_ms, _rt_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    # One overall deadline spanning the terminate request plus any
    # subsequent status polling.
    timeout_manager = MultiCallTimeoutManager(
        overall_timeout_ms=_database_admin_timeout_ms,
        dev_ops_api=True,
        timeout_label=_da_label,
    )
    logger.info(f"dropping database '{id}' (DevOps API), async")
    te_raw_response = await self._dev_ops_api_commander.async_raw_request(
        http_method=HttpMethod.POST,
        additional_path=f"{id}/terminate",
        timeout_context=timeout_manager.remaining_timeout(
            cap_time_ms=_request_timeout_ms,
            cap_timeout_label=_rt_label,
        ),
    )
    if te_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_ACCEPTED:
        # Bugfix: the expected termination status is HTTP 202 - Accepted;
        # the previous message mislabeled it "Created" (copy-paste from
        # the creation path, which expects 201).
        raise DevOpsAPIException(
            f"DB deletion ('{id}') failed: API returned HTTP "
            f"{te_raw_response.status_code} instead of "
            f"{DEV_OPS_RESPONSE_HTTP_ACCEPTED} - Accepted."
        )
    logger.info(f"DevOps API returned from dropping database '{id}', async")
    if wait_until_active:
        # Poll the full database listing until this database no longer
        # appears (fully deleted) or leaves the TERMINATING state.
        last_status_seen: str | None = DEV_OPS_DATABASE_STATUS_TERMINATING
        _db_name: str | None = None
        while last_status_seen == DEV_OPS_DATABASE_STATUS_TERMINATING:
            logger.info(f"sleeping to poll for status of '{id}', async")
            await asyncio.sleep(DEV_OPS_DATABASE_POLL_INTERVAL_S)
            #
            detected_databases = [
                a_db_info
                for a_db_info in await self._async_list_databases_ctx(
                    include=None,
                    provider=None,
                    page_size=None,
                    timeout_context=timeout_manager.remaining_timeout(
                        cap_time_ms=_request_timeout_ms,
                        cap_timeout_label=_rt_label,
                    ),
                )
                if a_db_info.id == id
            ]
            if detected_databases:
                last_status_seen = detected_databases[0].status
                _db_name = detected_databases[0].name
            else:
                # Database gone from the listing: deletion completed.
                last_status_seen = None
        if last_status_seen is not None:
            _name_desc = f" ({_db_name})" if _db_name else ""
            # Bugfix: this polling loop watches the TERMINATING state (not
            # PENDING, which pertains to database creation).
            raise DevOpsAPIException(
                f"Database {id}{_name_desc} entered unexpected status "
                f"{last_status_seen} after TERMINATING"
            )
    logger.info(f"finished dropping database '{id}' (DevOps API), async")
async def async_list_databases(self, *, include: str | None = None, provider: str | None = None, page_size: int | None = None, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[AstraDBAdminDatabaseInfo]

Get the list of databases, as obtained with a request to the DevOps API. Async version of the method, for use in an asyncio context.

Args

include
a filter on what databases are to be returned. As per DevOps API, defaults to "nonterminated". Pass "all" to include the already terminated databases.
provider
a filter on the cloud provider for the databases. As per DevOps API, defaults to "ALL". Pass e.g. "AWS" to restrict the results.
page_size
number of results per page from the DevOps API.
database_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (While in the case of very many databases this method may entail multiple DevOps API requests, it is assumed here that this method amounts almost always to one single request: the only timeout imposed on this method execution is one acting on each individual request, with no checks on its overall completion time.)
request_timeout_ms
an alias for database_admin_timeout_ms.
timeout_ms
an alias for database_admin_timeout_ms.

Returns

A list of AstraDBAdminDatabaseInfo objects.

Example

>>> async def check_if_db_exists(db_id: str) -> bool:
...     db_list = await my_astra_db_admin.async_list_databases()
...     return db_id in db_list
...
>>> asyncio.run(check_if_db_exists("xyz"))
True
>>> asyncio.run(check_if_db_exists("01234567-..."))
False
Expand source code
async def async_list_databases(
    self,
    *,
    include: str | None = None,
    provider: str | None = None,
    page_size: int | None = None,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[AstraDBAdminDatabaseInfo]:
    """
    Retrieve the list of databases through a request to the DevOps API.
    Async version of the method, for use in an asyncio context.

    Args:
        include: a filter on what databases are to be returned. As per
            DevOps API, defaults to "nonterminated". Pass "all" to include
            the already terminated databases.
        provider: a filter on the cloud provider for the databases.
            As per DevOps API, defaults to "ALL". Pass e.g. "AWS" to
            restrict the results.
        page_size: number of results per page from the DevOps API.
        database_admin_timeout_ms: a timeout, in milliseconds, imposed on the
            underlying API request; this object's defaults apply if not
            provided. (While very many databases may entail multiple DevOps
            API requests, this method is assumed to amount almost always to
            one single request: the only timeout imposed on this method
            execution acts on each individual request, with no checks
            on its overall completion time.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.

    Returns:
        A list of AstraDBAdminDatabaseInfo objects.

    Example:
        >>> async def check_if_db_exists(db_id: str) -> bool:
        ...     db_list = await my_astra_db_admin.async_list_databases()
        ...     return db_id in db_list
        ...
        >>> asyncio.run(check_if_db_exists("xyz"))
        True
        >>> asyncio.run(check_if_db_exists("01234567-..."))
        False
    """

    # The three interchangeable timeout parameters collapse into a single
    # effective value (plus its label, used in timeout error messages).
    effective_timeout_ms, effective_label = _select_singlereq_timeout_da(
        timeout_options=self.api_options.timeout_options,
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    listing_context = _TimeoutContext(
        request_ms=effective_timeout_ms,
        label=effective_label,
    )
    # Delegate the actual DevOps API call to the internal context-based method.
    return await self._async_list_databases_ctx(
        include=include,
        provider=provider,
        page_size=page_size,
        timeout_context=listing_context,
    )
def create_database(self, name: str, *, cloud_provider: str, region: str, keyspace: str | None = None, wait_until_active: bool = True, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None, token: str | TokenProvider | UnsetType = (unset), spawn_api_options: APIOptions | UnsetType = (unset)) ‑> AstraDBDatabaseAdmin

Create a database as requested, optionally waiting for it to be ready.

Args

name
the desired name for the database.
cloud_provider
one of 'aws', 'gcp' or 'azure'.
region
any of the available cloud regions.
keyspace
name for the one keyspace the database starts with. If omitted, DevOps API will use its default.
wait_until_active
if True (default), the method returns only after the newly-created database is in ACTIVE state (a few minutes, usually). If False, it will return right after issuing the creation request to the DevOps API, and it will be responsibility of the caller to check the database status before working with it.
database_admin_timeout_ms
a timeout, in milliseconds, for the whole requested operation to complete. This is used only if wait_until_active is true, i.e. if the method call must wait and keep querying the DevOps API for the status of the newly-created database.
request_timeout_ms
a timeout, in milliseconds, for each underlying DevOps API HTTP request.
timeout_ms
an alias for both the request_timeout_ms and database_admin_timeout_ms timeout parameters. In practice, regardless of wait_until_active, this parameter dictates an overall timeout on this method call.
token
if supplied, is passed to the returned Database instead of the one set for this object. This can be either a literal token string or a subclass of TokenProvider.
spawn_api_options
a specification - complete or partial - of the API Options to override the defaults inherited from the AstraDBAdmin. This allows for a deeper configuration of the database admin, e.g. concerning timeouts; if this is passed together with the named timeout parameters, the latter will take precedence in their respective settings.

Returns

An AstraDBDatabaseAdmin instance. Note: a timeout event is no guarantee at all that the creation request has not reached the API server and is not going to be, in fact, honored.

Example

>>> my_new_db_admin = my_astra_db_admin.create_database(
...     "new_database",
...     cloud_provider="aws",
...     region="ap-south-1",
... )
>>> my_new_db = my_new_db_admin.get_database()
>>> my_coll = my_new_db.create_collection(
...     "movies",
...     definition=(
...         CollectionDefinition.builder()
...         .set_vector_dimension(2)
...         .build()
...     )
... )
>>> my_coll.insert_one({"title": "The Title", "$vector": [0.1, 0.2]})
Expand source code
def create_database(
    self,
    name: str,
    *,
    cloud_provider: str,
    region: str,
    keyspace: str | None = None,
    wait_until_active: bool = True,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
    token: str | TokenProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> AstraDBDatabaseAdmin:
    """
    Create a database as requested, optionally waiting for it to be ready.

    Args:
        name: the desired name for the database.
        cloud_provider: one of 'aws', 'gcp' or 'azure'.
        region: any of the available cloud regions.
        keyspace: name for the one keyspace the database starts with.
            If omitted, DevOps API will use its default.
        wait_until_active: if True (default), the method returns only after
            the newly-created database is in ACTIVE state (a few minutes,
            usually). If False, it will return right after issuing the
            creation request to the DevOps API, and it will be the
            responsibility of the caller to check the database status
            before working with it.
        database_admin_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation to complete. This is used only
            if `wait_until_active` is true, i.e. if the method call must
            wait and keep querying the DevOps API for the status of the
            newly-created database.
        request_timeout_ms: a timeout, in milliseconds, for
            each underlying DevOps API HTTP request.
        timeout_ms: an alias for *both* the `request_timeout_ms` and
            `database_admin_timeout_ms` timeout parameters. In practice,
            regardless of `wait_until_active`, this parameter dictates an
            overall timeout on this method call.
        token: if supplied, is passed to the returned Database instead of
            the one set for this object.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        spawn_api_options: a specification - complete or partial - of the
            API Options to override the defaults inherited from the AstraDBAdmin.
            This allows for a deeper configuration of the database admin, e.g.
            concerning timeouts; if this is passed together with
            the named timeout parameters, the latter will take precedence
            in their respective settings.

    Returns:
        An AstraDBDatabaseAdmin instance.

    Note: a timeout event is no guarantee at all that the
    creation request has not reached the API server and is not going
    to be, in fact, honored.

    Example:
        >>> my_new_db_admin = my_astra_db_admin.create_database(
        ...     "new_database",
        ...     cloud_provider="aws",
        ...     region="ap-south-1",
        ... )
        >>> my_new_db = my_new_db_admin.get_database()
        >>> my_coll = my_new_db.create_collection(
        ...     "movies",
        ...     definition=(
        ...         CollectionDefinition.builder()
        ...         .set_vector_dimension(2)
        ...         .build()
        ...     )
        ... )
        >>> my_coll.insert_one({"title": "The Title", "$vector": [0.1, 0.2]})
    """

    # Resolve the effective overall timeout: an explicit per-call value
    # wins over the `timeout_ms` alias, which in turn wins over this
    # object's configured defaults. Labels are kept for error reporting.
    _database_admin_timeout_ms, _da_label = _first_valid_timeout(
        (database_admin_timeout_ms, "database_admin_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (
            self.api_options.timeout_options.database_admin_timeout_ms,
            "database_admin_timeout_ms",
        ),
    )
    # Same precedence rule for the per-HTTP-request timeout.
    _request_timeout_ms, _rt_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    # Build the DevOps API creation payload, dropping unset (None) entries
    # (e.g. an omitted `keyspace`) so the API applies its own defaults.
    cd_payload = {
        k: v
        for k, v in {
            "name": name,
            "tier": "serverless",
            "cloudProvider": cloud_provider,
            "region": region,
            "capacityUnits": 1,
            "dbType": "vector",
            "keyspace": keyspace,
        }.items()
        if v is not None
    }
    # One overall deadline spanning the creation request plus any
    # subsequent status polling; each individual request is additionally
    # capped by `_request_timeout_ms`.
    timeout_manager = MultiCallTimeoutManager(
        overall_timeout_ms=_database_admin_timeout_ms,
        dev_ops_api=True,
        timeout_label=_da_label,
    )
    logger.info(
        f"creating database {name}/({cloud_provider}, {region}) (DevOps API)"
    )
    cd_raw_response = self._dev_ops_api_commander.raw_request(
        http_method=HttpMethod.POST,
        payload=cd_payload,
        timeout_context=timeout_manager.remaining_timeout(
            cap_time_ms=_request_timeout_ms,
            cap_timeout_label=_rt_label,
        ),
    )
    # A successful creation must come back as HTTP 201 - Created.
    if cd_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_CREATED:
        raise DevOpsAPIException(
            f"DB creation ('{name}') failed: API returned HTTP "
            f"{cd_raw_response.status_code} instead of "
            f"{DEV_OPS_RESPONSE_HTTP_CREATED} - Created."
        )
    # The new database's ID is conveyed in the response's Location header.
    new_database_id = cd_raw_response.headers["Location"]
    logger.info(
        "DevOps API returned from creating database "
        f"{name}/({cloud_provider}, {region})"
    )
    if wait_until_active:
        # Poll the DevOps API until the database leaves the transient
        # PENDING/INITIALIZING states; anything other than ACTIVE at that
        # point is treated as a failure.
        last_status_seen = DEV_OPS_DATABASE_STATUS_PENDING
        while last_status_seen in {
            DEV_OPS_DATABASE_STATUS_PENDING,
            DEV_OPS_DATABASE_STATUS_INITIALIZING,
        }:
            logger.info(f"sleeping to poll for status of '{new_database_id}'")
            time.sleep(DEV_OPS_DATABASE_POLL_INTERVAL_S)
            last_db_info = self._database_info_ctx(
                id=new_database_id,
                timeout_context=timeout_manager.remaining_timeout(
                    cap_time_ms=_request_timeout_ms,
                    cap_timeout_label=_rt_label,
                ),
            )
            last_status_seen = last_db_info.status
        if last_status_seen != DEV_OPS_DATABASE_STATUS_ACTIVE:
            raise DevOpsAPIException(
                f"Database {name} entered unexpected status {last_status_seen} after PENDING"
            )
    # return the database instance
    logger.info(
        f"finished creating database '{new_database_id}' = "
        f"{name}/({cloud_provider}, {region}) (DevOps API)"
    )
    # Layer the overrides: spawn_api_options first, then an explicit token
    # (if any) on top, so the token argument has the final say.
    _final_api_options = self.api_options.with_override(
        spawn_api_options
    ).with_override(APIOptions(token=token))
    return AstraDBDatabaseAdmin.from_astra_db_admin(
        api_endpoint=build_api_endpoint(
            environment=self.api_options.environment,
            database_id=new_database_id,
            region=region,
        ),
        astra_db_admin=self,
        spawn_api_options=_final_api_options,
    )
def database_info(self, id: str, *, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> AstraDBAdminDatabaseInfo

Get the full information on a given database, through a request to the DevOps API.

Args

id
the ID of the target database, e.g. "01234567-89ab-cdef-0123-456789abcdef".
database_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying DevOps API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for database_admin_timeout_ms.
timeout_ms
an alias for database_admin_timeout_ms.

Returns

An AstraDBAdminDatabaseInfo object.

Example

>>> details_of_my_db = my_astra_db_admin.database_info("01234567-...")
>>> details_of_my_db.id
'01234567-...'
>>> details_of_my_db.status
'ACTIVE'
>>> details_of_my_db.info.region
'eu-west-1'
Expand source code
def database_info(
    self,
    id: str,
    *,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> AstraDBAdminDatabaseInfo:
    """
    Get the full information on a given database, through a request to the DevOps API.

    Args:
        id: the ID of the target database, e.g.
            "01234567-89ab-cdef-0123-456789abcdef".
        database_admin_timeout_ms: a timeout, in milliseconds, applied to the
            underlying DevOps API request. If omitted, this object's defaults
            are used. (Since this method amounts to a single API request, all
            timeout parameters are treated the same.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.

    Returns:
        An AstraDBAdminDatabaseInfo object.

    Example:
        >>> details_of_my_db = my_astra_db_admin.database_info("01234567-...")
        >>> details_of_my_db.id
        '01234567-...'
        >>> details_of_my_db.status
        'ACTIVE'
        >>> details_of_my_db.info.region
        'eu-west-1'
    """

    # Resolve the effective single-request timeout (plus the label used
    # in timeout-error reporting) from the explicit parameters and this
    # object's configured defaults.
    _timeout_ms, _timeout_label = _select_singlereq_timeout_da(
        timeout_options=self.api_options.timeout_options,
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    _timeout_context = _TimeoutContext(
        request_ms=_timeout_ms,
        label=_timeout_label,
    )
    # Delegate the actual DevOps API call to the context-aware helper.
    return self._database_info_ctx(id=id, timeout_context=_timeout_context)
def drop_database(self, id: str, *, wait_until_active: bool = True, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Drop a database, i.e. delete it completely and permanently with all its data.

Args

id
The ID of the database to drop, e.g. "01234567-89ab-cdef-0123-456789abcdef".
wait_until_active
if True (default), the method returns only after the database has actually been deleted (generally a few minutes). If False, it will return right after issuing the drop request to the DevOps API, and it will be the responsibility of the caller to check the database status/availability after that, if desired.
database_admin_timeout_ms
a timeout, in milliseconds, for the whole requested operation to complete. This is used only if wait_until_active is true, i.e. if the method call must wait and keep querying the DevOps API for the status of the newly-deleted database.
request_timeout_ms
a timeout, in milliseconds, for each underlying DevOps API HTTP request.
timeout_ms
an alias for both the request_timeout_ms and database_admin_timeout_ms timeout parameters. In practice, regardless of wait_until_active, this parameter dictates an overall timeout on this method call.

Note: a timeout event is no guarantee at all that the deletion request has not reached the API server and is not going to be, in fact, honored.

Example

>>> database_list_pre = my_astra_db_admin.list_databases()
>>> len(database_list_pre)
3
>>> my_astra_db_admin.drop_database("01234567-...")
>>> database_list_post = my_astra_db_admin.list_databases()
>>> len(database_list_post)
2
Expand source code
def drop_database(
    self,
    id: str,
    *,
    wait_until_active: bool = True,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Drop a database, i.e. delete it completely and permanently with all its data.

    Args:
        id: The ID of the database to drop, e.g.
            "01234567-89ab-cdef-0123-456789abcdef".
        wait_until_active: if True (default), the method returns only after
            the database has actually been deleted (generally a few minutes).
            If False, it will return right after issuing the
            drop request to the DevOps API, and it will be the responsibility
            of the caller to check the database status/availability
            after that, if desired.
        database_admin_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation to complete. This is used only
            if `wait_until_active` is true, i.e. if the method call must
            wait and keep querying the DevOps API for the status of the
            newly-deleted database.
        request_timeout_ms: a timeout, in milliseconds, for
            each underlying DevOps API HTTP request.
        timeout_ms: an alias for *both* the `request_timeout_ms` and
            `database_admin_timeout_ms` timeout parameters. In practice,
            regardless of `wait_until_active`, this parameter dictates an
            overall timeout on this method call.

    Note: a timeout event is no guarantee at all that the
    deletion request has not reached the API server and is not going
    to be, in fact, honored.

    Example:
        >>> database_list_pre = my_astra_db_admin.list_databases()
        >>> len(database_list_pre)
        3
        >>> my_astra_db_admin.drop_database("01234567-...")
        >>> database_list_post = my_astra_db_admin.list_databases()
        >>> len(database_list_post)
        2
    """

    # Resolve the overall-operation timeout and the per-request timeout
    # (each paired with a label used in timeout-error reporting).
    _database_admin_timeout_ms, _da_label = _first_valid_timeout(
        (database_admin_timeout_ms, "database_admin_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (
            self.api_options.timeout_options.database_admin_timeout_ms,
            "database_admin_timeout_ms",
        ),
    )
    _request_timeout_ms, _rt_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    timeout_manager = MultiCallTimeoutManager(
        overall_timeout_ms=_database_admin_timeout_ms,
        dev_ops_api=True,
        timeout_label=_da_label,
    )
    logger.info(f"dropping database '{id}' (DevOps API)")
    te_raw_response = self._dev_ops_api_commander.raw_request(
        http_method=HttpMethod.POST,
        additional_path=f"{id}/terminate",
        timeout_context=timeout_manager.remaining_timeout(
            cap_time_ms=_request_timeout_ms,
            cap_timeout_label=_rt_label,
        ),
    )
    if te_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_ACCEPTED:
        # Fix: the expected status is HTTP 202 "Accepted" (the message
        # previously said "Created", which names HTTP 201).
        raise DevOpsAPIException(
            f"DB deletion ('{id}') failed: API returned HTTP "
            f"{te_raw_response.status_code} instead of "
            f"{DEV_OPS_RESPONSE_HTTP_ACCEPTED} - Accepted"
        )
    logger.info(f"DevOps API returned from dropping database '{id}'")
    if wait_until_active:
        last_status_seen: str | None = DEV_OPS_DATABASE_STATUS_TERMINATING
        _db_name: str | None = None
        while last_status_seen == DEV_OPS_DATABASE_STATUS_TERMINATING:
            logger.info(f"sleeping to poll for status of '{id}'")
            time.sleep(DEV_OPS_DATABASE_POLL_INTERVAL_S)
            # A successfully-dropped database eventually disappears from the
            # listing; filter the full database list for this ID to detect that.
            detected_databases = [
                a_db_info
                for a_db_info in self._list_databases_ctx(
                    include=None,
                    provider=None,
                    page_size=None,
                    timeout_context=timeout_manager.remaining_timeout(
                        cap_time_ms=_request_timeout_ms,
                        cap_timeout_label=_rt_label,
                    ),
                )
                if a_db_info.id == id
            ]
            if detected_databases:
                last_status_seen = detected_databases[0].status
                _db_name = detected_databases[0].name
            else:
                # Database no longer listed: the deletion has completed.
                last_status_seen = None
        if last_status_seen is not None:
            _name_desc = f" ({_db_name})" if _db_name else ""
            # Fix: this polling loop follows the TERMINATING status (the
            # message previously said "after PENDING", copied from the
            # database-creation path).
            raise DevOpsAPIException(
                f"Database {id}{_name_desc} entered unexpected status "
                f"{last_status_seen} after TERMINATING"
            )
    logger.info(f"finished dropping database '{id}' (DevOps API)")
def get_async_database(self, api_endpoint_or_id: str | None = None, *, api_endpoint: str | None = None, keyspace: str | None = None, id: str | None = None, region: str | None = None, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None, token: str | TokenProvider | UnsetType = (unset), spawn_api_options: APIOptions | UnsetType = (unset)) ‑> AsyncDatabase

Create an AsyncDatabase instance for a specific database, to be used when doing data-level work (such as creating/managing collections).

Args

api_endpoint_or_id
positional parameter that can stand for both api_endpoint and id. Passing them together is an error.
api_endpoint
the API Endpoint for the target database (e.g. https://<ID>-<REGION>.apps.astra.datastax.com). The database must exist already for the resulting object to be effectively used; in other words, this invocation does not create the database, just the object instance.
keyspace
used to specify a certain keyspace the resulting AsyncDatabase will primarily work on. If not specified, an additional DevOps API call reveals the default keyspace for the target database.
id
the target database ID. This is alternative to using the API Endpoint.
region
the region to use for connecting to the database. The database must be located in that region. This parameter can be used only if the database is specified by its ID (instead of API Endpoint). If this parameter is not passed, and cannot be inferred from the API endpoint, an additional DevOps API request is made to determine the default region and use it subsequently.
database_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying DevOps API request for 'region', should it be necessary. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for database_admin_timeout_ms.
timeout_ms
an alias for database_admin_timeout_ms.
token
if supplied, is passed to the Database instead of the one set for this object. This can be either a literal token string or a subclass of TokenProvider.
spawn_api_options
a specification - complete or partial - of the API Options to override the defaults inherited from the AstraDBAdmin. This allows for a deeper configuration of the database, e.g. concerning timeouts; if this is passed together with the named timeout parameters, the latter will take precedence in their respective settings.

Returns

An AsyncDatabase object ready to be used.

Example

>>> async def create_use_collection(
...     admin: AstraDBAdmin,
...     api_endpoint: str,
...     keyspace: str,
... ) -> None:
...     my_async_db = admin.get_async_database(
...         api_endpoint,
...         keyspace=keyspace,
...     )
...     a_coll = await my_async_db.create_collection(
...         "movies",
...         definition=(
...             CollectionDefinition.builder()
...             .set_vector_dimension(2)
...             .build()
...         )
...     )
...     await a_coll.insert_one(
...         {"title": "The Title", "$vector": [0.3, 0.4]}
...     )
...
>>> asyncio.run(create_use_collection(
...     my_admin,
...     "https://<ID>-<REGION>.apps.astra.datastax.com",
...     "default_keyspace",
... ))
>>>
Expand source code
def get_async_database(
    self,
    api_endpoint_or_id: str | None = None,
    *,
    api_endpoint: str | None = None,
    keyspace: str | None = None,
    id: str | None = None,
    region: str | None = None,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
    token: str | TokenProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> AsyncDatabase:
    """
    Create an AsyncDatabase instance for a specific database, to be used
    when doing data-level work (such as creating/managing collections).

    Args:
        api_endpoint_or_id: positional parameter that can stand for both
            `api_endpoint` and `id`. Passing them together is an error.
        api_endpoint: the API Endpoint for the target database
            (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`).
            The database must exist already for the resulting object to be
            effectively used; this invocation does not create the database,
            it only creates the object instance.
        keyspace: used to specify a certain keyspace the resulting
            AsyncDatabase will primarily work on. If not specified, an
            additional DevOps API call reveals the default keyspace for
            the target database.
        id: the target database ID. This is alternative to using the API Endpoint.
        region: the region to use for connecting to the database. The database
            must be located in that region. This parameter can be used only
            when the database is specified by its ID (instead of API Endpoint).
            If it is not passed and cannot be inferred from the API endpoint,
            an additional DevOps API request determines the default region,
            which is then used.
        database_admin_timeout_ms: a timeout, in milliseconds, applied to the
            underlying DevOps API request for 'region', should it be necessary.
            If omitted, this object's defaults are used. (This method issues
            a single API request, hence all timeout parameters are treated
            the same.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.
        token: if supplied, is passed to the Database instead of the one set
            for this object. This can be either a literal token string or a
            subclass of `astrapy.authentication.TokenProvider`.
        spawn_api_options: a specification - complete or partial - of the
            API Options to override the defaults inherited from the
            AstraDBAdmin. This allows for a deeper configuration of the
            database, e.g. concerning timeouts; if this is passed together
            with the named timeout parameters, the latter take precedence
            in their respective settings.

    Returns:
        An AsyncDatabase object ready to be used.

    Example:
        >>> async def create_use_collection(
        ...     admin: AstraDBAdmin,
        ...     api_endpoint: str,
        ...     keyspace: str,
        ... ) -> None:
        ...     my_async_db = admin.get_async_database(
        ...         api_endpoint,
        ...         keyspace=keyspace,
        ...     )
        ...     a_coll = await my_async_db.create_collection(
        ...         "movies",
        ...         definition=(
        ...             CollectionDefinition.builder()
        ...             .set_vector_dimension(2)
        ...             .build()
        ...         )
        ...     )
        ...     await a_coll.insert_one(
        ...         {"title": "The Title", "$vector": [0.3, 0.4]}
        ...     )
        ...
        >>> asyncio.run(create_use_collection(
        ...     my_admin,
        ...     "https://<ID>-<REGION>.apps.astra.datastax.com",
        ...     "default_keyspace",
        ... ))
        >>>
    """

    # This is a thin asynchronous counterpart of `get_database`: build the
    # synchronous Database with identical arguments, then convert it into
    # its async flavor.
    sync_database = self.get_database(
        api_endpoint_or_id=api_endpoint_or_id,
        api_endpoint=api_endpoint,
        token=token,
        keyspace=keyspace,
        id=id,
        region=region,
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
        spawn_api_options=spawn_api_options,
    )
    return sync_database.to_async()
def get_database(self, api_endpoint_or_id: str | None = None, *, api_endpoint: str | None = None, keyspace: str | None = None, id: str | None = None, region: str | None = None, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None, token: str | TokenProvider | UnsetType = (unset), spawn_api_options: APIOptions | UnsetType = (unset)) ‑> Database

Create a Database instance for a specific database, to be used when doing data-level work (such as creating/managing collections).

Args

api_endpoint_or_id
positional parameter that can stand for both api_endpoint and id. Passing them together is an error.
api_endpoint
the API Endpoint for the target database (e.g. https://<ID>-<REGION>.apps.astra.datastax.com). The database must exist already for the resulting object to be effectively used; in other words, this invocation does not create the database, just the object instance.
keyspace
used to specify a certain keyspace the resulting Database will primarily work on. If not specified, an additional DevOps API call reveals the default keyspace for the target database.
id
the target database ID. This is alternative to using the API Endpoint.
region
the region to use for connecting to the database. The database must be located in that region. This parameter can be used only if the database is specified by its ID (instead of API Endpoint). If this parameter is not passed, and cannot be inferred from the API endpoint, an additional DevOps API request is made to determine the default region and use it subsequently.
database_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying DevOps API request for 'region', should it be necessary. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for database_admin_timeout_ms.
timeout_ms
an alias for database_admin_timeout_ms.
token
if supplied, is passed to the Database instead of the one set for this object. This can be either a literal token string or a subclass of TokenProvider.
spawn_api_options
a specification - complete or partial - of the API Options to override the defaults inherited from the AstraDBAdmin. This allows for a deeper configuration of the database, e.g. concerning timeouts; if this is passed together with the named timeout parameters, the latter will take precedence in their respective settings.

Returns

A Database object ready to be used.

Example

>>> my_db = my_astra_db_admin.get_database(
...     "https://<ID>-<REGION>.apps.astra.datastax.com",
...     keyspace="my_prod_keyspace",
... )
>>> coll = my_db.create_collection(
...     "movies",
...     definition=(
...         CollectionDefinition.builder()
...         .set_vector_dimension(2)
...         .build()
...     )
... )
>>> coll.insert_one({"title": "The Title", "$vector": [0.3, 0.4]})
Expand source code
def get_database(
    self,
    api_endpoint_or_id: str | None = None,
    *,
    api_endpoint: str | None = None,
    keyspace: str | None = None,
    id: str | None = None,
    region: str | None = None,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
    token: str | TokenProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> Database:
    """
    Create a Database instance for a specific database, to be used
    when doing data-level work (such as creating/managing collections).

    Args:
        api_endpoint_or_id: positional parameter that can stand for both
            `api_endpoint` and `id`. Passing them together is an error.
        api_endpoint: the API Endpoint for the target database
            (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`).
            The database must exist already for the resulting object
            to be effectively used; in other words, this invocation
            does not create the database, just the object instance.
        keyspace: used to specify a certain keyspace the resulting
            Database will primarily work on. If not specified, an additional
            DevOps API call reveals the default keyspace for the target database.
        id: the target database ID. This is alternative to using the API Endpoint.
        region: the region to use for connecting to the database. The
            database must be located in that region. This parameter can be used
            only if the database is specified by its ID (instead of API Endpoint).
            If this parameter is not passed, and cannot be inferred
            from the API endpoint, an additional DevOps API request is made
            to determine the default region and use it subsequently.
        database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying DevOps API request for 'region', should it be necessary.
            If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.
        token: if supplied, is passed to the Database instead of
            the one set for this object.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        spawn_api_options: a specification - complete or partial - of the
            API Options to override the defaults inherited from the AstraDBAdmin.
            This allows for a deeper configuration of the database, e.g.
            concerning timeouts; if this is passed together with
            the named timeout parameters, the latter will take precedence
            in their respective settings.

    Returns:
        A Database object ready to be used.

    Example:
        >>> my_db = my_astra_db_admin.get_database(
        ...     "https://<ID>-<REGION>.apps.astra.datastax.com",
        ...     keyspace="my_prod_keyspace",
        ... )
        >>> coll = my_db.create_collection(
        ...     "movies",
        ...     definition=(
        ...         CollectionDefinition.builder()
        ...         .set_vector_dimension(2)
        ...         .build()
        ...     )
        ... )
        >>> coll.insert_one({"title": "The Title", "$vector": [0.3, 0.4]})
    """

    # Resolve the timeout for the (at most one) DevOps API request
    # this call may need to issue.
    _database_admin_timeout_ms, _da_label = _select_singlereq_timeout_da(
        timeout_options=self.api_options.timeout_options,
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # Disambiguate the positional argument into exactly one of
    # (API endpoint, database ID); passing both is rejected here.
    _api_endpoint_p, _id_p = check_id_endpoint_parg_kwargs(
        p_arg=api_endpoint_or_id, api_endpoint=api_endpoint, id=id
    )
    # lazy importing here to avoid circular dependency
    from astrapy import Database

    # Layer the option overrides: spawn_api_options first, then an
    # explicitly-passed token takes precedence over both.
    _final_api_options = self.api_options.with_override(
        spawn_api_options
    ).with_override(
        APIOptions(token=token),
    )

    _keyspace: str | None
    # handle the "endpoint passed as id" case first:
    if _api_endpoint_p is not None:
        if region is not None:
            raise ValueError(
                "Parameter `region` not supported with an API endpoint."
            )
        if keyspace:
            _keyspace = keyspace
        else:
            # No keyspace given: extract the database ID from the endpoint
            # and query the DevOps API for the database's default keyspace,
            # falling back to the standard default if absent from the response.
            parsed_api_endpoint = parse_api_endpoint(_api_endpoint_p)
            if parsed_api_endpoint is None:
                msg = api_endpoint_parsing_error_message(_api_endpoint_p)
                raise ValueError(msg)
            _keyspace = (
                (
                    self.database_info(
                        parsed_api_endpoint.database_id,
                        timeout_ms=_database_admin_timeout_ms,
                    ).raw
                    or {}
                ).get("info")
                or {}
            ).get("keyspace", DEFAULT_ASTRA_DB_KEYSPACE)
        return Database(
            api_endpoint=_api_endpoint_p,
            keyspace=_keyspace,
            api_options=_final_api_options,
        )
    else:
        # the case where an ID is passed:
        if _id_p is None:
            raise ValueError("Either `api_endpoint` or `id` must be supplied.")
        # Determine the region (possibly via a DevOps API lookup) so the
        # API endpoint can be assembled from (environment, ID, region).
        _region = normalize_region_for_id(
            database_id=_id_p,
            environment=self.api_options.environment,
            region_param=region,
            request_timeout_ms=_database_admin_timeout_ms,
            api_options=self.api_options,
        )
        if keyspace:
            _keyspace = keyspace
        else:
            # Same default-keyspace lookup as in the endpoint branch above,
            # this time using the ID directly.
            _keyspace = (
                (
                    self.database_info(
                        _id_p, timeout_ms=_database_admin_timeout_ms
                    ).raw
                    or {}
                ).get("info")
                or {}
            ).get("keyspace", DEFAULT_ASTRA_DB_KEYSPACE)
        return Database(
            api_endpoint=build_api_endpoint(
                environment=self.api_options.environment,
                database_id=_id_p,
                region=_region,
            ),
            keyspace=_keyspace,
            api_options=_final_api_options,
        )
def get_database_admin(self, api_endpoint_or_id: str | None = None, *, api_endpoint: str | None = None, id: str | None = None, region: str | None = None, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None, token: str | TokenProvider | UnsetType = (unset), spawn_api_options: APIOptions | UnsetType = (unset)) ‑> AstraDBDatabaseAdmin

Create an AstraDBDatabaseAdmin object for admin work within a certain database.

Args

api_endpoint_or_id
positional parameter that can stand for both api_endpoint and id. Passing them together is an error.
api_endpoint
the API Endpoint for the target database (e.g. https://<ID>-<REGION>.apps.astra.datastax.com). The database must exist already for the resulting object to be effectively used; in other words, this invocation does not create the database, just the object instance.
id
the target database ID. This is alternative to using the API Endpoint.
region
the region to use for connecting to the database. The database must be located in that region. This parameter can be used only if the database is specified by its ID (instead of API Endpoint). If this parameter is not passed, and cannot be inferred from the API endpoint, an additional DevOps API request is made to determine the default region and use it subsequently.
database_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying DevOps API request for 'region', should it be necessary. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for database_admin_timeout_ms.
timeout_ms
an alias for database_admin_timeout_ms.
token
if supplied, is passed to the Database instead of the one set for this object. This can be either a literal token string or a subclass of TokenProvider.
spawn_api_options
a specification - complete or partial - of the API Options to override the defaults inherited from the AstraDBAdmin. This allows for a deeper configuration of the database admin, e.g. concerning timeouts; if this is passed together with the named timeout parameters, the latter will take precedence in their respective settings.

Returns

An AstraDBDatabaseAdmin instance representing the requested database.

Example

>>> my_db_admin = my_astra_db_admin.get_database_admin("01234567-...")
>>> my_db_admin.list_keyspaces()
['default_keyspace']
>>> my_db_admin.create_keyspace("that_other_one")
>>> my_db_admin.list_keyspaces()
['default_keyspace', 'that_other_one']

Note

This method does not perform any admin-level operation through the DevOps API. For actual creation of a database, see the create_database method.

Expand source code
def get_database_admin(
    self,
    api_endpoint_or_id: str | None = None,
    *,
    api_endpoint: str | None = None,
    id: str | None = None,
    region: str | None = None,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
    token: str | TokenProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> AstraDBDatabaseAdmin:
    """
    Create an AstraDBDatabaseAdmin object for admin work within a certain database.

    Args:
        api_endpoint_or_id: positional parameter that can stand for both
            `api_endpoint` and `id`. Passing them together is an error.
        api_endpoint: the API Endpoint for the target database
            (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`).
            The database must exist already for the resulting object to be
            effectively used; this invocation does not create the database,
            it only creates the object instance.
        id: the target database ID. This is alternative to using the API Endpoint.
        region: the region to use for connecting to the database. The database
            must be located in that region. This parameter can be used only
            when the database is specified by its ID (instead of API Endpoint).
            If it is not passed and cannot be inferred from the API endpoint,
            an additional DevOps API request determines the default region,
            which is then used.
        database_admin_timeout_ms: a timeout, in milliseconds, applied to the
            underlying DevOps API request for 'region', should it be necessary.
            If omitted, this object's defaults are used. (This method issues
            a single API request, hence all timeout parameters are treated
            the same.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.
        token: if supplied, is passed to the spawned object instead of the one
            set for this object. This can be either a literal token string or
            a subclass of `astrapy.authentication.TokenProvider`.
        spawn_api_options: a specification - complete or partial - of the
            API Options to override the defaults inherited from the
            AstraDBAdmin. This allows for a deeper configuration of the
            database admin, e.g. concerning timeouts; if this is passed
            together with the named timeout parameters, the latter take
            precedence in their respective settings.

    Returns:
        An AstraDBDatabaseAdmin instance representing the requested database.

    Example:
        >>> my_db_admin = my_astra_db_admin.get_database_admin("01234567-...")
        >>> my_db_admin.list_keyspaces()
        ['default_keyspace']
        >>> my_db_admin.create_keyspace("that_other_one")
        >>> my_db_admin.list_keyspaces()
        ['default_keyspace', 'that_other_one']

    Note:
        This method does not perform any admin-level operation through
        the DevOps API. For actual creation of a database, see the
        `create_database` method.
    """

    # Resolve the single-request timeout from parameters and defaults.
    _timeout_ms, _timeout_label = _select_singlereq_timeout_da(
        timeout_options=self.api_options.timeout_options,
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # Disambiguate the positional argument into (endpoint, ID) form.
    _endpoint, _db_id = check_id_endpoint_parg_kwargs(
        p_arg=api_endpoint_or_id, api_endpoint=api_endpoint, id=id
    )
    # Layer the option overrides: spawn_api_options first, then an
    # explicitly-passed token on top.
    resolved_api_options = self.api_options.with_override(
        spawn_api_options
    ).with_override(APIOptions(token=token))

    if _endpoint is not None:
        # An API endpoint fully identifies the database: `region` would be
        # redundant here and is therefore disallowed.
        if region is not None:
            raise ValueError(
                "Parameter `region` not supported with an API endpoint."
            )
        return AstraDBDatabaseAdmin.from_astra_db_admin(
            api_endpoint=_endpoint,
            astra_db_admin=self,
            spawn_api_options=resolved_api_options,
        )

    # Otherwise the database must be specified by its ID; a region is then
    # needed (possibly fetched from the DevOps API) to build the endpoint.
    if _db_id is None:
        raise ValueError("Either `api_endpoint` or `id` must be supplied.")
    _region = normalize_region_for_id(
        database_id=_db_id,
        environment=self.api_options.environment,
        region_param=region,
        request_timeout_ms=_timeout_ms,
        api_options=self.api_options,
    )
    return AstraDBDatabaseAdmin.from_astra_db_admin(
        api_endpoint=build_api_endpoint(
            environment=self.api_options.environment,
            database_id=_db_id,
            region=_region,
        ),
        astra_db_admin=self,
        spawn_api_options=resolved_api_options,
    )
def list_databases(self, *, include: str | None = None, provider: str | None = None, page_size: int | None = None, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) -> list[AstraDBAdminDatabaseInfo]

Get the list of databases, as obtained with a request to the DevOps API.

Args

include
a filter on what databases are to be returned. As per DevOps API, defaults to "nonterminated". Pass "all" to include the already terminated databases.
provider
a filter on the cloud provider for the databases. As per DevOps API, defaults to "ALL". Pass e.g. "AWS" to restrict the results.
page_size
number of results per page from the DevOps API.
database_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (While in the case of very many databases this method may entail multiple DevOps API requests, it is assumed here that this method amounts almost always to one single request: the only timeout imposed on this method execution is one acting on each individual request, with no checks on its overall completion time.)
request_timeout_ms
an alias for database_admin_timeout_ms.
timeout_ms
an alias for database_admin_timeout_ms.

Returns

A list of AstraDBAdminDatabaseInfo objects.

Example

>>> database_list = my_astra_db_admin.list_databases()
>>> len(database_list)
3
>>> database_list[2].id
'01234567-...'
>>> database_list[2].status
'ACTIVE'
>>> database_list[2].info.region
'eu-west-1'
Expand source code
def list_databases(
    self,
    *,
    include: str | None = None,
    provider: str | None = None,
    page_size: int | None = None,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[AstraDBAdminDatabaseInfo]:
    """
    Retrieve the list of databases through a request to the DevOps API.

    Args:
        include: a filter on which databases to return. The DevOps API
            default is "nonterminated"; pass "all" to also include the
            already terminated databases.
        provider: a filter on the cloud provider of the databases. The
            DevOps API default is "ALL"; pass e.g. "AWS" to restrict
            the results.
        page_size: number of results per page from the DevOps API.
        database_admin_timeout_ms: a timeout, in milliseconds, imposed on the
            underlying API request. If not provided, this object's defaults
            apply. (While very many databases may entail multiple DevOps API
            requests, this method almost always amounts to a single request:
            the only timeout enforced is the per-request one, with no check
            on the overall completion time.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.

    Returns:
        A list of AstraDBAdminDatabaseInfo objects.

    Example:
        >>> database_list = my_astra_db_admin.list_databases()
        >>> len(database_list)
        3
        >>> database_list[2].id
        '01234567-...'
        >>> database_list[2].status
        'ACTIVE'
        >>> database_list[2].info.region
        'eu-west-1'
    """

    # resolve the effective per-request timeout from the aliases and defaults
    _timeout_ms, _timeout_label = _select_singlereq_timeout_da(
        timeout_options=self.api_options.timeout_options,
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    timeout_context = _TimeoutContext(request_ms=_timeout_ms, label=_timeout_label)
    return self._list_databases_ctx(
        include=include,
        provider=provider,
        page_size=page_size,
        timeout_context=timeout_context,
    )
def with_options(self, *, token: str | TokenProvider | UnsetType = (unset), api_options: APIOptions | UnsetType = (unset)) -> AstraDBAdmin

Create a clone of this AstraDBAdmin with some changed attributes.

Args

token
an Access Token to the database. Example: "AstraCS:xyz...". This can be either a literal token string or a subclass of TokenProvider.
api_options
any additional options to set for the clone, in the form of an APIOptions instance (where one can set just the needed attributes). In case the same setting is also provided as named parameter, the latter takes precedence.

Returns

a new AstraDBAdmin instance.

Example

>>> different_auth_astra_db_admin = my_astra_db_admin.with_options(
...     token="AstraCS:xyz...",
... )
Expand source code
def with_options(
    self,
    *,
    token: str | TokenProvider | UnsetType = _UNSET,
    api_options: APIOptions | UnsetType = _UNSET,
) -> AstraDBAdmin:
    """
    Return a clone of this AstraDBAdmin with selected attributes replaced.

    Args:
        token: an Access Token to the database. Example: `"AstraCS:xyz..."`.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        api_options: any additional options to set for the clone, in the form
            of an APIOptions instance (where one can set just the needed
            attributes). In case the same setting is also provided as named
            parameter, the latter takes precedence.

    Returns:
        a new AstraDBAdmin instance.

    Example:
        >>> different_auth_astra_db_admin = my_astra_db_admin.with_options(
        ...     token="AstraCS:xyz...",
        ... )
    """

    # the private copy helper does the actual cloning/overriding work
    return self._copy(token=token, api_options=api_options)
class AstraDBDatabaseAdmin (*, api_endpoint: str, api_options: FullAPIOptions, spawner_database: Database | AsyncDatabase | None = None, spawner_astra_db_admin: AstraDBAdmin | None = None)

An "admin" object, able to perform administrative tasks at the keyspaces level (i.e. within a certain database), such as creating/listing/dropping keyspaces.

This is one layer below the AstraDBAdmin concept, in that it is tied to a single database and enables admin work within it.

This class is not meant for direct instantiation by the user, rather it is obtained by invoking methods such as get_database_admin of AstraDBAdmin.

Args

api_endpoint
the API Endpoint for the target database (e.g. https://<ID>-<REGION>.apps.astra.datastax.com). The database must exist already for the resulting object to be effectively used; in other words, this invocation does not create the database, just the object instance.
api_options
a complete specification of the API Options for this instance.
spawner_database
either a Database or an AsyncDatabase instance. This represents the database class which spawns this admin object, so that, if required, a keyspace creation can retroactively "use" the new keyspace in the spawner. Used to enable the Async/Database.get_database_admin().create_keyspace() pattern.
spawner_astra_db_admin
an AstraDBAdmin instance. This, if provided, is the instance that spawned this Database Admin and is used to delegate operations such as drop, get_database and so on. If not passed, a new one is created automatically.

Example

>>> from astrapy import DataAPIClient
>>> my_client = DataAPIClient("AstraCS:...")
>>> admin_for_my_db = my_client.get_admin().get_database_admin(
...     "https://<ID>-<REGION>.apps.astra.datastax.com"
... )
>>> admin_for_my_db.list_keyspaces()
['default_keyspace', 'staging_keyspace']
>>> admin_for_my_db.info().status
'ACTIVE'

Note

creating an instance of AstraDBDatabaseAdmin does not trigger actual creation of the database itself, which should exist beforehand. To create databases, see the AstraDBAdmin class.

Note

a more powerful token may be required than the one sufficient for working in the Database, Collection and Table classes. Check the provided token if "Unauthorized" errors are encountered.

Expand source code
class AstraDBDatabaseAdmin(DatabaseAdmin):
    """
    An "admin" object, able to perform administrative tasks at the keyspaces level
    (i.e. within a certain database), such as creating/listing/dropping keyspaces.

    This is one layer below the AstraDBAdmin concept, in that it is tied to
    a single database and enables admin work within it.

    This class is not meant for direct instantiation by the user, rather
    it is obtained by invoking methods such as `get_database_admin`
    of AstraDBAdmin.

    Args:
        api_endpoint: the API Endpoint for the target database
            (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`).
            The database must exist already for the resulting object
            to be effectively used; in other words, this invocation
            does not create the database, just the object instance.
        api_options: a complete specification of the API Options for this instance.
        spawner_database: either a Database or an AsyncDatabase instance. This represents
            the database class which spawns this admin object, so that, if required,
            a keyspace creation can retroactively "use" the new keyspace in the spawner.
            Used to enable the Async/Database.get_admin_database().create_keyspace()
            pattern.
        spawner_astra_db_admin: an AstraDBAdmin instance. This, if provided, is
            the instance that spawned this Database Admin and is used to delegate
            operations such as drop, get_database and so on. If not passed, a new
            one is created automatically.

    Example:
        >>> from astrapy import DataAPIClient
        >>> my_client = DataAPIClient("AstraCS:...")
        >>> admin_for_my_db = my_client.get_admin().get_database_admin(
        ...     "https://<ID>-<REGION>.apps.astra.datastax.com"
        ... )
        >>> admin_for_my_db.list_keyspaces()
        ['default_keyspace', 'staging_keyspace']
        >>> admin_for_my_db.info().status
        'ACTIVE'

    Note:
        creating an instance of AstraDBDatabaseAdmin does not trigger actual creation
        of the database itself, which should exist beforehand. To create databases,
        see the AstraDBAdmin class.

    Note:
        a more powerful token may be required than the one sufficient for working
        in the Database, Collection and Table classes. Check the provided token
        if "Unauthorized" errors are encountered.
    """

    def __init__(
        self,
        *,
        api_endpoint: str,
        api_options: FullAPIOptions,
        spawner_database: Database | AsyncDatabase | None = None,
        spawner_astra_db_admin: AstraDBAdmin | None = None,
    ) -> None:
        """
        Validate the endpoint/environment and set up the Data API and
        DevOps API commanders for this database admin.
        """
        # lazy import here to avoid circular dependency
        from astrapy.database import Database

        # this admin class is Astra-DB-only: reject non-Astra environments
        if api_options.environment not in Environment.astra_db_values:
            raise InvalidEnvironmentException(
                "Environments outside of Astra DB are not supported."
            )

        self.api_options = api_options
        self.api_endpoint = api_endpoint
        # the endpoint string encodes database id, region and environment
        parsed_api_endpoint = parse_api_endpoint(self.api_endpoint)
        if parsed_api_endpoint is None:
            msg = api_endpoint_parsing_error_message(self.api_endpoint)
            raise ValueError(msg)
        self._database_id = parsed_api_endpoint.database_id
        self._region = parsed_api_endpoint.region
        # the environment parsed from the endpoint must agree with api_options
        if parsed_api_endpoint.environment != self.api_options.environment:
            raise InvalidEnvironmentException(
                "Environment mismatch between client and provided "
                "API endpoint. You can try adding "
                f'`environment="{parsed_api_endpoint.environment}"` '
                "to the class constructor."
            )
        if spawner_database is not None:
            self.spawner_database = spawner_database
        else:
            # leaving the keyspace to its per-environment default
            # (a task for the Database)
            self.spawner_database = Database(
                api_endpoint=self.api_endpoint,
                keyspace=None,
                api_options=self.api_options,
            )

        # API-commander-specific init (for the vectorizeOps invocations)
        # even if Data API, this is admin and must use the Admin additional headers:
        self._commander_headers = {
            DEFAULT_DATA_API_AUTH_HEADER: self.api_options.token.get_token(),
            **self.api_options.admin_additional_headers,
        }
        self._api_commander = self._get_api_commander()

        # DevOps-API-commander specific init (keyspace CRUD, etc)
        # the auth header is attached only when a token is actually configured:
        self._dev_ops_commander_headers: dict[str, str | None]
        if self.api_options.token:
            _token = self.api_options.token.get_token()
            self._dev_ops_commander_headers = {
                DEFAULT_DEV_OPS_AUTH_HEADER: f"{DEFAULT_DEV_OPS_AUTH_PREFIX}{_token}",
                **self.api_options.admin_additional_headers,
            }
        else:
            self._dev_ops_commander_headers = {
                **self.api_options.admin_additional_headers,
            }
        self._dev_ops_api_commander = self._get_dev_ops_api_commander()

        # this class keeps a reference to the AstraDBAdmin associated to this org:
        if spawner_astra_db_admin is None:
            self._astra_db_admin = AstraDBAdmin(api_options=self.api_options)
        else:
            self._astra_db_admin = spawner_astra_db_admin

    def __repr__(self) -> str:
        """Readable representation showing the endpoint and the API options."""
        inner = ", ".join(
            [
                f'api_endpoint="{self.api_endpoint}"',
                f"api_options={self.api_options}",
            ]
        )
        return f"{self.__class__.__name__}({inner})"

    def __eq__(self, other: Any) -> bool:
        """Two instances are equal when endpoint and API options both match."""
        if not isinstance(other, AstraDBDatabaseAdmin):
            return False
        checks = [
            self.api_endpoint == other.api_endpoint,
            self.api_options == other.api_options,
        ]
        return all(checks)

    def _get_api_commander(self) -> APICommander:
        """Instantiate a new APICommander for Data API calls."""
        # assemble the base path from path/version, dropping unset or
        # empty segments and surrounding slashes
        raw_segments = (
            self.api_options.data_api_url_options.api_path,
            self.api_options.data_api_url_options.api_version,
        )
        path_segments: list[str] = []
        for raw_segment in raw_segments:
            if raw_segment is None:
                continue
            cleaned = raw_segment.strip("/")
            if cleaned != "":
                path_segments.append(cleaned)
        base_path = "/" + "/".join(path_segments)
        return APICommander(
            api_endpoint=self.api_endpoint,
            path=base_path,
            headers=self._commander_headers,
            callers=self.api_options.callers,
            redacted_header_names=self.api_options.redacted_header_names,
        )

    def _get_dev_ops_api_commander(self) -> APICommander:
        """Instantiate a new APICommander for DevOps calls."""
        # build the "<version>/databases/<id>" path, skipping unset or
        # empty segments and surrounding slashes
        raw_segments = (
            self.api_options.dev_ops_api_url_options.dev_ops_api_version,
            "databases",
            self._database_id,
        )
        path_segments: list[str] = []
        for raw_segment in raw_segments:
            if raw_segment is None:
                continue
            cleaned = raw_segment.strip("/")
            if cleaned != "":
                path_segments.append(cleaned)
        dev_ops_base_path = "/".join(path_segments)
        return APICommander(
            api_endpoint=self.api_options.dev_ops_api_url_options.dev_ops_url,
            path=dev_ops_base_path,
            headers=self._dev_ops_commander_headers,
            callers=self.api_options.callers,
            dev_ops_api=True,
            redacted_header_names=self.api_options.redacted_header_names,
        )

    def _copy(
        self,
        *,
        token: str | TokenProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> AstraDBDatabaseAdmin:
        # named-parameter overrides (token) take precedence over api_options
        effective_options = self.api_options.with_override(
            api_options
        ).with_override(APIOptions(token=token))
        return AstraDBDatabaseAdmin(
            api_endpoint=self.api_endpoint,
            api_options=effective_options,
            spawner_database=self.spawner_database,
            spawner_astra_db_admin=self._astra_db_admin,
        )

    def with_options(
        self,
        *,
        token: str | TokenProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> AstraDBDatabaseAdmin:
        """
        Create a clone of this AstraDBDatabaseAdmin with some changed attributes.

        Args:
            token: an Access Token to the database. Example: `"AstraCS:xyz..."`.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            api_options: any additional options to set for the clone, in the form of
                an APIOptions instance (where one can set just the needed attributes).
                In case the same setting is also provided as named parameter,
                the latter takes precedence.

        Returns:
            a new AstraDBDatabaseAdmin instance.

        Example:
            >>> admin_with_other_token = admin_for_my_db.with_options(
            ...     token="AstraCS:xyz...",
            ... )
        """

        return self._copy(
            token=token,
            api_options=api_options,
        )

    @property
    def id(self) -> str:
        """
        The ID of the database this admin object is attached to.

        Example:
            >>> my_db_admin.id
            '01234567-89ab-cdef-0123-456789abcdef'
        """
        # parsed out of the API endpoint at construction time
        return self._database_id

    @property
    def region(self) -> str:
        """
        The region of the database this admin object is attached to.

        Example:
            >>> my_db_admin.region
            'us-east-1'
        """
        # parsed out of the API endpoint at construction time
        return self._region

    @staticmethod
    def from_astra_db_admin(
        api_endpoint: str,
        *,
        astra_db_admin: AstraDBAdmin,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AstraDBDatabaseAdmin:
        """
        Build an AstraDBDatabaseAdmin from an AstraDBAdmin plus an API Endpoint.

        Args:
            api_endpoint: the API Endpoint for the target database
                (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`).
                The database must exist already for the resulting object
                to be effectively used; in other words, this invocation
                does not create the database, just the object instance.
            astra_db_admin: an AstraDBAdmin object that has visibility over
                the target database.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults inherited from the
                AstraDBAdmin. This allows for a deeper configuration of the
                database, e.g. concerning timeouts.

        Returns:
            An AstraDBDatabaseAdmin object, for admin work within the database.

        Example:
            >>> from astrapy import DataAPIClient, AstraDBDatabaseAdmin
            >>> admin_for_my_db = AstraDBDatabaseAdmin.from_astra_db_admin(
            ...     "https://<ID>-<REGION>.apps.astra.datastax.com",
            ...     astra_db_admin=DataAPIClient("AstraCS:...").get_admin(),
            ... )
            >>> admin_for_my_db.list_keyspaces()
            ['default_keyspace', 'staging_keyspace']
            >>> admin_for_my_db.info().status
            'ACTIVE'

        Note:
            Creating an instance of AstraDBDatabaseAdmin does not trigger actual
            creation of the database itself, which should exist beforehand.
            To create databases, see the AstraDBAdmin class.
        """

        # the spawning admin's options, overridden by any spawn options,
        # become the options of the new database admin
        inherited_options = astra_db_admin.api_options
        final_options = inherited_options.with_override(spawn_api_options)
        return AstraDBDatabaseAdmin(
            api_endpoint=api_endpoint,
            api_options=final_options,
            spawner_astra_db_admin=astra_db_admin,
        )

    def info(
        self,
        *,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> AstraDBAdminDatabaseInfo:
        """
        Query the DevOps API for the full info on this database.

        Args:
            database_admin_timeout_ms: a timeout, in milliseconds, to impose
                on the underlying API request. If not provided, this object's
                defaults apply. (This method issues a single API request,
                hence all timeout parameters are treated the same.)
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.

        Returns:
            An AstraDBAdminDatabaseInfo object.

        Example:
            >>> my_db_info = admin_for_my_db.info()
            >>> my_db_info.status
            'ACTIVE'
            >>> my_db_info.info.region
            'us-east1'
        """

        logger.info(f"getting info ('{self._database_id}')")
        # the spawning AstraDBAdmin performs the actual DevOps API call
        database_info = self._astra_db_admin.database_info(
            id=self._database_id,
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info(f"finished getting info ('{self._database_id}')")
        return database_info

    async def async_info(
        self,
        *,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> AstraDBAdminDatabaseInfo:
        """
        Query the DevOps API for the full info on this database.
        Async version of the method, for use in an asyncio context.

        Args:
            database_admin_timeout_ms: a timeout, in milliseconds, to impose
                on the underlying API request. If not provided, this object's
                defaults apply. (This method issues a single API request,
                hence all timeout parameters are treated the same.)
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.

        Returns:
            An AstraDBAdminDatabaseInfo object.

        Example:
            >>> async def wait_until_active(db_admin: AstraDBDatabaseAdmin) -> None:
            ...     while True:
            ...         info = await db_admin.async_info()
            ...         if info.status == "ACTIVE":
            ...             return
            ...
            >>> asyncio.run(wait_until_active(admin_for_my_db))
        """

        logger.info(f"getting info ('{self._database_id}'), async")
        # the spawning AstraDBAdmin performs the actual DevOps API call
        database_info = await self._astra_db_admin.async_database_info(
            id=self._database_id,
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info(f"finished getting info ('{self._database_id}'), async")
        return database_info

    def list_keyspaces(
        self,
        *,
        keyspace_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[str]:
        """
        Query the DevOps API for a list of the keyspaces in the database.

        Args:
            keyspace_admin_timeout_ms: a timeout, in milliseconds, to impose
                on the underlying API request. If not provided, this object's
                defaults apply. (This method issues a single API request,
                hence all timeout parameters are treated the same.)
            request_timeout_ms: an alias for `keyspace_admin_timeout_ms`.
            timeout_ms: an alias for `keyspace_admin_timeout_ms`.

        Returns:
            A list of the keyspaces, each a string, in no particular order.

        Example:
            >>> admin_for_my_db.list_keyspaces()
            ['default_keyspace', 'staging_keyspace']
        """

        logger.info(f"getting keyspaces ('{self._database_id}')")
        db_info = self.info(
            database_admin_timeout_ms=keyspace_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info(f"finished getting keyspaces ('{self._database_id}')")
        if db_info.raw is None:
            raise DevOpsAPIException("Could not get the keyspace list.")
        # the keyspace names live under the "info" key of the raw payload
        return db_info.raw.get("info", {}).get("keyspaces") or []

    async def async_list_keyspaces(
        self,
        *,
        keyspace_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[str]:
        """
        Query the DevOps API for a list of the keyspaces in the database.
        Async version of the method, for use in an asyncio context.

        Args:
            keyspace_admin_timeout_ms: a timeout, in milliseconds, to impose
                on the underlying API request. If not provided, this object's
                defaults apply. (This method issues a single API request,
                hence all timeout parameters are treated the same.)
            request_timeout_ms: an alias for `keyspace_admin_timeout_ms`.
            timeout_ms: an alias for `keyspace_admin_timeout_ms`.

        Returns:
            A list of the keyspaces, each a string, in no particular order.

        Example:
            >>> async def check_if_ks_exists(
            ...     db_admin: AstraDBDatabaseAdmin, keyspace: str
            ... ) -> bool:
            ...     ks_list = await db_admin.async_list_keyspaces()
            ...     return keyspace in ks_list
            ...
            >>> asyncio.run(check_if_ks_exists(admin_for_my_db, "dragons"))
            False
            >>> asyncio.run(check_if_ks_exists(admin_for_my_db, "app_keyspace"))
            True
        """

        logger.info(f"getting keyspaces ('{self._database_id}'), async")
        db_info = await self.async_info(
            database_admin_timeout_ms=keyspace_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info(f"finished getting keyspaces ('{self._database_id}'), async")
        if db_info.raw is None:
            raise DevOpsAPIException("Could not get the keyspace list.")
        # the keyspace names live under the "info" key of the raw payload
        return db_info.raw.get("info", {}).get("keyspaces") or []

    def create_keyspace(
        self,
        name: str,
        *,
        wait_until_active: bool = True,
        update_db_keyspace: bool | None = None,
        keyspace_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
        **kwargs: Any,
    ) -> None:
        """
        Create a keyspace in this database as requested,
        optionally waiting for it to be ready.

        Args:
            name: the keyspace name. If supplying a keyspace that exists
                already, the method call proceeds as usual, no errors are
                raised, and the whole invocation is a no-op.
            wait_until_active: if True (default), the method returns only after
                the target database is in ACTIVE state again (a few
                seconds, usually). If False, it will return right after issuing the
                creation request to the DevOps API, and it will be responsibility
                of the caller to check the database status/keyspace availability
                before working with it.
            update_db_keyspace: if True, the `Database` or `AsyncDatabase` class
                that spawned this DatabaseAdmin, if any, gets updated to work on
                the newly-created keyspace starting when this method returns.
            keyspace_admin_timeout_ms: a timeout, in milliseconds, for the whole
                requested operation to complete. This is used only
                if `wait_until_active` is true, i.e. if the method call must
                wait and keep querying the DevOps API for the status of the
                database during keyspace creation.
            request_timeout_ms: a timeout, in milliseconds, for
                each underlying DevOps API HTTP request.
            timeout_ms: an alias for *both* the `request_timeout_ms` and
                `keyspace_admin_timeout_ms` timeout parameters. In practice,
                regardless of `wait_until_active`, this parameter dictates an
                overall timeout on this method call.

        Note: a timeout event is no guarantee at all that the
        creation request has not reached the API server and is not going
        to be, in fact, honored.

        Example:
            >>> my_db_admin.list_keyspaces()
            ['default_keyspace']
            >>> my_db_admin.create_keyspace("that_other_one")
            >>> my_db_admin.list_keyspaces()
            ['default_keyspace', 'that_other_one']
        """

        # resolve the overall-operation timeout (timeout_ms acts as an alias)
        _keyspace_admin_timeout_ms, _ka_label = _first_valid_timeout(
            (keyspace_admin_timeout_ms, "keyspace_admin_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.keyspace_admin_timeout_ms,
                "keyspace_admin_timeout_ms",
            ),
        )
        # resolve the per-HTTP-request timeout (timeout_ms acts as an alias)
        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        # tracks the overall time budget across the (possibly many) requests
        timeout_manager = MultiCallTimeoutManager(
            overall_timeout_ms=_keyspace_admin_timeout_ms,
            dev_ops_api=True,
            timeout_label=_ka_label,
        )
        logger.info(
            f"creating keyspace '{name}' on " f"'{self._database_id}' (DevOps API)"
        )
        # issue the creation request; it is capped by both the remaining
        # overall budget and the per-request timeout
        cn_raw_response = self._dev_ops_api_commander.raw_request(
            http_method=HttpMethod.POST,
            additional_path=f"keyspaces/{name}",
            timeout_context=timeout_manager.remaining_timeout(
                cap_time_ms=_request_timeout_ms,
                cap_timeout_label=_rt_label,
            ),
        )
        if cn_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_CREATED:
            raise DevOpsAPIException(
                f"keyspace creation ('{name}') failed: API returned HTTP "
                f"{cn_raw_response.status_code} instead of "
                f"{DEV_OPS_RESPONSE_HTTP_CREATED} - Created."
            )
        logger.info(
            "DevOps API returned from creating keyspace "
            f"'{name}' on '{self._database_id}'"
        )
        if wait_until_active:
            # poll the database status until it leaves MAINTENANCE
            last_status_seen = DEV_OPS_DATABASE_STATUS_MAINTENANCE
            while last_status_seen == DEV_OPS_DATABASE_STATUS_MAINTENANCE:
                logger.info(f"sleeping to poll for status of '{self._database_id}'")
                time.sleep(DEV_OPS_KEYSPACE_POLL_INTERVAL_S)
                last_status_seen = self._astra_db_admin._database_info_ctx(
                    id=self._database_id,
                    timeout_context=timeout_manager.remaining_timeout(
                        cap_time_ms=_request_timeout_ms,
                        cap_timeout_label=_rt_label,
                    ),
                ).status
            if last_status_seen != DEV_OPS_DATABASE_STATUS_ACTIVE:
                raise DevOpsAPIException(
                    f"Database entered unexpected status {last_status_seen} after MAINTENANCE."
                )
            # is the keyspace found?
            if name not in self.list_keyspaces():
                raise DevOpsAPIException("Could not create the keyspace.")
        logger.info(
            f"finished creating keyspace '{name}' on "
            f"'{self._database_id}' (DevOps API)"
        )
        # retroactively point the spawning Database at the new keyspace, if asked to
        if update_db_keyspace:
            self.spawner_database.use_keyspace(name)
    async def async_create_keyspace(
        self,
        name: str,
        *,
        wait_until_active: bool = True,
        update_db_keyspace: bool | None = None,
        keyspace_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
        **kwargs: Any,
    ) -> None:
        """
        Create a keyspace in this database, optionally waiting until the
        database is ACTIVE again and the keyspace is visible.
        Async version of the method, for use in an asyncio context.

        Args:
            name: the keyspace name. Supplying an already-existing keyspace
                is not an error: the call simply ends up being a no-op.
            wait_until_active: if True (default), the method returns only after
                the target database has left MAINTENANCE and is ACTIVE again
                (a few seconds, usually). If False, it returns right after
                issuing the creation request to the DevOps API, and checking
                database status/keyspace availability is up to the caller.
            update_db_keyspace: if True, the `Database` or `AsyncDatabase` class
                that spawned this DatabaseAdmin, if any, gets updated to work on
                the newly-created keyspace starting when this method returns.
            keyspace_admin_timeout_ms: a timeout, in milliseconds, for the whole
                requested operation to complete. Only relevant when
                `wait_until_active` is true, i.e. when the method keeps
                polling the DevOps API during keyspace creation.
            request_timeout_ms: a timeout, in milliseconds, for
                each underlying DevOps API HTTP request.
            timeout_ms: an alias for *both* the `request_timeout_ms` and
                `keyspace_admin_timeout_ms` timeout parameters. In practice,
                regardless of `wait_until_active`, this parameter dictates an
                overall timeout on this method call.

        Note: a timeout event is no guarantee at all that the
        creation request has not reached the API server and is not going
        to be, in fact, honored.

        Example:
            >>> asyncio.run(
            ...     my_db_admin.async_create_keyspace("app_keyspace")
            ... )
        """

        # Resolve the overall-operation and per-request timeouts, each with
        # the label describing which parameter supplied it.
        overall_ms, overall_label = _first_valid_timeout(
            (keyspace_admin_timeout_ms, "keyspace_admin_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.keyspace_admin_timeout_ms,
                "keyspace_admin_timeout_ms",
            ),
        )
        per_request_ms, per_request_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        multi_timeout = MultiCallTimeoutManager(
            overall_timeout_ms=overall_ms,
            dev_ops_api=True,
            timeout_label=overall_label,
        )

        def _request_ctx():
            # Each HTTP call draws from the shared overall budget,
            # capped by the per-request timeout.
            return multi_timeout.remaining_timeout(
                cap_time_ms=per_request_ms,
                cap_timeout_label=per_request_label,
            )

        logger.info(
            f"creating keyspace '{name}' on "
            f"'{self._database_id}' (DevOps API), async"
        )
        creation_response = await self._dev_ops_api_commander.async_raw_request(
            http_method=HttpMethod.POST,
            additional_path=f"keyspaces/{name}",
            timeout_context=_request_ctx(),
        )
        # The DevOps API acknowledges keyspace creation with HTTP 201.
        if creation_response.status_code != DEV_OPS_RESPONSE_HTTP_CREATED:
            raise DevOpsAPIException(
                f"keyspace creation ('{name}') failed: API returned HTTP "
                f"{creation_response.status_code} instead of "
                f"{DEV_OPS_RESPONSE_HTTP_CREATED} - Created."
            )
        logger.info(
            f"DevOps API returned from creating keyspace "
            f"'{name}' on '{self._database_id}', async"
        )
        if wait_until_active:
            # Poll until the database leaves MAINTENANCE ...
            status = DEV_OPS_DATABASE_STATUS_MAINTENANCE
            while status == DEV_OPS_DATABASE_STATUS_MAINTENANCE:
                logger.info(
                    f"sleeping to poll for status of '{self._database_id}', async"
                )
                await asyncio.sleep(DEV_OPS_KEYSPACE_POLL_INTERVAL_S)
                db_info = await self._astra_db_admin._async_database_info_ctx(
                    id=self._database_id,
                    timeout_context=_request_ctx(),
                )
                status = db_info.status
            # ... and verify it came back ACTIVE rather than some error state.
            if status != DEV_OPS_DATABASE_STATUS_ACTIVE:
                raise DevOpsAPIException(
                    f"Database entered unexpected status {status} after MAINTENANCE."
                )
            # Final sanity check: the keyspace must now be listed.
            if name not in await self.async_list_keyspaces():
                raise DevOpsAPIException("Could not create the keyspace.")
        logger.info(
            f"finished creating keyspace '{name}' on "
            f"'{self._database_id}' (DevOps API), async"
        )
        if update_db_keyspace:
            self.spawner_database.use_keyspace(name)

    def drop_keyspace(
        self,
        name: str,
        *,
        wait_until_active: bool = True,
        keyspace_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Delete a keyspace from the database, optionally waiting for the database
        to become active again.

        Args:
            name: the keyspace to delete. If it does not exist in this database,
                an error is raised.
            wait_until_active: if True (default), the method returns only after
                the target database is in ACTIVE state again (a few
                seconds, usually). If False, it will return right after issuing the
                deletion request to the DevOps API, and it will be responsibility
                of the caller to check the database status/keyspace availability
                before working with it.
            keyspace_admin_timeout_ms: a timeout, in milliseconds, for the whole
                requested operation to complete. This is used only
                if `wait_until_active` is true, i.e. if the method call must
                wait and keep querying the DevOps API for the status of the
                database during keyspace deletion.
            request_timeout_ms: a timeout, in milliseconds, for
                each underlying DevOps API HTTP request.
            timeout_ms: an alias for *both* the `request_timeout_ms` and
                `keyspace_admin_timeout_ms` timeout parameters. In practice,
                regardless of `wait_until_active`, this parameter dictates an
                overall timeout on this method call.

        Note: a timeout event is no guarantee at all that the
        deletion request has not reached the API server and is not going
        to be, in fact, honored.

        Example:
            >>> my_db_admin.list_keyspaces()
            ['default_keyspace', 'that_other_one']
            >>> my_db_admin.drop_keyspace("that_other_one")
            >>> my_db_admin.list_keyspaces()
            ['default_keyspace']
        """

        # Resolve the overall-operation and per-request timeouts, keeping the
        # label of whichever parameter supplied each value (for error reporting).
        _keyspace_admin_timeout_ms, _ka_label = _first_valid_timeout(
            (keyspace_admin_timeout_ms, "keyspace_admin_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.keyspace_admin_timeout_ms,
                "keyspace_admin_timeout_ms",
            ),
        )
        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        timeout_manager = MultiCallTimeoutManager(
            overall_timeout_ms=_keyspace_admin_timeout_ms,
            dev_ops_api=True,
            timeout_label=_ka_label,
        )
        logger.info(
            f"dropping keyspace '{name}' on " f"'{self._database_id}' (DevOps API)"
        )
        dk_raw_response = self._dev_ops_api_commander.raw_request(
            http_method=HttpMethod.DELETE,
            additional_path=f"keyspaces/{name}",
            timeout_context=timeout_manager.remaining_timeout(
                cap_time_ms=_request_timeout_ms,
                cap_timeout_label=_rt_label,
            ),
        )
        # The DevOps API acknowledges keyspace deletion with HTTP 202 - Accepted.
        if dk_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_ACCEPTED:
            # Fixed: the message used to interpolate the builtin `id` (rendering
            # as "<built-in function id>") and named the wrong status ("Created").
            raise DevOpsAPIException(
                f"keyspace deletion ('{name}') failed: API returned HTTP "
                f"{dk_raw_response.status_code} instead of "
                f"{DEV_OPS_RESPONSE_HTTP_ACCEPTED} - Accepted."
            )
        logger.info(
            "DevOps API returned from dropping keyspace "
            f"'{name}' on '{self._database_id}'"
        )
        if wait_until_active:
            # Poll until the database leaves MAINTENANCE ...
            last_status_seen = DEV_OPS_DATABASE_STATUS_MAINTENANCE
            while last_status_seen == DEV_OPS_DATABASE_STATUS_MAINTENANCE:
                logger.info(f"sleeping to poll for status of '{self._database_id}'")
                time.sleep(DEV_OPS_KEYSPACE_POLL_INTERVAL_S)
                last_status_seen = self._astra_db_admin._database_info_ctx(
                    id=self._database_id,
                    timeout_context=timeout_manager.remaining_timeout(
                        cap_time_ms=_request_timeout_ms,
                        cap_timeout_label=_rt_label,
                    ),
                ).status
            # ... and verify it came back ACTIVE rather than some error state.
            if last_status_seen != DEV_OPS_DATABASE_STATUS_ACTIVE:
                raise DevOpsAPIException(
                    f"Database entered unexpected status {last_status_seen} after MAINTENANCE."
                )
            # Final sanity check: the keyspace must no longer be listed.
            if name in self.list_keyspaces():
                raise DevOpsAPIException("Could not drop the keyspace.")
        logger.info(
            f"finished dropping keyspace '{name}' on "
            f"'{self._database_id}' (DevOps API)"
        )

    async def async_drop_keyspace(
        self,
        name: str,
        *,
        wait_until_active: bool = True,
        keyspace_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Delete a keyspace from the database, optionally waiting for the database
        to become active again.
        Async version of the method, for use in an asyncio context.

        Args:
            name: the keyspace to delete. If it does not exist in this database,
                an error is raised.
            wait_until_active: if True (default), the method returns only after
                the target database is in ACTIVE state again (a few
                seconds, usually). If False, it will return right after issuing the
                deletion request to the DevOps API, and it will be responsibility
                of the caller to check the database status/keyspace availability
                before working with it.
            keyspace_admin_timeout_ms: a timeout, in milliseconds, for the whole
                requested operation to complete. This is used only
                if `wait_until_active` is true, i.e. if the method call must
                wait and keep querying the DevOps API for the status of the
                database during keyspace deletion.
            request_timeout_ms: a timeout, in milliseconds, for
                each underlying DevOps API HTTP request.
            timeout_ms: an alias for *both* the `request_timeout_ms` and
                `keyspace_admin_timeout_ms` timeout parameters. In practice,
                regardless of `wait_until_active`, this parameter dictates an
                overall timeout on this method call.

        Note: a timeout event is no guarantee at all that the
        deletion request has not reached the API server and is not going
        to be, in fact, honored.

        Example:
            >>> asyncio.run(
            ...     my_db_admin.async_drop_keyspace("app_keyspace")
            ... )
        """

        # Resolve the overall-operation and per-request timeouts, keeping the
        # label of whichever parameter supplied each value (for error reporting).
        _keyspace_admin_timeout_ms, _ka_label = _first_valid_timeout(
            (keyspace_admin_timeout_ms, "keyspace_admin_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.keyspace_admin_timeout_ms,
                "keyspace_admin_timeout_ms",
            ),
        )
        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        timeout_manager = MultiCallTimeoutManager(
            overall_timeout_ms=_keyspace_admin_timeout_ms,
            dev_ops_api=True,
            timeout_label=_ka_label,
        )
        logger.info(
            f"dropping keyspace '{name}' on "
            f"'{self._database_id}' (DevOps API), async"
        )
        dk_raw_response = await self._dev_ops_api_commander.async_raw_request(
            http_method=HttpMethod.DELETE,
            additional_path=f"keyspaces/{name}",
            timeout_context=timeout_manager.remaining_timeout(
                cap_time_ms=_request_timeout_ms,
                cap_timeout_label=_rt_label,
            ),
        )
        # The DevOps API acknowledges keyspace deletion with HTTP 202 - Accepted.
        if dk_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_ACCEPTED:
            # Fixed: the message used to interpolate the builtin `id` (rendering
            # as "<built-in function id>") and named the wrong status ("Created").
            raise DevOpsAPIException(
                f"keyspace deletion ('{name}') failed: API returned HTTP "
                f"{dk_raw_response.status_code} instead of "
                f"{DEV_OPS_RESPONSE_HTTP_ACCEPTED} - Accepted."
            )
        logger.info(
            "DevOps API returned from dropping keyspace "
            f"'{name}' on '{self._database_id}', async"
        )
        if wait_until_active:
            # Poll until the database leaves MAINTENANCE ...
            last_status_seen = DEV_OPS_DATABASE_STATUS_MAINTENANCE
            while last_status_seen == DEV_OPS_DATABASE_STATUS_MAINTENANCE:
                logger.info(
                    f"sleeping to poll for status of '{self._database_id}', async"
                )
                await asyncio.sleep(DEV_OPS_KEYSPACE_POLL_INTERVAL_S)
                last_db_info = await self._astra_db_admin._async_database_info_ctx(
                    id=self._database_id,
                    timeout_context=timeout_manager.remaining_timeout(
                        cap_time_ms=_request_timeout_ms,
                        cap_timeout_label=_rt_label,
                    ),
                )
                last_status_seen = last_db_info.status
            # ... and verify it came back ACTIVE rather than some error state.
            if last_status_seen != DEV_OPS_DATABASE_STATUS_ACTIVE:
                raise DevOpsAPIException(
                    f"Database entered unexpected status {last_status_seen} after MAINTENANCE."
                )
            # Final sanity check: the keyspace must no longer be listed.
            if name in await self.async_list_keyspaces():
                raise DevOpsAPIException("Could not drop the keyspace.")
        logger.info(
            f"finished dropping keyspace '{name}' on "
            f"'{self._database_id}' (DevOps API), async"
        )

    def drop(
        self,
        *,
        wait_until_active: bool = True,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Drop this database, i.e. delete it completely and permanently with all its data.

        This method wraps the `drop_database` method of the AstraDBAdmin class,
        where more information may be found.

        Args:
            wait_until_active: if True (default), the method returns only after
                the database has actually been deleted (generally a few minutes).
                If False, it will return right after issuing the
                drop request to the DevOps API, and it will be responsibility
                of the caller to check the database status/availability
                after that, if desired.
            database_admin_timeout_ms: a timeout, in milliseconds, for the whole
                requested operation to complete. This is used only
                if `wait_until_active` is true, i.e. if the method call must
                wait and keep querying the DevOps API for the status of the
                newly-deleted database.
            request_timeout_ms: a timeout, in milliseconds, for
                each underlying DevOps API HTTP request.
            timeout_ms: an alias for *both* the `request_timeout_ms` and
                `database_admin_timeout_ms` timeout parameters. In practice,
                regardless of `wait_until_active`, this parameter dictates an
                overall timeout on this method call.

        Note: a timeout event is no guarantee at all that the
        deletion request has not reached the API server and is not going
        to be, in fact, honored.

        Example:
            >>> my_db_admin.list_keyspaces()
            ['default_keyspace', 'that_other_one']
            >>> my_db_admin.drop()
            >>> my_db_admin.list_keyspaces()  # raises a 404 Not Found http error

        Note:
            Once the method succeeds, methods on this object -- such as `info()`,
            or `list_keyspaces()` -- can still be invoked: however, this hardly
            makes sense as the underlying actual database is no more.
            It is responsibility of the developer to design a correct flow
            which avoids using a deceased database any further.
        """

        logger.info(f"dropping this database ('{self._database_id}')")
        # Delegate the whole operation (request + optional status polling)
        # to the parent AstraDBAdmin.
        self._astra_db_admin.drop_database(
            id=self._database_id,
            wait_until_active=wait_until_active,
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # Fixed: this log line was unreachable before (it followed a `return`).
        logger.info(f"finished dropping this database ('{self._database_id}')")

    async def async_drop(
        self,
        *,
        wait_until_active: bool = True,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Drop this database, i.e. delete it completely and permanently with all its data.
        Async version of the method, for use in an asyncio context.

        This method wraps the `drop_database` method of the AstraDBAdmin class,
        where more information may be found.

        Args:
            wait_until_active: if True (default), the method returns only after
                the database has actually been deleted (generally a few minutes).
                If False, it will return right after issuing the
                drop request to the DevOps API, and it will be responsibility
                of the caller to check the database status/availability
                after that, if desired.
            database_admin_timeout_ms: a timeout, in milliseconds, for the whole
                requested operation to complete. This is used only
                if `wait_until_active` is true, i.e. if the method call must
                wait and keep querying the DevOps API for the status of the
                newly-deleted database.
            request_timeout_ms: a timeout, in milliseconds, for
                each underlying DevOps API HTTP request.
            timeout_ms: an alias for *both* the `request_timeout_ms` and
                `database_admin_timeout_ms` timeout parameters. In practice,
                regardless of `wait_until_active`, this parameter dictates an
                overall timeout on this method call.

        Note: a timeout event is no guarantee at all that the
        deletion request has not reached the API server and is not going
        to be, in fact, honored.

        Example:
            >>> asyncio.run(my_db_admin.async_drop())

        Note:
            Once the method succeeds, methods on this object -- such as `info()`,
            or `list_keyspaces()` -- can still be invoked: however, this hardly
            makes sense as the underlying actual database is no more.
            It is responsibility of the developer to design a correct flow
            which avoids using a deceased database any further.
        """

        logger.info(f"dropping this database ('{self._database_id}'), async")
        # Delegate the whole operation (request + optional status polling)
        # to the parent AstraDBAdmin.
        await self._astra_db_admin.async_drop_database(
            id=self._database_id,
            wait_until_active=wait_until_active,
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # Fixed: this log line was unreachable before (it followed a `return`).
        logger.info(f"finished dropping this database ('{self._database_id}'), async")

    def get_database(
        self,
        *,
        keyspace: str | None = None,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
        token: str | TokenProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> Database:
        """
        Create a Database instance from this database admin, for data-related tasks.

        Args:
            keyspace: an optional keyspace to set in the resulting Database.
                The same default logic as for `AstraDBAdmin.get_database` applies.
            database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying DevOps API request for 'region', should it be necessary.
                If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.
            token: if supplied, is passed to the Database instead of
                the one set for this object. Useful if one wants to work in
                a least-privilege manner, limiting the permissions for non-admin work.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults inherited from the database admin.
                This allows for a deeper configuration of the database admin, e.g.
                concerning timeouts; if this is passed together with
                the named timeout parameters, the latter will take precedence
                in their respective settings.

        Returns:
            A Database object, ready to be used for working with data and collections.

        Example:
            >>> my_db = my_db_admin.get_database()
            >>> my_db.list_collection_names()
            ['movies', 'another_collection']

        Note:
            creating an instance of Database does not trigger actual creation
            of the database itself, which should exist beforehand. To create databases,
            see the AstraDBAdmin class.
        """

        # This is a thin delegation: the parent AstraDBAdmin does the actual
        # Database spawning, pinned to this admin's API endpoint.
        spawn_kwargs = {
            "api_endpoint": self.api_endpoint,
            "token": token,
            "keyspace": keyspace,
            "database_admin_timeout_ms": database_admin_timeout_ms,
            "request_timeout_ms": request_timeout_ms,
            "timeout_ms": timeout_ms,
            "spawn_api_options": spawn_api_options,
        }
        return self._astra_db_admin.get_database(**spawn_kwargs)

    def get_async_database(
        self,
        *,
        keyspace: str | None = None,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
        token: str | TokenProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncDatabase:
        """
        Create an AsyncDatabase instance from this database admin,
        for data-related tasks.

        Args:
            keyspace: an optional keyspace to set in the resulting AsyncDatabase.
                The same default logic as for `AstraDBAdmin.get_database` applies.
            database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying DevOps API request for 'region', should it be necessary.
                If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.
            token: if supplied, is passed to the AsyncDatabase instead of
                the one set for this object. Useful if one wants to work in
                a least-privilege manner, limiting the permissions for non-admin work.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults inherited from the database admin.
                This allows for a deeper configuration of the database admin, e.g.
                concerning timeouts; if this is passed together with
                the named timeout parameters, the latter will take precedence
                in their respective settings.

        Returns:
            An AsyncDatabase object, ready to be used for working with
            data and collections.
        """

        # Spawn the synchronous Database through the parent admin, then
        # convert it to its async counterpart.
        sync_database = self._astra_db_admin.get_database(
            api_endpoint=self.api_endpoint,
            token=token,
            keyspace=keyspace,
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
            spawn_api_options=spawn_api_options,
        )
        return sync_database.to_async()

    def find_embedding_providers(
        self,
        *,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> FindEmbeddingProvidersResult:
        """
        Query the API for the full information on available embedding providers.

        Args:
            database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.

        Returns:
            A `FindEmbeddingProvidersResult` object with the complete information
            returned by the API about available embedding providers

        Example (output abridged and indented for clarity):
            >>> admin_for_my_db.find_embedding_providers()
            FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)
            >>> admin_for_my_db.find_embedding_providers().embedding_providers
            {
                'openai': EmbeddingProvider(
                    display_name='OpenAI',
                    models=[
                        EmbeddingProviderModel(name='text-embedding-3-small'),
                        ...
                    ]
                ),
                ...
            }
        """

        # Single-request method: all timeout aliases collapse to one value.
        chosen_timeout_ms, chosen_label = _select_singlereq_timeout_da(
            timeout_options=self.api_options.timeout_options,
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info("findEmbeddingProviders")
        api_response = self._api_commander.request(
            payload={"findEmbeddingProviders": {}},
            timeout_context=_TimeoutContext(
                request_ms=chosen_timeout_ms, label=chosen_label
            ),
        )
        status_block = api_response.get("status", {})
        # Guard clause: a well-formed reply must carry "embeddingProviders".
        if "embeddingProviders" not in status_block:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from findEmbeddingProviders API command.",
                raw_response=api_response,
            )
        logger.info("finished findEmbeddingProviders")
        return FindEmbeddingProvidersResult._from_dict(status_block)
    async def async_find_embedding_providers(
        self,
        *,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> FindEmbeddingProvidersResult:
        """
        Query the API for the full information on available embedding providers.
        Async version of the method, for use in an asyncio context.

        Args:
            database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.

        Returns:
            A `FindEmbeddingProvidersResult` object with the complete information
            returned by the API about available embedding providers

        Example (output abridged and indented for clarity):
            >>> asyncio.run(admin_for_my_db.async_find_embedding_providers())
            FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)
        """

        # Single-request method: all timeout aliases collapse to one value.
        chosen_timeout_ms, chosen_label = _select_singlereq_timeout_da(
            timeout_options=self.api_options.timeout_options,
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info("findEmbeddingProviders, async")
        api_response = await self._api_commander.async_request(
            payload={"findEmbeddingProviders": {}},
            timeout_context=_TimeoutContext(
                request_ms=chosen_timeout_ms, label=chosen_label
            ),
        )
        status_block = api_response.get("status", {})
        # Guard clause: a well-formed reply must carry "embeddingProviders".
        if "embeddingProviders" not in status_block:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from findEmbeddingProviders API command.",
                raw_response=api_response,
            )
        logger.info("finished findEmbeddingProviders, async")
        return FindEmbeddingProvidersResult._from_dict(status_block)

Ancestors

Static methods

def from_astra_db_admin(api_endpoint: str, *, astra_db_admin: AstraDBAdmin, spawn_api_options: APIOptions | UnsetType = (unset)) ‑> AstraDBDatabaseAdmin

Create an AstraDBDatabaseAdmin from an AstraDBAdmin and an API Endpoint.

Args

api_endpoint
the API Endpoint for the target database (e.g. https://<ID>-<REGION>.apps.astra.datastax.com). The database must exist already for the resulting object to be effectively used; in other words, this invocation does not create the database, just the object instance.
astra_db_admin
an AstraDBAdmin object that has visibility over the target database.
spawn_api_options
a specification - complete or partial - of the API Options to override the defaults inherited from the AstraDBAdmin. This allows for a deeper configuration of the database, e.g. concerning timeouts.

Returns

An AstraDBDatabaseAdmin object, for admin work within the database.

Example

>>> from astrapy import DataAPIClient, AstraDBDatabaseAdmin
>>> admin_for_my_db = AstraDBDatabaseAdmin.from_astra_db_admin(
...     "https://<ID>-<REGION>.apps.astra.datastax.com",
...     astra_db_admin=DataAPIClient("AstraCS:...").get_admin(),
... )
>>> admin_for_my_db.list_keyspaces()
['default_keyspace', 'staging_keyspace']
>>> admin_for_my_db.info().status
'ACTIVE'

Note

Creating an instance of AstraDBDatabaseAdmin does not trigger actual creation of the database itself, which should exist beforehand. To create databases, see the AstraDBAdmin class.

Expand source code
@staticmethod
def from_astra_db_admin(
    api_endpoint: str,
    *,
    astra_db_admin: AstraDBAdmin,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> AstraDBDatabaseAdmin:
    """
    Build an AstraDBDatabaseAdmin out of an AstraDBAdmin plus an API Endpoint.

    Args:
        api_endpoint: the API Endpoint of the database to administer
            (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`).
            The database must already exist for the returned object to be
            of any use: this call creates the client-side instance only,
            never the database itself.
        astra_db_admin: an AstraDBAdmin with visibility over the target
            database.
        spawn_api_options: a (complete or partial) specification of API
            Options overriding the defaults inherited from `astra_db_admin`,
            allowing deeper configuration (e.g. of timeouts).

    Returns:
        An AstraDBDatabaseAdmin object, for admin work within the database.

    Example:
        >>> from astrapy import DataAPIClient, AstraDBDatabaseAdmin
        >>> admin_for_my_db = AstraDBDatabaseAdmin.from_astra_db_admin(
        ...     "https://<ID>-<REGION>.apps.astra.datastax.com",
        ...     astra_db_admin=DataAPIClient("AstraCS:...").get_admin(),
        ... )
        >>> admin_for_my_db.list_keyspaces()
        ['default_keyspace', 'staging_keyspace']
        >>> admin_for_my_db.info().status
        'ACTIVE'

    Note:
        Instantiating an AstraDBDatabaseAdmin never triggers creation of the
        database, which must exist beforehand. To create databases, see the
        AstraDBAdmin class.
    """

    # Layer the caller-supplied (possibly unset) overrides on top of the
    # options inherited from the spawning admin object.
    effective_api_options = astra_db_admin.api_options.with_override(
        spawn_api_options
    )
    return AstraDBDatabaseAdmin(
        api_endpoint=api_endpoint,
        api_options=effective_api_options,
        spawner_astra_db_admin=astra_db_admin,
    )

Instance variables

var id : str

The ID of this database admin.

Example

>>> my_db_admin.id
'01234567-89ab-cdef-0123-456789abcdef'
Expand source code
@property
def id(self) -> str:
    """
    The unique identifier of the database this admin object targets.

    Example:
        >>> my_db_admin.id
        '01234567-89ab-cdef-0123-456789abcdef'
    """
    return self._database_id
var region : str

The region for this database admin.

Example

>>> my_db_admin.region
'us-east-1'
Expand source code
@property
def region(self) -> str:
    """
    The cloud region in which the targeted database resides.

    Example:
        >>> my_db_admin.region
        'us-east-1'
    """
    return self._region

Methods

async def async_create_keyspace(self, name: str, *, wait_until_active: bool = True, update_db_keyspace: bool | None = None, keyspace_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None, **kwargs: Any) ‑> None

Create a keyspace in this database as requested, optionally waiting for it to be ready. Async version of the method, for use in an asyncio context.

Args

name
the keyspace name. If supplying a keyspace that exists already, the method call proceeds as usual, no errors are raised, and the whole invocation is a no-op.
wait_until_active
if True (default), the method returns only after the target database is in ACTIVE state again (a few seconds, usually). If False, it will return right after issuing the creation request to the DevOps API, and it will be responsibility of the caller to check the database status/keyspace availability before working with it.
update_db_keyspace
if True, the Database or AsyncDatabase class that spawned this DatabaseAdmin, if any, gets updated to work on the newly-created keyspace starting when this method returns.
keyspace_admin_timeout_ms
a timeout, in milliseconds, for the whole requested operation to complete. This is used only if wait_until_active is true, i.e. if the method call must wait and keep querying the DevOps API for the status of the database during keyspace creation.
request_timeout_ms
a timeout, in milliseconds, for each underlying DevOps API HTTP request.
timeout_ms
an alias for both the request_timeout_ms and keyspace_admin_timeout_ms timeout parameters. In practice, regardless of wait_until_active, this parameter dictates an overall timeout on this method call.

Note: a timeout event is no guarantee at all that the creation request has not reached the API server and is not going to be, in fact, honored.

Example

>>> asyncio.run(
...     my_db_admin.async_create_keyspace("app_keyspace")
... )
Expand source code
async def async_create_keyspace(
    self,
    name: str,
    *,
    wait_until_active: bool = True,
    update_db_keyspace: bool | None = None,
    keyspace_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
    **kwargs: Any,
) -> None:
    """
    Create a keyspace in this database as requested,
    optionally waiting for it to be ready.
    Async version of the method, for use in an asyncio context.

    Args:
        name: the keyspace name. If supplying a keyspace that exists
            already, the method call proceeds as usual, no errors are
            raised, and the whole invocation is a no-op.
        wait_until_active: if True (default), the method returns only after
            the target database is in ACTIVE state again (a few
            seconds, usually). If False, it will return right after issuing the
            creation request to the DevOps API, and it will be responsibility
            of the caller to check the database status/keyspace availability
            before working with it.
        update_db_keyspace: if True, the `Database` or `AsyncDatabase` class
            that spawned this DatabaseAdmin, if any, gets updated to work on
            the newly-created keyspace starting when this method returns.
        keyspace_admin_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation to complete. This is used only
            if `wait_until_active` is true, i.e. if the method call must
            wait and keep querying the DevOps API for the status of the
            database during keyspace creation.
        request_timeout_ms: a timeout, in milliseconds, for
            each underlying DevOps API HTTP request.
        timeout_ms: an alias for *both* the `request_timeout_ms` and
            `keyspace_admin_timeout_ms` timeout parameters. In practice,
            regardless of `wait_until_active`, this parameter dictates an
            overall timeout on this method call.

    Note: a timeout event is no guarantee at all that the
    creation request has not reached the API server and is not going
    to be, in fact, honored.

    Example:
        >>> asyncio.run(
        ...     my_db_admin.async_create_keyspace("app_keyspace")
        ... )
    """

    # NOTE(review): **kwargs is accepted but never used in this body --
    # presumably retained for signature compatibility; confirm before removing.
    # Resolve the overall-operation timeout: explicit parameter first, then
    # the `timeout_ms` alias, then this object's configured defaults.
    _keyspace_admin_timeout_ms, _ka_label = _first_valid_timeout(
        (keyspace_admin_timeout_ms, "keyspace_admin_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (
            self.api_options.timeout_options.keyspace_admin_timeout_ms,
            "keyspace_admin_timeout_ms",
        ),
    )
    # Same precedence rules for the per-HTTP-request timeout.
    _request_timeout_ms, _rt_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    # The manager tracks the overall deadline shared by the several DevOps
    # API calls issued below (creation request plus status polling).
    timeout_manager = MultiCallTimeoutManager(
        overall_timeout_ms=_keyspace_admin_timeout_ms,
        dev_ops_api=True,
        timeout_label=_ka_label,
    )
    logger.info(
        f"creating keyspace '{name}' on "
        f"'{self._database_id}' (DevOps API), async"
    )
    # Issue the creation request; its timeout is the remaining overall
    # budget, capped by the per-request timeout.
    cn_raw_response = await self._dev_ops_api_commander.async_raw_request(
        http_method=HttpMethod.POST,
        additional_path=f"keyspaces/{name}",
        timeout_context=timeout_manager.remaining_timeout(
            cap_time_ms=_request_timeout_ms,
            cap_timeout_label=_rt_label,
        ),
    )
    # Anything other than HTTP 201 - Created means the request was refused.
    if cn_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_CREATED:
        raise DevOpsAPIException(
            f"keyspace creation ('{name}') failed: API returned HTTP "
            f"{cn_raw_response.status_code} instead of "
            f"{DEV_OPS_RESPONSE_HTTP_CREATED} - Created."
        )
    logger.info(
        f"DevOps API returned from creating keyspace "
        f"'{name}' on '{self._database_id}', async"
    )
    if wait_until_active:
        # Poll the database status until it leaves MAINTENANCE (the state
        # it sits in while the keyspace is being provisioned).
        last_status_seen = DEV_OPS_DATABASE_STATUS_MAINTENANCE
        while last_status_seen == DEV_OPS_DATABASE_STATUS_MAINTENANCE:
            logger.info(
                f"sleeping to poll for status of '{self._database_id}', async"
            )
            await asyncio.sleep(DEV_OPS_KEYSPACE_POLL_INTERVAL_S)
            last_db_info = await self._astra_db_admin._async_database_info_ctx(
                id=self._database_id,
                timeout_context=timeout_manager.remaining_timeout(
                    cap_time_ms=_request_timeout_ms,
                    cap_timeout_label=_rt_label,
                ),
            )
            last_status_seen = last_db_info.status
        # Exiting MAINTENANCE into anything but ACTIVE is an error.
        if last_status_seen != DEV_OPS_DATABASE_STATUS_ACTIVE:
            raise DevOpsAPIException(
                f"Database entered unexpected status {last_status_seen} after MAINTENANCE."
            )
        # is the keyspace found?
        if name not in await self.async_list_keyspaces():
            raise DevOpsAPIException("Could not create the keyspace.")
    logger.info(
        f"finished creating keyspace '{name}' on "
        f"'{self._database_id}' (DevOps API), async"
    )
    # Optionally re-point the spawning (Async)Database at the new keyspace.
    if update_db_keyspace:
        self.spawner_database.use_keyspace(name)
async def async_drop(self, *, wait_until_active: bool = True, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Drop this database, i.e. delete it completely and permanently with all its data. Async version of the method, for use in an asyncio context.

This method wraps the drop_database method of the AstraDBAdmin class, where more information may be found.

Args

wait_until_active
if True (default), the method returns only after the database has actually been deleted (generally a few minutes). If False, it will return right after issuing the drop request to the DevOps API, and it will be responsibility of the caller to check the database status/availability after that, if desired.
database_admin_timeout_ms
a timeout, in milliseconds, for the whole requested operation to complete. This is used only if wait_until_active is true, i.e. if the method call must wait and keep querying the DevOps API for the status of the newly-deleted database.
request_timeout_ms
a timeout, in milliseconds, for each underlying DevOps API HTTP request.
timeout_ms
an alias for both the request_timeout_ms and database_admin_timeout_ms timeout parameters. In practice, regardless of wait_until_active, this parameter dictates an overall timeout on this method call.

Note: a timeout event is no guarantee at all that the deletion request has not reached the API server and is not going to be, in fact, honored.

Example

>>> asyncio.run(my_db_admin.async_drop())

Note

Once the method succeeds, methods on this object – such as info(), or list_keyspaces() – can still be invoked: however, this hardly makes sense as the underlying actual database is no more. It is responsibility of the developer to design a correct flow which avoids using a deceased database any further.

Expand source code
async def async_drop(
    self,
    *,
    wait_until_active: bool = True,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Drop this database, i.e. delete it completely and permanently with all its data.
    Async version of the method, for use in an asyncio context.

    This method wraps the `drop_database` method of the AstraDBAdmin class,
    where more information may be found.

    Args:
        wait_until_active: if True (default), the method returns only after
            the database has actually been deleted (generally a few minutes).
            If False, it will return right after issuing the
            drop request to the DevOps API, and it will be responsibility
            of the caller to check the database status/availability
            after that, if desired.
        database_admin_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation to complete. This is used only
            if `wait_until_active` is true, i.e. if the method call must
            wait and keep querying the DevOps API for the status of the
            newly-deleted database.
        request_timeout_ms: a timeout, in milliseconds, for
            each underlying DevOps API HTTP request.
        timeout_ms: an alias for *both* the `request_timeout_ms` and
            `database_admin_timeout_ms` timeout parameters. In practice,
            regardless of `wait_until_active`, this parameter dictates an
            overall timeout on this method call.

    Note: a timeout event is no guarantee at all that the
    deletion request has not reached the API server and is not going
    to be, in fact, honored.

    Example:
        >>> asyncio.run(my_db_admin.async_drop())

    Note:
        Once the method succeeds, methods on this object -- such as `info()`,
        or `list_keyspaces()` -- can still be invoked: however, this hardly
        makes sense as the underlying actual database is no more.
        It is responsibility of the developer to design a correct flow
        which avoids using a deceased database any further.
    """

    logger.info(f"dropping this database ('{self._database_id}'), async")
    # Delegate to the spawning AstraDBAdmin, which performs the actual
    # DevOps API interaction (and the optional wait for deletion).
    # Awaiting (instead of `return await ...`) lets the completion log
    # line below actually run; previously it was unreachable dead code.
    await self._astra_db_admin.async_drop_database(
        id=self._database_id,
        wait_until_active=wait_until_active,
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info(f"finished dropping this database ('{self._database_id}'), async")
async def async_drop_keyspace(self, name: str, *, wait_until_active: bool = True, keyspace_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Delete a keyspace from the database, optionally waiting for the database to become active again. Async version of the method, for use in an asyncio context.

Args

name
the keyspace to delete. If it does not exist in this database, an error is raised.
wait_until_active
if True (default), the method returns only after the target database is in ACTIVE state again (a few seconds, usually). If False, it will return right after issuing the deletion request to the DevOps API, and it will be responsibility of the caller to check the database status/keyspace availability before working with it.
keyspace_admin_timeout_ms
a timeout, in milliseconds, for the whole requested operation to complete. This is used only if wait_until_active is true, i.e. if the method call must wait and keep querying the DevOps API for the status of the database during keyspace deletion.
request_timeout_ms
a timeout, in milliseconds, for each underlying DevOps API HTTP request.
timeout_ms
an alias for both the request_timeout_ms and keyspace_admin_timeout_ms timeout parameters. In practice, regardless of wait_until_active, this parameter dictates an overall timeout on this method call.

Note: a timeout event is no guarantee at all that the deletion request has not reached the API server and is not going to be, in fact, honored.

Example

>>> asyncio.run(
...     my_db_admin.async_drop_keyspace("app_keyspace")
... )
Expand source code
async def async_drop_keyspace(
    self,
    name: str,
    *,
    wait_until_active: bool = True,
    keyspace_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Delete a keyspace from the database, optionally waiting for the database
    to become active again.
    Async version of the method, for use in an asyncio context.

    Args:
        name: the keyspace to delete. If it does not exist in this database,
            an error is raised.
        wait_until_active: if True (default), the method returns only after
            the target database is in ACTIVE state again (a few
            seconds, usually). If False, it will return right after issuing the
            deletion request to the DevOps API, and it will be responsibility
            of the caller to check the database status/keyspace availability
            before working with it.
        keyspace_admin_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation to complete. This is used only
            if `wait_until_active` is true, i.e. if the method call must
            wait and keep querying the DevOps API for the status of the
            database during keyspace deletion.
        request_timeout_ms: a timeout, in milliseconds, for
            each underlying DevOps API HTTP request.
        timeout_ms: an alias for *both* the `request_timeout_ms` and
            `keyspace_admin_timeout_ms` timeout parameters. In practice,
            regardless of `wait_until_active`, this parameter dictates an
            overall timeout on this method call.

    Note: a timeout event is no guarantee at all that the
    deletion request has not reached the API server and is not going
    to be, in fact, honored.

    Example:
        >>> asyncio.run(
        ...     my_db_admin.async_drop_keyspace("app_keyspace")
        ... )
    """

    # Resolve the overall-operation timeout: explicit parameter first, then
    # the `timeout_ms` alias, then this object's configured defaults.
    _keyspace_admin_timeout_ms, _ka_label = _first_valid_timeout(
        (keyspace_admin_timeout_ms, "keyspace_admin_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (
            self.api_options.timeout_options.keyspace_admin_timeout_ms,
            "keyspace_admin_timeout_ms",
        ),
    )
    # Same precedence rules for the per-HTTP-request timeout.
    _request_timeout_ms, _rt_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    # Tracks the overall deadline shared by the deletion request and the
    # subsequent status-polling calls.
    timeout_manager = MultiCallTimeoutManager(
        overall_timeout_ms=_keyspace_admin_timeout_ms,
        dev_ops_api=True,
        timeout_label=_ka_label,
    )
    logger.info(
        f"dropping keyspace '{name}' on "
        f"'{self._database_id}' (DevOps API), async"
    )
    dk_raw_response = await self._dev_ops_api_commander.async_raw_request(
        http_method=HttpMethod.DELETE,
        additional_path=f"keyspaces/{name}",
        timeout_context=timeout_manager.remaining_timeout(
            cap_time_ms=_request_timeout_ms,
            cap_timeout_label=_rt_label,
        ),
    )
    if dk_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_ACCEPTED:
        # Fixed: the message previously interpolated the *builtin* `id`
        # (rendering as "<built-in function id>") instead of the keyspace
        # name, and labeled the expected 202 status as "Created".
        raise DevOpsAPIException(
            f"keyspace deletion ('{name}') failed: API returned HTTP "
            f"{dk_raw_response.status_code} instead of "
            f"{DEV_OPS_RESPONSE_HTTP_ACCEPTED} - Accepted"
        )
    logger.info(
        f"DevOps API returned from dropping keyspace "
        f"'{name}' on '{self._database_id}', async"
    )
    if wait_until_active:
        # Poll the database status until it leaves MAINTENANCE (the state
        # it sits in while the keyspace is being removed).
        last_status_seen = DEV_OPS_DATABASE_STATUS_MAINTENANCE
        while last_status_seen == DEV_OPS_DATABASE_STATUS_MAINTENANCE:
            logger.info(
                f"sleeping to poll for status of '{self._database_id}', async"
            )
            await asyncio.sleep(DEV_OPS_KEYSPACE_POLL_INTERVAL_S)
            last_db_info = await self._astra_db_admin._async_database_info_ctx(
                id=self._database_id,
                timeout_context=timeout_manager.remaining_timeout(
                    cap_time_ms=_request_timeout_ms,
                    cap_timeout_label=_rt_label,
                ),
            )
            last_status_seen = last_db_info.status
        if last_status_seen != DEV_OPS_DATABASE_STATUS_ACTIVE:
            raise DevOpsAPIException(
                f"Database entered unexpected status {last_status_seen} after MAINTENANCE."
            )
        # is the keyspace found?
        if name in await self.async_list_keyspaces():
            raise DevOpsAPIException("Could not drop the keyspace.")
    logger.info(
        f"finished dropping keyspace '{name}' on "
        f"'{self._database_id}' (DevOps API), async"
    )
async def async_find_embedding_providers(self, *, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> FindEmbeddingProvidersResult

Query the API for the full information on available embedding providers. Async version of the method, for use in an asyncio context.

Args

database_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for database_admin_timeout_ms.
timeout_ms
an alias for database_admin_timeout_ms.

Returns

A FindEmbeddingProvidersResult object with the complete information returned by the API about available embedding providers.

Example (output abridged and indented for clarity):

>>> admin_for_my_db.find_embedding_providers()
FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)
>>> admin_for_my_db.find_embedding_providers().embedding_providers
{
    'openai': EmbeddingProvider(
        display_name='OpenAI',
        models=[
            EmbeddingProviderModel(name='text-embedding-3-small'),
            ...
        ]
    ),
    ...
}

Expand source code
async def async_find_embedding_providers(
    self,
    *,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> FindEmbeddingProvidersResult:
    """
    Query the API for the full information on available embedding providers.
    Async version of the method, for use in an asyncio context.

    Args:
        database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.

    Returns:
        A `FindEmbeddingProvidersResult` object with the complete information
        returned by the API about available embedding providers

    Example (output abridged and indented for clarity):
        >>> admin_for_my_db.find_embedding_providers()
        FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)
        >>> admin_for_my_db.find_embedding_providers().embedding_providers
        {
            'openai': EmbeddingProvider(
                display_name='OpenAI',
                models=[
                    EmbeddingProviderModel(name='text-embedding-3-small'),
                    ...
                ]
            ),
            ...
        }
    """

    # Collapse the timeout parameters (and object defaults) into the single
    # effective value governing this one-shot request.
    effective_timeout_ms, effective_label = _select_singlereq_timeout_da(
        timeout_options=self.api_options.timeout_options,
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info("findEmbeddingProviders, async")
    api_response = await self._api_commander.async_request(
        payload={"findEmbeddingProviders": {}},
        timeout_context=_TimeoutContext(
            request_ms=effective_timeout_ms, label=effective_label
        ),
    )
    # A well-formed response carries the provider map under "status".
    if "embeddingProviders" not in api_response.get("status", {}):
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from findEmbeddingProviders API command.",
            raw_response=api_response,
        )
    logger.info("finished findEmbeddingProviders, async")
    return FindEmbeddingProvidersResult._from_dict(api_response["status"])
async def async_info(self, *, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> AstraDBAdminDatabaseInfo

Query the DevOps API for the full info on this database. Async version of the method, for use in an asyncio context.

Args

database_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for database_admin_timeout_ms.
timeout_ms
an alias for database_admin_timeout_ms.

Returns

An AstraDBAdminDatabaseInfo object.

Example

>>> async def wait_until_active(db_admin: AstraDBDatabaseAdmin) -> None:
...     while True:
...         info = await db_admin.async_info()
...         if info.status == "ACTIVE":
...             return
...
>>> asyncio.run(wait_until_active(admin_for_my_db))
Expand source code
async def async_info(
    self,
    *,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> AstraDBAdminDatabaseInfo:
    """
    Query the DevOps API for the full info on this database.
    Async version of the method, for use in an asyncio context.

    Args:
        database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.

    Returns:
        An AstraDBAdminDatabaseInfo object.

    Example:
        >>> async def wait_until_active(db_admin: AstraDBDatabaseAdmin) -> None:
        ...     while True:
        ...         info = await db_admin.async_info()
        ...         if info.status == "ACTIVE":
        ...             return
        ...
        >>> asyncio.run(wait_until_active(admin_for_my_db))
    """

    logger.info(f"getting info ('{self._database_id}'), async")
    # Simply delegate to the spawning admin's database-info call,
    # passing the timeout parameters through untouched.
    db_info = await self._astra_db_admin.async_database_info(
        id=self._database_id,
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info(f"finished getting info ('{self._database_id}'), async")
    return db_info
async def async_list_keyspaces(self, *, keyspace_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[str]

Query the DevOps API for a list of the keyspaces in the database. Async version of the method, for use in an asyncio context.

Args

keyspace_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for keyspace_admin_timeout_ms.
timeout_ms
an alias for keyspace_admin_timeout_ms.

Returns

A list of the keyspaces, each a string, in no particular order.

Example

>>> async def check_if_ks_exists(
...     db_admin: AstraDBDatabaseAdmin, keyspace: str
... ) -> bool:
...     ks_list = await db_admin.async_list_keyspaces()
...     return keyspace in ks_list
...
>>> asyncio.run(check_if_ks_exists(admin_for_my_db, "dragons"))
False
>>> asyncio.run(check_if_ks_exists(admin_for_my_db, "app_keyspace"))
True
Expand source code
async def async_list_keyspaces(
    self,
    *,
    keyspace_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[str]:
    """
    Query the DevOps API for a list of the keyspaces in the database.
    Async version of the method, for use in an asyncio context.

    Args:
        keyspace_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `keyspace_admin_timeout_ms`.
        timeout_ms: an alias for `keyspace_admin_timeout_ms`.

    Returns:
        A list of the keyspaces, each a string, in no particular order.

    Example:
        >>> async def check_if_ks_exists(
        ...     db_admin: AstraDBDatabaseAdmin, keyspace: str
        ... ) -> bool:
        ...     ks_list = await db_admin.async_list_keyspaces()
        ...     return keyspace in ks_list
        ...
        >>> asyncio.run(check_if_ks_exists(admin_for_my_db, "dragons"))
        False
        >>> asyncio.run(check_if_ks_exists(admin_for_my_db, "app_keyspace"))
        True
    """

    logger.info(f"getting keyspaces ('{self._database_id}'), async")
    # The keyspace list is extracted from the full database-info payload.
    db_info = await self.async_info(
        database_admin_timeout_ms=keyspace_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info(f"finished getting keyspaces ('{self._database_id}'), async")
    raw_info = db_info.raw
    if raw_info is None:
        raise DevOpsAPIException("Could not get the keyspace list.")
    return raw_info.get("info", {}).get("keyspaces") or []
def create_keyspace(self, name: str, *, wait_until_active: bool = True, update_db_keyspace: bool | None = None, keyspace_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None, **kwargs: Any) ‑> None

Create a keyspace in this database as requested, optionally waiting for it to be ready.

Args

name
the keyspace name. If supplying a keyspace that exists already, the method call proceeds as usual, no errors are raised, and the whole invocation is a no-op.
wait_until_active
if True (default), the method returns only after the target database is in ACTIVE state again (a few seconds, usually). If False, it will return right after issuing the creation request to the DevOps API, and it will be responsibility of the caller to check the database status/keyspace availability before working with it.
update_db_keyspace
if True, the Database or AsyncDatabase class that spawned this DatabaseAdmin, if any, gets updated to work on the newly-created keyspace starting when this method returns.
keyspace_admin_timeout_ms
a timeout, in milliseconds, for the whole requested operation to complete. This is used only if wait_until_active is true, i.e. if the method call must wait and keep querying the DevOps API for the status of the database during keyspace creation.
request_timeout_ms
a timeout, in milliseconds, for each underlying DevOps API HTTP request.
timeout_ms
an alias for both the request_timeout_ms and keyspace_admin_timeout_ms timeout parameters. In practice, regardless of wait_until_active, this parameter dictates an overall timeout on this method call.

Note: a timeout event is no guarantee at all that the creation request has not reached the API server and is not going to be, in fact, honored.

Example

>>> my_db_admin.keyspaces()
['default_keyspace']
>>> my_db_admin.create_keyspace("that_other_one")
>>> my_db_admin.list_keyspaces()
['default_keyspace', 'that_other_one']
Expand source code
def create_keyspace(
    self,
    name: str,
    *,
    wait_until_active: bool = True,
    update_db_keyspace: bool | None = None,
    keyspace_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
    **kwargs: Any,
) -> None:
    """
    Create a keyspace in this database as requested,
    optionally waiting for it to be ready.

    Args:
        name: the keyspace name. If supplying a keyspace that exists
            already, the method call proceeds as usual, no errors are
            raised, and the whole invocation is a no-op.
        wait_until_active: if True (default), the method returns only after
            the target database is in ACTIVE state again (a few
            seconds, usually). If False, it will return right after issuing the
            creation request to the DevOps API, and it will be responsibility
            of the caller to check the database status/keyspace availability
            before working with it.
        update_db_keyspace: if True, the `Database` or `AsyncDatabase` class
            that spawned this DatabaseAdmin, if any, gets updated to work on
            the newly-created keyspace starting when this method returns.
        keyspace_admin_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation to complete. This is used only
            if `wait_until_active` is true, i.e. if the method call must
            wait and keep querying the DevOps API for the status of the
            database during keyspace creation.
        request_timeout_ms: a timeout, in milliseconds, for
            each underlying DevOps API HTTP request.
        timeout_ms: an alias for *both* the `request_timeout_ms` and
            `keyspace_admin_timeout_ms` timeout parameters. In practice,
            regardless of `wait_until_active`, this parameter dictates an
            overall timeout on this method call.

    Note: a timeout event is no guarantee at all that the
    creation request has not reached the API server and is not going
    to be, in fact, honored.

    Example:
        >>> my_db_admin.keyspaces()
        ['default_keyspace']
        >>> my_db_admin.create_keyspace("that_other_one")
        >>> my_db_admin.list_keyspaces()
        ['default_keyspace', 'that_other_one']
    """

    # Resolve the overall-operation timeout: explicit argument wins over
    # the `timeout_ms` alias, which wins over this object's defaults.
    _keyspace_admin_timeout_ms, _ka_label = _first_valid_timeout(
        (keyspace_admin_timeout_ms, "keyspace_admin_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (
            self.api_options.timeout_options.keyspace_admin_timeout_ms,
            "keyspace_admin_timeout_ms",
        ),
    )
    # Resolve the per-HTTP-request timeout with the same precedence scheme.
    _request_timeout_ms, _rt_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    # The manager tracks the overall budget across the creation request
    # plus any number of status-polling requests that may follow.
    timeout_manager = MultiCallTimeoutManager(
        overall_timeout_ms=_keyspace_admin_timeout_ms,
        dev_ops_api=True,
        timeout_label=_ka_label,
    )
    logger.info(
        f"creating keyspace '{name}' on " f"'{self._database_id}' (DevOps API)"
    )
    cn_raw_response = self._dev_ops_api_commander.raw_request(
        http_method=HttpMethod.POST,
        additional_path=f"keyspaces/{name}",
        timeout_context=timeout_manager.remaining_timeout(
            cap_time_ms=_request_timeout_ms,
            cap_timeout_label=_rt_label,
        ),
    )
    # The DevOps API signals acceptance of a keyspace creation with HTTP 201.
    if cn_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_CREATED:
        raise DevOpsAPIException(
            f"keyspace creation ('{name}') failed: API returned HTTP "
            f"{cn_raw_response.status_code} instead of "
            f"{DEV_OPS_RESPONSE_HTTP_CREATED} - Created."
        )
    logger.info(
        "DevOps API returned from creating keyspace "
        f"'{name}' on '{self._database_id}'"
    )
    if wait_until_active:
        # Poll the database status until it leaves MAINTENANCE; each poll
        # consumes part of the overall budget held by timeout_manager.
        last_status_seen = DEV_OPS_DATABASE_STATUS_MAINTENANCE
        while last_status_seen == DEV_OPS_DATABASE_STATUS_MAINTENANCE:
            logger.info(f"sleeping to poll for status of '{self._database_id}'")
            time.sleep(DEV_OPS_KEYSPACE_POLL_INTERVAL_S)
            last_status_seen = self._astra_db_admin._database_info_ctx(
                id=self._database_id,
                timeout_context=timeout_manager.remaining_timeout(
                    cap_time_ms=_request_timeout_ms,
                    cap_timeout_label=_rt_label,
                ),
            ).status
        if last_status_seen != DEV_OPS_DATABASE_STATUS_ACTIVE:
            raise DevOpsAPIException(
                f"Database entered unexpected status {last_status_seen} after MAINTENANCE."
            )
        # is the keyspace found?
        if name not in self.list_keyspaces():
            raise DevOpsAPIException("Could not create the keyspace.")
    logger.info(
        f"finished creating keyspace '{name}' on "
        f"'{self._database_id}' (DevOps API)"
    )
    if update_db_keyspace:
        # Re-point the spawning (Async)Database at the new keyspace.
        self.spawner_database.use_keyspace(name)
def drop(self, *, wait_until_active: bool = True, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Drop this database, i.e. delete it completely and permanently with all its data.

This method wraps the drop_database method of the AstraDBAdmin class, where more information may be found.

Args

wait_until_active
if True (default), the method returns only after the database has actually been deleted (generally a few minutes). If False, it will return right after issuing the drop request to the DevOps API, and it will be responsibility of the caller to check the database status/availability after that, if desired.
database_admin_timeout_ms
a timeout, in milliseconds, for the whole requested operation to complete. This is used only if wait_until_active is true, i.e. if the method call must wait and keep querying the DevOps API for the status of the newly-deleted database.
request_timeout_ms
a timeout, in milliseconds, for each underlying DevOps API HTTP request.
timeout_ms
an alias for both the request_timeout_ms and database_admin_timeout_ms timeout parameters. In practice, regardless of wait_until_active, this parameter dictates an overall timeout on this method call.

Note: a timeout event is no guarantee at all that the deletion request has not reached the API server and is not going to be, in fact, honored.

Example

>>> my_db_admin.list_keyspaces()
['default_keyspace', 'that_other_one']
>>> my_db_admin.drop()
>>> my_db_admin.list_keyspaces()  # raises a 404 Not Found http error

Note

Once the method succeeds, methods on this object – such as info(), or list_keyspaces() – can still be invoked: however, this hardly makes sense as the underlying actual database is no more. It is responsibility of the developer to design a correct flow which avoids using a deceased database any further.

Expand source code
def drop(
    self,
    *,
    wait_until_active: bool = True,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Drop this database, i.e. delete it completely and permanently with all its data.

    This method wraps the `drop_database` method of the AstraDBAdmin class,
    where more information may be found.

    Args:
        wait_until_active: if True (default), the method returns only after
            the database has actually been deleted (generally a few minutes).
            If False, it will return right after issuing the
            drop request to the DevOps API, and it will be responsibility
            of the caller to check the database status/availability
            after that, if desired.
        database_admin_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation to complete. This is used only
            if `wait_until_active` is true, i.e. if the method call must
            wait and keep querying the DevOps API for the status of the
            newly-deleted database.
        request_timeout_ms: a timeout, in milliseconds, for
            each underlying DevOps API HTTP request.
        timeout_ms: an alias for *both* the `request_timeout_ms` and
            `database_admin_timeout_ms` timeout parameters. In practice,
            regardless of `wait_until_active`, this parameter dictates an
            overall timeout on this method call.

    Note: a timeout event is no guarantee at all that the
    deletion request has not reached the API server and is not going
    to be, in fact, honored.

    Example:
        >>> my_db_admin.list_keyspaces()
        ['default_keyspace', 'that_other_one']
        >>> my_db_admin.drop()
        >>> my_db_admin.list_keyspaces()  # raises a 404 Not Found http error

    Note:
        Once the method succeeds, methods on this object -- such as `info()`,
        or `list_keyspaces()` -- can still be invoked: however, this hardly
        makes sense as the underlying actual database is no more.
        It is responsibility of the developer to design a correct flow
        which avoids using a deceased database any further.
    """

    logger.info(f"dropping this database ('{self._database_id}')")
    # Fix: the "finished dropping" log line used to sit after an
    # unconditional `return`, making it unreachable. Capture the (None)
    # result, log, then return it so the completion message is emitted.
    drop_result = self._astra_db_admin.drop_database(
        id=self._database_id,
        wait_until_active=wait_until_active,
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info(f"finished dropping this database ('{self._database_id}')")
    return drop_result
def drop_keyspace(self, name: str, *, wait_until_active: bool = True, keyspace_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Delete a keyspace from the database, optionally waiting for the database to become active again.

Args

name
the keyspace to delete. If it does not exist in this database, an error is raised.
wait_until_active
if True (default), the method returns only after the target database is in ACTIVE state again (a few seconds, usually). If False, it will return right after issuing the deletion request to the DevOps API, and it will be responsibility of the caller to check the database status/keyspace availability before working with it.
keyspace_admin_timeout_ms
a timeout, in milliseconds, for the whole requested operation to complete. This is used only if wait_until_active is true, i.e. if the method call must wait and keep querying the DevOps API for the status of the database during keyspace deletion.
request_timeout_ms
a timeout, in milliseconds, for each underlying DevOps API HTTP request.
timeout_ms
an alias for both the request_timeout_ms and keyspace_admin_timeout_ms timeout parameters. In practice, regardless of wait_until_active, this parameter dictates an overall timeout on this method call.

Note: a timeout event is no guarantee at all that the deletion request has not reached the API server and is not going to be, in fact, honored.

Example

>>> my_db_admin.list_keyspaces()
['default_keyspace', 'that_other_one']
>>> my_db_admin.drop_keyspace("that_other_one")
>>> my_db_admin.list_keyspaces()
['default_keyspace']
Expand source code
def drop_keyspace(
    self,
    name: str,
    *,
    wait_until_active: bool = True,
    keyspace_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Delete a keyspace from the database, optionally waiting for the database
    to become active again.

    Args:
        name: the keyspace to delete. If it does not exist in this database,
            an error is raised.
        wait_until_active: if True (default), the method returns only after
            the target database is in ACTIVE state again (a few
            seconds, usually). If False, it will return right after issuing the
            deletion request to the DevOps API, and it will be responsibility
            of the caller to check the database status/keyspace availability
            before working with it.
        keyspace_admin_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation to complete. This is used only
            if `wait_until_active` is true, i.e. if the method call must
            wait and keep querying the DevOps API for the status of the
            database during keyspace deletion.
        request_timeout_ms: a timeout, in milliseconds, for
            each underlying DevOps API HTTP request.
        timeout_ms: an alias for *both* the `request_timeout_ms` and
            `keyspace_admin_timeout_ms` timeout parameters. In practice,
            regardless of `wait_until_active`, this parameter dictates an
            overall timeout on this method call.

    Note: a timeout event is no guarantee at all that the
    deletion request has not reached the API server and is not going
    to be, in fact, honored.

    Example:
        >>> my_db_admin.list_keyspaces()
        ['default_keyspace', 'that_other_one']
        >>> my_db_admin.drop_keyspace("that_other_one")
        >>> my_db_admin.list_keyspaces()
        ['default_keyspace']
    """

    # Resolve the overall-operation timeout: explicit argument wins over
    # the `timeout_ms` alias, which wins over this object's defaults.
    _keyspace_admin_timeout_ms, _ka_label = _first_valid_timeout(
        (keyspace_admin_timeout_ms, "keyspace_admin_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (
            self.api_options.timeout_options.keyspace_admin_timeout_ms,
            "keyspace_admin_timeout_ms",
        ),
    )
    _request_timeout_ms, _rt_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    # The manager tracks the overall budget across the deletion request
    # plus any number of status-polling requests that may follow.
    timeout_manager = MultiCallTimeoutManager(
        overall_timeout_ms=_keyspace_admin_timeout_ms,
        dev_ops_api=True,
        timeout_label=_ka_label,
    )
    logger.info(
        f"dropping keyspace '{name}' on " f"'{self._database_id}' (DevOps API)"
    )
    dk_raw_response = self._dev_ops_api_commander.raw_request(
        http_method=HttpMethod.DELETE,
        additional_path=f"keyspaces/{name}",
        timeout_context=timeout_manager.remaining_timeout(
            cap_time_ms=_request_timeout_ms,
            cap_timeout_label=_rt_label,
        ),
    )
    if dk_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_ACCEPTED:
        # Fix: the message used to interpolate the builtin `id` (printing
        # e.g. "<built-in function id>") instead of the keyspace `name`,
        # and mislabeled the expected HTTP 202 status as "Created".
        raise DevOpsAPIException(
            f"keyspace deletion ('{name}') failed: API returned HTTP "
            f"{dk_raw_response.status_code} instead of "
            f"{DEV_OPS_RESPONSE_HTTP_ACCEPTED} - Accepted"
        )
    logger.info(
        "DevOps API returned from dropping keyspace "
        f"'{name}' on '{self._database_id}'"
    )
    if wait_until_active:
        # Poll the database status until it leaves MAINTENANCE; each poll
        # consumes part of the overall budget held by timeout_manager.
        last_status_seen = DEV_OPS_DATABASE_STATUS_MAINTENANCE
        while last_status_seen == DEV_OPS_DATABASE_STATUS_MAINTENANCE:
            logger.info(f"sleeping to poll for status of '{self._database_id}'")
            time.sleep(DEV_OPS_KEYSPACE_POLL_INTERVAL_S)
            last_status_seen = self._astra_db_admin._database_info_ctx(
                id=self._database_id,
                timeout_context=timeout_manager.remaining_timeout(
                    cap_time_ms=_request_timeout_ms,
                    cap_timeout_label=_rt_label,
                ),
            ).status
        if last_status_seen != DEV_OPS_DATABASE_STATUS_ACTIVE:
            raise DevOpsAPIException(
                f"Database entered unexpected status {last_status_seen} after MAINTENANCE."
            )
        # is the keyspace gone as requested?
        if name in self.list_keyspaces():
            raise DevOpsAPIException("Could not drop the keyspace.")
    logger.info(
        f"finished dropping keyspace '{name}' on "
        f"'{self._database_id}' (DevOps API)"
    )
def find_embedding_providers(self, *, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> FindEmbeddingProvidersResult

Query the API for the full information on available embedding providers.

Args

database_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for database_admin_timeout_ms.
timeout_ms
an alias for database_admin_timeout_ms.

Returns

A FindEmbeddingProvidersResult object with the complete information returned by the API about available embedding providers.

Example (output abridged and indented for clarity):

>>> admin_for_my_db.find_embedding_providers()
FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)
>>> admin_for_my_db.find_embedding_providers().embedding_providers
{
    'openai': EmbeddingProvider(
        display_name='OpenAI',
        models=[
            EmbeddingProviderModel(name='text-embedding-3-small'),
            ...
        ]
    ),
    ...
}

Expand source code
def find_embedding_providers(
    self,
    *,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> FindEmbeddingProvidersResult:
    """
    Retrieve the full catalog of embedding providers available through the API.

    Args:
        database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.

    Returns:
        A `FindEmbeddingProvidersResult` object with the complete information
        returned by the API about available embedding providers

    Example (output abridged and indented for clarity):
        >>> admin_for_my_db.find_embedding_providers()
        FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)
        >>> admin_for_my_db.find_embedding_providers().embedding_providers
        {
            'openai': EmbeddingProvider(
                display_name='OpenAI',
                models=[
                    EmbeddingProviderModel(name='text-embedding-3-small'),
                    ...
                ]
            ),
            ...
        }
    """

    # A single-request method: collapse the various timeout aliases into one.
    resolved_timeout_ms, timeout_label = _select_singlereq_timeout_da(
        timeout_options=self.api_options.timeout_options,
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info("findEmbeddingProviders")
    api_response = self._api_commander.request(
        payload={"findEmbeddingProviders": {}},
        timeout_context=_TimeoutContext(
            request_ms=resolved_timeout_ms, label=timeout_label
        ),
    )
    # Guard: a well-formed response carries "embeddingProviders" in "status".
    status_block = api_response.get("status", {})
    if "embeddingProviders" in status_block:
        logger.info("finished findEmbeddingProviders")
        return FindEmbeddingProvidersResult._from_dict(api_response["status"])
    raise UnexpectedDataAPIResponseException(
        text="Faulty response from findEmbeddingProviders API command.",
        raw_response=api_response,
    )
def get_async_database(self, *, keyspace: str | None = None, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None, token: str | TokenProvider | UnsetType = (unset), spawn_api_options: APIOptions | UnsetType = (unset)) ‑> AsyncDatabase

Create an AsyncDatabase instance from this database admin, for data-related tasks.

Args

keyspace
an optional keyspace to set in the resulting AsyncDatabase. The same default logic as for AstraDBAdmin.get_database() applies.
database_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying DevOps API request for 'region', should it be necessary. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for database_admin_timeout_ms.
timeout_ms
an alias for database_admin_timeout_ms.
token
if supplied, is passed to the AsyncDatabase instead of the one set for this object. Useful if one wants to work in a least-privilege manner, limiting the permissions for non-admin work. This can be either a literal token string or a subclass of TokenProvider.
spawn_api_options
a specification - complete or partial - of the API Options to override the defaults inherited from the database admin. This allows for a deeper configuration of the database admin, e.g. concerning timeouts; if this is passed together with the named timeout parameters, the latter will take precedence in their respective settings.

Returns

An AsyncDatabase object, ready to be used for working with data and collections.

Expand source code
def get_async_database(
    self,
    *,
    keyspace: str | None = None,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
    token: str | TokenProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> AsyncDatabase:
    """
    Spawn an AsyncDatabase from this database admin, for data-related tasks.

    Args:
        keyspace: an optional keyspace to set in the resulting AsyncDatabase.
            The same default logic as for `AstraDBAdmin.get_database` applies.
        database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying DevOps API request for 'region', should it be necessary.
            If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.
        token: if supplied, is passed to the AsyncDatabase instead of
            the one set for this object. Useful if one wants to work in
            a least-privilege manner, limiting the permissions for non-admin work.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        spawn_api_options: a specification - complete or partial - of the
            API Options to override the defaults inherited from the database admin.
            This allows for a deeper configuration of the database admin, e.g.
            concerning timeouts; if this is passed together with
            the named timeout parameters, the latter will take precedence
            in their respective settings.

    Returns:
        An AsyncDatabase object, ready to be used for working with
        data and collections.
    """

    # Build the synchronous Database first, then convert it: the async
    # variant shares all its configuration with the sync counterpart.
    sync_database = self._astra_db_admin.get_database(
        api_endpoint=self.api_endpoint,
        token=token,
        keyspace=keyspace,
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
        spawn_api_options=spawn_api_options,
    )
    return sync_database.to_async()
def get_database(self, *, keyspace: str | None = None, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None, token: str | TokenProvider | UnsetType = (unset), spawn_api_options: APIOptions | UnsetType = (unset)) ‑> Database

Create a Database instance from this database admin, for data-related tasks.

Args

keyspace
an optional keyspace to set in the resulting Database. The same default logic as for AstraDBAdmin.get_database() applies.
database_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying DevOps API request for 'region', should it be necessary. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for database_admin_timeout_ms.
timeout_ms
an alias for database_admin_timeout_ms.
token
if supplied, is passed to the Database instead of the one set for this object. Useful if one wants to work in a least-privilege manner, limiting the permissions for non-admin work. This can be either a literal token string or a subclass of TokenProvider.
spawn_api_options
a specification - complete or partial - of the API Options to override the defaults inherited from the database admin. This allows for a deeper configuration of the database admin, e.g. concerning timeouts; if this is passed together with the named timeout parameters, the latter will take precedence in their respective settings.

Returns

A Database object, ready to be used for working with data and collections.

Example

>>> my_db = my_db_admin.get_database()
>>> my_db.list_collection_names()
['movies', 'another_collection']

Note

creating an instance of Database does not trigger actual creation of the database itself, which should exist beforehand. To create databases, see the AstraDBAdmin class.

Expand source code
def get_database(
    self,
    *,
    keyspace: str | None = None,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
    token: str | TokenProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> Database:
    """
    Spawn a Database instance from this database admin, for data-related tasks.

    This is a thin delegation to `AstraDBAdmin.get_database`, pre-filling
    this admin's API endpoint.

    Args:
        keyspace: an optional keyspace to set in the resulting Database.
            The same default logic as for `AstraDBAdmin.get_database` applies.
        database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying DevOps API request for 'region', should it be necessary.
            If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.
        token: if supplied, is passed to the Database instead of
            the one set for this object. Useful if one wants to work in
            a least-privilege manner, limiting the permissions for non-admin work.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        spawn_api_options: a specification - complete or partial - of the
            API Options to override the defaults inherited from the database admin.
            This allows for a deeper configuration of the database admin, e.g.
            concerning timeouts; if this is passed together with
            the named timeout parameters, the latter will take precedence
            in their respective settings.

    Returns:
        A Database object, ready to be used for working with data and collections.

    Example:
        >>> my_db = my_db_admin.get_database()
        >>> my_db.list_collection_names()
        ['movies', 'another_collection']

    Note:
        creating an instance of Database does not trigger actual creation
        of the database itself, which should exist beforehand. To create databases,
        see the AstraDBAdmin class.
    """

    spawned_database = self._astra_db_admin.get_database(
        api_endpoint=self.api_endpoint,
        token=token,
        keyspace=keyspace,
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
        spawn_api_options=spawn_api_options,
    )
    return spawned_database
def info(self, *, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> AstraDBAdminDatabaseInfo

Query the DevOps API for the full info on this database.

Args

database_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for database_admin_timeout_ms.
timeout_ms
an alias for database_admin_timeout_ms.

Returns

An AstraDBAdminDatabaseInfo object.

Example

>>> my_db_info = admin_for_my_db.info()
>>> my_db_info.status
'ACTIVE'
>>> my_db_info.info.region
'us-east1'
Expand source code
def info(
    self,
    *,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> AstraDBAdminDatabaseInfo:
    """
    Fetch the full information on this database from the DevOps API.

    Delegates to `AstraDBAdmin.database_info` using this admin's database id.

    Args:
        database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.

    Returns:
        An AstraDBAdminDatabaseInfo object.

    Example:
        >>> my_db_info = admin_for_my_db.info()
        >>> my_db_info.status
        'ACTIVE'
        >>> my_db_info.info.region
        'us-east1'
    """

    logger.info(f"getting info ('{self._database_id}')")
    database_info = self._astra_db_admin.database_info(
        id=self._database_id,
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info(f"finished getting info ('{self._database_id}')")
    return database_info
def list_keyspaces(self, *, keyspace_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[str]

Query the DevOps API for a list of the keyspaces in the database.

Args

keyspace_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for keyspace_admin_timeout_ms.
timeout_ms
an alias for keyspace_admin_timeout_ms.

Returns

A list of the keyspaces, each a string, in no particular order.

Example

>>> admin_for_my_db.list_keyspaces()
['default_keyspace', 'staging_keyspace']
Expand source code
def list_keyspaces(
    self,
    *,
    keyspace_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[str]:
    """
    Ask the DevOps API which keyspaces exist in this database.

    Args:
        keyspace_admin_timeout_ms: a timeout, in milliseconds, applied to the
            one underlying API request this method issues. When omitted, the
            defaults configured on this object are used. (Since a single
            request is made, all three timeout parameters are equivalent.)
        request_timeout_ms: an alias for `keyspace_admin_timeout_ms`.
        timeout_ms: an alias for `keyspace_admin_timeout_ms`.

    Returns:
        A list of the keyspaces, each a string, in no particular order.

    Example:
        >>> admin_for_my_db.list_keyspaces()
        ['default_keyspace', 'staging_keyspace']
    """

    logger.info(f"getting keyspaces ('{self._database_id}')")
    database_info = self.info(
        database_admin_timeout_ms=keyspace_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info(f"finished getting keyspaces ('{self._database_id}')")
    # The keyspace list lives in the raw DevOps payload; without it there is
    # nothing reliable to return.
    if database_info.raw is None:
        raise DevOpsAPIException("Could not get the keyspace list.")
    return database_info.raw.get("info", {}).get("keyspaces") or []
def with_options(self, *, token: str | TokenProvider | UnsetType = (unset), api_options: APIOptions | UnsetType = (unset)) ‑> AstraDBDatabaseAdmin

Create a clone of this AstraDBDatabaseAdmin with some changed attributes.

Args

token
an Access Token to the database. Example: "AstraCS:xyz...". This can be either a literal token string or a subclass of TokenProvider.
api_options
any additional options to set for the clone, in the form of an APIOptions instance (where one can set just the needed attributes). In case the same setting is also provided as named parameter, the latter takes precedence.

Returns

a new AstraDBDatabaseAdmin instance.

Example

>>> admin_with_token = admin_for_my_db.with_options(
...     token="AstraCS:xyz...",
... )
Expand source code
def with_options(
    self,
    *,
    token: str | TokenProvider | UnsetType = _UNSET,
    api_options: APIOptions | UnsetType = _UNSET,
) -> AstraDBDatabaseAdmin:
    """
    Create a clone of this AstraDBDatabaseAdmin with some changed attributes.

    Args:
        token: an Access Token to the database. Example: `"AstraCS:xyz..."`.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        api_options: any additional options to set for the clone, in the form of
            an APIOptions instance (where one can set just the needed attributes).
            In case the same setting is also provided as named parameter,
            the latter takes precedence.

    Returns:
        a new AstraDBDatabaseAdmin instance.

    Example:
        >>> admin_with_token = admin_for_my_db.with_options(
        ...     token="AstraCS:xyz...",
        ... )
    """
    # (The previous docstring example passed an API endpoint positionally,
    # which this keyword-only signature does not accept.)

    return self._copy(
        token=token,
        api_options=api_options,
    )
class AsyncCollection (*, database: AsyncDatabase, name: str, keyspace: str | None, api_options: FullAPIOptions)

A Data API collection, the object to interact with the Data API for unstructured (schemaless) data, especially for DDL operations. This class has an asynchronous interface for use with asyncio.

This class is not meant for direct instantiation by the user, rather it is obtained by invoking methods such as get_collection of AsyncDatabase, wherefrom the AsyncCollection inherits its API options such as authentication token and API endpoint.

Args

database
a Database object, instantiated earlier. This represents the database the collection belongs to.
name
the collection name. This parameter should match an existing collection on the database.
keyspace
this is the keyspace to which the collection belongs. If nothing is specified, the database's working keyspace is used.
api_options
a complete specification of the API Options for this instance.

Examples

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> from astrapy import DataAPIClient
>>> client = DataAPIClient()
>>> async_database = client.get_async_database(
...     "https://01234567-....apps.astra.datastax.com",
...     token="AstraCS:..."
... )
>>> # Create a collection using the fluent syntax for its definition
>>> from astrapy.constants import VectorMetric
>>> from astrapy.info import CollectionDefinition
>>>
>>> collection_definition = (
...     CollectionDefinition.builder()
...     .set_vector_dimension(3)
...     .set_vector_metric(VectorMetric.DOT_PRODUCT)
...     .set_indexing("deny", ["annotations", "logs"])
...     .build()
... )
>>> my_collection = await async_database.create_collection(
...     "my_events",
...     definition=collection_definition,
... )
>>> # Create a collection with the definition as object
>>> from astrapy.info import CollectionVectorOptions
>>>
>>> collection_definition_1 = CollectionDefinition(
...     vector=CollectionVectorOptions(
...         dimension=3,
...         metric=VectorMetric.DOT_PRODUCT,
...     ),
...     indexing={"deny": ["annotations", "logs"]},
... )
>>> my_collection_1 = await async_database.create_collection(
...     "my_events",
...     definition=collection_definition_1,
... )
>>>
>>> # Create a collection with the definition as plain dictionary
>>> collection_definition_2 = {
...     "indexing": {"deny": ["annotations", "logs"]},
...     "vector": {
...         "dimension": 3,
...         "metric": VectorMetric.DOT_PRODUCT,
...     },
... }
>>> my_collection_2 = await async_database.create_collection(
...     "my_events",
...     definition=collection_definition_2,
... )
>>> # Get a reference to an existing collection
>>> # (no checks are performed on DB)
>>> my_collection_3a = async_database.get_collection("my_events")
>>> my_collection_3b = async_database.my_events
>>> my_collection_3c = async_database["my_events"]

Note

creating an instance of AsyncCollection does not trigger actual creation of the collection on the database. The latter should have been created beforehand, e.g. through the create_collection method of an AsyncDatabase.

Expand source code
class AsyncCollection(Generic[DOC]):
    """
    A Data API collection, the object to interact with the Data API for unstructured
    (schemaless) data, especially for DDL operations.
    This class has an asynchronous interface for use with asyncio.

    This class is not meant for direct instantiation by the user, rather
    it is obtained by invoking methods such as `get_collection` of AsyncDatabase,
    wherefrom the AsyncCollection inherits its API options such as authentication
    token and API endpoint.

    Args:
        database: a Database object, instantiated earlier. This represents
            the database the collection belongs to.
        name: the collection name. This parameter should match an existing
            collection on the database.
        keyspace: this is the keyspace to which the collection belongs.
            If nothing is specified, the database's working keyspace is used.
        api_options: a complete specification of the API Options for this instance.

    Examples:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> from astrapy import DataAPIClient
        >>> client = DataAPIClient()
        >>> async_database = client.get_async_database(
        ...     "https://01234567-....apps.astra.datastax.com",
        ...     token="AstraCS:..."
        ... )

        >>> # Create a collection using the fluent syntax for its definition
        >>> from astrapy.constants import VectorMetric
        >>> from astrapy.info import CollectionDefinition
        >>>
        >>> collection_definition = (
        ...     CollectionDefinition.builder()
        ...     .set_vector_dimension(3)
        ...     .set_vector_metric(VectorMetric.DOT_PRODUCT)
        ...     .set_indexing("deny", ["annotations", "logs"])
        ...     .build()
        ... )
        >>> my_collection = await async_database.create_collection(
        ...     "my_events",
        ...     definition=collection_definition,
        ... )

        >>>
        >>> # Create a collection with the definition as object
        >>> from astrapy.info import CollectionVectorOptions
        >>>
        >>> collection_definition_1 = CollectionDefinition(
        ...     vector=CollectionVectorOptions(
        ...         dimension=3,
        ...         metric=VectorMetric.DOT_PRODUCT,
        ...     ),
        ...     indexing={"deny": ["annotations", "logs"]},
        ... )
        >>> my_collection_1 = await async_database.create_collection(
        ...     "my_events",
        ...     definition=collection_definition_1,
        ... )
        >>>

        >>> # Create a collection with the definition as plain dictionary
        >>> collection_definition_2 = {
        ...     "indexing": {"deny": ["annotations", "logs"]},
        ...     "vector": {
        ...         "dimension": 3,
        ...         "metric": VectorMetric.DOT_PRODUCT,
        ...     },
        ... }
        >>> my_collection_2 = await async_database.create_collection(
        ...     "my_events",
        ...     definition=collection_definition_2,
        ... )

        >>> # Get a reference to an existing collection
        >>> # (no checks are performed on DB)
        >>> my_collection_3a = async_database.get_collection("my_events")
        >>> my_collection_3b = async_database.my_events
        >>> my_collection_3c = async_database["my_events"]

    Note:
        creating an instance of AsyncCollection does not trigger actual creation
        of the collection on the database. The latter should have been created
        beforehand, e.g. through the `create_collection` method of an AsyncDatabase.
    """

    def __init__(
        self,
        *,
        database: AsyncDatabase,
        name: str,
        keyspace: str | None,
        api_options: FullAPIOptions,
    ) -> None:
        self.api_options = api_options
        self._name = name
        # Fall back to the database's working keyspace when none is given.
        resolved_keyspace = database.keyspace if keyspace is None else keyspace
        if resolved_keyspace is None:
            raise ValueError("Attempted to create Collection with 'keyspace' unset.")

        # Keep a private database copy pinned to this collection's keyspace
        # and options, so later mutations of the caller's object cannot leak in.
        self._database = database._copy(
            keyspace=resolved_keyspace, api_options=self.api_options
        )
        # Later unpackings may override the auth header if they carry it.
        self._commander_headers = {
            DEFAULT_DATA_API_AUTH_HEADER: self.api_options.token.get_token(),
            **self.api_options.embedding_api_key.get_headers(),
            **self.api_options.database_additional_headers,
        }
        self._api_commander = self._get_api_commander()

    def __repr__(self) -> str:
        """Describe the collection: name, keyspace, endpoint, and API options."""
        endpoint_part = f'database.api_endpoint="{self.database.api_endpoint}"'
        return (
            f'{self.__class__.__name__}(name="{self.name}", '
            f'keyspace="{self.keyspace}", {endpoint_part}, '
            f"api_options={self.api_options})"
        )

    def __eq__(self, other: Any) -> bool:
        """Two collections are equal when name, database and options all match."""
        if not isinstance(other, AsyncCollection):
            return False
        return (
            self._name == other._name
            and self._database == other._database
            and self.api_options == other.api_options
        )

    def __call__(self, *pargs: Any, **kwargs: Any) -> None:
        """Reject calls: a likely mistyped database-method access ended up here."""
        error_message = (
            f"'{self.__class__.__name__}' object is not callable. If you "
            f"meant to call the '{self.name}' method on a "
            f"'{self.database.__class__.__name__}' object "
            "it is failing because no such method exists."
        )
        raise TypeError(error_message)

    def _get_api_commander(self) -> APICommander:
        """Instantiate a new APICommander based on the properties of this class."""

        if self._database.keyspace is None:
            raise ValueError(
                "No keyspace specified. AsyncCollection requires a keyspace to "
                "be set, e.g. through the `keyspace` constructor parameter."
            )

        # Assemble the request base path from the non-empty URL components:
        # /{api_path}/{api_version}/{keyspace}/{collection}
        url_options = self._database.api_options.data_api_url_options
        candidate_components = (
            url_options.api_path,
            url_options.api_version,
            self._database.keyspace,
            self._name,
        )
        path_components: list[str] = []
        for candidate in candidate_components:
            if candidate is None:
                continue
            cleaned = candidate.strip("/")
            if cleaned:
                path_components.append(cleaned)
        base_path = "/" + "/".join(path_components)

        # Collections honor the decimal-handling serdes setting both ways.
        decimal_support = self.api_options.serdes_options.use_decimals_in_collections
        return APICommander(
            api_endpoint=self._database.api_endpoint,
            path=base_path,
            headers=self._commander_headers,
            callers=self.api_options.callers,
            redacted_header_names=self.api_options.redacted_header_names,
            handle_decimals_writes=decimal_support,
            handle_decimals_reads=decimal_support,
        )

    async def __aenter__(self: AsyncCollection[DOC]) -> AsyncCollection[DOC]:
        """Enter the async context-manager protocol (no setup required)."""
        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None = None,
        exc_value: BaseException | None = None,
        traceback: TracebackType | None = None,
    ) -> None:
        """Exit the async context, closing the underlying API commander if any."""
        if self._api_commander is None:
            return
        await self._api_commander.__aexit__(
            exc_type=exc_type,
            exc_value=exc_value,
            traceback=traceback,
        )

    async def _converted_request(
        self,
        *,
        http_method: str = HttpMethod.POST,
        payload: dict[str, Any] | None = None,
        additional_path: str | None = None,
        request_params: dict[str, Any] | None = None,
        raise_api_errors: bool = True,
        timeout_context: _TimeoutContext,
    ) -> dict[str, Any]:
        """
        Issue a request through the API commander, applying this collection's
        serdes options to the payload on the way out and to the response on
        the way back in.

        Args:
            http_method: HTTP verb for the request (POST by default).
            payload: the request body, if any, prior to serdes preprocessing.
            additional_path: extra path segment appended to the commander path.
            request_params: optional query-string parameters. (Previously this
                had a mutable `{}` default, a shared-state hazard; `None` now
                stands for "no parameters".)
            raise_api_errors: whether Data API error responses raise.
            timeout_context: the timeout bookkeeping for this request.

        Returns:
            the postprocessed response, as a dictionary.
        """

        converted_payload = preprocess_collection_payload(
            payload, options=self.api_options.serdes_options
        )
        raw_response_json = await self._api_commander.async_request(
            http_method=http_method,
            payload=converted_payload,
            additional_path=additional_path,
            # Normalize the None sentinel back to the empty dict the
            # commander expects.
            request_params=request_params if request_params is not None else {},
            raise_api_errors=raise_api_errors,
            timeout_context=timeout_context,
        )
        response_json = postprocess_collection_response(
            raw_response_json, options=self.api_options.serdes_options
        )
        return response_json

    def _copy(
        self: AsyncCollection[DOC],
        *,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncCollection[DOC]:
        # Precedence, lowest to highest: this instance's options, then the
        # passed api_options, then the explicitly-named parameters.
        named_overrides = APIOptions(
            embedding_api_key=embedding_api_key,
        )
        effective_options = self.api_options.with_override(
            api_options
        ).with_override(named_overrides)
        return AsyncCollection(
            database=self.database,
            name=self.name,
            keyspace=self.keyspace,
            api_options=effective_options,
        )

    def with_options(
        self: AsyncCollection[DOC],
        *,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncCollection[DOC]:
        """
        Clone this collection, altering only the attributes passed here.

        Args:
            embedding_api_key: optional API key(s) for interacting with the collection.
                If an embedding service is configured, and this parameter is not None,
                each Data API call will include the necessary embedding-related headers
                as specified by this parameter. If a string is passed, it translates
                into the one "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
                For some vectorize providers/models, if using header-based
                authentication, specialized subclasses of
                `astrapy.authentication.EmbeddingHeadersProvider` should be supplied.
            api_options: any additional options to set for the clone, in the form of
                an APIOptions instance (where one can set just the needed attributes).
                In case the same setting is also provided as named parameter,
                the latter takes precedence.

        Returns:
            a new AsyncCollection instance.

        Example:
            >>> collection_with_api_key_configured = my_async_collection.with_options(
            ...     embedding_api_key="secret-key-0123abcd...",
            ... )
        """

        return self._copy(
            embedding_api_key=embedding_api_key,
            api_options=api_options,
        )

    def to_sync(
        self: AsyncCollection[DOC],
        *,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> Collection[DOC]:
        """
        Build the synchronous counterpart of this collection. Except for the
        overrides passed here, the copy keeps everything identical to this
        collection (with the database converted into a sync object).

        Args:
            embedding_api_key: optional API key(s) for interacting with the collection.
                If an embedding service is configured, and this parameter is not None,
                each Data API call will include the necessary embedding-related headers
                as specified by this parameter. If a string is passed, it translates
                into the one "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
                For some vectorize providers/models, if using header-based
                authentication, specialized subclasses of
                `astrapy.authentication.EmbeddingHeadersProvider` should be supplied.
            api_options: any additional options to set for the result, in the form of
                an APIOptions instance (where one can set just the needed attributes).
                In case the same setting is also provided as named parameter,
                the latter takes precedence.

        Returns:
            the new copy, a Collection instance.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> my_async_coll.to_sync().count_documents({}, upper_bound=100)
            77
        """

        # Named parameters take precedence over the passed api_options, which
        # in turn override this instance's options.
        named_overrides = APIOptions(
            embedding_api_key=embedding_api_key,
        )
        effective_options = self.api_options.with_override(
            api_options
        ).with_override(named_overrides)
        return Collection(
            database=self.database.to_sync(),
            name=self.name,
            keyspace=self.keyspace,
            api_options=effective_options,
        )

    async def options(
        self,
        *,
        collection_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionDefinition:
        """
        Get the collection options, i.e. its configuration as read from the database.

        Each invocation issues a fresh request to the Data API, with no
        caching: callers always see up-to-date information, e.g. for
        real-time collection validation by the application.

        Args:
            collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `collection_admin_timeout_ms`.
            timeout_ms: an alias for `collection_admin_timeout_ms`.

        Returns:
            a CollectionDefinition instance describing the collection.
            (See also the database `list_collections` method.)

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> asyncio.run(my_async_coll.options())
            CollectionDefinition(vector=CollectionVectorOptions(dimension=3, metric='cosine'))
        """

        admin_timeout_ms, timeout_label = _select_singlereq_timeout_ca(
            timeout_options=self.api_options.timeout_options,
            collection_admin_timeout_ms=collection_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info(f"getting collections in search of '{self.name}'")
        # The API has no single-collection lookup: list them all and scan.
        all_descriptors = await self.database._list_collections_ctx(
            keyspace=None,
            timeout_context=_TimeoutContext(
                request_ms=admin_timeout_ms,
                label=timeout_label,
            ),
        )
        logger.info(f"finished getting collections in search of '{self.name}'")
        for descriptor in all_descriptors:
            if descriptor.name == self.name:
                return descriptor.definition
        raise ValueError(
            f"Collection {self.keyspace}.{self.name} not found.",
        )

    async def info(
        self,
        *,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionInfo:
        """
        Return a CollectionInfo object describing this collection's
        name, location and database.

        Not to be confused with the collection `options` method (related
        to the collection internal configuration).

        Args:
            database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying DevOps API request.
                If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> asyncio.run(my_async_coll.info()).database_info.region
            'us-east1'
            >>> asyncio.run(my_async_coll.info()).full_name
            'default_keyspace.my_v_collection'

        Note:
            the returned CollectionInfo wraps, among other things,
            the database information: as such, calling this method
            triggers the same-named method of a Database object (which, in turn,
            performs a HTTP request to the DevOps API).
            See the documentation for `Database.info()` for more details.
        """

        # Delegate the heavy lifting (DevOps API call) to the database object.
        database_info = await self.database.info(
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        return CollectionInfo(
            database_info=database_info,
            keyspace=self.keyspace,
            name=self.name,
            full_name=self.full_name,
        )

    @property
    def database(self) -> AsyncDatabase:
        """
        The AsyncDatabase object this collection belongs to.

        Example:
            >>> my_async_coll.database.name
            'the_db'
        """

        return self._database

    @property
    def keyspace(self) -> str:
        """
        The keyspace this collection is in.

        Example:
            >>> my_async_coll.keyspace
            'default_keyspace'
        """

        current_keyspace = self.database.keyspace
        if current_keyspace is None:
            raise ValueError("The collection's DB is set with keyspace=None")
        return current_keyspace

    @property
    def name(self) -> str:
        """
        The name of this collection, as given at construction time.

        Example:
            >>> my_async_coll.name
            'my_v_collection'
        """

        return self._name

    @property
    def full_name(self) -> str:
        """
        The fully-qualified collection name within the database,
        in the form "keyspace.collection_name".

        Example:
            >>> my_async_coll.full_name
            'default_keyspace.my_v_collection'
        """

        return ".".join((self.keyspace, self.name))

    async def insert_one(
        self,
        document: DOC,
        *,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionInsertOneResult:
        """
        Insert a single document in the collection in an atomic operation.

        Args:
            document: the dictionary expressing the document to insert.
                The `_id` field of the document can be left out, in which
                case it will be created automatically.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a CollectionInsertOneResult object.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> async def write_and_count(acol: AsyncCollection) -> None:
            ...     count0 = await acol.count_documents({}, upper_bound=10)
            ...     print("count0", count0)
            ...     await acol.insert_one(
            ...         {
            ...             "age": 30,
            ...             "name": "Smith",
            ...             "food": ["pear", "peach"],
            ...             "likes_fruit": True,
            ...         },
            ...     )
            ...     await acol.insert_one({"_id": "user-123", "age": 50, "name": "Maccio"})
            ...     count1 = await acol.count_documents({}, upper_bound=10)
            ...     print("count1", count1)
            ...
            >>> asyncio.run(write_and_count(my_async_coll))
            count0 0
            count1 2

            >>> asyncio.run(my_async_coll.insert_one({"tag": "v", "$vector": [10, 11]}))
            CollectionInsertOneResult(...)

        Note:
            If an `_id` is explicitly provided, which corresponds to a document
            that exists already in the collection, an error is raised and
            the insertion fails.
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        io_payload = {"insertOne": {"document": document}}
        logger.info(f"insertOne on '{self.name}'")
        io_response = await self._converted_request(
            payload=io_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished insertOne on '{self.name}'")
        # A well-formed response carries a non-empty status.insertedIds list;
        # anything else (missing key or empty list) is a malformed response.
        inserted_ids = io_response.get("status", {}).get("insertedIds")
        if inserted_ids:
            return CollectionInsertOneResult(
                raw_results=[io_response],
                inserted_id=inserted_ids[0],
            )
        # NOTE(review): the stray '$' in the message below looks like a
        # template leftover — kept byte-identical to preserve observable output.
        raise ValueError(
            "Could not complete a insert_one operation. "
            f"(gotten '${json.dumps(io_response)}')"
        )

    async def insert_many(
        self,
        documents: Iterable[DOC],
        *,
        ordered: bool = False,
        chunk_size: int | None = None,
        concurrency: int | None = None,
        request_timeout_ms: int | None = None,
        general_method_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionInsertManyResult:
        """
        Insert a list of documents into the collection.
        This is not an atomic operation.

        Args:
            documents: an iterable of dictionaries, each a document to insert.
                Documents may specify their `_id` field or leave it out, in which
                case it will be added automatically.
            ordered: if False (default), the insertions can occur in arbitrary order
                and possibly concurrently. If True, they are processed sequentially.
                If there are no specific reasons against it, unordered insertions are to
                be preferred as they complete much faster.
            chunk_size: how many documents to include in a single API request.
                Exceeding the server maximum allowed value results in an error.
                Leave it unspecified (recommended) to use the system default.
            concurrency: maximum number of concurrent requests to the API at
                a given time. It cannot be more than one for ordered insertions.
            request_timeout_ms: a timeout, in milliseconds, for each API request.
                If not passed, the collection-level setting is used instead.
            general_method_timeout_ms: a timeout, in milliseconds, for the whole
                requested operation (which may involve multiple API requests).
                If not passed, the collection-level setting is used instead.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a CollectionInsertManyResult object.

        Examples:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> async def write_and_count(acol: AsyncCollection) -> None:
            ...             count0 = await acol.count_documents({}, upper_bound=10)
            ...             print("count0", count0)
            ...             im_result1 = await acol.insert_many(
            ...                 [
            ...                     {"a": 10},
            ...                     {"a": 5},
            ...                     {"b": [True, False, False]},
            ...                 ],
            ...                 ordered=True,
            ...             )
            ...             print("inserted1", im_result1.inserted_ids)
            ...             count1 = await acol.count_documents({}, upper_bound=100)
            ...             print("count1", count1)
            ...             await acol.insert_many(
            ...                 [{"seq": i} for i in range(50)],
            ...                 concurrency=5,
            ...             )
            ...             count2 = await acol.count_documents({}, upper_bound=100)
            ...             print("count2", count2)
            ...
            >>> asyncio.run(write_and_count(my_async_coll))
            count0 0
            inserted1 ['e3c2a684-...', '1de4949f-...', '167dacc3-...']
            count1 3
            count2 53
            >>> asyncio.run(my_async_coll.insert_many(
            ...     [
            ...         {"tag": "a", "$vector": [1, 2]},
            ...         {"tag": "b", "$vector": [3, 4]},
            ...     ]
            ... ))
            CollectionInsertManyResult(...)

        Note:
            Unordered insertions are executed with some degree of concurrency,
            so it is usually better to prefer this mode unless the order in the
            document sequence is important.

        Note:
            A failure mode for this command is related to certain faulty documents
            found among those to insert: a document may have the an `_id` already
            present on the collection, or its vector dimension may not
            match the collection setting.

            For an ordered insertion, the method will raise an exception at
            the first such faulty document -- nevertheless, all documents processed
            until then will end up being written to the database.

            For unordered insertions, if the error stems from faulty documents
            the insertion proceeds until exhausting the input documents: then,
            an exception is raised -- and all insertable documents will have been
            written to the database, including those "after" the troublesome ones.

            If, on the other hand, there are errors not related to individual
            documents (such as a network connectivity error), the whole
            `insert_many` operation will stop in mid-way, an exception will be raised,
            and only a certain amount of the input documents will
            have made their way to the database.
        """

        _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
            (general_method_timeout_ms, "general_method_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.general_method_timeout_ms,
                "general_method_timeout_ms",
            ),
        )
        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        if concurrency is None:
            if ordered:
                _concurrency = 1
            else:
                _concurrency = DEFAULT_INSERT_MANY_CONCURRENCY
        else:
            _concurrency = concurrency
        if _concurrency > 1 and ordered:
            raise ValueError("Cannot run ordered insert_many concurrently.")
        if chunk_size is None:
            _chunk_size = DEFAULT_INSERT_MANY_CHUNK_SIZE
        else:
            _chunk_size = chunk_size
        _documents = list(documents)
        logger.info(f"inserting {len(_documents)} documents in '{self.name}'")
        raw_results: list[dict[str, Any]] = []
        timeout_manager = MultiCallTimeoutManager(
            overall_timeout_ms=_general_method_timeout_ms,
            timeout_label=_gmt_label,
        )
        if ordered:
            options = {"ordered": True}
            inserted_ids: list[Any] = []
            for i in range(0, len(_documents), _chunk_size):
                im_payload = {
                    "insertMany": {
                        "documents": _documents[i : i + _chunk_size],
                        "options": options,
                    },
                }
                logger.info(f"insertMany(chunk) on '{self.name}'")
                chunk_response = await self._converted_request(
                    payload=im_payload,
                    raise_api_errors=False,
                    timeout_context=timeout_manager.remaining_timeout(
                        cap_time_ms=_request_timeout_ms,
                        cap_timeout_label=_rt_label,
                    ),
                )
                logger.info(f"finished insertMany(chunk) on '{self.name}'")
                # accumulate the results in this call
                chunk_inserted_ids = (chunk_response.get("status") or {}).get(
                    "insertedIds", []
                )
                inserted_ids += chunk_inserted_ids
                raw_results += [chunk_response]
                # if errors, quit early
                if chunk_response.get("errors", []):
                    partial_result = CollectionInsertManyResult(
                        raw_results=raw_results,
                        inserted_ids=inserted_ids,
                    )
                    raise CollectionInsertManyException.from_response(
                        command=None,
                        raw_response=chunk_response,
                        partial_result=partial_result,
                    )

            # return
            full_result = CollectionInsertManyResult(
                raw_results=raw_results,
                inserted_ids=inserted_ids,
            )
            logger.info(
                f"finished inserting {len(_documents)} documents in '{self.name}'"
            )
            return full_result

        else:
            # unordered: concurrent or not, do all of them and parse the results
            options = {"ordered": False}

            sem = asyncio.Semaphore(_concurrency)

            async def concurrent_insert_chunk(
                document_chunk: list[DOC],
            ) -> dict[str, Any]:
                async with sem:
                    im_payload = {
                        "insertMany": {
                            "documents": document_chunk,
                            "options": options,
                        },
                    }
                    logger.info(f"insertMany(chunk) on '{self.name}'")
                    im_response = await self._converted_request(
                        payload=im_payload,
                        raise_api_errors=False,
                        timeout_context=timeout_manager.remaining_timeout(
                            cap_time_ms=_request_timeout_ms,
                            cap_timeout_label=_rt_label,
                        ),
                    )
                    logger.info(f"finished insertMany(chunk) on '{self.name}'")
                    return im_response

            if _concurrency > 1:
                tasks = [
                    asyncio.create_task(
                        concurrent_insert_chunk(_documents[i : i + _chunk_size])
                    )
                    for i in range(0, len(_documents), _chunk_size)
                ]
                raw_results = await asyncio.gather(*tasks)
            else:
                raw_results = [
                    await concurrent_insert_chunk(_documents[i : i + _chunk_size])
                    for i in range(0, len(_documents), _chunk_size)
                ]

            # recast raw_results
            inserted_ids = [
                inserted_id
                for chunk_response in raw_results
                for inserted_id in (chunk_response.get("status") or {}).get(
                    "insertedIds", []
                )
            ]

            # check-raise
            if any(
                [chunk_response.get("errors", []) for chunk_response in raw_results]
            ):
                partial_result = CollectionInsertManyResult(
                    raw_results=raw_results,
                    inserted_ids=inserted_ids,
                )
                raise CollectionInsertManyException.from_responses(
                    commands=[None for _ in raw_results],
                    raw_responses=raw_results,
                    partial_result=partial_result,
                )

            # return
            full_result = CollectionInsertManyResult(
                raw_results=raw_results,
                inserted_ids=inserted_ids,
            )
            logger.info(
                f"finished inserting {len(_documents)} documents in '{self.name}'"
            )
            return full_result

    # Overload: no `document_type` supplied -- the cursor yields items of the
    # collection's own document type, i.e. AsyncCollectionFindCursor[DOC, DOC].
    @overload
    def find(
        self,
        filter: FilterType | None = None,
        *,
        projection: ProjectionType | None = None,
        document_type: None = None,
        skip: int | None = None,
        limit: int | None = None,
        include_similarity: bool | None = None,
        include_sort_vector: bool | None = None,
        sort: SortType | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> AsyncCollectionFindCursor[DOC, DOC]: ...

    # Overload: an explicit `document_type` is supplied -- the cursor is typed
    # as yielding that type, i.e. AsyncCollectionFindCursor[DOC, DOC2]. This is
    # a typing-only distinction (e.g. for use with a narrowing projection).
    @overload
    def find(
        self,
        filter: FilterType | None = None,
        *,
        projection: ProjectionType | None = None,
        document_type: type[DOC2],
        skip: int | None = None,
        limit: int | None = None,
        include_similarity: bool | None = None,
        include_sort_vector: bool | None = None,
        sort: SortType | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> AsyncCollectionFindCursor[DOC, DOC2]: ...

    def find(
        self,
        filter: FilterType | None = None,
        *,
        projection: ProjectionType | None = None,
        document_type: type[DOC2] | None = None,
        skip: int | None = None,
        limit: int | None = None,
        include_similarity: bool | None = None,
        include_sort_vector: bool | None = None,
        sort: SortType | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> AsyncCollectionFindCursor[DOC, DOC2]:
        """
        Find documents on the collection, matching a certain provided filter.

        The method returns a Cursor that can then be iterated over. Depending
        on the method call pattern, the iteration over all documents can reflect
        collection mutations occurred since the `find` method was called, or not.
        In cases where the cursor reflects mutations in real-time, it will iterate
        over cursors in an approximate way (i.e. exhibiting occasional skipped
        or duplicate documents). This happens when making use of the `sort`
        option in a non-vector-search manner.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            projection: it controls which parts of the document are returned.
                It can be an allow-list: `{"f1": True, "f2": True}`,
                or a deny-list: `{"fx": False, "fy": False}`, but not a mixture
                (except for the `_id` and other special fields, which can be
                associated to both True or False independently of the rest
                of the specification).
                The special star-projections `{"*": True}` and `{"*": False}`
                have the effect of returning the whole document and `{}` respectively.
                For lists in documents, slice directives can be passed to select
                portions of the list: for instance, `{"array": {"$slice": 2}}`,
                `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
                `{"array": {"$slice": [-4, 2]}}`.
                An iterable over strings will be treated implicitly as an allow-list.
                The default projection (used if this parameter is not passed) does not
                necessarily include "special" fields such as `$vector` or `$vectorize`.
                See the Data API documentation for more on projections.
            document_type: this parameter acts a formal specifier for the type checker.
                If omitted, the resulting cursor is implicitly an
                `AsyncCollectionFindCursor[DOC, DOC]`, i.e. maintains the same type for
                the items it returns as that for the documents in the table. Strictly
                typed code may want to specify this parameter especially when a
                projection is given.
            skip: with this integer parameter, what would be the first `skip`
                documents returned by the query are discarded, and the results
                start from the (skip+1)-th document.
                This parameter can be used only in conjunction with an explicit
                `sort` criterion of the ascending/descending type (i.e. it cannot
                be used when not sorting, nor with vector-based ANN search).
            limit: this (integer) parameter sets a limit over how many documents
                are returned. Once `limit` is reached (or the cursor is exhausted
                for lack of matching documents), nothing more is returned.
            include_similarity: a boolean to request the numeric value of the
                similarity to be returned as an added "$similarity" key in each
                returned document. Can only be used for vector ANN search, i.e.
                when either `vector` is supplied or the `sort` parameter has the
                shape {"$vector": ...}.
            include_sort_vector: a boolean to request the search query vector.
                If set to True (and if the invocation is a vector search), calling
                the `get_sort_vector` method on the returned cursor will yield
                the vector used for the ANN search.
            sort: with this dictionary parameter one can control the order
                the documents are returned. See the Note about sorting, as well as
                the one about upper bounds, for details.
                Vector-based ANN sorting is achieved by providing a "$vector"
                or a "$vectorize" key in `sort`.
            request_timeout_ms: a timeout, in milliseconds, for each single one
                of the underlying HTTP requests used to fetch documents as the
                cursor is iterated over.
                If not passed, the collection-level setting is used instead.
            timeout_ms: an alias for `request_timeout_ms`.

        Returns:
            an AsyncCollectionFindCursor object representing iterations over
            the matching documents (see the AsyncCollectionFindCursor object
            for how to use it. The simplest thing is to run an async for loop:
            `async for document in collection.find(...):`).

        Examples:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> async def run_finds(acol: AsyncCollection) -> None:
            ...             filter = {"seq": {"$exists": True}}
            ...             print("find results 1:")
            ...             async for doc in acol.find(filter, projection={"seq": True}, limit=5):
            ...                 print(doc["seq"])
            ...             async_cursor1 = acol.find(
            ...                 {},
            ...                 limit=4,
            ...                 sort={"seq": astrapy.constants.SortMode.DESCENDING},
            ...             )
            ...             ids = [doc["_id"] async for doc in async_cursor1]
            ...             print("find results 2:", ids)
            ...             async_cursor2 = acol.find({}, limit=3)
            ...             seqs = await async_cursor2.distinct("seq")
            ...             print("distinct results 3:", seqs)
            ...
            >>> asyncio.run(run_finds(my_async_coll))
            find results 1:
            48
            35
            7
            11
            13
            find results 2: ['d656cd9d-...', '479c7ce8-...', '96dc87fd-...', '83f0a21f-...']
            distinct results 3: [48, 35, 7]

            >>> async def run_vector_finds(acol: AsyncCollection) -> None:
            ...     await acol.insert_many([
            ...         {"tag": "A", "$vector": [4, 5]},
            ...         {"tag": "B", "$vector": [3, 4]},
            ...         {"tag": "C", "$vector": [3, 2]},
            ...         {"tag": "D", "$vector": [4, 1]},
            ...         {"tag": "E", "$vector": [2, 5]},
            ...     ])
            ...     ann_tags = [
            ...         document["tag"]
            ...         async for document in acol.find(
            ...             {},
            ...             sort={"$vector": [3, 3]},
            ...             limit=3,
            ...         )
            ...     ]
            ...     return ann_tags
            ...
            >>> asyncio.run(run_vector_finds(my_async_coll))
            ['A', 'B', 'C']
            >>> # (assuming the collection has metric VectorMetric.COSINE)

            >>> async_cursor = my_async_coll.find(
            ...     sort={"$vector": [3, 3]},
            ...     limit=3,
            ...     include_sort_vector=True,
            ... )
            >>> asyncio.run(async_cursor.get_sort_vector())
            [3.0, 3.0]
            >>> asyncio.run(async_cursor.__anext__())
            {'_id': 'b13ce177-738e-47ec-bce1-77738ee7ec93', 'tag': 'A'}
            >>> asyncio.run(async_cursor.get_sort_vector())
            [3.0, 3.0]

        Note:
            The following are example values for the `sort` parameter.
            When no particular order is required:
                sort={}
            When sorting by a certain value in ascending/descending order:
                sort={"field": SortMode.ASCENDING}
                sort={"field": SortMode.DESCENDING}
            When sorting first by "field" and then by "subfield"
            (while modern Python versions preserve the order of dictionaries,
            it is suggested for clarity to employ a `collections.OrderedDict`
            in these cases):
                sort={
                    "field": SortMode.ASCENDING,
                    "subfield": SortMode.ASCENDING,
                }
            When running a vector similarity (ANN) search:
                sort={"$vector": [0.4, 0.15, -0.5]}

        Note:
            Some combinations of arguments impose an implicit upper bound on the
            number of documents that are returned by the Data API. More specifically:
            (a) Vector ANN searches cannot return more than a number of documents
            that at the time of writing is set to 1000 items.
            (b) When using a sort criterion of the ascending/descending type,
            the Data API will return a smaller number of documents, set to 20
            at the time of writing, and stop there. The returned documents are
            the top results across the whole collection according to the requested
            criterion.
            These provisions should be kept in mind even when subsequently running
            a command such as `.distinct()` on a cursor.

        Note:
            When not specifying sorting criteria at all (by vector or otherwise),
            the cursor can scroll through an arbitrary number of documents as
            the Data API and the client periodically exchange new chunks of documents.
            It should be noted that the behavior of the cursor in the case documents
            have been added/removed after the `find` was started depends on database
            internals and it is not guaranteed, nor excluded, that such "real-time"
            changes in the data would be picked up by the cursor.
        """

        # lazy-import here to avoid circular import issues
        from astrapy.cursors import AsyncCollectionFindCursor

        # Per-request timeout: explicit params win over the collection-level
        # setting; no overall (method-wide) timeout is applied to a cursor.
        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        # The cursor is configured fluently; no API request is issued here --
        # requests start only once the cursor is iterated over.
        return (
            AsyncCollectionFindCursor(
                collection=self,
                request_timeout_ms=_request_timeout_ms,
                overall_timeout_ms=None,
                request_timeout_label=_rt_label,
            )
            .filter(filter)
            .project(projection)
            .skip(skip)
            .limit(limit)
            .sort(sort)
            .include_similarity(include_similarity)
            .include_sort_vector(include_sort_vector)
        )

    async def find_one(
        self,
        filter: FilterType | None = None,
        *,
        projection: ProjectionType | None = None,
        include_similarity: bool | None = None,
        sort: SortType | None = None,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> DOC | None:
        """
        Run a search, returning the first document in the collection that matches
        provided filters, if any is found.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            projection: it controls which parts of the document are returned.
                It can be an allow-list: `{"f1": True, "f2": True}`,
                or a deny-list: `{"fx": False, "fy": False}`, but not a mixture
                (except for the `_id` and other special fields, which can be
                associated to both True or False independently of the rest
                of the specification).
                The special star-projections `{"*": True}` and `{"*": False}`
                have the effect of returning the whole document and `{}` respectively.
                For lists in documents, slice directives can be passed to select
                portions of the list: for instance, `{"array": {"$slice": 2}}`,
                `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
                `{"array": {"$slice": [-4, 2]}}`.
                An iterable over strings will be treated implicitly as an allow-list.
                The default projection (used if this parameter is not passed) does not
                necessarily include "special" fields such as `$vector` or `$vectorize`.
                See the Data API documentation for more on projections.
            include_similarity: a boolean to request the numeric value of the
                similarity to be returned as an added "$similarity" key in the
                returned document. Can only be used for vector ANN search, i.e.
                when either `vector` is supplied or the `sort` parameter has the
                shape {"$vector": ...}.
            sort: with this dictionary parameter one can control the order
                the documents are returned. See the Note about sorting for details.
                Vector-based ANN sorting is achieved by providing a "$vector"
                or a "$vectorize" key in `sort`.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a dictionary expressing the required document, otherwise None.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> async def demo_find_one(acol: AsyncCollection) -> None:
            ...     print("Count:", await acol.count_documents({}, upper_bound=100))
            ...     result0 = await acol.find_one({})
            ...     print("result0", result0)
            ...     result1 = await acol.find_one({"seq": 10})
            ...     print("result1", result1)
            ...     result2 = await acol.find_one({"seq": 1011})
            ...     print("result2", result2)
            ...     result3 = await acol.find_one({}, projection={"seq": False})
            ...     print("result3", result3)
            ...     result4 = await acol.find_one(
            ...         {},
            ...         sort={"seq": astrapy.constants.SortMode.DESCENDING},
            ...     )
            ...     print("result4", result4)
            ...
            >>>
            >>> asyncio.run(demo_find_one(my_async_coll))
            Count: 50
            result0 {'_id': '479c7ce8-...', 'seq': 48}
            result1 {'_id': '93e992c4-...', 'seq': 10}
            result2 None
            result3 {'_id': '479c7ce8-...'}
            result4 {'_id': 'd656cd9d-...', 'seq': 49}

            >>> asyncio.run(my_async_coll.find_one(
            ...     {},
            ...     sort={"$vector": [1, 0]},
            ...     projection={"*": True},
            ... ))
            {'_id': '...', 'tag': 'D', '$vector': [4.0, 1.0]}

        Note:
            See the `find` method for more details on the accepted parameters
            (whereas `skip` and `limit` are not valid parameters for `find_one`).
        """

        # Single-request method: all timeout aliases collapse into one
        # per-request timeout value.
        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        fo_options = (
            None
            if include_similarity is None
            else {"includeSimilarity": include_similarity}
        )
        # Build the findOne payload, omitting any unset (None) entries.
        fo_payload = {
            "findOne": {
                k: v
                for k, v in {
                    "filter": filter,
                    "projection": normalize_optional_projection(projection),
                    "options": fo_options,
                    "sort": sort,
                }.items()
                if v is not None
            }
        }
        fo_response = await self._converted_request(
            payload=fo_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        # A well-formed findOne response always carries a "document" key
        # (possibly valued None when nothing matches).
        if "document" not in (fo_response.get("data") or {}):
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from findOne API command.",
                raw_response=fo_response,
            )
        doc_response = fo_response["data"]["document"]
        if doc_response is None:
            return None
        return doc_response  # type: ignore[no-any-return]

    async def distinct(
        self,
        key: str,
        *,
        filter: FilterType | None = None,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[Any]:
        """
        Return a list of the unique values of `key` across the documents
        in the collection that match the provided filter.

        Args:
            key: the name of the field whose value is inspected across documents.
                Keys can use dot-notation to descend to deeper document levels.
                Example of acceptable `key` values:
                    "field"
                    "field.subfield"
                    "field.3"
                    "field.3.subfield"
                If lists are encountered and no numeric index is specified,
                all items in the list are visited.
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            general_method_timeout_ms: a timeout, in milliseconds, for the whole
                requested operation (which may involve multiple API requests).
                This method, being based on `find` (see) may entail successive HTTP API
                requests, depending on the amount of involved documents.
            request_timeout_ms: a timeout, in milliseconds, for each API request.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a list of all different values for `key` found across the documents
            that match the filter. The result list has no repeated items.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> async def run_distinct(acol: AsyncCollection) -> None:
            ...     await acol.insert_many(
            ...         [
            ...             {"name": "Marco", "food": ["apple", "orange"], "city": "Helsinki"},
            ...             {"name": "Emma", "food": {"likes_fruit": True, "allergies": []}},
            ...         ]
            ...     )
            ...     distinct0 = await acol.distinct("name")
            ...     print("distinct('name')", distinct0)
            ...     distinct1 = await acol.distinct("city")
            ...     print("distinct('city')", distinct1)
            ...     distinct2 = await acol.distinct("food")
            ...     print("distinct('food')", distinct2)
            ...     distinct3 = await acol.distinct("food.1")
            ...     print("distinct('food.1')", distinct3)
            ...     distinct4 = await acol.distinct("food.allergies")
            ...     print("distinct('food.allergies')", distinct4)
            ...     distinct5 = await acol.distinct("food.likes_fruit")
            ...     print("distinct('food.likes_fruit')", distinct5)
            ...
            >>> asyncio.run(run_distinct(my_async_coll))
            distinct('name') ['Emma', 'Marco']
            distinct('city') ['Helsinki']
            distinct('food') [{'likes_fruit': True, 'allergies': []}, 'apple', 'orange']
            distinct('food.1') ['orange']
            distinct('food.allergies') []
            distinct('food.likes_fruit') [True]

        Note:
            It must be kept in mind that `distinct` is a client-side operation,
            which effectively browses all required documents using the logic
            of the `find` method and collects the unique values found for `key`.
            As such, there may be performance, latency and ultimately
            billing implications if the amount of matching documents is large.

        Note:
            For details on the behaviour of "distinct" in conjunction with
            real-time changes in the collection contents, see the
            Note of the `find` command.
        """

        # lazy-import here to avoid circular import issues
        from astrapy.cursors import AsyncCollectionFindCursor

        _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
            (general_method_timeout_ms, "general_method_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.general_method_timeout_ms,
                "general_method_timeout_ms",
            ),
        )
        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        # preparing cursor:
        _extractor = _create_document_key_extractor(key)
        _key = _reduce_distinct_key_to_safe(key)
        if _key == "":
            raise ValueError(
                "The 'key' parameter for distinct cannot be empty "
                "or start with a list index."
            )
        # relaxing the type hint (limited to within this method body)
        f_cursor: AsyncCollectionFindCursor[dict[str, Any], dict[str, Any]] = (
            AsyncCollectionFindCursor(
                collection=self,
                request_timeout_ms=_request_timeout_ms,
                overall_timeout_ms=_general_method_timeout_ms,
                request_timeout_label=_rt_label,
                overall_timeout_label=_gmt_label,
            )  # type: ignore[assignment]
            .filter(filter)
            .project({_key: True})
        )
        # consuming it:
        _item_hashes = set()
        distinct_items: list[Any] = []
        logger.info(f"running distinct() on '{self.name}'")
        async for document in f_cursor:
            for item in _extractor(document):
                _item_hash = _hash_document(
                    item, options=self.api_options.serdes_options
                )
                if _item_hash not in _item_hashes:
                    _item_hashes.add(_item_hash)
                    distinct_items.append(item)
        logger.info(f"finished running distinct() on '{self.name}'")
        return distinct_items

    async def count_documents(
        self,
        filter: FilterType,
        *,
        upper_bound: int,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> int:
        """
        Count the documents in the collection matching the specified filter.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            upper_bound: a required ceiling on the result of the count operation.
                If the actual number of documents exceeds this value,
                an exception will be raised.
                Furthermore, if the actual number of documents exceeds the maximum
                count that the Data API can reach (regardless of upper_bound),
                an exception will be raised.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            the exact count of matching documents.

        Example:
            >>> async def do_count_docs(acol: AsyncCollection) -> None:
            ...     await acol.insert_many([{"seq": i} for i in range(20)])
            ...     count0 = await acol.count_documents({}, upper_bound=100)
            ...     print("count0", count0)
            ...     count1 = await acol.count_documents(
            ...         {"seq":{"$gt": 15}}, upper_bound=100
            ...     )
            ...     print("count1", count1)
            ...     count2 = await acol.count_documents({}, upper_bound=10)
            ...     print("count2", count2)
            ...
            >>> asyncio.run(do_count_docs(my_async_coll))
            count0 20
            count1 4
            Traceback (most recent call last):
                ... ...
            astrapy.exceptions.TooManyDocumentsToCountException

        Note:
            Count operations are expensive: for this reason, the best practice
            is to provide a reasonable `upper_bound` according to the caller
            expectations. Moreover, indiscriminate usage of count operations
            for sizeable amounts of documents (i.e. in the thousands and more)
            is discouraged in favor of alternative application-specific solutions.
            Keep in mind that the Data API has a hard upper limit on the amount
            of documents it will count, and that an exception will be thrown
            by this method if this limit is encountered.
        """

        # resolve the effective per-request timeout from parameters and defaults
        _req_ms, _req_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info(f"countDocuments on '{self.name}'")
        count_response = await self._converted_request(
            payload={"countDocuments": {"filter": filter}},
            timeout_context=_TimeoutContext(request_ms=_req_ms, label=_req_label),
        )
        logger.info(f"finished countDocuments on '{self.name}'")
        status = count_response.get("status", {})
        # a well-formed response always carries a "count" in its status
        if "count" not in status:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from countDocuments API command.",
                raw_response=count_response,
            )
        count: int = status["count"]
        # "moreData" signals the server-side hard limit was hit: the true
        # count is unknown beyond `count`, so this is always an error
        if status.get("moreData", False):
            raise TooManyDocumentsToCountException(
                text=f"Document count exceeds {count}, the maximum allowed by the server",
                server_max_count_exceeded=True,
            )
        # enforce the caller-requested ceiling
        if count > upper_bound:
            raise TooManyDocumentsToCountException(
                text="Document count exceeds required upper bound",
                server_max_count_exceeded=False,
            )
        return count

    async def estimated_document_count(
        self,
        *,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> int:
        """
        Query the API server for an estimate of the document count in the collection.

        Contrary to `count_documents`, this method has no filtering parameters.

        Args:
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a server-provided estimate count of the documents in the collection.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> asyncio.run(my_async_coll.estimated_document_count())
            35700
        """

        # resolve the effective per-request timeout from parameters and defaults
        _req_ms, _req_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info(f"estimatedDocumentCount on '{self.name}'")
        ed_response = await self._converted_request(
            payload={"estimatedDocumentCount": {}},
            timeout_context=_TimeoutContext(request_ms=_req_ms, label=_req_label),
        )
        logger.info(f"finished estimatedDocumentCount on '{self.name}'")
        status = ed_response.get("status", {})
        # a well-formed response always carries a "count" in its status
        if "count" not in status:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from estimatedDocumentCount API command.",
                raw_response=ed_response,
            )
        count: int = status["count"]
        return count

    async def find_one_and_replace(
        self,
        filter: FilterType,
        replacement: DOC,
        *,
        projection: ProjectionType | None = None,
        sort: SortType | None = None,
        upsert: bool = False,
        return_document: str = ReturnDocument.BEFORE,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> DOC | None:
        """
        Find a document on the collection and replace it entirely with a new one,
        optionally inserting a new one if no match is found.

        Args:

            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            replacement: the new document to write into the collection.
            projection: it controls which parts of the document are returned.
                It can be an allow-list: `{"f1": True, "f2": True}`,
                or a deny-list: `{"fx": False, "fy": False}`, but not a mixture
                (except for the `_id` and other special fields, which can be
                associated to both True or False independently of the rest
                of the specification).
                The special star-projections `{"*": True}` and `{"*": False}`
                have the effect of returning the whole document and `{}` respectively.
                For lists in documents, slice directives can be passed to select
                portions of the list: for instance, `{"array": {"$slice": 2}}`,
                `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
                `{"array": {"$slice": [-4, 2]}}`.
                An iterable over strings will be treated implicitly as an allow-list.
                The default projection (used if this parameter is not passed) does not
                necessarily include "special" fields such as `$vector` or `$vectorize`.
                See the Data API documentation for more on projections.
            sort: with this dictionary parameter one can control the sorting
                order of the documents matching the filter, effectively
                determining what document will come first and hence be the
                replaced one. See the `find` method for more on sorting.
                Vector-based ANN sorting is achieved by providing a "$vector"
                or a "$vectorize" key in `sort`.
            upsert: this parameter controls the behavior in absence of matches.
                If True, `replacement` is inserted as a new document
                if no matches are found on the collection. If False,
                the operation silently does nothing in case of no matches.
            return_document: a flag controlling what document is returned:
                if set to `ReturnDocument.BEFORE`, or the string "before",
                the document found on database is returned; if set to
                `ReturnDocument.AFTER`, or the string "after", the new
                document is returned. The default is "before".
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            A document, either the one before the replace operation or the
            one after that. Alternatively, the method returns None to represent
            that no matching document was found, or that no replacement
            was inserted (depending on the `return_document` parameter).

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> async def do_find_one_and_replace(
            ...     acol: AsyncCollection
            ... ) -> None:
            ...     await acol.insert_one(
            ...         {"_id": "rule1", "text": "all animals are equal"}
            ...     )
            ...     result0 = await acol.find_one_and_replace(
            ...         {"_id": "rule1"},
            ...         {"text": "some animals are more equal!"},
            ...     )
            ...     print("result0", result0)
            ...     result1 = await acol.find_one_and_replace(
            ...         {"text": "some animals are more equal!"},
            ...         {"text": "and the pigs are the rulers"},
            ...         return_document=astrapy.constants.ReturnDocument.AFTER,
            ...     )
            ...     print("result1", result1)
            ...     result2 = await acol.find_one_and_replace(
            ...         {"_id": "rule2"},
            ...         {"text": "F=ma^2"},
            ...         return_document=astrapy.constants.ReturnDocument.AFTER,
            ...     )
            ...     print("result2", result2)
            ...     result3 = await acol.find_one_and_replace(
            ...         {"_id": "rule2"},
            ...         {"text": "F=ma"},
            ...         upsert=True,
            ...         return_document=astrapy.constants.ReturnDocument.AFTER,
            ...         projection={"_id": False},
            ...     )
            ...     print("result3", result3)
            ...
            >>> asyncio.run(do_find_one_and_replace(my_async_coll))
            result0 {'_id': 'rule1', 'text': 'all animals are equal'}
            result1 {'_id': 'rule1', 'text': 'and the pigs are the rulers'}
            result2 None
            result3 {'text': 'F=ma'}
        """

        # resolve the effective per-request timeout from parameters and defaults
        _req_ms, _req_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # assemble the command body, including only the clauses actually given
        # (key insertion order mirrors the wire format of the command)
        _command: dict[str, Any] = {"filter": filter}
        _normalized_projection = normalize_optional_projection(projection)
        if _normalized_projection is not None:
            _command["projection"] = _normalized_projection
        _command["replacement"] = replacement
        _command["options"] = {
            "returnDocument": return_document,
            "upsert": upsert,
        }
        if sort is not None:
            _command["sort"] = sort
        fo_payload = {"findOneAndReplace": _command}
        logger.info(f"findOneAndReplace on '{self.name}'")
        fo_response = await self._converted_request(
            payload=fo_payload,
            timeout_context=_TimeoutContext(request_ms=_req_ms, label=_req_label),
        )
        logger.info(f"finished findOneAndReplace on '{self.name}'")
        _data = fo_response.get("data", {})
        # the "document" key must be present even when its value is null
        if "document" not in _data:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from find_one_and_replace API command.",
                raw_response=fo_response,
            )
        # None here means: nothing matched (and nothing was upserted/returned)
        return _data["document"]  # type: ignore[no-any-return]

    async def replace_one(
        self,
        filter: FilterType,
        replacement: DOC,
        *,
        sort: SortType | None = None,
        upsert: bool = False,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionUpdateResult:
        """
        Replace a single document on the collection with a new one,
        optionally inserting a new one if no match is found.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            replacement: the new document to write into the collection.
            sort: with this dictionary parameter one can control the sorting
                order of the documents matching the filter, effectively
                determining what document will come first and hence be the
                replaced one. See the `find` method for more on sorting.
                Vector-based ANN sorting is achieved by providing a "$vector"
                or a "$vectorize" key in `sort`.
            upsert: this parameter controls the behavior in absence of matches.
                If True, `replacement` is inserted as a new document
                if no matches are found on the collection. If False,
                the operation silently does nothing in case of no matches.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a CollectionUpdateResult object summarizing the outcome of
            the replace operation.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> async def do_replace_one(acol: AsyncCollection) -> None:
            ...     await acol.insert_one({"Marco": "Polo"})
            ...     result0 = await acol.replace_one(
            ...         {"Marco": {"$exists": True}},
            ...         {"Buda": "Pest"},
            ...     )
            ...     print("result0.update_info", result0.update_info)
            ...     doc1 = await acol.find_one({"Buda": "Pest"})
            ...     print("doc1", doc1)
            ...     result1 = await acol.replace_one(
            ...         {"Mirco": {"$exists": True}},
            ...         {"Oh": "yeah?"},
            ...     )
            ...     print("result1.update_info", result1.update_info)
            ...     result2 = await acol.replace_one(
            ...         {"Mirco": {"$exists": True}},
            ...         {"Oh": "yeah?"},
            ...         upsert=True,
            ...     )
            ...     print("result2.update_info", result2.update_info)
            ...
            >>> asyncio.run(do_replace_one(my_async_coll))
            result0.update_info {'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1}
            doc1 {'_id': '6e669a5a-...', 'Buda': 'Pest'}
            result1.update_info {'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0}
            result2.update_info {'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '30e34e00-...'}
        """

        # resolve the effective per-request timeout from parameters and defaults
        _req_ms, _req_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # replace_one rides on the findOneAndReplace API command; the
        # optional "sort" clause is included only when actually given
        _command: dict[str, Any] = {
            "filter": filter,
            "replacement": replacement,
            "options": {"upsert": upsert},
        }
        if sort is not None:
            _command["sort"] = sort
        fo_payload = {"findOneAndReplace": _command}
        logger.info(f"findOneAndReplace on '{self.name}'")
        fo_response = await self._converted_request(
            payload=fo_payload,
            timeout_context=_TimeoutContext(request_ms=_req_ms, label=_req_label),
        )
        logger.info(f"finished findOneAndReplace on '{self.name}'")
        # the "document" key must be present even when its value is null
        if "document" not in fo_response.get("data", {}):
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from find_one_and_replace API command.",
                raw_response=fo_response,
            )
        # the status section holds the n/nModified/upserted counters
        _update_info = _prepare_update_info([fo_response.get("status") or {}])
        return CollectionUpdateResult(
            raw_results=[fo_response],
            update_info=_update_info,
        )

    async def find_one_and_update(
        self,
        filter: FilterType,
        update: dict[str, Any],
        *,
        projection: ProjectionType | None = None,
        sort: SortType | None = None,
        upsert: bool = False,
        return_document: str = ReturnDocument.BEFORE,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> DOC | None:
        """
        Find a document on the collection and update it as requested,
        optionally inserting a new one if no match is found.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            update: the update prescription to apply to the document, expressed
                as a dictionary as per Data API syntax. Examples are:
                    {"$set": {"field": "value"}}
                    {"$inc": {"counter": 10}}
                    {"$unset": {"field": ""}}
                See the Data API documentation for the full syntax.
            projection: it controls which parts of the document are returned.
                It can be an allow-list: `{"f1": True, "f2": True}`,
                or a deny-list: `{"fx": False, "fy": False}`, but not a mixture
                (except for the `_id` and other special fields, which can be
                associated to both True or False independently of the rest
                of the specification).
                The special star-projections `{"*": True}` and `{"*": False}`
                have the effect of returning the whole document and `{}` respectively.
                For lists in documents, slice directives can be passed to select
                portions of the list: for instance, `{"array": {"$slice": 2}}`,
                `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
                `{"array": {"$slice": [-4, 2]}}`.
                An iterable over strings will be treated implicitly as an allow-list.
                The default projection (used if this parameter is not passed) does not
                necessarily include "special" fields such as `$vector` or `$vectorize`.
                See the Data API documentation for more on projections.
            sort: with this dictionary parameter one can control the sorting
                order of the documents matching the filter, effectively
                determining what document will come first and hence be the
                replaced one. See the `find` method for more on sorting.
                Vector-based ANN sorting is achieved by providing a "$vector"
                or a "$vectorize" key in `sort`.
            upsert: this parameter controls the behavior in absence of matches.
                If True, a new document (resulting from applying the `update`
                to an empty document) is inserted if no matches are found on
                the collection. If False, the operation silently does nothing
                in case of no matches.
            return_document: a flag controlling what document is returned:
                if set to `ReturnDocument.BEFORE`, or the string "before",
                the document found on database is returned; if set to
                `ReturnDocument.AFTER`, or the string "after", the new
                document is returned. The default is "before".
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            A document (or a projection thereof, as required), either the one
            before the replace operation or the one after that.
            Alternatively, the method returns None to represent
            that no matching document was found, or that no update
            was applied (depending on the `return_document` parameter).

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> async def do_find_one_and_update(acol: AsyncCollection) -> None:
            ...     await acol.insert_one({"Marco": "Polo"})
            ...     result0 = await acol.find_one_and_update(
            ...         {"Marco": {"$exists": True}},
            ...         {"$set": {"title": "Mr."}},
            ...     )
            ...     print("result0", result0)
            ...     result1 = await acol.find_one_and_update(
            ...         {"title": "Mr."},
            ...         {"$inc": {"rank": 3}},
            ...         projection=["title", "rank"],
            ...         return_document=astrapy.constants.ReturnDocument.AFTER,
            ...     )
            ...     print("result1", result1)
            ...     result2 = await acol.find_one_and_update(
            ...         {"name": "Johnny"},
            ...         {"$set": {"rank": 0}},
            ...         return_document=astrapy.constants.ReturnDocument.AFTER,
            ...     )
            ...     print("result2", result2)
            ...     result3 = await acol.find_one_and_update(
            ...         {"name": "Johnny"},
            ...         {"$set": {"rank": 0}},
            ...         upsert=True,
            ...         return_document=astrapy.constants.ReturnDocument.AFTER,
            ...     )
            ...     print("result3", result3)
            ...
            >>> asyncio.run(do_find_one_and_update(my_async_coll))
            result0 {'_id': 'f7c936d3-b0a0-45eb-a676-e2829662a57c', 'Marco': 'Polo'}
            result1 {'_id': 'f7c936d3-b0a0-45eb-a676-e2829662a57c', 'title': 'Mr.', 'rank': 3}
            result2 None
            result3 {'_id': 'db3d678d-14d4-4caa-82d2-d5fb77dab7ec', 'name': 'Johnny', 'rank': 0}
        """

        # resolve the effective per-request timeout from parameters and defaults
        _req_ms, _req_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # assemble the command body, including only the clauses actually given
        # (key insertion order mirrors the wire format of the command)
        _command: dict[str, Any] = {
            "filter": filter,
            "update": update,
            "options": {
                "returnDocument": return_document,
                "upsert": upsert,
            },
        }
        if sort is not None:
            _command["sort"] = sort
        _normalized_projection = normalize_optional_projection(projection)
        if _normalized_projection is not None:
            _command["projection"] = _normalized_projection
        fo_payload = {"findOneAndUpdate": _command}
        logger.info(f"findOneAndUpdate on '{self.name}'")
        fo_response = await self._converted_request(
            payload=fo_payload,
            timeout_context=_TimeoutContext(request_ms=_req_ms, label=_req_label),
        )
        logger.info(f"finished findOneAndUpdate on '{self.name}'")
        _data = fo_response.get("data", {})
        # the "document" key must be present even when its value is null
        if "document" not in _data:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from find_one_and_update API command.",
                raw_response=fo_response,
            )
        # None here means: nothing matched (and nothing was upserted/returned)
        return _data["document"]  # type: ignore[no-any-return]

    async def update_one(
        self,
        filter: FilterType,
        update: dict[str, Any],
        *,
        sort: SortType | None = None,
        upsert: bool = False,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionUpdateResult:
        """
        Update a single document on the collection as requested,
        optionally inserting a new one if no match is found.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            update: the update prescription to apply to the document, expressed
                as a dictionary as per Data API syntax. Examples are:
                    {"$set": {"field": "value"}}
                    {"$inc": {"counter": 10}}
                    {"$unset": {"field": ""}}
                See the Data API documentation for the full syntax.
            sort: with this dictionary parameter one can control the sorting
                order of the documents matching the filter, effectively
                determining what document will come first and hence be the
                replaced one. See the `find` method for more on sorting.
                Vector-based ANN sorting is achieved by providing a "$vector"
                or a "$vectorize" key in `sort`.
            upsert: this parameter controls the behavior in absence of matches.
                If True, a new document (resulting from applying the `update`
                to an empty document) is inserted if no matches are found on
                the collection. If False, the operation silently does nothing
                in case of no matches.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a CollectionUpdateResult object summarizing the outcome of
            the update operation.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> async def do_update_one(acol: AsyncCollection) -> None:
            ...     await acol.insert_one({"Marco": "Polo"})
            ...     result0 = await acol.update_one(
            ...         {"Marco": {"$exists": True}},
            ...         {"$inc": {"rank": 3}},
            ...     )
            ...     print("result0.update_info", result0.update_info)
            ...     result1 = await acol.update_one(
            ...         {"Mirko": {"$exists": True}},
            ...         {"$inc": {"rank": 3}},
            ...     )
            ...     print("result1.update_info", result1.update_info)
            ...     result2 = await acol.update_one(
            ...         {"Mirko": {"$exists": True}},
            ...         {"$inc": {"rank": 3}},
            ...         upsert=True,
            ...     )
            ...     print("result2.update_info", result2.update_info)
            ...
            >>> asyncio.run(do_update_one(my_async_coll))
            result0.update_info {'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1}
            result1.update_info {'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0}
            result2.update_info {'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '75748092-...'}
        """

        # resolve the effective request timeout (single-request method).
        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        options = {
            "upsert": upsert,
        }
        # build the payload, dropping parts that were not provided (e.g. sort)
        uo_payload = {
            "updateOne": {
                k: v
                for k, v in {
                    "filter": filter,
                    "update": update,
                    "options": options,
                    "sort": sort,
                }.items()
                if v is not None
            }
        }
        logger.info(f"updateOne on '{self.name}'")
        uo_response = await self._converted_request(
            payload=uo_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished updateOne on '{self.name}'")
        if "status" in uo_response:
            uo_status = uo_response["status"]
            _update_info = _prepare_update_info([uo_status])
            return CollectionUpdateResult(
                raw_results=[uo_response],
                update_info=_update_info,
            )
        else:
            # a response with no "status" is malformed for this command
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from updateOne API command.",
                raw_response=uo_response,
            )

    async def update_many(
        self,
        filter: FilterType,
        update: dict[str, Any],
        *,
        upsert: bool = False,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionUpdateResult:
        """
        Apply an update operation to all documents matching a condition,
        optionally inserting one document in absence of matches.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            update: the update prescription to apply to the documents, expressed
                as a dictionary as per Data API syntax. Examples are:
                    {"$set": {"field": "value"}}
                    {"$inc": {"counter": 10}}
                    {"$unset": {"field": ""}}
                See the Data API documentation for the full syntax.
            upsert: this parameter controls the behavior in absence of matches.
                If True, a single new document (resulting from applying `update`
                to an empty document) is inserted if no matches are found on
                the collection. If False, the operation silently does nothing
                in case of no matches.
            general_method_timeout_ms: a timeout, in milliseconds, for the whole
                requested operation (which may involve multiple API requests).
                This method may entail successive HTTP API requests,
                depending on the amount of involved documents.
                If not passed, the collection-level setting is used instead.
            request_timeout_ms: a timeout, in milliseconds, for each API request.
                If not passed, the collection-level setting is used instead.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a CollectionUpdateResult object summarizing the outcome of
            the update operation.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> async def do_update_many(acol: AsyncCollection) -> None:
            ...     await acol.insert_many([{"c": "red"}, {"c": "green"}, {"c": "blue"}])
            ...     result0 = await acol.update_many(
            ...         {"c": {"$ne": "green"}},
            ...         {"$set": {"nongreen": True}},
            ...     )
            ...     print("result0.update_info", result0.update_info)
            ...     result1 = await acol.update_many(
            ...         {"c": "orange"},
            ...         {"$set": {"is_also_fruit": True}},
            ...     )
            ...     print("result1.update_info", result1.update_info)
            ...     result2 = await acol.update_many(
            ...         {"c": "orange"},
            ...         {"$set": {"is_also_fruit": True}},
            ...         upsert=True,
            ...     )
            ...     print("result2.update_info", result2.update_info)
            ...
            >>> asyncio.run(do_update_many(my_async_coll))
            result0.update_info {'n': 2, 'updatedExisting': True, 'ok': 1.0, 'nModified': 2}
            result1.update_info {'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0}
            result2.update_info {'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '79ffd5a3-ab99-4dff-a2a5-4aaa0e59e854'}

        Note:
            Similarly to the case of `find` (see its docstring for more details),
            running this command while, at the same time, another process is
            inserting new documents which match the filter of the `update_many`
            can result in an unpredictable fraction of these documents being updated.
            In other words, it cannot be easily predicted whether a given
            newly-inserted document will be picked up by the update_many command or not.
        """

        _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
            (general_method_timeout_ms, "general_method_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.general_method_timeout_ms,
                "general_method_timeout_ms",
            ),
        )
        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        api_options = {
            "upsert": upsert,
        }
        # the API paginates large updates: each response may carry a
        # "nextPageState" that must be fed back into the following request.
        page_state_options: dict[str, str] = {}
        um_responses: list[dict[str, Any]] = []
        um_statuses: list[dict[str, Any]] = []
        must_proceed = True
        logger.info(f"starting update_many on '{self.name}'")
        timeout_manager = MultiCallTimeoutManager(
            overall_timeout_ms=_general_method_timeout_ms,
            timeout_label=_gmt_label,
        )
        while must_proceed:
            options = {**api_options, **page_state_options}
            this_um_payload = {
                "updateMany": {
                    k: v
                    for k, v in {
                        "filter": filter,
                        "update": update,
                        "options": options,
                    }.items()
                    if v is not None
                }
            }
            logger.info(f"updateMany on '{self.name}'")
            this_um_response = await self._converted_request(
                payload=this_um_payload,
                timeout_context=timeout_manager.remaining_timeout(
                    cap_time_ms=_request_timeout_ms,
                    cap_timeout_label=_rt_label,
                ),
            )
            logger.info(f"finished updateMany on '{self.name}'")
            this_um_status = this_um_response.get("status") or {}
            #
            # if errors, quit early: raise with the partial result so far
            if this_um_response.get("errors", []):
                partial_update_info = _prepare_update_info(um_statuses)
                partial_result = CollectionUpdateResult(
                    raw_results=um_responses,
                    update_info=partial_update_info,
                )
                all_um_responses = um_responses + [this_um_response]
                raise CollectionUpdateManyException.from_responses(
                    commands=[None for _ in all_um_responses],
                    raw_responses=all_um_responses,
                    partial_result=partial_result,
                )
            else:
                if "status" not in this_um_response:
                    raise UnexpectedDataAPIResponseException(
                        text="Faulty response from update_many API command.",
                        raw_response=this_um_response,
                    )
                um_responses.append(this_um_response)
                um_statuses.append(this_um_status)
                next_page_state = this_um_status.get("nextPageState")
                if next_page_state is not None:
                    must_proceed = True
                    page_state_options = {"pageState": next_page_state}
                else:
                    must_proceed = False
                    page_state_options = {}

        update_info = _prepare_update_info(um_statuses)
        logger.info(f"finished update_many on '{self.name}'")
        return CollectionUpdateResult(
            raw_results=um_responses,
            update_info=update_info,
        )

    async def find_one_and_delete(
        self,
        filter: FilterType,
        *,
        projection: ProjectionType | None = None,
        sort: SortType | None = None,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> DOC | None:
        """
        Locate one document matching the filter, delete it from the collection
        and hand the deleted document (or a projection of it) back to the caller.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax, for instance `{}`, `{"name": "John"}`,
                `{"price": {"$lt": 100}}` or a `$and`/`$or` combination thereof.
                See the Data API documentation for the full set of operators.
            projection: controls which parts of the document are returned.
                It can be an allow-list (`{"f1": True}`), a deny-list
                (`{"fx": False}`), a star-projection (`{"*": True}` /
                `{"*": False}`), may contain `$slice` directives for lists,
                or be an iterable of field names (treated as an allow-list).
                The default projection does not necessarily include "special"
                fields such as `$vector` or `$vectorize`. See the Data API
                documentation for more on projections.
            sort: dictionary controlling the ordering of matches, hence which
                document gets deleted. Vector-based ANN sorting is achieved
                by providing a "$vector" or a "$vectorize" key in `sort`.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on
                the underlying API request. If not provided, this object's
                defaults apply. (Single API request: all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            the deleted document (possibly projected as requested), or None
            when no document matched the filter.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> async def do_find_one_and_delete(acol: AsyncCollection) -> None:
            ...     await acol.insert_many(
            ...         [
            ...             {"species": "swan", "class": "Aves"},
            ...             {"species": "frog", "class": "Amphibia"},
            ...         ],
            ...     )
            ...     delete_result0 = await acol.find_one_and_delete(
            ...         {"species": {"$ne": "frog"}},
            ...         projection=["species"],
            ...     )
            ...     print("delete_result0", delete_result0)
            ...     delete_result1 = await acol.find_one_and_delete(
            ...         {"species": {"$ne": "frog"}},
            ...     )
            ...     print("delete_result1", delete_result1)
            ...
            >>> asyncio.run(do_find_one_and_delete(my_async_coll))
            delete_result0 {'_id': 'f335cd0f-...', 'species': 'swan'}
            delete_result1 None
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # assemble the command payload, omitting any part left unset
        raw_command_fields = {
            "filter": filter,
            "sort": sort,
            "projection": normalize_optional_projection(projection),
        }
        fo_payload = {
            "findOneAndDelete": {
                key: value
                for key, value in raw_command_fields.items()
                if value is not None
            }
        }
        logger.info(f"findOneAndDelete on '{self.name}'")
        fo_response = await self._converted_request(
            payload=fo_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished findOneAndDelete on '{self.name}'")
        if "document" in fo_response.get("data", {}):
            return fo_response["data"]["document"]  # type: ignore[no-any-return]
        # no document in the response: a deletedCount of zero is the normal
        # "no match" outcome; anything else is a malformed API response.
        if fo_response.get("status", {}).get("deletedCount") == 0:
            return None
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from find_one_and_delete API command.",
            raw_response=fo_response,
        )

    async def delete_one(
        self,
        filter: FilterType,
        *,
        sort: SortType | None = None,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionDeleteResult:
        """
        Delete at most one document matching a provided filter: regardless of
        how many documents match, no more than a single one is ever removed.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax, for instance `{}`, `{"name": "John"}`,
                `{"price": {"$lt": 100}}` or a `$and`/`$or` combination thereof.
                See the Data API documentation for the full set of operators.
            sort: dictionary controlling the ordering of matches, hence which
                document gets deleted. Vector-based ANN sorting is achieved
                by providing a "$vector" or a "$vectorize" key in `sort`.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on
                the underlying API request. If not provided, this object's
                defaults apply. (Single API request: all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a CollectionDeleteResult object summarizing the outcome of the
            delete operation.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> asyncio.run(my_async_coll.insert_many(
            ...     [{"seq": 1}, {"seq": 0}, {"seq": 2}]
            ... ))
            CollectionInsertManyResult(...)
            >>> asyncio.run(my_async_coll.delete_one({"seq": 1}))
            CollectionDeleteResult(raw_results=..., deleted_count=1)
            >>> asyncio.run(my_async_coll.distinct("seq"))
            [0, 2]
            >>> asyncio.run(my_async_coll.delete_one(
            ...     {"seq": {"$exists": True}},
            ...     sort={"seq": astrapy.constants.SortMode.DESCENDING},
            ... ))
            CollectionDeleteResult(raw_results=..., deleted_count=1)
            >>> asyncio.run(my_async_coll.distinct("seq"))
            [0]
            >>> asyncio.run(my_async_coll.delete_one({"seq": 2}))
            CollectionDeleteResult(raw_results=..., deleted_count=0)
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # build the command body, skipping unset parts
        command_body: dict[str, Any] = {}
        if filter is not None:
            command_body["filter"] = filter
        if sort is not None:
            command_body["sort"] = sort
        logger.info(f"deleteOne on '{self.name}'")
        do_response = await self._converted_request(
            payload={"deleteOne": command_body},
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished deleteOne on '{self.name}'")
        response_status = do_response.get("status", {})
        if "deletedCount" not in response_status:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from delete_one API command.",
                raw_response=do_response,
            )
        return CollectionDeleteResult(
            deleted_count=response_status["deletedCount"],
            raw_results=[do_response],
        )

    async def delete_many(
        self,
        filter: FilterType,
        *,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionDeleteResult:
        """
        Delete every document on the collection that matches a provided filter.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax, for instance `{"name": "John"}` or
                `{"price": {"$lt": 100}}`. See the Data API documentation for
                the full set of operators. Passing an empty filter, `{}`,
                completely erases all contents of the collection.
            general_method_timeout_ms: a timeout, in milliseconds, for the
                whole requested operation: depending on the amount of involved
                documents, it may consist of several successive HTTP API
                requests. If not passed, the collection-level setting is
                used instead.
            request_timeout_ms: a timeout, in milliseconds, for each single
                API request. If not passed, the collection-level setting is
                used instead.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a CollectionDeleteResult object summarizing the outcome of the
            delete operation.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> async def do_delete_many(acol: AsyncCollection) -> None:
            ...     await acol.insert_many([{"seq": 1}, {"seq": 0}, {"seq": 2}])
            ...     delete_result0 = await acol.delete_many({"seq": {"$lte": 1}})
            ...     print("delete_result0.deleted_count", delete_result0.deleted_count)
            ...     distinct1 = await acol.distinct("seq")
            ...     print("distinct1", distinct1)
            ...     delete_result2 = await acol.delete_many({"seq": {"$lte": 1}})
            ...     print("delete_result2.deleted_count", delete_result2.deleted_count)
            ...
            >>> asyncio.run(do_delete_many(my_async_coll))
            delete_result0.deleted_count 2
            distinct1 [2]
            delete_result2.deleted_count 0

        Note:
            With the exception of the `filter={}` case (which is atomic), this
            operation is in general not atomic and can keep running, in a
            blocking way, for a macroscopic time. While it runs, documents
            inserted meanwhile (e.g. by another process/application) that
            match the filter keep being deleted, until the collection is
            devoid of matches.
        """

        _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
            (general_method_timeout_ms, "general_method_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.general_method_timeout_ms,
                "general_method_timeout_ms",
            ),
        )
        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        timeout_manager = MultiCallTimeoutManager(
            overall_timeout_ms=_general_method_timeout_ms,
            timeout_label=_gmt_label,
        )
        # the same payload is re-issued until the API reports no more data
        dm_payload = {"deleteMany": {"filter": filter}}
        collected_responses: list[dict[str, Any]] = []
        total_deleted = 0
        logger.info(f"starting delete_many on '{self.name}'")
        while True:
            logger.info(f"deleteMany on '{self.name}'")
            page_response = await self._converted_request(
                payload=dm_payload,
                raise_api_errors=False,
                timeout_context=timeout_manager.remaining_timeout(
                    cap_time_ms=_request_timeout_ms,
                    cap_timeout_label=_rt_label,
                ),
            )
            logger.info(f"finished deleteMany on '{self.name}'")
            # an API error stops the loop, packaging the partial progress
            if page_response.get("errors", []):
                combined_responses = collected_responses + [page_response]
                raise CollectionDeleteManyException.from_responses(
                    commands=[None for _ in combined_responses],
                    raw_responses=combined_responses,
                    partial_result=CollectionDeleteResult(
                        deleted_count=total_deleted,
                        raw_results=collected_responses,
                    ),
                )
            page_deleted = page_response.get("status", {}).get("deletedCount")
            if page_deleted is None:
                raise UnexpectedDataAPIResponseException(
                    text="Faulty response from delete_many API command.",
                    raw_response=page_response,
                )
            collected_responses.append(page_response)
            total_deleted += page_deleted
            if not page_response.get("status", {}).get("moreData", False):
                break

        logger.info(f"finished delete_many on '{self.name}'")
        return CollectionDeleteResult(
            deleted_count=total_deleted,
            raw_results=collected_responses,
        )

    async def drop(
        self,
        *,
        collection_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Drop the collection: remove it from the database together with all
        the documents it contains. (Delegates to the database's
        `drop_collection` on this collection's name.)

        Args:
            collection_admin_timeout_ms: a timeout, in milliseconds, to impose
                on the underlying API request. If not provided, this object's
                defaults apply. (Single API request: all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `collection_admin_timeout_ms`.
            timeout_ms: an alias for `collection_admin_timeout_ms`.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> async def drop_and_check(acol: AsyncCollection) -> None:
            ...     doc0 = await acol.find_one({})
            ...     print("doc0", doc0)
            ...     await acol.drop()
            ...     doc1 = await acol.find_one({})
            ...
            >>> asyncio.run(drop_and_check(my_async_coll))
            doc0 {'_id': '...', 'z': -10}
            Traceback (most recent call last):
                ... ...
            astrapy.exceptions.DataAPIResponseException: Collection does not exist, ...

        Note:
            Use with caution.

        Note:
            After a successful drop, this object remains usable in principle,
            but any further method call targets a collection that no longer
            exists: it is up to the developer to ensure a dropped collection
            is not used any further.
        """

        logger.info(f"dropping collection '{self.name}' (self)")
        timeout_kwargs = {
            "collection_admin_timeout_ms": collection_admin_timeout_ms,
            "request_timeout_ms": request_timeout_ms,
            "timeout_ms": timeout_ms,
        }
        await self.database.drop_collection(self.name, **timeout_kwargs)
        logger.info(f"finished dropping collection '{self.name}' (self)")

    async def command(
        self,
        body: dict[str, Any] | None,
        *,
        raise_api_errors: bool = True,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> dict[str, Any]:
        """
        Send a POST request to the Data API for this collection with
        an arbitrary, caller-provided payload.
        No transformations or type conversions are made on the provided payload.

        Args:
            body: a JSON-serializable dictionary, the payload of the request.
            raise_api_errors: if True, responses with a nonempty 'errors' field
                result in an astrapy exception being raised.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a dictionary with the response of the HTTP request.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> asyncio.run(my_async_coll.command({"countDocuments": {}}))
            {'status': {'count': 123}}
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # a short description of the command (its top-level keys) for logging
        _cmd_desc: str
        if body:
            _cmd_desc = ",".join(sorted(body.keys()))
        else:
            _cmd_desc = "(none)"
        logger.info(f"command={_cmd_desc} on '{self.name}'")
        command_result = await self._api_commander.async_request(
            payload=body,
            raise_api_errors=raise_api_errors,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished command={_cmd_desc} on '{self.name}'")
        return command_result

Ancestors

  • typing.Generic

Instance variables

var database : AsyncDatabase

a Database object, the database this collection belongs to.

Example

>>> my_async_coll.database.name
'the_db'
Expand source code
@property
def database(self) -> AsyncDatabase:
    """
    The AsyncDatabase instance this collection belongs to.

    Example:
        >>> my_async_coll.database.name
        'the_db'
    """

    return self._database
var full_name : str

The fully-qualified collection name within the database, in the form "keyspace.collection_name".

Example

>>> my_async_coll.full_name
'default_keyspace.my_v_collection'
Expand source code
@property
def full_name(self) -> str:
    """
    The fully-qualified name identifying this collection within its database,
    i.e. the string "keyspace.collection_name".

    Example:
        >>> my_async_coll.full_name
        'default_keyspace.my_v_collection'
    """

    return ".".join((self.keyspace, self.name))
var keyspace : str

The keyspace this collection is in.

Example

>>> my_async_coll.keyspace
'default_keyspace'
Expand source code
@property
def keyspace(self) -> str:
    """
    The name of the keyspace this collection resides in.

    Raises:
        ValueError: if the collection's database was created with no keyspace.

    Example:
        >>> my_async_coll.keyspace
        'default_keyspace'
    """

    db_keyspace = self.database.keyspace
    if db_keyspace is not None:
        return db_keyspace
    raise ValueError("The collection's DB is set with keyspace=None")
var name : str

The name of this collection.

Example

>>> my_async_coll.name
'my_v_collection'
Expand source code
@property
def name(self) -> str:
    """
    The plain (unqualified) name of this collection.

    Example:
        >>> my_async_coll.name
        'my_v_collection'
    """

    collection_name = self._name
    return collection_name

Methods

async def command(self, body: dict[str, Any] | None, *, raise_api_errors: bool = True, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> dict[str, typing.Any]

Send a POST request to the Data API for this collection with an arbitrary, caller-provided payload. No transformations or type conversions are made on the provided payload.

Args

body
a JSON-serializable dictionary, the payload of the request.
raise_api_errors
if True, responses with a nonempty 'errors' field result in an astrapy exception being raised.
general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Returns

a dictionary with the response of the HTTP request.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> asyncio.run(my_async_coll.command({"countDocuments": {}}))
{'status': {'count': 123}}
Expand source code
async def command(
    self,
    body: dict[str, Any] | None,
    *,
    raise_api_errors: bool = True,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> dict[str, Any]:
    """
    Issue a POST request to the Data API for this collection, with an
    arbitrary payload supplied by the caller. The payload is sent as-is:
    no transformation or type conversion is applied to it.

    Args:
        body: the request payload, a JSON-serializable dictionary.
        raise_api_errors: when True, a response carrying a nonempty 'errors'
            field triggers an astrapy exception.
        general_method_timeout_ms: a timeout, in milliseconds, applied to the
            underlying API request. This object's defaults are used when not
            provided. (Since a single API request is issued, all timeout
            parameters are equivalent for this method.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a dictionary with the response of the HTTP request.

    Example:
        >>> asyncio.run(my_async_coll.command({"countDocuments": {}}))
        {'status': {'count': 123}}
    """

    # Single-request method: the three timeout parameters collapse into one.
    _req_ms, _req_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # A short description of the command (its top-level keys), for logging.
    _cmd_desc = ",".join(sorted(body.keys())) if body else "(none)"
    logger.info(f"command={_cmd_desc} on '{self.name}'")
    response = await self._api_commander.async_request(
        payload=body,
        raise_api_errors=raise_api_errors,
        timeout_context=_TimeoutContext(request_ms=_req_ms, label=_req_label),
    )
    logger.info(f"finished command={_cmd_desc} on '{self.name}'")
    return response
async def count_documents(self, filter: FilterType, *, upper_bound: int, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> int

Count the documents in the collection matching the specified filter.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
upper_bound
a required ceiling on the result of the count operation. If the actual number of documents exceeds this value, an exception will be raised. Furthermore, if the actual number of documents exceeds the maximum count that the Data API can reach (regardless of upper_bound), an exception will be raised.
general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Returns

the exact count of matching documents.

Example

>>> async def do_count_docs(acol: AsyncCollection) -> None:
...     await acol.insert_many([{"seq": i} for i in range(20)])
...     count0 = await acol.count_documents({}, upper_bound=100)
...     print("count0", count0)
...     count1 = await acol.count_documents(
...         {"seq":{"$gt": 15}}, upper_bound=100
...     )
...     print("count1", count1)
...     count2 = await acol.count_documents({}, upper_bound=10)
...     print("count2", count2)
...
>>> asyncio.run(do_count_docs(my_async_coll))
count0 20
count1 4
Traceback (most recent call last):
    ... ...
astrapy.exceptions.TooManyDocumentsToCountException

Note

Count operations are expensive: for this reason, the best practice is to provide a reasonable upper_bound according to the caller expectations. Moreover, indiscriminate usage of count operations for sizeable amounts of documents (i.e. in the thousands and more) is discouraged in favor of alternative application-specific solutions. Keep in mind that the Data API has a hard upper limit on the amount of documents it will count, and that an exception will be thrown by this method if this limit is encountered.

Expand source code
async def count_documents(
    self,
    filter: FilterType,
    *,
    upper_bound: int,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> int:
    """
    Count the documents in the collection that match a given filter.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        upper_bound: a required ceiling on the result of the count operation.
            An exception is raised if the actual number of documents exceeds
            this value, and likewise if it exceeds the maximum count the
            Data API itself can reach (regardless of `upper_bound`).
        general_method_timeout_ms: a timeout, in milliseconds, applied to the
            underlying API request. This object's defaults are used when not
            provided. (Since a single API request is issued, all timeout
            parameters are equivalent for this method.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        the exact count of matching documents.

    Raises:
        TooManyDocumentsToCountException: if the count exceeds `upper_bound`
            or the server-side maximum.
        UnexpectedDataAPIResponseException: if the API response carries
            no 'count' entry in its status.

    Example:
        >>> async def do_count_docs(acol: AsyncCollection) -> None:
        ...     await acol.insert_many([{"seq": i} for i in range(20)])
        ...     count0 = await acol.count_documents({}, upper_bound=100)
        ...     print("count0", count0)
        ...     count1 = await acol.count_documents(
        ...         {"seq":{"$gt": 15}}, upper_bound=100
        ...     )
        ...     print("count1", count1)
        ...     count2 = await acol.count_documents({}, upper_bound=10)
        ...     print("count2", count2)
        ...
        >>> asyncio.run(do_count_docs(my_async_coll))
        count0 20
        count1 4
        Traceback (most recent call last):
            ... ...
        astrapy.exceptions.TooManyDocumentsToCountException

    Note:
        Count operations are expensive: for this reason, the best practice
        is to provide a reasonable `upper_bound` according to the caller
        expectations. Moreover, indiscriminate usage of count operations
        for sizeable amounts of documents (i.e. in the thousands and more)
        is discouraged in favor of alternative application-specific solutions.
        Keep in mind that the Data API has a hard upper limit on the amount
        of documents it will count, and that an exception will be thrown
        by this method if this limit is encountered.
    """

    # Single-request method: the three timeout parameters collapse into one.
    _req_ms, _req_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    payload = {"countDocuments": {"filter": filter}}
    logger.info(f"countDocuments on '{self.name}'")
    response = await self._converted_request(
        payload=payload,
        timeout_context=_TimeoutContext(request_ms=_req_ms, label=_req_label),
    )
    logger.info(f"finished countDocuments on '{self.name}'")
    status = response.get("status", {})
    # Guard clauses: malformed response first, then the two overflow cases.
    if "count" not in status:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from countDocuments API command.",
            raw_response=response,
        )
    count: int = status["count"]
    if status.get("moreData", False):
        # The server stopped counting at its own hard limit.
        raise TooManyDocumentsToCountException(
            text=f"Document count exceeds {count}, the maximum allowed by the server",
            server_max_count_exceeded=True,
        )
    if count > upper_bound:
        raise TooManyDocumentsToCountException(
            text="Document count exceeds required upper bound",
            server_max_count_exceeded=False,
        )
    return count
async def delete_many(self, filter: FilterType, *, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> CollectionDeleteResult

Delete all documents matching a provided filter.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators. Passing an empty filter, {}, completely erases all contents of the collection.
general_method_timeout_ms
a timeout, in milliseconds, for the whole requested operation (which may involve multiple API requests). This method may entail successive HTTP API requests, depending on the amount of involved documents. If not passed, the collection-level setting is used instead.
request_timeout_ms
a timeout, in milliseconds, for each API request. If not passed, the collection-level setting is used instead.
timeout_ms
an alias for general_method_timeout_ms.

Returns

a CollectionDeleteResult object summarizing the outcome of the delete operation.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> async def do_delete_many(acol: AsyncCollection) -> None:
...     await acol.insert_many([{"seq": 1}, {"seq": 0}, {"seq": 2}])
...     delete_result0 = await acol.delete_many({"seq": {"$lte": 1}})
...     print("delete_result0.deleted_count", delete_result0.deleted_count)
...     distinct1 = await acol.distinct("seq")
...     print("distinct1", distinct1)
...     delete_result2 = await acol.delete_many({"seq": {"$lte": 1}})
...     print("delete_result2.deleted_count", delete_result2.deleted_count)
...
>>> asyncio.run(do_delete_many(my_async_coll))
delete_result0.deleted_count 2
distinct1 [2]
delete_result2.deleted_count 0

Note

This operation is in general not atomic. Depending on the amount of matching documents, it can keep running (in a blocking way) for a macroscopic time. In that case, new documents that are meanwhile inserted (e.g. from another process/application) will be deleted during the execution of this method call until the collection is devoid of matches. An exception is the filter={} case, whereby the operation is atomic.

Expand source code
async def delete_many(
    self,
    filter: FilterType,
    *,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionDeleteResult:
    """
    Delete all documents matching a provided filter.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
            Passing an empty filter, `{}`, completely erases all contents
            of the collection.
        general_method_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation (which may involve multiple API requests).
            This method may entail successive HTTP API requests,
            depending on the amount of involved documents.
            If not passed, the collection-level setting is used instead.
        request_timeout_ms: a timeout, in milliseconds, for each API request.
            If not passed, the collection-level setting is used instead.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a CollectionDeleteResult object summarizing the outcome of the
        delete operation.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> async def do_delete_many(acol: AsyncCollection) -> None:
        ...     await acol.insert_many([{"seq": 1}, {"seq": 0}, {"seq": 2}])
        ...     delete_result0 = await acol.delete_many({"seq": {"$lte": 1}})
        ...     print("delete_result0.deleted_count", delete_result0.deleted_count)
        ...     distinct1 = await acol.distinct("seq")
        ...     print("distinct1", distinct1)
        ...     delete_result2 = await acol.delete_many({"seq": {"$lte": 1}})
        ...     print("delete_result2.deleted_count", delete_result2.deleted_count)
        ...
        >>> asyncio.run(do_delete_many(my_async_coll))
        delete_result0.deleted_count 2
        distinct1 [2]
        delete_result2.deleted_count 0

    Note:
        This operation is in general not atomic. Depending on the amount
        of matching documents, it can keep running (in a blocking way)
        for a macroscopic time. In that case, new documents that are
        meanwhile inserted (e.g. from another process/application) will be
        deleted during the execution of this method call until the
        collection is devoid of matches.
        An exception is the `filter={}` case, whereby the operation is atomic.
    """

    # Resolve the overall (multi-request) timeout: explicit argument first,
    # then its alias, then the collection-level default.
    _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
        (general_method_timeout_ms, "general_method_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (
            self.api_options.timeout_options.general_method_timeout_ms,
            "general_method_timeout_ms",
        ),
    )
    # The per-request timeout caps each individual HTTP call in the loop below.
    _request_timeout_ms, _rt_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    dm_responses: list[dict[str, Any]] = []
    deleted_count = 0
    must_proceed = True
    # Tracks the overall budget across the (possibly many) deleteMany calls.
    timeout_manager = MultiCallTimeoutManager(
        overall_timeout_ms=_general_method_timeout_ms,
        timeout_label=_gmt_label,
    )
    this_dm_payload = {"deleteMany": {"filter": filter}}
    logger.info(f"starting delete_many on '{self.name}'")
    while must_proceed:
        logger.info(f"deleteMany on '{self.name}'")
        this_dm_response = await self._converted_request(
            payload=this_dm_payload,
            raise_api_errors=False,
            timeout_context=timeout_manager.remaining_timeout(
                cap_time_ms=_request_timeout_ms,
                cap_timeout_label=_rt_label,
            ),
        )
        logger.info(f"finished deleteMany on '{self.name}'")
        # if errors, quit early
        if this_dm_response.get("errors", []):
            # The partial result covers only the successful responses so far;
            # the failing response is still included in the exception's
            # raw_responses for diagnostic purposes.
            partial_result = CollectionDeleteResult(
                deleted_count=deleted_count,
                raw_results=dm_responses,
            )
            all_dm_responses = dm_responses + [this_dm_response]
            raise CollectionDeleteManyException.from_responses(
                commands=[None for _ in all_dm_responses],
                raw_responses=all_dm_responses,
                partial_result=partial_result,
            )
        else:
            # A missing 'deletedCount' marks a malformed API response.
            this_dc = this_dm_response.get("status", {}).get("deletedCount")
            if this_dc is None:
                raise UnexpectedDataAPIResponseException(
                    text="Faulty response from delete_many API command.",
                    raw_response=this_dm_response,
                )
            dm_responses.append(this_dm_response)
            deleted_count += this_dc
            # 'moreData' signals that further matching documents remain
            # and another deleteMany call is required.
            must_proceed = this_dm_response.get("status", {}).get("moreData", False)

    logger.info(f"finished delete_many on '{self.name}'")
    return CollectionDeleteResult(
        deleted_count=deleted_count,
        raw_results=dm_responses,
    )
async def delete_one(self, filter: FilterType, *, sort: SortType | None = None, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> CollectionDeleteResult

Delete one document matching a provided filter. This method never deletes more than a single document, regardless of the number of matches to the provided filters.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
sort
with this dictionary parameter one can control the sorting order of the documents matching the filter, effectively determining what document will come first and hence be the replaced one. See the find method for more on sorting. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Returns

a CollectionDeleteResult object summarizing the outcome of the delete operation.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> asyncio.run(my_async_coll.insert_many(
...     [{"seq": 1}, {"seq": 0}, {"seq": 2}]
... ))
CollectionInsertManyResult(...)
>>> asyncio.run(my_async_coll.delete_one({"seq": 1}))
CollectionDeleteResult(raw_results=..., deleted_count=1)
>>> asyncio.run(my_async_coll.distinct("seq"))
[0, 2]
>>> asyncio.run(my_async_coll.delete_one(
...     {"seq": {"$exists": True}},
...     sort={"seq": astrapy.constants.SortMode.DESCENDING},
... ))
CollectionDeleteResult(raw_results=..., deleted_count=1)
>>> asyncio.run(my_async_coll.distinct("seq"))
[0]
>>> asyncio.run(my_async_coll.delete_one({"seq": 2}))
CollectionDeleteResult(raw_results=..., deleted_count=0)
Expand source code
async def delete_one(
    self,
    filter: FilterType,
    *,
    sort: SortType | None = None,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionDeleteResult:
    """
    Delete a single document matching the provided filter.
    Regardless of how many documents the filter matches, at most one
    document is ever deleted by this method.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        sort: with this dictionary parameter one can control the sorting
            order of the documents matching the filter, effectively
            determining what document will come first and hence be the
            replaced one. See the `find` method for more on sorting.
            Vector-based ANN sorting is achieved by providing a "$vector"
            or a "$vectorize" key in `sort`.
        general_method_timeout_ms: a timeout, in milliseconds, applied to the
            underlying API request. This object's defaults are used when not
            provided. (Since a single API request is issued, all timeout
            parameters are equivalent for this method.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a CollectionDeleteResult object summarizing the outcome of the
        delete operation.

    Raises:
        UnexpectedDataAPIResponseException: if the API response carries
            no 'deletedCount' entry in its status.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> asyncio.run(my_async_coll.insert_many(
        ...     [{"seq": 1}, {"seq": 0}, {"seq": 2}]
        ... ))
        CollectionInsertManyResult(...)
        >>> asyncio.run(my_async_coll.delete_one({"seq": 1}))
        CollectionDeleteResult(raw_results=..., deleted_count=1)
        >>> asyncio.run(my_async_coll.distinct("seq"))
        [0, 2]
        >>> asyncio.run(my_async_coll.delete_one(
        ...     {"seq": {"$exists": True}},
        ...     sort={"seq": astrapy.constants.SortMode.DESCENDING},
        ... ))
        CollectionDeleteResult(raw_results=..., deleted_count=1)
        >>> asyncio.run(my_async_coll.distinct("seq"))
        [0]
        >>> asyncio.run(my_async_coll.delete_one({"seq": 2}))
        CollectionDeleteResult(raw_results=..., deleted_count=0)
    """

    # Single-request method: the three timeout parameters collapse into one.
    _req_ms, _req_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # Only include 'sort' (and 'filter') in the payload when not None.
    payload_fields = {"filter": filter, "sort": sort}
    do_payload = {
        "deleteOne": {k: v for k, v in payload_fields.items() if v is not None}
    }
    logger.info(f"deleteOne on '{self.name}'")
    do_response = await self._converted_request(
        payload=do_payload,
        timeout_context=_TimeoutContext(request_ms=_req_ms, label=_req_label),
    )
    logger.info(f"finished deleteOne on '{self.name}'")
    status = do_response.get("status", {})
    if "deletedCount" not in status:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from delete_one API command.",
            raw_response=do_response,
        )
    return CollectionDeleteResult(
        deleted_count=status["deletedCount"],
        raw_results=[do_response],
    )
async def distinct(self, key: str, *, filter: FilterType | None = None, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[typing.Any]

Return a list of the unique values of key across the documents in the collection that match the provided filter.

Args

key
the name of the field whose value is inspected across documents. Keys can use dot-notation to descend to deeper document levels. Example of acceptable key values: "field" "field.subfield" "field.3" "field.3.subfield" If lists are encountered and no numeric index is specified, all items in the list are visited.
filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
general_method_timeout_ms
a timeout, in milliseconds, for the whole requested operation (which may involve multiple API requests). This method, being based on find (see) may entail successive HTTP API requests, depending on the amount of involved documents.
request_timeout_ms
a timeout, in milliseconds, for each API request.
timeout_ms
an alias for general_method_timeout_ms.

Returns

a list of all different values for key found across the documents that match the filter. The result list has no repeated items.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> async def run_distinct(acol: AsyncCollection) -> None:
...     await acol.insert_many(
...         [
...             {"name": "Marco", "food": ["apple", "orange"], "city": "Helsinki"},
...             {"name": "Emma", "food": {"likes_fruit": True, "allergies": []}},
...         ]
...     )
...     distinct0 = await acol.distinct("name")
...     print("distinct('name')", distinct0)
...     distinct1 = await acol.distinct("city")
...     print("distinct('city')", distinct1)
...     distinct2 = await acol.distinct("food")
...     print("distinct('food')", distinct2)
...     distinct3 = await acol.distinct("food.1")
...     print("distinct('food.1')", distinct3)
...     distinct4 = await acol.distinct("food.allergies")
...     print("distinct('food.allergies')", distinct4)
...     distinct5 = await acol.distinct("food.likes_fruit")
...     print("distinct('food.likes_fruit')", distinct5)
...
>>> asyncio.run(run_distinct(my_async_coll))
distinct('name') ['Emma', 'Marco']
distinct('city') ['Helsinki']
distinct('food') [{'likes_fruit': True, 'allergies': []}, 'apple', 'orange']
distinct('food.1') ['orange']
distinct('food.allergies') []
distinct('food.likes_fruit') [True]

Note

It must be kept in mind that distinct is a client-side operation, which effectively browses all required documents using the logic of the find method and collects the unique values found for key. As such, there may be performance, latency and ultimately billing implications if the amount of matching documents is large.

Note

For details on the behaviour of "distinct" in conjunction with real-time changes in the collection contents, see the Note of the find command.

Expand source code
async def distinct(
    self,
    key: str,
    *,
    filter: FilterType | None = None,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[Any]:
    """
    Return a list of the unique values of `key` across the documents
    in the collection that match the provided filter.

    Args:
        key: the name of the field whose value is inspected across documents.
            Keys can use dot-notation to descend to deeper document levels.
            Example of acceptable `key` values:
                "field"
                "field.subfield"
                "field.3"
                "field.3.subfield"
            If lists are encountered and no numeric index is specified,
            all items in the list are visited.
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        general_method_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation (which may involve multiple API requests).
            This method, being based on `find` (see) may entail successive HTTP API
            requests, depending on the amount of involved documents.
        request_timeout_ms: a timeout, in milliseconds, for each API request.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a list of all different values for `key` found across the documents
        that match the filter. The result list has no repeated items.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> async def run_distinct(acol: AsyncCollection) -> None:
        ...     await acol.insert_many(
        ...         [
        ...             {"name": "Marco", "food": ["apple", "orange"], "city": "Helsinki"},
        ...             {"name": "Emma", "food": {"likes_fruit": True, "allergies": []}},
        ...         ]
        ...     )
        ...     distinct0 = await acol.distinct("name")
        ...     print("distinct('name')", distinct0)
        ...     distinct1 = await acol.distinct("city")
        ...     print("distinct('city')", distinct1)
        ...     distinct2 = await acol.distinct("food")
        ...     print("distinct('food')", distinct2)
        ...     distinct3 = await acol.distinct("food.1")
        ...     print("distinct('food.1')", distinct3)
        ...     distinct4 = await acol.distinct("food.allergies")
        ...     print("distinct('food.allergies')", distinct4)
        ...     distinct5 = await acol.distinct("food.likes_fruit")
        ...     print("distinct('food.likes_fruit')", distinct5)
        ...
        >>> asyncio.run(run_distinct(my_async_coll))
        distinct('name') ['Emma', 'Marco']
        distinct('city') ['Helsinki']
        distinct('food') [{'likes_fruit': True, 'allergies': []}, 'apple', 'orange']
        distinct('food.1') ['orange']
        distinct('food.allergies') []
        distinct('food.likes_fruit') [True]

    Note:
        It must be kept in mind that `distinct` is a client-side operation,
        which effectively browses all required documents using the logic
        of the `find` method and collects the unique values found for `key`.
        As such, there may be performance, latency and ultimately
        billing implications if the amount of matching documents is large.

    Note:
        For details on the behaviour of "distinct" in conjunction with
        real-time changes in the collection contents, see the
        Note of the `find` command.
    """

    # lazy-import here to avoid circular import issues
    from astrapy.cursors import AsyncCollectionFindCursor

    # Overall (multi-request) timeout: explicit argument, then alias,
    # then the collection-level default.
    _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
        (general_method_timeout_ms, "general_method_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (
            self.api_options.timeout_options.general_method_timeout_ms,
            "general_method_timeout_ms",
        ),
    )
    # Per-request timeout for each underlying find page.
    _request_timeout_ms, _rt_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    # preparing cursor:
    # _extractor, applied per document, yields the item(s) found at `key`
    # (it is iterated over below).
    _extractor = _create_document_key_extractor(key)
    # The projection uses a reduced, "safe" form of the key; its exact
    # semantics live in _reduce_distinct_key_to_safe.
    _key = _reduce_distinct_key_to_safe(key)
    if _key == "":
        raise ValueError(
            "The 'key' parameter for distinct cannot be empty "
            "or start with a list index."
        )
    # relaxing the type hint (limited to within this method body)
    f_cursor: AsyncCollectionFindCursor[dict[str, Any], dict[str, Any]] = (
        AsyncCollectionFindCursor(
            collection=self,
            request_timeout_ms=_request_timeout_ms,
            overall_timeout_ms=_general_method_timeout_ms,
            request_timeout_label=_rt_label,
            overall_timeout_label=_gmt_label,
        )  # type: ignore[assignment]
        .filter(filter)
        .project({_key: True})
    )
    # consuming it:
    # NOTE(review): items are deduplicated through a per-item hash
    # (_hash_document) rather than a set of the items themselves --
    # presumably so that unhashable items (dicts, lists) can be handled too.
    _item_hashes = set()
    distinct_items: list[Any] = []
    logger.info(f"running distinct() on '{self.name}'")
    async for document in f_cursor:
        for item in _extractor(document):
            _item_hash = _hash_document(
                item, options=self.api_options.serdes_options
            )
            if _item_hash not in _item_hashes:
                _item_hashes.add(_item_hash)
                distinct_items.append(item)
    logger.info(f"finished running distinct() on '{self.name}'")
    return distinct_items
async def drop(self, *, collection_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Drop the collection, i.e. delete it from the database along with all the documents it contains.

Args

collection_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for collection_admin_timeout_ms.
timeout_ms
an alias for collection_admin_timeout_ms.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> async def drop_and_check(acol: AsyncCollection) -> None:
...     doc0 = await acol.find_one({})
...     print("doc0", doc0)
...     await acol.drop()
...     doc1 = await acol.find_one({})
...
>>> asyncio.run(drop_and_check(my_async_coll))
doc0 {'_id': '...', 'z': -10}
Traceback (most recent call last):
    ... ...
astrapy.exceptions.DataAPIResponseException: Collection does not exist, ...

Note

Use with caution.

Note

Once the method succeeds, methods on this object can still be invoked: however, this hardly makes sense as the underlying actual collection is no more. It is responsibility of the developer to design a correct flow which avoids using a deceased collection any further.

Expand source code
async def drop(
    self,
    *,
    collection_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Drop the collection: delete it from the database together with
    every document it stores.

    Args:
        collection_admin_timeout_ms: a timeout, in milliseconds, applied
            to the underlying API request. Defaults, if unset, come from
            this object's settings. (A single API request is issued by
            this method, so all timeout parameters are equivalent.)
        request_timeout_ms: an alias for `collection_admin_timeout_ms`.
        timeout_ms: an alias for `collection_admin_timeout_ms`.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> async def drop_and_check(acol: AsyncCollection) -> None:
        ...     doc0 = await acol.find_one({})
        ...     print("doc0", doc0)
        ...     await acol.drop()
        ...     doc1 = await acol.find_one({})
        ...
        >>> asyncio.run(drop_and_check(my_async_coll))
        doc0 {'_id': '...', 'z': -10}
        Traceback (most recent call last):
            ... ...
        astrapy.exceptions.DataAPIResponseException: Collection does not exist, ...

    Note:
        Use with caution.

    Note:
        After this method succeeds, the methods of this object remain
        callable: doing so hardly makes sense, though, since the actual
        underlying collection is gone. It is up to the developer to design
        a flow that never touches a dropped collection again.
    """

    # The actual drop is delegated to the parent database object,
    # forwarding all timeout aliases unchanged.
    logger.info(f"dropping collection '{self.name}' (self)")
    timeout_kwargs = {
        "collection_admin_timeout_ms": collection_admin_timeout_ms,
        "request_timeout_ms": request_timeout_ms,
        "timeout_ms": timeout_ms,
    }
    await self.database.drop_collection(self.name, **timeout_kwargs)
    logger.info(f"finished dropping collection '{self.name}' (self)")
async def estimated_document_count(self, *, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> int

Query the API server for an estimate of the document count in the collection.

Contrary to count_documents, this method has no filtering parameters.

Args

general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Returns

a server-provided estimate count of the documents in the collection.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> asyncio.run(my_async_coll.estimated_document_count())
35700
Expand source code
async def estimated_document_count(
    self,
    *,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> int:
    """
    Ask the API server for an approximate count of the documents
    in the collection.

    Unlike `count_documents`, this method accepts no filtering parameters.

    Args:
        general_method_timeout_ms: a timeout, in milliseconds, applied
            to the underlying API request. Defaults, if unset, come from
            this object's settings. (A single API request is issued by
            this method, so all timeout parameters are equivalent.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a server-provided estimate count of the documents in the collection.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> asyncio.run(my_async_coll.estimated_document_count())
        35700
    """

    # Resolve the effective per-request timeout from the provided
    # aliases plus this object's defaults.
    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    ed_payload: dict[str, Any] = {"estimatedDocumentCount": {}}
    logger.info(f"estimatedDocumentCount on '{self.name}'")
    ed_response = await self._converted_request(
        payload=ed_payload,
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    logger.info(f"finished estimatedDocumentCount on '{self.name}'")
    # A well-formed response must carry a "count" entry in its status.
    status_block = ed_response.get("status", {})
    if "count" not in status_block:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from estimatedDocumentCount API command.",
            raw_response=ed_response,
        )
    count: int = status_block["count"]
    return count
def find(self, filter: FilterType | None = None, *, projection: ProjectionType | None = None, document_type: type[DOC2] | None = None, skip: int | None = None, limit: int | None = None, include_similarity: bool | None = None, include_sort_vector: bool | None = None, sort: SortType | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> AsyncCollectionFindCursor[DOC, DOC2]

Find documents on the collection, matching a certain provided filter.

The method returns a Cursor that can then be iterated over. Depending on the method call pattern, the iteration over all documents can reflect collection mutations occurred since the find method was called, or not. In cases where the cursor reflects mutations in real-time, it will iterate over cursors in an approximate way (i.e. exhibiting occasional skipped or duplicate documents). This happens when making use of the sort option in a non-vector-search manner.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
projection
it controls which parts of the document are returned. It can be an allow-list: {"f1": True, "f2": True}, or a deny-list: {"fx": False, "fy": False}, but not a mixture (except for the _id and other special fields, which can be associated to both True or False independently of the rest of the specification). The special star-projections {"*": True} and {"*": False} have the effect of returning the whole document and {} respectively. For lists in documents, slice directives can be passed to select portions of the list: for instance, {"array": {"$slice": 2}}, {"array": {"$slice": -2}}, {"array": {"$slice": [4, 2]}} or {"array": {"$slice": [-4, 2]}}. An iterable over strings will be treated implicitly as an allow-list. The default projection (used if this parameter is not passed) does not necessarily include "special" fields such as $vector or $vectorize. See the Data API documentation for more on projections.
document_type
this parameter acts as a formal specifier for the type checker. If omitted, the resulting cursor is implicitly an AsyncCollectionFindCursor[DOC, DOC], i.e. maintains the same type for the items it returns as that for the documents in the collection. Strictly typed code may want to specify this parameter especially when a projection is given.
skip
with this integer parameter, what would be the first skip documents returned by the query are discarded, and the results start from the (skip+1)-th document. This parameter can be used only in conjunction with an explicit sort criterion of the ascending/descending type (i.e. it cannot be used when not sorting, nor with vector-based ANN search).
limit
this (integer) parameter sets a limit over how many documents are returned. Once limit is reached (or the cursor is exhausted for lack of matching documents), nothing more is returned.
include_similarity
a boolean to request the numeric value of the similarity to be returned as an added "$similarity" key in each returned document. Can only be used for vector ANN search, i.e. when either vector is supplied or the sort parameter has the shape {"$vector": …}.
include_sort_vector
a boolean to request the search query vector. If set to True (and if the invocation is a vector search), calling the get_sort_vector method on the returned cursor will yield the vector used for the ANN search.
sort
with this dictionary parameter one can control the order the documents are returned. See the Note about sorting, as well as the one about upper bounds, for details. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
request_timeout_ms
a timeout, in milliseconds, for each single one of the underlying HTTP requests used to fetch documents as the cursor is iterated over. If not passed, the collection-level setting is used instead.
timeout_ms
an alias for request_timeout_ms.

Returns

an AsyncCursor object representing iterations over the matching documents
(see the AsyncCursor object for how to use it. The simplest thing is to
run a for loop
for document in collection.find(...):).

Examples

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> async def run_finds(acol: AsyncCollection) -> None:
...             filter = {"seq": {"$exists": True}}
...             print("find results 1:")
...             async for doc in acol.find(filter, projection={"seq": True}, limit=5):
...                 print(doc["seq"])
...             async_cursor1 = acol.find(
...                 {},
...                 limit=4,
...                 sort={"seq": astrapy.constants.SortMode.DESCENDING},
...             )
...             ids = [doc["_id"] async for doc in async_cursor1]
...             print("find results 2:", ids)
...             async_cursor2 = acol.find({}, limit=3)
...             seqs = await async_cursor2.distinct("seq")
...             print("distinct results 3:", seqs)
...
>>> asyncio.run(run_finds(my_async_coll))
find results 1:
48
35
7
11
13
find results 2: ['d656cd9d-...', '479c7ce8-...', '96dc87fd-...', '83f0a21f-...']
distinct results 3: [48, 35, 7]
>>> async def run_vector_finds(acol: AsyncCollection) -> None:
...     await acol.insert_many([
...         {"tag": "A", "$vector": [4, 5]},
...         {"tag": "B", "$vector": [3, 4]},
...         {"tag": "C", "$vector": [3, 2]},
...         {"tag": "D", "$vector": [4, 1]},
...         {"tag": "E", "$vector": [2, 5]},
...     ])
...     ann_tags = [
...         document["tag"]
...         async for document in acol.find(
...             {},
...             sort={"$vector": [3, 3]},
...             limit=3,
...         )
...     ]
...     return ann_tags
...
>>> asyncio.run(run_vector_finds(my_async_coll))
['A', 'B', 'C']
>>> # (assuming the collection has metric VectorMetric.COSINE)
>>> async_cursor = my_async_coll.find(
...     sort={"$vector": [3, 3]},
...     limit=3,
...     include_sort_vector=True,
... )
>>> asyncio.run(async_cursor.get_sort_vector())
[3.0, 3.0]
>>> asyncio.run(async_cursor.__anext__())
{'_id': 'b13ce177-738e-47ec-bce1-77738ee7ec93', 'tag': 'A'}
>>> asyncio.run(async_cursor.get_sort_vector())
[3.0, 3.0]

Note

The following are example values for the sort parameter. When no particular order is required: sort={} When sorting by a certain value in ascending/descending order: sort={"field": SortMode.ASCENDING} sort={"field": SortMode.DESCENDING} When sorting first by "field" and then by "subfield" (while modern Python versions preserve the order of dictionaries, it is suggested for clarity to employ a collections.OrderedDict in these cases): sort={ "field": SortMode.ASCENDING, "subfield": SortMode.ASCENDING, } When running a vector similarity (ANN) search: sort={"$vector": [0.4, 0.15, -0.5]}

Note

Some combinations of arguments impose an implicit upper bound on the number of documents that are returned by the Data API. More specifically: (a) Vector ANN searches cannot return more than a number of documents that at the time of writing is set to 1000 items. (b) When using a sort criterion of the ascending/descending type, the Data API will return a smaller number of documents, set to 20 at the time of writing, and stop there. The returned documents are the top results across the whole collection according to the requested criterion. These provisions should be kept in mind even when subsequently running a command such as .distinct() on a cursor.

Note

When not specifying sorting criteria at all (by vector or otherwise), the cursor can scroll through an arbitrary number of documents as the Data API and the client periodically exchange new chunks of documents. It should be noted that the behavior of the cursor in the case documents have been added/removed after the find was started depends on database internals and it is not guaranteed, nor excluded, that such "real-time" changes in the data would be picked up by the cursor.

Expand source code
def find(
    self,
    filter: FilterType | None = None,
    *,
    projection: ProjectionType | None = None,
    document_type: type[DOC2] | None = None,
    skip: int | None = None,
    limit: int | None = None,
    include_similarity: bool | None = None,
    include_sort_vector: bool | None = None,
    sort: SortType | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> AsyncCollectionFindCursor[DOC, DOC2]:
    """
    Look up documents in the collection that match a given filter.

    A Cursor is returned, to be iterated over for consuming the results.
    Depending on how the method is invoked, iterating through all documents
    may or may not pick up collection mutations occurred after `find` was
    called. When the cursor does reflect mutations in real-time, iteration
    is approximate (occasional skipped or duplicated documents may be
    observed). This is the case when the `sort` option is used in a
    non-vector-search fashion.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        projection: controls which parts of each document are returned.
            Either an allow-list, `{"f1": True, "f2": True}`, or a
            deny-list, `{"fx": False, "fy": False}` — never a mixture
            (with the exception of `_id` and other special fields, which
            may carry True or False independently of the rest of the
            specification).
            The star-projections `{"*": True}` and `{"*": False}` return
            the whole document and `{}`, respectively.
            For lists within documents, slice directives select portions
            of the list: for instance, `{"array": {"$slice": 2}}`,
            `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
            `{"array": {"$slice": [-4, 2]}}`.
            An iterable over strings is treated implicitly as an allow-list.
            The default projection (when this parameter is omitted) does not
            necessarily include "special" fields such as `$vector` or
            `$vectorize`. See the Data API documentation for more on
            projections.
        document_type: a formal specifier for the benefit of the type
            checker. When omitted, the resulting cursor is implicitly an
            `AsyncCollectionFindCursor[DOC, DOC]`, i.e. its items keep the
            same type as the collection's documents. Strictly-typed code
            may want to supply this parameter, especially alongside a
            projection.
        skip: an integer; what would be the first `skip` documents returned
            by the query are discarded, so the results begin at the
            (skip+1)-th document. Usable only together with an explicit
            `sort` criterion of the ascending/descending type (i.e. neither
            without sorting nor with vector-based ANN search).
        limit: an integer cap on how many documents are returned. Once
            `limit` documents have been yielded (or matches run out),
            nothing more is returned.
        include_similarity: a boolean requesting that the numeric
            similarity value be added to each returned document under a
            "$similarity" key. Valid only for vector ANN search, i.e. when
            either `vector` is supplied or the `sort` parameter has the
            shape {"$vector": ...}.
        include_sort_vector: a boolean requesting the search query vector.
            If set to True (and the invocation is a vector search), calling
            the `get_sort_vector` method on the returned cursor yields the
            vector used for the ANN search.
        sort: a dictionary controlling the order in which the documents are
            returned. See the Note about sorting, as well as the one about
            upper bounds, for details. Vector-based ANN sorting is achieved
            by providing a "$vector" or a "$vectorize" key in `sort`.
        request_timeout_ms: a timeout, in milliseconds, for each individual
            underlying HTTP request issued while the cursor is iterated
            over. If not passed, the collection-level setting applies.
        timeout_ms: an alias for `request_timeout_ms`.

    Returns:
        an AsyncCursor object representing iterations over the matching
        documents (see the AsyncCursor object for how to use it. The
        simplest thing is to run a for loop:
        `for document in collection.find(...):`).

    Examples:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> async def run_finds(acol: AsyncCollection) -> None:
        ...             filter = {"seq": {"$exists": True}}
        ...             print("find results 1:")
        ...             async for doc in acol.find(filter, projection={"seq": True}, limit=5):
        ...                 print(doc["seq"])
        ...             async_cursor1 = acol.find(
        ...                 {},
        ...                 limit=4,
        ...                 sort={"seq": astrapy.constants.SortMode.DESCENDING},
        ...             )
        ...             ids = [doc["_id"] async for doc in async_cursor1]
        ...             print("find results 2:", ids)
        ...             async_cursor2 = acol.find({}, limit=3)
        ...             seqs = await async_cursor2.distinct("seq")
        ...             print("distinct results 3:", seqs)
        ...
        >>> asyncio.run(run_finds(my_async_coll))
        find results 1:
        48
        35
        7
        11
        13
        find results 2: ['d656cd9d-...', '479c7ce8-...', '96dc87fd-...', '83f0a21f-...']
        distinct results 3: [48, 35, 7]

        >>> async def run_vector_finds(acol: AsyncCollection) -> None:
        ...     await acol.insert_many([
        ...         {"tag": "A", "$vector": [4, 5]},
        ...         {"tag": "B", "$vector": [3, 4]},
        ...         {"tag": "C", "$vector": [3, 2]},
        ...         {"tag": "D", "$vector": [4, 1]},
        ...         {"tag": "E", "$vector": [2, 5]},
        ...     ])
        ...     ann_tags = [
        ...         document["tag"]
        ...         async for document in acol.find(
        ...             {},
        ...             sort={"$vector": [3, 3]},
        ...             limit=3,
        ...         )
        ...     ]
        ...     return ann_tags
        ...
        >>> asyncio.run(run_vector_finds(my_async_coll))
        ['A', 'B', 'C']
        >>> # (assuming the collection has metric VectorMetric.COSINE)

        >>> async_cursor = my_async_coll.find(
        ...     sort={"$vector": [3, 3]},
        ...     limit=3,
        ...     include_sort_vector=True,
        ... )
        >>> asyncio.run(async_cursor.get_sort_vector())
        [3.0, 3.0]
        >>> asyncio.run(async_cursor.__anext__())
        {'_id': 'b13ce177-738e-47ec-bce1-77738ee7ec93', 'tag': 'A'}
        >>> asyncio.run(async_cursor.get_sort_vector())
        [3.0, 3.0]

    Note:
        Example values for the `sort` parameter follow.
        When no particular order is required:
            sort={}
        When sorting by a certain value in ascending/descending order:
            sort={"field": SortMode.ASCENDING}
            sort={"field": SortMode.DESCENDING}
        When sorting first by "field" and then by "subfield"
        (while modern Python versions preserve the order of dictionaries,
        it is suggested for clarity to employ a `collections.OrderedDict`
        in these cases):
            sort={
                "field": SortMode.ASCENDING,
                "subfield": SortMode.ASCENDING,
            }
        When running a vector similarity (ANN) search:
            sort={"$vector": [0.4, 0.15, -0.5]}

    Note:
        Certain argument combinations imply an upper bound on how many
        documents the Data API returns. More specifically:
        (a) Vector ANN searches cannot return more than a number of
        documents that at the time of writing is set to 1000 items.
        (b) With a sort criterion of the ascending/descending type, the
        Data API returns a smaller number of documents, set to 20 at the
        time of writing, and stops there. The returned documents are the
        top results across the whole collection according to the requested
        criterion.
        Keep these provisions in mind even when subsequently running a
        command such as `.distinct()` on a cursor.

    Note:
        With no sorting criteria at all (by vector or otherwise), the
        cursor can scroll through an arbitrary number of documents, as the
        Data API and the client periodically exchange new chunks of
        documents. Whether documents added/removed after the `find` started
        are picked up by the cursor depends on database internals: such
        "real-time" visibility is neither guaranteed nor excluded.
    """

    # deferred import, needed to break a circular-import chain
    from astrapy.cursors import AsyncCollectionFindCursor

    _request_timeout_ms, _rt_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    # Assemble the cursor step by step, applying each find option in turn
    # (each call yields a cursor carrying the accumulated configuration):
    cursor: AsyncCollectionFindCursor[DOC, DOC2] = AsyncCollectionFindCursor(
        collection=self,
        request_timeout_ms=_request_timeout_ms,
        overall_timeout_ms=None,
        request_timeout_label=_rt_label,
    )
    cursor = cursor.filter(filter)
    cursor = cursor.project(projection)
    cursor = cursor.skip(skip)
    cursor = cursor.limit(limit)
    cursor = cursor.sort(sort)
    cursor = cursor.include_similarity(include_similarity)
    cursor = cursor.include_sort_vector(include_sort_vector)
    return cursor
async def find_one(self, filter: FilterType | None = None, *, projection: ProjectionType | None = None, include_similarity: bool | None = None, sort: SortType | None = None, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> Optional[~DOC]

Run a search, returning the first document in the collection that matches provided filters, if any is found.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
projection
it controls which parts of the document are returned. It can be an allow-list: {"f1": True, "f2": True}, or a deny-list: {"fx": False, "fy": False}, but not a mixture (except for the _id and other special fields, which can be associated to both True or False independently of the rest of the specification). The special star-projections {"*": True} and {"*": False} have the effect of returning the whole document and {} respectively. For lists in documents, slice directives can be passed to select portions of the list: for instance, {"array": {"$slice": 2}}, {"array": {"$slice": -2}}, {"array": {"$slice": [4, 2]}} or {"array": {"$slice": [-4, 2]}}. An iterable over strings will be treated implicitly as an allow-list. The default projection (used if this parameter is not passed) does not necessarily include "special" fields such as $vector or $vectorize. See the Data API documentation for more on projections.
include_similarity
a boolean to request the numeric value of the similarity to be returned as an added "$similarity" key in the returned document. Can only be used for vector ANN search, i.e. when either vector is supplied or the sort parameter has the shape {"$vector": …}.
sort
with this dictionary parameter one can control the order the documents are returned. See the Note about sorting for details. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Returns

a dictionary expressing the required document, otherwise None.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> async def demo_find_one(acol: AsyncCollection) -> None:
...     print("Count:", await acol.count_documents({}, upper_bound=100))
...     result0 = await acol.find_one({})
...     print("result0", result0)
...     result1 = await acol.find_one({"seq": 10})
...     print("result1", result1)
...     result2 = await acol.find_one({"seq": 1011})
...     print("result2", result2)
...     result3 = await acol.find_one({}, projection={"seq": False})
...     print("result3", result3)
...     result4 = await acol.find_one(
...         {},
...         sort={"seq": astrapy.constants.SortMode.DESCENDING},
...     )
...     print("result4", result4)
...
>>>
>>> asyncio.run(demo_find_one(my_async_coll))
Count: 50
result0 {'_id': '479c7ce8-...', 'seq': 48}
result1 {'_id': '93e992c4-...', 'seq': 10}
result2 None
result3 {'_id': '479c7ce8-...'}
result4 {'_id': 'd656cd9d-...', 'seq': 49}
>>> asyncio.run(my_async_coll.find_one(
...     {},
...     sort={"$vector": [1, 0]},
...     projection={"*": True},
... ))
{'_id': '...', 'tag': 'D', '$vector': [4.0, 1.0]}

Note

See the find method for more details on the accepted parameters (whereas skip and limit are not valid parameters for find_one).

Expand source code
async def find_one(
    self,
    filter: FilterType | None = None,
    *,
    projection: ProjectionType | None = None,
    include_similarity: bool | None = None,
    sort: SortType | None = None,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> DOC | None:
    """
    Run a search, returning the first document in the collection that matches
    provided filters, if any is found.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        projection: it controls which parts of the document are returned.
            It can be an allow-list: `{"f1": True, "f2": True}`,
            or a deny-list: `{"fx": False, "fy": False}`, but not a mixture
            (except for the `_id` and other special fields, which can be
            associated to both True or False independently of the rest
            of the specification).
            The special star-projections `{"*": True}` and `{"*": False}`
            have the effect of returning the whole document and `{}` respectively.
            For lists in documents, slice directives can be passed to select
            portions of the list: for instance, `{"array": {"$slice": 2}}`,
            `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
            `{"array": {"$slice": [-4, 2]}}`.
            An iterable over strings will be treated implicitly as an allow-list.
            The default projection (used if this parameter is not passed) does not
            necessarily include "special" fields such as `$vector` or `$vectorize`.
            See the Data API documentation for more on projections.
        include_similarity: a boolean to request the numeric value of the
            similarity to be returned as an added "$similarity" key in the
            returned document. Can only be used for vector ANN search, i.e.
            when either `vector` is supplied or the `sort` parameter has the
            shape {"$vector": ...}.
        sort: with this dictionary parameter one can control the order
            the documents are returned. See the Note about sorting for details.
            Vector-based ANN sorting is achieved by providing a "$vector"
            or a "$vectorize" key in `sort`.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a dictionary expressing the required document, otherwise None.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> async def demo_find_one(acol: AsyncCollection) -> None:
        ...     print("Count:", await acol.count_documents({}, upper_bound=100))
        ...     result0 = await acol.find_one({})
        ...     print("result0", result0)
        ...     result1 = await acol.find_one({"seq": 10})
        ...     print("result1", result1)
        ...     result2 = await acol.find_one({"seq": 1011})
        ...     print("result2", result2)
        ...     result3 = await acol.find_one({}, projection={"seq": False})
        ...     print("result3", result3)
        ...     result4 = await acol.find_one(
        ...         {},
        ...         sort={"seq": astrapy.constants.SortMode.DESCENDING},
        ...     )
        ...     print("result4", result4)
        ...
        >>>
        >>> asyncio.run(demo_find_one(my_async_coll))
        Count: 50
        result0 {'_id': '479c7ce8-...', 'seq': 48}
        result1 {'_id': '93e992c4-...', 'seq': 10}
        result2 None
        result3 {'_id': '479c7ce8-...'}
        result4 {'_id': 'd656cd9d-...', 'seq': 49}

        >>> asyncio.run(my_async_coll.find_one(
        ...     {},
        ...     sort={"$vector": [1, 0]},
        ...     projection={"*": True},
        ... ))
        {'_id': '...', 'tag': 'D', '$vector': [4.0, 1.0]}

    Note:
        See the `find` method for more details on the accepted parameters
        (whereas `skip` and `limit` are not valid parameters for `find_one`).
    """

    # Resolve the effective per-request timeout from the three aliases
    # and this object's configured defaults.
    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    fo_options = (
        None
        if include_similarity is None
        else {"includeSimilarity": include_similarity}
    )
    # Build the findOne command payload, omitting keys left at None so the
    # API receives only the options the caller actually provided.
    fo_payload = {
        "findOne": {
            k: v
            for k, v in {
                "filter": filter,
                "projection": normalize_optional_projection(projection),
                "options": fo_options,
                "sort": sort,
            }.items()
            if v is not None
        }
    }
    logger.info(f"findOne on '{self.name}'")
    fo_response = await self._converted_request(
        payload=fo_payload,
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    logger.info(f"finished findOne on '{self.name}'")
    # A well-formed response always has a "document" key under "data"
    # (whose value is None when nothing matched).
    if "document" not in (fo_response.get("data") or {}):
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from findOne API command.",
            raw_response=fo_response,
        )
    # The value is either the matched document or None: return it as-is.
    return fo_response["data"]["document"]  # type: ignore[no-any-return]
async def find_one_and_delete(self, filter: FilterType, *, projection: ProjectionType | None = None, sort: SortType | None = None, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> Optional[~DOC]

Find a document in the collection and delete it. The deleted document, however, is the return value of the method.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
projection
it controls which parts of the document are returned. It can be an allow-list: {"f1": True, "f2": True}, or a deny-list: {"fx": False, "fy": False}, but not a mixture (except for the _id and other special fields, which can be associated to both True or False independently of the rest of the specification). The special star-projections {"*": True} and {"*": False} have the effect of returning the whole document and {} respectively. For lists in documents, slice directives can be passed to select portions of the list: for instance, {"array": {"$slice": 2}}, {"array": {"$slice": -2}}, {"array": {"$slice": [4, 2]}} or {"array": {"$slice": [-4, 2]}}. An iterable over strings will be treated implicitly as an allow-list. The default projection (used if this parameter is not passed) does not necessarily include "special" fields such as $vector or $vectorize. See the Data API documentation for more on projections.
sort
with this dictionary parameter one can control the sorting order of the documents matching the filter, effectively determining what document will come first and hence be the deleted one. See the find method for more on sorting. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Returns

Either the document (or a projection thereof, as requested), or None if no matches were found in the first place.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> async def do_find_one_and_delete(acol: AsyncCollection) -> None:
...     await acol.insert_many(
...         [
...             {"species": "swan", "class": "Aves"},
...             {"species": "frog", "class": "Amphibia"},
...         ],
...     )
...     delete_result0 = await acol.find_one_and_delete(
...         {"species": {"$ne": "frog"}},
...         projection=["species"],
...     )
...     print("delete_result0", delete_result0)
...     delete_result1 = await acol.find_one_and_delete(
...         {"species": {"$ne": "frog"}},
...     )
...     print("delete_result1", delete_result1)
...
>>> asyncio.run(do_find_one_and_delete(my_async_coll))
delete_result0 {'_id': 'f335cd0f-...', 'species': 'swan'}
delete_result1 None
Expand source code
async def find_one_and_delete(
    self,
    filter: FilterType,
    *,
    projection: ProjectionType | None = None,
    sort: SortType | None = None,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> DOC | None:
    """
    Find a document in the collection and delete it. The deleted document,
    however, is the return value of the method.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        projection: it controls which parts of the document are returned.
            It can be an allow-list: `{"f1": True, "f2": True}`,
            or a deny-list: `{"fx": False, "fy": False}`, but not a mixture
            (except for the `_id` and other special fields, which can be
            associated to both True or False independently of the rest
            of the specification).
            The special star-projections `{"*": True}` and `{"*": False}`
            have the effect of returning the whole document and `{}` respectively.
            For lists in documents, slice directives can be passed to select
            portions of the list: for instance, `{"array": {"$slice": 2}}`,
            `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
            `{"array": {"$slice": [-4, 2]}}`.
            An iterable over strings will be treated implicitly as an allow-list.
            The default projection (used if this parameter is not passed) does not
            necessarily include "special" fields such as `$vector` or `$vectorize`.
            See the Data API documentation for more on projections.
        sort: with this dictionary parameter one can control the sorting
            order of the documents matching the filter, effectively
            determining what document will come first and hence be the
            deleted one. See the `find` method for more on sorting.
            Vector-based ANN sorting is achieved by providing a "$vector"
            or a "$vectorize" key in `sort`.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        Either the document (or a projection thereof, as requested), or None
        if no matches were found in the first place.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> async def do_find_one_and_delete(acol: AsyncCollection) -> None:
        ...     await acol.insert_many(
        ...         [
        ...             {"species": "swan", "class": "Aves"},
        ...             {"species": "frog", "class": "Amphibia"},
        ...         ],
        ...     )
        ...     delete_result0 = await acol.find_one_and_delete(
        ...         {"species": {"$ne": "frog"}},
        ...         projection=["species"],
        ...     )
        ...     print("delete_result0", delete_result0)
        ...     delete_result1 = await acol.find_one_and_delete(
        ...         {"species": {"$ne": "frog"}},
        ...     )
        ...     print("delete_result1", delete_result1)
        ...
        >>> asyncio.run(do_find_one_and_delete(my_async_coll))
        delete_result0 {'_id': 'f335cd0f-...', 'species': 'swan'}
        delete_result1 None
    """

    # Resolve the effective per-request timeout from the three aliases
    # and this object's configured defaults.
    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    _projection = normalize_optional_projection(projection)
    # Build the findOneAndDelete command payload, omitting keys left at None.
    fo_payload = {
        "findOneAndDelete": {
            k: v
            for k, v in {
                "filter": filter,
                "sort": sort,
                "projection": _projection,
            }.items()
            if v is not None
        }
    }
    logger.info(f"findOneAndDelete on '{self.name}'")
    fo_response = await self._converted_request(
        payload=fo_payload,
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    logger.info(f"finished findOneAndDelete on '{self.name}'")
    # Guard with `or {}` so a response carrying `"data": null` does not
    # raise a TypeError on the membership test.
    if "document" in (fo_response.get("data") or {}):
        document = fo_response["data"]["document"]
        return document  # type: ignore[no-any-return]
    else:
        # No document in the response: acceptable only if the API reports
        # that nothing was deleted; anything else is a malformed response.
        deleted_count = fo_response.get("status", {}).get("deletedCount")
        if deleted_count == 0:
            return None
        else:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from find_one_and_delete API command.",
                raw_response=fo_response,
            )
async def find_one_and_replace(self, filter: FilterType, replacement: DOC, *, projection: ProjectionType | None = None, sort: SortType | None = None, upsert: bool = False, return_document: str = 'before', general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> Optional[~DOC]

Find a document on the collection and replace it entirely with a new one, optionally inserting a new one if no match is found.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
replacement
the new document to write into the collection.
projection
it controls which parts of the document are returned. It can be an allow-list: {"f1": True, "f2": True}, or a deny-list: {"fx": False, "fy": False}, but not a mixture (except for the _id and other special fields, which can be associated to both True or False independently of the rest of the specification). The special star-projections {"*": True} and {"*": False} have the effect of returning the whole document and {} respectively. For lists in documents, slice directives can be passed to select portions of the list: for instance, {"array": {"$slice": 2}}, {"array": {"$slice": -2}}, {"array": {"$slice": [4, 2]}} or {"array": {"$slice": [-4, 2]}}. An iterable over strings will be treated implicitly as an allow-list. The default projection (used if this parameter is not passed) does not necessarily include "special" fields such as $vector or $vectorize. See the Data API documentation for more on projections.
sort
with this dictionary parameter one can control the sorting order of the documents matching the filter, effectively determining what document will come first and hence be the replaced one. See the find method for more on sorting. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
upsert
this parameter controls the behavior in absence of matches. If True, replacement is inserted as a new document if no matches are found on the collection. If False, the operation silently does nothing in case of no matches.
return_document
a flag controlling what document is returned: if set to ReturnDocument.BEFORE, or the string "before", the document found on database is returned; if set to ReturnDocument.AFTER, or the string "after", the new document is returned. The default is "before".
general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Returns

A document, either the one before the replace operation or the one after that. Alternatively, the method returns None to represent that no matching document was found, or that no replacement was inserted (depending on the return_document parameter).

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> async def do_find_one_and_replace(
...     acol: AsyncCollection
... ) -> None:
...     await acol.insert_one(
...         {"_id": "rule1", "text": "all animals are equal"}
...     )
...     result0 = await acol.find_one_and_replace(
...         {"_id": "rule1"},
...         {"text": "some animals are more equal!"},
...     )
...     print("result0", result0)
...     result1 = await acol.find_one_and_replace(
...         {"text": "some animals are more equal!"},
...         {"text": "and the pigs are the rulers"},
...         return_document=astrapy.constants.ReturnDocument.AFTER,
...     )
...     print("result1", result1)
...     result2 = await acol.find_one_and_replace(
...         {"_id": "rule2"},
...         {"text": "F=ma^2"},
...         return_document=astrapy.constants.ReturnDocument.AFTER,
...     )
...     print("result2", result2)
...     result3 = await acol.find_one_and_replace(
...         {"_id": "rule2"},
...         {"text": "F=ma"},
...         upsert=True,
...         return_document=astrapy.constants.ReturnDocument.AFTER,
...         projection={"_id": False},
...     )
...     print("result3", result3)
...
>>> asyncio.run(do_find_one_and_replace(my_async_coll))
result0 {'_id': 'rule1', 'text': 'all animals are equal'}
result1 {'_id': 'rule1', 'text': 'and the pigs are the rulers'}
result2 None
result3 {'text': 'F=ma'}
Expand source code
async def find_one_and_replace(
    self,
    filter: FilterType,
    replacement: DOC,
    *,
    projection: ProjectionType | None = None,
    sort: SortType | None = None,
    upsert: bool = False,
    return_document: str = ReturnDocument.BEFORE,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> DOC | None:
    """
    Find a document on the collection and replace it entirely with a new one,
    optionally inserting a new one if no match is found.

    Args:

        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        replacement: the new document to write into the collection.
        projection: it controls which parts of the document are returned.
            It can be an allow-list: `{"f1": True, "f2": True}`,
            or a deny-list: `{"fx": False, "fy": False}`, but not a mixture
            (except for the `_id` and other special fields, which can be
            associated to both True or False independently of the rest
            of the specification).
            The special star-projections `{"*": True}` and `{"*": False}`
            have the effect of returning the whole document and `{}` respectively.
            For lists in documents, slice directives can be passed to select
            portions of the list: for instance, `{"array": {"$slice": 2}}`,
            `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
            `{"array": {"$slice": [-4, 2]}}`.
            An iterable over strings will be treated implicitly as an allow-list.
            The default projection (used if this parameter is not passed) does not
            necessarily include "special" fields such as `$vector` or `$vectorize`.
            See the Data API documentation for more on projections.
        sort: with this dictionary parameter one can control the sorting
            order of the documents matching the filter, effectively
            determining what document will come first and hence be the
            replaced one. See the `find` method for more on sorting.
            Vector-based ANN sorting is achieved by providing a "$vector"
            or a "$vectorize" key in `sort`.
        upsert: this parameter controls the behavior in absence of matches.
            If True, `replacement` is inserted as a new document
            if no matches are found on the collection. If False,
            the operation silently does nothing in case of no matches.
        return_document: a flag controlling what document is returned:
            if set to `ReturnDocument.BEFORE`, or the string "before",
            the document found on database is returned; if set to
            `ReturnDocument.AFTER`, or the string "after", the new
            document is returned. The default is "before".
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        A document, either the one before the replace operation or the
        one after that. Alternatively, the method returns None to represent
        that no matching document was found, or that no replacement
        was inserted (depending on the `return_document` parameter).

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> async def do_find_one_and_replace(
        ...     acol: AsyncCollection
        ... ) -> None:
        ...     await acol.insert_one(
        ...         {"_id": "rule1", "text": "all animals are equal"}
        ...     )
        ...     result0 = await acol.find_one_and_replace(
        ...         {"_id": "rule1"},
        ...         {"text": "some animals are more equal!"},
        ...     )
        ...     print("result0", result0)
        ...     result1 = await acol.find_one_and_replace(
        ...         {"text": "some animals are more equal!"},
        ...         {"text": "and the pigs are the rulers"},
        ...         return_document=astrapy.constants.ReturnDocument.AFTER,
        ...     )
        ...     print("result1", result1)
        ...     result2 = await acol.find_one_and_replace(
        ...         {"_id": "rule2"},
        ...         {"text": "F=ma^2"},
        ...         return_document=astrapy.constants.ReturnDocument.AFTER,
        ...     )
        ...     print("result2", result2)
        ...     result3 = await acol.find_one_and_replace(
        ...         {"_id": "rule2"},
        ...         {"text": "F=ma"},
        ...         upsert=True,
        ...         return_document=astrapy.constants.ReturnDocument.AFTER,
        ...         projection={"_id": False},
        ...     )
        ...     print("result3", result3)
        ...
        >>> asyncio.run(do_find_one_and_replace(my_async_coll))
        result0 {'_id': 'rule1', 'text': 'all animals are equal'}
        result1 {'_id': 'rule1', 'text': 'and the pigs are the rulers'}
        result2 None
        result3 {'text': 'F=ma'}
    """

    # Resolve the effective per-request timeout from the three aliases
    # and this object's configured defaults.
    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    options = {
        "returnDocument": return_document,
        "upsert": upsert,
    }
    # Build the findOneAndReplace command payload, omitting keys left at None.
    fo_payload = {
        "findOneAndReplace": {
            k: v
            for k, v in {
                "filter": filter,
                "projection": normalize_optional_projection(projection),
                "replacement": replacement,
                "options": options,
                "sort": sort,
            }.items()
            if v is not None
        }
    }
    logger.info(f"findOneAndReplace on '{self.name}'")
    fo_response = await self._converted_request(
        payload=fo_payload,
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    logger.info(f"finished findOneAndReplace on '{self.name}'")
    # Guard with `or {}` so a response carrying `"data": null` does not
    # raise a TypeError on the membership test.
    if "document" in (fo_response.get("data") or {}):
        # The value is either the requested document or None: return as-is.
        return fo_response["data"]["document"]  # type: ignore[no-any-return]
    else:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from find_one_and_replace API command.",
            raw_response=fo_response,
        )
async def find_one_and_update(self, filter: FilterType, update: dict[str, Any], *, projection: ProjectionType | None = None, sort: SortType | None = None, upsert: bool = False, return_document: str = 'before', general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> Optional[~DOC]

Find a document on the collection and update it as requested, optionally inserting a new one if no match is found.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
update
the update prescription to apply to the document, expressed as a dictionary as per Data API syntax. Examples are: {"$set": {"field": "value"}} {"$inc": {"counter": 10}} {"$unset": {"field": ""}} See the Data API documentation for the full syntax.
projection
it controls which parts of the document are returned. It can be an allow-list: {"f1": True, "f2": True}, or a deny-list: {"fx": False, "fy": False}, but not a mixture (except for the _id and other special fields, which can be associated to both True or False independently of the rest of the specification). The special star-projections {"*": True} and {"*": False} have the effect of returning the whole document and {} respectively. For lists in documents, slice directives can be passed to select portions of the list: for instance, {"array": {"$slice": 2}}, {"array": {"$slice": -2}}, {"array": {"$slice": [4, 2]}} or {"array": {"$slice": [-4, 2]}}. An iterable over strings will be treated implicitly as an allow-list. The default projection (used if this parameter is not passed) does not necessarily include "special" fields such as $vector or $vectorize. See the Data API documentation for more on projections.
sort
with this dictionary parameter one can control the sorting order of the documents matching the filter, effectively determining what document will come first and hence be the updated one. See the find method for more on sorting. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
upsert
this parameter controls the behavior in absence of matches. If True, a new document (resulting from applying the update to an empty document) is inserted if no matches are found on the collection. If False, the operation silently does nothing in case of no matches.
return_document
a flag controlling what document is returned: if set to ReturnDocument.BEFORE, or the string "before", the document found on database is returned; if set to ReturnDocument.AFTER, or the string "after", the new document is returned. The default is "before".
general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Returns

A document (or a projection thereof, as required), either the one before the replace operation or the one after that. Alternatively, the method returns None to represent that no matching document was found, or that no update was applied (depending on the return_document parameter).

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> async def do_find_one_and_update(acol: AsyncCollection) -> None:
...     await acol.insert_one({"Marco": "Polo"})
...     result0 = await acol.find_one_and_update(
...         {"Marco": {"$exists": True}},
...         {"$set": {"title": "Mr."}},
...     )
...     print("result0", result0)
...     result1 = await acol.find_one_and_update(
...         {"title": "Mr."},
...         {"$inc": {"rank": 3}},
...         projection=["title", "rank"],
...         return_document=astrapy.constants.ReturnDocument.AFTER,
...     )
...     print("result1", result1)
...     result2 = await acol.find_one_and_update(
...         {"name": "Johnny"},
...         {"$set": {"rank": 0}},
...         return_document=astrapy.constants.ReturnDocument.AFTER,
...     )
...     print("result2", result2)
...     result3 = await acol.find_one_and_update(
...         {"name": "Johnny"},
...         {"$set": {"rank": 0}},
...         upsert=True,
...         return_document=astrapy.constants.ReturnDocument.AFTER,
...     )
...     print("result3", result3)
...
>>> asyncio.run(do_find_one_and_update(my_async_coll))
result0 {'_id': 'f7c936d3-b0a0-45eb-a676-e2829662a57c', 'Marco': 'Polo'}
result1 {'_id': 'f7c936d3-b0a0-45eb-a676-e2829662a57c', 'title': 'Mr.', 'rank': 3}
result2 None
result3 {'_id': 'db3d678d-14d4-4caa-82d2-d5fb77dab7ec', 'name': 'Johnny', 'rank': 0}
Expand source code
async def find_one_and_update(
    self,
    filter: FilterType,
    update: dict[str, Any],
    *,
    projection: ProjectionType | None = None,
    sort: SortType | None = None,
    upsert: bool = False,
    return_document: str = ReturnDocument.BEFORE,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> DOC | None:
    """
    Locate a single document matching the filter and apply an update to it,
    optionally inserting a brand-new document when nothing matches.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        update: the update prescription to apply to the document, expressed
            as a dictionary as per Data API syntax. Examples are:
                {"$set": {"field": "value"}}
                {"$inc": {"counter": 10}}
                {"$unset": {"field": ""}}
            See the Data API documentation for the full syntax.
        projection: controls which parts of the document are returned.
            It can be an allow-list (`{"f1": True, "f2": True}`) or a
            deny-list (`{"fx": False, "fy": False}`), but not a mixture
            (except for `_id` and other special fields, which can be set
            to True or False independently of the rest).
            The star-projections `{"*": True}` and `{"*": False}` return
            the whole document and `{}` respectively. For lists inside
            documents, slice directives such as `{"array": {"$slice": 2}}`,
            `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
            `{"array": {"$slice": [-4, 2]}}` select portions of the list.
            An iterable over strings is treated implicitly as an allow-list.
            The default projection (when this parameter is not passed) does
            not necessarily include "special" fields such as `$vector` or
            `$vectorize`. See the Data API documentation for more details.
        sort: a dictionary controlling the sorting order of the documents
            matching the filter, i.e. determining which document comes
            first and hence is the one being updated. See the `find`
            method for more on sorting. Vector-based ANN sorting is
            achieved by providing a "$vector" or a "$vectorize" key
            in `sort`.
        upsert: controls the behavior in absence of matches. If True,
            a new document (the result of applying `update` to an empty
            document) is inserted when no match is found; if False,
            the operation silently does nothing in that case.
        return_document: a flag controlling which document is returned:
            `ReturnDocument.BEFORE` (or the string "before") returns the
            document as found on the database; `ReturnDocument.AFTER`
            (or "after") returns the new document. Defaults to "before".
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        A document (or a projection thereof, as required), either the one
        before the replace operation or the one after that.
        Alternatively, the method returns None to represent
        that no matching document was found, or that no update
        was applied (depending on the `return_document` parameter).

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> async def do_find_one_and_update(acol: AsyncCollection) -> None:
        ...     await acol.insert_one({"Marco": "Polo"})
        ...     result0 = await acol.find_one_and_update(
        ...         {"Marco": {"$exists": True}},
        ...         {"$set": {"title": "Mr."}},
        ...     )
        ...     print("result0", result0)
        ...     result1 = await acol.find_one_and_update(
        ...         {"title": "Mr."},
        ...         {"$inc": {"rank": 3}},
        ...         projection=["title", "rank"],
        ...         return_document=astrapy.constants.ReturnDocument.AFTER,
        ...     )
        ...     print("result1", result1)
        ...     result2 = await acol.find_one_and_update(
        ...         {"name": "Johnny"},
        ...         {"$set": {"rank": 0}},
        ...         return_document=astrapy.constants.ReturnDocument.AFTER,
        ...     )
        ...     print("result2", result2)
        ...     result3 = await acol.find_one_and_update(
        ...         {"name": "Johnny"},
        ...         {"$set": {"rank": 0}},
        ...         upsert=True,
        ...         return_document=astrapy.constants.ReturnDocument.AFTER,
        ...     )
        ...     print("result3", result3)
        ...
        >>> asyncio.run(do_find_one_and_update(my_async_coll))
        result0 {'_id': 'f7c936d3-b0a0-45eb-a676-e2829662a57c', 'Marco': 'Polo'}
        result1 {'_id': 'f7c936d3-b0a0-45eb-a676-e2829662a57c', 'title': 'Mr.', 'rank': 3}
        result2 None
        result3 {'_id': 'db3d678d-14d4-4caa-82d2-d5fb77dab7ec', 'name': 'Johnny', 'rank': 0}
    """

    # pick the effective single-request timeout from the parameters
    # and this collection's configured defaults
    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # assemble the command body, leaving out entries that are None
    command_body: dict[str, Any] = {
        "filter": filter,
        "update": update,
        "options": {
            "returnDocument": return_document,
            "upsert": upsert,
        },
    }
    if sort is not None:
        command_body["sort"] = sort
    _projection = normalize_optional_projection(projection)
    if _projection is not None:
        command_body["projection"] = _projection
    logger.info(f"findOneAndUpdate on '{self.name}'")
    fo_response = await self._converted_request(
        payload={"findOneAndUpdate": command_body},
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    logger.info(f"finished findOneAndUpdate on '{self.name}'")
    response_data = fo_response.get("data", {})
    if "document" not in response_data:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from find_one_and_update API command.",
            raw_response=fo_response,
        )
    # a None "document" means: no match found / no update applied
    return response_data["document"]  # type: ignore[no-any-return]
async def info(self, *, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> CollectionInfo

Information on the collection (name, location, database), in the form of a CollectionInfo object.

Not to be confused with the collection options method (related to the collection internal configuration).

Args

database_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying DevOps API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for database_admin_timeout_ms.
timeout_ms
an alias for database_admin_timeout_ms.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> asyncio.run(my_async_coll.info()).database_info.region
'us-east1'
>>> asyncio.run(my_async_coll.info()).full_name
'default_keyspace.my_v_collection'

Note

the returned CollectionInfo wraps, among other things, the database information: as such, calling this method triggers the same-named method of a Database object (which, in turn, performs a HTTP request to the DevOps API). See the documentation for Database.info() for more details.

Expand source code
async def info(
    self,
    *,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionInfo:
    """
    Return information on the collection (name, location, database),
    packaged as a CollectionInfo object.

    Not to be confused with the collection `options` method (related
    to the collection internal configuration).

    Args:
        database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying DevOps API request.
            If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> asyncio.run(my_async_coll.info()).database_info.region
        'us-east1'
        >>> asyncio.run(my_async_coll.info()).full_name
        'default_keyspace.my_v_collection'

    Note:
        the returned CollectionInfo wraps, among other things,
        the database information: as such, calling this method
        triggers the same-named method of a Database object (which, in turn,
        performs a HTTP request to the DevOps API).
        See the documentation for `Database.info()` for more details.
    """

    # delegate to the parent database for the (DevOps API) database info,
    # then wrap it together with this collection's identifiers
    database_info = await self.database.info(
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    return CollectionInfo(
        database_info=database_info,
        keyspace=self.keyspace,
        name=self.name,
        full_name=self.full_name,
    )
async def insert_many(self, documents: Iterable[DOC], *, ordered: bool = False, chunk_size: int | None = None, concurrency: int | None = None, request_timeout_ms: int | None = None, general_method_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> CollectionInsertManyResult

Insert a list of documents into the collection. This is not an atomic operation.

Args

documents
an iterable of dictionaries, each a document to insert. Documents may specify their _id field or leave it out, in which case it will be added automatically.
ordered
if False (default), the insertions can occur in arbitrary order and possibly concurrently. If True, they are processed sequentially. If there are no specific reasons against it, unordered insertions are to be preferred as they complete much faster.
chunk_size
how many documents to include in a single API request. Exceeding the server maximum allowed value results in an error. Leave it unspecified (recommended) to use the system default.
concurrency
maximum number of concurrent requests to the API at a given time. It cannot be more than one for ordered insertions.
request_timeout_ms
a timeout, in milliseconds, for each API request. If not passed, the collection-level setting is used instead.
general_method_timeout_ms
a timeout, in milliseconds, for the whole requested operation (which may involve multiple API requests). If not passed, the collection-level setting is used instead.
timeout_ms
an alias for general_method_timeout_ms.

Returns

a CollectionInsertManyResult object.

Examples

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> async def write_and_count(acol: AsyncCollection) -> None:
...             count0 = await acol.count_documents({}, upper_bound=10)
...             print("count0", count0)
...             im_result1 = await acol.insert_many(
...                 [
...                     {"a": 10},
...                     {"a": 5},
...                     {"b": [True, False, False]},
...                 ],
...                 ordered=True,
...             )
...             print("inserted1", im_result1.inserted_ids)
...             count1 = await acol.count_documents({}, upper_bound=100)
...             print("count1", count1)
...             await acol.insert_many(
...                 [{"seq": i} for i in range(50)],
...                 concurrency=5,
...             )
...             count2 = await acol.count_documents({}, upper_bound=100)
...             print("count2", count2)
...
>>> asyncio.run(write_and_count(my_async_coll))
count0 0
inserted1 ['e3c2a684-...', '1de4949f-...', '167dacc3-...']
count1 3
count2 53
>>> asyncio.run(my_async_coll.insert_many(
...     [
...         {"tag": "a", "$vector": [1, 2]},
...         {"tag": "b", "$vector": [3, 4]},
...     ]
... ))
CollectionInsertManyResult(...)

Note

Unordered insertions are executed with some degree of concurrency, so it is usually better to prefer this mode unless the order in the document sequence is important.

Note

A failure mode for this command is related to certain faulty documents found among those to insert: a document may have an _id already present on the collection, or its vector dimension may not match the collection setting.

For an ordered insertion, the method will raise an exception at the first such faulty document – nevertheless, all documents processed until then will end up being written to the database.

For unordered insertions, if the error stems from faulty documents the insertion proceeds until exhausting the input documents: then, an exception is raised – and all insertable documents will have been written to the database, including those "after" the troublesome ones.

If, on the other hand, there are errors not related to individual documents (such as a network connectivity error), the whole insert_many operation will stop in mid-way, an exception will be raised, and only a certain amount of the input documents will have made their way to the database.

Expand source code
async def insert_many(
    self,
    documents: Iterable[DOC],
    *,
    ordered: bool = False,
    chunk_size: int | None = None,
    concurrency: int | None = None,
    request_timeout_ms: int | None = None,
    general_method_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionInsertManyResult:
    """
    Insert a list of documents into the collection.
    This is not an atomic operation.

    Args:
        documents: an iterable of dictionaries, each a document to insert.
            Documents may specify their `_id` field or leave it out, in which
            case it will be added automatically.
        ordered: if False (default), the insertions can occur in arbitrary order
            and possibly concurrently. If True, they are processed sequentially.
            If there are no specific reasons against it, unordered insertions are to
            be preferred as they complete much faster.
        chunk_size: how many documents to include in a single API request.
            Exceeding the server maximum allowed value results in an error.
            Leave it unspecified (recommended) to use the system default.
        concurrency: maximum number of concurrent requests to the API at
            a given time. It cannot be more than one for ordered insertions.
        request_timeout_ms: a timeout, in milliseconds, for each API request.
            If not passed, the collection-level setting is used instead.
        general_method_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation (which may involve multiple API requests).
            If not passed, the collection-level setting is used instead.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a CollectionInsertManyResult object.

    Examples:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> async def write_and_count(acol: AsyncCollection) -> None:
        ...             count0 = await acol.count_documents({}, upper_bound=10)
        ...             print("count0", count0)
        ...             im_result1 = await acol.insert_many(
        ...                 [
        ...                     {"a": 10},
        ...                     {"a": 5},
        ...                     {"b": [True, False, False]},
        ...                 ],
        ...                 ordered=True,
        ...             )
        ...             print("inserted1", im_result1.inserted_ids)
        ...             count1 = await acol.count_documents({}, upper_bound=100)
        ...             print("count1", count1)
        ...             await acol.insert_many(
        ...                 [{"seq": i} for i in range(50)],
        ...                 concurrency=5,
        ...             )
        ...             count2 = await acol.count_documents({}, upper_bound=100)
        ...             print("count2", count2)
        ...
        >>> asyncio.run(write_and_count(my_async_coll))
        count0 0
        inserted1 ['e3c2a684-...', '1de4949f-...', '167dacc3-...']
        count1 3
        count2 53
        >>> asyncio.run(my_async_coll.insert_many(
        ...     [
        ...         {"tag": "a", "$vector": [1, 2]},
        ...         {"tag": "b", "$vector": [3, 4]},
        ...     ]
        ... ))
        CollectionInsertManyResult(...)

    Note:
        Unordered insertions are executed with some degree of concurrency,
        so it is usually better to prefer this mode unless the order in the
        document sequence is important.

    Note:
        A failure mode for this command is related to certain faulty documents
        found among those to insert: a document may have an `_id` already
        present on the collection, or its vector dimension may not
        match the collection setting.

        For an ordered insertion, the method will raise an exception at
        the first such faulty document -- nevertheless, all documents processed
        until then will end up being written to the database.

        For unordered insertions, if the error stems from faulty documents
        the insertion proceeds until exhausting the input documents: then,
        an exception is raised -- and all insertable documents will have been
        written to the database, including those "after" the troublesome ones.

        If, on the other hand, there are errors not related to individual
        documents (such as a network connectivity error), the whole
        `insert_many` operation will stop in mid-way, an exception will be raised,
        and only a certain amount of the input documents will
        have made their way to the database.
    """

    # resolve the whole-operation timeout: explicit argument, its alias,
    # or finally the collection-level default
    _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
        (general_method_timeout_ms, "general_method_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (
            self.api_options.timeout_options.general_method_timeout_ms,
            "general_method_timeout_ms",
        ),
    )
    # resolve the per-request timeout (caps each individual API call)
    _request_timeout_ms, _rt_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    # default concurrency: 1 when ordered (required), library default otherwise
    if concurrency is None:
        if ordered:
            _concurrency = 1
        else:
            _concurrency = DEFAULT_INSERT_MANY_CONCURRENCY
    else:
        _concurrency = concurrency
    if _concurrency > 1 and ordered:
        raise ValueError("Cannot run ordered insert_many concurrently.")
    if chunk_size is None:
        _chunk_size = DEFAULT_INSERT_MANY_CHUNK_SIZE
    else:
        _chunk_size = chunk_size
    # materialize the iterable: it is traversed (in chunks) multiple times below
    _documents = list(documents)
    logger.info(f"inserting {len(_documents)} documents in '{self.name}'")
    raw_results: list[dict[str, Any]] = []
    # tracks the overall-method deadline across the several API calls
    timeout_manager = MultiCallTimeoutManager(
        overall_timeout_ms=_general_method_timeout_ms,
        timeout_label=_gmt_label,
    )
    if ordered:
        # ordered: one chunk at a time, stopping at the first chunk with errors
        options = {"ordered": True}
        inserted_ids: list[Any] = []
        for i in range(0, len(_documents), _chunk_size):
            im_payload = {
                "insertMany": {
                    "documents": _documents[i : i + _chunk_size],
                    "options": options,
                },
            }
            logger.info(f"insertMany(chunk) on '{self.name}'")
            chunk_response = await self._converted_request(
                payload=im_payload,
                raise_api_errors=False,
                timeout_context=timeout_manager.remaining_timeout(
                    cap_time_ms=_request_timeout_ms,
                    cap_timeout_label=_rt_label,
                ),
            )
            logger.info(f"finished insertMany(chunk) on '{self.name}'")
            # accumulate the results in this call
            chunk_inserted_ids = (chunk_response.get("status") or {}).get(
                "insertedIds", []
            )
            inserted_ids += chunk_inserted_ids
            raw_results += [chunk_response]
            # if errors, quit early
            if chunk_response.get("errors", []):
                # the exception carries the ids written so far (partial result)
                partial_result = CollectionInsertManyResult(
                    raw_results=raw_results,
                    inserted_ids=inserted_ids,
                )
                raise CollectionInsertManyException.from_response(
                    command=None,
                    raw_response=chunk_response,
                    partial_result=partial_result,
                )

        # return
        full_result = CollectionInsertManyResult(
            raw_results=raw_results,
            inserted_ids=inserted_ids,
        )
        logger.info(
            f"finished inserting {len(_documents)} documents in '{self.name}'"
        )
        return full_result

    else:
        # unordered: concurrent or not, do all of them and parse the results
        options = {"ordered": False}

        # the semaphore bounds how many chunk requests are in flight at once
        sem = asyncio.Semaphore(_concurrency)

        async def concurrent_insert_chunk(
            document_chunk: list[DOC],
        ) -> dict[str, Any]:
            # send one insertMany chunk, honoring the concurrency semaphore
            async with sem:
                im_payload = {
                    "insertMany": {
                        "documents": document_chunk,
                        "options": options,
                    },
                }
                logger.info(f"insertMany(chunk) on '{self.name}'")
                im_response = await self._converted_request(
                    payload=im_payload,
                    raise_api_errors=False,
                    timeout_context=timeout_manager.remaining_timeout(
                        cap_time_ms=_request_timeout_ms,
                        cap_timeout_label=_rt_label,
                    ),
                )
                logger.info(f"finished insertMany(chunk) on '{self.name}'")
                return im_response

        if _concurrency > 1:
            # fan out all chunks as tasks; the semaphore throttles them
            tasks = [
                asyncio.create_task(
                    concurrent_insert_chunk(_documents[i : i + _chunk_size])
                )
                for i in range(0, len(_documents), _chunk_size)
            ]
            raw_results = await asyncio.gather(*tasks)
        else:
            # sequential fallback (concurrency == 1)
            raw_results = [
                await concurrent_insert_chunk(_documents[i : i + _chunk_size])
                for i in range(0, len(_documents), _chunk_size)
            ]

        # recast raw_results
        inserted_ids = [
            inserted_id
            for chunk_response in raw_results
            for inserted_id in (chunk_response.get("status") or {}).get(
                "insertedIds", []
            )
        ]

        # check-raise
        if any(
            [chunk_response.get("errors", []) for chunk_response in raw_results]
        ):
            partial_result = CollectionInsertManyResult(
                raw_results=raw_results,
                inserted_ids=inserted_ids,
            )
            raise CollectionInsertManyException.from_responses(
                commands=[None for _ in raw_results],
                raw_responses=raw_results,
                partial_result=partial_result,
            )

        # return
        full_result = CollectionInsertManyResult(
            raw_results=raw_results,
            inserted_ids=inserted_ids,
        )
        logger.info(
            f"finished inserting {len(_documents)} documents in '{self.name}'"
        )
        return full_result
async def insert_one(self, document: DOC, *, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> CollectionInsertOneResult

Insert a single document in the collection in an atomic operation.

Args

document
the dictionary expressing the document to insert. The _id field of the document can be left out, in which case it will be created automatically.
general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Returns

a CollectionInsertOneResult object.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> async def write_and_count(acol: AsyncCollection) -> None:
...     count0 = await acol.count_documents({}, upper_bound=10)
...     print("count0", count0)
...     await acol.insert_one(
...         {
...             "age": 30,
...             "name": "Smith",
...             "food": ["pear", "peach"],
...             "likes_fruit": True,
...         },
...     )
...     await acol.insert_one({"_id": "user-123", "age": 50, "name": "Maccio"})
...     count1 = await acol.count_documents({}, upper_bound=10)
...     print("count1", count1)
...
>>> asyncio.run(write_and_count(my_async_coll))
count0 0
count1 2
>>> asyncio.run(my_async_coll.insert_one({"tag": "v", "$vector": [10, 11]}))
CollectionInsertOneResult(...)

Note

If an _id is explicitly provided, which corresponds to a document that exists already in the collection, an error is raised and the insertion fails.

Expand source code
async def insert_one(
    self,
    document: DOC,
    *,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionInsertOneResult:
    """
    Insert a single document in the collection in an atomic operation.

    Args:
        document: the dictionary expressing the document to insert.
            The `_id` field of the document can be left out, in which
            case it will be created automatically.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a CollectionInsertOneResult object.

    Raises:
        ValueError: if the API response lacks the expected inserted-id
            information.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> async def write_and_count(acol: AsyncCollection) -> None:
        ...     count0 = await acol.count_documents({}, upper_bound=10)
        ...     print("count0", count0)
        ...     await acol.insert_one(
        ...         {
        ...             "age": 30,
        ...             "name": "Smith",
        ...             "food": ["pear", "peach"],
        ...             "likes_fruit": True,
        ...         },
        ...     )
        ...     await acol.insert_one({"_id": "user-123", "age": 50, "name": "Maccio"})
        ...     count1 = await acol.count_documents({}, upper_bound=10)
        ...     print("count1", count1)
        ...
        >>> asyncio.run(write_and_count(my_async_coll))
        count0 0
        count1 2

        >>> asyncio.run(my_async_coll.insert_one({"tag": "v", "$vector": [10, 11]}))
        CollectionInsertOneResult(...)

    Note:
        If an `_id` is explicitly provided, which corresponds to a document
        that exists already in the collection, an error is raised and
        the insertion fails.
    """

    # pick the effective single-request timeout from the parameters
    # and this collection's configured defaults
    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    io_payload = {"insertOne": {"document": document}}
    logger.info(f"insertOne on '{self.name}'")
    io_response = await self._converted_request(
        payload=io_payload,
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    logger.info(f"finished insertOne on '{self.name}'")
    # a successful response carries exactly the one inserted id in
    # status.insertedIds; anything else (missing or empty) is faulty.
    inserted_ids = io_response.get("status", {}).get("insertedIds")
    if inserted_ids:
        return CollectionInsertOneResult(
            raw_results=[io_response],
            inserted_id=inserted_ids[0],
        )
    # single raise site for both the missing-key and the empty-list cases
    # (was duplicated, and the message had a stray "$" template artifact)
    raise ValueError(
        "Could not complete an insert_one operation. "
        f"(gotten '{json.dumps(io_response)}')"
    )
async def options(self, *, collection_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> CollectionDefinition

Get the collection options, i.e. its configuration as read from the database.

The method issues a request to the Data API each time is invoked, without caching mechanisms: this ensures up-to-date information for usages such as real-time collection validation by the application.

Args

collection_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for collection_admin_timeout_ms.
timeout_ms
an alias for collection_admin_timeout_ms.

Returns

a CollectionDefinition instance describing the collection. (See also the database list_collections method.)

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> asyncio.run(my_async_coll.options())
CollectionDefinition(vector=CollectionVectorOptions(dimension=3, metric='cosine'))
Expand source code
async def options(
    self,
    *,
    collection_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionDefinition:
    """
    Get the collection options, i.e. its configuration as read from the database.

    Every invocation issues a fresh request to the Data API (there is no
    caching), so the returned information is always up to date -- suitable,
    for instance, for real-time collection validation by the application.

    Args:
        collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `collection_admin_timeout_ms`.
        timeout_ms: an alias for `collection_admin_timeout_ms`.

    Returns:
        a CollectionDefinition instance describing the collection.
        (See also the database `list_collections` method.)

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> asyncio.run(my_async_coll.options())
        CollectionDefinition(vector=CollectionVectorOptions(dimension=3, metric='cosine'))
    """

    # Resolve the effective timeout (and its label, used in error reporting).
    _timeout_ms, _timeout_label = _select_singlereq_timeout_ca(
        timeout_options=self.api_options.timeout_options,
        collection_admin_timeout_ms=collection_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info(f"getting collections in search of '{self.name}'")
    all_descriptors = await self.database._list_collections_ctx(
        keyspace=None,
        timeout_context=_TimeoutContext(
            request_ms=_timeout_ms,
            label=_timeout_label,
        ),
    )
    logger.info(f"finished getting collections in search of '{self.name}'")
    # Pick the descriptor matching this collection's name, if any.
    matching_descriptor = next(
        (descriptor for descriptor in all_descriptors if descriptor.name == self.name),
        None,
    )
    if matching_descriptor is None:
        raise ValueError(
            f"Collection {self.keyspace}.{self.name} not found.",
        )
    return matching_descriptor.definition
async def replace_one(self, filter: FilterType, replacement: DOC, *, sort: SortType | None = None, upsert: bool = False, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> CollectionUpdateResult

Replace a single document on the collection with a new one, optionally inserting a new one if no match is found.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
replacement
the new document to write into the collection.
sort
with this dictionary parameter one can control the sorting order of the documents matching the filter, effectively determining what document will come first and hence be the replaced one. See the find method for more on sorting. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
upsert
this parameter controls the behavior in absence of matches. If True, replacement is inserted as a new document if no matches are found on the collection. If False, the operation silently does nothing in case of no matches.
general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Returns

a CollectionUpdateResult object summarizing the outcome of the replace operation.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> async def do_replace_one(acol: AsyncCollection) -> None:
...     await acol.insert_one({"Marco": "Polo"})
...     result0 = await acol.replace_one(
...         {"Marco": {"$exists": True}},
...         {"Buda": "Pest"},
...     )
...     print("result0.update_info", result0.update_info)
...     doc1 = await acol.find_one({"Buda": "Pest"})
...     print("doc1", doc1)
...     result1 = await acol.replace_one(
...         {"Mirco": {"$exists": True}},
...         {"Oh": "yeah?"},
...     )
...     print("result1.update_info", result1.update_info)
...     result2 = await acol.replace_one(
...         {"Mirco": {"$exists": True}},
...         {"Oh": "yeah?"},
...         upsert=True,
...     )
...     print("result2.update_info", result2.update_info)
...
>>> asyncio.run(do_replace_one(my_async_coll))
result0.update_info {'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1}
doc1 {'_id': '6e669a5a-...', 'Buda': 'Pest'}
result1.update_info {'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0}
result2.update_info {'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '30e34e00-...'}
Expand source code
async def replace_one(
    self,
    filter: FilterType,
    replacement: DOC,
    *,
    sort: SortType | None = None,
    upsert: bool = False,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionUpdateResult:
    """
    Replace a single document on the collection with a new one,
    optionally inserting a new one if no match is found.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        replacement: the new document to write into the collection.
        sort: a dictionary controlling the sorting order of the documents
            matching the filter, hence determining which document comes
            first and is the one being replaced. See the `find` method
            for more on sorting. Vector-based ANN sorting is achieved by
            providing a "$vector" or a "$vectorize" key in `sort`.
        upsert: controls the behavior in absence of matches.
            If True, `replacement` is inserted as a new document
            if no matches are found on the collection. If False,
            the operation silently does nothing in case of no matches.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a CollectionUpdateResult object summarizing the outcome of
        the replace operation.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> async def do_replace_one(acol: AsyncCollection) -> None:
        ...     await acol.insert_one({"Marco": "Polo"})
        ...     result0 = await acol.replace_one(
        ...         {"Marco": {"$exists": True}},
        ...         {"Buda": "Pest"},
        ...     )
        ...     print("result0.update_info", result0.update_info)
        ...     doc1 = await acol.find_one({"Buda": "Pest"})
        ...     print("doc1", doc1)
        ...     result1 = await acol.replace_one(
        ...         {"Mirco": {"$exists": True}},
        ...         {"Oh": "yeah?"},
        ...     )
        ...     print("result1.update_info", result1.update_info)
        ...     result2 = await acol.replace_one(
        ...         {"Mirco": {"$exists": True}},
        ...         {"Oh": "yeah?"},
        ...         upsert=True,
        ...     )
        ...     print("result2.update_info", result2.update_info)
        ...
        >>> asyncio.run(do_replace_one(my_async_coll))
        result0.update_info {'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1}
        doc1 {'_id': '6e669a5a-...', 'Buda': 'Pest'}
        result1.update_info {'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0}
        result2.update_info {'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '30e34e00-...'}
    """

    # Resolve the effective per-request timeout and its label.
    _req_ms, _req_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # Assemble the command body, dropping keys whose value is None.
    raw_command_body = {
        "filter": filter,
        "replacement": replacement,
        "options": {"upsert": upsert},
        "sort": sort,
    }
    payload = {
        "findOneAndReplace": {
            key: value
            for key, value in raw_command_body.items()
            if value is not None
        }
    }
    logger.info(f"findOneAndReplace on '{self.name}'")
    api_response = await self._converted_request(
        payload=payload,
        timeout_context=_TimeoutContext(request_ms=_req_ms, label=_req_label),
    )
    logger.info(f"finished findOneAndReplace on '{self.name}'")
    # A well-formed response always carries a "document" entry in its data.
    if "document" not in api_response.get("data", {}):
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from find_one_and_replace API command.",
            raw_response=api_response,
        )
    status_part = api_response.get("status") or {}
    return CollectionUpdateResult(
        raw_results=[api_response],
        update_info=_prepare_update_info([status_part]),
    )
def to_sync(self: AsyncCollection[DOC], *, embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = (unset), api_options: APIOptions | UnsetType = (unset)) ‑> Collection[DOC]

Create a Collection from this one. Save for the arguments explicitly provided as overrides, everything else is kept identical to this collection in the copy (the database is converted into a sync object).

Args

embedding_api_key
optional API key(s) for interacting with the collection. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
api_options
any additional options to set for the result, in the form of an APIOptions instance (where one can set just the needed attributes). In case the same setting is also provided as named parameter, the latter takes precedence.

Returns

the new copy, a Collection instance.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> my_async_coll.to_sync().count_documents({}, upper_bound=100)
77
Expand source code
def to_sync(
    self: AsyncCollection[DOC],
    *,
    embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
    api_options: APIOptions | UnsetType = _UNSET,
) -> Collection[DOC]:
    """
    Create a Collection from this one. Except for the arguments
    explicitly provided as overrides, everything else is kept identical
    to this collection in the copy (the database is converted into
    a sync object).

    Args:
        embedding_api_key: optional API key(s) for interacting with the collection.
            If an embedding service is configured, and this parameter is not None,
            each Data API call will include the necessary embedding-related headers
            as specified by this parameter. If a string is passed, it translates
            into the one "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            For some vectorize providers/models, if using header-based authentication,
            specialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`
            should be supplied.
        api_options: any additional options to set for the result, in the form of
            an APIOptions instance (where one can set just the needed attributes).
            In case the same setting is also provided as named parameter,
            the latter takes precedence.

    Returns:
        the new copy, a Collection instance.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> my_async_coll.to_sync().count_documents({}, upper_bound=100)
        77
    """

    # Named-parameter overrides take precedence over the `api_options` argument,
    # which in turn overrides this collection's own settings.
    named_param_options = APIOptions(
        embedding_api_key=embedding_api_key,
    )
    effective_options = self.api_options.with_override(
        api_options
    ).with_override(named_param_options)
    return Collection(
        database=self.database.to_sync(),
        name=self.name,
        keyspace=self.keyspace,
        api_options=effective_options,
    )
async def update_many(self, filter: FilterType, update: dict[str, Any], *, upsert: bool = False, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> CollectionUpdateResult

Apply an update operation to all documents matching a condition, optionally inserting one document in absence of matches.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
update
the update prescription to apply to the documents, expressed as a dictionary as per Data API syntax. Examples are: {"$set": {"field": "value"}} {"$inc": {"counter": 10}} {"$unset": {"field": ""}} See the Data API documentation for the full syntax.
upsert
this parameter controls the behavior in absence of matches. If True, a single new document (resulting from applying update to an empty document) is inserted if no matches are found on the collection. If False, the operation silently does nothing in case of no matches.
general_method_timeout_ms
a timeout, in milliseconds, for the whole requested operation (which may involve multiple API requests). This method may entail successive HTTP API requests, depending on the amount of involved documents. If not passed, the collection-level setting is used instead.
request_timeout_ms
a timeout, in milliseconds, for each API request. If not passed, the collection-level setting is used instead.
timeout_ms
an alias for general_method_timeout_ms.

Returns

a CollectionUpdateResult object summarizing the outcome of the update operation.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> async def do_update_many(acol: AsyncCollection) -> None:
...     await acol.insert_many([{"c": "red"}, {"c": "green"}, {"c": "blue"}])
...     result0 = await acol.update_many(
...         {"c": {"$ne": "green"}},
...         {"$set": {"nongreen": True}},
...     )
...     print("result0.update_info", result0.update_info)
...     result1 = await acol.update_many(
...         {"c": "orange"},
...         {"$set": {"is_also_fruit": True}},
...     )
...     print("result1.update_info", result1.update_info)
...     result2 = await acol.update_many(
...         {"c": "orange"},
...         {"$set": {"is_also_fruit": True}},
...         upsert=True,
...     )
...     print("result2.update_info", result2.update_info)
...
>>> asyncio.run(do_update_many(my_async_coll))
result0.update_info {'n': 2, 'updatedExisting': True, 'ok': 1.0, 'nModified': 2}
result1.update_info {'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0}
result2.update_info {'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '79ffd5a3-ab99-4dff-a2a5-4aaa0e59e854'}

Note

Similarly to the case of find (see its docstring for more details), running this command while, at the same time, another process is inserting new documents which match the filter of the update_many can result in an unpredictable fraction of these documents being updated. In other words, it cannot be easily predicted whether a given newly-inserted document will be picked up by the update_many command or not.

Expand source code
async def update_many(
    self,
    filter: FilterType,
    update: dict[str, Any],
    *,
    upsert: bool = False,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionUpdateResult:
    """
    Apply an update operation to all documents matching a condition,
    optionally inserting one document in absence of matches.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        update: the update prescription to apply to the documents, expressed
            as a dictionary as per Data API syntax. Examples are:
                {"$set": {"field": "value"}}
                {"$inc": {"counter": 10}}
                {"$unset": {"field": ""}}
            See the Data API documentation for the full syntax.
        upsert: this parameter controls the behavior in absence of matches.
            If True, a single new document (resulting from applying `update`
            to an empty document) is inserted if no matches are found on
            the collection. If False, the operation silently does nothing
            in case of no matches.
        general_method_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation (which may involve multiple API requests).
            This method may entail successive HTTP API requests,
            depending on the amount of involved documents.
            If not passed, the collection-level setting is used instead.
        request_timeout_ms: a timeout, in milliseconds, for each API request.
            If not passed, the collection-level setting is used instead.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a CollectionUpdateResult object summarizing the outcome of
        the update operation.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> async def do_update_many(acol: AsyncCollection) -> None:
        ...     await acol.insert_many([{"c": "red"}, {"c": "green"}, {"c": "blue"}])
        ...     result0 = await acol.update_many(
        ...         {"c": {"$ne": "green"}},
        ...         {"$set": {"nongreen": True}},
        ...     )
        ...     print("result0.update_info", result0.update_info)
        ...     result1 = await acol.update_many(
        ...         {"c": "orange"},
        ...         {"$set": {"is_also_fruit": True}},
        ...     )
        ...     print("result1.update_info", result1.update_info)
        ...     result2 = await acol.update_many(
        ...         {"c": "orange"},
        ...         {"$set": {"is_also_fruit": True}},
        ...         upsert=True,
        ...     )
        ...     print("result2.update_info", result2.update_info)
        ...
        >>> asyncio.run(do_update_many(my_async_coll))
        result0.update_info {'n': 2, 'updatedExisting': True, 'ok': 1.0, 'nModified': 2}
        result1.update_info {'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0}
        result2.update_info {'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '79ffd5a3-ab99-4dff-a2a5-4aaa0e59e854'}

    Note:
        Similarly to the case of `find` (see its docstring for more details),
        running this command while, at the same time, another process is
        inserting new documents which match the filter of the `update_many`
        can result in an unpredictable fraction of these documents being updated.
        In other words, it cannot be easily predicted whether a given
        newly-inserted document will be picked up by the update_many command or not.
    """

    # Overall-operation timeout: explicit argument first, then its
    # `timeout_ms` alias, then the collection-level default.
    _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
        (general_method_timeout_ms, "general_method_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (
            self.api_options.timeout_options.general_method_timeout_ms,
            "general_method_timeout_ms",
        ),
    )
    # Per-request timeout: explicit argument or the collection-level default.
    _request_timeout_ms, _rt_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    api_options = {
        "upsert": upsert,
    }
    # Pagination state ("pageState") carried over between successive requests.
    page_state_options: dict[str, str] = {}
    um_responses: list[dict[str, Any]] = []  # raw API responses, one per page
    um_statuses: list[dict[str, Any]] = []  # "status" portions, one per page
    must_proceed = True
    logger.info(f"starting update_many on '{self.name}'")
    # The overall timeout spans all requests of this (possibly paginated) operation.
    timeout_manager = MultiCallTimeoutManager(
        overall_timeout_ms=_general_method_timeout_ms,
        timeout_label=_gmt_label,
    )
    while must_proceed:
        options = {**api_options, **page_state_options}
        this_um_payload = {
            "updateMany": {
                k: v
                for k, v in {
                    "filter": filter,
                    "update": update,
                    "options": options,
                }.items()
                if v is not None
            }
        }
        logger.info(f"updateMany on '{self.name}'")
        this_um_response = await self._converted_request(
            payload=this_um_payload,
            timeout_context=timeout_manager.remaining_timeout(
                cap_time_ms=_request_timeout_ms,
                cap_timeout_label=_rt_label,
            ),
        )
        logger.info(f"finished updateMany on '{self.name}'")
        this_um_status = this_um_response.get("status") or {}
        #
        # if errors, quit early: raise carrying a partial result built from
        # the pages that were already processed successfully.
        if this_um_response.get("errors", []):
            partial_update_info = _prepare_update_info(um_statuses)
            partial_result = CollectionUpdateResult(
                raw_results=um_responses,
                update_info=partial_update_info,
            )
            all_um_responses = um_responses + [this_um_response]
            raise CollectionUpdateManyException.from_responses(
                commands=[None for _ in all_um_responses],
                raw_responses=all_um_responses,
                partial_result=partial_result,
            )
        else:
            # An error-free response must still carry a "status" entry.
            if "status" not in this_um_response:
                raise UnexpectedDataAPIResponseException(
                    text="Faulty response from update_many API command.",
                    raw_response=this_um_response,
                )
            um_responses.append(this_um_response)
            um_statuses.append(this_um_status)
            # A "nextPageState" in the status means more documents remain:
            # loop again, passing the page state in the next request's options.
            next_page_state = this_um_status.get("nextPageState")
            if next_page_state is not None:
                must_proceed = True
                page_state_options = {"pageState": next_page_state}
            else:
                must_proceed = False
                page_state_options = {}

    # Aggregate the per-page statuses into a single update-info summary.
    update_info = _prepare_update_info(um_statuses)
    logger.info(f"finished update_many on '{self.name}'")
    return CollectionUpdateResult(
        raw_results=um_responses,
        update_info=update_info,
    )
async def update_one(self, filter: FilterType, update: dict[str, Any], *, sort: SortType | None = None, upsert: bool = False, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> CollectionUpdateResult

Update a single document on the collection as requested, optionally inserting a new one if no match is found.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
update
the update prescription to apply to the document, expressed as a dictionary as per Data API syntax. Examples are: {"$set": {"field": "value"}} {"$inc": {"counter": 10}} {"$unset": {"field": ""}} See the Data API documentation for the full syntax.
sort
with this dictionary parameter one can control the sorting order of the documents matching the filter, effectively determining what document will come first and hence be the replaced one. See the find method for more on sorting. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
upsert
this parameter controls the behavior in absence of matches. If True, a new document (resulting from applying the update to an empty document) is inserted if no matches are found on the collection. If False, the operation silently does nothing in case of no matches.
general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Returns

a CollectionUpdateResult object summarizing the outcome of the update operation.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> async def do_update_one(acol: AsyncCollection) -> None:
...     await acol.insert_one({"Marco": "Polo"})
...     result0 = await acol.update_one(
...         {"Marco": {"$exists": True}},
...         {"$inc": {"rank": 3}},
...     )
...     print("result0.update_info", result0.update_info)
...     result1 = await acol.update_one(
...         {"Mirko": {"$exists": True}},
...         {"$inc": {"rank": 3}},
...     )
...     print("result1.update_info", result1.update_info)
...     result2 = await acol.update_one(
...         {"Mirko": {"$exists": True}},
...         {"$inc": {"rank": 3}},
...         upsert=True,
...     )
...     print("result2.update_info", result2.update_info)
...
>>> asyncio.run(do_update_one(my_async_coll))
result0.update_info {'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1})
result1.update_info {'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0})
result2.update_info {'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '75748092-...'}
Expand source code
async def update_one(
    self,
    filter: FilterType,
    update: dict[str, Any],
    *,
    sort: SortType | None = None,
    upsert: bool = False,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionUpdateResult:
    """
    Update a single document on the collection as requested,
    optionally inserting a new one if no match is found.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        update: the update prescription to apply to the document, expressed
            as a dictionary as per Data API syntax. Examples are:
                {"$set": {"field": "value"}}
                {"$inc": {"counter": 10}}
                {"$unset": {"field": ""}}
            See the Data API documentation for the full syntax.
        sort: a dictionary controlling the sorting order of the documents
            matching the filter, hence determining which document comes
            first and is the one being updated. See the `find` method
            for more on sorting. Vector-based ANN sorting is achieved by
            providing a "$vector" or a "$vectorize" key in `sort`.
        upsert: controls the behavior in absence of matches.
            If True, a new document (resulting from applying the `update`
            to an empty document) is inserted if no matches are found on
            the collection. If False, the operation silently does nothing
            in case of no matches.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a CollectionUpdateResult object summarizing the outcome of
        the update operation.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> async def do_update_one(acol: AsyncCollection) -> None:
        ...     await acol.insert_one({"Marco": "Polo"})
        ...     result0 = await acol.update_one(
        ...         {"Marco": {"$exists": True}},
        ...         {"$inc": {"rank": 3}},
        ...     )
        ...     print("result0.update_info", result0.update_info)
        ...     result1 = await acol.update_one(
        ...         {"Mirko": {"$exists": True}},
        ...         {"$inc": {"rank": 3}},
        ...     )
        ...     print("result1.update_info", result1.update_info)
        ...     result2 = await acol.update_one(
        ...         {"Mirko": {"$exists": True}},
        ...         {"$inc": {"rank": 3}},
        ...         upsert=True,
        ...     )
        ...     print("result2.update_info", result2.update_info)
        ...
        >>> asyncio.run(do_update_one(my_async_coll))
        result0.update_info {'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1})
        result1.update_info {'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0})
        result2.update_info {'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '75748092-...'}
    """

    # Resolve the effective per-request timeout and its label.
    _req_ms, _req_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # Assemble the command body, dropping keys whose value is None.
    raw_command_body = {
        "filter": filter,
        "update": update,
        "options": {"upsert": upsert},
        "sort": sort,
    }
    payload = {
        "updateOne": {
            key: value
            for key, value in raw_command_body.items()
            if value is not None
        }
    }
    logger.info(f"updateOne on '{self.name}'")
    api_response = await self._converted_request(
        payload=payload,
        timeout_context=_TimeoutContext(request_ms=_req_ms, label=_req_label),
    )
    logger.info(f"finished updateOne on '{self.name}'")
    # A well-formed response always carries a "status" entry.
    if "status" not in api_response:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from updateOne API command.",
            raw_response=api_response,
        )
    return CollectionUpdateResult(
        raw_results=[api_response],
        update_info=_prepare_update_info([api_response["status"]]),
    )
def with_options(self: AsyncCollection[DOC], *, embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = (unset), api_options: APIOptions | UnsetType = (unset)) ‑> AsyncCollection[DOC]

Create a clone of this collection with some changed attributes.

Args

embedding_api_key
optional API key(s) for interacting with the collection. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
api_options
any additional options to set for the clone, in the form of an APIOptions instance (where one can set just the needed attributes). In case the same setting is also provided as named parameter, the latter takes precedence.

Returns

a new AsyncCollection instance.

Example

>>> collection_with_api_key_configured = my_async_collection.with_options(
...     embedding_api_key="secret-key-0123abcd...",
... )
Expand source code
def with_options(
    self: AsyncCollection[DOC],
    *,
    embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
    api_options: APIOptions | UnsetType = _UNSET,
) -> AsyncCollection[DOC]:
    """
    Create a clone of this collection with selected attributes replaced.

    Args:
        embedding_api_key: optional API key(s) for interacting with
            the collection. If an embedding service is configured and this
            parameter is not None, every Data API call carries the
            embedding-related headers it specifies. A plain string becomes
            the single "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`);
            some vectorize providers/models using header-based authentication
            require specialized subclasses of
            `astrapy.authentication.EmbeddingHeadersProvider` instead.
        api_options: any additional options for the clone, as an APIOptions
            instance (only the needed attributes have to be set). If a setting
            appears both here and as a named parameter, the named parameter
            takes precedence.

    Returns:
        a new AsyncCollection instance.

    Example:
        >>> collection_with_api_key_configured = my_async_collection.with_options(
        ...     embedding_api_key="secret-key-0123abcd...",
        ... )
    """

    # Delegate to the generic copy machinery, overriding only what was passed.
    return self._copy(
        embedding_api_key=embedding_api_key,
        api_options=api_options,
    )
class AsyncDatabase (*, api_endpoint: str, keyspace: str | None, api_options: FullAPIOptions)

A Data API database. This is the object for doing database-level DML, such as creating/deleting collections, and for obtaining Collection objects themselves. This class has an asynchronous interface.

This class is not meant for direct instantiation by the user, rather it is usually obtained by invoking methods such as get_async_database of AstraDBClient.

On Astra DB, an AsyncDatabase comes with an "API Endpoint", which implies an AsyncDatabase object instance reaches a specific region (relevant point in case of multi-region databases).

An AsyncDatabase is also always set with a "working keyspace" on which all data operations are done (unless otherwise specified).

Args

api_endpoint
the full "API Endpoint" string used to reach the Data API. Example: "https://<database_id>-<region>.apps.astra.datastax.com"
keyspace
this is the keyspace all method calls will target, unless one is explicitly specified in the call. If no keyspace is supplied when creating a Database, on Astra DB the name "default_keyspace" is set, while on other environments the keyspace is left unspecified: in this case, most operations are unavailable until a keyspace is set (through an explicit use_keyspace invocation or equivalent).
api_options
a complete specification of the API Options for this instance.

Example

>>> from astrapy import DataAPIClient
>>> my_client = DataAPIClient()
>>> my_db = my_client.get_async_database(
...    "https://01234567-....apps.astra.datastax.com",
...     token="AstraCS:...",
... )

Note

creating an instance of AsyncDatabase does not trigger actual creation of the database itself, which should exist beforehand. To create databases, see the AstraDBAdmin class.

Expand source code
class AsyncDatabase:
    """
    A Data API database. This is the object for doing database-level
    DML, such as creating/deleting collections, and for obtaining Collection
    objects themselves. This class has an asynchronous interface.

    This class is not meant for direct instantiation by the user, rather
    it is usually obtained by invoking methods such as `get_async_database`
    of AstraDBClient.

    On Astra DB, an AsyncDatabase comes with an "API Endpoint", which implies
    an AsyncDatabase object instance reaches a specific region (relevant point in
    case of multi-region databases).

    An AsyncDatabase is also always set with a "working keyspace" on which all
    data operations are done (unless otherwise specified).

    Args:
        api_endpoint: the full "API Endpoint" string used to reach the Data API.
            Example: "https://<database_id>-<region>.apps.astra.datastax.com"
        keyspace: this is the keyspace all method calls will target, unless
            one is explicitly specified in the call. If no keyspace is supplied
            when creating a Database, on Astra DB the name "default_keyspace" is set,
            while on other environments the keyspace is left unspecified: in this case,
            most operations are unavailable until a keyspace is set (through an explicit
            `use_keyspace` invocation or equivalent).
        api_options: a complete specification of the API Options for this instance.

    Example:
        >>> from astrapy import DataAPIClient
        >>> my_client = astrapy.DataAPIClient()
        >>> my_db = my_client.get_async_database(
        ...    "https://01234567-....apps.astra.datastax.com",
        ...     token="AstraCS:...",
        ... )

    Note:
        creating an instance of AsyncDatabase does not trigger actual creation
        of the database itself, which should exist beforehand. To create databases,
        see the AstraDBAdmin class.
    """

    def __init__(
        self,
        *,
        api_endpoint: str,
        keyspace: str | None,
        api_options: FullAPIOptions,
    ) -> None:
        """
        Store the options, normalize the endpoint, resolve the working
        keyspace and build the low-level API commander.
        """
        self.api_options = api_options
        # Normalize away leading/trailing slashes in the endpoint URL.
        self.api_endpoint = api_endpoint.strip("/")
        # enforce defaults if on Astra DB:
        self._using_keyspace: str | None
        if (
            keyspace is None
            and self.api_options.environment in Environment.astra_db_values
        ):
            self._using_keyspace = DEFAULT_ASTRA_DB_KEYSPACE
        else:
            self._using_keyspace = keyspace

        # Auth header plus any extra headers to send with every Data API request.
        self._commander_headers = {
            DEFAULT_DATA_API_AUTH_HEADER: self.api_options.token.get_token(),
            **self.api_options.database_additional_headers,
        }
        # Database name is fetched lazily (see `name()`) and cached here.
        self._name: str | None = None
        # None when no keyspace is set; see `_get_api_commander`.
        self._api_commander = self._get_api_commander(keyspace=self.keyspace)

    def __getattr__(self, collection_name: str) -> AsyncCollection[DefaultDocumentType]:
        # Attribute access (`db.coll_name`) is a shorthand for
        # `db.get_collection("coll_name")`; only invoked for attributes
        # not otherwise found on the instance.
        # NOTE(review): `get_collection` raises ValueError (not AttributeError)
        # when no keyspace is set, which can surprise hasattr()-style probing.
        return self.get_collection(name=collection_name)

    def __getitem__(self, collection_name: str) -> AsyncCollection[DefaultDocumentType]:
        # Indexing (`db["coll_name"]`) is a shorthand for
        # `db.get_collection("coll_name")`.
        return self.get_collection(name=collection_name)

    def __repr__(self) -> str:
        """Describe this database by endpoint, keyspace and API options."""
        if self._using_keyspace is None:
            ks_part = "keyspace not set"
        else:
            ks_part = f'keyspace="{self._using_keyspace}"'
        # None of the three parts can be None, so they are always all shown.
        described = ", ".join(
            [
                f'api_endpoint="{self.api_endpoint}"',
                ks_part,
                f"api_options={self.api_options}",
            ]
        )
        return f"{self.__class__.__name__}({described})"

    def __eq__(self, other: Any) -> bool:
        """Equality: same endpoint, same keyspace and same API options."""
        if not isinstance(other, AsyncDatabase):
            return False
        return all(
            getattr(self, attribute) == getattr(other, attribute)
            for attribute in ("api_endpoint", "keyspace", "api_options")
        )

    def _get_api_commander(self, keyspace: str | None) -> APICommander | None:
        """
        Build a new APICommander from this database's properties and the
        provided keyspace.

        A None keyspace yields None (signaling a "keyspace not set").
        """

        if keyspace is None:
            return None
        # Assemble the request base path from the non-empty URL pieces,
        # e.g. "/api/json/v1/<keyspace>".
        path_components: list[str] = []
        for raw_component in (
            self.api_options.data_api_url_options.api_path,
            self.api_options.data_api_url_options.api_version,
            keyspace,
        ):
            if raw_component is None:
                continue
            stripped = raw_component.strip("/")
            if stripped != "":
                path_components.append(stripped)
        base_path = "/" + "/".join(path_components)
        return APICommander(
            api_endpoint=self.api_endpoint,
            path=base_path,
            headers=self._commander_headers,
            callers=self.api_options.callers,
            redacted_header_names=self.api_options.redacted_header_names,
        )

    def _get_driver_commander(self, keyspace: str | None) -> APICommander:
        """
        Like _get_api_commander, but falling back to this database's own
        commander when no keyspace is passed; raise if neither yields one.
        """
        commander = (
            self._get_api_commander(keyspace=keyspace)
            if keyspace
            else self._api_commander
        )
        if commander is None:
            raise ValueError(
                "No keyspace specified. This operation requires a keyspace to "
                "be set, e.g. through the `use_keyspace` method."
            )
        return commander

    async def __aenter__(self) -> AsyncDatabase:
        # Async context-manager entry: nothing to acquire, just return self.
        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None = None,
        exc_value: BaseException | None = None,
        traceback: TracebackType | None = None,
    ) -> None:
        """Propagate context exit to the underlying API commander, if any."""
        commander = self._api_commander
        if commander is not None:
            await commander.__aexit__(
                exc_type=exc_type,
                exc_value=exc_value,
                traceback=traceback,
            )

    def _copy(
        self,
        *,
        keyspace: str | None = None,
        token: str | TokenProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncDatabase:
        # Option precedence, lowest to highest: this database's options,
        # then `api_options`, then the explicitly-named `token`.
        effective_options = self.api_options.with_override(
            api_options
        ).with_override(APIOptions(token=token))
        return AsyncDatabase(
            api_endpoint=self.api_endpoint,
            keyspace=keyspace or self.keyspace,
            api_options=effective_options,
        )

    def with_options(
        self,
        *,
        keyspace: str | None = None,
        token: str | TokenProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncDatabase:
        """
        Create a clone of this database with selected attributes replaced.

        Args:
            keyspace: the keyspace all method calls will target, unless one is
                explicitly specified in the call. If no keyspace is supplied
                when creating a Database, the name "default_keyspace" is set.
            token: an Access Token to the database. Example: `"AstraCS:xyz..."`.
                Either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            api_options: any additional options for the clone, as an APIOptions
                instance (only the needed attributes have to be set). If a
                setting appears both here and as a named parameter, the named
                parameter takes precedence.

        Returns:
            a new `AsyncDatabase` instance.

        Example:
            >>> async_database_2 = async_database.with_options(
            ...     keyspace="the_other_keyspace",
            ...     token="AstraCS:xyz...",
            ... )
        """

        # All the override logic lives in _copy.
        return self._copy(
            keyspace=keyspace,
            token=token,
            api_options=api_options,
        )

    def to_sync(
        self,
        *,
        keyspace: str | None = None,
        token: str | TokenProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> Database:
        """
        Create a (synchronous) Database counterpart of this database.
        Everything not explicitly overridden through the arguments is
        carried over identically to the copy.

        Args:
            keyspace: the keyspace all method calls will target, unless one is
                explicitly specified in the call. If no keyspace is supplied
                when creating a Database, the name "default_keyspace" is set.
            token: an Access Token to the database. Example: "AstraCS:xyz..."
                Either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            api_options: any additional options for the result, as an APIOptions
                instance (only the needed attributes have to be set). If a
                setting appears both here and as a named parameter, the named
                parameter takes precedence.

        Returns:
            the new copy, a `Database` instance.

        Example:
            >>> my_sync_db = async_database.to_sync()
            >>> my_sync_db.list_collection_names()
            ['a_collection', 'another_collection']
        """

        # Option precedence, lowest to highest: this database's options,
        # then `api_options`, then the explicitly-named `token`.
        named_overrides = APIOptions(token=token)
        result_api_options = self.api_options.with_override(
            api_options
        ).with_override(named_overrides)
        return Database(
            api_endpoint=self.api_endpoint,
            keyspace=keyspace or self.keyspace,
            api_options=result_api_options,
        )

    def use_keyspace(self, keyspace: str) -> None:
        """
        Switch this AsyncDatabase, in place, to a new working keyspace.

        The keyspace itself is not created by this call: it must exist
        already (created for instance with a
        `DatabaseAdmin.async_create_keyspace` call).

        Args:
            keyspace: the new keyspace to use as the database working keyspace.

        Returns:
            None.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> asyncio.run(async_database.list_collection_names())
            ['coll_1', 'coll_2']
            >>> async_database.use_keyspace("an_empty_keyspace")
            >>> asyncio.run(async_database.list_collection_names())
            []
        """
        logger.info(f"switching to keyspace '{keyspace}'")
        self._using_keyspace = keyspace
        # Rebuild the commander so subsequent requests target the new keyspace.
        self._api_commander = self._get_api_commander(keyspace=keyspace)

    async def info(
        self,
        *,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> AstraDBDatabaseInfo:
        """
        Fetch additional information on the database, as an AstraDBDatabaseInfo.

        Some of the returned properties (such as raw_info["keyspaces"]) change
        over the database lifetime, so every invocation of this method issues
        a fresh request to the DevOps API.

        Args:
            database_admin_timeout_ms: a timeout, in milliseconds, to impose
                on the underlying API request. If not provided, this object's
                defaults apply. (This method issues a single API request,
                hence all timeout parameters are treated the same.)
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> asyncio.run(async_database.info()).region
            'eu-west-1'
            >>> asyncio.run(
            ...     async_database.info()
            ... ).raw_info['datacenters'][0]['dateCreated']
            '2023-01-30T12:34:56Z'

        Note:
            see the AstraDBDatabaseInfo documentation for a caveat about the
            difference between the `region` and the `raw["region"]` attributes.
        """

        # Only Astra DB environments expose a DevOps API to query.
        if self.api_options.environment not in Environment.astra_db_values:
            raise InvalidEnvironmentException(
                "Environments outside of Astra DB are not supported."
            )

        _timeout_ms, _label = _select_singlereq_timeout_da(
            timeout_options=self.api_options.timeout_options,
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info("getting database info")
        fetched_info = await async_fetch_database_info(
            self.api_endpoint,
            keyspace=self.keyspace,
            request_timeout_ms=_timeout_ms,
            api_options=self.api_options,
        )
        if fetched_info is None:
            raise DevOpsAPIException("Failure while fetching database info.")
        logger.info("finished getting database info")
        return fetched_info

    @property
    def id(self) -> str:
        """
        The ID of this database.

        Example:
            >>> my_async_database.id
            '01234567-89ab-cdef-0123-456789abcdef'
        """

        # The ID is parsed out of the API endpoint (Astra DB endpoints only).
        parsed = parse_api_endpoint(self.api_endpoint)
        if parsed is None:
            raise DevOpsAPIException(
                "Database is not in a supported environment for this operation."
            )
        return parsed.database_id

    @property
    def region(self) -> str:
        """
        The region where this database is located.

        The region stays well defined for multi-region databases too, since
        a Database instance connects to exactly one of the regions
        (as specified by the API Endpoint).

        Example:
            >>> my_async_database.region
            'us-west-2'
        """

        # The region is parsed out of the API endpoint (Astra DB endpoints only).
        parsed = parse_api_endpoint(self.api_endpoint)
        if parsed is None:
            raise DevOpsAPIException(
                "Database is not in a supported environment for this operation."
            )
        return parsed.region

    async def name(self) -> str:
        """
        The name of this database. Note that this bears no unicity guarantees.

        The first call performs a request to the DevOps API (via `info()`);
        the resulting name is then cached for subsequent calls.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> asyncio.run(async_database.name())
            'the_application_database'
        """

        if self._name is None:
            db_info = await self.info()
            self._name = db_info.name
        return self._name

    @property
    def keyspace(self) -> str | None:
        """
        The keyspace this database uses as target for all commands when
        no method-call-specific keyspace is specified.

        Returns:
            the working keyspace (a string), or None if not set.

        Example:
            >>> async_database.keyspace
            'the_keyspace'
        """

        # Read-only view over the working keyspace (mutated via `use_keyspace`).
        return self._using_keyspace

    @overload
    def get_collection(
        self,
        name: str,
        *,
        keyspace: str | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncCollection[DefaultDocumentType]: ...

    @overload
    def get_collection(
        self,
        name: str,
        *,
        document_type: type[DOC],
        keyspace: str | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncCollection[DOC]: ...

    def get_collection(
        self,
        name: str,
        *,
        document_type: type[Any] = DefaultDocumentType,
        keyspace: str | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncCollection[DOC]:
        """
        Spawn an `AsyncCollection` object instance representing a collection
        on this database.

        Creating the `AsyncCollection` instance does not touch the database at
        all: for the instance to be used meaningfully, the collection must
        exist already (for instance, previously created through the
        `create_collection` method).

        Args:
            name: the name of the collection.
            document_type: a formal specifier for the type checker. If omitted,
                the resulting AsyncCollection is implicitly an
                `AsyncCollection[dict[str, Any]]`. If provided, it must match
                the type hint specified in the assignment.
                See the examples below.
            keyspace: the keyspace containing the collection. If no keyspace
                is specified, the setting for this database is used.
            embedding_api_key: optional API key(s) for interacting with
                the collection. If an embedding service is configured and this
                parameter is not None, every Data API call carries the
                embedding-related headers it specifies. A plain string becomes
                the single "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`);
                some vectorize providers/models using header-based
                authentication require specialized subclasses of
                `astrapy.authentication.EmbeddingHeadersProvider` instead.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults inherited from the
                Database. This allows for a deeper configuration of the
                collection, e.g. concerning timeouts; if this is passed
                together with the named timeout parameters, the latter will
                take precedence in their respective settings.

        Returns:
            an `AsyncCollection` instance, representing the desired collection
                (but without any form of validation).

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> async def count_docs(adb: AsyncDatabase, c_name: str) -> int:
            ...    async_col = adb.get_collection(c_name)
            ...    return await async_col.count_documents({}, upper_bound=100)
            ...
            >>> asyncio.run(count_docs(async_database, "my_collection"))
            45

        Note: the attribute and indexing syntax forms achieve the same effect
            as this method, returning an AsyncCollection.
            In other words, the following are equivalent:
                async_database.get_collection("coll_name")
                async_database.coll_name
                async_database["coll_name"]
        """

        # lazy importing here against circular-import error
        from astrapy.collection import AsyncCollection

        # Layer the options: database defaults, then spawn_api_options,
        # then the explicitly-named embedding_api_key (highest precedence).
        collection_api_options = self.api_options.with_override(
            spawn_api_options,
        ).with_override(
            APIOptions(embedding_api_key=embedding_api_key),
        )

        target_keyspace = keyspace or self.keyspace
        if target_keyspace is None:
            raise ValueError(
                "No keyspace specified. This operation requires a keyspace to "
                "be set, e.g. through the `use_keyspace` method."
            )
        return AsyncCollection(
            database=self,
            name=name,
            keyspace=target_keyspace,
            api_options=collection_api_options,
        )

    @overload
    async def create_collection(
        self,
        name: str,
        *,
        definition: CollectionDefinition | dict[str, Any] | None = None,
        keyspace: str | None = None,
        collection_admin_timeout_ms: int | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncCollection[DefaultDocumentType]: ...

    @overload
    async def create_collection(
        self,
        name: str,
        *,
        definition: CollectionDefinition | dict[str, Any] | None = None,
        document_type: type[DOC],
        keyspace: str | None = None,
        collection_admin_timeout_ms: int | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncCollection[DOC]: ...

    async def create_collection(
        self,
        name: str,
        *,
        definition: CollectionDefinition | dict[str, Any] | None = None,
        document_type: type[Any] = DefaultDocumentType,
        keyspace: str | None = None,
        collection_admin_timeout_ms: int | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncCollection[DOC]:
        """
        Create a collection on the database and return the `AsyncCollection`
        instance that represents it.

        This is a blocking operation: the method returns when the collection
        is ready to be used. As opposed to the `get_collection` method,
        this method causes the collection to be actually created on the DB.

        Args:
            name: the name of the collection.
            definition: a complete collection definition. This can be an
                instance of `CollectionDefinition` or an equivalent (nested)
                dictionary, in which case it will be parsed into a
                `CollectionDefinition`. See the
                `astrapy.info.CollectionDefinition` class and the
                `AsyncCollection` class for more details and ways to construct
                this object.
            document_type: this parameter acts a formal specifier for the type
                checker. If omitted, the resulting AsyncCollection is implicitly
                an `AsyncCollection[dict[str, Any]]`. If provided, it must match
                the type hint specified in the assignment.
                See the examples below.
            keyspace: the keyspace where the collection is to be created.
                If not specified, the general setting for this database is used.
            collection_admin_timeout_ms: a timeout, in milliseconds, to impose
                on the underlying API request. If not provided, this object's
                defaults apply.
            embedding_api_key: optional API key(s) for interacting with the
                collection. If an embedding service is configured, and this
                parameter is not None, each Data API call will include the
                necessary embedding-related headers as specified by this
                parameter. If a string is passed, it translates into the one
                "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
                For some vectorize providers/models, if using header-based
                authentication, specialized subclasses of
                `astrapy.authentication.EmbeddingHeadersProvider`
                should be supplied.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults inherited from the
                Database. This allows for a deeper configuration of the
                collection, e.g. concerning timeouts; if this is passed
                together with the named timeout parameters, the latter will
                take precedence in their respective settings.

        Returns:
            an `AsyncCollection` instance, representing the newly-created
            collection.

        Raises:
            UnexpectedDataAPIResponseException: if the API response does not
                signal success.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> # Create a collection using the fluent syntax for its definition
            >>> from astrapy.constants import VectorMetric
            >>> from astrapy.info import CollectionDefinition
            >>>
            >>> collection_definition = (
            ...     CollectionDefinition.builder()
            ...     .set_vector_dimension(3)
            ...     .set_vector_metric(VectorMetric.DOT_PRODUCT)
            ...     .set_indexing("deny", ["annotations", "logs"])
            ...     .build()
            ... )
            >>> my_collection = asyncio.run(async_database.create_collection(
            ...     "my_events",
            ...     definition=collection_definition,
            ... ))
            >>>
            >>> # Create a collection with the definition as object
            >>> from astrapy.info import CollectionVectorOptions
            >>>
            >>> collection_definition_1 = CollectionDefinition(
            ...     vector=CollectionVectorOptions(
            ...         dimension=3,
            ...         metric=VectorMetric.DOT_PRODUCT,
            ...     ),
            ...     indexing={"deny": ["annotations", "logs"]},
            ... )
            >>> my_collection_1 = asyncio.run(async_database.create_collection(
            ...     "my_events",
            ...     definition=collection_definition_1,
            ... ))
            >>>
            >>>
            >>> # Create a collection with the definition as plain dictionary
            >>> collection_definition_2 = {
            ...     "indexing": {"deny": ["annotations", "logs"]},
            ...     "vector": {
            ...         "dimension": 3,
            ...         "metric": VectorMetric.DOT_PRODUCT,
            ...     },
            ... }
            >>> my_collection_2 = asyncio.run(async_database.create_collection(
            ...     "my_events",
            ...     definition=collection_definition_2,
            ... ))
        """

        # Normalize the definition (object, dict or None) to a plain dict.
        cc_definition: dict[str, Any] = CollectionDefinition.coerce(
            definition or {}
        ).as_dict()
        # Resolve the timeout: the explicit argument wins over this
        # object's defaults; the label is the same in either case.
        _ca_label = "collection_admin_timeout_ms"
        if collection_admin_timeout_ms is not None:
            _collection_admin_timeout_ms = collection_admin_timeout_ms
        else:
            _collection_admin_timeout_ms = (
                self.api_options.timeout_options.collection_admin_timeout_ms
            )
        driver_commander = self._get_driver_commander(keyspace=keyspace)
        # Drop absent (None) and empty-dict entries from the payload.
        cc_payload = {
            "createCollection": {
                k: v
                for k, v in {
                    "name": name,
                    "options": cc_definition,
                }.items()
                if v is not None and v != {}
            }
        }
        logger.info(f"createCollection('{name}')")
        cc_response = await driver_commander.async_request(
            payload=cc_payload,
            timeout_context=_TimeoutContext(
                request_ms=_collection_admin_timeout_ms, label=_ca_label
            ),
        )
        if cc_response.get("status") != {"ok": 1}:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from createCollection API command.",
                raw_response=cc_response,
            )
        logger.info(f"finished createCollection('{name}')")
        # Hand back a (client-side) collection object for the new collection.
        return self.get_collection(
            name,
            document_type=document_type,
            keyspace=keyspace,
            embedding_api_key=embedding_api_key,
            spawn_api_options=spawn_api_options,
        )

    async def drop_collection(
        self,
        name: str,
        *,
        keyspace: str | None = None,
        collection_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> dict[str, Any]:
        """
        Drop (delete) a collection from the database, removing all documents in it.

        Args:
            name: the name of the collection to be dropped.
            keyspace: the keyspace hosting the collection. If omitted,
                the database working keyspace is used.
            collection_admin_timeout_ms: a timeout, in milliseconds, for the
                underlying API request. Falls back to this object's defaults
                if not provided. (A single API request is issued by this
                method, hence all timeout parameters are equivalent.)
            request_timeout_ms: an alias for `collection_admin_timeout_ms`.
            timeout_ms: an alias for `collection_admin_timeout_ms`.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> asyncio.run(async_database.list_collection_names())
            ['a_collection', 'my_v_col', 'another_col']
            >>> asyncio.run(async_database.drop_collection("my_v_col"))
            >>> asyncio.run(async_database.list_collection_names())
            ['a_collection', 'another_col']
        """

        # Resolve the effective timeout and its label (used in error reporting).
        timeout_ms_resolved, timeout_label = _select_singlereq_timeout_ca(
            timeout_options=self.api_options.timeout_options,
            collection_admin_timeout_ms=collection_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        target_keyspace = keyspace or self.keyspace
        commander = self._get_driver_commander(keyspace=target_keyspace)
        logger.info(f"deleteCollection('{name}')")
        dc_response = await commander.async_request(
            payload={"deleteCollection": {"name": name}},
            timeout_context=_TimeoutContext(
                request_ms=timeout_ms_resolved, label=timeout_label
            ),
        )
        # Anything other than a plain {"ok": 1} status is treated as a failure.
        if dc_response.get("status") != {"ok": 1}:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from deleteCollection API command.",
                raw_response=dc_response,
            )
        logger.info(f"finished deleteCollection('{name}')")
        return dc_response.get("status", {})  # type: ignore[no-any-return]

    async def list_collections(
        self,
        *,
        keyspace: str | None = None,
        collection_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[CollectionDescriptor]:
        """
        List all collections in a given keyspace for this database.

        Args:
            keyspace: the keyspace to inspect. If omitted, the database
                working keyspace is used.
            collection_admin_timeout_ms: a timeout, in milliseconds, for the
                underlying API request. Falls back to this object's defaults
                if not provided. (A single API request is issued by this
                method, hence all timeout parameters are equivalent.)
            request_timeout_ms: an alias for `collection_admin_timeout_ms`.
            timeout_ms: an alias for `collection_admin_timeout_ms`.

        Returns:
            a list of CollectionDescriptor instances, one per collection.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> async def a_list_colls(adb: AsyncDatabase) -> None:
            ...     a_coll_list = await adb.list_collections()
            ...     print("* list:", a_coll_list)
            ...     for coll in await adb.list_collections():
            ...         print("* coll:", coll)
            ...
            >>> asyncio.run(a_list_colls(async_database))
            * list: [CollectionDescriptor(name='my_v_col', options=CollectionDefinition())]
            * coll: CollectionDescriptor(name='my_v_col', options=CollectionDefinition())
        """

        timeout_ms_resolved, timeout_label = _select_singlereq_timeout_ca(
            timeout_options=self.api_options.timeout_options,
            collection_admin_timeout_ms=collection_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # Delegate the actual findCollections round-trip to the shared helper.
        return await self._list_collections_ctx(
            keyspace=keyspace,
            timeout_context=_TimeoutContext(
                request_ms=timeout_ms_resolved, label=timeout_label
            ),
        )

    async def _list_collections_ctx(
        self,
        *,
        keyspace: str | None,
        timeout_context: _TimeoutContext,
    ) -> list[CollectionDescriptor]:
        # Shared findCollections round-trip used by the public listing method.
        commander = self._get_driver_commander(keyspace=keyspace)
        logger.info("findCollections")
        fc_response = await commander.async_request(
            payload={"findCollections": {"options": {"explain": True}}},
            timeout_context=timeout_context,
        )
        if "collections" not in fc_response.get("status", {}):
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from findCollections API command.",
                raw_response=fc_response,
            )
        logger.info("finished findCollections")
        # The response carries a list of dicts, each marshaled into a descriptor.
        return [
            CollectionDescriptor._from_dict(collection_dict)
            for collection_dict in fc_response["status"]["collections"]
        ]

    async def list_collection_names(
        self,
        *,
        keyspace: str | None = None,
        collection_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[str]:
        """
        List the names of all collections in a given keyspace of this database.

        Args:
            keyspace: the keyspace to inspect. If omitted, the database
                working keyspace is used.
            collection_admin_timeout_ms: a timeout, in milliseconds, for the
                underlying API request. Falls back to this object's defaults
                if not provided. (A single API request is issued by this
                method, hence all timeout parameters are equivalent.)
            request_timeout_ms: an alias for `collection_admin_timeout_ms`.
            timeout_ms: an alias for `collection_admin_timeout_ms`.

        Returns:
            a list of the collection names as strings, in no particular order.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> asyncio.run(async_database.list_collection_names())
            ['a_collection', 'another_col']
        """

        timeout_ms_resolved, timeout_label = _select_singlereq_timeout_ca(
            timeout_options=self.api_options.timeout_options,
            collection_admin_timeout_ms=collection_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        commander = self._get_driver_commander(keyspace=keyspace)
        # No "explain" option here: only the names are requested.
        fc_payload: dict[str, Any] = {"findCollections": {}}
        logger.info("findCollections")
        fc_response = await commander.async_request(
            payload=fc_payload,
            timeout_context=_TimeoutContext(
                request_ms=timeout_ms_resolved, label=timeout_label
            ),
        )
        if "collections" not in fc_response.get("status", {}):
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from findCollections API command.",
                raw_response=fc_response,
            )
        logger.info("finished findCollections")
        return fc_response["status"]["collections"]  # type: ignore[no-any-return]

    # Overload: no `row_type` given; rows are implicitly typed as the
    # default `dict[str, Any]` (DefaultRowType).
    @overload
    def get_table(
        self,
        name: str,
        *,
        keyspace: str | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncTable[DefaultRowType]: ...

    # Overload: an explicit `row_type` determines the row typing of the
    # resulting AsyncTable for the type checker.
    @overload
    def get_table(
        self,
        name: str,
        *,
        row_type: type[ROW],
        keyspace: str | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncTable[ROW]: ...

    def get_table(
        self,
        name: str,
        *,
        row_type: type[Any] = DefaultRowType,
        keyspace: str | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncTable[ROW]:
        """
        Spawn an `AsyncTable` object instance representing a table
        on this database.

        This is a purely local operation with no effect on the actual state
        of the database: for the returned `AsyncTable` to be used
        meaningfully, the table must exist already (for instance, it should
        have been created previously through the `create_table` method).

        Args:
            name: the name of the table.
            row_type: a formal specifier for the type checker: if omitted,
                the result is an `AsyncTable[dict[str, Any]]`; if provided,
                it must match the type hint specified in the assignment.
                See the examples below.
            keyspace: the keyspace containing the table. If omitted,
                the database working keyspace is used.
            embedding_api_key: optional API key(s) for interacting with
                the table. If an embedding service is configured and this
                parameter is not None, each Data API call will include the
                necessary embedding-related headers as specified by this
                parameter. A plain string translates into the one
                "embedding api key" header (i.e.
                `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`);
                for some vectorize providers/models using header-based
                authentication, specialized subclasses of
                `astrapy.authentication.EmbeddingHeadersProvider` should
                be supplied.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults inherited from the
                Database, e.g. for a deeper configuration of timeouts.
                If passed together with the named timeout parameters, the
                latter take precedence in their respective settings.

        Returns:
            an `AsyncTable` instance, representing the desired table
                (but without any form of validation).

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> # Get an AsyncTable object (and read a property of it as an example):
            >>> my_async_table = async_database.get_table("games")
            >>> my_async_table.full_name
            'default_keyspace.games'
            >>>
            >>> # Get an AsyncTable object in a specific keyspace,
            >>> # and set an embedding API key to it:
            >>> my_other_async_table = async_database.get_table(
            ...     "tournaments",
            ...     keyspace="the_other_keyspace",
            ...     embedding_api_key="secret-012abc...",
            ... )
            >>> from astrapy import AsyncTable
            >>> MyCustomDictType = dict[str, int]
            >>>
            >>> # Get an AsyncTable object typed with a specific type for its rows:
            >>> my_typed_async_table: AsyncTable[MyCustomDictType] = async_database.get_table(
            ...     "games",
            ...     row_type=MyCustomDictType,
            ... )
        """

        # lazy importing here against circular-import error
        from astrapy.table import AsyncTable

        table_keyspace = keyspace or self.keyspace
        if table_keyspace is None:
            raise ValueError(
                "No keyspace specified. This operation requires a keyspace to "
                "be set, e.g. through the `use_keyspace` method."
            )
        # Layer the overrides: explicit embedding key wins over spawn options,
        # which in turn win over the Database-inherited defaults.
        table_api_options = self.api_options.with_override(
            spawn_api_options
        ).with_override(APIOptions(embedding_api_key=embedding_api_key))
        return AsyncTable[ROW](
            database=self,
            name=name,
            keyspace=table_keyspace,
            api_options=table_api_options,
        )

    # Overload: no `row_type` given; rows of the created table are
    # implicitly typed as the default `dict[str, Any]` (DefaultRowType).
    @overload
    async def create_table(
        self,
        name: str,
        *,
        definition: CreateTableDefinition | dict[str, Any],
        keyspace: str | None = None,
        if_not_exists: bool | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncTable[DefaultRowType]: ...

    # Overload: an explicit `row_type` determines the row typing of the
    # resulting AsyncTable for the type checker.
    @overload
    async def create_table(
        self,
        name: str,
        *,
        definition: CreateTableDefinition | dict[str, Any],
        row_type: type[ROW],
        keyspace: str | None = None,
        if_not_exists: bool | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncTable[ROW]: ...

    async def create_table(
        self,
        name: str,
        *,
        definition: CreateTableDefinition | dict[str, Any],
        row_type: type[Any] = DefaultRowType,
        keyspace: str | None = None,
        if_not_exists: bool | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncTable[ROW]:
        """
        Create a table on the database and return the AsyncTable
        instance that represents it.

        This is a blocking operation: the method returns once the table
        is ready to be used. As opposed to `get_table`, this method causes
        the table to be actually created on the database.

        Args:
            name: the name of the table.
            definition: a complete table definition: either an instance of
                `CreateTableDefinition` or an equivalent (nested) dictionary,
                which gets parsed into one. See the
                `astrapy.info.CreateTableDefinition` class and the
                `AsyncTable` class for details and ways to construct it.
            row_type: a formal specifier for the type checker: if omitted,
                the result is an `AsyncTable[dict[str, Any]]`; if provided,
                it must match the type hint specified in the assignment.
                See the example below.
            keyspace: the keyspace where the table is to be created.
                If omitted, the database working keyspace is used.
            if_not_exists: if set to True, the command succeeds even if a
                table with the given name exists already (in which case no
                actual table creation takes place on the database). Defaults
                to False, i.e. a table-name collision makes the API raise
                an error.
            table_admin_timeout_ms: a timeout, in milliseconds, for the
                underlying API request. Falls back to this object's defaults
                if not provided. (A single API request is issued by this
                method, hence all timeout parameters are equivalent.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.
            embedding_api_key: optional API key(s) for interacting with
                the table. If an embedding service is configured and this
                parameter is not None, each Data API call will include the
                necessary embedding-related headers as specified by this
                parameter. A plain string translates into the one
                "embedding api key" header (i.e.
                `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`);
                for some vectorize providers/models using header-based
                authentication, specialized subclasses of
                `astrapy.authentication.EmbeddingHeadersProvider` should
                be supplied.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults inherited from the
                Database, e.g. for a deeper configuration of timeouts.
                If passed together with the named timeout parameters, the
                latter take precedence in their respective settings.

        Returns:
            an `AsyncTable` instance, representing the newly-created table.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> # Create a table using the fluent syntax for definition
            >>> # (and do not raise an error if the table exists already)
            >>> from astrapy.constants import SortMode
            >>> from astrapy.info import (
            ...     CreateTableDefinition,
            ...     ColumnType,
            ... )
            >>> table_definition = (
            ...     CreateTableDefinition.builder()
            ...     .add_column("match_id", ColumnType.TEXT)
            ...     .add_column("round", ColumnType.INT)
            ...     .add_vector_column("m_vector", dimension=3)
            ...     .add_column("score", ColumnType.INT)
            ...     .add_set_column("fighters", ColumnType.UUID)
            ...     .add_partition_by(["match_id"])
            ...     .add_partition_sort({"round": SortMode.ASCENDING})
            ...     .build()
            ... )
            >>> my_async_table = asyncio.run(async_database.create_table(
            ...     "games",
            ...     definition=table_definition,
            ...     if_not_exists=True,
            ... ))
            >>>
            >>> # The definition can equivalently be a plain dictionary, e.g.:
            >>> table_definition_2 = {
            ...     "columns": {
            ...         "match_id": {"type": "text"},
            ...         "round": {"type": "int"},
            ...         "m_vector": {"type": "vector", "dimension": 3},
            ...         "score": {"type": "int"},
            ...         "fighters": {"type": "set", "valueType": "uuid"},
            ...     },
            ...     "primaryKey": {
            ...         "partitionBy": ["match_id"],
            ...         "partitionSort": {"round": 1},
            ...     },
            ... }
            >>> my_async_table_2 = asyncio.run(async_database.create_table(
            ...     "games",
            ...     definition=table_definition_2,
            ...     if_not_exists=True,
            ... ))
        """

        create_options: dict[str, bool] = (
            {} if if_not_exists is None else {"ifNotExists": if_not_exists}
        )
        # Normalize the definition (object or plain dict) into API-ready form.
        table_definition: dict[str, Any] = CreateTableDefinition.coerce(
            definition
        ).as_dict()
        timeout_ms_resolved, timeout_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        commander = self._get_driver_commander(keyspace=keyspace)
        # None and empty-dict entries are stripped from the command payload.
        ct_payload = {
            "createTable": {
                key: value
                for key, value in (
                    ("name", name),
                    ("definition", table_definition),
                    ("options", create_options),
                )
                if value is not None and value != {}
            }
        }
        logger.info(f"createTable('{name}')")
        ct_response = await commander.async_request(
            payload=ct_payload,
            timeout_context=_TimeoutContext(
                request_ms=timeout_ms_resolved, label=timeout_label
            ),
        )
        if ct_response.get("status") != {"ok": 1}:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from createTable API command.",
                raw_response=ct_response,
            )
        logger.info(f"finished createTable('{name}')")
        # Spawn the client-side object representing the just-created table.
        return self.get_table(
            name,
            row_type=row_type,
            keyspace=keyspace,
            embedding_api_key=embedding_api_key,
            spawn_api_options=spawn_api_options,
        )

    async def drop_table_index(
        self,
        name: str,
        *,
        keyspace: str | None = None,
        if_exists: bool | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Drop (delete) an index of any kind from the table it is associated to.

        This is a blocking operation: the method returns once the index
        is deleted.

        Note:
            Although associated to a table, index names are unique across
            a keyspace. For this reason, no table name is required here.

        Args:
            name: the name of the index to drop.
            keyspace: the keyspace the index belongs to. If omitted,
                the database working keyspace is used.
            if_exists: if passed as True, dropping a nonexistent index is a
                silent no-op instead of an error. If not provided, the API
                default behaviour holds.
            table_admin_timeout_ms: a timeout, in milliseconds, for the
                underlying API request. Falls back to this object's defaults
                if not provided. (A single API request is issued by this
                method, hence all timeout parameters are equivalent.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> # Drop an index from the keyspace:
            >>> await async_database.drop_table_index("score_index")
            >>> # Drop an index, unless it does not exist already:
            >>> await async_database.drop_table_index("score_index", if_exists=True)
        """

        timeout_ms_resolved, timeout_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        drop_options: dict[str, bool] = (
            {} if if_exists is None else {"ifExists": if_exists}
        )
        # None and empty-dict entries are stripped from the command payload.
        di_payload = {
            "dropIndex": {
                key: value
                for key, value in (
                    ("name", name),
                    ("options", drop_options),
                )
                if value is not None and value != {}
            }
        }
        commander = self._get_driver_commander(keyspace=keyspace)
        logger.info(f"dropIndex('{name}')")
        di_response = await commander.async_request(
            payload=di_payload,
            timeout_context=_TimeoutContext(
                request_ms=timeout_ms_resolved, label=timeout_label
            ),
        )
        if di_response.get("status") != {"ok": 1}:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from dropIndex API command.",
                raw_response=di_response,
            )
        logger.info(f"finished dropIndex('{name}')")

    async def drop_table(
        self,
        name: str,
        *,
        keyspace: str | None = None,
        if_exists: bool | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> dict[str, Any]:
        """
        Drop a table from the database, along with all rows therein and related indexes.

        Args:
            name: the name of the table to drop.
            keyspace: the keyspace where the table resides. If not specified,
                the database working keyspace is assumed.
            if_exists: if passed as True, trying to drop a non-existing table
                will not error, just silently do nothing instead. If not provided,
                the API default behaviour will hold.
            table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Returns:
            a dictionary with the API response status (`{"ok": 1}` on success).

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> asyncio.run(async_database.list_table_names())
            ['fighters', 'games']
            >>> asyncio.run(async_database.drop_table("fighters"))
            >>> asyncio.run(async_database.list_table_names())
            ['games']
            >>> # not erroring because of if_exists:
            >>> asyncio.run(async_database.drop_table("fighters", if_exists=True))
        """

        _table_admin_timeout_ms, _ta_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        _keyspace = keyspace or self.keyspace
        dt_options: dict[str, bool]
        if if_exists is not None:
            dt_options = {"ifExists": if_exists}
        else:
            dt_options = {}
        driver_commander = self._get_driver_commander(keyspace=_keyspace)
        # None and empty-dict entries are stripped from the command payload.
        dt_payload = {
            "dropTable": {
                k: v
                for k, v in {
                    "name": name,
                    "options": dt_options,
                }.items()
                if v is not None
                if v != {}
            }
        }
        logger.info(f"dropTable('{name}')")
        dt_response = await driver_commander.async_request(
            payload=dt_payload,
            timeout_context=_TimeoutContext(
                request_ms=_table_admin_timeout_ms, label=_ta_label
            ),
        )
        # Anything other than a plain {"ok": 1} status is treated as a failure.
        if dt_response.get("status") != {"ok": 1}:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from dropTable API command.",
                raw_response=dt_response,
            )
        logger.info(f"finished dropTable('{name}')")
        return dt_response.get("status", {})  # type: ignore[no-any-return]

    async def list_tables(
        self,
        *,
        keyspace: str | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[ListTableDescriptor]:
        """
        Retrieve a descriptor for every table in a keyspace of this database.

        Args:
            keyspace: the keyspace to inspect. When omitted, the database's
                working keyspace is used.
            table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults
                apply. (A single API request is issued, so all timeout
                parameters are treated the same.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Returns:
            a list of ListTableDescriptor instances, one for each table.

        Example:
            >>> tables = asyncio.run(my_async_database.list_tables())
            >>> tables
            [ListTableDescriptor(name='fighters', definition=ListTableDefinition(...
            >>> tables[1].name
            'games'
            >>> tables[1].definition.columns['score']
            TableScalarColumnTypeDescriptor(ColumnType.INT)
            >>> tables[1].definition.primary_key.partition_by
            ['match_id']
        """

        # Resolve which timeout value (and its label, for error reporting)
        # applies to this single-request method.
        chosen_timeout_ms, timeout_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        timeout_context = _TimeoutContext(
            request_ms=chosen_timeout_ms, label=timeout_label
        )
        # Delegate the actual API round trip and response marshaling.
        return await self._list_tables_ctx(
            keyspace=keyspace,
            timeout_context=timeout_context,
        )

    async def _list_tables_ctx(
        self,
        *,
        keyspace: str | None,
        timeout_context: _TimeoutContext,
    ) -> list[ListTableDescriptor]:
        """
        Issue a `listTables` API command (with the `explain` option) and
        marshal the response into ListTableDescriptor objects.

        Args:
            keyspace: the keyspace to inspect; resolution of a None value is
                left to `_get_driver_commander`.
            timeout_context: the timeout context to apply to the request.

        Returns:
            a list of ListTableDescriptor instances, one for each table.

        Raises:
            UnexpectedDataAPIResponseException: if the response "status" lacks
                the expected "tables" entry.
        """

        driver_commander = self._get_driver_commander(keyspace=keyspace)
        lt_payload = {"listTables": {"options": {"explain": True}}}
        logger.info("listTables")
        # Fix: this is an async method and must await the asynchronous request
        # (as the sibling async methods do); the previous synchronous
        # `driver_commander.request(...)` call would block the event loop.
        lt_response = await driver_commander.async_request(
            payload=lt_payload,
            timeout_context=timeout_context,
        )
        if "tables" not in lt_response.get("status", {}):
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from listTables API command.",
                raw_response=lt_response,
            )
        else:
            # we know this is a list of dicts, to marshal into "descriptors"
            logger.info("finished listTables")
            return [
                ListTableDescriptor.coerce(tab_dict)
                for tab_dict in lt_response["status"]["tables"]
            ]

    async def list_table_names(
        self,
        *,
        keyspace: str | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[str]:
        """
        Return the names of all tables in a keyspace of this database.

        Args:
            keyspace: the keyspace to inspect. When omitted, the database's
                working keyspace is used.
            table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults
                apply. (A single API request is issued, so all timeout
                parameters are treated the same.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Returns:
            a list of the table names as strings, in no particular order.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> async def destroy_temp_table(async_db: AsyncDatabase) -> None:
            ...     print(await async_db.list_table_names())
            ...     await async_db.drop_table("my_v_tab")
            ...     print(await async_db.list_table_names())
            ...
            >>> asyncio.run(destroy_temp_table(async_database))
            ['fighters', 'my_v_tab', 'games']
            ['fighters', 'games']
        """

        chosen_timeout_ms, timeout_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        commander = self._get_driver_commander(keyspace=keyspace)
        payload: dict[str, Any] = {"listTables": {}}
        logger.info("listTables")
        response = await commander.async_request(
            payload=payload,
            timeout_context=_TimeoutContext(
                request_ms=chosen_timeout_ms, label=timeout_label
            ),
        )
        # Guard clause: a well-formed response carries the names under
        # status.tables; anything else is treated as a faulty response.
        if "tables" not in response.get("status", {}):
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from listTables API command.",
                raw_response=response,
            )
        logger.info("finished listTables")
        return response["status"]["tables"]  # type: ignore[no-any-return]

    async def command(
        self,
        body: dict[str, Any],
        *,
        keyspace: str | None | UnsetType = _UNSET,
        collection_or_table_name: str | None = None,
        raise_api_errors: bool = True,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> dict[str, Any]:
        """
        Send a POST request to the Data API for this database with
        an arbitrary, caller-provided payload.

        Args:
            body: a JSON-serializable dictionary, the payload of the request.
            keyspace: the keyspace to use, if any. If a keyspace is employed,
                it is used to construct the full request URL. To run a command
                targeting no specific keyspace (rather, the database as a whole),
                pass an explicit `None`: the request URL will lack the suffix
                "/<keyspace>" component. If unspecified, the working keyspace of
                this database is used. If another keyspace is passed, it will be
                used instead of the database's working one.
            collection_or_table_name: if provided, the name is appended at the end
                of the endpoint. In this way, this method allows collection-
                and table-level arbitrary POST requests as well.
                This parameter cannot be used if `keyspace=None` is explicitly provided.
            raise_api_errors: if True, responses with a nonempty 'errors' field
                result in an astrapy exception being raised.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a dictionary with the response of the HTTP request.

        Raises:
            ValueError: if `collection_or_table_name` is passed together with
                an explicit `keyspace=None`.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> my_db.command({"findCollections": {}})
            {'status': {'collections': ['my_coll']}}
            >>> my_db.command({"countDocuments": {}}, collection_or_table_name="my_coll")
            {'status': {'count': 123}}
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # Resolve the effective keyspace: explicit None means "no keyspace",
        # unset means "use this database's working keyspace".
        _keyspace: str | None
        if keyspace is None:
            if collection_or_table_name is not None:
                raise ValueError(
                    "Cannot pass collection_or_table_name to database "
                    "`command` on a no-keyspace command"
                )
            _keyspace = None
        else:
            if isinstance(keyspace, UnsetType):
                _keyspace = self.keyspace
            else:
                _keyspace = keyspace
        # build the ad-hoc-commander path with _keyspace and the coll.or.table
        base_path_components = [
            comp
            for comp in (
                ncomp.strip("/")
                for ncomp in (
                    self.api_options.data_api_url_options.api_path,
                    self.api_options.data_api_url_options.api_version,
                    _keyspace,
                    collection_or_table_name,
                )
                if ncomp is not None
            )
            if comp != ""
        ]
        base_path = f"/{'/'.join(base_path_components)}"
        command_commander = APICommander(
            api_endpoint=self.api_endpoint,
            path=base_path,
            headers=self._commander_headers,
            callers=self.api_options.callers,
            redacted_header_names=self.api_options.redacted_header_names,
        )

        _cmd_desc = ",".join(sorted(body.keys()))
        logger.info(f"command={_cmd_desc} on {self.__class__.__name__}")
        req_response = await command_commander.async_request(
            payload=body,
            raise_api_errors=raise_api_errors,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        # Fix: the completion log previously repeated the start message; it now
        # says "finished ...", consistent with the other methods' log pairs.
        logger.info(f"finished command={_cmd_desc} on {self.__class__.__name__}")
        return req_response

    def get_database_admin(
        self,
        *,
        token: str | TokenProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> DatabaseAdmin:
        """
        Spawn a DatabaseAdmin object for this database, for use in admin
        tasks such as managing keyspaces.

        The concrete class returned depends on the environment the database
        resides in: `AstraDBDatabaseAdmin` for Astra DB environments,
        `DataAPIDatabaseAdmin` otherwise.

        Args:
            token: an access token with enough permission on the database to
                perform the desired tasks. If omitted (as it can generally be
                done), the token of this Database is used. This can be either
                a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults. This allows for a deeper
                configuration of the database admin, e.g. concerning timeouts;
                if this is passed together with the equivalent named
                parameters, the latter will take precedence in their
                respective settings.

        Returns:
            A DatabaseAdmin instance targeting this database.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> my_db_admin = async_database.get_database_admin()
            >>> if "new_keyspace" not in my_db_admin.list_keyspaces():
            ...     my_db_admin.create_keyspace("new_keyspace")
            >>> my_db_admin.list_keyspaces()
            ['default_keyspace', 'new_keyspace']
        """

        # lazy importing here to avoid circular dependency
        from astrapy.admin.admin import AstraDBDatabaseAdmin, DataAPIDatabaseAdmin

        # Layer the overrides: spawn_api_options first, then the explicit
        # named parameters (the token), which take precedence.
        final_api_options = self.api_options.with_override(
            spawn_api_options
        ).with_override(APIOptions(token=token))

        if final_api_options.environment in Environment.astra_db_values:
            admin_class: type[AstraDBDatabaseAdmin] | type[DataAPIDatabaseAdmin] = (
                AstraDBDatabaseAdmin
            )
        else:
            admin_class = DataAPIDatabaseAdmin
        return admin_class(
            api_endpoint=self.api_endpoint,
            api_options=final_api_options,
            spawner_database=self,
        )

Instance variables

var id : str

The ID of this database.

Example

>>> my_async_database.id
'01234567-89ab-cdef-0123-456789abcdef'
Expand source code
@property
def id(self) -> str:
    """
    The ID of this database, as extracted from its API endpoint.

    Raises:
        DevOpsAPIException: if the API endpoint cannot be parsed (i.e. the
            database is not in a supported environment for this operation).

    Example:
        >>> my_async_database.id
        '01234567-89ab-cdef-0123-456789abcdef'
    """

    parsed = parse_api_endpoint(self.api_endpoint)
    if parsed is None:
        raise DevOpsAPIException(
            "Database is not in a supported environment for this operation."
        )
    return parsed.database_id
var keyspace : str | None

The keyspace this database uses as target for all commands when no method-call-specific keyspace is specified.

Returns

the working keyspace (a string), or None if not set.

Example

>>> async_database.keyspace
'the_keyspace'
Expand source code
@property
def keyspace(self) -> str | None:
    """
    The keyspace this database uses as target for all commands when
    no method-call-specific keyspace is specified.

    Returns:
        the working keyspace (a string), or None if not set.

    Example:
        >>> async_database.keyspace
        'the_keyspace'
    """

    return self._using_keyspace
var region : str

The region where this database is located.

The region is still well defined in case of multi-region databases, since a Database instance connects to exactly one of the regions (as specified by the API Endpoint).

Example

>>> my_async_database.region
'us-west-2'
Expand source code
@property
def region(self) -> str:
    """
    The region where this database is located, as extracted from its
    API endpoint.

    The region is still well defined in case of multi-region databases,
    since a Database instance connects to exactly one of the regions
    (as specified by the API Endpoint).

    Raises:
        DevOpsAPIException: if the API endpoint cannot be parsed (i.e. the
            database is not in a supported environment for this operation).

    Example:
        >>> my_async_database.region
        'us-west-2'
    """

    parsed = parse_api_endpoint(self.api_endpoint)
    if parsed is None:
        raise DevOpsAPIException(
            "Database is not in a supported environment for this operation."
        )
    return parsed.region

Methods

async def command(self, body: dict[str, Any], *, keyspace: str | None | UnsetType = (unset), collection_or_table_name: str | None = None, raise_api_errors: bool = True, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> dict[str, typing.Any]

Send a POST request to the Data API for this database with an arbitrary, caller-provided payload.

Args

body
a JSON-serializable dictionary, the payload of the request.
keyspace
the keyspace to use, if any. If a keyspace is employed, it is used to construct the full request URL. To run a command targeting no specific keyspace (rather, the database as a whole), pass an explicit None: the request URL will lack the suffix "/<keyspace>" component. If unspecified, the working keyspace of this database is used. If another keyspace is passed, it will be used instead of the database's working one.
collection_or_table_name
if provided, the name is appended at the end of the endpoint. In this way, this method allows collection- and table-level arbitrary POST requests as well. This parameter cannot be used if keyspace=None is explicitly provided.
raise_api_errors
if True, responses with a nonempty 'errors' field result in an astrapy exception being raised.
general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Returns

a dictionary with the response of the HTTP request.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> my_db.command({"findCollections": {}})
{'status': {'collections': ['my_coll']}}
>>> my_db.command({"countDocuments": {}}, collection_or_table_name="my_coll")
{'status': {'count': 123}}
Expand source code
async def command(
    self,
    body: dict[str, Any],
    *,
    keyspace: str | None | UnsetType = _UNSET,
    collection_or_table_name: str | None = None,
    raise_api_errors: bool = True,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> dict[str, Any]:
    """
    Send a POST request to the Data API for this database with
    an arbitrary, caller-provided payload.

    Args:
        body: a JSON-serializable dictionary, the payload of the request.
        keyspace: the keyspace to use, if any. If a keyspace is employed,
            it is used to construct the full request URL. To run a command
            targeting no specific keyspace (rather, the database as a whole),
            pass an explicit `None`: the request URL will lack the suffix
            "/<keyspace>" component. If unspecified, the working keyspace of
            this database is used. If another keyspace is passed, it will be
            used instead of the database's working one.
        collection_or_table_name: if provided, the name is appended at the end
            of the endpoint. In this way, this method allows collection-
            and table-level arbitrary POST requests as well.
            This parameter cannot be used if `keyspace=None` is explicitly provided.
        raise_api_errors: if True, responses with a nonempty 'errors' field
            result in an astrapy exception being raised.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a dictionary with the response of the HTTP request.

    Raises:
        ValueError: if `collection_or_table_name` is passed together with
            an explicit `keyspace=None`.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> my_db.command({"findCollections": {}})
        {'status': {'collections': ['my_coll']}}
        >>> my_db.command({"countDocuments": {}}, collection_or_table_name="my_coll")
        {'status': {'count': 123}}
    """

    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # Resolve the effective keyspace: explicit None means "no keyspace",
    # unset means "use this database's working keyspace".
    _keyspace: str | None
    if keyspace is None:
        if collection_or_table_name is not None:
            raise ValueError(
                "Cannot pass collection_or_table_name to database "
                "`command` on a no-keyspace command"
            )
        _keyspace = None
    else:
        if isinstance(keyspace, UnsetType):
            _keyspace = self.keyspace
        else:
            _keyspace = keyspace
    # build the ad-hoc-commander path with _keyspace and the coll.or.table
    base_path_components = [
        comp
        for comp in (
            ncomp.strip("/")
            for ncomp in (
                self.api_options.data_api_url_options.api_path,
                self.api_options.data_api_url_options.api_version,
                _keyspace,
                collection_or_table_name,
            )
            if ncomp is not None
        )
        if comp != ""
    ]
    base_path = f"/{'/'.join(base_path_components)}"
    command_commander = APICommander(
        api_endpoint=self.api_endpoint,
        path=base_path,
        headers=self._commander_headers,
        callers=self.api_options.callers,
        redacted_header_names=self.api_options.redacted_header_names,
    )

    _cmd_desc = ",".join(sorted(body.keys()))
    logger.info(f"command={_cmd_desc} on {self.__class__.__name__}")
    req_response = await command_commander.async_request(
        payload=body,
        raise_api_errors=raise_api_errors,
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    # Fix: the completion log previously repeated the start message; it now
    # says "finished ...", consistent with the other methods' log pairs.
    logger.info(f"finished command={_cmd_desc} on {self.__class__.__name__}")
    return req_response
async def create_collection(self, name: str, *, definition: CollectionDefinition | dict[str, Any] | None = None, document_type: type[Any] = dict[str, typing.Any], keyspace: str | None = None, collection_admin_timeout_ms: int | None = None, embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = (unset), spawn_api_options: APIOptions | UnsetType = (unset)) ‑> AsyncCollection[DOC]

Creates a collection on the database and return the AsyncCollection instance that represents it.

This is a blocking operation: the method returns when the collection is ready to be used. As opposed to the get_collection instance, this method causes the collection to actually be created on the DB.

Args

name
the name of the collection.
definition
a complete collection definition for the table. This can be an instance of CollectionDefinition or an equivalent (nested) dictionary, in which case it will be parsed into a CollectionDefinition. See the CollectionDefinition class and the AsyncCollection class for more details and ways to construct this object.
document_type
this parameter acts as a formal specifier for the type checker. If omitted, the resulting AsyncCollection is implicitly an AsyncCollection[dict[str, Any]]. If provided, it must match the type hint specified in the assignment. See the examples below.
keyspace
the keyspace where the collection is to be created. If not specified, the general setting for this database is used.
collection_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply.
embedding_api_key
optional API key(s) for interacting with the collection. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
spawn_api_options
a specification - complete or partial - of the API Options to override the defaults inherited from the Database. This allows for a deeper configuration of the collection, e.g. concerning timeouts; if this is passed together with the named timeout parameters, the latter will take precedence in their respective settings.

Returns

an AsyncCollection instance, representing the newly-created collection.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> # Create a collection using the fluent syntax for its definition
>>> from astrapy.constants import VectorMetric
>>> from astrapy.info import CollectionDefinition
>>>
>>> collection_definition = (
...     CollectionDefinition.builder()
...     .set_vector_dimension(3)
...     .set_vector_metric(VectorMetric.DOT_PRODUCT)
...     .set_indexing("deny", ["annotations", "logs"])
...     .build()
... )
>>> my_collection = asyncio.run(async_database.create_collection(
...     "my_events",
...     definition=collection_definition,
... ))
>>>
>>> # Create a collection with the definition as object
>>> from astrapy.info import CollectionVectorOptions
>>>
>>> collection_definition_1 = CollectionDefinition(
...     vector=CollectionVectorOptions(
...         dimension=3,
...         metric=VectorMetric.DOT_PRODUCT,
...     ),
...     indexing={"deny": ["annotations", "logs"]},
... )
>>> my_collection_1 = asyncio.run(async_database.create_collection(
...     "my_events",
...     definition=collection_definition_1,
... ))
>>>
>>>
>>> # Create a collection with the definition as plain dictionary
>>> collection_definition_2 = {
...     "indexing": {"deny": ["annotations", "logs"]},
...     "vector": {
...         "dimension": 3,
...         "metric": VectorMetric.DOT_PRODUCT,
...     },
... }
>>> my_collection_2 = asyncio.run(async_database.create_collection(
...     "my_events",
...     definition=collection_definition_2,
... ))
Expand source code
async def create_collection(
    self,
    name: str,
    *,
    definition: CollectionDefinition | dict[str, Any] | None = None,
    document_type: type[Any] = DefaultDocumentType,
    keyspace: str | None = None,
    collection_admin_timeout_ms: int | None = None,
    embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> AsyncCollection[DOC]:
    """
    Creates a collection on the database and return the AsyncCollection
    instance that represents it.

    This is a blocking operation: the method returns when the collection
    is ready to be used. As opposed to the `get_collection` instance,
    this method causes the collection to actually be created on DB.

    Args:
        name: the name of the collection.
        definition: a complete collection definition for the table. This can be an
            instance of `CollectionDefinition` or an equivalent (nested) dictionary,
            in which case it will be parsed into a `CollectionDefinition`.
            See the `astrapy.info.CollectionDefinition` class and the
            `AsyncCollection` class for more details and ways to construct this object.
        document_type: this parameter acts as a formal specifier for the type checker.
            If omitted, the resulting AsyncCollection is implicitly
            an `AsyncCollection[dict[str, Any]]`. If provided, it must match the
            type hint specified in the assignment.
            See the examples below.
        keyspace: the keyspace where the collection is to be created.
            If not specified, the general setting for this database is used.
        collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
        embedding_api_key: optional API key(s) for interacting with the collection.
            If an embedding service is configured, and this parameter is not None,
            each Data API call will include the necessary embedding-related headers
            as specified by this parameter. If a string is passed, it translates
            into the one "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            For some vectorize providers/models, if using header-based authentication,
            specialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`
            should be supplied.
        spawn_api_options: a specification - complete or partial - of the
            API Options to override the defaults inherited from the Database.
            This allows for a deeper configuration of the collection, e.g.
            concerning timeouts; if this is passed together with
            the named timeout parameters, the latter will take precedence
            in their respective settings.

    Returns:
        an `AsyncCollection` instance, representing the newly-created collection.

    Raises:
        UnexpectedDataAPIResponseException: if the createCollection command
            does not return a `{"ok": 1}` status.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> # Create a collection using the fluent syntax for its definition
        >>> from astrapy.constants import VectorMetric
        >>> from astrapy.info import CollectionDefinition
        >>>
        >>> collection_definition = (
        ...     CollectionDefinition.builder()
        ...     .set_vector_dimension(3)
        ...     .set_vector_metric(VectorMetric.DOT_PRODUCT)
        ...     .set_indexing("deny", ["annotations", "logs"])
        ...     .build()
        ... )
        >>> my_collection = asyncio.run(async_database.create_collection(
        ...     "my_events",
        ...     definition=collection_definition,
        ... ))
        >>>
        >>> # Create a collection with the definition as object
        >>> from astrapy.info import CollectionVectorOptions
        >>>
        >>> collection_definition_1 = CollectionDefinition(
        ...     vector=CollectionVectorOptions(
        ...         dimension=3,
        ...         metric=VectorMetric.DOT_PRODUCT,
        ...     ),
        ...     indexing={"deny": ["annotations", "logs"]},
        ... )
        >>> my_collection_1 = asyncio.run(async_database.create_collection(
        ...     "my_events",
        ...     definition=collection_definition_1,
        ... ))
        >>>
        >>>
        >>> # Create a collection with the definition as plain dictionary
        >>> collection_definition_2 = {
        ...     "indexing": {"deny": ["annotations", "logs"]},
        ...     "vector": {
        ...         "dimension": 3,
        ...         "metric": VectorMetric.DOT_PRODUCT,
        ...     },
        ... }
        >>> my_collection_2 = asyncio.run(async_database.create_collection(
        ...     "my_events",
        ...     definition=collection_definition_2,
        ... ))
    """

    # Normalize the definition (object, dict or None) into a plain dict.
    cc_definition: dict[str, Any] = CollectionDefinition.coerce(
        definition or {}
    ).as_dict()
    # The timeout label is the same regardless of whether the caller supplied
    # an explicit value or the object's default is used (previously it was
    # redundantly assigned in both branches).
    _ca_label = "collection_admin_timeout_ms"
    if collection_admin_timeout_ms is not None:
        _collection_admin_timeout_ms = collection_admin_timeout_ms
    else:
        _collection_admin_timeout_ms = (
            self.api_options.timeout_options.collection_admin_timeout_ms
        )
    driver_commander = self._get_driver_commander(keyspace=keyspace)
    # Drop None/empty entries from the payload to keep the command minimal.
    cc_payload = {
        "createCollection": {
            k: v
            for k, v in {
                "name": name,
                "options": cc_definition,
            }.items()
            if v is not None
            if v != {}
        }
    }
    logger.info(f"createCollection('{name}')")
    cc_response = await driver_commander.async_request(
        payload=cc_payload,
        timeout_context=_TimeoutContext(
            request_ms=_collection_admin_timeout_ms, label=_ca_label
        ),
    )
    if cc_response.get("status") != {"ok": 1}:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from createCollection API command.",
            raw_response=cc_response,
        )
    logger.info(f"finished createCollection('{name}')")
    return self.get_collection(
        name,
        document_type=document_type,
        keyspace=keyspace,
        embedding_api_key=embedding_api_key,
        spawn_api_options=spawn_api_options,
    )
async def create_table(self, name: str, *, definition: CreateTableDefinition | dict[str, Any], row_type: type[Any] = dict[str, typing.Any], keyspace: str | None = None, if_not_exists: bool | None = None, table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None, embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = (unset), spawn_api_options: APIOptions | UnsetType = (unset)) ‑> AsyncTable[ROW]

Creates a table on the database and returns the AsyncTable instance that represents it.

This is a blocking operation: the method returns when the table is ready to be used. As opposed to the get_table method call, this method causes the table to be actually created on DB.

Args

name
the name of the table.
definition
a complete table definition for the table. This can be an instance of CreateTableDefinition or an equivalent (nested) dictionary, in which case it will be parsed into a CreateTableDefinition. See the CreateTableDefinition class and the AsyncTable class for more details and ways to construct this object.
row_type
this parameter acts as a formal specifier for the type checker. If omitted, the resulting AsyncTable is implicitly an AsyncTable[dict[str, Any]]. If provided, it must match the type hint specified in the assignment. See the examples below.
keyspace
the keyspace where the table is to be created. If not specified, the general setting for this database is used.
if_not_exists
if set to True, the command will succeed even if a table with the specified name already exists (in which case no actual table creation takes place on the database). Defaults to False, i.e. an error is raised by the API in case of table-name collision.
table_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for table_admin_timeout_ms.
timeout_ms
an alias for table_admin_timeout_ms.
embedding_api_key
optional API key(s) for interacting with the table. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
spawn_api_options
a specification - complete or partial - of the API Options to override the defaults inherited from the Database. This allows for a deeper configuration of the table, e.g. concerning timeouts; if this is passed together with the named timeout parameters, the latter will take precedence in their respective settings.

Returns

an AsyncTable instance, representing the newly-created table.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> # Create a table using the fluent syntax for definition
>>> from astrapy.constants import SortMode
>>> from astrapy.info import (
...     CreateTableDefinition,
...     ColumnType,
... )
>>> table_definition = (
...     CreateTableDefinition.builder()
...     .add_column("match_id", ColumnType.TEXT)
...     .add_column("round", ColumnType.INT)
...     .add_vector_column("m_vector", dimension=3)
...     .add_column("score", ColumnType.INT)
...     .add_column("when", ColumnType.TIMESTAMP)
...     .add_column("winner", ColumnType.TEXT)
...     .add_set_column("fighters", ColumnType.UUID)
...     .add_partition_by(["match_id"])
...     .add_partition_sort({"round": SortMode.ASCENDING})
...     .build()
... )
>>> my_async_table = asyncio.run(async_database.create_table(
...     "games",
...     definition=table_definition,
... ))
>>>
>>> # Create a table with the definition as object
>>> # (and do not raise an error if the table exists already)
>>> from astrapy.info import (
...     CreateTableDefinition,
...     TablePrimaryKeyDescriptor,
...     TableScalarColumnTypeDescriptor,
...     TableValuedColumnType,
...     TableValuedColumnTypeDescriptor,
...     TableVectorColumnTypeDescriptor,
... )
>>> table_definition_1 = CreateTableDefinition(
...     columns={
...         "match_id": TableScalarColumnTypeDescriptor(
...             ColumnType.TEXT,
...         ),
...         "round": TableScalarColumnTypeDescriptor(
...             ColumnType.INT,
...         ),
...         "m_vector": TableVectorColumnTypeDescriptor(
...             column_type="vector", dimension=3
...         ),
...         "score": TableScalarColumnTypeDescriptor(
...             ColumnType.INT,
...         ),
...         "when": TableScalarColumnTypeDescriptor(
...             ColumnType.TIMESTAMP,
...         ),
...         "winner": TableScalarColumnTypeDescriptor(
...             ColumnType.TEXT,
...         ),
...         "fighters": TableValuedColumnTypeDescriptor(
...             column_type=TableValuedColumnType.SET,
...             value_type=ColumnType.UUID,
...         ),
...     },
...     primary_key=TablePrimaryKeyDescriptor(
...         partition_by=["match_id"],
...         partition_sort={"round": SortMode.ASCENDING},
...     ),
... )
>>> my_async_table_1 = asyncio.run(async_database.create_table(
...     "games",
...     definition=table_definition_1,
...     if_not_exists=True,
... ))
>>>
>>> # Create a table with the definition as plain dictionary
>>> # (and do not raise an error if the table exists already)
>>> table_definition_2 = {
...     "columns": {
...         "match_id": {"type": "text"},
...         "round": {"type": "int"},
...         "m_vector": {"type": "vector", "dimension": 3},
...         "score": {"type": "int"},
...         "when": {"type": "timestamp"},
...         "winner": {"type": "text"},
...         "fighters": {"type": "set", "valueType": "uuid"},
...     },
...     "primaryKey": {
...         "partitionBy": ["match_id"],
...         "partitionSort": {"round": 1},
...     },
... }
>>> my_async_table_2 = asyncio.run(async_database.create_table(
...     "games",
...     definition=table_definition_2,
...     if_not_exists=True,
... ))
Expand source code
async def create_table(
    self,
    name: str,
    *,
    definition: CreateTableDefinition | dict[str, Any],
    row_type: type[Any] = DefaultRowType,
    keyspace: str | None = None,
    if_not_exists: bool | None = None,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
    embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> AsyncTable[ROW]:
    """
    Create a table on the database and return the `AsyncTable`
    instance that represents it.

    This is a blocking operation: the method returns once the table is
    ready to be used. As opposed to the `get_table` method call, this
    method actually creates the table on the database.

    Args:
        name: the name of the table.
        definition: a complete table definition for the table: either a
            `CreateTableDefinition` instance or an equivalent (nested)
            dictionary, in which case it is parsed into a
            `CreateTableDefinition`. See the
            `astrapy.info.CreateTableDefinition` class and the
            `AsyncTable` class for details on constructing this object.
        row_type: this parameter acts as a formal specifier for the type
            checker. If omitted, the resulting AsyncTable is implicitly an
            `AsyncTable[dict[str, Any]]`. If provided, it must match the
            type hint specified in the assignment. See the examples below.
        keyspace: the keyspace where the table is to be created.
            If not specified, the general setting for this database is used.
        if_not_exists: if set to True, the command will succeed even if a
            table with the specified name already exists (in which case no
            actual table creation takes place on the database). Defaults to
            False, i.e. an error is raised by the API in case of
            table-name collision.
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults
            apply. (This method issues a single API request, hence all
            timeout parameters are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.
        embedding_api_key: optional API key(s) for interacting with the
            table. If an embedding service is configured, and this parameter
            is not None, each Data API call will include the necessary
            embedding-related headers as specified by this parameter. If a
            string is passed, it translates into the one "embedding api key"
            header (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            For some vectorize providers/models, if using header-based
            authentication, specialized subclasses of
            `astrapy.authentication.EmbeddingHeadersProvider` should be supplied.
        spawn_api_options: a specification - complete or partial - of the
            API Options to override the defaults inherited from the Database.
            This allows for a deeper configuration of the table, e.g.
            concerning timeouts; if this is passed together with the named
            timeout parameters, the latter will take precedence in their
            respective settings.

    Returns:
        an `AsyncTable` instance, representing the newly-created table.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> # Create a table using the fluent syntax for definition
        >>> from astrapy.constants import SortMode
        >>> from astrapy.info import (
        ...     CreateTableDefinition,
        ...     ColumnType,
        ... )
        >>> table_definition = (
        ...     CreateTableDefinition.builder()
        ...     .add_column("match_id", ColumnType.TEXT)
        ...     .add_column("round", ColumnType.INT)
        ...     .add_vector_column("m_vector", dimension=3)
        ...     .add_column("score", ColumnType.INT)
        ...     .add_partition_by(["match_id"])
        ...     .add_partition_sort({"round": SortMode.ASCENDING})
        ...     .build()
        ... )
        >>> my_async_table = asyncio.run(async_database.create_table(
        ...     "games",
        ...     definition=table_definition,
        ... ))
        >>>
        >>> # Create a table with the definition as plain dictionary
        >>> # (and do not raise an error if the table exists already)
        >>> table_definition_2 = {
        ...     "columns": {
        ...         "match_id": {"type": "text"},
        ...         "round": {"type": "int"},
        ...         "m_vector": {"type": "vector", "dimension": 3},
        ...         "score": {"type": "int"},
        ...     },
        ...     "primaryKey": {
        ...         "partitionBy": ["match_id"],
        ...         "partitionSort": {"round": 1},
        ...     },
        ... }
        >>> my_async_table_2 = asyncio.run(async_database.create_table(
        ...     "games",
        ...     definition=table_definition_2,
        ...     if_not_exists=True,
        ... ))
    """

    # Only carry an options mapping when the caller expressed a preference.
    creation_options: dict[str, bool] = (
        {} if if_not_exists is None else {"ifNotExists": if_not_exists}
    )
    # Normalize the definition (object or plain dict) into its API dict form.
    definition_dict: dict[str, Any] = CreateTableDefinition.coerce(
        definition
    ).as_dict()
    resolved_timeout_ms, timeout_label = _select_singlereq_timeout_ta(
        timeout_options=self.api_options.timeout_options,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    commander = self._get_driver_commander(keyspace=keyspace)
    # Strip None values and empty mappings to keep the payload minimal.
    command_body = {
        "name": name,
        "definition": definition_dict,
        "options": creation_options,
    }
    ct_payload = {
        "createTable": {
            k: v for k, v in command_body.items() if v is not None and v != {}
        }
    }
    logger.info(f"createTable('{name}')")
    ct_response = await commander.async_request(
        payload=ct_payload,
        timeout_context=_TimeoutContext(
            request_ms=resolved_timeout_ms, label=timeout_label
        ),
    )
    if ct_response.get("status") != {"ok": 1}:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from createTable API command.",
            raw_response=ct_response,
        )
    logger.info(f"finished createTable('{name}')")
    return self.get_table(
        name,
        row_type=row_type,
        keyspace=keyspace,
        embedding_api_key=embedding_api_key,
        spawn_api_options=spawn_api_options,
    )
async def drop_collection(self, name: str, *, keyspace: str | None = None, collection_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> dict[str, typing.Any]

Drop a collection from the database, along with all documents therein.

Args

name
the name of the collection to drop.
keyspace
the keyspace where the collection resides. If not specified, the database working keyspace is assumed.
collection_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for collection_admin_timeout_ms.
timeout_ms
an alias for collection_admin_timeout_ms.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> asyncio.run(async_database.list_collection_names())
['a_collection', 'my_v_col', 'another_col']
>>> asyncio.run(async_database.drop_collection("my_v_col"))
>>> asyncio.run(async_database.list_collection_names())
['a_collection', 'another_col']
Expand source code
async def drop_collection(
    self,
    name: str,
    *,
    keyspace: str | None = None,
    collection_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> dict[str, Any]:
    """
    Drop a collection from the database, along with all documents therein.

    Args:
        name: the name of the collection to drop.
        keyspace: the keyspace where the collection resides. If not
            specified, the database working keyspace is assumed.
        collection_admin_timeout_ms: a timeout, in milliseconds, to impose
            on the underlying API request. If not provided, this object's
            defaults apply. (This method issues a single API request, hence
            all timeout parameters are treated the same.)
        request_timeout_ms: an alias for `collection_admin_timeout_ms`.
        timeout_ms: an alias for `collection_admin_timeout_ms`.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> asyncio.run(async_database.list_collection_names())
        ['a_collection', 'my_v_col', 'another_col']
        >>> asyncio.run(async_database.drop_collection("my_v_col"))
        >>> asyncio.run(async_database.list_collection_names())
        ['a_collection', 'another_col']
    """

    resolved_timeout_ms, timeout_label = _select_singlereq_timeout_ca(
        timeout_options=self.api_options.timeout_options,
        collection_admin_timeout_ms=collection_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # Fall back to the database's working keyspace when none is supplied.
    target_keyspace = keyspace or self.keyspace
    commander = self._get_driver_commander(keyspace=target_keyspace)
    dc_payload = {"deleteCollection": {"name": name}}
    logger.info(f"deleteCollection('{name}')")
    dc_response = await commander.async_request(
        payload=dc_payload,
        timeout_context=_TimeoutContext(
            request_ms=resolved_timeout_ms, label=timeout_label
        ),
    )
    if dc_response.get("status") != {"ok": 1}:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from deleteCollection API command.",
            raw_response=dc_response,
        )
    logger.info(f"finished deleteCollection('{name}')")
    # Return the "status" portion of the API response.
    return dc_response.get("status", {})  # type: ignore[no-any-return]
async def drop_table(self, name: str, *, keyspace: str | None = None, if_exists: bool | None = None, table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> dict[str, typing.Any]

Drop a table from the database, along with all rows therein and related indexes.

Args

name
the name of the table to drop.
keyspace
the keyspace where the table resides. If not specified, the database working keyspace is assumed.
if_exists
if passed as True, trying to drop a non-existing table will not error, just silently do nothing instead. If not provided, the API default behaviour will hold.
table_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for table_admin_timeout_ms.
timeout_ms
an alias for table_admin_timeout_ms.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> asyncio.run(async_database.list_table_names())
['fighters', 'games']
>>> asyncio.run(async_database.drop_table("fighters"))
>>> asyncio.run(async_database.list_table_names())
['games']
>>> # not erroring because of if_exists:
>>> asyncio.run(async_database.drop_table("fighters", if_exists=True))
Expand source code
async def drop_table(
    self,
    name: str,
    *,
    keyspace: str | None = None,
    if_exists: bool | None = None,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> dict[str, Any]:
    """
    Drop a table from the database, along with all rows therein and related indexes.

    Args:
        name: the name of the table to drop.
        keyspace: the keyspace where the table resides. If not specified,
            the database working keyspace is assumed.
        if_exists: if passed as True, trying to drop a non-existing table
            will not error, just silently do nothing instead. If not provided,
            the API default behaviour will hold.
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Returns:
        the "status" portion of the Data API response, as a dictionary.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> asyncio.run(async_database.list_table_names())
        ['fighters', 'games']
        >>> asyncio.run(async_database.drop_table("fighters"))
        >>> asyncio.run(async_database.list_table_names())
        ['games']
        >>> # not erroring because of if_exists:
        >>> asyncio.run(async_database.drop_table("fighters", if_exists=True))
    """

    _table_admin_timeout_ms, _ta_label = _select_singlereq_timeout_ta(
        timeout_options=self.api_options.timeout_options,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # Fall back to the database's working keyspace when none is supplied.
    _keyspace = keyspace or self.keyspace
    dt_options: dict[str, bool]
    if if_exists is not None:
        dt_options = {"ifExists": if_exists}
    else:
        dt_options = {}
    driver_commander = self._get_driver_commander(keyspace=_keyspace)
    # Strip None values and empty mappings to keep the payload minimal.
    dt_payload = {
        "dropTable": {
            k: v
            for k, v in {
                "name": name,
                "options": dt_options,
            }.items()
            if v is not None
            if v != {}
        }
    }
    logger.info(f"dropTable('{name}')")
    dt_response = await driver_commander.async_request(
        payload=dt_payload,
        timeout_context=_TimeoutContext(
            request_ms=_table_admin_timeout_ms, label=_ta_label
        ),
    )
    if dt_response.get("status") != {"ok": 1}:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from dropTable API command.",
            raw_response=dt_response,
        )
    logger.info(f"finished dropTable('{name}')")
    return dt_response.get("status", {})  # type: ignore[no-any-return]
async def drop_table_index(self, name: str, *, keyspace: str | None = None, if_exists: bool | None = None, table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Drops (deletes) an index (of any kind) from the table it is associated to.

This is a blocking operation: the method returns once the index is deleted.

Note

Although associated to a table, index names are unique across a keyspace. For this reason, no table name is required in this call.

Args

name
the name of the index.
keyspace
the keyspace to which the index belongs. If not specified, the general setting for this database is used.
if_exists
if passed as True, trying to drop a non-existing index will not error, just silently do nothing instead. If not provided, the API default behaviour will hold.
table_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for table_admin_timeout_ms.
timeout_ms
an alias for table_admin_timeout_ms.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> # Drop an index from the keyspace:
>>> await async_database.drop_table_index("score_index")
>>> # Drop an index, ignoring the case where it does not exist:
>>> await async_database.drop_table_index("score_index", if_exists=True)
Expand source code
async def drop_table_index(
    self,
    name: str,
    *,
    keyspace: str | None = None,
    if_exists: bool | None = None,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Drop (delete) an index, of any kind, from the table it is associated to.

    This is a blocking operation: the method returns once the index
    is deleted.

    Note:
        Although associated to a table, index names are unique across a
        keyspace. For this reason, no table name is required in this call.

    Args:
        name: the name of the index.
        keyspace: the keyspace to which the index belongs.
            If not specified, the general setting for this database is used.
        if_exists: if passed as True, trying to drop a non-existing index
            will not error, just silently do nothing instead. If not
            provided, the API default behaviour will hold.
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults
            apply. (This method issues a single API request, hence all
            timeout parameters are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> # Drop an index from the keyspace:
        >>> await async_database.drop_table_index("score_index")
        >>> # Drop an index, ignoring the case where it does not exist:
        >>> await async_database.drop_table_index("score_index", if_exists=True)
    """

    resolved_timeout_ms, timeout_label = _select_singlereq_timeout_ta(
        timeout_options=self.api_options.timeout_options,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # Only carry an options mapping when the caller expressed a preference.
    drop_options: dict[str, bool] = (
        {} if if_exists is None else {"ifExists": if_exists}
    )
    # Strip None values and empty mappings to keep the payload minimal.
    command_body = {
        "name": name,
        "options": drop_options,
    }
    di_payload = {
        "dropIndex": {
            k: v for k, v in command_body.items() if v is not None and v != {}
        }
    }
    commander = self._get_driver_commander(keyspace=keyspace)
    logger.info(f"dropIndex('{name}')")
    di_response = await commander.async_request(
        payload=di_payload,
        timeout_context=_TimeoutContext(
            request_ms=resolved_timeout_ms, label=timeout_label
        ),
    )
    if di_response.get("status") != {"ok": 1}:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from dropIndex API command.",
            raw_response=di_response,
        )
    logger.info(f"finished dropIndex('{name}')")
def get_collection(self, name: str, *, document_type: type[Any] = dict[str, typing.Any], keyspace: str | None = None, embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = (unset), spawn_api_options: APIOptions | UnsetType = (unset)) ‑> AsyncCollection[DOC]

Spawn an AsyncCollection object instance representing a collection on this database.

Creating an AsyncCollection instance does not have any effect on the actual state of the database: in other words, for the created AsyncCollection instance to be used meaningfully, the collection must exist already (for instance, it should have been created previously by calling the create_collection method).

Args

name
the name of the collection.
document_type
this parameter acts as a formal specifier for the type checker. If omitted, the resulting AsyncCollection is implicitly an AsyncCollection[dict[str, Any]]. If provided, it must match the type hint specified in the assignment. See the examples below.
keyspace
the keyspace containing the collection. If no keyspace is specified, the setting for this database is used.
embedding_api_key
optional API key(s) for interacting with the collection. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
spawn_api_options
a specification - complete or partial - of the API Options to override the defaults inherited from the Database. This allows for a deeper configuration of the collection, e.g. concerning timeouts; if this is passed together with the named timeout parameters, the latter will take precedence in their respective settings.

Returns

an AsyncCollection instance, representing the desired collection (but without any form of validation).

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> async def count_docs(adb: AsyncDatabase, c_name: str) -> int:
...    async_col = adb.get_collection(c_name)
...    return await async_col.count_documents({}, upper_bound=100)
...
>>> asyncio.run(count_docs(async_database, "my_collection"))
45

Note: the attribute and indexing syntax forms achieve the same effect as this method, returning an AsyncCollection. In other words, the following are equivalent: async_database.get_collection("coll_name") async_database.coll_name async_database["coll_name"]

Expand source code
def get_collection(
    self,
    name: str,
    *,
    document_type: type[Any] = DefaultDocumentType,
    keyspace: str | None = None,
    embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> AsyncCollection[DOC]:
    """
    Create an `AsyncCollection` object instance pointing at a collection
    of this database.

    This is a purely client-side operation: no request is issued and no
    check is made that the collection exists. For the returned
    `AsyncCollection` to be usable, the collection should already be
    present on the database (e.g. created earlier with `create_collection`).

    Args:
        name: the name of the collection.
        document_type: this parameter acts as a formal specifier for the
            type checker. If omitted, the resulting AsyncCollection is
            implicitly an `AsyncCollection[dict[str, Any]]`. If provided,
            it must match the type hint specified in the assignment.
            See the examples below.
        keyspace: the keyspace containing the collection. If no keyspace
            is specified, the setting for this database is used.
        embedding_api_key: optional API key(s) for interacting with the collection.
            If an embedding service is configured, and this parameter is not None,
            each Data API call will include the necessary embedding-related headers
            as specified by this parameter. If a string is passed, it translates
            into the one "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            For some vectorize providers/models, if using header-based
            authentication, specialized subclasses of
            `astrapy.authentication.EmbeddingHeadersProvider` should be supplied.
        spawn_api_options: a specification - complete or partial - of the
            API Options to override the defaults inherited from the Database.
            This allows for a deeper configuration of the collection, e.g.
            concerning timeouts; if this is passed together with
            the named timeout parameters, the latter will take precedence
            in their respective settings.

    Returns:
        an `AsyncCollection` instance, representing the desired collection
            (but without any form of validation).

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> async def count_docs(adb: AsyncDatabase, c_name: str) -> int:
        ...    async_col = adb.get_collection(c_name)
        ...    return await async_col.count_documents({}, upper_bound=100)
        ...
        >>> asyncio.run(count_docs(async_database, "my_collection"))
        45

    Note: the attribute and indexing syntax forms achieve the same effect
        as this method, returning an AsyncCollection.
        In other words, the following are equivalent:
            async_database.get_collection("coll_name")
            async_database.coll_name
            async_database["coll_name"]
    """

    # deferred import, to sidestep a circular-import problem
    from astrapy.collection import AsyncCollection

    # explicit embedding_api_key wins over anything in spawn_api_options
    effective_api_options = self.api_options.with_override(
        spawn_api_options,
    ).with_override(
        APIOptions(
            embedding_api_key=embedding_api_key,
        ),
    )

    target_keyspace = keyspace or self.keyspace
    if target_keyspace is None:
        raise ValueError(
            "No keyspace specified. This operation requires a keyspace to "
            "be set, e.g. through the `use_keyspace` method."
        )
    return AsyncCollection(
        database=self,
        name=name,
        keyspace=target_keyspace,
        api_options=effective_api_options,
    )
def get_database_admin(self, *, token: str | TokenProvider | UnsetType = (unset), spawn_api_options: APIOptions | UnsetType = (unset)) ‑> DatabaseAdmin

Return a DatabaseAdmin object corresponding to this database, for use in admin tasks such as managing keyspaces.

This method, depending on the environment where the database resides, returns an appropriate subclass of DatabaseAdmin.

Args

token
an access token with enough permission on the database to perform the desired tasks. If omitted (as it can generally be done), the token of this Database is used. This can be either a literal token string or a subclass of TokenProvider.
spawn_api_options
a specification - complete or partial - of the API Options to override the defaults. This allows for a deeper configuration of the database admin, e.g. concerning timeouts; if this is passed together with the equivalent named parameters, the latter will take precedence in their respective settings.

Returns

A DatabaseAdmin instance targeting this database. More precisely, for Astra DB an instance of AstraDBDatabaseAdmin is returned; for other environments, an instance of DataAPIDatabaseAdmin is returned.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> my_db_admin = async_database.get_database_admin()
>>> if "new_keyspace" not in my_db_admin.list_keyspaces():
...     my_db_admin.create_keyspace("new_keyspace")
>>> my_db_admin.list_keyspaces()
['default_keyspace', 'new_keyspace']
Expand source code
def get_database_admin(
    self,
    *,
    token: str | TokenProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> DatabaseAdmin:
    """
    Return a DatabaseAdmin object for this database, suitable for admin
    work such as keyspace management.

    The concrete class of the returned object depends on the environment
    the database belongs to: an appropriate subclass of DatabaseAdmin
    is chosen accordingly.

    Args:
        token: an access token with enough permission on the database to
            perform the desired tasks. If omitted (as it can generally be done),
            the token of this Database is used.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        spawn_api_options: a specification - complete or partial - of the
            API Options to override the defaults.
            This allows for a deeper configuration of the database admin, e.g.
            concerning timeouts; if this is passed together with
            the equivalent named parameters, the latter will take precedence
            in their respective settings.

    Returns:
        A DatabaseAdmin instance targeting this database. More precisely,
        for Astra DB an instance of `AstraDBDatabaseAdmin` is returned;
        for other environments, an instance of `DataAPIDatabaseAdmin` is returned.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> my_db_admin = async_database.get_database_admin()
        >>> if "new_keyspace" not in my_db_admin.list_keyspaces():
        ...     my_db_admin.create_keyspace("new_keyspace")
        >>> my_db_admin.list_keyspaces()
        ['default_keyspace', 'new_keyspace']
    """

    # deferred import, to sidestep a circular-import problem
    from astrapy.admin.admin import AstraDBDatabaseAdmin, DataAPIDatabaseAdmin

    # explicit token wins over anything in spawn_api_options
    combined_api_options = self.api_options.with_override(
        spawn_api_options
    ).with_override(
        APIOptions(
            token=token,
        )
    )

    # pick the admin class matching the database's environment
    if combined_api_options.environment in Environment.astra_db_values:
        admin_class: type[AstraDBDatabaseAdmin] | type[DataAPIDatabaseAdmin] = (
            AstraDBDatabaseAdmin
        )
    else:
        admin_class = DataAPIDatabaseAdmin
    return admin_class(
        api_endpoint=self.api_endpoint,
        api_options=combined_api_options,
        spawner_database=self,
    )
def get_table(self, name: str, *, row_type: type[Any] = dict[str, typing.Any], keyspace: str | None = None, embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = (unset), spawn_api_options: APIOptions | UnsetType = (unset)) ‑> AsyncTable[ROW]

Spawn an AsyncTable object instance representing a table on this database.

Creating an AsyncTable instance does not have any effect on the actual state of the database: in other words, for the created AsyncTable instance to be used meaningfully, the table must exist already (for instance, it should have been created previously by calling the create_table method).

Args

name
the name of the table.
row_type
this parameter acts as a formal specifier for the type checker. If omitted, the resulting AsyncTable is implicitly an AsyncTable[dict[str, Any]]. If provided, it must match the type hint specified in the assignment. See the examples below.
keyspace
the keyspace containing the table. If no keyspace is specified, the general setting for this database is used.
embedding_api_key
optional API key(s) for interacting with the table. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
spawn_api_options
a specification - complete or partial - of the API Options to override the defaults inherited from the Database. This allows for a deeper configuration of the table, e.g. concerning timeouts; if this is passed together with the named timeout parameters, the latter will take precedence in their respective settings.

Returns

an AsyncTable instance, representing the desired table (but without any form of validation).

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> # Get an AsyncTable object (and read a property of it as an example):
>>> my_async_table = async_database.get_table("games")
>>> my_async_table.full_name
'default_keyspace.games'
>>>
>>> # Get an AsyncTable object in a specific keyspace,
>>> # and set an embedding API key to it:
>>> my_other_async_table = async_database.get_table(
...     "tournaments",
...     keyspace="the_other_keyspace",
...     embedding_api_key="secret-012abc...",
... )
>>> from astrapy import AsyncTable
>>> MyCustomDictType = dict[str, int]
>>>
>>> # Get an AsyncTable object typed with a specific type for its rows:
>>> my_typed_async_table: AsyncTable[MyCustomDictType] = async_database.get_table(
...     "games",
...     row_type=MyCustomDictType,
... )
Expand source code
def get_table(
    self,
    name: str,
    *,
    row_type: type[Any] = DefaultRowType,
    keyspace: str | None = None,
    embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> AsyncTable[ROW]:
    """
    Create an `AsyncTable` object instance pointing at a table of this
    database.

    This is a purely client-side operation: no request is issued and no
    check is made that the table exists. For the returned `AsyncTable`
    to be usable, the table should already be present on the database
    (e.g. created earlier with `create_table`).

    Args:
        name: the name of the table.
        row_type: this parameter acts as a formal specifier for the type
            checker. If omitted, the resulting AsyncTable is implicitly
            an `AsyncTable[dict[str, Any]]`. If provided, it must match
            the type hint specified in the assignment.
            See the examples below.
        keyspace: the keyspace containing the table. If no keyspace
            is specified, the general setting for this database is used.
        embedding_api_key: optional API key(s) for interacting with the table.
            If an embedding service is configured, and this parameter is not None,
            each Data API call will include the necessary embedding-related headers
            as specified by this parameter. If a string is passed, it translates
            into the one "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            For some vectorize providers/models, if using header-based
            authentication, specialized subclasses of
            `astrapy.authentication.EmbeddingHeadersProvider` should be supplied.
        spawn_api_options: a specification - complete or partial - of the
            API Options to override the defaults inherited from the Database.
            This allows for a deeper configuration of the table, e.g.
            concerning timeouts; if this is passed together with
            the named timeout parameters, the latter will take precedence
            in their respective settings.

    Returns:
        an `AsyncTable` instance, representing the desired table
            (but without any form of validation).

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> # Get an AsyncTable object (and read a property of it as an example):
        >>> my_async_table = async_database.get_table("games")
        >>> my_async_table.full_name
        'default_keyspace.games'
        >>>
        >>> # Get an AsyncTable object in a specific keyspace,
        >>> # and set an embedding API key to it:
        >>> my_other_async_table = async_database.get_table(
        ...     "tournaments",
        ...     keyspace="the_other_keyspace",
        ...     embedding_api_key="secret-012abc...",
        ... )
        >>> from astrapy import AsyncTable
        >>> MyCustomDictType = dict[str, int]
        >>>
        >>> # Get an AsyncTable object typed with a specific type for its rows:
        >>> my_typed_async_table: AsyncTable[MyCustomDictType] = async_database.get_table(
        ...     "games",
        ...     row_type=MyCustomDictType,
        ... )
    """

    # deferred import, to sidestep a circular-import problem
    from astrapy.table import AsyncTable

    # explicit embedding_api_key wins over anything in spawn_api_options
    effective_api_options = self.api_options.with_override(
        spawn_api_options,
    ).with_override(
        APIOptions(
            embedding_api_key=embedding_api_key,
        ),
    )

    target_keyspace = keyspace or self.keyspace
    if target_keyspace is None:
        raise ValueError(
            "No keyspace specified. This operation requires a keyspace to "
            "be set, e.g. through the `use_keyspace` method."
        )
    return AsyncTable[ROW](
        database=self,
        name=name,
        keyspace=target_keyspace,
        api_options=effective_api_options,
    )
async def info(self, *, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> AstraDBDatabaseInfo

Additional information on the database as an AstraDBDatabaseInfo instance.

Some of the returned properties are dynamic throughout the lifetime of the database (such as raw_info["keyspaces"]). For this reason, each invocation of this method triggers a new request to the DevOps API.

Args

database_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for database_admin_timeout_ms.
timeout_ms
an alias for database_admin_timeout_ms.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> asyncio.run(async_database.info()).region
'eu-west-1'
>>> asyncio.run(
...     async_database.info()
... ).raw_info['datacenters'][0]['dateCreated']
'2023-01-30T12:34:56Z'

Note

see the AstraDBDatabaseInfo documentation for a caveat about the difference between the region and the raw["region"] attributes.

Expand source code
async def info(
    self,
    *,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> AstraDBDatabaseInfo:
    """
    Additional information on the database as an AstraDBDatabaseInfo instance.

    Some of the returned properties (such as raw_info["keyspaces"]) can
    change during the lifetime of the database, so every call to this
    method issues a fresh request to the DevOps API.

    Args:
        database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> asyncio.run(async_database.info()).region
        'eu-west-1'
        >>> asyncio.run(
        ...     async_database.info()
        ... ).raw_info['datacenters'][0]['dateCreated']
        '2023-01-30T12:34:56Z'

    Note:
        see the AstraDBDatabaseInfo documentation for a caveat about the difference
        between the `region` and the `raw["region"]` attributes.
    """

    # the DevOps API exists only for Astra DB environments
    if self.api_options.environment not in Environment.astra_db_values:
        raise InvalidEnvironmentException(
            "Environments outside of Astra DB are not supported."
        )

    # resolve the (aliased) timeout parameters into one effective value
    effective_timeout_ms, _ = _select_singlereq_timeout_da(
        timeout_options=self.api_options.timeout_options,
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info("getting database info")
    fetched_info = await async_fetch_database_info(
        self.api_endpoint,
        keyspace=self.keyspace,
        request_timeout_ms=effective_timeout_ms,
        api_options=self.api_options,
    )
    if fetched_info is None:
        raise DevOpsAPIException("Failure while fetching database info.")
    logger.info("finished getting database info")
    return fetched_info
async def list_collection_names(self, *, keyspace: str | None = None, collection_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[str]

List the names of all collections in a given keyspace of this database.

Args

keyspace
the keyspace to be inspected. If not specified, the general setting for this database is assumed.
collection_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for collection_admin_timeout_ms.
timeout_ms
an alias for collection_admin_timeout_ms.

Returns

a list of the collection names as strings, in no particular order.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> asyncio.run(async_database.list_collection_names())
['a_collection', 'another_col']
Expand source code
async def list_collection_names(
    self,
    *,
    keyspace: str | None = None,
    collection_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[str]:
    """
    List the names of all collections in a given keyspace of this database.

    Args:
        keyspace: the keyspace to be inspected. If not specified,
            the general setting for this database is assumed.
        collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `collection_admin_timeout_ms`.
        timeout_ms: an alias for `collection_admin_timeout_ms`.

    Returns:
        a list of the collection names as strings, in no particular order.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> asyncio.run(async_database.list_collection_names())
        ['a_collection', 'another_col']
    """

    # resolve the (aliased) timeout parameters into one effective value
    effective_timeout_ms, timeout_label = _select_singlereq_timeout_ca(
        timeout_options=self.api_options.timeout_options,
        collection_admin_timeout_ms=collection_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    commander = self._get_driver_commander(keyspace=keyspace)
    request_payload: dict[str, Any] = {"findCollections": {}}
    logger.info("findCollections")
    api_response = await commander.async_request(
        payload=request_payload,
        timeout_context=_TimeoutContext(
            request_ms=effective_timeout_ms, label=timeout_label
        ),
    )
    response_status = api_response.get("status", {})
    if "collections" not in response_status:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from findCollections API command.",
            raw_response=api_response,
        )
    logger.info("finished findCollections")
    return response_status["collections"]  # type: ignore[no-any-return]
async def list_collections(self, *, keyspace: str | None = None, collection_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[CollectionDescriptor]

List all collections in a given keyspace for this database.

Args

keyspace
the keyspace to be inspected. If not specified, the general setting for this database is assumed.
collection_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for collection_admin_timeout_ms.
timeout_ms
an alias for collection_admin_timeout_ms.

Returns

a list of CollectionDescriptor instances one for each collection.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> async def a_list_colls(adb: AsyncDatabase) -> None:
...     a_coll_list = await adb.list_collections()
...     print("* list:", a_coll_list)
...     for coll in await adb.list_collections():
...         print("* coll:", coll)
...
>>> asyncio.run(a_list_colls(async_database))
* list: [CollectionDescriptor(name='my_v_col', options=CollectionDefinition())]
* coll: CollectionDescriptor(name='my_v_col', options=CollectionDefinition())
Expand source code
async def list_collections(
    self,
    *,
    keyspace: str | None = None,
    collection_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[CollectionDescriptor]:
    """
    List all collections in a given keyspace for this database.

    Args:
        keyspace: the keyspace to be inspected. If not specified,
            the general setting for this database is assumed.
        collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `collection_admin_timeout_ms`.
        timeout_ms: an alias for `collection_admin_timeout_ms`.

    Returns:
        a list of CollectionDescriptor instances one for each collection.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> async def a_list_colls(adb: AsyncDatabase) -> None:
        ...     a_coll_list = await adb.list_collections()
        ...     print("* list:", a_coll_list)
        ...     for coll in await adb.list_collections():
        ...         print("* coll:", coll)
        ...
        >>> asyncio.run(a_list_colls(async_database))
        * list: [CollectionDescriptor(name='my_v_col', options=CollectionDefinition())]
        * coll: CollectionDescriptor(name='my_v_col', options=CollectionDefinition())
    """

    # resolve the (aliased) timeout parameters into one effective value
    effective_timeout_ms, timeout_label = _select_singlereq_timeout_ca(
        timeout_options=self.api_options.timeout_options,
        collection_admin_timeout_ms=collection_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # delegate to the shared context-based implementation
    return await self._list_collections_ctx(
        keyspace=keyspace,
        timeout_context=_TimeoutContext(
            request_ms=effective_timeout_ms, label=timeout_label
        ),
    )
async def list_table_names(self, *, keyspace: str | None = None, table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[str]

List the names of all tables in a given keyspace of this database.

Args

keyspace
the keyspace to be inspected. If not specified, the general setting for this database is assumed.
table_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for table_admin_timeout_ms.
timeout_ms
an alias for table_admin_timeout_ms.

Returns

a list of the table names as strings, in no particular order.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> async def destroy_temp_table(async_db: AsyncDatabase) -> None:
...     print(await async_db.list_table_names())
...     await async_db.drop_table("my_v_tab")
...     print(await async_db.list_table_names())
...
>>> asyncio.run(destroy_temp_table(async_database))
['fighters', 'my_v_tab', 'games']
['fighters', 'games']
Expand source code
async def list_table_names(
    self,
    *,
    keyspace: str | None = None,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[str]:
    """
    List the names of all tables in a given keyspace of this database.

    Args:
        keyspace: the keyspace to be inspected. If not specified,
            the general setting for this database is assumed.
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Returns:
        a list of the table names as strings, in no particular order.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> async def destroy_temp_table(async_db: AsyncDatabase) -> None:
        ...     print(await async_db.list_table_names())
        ...     await async_db.drop_table("my_v_tab")
        ...     print(await async_db.list_table_names())
        ...
        >>> asyncio.run(destroy_temp_table(async_database))
        ['fighters', 'my_v_tab', 'games']
        ['fighters', 'games']
    """

    # resolve the (aliased) timeout parameters into one effective value
    effective_timeout_ms, timeout_label = _select_singlereq_timeout_ta(
        timeout_options=self.api_options.timeout_options,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    commander = self._get_driver_commander(keyspace=keyspace)
    request_payload: dict[str, Any] = {"listTables": {}}
    logger.info("listTables")
    api_response = await commander.async_request(
        payload=request_payload,
        timeout_context=_TimeoutContext(
            request_ms=effective_timeout_ms, label=timeout_label
        ),
    )
    response_status = api_response.get("status", {})
    if "tables" not in response_status:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from listTables API command.",
            raw_response=api_response,
        )
    logger.info("finished listTables")
    return response_status["tables"]  # type: ignore[no-any-return]
async def list_tables(self, *, keyspace: str | None = None, table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[ListTableDescriptor]

List all tables in a given keyspace for this database.

Args

keyspace
the keyspace to be inspected. If not specified, the general setting for this database is assumed.
table_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for table_admin_timeout_ms.
timeout_ms
an alias for table_admin_timeout_ms.

Returns

a list of ListTableDescriptor instances, one for each table.

Example

>>> tables = asyncio.run(my_async_database.list_tables())
>>> tables
[ListTableDescriptor(name='fighters', definition=ListTableDefinition(...
>>> tables[1].name
'games'
>>> tables[1].definition.columns
{'match_id': TableScalarColumnTypeDescriptor(ColumnType.TEXT),...
>>> tables[1].definition.columns['score']
TableScalarColumnTypeDescriptor(ColumnType.INT)
>>> tables[1].definition.primary_key.partition_by
['match_id']
>>> tables[1].definition.primary_key.partition_sort
{'round': 1}
Expand source code
async def list_tables(
    self,
    *,
    keyspace: str | None = None,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[ListTableDescriptor]:
    """
    List all tables in a given keyspace for this database.

    Args:
        keyspace: the keyspace to be inspected. If not specified,
            the general setting for this database is assumed.
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Returns:
        a list of ListTableDescriptor instances, one for each table.

    Example:
        >>> tables = asyncio.run(my_async_database.list_tables())
        >>> tables
        [ListTableDescriptor(name='fighters', definition=ListTableDefinition(...
        >>> tables[1].name
        'games'
        >>> tables[1].definition.columns
        {'match_id': TableScalarColumnTypeDescriptor(ColumnType.TEXT),...
        >>> tables[1].definition.columns['score']
        TableScalarColumnTypeDescriptor(ColumnType.INT)
        >>> tables[1].definition.primary_key.partition_by
        ['match_id']
        >>> tables[1].definition.primary_key.partition_sort
        {'round': 1}
    """

    # resolve the (aliased) timeout parameters into one effective value
    effective_timeout_ms, timeout_label = _select_singlereq_timeout_ta(
        timeout_options=self.api_options.timeout_options,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # delegate to the shared context-based implementation
    return await self._list_tables_ctx(
        keyspace=keyspace,
        timeout_context=_TimeoutContext(
            request_ms=effective_timeout_ms, label=timeout_label
        ),
    )
async def name(self) ‑> str

The name of this database. Note that this bears no unicity guarantees.

Calling this method the first time involves a request to the DevOps API (the resulting database name is then cached). See the info() method for more details.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> asyncio.run(async_database.name())
'the_application_database'
Expand source code
async def name(self) -> str:
    """
    Return the name of this database (no unicity guarantees are attached to it).

    The first invocation triggers a DevOps API request and the resulting
    name is cached on the instance; subsequent calls return the cached value.
    See the `info()` method for more details.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> asyncio.run(async_database.name())
        'the_application_database'
    """

    cached_name = self._name
    if cached_name is None:
        # Cache miss: fetch the full database info and keep only the name.
        cached_name = (await self.info()).name
        self._name = cached_name
    return cached_name
def to_sync(self, *, keyspace: str | None = None, token: str | TokenProvider | UnsetType = (unset), api_options: APIOptions | UnsetType = (unset)) ‑> Database

Create a (synchronous) Database from this one. Save for the arguments explicitly provided as overrides, everything else is kept identical to this database in the copy.

Args

keyspace
this is the keyspace all method calls will target, unless one is explicitly specified in the call. If no keyspace is supplied when creating a Database, the name "default_keyspace" is set.
token
an Access Token to the database. Example: "AstraCS:xyz…" This can be either a literal token string or a subclass of TokenProvider.
api_options
any additional options to set for the result, in the form of an APIOptions instance (where one can set just the needed attributes). In case the same setting is also provided as named parameter, the latter takes precedence.

Returns

the new copy, a Database instance.

Example

>>> my_sync_db = async_database.to_sync()
>>> my_sync_db.list_collection_names()
['a_collection', 'another_collection']
Expand source code
def to_sync(
    self,
    *,
    keyspace: str | None = None,
    token: str | TokenProvider | UnsetType = _UNSET,
    api_options: APIOptions | UnsetType = _UNSET,
) -> Database:
    """
    Build a synchronous `Database` clone of this database.

    Except for the overrides passed explicitly to this method, the copy
    keeps every attribute identical to this (async) database.

    Args:
        keyspace: the keyspace all method calls will target, unless
            one is explicitly specified in the call. If no keyspace is supplied
            when creating a Database, the name "default_keyspace" is set.
        token: an Access Token to the database. Example: "AstraCS:xyz..."
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        api_options: any additional options to set for the result, in the form of
            an APIOptions instance (where one can set just the needed attributes).
            In case the same setting is also provided as named parameter,
            the latter takes precedence.

    Returns:
        the new copy, a `Database` instance.

    Example:
        >>> my_sync_db = async_database.to_sync()
        >>> my_sync_db.list_collection_names()
        ['a_collection', 'another_collection']
    """

    # Named parameters take precedence, hence they are applied as the
    # last override on top of `api_options`.
    merged_options = self.api_options.with_override(
        api_options
    ).with_override(
        APIOptions(token=token)
    )
    return Database(
        api_endpoint=self.api_endpoint,
        keyspace=keyspace or self.keyspace,
        api_options=merged_options,
    )
def use_keyspace(self, keyspace: str) ‑> None

Switch to a new working keyspace for this database. This method changes (mutates) the AsyncDatabase instance.

Note that this method does not create the keyspace, which should exist already (created for instance with a DatabaseAdmin.async_create_keyspace call).

Args

keyspace
the new keyspace to use as the database working keyspace.

Returns

None.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> asyncio.run(async_database.list_collection_names())
['coll_1', 'coll_2']
>>> async_database.use_keyspace("an_empty_keyspace")
>>> asyncio.run(async_database.list_collection_names())
[]
Expand source code
def use_keyspace(self, keyspace: str) -> None:
    """
    Switch this AsyncDatabase to a new working keyspace (in-place mutation).

    The keyspace itself is not created by this call: it must already exist
    (e.g. created through a `DatabaseAdmin.async_create_keyspace` call).

    Args:
        keyspace: the new keyspace to use as the database working keyspace.

    Returns:
        None.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> asyncio.run(async_database.list_collection_names())
        ['coll_1', 'coll_2']
        >>> async_database.use_keyspace("an_empty_keyspace")
        >>> asyncio.run(async_database.list_collection_names())
        []
    """
    logger.info(f"switching to keyspace '{keyspace}'")
    # Mutate the working keyspace, then rebuild the API commander so that
    # subsequent requests target the new keyspace path.
    self._using_keyspace = keyspace
    self._api_commander = self._get_api_commander(keyspace=self.keyspace)
def with_options(self, *, keyspace: str | None = None, token: str | TokenProvider | UnsetType = (unset), api_options: APIOptions | UnsetType = (unset)) ‑> AsyncDatabase

Create a clone of this database with some changed attributes.

Args

keyspace
this is the keyspace all method calls will target, unless one is explicitly specified in the call. If no keyspace is supplied when creating a Database, the name "default_keyspace" is set.
token
an Access Token to the database. Example: "AstraCS:xyz...". This can be either a literal token string or a subclass of TokenProvider.
api_options
any additional options to set for the clone, in the form of an APIOptions instance (where one can set just the needed attributes). In case the same setting is also provided as named parameter, the latter takes precedence.

Returns

a new AsyncDatabase instance.

Example

>>> async_database_2 = async_database.with_options(
...     keyspace="the_other_keyspace",
...     token="AstraCS:xyz...",
... )
Expand source code
def with_options(
    self,
    *,
    keyspace: str | None = None,
    token: str | TokenProvider | UnsetType = _UNSET,
    api_options: APIOptions | UnsetType = _UNSET,
) -> AsyncDatabase:
    """
    Return a clone of this database with selected attributes changed.

    Args:
        keyspace: the keyspace all method calls will target, unless
            one is explicitly specified in the call. If no keyspace is supplied
            when creating a Database, the name "default_keyspace" is set.
        token: an Access Token to the database. Example: `"AstraCS:xyz..."`.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        api_options: any additional options to set for the clone, in the form of
            an APIOptions instance (where one can set just the needed attributes).
            In case the same setting is also provided as named parameter,
            the latter takes precedence.

    Returns:
        a new `AsyncDatabase` instance.

    Example:
        >>> async_database_2 = async_database.with_options(
        ...     keyspace="the_other_keyspace",
        ...     token="AstraCS:xyz...",
        ... )
    """

    # Delegate the actual cloning logic to the internal copy helper.
    return self._copy(
        keyspace=keyspace,
        token=token,
        api_options=api_options,
    )
class AsyncTable (*, database: AsyncDatabase, name: str, keyspace: str | None, api_options: FullAPIOptions)

A Data API table, the object to interact with the Data API for structured data, especially for DDL operations. This class has an asynchronous interface for use with asyncio.

This class is not meant for direct instantiation by the user, rather it is obtained by invoking methods such as get_table of AsyncDatabase, wherefrom the AsyncTable inherits its API options such as authentication token and API endpoint. In order to create a table, instead, one should call the create_table method of AsyncDatabase, providing a table definition parameter that can be built in different ways (see the CreateTableDefinition object and examples below).

Args

database
an AsyncDatabase object, instantiated earlier. This represents the database the table belongs to.
name
the table name. This parameter should match an existing table on the database.
keyspace
this is the keyspace to which the table belongs. If nothing is specified, the database's working keyspace is used.
api_options
a complete specification of the API Options for this instance.

Examples

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> from astrapy import DataAPIClient, AsyncTable
>>> client = DataAPIClient()
>>> async_database = client.get_async_database(
...     "https://01234567-....apps.astra.datastax.com",
...     token="AstraCS:..."
... )
>>> # Create a table using the fluent syntax for definition
>>> from astrapy.constants import SortMode
>>> from astrapy.info import (
...     CreateTableDefinition,
...     ColumnType,
... )
>>> table_definition = (
...     CreateTableDefinition.builder()
...     .add_column("match_id", ColumnType.TEXT)
...     .add_column("round", ColumnType.INT)
...     .add_vector_column("m_vector", dimension=3)
...     .add_column("score", ColumnType.INT)
...     .add_column("when", ColumnType.TIMESTAMP)
...     .add_column("winner", ColumnType.TEXT)
...     .add_set_column("fighters", ColumnType.UUID)
...     .add_partition_by(["match_id"])
...     .add_partition_sort({"round": SortMode.ASCENDING})
...     .build()
... )
>>> my_table = await async_database.create_table(
...     "games",
...     definition=table_definition,
... )
>>> # Create a table with the definition as object
>>> # (and do not raise an error if the table exists already)
>>> from astrapy.info import (
...     CreateTableDefinition,
...     TablePrimaryKeyDescriptor,
...     TableScalarColumnTypeDescriptor,
...     TableValuedColumnType,
...     TableValuedColumnTypeDescriptor,
...     TableVectorColumnTypeDescriptor,
... )
>>> table_definition_1 = CreateTableDefinition(
...     columns={
...         "match_id": TableScalarColumnTypeDescriptor(
...             ColumnType.TEXT,
...         ),
...         "round": TableScalarColumnTypeDescriptor(
...             ColumnType.INT,
...         ),
...         "m_vector": TableVectorColumnTypeDescriptor(
...             column_type="vector", dimension=3
...         ),
...         "score": TableScalarColumnTypeDescriptor(
...             ColumnType.INT,
...         ),
...         "when": TableScalarColumnTypeDescriptor(
...             ColumnType.TIMESTAMP,
...         ),
...         "winner": TableScalarColumnTypeDescriptor(
...             ColumnType.TEXT,
...         ),
...         "fighters": TableValuedColumnTypeDescriptor(
...             column_type=TableValuedColumnType.SET,
...             value_type=ColumnType.UUID,
...         ),
...     },
...     primary_key=TablePrimaryKeyDescriptor(
...         partition_by=["match_id"],
...         partition_sort={"round": SortMode.ASCENDING},
...     ),
... )
>>> my_table_1 = await async_database.create_table(
...     "games",
...     definition=table_definition_1,
...     if_not_exists=True,
... )
>>> # Create a table with the definition as plain dictionary
>>> # (and do not raise an error if the table exists already)
>>> table_definition_2 = {
...     "columns": {
...         "match_id": {"type": "text"},
...         "round": {"type": "int"},
...         "m_vector": {"type": "vector", "dimension": 3},
...         "score": {"type": "int"},
...         "when": {"type": "timestamp"},
...         "winner": {"type": "text"},
...         "fighters": {"type": "set", "valueType": "uuid"},
...     },
...     "primaryKey": {
...         "partitionBy": ["match_id"],
...         "partitionSort": {"round": 1},
...     },
... }
>>> my_table_2 = await async_database.create_table(
...     "games",
...     definition=table_definition_2,
...     if_not_exists=True,
... )
>>> # Get a reference to an existing table
>>> # (no checks are performed on DB)
>>> my_table_4 = async_database.get_table("my_already_existing_table")

Note

creating an instance of AsyncTable does not trigger, in itself, actual creation of the table on the database. The latter should have been created beforehand, e.g. through the create_table method of a Database.

Expand source code
class AsyncTable(Generic[ROW]):
    """
    A Data API table, the object to interact with the Data API for structured data,
    especially for DDL operations.
    This class has an asynchronous interface for use with asyncio.

    This class is not meant for direct instantiation by the user, rather
    it is obtained by invoking methods such as `get_table` of AsyncDatabase,
    wherefrom the AsyncTable inherits its API options such as authentication
    token and API endpoint.
    In order to create a table, instead, one should call the `create_table`
    method of AsyncDatabase, providing a table definition parameter that can be built
    in different ways (see the `CreateTableDefinition` object and examples below).

    Args:
        database: an AsyncDatabase object, instantiated earlier. This represents
            the database the table belongs to.
        name: the table name. This parameter should match an existing
            table on the database.
        keyspace: this is the keyspace to which the table belongs.
            If nothing is specified, the database's working keyspace is used.
        api_options: a complete specification of the API Options for this instance.

    Examples:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> from astrapy import DataAPIClient, AsyncTable
        >>> client = astrapy.DataAPIClient()
        >>> async_database = client.get_async_database(
        ...     "https://01234567-....apps.astra.datastax.com",
        ...     token="AstraCS:..."
        ... )

        >>> # Create a table using the fluent syntax for definition
        >>> from astrapy.constants import SortMode
        >>> from astrapy.info import (
        ...     CreateTableDefinition,
        ...     ColumnType,
        ... )
        >>> table_definition = (
        ...     CreateTableDefinition.builder()
        ...     .add_column("match_id", ColumnType.TEXT)
        ...     .add_column("round", ColumnType.INT)
        ...     .add_vector_column("m_vector", dimension=3)
        ...     .add_column("score", ColumnType.INT)
        ...     .add_column("when", ColumnType.TIMESTAMP)
        ...     .add_column("winner", ColumnType.TEXT)
        ...     .add_set_column("fighters", ColumnType.UUID)
        ...     .add_partition_by(["match_id"])
        ...     .add_partition_sort({"round": SortMode.ASCENDING})
        ...     .build()
        ... )
        >>> my_table = await async_database.create_table(
        ...     "games",
        ...     definition=table_definition,
        ... )

        >>> # Create a table with the definition as object
        >>> # (and do not raise an error if the table exists already)
        >>> from astrapy.info import (
        ...     CreateTableDefinition,
        ...     TablePrimaryKeyDescriptor,
        ...     TableScalarColumnTypeDescriptor,
        ...     TableValuedColumnType,
        ...     TableValuedColumnTypeDescriptor,
        ...     TableVectorColumnTypeDescriptor,
        ... )
        >>> table_definition_1 = CreateTableDefinition(
        ...     columns={
        ...         "match_id": TableScalarColumnTypeDescriptor(
        ...             ColumnType.TEXT,
        ...         ),
        ...         "round": TableScalarColumnTypeDescriptor(
        ...             ColumnType.INT,
        ...         ),
        ...         "m_vector": TableVectorColumnTypeDescriptor(
        ...             column_type="vector", dimension=3
        ...         ),
        ...         "score": TableScalarColumnTypeDescriptor(
        ...             ColumnType.INT,
        ...         ),
        ...         "when": TableScalarColumnTypeDescriptor(
        ...             ColumnType.TIMESTAMP,
        ...         ),
        ...         "winner": TableScalarColumnTypeDescriptor(
        ...             ColumnType.TEXT,
        ...         ),
        ...         "fighters": TableValuedColumnTypeDescriptor(
        ...             column_type=TableValuedColumnType.SET,
        ...             value_type=ColumnType.UUID,
        ...         ),
        ...     },
        ...     primary_key=TablePrimaryKeyDescriptor(
        ...         partition_by=["match_id"],
        ...         partition_sort={"round": SortMode.ASCENDING},
        ...     ),
        ... )
        >>> my_table_1 = await async_database.create_table(
        ...     "games",
        ...     definition=table_definition_1,
        ...     if_not_exists=True,
        ... )

        >>> # Create a table with the definition as plain dictionary
        >>> # (and do not raise an error if the table exists already)
        >>> table_definition_2 = {
        ...     "columns": {
        ...         "match_id": {"type": "text"},
        ...         "round": {"type": "int"},
        ...         "m_vector": {"type": "vector", "dimension": 3},
        ...         "score": {"type": "int"},
        ...         "when": {"type": "timestamp"},
        ...         "winner": {"type": "text"},
        ...         "fighters": {"type": "set", "valueType": "uuid"},
        ...     },
        ...     "primaryKey": {
        ...         "partitionBy": ["match_id"],
        ...         "partitionSort": {"round": 1},
        ...     },
        ... }
        >>> my_table_2 = await async_database.create_table(
        ...     "games",
        ...     definition=table_definition_2,
        ...     if_not_exists=True,
        ... )

        >>> # Get a reference to an existing table
        >>> # (no checks are performed on DB)
        >>> my_table_4 = async_database.get_table("my_already_existing_table")

    Note:
        creating an instance of AsyncTable does not trigger, in itself, actual
        creation of the table on the database. The latter should have been created
        beforehand, e.g. through the `create_table` method of a Database.
    """

    def __init__(
        self,
        *,
        database: AsyncDatabase,
        name: str,
        keyspace: str | None,
        api_options: FullAPIOptions,
    ) -> None:
        """Set up the table handle; requires a resolvable keyspace."""
        self.api_options = api_options
        self._name = name
        # Fall back to the database's working keyspace when none is given.
        resolved_keyspace = keyspace if keyspace is not None else database.keyspace
        if resolved_keyspace is None:
            raise ValueError("Attempted to create AsyncTable with 'keyspace' unset.")

        # Keep a private database copy pinned to the resolved keyspace and
        # carrying this table's API options.
        self._database = database._copy(
            keyspace=resolved_keyspace, api_options=self.api_options
        )
        auth_headers = {
            DEFAULT_DATA_API_AUTH_HEADER: self.api_options.token.get_token()
        }
        self._commander_headers = {
            **auth_headers,
            **self.api_options.embedding_api_key.get_headers(),
            **self.api_options.database_additional_headers,
        }
        self._api_commander = self._get_api_commander()
        # Converter agent handling row (de)serialization per the serdes options.
        self._converter_agent: _TableConverterAgent[ROW] = _TableConverterAgent(
            options=self.api_options.serdes_options,
        )

    def __repr__(self) -> str:
        # Compact one-line description: class name, table name, keyspace,
        # target endpoint and the full API options.
        return (
            f'{self.__class__.__name__}(name="{self.name}", '
            f'keyspace="{self.keyspace}", '
            f'database.api_endpoint="{self.database.api_endpoint}", '
            f"api_options={self.api_options})"
        )

    def __eq__(self, other: Any) -> bool:
        # Two AsyncTable instances are equal when they share name, database
        # and API options; any non-AsyncTable compares unequal.
        if not isinstance(other, AsyncTable):
            return False
        return (
            self._name == other._name
            and self._database == other._database
            and self.api_options == other.api_options
        )

    def _get_api_commander(self) -> APICommander:
        """Build a new APICommander pointing at this table's base path."""

        if self._database.keyspace is None:
            raise ValueError(
                "No keyspace specified. AsyncTable requires a keyspace to "
                "be set, e.g. through the `keyspace` constructor parameter."
            )

        # Assemble the request path from API path/version, keyspace and
        # table name, dropping unset and empty components.
        url_options = self._database.api_options.data_api_url_options
        raw_components = (
            url_options.api_path,
            url_options.api_version,
            self._database.keyspace,
            self._name,
        )
        path_components = [
            stripped
            for stripped in (
                component.strip("/")
                for component in raw_components
                if component is not None
            )
            if stripped != ""
        ]
        base_path = "/" + "/".join(path_components)
        return APICommander(
            api_endpoint=self._database.api_endpoint,
            path=base_path,
            headers=self._commander_headers,
            callers=self.api_options.callers,
            redacted_header_names=self.api_options.redacted_header_names,
            handle_decimals_writes=True,
            handle_decimals_reads=True,
        )

    async def __aenter__(self: AsyncTable[ROW]) -> AsyncTable[ROW]:
        """Enter the async context manager: no setup needed, return self."""
        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None = None,
        exc_value: BaseException | None = None,
        traceback: TracebackType | None = None,
    ) -> None:
        """Exit the async context manager, delegating to the API commander."""
        commander = self._api_commander
        if commander is not None:
            await commander.__aexit__(
                exc_type=exc_type,
                exc_value=exc_value,
                traceback=traceback,
            )

    def _copy(
        self: AsyncTable[ROW],
        *,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncTable[ROW]:
        # Named parameters win over the generic `api_options`, so they are
        # applied as the last override in the chain.
        combined_options = self.api_options.with_override(
            api_options
        ).with_override(
            APIOptions(embedding_api_key=embedding_api_key)
        )
        return AsyncTable(
            database=self.database,
            name=self.name,
            keyspace=self.keyspace,
            api_options=combined_options,
        )

    def with_options(
        self: AsyncTable[ROW],
        *,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncTable[ROW]:
        """
        Return a clone of this table with selected attributes changed.

        Args:
            embedding_api_key: optional API key(s) for interacting with the table.
                If an embedding service is configured, and this parameter is not None,
                each Data API call will include the necessary embedding-related headers
                as specified by this parameter. If a string is passed, it translates
                into the one "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
                For some vectorize providers/models, if using header-based authentication,
                specialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`
                should be supplied.
            api_options: any additional options to set for the clone, in the form of
                an APIOptions instance (where one can set just the needed attributes).
                In case the same setting is also provided as named parameter,
                the latter takes precedence.

        Returns:
            a new AsyncTable instance.

        Example:
            >>> table_with_api_key_configured = my_async_table.with_options(
            ...     embedding_api_key="secret-key-0123abcd...",
            ... )
        """

        # Cloning is performed by the internal copy helper.
        return self._copy(
            embedding_api_key=embedding_api_key,
            api_options=api_options,
        )

    def to_sync(
        self: AsyncTable[ROW],
        *,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> Table[ROW]:
        """
        Create a Table from this one. Save for the arguments
        explicitly provided as overrides, everything else is kept identical
        to this table in the copy (the database is converted into
        a synchronous object).

        Args:
            embedding_api_key: optional API key(s) for interacting with the table.
                If an embedding service is configured, and this parameter is not None,
                each Data API call will include the necessary embedding-related headers
                as specified by this parameter. If a string is passed, it translates
                into the one "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
                For some vectorize providers/models, if using header-based authentication,
                specialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`
                should be supplied.
            api_options: any additional options to set for the result, in the form of
                an APIOptions instance (where one can set just the needed attributes).
                In case the same setting is also provided as named parameter,
                the latter takes precedence.

        Returns:
            the new copy, a Table instance.

        Example:
            >>> my_async_table.to_sync().find_one(
            ...     {"match_id": "fight4"},
            ...     projection={"winner": True},
            ... )
            {"pk": 1, "column": "value"}
        """

        # Named parameters take precedence over `api_options`, hence they
        # are applied as the last override.
        arg_api_options = APIOptions(
            embedding_api_key=embedding_api_key,
        )
        final_api_options = self.api_options.with_override(api_options).with_override(
            arg_api_options
        )
        return Table(
            database=self.database.to_sync(),
            name=self.name,
            keyspace=self.keyspace,
            api_options=final_api_options,
        )

    async def definition(
        self,
        *,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> ListTableDefinition:
        """
        Query the Data API and return a structure defining the table schema.
        If there are no unsupported columns in the table, the return value has
        the same contents as could have been provided to a `create_table` method call.

        Args:
            table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Returns:
            A `ListTableDefinition` object, available for inspection.

        Raises:
            ValueError: if no table with this name is found.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> asyncio.run(my_table.definition())
            ListTableDefinition(columns=[match_id,round,fighters, ...  # shortened
        """

        _table_admin_timeout_ms, _ta_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info(f"getting tables in search of '{self.name}'")
        # NOTE(review): keyspace=None appears to defer to the database's own
        # keyspace context — confirm against `_list_tables_ctx`.
        all_descriptors = await self.database._list_tables_ctx(
            keyspace=None,
            timeout_context=_TimeoutContext(
                request_ms=_table_admin_timeout_ms,
                label=_ta_label,
            ),
        )
        logger.info(f"finished getting tables in search of '{self.name}'")
        # Pick the first (and only) descriptor matching this table's name.
        self_descriptor = next(
            (desc for desc in all_descriptors if desc.name == self.name),
            None,
        )
        if self_descriptor is None:
            raise ValueError(
                f"Table {self.keyspace}.{self.name} not found.",
            )
        return self_descriptor.definition

    async def info(
        self,
        *,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> TableInfo:
        """
        Return information on the table, such as its name, keyspace and
        owning database. This is distinct from the table definition (schema).

        Args:
            database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying DevOps API request.
                If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.

        Returns:
            A TableInfo object for inspection.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> # Note: output reformatted for clarity.
            >>> asyncio.run(my_async_table.info())
            TableInfo(
                database_info=AstraDBDatabaseInfo(id=..., name=..., ...),
                keyspace='default_keyspace',
                name='games',
                full_name='default_keyspace.games'
            )
        """

        # Fetch the owning database's info and wrap it together with this
        # table's identifying attributes.
        database_info = await self.database.info(
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        return TableInfo(
            database_info=database_info,
            keyspace=self.keyspace,
            name=self.name,
            full_name=self.full_name,
        )

    @property
    def database(self) -> AsyncDatabase:
        """
        an AsyncDatabase object, the database this table belongs to.

        Example:
            >>> asyncio.run(my_async_table.database.name())
            'the_db'
        """

        return self._database

    @property
    def keyspace(self) -> str:
        """
        The keyspace this table is in.

        Example:
            >>> my_async_table.keyspace
            'default_keyspace'
        """

        database_keyspace = self.database.keyspace
        if database_keyspace is None:
            raise ValueError("The table's DB is set with keyspace=None")
        return database_keyspace

    @property
    def name(self) -> str:
        """
        The name identifying this table within its keyspace.

        Example:
            >>> my_async_table.name
            'my_table'
        """

        return self._name

    @property
    def full_name(self) -> str:
        """
        The fully-qualified table name within the database,
        in the form "keyspace.table_name".

        Example:
            >>> my_async_table.full_name
            'default_keyspace.my_table'
        """

        return ".".join((self.keyspace, self.name))

    async def _create_generic_index(
        self,
        i_name: str,
        ci_definition: dict[str, Any],
        ci_command: str,
        if_not_exists: bool | None,
        table_admin_timeout_ms: int | None,
        request_timeout_ms: int | None,
        timeout_ms: int | None,
    ) -> None:
        """Shared driver for index-creation API commands.

        Sends `ci_command` with the given index name/definition and raises
        `UnexpectedDataAPIResponseException` on a non-ok response status.
        """
        # Only include the "ifNotExists" option when the caller specified it.
        ci_options: dict[str, bool] = (
            {} if if_not_exists is None else {"ifNotExists": if_not_exists}
        )
        _table_admin_timeout_ms, _ta_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        index_payload = {
            ci_command: {
                "name": i_name,
                "definition": ci_definition,
                "options": ci_options,
            }
        }
        logger.info(f"{ci_command}('{i_name}')")
        api_response = await self._api_commander.async_request(
            payload=index_payload,
            timeout_context=_TimeoutContext(
                request_ms=_table_admin_timeout_ms, label=_ta_label
            ),
        )
        if api_response.get("status") != {"ok": 1}:
            raise UnexpectedDataAPIResponseException(
                text=f"Faulty response from {ci_command} API command.",
                raw_response=api_response,
            )
        logger.info(f"finished {ci_command}('{i_name}')")

    async def create_index(
        self,
        name: str,
        *,
        column: str,
        options: TableIndexOptions | dict[str, Any] | None = None,
        if_not_exists: bool | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Create an index on a non-vector column of the table.

        This is a blocking operation: the method returns once the index
        is created and ready to use.

        For vector columns, use the `create_vector_index` method instead.

        Args:
            name: the name for the new index. Index names must be unique
                across the keyspace.
            column: the table column the index is to be built on.
            options: if passed, an instance of `TableIndexOptions` (or an
                equivalent dictionary) carrying index settings such as
                case-sensitivity for a text column.
                See the `astrapy.info.TableIndexOptions` class for more details.
            if_not_exists: if set to True, attempting to create an index whose
                name is already taken is a no-op rather than an error (no
                index creation occurs on the database in that case). The API
                default of False means a name collision raises an API error.
            table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults
                apply. (This method issues a single API request, hence all
                timeout parameters are treated the same.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Examples:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> from astrapy.info import TableIndexOptions
            >>>
            >>> # create an index on a column
            >>> await my_async_table.create_index(
            ...     "score_index",
            ...     column="score",
            ... )
            >>>
            >>> # create an index on a textual column, specifying indexing options
            >>> await my_async_table.create_index(
            ...     "winner_index",
            ...     column="winner",
            ...     options=TableIndexOptions(
            ...         ascii=False,
            ...         normalize=True,
            ...         case_sensitive=False,
            ...     ),
            ... )
        """

        # Delegate to the shared helper, which handles payload assembly,
        # timeouts and response checking for all index-creation commands.
        return await self._create_generic_index(
            i_name=name,
            ci_definition=TableIndexDefinition(
                column=column,
                options=TableIndexOptions.coerce(options or {}),
            ).as_dict(),
            ci_command="createIndex",
            if_not_exists=if_not_exists,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )

    async def create_vector_index(
        self,
        name: str,
        *,
        column: str,
        options: TableVectorIndexOptions | dict[str, Any] | None = None,
        if_not_exists: bool | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Create a vector index on a vector column of the table, enabling vector
        similarity search operations on it.

        This is a blocking operation: the method returns once the index
        is created and ready to use.

        For non-vector columns, use the `create_index` method instead.

        Args:
            name: the name for the new index. Index names must be unique
                across the keyspace.
            column: the table column, of type "vector", the index is built on.
            options: an instance of `TableVectorIndexOptions` (or an equivalent
                dictionary) carrying settings for the vector index, such as
                the similarity metric or, if desired, a "source model" setting.
                If omitted, the Data API defaults apply for the index.
                See the `astrapy.info.TableVectorIndexOptions` class for
                more details.
            if_not_exists: if set to True, attempting to create an index whose
                name is already taken is a no-op rather than an error (no
                index creation occurs on the database in that case). The API
                default of False means a name collision raises an API error.
            table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults
                apply. (This method issues a single API request, hence all
                timeout parameters are treated the same.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> from astrapy.constants import VectorMetric
            >>> from astrapy.info import TableVectorIndexOptions
            >>>
            >>> # create a vector index with dot-product similarity
            >>> await my_async_table.create_vector_index(
            ...     "m_vector_index",
            ...     column="m_vector",
            ...     options=TableVectorIndexOptions(
            ...         metric=VectorMetric.DOT_PRODUCT,
            ...     ),
            ... )
            >>> # specify a source_model (since the previous statement
            >>> # succeeded, this will do nothing because of `if_not_exists`):
            >>> await my_async_table.create_vector_index(
            ...     "m_vector_index",
            ...     column="m_vector",
            ...     options=TableVectorIndexOptions(
            ...         metric=VectorMetric.DOT_PRODUCT,
            ...         source_model="nv-qa-4",
            ...     ),
            ...     if_not_exists=True,
            ... )
            >>> # leave the settings to the Data API defaults of cosine
            >>> # similarity metric (since the previous statement
            >>> # succeeded, this will do nothing because of `if_not_exists`):
            >>> await my_async_table.create_vector_index(
            ...     "m_vector_index",
            ...     column="m_vector",
            ...     if_not_exists=True,
            ... )
        """

        # Delegate to the shared helper, which handles payload assembly,
        # timeouts and response checking for all index-creation commands.
        return await self._create_generic_index(
            i_name=name,
            ci_definition=TableVectorIndexDefinition(
                column=column,
                options=TableVectorIndexOptions.coerce(options),
            ).as_dict(),
            ci_command="createVectorIndex",
            if_not_exists=if_not_exists,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )

    async def list_index_names(
        self,
        *,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[str]:
        """
        List the names of all indexes existing on this table.

        Args:
            table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults
                apply. (This method issues a single API request, hence all
                timeout parameters are treated the same.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Returns:
            the index names, as a list of strings in no particular order.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> asyncio.run(my_async_table.list_index_names())
            ['m_vector_index', 'winner_index', 'score_index']
        """

        _table_admin_timeout_ms, _ta_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info("listIndexes")
        li_response = await self._api_commander.async_request(
            # no "explain" option: names only are requested here
            payload={"listIndexes": {"options": {}}},
            timeout_context=_TimeoutContext(
                request_ms=_table_admin_timeout_ms, label=_ta_label
            ),
        )
        # Guard clause: a well-formed response carries status.indexes.
        if "indexes" not in li_response.get("status", {}):
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from listIndexes API command.",
                raw_response=li_response,
            )
        logger.info("finished listIndexes")
        return li_response["status"]["indexes"]  # type: ignore[no-any-return]

    async def list_indexes(
        self,
        *,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[TableIndexDescriptor]:
        """
        List the full definitions of all indexes existing on this table.

        Args:
            table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults
                apply. (This method issues a single API request, hence all
                timeout parameters are treated the same.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Returns:
            a list of `astrapy.info.TableIndexDescriptor` objects in no particular
            order, each providing the details of an index present on the table.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> indexes = asyncio.run(my_async_table.list_indexes())
            >>> indexes
            [TableIndexDescriptor(name='m_vector_index', definition=...)...]
            >>> # (Note: shortened output above)
            >>> indexes[1].definition.column
            'winner'
            >>> indexes[1].definition.options.case_sensitive
            False
        """

        _table_admin_timeout_ms, _ta_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info("listIndexes")
        li_response = await self._api_commander.async_request(
            # "explain": True makes the API return full index definitions
            payload={"listIndexes": {"options": {"explain": True}}},
            timeout_context=_TimeoutContext(
                request_ms=_table_admin_timeout_ms, label=_ta_label
            ),
        )
        # Guard clause: a well-formed response carries status.indexes.
        if "indexes" not in li_response.get("status", {}):
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from listIndexes API command.",
                raw_response=li_response,
            )
        logger.info("finished listIndexes")
        return list(
            map(TableIndexDescriptor.coerce, li_response["status"]["indexes"])
        )

    @overload
    async def alter(
        self,
        operation: AlterTableOperation | dict[str, Any],
        *,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> AsyncTable[DefaultRowType]: ...

    @overload
    async def alter(
        self,
        operation: AlterTableOperation | dict[str, Any],
        *,
        row_type: type[NEW_ROW],
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> AsyncTable[NEW_ROW]: ...

    async def alter(
        self,
        operation: AlterTableOperation | dict[str, Any],
        *,
        row_type: type[Any] = DefaultRowType,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> AsyncTable[NEW_ROW]:
        """
        Executes one of the available alter-table operations on this table,
        such as adding/dropping columns.

        This is a blocking operation: the method returns once the table
        alteration has completed on the database.

        Args:
            operation: an instance of one of the `astrapy.info.AlterTable*` classes,
                representing which alter operation to perform and the details thereof.
                A regular dictionary can also be provided, but then it must have the
                alter operation name at its top level: {"add": {"columns": ...}}.
            row_type: this parameter acts a formal specifier for the type checker.
                If omitted, the resulting AsyncTable is implicitly
                an `AsyncTable[dict[str, Any]]`. If provided, it must match
                the type hint specified in the assignment.
                See the examples below.
            table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Returns:
            a new AsyncTable object (pointing to the same database table), to use
            for subsequent interactions, possibly with an updated row type hint.

        Examples:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> from astrapy.info import (
            ...     AlterTableAddColumns,
            ...     AlterTableAddVectorize,
            ...     AlterTableDropColumns,
            ...     AlterTableDropVectorize,
            ...     ColumnType,
            ...     TableScalarColumnTypeDescriptor,
            ...     VectorServiceOptions,
            ... )
            >>>
            >>> # Add a column
            >>> new_table_1 = await my_async_table.alter(
            ...     AlterTableAddColumns(
            ...         columns={
            ...             "tie_break": TableScalarColumnTypeDescriptor(
            ...                 column_type=ColumnType.BOOLEAN,
            ...             ),
            ...         }
            ...     )
            ... )
            >>>
            >>> # Drop a column
            >>> new_table_2 = await new_table_1.alter(AlterTableDropColumns(
            ...     columns=["tie_break"]
            ... ))
            >>>
            >>> # Add vectorize to a (vector) column
            >>> new_table_3 = await new_table_2.alter(
            ...     AlterTableAddVectorize(
            ...         columns={
            ...             "m_vector": VectorServiceOptions(
            ...                 provider="openai",
            ...                 model_name="text-embedding-3-small",
            ...                 authentication={
            ...                     "providerKey": "ASTRA_KMS_API_KEY_NAME",
            ...                 },
            ...             ),
            ...         }
            ...     )
            ... )
            >>>
            >>> # Drop vectorize from a (vector) column
            >>> # (Also demonstrates type hint usage)
            >>> from typing import TypedDict
            >>> from astrapy import AsyncTable
            >>> from astrapy.data_types import (
            ...     DataAPISet,
            ...     DataAPITimestamp,
            ...     DataAPIVector,
            ... )
            >>> from astrapy.ids import UUID
            >>>
            >>> class MyMatch(TypedDict):
            ...     match_id: str
            ...     round: int
            ...     m_vector: DataAPIVector
            ...     score: int
            ...     when: DataAPITimestamp
            ...     winner: str
            ...     fighters: DataAPISet[UUID]
            ...
            >>> new_table_4: AsyncTable[MyMatch] = await new_table_3.alter(
            ...     AlterTableDropVectorize(columns=["m_vector"]),
            ...     row_type=MyMatch,
            ... )
        """

        # Normalize the operation into an AlterTableOperation instance
        # (a plain dict must carry the operation name at its top level).
        n_operation: AlterTableOperation
        if isinstance(operation, AlterTableOperation):
            n_operation = operation
        else:
            n_operation = AlterTableOperation.from_full_dict(operation)
        _table_admin_timeout_ms, _ta_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        at_operation_name = n_operation._name
        at_payload = {
            "alterTable": {
                "operation": {
                    at_operation_name: n_operation.as_dict(),
                },
            },
        }
        logger.info(f"alterTable({at_operation_name})")
        at_response = await self._api_commander.async_request(
            payload=at_payload,
            timeout_context=_TimeoutContext(
                request_ms=_table_admin_timeout_ms, label=_ta_label
            ),
        )
        # Anything other than a plain success status is treated as faulty.
        if at_response.get("status") != {"ok": 1}:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from alterTable API command.",
                raw_response=at_response,
            )
        logger.info(f"finished alterTable({at_operation_name})")
        # Return a fresh AsyncTable (possibly re-typed via `row_type`):
        # the alteration may have changed the table's schema.
        return AsyncTable(
            database=self.database,
            name=self.name,
            keyspace=self.keyspace,
            api_options=self.api_options,
        )

    async def insert_one(
        self,
        row: ROW,
        *,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> TableInsertOneResult:
        """
        Insert a single row in the table,
        with implied overwrite in case of primary key collision.

        Inserting a row whose primary key corresponds to an entry already stored
        in the table has the effect of an in-place update: the row is overwritten.
        However, if the row being inserted is partially provided, i.e. some columns
        are not specified, these are left unchanged on the database. To explicitly
        reset them, specify their value as appropriate to their data type,
        i.e. `None`, `{}` or analogous.

        Args:
            row: a dictionary expressing the row to insert. The primary key
                must be specified in full, while any other column may be omitted
                if desired (in which case it is left as is on DB).
                The values for the various columns supplied in the row must
                be of the right data type for the insertion to succeed.
                Non-primary-key columns can also be explicitly set to null.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a TableInsertOneResult object, whose attributes are the primary key
            of the inserted row both in the form of a dictionary and of a tuple.

        Raises:
            UnexpectedDataAPIResponseException: if the API response lacks the
                expected 'insertedIds'/'primaryKeySchema' information.

        Examples:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> # a full-row insert using astrapy's datatypes
            >>> from astrapy.data_types import (
            ...     DataAPISet,
            ...     DataAPITimestamp,
            ...     DataAPIVector,
            ... )
            >>> from astrapy.ids import UUID
            >>>
            >>> insert_result = asyncio.run(my_async_table.insert_one(
            ...     {
            ...         "match_id": "mtch_0",
            ...         "round": 1,
            ...         "m_vector": DataAPIVector([0.4, -0.6, 0.2]),
            ...         "score": 18,
            ...         "when": DataAPITimestamp.from_string("2024-11-28T11:30:00Z"),
            ...         "winner": "Victor",
            ...         "fighters": DataAPISet([
            ...             UUID("0193539a-2770-8c09-a32a-111111111111"),
            ...         ]),
            ...     },
            ... ))
            >>> insert_result.inserted_id
            {'match_id': 'mtch_0', 'round': 1}
            >>> insert_result.inserted_id_tuple
            ('mtch_0', 1)
            >>>
            >>> # a partial-row (which in this case overwrites some of the values)
            >>> asyncio.run(my_async_table.insert_one(
            ...     {
            ...         "match_id": "mtch_0",
            ...         "round": 1,
            ...         "winner": "Victor Vector",
            ...         "fighters": DataAPISet([
            ...             UUID("0193539a-2770-8c09-a32a-111111111111"),
            ...             UUID("0193539a-2880-8875-9f07-222222222222"),
            ...         ]),
            ...     },
            ... ))
            TableInsertOneResult(inserted_id={'match_id': 'mtch_0', 'round': 1} ...
            >>>
            >>> # another insertion demonstrating standard-library datatypes in values
            >>> import datetime
            >>>
            >>> asyncio.run(my_async_table.insert_one(
            ...     {
            ...         "match_id": "mtch_0",
            ...         "round": 2,
            ...         "winner": "Angela",
            ...         "score": 25,
            ...         "when": datetime.datetime(
            ...             2024, 7, 13, 12, 55, 30, 889,
            ...             tzinfo=datetime.timezone.utc,
            ...         ),
            ...         "fighters": {
            ...             UUID("019353cb-8e01-8276-a190-333333333333"),
            ...         },
            ...         "m_vector": [0.4, -0.6, 0.2],
            ...     },
            ... ))
            TableInsertOneResult(inserted_id={'match_id': 'mtch_0', 'round': 2}, ...
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        io_payload = self._converter_agent.preprocess_payload(
            {"insertOne": {"document": row}}
        )
        logger.info(f"insertOne on '{self.name}'")
        io_response = await self._api_commander.async_request(
            payload=io_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished insertOne on '{self.name}'")
        io_status = io_response.get("status", {})
        # Guard clauses: validate the response shape before decoding the key.
        if "insertedIds" not in io_status:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from insertOne API command.",
                raw_response=io_response,
            )
        if not io_status["insertedIds"]:
            raise UnexpectedDataAPIResponseException(
                text="Response from insertOne API command has empty 'insertedIds'.",
                raw_response=io_response,
            )
        # Use .get(): a response lacking 'primaryKeySchema' altogether must
        # raise the library exception, not a bare KeyError.
        if not io_status.get("primaryKeySchema"):
            raise UnexpectedDataAPIResponseException(
                text="Response from insertOne API command has empty 'primaryKeySchema'.",
                raw_response=io_response,
            )
        inserted_id_list = io_status["insertedIds"][0]
        inserted_id_tuple, inserted_id = self._converter_agent.postprocess_key(
            inserted_id_list,
            primary_key_schema_dict=io_status["primaryKeySchema"],
        )
        return TableInsertOneResult(
            raw_results=[io_response],
            inserted_id=inserted_id,
            inserted_id_tuple=inserted_id_tuple,
        )

    def _prepare_keys_from_status(
        self, status: dict[str, Any] | None, raise_on_missing: bool = False
    ) -> tuple[list[dict[str, Any]], list[tuple[Any, ...]]]:
        """
        Decode the inserted primary keys found in an insertion response 'status'.

        Args:
            status: the 'status' portion of a Data API insertion response,
                or None if the response had no status at all.
            raise_on_missing: if True, a None `status` raises an exception;
                if False, a None `status` simply yields empty results.

        Returns:
            a 2-item tuple (ids, id_tuples): the primary keys of the inserted
            rows, as a list of dictionaries and as a list of tuples respectively,
            in the same order.

        Raises:
            UnexpectedDataAPIResponseException: if `status` is None (with
                `raise_on_missing` set), or lacks the 'primaryKeySchema' or
                'insertedIds' entries needed to decode the keys.
        """

        ids: list[dict[str, Any]]
        id_tuples: list[tuple[Any, ...]]
        if status is None:
            if raise_on_missing:
                raise UnexpectedDataAPIResponseException(
                    text="'status' not found in API response",
                    raw_response=None,
                )
            else:
                ids = []
                id_tuples = []
        else:
            if "primaryKeySchema" not in status:
                raise UnexpectedDataAPIResponseException(
                    text=(
                        "received a 'status' without 'primaryKeySchema' "
                        f"in API response (received: {status})"
                    ),
                    raw_response=None,
                )
            if "insertedIds" not in status:
                raise UnexpectedDataAPIResponseException(
                    text=(
                        "received a 'status' without 'insertedIds' "
                        f"in API response (received: {status})"
                    ),
                    raw_response=None,
                )
            primary_key_schema = status["primaryKeySchema"]
            id_tuples_and_ids = self._converter_agent.postprocess_keys(
                status["insertedIds"],
                primary_key_schema_dict=primary_key_schema,
            )
            id_tuples = [key_tuple for key_tuple, _ in id_tuples_and_ids]
            # (renamed from `id` to avoid shadowing the builtin)
            ids = [key_dict for _, key_dict in id_tuples_and_ids]
        return ids, id_tuples

    async def insert_many(
        self,
        rows: Iterable[ROW],
        *,
        ordered: bool = False,
        chunk_size: int | None = None,
        concurrency: int | None = None,
        request_timeout_ms: int | None = None,
        general_method_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> TableInsertManyResult:
        """
        Insert a number of rows into the table,
        with implied overwrite in case of primary key collision.

        Inserting rows whose primary key correspond to entries alredy stored
        in the table has the effect of an in-place update: the rows are overwritten.
        However, if the rows being inserted are partially provided, i.e. some columns
        are not specified, these are left unchanged on the database. To explicitly
        reset them, specify their value as appropriate to their data type,
        i.e. `None`, `{}` or analogous.

        Args:
            rows: an iterable of dictionaries, each expressing a row to insert.
                Each row must at least fully specify the primary key column values,
                while any other column may be omitted if desired (in which case
                it is left as is on DB).
                The values for the various columns supplied in each row must
                be of the right data type for the insertion to succeed.
                Non-primary-key columns can also be explicitly set to null.
            ordered: if False (default), the insertions can occur in arbitrary order
                and possibly concurrently. If True, they are processed sequentially.
                If there are no specific reasons against it, unordered insertions
                re to be preferred as they complete much faster.
            chunk_size: how many rows to include in each single API request.
                Exceeding the server maximum allowed value results in an error.
                Leave it unspecified (recommended) to use the system default.
            concurrency: maximum number of concurrent requests to the API at
                a given time. It cannot be more than one for ordered insertions.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                whole operation, which may consist of several API requests.
                If not provided, this object's defaults apply.
            request_timeout_ms: a timeout, in milliseconds, to impose on each
                individual HTTP request to the Data API to accomplish the operation.
                If not provided, this object's defaults apply.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a TableInsertManyResult object, whose attributes are the primary key
            of the inserted rows both in the form of dictionaries and of tuples.

        Examples:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> # Insert complete and partial rows at once (concurrently)
            >>> from astrapy.data_types import (
            ...     DataAPISet,
            ...     DataAPITimestamp,
            ...     DataAPIVector,
            ... )
            >>> from astrapy.ids import UUID
            >>>
            >>> insert_result = asyncio.run(my_async_table.insert_many(
            ...     [
            ...         {
            ...             "match_id": "fight4",
            ...             "round": 1,
            ...             "winner": "Victor",
            ...             "score": 18,
            ...             "when": DataAPITimestamp.from_string(
            ...                 "2024-11-28T11:30:00Z",
            ...             ),
            ...             "fighters": DataAPISet([
            ...                 UUID("0193539a-2770-8c09-a32a-111111111111"),
            ...                 UUID('019353e3-00b4-83f9-a127-222222222222'),
            ...             ]),
            ...             "m_vector": DataAPIVector([0.4, -0.6, 0.2]),
            ...         },
            ...         {"match_id": "fight5", "round": 1, "winner": "Adam"},
            ...         {"match_id": "fight5", "round": 2, "winner": "Betta"},
            ...         {"match_id": "fight5", "round": 3, "winner": "Caio"},
            ...         {
            ...             "match_id": "challenge6",
            ...             "round": 1,
            ...             "winner": "Donna",
            ...             "m_vector": [0.9, -0.1, -0.3],
            ...         },
            ...         {"match_id": "challenge6", "round": 2, "winner": "Erick"},
            ...         {"match_id": "challenge6", "round": 3, "winner": "Fiona"},
            ...         {"match_id": "tournamentA", "round": 1, "winner": "Gael"},
            ...         {"match_id": "tournamentA", "round": 2, "winner": "Hanna"},
            ...         {
            ...             "match_id": "tournamentA",
            ...             "round": 3,
            ...             "winner": "Ian",
            ...             "fighters": DataAPISet([
            ...                 UUID("0193539a-2770-8c09-a32a-111111111111"),
            ...             ]),
            ...         },
            ...         {"match_id": "fight7", "round": 1, "winner": "Joy"},
            ...         {"match_id": "fight7", "round": 2, "winner": "Kevin"},
            ...         {"match_id": "fight7", "round": 3, "winner": "Lauretta"},
            ...     ],
            ...     concurrency=10,
            ...     chunk_size=3,
            ... ))
            >>> insert_result.inserted_ids
            [{'match_id': 'fight4', 'round': 1}, {'match_id': 'fight5', ...
            >>> insert_result.inserted_id_tuples
            [('fight4', 1), ('fight5', 1), ('fight5', 2), ('fight5', 3), ...
            >>>
            >>> # Ordered insertion
            >>> # (would stop on first failure; predictable end result on DB)
            >>> asyncio.run(my_async_table.insert_many(
            ...     [
            ...         {"match_id": "fight5", "round": 1, "winner": "Adam0"},
            ...         {"match_id": "fight5", "round": 2, "winner": "Betta0"},
            ...         {"match_id": "fight5", "round": 3, "winner": "Caio0"},
            ...         {"match_id": "fight5", "round": 1, "winner": "Adam Zuul"},
            ...         {"match_id": "fight5", "round": 2, "winner": "Betta Vigo"},
            ...         {"match_id": "fight5", "round": 3, "winner": "Caio Gozer"},
            ...     ],
            ...     ordered=True,
            ... ))
            TableInsertManyResult(inserted_ids=[{'match_id': 'fight5', 'round': 1}, ...

        Note:
            Unordered insertions are executed with some degree of concurrency,
            so it is usually better to prefer this mode unless the order in the
            row sequence is important.

        Note:
            If some of the rows are unsuitable for insertion, for instance
            have the wrong data type for a column or lack the primary key,
            the Data API validation check will fail for those specific requests
            that contain the faulty rows. Depending on concurrency and the value
            of the `ordered` parameter, a number of rows in general could have
            been successfully inserted.
            It is possible to capture such a scenario, and inspect which rows
            actually got inserted, by catching an error of type
            `astrapy.exceptions.TableInsertManyException`: its `partial_result`
            attribute is precisely a `TableInsertManyResult`, encoding details
            on the successful writes.
        """

        _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
            (general_method_timeout_ms, "general_method_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.general_method_timeout_ms,
                "general_method_timeout_ms",
            ),
        )
        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        if concurrency is None:
            if ordered:
                _concurrency = 1
            else:
                _concurrency = DEFAULT_INSERT_MANY_CONCURRENCY
        else:
            _concurrency = concurrency
        if _concurrency > 1 and ordered:
            raise ValueError("Cannot run ordered insert_many concurrently.")
        if chunk_size is None:
            _chunk_size = DEFAULT_INSERT_MANY_CHUNK_SIZE
        else:
            _chunk_size = chunk_size
        _rows = list(rows)
        logger.info(f"inserting {len(_rows)} rows in '{self.name}'")
        raw_results: list[dict[str, Any]] = []
        timeout_manager = MultiCallTimeoutManager(
            overall_timeout_ms=_general_method_timeout_ms,
            timeout_label=_gmt_label,
        )
        if ordered:
            options = {"ordered": True}
            inserted_ids: list[Any] = []
            inserted_id_tuples: list[Any] = []
            for i in range(0, len(_rows), _chunk_size):
                im_payload = self._converter_agent.preprocess_payload(
                    {
                        "insertMany": {
                            "documents": _rows[i : i + _chunk_size],
                            "options": options,
                        },
                    },
                )
                logger.info(f"insertMany(chunk) on '{self.name}'")
                chunk_response = await self._api_commander.async_request(
                    payload=im_payload,
                    raise_api_errors=False,
                    timeout_context=timeout_manager.remaining_timeout(
                        cap_time_ms=_request_timeout_ms,
                        cap_timeout_label=_rt_label,
                    ),
                )
                logger.info(f"finished insertMany(chunk) on '{self.name}'")
                # accumulate the results in this call
                chunk_inserted_ids, chunk_inserted_ids_tuples = (
                    self._prepare_keys_from_status(chunk_response.get("status"))
                )
                inserted_ids += chunk_inserted_ids
                inserted_id_tuples += chunk_inserted_ids_tuples
                raw_results += [chunk_response]
                # if errors, quit early
                if chunk_response.get("errors", []):
                    partial_result = TableInsertManyResult(
                        raw_results=raw_results,
                        inserted_ids=inserted_ids,
                        inserted_id_tuples=inserted_id_tuples,
                    )
                    raise TableInsertManyException.from_response(
                        command=None,
                        raw_response=chunk_response,
                        partial_result=partial_result,
                    )

            # return
            full_result = TableInsertManyResult(
                raw_results=raw_results,
                inserted_ids=inserted_ids,
                inserted_id_tuples=inserted_id_tuples,
            )
            logger.info(f"finished inserting {len(_rows)} rows in '{self.name}'")
            return full_result

        else:
            # unordered: concurrent or not, do all of them and parse the results
            options = {"ordered": False}

            sem = asyncio.Semaphore(_concurrency)

            async def concurrent_insert_chunk(
                row_chunk: list[ROW],
            ) -> dict[str, Any]:
                async with sem:
                    im_payload = self._converter_agent.preprocess_payload(
                        {
                            "insertMany": {
                                "documents": row_chunk,
                                "options": options,
                            },
                        },
                    )
                    logger.info(f"insertMany(chunk) on '{self.name}'")
                    im_response = await self._api_commander.async_request(
                        payload=im_payload,
                        raise_api_errors=False,
                        timeout_context=timeout_manager.remaining_timeout(
                            cap_time_ms=_request_timeout_ms,
                            cap_timeout_label=_rt_label,
                        ),
                    )
                    logger.info(f"finished insertMany(chunk) on '{self.name}'")
                    return im_response

            if _concurrency > 1:
                tasks = [
                    asyncio.create_task(
                        concurrent_insert_chunk(_rows[i : i + _chunk_size])
                    )
                    for i in range(0, len(_rows), _chunk_size)
                ]
                raw_results = await asyncio.gather(*tasks)
            else:
                raw_results = [
                    await concurrent_insert_chunk(_rows[i : i + _chunk_size])
                    for i in range(0, len(_rows), _chunk_size)
                ]

            # recast raw_results. Each response has its schema: unfold appropriately
            ids_and_tuples_per_chunk = [
                self._prepare_keys_from_status(chunk_response.get("status"))
                for chunk_response in raw_results
            ]
            inserted_ids = [
                inserted_id
                for chunk_ids, _ in ids_and_tuples_per_chunk
                for inserted_id in chunk_ids
            ]
            inserted_id_tuples = [
                inserted_id_tuple
                for _, chunk_id_tuples in ids_and_tuples_per_chunk
                for inserted_id_tuple in chunk_id_tuples
            ]
            # check-raise
            if any(
                [chunk_response.get("errors", []) for chunk_response in raw_results]
            ):
                partial_result = TableInsertManyResult(
                    raw_results=raw_results,
                    inserted_ids=inserted_ids,
                    inserted_id_tuples=inserted_id_tuples,
                )
                raise TableInsertManyException.from_responses(
                    commands=[None for _ in raw_results],
                    raw_responses=raw_results,
                    partial_result=partial_result,
                )

            # return
            full_result = TableInsertManyResult(
                raw_results=raw_results,
                inserted_ids=inserted_ids,
                inserted_id_tuples=inserted_id_tuples,
            )
            logger.info(f"finished inserting {len(_rows)} rows in '{self.name}'")
            return full_result

    # Overload: `row_type` omitted — the resulting cursor keeps the table's own
    # row type for the items it yields (AsyncTableFindCursor[ROW, ROW]).
    @overload
    def find(
        self,
        filter: FilterType | None = None,
        *,
        projection: ProjectionType | None = None,
        row_type: None = None,
        skip: int | None = None,
        limit: int | None = None,
        include_similarity: bool | None = None,
        include_sort_vector: bool | None = None,
        sort: SortType | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> AsyncTableFindCursor[ROW, ROW]: ...

    # Overload: an explicit `row_type` tells the type checker that the resulting
    # cursor yields items of that type (AsyncTableFindCursor[ROW, ROW2]), which
    # is useful e.g. when a projection changes the shape of the returned rows.
    @overload
    def find(
        self,
        filter: FilterType | None = None,
        *,
        projection: ProjectionType | None = None,
        row_type: type[ROW2],
        skip: int | None = None,
        limit: int | None = None,
        include_similarity: bool | None = None,
        include_sort_vector: bool | None = None,
        sort: SortType | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> AsyncTableFindCursor[ROW, ROW2]: ...

    def find(
        self,
        filter: FilterType | None = None,
        *,
        projection: ProjectionType | None = None,
        row_type: type[ROW2] | None = None,
        skip: int | None = None,
        limit: int | None = None,
        include_similarity: bool | None = None,
        include_sort_vector: bool | None = None,
        sort: SortType | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> AsyncTableFindCursor[ROW, ROW2]:
        """
        Find rows on the table matching the provided filters
        and according to sorting criteria including vector similarity.

        The returned AsyncTableFindCursor object, representing the stream of
        results, can be iterated over, or consumed and manipulated in several
        other ways (see the examples below and the `AsyncTableFindCursor`
        documentation for details). Since the amount of returned items can be
        large, AsyncTableFindCursor is a lazy object, that fetches new data
        while it is being read using the Data API pagination mechanism.

        Invoking `.to_list()` on an AsyncTableFindCursor will cause it to consume
        all rows and materialize the entire result set as a list. This is not
        recommended if the amount of results is very large.

        Args:
            filter: a dictionary expressing which condition the returned rows
                must satisfy. The filter can use operators, such as "$eq" for equality,
                and require columns to compare with literal values. Simple examples
                are `{}` (zero filter, not recommended for large tables),
                `{"match_no": 123}` (a shorthand for `{"match_no": {"$eq": 123}}`,
                or `{"match_no": 123, "round": "C"}` (multiple conditions are
                implicitly combined with "$and").
                Please consult the Data API documentation for a more detailed
                explanation of table search filters and tips on their usage.
            projection: a prescription on which columns to return for the matching rows.
                The projection can take the form `{"column1": True, "column2": True}`,
                `{"*": True}` (i.e. return the whole row), or the complementary
                form that excludes columns: `{"column1": False, "column2": False}`.
                To optimize bandwidth usage, it is recommended to use a projection,
                especially to avoid unnecessary columns of type vector with
                high-dimensional embeddings.
            row_type: this parameter acts as a formal specifier for the type checker.
                If omitted, the resulting cursor is implicitly an
                `AsyncTableFindCursor[ROW, ROW]`, i.e. maintains the same type for
                the items it returns as that for the rows in the table. Strictly
                typed code may want to specify this parameter especially when a
                projection is given.
            skip: if provided, it is a number of rows that would be obtained first
                in the response and are instead skipped.
            limit: a maximum amount of rows to get from the table. The returned cursor
                will stop yielding rows when either this number is reached or there
                really are no more matches in the table.
            include_similarity: a boolean to request the numeric value of the
                similarity to be returned as an added "$similarity" key in each returned
                row. It can be used meaningfully only in a vector search (see `sort`).
            include_sort_vector: a boolean to request the search query vector.
                If set to True (and if the search is a vector search), calling
                the `get_sort_vector` method on the returned cursor will yield
                the vector used for the ANN search.
            sort: this dictionary parameter controls the order in which the rows
                are returned. The sort parameter can express either a vector search or
                a regular (ascending/descending, even hierarchical) sorting.
                * For a vector search the parameter takes the form
                `{"vector_column": qv}`, with the query vector `qv` of the appropriate
                type (list of floats or DataAPIVector). If the table has automatic
                embedding generation ("vectorize") enabled on that column, the form
                `{"vectorize_enabled_column": "query text"}` is also valid.
                * In the case of non-vector sorting, the parameter specifies the
                column(s) and the ascending/descending ordering required.
                If multiple columns are provided, the sorting applies them
                hierarchically to the rows. Examples are `{"score": SortMode.ASCENDING}`
                (equivalently `{"score": +1}`), `{"score": +1, "when": -1}`.
                Note that, depending on the column(s) chosen for sorting, the table
                partitioning structure, and the presence of indexes, the sorting
                may be done in-memory by the API. In that case, there may be performance
                implications and limitations on the amount of items returned.
                Consult the Data API documentation for more details on this topic.
            request_timeout_ms: a timeout, in milliseconds, to impose on each
                individual HTTP request to the Data API to accomplish the operation.
                If not provided, this object's defaults apply.
            timeout_ms: an alias for `request_timeout_ms`.

        Returns:
            an AsyncTableFindCursor object, that can be iterated over (and
            manipulated in several ways), that if needed handles pagination
            under the hood as the rows are consumed.

        Note:
            As the rows are retrieved in chunks progressively, while the cursor
            is being iterated over, it is possible that the actual results
            obtained will reflect changes occurring to the table contents in
            real time.

        Examples:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> # Iterate over results:
            >>> async def loop1():
            ...     async for row in my_async_table.find({"match_id": "challenge6"}):
            ...         print(f"(R:{row['round']}): winner {row['winner']}")
            ...
            >>> asyncio.run(loop1())
            (R:1): winner Donna
            (R:2): winner Erick
            (R:3): winner Fiona
            >>>
            >>> # Optimize bandwidth using a projection:
            >>> proj = {"round": True, "winner": True}
            >>> async def loop2():
            ...     async for row in my_async_table.find(
            ...           {"match_id": "challenge6"},
            ...           projection=proj,
            ...     ):
            ...         print(f"(R:{row['round']}): winner {row['winner']}")
            ...
            >>> asyncio.run(loop2())
            (R:1): winner Donna
            (R:2): winner Erick
            (R:3): winner Fiona
            >>>
            >>> # Filter on the partitioning:
            >>> asyncio.run(
            ...     my_async_table.find({"match_id": "challenge6"}).to_list()
            ... )
            [{'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
            >>>
            >>> # Filter on primary key:
            >>> asyncio.run(
            ...     my_async_table.find(
            ...         {"match_id": "challenge6", "round": 1}
            ...     ).to_list()
            ... )
            [{'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
            >>>
            >>> # Filter on a regular indexed column:
            >>> asyncio.run(my_async_table.find({"winner": "Caio Gozer"}).to_list())
            [{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
            >>>
            >>> # Non-equality filter on a regular indexed column:
            >>> asyncio.run(my_async_table.find({"score": {"$gte": 15}}).to_list())
            [{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
            >>>
            >>> # Filter on a regular non-indexed column:
            >>> # (not recommended performance-wise)
            >>> asyncio.run(my_async_table.find(
            ...     {"when": {
            ...         "$gte": DataAPITimestamp.from_string("1999-12-31T01:23:44Z")
            ...     }}
            ... ).to_list())
            The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
            [{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
            >>>
            >>> # Empty filter (not recommended performance-wise):
            >>> asyncio.run(my_async_table.find({}).to_list())
            The Data API returned a warning: {'errorCode': 'ZERO_FILTER_OPERATIONS', ...
            [{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
            >>>
            >>> # Filter on the primary key and a regular non-indexed column:
            >>> # (not recommended performance-wise)
            >>> asyncio.run(my_async_table.find(
            ...     {"match_id": "fight5", "round": 3, "winner": "Caio Gozer"}
            ... ).to_list())
            The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
            [{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
            >>>
            >>> # Filter on a regular non-indexed column (and incomplete primary key)
            >>> # (not recommended performance-wise)
            >>> asyncio.run(my_async_table.find(
            ...     {"round": 3, "winner": "Caio Gozer"}
            ... ).to_list())
            The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
            [{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
            >>>
            >>> # Vector search with "sort" (on an appropriately-indexed vector column):
            >>> asyncio.run(my_async_table.find(
            ...     {},
            ...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
            ...     projection={"winner": True},
            ...     limit=3,
            ... ).to_list())
            [{'winner': 'Donna'}, {'winner': 'Victor'}]
            >>>
            >>> # Hybrid search with vector sort and non-vector filtering:
            >>> asyncio.run(my_async_table.find(
            ...     {"match_id": "fight4"},
            ...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
            ...     projection={"winner": True},
            ... ).to_list())
            [{'winner': 'Victor'}]
            >>>
            >>> # Return the numeric value of the vector similarity
            >>> # (also demonstrating that one can pass a plain list for a vector):
            >>> asyncio.run(my_async_table.find(
            ...     {},
            ...     sort={"m_vector": [0.2, 0.3, 0.4]},
            ...     projection={"winner": True},
            ...     limit=3,
            ...     include_similarity=True,
            ... ).to_list())
            [{'winner': 'Donna', '$similarity': 0.515}, {'winner': 'Victor', ...
            >>>
            >>> # Non-vector sorting on a 'partitionSort' column:
            >>> asyncio.run(my_async_table.find(
            ...     {"match_id": "fight5"},
            ...     sort={"round": SortMode.DESCENDING},
            ...     projection={"winner": True},
            ... ).to_list())
            [{'winner': 'Caio Gozer'}, {'winner': 'Betta Vigo'}, ...
            >>>
            >>> # Using `skip` and `limit`:
            >>> asyncio.run(my_async_table.find(
            ...     {"match_id": "fight5"},
            ...     sort={"round": SortMode.DESCENDING},
            ...     projection={"winner": True},
            ...     skip=1,
            ...     limit=2,
            ... ).to_list())
            The Data API returned a warning: {'errorCode': 'IN_MEMORY_SORTING...
            [{'winner': 'Betta Vigo'}, {'winner': 'Adam Zuul'}]
            >>>
            >>> # Non-vector sorting on a regular column:
            >>> # (not recommended performance-wise)
            >>> asyncio.run(my_async_table.find(
            ...     {"match_id": "fight5"},
            ...     sort={"winner": SortMode.ASCENDING},
            ...     projection={"winner": True},
            ... ).to_list())
            The Data API returned a warning: {'errorCode': 'IN_MEMORY_SORTING...
            [{'winner': 'Adam Zuul'}, {'winner': 'Betta Vigo'}, ...
            >>>
            >>> # Using `.map()` on a cursor:
            >>> winner_cursor = my_async_table.find(
            ...     {"match_id": "fight5"},
            ...     sort={"round": SortMode.DESCENDING},
            ...     projection={"winner": True},
            ...     limit=5,
            ... )
            >>> print("/".join(asyncio.run(
            ...     winner_cursor.map(lambda row: row["winner"].upper()).to_list())
            ... ))
            CAIO GOZER/BETTA VIGO/ADAM ZUUL
            >>>
            >>> # Some other examples of cursor manipulation
            >>> matches_async_cursor = my_async_table.find(
            ...     sort={"m_vector": DataAPIVector([-0.1, 0.15, 0.3])}
            ... )
            >>> asyncio.run(matches_async_cursor.has_next())
            True
            >>> asyncio.run(matches_async_cursor.__anext__())
            {'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
            >>> matches_async_cursor.consumed
            1
            >>> matches_async_cursor.rewind()
            >>> matches_async_cursor.consumed
            0
            >>> asyncio.run(matches_async_cursor.has_next())
            True
            >>> matches_async_cursor.close()
            >>>
            >>> async def try_consume():
            ...     try:
            ...         await matches_async_cursor.__anext__()
            ...     except StopAsyncIteration:
            ...         print("StopAsyncIteration triggered.")
            ...
            >>> asyncio.run(try_consume())
            StopAsyncIteration triggered.
        """

        # lazy-import here to avoid circular import issues
        from astrapy.cursors import AsyncTableFindCursor

        # Resolve the per-request timeout: the explicit parameter wins, then its
        # `timeout_ms` alias, then this object's configured default.
        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        # No API request is issued here: the fluent setters below only configure
        # the cursor, which fetches lazily once iterated. `overall_timeout_ms` is
        # None, i.e. no deadline is imposed on the cursor's whole lifetime.
        # Note that `row_type` is a static-typing aid only and is deliberately
        # not used at runtime.
        return (
            AsyncTableFindCursor(
                table=self,
                request_timeout_ms=_request_timeout_ms,
                overall_timeout_ms=None,
                request_timeout_label=_rt_label,
            )
            .filter(filter)
            .project(projection)
            .skip(skip)
            .limit(limit)
            .sort(sort)
            .include_similarity(include_similarity)
            .include_sort_vector(include_sort_vector)
        )

    async def find_one(
        self,
        filter: FilterType | None = None,
        *,
        projection: ProjectionType | None = None,
        include_similarity: bool | None = None,
        sort: SortType | None = None,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> ROW | None:
        """
        Run a search according to the given filtering and sorting criteria
        and return the top row matching it, or nothing if there are none.

        The parameters are analogous to some of the parameters to the `find` method
        (which has a few more that do not make sense in this case, such as `limit`).

        Args:
            filter: a dictionary expressing which condition the returned row
                must satisfy. The filter can use operators, such as "$eq" for equality,
                and require columns to compare with literal values. Simple examples
                are `{}` (zero filter), `{"match_no": 123}` (a shorthand for
                `{"match_no": {"$eq": 123}}`, or `{"match_no": 123, "round": "C"}`
                (multiple conditions are implicitly combined with "$and").
                Please consult the Data API documentation for a more detailed
                explanation of table search filters and tips on their usage.
            projection: a prescription on which columns to return for the matching row.
                The projection can take the form `{"column1": True, "column2": True}`.
                `{"*": True}` (i.e. return the whole row), or the complementary
                form that excludes columns: `{"column1": False, "column2": False}`.
                To optimize bandwidth usage, it is recommended to use a projection,
                especially to avoid unnecessary columns of type vector with
                high-dimensional embeddings.
            include_similarity: a boolean to request the numeric value of the
                similarity to be returned as an added "$similarity" key in the returned
                row. It can be used meaningfully only in a vector search (see `sort`).
            sort: this dictionary parameter controls the sorting order, hence determines
                which row is being returned.
                The sort parameter can express either a vector search or
                a regular (ascending/descending, even hierarchical) sorting.
                * For a vector search the parameter takes the form
                `{"vector_column": qv}`, with the query vector `qv` of the appropriate
                type (list of floats or DataAPIVector). If the table has automatic
                embedding generation ("vectorize") enabled on that column, the form
                `{"vectorize_enabled_column": "query text"}` is also valid.
                * In the case of non-vector sorting, the parameter specifies the
                column(s) and the ascending/descending ordering required.
                If multiple columns are provided, the sorting applies them
                hierarchically to the rows. Examples are `{"score": SortMode.ASCENDING}`
                (equivalently `{"score": +1}`), `{"score": +1, "when": -1}`.
                Note that, depending on the column(s) chosen for sorting, the table
                partitioning structure, and the presence of indexes, the sorting
                may be done in-memory by the API. In that case, there may be performance
                implications.
                Consult the Data API documentation for more details on this topic.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a dictionary expressing the result if a row is found, otherwise None.

        Examples:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> from astrapy.constants import SortMode
            >>> from astrapy.data_types import DataAPITimestamp, DataAPIVector
            >>>
            >>> # Filter on the partitioning:
            >>> asyncio.run(my_async_table.find_one({"match_id": "challenge6"}))
            {'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
            >>>
            >>> # A find with no matches:
            >>> str(asyncio.run(my_async_table.find_one({"match_id": "not_real"})))
            'None'
            >>>
            >>> # Optimize bandwidth using a projection:
            >>> asyncio.run(my_async_table.find_one(
            ...     {"match_id": "challenge6"},
            ...     projection={"round": True, "winner": True},
            ... ))
            {'round': 1, 'winner': 'Donna'}
            >>>
            >>> # Filter on primary key:
            >>> asyncio.run(
            ...     my_async_table.find_one({"match_id": "challenge6", "round": 1})
            ... )
            {'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
            >>>
            >>> # Filter on a regular indexed column:
            >>> asyncio.run(my_async_table.find_one({"winner": "Caio Gozer"}))
            {'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
            >>>
            >>> # Non-equality filter on a regular indexed column:
            >>> asyncio.run(my_async_table.find_one({"score": {"$gte": 15}}))
            {'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
            >>>
            >>> # Filter on a regular non-indexed column:
            >>> # (not recommended performance-wise)
            >>> asyncio.run(my_async_table.find_one(
            ...     {"when": {
            ...         "$gte": DataAPITimestamp.from_string("1999-12-31T01:23:44Z")
            ...     }}
            ... ))
            The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
            {'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
            >>>
            >>> # Empty filter:
            >>> asyncio.run(my_async_table.find_one({}))
            The Data API returned a warning: {'errorCode': 'ZERO_FILTER_OPERATIONS', ...
            {'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
            >>>
            >>> # Filter on the primary key and a regular non-indexed column:
            >>> # (not recommended performance-wise)
            >>> asyncio.run(my_async_table.find_one(
            ...     {"match_id": "fight5", "round": 3, "winner": "Caio Gozer"}
            ... ))
            The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
            {'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
            >>>
            >>> # Filter on a regular non-indexed column (and incomplete primary key)
            >>> # (not recommended performance-wise)
            >>> asyncio.run(
            ...     my_async_table.find_one({"round": 3, "winner": "Caio Gozer"})
            ... )
            The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
            {'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
            >>>
            >>> # Vector search with "sort" (on an appropriately-indexed vector column):
            >>> asyncio.run(my_async_table.find_one(
            ...     {},
            ...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
            ...     projection={"winner": True},
            ... ))
            {'winner': 'Donna'}
            >>>
            >>> # Hybrid search with vector sort and non-vector filtering:
            >>> asyncio.run(my_table.find_one(
            ...     {"match_id": "fight4"},
            ...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
            ...     projection={"winner": True},
            ... ))
            {'winner': 'Victor'}
            >>>
            >>> # Return the numeric value of the vector similarity
            >>> # (also demonstrating that one can pass a plain list for a vector):
            >>> asyncio.run(my_async_table.find_one(
            ...     {},
            ...     sort={"m_vector": [0.2, 0.3, 0.4]},
            ...     projection={"winner": True},
            ...     include_similarity=True,
            ... ))
            {'winner': 'Donna', '$similarity': 0.515}
            >>>
            >>> # Non-vector sorting on a 'partitionSort' column:
            >>> asyncio.run(my_async_table.find_one(
            ...     {"match_id": "fight5"},
            ...     sort={"round": SortMode.DESCENDING},
            ...     projection={"winner": True},
            ... ))
            {'winner': 'Caio Gozer'}
            >>>
            >>> # Non-vector sorting on a regular column:
            >>> # (not recommended performance-wise)
            >>> asyncio.run(my_async_table.find_one(
            ...     {"match_id": "fight5"},
            ...     sort={"winner": SortMode.ASCENDING},
            ...     projection={"winner": True},
            ... ))
            The Data API returned a warning: {'errorCode': 'IN_MEMORY_SORTING...
            {'winner': 'Adam Zuul'}
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        fo_options = (
            None
            if include_similarity is None
            else {"includeSimilarity": include_similarity}
        )
        fo_payload = self._converter_agent.preprocess_payload(
            {
                "findOne": {
                    k: v
                    for k, v in {
                        "filter": filter,
                        "projection": normalize_optional_projection(projection),
                        "options": fo_options,
                        "sort": sort,
                    }.items()
                    if v is not None
                }
            }
        )
        fo_response = await self._api_commander.async_request(
            payload=fo_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        if "document" not in (fo_response.get("data") or {}):
            raise UnexpectedDataAPIResponseException(
                text="Response from findOne API command missing 'document'.",
                raw_response=fo_response,
            )
        if "projectionSchema" not in (fo_response.get("status") or {}):
            raise UnexpectedDataAPIResponseException(
                text="Response from findOne API command missing 'projectionSchema'.",
                raw_response=fo_response,
            )
        doc_response = fo_response["data"]["document"]
        if doc_response is None:
            return None
        return self._converter_agent.postprocess_row(
            fo_response["data"]["document"],
            columns_dict=fo_response["status"]["projectionSchema"],
            similarity_pseudocolumn="$similarity" if include_similarity else None,
        )

    async def distinct(
        self,
        key: str,
        *,
        filter: FilterType | None = None,
        request_timeout_ms: int | None = None,
        general_method_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[Any]:
        """
        Compile the list of distinct values that `key` assumes over the rows
        of the table matching the provided filter.

        Args:
            key: the name of the field whose values are collected across rows.
                Usually this is just a column name, but dot notation can be
                used to address entries of map columns. Set and list columns
                have their individual items "unrolled" automatically; for
                lists, numeric indices are also valid in the dot notation.
                Examples of acceptable `key` values:
                    "a_column"
                    "map_column.map_key"
                    "list_column.2"
            filter: a dictionary expressing which condition the inspected rows
                must satisfy. The filter can use operators, such as "$eq" for
                equality, and require columns to compare with literal values.
                Simple examples are `{}` (zero filter),
                `{"match_no": 123}` (a shorthand for
                `{"match_no": {"$eq": 123}}`, or
                `{"match_no": 123, "round": "C"}` (multiple conditions are
                implicitly combined with "$and"). Please consult the Data API
                documentation for a more detailed explanation of table search
                filters and tips on their usage.
            general_method_timeout_ms: a timeout, in milliseconds, for the
                whole requested operation. Being based on `find` (see), this
                method may entail several successive HTTP API requests,
                depending on the amount of involved rows.
                If not provided, this object's defaults apply.
            request_timeout_ms: a timeout, in milliseconds, for each API
                request. If not provided, this object's defaults apply.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a list of all different values for `key` found across the rows
            that match the filter. The result list has no repeated items.

        Examples:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> asyncio.run(my_async_table.distinct(
            ...     "winner",
            ...     filter={"match_id": "challenge6"},
            ... ))
            ['Donna', 'Erick', 'Fiona']
            >>>
            >>> # distinct values across the whole table:
            >>> # (not recommended performance-wise)
            >>> asyncio.run(my_async_table.distinct("winner"))
            The Data API returned a warning: {'errorCode': 'ZERO_FILTER_OPERATIONS', ...
            ['Victor', 'Adam Zuul', 'Betta Vigo', 'Caio Gozer', 'Donna', 'Erick', ...
            >>>
            >>> # Over a column containing null values
            >>> # (also with composite filter):
            >>> asyncio.run(my_async_table.distinct(
            ...     "score",
            ...     filter={"match_id": {"$in": ["fight4", "tournamentA"]}},
            ... ))
            [18, None]
            >>>
            >>> # distinct over a set column (automatically "unrolled"):
            >>> asyncio.run(my_async_table.distinct(
            ...     "fighters",
            ...     filter={"match_id": {"$in": ["fight4", "tournamentA"]}},
            ... ))
            [UUID('0193539a-2770-8c09-a32a-111111111111'), UUID('019353e3-00b4-...

        Note:
            It must be kept in mind that `distinct` is a client-side operation:
            it effectively browses all required rows using the logic of the
            `find` method and collects the unique values found for `key`.
            As such, there may be performance, latency and ultimately billing
            implications if the amount of matching rows is large.

        Note:
            For details on the behaviour of "distinct" in conjunction with
            real-time changes in the table contents, see the
            Note of the `find` command.
        """

        # deferred import, needed to sidestep a circular-import problem
        from astrapy.cursors import AsyncTableFindCursor

        # resolve the overall (multi-request) and per-request timeouts:
        _overall_ms, _overall_label = _first_valid_timeout(
            (general_method_timeout_ms, "general_method_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.general_method_timeout_ms,
                "general_method_timeout_ms",
            ),
        )
        _per_request_ms, _per_request_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        # the extractor pulls (possibly unrolled) values out of each row,
        # while the "shallow-safe" key is what gets projected in the find:
        _key_extractor = _create_document_key_extractor(key)
        _safe_key = _reduce_distinct_key_to_shallow_safe(key)
        if not _safe_key:
            raise ValueError(
                "The 'key' parameter for distinct cannot be empty "
                "or start with a list index."
            )
        # the cursor type hint is deliberately relaxed within this method body
        _cursor: AsyncTableFindCursor[dict[str, Any], dict[str, Any]] = (
            AsyncTableFindCursor(
                table=self,
                request_timeout_ms=_per_request_ms,
                overall_timeout_ms=_overall_ms,
                request_timeout_label=_per_request_label,
                overall_timeout_label=_overall_label,
            )  # type: ignore[assignment]
            .filter(filter)
            .project({_safe_key: True})
        )
        logger.info(f"running distinct() on '{self.name}'")
        # browse the cursor, deduplicating values by their (serdes-aware) hash:
        seen_hashes: set[Any] = set()
        uniques: list[Any] = []
        async for row in _cursor:
            for value in _key_extractor(row):
                value_hash = _hash_document(
                    value, options=self.api_options.serdes_options
                )
                if value_hash not in seen_hashes:
                    seen_hashes.add(value_hash)
                    uniques.append(value)
        logger.info(f"finished running distinct() on '{self.name}'")
        return uniques

    async def count_documents(
        self,
        filter: FilterType,
        *,
        upper_bound: int,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> int:
        """
        Count the rows in the table matching the specified filter.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"name": "John", "age": 59}
                    {"$and": [{"name": {"$eq": "John"}}, {"age": {"$gt": 58}}]}
                See the Data API documentation for the full set of operators.
            upper_bound: a required ceiling on the result of the count
                operation. If the actual number of rows exceeds this value,
                an exception will be raised.
                Furthermore, if the actual number of rows exceeds the maximum
                count that the Data API can reach (regardless of upper_bound),
                an exception will be raised.
            general_method_timeout_ms: a timeout, in milliseconds, to impose
                on the underlying API request. If not provided, this object's
                defaults apply. (This method issues a single API request,
                hence all timeout parameters are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            the exact count of matching rows.

        Raises:
            TooManyRowsToCountException: if the count exceeds either
                `upper_bound` or the server-side maximum.

        Examples:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> asyncio.run(my_async_table.insert_many([{"seq": i} for i in range(20)]))
            TableInsertManyResult(...)
            >>> asyncio.run(my_async_table.count_documents({}, upper_bound=100))
            20
            >>> asyncio.run(my_async_table.count_documents({"seq":{"$gt": 15}}, upper_bound=100))
            4
            >>> asyncio.run(my_async_table.count_documents({}, upper_bound=10))
            Traceback (most recent call last):
                ... ...
            astrapy.exceptions.TooManyRowsToCountException

        Note:
            Count operations are expensive: for this reason, the best practice
            is to provide a reasonable `upper_bound` according to the caller
            expectations. Moreover, indiscriminate usage of count operations
            for sizeable amounts of rows (i.e. in the thousands and more)
            is discouraged in favor of alternative application-specific
            solutions. Keep in mind that the Data API has a hard upper limit
            on the amount of rows it will count, and that an exception will be
            thrown by this method if this limit is encountered.
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info(f"countDocuments on '{self.name}'")
        cd_response = await self._api_commander.async_request(
            payload={"countDocuments": {"filter": filter}},
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished countDocuments on '{self.name}'")
        status = cd_response.get("status", {})
        if "count" not in status:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from countDocuments API command.",
                raw_response=cd_response,
            )
        count: int = status["count"]
        # 'moreData' signals the server hit its own counting ceiling:
        if status.get("moreData", False):
            raise TooManyRowsToCountException(
                text=f"Document count exceeds {count}, the maximum allowed by the server",
                server_max_count_exceeded=True,
            )
        if count > upper_bound:
            raise TooManyRowsToCountException(
                text="Document count exceeds required upper bound",
                server_max_count_exceeded=False,
            )
        return count

    async def estimated_document_count(
        self,
        *,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> int:
        """
        Query the API server for an estimate of the document count in the table.

        Contrary to `count_documents`, this method has no filtering parameters.

        Args:
            general_method_timeout_ms: a timeout, in milliseconds, to impose
                on the underlying API request. If not provided, this object's
                defaults apply. (This method issues a single API request,
                hence all timeout parameters are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a server-provided estimate count of the documents in the table.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> asyncio.run(my_async_table.estimated_document_count())
            5820
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        ed_payload: dict[str, Any] = {"estimatedDocumentCount": {}}
        logger.info(f"estimatedDocumentCount on '{self.name}'")
        ed_response = await self._api_commander.async_request(
            payload=ed_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished estimatedDocumentCount on '{self.name}'")
        # a missing 'count' in the status marks a malformed server response:
        if "count" not in ed_response.get("status", {}):
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from estimatedDocumentCount API command.",
                raw_response=ed_response,
            )
        estimate: int = ed_response["status"]["count"]
        return estimate

    async def update_one(
        self,
        filter: FilterType,
        update: dict[str, Any],
        *,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Update a single row on the table, changing some or all of the columns,
        with the implicit behaviour of inserting a new row if no match is found.

        Args:
            filter: a predicate expressing the table primary key in full,
                i.e. a dictionary defining values for all columns that form
                the primary key. An example may be
                `{"match_id": "fight4", "round": 1}`.
            update: the update prescription to apply to the row, expressed
                as a dictionary conforming to the Data API syntax. The update
                operators for tables are `$set` and `$unset` (in particular,
                setting a column to None has the same effect as the $unset
                operator). Examples are `{"$set": {"round": 12}}` and
                `{"$unset": {"winner": "", "score": ""}}`.
                Note that the update operation cannot alter the primary key
                columns. See the Data API documentation for more details.
            general_method_timeout_ms: a timeout, in milliseconds, to impose
                on the underlying API request. If not provided, this object's
                defaults apply. (This method issues a single API request,
                hence all timeout parameters are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Examples:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> from astrapy.data_types import DataAPISet
            >>>
            >>> # Set a new value for a column
            >>> await my_async_table.update_one(
            ...     {"match_id": "fight4", "round": 1},
            ...     update={"$set": {"winner": "Winona"}},
            ... )
            >>>
            >>> # Set a new value for a column while unsetting another colum
            >>> await my_async_table.update_one(
            ...     {"match_id": "fight4", "round": 1},
            ...     update={"$set": {"winner": None, "score": 24}},
            ... )
            >>>
            >>> # Set a 'set' column to empty
            >>> await my_async_table.update_one(
            ...     {"match_id": "fight4", "round": 1},
            ...     update={"$set": {"fighters": DataAPISet()}},
            ... )
            >>>
            >>> # Set a 'set' column to empty using None
            >>> await my_async_table.update_one(
            ...     {"match_id": "fight4", "round": 1},
            ...     update={"$set": {"fighters": None}},
            ... )
            >>>
            >>> # Set a 'set' column to empty using a regular (empty) set
            >>> await my_async_table.update_one(
            ...     {"match_id": "fight4", "round": 1},
            ...     update={"$set": {"fighters": set()}},
            ... )
            >>>
            >>> # Set a 'set' column to empty using $unset
            >>> await my_async_table.update_one(
            ...     {"match_id": "fight4", "round": 1},
            ...     update={"$unset": {"fighters": None}},
            ... )
            >>>
            >>> # A non-existing primary key creates a new row
            >>> await my_async_table.update_one(
            ...     {"match_id": "bar_fight", "round": 4},
            ...     update={"$set": {"score": 8, "winner": "Jack"}},
            ... )
            >>>
            >>> # Delete column values for a row (they'll read as None now)
            >>> await my_async_table.update_one(
            ...     {"match_id": "challenge6", "round": 2},
            ...     update={"$unset": {"winner": None, "score": None}},
            ... )

        Note:
            a row created entirely with update operations (as opposed to
            insertions) may, correspondingly, be deleted by means of an
            $unset update on all columns.
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # assemble the command body, omitting any None-valued entries:
        _command_body: dict[str, Any] = {}
        if filter is not None:
            _command_body["filter"] = filter
        _prepared_update = self._converter_agent.preprocess_payload(update)
        if _prepared_update is not None:
            _command_body["update"] = _prepared_update
        uo_payload = {"updateOne": _command_body}
        logger.info(f"updateOne on '{self.name}'")
        uo_response = await self._api_commander.async_request(
            payload=uo_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished updateOne on '{self.name}'")
        if "status" not in uo_response:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from updateOne API command.",
                raw_response=uo_response,
            )
        # the presence of 'status' suffices; its contents are disregarded.

    async def delete_one(
        self,
        filter: FilterType,
        *,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Delete a row, matching the provided value of the primary key.
        If no row is found with that primary key, the method does nothing.

        Args:
            filter: a predicate expressing the table primary key in full,
                i.e. a dictionary defining values for all columns that form
                the primary key. A row (at most one) is deleted if it matches
                that primary key. An example filter may be
                `{"match_id": "fight4", "round": 1}`.
            general_method_timeout_ms: a timeout, in milliseconds, to impose
                on the underlying API request. If not provided, this object's
                defaults apply. (This method issues a single API request,
                hence all timeout parameters are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Examples:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> # Count the rows matching a certain filter
            >>> len(asyncio.run(my_async_table.find({"match_id": "fight7"}).to_list()))
            3
            >>>
            >>> # Delete a row belonging to the group
            >>> asyncio.run(
            ...     my_async_table.delete_one({"match_id": "fight7", "round": 2})
            ... )
            >>>
            >>> # Count again
            >>> len(asyncio.run(my_async_table.find({"match_id": "fight7"}).to_list()))
            2
            >>>
            >>> # Attempt the delete again (nothing to delete)
            >>> asyncio.run(
            ...     my_async_table.delete_one({"match_id": "fight7", "round": 2})
            ... )
            >>>
            >>> # The count is unchanged
            >>> len(asyncio.run(my_async_table.find({"match_id": "fight7"}).to_list()))
            2
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # assemble the command body, omitting a None filter:
        _command_body: dict[str, Any] = {}
        if filter is not None:
            _command_body["filter"] = filter
        do_payload = self._converter_agent.preprocess_payload(
            {"deleteOne": _command_body}
        )
        logger.info(f"deleteOne on '{self.name}'")
        do_response = await self._api_commander.async_request(
            payload=do_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished deleteOne on '{self.name}'")
        # for table deletes, the API signals success with deletedCount == -1:
        if do_response.get("status", {}).get("deletedCount") != -1:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from deleteOne API command.",
                raw_response=do_response,
            )

    async def delete_many(
        self,
        filter: FilterType,
        *,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Delete all rows matching a provided filter condition.
        This operation can target from a single row to the entirety
        of the table.

        Args:
            filter: a filter dictionary to specify which row(s) must be
                deleted.
                1. If the filter is in the form `{"pk1": val1, "pk2": val2 ...}`
                and specified the primary key in full, at most one row is
                deleted, the one with that primary key.
                2. If the table has "partitionSort" columns, some or all of
                them may be left out (the least significant of them can also
                employ an inequality, or range, predicate): a range of rows,
                but always within a single partition, will be deleted.
                3. If an empty filter, `{}`, is passed, this operation empties
                the table completely. *USE WITH CARE*.
                4. Other kinds of filtering clauses are forbidden.
                In the following examples, the table is partitioned
                by columns ["pa1", "pa2"] and has partitionSort "ps1" and
                "ps2" in that order.
                Valid filter examples:
                - `{"pa1": x, "pa2": y, "ps1": z, "ps2": t}`: deletes one row
                - `{"pa1": x, "pa2": y, "ps1": z}`: deletes multiple rows
                - `{"pa1": x, "pa2": y, "ps1": z, "ps2": {"$lt": q}}`: del. multiple rows
                - `{"pa1": x, "pa2": y}`: deletes all rows in the partition
                - `{}`: empties the table (*CAUTION*)
                Invalid filter examples:
                - `{"pa1": x}`: incomplete partition key
                - `{"pa1": x, "ps1" z}`: incomplete partition key (whatever is added)
                - `{"pa1": x, "pa2": y, "ps1": {"$lt": r}, "ps2": t}`: inequality on
                  a non-least-significant partitionSort column provided.
                - `{"pa1": x, "pa2": y, "ps2": t}`: cannot skip "ps1"
            general_method_timeout_ms: a timeout, in milliseconds, to impose
                on the underlying API request. If not provided, this object's
                defaults apply. (This method issues a single API request,
                hence all timeout parameters are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Examples:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> # Delete a single row (full primary key specified):
            >>> await my_async_table.delete_many({"match_id": "fight4", "round": 1})
            >>>
            >>> # Delete part of a partition (inequality on the
            >>> # last-mentioned 'partitionSort' column):
            >>> await my_async_table.delete_many({"match_id": "fight5", "round": {"$gte": 5}})
            >>>
            >>> # Delete a whole partition (leave 'partitionSort' unspecified):
            >>> await my_async_table.delete_many({"match_id": "fight7"})
            >>>
            >>> # empty the table entirely with empty filter (*CAUTION*):
            >>> await my_async_table.delete_many({})
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # assemble the command body, omitting a None filter:
        _command_body: dict[str, Any] = {}
        if filter is not None:
            _command_body["filter"] = filter
        dm_payload = self._converter_agent.preprocess_payload(
            {"deleteMany": _command_body}
        )
        logger.info(f"deleteMany on '{self.name}'")
        dm_response = await self._api_commander.async_request(
            payload=dm_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished deleteMany on '{self.name}'")
        # for table deletes, the API signals success with deletedCount == -1:
        if dm_response.get("status", {}).get("deletedCount") != -1:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from deleteMany API command.",
                raw_response=dm_response,
            )

    async def drop(
        self,
        *,
        if_exists: bool | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> dict[str, Any]:
        """
        Drop the table: remove it from the database, together with
        every row it stores.

        Args:
            if_exists: if passed as True, trying to drop a non-existing table
                will not error, just silently do nothing instead. If not provided,
                the API default behaviour will hold.
            table_admin_timeout_ms: a timeout, in milliseconds, for the
                underlying API request. Defaults to this object's settings.
                (A single API request is issued by this method, so all timeout
                parameters are equivalent.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> # List tables:
            >>> asyncio.run(my_async_table.database.list_table_names())
            ['games']
            >>>
            >>> # Drop this table:
            >>> asyncio.run(my_async_table.drop())
            >>>
            >>> # List tables again:
            >>> asyncio.run(my_async_table.database.list_table_names())
            []
            >>>
            >>> # Try working on the table now:
            >>> from astrapy.exceptions import DataAPIResponseException
            >>>
            >>> async def try_use_table():
            ...     try:
            ...         my_async_table.find_one({})
            ...     except DataAPIResponseException as err:
            ...         print(str(err))
            ...
            >>> asyncio.run(try_use_table())
            Collection does not exist [...] (COLLECTION_NOT_EXIST)

        Note:
            Use with caution.

        Note:
            Once the method succeeds, methods on this object can still be invoked:
            however, this hardly makes sense as the underlying actual table
            is no more.
            It is responsibility of the developer to design a correct flow
            which avoids using a deceased collection any further.
        """

        logger.info(f"dropping table '{self.name}' (self)")
        # Delegate the actual drop to the parent database object,
        # forwarding the timeout controls as-is.
        dt_response = await self.database.drop_table(
            self.name,
            if_exists=if_exists,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info(f"finished dropping table '{self.name}' (self)")
        return dt_response

    async def command(
        self,
        body: dict[str, Any] | None,
        *,
        raise_api_errors: bool = True,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> dict[str, Any]:
        """
        Submit an arbitrary, caller-provided payload as a POST request
        to the Data API for this table.
        The payload is passed through verbatim: no transformations or type
        conversions are applied to it.

        Args:
            body: the payload of the request, as a JSON-serializable dictionary.
            raise_api_errors: if True, a response carrying a nonempty 'errors'
                field results in an astrapy exception being raised.
            general_method_timeout_ms: a timeout, in milliseconds, for the
                underlying API request. Defaults to this object's settings.
                (A single API request is issued by this method, so all timeout
                parameters are equivalent.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a dictionary with the response of the HTTP request.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> asyncio.run(my_async_table.command({
            ...     "findOne": {
            ...         "filter": {"match_id": "fight4"},
            ...         "projection": {"winner": True},
            ...     }
            ... }))
            {'data': {'document': {'winner': 'Victor'}}, 'status': ...  # shortened
        """

        # Resolve the effective per-request timeout from the explicit
        # arguments and this object's configured defaults.
        _req_ms, _req_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # A short description of the command (its top-level keys) for logging.
        _cmd_desc = ",".join(sorted(body.keys())) if body else "(none)"
        logger.info(f"command={_cmd_desc} on '{self.name}'")
        raw_response = await self._api_commander.async_request(
            payload=body,
            raise_api_errors=raise_api_errors,
            timeout_context=_TimeoutContext(request_ms=_req_ms, label=_req_label),
        )
        logger.info(f"finished command={_cmd_desc} on '{self.name}'")
        return raw_response

Ancestors

  • typing.Generic

Instance variables

var databaseAsyncDatabase

a Database object, the database this table belongs to.

Example

>>> my_async_table.database.name
'the_db'
Expand source code
@property
def database(self) -> AsyncDatabase:
    """
    The database (an AsyncDatabase object) to which this table belongs.

    Example:
        >>> my_async_table.database.name
        'the_db'
    """

    # Simply expose the database reference stored at construction time.
    return self._database
var full_name : str

The fully-qualified table name within the database, in the form "keyspace.table_name".

Example

>>> my_async_table.full_name
'default_keyspace.my_table'
Expand source code
@property
def full_name(self) -> str:
    """
    The fully-qualified name of this table, i.e. its name prefixed
    with the keyspace it belongs to: "keyspace.table_name".

    Example:
        >>> my_async_table.full_name
        'default_keyspace.my_table'
    """

    # Join keyspace and table name with the conventional dot separator.
    return ".".join((self.keyspace, self.name))
var keyspace : str

The keyspace this table is in.

Example

>>> my_async_table.keyspace
'default_keyspace'
Expand source code
@property
def keyspace(self) -> str:
    """
    The keyspace this table belongs to.

    Example:
        >>> my_async_table.keyspace
        'default_keyspace'
    """

    # The keyspace is read off the parent database; the database may
    # legitimately carry keyspace=None, which this property rejects.
    table_keyspace = self.database.keyspace
    if table_keyspace is None:
        raise ValueError("The table's DB is set with keyspace=None")
    return table_keyspace
var name : str

The name of this table.

Example

>>> my_async_table.name
'my_table'
Expand source code
@property
def name(self) -> str:
    """
    The (unqualified) name of this table.

    Example:
        >>> my_async_table.name
        'my_table'
    """

    # Expose the name stored at construction time.
    return self._name

Methods

async def alter(self, operation: AlterTableOperation | dict[str, Any], *, row_type: type[Any] = dict[str, typing.Any], table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> AsyncTable[~NEW_ROW]

Executes one of the available alter-table operations on this table, such as adding/dropping columns.

This is a blocking operation: the method returns once the Data API has acknowledged the alter operation and the table is ready to use in its new configuration.

Args

operation
an instance of one of the astrapy.info.AlterTable* classes, representing which alter operation to perform and the details thereof. A regular dictionary can also be provided, but then it must have the alter operation name at its top level: {"add": {"columns": …}}.
row_type
this parameter acts as a formal specifier for the type checker. If omitted, the resulting AsyncTable is implicitly an AsyncTable[dict[str, Any]]. If provided, it must match the type hint specified in the assignment. See the examples below.
table_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for table_admin_timeout_ms.
timeout_ms
an alias for table_admin_timeout_ms.

Examples

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> from astrapy.info import (
...     AlterTableAddColumns,
...     AlterTableAddVectorize,
...     AlterTableDropColumns,
...     AlterTableDropVectorize,
...     ColumnType,
...     TableScalarColumnTypeDescriptor,
...     VectorServiceOptions,
... )
>>>
>>> # Add a column
>>> new_table_1 = await my_table.alter(
...     AlterTableAddColumns(
...         columns={
...             "tie_break": TableScalarColumnTypeDescriptor(
...                 column_type=ColumnType.BOOLEAN,
...             ),
...         }
...     )
... )
>>>
>>> # Drop a column
>>> new_table_2 = await new_table_1.alter(AlterTableDropColumns(
...     columns=["tie_break"]
... ))
>>>
>>> # Add vectorize to a (vector) column
>>> new_table_3 = await new_table_2.alter(
...     AlterTableAddVectorize(
...         columns={
...             "m_vector": VectorServiceOptions(
...                 provider="openai",
...                 model_name="text-embedding-3-small",
...                 authentication={
...                     "providerKey": "ASTRA_KMS_API_KEY_NAME",
...                 },
...             ),
...         }
...     )
... )
>>>
>>> # Drop vectorize from a (vector) column
>>> # (Also demonstrates type hint usage)
>>> from typing import TypedDict
>>> from astrapy import AsyncTable
>>> from astrapy.data_types import (
...     DataAPISet,
...     DataAPITimestamp,
...     DataAPIVector,
... )
>>> from astrapy.ids import UUID
>>>
>>> class MyMatch(TypedDict):
...     match_id: str
...     round: int
...     m_vector: DataAPIVector
...     score: int
...     when: DataAPITimestamp
...     winner: str
...     fighters: DataAPISet[UUID]
...
>>> new_table_4: AsyncTable[MyMatch] = await new_table_3.alter(
...     AlterTableDropVectorize(columns=["m_vector"]),
...     row_type=MyMatch,
... )
Expand source code
async def alter(
    self,
    operation: AlterTableOperation | dict[str, Any],
    *,
    row_type: type[Any] = DefaultRowType,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> AsyncTable[NEW_ROW]:
    """
    Executes one of the available alter-table operations on this table,
    such as adding/dropping columns.

    This is a blocking operation: the method returns once the Data API
    has acknowledged the alter operation and the table is ready to use
    in its new configuration.

    Args:
        operation: an instance of one of the `astrapy.info.AlterTable*` classes,
            representing which alter operation to perform and the details thereof.
            A regular dictionary can also be provided, but then it must have the
            alter operation name at its top level: {"add": {"columns": ...}}.
        row_type: this parameter acts as a formal specifier for the type checker.
            If omitted, the resulting AsyncTable is implicitly
            an `AsyncTable[dict[str, Any]]`. If provided, it must match
            the type hint specified in the assignment.
            See the examples below.
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Examples:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> from astrapy.info import (
        ...     AlterTableAddColumns,
        ...     AlterTableAddVectorize,
        ...     AlterTableDropColumns,
        ...     AlterTableDropVectorize,
        ...     ColumnType,
        ...     TableScalarColumnTypeDescriptor,
        ...     VectorServiceOptions,
        ... )
        >>>
        >>> # Add a column
        >>> new_table_1 = await my_table.alter(
        ...     AlterTableAddColumns(
        ...         columns={
        ...             "tie_break": TableScalarColumnTypeDescriptor(
        ...                 column_type=ColumnType.BOOLEAN,
        ...             ),
        ...         }
        ...     )
        ... )
        >>>
        >>> # Drop a column
        >>> new_table_2 = await new_table_1.alter(AlterTableDropColumns(
        ...     columns=["tie_break"]
        ... ))
        >>>
        >>> # Add vectorize to a (vector) column
        >>> new_table_3 = await new_table_2.alter(
        ...     AlterTableAddVectorize(
        ...         columns={
        ...             "m_vector": VectorServiceOptions(
        ...                 provider="openai",
        ...                 model_name="text-embedding-3-small",
        ...                 authentication={
        ...                     "providerKey": "ASTRA_KMS_API_KEY_NAME",
        ...                 },
        ...             ),
        ...         }
        ...     )
        ... )
        >>>
        >>> # Drop vectorize from a (vector) column
        >>> # (Also demonstrates type hint usage)
        >>> from typing import TypedDict
        >>> from astrapy import AsyncTable
        >>> from astrapy.data_types import (
        ...     DataAPISet,
        ...     DataAPITimestamp,
        ...     DataAPIVector,
        ... )
        >>> from astrapy.ids import UUID
        >>>
        >>> class MyMatch(TypedDict):
        ...     match_id: str
        ...     round: int
        ...     m_vector: DataAPIVector
        ...     score: int
        ...     when: DataAPITimestamp
        ...     winner: str
        ...     fighters: DataAPISet[UUID]
        ...
        >>> new_table_4: AsyncTable[MyMatch] = await new_table_3.alter(
        ...     AlterTableDropVectorize(columns=["m_vector"]),
        ...     row_type=MyMatch,
        ... )
    """

    # Accept either a ready-made operation object or its plain-dict
    # representation, normalizing to an AlterTableOperation either way.
    n_operation: AlterTableOperation
    if isinstance(operation, AlterTableOperation):
        n_operation = operation
    else:
        n_operation = AlterTableOperation.from_full_dict(operation)
    # Resolve the effective per-request timeout from the explicit
    # arguments and this object's configured defaults.
    _table_admin_timeout_ms, _ta_label = _select_singlereq_timeout_ta(
        timeout_options=self.api_options.timeout_options,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    at_operation_name = n_operation._name
    at_payload = {
        "alterTable": {
            "operation": {
                at_operation_name: n_operation.as_dict(),
            },
        },
    }
    logger.info(f"alterTable({at_operation_name})")
    at_response = await self._api_commander.async_request(
        payload=at_payload,
        timeout_context=_TimeoutContext(
            request_ms=_table_admin_timeout_ms, label=_ta_label
        ),
    )
    # The API signals success with exactly {"ok": 1} in "status".
    if at_response.get("status") != {"ok": 1}:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from alterTable API command.",
            raw_response=at_response,
        )
    logger.info(f"finished alterTable({at_operation_name})")
    # Return a fresh AsyncTable so the caller can re-specify the row type
    # (the underlying database table is the same, just altered).
    return AsyncTable(
        database=self.database,
        name=self.name,
        keyspace=self.keyspace,
        api_options=self.api_options,
    )
async def command(self, body: dict[str, Any] | None, *, raise_api_errors: bool = True, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> dict[str, typing.Any]

Send a POST request to the Data API for this table with an arbitrary, caller-provided payload. No transformations or type conversions are made on the provided payload.

Args

body
a JSON-serializable dictionary, the payload of the request.
raise_api_errors
if True, responses with a nonempty 'errors' field result in an astrapy exception being raised.
general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Returns

a dictionary with the response of the HTTP request.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> asyncio.run(my_async_table.command({
...     "findOne": {
...         "filter": {"match_id": "fight4"},
...         "projection": {"winner": True},
...     }
... }))
{'data': {'document': {'winner': 'Victor'}}, 'status': ...  # shortened
Expand source code
async def command(
    self,
    body: dict[str, Any] | None,
    *,
    raise_api_errors: bool = True,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> dict[str, Any]:
    """
    Issue a POST request to the Data API for this table, using an
    arbitrary payload supplied by the caller.
    The payload is sent as-is: no transformations or type conversions
    are applied to it.

    Args:
        body: the payload of the request, as a JSON-serializable dictionary.
        raise_api_errors: if True, a response carrying a nonempty 'errors'
            field results in an astrapy exception being raised.
        general_method_timeout_ms: a timeout, in milliseconds, for the
            underlying API request. Defaults to this object's settings.
            (A single API request is issued by this method, so all timeout
            parameters are equivalent.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a dictionary with the response of the HTTP request.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> asyncio.run(my_async_table.command({
        ...     "findOne": {
        ...         "filter": {"match_id": "fight4"},
        ...         "projection": {"winner": True},
        ...     }
        ... }))
        {'data': {'document': {'winner': 'Victor'}}, 'status': ...  # shortened
    """

    # Resolve the effective per-request timeout from the explicit
    # arguments and this object's configured defaults.
    _timeout_ms, _timeout_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # A compact description of the command (its top-level keys) for logging.
    _cmd_desc = ",".join(sorted(body.keys())) if body else "(none)"
    logger.info(f"command={_cmd_desc} on '{self.name}'")
    api_response = await self._api_commander.async_request(
        payload=body,
        raise_api_errors=raise_api_errors,
        timeout_context=_TimeoutContext(
            request_ms=_timeout_ms, label=_timeout_label
        ),
    )
    logger.info(f"finished command={_cmd_desc} on '{self.name}'")
    return api_response
async def count_documents(self, filter: FilterType, *, upper_bound: int, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> int

Count the rows in the table matching the specified filter.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"name": "John", "age": 59} {"$and": [{"name": {"$eq": "John"}}, {"age": {"$gt": 58}}]} See the Data API documentation for the full set of operators.
upper_bound
a required ceiling on the result of the count operation. If the actual number of rows exceeds this value, an exception will be raised. Furthermore, if the actual number of rows exceeds the maximum count that the Data API can reach (regardless of upper_bound), an exception will be raised.
general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Returns

the exact count of matching rows.

Examples

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> asyncio.run(my_async_table.insert_many([{"seq": i} for i in range(20)]))
TableInsertManyResult(...)
>>> asyncio.run(my_async_table.count_documents({}, upper_bound=100))
20
>>> asyncio.run(my_async_table.count_documents({"seq":{"$gt": 15}}, upper_bound=100))
4
>>> asyncio.run(my_async_table.count_documents({}, upper_bound=10))
Traceback (most recent call last):
    ... ...
astrapy.exceptions.TooManyRowsToCountException

Note

Count operations are expensive: for this reason, the best practice is to provide a reasonable upper_bound according to the caller expectations. Moreover, indiscriminate usage of count operations for sizeable amounts of rows (i.e. in the thousands and more) is discouraged in favor of alternative application-specific solutions. Keep in mind that the Data API has a hard upper limit on the amount of rows it will count, and that an exception will be thrown by this method if this limit is encountered.

Expand source code
async def count_documents(
    self,
    filter: FilterType,
    *,
    upper_bound: int,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> int:
    """
    Count the rows in the table matching the specified filter.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"name": "John", "age": 59}
                {"$and": [{"name": {"$eq": "John"}}, {"age": {"$gt": 58}}]}
            See the Data API documentation for the full set of operators.
        upper_bound: a required ceiling on the result of the count operation.
            An exception is raised if the actual number of rows exceeds this
            value, and likewise if it exceeds the maximum count that the
            Data API can reach (regardless of upper_bound).
        general_method_timeout_ms: a timeout, in milliseconds, for the
            underlying API request. Defaults to this object's settings.
            (A single API request is issued by this method, so all timeout
            parameters are equivalent.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        the exact count of matching rows.

    Examples:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> asyncio.run(my_async_table.insert_many([{"seq": i} for i in range(20)]))
        TableInsertManyResult(...)
        >>> asyncio.run(my_async_table.count_documents({}, upper_bound=100))
        20
        >>> asyncio.run(my_async_table.count_documents({"seq":{"$gt": 15}}, upper_bound=100))
        4
        >>> asyncio.run(my_async_table.count_documents({}, upper_bound=10))
        Traceback (most recent call last):
            ... ...
        astrapy.exceptions.TooManyRowsToCountException

    Note:
        Count operations are expensive: for this reason, the best practice
        is to provide a reasonable `upper_bound` according to the caller
        expectations. Moreover, indiscriminate usage of count operations
        for sizeable amounts of rows (i.e. in the thousands and more)
        is discouraged in favor of alternative application-specific solutions.
        Keep in mind that the Data API has a hard upper limit on the amount
        of rows it will count, and that an exception will be thrown
        by this method if this limit is encountered.
    """

    # Resolve the effective per-request timeout from the explicit
    # arguments and this object's configured defaults.
    _timeout_ms, _timeout_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    cd_payload = {"countDocuments": {"filter": filter}}
    logger.info(f"countDocuments on '{self.name}'")
    cd_response = await self._api_commander.async_request(
        payload=cd_payload,
        timeout_context=_TimeoutContext(
            request_ms=_timeout_ms, label=_timeout_label
        ),
    )
    logger.info(f"finished countDocuments on '{self.name}'")
    status = cd_response.get("status", {})
    # A well-formed response must carry a "count" entry in its status.
    if "count" not in status:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from countDocuments API command.",
            raw_response=cd_response,
        )
    count: int = cd_response["status"]["count"]
    # "moreData" means the server hit its own hard counting limit.
    if cd_response["status"].get("moreData", False):
        raise TooManyRowsToCountException(
            text=f"Document count exceeds {count}, the maximum allowed by the server",
            server_max_count_exceeded=True,
        )
    if count > upper_bound:
        raise TooManyRowsToCountException(
            text="Document count exceeds required upper bound",
            server_max_count_exceeded=False,
        )
    return count
async def create_index(self, name: str, *, column: str, options: TableIndexOptions | dict[str, Any] | None = None, if_not_exists: bool | None = None, table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Create an index on a non-vector column of the table.

This is a blocking operation: the method returns once the index is created and ready to use.

For creation of a vector index, see method create_vector_index instead.

Args

name
the name of the index. Index names must be unique across the keyspace.
column
the table column on which the index is to be created.
options
if passed, it must be an instance of TableIndexOptions, or an equivalent dictionary, which specifies index settings such as – for a text column – case-sensitivity and so on. See the TableIndexOptions class for more details.
if_not_exists
if set to True, the command will succeed even if an index with the specified name already exists (in which case no actual index creation takes place on the database). The API default of False means that an error is raised by the API in case of name collision.
table_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for table_admin_timeout_ms.
timeout_ms
an alias for table_admin_timeout_ms.

Examples

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> from astrapy.info import TableIndexOptions
>>>
>>> # create an index on a column
>>> await my_async_table.create_index(
...     "score_index",
...     column="score",
... )
>>>
>>> # create an index on a textual column, specifying indexing options
>>> await my_async_table.create_index(
...     "winner_index",
...     column="winner",
...     options=TableIndexOptions(
...         ascii=False,
...         normalize=True,
...         case_sensitive=False,
...     ),
... )
Expand source code
async def create_index(
    self,
    name: str,
    *,
    column: str,
    options: TableIndexOptions | dict[str, Any] | None = None,
    if_not_exists: bool | None = None,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Create an index on a non-vector column of the table.

    This is a blocking operation: the method returns once the index
    is created and ready to use.

    For creation of a vector index, see method `create_vector_index` instead.

    Args:
        name: the name of the index. Index names must be unique across the keyspace.
        column: the table column on which the index is to be created.
        options: if passed, it must be an instance of `TableIndexOptions`,
            or an equivalent dictionary, which specifies index settings
            such as -- for a text column -- case-sensitivity and so on.
            See the `astrapy.info.TableIndexOptions` class for more details.
        if_not_exists: if set to True, the command will succeed even if an index
            with the specified name already exists (in which case no actual
            index creation takes place on the database). The API default of False
            means that an error is raised by the API in case of name collision.
        table_admin_timeout_ms: a timeout, in milliseconds, for the
            underlying API request. Defaults to this object's settings.
            (A single API request is issued by this method, so all timeout
            parameters are equivalent.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Examples:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> from astrapy.info import TableIndexOptions
        >>>
        >>> # create an index on a column
        >>> await my_async_table.create_index(
        ...     "score_index",
        ...     column="score",
        ... )
        >>>
        >>> # create an index on a textual column, specifying indexing options
        >>> await my_async_table.create_index(
        ...     "winner_index",
        ...     column="winner",
        ...     options=TableIndexOptions(
        ...         ascii=False,
        ...         normalize=True,
        ...         case_sensitive=False,
        ...     ),
        ... )
    """

    # Normalize the options (None/dict/TableIndexOptions all accepted),
    # assemble the index definition, and delegate to the shared helper.
    normalized_options = TableIndexOptions.coerce(options or {})
    index_definition: dict[str, Any] = TableIndexDefinition(
        column=column,
        options=normalized_options,
    ).as_dict()
    return await self._create_generic_index(
        i_name=name,
        ci_definition=index_definition,
        ci_command="createIndex",
        if_not_exists=if_not_exists,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
async def create_vector_index(self, name: str, *, column: str, options: TableVectorIndexOptions | dict[str, Any] | None = None, if_not_exists: bool | None = None, table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Create a vector index on a vector column of the table, enabling vector similarity search operations on it.

This is a blocking operation: the method returns once the index is created and ready to use.

For creation of a non-vector index, see method create_index instead.

Args

name
the name of the index. Index names must be unique across the keyspace.
column
the table column, of type "vector" on which to create the index.
options
an instance of TableVectorIndexOptions, or an equivalent dictionary, which specifies settings for the vector index, such as the metric to use or, if desired, a "source model" setting. If omitted, the Data API defaults will apply for the index. See the TableVectorIndexOptions class for more details.
if_not_exists
if set to True, the command will succeed even if an index with the specified name already exists (in which case no actual index creation takes place on the database). The API default of False means that an error is raised by the API in case of name collision.
table_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for table_admin_timeout_ms.
timeout_ms
an alias for table_admin_timeout_ms.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> from astrapy.constants import VectorMetric
>>> from astrapy.info import TableVectorIndexOptions
>>>
>>> # create a vector index with dot-product similarity
>>> await my_async_table.create_vector_index(
...     "m_vector_index",
...     column="m_vector",
...     options=TableVectorIndexOptions(
...         metric=VectorMetric.DOT_PRODUCT,
...     ),
... )
>>> # specify a source_model (since the previous statement
>>> # succeeded, this will do nothing because of `if_not_exists`):
>>> await my_async_table.create_vector_index(
...     "m_vector_index",
...     column="m_vector",
...     options=TableVectorIndexOptions(
...         metric=VectorMetric.DOT_PRODUCT,
...         source_model="nv-qa-4",
...     ),
...     if_not_exists=True,
... )
>>> # leave the settings to the Data API defaults of cosine
>>> # similarity metric (since the previous statement
>>> # succeeded, this will do nothing because of `if_not_exists`):
>>> await my_async_table.create_vector_index(
...     "m_vector_index",
...     column="m_vector",
...     if_not_exists=True,
... )
Expand source code
async def create_vector_index(
    self,
    name: str,
    *,
    column: str,
    options: TableVectorIndexOptions | dict[str, Any] | None = None,
    if_not_exists: bool | None = None,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Create a vector index on a vector column of the table, making vector
    similarity search operations on that column possible.

    This is a blocking operation: the method returns only once the index
    has been created and is ready for use.

    To create an ordinary (non-vector) index, use `create_index` instead.

    Args:
        name: the name of the index. Index names must be unique across the keyspace.
        column: the table column, of type "vector" on which to create the index.
        options: an instance of `TableVectorIndexOptions`, or an equivalent
            dictionary, with the settings for the vector index (for example
            the similarity metric or, if desired, a "source model").
            If omitted, the Data API defaults will apply for the index.
            See the `astrapy.info.TableVectorIndexOptions` class for more details.
        if_not_exists: if set to True, the command will succeed even if an index
            with the specified name already exists (in which case no actual
            index creation takes place on the database). The API default of False
            means that an error is raised by the API in case of name collision.
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> from astrapy.constants import VectorMetric
        >>> from astrapy.info import TableVectorIndexOptions
        >>>
        >>> # create a vector index with dot-product similarity
        >>> await my_async_table.create_vector_index(
        ...     "m_vector_index",
        ...     column="m_vector",
        ...     options=TableVectorIndexOptions(
        ...         metric=VectorMetric.DOT_PRODUCT,
        ...     ),
        ... )
        >>> # specify a source_model (since the previous statement
        >>> # succeeded, this will do nothing because of `if_not_exists`):
        >>> await my_async_table.create_vector_index(
        ...     "m_vector_index",
        ...     column="m_vector",
        ...     options=TableVectorIndexOptions(
        ...         metric=VectorMetric.DOT_PRODUCT,
        ...         source_model="nv-qa-4",
        ...     ),
        ...     if_not_exists=True,
        ... )
        >>> # leave the settings to the Data API defaults of cosine
        >>> # similarity metric (since the previous statement
        >>> # succeeded, this will do nothing because of `if_not_exists`):
        >>> await my_async_table.create_vector_index(
        ...     "m_vector_index",
        ...     column="m_vector",
        ...     if_not_exists=True,
        ... )
    """

    # Normalize the options first, then express the index as a plain dict
    # in the form expected by the createVectorIndex API payload.
    index_options = TableVectorIndexOptions.coerce(options)
    index_definition: dict[str, Any] = TableVectorIndexDefinition(
        column=column,
        options=index_options,
    ).as_dict()
    # Delegate the actual API interaction to the shared index-creation helper.
    return await self._create_generic_index(
        i_name=name,
        ci_definition=index_definition,
        ci_command="createVectorIndex",
        if_not_exists=if_not_exists,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
async def definition(self, *, table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> ListTableDefinition

Query the Data API and return a structure defining the table schema. If there are no unsupported columns in the table, the return value has the same contents as could have been provided to a create_table method call.

Args

table_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for table_admin_timeout_ms.
timeout_ms
an alias for table_admin_timeout_ms.

Returns

A ListTableDefinition object, available for inspection.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> asyncio.run(my_table.definition())
ListTableDefinition(columns=[match_id,round,fighters, ...  # shortened
Expand source code
async def definition(
    self,
    *,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> ListTableDefinition:
    """
    Query the Data API and return a structure describing the table schema.
    If the table has no unsupported columns, the returned value has the same
    contents as could have been passed to a `create_table` method call.

    Args:
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Returns:
        A `ListTableDefinition` object, available for inspection.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> asyncio.run(my_table.definition())
        ListTableDefinition(columns=[match_id,round,fighters, ...  # shortened
    """

    _table_admin_timeout_ms, _ta_label = _select_singlereq_timeout_ta(
        timeout_options=self.api_options.timeout_options,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info(f"getting tables in search of '{self.name}'")
    # List every table in the keyspace, then pick out the one matching
    # this object's name (the API offers no single-table lookup here).
    all_descriptors = await self.database._list_tables_ctx(
        keyspace=None,
        timeout_context=_TimeoutContext(
            request_ms=_table_admin_timeout_ms,
            label=_ta_label,
        ),
    )
    logger.info(f"finished getting tables in search of '{self.name}'")
    for table_descriptor in all_descriptors:
        if table_descriptor.name == self.name:
            return table_descriptor.definition
    raise ValueError(
        f"Table {self.keyspace}.{self.name} not found.",
    )
async def delete_many(self, filter: FilterType, *, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Delete all rows matching a provided filter condition. This operation can target from a single row to the entirety of the table.

Args

filter
a filter dictionary to specify which row(s) must be deleted. 1. If the filter is in the form {"pk1": val1, "pk2": val2 ...} and specified the primary key in full, at most one row is deleted, the one with that primary key. 2. If the table has "partitionSort" columns, some or all of them may be left out (the least significant of them can also employ an inequality, or range, predicate): a range of rows, but always within a single partition, will be deleted. 3. If an empty filter, {}, is passed, this operation empties the table completely. USE WITH CARE. 4. Other kinds of filtering clauses are forbidden. In the following examples, the table is partitioned by columns ["pa1", "pa2"] and has partitionSort "ps1" and "ps2" in that order. Valid filter examples: - {"pa1": x, "pa2": y, "ps1": z, "ps2": t}: deletes one row - {"pa1": x, "pa2": y, "ps1": z}: deletes multiple rows - {"pa1": x, "pa2": y, "ps1": z, "ps2": {"$lt": q}}: del. multiple rows - {"pa1": x, "pa2": y}: deletes all rows in the partition - {}: empties the table (CAUTION) Invalid filter examples: - {"pa1": x}: incomplete partition key - {"pa1": x, "ps1" z}: incomplete partition key (whatever is added) - {"pa1": x, "pa2": y, "ps1": {"$lt": r}, "ps2": t}: inequality on a non-least-significant partitionSort column provided. - {"pa1": x, "pa2": y, "ps2": t}: cannot skip "ps1"
general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Examples

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> # Delete a single row (full primary key specified):
>>> await my_async_table.delete_many({"match_id": "fight4", "round": 1})
>>>
>>> # Delete part of a partition (inequality on the
>>> # last-mentioned 'partitionSort' column):
>>> await my_async_table.delete_many({"match_id": "fight5", "round": {"$gte": 5}})
>>>
>>> # Delete a whole partition (leave 'partitionSort' unspecified):
>>> await my_async_table.delete_many({"match_id": "fight7"})
>>>
>>> # empty the table entirely with empty filter (*CAUTION*):
>>> await my_async_table.delete_many({})
Expand source code
async def delete_many(
    self,
    filter: FilterType,
    *,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Delete all rows matching a provided filter condition.
    This operation can target anything from a single row to the whole table.

    Args:
        filter: a filter dictionary to specify which row(s) must be deleted.
            1. If the filter is in the form `{"pk1": val1, "pk2": val2 ...}`
            and specified the primary key in full, at most one row is deleted,
            the one with that primary key.
            2. If the table has "partitionSort" columns, some or all of them
            may be left out (the least significant of them can also employ
            an inequality, or range, predicate): a range of rows, but always
            within a single partition, will be deleted.
            3. If an empty filter, `{}`, is passed, this operation empties
            the table completely. *USE WITH CARE*.
            4. Other kinds of filtering clauses are forbidden.
            In the following examples, the table is partitioned
            by columns ["pa1", "pa2"] and has partitionSort "ps1" and "ps2" in that
            order.
            Valid filter examples:
            - `{"pa1": x, "pa2": y, "ps1": z, "ps2": t}`: deletes one row
            - `{"pa1": x, "pa2": y, "ps1": z}`: deletes multiple rows
            - `{"pa1": x, "pa2": y, "ps1": z, "ps2": {"$lt": q}}`: del. multiple rows
            - `{"pa1": x, "pa2": y}`: deletes all rows in the partition
            - `{}`: empties the table (*CAUTION*)
            Invalid filter examples:
            - `{"pa1": x}`: incomplete partition key
            - `{"pa1": x, "ps1": z}`: incomplete partition key (whatever is added)
            - `{"pa1": x, "pa2": y, "ps1": {"$lt": r}, "ps2": t}`: inequality on
              a non-least-significant partitionSort column provided.
            - `{"pa1": x, "pa2": y, "ps2": t}`: cannot skip "ps1"
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Examples:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> # Delete a single row (full primary key specified):
        >>> await my_async_table.delete_many({"match_id": "fight4", "round": 1})
        >>>
        >>> # Delete part of a partition (inequality on the
        >>> # last-mentioned 'partitionSort' column):
        >>> await my_async_table.delete_many({"match_id": "fight5", "round": {"$gte": 5}})
        >>>
        >>> # Delete a whole partition (leave 'partitionSort' unspecified):
        >>> await my_async_table.delete_many({"match_id": "fight7"})
        >>>
        >>> # empty the table entirely with empty filter (*CAUTION*):
        >>> await my_async_table.delete_many({})
    """

    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # Drop None-valued entries from the command body before serialization.
    raw_command_body = {"filter": filter}
    dm_payload = self._converter_agent.preprocess_payload(
        {
            "deleteMany": {
                key: value
                for key, value in raw_command_body.items()
                if value is not None
            }
        }
    )
    logger.info(f"deleteMany on '{self.name}'")
    dm_response = await self._api_commander.async_request(
        payload=dm_payload,
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    logger.info(f"finished deleteMany on '{self.name}'")
    # A deletedCount of -1 is how the API signals success for this command;
    # anything else indicates a malformed/unexpected response.
    if dm_response.get("status", {}).get("deletedCount") != -1:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from deleteMany API command.",
            raw_response=dm_response,
        )
async def delete_one(self, filter: FilterType, *, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Delete a row, matching the provided value of the primary key. If no row is found with that primary key, the method does nothing.

Args

filter
a predicate expressing the table primary key in full, i.e. a dictionary defining values for all columns that form the primary key. A row (at most one) is deleted if it matches that primary key. An example filter may be {"match_id": "fight4", "round": 1}.
general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Examples

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> # Count the rows matching a certain filter
>>> len(asyncio.run(my_async_table.find({"match_id": "fight7"}).to_list()))
3
>>>
>>> # Delete a row belonging to the group
>>> asyncio.run(
...     my_async_table.delete_one({"match_id": "fight7", "round": 2})
... )
>>>
>>> # Count again
>>> len(asyncio.run(my_async_table.find({"match_id": "fight7"}).to_list()))
2
>>>
>>> # Attempt the delete again (nothing to delete)
>>> asyncio.run(
...     my_async_table.delete_one({"match_id": "fight7", "round": 2})
... )
>>>
>>> # The count is unchanged
>>> len(asyncio.run(my_async_table.find({"match_id": "fight7"}).to_list()))
2
Expand source code
async def delete_one(
    self,
    filter: FilterType,
    *,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Delete the row, if any, whose primary key matches the provided filter.
    If no row is found with that primary key, the method does nothing.

    Args:
        filter: a predicate expressing the table primary key in full,
            i.e. a dictionary defining values for all columns that form the
            primary key. A row (at most one) is deleted if it matches that primary
            key. An example filter may be `{"match_id": "fight4", "round": 1}`.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Examples:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> # Count the rows matching a certain filter
        >>> len(asyncio.run(my_async_table.find({"match_id": "fight7"}).to_list()))
        3
        >>>
        >>> # Delete a row belonging to the group
        >>> asyncio.run(
        ...     my_async_table.delete_one({"match_id": "fight7", "round": 2})
        ... )
        >>>
        >>> # Count again
        >>> len(asyncio.run(my_async_table.find({"match_id": "fight7"}).to_list()))
        2
        >>>
        >>> # Attempt the delete again (nothing to delete)
        >>> asyncio.run(
        ...     my_async_table.delete_one({"match_id": "fight7", "round": 2})
        ... )
        >>>
        >>> # The count is unchanged
        >>> len(asyncio.run(my_async_table.find({"match_id": "fight7"}).to_list()))
        2
    """

    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # Drop None-valued entries from the command body before serialization.
    raw_command_body = {"filter": filter}
    do_payload = self._converter_agent.preprocess_payload(
        {
            "deleteOne": {
                key: value
                for key, value in raw_command_body.items()
                if value is not None
            }
        }
    )
    logger.info(f"deleteOne on '{self.name}'")
    do_response = await self._api_commander.async_request(
        payload=do_payload,
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    logger.info(f"finished deleteOne on '{self.name}'")
    # A deletedCount of -1 is how the API signals success for this command;
    # anything else indicates a malformed/unexpected response.
    if do_response.get("status", {}).get("deletedCount") != -1:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from deleteOne API command.",
            raw_response=do_response,
        )
async def distinct(self, key: str, *, filter: FilterType | None = None, request_timeout_ms: int | None = None, general_method_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[typing.Any]

Return a list of the unique values of key across the rows in the table that match the provided filter.

Args

key
the name of the field whose value is inspected across rows. Keys are typically just column names, although they can use the dot notation to select particular entries in map columns. For set and list columns, individual entries are "unrolled" automatically; in particular, for lists, numeric indices can be used in the key dot-notation syntax. Example of acceptable key values: "a_column" "map_column.map_key" "list_column.2"
filter
a dictionary expressing which condition the inspected rows must satisfy. The filter can use operators, such as "$eq" for equality, and require columns to compare with literal values. Simple examples are {} (zero filter), {"match_no": 123} (a shorthand for {"match_no": {"$eq": 123}}, or {"match_no": 123, "round": "C"} (multiple conditions are implicitly combined with "$and"). Please consult the Data API documentation for a more detailed explanation of table search filters and tips on their usage.
general_method_timeout_ms
a timeout, in milliseconds, for the whole requested operation (which may involve multiple API requests). This method, being based on find (see) may entail successive HTTP API requests, depending on the amount of involved rows. If not provided, this object's defaults apply.
request_timeout_ms
a timeout, in milliseconds, for each API request. If not provided, this object's defaults apply.
timeout_ms
an alias for general_method_timeout_ms.

Returns

a list of all different values for key found across the rows that match the filter. The result list has no repeated items.

Examples

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> asyncio.run(my_async_table.distinct(
...     "winner",
...     filter={"match_id": "challenge6"},
... ))
['Donna', 'Erick', 'Fiona']
>>>
>>> # distinct values across the whole table:
>>> # (not recommended performance-wise)
>>> asyncio.run(my_async_table.distinct("winner"))
The Data API returned a warning: {'errorCode': 'ZERO_FILTER_OPERATIONS', ...
['Victor', 'Adam Zuul', 'Betta Vigo', 'Caio Gozer', 'Donna', 'Erick', ...
>>>
>>> # Over a column containing null values
>>> # (also with composite filter):
>>> asyncio.run(my_async_table.distinct(
...     "score",
...     filter={"match_id": {"$in": ["fight4", "tournamentA"]}},
... ))
[18, None]
>>>
>>> # distinct over a set column (automatically "unrolled"):
>>> asyncio.run(my_async_table.distinct(
...     "fighters",
...     filter={"match_id": {"$in": ["fight4", "tournamentA"]}},
... ))
[UUID('0193539a-2770-8c09-a32a-111111111111'), UUID('019353e3-00b4-...

Note

It must be kept in mind that distinct is a client-side operation, which effectively browses all required rows using the logic of the find method and collects the unique values found for key. As such, there may be performance, latency and ultimately billing implications if the amount of matching rows is large.

Note

For details on the behaviour of "distinct" in conjunction with real-time changes in the table contents, see the Note of the find command.

Expand source code
async def distinct(
    self,
    key: str,
    *,
    filter: FilterType | None = None,
    request_timeout_ms: int | None = None,
    general_method_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[Any]:
    """
    Return a list of the unique values of `key` across the rows
    in the table that match the provided filter.

    Args:
        key: the name of the field whose value is inspected across rows.
            Keys are typically just column names, although they can use
            the dot notation to select particular entries in map columns.
            For set and list columns, individual entries are "unrolled"
            automatically; in particular, for lists, numeric indices
            can be used in the key dot-notation syntax.
            Example of acceptable `key` values:
                "a_column"
                "map_column.map_key"
                "list_column.2"
        filter: a dictionary expressing which condition the inspected rows
            must satisfy. The filter can use operators, such as "$eq" for equality,
            and require columns to compare with literal values. Simple examples
            are `{}` (zero filter), `{"match_no": 123}` (a shorthand for
            `{"match_no": {"$eq": 123}}`, or `{"match_no": 123, "round": "C"}`
            (multiple conditions are implicitly combined with "$and").
            Please consult the Data API documentation for a more detailed
            explanation of table search filters and tips on their usage.
        general_method_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation (which may involve multiple API requests).
            This method, being based on `find` (see) may entail successive HTTP API
            requests, depending on the amount of involved rows.
            If not provided, this object's defaults apply.
        request_timeout_ms: a timeout, in milliseconds, for each API request.
            If not provided, this object's defaults apply.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a list of all different values for `key` found across the rows
        that match the filter. The result list has no repeated items.

    Examples:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> asyncio.run(my_async_table.distinct(
        ...     "winner",
        ...     filter={"match_id": "challenge6"},
        ... ))
        ['Donna', 'Erick', 'Fiona']
        >>>
        >>> # distinct values across the whole table:
        >>> # (not recommended performance-wise)
        >>> asyncio.run(my_async_table.distinct("winner"))
        The Data API returned a warning: {'errorCode': 'ZERO_FILTER_OPERATIONS', ...
        ['Victor', 'Adam Zuul', 'Betta Vigo', 'Caio Gozer', 'Donna', 'Erick', ...
        >>>
        >>> # Over a column containing null values
        >>> # (also with composite filter):
        >>> asyncio.run(my_async_table.distinct(
        ...     "score",
        ...     filter={"match_id": {"$in": ["fight4", "tournamentA"]}},
        ... ))
        [18, None]
        >>>
        >>> # distinct over a set column (automatically "unrolled"):
        >>> asyncio.run(my_async_table.distinct(
        ...     "fighters",
        ...     filter={"match_id": {"$in": ["fight4", "tournamentA"]}},
        ... ))
        [UUID('0193539a-2770-8c09-a32a-111111111111'), UUID('019353e3-00b4-...

    Note:
        It must be kept in mind that `distinct` is a client-side operation,
        which effectively browses all required rows using the logic
        of the `find` method and collects the unique values found for `key`.
        As such, there may be performance, latency and ultimately
        billing implications if the amount of matching rows is large.

    Note:
        For details on the behaviour of "distinct" in conjunction with
        real-time changes in the table contents, see the
        Note of the `find` command.
    """

    # lazy-import here to avoid circular import issues
    from astrapy.cursors import AsyncTableFindCursor

    # Resolve the two timeouts from explicit arguments, aliases, and defaults.
    _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
        (general_method_timeout_ms, "general_method_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (
            self.api_options.timeout_options.general_method_timeout_ms,
            "general_method_timeout_ms",
        ),
    )
    _request_timeout_ms, _rt_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    # Build the value extractor and the projection-safe version of the key.
    extract_values = _create_document_key_extractor(key)
    shallow_key = _reduce_distinct_key_to_shallow_safe(key)
    if shallow_key == "":
        raise ValueError(
            "The 'key' parameter for distinct cannot be empty "
            "or start with a list index."
        )
    # relaxing the type hint (limited to within this method body)
    find_cursor: AsyncTableFindCursor[dict[str, Any], dict[str, Any]] = (
        AsyncTableFindCursor(
            table=self,
            request_timeout_ms=_request_timeout_ms,
            overall_timeout_ms=_general_method_timeout_ms,
            request_timeout_label=_rt_label,
            overall_timeout_label=_gmt_label,
        )  # type: ignore[assignment]
        .filter(filter)
        .project({shallow_key: True})
    )
    # Walk the cursor, deduplicating extracted values by their hash.
    seen_hashes: set[Any] = set()
    unique_values: list[Any] = []
    logger.info(f"running distinct() on '{self.name}'")
    async for row in find_cursor:
        for value in extract_values(row):
            value_hash = _hash_document(
                value, options=self.api_options.serdes_options
            )
            if value_hash in seen_hashes:
                continue
            seen_hashes.add(value_hash)
            unique_values.append(value)
    logger.info(f"finished running distinct() on '{self.name}'")
    return unique_values
async def drop(self, *, if_exists: bool | None = None, table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> dict[str, typing.Any]

Drop the table, i.e. delete it from the database along with all the rows stored therein.

Args

if_exists
if passed as True, trying to drop a non-existing table will not error, just silently do nothing instead. If not provided, the API default behaviour will hold.
table_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for table_admin_timeout_ms.
timeout_ms
an alias for table_admin_timeout_ms.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> # List tables:
>>> asyncio.run(my_async_table.database.list_table_names())
['games']
>>>
>>> # Drop this table:
>>> asyncio.run(my_table.drop())
>>>
>>> # List tables again:
>>> asyncio.run(my_table.database.list_table_names())
[]
>>>
>>> # Try working on the table now:
>>> from astrapy.exceptions import DataAPIResponseException
>>>
>>> async def try_use_table():
...     try:
...         my_table.find_one({})
...     except DataAPIResponseException as err:
...         print(str(err))
...
>>> asyncio.run(try_use_table())
Collection does not exist [...] (COLLECTION_NOT_EXIST)

Note

Use with caution.

Note

Once the method succeeds, methods on this object can still be invoked: however, this hardly makes sense as the underlying actual table is no more. It is responsibility of the developer to design a correct flow which avoids using a deceased collection any further.

Expand source code
async def drop(
    self,
    *,
    if_exists: bool | None = None,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> dict[str, Any]:
    """
    Drop the table, i.e. delete it from the database along with
    all the rows stored therein.

    Args:
        if_exists: if passed as True, trying to drop a non-existing table
            will not error, just silently do nothing instead. If not provided,
            the API default behaviour will hold.
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults
            apply. (This method issues a single API request, hence all
            timeout parameters are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> # List tables:
        >>> asyncio.run(my_async_table.database.list_table_names())
        ['games']
        >>>
        >>> # Drop this table:
        >>> asyncio.run(my_async_table.drop())
        >>>
        >>> # List tables again:
        >>> asyncio.run(my_async_table.database.list_table_names())
        []

    Note:
        Use with caution.

    Note:
        Once the method succeeds, methods on this object can still be
        invoked: however, this hardly makes sense as the underlying actual
        table is no more. It is the responsibility of the developer to
        design a correct flow which avoids using a dropped table any
        further.
    """

    logger.info(f"dropping table '{self.name}' (self)")
    # The operation is delegated to the parent database object, which
    # issues the actual API request; all timeout aliases are passed along.
    result = await self.database.drop_table(
        self.name,
        if_exists=if_exists,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info(f"finished dropping table '{self.name}' (self)")
    return result
async def estimated_document_count(self, *, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> int

Query the API server for an estimate of the document count in the table.

Contrary to count_documents, this method has no filtering parameters.

Args

general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Returns

a server-provided estimate count of the documents in the table.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> asyncio.run(my_async_table.estimated_document_count())
5820
Expand source code
async def estimated_document_count(
    self,
    *,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> int:
    """
    Query the API server for an estimate of the document count in the table.

    Contrary to `count_documents`, this method has no filtering parameters.

    Args:
        general_method_timeout_ms: a timeout, in milliseconds, to impose on
            the underlying API request. If not provided, this object's
            defaults apply. (This method issues a single API request, hence
            all timeout parameters are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a server-provided estimate count of the documents in the table.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> asyncio.run(my_async_table.estimated_document_count())
        5820
    """

    # Resolve the effective request timeout from the aliases provided
    # by the caller and this object's configured defaults.
    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info(f"estimatedDocumentCount on '{self.name}'")
    ed_response = await self._api_commander.async_request(
        payload={"estimatedDocumentCount": {}},
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    logger.info(f"finished estimatedDocumentCount on '{self.name}'")
    # Guard clause: a well-formed response carries the count in its status.
    status = ed_response.get("status", {})
    if "count" not in status:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from estimatedDocumentCount API command.",
            raw_response=ed_response,
        )
    count: int = status["count"]
    return count
def find(self, filter: FilterType | None = None, *, projection: ProjectionType | None = None, row_type: type[ROW2] | None = None, skip: int | None = None, limit: int | None = None, include_similarity: bool | None = None, include_sort_vector: bool | None = None, sort: SortType | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> AsyncTableFindCursor[ROW, ROW2]

Find rows on the table matching the provided filters and according to sorting criteria including vector similarity.

The returned TableFindCursor object, representing the stream of results, can be iterated over, or consumed and manipulated in several other ways (see the examples below and the TableFindCursor documentation for details). Since the amount of returned items can be large, TableFindCursor is a lazy object, that fetches new data while it is being read using the Data API pagination mechanism.

Invoking .to_list() on a TableFindCursor will cause it to consume all rows and materialize the entire result set as a list. This is not recommended if the amount of results is very large.

Args

filter
a dictionary expressing which condition the returned rows must satisfy. The filter can use operators, such as "$eq" for equality, and require columns to compare with literal values. Simple examples are {} (zero filter, not recommended for large tables), {"match_no": 123} (a shorthand for {"match_no": {"$eq": 123}}), or {"match_no": 123, "round": "C"} (multiple conditions are implicitly combined with "$and"). Please consult the Data API documentation for a more detailed explanation of table search filters and tips on their usage.
projection
a prescription on which columns to return for the matching rows. The projection can take the form {"column1": True, "column2": True}. {"*": True} (i.e. return the whole row), or the complementary form that excludes columns: {"column1": False, "column2": False}. To optimize bandwidth usage, it is recommended to use a projection, especially to avoid unnecessary columns of type vector with high-dimensional embeddings.
row_type
this parameter acts as a formal specifier for the type checker. If omitted, the resulting cursor is implicitly an AsyncTableFindCursor[ROW, ROW], i.e. maintains the same type for the items it returns as that for the rows in the table. Strictly typed code may want to specify this parameter especially when a projection is given.
skip
if provided, it is a number of rows that would be obtained first in the response and are instead skipped.
limit
a maximum amount of rows to get from the table. The returned cursor will stop yielding rows when either this number is reached or there really are no more matches in the table.
include_similarity
a boolean to request the numeric value of the similarity to be returned as an added "$similarity" key in each returned row. It can be used meaningfully only in a vector search (see sort).
include_sort_vector
a boolean to request the search query vector. If set to True (and if the search is a vector search), calling the get_sort_vector method on the returned cursor will yield the vector used for the ANN search.
sort
this dictionary parameter controls the order in which the rows are returned. The sort parameter can express either a vector search or a regular (ascending/descending, even hierarchical) sorting. * For a vector search the parameter takes the form {"vector_column": qv}, with the query vector qv of the appropriate type (list of floats or DataAPIVector). If the table has automatic embedding generation ("vectorize") enabled on that column, the form {"vectorize_enabled_column": "query text"} is also valid. * In the case of non-vector sorting, the parameter specifies the column(s) and the ascending/descending ordering required. If multiple columns are provided, the sorting applies them hierarchically to the rows. Examples are {"score": SortMode.ASCENDING} (equivalently {"score": +1}), {"score": +1, "when": -1}. Note that, depending on the column(s) chosen for sorting, the table partitioning structure, and the presence of indexes, the sorting may be done in-memory by the API. In that case, there may be performance implications and limitations on the amount of items returned. Consult the Data API documentation for more details on this topic.
request_timeout_ms
a timeout, in milliseconds, to impose on each individual HTTP request to the Data API to accomplish the operation. If not provided, this object's defaults apply.
timeout_ms
an alias for request_timeout_ms.

Returns

a TableFindCursor object, that can be iterated over (and manipulated in several ways), that if needed handles pagination under the hood as the rows are consumed.

Note

As the rows are retrieved in chunks progressively, while the cursor is being iterated over, it is possible that the actual results obtained will reflect changes occurring to the table contents in real time.

Examples

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> # Iterate over results:
>>> async def loop1():
...     async for row in my_async_table.find({"match_id": "challenge6"}):
...         print(f"(R:{row['round']}): winner {row['winner']}")
...
>>> asyncio.run(loop1())
(R:1): winner Donna
(R:2): winner Erick
(R:3): winner Fiona
>>>
>>> # Optimize bandwidth using a projection:
>>> proj = {"round": True, "winner": True}
>>> async def loop2():
...     async for row in my_async_table.find(
...           {"match_id": "challenge6"},
...           projection=proj,
...     ):
...         print(f"(R:{row['round']}): winner {row['winner']}")
...
>>> asyncio.run(loop2())
(R:1): winner Donna
(R:2): winner Erick
(R:3): winner Fiona
>>>
>>> # Filter on the partitioning:
>>> asyncio.run(
...     my_async_table.find({"match_id": "challenge6"}).to_list()
... )
[{'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
>>>
>>> # Filter on primary key:
>>> asyncio.run(
...     my_async_table.find(
...         {"match_id": "challenge6", "round": 1}
...     ).to_list()
... )
[{'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
>>>
>>> # Filter on a regular indexed column:
>>> asyncio.run(my_async_table.find({"winner": "Caio Gozer"}).to_list())
[{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
>>>
>>> # Non-equality filter on a regular indexed column:
>>> asyncio.run(my_async_table.find({"score": {"$gte": 15}}).to_list())
[{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
>>>
>>> # Filter on a regular non-indexed column:
>>> # (not recommended performance-wise)
>>> asyncio.run(my_async_table.find(
...     {"when": {
...         "$gte": DataAPITimestamp.from_string("1999-12-31T01:23:44Z")
...     }}
... ).to_list())
The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
[{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
>>>
>>> # Empty filter (not recommended performance-wise):
>>> asyncio.run(my_async_table.find({}).to_list())
The Data API returned a warning: {'errorCode': 'ZERO_FILTER_OPERATIONS', ...
[{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
>>>
>>> # Filter on the primary key and a regular non-indexed column:
>>> # (not recommended performance-wise)
>>> asyncio.run(my_async_table.find(
...     {"match_id": "fight5", "round": 3, "winner": "Caio Gozer"}
... ).to_list())
The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
[{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
>>>
>>> # Filter on a regular non-indexed column (and incomplete primary key)
>>> # (not recommended performance-wise)
>>> asyncio.run(my_async_table.find(
...     {"round": 3, "winner": "Caio Gozer"}
... ).to_list())
The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
[{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
>>>
>>> # Vector search with "sort" (on an appropriately-indexed vector column):
>>> asyncio.run(my_async_table.find(
...     {},
...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
...     projection={"winner": True},
...     limit=3,
... ).to_list())
[{'winner': 'Donna'}, {'winner': 'Victor'}]
>>>
>>> # Hybrid search with vector sort and non-vector filtering:
>>> my_table.find(
...     {"match_id": "fight4"},
...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
...     projection={"winner": True},
... ).to_list()
[{'winner': 'Victor'}]
>>>
>>> # Return the numeric value of the vector similarity
>>> # (also demonstrating that one can pass a plain list for a vector):
>>> asyncio.run(my_async_table.find(
...     {},
...     sort={"m_vector": [0.2, 0.3, 0.4]},
...     projection={"winner": True},
...     limit=3,
...     include_similarity=True,
... ).to_list())
[{'winner': 'Donna', '$similarity': 0.515}, {'winner': 'Victor', ...
>>>
>>> # Non-vector sorting on a 'partitionSort' column:
>>> asyncio.run(my_async_table.find(
...     {"match_id": "fight5"},
...     sort={"round": SortMode.DESCENDING},
...     projection={"winner": True},
... ).to_list())
[{'winner': 'Caio Gozer'}, {'winner': 'Betta Vigo'}, ...
>>>
>>> # Using `skip` and `limit`:
>>> asyncio.run(my_async_table.find(
...     {"match_id": "fight5"},
...     sort={"round": SortMode.DESCENDING},
...     projection={"winner": True},
...     skip=1,
...     limit=2,
... ).to_list())
The Data API returned a warning: {'errorCode': 'IN_MEMORY_SORTING...
[{'winner': 'Betta Vigo'}, {'winner': 'Adam Zuul'}]
>>>
>>> # Non-vector sorting on a regular column:
>>> # (not recommended performance-wise)
>>> asyncio.run(my_async_table.find(
...     {"match_id": "fight5"},
...     sort={"winner": SortMode.ASCENDING},
...     projection={"winner": True},
... ).to_list())
The Data API returned a warning: {'errorCode': 'IN_MEMORY_SORTING...
[{'winner': 'Adam Zuul'}, {'winner': 'Betta Vigo'}, ...
>>>
>>> # Using `.map()` on a cursor:
>>> winner_cursor = my_async_table.find(
...     {"match_id": "fight5"},
...     sort={"round": SortMode.DESCENDING},
...     projection={"winner": True},
...     limit=5,
... )
>>> print("/".join(asyncio.run(
...     winner_cursor.map(lambda row: row["winner"].upper()).to_list())
... ))
CAIO GOZER/BETTA VIGO/ADAM ZUUL
>>>
>>> # Some other examples of cursor manipulation
>>> matches_async_cursor = my_async_table.find(
...     sort={"m_vector": DataAPIVector([-0.1, 0.15, 0.3])}
... )
>>> asyncio.run(matches_async_cursor.has_next())
True
>>> asyncio.run(matches_async_cursor.__anext__())
{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
>>> matches_async_cursor.consumed
1
>>> matches_async_cursor.rewind()
>>> matches_async_cursor.consumed
0
>>> asyncio.run(matches_async_cursor.has_next())
True
>>> matches_async_cursor.close()
>>>
>>> async def try_consume():
...     try:
...         await matches_async_cursor.__anext__()
...     except StopAsyncIteration:
...         print("StopAsyncIteration triggered.")
...
>>> asyncio.run(try_consume())
StopAsyncIteration triggered.
Expand source code
def find(
    self,
    filter: FilterType | None = None,
    *,
    projection: ProjectionType | None = None,
    row_type: type[ROW2] | None = None,
    skip: int | None = None,
    limit: int | None = None,
    include_similarity: bool | None = None,
    include_sort_vector: bool | None = None,
    sort: SortType | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> AsyncTableFindCursor[ROW, ROW2]:
    """
    Find rows on the table matching the provided filters
    and according to sorting criteria including vector similarity.

    The returned AsyncTableFindCursor, representing the stream of results,
    can be iterated over, or consumed and manipulated in several other ways
    (see the `TableFindCursor` documentation for details). Since the amount
    of returned items can be large, the cursor is a lazy object that fetches
    new data while it is being read, using the Data API pagination mechanism.
    Invoking `.to_list()` on the cursor consumes all rows and materializes
    the entire result set as a list: this is not recommended when the amount
    of results is very large.

    Args:
        filter: a dictionary expressing which condition the returned rows
            must satisfy. The filter can use operators, such as "$eq" for
            equality, and require columns to compare with literal values.
            Simple examples are `{}` (zero filter, not recommended for large
            tables), `{"match_no": 123}` (a shorthand for
            `{"match_no": {"$eq": 123}}`), or `{"match_no": 123, "round": "C"}`
            (multiple conditions are implicitly combined with "$and").
            Please consult the Data API documentation for a more detailed
            explanation of table search filters and tips on their usage.
        projection: a prescription on which columns to return for the
            matching rows. The projection can take the form
            `{"column1": True, "column2": True}`, `{"*": True}` (i.e. return
            the whole row), or the complementary form that excludes columns:
            `{"column1": False, "column2": False}`. To optimize bandwidth
            usage, it is recommended to use a projection, especially to
            avoid unnecessary columns of type vector with high-dimensional
            embeddings.
        row_type: this parameter acts as a formal specifier for the type
            checker. If omitted, the resulting cursor is implicitly an
            `AsyncTableFindCursor[ROW, ROW]`, i.e. maintains the same type
            for the items it returns as that for the rows in the table.
            Strictly typed code may want to specify this parameter
            especially when a projection is given.
        skip: if provided, it is a number of rows that would be obtained
            first in the response and are instead skipped.
        limit: a maximum amount of rows to get from the table. The returned
            cursor will stop yielding rows when either this number is
            reached or there really are no more matches in the table.
        include_similarity: a boolean to request the numeric value of the
            similarity to be returned as an added "$similarity" key in each
            returned row. It can be used meaningfully only in a vector
            search (see `sort`).
        include_sort_vector: a boolean to request the search query vector.
            If set to True (and if the search is a vector search), calling
            the `get_sort_vector` method on the returned cursor will yield
            the vector used for the ANN search.
        sort: this dictionary parameter controls the order in which the
            rows are returned. It can express either a vector search or a
            regular (ascending/descending, even hierarchical) sorting.
            * For a vector search the parameter takes the form
            `{"vector_column": qv}`, with the query vector `qv` of the
            appropriate type (list of floats or DataAPIVector). If the
            table has automatic embedding generation ("vectorize") enabled
            on that column, the form
            `{"vectorize_enabled_column": "query text"}` is also valid.
            * In the case of non-vector sorting, the parameter specifies
            the column(s) and the ascending/descending ordering required,
            applied hierarchically if multiple columns are provided, e.g.
            `{"score": SortMode.ASCENDING}` (equivalently `{"score": +1}`),
            or `{"score": +1, "when": -1}`.
            Note that, depending on the column(s) chosen for sorting, the
            table partitioning structure, and the presence of indexes, the
            sorting may be done in-memory by the API. In that case, there
            may be performance implications and limitations on the amount
            of items returned. Consult the Data API documentation for more
            details on this topic.
        request_timeout_ms: a timeout, in milliseconds, to impose on each
            individual HTTP request to the Data API to accomplish the
            operation. If not provided, this object's defaults apply.
        timeout_ms: an alias for `request_timeout_ms`.

    Returns:
        an AsyncTableFindCursor object, that can be iterated over (and
        manipulated in several ways), that if needed handles pagination
        under the hood as the rows are consumed.

    Note:
        As the rows are retrieved in chunks progressively, while the cursor
        is being iterated over, it is possible that the actual results
        obtained will reflect changes occurring to the table contents in
        real time.
    """

    # lazy-import here to avoid circular import issues
    from astrapy.cursors import AsyncTableFindCursor

    # Pick the first applicable per-request timeout among the explicit
    # aliases and this object's configured default.
    _request_timeout_ms, _rt_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    # Build a base cursor, then refine it through its builder methods.
    base_cursor: AsyncTableFindCursor[ROW, ROW2] = AsyncTableFindCursor(
        table=self,
        request_timeout_ms=_request_timeout_ms,
        overall_timeout_ms=None,
        request_timeout_label=_rt_label,
    )
    return (
        base_cursor.filter(filter)
        .project(projection)
        .skip(skip)
        .limit(limit)
        .sort(sort)
        .include_similarity(include_similarity)
        .include_sort_vector(include_sort_vector)
    )
async def find_one(self, filter: FilterType | None = None, *, projection: ProjectionType | None = None, include_similarity: bool | None = None, sort: SortType | None = None, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> Optional[~ROW]

Run a search according to the given filtering and sorting criteria and return the top row matching it, or nothing if there are none.

The parameters are analogous to some of the parameters to the find method (which has a few more that do not make sense in this case, such as limit).

Args

filter
a dictionary expressing which condition the returned row must satisfy. The filter can use operators, such as "$eq" for equality, and require columns to compare with literal values. Simple examples are {} (zero filter), {"match_no": 123} (a shorthand for {"match_no": {"$eq": 123}}), or {"match_no": 123, "round": "C"} (multiple conditions are implicitly combined with "$and"). Please consult the Data API documentation for a more detailed explanation of table search filters and tips on their usage.
projection
a prescription on which columns to return for the matching row. The projection can take the form {"column1": True, "column2": True}, {"*": True} (i.e. return the whole row), or the complementary form that excludes columns: {"column1": False, "column2": False}. To optimize bandwidth usage, it is recommended to use a projection, especially to avoid unnecessary columns of type vector with high-dimensional embeddings.
include_similarity
a boolean to request the numeric value of the similarity to be returned as an added "$similarity" key in the returned row. It can be used meaningfully only in a vector search (see sort).
sort
this dictionary parameter controls the sorting order, hence determines which row is being returned. The sort parameter can express either a vector search or a regular (ascending/descending, even hierarchical) sorting. * For a vector search the parameter takes the form {"vector_column": qv}, with the query vector qv of the appropriate type (list of floats or DataAPIVector). If the table has automatic embedding generation ("vectorize") enabled on that column, the form {"vectorize_enabled_column": "query text"} is also valid. * In the case of non-vector sorting, the parameter specifies the column(s) and the ascending/descending ordering required. If multiple columns are provided, the sorting applies them hierarchically to the rows. Examples are {"score": SortMode.ASCENDING} (equivalently {"score": +1}), {"score": +1, "when": -1}. Note that, depending on the column(s) chosen for sorting, the table partitioning structure, and the presence of indexes, the sorting may be done in-memory by the API. In that case, there may be performance implications. Consult the Data API documentation for more details on this topic.
general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Returns

a dictionary expressing the result if a row is found, otherwise None.

Examples

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> from astrapy.constants import SortMode
>>> from astrapy.data_types import DataAPITimestamp, DataAPIVector
>>>
>>> # Filter on the partitioning:
>>> asyncio.run(my_async_table.find_one({"match_id": "challenge6"}))
{'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
>>>
>>> # A find with no matches:
>>> str(asyncio.run(my_async_table.find_one({"match_id": "not_real"})))
'None'
>>>
>>> # Optimize bandwidth using a projection:
>>> asyncio.run(my_async_table.find_one(
...     {"match_id": "challenge6"},
...     projection={"round": True, "winner": True},
... ))
{'round': 1, 'winner': 'Donna'}
>>>
>>> # Filter on primary key:
>>> asyncio.run(
...     my_async_table.find_one({"match_id": "challenge6", "round": 1})
... )
{'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
>>>
>>> # Filter on a regular indexed column:
>>> asyncio.run(my_async_table.find_one({"winner": "Caio Gozer"}))
{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
>>>
>>> # Non-equality filter on a regular indexed column:
>>> asyncio.run(my_async_table.find_one({"score": {"$gte": 15}}))
{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
>>>
>>> # Filter on a regular non-indexed column:
>>> # (not recommended performance-wise)
>>> asyncio.run(my_async_table.find_one(
...     {"when": {
...         "$gte": DataAPITimestamp.from_string("1999-12-31T01:23:44Z")
...     }}
... ))
The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
>>>
>>> # Empty filter:
>>> asyncio.run(my_async_table.find_one({}))
The Data API returned a warning: {'errorCode': 'ZERO_FILTER_OPERATIONS', ...
{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
>>>
>>> # Filter on the primary key and a regular non-indexed column:
>>> # (not recommended performance-wise)
>>> asyncio.run(my_async_table.find_one(
...     {"match_id": "fight5", "round": 3, "winner": "Caio Gozer"}
... ))
The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
>>>
>>> # Filter on a regular non-indexed column (and incomplete primary key)
>>> # (not recommended performance-wise)
>>> asyncio.run(
...     my_async_table.find_one({"round": 3, "winner": "Caio Gozer"})
... )
The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
>>>
>>> # Vector search with "sort" (on an appropriately-indexed vector column):
>>> asyncio.run(my_async_table.find_one(
...     {},
...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
...     projection={"winner": True},
... ))
{'winner': 'Donna'}
>>>
>>> # Hybrid search with vector sort and non-vector filtering:
>>> asyncio.run(my_table.find_one(
...     {"match_id": "fight4"},
...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
...     projection={"winner": True},
... ))
{'winner': 'Victor'}
>>>
>>> # Return the numeric value of the vector similarity
>>> # (also demonstrating that one can pass a plain list for a vector):
>>> asyncio.run(my_async_table.find_one(
...     {},
...     sort={"m_vector": [0.2, 0.3, 0.4]},
...     projection={"winner": True},
...     include_similarity=True,
... ))
{'winner': 'Donna', '$similarity': 0.515}
>>>
>>> # Non-vector sorting on a 'partitionSort' column:
>>> asyncio.run(my_async_table.find_one(
...     {"match_id": "fight5"},
...     sort={"round": SortMode.DESCENDING},
...     projection={"winner": True},
... ))
{'winner': 'Caio Gozer'}
>>>
>>> # Non-vector sorting on a regular column:
>>> # (not recommended performance-wise)
>>> asyncio.run(my_async_table.find_one(
...     {"match_id": "fight5"},
...     sort={"winner": SortMode.ASCENDING},
...     projection={"winner": True},
... ))
The Data API returned a warning: {'errorCode': 'IN_MEMORY_SORTING...
{'winner': 'Adam Zuul'}
Expand source code
async def find_one(
    self,
    filter: FilterType | None = None,
    *,
    projection: ProjectionType | None = None,
    include_similarity: bool | None = None,
    sort: SortType | None = None,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> ROW | None:
    """
    Run a search according to the given filtering and sorting criteria
    and return the single best-matching row, or None if nothing matches.

    The parameters mirror a subset of those accepted by the `find` method
    (which additionally offers options, such as `limit`, that are
    meaningless for a single-row search).

    Args:
        filter: a dictionary stating the condition the returned row must
            satisfy. Operators such as "$eq" can be used, and columns can be
            required to match literal values. Simple examples are `{}`
            (zero filter), `{"match_no": 123}` (a shorthand for
            `{"match_no": {"$eq": 123}}`), or
            `{"match_no": 123, "round": "C"}` (multiple conditions are
            implicitly combined with "$and"). Please consult the Data API
            documentation for a more detailed explanation of table search
            filters and tips on their usage.
        projection: a prescription on which columns to return for the
            matching row. The projection can take the form
            `{"column1": True, "column2": True}`, `{"*": True}` (i.e. return
            the whole row), or the complementary form that excludes columns:
            `{"column1": False, "column2": False}`. To optimize bandwidth
            usage, it is recommended to use a projection, especially to
            avoid unnecessary columns of type vector with high-dimensional
            embeddings.
        include_similarity: a boolean to request the numeric value of the
            similarity to be returned as an added "$similarity" key in the
            returned row. It can be used meaningfully only in a vector
            search (see `sort`).
        sort: this dictionary parameter controls the sorting order, hence
            determines which row is being returned. It can express either
            a vector search or a regular (ascending/descending, even
            hierarchical) sorting.
            * For a vector search it takes the form `{"vector_column": qv}`,
            with the query vector `qv` of the appropriate type (list of
            floats or DataAPIVector). If the table has automatic embedding
            generation ("vectorize") enabled on that column, the form
            `{"vectorize_enabled_column": "query text"}` is also valid.
            * For non-vector sorting, it specifies the column(s) and the
            ascending/descending ordering required; multiple columns are
            applied hierarchically. Examples are
            `{"score": SortMode.ASCENDING}` (equivalently `{"score": +1}`)
            and `{"score": +1, "when": -1}`.
            Note that, depending on the column(s) chosen for sorting, the
            table partitioning structure, and the presence of indexes, the
            sorting may be done in-memory by the API, with possible
            performance implications. Consult the Data API documentation
            for more details on this topic.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on
            the underlying API request. If not provided, this object's
            defaults apply. (This method issues a single API request, hence
            all timeout parameters are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a dictionary expressing the result if a row is found, otherwise None.

    Examples:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> from astrapy.constants import SortMode
        >>> from astrapy.data_types import DataAPIVector
        >>>
        >>> # Filter on the partitioning:
        >>> asyncio.run(my_async_table.find_one({"match_id": "challenge6"}))
        {'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
        >>>
        >>> # A find with no matches:
        >>> str(asyncio.run(my_async_table.find_one({"match_id": "not_real"})))
        'None'
        >>>
        >>> # Vector search with a projection and the similarity value:
        >>> asyncio.run(my_async_table.find_one(
        ...     {},
        ...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
        ...     projection={"winner": True},
        ...     include_similarity=True,
        ... ))
        {'winner': 'Donna', '$similarity': 0.515}
        >>>
        >>> # Non-vector sorting on a 'partitionSort' column:
        >>> asyncio.run(my_async_table.find_one(
        ...     {"match_id": "fight5"},
        ...     sort={"round": SortMode.DESCENDING},
        ...     projection={"winner": True},
        ... ))
        {'winner': 'Caio Gozer'}
    """

    # Resolve the effective per-request timeout from the aliases and defaults.
    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # Assemble the findOne command body, omitting anything left unspecified.
    if include_similarity is None:
        fo_options = None
    else:
        fo_options = {"includeSimilarity": include_similarity}
    command_body = {}
    for fo_key, fo_value in (
        ("filter", filter),
        ("projection", normalize_optional_projection(projection)),
        ("options", fo_options),
        ("sort", sort),
    ):
        if fo_value is not None:
            command_body[fo_key] = fo_value
    fo_payload = self._converter_agent.preprocess_payload(
        {"findOne": command_body}
    )
    fo_response = await self._api_commander.async_request(
        payload=fo_payload,
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    # Validate the response shape before any further processing.
    response_data = fo_response.get("data") or {}
    response_status = fo_response.get("status") or {}
    if "document" not in response_data:
        raise UnexpectedDataAPIResponseException(
            text="Response from findOne API command missing 'document'.",
            raw_response=fo_response,
        )
    if "projectionSchema" not in response_status:
        raise UnexpectedDataAPIResponseException(
            text="Response from findOne API command missing 'projectionSchema'.",
            raw_response=fo_response,
        )
    raw_document = response_data["document"]
    if raw_document is None:
        return None
    # Convert API-level values back into client-side types per the schema.
    return self._converter_agent.postprocess_row(
        raw_document,
        columns_dict=response_status["projectionSchema"],
        similarity_pseudocolumn="$similarity" if include_similarity else None,
    )
async def info(self, *, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> TableInfo

Return information on the table. This should not be confused with the table definition (i.e. the schema).

Args

database_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying DevOps API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for database_admin_timeout_ms.
timeout_ms
an alias for database_admin_timeout_ms.

Returns

A TableInfo object for inspection.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> # Note: output reformatted for clarity.
>>> asyncio.run(my_async_table.info())
TableInfo(
    database_info=AstraDBDatabaseInfo(id=..., name=..., ...),
    keyspace='default_keyspace',
    name='games',
    full_name='default_keyspace.games'
)
Expand source code
async def info(
    self,
    *,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> TableInfo:
    """
    Return information on the table. This should not be confused with
    the table definition (i.e. the schema).

    Args:
        database_admin_timeout_ms: a timeout, in milliseconds, to impose on
            the underlying DevOps API request. If not provided, this
            object's defaults apply. (This method issues a single API
            request, hence all timeout parameters are treated the same.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.

    Returns:
        A TableInfo object for inspection.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> # Note: output reformatted for clarity.
        >>> asyncio.run(my_async_table.info())
        TableInfo(
            database_info=AstraDBDatabaseInfo(id=..., name=..., ...),
            keyspace='default_keyspace',
            name='games',
            full_name='default_keyspace.games'
        )
    """

    # Delegate the lookup to the parent database, forwarding all
    # timeout aliases unchanged.
    timeout_kwargs = {
        "database_admin_timeout_ms": database_admin_timeout_ms,
        "request_timeout_ms": request_timeout_ms,
        "timeout_ms": timeout_ms,
    }
    database_info = await self.database.info(**timeout_kwargs)
    # Combine the database-level info with this table's identifiers.
    return TableInfo(
        database_info=database_info,
        keyspace=self.keyspace,
        name=self.name,
        full_name=self.full_name,
    )
async def insert_many(self, rows: Iterable[ROW], *, ordered: bool = False, chunk_size: int | None = None, concurrency: int | None = None, request_timeout_ms: int | None = None, general_method_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> TableInsertManyResult

Insert a number of rows into the table, with implied overwrite in case of primary key collision.

Inserting rows whose primary key correspond to entries already stored in the table has the effect of an in-place update: the rows are overwritten. However, if the rows being inserted are partially provided, i.e. some columns are not specified, these are left unchanged on the database. To explicitly reset them, specify their value as appropriate to their data type, i.e. None, {} or analogous.

Args

rows
an iterable of dictionaries, each expressing a row to insert. Each row must at least fully specify the primary key column values, while any other column may be omitted if desired (in which case it is left as is on DB). The values for the various columns supplied in each row must be of the right data type for the insertion to succeed. Non-primary-key columns can also be explicitly set to null.
ordered
if False (default), the insertions can occur in arbitrary order and possibly concurrently. If True, they are processed sequentially. If there are no specific reasons against it, unordered insertions are to be preferred as they complete much faster.
chunk_size
how many rows to include in each single API request. Exceeding the server maximum allowed value results in an error. Leave it unspecified (recommended) to use the system default.
concurrency
maximum number of concurrent requests to the API at a given time. It cannot be more than one for ordered insertions.
general_method_timeout_ms
a timeout, in milliseconds, to impose on the whole operation, which may consist of several API requests. If not provided, this object's defaults apply.
request_timeout_ms
a timeout, in milliseconds, to impose on each individual HTTP request to the Data API to accomplish the operation. If not provided, this object's defaults apply.
timeout_ms
an alias for general_method_timeout_ms.

Returns

a TableInsertManyResult object, whose attributes are the primary key of the inserted rows both in the form of dictionaries and of tuples.

Examples

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> # Insert complete and partial rows at once (concurrently)
>>> from astrapy.data_types import (
...     DataAPISet,
...     DataAPITimestamp,
...     DataAPIVector,
... )
>>> from astrapy.ids import UUID
>>>
>>> insert_result = asyncio.run(my_async_table.insert_many(
...     [
...         {
...             "match_id": "fight4",
...             "round": 1,
...             "winner": "Victor",
...             "score": 18,
...             "when": DataAPITimestamp.from_string(
...                 "2024-11-28T11:30:00Z",
...             ),
...             "fighters": DataAPISet([
...                 UUID("0193539a-2770-8c09-a32a-111111111111"),
...                 UUID('019353e3-00b4-83f9-a127-222222222222'),
...             ]),
...             "m_vector": DataAPIVector([0.4, -0.6, 0.2]),
...         },
...         {"match_id": "fight5", "round": 1, "winner": "Adam"},
...         {"match_id": "fight5", "round": 2, "winner": "Betta"},
...         {"match_id": "fight5", "round": 3, "winner": "Caio"},
...         {
...             "match_id": "challenge6",
...             "round": 1,
...             "winner": "Donna",
...             "m_vector": [0.9, -0.1, -0.3],
...         },
...         {"match_id": "challenge6", "round": 2, "winner": "Erick"},
...         {"match_id": "challenge6", "round": 3, "winner": "Fiona"},
...         {"match_id": "tournamentA", "round": 1, "winner": "Gael"},
...         {"match_id": "tournamentA", "round": 2, "winner": "Hanna"},
...         {
...             "match_id": "tournamentA",
...             "round": 3,
...             "winner": "Ian",
...             "fighters": DataAPISet([
...                 UUID("0193539a-2770-8c09-a32a-111111111111"),
...             ]),
...         },
...         {"match_id": "fight7", "round": 1, "winner": "Joy"},
...         {"match_id": "fight7", "round": 2, "winner": "Kevin"},
...         {"match_id": "fight7", "round": 3, "winner": "Lauretta"},
...     ],
...     concurrency=10,
...     chunk_size=3,
... ))
>>> insert_result.inserted_ids
[{'match_id': 'fight4', 'round': 1}, {'match_id': 'fight5', ...
>>> insert_result.inserted_id_tuples
[('fight4', 1), ('fight5', 1), ('fight5', 2), ('fight5', 3), ...
>>>
>>> # Ordered insertion
>>> # (would stop on first failure; predictable end result on DB)
>>> asyncio.run(my_async_table.insert_many(
...     [
...         {"match_id": "fight5", "round": 1, "winner": "Adam0"},
...         {"match_id": "fight5", "round": 2, "winner": "Betta0"},
...         {"match_id": "fight5", "round": 3, "winner": "Caio0"},
...         {"match_id": "fight5", "round": 1, "winner": "Adam Zuul"},
...         {"match_id": "fight5", "round": 2, "winner": "Betta Vigo"},
...         {"match_id": "fight5", "round": 3, "winner": "Caio Gozer"},
...     ],
...     ordered=True,
... ))
TableInsertManyResult(inserted_ids=[{'match_id': 'fight5', 'round': 1}, ...

Note

Unordered insertions are executed with some degree of concurrency, so it is usually better to prefer this mode unless the order in the row sequence is important.

Note

If some of the rows are unsuitable for insertion, for instance have the wrong data type for a column or lack the primary key, the Data API validation check will fail for those specific requests that contain the faulty rows. Depending on concurrency and the value of the ordered parameter, a number of rows in general could have been successfully inserted. It is possible to capture such a scenario, and inspect which rows actually got inserted, by catching an error of type TableInsertManyException: its partial_result attribute is precisely a TableInsertManyResult, encoding details on the successful writes.

Expand source code
async def insert_many(
    self,
    rows: Iterable[ROW],
    *,
    ordered: bool = False,
    chunk_size: int | None = None,
    concurrency: int | None = None,
    request_timeout_ms: int | None = None,
    general_method_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> TableInsertManyResult:
    """
    Insert a number of rows into the table,
    with implied overwrite in case of primary key collision.

    Inserting rows whose primary key correspond to entries already stored
    in the table has the effect of an in-place update: the rows are overwritten.
    However, if the rows being inserted are partially provided, i.e. some columns
    are not specified, these are left unchanged on the database. To explicitly
    reset them, specify their value as appropriate to their data type,
    i.e. `None`, `{}` or analogous.

    Args:
        rows: an iterable of dictionaries, each expressing a row to insert.
            Each row must at least fully specify the primary key column values,
            while any other column may be omitted if desired (in which case
            it is left as is on DB).
            The values for the various columns supplied in each row must
            be of the right data type for the insertion to succeed.
            Non-primary-key columns can also be explicitly set to null.
        ordered: if False (default), the insertions can occur in arbitrary order
            and possibly concurrently. If True, they are processed sequentially.
            If there are no specific reasons against it, unordered insertions
            are to be preferred as they complete much faster.
        chunk_size: how many rows to include in each single API request.
            Exceeding the server maximum allowed value results in an error.
            Leave it unspecified (recommended) to use the system default.
        concurrency: maximum number of concurrent requests to the API at
            a given time. It cannot be more than one for ordered insertions.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            whole operation, which may consist of several API requests.
            If not provided, this object's defaults apply.
        request_timeout_ms: a timeout, in milliseconds, to impose on each
            individual HTTP request to the Data API to accomplish the operation.
            If not provided, this object's defaults apply.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a TableInsertManyResult object, whose attributes are the primary key
        of the inserted rows both in the form of dictionaries and of tuples.

    Examples:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> # Insert complete and partial rows at once (concurrently)
        >>> from astrapy.data_types import (
        ...     DataAPISet,
        ...     DataAPITimestamp,
        ...     DataAPIVector,
        ... )
        >>> from astrapy.ids import UUID
        >>>
        >>> insert_result = asyncio.run(my_async_table.insert_many(
        ...     [
        ...         {
        ...             "match_id": "fight4",
        ...             "round": 1,
        ...             "winner": "Victor",
        ...             "score": 18,
        ...             "when": DataAPITimestamp.from_string(
        ...                 "2024-11-28T11:30:00Z",
        ...             ),
        ...             "fighters": DataAPISet([
        ...                 UUID("0193539a-2770-8c09-a32a-111111111111"),
        ...                 UUID('019353e3-00b4-83f9-a127-222222222222'),
        ...             ]),
        ...             "m_vector": DataAPIVector([0.4, -0.6, 0.2]),
        ...         },
        ...         {"match_id": "fight5", "round": 1, "winner": "Adam"},
        ...         {"match_id": "fight5", "round": 2, "winner": "Betta"},
        ...         {"match_id": "fight5", "round": 3, "winner": "Caio"},
        ...         {
        ...             "match_id": "challenge6",
        ...             "round": 1,
        ...             "winner": "Donna",
        ...             "m_vector": [0.9, -0.1, -0.3],
        ...         },
        ...         {"match_id": "challenge6", "round": 2, "winner": "Erick"},
        ...         {"match_id": "challenge6", "round": 3, "winner": "Fiona"},
        ...         {"match_id": "tournamentA", "round": 1, "winner": "Gael"},
        ...         {"match_id": "tournamentA", "round": 2, "winner": "Hanna"},
        ...         {
        ...             "match_id": "tournamentA",
        ...             "round": 3,
        ...             "winner": "Ian",
        ...             "fighters": DataAPISet([
        ...                 UUID("0193539a-2770-8c09-a32a-111111111111"),
        ...             ]),
        ...         },
        ...         {"match_id": "fight7", "round": 1, "winner": "Joy"},
        ...         {"match_id": "fight7", "round": 2, "winner": "Kevin"},
        ...         {"match_id": "fight7", "round": 3, "winner": "Lauretta"},
        ...     ],
        ...     concurrency=10,
        ...     chunk_size=3,
        ... ))
        >>> insert_result.inserted_ids
        [{'match_id': 'fight4', 'round': 1}, {'match_id': 'fight5', ...
        >>> insert_result.inserted_id_tuples
        [('fight4', 1), ('fight5', 1), ('fight5', 2), ('fight5', 3), ...
        >>>
        >>> # Ordered insertion
        >>> # (would stop on first failure; predictable end result on DB)
        >>> asyncio.run(my_async_table.insert_many(
        ...     [
        ...         {"match_id": "fight5", "round": 1, "winner": "Adam0"},
        ...         {"match_id": "fight5", "round": 2, "winner": "Betta0"},
        ...         {"match_id": "fight5", "round": 3, "winner": "Caio0"},
        ...         {"match_id": "fight5", "round": 1, "winner": "Adam Zuul"},
        ...         {"match_id": "fight5", "round": 2, "winner": "Betta Vigo"},
        ...         {"match_id": "fight5", "round": 3, "winner": "Caio Gozer"},
        ...     ],
        ...     ordered=True,
        ... ))
        TableInsertManyResult(inserted_ids=[{'match_id': 'fight5', 'round': 1}, ...

    Note:
        Unordered insertions are executed with some degree of concurrency,
        so it is usually better to prefer this mode unless the order in the
        row sequence is important.

    Note:
        If some of the rows are unsuitable for insertion, for instance
        have the wrong data type for a column or lack the primary key,
        the Data API validation check will fail for those specific requests
        that contain the faulty rows. Depending on concurrency and the value
        of the `ordered` parameter, a number of rows in general could have
        been successfully inserted.
        It is possible to capture such a scenario, and inspect which rows
        actually got inserted, by catching an error of type
        `astrapy.exceptions.TableInsertManyException`: its `partial_result`
        attribute is precisely a `TableInsertManyResult`, encoding details
        on the successful writes.
    """

    # Resolve the overall-operation timeout: explicit args win over the
    # object's configured defaults (the label is kept for error reporting).
    _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
        (general_method_timeout_ms, "general_method_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (
            self.api_options.timeout_options.general_method_timeout_ms,
            "general_method_timeout_ms",
        ),
    )
    _request_timeout_ms, _rt_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    # Ordered insertions default to a concurrency of 1 (and reject more).
    if concurrency is None:
        if ordered:
            _concurrency = 1
        else:
            _concurrency = DEFAULT_INSERT_MANY_CONCURRENCY
    else:
        _concurrency = concurrency
    if _concurrency > 1 and ordered:
        raise ValueError("Cannot run ordered insert_many concurrently.")
    if chunk_size is None:
        _chunk_size = DEFAULT_INSERT_MANY_CHUNK_SIZE
    else:
        _chunk_size = chunk_size
    # Materialize the iterable: it is chunked and (for ordered mode) may be
    # traversed with index arithmetic below.
    _rows = list(rows)
    logger.info(f"inserting {len(_rows)} rows in '{self.name}'")
    raw_results: list[dict[str, Any]] = []
    # Tracks the time budget across the (possibly many) API requests.
    timeout_manager = MultiCallTimeoutManager(
        overall_timeout_ms=_general_method_timeout_ms,
        timeout_label=_gmt_label,
    )
    if ordered:
        options = {"ordered": True}
        inserted_ids: list[Any] = []
        inserted_id_tuples: list[Any] = []
        # Sequential chunk-by-chunk insertion; stops at the first failed chunk.
        for i in range(0, len(_rows), _chunk_size):
            im_payload = self._converter_agent.preprocess_payload(
                {
                    "insertMany": {
                        "documents": _rows[i : i + _chunk_size],
                        "options": options,
                    },
                },
            )
            logger.info(f"insertMany(chunk) on '{self.name}'")
            chunk_response = await self._api_commander.async_request(
                payload=im_payload,
                raise_api_errors=False,
                timeout_context=timeout_manager.remaining_timeout(
                    cap_time_ms=_request_timeout_ms,
                    cap_timeout_label=_rt_label,
                ),
            )
            logger.info(f"finished insertMany(chunk) on '{self.name}'")
            # accumulate the results in this call
            chunk_inserted_ids, chunk_inserted_ids_tuples = (
                self._prepare_keys_from_status(chunk_response.get("status"))
            )
            inserted_ids += chunk_inserted_ids
            inserted_id_tuples += chunk_inserted_ids_tuples
            raw_results += [chunk_response]
            # if errors, quit early
            if chunk_response.get("errors", []):
                partial_result = TableInsertManyResult(
                    raw_results=raw_results,
                    inserted_ids=inserted_ids,
                    inserted_id_tuples=inserted_id_tuples,
                )
                raise TableInsertManyException.from_response(
                    command=None,
                    raw_response=chunk_response,
                    partial_result=partial_result,
                )

        # return
        full_result = TableInsertManyResult(
            raw_results=raw_results,
            inserted_ids=inserted_ids,
            inserted_id_tuples=inserted_id_tuples,
        )
        logger.info(f"finished inserting {len(_rows)} rows in '{self.name}'")
        return full_result

    else:
        # unordered: concurrent or not, do all of them and parse the results
        options = {"ordered": False}

        # Semaphore caps the number of in-flight API requests.
        sem = asyncio.Semaphore(_concurrency)

        async def concurrent_insert_chunk(
            row_chunk: list[ROW],
        ) -> dict[str, Any]:
            async with sem:
                im_payload = self._converter_agent.preprocess_payload(
                    {
                        "insertMany": {
                            "documents": row_chunk,
                            "options": options,
                        },
                    },
                )
                logger.info(f"insertMany(chunk) on '{self.name}'")
                im_response = await self._api_commander.async_request(
                    payload=im_payload,
                    raise_api_errors=False,
                    timeout_context=timeout_manager.remaining_timeout(
                        cap_time_ms=_request_timeout_ms,
                        cap_timeout_label=_rt_label,
                    ),
                )
                logger.info(f"finished insertMany(chunk) on '{self.name}'")
                return im_response

        if _concurrency > 1:
            tasks = [
                asyncio.create_task(
                    concurrent_insert_chunk(_rows[i : i + _chunk_size])
                )
                for i in range(0, len(_rows), _chunk_size)
            ]
            raw_results = await asyncio.gather(*tasks)
        else:
            # concurrency == 1: await each chunk in turn (no task fan-out)
            raw_results = [
                await concurrent_insert_chunk(_rows[i : i + _chunk_size])
                for i in range(0, len(_rows), _chunk_size)
            ]

        # recast raw_results. Each response has its schema: unfold appropriately
        ids_and_tuples_per_chunk = [
            self._prepare_keys_from_status(chunk_response.get("status"))
            for chunk_response in raw_results
        ]
        inserted_ids = [
            inserted_id
            for chunk_ids, _ in ids_and_tuples_per_chunk
            for inserted_id in chunk_ids
        ]
        inserted_id_tuples = [
            inserted_id_tuple
            for _, chunk_id_tuples in ids_and_tuples_per_chunk
            for inserted_id_tuple in chunk_id_tuples
        ]
        # check-raise
        if any(
            [chunk_response.get("errors", []) for chunk_response in raw_results]
        ):
            partial_result = TableInsertManyResult(
                raw_results=raw_results,
                inserted_ids=inserted_ids,
                inserted_id_tuples=inserted_id_tuples,
            )
            raise TableInsertManyException.from_responses(
                commands=[None for _ in raw_results],
                raw_responses=raw_results,
                partial_result=partial_result,
            )

        # return
        full_result = TableInsertManyResult(
            raw_results=raw_results,
            inserted_ids=inserted_ids,
            inserted_id_tuples=inserted_id_tuples,
        )
        logger.info(f"finished inserting {len(_rows)} rows in '{self.name}'")
        return full_result
async def insert_one(self, row: ROW, *, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> TableInsertOneResult

Insert a single row in the table, with implied overwrite in case of primary key collision.

Inserting a row whose primary key correspond to an entry already stored in the table has the effect of an in-place update: the row is overwritten. However, if the row being inserted is partially provided, i.e. some columns are not specified, these are left unchanged on the database. To explicitly reset them, specify their value as appropriate to their data type, i.e. None, {} or analogous.

Args

row
a dictionary expressing the row to insert. The primary key must be specified in full, while any other column may be omitted if desired (in which case it is left as is on DB). The values for the various columns supplied in the row must be of the right data type for the insertion to succeed. Non-primary-key columns can also be explicitly set to null.
general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Returns

a TableInsertOneResult object, whose attributes are the primary key of the inserted row both in the form of a dictionary and of a tuple.

Examples

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> # a full-row insert using astrapy's datatypes
>>> from astrapy.data_types import (
...     DataAPISet,
...     DataAPITimestamp,
...     DataAPIVector,
... )
>>> from astrapy.ids import UUID
>>>
>>> insert_result = asyncio.run(my_async_table.insert_one(
...     {
...         "match_id": "mtch_0",
...         "round": 1,
...         "m_vector": DataAPIVector([0.4, -0.6, 0.2]),
...         "score": 18,
...         "when": DataAPITimestamp.from_string("2024-11-28T11:30:00Z"),
...         "winner": "Victor",
...         "fighters": DataAPISet([
...             UUID("0193539a-2770-8c09-a32a-111111111111"),
...         ]),
...     },
... ))
>>> insert_result.inserted_id
{'match_id': 'mtch_0', 'round': 1}
>>> insert_result.inserted_id_tuple
('mtch_0', 1)
>>>
>>> # a partial-row (which in this case overwrites some of the values)
>>> asyncio.run(my_async_table.insert_one(
...     {
...         "match_id": "mtch_0",
...         "round": 1,
...         "winner": "Victor Vector",
...         "fighters": DataAPISet([
...             UUID("0193539a-2770-8c09-a32a-111111111111"),
...             UUID("0193539a-2880-8875-9f07-222222222222"),
...         ]),
...     },
... ))
TableInsertOneResult(inserted_id={'match_id': 'mtch_0', 'round': 1} ...
>>>
>>> # another insertion demonstrating standard-library datatypes in values
>>> import datetime
>>>
>>> asyncio.run(my_async_table.insert_one(
...     {
...         "match_id": "mtch_0",
...         "round": 2,
...         "winner": "Angela",
...         "score": 25,
...         "when": datetime.datetime(
...             2024, 7, 13, 12, 55, 30, 889,
...             tzinfo=datetime.timezone.utc,
...         ),
...         "fighters": {
...             UUID("019353cb-8e01-8276-a190-333333333333"),
...         },
...         "m_vector": [0.4, -0.6, 0.2],
...     },
... ))
TableInsertOneResult(inserted_id={'match_id': 'mtch_0', 'round': 2}, ...
Expand source code
async def insert_one(
    self,
    row: ROW,
    *,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> TableInsertOneResult:
    """
    Insert a single row in the table,
    with implied overwrite in case of primary key collision.

    Inserting a row whose primary key correspond to an entry already stored
    in the table has the effect of an in-place update: the row is overwritten.
    However, if the row being inserted is partially provided, i.e. some columns
    are not specified, these are left unchanged on the database. To explicitly
    reset them, specify their value as appropriate to their data type,
    i.e. `None`, `{}` or analogous.

    Args:
        row: a dictionary expressing the row to insert. The primary key
            must be specified in full, while any other column may be omitted
            if desired (in which case it is left as is on DB).
            The values for the various columns supplied in the row must
            be of the right data type for the insertion to succeed.
            Non-primary-key columns can also be explicitly set to null.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a TableInsertOneResult object, whose attributes are the primary key
        of the inserted row both in the form of a dictionary and of a tuple.

    Raises:
        UnexpectedDataAPIResponseException: if the API response lacks the
            expected `insertedIds`/`primaryKeySchema` information.

    Examples:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> # a full-row insert using astrapy's datatypes
        >>> from astrapy.data_types import (
        ...     DataAPISet,
        ...     DataAPITimestamp,
        ...     DataAPIVector,
        ... )
        >>> from astrapy.ids import UUID
        >>>
        >>> insert_result = asyncio.run(my_async_table.insert_one(
        ...     {
        ...         "match_id": "mtch_0",
        ...         "round": 1,
        ...         "m_vector": DataAPIVector([0.4, -0.6, 0.2]),
        ...         "score": 18,
        ...         "when": DataAPITimestamp.from_string("2024-11-28T11:30:00Z"),
        ...         "winner": "Victor",
        ...         "fighters": DataAPISet([
        ...             UUID("0193539a-2770-8c09-a32a-111111111111"),
        ...         ]),
        ...     },
        ... ))
        >>> insert_result.inserted_id
        {'match_id': 'mtch_0', 'round': 1}
        >>> insert_result.inserted_id_tuple
        ('mtch_0', 1)
        >>>
        >>> # a partial-row (which in this case overwrites some of the values)
        >>> asyncio.run(my_async_table.insert_one(
        ...     {
        ...         "match_id": "mtch_0",
        ...         "round": 1,
        ...         "winner": "Victor Vector",
        ...         "fighters": DataAPISet([
        ...             UUID("0193539a-2770-8c09-a32a-111111111111"),
        ...             UUID("0193539a-2880-8875-9f07-222222222222"),
        ...         ]),
        ...     },
        ... ))
        TableInsertOneResult(inserted_id={'match_id': 'mtch_0', 'round': 1} ...
        >>>
        >>> # another insertion demonstrating standard-library datatypes in values
        >>> import datetime
        >>>
        >>> asyncio.run(my_async_table.insert_one(
        ...     {
        ...         "match_id": "mtch_0",
        ...         "round": 2,
        ...         "winner": "Angela",
        ...         "score": 25,
        ...         "when": datetime.datetime(
        ...             2024, 7, 13, 12, 55, 30, 889,
        ...             tzinfo=datetime.timezone.utc,
        ...         ),
        ...         "fighters": {
        ...             UUID("019353cb-8e01-8276-a190-333333333333"),
        ...         },
        ...         "m_vector": [0.4, -0.6, 0.2],
        ...     },
        ... ))
        TableInsertOneResult(inserted_id={'match_id': 'mtch_0', 'round': 2}, ...
    """

    # Single-request method: all timeout aliases collapse to one value.
    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    io_payload = self._converter_agent.preprocess_payload(
        {"insertOne": {"document": row}}
    )
    logger.info(f"insertOne on '{self.name}'")
    io_response = await self._api_commander.async_request(
        payload=io_payload,
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    logger.info(f"finished insertOne on '{self.name}'")
    if "insertedIds" in io_response.get("status", {}):
        if not io_response["status"]["insertedIds"]:
            raise UnexpectedDataAPIResponseException(
                text="Response from insertOne API command has empty 'insertedIds'.",
                raw_response=io_response,
            )
        # Use .get(): a response lacking 'primaryKeySchema' entirely must
        # surface as UnexpectedDataAPIResponseException, not a bare KeyError.
        if not io_response["status"].get("primaryKeySchema"):
            raise UnexpectedDataAPIResponseException(
                text="Response from insertOne API command has empty 'primaryKeySchema'.",
                raw_response=io_response,
            )
        # The API returns the key as a list of values; decode it to both the
        # tuple and dictionary representations using the returned schema.
        inserted_id_list = io_response["status"]["insertedIds"][0]
        inserted_id_tuple, inserted_id = self._converter_agent.postprocess_key(
            inserted_id_list,
            primary_key_schema_dict=io_response["status"]["primaryKeySchema"],
        )
        return TableInsertOneResult(
            raw_results=[io_response],
            inserted_id=inserted_id,
            inserted_id_tuple=inserted_id_tuple,
        )
    else:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from insertOne API command.",
            raw_response=io_response,
        )
async def list_index_names(self, *, table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[str]

List the names of all indexes existing on this table.

Args

table_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for table_admin_timeout_ms.
timeout_ms
an alias for table_admin_timeout_ms.

Returns

a list of the index names as strings, in no particular order.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> asyncio.run(my_async_table.list_index_names())
['m_vector_index', 'winner_index', 'score_index']
Expand source code
async def list_index_names(
    self,
    *,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[str]:
    """
    List the names of all indexes existing on this table.

    Args:
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Returns:
        a list of the index names as strings, in no particular order.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> asyncio.run(my_async_table.list_index_names())
        ['m_vector_index', 'winner_index', 'score_index']
    """

    # Collapse the timeout aliases into the single effective request timeout.
    effective_timeout_ms, timeout_label = _select_singlereq_timeout_ta(
        timeout_options=self.api_options.timeout_options,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info("listIndexes")
    response = await self._api_commander.async_request(
        payload={"listIndexes": {"options": {}}},
        timeout_context=_TimeoutContext(
            request_ms=effective_timeout_ms, label=timeout_label
        ),
    )
    status = response.get("status", {})
    # Guard clause: a well-formed response always carries an 'indexes' entry.
    if "indexes" not in status:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from listIndexes API command.",
            raw_response=response,
        )
    logger.info("finished listIndexes")
    return status["indexes"]  # type: ignore[no-any-return]
async def list_indexes(self, *, table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[TableIndexDescriptor]

List the full definitions of all indexes existing on this table.

Args

table_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for table_admin_timeout_ms.
timeout_ms
an alias for table_admin_timeout_ms.

Returns

a list of TableIndexDescriptor objects in no particular order, each providing the details of an index present on the table.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> indexes = asyncio.run(my_async_table.list_indexes())
>>> indexes
[TableIndexDescriptor(name='m_vector_index', definition=...)...]
>>> # (Note: shortened output above)
>>> indexes[1].definition.column
'winner'
>>> indexes[1].definition.options.case_sensitive
False
Expand source code
async def list_indexes(
    self,
    *,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[TableIndexDescriptor]:
    """
    List the full definitions of all indexes existing on this table.

    Args:
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Returns:
        a list of `astrapy.info.TableIndexDescriptor` objects in no particular
        order, each providing the details of an index present on the table.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> indexes = asyncio.run(my_async_table.list_indexes())
        >>> indexes
        [TableIndexDescriptor(name='m_vector_index', definition=...)...]
        >>> # (Note: shortened output above)
        >>> indexes[1].definition.column
        'winner'
        >>> indexes[1].definition.options.case_sensitive
        False
    """

    # Collapse the timeout aliases into the single effective request timeout.
    effective_timeout_ms, timeout_label = _select_singlereq_timeout_ta(
        timeout_options=self.api_options.timeout_options,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info("listIndexes")
    # 'explain: True' asks the API for the full index definitions,
    # not just their names.
    response = await self._api_commander.async_request(
        payload={"listIndexes": {"options": {"explain": True}}},
        timeout_context=_TimeoutContext(
            request_ms=effective_timeout_ms, label=timeout_label
        ),
    )
    status = response.get("status", {})
    # Guard clause: a well-formed response always carries an 'indexes' entry.
    if "indexes" not in status:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from listIndexes API command.",
            raw_response=response,
        )
    logger.info("finished listIndexes")
    descriptors: list[TableIndexDescriptor] = []
    for raw_index in status["indexes"]:
        descriptors.append(TableIndexDescriptor.coerce(raw_index))
    return descriptors
def to_sync(self: AsyncTable[ROW], *, embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = (unset), api_options: APIOptions | UnsetType = (unset)) ‑> Table[ROW]

Create a Table from this one. Save for the arguments explicitly provided as overrides, everything else is kept identical to this table in the copy (the database is converted into a sync object).

Args

embedding_api_key
optional API key(s) for interacting with the table. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
api_options
any additional options to set for the result, in the form of an APIOptions instance (where one can set just the needed attributes). In case the same setting is also provided as named parameter, the latter takes precedence.

Returns

the new copy, a Table instance.

Example

>>> my_async_table.to_sync().find_one(
...     {"match_id": "fight4"},
...     projection={"winner": True},
... )
{"pk": 1, "column": "value"}
Expand source code
def to_sync(
    self: AsyncTable[ROW],
    *,
    embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
    api_options: APIOptions | UnsetType = _UNSET,
) -> Table[ROW]:
    """
    Create a Table from this one. Save for the arguments
    explicitly provided as overrides, everything else is kept identical
    to this table in the copy (the database is converted into
    a sync object).

    Args:
        embedding_api_key: optional API key(s) for interacting with the table.
            If an embedding service is configured, and this parameter is not None,
            each Data API call will include the necessary embedding-related headers
            as specified by this parameter. If a string is passed, it translates
            into the one "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            For some vectorize providers/models, if using header-based authentication,
            specialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`
            should be supplied.
        api_options: any additional options to set for the result, in the form of
            an APIOptions instance (where one can set just the needed attributes).
            In case the same setting is also provided as named parameter,
            the latter takes precedence.

    Returns:
        the new copy, a Table instance.

    Example:
        >>> my_async_table.to_sync().find_one(
        ...     {"match_id": "fight4"},
        ...     projection={"winner": True},
        ... )
        {"pk": 1, "column": "value"}
    """

    # Named parameters take precedence over the same settings in `api_options`.
    arg_api_options = APIOptions(
        embedding_api_key=embedding_api_key,
    )
    final_api_options = self.api_options.with_override(api_options).with_override(
        arg_api_options
    )
    return Table(
        database=self.database.to_sync(),
        name=self.name,
        keyspace=self.keyspace,
        api_options=final_api_options,
    )
async def update_one(self, filter: FilterType, update: dict[str, Any], *, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Update a single document on the table, changing some or all of the columns, with the implicit behaviour of inserting a new row if no match is found.

Args

filter
a predicate expressing the table primary key in full, i.e. a dictionary defining values for all columns that form the primary key. An example may be {"match_id": "fight4", "round": 1}.
update
the update prescription to apply to the row, expressed as a dictionary conforming to the Data API syntax. The update operators for tables are $set and $unset (in particular, setting a column to None has the same effect as the $unset operator). Examples are {"$set": {"round": 12}} and {"$unset": {"winner": "", "score": ""}}. Note that the update operation cannot alter the primary key columns. See the Data API documentation for more details.
general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Examples

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> from astrapy.data_types import DataAPISet
>>>
>>> # Set a new value for a column
>>> await my_async_table.update_one(
...     {"match_id": "fight4", "round": 1},
...     update={"$set": {"winner": "Winona"}},
... )
>>>
>>> # Set a new value for a column while unsetting another column
>>> await my_async_table.update_one(
...     {"match_id": "fight4", "round": 1},
...     update={"$set": {"winner": None, "score": 24}},
... )
>>>
>>> # Set a 'set' column to empty
>>> await my_async_table.update_one(
...     {"match_id": "fight4", "round": 1},
...     update={"$set": {"fighters": DataAPISet()}},
... )
>>>
>>> # Set a 'set' column to empty using None
>>> await my_async_table.update_one(
...     {"match_id": "fight4", "round": 1},
...     update={"$set": {"fighters": None}},
... )
>>>
>>> # Set a 'set' column to empty using a regular (empty) set
>>> await my_async_table.update_one(
...     {"match_id": "fight4", "round": 1},
...     update={"$set": {"fighters": set()}},
... )
>>>
>>> # Set a 'set' column to empty using $unset
>>> await my_async_table.update_one(
...     {"match_id": "fight4", "round": 1},
...     update={"$unset": {"fighters": None}},
... )
>>>
>>> # A non-existing primary key creates a new row
>>> await my_async_table.update_one(
...     {"match_id": "bar_fight", "round": 4},
...     update={"$set": {"score": 8, "winner": "Jack"}},
... )
>>>
>>> # Delete column values for a row (they'll read as None now)
>>> await my_async_table.update_one(
...     {"match_id": "challenge6", "round": 2},
...     update={"$unset": {"winner": None, "score": None}},
... )

Note

a row created entirely with update operations (as opposed to insertions) may, correspondingly, be deleted by means of an $unset update on all columns.

Expand source code
async def update_one(
    self,
    filter: FilterType,
    update: dict[str, Any],
    *,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Update a single document on the table, changing some or all of the columns,
    with the implicit behaviour of inserting a new row if no match is found.

    Args:
        filter: a predicate expressing the table primary key in full,
            i.e. a dictionary defining values for all columns that form the
            primary key. An example may be `{"match_id": "fight4", "round": 1}`.
        update: the update prescription to apply to the row, expressed
            as a dictionary conforming to the Data API syntax. The update
            operators for tables are `$set` and `$unset` (in particular,
            setting a column to None has the same effect as the $unset operator).
            Examples are `{"$set": {"round": 12}}` and
            `{"$unset": {"winner": "", "score": ""}}`.
            Note that the update operation cannot alter the primary key columns.
            See the Data API documentation for more details.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Examples:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> from astrapy.data_types import DataAPISet
        >>>
        >>> # Set a new value for a column
        >>> await my_async_table.update_one(
        ...     {"match_id": "fight4", "round": 1},
        ...     update={"$set": {"winner": "Winona"}},
        ... )
        >>>
        >>> # Set a new value for a column while unsetting another column
        >>> await my_async_table.update_one(
        ...     {"match_id": "fight4", "round": 1},
        ...     update={"$set": {"winner": None, "score": 24}},
        ... )
        >>>
        >>> # Set a 'set' column to empty
        >>> await my_async_table.update_one(
        ...     {"match_id": "fight4", "round": 1},
        ...     update={"$set": {"fighters": DataAPISet()}},
        ... )
        >>>
        >>> # Set a 'set' column to empty using None
        >>> await my_async_table.update_one(
        ...     {"match_id": "fight4", "round": 1},
        ...     update={"$set": {"fighters": None}},
        ... )
        >>>
        >>> # Set a 'set' column to empty using a regular (empty) set
        >>> await my_async_table.update_one(
        ...     {"match_id": "fight4", "round": 1},
        ...     update={"$set": {"fighters": set()}},
        ... )
        >>>
        >>> # Set a 'set' column to empty using $unset
        >>> await my_async_table.update_one(
        ...     {"match_id": "fight4", "round": 1},
        ...     update={"$unset": {"fighters": None}},
        ... )
        >>>
        >>> # A non-existing primary key creates a new row
        >>> await my_async_table.update_one(
        ...     {"match_id": "bar_fight", "round": 4},
        ...     update={"$set": {"score": 8, "winner": "Jack"}},
        ... )
        >>>
        >>> # Delete column values for a row (they'll read as None now)
        >>> await my_async_table.update_one(
        ...     {"match_id": "challenge6", "round": 2},
        ...     update={"$unset": {"winner": None, "score": None}},
        ... )

    Note:
        a row created entirely with update operations (as opposed to insertions)
        may, correspondingly, be deleted by means of an $unset update on all columns.
    """

    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # Omit None-valued entries from the command payload.
    uo_payload = {
        "updateOne": {
            k: v
            for k, v in {
                "filter": filter,
                "update": self._converter_agent.preprocess_payload(update),
            }.items()
            if v is not None
        }
    }
    logger.info(f"updateOne on '{self.name}'")
    uo_response = await self._api_commander.async_request(
        payload=uo_payload,
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    logger.info(f"finished updateOne on '{self.name}'")
    if "status" in uo_response:
        # the contents are disregarded and the method just returns:
        return
    else:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from updateOne API command.",
            raw_response=uo_response,
        )
def with_options(self: AsyncTable[ROW], *, embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = (unset), api_options: APIOptions | UnsetType = (unset)) ‑> AsyncTable[ROW]

Create a clone of this table with some changed attributes.

Args

embedding_api_key
optional API key(s) for interacting with the table. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
api_options
any additional options to set for the clone, in the form of an APIOptions instance (where one can set just the needed attributes). In case the same setting is also provided as named parameter, the latter takes precedence.

Returns

a new AsyncTable instance.

Example

>>> table_with_api_key_configured = my_async_table.with_options(
...     embedding_api_key="secret-key-0123abcd...",
... )
Expand source code
def with_options(
    self: AsyncTable[ROW],
    *,
    embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
    api_options: APIOptions | UnsetType = _UNSET,
) -> AsyncTable[ROW]:
    """
    Create a clone of this table, with selected attributes changed.

    Args:
        embedding_api_key: optional API key(s) for interacting with the table.
            If an embedding service is configured, and this parameter is not None,
            each Data API call will include the necessary embedding-related headers
            as specified by this parameter. If a string is passed, it translates
            into the one "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            For some vectorize providers/models, if using header-based authentication,
            specialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`
            should be supplied.
        api_options: any additional options to set for the clone, in the form of
            an APIOptions instance (where one can set just the needed attributes).
            In case the same setting is also provided as named parameter,
            the latter takes precedence.

    Returns:
        a new AsyncTable instance.

    Example:
        >>> table_with_api_key_configured = my_async_table.with_options(
        ...     embedding_api_key="secret-key-0123abcd...",
        ... )
    """

    # All override handling is delegated to the internal copy helper.
    table_clone = self._copy(
        embedding_api_key=embedding_api_key,
        api_options=api_options,
    )
    return table_clone
class Collection (*, database: Database, name: str, keyspace: str | None, api_options: FullAPIOptions)

A Data API collection, the object to interact with the Data API for unstructured (schemaless) data, especially for DDL operations. This class has a synchronous interface.

This class is not meant for direct instantiation by the user, rather it is obtained by invoking methods such as get_collection of Database, wherefrom the Collection inherits its API options such as authentication token and API endpoint.

Args

database
a Database object, instantiated earlier. This represents the database the collection belongs to.
name
the collection name. This parameter should match an existing collection on the database.
keyspace
this is the keyspace to which the collection belongs. If nothing is specified, the database's working keyspace is used.
api_options
a complete specification of the API Options for this instance.

Examples

>>> from astrapy import DataAPIClient
>>> client = DataAPIClient()
>>> database = client.get_database(
...     "https://01234567-....apps.astra.datastax.com",
...     token="AstraCS:..."
... )
>>> # Create a collection using the fluent syntax for its definition
>>> from astrapy.constants import VectorMetric
>>> from astrapy.info import CollectionDefinition
>>>
>>> collection_definition = (
...     CollectionDefinition.builder()
...     .set_vector_dimension(3)
...     .set_vector_metric(VectorMetric.DOT_PRODUCT)
...     .set_indexing("deny", ["annotations", "logs"])
...     .build()
... )
>>> my_collection = database.create_collection(
...     "my_events",
...     definition=collection_definition,
... )
>>> # Create a collection with the definition as object
>>> from astrapy.info import CollectionVectorOptions
>>>
>>> collection_definition_1 = CollectionDefinition(
...     vector=CollectionVectorOptions(
...         dimension=3,
...         metric=VectorMetric.DOT_PRODUCT,
...     ),
...     indexing={"deny": ["annotations", "logs"]},
... )
>>> my_collection_1 = database.create_collection(
...     "my_events",
...     definition=collection_definition_1,
... )
>>>
>>> # Create a collection with the definition as plain dictionary
>>> collection_definition_2 = {
...     "indexing": {"deny": ["annotations", "logs"]},
...     "vector": {
...         "dimension": 3,
...         "metric": VectorMetric.DOT_PRODUCT,
...     },
... }
>>> my_collection_2 = database.create_collection(
...     "my_events",
...     definition=collection_definition_2,
... )
>>> # Get a reference to an existing collection
>>> # (no checks are performed on DB)
>>> my_collection_3a = database.get_collection("my_events")
>>> my_collection_3b = database.my_events
>>> my_collection_3c = database["my_events"]

Note

creating an instance of Collection does not trigger actual creation of the collection on the database. The latter should have been created beforehand, e.g. through the create_collection method of a Database.

Expand source code
class Collection(Generic[DOC]):
    """
    A Data API collection, the object to interact with the Data API for unstructured
    (schemaless) data, especially for DDL operations.
    This class has a synchronous interface.

    This class is not meant for direct instantiation by the user, rather
    it is obtained by invoking methods such as `get_collection` of Database,
    wherefrom the Collection inherits its API options such as authentication
    token and API endpoint.

    Args:
        database: a Database object, instantiated earlier. This represents
            the database the collection belongs to.
        name: the collection name. This parameter should match an existing
            collection on the database.
        keyspace: this is the keyspace to which the collection belongs.
            If nothing is specified, the database's working keyspace is used.
        api_options: a complete specification of the API Options for this instance.

    Examples:
        >>> from astrapy import DataAPIClient
        >>> client = DataAPIClient()
        >>> database = client.get_database(
        ...     "https://01234567-....apps.astra.datastax.com",
        ...     token="AstraCS:..."
        ... )

        >>> # Create a collection using the fluent syntax for its definition
        >>> from astrapy.constants import VectorMetric
        >>> from astrapy.info import CollectionDefinition
        >>>
        >>> collection_definition = (
        ...     CollectionDefinition.builder()
        ...     .set_vector_dimension(3)
        ...     .set_vector_metric(VectorMetric.DOT_PRODUCT)
        ...     .set_indexing("deny", ["annotations", "logs"])
        ...     .build()
        ... )
        >>> my_collection = database.create_collection(
        ...     "my_events",
        ...     definition=collection_definition,
        ... )

        >>>
        >>> # Create a collection with the definition as object
        >>> from astrapy.info import CollectionVectorOptions
        >>>
        >>> collection_definition_1 = CollectionDefinition(
        ...     vector=CollectionVectorOptions(
        ...         dimension=3,
        ...         metric=VectorMetric.DOT_PRODUCT,
        ...     ),
        ...     indexing={"deny": ["annotations", "logs"]},
        ... )
        >>> my_collection_1 = database.create_collection(
        ...     "my_events",
        ...     definition=collection_definition_1,
        ... )
        >>>

        >>> # Create a collection with the definition as plain dictionary
        >>> collection_definition_2 = {
        ...     "indexing": {"deny": ["annotations", "logs"]},
        ...     "vector": {
        ...         "dimension": 3,
        ...         "metric": VectorMetric.DOT_PRODUCT,
        ...     },
        ... }
        >>> my_collection_2 = database.create_collection(
        ...     "my_events",
        ...     definition=collection_definition_2,
        ... )

        >>> # Get a reference to an existing collection
        >>> # (no checks are performed on DB)
        >>> my_collection_3a = database.get_collection("my_events")
        >>> my_collection_3b = database.my_events
        >>> my_collection_3c = database["my_events"]

    Note:
        creating an instance of Collection does not trigger actual creation
        of the collection on the database. The latter should have been created
        beforehand, e.g. through the `create_collection` method of a Database.
    """

    def __init__(
        self,
        *,
        database: Database,
        name: str,
        keyspace: str | None,
        api_options: FullAPIOptions,
    ) -> None:
        """Set up the collection state and its API commander (no DB calls besides none)."""
        self.api_options = api_options
        self._name = name
        # Fall back to the database's working keyspace when none is given.
        resolved_keyspace = database.keyspace if keyspace is None else keyspace
        if resolved_keyspace is None:
            raise ValueError("Attempted to create Collection with 'keyspace' unset.")

        self._database = database._copy(
            keyspace=resolved_keyspace, api_options=self.api_options
        )
        # Auth header first, then embedding headers, then any extra DB headers
        # (later entries overwrite earlier ones on key collision).
        headers: dict[str, str | None] = {
            DEFAULT_DATA_API_AUTH_HEADER: self.api_options.token.get_token()
        }
        headers.update(self.api_options.embedding_api_key.get_headers())
        headers.update(self.api_options.database_additional_headers)
        self._commander_headers = headers
        self._api_commander = self._get_api_commander()

    def __repr__(self) -> str:
        """Terse description: name, keyspace, database endpoint and options."""
        return (
            f'{self.__class__.__name__}(name="{self.name}", '
            f'keyspace="{self.keyspace}", '
            f'database.api_endpoint="{self.database.api_endpoint}", '
            f"api_options={self.api_options})"
        )

    def __eq__(self, other: Any) -> bool:
        """Two collections are equal if name, database and API options all match."""
        if not isinstance(other, Collection):
            return False
        return (
            self._name == other._name
            and self._database == other._database
            and self.api_options == other.api_options
        )

    def __call__(self, *pargs: Any, **kwargs: Any) -> None:
        """Collections are not callable: raise a TypeError hinting at the likely mistake."""
        error_message = (
            f"'{self.__class__.__name__}' object is not callable. If you "
            f"meant to call the '{self.name}' method on a "
            f"'{self.database.__class__.__name__}' object "
            "it is failing because no such method exists."
        )
        raise TypeError(error_message)

    def _get_api_commander(self) -> APICommander:
        """Instantiate a new APICommander based on the properties of this class."""

        if self._database.keyspace is None:
            raise ValueError(
                "No keyspace specified. Collection requires a keyspace to "
                "be set, e.g. through the `keyspace` constructor parameter."
            )

        # Assemble the base path from api path/version, keyspace and collection
        # name, dropping None entries and empty segments after slash-stripping.
        raw_segments = (
            self._database.api_options.data_api_url_options.api_path,
            self._database.api_options.data_api_url_options.api_version,
            self._database.keyspace,
            self._name,
        )
        path_segments: list[str] = []
        for segment in raw_segments:
            if segment is None:
                continue
            cleaned = segment.strip("/")
            if cleaned != "":
                path_segments.append(cleaned)
        base_path = "/" + "/".join(path_segments)

        # Decimal handling follows the same serdes setting for reads and writes.
        decimal_support = self.api_options.serdes_options.use_decimals_in_collections
        return APICommander(
            api_endpoint=self._database.api_endpoint,
            path=base_path,
            headers=self._commander_headers,
            callers=self.api_options.callers,
            redacted_header_names=self.api_options.redacted_header_names,
            handle_decimals_writes=decimal_support,
            handle_decimals_reads=decimal_support,
        )

    def _converted_request(
        self,
        *,
        http_method: str = HttpMethod.POST,
        payload: dict[str, Any] | None = None,
        additional_path: str | None = None,
        request_params: dict[str, Any] | None = None,
        raise_api_errors: bool = True,
        timeout_context: _TimeoutContext,
    ) -> dict[str, Any]:
        """
        Issue a request through the API commander, applying the collection's
        serdes conversions to the outgoing payload and to the response.

        Args:
            http_method: the HTTP verb to use (POST by default).
            payload: the command payload; preprocessed before sending.
            additional_path: extra path segment appended to the commander's path.
            request_params: optional query-string parameters for the request.
            raise_api_errors: whether API-reported errors raise exceptions.
            timeout_context: timeout settings applied to this request.

        Returns:
            the postprocessed response from the API, as a dictionary.
        """

        converted_payload = preprocess_collection_payload(
            payload, options=self.api_options.serdes_options
        )
        raw_response_json = self._api_commander.request(
            http_method=http_method,
            payload=converted_payload,
            additional_path=additional_path,
            # `request_params` defaults to None (not a shared mutable `{}`);
            # coerce to a fresh empty dict here.
            request_params=request_params if request_params is not None else {},
            raise_api_errors=raise_api_errors,
            timeout_context=timeout_context,
        )
        response_json = postprocess_collection_response(
            raw_response_json, options=self.api_options.serdes_options
        )
        return response_json

    def _copy(
        self: Collection[DOC],
        *,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> Collection[DOC]:
        """Internal: clone this collection, layering the given overrides on top."""
        # Named-parameter overrides win over the same settings in `api_options`.
        explicit_options = APIOptions(
            embedding_api_key=embedding_api_key,
        )
        effective_options = self.api_options.with_override(
            api_options
        ).with_override(explicit_options)
        return Collection(
            database=self.database,
            name=self.name,
            keyspace=self.keyspace,
            api_options=effective_options,
        )

    def with_options(
        self: Collection[DOC],
        *,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> Collection[DOC]:
        """
        Create a clone of this collection, with selected attributes changed.

        Args:
            embedding_api_key: optional API key(s) for interacting with the collection.
                If an embedding service is configured, and this parameter is not None,
                each Data API call will include the necessary embedding-related headers
                as specified by this parameter. If a string is passed, it translates
                into the one "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
                For some vectorize providers/models, if using header-based authentication,
                specialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`
                should be supplied.
            api_options: any additional options to set for the clone, in the form of
                an APIOptions instance (where one can set just the needed attributes).
                In case the same setting is also provided as named parameter,
                the latter takes precedence.

        Returns:
            a new Collection instance.

        Example:
            >>> collection_with_api_key_configured = my_collection.with_options(
            ...     embedding_api_key="secret-key-0123abcd...",
            ... )
        """

        # All override handling is delegated to the internal copy helper.
        collection_clone = self._copy(
            embedding_api_key=embedding_api_key,
            api_options=api_options,
        )
        return collection_clone

    def to_async(
        self: Collection[DOC],
        *,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncCollection[DOC]:
        """
        Create an AsyncCollection from this one. Save for the arguments
        explicitly provided as overrides, everything else is kept identical
        to this collection in the copy (the database is converted into
        an async object).

        Args:
            embedding_api_key: optional API key(s) for interacting with the collection.
                If an embedding service is configured, and this parameter is not None,
                each Data API call will include the necessary embedding-related headers
                as specified by this parameter. If a string is passed, it translates
                into the one "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
                For some vectorize providers/models, if using header-based authentication,
                specialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`
                should be supplied.
            api_options: any additional options to set for the result, in the form of
                an APIOptions instance (where one can set just the needed attributes).
                In case the same setting is also provided as named parameter,
                the latter takes precedence.

        Returns:
            the new copy, an AsyncCollection instance.

        Example:
            >>> asyncio.run(my_coll.to_async().count_documents({},upper_bound=100))
            77
        """

        # Named-parameter overrides win over the same settings in `api_options`.
        explicit_options = APIOptions(
            embedding_api_key=embedding_api_key,
        )
        effective_options = self.api_options.with_override(
            api_options
        ).with_override(explicit_options)
        async_database = self.database.to_async()
        return AsyncCollection(
            database=async_database,
            name=self.name,
            keyspace=self.keyspace,
            api_options=effective_options,
        )

    def options(
        self,
        *,
        collection_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionDefinition:
        """
        Get the collection options, i.e. its configuration as read from the database.

        Each invocation issues a fresh request to the Data API (no caching
        is performed), so the information is always up to date — suitable
        e.g. for real-time collection validation by the application.

        Args:
            collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `collection_admin_timeout_ms`.
            timeout_ms: an alias for `collection_admin_timeout_ms`.

        Returns:
            a CollectionDefinition instance describing the collection.
            (See also the database `list_collections` method.)

        Example:
            >>> my_coll.options()
            CollectionDefinition(vector=CollectionVectorOptions(dimension=3, metric='cosine'))
        """

        _collection_admin_timeout_ms, _ca_label = _select_singlereq_timeout_ca(
            timeout_options=self.api_options.timeout_options,
            collection_admin_timeout_ms=collection_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info(f"getting collections in search of '{self.name}'")
        # Scan the listing for descriptors matching this collection's name.
        matching_descriptors = []
        for descriptor in self.database._list_collections_ctx(
            keyspace=None,
            timeout_context=_TimeoutContext(
                request_ms=_collection_admin_timeout_ms,
                label=_ca_label,
            ),
        ):
            if descriptor.name == self.name:
                matching_descriptors.append(descriptor)
        logger.info(f"finished getting collections in search of '{self.name}'")
        if not matching_descriptors:
            raise ValueError(
                f"Collection {self.keyspace}.{self.name} not found.",
            )
        return matching_descriptors[0].definition

    def info(
        self,
        *,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionInfo:
        """
        Information on the collection (name, location, database), in the
        form of a CollectionInfo object.

        Not to be confused with the collection `options` method (related
        to the collection internal configuration).

        Args:
            database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying DevOps API request.
                If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.

        Example:
            >>> my_coll.info().database_info.region
            'eu-west-1'
            >>> my_coll.info().full_name
            'default_keyspace.my_v_collection'

        Note:
            the returned CollectionInfo wraps, among other things,
            the database information: as such, calling this method
            triggers the same-named method of a Database object (which, in turn,
            performs a HTTP request to the DevOps API).
            See the documentation for `Database.info()` for more details.
        """

        # The database info comes from a (same-named) call on the Database object.
        db_info = self.database.info(
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        return CollectionInfo(
            database_info=db_info,
            keyspace=self.keyspace,
            name=self.name,
            full_name=self.full_name,
        )

    @property
    def database(self) -> Database:
        """
        The Database object this collection belongs to.

        Example:
            >>> my_coll.database.name
            'the_application_database'
        """

        return self._database

    @property
    def keyspace(self) -> str:
        """
        The keyspace this collection is in.

        Example:
            >>> my_coll.keyspace
            'default_keyspace'
        """

        # The underlying Database may have been created without a keyspace,
        # in which case this property cannot be resolved.
        keyspace_name = self.database.keyspace
        if keyspace_name is None:
            raise ValueError("The collection's DB is set with keyspace=None")
        return keyspace_name

    @property
    def name(self) -> str:
        """
        The name of this collection.

        Example:
            >>> my_coll.name
            'my_v_collection'
        """

        return self._name

    @property
    def full_name(self) -> str:
        """
        The fully-qualified name of this collection within the database,
        i.e. "keyspace.collection_name".

        Example:
            >>> my_coll.full_name
            'default_keyspace.my_v_collection'
        """

        return ".".join((self.keyspace, self.name))

    def insert_one(
        self,
        document: DOC,
        *,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionInsertOneResult:
        """
        Insert a single document in the collection in an atomic operation.

        Args:
            document: the dictionary expressing the document to insert.
                The `_id` field of the document can be left out, in which
                case it will be created automatically.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a CollectionInsertOneResult object.

        Raises:
            UnexpectedDataAPIResponseException: if the API response lacks
                a (nonempty) `status.insertedIds` entry.

        Examples:
            >>> my_coll.count_documents({}, upper_bound=10)
            0
            >>> my_coll.insert_one(
            ...     {
            ...         "age": 30,
            ...         "name": "Smith",
            ...         "food": ["pear", "peach"],
            ...         "likes_fruit": True,
            ...     },
            ... )
            CollectionInsertOneResult(raw_results=..., inserted_id='ed4587a4-...-...-...')
            >>> my_coll.insert_one({"_id": "user-123", "age": 50, "name": "Maccio"})
            CollectionInsertOneResult(raw_results=..., inserted_id='user-123')
            >>> my_coll.count_documents({}, upper_bound=10)
            2

            >>> my_coll.insert_one({"tag": "v", "$vector": [10, 11]})
            CollectionInsertOneResult(...)

        Note:
            If an `_id` is explicitly provided, which corresponds to a document
            that exists already in the collection, an error is raised and
            the insertion fails.
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        io_payload = {"insertOne": {"document": document}}
        logger.info(f"insertOne on '{self.name}'")
        io_response = self._converted_request(
            payload=io_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished insertOne on '{self.name}'")
        # The `or {}` guards against a response carrying `"status": null`
        # (same defensive pattern used elsewhere in this class); previously a
        # null status would have raised a TypeError instead of the intended
        # UnexpectedDataAPIResponseException.
        inserted_ids = (io_response.get("status") or {}).get("insertedIds") or []
        if inserted_ids:
            return CollectionInsertOneResult(
                raw_results=[io_response],
                inserted_id=inserted_ids[0],
            )
        # Missing or empty insertedIds: the response is not what the
        # insertOne command contract promises.
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from insert_one API command.",
            raw_response=io_response,
        )

    def insert_many(
        self,
        documents: Iterable[DOC],
        *,
        ordered: bool = False,
        chunk_size: int | None = None,
        concurrency: int | None = None,
        request_timeout_ms: int | None = None,
        general_method_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionInsertManyResult:
        """
        Insert a list of documents into the collection.
        This is not an atomic operation.

        Args:
            documents: an iterable of dictionaries, each a document to insert.
                Documents may specify their `_id` field or leave it out, in which
                case it will be added automatically.
            ordered: if False (default), the insertions can occur in arbitrary order
                and possibly concurrently. If True, they are processed sequentially.
                If there are no specific reasons against it, unordered insertions are to
                be preferred as they complete much faster.
            chunk_size: how many documents to include in a single API request.
                Exceeding the server maximum allowed value results in an error.
                Leave it unspecified (recommended) to use the system default.
            concurrency: maximum number of concurrent requests to the API at
                a given time. It cannot be more than one for ordered insertions.
            general_method_timeout_ms: a timeout, in milliseconds, for the whole
                requested operation (which may involve multiple API requests).
                If not passed, the collection-level setting is used instead.
            request_timeout_ms: a timeout, in milliseconds, for each API request.
                If not passed, the collection-level setting is used instead.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a CollectionInsertManyResult object.

        Examples:
            >>> my_coll.count_documents({}, upper_bound=10)
            0
            >>> my_coll.insert_many(
            ...     [{"a": 10}, {"a": 5}, {"b": [True, False, False]}],
            ...     ordered=True,
            ... )
            CollectionInsertManyResult(raw_results=..., inserted_ids=['184bb06f-...', '...', '...'])
            >>> my_coll.count_documents({}, upper_bound=100)
            3
            >>> my_coll.insert_many(
            ...     [{"seq": i} for i in range(50)],
            ...     concurrency=5,
            ... )
            CollectionInsertManyResult(raw_results=..., inserted_ids=[... ...])
            >>> my_coll.count_documents({}, upper_bound=100)
            53
            >>> my_coll.insert_many(
            ...     [
            ...         {"tag": "a", "$vector": [1, 2]},
            ...         {"tag": "b", "$vector": [3, 4]},
            ...     ]
            ... )
            CollectionInsertManyResult(...)

        Note:
            Unordered insertions are executed with some degree of concurrency,
            so it is usually better to prefer this mode unless the order in the
            document sequence is important.

        Note:
            A failure mode for this command is related to certain faulty documents
            found among those to insert: a document may have an `_id` already
            present on the collection, or its vector dimension may not
            match the collection setting.

            For an ordered insertion, the method will raise an exception at
            the first such faulty document -- nevertheless, all documents processed
            until then will end up being written to the database.

            For unordered insertions, if the error stems from faulty documents
            the insertion proceeds until exhausting the input documents: then,
            an exception is raised -- and all insertable documents will have been
            written to the database, including those "after" the troublesome ones.

            If, on the other hand, there are errors not related to individual
            documents (such as a network connectivity error), the whole
            `insert_many` operation will stop in mid-way, an exception will be raised,
            and only a certain amount of the input documents will
            have made their way to the database.
        """

        # Resolve the overall-operation and per-request timeouts from the
        # explicit parameters, falling back to the collection-level settings.
        _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
            (general_method_timeout_ms, "general_method_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.general_method_timeout_ms,
                "general_method_timeout_ms",
            ),
        )
        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        if concurrency is None:
            if ordered:
                _concurrency = 1
            else:
                _concurrency = DEFAULT_INSERT_MANY_CONCURRENCY
        else:
            _concurrency = concurrency
        if _concurrency > 1 and ordered:
            raise ValueError("Cannot run ordered insert_many concurrently.")
        if chunk_size is None:
            _chunk_size = DEFAULT_INSERT_MANY_CHUNK_SIZE
        else:
            _chunk_size = chunk_size
        _documents = list(documents)
        logger.info(f"inserting {len(_documents)} documents in '{self.name}'")
        raw_results: list[dict[str, Any]] = []
        # The timeout manager enforces the overall deadline across the
        # (possibly many) API requests issued below.
        timeout_manager = MultiCallTimeoutManager(
            overall_timeout_ms=_general_method_timeout_ms,
            timeout_label=_gmt_label,
        )
        if ordered:
            options = {"ordered": True}
            inserted_ids: list[Any] = []
            for i in range(0, len(_documents), _chunk_size):
                im_payload = {
                    "insertMany": {
                        "documents": _documents[i : i + _chunk_size],
                        "options": options,
                    },
                }
                logger.info(f"insertMany(chunk) on '{self.name}'")
                chunk_response = self._converted_request(
                    payload=im_payload,
                    raise_api_errors=False,
                    timeout_context=timeout_manager.remaining_timeout(
                        cap_time_ms=_request_timeout_ms,
                        cap_timeout_label=_rt_label,
                    ),
                )
                logger.info(f"finished insertMany(chunk) on '{self.name}'")
                # accumulate the results in this call
                chunk_inserted_ids = (chunk_response.get("status") or {}).get(
                    "insertedIds", []
                )
                inserted_ids += chunk_inserted_ids
                raw_results += [chunk_response]
                # if errors, quit early
                if chunk_response.get("errors", []):
                    partial_result = CollectionInsertManyResult(
                        raw_results=raw_results,
                        inserted_ids=inserted_ids,
                    )
                    raise CollectionInsertManyException.from_response(
                        command=None,
                        raw_response=chunk_response,
                        partial_result=partial_result,
                    )

            # return
            full_result = CollectionInsertManyResult(
                raw_results=raw_results,
                inserted_ids=inserted_ids,
            )
            logger.info(
                f"finished inserting {len(_documents)} documents in '{self.name}'"
            )
            return full_result

        else:
            # unordered: concurrent or not, do all of them and parse the results
            options = {"ordered": False}
            if _concurrency > 1:
                with ThreadPoolExecutor(max_workers=_concurrency) as executor:

                    def _chunk_insertor(
                        document_chunk: list[dict[str, Any]],
                    ) -> dict[str, Any]:
                        im_payload = {
                            "insertMany": {
                                "documents": document_chunk,
                                "options": options,
                            },
                        }
                        logger.info(f"insertMany(chunk) on '{self.name}'")
                        im_response = self._converted_request(
                            payload=im_payload,
                            raise_api_errors=False,
                            timeout_context=timeout_manager.remaining_timeout(
                                cap_time_ms=_request_timeout_ms,
                                cap_timeout_label=_rt_label,
                            ),
                        )
                        logger.info(f"finished insertMany(chunk) on '{self.name}'")
                        return im_response

                    raw_results = list(
                        executor.map(
                            _chunk_insertor,
                            (
                                _documents[i : i + _chunk_size]
                                for i in range(0, len(_documents), _chunk_size)
                            ),
                        )
                    )
            else:
                for i in range(0, len(_documents), _chunk_size):
                    im_payload = {
                        "insertMany": {
                            "documents": _documents[i : i + _chunk_size],
                            "options": options,
                        },
                    }
                    logger.info(f"insertMany(chunk) on '{self.name}'")
                    im_response = self._converted_request(
                        payload=im_payload,
                        raise_api_errors=False,
                        timeout_context=timeout_manager.remaining_timeout(
                            cap_time_ms=_request_timeout_ms,
                            cap_timeout_label=_rt_label,
                        ),
                    )
                    logger.info(f"finished insertMany(chunk) on '{self.name}'")
                    raw_results.append(im_response)
            # recast raw_results
            inserted_ids = [
                inserted_id
                for chunk_response in raw_results
                for inserted_id in (chunk_response.get("status") or {}).get(
                    "insertedIds", []
                )
            ]

            # check-raise (generator feeds `any` directly: no throwaway list,
            # and the scan can short-circuit at the first errored chunk)
            if any(
                chunk_response.get("errors", []) for chunk_response in raw_results
            ):
                partial_result = CollectionInsertManyResult(
                    raw_results=raw_results,
                    inserted_ids=inserted_ids,
                )
                raise CollectionInsertManyException.from_responses(
                    commands=[None for _ in raw_results],
                    raw_responses=raw_results,
                    partial_result=partial_result,
                )

            # return
            full_result = CollectionInsertManyResult(
                raw_results=raw_results,
                inserted_ids=inserted_ids,
            )
            logger.info(
                f"finished inserting {len(_documents)} documents in '{self.name}'"
            )
            return full_result

    @overload
    # Typing overload: with no `document_type` given, the cursor yields items
    # of the same type (DOC) as the collection's documents.
    def find(
        self,
        filter: FilterType | None = None,
        *,
        projection: ProjectionType | None = None,
        document_type: None = None,
        skip: int | None = None,
        limit: int | None = None,
        include_similarity: bool | None = None,
        include_sort_vector: bool | None = None,
        sort: SortType | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionFindCursor[DOC, DOC]: ...

    @overload
    # Typing overload: an explicit `document_type` fixes the item type (DOC2)
    # yielded by the resulting cursor.
    def find(
        self,
        filter: FilterType | None = None,
        *,
        projection: ProjectionType | None = None,
        document_type: type[DOC2],
        skip: int | None = None,
        limit: int | None = None,
        include_similarity: bool | None = None,
        include_sort_vector: bool | None = None,
        sort: SortType | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionFindCursor[DOC, DOC2]: ...

    def find(
        self,
        filter: FilterType | None = None,
        *,
        projection: ProjectionType | None = None,
        document_type: type[DOC2] | None = None,
        skip: int | None = None,
        limit: int | None = None,
        include_similarity: bool | None = None,
        include_sort_vector: bool | None = None,
        sort: SortType | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionFindCursor[DOC, DOC2]:
        """
        Find documents on the collection matching a certain provided filter.

        The method returns a cursor to iterate over. Depending on the call
        pattern, iterating over all documents may or may not reflect collection
        mutations occurred since this `find` call. When the cursor does reflect
        mutations in real-time, the iteration is approximate (occasional
        skipped or duplicate documents are possible): this is the case when
        the `sort` option is used in a non-vector-search manner.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            projection: controls which parts of each document are returned.
                It can be an allow-list (`{"f1": True, "f2": True}`) or a
                deny-list (`{"fx": False, "fy": False}`), but not a mixture
                (except for `_id` and other special fields, which can be set
                to True or False independently of the rest).
                The special star-projections `{"*": True}` and `{"*": False}`
                return the whole document and `{}` respectively.
                For lists in documents, slice directives can select portions
                of the list: e.g. `{"array": {"$slice": 2}}`,
                `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
                `{"array": {"$slice": [-4, 2]}}`.
                An iterable over strings is treated implicitly as an allow-list.
                The default projection (if this parameter is not passed) does not
                necessarily include "special" fields such as `$vector` or
                `$vectorize`. See the Data API documentation for more.
            document_type: a formal specifier for the type checker. If omitted,
                the resulting cursor is implicitly a
                `CollectionFindCursor[DOC, DOC]`, i.e. it keeps the same item
                type as the collection's documents. Strictly typed code may
                want to specify this, especially when a projection is given.
            skip: discard the first `skip` documents the query would return,
                starting the results from the (skip+1)-th document. Usable only
                together with an explicit ascending/descending `sort` criterion
                (i.e. not without sorting, nor with vector-based ANN search).
            limit: a cap on the number of documents returned. Once `limit`
                documents are yielded (or the matches are exhausted), nothing
                more is returned.
            include_similarity: if True, each returned document gains an added
                "$similarity" key holding the numeric similarity. Usable only
                for vector ANN search, i.e. when the `sort` parameter has the
                shape {"$vector": ...} (or a vector is otherwise supplied).
            include_sort_vector: if True (and the invocation is a vector
                search), calling `get_sort_vector` on the returned cursor will
                yield the vector used for the ANN search.
            sort: controls the order in which documents are returned. See the
                Notes about sorting and about upper bounds for details.
                Vector-based ANN sorting is achieved by providing a "$vector"
                or a "$vectorize" key in `sort`.
            request_timeout_ms: a timeout, in milliseconds, for each single one
                of the underlying HTTP requests used to fetch documents as the
                cursor is iterated over.
                If not passed, the collection-level setting is used instead.
            timeout_ms: an alias for `request_timeout_ms`.

        Returns:
            a cursor object representing iteration over the matching documents
            (see the cursor object for how to use it; the simplest pattern is
            a for loop: `for document in collection.find(...):`).

        Examples:
            >>> filter = {"seq": {"$exists": True}}
            >>> for doc in my_coll.find(filter, projection={"seq": True}, limit=5):
            ...     print(doc["seq"])
            ...
            37
            35
            10
            36
            27
            >>> cursor1 = my_coll.find(
            ...     {},
            ...     limit=4,
            ...     sort={"seq": astrapy.constants.SortMode.DESCENDING},
            ... )
            >>> [doc["_id"] for doc in cursor1]
            ['97e85f81-...', '1581efe4-...', '...', '...']
            >>> cursor2 = my_coll.find({}, limit=3)
            >>> cursor2.distinct("seq")
            [37, 35, 10]

            >>> my_coll.insert_many([
            ...     {"tag": "A", "$vector": [4, 5]},
            ...     {"tag": "B", "$vector": [3, 4]},
            ...     {"tag": "C", "$vector": [3, 2]},
            ...     {"tag": "D", "$vector": [4, 1]},
            ...     {"tag": "E", "$vector": [2, 5]},
            ... ])
            >>> ann_tags = [
            ...     document["tag"]
            ...     for document in my_coll.find(
            ...         {},
            ...         sort={"$vector": [3, 3]},
            ...         limit=3,
            ...     )
            ... ]
            >>> ann_tags
            ['A', 'B', 'C']
            >>> # (assuming the collection has metric VectorMetric.COSINE)

            >>> cursor = my_coll.find(
            ...     sort={"$vector": [3, 3]},
            ...     limit=3,
            ...     include_sort_vector=True,
            ... )
            >>> cursor.get_sort_vector()
            [3.0, 3.0]
            >>> matches = list(cursor)
            >>> cursor.get_sort_vector()
            [3.0, 3.0]

        Note:
            Example values for the `sort` parameter:
            when no particular order is required,
                sort={}  # (default when parameter not provided)
            when sorting by a certain value in ascending/descending order,
                sort={"field": SortMode.ASCENDING}
                sort={"field": SortMode.DESCENDING}
            when sorting first by "field" and then by "subfield"
            (while modern Python versions preserve the order of dictionaries,
            it is suggested for clarity to employ a `collections.OrderedDict`
            in these cases),
                sort={
                    "field": SortMode.ASCENDING,
                    "subfield": SortMode.ASCENDING,
                }
            and when running a vector similarity (ANN) search,
                sort={"$vector": [0.4, 0.15, -0.5]}

        Note:
            Some argument combinations impose an implicit upper bound on the
            number of documents the Data API returns. More specifically:
            (a) vector ANN searches cannot return more than a number of
            documents that at the time of writing is set to 1000 items;
            (b) with a sort criterion of the ascending/descending type, the
            Data API returns a smaller number of documents, set to 20 at the
            time of writing, and stops there. The returned documents are the
            top results across the whole collection by the requested criterion.
            Keep these provisions in mind even when subsequently running
            a command such as `.distinct()` on a cursor.

        Note:
            When no sorting criteria are specified (by vector or otherwise),
            the cursor can scroll through an arbitrary number of documents as
            the Data API and the client periodically exchange new chunks of
            documents. The behavior of the cursor when documents are
            added/removed after the `find` started depends on database
            internals: it is not guaranteed, nor excluded, that such
            "real-time" changes in the data are picked up by the cursor.
        """

        # lazy-import here to avoid circular import issues
        from astrapy.cursors import CollectionFindCursor

        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        # Build a bare cursor first, then layer the find options on top of it.
        base_cursor: CollectionFindCursor[DOC, DOC2] = CollectionFindCursor(
            collection=self,
            request_timeout_ms=_request_timeout_ms,
            overall_timeout_ms=None,
            request_timeout_label=_rt_label,
        )
        return (
            base_cursor.filter(filter)
            .project(projection)
            .skip(skip)
            .limit(limit)
            .sort(sort)
            .include_similarity(include_similarity)
            .include_sort_vector(include_sort_vector)
        )

    def find_one(
        self,
        filter: FilterType | None = None,
        *,
        projection: ProjectionType | None = None,
        include_similarity: bool | None = None,
        sort: SortType | None = None,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> DOC | None:
        """
        Run a search, returning the first document in the collection that matches
        provided filters, if any is found.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            projection: it controls which parts of the document are returned.
                It can be an allow-list: `{"f1": True, "f2": True}`,
                or a deny-list: `{"fx": False, "fy": False}`, but not a mixture
                (except for the `_id` and other special fields, which can be
                associated to both True or False independently of the rest
                of the specification).
                The special star-projections `{"*": True}` and `{"*": False}`
                have the effect of returning the whole document and `{}` respectively.
                For lists in documents, slice directives can be passed to select
                portions of the list: for instance, `{"array": {"$slice": 2}}`,
                `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
                `{"array": {"$slice": [-4, 2]}}`.
                An iterable over strings will be treated implicitly as an allow-list.
                The default projection (used if this parameter is not passed) does not
                necessarily include "special" fields such as `$vector` or `$vectorize`.
                See the Data API documentation for more on projections.
            include_similarity: a boolean to request the numeric value of the
                similarity to be returned as an added "$similarity" key in the
                returned document. Can only be used for vector ANN search, i.e.
                when either `vector` is supplied or the `sort` parameter has the
                shape {"$vector": ...}.
            sort: with this dictionary parameter one can control the order
                the documents are returned. See the Note about sorting for details.
                Vector-based ANN sorting is achieved by providing a "$vector"
                or a "$vectorize" key in `sort`.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a dictionary expressing the required document, otherwise None.

        Raises:
            UnexpectedDataAPIResponseException: if the API response lacks
                a `data.document` entry altogether.

        Examples:
            >>> my_coll.find_one({})
            {'_id': '68d1e515-...', 'seq': 37}
            >>> my_coll.find_one({"seq": 10})
            {'_id': 'd560e217-...', 'seq': 10}
            >>> my_coll.find_one({"seq": 1011})
            >>> # (returns None for no matches)
            >>> my_coll.find_one({}, projection={"seq": False})
            {'_id': '68d1e515-...'}
            >>> my_coll.find_one(
            ...     {},
            ...     sort={"seq": astrapy.constants.SortMode.DESCENDING},
            ... )
            {'_id': '97e85f81-...', 'seq': 69}
            >>> my_coll.find_one({}, sort={"$vector": [1, 0]}, projection={"*": True})
            {'_id': '...', 'tag': 'D', '$vector': [4.0, 1.0]}

        Note:
            See the `find` method for more details on the accepted parameters
            (whereas `skip` and `limit` are not valid parameters for `find_one`).
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        fo_options = (
            None
            if include_similarity is None
            else {"includeSimilarity": include_similarity}
        )
        # Only include the keys the caller actually provided in the payload.
        fo_payload = {
            "findOne": {
                k: v
                for k, v in {
                    "filter": filter,
                    "projection": normalize_optional_projection(projection),
                    "options": fo_options,
                    "sort": sort,
                }.items()
                if v is not None
            }
        }
        fo_response = self._converted_request(
            payload=fo_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        # A well-formed findOne response always carries a "document" entry
        # (whose value is null for no matches); its absence is an API anomaly.
        if "document" not in (fo_response.get("data") or {}):
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from findOne API command.",
                raw_response=fo_response,
            )
        # Return the already-extracted document (the original re-indexed the
        # response a second time for no reason); None signals "no match".
        doc_response = fo_response["data"]["document"]
        return doc_response  # type: ignore[no-any-return]

    def distinct(
        self,
        key: str,
        *,
        filter: FilterType | None = None,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[Any]:
        """
        Compute, client-side, the collection of unique values that `key`
        assumes across the documents matching the provided filter.

        Args:
            key: the field whose values are inspected. Dot-notation reaches
                into nested document levels, e.g.:
                    "field"
                    "field.subfield"
                    "field.3"
                    "field.3.subfield"
                When a list is traversed without an explicit numeric index,
                every item of the list is visited.
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            general_method_timeout_ms: a timeout, in milliseconds, for the
                whole operation: since this method is built on `find` (see),
                it may entail several successive HTTP requests, depending on
                the amount of involved documents.
            request_timeout_ms: a timeout, in milliseconds, for each API request.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a deduplicated list with every distinct value found for `key`
            among the matching documents.

        Example:
            >>> my_coll.insert_many(
            ...     [
            ...         {"name": "Marco", "food": ["apple", "orange"], "city": "Helsinki"},
            ...         {"name": "Emma", "food": {"likes_fruit": True, "allergies": []}},
            ...     ]
            ... )
            CollectionInsertManyResult(raw_results=..., inserted_ids=['c5b99f37-...', 'd6416321-...'])
            >>> my_coll.distinct("name")
            ['Marco', 'Emma']
            >>> my_coll.distinct("city")
            ['Helsinki']
            >>> my_coll.distinct("food")
            ['apple', 'orange', {'likes_fruit': True, 'allergies': []}]
            >>> my_coll.distinct("food.1")
            ['orange']
            >>> my_coll.distinct("food.allergies")
            []
            >>> my_coll.distinct("food.likes_fruit")
            [True]

        Note:
            `distinct` runs entirely on the client: it browses all required
            documents through the `find` logic and deduplicates their values
            locally. Large matching sets may therefore carry performance,
            latency and ultimately billing implications.

        Note:
            For how "distinct" behaves in the face of real-time changes to
            the collection contents, refer to the Note of the `find` command.
        """

        # deferred import, breaking a circular-import cycle
        from astrapy.cursors import CollectionFindCursor

        _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
            (general_method_timeout_ms, "general_method_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.general_method_timeout_ms,
                "general_method_timeout_ms",
            ),
        )
        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        # set up the key extractor and validate the (projection-safe) key:
        extractor = _create_document_key_extractor(key)
        safe_key = _reduce_distinct_key_to_safe(key)
        if safe_key == "":
            raise ValueError(
                "The 'key' parameter for distinct cannot be empty "
                "or start with a list index."
            )
        # the cursor type hint is deliberately loosened within this method body
        doc_cursor: CollectionFindCursor[dict[str, Any], dict[str, Any]] = (
            CollectionFindCursor(
                collection=self,
                request_timeout_ms=_request_timeout_ms,
                overall_timeout_ms=_general_method_timeout_ms,
                request_timeout_label=_rt_label,
                overall_timeout_label=_gmt_label,
            )  # type: ignore[assignment]
            .filter(filter)
            .project({safe_key: True})
        )
        # walk the cursor, deduplicating values by their document hash:
        seen_hashes: set[Any] = set()
        unique_values: list[Any] = []
        logger.info(f"running distinct() on '{self.name}'")
        for doc in doc_cursor:
            for value in extractor(doc):
                value_hash = _hash_document(
                    value, options=self.api_options.serdes_options
                )
                if value_hash not in seen_hashes:
                    seen_hashes.add(value_hash)
                    unique_values.append(value)
        logger.info(f"finished running distinct() on '{self.name}'")
        return unique_values

    def count_documents(
        self,
        filter: FilterType,
        *,
        upper_bound: int,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> int:
        """
        Obtain the exact number of documents in the collection satisfying a filter.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            upper_bound: a mandatory ceiling for the count result: should
                the matching documents outnumber it, an exception is raised.
                Likewise, an exception is raised if the count exceeds the
                hard maximum the Data API itself can reach, regardless of
                this parameter.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            the exact count of matching documents.

        Example:
            >>> my_coll.insert_many([{"seq": i} for i in range(20)])
            CollectionInsertManyResult(...)
            >>> my_coll.count_documents({}, upper_bound=100)
            20
            >>> my_coll.count_documents({"seq":{"$gt": 15}}, upper_bound=100)
            4
            >>> my_coll.count_documents({}, upper_bound=10)
            Traceback (most recent call last):
                ... ...
            astrapy.exceptions.TooManyDocumentsToCountException

        Note:
            Counting is an expensive operation: best practice is to always
            supply a sensible `upper_bound` reflecting the caller's
            expectations, and to avoid routinely counting sizeable amounts
            of documents (thousands or more), preferring application-specific
            alternatives instead. The Data API enforces its own hard cap on
            countable documents, and this method raises an exception
            whenever that cap is encountered.
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        cd_payload = {"countDocuments": {"filter": filter}}
        logger.info(f"countDocuments on '{self.name}'")
        cd_response = self._converted_request(
            payload=cd_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished countDocuments on '{self.name}'")
        status = cd_response.get("status", {})
        # a well-formed response must carry a "count" in its status
        if "count" not in status:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from countDocuments API command.",
                raw_response=cd_response,
            )
        count: int = status["count"]
        # "moreData" signals the server-side hard cap was hit
        if status.get("moreData", False):
            raise TooManyDocumentsToCountException(
                text=f"Document count exceeds {count}, the maximum allowed by the server",
                server_max_count_exceeded=True,
            )
        if count > upper_bound:
            raise TooManyDocumentsToCountException(
                text="Document count exceeds required upper bound",
                server_max_count_exceeded=False,
            )
        return count

    def estimated_document_count(
        self,
        *,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> int:
        """
        Ask the API server for an approximate count of the documents
        in the collection.

        Unlike `count_documents`, this method accepts no filtering parameters.

        Args:
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a server-provided estimate count of the documents in the collection.

        Example:
            >>> my_coll.estimated_document_count()
            35700
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        ed_payload: dict[str, Any] = {"estimatedDocumentCount": {}}
        logger.info(f"estimatedDocumentCount on '{self.name}'")
        ed_response = self._converted_request(
            payload=ed_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished estimatedDocumentCount on '{self.name}'")
        status = ed_response.get("status", {})
        # a well-formed response must carry a "count" in its status
        if "count" not in status:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from estimatedDocumentCount API command.",
                raw_response=ed_response,
            )
        estimate: int = status["count"]
        return estimate

    def find_one_and_replace(
        self,
        filter: FilterType,
        replacement: DOC,
        *,
        projection: ProjectionType | None = None,
        sort: SortType | None = None,
        upsert: bool = False,
        return_document: str = ReturnDocument.BEFORE,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> DOC | None:
        """
        Find a document on the collection and replace it entirely with a new one,
        optionally inserting a new one if no match is found.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            replacement: the new document to write into the collection.
            projection: it controls which parts of the document are returned.
                It can be an allow-list: `{"f1": True, "f2": True}`,
                or a deny-list: `{"fx": False, "fy": False}`, but not a mixture
                (except for the `_id` and other special fields, which can be
                associated to both True or False independently of the rest
                of the specification).
                The special star-projections `{"*": True}` and `{"*": False}`
                have the effect of returning the whole document and `{}` respectively.
                For lists in documents, slice directives can be passed to select
                portions of the list: for instance, `{"array": {"$slice": 2}}`,
                `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
                `{"array": {"$slice": [-4, 2]}}`.
                An iterable over strings will be treated implicitly as an allow-list.
                The default projection (used if this parameter is not passed) does not
                necessarily include "special" fields such as `$vector` or `$vectorize`.
                See the Data API documentation for more on projections.
            sort: with this dictionary parameter one can control the sorting
                order of the documents matching the filter, effectively
                determining what document will come first and hence be the
                replaced one. See the `find` method for more on sorting.
                Vector-based ANN sorting is achieved by providing a "$vector"
                or a "$vectorize" key in `sort`.
            upsert: this parameter controls the behavior in absence of matches.
                If True, `replacement` is inserted as a new document
                if no matches are found on the collection. If False,
                the operation silently does nothing in case of no matches.
            return_document: a flag controlling what document is returned:
                if set to `ReturnDocument.BEFORE`, or the string "before",
                the document found on database is returned; if set to
                `ReturnDocument.AFTER`, or the string "after", the new
                document is returned. The default is "before".
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            A document (or a projection thereof, as required), either the one
            before the replace operation or the one after that.
            Alternatively, the method returns None to represent
            that no matching document was found, or that no replacement
            was inserted (depending on the `return_document` parameter).

        Example:
            >>> my_coll.insert_one({"_id": "rule1", "text": "all animals are equal"})
            CollectionInsertOneResult(...)
            >>> my_coll.find_one_and_replace(
            ...     {"_id": "rule1"},
            ...     {"text": "some animals are more equal!"},
            ... )
            {'_id': 'rule1', 'text': 'all animals are equal'}
            >>> my_coll.find_one_and_replace(
            ...     {"text": "some animals are more equal!"},
            ...     {"text": "and the pigs are the rulers"},
            ...     return_document=astrapy.constants.ReturnDocument.AFTER,
            ... )
            {'_id': 'rule1', 'text': 'and the pigs are the rulers'}
            >>> my_coll.find_one_and_replace(
            ...     {"_id": "rule2"},
            ...     {"text": "F=ma^2"},
            ...     return_document=astrapy.constants.ReturnDocument.AFTER,
            ... )
            >>> # (returns None for no matches)
            >>> my_coll.find_one_and_replace(
            ...     {"_id": "rule2"},
            ...     {"text": "F=ma"},
            ...     upsert=True,
            ...     return_document=astrapy.constants.ReturnDocument.AFTER,
            ...     projection={"_id": False},
            ... )
            {'text': 'F=ma'}
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        options = {
            "returnDocument": return_document,
            "upsert": upsert,
        }
        # None-valued entries are dropped from the command payload
        fo_payload = {
            "findOneAndReplace": {
                k: v
                for k, v in {
                    "filter": filter,
                    "projection": normalize_optional_projection(projection),
                    "replacement": replacement,
                    "options": options,
                    "sort": sort,
                }.items()
                if v is not None
            }
        }
        logger.info(f"findOneAndReplace on '{self.name}'")
        fo_response = self._converted_request(
            payload=fo_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished findOneAndReplace on '{self.name}'")
        response_data = fo_response.get("data", {})
        if "document" not in response_data:
            # error text names the API command, consistently with e.g. findOne
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from findOneAndReplace API command.",
                raw_response=fo_response,
            )
        # the "document" entry is None when no match was found (and no upsert)
        return response_data["document"]  # type: ignore[no-any-return]

    def replace_one(
        self,
        filter: FilterType,
        replacement: DOC,
        *,
        sort: SortType | None = None,
        upsert: bool = False,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionUpdateResult:
        """
        Replace a single document on the collection with a new one,
        optionally inserting a new one if no match is found.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            replacement: the new document to write into the collection.
            sort: with this dictionary parameter one can control the sorting
                order of the documents matching the filter, effectively
                determining what document will come first and hence be the
                replaced one. See the `find` method for more on sorting.
                Vector-based ANN sorting is achieved by providing a "$vector"
                or a "$vectorize" key in `sort`.
            upsert: this parameter controls the behavior in absence of matches.
                If True, `replacement` is inserted as a new document
                if no matches are found on the collection. If False,
                the operation silently does nothing in case of no matches.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a CollectionUpdateResult object summarizing the outcome of
            the replace operation.

        Example:
            >>> my_coll.insert_one({"Marco": "Polo"})
            CollectionInsertOneResult(...)
            >>> my_coll.replace_one({"Marco": {"$exists": True}}, {"Buda": "Pest"})
            CollectionUpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1})
            >>> my_coll.find_one({"Buda": "Pest"})
            {'_id': '8424905a-...', 'Buda': 'Pest'}
            >>> my_coll.replace_one({"Mirco": {"$exists": True}}, {"Oh": "yeah?"})
            CollectionUpdateResult(raw_results=..., update_info={'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0})
            >>> my_coll.replace_one({"Mirco": {"$exists": True}}, {"Oh": "yeah?"}, upsert=True)
            CollectionUpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '931b47d6-...'})
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        options = {
            "upsert": upsert,
        }
        # this method is implemented on top of the findOneAndReplace command;
        # None-valued entries are dropped from the payload
        fo_payload = {
            "findOneAndReplace": {
                k: v
                for k, v in {
                    "filter": filter,
                    "replacement": replacement,
                    "options": options,
                    "sort": sort,
                }.items()
                if v is not None
            }
        }
        logger.info(f"findOneAndReplace on '{self.name}'")
        fo_response = self._converted_request(
            payload=fo_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished findOneAndReplace on '{self.name}'")
        if "document" not in fo_response.get("data", {}):
            # error text names the API command, consistently with e.g. findOne
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from findOneAndReplace API command.",
                raw_response=fo_response,
            )
        # the update metrics live in the response status, not in its data
        fo_status = fo_response.get("status") or {}
        _update_info = _prepare_update_info([fo_status])
        return CollectionUpdateResult(
            raw_results=[fo_response],
            update_info=_update_info,
        )

    def find_one_and_update(
        self,
        filter: FilterType,
        update: dict[str, Any],
        *,
        projection: ProjectionType | None = None,
        sort: SortType | None = None,
        upsert: bool = False,
        return_document: str = ReturnDocument.BEFORE,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> DOC | None:
        """
        Find a document on the collection and update it as requested,
        optionally inserting a new one if no match is found.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            update: the update prescription to apply to the document, expressed
                as a dictionary as per Data API syntax. Examples are:
                    {"$set": {"field": "value}}
                    {"$inc": {"counter": 10}}
                    {"$unset": {"field": ""}}
                See the Data API documentation for the full syntax.
            projection: it controls which parts of the document are returned.
                It can be an allow-list: `{"f1": True, "f2": True}`,
                or a deny-list: `{"fx": False, "fy": False}`, but not a mixture
                (except for the `_id` and other special fields, which can be
                associated to both True or False independently of the rest
                of the specification).
                The special star-projections `{"*": True}` and `{"*": False}`
                have the effect of returning the whole document and `{}` respectively.
                For lists in documents, slice directives can be passed to select
                portions of the list: for instance, `{"array": {"$slice": 2}}`,
                `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
                `{"array": {"$slice": [-4, 2]}}`.
                An iterable over strings will be treated implicitly as an allow-list.
                The default projection (used if this parameter is not passed) does not
                necessarily include "special" fields such as `$vector` or `$vectorize`.
                See the Data API documentation for more on projections.
            sort: with this dictionary parameter one can control the sorting
                order of the documents matching the filter, effectively
                determining what document will come first and hence be the
                updated one. See the `find` method for more on sorting.
                Vector-based ANN sorting is achieved by providing a "$vector"
                or a "$vectorize" key in `sort`.
            upsert: this parameter controls the behavior in absence of matches.
                If True, a new document (resulting from applying the `update`
                to an empty document) is inserted if no matches are found on
                the collection. If False, the operation silently does nothing
                in case of no matches.
            return_document: a flag controlling what document is returned:
                if set to `ReturnDocument.BEFORE`, or the string "before",
                the document found on database is returned; if set to
                `ReturnDocument.AFTER`, or the string "after", the new
                document is returned. The default is "before".
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            A document (or a projection thereof, as required), either the one
            before the update operation or the one after that.
            Alternatively, the method returns None to represent
            that no matching document was found, or that no update
            was applied (depending on the `return_document` parameter).

        Example:
            >>> my_coll.insert_one({"Marco": "Polo"})
            CollectionInsertOneResult(...)
            >>> my_coll.find_one_and_update(
            ...     {"Marco": {"$exists": True}},
            ...     {"$set": {"title": "Mr."}},
            ... )
            {'_id': 'a80106f2-...', 'Marco': 'Polo'}
            >>> my_coll.find_one_and_update(
            ...     {"title": "Mr."},
            ...     {"$inc": {"rank": 3}},
            ...     projection=["title", "rank"],
            ...     return_document=astrapy.constants.ReturnDocument.AFTER,
            ... )
            {'_id': 'a80106f2-...', 'title': 'Mr.', 'rank': 3}
            >>> my_coll.find_one_and_update(
            ...     {"name": "Johnny"},
            ...     {"$set": {"rank": 0}},
            ...     return_document=astrapy.constants.ReturnDocument.AFTER,
            ... )
            >>> # (returns None for no matches)
            >>> my_coll.find_one_and_update(
            ...     {"name": "Johnny"},
            ...     {"$set": {"rank": 0}},
            ...     upsert=True,
            ...     return_document=astrapy.constants.ReturnDocument.AFTER,
            ... )
            {'_id': 'cb4ef2ab-...', 'name': 'Johnny', 'rank': 0}
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        options = {
            "returnDocument": return_document,
            "upsert": upsert,
        }
        # None-valued entries are dropped from the command payload
        fo_payload = {
            "findOneAndUpdate": {
                k: v
                for k, v in {
                    "filter": filter,
                    "update": update,
                    "options": options,
                    "sort": sort,
                    "projection": normalize_optional_projection(projection),
                }.items()
                if v is not None
            }
        }
        logger.info(f"findOneAndUpdate on '{self.name}'")
        fo_response = self._converted_request(
            payload=fo_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished findOneAndUpdate on '{self.name}'")
        response_data = fo_response.get("data", {})
        if "document" not in response_data:
            # error text names the API command, consistently with e.g. findOne
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from findOneAndUpdate API command.",
                raw_response=fo_response,
            )
        # the "document" entry is None when no match was found (and no upsert)
        return response_data["document"]  # type: ignore[no-any-return]

    def update_one(
        self,
        filter: FilterType,
        update: dict[str, Any],
        *,
        sort: SortType | None = None,
        upsert: bool = False,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionUpdateResult:
        """
        Update a single document on the collection as requested,
        optionally inserting a new one if no match is found.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            update: the update prescription to apply to the document, expressed
                as a dictionary as per Data API syntax. Examples are:
                    {"$set": {"field": "value"}}
                    {"$inc": {"counter": 10}}
                    {"$unset": {"field": ""}}
                See the Data API documentation for the full syntax.
            sort: with this dictionary parameter one can control the sorting
                order of the documents matching the filter, effectively
                determining which document comes first and hence is the one
                updated. See the `find` method for more on sorting.
                Vector-based ANN sorting is achieved by providing a "$vector"
                or a "$vectorize" key in `sort`.
            upsert: controls the behavior in absence of matches.
                If True, a new document (resulting from applying the `update`
                to an empty document) is inserted if no matches are found on
                the collection. If False, the operation silently does nothing
                in case of no matches.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on
                the underlying API request. If not provided, this object's
                defaults apply. (This method issues a single API request,
                hence all timeout parameters are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a CollectionUpdateResult object summarizing the outcome of
            the update operation.

        Example:
            >>> my_coll.insert_one({"Marco": "Polo"})
            CollectionInsertOneResult(...)
            >>> my_coll.update_one({"Marco": {"$exists": True}}, {"$inc": {"rank": 3}})
            CollectionUpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1})
            >>> my_coll.update_one({"Mirko": {"$exists": True}}, {"$inc": {"rank": 3}})
            CollectionUpdateResult(raw_results=..., update_info={'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0})
            >>> my_coll.update_one({"Mirko": {"$exists": True}}, {"$inc": {"rank": 3}}, upsert=True)
            CollectionUpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '2a45ff60-...'})
        """

        # A single API request: all timeout parameters resolve into one value.
        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # Assemble the command, dropping unset (None) fields from the payload.
        raw_command_body: dict[str, Any] = {
            "filter": filter,
            "update": update,
            "options": {
                "upsert": upsert,
            },
            "sort": sort,
        }
        uo_payload = {
            "updateOne": {
                key: value
                for key, value in raw_command_body.items()
                if value is not None
            }
        }
        logger.info(f"updateOne on '{self.name}'")
        uo_response = self._converted_request(
            payload=uo_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished updateOne on '{self.name}'")
        # A well-formed response always carries a "status" block.
        if "status" not in uo_response:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from updateOne API command.",
                raw_response=uo_response,
            )
        uo_status = uo_response["status"]
        return CollectionUpdateResult(
            raw_results=[uo_response],
            update_info=_prepare_update_info([uo_status]),
        )

    def update_many(
        self,
        filter: FilterType,
        update: dict[str, Any],
        *,
        upsert: bool = False,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionUpdateResult:
        """
        Apply an update operation to all documents matching a condition,
        optionally inserting one documents in absence of matches.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            update: the update prescription to apply to the documents, expressed
                as a dictionary as per Data API syntax. Examples are:
                    {"$set": {"field": "value"}}
                    {"$inc": {"counter": 10}}
                    {"$unset": {"field": ""}}
                See the Data API documentation for the full syntax.
            upsert: this parameter controls the behavior in absence of matches.
                If True, a single new document (resulting from applying `update`
                to an empty document) is inserted if no matches are found on
                the collection. If False, the operation silently does nothing
                in case of no matches.
            general_method_timeout_ms: a timeout, in milliseconds, for the whole
                requested operation (which may involve multiple API requests).
                This method may entail successive HTTP API requests,
                depending on the amount of involved documents.
                If not passed, the collection-level setting is used instead.
            request_timeout_ms: a timeout, in milliseconds, for each API request.
                If not passed, the collection-level setting is used instead.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a CollectionUpdateResult object summarizing the outcome of
            the update operation.

        Example:
            >>> my_coll.insert_many([{"c": "red"}, {"c": "green"}, {"c": "blue"}])
            CollectionInsertManyResult(...)
            >>> my_coll.update_many({"c": {"$ne": "green"}}, {"$set": {"nongreen": True}})
            CollectionUpdateResult(raw_results=..., update_info={'n': 2, 'updatedExisting': True, 'ok': 1.0, 'nModified': 2})
            >>> my_coll.update_many({"c": "orange"}, {"$set": {"is_also_fruit": True}})
            CollectionUpdateResult(raw_results=..., update_info={'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0})
            >>> my_coll.update_many(
            ...     {"c": "orange"},
            ...     {"$set": {"is_also_fruit": True}},
            ...     upsert=True,
            ... )
            CollectionUpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '46643050-...'})

        Note:
            Similarly to the case of `find` (see its docstring for more details),
            running this command while, at the same time, another process is
            inserting new documents which match the filter of the `update_many`
            can result in an unpredictable fraction of these documents being updated.
            In other words, it cannot be easily predicted whether a given
            newly-inserted document will be picked up by the update_many command or not.
        """

        # Overall-operation timeout: explicit argument first, then its
        # `timeout_ms` alias, then the collection-level default.
        _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
            (general_method_timeout_ms, "general_method_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.general_method_timeout_ms,
                "general_method_timeout_ms",
            ),
        )
        # Per-request timeout, applied to each page of the paginated command.
        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        api_options = {
            "upsert": upsert,
        }
        page_state_options: dict[str, str] = {}
        um_responses: list[dict[str, Any]] = []
        um_statuses: list[dict[str, Any]] = []
        must_proceed = True
        logger.info(f"starting update_many on '{self.name}'")
        timeout_manager = MultiCallTimeoutManager(
            overall_timeout_ms=_general_method_timeout_ms,
            timeout_label=_gmt_label,
        )
        # The API processes updateMany in pages: keep re-issuing the command,
        # carrying the returned "nextPageState" forward, until no further
        # page is signaled (or an error occurs).
        while must_proceed:
            options = {**api_options, **page_state_options}
            this_um_payload = {
                "updateMany": {
                    k: v
                    for k, v in {
                        "filter": filter,
                        "update": update,
                        "options": options,
                    }.items()
                    if v is not None
                }
            }
            logger.info(f"updateMany on '{self.name}'")
            this_um_response = self._converted_request(
                payload=this_um_payload,
                timeout_context=timeout_manager.remaining_timeout(
                    cap_time_ms=_request_timeout_ms,
                    cap_timeout_label=_rt_label,
                ),
            )
            logger.info(f"finished updateMany on '{self.name}'")
            this_um_status = this_um_response.get("status") or {}
            #
            # if errors, quit early
            if this_um_response.get("errors", []):
                # Attach the work completed so far (previous pages) as a
                # partial result on the raised exception.
                partial_update_info = _prepare_update_info(um_statuses)
                partial_result = CollectionUpdateResult(
                    raw_results=um_responses,
                    update_info=partial_update_info,
                )
                all_um_responses = um_responses + [this_um_response]
                raise CollectionUpdateManyException.from_responses(
                    commands=[None for _ in all_um_responses],
                    raw_responses=all_um_responses,
                    partial_result=partial_result,
                )
            else:
                # No errors, yet no status either: malformed response.
                if "status" not in this_um_response:
                    raise UnexpectedDataAPIResponseException(
                        text="Faulty response from update_many API command.",
                        raw_response=this_um_response,
                    )
                um_responses.append(this_um_response)
                um_statuses.append(this_um_status)
                next_page_state = this_um_status.get("nextPageState")
                if next_page_state is not None:
                    must_proceed = True
                    page_state_options = {"pageState": next_page_state}
                else:
                    must_proceed = False
                    page_state_options = {}

        # Aggregate per-page statuses into a single update_info summary.
        update_info = _prepare_update_info(um_statuses)
        logger.info(f"finished update_many on '{self.name}'")
        return CollectionUpdateResult(
            raw_results=um_responses,
            update_info=update_info,
        )

    def find_one_and_delete(
        self,
        filter: FilterType,
        *,
        projection: ProjectionType | None = None,
        sort: SortType | None = None,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> DOC | None:
        """
        Find a document in the collection and delete it. The deleted document,
        however, is the return value of the method.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            projection: it controls which parts of the document are returned.
                It can be an allow-list: `{"f1": True, "f2": True}`,
                or a deny-list: `{"fx": False, "fy": False}`, but not a mixture
                (except for the `_id` and other special fields, which can be
                associated to both True or False independently of the rest
                of the specification).
                The special star-projections `{"*": True}` and `{"*": False}`
                have the effect of returning the whole document and `{}` respectively.
                For lists in documents, slice directives can be passed to select
                portions of the list: for instance, `{"array": {"$slice": 2}}`,
                `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
                `{"array": {"$slice": [-4, 2]}}`.
                An iterable over strings will be treated implicitly as an allow-list.
                The default projection (used if this parameter is not passed) does not
                necessarily include "special" fields such as `$vector` or `$vectorize`.
                See the Data API documentation for more on projections.
            sort: with this dictionary parameter one can control the sorting
                order of the documents matching the filter, effectively
                determining which document comes first and hence is the
                deleted one. See the `find` method for more on sorting.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on
                the underlying API request. If not provided, this object's
                defaults apply. (This method issues a single API request,
                hence all timeout parameters are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            Either the document (or a projection thereof, as requested), or None
            if no matches were found in the first place.

        Example:
            >>> my_coll.insert_many(
            ...     [
            ...         {"species": "swan", "class": "Aves"},
            ...         {"species": "frog", "class": "Amphibia"},
            ...     ],
            ... )
            CollectionInsertManyResult(...)
            >>> my_coll.find_one_and_delete(
            ...     {"species": {"$ne": "frog"}},
            ...     projection=["species"],
            ... )
            {'_id': '5997fb48-...', 'species': 'swan'}
            >>> my_coll.find_one_and_delete({"species": {"$ne": "frog"}})
            >>> # (returns None for no matches)
        """

        # A single API request: all timeout parameters resolve into one value.
        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # Build the command body, omitting fields left unset (None).
        raw_command_body: dict[str, Any] = {
            "filter": filter,
            "sort": sort,
            "projection": normalize_optional_projection(projection),
        }
        fo_payload = {
            "findOneAndDelete": {
                key: value
                for key, value in raw_command_body.items()
                if value is not None
            }
        }
        logger.info(f"findOneAndDelete on '{self.name}'")
        fo_response = self._converted_request(
            payload=fo_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished findOneAndDelete on '{self.name}'")
        response_data = fo_response.get("data", {})
        if "document" in response_data:
            # A match was found: return the (possibly projected) document.
            return response_data["document"]  # type: ignore[no-any-return]
        # No document in the response: legitimate only if nothing was deleted.
        if fo_response.get("status", {}).get("deletedCount") == 0:
            return None
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from find_one_and_delete API command.",
            raw_response=fo_response,
        )

    def delete_one(
        self,
        filter: FilterType,
        *,
        sort: SortType | None = None,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionDeleteResult:
        """
        Delete one document matching a provided filter.
        This method never deletes more than a single document, regardless
        of the number of matches to the provided filters.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            sort: with this dictionary parameter one can control the sorting
                order of the documents matching the filter, effectively
                determining which document comes first and hence is the
                deleted one. See the `find` method for more on sorting.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on
                the underlying API request. If not provided, this object's
                defaults apply. (This method issues a single API request,
                hence all timeout parameters are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a CollectionDeleteResult object summarizing the outcome of the
            delete operation.

        Example:
            >>> my_coll.insert_many([{"seq": 1}, {"seq": 0}, {"seq": 2}])
            CollectionInsertManyResult(...)
            >>> my_coll.delete_one({"seq": 1})
            CollectionDeleteResult(raw_results=..., deleted_count=1)
            >>> my_coll.delete_one(
            ...     {"seq": {"$exists": True}},
            ...     sort={"seq": astrapy.constants.SortMode.DESCENDING},
            ... )
            CollectionDeleteResult(raw_results=..., deleted_count=1)
            >>> my_coll.delete_one({"seq": 2})
            CollectionDeleteResult(raw_results=..., deleted_count=0)
        """

        # A single API request: all timeout parameters resolve into one value.
        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # Build the command body, omitting fields left unset (None).
        raw_command_body: dict[str, Any] = {
            "filter": filter,
            "sort": sort,
        }
        do_payload = {
            "deleteOne": {
                key: value
                for key, value in raw_command_body.items()
                if value is not None
            }
        }
        logger.info(f"deleteOne on '{self.name}'")
        do_response = self._converted_request(
            payload=do_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished deleteOne on '{self.name}'")
        do_status = do_response.get("status", {})
        # A well-formed response always carries "deletedCount" in its status.
        if "deletedCount" not in do_status:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from delete_one API command.",
                raw_response=do_response,
            )
        return CollectionDeleteResult(
            deleted_count=do_status["deletedCount"],
            raw_results=[do_response],
        )

    def delete_many(
        self,
        filter: FilterType,
        *,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionDeleteResult:
        """
        Delete all documents matching a provided filter.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
                Passing an empty filter, `{}`, completely erases all contents
                of the collection.
            general_method_timeout_ms: a timeout, in milliseconds, for the whole
                requested operation (which may involve multiple API requests).
                This method may entail successive HTTP API requests,
                depending on the amount of involved documents.
                If not passed, the collection-level setting is used instead.
            request_timeout_ms: a timeout, in milliseconds, for each API request.
                If not passed, the collection-level setting is used instead.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a CollectionDeleteResult object summarizing the outcome of the
            delete operation.

        Example:
            >>> my_coll.insert_many([{"seq": 1}, {"seq": 0}, {"seq": 2}])
            CollectionInsertManyResult(...)
            >>> my_coll.delete_many({"seq": {"$lte": 1}})
            CollectionDeleteResult(raw_results=..., deleted_count=2)
            >>> my_coll.distinct("seq")
            [2]
            >>> my_coll.delete_many({"seq": {"$lte": 1}})
            CollectionDeleteResult(raw_results=..., deleted_count=0)

        Note:
            This operation is in general not atomic. Depending on the amount
            of matching documents, it can keep running (in a blocking way)
            for a macroscopic time. In that case, new documents that are
            meanwhile inserted (e.g. from another process/application) will be
            deleted during the execution of this method call until the
            collection is devoid of matches.
            An exception is the `filter={}` case, whereby the operation is atomic.
        """

        # Overall-operation timeout: explicit argument first, then its
        # `timeout_ms` alias, then the collection-level default.
        _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
            (general_method_timeout_ms, "general_method_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.general_method_timeout_ms,
                "general_method_timeout_ms",
            ),
        )
        # Per-request timeout, applied to each deleteMany API call.
        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        dm_responses: list[dict[str, Any]] = []
        deleted_count = 0
        must_proceed = True
        timeout_manager = MultiCallTimeoutManager(
            overall_timeout_ms=_general_method_timeout_ms,
            timeout_label=_gmt_label,
        )
        # The same payload is re-issued on each iteration: the API signals,
        # via the "moreData" status flag, whether further calls are needed.
        this_dm_payload = {"deleteMany": {"filter": filter}}
        logger.info(f"starting delete_many on '{self.name}'")
        while must_proceed:
            logger.info(f"deleteMany on '{self.name}'")
            # raise_api_errors=False: errors are inspected below so that a
            # partial result can be attached to the raised exception.
            this_dm_response = self._converted_request(
                payload=this_dm_payload,
                raise_api_errors=False,
                timeout_context=timeout_manager.remaining_timeout(
                    cap_time_ms=_request_timeout_ms,
                    cap_timeout_label=_rt_label,
                ),
            )
            logger.info(f"finished deleteMany on '{self.name}'")
            # if errors, quit early
            if this_dm_response.get("errors", []):
                partial_result = CollectionDeleteResult(
                    deleted_count=deleted_count,
                    raw_results=dm_responses,
                )
                all_dm_responses = dm_responses + [this_dm_response]
                raise CollectionDeleteManyException.from_responses(
                    commands=[None for _ in all_dm_responses],
                    raw_responses=all_dm_responses,
                    partial_result=partial_result,
                )
            else:
                this_dc = this_dm_response.get("status", {}).get("deletedCount")
                # No errors, yet no deletedCount either: malformed response.
                if this_dc is None:
                    raise UnexpectedDataAPIResponseException(
                        text="Faulty response from delete_many API command.",
                        raw_response=this_dm_response,
                    )
                dm_responses.append(this_dm_response)
                deleted_count += this_dc
                must_proceed = this_dm_response.get("status", {}).get("moreData", False)

        logger.info(f"finished delete_many on '{self.name}'")
        return CollectionDeleteResult(
            deleted_count=deleted_count,
            raw_results=dm_responses,
        )

    def drop(
        self,
        *,
        collection_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Drop the collection, i.e. delete it from the database along with
        all the documents it contains.

        Args:
            collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `collection_admin_timeout_ms`.
            timeout_ms: an alias for `collection_admin_timeout_ms`.

        Example:
            >>> my_coll.find_one({})
            {'_id': '...', 'a': 100}
            >>> my_coll.drop()
            >>> my_coll.find_one({})
            Traceback (most recent call last):
                ... ...
            astrapy.exceptions.DataAPIResponseException: Collection does not exist, collection name: my_collection

        Note:
            Use with caution.

        Note:
            Once the method succeeds, methods on this object can still be invoked:
            however, this hardly makes sense as the underlying actual collection
            is no more.
            It is responsibility of the developer to design a correct flow
            which avoids using a deceased collection any further.
        """

        logger.info(f"dropping collection '{self.name}' (self)")
        # Delegate to the parent Database object, which issues the actual
        # drop-collection admin command.
        self.database.drop_collection(
            self.name,
            collection_admin_timeout_ms=collection_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info(f"finished dropping collection '{self.name}' (self)")

    def command(
        self,
        body: dict[str, Any] | None,
        *,
        raise_api_errors: bool = True,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> dict[str, Any]:
        """
        Send a POST request to the Data API for this collection with
        an arbitrary, caller-provided payload.
        No transformations or type conversions are made on the provided payload.

        Args:
            body: a JSON-serializable dictionary, the payload of the request.
            raise_api_errors: if True, responses with a nonempty 'errors' field
                result in an astrapy exception being raised.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on
                the underlying API request. If not provided, this object's
                defaults apply. (This method issues a single API request,
                hence all timeout parameters are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a dictionary with the response of the HTTP request.

        Example:
            >>> my_coll.command({"countDocuments": {}})
            {'status': {'count': 123}}
        """

        # A single API request: all timeout parameters resolve into one value.
        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # A short description of the command (its top-level keys), for logging.
        _cmd_desc: str = ",".join(sorted(body.keys())) if body else "(none)"
        logger.info(f"command={_cmd_desc} on '{self.name}'")
        command_result = self._api_commander.request(
            payload=body,
            raise_api_errors=raise_api_errors,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished command={_cmd_desc} on '{self.name}'")
        return command_result

Ancestors

  • typing.Generic

Instance variables

var database : Database

a Database object, the database this collection belongs to.

Example

>>> my_coll.database.name
'the_application_database'
Expand source code
@property
def database(self) -> Database:
    """
    a Database object, the database this collection belongs to.

    Example:
        >>> my_coll.database.name
        'the_application_database'
    """

    # Simple accessor over the privately-held Database reference.
    database_obj = self._database
    return database_obj
var full_name : str

The fully-qualified collection name within the database, in the form "keyspace.collection_name".

Example

>>> my_coll.full_name
'default_keyspace.my_v_collection'
Expand source code
@property
def full_name(self) -> str:
    """
    The fully-qualified collection name within the database,
    in the form "keyspace.collection_name".

    Example:
        >>> my_coll.full_name
        'default_keyspace.my_v_collection'
    """

    # Join the keyspace and collection name with the conventional dot.
    return ".".join((self.keyspace, self.name))
var keyspace : str

The keyspace this collection is in.

Example

>>> my_coll.keyspace
'default_keyspace'
Expand source code
@property
def keyspace(self) -> str:
    """
    The keyspace this collection is in.

    Raises a ValueError if the owning database has no keyspace set.

    Example:
        >>> my_coll.keyspace
        'default_keyspace'
    """

    found_keyspace = self.database.keyspace
    if found_keyspace is not None:
        return found_keyspace
    raise ValueError("The collection's DB is set with keyspace=None")
var name : str

The name of this collection.

Example

>>> my_coll.name
'my_v_collection'
Expand source code
@property
def name(self) -> str:
    """
    The name of this collection.

    Example:
        >>> my_coll.name
        'my_v_collection'
    """

    collection_name: str = self._name
    return collection_name

Methods

def command(self, body: dict[str, Any] | None, *, raise_api_errors: bool = True, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> dict[str, typing.Any]

Send a POST request to the Data API for this collection with an arbitrary, caller-provided payload. No transformations or type conversions are made on the provided payload.

Args

body
a JSON-serializable dictionary, the payload of the request.
raise_api_errors
if True, responses with a nonempty 'errors' field result in an astrapy exception being raised.
general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Returns

a dictionary with the response of the HTTP request.

Example

>>> my_coll.command({"countDocuments": {}})
{'status': {'count': 123}}
Expand source code
def command(
    self,
    body: dict[str, Any] | None,
    *,
    raise_api_errors: bool = True,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> dict[str, Any]:
    """
    Issue a POST request to the Data API for this collection, carrying
    an arbitrary caller-supplied payload: the body is sent as-is, with
    no transformations or type conversions applied by the client.

    Args:
        body: a JSON-serializable dictionary, the payload of the request.
        raise_api_errors: if True, responses with a nonempty 'errors' field
            result in an astrapy exception being raised.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a dictionary with the response of the HTTP request.

    Example:
        >>> my_coll.command({"countDocuments": {}})
        {'status': {'count': 123}}
    """

    # resolve the effective per-request timeout from the aliases/defaults
    _req_ms, _req_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # a short, log-friendly description of the command being issued
    _cmd_desc: str = ",".join(sorted(body.keys())) if body else "(none)"
    logger.info(f"command={_cmd_desc} on '{self.name}'")
    raw_response = self._api_commander.request(
        payload=body,
        raise_api_errors=raise_api_errors,
        timeout_context=_TimeoutContext(request_ms=_req_ms, label=_req_label),
    )
    logger.info(f"finished command={_cmd_desc} on '{self.name}'")
    return raw_response
def count_documents(self, filter: FilterType, *, upper_bound: int, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> int

Count the documents in the collection matching the specified filter.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
upper_bound
a required ceiling on the result of the count operation. If the actual number of documents exceeds this value, an exception will be raised. Furthermore, if the actual number of documents exceeds the maximum count that the Data API can reach (regardless of upper_bound), an exception will be raised.
general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Returns

the exact count of matching documents.

Example

>>> my_coll.insert_many([{"seq": i} for i in range(20)])
CollectionInsertManyResult(...)
>>> my_coll.count_documents({}, upper_bound=100)
20
>>> my_coll.count_documents({"seq":{"$gt": 15}}, upper_bound=100)
4
>>> my_coll.count_documents({}, upper_bound=10)
Traceback (most recent call last):
    ... ...
astrapy.exceptions.TooManyDocumentsToCountException

Note

Count operations are expensive: for this reason, the best practice is to provide a reasonable upper_bound according to the caller expectations. Moreover, indiscriminate usage of count operations for sizeable amounts of documents (i.e. in the thousands and more) is discouraged in favor of alternative application-specific solutions. Keep in mind that the Data API has a hard upper limit on the amount of documents it will count, and that an exception will be thrown by this method if this limit is encountered.

Expand source code
def count_documents(
    self,
    filter: FilterType,
    *,
    upper_bound: int,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> int:
    """
    Count the documents in the collection matching the specified filter.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        upper_bound: a required ceiling on the result of the count operation.
            If the actual number of documents exceeds this value,
            an exception will be raised.
            Furthermore, if the actual number of documents exceeds the maximum
            count that the Data API can reach (regardless of upper_bound),
            an exception will be raised.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        the exact count of matching documents.

    Example:
        >>> my_coll.insert_many([{"seq": i} for i in range(20)])
        CollectionInsertManyResult(...)
        >>> my_coll.count_documents({}, upper_bound=100)
        20
        >>> my_coll.count_documents({"seq":{"$gt": 15}}, upper_bound=100)
        4
        >>> my_coll.count_documents({}, upper_bound=10)
        Traceback (most recent call last):
            ... ...
        astrapy.exceptions.TooManyDocumentsToCountException

    Note:
        Count operations are expensive: for this reason, the best practice
        is to provide a reasonable `upper_bound` according to the caller
        expectations. Moreover, indiscriminate usage of count operations
        for sizeable amounts of documents (i.e. in the thousands and more)
        is discouraged in favor of alternative application-specific solutions.
        Keep in mind that the Data API has a hard upper limit on the amount
        of documents it will count, and that an exception will be thrown
        by this method if this limit is encountered.
    """

    _req_ms, _req_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info(f"countDocuments on '{self.name}'")
    cd_response = self._converted_request(
        payload={"countDocuments": {"filter": filter}},
        timeout_context=_TimeoutContext(request_ms=_req_ms, label=_req_label),
    )
    logger.info(f"finished countDocuments on '{self.name}'")
    status = cd_response.get("status", {})
    # guard: a well-formed response must carry a "count" in its status
    if "count" not in status:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from countDocuments API command.",
            raw_response=cd_response,
        )
    count: int = status["count"]
    # "moreData" signals the server-side hard counting limit was hit
    if status.get("moreData", False):
        raise TooManyDocumentsToCountException(
            text=f"Document count exceeds {count}, the maximum allowed by the server",
            server_max_count_exceeded=True,
        )
    if count > upper_bound:
        raise TooManyDocumentsToCountException(
            text="Document count exceeds required upper bound",
            server_max_count_exceeded=False,
        )
    return count
def delete_many(self, filter: FilterType, *, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> CollectionDeleteResult

Delete all documents matching a provided filter.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators. Passing an empty filter, {}, completely erases all contents of the collection.
general_method_timeout_ms
a timeout, in milliseconds, for the whole requested operation (which may involve multiple API requests). This method may entail successive HTTP API requests, depending on the amount of involved documents. If not passed, the collection-level setting is used instead.
request_timeout_ms
a timeout, in milliseconds, for each API request. If not passed, the collection-level setting is used instead.
timeout_ms
an alias for general_method_timeout_ms.

Returns

a CollectionDeleteResult object summarizing the outcome of the delete operation.

Example

>>> my_coll.insert_many([{"seq": 1}, {"seq": 0}, {"seq": 2}])
CollectionInsertManyResult(...)
>>> my_coll.delete_many({"seq": {"$lte": 1}})
CollectionDeleteResult(raw_results=..., deleted_count=2)
>>> my_coll.distinct("seq")
[2]
>>> my_coll.delete_many({"seq": {"$lte": 1}})
CollectionDeleteResult(raw_results=..., deleted_count=0)

Note

This operation is in general not atomic. Depending on the amount of matching documents, it can keep running (in a blocking way) for a macroscopic time. In that case, new documents that are meanwhile inserted (e.g. from another process/application) will be deleted during the execution of this method call until the collection is devoid of matches. An exception is the filter={} case, whereby the operation is atomic.

Expand source code
def delete_many(
    self,
    filter: FilterType,
    *,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionDeleteResult:
    """
    Delete all documents matching a provided filter.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
            Passing an empty filter, `{}`, completely erases all contents
            of the collection.
        general_method_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation (which may involve multiple API requests).
            This method may entail successive HTTP API requests,
            depending on the amount of involved documents.
            If not passed, the collection-level setting is used instead.
        request_timeout_ms: a timeout, in milliseconds, for each API request.
            If not passed, the collection-level setting is used instead.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a CollectionDeleteResult object summarizing the outcome of the
        delete operation.

    Example:
        >>> my_coll.insert_many([{"seq": 1}, {"seq": 0}, {"seq": 2}])
        CollectionInsertManyResult(...)
        >>> my_coll.delete_many({"seq": {"$lte": 1}})
        CollectionDeleteResult(raw_results=..., deleted_count=2)
        >>> my_coll.distinct("seq")
        [2]
        >>> my_coll.delete_many({"seq": {"$lte": 1}})
        CollectionDeleteResult(raw_results=..., deleted_count=0)

    Note:
        This operation is in general not atomic. Depending on the amount
        of matching documents, it can keep running (in a blocking way)
        for a macroscopic time. In that case, new documents that are
        meanwhile inserted (e.g. from another process/application) will be
        deleted during the execution of this method call until the
        collection is devoid of matches.
        An exception is the `filter={}` case, whereby the operation is atomic.
    """

    # overall-operation timeout: explicit args take precedence over defaults
    _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
        (general_method_timeout_ms, "general_method_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (
            self.api_options.timeout_options.general_method_timeout_ms,
            "general_method_timeout_ms",
        ),
    )
    # per-request timeout, capping each individual deleteMany API call
    _request_timeout_ms, _rt_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    raw_responses: list[dict[str, Any]] = []
    total_deleted = 0
    timeout_manager = MultiCallTimeoutManager(
        overall_timeout_ms=_general_method_timeout_ms,
        timeout_label=_gmt_label,
    )
    dm_payload = {"deleteMany": {"filter": filter}}
    logger.info(f"starting delete_many on '{self.name}'")
    # keep issuing deleteMany requests until the server reports no more data
    while True:
        logger.info(f"deleteMany on '{self.name}'")
        dm_response = self._converted_request(
            payload=dm_payload,
            raise_api_errors=False,
            timeout_context=timeout_manager.remaining_timeout(
                cap_time_ms=_request_timeout_ms,
                cap_timeout_label=_rt_label,
            ),
        )
        logger.info(f"finished deleteMany on '{self.name}'")
        if dm_response.get("errors", []):
            # quit early on errors, reporting the partial outcome so far
            partial_result = CollectionDeleteResult(
                deleted_count=total_deleted,
                raw_results=raw_responses,
            )
            all_responses = raw_responses + [dm_response]
            raise CollectionDeleteManyException.from_responses(
                commands=[None for _ in all_responses],
                raw_responses=all_responses,
                partial_result=partial_result,
            )
        this_dc = dm_response.get("status", {}).get("deletedCount")
        if this_dc is None:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from delete_many API command.",
                raw_response=dm_response,
            )
        raw_responses.append(dm_response)
        total_deleted += this_dc
        if not dm_response.get("status", {}).get("moreData", False):
            break

    logger.info(f"finished delete_many on '{self.name}'")
    return CollectionDeleteResult(
        deleted_count=total_deleted,
        raw_results=raw_responses,
    )
def delete_one(self, filter: FilterType, *, sort: SortType | None = None, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> CollectionDeleteResult

Delete one document matching a provided filter. This method never deletes more than a single document, regardless of the number of matches to the provided filters.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
sort
with this dictionary parameter one can control the sorting order of the documents matching the filter, effectively determining what document will come first and hence be the deleted one. See the find method for more on sorting.
general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Returns

a CollectionDeleteResult object summarizing the outcome of the delete operation.

Example

>>> my_coll.insert_many([{"seq": 1}, {"seq": 0}, {"seq": 2}])
CollectionInsertManyResult(...)
>>> my_coll.delete_one({"seq": 1})
CollectionDeleteResult(raw_results=..., deleted_count=1)
>>> my_coll.distinct("seq")
[0, 2]
>>> my_coll.delete_one(
...     {"seq": {"$exists": True}},
...     sort={"seq": astrapy.constants.SortMode.DESCENDING},
... )
CollectionDeleteResult(raw_results=..., deleted_count=1)
>>> my_coll.distinct("seq")
[0]
>>> my_coll.delete_one({"seq": 2})
CollectionDeleteResult(raw_results=..., deleted_count=0)
Expand source code
def delete_one(
    self,
    filter: FilterType,
    *,
    sort: SortType | None = None,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionDeleteResult:
    """
    Delete one document matching a provided filter.
    This method never deletes more than a single document, regardless
    of the number of matches to the provided filters.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        sort: with this dictionary parameter one can control the sorting
            order of the documents matching the filter, effectively
            determining what document will come first and hence be the
            deleted one. See the `find` method for more on sorting.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a CollectionDeleteResult object summarizing the outcome of the
        delete operation.

    Example:
        >>> my_coll.insert_many([{"seq": 1}, {"seq": 0}, {"seq": 2}])
        CollectionInsertManyResult(...)
        >>> my_coll.delete_one({"seq": 1})
        CollectionDeleteResult(raw_results=..., deleted_count=1)
        >>> my_coll.distinct("seq")
        [0, 2]
        >>> my_coll.delete_one(
        ...     {"seq": {"$exists": True}},
        ...     sort={"seq": astrapy.constants.SortMode.DESCENDING},
        ... )
        CollectionDeleteResult(raw_results=..., deleted_count=1)
        >>> my_coll.distinct("seq")
        [0]
        >>> my_coll.delete_one({"seq": 2})
        CollectionDeleteResult(raw_results=..., deleted_count=0)
    """

    _req_ms, _req_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # assemble the command payload, omitting entries left at None
    delete_one_args: dict[str, Any] = {}
    if filter is not None:
        delete_one_args["filter"] = filter
    if sort is not None:
        delete_one_args["sort"] = sort
    do_payload = {"deleteOne": delete_one_args}
    logger.info(f"deleteOne on '{self.name}'")
    do_response = self._converted_request(
        payload=do_payload,
        timeout_context=_TimeoutContext(request_ms=_req_ms, label=_req_label),
    )
    logger.info(f"finished deleteOne on '{self.name}'")
    status = do_response.get("status", {})
    # guard: a well-formed response must carry a "deletedCount" in its status
    if "deletedCount" not in status:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from delete_one API command.",
            raw_response=do_response,
        )
    return CollectionDeleteResult(
        deleted_count=status["deletedCount"],
        raw_results=[do_response],
    )
def distinct(self, key: str, *, filter: FilterType | None = None, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[typing.Any]

Return a list of the unique values of key across the documents in the collection that match the provided filter.

Args

key
the name of the field whose value is inspected across documents. Keys can use dot-notation to descend to deeper document levels. Example of acceptable key values: "field" "field.subfield" "field.3" "field.3.subfield" If lists are encountered and no numeric index is specified, all items in the list are visited.
filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
general_method_timeout_ms
a timeout, in milliseconds, for the whole requested operation (which may involve multiple API requests). This method, being based on find (see) may entail successive HTTP API requests, depending on the amount of involved documents.
request_timeout_ms
a timeout, in milliseconds, for each API request.
timeout_ms
an alias for general_method_timeout_ms.

Returns

a list of all different values for key found across the documents that match the filter. The result list has no repeated items.

Example

>>> my_coll.insert_many(
...     [
...         {"name": "Marco", "food": ["apple", "orange"], "city": "Helsinki"},
...         {"name": "Emma", "food": {"likes_fruit": True, "allergies": []}},
...     ]
... )
CollectionInsertManyResult(raw_results=..., inserted_ids=['c5b99f37-...', 'd6416321-...'])
>>> my_coll.distinct("name")
['Marco', 'Emma']
>>> my_coll.distinct("city")
['Helsinki']
>>> my_coll.distinct("food")
['apple', 'orange', {'likes_fruit': True, 'allergies': []}]
>>> my_coll.distinct("food.1")
['orange']
>>> my_coll.distinct("food.allergies")
[]
>>> my_coll.distinct("food.likes_fruit")
[True]

Note

It must be kept in mind that distinct is a client-side operation, which effectively browses all required documents using the logic of the find method and collects the unique values found for key. As such, there may be performance, latency and ultimately billing implications if the amount of matching documents is large.

Note

For details on the behaviour of "distinct" in conjunction with real-time changes in the collection contents, see the Note of the find command.

Expand source code
def distinct(
    self,
    key: str,
    *,
    filter: FilterType | None = None,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[Any]:
    """
    Return a list of the unique values of `key` across the documents
    in the collection that match the provided filter.

    Args:
        key: the name of the field whose value is inspected across documents.
            Keys can use dot-notation to descend to deeper document levels.
            Example of acceptable `key` values:
                "field"
                "field.subfield"
                "field.3"
                "field.3.subfield"
            If lists are encountered and no numeric index is specified,
            all items in the list are visited.
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        general_method_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation (which may involve multiple API requests).
            This method, being based on `find` (see) may entail successive HTTP API
            requests, depending on the amount of involved documents.
        request_timeout_ms: a timeout, in milliseconds, for each API request.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a list of all different values for `key` found across the documents
        that match the filter. The result list has no repeated items.

    Example:
        >>> my_coll.insert_many(
        ...     [
        ...         {"name": "Marco", "food": ["apple", "orange"], "city": "Helsinki"},
        ...         {"name": "Emma", "food": {"likes_fruit": True, "allergies": []}},
        ...     ]
        ... )
        CollectionInsertManyResult(raw_results=..., inserted_ids=['c5b99f37-...', 'd6416321-...'])
        >>> my_coll.distinct("name")
        ['Marco', 'Emma']
        >>> my_coll.distinct("city")
        ['Helsinki']
        >>> my_coll.distinct("food")
        ['apple', 'orange', {'likes_fruit': True, 'allergies': []}]
        >>> my_coll.distinct("food.1")
        ['orange']
        >>> my_coll.distinct("food.allergies")
        []
        >>> my_coll.distinct("food.likes_fruit")
        [True]

    Note:
        It must be kept in mind that `distinct` is a client-side operation,
        which effectively browses all required documents using the logic
        of the `find` method and collects the unique values found for `key`.
        As such, there may be performance, latency and ultimately
        billing implications if the amount of matching documents is large.

    Note:
        For details on the behaviour of "distinct" in conjunction with
        real-time changes in the collection contents, see the
        Note of the `find` command.
    """

    # lazy-import here to avoid circular import issues
    from astrapy.cursors import CollectionFindCursor

    _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
        (general_method_timeout_ms, "general_method_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (
            self.api_options.timeout_options.general_method_timeout_ms,
            "general_method_timeout_ms",
        ),
    )
    _request_timeout_ms, _rt_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    # prepare the extraction machinery for the requested key
    key_extractor = _create_document_key_extractor(key)
    safe_key = _reduce_distinct_key_to_safe(key)
    if safe_key == "":
        raise ValueError(
            "The 'key' parameter for distinct cannot be empty "
            "or start with a list index."
        )
    # relaxing the type hint (limited to within this method body)
    base_cursor: CollectionFindCursor[dict[str, Any], dict[str, Any]] = (
        CollectionFindCursor(
            collection=self,
            request_timeout_ms=_request_timeout_ms,
            overall_timeout_ms=_general_method_timeout_ms,
            request_timeout_label=_rt_label,
            overall_timeout_label=_gmt_label,
        )  # type: ignore[assignment]
    )
    doc_cursor = base_cursor.filter(filter).project({safe_key: True})
    # walk the cursor, deduplicating items through their hashes
    seen_hashes: set[Any] = set()
    unique_values: list[Any] = []
    logger.info(f"running distinct() on '{self.name}'")
    for document in doc_cursor:
        for item in key_extractor(document):
            item_hash = _hash_document(
                item, options=self.api_options.serdes_options
            )
            if item_hash in seen_hashes:
                continue
            seen_hashes.add(item_hash)
            unique_values.append(item)
    logger.info(f"finished running distinct() on '{self.name}'")
    return unique_values
def drop(self, *, collection_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Drop the collection, i.e. delete it from the database along with all the documents it contains.

Args

collection_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for collection_admin_timeout_ms.
timeout_ms
an alias for collection_admin_timeout_ms.

Example

>>> my_coll.find_one({})
{'_id': '...', 'a': 100}
>>> my_coll.drop()
>>> my_coll.find_one({})
Traceback (most recent call last):
    ... ...
astrapy.exceptions.DataAPIResponseException: Collection does not exist, collection name: my_collection

Note

Use with caution.

Note

Once the method succeeds, methods on this object can still be invoked: however, this hardly makes sense as the underlying actual collection is no more. It is responsibility of the developer to design a correct flow which avoids using a deceased collection any further.

Expand source code
def drop(
    self,
    *,
    collection_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Delete this collection from the database, together with every
    document it contains.

    Args:
        collection_admin_timeout_ms: a timeout, in milliseconds, applied to
            the underlying API request. This object's defaults are used when
            not provided. (Exactly one API request is issued by this method,
            so all timeout parameters are equivalent.)
        request_timeout_ms: an alias for `collection_admin_timeout_ms`.
        timeout_ms: an alias for `collection_admin_timeout_ms`.

    Example:
        >>> my_coll.find_one({})
        {'_id': '...', 'a': 100}
        >>> my_coll.drop()
        >>> my_coll.find_one({})
        Traceback (most recent call last):
            ... ...
        astrapy.exceptions.DataAPIResponseException: Collection does not exist, collection name: my_collection

    Note:
        Use with caution.

    Note:
        After a successful drop this object can still have its methods
        invoked, but doing so hardly makes sense since the server-side
        collection no longer exists. Designing a flow that avoids any
        further use of a dropped collection is the developer's
        responsibility.
    """

    # All timeout aliases are forwarded untouched; the database object
    # performs the actual selection among them.
    timeout_kwargs = {
        "collection_admin_timeout_ms": collection_admin_timeout_ms,
        "request_timeout_ms": request_timeout_ms,
        "timeout_ms": timeout_ms,
    }
    logger.info(f"dropping collection '{self.name}' (self)")
    # Delegate the API call to the parent database object.
    self.database.drop_collection(self.name, **timeout_kwargs)
    logger.info(f"finished dropping collection '{self.name}' (self)")
def estimated_document_count(self, *, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> int

Query the API server for an estimate of the document count in the collection.

Contrary to count_documents, this method has no filtering parameters.

Args

general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Returns

a server-provided estimate count of the documents in the collection.

Example

>>> my_coll.estimated_document_count()
35700
Expand source code
def estimated_document_count(
    self,
    *,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> int:
    """
    Ask the API server for an approximate count of the documents
    in the collection.

    Unlike `count_documents`, this method accepts no filtering parameters.

    Args:
        general_method_timeout_ms: a timeout, in milliseconds, applied to
            the underlying API request. This object's defaults are used when
            not provided. (Exactly one API request is issued by this method,
            so all timeout parameters are equivalent.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a server-provided estimate count of the documents in the collection.

    Example:
        >>> my_coll.estimated_document_count()
        35700
    """

    # Resolve the effective per-request timeout from the aliases and
    # this object's configured defaults.
    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info(f"estimatedDocumentCount on '{self.name}'")
    ed_response = self._converted_request(
        payload={"estimatedDocumentCount": {}},
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    logger.info(f"finished estimatedDocumentCount on '{self.name}'")
    status_block = ed_response.get("status", {})
    if "count" not in status_block:
        # A response lacking the count is malformed: surface it verbatim.
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from estimatedDocumentCount API command.",
            raw_response=ed_response,
        )
    count: int = status_block["count"]
    return count
def find(self, filter: FilterType | None = None, *, projection: ProjectionType | None = None, document_type: type[DOC2] | None = None, skip: int | None = None, limit: int | None = None, include_similarity: bool | None = None, include_sort_vector: bool | None = None, sort: SortType | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> CollectionFindCursor[DOC, DOC2]

Find documents on the collection, matching a certain provided filter.

The method returns a Cursor that can then be iterated over. Depending on the method call pattern, the iteration over all documents can reflect collection mutations occurred since the find method was called, or not. In cases where the cursor reflects mutations in real-time, it will iterate over cursors in an approximate way (i.e. exhibiting occasional skipped or duplicate documents). This happens when making use of the sort option in a non-vector-search manner.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
projection
it controls which parts of the document are returned. It can be an allow-list: {"f1": True, "f2": True}, or a deny-list: {"fx": False, "fy": False}, but not a mixture (except for the _id and other special fields, which can be associated to both True or False independently of the rest of the specification). The special star-projections {"*": True} and {"*": False} have the effect of returning the whole document and {} respectively. For lists in documents, slice directives can be passed to select portions of the list: for instance, {"array": {"$slice": 2}}, {"array": {"$slice": -2}}, {"array": {"$slice": [4, 2]}} or {"array": {"$slice": [-4, 2]}}. An iterable over strings will be treated implicitly as an allow-list. The default projection (used if this parameter is not passed) does not necessarily include "special" fields such as $vector or $vectorize. See the Data API documentation for more on projections.
document_type
this parameter acts as a formal specifier for the type checker. If omitted, the resulting cursor is implicitly a CollectionFindCursor[DOC, DOC], i.e. maintains the same type for the items it returns as that for the documents in the table. Strictly typed code may want to specify this parameter especially when a projection is given.
skip
with this integer parameter, what would be the first skip documents returned by the query are discarded, and the results start from the (skip+1)-th document. This parameter can be used only in conjunction with an explicit sort criterion of the ascending/descending type (i.e. it cannot be used when not sorting, nor with vector-based ANN search).
limit
this (integer) parameter sets a limit over how many documents are returned. Once limit is reached (or the cursor is exhausted for lack of matching documents), nothing more is returned.
include_similarity
a boolean to request the numeric value of the similarity to be returned as an added "$similarity" key in each returned document. Can only be used for vector ANN search, i.e. when either vector is supplied or the sort parameter has the shape {"$vector": …}.
include_sort_vector
a boolean to request the search query vector. If set to True (and if the invocation is a vector search), calling the get_sort_vector method on the returned cursor will yield the vector used for the ANN search.
sort
with this dictionary parameter one can control the order the documents are returned. See the Note about sorting, as well as the one about upper bounds, for details. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
request_timeout_ms
a timeout, in milliseconds, for each single one of the underlying HTTP requests used to fetch documents as the cursor is iterated over. If not passed, the collection-level setting is used instead.
timeout_ms
an alias for request_timeout_ms.

Returns

a Cursor object representing iterations over the matching documents
(see the Cursor object for how to use it; the simplest thing is to
run a for loop:
for document in collection.find(...):).

Examples

>>> filter = {"seq": {"$exists": True}}
>>> for doc in my_coll.find(filter, projection={"seq": True}, limit=5):
...     print(doc["seq"])
...
37
35
10
36
27
>>> cursor1 = my_coll.find(
...     {},
...     limit=4,
...     sort={"seq": astrapy.constants.SortMode.DESCENDING},
... )
>>> [doc["_id"] for doc in cursor1]
['97e85f81-...', '1581efe4-...', '...', '...']
>>> cursor2 = my_coll.find({}, limit=3)
>>> cursor2.distinct("seq")
[37, 35, 10]
>>> my_coll.insert_many([
...     {"tag": "A", "$vector": [4, 5]},
...     {"tag": "B", "$vector": [3, 4]},
...     {"tag": "C", "$vector": [3, 2]},
...     {"tag": "D", "$vector": [4, 1]},
...     {"tag": "E", "$vector": [2, 5]},
... ])
>>> ann_tags = [
...     document["tag"]
...     for document in my_coll.find(
...         {},
...         sort={"$vector": [3, 3]},
...         limit=3,
...     )
... ]
>>> ann_tags
['A', 'B', 'C']
>>> # (assuming the collection has metric VectorMetric.COSINE)
>>> cursor = my_coll.find(
...     sort={"$vector": [3, 3]},
...     limit=3,
...     include_sort_vector=True,
... )
>>> cursor.get_sort_vector()
[3.0, 3.0]
>>> matches = list(cursor)
>>> cursor.get_sort_vector()
[3.0, 3.0]

Note

The following are example values for the sort parameter. When no particular order is required: sort={} # (default when parameter not provided) When sorting by a certain value in ascending/descending order: sort={"field": SortMode.ASCENDING} sort={"field": SortMode.DESCENDING} When sorting first by "field" and then by "subfield" (while modern Python versions preserve the order of dictionaries, it is suggested for clarity to employ a collections.OrderedDict in these cases): sort={ "field": SortMode.ASCENDING, "subfield": SortMode.ASCENDING, } When running a vector similarity (ANN) search: sort={"$vector": [0.4, 0.15, -0.5]}

Note

Some combinations of arguments impose an implicit upper bound on the number of documents that are returned by the Data API. More specifically: (a) Vector ANN searches cannot return more than a number of documents that at the time of writing is set to 1000 items. (b) When using a sort criterion of the ascending/descending type, the Data API will return a smaller number of documents, set to 20 at the time of writing, and stop there. The returned documents are the top results across the whole collection according to the requested criterion. These provisions should be kept in mind even when subsequently running a command such as .distinct() on a cursor.

Note

When not specifying sorting criteria at all (by vector or otherwise), the cursor can scroll through an arbitrary number of documents as the Data API and the client periodically exchange new chunks of documents. It should be noted that the behavior of the cursor in the case documents have been added/removed after the find was started depends on database internals and it is not guaranteed, nor excluded, that such "real-time" changes in the data would be picked up by the cursor.

Expand source code
def find(
    self,
    filter: FilterType | None = None,
    *,
    projection: ProjectionType | None = None,
    document_type: type[DOC2] | None = None,
    skip: int | None = None,
    limit: int | None = None,
    include_similarity: bool | None = None,
    include_sort_vector: bool | None = None,
    sort: SortType | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionFindCursor[DOC, DOC2]:
    """
    Find documents on the collection matching a certain provided filter.

    The method returns a Cursor that can then be iterated over. Depending
    on the method call pattern, iterating over all documents may or may not
    reflect collection mutations occurred after `find` was called. In cases
    where the cursor reflects mutations in real-time, the iteration is
    approximate (occasional skipped or duplicate documents may be observed).
    This happens when the `sort` option is used in a non-vector-search
    manner.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        projection: it controls which parts of the document are returned.
            It can be an allow-list: `{"f1": True, "f2": True}`,
            or a deny-list: `{"fx": False, "fy": False}`, but not a mixture
            (except for the `_id` and other special fields, which can be
            associated to both True or False independently of the rest
            of the specification).
            The special star-projections `{"*": True}` and `{"*": False}`
            have the effect of returning the whole document and `{}` respectively.
            For lists in documents, slice directives can be passed to select
            portions of the list: for instance, `{"array": {"$slice": 2}}`,
            `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
            `{"array": {"$slice": [-4, 2]}}`.
            An iterable over strings will be treated implicitly as an allow-list.
            The default projection (used if this parameter is not passed) does not
            necessarily include "special" fields such as `$vector` or `$vectorize`.
            See the Data API documentation for more on projections.
        document_type: this parameter acts as a formal specifier for the
            type checker. If omitted, the resulting cursor is implicitly a
            `CollectionFindCursor[DOC, DOC]`, i.e. maintains the same type for
            the items it returns as that for the documents in the table. Strictly
            typed code may want to specify this parameter especially when a
            projection is given.
        skip: with this integer parameter, what would be the first `skip`
            documents returned by the query are discarded, and the results
            start from the (skip+1)-th document.
            This parameter can be used only in conjunction with an explicit
            `sort` criterion of the ascending/descending type (i.e. it cannot
            be used when not sorting, nor with vector-based ANN search).
        limit: this (integer) parameter sets a limit over how many documents
            are returned. Once `limit` is reached (or the cursor is exhausted
            for lack of matching documents), nothing more is returned.
        include_similarity: a boolean to request the numeric value of the
            similarity to be returned as an added "$similarity" key in each
            returned document. Can only be used for vector ANN search, i.e.
            when either `vector` is supplied or the `sort` parameter has the
            shape {"$vector": ...}.
        include_sort_vector: a boolean to request the search query vector.
            If set to True (and if the invocation is a vector search), calling
            the `get_sort_vector` method on the returned cursor will yield
            the vector used for the ANN search.
        sort: with this dictionary parameter one can control the order
            the documents are returned. See the Note about sorting, as well as
            the one about upper bounds, for details.
            Vector-based ANN sorting is achieved by providing a "$vector"
            or a "$vectorize" key in `sort`.
        request_timeout_ms: a timeout, in milliseconds, for each single one
            of the underlying HTTP requests used to fetch documents as the
            cursor is iterated over.
            If not passed, the collection-level setting is used instead.
        timeout_ms: an alias for `request_timeout_ms`.

    Returns:
        a Cursor object representing iterations over the matching documents
        (see the Cursor object for how to use it. The simplest thing is to
        run a for loop: `for document in collection.find(...):`).

    Examples:
        >>> filter = {"seq": {"$exists": True}}
        >>> for doc in my_coll.find(filter, projection={"seq": True}, limit=5):
        ...     print(doc["seq"])
        ...
        37
        35
        10
        36
        27
        >>> cursor1 = my_coll.find(
        ...     {},
        ...     limit=4,
        ...     sort={"seq": astrapy.constants.SortMode.DESCENDING},
        ... )
        >>> [doc["_id"] for doc in cursor1]
        ['97e85f81-...', '1581efe4-...', '...', '...']
        >>> cursor2 = my_coll.find({}, limit=3)
        >>> cursor2.distinct("seq")
        [37, 35, 10]

        >>> my_coll.insert_many([
        ...     {"tag": "A", "$vector": [4, 5]},
        ...     {"tag": "B", "$vector": [3, 4]},
        ...     {"tag": "C", "$vector": [3, 2]},
        ...     {"tag": "D", "$vector": [4, 1]},
        ...     {"tag": "E", "$vector": [2, 5]},
        ... ])
        >>> ann_tags = [
        ...     document["tag"]
        ...     for document in my_coll.find(
        ...         {},
        ...         sort={"$vector": [3, 3]},
        ...         limit=3,
        ...     )
        ... ]
        >>> ann_tags
        ['A', 'B', 'C']
        >>> # (assuming the collection has metric VectorMetric.COSINE)

        >>> cursor = my_coll.find(
        ...     sort={"$vector": [3, 3]},
        ...     limit=3,
        ...     include_sort_vector=True,
        ... )
        >>> cursor.get_sort_vector()
        [3.0, 3.0]
        >>> matches = list(cursor)
        >>> cursor.get_sort_vector()
        [3.0, 3.0]

    Note:
        The following are example values for the `sort` parameter.
        When no particular order is required:
            sort={}  # (default when parameter not provided)
        When sorting by a certain value in ascending/descending order:
            sort={"field": SortMode.ASCENDING}
            sort={"field": SortMode.DESCENDING}
        When sorting first by "field" and then by "subfield"
        (while modern Python versions preserve the order of dictionaries,
        it is suggested for clarity to employ a `collections.OrderedDict`
        in these cases):
            sort={
                "field": SortMode.ASCENDING,
                "subfield": SortMode.ASCENDING,
            }
        When running a vector similarity (ANN) search:
            sort={"$vector": [0.4, 0.15, -0.5]}

    Note:
        Some combinations of arguments impose an implicit upper bound on the
        number of documents that are returned by the Data API. More specifically:
        (a) Vector ANN searches cannot return more than a number of documents
        that at the time of writing is set to 1000 items.
        (b) When using a sort criterion of the ascending/descending type,
        the Data API will return a smaller number of documents, set to 20
        at the time of writing, and stop there. The returned documents are
        the top results across the whole collection according to the requested
        criterion.
        These provisions should be kept in mind even when subsequently running
        a command such as `.distinct()` on a cursor.

    Note:
        When not specifying sorting criteria at all (by vector or otherwise),
        the cursor can scroll through an arbitrary number of documents as
        the Data API and the client periodically exchange new chunks of documents.
        It should be noted that the behavior of the cursor in the case documents
        have been added/removed after the `find` was started depends on database
        internals and it is not guaranteed, nor excluded, that such "real-time"
        changes in the data would be picked up by the cursor.
    """

    # Deferred import to break a circular dependency with the cursors module.
    from astrapy.cursors import CollectionFindCursor

    # Pick the first explicitly-given timeout, falling back to the
    # collection-level per-request default.
    chosen_timeout_ms, timeout_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    # Build the bare cursor first, then configure it through its fluent API.
    base_cursor: CollectionFindCursor[DOC, DOC2] = CollectionFindCursor(
        collection=self,
        request_timeout_ms=chosen_timeout_ms,
        overall_timeout_ms=None,
        request_timeout_label=timeout_label,
    )
    configured_cursor = (
        base_cursor.filter(filter)
        .project(projection)
        .skip(skip)
        .limit(limit)
        .sort(sort)
        .include_similarity(include_similarity)
        .include_sort_vector(include_sort_vector)
    )
    return configured_cursor
def find_one(self, filter: FilterType | None = None, *, projection: ProjectionType | None = None, include_similarity: bool | None = None, sort: SortType | None = None, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> Optional[~DOC]

Run a search, returning the first document in the collection that matches provided filters, if any is found.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
projection
it controls which parts of the document are returned. It can be an allow-list: {"f1": True, "f2": True}, or a deny-list: {"fx": False, "fy": False}, but not a mixture (except for the _id and other special fields, which can be associated to both True or False independently of the rest of the specification). The special star-projections {"*": True} and {"*": False} have the effect of returning the whole document and {} respectively. For lists in documents, slice directives can be passed to select portions of the list: for instance, {"array": {"$slice": 2}}, {"array": {"$slice": -2}}, {"array": {"$slice": [4, 2]}} or {"array": {"$slice": [-4, 2]}}. An iterable over strings will be treated implicitly as an allow-list. The default projection (used if this parameter is not passed) does not necessarily include "special" fields such as $vector or $vectorize. See the Data API documentation for more on projections.
include_similarity
a boolean to request the numeric value of the similarity to be returned as an added "$similarity" key in the returned document. Can only be used for vector ANN search, i.e. when either vector is supplied or the sort parameter has the shape {"$vector": …}.
sort
with this dictionary parameter one can control the order the documents are returned. See the Note about sorting for details. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Returns

a dictionary expressing the required document, otherwise None.

Examples

>>> my_coll.find_one({})
{'_id': '68d1e515-...', 'seq': 37}
>>> my_coll.find_one({"seq": 10})
{'_id': 'd560e217-...', 'seq': 10}
>>> my_coll.find_one({"seq": 1011})
>>> # (returns None for no matches)
>>> my_coll.find_one({}, projection={"seq": False})
{'_id': '68d1e515-...'}
>>> my_coll.find_one(
...     {},
...     sort={"seq": astrapy.constants.SortMode.DESCENDING},
... )
{'_id': '97e85f81-...', 'seq': 69}
>>> my_coll.find_one({}, sort={"$vector": [1, 0]}, projection={"*": True})
{'_id': '...', 'tag': 'D', '$vector': [4.0, 1.0]}

Note

See the find method for more details on the accepted parameters (whereas skip and limit are not valid parameters for find_one).

Expand source code
def find_one(
    self,
    filter: FilterType | None = None,
    *,
    projection: ProjectionType | None = None,
    include_similarity: bool | None = None,
    sort: SortType | None = None,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> DOC | None:
    """
    Run a search and return the first document in the collection matching
    the provided filters, if any is found.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        projection: it controls which parts of the document are returned.
            It can be an allow-list: `{"f1": True, "f2": True}`,
            or a deny-list: `{"fx": False, "fy": False}`, but not a mixture
            (except for the `_id` and other special fields, which can be
            associated to both True or False independently of the rest
            of the specification).
            The special star-projections `{"*": True}` and `{"*": False}`
            have the effect of returning the whole document and `{}` respectively.
            For lists in documents, slice directives can be passed to select
            portions of the list: for instance, `{"array": {"$slice": 2}}`,
            `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
            `{"array": {"$slice": [-4, 2]}}`.
            An iterable over strings will be treated implicitly as an allow-list.
            The default projection (used if this parameter is not passed) does not
            necessarily include "special" fields such as `$vector` or `$vectorize`.
            See the Data API documentation for more on projections.
        include_similarity: a boolean to request the numeric value of the
            similarity to be returned as an added "$similarity" key in the
            returned document. Can only be used for vector ANN search, i.e.
            when either `vector` is supplied or the `sort` parameter has the
            shape {"$vector": ...}.
        sort: with this dictionary parameter one can control the order
            the documents are returned. See the Note about sorting for details.
            Vector-based ANN sorting is achieved by providing a "$vector"
            or a "$vectorize" key in `sort`.
        general_method_timeout_ms: a timeout, in milliseconds, applied to
            the underlying API request. This object's defaults are used when
            not provided. (Exactly one API request is issued by this method,
            so all timeout parameters are equivalent.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a dictionary expressing the required document, otherwise None.

    Examples:
        >>> my_coll.find_one({})
        {'_id': '68d1e515-...', 'seq': 37}
        >>> my_coll.find_one({"seq": 10})
        {'_id': 'd560e217-...', 'seq': 10}
        >>> my_coll.find_one({"seq": 1011})
        >>> # (returns None for no matches)
        >>> my_coll.find_one({}, projection={"seq": False})
        {'_id': '68d1e515-...'}
        >>> my_coll.find_one(
        ...     {},
        ...     sort={"seq": astrapy.constants.SortMode.DESCENDING},
        ... )
        {'_id': '97e85f81-...', 'seq': 69}
        >>> my_coll.find_one({}, sort={"$vector": [1, 0]}, projection={"*": True})
        {'_id': '...', 'tag': 'D', '$vector': [4.0, 1.0]}

    Note:
        See the `find` method for more details on the accepted parameters
        (whereas `skip` and `limit` are not valid parameters for `find_one`).
    """

    # Resolve the effective per-request timeout from the aliases and
    # this object's configured defaults.
    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # Assemble the findOne command body, omitting unset parameters.
    command_body = {}
    if filter is not None:
        command_body["filter"] = filter
    normalized_projection = normalize_optional_projection(projection)
    if normalized_projection is not None:
        command_body["projection"] = normalized_projection
    if include_similarity is not None:
        command_body["options"] = {"includeSimilarity": include_similarity}
    if sort is not None:
        command_body["sort"] = sort
    fo_response = self._converted_request(
        payload={"findOne": command_body},
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    response_data = fo_response.get("data") or {}
    if "document" not in response_data:
        # The API contract requires a "document" entry (possibly null).
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from findOne API command.",
            raw_response=fo_response,
        )
    # A null document means "no match": this returns None transparently.
    return response_data["document"]  # type: ignore[no-any-return]
def find_one_and_delete(self, filter: FilterType, *, projection: ProjectionType | None = None, sort: SortType | None = None, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> Optional[~DOC]

Find a document in the collection and delete it. The deleted document, however, is the return value of the method.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
projection
it controls which parts of the document are returned. It can be an allow-list: {"f1": True, "f2": True}, or a deny-list: {"fx": False, "fy": False}, but not a mixture (except for the _id and other special fields, which can be associated to both True or False independently of the rest of the specification). The special star-projections {"*": True} and {"*": False} have the effect of returning the whole document and {} respectively. For lists in documents, slice directives can be passed to select portions of the list: for instance, {"array": {"$slice": 2}}, {"array": {"$slice": -2}}, {"array": {"$slice": [4, 2]}} or {"array": {"$slice": [-4, 2]}}. An iterable over strings will be treated implicitly as an allow-list. The default projection (used if this parameter is not passed) does not necessarily include "special" fields such as $vector or $vectorize. See the Data API documentation for more on projections.
sort
with this dictionary parameter one can control the sorting order of the documents matching the filter, effectively determining what document will come first and hence be the deleted one. See the find method for more on sorting.
general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Returns

Either the document (or a projection thereof, as requested), or None if no matches were found in the first place.

Example

>>> my_coll.insert_many(
...     [
...         {"species": "swan", "class": "Aves"},
...         {"species": "frog", "class": "Amphibia"},
...     ],
... )
CollectionInsertManyResult(...)
>>> my_coll.find_one_and_delete(
...     {"species": {"$ne": "frog"}},
...     projection=["species"],
... )
{'_id': '5997fb48-...', 'species': 'swan'}
>>> my_coll.find_one_and_delete({"species": {"$ne": "frog"}})
>>> # (returns None for no matches)
Expand source code
def find_one_and_delete(
    self,
    filter: FilterType,
    *,
    projection: ProjectionType | None = None,
    sort: SortType | None = None,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> DOC | None:
    """
    Locate a single document matching the filter, delete it, and hand
    the (now-deleted) document back to the caller.

    Args:
        filter: a dictionary predicate following the Data API filter
            syntax, for example:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            Refer to the Data API documentation for all available operators.
        projection: selects which parts of the document are returned.
            Either an allow-list (`{"f1": True, "f2": True}`) or a
            deny-list (`{"fx": False, "fy": False}`); mixing the two is
            not allowed, except that `_id` and other special fields may
            carry True or False independently of the rest.
            The star-projections `{"*": True}` and `{"*": False}`
            return, respectively, the whole document or `{}`.
            List fields accept slice directives, e.g.
            `{"array": {"$slice": 2}}`, `{"array": {"$slice": -2}}`,
            `{"array": {"$slice": [4, 2]}}`, `{"array": {"$slice": [-4, 2]}}`.
            Passing an iterable of strings is shorthand for an allow-list.
            The default projection (when this parameter is omitted) may
            leave out "special" fields such as `$vector` or `$vectorize`.
            See the Data API documentation for more on projections.
        sort: a dictionary controlling the ordering of the matching
            documents, and therefore which one comes first and gets
            deleted. See the `find` method for details on sorting.
        general_method_timeout_ms: a timeout, in milliseconds, applied to
            the underlying API request. Falls back to this object's
            defaults when not given. (Exactly one API request is issued,
            so all timeout parameters behave identically.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        The deleted document (possibly projected, as requested), or None
        when nothing matched the filter.

    Example:
        >>> my_coll.insert_many(
        ...     [
        ...         {"species": "swan", "class": "Aves"},
        ...         {"species": "frog", "class": "Amphibia"},
        ...     ],
        ... )
        CollectionInsertManyResult(...)
        >>> my_coll.find_one_and_delete(
        ...     {"species": {"$ne": "frog"}},
        ...     projection=["species"],
        ... )
        {'_id': '5997fb48-...', 'species': 'swan'}
        >>> my_coll.find_one_and_delete({"species": {"$ne": "frog"}})
        >>> # (returns None for no matches)
    """

    # Resolve the effective request timeout from the three aliases plus defaults.
    resolved_timeout_ms, timeout_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # Assemble the command body, dropping keys whose value is None.
    raw_command = {
        "filter": filter,
        "sort": sort,
        "projection": normalize_optional_projection(projection),
    }
    command_body = {
        key: value for key, value in raw_command.items() if value is not None
    }
    logger.info(f"findOneAndDelete on '{self.name}'")
    api_response = self._converted_request(
        payload={"findOneAndDelete": command_body},
        timeout_context=_TimeoutContext(
            request_ms=resolved_timeout_ms, label=timeout_label
        ),
    )
    logger.info(f"finished findOneAndDelete on '{self.name}'")
    response_data = api_response.get("data", {})
    if "document" in response_data:
        # The API reported a deleted document: return it as-is.
        return response_data["document"]  # type: ignore[no-any-return]
    # No document field: a zero deletedCount means "no match"; anything
    # else is an unexpected response shape.
    if api_response.get("status", {}).get("deletedCount") == 0:
        return None
    raise UnexpectedDataAPIResponseException(
        text="Faulty response from find_one_and_delete API command.",
        raw_response=api_response,
    )
def find_one_and_replace(self, filter: FilterType, replacement: DOC, *, projection: ProjectionType | None = None, sort: SortType | None = None, upsert: bool = False, return_document: str = 'before', general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> Optional[~DOC]

Find a document on the collection and replace it entirely with a new one, optionally inserting a new one if no match is found.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
replacement
the new document to write into the collection.
projection
it controls which parts of the document are returned. It can be an allow-list: {"f1": True, "f2": True}, or a deny-list: {"fx": False, "fy": False}, but not a mixture (except for the _id and other special fields, which can be associated to both True or False independently of the rest of the specification). The special star-projections {"*": True} and {"*": False} have the effect of returning the whole document and {} respectively. For lists in documents, slice directives can be passed to select portions of the list: for instance, {"array": {"$slice": 2}}, {"array": {"$slice": -2}}, {"array": {"$slice": [4, 2]}} or {"array": {"$slice": [-4, 2]}}. An iterable over strings will be treated implicitly as an allow-list. The default projection (used if this parameter is not passed) does not necessarily include "special" fields such as $vector or $vectorize. See the Data API documentation for more on projections.
sort
with this dictionary parameter one can control the sorting order of the documents matching the filter, effectively determining what document will come first and hence be the replaced one. See the find method for more on sorting. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
upsert
this parameter controls the behavior in absence of matches. If True, replacement is inserted as a new document if no matches are found on the collection. If False, the operation silently does nothing in case of no matches.
return_document
a flag controlling what document is returned: if set to ReturnDocument.BEFORE, or the string "before", the document found on database is returned; if set to ReturnDocument.AFTER, or the string "after", the new document is returned. The default is "before".
general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Returns

A document (or a projection thereof, as required), either the one before the replace operation or the one after that. Alternatively, the method returns None to represent that no matching document was found, or that no replacement was inserted (depending on the return_document parameter).

Example

>>> my_coll.insert_one({"_id": "rule1", "text": "all animals are equal"})
CollectionInsertOneResult(...)
>>> my_coll.find_one_and_replace(
...     {"_id": "rule1"},
...     {"text": "some animals are more equal!"},
... )
{'_id': 'rule1', 'text': 'all animals are equal'}
>>> my_coll.find_one_and_replace(
...     {"text": "some animals are more equal!"},
...     {"text": "and the pigs are the rulers"},
...     return_document=astrapy.constants.ReturnDocument.AFTER,
... )
{'_id': 'rule1', 'text': 'and the pigs are the rulers'}
>>> my_coll.find_one_and_replace(
...     {"_id": "rule2"},
...     {"text": "F=ma^2"},
...     return_document=astrapy.constants.ReturnDocument.AFTER,
... )
>>> # (returns None for no matches)
>>> my_coll.find_one_and_replace(
...     {"_id": "rule2"},
...     {"text": "F=ma"},
...     upsert=True,
...     return_document=astrapy.constants.ReturnDocument.AFTER,
...     projection={"_id": False},
... )
{'text': 'F=ma'}
Expand source code
def find_one_and_replace(
    self,
    filter: FilterType,
    replacement: DOC,
    *,
    projection: ProjectionType | None = None,
    sort: SortType | None = None,
    upsert: bool = False,
    return_document: str = ReturnDocument.BEFORE,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> DOC | None:
    """
    Locate a single document on the collection and swap it out entirely
    for a new one, with an option to insert the new document when
    nothing matches.

    Args:
        filter: a dictionary predicate following the Data API filter
            syntax, for example:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            Refer to the Data API documentation for all available operators.
        replacement: the new document to write into the collection.
        projection: selects which parts of the document are returned.
            Either an allow-list (`{"f1": True, "f2": True}`) or a
            deny-list (`{"fx": False, "fy": False}`); mixing the two is
            not allowed, except that `_id` and other special fields may
            carry True or False independently of the rest.
            The star-projections `{"*": True}` and `{"*": False}`
            return, respectively, the whole document or `{}`.
            List fields accept slice directives, e.g.
            `{"array": {"$slice": 2}}`, `{"array": {"$slice": -2}}`,
            `{"array": {"$slice": [4, 2]}}`, `{"array": {"$slice": [-4, 2]}}`.
            Passing an iterable of strings is shorthand for an allow-list.
            The default projection (when this parameter is omitted) may
            leave out "special" fields such as `$vector` or `$vectorize`.
            See the Data API documentation for more on projections.
        sort: a dictionary controlling the ordering of the matching
            documents, and therefore which one comes first and gets
            replaced. See the `find` method for details on sorting.
            Vector-based ANN sorting is requested by putting a "$vector"
            or a "$vectorize" key in `sort`.
        upsert: governs what happens when nothing matches. With True,
            `replacement` is inserted as a brand-new document when the
            collection has no match; with False, a no-match situation is
            silently a no-op.
        return_document: selects which document the call returns:
            `ReturnDocument.BEFORE` (or the string "before") yields the
            document as found on the database; `ReturnDocument.AFTER`
            (or the string "after") yields the new document.
            The default is "before".
        general_method_timeout_ms: a timeout, in milliseconds, applied to
            the underlying API request. Falls back to this object's
            defaults when not given. (Exactly one API request is issued,
            so all timeout parameters behave identically.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        A document (possibly projected, as requested): either the one
        found before the replacement or the one resulting from it.
        The method instead returns None to signal that no matching
        document existed, or that no replacement was inserted
        (according to the `return_document` parameter).

    Example:
        >>> my_coll.insert_one({"_id": "rule1", "text": "all animals are equal"})
        CollectionInsertOneResult(...)
        >>> my_coll.find_one_and_replace(
        ...     {"_id": "rule1"},
        ...     {"text": "some animals are more equal!"},
        ... )
        {'_id': 'rule1', 'text': 'all animals are equal'}
        >>> my_coll.find_one_and_replace(
        ...     {"text": "some animals are more equal!"},
        ...     {"text": "and the pigs are the rulers"},
        ...     return_document=astrapy.constants.ReturnDocument.AFTER,
        ... )
        {'_id': 'rule1', 'text': 'and the pigs are the rulers'}
        >>> my_coll.find_one_and_replace(
        ...     {"_id": "rule2"},
        ...     {"text": "F=ma^2"},
        ...     return_document=astrapy.constants.ReturnDocument.AFTER,
        ... )
        >>> # (returns None for no matches)
        >>> my_coll.find_one_and_replace(
        ...     {"_id": "rule2"},
        ...     {"text": "F=ma"},
        ...     upsert=True,
        ...     return_document=astrapy.constants.ReturnDocument.AFTER,
        ...     projection={"_id": False},
        ... )
        {'text': 'F=ma'}
    """

    # Resolve the effective request timeout from the three aliases plus defaults.
    resolved_timeout_ms, timeout_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # Assemble the command body, dropping keys whose value is None.
    raw_command = {
        "filter": filter,
        "projection": normalize_optional_projection(projection),
        "replacement": replacement,
        "options": {
            "returnDocument": return_document,
            "upsert": upsert,
        },
        "sort": sort,
    }
    command_body = {
        key: value for key, value in raw_command.items() if value is not None
    }
    logger.info(f"findOneAndReplace on '{self.name}'")
    api_response = self._converted_request(
        payload={"findOneAndReplace": command_body},
        timeout_context=_TimeoutContext(
            request_ms=resolved_timeout_ms, label=timeout_label
        ),
    )
    logger.info(f"finished findOneAndReplace on '{self.name}'")
    response_data = api_response.get("data", {})
    if "document" in response_data:
        # A None value here encodes "no document to return" (e.g. no match
        # without upsert); otherwise hand back the before/after document.
        return response_data["document"]  # type: ignore[no-any-return]
    raise UnexpectedDataAPIResponseException(
        text="Faulty response from find_one_and_replace API command.",
        raw_response=api_response,
    )
def find_one_and_update(self, filter: FilterType, update: dict[str, Any], *, projection: ProjectionType | None = None, sort: SortType | None = None, upsert: bool = False, return_document: str = 'before', general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> Optional[~DOC]

Find a document on the collection and update it as requested, optionally inserting a new one if no match is found.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
update
the update prescription to apply to the document, expressed as a dictionary as per Data API syntax. Examples are: {"$set": {"field": "value"}} {"$inc": {"counter": 10}} {"$unset": {"field": ""}} See the Data API documentation for the full syntax.
projection
it controls which parts of the document are returned. It can be an allow-list: {"f1": True, "f2": True}, or a deny-list: {"fx": False, "fy": False}, but not a mixture (except for the _id and other special fields, which can be associated to both True or False independently of the rest of the specification). The special star-projections {"*": True} and {"*": False} have the effect of returning the whole document and {} respectively. For lists in documents, slice directives can be passed to select portions of the list: for instance, {"array": {"$slice": 2}}, {"array": {"$slice": -2}}, {"array": {"$slice": [4, 2]}} or {"array": {"$slice": [-4, 2]}}. An iterable over strings will be treated implicitly as an allow-list. The default projection (used if this parameter is not passed) does not necessarily include "special" fields such as $vector or $vectorize. See the Data API documentation for more on projections.
sort
with this dictionary parameter one can control the sorting order of the documents matching the filter, effectively determining what document will come first and hence be the updated one. See the find method for more on sorting. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
upsert
this parameter controls the behavior in absence of matches. If True, a new document (resulting from applying the update to an empty document) is inserted if no matches are found on the collection. If False, the operation silently does nothing in case of no matches.
return_document
a flag controlling what document is returned: if set to ReturnDocument.BEFORE, or the string "before", the document found on database is returned; if set to ReturnDocument.AFTER, or the string "after", the new document is returned. The default is "before".
general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Returns

A document (or a projection thereof, as required), either the one before the update operation or the one after that. Alternatively, the method returns None to represent that no matching document was found, or that no update was applied (depending on the return_document parameter).

Example

>>> my_coll.insert_one({"Marco": "Polo"})
CollectionInsertOneResult(...)
>>> my_coll.find_one_and_update(
...     {"Marco": {"$exists": True}},
...     {"$set": {"title": "Mr."}},
... )
{'_id': 'a80106f2-...', 'Marco': 'Polo'}
>>> my_coll.find_one_and_update(
...     {"title": "Mr."},
...     {"$inc": {"rank": 3}},
...     projection=["title", "rank"],
...     return_document=astrapy.constants.ReturnDocument.AFTER,
... )
{'_id': 'a80106f2-...', 'title': 'Mr.', 'rank': 3}
>>> my_coll.find_one_and_update(
...     {"name": "Johnny"},
...     {"$set": {"rank": 0}},
...     return_document=astrapy.constants.ReturnDocument.AFTER,
... )
>>> # (returns None for no matches)
>>> my_coll.find_one_and_update(
...     {"name": "Johnny"},
...     {"$set": {"rank": 0}},
...     upsert=True,
...     return_document=astrapy.constants.ReturnDocument.AFTER,
... )
{'_id': 'cb4ef2ab-...', 'name': 'Johnny', 'rank': 0}
Expand source code
def find_one_and_update(
    self,
    filter: FilterType,
    update: dict[str, Any],
    *,
    projection: ProjectionType | None = None,
    sort: SortType | None = None,
    upsert: bool = False,
    return_document: str = ReturnDocument.BEFORE,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> DOC | None:
    """
    Find a document on the collection and update it as requested,
    optionally inserting a new one if no match is found.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        update: the update prescription to apply to the document, expressed
            as a dictionary as per Data API syntax. Examples are:
                {"$set": {"field": "value"}}
                {"$inc": {"counter": 10}}
                {"$unset": {"field": ""}}
            See the Data API documentation for the full syntax.
        projection: it controls which parts of the document are returned.
            It can be an allow-list: `{"f1": True, "f2": True}`,
            or a deny-list: `{"fx": False, "fy": False}`, but not a mixture
            (except for the `_id` and other special fields, which can be
            associated to both True or False independently of the rest
            of the specification).
            The special star-projections `{"*": True}` and `{"*": False}`
            have the effect of returning the whole document and `{}` respectively.
            For lists in documents, slice directives can be passed to select
            portions of the list: for instance, `{"array": {"$slice": 2}}`,
            `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
            `{"array": {"$slice": [-4, 2]}}`.
            An iterable over strings will be treated implicitly as an allow-list.
            The default projection (used if this parameter is not passed) does not
            necessarily include "special" fields such as `$vector` or `$vectorize`.
            See the Data API documentation for more on projections.
        sort: with this dictionary parameter one can control the sorting
            order of the documents matching the filter, effectively
            determining what document will come first and hence be the
            updated one. See the `find` method for more on sorting.
            Vector-based ANN sorting is achieved by providing a "$vector"
            or a "$vectorize" key in `sort`.
        upsert: this parameter controls the behavior in absence of matches.
            If True, a new document (resulting from applying the `update`
            to an empty document) is inserted if no matches are found on
            the collection. If False, the operation silently does nothing
            in case of no matches.
        return_document: a flag controlling what document is returned:
            if set to `ReturnDocument.BEFORE`, or the string "before",
            the document found on database is returned; if set to
            `ReturnDocument.AFTER`, or the string "after", the new
            document is returned. The default is "before".
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        A document (or a projection thereof, as required), either the one
        before the update operation or the one after that.
        Alternatively, the method returns None to represent
        that no matching document was found, or that no update
        was applied (depending on the `return_document` parameter).

    Example:
        >>> my_coll.insert_one({"Marco": "Polo"})
        CollectionInsertOneResult(...)
        >>> my_coll.find_one_and_update(
        ...     {"Marco": {"$exists": True}},
        ...     {"$set": {"title": "Mr."}},
        ... )
        {'_id': 'a80106f2-...', 'Marco': 'Polo'}
        >>> my_coll.find_one_and_update(
        ...     {"title": "Mr."},
        ...     {"$inc": {"rank": 3}},
        ...     projection=["title", "rank"],
        ...     return_document=astrapy.constants.ReturnDocument.AFTER,
        ... )
        {'_id': 'a80106f2-...', 'title': 'Mr.', 'rank': 3}
        >>> my_coll.find_one_and_update(
        ...     {"name": "Johnny"},
        ...     {"$set": {"rank": 0}},
        ...     return_document=astrapy.constants.ReturnDocument.AFTER,
        ... )
        >>> # (returns None for no matches)
        >>> my_coll.find_one_and_update(
        ...     {"name": "Johnny"},
        ...     {"$set": {"rank": 0}},
        ...     upsert=True,
        ...     return_document=astrapy.constants.ReturnDocument.AFTER,
        ... )
        {'_id': 'cb4ef2ab-...', 'name': 'Johnny', 'rank': 0}
    """

    # Resolve the effective timeout among the three equivalent parameters.
    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    options = {
        "returnDocument": return_document,
        "upsert": upsert,
    }
    # Build the command payload, omitting keys whose value is None.
    fo_payload = {
        "findOneAndUpdate": {
            k: v
            for k, v in {
                "filter": filter,
                "update": update,
                "options": options,
                "sort": sort,
                "projection": normalize_optional_projection(projection),
            }.items()
            if v is not None
        }
    }
    logger.info(f"findOneAndUpdate on '{self.name}'")
    fo_response = self._converted_request(
        payload=fo_payload,
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    logger.info(f"finished findOneAndUpdate on '{self.name}'")
    if "document" in fo_response.get("data", {}):
        # A None document encodes "nothing to return" (e.g. no match).
        ret_document = fo_response.get("data", {}).get("document")
        if ret_document is None:
            return None
        else:
            return ret_document  # type: ignore[no-any-return]
    else:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from find_one_and_update API command.",
            raw_response=fo_response,
        )
def info(self, *, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> CollectionInfo

Information on the collection (name, location, database), in the form of a CollectionInfo object.

Not to be confused with the collection options method (related to the collection internal configuration).

Args

database_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying DevOps API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for database_admin_timeout_ms.
timeout_ms
an alias for database_admin_timeout_ms.

Example

>>> my_coll.info().database_info.region
'eu-west-1'
>>> my_coll.info().full_name
'default_keyspace.my_v_collection'

Note

the returned CollectionInfo wraps, among other things, the database information: as such, calling this method triggers the same-named method of a Database object (which, in turn, performs a HTTP request to the DevOps API). See the documentation for Database.info() for more details.

Expand source code
def info(
    self,
    *,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionInfo:
    """
    Return a CollectionInfo object describing this collection:
    its name, location and database.

    This is distinct from the collection `options` method, which deals
    with the collection's internal configuration.

    Args:
        database_admin_timeout_ms: a timeout, in milliseconds, applied to
            the underlying DevOps API request. Falls back to this
            object's defaults when not given. (Exactly one API request
            is issued, so all timeout parameters behave identically.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.

    Example:
        >>> my_coll.info().database_info.region
        'eu-west-1'
        >>> my_coll.info().full_name
        'default_keyspace.my_v_collection'

    Note:
        the returned CollectionInfo wraps, among other things,
        the database information: as such, calling this method
        triggers the same-named method of a Database object (which, in turn,
        performs a HTTP request to the DevOps API).
        See the documentation for `Database.info()` for more details.
    """

    # Delegate the database-level lookup first (this performs a DevOps
    # API request), then wrap everything in a CollectionInfo.
    db_info = self.database.info(
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    return CollectionInfo(
        database_info=db_info,
        keyspace=self.keyspace,
        name=self.name,
        full_name=self.full_name,
    )
def insert_many(self, documents: Iterable[DOC], *, ordered: bool = False, chunk_size: int | None = None, concurrency: int | None = None, request_timeout_ms: int | None = None, general_method_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> CollectionInsertManyResult

Insert a list of documents into the collection. This is not an atomic operation.

Args

documents
an iterable of dictionaries, each a document to insert. Documents may specify their _id field or leave it out, in which case it will be added automatically.
ordered
if False (default), the insertions can occur in arbitrary order and possibly concurrently. If True, they are processed sequentially. If there are no specific reasons against it, unordered insertions are to be preferred as they complete much faster.
chunk_size
how many documents to include in a single API request. Exceeding the server maximum allowed value results in an error. Leave it unspecified (recommended) to use the system default.
concurrency
maximum number of concurrent requests to the API at a given time. It cannot be more than one for ordered insertions.
general_method_timeout_ms
a timeout, in milliseconds, for the whole requested operation (which may involve multiple API requests). If not passed, the collection-level setting is used instead.
request_timeout_ms
a timeout, in milliseconds, for each API request. If not passed, the collection-level setting is used instead.
timeout_ms
an alias for general_method_timeout_ms.

Returns

a CollectionInsertManyResult object.

Examples

>>> my_coll.count_documents({}, upper_bound=10)
0
>>> my_coll.insert_many(
...     [{"a": 10}, {"a": 5}, {"b": [True, False, False]}],
...     ordered=True,
... )
CollectionInsertManyResult(raw_results=..., inserted_ids=['184bb06f-...', '...', '...'])
>>> my_coll.count_documents({}, upper_bound=100)
3
>>> my_coll.insert_many(
...     [{"seq": i} for i in range(50)],
...     concurrency=5,
... )
CollectionInsertManyResult(raw_results=..., inserted_ids=[... ...])
>>> my_coll.count_documents({}, upper_bound=100)
53
>>> my_coll.insert_many(
...     [
...         {"tag": "a", "$vector": [1, 2]},
...         {"tag": "b", "$vector": [3, 4]},
...     ]
... )
CollectionInsertManyResult(...)

Note

Unordered insertions are executed with some degree of concurrency, so it is usually better to prefer this mode unless the order in the document sequence is important.

Note

A failure mode for this command is related to certain faulty documents found among those to insert: a document may have an _id already present on the collection, or its vector dimension may not match the collection setting.

For an ordered insertion, the method will raise an exception at the first such faulty document – nevertheless, all documents processed until then will end up being written to the database.

For unordered insertions, if the error stems from faulty documents the insertion proceeds until exhausting the input documents: then, an exception is raised – and all insertable documents will have been written to the database, including those "after" the troublesome ones.

If, on the other hand, there are errors not related to individual documents (such as a network connectivity error), the whole insert_many operation will stop in mid-way, an exception will be raised, and only a certain amount of the input documents will have made their way to the database.

Expand source code
def insert_many(
    self,
    documents: Iterable[DOC],
    *,
    ordered: bool = False,
    chunk_size: int | None = None,
    concurrency: int | None = None,
    request_timeout_ms: int | None = None,
    general_method_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionInsertManyResult:
    """
    Insert a list of documents into the collection.
    This is not an atomic operation.

    Args:
        documents: an iterable of dictionaries, each a document to insert.
            Documents may specify their `_id` field or leave it out, in which
            case it will be added automatically.
        ordered: if False (default), the insertions can occur in arbitrary order
            and possibly concurrently. If True, they are processed sequentially.
            If there are no specific reasons against it, unordered insertions are to
            be preferred as they complete much faster.
        chunk_size: how many documents to include in a single API request.
            Exceeding the server maximum allowed value results in an error.
            Leave it unspecified (recommended) to use the system default.
        concurrency: maximum number of concurrent requests to the API at
            a given time. It cannot be more than one for ordered insertions.
        general_method_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation (which may involve multiple API requests).
            If not passed, the collection-level setting is used instead.
        request_timeout_ms: a timeout, in milliseconds, for each API request.
            If not passed, the collection-level setting is used instead.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a CollectionInsertManyResult object.

    Examples:
        >>> my_coll.count_documents({}, upper_bound=10)
        0
        >>> my_coll.insert_many(
        ...     [{"a": 10}, {"a": 5}, {"b": [True, False, False]}],
        ...     ordered=True,
        ... )
        CollectionInsertManyResult(raw_results=..., inserted_ids=['184bb06f-...', '...', '...'])
        >>> my_coll.count_documents({}, upper_bound=100)
        3
        >>> my_coll.insert_many(
        ...     [{"seq": i} for i in range(50)],
        ...     concurrency=5,
        ... )
        CollectionInsertManyResult(raw_results=..., inserted_ids=[... ...])
        >>> my_coll.count_documents({}, upper_bound=100)
        53
        >>> my_coll.insert_many(
        ...     [
        ...         {"tag": "a", "$vector": [1, 2]},
        ...         {"tag": "b", "$vector": [3, 4]},
        ...     ]
        ... )
        CollectionInsertManyResult(...)

    Note:
        Unordered insertions are executed with some degree of concurrency,
        so it is usually better to prefer this mode unless the order in the
        document sequence is important.

    Note:
        A failure mode for this command is related to certain faulty documents
        found among those to insert: a document may have an `_id` already
        present on the collection, or its vector dimension may not
        match the collection setting.

        For an ordered insertion, the method will raise an exception at
        the first such faulty document -- nevertheless, all documents processed
        until then will end up being written to the database.

        For unordered insertions, if the error stems from faulty documents
        the insertion proceeds until exhausting the input documents: then,
        an exception is raised -- and all insertable documents will have been
        written to the database, including those "after" the troublesome ones.

        If, on the other hand, there are errors not related to individual
        documents (such as a network connectivity error), the whole
        `insert_many` operation will stop in mid-way, an exception will be raised,
        and only a certain amount of the input documents will
        have made their way to the database.
    """

    # Resolve the effective timeouts: explicit arguments win over the
    # collection-level defaults held in `self.api_options`.
    _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
        (general_method_timeout_ms, "general_method_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (
            self.api_options.timeout_options.general_method_timeout_ms,
            "general_method_timeout_ms",
        ),
    )
    _request_timeout_ms, _rt_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    if concurrency is None:
        if ordered:
            _concurrency = 1
        else:
            _concurrency = DEFAULT_INSERT_MANY_CONCURRENCY
    else:
        _concurrency = concurrency
    if _concurrency > 1 and ordered:
        raise ValueError("Cannot run ordered insert_many concurrently.")
    if chunk_size is None:
        _chunk_size = DEFAULT_INSERT_MANY_CHUNK_SIZE
    else:
        _chunk_size = chunk_size
    _documents = list(documents)
    logger.info(f"inserting {len(_documents)} documents in '{self.name}'")
    raw_results: list[dict[str, Any]] = []
    timeout_manager = MultiCallTimeoutManager(
        overall_timeout_ms=_general_method_timeout_ms,
        timeout_label=_gmt_label,
    )
    if ordered:
        options = {"ordered": True}
        inserted_ids: list[Any] = []
        for i in range(0, len(_documents), _chunk_size):
            im_payload = {
                "insertMany": {
                    "documents": _documents[i : i + _chunk_size],
                    "options": options,
                },
            }
            logger.info(f"insertMany(chunk) on '{self.name}'")
            chunk_response = self._converted_request(
                payload=im_payload,
                raise_api_errors=False,
                timeout_context=timeout_manager.remaining_timeout(
                    cap_time_ms=_request_timeout_ms,
                    cap_timeout_label=_rt_label,
                ),
            )
            logger.info(f"finished insertMany(chunk) on '{self.name}'")
            # accumulate the results in this call
            chunk_inserted_ids = (chunk_response.get("status") or {}).get(
                "insertedIds", []
            )
            inserted_ids += chunk_inserted_ids
            raw_results += [chunk_response]
            # if errors, quit early
            if chunk_response.get("errors", []):
                partial_result = CollectionInsertManyResult(
                    raw_results=raw_results,
                    inserted_ids=inserted_ids,
                )
                raise CollectionInsertManyException.from_response(
                    command=None,
                    raw_response=chunk_response,
                    partial_result=partial_result,
                )

        # return
        full_result = CollectionInsertManyResult(
            raw_results=raw_results,
            inserted_ids=inserted_ids,
        )
        logger.info(
            f"finished inserting {len(_documents)} documents in '{self.name}'"
        )
        return full_result

    else:
        # unordered: concurrent or not, do all of them and parse the results
        options = {"ordered": False}
        if _concurrency > 1:
            with ThreadPoolExecutor(max_workers=_concurrency) as executor:

                def _chunk_insertor(
                    document_chunk: list[dict[str, Any]],
                ) -> dict[str, Any]:
                    im_payload = {
                        "insertMany": {
                            "documents": document_chunk,
                            "options": options,
                        },
                    }
                    logger.info(f"insertMany(chunk) on '{self.name}'")
                    im_response = self._converted_request(
                        payload=im_payload,
                        raise_api_errors=False,
                        timeout_context=timeout_manager.remaining_timeout(
                            cap_time_ms=_request_timeout_ms,
                            cap_timeout_label=_rt_label,
                        ),
                    )
                    logger.info(f"finished insertMany(chunk) on '{self.name}'")
                    return im_response

                raw_results = list(
                    executor.map(
                        _chunk_insertor,
                        (
                            _documents[i : i + _chunk_size]
                            for i in range(0, len(_documents), _chunk_size)
                        ),
                    )
                )
        else:
            for i in range(0, len(_documents), _chunk_size):
                im_payload = {
                    "insertMany": {
                        "documents": _documents[i : i + _chunk_size],
                        "options": options,
                    },
                }
                logger.info(f"insertMany(chunk) on '{self.name}'")
                im_response = self._converted_request(
                    payload=im_payload,
                    raise_api_errors=False,
                    timeout_context=timeout_manager.remaining_timeout(
                        cap_time_ms=_request_timeout_ms,
                        cap_timeout_label=_rt_label,
                    ),
                )
                logger.info(f"finished insertMany(chunk) on '{self.name}'")
                raw_results.append(im_response)
        # recast raw_results
        inserted_ids = [
            inserted_id
            for chunk_response in raw_results
            for inserted_id in (chunk_response.get("status") or {}).get(
                "insertedIds", []
            )
        ]

        # check-raise (generator form: no need to materialize a list for any())
        if any(
            chunk_response.get("errors", []) for chunk_response in raw_results
        ):
            partial_result = CollectionInsertManyResult(
                raw_results=raw_results,
                inserted_ids=inserted_ids,
            )
            raise CollectionInsertManyException.from_responses(
                commands=[None for _ in raw_results],
                raw_responses=raw_results,
                partial_result=partial_result,
            )

        # return
        full_result = CollectionInsertManyResult(
            raw_results=raw_results,
            inserted_ids=inserted_ids,
        )
        logger.info(
            f"finished inserting {len(_documents)} documents in '{self.name}'"
        )
        return full_result
def insert_one(self, document: DOC, *, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> CollectionInsertOneResult

Insert a single document in the collection in an atomic operation.

Args

document
the dictionary expressing the document to insert. The _id field of the document can be left out, in which case it will be created automatically.
general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Returns

a CollectionInsertOneResult object.

Examples

>>> my_coll.count_documents({}, upper_bound=10)
0
>>> my_coll.insert_one(
...     {
...         "age": 30,
...         "name": "Smith",
...         "food": ["pear", "peach"],
...         "likes_fruit": True,
...     },
... )
CollectionInsertOneResult(raw_results=..., inserted_id='ed4587a4-...-...-...')
>>> my_coll.insert_one({"_id": "user-123", "age": 50, "name": "Maccio"})
CollectionInsertOneResult(raw_results=..., inserted_id='user-123')
>>> my_coll.count_documents({}, upper_bound=10)
2
>>> my_coll.insert_one({"tag": "v", "$vector": [10, 11]})
CollectionInsertOneResult(...)

Note

If an _id is explicitly provided, which corresponds to a document that exists already in the collection, an error is raised and the insertion fails.

Expand source code
def insert_one(
    self,
    document: DOC,
    *,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionInsertOneResult:
    """
    Insert a single document in the collection in an atomic operation.

    Args:
        document: the dictionary expressing the document to insert.
            The `_id` field of the document can be left out, in which
            case it will be created automatically.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a CollectionInsertOneResult object.

    Raises:
        UnexpectedDataAPIResponseException: if the API response lacks a
            non-empty "insertedIds" entry in its status.

    Examples:
        >>> my_coll.count_documents({}, upper_bound=10)
        0
        >>> my_coll.insert_one(
        ...     {
        ...         "age": 30,
        ...         "name": "Smith",
        ...         "food": ["pear", "peach"],
        ...         "likes_fruit": True,
        ...     },
        ... )
        CollectionInsertOneResult(raw_results=..., inserted_id='ed4587a4-...-...-...')
        >>> my_coll.insert_one({"_id": "user-123", "age": 50, "name": "Maccio"})
        CollectionInsertOneResult(raw_results=..., inserted_id='user-123')
        >>> my_coll.count_documents({}, upper_bound=10)
        2

        >>> my_coll.insert_one({"tag": "v", "$vector": [10, 11]})
        CollectionInsertOneResult(...)

    Note:
        If an `_id` is explicitly provided, which corresponds to a document
        that exists already in the collection, an error is raised and
        the insertion fails.
    """

    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    io_payload = {"insertOne": {"document": document}}
    logger.info(f"insertOne on '{self.name}'")
    io_response = self._converted_request(
        payload=io_payload,
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    logger.info(f"finished insertOne on '{self.name}'")
    # A well-formed success response carries a non-empty "insertedIds" list
    # in its status; a missing key and an empty list are equally faulty, so
    # both funnel into the single raise below (the original code duplicated
    # the identical raise in two branches).
    inserted_ids = io_response.get("status", {}).get("insertedIds")
    if inserted_ids:
        return CollectionInsertOneResult(
            raw_results=[io_response],
            inserted_id=inserted_ids[0],
        )
    raise UnexpectedDataAPIResponseException(
        text="Faulty response from insert_one API command.",
        raw_response=io_response,
    )
def options(self, *, collection_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> CollectionDefinition

Get the collection options, i.e. its configuration as read from the database.

The method issues a request to the Data API each time it is invoked, without caching mechanisms: this ensures up-to-date information for usages such as real-time collection validation by the application.

Args

collection_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for collection_admin_timeout_ms.
timeout_ms
an alias for collection_admin_timeout_ms.

Returns

a CollectionDefinition instance describing the collection. (See also the database list_collections method.)

Example

>>> my_coll.options()
CollectionDefinition(vector=CollectionVectorOptions(dimension=3, metric='cosine'))
Expand source code
def options(
    self,
    *,
    collection_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionDefinition:
    """
    Get the collection options, i.e. its configuration as read from the database.

    The method issues a request to the Data API each time it is invoked,
    without caching mechanisms: this ensures up-to-date information
    for usages such as real-time collection validation by the application.

    Args:
        collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `collection_admin_timeout_ms`.
        timeout_ms: an alias for `collection_admin_timeout_ms`.

    Returns:
        a CollectionDefinition instance describing the collection.
        (See also the database `list_collections` method.)

    Raises:
        ValueError: if no collection with this name is found on the database.

    Example:
        >>> my_coll.options()
        CollectionDefinition(vector=CollectionVectorOptions(dimension=3, metric='cosine'))
    """

    _collection_admin_timeout_ms, _ca_label = _select_singlereq_timeout_ca(
        timeout_options=self.api_options.timeout_options,
        collection_admin_timeout_ms=collection_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info(f"getting collections in search of '{self.name}'")
    # List all collections in the (default) keyspace and keep those whose
    # name matches this collection's.
    self_descriptors = [
        coll_desc
        for coll_desc in self.database._list_collections_ctx(
            keyspace=None,
            timeout_context=_TimeoutContext(
                request_ms=_collection_admin_timeout_ms,
                label=_ca_label,
            ),
        )
        if coll_desc.name == self.name
    ]
    logger.info(f"finished getting collections in search of '{self.name}'")
    if self_descriptors:
        return self_descriptors[0].definition
    raise ValueError(
        f"Collection {self.keyspace}.{self.name} not found.",
    )
def replace_one(self, filter: FilterType, replacement: DOC, *, sort: SortType | None = None, upsert: bool = False, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> CollectionUpdateResult

Replace a single document on the collection with a new one, optionally inserting a new one if no match is found.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
replacement
the new document to write into the collection.
sort
with this dictionary parameter one can control the sorting order of the documents matching the filter, effectively determining what document will come first and hence be the replaced one. See the find method for more on sorting. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
upsert
this parameter controls the behavior in absence of matches. If True, replacement is inserted as a new document if no matches are found on the collection. If False, the operation silently does nothing in case of no matches.
general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Returns

a CollectionUpdateResult object summarizing the outcome of the replace operation.

Example

>>> my_coll.insert_one({"Marco": "Polo"})
CollectionInsertOneResult(...)
>>> my_coll.replace_one({"Marco": {"$exists": True}}, {"Buda": "Pest"})
CollectionUpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1})
>>> my_coll.find_one({"Buda": "Pest"})
{'_id': '8424905a-...', 'Buda': 'Pest'}
>>> my_coll.replace_one({"Mirco": {"$exists": True}}, {"Oh": "yeah?"})
CollectionUpdateResult(raw_results=..., update_info={'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0})
>>> my_coll.replace_one({"Mirco": {"$exists": True}}, {"Oh": "yeah?"}, upsert=True)
CollectionUpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '931b47d6-...'})
Expand source code
def replace_one(
    self,
    filter: FilterType,
    replacement: DOC,
    *,
    sort: SortType | None = None,
    upsert: bool = False,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionUpdateResult:
    """
    Replace a single document on the collection with a new one,
    optionally inserting a new one if no match is found.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        replacement: the new document to write into the collection.
        sort: with this dictionary parameter one can control the sorting
            order of the documents matching the filter, effectively
            determining what document will come first and hence be the
            replaced one. See the `find` method for more on sorting.
            Vector-based ANN sorting is achieved by providing a "$vector"
            or a "$vectorize" key in `sort`.
        upsert: this parameter controls the behavior in absence of matches.
            If True, `replacement` is inserted as a new document
            if no matches are found on the collection. If False,
            the operation silently does nothing in case of no matches.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a CollectionUpdateResult object summarizing the outcome of
        the replace operation.

    Example:
        >>> my_coll.insert_one({"Marco": "Polo"})
        CollectionInsertOneResult(...)
        >>> my_coll.replace_one({"Marco": {"$exists": True}}, {"Buda": "Pest"})
        CollectionUpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1})
        >>> my_coll.find_one({"Buda": "Pest"})
        {'_id': '8424905a-...', 'Buda': 'Pest'}
        >>> my_coll.replace_one({"Mirco": {"$exists": True}}, {"Oh": "yeah?"})
        CollectionUpdateResult(raw_results=..., update_info={'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0})
        >>> my_coll.replace_one({"Mirco": {"$exists": True}}, {"Oh": "yeah?"}, upsert=True)
        CollectionUpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '931b47d6-...'})
    """

    # Resolve the single-request timeout from the explicit args and the
    # collection-level defaults.
    _req_ms, _req_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # Assemble the command body, dropping entries whose value is None
    # (e.g. an unspecified `sort`).
    raw_body = {
        "filter": filter,
        "replacement": replacement,
        "options": {
            "upsert": upsert,
        },
        "sort": sort,
    }
    fo_payload = {
        "findOneAndReplace": {
            key: value for key, value in raw_body.items() if value is not None
        }
    }
    logger.info(f"findOneAndReplace on '{self.name}'")
    fo_response = self._converted_request(
        payload=fo_payload,
        timeout_context=_TimeoutContext(
            request_ms=_req_ms, label=_req_label
        ),
    )
    logger.info(f"finished findOneAndReplace on '{self.name}'")
    # A valid response must carry a "document" entry in its data section.
    if "document" not in fo_response.get("data", {}):
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from find_one_and_replace API command.",
            raw_response=fo_response,
        )
    fo_status = fo_response.get("status") or {}
    return CollectionUpdateResult(
        raw_results=[fo_response],
        update_info=_prepare_update_info([fo_status]),
    )
def to_async(self: Collection[DOC], *, embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = (unset), api_options: APIOptions | UnsetType = (unset)) ‑> AsyncCollection[DOC]

Create an AsyncCollection from this one. Save for the arguments explicitly provided as overrides, everything else is kept identical to this collection in the copy (the database is converted into an async object).

Args

embedding_api_key
optional API key(s) for interacting with the collection. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
api_options
any additional options to set for the result, in the form of an APIOptions instance (where one can set just the needed attributes). In case the same setting is also provided as named parameter, the latter takes precedence.

Returns

the new copy, an AsyncCollection instance.

Example

>>> asyncio.run(my_coll.to_async().count_documents({},upper_bound=100))
77
Expand source code
def to_async(
    self: Collection[DOC],
    *,
    embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
    api_options: APIOptions | UnsetType = _UNSET,
) -> AsyncCollection[DOC]:
    """
    Create an AsyncCollection from this one. Save for the arguments
    explicitly provided as overrides, everything else is kept identical
    to this collection in the copy (the database is converted into
    an async object).

    Args:
        embedding_api_key: optional API key(s) for interacting with the collection.
            If an embedding service is configured, and this parameter is not None,
            each Data API call will include the necessary embedding-related headers
            as specified by this parameter. If a string is passed, it translates
            into the one "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            For some vectorize providers/models, if using header-based authentication,
            specialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`
            should be supplied.
        api_options: any additional options to set for the result, in the form of
            an APIOptions instance (where one can set just the needed attributes).
            In case the same setting is also provided as named parameter,
            the latter takes precedence.

    Returns:
        the new copy, an AsyncCollection instance.

    Example:
        >>> asyncio.run(my_coll.to_async().count_documents({},upper_bound=100))
        77
    """

    # Layer the overrides on top of this collection's options: the
    # `api_options` argument first, then the named parameter(s), which
    # therefore take precedence.
    resulting_api_options = (
        self.api_options.with_override(api_options).with_override(
            APIOptions(
                embedding_api_key=embedding_api_key,
            )
        )
    )
    return AsyncCollection(
        database=self.database.to_async(),
        name=self.name,
        keyspace=self.keyspace,
        api_options=resulting_api_options,
    )
def update_many(self, filter: FilterType, update: dict[str, Any], *, upsert: bool = False, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> CollectionUpdateResult

Apply an update operation to all documents matching a condition, optionally inserting one document in absence of matches.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
update
the update prescription to apply to the documents, expressed as a dictionary as per Data API syntax. Examples are: {"$set": {"field": "value"}} {"$inc": {"counter": 10}} {"$unset": {"field": ""}} See the Data API documentation for the full syntax.
upsert
this parameter controls the behavior in absence of matches. If True, a single new document (resulting from applying update to an empty document) is inserted if no matches are found on the collection. If False, the operation silently does nothing in case of no matches.
general_method_timeout_ms
a timeout, in milliseconds, for the whole requested operation (which may involve multiple API requests). This method may entail successive HTTP API requests, depending on the amount of involved documents. If not passed, the collection-level setting is used instead.
request_timeout_ms
a timeout, in milliseconds, for each API request. If not passed, the collection-level setting is used instead.
timeout_ms
an alias for general_method_timeout_ms.

Returns

a CollectionUpdateResult object summarizing the outcome of the update operation.

Example

>>> my_coll.insert_many([{"c": "red"}, {"c": "green"}, {"c": "blue"}])
CollectionInsertManyResult(...)
>>> my_coll.update_many({"c": {"$ne": "green"}}, {"$set": {"nongreen": True}})
CollectionUpdateResult(raw_results=..., update_info={'n': 2, 'updatedExisting': True, 'ok': 1.0, 'nModified': 2})
>>> my_coll.update_many({"c": "orange"}, {"$set": {"is_also_fruit": True}})
CollectionUpdateResult(raw_results=..., update_info={'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0})
>>> my_coll.update_many(
...     {"c": "orange"},
...     {"$set": {"is_also_fruit": True}},
...     upsert=True,
... )
CollectionUpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '46643050-...'})

Note

Similarly to the case of find (see its docstring for more details), running this command while, at the same time, another process is inserting new documents which match the filter of the update_many can result in an unpredictable fraction of these documents being updated. In other words, it cannot be easily predicted whether a given newly-inserted document will be picked up by the update_many command or not.

Expand source code
def update_many(
    self,
    filter: FilterType,
    update: dict[str, Any],
    *,
    upsert: bool = False,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionUpdateResult:
    """
    Apply an update operation to all documents matching a condition,
    optionally inserting one document in absence of matches.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        update: the update prescription to apply to the documents, expressed
            as a dictionary as per Data API syntax. Examples are:
                {"$set": {"field": "value"}}
                {"$inc": {"counter": 10}}
                {"$unset": {"field": ""}}
            See the Data API documentation for the full syntax.
        upsert: this parameter controls the behavior in absence of matches.
            If True, a single new document (resulting from applying `update`
            to an empty document) is inserted if no matches are found on
            the collection. If False, the operation silently does nothing
            in case of no matches.
        general_method_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation (which may involve multiple API requests).
            This method may entail successive HTTP API requests,
            depending on the amount of involved documents.
            If not passed, the collection-level setting is used instead.
        request_timeout_ms: a timeout, in milliseconds, for each API request.
            If not passed, the collection-level setting is used instead.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a CollectionUpdateResult object summarizing the outcome of
        the update operation.

    Example:
        >>> my_coll.insert_many([{"c": "red"}, {"c": "green"}, {"c": "blue"}])
        CollectionInsertManyResult(...)
        >>> my_coll.update_many({"c": {"$ne": "green"}}, {"$set": {"nongreen": True}})
        CollectionUpdateResult(raw_results=..., update_info={'n': 2, 'updatedExisting': True, 'ok': 1.0, 'nModified': 2})
        >>> my_coll.update_many({"c": "orange"}, {"$set": {"is_also_fruit": True}})
        CollectionUpdateResult(raw_results=..., update_info={'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0})
        >>> my_coll.update_many(
        ...     {"c": "orange"},
        ...     {"$set": {"is_also_fruit": True}},
        ...     upsert=True,
        ... )
        CollectionUpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '46643050-...'})

    Note:
        Similarly to the case of `find` (see its docstring for more details),
        running this command while, at the same time, another process is
        inserting new documents which match the filter of the `update_many`
        can result in an unpredictable fraction of these documents being updated.
        In other words, it cannot be easily predicted whether a given
        newly-inserted document will be picked up by the update_many command or not.
    """

    # Resolve the overall-operation timeout: explicit per-call arguments take
    # precedence over the collection-level defaults.
    _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
        (general_method_timeout_ms, "general_method_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (
            self.api_options.timeout_options.general_method_timeout_ms,
            "general_method_timeout_ms",
        ),
    )
    # Per-HTTP-request timeout, resolved with the same precedence logic.
    _request_timeout_ms, _rt_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    # Fixed portion of the command's "options" payload field (the page state,
    # when present, is merged in at each iteration below).
    api_options = {
        "upsert": upsert,
    }
    page_state_options: dict[str, str] = {}
    # Accumulators across pages: full raw responses and their "status" parts.
    um_responses: list[dict[str, Any]] = []
    um_statuses: list[dict[str, Any]] = []
    must_proceed = True
    logger.info(f"starting update_many on '{self.name}'")
    # The manager tracks the overall budget across the successive API calls.
    timeout_manager = MultiCallTimeoutManager(
        overall_timeout_ms=_general_method_timeout_ms,
        timeout_label=_gmt_label,
    )
    # The API may paginate a large update: keep re-issuing the command with
    # the returned "nextPageState" until no further page is indicated.
    while must_proceed:
        options = {**api_options, **page_state_options}
        this_um_payload = {
            "updateMany": {
                k: v
                for k, v in {
                    "filter": filter,
                    "update": update,
                    "options": options,
                }.items()
                if v is not None
            }
        }
        logger.info(f"updateMany on '{self.name}'")
        this_um_response = self._converted_request(
            payload=this_um_payload,
            timeout_context=timeout_manager.remaining_timeout(
                cap_time_ms=_request_timeout_ms,
                cap_timeout_label=_rt_label,
            ),
        )
        logger.info(f"finished updateMany on '{self.name}'")
        this_um_status = this_um_response.get("status") or {}
        #
        # if errors, quit early
        if this_um_response.get("errors", []):
            # Surface the work done so far as a partial result attached to
            # the raised exception (earlier pages may have succeeded).
            partial_update_info = _prepare_update_info(um_statuses)
            partial_result = CollectionUpdateResult(
                raw_results=um_responses,
                update_info=partial_update_info,
            )
            all_um_responses = um_responses + [this_um_response]
            raise CollectionUpdateManyException.from_responses(
                commands=[None for _ in all_um_responses],
                raw_responses=all_um_responses,
                partial_result=partial_result,
            )
        else:
            if "status" not in this_um_response:
                raise UnexpectedDataAPIResponseException(
                    text="Faulty response from update_many API command.",
                    raw_response=this_um_response,
                )
            um_responses.append(this_um_response)
            um_statuses.append(this_um_status)
            # A non-None "nextPageState" means more matching documents remain.
            next_page_state = this_um_status.get("nextPageState")
            if next_page_state is not None:
                must_proceed = True
                page_state_options = {"pageState": next_page_state}
            else:
                must_proceed = False
                page_state_options = {}

    # Merge the per-page statuses into a single summary for the caller.
    update_info = _prepare_update_info(um_statuses)
    logger.info(f"finished update_many on '{self.name}'")
    return CollectionUpdateResult(
        raw_results=um_responses,
        update_info=update_info,
    )
def update_one(self, filter: FilterType, update: dict[str, Any], *, sort: SortType | None = None, upsert: bool = False, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> CollectionUpdateResult

Update a single document on the collection as requested, optionally inserting a new one if no match is found.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
update
the update prescription to apply to the document, expressed as a dictionary as per Data API syntax. Examples are: {"$set": {"field": "value"}} {"$inc": {"counter": 10}} {"$unset": {"field": ""}} See the Data API documentation for the full syntax.
sort
with this dictionary parameter one can control the sorting order of the documents matching the filter, effectively determining what document will come first and hence be the replaced one. See the find method for more on sorting. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
upsert
this parameter controls the behavior in absence of matches. If True, a new document (resulting from applying the update to an empty document) is inserted if no matches are found on the collection. If False, the operation silently does nothing in case of no matches.
general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Returns

a CollectionUpdateResult object summarizing the outcome of the update operation.

Example

>>> my_coll.insert_one({"Marco": "Polo"})
CollectionInsertOneResult(...)
>>> my_coll.update_one({"Marco": {"$exists": True}}, {"$inc": {"rank": 3}})
CollectionUpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1})
>>> my_coll.update_one({"Mirko": {"$exists": True}}, {"$inc": {"rank": 3}})
CollectionUpdateResult(raw_results=..., update_info={'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0})
>>> my_coll.update_one({"Mirko": {"$exists": True}}, {"$inc": {"rank": 3}}, upsert=True)
CollectionUpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '2a45ff60-...'})
Expand source code
def update_one(
    self,
    filter: FilterType,
    update: dict[str, Any],
    *,
    sort: SortType | None = None,
    upsert: bool = False,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionUpdateResult:
    """
    Update a single document on the collection as requested,
    optionally inserting a new one if no match is found.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        update: the update prescription to apply to the document, expressed
            as a dictionary as per Data API syntax. Examples are:
                {"$set": {"field": "value"}}
                {"$inc": {"counter": 10}}
                {"$unset": {"field": ""}}
            See the Data API documentation for the full syntax.
        sort: with this dictionary parameter one can control the sorting
            order of the documents matching the filter, effectively
            determining what document will come first and hence be the
            replaced one. See the `find` method for more on sorting.
            Vector-based ANN sorting is achieved by providing a "$vector"
            or a "$vectorize" key in `sort`.
        upsert: this parameter controls the behavior in absence of matches.
            If True, a new document (resulting from applying the `update`
            to an empty document) is inserted if no matches are found on
            the collection. If False, the operation silently does nothing
            in case of no matches.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a CollectionUpdateResult object summarizing the outcome of
        the update operation.

    Example:
        >>> my_coll.insert_one({"Marco": "Polo"})
        CollectionInsertOneResult(...)
        >>> my_coll.update_one({"Marco": {"$exists": True}}, {"$inc": {"rank": 3}})
        CollectionUpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1})
        >>> my_coll.update_one({"Mirko": {"$exists": True}}, {"$inc": {"rank": 3}})
        CollectionUpdateResult(raw_results=..., update_info={'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0})
        >>> my_coll.update_one({"Mirko": {"$exists": True}}, {"$inc": {"rank": 3}}, upsert=True)
        CollectionUpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '2a45ff60-...'})
    """

    # Single-request method: all timeout parameters collapse into one value.
    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # Assemble the command, omitting fields whose value is None
    # (e.g. an absent `sort`), as the API expects.
    raw_command_fields: dict[str, Any] = {
        "filter": filter,
        "update": update,
        "options": {"upsert": upsert},
        "sort": sort,
    }
    uo_payload = {
        "updateOne": {
            key: value
            for key, value in raw_command_fields.items()
            if value is not None
        }
    }
    logger.info(f"updateOne on '{self.name}'")
    uo_response = self._converted_request(
        payload=uo_payload,
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    logger.info(f"finished updateOne on '{self.name}'")
    # A well-formed response always carries a "status" field.
    if "status" not in uo_response:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from updateOne API command.",
            raw_response=uo_response,
        )
    return CollectionUpdateResult(
        raw_results=[uo_response],
        update_info=_prepare_update_info([uo_response["status"]]),
    )
def with_options(self: Collection[DOC], *, embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = (unset), api_options: APIOptions | UnsetType = (unset)) ‑> Collection[DOC]

Create a clone of this collection with some changed attributes.

Args

embedding_api_key
optional API key(s) for interacting with the collection. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
api_options
any additional options to set for the clone, in the form of an APIOptions instance (where one can set just the needed attributes). In case the same setting is also provided as named parameter, the latter takes precedence.

Returns

a new Collection instance.

Example

>>> collection_with_api_key_configured = my_collection.with_options(
...     embedding_api_key="secret-key-0123abcd...",
... )
Expand source code
def with_options(
    self: Collection[DOC],
    *,
    embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
    api_options: APIOptions | UnsetType = _UNSET,
) -> Collection[DOC]:
    """
    Create a clone of this collection with some changed attributes.

    Args:
        embedding_api_key: optional API key(s) for interacting with the collection.
            If an embedding service is configured, and this parameter is not None,
            each Data API call will include the necessary embedding-related headers
            as specified by this parameter. If a string is passed, it translates
            into the one "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            For some vectorize providers/models, if using header-based
            authentication, specialized subclasses of
            `astrapy.authentication.EmbeddingHeadersProvider` should be supplied.
        api_options: any additional options to set for the clone, in the form of
            an APIOptions instance (where one can set just the needed attributes).
            In case the same setting is also provided as named parameter,
            the latter takes precedence.

    Returns:
        a new Collection instance.

    Example:
        >>> collection_with_api_key_configured = my_collection.with_options(
        ...     embedding_api_key="secret-key-0123abcd...",
        ... )
    """

    # Delegate to the private cloning helper; unset parameters are left
    # untouched in the clone.
    cloned_collection = self._copy(
        embedding_api_key=embedding_api_key,
        api_options=api_options,
    )
    return cloned_collection
class DataAPIClient (token: str | TokenProvider | UnsetType = (unset), *, environment: str | UnsetType = (unset), callers: Sequence[CallerType] | UnsetType = (unset), api_options: APIOptions | UnsetType = (unset))

A client for using the Data API. This is the entry point, sitting at the top of the conceptual "client -> database -> collection" hierarchy and of the "client -> admin -> database admin" chain as well.

A client is created first, optionally passing it a suitable Access Token. Starting from the client, then: - databases (Database and AsyncDatabase) are created for working with data - AstraDBAdmin objects can be created for admin-level work

Args

token
an Access Token to the database. Example: "AstraCS:xyz...". This can be either a literal token string or a subclass of TokenProvider. Note that generally one should pass the token later, when spawning Database instances from the client (with the get_database) method of DataAPIClient; the reason is that the typical tokens are scoped to a single database. However, when performing administrative tasks at the AstraDBAdmin level (such as creating databases), an org-wide token is required – then it makes sense to provide it when creating the DataAPIClient instance.
environment
a string representing the target Data API environment. It can be left unspecified for the default value of Environment.PROD; other values include Environment.OTHER, Environment.DSE.
callers
a list of caller identities, i.e. applications, or frameworks, on behalf of which Data API and DevOps API calls are performed. These end up in the request user-agent. Each caller identity is a ("caller_name", "caller_version") pair.
api_options
a specification - complete or partial - of the API Options to override the system defaults. This allows for a deeper configuration than what the named parameters (token, environment, callers) offer. If this is passed alongside these named parameters, those will take precedence.

Example

>>> from astrapy import DataAPIClient
>>> from astrapy.info import CollectionDefinition
>>> my_client = DataAPIClient()
>>> my_db0 = my_client.get_database(
...     "https://01234567-....apps.astra.datastax.com",
...     token="AstraCS:...",
... )
>>> my_coll = my_db0.create_collection(
...     "movies",
...     definition=(
...         CollectionDefinition.builder()
...         .set_vector_dimension(2)
...         .build()
...     ),
... )
>>> my_coll.insert_one({"title": "The Title", "$vector": [0.1, 0.3]})
>>> my_db1 = my_client.get_database("01234567-...")
>>> my_db2 = my_client.get_database("01234567-...", region="us-east1")
>>> my_adm0 = my_client.get_admin()
>>> my_adm1 = my_client.get_admin(token=more_powerful_token_override)
>>> database_list = my_adm0.list_databases()
Expand source code
class DataAPIClient:
    """
    A client for using the Data API. This is the entry point, sitting
    at the top of the conceptual "client -> database -> collection" hierarchy
    and of the "client -> admin -> database admin" chain as well.

    A client is created first, optionally passing it a suitable Access Token.
    Starting from the client, then:
        - databases (Database and AsyncDatabase) are created for working with data
        - AstraDBAdmin objects can be created for admin-level work

    Args:
        token: an Access Token to the database. Example: `"AstraCS:xyz..."`.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
            Note that generally one should pass the token later, when spawning
            Database instances from the client (with the `get_database`) method
            of DataAPIClient; the reason is that the typical tokens are scoped
            to a single database. However, when performing administrative tasks
            at the AstraDBAdmin level (such as creating databases), an org-wide
            token is required -- then it makes sense to provide it when creating
            the DataAPIClient instance.
        environment: a string representing the target Data API environment.
            It can be left unspecified for the default value of `Environment.PROD`;
            other values include `Environment.OTHER`, `Environment.DSE`.
        callers: a list of caller identities, i.e. applications, or frameworks,
            on behalf of which Data API and DevOps API calls are performed.
            These end up in the request user-agent.
            Each caller identity is a ("caller_name", "caller_version") pair.
        api_options: a specification - complete or partial - of the API Options
            to override the system defaults. This allows for a deeper configuration
            than what the named parameters (token, environment, callers) offer.
            If this is passed alongside these named parameters, those will take
            precedence.

    Example:
        >>> from astrapy import DataAPIClient
        >>> from astrapy.info import CollectionDefinition
        >>> my_client = DataAPIClient()
        >>> my_db0 = my_client.get_database(
        ...     "https://01234567-....apps.astra.datastax.com",
        ...     token="AstraCS:...",
        ... )
        >>> my_coll = my_db0.create_collection(
        ...     "movies",
        ...     definition=(
        ...         CollectionDefinition.builder()
        ...         .set_vector_dimension(2)
        ...         .build()
        ...     ),
        ... )
        >>> my_coll.insert_one({"title": "The Title", "$vector": [0.1, 0.3]})
        >>> my_db1 = my_client.get_database("01234567-...")
        >>> my_db2 = my_client.get_database("01234567-...", region="us-east1")
        >>> my_adm0 = my_client.get_admin()
        >>> my_adm1 = my_client.get_admin(token=more_powerful_token_override)
        >>> database_list = my_adm0.list_databases()
    """

    def __init__(
        self,
        token: str | TokenProvider | UnsetType = _UNSET,
        *,
        environment: str | UnsetType = _UNSET,
        callers: Sequence[CallerType] | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> None:
        # The environment bootstraps the defaults, so it is normalized
        # and validated before anything else.
        normalized_env: str = (
            Environment.PROD.lower()
            if isinstance(environment, UnsetType)
            else environment.lower()
        )
        if normalized_env not in Environment.values:
            raise InvalidEnvironmentException(
                f"Unsupported `environment` value: '{normalized_env}'."
            )
        # Precedence: named parameters > passed api_options > env defaults.
        explicit_arg_options = APIOptions(
            callers=callers,
            token=token,
        )
        base_options = defaultAPIOptions(normalized_env).with_override(api_options)
        self.api_options = base_options.with_override(explicit_arg_options)

    def __repr__(self) -> str:
        # e.g. "DataAPIClient(<api options repr>)"
        return "{}({})".format(type(self).__name__, self.api_options)

    def __eq__(self, other: Any) -> bool:
        # Equality considers token, environment and callers only.
        if not isinstance(other, DataAPIClient):
            return False
        mine, theirs = self.api_options, other.api_options
        return (
            mine.token == theirs.token
            and mine.environment == theirs.environment
            and mine.callers == theirs.callers
        )

    def __getitem__(self, api_endpoint: str) -> Database:
        """Shorthand: `client[endpoint]` is `client.get_database(endpoint)`."""
        return self.get_database(api_endpoint=api_endpoint)

    def _copy(
        self,
        *,
        token: str | TokenProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> DataAPIClient:
        # Internal cloning helper: merge the overrides onto the current
        # options (the explicit token wins over `api_options`).
        token_only_options = APIOptions(token=token)
        merged_options = self.api_options.with_override(
            api_options
        ).with_override(token_only_options)
        return DataAPIClient(
            token=token,
            environment=merged_options.environment,
            api_options=merged_options,
        )

    def with_options(
        self,
        *,
        token: str | TokenProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> DataAPIClient:
        """
        Create a clone of this DataAPIClient with some changed attributes.

        Args:
            token: an Access Token to the database. Example: `"AstraCS:xyz..."`.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            api_options: any additional options to set for the clone, in the
                form of an APIOptions instance (where one can set just the
                needed attributes). In case the same setting is also provided
                as named parameter, the latter takes precedence.

        Returns:
            a new DataAPIClient instance.

        Example:
            >>> other_auth_client = my_client.with_options(
            ...     token="AstraCS:xyz...",
            ... )
        """

        # Delegate to the private cloning helper.
        cloned_client = self._copy(
            token=token,
            api_options=api_options,
        )
        return cloned_client

    def get_database(
        self,
        api_endpoint: str,
        *,
        token: str | TokenProvider | UnsetType = _UNSET,
        keyspace: str | None = None,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> Database:
        """
        Get a Database object from this client, for doing data-related work.

        Args:
            api_endpoint: the API Endpoint for the target database
                (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`).
                The database must exist already for the resulting object
                to be effectively used; in other words, this invocation
                does not create the database, just the object instance.
                Actual admin work can be achieved by using the AstraDBAdmin object.
            token: if supplied, is passed to the Database instead of the client token.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            keyspace: if provided, it is passed to the Database; otherwise
                the Database class will apply an environment-specific default.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults.
                This allows for a deeper configuration of the database, e.g.
                concerning timeouts; if this is passed together with
                the equivalent named parameters, the latter will take precedence
                in their respective settings.

        Returns:
            a Database object with which to work on Data API collections.

        Example:
            >>> my_db1 = my_client.get_database(
            ...     "https://01234567-...us-west1.apps.astra.datastax.com",
            ... )
            >>> my_db2 = my_client.get_database(
            ...     "https://01234567-...us-west1.apps.astra.datastax.com",
            ...     token="AstraCS:...",
            ...     keyspace="prod_keyspace",
            ... )
            >>> my_coll = my_db0.create_collection(
            ...     "movies",
            ...     definition=(
            ...         CollectionDefinition.builder()
            ...         .set_vector_dimension(2)
            ...         .build()
            ...     ),
            ... )
            >>> my_coll.insert_one({"title": "The Title", "$vector": [0.3, 0.4]})

        Note:
            This method does not perform any admin-level operation through
            the DevOps API. For actual creation of a database, see the
            `create_database` method of class AstraDBAdmin.
        """

        # lazy importing here to avoid circular dependency
        from astrapy import Database

        # Options precedence: explicit token > spawn_api_options > client options.
        token_only_options = APIOptions(token=token)
        resulting_api_options = self.api_options.with_override(
            spawn_api_options
        ).with_override(token_only_options)

        if resulting_api_options.environment not in Environment.astra_db_values:
            # Non-Astra environments expect a generic API URL.
            parsed_generic_api_endpoint = parse_generic_api_url(api_endpoint)
            if not parsed_generic_api_endpoint:
                raise ValueError(generic_api_url_parsing_error_message(api_endpoint))
            return Database(
                api_endpoint=parsed_generic_api_endpoint,
                keyspace=keyspace,
                api_options=resulting_api_options,
            )

        # Astra DB: the endpoint must parse and agree with the client environment.
        parsed_api_endpoint = parse_api_endpoint(api_endpoint)
        if parsed_api_endpoint is None:
            raise ValueError(api_endpoint_parsing_error_message(api_endpoint))
        if parsed_api_endpoint.environment != resulting_api_options.environment:
            raise InvalidEnvironmentException(
                "Environment mismatch between client and provided "
                "API endpoint. You can try adding "
                f'`environment="{parsed_api_endpoint.environment}"` '
                "to the DataAPIClient creation statement."
            )
        return Database(
            api_endpoint=api_endpoint,
            keyspace=keyspace,
            api_options=resulting_api_options,
        )

    def get_async_database(
        self,
        api_endpoint: str,
        *,
        token: str | TokenProvider | UnsetType = _UNSET,
        keyspace: str | None = None,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncDatabase:
        """
        Get an AsyncDatabase object from this client, for doing data-related work.

        Args:
            api_endpoint: the API Endpoint for the target database
                (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`).
                The database must exist already for the resulting object
                to be effectively used; in other words, this invocation
                does not create the database, just the object instance.
                Actual admin work can be achieved by using the AstraDBAdmin object.
            token: if supplied, is passed to the Database instead of the client token.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            keyspace: if provided, it is passed to the Database; otherwise
                the Database class will apply an environment-specific default.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults.
                This allows for a deeper configuration of the database, e.g.
                concerning timeouts; if this is passed together with
                the equivalent named parameters, the latter will take precedence
                in their respective settings.

        Returns:
            an AsyncDatabase object with which to work on Data API collections.

        Example:
            >>> async def create_use_db(cl: DataAPIClient, api_ep: str) -> None:
            ...     async_db = cl.get_async_database(api_ep)
            ...     my_a_coll = await async_db.create_collection(
            ...         "movies",
            ...         definition=(
            ...             CollectionDefinition.builder()
            ...             .set_vector_dimension(2)
            ...         .build()
            ...         )
            ...     )
            ...     await my_a_coll.insert_one({"title": "The Title", "$vector": [0.3, 0.4]})
            ...
            >>> asyncio.run(
            ...   create_use_db(
            ...       my_client,
            ...       "https://01234567-...us-west1.apps.astra.datastax.com",
            ...   )
            ... )

        Note:
            This method does not perform any admin-level operation through
            the DevOps API. For actual creation of a database, see the
            `create_database` method of class AstraDBAdmin.
        """

        # Build the synchronous Database (validation included), then convert.
        sync_database = self.get_database(
            api_endpoint=api_endpoint,
            token=token,
            keyspace=keyspace,
            spawn_api_options=spawn_api_options,
        )
        return sync_database.to_async()

    def get_database_by_api_endpoint(
        self,
        api_endpoint: str,
        *,
        token: str | TokenProvider | UnsetType = _UNSET,
        keyspace: str | None = None,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> Database:
        """
        Spawn a Database object from this client, for data-related work.

        Note: this is an alias for `get_database` (see).

        Args:
            api_endpoint: the API Endpoint of the target database, e.g.
                `https://<ID>-<REGION>.apps.astra.datastax.com`. The database
                is not created by this call: it must already exist for the
                returned object to be usable. Actual admin work can be done
                through an AstraDBAdmin object.
            token: an optional token overriding the client token for the
                spawned Database. Either a literal token string or a subclass
                of `astrapy.authentication.TokenProvider`.
            keyspace: an optional keyspace for the Database; if omitted, the
                Database class applies an environment-specific default.
            spawn_api_options: a (complete or partial) specification of the
                API Options overriding the defaults, e.g. for a finer control
                of timeouts. Settings passed through the equivalent named
                parameters take precedence over this.

        Returns:
            a Database object with which to work on Data API collections.
        """

        # plain delegation: this method is just another name for `get_database`
        return self.get_database(
            api_endpoint,
            token=token,
            keyspace=keyspace,
            spawn_api_options=spawn_api_options,
        )

    def get_async_database_by_api_endpoint(
        self,
        api_endpoint: str,
        *,
        token: str | TokenProvider | UnsetType = _UNSET,
        keyspace: str | None = None,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncDatabase:
        """
        Spawn an AsyncDatabase object from this client, for data-related work.

        Note: this is an alias for `get_async_database` (see).

        Args:
            api_endpoint: the API Endpoint of the target database, e.g.
                `https://<ID>-<REGION>.apps.astra.datastax.com`. The database
                is not created by this call: it must already exist for the
                returned object to be usable. Actual admin work can be done
                through an AstraDBAdmin object.
            token: an optional token overriding the client token for the
                spawned Database. Either a literal token string or a subclass
                of `astrapy.authentication.TokenProvider`.
            keyspace: an optional keyspace for the Database; if omitted, the
                Database class applies an environment-specific default.
            spawn_api_options: a (complete or partial) specification of the
                API Options overriding the defaults, e.g. for a finer control
                of timeouts. Settings passed through the equivalent named
                parameters take precedence over this.

        Returns:
            an AsyncDatabase object with which to work on Data API collections.
        """

        # plain delegation: this method is just another name for
        # `get_async_database`
        return self.get_async_database(
            api_endpoint,
            token=token,
            keyspace=keyspace,
            spawn_api_options=spawn_api_options,
        )

    def get_admin(
        self,
        *,
        token: str | TokenProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AstraDBAdmin:
        """
        Spawn an AstraDBAdmin instance from this client, for admin work
        such as managing databases.

        Args:
            token: an optional token overriding the client token for the
                admin instance; useful when switching to a more powerful,
                admin-capable permission set. Either a literal token string
                or a subclass of `astrapy.authentication.TokenProvider`.
            spawn_api_options: a (complete or partial) specification of the
                API Options overriding the defaults, e.g. for a finer control
                of timeouts. Settings passed through the equivalent named
                parameters take precedence over this.

        Returns:
            An AstraDBAdmin instance, with which to perform management at the
            database level.

        Example:
            >>> my_adm0 = my_client.get_admin()
            >>> my_adm1 = my_client.get_admin(token=more_powerful_token_override)
            >>> database_list = my_adm0.list_databases()
            >>> my_db_admin = my_adm0.create_database(
            ...     "the_other_database",
            ...     cloud_provider="AWS",
            ...     region="eu-west-1",
            ... )
            >>> my_db_admin.list_keyspaces()
            ['default_keyspace', 'that_other_one']
        """

        # lazy importing here to avoid circular dependency
        from astrapy.admin import AstraDBAdmin

        # named parameters take precedence over the spawn api-options
        final_api_options = self.api_options.with_override(
            spawn_api_options
        ).with_override(APIOptions(token=token))

        if final_api_options.environment not in Environment.astra_db_values:
            raise InvalidEnvironmentException(
                "Method not supported outside of Astra DB."
            )

        return AstraDBAdmin(api_options=final_api_options)

Methods

def get_admin(self, *, token: str | TokenProvider | UnsetType = (unset), spawn_api_options: APIOptions | UnsetType = (unset)) ‑> AstraDBAdmin

Get an AstraDBAdmin instance corresponding to this client, for admin work such as managing databases.

Args

token
if supplied, is passed to the Astra DB Admin instead of the client token. This may be useful when switching to a more powerful, admin-capable permission set. This can be either a literal token string or a subclass of TokenProvider.
spawn_api_options
a specification - complete or partial - of the API Options to override the defaults. This allows for a deeper configuration of the admin, e.g. concerning timeouts; if this is passed together with the equivalent named parameters, the latter will take precedence in their respective settings.

Returns

An AstraDBAdmin instance, with which to perform management at the database level.

Example

>>> my_adm0 = my_client.get_admin()
>>> my_adm1 = my_client.get_admin(token=more_powerful_token_override)
>>> database_list = my_adm0.list_databases()
>>> my_db_admin = my_adm0.create_database(
...     "the_other_database",
...     cloud_provider="AWS",
...     region="eu-west-1",
... )
>>> my_db_admin.list_keyspaces()
['default_keyspace', 'that_other_one']
Expand source code
def get_admin(
    self,
    *,
    token: str | TokenProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> AstraDBAdmin:
    """
    Spawn an AstraDBAdmin instance from this client, for admin work
    such as managing databases.

    Args:
        token: an optional token overriding the client token for the
            admin instance; useful when switching to a more powerful,
            admin-capable permission set. Either a literal token string
            or a subclass of `astrapy.authentication.TokenProvider`.
        spawn_api_options: a (complete or partial) specification of the
            API Options overriding the defaults, e.g. for a finer control
            of timeouts. Settings passed through the equivalent named
            parameters take precedence over this.

    Returns:
        An AstraDBAdmin instance, with which to perform management at the
        database level.

    Example:
        >>> my_adm0 = my_client.get_admin()
        >>> my_adm1 = my_client.get_admin(token=more_powerful_token_override)
        >>> database_list = my_adm0.list_databases()
        >>> my_db_admin = my_adm0.create_database(
        ...     "the_other_database",
        ...     cloud_provider="AWS",
        ...     region="eu-west-1",
        ... )
        >>> my_db_admin.list_keyspaces()
        ['default_keyspace', 'that_other_one']
    """

    # lazy importing here to avoid circular dependency
    from astrapy.admin import AstraDBAdmin

    # named parameters take precedence over the spawn api-options
    final_api_options = self.api_options.with_override(
        spawn_api_options
    ).with_override(APIOptions(token=token))

    if final_api_options.environment not in Environment.astra_db_values:
        raise InvalidEnvironmentException(
            "Method not supported outside of Astra DB."
        )

    return AstraDBAdmin(api_options=final_api_options)
def get_async_database(self, api_endpoint: str, *, token: str | TokenProvider | UnsetType = (unset), keyspace: str | None = None, spawn_api_options: APIOptions | UnsetType = (unset)) ‑> AsyncDatabase

Get an AsyncDatabase object from this client, for doing data-related work.

Args

api_endpoint
the API Endpoint for the target database (e.g. https://<ID>-<REGION>.apps.astra.datastax.com). The database must exist already for the resulting object to be effectively used; in other words, this invocation does not create the database, just the object instance. Actual admin work can be achieved by using the AstraDBAdmin object.
token
if supplied, is passed to the Database instead of the client token. This can be either a literal token string or a subclass of TokenProvider.
keyspace
if provided, it is passed to the Database; otherwise the Database class will apply an environment-specific default.
spawn_api_options
a specification - complete or partial - of the API Options to override the defaults. This allows for a deeper configuration of the database, e.g. concerning timeouts; if this is passed together with the equivalent named parameters, the latter will take precedence in their respective settings.

Returns

an AsyncDatabase object with which to work on Data API collections.

Example

>>> async def create_use_db(cl: DataAPIClient, api_ep: str) -> None:
...     async_db = cl.get_async_database(api_ep)
...     my_a_coll = await async_db.create_collection(
...         "movies",
...         definition=(
...             CollectionDefinition.builder()
...             .set_vector_dimension(2)
...         .build()
...         )
...     )
...     await my_a_coll.insert_one({"title": "The Title", "$vector": [0.3, 0.4]})
...
>>> asyncio.run(
...   create_use_db(
...       my_client,
...       "https://01234567-...us-west1.apps.astra.datastax.com",
...   )
... )

Note

This method does not perform any admin-level operation through the DevOps API. For actual creation of a database, see the create_database method of class AstraDBAdmin.

Expand source code
def get_async_database(
    self,
    api_endpoint: str,
    *,
    token: str | TokenProvider | UnsetType = _UNSET,
    keyspace: str | None = None,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> AsyncDatabase:
    """
    Spawn an AsyncDatabase object from this client, for data-related work.

    Args:
        api_endpoint: the API Endpoint of the target database, e.g.
            `https://<ID>-<REGION>.apps.astra.datastax.com`. The database
            is not created by this call: it must already exist for the
            returned object to be usable. Actual admin work can be done
            through an AstraDBAdmin object.
        token: an optional token overriding the client token for the
            spawned Database. Either a literal token string or a subclass
            of `astrapy.authentication.TokenProvider`.
        keyspace: an optional keyspace for the Database; if omitted, the
            Database class applies an environment-specific default.
        spawn_api_options: a (complete or partial) specification of the
            API Options overriding the defaults, e.g. for a finer control
            of timeouts. Settings passed through the equivalent named
            parameters take precedence over this.

    Returns:
        an AsyncDatabase object with which to work on Data API collections.

    Example:
        >>> async def create_use_db(cl: DataAPIClient, api_ep: str) -> None:
        ...     async_db = cl.get_async_database(api_ep)
        ...     my_a_coll = await async_db.create_collection(
        ...         "movies",
        ...         definition=(
        ...             CollectionDefinition.builder()
        ...             .set_vector_dimension(2)
        ...             .build()
        ...         )
        ...     )
        ...     await my_a_coll.insert_one({"title": "The Title", "$vector": [0.3, 0.4]})
        ...
        >>> asyncio.run(
        ...   create_use_db(
        ...       my_client,
        ...       "https://01234567-...us-west1.apps.astra.datastax.com",
        ...   )
        ... )

    Note:
        This method performs no admin-level operation through the
        DevOps API. To actually create a database, see the
        `create_database` method of class AstraDBAdmin.
    """

    # spawn the synchronous counterpart, then convert it in place
    sync_database = self.get_database(
        api_endpoint=api_endpoint,
        token=token,
        keyspace=keyspace,
        spawn_api_options=spawn_api_options,
    )
    return sync_database.to_async()
def get_async_database_by_api_endpoint(self, api_endpoint: str, *, token: str | TokenProvider | UnsetType = (unset), keyspace: str | None = None, spawn_api_options: APIOptions | UnsetType = (unset)) ‑> AsyncDatabase

Get an AsyncDatabase object from this client, for doing data-related work.

Note: this is an alias for get_async_database (see).

Args

api_endpoint
the API Endpoint for the target database (e.g. https://<ID>-<REGION>.apps.astra.datastax.com). The database must exist already for the resulting object to be effectively used; in other words, this invocation does not create the database, just the object instance. Actual admin work can be achieved by using the AstraDBAdmin object.
token
if supplied, is passed to the Database instead of the client token. This can be either a literal token string or a subclass of TokenProvider.
keyspace
if provided, it is passed to the Database; otherwise the Database class will apply an environment-specific default.
spawn_api_options
a specification - complete or partial - of the API Options to override the defaults. This allows for a deeper configuration of the database, e.g. concerning timeouts; if this is passed together with the equivalent named parameters, the latter will take precedence in their respective settings.

Returns

an AsyncDatabase object with which to work on Data API collections.

Expand source code
def get_async_database_by_api_endpoint(
    self,
    api_endpoint: str,
    *,
    token: str | TokenProvider | UnsetType = _UNSET,
    keyspace: str | None = None,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> AsyncDatabase:
    """
    Spawn an AsyncDatabase object from this client, for data-related work.

    Note: this is an alias for `get_async_database` (see).

    Args:
        api_endpoint: the API Endpoint of the target database, e.g.
            `https://<ID>-<REGION>.apps.astra.datastax.com`. The database
            is not created by this call: it must already exist for the
            returned object to be usable. Actual admin work can be done
            through an AstraDBAdmin object.
        token: an optional token overriding the client token for the
            spawned Database. Either a literal token string or a subclass
            of `astrapy.authentication.TokenProvider`.
        keyspace: an optional keyspace for the Database; if omitted, the
            Database class applies an environment-specific default.
        spawn_api_options: a (complete or partial) specification of the
            API Options overriding the defaults, e.g. for a finer control
            of timeouts. Settings passed through the equivalent named
            parameters take precedence over this.

    Returns:
        an AsyncDatabase object with which to work on Data API collections.
    """

    # plain delegation: this method is just another name for
    # `get_async_database`
    return self.get_async_database(
        api_endpoint,
        token=token,
        keyspace=keyspace,
        spawn_api_options=spawn_api_options,
    )
def get_database(self, api_endpoint: str, *, token: str | TokenProvider | UnsetType = (unset), keyspace: str | None = None, spawn_api_options: APIOptions | UnsetType = (unset)) ‑> Database

Get a Database object from this client, for doing data-related work.

Args

api_endpoint
the API Endpoint for the target database (e.g. https://<ID>-<REGION>.apps.astra.datastax.com). The database must exist already for the resulting object to be effectively used; in other words, this invocation does not create the database, just the object instance. Actual admin work can be achieved by using the AstraDBAdmin object.
token
if supplied, is passed to the Database instead of the client token. This can be either a literal token string or a subclass of TokenProvider.
keyspace
if provided, it is passed to the Database; otherwise the Database class will apply an environment-specific default.
spawn_api_options
a specification - complete or partial - of the API Options to override the defaults. This allows for a deeper configuration of the database, e.g. concerning timeouts; if this is passed together with the equivalent named parameters, the latter will take precedence in their respective settings.

Returns

a Database object with which to work on Data API collections.

Example

>>> my_db1 = my_client.get_database(
...     "https://01234567-...us-west1.apps.astra.datastax.com",
... )
>>> my_db2 = my_client.get_database(
...     "https://01234567-...us-west1.apps.astra.datastax.com",
...     token="AstraCS:...",
...     keyspace="prod_keyspace",
... )
>>> my_coll = my_db1.create_collection(
...     "movies",
...     definition=(
...         CollectionDefinition.builder()
...         .set_vector_dimension(2)
...         .build()
...     ),
... )
>>> my_coll.insert_one({"title": "The Title", "$vector": [0.3, 0.4]})

Note

This method does not perform any admin-level operation through the DevOps API. For actual creation of a database, see the create_database method of class AstraDBAdmin.

Expand source code
def get_database(
    self,
    api_endpoint: str,
    *,
    token: str | TokenProvider | UnsetType = _UNSET,
    keyspace: str | None = None,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> Database:
    """
    Spawn a Database object from this client, for data-related work.

    Args:
        api_endpoint: the API Endpoint of the target database, e.g.
            `https://<ID>-<REGION>.apps.astra.datastax.com`. The database
            is not created by this call: it must already exist for the
            returned object to be usable. Actual admin work can be done
            through an AstraDBAdmin object.
        token: an optional token overriding the client token for the
            spawned Database. Either a literal token string or a subclass
            of `astrapy.authentication.TokenProvider`.
        keyspace: an optional keyspace for the Database; if omitted, the
            Database class applies an environment-specific default.
        spawn_api_options: a (complete or partial) specification of the
            API Options overriding the defaults, e.g. for a finer control
            of timeouts. Settings passed through the equivalent named
            parameters take precedence over this.

    Returns:
        a Database object with which to work on Data API collections.

    Example:
        >>> my_db1 = my_client.get_database(
        ...     "https://01234567-...us-west1.apps.astra.datastax.com",
        ... )
        >>> my_db2 = my_client.get_database(
        ...     "https://01234567-...us-west1.apps.astra.datastax.com",
        ...     token="AstraCS:...",
        ...     keyspace="prod_keyspace",
        ... )
        >>> my_coll = my_db1.create_collection(
        ...     "movies",
        ...     definition=(
        ...         CollectionDefinition.builder()
        ...         .set_vector_dimension(2)
        ...         .build()
        ...     ),
        ... )
        >>> my_coll.insert_one({"title": "The Title", "$vector": [0.3, 0.4]})

    Note:
        This method performs no admin-level operation through the
        DevOps API. To actually create a database, see the
        `create_database` method of class AstraDBAdmin.
    """

    # lazy importing here to avoid circular dependency
    from astrapy import Database

    # named parameters take precedence over the spawn api-options
    final_api_options = self.api_options.with_override(
        spawn_api_options
    ).with_override(APIOptions(token=token))

    if final_api_options.environment in Environment.astra_db_values:
        # Astra DB: the endpoint must parse and agree with the client env
        parsed = parse_api_endpoint(api_endpoint)
        if parsed is None:
            raise ValueError(api_endpoint_parsing_error_message(api_endpoint))
        if parsed.environment != final_api_options.environment:
            raise InvalidEnvironmentException(
                "Environment mismatch between client and provided "
                "API endpoint. You can try adding "
                f'`environment="{parsed.environment}"` '
                "to the DataAPIClient creation statement."
            )
        return Database(
            api_endpoint=api_endpoint,
            keyspace=keyspace,
            api_options=final_api_options,
        )

    # non-Astra environments: a generic URL check applies instead
    generic_endpoint = parse_generic_api_url(api_endpoint)
    if not generic_endpoint:
        raise ValueError(generic_api_url_parsing_error_message(api_endpoint))
    return Database(
        api_endpoint=generic_endpoint,
        keyspace=keyspace,
        api_options=final_api_options,
    )
def get_database_by_api_endpoint(self, api_endpoint: str, *, token: str | TokenProvider | UnsetType = (unset), keyspace: str | None = None, spawn_api_options: APIOptions | UnsetType = (unset)) ‑> Database

Get a Database object from this client, for doing data-related work.

Note: this is an alias for get_database (see).

Args

api_endpoint
the API Endpoint for the target database (e.g. https://<ID>-<REGION>.apps.astra.datastax.com). The database must exist already for the resulting object to be effectively used; in other words, this invocation does not create the database, just the object instance. Actual admin work can be achieved by using the AstraDBAdmin object.
token
if supplied, is passed to the Database instead of the client token. This can be either a literal token string or a subclass of TokenProvider.
keyspace
if provided, it is passed to the Database; otherwise the Database class will apply an environment-specific default.
spawn_api_options
a specification - complete or partial - of the API Options to override the defaults. This allows for a deeper configuration of the database, e.g. concerning timeouts; if this is passed together with the equivalent named parameters, the latter will take precedence in their respective settings.

Returns

a Database object with which to work on Data API collections.

Expand source code
def get_database_by_api_endpoint(
    self,
    api_endpoint: str,
    *,
    token: str | TokenProvider | UnsetType = _UNSET,
    keyspace: str | None = None,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> Database:
    """
    Spawn a Database object from this client, for data-related work.

    Note: this is an alias for `get_database` (see).

    Args:
        api_endpoint: the API Endpoint of the target database, e.g.
            `https://<ID>-<REGION>.apps.astra.datastax.com`. The database
            is not created by this call: it must already exist for the
            returned object to be usable. Actual admin work can be done
            through an AstraDBAdmin object.
        token: an optional token overriding the client token for the
            spawned Database. Either a literal token string or a subclass
            of `astrapy.authentication.TokenProvider`.
        keyspace: an optional keyspace for the Database; if omitted, the
            Database class applies an environment-specific default.
        spawn_api_options: a (complete or partial) specification of the
            API Options overriding the defaults, e.g. for a finer control
            of timeouts. Settings passed through the equivalent named
            parameters take precedence over this.

    Returns:
        a Database object with which to work on Data API collections.
    """

    # plain delegation: this method is just another name for `get_database`
    return self.get_database(
        api_endpoint,
        token=token,
        keyspace=keyspace,
        spawn_api_options=spawn_api_options,
    )
def with_options(self, *, token: str | TokenProvider | UnsetType = (unset), api_options: APIOptions | UnsetType = (unset)) ‑> DataAPIClient

Create a clone of this DataAPIClient with some changed attributes.

Args

token
an Access Token to the database. Example: "AstraCS:xyz...". This can be either a literal token string or a subclass of TokenProvider.
api_options
any additional options to set for the clone, in the form of an APIOptions instance (where one can set just the needed attributes). In case the same setting is also provided as named parameter, the latter takes precedence.

Returns

a new DataAPIClient instance.

Example

>>> other_auth_client = my_client.with_options(
...     token="AstraCS:xyz...",
... )
Expand source code
def with_options(
    self,
    *,
    token: str | TokenProvider | UnsetType = _UNSET,
    api_options: APIOptions | UnsetType = _UNSET,
) -> DataAPIClient:
    """
    Create a clone of this DataAPIClient with some changed attributes.

    Args:
        token: an Access Token to the database. Example: `"AstraCS:xyz..."`.
            Either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        api_options: any additional options for the clone, as an APIOptions
            instance (only the needed attributes have to be set). A setting
            provided both here and as a named parameter is resolved in favor
            of the named parameter.

    Returns:
        a new DataAPIClient instance.

    Example:
        >>> other_auth_client = my_client.with_options(
        ...     token="AstraCS:xyz...",
        ... )
    """

    # the cloning logic lives in the internal copy helper
    return self._copy(token=token, api_options=api_options)
class DataAPIDatabaseAdmin (*, api_endpoint: str, api_options: FullAPIOptions, spawner_database: Database | AsyncDatabase | None = None)

An "admin" object for non-Astra Data API environments, to perform administrative tasks at the keyspaces level such as creating/listing/dropping keyspaces.

Conforming to the architecture of non-Astra deployments of the Data API, this object works within the one existing database. It is within that database that the keyspace CRUD operations (and possibly other admin operations) are performed. Since non-Astra environments lack the concept of an overall admin (such as the all-databases AstraDBAdmin class), a DataAPIDatabaseAdmin is generally created by invoking the get_database_admin method of the corresponding Database object (which in turn is spawned by a DataAPIClient).

Args

api_endpoint
the full URI to access the Data API, e.g. "http://localhost:8181".
api_options
a complete specification of the API Options for this instance.
spawner_database
either a Database or an AsyncDatabase instance. This represents the database class which spawns this admin object, so that, if required, a keyspace creation can retroactively "use" the new keyspace in the spawner. Used to enable the Async/Database.get_admin_database().create_keyspace() pattern.

Example

>>> from astrapy import DataAPIClient
>>> from astrapy.constants import Environment
>>> from astrapy.authentication import UsernamePasswordTokenProvider
>>>
>>> token_provider = UsernamePasswordTokenProvider("username", "password")
>>> endpoint = "http://localhost:8181"
>>>
>>> client = DataAPIClient(
>>>     token=token_provider,
>>>     environment=Environment.OTHER,
>>> )
>>> database = client.get_database(endpoint)
>>> admin_for_my_db = database.get_database_admin()
>>>
>>> admin_for_my_db.list_keyspaces()
['keyspace1', 'keyspace2']

Note

a more powerful token may be required than the one sufficient for working in the Database, Collection and Table classes. Check the provided token if "Unauthorized" errors are encountered.

Expand source code
class DataAPIDatabaseAdmin(DatabaseAdmin):
    """
    An "admin" object for non-Astra Data API environments, to perform administrative
    tasks at the keyspaces level such as creating/listing/dropping keyspaces.

    Conforming to the architecture of non-Astra deployments of the Data API,
    this object works within the one existing database. It is within that database
    that the keyspace CRUD operations (and possibly other admin operations)
    are performed. Since non-Astra environments lack the concept of an overall
    admin (such as the all-databases AstraDBAdmin class), a `DataAPIDatabaseAdmin`
    is generally created by invoking the `get_database_admin` method of the
    corresponding `Database` object (which in turn is spawned by a DataAPIClient).

    Args:
        api_endpoint: the full URI to access the Data API,
            e.g. "http://localhost:8181".
        api_options: a complete specification of the API Options for this instance.
        spawner_database: either a Database or an AsyncDatabase instance. This represents
            the database class which spawns this admin object, so that, if required,
            a keyspace creation can retroactively "use" the new keyspace in the spawner.
            Used to enable the Async/Database.get_admin_database().create_keyspace()
            pattern.

    Example:
        >>> from astrapy import DataAPIClient
        >>> from astrapy.constants import Environment
        >>> from astrapy.authentication import UsernamePasswordTokenProvider
        >>>
        >>> token_provider = UsernamePasswordTokenProvider("username", "password")
        >>> endpoint = "http://localhost:8181"
        >>>
        >>> client = DataAPIClient(
        ...     token=token_provider,
        ...     environment=Environment.OTHER,
        ... )
        >>> database = client.get_database(endpoint)
        >>> admin_for_my_db = database.get_database_admin()
        >>>
        >>> admin_for_my_db.list_keyspaces()
        ['keyspace1', 'keyspace2']

    Note:
        a more powerful token may be required than the one sufficient for working
        in the Database, Collection and Table classes. Check the provided token
        if "Unauthorized" errors are encountered.
    """

    def __init__(
        self,
        *,
        api_endpoint: str,
        api_options: FullAPIOptions,
        spawner_database: Database | AsyncDatabase | None = None,
    ) -> None:
        # lazy import here to avoid circular dependency
        from astrapy.database import Database

        self.api_options = api_options
        self.api_endpoint = api_endpoint

        if spawner_database is not None:
            self.spawner_database = spawner_database
        else:
            # leaving the keyspace to its per-environment default
            # (a task for the Database)
            self.spawner_database = Database(
                api_endpoint=self.api_endpoint,
                keyspace=None,
                api_options=self.api_options,
            )

        # even if Data API, this is admin and must use the Admin additional headers:
        self._commander_headers = {
            DEFAULT_DATA_API_AUTH_HEADER: self.api_options.token.get_token(),
            **self.api_options.admin_additional_headers,
        }
        self._api_commander = self._get_api_commander()

    def __repr__(self) -> str:
        # e.g. DataAPIDatabaseAdmin(api_endpoint="...", api_options=...)
        parts = [
            f'api_endpoint="{self.api_endpoint}"',
            f"api_options={self.api_options}",
        ]
        return f"{self.__class__.__name__}({', '.join(parts)})"

    def __eq__(self, other: Any) -> bool:
        # two admins are equal iff they target the same endpoint
        # with the same API options.
        if isinstance(other, DataAPIDatabaseAdmin):
            return all(
                [
                    self.api_endpoint == other.api_endpoint,
                    self.api_options == other.api_options,
                ]
            )
        else:
            return False

    def _get_api_commander(self) -> APICommander:
        """Create the APICommander used by this admin for its Data API requests."""
        # join the non-None, non-empty path components (api_path, api_version)
        # into a single "/"-prefixed base path:
        base_path_components = [
            comp
            for comp in (
                ncomp.strip("/")
                for ncomp in (
                    self.api_options.data_api_url_options.api_path,
                    self.api_options.data_api_url_options.api_version,
                )
                if ncomp is not None
            )
            if comp != ""
        ]
        base_path = f"/{'/'.join(base_path_components)}"
        api_commander = APICommander(
            api_endpoint=self.api_endpoint,
            path=base_path,
            headers=self._commander_headers,
            callers=self.api_options.callers,
            redacted_header_names=self.api_options.redacted_header_names,
        )
        return api_commander

    def _copy(
        self,
        *,
        token: str | TokenProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> DataAPIDatabaseAdmin:
        """Return a clone of this admin; named args take precedence over api_options."""
        arg_api_options = APIOptions(
            token=token,
        )
        # apply `api_options` first, then the named-parameter overrides on top:
        final_api_options = self.api_options.with_override(api_options).with_override(
            arg_api_options
        )
        return DataAPIDatabaseAdmin(
            api_endpoint=self.api_endpoint,
            api_options=final_api_options,
            spawner_database=self.spawner_database,
        )

    def with_options(
        self,
        *,
        token: str | TokenProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> DataAPIDatabaseAdmin:
        """
        Create a clone of this DataAPIDatabaseAdmin with some changed attributes.

        Args:
            token: an access token with enough permission to perform admin tasks.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            api_options: any additional options to set for the clone, in the form of
                an APIOptions instance (where one can set just the needed attributes).
                In case the same setting is also provided as named parameter,
                the latter takes precedence.

        Returns:
            a new DataAPIDatabaseAdmin instance.

        Example:
            >>> admin_with_other_token = admin_for_my_db.with_options(
            ...     token=my_other_token_provider,
            ... )
        """

        return self._copy(
            token=token,
            api_options=api_options,
        )

    def list_keyspaces(
        self,
        *,
        keyspace_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[str]:
        """
        Query the API for a list of the keyspaces in the database.

        Args:
            keyspace_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `keyspace_admin_timeout_ms`.
            timeout_ms: an alias for `keyspace_admin_timeout_ms`.

        Returns:
            A list of the keyspaces, each a string, in no particular order.

        Example:
            >>> admin_for_my_db.list_keyspaces()
            ['default_keyspace', 'staging_keyspace']
        """

        # resolve the three timeout aliases into one effective value + label:
        _keyspace_admin_timeout_ms, _ka_label = _select_singlereq_timeout_ka(
            timeout_options=self.api_options.timeout_options,
            keyspace_admin_timeout_ms=keyspace_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info("getting list of keyspaces")
        fn_response = self._api_commander.request(
            payload={"findKeyspaces": {}},
            timeout_context=_TimeoutContext(
                request_ms=_keyspace_admin_timeout_ms, label=_ka_label
            ),
        )
        if "keyspaces" not in fn_response.get("status", {}):
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from findKeyspaces API command.",
                raw_response=fn_response,
            )
        else:
            logger.info("finished getting list of keyspaces")
            return fn_response["status"]["keyspaces"]  # type: ignore[no-any-return]

    def create_keyspace(
        self,
        name: str,
        *,
        replication_options: dict[str, Any] | None = None,
        update_db_keyspace: bool | None = None,
        keyspace_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
        **kwargs: Any,
    ) -> None:
        """
        Create a keyspace in the database.

        Args:
            name: the keyspace name. If supplying a keyspace that exists
                already, the method call proceeds as usual, no errors are
                raised, and the whole invocation is a no-op.
            replication_options: this dictionary can specify the options about
                replication of the keyspace (across database nodes). If provided,
                it must have a structure similar to:
                `{"class": "SimpleStrategy", "replication_factor": 1}`.
            update_db_keyspace: if True, the `Database` or `AsyncDatabase` class
                that spawned this DatabaseAdmin, if any, gets updated to work on
                the newly-created keyspace starting when this method returns.
            keyspace_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `keyspace_admin_timeout_ms`.
            timeout_ms: an alias for `keyspace_admin_timeout_ms`.

        Note: a timeout event is no guarantee at all that the
        creation request has not reached the API server and is not going
        to be, in fact, honored.

        Example:
            >>> admin_for_my_db.list_keyspaces()
            ['default_keyspace']
            >>> admin_for_my_db.create_keyspace("that_other_one")
            >>> admin_for_my_db.list_keyspaces()
            ['default_keyspace', 'that_other_one']
        """

        _keyspace_admin_timeout_ms, _ka_label = _select_singlereq_timeout_ka(
            timeout_options=self.api_options.timeout_options,
            keyspace_admin_timeout_ms=keyspace_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # keep only truthy options, so an unset/empty "replication" is omitted:
        options = {
            k: v
            for k, v in {
                "replication": replication_options,
            }.items()
            if v
        }
        payload = {
            "createKeyspace": {
                **{"name": name},
                **({"options": options} if options else {}),
            }
        }
        logger.info("creating keyspace")
        cn_response = self._api_commander.request(
            payload=payload,
            timeout_context=_TimeoutContext(
                request_ms=_keyspace_admin_timeout_ms, label=_ka_label
            ),
        )
        if (cn_response.get("status") or {}).get("ok") != 1:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from createKeyspace API command.",
                raw_response=cn_response,
            )
        else:
            logger.info("finished creating keyspace")
            if update_db_keyspace:
                # retroactively point the spawning database at the new keyspace:
                self.spawner_database.use_keyspace(name)

    def drop_keyspace(
        self,
        name: str,
        *,
        keyspace_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Drop (delete) a keyspace from the database.

        Args:
            name: the keyspace to delete. If it does not exist in this database,
                an error is raised.
            keyspace_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `keyspace_admin_timeout_ms`.
            timeout_ms: an alias for `keyspace_admin_timeout_ms`.

        Note: a timeout event is no guarantee at all that the
        deletion request has not reached the API server and is not going
        to be, in fact, honored.

        Example:
            >>> admin_for_my_db.list_keyspaces()
            ['default_keyspace', 'that_other_one']
            >>> admin_for_my_db.drop_keyspace("that_other_one")
            >>> admin_for_my_db.list_keyspaces()
            ['default_keyspace']
        """

        _keyspace_admin_timeout_ms, _ka_label = _select_singlereq_timeout_ka(
            timeout_options=self.api_options.timeout_options,
            keyspace_admin_timeout_ms=keyspace_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info("dropping keyspace")
        dn_response = self._api_commander.request(
            payload={"dropKeyspace": {"name": name}},
            timeout_context=_TimeoutContext(
                request_ms=_keyspace_admin_timeout_ms, label=_ka_label
            ),
        )
        if (dn_response.get("status") or {}).get("ok") != 1:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from dropKeyspace API command.",
                raw_response=dn_response,
            )
        else:
            logger.info("finished dropping keyspace")

    async def async_list_keyspaces(
        self,
        *,
        keyspace_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[str]:
        """
        Query the API for a list of the keyspaces in the database.
        Async version of the method, for use in an asyncio context.

        Args:
            keyspace_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `keyspace_admin_timeout_ms`.
            timeout_ms: an alias for `keyspace_admin_timeout_ms`.

        Returns:
            A list of the keyspaces, each a string, in no particular order.

        Example:
            >>> asyncio.run(admin_for_my_db.async_list_keyspaces())
            ['default_keyspace', 'staging_keyspace']
        """

        _keyspace_admin_timeout_ms, _ka_label = _select_singlereq_timeout_ka(
            timeout_options=self.api_options.timeout_options,
            keyspace_admin_timeout_ms=keyspace_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info("getting list of keyspaces, async")
        fn_response = await self._api_commander.async_request(
            payload={"findKeyspaces": {}},
            timeout_context=_TimeoutContext(
                request_ms=_keyspace_admin_timeout_ms, label=_ka_label
            ),
        )
        if "keyspaces" not in fn_response.get("status", {}):
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from findKeyspaces API command.",
                raw_response=fn_response,
            )
        else:
            logger.info("finished getting list of keyspaces, async")
            return fn_response["status"]["keyspaces"]  # type: ignore[no-any-return]

    async def async_create_keyspace(
        self,
        name: str,
        *,
        replication_options: dict[str, Any] | None = None,
        update_db_keyspace: bool | None = None,
        keyspace_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
        **kwargs: Any,
    ) -> None:
        """
        Create a keyspace in the database.
        Async version of the method, for use in an asyncio context.

        Args:
            name: the keyspace name. If supplying a keyspace that exists
                already, the method call proceeds as usual, no errors are
                raised, and the whole invocation is a no-op.
            replication_options: this dictionary can specify the options about
                replication of the keyspace (across database nodes). If provided,
                it must have a structure similar to:
                `{"class": "SimpleStrategy", "replication_factor": 1}`.
            update_db_keyspace: if True, the `Database` or `AsyncDatabase` class
                that spawned this DatabaseAdmin, if any, gets updated to work on
                the newly-created keyspace starting when this method returns.
            keyspace_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `keyspace_admin_timeout_ms`.
            timeout_ms: an alias for `keyspace_admin_timeout_ms`.

        Note: a timeout event is no guarantee at all that the
        creation request has not reached the API server and is not going
        to be, in fact, honored.

        Example:
            >>> admin_for_my_db.list_keyspaces()
            ['default_keyspace']
            >>> asyncio.run(admin_for_my_db.async_create_keyspace(
            ...     "that_other_one"
            ... ))
            >>> admin_for_my_db.list_keyspaces()
            ['default_keyspace', 'that_other_one']
        """

        _keyspace_admin_timeout_ms, _ka_label = _select_singlereq_timeout_ka(
            timeout_options=self.api_options.timeout_options,
            keyspace_admin_timeout_ms=keyspace_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # keep only truthy options, so an unset/empty "replication" is omitted:
        options = {
            k: v
            for k, v in {
                "replication": replication_options,
            }.items()
            if v
        }
        payload = {
            "createKeyspace": {
                **{"name": name},
                **({"options": options} if options else {}),
            }
        }
        logger.info("creating keyspace, async")
        cn_response = await self._api_commander.async_request(
            payload=payload,
            timeout_context=_TimeoutContext(
                request_ms=_keyspace_admin_timeout_ms, label=_ka_label
            ),
        )
        if (cn_response.get("status") or {}).get("ok") != 1:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from createKeyspace API command.",
                raw_response=cn_response,
            )
        else:
            logger.info("finished creating keyspace, async")
            if update_db_keyspace:
                # retroactively point the spawning database at the new keyspace:
                self.spawner_database.use_keyspace(name)

    async def async_drop_keyspace(
        self,
        name: str,
        *,
        keyspace_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Drop (delete) a keyspace from the database.
        Async version of the method, for use in an asyncio context.

        Args:
            name: the keyspace to delete. If it does not exist in this database,
                an error is raised.
            keyspace_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `keyspace_admin_timeout_ms`.
            timeout_ms: an alias for `keyspace_admin_timeout_ms`.

        Note: a timeout event is no guarantee at all that the
        deletion request has not reached the API server and is not going
        to be, in fact, honored.

        Example:
            >>> admin_for_my_db.list_keyspaces()
            ['that_other_one', 'default_keyspace']
            >>> asyncio.run(admin_for_my_db.async_drop_keyspace(
            ...     "that_other_one"
            ... ))
            >>> admin_for_my_db.list_keyspaces()
            ['default_keyspace']
        """

        _keyspace_admin_timeout_ms, _ka_label = _select_singlereq_timeout_ka(
            timeout_options=self.api_options.timeout_options,
            keyspace_admin_timeout_ms=keyspace_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info("dropping keyspace, async")
        dn_response = await self._api_commander.async_request(
            payload={"dropKeyspace": {"name": name}},
            timeout_context=_TimeoutContext(
                request_ms=_keyspace_admin_timeout_ms, label=_ka_label
            ),
        )
        if (dn_response.get("status") or {}).get("ok") != 1:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from dropKeyspace API command.",
                raw_response=dn_response,
            )
        else:
            logger.info("finished dropping keyspace, async")

    def get_database(
        self,
        *,
        keyspace: str | None = None,
        token: str | TokenProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> Database:
        """
        Create a Database instance for a specific database, to be used
        when doing data-level work (such as creating/managing collections).

        Args:
            keyspace: an optional keyspace to set in the resulting Database.
                If not set, the keyspace remains unspecified and must be set later
                with the `use_keyspace` method.
            token: if supplied, is passed to the Database instead of
                the one set for this object. Useful if one wants to work in
                a least-privilege manner, limiting the permissions for non-admin work.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults inherited from the database admin.
                This allows for a deeper configuration of the database admin, e.g.
                concerning timeouts; if this is passed together with
                the named timeout parameters, the latter will take precedence
                in their respective settings.

        Returns:
            A Database object, ready to be used for working with data and collections.

        Example:
            >>> my_db = admin_for_my_db.get_database()
            >>> my_db.list_collection_names()
            ['movies', 'another_collection']

        Note:
            creating an instance of Database does not trigger actual creation
            of the database itself, which should exist beforehand.
        """

        # lazy importing here to avoid circular dependency
        from astrapy import Database

        # this multiple-override implements the alias on timeout params
        resulting_api_options = self.api_options.with_override(
            spawn_api_options,
        ).with_override(
            APIOptions(
                token=token,
            ),
        )

        return Database(
            api_endpoint=self.api_endpoint,
            keyspace=keyspace,
            api_options=resulting_api_options,
        )

    def get_async_database(
        self,
        *,
        keyspace: str | None = None,
        token: str | TokenProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncDatabase:
        """
        Create an AsyncDatabase instance for a specific database, to be used
        when doing data-level work (such as creating/managing collections).

        Args:
            keyspace: an optional keyspace to set in the resulting AsyncDatabase.
                If not set, the keyspace remains unspecified and must be set later
                with the `use_keyspace` method.
            token: if supplied, is passed to the AsyncDatabase instead of
                the one set for this object. Useful if one wants to work in
                a least-privilege manner, limiting the permissions for non-admin work.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults inherited from the database admin.
                This allows for a deeper configuration of the database admin, e.g.
                concerning timeouts; if this is passed together with
                the named timeout parameters, the latter will take precedence
                in their respective settings.

        Returns:
            An AsyncDatabase object, ready to be used for working with
            data and collections.

        Note:
            creating an instance of AsyncDatabase does not trigger actual creation
            of the database itself, which should exist beforehand.
        """

        # delegate to the sync builder, then convert to the async flavor:
        return self.get_database(
            token=token,
            keyspace=keyspace,
            spawn_api_options=spawn_api_options,
        ).to_async()

    def find_embedding_providers(
        self,
        *,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> FindEmbeddingProvidersResult:
        """
        Query the API for the full information on available embedding providers.

        Args:
            database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.

        Returns:
            A `FindEmbeddingProvidersResult` object with the complete information
            returned by the API about available embedding providers

        Example (output abridged and indented for clarity):
            >>> admin_for_my_db.find_embedding_providers()
            FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)
            >>> admin_for_my_db.find_embedding_providers().embedding_providers
            {
                'openai': EmbeddingProvider(
                    display_name='OpenAI',
                    models=[
                        EmbeddingProviderModel(name='text-embedding-3-small'),
                        ...
                    ]
                ),
                ...
            }
        """

        _database_admin_timeout_ms, _da_label = _select_singlereq_timeout_da(
            timeout_options=self.api_options.timeout_options,
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info("findEmbeddingProviders")
        fe_response = self._api_commander.request(
            payload={"findEmbeddingProviders": {}},
            timeout_context=_TimeoutContext(
                request_ms=_database_admin_timeout_ms, label=_da_label
            ),
        )
        if "embeddingProviders" not in fe_response.get("status", {}):
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from findEmbeddingProviders API command.",
                raw_response=fe_response,
            )
        else:
            logger.info("finished findEmbeddingProviders")
            return FindEmbeddingProvidersResult._from_dict(fe_response["status"])

    async def async_find_embedding_providers(
        self,
        *,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> FindEmbeddingProvidersResult:
        """
        Query the API for the full information on available embedding providers.
        Async version of the method, for use in an asyncio context.

        Args:
            database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.

        Returns:
            A `FindEmbeddingProvidersResult` object with the complete information
            returned by the API about available embedding providers

        Example (output abridged and indented for clarity):
            >>> admin_for_my_db.find_embedding_providers()
            FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)
            >>> admin_for_my_db.find_embedding_providers().embedding_providers
            {
                'openai': EmbeddingProvider(
                    display_name='OpenAI',
                    models=[
                        EmbeddingProviderModel(name='text-embedding-3-small'),
                        ...
                    ]
                ),
                ...
            }
        """

        _database_admin_timeout_ms, _da_label = _select_singlereq_timeout_da(
            timeout_options=self.api_options.timeout_options,
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info("findEmbeddingProviders, async")
        fe_response = await self._api_commander.async_request(
            payload={"findEmbeddingProviders": {}},
            timeout_context=_TimeoutContext(
                request_ms=_database_admin_timeout_ms, label=_da_label
            ),
        )
        if "embeddingProviders" not in fe_response.get("status", {}):
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from findEmbeddingProviders API command.",
                raw_response=fe_response,
            )
        else:
            logger.info("finished findEmbeddingProviders, async")
            return FindEmbeddingProvidersResult._from_dict(fe_response["status"])

Ancestors

Methods

async def async_create_keyspace(self, name: str, *, replication_options: dict[str, Any] | None = None, update_db_keyspace: bool | None = None, keyspace_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None, **kwargs: Any) ‑> None

Create a keyspace in the database. Async version of the method, for use in an asyncio context.

Args

name
the keyspace name. If supplying a keyspace that exists already, the method call proceeds as usual, no errors are raised, and the whole invocation is a no-op.
replication_options
this dictionary can specify the options about replication of the keyspace (across database nodes). If provided, it must have a structure similar to: {"class": "SimpleStrategy", "replication_factor": 1}.
update_db_keyspace
if True, the Database or AsyncDatabase class that spawned this DatabaseAdmin, if any, gets updated to work on the newly-created keyspace starting when this method returns.
keyspace_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for keyspace_admin_timeout_ms.
timeout_ms
an alias for keyspace_admin_timeout_ms.

Note: a timeout event is no guarantee at all that the creation request has not reached the API server and is not going to be, in fact, honored.

Example

>>> admin_for_my_db.list_keyspaces()
['default_keyspace']
>>> asyncio.run(admin_for_my_db.async_create_keyspace(
...     "that_other_one"
... ))
>>> admin_for_my_db.list_keyspaces()
['default_keyspace', 'that_other_one']
Expand source code
async def async_create_keyspace(
    self,
    name: str,
    *,
    replication_options: dict[str, Any] | None = None,
    update_db_keyspace: bool | None = None,
    keyspace_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
    **kwargs: Any,
) -> None:
    """
    Create a keyspace in the database.
    Async version of the method, for use in an asyncio context.

    Args:
        name: the keyspace name. If supplying a keyspace that exists
            already, the method call proceeds as usual, no errors are
            raised, and the whole invocation is a no-op.
        replication_options: this dictionary can specify the options about
            replication of the keyspace (across database nodes). If provided,
            it must have a structure similar to:
            `{"class": "SimpleStrategy", "replication_factor": 1}`.
        update_db_keyspace: if True, the `Database` or `AsyncDatabase` class
            that spawned this DatabaseAdmin, if any, gets updated to work on
            the newly-created keyspace starting when this method returns.
        keyspace_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `keyspace_admin_timeout_ms`.
        timeout_ms: an alias for `keyspace_admin_timeout_ms`.

    Note: a timeout event is no guarantee at all that the
    creation request has not reached the API server and is not going
    to be, in fact, honored.

    Example:
        >>> admin_for_my_db.list_keyspaces()
        ['default_keyspace']
        >>> asyncio.run(admin_for_my_db.async_create_keyspace(
        ...     "that_other_one"
        ... ))
        >>> admin_for_my_db.list_keyspaces()
        ['default_keyspace', 'that_other_one']
    """

    # Resolve the effective timeout (the three parameters are aliases here).
    _keyspace_admin_timeout_ms, _ka_label = _select_singlereq_timeout_ka(
        timeout_options=self.api_options.timeout_options,
        keyspace_admin_timeout_ms=keyspace_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # Only truthy option values make it into the payload's "options" part.
    options = {
        k: v
        for k, v in {
            "replication": replication_options,
        }.items()
        if v
    }
    payload = {
        "createKeyspace": {
            "name": name,
            **({"options": options} if options else {}),
        }
    }
    logger.info("creating keyspace, async")
    cn_response = await self._api_commander.async_request(
        payload=payload,
        timeout_context=_TimeoutContext(
            request_ms=_keyspace_admin_timeout_ms, label=_ka_label
        ),
    )
    if (cn_response.get("status") or {}).get("ok") != 1:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from createKeyspace API command.",
            raw_response=cn_response,
        )
    else:
        logger.info("finished creating keyspace, async")
        if update_db_keyspace:
            # Switch the spawning (Async)Database over to the new keyspace.
            self.spawner_database.use_keyspace(name)
async def async_drop_keyspace(self, name: str, *, keyspace_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Drop (delete) a keyspace from the database. Async version of the method, for use in an asyncio context.

Args

name
the keyspace to delete. If it does not exist in this database, an error is raised.
keyspace_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for keyspace_admin_timeout_ms.
timeout_ms
an alias for keyspace_admin_timeout_ms.

Note: a timeout event is no guarantee at all that the deletion request has not reached the API server and is not going to be, in fact, honored.

Example

>>> admin_for_my_db.list_keyspaces()
['that_other_one', 'default_keyspace']
>>> asyncio.run(admin_for_my_db.async_drop_keyspace(
...     "that_other_one"
... ))
>>> admin_for_my_db.list_keyspaces()
['default_keyspace']
Expand source code
async def async_drop_keyspace(
    self,
    name: str,
    *,
    keyspace_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Drop (delete) a keyspace from the database.
    Async version of the method, for use in an asyncio context.

    Args:
        name: the keyspace to delete. An error is raised if it does not
            exist in this database.
        keyspace_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `keyspace_admin_timeout_ms`.
        timeout_ms: an alias for `keyspace_admin_timeout_ms`.

    Note: a timeout event is no guarantee at all that the deletion request
    has not reached the API server and is not going to be, in fact, honored.

    Example:
        >>> admin_for_my_db.list_keyspaces()
        ['that_other_one', 'default_keyspace']
        >>> asyncio.run(admin_for_my_db.async_drop_keyspace(
        ...     "that_other_one"
        ... ))
        >>> admin_for_my_db.list_keyspaces()
        ['default_keyspace']
    """

    # Resolve the effective timeout (the three parameters are aliases here).
    _timeout_ms, _label = _select_singlereq_timeout_ka(
        timeout_options=self.api_options.timeout_options,
        keyspace_admin_timeout_ms=keyspace_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info("dropping keyspace, async")
    dn_response = await self._api_commander.async_request(
        payload={"dropKeyspace": {"name": name}},
        timeout_context=_TimeoutContext(request_ms=_timeout_ms, label=_label),
    )
    # The API signals success with {"status": {"ok": 1}}.
    successful = (dn_response.get("status") or {}).get("ok") == 1
    if not successful:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from dropKeyspace API command.",
            raw_response=dn_response,
        )
    logger.info("finished dropping keyspace, async")
async def async_find_embedding_providers(self, *, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> FindEmbeddingProvidersResult

Query the API for the full information on available embedding providers. Async version of the method, for use in an asyncio context.

Args

database_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for database_admin_timeout_ms.
timeout_ms
an alias for database_admin_timeout_ms.

Returns

A FindEmbeddingProvidersResult object with the complete information returned by the API about available embedding providers.

Example (output abridged and indented for clarity):

>>> admin_for_my_db.find_embedding_providers()
FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)
>>> admin_for_my_db.find_embedding_providers().embedding_providers
{
    'openai': EmbeddingProvider(
        display_name='OpenAI',
        models=[
            EmbeddingProviderModel(name='text-embedding-3-small'),
            ...
        ]
    ),
    ...
}

Expand source code
async def async_find_embedding_providers(
    self,
    *,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> FindEmbeddingProvidersResult:
    """
    Query the API for the full information on available embedding providers.
    Async version of the method, for use in an asyncio context.

    Args:
        database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.

    Returns:
        A `FindEmbeddingProvidersResult` object with the complete information
        returned by the API about available embedding providers.

    Example (output abridged and indented for clarity):
        >>> admin_for_my_db.find_embedding_providers()
        FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)
        >>> admin_for_my_db.find_embedding_providers().embedding_providers
        {
            'openai': EmbeddingProvider(
                display_name='OpenAI',
                models=[
                    EmbeddingProviderModel(name='text-embedding-3-small'),
                    ...
                ]
            ),
            ...
        }
    """

    # Resolve the effective timeout (the three parameters are aliases here).
    _timeout_ms, _label = _select_singlereq_timeout_da(
        timeout_options=self.api_options.timeout_options,
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info("findEmbeddingProviders, async")
    fe_response = await self._api_commander.async_request(
        payload={"findEmbeddingProviders": {}},
        timeout_context=_TimeoutContext(request_ms=_timeout_ms, label=_label),
    )
    # A well-formed response carries the providers under "status".
    status = fe_response.get("status", {})
    if "embeddingProviders" not in status:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from findEmbeddingProviders API command.",
            raw_response=fe_response,
        )
    logger.info("finished findEmbeddingProviders, async")
    return FindEmbeddingProvidersResult._from_dict(status)
async def async_list_keyspaces(self, *, keyspace_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[str]

Query the API for a list of the keyspaces in the database. Async version of the method, for use in an asyncio context.

Args

keyspace_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for keyspace_admin_timeout_ms.
timeout_ms
an alias for keyspace_admin_timeout_ms.

Returns

A list of the keyspaces, each a string, in no particular order.

Example

>>> asyncio.run(admin_for_my_db.async_list_keyspaces())
['default_keyspace', 'staging_keyspace']
Expand source code
async def async_list_keyspaces(
    self,
    *,
    keyspace_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[str]:
    """
    Query the API for a list of the keyspaces in the database.
    Async version of the method, for use in an asyncio context.

    Args:
        keyspace_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `keyspace_admin_timeout_ms`.
        timeout_ms: an alias for `keyspace_admin_timeout_ms`.

    Returns:
        A list of the keyspaces, each a string, in no particular order.

    Example:
        >>> asyncio.run(admin_for_my_db.async_list_keyspaces())
        ['default_keyspace', 'staging_keyspace']
    """

    # Resolve the effective timeout (the three parameters are aliases here).
    _timeout_ms, _label = _select_singlereq_timeout_ka(
        timeout_options=self.api_options.timeout_options,
        keyspace_admin_timeout_ms=keyspace_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info("getting list of keyspaces, async")
    fn_response = await self._api_commander.async_request(
        payload={"findKeyspaces": {}},
        timeout_context=_TimeoutContext(request_ms=_timeout_ms, label=_label),
    )
    # A well-formed response carries the keyspace names under "status".
    status = fn_response.get("status", {})
    if "keyspaces" not in status:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from findKeyspaces API command.",
            raw_response=fn_response,
        )
    logger.info("finished getting list of keyspaces, async")
    return status["keyspaces"]  # type: ignore[no-any-return]
def create_keyspace(self, name: str, *, replication_options: dict[str, Any] | None = None, update_db_keyspace: bool | None = None, keyspace_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None, **kwargs: Any) ‑> None

Create a keyspace in the database.

Args

name
the keyspace name. If supplying a keyspace that exists already, the method call proceeds as usual, no errors are raised, and the whole invocation is a no-op.
replication_options
this dictionary can specify the options about replication of the keyspace (across database nodes). If provided, it must have a structure similar to: {"class": "SimpleStrategy", "replication_factor": 1}.
update_db_keyspace
if True, the Database or AsyncDatabase class that spawned this DatabaseAdmin, if any, gets updated to work on the newly-created keyspace starting when this method returns.
keyspace_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for keyspace_admin_timeout_ms.
timeout_ms
an alias for keyspace_admin_timeout_ms.

Note: a timeout event is no guarantee at all that the creation request has not reached the API server and is not going to be, in fact, honored.

Example

>>> admin_for_my_db.list_keyspaces()
['default_keyspace']
>>> admin_for_my_db.create_keyspace("that_other_one")
>>> admin_for_my_db.list_keyspaces()
['default_keyspace', 'that_other_one']
Expand source code
def create_keyspace(
    self,
    name: str,
    *,
    replication_options: dict[str, Any] | None = None,
    update_db_keyspace: bool | None = None,
    keyspace_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
    **kwargs: Any,
) -> None:
    """
    Create a keyspace in the database.

    Args:
        name: the keyspace name. If supplying a keyspace that exists
            already, the method call proceeds as usual, no errors are
            raised, and the whole invocation is a no-op.
        replication_options: this dictionary can specify the options about
            replication of the keyspace (across database nodes). If provided,
            it must have a structure similar to:
            `{"class": "SimpleStrategy", "replication_factor": 1}`.
        update_db_keyspace: if True, the `Database` or `AsyncDatabase` class
            that spawned this DatabaseAdmin, if any, gets updated to work on
            the newly-created keyspace starting when this method returns.
        keyspace_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `keyspace_admin_timeout_ms`.
        timeout_ms: an alias for `keyspace_admin_timeout_ms`.

    Note: a timeout event is no guarantee at all that the creation request
    has not reached the API server and is not going to be, in fact, honored.

    Example:
        >>> admin_for_my_db.list_keyspaces()
        ['default_keyspace']
        >>> admin_for_my_db.create_keyspace("that_other_one")
        >>> admin_for_my_db.list_keyspaces()
        ['default_keyspace', 'that_other_one']
    """

    # Resolve the effective timeout (the three parameters are aliases here).
    _timeout_ms, _label = _select_singlereq_timeout_ka(
        timeout_options=self.api_options.timeout_options,
        keyspace_admin_timeout_ms=keyspace_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # Assemble the command body; "options" is included only when nonempty.
    options: dict[str, Any] = {}
    if replication_options:
        options["replication"] = replication_options
    command_body: dict[str, Any] = {"name": name}
    if options:
        command_body["options"] = options
    payload = {"createKeyspace": command_body}
    logger.info("creating keyspace")
    cn_response = self._api_commander.request(
        payload=payload,
        timeout_context=_TimeoutContext(request_ms=_timeout_ms, label=_label),
    )
    # The API signals success with {"status": {"ok": 1}}.
    successful = (cn_response.get("status") or {}).get("ok") == 1
    if not successful:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from createKeyspace API command.",
            raw_response=cn_response,
        )
    logger.info("finished creating keyspace")
    if update_db_keyspace:
        # Switch the spawning (Async)Database over to the new keyspace.
        self.spawner_database.use_keyspace(name)
def drop_keyspace(self, name: str, *, keyspace_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Drop (delete) a keyspace from the database.

Args

name
the keyspace to delete. If it does not exist in this database, an error is raised.
keyspace_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for keyspace_admin_timeout_ms.
timeout_ms
an alias for keyspace_admin_timeout_ms.

Note: a timeout event is no guarantee at all that the deletion request has not reached the API server and is not going to be, in fact, honored.

Example

>>> admin_for_my_db.list_keyspaces()
['default_keyspace', 'that_other_one']
>>> admin_for_my_db.drop_keyspace("that_other_one")
>>> admin_for_my_db.list_keyspaces()
['default_keyspace']
Expand source code
def drop_keyspace(
    self,
    name: str,
    *,
    keyspace_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Drop (delete) a keyspace from the database.

    Args:
        name: the keyspace to delete. An error is raised if it does not
            exist in this database.
        keyspace_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `keyspace_admin_timeout_ms`.
        timeout_ms: an alias for `keyspace_admin_timeout_ms`.

    Note: a timeout event is no guarantee at all that the deletion request
    has not reached the API server and is not going to be, in fact, honored.

    Example:
        >>> admin_for_my_db.list_keyspaces()
        ['default_keyspace', 'that_other_one']
        >>> admin_for_my_db.drop_keyspace("that_other_one")
        >>> admin_for_my_db.list_keyspaces()
        ['default_keyspace']
    """

    # Resolve the effective timeout (the three parameters are aliases here).
    _timeout_ms, _label = _select_singlereq_timeout_ka(
        timeout_options=self.api_options.timeout_options,
        keyspace_admin_timeout_ms=keyspace_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info("dropping keyspace")
    dn_response = self._api_commander.request(
        payload={"dropKeyspace": {"name": name}},
        timeout_context=_TimeoutContext(request_ms=_timeout_ms, label=_label),
    )
    # The API signals success with {"status": {"ok": 1}}.
    successful = (dn_response.get("status") or {}).get("ok") == 1
    if not successful:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from dropKeyspace API command.",
            raw_response=dn_response,
        )
    logger.info("finished dropping keyspace")
def find_embedding_providers(self, *, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> FindEmbeddingProvidersResult

Query the API for the full information on available embedding providers.

Args

database_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for database_admin_timeout_ms.
timeout_ms
an alias for database_admin_timeout_ms.

Returns

A FindEmbeddingProvidersResult object with the complete information returned by the API about available embedding providers.

Example (output abridged and indented for clarity):

>>> admin_for_my_db.find_embedding_providers()
FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)
>>> admin_for_my_db.find_embedding_providers().embedding_providers
{
    'openai': EmbeddingProvider(
        display_name='OpenAI',
        models=[
            EmbeddingProviderModel(name='text-embedding-3-small'),
            ...
        ]
    ),
    ...
}

Expand source code
def find_embedding_providers(
    self,
    *,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> FindEmbeddingProvidersResult:
    """
    Query the API for the full information on available embedding providers.

    Args:
        database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.

    Returns:
        A `FindEmbeddingProvidersResult` object with the complete information
        returned by the API about available embedding providers.

    Example (output abridged and indented for clarity):
        >>> admin_for_my_db.find_embedding_providers()
        FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)
        >>> admin_for_my_db.find_embedding_providers().embedding_providers
        {
            'openai': EmbeddingProvider(
                display_name='OpenAI',
                models=[
                    EmbeddingProviderModel(name='text-embedding-3-small'),
                    ...
                ]
            ),
            ...
        }
    """

    # Resolve the effective timeout (the three parameters are aliases here).
    _timeout_ms, _label = _select_singlereq_timeout_da(
        timeout_options=self.api_options.timeout_options,
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info("findEmbeddingProviders")
    fe_response = self._api_commander.request(
        payload={"findEmbeddingProviders": {}},
        timeout_context=_TimeoutContext(request_ms=_timeout_ms, label=_label),
    )
    # A well-formed response carries the providers under "status".
    status = fe_response.get("status", {})
    if "embeddingProviders" not in status:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from findEmbeddingProviders API command.",
            raw_response=fe_response,
        )
    logger.info("finished findEmbeddingProviders")
    return FindEmbeddingProvidersResult._from_dict(status)
def get_async_database(self, *, keyspace: str | None = None, token: str | TokenProvider | UnsetType = (unset), spawn_api_options: APIOptions | UnsetType = (unset)) ‑> AsyncDatabase

Create an AsyncDatabase instance for a specific database, to be used when doing data-level work (such as creating/managing collections).

Args

keyspace
an optional keyspace to set in the resulting AsyncDatabase. If not set, the keyspace remains unspecified and must be set later with the use_keyspace method.
token
if supplied, is passed to the AsyncDatabase instead of the one set for this object. Useful if one wants to work in a least-privilege manner, limiting the permissions for non-admin work. This can be either a literal token string or a subclass of TokenProvider.
spawn_api_options
a specification - complete or partial - of the API Options to override the defaults inherited from the database admin. This allows for a deeper configuration of the database admin, e.g. concerning timeouts; if this is passed together with the named timeout parameters, the latter will take precedence in their respective settings.

Returns

An AsyncDatabase object, ready to be used for working with data and collections.

Note

creating an instance of AsyncDatabase does not trigger actual creation of the database itself, which should exist beforehand.

Expand source code
def get_async_database(
    self,
    *,
    keyspace: str | None = None,
    token: str | TokenProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> AsyncDatabase:
    """
    Create an AsyncDatabase instance for a specific database, to be used
    when doing data-level work (such as creating/managing collections).

    Args:
        keyspace: an optional keyspace to set in the resulting AsyncDatabase.
            If not set, the keyspace remains unspecified and must be set later
            with the `use_keyspace` method.
        token: if supplied, is passed to the AsyncDatabase instead of
            the one set for this object. Useful if one wants to work in
            a least-privilege manner, limiting the permissions for non-admin work.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        spawn_api_options: a specification - complete or partial - of the
            API Options to override the defaults inherited from the database admin.
            This allows for a deeper configuration of the database admin, e.g.
            concerning timeouts; if this is passed together with
            the named timeout parameters, the latter will take precedence
            in their respective settings.

    Returns:
        An AsyncDatabase object, ready to be used for working with
        data and collections.

    Note:
        creating an instance of AsyncDatabase does not trigger actual creation
        of the database itself, which should exist beforehand.
    """

    # Delegate to the sync factory, then convert the result to async.
    sync_database = self.get_database(
        token=token,
        keyspace=keyspace,
        spawn_api_options=spawn_api_options,
    )
    return sync_database.to_async()
def get_database(self, *, keyspace: str | None = None, token: str | TokenProvider | UnsetType = (unset), spawn_api_options: APIOptions | UnsetType = (unset)) ‑> Database

Create a Database instance for a specific database, to be used when doing data-level work (such as creating/managing collections).

Args

keyspace
an optional keyspace to set in the resulting Database. If not set, the keyspace remains unspecified and must be set later with the use_keyspace method.
token
if supplied, is passed to the Database instead of the one set for this object. Useful if one wants to work in a least-privilege manner, limiting the permissions for non-admin work. This can be either a literal token string or a subclass of TokenProvider.
spawn_api_options
a specification - complete or partial - of the API Options to override the defaults inherited from the database admin. This allows for a deeper configuration of the database admin, e.g. concerning timeouts; if this is passed together with the named timeout parameters, the latter will take precedence in their respective settings.

Returns

A Database object, ready to be used for working with data and collections.

Example

>>> my_db = admin_for_my_db.get_database()
>>> my_db.list_collection_names()
['movies', 'another_collection']

Note

creating an instance of Database does not trigger actual creation of the database itself, which should exist beforehand.

Expand source code
def get_database(
    self,
    *,
    keyspace: str | None = None,
    token: str | TokenProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> Database:
    """
    Create a Database instance for a specific database, to be used
    when doing data-level work (such as creating/managing collections).

    Args:
        keyspace: an optional keyspace to set in the resulting Database.
            If not set, the keyspace remains unspecified and must be set later
            with the `use_keyspace` method.
        token: if supplied, is passed to the Database instead of
            the one set for this object. Useful if one wants to work in
            a least-privilege manner, limiting the permissions for non-admin work.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        spawn_api_options: a specification - complete or partial - of the
            API Options to override the defaults inherited from the database admin.
            This allows for a deeper configuration of the database admin, e.g.
            concerning timeouts; if this is passed together with
            the named timeout parameters, the latter will take precedence
            in their respective settings.

    Returns:
        A Database object, ready to be used for working with data and collections.

    Example:
        >>> my_db = admin_for_my_db.get_database()
        >>> my_db.list_collection_names()
        ['movies', 'another_collection']

    Note:
        creating an instance of Database does not trigger actual creation
        of the database itself, which should exist beforehand.
    """

    # lazy importing here to avoid circular dependency
    from astrapy import Database

    # Layer the overrides: first the caller-supplied spawn options over this
    # object's defaults, then the (possibly unset) token on top of both.
    base_options = self.api_options.with_override(spawn_api_options)
    resulting_api_options = base_options.with_override(APIOptions(token=token))

    return Database(
        api_endpoint=self.api_endpoint,
        keyspace=keyspace,
        api_options=resulting_api_options,
    )
def list_keyspaces(self, *, keyspace_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[str]

Query the API for a list of the keyspaces in the database.

Args

keyspace_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for keyspace_admin_timeout_ms.
timeout_ms
an alias for keyspace_admin_timeout_ms.

Returns

A list of the keyspaces, each a string, in no particular order.

Example

>>> admin_for_my_db.list_keyspaces()
['default_keyspace', 'staging_keyspace']
Expand source code
def list_keyspaces(
    self,
    *,
    keyspace_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[str]:
    """
    Query the API for a list of the keyspaces in the database.

    Args:
        keyspace_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `keyspace_admin_timeout_ms`.
        timeout_ms: an alias for `keyspace_admin_timeout_ms`.

    Returns:
        A list of the keyspaces, each a string, in no particular order.

    Example:
        >>> admin_for_my_db.list_keyspaces()
        ['default_keyspace', 'staging_keyspace']
    """

    # Collapse the three equivalent timeout parameters into one effective value.
    chosen_timeout_ms, chosen_label = _select_singlereq_timeout_ka(
        timeout_options=self.api_options.timeout_options,
        keyspace_admin_timeout_ms=keyspace_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info("getting list of keyspaces")
    api_response = self._api_commander.request(
        payload={"findKeyspaces": {}},
        timeout_context=_TimeoutContext(
            request_ms=chosen_timeout_ms, label=chosen_label
        ),
    )
    # A well-formed response carries the keyspace list under "status".
    if "keyspaces" in api_response.get("status", {}):
        logger.info("finished getting list of keyspaces")
        return api_response["status"]["keyspaces"]  # type: ignore[no-any-return]
    raise UnexpectedDataAPIResponseException(
        text="Faulty response from findKeyspaces API command.",
        raw_response=api_response,
    )
def with_options(self, *, token: str | TokenProvider | UnsetType = (unset), api_options: APIOptions | UnsetType = (unset)) ‑> DataAPIDatabaseAdmin

Create a clone of this DataAPIDatabaseAdmin with some changed attributes.

Args

token
an access token with enough permission to perform admin tasks. This can be either a literal token string or a subclass of TokenProvider.
api_options
any additional options to set for the clone, in the form of an APIOptions instance (where one can set just the needed attributes). In case the same setting is also provided as named parameter, the latter takes precedence.

Returns

a new DataAPIDatabaseAdmin instance.

Example

>>> admin_with_other_token = admin_for_my_db.with_options(
...     token="<new-token>",
... )
Expand source code
def with_options(
    self,
    *,
    token: str | TokenProvider | UnsetType = _UNSET,
    api_options: APIOptions | UnsetType = _UNSET,
) -> DataAPIDatabaseAdmin:
    """
    Create a clone of this DataAPIDatabaseAdmin with some changed attributes.

    Args:
        token: an access token with enough permission to perform admin tasks.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        api_options: any additional options to set for the clone, in the form of
            an APIOptions instance (where one can set just the needed attributes).
            In case the same setting is also provided as named parameter,
            the latter takes precedence.

    Returns:
        a new DataAPIDatabaseAdmin instance.

    Example:
        >>> admin_with_other_token = admin_for_my_db.with_options(
        ...     token="<new-token>",
        ... )
    """

    # Fix: the previous docstring example passed `api_endpoint=...`, which is
    # not a parameter of this method and would raise a TypeError if run.
    # Delegate to _copy, which layers the overrides onto the current options.
    return self._copy(
        token=token,
        api_options=api_options,
    )
class Database (*, api_endpoint: str, keyspace: str | None, api_options: FullAPIOptions)

A Data API database. This is the object for doing database-level DML, such as creating/deleting collections, and for obtaining Collection objects themselves. This class has a synchronous interface.

This class is not meant for direct instantiation by the user, rather it is obtained by invoking methods such as get_database of AstraDBClient.

On Astra DB, a Database comes with an "API Endpoint", which implies a Database object instance reaches a specific region (relevant point in case of multi-region databases).

A Database is also always set with a "working keyspace" on which all data operations are done (unless otherwise specified).

Args

api_endpoint
the full "API Endpoint" string used to reach the Data API. Example: "https://<database_id>-<region>.apps.astra.datastax.com"
keyspace
this is the keyspace all method calls will target, unless one is explicitly specified in the call. If no keyspace is supplied when creating a Database, on Astra DB the name "default_keyspace" is set, while on other environments the keyspace is left unspecified: in this case, most operations are unavailable until a keyspace is set (through an explicit use_keyspace invocation or equivalent).
api_options
a complete specification of the API Options for this instance.

Example

>>> from astrapy import DataAPIClient
>>> my_client = DataAPIClient()
>>> my_db = my_client.get_database(
...     "https://01234567-....apps.astra.datastax.com",
...     token="AstraCS:...",
... )

Note

creating an instance of Database does not trigger actual creation of the database itself, which should exist beforehand. To create databases, see the AstraDBAdmin class.

Expand source code
class Database:
    """
    A Data API database. This is the object for doing database-level
    DML, such as creating/deleting collections, and for obtaining Collection
    objects themselves. This class has a synchronous interface.

    This class is not meant for direct instantiation by the user, rather
    it is obtained by invoking methods such as `get_database`
    of AstraDBClient.

    On Astra DB, a Database comes with an "API Endpoint", which implies
    a Database object instance reaches a specific region (relevant point in
    case of multi-region databases).

    A Database is also always set with a "working keyspace" on which all
    data operations are done (unless otherwise specified).

    Args:
        api_endpoint: the full "API Endpoint" string used to reach the Data API.
            Example: "https://<database_id>-<region>.apps.astra.datastax.com"
        keyspace: this is the keyspace all method calls will target, unless
            one is explicitly specified in the call. If no keyspace is supplied
            when creating a Database, on Astra DB the name "default_keyspace" is set,
            while on other environments the keyspace is left unspecified: in this case,
            most operations are unavailable until a keyspace is set (through an explicit
            `use_keyspace` invocation or equivalent).
        api_options: a complete specification of the API Options for this instance.

    Example:
        >>> from astrapy import DataAPIClient
        >>> my_client = astrapy.DataAPIClient()
        >>> my_db = my_client.get_database(
        ...     "https://01234567-....apps.astra.datastax.com",
        ...     token="AstraCS:...",
        ... )

    Note:
        creating an instance of Database does not trigger actual creation
        of the database itself, which should exist beforehand. To create databases,
        see the AstraDBAdmin class.
    """

    def __init__(
        self,
        *,
        api_endpoint: str,
        keyspace: str | None,
        api_options: FullAPIOptions,
    ) -> None:
        # Full option bundle (token, environment, timeouts, headers, ...).
        self.api_options = api_options
        # Normalize the endpoint: strip leading/trailing "/" so later path
        # joining produces well-formed URLs.
        self.api_endpoint = api_endpoint.strip("/")
        # enforce defaults if on Astra DB:
        self._using_keyspace: str | None
        if (
            keyspace is None
            and self.api_options.environment in Environment.astra_db_values
        ):
            self._using_keyspace = DEFAULT_ASTRA_DB_KEYSPACE
        else:
            self._using_keyspace = keyspace

        # Headers attached to every Data API request: auth plus any extras
        # configured in the options.
        self._commander_headers = {
            DEFAULT_DATA_API_AUTH_HEADER: self.api_options.token.get_token(),
            **self.api_options.database_additional_headers,
        }
        # Database name is fetched lazily on first `name()` call and cached here.
        self._name: str | None = None
        # May be None when no keyspace is set yet (see _get_api_commander).
        self._api_commander = self._get_api_commander(keyspace=self.keyspace)

    def __getattr__(self, collection_name: str) -> Collection[DefaultDocumentType]:
        """Attribute sugar: `db.my_coll` is equivalent to `db.get_collection("my_coll")`."""
        return self.get_collection(collection_name)

    def __getitem__(self, collection_name: str) -> Collection[DefaultDocumentType]:
        """Indexing sugar: `db["my_coll"]` is equivalent to `db.get_collection("my_coll")`."""
        return self.get_collection(collection_name)

    def __repr__(self) -> str:
        """Compact representation showing endpoint, working keyspace and options."""
        if self._using_keyspace is None:
            ks_part = "keyspace not set"
        else:
            ks_part = f'keyspace="{self._using_keyspace}"'
        described = ", ".join(
            [
                f'api_endpoint="{self.api_endpoint}"',
                ks_part,
                f"api_options={self.api_options}",
            ]
        )
        return f"{self.__class__.__name__}({described})"

    def __eq__(self, other: Any) -> bool:
        """Two Database objects are equal when endpoint, keyspace and options all match."""
        if not isinstance(other, Database):
            return False
        return all(
            getattr(self, attribute) == getattr(other, attribute)
            for attribute in ("api_endpoint", "keyspace", "api_options")
        )

    def _get_api_commander(self, keyspace: str | None) -> APICommander | None:
        """
        Instantiate a new APICommander based on the properties of this class
        and a provided keyspace.

        If keyspace is None, return None (signaling a "keyspace not set").
        """

        if keyspace is None:
            return None
        # Assemble the request base path from api_path, api_version and the
        # keyspace, skipping parts that are None or reduce to empty strings.
        raw_parts = (
            self.api_options.data_api_url_options.api_path,
            self.api_options.data_api_url_options.api_version,
            keyspace,
        )
        cleaned_parts: list[str] = []
        for part in raw_parts:
            if part is None:
                continue
            stripped_part = part.strip("/")
            if stripped_part:
                cleaned_parts.append(stripped_part)
        return APICommander(
            api_endpoint=self.api_endpoint,
            path=f"/{'/'.join(cleaned_parts)}",
            headers=self._commander_headers,
            callers=self.api_options.callers,
            redacted_header_names=self.api_options.redacted_header_names,
        )

    def _get_driver_commander(self, keyspace: str | None) -> APICommander:
        """
        Building on _get_api_commander, fall back to class keyspace in
        creating/returning a commander, and in any case raise an error if not set.
        """
        # A (truthy) explicit keyspace gets its own commander; otherwise reuse
        # the one already bound to this database's working keyspace.
        commander = (
            self._get_api_commander(keyspace=keyspace)
            if keyspace
            else self._api_commander
        )
        if commander is None:
            raise ValueError(
                "No keyspace specified. This operation requires a keyspace to "
                "be set, e.g. through the `use_keyspace` method."
            )
        return commander

    def _copy(
        self,
        *,
        keyspace: str | None = None,
        token: str | TokenProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> Database:
        """Clone this Database, layering the given overrides onto its options."""
        # Precedence: explicit `token` > `api_options` bundle > current options.
        merged_options = self.api_options.with_override(api_options).with_override(
            APIOptions(token=token)
        )
        return Database(
            api_endpoint=self.api_endpoint,
            keyspace=keyspace or self.keyspace,
            api_options=merged_options,
        )

    def with_options(
        self,
        *,
        keyspace: str | None = None,
        token: str | TokenProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> Database:
        """
        Create a clone of this database with some changed attributes.

        Args:
            keyspace: this is the keyspace all method calls will target, unless
                one is explicitly specified in the call. If no keyspace is supplied
                when creating a Database, the name "default_keyspace" is set.
            token: an Access Token to the database. Example: `"AstraCS:xyz..."`.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            api_options: any additional options to set for the clone, in the form of
                an APIOptions instance (where one can set just the needed attributes).
                In case the same setting is also provided as named parameter,
                the latter takes precedence.

        Returns:
            a new `Database` instance.

        Example:
            >>> my_db_2 = my_db.with_options(
            ...     keyspace="the_other_keyspace",
            ...     token="AstraCS:xyz...",
            ... )
        """

        # All override-merging logic lives in _copy; this is a thin public alias.
        return self._copy(keyspace=keyspace, token=token, api_options=api_options)

    def to_async(
        self,
        *,
        keyspace: str | None = None,
        token: str | TokenProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncDatabase:
        """
        Create an AsyncDatabase from this one. Save for the arguments
        explicitly provided as overrides, everything else is kept identical
        to this database in the copy.

        Args:
            keyspace: this is the keyspace all method calls will target, unless
                one is explicitly specified in the call. If no keyspace is supplied
                when creating a Database, the name "default_keyspace" is set.
            token: an Access Token to the database. Example: "AstraCS:xyz..."
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            api_options: any additional options to set for the result, in the form of
                an APIOptions instance (where one can set just the needed attributes).
                In case the same setting is also provided as named parameter,
                the latter takes precedence.

        Returns:
            the new copy, an `AsyncDatabase` instance.

        Example:
            >>> async_database = my_db.to_async()
            >>> asyncio.run(async_database.list_collection_names())
        """

        # Same override precedence as _copy: token > api_options > current options.
        merged_options = self.api_options.with_override(api_options).with_override(
            APIOptions(token=token)
        )
        return AsyncDatabase(
            api_endpoint=self.api_endpoint,
            keyspace=keyspace or self.keyspace,
            api_options=merged_options,
        )

    def use_keyspace(self, keyspace: str) -> None:
        """
        Switch to a new working keyspace for this database.
        This method changes (mutates) the Database instance.

        Note that this method does not create the keyspace, which should exist
        already (created for instance with a `DatabaseAdmin.create_keyspace` call).

        Args:
            keyspace: the new keyspace to use as the database working keyspace.

        Returns:
            None.

        Example:
            >>> my_db.list_collection_names()
            ['coll_1', 'coll_2']
            >>> my_db.use_keyspace("an_empty_keyspace")
            >>> my_db.list_collection_names()
            []
        """
        logger.info(f"switching to keyspace '{keyspace}'")
        self._using_keyspace = keyspace
        # Rebuild the commander so subsequent requests target the new keyspace.
        self._api_commander = self._get_api_commander(keyspace=keyspace)

    def info(
        self,
        *,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> AstraDBDatabaseInfo:
        """
        Additional information on the database as an AstraDBDatabaseInfo instance.

        Some of the returned properties are dynamic throughout the lifetime
        of the database (such as raw_info["keyspaces"]). For this reason,
        each invocation of this method triggers a new request to the DevOps API.

        Args:
            database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.

        Example:
            >>> my_db.info().region
            'eu-west-1'

            >>> my_db.info().raw_info['datacenters'][0]['dateCreated']
            '2023-01-30T12:34:56Z'

        Note:
            see the AstraDBDatabaseInfo documentation for a caveat about the difference
            between the `region` and the `raw["region"]` attributes.
        """

        # The DevOps API behind this call is available only on Astra DB.
        if self.api_options.environment not in Environment.astra_db_values:
            raise InvalidEnvironmentException(
                "Environments outside of Astra DB are not supported."
            )

        effective_timeout_ms, _timeout_label = _select_singlereq_timeout_da(
            timeout_options=self.api_options.timeout_options,
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info("getting database info")
        database_info = fetch_database_info(
            self.api_endpoint,
            keyspace=self.keyspace,
            request_timeout_ms=effective_timeout_ms,
            api_options=self.api_options,
        )
        if database_info is None:
            raise DevOpsAPIException("Failure while fetching database info.")
        logger.info("finished getting database info")
        return database_info

    @property
    def id(self) -> str:
        """
        The ID of this database.

        Example:
            >>> my_db.id
            '01234567-89ab-cdef-0123-456789abcdef'
        """

        # The database ID is encoded in the API endpoint; parsing fails for
        # endpoints outside the recognized (Astra DB) format.
        parsed = parse_api_endpoint(self.api_endpoint)
        if parsed is None:
            raise DevOpsAPIException(
                "Database is not in a supported environment for this operation."
            )
        return parsed.database_id

    @property
    def region(self) -> str:
        """
        The region where this database is located.

        The region is still well defined in case of multi-region databases,
        since a Database instance connects to exactly one of the regions
        (as specified by the API Endpoint).

        Example:
            >>> my_db.region
            'us-west-2'
        """

        # Like `id`, the region is read off the parsed API endpoint.
        parsed = parse_api_endpoint(self.api_endpoint)
        if parsed is None:
            raise DevOpsAPIException(
                "Database is not in a supported environment for this operation."
            )
        return parsed.region

    def name(self) -> str:
        """
        The name of this database. Note that this bears no unicity guarantees.

        Calling this method the first time involves a request
        to the DevOps API (the resulting database name is then cached).
        See the `info()` method for more details.

        Example:
            >>> my_db.name()
            'the_application_database'
        """

        cached_name = self._name
        if cached_name is None:
            # First invocation: fetch from the DevOps API and memoize.
            cached_name = self.info().name
            self._name = cached_name
        return cached_name

    @property
    def keyspace(self) -> str | None:
        """
        The keyspace this database uses as target for all commands when
        no method-call-specific keyspace is specified.

        Returns:
            the working keyspace (a string), or None if not set.

        Example:
            >>> my_db.keyspace
            'the_keyspace'
        """

        # Mutable state: changed by `use_keyspace` after construction.
        return self._using_keyspace

    # Overload: without `document_type`, documents are typed as
    # DefaultDocumentType (per the impl docstring, a dict[str, Any]).
    @overload
    def get_collection(
        self,
        name: str,
        *,
        keyspace: str | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> Collection[DefaultDocumentType]: ...

    # Overload: an explicit `document_type` pins the returned Collection's
    # type parameter for the type checker.
    @overload
    def get_collection(
        self,
        name: str,
        *,
        document_type: type[DOC],
        keyspace: str | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> Collection[DOC]: ...

    def get_collection(
        self,
        name: str,
        *,
        document_type: type[Any] = DefaultDocumentType,
        keyspace: str | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> Collection[DOC]:
        """
        Spawn a `Collection` object instance representing a collection
        on this database.

        Creating a `Collection` instance does not have any effect on the
        actual state of the database: in other words, for the created
        `Collection` instance to be used meaningfully, the collection
        must exist already (for instance, it should have been created
        previously by calling the `create_collection` method).

        Args:
            name: the name of the collection.
            document_type: this parameter acts a formal specifier for the type checker.
                If omitted, the resulting Collection is implicitly
                a `Collection[dict[str, Any]]`. If provided, it must match the
                type hint specified in the assignment.
                See the examples below.
            keyspace: the keyspace containing the collection. If no keyspace
                is specified, the general setting for this database is used.
            embedding_api_key: optional API key(s) for interacting with the collection.
                If an embedding service is configured, and this parameter is not None,
                each Data API call will include the necessary embedding-related headers
                as specified by this parameter. If a string is passed, it translates
                into the one "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
                For some vectorize providers/models, if using header-based
                authentication, specialized subclasses of
                `astrapy.authentication.EmbeddingHeadersProvider` should be supplied.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults inherited from the Database.
                This allows for a deeper configuration of the collection, e.g.
                concerning timeouts; if this is passed together with
                the named timeout parameters, the latter will take precedence
                in their respective settings.

        Returns:
            a `Collection` instance, representing the desired collection
                (but without any form of validation).

        Example:
            >>> my_col = my_db.get_collection("my_collection")
            >>> my_col.count_documents({}, upper_bound=100)
            41

        Note:
            The attribute and indexing syntax forms achieve the same effect
            as this method. In other words, the following are equivalent:
                my_db.get_collection("coll_name")
                my_db.coll_name
                my_db["coll_name"]
        """

        # lazy importing here against circular-import error
        from astrapy.collection import Collection

        # Layer the overrides: the spawn_api_options bundle first, then the
        # single named parameter (embedding_api_key), which takes precedence.
        collection_api_options = self.api_options.with_override(
            spawn_api_options
        ).with_override(APIOptions(embedding_api_key=embedding_api_key))

        target_keyspace = keyspace or self.keyspace
        if target_keyspace is None:
            raise ValueError(
                "No keyspace specified. This operation requires a keyspace to "
                "be set, e.g. through the `use_keyspace` method."
            )
        return Collection(
            database=self,
            name=name,
            keyspace=target_keyspace,
            api_options=collection_api_options,
        )

    # Overload: without `document_type`, the new collection is typed with
    # DefaultDocumentType documents.
    @overload
    def create_collection(
        self,
        name: str,
        *,
        definition: CollectionDefinition | dict[str, Any] | None = None,
        keyspace: str | None = None,
        collection_admin_timeout_ms: int | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> Collection[DefaultDocumentType]: ...

    # Overload: an explicit `document_type` pins the returned Collection's
    # type parameter for the type checker.
    @overload
    def create_collection(
        self,
        name: str,
        *,
        definition: CollectionDefinition | dict[str, Any] | None = None,
        document_type: type[DOC],
        keyspace: str | None = None,
        collection_admin_timeout_ms: int | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> Collection[DOC]: ...

    def create_collection(
        self,
        name: str,
        *,
        definition: CollectionDefinition | dict[str, Any] | None = None,
        document_type: type[Any] = DefaultDocumentType,
        keyspace: str | None = None,
        collection_admin_timeout_ms: int | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> Collection[DOC]:
        """
        Creates a collection on the database and return the Collection
        instance that represents it.

        This is a blocking operation: the method returns when the collection
        is ready to be used. As opposed to the `get_collection` method,
        this method causes the collection to be actually created on DB.

        Args:
            name: the name of the collection.
            definition: a complete collection definition. This can be an
                instance of `CollectionDefinition` or an equivalent (nested) dictionary,
                in which case it will be parsed into a `CollectionDefinition`.
                See the `astrapy.info.CollectionDefinition` class and the
                `Collection` class for more details and ways to construct this object.
            document_type: this parameter acts a formal specifier for the type checker.
                If omitted, the resulting Collection is implicitly
                a `Collection[dict[str, Any]]`. If provided, it must match the
                type hint specified in the assignment.
                See the examples below.
            keyspace: the keyspace where the collection is to be created.
                If not specified, the general setting for this database is used.
            collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
            embedding_api_key: optional API key(s) for interacting with the collection.
                If an embedding service is configured, and this parameter is not None,
                each Data API call will include the necessary embedding-related headers
                as specified by this parameter. If a string is passed, it translates
                into the one "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
                For some vectorize providers/models, if using header-based authentication,
                specialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`
                should be supplied.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults inherited from the Database.
                This allows for a deeper configuration of the collection, e.g.
                concerning timeouts; if this is passed together with
                the named timeout parameters, the latter will take precedence
                in their respective settings.

        Returns:
            a (synchronous) `Collection` instance, representing the
            newly-created collection.

        Example:
            >>> # Create a collection using the fluent syntax for its definition
            >>> from astrapy.constants import VectorMetric
            >>> from astrapy.info import CollectionDefinition
            >>>
            >>> collection_definition = (
            ...     CollectionDefinition.builder()
            ...     .set_vector_dimension(3)
            ...     .set_vector_metric(VectorMetric.DOT_PRODUCT)
            ...     .set_indexing("deny", ["annotations", "logs"])
            ...     .build()
            ... )
            >>> my_collection = database.create_collection(
            ...     "my_events",
            ...     definition=collection_definition,
            ... )

            >>>
            >>> # Create a collection with the definition as object
            >>> from astrapy.info import CollectionVectorOptions
            >>>
            >>> collection_definition_1 = CollectionDefinition(
            ...     vector=CollectionVectorOptions(
            ...         dimension=3,
            ...         metric=VectorMetric.DOT_PRODUCT,
            ...     ),
            ...     indexing={"deny": ["annotations", "logs"]},
            ... )
            >>> my_collection_1 = database.create_collection(
            ...     "my_events",
            ...     definition=collection_definition_1,
            ... )
            >>>

            >>> # Create a collection with the definition as plain dictionary
            >>> collection_definition_2 = {
            ...     "indexing": {"deny": ["annotations", "logs"]},
            ...     "vector": {
            ...         "dimension": 3,
            ...         "metric": VectorMetric.DOT_PRODUCT,
            ...     },
            ... }
            >>> my_collection_2 = database.create_collection(
            ...     "my_events",
            ...     definition=collection_definition_2,
            ... )
        """

        # Normalize the definition (CollectionDefinition, dict or None) to a
        # plain dict for the API payload.
        cc_definition: dict[str, Any] = CollectionDefinition.coerce(
            definition or {}
        ).as_dict()
        # Resolve the timeout: an explicit argument wins over this object's
        # defaults. (The label is the same either way, so it is set once;
        # the original code assigned the identical label in both branches.)
        _ca_label = "collection_admin_timeout_ms"
        _collection_admin_timeout_ms: int
        if collection_admin_timeout_ms is not None:
            _collection_admin_timeout_ms = collection_admin_timeout_ms
        else:
            _collection_admin_timeout_ms = (
                self.api_options.timeout_options.collection_admin_timeout_ms
            )
        driver_commander = self._get_driver_commander(keyspace=keyspace)
        # Drop absent/empty fields from the payload (e.g. an empty "options").
        cc_payload = {
            "createCollection": {
                k: v
                for k, v in {
                    "name": name,
                    "options": cc_definition,
                }.items()
                if v is not None and v != {}
            }
        }
        logger.info(f"createCollection('{name}')")
        cc_response = driver_commander.request(
            payload=cc_payload,
            timeout_context=_TimeoutContext(
                request_ms=_collection_admin_timeout_ms, label=_ca_label
            ),
        )
        if cc_response.get("status") != {"ok": 1}:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from createCollection API command.",
                raw_response=cc_response,
            )
        logger.info(f"finished createCollection('{name}')")
        # Hand back a Collection wrapper for the newly-created collection.
        return self.get_collection(
            name,
            document_type=document_type,
            keyspace=keyspace,
            embedding_api_key=embedding_api_key,
            spawn_api_options=spawn_api_options,
        )

    def drop_collection(
        self,
        name: str,
        *,
        keyspace: str | None = None,
        collection_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Drop a collection from the database, along with all documents therein.

        Args:
            name: the name of the collection to drop.
            keyspace: the keyspace where the collection resides. If not specified,
                the database working keyspace is assumed.
            collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `collection_admin_timeout_ms`.
            timeout_ms: an alias for `collection_admin_timeout_ms`.

        Raises:
            UnexpectedDataAPIResponseException: if the API response does not
                acknowledge the deletion.

        Example:
            >>> my_db.list_collection_names()
            ['a_collection', 'my_v_col', 'another_col']
            >>> my_db.drop_collection("my_v_col")
            >>> my_db.list_collection_names()
            ['a_collection', 'another_col']
        """

        _collection_admin_timeout_ms, _ca_label = _select_singlereq_timeout_ca(
            timeout_options=self.api_options.timeout_options,
            collection_admin_timeout_ms=collection_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        _keyspace = keyspace or self.keyspace
        driver_commander = self._get_driver_commander(keyspace=_keyspace)
        dc_payload = {"deleteCollection": {"name": name}}
        logger.info(f"deleteCollection('{name}')")
        dc_response = driver_commander.request(
            payload=dc_payload,
            timeout_context=_TimeoutContext(
                request_ms=_collection_admin_timeout_ms, label=_ca_label
            ),
        )
        if dc_response.get("status") != {"ok": 1}:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from deleteCollection API command.",
                raw_response=dc_response,
            )
        logger.info(f"finished deleteCollection('{name}')")
        # The method is declared (and documented) to return None: do not leak
        # the raw API status dictionary to the caller.

    def list_collections(
        self,
        *,
        keyspace: str | None = None,
        collection_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[CollectionDescriptor]:
        """
        Return descriptors for every collection in a keyspace of this database.

        Args:
            keyspace: the keyspace to inspect; defaults to the database
                working keyspace.
            collection_admin_timeout_ms: a timeout, in milliseconds, for the
                underlying API request. This object's defaults apply if omitted.
                (A single API request is issued, so all timeout parameters
                act identically.)
            request_timeout_ms: an alias for `collection_admin_timeout_ms`.
            timeout_ms: an alias for `collection_admin_timeout_ms`.

        Returns:
            a list with one CollectionDescriptor per collection.

        Example:
            >>> coll_list = my_db.list_collections()
            >>> coll_list
            [CollectionDescriptor(name='my_v_col', options=CollectionDefinition())]
            >>> for coll_dict in my_db.list_collections():
            ...     print(coll_dict)
            ...
            CollectionDescriptor(name='my_v_col', options=CollectionDefinition())
        """

        # Resolve the effective timeout, then delegate to the shared worker.
        _ca_timeout_ms, _ca_label = _select_singlereq_timeout_ca(
            timeout_options=self.api_options.timeout_options,
            collection_admin_timeout_ms=collection_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        ctx = _TimeoutContext(request_ms=_ca_timeout_ms, label=_ca_label)
        return self._list_collections_ctx(keyspace=keyspace, timeout_context=ctx)

    def _list_collections_ctx(
        self,
        *,
        keyspace: str | None,
        timeout_context: _TimeoutContext,
    ) -> list[CollectionDescriptor]:
        # Worker for list_collections: issue a findCollections (with explain)
        # and marshal the returned dictionaries into descriptors.
        commander = self._get_driver_commander(keyspace=keyspace)
        logger.info("findCollections")
        fc_response = commander.request(
            payload={"findCollections": {"options": {"explain": True}}},
            timeout_context=timeout_context,
        )
        status = fc_response.get("status", {})
        if "collections" not in status:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from findCollections API command.",
                raw_response=fc_response,
            )
        logger.info("finished findCollections")
        return [
            CollectionDescriptor._from_dict(col_dict)
            for col_dict in status["collections"]
        ]

    def list_collection_names(
        self,
        *,
        keyspace: str | None = None,
        collection_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[str]:
        """
        Return the names of all collections in a keyspace of this database.

        Args:
            keyspace: the keyspace to inspect; defaults to the database
                working keyspace.
            collection_admin_timeout_ms: a timeout, in milliseconds, for the
                underlying API request. This object's defaults apply if omitted.
                (A single API request is issued, so all timeout parameters
                act identically.)
            request_timeout_ms: an alias for `collection_admin_timeout_ms`.
            timeout_ms: an alias for `collection_admin_timeout_ms`.

        Returns:
            the collection names, as a list of strings in no particular order.

        Example:
            >>> my_db.list_collection_names()
            ['a_collection', 'another_col']
        """

        _ca_timeout_ms, _ca_label = _select_singlereq_timeout_ca(
            timeout_options=self.api_options.timeout_options,
            collection_admin_timeout_ms=collection_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        commander = self._get_driver_commander(keyspace=keyspace)
        logger.info("findCollections")
        fc_response = commander.request(
            payload={"findCollections": {}},
            timeout_context=_TimeoutContext(
                request_ms=_ca_timeout_ms, label=_ca_label
            ),
        )
        status = fc_response.get("status", {})
        if "collections" not in status:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from findCollections API command.",
                raw_response=fc_response,
            )
        logger.info("finished findCollections")
        return status["collections"]  # type: ignore[no-any-return]

    # typing-only overload: with no `row_type` given, the spawned table is
    # a `Table[DefaultRowType]` (i.e. rows are plain dictionaries).
    @overload
    def get_table(
        self,
        name: str,
        *,
        keyspace: str | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> Table[DefaultRowType]: ...

    # typing-only overload: an explicit `row_type` propagates into the
    # return type, yielding a `Table[ROW]`.
    @overload
    def get_table(
        self,
        name: str,
        *,
        row_type: type[ROW],
        keyspace: str | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> Table[ROW]: ...

    def get_table(
        self,
        name: str,
        *,
        row_type: type[Any] = DefaultRowType,
        keyspace: str | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> Table[ROW]:
        """
        Spawn a `Table` object instance pointing to a table on this database.

        This call is purely local: it does not contact the database nor check
        that the table exists. For the returned object to be of any use, the
        table must exist already (for instance, created earlier through the
        `create_table` method).

        Args:
            name: the name of the table.
            row_type: a formal type specifier for the benefit of type checkers.
                When omitted, the resulting Table is implicitly a
                `Table[dict[str, Any]]`; when provided, it must agree with the
                type hint used in the assignment. See the examples below.
            keyspace: the keyspace containing the table; the database working
                keyspace is used if this is not given.
            embedding_api_key: optional API key(s) for interacting with the
                table. If an embedding service is configured and this parameter
                is not None, the necessary embedding-related headers are sent
                with each Data API call. A plain string becomes the single
                "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`);
                some vectorize providers/models, when using header-based
                authentication, require specialized subclasses of
                `astrapy.authentication.EmbeddingHeadersProvider`.
            spawn_api_options: a complete or partial specification of the
                API Options overriding the defaults inherited from the
                Database (e.g. to tune timeouts). If passed together with the
                named timeout parameters, the latter take precedence in their
                respective settings.

        Returns:
            a `Table` instance, representing the desired table
                (but without any form of validation).

        Example:
            >>> # Get a Table object (and read a property of it as an example):
            >>> my_table = database.get_table("games")
            >>> my_table.full_name
            'default_keyspace.games'
            >>>
            >>> # Get a Table object in a specific keyspace,
            >>> # and set an embedding API key to it:
            >>> my_other_table = database.get_table(
            ...     "tournaments",
            ...     keyspace="the_other_keyspace",
            ...     embedding_api_key="secret-012abc...",
            ... )
            >>>
            >>> from astrapy import Table
            >>> MyCustomDictType = dict[str, int]
            >>>
            >>> # Get a Table object typed with a specific type for its rows:
            >>> my_typed_table: Table[MyCustomDictType] = database.get_table(
            ...     "games",
            ...     row_type=MyCustomDictType,
            ... )
        """

        # Deferred import: astrapy.table imports this module in turn.
        from astrapy.table import Table

        table_api_options = self.api_options.with_override(
            spawn_api_options
        ).with_override(APIOptions(embedding_api_key=embedding_api_key))

        target_keyspace = keyspace or self.keyspace
        if target_keyspace is None:
            raise ValueError(
                "No keyspace specified. This operation requires a keyspace to "
                "be set, e.g. through the `use_keyspace` method."
            )
        return Table[ROW](
            database=self,
            name=name,
            keyspace=target_keyspace,
            api_options=table_api_options,
        )

    # typing-only overload: with no `row_type` given, the created table is
    # a `Table[DefaultRowType]` (i.e. rows are plain dictionaries).
    @overload
    def create_table(
        self,
        name: str,
        *,
        definition: CreateTableDefinition | dict[str, Any],
        keyspace: str | None = None,
        if_not_exists: bool | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> Table[DefaultRowType]: ...

    # typing-only overload: an explicit `row_type` propagates into the
    # return type, yielding a `Table[ROW]`.
    @overload
    def create_table(
        self,
        name: str,
        *,
        definition: CreateTableDefinition | dict[str, Any],
        row_type: type[ROW],
        keyspace: str | None = None,
        if_not_exists: bool | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> Table[ROW]: ...

    def create_table(
        self,
        name: str,
        *,
        definition: CreateTableDefinition | dict[str, Any],
        row_type: type[Any] = DefaultRowType,
        keyspace: str | None = None,
        if_not_exists: bool | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> Table[ROW]:
        """
        Create a table on the database and return a `Table` handle to it.

        This is a blocking operation: the method returns once the table is
        ready to be used. Unlike `get_table`, this call actually creates the
        table on the database.

        Args:
            name: the name of the table.
            definition: the complete table layout, either a
                `CreateTableDefinition` instance or an equivalent (nested)
                dictionary, in which case it is parsed into a
                `CreateTableDefinition`. See the
                `astrapy.info.CreateTableDefinition` class and the `Table`
                class for details and the available ways to build this object.
            row_type: a formal type specifier for the benefit of type checkers.
                When omitted, the resulting Table is implicitly a
                `Table[dict[str, Any]]`; when provided, it must agree with the
                type hint used in the assignment.
            keyspace: the keyspace where the table is to be created; the
                database working keyspace is used if this is not given.
            if_not_exists: when True, the command succeeds even if a table of
                that name exists already (in which case no actual creation
                takes place on the database). Defaults to False, i.e. the API
                raises an error on table-name collision.
            table_admin_timeout_ms: a timeout, in milliseconds, for the
                underlying API request. This object's defaults apply if
                omitted. (A single API request is issued, so all timeout
                parameters act identically.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.
            embedding_api_key: optional API key(s) for interacting with the
                table. If an embedding service is configured and this
                parameter is not None, the necessary embedding-related headers
                are sent with each Data API call. A plain string becomes the
                single "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`);
                some vectorize providers/models, when using header-based
                authentication, require specialized subclasses of
                `astrapy.authentication.EmbeddingHeadersProvider`.
            spawn_api_options: a complete or partial specification of the
                API Options overriding the defaults inherited from the
                Database (e.g. to tune timeouts). If passed together with the
                named timeout parameters, the latter take precedence in their
                respective settings.

        Returns:
            a (synchronous) `Table` instance, representing the
            newly-created table.

        Example:
            >>> # Create a table using the fluent syntax for definition
            >>> # (and do not raise an error if the table exists already)
            >>> from astrapy.constants import SortMode
            >>> from astrapy.info import CreateTableDefinition, ColumnType
            >>> table_definition = (
            ...     CreateTableDefinition.builder()
            ...     .add_column("match_id", ColumnType.TEXT)
            ...     .add_column("round", ColumnType.INT)
            ...     .add_vector_column("m_vector", dimension=3)
            ...     .add_column("score", ColumnType.INT)
            ...     .add_partition_by(["match_id"])
            ...     .add_partition_sort({"round": SortMode.ASCENDING})
            ...     .build()
            ... )
            >>> my_table = database.create_table(
            ...     "games",
            ...     definition=table_definition,
            ...     if_not_exists=True,
            ... )
            >>>
            >>> # The definition can equivalently be a plain dictionary:
            >>> table_definition_2 = {
            ...     "columns": {
            ...         "match_id": {"type": "text"},
            ...         "round": {"type": "int"},
            ...         "m_vector": {"type": "vector", "dimension": 3},
            ...         "score": {"type": "int"},
            ...     },
            ...     "primaryKey": {
            ...         "partitionBy": ["match_id"],
            ...         "partitionSort": {"round": 1},
            ...     },
            ... }
            >>> my_table_2 = database.create_table(
            ...     "games",
            ...     definition=table_definition_2,
            ...     if_not_exists=True,
            ... )
        """

        # Normalize the definition and the (optional) ifNotExists flag.
        ct_definition: dict[str, Any] = CreateTableDefinition.coerce(
            definition
        ).as_dict()
        ct_options: dict[str, bool] = (
            {} if if_not_exists is None else {"ifNotExists": if_not_exists}
        )
        _table_admin_timeout_ms, _ta_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # Empty/None entries are pruned from the command body.
        command_body = {
            k: v
            for k, v in (
                ("name", name),
                ("definition", ct_definition),
                ("options", ct_options),
            )
            if v is not None and v != {}
        }
        commander = self._get_driver_commander(keyspace=keyspace)
        logger.info(f"createTable('{name}')")
        ct_response = commander.request(
            payload={"createTable": command_body},
            timeout_context=_TimeoutContext(
                request_ms=_table_admin_timeout_ms, label=_ta_label
            ),
        )
        if ct_response.get("status") != {"ok": 1}:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from createTable API command.",
                raw_response=ct_response,
            )
        logger.info(f"finished createTable('{name}')")
        # Hand back a (locally-spawned) handle to the new table.
        return self.get_table(
            name,
            row_type=row_type,
            keyspace=keyspace,
            embedding_api_key=embedding_api_key,
            spawn_api_options=spawn_api_options,
        )

    def drop_table_index(
        self,
        name: str,
        *,
        keyspace: str | None = None,
        if_exists: bool | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Delete an index (of any kind) from the table it is associated to.

        This is a blocking operation: the method returns once the index
        is deleted.

        Note:
            Although associated to a table, index names are unique across a
            keyspace, which is why no table name needs to be supplied here.

        Args:
            name: the name of the index to drop.
            keyspace: the keyspace to which the index belongs; the database
                working keyspace is used if this is not given.
            if_exists: when True, dropping a non-existing index is a silent
                no-op instead of an error. If not provided, the API default
                behaviour holds.
            table_admin_timeout_ms: a timeout, in milliseconds, for the
                underlying API request. This object's defaults apply if
                omitted. (A single API request is issued, so all timeout
                parameters act identically.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Example:
            >>> # Drop an index from the keyspace:
            >>> database.drop_table_index("score_index")
            >>> # Drop an index, unless it does not exist already:
            >>> database.drop_table_index("score_index", if_exists=True)
        """

        _table_admin_timeout_ms, _ta_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        di_options: dict[str, bool] = (
            {"ifExists": if_exists} if if_exists is not None else {}
        )
        # Empty/None entries are pruned from the command body.
        command_body = {
            k: v
            for k, v in (("name", name), ("options", di_options))
            if v is not None and v != {}
        }
        commander = self._get_driver_commander(keyspace=keyspace)
        logger.info(f"dropIndex('{name}')")
        di_response = commander.request(
            payload={"dropIndex": command_body},
            timeout_context=_TimeoutContext(
                request_ms=_table_admin_timeout_ms, label=_ta_label
            ),
        )
        if di_response.get("status") != {"ok": 1}:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from dropIndex API command.",
                raw_response=di_response,
            )
        logger.info(f"finished dropIndex('{name}')")

    def drop_table(
        self,
        name: str,
        *,
        keyspace: str | None = None,
        if_exists: bool | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Drop a table from the database, along with all rows therein and related indexes.

        Args:
            name: the name of the table to drop.
            keyspace: the keyspace where the table resides. If not specified,
                the database working keyspace is assumed.
            if_exists: if passed as True, trying to drop a non-existing table
                will not error, just silently do nothing instead. If not provided,
                the API default behaviour will hold.
            table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Raises:
            UnexpectedDataAPIResponseException: if the API response does not
                acknowledge the deletion.

        Example:
            >>> database.list_table_names()
            ['fighters', 'games']
            >>> database.drop_table("fighters")
            >>> database.list_table_names()
            ['games']
            >>> # not erroring because of if_exists:
            >>> database.drop_table("fighters", if_exists=True)
        """

        _table_admin_timeout_ms, _ta_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        _keyspace = keyspace or self.keyspace
        dt_options: dict[str, bool]
        if if_exists is not None:
            dt_options = {"ifExists": if_exists}
        else:
            dt_options = {}
        driver_commander = self._get_driver_commander(keyspace=_keyspace)
        dt_payload = {
            "dropTable": {
                k: v
                for k, v in {
                    "name": name,
                    "options": dt_options,
                }.items()
                if v is not None
                if v != {}
            }
        }
        logger.info(f"dropTable('{name}')")
        dt_response = driver_commander.request(
            payload=dt_payload,
            timeout_context=_TimeoutContext(
                request_ms=_table_admin_timeout_ms, label=_ta_label
            ),
        )
        if dt_response.get("status") != {"ok": 1}:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from dropTable API command.",
                raw_response=dt_response,
            )
        logger.info(f"finished dropTable('{name}')")
        # The method is declared (and documented) to return None: do not leak
        # the raw API status dictionary to the caller.

    def list_tables(
        self,
        *,
        keyspace: str | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[ListTableDescriptor]:
        """
        Return descriptors for every table in a keyspace of this database.

        Args:
            keyspace: the keyspace to inspect; defaults to the database
                working keyspace.
            table_admin_timeout_ms: a timeout, in milliseconds, for the
                underlying API request. This object's defaults apply if
                omitted. (A single API request is issued, so all timeout
                parameters act identically.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Returns:
            a list with one ListTableDescriptor per table.

        Example:
            >>> tables = my_database.list_tables()
            >>> tables
            [ListTableDescriptor(name='fighters', definition=ListTableDefinition(...
            >>> tables[1].name
            'games'
            >>> tables[1].definition.columns
            {'match_id': TableScalarColumnTypeDescriptor(ColumnType.TEXT),...
            >>> tables[1].definition.columns['score']
            TableScalarColumnTypeDescriptor(ColumnType.INT)
            >>> tables[1].definition.primary_key.partition_by
            ['match_id']
            >>> tables[1].definition.primary_key.partition_sort
            {'round': 1}
        """

        # Resolve the effective timeout, then delegate to the shared worker.
        _ta_timeout_ms, _ta_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        ctx = _TimeoutContext(request_ms=_ta_timeout_ms, label=_ta_label)
        return self._list_tables_ctx(keyspace=keyspace, timeout_context=ctx)

    def _list_tables_ctx(
        self,
        *,
        keyspace: str | None,
        timeout_context: _TimeoutContext,
    ) -> list[ListTableDescriptor]:
        # Internal helper: issue a 'listTables' (with explain=True so full
        # definitions are returned) and marshal the response into descriptors.
        commander = self._get_driver_commander(keyspace=keyspace)
        logger.info("listTables")
        response = commander.request(
            payload={"listTables": {"options": {"explain": True}}},
            timeout_context=timeout_context,
        )
        status = response.get("status", {})
        if "tables" not in status:
            # The API contract requires a 'tables' entry in the status.
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from listTables API command.",
                raw_response=response,
            )
        logger.info("finished listTables")
        # Each entry is a dict describing one table; coerce into descriptors.
        return [ListTableDescriptor.coerce(table_dict) for table_dict in status["tables"]]

    def list_table_names(
        self,
        *,
        keyspace: str | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[str]:
        """
        Retrieve the names of all tables in a keyspace of this database.

        Args:
            keyspace: the keyspace to inspect. Defaults to this database's
                working keyspace if not provided.
            table_admin_timeout_ms: a timeout, in milliseconds, for the
                underlying API request. Falls back to this object's defaults
                when omitted. (A single API request is issued, so all timeout
                parameters are equivalent here.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Returns:
            a list of the table names as strings, in no particular order.

        Example:
            >>> database.list_table_names()
            ['fighters', 'games']
        """

        effective_timeout_ms, effective_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        commander = self._get_driver_commander(keyspace=keyspace)
        # No 'explain' option: only the bare names are requested.
        payload: dict[str, Any] = {"listTables": {}}
        logger.info("listTables")
        response = commander.request(
            payload=payload,
            timeout_context=_TimeoutContext(
                request_ms=effective_timeout_ms, label=effective_label
            ),
        )
        status = response.get("status", {})
        if "tables" not in status:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from listTables API command.",
                raw_response=response,
            )
        logger.info("finished listTables")
        return status["tables"]  # type: ignore[no-any-return]

    def command(
        self,
        body: dict[str, Any],
        *,
        keyspace: str | None | UnsetType = _UNSET,
        collection_or_table_name: str | None = None,
        raise_api_errors: bool = True,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> dict[str, Any]:
        """
        Send a POST request to the Data API for this database with
        an arbitrary, caller-provided payload.

        Args:
            body: a JSON-serializable dictionary, the payload of the request.
            keyspace: the keyspace to use, if any. If a keyspace is employed,
                it is used to construct the full request URL. To run a command
                targeting no specific keyspace (rather, the database as a whole),
                pass an explicit `None`: the request URL will lack the suffix
                "/<keyspace>" component. If unspecified, the working keyspace of
                this database is used. If another keyspace is passed, it will be
                used instead of the database's working one.
            collection_or_table_name: if provided, the name is appended at the end
                of the endpoint. In this way, this method allows collection-
                and table-level arbitrary POST requests as well.
                This parameter cannot be used if `keyspace=None` is explicitly provided.
            raise_api_errors: if True, responses with a nonempty 'errors' field
                result in an astrapy exception being raised.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a dictionary with the response of the HTTP request.

        Example:
            >>> my_db.command({"findCollections": {}})
            {'status': {'collections': ['my_coll']}}
            >>> my_db.command({"countDocuments": {}}, collection_or_table_name="my_coll")
            {'status': {'count': 123}}
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # Resolve the target keyspace: explicit None means "no keyspace at all",
        # an unset value means "use this database's working keyspace".
        _keyspace: str | None
        if keyspace is None:
            if collection_or_table_name is not None:
                raise ValueError(
                    "Cannot pass collection_or_table_name to database "
                    "`command` on a no-keyspace command"
                )
            _keyspace = None
        else:
            if isinstance(keyspace, UnsetType):
                _keyspace = self.keyspace
            else:
                _keyspace = keyspace
        # build the ad-hoc-commander path with _keyspace and the coll.or.table,
        # skipping None components and stripping stray slashes/empty segments
        base_path_components = [
            comp
            for comp in (
                ncomp.strip("/")
                for ncomp in (
                    self.api_options.data_api_url_options.api_path,
                    self.api_options.data_api_url_options.api_version,
                    _keyspace,
                    collection_or_table_name,
                )
                if ncomp is not None
            )
            if comp != ""
        ]
        base_path = f"/{'/'.join(base_path_components)}"
        command_commander = APICommander(
            api_endpoint=self.api_endpoint,
            path=base_path,
            headers=self._commander_headers,
            callers=self.api_options.callers,
            redacted_header_names=self.api_options.redacted_header_names,
        )

        _cmd_desc = ",".join(sorted(body.keys()))
        logger.info(f"command={_cmd_desc} on {self.__class__.__name__}")
        req_response = command_commander.request(
            payload=body,
            raise_api_errors=raise_api_errors,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        # fix: the completion log was identical to the start log; use the
        # "finished ..." form consistent with the other methods in this class.
        logger.info(f"finished command={_cmd_desc} on {self.__class__.__name__}")
        return req_response

    def get_database_admin(
        self,
        *,
        token: str | TokenProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> DatabaseAdmin:
        """
        Return a DatabaseAdmin object corresponding to this database, for
        use in admin tasks such as managing keyspaces.

        Depending on the environment this database resides in, an
        appropriate subclass of DatabaseAdmin is returned.

        Args:
            token: an access token with enough permission on the database to
                perform the desired tasks. If omitted (as it can generally be done),
                the token of this Database is used.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults.
                This allows for a deeper configuration of the database admin, e.g.
                concerning timeouts; if this is passed together with
                the equivalent named parameters, the latter will take precedence
                in their respective settings.

        Returns:
            A DatabaseAdmin instance targeting this database: an
            `AstraDBDatabaseAdmin` for Astra DB, a `DataAPIDatabaseAdmin`
            for any other environment.

        Example:
            >>> my_db_admin = my_db.get_database_admin()
            >>> if "new_keyspace" not in my_db_admin.list_keyspaces():
            ...     my_db_admin.create_keyspace("new_keyspace")
            >>> my_db_admin.list_keyspaces()
            ['default_keyspace', 'new_keyspace']
        """

        # lazy importing here to avoid circular dependency
        from astrapy.admin.admin import AstraDBDatabaseAdmin, DataAPIDatabaseAdmin

        # layer the overrides: explicit options first, then the token argument
        resulting_api_options = self.api_options.with_override(
            spawn_api_options
        ).with_override(APIOptions(token=token))

        # pick the admin class appropriate for the target environment
        admin_class: type[AstraDBDatabaseAdmin] | type[DataAPIDatabaseAdmin]
        if resulting_api_options.environment in Environment.astra_db_values:
            admin_class = AstraDBDatabaseAdmin
        else:
            admin_class = DataAPIDatabaseAdmin
        return admin_class(
            api_endpoint=self.api_endpoint,
            api_options=resulting_api_options,
            spawner_database=self,
        )

Instance variables

var id : str

The ID of this database.

Example

>>> my_db.id
'01234567-89ab-cdef-0123-456789abcdef'
Expand source code
@property
def id(self) -> str:
    """
    The ID of this database.

    Raises:
        DevOpsAPIException: if the API endpoint cannot be parsed (i.e. the
            database is not in a supported environment for this operation).

    Example:
        >>> my_db.id
        '01234567-89ab-cdef-0123-456789abcdef'
    """

    parsed = parse_api_endpoint(self.api_endpoint)
    if parsed is None:
        # Non-parsable endpoints (non-Astra environments) have no database ID.
        raise DevOpsAPIException(
            "Database is not in a supported environment for this operation."
        )
    return parsed.database_id
var keyspace : str | None

The keyspace this database uses as target for all commands when no method-call-specific keyspace is specified.

Returns

the working keyspace (a string), or None if not set.

Example

>>> my_db.keyspace
'the_keyspace'
Expand source code
@property
def keyspace(self) -> str | None:
    """
    The keyspace this database uses as target for all commands when
    no method-call-specific keyspace is specified.

    Returns:
        the working keyspace (a string), or None if not set.

    Example:
        >>> my_db.keyspace
        'the_keyspace'
    """

    working_keyspace: str | None = self._using_keyspace
    return working_keyspace
var region : str

The region where this database is located.

The region is still well defined in case of multi-region databases, since a Database instance connects to exactly one of the regions (as specified by the API Endpoint).

Example

>>> my_db.region
'us-west-2'
Expand source code
@property
def region(self) -> str:
    """
    The region where this database is located.

    Even for multi-region databases the region is well defined, because
    a Database instance connects to exactly one region (the one encoded
    in its API Endpoint).

    Raises:
        DevOpsAPIException: if the API endpoint cannot be parsed (i.e. the
            database is not in a supported environment for this operation).

    Example:
        >>> my_db.region
        'us-west-2'
    """

    parsed = parse_api_endpoint(self.api_endpoint)
    if parsed is None:
        raise DevOpsAPIException(
            "Database is not in a supported environment for this operation."
        )
    return parsed.region

Methods

def command(self, body: dict[str, Any], *, keyspace: str | None | UnsetType = (unset), collection_or_table_name: str | None = None, raise_api_errors: bool = True, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> dict[str, typing.Any]

Send a POST request to the Data API for this database with an arbitrary, caller-provided payload.

Args

body
a JSON-serializable dictionary, the payload of the request.
keyspace
the keyspace to use, if any. If a keyspace is employed, it is used to construct the full request URL. To run a command targeting no specific keyspace (rather, the database as a whole), pass an explicit None: the request URL will lack the suffix "/<keyspace>" component. If unspecified, the working keyspace of this database is used. If another keyspace is passed, it will be used instead of the database's working one.
collection_or_table_name
if provided, the name is appended at the end of the endpoint. In this way, this method allows collection- and table-level arbitrary POST requests as well. This parameter cannot be used if keyspace=None is explicitly provided.
raise_api_errors
if True, responses with a nonempty 'errors' field result in an astrapy exception being raised.
general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Returns

a dictionary with the response of the HTTP request.

Example

>>> my_db.command({"findCollections": {}})
{'status': {'collections': ['my_coll']}}
>>> my_db.command({"countDocuments": {}}, collection_or_table_name="my_coll")
{'status': {'count': 123}}
Expand source code
def command(
    self,
    body: dict[str, Any],
    *,
    keyspace: str | None | UnsetType = _UNSET,
    collection_or_table_name: str | None = None,
    raise_api_errors: bool = True,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> dict[str, Any]:
    """
    Send a POST request to the Data API for this database with
    an arbitrary, caller-provided payload.

    Args:
        body: a JSON-serializable dictionary, the payload of the request.
        keyspace: the keyspace to use, if any. If a keyspace is employed,
            it is used to construct the full request URL. To run a command
            targeting no specific keyspace (rather, the database as a whole),
            pass an explicit `None`: the request URL will lack the suffix
            "/<keyspace>" component. If unspecified, the working keyspace of
            this database is used. If another keyspace is passed, it will be
            used instead of the database's working one.
        collection_or_table_name: if provided, the name is appended at the end
            of the endpoint. In this way, this method allows collection-
            and table-level arbitrary POST requests as well.
            This parameter cannot be used if `keyspace=None` is explicitly provided.
        raise_api_errors: if True, responses with a nonempty 'errors' field
            result in an astrapy exception being raised.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a dictionary with the response of the HTTP request.

    Example:
        >>> my_db.command({"findCollections": {}})
        {'status': {'collections': ['my_coll']}}
        >>> my_db.command({"countDocuments": {}}, collection_or_table_name="my_coll")
        {'status': {'count': 123}}
    """

    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # Resolve the target keyspace: explicit None means "no keyspace at all",
    # an unset value means "use this database's working keyspace".
    _keyspace: str | None
    if keyspace is None:
        if collection_or_table_name is not None:
            raise ValueError(
                "Cannot pass collection_or_table_name to database "
                "`command` on a no-keyspace command"
            )
        _keyspace = None
    else:
        if isinstance(keyspace, UnsetType):
            _keyspace = self.keyspace
        else:
            _keyspace = keyspace
    # build the ad-hoc-commander path with _keyspace and the coll.or.table,
    # skipping None components and stripping stray slashes/empty segments
    base_path_components = [
        comp
        for comp in (
            ncomp.strip("/")
            for ncomp in (
                self.api_options.data_api_url_options.api_path,
                self.api_options.data_api_url_options.api_version,
                _keyspace,
                collection_or_table_name,
            )
            if ncomp is not None
        )
        if comp != ""
    ]
    base_path = f"/{'/'.join(base_path_components)}"
    command_commander = APICommander(
        api_endpoint=self.api_endpoint,
        path=base_path,
        headers=self._commander_headers,
        callers=self.api_options.callers,
        redacted_header_names=self.api_options.redacted_header_names,
    )

    _cmd_desc = ",".join(sorted(body.keys()))
    logger.info(f"command={_cmd_desc} on {self.__class__.__name__}")
    req_response = command_commander.request(
        payload=body,
        raise_api_errors=raise_api_errors,
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    # fix: the completion log was identical to the start log; use the
    # "finished ..." form consistent with the other methods in this class.
    logger.info(f"finished command={_cmd_desc} on {self.__class__.__name__}")
    return req_response
def create_collection(self, name: str, *, definition: CollectionDefinition | dict[str, Any] | None = None, document_type: type[Any] = dict[str, typing.Any], keyspace: str | None = None, collection_admin_timeout_ms: int | None = None, embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = (unset), spawn_api_options: APIOptions | UnsetType = (unset)) ‑> Collection[DOC]

Creates a collection on the database and return the Collection instance that represents it.

This is a blocking operation: the method returns when the collection is ready to be used. As opposed to the get_collection method, this method causes the collection to be actually created on DB.

Args

name
the name of the collection.
definition
a complete definition for the collection. This can be an instance of CollectionDefinition or an equivalent (nested) dictionary, in which case it will be parsed into a CollectionDefinition. See the CollectionDefinition class and the Collection class for more details and ways to construct this object.
document_type
this parameter acts as a formal specifier for the type checker. If omitted, the resulting Collection is implicitly a Collection[dict[str, Any]]. If provided, it must match the type hint specified in the assignment. See the examples below.
keyspace
the keyspace where the collection is to be created. If not specified, the general setting for this database is used.
collection_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply.
embedding_api_key
optional API key(s) for interacting with the collection. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
spawn_api_options
a specification - complete or partial - of the API Options to override the defaults inherited from the Database. This allows for a deeper configuration of the collection, e.g. concerning timeouts; if this is passed together with the named timeout parameters, the latter will take precedence in their respective settings.

Returns

a (synchronous) Collection instance, representing the newly-created collection.

Example

>>> # Create a collection using the fluent syntax for its definition
>>> from astrapy.constants import VectorMetric
>>> from astrapy.info import CollectionDefinition
>>>
>>> collection_definition = (
...     CollectionDefinition.builder()
...     .set_vector_dimension(3)
...     .set_vector_metric(VectorMetric.DOT_PRODUCT)
...     .set_indexing("deny", ["annotations", "logs"])
...     .build()
... )
>>> my_collection = database.create_collection(
...     "my_events",
...     definition=collection_definition,
... )
>>> # Create a collection with the definition as object
>>> from astrapy.info import CollectionVectorOptions
>>>
>>> collection_definition_1 = CollectionDefinition(
...     vector=CollectionVectorOptions(
...         dimension=3,
...         metric=VectorMetric.DOT_PRODUCT,
...     ),
...     indexing={"deny": ["annotations", "logs"]},
... )
>>> my_collection_1 = database.create_collection(
...     "my_events",
...     definition=collection_definition_1,
... )
>>>
>>> # Create a collection with the definition as plain dictionary
>>> collection_definition_2 = {
...     "indexing": {"deny": ["annotations", "logs"]},
...     "vector": {
...         "dimension": 3,
...         "metric": VectorMetric.DOT_PRODUCT,
...     },
... }
>>> my_collection_2 = database.create_collection(
...     "my_events",
...     definition=collection_definition_2,
... )
Expand source code
def create_collection(
    self,
    name: str,
    *,
    definition: CollectionDefinition | dict[str, Any] | None = None,
    document_type: type[Any] = DefaultDocumentType,
    keyspace: str | None = None,
    collection_admin_timeout_ms: int | None = None,
    embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> Collection[DOC]:
    """
    Create a collection on the database and return the Collection
    instance that represents it.

    This is a blocking operation: the method returns when the collection
    is ready to be used. As opposed to the `get_collection` method,
    this method causes the collection to be actually created on DB.

    Args:
        name: the name of the collection.
        definition: a complete definition for the collection. This can be an
            instance of `CollectionDefinition` or an equivalent (nested) dictionary,
            in which case it will be parsed into a `CollectionDefinition`.
            See the `astrapy.info.CollectionDefinition` class and the
            `Collection` class for more details and ways to construct this object.
        document_type: this parameter acts as a formal specifier for the type checker.
            If omitted, the resulting Collection is implicitly
            a `Collection[dict[str, Any]]`. If provided, it must match the
            type hint specified in the assignment.
            See the examples below.
        keyspace: the keyspace where the collection is to be created.
            If not specified, the general setting for this database is used.
        collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
        embedding_api_key: optional API key(s) for interacting with the collection.
            If an embedding service is configured, and this parameter is not None,
            each Data API call will include the necessary embedding-related headers
            as specified by this parameter. If a string is passed, it translates
            into the one "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            For some vectorize providers/models, if using header-based authentication,
            specialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`
            should be supplied.
        spawn_api_options: a specification - complete or partial - of the
            API Options to override the defaults inherited from the Database.
            This allows for a deeper configuration of the collection, e.g.
            concerning timeouts; if this is passed together with
            the named timeout parameters, the latter will take precedence
            in their respective settings.

    Returns:
        a (synchronous) `Collection` instance, representing the
        newly-created collection.

    Example:
        >>> # Create a collection using the fluent syntax for its definition
        >>> from astrapy.constants import VectorMetric
        >>> from astrapy.info import CollectionDefinition
        >>>
        >>> collection_definition = (
        ...     CollectionDefinition.builder()
        ...     .set_vector_dimension(3)
        ...     .set_vector_metric(VectorMetric.DOT_PRODUCT)
        ...     .set_indexing("deny", ["annotations", "logs"])
        ...     .build()
        ... )
        >>> my_collection = database.create_collection(
        ...     "my_events",
        ...     definition=collection_definition,
        ... )

        >>>
        >>> # Create a collection with the definition as object
        >>> from astrapy.info import CollectionVectorOptions
        >>>
        >>> collection_definition_1 = CollectionDefinition(
        ...     vector=CollectionVectorOptions(
        ...         dimension=3,
        ...         metric=VectorMetric.DOT_PRODUCT,
        ...     ),
        ...     indexing={"deny": ["annotations", "logs"]},
        ... )
        >>> my_collection_1 = database.create_collection(
        ...     "my_events",
        ...     definition=collection_definition_1,
        ... )
        >>>

        >>> # Create a collection with the definition as plain dictionary
        >>> collection_definition_2 = {
        ...     "indexing": {"deny": ["annotations", "logs"]},
        ...     "vector": {
        ...         "dimension": 3,
        ...         "metric": VectorMetric.DOT_PRODUCT,
        ...     },
        ... }
        >>> my_collection_2 = database.create_collection(
        ...     "my_events",
        ...     definition=collection_definition_2,
        ... )
    """

    # Normalize the definition (object or plain dict) into a payload dict.
    cc_definition: dict[str, Any] = CollectionDefinition.coerce(
        definition or {}
    ).as_dict()
    # this method has custom code to pick its timeout
    _collection_admin_timeout_ms: int
    _ca_label: str
    if collection_admin_timeout_ms is not None:
        _collection_admin_timeout_ms = collection_admin_timeout_ms
        _ca_label = "collection_admin_timeout_ms"
    else:
        _collection_admin_timeout_ms = (
            self.api_options.timeout_options.collection_admin_timeout_ms
        )
        # NOTE: the label is identical in both branches; only the value differs.
        _ca_label = "collection_admin_timeout_ms"
    driver_commander = self._get_driver_commander(keyspace=keyspace)
    # Drop None/empty entries so the payload carries only meaningful fields.
    cc_payload = {
        "createCollection": {
            k: v
            for k, v in {
                "name": name,
                "options": cc_definition,
            }.items()
            if v is not None
            if v != {}
        }
    }
    logger.info(f"createCollection('{name}')")
    cc_response = driver_commander.request(
        payload=cc_payload,
        timeout_context=_TimeoutContext(
            request_ms=_collection_admin_timeout_ms, label=_ca_label
        ),
    )
    # A successful createCollection reports exactly {"ok": 1} in its status.
    if cc_response.get("status") != {"ok": 1}:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from createCollection API command.",
            raw_response=cc_response,
        )
    logger.info(f"finished createCollection('{name}')")
    # Hand back a Collection object bound to the newly-created collection.
    return self.get_collection(
        name,
        document_type=document_type,
        keyspace=keyspace,
        embedding_api_key=embedding_api_key,
        spawn_api_options=spawn_api_options,
    )
def create_table(self, name: str, *, definition: CreateTableDefinition | dict[str, Any], row_type: type[Any] = dict[str, typing.Any], keyspace: str | None = None, if_not_exists: bool | None = None, table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None, embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = (unset), spawn_api_options: APIOptions | UnsetType = (unset)) ‑> Table[ROW]

Creates a table on the database and return the Table instance that represents it.

This is a blocking operation: the method returns when the table is ready to be used. As opposed to the get_table method call, this method causes the table to be actually created on DB.

Args

name
the name of the table.
definition
a complete table definition for the table. This can be an instance of CreateTableDefinition or an equivalent (nested) dictionary, in which case it will be parsed into a CreateTableDefinition. See the CreateTableDefinition class and the Table class for more details and ways to construct this object.
row_type
this parameter acts as a formal specifier for the type checker. If omitted, the resulting Table is implicitly a Table[dict[str, Any]]. If provided, it must match the type hint specified in the assignment. See the examples below.
keyspace
the keyspace where the table is to be created. If not specified, the general setting for this database is used.
if_not_exists
if set to True, the command will succeed even if a table with the specified name already exists (in which case no actual table creation takes place on the database). Defaults to False, i.e. an error is raised by the API in case of table-name collision.
table_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for table_admin_timeout_ms.
timeout_ms
an alias for table_admin_timeout_ms.
embedding_api_key
optional API key(s) for interacting with the table. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
spawn_api_options
a specification - complete or partial - of the API Options to override the defaults inherited from the Database. This allows for a deeper configuration of the table, e.g. concerning timeouts; if this is passed together with the named timeout parameters, the latter will take precedence in their respective settings.

Returns

a (synchronous) Table instance, representing the newly-created table.

Example

>>> # Create a table using the fluent syntax for definition
>>> from astrapy.constants import SortMode
>>> from astrapy.info import (
...     CreateTableDefinition,
...     ColumnType,
... )
>>> table_definition = (
...     CreateTableDefinition.builder()
...     .add_column("match_id", ColumnType.TEXT)
...     .add_column("round", ColumnType.INT)
...     .add_vector_column("m_vector", dimension=3)
...     .add_column("score", ColumnType.INT)
...     .add_column("when", ColumnType.TIMESTAMP)
...     .add_column("winner", ColumnType.TEXT)
...     .add_set_column("fighters", ColumnType.UUID)
...     .add_partition_by(["match_id"])
...     .add_partition_sort({"round": SortMode.ASCENDING})
...     .build()
... )
>>> my_table = database.create_table(
...     "games",
...     definition=table_definition,
... )
>>>
>>> # Create a table with the definition as object
>>> # (and do not raise an error if the table exists already)
>>> from astrapy.info import (
...     CreateTableDefinition,
...     TablePrimaryKeyDescriptor,
...     TableScalarColumnTypeDescriptor,
...     TableValuedColumnType,
...     TableValuedColumnTypeDescriptor,
...     TableVectorColumnTypeDescriptor,
... )
>>> table_definition_1 = CreateTableDefinition(
...     columns={
...         "match_id": TableScalarColumnTypeDescriptor(
...             ColumnType.TEXT,
...         ),
...         "round": TableScalarColumnTypeDescriptor(
...             ColumnType.INT,
...         ),
...         "m_vector": TableVectorColumnTypeDescriptor(
...             column_type="vector", dimension=3
...         ),
...         "score": TableScalarColumnTypeDescriptor(
...             ColumnType.INT,
...         ),
...         "when": TableScalarColumnTypeDescriptor(
...             ColumnType.TIMESTAMP,
...         ),
...         "winner": TableScalarColumnTypeDescriptor(
...             ColumnType.TEXT,
...         ),
...         "fighters": TableValuedColumnTypeDescriptor(
...             column_type=TableValuedColumnType.SET,
...             value_type=ColumnType.UUID,
...         ),
...     },
...     primary_key=TablePrimaryKeyDescriptor(
...         partition_by=["match_id"],
...         partition_sort={"round": SortMode.ASCENDING},
...     ),
... )
>>> my_table_1 = database.create_table(
...     "games",
...     definition=table_definition_1,
...     if_not_exists=True,
... )
>>>
>>> # Create a table with the definition as plain dictionary
>>> # (and do not raise an error if the table exists already)
>>> table_definition_2 = {
...     "columns": {
...         "match_id": {"type": "text"},
...         "round": {"type": "int"},
...         "m_vector": {"type": "vector", "dimension": 3},
...         "score": {"type": "int"},
...         "when": {"type": "timestamp"},
...         "winner": {"type": "text"},
...         "fighters": {"type": "set", "valueType": "uuid"},
...     },
...     "primaryKey": {
...         "partitionBy": ["match_id"],
...         "partitionSort": {"round": 1},
...     },
... }
>>> my_table_2 = database.create_table(
...     "games",
...     definition=table_definition_2,
...     if_not_exists=True,
... )
Expand source code
def create_table(
    self,
    name: str,
    *,
    definition: CreateTableDefinition | dict[str, Any],
    row_type: type[Any] = DefaultRowType,
    keyspace: str | None = None,
    if_not_exists: bool | None = None,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
    embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> Table[ROW]:
    """
    Create a table on the database and return a `Table` instance
    representing it.

    This is a blocking operation: it returns once the table is ready for
    use. Unlike `get_table`, this method actually issues a table-creation
    command to the database.

    Args:
        name: the name of the table.
        definition: the complete table layout, either an instance of
            `CreateTableDefinition` or an equivalent (nested) dictionary,
            which is then parsed into a `CreateTableDefinition`. See the
            `astrapy.info.CreateTableDefinition` class and the `Table`
            class for details and ways to construct this object.
        row_type: this parameter acts as a formal specifier for the type
            checker. If omitted, the resulting Table is implicitly a
            `Table[dict[str, Any]]`; if provided, it must match the type
            hint specified in the assignment. See the examples below.
        keyspace: the keyspace in which to create the table. If not
            specified, the general setting for this database is used.
        if_not_exists: if set to True, the command succeeds even if a table
            with the given name already exists (in which case no actual
            table creation takes place on the database). Defaults to False,
            i.e. the API raises an error on a table-name collision.
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on
            the underlying API request. If not provided, this object's
            defaults apply. (A single API request is issued, so all
            timeout parameters are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.
        embedding_api_key: optional API key(s) for interacting with the
            table. If an embedding service is configured and this parameter
            is not None, each Data API call includes the necessary
            embedding-related headers as specified here. A plain string
            translates into the one "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            For some vectorize providers/models, if using header-based
            authentication, specialized subclasses of
            `astrapy.authentication.EmbeddingHeadersProvider` should be
            supplied.
        spawn_api_options: a specification - complete or partial - of the
            API Options overriding the defaults inherited from the
            Database, for a deeper configuration of the table (e.g.
            concerning timeouts). If passed together with the named
            timeout parameters, the latter take precedence in their
            respective settings.

    Returns:
        a (synchronous) `Table` instance, representing the
        newly-created table.

    Example:
        >>> # Create a table using the fluent syntax for definition
        >>> from astrapy.constants import SortMode
        >>> from astrapy.info import (
        ...     CreateTableDefinition,
        ...     ColumnType,
        ... )
        >>> table_definition = (
        ...     CreateTableDefinition.builder()
        ...     .add_column("match_id", ColumnType.TEXT)
        ...     .add_column("round", ColumnType.INT)
        ...     .add_vector_column("m_vector", dimension=3)
        ...     .add_column("score", ColumnType.INT)
        ...     .add_column("when", ColumnType.TIMESTAMP)
        ...     .add_column("winner", ColumnType.TEXT)
        ...     .add_set_column("fighters", ColumnType.UUID)
        ...     .add_partition_by(["match_id"])
        ...     .add_partition_sort({"round": SortMode.ASCENDING})
        ...     .build()
        ... )
        >>> my_table = database.create_table(
        ...     "games",
        ...     definition=table_definition,
        ... )
        >>>
        >>> # Create a table with the definition as plain dictionary
        >>> # (and do not raise an error if the table exists already)
        >>> table_definition_2 = {
        ...     "columns": {
        ...         "match_id": {"type": "text"},
        ...         "round": {"type": "int"},
        ...         "m_vector": {"type": "vector", "dimension": 3},
        ...         "score": {"type": "int"},
        ...         "when": {"type": "timestamp"},
        ...         "winner": {"type": "text"},
        ...         "fighters": {"type": "set", "valueType": "uuid"},
        ...     },
        ...     "primaryKey": {
        ...         "partitionBy": ["match_id"],
        ...         "partitionSort": {"round": 1},
        ...     },
        ... }
        >>> my_table_2 = database.create_table(
        ...     "games",
        ...     definition=table_definition_2,
        ...     if_not_exists=True,
        ... )
    """

    # Normalize the definition into its wire-format dictionary.
    definition_dict: dict[str, Any] = CreateTableDefinition.coerce(
        definition
    ).as_dict()
    # Options are sent only if the caller expressed a preference.
    creation_options: dict[str, bool] = (
        {} if if_not_exists is None else {"ifNotExists": if_not_exists}
    )
    timeout_value_ms, timeout_label = _select_singlereq_timeout_ta(
        timeout_options=self.api_options.timeout_options,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    commander = self._get_driver_commander(keyspace=keyspace)
    # Assemble the command payload, omitting absent (None) and empty-dict
    # entries so only meaningful fields travel over the wire.
    command_fields = {
        "name": name,
        "definition": definition_dict,
        "options": creation_options,
    }
    create_payload = {
        "createTable": {
            field: value
            for field, value in command_fields.items()
            if value is not None and value != {}
        }
    }
    logger.info(f"createTable('{name}')")
    api_response = commander.request(
        payload=create_payload,
        timeout_context=_TimeoutContext(
            request_ms=timeout_value_ms, label=timeout_label
        ),
    )
    if api_response.get("status") != {"ok": 1}:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from createTable API command.",
            raw_response=api_response,
        )
    logger.info(f"finished createTable('{name}')")
    return self.get_table(
        name,
        row_type=row_type,
        keyspace=keyspace,
        embedding_api_key=embedding_api_key,
        spawn_api_options=spawn_api_options,
    )
def drop_collection(self, name: str, *, keyspace: str | None = None, collection_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Drop a collection from the database, along with all documents therein.

Args

name
the name of the collection to drop.
keyspace
the keyspace where the collection resides. If not specified, the database working keyspace is assumed.
collection_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for collection_admin_timeout_ms.
timeout_ms
an alias for collection_admin_timeout_ms.

Example

>>> my_db.list_collection_names()
['a_collection', 'my_v_col', 'another_col']
>>> my_db.drop_collection("my_v_col")
>>> my_db.list_collection_names()
['a_collection', 'another_col']
Expand source code
def drop_collection(
    self,
    name: str,
    *,
    keyspace: str | None = None,
    collection_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Drop a collection from the database, along with all documents therein.

    Args:
        name: the name of the collection to drop.
        keyspace: the keyspace where the collection resides. If not specified,
            the database working keyspace is assumed.
        collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `collection_admin_timeout_ms`.
        timeout_ms: an alias for `collection_admin_timeout_ms`.

    Example:
        >>> my_db.list_collection_names()
        ['a_collection', 'my_v_col', 'another_col']
        >>> my_db.drop_collection("my_v_col")
        >>> my_db.list_collection_names()
        ['a_collection', 'another_col']
    """

    _collection_admin_timeout_ms, _ca_label = _select_singlereq_timeout_ca(
        timeout_options=self.api_options.timeout_options,
        collection_admin_timeout_ms=collection_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    _keyspace = keyspace or self.keyspace
    driver_commander = self._get_driver_commander(keyspace=_keyspace)
    dc_payload = {"deleteCollection": {"name": name}}
    logger.info(f"deleteCollection('{name}')")
    dc_response = driver_commander.request(
        payload=dc_payload,
        timeout_context=_TimeoutContext(
            request_ms=_collection_admin_timeout_ms, label=_ca_label
        ),
    )
    if dc_response.get("status") != {"ok": 1}:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from deleteCollection API command.",
            raw_response=dc_response,
        )
    logger.info(f"finished deleteCollection('{name}')")
    # Fix: the method is annotated `-> None` and documents no return value,
    # yet previously leaked the raw "status" dict (masked by a
    # `type: ignore[no-any-return]`). Return nothing, honoring the contract.
    return None
def drop_table(self, name: str, *, keyspace: str | None = None, if_exists: bool | None = None, table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Drop a table from the database, along with all rows therein and related indexes.

Args

name
the name of the table to drop.
keyspace
the keyspace where the table resides. If not specified, the database working keyspace is assumed.
if_exists
if passed as True, trying to drop a non-existing table will not error, just silently do nothing instead. If not provided, the API default behaviour will hold.
table_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for table_admin_timeout_ms.
timeout_ms
an alias for table_admin_timeout_ms.

Example

>>> database.list_table_names()
['fighters', 'games']
>>> database.drop_table("fighters")
>>> database.list_table_names()
['games']
>>> # not erroring because of if_exists:
>>> database.drop_table("fighters", if_exists=True)
Expand source code
def drop_table(
    self,
    name: str,
    *,
    keyspace: str | None = None,
    if_exists: bool | None = None,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Drop a table from the database, along with all rows therein and related indexes.

    Args:
        name: the name of the table to drop.
        keyspace: the keyspace where the table resides. If not specified,
            the database working keyspace is assumed.
        if_exists: if passed as True, trying to drop a non-existing table
            will not error, just silently do nothing instead. If not provided,
            the API default behaviour will hold.
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Example:
        >>> database.list_table_names()
        ['fighters', 'games']
        >>> database.drop_table("fighters")
        >>> database.list_table_names()
        ['games']
        >>> # not erroring because of if_exists:
        >>> database.drop_table("fighters", if_exists=True)
    """

    _table_admin_timeout_ms, _ta_label = _select_singlereq_timeout_ta(
        timeout_options=self.api_options.timeout_options,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    _keyspace = keyspace or self.keyspace
    dt_options: dict[str, bool]
    if if_exists is not None:
        dt_options = {"ifExists": if_exists}
    else:
        dt_options = {}
    driver_commander = self._get_driver_commander(keyspace=_keyspace)
    # Omit absent (None) and empty-dict fields from the command payload.
    dt_payload = {
        "dropTable": {
            k: v
            for k, v in {
                "name": name,
                "options": dt_options,
            }.items()
            if v is not None
            if v != {}
        }
    }
    logger.info(f"dropTable('{name}')")
    dt_response = driver_commander.request(
        payload=dt_payload,
        timeout_context=_TimeoutContext(
            request_ms=_table_admin_timeout_ms, label=_ta_label
        ),
    )
    if dt_response.get("status") != {"ok": 1}:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from dropTable API command.",
            raw_response=dt_response,
        )
    logger.info(f"finished dropTable('{name}')")
    # Fix: the method is annotated `-> None` and documents no return value,
    # yet previously leaked the raw "status" dict (masked by a
    # `type: ignore[no-any-return]`). Also fixed the docstring example,
    # which used a nonexistent `if_not_exists` keyword instead of `if_exists`.
    return None
def drop_table_index(self, name: str, *, keyspace: str | None = None, if_exists: bool | None = None, table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Drops (deletes) an index (of any kind) from the table it is associated to.

This is a blocking operation: the method returns once the index is deleted.

Note

Although associated to a table, index names are unique across a keyspace. For this reason, no table name is required in this call.

Args

name
the name of the index.
keyspace
the keyspace to which the index belongs. If not specified, the general setting for this database is used.
if_exists
if passed as True, trying to drop a non-existing index will not error, just silently do nothing instead. If not provided, the API default behaviour will hold.
table_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for table_admin_timeout_ms.
timeout_ms
an alias for table_admin_timeout_ms.

Example

>>> # Drop an index from the keyspace:
>>> database.drop_table_index("score_index")
>>> # Drop an index, unless it does not exist already:
>>> database.drop_table_index("score_index", if_exists=True)
Expand source code
def drop_table_index(
    self,
    name: str,
    *,
    keyspace: str | None = None,
    if_exists: bool | None = None,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Drops (deletes) an index (of any kind) from the table it is associated to.

    This is a blocking operation: the method returns once the index
    is deleted.

    Note:
        Despite being associated to a table, index names are unique across
        a keyspace, which is why no table name is required in this call.

    Args:
        name: the name of the index.
        keyspace: the keyspace the index belongs to. If not specified,
            the general setting for this database is used.
        if_exists: if passed as True, attempting to drop a non-existing
            index does not error and silently does nothing instead. If not
            provided, the API default behaviour holds.
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on
            the underlying API request. If not provided, this object's
            defaults apply. (A single API request is issued, so all timeout
            parameters are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Example:
        >>> # Drop an index from the keyspace:
        >>> database.drop_table_index("score_index")
        >>> # Drop an index, unless it does not exist already:
        >>> database.drop_table_index("score_index", if_exists=True)
    """

    timeout_value_ms, timeout_label = _select_singlereq_timeout_ta(
        timeout_options=self.api_options.timeout_options,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # Options are sent only if the caller expressed a preference.
    removal_options: dict[str, bool] = (
        {"ifExists": if_exists} if if_exists is not None else {}
    )
    # Assemble the command, omitting absent (None) and empty-dict entries.
    command_fields = {
        "name": name,
        "options": removal_options,
    }
    drop_payload = {
        "dropIndex": {
            field: value
            for field, value in command_fields.items()
            if value is not None and value != {}
        }
    }
    commander = self._get_driver_commander(keyspace=keyspace)
    logger.info(f"dropIndex('{name}')")
    api_response = commander.request(
        payload=drop_payload,
        timeout_context=_TimeoutContext(
            request_ms=timeout_value_ms, label=timeout_label
        ),
    )
    if api_response.get("status") != {"ok": 1}:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from dropIndex API command.",
            raw_response=api_response,
        )
    logger.info(f"finished dropIndex('{name}')")
def get_collection(self, name: str, *, document_type: type[Any] = dict[str, typing.Any], keyspace: str | None = None, embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = (unset), spawn_api_options: APIOptions | UnsetType = (unset)) ‑> Collection[DOC]

Spawn a Collection object instance representing a collection on this database.

Creating a Collection instance does not have any effect on the actual state of the database: in other words, for the created Collection instance to be used meaningfully, the collection must exist already (for instance, it should have been created previously by calling the create_collection method).

Args

name
the name of the collection.
document_type
this parameter acts as a formal specifier for the type checker. If omitted, the resulting Collection is implicitly a Collection[dict[str, Any]]. If provided, it must match the type hint specified in the assignment. See the examples below.
keyspace
the keyspace containing the collection. If no keyspace is specified, the general setting for this database is used.
embedding_api_key
optional API key(s) for interacting with the collection. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
spawn_api_options
a specification - complete or partial - of the API Options to override the defaults inherited from the Database. This allows for a deeper configuration of the collection, e.g. concerning timeouts; if this is passed together with the named timeout parameters, the latter will take precedence in their respective settings.

Returns

a Collection instance, representing the desired collection (but without any form of validation).

Example

>>> my_col = my_db.get_collection("my_collection")
>>> my_col.count_documents({}, upper_bound=100)
41

Note

The attribute and indexing syntax forms achieve the same effect as this method. In other words, the following are equivalent: my_db.get_collection("coll_name") my_db.coll_name my_db["coll_name"]

Expand source code
def get_collection(
    self,
    name: str,
    *,
    document_type: type[Any] = DefaultDocumentType,
    keyspace: str | None = None,
    embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> Collection[DOC]:
    """
    Spawn a `Collection` object instance representing a collection
    on this database.

    Creating a `Collection` instance has no effect on the actual state of
    the database: for the returned object to be usable in a meaningful way,
    the collection must exist already (e.g. it should have been created
    earlier with the `create_collection` method).

    Args:
        name: the name of the collection.
        document_type: this parameter acts as a formal specifier for the
            type checker. If omitted, the resulting Collection is implicitly
            a `Collection[dict[str, Any]]`; if provided, it must match the
            type hint specified in the assignment. See the examples below.
        keyspace: the keyspace containing the collection. If no keyspace
            is specified, the general setting for this database is used.
        embedding_api_key: optional API key(s) for interacting with the
            collection. If an embedding service is configured and this
            parameter is not None, each Data API call includes the necessary
            embedding-related headers as specified here. A plain string
            translates into the one "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            For some vectorize providers/models, if using header-based
            authentication, specialized subclasses of
            `astrapy.authentication.EmbeddingHeadersProvider` should be
            supplied.
        spawn_api_options: a specification - complete or partial - of the
            API Options overriding the defaults inherited from the Database,
            for a deeper configuration of the collection (e.g. concerning
            timeouts). If passed together with the named timeout parameters,
            the latter take precedence in their respective settings.

    Returns:
        a `Collection` instance, representing the desired collection
            (but without any form of validation).

    Example:
        >>> my_col = my_db.get_collection("my_collection")
        >>> my_col.count_documents({}, upper_bound=100)
        41

    Note:
        The attribute and indexing syntax forms achieve the same effect
        as this method. In other words, the following are equivalent:
            my_db.get_collection("coll_name")
            my_db.coll_name
            my_db["coll_name"]
    """

    # Deferred import to avoid a circular dependency at module load time.
    from astrapy.collection import Collection

    # Layer the overrides: spawn-time options first, then the
    # embedding key (which thus takes precedence where it is set).
    effective_api_options = self.api_options.with_override(
        spawn_api_options,
    ).with_override(
        APIOptions(
            embedding_api_key=embedding_api_key,
        ),
    )

    chosen_keyspace = keyspace or self.keyspace
    if chosen_keyspace is None:
        raise ValueError(
            "No keyspace specified. This operation requires a keyspace to "
            "be set, e.g. through the `use_keyspace` method."
        )
    return Collection(
        database=self,
        name=name,
        keyspace=chosen_keyspace,
        api_options=effective_api_options,
    )
def get_database_admin(self, *, token: str | TokenProvider | UnsetType = (unset), spawn_api_options: APIOptions | UnsetType = (unset)) ‑> DatabaseAdmin

Return a DatabaseAdmin object corresponding to this database, for use in admin tasks such as managing keyspaces.

This method, depending on the environment where the database resides, returns an appropriate subclass of DatabaseAdmin.

Args

token
an access token with enough permission on the database to perform the desired tasks. If omitted (as it can generally be done), the token of this Database is used. This can be either a literal token string or a subclass of TokenProvider.
spawn_api_options
a specification - complete or partial - of the API Options to override the defaults. This allows for a deeper configuration of the database admin, e.g. concerning timeouts; if this is passed together with the equivalent named parameters, the latter will take precedence in their respective settings.

Returns

A DatabaseAdmin instance targeting this database. More precisely, for Astra DB an instance of AstraDBDatabaseAdmin is returned; for other environments, an instance of DataAPIDatabaseAdmin is returned.

Example

>>> my_db_admin = my_db.get_database_admin()
>>> if "new_keyspace" not in my_db_admin.list_keyspaces():
...     my_db_admin.create_keyspace("new_keyspace")
>>> my_db_admin.list_keyspaces()
['default_keyspace', 'new_keyspace']
Expand source code
def get_database_admin(
    self,
    *,
    token: str | TokenProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> DatabaseAdmin:
    """
    Spawn a DatabaseAdmin instance for this database, suitable for
    administrative work such as managing keyspaces.

    The concrete subclass of DatabaseAdmin that is returned depends on
    the environment the database belongs to.

    Args:
        token: an access token granting enough permissions on the database
            for the intended admin tasks. When omitted (as it generally can
            be), the token of this Database is reused. Accepts either a
            plain token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        spawn_api_options: a specification - complete or partial - of the
            API Options overriding the defaults. This enables finer
            configuration of the database admin, e.g. concerning timeouts;
            should this be passed along with the equivalent named
            parameters, the latter prevail in their respective settings.

    Returns:
        A DatabaseAdmin instance targeting this database: more precisely,
        an `AstraDBDatabaseAdmin` for Astra DB, and a
        `DataAPIDatabaseAdmin` for any other environment.

    Example:
        >>> my_db_admin = my_db.get_database_admin()
        >>> if "new_keyspace" not in my_db_admin.list_keyspaces():
        ...     my_db_admin.create_keyspace("new_keyspace")
        >>> my_db_admin.list_keyspaces()
        ['default_keyspace', 'new_keyspace']
    """

    # deferred import to sidestep a circular dependency
    from astrapy.admin.admin import AstraDBDatabaseAdmin, DataAPIDatabaseAdmin

    # named parameters take precedence over spawn_api_options,
    # which in turn override this database's own options
    effective_options = self.api_options.with_override(
        spawn_api_options
    ).with_override(APIOptions(token=token))

    # pick the admin flavor matching the database's environment
    admin_class = (
        AstraDBDatabaseAdmin
        if effective_options.environment in Environment.astra_db_values
        else DataAPIDatabaseAdmin
    )
    return admin_class(
        api_endpoint=self.api_endpoint,
        api_options=effective_options,
        spawner_database=self,
    )
def get_table(self, name: str, *, row_type: type[Any] = dict[str, typing.Any], keyspace: str | None = None, embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = (unset), spawn_api_options: APIOptions | UnsetType = (unset)) ‑> Table[ROW]

Spawn a Table object instance representing a table on this database.

Creating a Table instance does not have any effect on the actual state of the database: in other words, for the created Table instance to be used meaningfully, the table must exist already (for instance, it should have been created previously by calling the create_table method).

Args

name
the name of the table.
row_type
this parameter acts as a formal specifier for the type checker. If omitted, the resulting Table is implicitly a Table[dict[str, Any]]. If provided, it must match the type hint specified in the assignment. See the examples below.
keyspace
the keyspace containing the table. If no keyspace is specified, the general setting for this database is used.
embedding_api_key
optional API key(s) for interacting with the table. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
spawn_api_options
a specification - complete or partial - of the API Options to override the defaults inherited from the Database. This allows for a deeper configuration of the table, e.g. concerning timeouts; if this is passed together with the named timeout parameters, the latter will take precedence in their respective settings.

Returns

a Table instance, representing the desired table (but without any form of validation).

Example

>>> # Get a Table object (and read a property of it as an example):
>>> my_table = database.get_table("games")
>>> my_table.full_name
'default_keyspace.games'
>>>
>>> # Get a Table object in a specific keyspace,
>>> # and set an embedding API key to it:
>>> my_other_table = database.get_table(
...     "tournaments",
...     keyspace="the_other_keyspace",
...     embedding_api_key="secret-012abc...",
... )
>>>
>>> from astrapy import Table
>>> MyCustomDictType = dict[str, int]
>>>
>>> # Get a Table object typed with a specific type for its rows:
>>> my_typed_table: Table[MyCustomDictType] = database.get_table(
...     "games",
...     row_type=MyCustomDictType,
... )
Expand source code
def get_table(
    self,
    name: str,
    *,
    row_type: type[Any] = DefaultRowType,
    keyspace: str | None = None,
    embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> Table[ROW]:
    """
    Create a `Table` object instance representing a table on this database.

    Instantiating a `Table` leaves the actual database state untouched:
    in other words, the table must already exist on the database for the
    resulting `Table` instance to be used meaningfully (for example, it
    may have been created earlier by calling the `create_table` method).

    Args:
        name: the name of the table.
        row_type: this parameter acts as a formal specifier for the type
            checker. If omitted, the resulting Table is implicitly
            a `Table[dict[str, Any]]`. If provided, it must match the
            type hint specified in the assignment.
            See the examples below.
        keyspace: the keyspace the table belongs to. When not specified,
            the general setting for this database applies.
        embedding_api_key: optional API key(s) for interacting with the table.
            If an embedding service is configured and this parameter is
            not None, every Data API call will carry the necessary
            embedding-related headers as dictated by this parameter.
            A plain string translates into the one "embedding api key"
            header (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            Certain vectorize providers/models, when using header-based
            authentication, require specialized subclasses of
            `astrapy.authentication.EmbeddingHeadersProvider`.
        spawn_api_options: a specification - complete or partial - of the
            API Options overriding the defaults inherited from the Database.
            This enables finer configuration of the table, e.g. concerning
            timeouts; should this be passed along with the named timeout
            parameters, the latter prevail in their respective settings.

    Returns:
        a `Table` instance, representing the desired table
            (but without any form of validation).

    Example:
        >>> # Get a Table object (and read a property of it as an example):
        >>> my_table = database.get_table("games")
        >>> my_table.full_name
        'default_keyspace.games'
        >>>
        >>> # Get a Table object in a specific keyspace,
        >>> # and set an embedding API key to it:
        >>> my_other_table = database.get_table(
        ...     "tournaments",
        ...     keyspace="the_other_keyspace",
        ...     embedding_api_key="secret-012abc...",
        ... )
        >>>
        >>> from astrapy import Table
        >>> MyCustomDictType = dict[str, int]
        >>>
        >>> # Get a Table object typed with a specific type for its rows:
        >>> my_typed_table: Table[MyCustomDictType] = database.get_table(
        ...     "games",
        ...     row_type=MyCustomDictType,
        ... )
    """

    # deferred import to sidestep a circular dependency
    from astrapy.table import Table

    # named parameters take precedence over spawn_api_options,
    # which in turn override this database's own options
    effective_options = self.api_options.with_override(
        spawn_api_options
    ).with_override(APIOptions(embedding_api_key=embedding_api_key))

    chosen_keyspace = keyspace or self.keyspace
    if chosen_keyspace is None:
        raise ValueError(
            "No keyspace specified. This operation requires a keyspace to "
            "be set, e.g. through the `use_keyspace` method."
        )
    return Table[ROW](
        database=self,
        name=name,
        keyspace=chosen_keyspace,
        api_options=effective_options,
    )
def info(self, *, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> AstraDBDatabaseInfo

Additional information on the database as an AstraDBDatabaseInfo instance.

Some of the returned properties are dynamic throughout the lifetime of the database (such as raw_info["keyspaces"]). For this reason, each invocation of this method triggers a new request to the DevOps API.

Args

database_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for database_admin_timeout_ms.
timeout_ms
an alias for database_admin_timeout_ms.

Example

>>> my_db.info().region
'eu-west-1'
>>> my_db.info().raw_info['datacenters'][0]['dateCreated']
'2023-01-30T12:34:56Z'

Note

see the AstraDBDatabaseInfo documentation for a caveat about the difference between the region and the raw["region"] attributes.

Expand source code
def info(
    self,
    *,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> AstraDBDatabaseInfo:
    """
    Additional information on the database, as an AstraDBDatabaseInfo instance.

    Several of the reported properties can change during the database
    lifetime (such as raw_info["keyspaces"]); for this reason, a fresh
    DevOps API request is issued each time this method is invoked.

    Args:
        database_admin_timeout_ms: a timeout, in milliseconds, for the
            underlying API request. This object's defaults apply when not
            provided. (A single API request is issued by this method,
            hence all timeout parameters are treated the same.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.

    Example:
        >>> my_db.info().region
        'eu-west-1'

        >>> my_db.info().raw_info['datacenters'][0]['dateCreated']
        '2023-01-30T12:34:56Z'

    Note:
        see the AstraDBDatabaseInfo documentation for a caveat about the
        difference between the `region` and the `raw["region"]` attributes.
    """

    # the DevOps API backing this call is available for Astra DB only
    if self.api_options.environment not in Environment.astra_db_values:
        raise InvalidEnvironmentException(
            "Environments outside of Astra DB are not supported."
        )

    chosen_timeout_ms, timeout_label = _select_singlereq_timeout_da(
        timeout_options=self.api_options.timeout_options,
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info("getting database info")
    fetched_info = fetch_database_info(
        self.api_endpoint,
        keyspace=self.keyspace,
        request_timeout_ms=chosen_timeout_ms,
        api_options=self.api_options,
    )
    # a None result signals a failure on the DevOps API side
    if fetched_info is None:
        raise DevOpsAPIException("Failure while fetching database info.")
    logger.info("finished getting database info")
    return fetched_info
def list_collection_names(self, *, keyspace: str | None = None, collection_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[str]

List the names of all collections in a given keyspace of this database.

Args

keyspace
the keyspace to be inspected. If not specified, the general setting for this database is assumed.
collection_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for collection_admin_timeout_ms.
timeout_ms
an alias for collection_admin_timeout_ms.

Returns

a list of the collection names as strings, in no particular order.

Example

>>> my_db.list_collection_names()
['a_collection', 'another_col']
Expand source code
def list_collection_names(
    self,
    *,
    keyspace: str | None = None,
    collection_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[str]:
    """
    Return the names of all collections in a given keyspace of this database.

    Args:
        keyspace: the keyspace to inspect. When not specified, the
            general setting for this database applies.
        collection_admin_timeout_ms: a timeout, in milliseconds, for the
            underlying API request. This object's defaults apply when not
            provided. (A single API request is issued by this method,
            hence all timeout parameters are treated the same.)
        request_timeout_ms: an alias for `collection_admin_timeout_ms`.
        timeout_ms: an alias for `collection_admin_timeout_ms`.

    Returns:
        the collection names, as a list of strings in no particular order.

    Example:
        >>> my_db.list_collection_names()
        ['a_collection', 'another_col']
    """

    chosen_timeout_ms, timeout_label = _select_singlereq_timeout_ca(
        timeout_options=self.api_options.timeout_options,
        collection_admin_timeout_ms=collection_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    commander = self._get_driver_commander(keyspace=keyspace)
    logger.info("findCollections")
    fc_response = commander.request(
        payload={"findCollections": {}},
        timeout_context=_TimeoutContext(
            request_ms=chosen_timeout_ms, label=timeout_label
        ),
    )
    # a well-formed response carries the names under status.collections
    if "collections" in fc_response.get("status", {}):
        logger.info("finished findCollections")
        return fc_response["status"]["collections"]  # type: ignore[no-any-return]
    raise UnexpectedDataAPIResponseException(
        text="Faulty response from findCollections API command.",
        raw_response=fc_response,
    )
def list_collections(self, *, keyspace: str | None = None, collection_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[CollectionDescriptor]

List all collections in a given keyspace for this database.

Args

keyspace
the keyspace to be inspected. If not specified, the general setting for this database is assumed.
collection_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for collection_admin_timeout_ms.
timeout_ms
an alias for collection_admin_timeout_ms.

Returns

a list of CollectionDescriptor instances, one for each collection.

Example

>>> coll_list = my_db.list_collections()
>>> coll_list
[CollectionDescriptor(name='my_v_col', options=CollectionDefinition())]
>>> for coll_dict in my_db.list_collections():
...     print(coll_dict)
...
CollectionDescriptor(name='my_v_col', options=CollectionDefinition())
Expand source code
def list_collections(
    self,
    *,
    keyspace: str | None = None,
    collection_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[CollectionDescriptor]:
    """
    List all collections in a given keyspace for this database.

    Args:
        keyspace: the keyspace to inspect. When not specified, the
            general setting for this database applies.
        collection_admin_timeout_ms: a timeout, in milliseconds, for the
            underlying API request. This object's defaults apply when not
            provided. (A single API request is issued by this method,
            hence all timeout parameters are treated the same.)
        request_timeout_ms: an alias for `collection_admin_timeout_ms`.
        timeout_ms: an alias for `collection_admin_timeout_ms`.

    Returns:
        a list of CollectionDescriptor instances, one for each collection.

    Example:
        >>> coll_list = my_db.list_collections()
        >>> coll_list
        [CollectionDescriptor(name='my_v_col', options=CollectionDefinition())]
        >>> for coll_dict in my_db.list_collections():
        ...     print(coll_dict)
        ...
        CollectionDescriptor(name='my_v_col', options=CollectionDefinition())
    """

    chosen_timeout_ms, timeout_label = _select_singlereq_timeout_ca(
        timeout_options=self.api_options.timeout_options,
        collection_admin_timeout_ms=collection_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # delegate to the shared, timeout-context-based implementation
    return self._list_collections_ctx(
        keyspace=keyspace,
        timeout_context=_TimeoutContext(
            request_ms=chosen_timeout_ms, label=timeout_label
        ),
    )
def list_table_names(self, *, keyspace: str | None = None, table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[str]

List the names of all tables in a given keyspace of this database.

Args

keyspace
the keyspace to be inspected. If not specified, the general setting for this database is assumed.
table_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for table_admin_timeout_ms.
timeout_ms
an alias for table_admin_timeout_ms.

Returns

a list of the table names as strings, in no particular order.

Example

>>> database.list_table_names()
['fighters', 'games']
Expand source code
def list_table_names(
    self,
    *,
    keyspace: str | None = None,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[str]:
    """
    Return the names of all tables in a given keyspace of this database.

    Args:
        keyspace: the keyspace to inspect. When not specified, the
            general setting for this database applies.
        table_admin_timeout_ms: a timeout, in milliseconds, for the
            underlying API request. This object's defaults apply when not
            provided. (A single API request is issued by this method,
            hence all timeout parameters are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Returns:
        the table names, as a list of strings in no particular order.

    Example:
        >>> database.list_table_names()
        ['fighters', 'games']
    """

    chosen_timeout_ms, timeout_label = _select_singlereq_timeout_ta(
        timeout_options=self.api_options.timeout_options,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    commander = self._get_driver_commander(keyspace=keyspace)
    logger.info("listTables")
    lt_response = commander.request(
        payload={"listTables": {}},
        timeout_context=_TimeoutContext(
            request_ms=chosen_timeout_ms, label=timeout_label
        ),
    )
    # a well-formed response carries the names under status.tables
    if "tables" in lt_response.get("status", {}):
        logger.info("finished listTables")
        return lt_response["status"]["tables"]  # type: ignore[no-any-return]
    raise UnexpectedDataAPIResponseException(
        text="Faulty response from listTables API command.",
        raw_response=lt_response,
    )
def list_tables(self, *, keyspace: str | None = None, table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[ListTableDescriptor]

List all tables in a given keyspace for this database.

Args

keyspace
the keyspace to be inspected. If not specified, the general setting for this database is assumed.
table_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for table_admin_timeout_ms.
timeout_ms
an alias for table_admin_timeout_ms.

Returns

a list of ListTableDescriptor instances, one for each table.

Example

>>> tables = my_database.list_tables()
>>> tables
[ListTableDescriptor(name='fighters', definition=ListTableDefinition(...
>>> tables[1].name
'games'
>>> tables[1].definition.columns
{'match_id': TableScalarColumnTypeDescriptor(ColumnType.TEXT),...
>>> tables[1].definition.columns['score']
TableScalarColumnTypeDescriptor(ColumnType.INT)
>>> tables[1].definition.primary_key.partition_by
['match_id']
>>> tables[1].definition.primary_key.partition_sort
{'round': 1}
Expand source code
def list_tables(
    self,
    *,
    keyspace: str | None = None,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[ListTableDescriptor]:
    """
    List all tables in a given keyspace for this database.

    Args:
        keyspace: the keyspace to inspect. When not specified, the
            general setting for this database applies.
        table_admin_timeout_ms: a timeout, in milliseconds, for the
            underlying API request. This object's defaults apply when not
            provided. (A single API request is issued by this method,
            hence all timeout parameters are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Returns:
        a list of ListTableDescriptor instances, one for each table.

    Example:
        >>> tables = my_database.list_tables()
        >>> tables
        [ListTableDescriptor(name='fighters', definition=ListTableDefinition(...
        >>> tables[1].name
        'games'
        >>> tables[1].definition.columns
        {'match_id': TableScalarColumnTypeDescriptor(ColumnType.TEXT),...
        >>> tables[1].definition.columns['score']
        TableScalarColumnTypeDescriptor(ColumnType.INT)
        >>> tables[1].definition.primary_key.partition_by
        ['match_id']
        >>> tables[1].definition.primary_key.partition_sort
        {'round': 1}
    """

    chosen_timeout_ms, timeout_label = _select_singlereq_timeout_ta(
        timeout_options=self.api_options.timeout_options,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # delegate to the shared, timeout-context-based implementation
    return self._list_tables_ctx(
        keyspace=keyspace,
        timeout_context=_TimeoutContext(
            request_ms=chosen_timeout_ms, label=timeout_label
        ),
    )
def name(self) ‑> str

The name of this database. Note that this bears no uniqueness guarantees.

Calling this method the first time involves a request to the DevOps API (the resulting database name is then cached). See the astrapy.info method for more details.

Example

>>> my_db.name()
'the_application_database'
Expand source code
def name(self) -> str:
    """
    The name of this database. Note that this bears no uniqueness guarantees.

    The first call to this method involves a request to the DevOps API
    (after which the obtained database name is cached).
    See the `info()` method for more details.

    Example:
        >>> my_db.name()
        'the_application_database'
    """

    cached_name = self._name
    if cached_name is None:
        # lazily resolve the name (and cache it for subsequent calls)
        cached_name = self.info().name
        self._name = cached_name
    return cached_name
def to_async(self, *, keyspace: str | None = None, token: str | TokenProvider | UnsetType = (unset), api_options: APIOptions | UnsetType = (unset)) ‑> AsyncDatabase

Create an AsyncDatabase from this one. Save for the arguments explicitly provided as overrides, everything else is kept identical to this database in the copy.

Args

keyspace
this is the keyspace all method calls will target, unless one is explicitly specified in the call. If no keyspace is supplied when creating a Database, the name "default_keyspace" is set.
token
an Access Token to the database. Example: "AstraCS:xyz…" This can be either a literal token string or a subclass of TokenProvider.
api_options
any additional options to set for the result, in the form of an APIOptions instance (where one can set just the needed attributes). In case the same setting is also provided as named parameter, the latter takes precedence.

Returns

the new copy, an AsyncDatabase instance.

Example

>>> async_database = my_db.to_async()
>>> asyncio.run(async_database.list_collection_names())
Expand source code
def to_async(
    self,
    *,
    keyspace: str | None = None,
    token: str | TokenProvider | UnsetType = _UNSET,
    api_options: APIOptions | UnsetType = _UNSET,
) -> AsyncDatabase:
    """
    Create an AsyncDatabase out of this one. Everything other than the
    arguments explicitly passed as overrides is carried over unchanged
    to the copy.

    Args:
        keyspace: this is the keyspace targeted by all method calls, unless
            one is given explicitly in the call. When no keyspace is supplied
            at Database creation, the name "default_keyspace" is set.
        token: an Access Token to the database. Example: "AstraCS:xyz..."
            Accepts either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        api_options: any further options to set on the result, as an
            APIOptions instance (only the needed attributes may be set).
            Should a setting appear both here and as a named parameter,
            the named parameter prevails.

    Returns:
        the new copy, an `AsyncDatabase` instance.

    Example:
        >>> async_database = my_db.to_async()
        >>> asyncio.run(async_database.list_collection_names())
    """

    # the named `token` parameter takes precedence over `api_options`
    effective_options = self.api_options.with_override(api_options).with_override(
        APIOptions(token=token)
    )
    return AsyncDatabase(
        api_endpoint=self.api_endpoint,
        keyspace=keyspace or self.keyspace,
        api_options=effective_options,
    )
def use_keyspace(self, keyspace: str) ‑> None

Switch to a new working keyspace for this database. This method changes (mutates) the Database instance.

Note that this method does not create the keyspace, which should exist already (created for instance with a DatabaseAdmin.create_keyspace call).

Args

keyspace
the new keyspace to use as the database working keyspace.

Returns

None.

Example

>>> my_db.list_collection_names()
['coll_1', 'coll_2']
>>> my_db.use_keyspace("an_empty_keyspace")
>>> my_db.list_collection_names()
[]
Expand source code
def use_keyspace(self, keyspace: str) -> None:
    """
    Switch this database over to a new working keyspace.
    This call changes (mutates) the Database instance.

    The keyspace itself is not created by this method: it must exist
    beforehand (created, for instance, with a
    `DatabaseAdmin.create_keyspace` call).

    Args:
        keyspace: the keyspace to adopt as the database working keyspace.

    Returns:
        None.

    Example:
        >>> my_db.list_collection_names()
        ['coll_1', 'coll_2']
        >>> my_db.use_keyspace("an_empty_keyspace")
        >>> my_db.list_collection_names()
        []
    """
    logger.info(f"switching to keyspace '{keyspace}'")
    self._using_keyspace = keyspace
    # rebuild the API commander so subsequent requests target the new keyspace
    self._api_commander = self._get_api_commander(keyspace=self.keyspace)
def with_options(self, *, keyspace: str | None = None, token: str | TokenProvider | UnsetType = (unset), api_options: APIOptions | UnsetType = (unset)) ‑> Database

Create a clone of this database with some changed attributes.

Args

keyspace
this is the keyspace all method calls will target, unless one is explicitly specified in the call. If no keyspace is supplied when creating a Database, the name "default_keyspace" is set.
token
an Access Token to the database. Example: "AstraCS:xyz...". This can be either a literal token string or a subclass of TokenProvider.
api_options
any additional options to set for the clone, in the form of an APIOptions instance (where one can set just the needed attributes). In case the same setting is also provided as named parameter, the latter takes precedence.

Returns

a new Database instance.

Example

>>> my_db_2 = my_db.with_options(
...     keyspace="the_other_keyspace",
...     token="AstraCS:xyz...",
... )
Expand source code
def with_options(
    self,
    *,
    keyspace: str | None = None,
    token: str | TokenProvider | UnsetType = _UNSET,
    api_options: APIOptions | UnsetType = _UNSET,
) -> Database:
    """
    Create a clone of this database with some changed attributes.

    Args:
        keyspace: this is the keyspace all method calls will target, unless
            one is explicitly specified in the call. If no keyspace is supplied
            when creating a Database, the name "default_keyspace" is set.
        token: an Access Token to the database. Example: `"AstraCS:xyz..."`.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        api_options: any additional options to set for the clone, in the form of
            an APIOptions instance (where one can set just the needed attributes).
            In case the same setting is also provided as named parameter,
            the latter takes precedence.

    Returns:
        a new `Database` instance.

    Example:
        >>> my_db_2 = my_db.with_options(
        ...     keyspace="the_other_keyspace",
        ...     token="AstraCS:xyz...",
        ... )
    """

    # Cloning is fully delegated to the internal copy helper.
    return self._copy(keyspace=keyspace, token=token, api_options=api_options)
class Table (*, database: Database, name: str, keyspace: str | None, api_options: FullAPIOptions)

A Data API table, the object to interact with the Data API for structured data, especially for DDL operations. This class has a synchronous interface.

This class is not meant for direct instantiation by the user, rather it is obtained by invoking methods such as get_table of Database, wherefrom the Table inherits its API options such as authentication token and API endpoint. In order to create a table, instead, one should call the create_table method of Database, providing a table definition parameter that can be built in different ways (see the CreateTableDefinition object and examples below).

Args

database
a Database object, instantiated earlier. This represents the database the table belongs to.
name
the table name. This parameter should match an existing table on the database.
keyspace
this is the keyspace to which the table belongs. If nothing is specified, the database's working keyspace is used.
api_options
a complete specification of the API Options for this instance.

Examples

>>> from astrapy import DataAPIClient
>>> client = DataAPIClient()
>>> database = client.get_database(
...     "https://01234567-....apps.astra.datastax.com",
...     token="AstraCS:..."
... )
>>>
>>> # Create a table using the fluent syntax for definition
>>> from astrapy.constants import SortMode
>>> from astrapy.info import (
...     CreateTableDefinition,
...     ColumnType,
... )
>>> table_definition = (
...     CreateTableDefinition.builder()
...     .add_column("match_id", ColumnType.TEXT)
...     .add_column("round", ColumnType.INT)
...     .add_vector_column("m_vector", dimension=3)
...     .add_column("score", ColumnType.INT)
...     .add_column("when", ColumnType.TIMESTAMP)
...     .add_column("winner", ColumnType.TEXT)
...     .add_set_column("fighters", ColumnType.UUID)
...     .add_partition_by(["match_id"])
...     .add_partition_sort({"round": SortMode.ASCENDING})
...     .build()
... )
>>> my_table = database.create_table(
...     "games",
...     definition=table_definition,
... )
>>> # Create a table with the definition as object
>>> # (and do not raise an error if the table exists already)
>>> from astrapy.info import (
...     CreateTableDefinition,
...     TablePrimaryKeyDescriptor,
...     TableScalarColumnTypeDescriptor,
...     TableValuedColumnType,
...     TableValuedColumnTypeDescriptor,
...     TableVectorColumnTypeDescriptor,
... )
>>> table_definition_1 = CreateTableDefinition(
...     columns={
...         "match_id": TableScalarColumnTypeDescriptor(
...             ColumnType.TEXT,
...         ),
...         "round": TableScalarColumnTypeDescriptor(
...             ColumnType.INT,
...         ),
...         "m_vector": TableVectorColumnTypeDescriptor(
...             column_type="vector", dimension=3
...         ),
...         "score": TableScalarColumnTypeDescriptor(
...             ColumnType.INT,
...         ),
...         "when": TableScalarColumnTypeDescriptor(
...             ColumnType.TIMESTAMP,
...         ),
...         "winner": TableScalarColumnTypeDescriptor(
...             ColumnType.TEXT,
...         ),
...         "fighters": TableValuedColumnTypeDescriptor(
...             column_type=TableValuedColumnType.SET,
...             value_type=ColumnType.UUID,
...         ),
...     },
...     primary_key=TablePrimaryKeyDescriptor(
...         partition_by=["match_id"],
...         partition_sort={"round": SortMode.ASCENDING},
...     ),
... )
>>> my_table_1 = database.create_table(
...     "games",
...     definition=table_definition_1,
...     if_not_exists=True,
... )
>>> # Create a table with the definition as plain dictionary
>>> # (and do not raise an error if the table exists already)
>>> table_definition_2 = {
...     "columns": {
...         "match_id": {"type": "text"},
...         "round": {"type": "int"},
...         "m_vector": {"type": "vector", "dimension": 3},
...         "score": {"type": "int"},
...         "when": {"type": "timestamp"},
...         "winner": {"type": "text"},
...         "fighters": {"type": "set", "valueType": "uuid"},
...     },
...     "primaryKey": {
...         "partitionBy": ["match_id"],
...         "partitionSort": {"round": 1},
...     },
... }
>>> my_table_2 = database.create_table(
...     "games",
...     definition=table_definition_2,
...     if_not_exists=True,
... )
>>> # Get a reference to an existing table
>>> # (no checks are performed on DB)
>>> my_table_3 = database.get_table("games")

Note

creating an instance of Table does not trigger, in itself, actual creation of the table on the database. The latter should have been created beforehand, e.g. through the create_table method of a Database.

Expand source code
class Table(Generic[ROW]):
    """
    A Data API table, the object to interact with the Data API for structured data,
    especially for DDL operations. This class has a synchronous interface.

    This class is not meant for direct instantiation by the user, rather
    it is obtained by invoking methods such as `get_table` of Database,
    wherefrom the Table inherits its API options such as authentication
    token and API endpoint.
    In order to create a table, instead, one should call the `create_table`
    method of Database, providing a table definition parameter that can be built
    in different ways (see the `CreateTableDefinition` object and examples below).

    Args:
        database: a Database object, instantiated earlier. This represents
            the database the table belongs to.
        name: the table name. This parameter should match an existing
            table on the database.
        keyspace: this is the keyspace to which the table belongs.
            If nothing is specified, the database's working keyspace is used.
        api_options: a complete specification of the API Options for this instance.

    Examples:
        >>> from astrapy import DataAPIClient
        >>> client = DataAPIClient()
        >>> database = client.get_database(
        ...     "https://01234567-....apps.astra.datastax.com",
        ...     token="AstraCS:..."
        ... )
        >>>

        >>> # Create a table using the fluent syntax for definition
        >>> from astrapy.constants import SortMode
        >>> from astrapy.info import (
        ...     CreateTableDefinition,
        ...     ColumnType,
        ... )
        >>> table_definition = (
        ...     CreateTableDefinition.builder()
        ...     .add_column("match_id", ColumnType.TEXT)
        ...     .add_column("round", ColumnType.INT)
        ...     .add_vector_column("m_vector", dimension=3)
        ...     .add_column("score", ColumnType.INT)
        ...     .add_column("when", ColumnType.TIMESTAMP)
        ...     .add_column("winner", ColumnType.TEXT)
        ...     .add_set_column("fighters", ColumnType.UUID)
        ...     .add_partition_by(["match_id"])
        ...     .add_partition_sort({"round": SortMode.ASCENDING})
        ...     .build()
        ... )
        >>> my_table = database.create_table(
        ...     "games",
        ...     definition=table_definition,
        ... )

        >>> # Create a table with the definition as object
        >>> # (and do not raise an error if the table exists already)
        >>> from astrapy.info import (
        ...     CreateTableDefinition,
        ...     TablePrimaryKeyDescriptor,
        ...     TableScalarColumnTypeDescriptor,
        ...     TableValuedColumnType,
        ...     TableValuedColumnTypeDescriptor,
        ...     TableVectorColumnTypeDescriptor,
        ... )
        >>> table_definition_1 = CreateTableDefinition(
        ...     columns={
        ...         "match_id": TableScalarColumnTypeDescriptor(
        ...             ColumnType.TEXT,
        ...         ),
        ...         "round": TableScalarColumnTypeDescriptor(
        ...             ColumnType.INT,
        ...         ),
        ...         "m_vector": TableVectorColumnTypeDescriptor(
        ...             column_type="vector", dimension=3
        ...         ),
        ...         "score": TableScalarColumnTypeDescriptor(
        ...             ColumnType.INT,
        ...         ),
        ...         "when": TableScalarColumnTypeDescriptor(
        ...             ColumnType.TIMESTAMP,
        ...         ),
        ...         "winner": TableScalarColumnTypeDescriptor(
        ...             ColumnType.TEXT,
        ...         ),
        ...         "fighters": TableValuedColumnTypeDescriptor(
        ...             column_type=TableValuedColumnType.SET,
        ...             value_type=ColumnType.UUID,
        ...         ),
        ...     },
        ...     primary_key=TablePrimaryKeyDescriptor(
        ...         partition_by=["match_id"],
        ...         partition_sort={"round": SortMode.ASCENDING},
        ...     ),
        ... )
        >>> my_table_1 = database.create_table(
        ...     "games",
        ...     definition=table_definition_1,
        ...     if_not_exists=True,
        ... )

        >>> # Create a table with the definition as plain dictionary
        >>> # (and do not raise an error if the table exists already)
        >>> table_definition_2 = {
        ...     "columns": {
        ...         "match_id": {"type": "text"},
        ...         "round": {"type": "int"},
        ...         "m_vector": {"type": "vector", "dimension": 3},
        ...         "score": {"type": "int"},
        ...         "when": {"type": "timestamp"},
        ...         "winner": {"type": "text"},
        ...         "fighters": {"type": "set", "valueType": "uuid"},
        ...     },
        ...     "primaryKey": {
        ...         "partitionBy": ["match_id"],
        ...         "partitionSort": {"round": 1},
        ...     },
        ... }
        >>> my_table_2 = database.create_table(
        ...     "games",
        ...     definition=table_definition_2,
        ...     if_not_exists=True,
        ... )

        >>> # Get a reference to an existing table
        >>> # (no checks are performed on DB)
        >>> my_table_3 = database.get_table("games")

    Note:
        creating an instance of Table does not trigger, in itself, actual
        creation of the table on the database. The latter should have been created
        beforehand, e.g. through the `create_table` method of a Database.
    """

    def __init__(
        self,
        *,
        database: Database,
        name: str,
        keyspace: str | None,
        api_options: FullAPIOptions,
    ) -> None:
        """Set up the Table object; see the class docstring for parameters."""
        self.api_options = api_options
        self._name = name

        # Fall back on the database's working keyspace when none is supplied.
        if keyspace is not None:
            effective_keyspace: str | None = keyspace
        else:
            effective_keyspace = database.keyspace
        if effective_keyspace is None:
            raise ValueError("Attempted to create Table with 'keyspace' unset.")

        # Keep a private Database clone pinned to the resolved keyspace.
        self._database = database._copy(
            keyspace=effective_keyspace, api_options=self.api_options
        )
        # Merged in this order; later entries override duplicate header names.
        self._commander_headers = {
            DEFAULT_DATA_API_AUTH_HEADER: self.api_options.token.get_token(),
            **self.api_options.embedding_api_key.get_headers(),
            **self.api_options.database_additional_headers,
        }
        self._api_commander = self._get_api_commander()
        self._converter_agent: _TableConverterAgent[ROW] = _TableConverterAgent(
            options=self.api_options.serdes_options,
        )

    def __repr__(self) -> str:
        """Return a concise description of this table and its database."""
        parts = [
            f'name="{self.name}"',
            f'keyspace="{self.keyspace}"',
            f'database.api_endpoint="{self.database.api_endpoint}"',
            f"api_options={self.api_options}",
        ]
        return f"{self.__class__.__name__}({', '.join(parts)})"

    def __eq__(self, other: Any) -> bool:
        """Tables are equal when name, database and API options all match."""
        if not isinstance(other, Table):
            return False
        return (
            self._name == other._name
            and self._database == other._database
            and self.api_options == other.api_options
        )

    def _get_api_commander(self) -> APICommander:
        """Instantiate a new APICommander based on the properties of this class."""

        if self._database.keyspace is None:
            raise ValueError(
                "No keyspace specified. Table requires a keyspace to "
                "be set, e.g. through the `keyspace` constructor parameter."
            )

        # Assemble the request base path from the non-empty URL components,
        # in order: api path, api version, keyspace, table name.
        raw_components = (
            self._database.api_options.data_api_url_options.api_path,
            self._database.api_options.data_api_url_options.api_version,
            self._database.keyspace,
            self._name,
        )
        path_parts: list[str] = []
        for raw_component in raw_components:
            if raw_component is None:
                continue
            stripped = raw_component.strip("/")
            if stripped != "":
                path_parts.append(stripped)
        base_path = "/" + "/".join(path_parts)
        return APICommander(
            api_endpoint=self._database.api_endpoint,
            path=base_path,
            headers=self._commander_headers,
            callers=self.api_options.callers,
            redacted_header_names=self.api_options.redacted_header_names,
            handle_decimals_writes=True,
            handle_decimals_reads=True,
        )

    def _copy(
        self: Table[ROW],
        *,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> Table[ROW]:
        # Named arguments win over the same settings inside `api_options`,
        # since their override is applied last.
        effective_options = self.api_options.with_override(
            api_options
        ).with_override(APIOptions(embedding_api_key=embedding_api_key))
        return Table(
            database=self.database,
            name=self.name,
            keyspace=self.keyspace,
            api_options=effective_options,
        )

    def with_options(
        self: Table[ROW],
        *,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> Table[ROW]:
        """
        Create a clone of this table with some changed attributes.

        Args:
            embedding_api_key: optional API key(s) for interacting with the table.
                If an embedding service is configured, and this parameter is not None,
                each Data API call will include the necessary embedding-related headers
                as specified by this parameter. If a string is passed, it translates
                into the one "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
                For some vectorize providers/models, if using header-based
                authentication, specialized subclasses of
                `astrapy.authentication.EmbeddingHeadersProvider` should be supplied.
            api_options: any additional options to set for the clone, in the form of
                an APIOptions instance (where one can set just the needed attributes).
                In case the same setting is also provided as named parameter,
                the latter takes precedence.

        Returns:
            a new Table instance.

        Example:
            >>> table_with_api_key_configured = my_table.with_options(
            ...     embedding_api_key="secret-key-0123abcd...",
            ... )
        """

        # Cloning is fully delegated to the internal copy helper.
        return self._copy(
            embedding_api_key=embedding_api_key, api_options=api_options
        )

    def to_async(
        self: Table[ROW],
        *,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncTable[ROW]:
        """
        Create an AsyncTable from this one. Except for the overrides passed
        explicitly as arguments, everything else is kept identical to this
        table in the copy (with the database converted into an async object).

        Args:
            embedding_api_key: optional API key(s) for interacting with the table.
                If an embedding service is configured, and this parameter is not None,
                each Data API call will include the necessary embedding-related headers
                as specified by this parameter. If a string is passed, it translates
                into the one "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
                For some vectorize providers/models, if using header-based
                authentication, specialized subclasses of
                `astrapy.authentication.EmbeddingHeadersProvider` should be supplied.
            api_options: any additional options to set for the result, in the form of
                an APIOptions instance (where one can set just the needed attributes).
                In case the same setting is also provided as named parameter,
                the latter takes precedence.

        Returns:
            the new copy, an AsyncTable instance.

        Example:
            >>> asyncio.run(my_table.to_async().find_one(
            ...     {"match_id": "fight4"},
            ...     projection={"winner": True},
            ... ))
            {"pk": 1, "column": "value"}
        """

        # Named arguments win over the same settings inside `api_options`.
        named_overrides = APIOptions(embedding_api_key=embedding_api_key)
        resulting_options = self.api_options.with_override(
            api_options
        ).with_override(named_overrides)
        return AsyncTable(
            database=self.database.to_async(),
            name=self.name,
            keyspace=self.keyspace,
            api_options=resulting_options,
        )

    def definition(
        self,
        *,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> ListTableDefinition:
        """
        Query the Data API and return a structure defining the table schema.
        If there are no unsupported columns in the table, the return value has
        the same contents as could have been provided to a `create_table` method call.

        Args:
            table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Returns:
            A `ListTableDefinition` object, available for inspection.

        Raises:
            ValueError: if no table with this name is found on the database.

        Example:
            >>> my_table.definition()
            ListTableDefinition(columns=[match_id,round,fighters, ...  # shortened
        """

        _table_admin_timeout_ms, _ta_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info(f"getting tables in search of '{self.name}'")
        # Fetch all table descriptors, then keep only those matching this name.
        all_descriptors = self.database._list_tables_ctx(
            keyspace=None,
            timeout_context=_TimeoutContext(
                request_ms=_table_admin_timeout_ms,
                label=_ta_label,
            ),
        )
        matching = [desc for desc in all_descriptors if desc.name == self.name]
        logger.info(f"finished getting tables in search of '{self.name}'")
        if not matching:
            raise ValueError(
                f"Table {self.keyspace}.{self.name} not found.",
            )
        return matching[0].definition

    def info(
        self,
        *,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> TableInfo:
        """
        Return information on the table. This should not be confused with the table
        definition (i.e. the schema).

        Args:
            database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying DevOps API request.
                If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.

        Returns:
            A TableInfo object for inspection.

        Example:
            >>> # Note: output reformatted for clarity.
            >>> my_table.info()
            TableInfo(
                database_info=AstraDBDatabaseInfo(id=..., name=..., ...),
                keyspace='default_keyspace',
                name='games',
                full_name='default_keyspace.games'
            )
        """

        # The database-level info is fetched through the DevOps API and
        # bundled together with this table's identifying attributes.
        db_info = self.database.info(
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        return TableInfo(
            database_info=db_info,
            keyspace=self.keyspace,
            name=self.name,
            full_name=self.full_name,
        )

    @property
    def database(self) -> Database:
        """
        The Database object this table belongs to.

        Example:
            >>> my_table.database.name
            'the_db'
        """

        return self._database

    @property
    def keyspace(self) -> str:
        """
        The keyspace this table is in.

        Raises a ValueError if the underlying database has no keyspace set.

        Example:
            >>> my_table.keyspace
            'default_keyspace'
        """

        db_keyspace = self.database.keyspace
        if db_keyspace is None:
            raise ValueError("The table's DB is set with keyspace=None")
        return db_keyspace

    @property
    def name(self) -> str:
        """
        The name of this table.

        Example:
            >>> my_table.name
            'games'
        """

        return self._name

    @property
    def full_name(self) -> str:
        """
        The fully-qualified table name within the database,
        in the form "keyspace.table_name".

        Example:
            >>> my_table.full_name
            'default_keyspace.my_table'
        """

        # Composed from the two other properties, so the same ValueError
        # as `keyspace` applies when the DB keyspace is unset.
        return f"{self.keyspace}.{self.name}"

    def _create_generic_index(
        self,
        i_name: str,
        ci_definition: dict[str, Any],
        ci_command: str,
        if_not_exists: bool | None,
        table_admin_timeout_ms: int | None,
        request_timeout_ms: int | None,
        timeout_ms: int | None,
    ) -> None:
        # Shared machinery behind `create_index` and `create_vector_index`:
        # builds the payload, issues the command and checks the API response.
        options_block: dict[str, bool] = (
            {} if if_not_exists is None else {"ifNotExists": if_not_exists}
        )
        _table_admin_timeout_ms, _ta_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        payload = {
            ci_command: {
                "name": i_name,
                "definition": ci_definition,
                "options": options_block,
            }
        }
        logger.info(f"{ci_command}('{i_name}')")
        response = self._api_commander.request(
            payload=payload,
            timeout_context=_TimeoutContext(
                request_ms=_table_admin_timeout_ms, label=_ta_label
            ),
        )
        # The API acknowledges success with exactly {"ok": 1} in "status".
        if response.get("status") != {"ok": 1}:
            raise UnexpectedDataAPIResponseException(
                text=f"Faulty response from {ci_command} API command.",
                raw_response=response,
            )
        logger.info(f"finished {ci_command}('{i_name}')")

    def create_index(
        self,
        name: str,
        *,
        column: str,
        options: TableIndexOptions | dict[str, Any] | None = None,
        if_not_exists: bool | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Create an index on a non-vector column of the table.

        This is a blocking operation: the method returns once the index
        is created and ready to use.

        For creation of a vector index, see method `create_vector_index` instead.

        Args:
            name: the name of the index. Index names must be unique across the keyspace.
            column: the table column on which the index is to be created.
            options: if passed, it must be an instance of `TableIndexOptions`,
                or an equivalent dictionary, which specifies index settings
                such as -- for a text column -- case-sensitivity and so on.
                See the `astrapy.info.TableIndexOptions` class for more details.
            if_not_exists: if set to True, the command will succeed even if an index
                with the specified name already exists (in which case no actual
                index creation takes place on the database). The API default of False
                means that an error is raised by the API in case of name collision.
            table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Examples:
            >>> from astrapy.info import TableIndexOptions
            >>>
            >>> # create an index on a column
            >>> my_table.create_index(
            ...     "score_index",
            ...     column="score",
            ... )
            >>>
            >>> # create an index on a textual column, specifying indexing options
            >>> my_table.create_index(
            ...     "winner_index",
            ...     column="winner",
            ...     options=TableIndexOptions(
            ...         ascii=False,
            ...         normalize=True,
            ...         case_sensitive=False,
            ...     ),
            ... )
        """

        # Normalize `options` (None/dict/object) and delegate the API call.
        index_definition = TableIndexDefinition(
            column=column,
            options=TableIndexOptions.coerce(options or {}),
        )
        return self._create_generic_index(
            i_name=name,
            ci_definition=index_definition.as_dict(),
            ci_command="createIndex",
            if_not_exists=if_not_exists,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )

    def create_vector_index(
        self,
        name: str,
        *,
        column: str,
        options: TableVectorIndexOptions | dict[str, Any] | None = None,
        if_not_exists: bool | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Create a vector index on a vector column of the table, enabling vector
        similarity search operations on it.

        This is a blocking operation: the method returns once the index
        is created and ready to use.

        For creation of a non-vector index, see method `create_index` instead.

        Args:
            name: the name of the index. Index names must be unique across the keyspace.
            column: the table column, of type "vector" on which to create the index.
            options: an instance of `TableVectorIndexOptions`, or an equivalent
                dictionary, which specifies settings for the vector index,
                such as the metric to use or, if desired, a "source model" setting.
                If omitted, the Data API defaults will apply for the index.
                See the `astrapy.info.TableVectorIndexOptions` class for more details.
            if_not_exists: if set to True, the command will succeed even if an index
                with the specified name already exists (in which case no actual
                index creation takes place on the database). The API default of False
                means that an error is raised by the API in case of name collision.
            table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Example:
            >>> from astrapy.constants import VectorMetric
            >>> from astrapy.info import TableVectorIndexOptions
            >>>
            >>> # create a vector index with dot-product similarity
            >>> my_table.create_vector_index(
            ...     "m_vector_index",
            ...     column="m_vector",
            ...     options=TableVectorIndexOptions(
            ...         metric=VectorMetric.DOT_PRODUCT,
            ...     ),
            ... )
            >>> # specify a source_model (since the previous statement
            >>> # succeeded, this will do nothing because of `if_not_exists`):
            >>> my_table.create_vector_index(
            ...     "m_vector_index",
            ...     column="m_vector",
            ...     options=TableVectorIndexOptions(
            ...         metric=VectorMetric.DOT_PRODUCT,
            ...         source_model="nv-qa-4",
            ...     ),
            ...     if_not_exists=True,
            ... )
            >>> # leave the settings to the Data API defaults of cosine
            >>> # similarity metric (since the previous statement
            >>> # succeeded, this will do nothing because of `if_not_exists`):
            >>> my_table.create_vector_index(
            ...     "m_vector_index",
            ...     column="m_vector",
            ...     if_not_exists=True,
            ... )
        """

        # Normalize `options` (coerce accepts None here) and delegate the call.
        index_definition = TableVectorIndexDefinition(
            column=column,
            options=TableVectorIndexOptions.coerce(options),
        )
        return self._create_generic_index(
            i_name=name,
            ci_definition=index_definition.as_dict(),
            ci_command="createVectorIndex",
            if_not_exists=if_not_exists,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )

    def list_index_names(
        self,
        *,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[str]:
        """
        List the names of all indexes existing on this table.

        Args:
            table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Returns:
            a list of the index names as strings, in no particular order.

        Example:
            >>> my_table.list_index_names()
            ['m_vector_index', 'winner_index', 'score_index']
        """

        chosen_timeout_ms, timeout_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # a plain listIndexes (no 'explain') returns just the names
        payload: dict[str, Any] = {"listIndexes": {"options": {}}}
        logger.info("listIndexes")
        response = self._api_commander.request(
            payload=payload,
            timeout_context=_TimeoutContext(
                request_ms=chosen_timeout_ms, label=timeout_label
            ),
        )
        if "indexes" in response.get("status", {}):
            logger.info("finished listIndexes")
            return response["status"]["indexes"]  # type: ignore[no-any-return]
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from listIndexes API command.",
            raw_response=response,
        )

    def list_indexes(
        self,
        *,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[TableIndexDescriptor]:
        """
        List the full definitions of all indexes existing on this table.

        Args:
            table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Returns:
            a list of `astrapy.info.TableIndexDescriptor` objects in no particular
            order, each providing the details of an index present on the table.

        Example:
            >>> indexes = my_table.list_indexes()
            >>> indexes
            [TableIndexDescriptor(name='m_vector_index', definition=...)...]  # Note: shortened
            >>> indexes[1].definition.column
            'winner'
            >>> indexes[1].definition.options.case_sensitive
            False
        """

        chosen_timeout_ms, timeout_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # 'explain: True' asks the API for full index definitions, not just names
        payload: dict[str, Any] = {"listIndexes": {"options": {"explain": True}}}
        logger.info("listIndexes")
        response = self._api_commander.request(
            payload=payload,
            timeout_context=_TimeoutContext(
                request_ms=chosen_timeout_ms, label=timeout_label
            ),
        )
        if "indexes" in response.get("status", {}):
            logger.info("finished listIndexes")
            return [
                TableIndexDescriptor.coerce(index_dict)
                for index_dict in response["status"]["indexes"]
            ]
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from listIndexes API command.",
            raw_response=response,
        )

    # Overload: with no `row_type` given, the altered table is returned as a
    # `Table[DefaultRowType]` (rows implicitly typed as `dict[str, Any]`).
    @overload
    def alter(
        self,
        operation: AlterTableOperation | dict[str, Any],
        *,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> Table[DefaultRowType]: ...

    # Overload: `row_type` acts as a formal specifier for static type checkers,
    # making the returned table a `Table[NEW_ROW]`.
    @overload
    def alter(
        self,
        operation: AlterTableOperation | dict[str, Any],
        *,
        row_type: type[NEW_ROW],
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> Table[NEW_ROW]: ...

    def alter(
        self,
        operation: AlterTableOperation | dict[str, Any],
        *,
        row_type: type[Any] = DefaultRowType,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> Table[NEW_ROW]:
        """
        Executes one of the available alter-table operations on this table,
        such as adding/dropping columns.

        This is a blocking operation: the method returns once the table
        alteration has been carried out on the database.

        Args:
            operation: an instance of one of the `astrapy.info.AlterTable*` classes,
                representing which alter operation to perform and the details thereof.
                A regular dictionary can also be provided, but then it must have the
                alter operation name at its top level: {"add": {"columns": ...}}.
            row_type: this parameter acts as a formal specifier for the type checker.
                If omitted, the resulting Table is implicitly a `Table[dict[str, Any]]`.
                If provided, it must match the type hint specified in the assignment.
                See the examples below.
            table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Examples:
            >>> from astrapy.info import (
            ...     AlterTableAddColumns,
            ...     AlterTableAddVectorize,
            ...     AlterTableDropColumns,
            ...     AlterTableDropVectorize,
            ...     ColumnType,
            ...     TableScalarColumnTypeDescriptor,
            ...     VectorServiceOptions,
            ... )
            >>>
            >>> # Add a column
            >>> new_table_1 = my_table.alter(
            ...     AlterTableAddColumns(
            ...         columns={
            ...             "tie_break": TableScalarColumnTypeDescriptor(
            ...                 column_type=ColumnType.BOOLEAN,
            ...             ),
            ...         }
            ...     )
            ... )
            >>>
            >>> # Drop a column
            >>> new_table_2 = new_table_1.alter(AlterTableDropColumns(
            ...     columns=["tie_break"]
            ... ))
            >>>
            >>> # Add vectorize to a (vector) column
            >>> new_table_3 = new_table_2.alter(
            ...     AlterTableAddVectorize(
            ...         columns={
            ...             "m_vector": VectorServiceOptions(
            ...                 provider="openai",
            ...                 model_name="text-embedding-3-small",
            ...                 authentication={
            ...                     "providerKey": "ASTRA_KMS_API_KEY_NAME",
            ...                 },
            ...             ),
            ...         }
            ...     )
            ... )
            >>>
            >>> # Drop vectorize from a (vector) column
            >>> # (Also demonstrates type hint usage)
            >>> from typing import TypedDict
            >>> from astrapy import Table
            >>> from astrapy.data_types import (
            ...     DataAPISet,
            ...     DataAPITimestamp,
            ...     DataAPIVector,
            ... )
            >>> from astrapy.ids import UUID
            >>>
            >>> class MyMatch(TypedDict):
            ...     match_id: str
            ...     round: int
            ...     m_vector: DataAPIVector
            ...     score: int
            ...     when: DataAPITimestamp
            ...     winner: str
            ...     fighters: DataAPISet[UUID]
            ...
            >>> new_table_4: Table[MyMatch] = new_table_3.alter(
            ...     AlterTableDropVectorize(columns=["m_vector"]),
            ...     row_type=MyMatch,
            ... )
        """

        # normalize the operation: accept either the typed object or a full dict
        normalized_op: AlterTableOperation = (
            operation
            if isinstance(operation, AlterTableOperation)
            else AlterTableOperation.from_full_dict(operation)
        )
        chosen_timeout_ms, timeout_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        op_name = normalized_op._name
        payload = {
            "alterTable": {
                "operation": {
                    op_name: normalized_op.as_dict(),
                },
            },
        }
        logger.info(f"alterTable({op_name})")
        response = self._api_commander.request(
            payload=payload,
            timeout_context=_TimeoutContext(
                request_ms=chosen_timeout_ms, label=timeout_label
            ),
        )
        if response.get("status") != {"ok": 1}:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from alterTable API command.",
                raw_response=response,
            )
        logger.info(f"finished alterTable({op_name})")
        # hand back a fresh Table object pointing at the (now altered) table
        return Table(
            database=self.database,
            name=self.name,
            keyspace=self.keyspace,
            api_options=self.api_options,
        )

    def insert_one(
        self,
        row: ROW,
        *,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> TableInsertOneResult:
        """
        Insert a single row in the table,
        with implied overwrite in case of primary key collision.

        Inserting a row whose primary key corresponds to an entry already stored
        in the table has the effect of an in-place update: the row is overwritten.
        However, if the row being inserted is partially provided, i.e. some columns
        are not specified, these are left unchanged on the database. To explicitly
        reset them, specify their value as appropriate to their data type,
        i.e. `None`, `{}` or analogous.

        Args:
            row: a dictionary expressing the row to insert. The primary key
                must be specified in full, while any other column may be omitted
                if desired (in which case it is left as is on DB).
                The values for the various columns supplied in the row must
                be of the right data type for the insertion to succeed.
                Non-primary-key columns can also be explicitly set to null.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a TableInsertOneResult object, whose attributes are the primary key
            of the inserted row both in the form of a dictionary and of a tuple.

        Examples:
            >>> # a full-row insert using astrapy's datatypes
            >>> from astrapy.data_types import (
            ...     DataAPISet,
            ...     DataAPITimestamp,
            ...     DataAPIVector,
            ... )
            >>> from astrapy.ids import UUID
            >>>
            >>> insert_result = my_table.insert_one(
            ...     {
            ...         "match_id": "mtch_0",
            ...         "round": 1,
            ...         "m_vector": DataAPIVector([0.4, -0.6, 0.2]),
            ...         "score": 18,
            ...         "when": DataAPITimestamp.from_string("2024-11-28T11:30:00Z"),
            ...         "winner": "Victor",
            ...         "fighters": DataAPISet([
            ...             UUID("0193539a-2770-8c09-a32a-111111111111"),
            ...         ]),
            ...     },
            ... )
            >>> insert_result.inserted_id
            {'match_id': 'mtch_0', 'round': 1}
            >>> insert_result.inserted_id_tuple
            ('mtch_0', 1)
            >>>
            >>> # a partial-row (which in this case overwrites some of the values)
            >>> my_table.insert_one(
            ...     {
            ...         "match_id": "mtch_0",
            ...         "round": 1,
            ...         "winner": "Victor Vector",
            ...         "fighters": DataAPISet([
            ...             UUID("0193539a-2770-8c09-a32a-111111111111"),
            ...             UUID("0193539a-2880-8875-9f07-222222222222"),
            ...         ]),
            ...     },
            ... )
            TableInsertOneResult(inserted_id={'match_id': 'mtch_0', 'round': 1} ...
            >>>
            >>> # another insertion demonstrating standard-library datatypes in values
            >>> import datetime
            >>>
            >>> my_table.insert_one(
            ...     {
            ...         "match_id": "mtch_0",
            ...         "round": 2,
            ...         "winner": "Angela",
            ...         "score": 25,
            ...         "when": datetime.datetime(
            ...             2024, 7, 13, 12, 55, 30, 889,
            ...             tzinfo=datetime.timezone.utc,
            ...         ),
            ...         "fighters": {
            ...             UUID("019353cb-8e01-8276-a190-333333333333"),
            ...         },
            ...         "m_vector": [0.4, -0.6, 0.2],
            ...     },
            ... )
            TableInsertOneResult(inserted_id={'match_id': 'mtch_0', 'round': 2}, ...
        """

        chosen_timeout_ms, timeout_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        payload = self._converter_agent.preprocess_payload(
            {"insertOne": {"document": row}}
        )
        logger.info(f"insertOne on '{self.name}'")
        response = self._api_commander.request(
            payload=payload,
            timeout_context=_TimeoutContext(
                request_ms=chosen_timeout_ms, label=timeout_label
            ),
        )
        logger.info(f"finished insertOne on '{self.name}'")
        # validate the response shape with early-raise guards
        status = response.get("status", {})
        if "insertedIds" not in status:
            raise UnexpectedDataAPIResponseException(
                text="Response from insertOne API command missing 'insertedIds'.",
                raw_response=response,
            )
        if not status["insertedIds"]:
            raise UnexpectedDataAPIResponseException(
                text="Response from insertOne API command has empty 'insertedIds'.",
                raw_response=response,
            )
        if not status["primaryKeySchema"]:
            raise UnexpectedDataAPIResponseException(
                text=(
                    "Response from insertOne API command has "
                    "empty 'primaryKeySchema'."
                ),
                raw_response=response,
            )
        # convert the API's list-form key into tuple and dict representations
        key_as_list = status["insertedIds"][0]
        key_tuple, key_dict = self._converter_agent.postprocess_key(
            key_as_list,
            primary_key_schema_dict=status["primaryKeySchema"],
        )
        return TableInsertOneResult(
            raw_results=[response],
            inserted_id=key_dict,
            inserted_id_tuple=key_tuple,
        )

    def _prepare_keys_from_status(
        self, status: dict[str, Any] | None, raise_on_missing: bool = False
    ) -> tuple[list[dict[str, Any]], list[tuple[Any, ...]]]:
        ids: list[dict[str, Any]]
        id_tuples: list[tuple[Any, ...]]
        if status is None:
            if raise_on_missing:
                raise UnexpectedDataAPIResponseException(
                    text="'status' not found in API response",
                    raw_response=None,
                )
            else:
                ids = []
                id_tuples = []
        else:
            if "primaryKeySchema" not in status:
                raise UnexpectedDataAPIResponseException(
                    text=(
                        "received a 'status' without 'primaryKeySchema' "
                        f"in API response (received: {status})"
                    ),
                    raw_response=None,
                )
            if "insertedIds" not in status:
                raise UnexpectedDataAPIResponseException(
                    text=(
                        "received a 'status' without 'insertedIds' "
                        f"in API response (received: {status})"
                    ),
                    raw_response=None,
                )
            primary_key_schema = status["primaryKeySchema"]
            id_tuples_and_ids = self._converter_agent.postprocess_keys(
                status["insertedIds"],
                primary_key_schema_dict=primary_key_schema,
            )
            id_tuples = [tpl for tpl, _ in id_tuples_and_ids]
            ids = [id for _, id in id_tuples_and_ids]
        return ids, id_tuples

    def insert_many(
        self,
        rows: Iterable[ROW],
        *,
        ordered: bool = False,
        chunk_size: int | None = None,
        concurrency: int | None = None,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> TableInsertManyResult:
        """
        Insert a number of rows into the table,
        with implied overwrite in case of primary key collision.

        Inserting rows whose primary key correspond to entries already stored
        in the table has the effect of an in-place update: the rows are overwritten.
        However, if the rows being inserted are partially provided, i.e. some columns
        are not specified, these are left unchanged on the database. To explicitly
        reset them, specify their value as appropriate to their data type,
        i.e. `None`, `{}` or analogous.

        Args:
            rows: an iterable of dictionaries, each expressing a row to insert.
                Each row must at least fully specify the primary key column values,
                while any other column may be omitted if desired (in which case
                it is left as is on DB).
                The values for the various columns supplied in each row must
                be of the right data type for the insertion to succeed.
                Non-primary-key columns can also be explicitly set to null.
            ordered: if False (default), the insertions can occur in arbitrary order
                and possibly concurrently. If True, they are processed sequentially.
                If there are no specific reasons against it, unordered insertions
                are to be preferred as they complete much faster.
            chunk_size: how many rows to include in each single API request.
                Exceeding the server maximum allowed value results in an error.
                Leave it unspecified (recommended) to use the system default.
            concurrency: maximum number of concurrent requests to the API at
                a given time. It cannot be more than one for ordered insertions.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                whole operation, which may consist of several API requests.
                If not provided, this object's defaults apply.
            request_timeout_ms: a timeout, in milliseconds, to impose on each
                individual HTTP request to the Data API to accomplish the operation.
                If not provided, this object's defaults apply.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a TableInsertManyResult object, whose attributes are the primary key
            of the inserted rows both in the form of dictionaries and of tuples.

        Examples:
            >>> # Insert complete and partial rows at once (concurrently)
            >>> from astrapy.data_types import (
            ...     DataAPISet,
            ...     DataAPITimestamp,
            ...     DataAPIVector,
            ... )
            >>> from astrapy.ids import UUID
            >>>
            >>> insert_result = my_table.insert_many(
            ...     [
            ...         {
            ...             "match_id": "fight4",
            ...             "round": 1,
            ...             "winner": "Victor",
            ...             "score": 18,
            ...             "when": DataAPITimestamp.from_string(
            ...                 "2024-11-28T11:30:00Z",
            ...             ),
            ...             "fighters": DataAPISet([
            ...                 UUID("0193539a-2770-8c09-a32a-111111111111"),
            ...                 UUID('019353e3-00b4-83f9-a127-222222222222'),
            ...             ]),
            ...             "m_vector": DataAPIVector([0.4, -0.6, 0.2]),
            ...         },
            ...         {"match_id": "fight5", "round": 1, "winner": "Adam"},
            ...         {"match_id": "fight5", "round": 2, "winner": "Betta"},
            ...         {"match_id": "fight5", "round": 3, "winner": "Caio"},
            ...         {
            ...             "match_id": "challenge6",
            ...             "round": 1,
            ...             "winner": "Donna",
            ...             "m_vector": [0.9, -0.1, -0.3],
            ...         },
            ...         {"match_id": "challenge6", "round": 2, "winner": "Erick"},
            ...         {"match_id": "challenge6", "round": 3, "winner": "Fiona"},
            ...         {"match_id": "tournamentA", "round": 1, "winner": "Gael"},
            ...         {"match_id": "tournamentA", "round": 2, "winner": "Hanna"},
            ...         {
            ...             "match_id": "tournamentA",
            ...             "round": 3,
            ...             "winner": "Ian",
            ...             "fighters": DataAPISet([
            ...                 UUID("0193539a-2770-8c09-a32a-111111111111"),
            ...             ]),
            ...         },
            ...         {"match_id": "fight7", "round": 1, "winner": "Joy"},
            ...         {"match_id": "fight7", "round": 2, "winner": "Kevin"},
            ...         {"match_id": "fight7", "round": 3, "winner": "Lauretta"},
            ...     ],
            ...     concurrency=10,
            ...     chunk_size=3,
            ... )
            >>> insert_result.inserted_ids
            [{'match_id': 'fight4', 'round': 1}, {'match_id': 'fight5', ...
            >>> insert_result.inserted_id_tuples
            [('fight4', 1), ('fight5', 1), ('fight5', 2), ('fight5', 3), ...
            >>>
            >>> # Ordered insertion
            >>> # (would stop on first failure; predictable end result on DB)
            >>> my_table.insert_many(
            ...     [
            ...         {"match_id": "fight5", "round": 1, "winner": "Adam0"},
            ...         {"match_id": "fight5", "round": 2, "winner": "Betta0"},
            ...         {"match_id": "fight5", "round": 3, "winner": "Caio0"},
            ...         {"match_id": "fight5", "round": 1, "winner": "Adam Zuul"},
            ...         {"match_id": "fight5", "round": 2, "winner": "Betta Vigo"},
            ...         {"match_id": "fight5", "round": 3, "winner": "Caio Gozer"},
            ...     ],
            ...     ordered=True,
            ... )
            TableInsertManyResult(inserted_ids=[{'match_id': 'fight5', 'round': 1}, ...

        Note:
            Unordered insertions are executed with some degree of concurrency,
            so it is usually better to prefer this mode unless the order in the
            row sequence is important.

        Note:
            If some of the rows are unsuitable for insertion, for instance
            have the wrong data type for a column or lack the primary key,
            the Data API validation check will fail for those specific requests
            that contain the faulty rows. Depending on concurrency and the value
            of the `ordered` parameter, a number of rows in general could have
            been successfully inserted.
            It is possible to capture such a scenario, and inspect which rows
            actually got inserted, by catching an error of type
            `astrapy.exceptions.TableInsertManyException`: its `partial_result`
            attribute is precisely a `TableInsertManyResult`, encoding details
            on the successful writes.
        """

        _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
            (general_method_timeout_ms, "general_method_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.general_method_timeout_ms,
                "general_method_timeout_ms",
            ),
        )
        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        if concurrency is None:
            if ordered:
                _concurrency = 1
            else:
                _concurrency = DEFAULT_INSERT_MANY_CONCURRENCY
        else:
            _concurrency = concurrency
        if _concurrency > 1 and ordered:
            raise ValueError("Cannot run ordered insert_many concurrently.")
        if chunk_size is None:
            _chunk_size = DEFAULT_INSERT_MANY_CHUNK_SIZE
        else:
            _chunk_size = chunk_size
        _rows = list(rows)
        logger.info(f"inserting {len(_rows)} rows in '{self.name}'")
        raw_results: list[dict[str, Any]] = []
        timeout_manager = MultiCallTimeoutManager(
            overall_timeout_ms=_general_method_timeout_ms,
            timeout_label=_gmt_label,
        )
        if ordered:
            options = {"ordered": True}
            inserted_ids: list[Any] = []
            inserted_id_tuples: list[Any] = []
            for i in range(0, len(_rows), _chunk_size):
                im_payload = self._converter_agent.preprocess_payload(
                    {
                        "insertMany": {
                            "documents": _rows[i : i + _chunk_size],
                            "options": options,
                        },
                    },
                )
                logger.info(f"insertMany on '{self.name}'")
                chunk_response = self._api_commander.request(
                    payload=im_payload,
                    raise_api_errors=False,
                    timeout_context=timeout_manager.remaining_timeout(
                        cap_time_ms=_request_timeout_ms,
                        cap_timeout_label=_rt_label,
                    ),
                )
                logger.info(f"finished insertMany on '{self.name}'")
                # accumulate the results in this call
                chunk_inserted_ids, chunk_inserted_ids_tuples = (
                    self._prepare_keys_from_status(chunk_response.get("status"))
                )
                inserted_ids += chunk_inserted_ids
                inserted_id_tuples += chunk_inserted_ids_tuples
                raw_results += [chunk_response]
                # if errors, quit early
                if chunk_response.get("errors", []):
                    partial_result = TableInsertManyResult(
                        raw_results=raw_results,
                        inserted_ids=inserted_ids,
                        inserted_id_tuples=inserted_id_tuples,
                    )
                    raise TableInsertManyException.from_response(
                        command=None,
                        raw_response=chunk_response,
                        partial_result=partial_result,
                    )

            # return
            full_result = TableInsertManyResult(
                raw_results=raw_results,
                inserted_ids=inserted_ids,
                inserted_id_tuples=inserted_id_tuples,
            )
            logger.info(f"finished inserting {len(_rows)} rows in '{self.name}'")
            return full_result

        else:
            # unordered: concurrent or not, do all of them and parse the results
            options = {"ordered": False}
            if _concurrency > 1:
                with ThreadPoolExecutor(max_workers=_concurrency) as executor:

                    def _chunk_insertor(
                        row_chunk: list[dict[str, Any]],
                    ) -> dict[str, Any]:
                        im_payload = self._converter_agent.preprocess_payload(
                            {
                                "insertMany": {
                                    "documents": row_chunk,
                                    "options": options,
                                },
                            },
                        )
                        logger.info(f"insertMany(chunk) on '{self.name}'")
                        im_response = self._api_commander.request(
                            payload=im_payload,
                            raise_api_errors=False,
                            timeout_context=timeout_manager.remaining_timeout(
                                cap_time_ms=_request_timeout_ms,
                                cap_timeout_label=_rt_label,
                            ),
                        )
                        logger.info(f"finished insertMany(chunk) on '{self.name}'")
                        return im_response

                    raw_results = list(
                        executor.map(
                            _chunk_insertor,
                            (
                                _rows[i : i + _chunk_size]
                                for i in range(0, len(_rows), _chunk_size)
                            ),
                        )
                    )
            else:
                for i in range(0, len(_rows), _chunk_size):
                    im_payload = self._converter_agent.preprocess_payload(
                        {
                            "insertMany": {
                                "documents": _rows[i : i + _chunk_size],
                                "options": options,
                            },
                        },
                    )
                    logger.info(f"insertMany(chunk) on '{self.name}'")
                    im_response = self._api_commander.request(
                        payload=im_payload,
                        raise_api_errors=False,
                        timeout_context=timeout_manager.remaining_timeout(
                            cap_time_ms=_request_timeout_ms,
                            cap_timeout_label=_rt_label,
                        ),
                    )
                    logger.info(f"finished insertMany(chunk) on '{self.name}'")
                    raw_results.append(im_response)
            # recast raw_results. Each response has its schema: unfold appropriately
            ids_and_tuples_per_chunk = [
                self._prepare_keys_from_status(chunk_response.get("status"))
                for chunk_response in raw_results
            ]
            inserted_ids = [
                inserted_id
                for chunk_ids, _ in ids_and_tuples_per_chunk
                for inserted_id in chunk_ids
            ]
            inserted_id_tuples = [
                inserted_id_tuple
                for _, chunk_id_tuples in ids_and_tuples_per_chunk
                for inserted_id_tuple in chunk_id_tuples
            ]
            # check-raise
            if any(
                [chunk_response.get("errors", []) for chunk_response in raw_results]
            ):
                partial_result = TableInsertManyResult(
                    raw_results=raw_results,
                    inserted_ids=inserted_ids,
                    inserted_id_tuples=inserted_id_tuples,
                )
                raise TableInsertManyException.from_responses(
                    commands=[None for _ in raw_results],
                    raw_responses=raw_results,
                    partial_result=partial_result,
                )

            # return
            full_result = TableInsertManyResult(
                raw_results=raw_results,
                inserted_ids=inserted_ids,
                inserted_id_tuples=inserted_id_tuples,
            )
            logger.info(f"finished inserting {len(_rows)} rows in '{self.name}'")
            return full_result

    # Overload: no `row_type` given — the returned cursor yields items of the
    # table's own row type, i.e. it is a `TableFindCursor[ROW, ROW]`.
    @overload
    def find(
        self,
        filter: FilterType | None = None,
        *,
        projection: ProjectionType | None = None,
        row_type: None = None,
        skip: int | None = None,
        limit: int | None = None,
        include_similarity: bool | None = None,
        include_sort_vector: bool | None = None,
        sort: SortType | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> TableFindCursor[ROW, ROW]: ...

    # Overload: an explicit `row_type` is supplied — a purely formal marker for
    # the type checker, typing the cursor's items as `ROW2` (e.g. after a
    # projection changes the shape of the returned rows).
    @overload
    def find(
        self,
        filter: FilterType | None = None,
        *,
        projection: ProjectionType | None = None,
        row_type: type[ROW2],
        skip: int | None = None,
        limit: int | None = None,
        include_similarity: bool | None = None,
        include_sort_vector: bool | None = None,
        sort: SortType | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> TableFindCursor[ROW, ROW2]: ...

    def find(
        self,
        filter: FilterType | None = None,
        *,
        projection: ProjectionType | None = None,
        row_type: type[ROW2] | None = None,
        skip: int | None = None,
        limit: int | None = None,
        include_similarity: bool | None = None,
        include_sort_vector: bool | None = None,
        sort: SortType | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> TableFindCursor[ROW, ROW2]:
        """
        Search the table for rows matching the given filter, with optional
        sorting criteria including vector (ANN) similarity.

        The result is a `TableFindCursor`: a lazy stream over the matches that
        transparently uses the Data API pagination mechanism, fetching new
        pages only as the cursor is consumed. It can be iterated over, or
        manipulated in several other ways (see the `TableFindCursor`
        documentation). Calling `.to_list()` materializes the whole result set
        at once, which is discouraged for very large results.

        Args:
            filter: a dictionary of conditions the returned rows must satisfy,
                possibly using operators such as "$eq". Examples: `{}` (match
                everything — not recommended on large tables),
                `{"match_no": 123}` (shorthand for `{"match_no": {"$eq": 123}}`),
                `{"match_no": 123, "round": "C"}` (conditions implicitly
                "$and"-combined). See the Data API documentation for the full
                filter syntax and usage tips.
            projection: which columns to return for each matching row, either
                inclusively (`{"column1": True, "column2": True}`, or
                `{"*": True}` for the whole row) or exclusively
                (`{"column1": False, "column2": False}`). Using a projection is
                recommended to save bandwidth, in particular to avoid fetching
                unneeded high-dimensional vector columns.
            row_type: a purely formal parameter for the benefit of the type
                checker. If omitted, the cursor is implicitly a
                `TableFindCursor[ROW, ROW]`, i.e. its items have the same type
                as the table's rows. Strictly typed code may want to set this,
                especially alongside a projection.
            skip: a number of rows that would come first in the response and
                are instead discarded.
            limit: a maximum number of rows to return; the cursor stops when
                either this count is reached or the matches are exhausted.
            include_similarity: if True, each returned row carries an extra
                "$similarity" key with the numeric similarity value. Only
                meaningful for a vector search (see `sort`).
            include_sort_vector: if True (and the search is a vector search),
                the `get_sort_vector` method of the returned cursor yields the
                vector used for the ANN search.
            sort: controls the order of the returned rows. Either a vector
                search, of the form `{"vector_column": qv}` with `qv` a list of
                floats or a DataAPIVector (or, for a vectorize-enabled column,
                `{"vectorize_enabled_column": "query text"}`); or a regular
                ascending/descending — possibly hierarchical — sorting, e.g.
                `{"score": SortMode.ASCENDING}` (i.e. `{"score": +1}`) or
                `{"score": +1, "when": -1}`. Depending on the chosen column(s),
                partitioning and indexes, the API may have to sort in memory,
                with performance implications and limits on the returned item
                count: consult the Data API documentation for details.
            request_timeout_ms: a timeout, in milliseconds, imposed on each
                individual HTTP request issued while consuming the cursor.
                Defaults to this object's settings if not provided.
            timeout_ms: an alias for `request_timeout_ms`.

        Returns:
            a TableFindCursor over the matching rows, handling pagination
            under the hood as it is consumed.

        Note:
            Since pages are fetched progressively while the cursor is
            iterated, the results may reflect real-time changes to the
            table contents.

        Examples:
            >>> # Iterate over results:
            >>> for row in my_table.find({"match_id": "challenge6"}):
            ...     print(f"(R:{row['round']}): winner {row['winner']}")
            ...
            (R:1): winner Donna
            (R:2): winner Erick
            (R:3): winner Fiona
            >>> # Materialize all results (projection recommended):
            >>> my_table.find(
            ...     {"match_id": "challenge6"},
            ...     projection={"round": True, "winner": True},
            ... ).to_list()
            [{'round': 1, 'winner': 'Donna'}, {'round': 2, 'winner': 'Erick'}, ...
            >>> # Vector search with "sort" on an indexed vector column:
            >>> my_table.find(
            ...     {},
            ...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
            ...     projection={"winner": True},
            ...     limit=3,
            ... ).to_list()
            [{'winner': 'Donna'}, {'winner': 'Victor'}]
            >>> # Non-vector sorting, `skip` and `limit`:
            >>> my_table.find(
            ...     {"match_id": "fight5"},
            ...     sort={"round": SortMode.DESCENDING},
            ...     projection={"winner": True},
            ...     skip=1,
            ...     limit=2,
            ... ).to_list()
            The Data API returned a warning: {'errorCode': 'IN_MEMORY_SORTING...
            [{'winner': 'Betta Vigo'}, {'winner': 'Adam Zuul'}]
            >>> # Using `.map()` on a cursor:
            >>> winner_cursor = my_table.find(
            ...     {"match_id": "fight5"},
            ...     sort={"round": SortMode.DESCENDING},
            ...     projection={"winner": True},
            ...     limit=5,
            ... )
            >>> print("/".join(winner_cursor.map(lambda row: row["winner"].upper())))
            CAIO GOZER/BETTA VIGO/ADAM ZUUL
        """

        # deferred import: astrapy.cursors imports this module in turn
        from astrapy.cursors import TableFindCursor

        # resolve the per-request timeout from explicit args, then defaults
        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        # build the cursor, then configure it one builder step at a time
        # (each builder call returns the cursor to use from then on)
        cursor = TableFindCursor(
            table=self,
            request_timeout_ms=_request_timeout_ms,
            overall_timeout_ms=None,
            request_timeout_label=_rt_label,
        )
        cursor = cursor.filter(filter)
        cursor = cursor.project(projection)
        cursor = cursor.skip(skip)
        cursor = cursor.limit(limit)
        cursor = cursor.sort(sort)
        cursor = cursor.include_similarity(include_similarity)
        cursor = cursor.include_sort_vector(include_sort_vector)
        return cursor

    def find_one(
        self,
        filter: FilterType | None = None,
        *,
        projection: ProjectionType | None = None,
        include_similarity: bool | None = None,
        sort: SortType | None = None,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> ROW | None:
        """
        Run a search according to the given filtering and sorting criteria
        and return the single best-matching row, or None if there is none.

        The parameters mirror a subset of those of the `find` method (which
        has a few more, such as `limit`, that make no sense for a single-row
        search).

        Args:
            filter: a dictionary of conditions the returned row must satisfy,
                possibly using operators such as "$eq". Examples: `{}` (match
                everything), `{"match_no": 123}` (shorthand for
                `{"match_no": {"$eq": 123}}`), `{"match_no": 123, "round": "C"}`
                (conditions implicitly "$and"-combined). See the Data API
                documentation for the full filter syntax and usage tips.
            projection: which columns to return for the matching row, either
                inclusively (`{"column1": True, "column2": True}`, or
                `{"*": True}` for the whole row) or exclusively
                (`{"column1": False, "column2": False}`). Using a projection is
                recommended to save bandwidth, in particular to avoid fetching
                unneeded high-dimensional vector columns.
            include_similarity: if True, the returned row carries an extra
                "$similarity" key with the numeric similarity value. Only
                meaningful for a vector search (see `sort`).
            sort: determines which row is returned, as either a vector search
                or a regular sorting. For a vector search, use
                `{"vector_column": qv}` with `qv` a list of floats or a
                DataAPIVector (or, for a vectorize-enabled column,
                `{"vectorize_enabled_column": "query text"}`). For regular
                — possibly hierarchical — sorting, use e.g.
                `{"score": SortMode.ASCENDING}` (i.e. `{"score": +1}`) or
                `{"score": +1, "when": -1}`. Depending on the chosen column(s),
                partitioning and indexes, the API may have to sort in memory,
                with performance implications: consult the Data API
                documentation for details.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on
                the underlying API request. If not provided, this object's
                defaults apply. (This method issues a single API request, hence
                all timeout parameters are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a dictionary expressing the result if a row is found, otherwise None.

        Examples:
            >>> from astrapy.constants import SortMode
            >>> from astrapy.data_types import DataAPIVector
            >>>
            >>> # Filter on the partitioning:
            >>> my_table.find_one({"match_id": "challenge6"})
            {'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
            >>>
            >>> # A find with no matches:
            >>> str(my_table.find_one({"match_id": "not_real"}))
            'None'
            >>>
            >>> # Optimize bandwidth using a projection:
            >>> my_table.find_one(
            ...     {"match_id": "challenge6"},
            ...     projection={"round": True, "winner": True},
            ... )
            {'round': 1, 'winner': 'Donna'}
            >>>
            >>> # Vector search with "sort" on an indexed vector column:
            >>> my_table.find_one(
            ...     {},
            ...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
            ...     projection={"winner": True},
            ...     include_similarity=True,
            ... )
            {'winner': 'Donna', '$similarity': 0.515}
            >>>
            >>> # Non-vector sorting on a 'partitionSort' column:
            >>> my_table.find_one(
            ...     {"match_id": "fight5"},
            ...     sort={"round": SortMode.DESCENDING},
            ...     projection={"winner": True},
            ... )
            {'winner': 'Caio Gozer'}
        """

        # all timeout aliases collapse onto one single-request timeout
        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # the "options" sub-dict is emitted only if the caller expressed a choice
        if include_similarity is None:
            fo_options = None
        else:
            fo_options = {"includeSimilarity": include_similarity}
        # assemble the findOne command body, dropping unset entries
        fo_command: dict[str, Any] = {}
        for fo_key, fo_value in (
            ("filter", filter),
            ("projection", normalize_optional_projection(projection)),
            ("options", fo_options),
            ("sort", sort),
        ):
            if fo_value is not None:
                fo_command[fo_key] = fo_value
        fo_payload = self._converter_agent.preprocess_payload({"findOne": fo_command})
        fo_response = self._api_commander.request(
            payload=fo_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        # validate the response shape before reading from it
        response_data = fo_response.get("data") or {}
        if "document" not in response_data:
            raise UnexpectedDataAPIResponseException(
                text="Response from findOne API command missing 'document'.",
                raw_response=fo_response,
            )
        response_status = fo_response.get("status") or {}
        if "projectionSchema" not in response_status:
            raise UnexpectedDataAPIResponseException(
                text="Response from findOne API command missing 'projectionSchema'.",
                raw_response=fo_response,
            )
        doc_response = response_data["document"]
        if doc_response is None:
            return None
        # convert the raw document back into the table's row types
        return self._converter_agent.postprocess_row(
            doc_response,
            columns_dict=response_status["projectionSchema"],
            similarity_pseudocolumn="$similarity" if include_similarity else None,
        )

    def distinct(
        self,
        key: str,
        *,
        filter: FilterType | None = None,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[Any]:
        """
        Return a list of the unique values of `key` across the rows
        in the table that match the provided filter.

        Args:
            key: the name of the field whose value is inspected across rows.
                Keys are typically just column names, although they can use
                the dot notation to select particular entries in map columns.
                For set and list columns, individual entries are "unrolled"
                automatically; in particular, for lists, numeric indices
                can be used in the key dot-notation syntax.
                Example of acceptable `key` values:
                    "a_column"
                    "map_column.map_key"
                    "list_column.2"
            filter: a dictionary expressing which condition the inspected rows
                must satisfy. The filter can use operators, such as "$eq" for equality,
                and require columns to compare with literal values. Simple examples
                are `{}` (zero filter), `{"match_no": 123}` (a shorthand for
                `{"match_no": {"$eq": 123}}`, or `{"match_no": 123, "round": "C"}`
                (multiple conditions are implicitly combined with "$and").
                Please consult the Data API documentation for a more detailed
                explanation of table search filters and tips on their usage.
            general_method_timeout_ms: a timeout, in milliseconds, for the whole
                requested operation (which may involve multiple API requests).
                This method, being based on `find` (see) may entail successive HTTP API
                requests, depending on the amount of involved rows.
                If not provided, this object's defaults apply.
            request_timeout_ms: a timeout, in milliseconds, for each API request.
                If not provided, this object's defaults apply.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a list of all different values for `key` found across the rows
            that match the filter. The result list has no repeated items.

        Examples:
            >>> my_table.distinct("winner", filter={"match_id": "challenge6"})
            ['Donna', 'Erick', 'Fiona']
            >>>
            >>> # distinct values across the whole table:
            >>> # (not recommended performance-wise)
            >>> my_table.distinct("winner")
            The Data API returned a warning: {'errorCode': 'ZERO_FILTER_OPERATIONS', ...
            ['Victor', 'Adam Zuul', 'Betta Vigo', 'Caio Gozer', 'Donna', 'Erick', ...
            >>>
            >>> # Over a column containing null values
            >>> # (also with composite filter):
            >>> my_table.distinct(
            ...     "score",
            ...     filter={"match_id": {"$in": ["fight4", "tournamentA"]}},
            ... )
            [18, None]
            >>>
            >>> # distinct over a set column (automatically "unrolled"):
            >>> my_table.distinct(
            ...     "fighters",
            ...     filter={"match_id": {"$in": ["fight4", "tournamentA"]}},
            ... )
            [UUID('0193539a-2770-8c09-a32a-111111111111'), UUID('019353e3-00b4-...

        Note:
            It must be kept in mind that `distinct` is a client-side operation,
            which effectively browses all required rows using the logic
            of the `find` method and collects the unique values found for `key`.
            As such, there may be performance, latency and ultimately
            billing implications if the amount of matching rows is large.

        Note:
            For details on the behaviour of "distinct" in conjunction with
            real-time changes in the table contents, see the
            Note of the `find` command.
        """

        # lazy-import here to avoid circular import issues
        from astrapy.cursors import TableFindCursor

        _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
            (general_method_timeout_ms, "general_method_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.general_method_timeout_ms,
                "general_method_timeout_ms",
            ),
        )
        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        # preparing cursor:
        _extractor = _create_document_key_extractor(key)
        _key = _reduce_distinct_key_to_shallow_safe(key)
        if _key == "":
            raise ValueError(
                "The 'key' parameter for distinct cannot be empty "
                "or start with a list index."
            )
        # relaxing the type hint (limited to within this method body)
        f_cursor: TableFindCursor[dict[str, Any], dict[str, Any]] = (
            TableFindCursor(
                table=self,
                request_timeout_ms=_request_timeout_ms,
                overall_timeout_ms=_general_method_timeout_ms,
                request_timeout_label=_rt_label,
                overall_timeout_label=_gmt_label,
            )  # type: ignore[assignment]
            .filter(filter)
            .project({_key: True})
        )
        # consuming it:
        _item_hashes = set()
        distinct_items: list[Any] = []
        logger.info(f"running distinct() on '{self.name}'")
        for document in f_cursor:
            for item in _extractor(document):
                _item_hash = _hash_document(
                    item, options=self.api_options.serdes_options
                )
                if _item_hash not in _item_hashes:
                    _item_hashes.add(_item_hash)
                    distinct_items.append(item)
        logger.info(f"finished running distinct() on '{self.name}'")
        return distinct_items

    def count_documents(
        self,
        filter: FilterType,
        *,
        upper_bound: int,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> int:
        """
        Count the rows in the table matching the specified filter.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"name": "John", "age": 59}
                    {"$and": [{"name": {"$eq": "John"}}, {"age": {"$gt": 58}}]}
                See the Data API documentation for the full set of operators.
            upper_bound: a required ceiling on the result of the count operation.
                An exception is raised if the actual number of rows exceeds this
                value, and likewise if it exceeds the maximum count the Data API
                can reach (regardless of upper_bound).
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            the exact count of matching rows.

        Examples:
            >>> my_table.insert_many([{"seq": i} for i in range(20)])
            TableInsertManyResult(...)
            >>> my_table.count_documents({}, upper_bound=100)
            20
            >>> my_table.count_documents({"seq":{"$gt": 15}}, upper_bound=100)
            4
            >>> my_table.count_documents({}, upper_bound=10)
            Traceback (most recent call last):
                ... ...
            astrapy.exceptions.TooManyRowsToCountException

        Note:
            Count operations are expensive: for this reason, the best practice
            is to provide a reasonable `upper_bound` according to the caller
            expectations. Moreover, indiscriminate usage of count operations
            for sizeable amounts of rows (i.e. in the thousands and more)
            is discouraged in favor of alternative application-specific solutions.
            Keep in mind that the Data API has a hard upper limit on the amount
            of rows it will count, and that an exception will be thrown
            by this method if this limit is encountered.
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info(f"countDocuments on '{self.name}'")
        cd_response = self._api_commander.request(
            payload={"countDocuments": {"filter": filter}},
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished countDocuments on '{self.name}'")
        _status = cd_response.get("status", {})
        if "count" not in _status:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from countDocuments API command.",
                raw_response=cd_response,
            )
        count: int = _status["count"]
        if _status.get("moreData", False):
            # the server stopped counting at its own hard ceiling
            raise TooManyRowsToCountException(
                text=f"Document count exceeds {count}, the maximum allowed by the server",
                server_max_count_exceeded=True,
            )
        if count > upper_bound:
            raise TooManyRowsToCountException(
                text="Document count exceeds required upper bound",
                server_max_count_exceeded=False,
            )
        return count

    def estimated_document_count(
        self,
        *,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> int:
        """
        Query the API server for an estimate of the document count in the table.

        Contrary to `count_documents`, this method has no filtering parameters.

        Args:
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a server-provided estimate count of the documents in the table.

        Example:
            >>> my_table.estimated_document_count()
            5820
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info(f"estimatedDocumentCount on '{self.name}'")
        ed_response = self._api_commander.request(
            payload={"estimatedDocumentCount": {}},
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished estimatedDocumentCount on '{self.name}'")
        _status = ed_response.get("status", {})
        if "count" not in _status:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from estimatedDocumentCount API command.",
                raw_response=ed_response,
            )
        estimated_count: int = _status["count"]
        return estimated_count

    def update_one(
        self,
        filter: FilterType,
        update: dict[str, Any],
        *,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Update a single row on the table, changing some or all of the columns,
        with the implicit behaviour of inserting a new row if no match is found.

        Args:
            filter: a predicate expressing the table primary key in full,
                i.e. a dictionary defining values for all columns that form the
                primary key. An example may be `{"match_id": "fight4", "round": 1}`.
            update: the update prescription to apply to the row, expressed
                as a dictionary conforming to the Data API syntax. The update
                operators for tables are `$set` and `$unset` (in particular,
                setting a column to None has the same effect as the $unset operator).
                Examples are `{"$set": {"round": 12}}` and
                `{"$unset": {"winner": "", "score": ""}}`.
                Note that the update operation cannot alter the primary key columns.
                See the Data API documentation for more details.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Examples:
            >>> from astrapy.data_types import DataAPISet
            >>>
            >>> # Set a new value for a column
            >>> my_table.update_one(
            ...     {"match_id": "fight4", "round": 1},
            ...     update={"$set": {"winner": "Winona"}},
            ... )
            >>>
            >>> # Set a new value for a column while unsetting another column
            >>> my_table.update_one(
            ...     {"match_id": "fight4", "round": 1},
            ...     update={"$set": {"winner": None, "score": 24}},
            ... )
            >>>
            >>> # Set a 'set' column to empty (DataAPISet(), None, set()
            >>> # and an `$unset` on the column are all equivalent):
            >>> my_table.update_one(
            ...     {"match_id": "fight4", "round": 1},
            ...     update={"$set": {"fighters": DataAPISet()}},
            ... )
            >>>
            >>> # A non-existing primary key creates a new row
            >>> my_table.update_one(
            ...     {"match_id": "bar_fight", "round": 4},
            ...     update={"$set": {"score": 8, "winner": "Jack"}},
            ... )
            >>>
            >>> # Delete column values for a row (they'll read as None now)
            >>> my_table.update_one(
            ...     {"match_id": "challenge6", "round": 2},
            ...     update={"$unset": {"winner": None, "score": None}},
            ... )

        Note:
            a row created entirely with update operations (as opposed to insertions)
            may, correspondingly, be deleted by means of an $unset update on all columns.
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # assemble the command, dropping any None-valued entries
        _uo_fields = {
            "filter": filter,
            "update": self._converter_agent.preprocess_payload(update),
        }
        uo_payload = {
            "updateOne": {k: v for k, v in _uo_fields.items() if v is not None}
        }
        logger.info(f"updateOne on '{self.name}'")
        uo_response = self._api_commander.request(
            payload=uo_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished updateOne on '{self.name}'")
        if "status" not in uo_response:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from updateOne API command.",
                raw_response=uo_response,
            )
        # the contents of "status" are disregarded and the method just returns

    def delete_one(
        self,
        filter: FilterType,
        *,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Delete a row, matching the provided value of the primary key.
        If no row is found with that primary key, the method does nothing.

        Args:
            filter: a predicate expressing the table primary key in full,
                i.e. a dictionary defining values for all columns that form the
                primary key. A row (at most one) is deleted if it matches that primary
                key. An example filter may be `{"match_id": "fight4", "round": 1}`.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Examples:
            >>> # Count the rows matching a certain filter
            >>> len(my_table.find({"match_id": "fight7"}).to_list())
            3
            >>>
            >>> # Delete a row belonging to the group
            >>> my_table.delete_one({"match_id": "fight7", "round": 2})
            >>>
            >>> # Count again
            >>> len(my_table.find({"match_id": "fight7"}).to_list())
            2
            >>>
            >>> # Attempt the delete again (nothing to delete)
            >>> my_table.delete_one({"match_id": "fight7", "round": 2})
            >>>
            >>> # The count is unchanged
            >>> len(my_table.find({"match_id": "fight7"}).to_list())
            2
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # assemble the command, dropping any None-valued entries
        _do_fields = {"filter": filter}
        do_payload = self._converter_agent.preprocess_payload(
            {"deleteOne": {k: v for k, v in _do_fields.items() if v is not None}}
        )
        logger.info(f"deleteOne on '{self.name}'")
        do_response = self._api_commander.request(
            payload=do_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished deleteOne on '{self.name}'")
        # a deletedCount of -1 is the expected acknowledgment for this command
        if do_response.get("status", {}).get("deletedCount") != -1:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from deleteOne API command.",
                raw_response=do_response,
            )

    def delete_many(
        self,
        filter: FilterType,
        *,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Delete all rows matching a provided filter condition.
        This operation can target from a single row to the entirety of the table.

        Args:
            filter: a filter dictionary to specify which row(s) must be deleted.
                1. If the filter is in the form `{"pk1": val1, "pk2": val2 ...}`
                and specifies the primary key in full, at most one row is deleted,
                the one with that primary key.
                2. If the table has "partitionSort" columns, some or all of them
                may be left out (the least significant of them can also employ
                an inequality, or range, predicate): a range of rows, but always
                within a single partition, will be deleted.
                3. If an empty filter, `{}`, is passed, this operation empties
                the table completely. *USE WITH CARE*.
                4. Other kinds of filtering clauses are forbidden.
                In the following examples, the table is partitioned
                by columns ["pa1", "pa2"] and has partitionSort "ps1" and "ps2" in that
                order.
                Valid filter examples:
                - `{"pa1": x, "pa2": y, "ps1": z, "ps2": t}`: deletes one row
                - `{"pa1": x, "pa2": y, "ps1": z}`: deletes multiple rows
                - `{"pa1": x, "pa2": y, "ps1": z, "ps2": {"$lt": q}}`: del. multiple rows
                - `{"pa1": x, "pa2": y}`: deletes all rows in the partition
                - `{}`: empties the table (*CAUTION*)
                Invalid filter examples:
                - `{"pa1": x}`: incomplete partition key
                - `{"pa1": x, "ps1": z}`: incomplete partition key (whatever is added)
                - `{"pa1": x, "pa2": y, "ps1": {"$lt": r}, "ps2": t}`: inequality on
                  a non-least-significant partitionSort column provided.
                - `{"pa1": x, "pa2": y, "ps2": t}`: cannot skip "ps1"
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Examples:
            >>> # Delete a single row (full primary key specified):
            >>> my_table.delete_many({"match_id": "fight4", "round": 1})
            >>>
            >>> # Delete part of a partition (inequality on the
            >>> # last-mentioned 'partitionSort' column):
            >>> my_table.delete_many({"match_id": "fight5", "round": {"$gte": 5}})
            >>>
            >>> # Delete a whole partition (leave 'partitionSort' unspecified):
            >>> my_table.delete_many({"match_id": "fight7"})
            >>>
            >>> # empty the table entirely with empty filter (*CAUTION*):
            >>> my_table.delete_many({})
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # assemble the command, dropping any None-valued entries
        _dm_fields = {"filter": filter}
        dm_payload = self._converter_agent.preprocess_payload(
            {"deleteMany": {k: v for k, v in _dm_fields.items() if v is not None}}
        )
        logger.info(f"deleteMany on '{self.name}'")
        dm_response = self._api_commander.request(
            payload=dm_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished deleteMany on '{self.name}'")
        # a deletedCount of -1 is the expected acknowledgment for this command
        if dm_response.get("status", {}).get("deletedCount") != -1:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from deleteMany API command.",
                raw_response=dm_response,
            )

    def drop(
        self,
        *,
        if_exists: bool | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Drop the table, i.e. delete it from the database along with
        all the rows stored therein.

        This is a convenience shorthand: it simply delegates
        to the `drop_table` method of this table's database.

        Args:
            if_exists: if passed as True, trying to drop a non-existing table
                will not error, just silently do nothing instead. If not provided,
                the API default behaviour will hold.
            table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Example:
            >>> # List tables:
            >>> my_table.database.list_table_names()
            ['games']
            >>>
            >>> # Drop this table:
            >>> my_table.drop()
            >>>
            >>> # List tables again:
            >>> my_table.database.list_table_names()
            []
            >>>
            >>> # Try working on the table now:
            >>> from astrapy.exceptions import DataAPIResponseException
            >>> try:
            ...     my_table.find_one({})
            ... except DataAPIResponseException as err:
            ...     print(str(err))
            ...
            Collection does not exist [...] (COLLECTION_NOT_EXIST)

        Note:
            Use with caution.

        Note:
            Once the method succeeds, methods on this object can still be invoked:
            however, this hardly makes sense as the underlying actual table
            is no more. It is the developer's responsibility to design a correct
            flow which avoids using a dropped table any further.
        """

        logger.info(f"dropping table '{self.name}' (self)")
        self.database.drop_table(
            self.name,
            if_exists=if_exists,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info(f"finished dropping table '{self.name}' (self)")

    def command(
        self,
        body: dict[str, Any] | None,
        *,
        raise_api_errors: bool = True,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> dict[str, Any]:
        """
        Send a POST request to the Data API for this table with
        an arbitrary, caller-provided payload.
        No transformations or type conversions are made on the provided payload.

        Args:
            body: a JSON-serializable dictionary, the payload of the request.
            raise_api_errors: if True, responses with a nonempty 'errors' field
                result in an astrapy exception being raised.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a dictionary with the response of the HTTP request.

        Example:
            >>> my_table.command({
            ...     "findOne": {
            ...         "filter": {"match_id": "fight4"},
            ...         "projection": {"winner": True},
            ...     }
            ... })
            {'data': {'document': {'winner': 'Victor'}}, 'status': ...  # shortened
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # a short description of the command, for logging purposes only
        _cmd_desc = ",".join(sorted(body.keys())) if body else "(none)"
        logger.info(f"command={_cmd_desc} on '{self.name}'")
        raw_response = self._api_commander.request(
            payload=body,
            raise_api_errors=raise_api_errors,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished command={_cmd_desc} on '{self.name}'")
        return raw_response

Ancestors

  • typing.Generic

Instance variables

var databaseDatabase

a Database object, the database this table belongs to.

Example

>>> my_table.database.name
'the_db'
Expand source code
@property
def database(self) -> Database:
    """
    The `Database` object to which this table belongs.

    Example:
        >>> my_table.database.name
        'the_db'
    """

    return self._database
var full_name : str

The fully-qualified table name within the database, in the form "keyspace.table_name".

Example

>>> my_table.full_name
'default_keyspace.my_table'
Expand source code
@property
def full_name(self) -> str:
    """
    The fully-qualified table name within the database,
    in the form "keyspace.table_name".

    Example:
        >>> my_table.full_name
        'default_keyspace.my_table'
    """

    # join the keyspace and the bare table name with a dot
    return ".".join((self.keyspace, self.name))
var keyspace : str

The keyspace this table is in.

Example

>>> my_table.keyspace
'default_keyspace'
Expand source code
@property
def keyspace(self) -> str:
    """
    The keyspace this table is in.

    Raises a ValueError if the owning database has no keyspace set.

    Example:
        >>> my_table.keyspace
        'default_keyspace'
    """

    db_keyspace = self.database.keyspace
    if db_keyspace is None:
        raise ValueError("The table's DB is set with keyspace=None")
    return db_keyspace
var name : str

The name of this table.

Example

>>> my_table.name
'games'
Expand source code
@property
def name(self) -> str:
    """
    The name of this table.

    This is the unqualified name (no keyspace prefix).

    Example:
        >>> my_table.name
        'games'
    """

    # simple accessor over the name stored on this instance
    return self._name

Methods

def alter(self, operation: AlterTableOperation | dict[str, Any], *, row_type: type[Any] = dict[str, typing.Any], table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> Table[~NEW_ROW]

Executes one of the available alter-table operations on this table, such as adding/dropping columns.

This is a blocking operation: the method returns once the table alteration is applied and the table is ready to use.

Args

operation
an instance of one of the astrapy.info.AlterTable* classes, representing which alter operation to perform and the details thereof. A regular dictionary can also be provided, but then it must have the alter operation name at its top level: {"add": {"columns": …}}.
row_type
this parameter acts as a formal specifier for the type checker. If omitted, the resulting Table is implicitly a Table[dict[str, Any]]. If provided, it must match the type hint specified in the assignment. See the examples below.
table_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for table_admin_timeout_ms.
timeout_ms
an alias for table_admin_timeout_ms.

Examples

>>> from astrapy.info import (
...     AlterTableAddColumns,
...     AlterTableAddVectorize,
...     AlterTableDropColumns,
...     AlterTableDropVectorize,
...     ColumnType,
...     TableScalarColumnTypeDescriptor,
...     VectorServiceOptions,
... )
>>>
>>> # Add a column
>>> new_table_1 = my_table.alter(
...     AlterTableAddColumns(
...         columns={
...             "tie_break": TableScalarColumnTypeDescriptor(
...                 column_type=ColumnType.BOOLEAN,
...             ),
...         }
...     )
... )
>>>
>>> # Drop a column
>>> new_table_2 = new_table_1.alter(AlterTableDropColumns(
...     columns=["tie_break"]
... ))
>>>
>>> # Add vectorize to a (vector) column
>>> new_table_3 = new_table_2.alter(
...     AlterTableAddVectorize(
...         columns={
...             "m_vector": VectorServiceOptions(
...                 provider="openai",
...                 model_name="text-embedding-3-small",
...                 authentication={
...                     "providerKey": "ASTRA_KMS_API_KEY_NAME",
...                 },
...             ),
...         }
...     )
... )
>>>
>>> # Drop vectorize from a (vector) column
>>> # (Also demonstrates type hint usage)
>>> from typing import TypedDict
>>> from astrapy import Table
>>> from astrapy.data_types import (
...     DataAPISet,
...     DataAPITimestamp,
...     DataAPIVector,
... )
>>> from astrapy.ids import UUID
>>>
>>> class MyMatch(TypedDict):
...     match_id: str
...     round: int
...     m_vector: DataAPIVector
...     score: int
...     when: DataAPITimestamp
...     winner: str
...     fighters: DataAPISet[UUID]
...
>>> new_table_4: Table[MyMatch] = new_table_3.alter(
...     AlterTableDropVectorize(columns=["m_vector"]),
...     row_type=MyMatch,
... )
Expand source code
def alter(
    self,
    operation: AlterTableOperation | dict[str, Any],
    *,
    row_type: type[Any] = DefaultRowType,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> Table[NEW_ROW]:
    """
    Executes one of the available alter-table operations on this table,
    such as adding/dropping columns.

    This is a blocking operation: the method returns once the table
    alteration is complete and the table is ready to use.

    Args:
        operation: an instance of one of the `astrapy.info.AlterTable*` classes,
            representing which alter operation to perform and the details thereof.
            A regular dictionary can also be provided, but then it must have the
            alter operation name at its top level: {"add": {"columns": ...}}.
        row_type: this parameter acts a formal specifier for the type checker.
            If omitted, the resulting Table is implicitly a `Table[dict[str, Any]]`.
            If provided, it must match the type hint specified in the assignment.
            See the examples below.
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Examples:
        >>> from astrapy.info import (
        ...     AlterTableAddColumns,
        ...     AlterTableAddVectorize,
        ...     AlterTableDropColumns,
        ...     AlterTableDropVectorize,
        ...     ColumnType,
        ...     TableScalarColumnTypeDescriptor,
        ...     VectorServiceOptions,
        ... )
        >>>
        >>> # Add a column
        >>> new_table_1 = my_table.alter(
        ...     AlterTableAddColumns(
        ...         columns={
        ...             "tie_break": TableScalarColumnTypeDescriptor(
        ...                 column_type=ColumnType.BOOLEAN,
        ...             ),
        ...         }
        ...     )
        ... )
        >>>
        >>> # Drop a column
        >>> new_table_2 = new_table_1.alter(AlterTableDropColumns(
        ...     columns=["tie_break"]
        ... ))
        >>>
        >>> # Add vectorize to a (vector) column
        >>> new_table_3 = new_table_2.alter(
        ...     AlterTableAddVectorize(
        ...         columns={
        ...             "m_vector": VectorServiceOptions(
        ...                 provider="openai",
        ...                 model_name="text-embedding-3-small",
        ...                 authentication={
        ...                     "providerKey": "ASTRA_KMS_API_KEY_NAME",
        ...                 },
        ...             ),
        ...         }
        ...     )
        ... )
        >>>
        >>> # Drop vectorize from a (vector) column
        >>> # (Also demonstrates type hint usage)
        >>> from typing import TypedDict
        >>> from astrapy import Table
        >>> from astrapy.data_types import (
        ...     DataAPISet,
        ...     DataAPITimestamp,
        ...     DataAPIVector,
        ... )
        >>> from astrapy.ids import UUID
        >>>
        >>> class MyMatch(TypedDict):
        ...     match_id: str
        ...     round: int
        ...     m_vector: DataAPIVector
        ...     score: int
        ...     when: DataAPITimestamp
        ...     winner: str
        ...     fighters: DataAPISet[UUID]
        ...
        >>> new_table_4: Table[MyMatch] = new_table_3.alter(
        ...     AlterTableDropVectorize(columns=["m_vector"]),
        ...     row_type=MyMatch,
        ... )
    """

    # Normalize the input: accept either a ready-made operation object or a
    # plain dict carrying the operation name at its top level.
    n_operation: AlterTableOperation = (
        operation
        if isinstance(operation, AlterTableOperation)
        else AlterTableOperation.from_full_dict(operation)
    )
    _table_admin_timeout_ms, _ta_label = _select_singlereq_timeout_ta(
        timeout_options=self.api_options.timeout_options,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    at_operation_name = n_operation._name
    at_payload = {
        "alterTable": {
            "operation": {
                at_operation_name: n_operation.as_dict(),
            },
        },
    }
    logger.info(f"alterTable({at_operation_name})")
    at_response = self._api_commander.request(
        payload=at_payload,
        timeout_context=_TimeoutContext(
            request_ms=_table_admin_timeout_ms, label=_ta_label
        ),
    )
    # The API acknowledges a successful alteration with exactly {"ok": 1}.
    if at_response.get("status") != {"ok": 1}:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from alterTable API command.",
            raw_response=at_response,
        )
    logger.info(f"finished alterTable({at_operation_name})")
    # Return a fresh Table object (possibly with a new row type) pointing
    # to the same underlying database table.
    return Table(
        database=self.database,
        name=self.name,
        keyspace=self.keyspace,
        api_options=self.api_options,
    )
def command(self, body: dict[str, Any] | None, *, raise_api_errors: bool = True, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> dict[str, typing.Any]

Send a POST request to the Data API for this table with an arbitrary, caller-provided payload. No transformations or type conversions are made on the provided payload.

Args

body
a JSON-serializable dictionary, the payload of the request.
raise_api_errors
if True, responses with a nonempty 'errors' field result in an astrapy exception being raised.
general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Returns

a dictionary with the response of the HTTP request.

Example

>>> my_table.command({
...     "findOne": {
...         "filter": {"match_id": "fight4"},
...         "projection": {"winner": True},
...     }
... })
{'data': {'document': {'winner': 'Victor'}}, 'status': ...  # shortened
Expand source code
def command(
    self,
    body: dict[str, Any] | None,
    *,
    raise_api_errors: bool = True,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> dict[str, Any]:
    """
    Issue a POST request to the Data API for this table, using an
    arbitrary payload supplied by the caller.
    The payload is sent as-is: no transformations or type conversions
    are applied to it.

    Args:
        body: a JSON-serializable dictionary, the payload of the request.
        raise_api_errors: if True, responses with a nonempty 'errors' field
            result in an astrapy exception being raised.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a dictionary with the response of the HTTP request.

    Example:
        >>> my_table.command({
        ...     "findOne": {
        ...         "filter": {"match_id": "fight4"},
        ...         "projection": {"winner": True},
        ...     }
        ... })
        {'data': {'document': {'winner': 'Victor'}}, 'status': ...  # shortened
    """

    _req_timeout_ms, _req_timeout_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # A short description of the command (its top-level keys), for logging.
    _cmd_desc = ",".join(sorted(body.keys())) if body else "(none)"
    logger.info(f"command={_cmd_desc} on '{self.name}'")
    command_result = self._api_commander.request(
        payload=body,
        raise_api_errors=raise_api_errors,
        timeout_context=_TimeoutContext(
            request_ms=_req_timeout_ms, label=_req_timeout_label
        ),
    )
    logger.info(f"finished command={_cmd_desc} on '{self.name}'")
    return command_result
def count_documents(self, filter: FilterType, *, upper_bound: int, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> int

Count the rows in the table matching the specified filter.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"name": "John", "age": 59} {"$and": [{"name": {"$eq": "John"}}, {"age": {"$gt": 58}}]} See the Data API documentation for the full set of operators.
upper_bound
a required ceiling on the result of the count operation. If the actual number of rows exceeds this value, an exception will be raised. Furthermore, if the actual number of rows exceeds the maximum count that the Data API can reach (regardless of upper_bound), an exception will be raised.
general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Returns

the exact count of matching rows.

Examples

>>> my_table.insert_many([{"seq": i} for i in range(20)])
TableInsertManyResult(...)
>>> my_table.count_documents({}, upper_bound=100)
20
>>> my_table.count_documents({"seq":{"$gt": 15}}, upper_bound=100)
4
>>> my_table.count_documents({}, upper_bound=10)
Traceback (most recent call last):
    ... ...
astrapy.exceptions.TooManyRowsToCountException

Note

Count operations are expensive: for this reason, the best practice is to provide a reasonable upper_bound according to the caller expectations. Moreover, indiscriminate usage of count operations for sizeable amounts of rows (i.e. in the thousands and more) is discouraged in favor of alternative application-specific solutions. Keep in mind that the Data API has a hard upper limit on the amount of rows it will count, and that an exception will be thrown by this method if this limit is encountered.

Expand source code
def count_documents(
    self,
    filter: FilterType,
    *,
    upper_bound: int,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> int:
    """
    Count the rows in the table matching the specified filter.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"name": "John", "age": 59}
                {"$and": [{"name": {"$eq": "John"}}, {"age": {"$gt": 58}}]}
            See the Data API documentation for the full set of operators.
        upper_bound: a required ceiling on the result of the count operation.
            If the actual number of rows exceeds this value,
            an exception will be raised.
            Furthermore, if the actual number of rows exceeds the maximum
            count that the Data API can reach (regardless of upper_bound),
            an exception will be raised.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        the exact count of matching rows.

    Examples:
        >>> my_table.insert_many([{"seq": i} for i in range(20)])
        TableInsertManyResult(...)
        >>> my_table.count_documents({}, upper_bound=100)
        20
        >>> my_table.count_documents({"seq":{"$gt": 15}}, upper_bound=100)
        4
        >>> my_table.count_documents({}, upper_bound=10)
        Traceback (most recent call last):
            ... ...
        astrapy.exceptions.TooManyRowsToCountException

    Note:
        Count operations are expensive: for this reason, the best practice
        is to provide a reasonable `upper_bound` according to the caller
        expectations. Moreover, indiscriminate usage of count operations
        for sizeable amounts of rows (i.e. in the thousands and more)
        is discouraged in favor of alternative application-specific solutions.
        Keep in mind that the Data API has a hard upper limit on the amount
        of rows it will count, and that an exception will be thrown
        by this method if this limit is encountered.
    """

    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    cd_payload = {"countDocuments": {"filter": filter}}
    logger.info(f"countDocuments on '{self.name}'")
    cd_response = self._api_commander.request(
        payload=cd_payload,
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    logger.info(f"finished countDocuments on '{self.name}'")
    # Guard clauses replace the previous nested if/else pyramid;
    # the observable behavior (returns and raises) is unchanged.
    cd_status = cd_response.get("status", {})
    if "count" not in cd_status:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from countDocuments API command.",
            raw_response=cd_response,
        )
    count: int = cd_status["count"]
    # "moreData" signals that the server hit its own hard counting limit
    # before the full count could be established.
    if cd_status.get("moreData", False):
        raise TooManyRowsToCountException(
            text=f"Document count exceeds {count}, the maximum allowed by the server",
            server_max_count_exceeded=True,
        )
    if count > upper_bound:
        raise TooManyRowsToCountException(
            text="Document count exceeds required upper bound",
            server_max_count_exceeded=False,
        )
    return count
def create_index(self, name: str, *, column: str, options: TableIndexOptions | dict[str, Any] | None = None, if_not_exists: bool | None = None, table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Create an index on a non-vector column of the table.

This is a blocking operation: the method returns once the index is created and ready to use.

For creation of a vector index, see method create_vector_index instead.

Args

name
the name of the index. Index names must be unique across the keyspace.
column
the table column on which the index is to be created.
options
if passed, it must be an instance of TableIndexOptions, or an equivalent dictionary, which specifies index settings such as – for a text column – case-sensitivity and so on. See the TableIndexOptions class for more details.
if_not_exists
if set to True, the command will succeed even if an index with the specified name already exists (in which case no actual index creation takes place on the database). The API default of False means that an error is raised by the API in case of name collision.
table_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for table_admin_timeout_ms.
timeout_ms
an alias for table_admin_timeout_ms.

Examples

>>> from astrapy.info import TableIndexOptions
>>>
>>> # create an index on a column
>>> my_table.create_index(
...     "score_index",
...     column="score",
... )
>>>
>>> # create an index on a textual column, specifying indexing options
>>> my_table.create_index(
...     "winner_index",
...     column="winner",
...     options=TableIndexOptions(
...         ascii=False,
...         normalize=True,
...         case_sensitive=False,
...     ),
... )
Expand source code
def create_index(
    self,
    name: str,
    *,
    column: str,
    options: TableIndexOptions | dict[str, Any] | None = None,
    if_not_exists: bool | None = None,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Create an index on a non-vector column of the table.

    This is a blocking operation: the method returns once the index
    is created and ready to use.

    To create a vector index, use the `create_vector_index` method instead.

    Args:
        name: the name of the index. Index names must be unique across the keyspace.
        column: the table column on which the index is to be created.
        options: if passed, it must be an instance of `TableIndexOptions`,
            or an equivalent dictionary, which specifies index settings
            such as -- for a text column -- case-sensitivity and so on.
            See the `astrapy.info.TableIndexOptions` class for more details.
        if_not_exists: if set to True, the command will succeed even if an index
            with the specified name already exists (in which case no actual
            index creation takes place on the database). The API default of False
            means that an error is raised by the API in case of name collision.
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Examples:
        >>> from astrapy.info import TableIndexOptions
        >>>
        >>> # create an index on a column
        >>> my_table.create_index(
        ...     "score_index",
        ...     column="score",
        ... )
        >>>
        >>> # create an index on a textual column, specifying indexing options
        >>> my_table.create_index(
        ...     "winner_index",
        ...     column="winner",
        ...     options=TableIndexOptions(
        ...         ascii=False,
        ...         normalize=True,
        ...         case_sensitive=False,
        ...     ),
        ... )
    """

    # Normalize the options (a missing value becomes empty options) and
    # assemble the index definition payload expected by the API.
    normalized_options = TableIndexOptions.coerce(options or {})
    index_definition: dict[str, Any] = TableIndexDefinition(
        column=column,
        options=normalized_options,
    ).as_dict()
    # Delegate the actual API interaction to the shared helper.
    return self._create_generic_index(
        i_name=name,
        ci_definition=index_definition,
        ci_command="createIndex",
        if_not_exists=if_not_exists,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
def create_vector_index(self, name: str, *, column: str, options: TableVectorIndexOptions | dict[str, Any] | None = None, if_not_exists: bool | None = None, table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Create a vector index on a vector column of the table, enabling vector similarity search operations on it.

This is a blocking operation: the method returns once the index is created and ready to use.

For creation of a non-vector index, see method create_index instead.

Args

name
the name of the index. Index names must be unique across the keyspace.
column
the table column, of type "vector" on which to create the index.
options
an instance of TableVectorIndexOptions, or an equivalent dictionary, which specifies settings for the vector index, such as the metric to use or, if desired, a "source model" setting. If omitted, the Data API defaults will apply for the index. See the TableVectorIndexOptions class for more details.
if_not_exists
if set to True, the command will succeed even if an index with the specified name already exists (in which case no actual index creation takes place on the database). The API default of False means that an error is raised by the API in case of name collision.
table_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for table_admin_timeout_ms.
timeout_ms
an alias for table_admin_timeout_ms.

Example

>>> from astrapy.constants import VectorMetric
>>> from astrapy.info import TableVectorIndexOptions
>>>
>>> # create a vector index with dot-product similarity
>>> my_table.create_vector_index(
...     "m_vector_index",
...     column="m_vector",
...     options=TableVectorIndexOptions(
...         metric=VectorMetric.DOT_PRODUCT,
...     ),
... )
>>> # specify a source_model (since the previous statement
...     # succeeded, this will do nothing because of if_not_exists):
>>> my_table.create_vector_index(
...     "m_vector_index",
...     column="m_vector",
...     options=TableVectorIndexOptions(
...         metric=VectorMetric.DOT_PRODUCT,
...         source_model="nv-qa-4",
...     ),
...     if_not_exists=True,
... )
>>> # leave the settings to the Data API defaults of cosine
>>> # similarity metric (since the previous statement
...     # succeeded, this will do nothing because of if_not_exists):
>>> my_table.create_vector_index(
...     "m_vector_index",
...     column="m_vector",
...     if_not_exists=True,
... )
Expand source code
def create_vector_index(
    self,
    name: str,
    *,
    column: str,
    options: TableVectorIndexOptions | dict[str, Any] | None = None,
    if_not_exists: bool | None = None,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Create a vector index on a vector column of the table, enabling vector
    similarity search operations on it.

    This is a blocking operation: the method returns once the index
    is created and ready to use.

    To create a non-vector index, use the `create_index` method instead.

    Args:
        name: the name of the index. Index names must be unique across the keyspace.
        column: the table column, of type "vector" on which to create the index.
        options: an instance of `TableVectorIndexOptions`, or an equivalent
            dictionary, which specifies settings for the vector index,
            such as the metric to use or, if desired, a "source model" setting.
            If omitted, the Data API defaults will apply for the index.
            See the `astrapy.info.TableVectorIndexOptions` class for more details.
        if_not_exists: if set to True, the command will succeed even if an index
            with the specified name already exists (in which case no actual
            index creation takes place on the database). The API default of False
            means that an error is raised by the API in case of name collision.
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Example:
        >>> from astrapy.constants import VectorMetric
        >>> from astrapy.info import TableVectorIndexOptions
        >>>
        >>> # create a vector index with dot-product similarity
        >>> my_table.create_vector_index(
        ...     "m_vector_index",
        ...     column="m_vector",
        ...     options=TableVectorIndexOptions(
        ...         metric=VectorMetric.DOT_PRODUCT,
        ...     ),
        ... )
        >>> # specify a source_model (since the previous statement
        >>> # succeeded, this will do nothing because of `if_not_exists`):
        >>> my_table.create_vector_index(
        ...     "m_vector_index",
        ...     column="m_vector",
        ...     options=TableVectorIndexOptions(
        ...         metric=VectorMetric.DOT_PRODUCT,
        ...         source_model="nv-qa-4",
        ...     ),
        ...     if_not_exists=True,
        ... )
        >>> # leave the settings to the Data API defaults of cosine
        >>> # similarity metric (since the previous statement
        >>> # succeeded, this will do nothing because of `if_not_exists`):
        >>> my_table.create_vector_index(
        ...     "m_vector_index",
        ...     column="m_vector",
        ...     if_not_exists=True,
        ... )
    """

    # Normalize the options and assemble the vector-index definition
    # payload expected by the API. (A None `options` is accepted by
    # TableVectorIndexOptions.coerce and yields the API defaults.)
    normalized_options = TableVectorIndexOptions.coerce(options)
    index_definition: dict[str, Any] = TableVectorIndexDefinition(
        column=column,
        options=normalized_options,
    ).as_dict()
    # Delegate the actual API interaction to the shared helper.
    return self._create_generic_index(
        i_name=name,
        ci_definition=index_definition,
        ci_command="createVectorIndex",
        if_not_exists=if_not_exists,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
def definition(self, *, table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> ListTableDefinition

Query the Data API and return a structure defining the table schema. If there are no unsupported columns in the table, the return value has the same contents as could have been provided to a create_table method call.

Args

table_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for table_admin_timeout_ms.
timeout_ms
an alias for table_admin_timeout_ms.

Returns

A ListTableDefinition object, available for inspection.

Example

>>> my_table.definition()
ListTableDefinition(columns=[match_id,round,fighters, ...  # shortened
Expand source code
def definition(
    self,
    *,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> ListTableDefinition:
    """
    Query the Data API and return a structure defining the table schema.
    If there are no unsupported columns in the table, the return value has
    the same contents as could have been provided to a `create_table` method call.

    Args:
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Returns:
        A `ListTableDefinition` object, available for inspection.

    Raises:
        ValueError: if no table with this name is found in the keyspace.

    Example:
        >>> my_table.definition()
        ListTableDefinition(columns=[match_id,round,fighters, ...  # shortened
    """

    _table_admin_timeout_ms, _ta_label = _select_singlereq_timeout_ta(
        timeout_options=self.api_options.timeout_options,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info(f"getting tables in search of '{self.name}'")
    # Scan the table listing for this table's descriptor (the API has no
    # direct "describe one table" endpoint here).
    self_descriptor = next(
        (
            table_desc
            for table_desc in self.database._list_tables_ctx(
                keyspace=None,
                timeout_context=_TimeoutContext(
                    request_ms=_table_admin_timeout_ms,
                    label=_ta_label,
                ),
            )
            if table_desc.name == self.name
        ),
        None,
    )
    logger.info(f"finished getting tables in search of '{self.name}'")
    if self_descriptor is None:
        raise ValueError(
            f"Table {self.keyspace}.{self.name} not found.",
        )
    return self_descriptor.definition
def delete_many(self, filter: FilterType, *, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Delete all rows matching a provided filter condition. This operation can target from a single row to the entirety of the table.

Args

filter
a filter dictionary to specify which row(s) must be deleted. 1. If the filter is in the form {"pk1": val1, "pk2": val2 ...} and specified the primary key in full, at most one row is deleted, the one with that primary key. 2. If the table has "partitionSort" columns, some or all of them may be left out (the least significant of them can also employ an inequality, or range, predicate): a range of rows, but always within a single partition, will be deleted. 3. If an empty filter, {}, is passed, this operation empties the table completely. USE WITH CARE. 4. Other kinds of filtering clauses are forbidden. In the following examples, the table is partitioned by columns ["pa1", "pa2"] and has partitionSort "ps1" and "ps2" in that order. Valid filter examples: - {"pa1": x, "pa2": y, "ps1": z, "ps2": t}: deletes one row - {"pa1": x, "pa2": y, "ps1": z}: deletes multiple rows - {"pa1": x, "pa2": y, "ps1": z, "ps2": {"$lt": q}}: del. multiple rows - {"pa1": x, "pa2": y}: deletes all rows in the partition - {}: empties the table (CAUTION) Invalid filter examples: - {"pa1": x}: incomplete partition key - {"pa1": x, "ps1" z}: incomplete partition key (whatever is added) - {"pa1": x, "pa2": y, "ps1": {"$lt": r}, "ps2": t}: inequality on a non-least-significant partitionSort column provided. - {"pa1": x, "pa2": y, "ps2": t}: cannot skip "ps1"
general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Examples

>>> # Delete a single row (full primary key specified):
>>> my_table.delete_many({"match_id": "fight4", "round": 1})
>>>
>>> # Delete part of a partition (inequality on the
>>> # last-mentioned 'partitionSort' column):
>>> my_table.delete_many({"match_id": "fight5", "round": {"$gte": 5}})
>>>
>>> # Delete a whole partition (leave 'partitionSort' unspecified):
>>> my_table.delete_many({"match_id": "fight7"})
>>>
>>> # empty the table entirely with empty filter (*CAUTION*):
>>> my_table.delete_many({})
Expand source code
def delete_many(
    self,
    filter: FilterType,
    *,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Delete all rows matching a provided filter condition.
    This operation can target anything from a single row to the whole table.

    Args:
        filter: a filter dictionary selecting which row(s) must be deleted.
            1. A filter of the form `{"pk1": val1, "pk2": val2 ...}`, which
            specifies the primary key in full, deletes at most one row:
            the one with that primary key.
            2. If the table has "partitionSort" columns, some or all of them
            may be left out (and the least significant of those provided can
            also employ an inequality, or range, predicate): a range of rows,
            but always within a single partition, will be deleted.
            3. An empty filter, `{}`, makes this operation empty
            the table completely. *USE WITH CARE*.
            4. Any other kind of filtering clause is forbidden.
            In the following examples, the table is partitioned
            by columns ["pa1", "pa2"] and has partitionSort "ps1" and "ps2"
            in that order.
            Valid filter examples:
            - `{"pa1": x, "pa2": y, "ps1": z, "ps2": t}`: deletes one row
            - `{"pa1": x, "pa2": y, "ps1": z}`: deletes multiple rows
            - `{"pa1": x, "pa2": y, "ps1": z, "ps2": {"$lt": q}}`: del. multiple rows
            - `{"pa1": x, "pa2": y}`: deletes all rows in the partition
            - `{}`: empties the table (*CAUTION*)
            Invalid filter examples:
            - `{"pa1": x}`: incomplete partition key
            - `{"pa1": x, "ps1": z}`: incomplete partition key (whatever is added)
            - `{"pa1": x, "pa2": y, "ps1": {"$lt": r}, "ps2": t}`: inequality on
              a non-least-significant partitionSort column provided.
            - `{"pa1": x, "pa2": y, "ps2": t}`: cannot skip "ps1"
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Examples:
        >>> # Delete a single row (full primary key specified):
        >>> my_table.delete_many({"match_id": "fight4", "round": 1})
        >>>
        >>> # Delete part of a partition (inequality on the
        >>> # last-mentioned 'partitionSort' column):
        >>> my_table.delete_many({"match_id": "fight5", "round": {"$gte": 5}})
        >>>
        >>> # Delete a whole partition (leave 'partitionSort' unspecified):
        >>> my_table.delete_many({"match_id": "fight7"})
        >>>
        >>> # empty the table entirely with empty filter (*CAUTION*):
        >>> my_table.delete_many({})
    """

    # Resolve the effective single-request timeout among the aliases:
    _req_ms, _req_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # Assemble the command body (entries left as None are dropped) and
    # run the client-side payload preprocessing on it:
    command_body = {
        "filter": filter,
    }
    dm_payload = self._converter_agent.preprocess_payload(
        {"deleteMany": {k: v for k, v in command_body.items() if v is not None}}
    )
    logger.info(f"deleteMany on '{self.name}'")
    dm_response = self._api_commander.request(
        payload=dm_payload,
        timeout_context=_TimeoutContext(
            request_ms=_req_ms, label=_req_label
        ),
    )
    logger.info(f"finished deleteMany on '{self.name}'")
    # A successful response carries a deletedCount of -1; anything else
    # is treated as a faulty API response:
    if dm_response.get("status", {}).get("deletedCount") != -1:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from deleteMany API command.",
            raw_response=dm_response,
        )
def delete_one(self, filter: FilterType, *, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Delete a row, matching the provided value of the primary key. If no row is found with that primary key, the method does nothing.

Args

filter
a predicate expressing the table primary key in full, i.e. a dictionary defining values for all columns that form the primary key. A row (at most one) is deleted if it matches that primary key. An example filter may be {"match_id": "fight4", "round": 1}.
general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Examples

>>> # Count the rows matching a certain filter
>>> len(my_table.find({"match_id": "fight7"}).to_list())
3
>>>
>>> # Delete a row belonging to the group
>>> my_table.delete_one({"match_id": "fight7", "round": 2})
>>>
>>> # Count again
>>> len(my_table.find({"match_id": "fight7"}).to_list())
2
>>>
>>> # Attempt the delete again (nothing to delete)
>>> my_table.delete_one({"match_id": "fight7", "round": 2})
>>>
>>> # The count is unchanged
>>> len(my_table.find({"match_id": "fight7"}).to_list())
2
Expand source code
def delete_one(
    self,
    filter: FilterType,
    *,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Delete a row, identified by the provided value of the primary key.
    If no row matches that primary key, the method does nothing.

    Args:
        filter: a predicate expressing the table primary key in full,
            i.e. a dictionary assigning values to all columns that form the
            primary key. At most one row — the one matching that primary
            key — is deleted.
            An example filter may be `{"match_id": "fight4", "round": 1}`.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Examples:
        >>> # Count the rows matching a certain filter
        >>> len(my_table.find({"match_id": "fight7"}).to_list())
        3
        >>>
        >>> # Delete a row belonging to the group
        >>> my_table.delete_one({"match_id": "fight7", "round": 2})
        >>>
        >>> # Count again
        >>> len(my_table.find({"match_id": "fight7"}).to_list())
        2
        >>>
        >>> # Attempt the delete again (nothing to delete)
        >>> my_table.delete_one({"match_id": "fight7", "round": 2})
        >>>
        >>> # The count is unchanged
        >>> len(my_table.find({"match_id": "fight7"}).to_list())
        2
    """

    # Resolve the effective single-request timeout among the aliases:
    _req_ms, _req_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # Assemble the command body (entries left as None are dropped) and
    # run the client-side payload preprocessing on it:
    command_body = {
        "filter": filter,
    }
    do_payload = self._converter_agent.preprocess_payload(
        {"deleteOne": {k: v for k, v in command_body.items() if v is not None}}
    )
    logger.info(f"deleteOne on '{self.name}'")
    do_response = self._api_commander.request(
        payload=do_payload,
        timeout_context=_TimeoutContext(
            request_ms=_req_ms, label=_req_label
        ),
    )
    logger.info(f"finished deleteOne on '{self.name}'")
    # A successful response carries a deletedCount of -1; anything else
    # is treated as a faulty API response:
    if do_response.get("status", {}).get("deletedCount") != -1:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from deleteOne API command.",
            raw_response=do_response,
        )
def distinct(self, key: str, *, filter: FilterType | None = None, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[typing.Any]

Return a list of the unique values of key across the rows in the table that match the provided filter.

Args

key
the name of the field whose value is inspected across rows. Keys are typically just column names, although they can use the dot notation to select particular entries in map columns. For set and list columns, individual entries are "unrolled" automatically; in particular, for lists, numeric indices can be used in the key dot-notation syntax. Example of acceptable key values: "a_column" "map_column.map_key" "list_column.2"
filter
a dictionary expressing which condition the inspected rows must satisfy. The filter can use operators, such as "$eq" for equality, and require columns to compare with literal values. Simple examples are {} (zero filter), {"match_no": 123} (a shorthand for {"match_no": {"$eq": 123}}, or {"match_no": 123, "round": "C"} (multiple conditions are implicitly combined with "$and"). Please consult the Data API documentation for a more detailed explanation of table search filters and tips on their usage.
general_method_timeout_ms
a timeout, in milliseconds, for the whole requested operation (which may involve multiple API requests). This method, being based on find (see) may entail successive HTTP API requests, depending on the amount of involved rows. If not provided, this object's defaults apply.
request_timeout_ms
a timeout, in milliseconds, for each API request. If not provided, this object's defaults apply.
timeout_ms
an alias for general_method_timeout_ms.

Returns

a list of all different values for key found across the rows that match the filter. The result list has no repeated items.

Examples

>>> my_table.distinct("winner", filter={"match_id": "challenge6"})
['Donna', 'Erick', 'Fiona']
>>>
>>> # distinct values across the whole table:
>>> # (not recommended performance-wise)
>>> my_table.distinct("winner")
The Data API returned a warning: {'errorCode': 'ZERO_FILTER_OPERATIONS', ...
['Victor', 'Adam Zuul', 'Betta Vigo', 'Caio Gozer', 'Donna', 'Erick', ...
>>>
>>> # Over a column containing null values
>>> # (also with composite filter):
>>> my_table.distinct(
...     "score",
...     filter={"match_id": {"$in": ["fight4", "tournamentA"]}},
... )
[18, None]
>>>
>>> # distinct over a set column (automatically "unrolled"):
>>> my_table.distinct(
...     "fighters",
...     filter={"match_id": {"$in": ["fight4", "tournamentA"]}},
... )
[UUID('0193539a-2770-8c09-a32a-111111111111'), UUID('019353e3-00b4-...

Note

It must be kept in mind that distinct is a client-side operation, which effectively browses all required rows using the logic of the find method and collects the unique values found for key. As such, there may be performance, latency and ultimately billing implications if the amount of matching rows is large.

Note

For details on the behaviour of "distinct" in conjunction with real-time changes in the table contents, see the Note of the find command.

Expand source code
def distinct(
    self,
    key: str,
    *,
    filter: FilterType | None = None,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[Any]:
    """
    Return a list of the unique values of `key` across the rows
    in the table that match the provided filter.

    Args:
        key: the name of the field whose value is inspected across rows.
            Keys are typically just column names, although they can use
            the dot notation to select particular entries in map columns.
            For set and list columns, individual entries are "unrolled"
            automatically; in particular, for lists, numeric indices
            can be used in the key dot-notation syntax.
            Example of acceptable `key` values:
                "a_column"
                "map_column.map_key"
                "list_column.2"
        filter: a dictionary expressing which condition the inspected rows
            must satisfy. The filter can use operators, such as "$eq" for equality,
            and require columns to compare with literal values. Simple examples
            are `{}` (zero filter), `{"match_no": 123}` (a shorthand for
            `{"match_no": {"$eq": 123}}`, or `{"match_no": 123, "round": "C"}`
            (multiple conditions are implicitly combined with "$and").
            Please consult the Data API documentation for a more detailed
            explanation of table search filters and tips on their usage.
        general_method_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation (which may involve multiple API requests).
            This method, being based on `find` (see) may entail successive HTTP API
            requests, depending on the amount of involved rows.
            If not provided, this object's defaults apply.
        request_timeout_ms: a timeout, in milliseconds, for each API request.
            If not provided, this object's defaults apply.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a list of all different values for `key` found across the rows
        that match the filter. The result list has no repeated items.

    Examples:
        >>> my_table.distinct("winner", filter={"match_id": "challenge6"})
        ['Donna', 'Erick', 'Fiona']
        >>>
        >>> # distinct values across the whole table:
        >>> # (not recommended performance-wise)
        >>> my_table.distinct("winner")
        The Data API returned a warning: {'errorCode': 'ZERO_FILTER_OPERATIONS', ...
        ['Victor', 'Adam Zuul', 'Betta Vigo', 'Caio Gozer', 'Donna', 'Erick', ...
        >>>
        >>> # Over a column containing null values
        >>> # (also with composite filter):
        >>> my_table.distinct(
        ...     "score",
        ...     filter={"match_id": {"$in": ["fight4", "tournamentA"]}},
        ... )
        [18, None]
        >>>
        >>> # distinct over a set column (automatically "unrolled"):
        >>> my_table.distinct(
        ...     "fighters",
        ...     filter={"match_id": {"$in": ["fight4", "tournamentA"]}},
        ... )
        [UUID('0193539a-2770-8c09-a32a-111111111111'), UUID('019353e3-00b4-...

    Note:
        It must be kept in mind that `distinct` is a client-side operation,
        which effectively browses all required rows using the logic
        of the `find` method and collects the unique values found for `key`.
        As such, there may be performance, latency and ultimately
        billing implications if the amount of matching rows is large.

    Note:
        For details on the behaviour of "distinct" in conjunction with
        real-time changes in the table contents, see the
        Note of the `find` command.
    """

    # lazy-import here to avoid circular import issues
    from astrapy.cursors import TableFindCursor

    # overall-method timeout: explicit argument first, then its alias,
    # then this object's configured default:
    _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
        (general_method_timeout_ms, "general_method_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (
            self.api_options.timeout_options.general_method_timeout_ms,
            "general_method_timeout_ms",
        ),
    )
    # per-request timeout: explicit argument first, then the default:
    _request_timeout_ms, _rt_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    # preparing cursor:
    # `_extractor`, applied to a row, yields the value(s) found under `key`
    # (entries of set/list columns are unrolled into individual items);
    # `_key` is the key reduced to a path usable in the find projection:
    _extractor = _create_document_key_extractor(key)
    _key = _reduce_distinct_key_to_shallow_safe(key)
    if _key == "":
        raise ValueError(
            "The 'key' parameter for distinct cannot be empty "
            "or start with a list index."
        )
    # relaxing the type hint (limited to within this method body)
    f_cursor: TableFindCursor[dict[str, Any], dict[str, Any]] = (
        TableFindCursor(
            table=self,
            request_timeout_ms=_request_timeout_ms,
            overall_timeout_ms=_general_method_timeout_ms,
            request_timeout_label=_rt_label,
            overall_timeout_label=_gmt_label,
        )  # type: ignore[assignment]
        .filter(filter)
        .project({_key: True})
    )
    # consuming it:
    # deduplicate by a hash of each extracted item, preserving the order
    # in which distinct items are first encountered:
    _item_hashes = set()
    distinct_items: list[Any] = []
    logger.info(f"running distinct() on '{self.name}'")
    for document in f_cursor:
        for item in _extractor(document):
            _item_hash = _hash_document(
                item, options=self.api_options.serdes_options
            )
            if _item_hash not in _item_hashes:
                _item_hashes.add(_item_hash)
                distinct_items.append(item)
    logger.info(f"finished running distinct() on '{self.name}'")
    return distinct_items
def drop(self, *, if_exists: bool | None = None, table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Drop the table, i.e. delete it from the database along with all the rows stored therein.

Args

if_exists
if passed as True, trying to drop a non-existing table will not error, just silently do nothing instead. If not provided, the API default behaviour will hold.
table_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for table_admin_timeout_ms.
timeout_ms
an alias for table_admin_timeout_ms.

Example

>>> # List tables:
>>> my_table.database.list_table_names()
['games']
>>>
>>> # Drop this table:
>>> my_table.drop()
>>>
>>> # List tables again:
>>> my_table.database.list_table_names()
[]
>>>
>>> # Try working on the table now:
>>> from astrapy.exceptions import DataAPIResponseException
>>> try:
...     my_table.find_one({})
... except DataAPIResponseException as err:
...     print(str(err))
...
Collection does not exist [...] (COLLECTION_NOT_EXIST)

Note

Use with caution.

Note

Once the method succeeds, methods on this object can still be invoked: however, this hardly makes sense as the underlying actual table is no more. It is the responsibility of the developer to design a correct flow which avoids using a deceased table any further.

Expand source code
def drop(
    self,
    *,
    if_exists: bool | None = None,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Drop the table, i.e. delete it from the database along with
    all the rows stored therein.

    Args:
        if_exists: if passed as True, trying to drop a non-existing table
            will not error, just silently do nothing instead. If not provided,
            the API default behaviour will hold.
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Example:
        >>> # List tables:
        >>> my_table.database.list_table_names()
        ['games']
        >>>
        >>> # Drop this table:
        >>> my_table.drop()
        >>>
        >>> # List tables again:
        >>> my_table.database.list_table_names()
        []
        >>>
        >>> # Try working on the table now:
        >>> from astrapy.exceptions import DataAPIResponseException
        >>> try:
        ...     my_table.find_one({})
        ... except DataAPIResponseException as err:
        ...     print(str(err))
        ...
        Collection does not exist [...] (COLLECTION_NOT_EXIST)

    Note:
        Use with caution.

    Note:
        Once the method succeeds, methods on this object can still be invoked:
        however, this hardly makes sense as the underlying actual table
        is no more.
        It is the responsibility of the developer to design a correct flow
        which avoids using a deceased table any further.
    """

    logger.info(f"dropping table '{self.name}' (self)")
    # Delegate the actual drop to the Database object this table belongs to:
    owning_database = self.database
    owning_database.drop_table(
        self.name,
        if_exists=if_exists,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info(f"finished dropping table '{self.name}' (self)")
def estimated_document_count(self, *, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> int

Query the API server for an estimate of the document count in the table.

Contrary to count_documents, this method has no filtering parameters.

Args

general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Returns

a server-provided estimate count of the documents in the table.

Example

>>> my_table.estimated_document_count()
5820
Expand source code
def estimated_document_count(
    self,
    *,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> int:
    """
    Query the API server for an estimate of the document count in the table.

    Contrary to `count_documents`, this method has no filtering parameters.

    Args:
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a server-provided estimate count of the documents in the table.

    Example:
        >>> my_table.estimated_document_count()
        5820
    """

    # Resolve the effective single-request timeout among the aliases:
    _req_ms, _req_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info(f"estimatedDocumentCount on '{self.name}'")
    ed_response = self._api_commander.request(
        payload={"estimatedDocumentCount": {}},
        timeout_context=_TimeoutContext(
            request_ms=_req_ms, label=_req_label
        ),
    )
    logger.info(f"finished estimatedDocumentCount on '{self.name}'")
    # A well-formed response carries the estimate under status.count:
    if "count" not in ed_response.get("status", {}):
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from estimatedDocumentCount API command.",
            raw_response=ed_response,
        )
    count: int = ed_response["status"]["count"]
    return count
def find(self, filter: FilterType | None = None, *, projection: ProjectionType | None = None, row_type: type[ROW2] | None = None, skip: int | None = None, limit: int | None = None, include_similarity: bool | None = None, include_sort_vector: bool | None = None, sort: SortType | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> TableFindCursor[ROW, ROW2]

Find rows on the table matching the provided filters and according to sorting criteria including vector similarity.

The returned TableFindCursor object, representing the stream of results, can be iterated over, or consumed and manipulated in several other ways (see the examples below and the TableFindCursor documentation for details). Since the amount of returned items can be large, TableFindCursor is a lazy object, that fetches new data while it is being read using the Data API pagination mechanism.

Invoking .to_list() on a TableFindCursor will cause it to consume all rows and materialize the entire result set as a list. This is not recommended if the amount of results is very large.

Args

filter
a dictionary expressing which condition the returned rows must satisfy. The filter can use operators, such as "$eq" for equality, and require columns to compare with literal values. Simple examples are {} (zero filter, not recommended for large tables), {"match_no": 123} (a shorthand for {"match_no": {"$eq": 123}}, or {"match_no": 123, "round": "C"} (multiple conditions are implicitly combined with "$and"). Please consult the Data API documentation for a more detailed explanation of table search filters and tips on their usage.
projection
a prescription on which columns to return for the matching rows. The projection can take the form {"column1": True, "column2": True}. {"*": True} (i.e. return the whole row), or the complementary form that excludes columns: {"column1": False, "column2": False}. To optimize bandwidth usage, it is recommended to use a projection, especially to avoid unnecessary columns of type vector with high-dimensional embeddings.
row_type
this parameter acts a formal specifier for the type checker. If omitted, the resulting cursor is implicitly a TableFindCursor[ROW, ROW], i.e. maintains the same type for the items it returns as that for the rows in the table. Strictly typed code may want to specify this parameter especially when a projection is given.
skip
if provided, it is a number of rows that would be obtained first in the response and are instead skipped.
limit
a maximum amount of rows to get from the table. The returned cursor will stop yielding rows when either this number is reached or there really are no more matches in the table.
include_similarity
a boolean to request the numeric value of the similarity to be returned as an added "$similarity" key in each returned row. It can be used meaningfully only in a vector search (see sort).
include_sort_vector
a boolean to request the search query vector. If set to True (and if the search is a vector search), calling the get_sort_vector method on the returned cursor will yield the vector used for the ANN search.
sort
this dictionary parameter controls the order in which the rows are returned. The sort parameter can express either a vector search or a regular (ascending/descending, even hierarchical) sorting. * For a vector search the parameter takes the form {"vector_column": qv}, with the query vector qv of the appropriate type (list of floats or DataAPIVector). If the table has automatic embedding generation ("vectorize") enabled on that column, the form {"vectorize_enabled_column": "query text"} is also valid. * In the case of non-vector sorting, the parameter specifies the column(s) and the ascending/descending ordering required. If multiple columns are provided, the sorting applies them hierarchically to the rows. Examples are {"score": SortMode.ASCENDING} (equivalently {"score": +1}), {"score": +1, "when": -1}. Note that, depending on the column(s) chosen for sorting, the table partitioning structure, and the presence of indexes, the sorting may be done in-memory by the API. In that case, there may be performance implications and limitations on the amount of items returned. Consult the Data API documentation for more details on this topic.
request_timeout_ms
a timeout, in milliseconds, to impose on each individual HTTP request to the Data API to accomplish the operation. If not provided, this object's defaults apply.
timeout_ms
an alias for request_timeout_ms.

Returns

a TableFindCursor object, that can be iterated over (and manipulated in several ways), that if needed handles pagination under the hood as the rows are consumed.

Note

As the rows are retrieved in chunks progressively, while the cursor is being iterated over, it is possible that the actual results obtained will reflect changes occurring to the table contents in real time.

Examples

>>> # Iterate over results:
>>> for row in my_table.find({"match_id": "challenge6"}):
...     print(f"(R:{row['round']}): winner {row['winner']}")
...
(R:1): winner Donna
(R:2): winner Erick
(R:3): winner Fiona
>>> # Optimize bandwidth using a projection:
>>> proj = {"round": True, "winner": True}
>>> for row in my_table.find({"match_id": "challenge6"}, projection=proj):
...     print(f"(R:{row['round']}): winner {row['winner']}")
...
(R:1): winner Donna
(R:2): winner Erick
(R:3): winner Fiona
>>> # Filter on the partitioning:
>>> my_table.find({"match_id": "challenge6"}).to_list()
[{'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
>>>
>>> # Filter on primary key:
>>> my_table.find({"match_id": "challenge6", "round": 1}).to_list()
[{'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
>>>
>>> # Filter on a regular indexed column:
>>> my_table.find({"winner": "Caio Gozer"}).to_list()
[{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
>>>
>>> # Non-equality filter on a regular indexed column:
>>> my_table.find({"score": {"$gte": 15}}).to_list()
[{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
>>>
>>> # Filter on a regular non-indexed column:
>>> # (not recommended performance-wise)
>>> my_table.find(
...     {"when": {
...         "$gte": DataAPITimestamp.from_string("1999-12-31T01:23:44Z")
...     }}
... ).to_list()
The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
[{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
>>>
>>> # Empty filter (not recommended performance-wise):
>>> my_table.find({}).to_list()
The Data API returned a warning: {'errorCode': 'ZERO_FILTER_OPERATIONS', ...
[{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
>>>
>>> # Filter on the primary key and a regular non-indexed column:
>>> # (not recommended performance-wise)
>>> my_table.find(
...     {"match_id": "fight5", "round": 3, "winner": "Caio Gozer"}
... ).to_list()
The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
[{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
>>>
>>> # Filter on a regular non-indexed column (and incomplete primary key)
>>> # (not recommended performance-wise)
>>> my_table.find({"round": 3, "winner": "Caio Gozer"}).to_list()
The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
[{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
>>>
>>> # Vector search with "sort" (on an appropriately-indexed vector column):
>>> my_table.find(
...     {},
...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
...     projection={"winner": True},
...     limit=3,
... ).to_list()
[{'winner': 'Donna'}, {'winner': 'Victor'}]
>>>
>>> # Hybrid search with vector sort and non-vector filtering:
>>> my_table.find(
...     {"match_id": "fight4"},
...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
...     projection={"winner": True},
...     limit=3,
... ).to_list()
[{'winner': 'Victor'}]
>>>
>>> # Return the numeric value of the vector similarity
>>> # (also demonstrating that one can pass a plain list for a vector):
>>> my_table.find(
...     {},
...     sort={"m_vector": [0.2, 0.3, 0.4]},
...     projection={"winner": True},
...     limit=3,
...     include_similarity=True,
... ).to_list()
[{'winner': 'Donna', '$similarity': 0.515}, {'winner': 'Victor', ...
>>>
>>> # Non-vector sorting on a 'partitionSort' column:
>>> my_table.find(
...     {"match_id": "fight5"},
...     sort={"round": SortMode.DESCENDING},
...     projection={"winner": True},
... ).to_list()
[{'winner': 'Caio Gozer'}, {'winner': 'Betta Vigo'}, ...
>>>
>>> # Using `skip` and `limit`:
>>> my_table.find(
...     {"match_id": "fight5"},
...     sort={"round": SortMode.DESCENDING},
...     projection={"winner": True},
...     skip=1,
...     limit=2,
... ).to_list()
The Data API returned a warning: {'errorCode': 'IN_MEMORY_SORTING...
[{'winner': 'Betta Vigo'}, {'winner': 'Adam Zuul'}]
>>>
>>> # Non-vector sorting on a regular column:
>>> # (not recommended performance-wise)
>>> my_table.find(
...     {"match_id": "fight5"},
...     sort={"winner": SortMode.ASCENDING},
...     projection={"winner": True},
... ).to_list()
The Data API returned a warning: {'errorCode': 'IN_MEMORY_SORTING...
[{'winner': 'Adam Zuul'}, {'winner': 'Betta Vigo'}, ...
>>>
>>> # Using `.map()` on a cursor:
>>> winner_cursor = my_table.find(
...     {"match_id": "fight5"},
...     sort={"round": SortMode.DESCENDING},
...     projection={"winner": True},
...     limit=5,
... )
>>> print("/".join(winner_cursor.map(lambda row: row["winner"].upper())))
CAIO GOZER/BETTA VIGO/ADAM ZUUL
>>>
>>> # Some other examples of cursor manipulation
>>> matches_cursor = my_table.find(
...     sort={"m_vector": DataAPIVector([-0.1, 0.15, 0.3])}
... )
>>> matches_cursor.has_next()
True
>>> next(matches_cursor)
{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
>>> matches_cursor.consumed
1
>>> matches_cursor.rewind()
>>> matches_cursor.consumed
0
>>> matches_cursor.has_next()
True
>>> matches_cursor.close()
>>> try:
...     next(matches_cursor)
... except StopIteration:
...     print("StopIteration triggered.")
...
StopIteration triggered.
Expand source code
def find(
    self,
    filter: FilterType | None = None,
    *,
    projection: ProjectionType | None = None,
    row_type: type[ROW2] | None = None,
    skip: int | None = None,
    limit: int | None = None,
    include_similarity: bool | None = None,
    include_sort_vector: bool | None = None,
    sort: SortType | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> TableFindCursor[ROW, ROW2]:
    """
    Find rows on the table matching the provided filter and sorting criteria,
    including vector similarity search.

    The result is a `TableFindCursor`: a lazy stream over the matching rows
    that fetches data in pages through the Data API pagination mechanism as it
    is consumed. The cursor can be iterated, mapped over, rewound, closed, or
    materialized at once with `.to_list()` (the latter is discouraged for very
    large result sets). See the `TableFindCursor` documentation for details.

    Args:
        filter: a dictionary stating the conditions returned rows must satisfy.
            It may use operators such as "$eq", e.g. `{"match_no": {"$eq": 123}}`
            (shorthand: `{"match_no": 123}`); multiple keys are implicitly
            combined with "$and"; `{}` matches everything (not recommended for
            large tables). See the Data API documentation for the full filter
            syntax and usage tips.
        projection: which columns to return for each matching row, either
            inclusively (`{"column1": True, ...}`, or `{"*": True}` for the
            whole row) or exclusively (`{"column1": False, ...}`). Using a
            projection is recommended to save bandwidth, in particular to skip
            high-dimensional vector columns that are not needed.
        row_type: a formal specifier for the type checker only. If omitted,
            the cursor is implicitly a `TableFindCursor[ROW, ROW]`. Strictly
            typed code may want to set this, especially alongside a projection.
        skip: number of rows, among those that would be returned first,
            to discard from the results.
        limit: maximum number of rows the cursor will yield (it stops earlier
            if the matches run out).
        include_similarity: if True, each returned row carries an additional
            "$similarity" key with the numeric similarity value. Meaningful
            only for a vector search (see `sort`).
        include_sort_vector: if True (and the search is a vector search),
            the `get_sort_vector` method of the returned cursor will yield
            the vector used for the ANN search.
        sort: controls the ordering of the returned rows. Either a vector
            search, `{"vector_column": qv}` with `qv` a list of floats or a
            DataAPIVector (or `{"vectorize_enabled_column": "query text"}` if
            server-side embedding generation is enabled on the column); or a
            regular, possibly hierarchical, ascending/descending sort such as
            `{"score": SortMode.ASCENDING}` (i.e. `{"score": +1}`) or
            `{"score": +1, "when": -1}`. Depending on the sorting column(s),
            the table partitioning and the available indexes, the API may sort
            in memory, with performance implications and limits on the number
            of returned items — consult the Data API documentation.
        request_timeout_ms: a timeout, in milliseconds, imposed on each
            individual HTTP request to the Data API. If not provided, this
            object's defaults apply.
        timeout_ms: an alias for `request_timeout_ms`.

    Returns:
        a TableFindCursor over the matching rows, handling pagination under
        the hood as the rows are consumed.

    Note:
        Since rows are retrieved in chunks while the cursor is iterated over,
        the results may reflect changes occurring to the table in real time.

    Examples:
        >>> # Iterate over results:
        >>> for row in my_table.find({"match_id": "challenge6"}):
        ...     print(f"(R:{row['round']}): winner {row['winner']}")
        ...
        (R:1): winner Donna
        (R:2): winner Erick
        (R:3): winner Fiona
        >>> # Vector search with a projection and a limit:
        >>> my_table.find(
        ...     {},
        ...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
        ...     projection={"winner": True},
        ...     limit=3,
        ... ).to_list()
        [{'winner': 'Donna'}, {'winner': 'Victor'}]
        >>> # Non-vector sorting on a 'partitionSort' column:
        >>> my_table.find(
        ...     {"match_id": "fight5"},
        ...     sort={"round": SortMode.DESCENDING},
        ...     projection={"winner": True},
        ... ).to_list()
        [{'winner': 'Caio Gozer'}, {'winner': 'Betta Vigo'}, ...
        >>> # Using `.map()` on a cursor:
        >>> winner_cursor = my_table.find(
        ...     {"match_id": "fight5"},
        ...     sort={"round": SortMode.DESCENDING},
        ...     projection={"winner": True},
        ...     limit=5,
        ... )
        >>> print("/".join(winner_cursor.map(lambda row: row["winner"].upper())))
        CAIO GOZER/BETTA VIGO/ADAM ZUUL
        >>> # Cursor manipulation:
        >>> matches_cursor = my_table.find(
        ...     sort={"m_vector": DataAPIVector([-0.1, 0.15, 0.3])}
        ... )
        >>> matches_cursor.has_next()
        True
        >>> next(matches_cursor)
        {'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
        >>> matches_cursor.consumed
        1
        >>> matches_cursor.rewind()
        >>> matches_cursor.consumed
        0
        >>> matches_cursor.close()
        >>> try:
        ...     next(matches_cursor)
        ... except StopIteration:
        ...     print("StopIteration triggered.")
        ...
        StopIteration triggered.
    """

    # Imported here (not at module level) to break a circular-import cycle.
    from astrapy.cursors import TableFindCursor

    # Resolve the per-request timeout: explicit arguments win over the
    # object's configured default.
    chosen_timeout_ms, chosen_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    # Build a bare cursor, then layer the query settings on it one by one
    # through the cursor's fluent configuration methods.
    base_cursor: TableFindCursor[ROW, ROW2] = TableFindCursor(
        table=self,
        request_timeout_ms=chosen_timeout_ms,
        overall_timeout_ms=None,
        request_timeout_label=chosen_label,
    )
    configured_cursor = (
        base_cursor.filter(filter)
        .project(projection)
        .skip(skip)
        .limit(limit)
        .sort(sort)
        .include_similarity(include_similarity)
        .include_sort_vector(include_sort_vector)
    )
    return configured_cursor
def find_one(self, filter: FilterType | None = None, *, projection: ProjectionType | None = None, include_similarity: bool | None = None, sort: SortType | None = None, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> Optional[~ROW]

Run a search according to the given filtering and sorting criteria and return the top row matching it, or nothing if there are none.

The parameters are analogous to some of the parameters to the find method (which has a few more that do not make sense in this case, such as limit).

Args

filter
a dictionary expressing which condition the returned row must satisfy. The filter can use operators, such as "$eq" for equality, and require columns to compare with literal values. Simple examples are {} (zero filter), {"match_no": 123} (a shorthand for {"match_no": {"$eq": 123}}, or {"match_no": 123, "round": "C"} (multiple conditions are implicitly combined with "$and"). Please consult the Data API documentation for a more detailed explanation of table search filters and tips on their usage.
projection
a prescription on which columns to return for the matching row. The projection can take the form {"column1": True, "column2": True}. {"*": True} (i.e. return the whole row), or the complementary form that excludes columns: {"column1": False, "column2": False}. To optimize bandwidth usage, it is recommended to use a projection, especially to avoid unnecessary columns of type vector with high-dimensional embeddings.
include_similarity
a boolean to request the numeric value of the similarity to be returned as an added "$similarity" key in the returned row. It can be used meaningfully only in a vector search (see sort).
sort
this dictionary parameter controls the sorting order, hence determines which row is being returned. The sort parameter can express either a vector search or a regular (ascending/descending, even hierarchical) sorting. * For a vector search the parameter takes the form {"vector_column": qv}, with the query vector qv of the appropriate type (list of floats or DataAPIVector). If the table has automatic embedding generation ("vectorize") enabled on that column, the form {"vectorize_enabled_column": "query text"} is also valid. * In the case of non-vector sorting, the parameter specifies the column(s) and the ascending/descending ordering required. If multiple columns are provided, the sorting applies them hierarchically to the rows. Examples are {"score": SortMode.ASCENDING} (equivalently {"score": +1}), {"score": +1, "when": -1}. Note that, depending on the column(s) chosen for sorting, the table partitioning structure, and the presence of indexes, the sorting may be done in-memory by the API. In that case, there may be performance implications. Consult the Data API documentation for more details on this topic.
general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Returns

a dictionary expressing the result if a row is found, otherwise None.

Examples

>>> from astrapy.constants import SortMode
>>> from astrapy.data_types import DataAPITimestamp, DataAPIVector
>>>
>>> # Filter on the partitioning:
>>> my_table.find_one({"match_id": "challenge6"})
{'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
>>>
>>> # A find with no matches:
>>> str(my_table.find_one({"match_id": "not_real"}))
'None'
>>>
>>> # Optimize bandwidth using a projection:
>>> my_table.find_one(
...     {"match_id": "challenge6"},
...     projection={"round": True, "winner": True},
... )
{'round': 1, 'winner': 'Donna'}
>>>
>>> # Filter on primary key:
>>> my_table.find_one({"match_id": "challenge6", "round": 1})
{'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
>>>
>>> # Filter on a regular indexed column:
>>> my_table.find_one({"winner": "Caio Gozer"})
{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
>>>
>>> # Non-equality filter on a regular indexed column:
>>> my_table.find_one({"score": {"$gte": 15}})
{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
>>>
>>> # Filter on a regular non-indexed column:
>>> # (not recommended performance-wise)
>>> my_table.find_one(
...     {"when": {
...         "$gte": DataAPITimestamp.from_string("1999-12-31T01:23:44Z")
...     }}
... )
The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
>>>
>>> # Empty filter:
>>> my_table.find_one({})
The Data API returned a warning: {'errorCode': 'ZERO_FILTER_OPERATIONS', ...
{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
>>>
>>> # Filter on the primary key and a regular non-indexed column:
>>> # (not recommended performance-wise)
>>> my_table.find_one(
...     {"match_id": "fight5", "round": 3, "winner": "Caio Gozer"}
... )
The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
>>>
>>> # Filter on a regular non-indexed column (and incomplete primary key)
>>> # (not recommended performance-wise)
>>> my_table.find_one({"round": 3, "winner": "Caio Gozer"})
The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
>>>
>>> # Vector search with "sort" (on an appropriately-indexed vector column):
>>> my_table.find_one(
...     {},
...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
...     projection={"winner": True},
... )
{'winner': 'Donna'}
>>>
>>> # Hybrid search with vector sort and non-vector filtering:
>>> my_table.find_one(
...     {"match_id": "fight4"},
...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
...     projection={"winner": True},
... )
{'winner': 'Victor'}
>>>
>>> # Return the numeric value of the vector similarity
>>> # (also demonstrating that one can pass a plain list for a vector):
>>> my_table.find_one(
...     {},
...     sort={"m_vector": [0.2, 0.3, 0.4]},
...     projection={"winner": True},
...     include_similarity=True,
... )
{'winner': 'Donna', '$similarity': 0.515}
>>>
>>> # Non-vector sorting on a 'partitionSort' column:
>>> my_table.find_one(
...     {"match_id": "fight5"},
...     sort={"round": SortMode.DESCENDING},
...     projection={"winner": True},
... )
{'winner': 'Caio Gozer'}
>>>
>>> # Non-vector sorting on a regular column:
>>> # (not recommended performance-wise)
>>> my_table.find_one(
...     {"match_id": "fight5"},
...     sort={"winner": SortMode.ASCENDING},
...     projection={"winner": True},
... )
The Data API returned a warning: {'errorCode': 'IN_MEMORY_SORTING...
{'winner': 'Adam Zuul'}
Expand source code
def find_one(
    self,
    filter: FilterType | None = None,
    *,
    projection: ProjectionType | None = None,
    include_similarity: bool | None = None,
    sort: SortType | None = None,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> ROW | None:
    """
    Run a search with the given filtering and sorting criteria and return
    the single top-matching row, or None if there is no match.

    The parameters mirror a subset of those of the `find` method (which has
    a few extra ones, such as `limit`, that make no sense here).

    Args:
        filter: a dictionary stating the conditions the returned row must
            satisfy. It may use operators such as "$eq", e.g.
            `{"match_no": {"$eq": 123}}` (shorthand: `{"match_no": 123}`);
            multiple keys are implicitly combined with "$and"; `{}` matches
            everything. See the Data API documentation for the full filter
            syntax and usage tips.
        projection: which columns to return for the matching row, either
            inclusively (`{"column1": True, ...}`, or `{"*": True}` for the
            whole row) or exclusively (`{"column1": False, ...}`). Using a
            projection is recommended to save bandwidth, in particular to
            skip high-dimensional vector columns that are not needed.
        include_similarity: if True, the returned row carries an additional
            "$similarity" key with the numeric similarity value. Meaningful
            only for a vector search (see `sort`).
        sort: controls the sorting order, hence determines which row is
            returned. Either a vector search, `{"vector_column": qv}` with
            `qv` a list of floats or a DataAPIVector (or
            `{"vectorize_enabled_column": "query text"}` if server-side
            embedding generation is enabled on the column); or a regular,
            possibly hierarchical, ascending/descending sort such as
            `{"score": SortMode.ASCENDING}` (i.e. `{"score": +1}`) or
            `{"score": +1, "when": -1}`. Depending on the sorting column(s),
            the table partitioning and the available indexes, the API may
            sort in memory, with performance implications — consult the
            Data API documentation.
        general_method_timeout_ms: a timeout, in milliseconds, imposed on the
            underlying API request. If not provided, this object's defaults
            apply. (This method issues a single API request, hence all
            timeout parameters are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a dictionary expressing the result if a row is found, otherwise None.

    Examples:
        >>> from astrapy.constants import SortMode
        >>> from astrapy.data_types import DataAPIVector
        >>>
        >>> # Filter on the partitioning:
        >>> my_table.find_one({"match_id": "challenge6"})
        {'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
        >>>
        >>> # A find with no matches:
        >>> str(my_table.find_one({"match_id": "not_real"}))
        'None'
        >>>
        >>> # Optimize bandwidth using a projection:
        >>> my_table.find_one(
        ...     {"match_id": "challenge6"},
        ...     projection={"round": True, "winner": True},
        ... )
        {'round': 1, 'winner': 'Donna'}
        >>>
        >>> # Vector search with similarity value returned:
        >>> my_table.find_one(
        ...     {},
        ...     sort={"m_vector": [0.2, 0.3, 0.4]},
        ...     projection={"winner": True},
        ...     include_similarity=True,
        ... )
        {'winner': 'Donna', '$similarity': 0.515}
        >>>
        >>> # Non-vector sorting on a 'partitionSort' column:
        >>> my_table.find_one(
        ...     {"match_id": "fight5"},
        ...     sort={"round": SortMode.DESCENDING},
        ...     projection={"winner": True},
        ... )
        {'winner': 'Caio Gozer'}
    """

    # Resolve the single-request timeout from the various aliases and
    # this object's configured defaults.
    chosen_timeout_ms, timeout_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # The "options" sub-object is sent only if the caller expressed a
    # preference about similarity inclusion.
    if include_similarity is None:
        fo_options = None
    else:
        fo_options = {"includeSimilarity": include_similarity}
    # Assemble the findOne command, dropping the parts left unspecified.
    command_body = {
        "filter": filter,
        "projection": normalize_optional_projection(projection),
        "options": fo_options,
        "sort": sort,
    }
    fo_payload = self._converter_agent.preprocess_payload(
        {
            "findOne": {
                key: value for key, value in command_body.items() if value is not None
            }
        }
    )
    fo_response = self._api_commander.request(
        payload=fo_payload,
        timeout_context=_TimeoutContext(
            request_ms=chosen_timeout_ms, label=timeout_label
        ),
    )
    # Validate the response shape before reading it.
    response_data = fo_response.get("data") or {}
    if "document" not in response_data:
        raise UnexpectedDataAPIResponseException(
            text="Response from findOne API command missing 'document'.",
            raw_response=fo_response,
        )
    response_status = fo_response.get("status") or {}
    if "projectionSchema" not in response_status:
        raise UnexpectedDataAPIResponseException(
            text="Response from findOne API command missing 'projectionSchema'.",
            raw_response=fo_response,
        )
    found_row = response_data["document"]
    if found_row is None:
        return None
    # Convert the raw API row back to the client-side representation,
    # honoring the schema returned alongside it.
    return self._converter_agent.postprocess_row(
        found_row,
        columns_dict=response_status["projectionSchema"],
        similarity_pseudocolumn="$similarity" if include_similarity else None,
    )
def info(self, *, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> TableInfo

Return information on the table. This should not be confused with the table definition (i.e. the schema).

Args

database_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying DevOps API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for database_admin_timeout_ms.
timeout_ms
an alias for database_admin_timeout_ms.

Returns

A TableInfo object for inspection.

Example

>>> # Note: output reformatted for clarity.
>>> my_table.info()
TableInfo(
    database_info=AstraDBDatabaseInfo(id=..., name=..., ...),
    keyspace='default_keyspace',
    name='games',
    full_name='default_keyspace.games'
)
Expand source code
def info(
    self,
    *,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> TableInfo:
    """
    Return information on the table, such as its name and the database it
    belongs to. Not to be confused with the table definition (the schema).

    Args:
        database_admin_timeout_ms: a timeout, in milliseconds, imposed on the
            underlying DevOps API request. If not provided, this object's
            defaults apply. (This method issues a single API request, hence
            all timeout parameters are treated the same.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.

    Returns:
        A TableInfo object for inspection.

    Example:
        >>> # Note: output reformatted for clarity.
        >>> my_table.info()
        TableInfo(
            database_info=AstraDBDatabaseInfo(id=..., name=..., ...),
            keyspace='default_keyspace',
            name='games',
            full_name='default_keyspace.games'
        )
    """

    # The database-level portion requires a DevOps API call; the rest is
    # assembled from attributes already held by this Table object.
    database_information = self.database.info(
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    return TableInfo(
        database_info=database_information,
        keyspace=self.keyspace,
        name=self.name,
        full_name=self.full_name,
    )
def insert_many(self, rows: Iterable[ROW], *, ordered: bool = False, chunk_size: int | None = None, concurrency: int | None = None, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> TableInsertManyResult

Insert a number of rows into the table, with implied overwrite in case of primary key collision.

Inserting rows whose primary key corresponds to entries already stored in the table has the effect of an in-place update: the rows are overwritten. However, if the rows being inserted are partially provided, i.e. some columns are not specified, these are left unchanged on the database. To explicitly reset them, specify their value as appropriate to their data type, i.e. None, {} or analogous.

Args

rows
an iterable of dictionaries, each expressing a row to insert. Each row must at least fully specify the primary key column values, while any other column may be omitted if desired (in which case it is left as is on DB). The values for the various columns supplied in each row must be of the right data type for the insertion to succeed. Non-primary-key columns can also be explicitly set to null.
ordered
if False (default), the insertions can occur in arbitrary order and possibly concurrently. If True, they are processed sequentially. If there are no specific reasons against it, unordered insertions are to be preferred as they complete much faster.
chunk_size
how many rows to include in each single API request. Exceeding the server maximum allowed value results in an error. Leave it unspecified (recommended) to use the system default.
concurrency
maximum number of concurrent requests to the API at a given time. It cannot be more than one for ordered insertions.
general_method_timeout_ms
a timeout, in milliseconds, to impose on the whole operation, which may consist of several API requests. If not provided, this object's defaults apply.
request_timeout_ms
a timeout, in milliseconds, to impose on each individual HTTP request to the Data API to accomplish the operation. If not provided, this object's defaults apply.
timeout_ms
an alias for general_method_timeout_ms.

Returns

a TableInsertManyResult object, whose attributes are the primary key of the inserted rows both in the form of dictionaries and of tuples.

Examples

>>> # Insert complete and partial rows at once (concurrently)
>>> from astrapy.data_types import (
...     DataAPISet,
...     DataAPITimestamp,
...     DataAPIVector,
... )
>>> from astrapy.ids import UUID
>>>
>>> insert_result = my_table.insert_many(
...     [
...         {
...             "match_id": "fight4",
...             "round": 1,
...             "winner": "Victor",
...             "score": 18,
...             "when": DataAPITimestamp.from_string(
...                 "2024-11-28T11:30:00Z",
...             ),
...             "fighters": DataAPISet([
...                 UUID("0193539a-2770-8c09-a32a-111111111111"),
...                 UUID('019353e3-00b4-83f9-a127-222222222222'),
...             ]),
...             "m_vector": DataAPIVector([0.4, -0.6, 0.2]),
...         },
...         {"match_id": "fight5", "round": 1, "winner": "Adam"},
...         {"match_id": "fight5", "round": 2, "winner": "Betta"},
...         {"match_id": "fight5", "round": 3, "winner": "Caio"},
...         {
...             "match_id": "challenge6",
...             "round": 1,
...             "winner": "Donna",
...             "m_vector": [0.9, -0.1, -0.3],
...         },
...         {"match_id": "challenge6", "round": 2, "winner": "Erick"},
...         {"match_id": "challenge6", "round": 3, "winner": "Fiona"},
...         {"match_id": "tournamentA", "round": 1, "winner": "Gael"},
...         {"match_id": "tournamentA", "round": 2, "winner": "Hanna"},
...         {
...             "match_id": "tournamentA",
...             "round": 3,
...             "winner": "Ian",
...             "fighters": DataAPISet([
...                 UUID("0193539a-2770-8c09-a32a-111111111111"),
...             ]),
...         },
...         {"match_id": "fight7", "round": 1, "winner": "Joy"},
...         {"match_id": "fight7", "round": 2, "winner": "Kevin"},
...         {"match_id": "fight7", "round": 3, "winner": "Lauretta"},
...     ],
...     concurrency=10,
...     chunk_size=3,
... )
>>> insert_result.inserted_ids
[{'match_id': 'fight4', 'round': 1}, {'match_id': 'fight5', ...
>>> insert_result.inserted_id_tuples
[('fight4', 1), ('fight5', 1), ('fight5', 2), ('fight5', 3), ...
>>>
>>> # Ordered insertion
>>> # (would stop on first failure; predictable end result on DB)
>>> my_table.insert_many(
...     [
...         {"match_id": "fight5", "round": 1, "winner": "Adam0"},
...         {"match_id": "fight5", "round": 2, "winner": "Betta0"},
...         {"match_id": "fight5", "round": 3, "winner": "Caio0"},
...         {"match_id": "fight5", "round": 1, "winner": "Adam Zuul"},
...         {"match_id": "fight5", "round": 2, "winner": "Betta Vigo"},
...         {"match_id": "fight5", "round": 3, "winner": "Caio Gozer"},
...     ],
...     ordered=True,
... )
TableInsertManyResult(inserted_ids=[{'match_id': 'fight5', 'round': 1}, ...

Note

Unordered insertions are executed with some degree of concurrency, so it is usually better to prefer this mode unless the order in the row sequence is important.

Note

If some of the rows are unsuitable for insertion, for instance have the wrong data type for a column or lack the primary key, the Data API validation check will fail for those specific requests that contain the faulty rows. Depending on concurrency and the value of the ordered parameter, a number of rows in general could have been successfully inserted. It is possible to capture such a scenario, and inspect which rows actually got inserted, by catching an error of type TableInsertManyException: its partial_result attribute is precisely a TableInsertManyResult, encoding details on the successful writes.

Expand source code
def insert_many(
    self,
    rows: Iterable[ROW],
    *,
    ordered: bool = False,
    chunk_size: int | None = None,
    concurrency: int | None = None,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> TableInsertManyResult:
    """
    Insert a number of rows into the table,
    with implied overwrite in case of primary key collision.

    Inserting rows whose primary key correspond to entries already stored
    in the table has the effect of an in-place update: the rows are overwritten.
    However, if the rows being inserted are partially provided, i.e. some columns
    are not specified, these are left unchanged on the database. To explicitly
    reset them, specify their value as appropriate to their data type,
    i.e. `None`, `{}` or analogous.

    Args:
        rows: an iterable of dictionaries, each expressing a row to insert.
            Each row must at least fully specify the primary key column values,
            while any other column may be omitted if desired (in which case
            it is left as is on DB).
            The values for the various columns supplied in each row must
            be of the right data type for the insertion to succeed.
            Non-primary-key columns can also be explicitly set to null.
        ordered: if False (default), the insertions can occur in arbitrary order
            and possibly concurrently. If True, they are processed sequentially.
            If there are no specific reasons against it, unordered insertions
            are to be preferred as they complete much faster.
        chunk_size: how many rows to include in each single API request.
            Exceeding the server maximum allowed value results in an error.
            Leave it unspecified (recommended) to use the system default.
        concurrency: maximum number of concurrent requests to the API at
            a given time. It cannot be more than one for ordered insertions.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            whole operation, which may consist of several API requests.
            If not provided, this object's defaults apply.
        request_timeout_ms: a timeout, in milliseconds, to impose on each
            individual HTTP request to the Data API to accomplish the operation.
            If not provided, this object's defaults apply.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a TableInsertManyResult object, whose attributes are the primary key
        of the inserted rows both in the form of dictionaries and of tuples.

    Examples:
        >>> # Insert complete and partial rows at once (concurrently)
        >>> from astrapy.data_types import (
        ...     DataAPISet,
        ...     DataAPITimestamp,
        ...     DataAPIVector,
        ... )
        >>> from astrapy.ids import UUID
        >>>
        >>> insert_result = my_table.insert_many(
        ...     [
        ...         {
        ...             "match_id": "fight4",
        ...             "round": 1,
        ...             "winner": "Victor",
        ...             "score": 18,
        ...             "when": DataAPITimestamp.from_string(
        ...                 "2024-11-28T11:30:00Z",
        ...             ),
        ...             "fighters": DataAPISet([
        ...                 UUID("0193539a-2770-8c09-a32a-111111111111"),
        ...                 UUID('019353e3-00b4-83f9-a127-222222222222'),
        ...             ]),
        ...             "m_vector": DataAPIVector([0.4, -0.6, 0.2]),
        ...         },
        ...         {"match_id": "fight5", "round": 1, "winner": "Adam"},
        ...         {"match_id": "fight5", "round": 2, "winner": "Betta"},
        ...         {"match_id": "fight5", "round": 3, "winner": "Caio"},
        ...         {
        ...             "match_id": "challenge6",
        ...             "round": 1,
        ...             "winner": "Donna",
        ...             "m_vector": [0.9, -0.1, -0.3],
        ...         },
        ...         {"match_id": "challenge6", "round": 2, "winner": "Erick"},
        ...         {"match_id": "challenge6", "round": 3, "winner": "Fiona"},
        ...         {"match_id": "tournamentA", "round": 1, "winner": "Gael"},
        ...         {"match_id": "tournamentA", "round": 2, "winner": "Hanna"},
        ...         {
        ...             "match_id": "tournamentA",
        ...             "round": 3,
        ...             "winner": "Ian",
        ...             "fighters": DataAPISet([
        ...                 UUID("0193539a-2770-8c09-a32a-111111111111"),
        ...             ]),
        ...         },
        ...         {"match_id": "fight7", "round": 1, "winner": "Joy"},
        ...         {"match_id": "fight7", "round": 2, "winner": "Kevin"},
        ...         {"match_id": "fight7", "round": 3, "winner": "Lauretta"},
        ...     ],
        ...     concurrency=10,
        ...     chunk_size=3,
        ... )
        >>> insert_result.inserted_ids
        [{'match_id': 'fight4', 'round': 1}, {'match_id': 'fight5', ...
        >>> insert_result.inserted_id_tuples
        [('fight4', 1), ('fight5', 1), ('fight5', 2), ('fight5', 3), ...
        >>>
        >>> # Ordered insertion
        >>> # (would stop on first failure; predictable end result on DB)
        >>> my_table.insert_many(
        ...     [
        ...         {"match_id": "fight5", "round": 1, "winner": "Adam0"},
        ...         {"match_id": "fight5", "round": 2, "winner": "Betta0"},
        ...         {"match_id": "fight5", "round": 3, "winner": "Caio0"},
        ...         {"match_id": "fight5", "round": 1, "winner": "Adam Zuul"},
        ...         {"match_id": "fight5", "round": 2, "winner": "Betta Vigo"},
        ...         {"match_id": "fight5", "round": 3, "winner": "Caio Gozer"},
        ...     ],
        ...     ordered=True,
        ... )
        TableInsertManyResult(inserted_ids=[{'match_id': 'fight5', 'round': 1}, ...

    Note:
        Unordered insertions are executed with some degree of concurrency,
        so it is usually better to prefer this mode unless the order in the
        row sequence is important.

    Note:
        If some of the rows are unsuitable for insertion, for instance
        have the wrong data type for a column or lack the primary key,
        the Data API validation check will fail for those specific requests
        that contain the faulty rows. Depending on concurrency and the value
        of the `ordered` parameter, a number of rows in general could have
        been successfully inserted.
        It is possible to capture such a scenario, and inspect which rows
        actually got inserted, by catching an error of type
        `astrapy.exceptions.TableInsertManyException`: its `partial_result`
        attribute is precisely a `TableInsertManyResult`, encoding details
        on the successful writes.
    """

    # Resolve the whole-operation timeout: explicit argument first, then the
    # `timeout_ms` alias, then this object's configured default.
    _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
        (general_method_timeout_ms, "general_method_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (
            self.api_options.timeout_options.general_method_timeout_ms,
            "general_method_timeout_ms",
        ),
    )
    # Resolve the per-HTTP-request timeout the same way (no alias here).
    _request_timeout_ms, _rt_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    # Ordered insertions are forced to be sequential; unordered ones default
    # to the library-wide concurrency setting unless the caller overrides it.
    if concurrency is None:
        if ordered:
            _concurrency = 1
        else:
            _concurrency = DEFAULT_INSERT_MANY_CONCURRENCY
    else:
        _concurrency = concurrency
    if _concurrency > 1 and ordered:
        raise ValueError("Cannot run ordered insert_many concurrently.")
    if chunk_size is None:
        _chunk_size = DEFAULT_INSERT_MANY_CHUNK_SIZE
    else:
        _chunk_size = chunk_size
    # Materialize the iterable once: it is sliced into chunks below.
    _rows = list(rows)
    logger.info(f"inserting {len(_rows)} rows in '{self.name}'")
    raw_results: list[dict[str, Any]] = []
    # Tracks the overall-method deadline across the several API requests,
    # each individually capped by the per-request timeout.
    timeout_manager = MultiCallTimeoutManager(
        overall_timeout_ms=_general_method_timeout_ms,
        timeout_label=_gmt_label,
    )
    if ordered:
        # Sequential path: issue one insertMany per chunk, stopping at the
        # first chunk that reports errors (partial results are preserved
        # in the raised exception).
        options = {"ordered": True}
        inserted_ids: list[Any] = []
        inserted_id_tuples: list[Any] = []
        for i in range(0, len(_rows), _chunk_size):
            im_payload = self._converter_agent.preprocess_payload(
                {
                    "insertMany": {
                        "documents": _rows[i : i + _chunk_size],
                        "options": options,
                    },
                },
            )
            logger.info(f"insertMany on '{self.name}'")
            chunk_response = self._api_commander.request(
                payload=im_payload,
                raise_api_errors=False,
                timeout_context=timeout_manager.remaining_timeout(
                    cap_time_ms=_request_timeout_ms,
                    cap_timeout_label=_rt_label,
                ),
            )
            logger.info(f"finished insertMany on '{self.name}'")
            # accumulate the results in this call
            chunk_inserted_ids, chunk_inserted_ids_tuples = (
                self._prepare_keys_from_status(chunk_response.get("status"))
            )
            inserted_ids += chunk_inserted_ids
            inserted_id_tuples += chunk_inserted_ids_tuples
            raw_results += [chunk_response]
            # if errors, quit early
            if chunk_response.get("errors", []):
                partial_result = TableInsertManyResult(
                    raw_results=raw_results,
                    inserted_ids=inserted_ids,
                    inserted_id_tuples=inserted_id_tuples,
                )
                raise TableInsertManyException.from_response(
                    command=None,
                    raw_response=chunk_response,
                    partial_result=partial_result,
                )

        # return
        full_result = TableInsertManyResult(
            raw_results=raw_results,
            inserted_ids=inserted_ids,
            inserted_id_tuples=inserted_id_tuples,
        )
        logger.info(f"finished inserting {len(_rows)} rows in '{self.name}'")
        return full_result

    else:
        # unordered: concurrent or not, do all of them and parse the results
        options = {"ordered": False}
        if _concurrency > 1:
            # Fan the chunks out over a thread pool; executor.map preserves
            # the chunk order in `raw_results` regardless of completion order.
            with ThreadPoolExecutor(max_workers=_concurrency) as executor:

                def _chunk_insertor(
                    row_chunk: list[dict[str, Any]],
                ) -> dict[str, Any]:
                    # Worker: one insertMany request for a single chunk.
                    im_payload = self._converter_agent.preprocess_payload(
                        {
                            "insertMany": {
                                "documents": row_chunk,
                                "options": options,
                            },
                        },
                    )
                    logger.info(f"insertMany(chunk) on '{self.name}'")
                    im_response = self._api_commander.request(
                        payload=im_payload,
                        raise_api_errors=False,
                        timeout_context=timeout_manager.remaining_timeout(
                            cap_time_ms=_request_timeout_ms,
                            cap_timeout_label=_rt_label,
                        ),
                    )
                    logger.info(f"finished insertMany(chunk) on '{self.name}'")
                    return im_response

                raw_results = list(
                    executor.map(
                        _chunk_insertor,
                        (
                            _rows[i : i + _chunk_size]
                            for i in range(0, len(_rows), _chunk_size)
                        ),
                    )
                )
        else:
            # Unordered but single-threaded: same requests, issued in a loop.
            for i in range(0, len(_rows), _chunk_size):
                im_payload = self._converter_agent.preprocess_payload(
                    {
                        "insertMany": {
                            "documents": _rows[i : i + _chunk_size],
                            "options": options,
                        },
                    },
                )
                logger.info(f"insertMany(chunk) on '{self.name}'")
                im_response = self._api_commander.request(
                    payload=im_payload,
                    raise_api_errors=False,
                    timeout_context=timeout_manager.remaining_timeout(
                        cap_time_ms=_request_timeout_ms,
                        cap_timeout_label=_rt_label,
                    ),
                )
                logger.info(f"finished insertMany(chunk) on '{self.name}'")
                raw_results.append(im_response)
        # recast raw_results. Each response has its schema: unfold appropriately
        ids_and_tuples_per_chunk = [
            self._prepare_keys_from_status(chunk_response.get("status"))
            for chunk_response in raw_results
        ]
        inserted_ids = [
            inserted_id
            for chunk_ids, _ in ids_and_tuples_per_chunk
            for inserted_id in chunk_ids
        ]
        inserted_id_tuples = [
            inserted_id_tuple
            for _, chunk_id_tuples in ids_and_tuples_per_chunk
            for inserted_id_tuple in chunk_id_tuples
        ]
        # check-raise
        if any(
            [chunk_response.get("errors", []) for chunk_response in raw_results]
        ):
            partial_result = TableInsertManyResult(
                raw_results=raw_results,
                inserted_ids=inserted_ids,
                inserted_id_tuples=inserted_id_tuples,
            )
            raise TableInsertManyException.from_responses(
                commands=[None for _ in raw_results],
                raw_responses=raw_results,
                partial_result=partial_result,
            )

        # return
        full_result = TableInsertManyResult(
            raw_results=raw_results,
            inserted_ids=inserted_ids,
            inserted_id_tuples=inserted_id_tuples,
        )
        logger.info(f"finished inserting {len(_rows)} rows in '{self.name}'")
        return full_result
def insert_one(self, row: ROW, *, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> TableInsertOneResult

Insert a single row in the table, with implied overwrite in case of primary key collision.

Inserting a row whose primary key corresponds to an entry already stored in the table has the effect of an in-place update: the row is overwritten. However, if the row being inserted is partially provided, i.e. some columns are not specified, these are left unchanged on the database. To explicitly reset them, specify their value as appropriate to their data type, i.e. None, {} or analogous.

Args

row
a dictionary expressing the row to insert. The primary key must be specified in full, while any other column may be omitted if desired (in which case it is left as is on DB). The values for the various columns supplied in the row must be of the right data type for the insertion to succeed. Non-primary-key columns can also be explicitly set to null.
general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Returns

a TableInsertOneResult object, whose attributes are the primary key of the inserted row both in the form of a dictionary and of a tuple.

Examples

>>> # a full-row insert using astrapy's datatypes
>>> from astrapy.data_types import (
...     DataAPISet,
...     DataAPITimestamp,
...     DataAPIVector,
... )
>>> from astrapy.ids import UUID
>>>
>>> insert_result = my_table.insert_one(
...     {
...         "match_id": "mtch_0",
...         "round": 1,
...         "m_vector": DataAPIVector([0.4, -0.6, 0.2]),
...         "score": 18,
...         "when": DataAPITimestamp.from_string("2024-11-28T11:30:00Z"),
...         "winner": "Victor",
...         "fighters": DataAPISet([
...             UUID("0193539a-2770-8c09-a32a-111111111111"),
...         ]),
...     },
... )
>>> insert_result.inserted_id
{'match_id': 'mtch_0', 'round': 1}
>>> insert_result.inserted_id_tuple
('mtch_0', 1)
>>>
>>> # a partial-row (which in this case overwrites some of the values)
>>> my_table.insert_one(
...     {
...         "match_id": "mtch_0",
...         "round": 1,
...         "winner": "Victor Vector",
...         "fighters": DataAPISet([
...             UUID("0193539a-2770-8c09-a32a-111111111111"),
...             UUID("0193539a-2880-8875-9f07-222222222222"),
...         ]),
...     },
... )
TableInsertOneResult(inserted_id={'match_id': 'mtch_0', 'round': 1} ...
>>>
>>> # another insertion demonstrating standard-library datatypes in values
>>> import datetime
>>>
>>> my_table.insert_one(
...     {
...         "match_id": "mtch_0",
...         "round": 2,
...         "winner": "Angela",
...         "score": 25,
...         "when": datetime.datetime(
...             2024, 7, 13, 12, 55, 30, 889,
...             tzinfo=datetime.timezone.utc,
...         ),
...         "fighters": {
...             UUID("019353cb-8e01-8276-a190-333333333333"),
...         },
...         "m_vector": [0.4, -0.6, 0.2],
...     },
... )
TableInsertOneResult(inserted_id={'match_id': 'mtch_0', 'round': 2}, ...
Expand source code
def insert_one(
    self,
    row: ROW,
    *,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> TableInsertOneResult:
    """
    Insert a single row in the table,
    with implied overwrite in case of primary key collision.

    Inserting a row whose primary key corresponds to an entry already stored
    in the table has the effect of an in-place update: the row is overwritten.
    However, if the row being inserted is partially provided, i.e. some columns
    are not specified, these are left unchanged on the database. To explicitly
    reset them, specify their value as appropriate to their data type,
    i.e. `None`, `{}` or analogous.

    Args:
        row: a dictionary expressing the row to insert. The primary key
            must be specified in full, while any other column may be omitted
            if desired (in which case it is left as is on DB).
            The values for the various columns supplied in the row must
            be of the right data type for the insertion to succeed.
            Non-primary-key columns can also be explicitly set to null.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a TableInsertOneResult object, whose attributes are the primary key
        of the inserted row both in the form of a dictionary and of a tuple.

    Examples:
        >>> # a full-row insert using astrapy's datatypes
        >>> from astrapy.data_types import (
        ...     DataAPISet,
        ...     DataAPITimestamp,
        ...     DataAPIVector,
        ... )
        >>> from astrapy.ids import UUID
        >>>
        >>> insert_result = my_table.insert_one(
        ...     {
        ...         "match_id": "mtch_0",
        ...         "round": 1,
        ...         "m_vector": DataAPIVector([0.4, -0.6, 0.2]),
        ...         "score": 18,
        ...         "when": DataAPITimestamp.from_string("2024-11-28T11:30:00Z"),
        ...         "winner": "Victor",
        ...         "fighters": DataAPISet([
        ...             UUID("0193539a-2770-8c09-a32a-111111111111"),
        ...         ]),
        ...     },
        ... )
        >>> insert_result.inserted_id
        {'match_id': 'mtch_0', 'round': 1}
        >>> insert_result.inserted_id_tuple
        ('mtch_0', 1)
        >>>
        >>> # a partial-row (which in this case overwrites some of the values)
        >>> my_table.insert_one(
        ...     {
        ...         "match_id": "mtch_0",
        ...         "round": 1,
        ...         "winner": "Victor Vector",
        ...         "fighters": DataAPISet([
        ...             UUID("0193539a-2770-8c09-a32a-111111111111"),
        ...             UUID("0193539a-2880-8875-9f07-222222222222"),
        ...         ]),
        ...     },
        ... )
        TableInsertOneResult(inserted_id={'match_id': 'mtch_0', 'round': 1} ...
        >>>
        >>> # another insertion demonstrating standard-library datatypes in values
        >>> import datetime
        >>>
        >>> my_table.insert_one(
        ...     {
        ...         "match_id": "mtch_0",
        ...         "round": 2,
        ...         "winner": "Angela",
        ...         "score": 25,
        ...         "when": datetime.datetime(
        ...             2024, 7, 13, 12, 55, 30, 889,
        ...             tzinfo=datetime.timezone.utc,
        ...         ),
        ...         "fighters": {
        ...             UUID("019353cb-8e01-8276-a190-333333333333"),
        ...         },
        ...         "m_vector": [0.4, -0.6, 0.2],
        ...     },
        ... )
        TableInsertOneResult(inserted_id={'match_id': 'mtch_0', 'round': 2}, ...
    """

    # Single API request: all timeout flavors collapse into one value.
    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # Convert the caller-supplied row into its over-the-wire representation.
    insert_payload = self._converter_agent.preprocess_payload(
        {"insertOne": {"document": row}}
    )
    logger.info(f"insertOne on '{self.name}'")
    api_response = self._api_commander.request(
        payload=insert_payload,
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    logger.info(f"finished insertOne on '{self.name}'")
    # Guard clauses: validate the response shape before extracting the key.
    if "insertedIds" not in api_response.get("status", {}):
        raise UnexpectedDataAPIResponseException(
            text="Response from insertOne API command missing 'insertedIds'.",
            raw_response=api_response,
        )
    if not api_response["status"]["insertedIds"]:
        raise UnexpectedDataAPIResponseException(
            text="Response from insertOne API command has empty 'insertedIds'.",
            raw_response=api_response,
        )
    if not api_response["status"]["primaryKeySchema"]:
        raise UnexpectedDataAPIResponseException(
            text=(
                "Response from insertOne API command has "
                "empty 'primaryKeySchema'."
            ),
            raw_response=api_response,
        )
    # Decode the primary key (list form) back into dict and tuple forms.
    key_as_list = api_response["status"]["insertedIds"][0]
    inserted_id_tuple, inserted_id = self._converter_agent.postprocess_key(
        key_as_list,
        primary_key_schema_dict=api_response["status"]["primaryKeySchema"],
    )
    return TableInsertOneResult(
        raw_results=[api_response],
        inserted_id=inserted_id,
        inserted_id_tuple=inserted_id_tuple,
    )
def list_index_names(self, *, table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[str]

List the names of all indexes existing on this table.

Args

table_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for table_admin_timeout_ms.
timeout_ms
an alias for table_admin_timeout_ms.

Returns

a list of the index names as strings, in no particular order.

Example

>>> my_table.list_index_names()
['m_vector_index', 'winner_index', 'score_index']
Expand source code
def list_index_names(
    self,
    *,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[str]:
    """
    List the names of all indexes existing on this table.

    Args:
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Returns:
        a list of the index names as strings, in no particular order.

    Example:
        >>> my_table.list_index_names()
        ['m_vector_index', 'winner_index', 'score_index']
    """

    # Single API request: collapse the timeout aliases into one value.
    _timeout_ms, _timeout_label = _select_singlereq_timeout_ta(
        timeout_options=self.api_options.timeout_options,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    payload: dict[str, Any] = {"listIndexes": {"options": {}}}
    logger.info("listIndexes")
    response = self._api_commander.request(
        payload=payload,
        timeout_context=_TimeoutContext(
            request_ms=_timeout_ms, label=_timeout_label
        ),
    )
    # Early-raise if the response does not have the expected shape.
    if "indexes" not in response.get("status", {}):
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from listIndexes API command.",
            raw_response=response,
        )
    logger.info("finished listIndexes")
    return response["status"]["indexes"]  # type: ignore[no-any-return]
def list_indexes(self, *, table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[TableIndexDescriptor]

List the full definitions of all indexes existing on this table.

Args

table_admin_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for table_admin_timeout_ms.
timeout_ms
an alias for table_admin_timeout_ms.

Returns

a list of TableIndexDescriptor objects in no particular order, each providing the details of an index present on the table.

Example

>>> indexes = my_table.list_indexes()
>>> indexes
[TableIndexDescriptor(name='m_vector_index', definition=...)...]  # Note: shortened
>>> indexes[1].definition.column
'winner'
>>> indexes[1].definition.options.case_sensitive
False
Expand source code
def list_indexes(
    self,
    *,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[TableIndexDescriptor]:
    """
    List the full definitions of all indexes existing on this table.

    Args:
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Returns:
        a list of `astrapy.info.TableIndexDescriptor` objects in no particular
        order, each providing the details of an index present on the table.

    Example:
        >>> indexes = my_table.list_indexes()
        >>> indexes
        [TableIndexDescriptor(name='m_vector_index', definition=...)...]  # Note: shortened
        >>> indexes[1].definition.column
        'winner'
        >>> indexes[1].definition.options.case_sensitive
        False
    """

    # Resolve the effective timeout from the three (aliased) parameters.
    chosen_timeout_ms, timeout_label = _select_singlereq_timeout_ta(
        timeout_options=self.api_options.timeout_options,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info("listIndexes")
    # "explain": True asks the API for full index definitions, not just names.
    li_response = self._api_commander.request(
        payload={"listIndexes": {"options": {"explain": True}}},
        timeout_context=_TimeoutContext(
            request_ms=chosen_timeout_ms, label=timeout_label
        ),
    )
    # A well-formed response carries an "indexes" entry under "status".
    if "indexes" not in li_response.get("status", {}):
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from listIndexes API command.",
            raw_response=li_response,
        )
    logger.info("finished listIndexes")
    return [
        TableIndexDescriptor.coerce(raw_index)
        for raw_index in li_response["status"]["indexes"]
    ]
def to_async(self: Table[ROW], *, embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = (unset), api_options: APIOptions | UnsetType = (unset)) ‑> AsyncTable[ROW]

Create an AsyncTable from this one. Save for the arguments explicitly provided as overrides, everything else is kept identical to this table in the copy (the database is converted into an async object).

Args

embedding_api_key
optional API key(s) for interacting with the table. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
api_options
any additional options to set for the result, in the form of an APIOptions instance (where one can set just the needed attributes). In case the same setting is also provided as named parameter, the latter takes precedence.

Returns

the new copy, an AsyncTable instance.

Example

>>> asyncio.run(my_table.to_async().find_one(
...     {"match_id": "fight4"},
...     projection={"winner": True},
... ))
{"pk": 1, "column": "value"}
Expand source code
def to_async(
    self: Table[ROW],
    *,
    embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
    api_options: APIOptions | UnsetType = _UNSET,
) -> AsyncTable[ROW]:
    """
    Create an AsyncTable from this one. Save for the arguments
    explicitly provided as overrides, everything else is kept identical
    to this table in the copy (the database is converted into
    an async object).

    Args:
        embedding_api_key: optional API key(s) for interacting with the table.
            If an embedding service is configured, and this parameter is not None,
            each Data API call will include the necessary embedding-related headers
            as specified by this parameter. If a string is passed, it translates
            into the one "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            For some vectorize providers/models, if using header-based authentication,
            specialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`
            should be supplied.
        api_options: any additional options to set for the result, in the form of
            an APIOptions instance (where one can set just the needed attributes).
            In case the same setting is also provided as named parameter,
            the latter takes precedence.

    Returns:
        the new copy, an AsyncTable instance.

    Example:
        >>> asyncio.run(my_table.to_async().find_one(
        ...     {"match_id": "fight4"},
        ...     projection={"winner": True},
        ... ))
        {"pk": 1, "column": "value"}
    """

    # Named-parameter overrides are applied last, so they win over `api_options`.
    named_param_options = APIOptions(
        embedding_api_key=embedding_api_key,
    )
    resulting_api_options = (
        self.api_options
        .with_override(api_options)
        .with_override(named_param_options)
    )
    return AsyncTable(
        database=self.database.to_async(),
        name=self.name,
        keyspace=self.keyspace,
        api_options=resulting_api_options,
    )
def update_one(self, filter: FilterType, update: dict[str, Any], *, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Update a single document on the table, changing some or all of the columns, with the implicit behaviour of inserting a new row if no match is found.

Args

filter
a predicate expressing the table primary key in full, i.e. a dictionary defining values for all columns that form the primary key. An example may be {"match_id": "fight4", "round": 1}.
update
the update prescription to apply to the row, expressed as a dictionary conforming to the Data API syntax. The update operators for tables are $set and $unset (in particular, setting a column to None has the same effect as the $unset operator). Examples are {"$set": {"round": 12}} and {"$unset": {"winner": "", "score": ""}}. Note that the update operation cannot alter the primary key columns. See the Data API documentation for more details.
general_method_timeout_ms
a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms
an alias for general_method_timeout_ms.
timeout_ms
an alias for general_method_timeout_ms.

Examples

>>> from astrapy.data_types import DataAPISet
>>>
>>> # Set a new value for a column
>>> my_table.update_one(
...     {"match_id": "fight4", "round": 1},
...     update={"$set": {"winner": "Winona"}},
... )
>>>
>>> # Set a new value for a column while unsetting another column
>>> my_table.update_one(
...     {"match_id": "fight4", "round": 1},
...     update={"$set": {"winner": None, "score": 24}},
... )
>>>
>>> # Set a 'set' column to empty
>>> my_table.update_one(
...     {"match_id": "fight4", "round": 1},
...     update={"$set": {"fighters": DataAPISet()}},
... )
>>>
>>> # Set a 'set' column to empty using None
>>> my_table.update_one(
...     {"match_id": "fight4", "round": 1},
...     update={"$set": {"fighters": None}},
... )
>>>
>>> # Set a 'set' column to empty using a regular (empty) set
>>> my_table.update_one(
...     {"match_id": "fight4", "round": 1},
...     update={"$set": {"fighters": set()}},
... )
>>>
>>> # Set a 'set' column to empty using $unset
>>> my_table.update_one(
...     {"match_id": "fight4", "round": 1},
...     update={"$unset": {"fighters": None}},
... )
>>>
>>> # A non-existing primary key creates a new row
>>> my_table.update_one(
...     {"match_id": "bar_fight", "round": 4},
...     update={"$set": {"score": 8, "winner": "Jack"}},
... )
>>>
>>> # Delete column values for a row (they'll read as None now)
>>> my_table.update_one(
...     {"match_id": "challenge6", "round": 2},
...     update={"$unset": {"winner": None, "score": None}},
... )

Note

a row created entirely with update operations (as opposed to insertions) may, correspondingly, be deleted by means of an $unset update on all columns.

Expand source code
def update_one(
    self,
    filter: FilterType,
    update: dict[str, Any],
    *,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Update a single document on the table, changing some or all of the columns,
    with the implicit behaviour of inserting a new row if no match is found.

    Args:
        filter: a predicate expressing the table primary key in full,
            i.e. a dictionary defining values for all columns that form the
            primary key. An example may be `{"match_id": "fight4", "round": 1}`.
        update: the update prescription to apply to the row, expressed
            as a dictionary conforming to the Data API syntax. The update
            operators for tables are `$set` and `$unset` (in particular,
            setting a column to None has the same effect as the $unset operator).
            Examples are `{"$set": {"round": 12}}` and
            `{"$unset": {"winner": "", "score": ""}}`.
            Note that the update operation cannot alter the primary key columns.
            See the Data API documentation for more details.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Examples:
        >>> from astrapy.data_types import DataAPISet
        >>>
        >>> # Set a new value for a column
        >>> my_table.update_one(
        ...     {"match_id": "fight4", "round": 1},
        ...     update={"$set": {"winner": "Winona"}},
        ... )
        >>>
        >>> # Set a new value for a column while unsetting another column
        >>> my_table.update_one(
        ...     {"match_id": "fight4", "round": 1},
        ...     update={"$set": {"winner": None, "score": 24}},
        ... )
        >>>
        >>> # Set a 'set' column to empty
        >>> my_table.update_one(
        ...     {"match_id": "fight4", "round": 1},
        ...     update={"$set": {"fighters": DataAPISet()}},
        ... )
        >>>
        >>> # Set a 'set' column to empty using None
        >>> my_table.update_one(
        ...     {"match_id": "fight4", "round": 1},
        ...     update={"$set": {"fighters": None}},
        ... )
        >>>
        >>> # Set a 'set' column to empty using a regular (empty) set
        >>> my_table.update_one(
        ...     {"match_id": "fight4", "round": 1},
        ...     update={"$set": {"fighters": set()}},
        ... )
        >>>
        >>> # Set a 'set' column to empty using $unset
        >>> my_table.update_one(
        ...     {"match_id": "fight4", "round": 1},
        ...     update={"$unset": {"fighters": None}},
        ... )
        >>>
        >>> # A non-existing primary key creates a new row
        >>> my_table.update_one(
        ...     {"match_id": "bar_fight", "round": 4},
        ...     update={"$set": {"score": 8, "winner": "Jack"}},
        ... )
        >>>
        >>> # Delete column values for a row (they'll read as None now)
        >>> my_table.update_one(
        ...     {"match_id": "challenge6", "round": 2},
        ...     update={"$unset": {"winner": None, "score": None}},
        ... )

    Note:
        a row created entirely with update operations (as opposed to insertions)
        may, correspondingly, be deleted by means of an $unset update on all columns.
    """

    # Resolve the effective timeout from the three (aliased) parameters.
    chosen_timeout_ms, timeout_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # Assemble the command, omitting entries that happen to be None.
    candidate_fields = {
        "filter": filter,
        "update": self._converter_agent.preprocess_payload(update),
    }
    uo_payload = {
        "updateOne": {
            field: value
            for field, value in candidate_fields.items()
            if value is not None
        }
    }
    logger.info(f"updateOne on '{self.name}'")
    uo_response = self._api_commander.request(
        payload=uo_payload,
        timeout_context=_TimeoutContext(
            request_ms=chosen_timeout_ms, label=timeout_label
        ),
    )
    logger.info(f"finished updateOne on '{self.name}'")
    # Presence of "status" marks success; its contents are disregarded.
    if "status" not in uo_response:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from updateOne API command.",
            raw_response=uo_response,
        )
    return
def with_options(self: Table[ROW], *, embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = (unset), api_options: APIOptions | UnsetType = (unset)) ‑> Table[ROW]

Create a clone of this table with some changed attributes.

Args

embedding_api_key
optional API key(s) for interacting with the table. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
api_options
any additional options to set for the clone, in the form of an APIOptions instance (where one can set just the needed attributes). In case the same setting is also provided as named parameter, the latter takes precedence.

Returns

a new Table instance.

Example

>>> table_with_api_key_configured = my_table.with_options(
...     embedding_api_key="secret-key-0123abcd...",
... )
Expand source code
def with_options(
    self: Table[ROW],
    *,
    embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
    api_options: APIOptions | UnsetType = _UNSET,
) -> Table[ROW]:
    """
    Create a clone of this table with some changed attributes.

    Args:
        embedding_api_key: optional API key(s) for interacting with the table.
            If an embedding service is configured, and this parameter is not None,
            each Data API call will include the necessary embedding-related headers
            as specified by this parameter. If a string is passed, it translates
            into the one "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            For some vectorize providers/models, if using header-based authentication,
            specialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`
            should be supplied.
        api_options: any additional options to set for the clone, in the form of
            an APIOptions instance (where one can set just the needed attributes).
            In case the same setting is also provided as named parameter,
            the latter takes precedence.

    Returns:
        a new Table instance.

    Example:
        >>> table_with_api_key_configured = my_table.with_options(
        ...     embedding_api_key="secret-key-0123abcd...",
        ... )
    """

    # Cloning with overrides is delegated entirely to the internal copy helper.
    cloned_table = self._copy(
        embedding_api_key=embedding_api_key,
        api_options=api_options,
    )
    return cloned_table