Package `astrapy`

Expand source code

# Copyright DataStax, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import importlib.metadata
import os

import toml


def get_version() -> str:
    """
    Determine the version string of this package.

    The version is first looked up in the metadata of the installed
    distribution, through `importlib.metadata`. If the package is not
    installed, the `pyproject.toml` file expected one directory above this
    file is parsed and its `tool.poetry.version` entry is used instead.

    This function is invoked at import time, therefore the fallback is
    strictly best-effort: any problem reading or parsing `pyproject.toml`
    yields "unknown" rather than breaking the import of the package.

    Returns:
        the version string, or "unknown" if it cannot be determined.
    """
    try:
        # The version recorded in the installed distribution's metadata
        return importlib.metadata.version(__package__)
    except importlib.metadata.PackageNotFoundError:
        # Not installed (e.g. running from a source checkout): use pyproject.toml
        pass

    dir_path = os.path.dirname(os.path.realpath(__file__))
    pyproject_path = os.path.join(dir_path, "..", "pyproject.toml")

    try:
        with open(pyproject_path, encoding="utf-8") as pyproject:
            pyproject_data = toml.loads(pyproject.read())
        return str(pyproject_data["tool"]["poetry"]["version"])
    except (OSError, KeyError, TypeError, ValueError):
        # OSError: file missing or unreadable (includes FileNotFoundError).
        # KeyError/TypeError: the expected tool.poetry.version entry is absent
        # or the document does not have the expected nested-table shape.
        # ValueError: malformed content (covers UnicodeDecodeError and the
        # toml decode error, which is a ValueError subclass).
        return "unknown"


# The package version, resolved once when the package is imported.
__version__: str = get_version()


# The imports below are placed after the `__version__` assignment rather than
# at the top of the module; the E402 lint rule is silenced on each of them.
from astrapy import api_options  # noqa: E402, F401
from astrapy.admin import (  # noqa: E402
    AstraDBAdmin,
    AstraDBDatabaseAdmin,
    DataAPIDatabaseAdmin,
)
from astrapy.client import DataAPIClient  # noqa: E402
from astrapy.collection import AsyncCollection, Collection  # noqa: E402

# A circular-import issue requires this to happen at the end of this module:
from astrapy.database import AsyncDatabase, Database  # noqa: E402
from astrapy.table import AsyncTable, Table  # noqa: E402

# Names exposed at the top level of the package.
__all__ = [
    "AstraDBAdmin",
    "AstraDBDatabaseAdmin",
    "AsyncCollection",
    "AsyncDatabase",
    "AsyncTable",
    "Collection",
    "Database",
    "DataAPIClient",
    "DataAPIDatabaseAdmin",
    "Table",
    "__version__",
]


# NOTE(review): presumably read by the pdoc documentation generator to leave
# the `ids` and `settings` names out of the generated docs - confirm.
__pdoc__ = {
    "ids": False,
    "settings": False,
}

Sub-modules

astrapy.admin
astrapy.api_options
astrapy.authentication
astrapy.client
astrapy.collection
astrapy.constants
astrapy.cursors
astrapy.data
astrapy.data_types
astrapy.database
astrapy.exceptions
astrapy.info
astrapy.results
astrapy.table
astrapy.utils

Classes

class AstraDBAdmin (*, api_options: FullAPIOptions)

An "admin" object, able to perform administrative tasks at the databases level, such as creating, listing or dropping databases.

This class is not meant for direct instantiation by the user, rather it is obtained by invoking methods such as get_admin of DataAPIClient.

Args

api_options: a complete specification of the API Options for this instance.

Example

>>> from astrapy import DataAPIClient
>>> my_client = DataAPIClient("AstraCS:...")
>>> my_astra_db_admin = my_client.get_admin()
>>> database_list = my_astra_db_admin.list_databases()
>>> len(database_list)
3
>>> database_list[2].id
'01234567-...'
>>> my_db_admin = my_astra_db_admin.get_database_admin("01234567-...")
>>> my_db_admin.list_keyspaces()
['default_keyspace', 'staging_keyspace']

Note

A more powerful token may be required than the one sufficient for working in the Database, Collection and Table classes. Check the provided token if "Unauthorized" errors are encountered.

Expand source code

class AstraDBAdmin:
    """
    An "admin" object, able to perform administrative tasks at the databases
    level, such as creating, listing or dropping databases.

    This class is not meant for direct instantiation by the user, rather
    it is obtained by invoking methods such as `get_admin`
    of DataAPIClient.

    Args:
        api_options: a complete specification of the API Options for this instance.

    Example:
        >>> from astrapy import DataAPIClient
        >>> my_client = DataAPIClient("AstraCS:...")
        >>> my_astra_db_admin = my_client.get_admin()
        >>> database_list = my_astra_db_admin.list_databases()
        >>> len(database_list)
        3
        >>> database_list[2].id
        '01234567-...'
        >>> my_db_admin = my_astra_db_admin.get_database_admin("01234567-...")
        >>> my_db_admin.list_keyspaces()
        ['default_keyspace', 'staging_keyspace']

    Note:
        A more powerful token may be required than the one sufficient for working
        in the Database, Collection and Table classes. Check the provided token
        if "Unauthorized" errors are encountered.
    """

    def __init__(
        self,
        *,
        api_options: FullAPIOptions,
    ) -> None:
        """
        Set up the admin: validate the environment, store the options and
        prepare the headers and the commander used for DevOps API requests.

        Args:
            api_options: a complete specification of the API Options
                for this instance.

        Raises:
            InvalidEnvironmentException: if the environment in the options
                is not one of the Astra DB environments.
        """
        if api_options.environment not in Environment.astra_db_values:
            raise InvalidEnvironmentException(
                "Environments outside of Astra DB are not supported."
            )

        self.api_options = api_options

        # The auth header (if a token is set) goes first; the additional
        # admin headers are applied afterwards and win on a key clash.
        commander_headers: dict[str, str | None] = {}
        if self.api_options.token:
            token_string = self.api_options.token.get_token()
            commander_headers[DEFAULT_DEV_OPS_AUTH_HEADER] = (
                f"{DEFAULT_DEV_OPS_AUTH_PREFIX}{token_string}"
            )
        commander_headers.update(self.api_options.admin_additional_headers)

        self._dev_ops_commander_headers = commander_headers
        self._dev_ops_api_commander = self._get_dev_ops_api_commander()

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}({self.api_options})"

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, AstraDBAdmin):
            return all([self.api_options == other.api_options])
        else:
            return False

    def _get_dev_ops_api_commander(self) -> APICommander:
        """
        Build a fresh APICommander out of this object's settings.

        The request path is made of the DevOps API version (when set) followed
        by "databases"; each segment is stripped of leading and trailing
        slashes and segments that end up empty are left out.
        """
        url_options = self.api_options.dev_ops_api_url_options
        path_segments: list[str] = []
        for raw_segment in (url_options.dev_ops_api_version, "databases"):
            if raw_segment is None:
                continue
            segment = raw_segment.strip("/")
            if segment != "":
                path_segments.append(segment)

        return APICommander(
            api_endpoint=url_options.dev_ops_url,
            path="/".join(path_segments),
            headers=self._dev_ops_commander_headers,
            callers=self.api_options.callers,
            dev_ops_api=True,
            redacted_header_names=self.api_options.redacted_header_names,
        )

    def _copy(
        self,
        *,
        token: str | TokenProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> AstraDBAdmin:
        """
        Return a new AstraDBAdmin whose options are this object's options,
        overridden first by `api_options` and then by the `token` parameter.
        """
        token_override = APIOptions(
            token=token,
        )
        options_after_first_override = self.api_options.with_override(api_options)
        merged_options = options_after_first_override.with_override(token_override)
        return AstraDBAdmin(api_options=merged_options)

    def with_options(
        self,
        *,
        token: str | TokenProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> AstraDBAdmin:
        """
        Return a copy of this AstraDBAdmin where some settings are replaced.

        Args:
            token: an Access Token to the database, such as `"AstraCS:xyz..."`.
                Both a plain token string and an instance of a subclass of
                `astrapy.authentication.TokenProvider` are accepted.
            api_options: further settings for the copy, expressed as an
                APIOptions instance in which only the attributes to change
                need to be set. If a setting is given both here and through
                a named parameter, the named parameter prevails.

        Returns:
            a new AstraDBAdmin instance.

        Example:
            >>> different_auth_astra_db_admin = my_astra_db_admin.with_options(
            ...     token="AstraCS:xyz...",
            ... )
        """

        cloned_admin = self._copy(token=token, api_options=api_options)
        return cloned_admin

    def list_databases(
        self,
        *,
        include: str | None = None,
        provider: str | None = None,
        page_size: int | None = None,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[AstraDBAdminDatabaseInfo]:
        """
        Retrieve, through the DevOps API, the list of databases.

        Args:
            include: restricts which databases are returned. The DevOps API
                default is "nonterminated"; use "all" to also get the
                databases that have already been terminated.
            provider: restricts the results to one cloud provider, e.g. "AWS".
                The DevOps API default is "ALL".
            page_size: how many results the DevOps API returns per page.
            database_admin_timeout_ms: a timeout, in milliseconds, applied to
                the underlying API request; this object's defaults are used
                if it is omitted. Listing a very large number of databases
                may take several DevOps API requests, but a single request
                is by far the common case: accordingly, the timeout acts on
                each request individually and the overall duration of this
                method is not checked.
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.

        Returns:
            A list of AstraDBAdminDatabaseInfo objects.

        Example:
            >>> database_list = my_astra_db_admin.list_databases()
            >>> len(database_list)
            3
            >>> database_list[2].id
            '01234567-...'
            >>> database_list[2].status
            'ACTIVE'
            >>> database_list[2].info.region
            'eu-west-1'
        """

        resolved_timeout_ms, timeout_label = _select_singlereq_timeout_da(
            timeout_options=self.api_options.timeout_options,
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        per_request_context = _TimeoutContext(
            request_ms=resolved_timeout_ms,
            label=timeout_label,
        )
        return self._list_databases_ctx(
            include=include,
            provider=provider,
            page_size=page_size,
            timeout_context=per_request_context,
        )

    def _list_databases_ctx(
        self,
        *,
        include: str | None,
        provider: str | None,
        page_size: int | None,
        timeout_context: _TimeoutContext,
    ) -> list[AstraDBAdminDatabaseInfo]:
        # Counterpart of `list_databases` receiving the timeout
        # already in the form of a _TimeoutContext.
        logger.info("getting databases (DevOps API)")
        candidate_params = {
            "include": include,
            "provider": provider,
            "limit": page_size or DEV_OPS_DEFAULT_DATABASES_PAGE_SIZE,
        }
        # parameters whose value is None are not sent at all
        base_params = {
            name: value for name, value in candidate_params.items() if value is not None
        }
        pages: list[list[dict[str, Any]]] = []
        logger.info("request 0, getting databases (DevOps API)")
        first_page = self._dev_ops_api_commander.request(
            http_method=HttpMethod.GET,
            request_params=base_params,
            timeout_context=timeout_context,
        )
        if not isinstance(first_page, list):
            raise DevOpsAPIException(
                "Faulty response from get-databases DevOps API command.",
            )
        logger.info("finished request 0, getting databases (DevOps API)")
        pages.append(first_page)
        # As long as the latest page is full, another request is issued,
        # asking for the items that follow the last database received.
        while len(pages[-1]) >= base_params["limit"]:
            last_received_item = pages[-1][-1]
            if "id" not in last_received_item:
                raise DevOpsAPIException(
                    "Faulty response from get-databases DevOps API command.",
                )
            next_params = {
                **base_params,
                "starting_after": last_received_item["id"],
            }
            request_index = len(pages)
            logger.info(
                "request %s, getting databases (DevOps API)",
                request_index,
            )
            next_page = self._dev_ops_api_commander.request(
                http_method=HttpMethod.GET,
                request_params=next_params,
                timeout_context=timeout_context,
            )
            logger.info(
                "finished request %s, getting databases (DevOps API)",
                request_index,
            )
            if not isinstance(next_page, list):
                raise DevOpsAPIException(
                    "Faulty response from get-databases DevOps API command.",
                )
            pages.append(next_page)

        logger.info("finished getting databases (DevOps API)")
        # flatten the pages, converting each item into a database info object
        return [
            _recast_as_admin_database_info(
                raw_database,
                environment=self.api_options.environment,
            )
            for page in pages
            for raw_database in page
        ]

    async def async_list_databases(
        self,
        *,
        include: str | None = None,
        provider: str | None = None,
        page_size: int | None = None,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[AstraDBAdminDatabaseInfo]:
        """
        Retrieve, through the DevOps API, the list of databases.
        This is the async version of the method, to be awaited
        within an asyncio context.

        Args:
            include: restricts which databases are returned. The DevOps API
                default is "nonterminated"; use "all" to also get the
                databases that have already been terminated.
            provider: restricts the results to one cloud provider, e.g. "AWS".
                The DevOps API default is "ALL".
            page_size: how many results the DevOps API returns per page.
            database_admin_timeout_ms: a timeout, in milliseconds, applied to
                the underlying API request; this object's defaults are used
                if it is omitted. Listing a very large number of databases
                may take several DevOps API requests, but a single request
                is by far the common case: accordingly, the timeout acts on
                each request individually and the overall duration of this
                method is not checked.
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.

        Returns:
            A list of AstraDBAdminDatabaseInfo objects.

        Example:
            >>> async def check_if_db_exists(db_id: str) -> bool:
            ...     db_list = await my_astra_db_admin.async_list_databases()
            ...     return db_id in {db.id for db in db_list}
            ...
            >>> asyncio.run(check_if_db_exists("xyz"))
            False
            >>> asyncio.run(check_if_db_exists("01234567-..."))
            True
        """

        resolved_timeout_ms, timeout_label = _select_singlereq_timeout_da(
            timeout_options=self.api_options.timeout_options,
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        per_request_context = _TimeoutContext(
            request_ms=resolved_timeout_ms,
            label=timeout_label,
        )
        return await self._async_list_databases_ctx(
            include=include,
            provider=provider,
            page_size=page_size,
            timeout_context=per_request_context,
        )

    async def _async_list_databases_ctx(
        self,
        *,
        include: str | None,
        provider: str | None,
        page_size: int | None,
        timeout_context: _TimeoutContext,
    ) -> list[AstraDBAdminDatabaseInfo]:
        """
        Async listing of the databases, with the timeout supplied
        as a _TimeoutContext (applied to each individual request).

        Pages are requested from the DevOps API one after the other: as long
        as the latest page holds at least "limit" items, a further request is
        made for the items following the last database received.

        Args:
            include: optional filter, sent as the "include" request parameter.
            provider: optional filter, sent as the "provider" request parameter.
            page_size: the page size, sent as "limit". A falsy value makes
                DEV_OPS_DEFAULT_DATABASES_PAGE_SIZE be used instead.
            timeout_context: the timeout passed to each request.

        Returns:
            A list of AstraDBAdminDatabaseInfo objects, spanning all pages.

        Raises:
            DevOpsAPIException: if a response is not a list, or if the last
                item of a full page has no "id" to continue from.
        """
        logger.info("getting databases (DevOps API), async")
        # parameters whose value is None are not sent at all
        request_params_0 = {
            k: v
            for k, v in {
                "include": include,
                "provider": provider,
                "limit": page_size or DEV_OPS_DEFAULT_DATABASES_PAGE_SIZE,
            }.items()
            if v is not None
        }
        # one entry per page received, each page being a list of database dicts
        responses: list[list[dict[str, Any]]] = []
        logger.info("request 0, getting databases (DevOps API), async")
        response_0 = await self._dev_ops_api_commander.async_request(
            http_method=HttpMethod.GET,
            request_params=request_params_0,
            timeout_context=timeout_context,
        )
        if not isinstance(response_0, list):
            raise DevOpsAPIException(
                "Faulty response from get-databases DevOps API command.",
            )
        logger.info("finished request 0, getting databases (DevOps API), async")
        responses += [response_0]
        while len(responses[-1]) >= request_params_0["limit"]:
            if "id" not in responses[-1][-1]:
                raise DevOpsAPIException(
                    "Faulty response from get-databases DevOps API command.",
                )
            last_received_db_id = responses[-1][-1]["id"]
            request_params_n = {
                **request_params_0,
                **{"starting_after": last_received_db_id},
            }
            # this message now carries the ", async" suffix like all others here
            logger.info(
                "request %s, getting databases (DevOps API), async",
                len(responses),
            )
            response_n = await self._dev_ops_api_commander.async_request(
                http_method=HttpMethod.GET,
                request_params=request_params_n,
                timeout_context=timeout_context,
            )
            logger.info(
                "finished request %s, getting databases (DevOps API), async",
                len(responses),
            )
            if not isinstance(response_n, list):
                raise DevOpsAPIException(
                    "Faulty response from get-databases DevOps API command.",
                )
            responses += [response_n]

        logger.info("finished getting databases (DevOps API), async")
        # flatten the pages, converting each item into a database info object
        return [
            _recast_as_admin_database_info(
                db_dict,
                environment=self.api_options.environment,
            )
            for response in responses
            for db_dict in response
        ]

    def database_info(
        self,
        id: str,
        *,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> AstraDBAdminDatabaseInfo:
        """
        Fetch, with a DevOps API request, the complete information
        about a database.

        Args:
            id: the ID of the database to look up, for example
                "01234567-89ab-cdef-0123-456789abcdef".
            database_admin_timeout_ms: a timeout, in milliseconds, applied to
                the underlying DevOps API request; this object's defaults
                are used if it is omitted. Since just one API request is
                issued by this method, all timeout parameters are
                handled the same way.
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.

        Returns:
            An AstraDBAdminDatabaseInfo object.

        Example:
            >>> details_of_my_db = my_astra_db_admin.database_info("01234567-...")
            >>> details_of_my_db.id
            '01234567-...'
            >>> details_of_my_db.status
            'ACTIVE'
            >>> details_of_my_db.info.region
            'eu-west-1'
        """

        resolved_timeout_ms, timeout_label = _select_singlereq_timeout_da(
            timeout_options=self.api_options.timeout_options,
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        per_request_context = _TimeoutContext(
            request_ms=resolved_timeout_ms,
            label=timeout_label,
        )
        return self._database_info_ctx(
            id=id,
            timeout_context=per_request_context,
        )

    def _database_info_ctx(
        self,
        id: str,
        *,
        timeout_context: _TimeoutContext,
    ) -> AstraDBAdminDatabaseInfo:
        # Counterpart of `database_info` receiving the timeout
        # already in the form of a _TimeoutContext.
        logger.info(f"getting database info for '{id}' (DevOps API)")
        raw_database_info = self._dev_ops_api_commander.request(
            http_method=HttpMethod.GET,
            additional_path=id,
            timeout_context=timeout_context,
        )
        logger.info(f"finished getting database info for '{id}' (DevOps API)")
        # convert the raw response into a database info object
        database_info = _recast_as_admin_database_info(
            raw_database_info,
            environment=self.api_options.environment,
        )
        return database_info

    async def async_database_info(
        self,
        id: str,
        *,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> AstraDBAdminDatabaseInfo:
        """
        Fetch, with a DevOps API request, the complete information
        about a database. This method is awaitable, for use within
        an asyncio event loop.

        Args:
            id: the ID of the database to look up, for example
                "01234567-89ab-cdef-0123-456789abcdef".
            database_admin_timeout_ms: a timeout, in milliseconds, applied to
                the underlying DevOps API request; this object's defaults
                are used if it is omitted. Since just one API request is
                issued by this method, all timeout parameters are
                handled the same way.
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.

        Returns:
            An AstraDBAdminDatabaseInfo object.

        Example:
            >>> async def check_if_db_active(db_id: str) -> bool:
            ...     db_info = await my_astra_db_admin.async_database_info(db_id)
            ...     return db_info.status == "ACTIVE"
            ...
            >>> asyncio.run(check_if_db_active("01234567-..."))
            True
        """

        resolved_timeout_ms, timeout_label = _select_singlereq_timeout_da(
            timeout_options=self.api_options.timeout_options,
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        per_request_context = _TimeoutContext(
            request_ms=resolved_timeout_ms,
            label=timeout_label,
        )
        return await self._async_database_info_ctx(
            id=id,
            timeout_context=per_request_context,
        )

    async def _async_database_info_ctx(
        self,
        id: str,
        *,
        timeout_context: _TimeoutContext,
    ) -> AstraDBAdminDatabaseInfo:
        # Counterpart of `async_database_info` receiving the timeout
        # already in the form of a _TimeoutContext.
        logger.info(f"getting database info for '{id}' (DevOps API), async")
        raw_database_info = await self._dev_ops_api_commander.async_request(
            http_method=HttpMethod.GET,
            additional_path=id,
            timeout_context=timeout_context,
        )
        logger.info(f"finished getting database info for '{id}' (DevOps API), async")
        # convert the raw response into a database info object
        database_info = _recast_as_admin_database_info(
            raw_database_info,
            environment=self.api_options.environment,
        )
        return database_info

    def create_database(
        self,
        name: str,
        *,
        cloud_provider: str,
        region: str,
        keyspace: str | None = None,
        wait_until_active: bool = True,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
        token: str | TokenProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AstraDBDatabaseAdmin:
        """
        Create a database as requested, optionally waiting for it to be ready.

        Args:
            name: the desired name for the database.
            cloud_provider: one of 'aws', 'gcp' or 'azure'.
            region: any of the available cloud regions.
            keyspace: name for the one keyspace the database starts with.
                If omitted, DevOps API will use its default.
            wait_until_active: if True (default), the method returns only after
                the newly-created database is in ACTIVE state (a few minutes,
                usually). If False, it will return right after issuing the
                creation request to the DevOps API, and it will be responsibility
                of the caller to check the database status before working with it.
            database_admin_timeout_ms: a timeout, in milliseconds, for the whole
                requested operation to complete. This is used only
                if `wait_until_active` is true, i.e. if the method call must
                wait and keep querying the DevOps API for the status of the
                newly-created database.
            request_timeout_ms: a timeout, in milliseconds, for
                each underlying DevOps API HTTP request.
            timeout_ms: an alias for *both* the `request_timeout_ms` and
                `database_admin_timeout_ms` timeout parameters. In practice,
                regardless of `wait_until_active`, this parameter dictates an
                overall timeout on this method call.
            token: if supplied, is passed to the returned AstraDBDatabaseAdmin
                instead of the one set for this object.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults inherited from the AstraDBAdmin.
                This allows for a deeper configuration of the database admin, e.g.
                concerning timeouts; if this is passed together with
                the named timeout parameters, the latter will take precedence
                in their respective settings.

        Returns:
            An AstraDBDatabaseAdmin instance.

        Raises:
            DevOpsAPIException: if the creation request does not return the
                expected "created" HTTP status, or if (when waiting) the database
                ends up in a status other than ACTIVE.

        Note: a timeout event is no guarantee at all that the
        creation request has not reached the API server and is not going
        to be, in fact, honored.

        Example:
            >>> my_new_db_admin = my_astra_db_admin.create_database(
            ...     "new_database",
            ...     cloud_provider="aws",
            ...     region="ap-south-1",
            ... )
            >>> my_new_db = my_new_db_admin.get_database()
            >>> my_coll = my_new_db.create_collection(
            ...     "movies",
            ...     definition=(
            ...         CollectionDefinition.builder()
            ...         .set_vector_dimension(2)
            ...         .build()
            ...     )
            ... )
            >>> my_coll.insert_one({"title": "The Title", "$vector": [0.1, 0.2]})
        """

        # Overall timeout: candidates are, in the order given, the explicit
        # parameter, the `timeout_ms` alias and this object's default.
        # NOTE(review): presumably `_first_valid_timeout` returns the first
        # usable (value, label) pair - confirm against the helper.
        _database_admin_timeout_ms, _da_label = _first_valid_timeout(
            (database_admin_timeout_ms, "database_admin_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.database_admin_timeout_ms,
                "database_admin_timeout_ms",
            ),
        )
        # Per-request timeout, with the analogous list of candidates.
        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        # Creation payload: entries whose value is None are left out.
        cd_payload = {
            k: v
            for k, v in {
                "name": name,
                "tier": "serverless",
                "cloudProvider": cloud_provider,
                "region": region,
                "capacityUnits": 1,
                "dbType": "vector",
                "keyspace": keyspace,
            }.items()
            if v is not None
        }
        # The same manager supplies the timeout for the creation request
        # and for every subsequent status-polling request.
        timeout_manager = MultiCallTimeoutManager(
            overall_timeout_ms=_database_admin_timeout_ms,
            dev_ops_api=True,
            timeout_label=_da_label,
        )
        logger.info(
            f"creating database {name}/({cloud_provider}, {region}) (DevOps API)"
        )
        # A raw request is used, as status code and headers are inspected below.
        cd_raw_response = self._dev_ops_api_commander.raw_request(
            http_method=HttpMethod.POST,
            payload=cd_payload,
            timeout_context=timeout_manager.remaining_timeout(
                cap_time_ms=_request_timeout_ms,
                cap_timeout_label=_rt_label,
            ),
        )
        if cd_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_CREATED:
            raise DevOpsAPIException(
                f"DB creation ('{name}') failed: API returned HTTP "
                f"{cd_raw_response.status_code} instead of "
                f"{DEV_OPS_RESPONSE_HTTP_CREATED} - Created."
            )
        # The ID of the new database is read from the "Location" response header.
        new_database_id = cd_raw_response.headers["Location"]
        logger.info(
            "DevOps API returned from creating database "
            f"{name}/({cloud_provider}, {region})"
        )
        if wait_until_active:
            # Poll the database status, sleeping before each check, for as
            # long as the status is either PENDING or INITIALIZING.
            last_status_seen = DEV_OPS_DATABASE_STATUS_PENDING
            while last_status_seen in {
                DEV_OPS_DATABASE_STATUS_PENDING,
                DEV_OPS_DATABASE_STATUS_INITIALIZING,
            }:
                logger.info(f"sleeping to poll for status of '{new_database_id}'")
                time.sleep(DEV_OPS_DATABASE_POLL_INTERVAL_S)
                last_db_info = self._database_info_ctx(
                    id=new_database_id,
                    timeout_context=timeout_manager.remaining_timeout(
                        cap_time_ms=_request_timeout_ms,
                        cap_timeout_label=_rt_label,
                    ),
                )
                last_status_seen = last_db_info.status
            # Once out of the loop, any status other than ACTIVE is an error.
            if last_status_seen != DEV_OPS_DATABASE_STATUS_ACTIVE:
                raise DevOpsAPIException(
                    f"Database {name} entered unexpected status {last_status_seen} after PENDING"
                )
        # return the database instance
        logger.info(
            f"finished creating database '{new_database_id}' = "
            f"{name}/({cloud_provider}, {region}) (DevOps API)"
        )
        # Options for the returned admin: this object's options, overridden
        # by `spawn_api_options` first and by the `token` parameter last.
        _final_api_options = self.api_options.with_override(
            spawn_api_options
        ).with_override(APIOptions(token=token))
        return AstraDBDatabaseAdmin.from_astra_db_admin(
            api_endpoint=build_api_endpoint(
                environment=self.api_options.environment,
                database_id=new_database_id,
                region=region,
            ),
            astra_db_admin=self,
            spawn_api_options=_final_api_options,
        )

    async def async_create_database(
        self,
        name: str,
        *,
        cloud_provider: str,
        region: str,
        keyspace: str | None = None,
        wait_until_active: bool = True,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
        token: str | TokenProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AstraDBDatabaseAdmin:
        """
        Request the creation of a database through the DevOps API and,
        optionally, wait until it becomes active.
        This is a coroutine, to be awaited within an asyncio event loop.

        Args:
            name: the desired name for the database.
            cloud_provider: one of 'aws', 'gcp' or 'azure'.
            region: any of the available cloud regions.
            keyspace: name for the one keyspace the database starts with.
                If omitted, the key is left out of the creation request
                and the DevOps API will use its default.
            wait_until_active: if True (default), the method keeps polling the
                database status and returns only once it leaves the
                PENDING/INITIALIZING states (a few minutes, usually).
                If False, it returns right after the creation request has
                been accepted, and it is the responsibility of the caller
                to check the database status before working with it.
            database_admin_timeout_ms: a timeout, in milliseconds, for the whole
                requested operation to complete. This matters mostly
                if `wait_until_active` is true, i.e. if the method call must
                wait and keep querying the DevOps API for the status of the
                newly-created database.
            request_timeout_ms: a timeout, in milliseconds, for
                each underlying DevOps API HTTP request.
            timeout_ms: an alias for *both* the `request_timeout_ms` and
                `database_admin_timeout_ms` timeout parameters. In practice,
                regardless of `wait_until_active`, this parameter dictates an
                overall timeout on this method call.
            token: if supplied, it is used by the returned AstraDBDatabaseAdmin
                instead of the one set for this object.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults inherited from the AstraDBAdmin.
                This allows for a deeper configuration of the database admin, e.g.
                concerning timeouts; if this is passed together with
                the named token parameter, the latter takes precedence.

        Returns:
            An AstraDBDatabaseAdmin instance for the newly-created database.

        Raises:
            DevOpsAPIException: if the creation request is not answered with
                the expected "Created" HTTP status, or if, while waiting, the
                database ends up in a status other than ACTIVE.

        Note: a timeout event is no guarantee at all that the
        creation request has not reached the API server and is not going
        to be, in fact, honored.

        Example:
            >>> asyncio.run(
            ...     my_astra_db_admin.async_create_database(
            ...         "new_database",
            ...         cloud_provider="aws",
            ...         region="ap-south-1",
            ...     )
            ... )
            AstraDBDatabaseAdmin(id=...)
        """

        # resolve the overall and the per-request timeouts: an explicit
        # parameter wins over the `timeout_ms` alias, which wins over the
        # defaults stored in this object's API options.
        overall_timeout_ms, overall_label = _first_valid_timeout(
            (database_admin_timeout_ms, "database_admin_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.database_admin_timeout_ms,
                "database_admin_timeout_ms",
            ),
        )
        single_call_timeout_ms, single_call_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )

        # unset (None) settings are not sent to the API at all
        requested_settings = {
            "name": name,
            "tier": "serverless",
            "cloudProvider": cloud_provider,
            "region": region,
            "capacityUnits": 1,
            "dbType": "vector",
            "keyspace": keyspace,
        }
        creation_payload = {
            setting: value
            for setting, value in requested_settings.items()
            if value is not None
        }

        timeout_manager = MultiCallTimeoutManager(
            overall_timeout_ms=overall_timeout_ms,
            dev_ops_api=True,
            timeout_label=overall_label,
        )
        db_description = f"{name}/({cloud_provider}, {region})"
        logger.info(f"creating database {db_description} (DevOps API), async")
        creation_response = await self._dev_ops_api_commander.async_raw_request(
            http_method=HttpMethod.POST,
            payload=creation_payload,
            timeout_context=timeout_manager.remaining_timeout(
                cap_time_ms=single_call_timeout_ms,
                cap_timeout_label=single_call_label,
            ),
        )
        if creation_response.status_code != DEV_OPS_RESPONSE_HTTP_CREATED:
            raise DevOpsAPIException(
                f"DB creation ('{name}') failed: API returned HTTP "
                f"{creation_response.status_code} instead of "
                f"{DEV_OPS_RESPONSE_HTTP_CREATED} - Created."
            )
        # the ID of the new database is read from the 'Location' response header
        new_database_id = creation_response.headers["Location"]
        logger.info(
            f"DevOps API returned from creating database {db_description}, async"
        )

        if wait_until_active:
            # statuses for which polling must go on
            transient_statuses = {
                DEV_OPS_DATABASE_STATUS_PENDING,
                DEV_OPS_DATABASE_STATUS_INITIALIZING,
            }
            current_status = DEV_OPS_DATABASE_STATUS_PENDING
            while current_status in transient_statuses:
                logger.info(
                    f"sleeping to poll for status of '{new_database_id}', async"
                )
                await asyncio.sleep(DEV_OPS_DATABASE_POLL_INTERVAL_S)
                polled_db_info = await self._async_database_info_ctx(
                    id=new_database_id,
                    timeout_context=timeout_manager.remaining_timeout(
                        cap_time_ms=single_call_timeout_ms,
                        cap_timeout_label=single_call_label,
                    ),
                )
                current_status = polled_db_info.status
            if current_status != DEV_OPS_DATABASE_STATUS_ACTIVE:
                raise DevOpsAPIException(
                    f"Database {name} entered unexpected status "
                    f"{current_status} after PENDING"
                )

        logger.info(
            f"finished creating database '{new_database_id}' = "
            f"{db_description} (DevOps API), async"
        )
        # the named `token` parameter is applied last, so it takes precedence
        # over whatever `spawn_api_options` specifies.
        inherited_api_options = self.api_options.with_override(spawn_api_options)
        admin_api_options = inherited_api_options.with_override(
            APIOptions(token=token)
        )
        return AstraDBDatabaseAdmin.from_astra_db_admin(
            api_endpoint=build_api_endpoint(
                environment=self.api_options.environment,
                database_id=new_database_id,
                region=region,
            ),
            astra_db_admin=self,
            spawn_api_options=admin_api_options,
        )

    def drop_database(
        self,
        id: str,
        *,
        wait_until_active: bool = True,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Drop a database, i.e. delete it completely and permanently with all its data.

        Args:
            id: The ID of the database to drop, e. g.
                "01234567-89ab-cdef-0123-456789abcdef".
            wait_until_active: if True (default), the method returns only after
                the database has actually been deleted (generally a few minutes).
                If False, it will return right after issuing the
                drop request to the DevOps API, and it will be the responsibility
                of the caller to check the database status/availability
                after that, if desired.
            database_admin_timeout_ms: a timeout, in milliseconds, for the whole
                requested operation to complete. This is used only
                if `wait_until_active` is true, i.e. if the method call must
                wait and keep querying the DevOps API for the status of the
                newly-deleted database.
            request_timeout_ms: a timeout, in milliseconds, for
                each underlying DevOps API HTTP request.
            timeout_ms: an alias for *both* the `request_timeout_ms` and
                `database_admin_timeout_ms` timeout parameters. In practice,
                regardless of `wait_until_active`, this parameter dictates an
                overall timeout on this method call.

        Raises:
            DevOpsAPIException: if the termination request is not answered
                with the expected "Accepted" HTTP status, or if, while waiting,
                the database is found in a status other than TERMINATING
                instead of disappearing from the database list.

        Note: a timeout event is no guarantee at all that the
        deletion request has not reached the API server and is not going
        to be, in fact, honored.

        Example:
            >>> database_list_pre = my_astra_db_admin.list_databases()
            >>> len(database_list_pre)
            3
            >>> my_astra_db_admin.drop_database("01234567-...")
            >>> database_list_post = my_astra_db_admin.list_databases()
            >>> len(database_list_post)
            2
        """

        _database_admin_timeout_ms, _da_label = _first_valid_timeout(
            (database_admin_timeout_ms, "database_admin_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.database_admin_timeout_ms,
                "database_admin_timeout_ms",
            ),
        )
        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        timeout_manager = MultiCallTimeoutManager(
            overall_timeout_ms=_database_admin_timeout_ms,
            dev_ops_api=True,
            timeout_label=_da_label,
        )
        logger.info(f"dropping database '{id}' (DevOps API)")
        te_raw_response = self._dev_ops_api_commander.raw_request(
            http_method=HttpMethod.POST,
            additional_path=f"{id}/terminate",
            timeout_context=timeout_manager.remaining_timeout(
                cap_time_ms=_request_timeout_ms,
                cap_timeout_label=_rt_label,
            ),
        )
        if te_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_ACCEPTED:
            raise DevOpsAPIException(
                f"DB deletion ('{id}') failed: API returned HTTP "
                f"{te_raw_response.status_code} instead of "
                f"{DEV_OPS_RESPONSE_HTTP_ACCEPTED} - Accepted"
            )
        logger.info(f"DevOps API returned from dropping database '{id}'")
        if wait_until_active:
            last_status_seen: str | None = DEV_OPS_DATABASE_STATUS_TERMINATING
            _db_name: str | None = None
            while last_status_seen == DEV_OPS_DATABASE_STATUS_TERMINATING:
                logger.info(f"sleeping to poll for status of '{id}'")
                time.sleep(DEV_OPS_DATABASE_POLL_INTERVAL_S)
                # look for the database in the full list: once it is not
                # listed anymore, the deletion is considered complete.
                detected_databases = [
                    a_db_info
                    for a_db_info in self._list_databases_ctx(
                        include=None,
                        provider=None,
                        page_size=None,
                        timeout_context=timeout_manager.remaining_timeout(
                            cap_time_ms=_request_timeout_ms,
                            cap_timeout_label=_rt_label,
                        ),
                    )
                    if a_db_info.id == id
                ]
                if detected_databases:
                    last_status_seen = detected_databases[0].status
                    _db_name = detected_databases[0].name
                else:
                    last_status_seen = None
            # leaving the loop with a status means the database is still
            # listed, but in a state other than TERMINATING.
            if last_status_seen is not None:
                _name_desc = f" ({_db_name})" if _db_name else ""
                raise DevOpsAPIException(
                    f"Database {id}{_name_desc} entered unexpected status "
                    f"{last_status_seen} after TERMINATING"
                )
        logger.info(f"finished dropping database '{id}' (DevOps API)")

    async def async_drop_database(
        self,
        id: str,
        *,
        wait_until_active: bool = True,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Drop a database, i.e. delete it completely and permanently with all its data.
        Async version of the method, for use in an asyncio context.

        Args:
            id: The ID of the database to drop, e. g.
                "01234567-89ab-cdef-0123-456789abcdef".
            wait_until_active: if True (default), the method returns only after
                the database has actually been deleted (generally a few minutes).
                If False, it will return right after issuing the
                drop request to the DevOps API, and it will be the responsibility
                of the caller to check the database status/availability
                after that, if desired.
            database_admin_timeout_ms: a timeout, in milliseconds, for the whole
                requested operation to complete. This is used only
                if `wait_until_active` is true, i.e. if the method call must
                wait and keep querying the DevOps API for the status of the
                newly-deleted database.
            request_timeout_ms: a timeout, in milliseconds, for
                each underlying DevOps API HTTP request.
            timeout_ms: an alias for *both* the `request_timeout_ms` and
                `database_admin_timeout_ms` timeout parameters. In practice,
                regardless of `wait_until_active`, this parameter dictates an
                overall timeout on this method call.

        Raises:
            DevOpsAPIException: if the termination request is not answered
                with the expected "Accepted" HTTP status, or if, while waiting,
                the database is found in a status other than TERMINATING
                instead of disappearing from the database list.

        Note: a timeout event is no guarantee at all that the
        deletion request has not reached the API server and is not going
        to be, in fact, honored.

        Example:
            >>> asyncio.run(
            ...     my_astra_db_admin.async_drop_database("01234567-...")
            ... )
        """

        _database_admin_timeout_ms, _da_label = _first_valid_timeout(
            (database_admin_timeout_ms, "database_admin_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.database_admin_timeout_ms,
                "database_admin_timeout_ms",
            ),
        )
        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        timeout_manager = MultiCallTimeoutManager(
            overall_timeout_ms=_database_admin_timeout_ms,
            dev_ops_api=True,
            timeout_label=_da_label,
        )
        logger.info(f"dropping database '{id}' (DevOps API), async")
        te_raw_response = await self._dev_ops_api_commander.async_raw_request(
            http_method=HttpMethod.POST,
            additional_path=f"{id}/terminate",
            timeout_context=timeout_manager.remaining_timeout(
                cap_time_ms=_request_timeout_ms,
                cap_timeout_label=_rt_label,
            ),
        )
        if te_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_ACCEPTED:
            raise DevOpsAPIException(
                f"DB deletion ('{id}') failed: API returned HTTP "
                f"{te_raw_response.status_code} instead of "
                f"{DEV_OPS_RESPONSE_HTTP_ACCEPTED} - Accepted"
            )
        logger.info(f"DevOps API returned from dropping database '{id}', async")
        if wait_until_active:
            last_status_seen: str | None = DEV_OPS_DATABASE_STATUS_TERMINATING
            _db_name: str | None = None
            while last_status_seen == DEV_OPS_DATABASE_STATUS_TERMINATING:
                logger.info(f"sleeping to poll for status of '{id}', async")
                await asyncio.sleep(DEV_OPS_DATABASE_POLL_INTERVAL_S)
                # look for the database in the full list: once it is not
                # listed anymore, the deletion is considered complete.
                detected_databases = [
                    a_db_info
                    for a_db_info in await self._async_list_databases_ctx(
                        include=None,
                        provider=None,
                        page_size=None,
                        timeout_context=timeout_manager.remaining_timeout(
                            cap_time_ms=_request_timeout_ms,
                            cap_timeout_label=_rt_label,
                        ),
                    )
                    if a_db_info.id == id
                ]
                if detected_databases:
                    last_status_seen = detected_databases[0].status
                    _db_name = detected_databases[0].name
                else:
                    last_status_seen = None
            # leaving the loop with a status means the database is still
            # listed, but in a state other than TERMINATING.
            if last_status_seen is not None:
                _name_desc = f" ({_db_name})" if _db_name else ""
                raise DevOpsAPIException(
                    f"Database {id}{_name_desc} entered unexpected status "
                    f"{last_status_seen} after TERMINATING"
                )
        logger.info(f"finished dropping database '{id}' (DevOps API), async")

    def get_database_admin(
        self,
        api_endpoint_or_id: str | None = None,
        *,
        api_endpoint: str | None = None,
        id: str | None = None,
        region: str | None = None,
        token: str | TokenProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AstraDBDatabaseAdmin:
        """
        Build an AstraDBDatabaseAdmin object, for admin work scoped to one database.

        The target database is identified either by its API endpoint or by
        the (id, region) pair: the two call patterns are mutually exclusive.

        Args:
            api_endpoint_or_id: positional parameter that can stand for both
                `api_endpoint` and `id`. Passing them together is an error.
            api_endpoint: the API Endpoint for the target database
                (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`.
                Note that no 'Custom Domain' endpoints are accepted).
                The database must exist already for the resulting object
                to be effectively used; in other words, this invocation
                does not create the database, just the object instance.
            id: the target database ID. This is alternative to using the API Endpoint.
            region: the region to use for connecting to the database.
                This parameter must be supplied if (and only if) the `id` is
                given for the database instead of the full API endpoint.
            token: if supplied, is passed to the database admin instead of
                the one set for this object.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults inherited from the AstraDBAdmin.
                This allows for a deeper configuration of the database admin, i.e.
                beyond just specifying a token; if this is passed together with
                the named token parameter, the latter will take precedence.

        Returns:
            An AstraDBDatabaseAdmin instance representing the requested database.

        Raises:
            ValueError: if `region` is passed along with an API endpoint,
                if neither an endpoint nor an ID is given, or if an ID is
                given without a `region`.

        Example:
            >>> my_db_admin = my_astra_db_admin.get_database_admin("01234567-...")
            >>> my_db_admin.list_keyspaces()
            ['default_keyspace']
            >>> my_db_admin.create_keyspace("that_other_one")
            >>> my_db_admin.list_keyspaces()
            ['default_keyspace', 'that_other_one']

        Note:
            This method does not perform any admin-level operation through
            the DevOps API. For actual creation of a database, see the
            `create_database` method.
        """

        endpoint_arg, id_arg = check_id_endpoint_parg_kwargs(
            p_arg=api_endpoint_or_id, api_endpoint=api_endpoint, id=id
        )
        # the named `token` is applied after `spawn_api_options`, hence it wins
        inherited_api_options = self.api_options.with_override(spawn_api_options)
        admin_api_options = inherited_api_options.with_override(
            APIOptions(token=token),
        )

        target_api_endpoint: str
        if endpoint_arg is None:
            # the database is identified by (id, region): both are required
            if id_arg is None:
                raise ValueError("Either `api_endpoint` or `id` must be supplied.")
            if region is None:
                raise ValueError("Parameter `region` must be supplied with `id`.")
            target_api_endpoint = build_api_endpoint(
                environment=self.api_options.environment,
                database_id=id_arg,
                region=region,
            )
        elif region is not None:
            # an endpoint already determines the region
            raise ValueError("Parameter `region` not supported with an API endpoint.")
        else:
            target_api_endpoint = endpoint_arg

        return AstraDBDatabaseAdmin.from_astra_db_admin(
            api_endpoint=target_api_endpoint,
            astra_db_admin=self,
            spawn_api_options=admin_api_options,
        )

    def get_database(
        self,
        api_endpoint_or_id: str | None = None,
        *,
        api_endpoint: str | None = None,
        keyspace: str | None = None,
        id: str | None = None,
        region: str | None = None,
        token: str | TokenProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> Database:
        """
        Build a Database instance for a specific database, for data-level
        work (such as creating/managing collections).

        The target database is identified either by its API endpoint or by
        the (id, region) pair: the two call patterns are mutually exclusive.

        Args:
            api_endpoint_or_id: positional parameter that can stand for both
                `api_endpoint` and `id`. Passing them together is an error.
            api_endpoint: the API Endpoint for the target database
                (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`,
                or a custom domain if one is configured for the database).
                The database must exist already for the resulting object
                to be effectively used; in other words, this invocation
                does not create the database, just the object instance.
            keyspace: if provided, it is passed to the Database; otherwise
                the Database class will apply an environment-specific default.
            id: the target database ID. This is alternative to using the API Endpoint.
            region: the region to use for connecting to the database.
                This parameter must be supplied if (and only if) the `id` is
                given for the database instead of the full API endpoint.
            token: if supplied, is passed to the Database instead of
                the one set for this object.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults inherited from the AstraDBAdmin.
                This allows for a deeper configuration of the database, i.e. beyond
                just specifying a token; if this is passed together with
                the named token parameter, the latter will take precedence.

        Returns:
            A Database object ready to be used.

        Raises:
            ValueError: if `region` is passed along with an API endpoint,
                if neither an endpoint nor an ID is given, or if an ID is
                given without a `region`.
            InvalidEnvironmentException: if the environment parsed from the
                API endpoint differs from the one in the resulting API options.

        Example:
            >>> my_db = my_astra_db_admin.get_database(
            ...     "https://<ID>-<REGION>.apps.astra.datastax.com",
            ...     keyspace="my_prod_keyspace",
            ... )
            >>> my_coll = my_db.create_collection(
            ...     "movies",
            ...     definition=(
            ...         CollectionDefinition.builder()
            ...         .set_vector_dimension(2)
            ...         .build()
            ...     )
            ... )
            >>> my_coll.insert_one({"title": "The Title", "$vector": [0.3, 0.4]})
        """

        endpoint_arg, id_arg = check_id_endpoint_parg_kwargs(
            p_arg=api_endpoint_or_id, api_endpoint=api_endpoint, id=id
        )
        # lazy importing here to avoid circular dependency
        from astrapy import Database

        # the named `token` is applied after `spawn_api_options`, hence it wins
        inherited_api_options = self.api_options.with_override(spawn_api_options)
        database_api_options = inherited_api_options.with_override(
            APIOptions(token=token),
        )

        target_api_endpoint: str
        if endpoint_arg is None:
            # the database is identified by (id, region): both are required
            if id_arg is None:
                raise ValueError("Either `api_endpoint` or `id` must be supplied.")
            if region is None:
                raise ValueError("Parameter `region` must be supplied with `id`.")
            target_api_endpoint = build_api_endpoint(
                environment=self.api_options.environment,
                database_id=id_arg,
                region=region,
            )
        else:
            # an endpoint already determines the region
            if region is not None:
                raise ValueError(
                    "Parameter `region` not supported with an API endpoint."
                )
            endpoint_details = parse_api_endpoint(endpoint_arg)
            if endpoint_details is None:
                # the endpoint could not be parsed: this is only logged and
                # the endpoint is used as it is.
                logger.info(api_endpoint_parsing_cdinfo_message(endpoint_arg))
            elif endpoint_details.environment != database_api_options.environment:
                raise InvalidEnvironmentException(
                    "Environment mismatch between client and provided "
                    "API endpoint. You can try adding "
                    f'`environment="{endpoint_details.environment}"` '
                    "to the DataAPIClient creation statement."
                )
            target_api_endpoint = endpoint_arg

        return Database(
            api_endpoint=target_api_endpoint,
            keyspace=keyspace,
            api_options=database_api_options,
        )

    def get_async_database(
        self,
        api_endpoint_or_id: str | None = None,
        *,
        api_endpoint: str | None = None,
        keyspace: str | None = None,
        id: str | None = None,
        region: str | None = None,
        token: str | TokenProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncDatabase:
        """
        Build an AsyncDatabase instance for a specific database, for data-level
        work (such as creating/managing collections).

        The target database is identified either by its API endpoint or by
        the (id, region) pair: the two call patterns are mutually exclusive.
        All parameters are handed over, unchanged, to `get_database`, and
        the resulting Database is then converted to its async counterpart.

        Args:
            api_endpoint_or_id: positional parameter that can stand for both
                `api_endpoint` and `id`. Passing them together is an error.
            api_endpoint: the API Endpoint for the target database
                (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`,
                or a custom domain if one is configured for the database).
                The database must exist already for the resulting object
                to be effectively used; in other words, this invocation
                does not create the database, just the object instance.
            keyspace: if provided, it is passed to the AsyncDatabase; otherwise
                the AsyncDatabase class will apply an environment-specific default.
            id: the target database ID. This is alternative to using the API Endpoint.
            region: the region to use for connecting to the database.
                This parameter must be supplied if (and only if) the `id` is
                given for the database instead of the full API endpoint.
            token: if supplied, is passed to the AsyncDatabase instead of
                the one set for this object.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults inherited from the AstraDBAdmin.
                This allows for a deeper configuration of the database, i.e. beyond
                just specifying a token; if this is passed together with
                the named token parameter, the latter will take precedence.

        Returns:
            An AsyncDatabase object ready to be used.

        Example:
            >>> async def create_use_collection(
            ...     admin: AstraDBAdmin,
            ...     api_endpoint: str,
            ...     keyspace: str,
            ... ) -> None:
            ...     my_async_db = admin.get_async_database(
            ...         api_endpoint,
            ...         keyspace=keyspace,
            ...     )
            ...     a_coll = await my_async_db.create_collection(
            ...         "movies",
            ...         definition=(
            ...             CollectionDefinition.builder()
            ...             .set_vector_dimension(2)
            ...             .build()
            ...         )
            ...     )
            ...     await a_coll.insert_one(
            ...         {"title": "The Title", "$vector": [0.3, 0.4]}
            ...     )
            ...
            >>> asyncio.run(create_use_collection(
            ...     my_admin,
            ...     "https://<ID>-<REGION>.apps.astra.datastax.com",
            ...     "default_keyspace",
            ... ))
            >>>
        """

        # all validation and option handling is done by the sync method
        sync_database = self.get_database(
            api_endpoint_or_id=api_endpoint_or_id,
            api_endpoint=api_endpoint,
            keyspace=keyspace,
            id=id,
            region=region,
            token=token,
            spawn_api_options=spawn_api_options,
        )
        return sync_database.to_async()

Methods

async def async_create_database(self, name: str, *, cloud_provider: str, region: str, keyspace: str | None = None, wait_until_active: bool = True, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None, token: str | TokenProvider | UnsetType = (unset), spawn_api_options: APIOptions | UnsetType = (unset)) ‑> AstraDBDatabaseAdmin

Create a database as requested, optionally waiting for it to be ready. This is an awaitable method suitable for use within an asyncio event loop.

Args

name: the desired name for the database.
cloud_provider: one of 'aws', 'gcp' or 'azure'.
region: any of the available cloud regions.
keyspace: name for the one keyspace the database starts with. If omitted, DevOps API will use its default.
wait_until_active: if True (default), the method returns only after the newly-created database is in ACTIVE state (a few minutes, usually). If False, it will return right after issuing the creation request to the DevOps API, and it will be the responsibility of the caller to check the database status before working with it.
database_admin_timeout_ms: a timeout, in milliseconds, for the whole requested operation to complete. This is used only if wait_until_active is true, i.e. if the method call must wait and keep querying the DevOps API for the status of the newly-created database.
request_timeout_ms: a timeout, in milliseconds, for each underlying DevOps API HTTP request.
timeout_ms: an alias for both the request_timeout_ms and database_admin_timeout_ms timeout parameters. In practice, regardless of wait_until_active, this parameter dictates an overall timeout on this method call.
token: if supplied, is passed to the returned AstraDBDatabaseAdmin instead of the one set for this object. This can be either a literal token string or a subclass of TokenProvider.
spawn_api_options: a specification - complete or partial - of the API Options to override the defaults inherited from the AstraDBAdmin. This allows for a deeper configuration of the database admin, e.g. concerning timeouts; if this is passed together with the named token parameter, the latter will take precedence.

Returns

An AstraDBDatabaseAdmin instance. Note: a timeout event is no guarantee at all that the creation request has not reached the API server and is not going to be, in fact, honored.

Example

>>> asyncio.run(
...     my_astra_db_admin.async_create_database(
...         "new_database",
...         cloud_provider="aws",
...         region="ap-south-1",
...     )
... )
AstraDBDatabaseAdmin(id=...)

Expand source code

async def async_create_database(
    self,
    name: str,
    *,
    cloud_provider: str,
    region: str,
    keyspace: str | None = None,
    wait_until_active: bool = True,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
    token: str | TokenProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> AstraDBDatabaseAdmin:
    """
    Ask the DevOps API to create a database and, optionally, wait for it
    to become usable. Awaitable counterpart of `create_database`, meant
    to be used from within an asyncio event loop.

    Args:
        name: the desired name for the database.
        cloud_provider: one of 'aws', 'gcp' or 'azure'.
        region: any of the available cloud regions.
        keyspace: name for the one keyspace the database starts with.
            When left to None, the field is not sent at all and the
            DevOps API applies its own default.
        wait_until_active: if True (default), the method keeps polling the
            DevOps API and returns only once the new database has left the
            PENDING/INITIALIZING statuses and is ACTIVE (usually a few
            minutes). If False, the method returns right after the creation
            request has been accepted, and it is the responsibility of the
            caller to check the database status before working with it.
        database_admin_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation to complete. It matters mostly when
            `wait_until_active` is true, i.e. when the method must keep
            querying the DevOps API for the status of the new database.
        request_timeout_ms: a timeout, in milliseconds, for
            each underlying DevOps API HTTP request.
        timeout_ms: an alias for *both* the `request_timeout_ms` and
            `database_admin_timeout_ms` timeout parameters. In practice,
            regardless of `wait_until_active`, this parameter dictates an
            overall timeout on this method call.
        token: if supplied, it replaces, in the returned AstraDBDatabaseAdmin,
            the token set for this object.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        spawn_api_options: a specification - complete or partial - of the
            API Options to override the defaults inherited from the AstraDBAdmin.
            This allows for a deeper configuration of the returned database
            admin, e.g. concerning timeouts; if this is passed together with
            the named token parameter, the latter will take precedence.

    Returns:
        An AstraDBDatabaseAdmin instance.

    Raises:
        DevOpsAPIException: if the creation request is not answered with the
            expected "Created" HTTP status, or if, while waiting, the
            database ends up in a status other than ACTIVE.

    Note: a timeout event is no guarantee at all that the
    creation request has not reached the API server and is not going
    to be, in fact, honored.

    Example:
        >>> asyncio.run(
        ...     my_astra_db_admin.async_create_database(
        ...         "new_database",
        ...         cloud_provider="aws",
        ...         region="ap-south-1",
        ...     )
        ... )
        AstraDBDatabaseAdmin(id=...)
    """

    # Resolve the two timeouts: explicit parameter first, then the generic
    # `timeout_ms`, then this object's configured defaults.
    default_timeouts = self.api_options.timeout_options
    overall_ms, overall_label = _first_valid_timeout(
        (database_admin_timeout_ms, "database_admin_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (default_timeouts.database_admin_timeout_ms, "database_admin_timeout_ms"),
    )
    per_request_ms, per_request_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (default_timeouts.request_timeout_ms, "request_timeout_ms"),
    )

    # Fields left to None (typically the keyspace) are not sent at all.
    candidate_fields = {
        "name": name,
        "tier": "serverless",
        "cloudProvider": cloud_provider,
        "region": region,
        "capacityUnits": 1,
        "dbType": "vector",
        "keyspace": keyspace,
    }
    creation_payload = {
        field: value
        for field, value in candidate_fields.items()
        if value is not None
    }

    # A single manager tracks the overall budget across all API calls below.
    deadline_tracker = MultiCallTimeoutManager(
        overall_timeout_ms=overall_ms,
        dev_ops_api=True,
        timeout_label=overall_label,
    )

    def _next_request_timeout():
        # What is left of the overall budget, capped by the per-request timeout.
        return deadline_tracker.remaining_timeout(
            cap_time_ms=per_request_ms,
            cap_timeout_label=per_request_label,
        )

    logger.info(
        f"creating database {name}/({cloud_provider}, {region}) "
        "(DevOps API), async"
    )
    creation_response = await self._dev_ops_api_commander.async_raw_request(
        http_method=HttpMethod.POST,
        payload=creation_payload,
        timeout_context=_next_request_timeout(),
    )
    if creation_response.status_code != DEV_OPS_RESPONSE_HTTP_CREATED:
        raise DevOpsAPIException(
            f"DB creation ('{name}') failed: API returned HTTP "
            f"{creation_response.status_code} instead of "
            f"{DEV_OPS_RESPONSE_HTTP_CREATED} - Created."
        )
    # The ID of the new database is read from the "Location" response header.
    new_database_id = creation_response.headers["Location"]
    logger.info(
        "DevOps API returned from creating database "
        f"{name}/({cloud_provider}, {region}), async"
    )

    if wait_until_active:
        # Statuses that mean "not ready yet, keep polling".
        transient_statuses = {
            DEV_OPS_DATABASE_STATUS_PENDING,
            DEV_OPS_DATABASE_STATUS_INITIALIZING,
        }
        current_status = DEV_OPS_DATABASE_STATUS_PENDING
        while current_status in transient_statuses:
            logger.info(
                f"sleeping to poll for status of '{new_database_id}', async"
            )
            await asyncio.sleep(DEV_OPS_DATABASE_POLL_INTERVAL_S)
            polled_info = await self._async_database_info_ctx(
                id=new_database_id,
                timeout_context=_next_request_timeout(),
            )
            current_status = polled_info.status
        if current_status != DEV_OPS_DATABASE_STATUS_ACTIVE:
            raise DevOpsAPIException(
                f"Database {name} entered unexpected status "
                f"{current_status} after PENDING"
            )

    logger.info(
        f"finished creating database '{new_database_id}' = "
        f"{name}/({cloud_provider}, {region}) (DevOps API), async"
    )
    # The named token is applied last, hence it wins over `spawn_api_options`.
    spawned_options = self.api_options.with_override(spawn_api_options)
    spawned_options = spawned_options.with_override(APIOptions(token=token))
    new_db_endpoint = build_api_endpoint(
        environment=self.api_options.environment,
        database_id=new_database_id,
        region=region,
    )
    return AstraDBDatabaseAdmin.from_astra_db_admin(
        api_endpoint=new_db_endpoint,
        astra_db_admin=self,
        spawn_api_options=spawned_options,
    )

async def async_database_info(self, id: str, *, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> AstraDBAdminDatabaseInfo

Get the full information on a given database, through a request to the DevOps API. This is an awaitable method suitable for use within an asyncio event loop.

Args

id: the ID of the target database, e. g. "01234567-89ab-cdef-0123-456789abcdef".
database_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying DevOps API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for database_admin_timeout_ms.
timeout_ms: an alias for database_admin_timeout_ms.

Returns

An AstraDBAdminDatabaseInfo object.

Example

>>> async def check_if_db_active(db_id: str) -> bool:
...     db_info = await my_astra_db_admin.async_database_info(db_id)
...     return db_info.status == "ACTIVE"
...
>>> asyncio.run(check_if_db_active("01234567-..."))
True

Expand source code

async def async_database_info(
    self,
    id: str,
    *,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> AstraDBAdminDatabaseInfo:
    """
    Retrieve the full information on one database through the DevOps API.
    Awaitable method, meant to be used from within an asyncio event loop.

    Args:
        id: the ID of the target database, e.g.
            "01234567-89ab-cdef-0123-456789abcdef".
        database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying DevOps API request.
            If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.

    Returns:
        An AstraDBAdminDatabaseInfo object.

    Example:
        >>> async def check_if_db_active(db_id: str) -> bool:
        ...     db_info = await my_astra_db_admin.async_database_info(db_id)
        ...     return db_info.status == "ACTIVE"
        ...
        >>> asyncio.run(check_if_db_active("01234567-..."))
        True
    """

    # Collapse the three interchangeable timeout parameters (plus this
    # object's defaults) into the one value, and label, used for the request.
    effective_timeout_ms, effective_label = _select_singlereq_timeout_da(
        timeout_options=self.api_options.timeout_options,
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    single_request_context = _TimeoutContext(
        request_ms=effective_timeout_ms,
        label=effective_label,
    )
    found_info = await self._async_database_info_ctx(
        id=id,
        timeout_context=single_request_context,
    )
    return found_info

async def async_drop_database(self, id: str, *, wait_until_active: bool = True, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Drop a database, i.e. delete it completely and permanently with all its data. Async version of the method, for use in an asyncio context.

Args

id: The ID of the database to drop, e. g. "01234567-89ab-cdef-0123-456789abcdef".
wait_until_active: if True (default), the method returns only after the database has actually been deleted (generally a few minutes). If False, it will return right after issuing the drop request to the DevOps API, and it will be the responsibility of the caller to check the database status/availability after that, if desired.
database_admin_timeout_ms: a timeout, in milliseconds, for the whole requested operation to complete. This is used only if wait_until_active is true, i.e. if the method call must wait and keep querying the DevOps API for the status of the newly-deleted database.
request_timeout_ms: a timeout, in milliseconds, for each underlying DevOps API HTTP request.
timeout_ms: an alias for both the request_timeout_ms and database_admin_timeout_ms timeout parameters. In practice, regardless of wait_until_active, this parameter dictates an overall timeout on this method call.

Note: a timeout event is no guarantee at all that the deletion request has not reached the API server and is not going to be, in fact, honored.

Example

>>> asyncio.run(
...     my_astra_db_admin.async_drop_database("01234567-...")
... )

Expand source code

async def async_drop_database(
    self,
    id: str,
    *,
    wait_until_active: bool = True,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Drop a database, i.e. delete it completely and permanently with all its data.
    Async version of the method, for use in an asyncio context.

    Args:
        id: The ID of the database to drop, e.g.
            "01234567-89ab-cdef-0123-456789abcdef".
        wait_until_active: if True (default), the method returns only after
            the database has actually been deleted (generally a few minutes).
            If False, it will return right after issuing the
            drop request to the DevOps API, and it will be the responsibility
            of the caller to check the database status/availability
            after that, if desired.
        database_admin_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation to complete. This is used only
            if `wait_until_active` is true, i.e. if the method call must
            wait and keep querying the DevOps API for the status of the
            newly-deleted database.
        request_timeout_ms: a timeout, in milliseconds, for
            each underlying DevOps API HTTP request.
        timeout_ms: an alias for *both* the `request_timeout_ms` and
            `database_admin_timeout_ms` timeout parameters. In practice,
            regardless of `wait_until_active`, this parameter dictates an
            overall timeout on this method call.

    Raises:
        DevOpsAPIException: if the termination request is not answered with
            the expected "Accepted" HTTP status, or if, while waiting, the
            database is still listed with a status other than TERMINATING.

    Note: a timeout event is no guarantee at all that the
    deletion request has not reached the API server and is not going
    to be, in fact, honored.

    Example:
        >>> asyncio.run(
        ...     my_astra_db_admin.async_drop_database("01234567-...")
        ... )
    """

    # Resolve the two timeouts: explicit parameter first, then the generic
    # `timeout_ms`, then this object's configured defaults.
    _database_admin_timeout_ms, _da_label = _first_valid_timeout(
        (database_admin_timeout_ms, "database_admin_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (
            self.api_options.timeout_options.database_admin_timeout_ms,
            "database_admin_timeout_ms",
        ),
    )
    _request_timeout_ms, _rt_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    # A single manager tracks the overall budget across all API calls below;
    # each individual request is additionally capped by the request timeout.
    timeout_manager = MultiCallTimeoutManager(
        overall_timeout_ms=_database_admin_timeout_ms,
        dev_ops_api=True,
        timeout_label=_da_label,
    )
    logger.info(f"dropping database '{id}' (DevOps API), async")
    te_raw_response = await self._dev_ops_api_commander.async_raw_request(
        http_method=HttpMethod.POST,
        additional_path=f"{id}/terminate",
        timeout_context=timeout_manager.remaining_timeout(
            cap_time_ms=_request_timeout_ms,
            cap_timeout_label=_rt_label,
        ),
    )
    if te_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_ACCEPTED:
        raise DevOpsAPIException(
            f"DB deletion ('{id}') failed: API returned HTTP "
            f"{te_raw_response.status_code} instead of "
            f"{DEV_OPS_RESPONSE_HTTP_ACCEPTED} - Accepted"
        )
    logger.info(f"DevOps API returned from dropping database '{id}', async")
    if wait_until_active:
        last_status_seen: str | None = DEV_OPS_DATABASE_STATUS_TERMINATING
        _db_name: str | None = None
        while last_status_seen == DEV_OPS_DATABASE_STATUS_TERMINATING:
            logger.info(f"sleeping to poll for status of '{id}', async")
            await asyncio.sleep(DEV_OPS_DATABASE_POLL_INTERVAL_S)
            listed_databases = await self._async_list_databases_ctx(
                include=None,
                provider=None,
                page_size=None,
                timeout_context=timeout_manager.remaining_timeout(
                    cap_time_ms=_request_timeout_ms,
                    cap_timeout_label=_rt_label,
                ),
            )
            matching_db = next(
                (a_db_info for a_db_info in listed_databases if a_db_info.id == id),
                None,
            )
            if matching_db is None:
                # The database no longer shows up in the listing: this is
                # treated as the deletion having completed.
                last_status_seen = None
            else:
                last_status_seen = matching_db.status
                _db_name = matching_db.name
        if last_status_seen is not None:
            # Still listed, but in a status other than TERMINATING.
            _name_desc = f" ({_db_name})" if _db_name else ""
            raise DevOpsAPIException(
                f"Database {id}{_name_desc} entered unexpected status "
                f"{last_status_seen} after TERMINATING"
            )
    logger.info(f"finished dropping database '{id}' (DevOps API), async")

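async def async_list_databases(self, *, include: str | None = None, provider: str | None = None, page_size: int | None = None, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[AstraDBAdminDatabaseInfo]

Get the list of databases, as obtained with a request to the DevOps API. Async version of the method, for use in an asyncio context.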

Args

include: a filter on what databases are to be returned. As per DevOps API, defaults to "nonterminated". Pass "all" to include the already terminated databases.
provider: a filter on the cloud provider for the databases. As per DevOps API, defaults to "ALL". Pass e.g. "AWS" to restrict the results.
page_size: number of results per page from the DevOps API.
database_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (While in the case of very many databases this method may entail multiple DevOps API requests, it is assumed here that this method amounts almost always to one single request: the only timeout imposed on this method execution is one acting on each individual request, with no checks on its overall completion time.)
request_timeout_ms: an alias for database_admin_timeout_ms.
timeout_ms: an alias for database_admin_timeout_ms.

Returns

A list of AstraDBAdminDatabaseInfo objects.

Example

>>> async def check_if_db_exists(db_id: str) -> bool:
...     db_list = await my_astra_db_admin.async_list_databases()
...     return db_id in {db.id for db in db_list}
...
>>> asyncio.run(check_if_db_exists("xyz"))
True
>>> asyncio.run(check_if_db_exists("01234567-..."))
False

Expand source code

async def async_list_databases(
    self,
    *,
    include: str | None = None,
    provider: str | None = None,
    page_size: int | None = None,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[AstraDBAdminDatabaseInfo]:
    """
    Retrieve the list of databases by querying the DevOps API.
    Awaitable method, meant to be used from within an asyncio event loop.

    Args:
        include: a filter on what databases are to be returned. As per
            DevOps API, defaults to "nonterminated". Pass "all" to include
            the already terminated databases.
        provider: a filter on the cloud provider for the databases.
            As per DevOps API, defaults to "ALL". Pass e.g. "AWS" to
            restrict the results.
        page_size: number of results per page from the DevOps API.
        database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (While in the case of very many databases this method may entail
            multiple DevOps API requests, it is assumed here that this method
            amounts almost always to one single request: the only timeout
            imposed on this method execution is one acting on each individual
            request, with no checks on its overall completion time.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.

    Returns:
        A list of AstraDBAdminDatabaseInfo objects.

    Example:
        >>> async def check_if_db_exists(db_id: str) -> bool:
        ...     db_list = await my_astra_db_admin.async_list_databases()
        ...     return db_id in {db.id for db in db_list}
        ...
        >>> asyncio.run(check_if_db_exists("xyz"))
        True
        >>> asyncio.run(check_if_db_exists("01234567-..."))
        False
    """

    # Collapse the three interchangeable timeout parameters (plus this
    # object's defaults) into the one value, and label, used per request.
    effective_timeout_ms, effective_label = _select_singlereq_timeout_da(
        timeout_options=self.api_options.timeout_options,
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    per_request_context = _TimeoutContext(
        request_ms=effective_timeout_ms,
        label=effective_label,
    )
    found_databases = await self._async_list_databases_ctx(
        include=include,
        provider=provider,
        page_size=page_size,
        timeout_context=per_request_context,
    )
    return found_databases

def create_database(self, name: str, *, cloud_provider: str, region: str, keyspace: str | None = None, wait_until_active: bool = True, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None, token: str | TokenProvider | UnsetType = (unset), spawn_api_options: APIOptions | UnsetType = (unset)) ‑> AstraDBDatabaseAdmin

Create a database as requested, optionally waiting for it to be ready.

Args

name: the desired name for the database.
cloud_provider: one of 'aws', 'gcp' or 'azure'.
region: any of the available cloud regions.
keyspace: name for the one keyspace the database starts with. If omitted, DevOps API will use its default.
wait_until_active: if True (default), the method returns only after the newly-created database is in ACTIVE state (a few minutes, usually). If False, it will return right after issuing the creation request to the DevOps API, and it will be the responsibility of the caller to check the database status before working with it.
database_admin_timeout_ms: a timeout, in milliseconds, for the whole requested operation to complete. This is used only if wait_until_active is true, i.e. if the method call must wait and keep querying the DevOps API for the status of the newly-created database.
request_timeout_ms: a timeout, in milliseconds, for each underlying DevOps API HTTP request.
timeout_ms: an alias for both the request_timeout_ms and database_admin_timeout_ms timeout parameters. In practice, regardless of wait_until_active, this parameter dictates an overall timeout on this method call.
token: if supplied, is passed to the returned AstraDBDatabaseAdmin instead of the one set for this object. This can be either a literal token string or a subclass of TokenProvider.
spawn_api_options: a specification - complete or partial - of the API Options to override the defaults inherited from the AstraDBAdmin. This allows for a deeper configuration of the database admin, e.g. concerning timeouts; if this is passed together with the named token parameter, the latter will take precedence.

Returns

An AstraDBDatabaseAdmin instance. Note: a timeout event is no guarantee at all that the creation request has not reached the API server and is not going to be, in fact, honored.

Example

>>> my_new_db_admin = my_astra_db_admin.create_database(
...     "new_database",
...     cloud_provider="aws",
...     region="ap-south-1",
... )
>>> my_new_db = my_new_db_admin.get_database()
>>> my_coll = my_new_db.create_collection(
...     "movies",
...     definition=(
...         CollectionDefinition.builder()
...         .set_vector_dimension(2)
...         .build()
...     )
... )
>>> my_coll.insert_one({"title": "The Title", "$vector": [0.1, 0.2]})

Expand source code

def create_database(
    self,
    name: str,
    *,
    cloud_provider: str,
    region: str,
    keyspace: str | None = None,
    wait_until_active: bool = True,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
    token: str | TokenProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> AstraDBDatabaseAdmin:
    """
    Ask the DevOps API to create a database and, optionally, wait for it
    to become usable.

    Args:
        name: the desired name for the database.
        cloud_provider: one of 'aws', 'gcp' or 'azure'.
        region: any of the available cloud regions.
        keyspace: name for the one keyspace the database starts with.
            When left to None, the field is not sent at all and the
            DevOps API applies its own default.
        wait_until_active: if True (default), the method keeps polling the
            DevOps API and returns only once the new database has left the
            PENDING/INITIALIZING statuses and is ACTIVE (usually a few
            minutes). If False, the method returns right after the creation
            request has been accepted, and it is the responsibility of the
            caller to check the database status before working with it.
        database_admin_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation to complete. It matters mostly when
            `wait_until_active` is true, i.e. when the method must keep
            querying the DevOps API for the status of the new database.
        request_timeout_ms: a timeout, in milliseconds, for
            each underlying DevOps API HTTP request.
        timeout_ms: an alias for *both* the `request_timeout_ms` and
            `database_admin_timeout_ms` timeout parameters. In practice,
            regardless of `wait_until_active`, this parameter dictates an
            overall timeout on this method call.
        token: if supplied, it replaces, in the returned AstraDBDatabaseAdmin,
            the token set for this object.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        spawn_api_options: a specification - complete or partial - of the
            API Options to override the defaults inherited from the AstraDBAdmin.
            This allows for a deeper configuration of the returned database
            admin, e.g. concerning timeouts; if this is passed together with
            the named token parameter, the latter will take precedence.

    Returns:
        An AstraDBDatabaseAdmin instance.

    Raises:
        DevOpsAPIException: if the creation request is not answered with the
            expected "Created" HTTP status, or if, while waiting, the
            database ends up in a status other than ACTIVE.

    Note: a timeout event is no guarantee at all that the
    creation request has not reached the API server and is not going
    to be, in fact, honored.

    Example:
        >>> my_new_db_admin = my_astra_db_admin.create_database(
        ...     "new_database",
        ...     cloud_provider="aws",
        ...     region="ap-south-1",
        ... )
        >>> my_new_db = my_new_db_admin.get_database()
        >>> my_coll = my_new_db.create_collection(
        ...     "movies",
        ...     definition=(
        ...         CollectionDefinition.builder()
        ...         .set_vector_dimension(2)
        ...         .build()
        ...     )
        ... )
        >>> my_coll.insert_one({"title": "The Title", "$vector": [0.1, 0.2]})
    """

    # Resolve the two timeouts: explicit parameter first, then the generic
    # `timeout_ms`, then this object's configured defaults.
    default_timeouts = self.api_options.timeout_options
    overall_ms, overall_label = _first_valid_timeout(
        (database_admin_timeout_ms, "database_admin_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (default_timeouts.database_admin_timeout_ms, "database_admin_timeout_ms"),
    )
    per_request_ms, per_request_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (default_timeouts.request_timeout_ms, "request_timeout_ms"),
    )

    # Fields left to None (typically the keyspace) are not sent at all.
    candidate_fields = {
        "name": name,
        "tier": "serverless",
        "cloudProvider": cloud_provider,
        "region": region,
        "capacityUnits": 1,
        "dbType": "vector",
        "keyspace": keyspace,
    }
    creation_payload = {
        field: value
        for field, value in candidate_fields.items()
        if value is not None
    }

    # A single manager tracks the overall budget across all API calls below.
    deadline_tracker = MultiCallTimeoutManager(
        overall_timeout_ms=overall_ms,
        dev_ops_api=True,
        timeout_label=overall_label,
    )

    def _next_request_timeout():
        # What is left of the overall budget, capped by the per-request timeout.
        return deadline_tracker.remaining_timeout(
            cap_time_ms=per_request_ms,
            cap_timeout_label=per_request_label,
        )

    logger.info(
        f"creating database {name}/({cloud_provider}, {region}) (DevOps API)"
    )
    creation_response = self._dev_ops_api_commander.raw_request(
        http_method=HttpMethod.POST,
        payload=creation_payload,
        timeout_context=_next_request_timeout(),
    )
    if creation_response.status_code != DEV_OPS_RESPONSE_HTTP_CREATED:
        raise DevOpsAPIException(
            f"DB creation ('{name}') failed: API returned HTTP "
            f"{creation_response.status_code} instead of "
            f"{DEV_OPS_RESPONSE_HTTP_CREATED} - Created."
        )
    # The ID of the new database is read from the "Location" response header.
    new_database_id = creation_response.headers["Location"]
    logger.info(
        "DevOps API returned from creating database "
        f"{name}/({cloud_provider}, {region})"
    )

    if wait_until_active:
        # Statuses that mean "not ready yet, keep polling".
        transient_statuses = {
            DEV_OPS_DATABASE_STATUS_PENDING,
            DEV_OPS_DATABASE_STATUS_INITIALIZING,
        }
        current_status = DEV_OPS_DATABASE_STATUS_PENDING
        while current_status in transient_statuses:
            logger.info(f"sleeping to poll for status of '{new_database_id}'")
            time.sleep(DEV_OPS_DATABASE_POLL_INTERVAL_S)
            polled_info = self._database_info_ctx(
                id=new_database_id,
                timeout_context=_next_request_timeout(),
            )
            current_status = polled_info.status
        if current_status != DEV_OPS_DATABASE_STATUS_ACTIVE:
            raise DevOpsAPIException(
                f"Database {name} entered unexpected status {current_status} after PENDING"
            )

    logger.info(
        f"finished creating database '{new_database_id}' = "
        f"{name}/({cloud_provider}, {region}) (DevOps API)"
    )
    # The named token is applied last, hence it wins over `spawn_api_options`.
    spawned_options = self.api_options.with_override(spawn_api_options)
    spawned_options = spawned_options.with_override(APIOptions(token=token))
    new_db_endpoint = build_api_endpoint(
        environment=self.api_options.environment,
        database_id=new_database_id,
        region=region,
    )
    return AstraDBDatabaseAdmin.from_astra_db_admin(
        api_endpoint=new_db_endpoint,
        astra_db_admin=self,
        spawn_api_options=spawned_options,
    )

def database_info(self, id: str, *, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> AstraDBAdminDatabaseInfo

Get the full information on a given database, through a request to the DevOps API.

Args

id: the ID of the target database, e. g. "01234567-89ab-cdef-0123-456789abcdef".
database_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying DevOps API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for database_admin_timeout_ms.
timeout_ms: an alias for database_admin_timeout_ms.

Returns

An AstraDBAdminDatabaseInfo object.

Example

>>> details_of_my_db = my_astra_db_admin.database_info("01234567-...")
>>> details_of_my_db.id
'01234567-...'
>>> details_of_my_db.status
'ACTIVE'
>>> details_of_my_db.info.region
'eu-west-1'

Expand source code

def database_info(
    self,
    id: str,
    *,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> AstraDBAdminDatabaseInfo:
    """
    Retrieve the complete description of one database via the DevOps API.

    Args:
        id: the ID of the database to look up, e. g.
            "01234567-89ab-cdef-0123-456789abcdef".
        database_admin_timeout_ms: a timeout, in milliseconds, applied to the
            underlying DevOps API request. When omitted, the defaults
            configured on this object are used.
            (Only one API request is issued by this method, which is why
            the three timeout parameters are handled interchangeably.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.

    Returns:
        An AstraDBAdminDatabaseInfo object.

    Example:
        >>> details_of_my_db = my_astra_db_admin.database_info("01234567-...")
        >>> details_of_my_db.id
        '01234567-...'
        >>> details_of_my_db.status
        'ACTIVE'
        >>> details_of_my_db.info.region
        'eu-west-1'
    """

    # Collapse the three interchangeable timeout parameters (and this
    # object's timeout options) into one value, plus the label naming
    # which setting it came from.
    effective_timeout_ms, timeout_label = _select_singlereq_timeout_da(
        timeout_options=self.api_options.timeout_options,
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    single_request_context = _TimeoutContext(
        request_ms=effective_timeout_ms,
        label=timeout_label,
    )
    return self._database_info_ctx(
        id=id,
        timeout_context=single_request_context,
    )

def drop_database(self, id: str, *, wait_until_active: bool = True, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Drop a database, i.e. delete it completely and permanently with all its data.

Args

id: The ID of the database to drop, e. g. "01234567-89ab-cdef-0123-456789abcdef".
wait_until_active: if True (default), the method returns only after the database has actually been deleted (generally a few minutes). If False, it will return right after issuing the drop request to the DevOps API, and it will be the responsibility of the caller to check the database status/availability after that, if desired.
database_admin_timeout_ms: a timeout, in milliseconds, for the whole requested operation to complete. This is used only if wait_until_active is true, i.e. if the method call must wait and keep querying the DevOps API for the status of the newly-deleted database.
request_timeout_ms: a timeout, in milliseconds, for each underlying DevOps API HTTP request.
timeout_ms: an alias for both the request_timeout_ms and database_admin_timeout_ms timeout parameters. In practice, regardless of wait_until_active, this parameter dictates an overall timeout on this method call.

Note: a timeout event is no guarantee at all that the deletion request has not reached the API server and is not going to be, in fact, honored.

Example

>>> database_list_pre = my_astra_db_admin.list_databases()
>>> len(database_list_pre)
3
>>> my_astra_db_admin.drop_database("01234567-...")
>>> database_list_post = my_astra_db_admin.list_databases()
>>> len(database_list_post)
2

Expand source code

def drop_database(
    self,
    id: str,
    *,
    wait_until_active: bool = True,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Drop a database, i.e. delete it completely and permanently with all its data.

    Args:
        id: The ID of the database to drop, e. g.
            "01234567-89ab-cdef-0123-456789abcdef".
        wait_until_active: if True (default), the method returns only after
            the database has actually been deleted (generally a few minutes).
            If False, it will return right after issuing the
            drop request to the DevOps API, and it will be the responsibility
            of the caller to check the database status/availability
            after that, if desired.
        database_admin_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation to complete. This is used only
            if `wait_until_active` is true, i.e. if the method call must
            wait and keep querying the DevOps API for the status of the
            newly-deleted database.
        request_timeout_ms: a timeout, in milliseconds, for
            each underlying DevOps API HTTP request.
        timeout_ms: an alias for *both* the `request_timeout_ms` and
            `database_admin_timeout_ms` timeout parameters. In practice,
            regardless of `wait_until_active`, this parameter dictates an
            overall timeout on this method call.

    Raises:
        DevOpsAPIException: if the DevOps API answers the termination request
            with an HTTP status other than the expected "Accepted" one, or if,
            while waiting, the database is found in a status other than the
            terminating one before it disappears from the database listing.

    Note: a timeout event is no guarantee at all that the
    deletion request has not reached the API server and is not going
    to be, in fact, honored.

    Example:
        >>> database_list_pre = my_astra_db_admin.list_databases()
        >>> len(database_list_pre)
        3
        >>> my_astra_db_admin.drop_database("01234567-...")
        >>> database_list_post = my_astra_db_admin.list_databases()
        >>> len(database_list_post)
        2
    """

    # Resolve the overall and the per-request timeouts: explicit parameter
    # first, then the shared `timeout_ms` alias, then this object's defaults.
    _database_admin_timeout_ms, _da_label = _first_valid_timeout(
        (database_admin_timeout_ms, "database_admin_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (
            self.api_options.timeout_options.database_admin_timeout_ms,
            "database_admin_timeout_ms",
        ),
    )
    _request_timeout_ms, _rt_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    # One manager is shared by the terminate request and every polling
    # request below; each request asks it for a timeout context capped
    # at the per-request timeout.
    timeout_manager = MultiCallTimeoutManager(
        overall_timeout_ms=_database_admin_timeout_ms,
        dev_ops_api=True,
        timeout_label=_da_label,
    )
    logger.info(f"dropping database '{id}' (DevOps API)")
    te_raw_response = self._dev_ops_api_commander.raw_request(
        http_method=HttpMethod.POST,
        additional_path=f"{id}/terminate",
        timeout_context=timeout_manager.remaining_timeout(
            cap_time_ms=_request_timeout_ms,
            cap_timeout_label=_rt_label,
        ),
    )
    if te_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_ACCEPTED:
        # The expected status here is "Accepted" (not "Created", which
        # belongs to the database-creation flow).
        raise DevOpsAPIException(
            f"DB deletion ('{id}') failed: API returned HTTP "
            f"{te_raw_response.status_code} instead of "
            f"{DEV_OPS_RESPONSE_HTTP_ACCEPTED} - Accepted"
        )
    logger.info(f"DevOps API returned from dropping database '{id}'")
    if wait_until_active:
        # Poll the database listing until the target is either gone
        # (status tracked as None) or seen in a non-terminating status.
        last_status_seen: str | None = DEV_OPS_DATABASE_STATUS_TERMINATING
        _db_name: str | None = None
        while last_status_seen == DEV_OPS_DATABASE_STATUS_TERMINATING:
            logger.info(f"sleeping to poll for status of '{id}'")
            time.sleep(DEV_OPS_DATABASE_POLL_INTERVAL_S)
            detected_databases = [
                a_db_info
                for a_db_info in self._list_databases_ctx(
                    include=None,
                    provider=None,
                    page_size=None,
                    timeout_context=timeout_manager.remaining_timeout(
                        cap_time_ms=_request_timeout_ms,
                        cap_timeout_label=_rt_label,
                    ),
                )
                if a_db_info.id == id
            ]
            if detected_databases:
                last_status_seen = detected_databases[0].status
                _db_name = detected_databases[0].name
            else:
                # no longer listed: the deletion is considered complete
                last_status_seen = None
        if last_status_seen is not None:
            _name_desc = f" ({_db_name})" if _db_name else ""
            raise DevOpsAPIException(
                f"Database {id}{_name_desc} entered unexpected status "
                f"{last_status_seen} after {DEV_OPS_DATABASE_STATUS_TERMINATING}"
            )
    logger.info(f"finished dropping database '{id}' (DevOps API)")

Create an AsyncDatabase instance for a specific database, to be used when doing data-level work (such as creating/managing collections).

The database can be specified by its API endpoint or, alternatively, by its (id, region) parameters: these two call patterns exclude each other.

Args

api_endpoint_or_id: positional parameter that can stand for both api_endpoint and id. Passing them together is an error.
api_endpoint: the API Endpoint for the target database (e.g. https://<ID>-<REGION>.apps.astra.datastax.com, or a custom domain if one is configured for the database). The database must exist already for the resulting object to be effectively used; in other words, this invocation does not create the database, just the object instance.
keyspace: if provided, it is passed to the AsyncDatabase; otherwise the AsyncDatabase class will apply an environment-specific default.
id: the target database ID. This is alternative to using the API Endpoint.
region: the region to use for connecting to the database. This parameter must be supplied if (and only if) the id is given for the database instead of the full API endpoint.
token: if supplied, is passed to the AsyncDatabase instead of the one set for this object. This can be either a literal token string or a subclass of TokenProvider.
spawn_api_options: a specification - complete or partial - of the API Options to override the defaults inherited from the AstraDBAdmin. This allows for a deeper configuration of the database, i.e. beyond just specifying a token; if this is passed together with the named token parameter, the latter will take precedence.

Returns

An AsyncDatabase object ready to be used.

Example

>>> async def create_use_collection(
...     admin: AstraDBAdmin,
...     api_endpoint: str,
...     keyspace: str,
... ) -> None:
...     my_async_db = admin.get_async_database(
...         api_endpoint,
...         keyspace=keyspace,
...     )
...     a_coll = await my_async_db.create_collection(
...         "movies",
...         definition=(
...             CollectionDefinition.builder()
...             .set_vector_dimension(2)
...             .build()
...         )
...     )
...     await a_coll.insert_one(
...         {"title": "The Title", "$vector": [0.3, 0.4]}
...     )
...
>>> asyncio.run(create_use_collection(
...     my_admin,
...     "https://<ID>-<REGION>.apps.astra.datastax.com",
...     "default_keyspace",
... ))
>>>

Expand source code

def get_async_database(
    self,
    api_endpoint_or_id: str | None = None,
    *,
    api_endpoint: str | None = None,
    keyspace: str | None = None,
    id: str | None = None,
    region: str | None = None,
    token: str | TokenProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> AsyncDatabase:
    """
    Build an AsyncDatabase object targeting a specific database, for
    data-level work (such as creating/managing collections).

    The target database is identified either by its API endpoint or by the
    (id, region) pair: the two ways of calling are mutually exclusive.

    This method forwards all of its arguments to `get_database` and converts
    the result with `to_async()`.

    Args:
        api_endpoint_or_id: positional parameter that can stand for both
            `api_endpoint` and `id`. Passing them together is an error.
        api_endpoint: the API Endpoint for the target database
            (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`,
            or a custom domain if one is configured for the database).
            The database must exist already for the resulting object
            to be effectively used; in other words, this invocation
            does not create the database, just the object instance.
        keyspace: if provided, it is passed to the AsyncDatabase; otherwise
            the AsyncDatabase class will apply an environment-specific default.
        id: the target database ID. This is alternative to using the API Endpoint.
        region: the region to use for connecting to the database.
            This parameter must be supplied if (and only if) the `id` is
            given for the database instead of the full API endpoint.
        token: if supplied, is passed to the AsyncDatabase instead of
            the one set for this object.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        spawn_api_options: a specification - complete or partial - of the
            API Options to override the defaults inherited from the AstraDBAdmin.
            This allows for a deeper configuration of the database, i.e. beyond
            just specifying a token; if this is passed together with
            the named token parameter, the latter will take precedence.

    Returns:
        An AsyncDatabase object ready to be used.

    Example:
        >>> async def create_use_collection(
        ...     admin: AstraDBAdmin,
        ...     api_endpoint: str,
        ...     keyspace: str,
        ... ) -> None:
        ...     my_async_db = admin.get_async_database(
        ...         api_endpoint,
        ...         keyspace=keyspace,
        ...     )
        ...     a_coll = await my_async_db.create_collection(
        ...         "movies",
        ...         definition=(
        ...             CollectionDefinition.builder()
        ...             .set_vector_dimension(2)
        ...             .build()
        ...         )
        ...     )
        ...     await a_coll.insert_one(
        ...         {"title": "The Title", "$vector": [0.3, 0.4]}
        ...     )
        ...
        >>> asyncio.run(create_use_collection(
        ...     my_admin,
        ...     "https://<ID>-<REGION>.apps.astra.datastax.com",
        ...     "default_keyspace",
        ... ))
        >>>
    """

    # All argument validation happens in the synchronous factory;
    # the async flavor is then derived from its result.
    sync_database = self.get_database(
        api_endpoint_or_id=api_endpoint_or_id,
        api_endpoint=api_endpoint,
        keyspace=keyspace,
        id=id,
        region=region,
        token=token,
        spawn_api_options=spawn_api_options,
    )
    return sync_database.to_async()

Create a Database instance for a specific database, to be used when doing data-level work (such as creating/managing collections).

The database can be specified by its API endpoint or, alternatively, by its (id, region) parameters: these two call patterns exclude each other.

Args

api_endpoint_or_id: positional parameter that can stand for both api_endpoint and id. Passing them together is an error.
api_endpoint: the API Endpoint for the target database (e.g. https://<ID>-<REGION>.apps.astra.datastax.com, or a custom domain if one is configured for the database). The database must exist already for the resulting object to be effectively used; in other words, this invocation does not create the database, just the object instance.
keyspace: if provided, it is passed to the Database; otherwise the Database class will apply an environment-specific default.
id: the target database ID. This is alternative to using the API Endpoint.
region: the region to use for connecting to the database. This parameter must be supplied if (and only if) the id is given for the database instead of the full API endpoint.
token: if supplied, is passed to the Database instead of the one set for this object. This can be either a literal token string or a subclass of TokenProvider.
spawn_api_options: a specification - complete or partial - of the API Options to override the defaults inherited from the AstraDBAdmin. This allows for a deeper configuration of the database, i.e. beyond just specifying a token; if this is passed together with the named token parameter, the latter will take precedence.

Returns

A Database object ready to be used.

Example

>>> my_db = my_astra_db_admin.get_database(
...     "https://<ID>-<REGION>.apps.astra.datastax.com",
...     keyspace="my_prod_keyspace",
... )
>>> my_coll = my_db.create_collection(
...     "movies",
...     definition=(
...         CollectionDefinition.builder()
...         .set_vector_dimension(2)
...         .build()
...     )
... )
>>> my_coll.insert_one({"title": "The Title", "$vector": [0.3, 0.4]})

Expand source code

def get_database(
    self,
    api_endpoint_or_id: str | None = None,
    *,
    api_endpoint: str | None = None,
    keyspace: str | None = None,
    id: str | None = None,
    region: str | None = None,
    token: str | TokenProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> Database:
    """
    Build a Database object targeting a specific database, for
    data-level work (such as creating/managing collections).

    The target database is identified either by its API endpoint or by the
    (id, region) pair: the two ways of calling are mutually exclusive.

    Args:
        api_endpoint_or_id: positional parameter that can stand for both
            `api_endpoint` and `id`. Passing them together is an error.
        api_endpoint: the API Endpoint for the target database
            (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`,
            or a custom domain if one is configured for the database).
            The database must exist already for the resulting object
            to be effectively used; in other words, this invocation
            does not create the database, just the object instance.
        keyspace: if provided, it is passed to the Database; otherwise
            the Database class will apply an environment-specific default.
        id: the target database ID. This is alternative to using the API Endpoint.
        region: the region to use for connecting to the database.
            This parameter must be supplied if (and only if) the `id` is
            given for the database instead of the full API endpoint.
        token: if supplied, is passed to the Database instead of
            the one set for this object.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        spawn_api_options: a specification - complete or partial - of the
            API Options to override the defaults inherited from the AstraDBAdmin.
            This allows for a deeper configuration of the database, i.e. beyond
            just specifying a token; if this is passed together with
            the named token parameter, the latter will take precedence.

    Returns:
        A Database object ready to be used.

    Raises:
        ValueError: if `region` is passed together with an API endpoint,
            if neither an API endpoint nor an ID is available, or if an ID
            is given without a `region`.
        InvalidEnvironmentException: if the API endpoint can be parsed and
            its environment differs from the one in the resulting API options.

    Example:
        >>> my_db = my_astra_db_admin.get_database(
        ...     "https://<ID>-<REGION>.apps.astra.datastax.com",
        ...     keyspace="my_prod_keyspace",
        ... )
        >>> my_coll = my_db.create_collection(
        ...     "movies",
        ...     definition=(
        ...         CollectionDefinition.builder()
        ...         .set_vector_dimension(2)
        ...         .build()
        ...     )
        ... )
        >>> my_coll.insert_one({"title": "The Title", "$vector": [0.3, 0.4]})
    """

    endpoint_arg, id_arg = check_id_endpoint_parg_kwargs(
        p_arg=api_endpoint_or_id, api_endpoint=api_endpoint, id=id
    )
    # imported lazily: a module-level import would be circular
    from astrapy import Database

    # Layering: admin options, then spawn_api_options, then the named token.
    options_with_spawn_overrides = self.api_options.with_override(spawn_api_options)
    database_api_options = options_with_spawn_overrides.with_override(
        APIOptions(token=token),
    )

    if endpoint_arg is None:
        # Identification by (id, region): the endpoint must be assembled.
        if id_arg is None:
            raise ValueError("Either `api_endpoint` or `id` must be supplied.")
        if region is None:
            raise ValueError("Parameter `region` must be supplied with `id`.")
        target_api_endpoint = build_api_endpoint(
            environment=self.api_options.environment,
            database_id=id_arg,
            region=region,
        )
    else:
        # Identification by API endpoint: `region` has no place here.
        if region is not None:
            raise ValueError(
                "Parameter `region` not supported with an API endpoint."
            )
        endpoint_details = parse_api_endpoint(endpoint_arg)
        if endpoint_details is None:
            # An endpoint that cannot be parsed is still accepted as is
            # (presumably a custom domain); this is only logged.
            logger.info(api_endpoint_parsing_cdinfo_message(endpoint_arg))
        elif endpoint_details.environment != database_api_options.environment:
            raise InvalidEnvironmentException(
                "Environment mismatch between client and provided "
                "API endpoint. You can try adding "
                f'`environment="{endpoint_details.environment}"` '
                "to the DataAPIClient creation statement."
            )
        target_api_endpoint = endpoint_arg

    return Database(
        api_endpoint=target_api_endpoint,
        keyspace=keyspace,
        api_options=database_api_options,
    )

Create an AstraDBDatabaseAdmin object for admin work within a certain database.

The database can be specified by its API endpoint or, alternatively, by its (id, region) parameters: these two call patterns exclude each other.

Args

api_endpoint_or_id: positional parameter that can stand for both api_endpoint and id. Passing them together is an error.
api_endpoint: the API Endpoint for the target database (i.e. https://<ID>-<REGION>.apps.astra.datastax.com. Note that no 'Custom Domain' endpoints are accepted). The database must exist already for the resulting object to be effectively used; in other words, this invocation does not create the database, just the object instance.
id: the target database ID. This is alternative to using the API Endpoint.
region: the region to use for connecting to the database. This parameter must be supplied if (and only if) the id is given for the database instead of the full API endpoint.
token: if supplied, is passed to the AstraDBDatabaseAdmin instead of the one set for this object. This can be either a literal token string or a subclass of TokenProvider.
spawn_api_options: a specification - complete or partial - of the API Options to override the defaults inherited from the AstraDBAdmin. This allows for a deeper configuration of the database admin, i.e. beyond just specifying a token; if this is passed together with the named token parameter, the latter will take precedence.

Returns

An AstraDBDatabaseAdmin instance representing the requested database.

Example

>>> my_db_admin = my_astra_db_admin.get_database_admin("01234567-...", region="eu-west-1")
>>> my_db_admin.list_keyspaces()
['default_keyspace']
>>> my_db_admin.create_keyspace("that_other_one")
>>> my_db_admin.list_keyspaces()
['default_keyspace', 'that_other_one']

Note

This method does not perform any admin-level operation through the DevOps API. For actual creation of a database, see the create_database method.

Expand source code

def get_database_admin(
    self,
    api_endpoint_or_id: str | None = None,
    *,
    api_endpoint: str | None = None,
    id: str | None = None,
    region: str | None = None,
    token: str | TokenProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> AstraDBDatabaseAdmin:
    """
    Build an AstraDBDatabaseAdmin object for admin work within a given database.

    The target database is identified either by its API endpoint or by the
    (id, region) pair: the two ways of calling are mutually exclusive.

    Args:
        api_endpoint_or_id: positional parameter that can stand for both
            `api_endpoint` and `id`. Passing them together is an error.
        api_endpoint: the API Endpoint for the target database
            (i.e. `https://<ID>-<REGION>.apps.astra.datastax.com`.
            Note that no 'Custom Domain' endpoints are accepted).
            The database must exist already for the resulting object
            to be effectively used; in other words, this invocation
            does not create the database, just the object instance.
        id: the target database ID. This is alternative to using the API Endpoint.
        region: the region to use for connecting to the database.
            This parameter must be supplied if (and only if) the `id` is
            given for the database instead of the full API endpoint.
        token: if supplied, is passed to the AstraDBDatabaseAdmin instead of
            the one set for this object.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        spawn_api_options: a specification - complete or partial - of the
            API Options to override the defaults inherited from the AstraDBAdmin.
            This allows for a deeper configuration of the database admin, i.e.
            beyond just specifying a token; if this is passed together with
            the named token parameter, the latter will take precedence.

    Returns:
        An AstraDBDatabaseAdmin instance representing the requested database.

    Raises:
        ValueError: if `region` is passed together with an API endpoint,
            if neither an API endpoint nor an ID is available, or if an ID
            is given without a `region`.

    Example:
        >>> my_db_admin = my_astra_db_admin.get_database_admin(
        ...     "01234567-...",
        ...     region="eu-west-1",
        ... )
        >>> my_db_admin.list_keyspaces()
        ['default_keyspace']
        >>> my_db_admin.create_keyspace("that_other_one")
        >>> my_db_admin.list_keyspaces()
        ['default_keyspace', 'that_other_one']

    Note:
        This method does not perform any admin-level operation through
        the DevOps API. For actual creation of a database, see the
        `create_database` method.
    """

    endpoint_arg, id_arg = check_id_endpoint_parg_kwargs(
        p_arg=api_endpoint_or_id, api_endpoint=api_endpoint, id=id
    )
    # Layering: admin options, then spawn_api_options, then the named token.
    options_with_spawn_overrides = self.api_options.with_override(spawn_api_options)
    admin_api_options = options_with_spawn_overrides.with_override(
        APIOptions(token=token),
    )

    if endpoint_arg is not None:
        # Identification by API endpoint: `region` has no place here.
        if region is not None:
            raise ValueError(
                "Parameter `region` not supported with an API endpoint."
            )
        target_api_endpoint = endpoint_arg
    else:
        # Identification by (id, region): the endpoint must be assembled.
        if id_arg is None:
            raise ValueError("Either `api_endpoint` or `id` must be supplied.")
        if region is None:
            raise ValueError("Parameter `region` must be supplied with `id`.")
        target_api_endpoint = build_api_endpoint(
            environment=self.api_options.environment,
            database_id=id_arg,
            region=region,
        )

    return AstraDBDatabaseAdmin.from_astra_db_admin(
        api_endpoint=target_api_endpoint,
        astra_db_admin=self,
        spawn_api_options=admin_api_options,
    )

Get the list of databases, as obtained with a request to the DevOps API.

Args

include: a filter on what databases are to be returned. As per DevOps API, defaults to "nonterminated". Pass "all" to include the already terminated databases.
provider: a filter on the cloud provider for the databases. As per DevOps API, defaults to "ALL". Pass e.g. "AWS" to restrict the results.
page_size: number of results per page from the DevOps API.
database_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (While in the case of very many databases this method may entail multiple DevOps API requests, it is assumed here that this method amounts almost always to one single request: the only timeout imposed on this method execution is one acting on each individual request, with no checks on its overall completion time.)
request_timeout_ms: an alias for database_admin_timeout_ms.
timeout_ms: an alias for database_admin_timeout_ms.

Returns

A list of AstraDBAdminDatabaseInfo objects.

Example

>>> database_list = my_astra_db_admin.list_databases()
>>> len(database_list)
3
>>> database_list[2].id
'01234567-...'
>>> database_list[2].status
'ACTIVE'
>>> database_list[2].info.region
'eu-west-1'

Expand source code

def list_databases(
    self,
    *,
    include: str | None = None,
    provider: str | None = None,
    page_size: int | None = None,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[AstraDBAdminDatabaseInfo]:
    """
    Retrieve the list of databases by querying the DevOps API.

    Args:
        include: a filter on what databases are to be returned. As per
            DevOps API, defaults to "nonterminated". Pass "all" to include
            the already terminated databases.
        provider: a filter on the cloud provider for the databases.
            As per DevOps API, defaults to "ALL". Pass e.g. "AWS" to
            restrict the results.
        page_size: number of results per page from the DevOps API.
        database_admin_timeout_ms: a timeout, in milliseconds, applied to the
            underlying API request. When omitted, the defaults configured
            on this object are used.
            (With a very large number of databases this method may need
            several DevOps API requests; nevertheless it is treated as
            amounting, almost always, to a single request: the timeout
            acts on each individual request only, and the overall
            completion time of the method is not checked.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.

    Returns:
        A list of AstraDBAdminDatabaseInfo objects.

    Example:
        >>> database_list = my_astra_db_admin.list_databases()
        >>> len(database_list)
        3
        >>> database_list[2].id
        '01234567-...'
        >>> database_list[2].status
        'ACTIVE'
        >>> database_list[2].info.region
        'eu-west-1'
    """

    # Collapse the three interchangeable timeout parameters (and this
    # object's timeout options) into one value, plus the label naming
    # which setting it came from.
    effective_timeout_ms, timeout_label = _select_singlereq_timeout_da(
        timeout_options=self.api_options.timeout_options,
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    per_request_context = _TimeoutContext(
        request_ms=effective_timeout_ms,
        label=timeout_label,
    )
    return self._list_databases_ctx(
        include=include,
        provider=provider,
        page_size=page_size,
        timeout_context=per_request_context,
    )

def with_options(self, *, token: str | TokenProvider | UnsetType = (unset), api_options: APIOptions | UnsetType = (unset)) ‑> AstraDBAdmin

Create a clone of this AstraDBAdmin with some changed attributes.

Args

token: an Access Token to the database. Example: "AstraCS:xyz...". This can be either a literal token string or a subclass of TokenProvider.
api_options: any additional options to set for the clone, in the form of an APIOptions instance (where one can set just the needed attributes). In case the same setting is also provided as named parameter, the latter takes precedence.

Returns

a new AstraDBAdmin instance.

Example

>>> different_auth_astra_db_admin = my_astra_db_admin.with_options(
...     token="AstraCS:xyz...",
... )

Expand source code

def with_options(
    self,
    *,
    token: str | TokenProvider | UnsetType = _UNSET,
    api_options: APIOptions | UnsetType = _UNSET,
) -> AstraDBAdmin:
    """
    Return a copy of this AstraDBAdmin in which some attributes are replaced.

    Args:
        token: an Access Token to the database, e.g. `"AstraCS:xyz..."`.
            Either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider` is accepted.
        api_options: further options to apply to the copy, expressed as
            an APIOptions instance (only the attributes that need changing
            have to be set). If a setting is given both here and through
            a named parameter, the named parameter wins.

    Returns:
        a new AstraDBAdmin instance.

    Example:
        >>> different_auth_astra_db_admin = my_astra_db_admin.with_options(
        ...     token="AstraCS:xyz...",
        ... )
    """

    # The actual combination of the overrides is delegated to `_copy`.
    overrides = {"token": token, "api_options": api_options}
    return self._copy(**overrides)

class AstraDBDatabaseAdmin (*, api_endpoint: str, api_options: FullAPIOptions, spawner_database: Database | AsyncDatabase | None = None, spawner_astra_db_admin: AstraDBAdmin | None = None)

An "admin" object, able to perform administrative tasks at the keyspaces level (i.e. within a certain database), such as creating/listing/dropping keyspaces.

This is one layer below the AstraDBAdmin concept, in that it is tied to a single database and enables admin work within it.

This class is not meant for direct instantiation by the user, rather it is obtained by invoking methods such as get_database_admin of AstraDBAdmin.

Args

api_endpoint: the API Endpoint for the target database (i.e. https://<ID>-<REGION>.apps.astra.datastax.com. Note that no 'Custom Domain' endpoints are accepted). The database must exist already for the resulting object to be effectively used; in other words, this invocation does not create the database, just the object instance. Database admin objects cannot work with 'Custom Domain' endpoints.
api_options: a complete specification of the API Options for this instance.
spawner_database: either a Database or an AsyncDatabase instance. This represents the database class which spawns this admin object, so that, if required, a keyspace creation can retroactively "use" the new keyspace in the spawner. Used to enable the Async/Database.get_database_admin().create_keyspace() pattern.
spawner_astra_db_admin: an AstraDBAdmin instance. This, if provided, is the instance that spawned this Database Admin and is used to delegate operations such as drop, get_database and so on. If not passed, a new one is created automatically.

Example

>>> from astrapy import DataAPIClient
>>> my_client = DataAPIClient("AstraCS:...")
>>> admin_for_my_db = my_client.get_admin().get_database_admin(
...     "https://<ID>-<REGION>.apps.astra.datastax.com"
... )
>>> admin_for_my_db.list_keyspaces()
['default_keyspace', 'staging_keyspace']
>>> admin_for_my_db.info().status
'ACTIVE'

Note

creating an instance of AstraDBDatabaseAdmin does not trigger actual creation of the database itself, which should exist beforehand. To create databases, see the AstraDBAdmin class.

Note

a more powerful token may be required than the one sufficient for working in the Database, Collection and Table classes. Check the provided token if "Unauthorized" errors are encountered.

Expand source code

class AstraDBDatabaseAdmin(DatabaseAdmin):
    """
    An "admin" object, able to perform administrative tasks at the keyspaces level
    (i.e. within a certain database), such as creating/listing/dropping keyspaces.

    This is one layer below the AstraDBAdmin concept, in that it is tied to
    a single database and enables admin work within it.

    This class is not meant for direct instantiation by the user, rather
    it is obtained by invoking methods such as `get_database_admin`
    of AstraDBAdmin.

    Args:
        api_endpoint: the API Endpoint for the target database
            (i.e. `https://<ID>-<REGION>.apps.astra.datastax.com`.
            Note that no 'Custom Domain' endpoints are accepted).
            The database must exist already for the resulting object
            to be effectively used; in other words, this invocation
            does not create the database, just the object instance.
            Database admin objects cannot work with 'Custom Domain' endpoints.
        api_options: a complete specification of the API Options for this instance.
        spawner_database: either a Database or an AsyncDatabase instance. This represents
            the database class which spawns this admin object, so that, if required,
            a keyspace creation can retroactively "use" the new keyspace in the spawner.
            Used to enable the Async/Database.get_admin_database().create_keyspace()
            pattern.
        spawner_astra_db_admin: an AstraDBAdmin instance. This, if provided, is
            the instance that spawned this Database Admin and is used to delegate
            operations such as drop, get_database and so on. If not passed, a new
            one is created automatically.

    Example:
        >>> from astrapy import DataAPIClient
        >>> my_client = DataAPIClient("AstraCS:...")
        >>> admin_for_my_db = my_client.get_admin().get_database_admin(
        ...     "https://<ID>-<REGION>.apps.astra.datastax.com"
        ... )
        >>> admin_for_my_db.list_keyspaces()
        ['default_keyspace', 'staging_keyspace']
        >>> admin_for_my_db.info().status
        'ACTIVE'

    Note:
        creating an instance of AstraDBDatabaseAdmin does not trigger actual creation
        of the database itself, which should exist beforehand. To create databases,
        see the AstraDBAdmin class.

    Note:
        a more powerful token may be required than the one sufficient for working
        in the Database, Collection and Table classes. Check the provided token
        if "Unauthorized" errors are encountered.
    """

    def __init__(
        self,
        *,
        api_endpoint: str,
        api_options: FullAPIOptions,
        spawner_database: Database | AsyncDatabase | None = None,
        spawner_astra_db_admin: AstraDBAdmin | None = None,
    ) -> None:
        """
        Validate the environment and endpoint, then prepare the two API
        commanders (Data API and DevOps API) and the associated AstraDBAdmin.

        Raises:
            InvalidEnvironmentException: if the options' environment is not
                among `Environment.astra_db_values`, or if it differs from
                the environment parsed out of the API endpoint.
            ValueError: if the API endpoint cannot be parsed.
        """
        # imported lazily: a module-level import would be circular
        from astrapy.database import Database

        if api_options.environment not in Environment.astra_db_values:
            raise InvalidEnvironmentException(
                "Environments outside of Astra DB are not supported."
            )

        self.api_options = api_options
        self.api_endpoint = api_endpoint

        # database ID and region are extracted from the endpoint itself
        endpoint_parts = parse_api_endpoint(self.api_endpoint)
        if endpoint_parts is None:
            raise ValueError(api_endpoint_parsing_error_message(self.api_endpoint))
        self._database_id = endpoint_parts.database_id
        self._region = endpoint_parts.region
        if endpoint_parts.environment != self.api_options.environment:
            raise InvalidEnvironmentException(
                "Environment mismatch between client and provided "
                "API endpoint. You can try adding "
                f'`environment="{endpoint_parts.environment}"` '
                "to the class constructor."
            )

        # When no spawner is given, a Database is created here; its keyspace
        # is left as None so that the Database picks its own default.
        self.spawner_database = (
            spawner_database
            if spawner_database is not None
            else Database(
                api_endpoint=self.api_endpoint,
                keyspace=None,
                api_options=self.api_options,
            )
        )

        # Data API commander (for the vectorizeOps invocations): although it
        # targets the Data API, this is admin work, hence the admin headers.
        self._commander_headers = {
            DEFAULT_DATA_API_AUTH_HEADER: self.api_options.token.get_token(),
            **self.api_options.admin_additional_headers,
        }
        self._api_commander = self._get_api_commander()

        # DevOps API commander (keyspace CRUD, etc): the auth header is
        # included only if the token evaluates as truthy.
        auth_headers: dict[str, str | None] = {}
        if self.api_options.token:
            bearer_token = self.api_options.token.get_token()
            auth_headers = {
                DEFAULT_DEV_OPS_AUTH_HEADER: f"{DEFAULT_DEV_OPS_AUTH_PREFIX}{bearer_token}",
            }
        self._dev_ops_commander_headers: dict[str, str | None] = {
            **auth_headers,
            **self.api_options.admin_additional_headers,
        }
        self._dev_ops_api_commander = self._get_dev_ops_api_commander()

        # keep a reference to the AstraDBAdmin for this org, creating one if needed
        self._astra_db_admin = (
            spawner_astra_db_admin
            if spawner_astra_db_admin is not None
            else AstraDBAdmin(api_options=self.api_options)
        )

    def __repr__(self) -> str:
        parts = [
            f'api_endpoint="{self.api_endpoint}"',
            f"api_options={self.api_options}",
        ]
        return f"{self.__class__.__name__}({', '.join(parts)})"

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, AstraDBDatabaseAdmin):
            return all(
                [
                    self.api_endpoint == other.api_endpoint,
                    self.api_options == other.api_options,
                ]
            )
        else:
            return False

    def _get_api_commander(self) -> APICommander:
        """
        Build a new APICommander for Data API calls.

        The base path is a leading slash followed by the API path and the
        API version, each stripped of slashes; components that are None
        or empty after stripping are left out.
        """
        url_options = self.api_options.data_api_url_options
        path_segments: list[str] = []
        for raw_segment in (url_options.api_path, url_options.api_version):
            if raw_segment is None:
                continue
            segment = raw_segment.strip("/")
            if segment != "":
                path_segments.append(segment)
        return APICommander(
            api_endpoint=self.api_endpoint,
            path="/" + "/".join(path_segments),
            headers=self._commander_headers,
            callers=self.api_options.callers,
            redacted_header_names=self.api_options.redacted_header_names,
        )

    def _get_dev_ops_api_commander(self) -> APICommander:
        """
        Build a new APICommander for DevOps API calls scoped to this database.

        The base path joins the DevOps API version, the literal "databases"
        and the database ID, each stripped of slashes; components that are
        None or empty after stripping are left out. No leading slash is added.
        """
        raw_segments = (
            self.api_options.dev_ops_api_url_options.dev_ops_api_version,
            "databases",
            self._database_id,
        )
        path_segments: list[str] = []
        for raw_segment in raw_segments:
            if raw_segment is None:
                continue
            segment = raw_segment.strip("/")
            if segment != "":
                path_segments.append(segment)
        return APICommander(
            api_endpoint=self.api_options.dev_ops_api_url_options.dev_ops_url,
            path="/".join(path_segments),
            headers=self._dev_ops_commander_headers,
            callers=self.api_options.callers,
            dev_ops_api=True,
            redacted_header_names=self.api_options.redacted_header_names,
        )

    def _copy(
        self,
        *,
        token: str | TokenProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> AstraDBDatabaseAdmin:
        """
        Create a new AstraDBDatabaseAdmin for the same endpoint and spawners.

        The current API options are overridden first with `api_options`
        and then with the options built from the named `token` parameter.
        """
        token_override = APIOptions(
            token=token,
        )
        with_given_options = self.api_options.with_override(api_options)
        merged_options = with_given_options.with_override(token_override)
        return AstraDBDatabaseAdmin(
            api_endpoint=self.api_endpoint,
            api_options=merged_options,
            spawner_database=self.spawner_database,
            spawner_astra_db_admin=self._astra_db_admin,
        )

    def with_options(
        self,
        *,
        token: str | TokenProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> AstraDBDatabaseAdmin:
        """
        Create a clone of this AstraDBDatabaseAdmin with some changed attributes.

        Args:
            token: an Access Token to the database. Example: `"AstraCS:xyz..."`.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            api_options: any additional options to set for the clone, in the form of
                an APIOptions instance (where one can set just the needed attributes).
                In case the same setting is also provided as named parameter,
                the latter takes precedence.

        Returns:
            a new AstraDBDatabaseAdmin instance.

        Example:
            >>> admin_for_my_other_db = admin_for_my_db.with_options(
            ...     "https://<ID>-<REGION>.apps.astra.datastax.com",
            ... )
        """

        return self._copy(
            token=token,
            api_options=api_options,
        )

    @property
    def id(self) -> str:
        """
        The ID of this database admin.

        Example:
            >>> my_db_admin.id
            '01234567-89ab-cdef-0123-456789abcdef'
        """
        return self._database_id

    @property
    def region(self) -> str:
        """
        The region for this database admin.

        Example:
            >>> my_db_admin.region
            'us-east-1'
        """
        return self._region

    @staticmethod
    def from_astra_db_admin(
        api_endpoint: str,
        *,
        astra_db_admin: AstraDBAdmin,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AstraDBDatabaseAdmin:
        """
        Build an AstraDBDatabaseAdmin out of an AstraDBAdmin and an API Endpoint.

        Args:
            api_endpoint: the API Endpoint for the target database
                (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`).
                This call only creates the object instance, not the database:
                the latter must exist already for the returned object
                to be effectively used.
            astra_db_admin: an AstraDBAdmin object that has visibility over
                the target database. It is also passed along as the spawner
                of the returned object.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults inherited from the AstraDBAdmin.
                This allows for a deeper configuration of the database, e.g.
                concerning timeouts.

        Returns:
            An AstraDBDatabaseAdmin object, for admin work within the database.

        Example:
            >>> from astrapy import DataAPIClient, AstraDBDatabaseAdmin
            >>> admin_for_my_db = AstraDBDatabaseAdmin.from_astra_db_admin(
            ...     "https://<ID>-<REGION>.apps.astra.datastax.com",
            ...     astra_db_admin=DataAPIClient("AstraCS:...").get_admin(),
            ... )
            >>> admin_for_my_db.list_keyspaces()
            ['default_keyspace', 'staging_keyspace']
            >>> admin_for_my_db.info().status
            'ACTIVE'

        Note:
            Creating an instance of AstraDBDatabaseAdmin does not trigger actual creation
            of the database itself, which should exist beforehand. To create databases,
            see the AstraDBAdmin class.
        """

        # the admin's own options, overridden by whatever was passed for spawning
        effective_options = astra_db_admin.api_options.with_override(
            spawn_api_options
        )
        return AstraDBDatabaseAdmin(
            api_endpoint=api_endpoint,
            api_options=effective_options,
            spawner_astra_db_admin=astra_db_admin,
        )

    def info(
        self,
        *,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> AstraDBAdminDatabaseInfo:
        """
        Retrieve the full info on this database from the DevOps API.

        The request is delegated to the AstraDBAdmin associated to this object,
        with this database's ID and the timeout parameters passed through.

        Args:
            database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.

        Returns:
            An AstraDBAdminDatabaseInfo object.

        Example:
            >>> my_db_info = admin_for_my_db.info()
            >>> my_db_info.status
            'ACTIVE'
            >>> my_db_info.info.region
            'us-east1'
        """

        db_id = self._database_id
        logger.info(f"getting info ('{db_id}')")
        database_info = self._astra_db_admin.database_info(
            id=db_id,
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info(f"finished getting info ('{db_id}')")
        return database_info

    async def async_info(
        self,
        *,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> AstraDBAdminDatabaseInfo:
        """
        Retrieve the full info on this database from the DevOps API.
        Async version of the method, for use in an asyncio context.

        The request is delegated to the AstraDBAdmin associated to this object,
        with this database's ID and the timeout parameters passed through.

        Args:
            database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.

        Returns:
            An AstraDBAdminDatabaseInfo object.

        Example:
            >>> async def wait_until_active(db_admin: AstraDBDatabaseAdmin) -> None:
            ...     while True:
            ...         info = await db_admin.async_info()
            ...         if info.status == "ACTIVE":
            ...             return
            ...
            >>> asyncio.run(wait_until_active(admin_for_my_db))
        """

        db_id = self._database_id
        logger.info(f"getting info ('{db_id}'), async")
        database_info = await self._astra_db_admin.async_database_info(
            id=db_id,
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info(f"finished getting info ('{db_id}'), async")
        return database_info

    def list_keyspaces(
        self,
        *,
        keyspace_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[str]:
        """
        Get the list of keyspaces in the database through the DevOps API.

        The keyspaces are read from the raw response of the `info` method
        (under "info" -> "keyspaces"); a missing or empty entry yields
        an empty list.

        Args:
            keyspace_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `keyspace_admin_timeout_ms`.
            timeout_ms: an alias for `keyspace_admin_timeout_ms`.

        Returns:
            A list of the keyspaces, each a string, in no particular order.

        Raises:
            DevOpsAPIException: if the database info carries no raw response.

        Example:
            >>> admin_for_my_db.list_keyspaces()
            ['default_keyspace', 'staging_keyspace']
        """

        logger.info(f"getting keyspaces ('{self._database_id}')")
        # the keyspace timeout is handed to `info` as its database-admin timeout
        db_info = self.info(
            database_admin_timeout_ms=keyspace_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info(f"finished getting keyspaces ('{self._database_id}')")
        if db_info.raw is None:
            raise DevOpsAPIException("Could not get the keyspace list.")
        found_keyspaces = db_info.raw.get("info", {}).get("keyspaces")
        return found_keyspaces or []

    async def async_list_keyspaces(
        self,
        *,
        keyspace_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[str]:
        """
        Query the DevOps API for a list of the keyspaces in the database.
        Async version of the method, for use in an asyncio context.

        Args:
            keyspace_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `keyspace_admin_timeout_ms`.
            timeout_ms: an alias for `keyspace_admin_timeout_ms`.

        Returns:
            A list of the keyspaces, each a string, in no particular order.

        Example:
            >>> async def check_if_ks_exists(
            ...     db_admin: AstraDBDatabaseAdmin, keyspace: str
            ... ) -> bool:
            ...     ks_list = await db_admin.async_list_keyspaces()
            ...     return keyspace in ks_list
            ...
            >>> asyncio.run(check_if_ks_exists(admin_for_my_db, "dragons"))
            False
            >>> asyncio.run(check_if_db_exists(admin_for_my_db, "app_keyspace"))
            True
        """

        logger.info(f"getting keyspaces ('{self._database_id}'), async")
        info = await self.async_info(
            database_admin_timeout_ms=keyspace_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info(f"finished getting keyspaces ('{self._database_id}'), async")
        if info.raw is None:
            raise DevOpsAPIException("Could not get the keyspace list.")
        else:
            return info.raw.get("info", {}).get("keyspaces") or []

    def create_keyspace(
        self,
        name: str,
        *,
        wait_until_active: bool = True,
        update_db_keyspace: bool | None = None,
        keyspace_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
        **kwargs: Any,
    ) -> None:
        """
        Create a keyspace in this database as requested,
        optionally waiting for it to be ready.

        Args:
            name: the keyspace name. If supplying a keyspace that exists
                already, the method call proceeds as usual, no errors are
                raised, and the whole invocation is a no-op.
            wait_until_active: if True (default), the method returns only after
                the target database is in ACTIVE state again (a few
                seconds, usually). If False, it will return right after issuing the
                creation request to the DevOps API, and it will be responsibility
                of the caller to check the database status/keyspace availability
                before working with it.
            update_db_keyspace: if True, the `Database` or `AsyncDatabase` class
                that spawned this DatabaseAdmin, if any, gets updated to work on
                the newly-created keyspace starting when this method returns.
            keyspace_admin_timeout_ms: a timeout, in milliseconds, for the whole
                requested operation to complete. This is used only
                if `wait_until_active` is true, i.e. if the method call must
                wait and keep querying the DevOps API for the status of the
                database during keyspace creation.
            request_timeout_ms: a timeout, in milliseconds, for
                each underlying DevOps API HTTP request.
            timeout_ms: an alias for *both* the `request_timeout_ms` and
                `keyspace_admin_timeout_ms` timeout parameters. In practice,
                regardless of `wait_until_active`, this parameter dictates an
                overall timeout on this method call.
            **kwargs: accepted by the signature, but not used in this method.

        Raises:
            DevOpsAPIException: if the creation request does not return the
                expected "Created" HTTP status; if, while waiting, the database
                leaves the MAINTENANCE status for anything other than ACTIVE;
                or if, after waiting, the keyspace is not found in the list
                of keyspaces.

        Note: a timeout event is no guarantee at all that the
        creation request has not reached the API server and is not going
        to be, in fact, honored.

        Example:
            >>> my_db_admin.list_keyspaces()
            ['default_keyspace']
            >>> my_db_admin.create_keyspace("that_other_one")
            >>> my_db_admin.list_keyspaces()
            ['default_keyspace', 'that_other_one']
        """

        # Overall budget for the operation. Candidates are passed in this order:
        # explicit `keyspace_admin_timeout_ms`, then `timeout_ms`, then the
        # default configured in this object's API options.
        _keyspace_admin_timeout_ms, _ka_label = _first_valid_timeout(
            (keyspace_admin_timeout_ms, "keyspace_admin_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.keyspace_admin_timeout_ms,
                "keyspace_admin_timeout_ms",
            ),
        )
        # Per-request cap, with the analogous order of candidates.
        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        # The manager supplies, for each request below, a timeout context
        # capped at the per-request timeout.
        timeout_manager = MultiCallTimeoutManager(
            overall_timeout_ms=_keyspace_admin_timeout_ms,
            dev_ops_api=True,
            timeout_label=_ka_label,
        )
        logger.info(
            f"creating keyspace '{name}' on " f"'{self._database_id}' (DevOps API)"
        )
        # The creation itself: a POST to "keyspaces/<name>" on the DevOps API
        # commander of this database.
        cn_raw_response = self._dev_ops_api_commander.raw_request(
            http_method=HttpMethod.POST,
            additional_path=f"keyspaces/{name}",
            timeout_context=timeout_manager.remaining_timeout(
                cap_time_ms=_request_timeout_ms,
                cap_timeout_label=_rt_label,
            ),
        )
        # Any status code other than the expected "Created" one is a failure.
        if cn_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_CREATED:
            raise DevOpsAPIException(
                f"keyspace creation ('{name}') failed: API returned HTTP "
                f"{cn_raw_response.status_code} instead of "
                f"{DEV_OPS_RESPONSE_HTTP_CREATED} - Created."
            )
        logger.info(
            "DevOps API returned from creating keyspace "
            f"'{name}' on '{self._database_id}'"
        )
        if wait_until_active:
            # The status starts out as MAINTENANCE so that the loop body runs
            # at least once: sleep first, then query the database status.
            last_status_seen = DEV_OPS_DATABASE_STATUS_MAINTENANCE
            while last_status_seen == DEV_OPS_DATABASE_STATUS_MAINTENANCE:
                logger.info(f"sleeping to poll for status of '{self._database_id}'")
                time.sleep(DEV_OPS_KEYSPACE_POLL_INTERVAL_S)
                last_status_seen = self._astra_db_admin._database_info_ctx(
                    id=self._database_id,
                    timeout_context=timeout_manager.remaining_timeout(
                        cap_time_ms=_request_timeout_ms,
                        cap_timeout_label=_rt_label,
                    ),
                ).status
            # Leaving MAINTENANCE for anything other than ACTIVE is an error.
            if last_status_seen != DEV_OPS_DATABASE_STATUS_ACTIVE:
                raise DevOpsAPIException(
                    f"Database entered unexpected status {last_status_seen} after MAINTENANCE."
                )
            # is the keyspace found?
            # NOTE(review): this call passes no timeout arguments, so it does not
            # use `timeout_manager` nor the timeouts given to this method -
            # confirm whether that is intended.
            if name not in self.list_keyspaces():
                raise DevOpsAPIException("Could not create the keyspace.")
        logger.info(
            f"finished creating keyspace '{name}' on "
            f"'{self._database_id}' (DevOps API)"
        )
        # Switch the spawner database to the new keyspace only when requested
        # (None, the default, and False both skip this step).
        if update_db_keyspace:
            self.spawner_database.use_keyspace(name)

    async def async_create_keyspace(
        self,
        name: str,
        *,
        wait_until_active: bool = True,
        update_db_keyspace: bool | None = None,
        keyspace_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
        **kwargs: Any,
    ) -> None:
        """
        Create a keyspace in this database as requested,
        optionally waiting for it to be ready.
        Async version of the method, for use in an asyncio context.

        Args:
            name: the keyspace name. If supplying a keyspace that exists
                already, the method call proceeds as usual, no errors are
                raised, and the whole invocation is a no-op.
            wait_until_active: if True (default), the method returns only after
                the target database is in ACTIVE state again (a few
                seconds, usually). If False, it will return right after issuing the
                creation request to the DevOps API, and it will be responsibility
                of the caller to check the database status/keyspace availability
                before working with it.
            update_db_keyspace: if True, the `Database` or `AsyncDatabase` class
                that spawned this DatabaseAdmin, if any, gets updated to work on
                the newly-created keyspace starting when this method returns.
            keyspace_admin_timeout_ms: a timeout, in milliseconds, for the whole
                requested operation to complete. This is used only
                if `wait_until_active` is true, i.e. if the method call must
                wait and keep querying the DevOps API for the status of the
                database during keyspace creation.
            request_timeout_ms: a timeout, in milliseconds, for
                each underlying DevOps API HTTP request.
            timeout_ms: an alias for *both* the `request_timeout_ms` and
                `keyspace_admin_timeout_ms` timeout parameters. In practice,
                regardless of `wait_until_active`, this parameter dictates an
                overall timeout on this method call.

        Note: a timeout event is no guarantee at all that the
        creation request has not reached the API server and is not going
        to be, in fact, honored.

        Example:
            >>> asyncio.run(
            ...     my_db_admin.async_create_keyspace("app_keyspace")
            ... )
        """

        _keyspace_admin_timeout_ms, _ka_label = _first_valid_timeout(
            (keyspace_admin_timeout_ms, "keyspace_admin_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.keyspace_admin_timeout_ms,
                "keyspace_admin_timeout_ms",
            ),
        )
        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        timeout_manager = MultiCallTimeoutManager(
            overall_timeout_ms=_keyspace_admin_timeout_ms,
            dev_ops_api=True,
            timeout_label=_ka_label,
        )
        logger.info(
            f"creating keyspace '{name}' on "
            f"'{self._database_id}' (DevOps API), async"
        )
        cn_raw_response = await self._dev_ops_api_commander.async_raw_request(
            http_method=HttpMethod.POST,
            additional_path=f"keyspaces/{name}",
            timeout_context=timeout_manager.remaining_timeout(
                cap_time_ms=_request_timeout_ms,
                cap_timeout_label=_rt_label,
            ),
        )
        if cn_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_CREATED:
            raise DevOpsAPIException(
                f"keyspace creation ('{name}') failed: API returned HTTP "
                f"{cn_raw_response.status_code} instead of "
                f"{DEV_OPS_RESPONSE_HTTP_CREATED} - Created."
            )
        logger.info(
            f"DevOps API returned from creating keyspace "
            f"'{name}' on '{self._database_id}', async"
        )
        if wait_until_active:
            last_status_seen = DEV_OPS_DATABASE_STATUS_MAINTENANCE
            while last_status_seen == DEV_OPS_DATABASE_STATUS_MAINTENANCE:
                logger.info(
                    f"sleeping to poll for status of '{self._database_id}', async"
                )
                await asyncio.sleep(DEV_OPS_KEYSPACE_POLL_INTERVAL_S)
                last_db_info = await self._astra_db_admin._async_database_info_ctx(
                    id=self._database_id,
                    timeout_context=timeout_manager.remaining_timeout(
                        cap_time_ms=_request_timeout_ms,
                        cap_timeout_label=_rt_label,
                    ),
                )
                last_status_seen = last_db_info.status
            if last_status_seen != DEV_OPS_DATABASE_STATUS_ACTIVE:
                raise DevOpsAPIException(
                    f"Database entered unexpected status {last_status_seen} after MAINTENANCE."
                )
            # is the keyspace found?
            if name not in await self.async_list_keyspaces():
                raise DevOpsAPIException("Could not create the keyspace.")
        logger.info(
            f"finished creating keyspace '{name}' on "
            f"'{self._database_id}' (DevOps API), async"
        )
        if update_db_keyspace:
            self.spawner_database.use_keyspace(name)

    def drop_keyspace(
        self,
        name: str,
        *,
        wait_until_active: bool = True,
        keyspace_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Delete a keyspace from the database, optionally waiting for the database
        to become active again.

        Args:
            name: the keyspace to delete.
            wait_until_active: if True (default), the method returns only after
                the target database is in ACTIVE state again (a few
                seconds, usually). If False, it will return right after issuing the
                deletion request to the DevOps API, and it will be responsibility
                of the caller to check the database status/keyspace availability
                before working with it.
            keyspace_admin_timeout_ms: a timeout, in milliseconds, for the whole
                requested operation to complete, i.e. the deletion request
                plus, if `wait_until_active` is true, the subsequent polling
                of the DevOps API for the status of the database.
            request_timeout_ms: a timeout, in milliseconds, for
                each underlying DevOps API HTTP request.
            timeout_ms: an alias for *both* the `request_timeout_ms` and
                `keyspace_admin_timeout_ms` timeout parameters. In practice,
                regardless of `wait_until_active`, this parameter dictates an
                overall timeout on this method call.

        Raises:
            DevOpsAPIException: if the DevOps API does not answer the deletion
                request with the expected "accepted" status code; if, while
                waiting, the database leaves MAINTENANCE for a status other
                than ACTIVE; or if the keyspace is still listed once the
                database is active again.

        Note: a timeout event is no guarantee at all that the
        deletion request has not reached the API server and is not going
        to be, in fact, honored.

        Example:
            >>> my_db_admin.list_keyspaces()
            ['default_keyspace', 'that_other_one']
            >>> my_db_admin.drop_keyspace("that_other_one")
            >>> my_db_admin.list_keyspaces()
            ['default_keyspace']
        """

        # Explicit method arguments win over `timeout_ms`, which in turn
        # wins over the defaults stored in this object's API options.
        _keyspace_admin_timeout_ms, _ka_label = _first_valid_timeout(
            (keyspace_admin_timeout_ms, "keyspace_admin_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.keyspace_admin_timeout_ms,
                "keyspace_admin_timeout_ms",
            ),
        )
        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        # One manager covers the whole call; each HTTP request below gets
        # the remaining budget, capped by the per-request timeout.
        timeout_manager = MultiCallTimeoutManager(
            overall_timeout_ms=_keyspace_admin_timeout_ms,
            dev_ops_api=True,
            timeout_label=_ka_label,
        )
        logger.info(
            f"dropping keyspace '{name}' on '{self._database_id}' (DevOps API)"
        )
        dk_raw_response = self._dev_ops_api_commander.raw_request(
            http_method=HttpMethod.DELETE,
            additional_path=f"keyspaces/{name}",
            timeout_context=timeout_manager.remaining_timeout(
                cap_time_ms=_request_timeout_ms,
                cap_timeout_label=_rt_label,
            ),
        )
        if dk_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_ACCEPTED:
            # Report the keyspace name (not the `id` builtin) and the
            # status that was actually expected.
            raise DevOpsAPIException(
                f"keyspace deletion ('{name}') failed: API returned HTTP "
                f"{dk_raw_response.status_code} instead of "
                f"{DEV_OPS_RESPONSE_HTTP_ACCEPTED} - Accepted"
            )
        logger.info(
            "DevOps API returned from dropping keyspace "
            f"'{name}' on '{self._database_id}'"
        )
        if wait_until_active:
            # Assume MAINTENANCE right after the request, so that the
            # database status is polled at least once.
            last_status_seen = DEV_OPS_DATABASE_STATUS_MAINTENANCE
            while last_status_seen == DEV_OPS_DATABASE_STATUS_MAINTENANCE:
                logger.info(f"sleeping to poll for status of '{self._database_id}'")
                time.sleep(DEV_OPS_KEYSPACE_POLL_INTERVAL_S)
                last_status_seen = self._astra_db_admin._database_info_ctx(
                    id=self._database_id,
                    timeout_context=timeout_manager.remaining_timeout(
                        cap_time_ms=_request_timeout_ms,
                        cap_timeout_label=_rt_label,
                    ),
                ).status
            if last_status_seen != DEV_OPS_DATABASE_STATUS_ACTIVE:
                raise DevOpsAPIException(
                    f"Database entered unexpected status {last_status_seen} after MAINTENANCE."
                )
            # is the keyspace found?
            if name in self.list_keyspaces():
                raise DevOpsAPIException("Could not drop the keyspace.")
        logger.info(
            f"finished dropping keyspace '{name}' on "
            f"'{self._database_id}' (DevOps API)"
        )

    async def async_drop_keyspace(
        self,
        name: str,
        *,
        wait_until_active: bool = True,
        keyspace_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Delete a keyspace from the database, optionally waiting for the database
        to become active again.
        Async version of the method, for use in an asyncio context.

        Args:
            name: the keyspace to delete.
            wait_until_active: if True (default), the method returns only after
                the target database is in ACTIVE state again (a few
                seconds, usually). If False, it will return right after issuing the
                deletion request to the DevOps API, and it will be responsibility
                of the caller to check the database status/keyspace availability
                before working with it.
            keyspace_admin_timeout_ms: a timeout, in milliseconds, for the whole
                requested operation to complete, i.e. the deletion request
                plus, if `wait_until_active` is true, the subsequent polling
                of the DevOps API for the status of the database.
            request_timeout_ms: a timeout, in milliseconds, for
                each underlying DevOps API HTTP request.
            timeout_ms: an alias for *both* the `request_timeout_ms` and
                `keyspace_admin_timeout_ms` timeout parameters. In practice,
                regardless of `wait_until_active`, this parameter dictates an
                overall timeout on this method call.

        Raises:
            DevOpsAPIException: if the DevOps API does not answer the deletion
                request with the expected "accepted" status code; if, while
                waiting, the database leaves MAINTENANCE for a status other
                than ACTIVE; or if the keyspace is still listed once the
                database is active again.

        Note: a timeout event is no guarantee at all that the
        deletion request has not reached the API server and is not going
        to be, in fact, honored.

        Example:
            >>> asyncio.run(
            ...     my_db_admin.async_drop_keyspace("app_keyspace")
            ... )
        """

        # Explicit method arguments win over `timeout_ms`, which in turn
        # wins over the defaults stored in this object's API options.
        _keyspace_admin_timeout_ms, _ka_label = _first_valid_timeout(
            (keyspace_admin_timeout_ms, "keyspace_admin_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.keyspace_admin_timeout_ms,
                "keyspace_admin_timeout_ms",
            ),
        )
        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        # One manager covers the whole call; each HTTP request below gets
        # the remaining budget, capped by the per-request timeout.
        timeout_manager = MultiCallTimeoutManager(
            overall_timeout_ms=_keyspace_admin_timeout_ms,
            dev_ops_api=True,
            timeout_label=_ka_label,
        )
        logger.info(
            f"dropping keyspace '{name}' on "
            f"'{self._database_id}' (DevOps API), async"
        )
        dk_raw_response = await self._dev_ops_api_commander.async_raw_request(
            http_method=HttpMethod.DELETE,
            additional_path=f"keyspaces/{name}",
            timeout_context=timeout_manager.remaining_timeout(
                cap_time_ms=_request_timeout_ms,
                cap_timeout_label=_rt_label,
            ),
        )
        if dk_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_ACCEPTED:
            # Report the keyspace name (not the `id` builtin) and the
            # status that was actually expected.
            raise DevOpsAPIException(
                f"keyspace deletion ('{name}') failed: API returned HTTP "
                f"{dk_raw_response.status_code} instead of "
                f"{DEV_OPS_RESPONSE_HTTP_ACCEPTED} - Accepted"
            )
        logger.info(
            f"DevOps API returned from dropping keyspace "
            f"'{name}' on '{self._database_id}', async"
        )
        if wait_until_active:
            # Assume MAINTENANCE right after the request, so that the
            # database status is polled at least once.
            last_status_seen = DEV_OPS_DATABASE_STATUS_MAINTENANCE
            while last_status_seen == DEV_OPS_DATABASE_STATUS_MAINTENANCE:
                logger.info(
                    f"sleeping to poll for status of '{self._database_id}', async"
                )
                await asyncio.sleep(DEV_OPS_KEYSPACE_POLL_INTERVAL_S)
                last_db_info = await self._astra_db_admin._async_database_info_ctx(
                    id=self._database_id,
                    timeout_context=timeout_manager.remaining_timeout(
                        cap_time_ms=_request_timeout_ms,
                        cap_timeout_label=_rt_label,
                    ),
                )
                last_status_seen = last_db_info.status
            if last_status_seen != DEV_OPS_DATABASE_STATUS_ACTIVE:
                raise DevOpsAPIException(
                    f"Database entered unexpected status {last_status_seen} after MAINTENANCE."
                )
            # is the keyspace found?
            if name in await self.async_list_keyspaces():
                raise DevOpsAPIException("Could not drop the keyspace.")
        logger.info(
            f"finished dropping keyspace '{name}' on "
            f"'{self._database_id}' (DevOps API), async"
        )

    def drop(
        self,
        *,
        wait_until_active: bool = True,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Drop this database, i.e. delete it completely and permanently with all its data.

        This method wraps the `drop_database` method of the AstraDBAdmin class,
        where more information may be found.

        Args:
            wait_until_active: if True (default), the method returns only after
                the database has actually been deleted (generally a few minutes).
                If False, it will return right after issuing the
                drop request to the DevOps API, and it will be responsibility
                of the caller to check the database status/availability
                after that, if desired.
            database_admin_timeout_ms: a timeout, in milliseconds, for the whole
                requested operation to complete. This is used only
                if `wait_until_active` is true, i.e. if the method call must
                wait and keep querying the DevOps API for the status of the
                newly-deleted database.
            request_timeout_ms: a timeout, in milliseconds, for
                each underlying DevOps API HTTP request.
            timeout_ms: an alias for *both* the `request_timeout_ms` and
                `database_admin_timeout_ms` timeout parameters. In practice,
                regardless of `wait_until_active`, this parameter dictates an
                overall timeout on this method call.

        Note: a timeout event is no guarantee at all that the
        deletion request has not reached the API server and is not going
        to be, in fact, honored.

        Example:
            >>> my_db_admin.list_keyspaces()
            ['default_keyspace', 'that_other_one']
            >>> my_db_admin.drop()
            >>> my_db_admin.list_keyspaces()  # raises a 404 Not Found http error

        Note:
            Once the method succeeds, methods on this object -- such as `info()`,
            or `list_keyspaces()` -- can still be invoked: however, this hardly
            makes sense as the underlying actual database is no more.
            It is responsibility of the developer to design a correct flow
            which avoids using a deceased database any further.
        """

        logger.info(f"dropping this database ('{self._database_id}')")
        # All arguments are forwarded unchanged to the parent admin. The call
        # is not `return`ed, so the completion log below is actually reached.
        self._astra_db_admin.drop_database(
            id=self._database_id,
            wait_until_active=wait_until_active,
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info(f"finished dropping this database ('{self._database_id}')")

    async def async_drop(
        self,
        *,
        wait_until_active: bool = True,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Drop this database, i.e. delete it completely and permanently with all its data.
        Async version of the method, for use in an asyncio context.

        This method wraps the `async_drop_database` method of the AstraDBAdmin
        class, where more information may be found.

        Args:
            wait_until_active: if True (default), the method returns only after
                the database has actually been deleted (generally a few minutes).
                If False, it will return right after issuing the
                drop request to the DevOps API, and it will be responsibility
                of the caller to check the database status/availability
                after that, if desired.
            database_admin_timeout_ms: a timeout, in milliseconds, for the whole
                requested operation to complete. This is used only
                if `wait_until_active` is true, i.e. if the method call must
                wait and keep querying the DevOps API for the status of the
                newly-deleted database.
            request_timeout_ms: a timeout, in milliseconds, for
                each underlying DevOps API HTTP request.
            timeout_ms: an alias for *both* the `request_timeout_ms` and
                `database_admin_timeout_ms` timeout parameters. In practice,
                regardless of `wait_until_active`, this parameter dictates an
                overall timeout on this method call.

        Note: a timeout event is no guarantee at all that the
        deletion request has not reached the API server and is not going
        to be, in fact, honored.

        Example:
            >>> asyncio.run(my_db_admin.async_drop())

        Note:
            Once the method succeeds, methods on this object -- such as `info()`,
            or `list_keyspaces()` -- can still be invoked: however, this hardly
            makes sense as the underlying actual database is no more.
            It is responsibility of the developer to design a correct flow
            which avoids using a deceased database any further.
        """

        logger.info(f"dropping this database ('{self._database_id}'), async")
        # All arguments are forwarded unchanged to the parent admin. The call
        # is not `return`ed, so the completion log below is actually reached.
        await self._astra_db_admin.async_drop_database(
            id=self._database_id,
            wait_until_active=wait_until_active,
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info(f"finished dropping this database ('{self._database_id}'), async")

    def get_database(
        self,
        *,
        keyspace: str | None = None,
        token: str | TokenProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> Database:
        """
        Spawn a Database object, for data-related work, targeting the same
        database this admin object manages.

        The request is delegated to the `get_database` method of the
        AstraDBAdmin this object holds, using this object's API endpoint.

        Args:
            keyspace: the keyspace for the resulting Database. It is forwarded
                as is; when left to None, the choice of a default is up
                to the Database machinery.
            token: an access token for the resulting Database, forwarded as is.
                Supplying one is useful to work in a least-privilege manner,
                limiting the permissions for non-admin work.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            spawn_api_options: a specification - complete or partial - of the
                API Options for the resulting Database, forwarded as is.
                This allows for a deeper configuration of the database,
                e.g. concerning timeouts.

        Returns:
            A Database object, ready to work with data, collections and tables.

        Example:
            >>> my_db = my_db_admin.get_database()
            >>> my_db.list_collection_names()
            ['movies', 'another_collection']

        Note:
            creating an instance of Database does not trigger actual creation
            of the database itself, which should exist beforehand. To create databases,
            see the AstraDBAdmin class.
        """

        parent_admin = self._astra_db_admin
        database = parent_admin.get_database(
            api_endpoint=self.api_endpoint,
            keyspace=keyspace,
            token=token,
            spawn_api_options=spawn_api_options,
        )
        return database

    def get_async_database(
        self,
        *,
        keyspace: str | None = None,
        token: str | TokenProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncDatabase:
        """
        Spawn an AsyncDatabase object, for data-related work, targeting
        the same database this admin object manages.

        A Database is first obtained through `get_database`, called with
        the very same arguments, and then converted with its `to_async` method.

        Args:
            keyspace: the keyspace for the resulting AsyncDatabase, forwarded
                as is to `get_database`.
            token: an access token for the resulting AsyncDatabase, forwarded
                as is to `get_database`. Supplying one is useful to work in
                a least-privilege manner, limiting the permissions
                for non-admin work.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            spawn_api_options: a specification - complete or partial - of the
                API Options for the resulting AsyncDatabase, forwarded as is
                to `get_database`. This allows for a deeper configuration
                of the database, e.g. concerning timeouts.

        Returns:
            An AsyncDatabase object, ready to work with data, collections and tables.
        """

        sync_database = self.get_database(
            keyspace=keyspace,
            token=token,
            spawn_api_options=spawn_api_options,
        )
        return sync_database.to_async()

    def find_embedding_providers(
        self,
        *,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> FindEmbeddingProvidersResult:
        """
        Ask the API for the complete description of the available
        embedding providers.

        Args:
            database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.

        Returns:
            A `FindEmbeddingProvidersResult` object, built from the "status"
            portion of the API response.

        Raises:
            UnexpectedDataAPIResponseException: if the response "status"
                carries no "embeddingProviders" entry.

        Example (output abridged and indented for clarity):
            >>> admin_for_my_db.find_embedding_providers()
            FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)
            >>> admin_for_my_db.find_embedding_providers().embedding_providers
            {
                'openai': EmbeddingProvider(
                    display_name='OpenAI',
                    models=[
                        EmbeddingProviderModel(name='text-embedding-3-small'),
                        ...
                    ]
                ),
                ...
            }
        """

        # a single request is made: the three timeout inputs collapse into one
        resolved_timeout_ms, resolved_label = _select_singlereq_timeout_da(
            timeout_options=self.api_options.timeout_options,
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info("findEmbeddingProviders")
        commander = self._api_commander
        command_payload = {"findEmbeddingProviders": {}}
        request_timeout_context = _TimeoutContext(
            request_ms=resolved_timeout_ms, label=resolved_label
        )
        api_response = commander.request(
            payload=command_payload,
            timeout_context=request_timeout_context,
        )
        # happy path first: the providers are expected under "status"
        if "embeddingProviders" in api_response.get("status", {}):
            logger.info("finished findEmbeddingProviders")
            return FindEmbeddingProvidersResult._from_dict(api_response["status"])
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from findEmbeddingProviders API command.",
            raw_response=api_response,
        )

    async def async_find_embedding_providers(
        self,
        *,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> FindEmbeddingProvidersResult:
        """
        Ask the API for the complete description of the available
        embedding providers.
        Async version of the method, for use in an asyncio context.

        Args:
            database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.

        Returns:
            A `FindEmbeddingProvidersResult` object, built from the "status"
            portion of the API response.

        Raises:
            UnexpectedDataAPIResponseException: if the response "status"
                carries no "embeddingProviders" entry.

        Example (output abridged and indented for clarity):
            >>> asyncio.run(admin_for_my_db.async_find_embedding_providers())
            FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)
            >>> asyncio.run(
            ...     admin_for_my_db.async_find_embedding_providers()
            ... ).embedding_providers
            {
                'openai': EmbeddingProvider(
                    display_name='OpenAI',
                    models=[
                        EmbeddingProviderModel(name='text-embedding-3-small'),
                        ...
                    ]
                ),
                ...
            }
        """

        # a single request is made: the three timeout inputs collapse into one
        resolved_timeout_ms, resolved_label = _select_singlereq_timeout_da(
            timeout_options=self.api_options.timeout_options,
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info("findEmbeddingProviders, async")
        commander = self._api_commander
        command_payload = {"findEmbeddingProviders": {}}
        request_timeout_context = _TimeoutContext(
            request_ms=resolved_timeout_ms, label=resolved_label
        )
        api_response = await commander.async_request(
            payload=command_payload,
            timeout_context=request_timeout_context,
        )
        # happy path first: the providers are expected under "status"
        if "embeddingProviders" in api_response.get("status", {}):
            logger.info("finished findEmbeddingProviders, async")
            return FindEmbeddingProvidersResult._from_dict(api_response["status"])
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from findEmbeddingProviders API command.",
            raw_response=api_response,
        )

    def find_reranking_providers(
        self,
        *,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> FindRerankingProvidersResult:
        """
        Ask the API for the complete description of the available
        reranking providers.

        Args:
            database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.

        Returns:
            A `FindRerankingProvidersResult` object, built from the "status"
            portion of the API response.

        Raises:
            UnexpectedDataAPIResponseException: if the response "status"
                carries no "rerankingProviders" entry.

        Example (output abridged and indented for clarity):
            >>> admin_for_my_db.find_reranking_providers()
            FindRerankingProvidersResult(reranking_providers=nvidia)
            >>> admin_for_my_db.find_reranking_providers().reranking_providers
            {
                'nvidia': RerankingProvider(
                    <Default>
                    display_name='Nvidia',
                    models=[
                        RerankingProviderModel(
                            <Default>
                            name='nvidia/llama-3.2-nv-rerankqa-1b-v2'
                        ),
                        ...
                    ]
                ),
                ...
            }
        """

        # a single request is made: the three timeout inputs collapse into one
        resolved_timeout_ms, resolved_label = _select_singlereq_timeout_da(
            timeout_options=self.api_options.timeout_options,
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info("findRerankingProviders")
        commander = self._api_commander
        command_payload = {"findRerankingProviders": {}}
        request_timeout_context = _TimeoutContext(
            request_ms=resolved_timeout_ms, label=resolved_label
        )
        api_response = commander.request(
            payload=command_payload,
            timeout_context=request_timeout_context,
        )
        # happy path first: the providers are expected under "status"
        if "rerankingProviders" in api_response.get("status", {}):
            logger.info("finished findRerankingProviders")
            return FindRerankingProvidersResult._from_dict(api_response["status"])
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from findRerankingProviders API command.",
            raw_response=api_response,
        )

    async def async_find_reranking_providers(
        self,
        *,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> FindRerankingProvidersResult:
        """
        Ask the API for the complete description of the available
        reranking providers.
        Async version of the method, for use in an asyncio context.

        Args:
            database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.

        Returns:
            A `FindRerankingProvidersResult` object, built from the "status"
            portion of the API response.

        Raises:
            UnexpectedDataAPIResponseException: if the response "status"
                carries no "rerankingProviders" entry.

        Example (output abridged and indented for clarity):
            >>> asyncio.run(admin_for_my_db.async_find_reranking_providers())
            FindRerankingProvidersResult(reranking_providers=nvidia)
            >>> asyncio.run(
            ...     admin_for_my_db.async_find_reranking_providers()
            ... ).reranking_providers
            {
                'nvidia': RerankingProvider(
                    <Default>
                    display_name='Nvidia',
                    models=[
                        RerankingProviderModel(
                            <Default>
                            name='nvidia/llama-3.2-nv-rerankqa-1b-v2'
                        ),
                        ...
                    ]
                ),
                ...
            }
        """

        # a single request is made: the three timeout inputs collapse into one
        resolved_timeout_ms, resolved_label = _select_singlereq_timeout_da(
            timeout_options=self.api_options.timeout_options,
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info("findRerankingProviders, async")
        commander = self._api_commander
        command_payload = {"findRerankingProviders": {}}
        request_timeout_context = _TimeoutContext(
            request_ms=resolved_timeout_ms, label=resolved_label
        )
        api_response = await commander.async_request(
            payload=command_payload,
            timeout_context=request_timeout_context,
        )
        # happy path first: the providers are expected under "status"
        if "rerankingProviders" in api_response.get("status", {}):
            logger.info("finished findRerankingProviders, async")
            return FindRerankingProvidersResult._from_dict(api_response["status"])
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from findRerankingProviders API command.",
            raw_response=api_response,
        )

Ancestors

DatabaseAdmin
abc.ABC

Static methods

def from_astra_db_admin(api_endpoint: str, *, astra_db_admin: AstraDBAdmin, spawn_api_options: APIOptions | UnsetType = (unset)) ‑> AstraDBDatabaseAdmin

Create an AstraDBDatabaseAdmin from an AstraDBAdmin and an API Endpoint.

Args

api_endpoint: the API Endpoint for the target database (e.g. https://<ID>-<REGION>.apps.astra.datastax.com). The database must exist already for the resulting object to be effectively used; in other words, this invocation does not create the database, just the object instance.
astra_db_admin: an AstraDBAdmin object that has visibility over the target database.
spawn_api_options: a specification - complete or partial - of the API Options to override the defaults inherited from the AstraDBAdmin. This allows for a deeper configuration of the database, e.g. concerning timeouts.

Returns

An AstraDBDatabaseAdmin object, for admin work within the database.

Example

>>> from astrapy import DataAPIClient, AstraDBDatabaseAdmin
>>> admin_for_my_db = AstraDBDatabaseAdmin.from_astra_db_admin(
...     "https://<ID>-<REGION>.apps.astra.datastax.com",
...     astra_db_admin=DataAPIClient("AstraCS:...").get_admin(),
... )
>>> admin_for_my_db.list_keyspaces()
['default_keyspace', 'staging_keyspace']
>>> admin_for_my_db.info().status
'ACTIVE'

Note

Creating an instance of AstraDBDatabaseAdmin does not trigger actual creation of the database itself, which should exist beforehand. To create databases, see the AstraDBAdmin class.

Expand source code

@staticmethod
def from_astra_db_admin(
    api_endpoint: str,
    *,
    astra_db_admin: AstraDBAdmin,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> AstraDBDatabaseAdmin:
    """
    Build an AstraDBDatabaseAdmin starting from an AstraDBAdmin
    and the API Endpoint of the target database.

    Args:
        api_endpoint: the API Endpoint of the database to administer
            (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`).
            No database is created by this call: only the Python object
            is instantiated, hence the database must exist already
            for the returned object to be of any practical use.
        astra_db_admin: an AstraDBAdmin object with visibility over
            the target database. Its API Options are the starting point
            for those of the returned object.
        spawn_api_options: a complete or partial specification of the
            API Options, overriding the defaults inherited from the
            AstraDBAdmin. This allows for a deeper configuration
            of the database admin, e.g. concerning timeouts.

    Returns:
        An AstraDBDatabaseAdmin object, for admin work within the database.

    Example:
        >>> from astrapy import DataAPIClient, AstraDBDatabaseAdmin
        >>> admin_for_my_db = AstraDBDatabaseAdmin.from_astra_db_admin(
        ...     "https://<ID>-<REGION>.apps.astra.datastax.com",
        ...     astra_db_admin=DataAPIClient("AstraCS:...").get_admin(),
        ... )
        >>> admin_for_my_db.list_keyspaces()
        ['default_keyspace', 'staging_keyspace']
        >>> admin_for_my_db.info().status
        'ACTIVE'

    Note:
        Instantiating an AstraDBDatabaseAdmin never triggers the creation
        of the database itself, which should exist beforehand.
        To create databases, see the AstraDBAdmin class.
    """

    # Layer the caller-provided overrides on top of the parent admin's options.
    effective_api_options = astra_db_admin.api_options.with_override(
        spawn_api_options
    )
    return AstraDBDatabaseAdmin(
        api_endpoint=api_endpoint,
        api_options=effective_api_options,
        spawner_astra_db_admin=astra_db_admin,
    )

Instance variables

var id : str

The ID of this database admin.

Example

>>> my_db_admin.id
'01234567-89ab-cdef-0123-456789abcdef'

Expand source code

@property
def id(self) -> str:
    """
    Identifier of the database this admin object works on.

    Returns:
        the database ID stored on this object, as a string.

    Example:
        >>> my_db_admin.id
        '01234567-89ab-cdef-0123-456789abcdef'
    """
    database_id: str = self._database_id
    return database_id

var region : str

The region for this database admin.

Example

>>> my_db_admin.region
'us-east-1'

Expand source code

@property
def region(self) -> str:
    """
    Region associated to this database admin.

    Returns:
        the region stored on this object, as a string.

    Example:
        >>> my_db_admin.region
        'us-east-1'
    """
    database_region: str = self._region
    return database_region

Methods

async def async_create_keyspace(self, name: str, *, wait_until_active: bool = True, update_db_keyspace: bool | None = None, keyspace_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None, **kwargs: Any) ‑> None

Create a keyspace in this database as requested, optionally waiting for it to be ready. Async version of the method, for use in an asyncio context.

Args

name: the keyspace name. If supplying a keyspace that exists already, the method call proceeds as usual, no errors are raised, and the whole invocation is a no-op.
wait_until_active: if True (default), the method returns only after the target database is in ACTIVE state again (a few seconds, usually). If False, it will return right after issuing the creation request to the DevOps API, and it will be responsibility of the caller to check the database status/keyspace availability before working with it.
update_db_keyspace: if True, the Database or AsyncDatabase class that spawned this DatabaseAdmin, if any, gets updated to work on the newly-created keyspace starting when this method returns.
keyspace_admin_timeout_ms: a timeout, in milliseconds, for the whole requested operation to complete. This is used only if wait_until_active is true, i.e. if the method call must wait and keep querying the DevOps API for the status of the database during keyspace creation.
request_timeout_ms: a timeout, in milliseconds, for each underlying DevOps API HTTP request.
timeout_ms: an alias for both the request_timeout_ms and keyspace_admin_timeout_ms timeout parameters. In practice, regardless of wait_until_active, this parameter dictates an overall timeout on this method call.

Note: a timeout event is no guarantee at all that the creation request has not reached the API server and is not going to be, in fact, honored.

Example

>>> asyncio.run(
...     my_db_admin.async_create_keyspace("app_keyspace")
... )

Expand source code

async def async_create_keyspace(
    self,
    name: str,
    *,
    wait_until_active: bool = True,
    update_db_keyspace: bool | None = None,
    keyspace_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
    **kwargs: Any,
) -> None:
    """
    Request the creation of a keyspace in this database through the
    DevOps API, optionally waiting until the database is ready again.
    Async version of the method, for use in an asyncio context.

    Args:
        name: the name of the keyspace to create. If a keyspace with this
            name exists already, the method call proceeds as usual,
            no errors are raised, and the whole invocation is a no-op.
        wait_until_active: if True (default), the method returns only after
            the target database is back to ACTIVE state (usually a matter
            of a few seconds). If False, the method returns right after
            the creation request is issued to the DevOps API: it is then
            the caller's responsibility to check the database
            status/keyspace availability before working with it.
        update_db_keyspace: if True, the `Database` or `AsyncDatabase` class
            that spawned this DatabaseAdmin, if any, is switched to the
            newly-created keyspace by the time this method returns.
        keyspace_admin_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation to complete. This is used only
            if `wait_until_active` is true, i.e. if the method call must
            wait and keep querying the DevOps API for the status of the
            database during keyspace creation.
        request_timeout_ms: a timeout, in milliseconds, for
            each underlying DevOps API HTTP request.
        timeout_ms: an alias for *both* the `request_timeout_ms` and
            `keyspace_admin_timeout_ms` timeout parameters. In practice,
            regardless of `wait_until_active`, this parameter dictates an
            overall timeout on this method call.

    Raises:
        DevOpsAPIException: if the DevOps API does not answer the creation
            request with the expected "Created" HTTP status; or, when
            waiting, if the database leaves MAINTENANCE for a status other
            than ACTIVE, or the keyspace is not listed afterwards.

    Note: a timeout event is no guarantee at all that the
    creation request has not reached the API server and is not going
    to be, in fact, honored.

    Example:
        >>> asyncio.run(
        ...     my_db_admin.async_create_keyspace("app_keyspace")
        ... )
    """

    # Overall budget: explicit parameter, then the alias, then the defaults.
    overall_timeout_ms, overall_label = _first_valid_timeout(
        (keyspace_admin_timeout_ms, "keyspace_admin_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (
            self.api_options.timeout_options.keyspace_admin_timeout_ms,
            "keyspace_admin_timeout_ms",
        ),
    )
    # Per-request cap, resolved with the same precedence.
    per_request_timeout_ms, per_request_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    timeout_manager = MultiCallTimeoutManager(
        overall_timeout_ms=overall_timeout_ms,
        dev_ops_api=True,
        timeout_label=overall_label,
    )

    def _capped_remaining_timeout() -> Any:
        # What is left of the overall budget, capped by the per-request timeout.
        return timeout_manager.remaining_timeout(
            cap_time_ms=per_request_timeout_ms,
            cap_timeout_label=per_request_label,
        )

    logger.info(
        f"creating keyspace '{name}' "
        f"on '{self._database_id}' (DevOps API), async"
    )
    creation_response = await self._dev_ops_api_commander.async_raw_request(
        http_method=HttpMethod.POST,
        additional_path=f"keyspaces/{name}",
        timeout_context=_capped_remaining_timeout(),
    )
    if creation_response.status_code != DEV_OPS_RESPONSE_HTTP_CREATED:
        raise DevOpsAPIException(
            f"keyspace creation ('{name}') failed: API returned HTTP "
            f"{creation_response.status_code} instead of "
            f"{DEV_OPS_RESPONSE_HTTP_CREATED} - Created."
        )
    logger.info(
        f"DevOps API returned from creating keyspace '{name}' "
        f"on '{self._database_id}', async"
    )

    if wait_until_active:
        # Sleep-then-poll at least once, until the database leaves MAINTENANCE.
        while True:
            logger.info(
                f"sleeping to poll for status of '{self._database_id}', async"
            )
            await asyncio.sleep(DEV_OPS_KEYSPACE_POLL_INTERVAL_S)
            polled_db_info = await self._astra_db_admin._async_database_info_ctx(
                id=self._database_id,
                timeout_context=_capped_remaining_timeout(),
            )
            observed_status = polled_db_info.status
            if observed_status != DEV_OPS_DATABASE_STATUS_MAINTENANCE:
                break
        if observed_status != DEV_OPS_DATABASE_STATUS_ACTIVE:
            raise DevOpsAPIException(
                f"Database entered unexpected status {observed_status} after MAINTENANCE."
            )
        # Final check: the new keyspace must now be among the listed ones.
        current_keyspaces = await self.async_list_keyspaces()
        if name not in current_keyspaces:
            raise DevOpsAPIException("Could not create the keyspace.")

    logger.info(
        f"finished creating keyspace '{name}' "
        f"on '{self._database_id}' (DevOps API), async"
    )
    if update_db_keyspace:
        self.spawner_database.use_keyspace(name)

async def async_drop(self, *, wait_until_active: bool = True, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Drop this database, i.e. delete it completely and permanently with all its data. Async version of the method, for use in an asyncio context.

This method wraps the drop_database method of the AstraDBAdmin class, where more information may be found.

Args

wait_until_active: if True (default), the method returns only after the database has actually been deleted (generally a few minutes). If False, it will return right after issuing the drop request to the DevOps API, and it will be responsibility of the caller to check the database status/availability after that, if desired.
database_admin_timeout_ms: a timeout, in milliseconds, for the whole requested operation to complete. This is used only if wait_until_active is true, i.e. if the method call must wait and keep querying the DevOps API for the status of the newly-deleted database.
request_timeout_ms: a timeout, in milliseconds, for each underlying DevOps API HTTP request.
timeout_ms: an alias for both the request_timeout_ms and database_admin_timeout_ms timeout parameters. In practice, regardless of wait_until_active, this parameter dictates an overall timeout on this method call.

Note: a timeout event is no guarantee at all that the deletion request has not reached the API server and is not going to be, in fact, honored.

Example

>>> asyncio.run(my_db_admin.async_drop())

Note

Once the method succeeds, methods on this object – such as info(), or list_keyspaces() – can still be invoked: however, this hardly makes sense as the underlying actual database is no more. It is the responsibility of the developer to design a correct flow which avoids using a deceased database any further.

Expand source code

async def async_drop(
    self,
    *,
    wait_until_active: bool = True,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Drop this database, i.e. delete it completely and permanently with all its data.
    Async version of the method, for use in an asyncio context.

    This method wraps the `async_drop_database` method of the AstraDBAdmin
    object this database admin holds, passing this database's ID to it
    and forwarding all parameters unchanged.

    Args:
        wait_until_active: if True (default), the method returns only after
            the database has actually been deleted (generally a few minutes).
            If False, it will return right after issuing the
            drop request to the DevOps API, and it will be responsibility
            of the caller to check the database status/availability
            after that, if desired.
        database_admin_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation to complete. This is used only
            if `wait_until_active` is true, i.e. if the method call must
            wait and keep querying the DevOps API for the status of the
            newly-deleted database.
        request_timeout_ms: a timeout, in milliseconds, for
            each underlying DevOps API HTTP request.
        timeout_ms: an alias for *both* the `request_timeout_ms` and
            `database_admin_timeout_ms` timeout parameters. In practice,
            regardless of `wait_until_active`, this parameter dictates an
            overall timeout on this method call.

    Note: a timeout event is no guarantee at all that the
    deletion request has not reached the API server and is not going
    to be, in fact, honored.

    Example:
        >>> asyncio.run(my_db_admin.async_drop())

    Note:
        Once the method succeeds, methods on this object -- such as `info()`,
        or `list_keyspaces()` -- can still be invoked: however, this hardly
        makes sense as the underlying actual database is no more.
        It is responsibility of the developer to design a correct flow
        which avoids using a deceased database any further.
    """

    logger.info(f"dropping this database ('{self._database_id}'), async")
    # Await (rather than `return await`) so that the completion log below
    # is actually reached once the drop operation has finished.
    await self._astra_db_admin.async_drop_database(
        id=self._database_id,
        wait_until_active=wait_until_active,
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info(f"finished dropping this database ('{self._database_id}'), async")

async def async_drop_keyspace(self, name: str, *, wait_until_active: bool = True, keyspace_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Delete a keyspace from the database, optionally waiting for the database to become active again. Async version of the method, for use in an asyncio context.

Args

name: the keyspace to delete. If it does not exist in this database, an error is raised.
wait_until_active: if True (default), the method returns only after the target database is in ACTIVE state again (a few seconds, usually). If False, it will return right after issuing the deletion request to the DevOps API, and it will be responsibility of the caller to check the database status/keyspace availability before working with it.
keyspace_admin_timeout_ms: a timeout, in milliseconds, for the whole requested operation to complete. This is used only if wait_until_active is true, i.e. if the method call must wait and keep querying the DevOps API for the status of the database during keyspace deletion.
request_timeout_ms: a timeout, in milliseconds, for each underlying DevOps API HTTP request.
timeout_ms: an alias for both the request_timeout_ms and keyspace_admin_timeout_ms timeout parameters. In practice, regardless of wait_until_active, this parameter dictates an overall timeout on this method call.

Note: a timeout event is no guarantee at all that the deletion request has not reached the API server and is not going to be, in fact, honored.

Example

>>> asyncio.run(
...     my_db_admin.async_drop_keyspace("app_keyspace")
... )

Expand source code

async def async_drop_keyspace(
    self,
    name: str,
    *,
    wait_until_active: bool = True,
    keyspace_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Delete a keyspace from the database, optionally waiting for the database
    to become active again.
    Async version of the method, for use in an asyncio context.

    Args:
        name: the keyspace to delete. If it does not exist in this database,
            an error is raised.
        wait_until_active: if True (default), the method returns only after
            the target database is in ACTIVE state again (a few
            seconds, usually). If False, it will return right after issuing the
            deletion request to the DevOps API, and it will be responsibility
            of the caller to check the database status/keyspace availability
            before working with it.
        keyspace_admin_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation to complete. This is used only
            if `wait_until_active` is true, i.e. if the method call must
            wait and keep querying the DevOps API for the status of the
            database during keyspace deletion.
        request_timeout_ms: a timeout, in milliseconds, for
            each underlying DevOps API HTTP request.
        timeout_ms: an alias for *both* the `request_timeout_ms` and
            `keyspace_admin_timeout_ms` timeout parameters. In practice,
            regardless of `wait_until_active`, this parameter dictates an
            overall timeout on this method call.

    Raises:
        DevOpsAPIException: if the DevOps API does not answer the deletion
            request with the expected "Accepted" HTTP status; or, when
            waiting, if the database leaves MAINTENANCE for a status other
            than ACTIVE, or the keyspace is still listed afterwards.

    Note: a timeout event is no guarantee at all that the
    deletion request has not reached the API server and is not going
    to be, in fact, honored.

    Example:
        >>> asyncio.run(
        ...     my_db_admin.async_drop_keyspace("app_keyspace")
        ... )
    """

    # Overall budget: explicit parameter, then the alias, then the defaults.
    _keyspace_admin_timeout_ms, _ka_label = _first_valid_timeout(
        (keyspace_admin_timeout_ms, "keyspace_admin_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (
            self.api_options.timeout_options.keyspace_admin_timeout_ms,
            "keyspace_admin_timeout_ms",
        ),
    )
    # Per-request cap, resolved with the same precedence.
    _request_timeout_ms, _rt_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    timeout_manager = MultiCallTimeoutManager(
        overall_timeout_ms=_keyspace_admin_timeout_ms,
        dev_ops_api=True,
        timeout_label=_ka_label,
    )
    logger.info(
        f"dropping keyspace '{name}' on "
        f"'{self._database_id}' (DevOps API), async"
    )
    dk_raw_response = await self._dev_ops_api_commander.async_raw_request(
        http_method=HttpMethod.DELETE,
        additional_path=f"keyspaces/{name}",
        timeout_context=timeout_manager.remaining_timeout(
            cap_time_ms=_request_timeout_ms,
            cap_timeout_label=_rt_label,
        ),
    )
    if dk_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_ACCEPTED:
        # Report the keyspace name (not the `id` builtin) and label the
        # expected status for what it is, i.e. "Accepted".
        raise DevOpsAPIException(
            f"keyspace deletion ('{name}') failed: API returned HTTP "
            f"{dk_raw_response.status_code} instead of "
            f"{DEV_OPS_RESPONSE_HTTP_ACCEPTED} - Accepted"
        )
    logger.info(
        f"DevOps API returned from dropping keyspace "
        f"'{name}' on '{self._database_id}', async"
    )
    if wait_until_active:
        # Sleep-then-poll at least once, until the database leaves MAINTENANCE.
        last_status_seen = DEV_OPS_DATABASE_STATUS_MAINTENANCE
        while last_status_seen == DEV_OPS_DATABASE_STATUS_MAINTENANCE:
            logger.info(
                f"sleeping to poll for status of '{self._database_id}', async"
            )
            await asyncio.sleep(DEV_OPS_KEYSPACE_POLL_INTERVAL_S)
            last_db_info = await self._astra_db_admin._async_database_info_ctx(
                id=self._database_id,
                timeout_context=timeout_manager.remaining_timeout(
                    cap_time_ms=_request_timeout_ms,
                    cap_timeout_label=_rt_label,
                ),
            )
            last_status_seen = last_db_info.status
        if last_status_seen != DEV_OPS_DATABASE_STATUS_ACTIVE:
            raise DevOpsAPIException(
                f"Database entered unexpected status {last_status_seen} after MAINTENANCE."
            )
        # Final check: the dropped keyspace must not be listed anymore.
        if name in await self.async_list_keyspaces():
            raise DevOpsAPIException("Could not drop the keyspace.")
    logger.info(
        f"finished dropping keyspace '{name}' on "
        f"'{self._database_id}' (DevOps API), async"
    )

async def async_find_embedding_providers(self, *, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> FindEmbeddingProvidersResult

Query the API for the full information on available embedding providers. Async version of the method, for use in an asyncio context.

Args

database_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for database_admin_timeout_ms.
timeout_ms: an alias for database_admin_timeout_ms.

Returns

A FindEmbeddingProvidersResult object with the complete information returned by the API about available embedding providers.

Example (output abridged and indented for clarity)

>>> asyncio.run(admin_for_my_db.async_find_embedding_providers())
FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)
>>> asyncio.run(
...     admin_for_my_db.async_find_embedding_providers()
... ).embedding_providers
{
    'openai': EmbeddingProvider(
        display_name='OpenAI',
        models=[
            EmbeddingProviderModel(name='text-embedding-3-small'),
            ...
        ]
    ),
    ...
}

Expand source code

async def async_find_embedding_providers(
    self,
    *,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> FindEmbeddingProvidersResult:
    """
    Retrieve from the API the full information on the available
    embedding providers.
    Async version of the method, for use in an asyncio context.

    Args:
        database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.

    Returns:
        A `FindEmbeddingProvidersResult` object with the complete information
        returned by the API about available embedding providers.

    Raises:
        UnexpectedDataAPIResponseException: if the "status" part of the API
            response does not contain the "embeddingProviders" key.

    Example:
        >>> # (output abridged and indented for clarity)
        >>> asyncio.run(admin_for_my_db.async_find_embedding_providers())
        FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)
        >>> asyncio.run(
        ...     admin_for_my_db.async_find_embedding_providers()
        ... ).embedding_providers
        {
            'openai': EmbeddingProvider(
                display_name='OpenAI',
                models=[
                    EmbeddingProviderModel(name='text-embedding-3-small'),
                    ...
                ]
            ),
            ...
        }
    """

    # A single request is issued: the three timeout parameters collapse to one.
    resolved_timeout_ms, resolved_label = _select_singlereq_timeout_da(
        timeout_options=self.api_options.timeout_options,
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info("findEmbeddingProviders, async")
    api_response = await self._api_commander.async_request(
        payload={"findEmbeddingProviders": {}},
        timeout_context=_TimeoutContext(
            request_ms=resolved_timeout_ms,
            label=resolved_label,
        ),
    )
    # Success path first: the providers must be found under "status".
    if "embeddingProviders" in api_response.get("status", {}):
        logger.info("finished findEmbeddingProviders, async")
        return FindEmbeddingProvidersResult._from_dict(api_response["status"])
    raise UnexpectedDataAPIResponseException(
        text="Faulty response from findEmbeddingProviders API command.",
        raw_response=api_response,
    )

async def async_find_reranking_providers(self, *, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> FindRerankingProvidersResult

Query the API for the full information on available reranking providers. Async version of the method, for use in an asyncio context.

Args

database_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for database_admin_timeout_ms.
timeout_ms: an alias for database_admin_timeout_ms.

Returns

A FindRerankingProvidersResult object with the complete information returned by the API about available reranking providers.

Example (output abridged and indented for clarity)

>>> asyncio.run(admin_for_my_db.async_find_reranking_providers())
FindRerankingProvidersResult(reranking_providers=nvidia)
>>> asyncio.run(
...     admin_for_my_db.async_find_reranking_providers()
... ).reranking_providers
{
    'nvidia': RerankingProvider(
        <Default>
        display_name='Nvidia',
        models=[
            RerankingProviderModel(
                <Default>
                name='nvidia/llama-3.2-nv-rerankqa-1b-v2'
            ),
            ...
        ]
    ),
    ...
}

Expand source code

async def async_find_reranking_providers(
    self,
    *,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> FindRerankingProvidersResult:
    """
    Retrieve from the API the full information on the available
    reranking providers.
    Async version of the method, for use in an asyncio context.

    Args:
        database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.

    Returns:
        A `FindRerankingProvidersResult` object with the complete information
        returned by the API about available reranking providers.

    Raises:
        UnexpectedDataAPIResponseException: if the "status" part of the API
            response does not contain the "rerankingProviders" key.

    Example:
        >>> # (output abridged and indented for clarity)
        >>> asyncio.run(admin_for_my_db.async_find_reranking_providers())
        FindRerankingProvidersResult(reranking_providers=nvidia)
        >>> asyncio.run(
        ...     admin_for_my_db.async_find_reranking_providers()
        ... ).reranking_providers
        {
            'nvidia': RerankingProvider(
                <Default>
                display_name='Nvidia',
                models=[
                    RerankingProviderModel(
                        <Default>
                        name='nvidia/llama-3.2-nv-rerankqa-1b-v2'
                    ),
                    ...
                ]
            ),
            ...
        }
    """

    # A single request is issued: the three timeout parameters collapse to one.
    resolved_timeout_ms, resolved_label = _select_singlereq_timeout_da(
        timeout_options=self.api_options.timeout_options,
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info("findRerankingProviders, async")
    api_response = await self._api_commander.async_request(
        payload={"findRerankingProviders": {}},
        timeout_context=_TimeoutContext(
            request_ms=resolved_timeout_ms,
            label=resolved_label,
        ),
    )
    # Success path first: the providers must be found under "status".
    if "rerankingProviders" in api_response.get("status", {}):
        logger.info("finished findRerankingProviders, async")
        return FindRerankingProvidersResult._from_dict(api_response["status"])
    raise UnexpectedDataAPIResponseException(
        text="Faulty response from findRerankingProviders API command.",
        raw_response=api_response,
    )

async def async_info(self, *, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> AstraDBAdminDatabaseInfo

Query the DevOps API for the full info on this database. Async version of the method, for use in an asyncio context.

Args

database_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for database_admin_timeout_ms.
timeout_ms: an alias for database_admin_timeout_ms.

Returns

An AstraDBAdminDatabaseInfo object.

Example

>>> async def wait_until_active(db_admin: AstraDBDatabaseAdmin) -> None:
...     while True:
...         info = await db_admin.async_info()
...         if info.status == "ACTIVE":
...             return
...
>>> asyncio.run(wait_until_active(admin_for_my_db))

Expand source code

async def async_info(
    self,
    *,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> AstraDBAdminDatabaseInfo:
    """
    Fetch the full info on this database from the DevOps API.
    Async version of the method, for use in an asyncio context.

    The request is delegated to the `async_database_info` method of the
    AstraDBAdmin object held by this database admin, passing this
    database's ID and forwarding the timeout parameters unchanged.

    Args:
        database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.

    Returns:
        An AstraDBAdminDatabaseInfo object.

    Example:
        >>> async def wait_until_active(db_admin: AstraDBDatabaseAdmin) -> None:
        ...     while True:
        ...         info = await db_admin.async_info()
        ...         if info.status == "ACTIVE":
        ...             return
        ...
        >>> asyncio.run(wait_until_active(admin_for_my_db))
    """

    target_database_id = self._database_id
    logger.info(f"getting info ('{target_database_id}'), async")
    database_info = await self._astra_db_admin.async_database_info(
        id=target_database_id,
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info(f"finished getting info ('{self._database_id}'), async")
    return database_info

async def async_list_keyspaces(self, *, keyspace_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[str]

Query the DevOps API for a list of the keyspaces in the database. Async version of the method, for use in an asyncio context.

Args

keyspace_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for keyspace_admin_timeout_ms.
timeout_ms: an alias for keyspace_admin_timeout_ms.

Returns

A list of the keyspaces, each a string, in no particular order.

Example

>>> async def check_if_ks_exists(
...     db_admin: AstraDBDatabaseAdmin, keyspace: str
... ) -> bool:
...     ks_list = await db_admin.async_list_keyspaces()
...     return keyspace in ks_list
...
>>> asyncio.run(check_if_ks_exists(admin_for_my_db, "dragons"))
False
>>> asyncio.run(check_if_ks_exists(admin_for_my_db, "app_keyspace"))
True

Expand source code

async def async_list_keyspaces(
    self,
    *,
    keyspace_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[str]:
    """
    Retrieve, through the DevOps API, the names of the keyspaces in the database.
    This is the coroutine flavor of the method, to be awaited within asyncio.

    The list is extracted from the database info returned by `async_info`,
    so that exactly one API request is issued.

    Args:
        keyspace_admin_timeout_ms: a timeout, in milliseconds, for the
            underlying API request. When omitted, this object's defaults apply.
            (A single API request is issued, which is why all timeout
            parameters are equivalent for this method.)
        request_timeout_ms: an alias for `keyspace_admin_timeout_ms`.
        timeout_ms: an alias for `keyspace_admin_timeout_ms`.

    Returns:
        A list of the keyspaces, each a string, in no particular order.
        An empty list is returned if the info carries no keyspace entry.

    Raises:
        DevOpsAPIException: if the database info has no raw payload.

    Example:
        >>> async def check_if_ks_exists(
        ...     db_admin: AstraDBDatabaseAdmin, keyspace: str
        ... ) -> bool:
        ...     ks_list = await db_admin.async_list_keyspaces()
        ...     return keyspace in ks_list
        ...
        >>> asyncio.run(check_if_ks_exists(admin_for_my_db, "dragons"))
        False
        >>> asyncio.run(check_if_ks_exists(admin_for_my_db, "app_keyspace"))
        True
    """

    logger.info(f"getting keyspaces ('{self._database_id}'), async")
    # the keyspace-admin timeout plays the role of the database-admin one
    # for the underlying single "info" request
    db_info = await self.async_info(
        database_admin_timeout_ms=keyspace_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info(f"finished getting keyspaces ('{self._database_id}'), async")

    raw_payload = db_info.raw
    if raw_payload is None:
        raise DevOpsAPIException("Could not get the keyspace list.")

    found_keyspaces = raw_payload.get("info", {}).get("keyspaces")
    # a missing/null/empty "keyspaces" entry is normalized to an empty list
    return found_keyspaces or []

def create_keyspace(self, name: str, *, wait_until_active: bool = True, update_db_keyspace: bool | None = None, keyspace_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None, **kwargs: Any) ‑> None

Create a keyspace in this database as requested, optionally waiting for it to be ready.

Args

name: the keyspace name. If supplying a keyspace that exists already, the method call proceeds as usual, no errors are raised, and the whole invocation is a no-op.
wait_until_active: if True (default), the method returns only after the target database is in ACTIVE state again (a few seconds, usually). If False, it will return right after issuing the creation request to the DevOps API, and it will be responsibility of the caller to check the database status/keyspace availability before working with it.
update_db_keyspace: if True, the Database or AsyncDatabase class that spawned this DatabaseAdmin, if any, gets updated to work on the newly-created keyspace starting when this method returns.
keyspace_admin_timeout_ms: a timeout, in milliseconds, for the whole requested operation to complete. This is used only if wait_until_active is true, i.e. if the method call must wait and keep querying the DevOps API for the status of the database during keyspace creation.
request_timeout_ms: a timeout, in milliseconds, for each underlying DevOps API HTTP request.
timeout_ms: an alias for both the request_timeout_ms and keyspace_admin_timeout_ms timeout parameters. In practice, regardless of wait_until_active, this parameter dictates an overall timeout on this method call.

Note: a timeout event is no guarantee at all that the creation request has not reached the API server and is not going to be, in fact, honored.

Example

>>> my_db_admin.list_keyspaces()
['default_keyspace']
>>> my_db_admin.create_keyspace("that_other_one")
>>> my_db_admin.list_keyspaces()
['default_keyspace', 'that_other_one']

Expand source code

def create_keyspace(
    self,
    name: str,
    *,
    wait_until_active: bool = True,
    update_db_keyspace: bool | None = None,
    keyspace_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
    **kwargs: Any,
) -> None:
    """
    Issue a keyspace-creation request for this database to the DevOps API
    and, if so requested, wait until the operation has completed.

    Args:
        name: the keyspace name. If supplying a keyspace that exists
            already, the method call proceeds as usual, no errors are
            raised, and the whole invocation is a no-op.
        wait_until_active: if True (default), the method keeps polling the
            database status and returns only once the database is back
            to ACTIVE state (a few seconds, usually) and the keyspace is
            found among the database keyspaces. If False, the method returns
            right after the creation request has been accepted by the
            DevOps API: it is then up to the caller to check the database
            status/keyspace availability before working with it.
        update_db_keyspace: if True, the `Database` or `AsyncDatabase` object
            that spawned this DatabaseAdmin is switched (through its
            `use_keyspace` method) to the newly-created keyspace
            right before this method returns.
        keyspace_admin_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation to complete. This matters only
            if `wait_until_active` is true, i.e. if the method call must
            wait and keep querying the DevOps API for the status of the
            database during keyspace creation.
        request_timeout_ms: a timeout, in milliseconds, for
            each underlying DevOps API HTTP request.
        timeout_ms: an alias for *both* the `request_timeout_ms` and
            `keyspace_admin_timeout_ms` timeout parameters. In practice,
            regardless of `wait_until_active`, this parameter dictates an
            overall timeout on this method call.
        **kwargs: accepted for signature compatibility; not used by the
            body of this method.

    Raises:
        DevOpsAPIException: if the API does not respond with the expected
            "Created" HTTP status; if, while waiting, the database leaves the
            MAINTENANCE status for anything other than ACTIVE; or if, after
            waiting, the keyspace is not listed for the database.

    Note: a timeout event is no guarantee at all that the
    creation request has not reached the API server and is not going
    to be, in fact, honored.

    Example:
        >>> my_db_admin.list_keyspaces()
        ['default_keyspace']
        >>> my_db_admin.create_keyspace("that_other_one")
        >>> my_db_admin.list_keyspaces()
        ['default_keyspace', 'that_other_one']
    """

    # Timeout resolution: explicit parameter, then `timeout_ms`, then defaults.
    overall_ms, overall_label = _first_valid_timeout(
        (keyspace_admin_timeout_ms, "keyspace_admin_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (
            self.api_options.timeout_options.keyspace_admin_timeout_ms,
            "keyspace_admin_timeout_ms",
        ),
    )
    per_request_ms, per_request_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    timeout_manager = MultiCallTimeoutManager(
        overall_timeout_ms=overall_ms,
        dev_ops_api=True,
        timeout_label=overall_label,
    )

    def _next_request_timeout() -> Any:
        # what is left of the overall budget, capped by the per-request timeout
        return timeout_manager.remaining_timeout(
            cap_time_ms=per_request_ms,
            cap_timeout_label=per_request_label,
        )

    logger.info(f"creating keyspace '{name}' on '{self._database_id}' (DevOps API)")
    creation_response = self._dev_ops_api_commander.raw_request(
        http_method=HttpMethod.POST,
        additional_path=f"keyspaces/{name}",
        timeout_context=_next_request_timeout(),
    )
    creation_status_code = creation_response.status_code
    if creation_status_code != DEV_OPS_RESPONSE_HTTP_CREATED:
        raise DevOpsAPIException(
            f"keyspace creation ('{name}') failed: API returned HTTP "
            f"{creation_status_code} instead of "
            f"{DEV_OPS_RESPONSE_HTTP_CREATED} - Created."
        )
    logger.info(
        f"DevOps API returned from creating keyspace '{name}' on '{self._database_id}'"
    )

    if wait_until_active:
        # Poll (sleeping first) until the database is out of MAINTENANCE.
        while True:
            logger.info(f"sleeping to poll for status of '{self._database_id}'")
            time.sleep(DEV_OPS_KEYSPACE_POLL_INTERVAL_S)
            polled_db_info = self._astra_db_admin._database_info_ctx(
                id=self._database_id,
                timeout_context=_next_request_timeout(),
            )
            current_status = polled_db_info.status
            if current_status != DEV_OPS_DATABASE_STATUS_MAINTENANCE:
                break
        if current_status != DEV_OPS_DATABASE_STATUS_ACTIVE:
            raise DevOpsAPIException(
                f"Database entered unexpected status {current_status} after MAINTENANCE."
            )
        # final verification: the keyspace must now be listed
        keyspace_is_listed = name in self.list_keyspaces()
        if not keyspace_is_listed:
            raise DevOpsAPIException("Could not create the keyspace.")

    logger.info(
        f"finished creating keyspace '{name}' on '{self._database_id}' (DevOps API)"
    )
    if update_db_keyspace:
        self.spawner_database.use_keyspace(name)

def drop(self, *, wait_until_active: bool = True, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Drop this database, i.e. delete it completely and permanently with all its data.

This method wraps the drop_database method of the AstraDBAdmin class, where more information may be found.

Args

wait_until_active: if True (default), the method returns only after the database has actually been deleted (generally a few minutes). If False, it will return right after issuing the drop request to the DevOps API, and it will be responsibility of the caller to check the database status/availability after that, if desired.
database_admin_timeout_ms: a timeout, in milliseconds, for the whole requested operation to complete. This is used only if wait_until_active is true, i.e. if the method call must wait and keep querying the DevOps API for the status of the newly-deleted database.
request_timeout_ms: a timeout, in milliseconds, for each underlying DevOps API HTTP request.
timeout_ms: an alias for both the request_timeout_ms and database_admin_timeout_ms timeout parameters. In practice, regardless of wait_until_active, this parameter dictates an overall timeout on this method call.

Note: a timeout event is no guarantee at all that the deletion request has not reached the API server and is not going to be, in fact, honored.

Example

>>> my_db_admin.list_keyspaces()
['default_keyspace', 'that_other_one']
>>> my_db_admin.drop()
>>> my_db_admin.list_keyspaces()  # raises a 404 Not Found http error

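Note

Once the method succeeds, methods on this object – such as info(), or list_keyspaces() – can still be invoked: however, this hardly makes sense as the underlying actual database is no more. It is the responsibility of the developer to design a correct flow which avoids using a deceased database any further.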

Expand source code

def drop(
    self,
    *,
    wait_until_active: bool = True,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Drop this database, i.e. delete it completely and permanently with all its data.

    This method wraps the `drop_database` method of the AstraDBAdmin class,
    where more information may be found: all arguments are passed over
    unchanged, together with this object's database ID.

    Args:
        wait_until_active: if True (default), the method returns only after
            the database has actually been deleted (generally a few minutes).
            If False, it will return right after issuing the
            drop request to the DevOps API, and it will be the responsibility
            of the caller to check the database status/availability
            after that, if desired.
        database_admin_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation to complete. This is used only
            if `wait_until_active` is true, i.e. if the method call must
            wait and keep querying the DevOps API for the status of the
            newly-deleted database.
        request_timeout_ms: a timeout, in milliseconds, for
            each underlying DevOps API HTTP request.
        timeout_ms: an alias for *both* the `request_timeout_ms` and
            `database_admin_timeout_ms` timeout parameters. In practice,
            regardless of `wait_until_active`, this parameter dictates an
            overall timeout on this method call.

    Note: a timeout event is no guarantee at all that the
    deletion request has not reached the API server and is not going
    to be, in fact, honored.

    Example:
        >>> my_db_admin.list_keyspaces()
        ['default_keyspace', 'that_other_one']
        >>> my_db_admin.drop()
        >>> my_db_admin.list_keyspaces()  # raises a 404 Not Found http error

    Note:
        Once the method succeeds, methods on this object -- such as `info()`,
        or `list_keyspaces()` -- can still be invoked: however, this hardly
        makes sense as the underlying actual database is no more.
        It is the responsibility of the developer to design a correct flow
        which avoids using a deceased database any further.
    """

    logger.info(f"dropping this database ('{self._database_id}')")
    # The call is a plain statement (not a `return <call>`), so that the
    # completion message below is actually reached and logged.
    self._astra_db_admin.drop_database(
        id=self._database_id,
        wait_until_active=wait_until_active,
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info(f"finished dropping this database ('{self._database_id}')")

def drop_keyspace(self, name: str, *, wait_until_active: bool = True, keyspace_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Delete a keyspace from the database, optionally waiting for the database to become active again.

Args

name: the keyspace to delete. If it does not exist in this database, an error is raised.
wait_until_active: if True (default), the method returns only after the target database is in ACTIVE state again (a few seconds, usually). If False, it will return right after issuing the deletion request to the DevOps API, and it will be responsibility of the caller to check the database status/keyspace availability before working with it.
keyspace_admin_timeout_ms: a timeout, in milliseconds, for the whole requested operation to complete. This is used only if wait_until_active is true, i.e. if the method call must wait and keep querying the DevOps API for the status of the database during keyspace deletion.
request_timeout_ms: a timeout, in milliseconds, for each underlying DevOps API HTTP request.
timeout_ms: an alias for both the request_timeout_ms and keyspace_admin_timeout_ms timeout parameters. In practice, regardless of wait_until_active, this parameter dictates an overall timeout on this method call.

Note: a timeout event is no guarantee at all that the deletion request has not reached the API server and is not going to be, in fact, honored.

Example

>>> my_db_admin.list_keyspaces()
['default_keyspace', 'that_other_one']
>>> my_db_admin.drop_keyspace("that_other_one")
>>> my_db_admin.list_keyspaces()
['default_keyspace']

Expand source code

def drop_keyspace(
    self,
    name: str,
    *,
    wait_until_active: bool = True,
    keyspace_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Delete a keyspace from the database, optionally waiting for the database
    to become active again.

    Args:
        name: the keyspace to delete. If it does not exist in this database,
            an error is raised.
        wait_until_active: if True (default), the method returns only after
            the target database is in ACTIVE state again (a few
            seconds, usually) and the keyspace is no longer listed.
            If False, it will return right after issuing the
            deletion request to the DevOps API, and it will be the
            responsibility of the caller to check the database
            status/keyspace availability before working with it.
        keyspace_admin_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation to complete. This is used only
            if `wait_until_active` is true, i.e. if the method call must
            wait and keep querying the DevOps API for the status of the
            database during keyspace deletion.
        request_timeout_ms: a timeout, in milliseconds, for
            each underlying DevOps API HTTP request.
        timeout_ms: an alias for *both* the `request_timeout_ms` and
            `keyspace_admin_timeout_ms` timeout parameters. In practice,
            regardless of `wait_until_active`, this parameter dictates an
            overall timeout on this method call.

    Raises:
        DevOpsAPIException: if the API does not respond with the expected
            "Accepted" HTTP status; if, while waiting, the database leaves the
            MAINTENANCE status for anything other than ACTIVE; or if, after
            waiting, the keyspace is still listed for the database.

    Note: a timeout event is no guarantee at all that the
    deletion request has not reached the API server and is not going
    to be, in fact, honored.

    Example:
        >>> my_db_admin.list_keyspaces()
        ['default_keyspace', 'that_other_one']
        >>> my_db_admin.drop_keyspace("that_other_one")
        >>> my_db_admin.list_keyspaces()
        ['default_keyspace']
    """

    # Timeout resolution: explicit parameter, then `timeout_ms`, then defaults.
    _keyspace_admin_timeout_ms, _ka_label = _first_valid_timeout(
        (keyspace_admin_timeout_ms, "keyspace_admin_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (
            self.api_options.timeout_options.keyspace_admin_timeout_ms,
            "keyspace_admin_timeout_ms",
        ),
    )
    _request_timeout_ms, _rt_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    # One overall budget shared by all requests issued by this call.
    timeout_manager = MultiCallTimeoutManager(
        overall_timeout_ms=_keyspace_admin_timeout_ms,
        dev_ops_api=True,
        timeout_label=_ka_label,
    )
    logger.info(f"dropping keyspace '{name}' on '{self._database_id}' (DevOps API)")
    dk_raw_response = self._dev_ops_api_commander.raw_request(
        http_method=HttpMethod.DELETE,
        additional_path=f"keyspaces/{name}",
        timeout_context=timeout_manager.remaining_timeout(
            cap_time_ms=_request_timeout_ms,
            cap_timeout_label=_rt_label,
        ),
    )
    if dk_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_ACCEPTED:
        # The message reports the keyspace *name* (not the `id` builtin)
        # and labels the expected status code as "Accepted".
        raise DevOpsAPIException(
            f"keyspace deletion ('{name}') failed: API returned HTTP "
            f"{dk_raw_response.status_code} instead of "
            f"{DEV_OPS_RESPONSE_HTTP_ACCEPTED} - Accepted"
        )
    logger.info(
        "DevOps API returned from dropping keyspace "
        f"'{name}' on '{self._database_id}'"
    )
    if wait_until_active:
        # Starting from an assumed MAINTENANCE status guarantees that the
        # database is polled at least once (after an initial sleep).
        last_status_seen = DEV_OPS_DATABASE_STATUS_MAINTENANCE
        while last_status_seen == DEV_OPS_DATABASE_STATUS_MAINTENANCE:
            logger.info(f"sleeping to poll for status of '{self._database_id}'")
            time.sleep(DEV_OPS_KEYSPACE_POLL_INTERVAL_S)
            last_status_seen = self._astra_db_admin._database_info_ctx(
                id=self._database_id,
                timeout_context=timeout_manager.remaining_timeout(
                    cap_time_ms=_request_timeout_ms,
                    cap_timeout_label=_rt_label,
                ),
            ).status
        if last_status_seen != DEV_OPS_DATABASE_STATUS_ACTIVE:
            raise DevOpsAPIException(
                f"Database entered unexpected status {last_status_seen} after MAINTENANCE."
            )
        # final verification: the keyspace must not be listed anymore
        if name in self.list_keyspaces():
            raise DevOpsAPIException("Could not drop the keyspace.")
    logger.info(
        f"finished dropping keyspace '{name}' on "
        f"'{self._database_id}' (DevOps API)"
    )

def find_embedding_providers(self, *, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> FindEmbeddingProvidersResult

Query the API for the full information on available embedding providers.

Args

database_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for database_admin_timeout_ms.
timeout_ms: an alias for database_admin_timeout_ms.

Returns

A FindEmbeddingProvidersResult object with the complete information returned by the API about available embedding providers.

Example (output abridged and indented for clarity)

>>> admin_for_my_db.find_embedding_providers()
FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)
>>> admin_for_my_db.find_embedding_providers().embedding_providers
{
    'openai': EmbeddingProvider(
        display_name='OpenAI',
        models=[
            EmbeddingProviderModel(name='text-embedding-3-small'),
            ...
        ]
    ),
    ...
}

Expand source code

def find_embedding_providers(
    self,
    *,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> FindEmbeddingProvidersResult:
    """
    Ask the API for the complete description of the available embedding providers.

    A single `findEmbeddingProviders` command is sent through this object's
    API commander, and the "status" part of the response is parsed.

    Args:
        database_admin_timeout_ms: a timeout, in milliseconds, for the
            underlying API request. When omitted, this object's defaults apply.
            (A single API request is issued, which is why all timeout
            parameters are equivalent for this method.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.

    Returns:
        A `FindEmbeddingProvidersResult` object with the complete information
        returned by the API about available embedding providers.

    Raises:
        UnexpectedDataAPIResponseException: if the response "status" lacks
            the "embeddingProviders" entry.

    Example (output abridged and indented for clarity):
        >>> admin_for_my_db.find_embedding_providers()
        FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)
        >>> admin_for_my_db.find_embedding_providers().embedding_providers
        {
            'openai': EmbeddingProvider(
                display_name='OpenAI',
                models=[
                    EmbeddingProviderModel(name='text-embedding-3-small'),
                    ...
                ]
            ),
            ...
        }
    """

    effective_timeout_ms, effective_label = _select_singlereq_timeout_da(
        timeout_options=self.api_options.timeout_options,
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info("findEmbeddingProviders")
    request_timeout_context = _TimeoutContext(
        request_ms=effective_timeout_ms, label=effective_label
    )
    api_response = self._api_commander.request(
        payload={"findEmbeddingProviders": {}},
        timeout_context=request_timeout_context,
    )

    # a well-formed response carries the providers within its "status"
    if "embeddingProviders" in api_response.get("status", {}):
        logger.info("finished findEmbeddingProviders")
        return FindEmbeddingProvidersResult._from_dict(api_response["status"])

    raise UnexpectedDataAPIResponseException(
        text="Faulty response from findEmbeddingProviders API command.",
        raw_response=api_response,
    )

def find_reranking_providers(self, *, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> FindRerankingProvidersResult

Query the API for the full information on available reranking providers.

Args

database_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for database_admin_timeout_ms.
timeout_ms: an alias for database_admin_timeout_ms.

Returns

A FindRerankingProvidersResult object with the complete information returned by the API about available reranking providers.

Example (output abridged and indented for clarity)

>>> admin_for_my_db.find_reranking_providers()
FindRerankingProvidersResult(reranking_providers=nvidia)
>>> admin_for_my_db.find_reranking_providers().reranking_providers
{
    'nvidia': RerankingProvider(
        <Default>
        display_name='Nvidia',
        models=[
            RerankingProviderModel(
                <Default>
                name='nvidia/llama-3.2-nv-rerankqa-1b-v2'
            ),
            ...
        ]
    ),
    ...
}

Expand source code

def find_reranking_providers(
    self,
    *,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> FindRerankingProvidersResult:
    """
    Ask the API for the complete description of the available reranking providers.

    A single `findRerankingProviders` command is sent through this object's
    API commander, and the "status" part of the response is parsed.

    Args:
        database_admin_timeout_ms: a timeout, in milliseconds, for the
            underlying API request. When omitted, this object's defaults apply.
            (A single API request is issued, which is why all timeout
            parameters are equivalent for this method.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.

    Returns:
        A `FindRerankingProvidersResult` object with the complete information
        returned by the API about available reranking providers.

    Raises:
        UnexpectedDataAPIResponseException: if the response "status" lacks
            the "rerankingProviders" entry.

    Example (output abridged and indented for clarity):
        >>> admin_for_my_db.find_reranking_providers()
        FindRerankingProvidersResult(reranking_providers=nvidia)
        >>> admin_for_my_db.find_reranking_providers().reranking_providers
        {
            'nvidia': RerankingProvider(
                <Default>
                display_name='Nvidia',
                models=[
                    RerankingProviderModel(
                        <Default>
                        name='nvidia/llama-3.2-nv-rerankqa-1b-v2'
                    ),
                    ...
                ]
            ),
            ...
        }
    """

    effective_timeout_ms, effective_label = _select_singlereq_timeout_da(
        timeout_options=self.api_options.timeout_options,
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info("findRerankingProviders")
    request_timeout_context = _TimeoutContext(
        request_ms=effective_timeout_ms, label=effective_label
    )
    api_response = self._api_commander.request(
        payload={"findRerankingProviders": {}},
        timeout_context=request_timeout_context,
    )

    # a well-formed response carries the providers within its "status"
    if "rerankingProviders" in api_response.get("status", {}):
        logger.info("finished findRerankingProviders")
        return FindRerankingProvidersResult._from_dict(api_response["status"])

    raise UnexpectedDataAPIResponseException(
        text="Faulty response from findRerankingProviders API command.",
        raw_response=api_response,
    )

def get_async_database(self, *, keyspace: str | None = None, token: str | TokenProvider | UnsetType = (unset), spawn_api_options: APIOptions | UnsetType = (unset)) ‑> AsyncDatabase

Create an AsyncDatabase instance from this database admin, for data-related tasks.

Args

keyspace: if provided, it is passed to the AsyncDatabase; otherwise the AsyncDatabase class will apply an environment-specific default.
token: if supplied, is passed to the AsyncDatabase instead of the one set for this object. Useful if one wants to work in a least-privilege manner, limiting the permissions for non-admin work. This can be either a literal token string or a subclass of TokenProvider.
spawn_api_options: a specification - complete or partial - of the API Options to override the defaults inherited from the database admin. This allows for a deeper configuration of the database admin, e.g. concerning timeouts; if this is passed together with the named timeout parameters, the latter will take precedence in their respective settings.

Returns

An AsyncDatabase object, ready to work with data, collections and tables.

Expand source code

def get_async_database(
    self,
    *,
    keyspace: str | None = None,
    token: str | TokenProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> AsyncDatabase:
    """
    Spawn an AsyncDatabase from this database admin, for data-related tasks.

    The object is obtained by building a Database through `get_database`
    (all arguments being passed over as they are) and then converting
    the result with its `to_async` method.

    Args:
        keyspace: if provided, it is passed to the AsyncDatabase; otherwise
            the AsyncDatabase class will apply an environment-specific default.
        token: if supplied, is passed to the AsyncDatabase instead of
            the one set for this object. Useful if one wants to work in
            a least-privilege manner, limiting the permissions for non-admin work.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        spawn_api_options: a specification - complete or partial - of the
            API Options to override the defaults inherited from the database
            admin. This allows for a deeper configuration of the resulting
            object, e.g. concerning timeouts.

    Returns:
        An AsyncDatabase object, ready to work with data, collections and tables.
    """

    sync_database = self.get_database(
        keyspace=keyspace,
        token=token,
        spawn_api_options=spawn_api_options,
    )
    return sync_database.to_async()

def get_database(self, *, keyspace: str | None = None, token: str | TokenProvider | UnsetType = (unset), spawn_api_options: APIOptions | UnsetType = (unset)) ‑> Database

Create a Database instance from this database admin, for data-related tasks.

Args

keyspace: if provided, it is passed to the Database; otherwise the Database class will apply an environment-specific default.
token: if supplied, is passed to the Database instead of the one set for this object. Useful if one wants to work in a least-privilege manner, limiting the permissions for non-admin work. This can be either a literal token string or a subclass of TokenProvider.
spawn_api_options: a specification - complete or partial - of the API Options to override the defaults inherited from the database admin. This allows for a deeper configuration of the database admin, e.g. concerning timeouts; if this is passed together with the named timeout parameters, the latter will take precedence in their respective settings.

Returns

A Database object, ready to work with data, collections and tables.

Example

>>> my_db = my_db_admin.get_database()
>>> my_db.list_collection_names()
['movies', 'another_collection']

Note

creating an instance of Database does not trigger actual creation of the database itself, which should exist beforehand. To create databases, see the AstraDBAdmin class.

Expand source code

def get_database(
    self,
    *,
    keyspace: str | None = None,
    token: str | TokenProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> Database:
    """
    Spawn a Database object for the database this admin manages, to be used
    for data-related (as opposed to administrative) tasks.

    The call is delegated to the parent AstraDBAdmin, supplying this
    database admin's own API endpoint.

    Args:
        keyspace: the keyspace handed over to the Database. When omitted,
            the Database class applies an environment-specific default.
        token: a token for the Database, replacing the one set on this object.
            Handy for least-privilege setups, where non-admin work runs with
            reduced permissions. Either a literal token string or a subclass
            of `astrapy.authentication.TokenProvider` is accepted.
        spawn_api_options: a full or partial set of API Options, overriding
            the defaults that would be inherited from this database admin.
            Use it for a finer configuration of the spawned database
            (e.g. its timeouts). If a setting is also passed as a named
            parameter of this method (such as `token`), the named parameter
            takes precedence.

    Returns:
        A Database object, ready to work with data, collections and tables.

    Example:
        >>> my_db = my_db_admin.get_database()
        >>> my_db.list_collection_names()
        ['movies', 'another_collection']

    Note:
        no database gets created by instantiating a Database: the database
        itself must exist beforehand. See the AstraDBAdmin class for
        how to create databases.
    """

    parent_admin = self._astra_db_admin
    return parent_admin.get_database(
        api_endpoint=self.api_endpoint,
        keyspace=keyspace,
        token=token,
        spawn_api_options=spawn_api_options,
    )

def info(self, *, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> AstraDBAdminDatabaseInfo

Query the DevOps API for the full info on this database.

Args

database_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for database_admin_timeout_ms.
timeout_ms: an alias for database_admin_timeout_ms.

Returns

An AstraDBAdminDatabaseInfo object.

Example

>>> my_db_info = admin_for_my_db.info()
>>> my_db_info.status
'ACTIVE'
>>> my_db_info.info.region
'us-east1'

Expand source code

def info(
    self,
    *,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> AstraDBAdminDatabaseInfo:
    """
    Retrieve the full information on this database from the DevOps API.

    The request is carried out by the parent AstraDBAdmin's `database_info`
    method, called with this admin's database ID.

    Args:
        database_admin_timeout_ms: timeout, in milliseconds, for the
            underlying API request. This object's defaults are used if it is
            not given. (A single API request is issued by this method,
            so all timeout parameters are handled alike.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.

    Returns:
        An AstraDBAdminDatabaseInfo object.

    Example:
        >>> my_db_info = admin_for_my_db.info()
        >>> my_db_info.status
        'ACTIVE'
        >>> my_db_info.info.region
        'us-east1'
    """

    database_id = self._database_id
    logger.info(f"getting info ('{database_id}')")
    database_info = self._astra_db_admin.database_info(
        id=database_id,
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info(f"finished getting info ('{database_id}')")
    return database_info

def list_keyspaces(self, *, keyspace_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[str]

Query the DevOps API for a list of the keyspaces in the database.

Args

keyspace_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for keyspace_admin_timeout_ms.
timeout_ms: an alias for keyspace_admin_timeout_ms.

Returns

A list of the keyspaces, each a string, in no particular order.

Example

>>> admin_for_my_db.list_keyspaces()
['default_keyspace', 'staging_keyspace']

Expand source code

def list_keyspaces(
    self,
    *,
    keyspace_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[str]:
    """
    Query the DevOps API for a list of the keyspaces in the database.

    The list is extracted from the raw payload of this admin's `info()` call
    (under `info` -> `keyspaces`).

    Args:
        keyspace_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `keyspace_admin_timeout_ms`.
        timeout_ms: an alias for `keyspace_admin_timeout_ms`.

    Returns:
        A list of the keyspaces, each a string, in no particular order.
        An empty list is returned if the payload carries no keyspace list.

    Raises:
        DevOpsAPIException: if the database info comes with no raw payload.

    Example:
        >>> admin_for_my_db.list_keyspaces()
        ['default_keyspace', 'staging_keyspace']
    """

    logger.info(f"getting keyspaces ('{self._database_id}')")
    # The keyspace-admin timeout is forwarded as the info() method's
    # own "database admin" timeout parameter.
    info = self.info(
        database_admin_timeout_ms=keyspace_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info(f"finished getting keyspaces ('{self._database_id}')")
    if info.raw is None:
        raise DevOpsAPIException("Could not get the keyspace list.")
    # Both levels may be missing *or* explicitly null in the payload:
    # `or` covers the null case, which a plain `.get(key, default)` would not.
    info_section = info.raw.get("info") or {}
    return info_section.get("keyspaces") or []

def with_options(self, *, token: str | TokenProvider | UnsetType = (unset), api_options: APIOptions | UnsetType = (unset)) ‑> AstraDBDatabaseAdmin

Create a clone of this AstraDBDatabaseAdmin with some changed attributes.

Args

token: an Access Token to the database. Example: "AstraCS:xyz...". This can be either a literal token string or a subclass of TokenProvider.
api_options: any additional options to set for the clone, in the form of an APIOptions instance (where one can set just the needed attributes). In case the same setting is also provided as named parameter, the latter takes precedence.

Returns

a new AstraDBDatabaseAdmin instance.

Example

>>> admin_with_other_token = admin_for_my_db.with_options(
...     token="AstraCS:xyz...",
... )

Expand source code

def with_options(
    self,
    *,
    token: str | TokenProvider | UnsetType = _UNSET,
    api_options: APIOptions | UnsetType = _UNSET,
) -> AstraDBDatabaseAdmin:
    """
    Create a clone of this AstraDBDatabaseAdmin with some changed attributes.

    All parameters are keyword-only; the clone targets the same database
    as this object.

    Args:
        token: an Access Token to the database. Example: `"AstraCS:xyz..."`.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        api_options: any additional options to set for the clone, in the form of
            an APIOptions instance (where one can set just the needed attributes).
            In case the same setting is also provided as named parameter,
            the latter takes precedence.

    Returns:
        a new AstraDBDatabaseAdmin instance.

    Example:
        >>> admin_with_other_token = admin_for_my_db.with_options(
        ...     token="AstraCS:xyz...",
        ... )
    """

    return self._copy(
        token=token,
        api_options=api_options,
    )

class AsyncCollection (*, database: AsyncDatabase, name: str, keyspace: str | None, api_options: FullAPIOptions)

A Data API collection, the object to interact with the Data API for unstructured (schemaless) data, especially for DML (data manipulation) operations. This class has an asynchronous interface for use with asyncio.

This class is not meant for direct instantiation by the user, rather it is obtained by invoking methods such as get_collection of AsyncDatabase, wherefrom the AsyncCollection inherits its API options such as authentication token and API endpoint.

Args

database: an AsyncDatabase object, instantiated earlier. This represents the database the collection belongs to.
name: the collection name. This parameter should match an existing collection on the database.
keyspace: this is the keyspace to which the collection belongs. If nothing is specified, the database's working keyspace is used.
api_options: a complete specification of the API Options for this instance.

Examples

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> from astrapy import DataAPIClient
>>> client = DataAPIClient()
>>> async_database = client.get_async_database(
...     "https://01234567-....apps.astra.datastax.com",
...     token="AstraCS:..."
... )

>>> # Create a collection using the fluent syntax for its definition
>>> from astrapy.constants import VectorMetric
>>> from astrapy.info import CollectionDefinition
>>>
>>> collection_definition = (
...     CollectionDefinition.builder()
...     .set_vector_dimension(3)
...     .set_vector_metric(VectorMetric.DOT_PRODUCT)
...     .set_indexing("deny", ["annotations", "logs"])
...     .build()
... )
>>> my_collection = await async_database.create_collection(
...     "my_events",
...     definition=collection_definition,
... )

>>> # Create a collection with the definition as object
>>> from astrapy.info import CollectionVectorOptions
>>>
>>> collection_definition_1 = CollectionDefinition(
...     vector=CollectionVectorOptions(
...         dimension=3,
...         metric=VectorMetric.DOT_PRODUCT,
...     ),
...     indexing={"deny": ["annotations", "logs"]},
... )
>>> my_collection_1 = await async_database.create_collection(
...     "my_events",
...     definition=collection_definition_1,
... )
>>>

>>> # Create a collection with the definition as plain dictionary
>>> collection_definition_2 = {
...     "indexing": {"deny": ["annotations", "logs"]},
...     "vector": {
...         "dimension": 3,
...         "metric": VectorMetric.DOT_PRODUCT,
...     },
... }
>>> my_collection_2 = await async_database.create_collection(
...     "my_events",
...     definition=collection_definition_2,
... )

>>> # Get a reference to an existing collection
>>> # (no checks are performed on DB)
>>> my_collection_3a = async_database.get_collection("my_events")
>>> my_collection_3b = async_database.my_events
>>> my_collection_3c = async_database["my_events"]

Note

creating an instance of AsyncCollection does not trigger actual creation of the collection on the database. The latter should have been created beforehand, e.g. through the create_collection method of an AsyncDatabase.

Expand source code

class AsyncCollection(Generic[DOC]):
    """
    A Data API collection, the object to interact with the Data API for unstructured
    (schemaless) data, especially for DML (data manipulation) operations.
    This class has an asynchronous interface for use with asyncio.

    This class is not meant for direct instantiation by the user, rather
    it is obtained by invoking methods such as `get_collection` of AsyncDatabase,
    wherefrom the AsyncCollection inherits its API options such as authentication
    token and API endpoint.

    Args:
        database: an AsyncDatabase object, instantiated earlier. This represents
            the database the collection belongs to.
        name: the collection name. This parameter should match an existing
            collection on the database.
        keyspace: this is the keyspace to which the collection belongs.
            If nothing is specified, the database's working keyspace is used.
        api_options: a complete specification of the API Options for this instance.

    Examples:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> from astrapy import DataAPIClient
        >>> client = DataAPIClient()
        >>> async_database = client.get_async_database(
        ...     "https://01234567-....apps.astra.datastax.com",
        ...     token="AstraCS:..."
        ... )

        >>> # Create a collection using the fluent syntax for its definition
        >>> from astrapy.constants import VectorMetric
        >>> from astrapy.info import CollectionDefinition
        >>>
        >>> collection_definition = (
        ...     CollectionDefinition.builder()
        ...     .set_vector_dimension(3)
        ...     .set_vector_metric(VectorMetric.DOT_PRODUCT)
        ...     .set_indexing("deny", ["annotations", "logs"])
        ...     .build()
        ... )
        >>> my_collection = await async_database.create_collection(
        ...     "my_events",
        ...     definition=collection_definition,
        ... )

        >>>
        >>> # Create a collection with the definition as object
        >>> from astrapy.info import CollectionVectorOptions
        >>>
        >>> collection_definition_1 = CollectionDefinition(
        ...     vector=CollectionVectorOptions(
        ...         dimension=3,
        ...         metric=VectorMetric.DOT_PRODUCT,
        ...     ),
        ...     indexing={"deny": ["annotations", "logs"]},
        ... )
        >>> my_collection_1 = await async_database.create_collection(
        ...     "my_events",
        ...     definition=collection_definition_1,
        ... )
        >>>

        >>> # Create a collection with the definition as plain dictionary
        >>> collection_definition_2 = {
        ...     "indexing": {"deny": ["annotations", "logs"]},
        ...     "vector": {
        ...         "dimension": 3,
        ...         "metric": VectorMetric.DOT_PRODUCT,
        ...     },
        ... }
        >>> my_collection_2 = await async_database.create_collection(
        ...     "my_events",
        ...     definition=collection_definition_2,
        ... )

        >>> # Get a reference to an existing collection
        >>> # (no checks are performed on DB)
        >>> my_collection_3a = async_database.get_collection("my_events")
        >>> my_collection_3b = async_database.my_events
        >>> my_collection_3c = async_database["my_events"]

    Note:
        creating an instance of AsyncCollection does not trigger actual creation
        of the collection on the database. The latter should have been created
        beforehand, e.g. through the `create_collection` method of an AsyncDatabase.
    """

    def __init__(
        self,
        *,
        database: AsyncDatabase,
        name: str,
        keyspace: str | None,
        api_options: FullAPIOptions,
    ) -> None:
        """
        Set up the collection: resolve its keyspace, keep a private copy of
        the database and prepare the API commander for issuing requests.

        Args:
            database: the AsyncDatabase the collection belongs to.
            name: the collection name.
            keyspace: the collection keyspace; if None, the keyspace of
                `database` is used.
            api_options: the complete API Options for this instance.

        Raises:
            ValueError: if neither `keyspace` nor the database provide a keyspace.
        """
        self.api_options = api_options
        self._name = name

        # An explicit keyspace wins; the database's own is the fallback.
        resolved_keyspace = database.keyspace if keyspace is None else keyspace
        if resolved_keyspace is None:
            raise ValueError("Attempted to create Collection with 'keyspace' unset.")

        # The collection holds its own database copy, bound to the resolved
        # keyspace and carrying the collection's API options.
        self._database = database._copy(
            keyspace=resolved_keyspace,
            api_options=self.api_options,
        )

        # Later entries override earlier ones in case of header-name clashes.
        options = self.api_options
        self._commander_headers = {
            DEFAULT_DATA_API_AUTH_HEADER: options.token.get_token(),
            **options.embedding_api_key.get_headers(),
            **options.reranking_api_key.get_headers(),
            **options.database_additional_headers,
        }
        self._api_commander = self._get_api_commander()

    def __repr__(self) -> str:
        """
        Describe the collection through its name, keyspace,
        database API endpoint and API options.
        """
        endpoint_part = f'database.api_endpoint="{self.database.api_endpoint}"'
        described_fields = ", ".join(
            (
                f'name="{self.name}"',
                f'keyspace="{self.keyspace}"',
                endpoint_part,
                f"api_options={self.api_options}",
            )
        )
        return f"{self.__class__.__name__}({described_fields})"

    def __eq__(self, other: Any) -> bool:
        """
        Two collections are equal when their name, database and API options
        all match. Anything that is not an AsyncCollection compares unequal.
        """
        if not isinstance(other, AsyncCollection):
            return False
        # All three comparisons are evaluated before combining them.
        same_name = self._name == other._name
        same_database = self._database == other._database
        same_api_options = self.api_options == other.api_options
        return all((same_name, same_database, same_api_options))

    def __call__(self, *pargs: Any, **kwargs: Any) -> None:
        """
        Always raise a TypeError: collections are not callable.

        The message hints that a method named like this collection may have
        been looked for on the database object, where no such method exists.

        Raises:
            TypeError: unconditionally.
        """
        own_class_name = self.__class__.__name__
        database_class_name = self.database.__class__.__name__
        raise TypeError(
            f"'{own_class_name}' object is not callable. If you "
            f"meant to call the '{self.name}' method on a "
            f"'{database_class_name}' object "
            "it is failing because no such method exists."
        )

    def _get_api_commander(self) -> APICommander:
        """
        Build a fresh APICommander out of this collection's settings.

        The request path is assembled from the API path, the API version,
        the keyspace and the collection name: unset (None) components are
        skipped, leading/trailing slashes are stripped from each component
        and components that end up empty are dropped.

        Raises:
            ValueError: if the database has no keyspace set.
        """

        database = self._database
        if database.keyspace is None:
            raise ValueError(
                "No keyspace specified. AsyncCollection requires a keyspace to "
                "be set, e.g. through the `keyspace` constructor parameter."
            )

        url_options = database.api_options.data_api_url_options
        raw_segments = (
            url_options.api_path,
            url_options.api_version,
            database.keyspace,
            self._name,
        )
        path_segments = []
        for raw_segment in raw_segments:
            if raw_segment is None:
                continue
            trimmed_segment = raw_segment.strip("/")
            if trimmed_segment != "":
                path_segments.append(trimmed_segment)

        # The same serdes flag governs decimals handling in both directions.
        use_decimals = self.api_options.serdes_options.use_decimals_in_collections
        return APICommander(
            api_endpoint=database.api_endpoint,
            path="/" + "/".join(path_segments),
            headers=self._commander_headers,
            callers=self.api_options.callers,
            redacted_header_names=self.api_options.redacted_header_names,
            handle_decimals_writes=use_decimals,
            handle_decimals_reads=use_decimals,
        )

    async def __aenter__(self: AsyncCollection[DOC]) -> AsyncCollection[DOC]:
        """Enter the async context manager, returning this same collection."""
        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None = None,
        exc_value: BaseException | None = None,
        traceback: TracebackType | None = None,
    ) -> None:
        """
        Leave the async context manager, forwarding the exit (along with
        the exception details, if any) to the API commander when one is set.
        """
        commander = self._api_commander
        if commander is None:
            return
        await commander.__aexit__(
            exc_type=exc_type,
            exc_value=exc_value,
            traceback=traceback,
        )

    async def _converted_request(
        self,
        *,
        http_method: str = HttpMethod.POST,
        payload: dict[str, Any] | None = None,
        additional_path: str | None = None,
        request_params: dict[str, Any] | None = None,
        raise_api_errors: bool = True,
        timeout_context: _TimeoutContext,
    ) -> dict[str, Any]:
        """
        Issue a request through the API commander, converting the payload
        before sending and the response after receiving, both according
        to this collection's serdes options.

        Args:
            http_method: the HTTP method for the request.
            payload: the request payload, before preprocessing.
            additional_path: passed as-is to the API commander.
            request_params: passed to the API commander; if omitted (or None),
                a new empty dictionary is used for each call.
            raise_api_errors: passed as-is to the API commander.
            timeout_context: the timeout settings for the request.

        Returns:
            the postprocessed response, as a dictionary.
        """
        # A fresh dict per call: a `{}` default in the signature would be a
        # single object shared by all invocations and handed downstream.
        effective_request_params: dict[str, Any] = (
            {} if request_params is None else request_params
        )
        converted_payload = preprocess_collection_payload(
            payload, options=self.api_options.serdes_options
        )
        raw_response_json = await self._api_commander.async_request(
            http_method=http_method,
            payload=converted_payload,
            additional_path=additional_path,
            request_params=effective_request_params,
            raise_api_errors=raise_api_errors,
            timeout_context=timeout_context,
        )
        response_json = postprocess_collection_response(
            raw_response_json, options=self.api_options.serdes_options
        )
        return response_json

    def _copy(
        self: AsyncCollection[DOC],
        *,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncCollection[DOC]:
        """
        Build a new AsyncCollection with the same database, name and keyspace
        as this one, and with the API options possibly overridden.

        The `api_options` override is applied first, the named parameters
        next: the latter prevail for settings specified in both ways.
        """
        named_overrides = APIOptions(
            embedding_api_key=embedding_api_key,
            reranking_api_key=reranking_api_key,
        )
        options_after_bulk_override = self.api_options.with_override(api_options)
        resulting_api_options = options_after_bulk_override.with_override(
            named_overrides
        )
        return AsyncCollection(
            database=self.database,
            name=self.name,
            keyspace=self.keyspace,
            api_options=resulting_api_options,
        )

    def with_options(
        self: AsyncCollection[DOC],
        *,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncCollection[DOC]:
        """
        Return a copy of this collection in which some settings are replaced.

        Args:
            embedding_api_key: optional API key(s) for the embedding service.
                When an embedding service is configured and this parameter is
                not None, every Data API call carries the embedding-related
                headers this parameter specifies. A plain string results in
                the single "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
                Some vectorize providers/models, when authenticating through
                headers, require a specialized subclass of
                `astrapy.authentication.EmbeddingHeadersProvider` instead.
            reranking_api_key: optional API key(s) for the reranker service.
                When a reranker is configured for the collection and this
                parameter is not None, Data API calls carry the corresponding
                reranker-related headers. This setting is not necessarily
                required by reranker services (e.g. for services needing no
                authentication, or when authentication is configured within
                the collection definition through a "shared secret").
                A plain string is turned into an instance of
                `astrapy.authentication.RerankingAPIKeyHeaderProvider`.
            api_options: further settings for the copy, given as an APIOptions
                instance (with just the desired attributes set). Should
                a setting be passed as named parameter as well, the named
                parameter prevails.

        Returns:
            a new AsyncCollection instance.

        Example:
            >>> collection_with_api_key_configured = my_async_collection.with_options(
            ...     embedding_api_key="secret-key-0123abcd...",
            ... )
        """

        cloned_collection = self._copy(
            embedding_api_key=embedding_api_key,
            reranking_api_key=reranking_api_key,
            api_options=api_options,
        )
        return cloned_collection

    def to_sync(
        self: AsyncCollection[DOC],
        *,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> Collection[DOC]:
        """
        Obtain a (synchronous) Collection out of this one. Except for
        the settings explicitly overridden through the arguments, the copy
        retains everything from this collection (its database being
        converted into a sync object).

        Args:
            embedding_api_key: optional API key(s) for the embedding service.
                When an embedding service is configured and this parameter is
                not None, every Data API call carries the embedding-related
                headers this parameter specifies. A plain string results in
                the single "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
                Some vectorize providers/models, when authenticating through
                headers, require a specialized subclass of
                `astrapy.authentication.EmbeddingHeadersProvider` instead.
            reranking_api_key: optional API key(s) for the reranker service.
                When a reranker is configured for the collection and this
                parameter is not None, Data API calls carry the corresponding
                reranker-related headers. This setting is not necessarily
                required by reranker services (e.g. for services needing no
                authentication, or when authentication is configured within
                the collection definition through a "shared secret").
                A plain string is turned into an instance of
                `astrapy.authentication.RerankingAPIKeyHeaderProvider`.
            api_options: further settings for the result, given as an APIOptions
                instance (with just the desired attributes set). Should
                a setting be passed as named parameter as well, the named
                parameter prevails.

        Returns:
            the new copy, a Collection instance.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> my_async_coll.to_sync().count_documents({}, upper_bound=100)
            77
        """

        named_overrides = APIOptions(
            embedding_api_key=embedding_api_key,
            reranking_api_key=reranking_api_key,
        )
        # First the bulk override, then the named parameters on top of it.
        options_after_bulk_override = self.api_options.with_override(api_options)
        resulting_api_options = options_after_bulk_override.with_override(
            named_overrides
        )
        return Collection(
            database=self.database.to_sync(),
            name=self.name,
            keyspace=self.keyspace,
            api_options=resulting_api_options,
        )

    async def options(
        self,
        *,
        collection_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionDefinition:
        """
        Read the collection options, i.e. its configuration, from the database.

        No caching is involved: each invocation issues a request to the
        Data API. The information is thus always up to date, e.g. for
        applications validating the collection in real time.

        Args:
            collection_admin_timeout_ms: timeout, in milliseconds, for the
                underlying API request. This object's defaults are used if it is
                not given. (A single API request is issued by this method,
                so all timeout parameters are handled alike.)
            request_timeout_ms: an alias for `collection_admin_timeout_ms`.
            timeout_ms: an alias for `collection_admin_timeout_ms`.

        Returns:
            a CollectionDefinition instance describing the collection.
            (See also the database `list_collections` method.)

        Raises:
            RuntimeError: if no collection with this name is found
                among the collections listed by the database.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> asyncio.run(my_async_coll.options())
            CollectionDefinition(vector=CollectionVectorOptions(dimension=3, metric='cosine'))
        """

        selected_timeout_ms, timeout_label = _select_singlereq_timeout_ca(
            timeout_options=self.api_options.timeout_options,
            collection_admin_timeout_ms=collection_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info(f"getting collections in search of '{self.name}'")
        listing_timeout_context = _TimeoutContext(
            request_ms=selected_timeout_ms,
            label=timeout_label,
        )
        all_descriptors = await self.database._list_collections_ctx(
            keyspace=None,
            timeout_context=listing_timeout_context,
        )
        # Keep only the descriptors whose name is this collection's.
        matching_descriptors = [
            descriptor
            for descriptor in all_descriptors
            if descriptor.name == self.name
        ]
        logger.info(f"finished getting collections in search of '{self.name}'")
        if not matching_descriptors:
            raise RuntimeError(
                f"Collection {self.keyspace}.{self.name} not found.",
            )
        return matching_descriptors[0].definition

    async def info(
        self,
        *,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionInfo:
        """
        Collect the information on the collection (name, location, database)
        into a CollectionInfo object.

        This is a different thing than the collection `options` method
        (which deals with the collection internal configuration).

        Args:
            database_admin_timeout_ms: timeout, in milliseconds, for the
                underlying DevOps API request.
                This object's defaults are used if it is not given.
                (A single API request is issued by this method,
                so all timeout parameters are handled alike.)
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.

        Returns:
            a CollectionInfo object, made of the database info together with
            this collection's keyspace, name and full name.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> asyncio.run(my_async_coll.info()).database_info.region
            'us-east1'
            >>> asyncio.run(my_async_coll.info()).full_name
            'default_keyspace.my_v_collection'

        Note:
            among other things, the returned CollectionInfo wraps
            the database information: hence this method invokes
            the same-named method of the database object (which, in turn,
            performs a HTTP request to the DevOps API).
            See the documentation for `Database.info()` for more details.
        """

        database_info = await self.database.info(
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        collection_info = CollectionInfo(
            database_info=database_info,
            keyspace=self.keyspace,
            name=self.name,
            full_name=self.full_name,
        )
        return collection_info

    @property
    def database(self) -> AsyncDatabase:
        """
        The AsyncDatabase object this collection belongs to.

        This is the collection's own database instance, as stored
        when the collection was initialized.

        Example:
            >>> my_async_coll.database.name
            'the_db'
        """

        return self._database

    @property
    def keyspace(self) -> str:
        """
        The keyspace where this collection is found, as set
        on the collection's database.

        Raises:
            RuntimeError: if the database has no keyspace set.

        Example:
            >>> my_async_coll.keyspace
            'default_keyspace'
        """

        database_keyspace = self.database.keyspace
        if database_keyspace is not None:
            return database_keyspace
        raise RuntimeError("The collection's DB is set with keyspace=None")

    @property
    def name(self) -> str:
        """
        The name of this collection, as provided when
        the collection object was created.

        Example:
            >>> my_async_coll.name
            'my_v_collection'
        """

        return self._name

    @property
    def full_name(self) -> str:
        """
        The collection name qualified with its keyspace,
        formatted as "keyspace.collection_name".

        Example:
            >>> my_async_coll.full_name
            'default_keyspace.my_v_collection'
        """

        keyspace_part = self.keyspace
        name_part = self.name
        return f"{keyspace_part}.{name_part}"

    async def insert_one(
        self,
        document: DOC,
        *,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionInsertOneResult:
        """
        Insert a single document in the collection in an atomic operation.

        Args:
            document: the dictionary expressing the document to insert.
                The `_id` field of the document can be left out, in which
                case it will be created automatically.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a CollectionInsertOneResult object.

        Raises:
            UnexpectedDataAPIResponseException: if the API response carries no
                usable `status.insertedIds` entry (missing, null or empty).

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> async def write_and_count(acol: AsyncCollection) -> None:
            ...     count0 = await acol.count_documents({}, upper_bound=10)
            ...     print("count0", count0)
            ...     await acol.insert_one(
            ...         {
            ...             "age": 30,
            ...             "name": "Smith",
            ...             "food": ["pear", "peach"],
            ...             "likes_fruit": True,
            ...         },
            ...     )
            ...     await acol.insert_one({"_id": "user-123", "age": 50, "name": "Maccio"})
            ...     count1 = await acol.count_documents({}, upper_bound=10)
            ...     print("count1", count1)
            ...
            >>> asyncio.run(write_and_count(my_async_coll))
            count0 0
            count1 2

            >>> asyncio.run(my_async_coll.insert_one({"tag": "v", "$vector": [10, 11]}))
            CollectionInsertOneResult(...)

        Note:
            If an `_id` is explicitly provided, which corresponds to a document
            that exists already in the collection, an error is raised and
            the insertion fails.
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        io_payload = {"insertOne": {"document": document}}
        logger.info(f"insertOne on '{self.name}'")
        io_response = await self._converted_request(
            payload=io_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished insertOne on '{self.name}'")
        # `or {}` also covers a response with an explicit null "status", which
        # would otherwise surface as a TypeError rather than as a faulty response.
        inserted_ids = (io_response.get("status") or {}).get("insertedIds")
        if not inserted_ids:
            # missing, null or empty "insertedIds": nothing to report back
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from insert_one API command.",
                raw_response=io_response,
            )
        return CollectionInsertOneResult(
            raw_results=[io_response],
            inserted_id=inserted_ids[0],
        )

    async def insert_many(
        self,
        documents: Iterable[DOC],
        *,
        ordered: bool = False,
        chunk_size: int | None = None,
        concurrency: int | None = None,
        request_timeout_ms: int | None = None,
        general_method_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionInsertManyResult:
        """
        Insert a list of documents into the collection.
        This is not an atomic operation.

        Args:
            documents: an iterable of dictionaries, each a document to insert.
                Documents may specify their `_id` field or leave it out, in which
                case it will be added automatically.
            ordered: if False (default), the insertions can occur in arbitrary order
                and possibly concurrently. If True, they are processed sequentially.
                If there are no specific reasons against it, unordered insertions are to
                be preferred as they complete much faster.
            chunk_size: how many documents to include in a single API request.
                It must be a positive integer.
                Exceeding the server maximum allowed value results in an error.
                Leave it unspecified (recommended) to use the system default.
            concurrency: maximum number of concurrent requests to the API at
                a given time. It cannot be more than one for ordered insertions,
                and must be at least one for unordered insertions.
            request_timeout_ms: a timeout, in milliseconds, for each API request.
                If not passed, the collection-level setting is used instead.
            general_method_timeout_ms: a timeout, in milliseconds, for the whole
                requested operation (which may involve multiple API requests).
                If not passed, the collection-level setting is used instead.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a CollectionInsertManyResult object.

        Raises:
            ValueError: if `concurrency` is greater than one for an ordered
                insertion, if it is less than one for an unordered insertion,
                or if `chunk_size` is not a positive integer.
            CollectionInsertManyException: if the API reports errors for any
                of the chunks (see the Notes below).

        Examples:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> async def write_and_count(acol: AsyncCollection) -> None:
            ...     count0 = await acol.count_documents({}, upper_bound=10)
            ...     print("count0", count0)
            ...     im_result1 = await acol.insert_many(
            ...         [
            ...             {"a": 10},
            ...             {"a": 5},
            ...             {"b": [True, False, False]},
            ...         ],
            ...         ordered=True,
            ...     )
            ...     print("inserted1", im_result1.inserted_ids)
            ...     count1 = await acol.count_documents({}, upper_bound=100)
            ...     print("count1", count1)
            ...     await acol.insert_many(
            ...         [{"seq": i} for i in range(50)],
            ...         concurrency=5,
            ...     )
            ...     count2 = await acol.count_documents({}, upper_bound=100)
            ...     print("count2", count2)
            ...
            >>> asyncio.run(write_and_count(my_async_coll))
            count0 0
            inserted1 ['e3c2a684-...', '1de4949f-...', '167dacc3-...']
            count1 3
            count2 53
            >>> asyncio.run(my_async_coll.insert_many(
            ...     [
            ...         {"tag": "a", "$vector": [1, 2]},
            ...         {"tag": "b", "$vector": [3, 4]},
            ...     ]
            ... ))
            CollectionInsertManyResult(...)

        Note:
            Unordered insertions are executed with some degree of concurrency,
            so it is usually better to prefer this mode unless the order in the
            document sequence is important.

        Note:
            A failure mode for this command is related to certain faulty documents
            found among those to insert: for example, a document may have an ID
            already found on the collection, or its vector dimension may not
            match the collection setting.

            For an ordered insertion, the method will raise an exception at
            the first such faulty document -- nevertheless, all documents processed
            until then will end up being written to the database.

            For unordered insertions, if the error stems from faulty documents
            the insertion proceeds until exhausting the input documents: then,
            an exception is raised -- and all insertable documents will have been
            written to the database, including those "after" the troublesome ones.

            Errors occurring during an insert_many operation, for that reason,
            may result in a `CollectionInsertManyException` being raised.
            This exception allows to inspect the list of document IDs that were
            successfully inserted, while accessing at the same time the underlying
            "root errors" that made the full method call to fail.
        """

        # Validate the sizing arguments first, so that invalid values fail
        # before the input is consumed or any request is issued.
        if concurrency is None:
            _concurrency = 1 if ordered else DEFAULT_INSERT_MANY_CONCURRENCY
        else:
            _concurrency = concurrency
        if _concurrency > 1 and ordered:
            raise ValueError("Cannot run ordered insert_many concurrently.")
        if not ordered and _concurrency < 1:
            # a zero-valued semaphore would leave every chunk waiting forever
            raise ValueError(
                "Unordered insert_many requires a concurrency of at least 1."
            )
        _chunk_size = (
            DEFAULT_INSERT_MANY_CHUNK_SIZE if chunk_size is None else chunk_size
        )
        if _chunk_size < 1:
            # a negative step makes the chunking range empty, which would
            # silently "succeed" without inserting anything
            raise ValueError("insert_many requires a positive chunk_size.")

        _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
            (general_method_timeout_ms, "general_method_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.general_method_timeout_ms,
                "general_method_timeout_ms",
            ),
        )
        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )

        # materialize the input: its length is needed for chunking and logging
        _documents = list(documents)
        logger.info(f"inserting {len(_documents)} documents in '{self.name}'")
        raw_results: list[dict[str, Any]]
        inserted_ids: list[Any]
        # a single manager shared by all chunk requests enforces the overall timeout
        timeout_manager = MultiCallTimeoutManager(
            overall_timeout_ms=_general_method_timeout_ms,
            timeout_label=_gmt_label,
        )
        if ordered:
            options = {"ordered": True, "returnDocumentResponses": True}
            raw_results = []
            inserted_ids = []
            for i in range(0, len(_documents), _chunk_size):
                im_payload = {
                    "insertMany": {
                        "documents": _documents[i : i + _chunk_size],
                        "options": options,
                    },
                }
                logger.info(f"insertMany(chunk) on '{self.name}'")
                # API errors are not raised by the request itself: they are
                # inspected below, so that partial successes can be reported
                chunk_response = await self._converted_request(
                    payload=im_payload,
                    raise_api_errors=False,
                    timeout_context=timeout_manager.remaining_timeout(
                        cap_time_ms=_request_timeout_ms,
                        cap_timeout_label=_rt_label,
                    ),
                )
                logger.info(f"finished insertMany(chunk) on '{self.name}'")
                # accumulate the results in this call
                inserted_ids.extend(
                    doc_resp["_id"]
                    for doc_resp in (chunk_response.get("status") or {}).get(
                        "documentResponses", []
                    )
                    if doc_resp["status"] == "OK"
                )
                raw_results.append(chunk_response)
                # if errors, quit early
                if chunk_response.get("errors", []):
                    response_exception = DataAPIResponseException.from_response(
                        command=im_payload,
                        raw_response=chunk_response,
                    )
                    raise CollectionInsertManyException(
                        inserted_ids=inserted_ids, exceptions=[response_exception]
                    )

            # return
            full_result = CollectionInsertManyResult(
                raw_results=raw_results,
                inserted_ids=inserted_ids,
            )
            logger.info(
                f"finished inserting {len(_documents)} documents in '{self.name}'"
            )
            return full_result

        else:
            # unordered: concurrent or not, do all of them and parse the results
            options = {"ordered": False, "returnDocumentResponses": True}

            # the semaphore caps how many chunk requests are in flight at once
            sem = asyncio.Semaphore(_concurrency)

            async def concurrent_insert_chunk(
                document_chunk: list[DOC],
            ) -> tuple[dict[str, Any], dict[str, Any]]:
                async with sem:
                    im_payload = {
                        "insertMany": {
                            "documents": document_chunk,
                            "options": options,
                        },
                    }
                    logger.info(f"insertMany(chunk) on '{self.name}'")
                    im_response = await self._converted_request(
                        payload=im_payload,
                        raise_api_errors=False,
                        timeout_context=timeout_manager.remaining_timeout(
                            cap_time_ms=_request_timeout_ms,
                            cap_timeout_label=_rt_label,
                        ),
                    )
                    logger.info(f"finished insertMany(chunk) on '{self.name}'")
                    return im_payload, im_response

            raw_pl_results_pairs: list[tuple[dict[str, Any], dict[str, Any]]]
            if _concurrency > 1:
                tasks = [
                    asyncio.create_task(
                        concurrent_insert_chunk(_documents[i : i + _chunk_size])
                    )
                    for i in range(0, len(_documents), _chunk_size)
                ]
                raw_pl_results_pairs = await asyncio.gather(*tasks)
            else:
                raw_pl_results_pairs = [
                    await concurrent_insert_chunk(_documents[i : i + _chunk_size])
                    for i in range(0, len(_documents), _chunk_size)
                ]

            # split the (payload, response) pairs into two lists: unlike
            # `zip(*pairs)`, this yields actual lists and handles no-chunks as well
            im_payloads: list[dict[str, Any]] = [
                chunk_payload for chunk_payload, _ in raw_pl_results_pairs
            ]
            raw_results = [
                chunk_response for _, chunk_response in raw_pl_results_pairs
            ]

            # recast raw_results
            inserted_ids = [
                doc_resp["_id"]
                for chunk_response in raw_results
                for doc_resp in (chunk_response.get("status") or {}).get(
                    "documentResponses", []
                )
                if doc_resp["status"] == "OK"
            ]

            # check-raise
            response_exceptions = [
                DataAPIResponseException.from_response(
                    command=chunk_payload,
                    raw_response=chunk_response,
                )
                for chunk_payload, chunk_response in zip(im_payloads, raw_results)
                if chunk_response.get("errors", [])
            ]
            if response_exceptions:
                raise CollectionInsertManyException(
                    inserted_ids=inserted_ids,
                    exceptions=response_exceptions,
                )

            # return
            full_result = CollectionInsertManyResult(
                raw_results=raw_results,
                inserted_ids=inserted_ids,
            )
            logger.info(
                f"finished inserting {len(_documents)} documents in '{self.name}'"
            )
            return full_result

    @overload
    def find(
        self,
        filter: FilterType | None = None,
        *,
        projection: ProjectionType | None = None,
        document_type: None = None,
        skip: int | None = None,
        limit: int | None = None,
        include_similarity: bool | None = None,
        include_sort_vector: bool | None = None,
        sort: SortType | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> AsyncCollectionFindCursor[DOC, DOC]: ...

    @overload
    def find(
        self,
        filter: FilterType | None = None,
        *,
        projection: ProjectionType | None = None,
        document_type: type[DOC2],
        skip: int | None = None,
        limit: int | None = None,
        include_similarity: bool | None = None,
        include_sort_vector: bool | None = None,
        sort: SortType | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> AsyncCollectionFindCursor[DOC, DOC2]: ...

    def find(
        self,
        filter: FilterType | None = None,
        *,
        projection: ProjectionType | None = None,
        document_type: type[DOC2] | None = None,
        skip: int | None = None,
        limit: int | None = None,
        include_similarity: bool | None = None,
        include_sort_vector: bool | None = None,
        sort: SortType | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> AsyncCollectionFindCursor[DOC, DOC2]:
        """
        Find documents on the collection, matching a certain provided filter.

        The method returns a cursor that can then be iterated over. Depending
        on the method call pattern, the iteration over all documents can reflect
        collection mutations occurred since the `find` method was called, or not.
        In cases where the cursor reflects mutations in real-time, it will iterate
        over documents in an approximate way (i.e. exhibiting occasional skipped
        or duplicate documents). This happens when making use of the `sort`
        option in a non-vector-search manner.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            projection: it controls which parts of the document are returned.
                It can be an allow-list: `{"f1": True, "f2": True}`,
                or a deny-list: `{"fx": False, "fy": False}`, but not a mixture
                (except for the `_id` and other special fields, which can be
                associated to both True or False independently of the rest
                of the specification).
                The special star-projections `{"*": True}` and `{"*": False}`
                have the effect of returning the whole document and `{}` respectively.
                For lists in documents, slice directives can be passed to select
                portions of the list: for instance, `{"array": {"$slice": 2}}`,
                `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
                `{"array": {"$slice": [-4, 2]}}`.
                An iterable over strings will be treated implicitly as an allow-list.
                The default projection (used if this parameter is not passed) does not
                necessarily include "special" fields such as `$vector` or `$vectorize`.
                See the Data API documentation for more on projections.
            document_type: this parameter acts as a formal specifier for the type
                checker. If omitted, the resulting cursor is implicitly an
                `AsyncCollectionFindCursor[DOC, DOC]`, i.e. maintains the same type for
                the items it returns as that for the documents in the collection.
                Strictly typed code may want to specify this parameter especially when
                a projection is given.
            skip: with this integer parameter, what would be the first `skip`
                documents returned by the query are discarded, and the results
                start from the (skip+1)-th document.
                This parameter can be used only in conjunction with an explicit
                `sort` criterion of the ascending/descending type (i.e. it cannot
                be used when not sorting, nor with vector-based ANN search).
            limit: this (integer) parameter sets a limit over how many documents
                are returned. Once `limit` is reached (or the cursor is exhausted
                for lack of matching documents), nothing more is returned.
            include_similarity: a boolean to request the numeric value of the
                similarity to be returned as an added "$similarity" key in each
                returned document. It can be used meaningfully only in a vector
                search (see `sort`).
            include_sort_vector: a boolean to request the search query vector.
                If set to True (and if the invocation is a vector search), calling
                the `get_sort_vector` method on the returned cursor will yield
                the vector used for the ANN search.
            sort: with this dictionary parameter one can control the order
                the documents are returned. See the Note about sorting, as well as
                the one about upper bounds, for details.
                Vector-based ANN sorting is achieved by providing a "$vector"
                or a "$vectorize" key in `sort`.
            request_timeout_ms: a timeout, in milliseconds, for each single one
                of the underlying HTTP requests used to fetch documents as the
                cursor is iterated over.
                If not passed, the collection-level setting is used instead.
            timeout_ms: an alias for `request_timeout_ms`.

        Returns:
            an AsyncCollectionFindCursor object, that can be iterated over (and
            manipulated in several ways). The cursor, if needed, handles pagination
            under the hood as the documents are consumed.

        Examples:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> async def run_finds(acol: AsyncCollection) -> None:
            ...     filter = {"seq": {"$exists": True}}
            ...     print("find results 1:")
            ...     async for doc in acol.find(filter, projection={"seq": True}, limit=5):
            ...         print(doc["seq"])
            ...     async_cursor1 = acol.find(
            ...         {},
            ...         limit=4,
            ...         sort={"seq": astrapy.constants.SortMode.DESCENDING},
            ...     )
            ...     ids = [doc["_id"] async for doc in async_cursor1]
            ...     print("find results 2:", ids)
            ...
            >>> asyncio.run(run_finds(my_async_coll))
            find results 1:
            48
            35
            7
            11
            13
            find results 2: ['d656cd9d-...', '479c7ce8-...', '96dc87fd-...', '83f0a21f-...']

            >>> async def run_vector_finds(acol: AsyncCollection) -> list[str]:
            ...     await acol.insert_many([
            ...         {"tag": "A", "$vector": [4, 5]},
            ...         {"tag": "B", "$vector": [3, 4]},
            ...         {"tag": "C", "$vector": [3, 2]},
            ...         {"tag": "D", "$vector": [4, 1]},
            ...         {"tag": "E", "$vector": [2, 5]},
            ...     ])
            ...     ann_tags = [
            ...         document["tag"]
            ...         async for document in acol.find(
            ...             {},
            ...             sort={"$vector": [3, 3]},
            ...             limit=3,
            ...         )
            ...     ]
            ...     return ann_tags
            ...
            >>> asyncio.run(run_vector_finds(my_async_coll))
            ['A', 'B', 'C']
            >>> # (assuming the collection has metric VectorMetric.COSINE)

            >>> async_cursor = my_async_coll.find(
            ...     sort={"$vector": [3, 3]},
            ...     limit=3,
            ...     include_sort_vector=True,
            ... )
            >>> asyncio.run(async_cursor.get_sort_vector())
            [3.0, 3.0]
            >>> asyncio.run(async_cursor.__anext__())
            {'_id': 'b13ce177-738e-47ec-bce1-77738ee7ec93', 'tag': 'A'}
            >>> asyncio.run(async_cursor.get_sort_vector())
            [3.0, 3.0]

        Note:
            The following are example values for the `sort` parameter.
            When no particular order is required:
                sort={}
            When sorting by a certain value in ascending/descending order:
                sort={"field": SortMode.ASCENDING}
                sort={"field": SortMode.DESCENDING}
            When sorting first by "field" and then by "subfield"
            (while modern Python versions preserve the order of dictionaries,
            it is suggested for clarity to employ a `collections.OrderedDict`
            in these cases):
                sort={
                    "field": SortMode.ASCENDING,
                    "subfield": SortMode.ASCENDING,
                }
            When running a vector similarity (ANN) search:
                sort={"$vector": [0.4, 0.15, -0.5]}

        Note:
            Some combinations of arguments impose an implicit upper bound on the
            number of documents that are returned by the Data API. More specifically:
            (a) Vector ANN searches cannot return more than a number of documents
            that at the time of writing is set to 1000 items.
            (b) When using a sort criterion of the ascending/descending type,
            the Data API will return a smaller number of documents, set to 20
            at the time of writing, and stop there. The returned documents are
            the top results across the whole collection according to the requested
            criterion.

        Note:
            When not specifying sorting criteria at all (by vector or otherwise),
            the cursor can scroll through an arbitrary number of documents as
            the Data API and the client periodically exchange new chunks of documents.
            It should be noted that the behavior of the cursor in the case documents
            have been added/removed after the `find` was started depends on database
            internals and it is not guaranteed, nor excluded, that such "real-time"
            changes in the data would be picked up by the cursor.
        """

        # lazy-import here to avoid circular import issues
        from astrapy.cursors import AsyncCollectionFindCursor

        # explicit arguments come first, the collection-level default last
        timeout_candidates = (
            (request_timeout_ms, "request_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        effective_timeout_ms, timeout_label = _first_valid_timeout(*timeout_candidates)

        # start from a bare cursor on this collection, with no overall timeout ...
        cursor: AsyncCollectionFindCursor[DOC, DOC2] = AsyncCollectionFindCursor(
            collection=self,
            request_timeout_ms=effective_timeout_ms,
            overall_timeout_ms=None,
            request_timeout_label=timeout_label,
        )
        # ... then apply each query setting in turn (same order as a fluent chain)
        cursor = cursor.filter(filter)
        cursor = cursor.project(projection)
        cursor = cursor.skip(skip)
        cursor = cursor.limit(limit)
        cursor = cursor.sort(sort)
        cursor = cursor.include_similarity(include_similarity)
        cursor = cursor.include_sort_vector(include_sort_vector)
        return cursor

    async def find_one(
        self,
        filter: FilterType | None = None,
        *,
        projection: ProjectionType | None = None,
        include_similarity: bool | None = None,
        sort: SortType | None = None,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> DOC | None:
        """
        Run a search and return the first document in the collection matching
        the provided filter, or None if there is no match.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            projection: it controls which parts of the document are returned.
                It can be an allow-list: `{"f1": True, "f2": True}`,
                or a deny-list: `{"fx": False, "fy": False}`, but not a mixture
                (except for the `_id` and other special fields, which can be
                associated to both True or False independently of the rest
                of the specification).
                The special star-projections `{"*": True}` and `{"*": False}`
                have the effect of returning the whole document and `{}` respectively.
                For lists in documents, slice directives can be passed to select
                portions of the list: for instance, `{"array": {"$slice": 2}}`,
                `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
                `{"array": {"$slice": [-4, 2]}}`.
                An iterable over strings will be treated implicitly as an allow-list.
                The default projection (used if this parameter is not passed) does not
                necessarily include "special" fields such as `$vector` or `$vectorize`.
                See the Data API documentation for more on projections.
            include_similarity: a boolean to request the numeric value of the
                similarity to be returned as an added "$similarity" key in the
                returned document. It can be used meaningfully only in a vector
                search (see `sort`).
            sort: with this dictionary parameter one can control the order
                the documents are returned. See the Note about sorting for details.
                Vector-based ANN sorting is achieved by providing a "$vector"
                or a "$vectorize" key in `sort`.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a dictionary expressing the required document, otherwise None.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> async def demo_find_one(acol: AsyncCollection) -> None:
            ...     print("Count:", await acol.count_documents({}, upper_bound=100))
            ...     result0 = await acol.find_one({})
            ...     print("result0", result0)
            ...     result1 = await acol.find_one({"seq": 10})
            ...     print("result1", result1)
            ...     result2 = await acol.find_one({"seq": 1011})
            ...     print("result2", result2)
            ...     result3 = await acol.find_one({}, projection={"seq": False})
            ...     print("result3", result3)
            ...     result4 = await acol.find_one(
            ...         {},
            ...         sort={"seq": astrapy.constants.SortMode.DESCENDING},
            ...     )
            ...     print("result4", result4)
            ...
            >>>
            >>> asyncio.run(demo_find_one(my_async_coll))
            Count: 50
            result0 {'_id': '479c7ce8-...', 'seq': 48}
            result1 {'_id': '93e992c4-...', 'seq': 10}
            result2 None
            result3 {'_id': '479c7ce8-...'}
            result4 {'_id': 'd656cd9d-...', 'seq': 49}

            >>> asyncio.run(my_async_coll.find_one(
            ...     {},
            ...     sort={"$vector": [1, 0]},
            ...     projection={"*": True},
            ... ))
            {'_id': '...', 'tag': 'D', '$vector': [4.0, 1.0]}

        Note:
            See the `find` method for more details on the accepted parameters
            (whereas `skip` and `limit` are not valid parameters for `find_one`).
        """

        effective_timeout_ms, timeout_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )

        # candidate command fields, in payload order; None-valued ones are left out
        if include_similarity is None:
            command_options = None
        else:
            command_options = {"includeSimilarity": include_similarity}
        candidate_fields = (
            ("filter", filter),
            ("projection", normalize_optional_projection(projection)),
            ("options", command_options),
            ("sort", sort),
        )
        command_body: dict[str, Any] = {}
        for field_name, field_value in candidate_fields:
            if field_value is not None:
                command_body[field_name] = field_value

        find_one_response = await self._converted_request(
            payload={"findOne": command_body},
            timeout_context=_TimeoutContext(
                request_ms=effective_timeout_ms, label=timeout_label
            ),
        )

        # a well-formed response always has a "document" entry under "data",
        # whose value is None when nothing matched
        data_section = find_one_response.get("data") or {}
        if "document" not in data_section:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from findOne API command.",
                raw_response=find_one_response,
            )
        return data_section["document"]  # type: ignore[no-any-return]

    async def distinct(
        self,
        key: str | Iterable[str | int],
        *,
        filter: FilterType | None = None,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[Any]:
        """
        Collect the unique values found under `key` among the documents
        of the collection that satisfy the given filter.

        Args:
            key: the field whose values are gathered across documents.
                Most often this is a plain field name; the dot-notation is
                also supported to address subkeys or positions within lists
                (for example, "map_field.subkey" or "list_field.2").
                Field names containing literal dots or ampersands must be
                escaped when passed in this form.
                Alternatively, a list of strings and numbers can be given,
                which needs no escaping: each entry is one field name or list
                index, for example ["map_field", "subkey"] or ["list_field", 2].
                When a list is met along the path and no numeric index is
                specified, every item of the list is visited.
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            general_method_timeout_ms: a timeout, in milliseconds, for the whole
                requested operation. Since this method relies on `find` (see),
                it may issue several successive HTTP API requests, depending
                on how many documents are involved.
            request_timeout_ms: a timeout, in milliseconds, for each API request.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a list with the different values for `key` encountered in the
            documents matching the filter, each value appearing only once.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> async def run_distinct(acol: AsyncCollection) -> None:
            ...     await acol.insert_many(
            ...         [
            ...             {"name": "Marco", "food": ["apple", "orange"], "city": "Helsinki"},
            ...             {"name": "Emma", "food": {"likes_fruit": True, "allergies": []}},
            ...         ]
            ...     )
            ...     distinct0 = await acol.distinct("name")
            ...     print("distinct('name')", distinct0)
            ...     distinct1 = await acol.distinct("city")
            ...     print("distinct('city')", distinct1)
            ...     distinct2 = await acol.distinct("food")
            ...     print("distinct('food')", distinct2)
            ...     distinct3 = await acol.distinct("food.1")
            ...     print("distinct('food.1')", distinct3)
            ...     distinct4 = await acol.distinct("food.allergies")
            ...     print("distinct('food.allergies')", distinct4)
            ...     distinct5 = await acol.distinct("food.likes_fruit")
            ...     print("distinct('food.likes_fruit')", distinct5)
            ...
            >>> asyncio.run(run_distinct(my_async_coll))
            distinct('name') ['Emma', 'Marco']
            distinct('city') ['Helsinki']
            distinct('food') [{'likes_fruit': True, 'allergies': []}, 'apple', 'orange']
            distinct('food.1') ['orange']
            distinct('food.allergies') []
            distinct('food.likes_fruit') [True]

        Note:
            Keep in mind that `distinct` runs on the client side: all the
            required documents are browsed with the logic of the `find` method,
            and the unique values for `key` are collected along the way.
            Therefore, a large number of matching documents may have
            performance, latency and ultimately billing implications.

        Note:
            For details on how "distinct" behaves when the collection contents
            change in real time, see the Note of the `find` command.
        """

        # deferred import: importing at module level would be circular
        from astrapy.cursors import AsyncCollectionFindCursor

        # overall timeout: explicit parameter, then its alias, then the defaults
        overall_timeout_ms, overall_timeout_label = _first_valid_timeout(
            (general_method_timeout_ms, "general_method_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.general_method_timeout_ms,
                "general_method_timeout_ms",
            ),
        )
        # per-request timeout: explicit parameter, then the defaults
        single_request_timeout_ms, single_request_timeout_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        # cursor setup: only the part of the documents under `key` is requested
        extract_values = _create_document_key_extractor(key)
        projection_key = _reduce_distinct_key_to_safe(key)
        # the type hint is loosened (only within this method body)
        find_cursor: AsyncCollectionFindCursor[dict[str, Any], dict[str, Any]] = (
            AsyncCollectionFindCursor(
                collection=self,
                request_timeout_ms=single_request_timeout_ms,
                overall_timeout_ms=overall_timeout_ms,
                request_timeout_label=single_request_timeout_label,
                overall_timeout_label=overall_timeout_label,
            )  # type: ignore[assignment]
            .filter(filter)
            .project({projection_key: True})
        )
        # cursor consumption: values are compared through their hashes,
        # and kept in the order of their first appearance
        seen_hashes = set()
        unique_values: list[Any] = []
        logger.info(f"running distinct() on '{self.name}'")
        async for found_document in find_cursor:
            for value in extract_values(found_document):
                value_hash = _hash_collection_document(
                    value, options=self.api_options.serdes_options
                )
                if value_hash in seen_hashes:
                    continue
                seen_hashes.add(value_hash)
                unique_values.append(value)
        logger.info(f"finished running distinct() on '{self.name}'")
        return unique_values

    @overload
    def find_and_rerank(
        self,
        filter: FilterType | None = None,
        *,
        sort: HybridSortType,
        projection: ProjectionType | None = None,
        document_type: None = None,
        limit: int | None = None,
        hybrid_limits: int | dict[str, int] | None = None,
        include_scores: bool | None = None,
        include_sort_vector: bool | None = None,
        rerank_on: str | None = None,
        rerank_query: str | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> AsyncCollectionFindAndRerankCursor[DOC, RerankedResult[DOC]]: ...

    @overload
    def find_and_rerank(
        self,
        filter: FilterType | None = None,
        *,
        sort: HybridSortType,
        projection: ProjectionType | None = None,
        document_type: type[DOC2],
        limit: int | None = None,
        hybrid_limits: int | dict[str, int] | None = None,
        include_scores: bool | None = None,
        include_sort_vector: bool | None = None,
        rerank_on: str | None = None,
        rerank_query: str | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> AsyncCollectionFindAndRerankCursor[DOC, RerankedResult[DOC2]]: ...

    @beta_method
    def find_and_rerank(
        self,
        filter: FilterType | None = None,
        *,
        sort: HybridSortType,
        projection: ProjectionType | None = None,
        document_type: type[DOC2] | None = None,
        limit: int | None = None,
        hybrid_limits: int | dict[str, int] | None = None,
        include_scores: bool | None = None,
        include_sort_vector: bool | None = None,
        rerank_on: str | None = None,
        rerank_query: str | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> AsyncCollectionFindAndRerankCursor[DOC, RerankedResult[DOC2]]:
        """
        Search for relevant documents by merging vector and lexical matches
        through a reranking step.

        The collection must have been created with the required hybrid
        capabilities for this method to succeed (see the `create_collection`
        method of the Database class).

        What is returned is a cursor: iterating over it yields the resulting
        documents, generally paired with accompanying information
        such as scores.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            sort: a clause with the criteria used to select the top matching
                documents. It must carry enough information to run both a lexical
                and a vector similarity search (the latter either by query text
                or by query vector, depending on the collection configuration).
                Examples are: `sort={"$hybrid": "xyz"}`,
                `sort={"$hybrid": {"$vectorize": "xyz", "$lexical": "abc"}}`,
                `sort={"$hybrid": {"$vector": DataAPIVector(...), "$lexical": "abc"}}`.
                Note this differs from the `sort` parameter for the `find` method.
            projection: it controls which parts of the document are returned.
                It can be an allow-list: `{"f1": True, "f2": True}`,
                or a deny-list: `{"fx": False, "fy": False}`, but not a mixture
                (except for the `_id` and other special fields, which can be
                associated to both True or False independently of the rest
                of the specification).
                The special star-projections `{"*": True}` and `{"*": False}`
                have the effect of returning the whole document and `{}` respectively.
                For lists in documents, slice directives can be passed to select
                portions of the list: for instance, `{"array": {"$slice": 2}}`,
                `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
                `{"array": {"$slice": [-4, 2]}}`.
                An iterable over strings will be treated implicitly as an allow-list.
                The default projection (used if this parameter is not passed) does not
                necessarily include "special" fields such as `$vector` or `$vectorize`.
                See the Data API documentation for more on projections.
            document_type: this parameter acts as a formal specifier for the type
                checker. If omitted, the resulting cursor is implicitly an
                `AsyncCollectionFindAndRerankCursor[DOC, RerankedResult[DOC]]`,
                i.e. the documents in the items it returns keep the same type as
                the documents in the collection. Strictly typed code may want to
                specify this parameter especially when a projection is given.
            limit: maximum number of documents to return as the result of the final
                rerank step.
            hybrid_limits: the amount of documents fetched by each of the
                individual retrieval operations that are combined in the
                rerank step. It can be either a number or a dictionary of strings to
                numbers, the latter case expressing different counts for the different
                retrievals. For example: `hybrid_limits=50`,
                `hybrid_limits={"$vector": 20, "$lexical": 10}`.
            include_scores: a boolean to request the scores to be returned along with
                the resulting documents. If this is set, the scores can be read in
                the map `scores` attribute of each RerankedResult (the map is
                otherwise empty).
            include_sort_vector: a boolean to request the search query vector
                used for the vector-search part of the find operation.
                If set to True, calling the `get_sort_vector` method on the returned
                cursor will yield the vector used for the ANN search.
            rerank_on: for collections without a vectorize (server-side embeddings)
                service, this is used to specify the field name that is then used
                during reranking.
            rerank_query: for collections without a vectorize (server-side embeddings)
                service, this is used to specify the query text for the reranker.
            request_timeout_ms: a timeout, in milliseconds, for each single one
                of the underlying HTTP requests used to fetch documents as the
                cursor is iterated over.
                If not passed, the collection-level setting is used instead.
            timeout_ms: an alias for `request_timeout_ms`.

        Returns:
            an AsyncCollectionFindAndRerankCursor object, that can be iterated over
            (and manipulated in several ways).

        Examples:
            >>> # NOTE: may require slight adaptation to an async context.
            >>> #       See the same method on Collection for more usage patterns.
            >>>
            >>> async def run_find_and_reranks(acol: AsyncCollection) -> None:
            ...     print("find results 1:")
            ...     async for r_res in acol.find_and_rerank(
            ...         sort={"$hybrid": "query text"},
            ...         limit=3,
            ...     ):
            ...         print(r_res.document["wkd"])
            ...     async_cursor1 = acol.find_and_rerank(
            ...         {"wkd": {"$ne": "Mon"}},
            ...         sort={"$hybrid": "query text"},
            ...         limit=3,
            ...     )
            ...     ids = [r_res.document["_id"] async for r_res in async_cursor1]
            ...     print("find results 2:", ids)
            ...
            >>> asyncio.run(run_find_and_reranks(my_async_coll))
            find results 1:
            Mon
            Thu
            Sat
            find results 2: ['D', 'F', 'B']
        """

        # deferred import: importing at module level would be circular
        from astrapy.cursors import AsyncCollectionFindAndRerankCursor

        # per-request timeout: explicit parameter, then its alias,
        # then the collection-level setting
        effective_timeout_ms, effective_timeout_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        # the cursor is created with no overall timeout, then each of
        # the query settings is applied through the cursor's builder methods
        return (
            AsyncCollectionFindAndRerankCursor(
                collection=self,
                request_timeout_ms=effective_timeout_ms,
                overall_timeout_ms=None,
                request_timeout_label=effective_timeout_label,
            )
            .filter(filter)
            .project(projection)
            .limit(limit)
            .sort(sort)
            .hybrid_limits(hybrid_limits)
            .rerank_on(rerank_on)
            .rerank_query(rerank_query)
            .include_scores(include_scores)
            .include_sort_vector(include_sort_vector)
        )

    async def count_documents(
        self,
        filter: FilterType,
        *,
        upper_bound: int,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> int:
        """
        Count the documents in the collection matching the specified filter.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            upper_bound: a required ceiling on the result of the count operation.
                If the actual number of documents exceeds this value,
                an exception will be raised.
                Furthermore, if the actual number of documents exceeds the maximum
                count that the Data API can reach (regardless of upper_bound),
                an exception will be raised.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            the exact count of matching documents.

        Raises:
            TooManyDocumentsToCountException: if the API response signals
                that there are more documents than it could count, or if
                the returned count is greater than `upper_bound`.
            UnexpectedDataAPIResponseException: if the API response carries
                no "count" in its "status" (including a missing or null "status").

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> async def do_count_docs(acol: AsyncCollection) -> None:
            ...     await acol.insert_many([{"seq": i} for i in range(20)])
            ...     count0 = await acol.count_documents({}, upper_bound=100)
            ...     print("count0", count0)
            ...     count1 = await acol.count_documents(
            ...         {"seq":{"$gt": 15}}, upper_bound=100
            ...     )
            ...     print("count1", count1)
            ...     count2 = await acol.count_documents({}, upper_bound=10)
            ...     print("count2", count2)
            ...
            >>> asyncio.run(do_count_docs(my_async_coll))
            count0 20
            count1 4
            Traceback (most recent call last):
                ... ...
            astrapy.exceptions.TooManyDocumentsToCountException

        Note:
            Count operations are expensive: for this reason, the best practice
            is to provide a reasonable `upper_bound` according to the caller
            expectations. Moreover, indiscriminate usage of count operations
            for sizeable amounts of documents (i.e. in the thousands and more)
            is discouraged in favor of alternative application-specific solutions.
            Keep in mind that the Data API has a hard upper limit on the amount
            of documents it will count, and that an exception will be thrown
            by this method if this limit is encountered.
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        cd_payload = {"countDocuments": {"filter": filter}}
        logger.info(f"countDocuments on '{self.name}'")
        cd_response = await self._converted_request(
            payload=cd_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished countDocuments on '{self.name}'")
        # `or {}` (rather than a `.get` default) also covers an explicit null
        # "status", which is then reported as a faulty response like a missing one
        cd_status = cd_response.get("status") or {}
        if "count" not in cd_status:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from countDocuments API command.",
                raw_response=cd_response,
            )
        count: int = cd_status["count"]
        # "moreData" set: the server stopped counting at its own maximum
        if cd_status.get("moreData", False):
            raise TooManyDocumentsToCountException(
                text=f"Document count exceeds {count}, the maximum allowed by the server",
                server_max_count_exceeded=True,
            )
        # the count is complete, but above the ceiling set by the caller
        if count > upper_bound:
            raise TooManyDocumentsToCountException(
                text="Document count exceeds required upper bound",
                server_max_count_exceeded=False,
            )
        return count

    async def estimated_document_count(
        self,
        *,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> int:
        """
        Query the API server for an estimate of the document count in the collection.

        Contrary to `count_documents`, this method has no filtering parameters.

        Args:
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a server-provided estimate count of the documents in the collection.

        Raises:
            UnexpectedDataAPIResponseException: if the API response carries
                no "count" in its "status" (including a missing or null "status").

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> asyncio.run(my_async_coll.estimated_document_count())
            35700
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        ed_payload: dict[str, Any] = {"estimatedDocumentCount": {}}
        logger.info(f"estimatedDocumentCount on '{self.name}'")
        ed_response = await self._converted_request(
            payload=ed_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished estimatedDocumentCount on '{self.name}'")
        # `or {}` (rather than a `.get` default) also covers an explicit null
        # "status", which is then reported as a faulty response like a missing one
        ed_status = ed_response.get("status") or {}
        if "count" not in ed_status:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from estimatedDocumentCount API command.",
                raw_response=ed_response,
            )
        count: int = ed_status["count"]
        return count

    async def find_one_and_replace(
        self,
        filter: FilterType,
        replacement: DOC,
        *,
        projection: ProjectionType | None = None,
        sort: SortType | None = None,
        upsert: bool = False,
        return_document: str = ReturnDocument.BEFORE,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> DOC | None:
        """
        Find a document on the collection and replace it entirely with a new one,
        optionally inserting a new one if no match is found.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            replacement: the new document to write into the collection.
            projection: it controls which parts of the document are returned.
                It can be an allow-list: `{"f1": True, "f2": True}`,
                or a deny-list: `{"fx": False, "fy": False}`, but not a mixture
                (except for the `_id` and other special fields, which can be
                associated to both True or False independently of the rest
                of the specification).
                The special star-projections `{"*": True}` and `{"*": False}`
                have the effect of returning the whole document and `{}` respectively.
                For lists in documents, slice directives can be passed to select
                portions of the list: for instance, `{"array": {"$slice": 2}}`,
                `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
                `{"array": {"$slice": [-4, 2]}}`.
                An iterable over strings will be treated implicitly as an allow-list.
                The default projection (used if this parameter is not passed) does not
                necessarily include "special" fields such as `$vector` or `$vectorize`.
                See the Data API documentation for more on projections.
            sort: with this dictionary parameter one can control the sorting
                order of the documents matching the filter, effectively
                determining what document will come first and hence be the
                replaced one. See the `find` method for more on sorting.
                Vector-based ANN sorting is achieved by providing a "$vector"
                or a "$vectorize" key in `sort`.
            upsert: this parameter controls the behavior in absence of matches.
                If True, `replacement` is inserted as a new document
                if no matches are found on the collection. If False,
                the operation silently does nothing in case of no matches.
            return_document: a flag controlling what document is returned:
                if set to `ReturnDocument.BEFORE`, or the string "before",
                the document found on database is returned; if set to
                `ReturnDocument.AFTER`, or the string "after", the new
                document is returned. The default is "before".
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            A document, either the one before the replace operation or the
            one after that. Alternatively, the method returns None to represent
            that no matching document was found, or that no replacement
            was inserted (depending on the `return_document` parameter).

        Raises:
            UnexpectedDataAPIResponseException: if the API response carries
                no "document" in its "data" (including a missing or null "data").

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> async def do_find_one_and_replace(
            ...     acol: AsyncCollection
            ... ) -> None:
            ...     await acol.insert_one(
            ...         {"_id": "rule1", "text": "all animals are equal"}
            ...     )
            ...     result0 = await acol.find_one_and_replace(
            ...         {"_id": "rule1"},
            ...         {"text": "some animals are more equal!"},
            ...     )
            ...     print("result0", result0)
            ...     result1 = await acol.find_one_and_replace(
            ...         {"text": "some animals are more equal!"},
            ...         {"text": "and the pigs are the rulers"},
            ...         return_document=astrapy.constants.ReturnDocument.AFTER,
            ...     )
            ...     print("result1", result1)
            ...     result2 = await acol.find_one_and_replace(
            ...         {"_id": "rule2"},
            ...         {"text": "F=ma^2"},
            ...         return_document=astrapy.constants.ReturnDocument.AFTER,
            ...     )
            ...     print("result2", result2)
            ...     result3 = await acol.find_one_and_replace(
            ...         {"_id": "rule2"},
            ...         {"text": "F=ma"},
            ...         upsert=True,
            ...         return_document=astrapy.constants.ReturnDocument.AFTER,
            ...         projection={"_id": False},
            ...     )
            ...     print("result3", result3)
            ...
            >>> asyncio.run(do_find_one_and_replace(my_async_coll))
            result0 {'_id': 'rule1', 'text': 'all animals are equal'}
            result1 {'_id': 'rule1', 'text': 'and the pigs are the rulers'}
            result2 None
            result3 {'text': 'F=ma'}
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        options = {
            "returnDocument": return_document,
            "upsert": upsert,
        }
        # entries whose value is None are left out of the command payload
        fo_payload = {
            "findOneAndReplace": {
                k: v
                for k, v in {
                    "filter": filter,
                    "projection": normalize_optional_projection(projection),
                    "replacement": replacement,
                    "options": options,
                    "sort": sort,
                }.items()
                if v is not None
            }
        }
        logger.info(f"findOneAndReplace on '{self.name}'")
        fo_response = await self._converted_request(
            payload=fo_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished findOneAndReplace on '{self.name}'")
        # `or {}` (rather than a `.get` default) also covers an explicit null
        # "data", which is then reported as a faulty response like a missing one
        fo_data = fo_response.get("data") or {}
        if "document" not in fo_data:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from find_one_and_replace API command.",
                raw_response=fo_response,
            )
        # a None document is a valid outcome and is returned as is
        return fo_data["document"]  # type: ignore[no-any-return]

    async def replace_one(
        self,
        filter: FilterType,
        replacement: DOC,
        *,
        sort: SortType | None = None,
        upsert: bool = False,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionUpdateResult:
        """
        Overwrite one document of the collection with a supplied replacement,
        with the option of inserting the replacement when nothing matches.

        Args:
            filter: a dictionary expressing, in the Data API filter syntax,
                the condition a document must satisfy. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                The Data API documentation lists all available operators.
            replacement: the document that will take the place of the matched one.
            sort: a dictionary determining the ordering of the documents that
                satisfy the filter, thereby deciding which of them comes first
                and gets replaced. More on sorting is found in the docstring
                of the `find` method. A "$vector" or a "$vectorize" key
                in `sort` requests a vector-based ANN sorting.
            upsert: what to do when no document matches. If True,
                `replacement` gets inserted as a new document; if False,
                the operation quietly leaves the collection untouched.
            general_method_timeout_ms: a timeout, in milliseconds, for the
                underlying API request. This object's defaults are used if this
                is not provided. (Since this method issues a single API request,
                all timeout parameters are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a CollectionUpdateResult object describing the outcome of
            the replace operation.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> async def do_replace_one(acol: AsyncCollection) -> None:
            ...     await acol.insert_one({"Marco": "Polo"})
            ...     result0 = await acol.replace_one(
            ...         {"Marco": {"$exists": True}},
            ...         {"Buda": "Pest"},
            ...     )
            ...     print("result0.update_info", result0.update_info)
            ...     doc1 = await acol.find_one({"Buda": "Pest"})
            ...     print("doc1", doc1)
            ...     result1 = await acol.replace_one(
            ...         {"Mirco": {"$exists": True}},
            ...         {"Oh": "yeah?"},
            ...     )
            ...     print("result1.update_info", result1.update_info)
            ...     result2 = await acol.replace_one(
            ...         {"Mirco": {"$exists": True}},
            ...         {"Oh": "yeah?"},
            ...         upsert=True,
            ...     )
            ...     print("result2.update_info", result2.update_info)
            ...
            >>> asyncio.run(do_replace_one(my_async_coll))
            result0.update_info {'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1}
            doc1 {'_id': '6e669a5a-...', 'Buda': 'Pest'}
            result1.update_info {'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0}
            result2.update_info {'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '30e34e00-...'}
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # All fields the command may carry; any whose value is None
        # (such as an unspecified `sort`) is left out of the payload.
        command_fields = {
            "filter": filter,
            "replacement": replacement,
            "options": {"upsert": upsert},
            "sort": sort,
        }
        fo_payload = {
            "findOneAndReplace": {
                field_name: field_value
                for field_name, field_value in command_fields.items()
                if field_value is not None
            }
        }
        logger.info(f"findOneAndReplace on '{self.name}'")
        fo_response = await self._converted_request(
            payload=fo_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished findOneAndReplace on '{self.name}'")
        # A response lacking the "document" key under "data" is not acceptable.
        if "document" not in fo_response.get("data", {}):
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from find_one_and_replace API command.",
                raw_response=fo_response,
            )
        # The update summary is built from the response status alone
        # (a missing or null status counts as an empty one).
        return CollectionUpdateResult(
            raw_results=[fo_response],
            update_info=_prepare_update_info([fo_response.get("status") or {}]),
        )

    async def find_one_and_update(
        self,
        filter: FilterType,
        update: dict[str, Any],
        *,
        projection: ProjectionType | None = None,
        sort: SortType | None = None,
        upsert: bool = False,
        return_document: str = ReturnDocument.BEFORE,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> DOC | None:
        """
        Find a document on the collection and update it as requested,
        optionally inserting a new one if no match is found.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            update: the update prescription to apply to the document, expressed
                as a dictionary as per Data API syntax. Examples are:
                    {"$set": {"field": "value"}}
                    {"$inc": {"counter": 10}}
                    {"$unset": {"field": ""}}
                See the Data API documentation for the full syntax.
            projection: it controls which parts of the document are returned.
                It can be an allow-list: `{"f1": True, "f2": True}`,
                or a deny-list: `{"fx": False, "fy": False}`, but not a mixture
                (except for the `_id` and other special fields, which can be
                associated to both True or False independently of the rest
                of the specification).
                The special star-projections `{"*": True}` and `{"*": False}`
                have the effect of returning the whole document and `{}` respectively.
                For lists in documents, slice directives can be passed to select
                portions of the list: for instance, `{"array": {"$slice": 2}}`,
                `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
                `{"array": {"$slice": [-4, 2]}}`.
                An iterable over strings will be treated implicitly as an allow-list.
                The default projection (used if this parameter is not passed) does not
                necessarily include "special" fields such as `$vector` or `$vectorize`.
                See the Data API documentation for more on projections.
            sort: with this dictionary parameter one can control the sorting
                order of the documents matching the filter, effectively
                determining what document will come first and hence be the
                updated one. See the `find` method for more on sorting.
                Vector-based ANN sorting is achieved by providing a "$vector"
                or a "$vectorize" key in `sort`.
            upsert: this parameter controls the behavior in absence of matches.
                If True, a new document (resulting from applying the `update`
                to an empty document) is inserted if no matches are found on
                the collection. If False, the operation silently does nothing
                in case of no matches.
            return_document: a flag controlling what document is returned:
                if set to `ReturnDocument.BEFORE`, or the string "before",
                the document found on database is returned; if set to
                `ReturnDocument.AFTER`, or the string "after", the new
                document is returned. The default is "before".
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            A document (or a projection thereof, as required), either the one
            before the update operation or the one after that.
            Alternatively, the method returns None to represent
            that no matching document was found, or that no update
            was applied (depending on the `return_document` parameter).

        Raises:
            UnexpectedDataAPIResponseException: if the API response does not
                carry a "document" entry within its "data" section.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> async def do_find_one_and_update(acol: AsyncCollection) -> None:
            ...     await acol.insert_one({"Marco": "Polo"})
            ...     result0 = await acol.find_one_and_update(
            ...         {"Marco": {"$exists": True}},
            ...         {"$set": {"title": "Mr."}},
            ...     )
            ...     print("result0", result0)
            ...     result1 = await acol.find_one_and_update(
            ...         {"title": "Mr."},
            ...         {"$inc": {"rank": 3}},
            ...         projection=["title", "rank"],
            ...         return_document=astrapy.constants.ReturnDocument.AFTER,
            ...     )
            ...     print("result1", result1)
            ...     result2 = await acol.find_one_and_update(
            ...         {"name": "Johnny"},
            ...         {"$set": {"rank": 0}},
            ...         return_document=astrapy.constants.ReturnDocument.AFTER,
            ...     )
            ...     print("result2", result2)
            ...     result3 = await acol.find_one_and_update(
            ...         {"name": "Johnny"},
            ...         {"$set": {"rank": 0}},
            ...         upsert=True,
            ...         return_document=astrapy.constants.ReturnDocument.AFTER,
            ...     )
            ...     print("result3", result3)
            ...
            >>> asyncio.run(do_find_one_and_update(my_async_coll))
            result0 {'_id': 'f7c936d3-b0a0-45eb-a676-e2829662a57c', 'Marco': 'Polo'}
            result1 {'_id': 'f7c936d3-b0a0-45eb-a676-e2829662a57c', 'title': 'Mr.', 'rank': 3}
            result2 None
            result3 {'_id': 'db3d678d-14d4-4caa-82d2-d5fb77dab7ec', 'name': 'Johnny', 'rank': 0}
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        options = {
            "returnDocument": return_document,
            "upsert": upsert,
        }
        # Entries whose value is None (e.g. an unspecified sort or projection)
        # are dropped from the command altogether.
        fo_payload = {
            "findOneAndUpdate": {
                k: v
                for k, v in {
                    "filter": filter,
                    "update": update,
                    "options": options,
                    "sort": sort,
                    "projection": normalize_optional_projection(projection),
                }.items()
                if v is not None
            }
        }
        logger.info(f"findOneAndUpdate on '{self.name}'")
        fo_response = await self._converted_request(
            payload=fo_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished findOneAndUpdate on '{self.name}'")
        fo_data = fo_response.get("data", {})
        if "document" not in fo_data:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from find_one_and_update API command.",
                raw_response=fo_response,
            )
        # The "document" entry may be null: in that case None is returned as is.
        return fo_data.get("document")  # type: ignore[no-any-return]

    async def update_one(
        self,
        filter: FilterType,
        update: dict[str, Any],
        *,
        sort: SortType | None = None,
        upsert: bool = False,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionUpdateResult:
        """
        Update a single document on the collection as requested,
        optionally inserting a new one if no match is found.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            update: the update prescription to apply to the document, expressed
                as a dictionary as per Data API syntax. Examples are:
                    {"$set": {"field": "value"}}
                    {"$inc": {"counter": 10}}
                    {"$unset": {"field": ""}}
                See the Data API documentation for the full syntax.
            sort: with this dictionary parameter one can control the sorting
                order of the documents matching the filter, effectively
                determining what document will come first and hence be the
                updated one. See the `find` method for more on sorting.
                Vector-based ANN sorting is achieved by providing a "$vector"
                or a "$vectorize" key in `sort`.
            upsert: this parameter controls the behavior in absence of matches.
                If True, a new document (resulting from applying the `update`
                to an empty document) is inserted if no matches are found on
                the collection. If False, the operation silently does nothing
                in case of no matches.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a CollectionUpdateResult object summarizing the outcome of
            the update operation.

        Raises:
            UnexpectedDataAPIResponseException: if the API response
                has no "status" section.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> async def do_update_one(acol: AsyncCollection) -> None:
            ...     await acol.insert_one({"Marco": "Polo"})
            ...     result0 = await acol.update_one(
            ...         {"Marco": {"$exists": True}},
            ...         {"$inc": {"rank": 3}},
            ...     )
            ...     print("result0.update_info", result0.update_info)
            ...     result1 = await acol.update_one(
            ...         {"Mirko": {"$exists": True}},
            ...         {"$inc": {"rank": 3}},
            ...     )
            ...     print("result1.update_info", result1.update_info)
            ...     result2 = await acol.update_one(
            ...         {"Mirko": {"$exists": True}},
            ...         {"$inc": {"rank": 3}},
            ...         upsert=True,
            ...     )
            ...     print("result2.update_info", result2.update_info)
            ...
            >>> asyncio.run(do_update_one(my_async_coll))
            result0.update_info {'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1}
            result1.update_info {'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0}
            result2.update_info {'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '75748092-...'}
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        options = {
            "upsert": upsert,
        }
        # Entries whose value is None (e.g. an unspecified sort)
        # are dropped from the command altogether.
        uo_payload = {
            "updateOne": {
                k: v
                for k, v in {
                    "filter": filter,
                    "update": update,
                    "options": options,
                    "sort": sort,
                }.items()
                if v is not None
            }
        }
        logger.info(f"updateOne on '{self.name}'")
        uo_response = await self._converted_request(
            payload=uo_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished updateOne on '{self.name}'")
        if "status" not in uo_response:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from updateOne API command.",
                raw_response=uo_response,
            )
        # The update summary is derived from the response status alone.
        return CollectionUpdateResult(
            raw_results=[uo_response],
            update_info=_prepare_update_info([uo_response["status"]]),
        )

    async def update_many(
        self,
        filter: FilterType,
        update: dict[str, Any],
        *,
        upsert: bool = False,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionUpdateResult:
        """
        Apply an update operation to all documents matching a condition,
        optionally inserting one document in absence of matches.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            update: the update prescription to apply to the documents, expressed
                as a dictionary as per Data API syntax. Examples are:
                    {"$set": {"field": "value"}}
                    {"$inc": {"counter": 10}}
                    {"$unset": {"field": ""}}
                See the Data API documentation for the full syntax.
            upsert: this parameter controls the behavior in absence of matches.
                If True, a single new document (resulting from applying `update`
                to an empty document) is inserted if no matches are found on
                the collection. If False, the operation silently does nothing
                in case of no matches.
            general_method_timeout_ms: a timeout, in milliseconds, for the whole
                requested operation (which may involve multiple API requests).
                This method may entail successive HTTP API requests,
                depending on the amount of involved documents.
                If not passed, the collection-level setting is used instead.
            request_timeout_ms: a timeout, in milliseconds, for each API request.
                If not passed, the collection-level setting is used instead.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a CollectionUpdateResult object summarizing the outcome of
            the update operation.

        Raises:
            CollectionUpdateManyException: if an API response reports errors.
                The exception carries the partial result accumulated from the
                requests completed before the failing one.
            UnexpectedDataAPIResponseException: if an error-free API response
                has no "status" section.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> async def do_update_many(acol: AsyncCollection) -> None:
            ...     await acol.insert_many([{"c": "red"}, {"c": "green"}, {"c": "blue"}])
            ...     result0 = await acol.update_many(
            ...         {"c": {"$ne": "green"}},
            ...         {"$set": {"nongreen": True}},
            ...     )
            ...     print("result0.update_info", result0.update_info)
            ...     result1 = await acol.update_many(
            ...         {"c": "orange"},
            ...         {"$set": {"is_also_fruit": True}},
            ...     )
            ...     print("result1.update_info", result1.update_info)
            ...     result2 = await acol.update_many(
            ...         {"c": "orange"},
            ...         {"$set": {"is_also_fruit": True}},
            ...         upsert=True,
            ...     )
            ...     print("result2.update_info", result2.update_info)
            ...
            >>> asyncio.run(do_update_many(my_async_coll))
            result0.update_info {'n': 2, 'updatedExisting': True, 'ok': 1.0, 'nModified': 2}
            result1.update_info {'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0}
            result2.update_info {'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '79ffd5a3-ab99-4dff-a2a5-4aaa0e59e854'}

        Note:
            Similarly to the case of `find` (see its docstring for more details),
            running this command while, at the same time, another process is
            inserting new documents which match the filter of the `update_many`
            can result in an unpredictable fraction of these documents being updated.
            In other words, it cannot be easily predicted whether a given
            newly-inserted document will be picked up by the update_many command or not.
        """

        _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
            (general_method_timeout_ms, "general_method_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.general_method_timeout_ms,
                "general_method_timeout_ms",
            ),
        )
        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        api_options = {
            "upsert": upsert,
        }
        # Empty for the first request; afterwards it holds the page state
        # handed back by the previous response.
        page_state_options: dict[str, str] = {}
        um_responses: list[dict[str, Any]] = []
        um_statuses: list[dict[str, Any]] = []
        logger.info(f"starting update_many on '{self.name}'")
        timeout_manager = MultiCallTimeoutManager(
            overall_timeout_ms=_general_method_timeout_ms,
            timeout_label=_gmt_label,
        )
        # Keep issuing requests until a response comes without a next page state.
        while True:
            options = {**api_options, **page_state_options}
            this_um_payload = {
                "updateMany": {
                    k: v
                    for k, v in {
                        "filter": filter,
                        "update": update,
                        "options": options,
                    }.items()
                    if v is not None
                }
            }
            logger.info(f"updateMany on '{self.name}'")
            # API errors are not raised by the request itself: they are
            # inspected below so that the partial result can be attached.
            this_um_response = await self._converted_request(
                payload=this_um_payload,
                raise_api_errors=False,
                timeout_context=timeout_manager.remaining_timeout(
                    cap_time_ms=_request_timeout_ms,
                    cap_timeout_label=_rt_label,
                ),
            )
            logger.info(f"finished updateMany on '{self.name}'")
            #
            # if errors, quit early
            if this_um_response.get("errors", []):
                partial_update_info = _prepare_update_info(um_statuses)
                partial_result = CollectionUpdateResult(
                    raw_results=um_responses,
                    update_info=partial_update_info,
                )
                cause_exception = DataAPIResponseException.from_response(
                    command=this_um_payload,
                    raw_response=this_um_response,
                )
                raise CollectionUpdateManyException(
                    partial_result=partial_result,
                    cause=cause_exception,
                )
            if "status" not in this_um_response:
                raise UnexpectedDataAPIResponseException(
                    text="Faulty response from update_many API command.",
                    raw_response=this_um_response,
                )
            # A null status is treated the same as an empty one.
            this_um_status = this_um_response.get("status") or {}
            um_responses.append(this_um_response)
            um_statuses.append(this_um_status)
            next_page_state = this_um_status.get("nextPageState")
            if next_page_state is None:
                break
            page_state_options = {"pageState": next_page_state}

        update_info = _prepare_update_info(um_statuses)
        logger.info(f"finished update_many on '{self.name}'")
        return CollectionUpdateResult(
            raw_results=um_responses,
            update_info=update_info,
        )

    async def find_one_and_delete(
        self,
        filter: FilterType,
        *,
        projection: ProjectionType | None = None,
        sort: SortType | None = None,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> DOC | None:
        """
        Find a document in the collection and delete it. The deleted document,
        however, is the return value of the method.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            projection: it controls which parts of the document are returned.
                It can be an allow-list: `{"f1": True, "f2": True}`,
                or a deny-list: `{"fx": False, "fy": False}`, but not a mixture
                (except for the `_id` and other special fields, which can be
                associated to both True or False independently of the rest
                of the specification).
                The special star-projections `{"*": True}` and `{"*": False}`
                have the effect of returning the whole document and `{}` respectively.
                For lists in documents, slice directives can be passed to select
                portions of the list: for instance, `{"array": {"$slice": 2}}`,
                `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
                `{"array": {"$slice": [-4, 2]}}`.
                An iterable over strings will be treated implicitly as an allow-list.
                The default projection (used if this parameter is not passed) does not
                necessarily include "special" fields such as `$vector` or `$vectorize`.
                See the Data API documentation for more on projections.
            sort: with this dictionary parameter one can control the sorting
                order of the documents matching the filter, effectively
                determining what document will come first and hence be the
                deleted one. See the `find` method for more on sorting.
                Vector-based ANN sorting is achieved by providing a "$vector"
                or a "$vectorize" key in `sort`.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            Either the document (or a projection thereof, as requested), or None
            if no matches were found in the first place.

        Raises:
            UnexpectedDataAPIResponseException: if the API response carries
                no "document" entry and its status does not report
                a `deletedCount` of zero.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> async def do_find_one_and_delete(acol: AsyncCollection) -> None:
            ...     await acol.insert_many(
            ...         [
            ...             {"species": "swan", "class": "Aves"},
            ...             {"species": "frog", "class": "Amphibia"},
            ...         ],
            ...     )
            ...     delete_result0 = await acol.find_one_and_delete(
            ...         {"species": {"$ne": "frog"}},
            ...         projection=["species"],
            ...     )
            ...     print("delete_result0", delete_result0)
            ...     delete_result1 = await acol.find_one_and_delete(
            ...         {"species": {"$ne": "frog"}},
            ...     )
            ...     print("delete_result1", delete_result1)
            ...
            >>> asyncio.run(do_find_one_and_delete(my_async_coll))
            delete_result0 {'_id': 'f335cd0f-...', 'species': 'swan'}
            delete_result1 None
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        _projection = normalize_optional_projection(projection)
        # Entries whose value is None (e.g. an unspecified sort or projection)
        # are dropped from the command altogether.
        fo_payload = {
            "findOneAndDelete": {
                k: v
                for k, v in {
                    "filter": filter,
                    "sort": sort,
                    "projection": _projection,
                }.items()
                if v is not None
            }
        }
        logger.info(f"findOneAndDelete on '{self.name}'")
        fo_response = await self._converted_request(
            payload=fo_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished findOneAndDelete on '{self.name}'")
        if "document" in fo_response.get("data", {}):
            document = fo_response["data"]["document"]
            return document  # type: ignore[no-any-return]
        # No document in the response: this is acceptable only if the status
        # explicitly reports that nothing was deleted. A null status is treated
        # as an empty one, so that it leads to the exception below instead of
        # an AttributeError.
        deleted_count = (fo_response.get("status") or {}).get("deletedCount")
        if deleted_count == 0:
            return None
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from find_one_and_delete API command.",
            raw_response=fo_response,
        )

    async def delete_one(
        self,
        filter: FilterType,
        *,
        sort: SortType | None = None,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionDeleteResult:
        """
        Delete a single document among those matching the given filter.
        No matter how many documents satisfy the filter, at most one
        of them is deleted by this method.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            sort: with this dictionary parameter one can control the sorting
                order of the documents matching the filter, effectively
                determining what document will come first and hence be the
                deleted one. See the `find` method for more on sorting.
                Vector-based ANN sorting is achieved by providing a "$vector"
                or a "$vectorize" key in `sort`.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a CollectionDeleteResult object summarizing the outcome of the
            delete operation.

        Raises:
            UnexpectedDataAPIResponseException: if the API response carries
                no "deletedCount" entry within its "status".

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> asyncio.run(my_async_coll.insert_many(
            ...     [{"seq": 1}, {"seq": 0}, {"seq": 2}]
            ... ))
            CollectionInsertManyResult(...)
            >>> asyncio.run(my_async_coll.delete_one({"seq": 1}))
            CollectionDeleteResult(raw_results=..., deleted_count=1)
            >>> asyncio.run(my_async_coll.distinct("seq"))
            [0, 2]
            >>> asyncio.run(my_async_coll.delete_one(
            ...     {"seq": {"$exists": True}},
            ...     sort={"seq": astrapy.constants.SortMode.DESCENDING},
            ... ))
            CollectionDeleteResult(raw_results=..., deleted_count=1)
            >>> asyncio.run(my_async_coll.distinct("seq"))
            [0]
            >>> asyncio.run(my_async_coll.delete_one({"seq": 2}))
            CollectionDeleteResult(raw_results=..., deleted_count=0)
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # Only the options that were actually supplied make it into the payload.
        candidate_options = {"filter": filter, "sort": sort}
        do_payload = {
            "deleteOne": {
                opt_name: opt_value
                for opt_name, opt_value in candidate_options.items()
                if opt_value is not None
            }
        }
        logger.info(f"deleteOne on '{self.name}'")
        do_response = await self._converted_request(
            payload=do_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished deleteOne on '{self.name}'")
        response_status = do_response.get("status", {})
        # A response lacking the deletion count cannot be interpreted.
        if "deletedCount" not in response_status:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from delete_one API command.",
                raw_response=do_response,
            )
        return CollectionDeleteResult(
            deleted_count=response_status["deletedCount"],
            raw_results=[do_response],
        )

    async def delete_many(
        self,
        filter: FilterType,
        *,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionDeleteResult:
        """
        Delete every document that matches the given filter.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
                Passing an empty filter, `{}`, completely erases all contents
                of the collection.
            general_method_timeout_ms: a timeout, in milliseconds, for the whole
                requested operation (which may involve multiple API requests).
                This method may entail successive HTTP API requests,
                depending on the amount of involved documents.
                If not passed, the collection-level setting is used instead.
            request_timeout_ms: a timeout, in milliseconds, for each API request.
                If not passed, the collection-level setting is used instead.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a CollectionDeleteResult object summarizing the outcome of the
            delete operation.

        Raises:
            CollectionDeleteManyException: if one of the API responses has
                a nonempty "errors" field. The exception carries the partial
                result accumulated from the previous successful requests.
            UnexpectedDataAPIResponseException: if an API response carries
                no "deletedCount" entry within its "status".

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> async def do_delete_many(acol: AsyncCollection) -> None:
            ...     await acol.insert_many([{"seq": 1}, {"seq": 0}, {"seq": 2}])
            ...     delete_result0 = await acol.delete_many({"seq": {"$lte": 1}})
            ...     print("delete_result0.deleted_count", delete_result0.deleted_count)
            ...     distinct1 = await acol.distinct("seq")
            ...     print("distinct1", distinct1)
            ...     delete_result2 = await acol.delete_many({"seq": {"$lte": 1}})
            ...     print("delete_result2.deleted_count", delete_result2.deleted_count)
            ...
            >>> asyncio.run(do_delete_many(my_async_coll))
            delete_result0.deleted_count 2
            distinct1 [2]
            delete_result2.deleted_count 0

        Note:
            This operation is in general not atomic. Depending on the amount
            of matching documents, it can keep running (in a blocking way)
            for a considerable amount of time. In that case, new documents
            that are meanwhile inserted (e.g. from another
            process/application) will be deleted during the execution of this
            method call until the collection is devoid of matches.
            An exception is the `filter={}` case, whereby the operation is atomic.
        """

        _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
            (general_method_timeout_ms, "general_method_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.general_method_timeout_ms,
                "general_method_timeout_ms",
            ),
        )
        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        # The overall budget is shared by all requests issued in the loop below.
        timeout_manager = MultiCallTimeoutManager(
            overall_timeout_ms=_general_method_timeout_ms,
            timeout_label=_gmt_label,
        )
        this_dm_payload = {"deleteMany": {"filter": filter}}
        dm_responses: list[dict[str, Any]] = []
        deleted_count = 0
        logger.info(f"starting delete_many on '{self.name}'")
        # The same command is re-issued until the API stops reporting "moreData".
        while True:
            logger.info(f"deleteMany on '{self.name}'")
            this_dm_response = await self._converted_request(
                payload=this_dm_payload,
                raise_api_errors=False,
                timeout_context=timeout_manager.remaining_timeout(
                    cap_time_ms=_request_timeout_ms,
                    cap_timeout_label=_rt_label,
                ),
            )
            logger.info(f"finished deleteMany on '{self.name}'")
            # API errors are inspected here (raise_api_errors=False above) so
            # that what was deleted so far can be attached to the exception.
            if this_dm_response.get("errors", []):
                raise CollectionDeleteManyException(
                    partial_result=CollectionDeleteResult(
                        deleted_count=deleted_count,
                        raw_results=dm_responses,
                    ),
                    cause=DataAPIResponseException.from_response(
                        command=this_dm_payload,
                        raw_response=this_dm_response,
                    ),
                )
            this_status = this_dm_response.get("status", {})
            this_dc = this_status.get("deletedCount")
            if this_dc is None:
                raise UnexpectedDataAPIResponseException(
                    text="Faulty response from delete_many API command.",
                    raw_response=this_dm_response,
                )
            dm_responses.append(this_dm_response)
            deleted_count += this_dc
            if not this_status.get("moreData", False):
                break

        logger.info(f"finished delete_many on '{self.name}'")
        return CollectionDeleteResult(
            deleted_count=deleted_count,
            raw_results=dm_responses,
        )

    async def drop(
        self,
        *,
        collection_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Drop the collection, i.e. delete it from the database along with
        all the documents it contains.

        The actual work is delegated to the `drop_collection` method of this
        collection's database, to which the collection name and all timeout
        parameters are passed unchanged.

        Args:
            collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `collection_admin_timeout_ms`.
            timeout_ms: an alias for `collection_admin_timeout_ms`.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> async def drop_and_check(acol: AsyncCollection) -> None:
            ...     doc0 = await acol.find_one({})
            ...     print("doc0", doc0)
            ...     await acol.drop()
            ...     doc1 = await acol.find_one({})
            ...
            >>> asyncio.run(drop_and_check(my_async_coll))
            doc0 {'_id': '...', 'z': -10}
            Traceback (most recent call last):
                ... ...
            astrapy.exceptions.DataAPIResponseException: Collection does not exist, ...

        Note:
            Use with caution.

        Note:
            Once the method succeeds, methods on this object can still be invoked:
            however, this hardly makes sense as the underlying actual collection
            no longer exists.
            It is the responsibility of the developer to design a correct flow
            which avoids any further use of a dropped collection.
        """

        logger.info(f"dropping collection '{self.name}' (self)")
        # Dropping is a database-level operation: hand it over to the parent database.
        await self.database.drop_collection(
            self.name,
            collection_admin_timeout_ms=collection_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info(f"finished dropping collection '{self.name}' (self)")

    async def command(
        self,
        body: dict[str, Any] | None,
        *,
        raise_api_errors: bool = True,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> dict[str, Any]:
        """
        Issue a POST request to the Data API, targeting this collection,
        whose payload is entirely supplied by the caller.
        The payload is sent as is: no transformations or type conversions
        are applied to it.

        Args:
            body: a JSON-serializable dictionary, the payload of the request.
            raise_api_errors: if True, responses with a nonempty 'errors' field
                result in an astrapy exception being raised.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a dictionary with the response of the HTTP request.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> asyncio.run(my_async_coll.command({"countDocuments": {}}))
            {'status': {'count': 123}}
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # Label for the log lines: the sorted top-level keys of the payload,
        # or a placeholder when the payload is None or empty.
        _cmd_desc: str = ",".join(sorted(body.keys())) if body else "(none)"
        logger.info(f"command={_cmd_desc} on '{self.name}'")
        # The raw API commander is used so that the payload goes out untouched.
        command_result = await self._api_commander.async_request(
            payload=body,
            raise_api_errors=raise_api_errors,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished command={_cmd_desc} on '{self.name}'")
        return command_result

Ancestors

typing.Generic

Instance variables

var database : AsyncDatabase

An AsyncDatabase object: the database this collection belongs to.

Example

>>> my_async_coll.database.name
'the_db'

Expand source code

@property
def database(self) -> AsyncDatabase:
    """
    The AsyncDatabase object this collection belongs to.

    Example:
        >>> my_async_coll.database.name
        'the_db'
    """

    return self._database

var full_name : str

The fully-qualified collection name within the database, in the form "keyspace.collection_name".

Example

>>> my_async_coll.full_name
'default_keyspace.my_v_collection'

Expand source code

@property
def full_name(self) -> str:
    """
    The fully-qualified collection name within the database,
    in the form "keyspace.collection_name".

    The value is built by joining this collection's `keyspace` and `name`
    properties with a dot.

    Example:
        >>> my_async_coll.full_name
        'default_keyspace.my_v_collection'
    """

    return f"{self.keyspace}.{self.name}"

var keyspace : str

The keyspace this collection is in.

Example

>>> my_async_coll.keyspace
'default_keyspace'

Expand source code

@property
def keyspace(self) -> str:
    """
    The keyspace this collection is in, as set on the collection's database.

    Raises:
        RuntimeError: if the database has its keyspace set to None.

    Example:
        >>> my_async_coll.keyspace
        'default_keyspace'
    """

    db_keyspace = self.database.keyspace
    if db_keyspace is not None:
        return db_keyspace
    # A collection cannot be located without a keyspace on its database.
    raise RuntimeError("The collection's DB is set with keyspace=None")

var name : str

The name of this collection.

Example

>>> my_async_coll.name
'my_v_collection'

Expand source code

@property
def name(self) -> str:
    """
    The name of this collection, as stored on this object.

    Example:
        >>> my_async_coll.name
        'my_v_collection'
    """

    return self._name

Methods

async def command(self, body: dict[str, Any] | None, *, raise_api_errors: bool = True, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> dict[str, typing.Any]

Send a POST request to the Data API for this collection with an arbitrary, caller-provided payload. No transformations or type conversions are made on the provided payload.

Args

body: a JSON-serializable dictionary, the payload of the request.
raise_api_errors: if True, responses with a nonempty 'errors' field result in an astrapy exception being raised.
general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a dictionary with the response of the HTTP request.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> asyncio.run(my_async_coll.command({"countDocuments": {}}))
{'status': {'count': 123}}

Expand source code

async def command(
    self,
    body: dict[str, Any] | None,
    *,
    raise_api_errors: bool = True,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> dict[str, Any]:
    """
    Issue a POST request to the Data API, targeting this collection,
    whose payload is entirely supplied by the caller.
    The payload is sent as is: no transformations or type conversions
    are applied to it.

    Args:
        body: a JSON-serializable dictionary, the payload of the request.
        raise_api_errors: if True, responses with a nonempty 'errors' field
            result in an astrapy exception being raised.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a dictionary with the response of the HTTP request.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> asyncio.run(my_async_coll.command({"countDocuments": {}}))
        {'status': {'count': 123}}
    """

    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # Label for the log lines: the sorted top-level keys of the payload,
    # or a placeholder when the payload is None or empty.
    _cmd_desc: str = ",".join(sorted(body.keys())) if body else "(none)"
    logger.info(f"command={_cmd_desc} on '{self.name}'")
    # The raw API commander is used so that the payload goes out untouched.
    command_result = await self._api_commander.async_request(
        payload=body,
        raise_api_errors=raise_api_errors,
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    logger.info(f"finished command={_cmd_desc} on '{self.name}'")
    return command_result

async def count_documents(self, filter: FilterType, *, upper_bound: int, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> int

Count the documents in the collection matching the specified filter.

Args

filter: a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: `{}`; `{"name": "John"}`; `{"price": {"$lt": 100}}`; `{"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}`. See the Data API documentation for the full set of operators.
upper_bound: a required ceiling on the result of the count operation. If the actual number of documents exceeds this value, an exception will be raised. Furthermore, if the actual number of documents exceeds the maximum count that the Data API can reach (regardless of upper_bound), an exception will be raised.
general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Returns

the exact count of matching documents.

Example

>>> async def do_count_docs(acol: AsyncCollection) -> None:
...     await acol.insert_many([{"seq": i} for i in range(20)])
...     count0 = await acol.count_documents({}, upper_bound=100)
...     print("count0", count0)
...     count1 = await acol.count_documents(
...         {"seq":{"$gt": 15}}, upper_bound=100
...     )
...     print("count1", count1)
...     count2 = await acol.count_documents({}, upper_bound=10)
...     print("count2", count2)
...
>>> asyncio.run(do_count_docs(my_async_coll))
count0 20
count1 4
Traceback (most recent call last):
    ... ...
astrapy.exceptions.TooManyDocumentsToCountException

Note

Count operations are expensive: for this reason, the best practice is to provide a reasonable upper_bound according to the caller expectations. Moreover, indiscriminate usage of count operations for sizeable amounts of documents (i.e. in the thousands and more) is discouraged in favor of alternative application-specific solutions. Keep in mind that the Data API has a hard upper limit on the amount of documents it will count, and that an exception will be thrown by this method if this limit is encountered.

Expand source code

async def count_documents(
    self,
    filter: FilterType,
    *,
    upper_bound: int,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> int:
    """
    Return the number of documents in the collection that match
    the specified filter.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        upper_bound: a required ceiling on the result of the count operation.
            If the actual number of documents exceeds this value,
            an exception will be raised.
            Furthermore, if the actual number of documents exceeds the maximum
            count that the Data API can reach (regardless of upper_bound),
            an exception will be raised.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        the exact count of matching documents.

    Raises:
        TooManyDocumentsToCountException: if the API response flags "moreData"
            (the server-side maximum was exceeded), or if the returned count
            is greater than `upper_bound`.
        UnexpectedDataAPIResponseException: if the API response carries
            no "count" entry within its "status".

    Example:
        >>> async def do_count_docs(acol: AsyncCollection) -> None:
        ...     await acol.insert_many([{"seq": i} for i in range(20)])
        ...     count0 = await acol.count_documents({}, upper_bound=100)
        ...     print("count0", count0)
        ...     count1 = await acol.count_documents(
        ...         {"seq":{"$gt": 15}}, upper_bound=100
        ...     )
        ...     print("count1", count1)
        ...     count2 = await acol.count_documents({}, upper_bound=10)
        ...     print("count2", count2)
        ...
        >>> asyncio.run(do_count_docs(my_async_coll))
        count0 20
        count1 4
        Traceback (most recent call last):
            ... ...
        astrapy.exceptions.TooManyDocumentsToCountException

    Note:
        Count operations are expensive: for this reason, the best practice
        is to provide a reasonable `upper_bound` according to the caller
        expectations. Moreover, indiscriminate usage of count operations
        for sizeable amounts of documents (i.e. in the thousands and more)
        is discouraged in favor of alternative application-specific solutions.
        Keep in mind that the Data API has a hard upper limit on the amount
        of documents it will count, and that an exception will be thrown
        by this method if this limit is encountered.
    """

    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    cd_payload = {"countDocuments": {"filter": filter}}
    logger.info(f"countDocuments on '{self.name}'")
    cd_response = await self._converted_request(
        payload=cd_payload,
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    logger.info(f"finished countDocuments on '{self.name}'")
    cd_status = cd_response.get("status", {})
    # Without a count in the status, the response cannot be interpreted.
    if "count" not in cd_status:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from countDocuments API command.",
            raw_response=cd_response,
        )
    count: int = cd_status["count"]
    # "moreData" means the server stopped counting at its own ceiling.
    if cd_status.get("moreData", False):
        raise TooManyDocumentsToCountException(
            text=f"Document count exceeds {count}, the maximum allowed by the server",
            server_max_count_exceeded=True,
        )
    # The server could count everything: enforce the caller-provided ceiling.
    if count > upper_bound:
        raise TooManyDocumentsToCountException(
            text="Document count exceeds required upper bound",
            server_max_count_exceeded=False,
        )
    return count

async def delete_many(self, filter: FilterType, *, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> CollectionDeleteResult

Delete all documents matching a provided filter.

Args

filter: a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: `{}`; `{"name": "John"}`; `{"price": {"$lt": 100}}`; `{"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}`. See the Data API documentation for the full set of operators. Passing an empty filter, `{}`, completely erases all contents of the collection.
general_method_timeout_ms: a timeout, in milliseconds, for the whole requested operation (which may involve multiple API requests). This method may entail successive HTTP API requests, depending on the amount of involved documents. If not passed, the collection-level setting is used instead.
request_timeout_ms: a timeout, in milliseconds, for each API request. If not passed, the collection-level setting is used instead.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a CollectionDeleteResult object summarizing the outcome of the delete operation.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> async def do_delete_many(acol: AsyncCollection) -> None:
...     await acol.insert_many([{"seq": 1}, {"seq": 0}, {"seq": 2}])
...     delete_result0 = await acol.delete_many({"seq": {"$lte": 1}})
...     print("delete_result0.deleted_count", delete_result0.deleted_count)
...     distinct1 = await acol.distinct("seq")
...     print("distinct1", distinct1)
...     delete_result2 = await acol.delete_many({"seq": {"$lte": 1}})
...     print("delete_result2.deleted_count", delete_result2.deleted_count)
...
>>> asyncio.run(do_delete_many(my_async_coll))
delete_result0.deleted_count 2
distinct1 [2]
delete_result2.deleted_count 0

Note

This operation is in general not atomic. Depending on the amount of matching documents, it can keep running (in a blocking way) for a considerable amount of time. In that case, new documents that are meanwhile inserted (e.g. from another process/application) will be deleted during the execution of this method call until the collection is devoid of matches. An exception is the filter={} case, whereby the operation is atomic.

Expand source code

async def delete_many(
    self,
    filter: FilterType,
    *,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionDeleteResult:
    """
    Remove from the collection every document that satisfies a filter.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
            An empty filter, `{}`, wipes the whole content of the collection.
        general_method_timeout_ms: a timeout, in milliseconds, covering the
            method call as a whole. Since the deletion is carried out through
            repeated `deleteMany` API requests (as many as the amount of
            matching documents requires), this is a budget shared by all
            of them. If not passed, the collection-level setting is used.
        request_timeout_ms: a timeout, in milliseconds, applied to each
            individual API request. If not passed, the collection-level
            setting is used.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a CollectionDeleteResult object summarizing the outcome of the
        delete operation (total deleted count and the raw API responses).

    Raises:
        CollectionDeleteManyException: if an API response reports errors.
            The exception carries the partial result accumulated up to
            (and not including) the failing request.
        UnexpectedDataAPIResponseException: if an API response does not
            report a `deletedCount` in its status.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> async def do_delete_many(acol: AsyncCollection) -> None:
        ...     await acol.insert_many([{"seq": 1}, {"seq": 0}, {"seq": 2}])
        ...     delete_result0 = await acol.delete_many({"seq": {"$lte": 1}})
        ...     print("delete_result0.deleted_count", delete_result0.deleted_count)
        ...     distinct1 = await acol.distinct("seq")
        ...     print("distinct1", distinct1)
        ...     delete_result2 = await acol.delete_many({"seq": {"$lte": 1}})
        ...     print("delete_result2.deleted_count", delete_result2.deleted_count)
        ...
        >>> asyncio.run(do_delete_many(my_async_coll))
        delete_result0.deleted_count 2
        distinct1 [2]
        delete_result2.deleted_count 0

    Note:
        This operation is in general not atomic. Depending on the amount
        of matching documents, it can keep running (in a blocking way)
        for a macroscopic time. In that case, new documents that are
        meanwhile inserted (e.g. from another process/application) will be
        deleted during the execution of this method call until the
        collection is devoid of matches.
        An exception is the `filter={}` case, whereby the operation is atomic.
    """

    # Effective timeouts: explicit arguments win over collection defaults.
    overall_ms, overall_label = _first_valid_timeout(
        (general_method_timeout_ms, "general_method_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (
            self.api_options.timeout_options.general_method_timeout_ms,
            "general_method_timeout_ms",
        ),
    )
    per_request_ms, per_request_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    raw_responses: list[dict[str, Any]] = []
    total_deleted = 0
    # One manager for the whole call: each request gets what is left of
    # the overall budget, capped by the per-request timeout.
    budget = MultiCallTimeoutManager(
        overall_timeout_ms=overall_ms,
        timeout_label=overall_label,
    )
    command = {"deleteMany": {"filter": filter}}
    logger.info(f"starting delete_many on '{self.name}'")
    while True:
        logger.info(f"deleteMany on '{self.name}'")
        # API errors are inspected here rather than raised by the request
        # layer, so that the partial result can be attached to the exception.
        response = await self._converted_request(
            payload=command,
            raise_api_errors=False,
            timeout_context=budget.remaining_timeout(
                cap_time_ms=per_request_ms,
                cap_timeout_label=per_request_label,
            ),
        )
        logger.info(f"finished deleteMany on '{self.name}'")

        # if errors, quit early (reporting what was deleted so far)
        if response.get("errors", []):
            raise CollectionDeleteManyException(
                partial_result=CollectionDeleteResult(
                    deleted_count=total_deleted,
                    raw_results=raw_responses,
                ),
                cause=DataAPIResponseException.from_response(
                    command=command,
                    raw_response=response,
                ),
            )

        status = response.get("status", {})
        batch_deleted = status.get("deletedCount")
        if batch_deleted is None:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from delete_many API command.",
                raw_response=response,
            )

        raw_responses.append(response)
        total_deleted += batch_deleted
        # the API signals through "moreData" whether another round is needed
        if not status.get("moreData", False):
            break

    logger.info(f"finished delete_many on '{self.name}'")
    return CollectionDeleteResult(
        deleted_count=total_deleted,
        raw_results=raw_responses,
    )

async def delete_one(self, filter: FilterType, *, sort: SortType | None = None, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> CollectionDeleteResult

Delete one document matching a provided filter. This method never deletes more than a single document, regardless of the number of matches to the provided filters.

Args

filter: a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
sort: with this dictionary parameter one can control the sorting order of the documents matching the filter, effectively determining what document will come first and hence be the deleted one. See the find method for more on sorting. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a CollectionDeleteResult object summarizing the outcome of the delete operation.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> asyncio.run(my_async_coll.insert_many(
...     [{"seq": 1}, {"seq": 0}, {"seq": 2}]
... ))
CollectionInsertManyResult(...)
>>> asyncio.run(my_async_coll.delete_one({"seq": 1}))
CollectionDeleteResult(raw_results=..., deleted_count=1)
>>> asyncio.run(my_async_coll.distinct("seq"))
[0, 2]
>>> asyncio.run(my_async_coll.delete_one(
...     {"seq": {"$exists": True}},
...     sort={"seq": astrapy.constants.SortMode.DESCENDING},
... ))
CollectionDeleteResult(raw_results=..., deleted_count=1)
>>> asyncio.run(my_async_coll.distinct("seq"))
[0]
>>> asyncio.run(my_async_coll.delete_one({"seq": 2}))
CollectionDeleteResult(raw_results=..., deleted_count=0)

Expand source code

async def delete_one(
    self,
    filter: FilterType,
    *,
    sort: SortType | None = None,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionDeleteResult:
    """
    Delete a single document matching a provided filter.
    At most one document is ever deleted by this method, no matter
    how many documents match the provided filter.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        sort: with this dictionary parameter one can control the sorting
            order of the documents matching the filter, effectively
            determining what document will come first and hence be the
            deleted one. See the `find` method for more on sorting.
            Vector-based ANN sorting is achieved by providing a "$vector"
            or a "$vectorize" key in `sort`.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a CollectionDeleteResult object summarizing the outcome of the
        delete operation.

    Raises:
        UnexpectedDataAPIResponseException: if the API response status
            does not report a `deletedCount`.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> asyncio.run(my_async_coll.insert_many(
        ...     [{"seq": 1}, {"seq": 0}, {"seq": 2}]
        ... ))
        CollectionInsertManyResult(...)
        >>> asyncio.run(my_async_coll.delete_one({"seq": 1}))
        CollectionDeleteResult(raw_results=..., deleted_count=1)
        >>> asyncio.run(my_async_coll.distinct("seq"))
        [0, 2]
        >>> asyncio.run(my_async_coll.delete_one(
        ...     {"seq": {"$exists": True}},
        ...     sort={"seq": astrapy.constants.SortMode.DESCENDING},
        ... ))
        CollectionDeleteResult(raw_results=..., deleted_count=1)
        >>> asyncio.run(my_async_coll.distinct("seq"))
        [0]
        >>> asyncio.run(my_async_coll.delete_one({"seq": 2}))
        CollectionDeleteResult(raw_results=..., deleted_count=0)
    """

    effective_ms, effective_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # Only the clauses actually provided are sent along with the command.
    command_body: dict[str, Any] = {}
    if filter is not None:
        command_body["filter"] = filter
    if sort is not None:
        command_body["sort"] = sort
    command = {"deleteOne": command_body}

    logger.info(f"deleteOne on '{self.name}'")
    response = await self._converted_request(
        payload=command,
        timeout_context=_TimeoutContext(request_ms=effective_ms, label=effective_label),
    )
    logger.info(f"finished deleteOne on '{self.name}'")

    status = response.get("status", {})
    if "deletedCount" not in status:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from delete_one API command.",
            raw_response=response,
        )
    return CollectionDeleteResult(
        deleted_count=status["deletedCount"],
        raw_results=[response],
    )

Return a list of the unique values of key across the documents in the collection that match the provided filter.

Args

key: the name of the field whose value is inspected across documents. Keys can be just field names (as is often the case), but the dot-notation is also accepted to mean subkeys or indices within lists (for example, "map_field.subkey" or "list_field.2"). If a field has literal dots or ampersands in its name, this parameter must be escaped to be treated properly. The key can also be a list of strings and numbers, in which case no escape is necessary: each item in the list is a field name/index, for example ["map_field", "subkey"] or ["list_field", 2]. If lists are encountered and no numeric index is specified, all items in the list are visited.
filter: a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
general_method_timeout_ms: a timeout, in milliseconds, for the whole requested operation (which may involve multiple API requests). This method, being based on find (see) may entail successive HTTP API requests, depending on the amount of involved documents.
request_timeout_ms: a timeout, in milliseconds, for each API request.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a list of all different values for key found across the documents that match the filter. The result list has no repeated items.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> async def run_distinct(acol: AsyncCollection) -> None:
...     await acol.insert_many(
...         [
...             {"name": "Marco", "food": ["apple", "orange"], "city": "Helsinki"},
...             {"name": "Emma", "food": {"likes_fruit": True, "allergies": []}},
...         ]
...     )
...     distinct0 = await acol.distinct("name")
...     print("distinct('name')", distinct0)
...     distinct1 = await acol.distinct("city")
...     print("distinct('city')", distinct1)
...     distinct2 = await acol.distinct("food")
...     print("distinct('food')", distinct2)
...     distinct3 = await acol.distinct("food.1")
...     print("distinct('food.1')", distinct3)
...     distinct4 = await acol.distinct("food.allergies")
...     print("distinct('food.allergies')", distinct4)
...     distinct5 = await acol.distinct("food.likes_fruit")
...     print("distinct('food.likes_fruit')", distinct5)
...
>>> asyncio.run(run_distinct(my_async_coll))
distinct('name') ['Emma', 'Marco']
distinct('city') ['Helsinki']
distinct('food') [{'likes_fruit': True, 'allergies': []}, 'apple', 'orange']
distinct('food.1') ['orange']
distinct('food.allergies') []
distinct('food.likes_fruit') [True]

Note

It must be kept in mind that distinct is a client-side operation, which effectively browses all required documents using the logic of the find method and collects the unique values found for key. As such, there may be performance, latency and ultimately billing implications if the amount of matching documents is large.

Note

For details on the behaviour of "distinct" in conjunction with real-time changes in the collection contents, see the Note of the find command.

Expand source code

async def distinct(
    self,
    key: str | Iterable[str | int],
    *,
    filter: FilterType | None = None,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[Any]:
    """
    Collect the unique values found for `key` across the documents
    in the collection that match the provided filter.

    Args:
        key: the name of the field whose value is inspected across documents.
            Keys can be just field names (as is often the case), but
            the dot-notation is also accepted to mean subkeys or indices
            within lists (for example, "map_field.subkey" or "list_field.2").
            If a field has literal dots or ampersands in its name, this
            parameter must be escaped to be treated properly.
            The key can also be a list of strings and numbers, in which case
            no escape is necessary: each item in the list is a field name/index,
            for example ["map_field", "subkey"] or ["list_field", 2].
            If lists are encountered and no numeric index is specified,
            all items in the list are visited.
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        general_method_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation (which may involve multiple API requests).
            This method, being based on `find` (see), may entail successive
            HTTP API requests, depending on the amount of involved documents.
            If not passed, the collection-level setting is used instead.
        request_timeout_ms: a timeout, in milliseconds, for each API request.
            If not passed, the collection-level setting is used instead.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a list of all different values for `key` found across the documents
        that match the filter, in order of first appearance during the scan.
        The result list has no repeated items.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> async def run_distinct(acol: AsyncCollection) -> None:
        ...     await acol.insert_many(
        ...         [
        ...             {"name": "Marco", "food": ["apple", "orange"], "city": "Helsinki"},
        ...             {"name": "Emma", "food": {"likes_fruit": True, "allergies": []}},
        ...         ]
        ...     )
        ...     distinct0 = await acol.distinct("name")
        ...     print("distinct('name')", distinct0)
        ...     distinct1 = await acol.distinct("city")
        ...     print("distinct('city')", distinct1)
        ...     distinct2 = await acol.distinct("food")
        ...     print("distinct('food')", distinct2)
        ...     distinct3 = await acol.distinct("food.1")
        ...     print("distinct('food.1')", distinct3)
        ...     distinct4 = await acol.distinct("food.allergies")
        ...     print("distinct('food.allergies')", distinct4)
        ...     distinct5 = await acol.distinct("food.likes_fruit")
        ...     print("distinct('food.likes_fruit')", distinct5)
        ...
        >>> asyncio.run(run_distinct(my_async_coll))
        distinct('name') ['Emma', 'Marco']
        distinct('city') ['Helsinki']
        distinct('food') [{'likes_fruit': True, 'allergies': []}, 'apple', 'orange']
        distinct('food.1') ['orange']
        distinct('food.allergies') []
        distinct('food.likes_fruit') [True]

    Note:
        It must be kept in mind that `distinct` is a client-side operation,
        which effectively browses all required documents using the logic
        of the `find` method and collects the unique values found for `key`.
        As such, there may be performance, latency and ultimately
        billing implications if the amount of matching documents is large.

    Note:
        For details on the behaviour of "distinct" in conjunction with
        real-time changes in the collection contents, see the
        Note of the `find` command.
    """

    # lazy-import here to avoid circular import issues
    from astrapy.cursors import AsyncCollectionFindCursor

    # Effective timeouts: explicit arguments win over collection defaults.
    overall_ms, overall_label = _first_valid_timeout(
        (general_method_timeout_ms, "general_method_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (
            self.api_options.timeout_options.general_method_timeout_ms,
            "general_method_timeout_ms",
        ),
    )
    per_request_ms, per_request_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )

    # Cursor setup: the extractor pulls the values for `key` out of each
    # document, while the reduced key is what gets projected server-side
    # (presumably a projection-safe form of `key` -- see the helper).
    extract_values = _create_document_key_extractor(key)
    projection_key = _reduce_distinct_key_to_safe(key)
    # relaxing the type hint (limited to within this method body)
    find_cursor: AsyncCollectionFindCursor[dict[str, Any], dict[str, Any]] = (
        AsyncCollectionFindCursor(
            collection=self,
            request_timeout_ms=per_request_ms,
            overall_timeout_ms=overall_ms,
            request_timeout_label=per_request_label,
            overall_timeout_label=overall_label,
        )  # type: ignore[assignment]
        .filter(filter)
        .project({projection_key: True})
    )

    # Cursor consumption: values are deduplicated through a hash of each
    # item, so that unhashable values (dicts, lists) are supported as well.
    seen_hashes = set()
    unique_values: list[Any] = []
    logger.info(f"running distinct() on '{self.name}'")
    async for doc in find_cursor:
        for value in extract_values(doc):
            value_hash = _hash_collection_document(
                value, options=self.api_options.serdes_options
            )
            if value_hash in seen_hashes:
                continue
            seen_hashes.add(value_hash)
            unique_values.append(value)
    logger.info(f"finished running distinct() on '{self.name}'")
    return unique_values

async def drop(self, *, collection_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Drop the collection, i.e. delete it from the database along with all the documents it contains.

Args

collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for collection_admin_timeout_ms.
timeout_ms: an alias for collection_admin_timeout_ms.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> async def drop_and_check(acol: AsyncCollection) -> None:
...     doc0 = await acol.find_one({})
...     print("doc0", doc0)
...     await acol.drop()
...     doc1 = await acol.find_one({})
...
>>> asyncio.run(drop_and_check(my_async_coll))
doc0 {'_id': '...', 'z': -10}
Traceback (most recent call last):
    ... ...
astrapy.exceptions.DataAPIResponseException: Collection does not exist, ...

Note

Use with caution.

Note

Once the method succeeds, methods on this object can still be invoked: however, this hardly makes sense as the underlying actual collection is no more. It is the responsibility of the developer to design a correct flow which avoids using a deceased collection any further.

Expand source code

async def drop(
    self,
    *,
    collection_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Drop the collection, i.e. delete it from the database along with
    all the documents it contains.

    The actual work is delegated to the `drop_collection` method of the
    database this collection belongs to, invoked with this collection's name.

    Args:
        collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `collection_admin_timeout_ms`.
        timeout_ms: an alias for `collection_admin_timeout_ms`.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> async def drop_and_check(acol: AsyncCollection) -> None:
        ...     doc0 = await acol.find_one({})
        ...     print("doc0", doc0)
        ...     await acol.drop()
        ...     doc1 = await acol.find_one({})
        ...
        >>> asyncio.run(drop_and_check(my_async_coll))
        doc0 {'_id': '...', 'z': -10}
        Traceback (most recent call last):
            ... ...
        astrapy.exceptions.DataAPIResponseException: Collection does not exist, ...

    Note:
        Use with caution.

    Note:
        Once the method succeeds, methods on this object can still be invoked:
        however, this hardly makes sense as the underlying actual collection
        is no more.
        It is the responsibility of the developer to design a correct flow
        which avoids using a deceased collection any further.
    """

    logger.info(f"dropping collection '{self.name}' (self)")
    # The three timeout arguments are forwarded untouched: resolving them
    # into an effective value is left to the database-level method.
    await self.database.drop_collection(
        self.name,
        collection_admin_timeout_ms=collection_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info(f"finished dropping collection '{self.name}' (self)")

async def estimated_document_count(self, *, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> int

Query the API server for an estimate of the document count in the collection.

Contrary to count_documents, this method has no filtering parameters.

Args

general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a server-provided estimate count of the documents in the collection.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> asyncio.run(my_async_coll.estimated_document_count())
35700

Expand source code

async def estimated_document_count(
    self,
    *,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> int:
    """
    Ask the API server for an estimate of how many documents
    the collection contains.

    Unlike `count_documents`, this method accepts no filtering parameters.

    Args:
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a server-provided estimate count of the documents in the collection.

    Raises:
        UnexpectedDataAPIResponseException: if the API response status
            does not report a `count`.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> asyncio.run(my_async_coll.estimated_document_count())
        35700
    """

    effective_ms, effective_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # the command takes no arguments, hence the empty body
    command: dict[str, Any] = {"estimatedDocumentCount": {}}
    logger.info(f"estimatedDocumentCount on '{self.name}'")
    response = await self._converted_request(
        payload=command,
        timeout_context=_TimeoutContext(request_ms=effective_ms, label=effective_label),
    )
    logger.info(f"finished estimatedDocumentCount on '{self.name}'")

    status = response.get("status", {})
    if "count" not in status:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from estimatedDocumentCount API command.",
            raw_response=response,
        )
    estimate: int = status["count"]
    return estimate

Find documents on the collection, matching a certain provided filter.

The method returns a cursor that can then be iterated over. Depending on the method call pattern, the iteration over all documents can reflect collection mutations that occurred since the find method was called, or not. In cases where the cursor reflects mutations in real-time, it will iterate over documents in an approximate way (i.e. exhibiting occasional skipped or duplicate documents). This happens when making use of the sort option in a non-vector-search manner.

Args

filter: a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
projection: it controls which parts of the document are returned. It can be an allow-list: {"f1": True, "f2": True}, or a deny-list: {"fx": False, "fy": False}, but not a mixture (except for the _id and other special fields, which can be associated to both True or False independently of the rest of the specification). The special star-projections {"*": True} and {"*": False} have the effect of returning the whole document and {} respectively. For lists in documents, slice directives can be passed to select portions of the list: for instance, {"array": {"$slice": 2}}, {"array": {"$slice": -2}}, {"array": {"$slice": [4, 2]}} or {"array": {"$slice": [-4, 2]}}. An iterable over strings will be treated implicitly as an allow-list. The default projection (used if this parameter is not passed) does not necessarily include "special" fields such as $vector or $vectorize. See the Data API documentation for more on projections.
document_type: this parameter acts as a formal specifier for the type checker. If omitted, the resulting cursor is implicitly an AsyncCollectionFindCursor[DOC, DOC], i.e. maintains the same type for the items it returns as that for the documents in the collection. Strictly typed code may want to specify this parameter especially when a projection is given.
skip: with this integer parameter, what would be the first skip documents returned by the query are discarded, and the results start from the (skip+1)-th document. This parameter can be used only in conjunction with an explicit sort criterion of the ascending/descending type (i.e. it cannot be used when not sorting, nor with vector-based ANN search).
limit: this (integer) parameter sets a limit over how many documents are returned. Once limit is reached (or the cursor is exhausted for lack of matching documents), nothing more is returned.
include_similarity: a boolean to request the numeric value of the similarity to be returned as an added "$similarity" key in each returned document. It can be used meaningfully only in a vector search (see sort).
include_sort_vector: a boolean to request the search query vector. If set to True (and if the invocation is a vector search), calling the get_sort_vector method on the returned cursor will yield the vector used for the ANN search.
sort: with this dictionary parameter one can control the order the documents are returned. See the Note about sorting, as well as the one about upper bounds, for details. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
request_timeout_ms: a timeout, in milliseconds, for each single one of the underlying HTTP requests used to fetch documents as the cursor is iterated over. If not passed, the collection-level setting is used instead.
timeout_ms: an alias for request_timeout_ms.

Returns

an AsyncCollectionFindCursor object, that can be iterated over (and manipulated in several ways). The cursor, if needed, handles pagination under the hood as the documents are consumed.

Examples

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> async def run_finds(acol: AsyncCollection) -> None:
...     filter = {"seq": {"$exists": True}}
...     print("find results 1:")
...     async for doc in acol.find(filter, projection={"seq": True}, limit=5):
...         print(doc["seq"])
...     async_cursor1 = acol.find(
...         {},
...         limit=4,
...         sort={"seq": astrapy.constants.SortMode.DESCENDING},
...     )
...     ids = [doc["_id"] async for doc in async_cursor1]
...     print("find results 2:", ids)
...
>>> asyncio.run(run_finds(my_async_coll))
find results 1:
48
35
7
11
13
find results 2: ['d656cd9d-...', '479c7ce8-...', '96dc87fd-...', '83f0a21f-...']

>>> async def run_vector_finds(acol: AsyncCollection) -> list[str]:
...     await acol.insert_many([
...         {"tag": "A", "$vector": [4, 5]},
...         {"tag": "B", "$vector": [3, 4]},
...         {"tag": "C", "$vector": [3, 2]},
...         {"tag": "D", "$vector": [4, 1]},
...         {"tag": "E", "$vector": [2, 5]},
...     ])
...     ann_tags = [
...         document["tag"]
...         async for document in acol.find(
...             {},
...             sort={"$vector": [3, 3]},
...             limit=3,
...         )
...     ]
...     return ann_tags
...
>>> asyncio.run(run_vector_finds(my_async_coll))
['A', 'B', 'C']
>>> # (assuming the collection has metric VectorMetric.COSINE)

>>> async_cursor = my_async_coll.find(
...     sort={"$vector": [3, 3]},
...     limit=3,
...     include_sort_vector=True,
... )
>>> asyncio.run(async_cursor.get_sort_vector())
[3.0, 3.0]
>>> asyncio.run(async_cursor.__anext__())
{'_id': 'b13ce177-738e-47ec-bce1-77738ee7ec93', 'tag': 'A'}
>>> asyncio.run(async_cursor.get_sort_vector())
[3.0, 3.0]

Note

The following are example values for the sort parameter. When no particular order is required: sort={} When sorting by a certain value in ascending/descending order: sort={"field": SortMode.ASCENDING} sort={"field": SortMode.DESCENDING} When sorting first by "field" and then by "subfield" (while modern Python versions preserve the order of dictionaries, it is suggested for clarity to employ a collections.OrderedDict in these cases): sort={ "field": SortMode.ASCENDING, "subfield": SortMode.ASCENDING, } When running a vector similarity (ANN) search: sort={"$vector": [0.4, 0.15, -0.5]}

Note

Some combinations of arguments impose an implicit upper bound on the number of documents that are returned by the Data API. More specifically: (a) Vector ANN searches cannot return more than a number of documents that at the time of writing is set to 1000 items. (b) When using a sort criterion of the ascending/descending type, the Data API will return a smaller number of documents, set to 20 at the time of writing, and stop there. The returned documents are the top results across the whole collection according to the requested criterion.

Note

When not specifying sorting criteria at all (by vector or otherwise), the cursor can scroll through an arbitrary number of documents as the Data API and the client periodically exchange new chunks of documents. It should be noted that the behavior of the cursor in the case documents have been added/removed after the find was started depends on database internals and it is not guaranteed, nor excluded, that such "real-time" changes in the data would be picked up by the cursor.

Expand source code

def find(
    self,
    filter: FilterType | None = None,
    *,
    projection: ProjectionType | None = None,
    document_type: type[DOC2] | None = None,
    skip: int | None = None,
    limit: int | None = None,
    include_similarity: bool | None = None,
    include_sort_vector: bool | None = None,
    sort: SortType | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> AsyncCollectionFindCursor[DOC, DOC2]:
    """
    Query the collection for the documents that satisfy a provided filter.

    Instead of a list of results, the method returns a cursor, which is
    then iterated over in order to consume the documents. Depending
    on the method call pattern, the iteration over all documents can reflect
    collection mutations occurred since the `find` method was called, or not.
    In cases where the cursor reflects mutations in real-time, it will iterate
    over documents in an approximate way (i.e. exhibiting occasional skipped
    or duplicate documents). This happens when making use of the `sort`
    option in a non-vector-search manner.

    Args:
        filter: a dictionary expressing a predicate, according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            The full set of operators is found in the Data API documentation.
        projection: determines which parts of the document are returned.
            It can be an allow-list: `{"f1": True, "f2": True}`,
            or a deny-list: `{"fx": False, "fy": False}`, but not a mixture
            (except for the `_id` and other special fields, which can be
            associated to both True or False independently of the rest
            of the specification).
            The special star-projections `{"*": True}` and `{"*": False}`
            result in the whole document and `{}` being returned, respectively.
            For lists in documents, slice directives can be passed to select
            portions of the list: for instance, `{"array": {"$slice": 2}}`,
            `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
            `{"array": {"$slice": [-4, 2]}}`.
            An iterable over strings is implicitly treated as an allow-list.
            The default projection (used if this parameter is not passed) does not
            necessarily include "special" fields such as `$vector` or `$vectorize`.
            See the Data API documentation for more on projections.
        document_type: this parameter acts as a formal specifier for the type
            checker. If omitted, the resulting cursor is implicitly an
            `AsyncCollectionFindCursor[DOC, DOC]`, i.e. maintains the same type for
            the items it returns as that for the documents in the collection.
            Strictly typed code may want to specify this parameter especially when
            a projection is given.
        skip: an integer: what would be the first `skip` documents returned
            by the query are discarded, and the results start from
            the (skip+1)-th document.
            This parameter can be used only in conjunction with an explicit
            `sort` criterion of the ascending/descending type (i.e. it cannot
            be used when not sorting, nor with vector-based ANN search).
        limit: an integer setting a cap on how many documents are returned.
            Once `limit` is reached (or the cursor is exhausted for lack
            of matching documents), nothing more is returned.
        include_similarity: a boolean to request the numeric value of the
            similarity to be returned as an added "$similarity" key in each
            returned document. It can be used meaningfully only in a vector
            search (see `sort`).
        include_sort_vector: a boolean to request the search query vector.
            If set to True (and if the invocation is a vector search), calling
            the `get_sort_vector` method on the returned cursor will yield
            the vector used for the ANN search.
        sort: a dictionary controlling the order the documents are returned.
            See the Note about sorting, as well as the one about upper bounds,
            for details.
            Vector-based ANN sorting is achieved by providing a "$vector"
            or a "$vectorize" key in `sort`.
        request_timeout_ms: a timeout, in milliseconds, for each single one
            of the underlying HTTP requests used to fetch documents as the
            cursor is iterated over.
            If not passed, the collection-level setting is used instead.
        timeout_ms: an alias for `request_timeout_ms`.

    Returns:
        an AsyncCollectionFindCursor object, that can be iterated over (and
        manipulated in several ways). The cursor, if needed, handles pagination
        under the hood as the documents are consumed.

    Examples:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> async def run_finds(acol: AsyncCollection) -> None:
        ...     filter = {"seq": {"$exists": True}}
        ...     print("find results 1:")
        ...     async for doc in acol.find(filter, projection={"seq": True}, limit=5):
        ...         print(doc["seq"])
        ...     async_cursor1 = acol.find(
        ...         {},
        ...         limit=4,
        ...         sort={"seq": astrapy.constants.SortMode.DESCENDING},
        ...     )
        ...     ids = [doc["_id"] async for doc in async_cursor1]
        ...     print("find results 2:", ids)
        ...
        >>> asyncio.run(run_finds(my_async_coll))
        find results 1:
        48
        35
        7
        11
        13
        find results 2: ['d656cd9d-...', '479c7ce8-...', '96dc87fd-...', '83f0a21f-...']

        >>> async def run_vector_finds(acol: AsyncCollection) -> list[str]:
        ...     await acol.insert_many([
        ...         {"tag": "A", "$vector": [4, 5]},
        ...         {"tag": "B", "$vector": [3, 4]},
        ...         {"tag": "C", "$vector": [3, 2]},
        ...         {"tag": "D", "$vector": [4, 1]},
        ...         {"tag": "E", "$vector": [2, 5]},
        ...     ])
        ...     ann_tags = [
        ...         document["tag"]
        ...         async for document in acol.find(
        ...             {},
        ...             sort={"$vector": [3, 3]},
        ...             limit=3,
        ...         )
        ...     ]
        ...     return ann_tags
        ...
        >>> asyncio.run(run_vector_finds(my_async_coll))
        ['A', 'B', 'C']
        >>> # (assuming the collection has metric VectorMetric.COSINE)

        >>> async_cursor = my_async_coll.find(
        ...     sort={"$vector": [3, 3]},
        ...     limit=3,
        ...     include_sort_vector=True,
        ... )
        >>> asyncio.run(async_cursor.get_sort_vector())
        [3.0, 3.0]
        >>> asyncio.run(async_cursor.__anext__())
        {'_id': 'b13ce177-738e-47ec-bce1-77738ee7ec93', 'tag': 'A'}
        >>> asyncio.run(async_cursor.get_sort_vector())
        [3.0, 3.0]

    Note:
        Here are some example values for the `sort` parameter.
        When no particular order is required:
            sort={}
        When sorting by a certain value in ascending/descending order:
            sort={"field": SortMode.ASCENDING}
            sort={"field": SortMode.DESCENDING}
        When sorting first by "field" and then by "subfield"
        (while modern Python versions preserve the order of dictionaries,
        it is suggested for clarity to employ a `collections.OrderedDict`
        in these cases):
            sort={
                "field": SortMode.ASCENDING,
                "subfield": SortMode.ASCENDING,
            }
        When running a vector similarity (ANN) search:
            sort={"$vector": [0.4, 0.15, -0.5]}

    Note:
        Some combinations of arguments impose an implicit upper bound on the
        number of documents that are returned by the Data API. More specifically:
        (a) Vector ANN searches cannot return more than a number of documents
        that at the time of writing is set to 1000 items.
        (b) When using a sort criterion of the ascending/descending type,
        the Data API will return a smaller number of documents, set to 20
        at the time of writing, and stop there. The returned documents are
        the top results across the whole collection according to the requested
        criterion.

    Note:
        When not specifying sorting criteria at all (by vector or otherwise),
        the cursor can scroll through an arbitrary number of documents as
        the Data API and the client periodically exchange new chunks of documents.
        It should be noted that the behavior of the cursor in the case documents
        have been added/removed after the `find` was started depends on database
        internals and it is not guaranteed, nor excluded, that such "real-time"
        changes in the data would be picked up by the cursor.
    """

    # imported within the method: a module-level import would be circular
    from astrapy.cursors import AsyncCollectionFindCursor

    # candidates are handed to the helper in this order:
    # explicit parameter, its alias, and lastly the collection-level setting
    effective_timeout_ms, timeout_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )

    # start from a bare cursor bound to this collection ...
    cursor: AsyncCollectionFindCursor[DOC, DOC2] = AsyncCollectionFindCursor(
        collection=self,
        request_timeout_ms=effective_timeout_ms,
        overall_timeout_ms=None,
        request_timeout_label=timeout_label,
    )
    # ... and apply the query settings one at a time, each call
    # yielding the cursor that is used for the next step
    cursor = cursor.filter(filter)
    cursor = cursor.project(projection)
    cursor = cursor.skip(skip)
    cursor = cursor.limit(limit)
    cursor = cursor.sort(sort)
    cursor = cursor.include_similarity(include_similarity)
    cursor = cursor.include_sort_vector(include_sort_vector)
    return cursor

Find relevant documents, combining vector and lexical matches through reranking.

For this method to succeed, the collection must be created with the required hybrid capabilities (see the create_collection method of the Database class).

The method returns a cursor that can then be iterated over, which yields the resulting documents, generally paired with accompanying information such as scores.

Args

filter: a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
sort: a clause specifying the criteria for selecting the top matching documents. This must provide enough information for both a lexical and a vector similarity to be performed (the latter either by query text or by query vector, depending on the collection configuration). Examples are: sort={"$hybrid": "xyz"}, sort={"$hybrid": {"$vectorize": "xyz", "$lexical": "abc"}}, sort={"$hybrid": {"$vector": DataAPIVector(...), "$lexical": "abc"}}. Note this differs from the sort parameter for the find method.
projection: it controls which parts of the document are returned. It can be an allow-list: {"f1": True, "f2": True}, or a deny-list: {"fx": False, "fy": False}, but not a mixture (except for the _id and other special fields, which can be associated to both True or False independently of the rest of the specification). The special star-projections {"*": True} and {"*": False} have the effect of returning the whole document and {} respectively. For lists in documents, slice directives can be passed to select portions of the list: for instance, {"array": {"$slice": 2}}, {"array": {"$slice": -2}}, {"array": {"$slice": [4, 2]}} or {"array": {"$slice": [-4, 2]}}. An iterable over strings will be treated implicitly as an allow-list. The default projection (used if this parameter is not passed) does not necessarily include "special" fields such as $vector or $vectorize. See the Data API documentation for more on projections.
document_type: this parameter acts as a formal specifier for the type checker. If omitted, the resulting cursor is implicitly an AsyncCollectionFindAndRerankCursor[DOC, DOC], i.e. maintains the same type for the items it returns as that for the documents in the collection. Strictly typed code may want to specify this parameter especially when a projection is given.
limit: maximum number of documents to return as the result of the final rerank step.
hybrid_limits: this controls the amount of documents that are fetched by each of the individual retrieval operations that are combined in the rerank step. It can be either a number or a dictionary of strings to numbers, the latter case expressing different counts for the different retrievals. For example: hybrid_limits=50, hybrid_limits={"$vector": 20, "$lexical": 10}.
include_scores: a boolean to request the scores to be returned along with the resulting documents. If this is set, the scores can be read in the scores map attribute of each RerankedResult (the map is otherwise empty).
include_sort_vector: a boolean to request the search query vector used for the vector-search part of the find operation. If set to True, calling the get_sort_vector method on the returned cursor will yield the vector used for the ANN search.
rerank_on: for collections without a vectorize (server-side embeddings) service, this is used to specify the field name that is then used during reranking.
rerank_query: for collections without a vectorize (server-side embeddings) service, this is used to specify the query text for the reranker.
request_timeout_ms: a timeout, in milliseconds, for each single one of the underlying HTTP requests used to fetch documents as the cursor is iterated over. If not passed, the collection-level setting is used instead.
timeout_ms: an alias for request_timeout_ms.

Returns

an AsyncCollectionFindAndRerankCursor object, that can be iterated over (and manipulated in several ways).

Examples

>>> # NOTE: may require slight adaptation to an async context.
>>> #       See the same method on Collection for more usage patterns.
>>>
>>> async def run_find_and_reranks(acol: AsyncCollection) -> None:
...     print("find results 1:")
...     async for r_res in acol.find_and_rerank(
...         sort={"$hybrid": "query text"},
...         limit=3,
...     ):
...         print(r_res.document["wkd"])
...     async_cursor1 = acol.find_and_rerank(
...         {"wkd": {"$ne": "Mon"}},
...         sort={"$hybrid": "query text"},
...         limit=3,
...     )
...     ids = [r_res.document["_id"] async for r_res in async_cursor1]
...     print("find results 2:", ids)
...
>>> asyncio.run(run_find_and_reranks(my_async_coll))
find results 1:
Mon
Thu
Sat
find results 2: ['D', 'F', 'B']

Expand source code

@beta_method
def find_and_rerank(
    self,
    filter: FilterType | None = None,
    *,
    sort: HybridSortType,
    projection: ProjectionType | None = None,
    document_type: type[DOC2] | None = None,
    limit: int | None = None,
    hybrid_limits: int | dict[str, int] | None = None,
    include_scores: bool | None = None,
    include_sort_vector: bool | None = None,
    rerank_on: str | None = None,
    rerank_query: str | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> AsyncCollectionFindAndRerankCursor[DOC, RerankedResult[DOC2]]:
    """
    Retrieve relevant documents by merging vector and lexical matches
    through a reranking step.

    For this method to succeed, the collection must be created with the required
    hybrid capabilities (see the `create_collection` method of the Database class).

    The return value is a cursor, to be iterated over in order to obtain
    the resulting documents, which generally come paired with accompanying
    information such as scores.

    Args:
        filter: a dictionary expressing a predicate, according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            The full set of operators is found in the Data API documentation.
        sort: a clause specifying the criteria for selecting the top matching
            documents. This must provide enough information for both a lexical
            and a vector similarity to be performed (the latter either by query
            text or by query vector, depending on the collection configuration).
            Examples are: `sort={"$hybrid": "xyz"}`,
            `sort={"$hybrid": {"$vectorize": "xyz", "$lexical": "abc"}}`,
            `sort={"$hybrid": {"$vector": DataAPIVector(...), "$lexical": "abc"}}`.
            Note this differs from the `sort` parameter for the `find` method.
        projection: determines which parts of the document are returned.
            It can be an allow-list: `{"f1": True, "f2": True}`,
            or a deny-list: `{"fx": False, "fy": False}`, but not a mixture
            (except for the `_id` and other special fields, which can be
            associated to both True or False independently of the rest
            of the specification).
            The special star-projections `{"*": True}` and `{"*": False}`
            result in the whole document and `{}` being returned, respectively.
            For lists in documents, slice directives can be passed to select
            portions of the list: for instance, `{"array": {"$slice": 2}}`,
            `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
            `{"array": {"$slice": [-4, 2]}}`.
            An iterable over strings is implicitly treated as an allow-list.
            The default projection (used if this parameter is not passed) does not
            necessarily include "special" fields such as `$vector` or `$vectorize`.
            See the Data API documentation for more on projections.
        document_type: this parameter acts as a formal specifier for the type
            checker. If omitted, the resulting cursor is implicitly an
            `AsyncCollectionFindAndRerankCursor[DOC, DOC]`, i.e. maintains the same
            type for the items it returns as that for the documents in the
            collection. Strictly typed code may want to specify this parameter
            especially when a projection is given.
        limit: maximum number of documents to return as the result of the final
            rerank step.
        hybrid_limits: this controls the amount of documents that are fetched by
            each of the individual retrieval operations that are combined in the
            rerank step. It can be either a number or a dictionary of strings to
            numbers, the latter case expressing different counts for the different
            retrievals. For example: `hybrid_limits=50`,
            `hybrid_limits={"$vector": 20, "$lexical": 10}`.
        include_scores: a boolean to request the scores to be returned along with
            the resulting documents. If this is set, the scores can be read in
            the `scores` map attribute of each RerankedResult (the map is
            otherwise empty).
        include_sort_vector: a boolean to request the search query vector
            used for the vector-search part of the find operation.
            If set to True, calling the `get_sort_vector` method on the returned
            cursor will yield the vector used for the ANN search.
        rerank_on: for collections without a vectorize (server-side embeddings)
            service, this is used to specify the field name that is then used
            during reranking.
        rerank_query: for collections without a vectorize (server-side embeddings)
            service, this is used to specify the query text for the reranker.
        request_timeout_ms: a timeout, in milliseconds, for each single one
            of the underlying HTTP requests used to fetch documents as the
            cursor is iterated over.
            If not passed, the collection-level setting is used instead.
        timeout_ms: an alias for `request_timeout_ms`.

    Returns:
        an AsyncCollectionFindAndRerankCursor object, that can be iterated over
        (and manipulated in several ways).

    Examples:
        >>> # NOTE: may require slight adaptation to an async context.
        >>> #       See the same method on Collection for more usage patterns.
        >>>
        >>> async def run_find_and_reranks(acol: AsyncCollection) -> None:
        ...     print("find results 1:")
        ...     async for r_res in acol.find_and_rerank(
        ...         sort={"$hybrid": "query text"},
        ...         limit=3,
        ...     ):
        ...         print(r_res.document["wkd"])
        ...     async_cursor1 = acol.find_and_rerank(
        ...         {"wkd": {"$ne": "Mon"}},
        ...         sort={"$hybrid": "query text"},
        ...         limit=3,
        ...     )
        ...     ids = [r_res.document["_id"] async for r_res in async_cursor1]
        ...     print("find results 2:", ids)
        ...
        >>> asyncio.run(run_find_and_reranks(my_async_coll))
        find results 1:
        Mon
        Thu
        Sat
        find results 2: ['D', 'F', 'B']
    """

    # imported within the method: a module-level import would be circular
    from astrapy.cursors import AsyncCollectionFindAndRerankCursor

    # candidates are handed to the helper in this order:
    # explicit parameter, its alias, and lastly the collection-level setting
    effective_timeout_ms, timeout_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )

    # start from a bare cursor bound to this collection ...
    cursor: AsyncCollectionFindAndRerankCursor[DOC, RerankedResult[DOC2]] = (
        AsyncCollectionFindAndRerankCursor(
            collection=self,
            request_timeout_ms=effective_timeout_ms,
            overall_timeout_ms=None,
            request_timeout_label=timeout_label,
        )
    )
    # ... and apply the query settings one at a time, each call
    # yielding the cursor that is used for the next step
    cursor = cursor.filter(filter)
    cursor = cursor.project(projection)
    cursor = cursor.limit(limit)
    cursor = cursor.sort(sort)
    cursor = cursor.hybrid_limits(hybrid_limits)
    cursor = cursor.rerank_on(rerank_on)
    cursor = cursor.rerank_query(rerank_query)
    cursor = cursor.include_scores(include_scores)
    cursor = cursor.include_sort_vector(include_sort_vector)
    return cursor

Run a search, returning the first document in the collection that matches provided filters, if any is found.

Args

filter: a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
projection: it controls which parts of the document are returned. It can be an allow-list: {"f1": True, "f2": True}, or a deny-list: {"fx": False, "fy": False}, but not a mixture (except for the _id and other special fields, which can be associated to both True or False independently of the rest of the specification). The special star-projections {"*": True} and {"*": False} have the effect of returning the whole document and {} respectively. For lists in documents, slice directives can be passed to select portions of the list: for instance, {"array": {"$slice": 2}}, {"array": {"$slice": -2}}, {"array": {"$slice": [4, 2]}} or {"array": {"$slice": [-4, 2]}}. An iterable over strings will be treated implicitly as an allow-list. The default projection (used if this parameter is not passed) does not necessarily include "special" fields such as $vector or $vectorize. See the Data API documentation for more on projections.
include_similarity: a boolean to request the numeric value of the similarity to be returned as an added "$similarity" key in the returned document. It can be used meaningfully only in a vector search (see sort).
sort: with this dictionary parameter one can control the order the documents are returned. See the Note about sorting for details. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a dictionary expressing the required document, otherwise None.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> async def demo_find_one(acol: AsyncCollection) -> None:
...     print("Count:", await acol.count_documents({}, upper_bound=100))
...     result0 = await acol.find_one({})
...     print("result0", result0)
...     result1 = await acol.find_one({"seq": 10})
...     print("result1", result1)
...     result2 = await acol.find_one({"seq": 1011})
...     print("result2", result2)
...     result3 = await acol.find_one({}, projection={"seq": False})
...     print("result3", result3)
...     result4 = await acol.find_one(
...         {},
...         sort={"seq": astrapy.constants.SortMode.DESCENDING},
...     )
...     print("result4", result4)
...
>>>
>>> asyncio.run(demo_find_one(my_async_coll))
Count: 50
result0 {'_id': '479c7ce8-...', 'seq': 48}
result1 {'_id': '93e992c4-...', 'seq': 10}
result2 None
result3 {'_id': '479c7ce8-...'}
result4 {'_id': 'd656cd9d-...', 'seq': 49}

>>> asyncio.run(my_async_coll.find_one(
...     {},
...     sort={"$vector": [1, 0]},
...     projection={"*": True},
... ))
{'_id': '...', 'tag': 'D', '$vector': [4.0, 1.0]}

Note

See the find method for more details on the accepted parameters (whereas skip and limit are not valid parameters for find_one).

Expand source code

async def find_one(
    self,
    filter: FilterType | None = None,
    *,
    projection: ProjectionType | None = None,
    include_similarity: bool | None = None,
    sort: SortType | None = None,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> DOC | None:
    """
    Search the collection and return the first document matching
    the provided filters, if any is found.

    Args:
        filter: a dictionary expressing a predicate, according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            The full set of operators is found in the Data API documentation.
        projection: determines which parts of the document are returned.
            It can be an allow-list: `{"f1": True, "f2": True}`,
            or a deny-list: `{"fx": False, "fy": False}`, but not a mixture
            (except for the `_id` and other special fields, which can be
            associated to both True or False independently of the rest
            of the specification).
            The special star-projections `{"*": True}` and `{"*": False}`
            result in the whole document and `{}` being returned, respectively.
            For lists in documents, slice directives can be passed to select
            portions of the list: for instance, `{"array": {"$slice": 2}}`,
            `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
            `{"array": {"$slice": [-4, 2]}}`.
            An iterable over strings is implicitly treated as an allow-list.
            The default projection (used if this parameter is not passed) does not
            necessarily include "special" fields such as `$vector` or `$vectorize`.
            See the Data API documentation for more on projections.
        include_similarity: a boolean to request the numeric value of the
            similarity to be returned as an added "$similarity" key in the
            returned document. It can be used meaningfully only in a vector
            search (see `sort`).
        sort: a dictionary controlling the order the documents are returned.
            See the Note about sorting for details.
            Vector-based ANN sorting is achieved by providing a "$vector"
            or a "$vectorize" key in `sort`.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a dictionary expressing the required document, otherwise None.

    Raises:
        UnexpectedDataAPIResponseException: if the API response has no
            "document" entry within its "data" section.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> async def demo_find_one(acol: AsyncCollection) -> None:
        ...     print("Count:", await acol.count_documents({}, upper_bound=100))
        ...     result0 = await acol.find_one({})
        ...     print("result0", result0)
        ...     result1 = await acol.find_one({"seq": 10})
        ...     print("result1", result1)
        ...     result2 = await acol.find_one({"seq": 1011})
        ...     print("result2", result2)
        ...     result3 = await acol.find_one({}, projection={"seq": False})
        ...     print("result3", result3)
        ...     result4 = await acol.find_one(
        ...         {},
        ...         sort={"seq": astrapy.constants.SortMode.DESCENDING},
        ...     )
        ...     print("result4", result4)
        ...
        >>>
        >>> asyncio.run(demo_find_one(my_async_coll))
        Count: 50
        result0 {'_id': '479c7ce8-...', 'seq': 48}
        result1 {'_id': '93e992c4-...', 'seq': 10}
        result2 None
        result3 {'_id': '479c7ce8-...'}
        result4 {'_id': 'd656cd9d-...', 'seq': 49}

        >>> asyncio.run(my_async_coll.find_one(
        ...     {},
        ...     sort={"$vector": [1, 0]},
        ...     projection={"*": True},
        ... ))
        {'_id': '...', 'tag': 'D', '$vector': [4.0, 1.0]}

    Note:
        See the `find` method for more details on the accepted parameters
        (whereas `skip` and `limit` are not valid parameters for `find_one`).
    """

    effective_timeout_ms, timeout_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )

    # the "options" entry is sent only if a similarity preference was expressed
    if include_similarity is None:
        command_options = None
    else:
        command_options = {"includeSimilarity": include_similarity}

    # all fields the command may carry: those left to None are dropped below
    candidate_fields = {
        "filter": filter,
        "projection": normalize_optional_projection(projection),
        "options": command_options,
        "sort": sort,
    }
    command_payload = {
        "findOne": {
            field_name: field_value
            for field_name, field_value in candidate_fields.items()
            if field_value is not None
        }
    }

    response = await self._converted_request(
        payload=command_payload,
        timeout_context=_TimeoutContext(
            request_ms=effective_timeout_ms, label=timeout_label
        ),
    )

    # a missing (or null) "data" section is handled like one with no "document"
    response_data = response.get("data") or {}
    if "document" not in response_data:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from findOne API command.",
            raw_response=response,
        )
    # the value is None when no document matched, otherwise the document itself
    return response_data["document"]  # type: ignore[no-any-return]

async def find_one_and_delete(self, filter: FilterType, *, projection: ProjectionType | None = None, sort: SortType | None = None, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> Optional[~DOC]

Find a document in the collection and delete it. The deleted document, however, is the return value of the method.

Args

filter: a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
projection: it controls which parts of the document are returned. It can be an allow-list: {"f1": True, "f2": True}, or a deny-list: {"fx": False, "fy": False}, but not a mixture (except for the _id and other special fields, which can be associated to both True or False independently of the rest of the specification). The special star-projections {"*": True} and {"*": False} have the effect of returning the whole document and {} respectively. For lists in documents, slice directives can be passed to select portions of the list: for instance, {"array": {"$slice": 2}}, {"array": {"$slice": -2}}, {"array": {"$slice": [4, 2]}} or {"array": {"$slice": [-4, 2]}}. An iterable over strings will be treated implicitly as an allow-list. The default projection (used if this parameter is not passed) does not necessarily include "special" fields such as $vector or $vectorize. See the Data API documentation for more on projections.
sort: with this dictionary parameter one can control the sorting order of the documents matching the filter, effectively determining what document will come first and hence be the deleted one. See the find method for more on sorting. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Returns

Either the document (or a projection thereof, as requested), or None if no matches were found in the first place.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> async def do_find_one_and_delete(acol: AsyncCollection) -> None:
...     await acol.insert_many(
...         [
...             {"species": "swan", "class": "Aves"},
...             {"species": "frog", "class": "Amphibia"},
...         ],
...     )
...     delete_result0 = await acol.find_one_and_delete(
...         {"species": {"$ne": "frog"}},
...         projection=["species"],
...     )
...     print("delete_result0", delete_result0)
...     delete_result1 = await acol.find_one_and_delete(
...         {"species": {"$ne": "frog"}},
...     )
...     print("delete_result1", delete_result1)
...
>>> asyncio.run(do_find_one_and_delete(my_async_coll))
delete_result0 {'_id': 'f335cd0f-...', 'species': 'swan'}
delete_result1 None

Expand source code

async def find_one_and_delete(
    self,
    filter: FilterType,
    *,
    projection: ProjectionType | None = None,
    sort: SortType | None = None,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> DOC | None:
    """
    Find a document in the collection and delete it. The deleted document,
    however, is the return value of the method.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        projection: it controls which parts of the document are returned.
            It can be an allow-list: `{"f1": True, "f2": True}`,
            or a deny-list: `{"fx": False, "fy": False}`, but not a mixture
            (except for the `_id` and other special fields, which can be
            associated to both True or False independently of the rest
            of the specification).
            The special star-projections `{"*": True}` and `{"*": False}`
            have the effect of returning the whole document and `{}` respectively.
            For lists in documents, slice directives can be passed to select
            portions of the list: for instance, `{"array": {"$slice": 2}}`,
            `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
            `{"array": {"$slice": [-4, 2]}}`.
            An iterable over strings will be treated implicitly as an allow-list.
            The default projection (used if this parameter is not passed) does not
            necessarily include "special" fields such as `$vector` or `$vectorize`.
            See the Data API documentation for more on projections.
        sort: with this dictionary parameter one can control the sorting
            order of the documents matching the filter, effectively
            determining what document will come first and hence be the
            deleted one. See the `find` method for more on sorting.
            Vector-based ANN sorting is achieved by providing a "$vector"
            or a "$vectorize" key in `sort`.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        Either the document (or a projection thereof, as requested), or None
        if no matches were found in the first place.

    Raises:
        UnexpectedDataAPIResponseException: if the API response carries
            neither a "document" entry nor a zero "deletedCount" status.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> async def do_find_one_and_delete(acol: AsyncCollection) -> None:
        ...     await acol.insert_many(
        ...         [
        ...             {"species": "swan", "class": "Aves"},
        ...             {"species": "frog", "class": "Amphibia"},
        ...         ],
        ...     )
        ...     delete_result0 = await acol.find_one_and_delete(
        ...         {"species": {"$ne": "frog"}},
        ...         projection=["species"],
        ...     )
        ...     print("delete_result0", delete_result0)
        ...     delete_result1 = await acol.find_one_and_delete(
        ...         {"species": {"$ne": "frog"}},
        ...     )
        ...     print("delete_result1", delete_result1)
        ...
        >>> asyncio.run(do_find_one_and_delete(my_async_coll))
        delete_result0 {'_id': 'f335cd0f-...', 'species': 'swan'}
        delete_result1 None
    """

    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    _projection = normalize_optional_projection(projection)
    # Unset (None) entries are left out of the command altogether.
    fo_payload = {
        "findOneAndDelete": {
            k: v
            for k, v in {
                "filter": filter,
                "sort": sort,
                "projection": _projection,
            }.items()
            if v is not None
        }
    }
    logger.info(f"findOneAndDelete on '{self.name}'")
    fo_response = await self._converted_request(
        payload=fo_payload,
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    logger.info(f"finished findOneAndDelete on '{self.name}'")
    # `or {}` (rather than a `.get` default) also covers a key that is
    # present but null, which would otherwise make the `in` test raise
    # a TypeError instead of reaching the intended handling below.
    if "document" in (fo_response.get("data") or {}):
        return fo_response["data"]["document"]  # type: ignore[no-any-return]
    # No document in the response: this is acceptable only if the API
    # explicitly reports that nothing was deleted.
    deleted_count = (fo_response.get("status") or {}).get("deletedCount")
    if deleted_count == 0:
        return None
    raise UnexpectedDataAPIResponseException(
        text="Faulty response from find_one_and_delete API command.",
        raw_response=fo_response,
    )

async def find_one_and_replace(self, filter: FilterType, replacement: DOC, *, projection: ProjectionType | None = None, sort: SortType | None = None, upsert: bool = False, return_document: str = 'before', general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> Optional[~DOC]

Find a document on the collection and replace it entirely with a new one, optionally inserting a new one if no match is found.

Args

filter: a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
replacement: the new document to write into the collection.
projection: it controls which parts of the document are returned. It can be an allow-list: {"f1": True, "f2": True}, or a deny-list: {"fx": False, "fy": False}, but not a mixture (except for the _id and other special fields, which can be associated to both True or False independently of the rest of the specification). The special star-projections {"*": True} and {"*": False} have the effect of returning the whole document and {} respectively. For lists in documents, slice directives can be passed to select portions of the list: for instance, {"array": {"$slice": 2}}, {"array": {"$slice": -2}}, {"array": {"$slice": [4, 2]}} or {"array": {"$slice": [-4, 2]}}. An iterable over strings will be treated implicitly as an allow-list. The default projection (used if this parameter is not passed) does not necessarily include "special" fields such as $vector or $vectorize. See the Data API documentation for more on projections.
sort: with this dictionary parameter one can control the sorting order of the documents matching the filter, effectively determining what document will come first and hence be the replaced one. See the find method for more on sorting. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
upsert: this parameter controls the behavior in absence of matches. If True, replacement is inserted as a new document if no matches are found on the collection. If False, the operation silently does nothing in case of no matches.
return_document: a flag controlling what document is returned: if set to ReturnDocument.BEFORE, or the string "before", the document found on database is returned; if set to ReturnDocument.AFTER, or the string "after", the new document is returned. The default is "before".
general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Returns

A document, either the one before the replace operation or the one after that. Alternatively, the method returns None to represent that no matching document was found, or that no replacement was inserted (depending on the return_document parameter).

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> async def do_find_one_and_replace(
...     acol: AsyncCollection
... ) -> None:
...     await acol.insert_one(
...         {"_id": "rule1", "text": "all animals are equal"}
...     )
...     result0 = await acol.find_one_and_replace(
...         {"_id": "rule1"},
...         {"text": "some animals are more equal!"},
...     )
...     print("result0", result0)
...     result1 = await acol.find_one_and_replace(
...         {"text": "some animals are more equal!"},
...         {"text": "and the pigs are the rulers"},
...         return_document=astrapy.constants.ReturnDocument.AFTER,
...     )
...     print("result1", result1)
...     result2 = await acol.find_one_and_replace(
...         {"_id": "rule2"},
...         {"text": "F=ma^2"},
...         return_document=astrapy.constants.ReturnDocument.AFTER,
...     )
...     print("result2", result2)
...     result3 = await acol.find_one_and_replace(
...         {"_id": "rule2"},
...         {"text": "F=ma"},
...         upsert=True,
...         return_document=astrapy.constants.ReturnDocument.AFTER,
...         projection={"_id": False},
...     )
...     print("result3", result3)
...
>>> asyncio.run(do_find_one_and_replace(my_async_coll))
result0 {'_id': 'rule1', 'text': 'all animals are equal'}
result1 {'_id': 'rule1', 'text': 'and the pigs are the rulers'}
result2 None
result3 {'text': 'F=ma'}

Expand source code

async def find_one_and_replace(
    self,
    filter: FilterType,
    replacement: DOC,
    *,
    projection: ProjectionType | None = None,
    sort: SortType | None = None,
    upsert: bool = False,
    return_document: str = ReturnDocument.BEFORE,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> DOC | None:
    """
    Find a document on the collection and replace it entirely with a new one,
    optionally inserting a new one if no match is found.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        replacement: the new document to write into the collection.
        projection: it controls which parts of the document are returned.
            It can be an allow-list: `{"f1": True, "f2": True}`,
            or a deny-list: `{"fx": False, "fy": False}`, but not a mixture
            (except for the `_id` and other special fields, which can be
            associated to both True or False independently of the rest
            of the specification).
            The special star-projections `{"*": True}` and `{"*": False}`
            have the effect of returning the whole document and `{}` respectively.
            For lists in documents, slice directives can be passed to select
            portions of the list: for instance, `{"array": {"$slice": 2}}`,
            `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
            `{"array": {"$slice": [-4, 2]}}`.
            An iterable over strings will be treated implicitly as an allow-list.
            The default projection (used if this parameter is not passed) does not
            necessarily include "special" fields such as `$vector` or `$vectorize`.
            See the Data API documentation for more on projections.
        sort: with this dictionary parameter one can control the sorting
            order of the documents matching the filter, effectively
            determining what document will come first and hence be the
            replaced one. See the `find` method for more on sorting.
            Vector-based ANN sorting is achieved by providing a "$vector"
            or a "$vectorize" key in `sort`.
        upsert: this parameter controls the behavior in absence of matches.
            If True, `replacement` is inserted as a new document
            if no matches are found on the collection. If False,
            the operation silently does nothing in case of no matches.
        return_document: a flag controlling what document is returned:
            if set to `ReturnDocument.BEFORE`, or the string "before",
            the document found on database is returned; if set to
            `ReturnDocument.AFTER`, or the string "after", the new
            document is returned. The default is "before".
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        A document, either the one before the replace operation or the
        one after that. Alternatively, the method returns None to represent
        that no matching document was found, or that no replacement
        was inserted (depending on the `return_document` parameter).

    Raises:
        UnexpectedDataAPIResponseException: if the API response has no
            "document" entry in its "data" section.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> async def do_find_one_and_replace(
        ...     acol: AsyncCollection
        ... ) -> None:
        ...     await acol.insert_one(
        ...         {"_id": "rule1", "text": "all animals are equal"}
        ...     )
        ...     result0 = await acol.find_one_and_replace(
        ...         {"_id": "rule1"},
        ...         {"text": "some animals are more equal!"},
        ...     )
        ...     print("result0", result0)
        ...     result1 = await acol.find_one_and_replace(
        ...         {"text": "some animals are more equal!"},
        ...         {"text": "and the pigs are the rulers"},
        ...         return_document=astrapy.constants.ReturnDocument.AFTER,
        ...     )
        ...     print("result1", result1)
        ...     result2 = await acol.find_one_and_replace(
        ...         {"_id": "rule2"},
        ...         {"text": "F=ma^2"},
        ...         return_document=astrapy.constants.ReturnDocument.AFTER,
        ...     )
        ...     print("result2", result2)
        ...     result3 = await acol.find_one_and_replace(
        ...         {"_id": "rule2"},
        ...         {"text": "F=ma"},
        ...         upsert=True,
        ...         return_document=astrapy.constants.ReturnDocument.AFTER,
        ...         projection={"_id": False},
        ...     )
        ...     print("result3", result3)
        ...
        >>> asyncio.run(do_find_one_and_replace(my_async_coll))
        result0 {'_id': 'rule1', 'text': 'all animals are equal'}
        result1 {'_id': 'rule1', 'text': 'and the pigs are the rulers'}
        result2 None
        result3 {'text': 'F=ma'}
    """

    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    options = {
        "returnDocument": return_document,
        "upsert": upsert,
    }
    # Unset (None) entries are left out of the command altogether.
    fo_payload = {
        "findOneAndReplace": {
            k: v
            for k, v in {
                "filter": filter,
                "projection": normalize_optional_projection(projection),
                "replacement": replacement,
                "options": options,
                "sort": sort,
            }.items()
            if v is not None
        }
    }
    logger.info(f"findOneAndReplace on '{self.name}'")
    fo_response = await self._converted_request(
        payload=fo_payload,
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    logger.info(f"finished findOneAndReplace on '{self.name}'")
    # `or {}` (rather than a `.get` default) also covers a "data" key that
    # is present but null, so that case raises the dedicated exception
    # below instead of a TypeError from the `in` test.
    fo_data = fo_response.get("data") or {}
    if "document" not in fo_data:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from find_one_and_replace API command.",
            raw_response=fo_response,
        )
    # The document may legitimately be None (nothing matched / inserted).
    return fo_data["document"]  # type: ignore[no-any-return]

async def find_one_and_update(self, filter: FilterType, update: dict[str, Any], *, projection: ProjectionType | None = None, sort: SortType | None = None, upsert: bool = False, return_document: str = 'before', general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> Optional[~DOC]

Find a document on the collection and update it as requested, optionally inserting a new one if no match is found.

Args

filter: a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
update: the update prescription to apply to the document, expressed as a dictionary as per Data API syntax. Examples are: {"$set": {"field": "value"}} {"$inc": {"counter": 10}} {"$unset": {"field": ""}} See the Data API documentation for the full syntax.
projection: it controls which parts of the document are returned. It can be an allow-list: {"f1": True, "f2": True}, or a deny-list: {"fx": False, "fy": False}, but not a mixture (except for the _id and other special fields, which can be associated to both True or False independently of the rest of the specification). The special star-projections {"*": True} and {"*": False} have the effect of returning the whole document and {} respectively. For lists in documents, slice directives can be passed to select portions of the list: for instance, {"array": {"$slice": 2}}, {"array": {"$slice": -2}}, {"array": {"$slice": [4, 2]}} or {"array": {"$slice": [-4, 2]}}. An iterable over strings will be treated implicitly as an allow-list. The default projection (used if this parameter is not passed) does not necessarily include "special" fields such as $vector or $vectorize. See the Data API documentation for more on projections.
sort: with this dictionary parameter one can control the sorting order of the documents matching the filter, effectively determining what document will come first and hence be the updated one. See the find method for more on sorting. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
upsert: this parameter controls the behavior in absence of matches. If True, a new document (resulting from applying the update to an empty document) is inserted if no matches are found on the collection. If False, the operation silently does nothing in case of no matches.
return_document: a flag controlling what document is returned: if set to ReturnDocument.BEFORE, or the string "before", the document found on database is returned; if set to ReturnDocument.AFTER, or the string "after", the new document is returned. The default is "before".
general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Returns

A document (or a projection thereof, as required), either the one before the update operation or the one after that. Alternatively, the method returns None to represent that no matching document was found, or that no update was applied (depending on the return_document parameter).

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> async def do_find_one_and_update(acol: AsyncCollection) -> None:
...     await acol.insert_one({"Marco": "Polo"})
...     result0 = await acol.find_one_and_update(
...         {"Marco": {"$exists": True}},
...         {"$set": {"title": "Mr."}},
...     )
...     print("result0", result0)
...     result1 = await acol.find_one_and_update(
...         {"title": "Mr."},
...         {"$inc": {"rank": 3}},
...         projection=["title", "rank"],
...         return_document=astrapy.constants.ReturnDocument.AFTER,
...     )
...     print("result1", result1)
...     result2 = await acol.find_one_and_update(
...         {"name": "Johnny"},
...         {"$set": {"rank": 0}},
...         return_document=astrapy.constants.ReturnDocument.AFTER,
...     )
...     print("result2", result2)
...     result3 = await acol.find_one_and_update(
...         {"name": "Johnny"},
...         {"$set": {"rank": 0}},
...         upsert=True,
...         return_document=astrapy.constants.ReturnDocument.AFTER,
...     )
...     print("result3", result3)
...
>>> asyncio.run(do_find_one_and_update(my_async_coll))
result0 {'_id': 'f7c936d3-b0a0-45eb-a676-e2829662a57c', 'Marco': 'Polo'}
result1 {'_id': 'f7c936d3-b0a0-45eb-a676-e2829662a57c', 'title': 'Mr.', 'rank': 3}
result2 None
result3 {'_id': 'db3d678d-14d4-4caa-82d2-d5fb77dab7ec', 'name': 'Johnny', 'rank': 0}

Expand source code

async def find_one_and_update(
    self,
    filter: FilterType,
    update: dict[str, Any],
    *,
    projection: ProjectionType | None = None,
    sort: SortType | None = None,
    upsert: bool = False,
    return_document: str = ReturnDocument.BEFORE,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> DOC | None:
    """
    Find a document on the collection and update it as requested,
    optionally inserting a new one if no match is found.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        update: the update prescription to apply to the document, expressed
            as a dictionary as per Data API syntax. Examples are:
                {"$set": {"field": "value"}}
                {"$inc": {"counter": 10}}
                {"$unset": {"field": ""}}
            See the Data API documentation for the full syntax.
        projection: it controls which parts of the document are returned.
            It can be an allow-list: `{"f1": True, "f2": True}`,
            or a deny-list: `{"fx": False, "fy": False}`, but not a mixture
            (except for the `_id` and other special fields, which can be
            associated to both True or False independently of the rest
            of the specification).
            The special star-projections `{"*": True}` and `{"*": False}`
            have the effect of returning the whole document and `{}` respectively.
            For lists in documents, slice directives can be passed to select
            portions of the list: for instance, `{"array": {"$slice": 2}}`,
            `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
            `{"array": {"$slice": [-4, 2]}}`.
            An iterable over strings will be treated implicitly as an allow-list.
            The default projection (used if this parameter is not passed) does not
            necessarily include "special" fields such as `$vector` or `$vectorize`.
            See the Data API documentation for more on projections.
        sort: with this dictionary parameter one can control the sorting
            order of the documents matching the filter, effectively
            determining what document will come first and hence be the
            updated one. See the `find` method for more on sorting.
            Vector-based ANN sorting is achieved by providing a "$vector"
            or a "$vectorize" key in `sort`.
        upsert: this parameter controls the behavior in absence of matches.
            If True, a new document (resulting from applying the `update`
            to an empty document) is inserted if no matches are found on
            the collection. If False, the operation silently does nothing
            in case of no matches.
        return_document: a flag controlling what document is returned:
            if set to `ReturnDocument.BEFORE`, or the string "before",
            the document found on database is returned; if set to
            `ReturnDocument.AFTER`, or the string "after", the new
            document is returned. The default is "before".
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        A document (or a projection thereof, as required), either the one
        before the update operation or the one after that.
        Alternatively, the method returns None to represent
        that no matching document was found, or that no update
        was applied (depending on the `return_document` parameter).

    Raises:
        UnexpectedDataAPIResponseException: if the API response has no
            "document" entry in its "data" section.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> async def do_find_one_and_update(acol: AsyncCollection) -> None:
        ...     await acol.insert_one({"Marco": "Polo"})
        ...     result0 = await acol.find_one_and_update(
        ...         {"Marco": {"$exists": True}},
        ...         {"$set": {"title": "Mr."}},
        ...     )
        ...     print("result0", result0)
        ...     result1 = await acol.find_one_and_update(
        ...         {"title": "Mr."},
        ...         {"$inc": {"rank": 3}},
        ...         projection=["title", "rank"],
        ...         return_document=astrapy.constants.ReturnDocument.AFTER,
        ...     )
        ...     print("result1", result1)
        ...     result2 = await acol.find_one_and_update(
        ...         {"name": "Johnny"},
        ...         {"$set": {"rank": 0}},
        ...         return_document=astrapy.constants.ReturnDocument.AFTER,
        ...     )
        ...     print("result2", result2)
        ...     result3 = await acol.find_one_and_update(
        ...         {"name": "Johnny"},
        ...         {"$set": {"rank": 0}},
        ...         upsert=True,
        ...         return_document=astrapy.constants.ReturnDocument.AFTER,
        ...     )
        ...     print("result3", result3)
        ...
        >>> asyncio.run(do_find_one_and_update(my_async_coll))
        result0 {'_id': 'f7c936d3-b0a0-45eb-a676-e2829662a57c', 'Marco': 'Polo'}
        result1 {'_id': 'f7c936d3-b0a0-45eb-a676-e2829662a57c', 'title': 'Mr.', 'rank': 3}
        result2 None
        result3 {'_id': 'db3d678d-14d4-4caa-82d2-d5fb77dab7ec', 'name': 'Johnny', 'rank': 0}
    """

    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    options = {
        "returnDocument": return_document,
        "upsert": upsert,
    }
    # Unset (None) entries are left out of the command altogether.
    fo_payload = {
        "findOneAndUpdate": {
            k: v
            for k, v in {
                "filter": filter,
                "update": update,
                "options": options,
                "sort": sort,
                "projection": normalize_optional_projection(projection),
            }.items()
            if v is not None
        }
    }
    logger.info(f"findOneAndUpdate on '{self.name}'")
    fo_response = await self._converted_request(
        payload=fo_payload,
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    logger.info(f"finished findOneAndUpdate on '{self.name}'")
    # `or {}` (rather than a `.get` default) also covers a "data" key that
    # is present but null, so that case raises the dedicated exception
    # below instead of a TypeError from the `in` test.
    fo_data = fo_response.get("data") or {}
    if "document" not in fo_data:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from find_one_and_update API command.",
            raw_response=fo_response,
        )
    # The document may legitimately be None (nothing matched / upserted).
    return fo_data["document"]  # type: ignore[no-any-return]

async def info(self, *, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> CollectionInfo

Information on the collection (name, location, database), in the form of a CollectionInfo object.

Not to be confused with the collection options method (related to the collection internal configuration).

Args

database_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying DevOps API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for database_admin_timeout_ms.
timeout_ms: an alias for database_admin_timeout_ms.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> asyncio.run(my_async_coll.info()).database_info.region
'us-east1'
>>> asyncio.run(my_async_coll.info()).full_name
'default_keyspace.my_v_collection'

Note

the returned CollectionInfo wraps, among other things, the database information: as such, calling this method triggers the same-named method of a Database object (which, in turn, performs an HTTP request to the DevOps API). See the documentation for Database.info() for more details.

Expand source code

async def info(
    self,
    *,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionInfo:
    """
    Return a CollectionInfo object describing this collection: its name,
    keyspace, full name and the information about its database.

    This is unrelated to the collection `options` method, which deals
    with the collection internal configuration.

    Args:
        database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying DevOps API request.
            If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> asyncio.run(my_async_coll.info()).database_info.region
        'us-east1'
        >>> asyncio.run(my_async_coll.info()).full_name
        'default_keyspace.my_v_collection'

    Note:
        the returned CollectionInfo wraps, among other things,
        the database information: as such, calling this method
        triggers the same-named method of a Database object (which, in turn,
        performs an HTTP request to the DevOps API).
        See the documentation for `Database.info()` for more details.
    """

    # The three timeout parameters are handed over, unchanged, to the
    # `info` method of the database this collection belongs to.
    timeout_kwargs = {
        "database_admin_timeout_ms": database_admin_timeout_ms,
        "request_timeout_ms": request_timeout_ms,
        "timeout_ms": timeout_ms,
    }
    database_info = await self.database.info(**timeout_kwargs)

    # Everything else in the result is read from this collection's attributes.
    collection_info = CollectionInfo(
        database_info=database_info,
        keyspace=self.keyspace,
        name=self.name,
        full_name=self.full_name,
    )
    return collection_info

async def insert_many(self, documents: Iterable[DOC], *, ordered: bool = False, chunk_size: int | None = None, concurrency: int | None = None, request_timeout_ms: int | None = None, general_method_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> CollectionInsertManyResult

Insert a list of documents into the collection. This is not an atomic operation.

Args

documents: an iterable of dictionaries, each a document to insert. Documents may specify their _id field or leave it out, in which case it will be added automatically.
ordered: if False (default), the insertions can occur in arbitrary order and possibly concurrently. If True, they are processed sequentially. If there are no specific reasons against it, unordered insertions are to be preferred as they complete much faster.
chunk_size: how many documents to include in a single API request. Exceeding the server maximum allowed value results in an error. Leave it unspecified (recommended) to use the system default.
concurrency: maximum number of concurrent requests to the API at a given time. It cannot be more than one for ordered insertions.
request_timeout_ms: a timeout, in milliseconds, for each API request. If not passed, the collection-level setting is used instead.
general_method_timeout_ms: a timeout, in milliseconds, for the whole requested operation (which may involve multiple API requests). If not passed, the collection-level setting is used instead.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a CollectionInsertManyResult object.

Examples

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> async def write_and_count(acol: AsyncCollection) -> None:
...     count0 = await acol.count_documents({}, upper_bound=10)
...     print("count0", count0)
...     im_result1 = await acol.insert_many(
...         [
...             {"a": 10},
...             {"a": 5},
...             {"b": [True, False, False]},
...         ],
...         ordered=True,
...     )
...     print("inserted1", im_result1.inserted_ids)
...     count1 = await acol.count_documents({}, upper_bound=100)
...     print("count1", count1)
...     await acol.insert_many(
...         [{"seq": i} for i in range(50)],
...         concurrency=5,
...     )
...     count2 = await acol.count_documents({}, upper_bound=100)
...     print("count2", count2)
...
>>> asyncio.run(write_and_count(my_async_coll))
count0 0
inserted1 ['e3c2a684-...', '1de4949f-...', '167dacc3-...']
count1 3
count2 53
>>> asyncio.run(my_async_coll.insert_many(
...     [
...         {"tag": "a", "$vector": [1, 2]},
...         {"tag": "b", "$vector": [3, 4]},
...     ]
... ))
CollectionInsertManyResult(...)

Note

Unordered insertions are executed with some degree of concurrency, so it is usually better to prefer this mode unless the order in the document sequence is important.

Note

A failure mode for this command is related to certain faulty documents found among those to insert: for example, a document may have an ID already found on the collection, or its vector dimension may not match the collection setting.

For an ordered insertion, the method will raise an exception at the first such faulty document – nevertheless, all documents processed until then will end up being written to the database.

For unordered insertions, if the error stems from faulty documents the insertion proceeds until exhausting the input documents: then, an exception is raised – and all insertable documents will have been written to the database, including those "after" the troublesome ones.

Errors occurring during an insert_many operation, for that reason, may result in a CollectionInsertManyException being raised. This exception allows inspecting the list of document IDs that were successfully inserted, while at the same time giving access to the underlying "root errors" that caused the full method call to fail.

Expand source code

async def insert_many(
    self,
    documents: Iterable[DOC],
    *,
    ordered: bool = False,
    chunk_size: int | None = None,
    concurrency: int | None = None,
    request_timeout_ms: int | None = None,
    general_method_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionInsertManyResult:
    """
    Insert a list of documents into the collection.
    This is not an atomic operation.

    Args:
        documents: an iterable of dictionaries, each a document to insert.
            Documents may specify their `_id` field or leave it out, in which
            case it will be added automatically.
        ordered: if False (default), the insertions can occur in arbitrary order
            and possibly concurrently. If True, they are processed sequentially.
            If there are no specific reasons against it, unordered insertions are to
            be preferred as they complete much faster.
        chunk_size: how many documents to include in a single API request.
            It must be a positive integer. Exceeding the server maximum allowed
            value results in an error.
            Leave it unspecified (recommended) to use the system default.
        concurrency: maximum number of concurrent requests to the API at
            a given time. It cannot be more than one for ordered insertions,
            and it must be at least one for unordered insertions.
        request_timeout_ms: a timeout, in milliseconds, for each API request.
            If not passed, the collection-level setting is used instead.
        general_method_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation (which may involve multiple API requests).
            If not passed, the collection-level setting is used instead.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a CollectionInsertManyResult object.

    Raises:
        ValueError: if `chunk_size` is not positive, if an ordered insertion
            is requested with a concurrency greater than one, or if an
            unordered insertion is requested with a concurrency below one.
        CollectionInsertManyException: if any of the API responses
            reports errors (see the notes below).

    Examples:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> async def write_and_count(acol: AsyncCollection) -> None:
        ...     count0 = await acol.count_documents({}, upper_bound=10)
        ...     print("count0", count0)
        ...     im_result1 = await acol.insert_many(
        ...         [
        ...             {"a": 10},
        ...             {"a": 5},
        ...             {"b": [True, False, False]},
        ...         ],
        ...         ordered=True,
        ...     )
        ...     print("inserted1", im_result1.inserted_ids)
        ...     count1 = await acol.count_documents({}, upper_bound=100)
        ...     print("count1", count1)
        ...     await acol.insert_many(
        ...         [{"seq": i} for i in range(50)],
        ...         concurrency=5,
        ...     )
        ...     count2 = await acol.count_documents({}, upper_bound=100)
        ...     print("count2", count2)
        ...
        >>> asyncio.run(write_and_count(my_async_coll))
        count0 0
        inserted1 ['e3c2a684-...', '1de4949f-...', '167dacc3-...']
        count1 3
        count2 53
        >>> asyncio.run(my_async_coll.insert_many(
        ...     [
        ...         {"tag": "a", "$vector": [1, 2]},
        ...         {"tag": "b", "$vector": [3, 4]},
        ...     ]
        ... ))
        CollectionInsertManyResult(...)

    Note:
        Unordered insertions are executed with some degree of concurrency,
        so it is usually better to prefer this mode unless the order in the
        document sequence is important.

    Note:
        A failure mode for this command is related to certain faulty documents
        found among those to insert: for example, a document may have an ID
        already found on the collection, or its vector dimension may not
        match the collection setting.

        For an ordered insertion, the method will raise an exception at
        the first such faulty document -- nevertheless, all documents processed
        until then will end up being written to the database.

        For unordered insertions, if the error stems from faulty documents
        the insertion proceeds until exhausting the input documents: then,
        an exception is raised -- and all insertable documents will have been
        written to the database, including those "after" the troublesome ones.

        Errors occurring during an insert_many operation, for that reason,
        may result in a `CollectionInsertManyException` being raised.
        This exception allows inspecting the list of document IDs that were
        successfully inserted, while at the same time giving access to the
        underlying "root errors" that caused the full method call to fail.
    """

    # Validate the arguments up front, before any request is issued.
    # A zero chunk size would make `range` fail with an unrelated message,
    # while a negative one would silently produce no chunks at all.
    if chunk_size is not None and chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer.")
    if concurrency is None:
        _concurrency = 1 if ordered else DEFAULT_INSERT_MANY_CONCURRENCY
    else:
        _concurrency = concurrency
    if ordered:
        if _concurrency > 1:
            raise ValueError("Cannot run ordered insert_many concurrently.")
    elif _concurrency < 1:
        # A semaphore created with zero permits could never be acquired,
        # so the unordered insertion would wait forever.
        raise ValueError(
            "concurrency must be at least 1 for unordered insert_many."
        )
    _chunk_size = (
        DEFAULT_INSERT_MANY_CHUNK_SIZE if chunk_size is None else chunk_size
    )

    # Timeouts: explicit arguments win over the collection-level settings.
    _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
        (general_method_timeout_ms, "general_method_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (
            self.api_options.timeout_options.general_method_timeout_ms,
            "general_method_timeout_ms",
        ),
    )
    _request_timeout_ms, _rt_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )

    _documents = list(documents)
    logger.info(f"inserting {len(_documents)} documents in '{self.name}'")
    document_chunks = [
        _documents[i : i + _chunk_size]
        for i in range(0, len(_documents), _chunk_size)
    ]
    raw_results: list[dict[str, Any]] = []
    inserted_ids: list[Any] = []
    # Each request below asks this manager for its timeout context, passing
    # the per-request timeout as a cap.
    timeout_manager = MultiCallTimeoutManager(
        overall_timeout_ms=_general_method_timeout_ms,
        timeout_label=_gmt_label,
    )

    def _ok_inserted_ids(chunk_response: dict[str, Any]) -> list[Any]:
        # IDs of the documents reported as "OK" in one chunk response;
        # a missing or null "status" yields an empty list.
        return [
            doc_resp["_id"]
            for doc_resp in (chunk_response.get("status") or {}).get(
                "documentResponses", []
            )
            if doc_resp["status"] == "OK"
        ]

    if ordered:
        options = {"ordered": True, "returnDocumentResponses": True}
        for document_chunk in document_chunks:
            im_payload = {
                "insertMany": {
                    "documents": document_chunk,
                    "options": options,
                },
            }
            logger.info(f"insertMany(chunk) on '{self.name}'")
            # API errors are not raised by the request itself: they are
            # inspected below, after recording what was inserted so far.
            chunk_response = await self._converted_request(
                payload=im_payload,
                raise_api_errors=False,
                timeout_context=timeout_manager.remaining_timeout(
                    cap_time_ms=_request_timeout_ms,
                    cap_timeout_label=_rt_label,
                ),
            )
            logger.info(f"finished insertMany(chunk) on '{self.name}'")
            # accumulate the results in this call
            inserted_ids.extend(_ok_inserted_ids(chunk_response))
            raw_results.append(chunk_response)
            # if errors, quit early
            if chunk_response.get("errors", []):
                response_exception = DataAPIResponseException.from_response(
                    command=im_payload,
                    raw_response=chunk_response,
                )
                raise CollectionInsertManyException(
                    inserted_ids=inserted_ids, exceptions=[response_exception]
                )
    else:
        # unordered: concurrent or not, do all of them and parse the results
        options = {"ordered": False, "returnDocumentResponses": True}

        sem = asyncio.Semaphore(_concurrency)

        async def concurrent_insert_chunk(
            document_chunk: list[DOC],
        ) -> tuple[dict[str, Any], dict[str, Any]]:
            # Send one chunk, returning the (payload, response) pair.
            async with sem:
                im_payload = {
                    "insertMany": {
                        "documents": document_chunk,
                        "options": options,
                    },
                }
                logger.info(f"insertMany(chunk) on '{self.name}'")
                im_response = await self._converted_request(
                    payload=im_payload,
                    raise_api_errors=False,
                    timeout_context=timeout_manager.remaining_timeout(
                        cap_time_ms=_request_timeout_ms,
                        cap_timeout_label=_rt_label,
                    ),
                )
                logger.info(f"finished insertMany(chunk) on '{self.name}'")
                return im_payload, im_response

        raw_pl_results_pairs: list[tuple[dict[str, Any], dict[str, Any]]]
        if _concurrency > 1:
            tasks = [
                asyncio.create_task(concurrent_insert_chunk(document_chunk))
                for document_chunk in document_chunks
            ]
            raw_pl_results_pairs = await asyncio.gather(*tasks)
        else:
            raw_pl_results_pairs = [
                await concurrent_insert_chunk(document_chunk)
                for document_chunk in document_chunks
            ]

        # Keep `raw_results` a genuine list (as in the ordered case)
        # rather than the tuple that unzipping the pairs would produce.
        raw_results = [im_response for _, im_response in raw_pl_results_pairs]
        inserted_ids = [
            inserted_id
            for chunk_response in raw_results
            for inserted_id in _ok_inserted_ids(chunk_response)
        ]

        # check-raise: one exception per chunk whose response has errors
        response_exceptions = [
            DataAPIResponseException.from_response(
                command=chunk_payload,
                raw_response=chunk_response,
            )
            for chunk_payload, chunk_response in raw_pl_results_pairs
            if chunk_response.get("errors", [])
        ]
        if response_exceptions:
            raise CollectionInsertManyException(
                inserted_ids=inserted_ids,
                exceptions=response_exceptions,
            )

    # return
    full_result = CollectionInsertManyResult(
        raw_results=raw_results,
        inserted_ids=inserted_ids,
    )
    logger.info(
        f"finished inserting {len(_documents)} documents in '{self.name}'"
    )
    return full_result

async def insert_one(self, document: DOC, *, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> CollectionInsertOneResult

Insert a single document in the collection in an atomic operation.

Args

document: the dictionary expressing the document to insert. The _id field of the document can be left out, in which case it will be created automatically.
general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a CollectionInsertOneResult object.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> async def write_and_count(acol: AsyncCollection) -> None:
...     count0 = await acol.count_documents({}, upper_bound=10)
...     print("count0", count0)
...     await acol.insert_one(
...         {
...             "age": 30,
...             "name": "Smith",
...             "food": ["pear", "peach"],
...             "likes_fruit": True,
...         },
...     )
...     await acol.insert_one({"_id": "user-123", "age": 50, "name": "Maccio"})
...     count1 = await acol.count_documents({}, upper_bound=10)
...     print("count1", count1)
...
>>> asyncio.run(write_and_count(my_async_coll))
count0 0
count1 2

>>> asyncio.run(my_async_coll.insert_one({"tag": "v", "$vector": [10, 11]}))
CollectionInsertOneResult(...)

Note

If an _id is explicitly provided, which corresponds to a document that exists already in the collection, an error is raised and the insertion fails.

Expand source code

async def insert_one(
    self,
    document: DOC,
    *,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionInsertOneResult:
    """
    Insert a single document in the collection in an atomic operation.

    Args:
        document: the dictionary expressing the document to insert.
            The `_id` field of the document can be left out, in which
            case it will be created automatically.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a CollectionInsertOneResult object.

    Raises:
        UnexpectedDataAPIResponseException: if the API response carries
            no inserted ID (missing, null or empty "insertedIds").

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> async def write_and_count(acol: AsyncCollection) -> None:
        ...     count0 = await acol.count_documents({}, upper_bound=10)
        ...     print("count0", count0)
        ...     await acol.insert_one(
        ...         {
        ...             "age": 30,
        ...             "name": "Smith",
        ...             "food": ["pear", "peach"],
        ...             "likes_fruit": True,
        ...         },
        ...     )
        ...     await acol.insert_one({"_id": "user-123", "age": 50, "name": "Maccio"})
        ...     count1 = await acol.count_documents({}, upper_bound=10)
        ...     print("count1", count1)
        ...
        >>> asyncio.run(write_and_count(my_async_coll))
        count0 0
        count1 2

        >>> asyncio.run(my_async_coll.insert_one({"tag": "v", "$vector": [10, 11]}))
        CollectionInsertOneResult(...)

    Note:
        If an `_id` is explicitly provided, which corresponds to a document
        that exists already in the collection, an error is raised and
        the insertion fails.
    """

    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    io_payload = {"insertOne": {"document": document}}
    logger.info(f"insertOne on '{self.name}'")
    io_response = await self._converted_request(
        payload=io_payload,
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    logger.info(f"finished insertOne on '{self.name}'")
    # `or {}` guards against a "status" that is present but null, so that
    # such a response is reported as faulty instead of raising a TypeError.
    inserted_ids = (io_response.get("status") or {}).get("insertedIds")
    if not inserted_ids:
        # Covers a missing "insertedIds" entry as well as an empty one.
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from insert_one API command.",
            raw_response=io_response,
        )
    return CollectionInsertOneResult(
        raw_results=[io_response],
        inserted_id=inserted_ids[0],
    )

async def options(self, *, collection_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> CollectionDefinition

Get the collection options, i.e. its configuration as read from the database.

The method issues a request to the Data API each time it is invoked, without caching mechanisms: this ensures up-to-date information for usages such as real-time collection validation by the application.

Args

collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for collection_admin_timeout_ms.
timeout_ms: an alias for collection_admin_timeout_ms.

Returns

a CollectionDefinition instance describing the collection. (See also the database list_collections method.)

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> asyncio.run(my_async_coll.options())
CollectionDefinition(vector=CollectionVectorOptions(dimension=3, metric='cosine'))

Expand source code

async def options(
    self,
    *,
    collection_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionDefinition:
    """
    Retrieve the collection options, that is, the configuration of this
    collection as currently stored on the database.

    No caching is involved: every invocation issues a request to
    the Data API, so the result is always up to date (which suits usages
    such as real-time collection validation by the application).

    Args:
        collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `collection_admin_timeout_ms`.
        timeout_ms: an alias for `collection_admin_timeout_ms`.

    Returns:
        a CollectionDefinition instance describing the collection.
        (See also the database `list_collections` method.)

    Raises:
        RuntimeError: if no collection with this name is found among
            those listed by the database.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> asyncio.run(my_async_coll.options())
        CollectionDefinition(vector=CollectionVectorOptions(dimension=3, metric='cosine'))
    """

    _collection_admin_timeout_ms, _ca_label = _select_singlereq_timeout_ca(
        timeout_options=self.api_options.timeout_options,
        collection_admin_timeout_ms=collection_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    listing_timeout_context = _TimeoutContext(
        request_ms=_collection_admin_timeout_ms,
        label=_ca_label,
    )
    logger.info(f"getting collections in search of '{self.name}'")
    # The database lists its collections; the one named like this
    # collection (the first, should there be several) is the one of interest.
    all_descriptors = await self.database._list_collections_ctx(
        keyspace=None,
        timeout_context=listing_timeout_context,
    )
    own_descriptor = next(
        (
            descriptor
            for descriptor in all_descriptors
            if descriptor.name == self.name
        ),
        None,
    )
    logger.info(f"finished getting collections in search of '{self.name}'")
    if own_descriptor is None:
        raise RuntimeError(
            f"Collection {self.keyspace}.{self.name} not found.",
        )
    return own_descriptor.definition

async def replace_one(self, filter: FilterType, replacement: DOC, *, sort: SortType | None = None, upsert: bool = False, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> CollectionUpdateResult

Replace a single document on the collection with a new one, optionally inserting a new one if no match is found.

Args

filter: a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
replacement: the new document to write into the collection.
sort: with this dictionary parameter one can control the sorting order of the documents matching the filter, effectively determining what document will come first and hence be the replaced one. See the find method for more on sorting. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
upsert: this parameter controls the behavior in absence of matches. If True, replacement is inserted as a new document if no matches are found on the collection. If False, the operation silently does nothing in case of no matches.
general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a CollectionUpdateResult object summarizing the outcome of the replace operation.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> async def do_replace_one(acol: AsyncCollection) -> None:
...     await acol.insert_one({"Marco": "Polo"})
...     result0 = await acol.replace_one(
...         {"Marco": {"$exists": True}},
...         {"Buda": "Pest"},
...     )
...     print("result0.update_info", result0.update_info)
...     doc1 = await acol.find_one({"Buda": "Pest"})
...     print("doc1", doc1)
...     result1 = await acol.replace_one(
...         {"Mirco": {"$exists": True}},
...         {"Oh": "yeah?"},
...     )
...     print("result1.update_info", result1.update_info)
...     result2 = await acol.replace_one(
...         {"Mirco": {"$exists": True}},
...         {"Oh": "yeah?"},
...         upsert=True,
...     )
...     print("result2.update_info", result2.update_info)
...
>>> asyncio.run(do_replace_one(my_async_coll))
result0.update_info {'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1}
doc1 {'_id': '6e669a5a-...', 'Buda': 'Pest'}
result1.update_info {'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0}
result2.update_info {'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '30e34e00-...'}

Expand source code

async def replace_one(
    self,
    filter: FilterType,
    replacement: DOC,
    *,
    sort: SortType | None = None,
    upsert: bool = False,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionUpdateResult:
    """
    Replace a single document on the collection with a new one,
    optionally inserting a new one if no match is found.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        replacement: the new document to write into the collection.
        sort: with this dictionary parameter one can control the sorting
            order of the documents matching the filter, effectively
            determining what document will come first and hence be the
            replaced one. See the `find` method for more on sorting.
            Vector-based ANN sorting is achieved by providing a "$vector"
            or a "$vectorize" key in `sort`.
        upsert: this parameter controls the behavior in absence of matches.
            If True, `replacement` is inserted as a new document
            if no matches are found on the collection. If False,
            the operation silently does nothing in case of no matches.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a CollectionUpdateResult object summarizing the outcome of
        the replace operation.

    Raises:
        UnexpectedDataAPIResponseException: if the API response has no
            "document" entry within its "data" section (a missing or
            null "data" section falls in this case as well).

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> async def do_replace_one(acol: AsyncCollection) -> None:
        ...     await acol.insert_one({"Marco": "Polo"})
        ...     result0 = await acol.replace_one(
        ...         {"Marco": {"$exists": True}},
        ...         {"Buda": "Pest"},
        ...     )
        ...     print("result0.update_info", result0.update_info)
        ...     doc1 = await acol.find_one({"Buda": "Pest"})
        ...     print("doc1", doc1)
        ...     result1 = await acol.replace_one(
        ...         {"Mirco": {"$exists": True}},
        ...         {"Oh": "yeah?"},
        ...     )
        ...     print("result1.update_info", result1.update_info)
        ...     result2 = await acol.replace_one(
        ...         {"Mirco": {"$exists": True}},
        ...         {"Oh": "yeah?"},
        ...         upsert=True,
        ...     )
        ...     print("result2.update_info", result2.update_info)
        ...
        >>> asyncio.run(do_replace_one(my_async_coll))
        result0.update_info {'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1}
        doc1 {'_id': '6e669a5a-...', 'Buda': 'Pest'}
        result1.update_info {'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0}
        result2.update_info {'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '30e34e00-...'}
    """

    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    options = {
        "upsert": upsert,
    }
    # Entries left to None (typically "sort") are not sent along in the command.
    fo_payload = {
        "findOneAndReplace": {
            k: v
            for k, v in {
                "filter": filter,
                "replacement": replacement,
                "options": options,
                "sort": sort,
            }.items()
            if v is not None
        }
    }
    logger.info(f"findOneAndReplace on '{self.name}'")
    fo_response = await self._converted_request(
        payload=fo_payload,
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    logger.info(f"finished findOneAndReplace on '{self.name}'")
    # `.get("data", {})` would return None for an explicit `"data": null`,
    # turning the membership test into a bare TypeError: normalizing with
    # `or {}` (as done for "status") routes that case to the proper exception.
    fo_data = fo_response.get("data") or {}
    if "document" not in fo_data:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from find_one_and_replace API command.",
            raw_response=fo_response,
        )
    fo_status = fo_response.get("status") or {}
    _update_info = _prepare_update_info([fo_status])
    return CollectionUpdateResult(
        raw_results=[fo_response],
        update_info=_update_info,
    )

Create a Collection from this one. Save for the arguments explicitly provided as overrides, everything else is kept identical to this collection in the copy (the database is converted into a sync object).

Args

embedding_api_key: optional API key(s) for interacting with the collection. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
reranking_api_key: optional API key(s) for interacting with the collection. If a reranker is configured for the collection, and this parameter is not None, Data API calls will include the appropriate reranker-related headers according to this parameter. Reranker services may not necessarily require this setting (e.g. if the service needs no authentication, or one is configured as part of the collection definition relying on a "shared secret"). If a string is passed, it is translated into an instance of RerankingAPIKeyHeaderProvider.
api_options: any additional options to set for the result, in the form of an APIOptions instance (where one can set just the needed attributes). In case the same setting is also provided as named parameter, the latter takes precedence.

Returns

the new copy, a Collection instance.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> my_async_coll.to_sync().count_documents({}, upper_bound=100)
77

Expand source code

def to_sync(
    self: AsyncCollection[DOC],
    *,
    embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
    reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
    api_options: APIOptions | UnsetType = _UNSET,
) -> Collection[DOC]:
    """
    Build a (synchronous) Collection out of this asynchronous one.
    Apart from the settings explicitly overridden through the arguments,
    the copy retains everything from this collection (with the database
    turned into its sync counterpart).

    Args:
        embedding_api_key: optional API key(s) for interacting with the collection.
            When an embedding service is configured and this parameter is not
            None, the embedding-related headers it specifies are included in
            each Data API call. A plain string is turned into the one
            "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            With header-based authentication, some vectorize providers/models
            require passing specialized subclasses of
            `astrapy.authentication.EmbeddingHeadersProvider` instead.
        reranking_api_key: optional API key(s) for interacting with the collection.
            When a reranker is configured for the collection and this parameter
            is not None, the reranker-related headers it specifies are included
            in Data API calls. This setting is not necessarily required by
            reranker services (e.g. if the service needs no authentication,
            or one is configured as part of the collection definition relying
            on a "shared secret"). A plain string is turned into an instance of
            `astrapy.authentication.RerankingAPIKeyHeaderProvider`.
        api_options: further options for the result, given as an APIOptions
            instance (where only the needed attributes have to be set).
            If a setting is provided also as a named parameter,
            the named parameter takes precedence.

    Returns:
        the new copy, a Collection instance.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> my_async_coll.to_sync().count_documents({}, upper_bound=100)
        77
    """

    named_overrides = APIOptions(
        embedding_api_key=embedding_api_key,
        reranking_api_key=reranking_api_key,
    )
    # Layering: this collection's options, then `api_options`, and lastly
    # the named parameters (applied last, as per the documented precedence).
    merged_options = self.api_options.with_override(api_options)
    merged_options = merged_options.with_override(named_overrides)
    return Collection(
        database=self.database.to_sync(),
        name=self.name,
        keyspace=self.keyspace,
        api_options=merged_options,
    )

async def update_many(self, filter: FilterType, update: dict[str, Any], *, upsert: bool = False, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> CollectionUpdateResult

Apply an update operation to all documents matching a condition, optionally inserting one document in absence of matches.

Args

filter: a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
update: the update prescription to apply to the documents, expressed as a dictionary as per Data API syntax. Examples are: {"$set": {"field": "value"}} {"$inc": {"counter": 10}} {"$unset": {"field": ""}} See the Data API documentation for the full syntax.
upsert: this parameter controls the behavior in absence of matches. If True, a single new document (resulting from applying update to an empty document) is inserted if no matches are found on the collection. If False, the operation silently does nothing in case of no matches.
general_method_timeout_ms: a timeout, in milliseconds, for the whole requested operation (which may involve multiple API requests). This method may entail successive HTTP API requests, depending on the amount of involved documents. If not passed, the collection-level setting is used instead.
request_timeout_ms: a timeout, in milliseconds, for each API request. If not passed, the collection-level setting is used instead.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a CollectionUpdateResult object summarizing the outcome of the update operation.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> async def do_update_many(acol: AsyncCollection) -> None:
...     await acol.insert_many([{"c": "red"}, {"c": "green"}, {"c": "blue"}])
...     result0 = await acol.update_many(
...         {"c": {"$ne": "green"}},
...         {"$set": {"nongreen": True}},
...     )
...     print("result0.update_info", result0.update_info)
...     result1 = await acol.update_many(
...         {"c": "orange"},
...         {"$set": {"is_also_fruit": True}},
...     )
...     print("result1.update_info", result1.update_info)
...     result2 = await acol.update_many(
...         {"c": "orange"},
...         {"$set": {"is_also_fruit": True}},
...         upsert=True,
...     )
...     print("result2.update_info", result2.update_info)
...
>>> asyncio.run(do_update_many(my_async_coll))
result0.update_info {'n': 2, 'updatedExisting': True, 'ok': 1.0, 'nModified': 2}
result1.update_info {'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0}
result2.update_info {'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '79ffd5a3-ab99-4dff-a2a5-4aaa0e59e854'}

Note

Similarly to the case of find (see its docstring for more details), running this command while, at the same time, another process is inserting new documents which match the filter of the update_many can result in an unpredictable fraction of these documents being updated. In other words, it cannot be easily predicted whether a given newly-inserted document will be picked up by the update_many command or not.

Expand source code

async def update_many(
    self,
    filter: FilterType,
    update: dict[str, Any],
    *,
    upsert: bool = False,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionUpdateResult:
    """
    Run an update operation on every document matching a condition,
    optionally inserting one document in absence of matches.

    Args:
        filter: a dictionary expressing a predicate in the Data API
            filter syntax, for instance:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            The Data API documentation lists all available operators.
        update: a dictionary, following the Data API syntax, that prescribes
            the update to apply to the documents. For instance:
                {"$set": {"field": "value"}}
                {"$inc": {"counter": 10}}
                {"$unset": {"field": ""}}
            The Data API documentation covers the full syntax.
        upsert: what to do when no document matches. If True, a single
            new document (obtained by applying `update` to an empty
            document) is inserted on the collection; if False,
            the operation silently does nothing.
        general_method_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation (which may involve multiple API requests).
            Depending on the amount of involved documents, this method
            may entail successive HTTP API requests.
            The collection-level setting is used if this is not passed.
        request_timeout_ms: a timeout, in milliseconds, for each API request.
            The collection-level setting is used if this is not passed.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a CollectionUpdateResult object describing the outcome of
        the update operation.

    Raises:
        CollectionUpdateManyException: if one of the API responses reports
            errors. The exception carries, as `partial_result`, the outcome
            of the requests completed before the failing one.
        UnexpectedDataAPIResponseException: if an API response has neither
            errors nor a "status" section.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> async def do_update_many(acol: AsyncCollection) -> None:
        ...     await acol.insert_many([{"c": "red"}, {"c": "green"}, {"c": "blue"}])
        ...     result0 = await acol.update_many(
        ...         {"c": {"$ne": "green"}},
        ...         {"$set": {"nongreen": True}},
        ...     )
        ...     print("result0.update_info", result0.update_info)
        ...     result1 = await acol.update_many(
        ...         {"c": "orange"},
        ...         {"$set": {"is_also_fruit": True}},
        ...     )
        ...     print("result1.update_info", result1.update_info)
        ...     result2 = await acol.update_many(
        ...         {"c": "orange"},
        ...         {"$set": {"is_also_fruit": True}},
        ...         upsert=True,
        ...     )
        ...     print("result2.update_info", result2.update_info)
        ...
        >>> asyncio.run(do_update_many(my_async_coll))
        result0.update_info {'n': 2, 'updatedExisting': True, 'ok': 1.0, 'nModified': 2}
        result1.update_info {'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0}
        result2.update_info {'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '79ffd5a3-ab99-4dff-a2a5-4aaa0e59e854'}

    Note:
        As is the case for `find` (whose docstring has more details),
        if another process is inserting new documents matching the filter
        of the `update_many` while this command is running, an unpredictable
        fraction of these documents may end up being updated.
        In other words, whether a given newly-inserted document is picked up
        by the update_many command or not cannot be easily predicted.
    """

    _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
        (general_method_timeout_ms, "general_method_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (
            self.api_options.timeout_options.general_method_timeout_ms,
            "general_method_timeout_ms",
        ),
    )
    _request_timeout_ms, _rt_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    fixed_options = {
        "upsert": upsert,
    }
    # Pagination state: empty for the first request, then set to the
    # "pageState" found in the previous response.
    paging_options: dict[str, str] = {}
    collected_responses: list[dict[str, Any]] = []
    collected_statuses: list[dict[str, Any]] = []
    logger.info(f"starting update_many on '{self.name}'")
    timeout_manager = MultiCallTimeoutManager(
        overall_timeout_ms=_general_method_timeout_ms,
        timeout_label=_gmt_label,
    )
    while True:
        page_payload = {
            "updateMany": {
                k: v
                for k, v in {
                    "filter": filter,
                    "update": update,
                    "options": {**fixed_options, **paging_options},
                }.items()
                if v is not None
            }
        }
        logger.info(f"updateMany on '{self.name}'")
        # Errors in the response are examined right below, which makes it
        # possible to attach the partial results to the exception raised.
        page_response = await self._converted_request(
            payload=page_payload,
            raise_api_errors=False,
            timeout_context=timeout_manager.remaining_timeout(
                cap_time_ms=_request_timeout_ms,
                cap_timeout_label=_rt_label,
            ),
        )
        logger.info(f"finished updateMany on '{self.name}'")
        page_status = page_response.get("status") or {}

        # a response with errors interrupts the operation: the failing
        # response itself is not part of the partial result.
        if page_response.get("errors", []):
            info_so_far = _prepare_update_info(collected_statuses)
            result_so_far = CollectionUpdateResult(
                raw_results=collected_responses,
                update_info=info_so_far,
            )
            api_exception = DataAPIResponseException.from_response(
                command=page_payload,
                raw_response=page_response,
            )
            raise CollectionUpdateManyException(
                partial_result=result_so_far,
                cause=api_exception,
            )

        if "status" not in page_response:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from update_many API command.",
                raw_response=page_response,
            )

        collected_responses.append(page_response)
        collected_statuses.append(page_status)
        upcoming_page_state = page_status.get("nextPageState")
        if upcoming_page_state is None:
            # no more pages to process
            break
        paging_options = {"pageState": upcoming_page_state}

    update_info = _prepare_update_info(collected_statuses)
    logger.info(f"finished update_many on '{self.name}'")
    return CollectionUpdateResult(
        raw_results=collected_responses,
        update_info=update_info,
    )

async def update_one(self, filter: FilterType, update: dict[str, Any], *, sort: SortType | None = None, upsert: bool = False, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> CollectionUpdateResult

Update a single document on the collection as requested, optionally inserting a new one if no match is found.

Args

filter: a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
update: the update prescription to apply to the document, expressed as a dictionary as per Data API syntax. Examples are: {"$set": {"field": "value"}} {"$inc": {"counter": 10}} {"$unset": {"field": ""}} See the Data API documentation for the full syntax.
sort: with this dictionary parameter one can control the sorting order of the documents matching the filter, effectively determining what document will come first and hence be the updated one. See the find method for more on sorting. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
upsert: this parameter controls the behavior in absence of matches. If True, a new document (resulting from applying the update to an empty document) is inserted if no matches are found on the collection. If False, the operation silently does nothing in case of no matches.
general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a CollectionUpdateResult object summarizing the outcome of the update operation.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> async def do_update_one(acol: AsyncCollection) -> None:
...     await acol.insert_one({"Marco": "Polo"})
...     result0 = await acol.update_one(
...         {"Marco": {"$exists": True}},
...         {"$inc": {"rank": 3}},
...     )
...     print("result0.update_info", result0.update_info)
...     result1 = await acol.update_one(
...         {"Mirko": {"$exists": True}},
...         {"$inc": {"rank": 3}},
...     )
...     print("result1.update_info", result1.update_info)
...     result2 = await acol.update_one(
...         {"Mirko": {"$exists": True}},
...         {"$inc": {"rank": 3}},
...         upsert=True,
...     )
...     print("result2.update_info", result2.update_info)
...
>>> asyncio.run(do_update_one(my_async_coll))
result0.update_info {'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1}
result1.update_info {'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0}
result2.update_info {'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '75748092-...'}

Expand source code

async def update_one(
    self,
    filter: FilterType,
    update: dict[str, Any],
    *,
    sort: SortType | None = None,
    upsert: bool = False,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionUpdateResult:
    """
    Apply the requested update to a single document on the collection;
    when nothing matches, a new document can optionally be inserted instead.

    Args:
        filter: a dictionary expressing a predicate in the Data API
            filter syntax, for instance:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            The Data API documentation lists all available operators.
        update: a dictionary, following the Data API syntax, that prescribes
            the update to apply to the document. For instance:
                {"$set": {"field": "value"}}
                {"$inc": {"counter": 10}}
                {"$unset": {"field": ""}}
            The Data API documentation covers the full syntax.
        sort: a dictionary determining the order of the documents matching
            the filter, which in turn decides which of them comes first
            and hence gets updated. Sorting is covered in more detail
            in the `find` method. Supplying a "$vector" or a "$vectorize"
            key in `sort` results in vector-based ANN sorting.
        upsert: what to do when no document matches. If True, a new
            document (obtained by applying the `update` to an empty
            document) is inserted on the collection; if False,
            the operation silently does nothing.
        general_method_timeout_ms: a timeout, in milliseconds, for the
            underlying API request; this object's defaults apply if it is
            not provided. (A single API request is issued by this method,
            so all timeout parameters are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a CollectionUpdateResult object describing the outcome of
        the update operation.

    Raises:
        UnexpectedDataAPIResponseException: if the API response
            has no "status" section.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> async def do_update_one(acol: AsyncCollection) -> None:
        ...     await acol.insert_one({"Marco": "Polo"})
        ...     result0 = await acol.update_one(
        ...         {"Marco": {"$exists": True}},
        ...         {"$inc": {"rank": 3}},
        ...     )
        ...     print("result0.update_info", result0.update_info)
        ...     result1 = await acol.update_one(
        ...         {"Mirko": {"$exists": True}},
        ...         {"$inc": {"rank": 3}},
        ...     )
        ...     print("result1.update_info", result1.update_info)
        ...     result2 = await acol.update_one(
        ...         {"Mirko": {"$exists": True}},
        ...         {"$inc": {"rank": 3}},
        ...         upsert=True,
        ...     )
        ...     print("result2.update_info", result2.update_info)
        ...
        >>> asyncio.run(do_update_one(my_async_coll))
        result0.update_info {'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1}
        result1.update_info {'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0}
        result2.update_info {'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '75748092-...'}
    """

    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    command_fields = {
        "filter": filter,
        "update": update,
        "options": {"upsert": upsert},
        "sort": sort,
    }
    # Fields left to None (typically "sort") are not sent along in the command.
    uo_payload = {
        "updateOne": {
            key: value for key, value in command_fields.items() if value is not None
        }
    }
    logger.info(f"updateOne on '{self.name}'")
    request_timeout_context = _TimeoutContext(
        request_ms=_request_timeout_ms, label=_rt_label
    )
    uo_response = await self._converted_request(
        payload=uo_payload,
        timeout_context=request_timeout_context,
    )
    logger.info(f"finished updateOne on '{self.name}'")
    if "status" not in uo_response:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from updateOne API command.",
            raw_response=uo_response,
        )
    return CollectionUpdateResult(
        raw_results=[uo_response],
        update_info=_prepare_update_info([uo_response["status"]]),
    )

def with_options(self: AsyncCollection[DOC], *, embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = (unset), reranking_api_key: str | RerankingHeadersProvider | UnsetType = (unset), api_options: APIOptions | UnsetType = (unset)) ‑> AsyncCollection[DOC]

Create a clone of this collection with some changed attributes.

Args

embedding_api_key: optional API key(s) for interacting with the collection. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
reranking_api_key: optional API key(s) for interacting with the collection. If a reranker is configured for the collection, and this parameter is not None, Data API calls will include the appropriate reranker-related headers according to this parameter. Reranker services may not necessarily require this setting (e.g. if the service needs no authentication, or one is configured as part of the collection definition relying on a "shared secret"). If a string is passed, it is translated into an instance of RerankingAPIKeyHeaderProvider.
api_options: any additional options to set for the clone, in the form of an APIOptions instance (where one can set just the needed attributes). In case the same setting is also provided as named parameter, the latter takes precedence.

Returns

a new AsyncCollection instance.

Example

>>> collection_with_api_key_configured = my_async_collection.with_options(
...     embedding_api_key="secret-key-0123abcd...",
... )

Expand source code

def with_options(
    self: AsyncCollection[DOC],
    *,
    embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
    reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
    api_options: APIOptions | UnsetType = _UNSET,
) -> AsyncCollection[DOC]:
    """
    Return a clone of this collection where some attributes are changed.

    Args:
        embedding_api_key: optional API key(s) for interacting with the collection.
            When an embedding service is configured and this parameter is not
            None, the embedding-related headers it specifies are included in
            each Data API call. A plain string is turned into the one
            "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            With header-based authentication, some vectorize providers/models
            require passing specialized subclasses of
            `astrapy.authentication.EmbeddingHeadersProvider` instead.
        reranking_api_key: optional API key(s) for interacting with the collection.
            When a reranker is configured for the collection and this parameter
            is not None, the reranker-related headers it specifies are included
            in Data API calls. This setting is not necessarily required by
            reranker services (e.g. if the service needs no authentication,
            or one is configured as part of the collection definition relying
            on a "shared secret"). A plain string is turned into an instance of
            `astrapy.authentication.RerankingAPIKeyHeaderProvider`.
        api_options: further options for the clone, given as an APIOptions
            instance (where only the needed attributes have to be set).
            If a setting is provided also as a named parameter,
            the named parameter takes precedence.

    Returns:
        a new AsyncCollection instance.

    Example:
        >>> collection_with_api_key_configured = my_async_collection.with_options(
        ...     embedding_api_key="secret-key-0123abcd...",
        ... )
    """

    # The whole cloning logic is delegated to `_copy`, to which all
    # parameters are forwarded as they are.
    cloned_collection = self._copy(
        embedding_api_key=embedding_api_key,
        reranking_api_key=reranking_api_key,
        api_options=api_options,
    )
    return cloned_collection

class AsyncDatabase (*, api_endpoint: str, keyspace: str | None, api_options: FullAPIOptions)

A Data API database. This is the object for doing database-level DML, such as creating/deleting collections, and for obtaining Collection objects themselves. This class has an asynchronous interface.

This class is not meant for direct instantiation by the user, rather it is usually obtained by invoking methods such as get_async_database of AstraDBClient.

On Astra DB, an AsyncDatabase comes with an "API Endpoint", which implies an AsyncDatabase object instance reaches a specific region (relevant point in case of multi-region databases).

An AsyncDatabase is also always set with a "working keyspace" on which all data operations are done (unless otherwise specified).

Args

api_endpoint: the full "API Endpoint" string used to reach the Data API. Example: "https://<database_id>-<region>.apps.astra.datastax.com"
keyspace: this is the keyspace all method calls will target, unless one is explicitly specified in the call. If no keyspace is supplied when creating a Database, on Astra DB the name "default_keyspace" is set, while on other environments the keyspace is left unspecified: in this case, most operations are unavailable until a keyspace is set (through an explicit use_keyspace invocation or equivalent).
api_options: a complete specification of the API Options for this instance.

Example

>>> from astrapy import DataAPIClient
>>> my_client = DataAPIClient()
>>> my_db = my_client.get_async_database(
...    "https://01234567-....apps.astra.datastax.com",
...     token="AstraCS:...",
... )

Note

creating an instance of AsyncDatabase does not trigger actual creation of the database itself, which should exist beforehand. To create databases, see the AstraDBAdmin class.

Expand source code

class AsyncDatabase:
    """
    A Data API database. This is the object for doing database-level
    DML, such as creating/deleting collections, and for obtaining Collection
    objects themselves. This class has an asynchronous interface.

    This class is not meant for direct instantiation by the user, rather
    it is usually obtained by invoking methods such as `get_async_database`
    of AstraDBClient.

    On Astra DB, an AsyncDatabase comes with an "API Endpoint", which implies
    an AsyncDatabase object instance reaches a specific region (relevant point in
    case of multi-region databases).

    An AsyncDatabase is also always set with a "working keyspace" on which all
    data operations are done (unless otherwise specified).

    Args:
        api_endpoint: the full "API Endpoint" string used to reach the Data API.
            Example: "https://<database_id>-<region>.apps.astra.datastax.com"
        keyspace: this is the keyspace all method calls will target, unless
            one is explicitly specified in the call. If no keyspace is supplied
            when creating a Database, on Astra DB the name "default_keyspace" is set,
            while on other environments the keyspace is left unspecified: in this case,
            most operations are unavailable until a keyspace is set (through an explicit
            `use_keyspace` invocation or equivalent).
        api_options: a complete specification of the API Options for this instance.

    Example:
        >>> from astrapy import DataAPIClient
        >>> my_client = DataAPIClient()
        >>> my_db = my_client.get_async_database(
        ...    "https://01234567-....apps.astra.datastax.com",
        ...     token="AstraCS:...",
        ... )

    Note:
        Creating an instance of AsyncDatabase does not trigger actual creation
        of the database itself, which should exist beforehand. To create databases,
        see the AstraDBAdmin class.
    """

    def __init__(
        self,
        *,
        api_endpoint: str,
        keyspace: str | None,
        api_options: FullAPIOptions,
    ) -> None:
        """
        Set up the database object. No API request is issued here.

        Args:
            api_endpoint: the API Endpoint string; leading and trailing
                slashes are stripped before it is stored.
            keyspace: the working keyspace. If None, and the environment
                in `api_options` is one of the Astra DB environments,
                the default Astra DB keyspace is used instead.
            api_options: the complete API Options governing this instance.
        """
        self.api_options = api_options
        self.api_endpoint = api_endpoint.strip("/")

        # A missing keyspace is replaced by the default only on Astra DB;
        # elsewhere it is stored as given (possibly None).
        fall_back_to_default = (
            keyspace is None
            and self.api_options.environment in Environment.astra_db_values
        )
        self._using_keyspace: str | None = (
            DEFAULT_ASTRA_DB_KEYSPACE if fall_back_to_default else keyspace
        )

        # Auth header first, so that the additional headers can override it.
        request_headers = {
            DEFAULT_DATA_API_AUTH_HEADER: self.api_options.token.get_token(),
        }
        request_headers.update(self.api_options.database_additional_headers)
        self._commander_headers = request_headers

        # The database name is fetched lazily (see `name`).
        self._name: str | None = None
        self._api_commander = self._get_api_commander(keyspace=self.keyspace)

    def __getattr__(self, collection_name: str) -> AsyncCollection[DefaultDocumentType]:
        return self.get_collection(name=collection_name)

    def __getitem__(self, collection_name: str) -> AsyncCollection[DefaultDocumentType]:
        """
        Indexing-style access to collections: `database["coll_name"]` is
        equivalent to `database.get_collection("coll_name")`.
        """
        collection = self.get_collection(name=collection_name)
        return collection

    def __repr__(self) -> str:
        ep_desc = f'api_endpoint="{self.api_endpoint}"'
        keyspace_desc: str | None
        if self._using_keyspace is None:
            keyspace_desc = "keyspace not set"
        else:
            keyspace_desc = f'keyspace="{self._using_keyspace}"'
        api_options_desc = f"api_options={self.api_options}"
        parts = [
            pt for pt in [ep_desc, keyspace_desc, api_options_desc] if pt is not None
        ]
        return f"{self.__class__.__name__}({', '.join(parts)})"

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, AsyncDatabase):
            return all(
                [
                    self.api_endpoint == other.api_endpoint,
                    self.keyspace == other.keyspace,
                    self.api_options == other.api_options,
                ]
            )
        else:
            return False

    def _get_api_commander(self, keyspace: str | None) -> APICommander | None:
        """
        Instantiate a new APICommander based on the properties of this class
        and a provided keyspace.

        If keyspace is None, return None (signaling a "keyspace not set").
        """

        if keyspace is None:
            return None
        else:
            base_path_components = [
                comp
                for comp in (
                    ncomp.strip("/")
                    for ncomp in (
                        self.api_options.data_api_url_options.api_path,
                        self.api_options.data_api_url_options.api_version,
                        keyspace,
                    )
                    if ncomp is not None
                )
                if comp != ""
            ]
            base_path = f"/{'/'.join(base_path_components)}"
            api_commander = APICommander(
                api_endpoint=self.api_endpoint,
                path=base_path,
                headers=self._commander_headers,
                callers=self.api_options.callers,
                redacted_header_names=self.api_options.redacted_header_names,
            )
            return api_commander

    def _get_driver_commander(self, keyspace: str | None) -> APICommander:
        """
        Building on _get_api_commander, fall back to class keyspace in
        creating/returning a commander, and in any case raise an error if not set.
        """
        driver_commander: APICommander | None
        if keyspace:
            driver_commander = self._get_api_commander(keyspace=keyspace)
        else:
            driver_commander = self._api_commander
        if driver_commander is None:
            raise ValueError(
                "No keyspace specified. This operation requires a keyspace to "
                "be set, e.g. through the `use_keyspace` method."
            )
        return driver_commander

    async def __aenter__(self) -> AsyncDatabase:
        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None = None,
        exc_value: BaseException | None = None,
        traceback: TracebackType | None = None,
    ) -> None:
        if self._api_commander is not None:
            await self._api_commander.__aexit__(
                exc_type=exc_type,
                exc_value=exc_value,
                traceback=traceback,
            )

    def _copy(
        self,
        *,
        keyspace: str | None = None,
        token: str | TokenProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncDatabase:
        """
        Create a new AsyncDatabase on the same API endpoint, applying the
        requested overrides.

        The `api_options` override is applied to this object's options first,
        then the `token` named parameter is applied on top of the result.
        A missing (or empty) keyspace means this object's keyspace is kept.
        """
        token_override = APIOptions(token=token)
        options_with_overrides = self.api_options.with_override(api_options)
        return AsyncDatabase(
            api_endpoint=self.api_endpoint,
            keyspace=keyspace if keyspace else self.keyspace,
            api_options=options_with_overrides.with_override(token_override),
        )

    def with_options(
        self,
        *,
        keyspace: str | None = None,
        token: str | TokenProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncDatabase:
        """
        Return a clone of this database, with some attributes changed.

        Args:
            keyspace: the working keyspace for the clone, i.e. the keyspace
                its method calls target unless one is explicitly specified
                in the call. If not provided, the clone keeps the working
                keyspace of this database.
            token: an Access Token to the database. Example: `"AstraCS:xyz..."`.
                Either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider` can be passed.
            api_options: further options to set for the clone, expressed as
                an APIOptions instance (only the needed attributes have to
                be set). Should a setting be given here and also as a named
                parameter, the named parameter takes precedence.

        Returns:
            a new `AsyncDatabase` instance.

        Example:
            >>> async_database_2 = async_database.with_options(
            ...     keyspace="the_other_keyspace",
            ...     token="AstraCS:xyz...",
            ... )
        """

        clone = self._copy(
            keyspace=keyspace,
            token=token,
            api_options=api_options,
        )
        return clone

    def to_sync(
        self,
        *,
        keyspace: str | None = None,
        token: str | TokenProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> Database:
        """
        Build a (synchronous) Database out of this one. Except for the
        overrides explicitly passed as arguments, the result has the same
        settings as this database.

        Args:
            keyspace: the working keyspace for the result, i.e. the keyspace
                its method calls target unless one is explicitly specified
                in the call. If not provided, the working keyspace of this
                database is used.
            token: an Access Token to the database. Example: "AstraCS:xyz..."
                Either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider` can be passed.
            api_options: further options to set for the result, expressed as
                an APIOptions instance (only the needed attributes have to
                be set). Should a setting be given here and also as a named
                parameter, the named parameter takes precedence.

        Returns:
            the new copy, a `Database` instance.

        Example:
            >>> my_sync_db = async_database.to_sync()
            >>> my_sync_db.list_collection_names()
            ['a_collection', 'another_collection']
        """

        token_override = APIOptions(token=token)
        options_with_overrides = self.api_options.with_override(api_options)
        return Database(
            api_endpoint=self.api_endpoint,
            keyspace=keyspace if keyspace else self.keyspace,
            api_options=options_with_overrides.with_override(token_override),
        )

    def use_keyspace(self, keyspace: str) -> None:
        """
        Make the given keyspace the working keyspace of this database.
        The AsyncDatabase instance is changed in place (mutated).

        The keyspace is not created by this method: it is expected to exist
        already (having been created, for example, through a
        `DatabaseAdmin.async_create_keyspace` call).

        Args:
            keyspace: the keyspace to adopt as the database working keyspace.

        Returns:
            None.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> asyncio.run(async_database.list_collection_names())
            ['coll_1', 'coll_2']
            >>> async_database.use_keyspace("an_empty_keyspace")
            >>> asyncio.run(async_database.list_collection_names())
            []
        """
        logger.info(f"switching to keyspace '{keyspace}'")
        self._using_keyspace = keyspace
        # The commander is bound to a keyspace-specific path: rebuild it.
        refreshed_commander = self._get_api_commander(keyspace=self.keyspace)
        self._api_commander = refreshed_commander

    async def info(
        self,
        *,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> AstraDBDatabaseInfo:
        """
        Fetch additional information on the database, in the form of
        an AstraDBDatabaseInfo instance.

        Part of the returned properties can change during the lifetime
        of the database (e.g. raw_info["keyspaces"]). Hence, nothing is
        cached: every call to this method issues a new request to
        the DevOps API.

        Not available outside of Astra DB and when using custom domains.

        Args:
            database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.

        Returns:
            an AstraDBDatabaseInfo instance.

        Raises:
            InvalidEnvironmentException: if the environment is not an Astra DB
                one, or the API endpoint is not a standard one.
            DevOpsAPIException: if no database information is obtained.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> asyncio.run(async_database.info()).region
            'eu-west-1'
            >>> asyncio.run(
            ...     async_database.info()
            ... ).raw_info['datacenters'][0]['dateCreated']
            '2023-01-30T12:34:56Z'

        Note:
            see the AstraDBDatabaseInfo documentation for a caveat about the difference
            between the `region` and the `raw["region"]` attributes.
        """

        if self.api_options.environment not in Environment.astra_db_values:
            raise InvalidEnvironmentException(
                "Environments outside of Astra DB are not supported."
            )
        if parse_api_endpoint(self.api_endpoint) is None:
            raise InvalidEnvironmentException(
                "Cannot inspect a nonstandard API endpoint for properties."
            )

        # Single-request method: the three timeout parameters are aliases.
        effective_timeout_ms, _ = _select_singlereq_timeout_da(
            timeout_options=self.api_options.timeout_options,
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info("getting database info")
        fetched_info = await async_fetch_database_info(
            self.api_endpoint,
            keyspace=self.keyspace,
            request_timeout_ms=effective_timeout_ms,
            api_options=self.api_options,
        )
        if fetched_info is None:
            raise DevOpsAPIException("Failure while fetching database info.")
        logger.info("finished getting database info")
        return fetched_info

    @property
    def id(self) -> str:
        """
        The ID of this database, as found in its API endpoint.
        Not available outside of Astra DB and when using custom domains.

        Raises:
            InvalidEnvironmentException: if the environment is not an Astra DB
                one, or the API endpoint is not a standard one.

        Example:
            >>> my_async_database.id
            '01234567-89ab-cdef-0123-456789abcdef'
        """

        if self.api_options.environment not in Environment.astra_db_values:
            raise InvalidEnvironmentException(
                "Database is not in a supported environment for this operation."
            )
        endpoint_details = parse_api_endpoint(self.api_endpoint)
        if endpoint_details is None:
            raise InvalidEnvironmentException(
                "Cannot inspect a nonstandard API endpoint for properties."
            )
        return endpoint_details.database_id

    @property
    def region(self) -> str:
        """
        The region where this database is located, as found in its
        API endpoint.

        Even for multi-region databases the region is well defined,
        because a Database instance connects to exactly one of the regions
        (the one specified by the API Endpoint).

        Not available outside of Astra DB and when using custom domains.

        Raises:
            InvalidEnvironmentException: if the environment is not an Astra DB
                one, or the API endpoint is not a standard one.

        Example:
            >>> my_async_database.region
            'us-west-2'
        """

        if self.api_options.environment not in Environment.astra_db_values:
            raise InvalidEnvironmentException(
                "Database is not in a supported environment for this operation."
            )
        endpoint_details = parse_api_endpoint(self.api_endpoint)
        if endpoint_details is None:
            raise InvalidEnvironmentException(
                "Cannot inspect a nonstandard API endpoint for properties."
            )
        return endpoint_details.region

    async def name(self) -> str:
        """
        The name of this database. Note that this bears no unicity guarantees.

        Calling this method the first time involves a request
        to the DevOps API (the resulting database name is then cached).
        See the `info()` method for more details.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> asyncio.run(async_database.name())
            'the_application_database'
        """

        if self._name is None:
            self._name = (await self.info()).name
        return self._name

    @property
    def keyspace(self) -> str | None:
        """
        The keyspace this database uses as target for all commands when
        no method-call-specific keyspace is specified.

        Returns:
            the working keyspace (a string), or None if not set.

        Example:
            >>> async_database.keyspace
            'the_keyspace'
        """

        return self._using_keyspace

    @overload
    def get_collection(
        self,
        name: str,
        *,
        keyspace: str | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncCollection[DefaultDocumentType]: ...

    @overload
    def get_collection(
        self,
        name: str,
        *,
        document_type: type[DOC],
        keyspace: str | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncCollection[DOC]: ...

    def get_collection(
        self,
        name: str,
        *,
        document_type: type[Any] = DefaultDocumentType,
        keyspace: str | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncCollection[DOC]:
        """
        Return an `AsyncCollection` object representing a collection
        on this database.

        No request is sent to the API and nothing changes on the database:
        for the returned `AsyncCollection` to be of any use, the collection
        must already exist (e.g. having been created earlier with
        the `create_collection` method).

        Args:
            name: the name of the collection.
            document_type: a formal specifier for the type checker only
                (it has no effect at runtime). If omitted, the result
                is implicitly an `AsyncCollection[dict[str, Any]]`. If provided,
                it must match the type hint specified in the assignment.
            keyspace: the keyspace containing the collection. If no keyspace
                is specified, the working keyspace of this database is used.
            embedding_api_key: optional API key(s) for interacting with the collection.
                If an embedding service is configured, and this parameter is not None,
                each Data API call will include the necessary embedding-related headers
                as specified by this parameter. If a string is passed, it translates
                into the one "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
                For some vectorize providers/models, if using header-based
                authentication, specialized subclasses of
                `astrapy.authentication.EmbeddingHeadersProvider` should be supplied.
            reranking_api_key: optional API key(s) for interacting with the collection.
                If a reranker is configured for the collection, and this parameter
                is not None, Data API calls will include the appropriate
                reranker-related headers according to this parameter. Reranker services
                may not necessarily require this setting (e.g. if the service needs no
                authentication, or one is configured as part of the collection
                definition relying on a "shared secret").
                If a string is passed, it is translated into an instance of
                `astrapy.authentication.RerankingAPIKeyHeaderProvider`.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults inherited from the Database.
                This allows for a deeper configuration of the collection, e.g.
                concerning timeouts. The `embedding_api_key` and
                `reranking_api_key` parameters are applied after these options.

        Returns:
            an `AsyncCollection` instance, representing the desired collection
                (but without any form of validation).

        Raises:
            ValueError: if no keyspace is passed and none is set on the database.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> async def count_docs(adb: AsyncDatabase, c_name: str) -> int:
            ...    async_col = adb.get_collection(c_name)
            ...    return await async_col.count_documents({}, upper_bound=100)
            ...
            >>> asyncio.run(count_docs(async_database, "my_collection"))
            45

        Note: the attribute and indexing syntax forms achieve the same effect
            as this method, returning an AsyncCollection.
            In other words, the following are equivalent:
                async_database.get_collection("coll_name")
                async_database.coll_name
                async_database["coll_name"]
        """

        # lazy importing here against circular-import error
        from astrapy.collection import AsyncCollection

        # Database options, then the spawn overrides, then the named keys.
        spawned_options = self.api_options.with_override(spawn_api_options)
        key_overrides = APIOptions(
            embedding_api_key=embedding_api_key,
            reranking_api_key=reranking_api_key,
        )
        collection_api_options = spawned_options.with_override(key_overrides)

        target_keyspace = keyspace if keyspace else self.keyspace
        if target_keyspace is None:
            raise ValueError(
                "No keyspace specified. This operation requires a keyspace to "
                "be set, e.g. through the `use_keyspace` method."
            )
        return AsyncCollection(
            database=self,
            name=name,
            keyspace=target_keyspace,
            api_options=collection_api_options,
        )

    @overload
    async def create_collection(
        self,
        name: str,
        *,
        definition: CollectionDefinition | dict[str, Any] | None = None,
        keyspace: str | None = None,
        collection_admin_timeout_ms: int | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncCollection[DefaultDocumentType]: ...

    @overload
    async def create_collection(
        self,
        name: str,
        *,
        definition: CollectionDefinition | dict[str, Any] | None = None,
        document_type: type[DOC],
        keyspace: str | None = None,
        collection_admin_timeout_ms: int | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncCollection[DOC]: ...

    async def create_collection(
        self,
        name: str,
        *,
        definition: CollectionDefinition | dict[str, Any] | None = None,
        document_type: type[Any] = DefaultDocumentType,
        keyspace: str | None = None,
        collection_admin_timeout_ms: int | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncCollection[DOC]:
        """
        Create a collection on the database and return the AsyncCollection
        instance that represents it.

        This is a blocking operation: the method returns when the collection
        is ready to be used. As opposed to the `get_collection` method,
        this method causes the collection to be actually created on DB.

        Args:
            name: the name of the collection.
            definition: a complete definition for the collection. This can be an
                instance of `CollectionDefinition` or an equivalent (nested) dictionary,
                in which case it will be parsed into a `CollectionDefinition`.
                See the `astrapy.info.CollectionDefinition` class and the
                `AsyncCollection` class for more details and ways to construct this object.
            document_type: this parameter acts as a formal specifier for the type checker.
                If omitted, the resulting AsyncCollection is implicitly
                an `AsyncCollection[dict[str, Any]]`. If provided, it must match the
                type hint specified in the assignment.
            keyspace: the keyspace where the collection is to be created.
                If not specified, the general setting for this database is used.
            collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
            embedding_api_key: optional API key(s) for interacting with the collection.
                If an embedding service is configured, and this parameter is not None,
                each Data API call will include the necessary embedding-related headers
                as specified by this parameter. If a string is passed, it translates
                into the one "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
                For some vectorize providers/models, if using header-based authentication,
                specialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`
                should be supplied.
            reranking_api_key: optional API key(s) for interacting with the collection.
                If a reranker is configured for the collection, and this parameter
                is not None, Data API calls will include the appropriate
                reranker-related headers according to this parameter. Reranker services
                may not necessarily require this setting (e.g. if the service needs no
                authentication, or one is configured as part of the collection
                definition relying on a "shared secret").
                If a string is passed, it is translated into an instance of
                `astrapy.authentication.RerankingAPIKeyHeaderProvider`.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults inherited from the Database.
                This allows for a deeper configuration of the returned
                collection object, e.g. concerning timeouts.

        Returns:
            an `AsyncCollection` instance, representing the newly-created collection.

        Raises:
            ValueError: if no keyspace is passed and none is set on the database.
            UnexpectedDataAPIResponseException: if the API response does not
                carry the expected `{"ok": 1}` status.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> # Create a collection using the fluent syntax for its definition
            >>> from astrapy.constants import VectorMetric
            >>> from astrapy.info import CollectionDefinition
            >>>
            >>> collection_definition = (
            ...     CollectionDefinition.builder()
            ...     .set_vector_dimension(3)
            ...     .set_vector_metric(VectorMetric.DOT_PRODUCT)
            ...     .set_indexing("deny", ["annotations", "logs"])
            ...     .build()
            ... )
            >>> my_collection = asyncio.run(async_database.create_collection(
            ...     "my_events",
            ...     definition=collection_definition,
            ... ))
            >>>
            >>> # Create a collection with the definition as object
            >>> from astrapy.info import CollectionVectorOptions
            >>>
            >>> collection_definition_1 = CollectionDefinition(
            ...     vector=CollectionVectorOptions(
            ...         dimension=3,
            ...         metric=VectorMetric.DOT_PRODUCT,
            ...     ),
            ...     indexing={"deny": ["annotations", "logs"]},
            ... )
            >>> my_collection_1 = asyncio.run(async_database.create_collection(
            ...     "my_events",
            ...     definition=collection_definition_1,
            ... ))
            >>>
            >>>
            >>> # Create a collection with the definition as plain dictionary
            >>> collection_definition_2 = {
            ...     "indexing": {"deny": ["annotations", "logs"]},
            ...     "vector": {
            ...         "dimension": 3,
            ...         "metric": VectorMetric.DOT_PRODUCT,
            ...     },
            ... }
            >>> my_collection_2 = asyncio.run(async_database.create_collection(
            ...     "my_events",
            ...     definition=collection_definition_2,
            ... ))
        """

        coerced_definition = CollectionDefinition.coerce(definition or {})
        cc_options: dict[str, Any] = coerced_definition.as_dict()

        # The label is the same whether the timeout is explicit or a default.
        timeout_label = "collection_admin_timeout_ms"
        if collection_admin_timeout_ms is None:
            request_ms = self.api_options.timeout_options.collection_admin_timeout_ms
        else:
            request_ms = collection_admin_timeout_ms

        commander = self._get_driver_commander(keyspace=keyspace)

        # Fields that are None or an empty dict are left out of the command.
        command_fields = {"name": name, "options": cc_options}
        cc_payload = {
            "createCollection": {
                field: value
                for field, value in command_fields.items()
                if value is not None and value != {}
            }
        }

        logger.info(f"createCollection('{name}')")
        cc_response = await commander.async_request(
            payload=cc_payload,
            timeout_context=_TimeoutContext(
                request_ms=request_ms, label=timeout_label
            ),
        )
        if cc_response.get("status") != {"ok": 1}:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from createCollection API command.",
                raw_response=cc_response,
            )
        logger.info(f"finished createCollection('{name}')")

        return self.get_collection(
            name,
            document_type=document_type,
            keyspace=keyspace,
            embedding_api_key=embedding_api_key,
            reranking_api_key=reranking_api_key,
            spawn_api_options=spawn_api_options,
        )

    async def drop_collection(
        self,
        name: str,
        *,
        keyspace: str | None = None,
        collection_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> dict[str, Any]:
        """
        Remove a collection, together with every document it holds, from the database.

        Args:
            name: name of the collection to be dropped.
            keyspace: keyspace in which the collection lives. When omitted,
                the working keyspace of this database is used.
            collection_admin_timeout_ms: timeout, expressed in milliseconds, for
                the underlying API request. This object's defaults apply when it
                is not supplied. (Only one API request is issued by this method,
                so all timeout parameters are handled in the same way.)
            request_timeout_ms: an alias for `collection_admin_timeout_ms`.
            timeout_ms: an alias for `collection_admin_timeout_ms`.

        Returns:
            the "status" part of the API response, i.e. `{"ok": 1}`
            whenever the method returns normally.

        Raises:
            UnexpectedDataAPIResponseException: if the response status
                differs from `{"ok": 1}`.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> asyncio.run(async_database.list_collection_names())
            ['a_collection', 'my_v_col', 'another_col']
            >>> asyncio.run(async_database.drop_collection("my_v_col"))
            {'ok': 1}
            >>> asyncio.run(async_database.list_collection_names())
            ['a_collection', 'another_col']
        """

        request_ms, timeout_label = _select_singlereq_timeout_ca(
            timeout_options=self.api_options.timeout_options,
            collection_admin_timeout_ms=collection_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # an explicitly passed keyspace wins over the database-level one
        target_keyspace = keyspace or self.keyspace
        commander = self._get_driver_commander(keyspace=target_keyspace)
        logger.info(f"deleteCollection('{name}')")
        response = await commander.async_request(
            payload={"deleteCollection": {"name": name}},
            timeout_context=_TimeoutContext(request_ms=request_ms, label=timeout_label),
        )
        # anything other than a plain {"ok": 1} status is treated as a failure
        if response.get("status") != {"ok": 1}:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from deleteCollection API command.",
                raw_response=response,
            )
        logger.info(f"finished deleteCollection('{name}')")
        return response.get("status", {})  # type: ignore[no-any-return]

    async def list_collections(
        self,
        *,
        keyspace: str | None = None,
        collection_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[CollectionDescriptor]:
        """
        Return a descriptor for each collection found in a keyspace of this database.

        Args:
            keyspace: which keyspace to inspect. When omitted,
                the general setting for this database is used.
            collection_admin_timeout_ms: timeout, expressed in milliseconds, for
                the underlying API request. This object's defaults apply when it
                is not supplied. (Only one API request is issued by this method,
                so all timeout parameters are handled in the same way.)
            request_timeout_ms: an alias for `collection_admin_timeout_ms`.
            timeout_ms: an alias for `collection_admin_timeout_ms`.

        Returns:
            a list of CollectionDescriptor instances, one per collection.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> async def a_list_colls(adb: AsyncDatabase) -> None:
            ...     a_coll_list = await adb.list_collections()
            ...     print("* list:", a_coll_list)
            ...     for coll in await adb.list_collections():
            ...         print("* coll:", coll)
            ...
            >>> asyncio.run(a_list_colls(async_database))
            * list: [CollectionDescriptor(name='my_v_col', options=CollectionDefinition())]
            * coll: CollectionDescriptor(name='my_v_col', options=CollectionDefinition())
        """

        request_ms, timeout_label = _select_singlereq_timeout_ca(
            timeout_options=self.api_options.timeout_options,
            collection_admin_timeout_ms=collection_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # the actual API call is delegated to the context-based helper
        listing_timeout = _TimeoutContext(request_ms=request_ms, label=timeout_label)
        return await self._list_collections_ctx(
            keyspace=keyspace,
            timeout_context=listing_timeout,
        )

    async def _list_collections_ctx(
        self,
        *,
        keyspace: str | None,
        timeout_context: _TimeoutContext,
    ) -> list[CollectionDescriptor]:
        """
        Run the `findCollections` command (with the "explain" option enabled)
        and convert each returned entry into a `CollectionDescriptor`.

        Args:
            keyspace: the keyspace passed on to the driver commander lookup.
            timeout_context: the timeout settings to use for the API request.

        Returns:
            a list with one `CollectionDescriptor` per item found under
            the "collections" key of the response status.

        Raises:
            UnexpectedDataAPIResponseException: if the response status
                has no "collections" key.
        """

        commander = self._get_driver_commander(keyspace=keyspace)
        find_payload = {"findCollections": {"options": {"explain": True}}}
        logger.info("findCollections")
        response = await commander.async_request(
            payload=find_payload,
            timeout_context=timeout_context,
        )
        if "collections" not in response.get("status", {}):
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from findCollections API command.",
                raw_response=response,
            )
        logger.info("finished findCollections")
        # each entry is a dict, to be marshaled into a "descriptor"
        raw_collections = response["status"]["collections"]
        return [
            CollectionDescriptor._from_dict(raw_collection)
            for raw_collection in raw_collections
        ]

    async def list_collection_names(
        self,
        *,
        keyspace: str | None = None,
        collection_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[str]:
        """
        Return the names of the collections found in a keyspace of this database.

        Args:
            keyspace: which keyspace to inspect. When omitted,
                the general setting for this database is used.
            collection_admin_timeout_ms: timeout, expressed in milliseconds, for
                the underlying API request. This object's defaults apply when it
                is not supplied. (Only one API request is issued by this method,
                so all timeout parameters are handled in the same way.)
            request_timeout_ms: an alias for `collection_admin_timeout_ms`.
            timeout_ms: an alias for `collection_admin_timeout_ms`.

        Returns:
            the collection names, as a list of strings in no particular order.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> asyncio.run(async_database.list_collection_names())
            ['a_collection', 'another_col']
        """

        request_ms, timeout_label = _select_singlereq_timeout_ca(
            timeout_options=self.api_options.timeout_options,
            collection_admin_timeout_ms=collection_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        commander = self._get_driver_commander(keyspace=keyspace)
        # no options are sent with the command here
        find_payload: dict[str, Any] = {"findCollections": {}}
        logger.info("findCollections")
        response = await commander.async_request(
            payload=find_payload,
            timeout_context=_TimeoutContext(request_ms=request_ms, label=timeout_label),
        )
        if "collections" not in response.get("status", {}):
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from findCollections API command.",
                raw_response=response,
            )
        logger.info("finished findCollections")
        return response["status"]["collections"]  # type: ignore[no-any-return]

    @overload
    def get_table(
        self,
        name: str,
        *,
        keyspace: str | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncTable[DefaultRowType]: ...

    @overload
    def get_table(
        self,
        name: str,
        *,
        row_type: type[ROW],
        keyspace: str | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncTable[ROW]: ...

    def get_table(
        self,
        name: str,
        *,
        row_type: type[Any] = DefaultRowType,
        keyspace: str | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncTable[ROW]:
        """
        Build an `AsyncTable` object that stands for a table on this database.

        No API request is issued by this method: the returned `AsyncTable` is
        just a local object. In order to use it meaningfully, the table must
        already exist on the database (for example because it was created
        earlier through the `create_table` method).

        Args:
            name: the name of the table.
            row_type: a formal specifier used only by the type checker.
                When left out, the returned object is implicitly
                an `AsyncTable[dict[str, Any]]`. When given, it must agree with
                the type hint used in the assignment.
                See the examples below.
            keyspace: the keyspace the table belongs to. When omitted,
                the general setting for this database is used.
            embedding_api_key: optional API key(s) for interacting with the table.
                If an embedding service is configured, and this parameter is not None,
                each Data API call will include the necessary embedding-related headers
                as specified by this parameter. A plain string is turned into
                the one "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
                For some vectorize providers/models, if using header-based
                authentication, specialized subclasses of
                `astrapy.authentication.EmbeddingHeadersProvider` should be supplied.
            reranking_api_key: optional API key(s) for interacting with the table.
                If a reranker is configured for the table, and this parameter
                is not None, Data API calls will include the appropriate
                reranker-related headers according to this parameter. Reranker services
                may not necessarily require this setting (e.g. if the service needs no
                authentication, or one is configured as part of the table
                definition relying on a "shared secret").
                A plain string is turned into an instance of
                `astrapy.authentication.RerankingAPIKeyHeaderProvider`.
            spawn_api_options: a complete or partial specification of the
                API Options, used to override the defaults inherited from the
                Database. This allows for a deeper configuration of the table.
                The `embedding_api_key` and `reranking_api_key` parameters
                are applied on top of these options.

        Returns:
            an `AsyncTable` instance, representing the desired table
                (but without any form of validation).

        Raises:
            ValueError: if no keyspace is passed and none is set on this database.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> # Get an AsyncTable object (and read a property of it as an example):
            >>> my_async_table = async_database.get_table("games")
            >>> my_async_table.full_name
            'default_keyspace.games'
            >>>
            >>> # Get an AsyncTable object in a specific keyspace,
            >>> # and set an embedding API key to it:
            >>> my_other_async_table = async_database.get_table(
            ...     "tournaments",
            ...     keyspace="the_other_keyspace",
            ...     embedding_api_key="secret-012abc...",
            ... )
            >>> from astrapy import AsyncTable
            >>> MyCustomDictType = dict[str, int]
            >>>
            >>> # Get an AsyncTable object typed with a specific type for its rows:
            >>> my_typed_async_table: AsyncTable[MyCustomDictType] = async_database.get_table(
            ...     "games",
            ...     row_type=MyCustomDictType,
            ... )
        """

        # imported here, not at module level, to avoid a circular-import error
        from astrapy.table import AsyncTable

        # layering: database defaults <- spawn_api_options <- explicit API keys
        spawned_api_options = self.api_options.with_override(spawn_api_options)
        api_key_overrides = APIOptions(
            embedding_api_key=embedding_api_key,
            reranking_api_key=reranking_api_key,
        )
        table_api_options = spawned_api_options.with_override(api_key_overrides)

        table_keyspace = keyspace or self.keyspace
        if table_keyspace is None:
            raise ValueError(
                "No keyspace specified. This operation requires a keyspace to "
                "be set, e.g. through the `use_keyspace` method."
            )
        return AsyncTable[ROW](
            database=self,
            name=name,
            keyspace=table_keyspace,
            api_options=table_api_options,
        )

    @overload
    async def create_table(
        self,
        name: str,
        *,
        definition: CreateTableDefinition | dict[str, Any],
        keyspace: str | None = None,
        if_not_exists: bool | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncTable[DefaultRowType]: ...

    @overload
    async def create_table(
        self,
        name: str,
        *,
        definition: CreateTableDefinition | dict[str, Any],
        row_type: type[ROW],
        keyspace: str | None = None,
        if_not_exists: bool | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncTable[ROW]: ...

    async def create_table(
        self,
        name: str,
        *,
        definition: CreateTableDefinition | dict[str, Any],
        row_type: type[Any] = DefaultRowType,
        keyspace: str | None = None,
        if_not_exists: bool | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncTable[ROW]:
        """
        Create a table on the database and return the AsyncTable
        instance that represents it.

        The method returns only after the API has answered the `createTable`
        command successfully. Unlike the `get_table` method, which issues
        no request at all, this method causes the table to be actually
        created on DB.

        Args:
            name: the name of the table.
            definition: the full definition of the table, either as an
                instance of `CreateTableDefinition` or as an equivalent (nested)
                dictionary; the latter is parsed into a `CreateTableDefinition`.
                See the `astrapy.info.CreateTableDefinition` class and the
                `AsyncTable` class for more details and ways to construct this object.
            row_type: a formal specifier used only by the type checker.
                When left out, the returned object is implicitly
                an `AsyncTable[dict[str, Any]]`. When given, it must agree with
                the type hint used in the assignment.
                See the examples below.
            keyspace: the keyspace in which the table is created.
                When omitted, the general setting for this database is used.
            if_not_exists: if set to True, the command will succeed even if a table
                with the specified name already exists (in which case no actual
                table creation takes place on the database). If not provided,
                the option is left out of the request and the API default
                holds, i.e. an error is raised by the API in case of
                table-name collision.
            table_admin_timeout_ms: timeout, expressed in milliseconds, for
                the underlying API request. This object's defaults apply when it
                is not supplied. (Only one API request is issued by this method,
                so all timeout parameters are handled in the same way.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.
            embedding_api_key: optional API key(s) for interacting with the table.
                If an embedding service is configured, and this parameter is not None,
                each Data API call will include the necessary embedding-related headers
                as specified by this parameter. A plain string is turned into
                the one "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
                For some vectorize providers/models, if using header-based authentication,
                specialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`
                should be supplied.
            reranking_api_key: optional API key(s) for interacting with the table.
                If a reranker is configured for the table, and this parameter
                is not None, Data API calls will include the appropriate
                reranker-related headers according to this parameter. Reranker services
                may not necessarily require this setting (e.g. if the service needs no
                authentication, or one is configured as part of the table
                definition relying on a "shared secret").
                A plain string is turned into an instance of
                `astrapy.authentication.RerankingAPIKeyHeaderProvider`.
            spawn_api_options: a complete or partial specification of the
                API Options, used to override the defaults inherited from the
                Database when building the returned table object.
                This allows for a deeper configuration of the table, e.g.
                concerning timeouts; if this is passed together with
                the named timeout parameters, the latter will take precedence
                in their respective settings.

        Returns:
            an `AsyncTable` instance, representing the
            newly-created table.

        Raises:
            UnexpectedDataAPIResponseException: if the response status
                differs from `{"ok": 1}`.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> # Create a table using the fluent syntax for definition
            >>> from astrapy.constants import SortMode
            >>> from astrapy.info import (
            ...     CreateTableDefinition,
            ...     ColumnType,
            ... )
            >>> table_definition = (
            ...     CreateTableDefinition.builder()
            ...     .add_column("match_id", ColumnType.TEXT)
            ...     .add_column("round", ColumnType.INT)
            ...     .add_vector_column("m_vector", dimension=3)
            ...     .add_column("score", ColumnType.INT)
            ...     .add_column("when", ColumnType.TIMESTAMP)
            ...     .add_column("winner", ColumnType.TEXT)
            ...     .add_set_column("fighters", ColumnType.UUID)
            ...     .add_partition_by(["match_id"])
            ...     .add_partition_sort({"round": SortMode.ASCENDING})
            ...     .build()
            ... )
            >>> my_async_table = asyncio.run(async_database.create_table(
            ...     "games",
            ...     definition=table_definition,
            ... ))
            >>>
            >>> # Create a table with the definition as object
            >>> # (and do not raise an error if the table exists already)
            >>> from astrapy.info import (
            ...     CreateTableDefinition,
            ...     TablePrimaryKeyDescriptor,
            ...     TableScalarColumnTypeDescriptor,
            ...     TableValuedColumnType,
            ...     TableValuedColumnTypeDescriptor,
            ...     TableVectorColumnTypeDescriptor,
            ... )
            >>> table_definition_1 = CreateTableDefinition(
            ...     columns={
            ...         "match_id": TableScalarColumnTypeDescriptor(
            ...             ColumnType.TEXT,
            ...         ),
            ...         "round": TableScalarColumnTypeDescriptor(
            ...             ColumnType.INT,
            ...         ),
            ...         "m_vector": TableVectorColumnTypeDescriptor(
            ...             column_type="vector", dimension=3
            ...         ),
            ...         "score": TableScalarColumnTypeDescriptor(
            ...             ColumnType.INT,
            ...         ),
            ...         "when": TableScalarColumnTypeDescriptor(
            ...             ColumnType.TIMESTAMP,
            ...         ),
            ...         "winner": TableScalarColumnTypeDescriptor(
            ...             ColumnType.TEXT,
            ...         ),
            ...         "fighters": TableValuedColumnTypeDescriptor(
            ...             column_type=TableValuedColumnType.SET,
            ...             value_type=ColumnType.UUID,
            ...         ),
            ...     },
            ...     primary_key=TablePrimaryKeyDescriptor(
            ...         partition_by=["match_id"],
            ...         partition_sort={"round": SortMode.ASCENDING},
            ...     ),
            ... )
            >>> my_async_table_1 = asyncio.run(async_database.create_table(
            ...     "games",
            ...     definition=table_definition_1,
            ...     if_not_exists=True,
            ... ))
            >>>
            >>> # Create a table with the definition as plain dictionary
            >>> # (and do not raise an error if the table exists already)
            >>> table_definition_2 = {
            ...     "columns": {
            ...         "match_id": {"type": "text"},
            ...         "round": {"type": "int"},
            ...         "m_vector": {"type": "vector", "dimension": 3},
            ...         "score": {"type": "int"},
            ...         "when": {"type": "timestamp"},
            ...         "winner": {"type": "text"},
            ...         "fighters": {"type": "set", "valueType": "uuid"},
            ...     },
            ...     "primaryKey": {
            ...         "partitionBy": ["match_id"],
            ...         "partitionSort": {"round": 1},
            ...     },
            ... }
            >>> my_async_table_2 = asyncio.run(async_database.create_table(
            ...     "games",
            ...     definition=table_definition_2,
            ...     if_not_exists=True,
            ... ))
        """

        # "ifNotExists" is sent only when the caller has expressed a choice
        creation_options: dict[str, bool] = (
            {} if if_not_exists is None else {"ifNotExists": if_not_exists}
        )
        definition_dict: dict[str, Any] = CreateTableDefinition.coerce(
            definition
        ).as_dict()
        request_ms, timeout_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        commander = self._get_driver_commander(keyspace=keyspace)
        command_body = {
            "name": name,
            "definition": definition_dict,
            "options": creation_options,
        }
        # None values and empty dictionaries are left out of the command
        create_payload = {
            "createTable": {
                key: value
                for key, value in command_body.items()
                if value is not None and value != {}
            }
        }
        logger.info(f"createTable('{name}')")
        response = await commander.async_request(
            payload=create_payload,
            timeout_context=_TimeoutContext(request_ms=request_ms, label=timeout_label),
        )
        if response.get("status") != {"ok": 1}:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from createTable API command.",
                raw_response=response,
            )
        logger.info(f"finished createTable('{name}')")
        # the table object itself is built exactly as `get_table` would
        return self.get_table(
            name,
            row_type=row_type,
            keyspace=keyspace,
            embedding_api_key=embedding_api_key,
            reranking_api_key=reranking_api_key,
            spawn_api_options=spawn_api_options,
        )

    async def drop_table_index(
        self,
        name: str,
        *,
        keyspace: str | None = None,
        if_exists: bool | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Delete an index (of any kind) from the table it is associated to.

        The method returns only after the API has answered the `dropIndex`
        command successfully.

        Note:
            Index names are unique across a keyspace, even though each index
            is associated to a table. This is why this call does not
            require a table name.

        Args:
            name: the name of the index.
            keyspace: the keyspace the index belongs to.
                When omitted, the general setting for this database is used.
            if_exists: if passed as True, trying to drop a non-existing index
                will not error, just silently do nothing instead. If not provided,
                the option is left out of the request and the API default
                behaviour will hold.
            table_admin_timeout_ms: timeout, expressed in milliseconds, for
                the underlying API request. This object's defaults apply when it
                is not supplied. (Only one API request is issued by this method,
                so all timeout parameters are handled in the same way.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Raises:
            UnexpectedDataAPIResponseException: if the response status
                differs from `{"ok": 1}`.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> # Drop an index from the keyspace:
            >>> asyncio.run(async_database.drop_table_index("score_index"))
            >>> # Drop an index, without erroring if it does not exist:
            >>> asyncio.run(
            ...     async_database.drop_table_index("score_index", if_exists=True)
            ... )
        """

        request_ms, timeout_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # "ifExists" is sent only when the caller has expressed a choice
        index_drop_options: dict[str, bool] = (
            {} if if_exists is None else {"ifExists": if_exists}
        )
        command_body = {
            "name": name,
            "options": index_drop_options,
        }
        # None values and empty dictionaries are left out of the command
        drop_payload = {
            "dropIndex": {
                key: value
                for key, value in command_body.items()
                if value is not None and value != {}
            }
        }
        commander = self._get_driver_commander(keyspace=keyspace)
        logger.info(f"dropIndex('{name}')")
        response = await commander.async_request(
            payload=drop_payload,
            timeout_context=_TimeoutContext(request_ms=request_ms, label=timeout_label),
        )
        if response.get("status") != {"ok": 1}:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from dropIndex API command.",
                raw_response=response,
            )
        logger.info(f"finished dropIndex('{name}')")

    async def drop_table(
        self,
        name: str,
        *,
        keyspace: str | None = None,
        if_exists: bool | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> dict[str, Any]:
        """
        Drop a table from the database, along with all rows therein and related indexes.

        Args:
            name: the name of the table to drop.
            keyspace: the keyspace where the table resides. If not specified,
                the database working keyspace is assumed.
            if_exists: if passed as True, trying to drop a non-existing table
                will not error, just silently do nothing instead. If not provided,
                the API default behaviour will hold.
            table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Returns:
            the "status" part of the API response, i.e. `{"ok": 1}`
            (any other status results in an exception being raised).

        Raises:
            UnexpectedDataAPIResponseException: if the "status" of the API
                response is anything other than `{"ok": 1}`.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> asyncio.run(async_database.list_table_names())
            ['fighters', 'games']
            >>> asyncio.run(async_database.drop_table("fighters"))
            {'ok': 1}
            >>> asyncio.run(async_database.list_table_names())
            ['games']
            >>> # not erroring because of if_exists:
            >>> asyncio.run(async_database.drop_table("fighters", if_exists=True))
            {'ok': 1}
        """

        _table_admin_timeout_ms, _ta_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # an explicitly-passed keyspace wins over the database working keyspace
        _keyspace = keyspace or self.keyspace
        # "options" is sent only if the caller expressed a choice for if_exists
        dt_options: dict[str, bool] = (
            {"ifExists": if_exists} if if_exists is not None else {}
        )
        driver_commander = self._get_driver_commander(keyspace=_keyspace)
        dt_fields: dict[str, Any] = {
            "name": name,
            "options": dt_options,
        }
        # None values and empty sub-dictionaries are left out of the payload
        dt_payload = {
            "dropTable": {
                k: v for k, v in dt_fields.items() if v is not None and v != {}
            }
        }
        logger.info(f"dropTable('{name}')")
        dt_response = await driver_commander.async_request(
            payload=dt_payload,
            timeout_context=_TimeoutContext(
                request_ms=_table_admin_timeout_ms, label=_ta_label
            ),
        )
        if dt_response.get("status") != {"ok": 1}:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from dropTable API command.",
                raw_response=dt_response,
            )
        logger.info(f"finished dropTable('{name}')")
        return dt_response.get("status", {})  # type: ignore[no-any-return]

    async def list_tables(
        self,
        *,
        keyspace: str | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[ListTableDescriptor]:
        """
        Retrieve a descriptor for each table found in a keyspace of this database.

        Args:
            keyspace: the keyspace whose tables are to be listed. If omitted,
                the general setting for this database is assumed.
            table_admin_timeout_ms: a timeout, in milliseconds, for the
                underlying API request. If omitted, this object's defaults apply.
                (A single API request is issued by this method, therefore all
                timeout parameters are treated the same.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Returns:
            a list of ListTableDescriptor instances, one per table.

        Example:
            >>> tables = asyncio.run(my_async_database.list_tables())
            >>> tables
            [ListTableDescriptor(name='fighters', definition=ListTableDefinition(...
            >>> tables[1].name
            'games'
            >>> tables[1].definition.columns
            {'match_id': TableScalarColumnTypeDescriptor(ColumnType.TEXT),...
            >>> tables[1].definition.columns['score']
            TableScalarColumnTypeDescriptor(ColumnType.INT)
            >>> tables[1].definition.primary_key.partition_by
            ['match_id']
            >>> tables[1].definition.primary_key.partition_sort
            {'round': 1}
        """

        # reduce the three equivalent timeout parameters to one value and its label
        resolved_timeout_ms, timeout_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        listing_timeout_context = _TimeoutContext(
            request_ms=resolved_timeout_ms, label=timeout_label
        )
        # the actual request and response parsing live in the "_ctx" helper
        table_descriptors = await self._list_tables_ctx(
            keyspace=keyspace,
            timeout_context=listing_timeout_context,
        )
        return table_descriptors

    async def _list_tables_ctx(
        self,
        *,
        keyspace: str | None,
        timeout_context: _TimeoutContext,
    ) -> list[ListTableDescriptor]:
        """
        Issue a `listTables` command (with the "explain" option turned on)
        and convert each returned table entry into a ListTableDescriptor.

        Args:
            keyspace: the keyspace passed to the driver commander getter.
            timeout_context: the timeout context to apply to the API request.

        Returns:
            a list of ListTableDescriptor instances, one per entry found
            in the response under "status" -> "tables".

        Raises:
            UnexpectedDataAPIResponseException: if the response "status"
                has no "tables" entry.
        """
        commander = self._get_driver_commander(keyspace=keyspace)
        logger.info("listTables")
        api_response = await commander.async_request(
            payload={"listTables": {"options": {"explain": True}}},
            timeout_context=timeout_context,
        )
        if "tables" not in api_response.get("status", {}):
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from listTables API command.",
                raw_response=api_response,
            )
        logger.info("finished listTables")
        # each item is a plain dict, to be marshaled into a "descriptor"
        return [
            ListTableDescriptor.coerce(raw_table)
            for raw_table in api_response["status"]["tables"]
        ]

    async def list_table_names(
        self,
        *,
        keyspace: str | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[str]:
        """
        Retrieve the names of all tables found in a keyspace of this database.

        Args:
            keyspace: the keyspace whose tables are to be listed. If omitted,
                the general setting for this database is assumed.
            table_admin_timeout_ms: a timeout, in milliseconds, for the
                underlying API request. If omitted, this object's defaults apply.
                (A single API request is issued by this method, therefore all
                timeout parameters are treated the same.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Returns:
            a list of the table names as strings, in no particular order.

        Raises:
            UnexpectedDataAPIResponseException: if the response "status"
                has no "tables" entry.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> async def destroy_temp_table(async_db: AsyncDatabase) -> None:
            ...     print(await async_db.list_table_names())
            ...     await async_db.drop_table("my_v_tab")
            ...     print(await async_db.list_table_names())
            ...
            >>> asyncio.run(destroy_temp_table(async_database))
            ['fighters', 'my_v_tab', 'games']
            ['fighters', 'games']
        """

        # reduce the three equivalent timeout parameters to one value and its label
        resolved_timeout_ms, timeout_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        commander = self._get_driver_commander(keyspace=keyspace)
        # no options: the payload used here is a bare listTables command
        names_payload: dict[str, Any] = {"listTables": {}}
        logger.info("listTables")
        names_timeout_context = _TimeoutContext(
            request_ms=resolved_timeout_ms, label=timeout_label
        )
        api_response = await commander.async_request(
            payload=names_payload,
            timeout_context=names_timeout_context,
        )
        if "tables" not in api_response.get("status", {}):
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from listTables API command.",
                raw_response=api_response,
            )
        logger.info("finished listTables")
        return api_response["status"]["tables"]  # type: ignore[no-any-return]

    async def command(
        self,
        body: dict[str, Any],
        *,
        keyspace: str | None | UnsetType = _UNSET,
        collection_or_table_name: str | None = None,
        raise_api_errors: bool = True,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> dict[str, Any]:
        """
        Send a POST request to the Data API for this database with
        an arbitrary, caller-provided payload.

        Args:
            body: a JSON-serializable dictionary, the payload of the request.
            keyspace: the keyspace to use, if any. If a keyspace is employed,
                it is used to construct the full request URL. To run a command
                targeting no specific keyspace (rather, the database as a whole),
                pass an explicit `None`: the request URL will lack the suffix
                "/<keyspace>" component. If unspecified, the working keyspace of
                this database is used. If another keyspace is passed, it will be
                used instead of the database's working one.
            collection_or_table_name: if provided, the name is appended at the end
                of the endpoint. In this way, this method allows collection-
                and table-level arbitrary POST requests as well.
                This parameter cannot be used if `keyspace=None` is explicitly provided.
            raise_api_errors: if True, responses with a nonempty 'errors' field
                result in an astrapy exception being raised.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a dictionary with the response of the HTTP request.

        Raises:
            ValueError: if `collection_or_table_name` is provided together
                with an explicit `keyspace=None`.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> asyncio.run(async_database.command({"findCollections": {}}))
            {'status': {'collections': ['my_coll']}}
            >>> asyncio.run(async_database.command(
            ...     {"countDocuments": {}},
            ...     collection_or_table_name="my_coll",
            ... ))
            {'status': {'count': 123}}
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # Three cases: explicit None (no keyspace at all), unset (fall back to
        # the working keyspace), or a caller-provided keyspace name.
        _keyspace: str | None
        if keyspace is None:
            if collection_or_table_name is not None:
                raise ValueError(
                    "Cannot pass collection_or_table_name to database "
                    "`command` on a no-keyspace command"
                )
            _keyspace = None
        elif isinstance(keyspace, UnsetType):
            _keyspace = self.keyspace
        else:
            _keyspace = keyspace
        # build the ad-hoc-commander path with _keyspace and the coll.or.table:
        # None components are skipped, slashes are trimmed from each component
        # and components that end up empty are dropped.
        base_path_components = [
            comp
            for comp in (
                ncomp.strip("/")
                for ncomp in (
                    self.api_options.data_api_url_options.api_path,
                    self.api_options.data_api_url_options.api_version,
                    _keyspace,
                    collection_or_table_name,
                )
                if ncomp is not None
            )
            if comp != ""
        ]
        base_path = f"/{'/'.join(base_path_components)}"
        command_commander = APICommander(
            api_endpoint=self.api_endpoint,
            path=base_path,
            headers=self._commander_headers,
            callers=self.api_options.callers,
            redacted_header_names=self.api_options.redacted_header_names,
        )

        # the log lines identify the command by the sorted top-level body keys
        _cmd_desc = ",".join(sorted(body.keys()))
        logger.info(f"command={_cmd_desc} on {self.__class__.__name__}")
        req_response = await command_commander.async_request(
            payload=body,
            raise_api_errors=raise_api_errors,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        # "finished" prefix: tells the completion entry apart from the start one
        logger.info(f"finished command={_cmd_desc} on {self.__class__.__name__}")
        return req_response

    def get_database_admin(
        self,
        *,
        token: str | TokenProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> DatabaseAdmin:
        """
        Create a DatabaseAdmin object for this database, suitable for
        admin tasks such as managing keyspaces.

        The concrete class of the returned object depends on the environment
        the database is in (it is always a subclass of DatabaseAdmin).

        Args:
            token: an access token with enough permission on the database to
                perform the desired tasks. If omitted (as it can generally be done),
                the token of this Database is used.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults.
                This allows for a deeper configuration of the database admin, e.g.
                concerning timeouts; if this is passed together with
                the equivalent named parameters, the latter will take precedence
                in their respective settings.

        Returns:
            A DatabaseAdmin instance targeting this database. More precisely,
            for Astra DB an instance of `AstraDBDatabaseAdmin` is returned;
            for other environments, an instance of `DataAPIDatabaseAdmin` is returned.

        Raises:
            InvalidEnvironmentException: in an Astra DB environment, if the
                API endpoint of this database cannot be parsed.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> my_db_admin = async_database.get_database_admin()
            >>> if "new_keyspace" not in my_db_admin.list_keyspaces():
            ...     my_db_admin.create_keyspace("new_keyspace")
            >>> my_db_admin.list_keyspaces()
            ['default_keyspace', 'new_keyspace']
        """

        # lazy importing here to avoid circular dependency
        from astrapy.admin.admin import AstraDBDatabaseAdmin, DataAPIDatabaseAdmin

        # the named parameter is applied last, so it wins over spawn_api_options
        token_options = APIOptions(
            token=token,
        )
        admin_api_options = self.api_options.with_override(
            spawn_api_options
        ).with_override(token_options)

        if admin_api_options.environment not in Environment.astra_db_values:
            return DataAPIDatabaseAdmin(
                api_endpoint=self.api_endpoint,
                api_options=admin_api_options,
                spawner_database=self,
            )

        if parse_api_endpoint(self.api_endpoint) is None:
            raise InvalidEnvironmentException(
                "Cannot use a nonstandard API endpoint for this operation."
            )
        return AstraDBDatabaseAdmin(
            api_endpoint=self.api_endpoint,
            api_options=admin_api_options,
            spawner_database=self,
        )

Instance variables

var id : str

The ID of this database. Not available outside of Astra DB and when using custom domains.

Example

>>> my_async_database.id
'01234567-89ab-cdef-0123-456789abcdef'

Expand source code

@property
def id(self) -> str:
    """
    The database ID, as found in the API endpoint of this database.
    Not available outside of Astra DB, nor when using custom domains.

    Raises:
        InvalidEnvironmentException: if the environment is not an Astra DB
            one, or if the API endpoint cannot be parsed.

    Example:
        >>> my_async_database.id
        '01234567-89ab-cdef-0123-456789abcdef'
    """

    if self.api_options.environment not in Environment.astra_db_values:
        raise InvalidEnvironmentException(
            "Database is not in a supported environment for this operation."
        )
    endpoint_info = parse_api_endpoint(self.api_endpoint)
    if endpoint_info is None:
        raise InvalidEnvironmentException(
            "Cannot inspect a nonstandard API endpoint for properties."
        )
    return endpoint_info.database_id

var keyspace : str | None

The keyspace this database uses as target for all commands when no method-call-specific keyspace is specified.

Returns

the working keyspace (a string), or None if not set.

Example

>>> async_database.keyspace
'the_keyspace'

Expand source code

@property
def keyspace(self) -> str | None:
    """
    The keyspace this database uses as target for all commands when
    no method-call-specific keyspace is specified.

    Returns:
        the working keyspace (a string), or None if not set.

    Example:
        >>> async_database.keyspace
        'the_keyspace'
    """

    return self._using_keyspace

var region : str

The region where this database is located.

The region is still well defined in case of multi-region databases, since a Database instance connects to exactly one of the regions (as specified by the API Endpoint).

Not available outside of Astra DB and when using custom domains.

Example

>>> my_async_database.region
'us-west-2'

Expand source code

@property
def region(self) -> str:
    """
    The region of this database, as found in its API endpoint.

    Even for multi-region databases the region is well defined, because
    a Database instance connects to exactly one of the regions
    (the one specified by the API Endpoint).

    Not available outside of Astra DB, nor when using custom domains.

    Raises:
        InvalidEnvironmentException: if the environment is not an Astra DB
            one, or if the API endpoint cannot be parsed.

    Example:
        >>> my_async_database.region
        'us-west-2'
    """

    if self.api_options.environment not in Environment.astra_db_values:
        raise InvalidEnvironmentException(
            "Database is not in a supported environment for this operation."
        )
    endpoint_info = parse_api_endpoint(self.api_endpoint)
    if endpoint_info is None:
        raise InvalidEnvironmentException(
            "Cannot inspect a nonstandard API endpoint for properties."
        )
    return endpoint_info.region

Methods

async def command(self, body: dict[str, Any], *, keyspace: str | None | UnsetType = (unset), collection_or_table_name: str | None = None, raise_api_errors: bool = True, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> dict[str, typing.Any]

Send a POST request to the Data API for this database with an arbitrary, caller-provided payload.

Args

body: a JSON-serializable dictionary, the payload of the request.
keyspace: the keyspace to use, if any. If a keyspace is employed, it is used to construct the full request URL. To run a command targeting no specific keyspace (rather, the database as a whole), pass an explicit None: the request URL will lack the suffix "/<keyspace>" component. If unspecified, the working keyspace of this database is used. If another keyspace is passed, it will be used instead of the database's working one.
collection_or_table_name: if provided, the name is appended at the end of the endpoint. In this way, this method allows collection- and table-level arbitrary POST requests as well. This parameter cannot be used if keyspace=None is explicitly provided.
raise_api_errors: if True, responses with a nonempty 'errors' field result in an astrapy exception being raised.
general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a dictionary with the response of the HTTP request.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> my_db.command({"findCollections": {}})
{'status': {'collections': ['my_coll']}}
>>> my_db.command({"countDocuments": {}}, collection_or_table_name="my_coll")
{'status': {'count': 123}}

Expand source code

async def command(
    self,
    body: dict[str, Any],
    *,
    keyspace: str | None | UnsetType = _UNSET,
    collection_or_table_name: str | None = None,
    raise_api_errors: bool = True,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> dict[str, Any]:
    """
    Send a POST request to the Data API for this database with
    an arbitrary, caller-provided payload.

    Args:
        body: a JSON-serializable dictionary, the payload of the request.
        keyspace: the keyspace to use, if any. If a keyspace is employed,
            it is used to construct the full request URL. To run a command
            targeting no specific keyspace (rather, the database as a whole),
            pass an explicit `None`: the request URL will lack the suffix
            "/<keyspace>" component. If unspecified, the working keyspace of
            this database is used. If another keyspace is passed, it will be
            used instead of the database's working one.
        collection_or_table_name: if provided, the name is appended at the end
            of the endpoint. In this way, this method allows collection-
            and table-level arbitrary POST requests as well.
            This parameter cannot be used if `keyspace=None` is explicitly provided.
        raise_api_errors: if True, responses with a nonempty 'errors' field
            result in an astrapy exception being raised.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a dictionary with the response of the HTTP request.

    Raises:
        ValueError: if `collection_or_table_name` is provided together
            with an explicit `keyspace=None`.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> asyncio.run(async_database.command({"findCollections": {}}))
        {'status': {'collections': ['my_coll']}}
        >>> asyncio.run(async_database.command(
        ...     {"countDocuments": {}},
        ...     collection_or_table_name="my_coll",
        ... ))
        {'status': {'count': 123}}
    """

    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # Three cases: explicit None (no keyspace at all), unset (fall back to
    # the working keyspace), or a caller-provided keyspace name.
    _keyspace: str | None
    if keyspace is None:
        if collection_or_table_name is not None:
            raise ValueError(
                "Cannot pass collection_or_table_name to database "
                "`command` on a no-keyspace command"
            )
        _keyspace = None
    elif isinstance(keyspace, UnsetType):
        _keyspace = self.keyspace
    else:
        _keyspace = keyspace
    # build the ad-hoc-commander path with _keyspace and the coll.or.table:
    # None components are skipped, slashes are trimmed from each component
    # and components that end up empty are dropped.
    base_path_components = [
        comp
        for comp in (
            ncomp.strip("/")
            for ncomp in (
                self.api_options.data_api_url_options.api_path,
                self.api_options.data_api_url_options.api_version,
                _keyspace,
                collection_or_table_name,
            )
            if ncomp is not None
        )
        if comp != ""
    ]
    base_path = f"/{'/'.join(base_path_components)}"
    command_commander = APICommander(
        api_endpoint=self.api_endpoint,
        path=base_path,
        headers=self._commander_headers,
        callers=self.api_options.callers,
        redacted_header_names=self.api_options.redacted_header_names,
    )

    # the log lines identify the command by the sorted top-level body keys
    _cmd_desc = ",".join(sorted(body.keys()))
    logger.info(f"command={_cmd_desc} on {self.__class__.__name__}")
    req_response = await command_commander.async_request(
        payload=body,
        raise_api_errors=raise_api_errors,
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    # "finished" prefix: tells the completion entry apart from the start one
    logger.info(f"finished command={_cmd_desc} on {self.__class__.__name__}")
    return req_response

async def create_collection(self, name: str, *, definition: CollectionDefinition | dict[str, Any] | None = None, document_type: type[Any] = typing.Dict[str, typing.Any], keyspace: str | None = None, collection_admin_timeout_ms: int | None = None, embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = (unset), reranking_api_key: str | RerankingHeadersProvider | UnsetType = (unset), spawn_api_options: APIOptions | UnsetType = (unset)) ‑> AsyncCollection[DOC]

Creates a collection on the database and returns the AsyncCollection instance that represents it.

This is a blocking operation: the method returns when the collection is ready to be used. As opposed to the get_collection method, this method causes the collection to be actually created on DB.

Args

name: the name of the collection.
definition: a complete collection definition for the collection. This can be an instance of CollectionDefinition or an equivalent (nested) dictionary, in which case it will be parsed into a CollectionDefinition. See the CollectionDefinition class and the AsyncCollection class for more details and ways to construct this object.
document_type: this parameter acts as a formal specifier for the type checker. If omitted, the resulting AsyncCollection is implicitly an AsyncCollection[dict[str, Any]]. If provided, it must match the type hint specified in the assignment. See the examples below.
keyspace: the keyspace where the collection is to be created. If not specified, the general setting for this database is used.
collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply.
embedding_api_key: optional API key(s) for interacting with the collection. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
reranking_api_key: optional API key(s) for interacting with the collection. If a reranker is configured for the collection, and this parameter is not None, Data API calls will include the appropriate reranker-related headers according to this parameter. Reranker services may not necessarily require this setting (e.g. if the service needs no authentication, or one is configured as part of the collection definition relying on a "shared secret"). If a string is passed, it is translated into an instance of RerankingAPIKeyHeaderProvider.
spawn_api_options: a specification - complete or partial - of the API Options to override the defaults inherited from the Database. This allows for a deeper configuration of the collection, e.g. concerning timeouts; if this is passed together with the named timeout parameters, the latter will take precedence in their respective settings.

Returns

an AsyncCollection instance, representing the newly-created collection.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> # Create a collection using the fluent syntax for its definition
>>> from astrapy.constants import VectorMetric
>>> from astrapy.info import CollectionDefinition
>>>
>>> collection_definition = (
...     CollectionDefinition.builder()
...     .set_vector_dimension(3)
...     .set_vector_metric(VectorMetric.DOT_PRODUCT)
...     .set_indexing("deny", ["annotations", "logs"])
...     .build()
... )
>>> my_collection = asyncio.run(async_database.create_collection(
...     "my_events",
...     definition=collection_definition,
... ))
>>>
>>> # Create a collection with the definition as object
>>> from astrapy.info import CollectionVectorOptions
>>>
>>> collection_definition_1 = CollectionDefinition(
...     vector=CollectionVectorOptions(
...         dimension=3,
...         metric=VectorMetric.DOT_PRODUCT,
...     ),
...     indexing={"deny": ["annotations", "logs"]},
... )
>>> my_collection_1 = asyncio.run(async_database.create_collection(
...     "my_events",
...     definition=collection_definition_1,
... ))
>>>
>>>
>>> # Create a collection with the definition as plain dictionary
>>> collection_definition_2 = {
...     "indexing": {"deny": ["annotations", "logs"]},
...     "vector": {
...         "dimension": 3,
...         "metric": VectorMetric.DOT_PRODUCT,
...     },
... }
>>> my_collection_2 = asyncio.run(async_database.create_collection(
...     "my_events",
...     definition=collection_definition_2,
... ))

Expand source code

async def create_collection(
    self,
    name: str,
    *,
    definition: CollectionDefinition | dict[str, Any] | None = None,
    document_type: type[Any] = DefaultDocumentType,
    keyspace: str | None = None,
    collection_admin_timeout_ms: int | None = None,
    embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
    reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> AsyncCollection[DOC]:
    """
    Creates a collection on the database and returns the AsyncCollection
    instance that represents it.

    This is a blocking operation: the method returns when the collection
    is ready to be used. As opposed to the `get_collection` method call,
    this method causes the collection to be actually created on DB.

    Args:
        name: the name of the collection.
        definition: a complete definition for the collection. This can be an
            instance of `CollectionDefinition` or an equivalent (nested) dictionary,
            in which case it will be parsed into a `CollectionDefinition`.
            If omitted (or empty), no "options" are sent in the API command.
            See the `astrapy.info.CollectionDefinition` class and the
            `AsyncCollection` class for more details and ways to construct this object.
        document_type: this parameter acts as a formal specifier for the type checker.
            If omitted, the resulting AsyncCollection is implicitly
            an `AsyncCollection[dict[str, Any]]`. If provided, it must match the
            type hint specified in the assignment.
        keyspace: the keyspace where the collection is to be created.
            If not specified, the general setting for this database is used.
        collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
        embedding_api_key: optional API key(s) for interacting with the collection.
            If an embedding service is configured, and this parameter is not None,
            each Data API call will include the necessary embedding-related headers
            as specified by this parameter. If a string is passed, it translates
            into the one "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            For some vectorize providers/models, if using header-based authentication,
            specialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`
            should be supplied.
        reranking_api_key: optional API key(s) for interacting with the collection.
            If a reranker is configured for the collection, and this parameter
            is not None, Data API calls will include the appropriate
            reranker-related headers according to this parameter. Reranker services
            may not necessarily require this setting (e.g. if the service needs no
            authentication, or one is configured as part of the collection
            definition relying on a "shared secret").
            If a string is passed, it is translated into an instance of
            `astrapy.authentication.RerankingAPIKeyHeaderProvider`.
        spawn_api_options: a specification - complete or partial - of the
            API Options to override the defaults inherited from the Database.
            This allows for a deeper configuration of the collection, e.g.
            concerning timeouts; if this is passed together with
            the named timeout parameters, the latter will take precedence
            in their respective settings.

    Returns:
        an `AsyncCollection` instance, representing the newly-created collection.

    Raises:
        UnexpectedDataAPIResponseException: if the "status" found in the
            response to the createCollection command is not `{"ok": 1}`.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> # Create a collection using the fluent syntax for its definition
        >>> from astrapy.constants import VectorMetric
        >>> from astrapy.info import CollectionDefinition
        >>>
        >>> collection_definition = (
        ...     CollectionDefinition.builder()
        ...     .set_vector_dimension(3)
        ...     .set_vector_metric(VectorMetric.DOT_PRODUCT)
        ...     .set_indexing("deny", ["annotations", "logs"])
        ...     .build()
        ... )
        >>> my_collection = asyncio.run(async_database.create_collection(
        ...     "my_events",
        ...     definition=collection_definition,
        ... ))
        >>>
        >>> # Create a collection with the definition as object
        >>> from astrapy.info import CollectionVectorOptions
        >>>
        >>> collection_definition_1 = CollectionDefinition(
        ...     vector=CollectionVectorOptions(
        ...         dimension=3,
        ...         metric=VectorMetric.DOT_PRODUCT,
        ...     ),
        ...     indexing={"deny": ["annotations", "logs"]},
        ... )
        >>> my_collection_1 = asyncio.run(async_database.create_collection(
        ...     "my_events",
        ...     definition=collection_definition_1,
        ... ))
        >>>
        >>>
        >>> # Create a collection with the definition as plain dictionary
        >>> collection_definition_2 = {
        ...     "indexing": {"deny": ["annotations", "logs"]},
        ...     "vector": {
        ...         "dimension": 3,
        ...         "metric": VectorMetric.DOT_PRODUCT,
        ...     },
        ... }
        >>> my_collection_2 = asyncio.run(async_database.create_collection(
        ...     "my_events",
        ...     definition=collection_definition_2,
        ... ))
    """

    # Normalize whatever was passed (object, dict or nothing) to a plain dict.
    cc_definition: dict[str, Any] = CollectionDefinition.coerce(
        definition or {}
    ).as_dict()
    # The timeout label is the same whether the value comes from the
    # method argument or from this object's defaults.
    _ca_label = "collection_admin_timeout_ms"
    _collection_admin_timeout_ms = (
        collection_admin_timeout_ms
        if collection_admin_timeout_ms is not None
        else self.api_options.timeout_options.collection_admin_timeout_ms
    )
    driver_commander = self._get_driver_commander(keyspace=keyspace)
    # Entries that are None or an empty dict are left out of the command body
    # (e.g. "options" when no definition is supplied).
    cc_payload = {
        "createCollection": {
            k: v
            for k, v in {
                "name": name,
                "options": cc_definition,
            }.items()
            if v is not None and v != {}
        }
    }
    logger.info(f"createCollection('{name}')")
    cc_response = await driver_commander.async_request(
        payload=cc_payload,
        timeout_context=_TimeoutContext(
            request_ms=_collection_admin_timeout_ms, label=_ca_label
        ),
    )
    # Anything other than an exact {"ok": 1} status is treated as a failure.
    if cc_response.get("status") != {"ok": 1}:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from createCollection API command.",
            raw_response=cc_response,
        )
    logger.info(f"finished createCollection('{name}')")
    # Hand back a client-side object for the collection that was just created.
    return self.get_collection(
        name,
        document_type=document_type,
        keyspace=keyspace,
        embedding_api_key=embedding_api_key,
        reranking_api_key=reranking_api_key,
        spawn_api_options=spawn_api_options,
    )

async def create_table(self, name: str, *, definition: CreateTableDefinition | dict[str, Any], row_type: type[Any] = typing.Dict[str, typing.Any], keyspace: str | None = None, if_not_exists: bool | None = None, table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None, embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = (unset), reranking_api_key: str | RerankingHeadersProvider | UnsetType = (unset), spawn_api_options: APIOptions | UnsetType = (unset)) ‑> AsyncTable[ROW]

Creates a table on the database and returns the AsyncTable instance that represents it.

This is a blocking operation: the method returns when the table is ready to be used. As opposed to the get_table method call, this method causes the table to be actually created on DB.

Args

name: the name of the table.
definition: a complete table definition for the table. This can be an instance of CreateTableDefinition or an equivalent (nested) dictionary, in which case it will be parsed into a CreateTableDefinition. See the CreateTableDefinition class and the AsyncTable class for more details and ways to construct this object.
row_type: this parameter acts as a formal specifier for the type checker. If omitted, the resulting AsyncTable is implicitly an AsyncTable[dict[str, Any]]. If provided, it must match the type hint specified in the assignment. See the examples below.
keyspace: the keyspace where the table is to be created. If not specified, the general setting for this database is used.
if_not_exists: if set to True, the command will succeed even if a table with the specified name already exists (in which case no actual table creation takes place on the database). Defaults to False, i.e. an error is raised by the API in case of table-name collision.
table_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for table_admin_timeout_ms.
timeout_ms: an alias for table_admin_timeout_ms.
embedding_api_key: optional API key(s) for interacting with the table. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
reranking_api_key: optional API key(s) for interacting with the table. If a reranker is configured for the table, and this parameter is not None, Data API calls will include the appropriate reranker-related headers according to this parameter. Reranker services may not necessarily require this setting (e.g. if the service needs no authentication, or one is configured as part of the table definition relying on a "shared secret"). If a string is passed, it is translated into an instance of RerankingAPIKeyHeaderProvider.
spawn_api_options: a specification - complete or partial - of the API Options to override the defaults inherited from the Database. This allows for a deeper configuration of the table, e.g. concerning timeouts; if this is passed together with the named timeout parameters, the latter will take precedence in their respective settings.

Returns

an AsyncTable instance, representing the newly-created table.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> # Create a table using the fluent syntax for definition
>>> from astrapy.constants import SortMode
>>> from astrapy.info import (
...     CreateTableDefinition,
...     ColumnType,
... )
>>> table_definition = (
...     CreateTableDefinition.builder()
...     .add_column("match_id", ColumnType.TEXT)
...     .add_column("round", ColumnType.INT)
...     .add_vector_column("m_vector", dimension=3)
...     .add_column("score", ColumnType.INT)
...     .add_column("when", ColumnType.TIMESTAMP)
...     .add_column("winner", ColumnType.TEXT)
...     .add_set_column("fighters", ColumnType.UUID)
...     .add_partition_by(["match_id"])
...     .add_partition_sort({"round": SortMode.ASCENDING})
...     .build()
... )
>>> my_async_table = asyncio.run(async_database.create_table(
...     "games",
...     definition=table_definition,
... ))
>>>
>>> # Create a table with the definition as object
>>> # (and do not raise an error if the table exists already)
>>> from astrapy.info import (
...     CreateTableDefinition,
...     TablePrimaryKeyDescriptor,
...     TableScalarColumnTypeDescriptor,
...     TableValuedColumnType,
...     TableValuedColumnTypeDescriptor,
...     TableVectorColumnTypeDescriptor,
... )
>>> table_definition_1 = CreateTableDefinition(
...     columns={
...         "match_id": TableScalarColumnTypeDescriptor(
...             ColumnType.TEXT,
...         ),
...         "round": TableScalarColumnTypeDescriptor(
...             ColumnType.INT,
...         ),
...         "m_vector": TableVectorColumnTypeDescriptor(
...             column_type="vector", dimension=3
...         ),
...         "score": TableScalarColumnTypeDescriptor(
...             ColumnType.INT,
...         ),
...         "when": TableScalarColumnTypeDescriptor(
...             ColumnType.TIMESTAMP,
...         ),
...         "winner": TableScalarColumnTypeDescriptor(
...             ColumnType.TEXT,
...         ),
...         "fighters": TableValuedColumnTypeDescriptor(
...             column_type=TableValuedColumnType.SET,
...             value_type=ColumnType.UUID,
...         ),
...     },
...     primary_key=TablePrimaryKeyDescriptor(
...         partition_by=["match_id"],
...         partition_sort={"round": SortMode.ASCENDING},
...     ),
... )
>>> my_async_table_1 = asyncio.run(async_database.create_table(
...     "games",
...     definition=table_definition_1,
...     if_not_exists=True,
... ))
>>>
>>> # Create a table with the definition as plain dictionary
>>> # (and do not raise an error if the table exists already)
>>> table_definition_2 = {
...     "columns": {
...         "match_id": {"type": "text"},
...         "round": {"type": "int"},
...         "m_vector": {"type": "vector", "dimension": 3},
...         "score": {"type": "int"},
...         "when": {"type": "timestamp"},
...         "winner": {"type": "text"},
...         "fighters": {"type": "set", "valueType": "uuid"},
...     },
...     "primaryKey": {
...         "partitionBy": ["match_id"],
...         "partitionSort": {"round": 1},
...     },
... }
>>> my_async_table_2 = asyncio.run(async_database.create_table(
...     "games",
...     definition=table_definition_2,
...     if_not_exists=True,
... ))

Expand source code

async def create_table(
    self,
    name: str,
    *,
    definition: CreateTableDefinition | dict[str, Any],
    row_type: type[Any] = DefaultRowType,
    keyspace: str | None = None,
    if_not_exists: bool | None = None,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
    embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
    reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> AsyncTable[ROW]:
    """
    Creates a table on the database and returns the AsyncTable
    instance that represents it.

    This is a blocking operation: the method returns when the table
    is ready to be used. As opposed to the `get_table` method call,
    this method causes the table to be actually created on DB.

    Args:
        name: the name of the table.
        definition: a complete definition for the table. This can be an
            instance of `CreateTableDefinition` or an equivalent (nested) dictionary,
            in which case it will be parsed into a `CreateTableDefinition`.
            See the `astrapy.info.CreateTableDefinition` class and the
            `AsyncTable` class for more details and ways to construct this object.
        row_type: this parameter acts as a formal specifier for the type checker.
            If omitted, the resulting AsyncTable is implicitly
            an `AsyncTable[dict[str, Any]]`. If provided, it must match
            the type hint specified in the assignment.
            See the examples below.
        keyspace: the keyspace where the table is to be created.
            If not specified, the general setting for this database is used.
        if_not_exists: if set to True, the command will succeed even if a table
            with the specified name already exists (in which case no actual
            table creation takes place on the database). Defaults to False,
            i.e. an error is raised by the API in case of table-name collision.
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.
        embedding_api_key: optional API key(s) for interacting with the table.
            If an embedding service is configured, and this parameter is not None,
            each Data API call will include the necessary embedding-related headers
            as specified by this parameter. If a string is passed, it translates
            into the one "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            For some vectorize providers/models, if using header-based authentication,
            specialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`
            should be supplied.
        reranking_api_key: optional API key(s) for interacting with the table.
            If a reranker is configured for the table, and this parameter
            is not None, Data API calls will include the appropriate
            reranker-related headers according to this parameter. Reranker services
            may not necessarily require this setting (e.g. if the service needs no
            authentication, or one is configured as part of the table
            definition relying on a "shared secret").
            If a string is passed, it is translated into an instance of
            `astrapy.authentication.RerankingAPIKeyHeaderProvider`.
        spawn_api_options: a specification - complete or partial - of the
            API Options to override the defaults inherited from the Database.
            This allows for a deeper configuration of the table, e.g.
            concerning timeouts; if this is passed together with
            the named timeout parameters, the latter will take precedence
            in their respective settings.

    Returns:
        an `AsyncTable` instance, representing the
        newly-created table.

    Raises:
        UnexpectedDataAPIResponseException: if the "status" found in the
            response to the createTable command is not `{"ok": 1}`.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> # Create a table using the fluent syntax for definition
        >>> from astrapy.constants import SortMode
        >>> from astrapy.info import (
        ...     CreateTableDefinition,
        ...     ColumnType,
        ... )
        >>> table_definition = (
        ...     CreateTableDefinition.builder()
        ...     .add_column("match_id", ColumnType.TEXT)
        ...     .add_column("round", ColumnType.INT)
        ...     .add_vector_column("m_vector", dimension=3)
        ...     .add_column("score", ColumnType.INT)
        ...     .add_column("when", ColumnType.TIMESTAMP)
        ...     .add_column("winner", ColumnType.TEXT)
        ...     .add_set_column("fighters", ColumnType.UUID)
        ...     .add_partition_by(["match_id"])
        ...     .add_partition_sort({"round": SortMode.ASCENDING})
        ...     .build()
        ... )
        >>> my_async_table = asyncio.run(async_database.create_table(
        ...     "games",
        ...     definition=table_definition,
        ... ))
        >>>
        >>> # Create a table with the definition as object
        >>> # (and do not raise an error if the table exists already)
        >>> from astrapy.info import (
        ...     CreateTableDefinition,
        ...     TablePrimaryKeyDescriptor,
        ...     TableScalarColumnTypeDescriptor,
        ...     TableValuedColumnType,
        ...     TableValuedColumnTypeDescriptor,
        ...     TableVectorColumnTypeDescriptor,
        ... )
        >>> table_definition_1 = CreateTableDefinition(
        ...     columns={
        ...         "match_id": TableScalarColumnTypeDescriptor(
        ...             ColumnType.TEXT,
        ...         ),
        ...         "round": TableScalarColumnTypeDescriptor(
        ...             ColumnType.INT,
        ...         ),
        ...         "m_vector": TableVectorColumnTypeDescriptor(
        ...             column_type="vector", dimension=3
        ...         ),
        ...         "score": TableScalarColumnTypeDescriptor(
        ...             ColumnType.INT,
        ...         ),
        ...         "when": TableScalarColumnTypeDescriptor(
        ...             ColumnType.TIMESTAMP,
        ...         ),
        ...         "winner": TableScalarColumnTypeDescriptor(
        ...             ColumnType.TEXT,
        ...         ),
        ...         "fighters": TableValuedColumnTypeDescriptor(
        ...             column_type=TableValuedColumnType.SET,
        ...             value_type=ColumnType.UUID,
        ...         ),
        ...     },
        ...     primary_key=TablePrimaryKeyDescriptor(
        ...         partition_by=["match_id"],
        ...         partition_sort={"round": SortMode.ASCENDING},
        ...     ),
        ... )
        >>> my_async_table_1 = asyncio.run(async_database.create_table(
        ...     "games",
        ...     definition=table_definition_1,
        ...     if_not_exists=True,
        ... ))
        >>>
        >>> # Create a table with the definition as plain dictionary
        >>> # (and do not raise an error if the table exists already)
        >>> table_definition_2 = {
        ...     "columns": {
        ...         "match_id": {"type": "text"},
        ...         "round": {"type": "int"},
        ...         "m_vector": {"type": "vector", "dimension": 3},
        ...         "score": {"type": "int"},
        ...         "when": {"type": "timestamp"},
        ...         "winner": {"type": "text"},
        ...         "fighters": {"type": "set", "valueType": "uuid"},
        ...     },
        ...     "primaryKey": {
        ...         "partitionBy": ["match_id"],
        ...         "partitionSort": {"round": 1},
        ...     },
        ... }
        >>> my_async_table_2 = asyncio.run(async_database.create_table(
        ...     "games",
        ...     definition=table_definition_2,
        ...     if_not_exists=True,
        ... ))
    """

    # "ifNotExists" is sent only when the caller expressed a preference.
    create_options: dict[str, bool] = (
        {} if if_not_exists is None else {"ifNotExists": if_not_exists}
    )
    # Normalize the definition (object or dict) to a plain dict.
    definition_dict: dict[str, Any] = CreateTableDefinition.coerce(
        definition
    ).as_dict()
    # Resolve the three timeout aliases (and this object's defaults)
    # into one value plus the label of the setting it came from.
    resolved_timeout_ms, timeout_label = _select_singlereq_timeout_ta(
        timeout_options=self.api_options.timeout_options,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    commander = self._get_driver_commander(keyspace=keyspace)
    # Entries that are None or an empty dict are left out of the command body.
    command_fields = {
        "name": name,
        "definition": definition_dict,
        "options": create_options,
    }
    create_payload = {
        "createTable": {
            field: value
            for field, value in command_fields.items()
            if value is not None and value != {}
        }
    }
    logger.info(f"createTable('{name}')")
    create_response = await commander.async_request(
        payload=create_payload,
        timeout_context=_TimeoutContext(
            request_ms=resolved_timeout_ms, label=timeout_label
        ),
    )
    # Anything other than an exact {"ok": 1} status is treated as a failure.
    if create_response.get("status") != {"ok": 1}:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from createTable API command.",
            raw_response=create_response,
        )
    logger.info(f"finished createTable('{name}')")
    # Hand back a client-side object for the table that was just created.
    return self.get_table(
        name,
        row_type=row_type,
        keyspace=keyspace,
        embedding_api_key=embedding_api_key,
        reranking_api_key=reranking_api_key,
        spawn_api_options=spawn_api_options,
    )

async def drop_collection(self, name: str, *, keyspace: str | None = None, collection_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> dict[str, typing.Any]

Drop a collection from the database, along with all documents therein.

Args

name: the name of the collection to drop.
keyspace: the keyspace where the collection resides. If not specified, the database working keyspace is assumed.
collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for collection_admin_timeout_ms.
timeout_ms: an alias for collection_admin_timeout_ms.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> asyncio.run(async_database.list_collection_names())
['a_collection', 'my_v_col', 'another_col']
>>> asyncio.run(async_database.drop_collection("my_v_col"))
>>> asyncio.run(async_database.list_collection_names())
['a_collection', 'another_col']

Expand source code

async def drop_collection(
    self,
    name: str,
    *,
    keyspace: str | None = None,
    collection_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> dict[str, Any]:
    """
    Drop a collection from the database, along with all documents therein.

    Args:
        name: the name of the collection to drop.
        keyspace: the keyspace where the collection resides. If not specified,
            the database working keyspace is assumed.
        collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `collection_admin_timeout_ms`.
        timeout_ms: an alias for `collection_admin_timeout_ms`.

    Returns:
        the "status" part of the API response, i.e. `{"ok": 1}`
        (any other status results in an exception being raised).

    Raises:
        UnexpectedDataAPIResponseException: if the "status" found in the
            response to the deleteCollection command is not `{"ok": 1}`.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> asyncio.run(async_database.list_collection_names())
        ['a_collection', 'my_v_col', 'another_col']
        >>> asyncio.run(async_database.drop_collection("my_v_col"))
        {'ok': 1}
        >>> asyncio.run(async_database.list_collection_names())
        ['a_collection', 'another_col']
    """

    # Resolve the three timeout aliases (and this object's defaults)
    # into one value plus the label of the setting it came from.
    resolved_timeout_ms, timeout_label = _select_singlereq_timeout_ca(
        timeout_options=self.api_options.timeout_options,
        collection_admin_timeout_ms=collection_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # A missing (or empty) keyspace falls back to this database's keyspace.
    commander = self._get_driver_commander(keyspace=keyspace or self.keyspace)
    logger.info(f"deleteCollection('{name}')")
    delete_response = await commander.async_request(
        payload={"deleteCollection": {"name": name}},
        timeout_context=_TimeoutContext(
            request_ms=resolved_timeout_ms, label=timeout_label
        ),
    )
    # Anything other than an exact {"ok": 1} status is treated as a failure.
    if delete_response.get("status") != {"ok": 1}:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from deleteCollection API command.",
            raw_response=delete_response,
        )
    logger.info(f"finished deleteCollection('{name}')")
    return delete_response.get("status", {})  # type: ignore[no-any-return]

Drop a table from the database, along with all rows therein and related indexes.

Args

name: the name of the table to drop.
keyspace: the keyspace where the table resides. If not specified, the database working keyspace is assumed.
if_exists: if passed as True, trying to drop a non-existing table will not error, just silently do nothing instead. If not provided, the API default behaviour will hold.
table_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for table_admin_timeout_ms.
timeout_ms: an alias for table_admin_timeout_ms.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> asyncio.run(async_database.list_table_names())
['fighters', 'games']
>>> asyncio.run(async_database.drop_table("fighters"))
>>> asyncio.run(async_database.list_table_names())
['games']
>>> # not erroring because of if_exists:
>>> asyncio.run(async_database.drop_table("fighters", if_exists=True))

Expand source code

async def drop_table(
    self,
    name: str,
    *,
    keyspace: str | None = None,
    if_exists: bool | None = None,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> dict[str, Any]:
    """
    Drop a table from the database, along with all rows therein and related indexes.

    Args:
        name: the name of the table to drop.
        keyspace: the keyspace where the table resides. If not specified,
            the database working keyspace is assumed.
        if_exists: if passed as True, trying to drop a non-existing table
            will not error, just silently do nothing instead. If not provided,
            the API default behaviour will hold.
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Returns:
        the "status" part of the API response, i.e. `{"ok": 1}`
        (any other status results in an exception being raised).

    Raises:
        UnexpectedDataAPIResponseException: if the "status" found in the
            response to the dropTable command is not `{"ok": 1}`.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> asyncio.run(async_database.list_table_names())
        ['fighters', 'games']
        >>> asyncio.run(async_database.drop_table("fighters"))
        {'ok': 1}
        >>> asyncio.run(async_database.list_table_names())
        ['games']
        >>> # not erroring because of if_exists:
        >>> asyncio.run(async_database.drop_table("fighters", if_exists=True))
        {'ok': 1}
    """

    # Resolve the three timeout aliases (and this object's defaults)
    # into one value plus the label of the setting it came from.
    _table_admin_timeout_ms, _ta_label = _select_singlereq_timeout_ta(
        timeout_options=self.api_options.timeout_options,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # A missing (or empty) keyspace falls back to this database's keyspace.
    _keyspace = keyspace or self.keyspace
    # "ifExists" is sent only when the caller expressed a preference.
    dt_options: dict[str, bool] = (
        {"ifExists": if_exists} if if_exists is not None else {}
    )
    driver_commander = self._get_driver_commander(keyspace=_keyspace)
    # Entries that are None or an empty dict are left out of the command body
    # (e.g. "options" when `if_exists` is not given).
    dt_payload = {
        "dropTable": {
            k: v
            for k, v in {
                "name": name,
                "options": dt_options,
            }.items()
            if v is not None and v != {}
        }
    }
    logger.info(f"dropTable('{name}')")
    dt_response = await driver_commander.async_request(
        payload=dt_payload,
        timeout_context=_TimeoutContext(
            request_ms=_table_admin_timeout_ms, label=_ta_label
        ),
    )
    # Anything other than an exact {"ok": 1} status is treated as a failure.
    if dt_response.get("status") != {"ok": 1}:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from dropTable API command.",
            raw_response=dt_response,
        )
    logger.info(f"finished dropTable('{name}')")
    return dt_response.get("status", {})  # type: ignore[no-any-return]

Drops (deletes) an index (of any kind) from the table it is associated to.

This is a blocking operation: the method returns once the index is deleted.

Note

Although associated to a table, index names are unique across a keyspace. For this reason, no table name is required in this call.

Args

name: the name of the index.
keyspace: the keyspace to which the index belongs. If not specified, the general setting for this database is used.
if_exists: if passed as True, trying to drop a non-existing index will not error, just silently do nothing instead. If not provided, the API default behaviour will hold.
table_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for table_admin_timeout_ms.
timeout_ms: an alias for table_admin_timeout_ms.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> # Drop an index from the keyspace:
>>> await async_database.drop_table_index("score_index")
>>> # Drop an index, without erroring if it does not exist:
>>> await async_database.drop_table_index("score_index", if_exists=True)

Expand source code

async def drop_table_index(
    self,
    name: str,
    *,
    keyspace: str | None = None,
    if_exists: bool | None = None,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Delete an index, of whatever kind, from the table it is attached to.

    The call is blocking: this method returns once the index is deleted.

    Note:
        Index names are unique within a whole keyspace, even though each
        index is associated with a table. This is why no table name
        is needed in this call.

    Args:
        name: the name of the index to drop.
        keyspace: the keyspace the index belongs to. When omitted,
            the general setting for this database is used.
        if_exists: when True, dropping an index that does not exist
            is silently a no-op instead of an error. When not provided,
            the option is not sent and the API default behaviour holds.
        table_admin_timeout_ms: a timeout, in milliseconds, for the
            underlying API request. This object's defaults apply if omitted.
            (A single API request is issued by this method, so all timeout
            parameters are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Raises:
        UnexpectedDataAPIResponseException: if the "status" of the API
            response is not exactly `{"ok": 1}`.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> # Drop an index from the keyspace:
        >>> await async_database.drop_table_index("score_index")
        >>> # Drop an index, unless it does not exist already:
        >>> await async_database.drop_table_index("score_index", if_exists=True)
    """

    resolved_timeout_ms, timeout_label = _select_singlereq_timeout_ta(
        timeout_options=self.api_options.timeout_options,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # "ifExists" is sent only when the caller made an explicit choice
    drop_options: dict[str, bool] = (
        {} if if_exists is None else {"ifExists": if_exists}
    )
    # unset or empty entries are left out of the command body
    command_fields = {"name": name, "options": drop_options}
    drop_payload = {
        "dropIndex": {
            field: value
            for field, value in command_fields.items()
            if value is not None and value != {}
        }
    }
    commander = self._get_driver_commander(keyspace=keyspace)
    logger.info(f"dropIndex('{name}')")
    drop_response = await commander.async_request(
        payload=drop_payload,
        timeout_context=_TimeoutContext(
            request_ms=resolved_timeout_ms, label=timeout_label
        ),
    )
    response_status = drop_response.get("status")
    if response_status != {"ok": 1}:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from dropIndex API command.",
            raw_response=drop_response,
        )
    logger.info(f"finished dropIndex('{name}')")

def get_collection(self, name: str, *, document_type: type[Any] = typing.Dict[str, typing.Any], keyspace: str | None = None, embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = (unset), reranking_api_key: str | RerankingHeadersProvider | UnsetType = (unset), spawn_api_options: APIOptions | UnsetType = (unset)) ‑> AsyncCollection[DOC]

Spawn an AsyncCollection object instance representing a collection on this database.

Creating an AsyncCollection instance does not have any effect on the actual state of the database: in other words, for the created AsyncCollection instance to be used meaningfully, the collection must exist already (for instance, it should have been created previously by calling the create_collection method).

Args

name: the name of the collection.
document_type: this parameter acts as a formal specifier for the type checker. If omitted, the resulting AsyncCollection is implicitly an AsyncCollection[dict[str, Any]]. If provided, it must match the type hint specified in the assignment. See the examples below.
keyspace: the keyspace containing the collection. If no keyspace is specified, the setting for this database is used.
embedding_api_key: optional API key(s) for interacting with the collection. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
reranking_api_key: optional API key(s) for interacting with the collection. If a reranker is configured for the collection, and this parameter is not None, Data API calls will include the appropriate reranker-related headers according to this parameter. Reranker services may not necessarily require this setting (e.g. if the service needs no authentication, or one is configured as part of the collection definition relying on a "shared secret"). If a string is passed, it is translated into an instance of RerankingAPIKeyHeaderProvider.
spawn_api_options: a specification - complete or partial - of the API Options to override the defaults inherited from the Database. This allows for a deeper configuration of the collection, e.g. concerning timeouts; if this is passed together with the named timeout parameters, the latter will take precedence in their respective settings.

Returns

an AsyncCollection instance, representing the desired collection (but without any form of validation).

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> async def count_docs(adb: AsyncDatabase, c_name: str) -> int:
...    async_col = adb.get_collection(c_name)
...    return await async_col.count_documents({}, upper_bound=100)
...
>>> asyncio.run(count_docs(async_database, "my_collection"))
45

Note: the attribute and indexing syntax forms achieve the same effect as this method, returning an AsyncCollection. In other words, the following are equivalent: async_database.get_collection("coll_name"), async_database.coll_name and async_database["coll_name"].

Expand source code

def get_collection(
    self,
    name: str,
    *,
    document_type: type[Any] = DefaultDocumentType,
    keyspace: str | None = None,
    embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
    reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> AsyncCollection[DOC]:
    """
    Create an `AsyncCollection` object standing for a collection
    on this database.

    Instantiating an `AsyncCollection` leaves the actual state of the
    database untouched: for the returned object to be usable in a meaningful
    way, the collection must exist already (e.g. because it was created
    earlier through the `create_collection` method).

    Args:
        name: the name of the collection.
        document_type: a formal specifier for the benefit of the type checker.
            When omitted, the returned object is implicitly
            an `AsyncCollection[dict[str, Any]]`. When provided, it must agree
            with the type hint used in the assignment.
            See the examples below.
        keyspace: the keyspace where the collection is found. If not given,
            the setting for this database is used.
        embedding_api_key: optional API key(s) for interacting with the collection.
            If an embedding service is configured, and this parameter is not None,
            each Data API call will carry the embedding-related headers
            this parameter specifies. A string is turned into the one
            "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            With some vectorize providers/models, if header-based
            authentication is used, specialized subclasses of
            `astrapy.authentication.EmbeddingHeadersProvider` should be supplied.
        reranking_api_key: optional API key(s) for interacting with the collection.
            If a reranker is configured for the collection, and this parameter
            is not None, Data API calls will carry the reranker-related
            headers this parameter specifies. Reranker services do not
            necessarily need this setting (e.g. if the service requires no
            authentication, or one is configured as part of the collection
            definition relying on a "shared secret").
            A string is turned into an instance of
            `astrapy.authentication.RerankingAPIKeyHeaderProvider`.
        spawn_api_options: a complete or partial specification of the
            API Options overriding the defaults inherited from the Database.
            This makes a deeper configuration of the collection possible, e.g.
            concerning timeouts; if this is passed together with
            the named parameters of this method, the latter take precedence
            in their respective settings.

    Returns:
        an `AsyncCollection` instance, representing the desired collection
            (but without any form of validation).

    Raises:
        ValueError: if no keyspace is passed and none is set on this database.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> async def count_docs(adb: AsyncDatabase, c_name: str) -> int:
        ...    async_col = adb.get_collection(c_name)
        ...    return await async_col.count_documents({}, upper_bound=100)
        ...
        >>> asyncio.run(count_docs(async_database, "my_collection"))
        45

    Note: the attribute and indexing syntax forms have the same effect
        as this method, returning an AsyncCollection.
        In other words, the following are equivalent:
            async_database.get_collection("coll_name")
            async_database.coll_name
            async_database["coll_name"]
    """

    # imported lazily, to steer clear of a circular-import error
    from astrapy.collection import AsyncCollection

    # layering: database options, then spawn_api_options, then the API keys
    spawned_options = self.api_options.with_override(spawn_api_options)
    key_options = APIOptions(
        embedding_api_key=embedding_api_key,
        reranking_api_key=reranking_api_key,
    )
    collection_api_options = spawned_options.with_override(key_options)

    target_keyspace = keyspace if keyspace else self.keyspace
    if target_keyspace is None:
        raise ValueError(
            "No keyspace specified. This operation requires a keyspace to "
            "be set, e.g. through the `use_keyspace` method."
        )
    return AsyncCollection(
        database=self,
        name=name,
        keyspace=target_keyspace,
        api_options=collection_api_options,
    )

def get_database_admin(self, *, token: str | TokenProvider | UnsetType = (unset), spawn_api_options: APIOptions | UnsetType = (unset)) ‑> DatabaseAdmin

Return a DatabaseAdmin object corresponding to this database, for use in admin tasks such as managing keyspaces.

This method, depending on the environment where the database resides, returns an appropriate subclass of DatabaseAdmin.

Args

token: an access token with enough permission on the database to perform the desired tasks. If omitted (as it can generally be done), the token of this Database is used. This can be either a literal token string or a subclass of TokenProvider.
spawn_api_options: a specification - complete or partial - of the API Options to override the defaults. This allows for a deeper configuration of the database admin, e.g. concerning timeouts; if this is passed together with the equivalent named parameters, the latter will take precedence in their respective settings.

Returns

A DatabaseAdmin instance targeting this database. More precisely, for Astra DB an instance of AstraDBDatabaseAdmin is returned; for other environments, an instance of DataAPIDatabaseAdmin is returned.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> my_db_admin = async_database.get_database_admin()
>>> if "new_keyspace" not in my_db_admin.list_keyspaces():
...     my_db_admin.create_keyspace("new_keyspace")
>>> my_db_admin.list_keyspaces()
['default_keyspace', 'new_keyspace']

Expand source code

def get_database_admin(
    self,
    *,
    token: str | TokenProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> DatabaseAdmin:
    """
    Build a DatabaseAdmin object for this database, to be used
    for admin tasks such as managing keyspaces.

    The concrete subclass of DatabaseAdmin being returned depends on
    the environment where the database resides.

    Args:
        token: an access token with sufficient permissions on the database
            for the desired tasks. If omitted (which is generally fine),
            the token of this Database is used.
            It can be a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        spawn_api_options: a complete or partial specification of the
            API Options overriding the defaults.
            This makes a deeper configuration of the database admin possible,
            e.g. concerning timeouts; if this is passed together with
            the equivalent named parameters, the latter take precedence
            in their respective settings.

    Returns:
        A DatabaseAdmin instance targeting this database. More precisely,
        an instance of `AstraDBDatabaseAdmin` is returned for Astra DB,
        while for other environments the result is a `DataAPIDatabaseAdmin`.

    Raises:
        InvalidEnvironmentException: if the environment is an Astra DB one
            but the API endpoint of this database cannot be parsed
            as a standard one.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> my_db_admin = async_database.get_database_admin()
        >>> if "new_keyspace" not in my_db_admin.list_keyspaces():
        ...     my_db_admin.create_keyspace("new_keyspace")
        >>> my_db_admin.list_keyspaces()
        ['default_keyspace', 'new_keyspace']
    """

    # imported lazily, so as not to incur a circular dependency
    from astrapy.admin.admin import AstraDBDatabaseAdmin, DataAPIDatabaseAdmin

    token_options = APIOptions(token=token)
    # layering: database options, then spawn_api_options, then the token
    spawned_options = self.api_options.with_override(spawn_api_options)
    admin_api_options = spawned_options.with_override(token_options)

    # outside of Astra DB, no check on the endpoint takes place
    if admin_api_options.environment not in Environment.astra_db_values:
        return DataAPIDatabaseAdmin(
            api_endpoint=self.api_endpoint,
            api_options=admin_api_options,
            spawner_database=self,
        )

    if parse_api_endpoint(self.api_endpoint) is None:
        raise InvalidEnvironmentException(
            "Cannot use a nonstandard API endpoint for this operation."
        )
    return AstraDBDatabaseAdmin(
        api_endpoint=self.api_endpoint,
        api_options=admin_api_options,
        spawner_database=self,
    )

def get_table(self, name: str, *, row_type: type[Any] = typing.Dict[str, typing.Any], keyspace: str | None = None, embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = (unset), reranking_api_key: str | RerankingHeadersProvider | UnsetType = (unset), spawn_api_options: APIOptions | UnsetType = (unset)) ‑> AsyncTable[ROW]

Spawn an AsyncTable object instance representing a table on this database.

Creating an AsyncTable instance does not have any effect on the actual state of the database: in other words, for the created AsyncTable instance to be used meaningfully, the table must exist already (for instance, it should have been created previously by calling the create_table method).

Args

name: the name of the table.
row_type: this parameter acts as a formal specifier for the type checker. If omitted, the resulting AsyncTable is implicitly an AsyncTable[dict[str, Any]]. If provided, it must match the type hint specified in the assignment. See the examples below.
keyspace: the keyspace containing the table. If no keyspace is specified, the general setting for this database is used.
embedding_api_key: optional API key(s) for interacting with the table. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
reranking_api_key: optional API key(s) for interacting with the table. If a reranker is configured for the table, and this parameter is not None, Data API calls will include the appropriate reranker-related headers according to this parameter. Reranker services may not necessarily require this setting (e.g. if the service needs no authentication, or one is configured as part of the table definition relying on a "shared secret"). If a string is passed, it is translated into an instance of RerankingAPIKeyHeaderProvider.
spawn_api_options: a specification - complete or partial - of the API Options to override the defaults inherited from the Database. This allows for a deeper configuration of the table, e.g. concerning timeouts; if this is passed together with the named timeout parameters, the latter will take precedence in their respective settings.

Returns

an AsyncTable instance, representing the desired table (but without any form of validation).

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> # Get an AsyncTable object (and read a property of it as an example):
>>> my_async_table = async_database.get_table("games")
>>> my_async_table.full_name
'default_keyspace.games'
>>>
>>> # Get an AsyncTable object in a specific keyspace,
>>> # and set an embedding API key to it:
>>> my_other_async_table = async_database.get_table(
...     "tournaments",
...     keyspace="the_other_keyspace",
...     embedding_api_key="secret-012abc...",
... )
>>> from astrapy import AsyncTable
>>> MyCustomDictType = dict[str, int]
>>>
>>> # Get an AsyncTable object typed with a specific type for its rows:
>>> my_typed_async_table: AsyncTable[MyCustomDictType] = async_database.get_table(
...     "games",
...     row_type=MyCustomDictType,
... )

Expand source code

def get_table(
    self,
    name: str,
    *,
    row_type: type[Any] = DefaultRowType,
    keyspace: str | None = None,
    embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
    reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> AsyncTable[ROW]:
    """
    Create an `AsyncTable` object standing for a table on this database.

    Instantiating an `AsyncTable` leaves the actual state of the
    database untouched: for the returned object to be usable in a meaningful
    way, the table must exist already (e.g. because it was created
    earlier through the `create_table` method).

    Args:
        name: the name of the table.
        row_type: a formal specifier for the benefit of the type checker.
            When omitted, the returned object is implicitly
            an `AsyncTable[dict[str, Any]]`. When provided, it must agree
            with the type hint used in the assignment.
            See the examples below.
        keyspace: the keyspace where the table is found. If not given,
            the general setting for this database is used.
        embedding_api_key: optional API key(s) for interacting with the table.
            If an embedding service is configured, and this parameter is not None,
            each Data API call will carry the embedding-related headers
            this parameter specifies. A string is turned into the one
            "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            With some vectorize providers/models, if header-based
            authentication is used, specialized subclasses of
            `astrapy.authentication.EmbeddingHeadersProvider` should be supplied.
        reranking_api_key: optional API key(s) for interacting with the table.
            If a reranker is configured for the table, and this parameter
            is not None, Data API calls will carry the reranker-related
            headers this parameter specifies. Reranker services do not
            necessarily need this setting (e.g. if the service requires no
            authentication, or one is configured as part of the table
            definition relying on a "shared secret").
            A string is turned into an instance of
            `astrapy.authentication.RerankingAPIKeyHeaderProvider`.
        spawn_api_options: a complete or partial specification of the
            API Options overriding the defaults inherited from the Database.
            This makes a deeper configuration of the table possible, e.g.
            concerning timeouts; if this is passed together with
            the named parameters of this method, the latter take precedence
            in their respective settings.

    Returns:
        an `AsyncTable` instance, representing the desired table
            (but without any form of validation).

    Raises:
        ValueError: if no keyspace is passed and none is set on this database.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> # Get an AsyncTable object (and read a property of it as an example):
        >>> my_async_table = async_database.get_table("games")
        >>> my_async_table.full_name
        'default_keyspace.games'
        >>>
        >>> # Get an AsyncTable object in a specific keyspace,
        >>> # and set an embedding API key to it:
        >>> my_other_async_table = async_database.get_table(
        ...     "tournaments",
        ...     keyspace="the_other_keyspace",
        ...     embedding_api_key="secret-012abc...",
        ... )
        >>> from astrapy import AsyncTable
        >>> MyCustomDictType = dict[str, int]
        >>>
        >>> # Get an AsyncTable object typed with a specific type for its rows:
        >>> my_typed_async_table: AsyncTable[MyCustomDictType] = async_database.get_table(
        ...     "games",
        ...     row_type=MyCustomDictType,
        ... )
    """

    # imported lazily, to steer clear of a circular-import error
    from astrapy.table import AsyncTable

    # layering: database options, then spawn_api_options, then the API keys
    spawned_options = self.api_options.with_override(spawn_api_options)
    key_options = APIOptions(
        embedding_api_key=embedding_api_key,
        reranking_api_key=reranking_api_key,
    )
    table_api_options = spawned_options.with_override(key_options)

    target_keyspace = keyspace if keyspace else self.keyspace
    if target_keyspace is None:
        raise ValueError(
            "No keyspace specified. This operation requires a keyspace to "
            "be set, e.g. through the `use_keyspace` method."
        )
    return AsyncTable[ROW](
        database=self,
        name=name,
        keyspace=target_keyspace,
        api_options=table_api_options,
    )

async def info(self, *, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> AstraDBDatabaseInfo

Additional information on the database as an AstraDBDatabaseInfo instance.

Some of the returned properties are dynamic throughout the lifetime of the database (such as raw_info["keyspaces"]). For this reason, each invocation of this method triggers a new request to the DevOps API.

Not available outside of Astra DB, nor when using custom domains.

Args

database_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for database_admin_timeout_ms.
timeout_ms: an alias for database_admin_timeout_ms.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> asyncio.run(async_database.info()).region
'eu-west-1'
>>> asyncio.run(
...     async_database.info()
... ).raw_info['datacenters'][0]['dateCreated']
'2023-01-30T12:34:56Z'

Note

see the AstraDBDatabaseInfo documentation for a caveat about the difference between the region and the raw["region"] attributes.

Expand source code

async def info(
    self,
    *,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> AstraDBDatabaseInfo:
    """
    Fetch additional information on the database, in the form of
    an AstraDBDatabaseInfo instance.

    Since some of the returned properties can change during the lifetime
    of the database (such as raw_info["keyspaces"]), a new request
    to the DevOps API is issued each time this method is invoked.

    Not available outside of Astra DB, nor when using custom domains.

    Args:
        database_admin_timeout_ms: a timeout, in milliseconds, for the
            underlying API request. This object's defaults apply if omitted.
            (A single API request is issued by this method, so all timeout
            parameters are treated the same.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.

    Returns:
        the AstraDBDatabaseInfo describing this database.

    Raises:
        InvalidEnvironmentException: if the environment is not an Astra DB
            one, or if the API endpoint cannot be parsed as a standard one.
        DevOpsAPIException: if no database info could be fetched.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> asyncio.run(async_database.info()).region
        'eu-west-1'
        >>> asyncio.run(
        ...     async_database.info()
        ... ).raw_info['datacenters'][0]['dateCreated']
        '2023-01-30T12:34:56Z'

    Note:
        see the AstraDBDatabaseInfo documentation for a caveat about the difference
        between the `region` and the `raw["region"]` attributes.
    """

    # two preconditions: an Astra DB environment and a parseable endpoint
    if self.api_options.environment not in Environment.astra_db_values:
        raise InvalidEnvironmentException(
            "Environments outside of Astra DB are not supported."
        )
    if parse_api_endpoint(self.api_endpoint) is None:
        raise InvalidEnvironmentException(
            "Cannot inspect a nonstandard API endpoint for properties."
        )

    resolved_timeout_ms, _da_label = _select_singlereq_timeout_da(
        timeout_options=self.api_options.timeout_options,
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info("getting database info")
    fetched_info = await async_fetch_database_info(
        self.api_endpoint,
        keyspace=self.keyspace,
        request_timeout_ms=resolved_timeout_ms,
        api_options=self.api_options,
    )
    if fetched_info is None:
        raise DevOpsAPIException("Failure while fetching database info.")
    logger.info("finished getting database info")
    return fetched_info

async def list_collection_names(self, *, keyspace: str | None = None, collection_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[str]

List the names of all collections in a given keyspace of this database.

Args

keyspace: the keyspace to be inspected. If not specified, the general setting for this database is assumed.
collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for collection_admin_timeout_ms.
timeout_ms: an alias for collection_admin_timeout_ms.

Returns

a list of the collection names as strings, in no particular order.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> asyncio.run(async_database.list_collection_names())
['a_collection', 'another_col']

Expand source code

async def list_collection_names(
    self,
    *,
    keyspace: str | None = None,
    collection_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[str]:
    """
    Return the names of all collections found in a keyspace of this database.

    Args:
        keyspace: the keyspace to look into. When omitted,
            the general setting for this database is assumed.
        collection_admin_timeout_ms: a timeout, in milliseconds, for the
            underlying API request. This object's defaults apply if omitted.
            (A single API request is issued by this method, so all timeout
            parameters are treated the same.)
        request_timeout_ms: an alias for `collection_admin_timeout_ms`.
        timeout_ms: an alias for `collection_admin_timeout_ms`.

    Returns:
        a list of the collection names as strings, in no particular order.

    Raises:
        UnexpectedDataAPIResponseException: if the "status" of the API
            response has no "collections" entry.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> asyncio.run(async_database.list_collection_names())
        ['a_collection', 'another_col']
    """

    resolved_timeout_ms, timeout_label = _select_singlereq_timeout_ca(
        timeout_options=self.api_options.timeout_options,
        collection_admin_timeout_ms=collection_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    commander = self._get_driver_commander(keyspace=keyspace)
    find_payload: dict[str, Any] = {"findCollections": {}}
    logger.info("findCollections")
    find_response = await commander.async_request(
        payload=find_payload,
        timeout_context=_TimeoutContext(
            request_ms=resolved_timeout_ms, label=timeout_label
        ),
    )
    # a missing "status" is handled the same as one lacking "collections"
    response_status = find_response.get("status", {})
    if "collections" not in response_status:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from findCollections API command.",
            raw_response=find_response,
        )
    logger.info("finished findCollections")
    return response_status["collections"]  # type: ignore[no-any-return]

async def list_collections(self, *, keyspace: str | None = None, collection_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[CollectionDescriptor]

List all collections in a given keyspace for this database.

Args

keyspace: the keyspace to be inspected. If not specified, the general setting for this database is assumed.
collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for collection_admin_timeout_ms.
timeout_ms: an alias for collection_admin_timeout_ms.

Returns

a list of CollectionDescriptor instances, one for each collection.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> async def a_list_colls(adb: AsyncDatabase) -> None:
...     a_coll_list = await adb.list_collections()
...     print("* list:", a_coll_list)
...     for coll in await adb.list_collections():
...         print("* coll:", coll)
...
>>> asyncio.run(a_list_colls(async_database))
* list: [CollectionDescriptor(name='my_v_col', options=CollectionDefinition())]
* coll: CollectionDescriptor(name='my_v_col', options=CollectionDefinition())

Expand source code

async def list_collections(
    self,
    *,
    keyspace: str | None = None,
    collection_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[CollectionDescriptor]:
    """
    Return a descriptor for each collection found in a keyspace of this database.

    Args:
        keyspace: the keyspace to be inspected. If not specified,
            the general setting for this database is assumed.
        collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `collection_admin_timeout_ms`.
        timeout_ms: an alias for `collection_admin_timeout_ms`.

    Returns:
        a list of CollectionDescriptor instances, one for each collection.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> async def a_list_colls(adb: AsyncDatabase) -> None:
        ...     a_coll_list = await adb.list_collections()
        ...     print("* list:", a_coll_list)
        ...     for coll in await adb.list_collections():
        ...         print("* coll:", coll)
        ...
        >>> asyncio.run(a_list_colls(async_database))
        * list: [CollectionDescriptor(name='my_v_col', options=CollectionDefinition())]
        * coll: CollectionDescriptor(name='my_v_col', options=CollectionDefinition())
    """

    # The three timeout parameters are interchangeable: reduce them to one
    # effective value together with its accompanying label.
    effective_timeout_ms, timeout_label = _select_singlereq_timeout_ca(
        timeout_options=self.api_options.timeout_options,
        collection_admin_timeout_ms=collection_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    timeout_context = _TimeoutContext(
        request_ms=effective_timeout_ms,
        label=timeout_label,
    )
    # The actual listing is delegated to the context-aware private method.
    descriptors = await self._list_collections_ctx(
        keyspace=keyspace,
        timeout_context=timeout_context,
    )
    return descriptors

async def list_table_names(self, *, keyspace: str | None = None, table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[str]

List the names of all tables in a given keyspace of this database.

Args

keyspace: the keyspace to be inspected. If not specified, the general setting for this database is assumed.
table_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for table_admin_timeout_ms.
timeout_ms: an alias for table_admin_timeout_ms.

Returns

a list of the table names as strings, in no particular order.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> async def destroy_temp_table(async_db: AsyncDatabase) -> None:
...     print(await async_db.list_table_names())
...     await async_db.drop_table("my_v_tab")
...     print(await async_db.list_table_names())
...
>>> asyncio.run(destroy_temp_table(async_database))
['fighters', 'my_v_tab', 'games']
['fighters', 'games']

Expand source code

async def list_table_names(
    self,
    *,
    keyspace: str | None = None,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[str]:
    """
    Return the names of all tables found in a keyspace of this database.

    Args:
        keyspace: the keyspace to be inspected. If not specified,
            the general setting for this database is assumed.
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Returns:
        a list of the table names as strings, in no particular order.

    Raises:
        UnexpectedDataAPIResponseException: if the API response carries
            no "tables" entry within its "status" field.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> async def destroy_temp_table(async_db: AsyncDatabase) -> None:
        ...     print(await async_db.list_table_names())
        ...     await async_db.drop_table("my_v_tab")
        ...     print(await async_db.list_table_names())
        ...
        >>> asyncio.run(destroy_temp_table(async_database))
        ['fighters', 'my_v_tab', 'games']
        ['fighters', 'games']
    """

    # The three timeout parameters are interchangeable: reduce them to one
    # effective value together with its accompanying label.
    effective_timeout_ms, timeout_label = _select_singlereq_timeout_ta(
        timeout_options=self.api_options.timeout_options,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    commander = self._get_driver_commander(keyspace=keyspace)
    request_payload: dict[str, Any] = {"listTables": {}}
    logger.info("listTables")
    response = await commander.async_request(
        payload=request_payload,
        timeout_context=_TimeoutContext(
            request_ms=effective_timeout_ms,
            label=timeout_label,
        ),
    )

    # A well-formed response lists the table names under status.tables;
    # anything else is reported to the caller along with the raw response.
    status = response.get("status", {})
    if "tables" not in status:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from listTables API command.",
            raw_response=response,
        )

    logger.info("finished listTables")
    return status["tables"]  # type: ignore[no-any-return]

async def list_tables(self, *, keyspace: str | None = None, table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[ListTableDescriptor]

List all tables in a given keyspace for this database.

Args

keyspace: the keyspace to be inspected. If not specified, the general setting for this database is assumed.
table_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for table_admin_timeout_ms.
timeout_ms: an alias for table_admin_timeout_ms.

Returns

a list of ListTableDescriptor instances, one for each table.

Example

>>> tables = asyncio.run(my_async_database.list_tables())
>>> tables
[ListTableDescriptor(name='fighters', definition=ListTableDefinition(...
>>> tables[1].name
'games'
>>> tables[1].definition.columns
{'match_id': TableScalarColumnTypeDescriptor(ColumnType.TEXT),...
>>> tables[1].definition.columns['score']
TableScalarColumnTypeDescriptor(ColumnType.INT)
>>> tables[1].definition.primary_key.partition_by
['match_id']
>>> tables[1].definition.primary_key.partition_sort
{'round': 1}

Expand source code

async def list_tables(
    self,
    *,
    keyspace: str | None = None,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[ListTableDescriptor]:
    """
    Return a descriptor for each table found in a keyspace of this database.

    Args:
        keyspace: the keyspace to be inspected. If not specified,
            the general setting for this database is assumed.
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Returns:
        a list of ListTableDescriptor instances, one for each table.

    Example:
        >>> tables = asyncio.run(my_async_database.list_tables())
        >>> tables
        [ListTableDescriptor(name='fighters', definition=ListTableDefinition(...
        >>> tables[1].name
        'games'
        >>> tables[1].definition.columns
        {'match_id': TableScalarColumnTypeDescriptor(ColumnType.TEXT),...
        >>> tables[1].definition.columns['score']
        TableScalarColumnTypeDescriptor(ColumnType.INT)
        >>> tables[1].definition.primary_key.partition_by
        ['match_id']
        >>> tables[1].definition.primary_key.partition_sort
        {'round': 1}
    """

    # The three timeout parameters are interchangeable: reduce them to one
    # effective value together with its accompanying label.
    effective_timeout_ms, timeout_label = _select_singlereq_timeout_ta(
        timeout_options=self.api_options.timeout_options,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    timeout_context = _TimeoutContext(
        request_ms=effective_timeout_ms,
        label=timeout_label,
    )
    # The actual listing is delegated to the context-aware private method.
    descriptors = await self._list_tables_ctx(
        keyspace=keyspace,
        timeout_context=timeout_context,
    )
    return descriptors

async def name(self) ‑> str

The name of this database. Note that this bears no unicity guarantees.

Calling this method the first time involves a request to the DevOps API (the resulting database name is then cached). See the info() method for more details.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> asyncio.run(async_database.name())
'the_application_database'

Expand source code

async def name(self) -> str:
    """
    Return the name of this database (which is not guaranteed to be unique).

    The first invocation obtains the name through the `info()` method,
    which involves a request to the DevOps API; the value is then cached
    on this object and returned directly by later invocations.
    See the `info()` method for more details.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> asyncio.run(async_database.name())
        'the_application_database'
    """

    cached_name = self._name
    if cached_name is None:
        # Nothing cached yet: fetch the database info and remember its name.
        database_info = await self.info()
        cached_name = database_info.name
        self._name = cached_name
    return cached_name

def to_sync(self, *, keyspace: str | None = None, token: str | TokenProvider | UnsetType = (unset), api_options: APIOptions | UnsetType = (unset)) ‑> Database

Create a (synchronous) Database from this one. Save for the arguments explicitly provided as overrides, everything else is kept identical to this database in the copy.

Args

keyspace: this is the keyspace all method calls will target, unless one is explicitly specified in the call. If no keyspace is supplied when creating a Database, the name "default_keyspace" is set.
token: an Access Token to the database. Example: "AstraCS:xyz…". This can be either a literal token string or a subclass of TokenProvider.
api_options: any additional options to set for the result, in the form of an APIOptions instance (where one can set just the needed attributes). In case the same setting is also provided as named parameter, the latter takes precedence.

Returns

the new copy, a Database instance.

Example

>>> my_sync_db = async_database.to_sync()
>>> my_sync_db.list_collection_names()
['a_collection', 'another_collection']

Expand source code

def to_sync(
    self,
    *,
    keyspace: str | None = None,
    token: str | TokenProvider | UnsetType = _UNSET,
    api_options: APIOptions | UnsetType = _UNSET,
) -> Database:
    """
    Build a synchronous `Database` counterpart of this object. Anything not
    explicitly overridden through the arguments is carried over unchanged
    from this database.

    Args:
        keyspace: this is the keyspace all method calls will target, unless
            one is explicitly specified in the call. If no keyspace is supplied
            when creating a Database, the name "default_keyspace" is set.
        token: an Access Token to the database. Example: `"AstraCS:xyz..."`.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        api_options: any additional options to set for the result, in the form of
            an APIOptions instance (where one can set just the needed attributes).
            In case the same setting is also provided as named parameter,
            the latter takes precedence.

    Returns:
        the new copy, a `Database` instance.

    Example:
        >>> my_sync_db = async_database.to_sync()
        >>> my_sync_db.list_collection_names()
        ['a_collection', 'another_collection']
    """

    # Overrides are layered in two steps: first the caller's `api_options`,
    # then the explicitly-named `token` argument, which is applied last.
    token_override = APIOptions(token=token)
    merged_options = self.api_options.with_override(api_options)
    merged_options = merged_options.with_override(token_override)

    # A falsy `keyspace` argument falls back to this database's keyspace.
    target_keyspace = keyspace or self.keyspace
    return Database(
        api_endpoint=self.api_endpoint,
        keyspace=target_keyspace,
        api_options=merged_options,
    )

def use_keyspace(self, keyspace: str) ‑> None

Switch to a new working keyspace for this database. This method changes (mutates) the AsyncDatabase instance.

Note that this method does not create the keyspace, which should exist already (created for instance with a DatabaseAdmin.async_create_keyspace call).

Args

keyspace: the new keyspace to use as the database working keyspace.

Returns

None.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> asyncio.run(async_database.list_collection_names())
['coll_1', 'coll_2']
>>> async_database.use_keyspace("an_empty_keyspace")
>>> asyncio.run(async_database.list_collection_names())
[]

Expand source code

def use_keyspace(self, keyspace: str) -> None:
    """
    Make a different keyspace the working keyspace of this database.
    The AsyncDatabase instance is modified in place.

    The keyspace is not created by this method: it is expected to exist
    already (created for instance with a `DatabaseAdmin.async_create_keyspace` call).

    Args:
        keyspace: the new keyspace to use as the database working keyspace.

    Returns:
        None.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> asyncio.run(async_database.list_collection_names())
        ['coll_1', 'coll_2']
        >>> async_database.use_keyspace("an_empty_keyspace")
        >>> asyncio.run(async_database.list_collection_names())
        []
    """
    logger.info(f"switching to keyspace '{keyspace}'")
    self._using_keyspace = keyspace
    # The API commander is keyspace-specific, so it is rebuilt after the switch.
    refreshed_commander = self._get_api_commander(keyspace=self.keyspace)
    self._api_commander = refreshed_commander

def with_options(self, *, keyspace: str | None = None, token: str | TokenProvider | UnsetType = (unset), api_options: APIOptions | UnsetType = (unset)) ‑> AsyncDatabase

Create a clone of this database with some changed attributes.

Args

keyspace: this is the keyspace all method calls will target, unless one is explicitly specified in the call. If no keyspace is supplied when creating a Database, the name "default_keyspace" is set.
token: an Access Token to the database. Example: "AstraCS:xyz...". This can be either a literal token string or a subclass of TokenProvider.
api_options: any additional options to set for the clone, in the form of an APIOptions instance (where one can set just the needed attributes). In case the same setting is also provided as named parameter, the latter takes precedence.

Returns

a new AsyncDatabase instance.

Example

>>> async_database_2 = async_database.with_options(
...     keyspace="the_other_keyspace",
...     token="AstraCS:xyz...",
... )

Expand source code

def with_options(
    self,
    *,
    keyspace: str | None = None,
    token: str | TokenProvider | UnsetType = _UNSET,
    api_options: APIOptions | UnsetType = _UNSET,
) -> AsyncDatabase:
    """
    Return a copy of this database where some attributes are replaced.

    Args:
        keyspace: this is the keyspace all method calls will target, unless
            one is explicitly specified in the call. If no keyspace is supplied
            when creating a Database, the name "default_keyspace" is set.
        token: an Access Token to the database. Example: `"AstraCS:xyz..."`.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        api_options: any additional options to set for the clone, in the form of
            an APIOptions instance (where one can set just the needed attributes).
            In case the same setting is also provided as named parameter,
            the latter takes precedence.

    Returns:
        a new `AsyncDatabase` instance.

    Example:
        >>> async_database_2 = async_database.with_options(
        ...     keyspace="the_other_keyspace",
        ...     token="AstraCS:xyz...",
        ... )
    """

    # Public entry point: the cloning itself is done by the private `_copy`.
    cloned_database = self._copy(
        keyspace=keyspace,
        token=token,
        api_options=api_options,
    )
    return cloned_database

class AsyncTable (*, database: AsyncDatabase, name: str, keyspace: str | None, api_options: FullAPIOptions)

A Data API table, the object to interact with the Data API for structured data, especially for DDL operations. This class has an asynchronous interface for use with asyncio.

This class is not meant for direct instantiation by the user, rather it is obtained by invoking methods such as get_table of AsyncDatabase, wherefrom the AsyncTable inherits its API options such as authentication token and API endpoint. In order to create a table, instead, one should call the create_table method of AsyncDatabase, providing a table definition parameter that can be built in different ways (see the CreateTableDefinition object and examples below).

Args

database: an AsyncDatabase object, instantiated earlier. This represents the database the table belongs to.
name: the table name. This parameter should match an existing table on the database.
keyspace: this is the keyspace to which the table belongs. If nothing is specified, the database's working keyspace is used.
api_options: a complete specification of the API Options for this instance.

Examples

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> from astrapy import DataAPIClient, AsyncTable
>>> client = DataAPIClient()
>>> async_database = client.get_async_database(
...     "https://01234567-....apps.astra.datastax.com",
...     token="AstraCS:..."
... )

>>> # Create a table using the fluent syntax for definition
>>> from astrapy.constants import SortMode
>>> from astrapy.info import (
...     CreateTableDefinition,
...     ColumnType,
... )
>>> table_definition = (
...     CreateTableDefinition.builder()
...     .add_column("match_id", ColumnType.TEXT)
...     .add_column("round", ColumnType.INT)
...     .add_vector_column("m_vector", dimension=3)
...     .add_column("score", ColumnType.INT)
...     .add_column("when", ColumnType.TIMESTAMP)
...     .add_column("winner", ColumnType.TEXT)
...     .add_set_column("fighters", ColumnType.UUID)
...     .add_partition_by(["match_id"])
...     .add_partition_sort({"round": SortMode.ASCENDING})
...     .build()
... )
>>> my_table = await async_database.create_table(
...     "games",
...     definition=table_definition,
... )

>>> # Create a table with the definition as object
>>> # (and do not raise an error if the table exists already)
>>> from astrapy.info import (
...     CreateTableDefinition,
...     TablePrimaryKeyDescriptor,
...     TableScalarColumnTypeDescriptor,
...     TableValuedColumnType,
...     TableValuedColumnTypeDescriptor,
...     TableVectorColumnTypeDescriptor,
... )
>>> table_definition_1 = CreateTableDefinition(
...     columns={
...         "match_id": TableScalarColumnTypeDescriptor(
...             ColumnType.TEXT,
...         ),
...         "round": TableScalarColumnTypeDescriptor(
...             ColumnType.INT,
...         ),
...         "m_vector": TableVectorColumnTypeDescriptor(
...             column_type="vector", dimension=3
...         ),
...         "score": TableScalarColumnTypeDescriptor(
...             ColumnType.INT,
...         ),
...         "when": TableScalarColumnTypeDescriptor(
...             ColumnType.TIMESTAMP,
...         ),
...         "winner": TableScalarColumnTypeDescriptor(
...             ColumnType.TEXT,
...         ),
...         "fighters": TableValuedColumnTypeDescriptor(
...             column_type=TableValuedColumnType.SET,
...             value_type=ColumnType.UUID,
...         ),
...     },
...     primary_key=TablePrimaryKeyDescriptor(
...         partition_by=["match_id"],
...         partition_sort={"round": SortMode.ASCENDING},
...     ),
... )
>>> my_table_1 = await async_database.create_table(
...     "games",
...     definition=table_definition_1,
...     if_not_exists=True,
... )

>>> # Create a table with the definition as plain dictionary
>>> # (and do not raise an error if the table exists already)
>>> table_definition_2 = {
...     "columns": {
...         "match_id": {"type": "text"},
...         "round": {"type": "int"},
...         "m_vector": {"type": "vector", "dimension": 3},
...         "score": {"type": "int"},
...         "when": {"type": "timestamp"},
...         "winner": {"type": "text"},
...         "fighters": {"type": "set", "valueType": "uuid"},
...     },
...     "primaryKey": {
...         "partitionBy": ["match_id"],
...         "partitionSort": {"round": 1},
...     },
... }
>>> my_table_2 = await async_database.create_table(
...     "games",
...     definition=table_definition_2,
...     if_not_exists=True,
... )

>>> # Get a reference to an existing table
>>> # (no checks are performed on DB)
>>> my_table_4 = async_database.get_table("my_already_existing_table")

Note

creating an instance of AsyncTable does not trigger, in itself, actual creation of the table on the database. The latter should have been created beforehand, e.g. through the create_table method of a Database.

Expand source code

class AsyncTable(Generic[ROW]):
    """
    A Data API table, the object to interact with the Data API for structured data,
    especially for DDL operations.
    This class has an asynchronous interface for use with asyncio.

    This class is not meant for direct instantiation by the user, rather
    it is obtained by invoking methods such as `get_table` of AsyncDatabase,
    wherefrom the AsyncTable inherits its API options such as authentication
    token and API endpoint.
    In order to create a table, instead, one should call the `create_table`
    method of AsyncDatabase, providing a table definition parameter that can be built
    in different ways (see the `CreateTableDefinition` object and examples below).

    Args:
        database: an AsyncDatabase object, instantiated earlier. This represents
            the database the table belongs to.
        name: the table name. This parameter should match an existing
            table on the database.
        keyspace: this is the keyspace to which the table belongs.
            If nothing is specified, the database's working keyspace is used.
        api_options: a complete specification of the API Options for this instance.

    Examples:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> from astrapy import DataAPIClient, AsyncTable
        >>> client = astrapy.DataAPIClient()
        >>> async_database = client.get_async_database(
        ...     "https://01234567-....apps.astra.datastax.com",
        ...     token="AstraCS:..."
        ... )

        >>> # Create a table using the fluent syntax for definition
        >>> from astrapy.constants import SortMode
        >>> from astrapy.info import (
        ...     CreateTableDefinition,
        ...     ColumnType,
        ... )
        >>> table_definition = (
        ...     CreateTableDefinition.builder()
        ...     .add_column("match_id", ColumnType.TEXT)
        ...     .add_column("round", ColumnType.INT)
        ...     .add_vector_column("m_vector", dimension=3)
        ...     .add_column("score", ColumnType.INT)
        ...     .add_column("when", ColumnType.TIMESTAMP)
        ...     .add_column("winner", ColumnType.TEXT)
        ...     .add_set_column("fighters", ColumnType.UUID)
        ...     .add_partition_by(["match_id"])
        ...     .add_partition_sort({"round": SortMode.ASCENDING})
        ...     .build()
        ... )
        >>> my_table = await async_database.create_table(
        ...     "games",
        ...     definition=table_definition,
        ... )

        >>> # Create a table with the definition as object
        >>> # (and do not raise an error if the table exists already)
        >>> from astrapy.info import (
        ...     CreateTableDefinition,
        ...     TablePrimaryKeyDescriptor,
        ...     TableScalarColumnTypeDescriptor,
        ...     TableValuedColumnType,
        ...     TableValuedColumnTypeDescriptor,
        ...     TableVectorColumnTypeDescriptor,
        ... )
        >>> table_definition_1 = CreateTableDefinition(
        ...     columns={
        ...         "match_id": TableScalarColumnTypeDescriptor(
        ...             ColumnType.TEXT,
        ...         ),
        ...         "round": TableScalarColumnTypeDescriptor(
        ...             ColumnType.INT,
        ...         ),
        ...         "m_vector": TableVectorColumnTypeDescriptor(
        ...             column_type="vector", dimension=3
        ...         ),
        ...         "score": TableScalarColumnTypeDescriptor(
        ...             ColumnType.INT,
        ...         ),
        ...         "when": TableScalarColumnTypeDescriptor(
        ...             ColumnType.TIMESTAMP,
        ...         ),
        ...         "winner": TableScalarColumnTypeDescriptor(
        ...             ColumnType.TEXT,
        ...         ),
        ...         "fighters": TableValuedColumnTypeDescriptor(
        ...             column_type=TableValuedColumnType.SET,
        ...             value_type=ColumnType.UUID,
        ...         ),
        ...     },
        ...     primary_key=TablePrimaryKeyDescriptor(
        ...         partition_by=["match_id"],
        ...         partition_sort={"round": SortMode.ASCENDING},
        ...     ),
        ... )
        >>> my_table_1 = await async_database.create_table(
        ...     "games",
        ...     definition=table_definition_1,
        ...     if_not_exists=True,
        ... )

        >>> # Create a table with the definition as plain dictionary
        >>> # (and do not raise an error if the table exists already)
        >>> table_definition_2 = {
        ...     "columns": {
        ...         "match_id": {"type": "text"},
        ...         "round": {"type": "int"},
        ...         "m_vector": {"type": "vector", "dimension": 3},
        ...         "score": {"type": "int"},
        ...         "when": {"type": "timestamp"},
        ...         "winner": {"type": "text"},
        ...         "fighters": {"type": "set", "valueType": "uuid"},
        ...     },
        ...     "primaryKey": {
        ...         "partitionBy": ["match_id"],
        ...         "partitionSort": {"round": 1},
        ...     },
        ... }
        >>> my_table_2 = await async_database.create_table(
        ...     "games",
        ...     definition=table_definition_2,
        ...     if_not_exists=True,
        ... )

        >>> # Get a reference to an existing table
        >>> # (no checks are performed on DB)
        >>> my_table_4 = async_database.get_table("my_already_existing_table")

    Note:
        creating an instance of AsyncTable does not trigger, in itself, actual
        creation of the table on the database. The latter should have been created
        beforehand, e.g. through the `create_table` method of a Database.
    """

    def __init__(
        self,
        *,
        database: AsyncDatabase,
        name: str,
        keyspace: str | None,
        api_options: FullAPIOptions,
    ) -> None:
        # Store the table's options and name, then prepare what is needed
        # to issue requests: a database copy, the headers and the commander.
        self.api_options = api_options
        self._name = name

        # An explicitly-passed keyspace is used as is; otherwise the one
        # from the database is taken. A table cannot do without a keyspace.
        resolved_keyspace = database.keyspace if keyspace is None else keyspace
        if resolved_keyspace is None:
            raise ValueError("Attempted to create AsyncTable with 'keyspace' unset.")

        # The table keeps its own database copy, set to the resolved keyspace
        # and to this table's API options.
        self._database = database._copy(
            keyspace=resolved_keyspace,
            api_options=self.api_options,
        )

        # Headers are merged in this order, later entries replacing
        # earlier ones in case of a key clash.
        auth_token = self.api_options.token.get_token()
        embedding_headers = self.api_options.embedding_api_key.get_headers()
        reranking_headers = self.api_options.reranking_api_key.get_headers()
        additional_headers = self.api_options.database_additional_headers
        self._commander_headers = {
            DEFAULT_DATA_API_AUTH_HEADER: auth_token,
            **embedding_headers,
            **reranking_headers,
            **additional_headers,
        }

        self._api_commander = self._get_api_commander()
        self._converter_agent: _TableConverterAgent[ROW] = _TableConverterAgent(
            options=self.api_options.serdes_options,
        )

    def __repr__(self) -> str:
        """Return a string showing name, keyspace, API endpoint and API options."""
        described_fields = [
            f'name="{self.name}"',
            f'keyspace="{self.keyspace}"',
            f'database.api_endpoint="{self.database.api_endpoint}"',
            f"api_options={self.api_options}",
        ]
        return f"{self.__class__.__name__}({', '.join(described_fields)})"

    def __eq__(self, other: Any) -> bool:
        """Compare by table name, database and API options (AsyncTable only)."""
        if not isinstance(other, AsyncTable):
            return False
        # All three comparisons are carried out before combining the results.
        same_name = self._name == other._name
        same_database = self._database == other._database
        same_api_options = self.api_options == other.api_options
        return all((same_name, same_database, same_api_options))

    def _get_api_commander(self) -> APICommander:
        """
        Create a fresh APICommander targeting this table.

        The request path is made of the API path, the API version, the keyspace
        and the table name: each is stripped of leading/trailing slashes, with
        missing (None) and empty components left out.

        Raises:
            ValueError: if the table's database has no keyspace set.
        """

        database = self._database
        if database.keyspace is None:
            raise ValueError(
                "No keyspace specified. AsyncTable requires a keyspace to "
                "be set, e.g. through the `keyspace` constructor parameter."
            )

        url_options = database.api_options.data_api_url_options
        candidate_components = (
            url_options.api_path,
            url_options.api_version,
            database.keyspace,
            self._name,
        )
        path_segments: list[str] = []
        for candidate in candidate_components:
            if candidate is None:
                continue
            segment = candidate.strip("/")
            if segment != "":
                path_segments.append(segment)
        base_path = "/" + "/".join(path_segments)

        return APICommander(
            api_endpoint=database.api_endpoint,
            path=base_path,
            headers=self._commander_headers,
            callers=self.api_options.callers,
            redacted_header_names=self.api_options.redacted_header_names,
            handle_decimals_writes=True,
            handle_decimals_reads=True,
        )

    async def __aenter__(self: AsyncTable[ROW]) -> AsyncTable[ROW]:
        """Enter the async context manager: the table itself is the managed object."""
        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None = None,
        exc_value: BaseException | None = None,
        traceback: TracebackType | None = None,
    ) -> None:
        """
        Leave the async context manager, forwarding the exit call (along with
        any exception details) to the table's API commander, if there is one.
        """
        commander = self._api_commander
        if commander is None:
            return
        await commander.__aexit__(
            exc_type=exc_type,
            exc_value=exc_value,
            traceback=traceback,
        )

    def _copy(
        self: AsyncTable[ROW],
        *,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncTable[ROW]:
        """
        Create a new AsyncTable with the same database, name and keyspace
        as this one, and with API options adjusted by the provided overrides.
        """
        # Overrides are layered in two steps: first the caller's `api_options`,
        # then the explicitly-named API key arguments, which are applied last.
        key_overrides = APIOptions(
            embedding_api_key=embedding_api_key,
            reranking_api_key=reranking_api_key,
        )
        base_options = self.api_options.with_override(api_options)
        merged_options = base_options.with_override(key_overrides)
        return AsyncTable(
            database=self.database,
            name=self.name,
            keyspace=self.keyspace,
            api_options=merged_options,
        )

    def with_options(
        self: AsyncTable[ROW],
        *,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncTable[ROW]:
        """
        Return a copy of this table with some of its settings replaced.

        Args:
            embedding_api_key: optional API key(s) for interacting with the table.
                When an embedding service is configured and this parameter is
                not None, every Data API call carries the embedding-related
                headers this parameter specifies. A plain string is turned into
                the single "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
                Some vectorize providers/models, when using header-based
                authentication, require specialized subclasses of
                `astrapy.authentication.EmbeddingHeadersProvider` instead.
            reranking_api_key: optional API key(s) for interacting with the table.
                When a reranker is configured for the table and this parameter
                is not None, Data API calls carry the reranker-related headers
                this parameter specifies. A reranker service may not need this
                setting at all (e.g. if it requires no authentication, or if
                one is configured as part of the table definition relying on
                a "shared secret").
                A plain string is turned into an instance of
                `astrapy.authentication.RerankingAPIKeyHeaderProvider`.
            api_options: any additional options to set for the clone, in the form of
                an APIOptions instance (where one can set just the needed attributes).
                Should the same setting be given also as a named parameter,
                the named parameter takes precedence.

        Returns:
            a new AsyncTable instance.

        Example:
            >>> table_with_api_key_configured = my_async_table.with_options(
            ...     embedding_api_key="secret-key-0123abcd...",
            ... )
        """

        # all the work is delegated to the private copy method
        table_clone = self._copy(
            embedding_api_key=embedding_api_key,
            reranking_api_key=reranking_api_key,
            api_options=api_options,
        )
        return table_clone

    def to_sync(
        self: AsyncTable[ROW],
        *,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> Table[ROW]:
        """
        Create a (synchronous) Table from this one. Apart from the settings
        explicitly overridden through the arguments, the copy is kept identical
        to this table (the database is converted into a sync object).

        Args:
            embedding_api_key: optional API key(s) for interacting with the table.
                When an embedding service is configured and this parameter is
                not None, every Data API call carries the embedding-related
                headers this parameter specifies. A plain string is turned into
                the single "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
                Some vectorize providers/models, when using header-based
                authentication, require specialized subclasses of
                `astrapy.authentication.EmbeddingHeadersProvider` instead.
            reranking_api_key: optional API key(s) for interacting with the table.
                When a reranker is configured for the table and this parameter
                is not None, Data API calls carry the reranker-related headers
                this parameter specifies. A reranker service may not need this
                setting at all (e.g. if it requires no authentication, or if
                one is configured as part of the table definition relying on
                a "shared secret").
                A plain string is turned into an instance of
                `astrapy.authentication.RerankingAPIKeyHeaderProvider`.
            api_options: any additional options to set for the result, in the form of
                an APIOptions instance (where one can set just the needed attributes).
                Should the same setting be given also as a named parameter,
                the named parameter takes precedence.

        Returns:
            the new copy, a Table instance.

        Example:
            >>> my_async_table.to_sync().find_one(
            ...     {"match_id": "fight4"},
            ...     projection={"winner": True},
            ... )
            {"pk": 1, "column": "value"}
        """

        # the named parameters are wrapped in their own APIOptions layer ...
        named_overrides = APIOptions(
            embedding_api_key=embedding_api_key,
            reranking_api_key=reranking_api_key,
        )
        # ... which is applied after the `api_options` argument.
        options_with_arg = self.api_options.with_override(api_options)
        resulting_options = options_with_arg.with_override(named_overrides)
        sync_database = self.database.to_sync()
        return Table(
            database=sync_database,
            name=self.name,
            keyspace=self.keyspace,
            api_options=resulting_options,
        )

    async def definition(
        self,
        *,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> ListTableDefinition:
        """
        Query the Data API and return a structure defining the table schema.
        If there are no unsupported columns in the table, the return value has
        the same contents as could have been provided to a `create_table` method call.

        The definition is obtained by listing the tables through this table's
        database and picking the entry whose name matches this table.

        Args:
            table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Returns:
            A `ListTableDefinition` object, available for inspection.

        Raises:
            RuntimeError: if no table with this name is found in the listing.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> asyncio.run(my_async_table.definition())
            ListTableDefinition(columns=[match_id,round,fighters, ...  # shortened
        """

        resolved_timeout_ms, timeout_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info(f"getting tables in search of '{self.name}'")
        all_descriptors = await self.database._list_tables_ctx(
            keyspace=None,
            timeout_context=_TimeoutContext(
                request_ms=resolved_timeout_ms,
                label=timeout_label,
            ),
        )
        # keep only the descriptor(s) referring to this very table
        matching_descriptors = [
            descriptor
            for descriptor in all_descriptors
            if descriptor.name == self.name
        ]
        logger.info(f"finished getting tables in search of '{self.name}'")
        if not matching_descriptors:
            raise RuntimeError(
                f"Table {self.keyspace}.{self.name} not found.",
            )
        return matching_descriptors[0].definition

    async def info(
        self,
        *,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> TableInfo:
        """
        Return information on the table. This is not to be confused with the
        table definition (i.e. the schema).

        The database-level information is obtained through the `info` method
        of this table's database, to which the timeout parameters are forwarded.

        Args:
            database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying request.
                If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.

        Returns:
            A TableInfo object for inspection.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> # Note: output reformatted for clarity.
            >>> asyncio.run(my_async_table.info())
            TableInfo(
                database_info=AstraDBDatabaseInfo(id=..., name=..., ...),
                keyspace='default_keyspace',
                name='games',
                full_name='default_keyspace.games'
            )
        """

        database_info = await self.database.info(
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # the table-level fields come straight from this object's properties
        table_info = TableInfo(
            database_info=database_info,
            keyspace=self.keyspace,
            name=self.name,
            full_name=self.full_name,
        )
        return table_info

    @property
    def database(self) -> AsyncDatabase:
        """
        The AsyncDatabase object this table belongs to.

        Example:
            >>> my_async_table.database.name
            'the_db'
        """

        parent_database = self._database
        return parent_database

    @property
    def keyspace(self) -> str:
        """
        The keyspace this table is in, as set on the table's database.

        Raises:
            RuntimeError: if the database has no keyspace set (i.e. it is None).

        Example:
            >>> my_async_table.keyspace
            'default_keyspace'
        """

        database_keyspace = self.database.keyspace
        if database_keyspace is not None:
            return database_keyspace
        raise RuntimeError("The table's DB is set with keyspace=None")

    @property
    def name(self) -> str:
        """
        The name of this table, as stored on this object.

        Example:
            >>> my_async_table.name
            'my_table'
        """

        table_name: str = self._name
        return table_name

    @property
    def full_name(self) -> str:
        """
        The fully-qualified name of the table within the database,
        i.e. keyspace and table name joined by a dot: "keyspace.table_name".

        Example:
            >>> my_async_table.full_name
            'default_keyspace.my_table'
        """

        return "{}.{}".format(self.keyspace, self.name)

    async def _create_generic_index(
        self,
        i_name: str,
        ci_definition: dict[str, Any],
        ci_command: str,
        if_not_exists: bool | None,
        table_admin_timeout_ms: int | None,
        request_timeout_ms: int | None,
        timeout_ms: int | None,
    ) -> None:
        """
        Issue an index-creation command to the API, shared by the public
        index-creating methods of this class.

        Args:
            i_name: the name of the index to create.
            ci_definition: the index definition, already in dictionary form.
            ci_command: the name of the API command, used as the top-level
                key of the request payload.
            if_not_exists: if not None, it is sent as the "ifNotExists" option;
                if None, an empty options object is sent.
            table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Raises:
            UnexpectedDataAPIResponseException: if the "status" of the response
                is anything other than `{"ok": 1}`.
        """

        options_payload: dict[str, bool] = (
            {} if if_not_exists is None else {"ifNotExists": if_not_exists}
        )
        resolved_timeout_ms, timeout_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        command_body = {
            "name": i_name,
            "definition": ci_definition,
            "options": options_payload,
        }
        request_payload = {ci_command: command_body}
        logger.info(f"{ci_command}('{i_name}')")
        timeout_context = _TimeoutContext(
            request_ms=resolved_timeout_ms, label=timeout_label
        )
        response = await self._api_commander.async_request(
            payload=request_payload,
            timeout_context=timeout_context,
        )
        # anything but the plain "ok" status is treated as a failure
        if response.get("status") != {"ok": 1}:
            raise UnexpectedDataAPIResponseException(
                text=f"Faulty response from {ci_command} API command.",
                raw_response=response,
            )
        logger.info(f"finished {ci_command}('{i_name}')")

    async def create_index(
        self,
        name: str,
        column: str | dict[str, str],
        *,
        options: TableIndexOptions | dict[str, Any] | None = None,
        if_not_exists: bool | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Create an index on a non-vector column of the table.

        This is a blocking operation: the method returns once the index
        is created and ready to use.

        To create a vector index, use method `create_vector_index` instead.

        Args:
            name: the name of the index. Index names must be unique across the keyspace.
            column: the table column on which the index is to be created.
                For a map column, besides a simple string, it can be an object
                in one of the two formats {"column": "$values"}, {"column": "$keys"}.
            options: if passed, it must be an instance of `TableIndexOptions`,
                or an equivalent dictionary, which specifies index settings
                such as -- for a text column -- case-sensitivity and so on.
                See the `astrapy.info.TableIndexOptions` class for more details.
            if_not_exists: if set to True, the command will succeed even if an index
                with the specified name already exists (in which case no actual
                index creation takes place on the database). The API default of False
                means that an error is raised by the API in case of name collision.
            table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Examples:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> from astrapy.info import TableIndexOptions
            >>>
            >>> # create an index on a column
            >>> await my_async_table.create_index(
            ...     "score_index",
            ...     "score",
            ... )
            >>>
            >>> # create an index on a textual column, specifying indexing options
            >>> await my_async_table.create_index(
            ...     "winner_index",
            ...     "winner",
            ...     options=TableIndexOptions(
            ...         ascii=False,
            ...         normalize=True,
            ...         case_sensitive=False,
            ...     ),
            ... )
        """

        # a missing `options` is coerced starting from an empty dictionary
        index_options = TableIndexOptions.coerce(options or {})
        index_definition = TableIndexDefinition(
            column=column,
            options=index_options,
        )
        definition_dict: dict[str, Any] = index_definition.as_dict()
        return await self._create_generic_index(
            i_name=name,
            ci_definition=definition_dict,
            ci_command="createIndex",
            if_not_exists=if_not_exists,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )

    async def create_vector_index(
        self,
        name: str,
        column: str,
        *,
        options: TableVectorIndexOptions | dict[str, Any] | None = None,
        if_not_exists: bool | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Create a vector index on a vector column of the table, which enables
        vector similarity search operations on that column.

        This is a blocking operation: the method returns once the index
        is created and ready to use.

        To create a non-vector index, use method `create_index` instead.

        Args:
            name: the name of the index. Index names must be unique across the keyspace.
            column: the table column, of type "vector" on which to create the index.
            options: an instance of `TableVectorIndexOptions`, or an equivalent
                dictionary, which specifies settings for the vector index,
                such as the metric to use or, if desired, a "source model" setting.
                If omitted, the Data API defaults will apply for the index.
                See the `astrapy.info.TableVectorIndexOptions` class for more details.
            if_not_exists: if set to True, the command will succeed even if an index
                with the specified name already exists (in which case no actual
                index creation takes place on the database). The API default of False
                means that an error is raised by the API in case of name collision.
            table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> from astrapy.constants import VectorMetric
            >>> from astrapy.info import TableVectorIndexOptions
            >>>
            >>> # create a vector index with dot-product similarity
            >>> await my_async_table.create_vector_index(
            ...     "m_vector_index",
            ...     "m_vector",
            ...     options=TableVectorIndexOptions(
            ...         metric=VectorMetric.DOT_PRODUCT,
            ...     ),
            ... )
            >>> # specify a source_model (since the previous statement
            >>> # succeeded, this will do nothing because of `if_not_exists`):
            >>> await my_async_table.create_vector_index(
            ...     "m_vector_index",
            ...     "m_vector",
            ...     options=TableVectorIndexOptions(
            ...         metric=VectorMetric.DOT_PRODUCT,
            ...         source_model="nv-qa-4",
            ...     ),
            ...     if_not_exists=True,
            ... )
            >>> # leave the settings to the Data API defaults of cosine
            >>> # similarity metric (since the previous statement
            >>> # succeeded, this will do nothing because of `if_not_exists`):
            >>> await my_async_table.create_vector_index(
            ...     "m_vector_index",
            ...     "m_vector",
            ...     if_not_exists=True,
            ... )
        """

        # `options` is handed to `coerce` as received (possibly None)
        vector_index_options = TableVectorIndexOptions.coerce(options)
        vector_index_definition = TableVectorIndexDefinition(
            column=column,
            options=vector_index_options,
        )
        definition_dict: dict[str, Any] = vector_index_definition.as_dict()
        return await self._create_generic_index(
            i_name=name,
            ci_definition=definition_dict,
            ci_command="createVectorIndex",
            if_not_exists=if_not_exists,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )

    async def list_index_names(
        self,
        *,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[str]:
        """
        Return the names of all indexes that exist on this table.

        Args:
            table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Returns:
            a list of the index names as strings, in no particular order.

        Raises:
            UnexpectedDataAPIResponseException: if the response carries
                no "indexes" entry within its "status".

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> asyncio.run(my_async_table.list_index_names())
            ['m_vector_index', 'winner_index', 'score_index']
        """

        resolved_timeout_ms, timeout_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        request_payload: dict[str, Any] = {"listIndexes": {"options": {}}}
        logger.info("listIndexes")
        timeout_context = _TimeoutContext(
            request_ms=resolved_timeout_ms, label=timeout_label
        )
        response = await self._api_commander.async_request(
            payload=request_payload,
            timeout_context=timeout_context,
        )
        # a response lacking the list of indexes is considered faulty
        if "indexes" not in response.get("status", {}):
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from listIndexes API command.",
                raw_response=response,
            )
        logger.info("finished listIndexes")
        return response["status"]["indexes"]  # type: ignore[no-any-return]

    async def _list_indexes(
        self,
        *,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[TableIndexDescriptor]:
        """
        List the full definitions of all indexes existing on this table.

        WARNING: method not public yet, pending completion of its API.

        Besides the `listIndexes` command, this method also retrieves the table
        definition (through the `definition` method), since the table columns
        are needed to build the index descriptors. The same timeout parameters
        are passed to both operations.

        Args:
            table_admin_timeout_ms: a timeout, in milliseconds, to impose on each
                of the underlying API requests. If not provided, this object's
                defaults apply.
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Returns:
            a list of `astrapy.info.TableIndexDescriptor` objects in no particular
            order, each providing the details of an index present on the table.

        Raises:
            UnexpectedDataAPIResponseException: if the `listIndexes` response
                carries no "indexes" entry within its "status".

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> indexes = asyncio.run(my_async_table._list_indexes())
            >>> indexes
            [TableIndexDescriptor(name='m_vector_index', definition=...)...]
            >>> # (Note: shortened output above)
            >>> indexes[1].definition.column
            'winner'
            >>> indexes[1].definition.options.case_sensitive
            False
        """

        _table_admin_timeout_ms, _ta_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        li_payload: dict[str, Any] = {"listIndexes": {"options": {"explain": True}}}
        logger.info("listIndexes")
        li_response = await self._api_commander.async_request(
            payload=li_payload,
            timeout_context=_TimeoutContext(
                request_ms=_table_admin_timeout_ms, label=_ta_label
            ),
        )
        # Validate the response right away: on a faulty response there is no
        # point in issuing the further request for the table definition, whose
        # possible failure would moreover hide the actual problem.
        if "indexes" not in li_response.get("status", {}):
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from listIndexes API command.",
                raw_response=li_response,
            )
        logger.info("finished listIndexes")

        # the table columns are required to coerce the raw index descriptions
        columns = (
            await self.definition(
                table_admin_timeout_ms=table_admin_timeout_ms,
                request_timeout_ms=request_timeout_ms,
                timeout_ms=timeout_ms,
            )
        ).columns
        return [
            TableIndexDescriptor.coerce(index_object, columns=columns)
            for index_object in li_response["status"]["indexes"]
        ]

    @overload
    async def alter(
        self,
        operation: AlterTableOperation | dict[str, Any],
        *,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> AsyncTable[DefaultRowType]: ...

    @overload
    async def alter(
        self,
        operation: AlterTableOperation | dict[str, Any],
        *,
        row_type: type[NEW_ROW],
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> AsyncTable[NEW_ROW]: ...

    async def alter(
        self,
        operation: AlterTableOperation | dict[str, Any],
        *,
        row_type: type[Any] = DefaultRowType,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> AsyncTable[NEW_ROW]:
        """
        Execute one of the available alter-table operations on this table,
        such as adding/dropping columns.

        This is a blocking operation: the method returns once the API
        has responded to the alter-table command.

        Args:
            operation: an instance of one of the `astrapy.info.AlterTable*` classes,
                representing which alter operation to perform and the details thereof.
                A regular dictionary can also be provided, but then it must have the
                alter operation name at its top level: {"add": {"columns": ...}}.
            row_type: this parameter acts as a formal specifier for the type checker.
                If omitted, the resulting AsyncTable is implicitly
                an `AsyncTable[dict[str, Any]]`. If provided, it must match
                the type hint specified in the assignment.
                See the examples below.
            table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Returns:
            a new AsyncTable instance, created with the same database, name,
            keyspace and API options as this table.

        Raises:
            UnexpectedDataAPIResponseException: if the "status" of the response
                is anything other than `{"ok": 1}`.

        Examples:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> from astrapy.info import (
            ...     AlterTableAddColumns,
            ...     AlterTableAddVectorize,
            ...     AlterTableDropColumns,
            ...     AlterTableDropVectorize,
            ...     ColumnType,
            ...     TableScalarColumnTypeDescriptor,
            ...     VectorServiceOptions,
            ... )
            >>>
            >>> # Add a column
            >>> new_table_1 = await my_async_table.alter(
            ...     AlterTableAddColumns(
            ...         columns={
            ...             "tie_break": TableScalarColumnTypeDescriptor(
            ...                 column_type=ColumnType.BOOLEAN,
            ...             ),
            ...         }
            ...     )
            ... )
            >>>
            >>> # Drop a column
            >>> new_table_2 = await new_table_1.alter(AlterTableDropColumns(
            ...     columns=["tie_break"]
            ... ))
            >>>
            >>> # Add vectorize to a (vector) column
            >>> new_table_3 = await new_table_2.alter(
            ...     AlterTableAddVectorize(
            ...         columns={
            ...             "m_vector": VectorServiceOptions(
            ...                 provider="openai",
            ...                 model_name="text-embedding-3-small",
            ...                 authentication={
            ...                     "providerKey": "ASTRA_KMS_API_KEY_NAME",
            ...                 },
            ...             ),
            ...         }
            ...     )
            ... )
            >>>
            >>> # Drop vectorize from a (vector) column
            >>> # (Also demonstrates type hint usage)
            >>> from typing import TypedDict
            >>> from astrapy import AsyncTable
            >>> from astrapy.data_types import (
            ...     DataAPISet,
            ...     DataAPITimestamp,
            ...     DataAPIVector,
            ... )
            >>> from astrapy.ids import UUID
            >>>
            >>> class MyMatch(TypedDict):
            ...     match_id: str
            ...     round: int
            ...     m_vector: DataAPIVector
            ...     score: int
            ...     when: DataAPITimestamp
            ...     winner: str
            ...     fighters: DataAPISet[UUID]
            ...
            >>> new_table_4: AsyncTable[MyMatch] = await new_table_3.alter(
            ...     AlterTableDropVectorize(columns=["m_vector"]),
            ...     row_type=MyMatch,
            ... )
        """

        # a plain dictionary is first parsed into an operation object
        parsed_operation: AlterTableOperation = (
            operation
            if isinstance(operation, AlterTableOperation)
            else AlterTableOperation.from_full_dict(operation)
        )
        resolved_timeout_ms, timeout_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        operation_name = parsed_operation._name
        # the operation name is the single key wrapping the operation body
        operation_body = {operation_name: parsed_operation.as_dict()}
        request_payload = {"alterTable": {"operation": operation_body}}
        logger.info(f"alterTable({operation_name})")
        timeout_context = _TimeoutContext(
            request_ms=resolved_timeout_ms, label=timeout_label
        )
        response = await self._api_commander.async_request(
            payload=request_payload,
            timeout_context=timeout_context,
        )
        # anything but the plain "ok" status is treated as a failure
        if response.get("status") != {"ok": 1}:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from alterTable API command.",
                raw_response=response,
            )
        logger.info(f"finished alterTable({operation_name})")
        return AsyncTable(
            database=self.database,
            name=self.name,
            keyspace=self.keyspace,
            api_options=self.api_options,
        )

    async def insert_one(
        self,
        row: ROW,
        *,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> TableInsertOneResult:
        """
        Insert a single row in the table,
        with implied overwrite in case of primary key collision.

        Inserting a row whose primary key corresponds to an entry already stored
        in the table has the effect of an in-place update: the row is overwritten.
        However, if the row being inserted is partially provided, i.e. some columns
        are not specified, these are left unchanged on the database. To explicitly
        reset them, specify their value as appropriate to their data type,
        i.e. `None`, `{}` or analogous.

        Args:
            row: a dictionary expressing the row to insert. The primary key
                must be specified in full, while any other column may be omitted
                if desired (in which case it is left as is on DB).
                The values for the various columns supplied in the row must
                be of the right data type for the insertion to succeed.
                Non-primary-key columns can also be explicitly set to null.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a TableInsertOneResult object, whose attributes are the primary key
            of the inserted row both in the form of a dictionary and of a tuple.

        Examples:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> # a full-row insert using astrapy's datatypes
            >>> from astrapy.data_types import (
            ...     DataAPISet,
            ...     DataAPITimestamp,
            ...     DataAPIVector,
            ... )
            >>> from astrapy.ids import UUID
            >>>
            >>> insert_result = asyncio.run(my_async_table.insert_one(
            ...     {
            ...         "match_id": "mtch_0",
            ...         "round": 1,
            ...         "m_vector": DataAPIVector([0.4, -0.6, 0.2]),
            ...         "score": 18,
            ...         "when": DataAPITimestamp.from_string("2024-11-28T11:30:00Z"),
            ...         "winner": "Victor",
            ...         "fighters": DataAPISet([
            ...             UUID("0193539a-2770-8c09-a32a-111111111111"),
            ...         ]),
            ...     },
            ... ))
            >>> insert_result.inserted_id
            {'match_id': 'mtch_0', 'round': 1}
            >>> insert_result.inserted_id_tuple
            ('mtch_0', 1)
            >>>
            >>> # a partial-row (which in this case overwrites some of the values)
            >>> asyncio.run(my_async_table.insert_one(
            ...     {
            ...         "match_id": "mtch_0",
            ...         "round": 1,
            ...         "winner": "Victor Vector",
            ...         "fighters": DataAPISet([
            ...             UUID("0193539a-2770-8c09-a32a-111111111111"),
            ...             UUID("0193539a-2880-8875-9f07-222222222222"),
            ...         ]),
            ...     },
            ... ))
            TableInsertOneResult(inserted_id={'match_id': 'mtch_0', 'round': 1} ...
            >>>
            >>> # another insertion demonstrating standard-library datatypes in values
            >>> import datetime
            >>>
            >>> asyncio.run(my_async_table.insert_one(
            ...     {
            ...         "match_id": "mtch_0",
            ...         "round": 2,
            ...         "winner": "Angela",
            ...         "score": 25,
            ...         "when": datetime.datetime(
            ...             2024, 7, 13, 12, 55, 30, 889,
            ...             tzinfo=datetime.timezone.utc,
            ...         ),
            ...         "fighters": {
            ...             UUID("019353cb-8e01-8276-a190-333333333333"),
            ...         },
            ...         "m_vector": [0.4, -0.6, 0.2],
            ...     },
            ... ))
            TableInsertOneResult(inserted_id={'match_id': 'mtch_0', 'round': 2}, ...
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        io_payload = self._converter_agent.preprocess_payload(
            {"insertOne": {"document": row}},
            map2tuple_checker=map2tuple_checker_insert_one,
        )
        logger.info(f"insertOne on '{self.name}'")
        io_response = await self._api_commander.async_request(
            payload=io_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished insertOne on '{self.name}'")
        if "insertedIds" in io_response.get("status", {}):
            if not io_response["status"]["insertedIds"]:
                raise UnexpectedDataAPIResponseException(
                    text="Response from insertOne API command has empty 'insertedIds'.",
                    raw_response=io_response,
                )
            if not io_response["status"]["primaryKeySchema"]:
                raise UnexpectedDataAPIResponseException(
                    text="Response from insertOne API command has empty 'primaryKeySchema'.",
                    raw_response=io_response,
                )
            inserted_id_list = io_response["status"]["insertedIds"][0]
            inserted_id_tuple, inserted_id = self._converter_agent.postprocess_key(
                inserted_id_list,
                primary_key_schema_dict=io_response["status"]["primaryKeySchema"],
            )
            return TableInsertOneResult(
                raw_results=[io_response],
                inserted_id=inserted_id,
                inserted_id_tuple=inserted_id_tuple,
            )
        else:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from insertOne API command.",
                raw_response=io_response,
            )

    def _prepare_keys_from_status(
        self, status: dict[str, Any] | None, raise_on_missing: bool = False
    ) -> tuple[list[dict[str, Any]], list[tuple[Any, ...]]]:
        ids: list[dict[str, Any]]
        id_tuples: list[tuple[Any, ...]]
        if status is None:
            if raise_on_missing:
                raise UnexpectedDataAPIResponseException(
                    text="'status' not found in API response",
                    raw_response=None,
                )
            else:
                ids = []
                id_tuples = []
        else:
            if "documentResponses" not in status:
                raise UnexpectedDataAPIResponseException(
                    text=(
                        "received a 'status' without 'documentResponses' "
                        f"in API response (received: {status})"
                    ),
                    raw_response=None,
                )
            raw_inserted_ids = [
                row_resp["_id"]
                for row_resp in status["documentResponses"]
                if row_resp["status"] == "OK"
            ]
            if raw_inserted_ids:
                if "primaryKeySchema" not in status:
                    raise UnexpectedDataAPIResponseException(
                        text=(
                            "received a 'status' without 'primaryKeySchema' "
                            f"in API response (received: {status})"
                        ),
                        raw_response=None,
                    )
                id_tuples_and_ids = self._converter_agent.postprocess_keys(
                    raw_inserted_ids,
                    primary_key_schema_dict=status["primaryKeySchema"],
                )
                id_tuples = [tpl for tpl, _ in id_tuples_and_ids]
                ids = [id for _, id in id_tuples_and_ids]
            else:
                ids = []
                id_tuples = []
        return ids, id_tuples

    async def insert_many(
        self,
        rows: Iterable[ROW],
        *,
        ordered: bool = False,
        chunk_size: int | None = None,
        concurrency: int | None = None,
        request_timeout_ms: int | None = None,
        general_method_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> TableInsertManyResult:
        """
        Insert a number of rows into the table,
        with implied overwrite in case of primary key collision.

        Inserting rows whose primary key corresponds to entries already stored
        in the table has the effect of an in-place update: the rows are overwritten.
        However, if the rows being inserted are partially provided, i.e. some columns
        are not specified, these are left unchanged on the database. To explicitly
        reset them, specify their value as appropriate to their data type,
        i.e. `None`, `{}` or analogous.

        Args:
            rows: an iterable of dictionaries, each expressing a row to insert.
                Each row must at least fully specify the primary key column values,
                while any other column may be omitted if desired (in which case
                it is left as is on DB).
                The values for the various columns supplied in each row must
                be of the right data type for the insertion to succeed.
                Non-primary-key columns can also be explicitly set to null.
            ordered: if False (default), the insertions can occur in arbitrary order
                and possibly concurrently. If True, they are processed sequentially.
                If there are no specific reasons against it, unordered insertions
                are to be preferred as they complete much faster.
            chunk_size: how many rows to include in each single API request.
                Must be a positive integer.
                Exceeding the server maximum allowed value results in an error.
                Leave it unspecified (recommended) to use the system default.
            concurrency: maximum number of concurrent requests to the API at
                a given time. It cannot be more than one for ordered insertions,
                and must be at least one for unordered insertions.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                whole operation, which may consist of several API requests.
                If not provided, this object's defaults apply.
            request_timeout_ms: a timeout, in milliseconds, to impose on each
                individual HTTP request to the Data API to accomplish the operation.
                If not provided, this object's defaults apply.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a TableInsertManyResult object, whose attributes are the primary key
            of the inserted rows both in the form of dictionaries and of tuples.

        Raises:
            ValueError: if `ordered` is True and `concurrency` is greater than one,
                if `chunk_size` is less than one, or if `concurrency` is less
                than one for an unordered insertion.
            TableInsertManyException: if any of the API responses carries errors.

        Examples:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> # Insert complete and partial rows at once (concurrently)
            >>> from astrapy.data_types import (
            ...     DataAPISet,
            ...     DataAPITimestamp,
            ...     DataAPIVector,
            ... )
            >>> from astrapy.ids import UUID
            >>>
            >>> insert_result = asyncio.run(my_async_table.insert_many(
            ...     [
            ...         {
            ...             "match_id": "fight4",
            ...             "round": 1,
            ...             "winner": "Victor",
            ...             "score": 18,
            ...             "when": DataAPITimestamp.from_string(
            ...                 "2024-11-28T11:30:00Z",
            ...             ),
            ...             "fighters": DataAPISet([
            ...                 UUID("0193539a-2770-8c09-a32a-111111111111"),
            ...                 UUID('019353e3-00b4-83f9-a127-222222222222'),
            ...             ]),
            ...             "m_vector": DataAPIVector([0.4, -0.6, 0.2]),
            ...         },
            ...         {"match_id": "fight5", "round": 1, "winner": "Adam"},
            ...         {"match_id": "fight5", "round": 2, "winner": "Betta"},
            ...         {"match_id": "fight5", "round": 3, "winner": "Caio"},
            ...         {
            ...             "match_id": "challenge6",
            ...             "round": 1,
            ...             "winner": "Donna",
            ...             "m_vector": [0.9, -0.1, -0.3],
            ...         },
            ...         {"match_id": "challenge6", "round": 2, "winner": "Erick"},
            ...         {"match_id": "challenge6", "round": 3, "winner": "Fiona"},
            ...         {"match_id": "tournamentA", "round": 1, "winner": "Gael"},
            ...         {"match_id": "tournamentA", "round": 2, "winner": "Hanna"},
            ...         {
            ...             "match_id": "tournamentA",
            ...             "round": 3,
            ...             "winner": "Ian",
            ...             "fighters": DataAPISet([
            ...                 UUID("0193539a-2770-8c09-a32a-111111111111"),
            ...             ]),
            ...         },
            ...         {"match_id": "fight7", "round": 1, "winner": "Joy"},
            ...         {"match_id": "fight7", "round": 2, "winner": "Kevin"},
            ...         {"match_id": "fight7", "round": 3, "winner": "Lauretta"},
            ...     ],
            ...     concurrency=10,
            ...     chunk_size=3,
            ... ))
            >>> insert_result.inserted_ids
            [{'match_id': 'fight4', 'round': 1}, {'match_id': 'fight5', ...
            >>> insert_result.inserted_id_tuples
            [('fight4', 1), ('fight5', 1), ('fight5', 2), ('fight5', 3), ...
            >>>
            >>> # Ordered insertion
            >>> # (would stop on first failure; predictable end result on DB)
            >>> asyncio.run(my_async_table.insert_many(
            ...     [
            ...         {"match_id": "fight5", "round": 1, "winner": "Adam0"},
            ...         {"match_id": "fight5", "round": 2, "winner": "Betta0"},
            ...         {"match_id": "fight5", "round": 3, "winner": "Caio0"},
            ...         {"match_id": "fight5", "round": 1, "winner": "Adam Zuul"},
            ...         {"match_id": "fight5", "round": 2, "winner": "Betta Vigo"},
            ...         {"match_id": "fight5", "round": 3, "winner": "Caio Gozer"},
            ...     ],
            ...     ordered=True,
            ... ))
            TableInsertManyResult(inserted_ids=[{'match_id': 'fight5', 'round': 1}, ...

        Note:
            Unordered insertions are executed with some degree of concurrency,
            so it is usually better to prefer this mode unless the order in the
            row sequence is important.

        Note:
            A failure mode for this command is related to certain faulty rows
            found among those to insert: validation may fail, for example, if the
            vector length does not match the table schema.

            For an ordered insertion, the method will raise an exception at
            the first such faulty row -- nevertheless, all rows processed
            until then will end up being written to the database.

            For unordered insertions, if the error stems from faulty rows
            the insertion proceeds until exhausting the input rows: then,
            an exception is raised -- and all insertable rows will have been
            written to the database, including those "after" the troublesome ones.

            Errors occurring during an insert_many operation, for that reason,
            may result in a `TableInsertManyException` being raised.
            This exception allows to inspect the list of row IDs that were
            successfully inserted, while accessing at the same time the underlying
            "root errors" that made the full method call to fail.
        """

        _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
            (general_method_timeout_ms, "general_method_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.general_method_timeout_ms,
                "general_method_timeout_ms",
            ),
        )
        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        if concurrency is None:
            if ordered:
                _concurrency = 1
            else:
                _concurrency = DEFAULT_INSERT_MANY_CONCURRENCY
        else:
            _concurrency = concurrency
        if _concurrency > 1 and ordered:
            raise ValueError("Cannot run ordered insert_many concurrently.")
        if not ordered and _concurrency < 1:
            # A semaphore initialized to zero would never admit any chunk,
            # leaving the call waiting forever: reject it upfront.
            raise ValueError(
                "Concurrency for unordered insert_many must be at least one."
            )
        if chunk_size is None:
            _chunk_size = DEFAULT_INSERT_MANY_CHUNK_SIZE
        else:
            _chunk_size = chunk_size
        if _chunk_size < 1:
            # A negative step would yield no chunks at all, i.e. a silent no-op
            # reported as success; a zero step makes `range` fail anyway.
            raise ValueError("The chunk size for insert_many must be at least one.")
        # materialize the input, as it is sliced into chunks by index below
        _rows = list(rows)
        logger.info(f"inserting {len(_rows)} rows in '{self.name}'")
        raw_results: list[dict[str, Any]] = []
        im_payloads: list[dict[str, Any] | None] = []
        # the overall timeout is shared across all the requests of this call
        timeout_manager = MultiCallTimeoutManager(
            overall_timeout_ms=_general_method_timeout_ms,
            timeout_label=_gmt_label,
        )
        if ordered:
            options = {"ordered": True, "returnDocumentResponses": True}
            inserted_ids: list[Any] = []
            inserted_id_tuples: list[Any] = []
            for i in range(0, len(_rows), _chunk_size):
                im_payload = self._converter_agent.preprocess_payload(
                    {
                        "insertMany": {
                            "documents": _rows[i : i + _chunk_size],
                            "options": options,
                        },
                    },
                    map2tuple_checker=map2tuple_checker_insert_many,
                )
                logger.info(f"insertMany(chunk) on '{self.name}'")
                # API errors are not raised by the request itself: they are
                # inspected below, so that the inserted keys can be reported.
                chunk_response = await self._api_commander.async_request(
                    payload=im_payload,
                    raise_api_errors=False,
                    timeout_context=timeout_manager.remaining_timeout(
                        cap_time_ms=_request_timeout_ms,
                        cap_timeout_label=_rt_label,
                    ),
                )
                logger.info(f"finished insertMany(chunk) on '{self.name}'")
                # accumulate the results in this call
                chunk_inserted_ids, chunk_inserted_ids_tuples = (
                    self._prepare_keys_from_status(chunk_response.get("status"))
                )
                inserted_ids += chunk_inserted_ids
                inserted_id_tuples += chunk_inserted_ids_tuples
                raw_results += [chunk_response]
                # if errors, quit early
                if chunk_response.get("errors", []):
                    response_exception = DataAPIResponseException.from_response(
                        command=im_payload,
                        raw_response=chunk_response,
                    )
                    raise TableInsertManyException(
                        inserted_ids=inserted_ids,
                        inserted_id_tuples=inserted_id_tuples,
                        exceptions=[response_exception],
                    )

            # return
            full_result = TableInsertManyResult(
                raw_results=raw_results,
                inserted_ids=inserted_ids,
                inserted_id_tuples=inserted_id_tuples,
            )
            logger.info(f"finished inserting {len(_rows)} rows in '{self.name}'")
            return full_result

        else:
            # unordered: concurrent or not, do all of them and parse the results
            options = {"ordered": False, "returnDocumentResponses": True}

            # caps the number of chunk requests in flight at any given time
            sem = asyncio.Semaphore(_concurrency)

            async def concurrent_insert_chunk(
                row_chunk: list[ROW],
            ) -> tuple[dict[str, Any] | None, dict[str, Any]]:
                async with sem:
                    im_payload = self._converter_agent.preprocess_payload(
                        {
                            "insertMany": {
                                "documents": row_chunk,
                                "options": options,
                            },
                        },
                        map2tuple_checker=map2tuple_checker_insert_many,
                    )
                    logger.info(f"insertMany(chunk) on '{self.name}'")
                    im_response = await self._api_commander.async_request(
                        payload=im_payload,
                        raise_api_errors=False,
                        timeout_context=timeout_manager.remaining_timeout(
                            cap_time_ms=_request_timeout_ms,
                            cap_timeout_label=_rt_label,
                        ),
                    )
                    logger.info(f"finished insertMany(chunk) on '{self.name}'")
                    return im_payload, im_response

            raw_pl_results_pairs: list[tuple[dict[str, Any] | None, dict[str, Any]]]
            if _concurrency > 1:
                tasks = [
                    asyncio.create_task(
                        concurrent_insert_chunk(_rows[i : i + _chunk_size])
                    )
                    for i in range(0, len(_rows), _chunk_size)
                ]
                raw_pl_results_pairs = await asyncio.gather(*tasks)
            else:
                raw_pl_results_pairs = [
                    await concurrent_insert_chunk(_rows[i : i + _chunk_size])
                    for i in range(0, len(_rows), _chunk_size)
                ]

            # Split the (payload, response) pairs into two proper lists, so that
            # `raw_results` has the same type as in the ordered code path
            # (unzipping with `zip(*...)` would produce tuples instead).
            im_payloads = [chunk_payload for chunk_payload, _ in raw_pl_results_pairs]
            raw_results = [chunk_response for _, chunk_response in raw_pl_results_pairs]

            # recast raw_results. Each response has its schema: unfold appropriately
            ids_and_tuples_per_chunk = [
                self._prepare_keys_from_status(chunk_response.get("status"))
                for chunk_response in raw_results
            ]
            inserted_ids = [
                inserted_id
                for chunk_ids, _ in ids_and_tuples_per_chunk
                for inserted_id in chunk_ids
            ]
            inserted_id_tuples = [
                inserted_id_tuple
                for _, chunk_id_tuples in ids_and_tuples_per_chunk
                for inserted_id_tuple in chunk_id_tuples
            ]

            # check-raise
            response_exceptions = [
                DataAPIResponseException.from_response(
                    command=chunk_payload,
                    raw_response=chunk_response,
                )
                for chunk_payload, chunk_response in zip(im_payloads, raw_results)
                if chunk_response.get("errors", [])
            ]
            if response_exceptions:
                raise TableInsertManyException(
                    inserted_ids=inserted_ids,
                    inserted_id_tuples=inserted_id_tuples,
                    exceptions=response_exceptions,
                )

            # return
            full_result = TableInsertManyResult(
                raw_results=raw_results,
                inserted_ids=inserted_ids,
                inserted_id_tuples=inserted_id_tuples,
            )
            logger.info(f"finished inserting {len(_rows)} rows in '{self.name}'")
            return full_result

    # Typing overload of `find`: when `row_type` is left to None, the returned
    # cursor is annotated as AsyncTableFindCursor[ROW, ROW], i.e. its items
    # keep the same type as the rows of this table.
    @overload
    def find(
        self,
        filter: FilterType | None = None,
        *,
        projection: ProjectionType | None = None,
        row_type: None = None,
        skip: int | None = None,
        limit: int | None = None,
        include_similarity: bool | None = None,
        include_sort_vector: bool | None = None,
        sort: SortType | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> AsyncTableFindCursor[ROW, ROW]: ...

    # Typing overload of `find`: when a `row_type` is passed explicitly
    # (it is a required keyword here), the returned cursor is annotated as
    # AsyncTableFindCursor[ROW, ROW2], i.e. its items have the requested type.
    @overload
    def find(
        self,
        filter: FilterType | None = None,
        *,
        projection: ProjectionType | None = None,
        row_type: type[ROW2],
        skip: int | None = None,
        limit: int | None = None,
        include_similarity: bool | None = None,
        include_sort_vector: bool | None = None,
        sort: SortType | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> AsyncTableFindCursor[ROW, ROW2]: ...

    def find(
        self,
        filter: FilterType | None = None,
        *,
        projection: ProjectionType | None = None,
        row_type: type[ROW2] | None = None,
        skip: int | None = None,
        limit: int | None = None,
        include_similarity: bool | None = None,
        include_sort_vector: bool | None = None,
        sort: SortType | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> AsyncTableFindCursor[ROW, ROW2]:
        """
        Find rows on the table matching the provided filters
        and according to sorting criteria including vector similarity.

        The returned AsyncTableFindCursor object, representing the stream of results,
        can be iterated over, or consumed and manipulated in several other ways
        (see the examples below and the `TableFindCursor` documentation for details).
        Since the amount of returned items can be large, TableFindCursor is a lazy
        object, that fetches new data while it is being read using the Data API
        pagination mechanism.

        Invoking `.to_list()` on a TableFindCursor will cause it to consume all
        rows and materialize the entire result set as a list. This is not recommended
        if the amount of results is very large.

        Args:
            filter: a dictionary expressing which condition the returned rows
                must satisfy. The filter can use operators, such as "$eq" for equality,
                and require columns to compare with literal values. Simple examples
                are `{}` (zero filter, not recommended for large tables),
                `{"match_no": 123}` (a shorthand for `{"match_no": {"$eq": 123}}`,
                or `{"match_no": 123, "round": "C"}` (multiple conditions are
                implicitly combined with "$and").
                Please consult the Data API documentation for a more detailed
                explanation of table search filters and tips on their usage.
            projection: a prescription on which columns to return for the matching rows.
                The projection can take the form `{"column1": True, "column2": True}`.
                `{"*": True}` (i.e. return the whole row), or the complementary
                form that excludes columns: `{"column1": False, "column2": False}`.
                To optimize bandwidth usage, it is recommended to use a projection,
                especially to avoid unnecessary columns of type vector with
                high-dimensional embeddings.
            row_type: this parameter acts a formal specifier for the type checker.
                If omitted, the resulting cursor is implicitly an
                `AsyncTableFindCursor[ROW, ROW]`, i.e. maintains the same type for
                the items it returns as that for the rows in the table. Strictly
                typed code may want to specify this parameter especially when a
                projection is given.
            skip: if provided, it is a number of rows that would be obtained first
                in the response and are instead skipped.
            limit: a maximum amount of rows to get from the table. The returned cursor
                will stop yielding rows when either this number is reached or there
                really are no more matches in the table.
            include_similarity: a boolean to request the numeric value of the
                similarity to be returned as an added "$similarity" key in each returned
                row. It can be used meaningfully only in a vector search (see `sort`).
            include_sort_vector: a boolean to request the search query vector.
                If set to True (and if the search is a vector search), calling
                the `get_sort_vector` method on the returned cursor will yield
                the vector used for the ANN search.
            sort: this dictionary parameter controls the order in which the rows
                are returned. The sort parameter can express either a vector search or
                a regular (ascending/descending, even hierarchical) sorting.
                * For a vector search the parameter takes the form
                `{"vector_column": qv}`, with the query vector `qv` of the appropriate
                type (list of floats or DataAPIVector). If the table has automatic
                embedding generation ("vectorize") enabled on that column, the form
                `{"vectorize_enabled_column": "query text"}` is also valid.
                * In the case of non-vector sorting, the parameter specifies the
                column(s) and the ascending/descending ordering required.
                If multiple columns are provided, the sorting applies them
                hierarchically to the rows. Examples are `{"score": SortMode.ASCENDING}`
                (equivalently `{"score": +1}`), `{"score": +1, "when": -1}`.
                Note that, depending on the column(s) chosen for sorting, the table
                partitioning structure, and the presence of indexes, the sorting
                may be done in-memory by the API. In that case, there may be performance
                implications and limitations on the amount of items returned.
                Consult the Data API documentation for more details on this topic.
            request_timeout_ms: a timeout, in milliseconds, to impose on each
                individual HTTP request to the Data API to accomplish the operation.
                If not provided, this object's defaults apply.
            timeout_ms: an alias for `request_timeout_ms`.

        Returns:
            a AsyncTableFindCursor object, that can be iterated over (and manipulated
            in several ways). The cursor, if needed, handles pagination under the hood
            as the rows are consumed.

        Note:
            As the rows are retrieved in chunks progressively, while the cursor
            is being iterated over, it is possible that the actual results
            obtained will reflect changes occurring to the table contents in
            real time.

        Examples:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> # Iterate over results:
            >>> async def loop1():
            ...     async for row in my_async_table.find({"match_id": "challenge6"}):
            ...         print(f"(R:{row['round']}): winner {row['winner']}")
            ...
            >>> asyncio.run(loop1())
            (R:1): winner Donna
            (R:2): winner Erick
            (R:3): winner Fiona
            >>>
            >>> # Optimize bandwidth using a projection:
            >>> proj = {"round": True, "winner": True}
            >>> async def loop2():
            ...     async for row in my_async_table.find(
            ...           {"match_id": "challenge6"},
            ...           projection=proj,
            ...     ):
            ...         print(f"(R:{row['round']}): winner {row['winner']}")
            ...
            >>> asyncio.run(loop2())
            (R:1): winner Donna
            (R:2): winner Erick
            (R:3): winner Fiona
            >>>
            >>> # Filter on the partitioning:
            >>> asyncio.run(
            ...     my_async_table.find({"match_id": "challenge6"}).to_list()
            ... )
            [{'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
            >>>
            >>> # Filter on primary key:
            >>> asyncio.run(
            ...     my_async_table.find(
            ...         {"match_id": "challenge6", "round": 1}
            ...     ).to_list()
            ... )
            [{'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
            >>>
            >>> # Filter on a regular indexed column:
            >>> asyncio.run(my_async_table.find({"winner": "Caio Gozer"}).to_list())
            [{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
            >>>
            >>> # Non-equality filter on a regular indexed column:
            >>> asyncio.run(my_async_table.find({"score": {"$gte": 15}}).to_list())
            [{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
            >>>
            >>> # Filter on a regular non-indexed column:
            >>> # (not recommended performance-wise)
            >>> asyncio.run(my_async_table.find(
            ...     {"when": {
            ...         "$gte": DataAPITimestamp.from_string("1999-12-31T01:23:44Z")
            ...     }}
            ... ).to_list())
            The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
            [{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
            >>>
            >>> # Empty filter (not recommended performance-wise):
            >>> asyncio.run(my_async_table.find({}).to_list())
            The Data API returned a warning: {'errorCode': 'ZERO_FILTER_OPERATIONS', ...
            [{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
            >>>
            >>> # Filter on the primary key and a regular non-indexed column:
            >>> # (not recommended performance-wise)
            >>> asyncio.run(my_async_table.find(
            ...     {"match_id": "fight5", "round": 3, "winner": "Caio Gozer"}
            ... ).to_list())
            The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
            [{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
            >>>
            >>> # Filter on a regular non-indexed column (and incomplete primary key)
            >>> # (not recommended performance-wise)
            >>> asyncio.run(my_async_table.find(
            ...     {"round": 3, "winner": "Caio Gozer"}
            ... ).to_list())
            The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
            [{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
            >>>
            >>> # Vector search with "sort" (on an appropriately-indexed vector column):
            >>> asyncio.run(my_async_table.find(
            ...     {},
            ...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
            ...     projection={"winner": True},
            ...     limit=3,
            ... ).to_list())
            [{'winner': 'Donna'}, {'winner': 'Victor'}]
            >>>
            >>> # Hybrid search with vector sort and non-vector filtering:
            >>> my_table.find(
            ...     {"match_id": "fight4"},
            ...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
            ...     projection={"winner": True},
            ... ).to_list()
            [{'winner': 'Victor'}]
            >>>
            >>> # Return the numeric value of the vector similarity
            >>> # (also demonstrating that one can pass a plain list for a vector):
            >>> asyncio.run(my_async_table.find(
            ...     {},
            ...     sort={"m_vector": [0.2, 0.3, 0.4]},
            ...     projection={"winner": True},
            ...     limit=3,
            ...     include_similarity=True,
            ... ).to_list())
            [{'winner': 'Donna', '$similarity': 0.515}, {'winner': 'Victor', ...
            >>>
            >>> # Non-vector sorting on a 'partitionSort' column:
            >>> asyncio.run(my_async_table.find(
            ...     {"match_id": "fight5"},
            ...     sort={"round": SortMode.DESCENDING},
            ...     projection={"winner": True},
            ... ).to_list())
            [{'winner': 'Caio Gozer'}, {'winner': 'Betta Vigo'}, ...
            >>>
            >>> # Using `skip` and `limit`:
            >>> asyncio.run(my_async_table.find(
            ...     {"match_id": "fight5"},
            ...     sort={"round": SortMode.DESCENDING},
            ...     projection={"winner": True},
            ...     skip=1,
            ...     limit=2,
            ... ).to_list())
            The Data API returned a warning: {'errorCode': 'IN_MEMORY_SORTING...
            [{'winner': 'Betta Vigo'}, {'winner': 'Adam Zuul'}]
            >>>
            >>> # Non-vector sorting on a regular column:
            >>> # (not recommended performance-wise)
            >>> asyncio.run(my_async_table.find(
            ...     {"match_id": "fight5"},
            ...     sort={"winner": SortMode.ASCENDING},
            ...     projection={"winner": True},
            ... ).to_list())
            The Data API returned a warning: {'errorCode': 'IN_MEMORY_SORTING...
            [{'winner': 'Adam Zuul'}, {'winner': 'Betta Vigo'}, ...
            >>>
            >>> # Using `.map()` on a cursor:
            >>> winner_cursor = my_async_table.find(
            ...     {"match_id": "fight5"},
            ...     sort={"round": SortMode.DESCENDING},
            ...     projection={"winner": True},
            ...     limit=5,
            ... )
            >>> print("/".join(asyncio.run(
            ...     winner_cursor.map(lambda row: row["winner"].upper()).to_list())
            ... ))
            CAIO GOZER/BETTA VIGO/ADAM ZUUL
            >>>
            >>> # Some other examples of cursor manipulation
            >>> matches_async_cursor = my_async_table.find(
            ...     sort={"m_vector": DataAPIVector([-0.1, 0.15, 0.3])}
            ... )
            >>> asyncio.run(matches_async_cursor.has_next())
            True
            >>> asyncio.run(matches_async_cursor.__anext__())
            {'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
            >>> matches_async_cursor.consumed
            1
            >>> matches_async_cursor.rewind()
            >>> matches_async_cursor.consumed
            0
            >>> asyncio.run(matches_async_cursor.has_next())
            True
            >>> matches_async_cursor.close()
            >>>
            >>> async def try_consume():
            ...     try:
            ...         await matches_async_cursor.__anext__()
            ...     except StopAsyncIteration:
            ...         print("StopAsyncIteration triggered.")
            ...
            >>> asyncio.run(try_consume())
            StopAsyncIteration triggered.
        """

        # lazy-import here to avoid circular import issues
        from astrapy.cursors import AsyncTableFindCursor

        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        return (
            AsyncTableFindCursor(
                table=self,
                request_timeout_ms=_request_timeout_ms,
                overall_timeout_ms=None,
                request_timeout_label=_rt_label,
            )
            .filter(filter)
            .project(projection)
            .skip(skip)
            .limit(limit)
            .sort(sort)
            .include_similarity(include_similarity)
            .include_sort_vector(include_sort_vector)
        )

    async def find_one(
        self,
        filter: FilterType | None = None,
        *,
        projection: ProjectionType | None = None,
        include_similarity: bool | None = None,
        sort: SortType | None = None,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> ROW | None:
        """
        Look up a single row: apply the given filtering and sorting criteria
        and return the first row that matches, or None when nothing matches.

        The parameters mirror a subset of those accepted by the `find` method
        (parameters that are meaningless for a single-row result, such as
        `limit`, are not available here).

        Args:
            filter: a dictionary stating the condition the returned row
                must satisfy. The filter can use operators, such as "$eq" for
                equality, and require columns to compare with literal values.
                Simple examples are `{}` (zero filter), `{"match_no": 123}`
                (a shorthand for `{"match_no": {"$eq": 123}}`), or
                `{"match_no": 123, "round": "C"}` (multiple conditions are
                implicitly combined with "$and").
                Please consult the Data API documentation for a more detailed
                explanation of table search filters and tips on their usage.
            projection: a prescription on which columns to return for the
                matching row. The projection can take the form
                `{"column1": True, "column2": True}`, `{"*": True}` (i.e. return
                the whole row), or the complementary form that excludes columns:
                `{"column1": False, "column2": False}`.
                To optimize bandwidth usage, it is recommended to use a
                projection, especially to avoid unnecessary columns of type
                vector with high-dimensional embeddings.
            include_similarity: a boolean to request the numeric value of the
                similarity to be returned as an added "$similarity" key in the
                returned row. It can be used meaningfully only in a vector
                search (see `sort`).
            sort: this dictionary parameter controls the sorting order, hence
                determines which row is being returned.
                The sort parameter can express either a vector search or
                a regular (ascending/descending, even hierarchical) sorting.
                * For a vector search the parameter takes the form
                `{"vector_column": qv}`, with the query vector `qv` of the
                appropriate type (list of floats or DataAPIVector). If the table
                has automatic embedding generation ("vectorize") enabled on that
                column, the form `{"vectorize_enabled_column": "query text"}`
                is also valid.
                * In the case of non-vector sorting, the parameter specifies the
                column(s) and the ascending/descending ordering required.
                If multiple columns are provided, the sorting applies them
                hierarchically to the rows. Examples are
                `{"score": SortMode.ASCENDING}` (equivalently `{"score": +1}`),
                `{"score": +1, "when": -1}`.
                Note that, depending on the column(s) chosen for sorting, the
                table partitioning structure, and the presence of indexes, the
                sorting may be done in-memory by the API. In that case, there
                may be performance implications.
                Consult the Data API documentation for more details on this topic.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults
                apply. (This method issues a single API request, hence all
                timeout parameters are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a dictionary expressing the result if a row is found, otherwise None.

        Raises:
            UnexpectedDataAPIResponseException: if the API response lacks
                the 'document' or the 'projectionSchema' entry.

        Examples:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> from astrapy.constants import SortMode
            >>> from astrapy.data_types import DataAPITimestamp, DataAPIVector
            >>>
            >>> # Filter on the partitioning:
            >>> asyncio.run(my_async_table.find_one({"match_id": "challenge6"}))
            {'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
            >>>
            >>> # A find with no matches:
            >>> str(asyncio.run(my_async_table.find_one({"match_id": "not_real"})))
            'None'
            >>>
            >>> # Optimize bandwidth using a projection:
            >>> asyncio.run(my_async_table.find_one(
            ...     {"match_id": "challenge6"},
            ...     projection={"round": True, "winner": True},
            ... ))
            {'round': 1, 'winner': 'Donna'}
            >>>
            >>> # Filter on primary key:
            >>> asyncio.run(
            ...     my_async_table.find_one({"match_id": "challenge6", "round": 1})
            ... )
            {'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
            >>>
            >>> # Filter on a regular indexed column:
            >>> asyncio.run(my_async_table.find_one({"winner": "Caio Gozer"}))
            {'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
            >>>
            >>> # Non-equality filter on a regular indexed column:
            >>> asyncio.run(my_async_table.find_one({"score": {"$gte": 15}}))
            {'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
            >>>
            >>> # Filter on a regular non-indexed column:
            >>> # (not recommended performance-wise)
            >>> asyncio.run(my_async_table.find_one(
            ...     {"when": {
            ...         "$gte": DataAPITimestamp.from_string("1999-12-31T01:23:44Z")
            ...     }}
            ... ))
            The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
            {'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
            >>>
            >>> # Empty filter:
            >>> asyncio.run(my_async_table.find_one({}))
            The Data API returned a warning: {'errorCode': 'ZERO_FILTER_OPERATIONS', ...
            {'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
            >>>
            >>> # Filter on the primary key and a regular non-indexed column:
            >>> # (not recommended performance-wise)
            >>> asyncio.run(my_async_table.find_one(
            ...     {"match_id": "fight5", "round": 3, "winner": "Caio Gozer"}
            ... ))
            The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
            {'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
            >>>
            >>> # Filter on a regular non-indexed column (and incomplete primary key)
            >>> # (not recommended performance-wise)
            >>> asyncio.run(
            ...     my_async_table.find_one({"round": 3, "winner": "Caio Gozer"})
            ... )
            The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
            {'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
            >>>
            >>> # Vector search with "sort" (on an appropriately-indexed vector column):
            >>> asyncio.run(my_async_table.find_one(
            ...     {},
            ...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
            ...     projection={"winner": True},
            ... ))
            {'winner': 'Donna'}
            >>>
            >>> # Hybrid search with vector sort and non-vector filtering:
            >>> asyncio.run(my_async_table.find_one(
            ...     {"match_id": "fight4"},
            ...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
            ...     projection={"winner": True},
            ... ))
            {'winner': 'Victor'}
            >>>
            >>> # Return the numeric value of the vector similarity
            >>> # (also demonstrating that one can pass a plain list for a vector):
            >>> asyncio.run(my_async_table.find_one(
            ...     {},
            ...     sort={"m_vector": [0.2, 0.3, 0.4]},
            ...     projection={"winner": True},
            ...     include_similarity=True,
            ... ))
            {'winner': 'Donna', '$similarity': 0.515}
            >>>
            >>> # Non-vector sorting on a 'partitionSort' column:
            >>> asyncio.run(my_async_table.find_one(
            ...     {"match_id": "fight5"},
            ...     sort={"round": SortMode.DESCENDING},
            ...     projection={"winner": True},
            ... ))
            {'winner': 'Caio Gozer'}
            >>>
            >>> # Non-vector sorting on a regular column:
            >>> # (not recommended performance-wise)
            >>> asyncio.run(my_async_table.find_one(
            ...     {"match_id": "fight5"},
            ...     sort={"winner": SortMode.ASCENDING},
            ...     projection={"winner": True},
            ... ))
            The Data API returned a warning: {'errorCode': 'IN_MEMORY_SORTING...
            {'winner': 'Adam Zuul'}
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )

        # the "options" entry is sent only if the caller stated a preference
        command_options: dict[str, bool] | None = None
        if include_similarity is not None:
            command_options = {"includeSimilarity": include_similarity}

        # all candidate entries of the command; unset (None) ones are dropped
        candidate_entries = {
            "filter": filter,
            "projection": normalize_optional_projection(projection),
            "options": command_options,
            "sort": sort,
        }
        command_body = {
            entry_name: entry_value
            for entry_name, entry_value in candidate_entries.items()
            if entry_value is not None
        }
        fo_payload = self._converter_agent.preprocess_payload(
            {"findOne": command_body},
            map2tuple_checker=None,
        )

        fo_response = await self._api_commander.async_request(
            payload=fo_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )

        # both sections must be there, even when no row is found
        response_data = fo_response.get("data") or {}
        if "document" not in response_data:
            raise UnexpectedDataAPIResponseException(
                text="Response from findOne API command missing 'document'.",
                raw_response=fo_response,
            )
        response_status = fo_response.get("status") or {}
        if "projectionSchema" not in response_status:
            raise UnexpectedDataAPIResponseException(
                text="Response from findOne API command missing 'projectionSchema'.",
                raw_response=fo_response,
            )

        found_row = response_data["document"]
        if found_row is None:
            # the API signals "no match" with a null document
            return None
        return self._converter_agent.postprocess_row(
            found_row,
            columns_dict=response_status["projectionSchema"],
            similarity_pseudocolumn="$similarity" if include_similarity else None,
        )

    async def distinct(
        self,
        key: str | Iterable[str | int],
        *,
        filter: FilterType | None = None,
        request_timeout_ms: int | None = None,
        general_method_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[Any]:
        """
        Collect the unique values found for `key` across the table rows
        that match the provided filter, and return them as a list.

        Args:
            key: the name of the field whose value is inspected across rows.
                Keys can be just column names (as is typically the case), but
                the dot-notation is also accepted to mean subkeys or indices
                within lists (for example, "map_column.subkey" or "list_column.2").
                If a column has literal dots or ampersands in its name, this
                parameter must be escaped to be treated properly.
                The key can also be a list of strings and numbers, in which case
                no escape is necessary: each item in the list is a field
                name/index, for example ["map_column", "subkey"] or
                ["list_column", 2].
                For set and list columns, individual entries are "unrolled"
                automatically.
            filter: a dictionary stating the condition the inspected rows
                must satisfy. The filter can use operators, such as "$eq" for
                equality, and require columns to compare with literal values.
                Simple examples are `{}` (zero filter), `{"match_no": 123}`
                (a shorthand for `{"match_no": {"$eq": 123}}`), or
                `{"match_no": 123, "round": "C"}` (multiple conditions are
                implicitly combined with "$and").
                Please consult the Data API documentation for a more detailed
                explanation of table search filters and tips on their usage.
            general_method_timeout_ms: a timeout, in milliseconds, for the whole
                requested operation (which may involve multiple API requests).
                This method, being based on `find` (see) may entail successive
                HTTP API requests, depending on the amount of involved rows.
                If not provided, this object's defaults apply.
            request_timeout_ms: a timeout, in milliseconds, for each API request.
                If not provided, this object's defaults apply.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a list of all different values for `key` found across the rows
            that match the filter. The result list has no repeated items.

        Examples:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> asyncio.run(my_async_table.distinct(
            ...     "winner",
            ...     filter={"match_id": "challenge6"},
            ... ))
            ['Donna', 'Erick', 'Fiona']
            >>>
            >>> # distinct values across the whole table:
            >>> # (not recommended performance-wise)
            >>> asyncio.run(my_async_table.distinct("winner"))
            The Data API returned a warning: {'errorCode': 'ZERO_FILTER_OPERATIONS', ...
            ['Victor', 'Adam Zuul', 'Betta Vigo', 'Caio Gozer', 'Donna', 'Erick', ...
            >>>
            >>> # Over a column containing null values
            >>> # (also with composite filter):
            >>> asyncio.run(my_async_table.distinct(
            ...     "score",
            ...     filter={"match_id": {"$in": ["fight4", "tournamentA"]}},
            ... ))
            [18, None]
            >>>
            >>> # distinct over a set column (automatically "unrolled"):
            >>> asyncio.run(my_async_table.distinct(
            ...     "fighters",
            ...     filter={"match_id": {"$in": ["fight4", "tournamentA"]}},
            ... ))
            [UUID('0193539a-2770-8c09-a32a-111111111111'), UUID('019353e3-00b4-...

        Note:
            It must be kept in mind that `distinct` is a client-side operation,
            which effectively browses all required rows using the logic
            of the `find` method and collects the unique values found for `key`.
            As such, there may be performance, latency and ultimately
            billing implications if the amount of matching rows is large.

        Note:
            For details on the behaviour of "distinct" in conjunction with
            real-time changes in the table contents, see the
            Note of the `find` command.
        """

        # lazy-import here to avoid circular import issues
        from astrapy.cursors import AsyncTableFindCursor

        # overall timeout: explicit parameter, then its alias, then the defaults
        _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
            (general_method_timeout_ms, "general_method_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.general_method_timeout_ms,
                "general_method_timeout_ms",
            ),
        )
        # per-request timeout: explicit parameter, then the defaults
        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )

        # set up the value extractor and the key used for the projection
        extract_values = _create_document_key_extractor(key)
        projection_key = _reduce_distinct_key_to_shallow_safe(key)
        # relaxing the type hint (limited to within this method body)
        rows_cursor: AsyncTableFindCursor[dict[str, Any], dict[str, Any]] = (
            AsyncTableFindCursor(
                table=self,
                request_timeout_ms=_request_timeout_ms,
                overall_timeout_ms=_general_method_timeout_ms,
                request_timeout_label=_rt_label,
                overall_timeout_label=_gmt_label,
            )  # type: ignore[assignment]
            .filter(filter)
            .project({projection_key: True})
        )

        # walk the cursor, keeping the first occurrence of each distinct value
        # (uniqueness is tracked through hashes of the values)
        seen_hashes = set()
        unique_values: list[Any] = []
        logger.info(f"running distinct() on '{self.name}'")
        async for row in rows_cursor:
            for value in extract_values(row):
                value_hash = _hash_table_document(
                    value, options=self.api_options.serdes_options
                )
                if value_hash in seen_hashes:
                    continue
                seen_hashes.add(value_hash)
                unique_values.append(value)
        logger.info(f"finished running distinct() on '{self.name}'")
        return unique_values

    async def count_documents(
        self,
        filter: FilterType,
        *,
        upper_bound: int,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> int:
        """
        Count the rows in the table matching the specified filter.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"name": "John", "age": 59}
                    {"$and": [{"name": {"$eq": "John"}}, {"age": {"$gt": 58}}]}
                See the Data API documentation for the full set of operators.
            upper_bound: a required ceiling on the result of the count operation.
                If the actual number of rows exceeds this value,
                an exception will be raised.
                Furthermore, if the actual number of rows exceeds the maximum
                count that the Data API can reach (regardless of upper_bound),
                an exception will be raised.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            the exact count of matching rows.

        Raises:
            TooManyRowsToCountException: if the response flags that more rows
                exist than the server could count ("moreData"), or if the
                returned count is greater than `upper_bound`.
            UnexpectedDataAPIResponseException: if the response carries no
                "count" within its "status" (including a missing or null status).

        Examples:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> asyncio.run(my_async_table.insert_many([{"seq": i} for i in range(20)]))
            TableInsertManyResult(...)
            >>> asyncio.run(my_async_table.count_documents({}, upper_bound=100))
            20
            >>> asyncio.run(my_async_table.count_documents({"seq":{"$gt": 15}}, upper_bound=100))
            4
            >>> asyncio.run(my_async_table.count_documents({}, upper_bound=10))
            Traceback (most recent call last):
                ... ...
            astrapy.exceptions.TooManyRowsToCountException

        Note:
            Count operations are expensive: for this reason, the best practice
            is to provide a reasonable `upper_bound` according to the caller
            expectations. Moreover, indiscriminate usage of count operations
            for sizeable amounts of rows (i.e. in the thousands and more)
            is discouraged in favor of alternative application-specific solutions.
            Keep in mind that the Data API has a hard upper limit on the amount
            of rows it will count, and that an exception will be thrown
            by this method if this limit is encountered.
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # NOTE(review): the filter is sent as-is, whereas `find_one` routes its
        # payload through `self._converter_agent.preprocess_payload` -- confirm
        # whether skipping that step here is intended.
        cd_payload = {"countDocuments": {"filter": filter}}
        logger.info(f"countDocuments on '{self.name}'")
        cd_response = await self._api_commander.async_request(
            payload=cd_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished countDocuments on '{self.name}'")

        # `or {}` (rather than a `.get` default) also covers a "status" entry
        # that is present but null: such a response must surface as an
        # UnexpectedDataAPIResponseException, not as a bare TypeError.
        cd_status = cd_response.get("status") or {}
        if "count" not in cd_status:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from countDocuments API command.",
                raw_response=cd_response,
            )

        count: int = cd_status["count"]
        if cd_status.get("moreData", False):
            # the server stopped counting: `count` is only a lower bound
            raise TooManyRowsToCountException(
                text=f"Document count exceeds {count}, the maximum allowed by the server",
                server_max_count_exceeded=True,
            )
        if count > upper_bound:
            raise TooManyRowsToCountException(
                text="Document count exceeds required upper bound",
                server_max_count_exceeded=False,
            )
        return count

    async def estimated_document_count(
        self,
        *,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> int:
        """
        Query the API server for an estimate of the document count in the table.

        Contrary to `count_documents`, this method has no filtering parameters.

        Args:
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a server-provided estimate count of the documents in the table.

        Raises:
            UnexpectedDataAPIResponseException: if the response carries no
                "count" within its "status" (including a missing or null status).

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> asyncio.run(my_async_table.estimated_document_count())
            5820
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        ed_payload: dict[str, Any] = {"estimatedDocumentCount": {}}
        logger.info(f"estimatedDocumentCount on '{self.name}'")
        ed_response = await self._api_commander.async_request(
            payload=ed_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished estimatedDocumentCount on '{self.name}'")

        # `or {}` (rather than a `.get` default) also covers a "status" entry
        # that is present but null: such a response must surface as an
        # UnexpectedDataAPIResponseException, not as a bare TypeError.
        ed_status = ed_response.get("status") or {}
        if "count" not in ed_status:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from estimatedDocumentCount API command.",
                raw_response=ed_response,
            )
        count: int = ed_status["count"]
        return count

    async def update_one(
        self,
        filter: FilterType,
        update: dict[str, Any],
        *,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Update a single row of the table, changing some or all of its columns.
        If no row matches, a new row is implicitly inserted.

        Args:
            filter: a predicate that specifies the table primary key in full,
                i.e. a dictionary with a value for each of the columns forming
                the primary key. For example: `{"match_id": "fight4", "round": 1}`.
            update: the update prescription to apply to the row, given as a
                dictionary in the Data API syntax. For tables, the available
                update operators are `$set` and `$unset` (note that setting
                a column to None is equivalent to using the $unset operator).
                Examples are `{"$set": {"round": 12}}` and
                `{"$unset": {"winner": "", "score": ""}}`.
                The primary key columns cannot be altered by an update.
                See the Data API documentation for more details.
            general_method_timeout_ms: a timeout, in milliseconds, for the
                underlying API request. When omitted, this object's defaults
                are used. (Only one API request is issued by this method, so
                all timeout parameters are handled alike.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Raises:
            UnexpectedDataAPIResponseException: if the API response has
                no "status" field.

        Examples:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> from astrapy.data_types import DataAPISet
            >>>
            >>> # Set a new value for a column
            >>> await my_async_table.update_one(
            ...     {"match_id": "fight4", "round": 1},
            ...     update={"$set": {"winner": "Winona"}},
            ... )
            >>>
            >>> # Set a new value for a column while unsetting another column
            >>> await my_async_table.update_one(
            ...     {"match_id": "fight4", "round": 1},
            ...     update={"$set": {"winner": None, "score": 24}},
            ... )
            >>>
            >>> # Set a 'set' column to empty
            >>> await my_async_table.update_one(
            ...     {"match_id": "fight4", "round": 1},
            ...     update={"$set": {"fighters": DataAPISet()}},
            ... )
            >>>
            >>> # Set a 'set' column to empty using None
            >>> await my_async_table.update_one(
            ...     {"match_id": "fight4", "round": 1},
            ...     update={"$set": {"fighters": None}},
            ... )
            >>>
            >>> # Set a 'set' column to empty using a regular (empty) set
            >>> await my_async_table.update_one(
            ...     {"match_id": "fight4", "round": 1},
            ...     update={"$set": {"fighters": set()}},
            ... )
            >>>
            >>> # Set a 'set' column to empty using $unset
            >>> await my_async_table.update_one(
            ...     {"match_id": "fight4", "round": 1},
            ...     update={"$unset": {"fighters": None}},
            ... )
            >>>
            >>> # A non-existing primary key creates a new row
            >>> await my_async_table.update_one(
            ...     {"match_id": "bar_fight", "round": 4},
            ...     update={"$set": {"score": 8, "winner": "Jack"}},
            ... )
            >>>
            >>> # Delete column values for a row (they'll read as None now)
            >>> await my_async_table.update_one(
            ...     {"match_id": "challenge6", "round": 2},
            ...     update={"$unset": {"winner": None, "score": None}},
            ... )

        Note:
            a row that was created solely through update operations (and not
            by an insertion) may, correspondingly, be deleted by an $unset
            update covering all of its columns.
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )

        # Parts of the command passed as None are left out of the payload.
        uo_command: dict[str, Any] = {}
        if filter is not None:
            uo_command["filter"] = filter
        if update is not None:
            uo_command["update"] = update
        uo_payload = self._converter_agent.preprocess_payload(
            {"updateOne": uo_command},
            map2tuple_checker=map2tuple_checker_update_one,
        )

        logger.info(f"updateOne on '{self.name}'")
        uo_response = await self._api_commander.async_request(
            payload=uo_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished updateOne on '{self.name}'")

        if "status" not in uo_response:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from updateOne API command.",
                raw_response=uo_response,
            )
        # The contents of "status" are disregarded: the method just returns.
        return

    async def delete_one(
        self,
        filter: FilterType,
        *,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Delete the row whose primary key matches the provided value.
        The method does nothing if there is no row with that primary key.

        Args:
            filter: a predicate that specifies the table primary key in full,
                i.e. a dictionary with a value for each of the columns forming
                the primary key. At most one row, the one matching that primary
                key, is deleted. For example: `{"match_id": "fight4", "round": 1}`.
            general_method_timeout_ms: a timeout, in milliseconds, for the
                underlying API request. When omitted, this object's defaults
                are used. (Only one API request is issued by this method, so
                all timeout parameters are handled alike.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Raises:
            UnexpectedDataAPIResponseException: if the "deletedCount" found
                in the response "status" is anything other than -1.

        Examples:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> # Count the rows matching a certain filter
            >>> len(asyncio.run(my_async_table.find({"match_id": "fight7"}).to_list()))
            3
            >>>
            >>> # Delete a row belonging to the group
            >>> asyncio.run(
            ...     my_async_table.delete_one({"match_id": "fight7", "round": 2})
            ... )
            >>>
            >>> # Count again
            >>> len(asyncio.run(my_async_table.find({"match_id": "fight7"}).to_list()))
            2
            >>>
            >>> # Attempt the delete again (nothing to delete)
            >>> asyncio.run(
            ...     my_async_table.delete_one({"match_id": "fight7", "round": 2})
            ... )
            >>>
            >>> # The count is unchanged
            >>> len(asyncio.run(my_async_table.find({"match_id": "fight7"}).to_list()))
            2
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )

        # A filter passed as None is left out of the command altogether.
        do_command = {} if filter is None else {"filter": filter}
        do_payload = self._converter_agent.preprocess_payload(
            {"deleteOne": do_command},
            map2tuple_checker=None,
        )

        logger.info(f"deleteOne on '{self.name}'")
        do_response = await self._api_commander.async_request(
            payload=do_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished deleteOne on '{self.name}'")

        # The only response accepted as valid has a deletedCount of -1.
        deleted_count = do_response.get("status", {}).get("deletedCount")
        if deleted_count == -1:
            return
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from deleteOne API command.",
            raw_response=do_response,
        )

    async def delete_many(
        self,
        filter: FilterType,
        *,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Delete every row that matches the provided filter condition.
        Depending on the filter, the operation may affect anything from
        a single row to the whole content of the table.

        Args:
            filter: a filter dictionary determining which row(s) to delete.
                1. A filter in the form `{"pk1": val1, "pk2": val2 ...}` that
                specifies the primary key in full deletes at most one row,
                the one with that primary key.
                2. If the table has "partitionSort" columns, some or all of them
                may be left out (and the least significant of them may also use
                an inequality, or range, predicate): a range of rows, always
                confined to a single partition, is then deleted.
                3. Passing an empty filter, `{}`, empties the table
                completely. *USE WITH CARE*.
                4. Any other kind of filtering clause is forbidden.
                In the examples that follow, the table is partitioned
                by columns ["pa1", "pa2"] and has partitionSort "ps1" and "ps2",
                in that order.
                Valid filter examples:
                - `{"pa1": x, "pa2": y, "ps1": z, "ps2": t}`: deletes one row
                - `{"pa1": x, "pa2": y, "ps1": z}`: deletes multiple rows
                - `{"pa1": x, "pa2": y, "ps1": z, "ps2": {"$lt": q}}`: del. multiple rows
                - `{"pa1": x, "pa2": y}`: deletes all rows in the partition
                - `{}`: empties the table (*CAUTION*)
                Invalid filter examples:
                - `{"pa1": x}`: incomplete partition key
                - `{"pa1": x, "ps1": z}`: incomplete partition key (whatever is added)
                - `{"pa1": x, "pa2": y, "ps1": {"$lt": r}, "ps2": t}`: inequality on
                  a non-least-significant partitionSort column provided.
                - `{"pa1": x, "pa2": y, "ps2": t}`: cannot skip "ps1"
            general_method_timeout_ms: a timeout, in milliseconds, for the
                underlying API request. When omitted, this object's defaults
                are used. (Only one API request is issued by this method, so
                all timeout parameters are handled alike.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Raises:
            UnexpectedDataAPIResponseException: if the "deletedCount" found
                in the response "status" is anything other than -1.

        Examples:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> # Delete a single row (full primary key specified):
            >>> await my_async_table.delete_many({"match_id": "fight4", "round": 1})
            >>>
            >>> # Delete part of a partition (inequality on the
            >>> # last-mentioned 'partitionSort' column):
            >>> await my_async_table.delete_many({"match_id": "fight5", "round": {"$gte": 5}})
            >>>
            >>> # Delete a whole partition (leave 'partitionSort' unspecified):
            >>> await my_async_table.delete_many({"match_id": "fight7"})
            >>>
            >>> # empty the table entirely with empty filter (*CAUTION*):
            >>> await my_async_table.delete_many({})
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )

        # A filter passed as None is left out of the command altogether
        # (whereas an empty dictionary is sent as it is).
        dm_command = {} if filter is None else {"filter": filter}
        dm_payload = self._converter_agent.preprocess_payload(
            {"deleteMany": dm_command},
            map2tuple_checker=None,
        )

        logger.info(f"deleteMany on '{self.name}'")
        dm_response = await self._api_commander.async_request(
            payload=dm_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished deleteMany on '{self.name}'")

        # The only response accepted as valid has a deletedCount of -1.
        deleted_count = dm_response.get("status", {}).get("deletedCount")
        if deleted_count == -1:
            return
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from deleteMany API command.",
            raw_response=dm_response,
        )

    async def drop(
        self,
        *,
        if_exists: bool | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> dict[str, Any]:
        """
        Drop the table, i.e. remove it from the database together with
        all the rows it contains.

        Args:
            if_exists: when True, an attempt to drop a table that does not exist
                silently does nothing instead of raising an error. If omitted,
                the API default behaviour applies.
            table_admin_timeout_ms: a timeout, in milliseconds, for the
                underlying API request. When omitted, this object's defaults
                are used. (Only one API request is issued by this method, so
                all timeout parameters are handled alike.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Returns:
            the result of the `drop_table` call on this table's database,
            passed through unchanged.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> # List tables:
            >>> asyncio.run(my_async_table.database.list_table_names())
            ['games']
            >>>
            >>> # Drop this table:
            >>> asyncio.run(my_async_table.drop())
            >>>
            >>> # List tables again:
            >>> asyncio.run(my_async_table.database.list_table_names())
            []
            >>>
            >>> # Try working on the table now:
            >>> from astrapy.exceptions import DataAPIResponseException
            >>>
            >>> async def try_use_table():
            ...     try:
            ...         await my_async_table.find_one({})
            ...     except DataAPIResponseException as err:
            ...         print(str(err))
            ...
            >>> asyncio.run(try_use_table())
            Collection does not exist [...] (COLLECTION_NOT_EXIST)

        Note:
            Use with caution.

        Note:
            After the method succeeds, it is still possible to invoke methods
            on this object: however, doing so hardly makes sense, since the
            actual underlying table does not exist anymore.
            It is the developer's responsibility to design a correct flow
            that avoids any further use of a dropped table.
        """

        logger.info(f"dropping table '{self.name}' (self)")
        # The actual work is delegated to the database this table belongs to.
        outcome = await self.database.drop_table(
            self.name,
            if_exists=if_exists,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info(f"finished dropping table '{self.name}' (self)")
        return outcome

    async def command(
        self,
        body: dict[str, Any] | None,
        *,
        raise_api_errors: bool = True,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> dict[str, Any]:
        """
        Issue a POST request to the Data API for this table, using
        an arbitrary payload supplied by the caller.
        The payload is sent as it is, with no transformations
        or type conversions applied to it.

        Args:
            body: a JSON-serializable dictionary, the payload of the request.
            raise_api_errors: if True, an astrapy exception is raised for
                responses that have a nonempty 'errors' field.
            general_method_timeout_ms: a timeout, in milliseconds, for the
                underlying API request. When omitted, this object's defaults
                are used. (Only one API request is issued by this method, so
                all timeout parameters are handled alike.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a dictionary with the response of the HTTP request.

        Example:
            >>> # NOTE: may require slight adaptation to an async context.
            >>>
            >>> asyncio.run(my_async_table.command({
            ...     "findOne": {
            ...         "filter": {"match_id": "fight4"},
            ...         "projection": {"winner": True},
            ...     }
            ... }))
            {'data': {'document': {'winner': 'Victor'}}, 'status': ...  # shortened
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # Label for the log lines: the sorted top-level keys of the payload,
        # or a placeholder for a missing/empty payload.
        _cmd_desc: str = ",".join(sorted(body.keys())) if body else "(none)"
        logger.info(f"command={_cmd_desc} on '{self.name}'")
        api_response = await self._api_commander.async_request(
            payload=body,
            raise_api_errors=raise_api_errors,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished command={_cmd_desc} on '{self.name}'")
        return api_response

Ancestors

typing.Generic

Instance variables

var database : AsyncDatabase

The AsyncDatabase object this table belongs to.

Example

>>> my_async_table.database.name
'the_db'

Expand source code

@property
def database(self) -> AsyncDatabase:
    """
    The AsyncDatabase object this table belongs to.

    Returns:
        the database held by this table in its `_database` attribute.

    Example:
        >>> my_async_table.database.name
        'the_db'
    """

    return self._database

var full_name : str

The fully-qualified table name within the database, in the form "keyspace.table_name".

Example

>>> my_async_table.full_name
'default_keyspace.my_table'

Expand source code

@property
def full_name(self) -> str:
    """
    The table name qualified with its keyspace,
    formatted as "keyspace.table_name".

    Returns:
        a string joining this table's `keyspace` and `name` with a dot.

    Example:
        >>> my_async_table.full_name
        'default_keyspace.my_table'
    """

    keyspace_part = self.keyspace
    table_part = self.name
    return f"{keyspace_part}.{table_part}"

var keyspace : str

The keyspace this table is in.

Example

>>> my_async_table.keyspace
'default_keyspace'

Expand source code

@property
def keyspace(self) -> str:
    """
    The keyspace where this table resides.

    Returns:
        the keyspace set on the database this table belongs to.

    Raises:
        RuntimeError: if the database has its keyspace set to None.

    Example:
        >>> my_async_table.keyspace
        'default_keyspace'
    """

    db_keyspace = self.database.keyspace
    if db_keyspace is not None:
        return db_keyspace
    # Without a keyspace on the database there is nothing sensible to return.
    raise RuntimeError("The table's DB is set with keyspace=None")

var name : str

The name of this table.

Example

>>> my_async_table.name
'my_table'

Expand source code

@property
def name(self) -> str:
    """
    The name of this table.

    Returns:
        the table name held by this object in its `_name` attribute.

    Example:
        >>> my_async_table.name
        'my_table'
    """

    return self._name

Methods

async def alter(self, operation: AlterTableOperation | dict[str, Any], *, row_type: type[Any] = typing.Dict[str, typing.Any], table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> AsyncTable[~NEW_ROW]

Executes one of the available alter-table operations on this table, such as adding/dropping columns.

This is a blocking operation: the method returns once the API has responded to the alter-table request.

Args

operation: an instance of one of the astrapy.info.AlterTable* classes, representing which alter operation to perform and the details thereof. A regular dictionary can also be provided, but then it must have the alter operation name at its top level: {"add": {"columns": …}}.
row_type: this parameter acts as a formal specifier for the type checker. If omitted, the resulting AsyncTable is implicitly an AsyncTable[dict[str, Any]]. If provided, it must match the type hint specified in the assignment. See the examples below.
table_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for table_admin_timeout_ms.
timeout_ms: an alias for table_admin_timeout_ms.

Examples

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> from astrapy.info import (
...     AlterTableAddColumns,
...     AlterTableAddVectorize,
...     AlterTableDropColumns,
...     AlterTableDropVectorize,
...     ColumnType,
...     TableScalarColumnTypeDescriptor,
...     VectorServiceOptions,
... )
>>>
>>> # Add a column
>>> new_table_1 = await my_table.alter(
...     AlterTableAddColumns(
...         columns={
...             "tie_break": TableScalarColumnTypeDescriptor(
...                 column_type=ColumnType.BOOLEAN,
...             ),
...         }
...     )
... )
>>>
>>> # Drop a column
>>> new_table_2 = await new_table_1.alter(AlterTableDropColumns(
...     columns=["tie_break"]
... ))
>>>
>>> # Add vectorize to a (vector) column
>>> new_table_3 = await new_table_2.alter(
...     AlterTableAddVectorize(
...         columns={
...             "m_vector": VectorServiceOptions(
...                 provider="openai",
...                 model_name="text-embedding-3-small",
...                 authentication={
...                     "providerKey": "ASTRA_KMS_API_KEY_NAME",
...                 },
...             ),
...         }
...     )
... )
>>>
>>> # Drop vectorize from a (vector) column
>>> # (Also demonstrates type hint usage)
>>> from typing import TypedDict
>>> from astrapy import AsyncTable
>>> from astrapy.data_types import (
...     DataAPISet,
...     DataAPITimestamp,
...     DataAPIVector,
... )
>>> from astrapy.ids import UUID
>>>
>>> class MyMatch(TypedDict):
...     match_id: str
...     round: int
...     m_vector: DataAPIVector
...     score: int
...     when: DataAPITimestamp
...     winner: str
...     fighters: DataAPISet[UUID]
...
>>> new_table_4: AsyncTable[MyMatch] = await new_table_3.alter(
...     AlterTableDropVectorize(columns=["m_vector"]),
...     row_type=MyMatch,
... )

Expand source code

async def alter(
    self,
    operation: AlterTableOperation | dict[str, Any],
    *,
    row_type: type[Any] = DefaultRowType,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> AsyncTable[NEW_ROW]:
    """
    Run one of the available alter-table operations on this table,
    for instance adding or dropping columns.

    This is a blocking operation: the method returns once the API
    has responded to the alter-table request.

    Args:
        operation: an instance of one of the `astrapy.info.AlterTable*` classes,
            describing which alter operation to perform along with its details.
            A plain dictionary is also accepted, provided that it has the
            alter operation name at its top level: {"add": {"columns": ...}}.
        row_type: this parameter acts as a formal specifier for the type checker
            (the method body makes no use of it).
            If omitted, the resulting AsyncTable is implicitly
            an `AsyncTable[dict[str, Any]]`. If provided, it must match
            the type hint specified in the assignment.
            See the examples below.
        table_admin_timeout_ms: a timeout, in milliseconds, for the
            underlying API request. When omitted, this object's defaults
            are used. (Only one API request is issued by this method, so
            all timeout parameters are handled alike.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Returns:
        a new AsyncTable, created with the same database, name, keyspace
        and API options as this one.

    Raises:
        UnexpectedDataAPIResponseException: if the "status" in the API
            response is not exactly `{"ok": 1}`.

    Examples:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> from astrapy.info import (
        ...     AlterTableAddColumns,
        ...     AlterTableAddVectorize,
        ...     AlterTableDropColumns,
        ...     AlterTableDropVectorize,
        ...     ColumnType,
        ...     TableScalarColumnTypeDescriptor,
        ...     VectorServiceOptions,
        ... )
        >>>
        >>> # Add a column
        >>> new_table_1 = await my_table.alter(
        ...     AlterTableAddColumns(
        ...         columns={
        ...             "tie_break": TableScalarColumnTypeDescriptor(
        ...                 column_type=ColumnType.BOOLEAN,
        ...             ),
        ...         }
        ...     )
        ... )
        >>>
        >>> # Drop a column
        >>> new_table_2 = await new_table_1.alter(AlterTableDropColumns(
        ...     columns=["tie_break"]
        ... ))
        >>>
        >>> # Add vectorize to a (vector) column
        >>> new_table_3 = await new_table_2.alter(
        ...     AlterTableAddVectorize(
        ...         columns={
        ...             "m_vector": VectorServiceOptions(
        ...                 provider="openai",
        ...                 model_name="text-embedding-3-small",
        ...                 authentication={
        ...                     "providerKey": "ASTRA_KMS_API_KEY_NAME",
        ...                 },
        ...             ),
        ...         }
        ...     )
        ... )
        >>>
        >>> # Drop vectorize from a (vector) column
        >>> # (Also demonstrates type hint usage)
        >>> from typing import TypedDict
        >>> from astrapy import AsyncTable
        >>> from astrapy.data_types import (
        ...     DataAPISet,
        ...     DataAPITimestamp,
        ...     DataAPIVector,
        ... )
        >>> from astrapy.ids import UUID
        >>>
        >>> class MyMatch(TypedDict):
        ...     match_id: str
        ...     round: int
        ...     m_vector: DataAPIVector
        ...     score: int
        ...     when: DataAPITimestamp
        ...     winner: str
        ...     fighters: DataAPISet[UUID]
        ...
        >>> new_table_4: AsyncTable[MyMatch] = await new_table_3.alter(
        ...     AlterTableDropVectorize(columns=["m_vector"]),
        ...     row_type=MyMatch,
        ... )
    """

    # Normalize the input: plain dictionaries are parsed into operation objects.
    n_operation: AlterTableOperation = (
        operation
        if isinstance(operation, AlterTableOperation)
        else AlterTableOperation.from_full_dict(operation)
    )
    _table_admin_timeout_ms, _ta_label = _select_singlereq_timeout_ta(
        timeout_options=self.api_options.timeout_options,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    at_operation_name = n_operation._name
    # The operation name is the single key wrapping the operation details.
    at_operation_body = {at_operation_name: n_operation.as_dict()}
    at_payload = {"alterTable": {"operation": at_operation_body}}
    logger.info(f"alterTable({at_operation_name})")
    at_response = await self._api_commander.async_request(
        payload=at_payload,
        timeout_context=_TimeoutContext(
            request_ms=_table_admin_timeout_ms, label=_ta_label
        ),
    )
    at_status = at_response.get("status")
    if at_status != {"ok": 1}:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from alterTable API command.",
            raw_response=at_response,
        )
    logger.info(f"finished alterTable({at_operation_name})")
    return AsyncTable(
        database=self.database,
        name=self.name,
        keyspace=self.keyspace,
        api_options=self.api_options,
    )

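async def command(self, body: dict[str, Any] | None, *, raise_api_errors: bool = True, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> dict[str, Any]

Send a POST request to the Data API for this table with an arbitrary, caller-provided payload. No transformations or type conversions are made on the provided payload.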

Args

body: a JSON-serializable dictionary, the payload of the request.
raise_api_errors: if True, responses with a nonempty 'errors' field result in an astrapy exception being raised.
general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a dictionary with the response of the HTTP request.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> asyncio.run(my_async_table.command({
...     "findOne": {
...         "filter": {"match_id": "fight4"},
...         "projection": {"winner": True},
...     }
... }))
{'data': {'document': {'winner': 'Victor'}}, 'status': ...  # shortened

Expand source code

async def command(
    self,
    body: dict[str, Any] | None,
    *,
    raise_api_errors: bool = True,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> dict[str, Any]:
    """
    Issue a raw POST request to the Data API, targeting this table, with a
    payload supplied verbatim by the caller.
    The payload is forwarded as-is: no transformation or type conversion
    is applied to it.

    Args:
        body: a JSON-serializable dictionary, the payload of the request.
        raise_api_errors: if True, responses with a nonempty 'errors' field
            result in an astrapy exception being raised.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a dictionary with the response of the HTTP request.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> asyncio.run(my_async_table.command({
        ...     "findOne": {
        ...         "filter": {"match_id": "fight4"},
        ...         "projection": {"winner": True},
        ...     }
        ... }))
        {'data': {'document': {'winner': 'Victor'}}, 'status': ...  # shortened
    """

    effective_timeout_ms, timeout_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # Only the sorted top-level keys of the payload end up in the logs;
    # an empty or missing body is reported as "(none)".
    command_label: str = ",".join(sorted(body.keys())) if body else "(none)"
    logger.info(f"command={command_label} on '{self.name}'")
    request_timeout_context = _TimeoutContext(
        request_ms=effective_timeout_ms, label=timeout_label
    )
    raw_response = await self._api_commander.async_request(
        payload=body,
        raise_api_errors=raise_api_errors,
        timeout_context=request_timeout_context,
    )
    logger.info(f"finished command={command_label} on '{self.name}'")
    return raw_response

Count the rows in the table matching the specified filter.

Args

filter: a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"name": "John", "age": 59} {"$and": [{"name": {"$eq": "John"}}, {"age": {"$gt": 58}}]} See the Data API documentation for the full set of operators.
upper_bound: a required ceiling on the result of the count operation. If the actual number of rows exceeds this value, an exception will be raised. Furthermore, if the actual number of rows exceeds the maximum count that the Data API can reach (regardless of upper_bound), an exception will be raised.
general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Returns

the exact count of matching rows.

Examples

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> asyncio.run(my_async_table.insert_many([{"seq": i} for i in range(20)]))
TableInsertManyResult(...)
>>> asyncio.run(my_async_table.count_documents({}, upper_bound=100))
20
>>> asyncio.run(my_async_table.count_documents({"seq":{"$gt": 15}}, upper_bound=100))
4
>>> asyncio.run(my_async_table.count_documents({}, upper_bound=10))
Traceback (most recent call last):
    ... ...
astrapy.exceptions.TooManyRowsToCountException

Note

Count operations are expensive: for this reason, the best practice is to provide a reasonable upper_bound according to the caller expectations. Moreover, indiscriminate usage of count operations for sizeable amounts of rows (i.e. in the thousands and more) is discouraged in favor of alternative application-specific solutions. Keep in mind that the Data API has a hard upper limit on the amount of rows it will count, and that an exception will be thrown by this method if this limit is encountered.

Expand source code

async def count_documents(
    self,
    filter: FilterType,
    *,
    upper_bound: int,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> int:
    """
    Count the rows in the table matching the specified filter.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"name": "John", "age": 59}
                {"$and": [{"name": {"$eq": "John"}}, {"age": {"$gt": 58}}]}
            See the Data API documentation for the full set of operators.
        upper_bound: a required ceiling on the result of the count operation.
            If the actual number of rows exceeds this value,
            an exception will be raised.
            Furthermore, if the actual number of rows exceeds the maximum
            count that the Data API can reach (regardless of upper_bound),
            an exception will be raised.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        the exact count of matching rows.

    Raises:
        TooManyRowsToCountException: if the API response flags "moreData"
            (server-side maximum exceeded), or if the returned count
            is greater than `upper_bound`.
        UnexpectedDataAPIResponseException: if the API response has
            no "count" entry in its "status".

    Examples:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> asyncio.run(my_async_table.insert_many([{"seq": i} for i in range(20)]))
        TableInsertManyResult(...)
        >>> asyncio.run(my_async_table.count_documents({}, upper_bound=100))
        20
        >>> asyncio.run(my_async_table.count_documents({"seq":{"$gt": 15}}, upper_bound=100))
        4
        >>> asyncio.run(my_async_table.count_documents({}, upper_bound=10))
        Traceback (most recent call last):
            ... ...
        astrapy.exceptions.TooManyRowsToCountException

    Note:
        Count operations are expensive: for this reason, the best practice
        is to provide a reasonable `upper_bound` according to the caller
        expectations. Moreover, indiscriminate usage of count operations
        for sizeable amounts of rows (i.e. in the thousands and more)
        is discouraged in favor of alternative application-specific solutions.
        Keep in mind that the Data API has a hard upper limit on the amount
        of rows it will count, and that an exception will be thrown
        by this method if this limit is encountered.
    """

    effective_timeout_ms, timeout_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    count_payload = {"countDocuments": {"filter": filter}}
    logger.info(f"countDocuments on '{self.name}'")
    count_response = await self._api_commander.async_request(
        payload=count_payload,
        timeout_context=_TimeoutContext(
            request_ms=effective_timeout_ms, label=timeout_label
        ),
    )
    logger.info(f"finished countDocuments on '{self.name}'")

    response_status = count_response.get("status", {})
    # A response lacking the count altogether is malformed.
    if "count" not in response_status:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from countDocuments API command.",
            raw_response=count_response,
        )

    count: int = response_status["count"]
    # The server-side ceiling is checked before the caller-provided one.
    if response_status.get("moreData", False):
        raise TooManyRowsToCountException(
            text=f"Document count exceeds {count}, the maximum allowed by the server",
            server_max_count_exceeded=True,
        )
    if count > upper_bound:
        raise TooManyRowsToCountException(
            text="Document count exceeds required upper bound",
            server_max_count_exceeded=False,
        )
    return count

Create an index on a non-vector column of the table.

This is a blocking operation: the method returns once the index is created and ready to use.

For creation of a vector index, see method create_vector_index instead.

Args

name: the name of the index. Index names must be unique across the keyspace.
column: the table column on which the index is to be created. For a map column, besides a simple string, it can be an object in one of the two formats {"column": "$values"} or {"column": "$keys"}.
options: if passed, it must be an instance of TableIndexOptions, or an equivalent dictionary, which specifies index settings such as – for a text column – case-sensitivity and so on. See the TableIndexOptions class for more details.
if_not_exists: if set to True, the command will succeed even if an index with the specified name already exists (in which case no actual index creation takes place on the database). The API default of False means that an error is raised by the API in case of name collision.
table_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for table_admin_timeout_ms.
timeout_ms: an alias for table_admin_timeout_ms.

Examples

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> from astrapy.info import TableIndexOptions
>>>
>>> # create an index on a column
>>> await my_async_table.create_index(
...     "score_index",
...     "score",
... )
>>>
>>> # create an index on a textual column, specifying indexing options
>>> await my_async_table.create_index(
...     "winner_index",
...     "winner",
...     options=TableIndexOptions(
...         ascii=False,
...         normalize=True,
...         case_sensitive=False,
...     ),
... )

Expand source code

async def create_index(
    self,
    name: str,
    column: str | dict[str, str],
    *,
    options: TableIndexOptions | dict[str, Any] | None = None,
    if_not_exists: bool | None = None,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Create an index on a non-vector column of the table.

    This is a blocking operation: the method returns once the index
    is created and ready to use.

    To create a vector index, use the `create_vector_index` method instead.

    Args:
        name: the name of the index. Index names must be unique across the keyspace.
        column: the table column on which the index is to be created.
            For a map column, besides a simple string, it can be an object
            in one of the two formats {"column": "$values"} or {"column": "$keys"}.
        options: if passed, it must be an instance of `TableIndexOptions`,
            or an equivalent dictionary, which specifies index settings
            such as -- for a text column -- case-sensitivity and so on.
            See the `astrapy.info.TableIndexOptions` class for more details.
        if_not_exists: if set to True, the command will succeed even if an index
            with the specified name already exists (in which case no actual
            index creation takes place on the database). The API default of False
            means that an error is raised by the API in case of name collision.
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Examples:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> from astrapy.info import TableIndexOptions
        >>>
        >>> # create an index on a column
        >>> await my_async_table.create_index(
        ...     "score_index",
        ...     "score",
        ... )
        >>>
        >>> # create an index on a textual column, specifying indexing options
        >>> await my_async_table.create_index(
        ...     "winner_index",
        ...     "winner",
        ...     options=TableIndexOptions(
        ...         ascii=False,
        ...         normalize=True,
        ...         case_sensitive=False,
        ...     ),
        ... )
    """

    # Missing options are coerced starting from an empty dictionary.
    index_options = TableIndexOptions.coerce(options or {})
    index_definition = TableIndexDefinition(column=column, options=index_options)
    definition_dict: dict[str, Any] = index_definition.as_dict()
    # The actual API interaction is shared with the other index-creation methods.
    return await self._create_generic_index(
        i_name=name,
        ci_definition=definition_dict,
        ci_command="createIndex",
        if_not_exists=if_not_exists,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )

Create a vector index on a vector column of the table, enabling vector similarity search operations on it.

This is a blocking operation: the method returns once the index is created and ready to use.

For creation of a non-vector index, see method create_index instead.

Args

name: the name of the index. Index names must be unique across the keyspace.
column: the table column, of type "vector" on which to create the index.
options: an instance of TableVectorIndexOptions, or an equivalent dictionary, which specifies settings for the vector index, such as the metric to use or, if desired, a "source model" setting. If omitted, the Data API defaults will apply for the index. See the TableVectorIndexOptions class for more details.
if_not_exists: if set to True, the command will succeed even if an index with the specified name already exists (in which case no actual index creation takes place on the database). The API default of False means that an error is raised by the API in case of name collision.
table_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for table_admin_timeout_ms.
timeout_ms: an alias for table_admin_timeout_ms.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> from astrapy.constants import VectorMetric
>>> from astrapy.info import TableVectorIndexOptions
>>>
>>> # create a vector index with dot-product similarity
>>> await my_async_table.create_vector_index(
...     "m_vector_index",
...     "m_vector",
...     options=TableVectorIndexOptions(
...         metric=VectorMetric.DOT_PRODUCT,
...     ),
... )
>>> # specify a source_model (since the previous statement
>>> # succeeded, this will do nothing because of `if_not_exists`):
>>> await my_async_table.create_vector_index(
...     "m_vector_index",
...     "m_vector",
...     options=TableVectorIndexOptions(
...         metric=VectorMetric.DOT_PRODUCT,
...         source_model="nv-qa-4",
...     ),
...     if_not_exists=True,
... )
>>> # leave the settings to the Data API defaults of cosine
>>> # similarity metric (since the previous statement
>>> # succeeded, this will do nothing because of `if_not_exists`):
>>> await my_async_table.create_vector_index(
...     "m_vector_index",
...     "m_vector",
...     if_not_exists=True,
... )

Expand source code

async def create_vector_index(
    self,
    name: str,
    column: str,
    *,
    options: TableVectorIndexOptions | dict[str, Any] | None = None,
    if_not_exists: bool | None = None,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Create a vector index on a vector column of the table, so that vector
    similarity search operations can be run on it.

    This is a blocking operation: the method returns once the index
    is created and ready to use.

    To create a non-vector index, use the `create_index` method instead.

    Args:
        name: the name of the index. Index names must be unique across the keyspace.
        column: the table column, of type "vector" on which to create the index.
        options: an instance of `TableVectorIndexOptions`, or an equivalent
            dictionary, which specifies settings for the vector index,
            such as the metric to use or, if desired, a "source model" setting.
            If omitted, the Data API defaults will apply for the index.
            See the `astrapy.info.TableVectorIndexOptions` class for more details.
        if_not_exists: if set to True, the command will succeed even if an index
            with the specified name already exists (in which case no actual
            index creation takes place on the database). The API default of False
            means that an error is raised by the API in case of name collision.
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> from astrapy.constants import VectorMetric
        >>> from astrapy.info import TableVectorIndexOptions
        >>>
        >>> # create a vector index with dot-product similarity
        >>> await my_async_table.create_vector_index(
        ...     "m_vector_index",
        ...     "m_vector",
        ...     options=TableVectorIndexOptions(
        ...         metric=VectorMetric.DOT_PRODUCT,
        ...     ),
        ... )
        >>> # specify a source_model (since the previous statement
        >>> # succeeded, this will do nothing because of `if_not_exists`):
        >>> await my_async_table.create_vector_index(
        ...     "m_vector_index",
        ...     "m_vector",
        ...     options=TableVectorIndexOptions(
        ...         metric=VectorMetric.DOT_PRODUCT,
        ...         source_model="nv-qa-4",
        ...     ),
        ...     if_not_exists=True,
        ... )
        >>> # leave the settings to the Data API defaults of cosine
        >>> # similarity metric (since the previous statement
        >>> # succeeded, this will do nothing because of `if_not_exists`):
        >>> await my_async_table.create_vector_index(
        ...     "m_vector_index",
        ...     "m_vector",
        ...     if_not_exists=True,
        ... )
    """

    # The options are handed to `coerce` exactly as received (None included).
    vector_index_options = TableVectorIndexOptions.coerce(options)
    vector_index_definition = TableVectorIndexDefinition(
        column=column,
        options=vector_index_options,
    )
    definition_dict: dict[str, Any] = vector_index_definition.as_dict()
    # The actual API interaction is shared with the other index-creation methods.
    return await self._create_generic_index(
        i_name=name,
        ci_definition=definition_dict,
        ci_command="createVectorIndex",
        if_not_exists=if_not_exists,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )

async def definition(self, *, table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> ListTableDefinition

Query the Data API and return a structure defining the table schema. If there are no unsupported columns in the table, the return value has the same contents as could have been provided to a create_table method call.

Args

table_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for table_admin_timeout_ms.
timeout_ms: an alias for table_admin_timeout_ms.

Returns

A ListTableDefinition object, available for inspection.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> asyncio.run(my_async_table.definition())
ListTableDefinition(columns=[match_id,round,fighters, ...  # shortened

Expand source code

async def definition(
    self,
    *,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> ListTableDefinition:
    """
    Query the Data API and return a structure defining the table schema.
    If there are no unsupported columns in the table, the return value has
    the same contents as could have been provided to a `create_table` method call.

    Args:
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Returns:
        A `ListTableDefinition` object, available for inspection.

    Raises:
        RuntimeError: if no table with this table's name is found among
            those listed through the database.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> asyncio.run(my_async_table.definition())
        ListTableDefinition(columns=[match_id,round,fighters, ...  # shortened
    """

    admin_timeout_ms, timeout_label = _select_singlereq_timeout_ta(
        timeout_options=self.api_options.timeout_options,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info(f"getting tables in search of '{self.name}'")
    # The tables are listed through the database; this table's own
    # descriptor is then singled out by name.
    listed_descriptors = await self.database._list_tables_ctx(
        keyspace=None,
        timeout_context=_TimeoutContext(
            request_ms=admin_timeout_ms,
            label=timeout_label,
        ),
    )
    matching_descriptors = [
        descriptor
        for descriptor in listed_descriptors
        if descriptor.name == self.name
    ]
    logger.info(f"finished getting tables in search of '{self.name}'")
    if not matching_descriptors:
        raise RuntimeError(
            f"Table {self.keyspace}.{self.name} not found.",
        )
    return matching_descriptors[0].definition

async def delete_many(self, filter: FilterType, *, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Delete all rows matching a provided filter condition. This operation can target from a single row to the entirety of the table.

Args

filter: a filter dictionary to specify which row(s) must be deleted. 1. If the filter is in the form {"pk1": val1, "pk2": val2 ...} and specifies the primary key in full, at most one row is deleted, the one with that primary key. 2. If the table has "partitionSort" columns, some or all of them may be left out (the least significant of them can also employ an inequality, or range, predicate): a range of rows, but always within a single partition, will be deleted. 3. If an empty filter, {}, is passed, this operation empties the table completely. USE WITH CARE. 4. Other kinds of filtering clauses are forbidden. In the following examples, the table is partitioned by columns ["pa1", "pa2"] and has partitionSort "ps1" and "ps2" in that order. Valid filter examples: - {"pa1": x, "pa2": y, "ps1": z, "ps2": t}: deletes one row - {"pa1": x, "pa2": y, "ps1": z}: deletes multiple rows - {"pa1": x, "pa2": y, "ps1": z, "ps2": {"$lt": q}}: deletes multiple rows - {"pa1": x, "pa2": y}: deletes all rows in the partition - {}: empties the table (CAUTION) Invalid filter examples: - {"pa1": x}: incomplete partition key - {"pa1": x, "ps1": z}: incomplete partition key (whatever is added) - {"pa1": x, "pa2": y, "ps1": {"$lt": r}, "ps2": t}: inequality on a non-least-significant partitionSort column provided. - {"pa1": x, "pa2": y, "ps2": t}: cannot skip "ps1"
general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Examples

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> # Delete a single row (full primary key specified):
>>> await my_async_table.delete_many({"match_id": "fight4", "round": 1})
>>>
>>> # Delete part of a partition (inequality on the
>>> # last-mentioned 'partitionSort' column):
>>> await my_async_table.delete_many({"match_id": "fight5", "round": {"$gte": 5}})
>>>
>>> # Delete a whole partition (leave 'partitionSort' unspecified):
>>> await my_async_table.delete_many({"match_id": "fight7"})
>>>
>>> # empty the table entirely with empty filter (*CAUTION*):
>>> await my_async_table.delete_many({})

Expand source code

async def delete_many(
    self,
    filter: FilterType,
    *,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Delete all rows matching a provided filter condition.
    This operation can target from a single row to the entirety of the table.

    Args:
        filter: a filter dictionary to specify which row(s) must be deleted.
            1. If the filter is in the form `{"pk1": val1, "pk2": val2 ...}`
            and specifies the primary key in full, at most one row is deleted,
            the one with that primary key.
            2. If the table has "partitionSort" columns, some or all of them
            may be left out (the least significant of them can also employ
            an inequality, or range, predicate): a range of rows, but always
            within a single partition, will be deleted.
            3. If an empty filter, `{}`, is passed, this operation empties
            the table completely. *USE WITH CARE*.
            4. Other kinds of filtering clauses are forbidden.
            In the following examples, the table is partitioned
            by columns ["pa1", "pa2"] and has partitionSort "ps1" and "ps2" in that
            order.
            Valid filter examples:
            - `{"pa1": x, "pa2": y, "ps1": z, "ps2": t}`: deletes one row
            - `{"pa1": x, "pa2": y, "ps1": z}`: deletes multiple rows
            - `{"pa1": x, "pa2": y, "ps1": z, "ps2": {"$lt": q}}`: del. multiple rows
            - `{"pa1": x, "pa2": y}`: deletes all rows in the partition
            - `{}`: empties the table (*CAUTION*)
            Invalid filter examples:
            - `{"pa1": x}`: incomplete partition key
            - `{"pa1": x, "ps1": z}`: incomplete partition key (whatever is added)
            - `{"pa1": x, "pa2": y, "ps1": {"$lt": r}, "ps2": t}`: inequality on
              a non-least-significant partitionSort column provided.
            - `{"pa1": x, "pa2": y, "ps2": t}`: cannot skip "ps1"
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Raises:
        UnexpectedDataAPIResponseException: if the "deletedCount" in the
            response status is anything other than -1.

    Examples:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> # Delete a single row (full primary key specified):
        >>> await my_async_table.delete_many({"match_id": "fight4", "round": 1})
        >>>
        >>> # Delete part of a partition (inequality on the
        >>> # last-mentioned 'partitionSort' column):
        >>> await my_async_table.delete_many({"match_id": "fight5", "round": {"$gte": 5}})
        >>>
        >>> # Delete a whole partition (leave 'partitionSort' unspecified):
        >>> await my_async_table.delete_many({"match_id": "fight7"})
        >>>
        >>> # empty the table entirely with empty filter (*CAUTION*):
        >>> await my_async_table.delete_many({})
    """

    effective_timeout_ms, timeout_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # A None filter is omitted from the command altogether; any other
    # value (the empty dictionary included) is sent along.
    delete_clause = {} if filter is None else {"filter": filter}
    delete_payload = self._converter_agent.preprocess_payload(
        {"deleteMany": delete_clause},
        map2tuple_checker=None,
    )
    logger.info(f"deleteMany on '{self.name}'")
    delete_response = await self._api_commander.async_request(
        payload=delete_payload,
        timeout_context=_TimeoutContext(
            request_ms=effective_timeout_ms, label=timeout_label
        ),
    )
    logger.info(f"finished deleteMany on '{self.name}'")
    # The only response accepted as valid carries a deletedCount of -1.
    reported_count = delete_response.get("status", {}).get("deletedCount")
    if reported_count == -1:
        return
    raise UnexpectedDataAPIResponseException(
        text="Faulty response from deleteMany API command.",
        raw_response=delete_response,
    )

async def delete_one(self, filter: FilterType, *, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Delete a row, matching the provided value of the primary key. If no row is found with that primary key, the method does nothing.

Args

filter: a predicate expressing the table primary key in full, i.e. a dictionary defining values for all columns that form the primary key. A row (at most one) is deleted if it matches that primary key. An example filter may be {"match_id": "fight4", "round": 1}.
general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Examples

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> # Count the rows matching a certain filter
>>> len(asyncio.run(my_async_table.find({"match_id": "fight7"}).to_list()))
3
>>>
>>> # Delete a row belonging to the group
>>> asyncio.run(
...     my_async_table.delete_one({"match_id": "fight7", "round": 2})
... )
>>>
>>> # Count again
>>> len(asyncio.run(my_async_table.find({"match_id": "fight7"}).to_list()))
2
>>>
>>> # Attempt the delete again (nothing to delete)
>>> asyncio.run(
...     my_async_table.delete_one({"match_id": "fight7", "round": 2})
... )
>>>
>>> # The count is unchanged
>>> len(asyncio.run(my_async_table.find({"match_id": "fight7"}).to_list()))
2

Expand source code

async def delete_one(
    self,
    filter: FilterType,
    *,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Remove the one row identified by a fully-specified primary key.
    When no row has that primary key, the call completes without any effect.

    Args:
        filter: a predicate that spells out the table primary key in full,
            i.e. a dictionary with a value for every column that is part of
            the primary key. At most one row can match, and that row is deleted.
            An example filter may be `{"match_id": "fight4", "round": 1}`.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Raises:
        UnexpectedDataAPIResponseException: if the API response does not carry
            a `deletedCount` of -1 in its "status".

    Examples:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> # Count the rows matching a certain filter
        >>> len(asyncio.run(my_async_table.find({"match_id": "fight7"}).to_list()))
        3
        >>>
        >>> # Delete a row belonging to the group
        >>> asyncio.run(
        ...     my_async_table.delete_one({"match_id": "fight7", "round": 2})
        ... )
        >>>
        >>> # Count again
        >>> len(asyncio.run(my_async_table.find({"match_id": "fight7"}).to_list()))
        2
        >>>
        >>> # Attempt the delete again (nothing to delete)
        >>> asyncio.run(
        ...     my_async_table.delete_one({"match_id": "fight7", "round": 2})
        ... )
        >>>
        >>> # The count is unchanged
        >>> len(asyncio.run(my_async_table.find({"match_id": "fight7"}).to_list()))
        2
    """

    timeout_value_ms, timeout_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # a None filter is left out of the command body altogether
    command_body = {"filter": filter} if filter is not None else {}
    command_payload = self._converter_agent.preprocess_payload(
        {"deleteOne": command_body},
        map2tuple_checker=None,
    )
    logger.info(f"deleteOne on '{self.name}'")
    api_response = await self._api_commander.async_request(
        payload=command_payload,
        timeout_context=_TimeoutContext(
            request_ms=timeout_value_ms,
            label=timeout_label,
        ),
    )
    logger.info(f"finished deleteOne on '{self.name}'")
    # the only response accepted as well-formed reports a deletedCount of -1
    reported_count = api_response.get("status", {}).get("deletedCount")
    if reported_count == -1:
        return
    raise UnexpectedDataAPIResponseException(
        text="Faulty response from deleteOne API command.",
        raw_response=api_response,
    )

Return a list of the unique values of key across the rows in the table that match the provided filter.

Args

key: the name of the field whose value is inspected across rows. Keys can be just column names (as is typically the case), but the dot-notation is also accepted to mean subkeys or indices within lists (for example, "map_column.subkey" or "list_column.2"). If a column has literal dots or ampersands in its name, this parameter must be escaped to be treated properly. The key can also be a list of strings and numbers, in which case no escape is necessary: each item in the list is a field name/index, for example ["map_column", "subkey"] or ["list_column", 2]. For set and list columns, individual entries are "unrolled" automatically.
filter: a dictionary expressing which condition the inspected rows must satisfy. The filter can use operators, such as "$eq" for equality, and require columns to compare with literal values. Simple examples are {} (zero filter), {"match_no": 123} (a shorthand for {"match_no": {"$eq": 123}}), or {"match_no": 123, "round": "C"} (multiple conditions are implicitly combined with "$and"). Please consult the Data API documentation for a more detailed explanation of table search filters and tips on their usage.
general_method_timeout_ms: a timeout, in milliseconds, for the whole requested operation (which may involve multiple API requests). This method, being based on find (see), may entail successive HTTP API requests, depending on the amount of involved rows. If not provided, this object's defaults apply.
request_timeout_ms: a timeout, in milliseconds, for each API request. If not provided, this object's defaults apply.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a list of all different values for key found across the rows that match the filter. The result list has no repeated items.

Examples

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> asyncio.run(my_async_table.distinct(
...     "winner",
...     filter={"match_id": "challenge6"},
... ))
['Donna', 'Erick', 'Fiona']
>>>
>>> # distinct values across the whole table:
>>> # (not recommended performance-wise)
>>> asyncio.run(my_async_table.distinct("winner"))
The Data API returned a warning: {'errorCode': 'ZERO_FILTER_OPERATIONS', ...
['Victor', 'Adam Zuul', 'Betta Vigo', 'Caio Gozer', 'Donna', 'Erick', ...
>>>
>>> # Over a column containing null values
>>> # (also with composite filter):
>>> asyncio.run(my_async_table.distinct(
...     "score",
...     filter={"match_id": {"$in": ["fight4", "tournamentA"]}},
... ))
[18, None]
>>>
>>> # distinct over a set column (automatically "unrolled"):
>>> asyncio.run(my_async_table.distinct(
...     "fighters",
...     filter={"match_id": {"$in": ["fight4", "tournamentA"]}},
... ))
[UUID('0193539a-2770-8c09-a32a-111111111111'), UUID('019353e3-00b4-...

Note

It must be kept in mind that distinct is a client-side operation, which effectively browses all required rows using the logic of the find method and collects the unique values found for key. As such, there may be performance, latency and ultimately billing implications if the amount of matching rows is large.

Note

For details on the behaviour of "distinct" in conjunction with real-time changes in the table contents, see the Note of the find command.

Expand source code

async def distinct(
    self,
    key: str | Iterable[str | int],
    *,
    filter: FilterType | None = None,
    request_timeout_ms: int | None = None,
    general_method_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[Any]:
    """
    Collect the unique values that `key` takes across the table rows
    matching the provided filter, and return them as a list.

    Args:
        key: the name of the field whose value is inspected across rows.
            Keys can be just column names (as is typically the case), but
            the dot-notation is also accepted to mean subkeys or indices
            within lists (for example, "map_column.subkey" or "list_column.2").
            If a column has literal dots or ampersands in its name, this
            parameter must be escaped to be treated properly.
            The key can also be a list of strings and numbers, in which case
            no escape is necessary: each item in the list is a field name/index,
            for example ["map_column", "subkey"] or ["list_column", 2].
            For set and list columns, individual entries are "unrolled"
            automatically.
        filter: a dictionary expressing which condition the inspected rows
            must satisfy. The filter can use operators, such as "$eq" for equality,
            and require columns to compare with literal values. Simple examples
            are `{}` (zero filter), `{"match_no": 123}` (a shorthand for
            `{"match_no": {"$eq": 123}}`), or `{"match_no": 123, "round": "C"}`
            (multiple conditions are implicitly combined with "$and").
            Please consult the Data API documentation for a more detailed
            explanation of table search filters and tips on their usage.
        general_method_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation (which may involve multiple API requests).
            This method, being based on `find` (see), may entail successive
            HTTP API requests, depending on the amount of involved rows.
            If not provided, this object's defaults apply.
        request_timeout_ms: a timeout, in milliseconds, for each API request.
            If not provided, this object's defaults apply.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a list of all different values for `key` found across the rows
        that match the filter. The result list has no repeated items.

    Examples:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> asyncio.run(my_async_table.distinct(
        ...     "winner",
        ...     filter={"match_id": "challenge6"},
        ... ))
        ['Donna', 'Erick', 'Fiona']
        >>>
        >>> # distinct values across the whole table:
        >>> # (not recommended performance-wise)
        >>> asyncio.run(my_async_table.distinct("winner"))
        The Data API returned a warning: {'errorCode': 'ZERO_FILTER_OPERATIONS', ...
        ['Victor', 'Adam Zuul', 'Betta Vigo', 'Caio Gozer', 'Donna', 'Erick', ...
        >>>
        >>> # Over a column containing null values
        >>> # (also with composite filter):
        >>> asyncio.run(my_async_table.distinct(
        ...     "score",
        ...     filter={"match_id": {"$in": ["fight4", "tournamentA"]}},
        ... ))
        [18, None]
        >>>
        >>> # distinct over a set column (automatically "unrolled"):
        >>> asyncio.run(my_async_table.distinct(
        ...     "fighters",
        ...     filter={"match_id": {"$in": ["fight4", "tournamentA"]}},
        ... ))
        [UUID('0193539a-2770-8c09-a32a-111111111111'), UUID('019353e3-00b4-...

    Note:
        It must be kept in mind that `distinct` is a client-side operation,
        which effectively browses all required rows using the logic
        of the `find` method and collects the unique values found for `key`.
        As such, there may be performance, latency and ultimately
        billing implications if the amount of matching rows is large.

    Note:
        For details on the behaviour of "distinct" in conjunction with
        real-time changes in the table contents, see the
        Note of the `find` command.
    """

    # imported inside the method: a module-level import would be circular
    from astrapy.cursors import AsyncTableFindCursor

    default_timeouts = self.api_options.timeout_options
    # explicit arguments win over the object's defaults, in the order listed
    overall_ms, overall_label = _first_valid_timeout(
        (general_method_timeout_ms, "general_method_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (default_timeouts.general_method_timeout_ms, "general_method_timeout_ms"),
    )
    per_request_ms, per_request_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (default_timeouts.request_timeout_ms, "request_timeout_ms"),
    )

    # set up the value extractor and the projection key for the cursor
    extract_values = _create_document_key_extractor(key)
    projection_key = _reduce_distinct_key_to_shallow_safe(key)
    # the row type is loosened to plain dicts, only within this method
    row_cursor: AsyncTableFindCursor[dict[str, Any], dict[str, Any]] = (
        AsyncTableFindCursor(
            table=self,
            request_timeout_ms=per_request_ms,
            overall_timeout_ms=overall_ms,
            request_timeout_label=per_request_label,
            overall_timeout_label=overall_label,
        )  # type: ignore[assignment]
        .filter(filter)
        .project({projection_key: True})
    )

    # walk the cursor, keeping the first occurrence of each value
    seen_hashes = set()
    unique_values: list[Any] = []
    logger.info(f"running distinct() on '{self.name}'")
    async for row in row_cursor:
        for value in extract_values(row):
            value_hash = _hash_table_document(
                value, options=self.api_options.serdes_options
            )
            if value_hash in seen_hashes:
                continue
            seen_hashes.add(value_hash)
            unique_values.append(value)
    logger.info(f"finished running distinct() on '{self.name}'")
    return unique_values

async def drop(self, *, if_exists: bool | None = None, table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> dict[str, typing.Any]

Drop the table, i.e. delete it from the database along with all the rows stored therein.

Args

if_exists: if passed as True, trying to drop a non-existing table will not error, just silently do nothing instead. If not provided, the API default behaviour will hold.
table_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for table_admin_timeout_ms.
timeout_ms: an alias for table_admin_timeout_ms.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> # List tables:
>>> asyncio.run(my_async_table.database.list_table_names())
['games']
>>>
>>> # Drop this table:
>>> asyncio.run(my_async_table.drop())
>>>
>>> # List tables again:
>>> asyncio.run(my_async_table.database.list_table_names())
[]
>>>
>>> # Try working on the table now:
>>> from astrapy.exceptions import DataAPIResponseException
>>>
>>> async def try_use_table():
...     try:
...         await my_async_table.find_one({})
...     except DataAPIResponseException as err:
...         print(str(err))
...
>>> asyncio.run(try_use_table())
Collection does not exist [...] (COLLECTION_NOT_EXIST)

Note

Use with caution.

Note

Once the method succeeds, methods on this object can still be invoked: however, this hardly makes sense as the underlying actual table is no more. It is the responsibility of the developer to design a correct flow which avoids using a dropped table any further.

Expand source code

async def drop(
    self,
    *,
    if_exists: bool | None = None,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> dict[str, Any]:
    """
    Drop this table: it is removed from the database together with
    every row it contains.

    Args:
        if_exists: if passed as True, trying to drop a non-existing table
            will not error, just silently do nothing instead. If not provided,
            the API default behaviour will hold.
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Returns:
        whatever the `drop_table` method of this table's database returns,
        passed through unchanged.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> # List tables:
        >>> asyncio.run(my_async_table.database.list_table_names())
        ['games']
        >>>
        >>> # Drop this table:
        >>> asyncio.run(my_async_table.drop())
        >>>
        >>> # List tables again:
        >>> asyncio.run(my_async_table.database.list_table_names())
        []
        >>>
        >>> # Try working on the table now:
        >>> from astrapy.exceptions import DataAPIResponseException
        >>>
        >>> async def try_use_table():
        ...     try:
        ...         await my_async_table.find_one({})
        ...     except DataAPIResponseException as err:
        ...         print(str(err))
        ...
        >>> asyncio.run(try_use_table())
        Collection does not exist [...] (COLLECTION_NOT_EXIST)

    Note:
        Use with caution.

    Note:
        Once the method succeeds, methods on this object can still be invoked:
        however, this hardly makes sense as the underlying actual table
        is no more.
        It is the responsibility of the developer to design a correct flow
        which avoids using a dropped table any further.
    """

    table_name = self.name
    logger.info(f"dropping table '{table_name}' (self)")
    # the actual work is delegated to the database owning this table
    outcome = await self.database.drop_table(
        table_name,
        if_exists=if_exists,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info(f"finished dropping table '{self.name}' (self)")
    return outcome

async def estimated_document_count(self, *, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> int

Query the API server for an estimate of the document count in the table.

Contrary to count_documents, this method has no filtering parameters.

Args

general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a server-provided estimate count of the documents in the table.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> asyncio.run(my_async_table.estimated_document_count())
5820

Expand source code

async def estimated_document_count(
    self,
    *,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> int:
    """
    Ask the API server for an estimated count of the documents in the table.

    Unlike `count_documents`, this method accepts no filtering parameters.

    Args:
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a server-provided estimate count of the documents in the table.

    Raises:
        UnexpectedDataAPIResponseException: if the "status" of the API
            response has no "count" entry.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> asyncio.run(my_async_table.estimated_document_count())
        5820
    """

    timeout_value_ms, timeout_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    command_payload: dict[str, Any] = {"estimatedDocumentCount": {}}
    logger.info(f"estimatedDocumentCount on '{self.name}'")
    api_response = await self._api_commander.async_request(
        payload=command_payload,
        timeout_context=_TimeoutContext(
            request_ms=timeout_value_ms,
            label=timeout_label,
        ),
    )
    logger.info(f"finished estimatedDocumentCount on '{self.name}'")
    # a response without a "count" in its status is treated as malformed
    if "count" not in api_response.get("status", {}):
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from estimatedDocumentCount API command.",
            raw_response=api_response,
        )
    estimate: int = api_response["status"]["count"]
    return estimate

Find rows on the table matching the provided filters and according to sorting criteria including vector similarity.

The returned AsyncTableFindCursor object, representing the stream of results, can be iterated over, or consumed and manipulated in several other ways (see the examples below and the TableFindCursor documentation for details). Since the amount of returned items can be large, TableFindCursor is a lazy object, that fetches new data while it is being read using the Data API pagination mechanism.

Invoking .to_list() on a TableFindCursor will cause it to consume all rows and materialize the entire result set as a list. This is not recommended if the amount of results is very large.

Args

filter: a dictionary expressing which condition the returned rows must satisfy. The filter can use operators, such as "$eq" for equality, and require columns to compare with literal values. Simple examples are {} (zero filter, not recommended for large tables), {"match_no": 123} (a shorthand for {"match_no": {"$eq": 123}}), or {"match_no": 123, "round": "C"} (multiple conditions are implicitly combined with "$and"). Please consult the Data API documentation for a more detailed explanation of table search filters and tips on their usage.
projection: a prescription on which columns to return for the matching rows. The projection can take the form {"column1": True, "column2": True}, {"*": True} (i.e. return the whole row), or the complementary form that excludes columns: {"column1": False, "column2": False}. To optimize bandwidth usage, it is recommended to use a projection, especially to avoid unnecessary columns of type vector with high-dimensional embeddings.
row_type: this parameter acts as a formal specifier for the type checker. If omitted, the resulting cursor is implicitly an AsyncTableFindCursor[ROW, ROW], i.e. maintains the same type for the items it returns as that for the rows in the table. Strictly typed code may want to specify this parameter especially when a projection is given.
skip: if provided, it is a number of rows that would be obtained first in the response and are instead skipped.
limit: a maximum amount of rows to get from the table. The returned cursor will stop yielding rows when either this number is reached or there really are no more matches in the table.
include_similarity: a boolean to request the numeric value of the similarity to be returned as an added "$similarity" key in each returned row. It can be used meaningfully only in a vector search (see sort).
include_sort_vector: a boolean to request the search query vector. If set to True (and if the search is a vector search), calling the get_sort_vector method on the returned cursor will yield the vector used for the ANN search.
sort: this dictionary parameter controls the order in which the rows are returned. The sort parameter can express either a vector search or a regular (ascending/descending, even hierarchical) sorting. * For a vector search the parameter takes the form {"vector_column": qv}, with the query vector qv of the appropriate type (list of floats or DataAPIVector). If the table has automatic embedding generation ("vectorize") enabled on that column, the form {"vectorize_enabled_column": "query text"} is also valid. * In the case of non-vector sorting, the parameter specifies the column(s) and the ascending/descending ordering required. If multiple columns are provided, the sorting applies them hierarchically to the rows. Examples are {"score": SortMode.ASCENDING} (equivalently {"score": +1}), {"score": +1, "when": -1}. Note that, depending on the column(s) chosen for sorting, the table partitioning structure, and the presence of indexes, the sorting may be done in-memory by the API. In that case, there may be performance implications and limitations on the amount of items returned. Consult the Data API documentation for more details on this topic.
request_timeout_ms: a timeout, in milliseconds, to impose on each individual HTTP request to the Data API to accomplish the operation. If not provided, this object's defaults apply.
timeout_ms: an alias for request_timeout_ms.

Returns

an AsyncTableFindCursor object, that can be iterated over (and manipulated in several ways). The cursor, if needed, handles pagination under the hood as the rows are consumed.

Note

As the rows are retrieved in chunks progressively, while the cursor is being iterated over, it is possible that the actual results obtained will reflect changes occurring to the table contents in real time.

Examples

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> # Iterate over results:
>>> async def loop1():
...     async for row in my_async_table.find({"match_id": "challenge6"}):
...         print(f"(R:{row['round']}): winner {row['winner']}")
...
>>> asyncio.run(loop1())
(R:1): winner Donna
(R:2): winner Erick
(R:3): winner Fiona
>>>
>>> # Optimize bandwidth using a projection:
>>> proj = {"round": True, "winner": True}
>>> async def loop2():
...     async for row in my_async_table.find(
...           {"match_id": "challenge6"},
...           projection=proj,
...     ):
...         print(f"(R:{row['round']}): winner {row['winner']}")
...
>>> asyncio.run(loop2())
(R:1): winner Donna
(R:2): winner Erick
(R:3): winner Fiona
>>>
>>> # Filter on the partitioning:
>>> asyncio.run(
...     my_async_table.find({"match_id": "challenge6"}).to_list()
... )
[{'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
>>>
>>> # Filter on primary key:
>>> asyncio.run(
...     my_async_table.find(
...         {"match_id": "challenge6", "round": 1}
...     ).to_list()
... )
[{'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
>>>
>>> # Filter on a regular indexed column:
>>> asyncio.run(my_async_table.find({"winner": "Caio Gozer"}).to_list())
[{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
>>>
>>> # Non-equality filter on a regular indexed column:
>>> asyncio.run(my_async_table.find({"score": {"$gte": 15}}).to_list())
[{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
>>>
>>> # Filter on a regular non-indexed column:
>>> # (not recommended performance-wise)
>>> asyncio.run(my_async_table.find(
...     {"when": {
...         "$gte": DataAPITimestamp.from_string("1999-12-31T01:23:44Z")
...     }}
... ).to_list())
The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
[{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
>>>
>>> # Empty filter (not recommended performance-wise):
>>> asyncio.run(my_async_table.find({}).to_list())
The Data API returned a warning: {'errorCode': 'ZERO_FILTER_OPERATIONS', ...
[{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
>>>
>>> # Filter on the primary key and a regular non-indexed column:
>>> # (not recommended performance-wise)
>>> asyncio.run(my_async_table.find(
...     {"match_id": "fight5", "round": 3, "winner": "Caio Gozer"}
... ).to_list())
The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
[{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
>>>
>>> # Filter on a regular non-indexed column (and incomplete primary key)
>>> # (not recommended performance-wise)
>>> asyncio.run(my_async_table.find(
...     {"round": 3, "winner": "Caio Gozer"}
... ).to_list())
The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
[{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
>>>
>>> # Vector search with "sort" (on an appropriately-indexed vector column):
>>> asyncio.run(my_async_table.find(
...     {},
...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
...     projection={"winner": True},
...     limit=3,
... ).to_list())
[{'winner': 'Donna'}, {'winner': 'Victor'}]
>>>
>>> # Hybrid search with vector sort and non-vector filtering:
>>> asyncio.run(my_async_table.find(
...     {"match_id": "fight4"},
...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
...     projection={"winner": True},
... ).to_list())
[{'winner': 'Victor'}]
>>>
>>> # Return the numeric value of the vector similarity
>>> # (also demonstrating that one can pass a plain list for a vector):
>>> asyncio.run(my_async_table.find(
...     {},
...     sort={"m_vector": [0.2, 0.3, 0.4]},
...     projection={"winner": True},
...     limit=3,
...     include_similarity=True,
... ).to_list())
[{'winner': 'Donna', '$similarity': 0.515}, {'winner': 'Victor', ...
>>>
>>> # Non-vector sorting on a 'partitionSort' column:
>>> asyncio.run(my_async_table.find(
...     {"match_id": "fight5"},
...     sort={"round": SortMode.DESCENDING},
...     projection={"winner": True},
... ).to_list())
[{'winner': 'Caio Gozer'}, {'winner': 'Betta Vigo'}, ...
>>>
>>> # Using `skip` and `limit`:
>>> asyncio.run(my_async_table.find(
...     {"match_id": "fight5"},
...     sort={"round": SortMode.DESCENDING},
...     projection={"winner": True},
...     skip=1,
...     limit=2,
... ).to_list())
The Data API returned a warning: {'errorCode': 'IN_MEMORY_SORTING...
[{'winner': 'Betta Vigo'}, {'winner': 'Adam Zuul'}]
>>>
>>> # Non-vector sorting on a regular column:
>>> # (not recommended performance-wise)
>>> asyncio.run(my_async_table.find(
...     {"match_id": "fight5"},
...     sort={"winner": SortMode.ASCENDING},
...     projection={"winner": True},
... ).to_list())
The Data API returned a warning: {'errorCode': 'IN_MEMORY_SORTING...
[{'winner': 'Adam Zuul'}, {'winner': 'Betta Vigo'}, ...
>>>
>>> # Using `.map()` on a cursor:
>>> winner_cursor = my_async_table.find(
...     {"match_id": "fight5"},
...     sort={"round": SortMode.DESCENDING},
...     projection={"winner": True},
...     limit=5,
... )
>>> print("/".join(asyncio.run(
...     winner_cursor.map(lambda row: row["winner"].upper()).to_list())
... ))
CAIO GOZER/BETTA VIGO/ADAM ZUUL
>>>
>>> # Some other examples of cursor manipulation
>>> matches_async_cursor = my_async_table.find(
...     sort={"m_vector": DataAPIVector([-0.1, 0.15, 0.3])}
... )
>>> asyncio.run(matches_async_cursor.has_next())
True
>>> asyncio.run(matches_async_cursor.__anext__())
{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
>>> matches_async_cursor.consumed
1
>>> matches_async_cursor.rewind()
>>> matches_async_cursor.consumed
0
>>> asyncio.run(matches_async_cursor.has_next())
True
>>> matches_async_cursor.close()
>>>
>>> async def try_consume():
...     try:
...         await matches_async_cursor.__anext__()
...     except StopAsyncIteration:
...         print("StopAsyncIteration triggered.")
...
>>> asyncio.run(try_consume())
StopAsyncIteration triggered.

Expand source code

def find(
    self,
    filter: FilterType | None = None,
    *,
    projection: ProjectionType | None = None,
    row_type: type[ROW2] | None = None,
    skip: int | None = None,
    limit: int | None = None,
    include_similarity: bool | None = None,
    include_sort_vector: bool | None = None,
    sort: SortType | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> AsyncTableFindCursor[ROW, ROW2]:
    """
    Search the table for the rows satisfying the given filter, ordered
    according to the requested sorting criteria (vector similarity included).

    The result is an AsyncTableFindCursor object, which represents the stream
    of matching rows. It can be iterated over, or consumed and manipulated in
    several other ways (see the examples below and the `AsyncTableFindCursor`
    documentation for details). Since the number of returned items can be
    large, the cursor is a lazy object: it fetches new data while it is being
    read, relying on the Data API pagination mechanism.

    Calling `.to_list()` on the cursor makes it consume all rows and
    materialize the whole result set as a list. This is not recommended
    when the number of results is very large.

    Args:
        filter: a dictionary stating the condition that the returned rows
            must satisfy. Operators such as "$eq" (equality) can be used,
            comparing columns with literal values. Simple examples are
            `{}` (zero filter, not recommended for large tables),
            `{"match_no": 123}` (a shorthand for `{"match_no": {"$eq": 123}}`),
            or `{"match_no": 123, "round": "C"}` (multiple conditions are
            implicitly combined with "$and").
            Please consult the Data API documentation for a more detailed
            explanation of table search filters and tips on their usage.
        projection: which columns to return for the matching rows.
            Accepted forms are `{"column1": True, "column2": True}`,
            `{"*": True}` (i.e. return the whole row), or the complementary
            form that excludes columns: `{"column1": False, "column2": False}`.
            Using a projection is recommended to optimize bandwidth usage,
            especially to leave out unnecessary columns of type vector with
            high-dimensional embeddings.
        row_type: a formal specifier for the type checker. It is not used by
            the body of this method. If omitted, the resulting cursor is
            implicitly an `AsyncTableFindCursor[ROW, ROW]`, i.e. its items
            keep the same type as the rows in the table. Strictly typed code
            may want to specify this parameter, especially when a projection
            is given.
        skip: if provided, the number of rows that would come first in the
            response and are skipped instead.
        limit: the maximum number of rows to get from the table. The returned
            cursor stops yielding rows when either this number is reached or
            there really are no more matches in the table.
        include_similarity: a boolean to request that the numeric value of the
            similarity be returned as an added "$similarity" key in each
            returned row. It is meaningful only in a vector search (see `sort`).
        include_sort_vector: a boolean to request the search query vector.
            If set to True (and if the search is a vector search), calling
            the `get_sort_vector` method on the returned cursor will yield
            the vector used for the ANN search.
        sort: a dictionary controlling the order in which the rows are
            returned. It can express either a vector search or a regular
            (ascending/descending, even hierarchical) sorting.
            For a vector search the parameter takes the form
            `{"vector_column": qv}`, with the query vector `qv` of the
            appropriate type (list of floats or DataAPIVector). If the table
            has automatic embedding generation ("vectorize") enabled on that
            column, the form `{"vectorize_enabled_column": "query text"}`
            is also valid.
            For a non-vector sorting, the parameter specifies the column(s)
            and the ascending/descending ordering required. If multiple
            columns are provided, the sorting applies them hierarchically to
            the rows. Examples are `{"score": SortMode.ASCENDING}`
            (equivalently `{"score": +1}`), `{"score": +1, "when": -1}`.
            Note that, depending on the column(s) chosen for sorting, the
            table partitioning structure, and the presence of indexes, the
            sorting may be done in-memory by the API. In that case, there may
            be performance implications and limitations on the amount of
            items returned.
            Consult the Data API documentation for more details on this topic.
        request_timeout_ms: a timeout, in milliseconds, to impose on each
            individual HTTP request to the Data API to accomplish the
            operation. If not provided, this object's defaults apply.
        timeout_ms: an alias for `request_timeout_ms`.

    Returns:
        an AsyncTableFindCursor object, that can be iterated over (and
        manipulated in several ways). The cursor, if needed, handles
        pagination under the hood as the rows are consumed.

    Note:
        As the rows are retrieved in chunks progressively, while the cursor
        is being iterated over, it is possible that the actual results
        obtained will reflect changes occurring to the table contents in
        real time.

    Examples:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> from astrapy.constants import SortMode
        >>> from astrapy.data_types import DataAPITimestamp, DataAPIVector
        >>>
        >>> # Iterate over results:
        >>> async def loop1():
        ...     async for row in my_async_table.find({"match_id": "challenge6"}):
        ...         print(f"(R:{row['round']}): winner {row['winner']}")
        ...
        >>> asyncio.run(loop1())
        (R:1): winner Donna
        (R:2): winner Erick
        (R:3): winner Fiona
        >>>
        >>> # Optimize bandwidth using a projection:
        >>> proj = {"round": True, "winner": True}
        >>> async def loop2():
        ...     async for row in my_async_table.find(
        ...           {"match_id": "challenge6"},
        ...           projection=proj,
        ...     ):
        ...         print(f"(R:{row['round']}): winner {row['winner']}")
        ...
        >>> asyncio.run(loop2())
        (R:1): winner Donna
        (R:2): winner Erick
        (R:3): winner Fiona
        >>>
        >>> # Filter on the partitioning:
        >>> asyncio.run(
        ...     my_async_table.find({"match_id": "challenge6"}).to_list()
        ... )
        [{'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
        >>>
        >>> # Filter on primary key:
        >>> asyncio.run(
        ...     my_async_table.find(
        ...         {"match_id": "challenge6", "round": 1}
        ...     ).to_list()
        ... )
        [{'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
        >>>
        >>> # Filter on a regular indexed column:
        >>> asyncio.run(my_async_table.find({"winner": "Caio Gozer"}).to_list())
        [{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
        >>>
        >>> # Non-equality filter on a regular indexed column:
        >>> asyncio.run(my_async_table.find({"score": {"$gte": 15}}).to_list())
        [{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
        >>>
        >>> # Filter on a regular non-indexed column:
        >>> # (not recommended performance-wise)
        >>> asyncio.run(my_async_table.find(
        ...     {"when": {
        ...         "$gte": DataAPITimestamp.from_string("1999-12-31T01:23:44Z")
        ...     }}
        ... ).to_list())
        The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
        [{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
        >>>
        >>> # Empty filter (not recommended performance-wise):
        >>> asyncio.run(my_async_table.find({}).to_list())
        The Data API returned a warning: {'errorCode': 'ZERO_FILTER_OPERATIONS', ...
        [{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
        >>>
        >>> # Filter on the primary key and a regular non-indexed column:
        >>> # (not recommended performance-wise)
        >>> asyncio.run(my_async_table.find(
        ...     {"match_id": "fight5", "round": 3, "winner": "Caio Gozer"}
        ... ).to_list())
        The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
        [{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
        >>>
        >>> # Filter on a regular non-indexed column (and incomplete primary key)
        >>> # (not recommended performance-wise)
        >>> asyncio.run(my_async_table.find(
        ...     {"round": 3, "winner": "Caio Gozer"}
        ... ).to_list())
        The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
        [{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
        >>>
        >>> # Vector search with "sort" (on an appropriately-indexed vector column):
        >>> asyncio.run(my_async_table.find(
        ...     {},
        ...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
        ...     projection={"winner": True},
        ...     limit=3,
        ... ).to_list())
        [{'winner': 'Donna'}, {'winner': 'Victor'}]
        >>>
        >>> # Hybrid search with vector sort and non-vector filtering:
        >>> asyncio.run(my_async_table.find(
        ...     {"match_id": "fight4"},
        ...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
        ...     projection={"winner": True},
        ... ).to_list())
        [{'winner': 'Victor'}]
        >>>
        >>> # Return the numeric value of the vector similarity
        >>> # (also demonstrating that one can pass a plain list for a vector):
        >>> asyncio.run(my_async_table.find(
        ...     {},
        ...     sort={"m_vector": [0.2, 0.3, 0.4]},
        ...     projection={"winner": True},
        ...     limit=3,
        ...     include_similarity=True,
        ... ).to_list())
        [{'winner': 'Donna', '$similarity': 0.515}, {'winner': 'Victor', ...
        >>>
        >>> # Non-vector sorting on a 'partitionSort' column:
        >>> asyncio.run(my_async_table.find(
        ...     {"match_id": "fight5"},
        ...     sort={"round": SortMode.DESCENDING},
        ...     projection={"winner": True},
        ... ).to_list())
        [{'winner': 'Caio Gozer'}, {'winner': 'Betta Vigo'}, ...
        >>>
        >>> # Using `skip` and `limit`:
        >>> asyncio.run(my_async_table.find(
        ...     {"match_id": "fight5"},
        ...     sort={"round": SortMode.DESCENDING},
        ...     projection={"winner": True},
        ...     skip=1,
        ...     limit=2,
        ... ).to_list())
        The Data API returned a warning: {'errorCode': 'IN_MEMORY_SORTING...
        [{'winner': 'Betta Vigo'}, {'winner': 'Adam Zuul'}]
        >>>
        >>> # Non-vector sorting on a regular column:
        >>> # (not recommended performance-wise)
        >>> asyncio.run(my_async_table.find(
        ...     {"match_id": "fight5"},
        ...     sort={"winner": SortMode.ASCENDING},
        ...     projection={"winner": True},
        ... ).to_list())
        The Data API returned a warning: {'errorCode': 'IN_MEMORY_SORTING...
        [{'winner': 'Adam Zuul'}, {'winner': 'Betta Vigo'}, ...
        >>>
        >>> # Using `.map()` on a cursor:
        >>> winner_cursor = my_async_table.find(
        ...     {"match_id": "fight5"},
        ...     sort={"round": SortMode.DESCENDING},
        ...     projection={"winner": True},
        ...     limit=5,
        ... )
        >>> print("/".join(asyncio.run(
        ...     winner_cursor.map(lambda row: row["winner"].upper()).to_list())
        ... ))
        CAIO GOZER/BETTA VIGO/ADAM ZUUL
        >>>
        >>> # Some other examples of cursor manipulation
        >>> matches_async_cursor = my_async_table.find(
        ...     sort={"m_vector": DataAPIVector([-0.1, 0.15, 0.3])}
        ... )
        >>> asyncio.run(matches_async_cursor.has_next())
        True
        >>> asyncio.run(matches_async_cursor.__anext__())
        {'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
        >>> matches_async_cursor.consumed
        1
        >>> matches_async_cursor.rewind()
        >>> matches_async_cursor.consumed
        0
        >>> asyncio.run(matches_async_cursor.has_next())
        True
        >>> matches_async_cursor.close()
        >>>
        >>> async def try_consume():
        ...     try:
        ...         await matches_async_cursor.__anext__()
        ...     except StopAsyncIteration:
        ...         print("StopAsyncIteration triggered.")
        ...
        >>> asyncio.run(try_consume())
        StopAsyncIteration triggered.
    """

    # imported locally: a module-level import would be circular
    from astrapy.cursors import AsyncTableFindCursor

    # (value, label) candidates, in the order they are handed to the selector:
    # the two per-call parameters first, then this object's configured default.
    timeout_candidates = (
        (request_timeout_ms, "request_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    resolved_timeout_ms, timeout_label = _first_valid_timeout(*timeout_candidates)

    cursor = AsyncTableFindCursor(
        table=self,
        request_timeout_ms=resolved_timeout_ms,
        overall_timeout_ms=None,
        request_timeout_label=timeout_label,
    )
    # Apply each search setting in turn, always continuing from the cursor
    # returned by the previous call.
    cursor = cursor.filter(filter)
    cursor = cursor.project(projection)
    cursor = cursor.skip(skip)
    cursor = cursor.limit(limit)
    cursor = cursor.sort(sort)
    cursor = cursor.include_similarity(include_similarity)
    cursor = cursor.include_sort_vector(include_sort_vector)
    return cursor

Run a search according to the given filtering and sorting criteria and return the top row matching it, or nothing if there are none.

The parameters are analogous to some of the parameters to the find method (which has a few more that do not make sense in this case, such as limit).

Args

filter: a dictionary expressing which condition the returned row must satisfy. The filter can use operators, such as "$eq" for equality, and require columns to compare with literal values. Simple examples are {} (zero filter), {"match_no": 123} (a shorthand for {"match_no": {"$eq": 123}}), or {"match_no": 123, "round": "C"} (multiple conditions are implicitly combined with "$and"). Please consult the Data API documentation for a more detailed explanation of table search filters and tips on their usage.
projection: a prescription on which columns to return for the matching row. The projection can take the form {"column1": True, "column2": True}, {"*": True} (i.e. return the whole row), or the complementary form that excludes columns: {"column1": False, "column2": False}. To optimize bandwidth usage, it is recommended to use a projection, especially to avoid unnecessary columns of type vector with high-dimensional embeddings.
include_similarity: a boolean to request the numeric value of the similarity to be returned as an added "$similarity" key in the returned row. It can be used meaningfully only in a vector search (see sort).
sort: this dictionary parameter controls the sorting order, hence determines which row is being returned. The sort parameter can express either a vector search or a regular (ascending/descending, even hierarchical) sorting. * For a vector search the parameter takes the form {"vector_column": qv}, with the query vector qv of the appropriate type (list of floats or DataAPIVector). If the table has automatic embedding generation ("vectorize") enabled on that column, the form {"vectorize_enabled_column": "query text"} is also valid. * In the case of non-vector sorting, the parameter specifies the column(s) and the ascending/descending ordering required. If multiple columns are provided, the sorting applies them hierarchically to the rows. Examples are {"score": SortMode.ASCENDING} (equivalently {"score": +1}), {"score": +1, "when": -1}. Note that, depending on the column(s) chosen for sorting, the table partitioning structure, and the presence of indexes, the sorting may be done in-memory by the API. In that case, there may be performance implications. Consult the Data API documentation for more details on this topic.
general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a dictionary expressing the result if a row is found, otherwise None.

Examples

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> from astrapy.constants import SortMode
>>> from astrapy.data_types import DataAPITimestamp, DataAPIVector
>>>
>>> # Filter on the partitioning:
>>> asyncio.run(my_async_table.find_one({"match_id": "challenge6"}))
{'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
>>>
>>> # A find with no matches:
>>> str(asyncio.run(my_async_table.find_one({"match_id": "not_real"})))
'None'
>>>
>>> # Optimize bandwidth using a projection:
>>> asyncio.run(my_async_table.find_one(
...     {"match_id": "challenge6"},
...     projection={"round": True, "winner": True},
... ))
{'round': 1, 'winner': 'Donna'}
>>>
>>> # Filter on primary key:
>>> asyncio.run(
...     my_async_table.find_one({"match_id": "challenge6", "round": 1})
... )
{'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
>>>
>>> # Filter on a regular indexed column:
>>> asyncio.run(my_async_table.find_one({"winner": "Caio Gozer"}))
{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
>>>
>>> # Non-equality filter on a regular indexed column:
>>> asyncio.run(my_async_table.find_one({"score": {"$gte": 15}}))
{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
>>>
>>> # Filter on a regular non-indexed column:
>>> # (not recommended performance-wise)
>>> asyncio.run(my_async_table.find_one(
...     {"when": {
...         "$gte": DataAPITimestamp.from_string("1999-12-31T01:23:44Z")
...     }}
... ))
The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
>>>
>>> # Empty filter:
>>> asyncio.run(my_async_table.find_one({}))
The Data API returned a warning: {'errorCode': 'ZERO_FILTER_OPERATIONS', ...
{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
>>>
>>> # Filter on the primary key and a regular non-indexed column:
>>> # (not recommended performance-wise)
>>> asyncio.run(my_async_table.find_one(
...     {"match_id": "fight5", "round": 3, "winner": "Caio Gozer"}
... ))
The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
>>>
>>> # Filter on a regular non-indexed column (and incomplete primary key)
>>> # (not recommended performance-wise)
>>> asyncio.run(
...     my_async_table.find_one({"round": 3, "winner": "Caio Gozer"})
... )
The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
>>>
>>> # Vector search with "sort" (on an appropriately-indexed vector column):
>>> asyncio.run(my_async_table.find_one(
...     {},
...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
...     projection={"winner": True},
... ))
{'winner': 'Donna'}
>>>
>>> # Hybrid search with vector sort and non-vector filtering:
>>> asyncio.run(my_async_table.find_one(
...     {"match_id": "fight4"},
...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
...     projection={"winner": True},
... ))
{'winner': 'Victor'}
>>>
>>> # Return the numeric value of the vector similarity
>>> # (also demonstrating that one can pass a plain list for a vector):
>>> asyncio.run(my_async_table.find_one(
...     {},
...     sort={"m_vector": [0.2, 0.3, 0.4]},
...     projection={"winner": True},
...     include_similarity=True,
... ))
{'winner': 'Donna', '$similarity': 0.515}
>>>
>>> # Non-vector sorting on a 'partitionSort' column:
>>> asyncio.run(my_async_table.find_one(
...     {"match_id": "fight5"},
...     sort={"round": SortMode.DESCENDING},
...     projection={"winner": True},
... ))
{'winner': 'Caio Gozer'}
>>>
>>> # Non-vector sorting on a regular column:
>>> # (not recommended performance-wise)
>>> asyncio.run(my_async_table.find_one(
...     {"match_id": "fight5"},
...     sort={"winner": SortMode.ASCENDING},
...     projection={"winner": True},
... ))
The Data API returned a warning: {'errorCode': 'IN_MEMORY_SORTING...
{'winner': 'Adam Zuul'}

Expand source code

async def find_one(
    self,
    filter: FilterType | None = None,
    *,
    projection: ProjectionType | None = None,
    include_similarity: bool | None = None,
    sort: SortType | None = None,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> ROW | None:
    """
    Search the table with the given filtering and sorting criteria and
    return the top matching row, or nothing if no row matches.

    The parameters mirror a subset of those of the `find` method (which has
    a few more that would not make sense here, such as `limit`).

    Args:
        filter: a dictionary stating the condition that the returned row
            must satisfy. Operators such as "$eq" (equality) can be used,
            comparing columns with literal values. Simple examples are
            `{}` (zero filter), `{"match_no": 123}` (a shorthand for
            `{"match_no": {"$eq": 123}}`), or `{"match_no": 123, "round": "C"}`
            (multiple conditions are implicitly combined with "$and").
            Please consult the Data API documentation for a more detailed
            explanation of table search filters and tips on their usage.
        projection: which columns to return for the matching row.
            Accepted forms are `{"column1": True, "column2": True}`,
            `{"*": True}` (i.e. return the whole row), or the complementary
            form that excludes columns: `{"column1": False, "column2": False}`.
            Using a projection is recommended to optimize bandwidth usage,
            especially to leave out unnecessary columns of type vector with
            high-dimensional embeddings.
        include_similarity: a boolean to request that the numeric value of the
            similarity be returned as an added "$similarity" key in the
            returned row. It is meaningful only in a vector search (see `sort`).
        sort: a dictionary controlling the sorting order, hence determining
            which row is returned. It can express either a vector search or
            a regular (ascending/descending, even hierarchical) sorting.
            For a vector search the parameter takes the form
            `{"vector_column": qv}`, with the query vector `qv` of the
            appropriate type (list of floats or DataAPIVector). If the table
            has automatic embedding generation ("vectorize") enabled on that
            column, the form `{"vectorize_enabled_column": "query text"}`
            is also valid.
            For a non-vector sorting, the parameter specifies the column(s)
            and the ascending/descending ordering required. If multiple
            columns are provided, the sorting applies them hierarchically to
            the rows. Examples are `{"score": SortMode.ASCENDING}`
            (equivalently `{"score": +1}`), `{"score": +1, "when": -1}`.
            Note that, depending on the column(s) chosen for sorting, the
            table partitioning structure, and the presence of indexes, the
            sorting may be done in-memory by the API. In that case, there may
            be performance implications.
            Consult the Data API documentation for more details on this topic.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults
            apply. (This method issues a single API request, hence all timeout
            parameters are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a dictionary expressing the result if a row is found, otherwise None.

    Raises:
        UnexpectedDataAPIResponseException: if the API response lacks the
            'document' entry (under "data") or the 'projectionSchema' entry
            (under "status").

    Examples:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> from astrapy.constants import SortMode
        >>> from astrapy.data_types import DataAPITimestamp, DataAPIVector
        >>>
        >>> # Filter on the partitioning:
        >>> asyncio.run(my_async_table.find_one({"match_id": "challenge6"}))
        {'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
        >>>
        >>> # A find with no matches:
        >>> str(asyncio.run(my_async_table.find_one({"match_id": "not_real"})))
        'None'
        >>>
        >>> # Optimize bandwidth using a projection:
        >>> asyncio.run(my_async_table.find_one(
        ...     {"match_id": "challenge6"},
        ...     projection={"round": True, "winner": True},
        ... ))
        {'round': 1, 'winner': 'Donna'}
        >>>
        >>> # Filter on primary key:
        >>> asyncio.run(
        ...     my_async_table.find_one({"match_id": "challenge6", "round": 1})
        ... )
        {'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
        >>>
        >>> # Filter on a regular indexed column:
        >>> asyncio.run(my_async_table.find_one({"winner": "Caio Gozer"}))
        {'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
        >>>
        >>> # Non-equality filter on a regular indexed column:
        >>> asyncio.run(my_async_table.find_one({"score": {"$gte": 15}}))
        {'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
        >>>
        >>> # Filter on a regular non-indexed column:
        >>> # (not recommended performance-wise)
        >>> asyncio.run(my_async_table.find_one(
        ...     {"when": {
        ...         "$gte": DataAPITimestamp.from_string("1999-12-31T01:23:44Z")
        ...     }}
        ... ))
        The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
        {'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
        >>>
        >>> # Empty filter:
        >>> asyncio.run(my_async_table.find_one({}))
        The Data API returned a warning: {'errorCode': 'ZERO_FILTER_OPERATIONS', ...
        {'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
        >>>
        >>> # Filter on the primary key and a regular non-indexed column:
        >>> # (not recommended performance-wise)
        >>> asyncio.run(my_async_table.find_one(
        ...     {"match_id": "fight5", "round": 3, "winner": "Caio Gozer"}
        ... ))
        The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
        {'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
        >>>
        >>> # Filter on a regular non-indexed column (and incomplete primary key)
        >>> # (not recommended performance-wise)
        >>> asyncio.run(
        ...     my_async_table.find_one({"round": 3, "winner": "Caio Gozer"})
        ... )
        The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
        {'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
        >>>
        >>> # Vector search with "sort" (on an appropriately-indexed vector column):
        >>> asyncio.run(my_async_table.find_one(
        ...     {},
        ...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
        ...     projection={"winner": True},
        ... ))
        {'winner': 'Donna'}
        >>>
        >>> # Hybrid search with vector sort and non-vector filtering:
        >>> asyncio.run(my_async_table.find_one(
        ...     {"match_id": "fight4"},
        ...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
        ...     projection={"winner": True},
        ... ))
        {'winner': 'Victor'}
        >>>
        >>> # Return the numeric value of the vector similarity
        >>> # (also demonstrating that one can pass a plain list for a vector):
        >>> asyncio.run(my_async_table.find_one(
        ...     {},
        ...     sort={"m_vector": [0.2, 0.3, 0.4]},
        ...     projection={"winner": True},
        ...     include_similarity=True,
        ... ))
        {'winner': 'Donna', '$similarity': 0.515}
        >>>
        >>> # Non-vector sorting on a 'partitionSort' column:
        >>> asyncio.run(my_async_table.find_one(
        ...     {"match_id": "fight5"},
        ...     sort={"round": SortMode.DESCENDING},
        ...     projection={"winner": True},
        ... ))
        {'winner': 'Caio Gozer'}
        >>>
        >>> # Non-vector sorting on a regular column:
        >>> # (not recommended performance-wise)
        >>> asyncio.run(my_async_table.find_one(
        ...     {"match_id": "fight5"},
        ...     sort={"winner": SortMode.ASCENDING},
        ...     projection={"winner": True},
        ... ))
        The Data API returned a warning: {'errorCode': 'IN_MEMORY_SORTING...
        {'winner': 'Adam Zuul'}
    """

    effective_timeout_ms, timeout_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )

    # The "options" entry is sent only when the caller expressed a preference.
    if include_similarity is None:
        command_options = None
    else:
        command_options = {"includeSimilarity": include_similarity}

    # All candidate entries of the command; the None ones are dropped below.
    command_fields = {
        "filter": filter,
        "projection": normalize_optional_projection(projection),
        "options": command_options,
        "sort": sort,
    }
    find_one_payload = self._converter_agent.preprocess_payload(
        {
            "findOne": {
                name: value
                for name, value in command_fields.items()
                if value is not None
            }
        },
        map2tuple_checker=None,
    )
    raw_response = await self._api_commander.async_request(
        payload=find_one_payload,
        timeout_context=_TimeoutContext(
            request_ms=effective_timeout_ms, label=timeout_label
        ),
    )

    # Both parts of the response are required before a row can be returned.
    response_data = raw_response.get("data") or {}
    if "document" not in response_data:
        raise UnexpectedDataAPIResponseException(
            text="Response from findOne API command missing 'document'.",
            raw_response=raw_response,
        )
    response_status = raw_response.get("status") or {}
    if "projectionSchema" not in response_status:
        raise UnexpectedDataAPIResponseException(
            text="Response from findOne API command missing 'projectionSchema'.",
            raw_response=raw_response,
        )

    found_row = response_data["document"]
    if found_row is None:
        # a null document: nothing to return
        return None
    return self._converter_agent.postprocess_row(
        found_row,
        columns_dict=response_status["projectionSchema"],
        similarity_pseudocolumn="$similarity" if include_similarity else None,
    )

async def info(self, *, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> TableInfo

Return information on the table. This should not be confused with the table definition (i.e. the schema).

Args

database_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying DevOps API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for database_admin_timeout_ms.
timeout_ms: an alias for database_admin_timeout_ms.

Returns

A TableInfo object for inspection.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> # Note: output reformatted for clarity.
>>> asyncio.run(my_async_table.info())
TableInfo(
    database_info=AstraDBDatabaseInfo(id=..., name=..., ...),
    keyspace='default_keyspace',
    name='games',
    full_name='default_keyspace.games'
)

Expand source code

async def info(
    self,
    *,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> TableInfo:
    """
    Collect descriptive information about this table. Note that this is
    something different from the table definition (i.e. the schema).

    Args:
        database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying DevOps API request.
            If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.

    Returns:
        A TableInfo object for inspection, carrying the information on the
        database together with this table's keyspace, name and full name.

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> # Note: output reformatted for clarity.
        >>> asyncio.run(my_async_table.info())
        TableInfo(
            database_info=AstraDBDatabaseInfo(id=..., name=..., ...),
            keyspace='default_keyspace',
            name='games',
            full_name='default_keyspace.games'
        )
    """

    # The database-level info is the only part requiring an awaited call
    # (all timeout parameters are forwarded untouched); the other fields
    # are read directly from this table object.
    return TableInfo(
        database_info=await self.database.info(
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        ),
        keyspace=self.keyspace,
        name=self.name,
        full_name=self.full_name,
    )

async def insert_many(self, rows: Iterable[ROW], *, ordered: bool = False, chunk_size: int | None = None, concurrency: int | None = None, request_timeout_ms: int | None = None, general_method_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> TableInsertManyResult

Insert a number of rows into the table, with implied overwrite in case of primary key collision.

Inserting rows whose primary key corresponds to entries already stored in the table has the effect of an in-place update: the rows are overwritten. However, if the rows being inserted are partially provided, i.e. some columns are not specified, these are left unchanged on the database. To explicitly reset them, specify their value as appropriate to their data type, i.e. None, {} or analogous.

Args

rows: an iterable of dictionaries, each expressing a row to insert. Each row must at least fully specify the primary key column values, while any other column may be omitted if desired (in which case it is left as is on DB). The values for the various columns supplied in each row must be of the right data type for the insertion to succeed. Non-primary-key columns can also be explicitly set to null.
ordered: if False (default), the insertions can occur in arbitrary order and possibly concurrently. If True, they are processed sequentially. If there are no specific reasons against it, unordered insertions are to be preferred as they complete much faster.
chunk_size: how many rows to include in each single API request. Exceeding the server maximum allowed value results in an error. Leave it unspecified (recommended) to use the system default.
concurrency: maximum number of concurrent requests to the API at a given time. It cannot be more than one for ordered insertions.
general_method_timeout_ms: a timeout, in milliseconds, to impose on the whole operation, which may consist of several API requests. If not provided, this object's defaults apply.
request_timeout_ms: a timeout, in milliseconds, to impose on each individual HTTP request to the Data API to accomplish the operation. If not provided, this object's defaults apply.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a TableInsertManyResult object, whose attributes are the primary key of the inserted rows both in the form of dictionaries and of tuples.

Examples

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> # Insert complete and partial rows at once (concurrently)
>>> from astrapy.data_types import (
...     DataAPISet,
...     DataAPITimestamp,
...     DataAPIVector,
... )
>>> from astrapy.ids import UUID
>>>
>>> insert_result = asyncio.run(my_async_table.insert_many(
...     [
...         {
...             "match_id": "fight4",
...             "round": 1,
...             "winner": "Victor",
...             "score": 18,
...             "when": DataAPITimestamp.from_string(
...                 "2024-11-28T11:30:00Z",
...             ),
...             "fighters": DataAPISet([
...                 UUID("0193539a-2770-8c09-a32a-111111111111"),
...                 UUID('019353e3-00b4-83f9-a127-222222222222'),
...             ]),
...             "m_vector": DataAPIVector([0.4, -0.6, 0.2]),
...         },
...         {"match_id": "fight5", "round": 1, "winner": "Adam"},
...         {"match_id": "fight5", "round": 2, "winner": "Betta"},
...         {"match_id": "fight5", "round": 3, "winner": "Caio"},
...         {
...             "match_id": "challenge6",
...             "round": 1,
...             "winner": "Donna",
...             "m_vector": [0.9, -0.1, -0.3],
...         },
...         {"match_id": "challenge6", "round": 2, "winner": "Erick"},
...         {"match_id": "challenge6", "round": 3, "winner": "Fiona"},
...         {"match_id": "tournamentA", "round": 1, "winner": "Gael"},
...         {"match_id": "tournamentA", "round": 2, "winner": "Hanna"},
...         {
...             "match_id": "tournamentA",
...             "round": 3,
...             "winner": "Ian",
...             "fighters": DataAPISet([
...                 UUID("0193539a-2770-8c09-a32a-111111111111"),
...             ]),
...         },
...         {"match_id": "fight7", "round": 1, "winner": "Joy"},
...         {"match_id": "fight7", "round": 2, "winner": "Kevin"},
...         {"match_id": "fight7", "round": 3, "winner": "Lauretta"},
...     ],
...     concurrency=10,
...     chunk_size=3,
... ))
>>> insert_result.inserted_ids
[{'match_id': 'fight4', 'round': 1}, {'match_id': 'fight5', ...
>>> insert_result.inserted_id_tuples
[('fight4', 1), ('fight5', 1), ('fight5', 2), ('fight5', 3), ...
>>>
>>> # Ordered insertion
>>> # (would stop on first failure; predictable end result on DB)
>>> asyncio.run(my_async_table.insert_many(
...     [
...         {"match_id": "fight5", "round": 1, "winner": "Adam0"},
...         {"match_id": "fight5", "round": 2, "winner": "Betta0"},
...         {"match_id": "fight5", "round": 3, "winner": "Caio0"},
...         {"match_id": "fight5", "round": 1, "winner": "Adam Zuul"},
...         {"match_id": "fight5", "round": 2, "winner": "Betta Vigo"},
...         {"match_id": "fight5", "round": 3, "winner": "Caio Gozer"},
...     ],
...     ordered=True,
... ))
TableInsertManyResult(inserted_ids=[{'match_id': 'fight5', 'round': 1}, ...

Note

Unordered insertions are executed with some degree of concurrency, so it is usually better to prefer this mode unless the order in the row sequence is important.

Note

A failure mode for this command is related to certain faulty rows found among those to insert: validation may fail, for example, if the vector length does not match the table schema.

For an ordered insertion, the method will raise an exception at the first such faulty row – nevertheless, all rows processed until then will end up being written to the database.

For unordered insertions, if the error stems from faulty rows the insertion proceeds until exhausting the input rows: then, an exception is raised – and all insertable rows will have been written to the database, including those "after" the troublesome ones.

Errors occurring during an insert_many operation, for that reason, may result in a TableInsertManyException being raised. This exception makes it possible to inspect the list of row IDs that were successfully inserted, while at the same time giving access to the underlying "root errors" that caused the whole method call to fail.

Expand source code

async def insert_many(
    self,
    rows: Iterable[ROW],
    *,
    ordered: bool = False,
    chunk_size: int | None = None,
    concurrency: int | None = None,
    request_timeout_ms: int | None = None,
    general_method_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> TableInsertManyResult:
    """
    Insert a number of rows into the table,
    with implied overwrite in case of primary key collision.

    Inserting rows whose primary key corresponds to entries already stored
    in the table has the effect of an in-place update: the rows are overwritten.
    However, if the rows being inserted are partially provided, i.e. some columns
    are not specified, these are left unchanged on the database. To explicitly
    reset them, specify their value as appropriate to their data type,
    i.e. `None`, `{}` or analogous.

    Args:
        rows: an iterable of dictionaries, each expressing a row to insert.
            Each row must at least fully specify the primary key column values,
            while any other column may be omitted if desired (in which case
            it is left as is on DB).
            The values for the various columns supplied in each row must
            be of the right data type for the insertion to succeed.
            Non-primary-key columns can also be explicitly set to null.
        ordered: if False (default), the insertions can occur in arbitrary order
            and possibly concurrently. If True, they are processed sequentially.
            If there are no specific reasons against it, unordered insertions
            are to be preferred as they complete much faster.
        chunk_size: how many rows to include in each single API request.
            Must be a positive integer if provided.
            Exceeding the server maximum allowed value results in an error.
            Leave it unspecified (recommended) to use the system default.
        concurrency: maximum number of concurrent requests to the API at
            a given time. Must be a positive integer if provided.
            It cannot be more than one for ordered insertions.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            whole operation, which may consist of several API requests.
            If not provided, this object's defaults apply.
        request_timeout_ms: a timeout, in milliseconds, to impose on each
            individual HTTP request to the Data API to accomplish the operation.
            If not provided, this object's defaults apply.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a TableInsertManyResult object, whose attributes are the primary key
        of the inserted rows both in the form of dictionaries and of tuples.

    Raises:
        ValueError: if `chunk_size` or `concurrency` is provided and is less
            than one, or if an ordered insertion is requested with a
            concurrency greater than one.
        TableInsertManyException: if any of the API responses reports errors
            (see the notes below).

    Examples:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> # Insert complete and partial rows at once (concurrently)
        >>> from astrapy.data_types import (
        ...     DataAPISet,
        ...     DataAPITimestamp,
        ...     DataAPIVector,
        ... )
        >>> from astrapy.ids import UUID
        >>>
        >>> insert_result = asyncio.run(my_async_table.insert_many(
        ...     [
        ...         {
        ...             "match_id": "fight4",
        ...             "round": 1,
        ...             "winner": "Victor",
        ...             "score": 18,
        ...             "when": DataAPITimestamp.from_string(
        ...                 "2024-11-28T11:30:00Z",
        ...             ),
        ...             "fighters": DataAPISet([
        ...                 UUID("0193539a-2770-8c09-a32a-111111111111"),
        ...                 UUID('019353e3-00b4-83f9-a127-222222222222'),
        ...             ]),
        ...             "m_vector": DataAPIVector([0.4, -0.6, 0.2]),
        ...         },
        ...         {"match_id": "fight5", "round": 1, "winner": "Adam"},
        ...         {"match_id": "fight5", "round": 2, "winner": "Betta"},
        ...         {"match_id": "fight5", "round": 3, "winner": "Caio"},
        ...         {
        ...             "match_id": "challenge6",
        ...             "round": 1,
        ...             "winner": "Donna",
        ...             "m_vector": [0.9, -0.1, -0.3],
        ...         },
        ...         {"match_id": "challenge6", "round": 2, "winner": "Erick"},
        ...         {"match_id": "challenge6", "round": 3, "winner": "Fiona"},
        ...         {"match_id": "tournamentA", "round": 1, "winner": "Gael"},
        ...         {"match_id": "tournamentA", "round": 2, "winner": "Hanna"},
        ...         {
        ...             "match_id": "tournamentA",
        ...             "round": 3,
        ...             "winner": "Ian",
        ...             "fighters": DataAPISet([
        ...                 UUID("0193539a-2770-8c09-a32a-111111111111"),
        ...             ]),
        ...         },
        ...         {"match_id": "fight7", "round": 1, "winner": "Joy"},
        ...         {"match_id": "fight7", "round": 2, "winner": "Kevin"},
        ...         {"match_id": "fight7", "round": 3, "winner": "Lauretta"},
        ...     ],
        ...     concurrency=10,
        ...     chunk_size=3,
        ... ))
        >>> insert_result.inserted_ids
        [{'match_id': 'fight4', 'round': 1}, {'match_id': 'fight5', ...
        >>> insert_result.inserted_id_tuples
        [('fight4', 1), ('fight5', 1), ('fight5', 2), ('fight5', 3), ...
        >>>
        >>> # Ordered insertion
        >>> # (would stop on first failure; predictable end result on DB)
        >>> asyncio.run(my_async_table.insert_many(
        ...     [
        ...         {"match_id": "fight5", "round": 1, "winner": "Adam0"},
        ...         {"match_id": "fight5", "round": 2, "winner": "Betta0"},
        ...         {"match_id": "fight5", "round": 3, "winner": "Caio0"},
        ...         {"match_id": "fight5", "round": 1, "winner": "Adam Zuul"},
        ...         {"match_id": "fight5", "round": 2, "winner": "Betta Vigo"},
        ...         {"match_id": "fight5", "round": 3, "winner": "Caio Gozer"},
        ...     ],
        ...     ordered=True,
        ... ))
        TableInsertManyResult(inserted_ids=[{'match_id': 'fight5', 'round': 1}, ...

    Note:
        Unordered insertions are executed with some degree of concurrency,
        so it is usually better to prefer this mode unless the order in the
        row sequence is important.

    Note:
        A failure mode for this command is related to certain faulty rows
        found among those to insert: validation may fail, for example, if the
        vector length does not match the table schema.

        For an ordered insertion, the method will raise an exception at
        the first such faulty row -- nevertheless, all rows processed
        until then will end up being written to the database.

        For unordered insertions, if the error stems from faulty rows
        the insertion proceeds until exhausting the input rows: then,
        an exception is raised -- and all insertable rows will have been
        written to the database, including those "after" the troublesome ones.

        Errors occurring during an insert_many operation, for that reason,
        may result in a `TableInsertManyException` being raised.
        This exception makes it possible to inspect the list of row IDs that
        were successfully inserted, while at the same time giving access to
        the underlying "root errors" that caused the whole method call to fail.
    """

    # Reject meaningless sizing parameters before doing any work:
    # a negative chunk size would make the chunking `range` empty (thereby
    # silently inserting nothing), a zero one would make `range` itself fail,
    # and a zero concurrency would block forever on the semaphore below.
    if chunk_size is not None and chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer, if provided.")
    if concurrency is not None and concurrency < 1:
        raise ValueError("concurrency must be a positive integer, if provided.")

    _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
        (general_method_timeout_ms, "general_method_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (
            self.api_options.timeout_options.general_method_timeout_ms,
            "general_method_timeout_ms",
        ),
    )
    _request_timeout_ms, _rt_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    if concurrency is None:
        # ordered insertions are necessarily sequential
        _concurrency = 1 if ordered else DEFAULT_INSERT_MANY_CONCURRENCY
    else:
        _concurrency = concurrency
    if _concurrency > 1 and ordered:
        raise ValueError("Cannot run ordered insert_many concurrently.")
    if chunk_size is None:
        _chunk_size = DEFAULT_INSERT_MANY_CHUNK_SIZE
    else:
        _chunk_size = chunk_size
    # the input is materialized once, to allow slicing it into chunks
    _rows = list(rows)
    logger.info(f"inserting {len(_rows)} rows in '{self.name}'")
    raw_results: list[dict[str, Any]] = []
    im_payloads: list[dict[str, Any] | None] = []
    # each request gets whatever is left of the overall timeout,
    # capped by the per-request timeout
    timeout_manager = MultiCallTimeoutManager(
        overall_timeout_ms=_general_method_timeout_ms,
        timeout_label=_gmt_label,
    )
    if ordered:
        options = {"ordered": True, "returnDocumentResponses": True}
        inserted_ids: list[Any] = []
        inserted_id_tuples: list[Any] = []
        for i in range(0, len(_rows), _chunk_size):
            im_payload = self._converter_agent.preprocess_payload(
                {
                    "insertMany": {
                        "documents": _rows[i : i + _chunk_size],
                        "options": options,
                    },
                },
                map2tuple_checker=map2tuple_checker_insert_many,
            )
            logger.info(f"insertMany(chunk) on '{self.name}'")
            # API errors are not raised by the commander: they are inspected
            # below so that the IDs inserted so far can be reported.
            chunk_response = await self._api_commander.async_request(
                payload=im_payload,
                raise_api_errors=False,
                timeout_context=timeout_manager.remaining_timeout(
                    cap_time_ms=_request_timeout_ms,
                    cap_timeout_label=_rt_label,
                ),
            )
            logger.info(f"finished insertMany(chunk) on '{self.name}'")
            # accumulate the results in this call
            chunk_inserted_ids, chunk_inserted_ids_tuples = (
                self._prepare_keys_from_status(chunk_response.get("status"))
            )
            inserted_ids += chunk_inserted_ids
            inserted_id_tuples += chunk_inserted_ids_tuples
            raw_results.append(chunk_response)
            # if errors, quit early
            if chunk_response.get("errors", []):
                response_exception = DataAPIResponseException.from_response(
                    command=im_payload,
                    raw_response=chunk_response,
                )
                raise TableInsertManyException(
                    inserted_ids=inserted_ids,
                    inserted_id_tuples=inserted_id_tuples,
                    exceptions=[response_exception],
                )

        # return
        full_result = TableInsertManyResult(
            raw_results=raw_results,
            inserted_ids=inserted_ids,
            inserted_id_tuples=inserted_id_tuples,
        )
        logger.info(f"finished inserting {len(_rows)} rows in '{self.name}'")
        return full_result

    else:
        # unordered: concurrent or not, do all of them and parse the results
        options = {"ordered": False, "returnDocumentResponses": True}

        # bounds the number of in-flight requests
        sem = asyncio.Semaphore(_concurrency)

        async def concurrent_insert_chunk(
            row_chunk: list[ROW],
        ) -> tuple[dict[str, Any] | None, dict[str, Any]]:
            # Send one chunk and return the (payload, response) pair.
            async with sem:
                im_payload = self._converter_agent.preprocess_payload(
                    {
                        "insertMany": {
                            "documents": row_chunk,
                            "options": options,
                        },
                    },
                    map2tuple_checker=map2tuple_checker_insert_many,
                )
                logger.info(f"insertMany(chunk) on '{self.name}'")
                im_response = await self._api_commander.async_request(
                    payload=im_payload,
                    raise_api_errors=False,
                    timeout_context=timeout_manager.remaining_timeout(
                        cap_time_ms=_request_timeout_ms,
                        cap_timeout_label=_rt_label,
                    ),
                )
                logger.info(f"finished insertMany(chunk) on '{self.name}'")
                return im_payload, im_response

        raw_pl_results_pairs: list[tuple[dict[str, Any] | None, dict[str, Any]]]
        if _concurrency > 1:
            tasks = [
                asyncio.create_task(
                    concurrent_insert_chunk(_rows[i : i + _chunk_size])
                )
                for i in range(0, len(_rows), _chunk_size)
            ]
            raw_pl_results_pairs = await asyncio.gather(*tasks)
        else:
            raw_pl_results_pairs = [
                await concurrent_insert_chunk(_rows[i : i + _chunk_size])
                for i in range(0, len(_rows), _chunk_size)
            ]

        # Split the pairs into two actual lists (unzipping with `zip` would
        # produce tuples, contrary to the declared types); this also covers
        # the case of no chunks at all.
        im_payloads = [chunk_payload for chunk_payload, _ in raw_pl_results_pairs]
        raw_results = [chunk_response for _, chunk_response in raw_pl_results_pairs]

        # recast raw_results. Each response has its schema: unfold appropriately
        ids_and_tuples_per_chunk = [
            self._prepare_keys_from_status(chunk_response.get("status"))
            for chunk_response in raw_results
        ]
        inserted_ids = [
            inserted_id
            for chunk_ids, _ in ids_and_tuples_per_chunk
            for inserted_id in chunk_ids
        ]
        inserted_id_tuples = [
            inserted_id_tuple
            for _, chunk_id_tuples in ids_and_tuples_per_chunk
            for inserted_id_tuple in chunk_id_tuples
        ]

        # check-raise
        response_exceptions = [
            DataAPIResponseException.from_response(
                command=chunk_payload,
                raw_response=chunk_response,
            )
            for chunk_payload, chunk_response in zip(im_payloads, raw_results)
            if chunk_response.get("errors", [])
        ]
        if response_exceptions:
            raise TableInsertManyException(
                inserted_ids=inserted_ids,
                inserted_id_tuples=inserted_id_tuples,
                exceptions=response_exceptions,
            )

        # return
        full_result = TableInsertManyResult(
            raw_results=raw_results,
            inserted_ids=inserted_ids,
            inserted_id_tuples=inserted_id_tuples,
        )
        logger.info(f"finished inserting {len(_rows)} rows in '{self.name}'")
        return full_result

async def insert_one(self, row: ROW, *, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> TableInsertOneResult

Insert a single row in the table, with implied overwrite in case of primary key collision.

Inserting a row whose primary key corresponds to an entry already stored in the table has the effect of an in-place update: the row is overwritten. However, if the row being inserted is partially provided, i.e. some columns are not specified, these are left unchanged on the database. To explicitly reset them, specify their value as appropriate to their data type, i.e. None, {} or analogous.

Args

row: a dictionary expressing the row to insert. The primary key must be specified in full, while any other column may be omitted if desired (in which case it is left as is on DB). The values for the various columns supplied in the row must be of the right data type for the insertion to succeed. Non-primary-key columns can also be explicitly set to null.
general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a TableInsertOneResult object, whose attributes are the primary key of the inserted row both in the form of a dictionary and of a tuple.

Examples

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> # a full-row insert using astrapy's datatypes
>>> from astrapy.data_types import (
...     DataAPISet,
...     DataAPITimestamp,
...     DataAPIVector,
... )
>>> from astrapy.ids import UUID
>>>
>>> insert_result = asyncio.run(my_async_table.insert_one(
...     {
...         "match_id": "mtch_0",
...         "round": 1,
...         "m_vector": DataAPIVector([0.4, -0.6, 0.2]),
...         "score": 18,
...         "when": DataAPITimestamp.from_string("2024-11-28T11:30:00Z"),
...         "winner": "Victor",
...         "fighters": DataAPISet([
...             UUID("0193539a-2770-8c09-a32a-111111111111"),
...         ]),
...     },
... ))
>>> insert_result.inserted_id
{'match_id': 'mtch_0', 'round': 1}
>>> insert_result.inserted_id_tuple
('mtch_0', 1)
>>>
>>> # a partial-row (which in this case overwrites some of the values)
>>> asyncio.run(my_async_table.insert_one(
...     {
...         "match_id": "mtch_0",
...         "round": 1,
...         "winner": "Victor Vector",
...         "fighters": DataAPISet([
...             UUID("0193539a-2770-8c09-a32a-111111111111"),
...             UUID("0193539a-2880-8875-9f07-222222222222"),
...         ]),
...     },
... ))
TableInsertOneResult(inserted_id={'match_id': 'mtch_0', 'round': 1} ...
>>>
>>> # another insertion demonstrating standard-library datatypes in values
>>> import datetime
>>>
>>> asyncio.run(my_async_table.insert_one(
...     {
...         "match_id": "mtch_0",
...         "round": 2,
...         "winner": "Angela",
...         "score": 25,
...         "when": datetime.datetime(
...             2024, 7, 13, 12, 55, 30, 889,
...             tzinfo=datetime.timezone.utc,
...         ),
...         "fighters": {
...             UUID("019353cb-8e01-8276-a190-333333333333"),
...         },
...         "m_vector": [0.4, -0.6, 0.2],
...     },
... ))
TableInsertOneResult(inserted_id={'match_id': 'mtch_0', 'round': 2}, ...

Expand source code

async def insert_one(
    self,
    row: ROW,
    *,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> TableInsertOneResult:
    """
    Insert a single row in the table,
    with implied overwrite in case of primary key collision.

    Inserting a row whose primary key corresponds to an entry already stored
    in the table has the effect of an in-place update: the row is overwritten.
    However, if the row being inserted is partially provided, i.e. some columns
    are not specified, these are left unchanged on the database. To explicitly
    reset them, specify their value as appropriate to their data type,
    i.e. `None`, `{}` or analogous.

    Args:
        row: a dictionary expressing the row to insert. The primary key
            must be specified in full, while any other column may be omitted
            if desired (in which case it is left as is on DB).
            The values for the various columns supplied in the row must
            be of the right data type for the insertion to succeed.
            Non-primary-key columns can also be explicitly set to null.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a TableInsertOneResult object, whose attributes are the primary key
        of the inserted row both in the form of a dictionary and of a tuple.

    Raises:
        UnexpectedDataAPIResponseException: if the API response carries no
            'insertedIds' in its status, or if either 'insertedIds' or
            'primaryKeySchema' is missing/empty.

    Examples:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> # a full-row insert using astrapy's datatypes
        >>> from astrapy.data_types import (
        ...     DataAPISet,
        ...     DataAPITimestamp,
        ...     DataAPIVector,
        ... )
        >>> from astrapy.ids import UUID
        >>>
        >>> insert_result = asyncio.run(my_async_table.insert_one(
        ...     {
        ...         "match_id": "mtch_0",
        ...         "round": 1,
        ...         "m_vector": DataAPIVector([0.4, -0.6, 0.2]),
        ...         "score": 18,
        ...         "when": DataAPITimestamp.from_string("2024-11-28T11:30:00Z"),
        ...         "winner": "Victor",
        ...         "fighters": DataAPISet([
        ...             UUID("0193539a-2770-8c09-a32a-111111111111"),
        ...         ]),
        ...     },
        ... ))
        >>> insert_result.inserted_id
        {'match_id': 'mtch_0', 'round': 1}
        >>> insert_result.inserted_id_tuple
        ('mtch_0', 1)
        >>>
        >>> # a partial-row (which in this case overwrites some of the values)
        >>> asyncio.run(my_async_table.insert_one(
        ...     {
        ...         "match_id": "mtch_0",
        ...         "round": 1,
        ...         "winner": "Victor Vector",
        ...         "fighters": DataAPISet([
        ...             UUID("0193539a-2770-8c09-a32a-111111111111"),
        ...             UUID("0193539a-2880-8875-9f07-222222222222"),
        ...         ]),
        ...     },
        ... ))
        TableInsertOneResult(inserted_id={'match_id': 'mtch_0', 'round': 1}, ...
        >>>
        >>> # another insertion demonstrating standard-library datatypes in values
        >>> import datetime
        >>>
        >>> asyncio.run(my_async_table.insert_one(
        ...     {
        ...         "match_id": "mtch_0",
        ...         "round": 2,
        ...         "winner": "Angela",
        ...         "score": 25,
        ...         "when": datetime.datetime(
        ...             2024, 7, 13, 12, 55, 30, 889,
        ...             tzinfo=datetime.timezone.utc,
        ...         ),
        ...         "fighters": {
        ...             UUID("019353cb-8e01-8276-a190-333333333333"),
        ...         },
        ...         "m_vector": [0.4, -0.6, 0.2],
        ...     },
        ... ))
        TableInsertOneResult(inserted_id={'match_id': 'mtch_0', 'round': 2}, ...
    """

    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    io_payload = self._converter_agent.preprocess_payload(
        {"insertOne": {"document": row}},
        map2tuple_checker=map2tuple_checker_insert_one,
    )
    logger.info(f"insertOne on '{self.name}'")
    io_response = await self._api_commander.async_request(
        payload=io_payload,
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    logger.info(f"finished insertOne on '{self.name}'")
    # `or {}` also guards against a "status" key that is present but null,
    # so that a malformed response results in the dedicated exception below
    # instead of a TypeError.
    io_status = io_response.get("status") or {}
    if "insertedIds" not in io_status:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from insertOne API command.",
            raw_response=io_response,
        )
    if not io_status["insertedIds"]:
        raise UnexpectedDataAPIResponseException(
            text="Response from insertOne API command has empty 'insertedIds'.",
            raw_response=io_response,
        )
    # `.get` makes a missing schema fail the same way as an empty one,
    # rather than with a bare KeyError.
    if not io_status.get("primaryKeySchema"):
        raise UnexpectedDataAPIResponseException(
            text="Response from insertOne API command has empty 'primaryKeySchema'.",
            raw_response=io_response,
        )
    # a single row was sent: its key is the first (only) entry returned
    inserted_id_list = io_status["insertedIds"][0]
    inserted_id_tuple, inserted_id = self._converter_agent.postprocess_key(
        inserted_id_list,
        primary_key_schema_dict=io_status["primaryKeySchema"],
    )
    return TableInsertOneResult(
        raw_results=[io_response],
        inserted_id=inserted_id,
        inserted_id_tuple=inserted_id_tuple,
    )

async def list_index_names(self, *, table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[str]

List the names of all indexes existing on this table.

Args

table_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for table_admin_timeout_ms.
timeout_ms: an alias for table_admin_timeout_ms.

Returns

a list of the index names as strings, in no particular order.

Example

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> asyncio.run(my_async_table.list_index_names())
['m_vector_index', 'winner_index', 'score_index']

Expand source code

async def list_index_names(
    self,
    *,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[str]:
    """
    Retrieve the names of the indexes that exist on this table.

    Args:
        table_admin_timeout_ms: timeout, in milliseconds, applied to the
            underlying API request. When omitted, the defaults configured on
            this object are used. (A single API request is issued by this
            method, so all timeout parameters are handled the same way.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Returns:
        the index names, as a list of strings, in no particular order.

    Raises:
        UnexpectedDataAPIResponseException: if the API response carries
            no "indexes" entry within its "status".

    Example:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> asyncio.run(my_async_table.list_index_names())
        ['m_vector_index', 'winner_index', 'score_index']
    """

    effective_timeout_ms, timeout_label = _select_singlereq_timeout_ta(
        timeout_options=self.api_options.timeout_options,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    command_payload: dict[str, Any] = {"listIndexes": {"options": {}}}
    logger.info("listIndexes")
    request_timeout_context = _TimeoutContext(
        request_ms=effective_timeout_ms, label=timeout_label
    )
    api_response = await self._api_commander.async_request(
        payload=command_payload,
        timeout_context=request_timeout_context,
    )

    # Guard clause: a response lacking the expected key is treated as faulty.
    if "indexes" not in api_response.get("status", {}):
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from listIndexes API command.",
            raw_response=api_response,
        )

    logger.info("finished listIndexes")
    return api_response["status"]["indexes"]  # type: ignore[no-any-return]

Create a Table from this one. Save for the arguments explicitly provided as overrides, everything else is kept identical to this table in the copy (the database is converted into a sync object).

Args

embedding_api_key: optional API key(s) for interacting with the table. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
reranking_api_key: optional API key(s) for interacting with the table. If a reranker is configured for the table, and this parameter is not None, Data API calls will include the appropriate reranker-related headers according to this parameter. Reranker services may not necessarily require this setting (e.g. if the service needs no authentication, or one is configured as part of the table definition relying on a "shared secret"). If a string is passed, it is translated into an instance of RerankingAPIKeyHeaderProvider.
api_options: any additional options to set for the result, in the form of an APIOptions instance (where one can set just the needed attributes). In case the same setting is also provided as named parameter, the latter takes precedence.

Returns

the new copy, a Table instance.

Example

>>> my_async_table.to_sync().find_one(
...     {"match_id": "fight4"},
...     projection={"winner": True},
... )
{"pk": 1, "column": "value"}

Expand source code

def to_sync(
    self: AsyncTable[ROW],
    *,
    embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
    reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
    api_options: APIOptions | UnsetType = _UNSET,
) -> Table[ROW]:
    """
    Create a Table from this one. Save for the arguments
    explicitly provided as overrides, everything else is kept identical
    to this table in the copy (the database is converted into
    a sync object).

    Args:
        embedding_api_key: optional API key(s) for interacting with the table.
            If an embedding service is configured, and this parameter is not None,
            each Data API call will include the necessary embedding-related headers
            as specified by this parameter. If a string is passed, it translates
            into the one "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            For some vectorize providers/models, if using header-based authentication,
            specialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`
            should be supplied.
        reranking_api_key: optional API key(s) for interacting with the table.
            If a reranker is configured for the table, and this parameter
            is not None, Data API calls will include the appropriate
            reranker-related headers according to this parameter. Reranker services
            may not necessarily require this setting (e.g. if the service needs no
            authentication, or one is configured as part of the table
            definition relying on a "shared secret").
            If a string is passed, it is translated into an instance of
            `astrapy.authentication.RerankingAPIKeyHeaderProvider`.
        api_options: any additional options to set for the result, in the form of
            an APIOptions instance (where one can set just the needed attributes).
            In case the same setting is also provided as named parameter,
            the latter takes precedence.

    Returns:
        the new copy, a Table instance.

    Example:
        >>> my_async_table.to_sync().find_one(
        ...     {"match_id": "fight4"},
        ...     projection={"winner": True},
        ... )
        {"pk": 1, "column": "value"}
    """

    # Options coming from the named parameters are applied last, so that they
    # take precedence over the same settings found in `api_options`.
    arg_api_options = APIOptions(
        embedding_api_key=embedding_api_key,
        reranking_api_key=reranking_api_key,
    )
    final_api_options = self.api_options.with_override(api_options).with_override(
        arg_api_options
    )
    return Table(
        database=self.database.to_sync(),
        name=self.name,
        keyspace=self.keyspace,
        api_options=final_api_options,
    )

async def update_one(self, filter: FilterType, update: dict[str, Any], *, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Update a single row on the table, changing some or all of the columns, with the implicit behaviour of inserting a new row if no match is found.

Args

filter: a predicate expressing the table primary key in full, i.e. a dictionary defining values for all columns that form the primary key. An example may be {"match_id": "fight4", "round": 1}.
update: the update prescription to apply to the row, expressed as a dictionary conforming to the Data API syntax. The update operators for tables are $set and $unset (in particular, setting a column to None has the same effect as the $unset operator). Examples are {"$set": {"round": 12}} and {"$unset": {"winner": "", "score": ""}}. Note that the update operation cannot alter the primary key columns. See the Data API documentation for more details.
general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Examples

>>> # NOTE: may require slight adaptation to an async context.
>>>
>>> from astrapy.data_types import DataAPISet
>>>
>>> # Set a new value for a column
>>> await my_async_table.update_one(
...     {"match_id": "fight4", "round": 1},
...     update={"$set": {"winner": "Winona"}},
... )
>>>
>>> # Set a new value for a column while unsetting another column
>>> await my_async_table.update_one(
...     {"match_id": "fight4", "round": 1},
...     update={"$set": {"winner": None, "score": 24}},
... )
>>>
>>> # Set a 'set' column to empty
>>> await my_async_table.update_one(
...     {"match_id": "fight4", "round": 1},
...     update={"$set": {"fighters": DataAPISet()}},
... )
>>>
>>> # Set a 'set' column to empty using None
>>> await my_async_table.update_one(
...     {"match_id": "fight4", "round": 1},
...     update={"$set": {"fighters": None}},
... )
>>>
>>> # Set a 'set' column to empty using a regular (empty) set
>>> await my_async_table.update_one(
...     {"match_id": "fight4", "round": 1},
...     update={"$set": {"fighters": set()}},
... )
>>>
>>> # Set a 'set' column to empty using $unset
>>> await my_async_table.update_one(
...     {"match_id": "fight4", "round": 1},
...     update={"$unset": {"fighters": None}},
... )
>>>
>>> # A non-existing primary key creates a new row
>>> await my_async_table.update_one(
...     {"match_id": "bar_fight", "round": 4},
...     update={"$set": {"score": 8, "winner": "Jack"}},
... )
>>>
>>> # Delete column values for a row (they'll read as None now)
>>> await my_async_table.update_one(
...     {"match_id": "challenge6", "round": 2},
...     update={"$unset": {"winner": None, "score": None}},
... )

Note

a row created entirely with update operations (as opposed to insertions) may, correspondingly, be deleted by means of an $unset update on all columns.

Expand source code

async def update_one(
    self,
    filter: FilterType,
    update: dict[str, Any],
    *,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Update a single row on the table, modifying some or all of its columns.
    If no row matches, a new row is implicitly inserted.

    Args:
        filter: a predicate that specifies the table primary key in full,
            i.e. a dictionary with a value for each of the columns forming
            the primary key. For example: `{"match_id": "fight4", "round": 1}`.
        update: the update prescription for the row, as a dictionary
            that follows the Data API syntax. For tables, the available update
            operators are `$set` and `$unset` (in particular, setting
            a column to None is equivalent to using the $unset operator).
            Examples are `{"$set": {"round": 12}}` and
            `{"$unset": {"winner": "", "score": ""}}`.
            Note that primary key columns cannot be altered by an update.
            See the Data API documentation for more details.
        general_method_timeout_ms: timeout, in milliseconds, applied to the
            underlying API request. When omitted, the defaults configured on
            this object are used. (A single API request is issued by this
            method, so all timeout parameters are handled the same way.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Raises:
        UnexpectedDataAPIResponseException: if the API response
            has no "status" entry.

    Examples:
        >>> # NOTE: may require slight adaptation to an async context.
        >>>
        >>> from astrapy.data_types import DataAPISet
        >>>
        >>> # Set a new value for a column
        >>> await my_async_table.update_one(
        ...     {"match_id": "fight4", "round": 1},
        ...     update={"$set": {"winner": "Winona"}},
        ... )
        >>>
        >>> # Set a new value for a column while unsetting another column
        >>> await my_async_table.update_one(
        ...     {"match_id": "fight4", "round": 1},
        ...     update={"$set": {"winner": None, "score": 24}},
        ... )
        >>>
        >>> # Set a 'set' column to empty
        >>> await my_async_table.update_one(
        ...     {"match_id": "fight4", "round": 1},
        ...     update={"$set": {"fighters": DataAPISet()}},
        ... )
        >>>
        >>> # Set a 'set' column to empty using None
        >>> await my_async_table.update_one(
        ...     {"match_id": "fight4", "round": 1},
        ...     update={"$set": {"fighters": None}},
        ... )
        >>>
        >>> # Set a 'set' column to empty using a regular (empty) set
        >>> await my_async_table.update_one(
        ...     {"match_id": "fight4", "round": 1},
        ...     update={"$set": {"fighters": set()}},
        ... )
        >>>
        >>> # Set a 'set' column to empty using $unset
        >>> await my_async_table.update_one(
        ...     {"match_id": "fight4", "round": 1},
        ...     update={"$unset": {"fighters": None}},
        ... )
        >>>
        >>> # A non-existing primary key creates a new row
        >>> await my_async_table.update_one(
        ...     {"match_id": "bar_fight", "round": 4},
        ...     update={"$set": {"score": 8, "winner": "Jack"}},
        ... )
        >>>
        >>> # Delete column values for a row (they'll read as None now)
        >>> await my_async_table.update_one(
        ...     {"match_id": "challenge6", "round": 2},
        ...     update={"$unset": {"winner": None, "score": None}},
        ... )

    Note:
        a row that was created entirely through update operations (as opposed
        to insertions) may, correspondingly, be deleted by an $unset update
        covering all columns.
    """

    effective_timeout_ms, timeout_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )

    # Only the entries that are actually provided make it into the command.
    command_body = {
        field_name: field_value
        for field_name, field_value in (("filter", filter), ("update", update))
        if field_value is not None
    }
    command_payload = self._converter_agent.preprocess_payload(
        {"updateOne": command_body},
        map2tuple_checker=map2tuple_checker_update_one,
    )

    logger.info(f"updateOne on '{self.name}'")
    request_timeout_context = _TimeoutContext(
        request_ms=effective_timeout_ms, label=timeout_label
    )
    api_response = await self._api_commander.async_request(
        payload=command_payload,
        timeout_context=request_timeout_context,
    )
    logger.info(f"finished updateOne on '{self.name}'")

    if "status" not in api_response:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from updateOne API command.",
            raw_response=api_response,
        )
    # The contents of "status" are disregarded: there is nothing to return.
    return None

Create a clone of this table with some changed attributes.

Args

embedding_api_key: optional API key(s) for interacting with the table. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
reranking_api_key: optional API key(s) for interacting with the table. If a reranker is configured for the table, and this parameter is not None, Data API calls will include the appropriate reranker-related headers according to this parameter. Reranker services may not necessarily require this setting (e.g. if the service needs no authentication, or one is configured as part of the table definition relying on a "shared secret"). If a string is passed, it is translated into an instance of RerankingAPIKeyHeaderProvider.
api_options: any additional options to set for the clone, in the form of an APIOptions instance (where one can set just the needed attributes). In case the same setting is also provided as named parameter, the latter takes precedence.

Returns

a new AsyncTable instance.

Example

>>> table_with_api_key_configured = my_async_table.with_options(
...     embedding_api_key="secret-key-0123abcd...",
... )

Expand source code

def with_options(
    self: AsyncTable[ROW],
    *,
    embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
    reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
    api_options: APIOptions | UnsetType = _UNSET,
) -> AsyncTable[ROW]:
    """
    Return a clone of this table where some attributes have been changed.

    Args:
        embedding_api_key: optional API key(s) for interacting with the table.
            When an embedding service is configured and this parameter is
            not None, the embedding-related headers it specifies are included
            in each Data API call. A plain string is translated into the one
            "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            For some vectorize providers/models, if using header-based
            authentication, specialized subclasses of
            `astrapy.authentication.EmbeddingHeadersProvider` should be supplied.
        reranking_api_key: optional API key(s) for interacting with the table.
            When a reranker is configured for the table and this parameter
            is not None, the appropriate reranker-related headers are included
            in Data API calls according to this parameter. Reranker services
            may not necessarily require this setting (e.g. if the service needs
            no authentication, or one is configured as part of the table
            definition relying on a "shared secret").
            A plain string is translated into an instance of
            `astrapy.authentication.RerankingAPIKeyHeaderProvider`.
        api_options: any additional options to set for the clone, given as
            an APIOptions instance (where just the needed attributes can be set).
            If the same setting is also provided as a named parameter,
            the named parameter takes precedence.

    Returns:
        a new AsyncTable instance.

    Example:
        >>> table_with_api_key_configured = my_async_table.with_options(
        ...     embedding_api_key="secret-key-0123abcd...",
        ... )
    """

    # All the work is delegated to the private copy helper.
    cloned_table = self._copy(
        embedding_api_key=embedding_api_key,
        reranking_api_key=reranking_api_key,
        api_options=api_options,
    )
    return cloned_table

class Collection (*, database: Database, name: str, keyspace: str | None, api_options: FullAPIOptions)

A Data API collection, the object to interact with the Data API for unstructured (schemaless) data, especially for DDL operations. This class has a synchronous interface.

This class is not meant for direct instantiation by the user, rather it is obtained by invoking methods such as get_collection of Database, wherefrom the Collection inherits its API options such as authentication token and API endpoint.

Args

database: a Database object, instantiated earlier. This represents the database the collection belongs to.
name: the collection name. This parameter should match an existing collection on the database.
keyspace: this is the keyspace to which the collection belongs. If nothing is specified, the database's working keyspace is used.
api_options: a complete specification of the API Options for this instance.

Examples

>>> from astrapy import DataAPIClient
>>> client = DataAPIClient()
>>> database = client.get_database(
...     "https://01234567-....apps.astra.datastax.com",
...     token="AstraCS:..."
... )

>>> # Create a collection using the fluent syntax for its definition
>>> from astrapy.constants import VectorMetric
>>> from astrapy.info import CollectionDefinition
>>>
>>> collection_definition = (
...     CollectionDefinition.builder()
...     .set_vector_dimension(3)
...     .set_vector_metric(VectorMetric.DOT_PRODUCT)
...     .set_indexing("deny", ["annotations", "logs"])
...     .build()
... )
>>> my_collection = database.create_collection(
...     "my_events",
...     definition=collection_definition,
... )

>>> # Create a collection with the definition as object
>>> from astrapy.info import CollectionVectorOptions
>>>
>>> collection_definition_1 = CollectionDefinition(
...     vector=CollectionVectorOptions(
...         dimension=3,
...         metric=VectorMetric.DOT_PRODUCT,
...     ),
...     indexing={"deny": ["annotations", "logs"]},
... )
>>> my_collection_1 = database.create_collection(
...     "my_events",
...     definition=collection_definition_1,
... )
>>>

>>> # Create a collection with the definition as plain dictionary
>>> collection_definition_2 = {
...     "indexing": {"deny": ["annotations", "logs"]},
...     "vector": {
...         "dimension": 3,
...         "metric": VectorMetric.DOT_PRODUCT,
...     },
... }
>>> my_collection_2 = database.create_collection(
...     "my_events",
...     definition=collection_definition_2,
... )

>>> # Get a reference to an existing collection
>>> # (no checks are performed on DB)
>>> my_collection_3a = database.get_collection("my_events")
>>> my_collection_3b = database.my_events
>>> my_collection_3c = database["my_events"]

Note

creating an instance of Collection does not trigger actual creation of the collection on the database. The latter should have been created beforehand, e.g. through the create_collection method of a Database.

Expand source code

class Collection(Generic[DOC]):
    """
    A Data API collection, the object to interact with the Data API for unstructured
    (schemaless) data, especially for DDL operations.
    This class has a synchronous interface.

    This class is not meant for direct instantiation by the user, rather
    it is obtained by invoking methods such as `get_collection` of Database,
    wherefrom the Collection inherits its API options such as authentication
    token and API endpoint.

    Args:
        database: a Database object, instantiated earlier. This represents
            the database the collection belongs to.
        name: the collection name. This parameter should match an existing
            collection on the database.
        keyspace: this is the keyspace to which the collection belongs.
            If nothing is specified, the database's working keyspace is used.
        api_options: a complete specification of the API Options for this instance.

    Examples:
        >>> from astrapy import DataAPIClient
        >>> client = DataAPIClient()
        >>> database = client.get_database(
        ...     "https://01234567-....apps.astra.datastax.com",
        ...     token="AstraCS:..."
        ... )

        >>> # Create a collection using the fluent syntax for its definition
        >>> from astrapy.constants import VectorMetric
        >>> from astrapy.info import CollectionDefinition
        >>>
        >>> collection_definition = (
        ...     CollectionDefinition.builder()
        ...     .set_vector_dimension(3)
        ...     .set_vector_metric(VectorMetric.DOT_PRODUCT)
        ...     .set_indexing("deny", ["annotations", "logs"])
        ...     .build()
        ... )
        >>> my_collection = database.create_collection(
        ...     "my_events",
        ...     definition=collection_definition,
        ... )

        >>>
        >>> # Create a collection with the definition as object
        >>> from astrapy.info import CollectionVectorOptions
        >>>
        >>> collection_definition_1 = CollectionDefinition(
        ...     vector=CollectionVectorOptions(
        ...         dimension=3,
        ...         metric=VectorMetric.DOT_PRODUCT,
        ...     ),
        ...     indexing={"deny": ["annotations", "logs"]},
        ... )
        >>> my_collection_1 = database.create_collection(
        ...     "my_events",
        ...     definition=collection_definition_1,
        ... )
        >>>

        >>> # Create a collection with the definition as plain dictionary
        >>> collection_definition_2 = {
        ...     "indexing": {"deny": ["annotations", "logs"]},
        ...     "vector": {
        ...         "dimension": 3,
        ...         "metric": VectorMetric.DOT_PRODUCT,
        ...     },
        ... }
        >>> my_collection_2 = database.create_collection(
        ...     "my_events",
        ...     definition=collection_definition_2,
        ... )

        >>> # Get a reference to an existing collection
        >>> # (no checks are performed on DB)
        >>> my_collection_3a = database.get_collection("my_events")
        >>> my_collection_3b = database.my_events
        >>> my_collection_3c = database["my_events"]

    Note:
        creating an instance of Collection does not trigger actual creation
        of the collection on the database. The latter should have been created
        beforehand, e.g. through the `create_collection` method of a Database.
    """

    def __init__(
        self,
        *,
        database: Database,
        name: str,
        keyspace: str | None,
        api_options: FullAPIOptions,
    ) -> None:
        """
        Store the collection coordinates and prepare its API commander.

        Args:
            database: the Database the collection belongs to.
            name: the collection name.
            keyspace: the keyspace of the collection; if None, the keyspace
                of `database` is used instead.
            api_options: the complete API Options for this instance.

        Raises:
            ValueError: if neither `keyspace` nor the database provide a keyspace.
        """
        self.api_options = api_options
        self._name = name

        # An explicit keyspace wins; otherwise fall back to the database's own.
        resolved_keyspace = database.keyspace if keyspace is None else keyspace
        if resolved_keyspace is None:
            raise ValueError("Attempted to create Collection with 'keyspace' unset.")

        # The collection keeps its own copy of the database, bound to the
        # resolved keyspace and to this collection's API options.
        self._database = database._copy(
            keyspace=resolved_keyspace, api_options=self.api_options
        )

        # Later entries override earlier ones in case of repeated header names.
        auth_header = {
            DEFAULT_DATA_API_AUTH_HEADER: self.api_options.token.get_token()
        }
        self._commander_headers = {
            **auth_header,
            **self.api_options.embedding_api_key.get_headers(),
            **self.api_options.reranking_api_key.get_headers(),
            **self.api_options.database_additional_headers,
        }
        self._api_commander = self._get_api_commander()

    def __repr__(self) -> str:
        """Describe this collection by name, keyspace, API endpoint and options."""
        described_fields = ", ".join(
            [
                f'name="{self.name}"',
                f'keyspace="{self.keyspace}"',
                f'database.api_endpoint="{self.database.api_endpoint}"',
                f"api_options={self.api_options}",
            ]
        )
        return f"{self.__class__.__name__}({described_fields})"

    def __eq__(self, other: Any) -> bool:
        """
        Two collections are equal when name, database and API options all match.
        Anything that is not a Collection compares as different.
        """
        if not isinstance(other, Collection):
            return False

        # All three comparisons are evaluated before being combined.
        names_match = self._name == other._name
        databases_match = self._database == other._database
        options_match = self.api_options == other.api_options
        return all((names_match, databases_match, options_match))

    def __call__(self, *pargs: Any, **kwargs: Any) -> None:
        """
        Always raise a TypeError: a collection is not callable. The message
        hints at a mistaken method call on the database object.
        """
        collection_type = self.__class__.__name__
        database_type = self.database.__class__.__name__
        message = (
            f"'{collection_type}' object is not callable. If you "
            f"meant to call the '{self.name}' method on a "
            f"'{database_type}' object "
            "it is failing because no such method exists."
        )
        raise TypeError(message)

    def _get_api_commander(self) -> APICommander:
        """
        Build a fresh APICommander out of the properties of this collection.

        The request path is made of API path, API version, keyspace and
        collection name: items that are None, or empty once their surrounding
        slashes are removed, are left out.

        Raises:
            ValueError: if the database of this collection has no keyspace set.
        """

        if self._database.keyspace is None:
            raise ValueError(
                "No keyspace specified. Collection requires a keyspace to "
                "be set, e.g. through the `keyspace` constructor parameter."
            )

        url_options = self._database.api_options.data_api_url_options
        candidate_segments = (
            url_options.api_path,
            url_options.api_version,
            self._database.keyspace,
            self._name,
        )
        path_segments: list[str] = []
        for candidate in candidate_segments:
            if candidate is None:
                continue
            trimmed = candidate.strip("/")
            if trimmed != "":
                path_segments.append(trimmed)
        base_path = "/" + "/".join(path_segments)

        # The same serdes setting governs decimals for both writes and reads.
        serdes_options = self.api_options.serdes_options
        return APICommander(
            api_endpoint=self._database.api_endpoint,
            path=base_path,
            headers=self._commander_headers,
            callers=self.api_options.callers,
            redacted_header_names=self.api_options.redacted_header_names,
            handle_decimals_writes=serdes_options.use_decimals_in_collections,
            handle_decimals_reads=serdes_options.use_decimals_in_collections,
        )

    def _converted_request(
        self,
        *,
        http_method: str = HttpMethod.POST,
        payload: dict[str, Any] | None = None,
        additional_path: str | None = None,
        request_params: dict[str, Any] | None = None,
        raise_api_errors: bool = True,
        timeout_context: _TimeoutContext,
    ) -> dict[str, Any]:
        """
        Issue an API request with collection-specific data conversions.

        The payload goes through `preprocess_collection_payload` before being
        sent, and the response through `postprocess_collection_response` before
        being returned, both according to this collection's serdes options.

        Args:
            http_method: the HTTP method for the request.
            payload: the command payload, if any.
            additional_path: passed unchanged to the API commander.
            request_params: passed to the API commander. If omitted (or None),
                a new empty dictionary is used for each call.
            raise_api_errors: passed unchanged to the API commander.
            timeout_context: passed unchanged to the API commander.

        Returns:
            the response from the API commander, after postprocessing.
        """
        # A None default (instead of a `{}` literal in the signature) avoids
        # sharing one mutable dictionary across all calls of this method.
        effective_request_params: dict[str, Any] = (
            {} if request_params is None else request_params
        )
        converted_payload = preprocess_collection_payload(
            payload, options=self.api_options.serdes_options
        )
        raw_response_json = self._api_commander.request(
            http_method=http_method,
            payload=converted_payload,
            additional_path=additional_path,
            request_params=effective_request_params,
            raise_api_errors=raise_api_errors,
            timeout_context=timeout_context,
        )
        response_json = postprocess_collection_response(
            raw_response_json, options=self.api_options.serdes_options
        )
        return response_json

    def _copy(
        self: Collection[DOC],
        *,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> Collection[DOC]:
        """
        Create a new Collection with the same database, name and keyspace as
        this one, and with API options obtained by overriding the current ones:
        first with `api_options`, then with the named API key parameters.
        """
        named_overrides = APIOptions(
            embedding_api_key=embedding_api_key,
            reranking_api_key=reranking_api_key,
        )
        # The named parameters are applied last, hence they take precedence.
        options_after_generic_override = self.api_options.with_override(api_options)
        resulting_api_options = options_after_generic_override.with_override(
            named_overrides
        )
        return Collection(
            database=self.database,
            name=self.name,
            keyspace=self.keyspace,
            api_options=resulting_api_options,
        )

    def with_options(
        self: Collection[DOC],
        *,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> Collection[DOC]:
        """
        Return a clone of this collection where some attributes have been changed.

        Args:
            embedding_api_key: optional API key(s) for interacting with the
                collection. When an embedding service is configured and this
                parameter is not None, the embedding-related headers it specifies
                are included in each Data API call. A plain string is translated
                into the one "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
                For some vectorize providers/models, if using header-based
                authentication, specialized subclasses of
                `astrapy.authentication.EmbeddingHeadersProvider` should be supplied.
            reranking_api_key: optional API key(s) for interacting with the
                collection. When a reranker is configured for the collection and
                this parameter is not None, the appropriate reranker-related
                headers are included in Data API calls according to this
                parameter. Reranker services may not necessarily require this
                setting (e.g. if the service needs no authentication, or one is
                configured as part of the collection definition relying on
                a "shared secret").
                A plain string is translated into an instance of
                `astrapy.authentication.RerankingAPIKeyHeaderProvider`.
            api_options: any additional options to set for the clone, given as
                an APIOptions instance (where just the needed attributes can be
                set). If the same setting is also provided as a named parameter,
                the named parameter takes precedence.

        Returns:
            a new Collection instance.

        Example:
            >>> collection_with_api_key_configured = my_collection.with_options(
            ...     embedding_api_key="secret-key-0123abcd...",
            ... )
        """

        # All the work is delegated to the private copy helper.
        cloned_collection = self._copy(
            embedding_api_key=embedding_api_key,
            reranking_api_key=reranking_api_key,
            api_options=api_options,
        )
        return cloned_collection

    def to_async(
        self: Collection[DOC],
        *,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncCollection[DOC]:
        """
        Produce the AsyncCollection counterpart of this collection.
        The result has the same name and keyspace, its database is the
        async conversion of this collection's database, and its API options
        are the current ones with the provided overrides applied.

        Args:
            embedding_api_key: optional API key(s) for interacting with the collection.
                When an embedding service is configured and this parameter
                is not None, the embedding-related headers it specifies are
                included in each Data API call. A plain string translates
                into the one "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
                Some vectorize providers/models, when using header-based
                authentication, require specialized subclasses of
                `astrapy.authentication.EmbeddingHeadersProvider` instead.
            reranking_api_key: optional API key(s) for interacting with the collection.
                When a reranker is configured for the collection and this
                parameter is not None, the appropriate reranker-related headers
                are included in Data API calls. Not all reranker services
                need this setting (e.g. the service may require no authentication,
                or one may be configured as part of the collection definition
                relying on a "shared secret").
                A plain string is translated into an instance of
                `astrapy.authentication.RerankingAPIKeyHeaderProvider`.
            api_options: further options to set for the result, given as
                an APIOptions instance (where just the needed attributes
                can be set). If a setting is provided both here and
                as a named parameter, the named parameter takes precedence.

        Returns:
            the new copy, an AsyncCollection instance.

        Example:
            >>> asyncio.run(my_coll.to_async().count_documents({},upper_bound=100))
            77
        """

        # The named parameters are wrapped in their own APIOptions so that
        # they can be applied after (i.e. on top of) `api_options`.
        named_overrides = APIOptions(
            embedding_api_key=embedding_api_key,
            reranking_api_key=reranking_api_key,
        )
        merged_api_options = self.api_options.with_override(api_options)
        merged_api_options = merged_api_options.with_override(named_overrides)
        async_database = self.database.to_async()
        return AsyncCollection(
            database=async_database,
            name=self.name,
            keyspace=self.keyspace,
            api_options=merged_api_options,
        )

    def options(
        self,
        *,
        collection_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionDefinition:
        """
        Retrieve the collection options, that is, the collection configuration
        as it is read from the database.

        No caching takes place: a request to the Data API is issued each time
        the method is invoked. This guarantees up-to-date information, e.g. for
        applications that perform real-time validation of the collection.

        Args:
            collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `collection_admin_timeout_ms`.
            timeout_ms: an alias for `collection_admin_timeout_ms`.

        Returns:
            a CollectionDefinition instance describing the collection.
            (See also the database `list_collections` method.)

        Raises:
            RuntimeError: if no collection with this name is found among those
                listed by the database.

        Example:
            >>> my_coll.options()
            CollectionDefinition(vector=CollectionVectorOptions(dimension=3, metric='cosine'))
        """

        _admin_timeout_ms, _admin_timeout_label = _select_singlereq_timeout_ca(
            timeout_options=self.api_options.timeout_options,
            collection_admin_timeout_ms=collection_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info(f"getting collections in search of '{self.name}'")
        # all collections are listed, and those named like this one are retained
        matching_descriptors = []
        for descriptor in self.database._list_collections_ctx(
            keyspace=None,
            timeout_context=_TimeoutContext(
                request_ms=_admin_timeout_ms,
                label=_admin_timeout_label,
            ),
        ):
            if descriptor.name == self.name:
                matching_descriptors.append(descriptor)
        logger.info(f"finished getting collections in search of '{self.name}'")
        if not matching_descriptors:
            raise RuntimeError(
                f"Collection {self.keyspace}.{self.name} not found.",
            )
        return matching_descriptors[0].definition

    def info(
        self,
        *,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionInfo:
        """
        Return a CollectionInfo object with information on the collection
        (its name, location and database).

        This is a different thing from the collection `options` method
        (which has to do with the collection internal configuration).

        Args:
            database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying DevOps API request.
                If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.

        Returns:
            a CollectionInfo instance carrying the database info, the keyspace,
            the collection name and its full name.

        Example:
            >>> my_coll.info().database_info.region
            'eu-west-1'
            >>> my_coll.info().full_name
            'default_keyspace.my_v_collection'

        Note:
            the returned CollectionInfo wraps, among other things,
            the database information: as such, calling this method
            triggers the same-named method of a Database object (which, in turn,
            performs a HTTP request to the DevOps API).
            See the documentation for `Database.info()` for more details.
        """

        # the database information is obtained first, forwarding all timeouts
        parent_database_info = self.database.info(
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        return CollectionInfo(
            database_info=parent_database_info,
            keyspace=self.keyspace,
            name=self.name,
            full_name=self.full_name,
        )

    @property
    def database(self) -> Database:
        """
        The Database object this collection belongs to.

        Example:
            >>> my_coll.database.name
            'the_application_database'
        """

        parent_database = self._database
        return parent_database

    @property
    def keyspace(self) -> str:
        """
        The keyspace where this collection is located, as read from
        the collection's database.

        Raises:
            RuntimeError: if the database has its keyspace set to None.

        Example:
            >>> my_coll.keyspace
            'default_keyspace'
        """

        database_keyspace = self.database.keyspace
        if database_keyspace is not None:
            return database_keyspace
        raise RuntimeError("The collection's DB is set with keyspace=None")

    @property
    def name(self) -> str:
        """
        The collection name.

        Example:
            >>> my_coll.name
            'my_v_collection'
        """

        collection_name = self._name
        return collection_name

    @property
    def full_name(self) -> str:
        """
        The collection name qualified with its keyspace, i.e. a string
        of the form "keyspace.collection_name".

        Example:
            >>> my_coll.full_name
            'default_keyspace.my_v_collection'
        """

        return "{}.{}".format(self.keyspace, self.name)

    def insert_one(
        self,
        document: DOC,
        *,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionInsertOneResult:
        """
        Insert a single document in the collection in an atomic operation.

        Args:
            document: the dictionary expressing the document to insert.
                The `_id` field of the document can be left out, in which
                case it will be created automatically.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a CollectionInsertOneResult object.

        Raises:
            UnexpectedDataAPIResponseException: if the API response does not
                carry a non-empty "insertedIds" list within its "status"
                (including the case of a missing or null "status").

        Examples:
            >>> my_coll.count_documents({}, upper_bound=10)
            0
            >>> my_coll.insert_one(
            ...     {
            ...         "age": 30,
            ...         "name": "Smith",
            ...         "food": ["pear", "peach"],
            ...         "likes_fruit": True,
            ...     },
            ... )
            CollectionInsertOneResult(raw_results=..., inserted_id='ed4587a4-...-...-...')
            >>> my_coll.insert_one({"_id": "user-123", "age": 50, "name": "Maccio"})
            CollectionInsertOneResult(raw_results=..., inserted_id='user-123')
            >>> my_coll.count_documents({}, upper_bound=10)
            2

            >>> my_coll.insert_one({"tag": "v", "$vector": [10, 11]})
            CollectionInsertOneResult(...)

        Note:
            If an `_id` is explicitly provided, which corresponds to a document
            that exists already in the collection, an error is raised and
            the insertion fails.
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        io_payload = {"insertOne": {"document": document}}
        logger.info(f"insertOne on '{self.name}'")
        io_response = self._converted_request(
            payload=io_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished insertOne on '{self.name}'")
        # `or {}` (as opposed to a `.get` default) also covers an explicit
        # null "status": such a response must surface as the "faulty response"
        # exception below and not as a TypeError on the membership test.
        io_status = io_response.get("status") or {}
        if "insertedIds" in io_status and io_status["insertedIds"]:
            return CollectionInsertOneResult(
                raw_results=[io_response],
                inserted_id=io_status["insertedIds"][0],
            )
        # missing, null or empty "insertedIds": the response is not usable
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from insert_one API command.",
            raw_response=io_response,
        )

    def insert_many(
        self,
        documents: Iterable[DOC],
        *,
        ordered: bool = False,
        chunk_size: int | None = None,
        concurrency: int | None = None,
        request_timeout_ms: int | None = None,
        general_method_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionInsertManyResult:
        """
        Insert a list of documents into the collection.
        This is not an atomic operation.

        Args:
            documents: an iterable of dictionaries, each a document to insert.
                Documents may specify their `_id` field or leave it out, in which
                case it will be added automatically.
            ordered: if False (default), the insertions can occur in arbitrary order
                and possibly concurrently. If True, they are processed sequentially.
                If there are no specific reasons against it, unordered insertions are to
                be preferred as they complete much faster.
            chunk_size: how many documents to include in a single API request.
                It must be a positive integer.
                Exceeding the server maximum allowed value results in an error.
                Leave it unspecified (recommended) to use the system default.
            concurrency: maximum number of concurrent requests to the API at
                a given time. It cannot be more than one for ordered insertions.
            general_method_timeout_ms: a timeout, in milliseconds, for the whole
                requested operation (which may involve multiple API requests).
                If not passed, the collection-level setting is used instead.
            request_timeout_ms: a timeout, in milliseconds, for each API request.
                If not passed, the collection-level setting is used instead.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a CollectionInsertManyResult object.

        Raises:
            ValueError: if an ordered insertion is requested with a concurrency
                greater than one, or if the chunk size is not a positive integer.
            CollectionInsertManyException: if one or more of the API responses
                report errors (see the Notes below).

        Examples:
            >>> my_coll.count_documents({}, upper_bound=10)
            0
            >>> my_coll.insert_many(
            ...     [{"a": 10}, {"a": 5}, {"b": [True, False, False]}],
            ...     ordered=True,
            ... )
            CollectionInsertManyResult(raw_results=..., inserted_ids=['184bb06f-...', '...', '...'])
            >>> my_coll.count_documents({}, upper_bound=100)
            3
            >>> my_coll.insert_many(
            ...     [{"seq": i} for i in range(50)],
            ...     concurrency=5,
            ... )
            CollectionInsertManyResult(raw_results=..., inserted_ids=[... ...])
            >>> my_coll.count_documents({}, upper_bound=100)
            53
            >>> my_coll.insert_many(
            ...     [
            ...         {"tag": "a", "$vector": [1, 2]},
            ...         {"tag": "b", "$vector": [3, 4]},
            ...     ]
            ... )
            CollectionInsertManyResult(...)

        Note:
            Unordered insertions are executed with some degree of concurrency,
            so it is usually better to prefer this mode unless the order in the
            document sequence is important.

        Note:
            A failure mode for this command is related to certain faulty documents
            found among those to insert: for example, a document may have an ID
            already found on the collection, or its vector dimension may not
            match the collection setting.

            For an ordered insertion, the method will raise an exception at
            the first such faulty document -- nevertheless, all documents processed
            until then will end up being written to the database.

            For unordered insertions, if the error stems from faulty documents
            the insertion proceeds until exhausting the input documents: then,
            an exception is raised -- and all insertable documents will have been
            written to the database, including those "after" the troublesome ones.

            Errors occurring during an insert_many operation, for that reason,
            may result in a `CollectionInsertManyException` being raised.
            This exception allows to inspect the list of document IDs that were
            successfully inserted, while accessing at the same time the underlying
            "root errors" that made the full method call to fail.
        """

        _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
            (general_method_timeout_ms, "general_method_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.general_method_timeout_ms,
                "general_method_timeout_ms",
            ),
        )
        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        if concurrency is None:
            _concurrency = 1 if ordered else DEFAULT_INSERT_MANY_CONCURRENCY
        else:
            _concurrency = concurrency
        if _concurrency > 1 and ordered:
            raise ValueError("Cannot run ordered insert_many concurrently.")
        if chunk_size is None:
            _chunk_size = DEFAULT_INSERT_MANY_CHUNK_SIZE
        else:
            _chunk_size = chunk_size
        # A negative step would make the chunking `range` silently empty,
        # i.e. the call would report success without inserting anything.
        if _chunk_size < 1:
            raise ValueError("The chunk size for insert_many must be positive.")
        _documents = list(documents)
        logger.info(f"inserting {len(_documents)} documents in '{self.name}'")
        timeout_manager = MultiCallTimeoutManager(
            overall_timeout_ms=_general_method_timeout_ms,
            timeout_label=_gmt_label,
        )
        options = {"ordered": bool(ordered), "returnDocumentResponses": True}
        # lazily-evaluated slicing of the input; consumed once, by one branch below
        document_chunks = (
            _documents[i : i + _chunk_size]
            for i in range(0, len(_documents), _chunk_size)
        )

        def _insert_chunk(
            document_chunk: list[Any],
        ) -> tuple[dict[str, Any], dict[str, Any]]:
            # Issue one insertMany request; return the (payload, response) pair.
            # API errors are not raised here: they are collected by the caller.
            im_payload = {
                "insertMany": {
                    "documents": document_chunk,
                    "options": options,
                },
            }
            logger.info(f"insertMany(chunk) on '{self.name}'")
            im_response = self._converted_request(
                payload=im_payload,
                raise_api_errors=False,
                timeout_context=timeout_manager.remaining_timeout(
                    cap_time_ms=_request_timeout_ms,
                    cap_timeout_label=_rt_label,
                ),
            )
            logger.info(f"finished insertMany(chunk) on '{self.name}'")
            return im_payload, im_response

        def _ok_document_ids(chunk_response: dict[str, Any]) -> list[Any]:
            # The IDs of the documents marked as "OK" in a chunk response.
            return [
                doc_resp["_id"]
                for doc_resp in (chunk_response.get("status") or {}).get(
                    "documentResponses", []
                )
                if doc_resp["status"] == "OK"
            ]

        inserted_ids: list[Any] = []
        raw_results: list[dict[str, Any]] = []
        if ordered:
            for document_chunk in document_chunks:
                im_payload, chunk_response = _insert_chunk(document_chunk)
                # accumulate the results in this call
                inserted_ids += _ok_document_ids(chunk_response)
                raw_results.append(chunk_response)
                # if errors, quit early
                if chunk_response.get("errors", []):
                    response_exception = DataAPIResponseException.from_response(
                        command=im_payload,
                        raw_response=chunk_response,
                    )
                    raise CollectionInsertManyException(
                        inserted_ids=inserted_ids, exceptions=[response_exception]
                    )
        else:
            # unordered: concurrent or not, do all of them and parse the results
            if _concurrency > 1:
                with ThreadPoolExecutor(max_workers=_concurrency) as executor:
                    payload_response_pairs = list(
                        executor.map(_insert_chunk, document_chunks)
                    )
            else:
                payload_response_pairs = [
                    _insert_chunk(document_chunk) for document_chunk in document_chunks
                ]
            # Built as a list in both cases, so that the result type does not
            # depend on whether the requests were run concurrently.
            raw_results = [
                chunk_response for _, chunk_response in payload_response_pairs
            ]
            inserted_ids = [
                inserted_id
                for chunk_response in raw_results
                for inserted_id in _ok_document_ids(chunk_response)
            ]

            # check-raise
            response_exceptions = [
                DataAPIResponseException.from_response(
                    command=chunk_payload,
                    raw_response=chunk_response,
                )
                for chunk_payload, chunk_response in payload_response_pairs
                if chunk_response.get("errors", [])
            ]
            if response_exceptions:
                raise CollectionInsertManyException(
                    inserted_ids=inserted_ids,
                    exceptions=response_exceptions,
                )

        # return
        full_result = CollectionInsertManyResult(
            raw_results=raw_results,
            inserted_ids=inserted_ids,
        )
        logger.info(
            f"finished inserting {len(_documents)} documents in '{self.name}'"
        )
        return full_result

    # Typing-only overload of `find`: when `document_type` is left to None,
    # the returned cursor is annotated as `CollectionFindCursor[DOC, DOC]`,
    # i.e. its items have the same type as the documents in the collection.
    @overload
    def find(
        self,
        filter: FilterType | None = None,
        *,
        projection: ProjectionType | None = None,
        document_type: None = None,
        skip: int | None = None,
        limit: int | None = None,
        include_similarity: bool | None = None,
        include_sort_vector: bool | None = None,
        sort: SortType | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionFindCursor[DOC, DOC]: ...

    # Typing-only overload of `find`: when a `document_type` is passed
    # (here a required keyword argument of type `type[DOC2]`), the returned
    # cursor is annotated as `CollectionFindCursor[DOC, DOC2]`.
    @overload
    def find(
        self,
        filter: FilterType | None = None,
        *,
        projection: ProjectionType | None = None,
        document_type: type[DOC2],
        skip: int | None = None,
        limit: int | None = None,
        include_similarity: bool | None = None,
        include_sort_vector: bool | None = None,
        sort: SortType | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionFindCursor[DOC, DOC2]: ...

    def find(
        self,
        filter: FilterType | None = None,
        *,
        projection: ProjectionType | None = None,
        document_type: type[DOC2] | None = None,
        skip: int | None = None,
        limit: int | None = None,
        include_similarity: bool | None = None,
        include_sort_vector: bool | None = None,
        sort: SortType | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionFindCursor[DOC, DOC2]:
        """
        Find documents on the collection, matching a certain provided filter.

        The method returns a cursor that can then be iterated over. Depending
        on the method call pattern, the iteration over all documents can reflect
        collection mutations occurred since the `find` method was called, or not.
        In cases where the cursor reflects mutations in real-time, it will iterate
        over cursors in an approximate way (i.e. exhibiting occasional skipped
        or duplicate documents). This happens when making use of the `sort`
        option in a non-vector-search manner.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            projection: it controls which parts of the document are returned.
                It can be an allow-list: `{"f1": True, "f2": True}`,
                or a deny-list: `{"fx": False, "fy": False}`, but not a mixture
                (except for the `_id` and other special fields, which can be
                associated to both True or False independently of the rest
                of the specification).
                The special star-projections `{"*": True}` and `{"*": False}`
                have the effect of returning the whole document and `{}` respectively.
                For lists in documents, slice directives can be passed to select
                portions of the list: for instance, `{"array": {"$slice": 2}}`,
                `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
                `{"array": {"$slice": [-4, 2]}}`.
                An iterable over strings will be treated implicitly as an allow-list.
                The default projection (used if this parameter is not passed) does not
                necessarily include "special" fields such as `$vector` or `$vectorize`.
                See the Data API documentation for more on projections.
            document_type: this parameter acts a formal specifier for the type checker.
                If omitted, the resulting cursor is implicitly a
                `CollectionFindCursor[DOC, DOC]`, i.e. maintains the same type for
                the items it returns as that for the documents in the collection.
                Strictly typed code may want to specify this parameter especially when
                a projection is given.
            skip: with this integer parameter, what would be the first `skip`
                documents returned by the query are discarded, and the results
                start from the (skip+1)-th document.
                This parameter can be used only in conjunction with an explicit
                `sort` criterion of the ascending/descending type (i.e. it cannot
                be used when not sorting, nor with vector-based ANN search).
            limit: this (integer) parameter sets a limit over how many documents
                are returned. Once `limit` is reached (or the cursor is exhausted
                for lack of matching documents), nothing more is returned.
            include_similarity: a boolean to request the numeric value of the
                similarity to be returned as an added "$similarity" key in each
                returned document. It can be used meaningfully only in a vector
                search (see `sort`).
            include_sort_vector: a boolean to request the search query vector.
                If set to True (and if the invocation is a vector search), calling
                the `get_sort_vector` method on the returned cursor will yield
                the vector used for the ANN search.
            sort: with this dictionary parameter one can control the order
                the documents are returned. See the Note about sorting, as well as
                the one about upper bounds, for details.
                Vector-based ANN sorting is achieved by providing a "$vector"
                or a "$vectorize" key in `sort`.
            request_timeout_ms: a timeout, in milliseconds, for each single one
                of the underlying HTTP requests used to fetch documents as the
                cursor is iterated over.
                If not passed, the collection-level setting is used instead.
            timeout_ms: an alias for `request_timeout_ms`.

        Returns:
            a CollectionFindCursor object, that can be iterated over (and manipulated
            in several ways). The cursor, if needed, handles pagination under the hood
            as the documents are consumed.

        Examples:
            >>> filter = {"seq": {"$exists": True}}
            >>> for doc in my_coll.find(filter, projection={"seq": True}, limit=5):
            ...     print(doc["seq"])
            ...
            37
            35
            10
            36
            27
            >>> cursor1 = my_coll.find(
            ...     {},
            ...     limit=4,
            ...     sort={"seq": astrapy.constants.SortMode.DESCENDING},
            ... )
            >>> [doc["_id"] for doc in cursor1]
            ['97e85f81-...', '1581efe4-...', '...', '...']
            >>> cursor2 = my_coll.find({}, limit=3)

            >>> my_coll.insert_many([
            ...     {"tag": "A", "$vector": [4, 5]},
            ...     {"tag": "B", "$vector": [3, 4]},
            ...     {"tag": "C", "$vector": [3, 2]},
            ...     {"tag": "D", "$vector": [4, 1]},
            ...     {"tag": "E", "$vector": [2, 5]},
            ... ])
            >>> ann_tags = [
            ...     document["tag"]
            ...     for document in my_coll.find(
            ...         {},
            ...         sort={"$vector": [3, 3]},
            ...         limit=3,
            ...     )
            ... ]
            >>> ann_tags
            ['A', 'B', 'C']
            >>> # (assuming the collection has metric VectorMetric.COSINE)

            >>> cursor = my_coll.find(
            ...     sort={"$vector": [3, 3]},
            ...     limit=3,
            ...     include_sort_vector=True,
            ... )
            >>> cursor.get_sort_vector()
            [3.0, 3.0]
            >>> matches = cursor.to_list()
            >>> cursor.get_sort_vector()
            [3.0, 3.0]

        Note:
            The following are example values for the `sort` parameter.
            When no particular order is required:
                sort={}  # (default when parameter not provided)
            When sorting by a certain value in ascending/descending order:
                sort={"field": SortMode.ASCENDING}
                sort={"field": SortMode.DESCENDING}
            When sorting first by "field" and then by "subfield"
            (while modern Python versions preserve the order of dictionaries,
            it is suggested for clarity to employ a `collections.OrderedDict`
            in these cases):
                sort={
                    "field": SortMode.ASCENDING,
                    "subfield": SortMode.ASCENDING,
                }
            When running a vector similarity (ANN) search:
                sort={"$vector": [0.4, 0.15, -0.5]}

        Note:
            Some combinations of arguments impose an implicit upper bound on the
            number of documents that are returned by the Data API. More specifically:
            (a) Vector ANN searches cannot return more than a number of documents
            that at the time of writing is set to 1000 items.
            (b) When using a sort criterion of the ascending/descending type,
            the Data API will return a smaller number of documents, set to 20
            at the time of writing, and stop there. The returned documents are
            the top results across the whole collection according to the requested
            criterion.

        Note:
            When not specifying sorting criteria at all (by vector or otherwise),
            the cursor can scroll through an arbitrary number of documents as
            the Data API and the client periodically exchange new chunks of documents.
            It should be noted that the behavior of the cursor in the case documents
            have been added/removed after the `find` was started depends on database
            internals and it is not guaranteed, nor excluded, that such "real-time"
            changes in the data would be picked up by the cursor.
        """

        # lazy-import here to avoid circular import issues
        from astrapy.cursors import CollectionFindCursor

        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        return (
            CollectionFindCursor(
                collection=self,
                request_timeout_ms=_request_timeout_ms,
                overall_timeout_ms=None,
                request_timeout_label=_rt_label,
            )
            .filter(filter)
            .project(projection)
            .skip(skip)
            .limit(limit)
            .sort(sort)
            .include_similarity(include_similarity)
            .include_sort_vector(include_sort_vector)
        )

    def find_one(
        self,
        filter: FilterType | None = None,
        *,
        projection: ProjectionType | None = None,
        include_similarity: bool | None = None,
        sort: SortType | None = None,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> DOC | None:
        """
        Look up a single document: the first one in the collection, if any,
        that satisfies the provided filter.

        Args:
            filter: a dictionary expressing a predicate in the Data API
                filter syntax. For example:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                The full set of operators is found in the Data API documentation.
            projection: a specification of which parts of the document to return.
                This can be an allow-list, such as `{"f1": True, "f2": True}`,
                or a deny-list, such as `{"fx": False, "fy": False}`. The two
                forms cannot be mixed (with the exception of `_id` and other
                special fields, which can be set to True or False regardless
                of the rest of the specification).
                The special star-projections `{"*": True}` and `{"*": False}`
                result in returning the whole document and `{}` respectively.
                Lists within documents can be sliced, to select just a portion
                of them, through directives such as `{"array": {"$slice": 2}}`,
                `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
                `{"array": {"$slice": [-4, 2]}}`.
                An iterable over strings is implicitly treated as an allow-list.
                If this parameter is not passed, the default projection applies,
                which does not necessarily include "special" fields
                such as `$vector` or `$vectorize`.
                More on projections is found in the Data API documentation.
            include_similarity: a boolean flag, requesting that the numeric
                similarity be added to the returned document under
                the "$similarity" key. This is meaningful only when running
                a vector search (see `sort`).
            sort: a dictionary controlling the order the documents are returned
                (see the Note about sorting for details).
                To run a vector-based ANN sorting, supply a "$vector"
                or a "$vectorize" key in `sort`.
            general_method_timeout_ms: a timeout, in milliseconds, for the
                underlying API request. This object's defaults apply if the
                parameter is not provided. (Since this method issues a single
                API request, all timeout parameters are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a dictionary expressing the required document, otherwise None.

        Examples:
            >>> my_coll.find_one({})
            {'_id': '68d1e515-...', 'seq': 37}
            >>> my_coll.find_one({"seq": 10})
            {'_id': 'd560e217-...', 'seq': 10}
            >>> my_coll.find_one({"seq": 1011})
            >>> # (returns None for no matches)
            >>> my_coll.find_one({}, projection={"seq": False})
            {'_id': '68d1e515-...'}
            >>> my_coll.find_one(
            ...     {},
            ...     sort={"seq": astrapy.constants.SortMode.DESCENDING},
            ... )
            {'_id': '97e85f81-...', 'seq': 69}
            >>> my_coll.find_one({}, sort={"$vector": [1, 0]}, projection={"*": True})
            {'_id': '...', 'tag': 'D', '$vector': [4.0, 1.0]}

        Note:
            The `find` method has more details on the accepted parameters
            (keeping in mind that `skip` and `limit` are not valid parameters
            for `find_one`).
        """

        # a single API request is issued: one timeout value covers it entirely
        request_ms, timeout_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )

        # the "options" entry is supplied only if the caller specified the flag
        similarity_options: dict[str, bool] | None = None
        if include_similarity is not None:
            similarity_options = {"includeSimilarity": include_similarity}

        command_fields = {
            "filter": filter,
            "projection": normalize_optional_projection(projection),
            "options": similarity_options,
            "sort": sort,
        }
        # unset (None) entries are left out of the command altogether
        find_one_payload = {
            "findOne": {
                field_name: field_value
                for field_name, field_value in command_fields.items()
                if field_value is not None
            }
        }

        find_one_response = self._converted_request(
            payload=find_one_payload,
            timeout_context=_TimeoutContext(request_ms=request_ms, label=timeout_label),
        )

        # a well-formed response has a "document" entry (possibly null) under "data"
        response_data = find_one_response.get("data") or {}
        if "document" not in response_data:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from findOne API command.",
                raw_response=find_one_response,
            )
        # a null "document" (no matches) comes out as None for the caller
        return find_one_response["data"]["document"]  # type: ignore[no-any-return]

    def distinct(
        self,
        key: str | Iterable[str | int],
        *,
        filter: FilterType | None = None,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[Any]:
        """
        Collect the unique values found for `key` across the documents
        in the collection that satisfy the provided filter, and return them
        as a list.

        Args:
            key: the name of the field to inspect across documents.
                Most often this is simply a field name; however, the
                dot-notation can be used to denote subkeys or indices
                within lists (for example, "map_field.subkey" or "list_field.2").
                Should a field name contain literal dots or ampersands, this
                parameter must be escaped to be treated properly.
                Alternatively, the key can be a list of strings and numbers,
                each item being a field name/index: in this case no escape is
                necessary. Examples: ["map_field", "subkey"] or ["list_field", 2].
                When a list is encountered and no numeric index is specified,
                all items in the list are visited.
            filter: a dictionary expressing a predicate in the Data API
                filter syntax. For example:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                The full set of operators is found in the Data API documentation.
            general_method_timeout_ms: a timeout, in milliseconds, for the whole
                requested operation (which may involve multiple API requests).
                Being based on `find` (see), this method may entail successive
                HTTP API requests, depending on the amount of involved documents.
            request_timeout_ms: a timeout, in milliseconds, for each API request.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a list with all the different values for `key` found across
            the documents matching the filter. No item is repeated in the list.

        Example:
            >>> my_coll.insert_many(
            ...     [
            ...         {"name": "Marco", "food": ["apple", "orange"], "city": "Helsinki"},
            ...         {"name": "Emma", "food": {"likes_fruit": True, "allergies": []}},
            ...     ]
            ... )
            CollectionInsertManyResult(raw_results=..., inserted_ids=['c5b99f37-...', 'd6416321-...'])
            >>> my_coll.distinct("name")
            ['Marco', 'Emma']
            >>> my_coll.distinct("city")
            ['Helsinki']
            >>> my_coll.distinct("food")
            ['apple', 'orange', {'likes_fruit': True, 'allergies': []}]
            >>> my_coll.distinct("food.1")
            ['orange']
            >>> my_coll.distinct("food.allergies")
            []
            >>> my_coll.distinct("food.likes_fruit")
            [True]

        Note:
            Keep in mind that `distinct` is a client-side operation: all required
            documents are effectively browsed, using the logic of the `find`
            method, and the unique values found for `key` are collected along
            the way. Hence, a large amount of matching documents may have
            performance, latency and ultimately billing implications.

        Note:
            See the Note of the `find` command for details on the behaviour
            of "distinct" in conjunction with real-time changes in the
            collection contents.
        """

        # lazy-import here to avoid circular import issues
        from astrapy.cursors import CollectionFindCursor

        # overall timeout: `timeout_ms` is an alias of `general_method_timeout_ms`
        overall_ms, overall_label = _first_valid_timeout(
            (general_method_timeout_ms, "general_method_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.general_method_timeout_ms,
                "general_method_timeout_ms",
            ),
        )
        # per-request timeout: the parameter, else the collection-level setting
        per_request_ms, per_request_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )

        # the cursor is projected on the (reduced) key only
        extract_values = _create_document_key_extractor(key)
        projection_key = _reduce_distinct_key_to_safe(key)
        # relaxing the type hint (limited to within this method body)
        f_cursor: CollectionFindCursor[dict[str, Any], dict[str, Any]] = (
            CollectionFindCursor(
                collection=self,
                request_timeout_ms=per_request_ms,
                overall_timeout_ms=overall_ms,
                request_timeout_label=per_request_label,
                overall_timeout_label=overall_label,
            )  # type: ignore[assignment]
            .filter(filter)
            .project({projection_key: True})
        )

        # walking through the documents: a value is kept only the first time
        # its hash is encountered, so that first-seen order is retained
        seen_hashes = set()
        unique_values: list[Any] = []
        logger.info(f"running distinct() on '{self.name}'")
        for document in f_cursor:
            for value in extract_values(document):
                value_hash = _hash_collection_document(
                    value, options=self.api_options.serdes_options
                )
                if value_hash in seen_hashes:
                    continue
                seen_hashes.add(value_hash)
                unique_values.append(value)
        logger.info(f"finished running distinct() on '{self.name}'")
        return unique_values

    # Typing-only overload: with `document_type` omitted (None), the reranked
    # results yielded by the cursor wrap documents of the collection's type DOC.
    @overload
    def find_and_rerank(
        self,
        filter: FilterType | None = None,
        *,
        sort: HybridSortType,
        projection: ProjectionType | None = None,
        document_type: None = None,
        limit: int | None = None,
        hybrid_limits: int | dict[str, int] | None = None,
        include_scores: bool | None = None,
        include_sort_vector: bool | None = None,
        rerank_on: str | None = None,
        rerank_query: str | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionFindAndRerankCursor[DOC, RerankedResult[DOC]]: ...

    # Typing-only overload: when a `document_type` (a `type[DOC2]`) is passed,
    # the reranked results yielded by the cursor wrap documents typed as DOC2.
    @overload
    def find_and_rerank(
        self,
        filter: FilterType | None = None,
        *,
        sort: HybridSortType,
        projection: ProjectionType | None = None,
        document_type: type[DOC2],
        limit: int | None = None,
        hybrid_limits: int | dict[str, int] | None = None,
        include_scores: bool | None = None,
        include_sort_vector: bool | None = None,
        rerank_on: str | None = None,
        rerank_query: str | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionFindAndRerankCursor[DOC, RerankedResult[DOC2]]: ...

    @beta_method
    def find_and_rerank(
        self,
        filter: FilterType | None = None,
        *,
        sort: HybridSortType,
        projection: ProjectionType | None = None,
        document_type: type[DOC2] | None = None,
        limit: int | None = None,
        hybrid_limits: int | dict[str, int] | None = None,
        include_scores: bool | None = None,
        include_sort_vector: bool | None = None,
        rerank_on: str | None = None,
        rerank_query: str | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionFindAndRerankCursor[DOC, RerankedResult[DOC2]]:
        """
        Search for relevant documents by combining vector and lexical matches
        by means of a reranking step.

        This method can succeed only on a collection that was created with the
        required hybrid capabilities (see the `create_collection` method
        of the Database class).

        A cursor is returned: iterating over it yields the resulting documents,
        generally paired with accompanying information such as scores.

        Args:
            filter: a dictionary expressing a predicate in the Data API
                filter syntax. For example:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                The full set of operators is found in the Data API documentation.
            sort: a clause with the criteria for selecting the top matching
                documents. Enough information must be provided for both a lexical
                and a vector similarity to be performed (for the latter, either
                a query text or a query vector, depending on the collection
                configuration).
                Examples are: `sort={"$hybrid": "xyz"}`,
                `sort={"$hybrid": {"$vectorize": "xyz", "$lexical": "abc"}}`,
                `sort={"$hybrid": {"$vector": DataAPIVector(...), "$lexical": "abc"}}`.
                Note this is different from the `sort` parameter of the `find` method.
            projection: a specification of which parts of the document to return.
                This can be an allow-list, such as `{"f1": True, "f2": True}`,
                or a deny-list, such as `{"fx": False, "fy": False}`. The two
                forms cannot be mixed (with the exception of `_id` and other
                special fields, which can be set to True or False regardless
                of the rest of the specification).
                The special star-projections `{"*": True}` and `{"*": False}`
                result in returning the whole document and `{}` respectively.
                Lists within documents can be sliced, to select just a portion
                of them, through directives such as `{"array": {"$slice": 2}}`,
                `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
                `{"array": {"$slice": [-4, 2]}}`.
                An iterable over strings is implicitly treated as an allow-list.
                If this parameter is not passed, the default projection applies,
                which does not necessarily include "special" fields
                such as `$vector` or `$vectorize`.
                More on projections is found in the Data API documentation.
            document_type: this parameter acts as a formal specifier for the
                type checker. If omitted, the resulting cursor is implicitly a
                `CollectionFindAndRerankCursor[DOC, RerankedResult[DOC]]`, i.e.
                the documents in the returned items keep the same type as the
                documents in the collection. Strictly typed code may want to
                specify this parameter, especially when a projection is given.
            limit: the maximum number of documents to return as the outcome
                of the final rerank step.
            hybrid_limits: a setting for the amount of documents fetched by
                each of the individual retrieval operations that are then combined
                in the rerank step. This is either a number or a dictionary of
                strings to numbers: the latter form expresses different counts
                for the different retrievals. For example: `hybrid_limits=50`,
                `hybrid_limits={"$vector": 20, "$lexical": 10}`.
            include_scores: a boolean flag, requesting that the scores be returned
                along with the resulting documents. If this is set, the scores
                can be read in the `scores` map attribute of each RerankedResult
                (the map is otherwise empty).
            include_sort_vector: a boolean flag, requesting the search query
                vector used for the vector-search part of the find operation.
                If set to True, the vector used for the ANN search is obtained by
                calling the `get_sort_vector` method on the returned cursor.
            rerank_on: for collections without a vectorize (server-side embeddings)
                service, this specifies the field name that is then used
                during reranking.
            rerank_query: for collections without a vectorize (server-side
                embeddings) service, this specifies the query text for the reranker.
            request_timeout_ms: a timeout, in milliseconds, for each single one
                of the underlying HTTP requests issued to fetch documents as the
                cursor is iterated over.
                The collection-level setting is used if this is not passed.
            timeout_ms: an alias for `request_timeout_ms`.

        Returns:
            a CollectionFindAndRerankCursor object, that can be iterated over (and
            manipulated in several ways).

        Examples:
            >>> # The following examples assume a collection with 'vectorize' and the
            >>> # necessary hybrid configuration; see below for a non-vectorize case.
            >>>
            >>> # Populate with documents
            >>> my_vectorize_coll.insert_many([
            ...     {
            ...         "_id": "A",
            ...         "wkd": "Mon",
            ...         "$vectorize": "Monday is green",
            ...         "$lexical": "Monday is green",
            ...     },
            ...     {
            ...         "_id": "B",
            ...         "wkd": "Tue",
            ...         "$vectorize": "Tuesday is pink",
            ...         "$lexical": "Tuesday is pink",
            ...     },
            ...     {
            ...         "_id": "C",
            ...         "wkd": "Wed",
            ...         "$vectorize": "Wednesday is cyan",
            ...         "$lexical": "Wednesday is cyan",
            ...     },
            ...     {
            ...         "_id": "D",
            ...         "wkd": "Thu",
            ...         "$vectorize": "Thursday is red",
            ...         "$lexical": "Thursday is red",
            ...     },
            ...     {
            ...         "_id": "E",
            ...         "wkd": "Fri",
            ...         "$vectorize": "Friday is orange",
            ...         "$lexical": "Friday is orange",
            ...     },
            ...     {
            ...         "_id": "F",
            ...         "wkd": "Sat",
            ...         "$vectorize": "Saturday is purple",
            ...         "$lexical": "Saturday is purple",
            ...     },
            ...     {
            ...         "_id": "G",
            ...         "wkd": "Sun",
            ...         "$vectorize": "Sunday is beige",
            ...         "$lexical": "Sunday is beige",
            ...     },
            ... ])
            CollectionInsertManyResult(inserted_ids=[A, B, C, D, E ... (7 total)], raw_results=...)
            >>>
            >>> # A simple invocation, consuming the cursor
            >>> # with a loop ('vectorize' collection):
            >>> for r_result in my_vectorize_coll.find_and_rerank(
            ...     sort={"$hybrid": "Weekdays?"},
            ...     limit=2,
            ... ):
            ...     print(r_result.document)
            ...
            {'_id': 'C', 'wkd': 'Wed'}
            {'_id': 'A', 'wkd': 'Mon'}
            >>> # Additional arbitrary filtering predicates
            >>> # ('vectorize' collection):
            >>> for r_result in my_vectorize_coll.find_and_rerank(
            ...     {"wkd": {"$ne": "Mon"}},
            ...     sort={"$hybrid": "Weekdays?"},
            ...     limit=2,
            ... ):
            ...     print(r_result.document)
            ...
            {'_id': 'C', 'wkd': 'Wed'}
            {'_id': 'B', 'wkd': 'Tue'}
            >>> # Fetch the scores with the documents ('vectorize' collection):
            >>> scored_texts = [
            ...     (r_result.document["wkd"], r_result.scores["$rerank"])
            ...     for r_result in my_vectorize_coll.find_and_rerank(
            ...         sort={"$hybrid": "Weekdays?"},
            ...         limit=2,
            ...         include_scores=True,
            ...     )
            ... ]
            >>> print(scored_texts)
            [('Wed', -9.1015625), ('Mon', -10.2421875)]
            >>>
            >>> # Customize sub-search limits ('vectorize' collection):
            >>> hits = my_vectorize_coll.find_and_rerank(
            ...     sort={"$hybrid": "Weekdays?"},
            ...     limit=2,
            ...     hybrid_limits=20,
            ... ).to_list()
            >>> print(", ".join(r_res.document["wkd"] for r_res in hits))
            Wed, Mon
            >>>
            >>> # Separate sub-search queries ('vectorize' collection):
            >>> cursor = my_vectorize_coll.find_and_rerank(
            ...     sort={
            ...         "$hybrid": {
            ...             "$vectorize": "a week day",
            ...             "$lexical": "green",
            ...         },
            ...     },
            ...     limit=2,
            ...     hybrid_limits={"$lexical": 4, "$vector": 20},
            ... )
            >>> print(", ".join(r_res.document["wkd"] for r_res in cursor))
            Mon, Wed
            >>>
            >>> # Reading back the query vector used by
            >>> # the search ('vectorize' collection):
            >>> cursor = my_vectorize_coll.find_and_rerank(
            ...     sort={"$hybrid": "Weekdays?"},
            ...     limit=2,
            ...     include_sort_vector=True
            ... )
            >>> sort_vector = cursor.get_sort_vector()
            >>> print(" ==> ".join(
            ...     r_res.document["wkd"] for r_res in cursor
            ... ))
            Wed ==> Mon
            >>> print(f"Sort vector={sort_vector}")
            Sort vector=[-0.0021172, -0.012057612, 0.010362527 ...]
            >>>
            >>>
            >>> # If the collection has no "vectorize", `rerank_query`
            >>> # and `rerank_on` must be passed. The following assumes a
            >>> # collection with a 3-dimensional vector and the setup for hybrid.
            >>>
            >>> # Populate with documents:
            >>> my_vector3d_coll.insert_many([
            ...     {
            ...         "_id": "A",
            ...         "wkd": "Mon",
            ...         "$vector": [0.1, 0.2, 0.3],
            ...         "$lexical": "Monday is green",
            ...     },
            ...     {
            ...         "_id": "B",
            ...         "wkd": "Tue",
            ...         "$vector": [0.2, 0.3, 0.4],
            ...         "$lexical": "Tuesday is pink",
            ...     },
            ...     {
            ...         "_id": "C",
            ...         "wkd": "Wed",
            ...         "$vector": [0.3, 0.4, 0.5],
            ...         "$lexical": "Wednesday is cyan",
            ...     },
            ...     {
            ...         "_id": "D",
            ...         "wkd": "Thu",
            ...         "$vector": [0.4, 0.5, 0.6],
            ...         "$lexical": "Thursday is red",
            ...     },
            ...     {
            ...         "_id": "E",
            ...         "wkd": "Fri",
            ...         "$vector": [0.5, 0.6, 0.7],
            ...         "$lexical": "Friday is orange",
            ...     },
            ...     {
            ...         "_id": "F",
            ...         "wkd": "Sat",
            ...         "$vector": [0.6, 0.7, 0.8],
            ...         "$lexical": "Saturday is purple",
            ...     },
            ...     {
            ...         "_id": "G",
            ...         "wkd": "Sun",
            ...         "$vector": [0.7, 0.8, 0.9],
            ...         "$lexical": "Sunday is beige",
            ...     },
            ... ])
            CollectionInsertManyResult(inserted_ids=[A, B, C, D, E ... (7 total)], raw_results=...)
            >>>
            >>> # A simple find_and_rerank call (collection without 'vectorize'):
            >>> for r_result in my_vector3d_coll.find_and_rerank(
            ...     sort={
            ...         "$hybrid": {
            ...             "$vector": [0.9, 0.8, 0.7],
            ...             "$lexical": "Weekdays?",
            ...         },
            ...     },
            ...     limit=2,
            ...     rerank_on="wkd",
            ...     rerank_query="week days",
            ... ):
            ...     print(r_result.document["wkd"])
            ...
            Mon
            Tue
        """

        # lazy-import here to avoid circular import issues
        from astrapy.cursors import CollectionFindAndRerankCursor

        # per-request timeout candidates: the two method parameters come first,
        # the collection-level setting last
        timeout_candidates = (
            (request_timeout_ms, "request_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        per_request_ms, per_request_label = _first_valid_timeout(*timeout_candidates)

        # `document_type` plays no role here: it is only for the type checker.
        # The cursor is configured, one setting at a time, through its fluent
        # interface; no overall timeout is set on it.
        return (
            CollectionFindAndRerankCursor(
                collection=self,
                request_timeout_ms=per_request_ms,
                overall_timeout_ms=None,
                request_timeout_label=per_request_label,
            )
            .filter(filter)
            .project(projection)
            .limit(limit)
            .sort(sort)
            .hybrid_limits(hybrid_limits)
            .rerank_on(rerank_on)
            .rerank_query(rerank_query)
            .include_scores(include_scores)
            .include_sort_vector(include_sort_vector)
        )

    def count_documents(
        self,
        filter: FilterType,
        *,
        upper_bound: int,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> int:
        """
        Return the number of documents in the collection that satisfy a filter.

        Args:
            filter: a dictionary expressing a predicate in the Data API
                filter syntax. Some examples:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                The Data API documentation lists all available operators.
            upper_bound: a mandatory ceiling for the outcome of the count.
                Should the number of matching documents be greater than
                this value, the method raises an exception.
                An exception is likewise raised, whatever the value of
                upper_bound, when the number of documents is beyond the
                maximum count the Data API is able to reach.
            general_method_timeout_ms: a timeout, in milliseconds, applied to the
                underlying API request. When omitted, the defaults of this object
                are used. (Since the method issues one API request only, all
                timeout parameters are handled identically.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            the exact number of documents matching the filter.

        Raises:
            TooManyDocumentsToCountException: if the API response flags
                that more documents exist than the server can count, or
                if the returned count is greater than `upper_bound`.
            UnexpectedDataAPIResponseException: if the API response has
                no "count" entry in its status.

        Example:
            >>> my_coll.insert_many([{"seq": i} for i in range(20)])
            CollectionInsertManyResult(...)
            >>> my_coll.count_documents({}, upper_bound=100)
            20
            >>> my_coll.count_documents({"seq":{"$gt": 15}}, upper_bound=100)
            4
            >>> my_coll.count_documents({}, upper_bound=10)
            Traceback (most recent call last):
                ... ...
            astrapy.exceptions.TooManyDocumentsToCountException

        Note:
            Counting is an expensive operation: callers are advised to pass
            an `upper_bound` that reasonably reflects their expectations.
            Besides, running count operations indiscriminately over sizeable
            sets of documents (i.e. in the thousands and more) is discouraged:
            alternative, application-specific solutions are preferable.
            Bear in mind that the Data API enforces a hard upper limit on
            how many documents it will count: this method throws
            an exception when that limit is hit.
        """

        chosen_timeout_ms, timeout_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info(f"countDocuments on '{self.name}'")
        response = self._converted_request(
            payload={"countDocuments": {"filter": filter}},
            timeout_context=_TimeoutContext(
                request_ms=chosen_timeout_ms, label=timeout_label
            ),
        )
        logger.info(f"finished countDocuments on '{self.name}'")

        status = response.get("status", {})
        if "count" not in status:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from countDocuments API command.",
                raw_response=response,
            )

        counted: int = status["count"]
        # a truthy "moreData" is reported as the server-side maximum being exceeded
        if status.get("moreData", False):
            raise TooManyDocumentsToCountException(
                text=f"Document count exceeds {counted}, the maximum allowed by the server",
                server_max_count_exceeded=True,
            )
        # the count itself is reliable: enforce the caller-provided ceiling
        if counted > upper_bound:
            raise TooManyDocumentsToCountException(
                text="Document count exceeds required upper bound",
                server_max_count_exceeded=False,
            )
        return counted

    def estimated_document_count(
        self,
        *,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> int:
        """
        Ask the API server for an estimated number of documents in the collection.

        Unlike `count_documents`, this method accepts no filtering parameters.

        Args:
            general_method_timeout_ms: a timeout, in milliseconds, applied to the
                underlying API request. When omitted, the defaults of this object
                are used. (Since the method issues one API request only, all
                timeout parameters are handled identically.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            an estimate, as provided by the server, of how many documents
            the collection contains.

        Raises:
            UnexpectedDataAPIResponseException: if the API response has
                no "count" entry in its status.

        Example:
            >>> my_coll.estimated_document_count()
            35700
        """

        chosen_timeout_ms, timeout_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        command: dict[str, Any] = {"estimatedDocumentCount": {}}
        logger.info(f"estimatedDocumentCount on '{self.name}'")
        response = self._converted_request(
            payload=command,
            timeout_context=_TimeoutContext(
                request_ms=chosen_timeout_ms, label=timeout_label
            ),
        )
        logger.info(f"finished estimatedDocumentCount on '{self.name}'")

        status = response.get("status", {})
        if "count" not in status:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from estimatedDocumentCount API command.",
                raw_response=response,
            )
        estimate: int = status["count"]
        return estimate

    def find_one_and_replace(
        self,
        filter: FilterType,
        replacement: DOC,
        *,
        projection: ProjectionType | None = None,
        sort: SortType | None = None,
        upsert: bool = False,
        return_document: str = ReturnDocument.BEFORE,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> DOC | None:
        """
        Locate a document on the collection and swap it, in its entirety,
        for a new one; if no match is found, optionally insert the new one.

        Args:
            filter: a dictionary expressing a predicate in the Data API
                filter syntax. Some examples:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                The Data API documentation lists all available operators.
            replacement: the new document to be written to the collection.
            projection: a specification of which parts of the document are
                returned. This can be an allow-list: `{"f1": True, "f2": True}`,
                or a deny-list: `{"fx": False, "fy": False}`, but not a mix of
                the two (with the exception of `_id` and other special fields,
                which may be set to either True or False regardless of the
                rest of the specification).
                The special star-projections `{"*": True}` and `{"*": False}`
                result in returning the whole document and `{}` respectively.
                Portions of lists found in documents can be selected by passing
                slice directives, such as `{"array": {"$slice": 2}}`,
                `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
                `{"array": {"$slice": [-4, 2]}}`.
                An iterable over strings is implicitly handled as an allow-list.
                The default projection (i.e. when this parameter is omitted) does
                not necessarily include "special" fields like `$vector` or
                `$vectorize`.
                The Data API documentation has more details on projections.
            sort: a dictionary parameter controlling the order in which
                the documents matching the filter are sorted: in practice,
                this determines which document comes first and, as a
                consequence, is the replaced one. More on sorting can be
                found in the `find` method. To obtain vector-based ANN
                sorting, supply a "$vector" or a "$vectorize" key in `sort`.
            upsert: the behavior to adopt when there are no matches.
                If True, `replacement` gets inserted as a new document
                when no matches are found on the collection. If False,
                with no matches the operation silently does nothing.
            return_document: a flag determining which document is returned:
                with `ReturnDocument.BEFORE`, or the string "before",
                the returned document is the one found on database; with
                `ReturnDocument.AFTER`, or the string "after", it is
                the new document. Defaults to "before".
            general_method_timeout_ms: a timeout, in milliseconds, applied to the
                underlying API request. When omitted, the defaults of this object
                are used. (Since the method issues one API request only, all
                timeout parameters are handled identically.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            A document (or a projection of it, if so required): this is
            either the document before the replace operation or the one
            after it. Otherwise, None is returned to signal that
            no document matched, or that no replacement
            was inserted (depending on the `return_document` parameter).

        Raises:
            UnexpectedDataAPIResponseException: if the API response has
                no "document" entry in its data.

        Example:
            >>> my_coll.insert_one({"_id": "rule1", "text": "all animals are equal"})
            CollectionInsertOneResult(...)
            >>> my_coll.find_one_and_replace(
            ...     {"_id": "rule1"},
            ...     {"text": "some animals are more equal!"},
            ... )
            {'_id': 'rule1', 'text': 'all animals are equal'}
            >>> my_coll.find_one_and_replace(
            ...     {"text": "some animals are more equal!"},
            ...     {"text": "and the pigs are the rulers"},
            ...     return_document=astrapy.constants.ReturnDocument.AFTER,
            ... )
            {'_id': 'rule1', 'text': 'and the pigs are the rulers'}
            >>> my_coll.find_one_and_replace(
            ...     {"_id": "rule2"},
            ...     {"text": "F=ma^2"},
            ...     return_document=astrapy.constants.ReturnDocument.AFTER,
            ... )
            >>> # (returns None for no matches)
            >>> my_coll.find_one_and_replace(
            ...     {"_id": "rule2"},
            ...     {"text": "F=ma"},
            ...     upsert=True,
            ...     return_document=astrapy.constants.ReturnDocument.AFTER,
            ...     projection={"_id": False},
            ... )
            {'text': 'F=ma'}
        """

        chosen_timeout_ms, timeout_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        command_fields = {
            "filter": filter,
            "projection": normalize_optional_projection(projection),
            "replacement": replacement,
            "options": {
                "returnDocument": return_document,
                "upsert": upsert,
            },
            "sort": sort,
        }
        # fields left unset (i.e. None) are omitted from the command altogether
        command = {
            "findOneAndReplace": {
                field: value
                for field, value in command_fields.items()
                if value is not None
            }
        }
        logger.info(f"findOneAndReplace on '{self.name}'")
        response = self._converted_request(
            payload=command,
            timeout_context=_TimeoutContext(
                request_ms=chosen_timeout_ms, label=timeout_label
            ),
        )
        logger.info(f"finished findOneAndReplace on '{self.name}'")

        response_data = response.get("data", {})
        if "document" not in response_data:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from find_one_and_replace API command.",
                raw_response=response,
            )
        # a null "document" is passed on to the caller as None
        return response_data.get("document")  # type: ignore[no-any-return]

    def replace_one(
        self,
        filter: FilterType,
        replacement: DOC,
        *,
        sort: SortType | None = None,
        upsert: bool = False,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionUpdateResult:
        """
        Swap one document on the collection for a new one;
        if no match is found, optionally insert the new one.

        Args:
            filter: a dictionary expressing a predicate in the Data API
                filter syntax. Some examples:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                The Data API documentation lists all available operators.
            replacement: the new document to be written to the collection.
            sort: a dictionary parameter controlling the order in which
                the documents matching the filter are sorted: in practice,
                this determines which document comes first and, as a
                consequence, is the replaced one. More on sorting can be
                found in the `find` method. To obtain vector-based ANN
                sorting, supply a "$vector" or a "$vectorize" key in `sort`.
            upsert: the behavior to adopt when there are no matches.
                If True, `replacement` gets inserted as a new document
                when no matches are found on the collection. If False,
                with no matches the operation silently does nothing.
            general_method_timeout_ms: a timeout, in milliseconds, applied to the
                underlying API request. When omitted, the defaults of this object
                are used. (Since the method issues one API request only, all
                timeout parameters are handled identically.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a CollectionUpdateResult object describing the outcome of
            the replace operation.

        Raises:
            UnexpectedDataAPIResponseException: if the API response has
                no "document" entry in its data.

        Example:
            >>> my_coll.insert_one({"Marco": "Polo"})
            CollectionInsertOneResult(...)
            >>> my_coll.replace_one({"Marco": {"$exists": True}}, {"Buda": "Pest"})
            CollectionUpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1})
            >>> my_coll.find_one({"Buda": "Pest"})
            {'_id': '8424905a-...', 'Buda': 'Pest'}
            >>> my_coll.replace_one({"Mirco": {"$exists": True}}, {"Oh": "yeah?"})
            CollectionUpdateResult(raw_results=..., update_info={'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0})
            >>> my_coll.replace_one({"Mirco": {"$exists": True}}, {"Oh": "yeah?"}, upsert=True)
            CollectionUpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '931b47d6-...'})
        """

        chosen_timeout_ms, timeout_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        command_fields = {
            "filter": filter,
            "replacement": replacement,
            "options": {
                "upsert": upsert,
            },
            "sort": sort,
        }
        # fields left unset (i.e. None) are omitted from the command altogether
        command = {
            "findOneAndReplace": {
                field: value
                for field, value in command_fields.items()
                if value is not None
            }
        }
        logger.info(f"findOneAndReplace on '{self.name}'")
        response = self._converted_request(
            payload=command,
            timeout_context=_TimeoutContext(
                request_ms=chosen_timeout_ms, label=timeout_label
            ),
        )
        logger.info(f"finished findOneAndReplace on '{self.name}'")

        if "document" not in response.get("data", {}):
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from find_one_and_replace API command.",
                raw_response=response,
            )
        # a missing (or falsy) status is handled as an empty one
        response_status = response.get("status") or {}
        return CollectionUpdateResult(
            raw_results=[response],
            update_info=_prepare_update_info([response_status]),
        )

    def find_one_and_update(
        self,
        filter: FilterType,
        update: dict[str, Any],
        *,
        projection: ProjectionType | None = None,
        sort: SortType | None = None,
        upsert: bool = False,
        return_document: str = ReturnDocument.BEFORE,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> DOC | None:
        """
        Locate a document on the collection and apply the requested update
        to it; if no match is found, optionally insert a new document.

        Args:
            filter: a dictionary expressing a predicate in the Data API
                filter syntax. Some examples:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                The Data API documentation lists all available operators.
            update: the prescription for the update to apply to the document,
                given as a dictionary in the Data API syntax. Some examples:
                    {"$set": {"field": "value"}}
                    {"$inc": {"counter": 10}}
                    {"$unset": {"field": ""}}
                The Data API documentation describes the full syntax.
            projection: a specification of which parts of the document are
                returned. This can be an allow-list: `{"f1": True, "f2": True}`,
                or a deny-list: `{"fx": False, "fy": False}`, but not a mix of
                the two (with the exception of `_id` and other special fields,
                which may be set to either True or False regardless of the
                rest of the specification).
                The special star-projections `{"*": True}` and `{"*": False}`
                result in returning the whole document and `{}` respectively.
                Portions of lists found in documents can be selected by passing
                slice directives, such as `{"array": {"$slice": 2}}`,
                `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
                `{"array": {"$slice": [-4, 2]}}`.
                An iterable over strings is implicitly handled as an allow-list.
                The default projection (i.e. when this parameter is omitted) does
                not necessarily include "special" fields like `$vector` or
                `$vectorize`.
                The Data API documentation has more details on projections.
            sort: a dictionary parameter controlling the order in which
                the documents matching the filter are sorted: in practice,
                this determines which document comes first and, as a
                consequence, is the updated one. More on sorting can be
                found in the `find` method. To obtain vector-based ANN
                sorting, supply a "$vector" or a "$vectorize" key in `sort`.
            upsert: the behavior to adopt when there are no matches.
                If True, a new document (the result of applying `update`
                to an empty document) gets inserted when no matches are
                found on the collection. If False, with no matches
                the operation silently does nothing.
            return_document: a flag determining which document is returned:
                with `ReturnDocument.BEFORE`, or the string "before",
                the returned document is the one found on database; with
                `ReturnDocument.AFTER`, or the string "after", it is
                the new document. Defaults to "before".
            general_method_timeout_ms: a timeout, in milliseconds, applied to the
                underlying API request. When omitted, the defaults of this object
                are used. (Since the method issues one API request only, all
                timeout parameters are handled identically.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            A document (or a projection of it, if so required): this is
            either the document before the update operation or the one
            after it. Otherwise, None is returned to signal that
            no document matched, or that no update
            was applied (depending on the `return_document` parameter).

        Raises:
            UnexpectedDataAPIResponseException: if the API response has
                no "document" entry in its data.

        Example:
            >>> my_coll.insert_one({"Marco": "Polo"})
            CollectionInsertOneResult(...)
            >>> my_coll.find_one_and_update(
            ...     {"Marco": {"$exists": True}},
            ...     {"$set": {"title": "Mr."}},
            ... )
            {'_id': 'a80106f2-...', 'Marco': 'Polo'}
            >>> my_coll.find_one_and_update(
            ...     {"title": "Mr."},
            ...     {"$inc": {"rank": 3}},
            ...     projection=["title", "rank"],
            ...     return_document=astrapy.constants.ReturnDocument.AFTER,
            ... )
            {'_id': 'a80106f2-...', 'title': 'Mr.', 'rank': 3}
            >>> my_coll.find_one_and_update(
            ...     {"name": "Johnny"},
            ...     {"$set": {"rank": 0}},
            ...     return_document=astrapy.constants.ReturnDocument.AFTER,
            ... )
            >>> # (returns None for no matches)
            >>> my_coll.find_one_and_update(
            ...     {"name": "Johnny"},
            ...     {"$set": {"rank": 0}},
            ...     upsert=True,
            ...     return_document=astrapy.constants.ReturnDocument.AFTER,
            ... )
            {'_id': 'cb4ef2ab-...', 'name': 'Johnny', 'rank': 0}
        """

        chosen_timeout_ms, timeout_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        command_fields = {
            "filter": filter,
            "update": update,
            "options": {
                "returnDocument": return_document,
                "upsert": upsert,
            },
            "sort": sort,
            "projection": normalize_optional_projection(projection),
        }
        # fields left unset (i.e. None) are omitted from the command altogether
        command = {
            "findOneAndUpdate": {
                field: value
                for field, value in command_fields.items()
                if value is not None
            }
        }
        logger.info(f"findOneAndUpdate on '{self.name}'")
        response = self._converted_request(
            payload=command,
            timeout_context=_TimeoutContext(
                request_ms=chosen_timeout_ms, label=timeout_label
            ),
        )
        logger.info(f"finished findOneAndUpdate on '{self.name}'")

        response_data = response.get("data", {})
        if "document" not in response_data:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from find_one_and_update API command.",
                raw_response=response,
            )
        # a null "document" is passed on to the caller as None
        return response_data.get("document")  # type: ignore[no-any-return]

    def update_one(
        self,
        filter: FilterType,
        update: dict[str, Any],
        *,
        sort: SortType | None = None,
        upsert: bool = False,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionUpdateResult:
        """
        Apply the requested update to one document on the collection;
        if no match is found, optionally insert a new document.

        Args:
            filter: a dictionary expressing a predicate in the Data API
                filter syntax. Some examples:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                The Data API documentation lists all available operators.
            update: the prescription for the update to apply to the document,
                given as a dictionary in the Data API syntax. Some examples:
                    {"$set": {"field": "value"}}
                    {"$inc": {"counter": 10}}
                    {"$unset": {"field": ""}}
                The Data API documentation describes the full syntax.
            sort: a dictionary parameter controlling the order in which
                the documents matching the filter are sorted: in practice,
                this determines which document comes first and, as a
                consequence, is the updated one. More on sorting can be
                found in the `find` method. To obtain vector-based ANN
                sorting, supply a "$vector" or a "$vectorize" key in `sort`.
            upsert: the behavior to adopt when there are no matches.
                If True, a new document (the result of applying `update`
                to an empty document) gets inserted when no matches are
                found on the collection. If False, with no matches
                the operation silently does nothing.
            general_method_timeout_ms: a timeout, in milliseconds, applied to the
                underlying API request. When omitted, the defaults of this object
                are used. (Since the method issues one API request only, all
                timeout parameters are handled identically.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a CollectionUpdateResult object describing the outcome of
            the update operation.

        Raises:
            UnexpectedDataAPIResponseException: if the API response has
                no "status" entry.

        Example:
            >>> my_coll.insert_one({"Marco": "Polo"})
            CollectionInsertOneResult(...)
            >>> my_coll.update_one({"Marco": {"$exists": True}}, {"$inc": {"rank": 3}})
            CollectionUpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1})
            >>> my_coll.update_one({"Mirko": {"$exists": True}}, {"$inc": {"rank": 3}})
            CollectionUpdateResult(raw_results=..., update_info={'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0})
            >>> my_coll.update_one({"Mirko": {"$exists": True}}, {"$inc": {"rank": 3}}, upsert=True)
            CollectionUpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '2a45ff60-...'})
        """

        chosen_timeout_ms, timeout_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        command_fields = {
            "filter": filter,
            "update": update,
            "options": {
                "upsert": upsert,
            },
            "sort": sort,
        }
        # fields left unset (i.e. None) are omitted from the command altogether
        command = {
            "updateOne": {
                field: value
                for field, value in command_fields.items()
                if value is not None
            }
        }
        logger.info(f"updateOne on '{self.name}'")
        response = self._converted_request(
            payload=command,
            timeout_context=_TimeoutContext(
                request_ms=chosen_timeout_ms, label=timeout_label
            ),
        )
        logger.info(f"finished updateOne on '{self.name}'")

        if "status" not in response:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from updateOne API command.",
                raw_response=response,
            )
        return CollectionUpdateResult(
            raw_results=[response],
            update_info=_prepare_update_info([response["status"]]),
        )

    def update_many(
        self,
        filter: FilterType,
        update: dict[str, Any],
        *,
        upsert: bool = False,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionUpdateResult:
        """
        Apply an update operation to all documents matching a condition,
        optionally inserting one document in absence of matches.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            update: the update prescription to apply to the documents, expressed
                as a dictionary as per Data API syntax. Examples are:
                    {"$set": {"field": "value"}}
                    {"$inc": {"counter": 10}}
                    {"$unset": {"field": ""}}
                See the Data API documentation for the full syntax.
            upsert: this parameter controls the behavior in absence of matches.
                If True, a single new document (resulting from applying `update`
                to an empty document) is inserted if no matches are found on
                the collection. If False, the operation silently does nothing
                in case of no matches.
            general_method_timeout_ms: a timeout, in milliseconds, for the whole
                requested operation (which may involve multiple API requests).
                This method may entail successive HTTP API requests,
                depending on the amount of involved documents.
                If not passed, the collection-level setting is used instead.
            request_timeout_ms: a timeout, in milliseconds, for each API request.
                If not passed, the collection-level setting is used instead.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a CollectionUpdateResult object summarizing the outcome of
            the update operation.

        Raises:
            CollectionUpdateManyException: if one of the API responses carries
                a nonempty "errors" field. The exception holds a partial result
                built from the responses received before the failing one.
            UnexpectedDataAPIResponseException: if an error-free response
                lacks the "status" field.

        Example:
            >>> my_coll.insert_many([{"c": "red"}, {"c": "green"}, {"c": "blue"}])
            CollectionInsertManyResult(...)
            >>> my_coll.update_many({"c": {"$ne": "green"}}, {"$set": {"nongreen": True}})
            CollectionUpdateResult(raw_results=..., update_info={'n': 2, 'updatedExisting': True, 'ok': 1.0, 'nModified': 2})
            >>> my_coll.update_many({"c": "orange"}, {"$set": {"is_also_fruit": True}})
            CollectionUpdateResult(raw_results=..., update_info={'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0})
            >>> my_coll.update_many(
            ...     {"c": "orange"},
            ...     {"$set": {"is_also_fruit": True}},
            ...     upsert=True,
            ... )
            CollectionUpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '46643050-...'})

        Note:
            Similarly to the case of `find` (see its docstring for more details),
            running this command while, at the same time, another process is
            inserting new documents which match the filter of the `update_many`
            can result in an unpredictable fraction of these documents being updated.
            In other words, it cannot be easily predicted whether a given
            newly-inserted document will be picked up by the update_many command or not.
        """

        overall_timeout_ms, overall_timeout_label = _first_valid_timeout(
            (general_method_timeout_ms, "general_method_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.general_method_timeout_ms,
                "general_method_timeout_ms",
            ),
        )
        per_request_timeout_ms, per_request_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        collected_responses: list[dict[str, Any]] = []
        collected_statuses: list[dict[str, Any]] = []
        # Carries the "pageState" option from one request over to the next one.
        paging_options: dict[str, str] = {}
        logger.info(f"starting update_many on '{self.name}'")
        timeout_manager = MultiCallTimeoutManager(
            overall_timeout_ms=overall_timeout_ms,
            timeout_label=overall_timeout_label,
        )
        while True:
            command_fields = {
                "filter": filter,
                "update": update,
                "options": {"upsert": upsert, **paging_options},
            }
            payload = {
                "updateMany": {
                    field: value
                    for field, value in command_fields.items()
                    if value is not None
                }
            }
            logger.info(f"updateMany on '{self.name}'")
            response = self._converted_request(
                payload=payload,
                raise_api_errors=False,
                timeout_context=timeout_manager.remaining_timeout(
                    cap_time_ms=per_request_timeout_ms,
                    cap_timeout_label=per_request_label,
                ),
            )
            logger.info(f"finished updateMany on '{self.name}'")
            status = response.get("status") or {}

            if response.get("errors", []):
                # Stop at the first failing request, handing the caller a
                # summary of what the previous requests had accomplished.
                partial_result = CollectionUpdateResult(
                    raw_results=collected_responses,
                    update_info=_prepare_update_info(collected_statuses),
                )
                raise CollectionUpdateManyException(
                    partial_result=partial_result,
                    cause=DataAPIResponseException.from_response(
                        command=payload,
                        raw_response=response,
                    ),
                )

            if "status" not in response:
                raise UnexpectedDataAPIResponseException(
                    text="Faulty response from update_many API command.",
                    raw_response=response,
                )

            collected_responses.append(response)
            collected_statuses.append(status)
            next_page_state = status.get("nextPageState")
            if next_page_state is None:
                # No further pages: the operation is complete.
                break
            paging_options = {"pageState": next_page_state}

        update_info = _prepare_update_info(collected_statuses)
        logger.info(f"finished update_many on '{self.name}'")
        return CollectionUpdateResult(
            raw_results=collected_responses,
            update_info=update_info,
        )

    def find_one_and_delete(
        self,
        filter: FilterType,
        *,
        projection: ProjectionType | None = None,
        sort: SortType | None = None,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> DOC | None:
        """
        Find a document in the collection and delete it, handing the deleted
        document back to the caller as the return value of the method.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            projection: it controls which parts of the document are returned.
                It can be an allow-list: `{"f1": True, "f2": True}`,
                or a deny-list: `{"fx": False, "fy": False}`, but not a mixture
                (except for the `_id` and other special fields, which can be
                associated to both True or False independently of the rest
                of the specification).
                The special star-projections `{"*": True}` and `{"*": False}`
                have the effect of returning the whole document and `{}` respectively.
                For lists in documents, slice directives can be passed to select
                portions of the list: for instance, `{"array": {"$slice": 2}}`,
                `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
                `{"array": {"$slice": [-4, 2]}}`.
                An iterable over strings will be treated implicitly as an allow-list.
                The default projection (used if this parameter is not passed) does not
                necessarily include "special" fields such as `$vector` or `$vectorize`.
                See the Data API documentation for more on projections.
            sort: with this dictionary parameter one can control the sorting
                order of the documents matching the filter, effectively
                determining what document will come first and hence be the
                deleted one. See the `find` method for more on sorting.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            Either the document (or a projection thereof, as requested), or None
            if no matches were found in the first place.

        Raises:
            UnexpectedDataAPIResponseException: if the response has no document
                in its "data" field and does not report a "deletedCount"
                of zero in its "status" either.

        Example:
            >>> my_coll.insert_many(
            ...     [
            ...         {"species": "swan", "class": "Aves"},
            ...         {"species": "frog", "class": "Amphibia"},
            ...     ],
            ... )
            CollectionInsertManyResult(...)
            >>> my_coll.find_one_and_delete(
            ...     {"species": {"$ne": "frog"}},
            ...     projection=["species"],
            ... )
            {'_id': '5997fb48-...', 'species': 'swan'}
            >>> my_coll.find_one_and_delete({"species": {"$ne": "frog"}})
            >>> # (returns None for no matches)
        """

        chosen_timeout_ms, chosen_timeout_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        command_fields = {
            "filter": filter,
            "sort": sort,
            "projection": normalize_optional_projection(projection),
        }
        # Only the fields that are actually set are sent to the API.
        payload = {
            "findOneAndDelete": {
                field: value
                for field, value in command_fields.items()
                if value is not None
            }
        }
        logger.info(f"findOneAndDelete on '{self.name}'")
        response = self._converted_request(
            payload=payload,
            timeout_context=_TimeoutContext(
                request_ms=chosen_timeout_ms, label=chosen_timeout_label
            ),
        )
        logger.info(f"finished findOneAndDelete on '{self.name}'")

        if "document" in response.get("data", {}):
            return response["data"]["document"]  # type: ignore[no-any-return]
        # Without a document, the only valid response is "nothing was deleted".
        if response.get("status", {}).get("deletedCount") == 0:
            return None
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from find_one_and_delete API command.",
            raw_response=response,
        )

    def delete_one(
        self,
        filter: FilterType,
        *,
        sort: SortType | None = None,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionDeleteResult:
        """
        Delete one document matching a provided filter.
        At most one document is deleted by this method, no matter how many
        documents match the provided filter.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            sort: with this dictionary parameter one can control the sorting
                order of the documents matching the filter, effectively
                determining what document will come first and hence be the
                deleted one. See the `find` method for more on sorting.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a CollectionDeleteResult object summarizing the outcome of the
            delete operation.

        Raises:
            UnexpectedDataAPIResponseException: if the "status" field of the
                response does not provide a "deletedCount".

        Example:
            >>> my_coll.insert_many([{"seq": 1}, {"seq": 0}, {"seq": 2}])
            CollectionInsertManyResult(...)
            >>> my_coll.delete_one({"seq": 1})
            CollectionDeleteResult(raw_results=..., deleted_count=1)
            >>> my_coll.distinct("seq")
            [0, 2]
            >>> my_coll.delete_one(
            ...     {"seq": {"$exists": True}},
            ...     sort={"seq": astrapy.constants.SortMode.DESCENDING},
            ... )
            CollectionDeleteResult(raw_results=..., deleted_count=1)
            >>> my_coll.distinct("seq")
            [0]
            >>> my_coll.delete_one({"seq": 2})
            CollectionDeleteResult(raw_results=..., deleted_count=0)
        """

        chosen_timeout_ms, chosen_timeout_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        command_fields = {"filter": filter, "sort": sort}
        # Only the fields that are actually set are sent to the API.
        payload = {
            "deleteOne": {
                field: value
                for field, value in command_fields.items()
                if value is not None
            }
        }
        logger.info(f"deleteOne on '{self.name}'")
        response = self._converted_request(
            payload=payload,
            timeout_context=_TimeoutContext(
                request_ms=chosen_timeout_ms, label=chosen_timeout_label
            ),
        )
        logger.info(f"finished deleteOne on '{self.name}'")

        if "deletedCount" not in response.get("status", {}):
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from delete_one API command.",
                raw_response=response,
            )
        return CollectionDeleteResult(
            deleted_count=response["status"]["deletedCount"],
            raw_results=[response],
        )

    def delete_many(
        self,
        filter: FilterType,
        *,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> CollectionDeleteResult:
        """
        Delete all documents matching a provided filter.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
                Passing an empty filter, `{}`, completely erases all contents
                of the collection.
            general_method_timeout_ms: a timeout, in milliseconds, for the whole
                requested operation (which may involve multiple API requests).
                This method may entail successive HTTP API requests,
                depending on the amount of involved documents.
                If not passed, the collection-level setting is used instead.
            request_timeout_ms: a timeout, in milliseconds, for each API request.
                If not passed, the collection-level setting is used instead.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a CollectionDeleteResult object summarizing the outcome of the
            delete operation.

        Raises:
            CollectionDeleteManyException: if one of the API responses carries
                a nonempty "errors" field. The exception holds a partial result
                built from the responses received before the failing one.
            UnexpectedDataAPIResponseException: if an error-free response
                does not provide a "deletedCount" in its "status" field.

        Example:
            >>> my_coll.insert_many([{"seq": 1}, {"seq": 0}, {"seq": 2}])
            CollectionInsertManyResult(...)
            >>> my_coll.delete_many({"seq": {"$lte": 1}})
            CollectionDeleteResult(raw_results=..., deleted_count=2)
            >>> my_coll.distinct("seq")
            [2]
            >>> my_coll.delete_many({"seq": {"$lte": 1}})
            CollectionDeleteResult(raw_results=..., deleted_count=0)

        Note:
            This operation is in general not atomic. Depending on the amount
            of matching documents, it can keep running (in a blocking way)
            for a macroscopic time. In that case, new documents that are
            meanwhile inserted (e.g. from another process/application) will be
            deleted during the execution of this method call until the
            collection is devoid of matches.
            An exception is the `filter={}` case, whereby the operation is atomic.
        """

        overall_timeout_ms, overall_timeout_label = _first_valid_timeout(
            (general_method_timeout_ms, "general_method_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.general_method_timeout_ms,
                "general_method_timeout_ms",
            ),
        )
        per_request_timeout_ms, per_request_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        collected_responses: list[dict[str, Any]] = []
        total_deleted = 0
        timeout_manager = MultiCallTimeoutManager(
            overall_timeout_ms=overall_timeout_ms,
            timeout_label=overall_timeout_label,
        )
        # The very same command is re-sent until the API reports no more data.
        payload = {"deleteMany": {"filter": filter}}
        logger.info(f"starting delete_many on '{self.name}'")
        while True:
            logger.info(f"deleteMany on '{self.name}'")
            response = self._converted_request(
                payload=payload,
                raise_api_errors=False,
                timeout_context=timeout_manager.remaining_timeout(
                    cap_time_ms=per_request_timeout_ms,
                    cap_timeout_label=per_request_label,
                ),
            )
            logger.info(f"finished deleteMany on '{self.name}'")

            if response.get("errors", []):
                # Stop at the first failing request, handing the caller a
                # summary of what the previous requests had accomplished.
                partial_result = CollectionDeleteResult(
                    deleted_count=total_deleted,
                    raw_results=collected_responses,
                )
                raise CollectionDeleteManyException(
                    partial_result=partial_result,
                    cause=DataAPIResponseException.from_response(
                        command=payload,
                        raw_response=response,
                    ),
                )

            status = response.get("status", {})
            batch_deleted = status.get("deletedCount")
            if batch_deleted is None:
                raise UnexpectedDataAPIResponseException(
                    text="Faulty response from delete_many API command.",
                    raw_response=response,
                )
            collected_responses.append(response)
            total_deleted += batch_deleted
            if not status.get("moreData", False):
                break

        logger.info(f"finished delete_many on '{self.name}'")
        return CollectionDeleteResult(
            deleted_count=total_deleted,
            raw_results=collected_responses,
        )

    def drop(
        self,
        *,
        collection_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Drop the collection, i.e. delete it from the database along with
        all the documents it contains.

        The actual work is delegated to the `drop_collection` method of the
        database this collection belongs to.

        Args:
            collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `collection_admin_timeout_ms`.
            timeout_ms: an alias for `collection_admin_timeout_ms`.

        Example:
            >>> my_coll.find_one({})
            {'_id': '...', 'a': 100}
            >>> my_coll.drop()
            >>> my_coll.find_one({})
            Traceback (most recent call last):
                ... ...
            astrapy.exceptions.DataAPIResponseException: Collection does not exist, collection name: my_collection

        Note:
            Use with caution.

        Note:
            Once the method succeeds, methods on this object can still be invoked:
            however, this hardly makes sense as the underlying actual collection
            is no more.
            It is the responsibility of the developer to design a correct flow
            which avoids using a deceased collection any further.
        """

        # All timeout settings are forwarded untouched to the database method.
        timeout_kwargs = {
            "collection_admin_timeout_ms": collection_admin_timeout_ms,
            "request_timeout_ms": request_timeout_ms,
            "timeout_ms": timeout_ms,
        }
        logger.info(f"dropping collection '{self.name}' (self)")
        self.database.drop_collection(self.name, **timeout_kwargs)
        logger.info(f"finished dropping collection '{self.name}' (self)")

    def command(
        self,
        body: dict[str, Any] | None,
        *,
        raise_api_errors: bool = True,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> dict[str, Any]:
        """
        Send a POST request to the Data API for this collection with
        an arbitrary, caller-provided payload.
        The payload is passed along as is: no transformations or type
        conversions are applied to it.

        Args:
            body: a JSON-serializable dictionary, the payload of the request.
            raise_api_errors: if True, responses with a nonempty 'errors' field
                result in an astrapy exception being raised.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a dictionary with the response of the HTTP request.

        Example:
            >>> my_coll.command({"countDocuments": {}})
            {'status': {'count': 123}}
        """

        chosen_timeout_ms, chosen_timeout_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # For logging only: the sorted top-level keys of the payload,
        # or a placeholder for a missing/empty payload.
        command_label: str = ",".join(sorted(body.keys())) if body else "(none)"
        logger.info(f"command={command_label} on '{self.name}'")
        response = self._api_commander.request(
            payload=body,
            raise_api_errors=raise_api_errors,
            timeout_context=_TimeoutContext(
                request_ms=chosen_timeout_ms, label=chosen_timeout_label
            ),
        )
        logger.info(f"finished command={command_label} on '{self.name}'")
        return response

Ancestors

typing.Generic

Instance variables

var database : Database

The Database object this collection belongs to.

Example

>>> my_coll.database.name
'the_application_database'

Expand source code

@property
def database(self) -> Database:
    """
    The Database object this collection belongs to.

    Returns:
        the Database instance stored on this collection.

    Example:
        >>> my_coll.database.name
        'the_application_database'
    """

    return self._database

var full_name : str

The fully-qualified collection name within the database, in the form "keyspace.collection_name".

Example

>>> my_coll.full_name
'default_keyspace.my_v_collection'

Expand source code

@property
def full_name(self) -> str:
    """
    The fully-qualified name of the collection within the database,
    i.e. the keyspace and the collection name joined by a dot
    ("keyspace.collection_name").

    Example:
        >>> my_coll.full_name
        'default_keyspace.my_v_collection'
    """

    keyspace_part = self.keyspace
    name_part = self.name
    return f"{keyspace_part}.{name_part}"

var keyspace : str

The keyspace this collection is in.

Example

>>> my_coll.keyspace
'default_keyspace'

Expand source code

@property
def keyspace(self) -> str:
    """
    The keyspace this collection is in, as configured on its database.

    Raises:
        RuntimeError: if the database of this collection has its
            keyspace set to None.

    Example:
        >>> my_coll.keyspace
        'default_keyspace'
    """

    db_keyspace = self.database.keyspace
    if db_keyspace is not None:
        return db_keyspace
    raise RuntimeError("The collection's DB is set with keyspace=None")

var name : str

The name of this collection.

Example

>>> my_coll.name
'my_v_collection'

Expand source code

@property
def name(self) -> str:
    """
    The name of this collection.

    Returns:
        the collection name stored on this object.

    Example:
        >>> my_coll.name
        'my_v_collection'
    """

    return self._name

Methods

def command(self, body: dict[str, Any] | None, *, raise_api_errors: bool = True, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> dict[str, typing.Any]

Send a POST request to the Data API for this collection with an arbitrary, caller-provided payload. No transformations or type conversions are made on the provided payload.

Args

body: a JSON-serializable dictionary, the payload of the request.
raise_api_errors: if True, responses with a nonempty 'errors' field result in an astrapy exception being raised.
general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a dictionary with the response of the HTTP request.

Example

>>> my_coll.command({"countDocuments": {}})
{'status': {'count': 123}}

Expand source code

def command(
    self,
    body: dict[str, Any] | None,
    *,
    raise_api_errors: bool = True,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> dict[str, Any]:
    """
    Send a POST request to the Data API for this collection with
    an arbitrary, caller-provided payload.
    The payload is passed along as is: no transformations or type
    conversions are applied to it.

    Args:
        body: a JSON-serializable dictionary, the payload of the request.
        raise_api_errors: if True, responses with a nonempty 'errors' field
            result in an astrapy exception being raised.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a dictionary with the response of the HTTP request.

    Example:
        >>> my_coll.command({"countDocuments": {}})
        {'status': {'count': 123}}
    """

    chosen_timeout_ms, chosen_timeout_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # For logging only: the sorted top-level keys of the payload,
    # or a placeholder for a missing/empty payload.
    command_label: str = ",".join(sorted(body.keys())) if body else "(none)"
    logger.info(f"command={command_label} on '{self.name}'")
    response = self._api_commander.request(
        payload=body,
        raise_api_errors=raise_api_errors,
        timeout_context=_TimeoutContext(
            request_ms=chosen_timeout_ms, label=chosen_timeout_label
        ),
    )
    logger.info(f"finished command={command_label} on '{self.name}'")
    return response

def count_documents(self, filter: FilterType, *, upper_bound: int, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> int

Count the documents in the collection matching the specified filter.

Args

filter: a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: `{}`, `{"name": "John"}`, `{"price": {"$lt": 100}}`, `{"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}`. See the Data API documentation for the full set of operators.
upper_bound: a required ceiling on the result of the count operation. If the actual number of documents exceeds this value, an exception will be raised. Furthermore, if the actual number of documents exceeds the maximum count that the Data API can reach (regardless of upper_bound), an exception will be raised.
general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Returns

the exact count of matching documents.

Example

>>> my_coll.insert_many([{"seq": i} for i in range(20)])
CollectionInsertManyResult(...)
>>> my_coll.count_documents({}, upper_bound=100)
20
>>> my_coll.count_documents({"seq":{"$gt": 15}}, upper_bound=100)
4
>>> my_coll.count_documents({}, upper_bound=10)
Traceback (most recent call last):
    ... ...
astrapy.exceptions.TooManyDocumentsToCountException

Note

Expand source code

def count_documents(
    self,
    filter: FilterType,
    *,
    upper_bound: int,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> int:
    """
    Return the exact number of documents in the collection that match a filter.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        upper_bound: a mandatory ceiling for the count. If more documents
            than this value match the filter, an exception is raised.
            An exception is likewise raised, whatever the `upper_bound`,
            if the number of matches exceeds the maximum count that
            the Data API can reach.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        the exact count of matching documents.

    Raises:
        TooManyDocumentsToCountException: if the count is greater than
            `upper_bound`, or if the API response flags (through `moreData`)
            that the server-side counting limit was hit.
        UnexpectedDataAPIResponseException: if the status of the API
            response carries no `count` entry.

    Example:
        >>> my_coll.insert_many([{"seq": i} for i in range(20)])
        CollectionInsertManyResult(...)
        >>> my_coll.count_documents({}, upper_bound=100)
        20
        >>> my_coll.count_documents({"seq":{"$gt": 15}}, upper_bound=100)
        4
        >>> my_coll.count_documents({}, upper_bound=10)
        Traceback (most recent call last):
            ... ...
        astrapy.exceptions.TooManyDocumentsToCountException

    Note:
        Counting is an expensive operation: callers are advised to always
        pass a sensible `upper_bound` reflecting their expectations.
        Routinely counting large amounts of documents (i.e. in the thousands
        and more) is discouraged: an application-specific alternative
        is usually a better choice.
        The Data API enforces a hard upper limit on how many documents
        it will count: this method raises an exception when that limit
        is encountered.
    """

    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    cd_payload = {"countDocuments": {"filter": filter}}
    logger.info(f"countDocuments on '{self.name}'")
    cd_response = self._converted_request(
        payload=cd_payload,
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    logger.info(f"finished countDocuments on '{self.name}'")

    # a response without a count in its status cannot be interpreted at all
    if "count" not in cd_response.get("status", {}):
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from countDocuments API command.",
            raw_response=cd_response,
        )

    response_status = cd_response["status"]
    count: int = response_status["count"]
    # 'moreData' means the server stopped counting at its own ceiling:
    # this takes precedence over the caller-supplied upper bound.
    if response_status.get("moreData", False):
        raise TooManyDocumentsToCountException(
            text=f"Document count exceeds {count}, the maximum allowed by the server",
            server_max_count_exceeded=True,
        )
    if count > upper_bound:
        raise TooManyDocumentsToCountException(
            text="Document count exceeds required upper bound",
            server_max_count_exceeded=False,
        )
    return count

def delete_many(self, filter: FilterType, *, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> CollectionDeleteResult

Delete all documents matching a provided filter.

Args

filter: a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators. Passing an empty filter, {}, completely erases all contents of the collection.
general_method_timeout_ms: a timeout, in milliseconds, for the whole requested operation (which may involve multiple API requests). This method may entail successive HTTP API requests, depending on the amount of involved documents. If not passed, the collection-level setting is used instead.
request_timeout_ms: a timeout, in milliseconds, for each API request. If not passed, the collection-level setting is used instead.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a CollectionDeleteResult object summarizing the outcome of the delete operation.

Example

>>> my_coll.insert_many([{"seq": 1}, {"seq": 0}, {"seq": 2}])
CollectionInsertManyResult(...)
>>> my_coll.delete_many({"seq": {"$lte": 1}})
CollectionDeleteResult(raw_results=..., deleted_count=2)
>>> my_coll.distinct("seq")
[2]
>>> my_coll.delete_many({"seq": {"$lte": 1}})
CollectionDeleteResult(raw_results=..., deleted_count=0)

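Note

This operation is in general not atomic. Depending on the amount of matching documents, it can keep running (in a blocking way) for a macroscopic time. In that case, new documents that are meanwhile inserted (e.g. from another process/application) will be deleted during the execution of this method call until the collection is devoid of matches. An exception is the filter={} case, whereby the operation is atomic.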

Expand source code

def delete_many(
    self,
    filter: FilterType,
    *,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionDeleteResult:
    """
    Remove from the collection every document that matches a filter.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
            Passing an empty filter, `{}`, completely erases all contents
            of the collection.
        general_method_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation (which may involve multiple API requests).
            This method may entail successive HTTP API requests,
            depending on the amount of involved documents.
            If not passed, the collection-level setting is used instead.
        request_timeout_ms: a timeout, in milliseconds, for each API request.
            If not passed, the collection-level setting is used instead.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a CollectionDeleteResult object summarizing the outcome of the
        delete operation.

    Raises:
        CollectionDeleteManyException: if any of the API responses reports
            errors. The exception carries the partial result accumulated
            up to that point, along with the originating exception.
        UnexpectedDataAPIResponseException: if the status of an API
            response carries no `deletedCount` entry.

    Example:
        >>> my_coll.insert_many([{"seq": 1}, {"seq": 0}, {"seq": 2}])
        CollectionInsertManyResult(...)
        >>> my_coll.delete_many({"seq": {"$lte": 1}})
        CollectionDeleteResult(raw_results=..., deleted_count=2)
        >>> my_coll.distinct("seq")
        [2]
        >>> my_coll.delete_many({"seq": {"$lte": 1}})
        CollectionDeleteResult(raw_results=..., deleted_count=0)

    Note:
        This operation is in general not atomic. Depending on the amount
        of matching documents, it can keep running (in a blocking way)
        for a macroscopic time. In that case, new documents that are
        meanwhile inserted (e.g. from another process/application) will be
        deleted during the execution of this method call until the
        collection is devoid of matches.
        An exception is the `filter={}` case, whereby the operation is atomic.
    """

    _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
        (general_method_timeout_ms, "general_method_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (
            self.api_options.timeout_options.general_method_timeout_ms,
            "general_method_timeout_ms",
        ),
    )
    _request_timeout_ms, _rt_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    collected_responses: list[dict[str, Any]] = []
    total_deleted = 0
    timeout_manager = MultiCallTimeoutManager(
        overall_timeout_ms=_general_method_timeout_ms,
        timeout_label=_gmt_label,
    )
    # the very same command is re-issued until the API reports no more data
    this_dm_payload = {"deleteMany": {"filter": filter}}
    logger.info(f"starting delete_many on '{self.name}'")
    while True:
        logger.info(f"deleteMany on '{self.name}'")
        # API errors are not raised by the request itself: they are inspected
        # below, so that the partial result can be attached to the exception.
        batch_response = self._converted_request(
            payload=this_dm_payload,
            raise_api_errors=False,
            timeout_context=timeout_manager.remaining_timeout(
                cap_time_ms=_request_timeout_ms,
                cap_timeout_label=_rt_label,
            ),
        )
        logger.info(f"finished deleteMany on '{self.name}'")

        if batch_response.get("errors", []):
            # quit early, reporting what was deleted before the failure
            raise CollectionDeleteManyException(
                partial_result=CollectionDeleteResult(
                    deleted_count=total_deleted,
                    raw_results=collected_responses,
                ),
                cause=DataAPIResponseException.from_response(
                    command=this_dm_payload,
                    raw_response=batch_response,
                ),
            )

        batch_deleted = batch_response.get("status", {}).get("deletedCount")
        if batch_deleted is None:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from delete_many API command.",
                raw_response=batch_response,
            )
        collected_responses.append(batch_response)
        total_deleted += batch_deleted
        if not batch_response.get("status", {}).get("moreData", False):
            break

    logger.info(f"finished delete_many on '{self.name}'")
    return CollectionDeleteResult(
        deleted_count=total_deleted,
        raw_results=collected_responses,
    )

def delete_one(self, filter: FilterType, *, sort: SortType | None = None, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> CollectionDeleteResult

Delete one document matching a provided filter. This method never deletes more than a single document, regardless of the number of matches to the provided filters.

Args

filter: a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
sort: with this dictionary parameter one can control the sorting order of the documents matching the filter, effectively determining what document will come first and hence be the deleted one. See the find method for more on sorting.
general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a CollectionDeleteResult object summarizing the outcome of the delete operation.

Example

>>> my_coll.insert_many([{"seq": 1}, {"seq": 0}, {"seq": 2}])
CollectionInsertManyResult(...)
>>> my_coll.delete_one({"seq": 1})
CollectionDeleteResult(raw_results=..., deleted_count=1)
>>> my_coll.distinct("seq")
[0, 2]
>>> my_coll.delete_one(
...     {"seq": {"$exists": True}},
...     sort={"seq": astrapy.constants.SortMode.DESCENDING},
... )
CollectionDeleteResult(raw_results=..., deleted_count=1)
>>> my_coll.distinct("seq")
[0]
>>> my_coll.delete_one({"seq": 2})
CollectionDeleteResult(raw_results=..., deleted_count=0)

Expand source code

def delete_one(
    self,
    filter: FilterType,
    *,
    sort: SortType | None = None,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionDeleteResult:
    """
    Delete a single document among those matching a provided filter.
    At most one document is ever deleted by this method, no matter
    how many documents match the filter.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        sort: with this dictionary parameter one can control the sorting
            order of the documents matching the filter, effectively
            determining what document will come first and hence be the
            deleted one. See the `find` method for more on sorting.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a CollectionDeleteResult object summarizing the outcome of the
        delete operation.

    Raises:
        UnexpectedDataAPIResponseException: if the status of the API
            response carries no `deletedCount` entry.

    Example:
        >>> my_coll.insert_many([{"seq": 1}, {"seq": 0}, {"seq": 2}])
        CollectionInsertManyResult(...)
        >>> my_coll.delete_one({"seq": 1})
        CollectionDeleteResult(raw_results=..., deleted_count=1)
        >>> my_coll.distinct("seq")
        [0, 2]
        >>> my_coll.delete_one(
        ...     {"seq": {"$exists": True}},
        ...     sort={"seq": astrapy.constants.SortMode.DESCENDING},
        ... )
        CollectionDeleteResult(raw_results=..., deleted_count=1)
        >>> my_coll.distinct("seq")
        [0]
        >>> my_coll.delete_one({"seq": 2})
        CollectionDeleteResult(raw_results=..., deleted_count=0)
    """

    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # only the settings that were actually provided make it into the command
    candidate_fields = (("filter", filter), ("sort", sort))
    do_payload = {
        "deleteOne": {
            field_name: field_value
            for field_name, field_value in candidate_fields
            if field_value is not None
        }
    }
    logger.info(f"deleteOne on '{self.name}'")
    do_response = self._converted_request(
        payload=do_payload,
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    logger.info(f"finished deleteOne on '{self.name}'")

    if "deletedCount" not in do_response.get("status", {}):
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from delete_one API command.",
            raw_response=do_response,
        )
    return CollectionDeleteResult(
        deleted_count=do_response["status"]["deletedCount"],
        raw_results=[do_response],
    )

Return a list of the unique values of key across the documents in the collection that match the provided filter.

Args

key: the name of the field whose value is inspected across documents. Keys can be just field names (as is often the case), but the dot-notation is also accepted to mean subkeys or indices within lists (for example, "map_field.subkey" or "list_field.2"). If a field has literal dots or ampersands in its name, this parameter must be escaped to be treated properly. The key can also be a list of strings and numbers, in which case no escape is necessary: each item in the list is a field name/index, for example ["map_field", "subkey"] or ["list_field", 2]. If lists are encountered and no numeric index is specified, all items in the list are visited.
filter: a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
general_method_timeout_ms: a timeout, in milliseconds, for the whole requested operation (which may involve multiple API requests). This method, being based on find (see), may entail successive HTTP API requests, depending on the amount of involved documents.
request_timeout_ms: a timeout, in milliseconds, for each API request.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a list of all different values for key found across the documents that match the filter. The result list has no repeated items.

Example

>>> my_coll.insert_many(
...     [
...         {"name": "Marco", "food": ["apple", "orange"], "city": "Helsinki"},
...         {"name": "Emma", "food": {"likes_fruit": True, "allergies": []}},
...     ]
... )
CollectionInsertManyResult(raw_results=..., inserted_ids=['c5b99f37-...', 'd6416321-...'])
>>> my_coll.distinct("name")
['Marco', 'Emma']
>>> my_coll.distinct("city")
['Helsinki']
>>> my_coll.distinct("food")
['apple', 'orange', {'likes_fruit': True, 'allergies': []}]
>>> my_coll.distinct("food.1")
['orange']
>>> my_coll.distinct("food.allergies")
[]
>>> my_coll.distinct("food.likes_fruit")
[True]

Note

For details on the behaviour of "distinct" in conjunction with real-time changes in the collection contents, see the Note of the find command.

Expand source code

def distinct(
    self,
    key: str | Iterable[str | int],
    *,
    filter: FilterType | None = None,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[Any]:
    """
    Collect the unique values found for `key` across the documents
    in the collection that match the provided filter.

    Args:
        key: the name of the field whose value is inspected across documents.
            Keys can be just field names (as is often the case), but
            the dot-notation is also accepted to mean subkeys or indices
            within lists (for example, "map_field.subkey" or "list_field.2").
            If a field has literal dots or ampersands in its name, this
            parameter must be escaped to be treated properly.
            The key can also be a list of strings and numbers, in which case
            no escape is necessary: each item in the list is a field name/index,
            for example ["map_field", "subkey"] or ["list_field", 2].
            If lists are encountered and no numeric index is specified,
            all items in the list are visited.
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        general_method_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation (which may involve multiple API requests).
            This method, being based on `find` (see), may entail successive
            HTTP API requests, depending on the amount of involved documents.
        request_timeout_ms: a timeout, in milliseconds, for each API request.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a list of all different values for `key` found across the documents
        that match the filter. The result list has no repeated items, and
        values appear in the order they are first encountered.

    Example:
        >>> my_coll.insert_many(
        ...     [
        ...         {"name": "Marco", "food": ["apple", "orange"], "city": "Helsinki"},
        ...         {"name": "Emma", "food": {"likes_fruit": True, "allergies": []}},
        ...     ]
        ... )
        CollectionInsertManyResult(raw_results=..., inserted_ids=['c5b99f37-...', 'd6416321-...'])
        >>> my_coll.distinct("name")
        ['Marco', 'Emma']
        >>> my_coll.distinct("city")
        ['Helsinki']
        >>> my_coll.distinct("food")
        ['apple', 'orange', {'likes_fruit': True, 'allergies': []}]
        >>> my_coll.distinct("food.1")
        ['orange']
        >>> my_coll.distinct("food.allergies")
        []
        >>> my_coll.distinct("food.likes_fruit")
        [True]

    Note:
        `distinct` runs on the client side: all required documents are
        browsed with the logic of the `find` method, and the unique values
        found for `key` are gathered along the way.
        With a large amount of matching documents this may have performance,
        latency and ultimately billing implications.

    Note:
        For details on the behaviour of "distinct" in conjunction with
        real-time changes in the collection contents, see the
        Note of the `find` command.
    """

    # lazy-import here to avoid circular import issues
    from astrapy.cursors import CollectionFindCursor

    _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
        (general_method_timeout_ms, "general_method_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (
            self.api_options.timeout_options.general_method_timeout_ms,
            "general_method_timeout_ms",
        ),
    )
    _request_timeout_ms, _rt_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    # preparing cursor:
    _extractor = _create_document_key_extractor(key)
    _key = _reduce_distinct_key_to_safe(key)
    # relaxing the type hint (limited to within this method body)
    f_cursor: CollectionFindCursor[dict[str, Any], dict[str, Any]] = (
        CollectionFindCursor(
            collection=self,
            request_timeout_ms=_request_timeout_ms,
            overall_timeout_ms=_general_method_timeout_ms,
            request_timeout_label=_rt_label,
            overall_timeout_label=_gmt_label,
        )  # type: ignore[assignment]
        .filter(filter)
        .project({_key: True})
    )
    # consuming it: uniqueness is tracked through hashes of the values,
    # while the values themselves are kept in first-seen order.
    seen_hashes = set()
    unique_values: list[Any] = []
    logger.info(f"running distinct() on '{self.name}'")
    for document in f_cursor:
        for candidate in _extractor(document):
            candidate_hash = _hash_collection_document(
                candidate, options=self.api_options.serdes_options
            )
            if candidate_hash in seen_hashes:
                continue
            seen_hashes.add(candidate_hash)
            unique_values.append(candidate)
    logger.info(f"finished running distinct() on '{self.name}'")
    return unique_values

def drop(self, *, collection_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Drop the collection, i.e. delete it from the database along with all the documents it contains.

Args

collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for collection_admin_timeout_ms.
timeout_ms: an alias for collection_admin_timeout_ms.

Example

>>> my_coll.find_one({})
{'_id': '...', 'a': 100}
>>> my_coll.drop()
>>> my_coll.find_one({})
Traceback (most recent call last):
    ... ...
astrapy.exceptions.DataAPIResponseException: Collection does not exist, collection name: my_collection

Note

Use with caution.

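Note

Once the method succeeds, methods on this object can still be invoked: however, this hardly makes sense as the underlying actual collection is no more. It is the responsibility of the developer to design a correct flow which avoids using a deceased collection any further.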

Expand source code

def drop(
    self,
    *,
    collection_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Drop the collection, i.e. remove it from the database together with
    all the documents stored in it.

    The operation is delegated to the `drop_collection` method of the
    database this collection belongs to.

    Args:
        collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `collection_admin_timeout_ms`.
        timeout_ms: an alias for `collection_admin_timeout_ms`.

    Example:
        >>> my_coll.find_one({})
        {'_id': '...', 'a': 100}
        >>> my_coll.drop()
        >>> my_coll.find_one({})
        Traceback (most recent call last):
            ... ...
        astrapy.exceptions.DataAPIResponseException: Collection does not exist, collection name: my_collection

    Note:
        Use with caution.

    Note:
        After the method succeeds, this object can technically still be used
        to invoke methods: doing so, however, hardly makes sense, since the
        underlying actual collection does not exist anymore.
        It is the responsibility of the developer to design a correct flow
        which avoids using a dropped collection any further.
    """

    logger.info(f"dropping collection '{self.name}' (self)")
    # the timeout parameters are handed over untouched to the database method
    owning_database = self.database
    owning_database.drop_collection(
        self.name,
        collection_admin_timeout_ms=collection_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info(f"finished dropping collection '{self.name}' (self)")

def estimated_document_count(self, *, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> int

Query the API server for an estimate of the document count in the collection.

Contrary to count_documents, this method has no filtering parameters.

Args

general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a server-provided estimate count of the documents in the collection.

Example

>>> my_coll.estimated_document_count()
35700

Expand source code

def estimated_document_count(
    self,
    *,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> int:
    """
    Ask the API server for an estimate of how many documents
    the collection contains.

    Unlike `count_documents`, this method accepts no filtering parameters.

    Args:
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a server-provided estimate count of the documents in the collection.

    Raises:
        UnexpectedDataAPIResponseException: if the status of the API
            response carries no `count` entry.

    Example:
        >>> my_coll.estimated_document_count()
        35700
    """

    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # the command takes no arguments: its body is an empty object
    ed_payload: dict[str, Any] = {"estimatedDocumentCount": {}}
    logger.info(f"estimatedDocumentCount on '{self.name}'")
    ed_response = self._converted_request(
        payload=ed_payload,
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    logger.info(f"finished estimatedDocumentCount on '{self.name}'")

    if "count" not in ed_response.get("status", {}):
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from estimatedDocumentCount API command.",
            raw_response=ed_response,
        )
    count: int = ed_response["status"]["count"]
    return count

Find documents on the collection, matching a certain provided filter.

Args

filter: a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
projection: it controls which parts of the document are returned. It can be an allow-list: {"f1": True, "f2": True}, or a deny-list: {"fx": False, "fy": False}, but not a mixture (except for the _id and other special fields, which can be associated to both True or False independently of the rest of the specification). The special star-projections {"*": True} and {"*": False} have the effect of returning the whole document and {} respectively. For lists in documents, slice directives can be passed to select portions of the list: for instance, {"array": {"$slice": 2}}, {"array": {"$slice": -2}}, {"array": {"$slice": [4, 2]}} or {"array": {"$slice": [-4, 2]}}. An iterable over strings will be treated implicitly as an allow-list. The default projection (used if this parameter is not passed) does not necessarily include "special" fields such as $vector or $vectorize. See the Data API documentation for more on projections.
document_type: this parameter acts as a formal specifier for the type checker. If omitted, the resulting cursor is implicitly a CollectionFindCursor[DOC, DOC], i.e. maintains the same type for the items it returns as that for the documents in the collection. Strictly typed code may want to specify this parameter especially when a projection is given.
skip: with this integer parameter, what would be the first skip documents returned by the query are discarded, and the results start from the (skip+1)-th document. This parameter can be used only in conjunction with an explicit sort criterion of the ascending/descending type (i.e. it cannot be used when not sorting, nor with vector-based ANN search).
limit: this (integer) parameter sets a limit over how many documents are returned. Once limit is reached (or the cursor is exhausted for lack of matching documents), nothing more is returned.
include_similarity: a boolean to request the numeric value of the similarity to be returned as an added "$similarity" key in each returned document. It can be used meaningfully only in a vector search (see sort).
include_sort_vector: a boolean to request the search query vector. If set to True (and if the invocation is a vector search), calling the get_sort_vector method on the returned cursor will yield the vector used for the ANN search.
sort: with this dictionary parameter one can control the order the documents are returned. See the Note about sorting, as well as the one about upper bounds, for details. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
request_timeout_ms: a timeout, in milliseconds, for each single one of the underlying HTTP requests used to fetch documents as the cursor is iterated over. If not passed, the collection-level setting is used instead.
timeout_ms: an alias for request_timeout_ms.

Returns

a CollectionFindCursor object, that can be iterated over (and manipulated in several ways). The cursor, if needed, handles pagination under the hood as the documents are consumed.

Examples

>>> filter = {"seq": {"$exists": True}}
>>> for doc in my_coll.find(filter, projection={"seq": True}, limit=5):
...     print(doc["seq"])
...
37
35
10
36
27
>>> cursor1 = my_coll.find(
...     {},
...     limit=4,
...     sort={"seq": astrapy.constants.SortMode.DESCENDING},
... )
>>> [doc["_id"] for doc in cursor1]
['97e85f81-...', '1581efe4-...', '...', '...']
>>> cursor2 = my_coll.find({}, limit=3)

>>> my_coll.insert_many([
...     {"tag": "A", "$vector": [4, 5]},
...     {"tag": "B", "$vector": [3, 4]},
...     {"tag": "C", "$vector": [3, 2]},
...     {"tag": "D", "$vector": [4, 1]},
...     {"tag": "E", "$vector": [2, 5]},
... ])
>>> ann_tags = [
...     document["tag"]
...     for document in my_coll.find(
...         {},
...         sort={"$vector": [3, 3]},
...         limit=3,
...     )
... ]
>>> ann_tags
['A', 'B', 'C']
>>> # (assuming the collection has metric VectorMetric.COSINE)

>>> cursor = my_coll.find(
...     sort={"$vector": [3, 3]},
...     limit=3,
...     include_sort_vector=True,
... )
>>> cursor.get_sort_vector()
[3.0, 3.0]
>>> matches = cursor.to_list()
>>> cursor.get_sort_vector()
[3.0, 3.0]

Note

The following are example values for the sort parameter. When no particular order is required: sort={} # (default when parameter not provided) When sorting by a certain value in ascending/descending order: sort={"field": SortMode.ASCENDING} sort={"field": SortMode.DESCENDING} When sorting first by "field" and then by "subfield" (while modern Python versions preserve the order of dictionaries, it is suggested for clarity to employ a collections.OrderedDict in these cases): sort={ "field": SortMode.ASCENDING, "subfield": SortMode.ASCENDING, } When running a vector similarity (ANN) search: sort={"$vector": [0.4, 0.15, -0.5]}

Note

Expand source code

def find(
    self,
    filter: FilterType | None = None,
    *,
    projection: ProjectionType | None = None,
    document_type: type[DOC2] | None = None,
    skip: int | None = None,
    limit: int | None = None,
    include_similarity: bool | None = None,
    include_sort_vector: bool | None = None,
    sort: SortType | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionFindCursor[DOC, DOC2]:
    """
    Search the collection for the documents that satisfy a provided filter.

    What this method returns is a cursor, to be iterated over in order to
    get the documents. Depending on the method call pattern, iterating over
    all documents may or may not reflect the mutations that occurred on the
    collection since the `find` method was called.
    In cases where the cursor reflects mutations in real-time, the iteration
    over the documents is approximate (i.e. it may exhibit occasional skipped
    or duplicate documents). This happens when making use of the `sort`
    option in a non-vector-search manner.

    Args:
        filter: a dictionary expressing a predicate, according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            The full set of operators is found in the Data API documentation.
        projection: this determines which parts of the document are returned.
            It is either an allow-list, such as `{"f1": True, "f2": True}`,
            or a deny-list, such as `{"fx": False, "fy": False}`, but not
            a mixture of the two (with the exception of `_id` and other
            special fields, which can be set to either True or False
            independently of the rest of the specification).
            The special star-projections `{"*": True}` and `{"*": False}`
            result in returning the whole document and `{}` respectively.
            For lists in documents, portions of the list can be selected
            through slice directives: for instance, `{"array": {"$slice": 2}}`,
            `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
            `{"array": {"$slice": [-4, 2]}}`.
            An iterable over strings is implicitly treated as an allow-list.
            The default projection (applied when this parameter is omitted)
            does not necessarily include "special" fields such as `$vector`
            or `$vectorize`.
            More on projections is found in the Data API documentation.
        document_type: this parameter acts as a formal specifier for the type
            checker. If omitted, the resulting cursor is implicitly a
            `CollectionFindCursor[DOC, DOC]`, i.e. the items it returns have
            the same type as the documents in the collection.
            Strictly typed code may want to specify this parameter,
            especially when a projection is given.
        skip: an integer: what would be the first `skip` documents returned
            by the query are discarded, and the results start from
            the (skip+1)-th document.
            This parameter can be used only together with an explicit `sort`
            criterion of the ascending/descending type (i.e. it cannot
            be used when not sorting, nor with vector-based ANN search).
        limit: an integer, capping how many documents are returned.
            Once `limit` is reached (or the cursor is exhausted for lack
            of matching documents), nothing more is returned.
        include_similarity: a boolean to request that the numeric value of
            the similarity be returned as an added "$similarity" key in each
            returned document. It can be used meaningfully only in a vector
            search (see `sort`).
        include_sort_vector: a boolean to request the search query vector.
            If set to True (and if the invocation is a vector search), calling
            the `get_sort_vector` method on the returned cursor will yield
            the vector used for the ANN search.
        sort: a dictionary controlling the order in which the documents
            are returned. See the Note about sorting, as well as
            the one about upper bounds, for details.
            Vector-based ANN sorting is achieved by providing a "$vector"
            or a "$vectorize" key in `sort`.
        request_timeout_ms: a timeout, in milliseconds, applied to each single
            one of the underlying HTTP requests used to fetch documents
            as the cursor is iterated over.
            If not passed, the collection-level setting is used instead.
        timeout_ms: an alias for `request_timeout_ms`.

    Returns:
        a CollectionFindCursor object, which can be iterated over (and
        manipulated in several ways). The cursor takes care of pagination,
        if needed, under the hood as the documents are consumed.

    Examples:
        >>> filter = {"seq": {"$exists": True}}
        >>> for doc in my_coll.find(filter, projection={"seq": True}, limit=5):
        ...     print(doc["seq"])
        ...
        37
        35
        10
        36
        27
        >>> cursor1 = my_coll.find(
        ...     {},
        ...     limit=4,
        ...     sort={"seq": astrapy.constants.SortMode.DESCENDING},
        ... )
        >>> [doc["_id"] for doc in cursor1]
        ['97e85f81-...', '1581efe4-...', '...', '...']
        >>> cursor2 = my_coll.find({}, limit=3)

        >>> my_coll.insert_many([
        ...     {"tag": "A", "$vector": [4, 5]},
        ...     {"tag": "B", "$vector": [3, 4]},
        ...     {"tag": "C", "$vector": [3, 2]},
        ...     {"tag": "D", "$vector": [4, 1]},
        ...     {"tag": "E", "$vector": [2, 5]},
        ... ])
        >>> ann_tags = [
        ...     document["tag"]
        ...     for document in my_coll.find(
        ...         {},
        ...         sort={"$vector": [3, 3]},
        ...         limit=3,
        ...     )
        ... ]
        >>> ann_tags
        ['A', 'B', 'C']
        >>> # (assuming the collection has metric VectorMetric.COSINE)

        >>> cursor = my_coll.find(
        ...     sort={"$vector": [3, 3]},
        ...     limit=3,
        ...     include_sort_vector=True,
        ... )
        >>> cursor.get_sort_vector()
        [3.0, 3.0]
        >>> matches = cursor.to_list()
        >>> cursor.get_sort_vector()
        [3.0, 3.0]

    Note:
        Here are some example values for the `sort` parameter.
        When no particular order is required:
            sort={}  # (default when parameter not provided)
        When sorting by a certain value in ascending/descending order:
            sort={"field": SortMode.ASCENDING}
            sort={"field": SortMode.DESCENDING}
        When sorting first by "field" and then by "subfield"
        (while modern Python versions preserve the order of dictionaries,
        it is suggested for clarity to employ a `collections.OrderedDict`
        in these cases):
            sort={
                "field": SortMode.ASCENDING,
                "subfield": SortMode.ASCENDING,
            }
        When running a vector similarity (ANN) search:
            sort={"$vector": [0.4, 0.15, -0.5]}

    Note:
        Certain combinations of arguments result in an implicit upper bound
        on the number of documents returned by the Data API. More specifically:
        (a) Vector ANN searches cannot return more than a number of documents
        that at the time of writing is set to 1000 items.
        (b) When using a sort criterion of the ascending/descending type,
        the Data API will return a smaller number of documents, set to 20
        at the time of writing, and stop there. The returned documents are
        the top results across the whole collection according to the requested
        criterion.

    Note:
        When no sorting criteria are specified at all (by vector or otherwise),
        the cursor can scroll through an arbitrary number of documents as
        the Data API and the client periodically exchange new chunks of documents.
        It should be noted that the behavior of the cursor in the case documents
        have been added/removed after the `find` was started depends on database
        internals and it is not guaranteed, nor excluded, that such "real-time"
        changes in the data would be picked up by the cursor.
    """

    # imported at call time, as a module-level import would be circular
    from astrapy.cursors import CollectionFindCursor

    # the first valid entry wins: explicit parameter, then its alias,
    # and finally the setting found in this collection's API options
    timeout_candidates = (
        (request_timeout_ms, "request_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    resolved_timeout_ms, timeout_label = _first_valid_timeout(*timeout_candidates)

    base_cursor = CollectionFindCursor(
        collection=self,
        request_timeout_ms=resolved_timeout_ms,
        overall_timeout_ms=None,
        request_timeout_label=timeout_label,
    )
    # the query settings are applied one after the other to the blank cursor
    return (
        base_cursor.filter(filter)
        .project(projection)
        .skip(skip)
        .limit(limit)
        .sort(sort)
        .include_similarity(include_similarity)
        .include_sort_vector(include_sort_vector)
    )

Find relevant documents, combining vector and lexical matches through reranking.

For this method to succeed, the collection must be created with the required hybrid capabilities (see the create_collection method of the Database class).

The method returns a cursor that can then be iterated over, which yields the resulting documents, generally paired with accompanying information such as scores.

Args

filter: a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
sort: a clause specifying the criteria for selecting the top matching documents. This must provide enough information for both a lexical and a vector similarity to be performed (the latter either query text or by query vector, depending on the collection configuration). Examples are: sort={"$hybrid": "xyz"}, sort={"$hybrid": {"$vectorize": "xyz", "$lexical": "abc"}}, sort={"$hybrid": {"$vector": DataAPIVector(...), "$lexical": "abc"}}. Note this differs from the sort parameter for the find method.
projection: it controls which parts of the document are returned. It can be an allow-list: {"f1": True, "f2": True}, or a deny-list: {"fx": False, "fy": False}, but not a mixture (except for the _id and other special fields, which can be associated to both True or False independently of the rest of the specification). The special star-projections {"*": True} and {"*": False} have the effect of returning the whole document and {} respectively. For lists in documents, slice directives can be passed to select portions of the list: for instance, {"array": {"$slice": 2}}, {"array": {"$slice": -2}}, {"array": {"$slice": [4, 2]}} or {"array": {"$slice": [-4, 2]}}. An iterable over strings will be treated implicitly as an allow-list. The default projection (used if this parameter is not passed) does not necessarily include "special" fields such as $vector or $vectorize. See the Data API documentation for more on projections.
document_type: this parameter acts as a formal specifier for the type checker. If omitted, the resulting cursor is implicitly a CollectionFindAndRerankCursor[DOC, DOC], i.e. maintains the same type for the items it returns as that for the documents in the collection. Strictly typed code may want to specify this parameter especially when a projection is given.
limit: maximum number of documents to return as the result of the final rerank step.
hybrid_limits: this controls the amount of documents that are fetched by each of the individual retrieval operations that are combined in the rerank step. It can be either a number or a dictionary of strings to numbers, the latter case expressing different counts for the different retrievals. For example: hybrid_limits=50, hybrid_limits={"$vector": 20, "$lexical": 10}.
include_scores: a boolean to request the scores to be returned along with the resulting documents. If this is set, the scores can be read in the scores map attribute of each RerankedResult (the map is otherwise empty).
include_sort_vector: a boolean to request the search query vector used for the vector-search part of the find operation. If set to True, calling the get_sort_vector method on the returned cursor will yield the vector used for the ANN search.
rerank_on: for collections without a vectorize (server-side embeddings) service, this is used to specify the field name that is then used during reranking.
rerank_query: for collections without a vectorize (server-side embeddings) service, this is used to specify the query text for the reranker.
request_timeout_ms: a timeout, in milliseconds, for each single one of the underlying HTTP requests used to fetch documents as the cursor is iterated over. If not passed, the collection-level setting is used instead.
timeout_ms: an alias for request_timeout_ms.

Returns

a CollectionFindAndRerankCursor object, that can be iterated over (and manipulated in several ways).

Examples

>>> # The following examples assume a collection with 'vectorize' and the
>>> # necessary hybrid configuration; see below for a non-vectorize case.
>>>
>>> # Populate with documents
>>> my_vectorize_coll.insert_many([
...     {
...         "_id": "A",
...         "wkd": "Mon",
...         "$vectorize": "Monday is green",
...         "$lexical": "Monday is green",
...     },
...     {
...         "_id": "B",
...         "wkd": "Tue",
...         "$vectorize": "Tuesday is pink",
...         "$lexical": "Tuesday is pink",
...     },
...     {
...         "_id": "C",
...         "wkd": "Wed",
...         "$vectorize": "Wednesday is cyan",
...         "$lexical": "Wednesday is cyan",
...     },
...     {
...         "_id": "D",
...         "wkd": "Thu",
...         "$vectorize": "Thursday is red",
...         "$lexical": "Thursday is red",
...     },
...     {
...         "_id": "E",
...         "wkd": "Fri",
...         "$vectorize": "Friday is orange",
...         "$lexical": "Friday is orange",
...     },
...     {
...         "_id": "F",
...         "wkd": "Sat",
...         "$vectorize": "Saturday is purple",
...         "$lexical": "Saturday is purple",
...     },
...     {
...         "_id": "G",
...         "wkd": "Sun",
...         "$vectorize": "Sunday is beige",
...         "$lexical": "Sunday is beige",
...     },
... ])
CollectionInsertManyResult(inserted_ids=[A, B, C, D, E ... (7 total)], raw_results=...)
>>>
>>> # A simple invocation, consuming the cursor
>>> # with a loop ('vectorize' collection):
>>> for r_result in my_vectorize_coll.find_and_rerank(
...     sort={"$hybrid": "Weekdays?"},
...     limit=2,
... ):
...     print(r_result.document)
...
{'_id': 'C', 'wkd': 'Wed'}
{'_id': 'A', 'wkd': 'Mon'}
>>> # Additional arbitrary filtering predicates
>>> # ('vectorize' collection):
>>> for r_result in my_vectorize_coll.find_and_rerank(
...     {"wkd": {"$ne": "Mon"}},
...     sort={"$hybrid": "Weekdays?"},
...     limit=2,
... ):
...     print(r_result.document)
...
{'_id': 'C', 'wkd': 'Wed'}
{'_id': 'B', 'wkd': 'Tue'}
>>> # Fetch the scores with the documents ('vectorize' collection):
>>> scored_texts = [
...     (r_result.document["wkd"], r_result.scores["$rerank"])
...     for r_result in my_vectorize_coll.find_and_rerank(
...         sort={"$hybrid": "Weekdays?"},
...         limit=2,
...         include_scores=True,
...     )
... ]
>>> print(scored_texts)
[('Wed', -9.1015625), ('Mon', -10.2421875)]
>>>
>>> # Customize sub-search limits ('vectorize' collection):
>>> hits = my_vectorize_coll.find_and_rerank(
...     sort={"$hybrid": "Weekdays?"},
...     limit=2,
...     hybrid_limits=20,
... ).to_list()
>>> print(", ".join(r_res.document["wkd"] for r_res in hits))
Wed, Mon
>>>
>>> # Separate sub-search queries ('vectorize' collection):
>>> cursor = my_vectorize_coll.find_and_rerank(
...     sort={
...         "$hybrid": {
...             "$vectorize": "a week day",
...             "$lexical": "green",
...         },
...     },
...     limit=2,
...     hybrid_limits={"$lexical": 4, "$vector": 20},
... )
>>> print(", ".join(r_res.document["wkd"] for r_res in cursor))
Mon, Wed
>>>
>>> # Reading back the query vector used by
>>> # the search ('vectorize' collection):
>>> cursor = my_vectorize_coll.find_and_rerank(
...     sort={"$hybrid": "Weekdays?"},
...     limit=2,
...     include_sort_vector=True
... )
>>> sort_vector = cursor.get_sort_vector()
>>> print(" ==> ".join(
...     r_res.document["wkd"] for r_res in cursor
... ))
Wed ==> Mon
>>> print(f"Sort vector={sort_vector}")
Sort vector=[-0.0021172, -0.012057612, 0.010362527 ...]
>>>
>>>
>>> # If the collection has no "vectorize", `rerank_query`
>>> # and `rerank_on` must be passed. The following assumes a
>>> # collection with a 3-dimensional vector and the setup for hybrid.
>>>
>>> # Populate with documents:
>>> my_vector3d_coll.insert_many([
...     {
...         "_id": "A",
...         "wkd": "Mon",
...         "$vector": [0.1, 0.2, 0.3],
...         "$lexical": "Monday is green",
...     },
...     {
...         "_id": "B",
...         "wkd": "Tue",
...         "$vector": [0.2, 0.3, 0.4],
...         "$lexical": "Tuesday is pink",
...     },
...     {
...         "_id": "C",
...         "wkd": "Wed",
...         "$vector": [0.3, 0.4, 0.5],
...         "$lexical": "Wednesday is cyan",
...     },
...     {
...         "_id": "D",
...         "wkd": "Thu",
...         "$vector": [0.4, 0.5, 0.6],
...         "$lexical": "Thursday is red",
...     },
...     {
...         "_id": "E",
...         "wkd": "Fri",
...         "$vector": [0.5, 0.6, 0.7],
...         "$lexical": "Friday is orange",
...     },
...     {
...         "_id": "F",
...         "wkd": "Sat",
...         "$vector": [0.6, 0.7, 0.8],
...         "$lexical": "Saturday is purple",
...     },
...     {
...         "_id": "G",
...         "wkd": "Sun",
...         "$vector": [0.7, 0.8, 0.9],
...         "$lexical": "Sunday is beige",
...     },
... ])
CollectionInsertManyResult(inserted_ids=[A, B, C, D, E ... (7 total)], raw_results=...)
>>>
>>> # A simple find_and_rerank call (collection without 'vectorize'):
>>> for r_result in my_vector3d_coll.find_and_rerank(
...     sort={
...         "$hybrid": {
...             "$vector": [0.9, 0.8, 0.7],
...             "$lexical": "Weekdays?",
...         },
...     },
...     limit=2,
...     rerank_on="wkd",
...     rerank_query="week days",
... ):
...     print(r_result.document["wkd"])
...
Mon
Tue

Expand source code

@beta_method
def find_and_rerank(
    self,
    filter: FilterType | None = None,
    *,
    sort: HybridSortType,
    projection: ProjectionType | None = None,
    document_type: type[DOC2] | None = None,
    limit: int | None = None,
    hybrid_limits: int | dict[str, int] | None = None,
    include_scores: bool | None = None,
    include_sort_vector: bool | None = None,
    rerank_on: str | None = None,
    rerank_query: str | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionFindAndRerankCursor[DOC, RerankedResult[DOC2]]:
    """
    Search for relevant documents by combining vector and lexical matches
    through a reranking step.

    This method can succeed only if the collection was created with the
    required hybrid capabilities (see the `create_collection` method of the
    Database class).

    What this method returns is a cursor, to be iterated over: the cursor
    yields the resulting documents, generally paired with accompanying
    information such as scores.

    Args:
        filter: a dictionary expressing a predicate, according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            The full set of operators is found in the Data API documentation.
        sort: a clause expressing the criteria used to select the top matching
            documents. It must carry enough information for both a lexical
            and a vector similarity to be performed (the latter either by
            query text or by query vector, depending on the collection
            configuration).
            Examples are: `sort={"$hybrid": "xyz"}`,
            `sort={"$hybrid": {"$vectorize": "xyz", "$lexical": "abc"}}`,
            `sort={"$hybrid": {"$vector": DataAPIVector(...), "$lexical": "abc"}}`.
            Note this differs from the `sort` parameter for the `find` method.
        projection: this determines which parts of the document are returned.
            It is either an allow-list, such as `{"f1": True, "f2": True}`,
            or a deny-list, such as `{"fx": False, "fy": False}`, but not
            a mixture of the two (with the exception of `_id` and other
            special fields, which can be set to either True or False
            independently of the rest of the specification).
            The special star-projections `{"*": True}` and `{"*": False}`
            result in returning the whole document and `{}` respectively.
            For lists in documents, portions of the list can be selected
            through slice directives: for instance, `{"array": {"$slice": 2}}`,
            `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
            `{"array": {"$slice": [-4, 2]}}`.
            An iterable over strings is implicitly treated as an allow-list.
            The default projection (applied when this parameter is omitted)
            does not necessarily include "special" fields such as `$vector`
            or `$vectorize`.
            More on projections is found in the Data API documentation.
        document_type: this parameter acts as a formal specifier for the type
            checker. If omitted, the resulting cursor is implicitly a
            `CollectionFindAndRerankCursor[DOC, DOC]`, i.e. the items it
            returns have the same type as the documents in the collection.
            Strictly typed code may want to specify this parameter,
            especially when a projection is given.
        limit: the maximum number of documents to return as the result
            of the final rerank step.
        hybrid_limits: this sets how many documents are fetched by each of
            the individual retrieval operations that are combined in the
            rerank step. It is either a number or a dictionary of strings to
            numbers, the latter case expressing different counts for the
            different retrievals. For example: `hybrid_limits=50`,
            `hybrid_limits={"$vector": 20, "$lexical": 10}`.
        include_scores: a boolean to request that the scores be returned along
            with the resulting documents. If this is set, the scores can be
            read in the `scores` map attribute of each RerankedResult
            (the map is otherwise empty).
        include_sort_vector: a boolean to request the search query vector
            used for the vector-search part of the find operation.
            If set to True, calling the `get_sort_vector` method on the returned
            cursor will yield the vector used for the ANN search.
        rerank_on: for collections without a vectorize (server-side embeddings)
            service, this specifies the field name that is then used
            during reranking.
        rerank_query: for collections without a vectorize (server-side
            embeddings) service, this specifies the query text for the reranker.
        request_timeout_ms: a timeout, in milliseconds, applied to each single
            one of the underlying HTTP requests used to fetch documents
            as the cursor is iterated over.
            If not passed, the collection-level setting is used instead.
        timeout_ms: an alias for `request_timeout_ms`.

    Returns:
        a CollectionFindAndRerankCursor object, which can be iterated over
        (and manipulated in several ways).

    Examples:
        >>> # The following examples assume a collection with 'vectorize' and the
        >>> # necessary hybrid configuration; see below for a non-vectorize case.
        >>>
        >>> # Populate with documents
        >>> my_vectorize_coll.insert_many([
        ...     {
        ...         "_id": "A",
        ...         "wkd": "Mon",
        ...         "$vectorize": "Monday is green",
        ...         "$lexical": "Monday is green",
        ...     },
        ...     {
        ...         "_id": "B",
        ...         "wkd": "Tue",
        ...         "$vectorize": "Tuesday is pink",
        ...         "$lexical": "Tuesday is pink",
        ...     },
        ...     {
        ...         "_id": "C",
        ...         "wkd": "Wed",
        ...         "$vectorize": "Wednesday is cyan",
        ...         "$lexical": "Wednesday is cyan",
        ...     },
        ...     {
        ...         "_id": "D",
        ...         "wkd": "Thu",
        ...         "$vectorize": "Thursday is red",
        ...         "$lexical": "Thursday is red",
        ...     },
        ...     {
        ...         "_id": "E",
        ...         "wkd": "Fri",
        ...         "$vectorize": "Friday is orange",
        ...         "$lexical": "Friday is orange",
        ...     },
        ...     {
        ...         "_id": "F",
        ...         "wkd": "Sat",
        ...         "$vectorize": "Saturday is purple",
        ...         "$lexical": "Saturday is purple",
        ...     },
        ...     {
        ...         "_id": "G",
        ...         "wkd": "Sun",
        ...         "$vectorize": "Sunday is beige",
        ...         "$lexical": "Sunday is beige",
        ...     },
        ... ])
        CollectionInsertManyResult(inserted_ids=[A, B, C, D, E ... (7 total)], raw_results=...)
        >>>
        >>> # A simple invocation, consuming the cursor
        >>> # with a loop ('vectorize' collection):
        >>> for r_result in my_vectorize_coll.find_and_rerank(
        ...     sort={"$hybrid": "Weekdays?"},
        ...     limit=2,
        ... ):
        ...     print(r_result.document)
        ...
        {'_id': 'C', 'wkd': 'Wed'}
        {'_id': 'A', 'wkd': 'Mon'}
        >>> # Additional arbitrary filtering predicates
        >>> # ('vectorize' collection):
        >>> for r_result in my_vectorize_coll.find_and_rerank(
        ...     {"wkd": {"$ne": "Mon"}},
        ...     sort={"$hybrid": "Weekdays?"},
        ...     limit=2,
        ... ):
        ...     print(r_result.document)
        ...
        {'_id': 'C', 'wkd': 'Wed'}
        {'_id': 'B', 'wkd': 'Tue'}
        >>> # Fetch the scores with the documents ('vectorize' collection):
        >>> scored_texts = [
        ...     (r_result.document["wkd"], r_result.scores["$rerank"])
        ...     for r_result in my_vectorize_coll.find_and_rerank(
        ...         sort={"$hybrid": "Weekdays?"},
        ...         limit=2,
        ...         include_scores=True,
        ...     )
        ... ]
        >>> print(scored_texts)
        [('Wed', -9.1015625), ('Mon', -10.2421875)]
        >>>
        >>> # Customize sub-search limits ('vectorize' collection):
        >>> hits = my_vectorize_coll.find_and_rerank(
        ...     sort={"$hybrid": "Weekdays?"},
        ...     limit=2,
        ...     hybrid_limits=20,
        ... ).to_list()
        >>> print(", ".join(r_res.document["wkd"] for r_res in hits))
        Wed, Mon
        >>>
        >>> # Separate sub-search queries ('vectorize' collection):
        >>> cursor = my_vectorize_coll.find_and_rerank(
        ...     sort={
        ...         "$hybrid": {
        ...             "$vectorize": "a week day",
        ...             "$lexical": "green",
        ...         },
        ...     },
        ...     limit=2,
        ...     hybrid_limits={"$lexical": 4, "$vector": 20},
        ... )
        >>> print(", ".join(r_res.document["wkd"] for r_res in cursor))
        Mon, Wed
        >>>
        >>> # Reading back the query vector used by
        >>> # the search ('vectorize' collection):
        >>> cursor = my_vectorize_coll.find_and_rerank(
        ...     sort={"$hybrid": "Weekdays?"},
        ...     limit=2,
        ...     include_sort_vector=True
        ... )
        >>> sort_vector = cursor.get_sort_vector()
        >>> print(" ==> ".join(
        ...     r_res.document["wkd"] for r_res in cursor
        ... ))
        Wed ==> Mon
        >>> print(f"Sort vector={sort_vector}")
        Sort vector=[-0.0021172, -0.012057612, 0.010362527 ...]
        >>>
        >>>
        >>> # If the collection has no "vectorize", `rerank_query`
        >>> # and `rerank_on` must be passed. The following assumes a
        >>> # collection with a 3-dimensional vector and the setup for hybrid.
        >>>
        >>> # Populate with documents:
        >>> my_vector3d_coll.insert_many([
        ...     {
        ...         "_id": "A",
        ...         "wkd": "Mon",
        ...         "$vector": [0.1, 0.2, 0.3],
        ...         "$lexical": "Monday is green",
        ...     },
        ...     {
        ...         "_id": "B",
        ...         "wkd": "Tue",
        ...         "$vector": [0.2, 0.3, 0.4],
        ...         "$lexical": "Tuesday is pink",
        ...     },
        ...     {
        ...         "_id": "C",
        ...         "wkd": "Wed",
        ...         "$vector": [0.3, 0.4, 0.5],
        ...         "$lexical": "Wednesday is cyan",
        ...     },
        ...     {
        ...         "_id": "D",
        ...         "wkd": "Thu",
        ...         "$vector": [0.4, 0.5, 0.6],
        ...         "$lexical": "Thursday is red",
        ...     },
        ...     {
        ...         "_id": "E",
        ...         "wkd": "Fri",
        ...         "$vector": [0.5, 0.6, 0.7],
        ...         "$lexical": "Friday is orange",
        ...     },
        ...     {
        ...         "_id": "F",
        ...         "wkd": "Sat",
        ...         "$vector": [0.6, 0.7, 0.8],
        ...         "$lexical": "Saturday is purple",
        ...     },
        ...     {
        ...         "_id": "G",
        ...         "wkd": "Sun",
        ...         "$vector": [0.7, 0.8, 0.9],
        ...         "$lexical": "Sunday is beige",
        ...     },
        ... ])
        CollectionInsertManyResult(inserted_ids=[A, B, C, D, E ... (7 total)], raw_results=...)
        >>>
        >>> # A simple find_and_rerank call (collection without 'vectorize'):
        >>> for r_result in my_vector3d_coll.find_and_rerank(
        ...     sort={
        ...         "$hybrid": {
        ...             "$vector": [0.9, 0.8, 0.7],
        ...             "$lexical": "Weekdays?",
        ...         },
        ...     },
        ...     limit=2,
        ...     rerank_on="wkd",
        ...     rerank_query="week days",
        ... ):
        ...     print(r_result.document["wkd"])
        ...
        Mon
        Tue
    """

    # imported at call time, as a module-level import would be circular
    from astrapy.cursors import CollectionFindAndRerankCursor

    # the first valid entry wins: explicit parameter, then its alias,
    # and finally the setting found in this collection's API options
    timeout_candidates = (
        (request_timeout_ms, "request_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    resolved_timeout_ms, timeout_label = _first_valid_timeout(*timeout_candidates)

    base_cursor = CollectionFindAndRerankCursor(
        collection=self,
        request_timeout_ms=resolved_timeout_ms,
        overall_timeout_ms=None,
        request_timeout_label=timeout_label,
    )
    # the query settings are applied one after the other to the blank cursor
    return (
        base_cursor.filter(filter)
        .project(projection)
        .limit(limit)
        .sort(sort)
        .hybrid_limits(hybrid_limits)
        .rerank_on(rerank_on)
        .rerank_query(rerank_query)
        .include_scores(include_scores)
        .include_sort_vector(include_sort_vector)
    )

Run a search, returning the first document in the collection that matches provided filters, if any is found.

Args

filter: a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
projection: it controls which parts of the document are returned. It can be an allow-list: {"f1": True, "f2": True}, or a deny-list: {"fx": False, "fy": False}, but not a mixture (except for the _id and other special fields, which can be associated to both True or False independently of the rest of the specification). The special star-projections {"*": True} and {"*": False} have the effect of returning the whole document and {} respectively. For lists in documents, slice directives can be passed to select portions of the list: for instance, {"array": {"$slice": 2}}, {"array": {"$slice": -2}}, {"array": {"$slice": [4, 2]}} or {"array": {"$slice": [-4, 2]}}. An iterable over strings will be treated implicitly as an allow-list. The default projection (used if this parameter is not passed) does not necessarily include "special" fields such as $vector or $vectorize. See the Data API documentation for more on projections.
include_similarity: a boolean to request the numeric value of the similarity to be returned as an added "$similarity" key in the returned document. It can be used meaningfully only in a vector search (see sort).
sort: with this dictionary parameter one can control the order the documents are returned. See the Note about sorting for details. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a dictionary expressing the required document, otherwise None.

Examples

>>> my_coll.find_one({})
{'_id': '68d1e515-...', 'seq': 37}
>>> my_coll.find_one({"seq": 10})
{'_id': 'd560e217-...', 'seq': 10}
>>> my_coll.find_one({"seq": 1011})
>>> # (returns None for no matches)
>>> my_coll.find_one({}, projection={"seq": False})
{'_id': '68d1e515-...'}
>>> my_coll.find_one(
...     {},
...     sort={"seq": astrapy.constants.SortMode.DESCENDING},
... )
{'_id': '97e85f81-...', 'seq': 69}
>>> my_coll.find_one({}, sort={"$vector": [1, 0]}, projection={"*": True})
{'_id': '...', 'tag': 'D', '$vector': [4.0, 1.0]}

Note

See the find method for more details on the accepted parameters (whereas skip and limit are not valid parameters for find_one).

Expand source code

def find_one(
    self,
    filter: FilterType | None = None,
    *,
    projection: ProjectionType | None = None,
    include_similarity: bool | None = None,
    sort: SortType | None = None,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> DOC | None:
    """
    Search the collection and return the first document matching
    the provided filter, or None if there is no such document.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        projection: it controls which parts of the document are returned.
            It can be an allow-list: `{"f1": True, "f2": True}`,
            or a deny-list: `{"fx": False, "fy": False}`, but not a mixture
            (except for the `_id` and other special fields, which can be
            associated to both True or False independently of the rest
            of the specification).
            The special star-projections `{"*": True}` and `{"*": False}`
            have the effect of returning the whole document and `{}` respectively.
            For lists in documents, slice directives can be passed to select
            portions of the list: for instance, `{"array": {"$slice": 2}}`,
            `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
            `{"array": {"$slice": [-4, 2]}}`.
            An iterable over strings will be treated implicitly as an allow-list.
            The default projection (used if this parameter is not passed) does not
            necessarily include "special" fields such as `$vector` or `$vectorize`.
            See the Data API documentation for more on projections.
        include_similarity: a boolean to request the numeric value of the
            similarity to be returned as an added "$similarity" key in the
            returned document. It can be used meaningfully only in a vector
            search (see `sort`).
        sort: with this dictionary parameter one can control the order
            the documents are returned. See the Note about sorting for details.
            Vector-based ANN sorting is achieved by providing a "$vector"
            or a "$vectorize" key in `sort`.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a dictionary expressing the required document, otherwise None.

    Raises:
        UnexpectedDataAPIResponseException: if the API response carries
            no "document" entry within its "data" part.

    Examples:
        >>> my_coll.find_one({})
        {'_id': '68d1e515-...', 'seq': 37}
        >>> my_coll.find_one({"seq": 10})
        {'_id': 'd560e217-...', 'seq': 10}
        >>> my_coll.find_one({"seq": 1011})
        >>> # (returns None for no matches)
        >>> my_coll.find_one({}, projection={"seq": False})
        {'_id': '68d1e515-...'}
        >>> my_coll.find_one(
        ...     {},
        ...     sort={"seq": astrapy.constants.SortMode.DESCENDING},
        ... )
        {'_id': '97e85f81-...', 'seq': 69}
        >>> my_coll.find_one({}, sort={"$vector": [1, 0]}, projection={"*": True})
        {'_id': '...', 'tag': 'D', '$vector': [4.0, 1.0]}

    Note:
        See the `find` method for more details on the accepted parameters
        (whereas `skip` and `limit` are not valid parameters for `find_one`).
    """

    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # All the parts the command may carry; unset ones (None) are dropped below.
    candidate_parts = {
        "filter": filter,
        "projection": normalize_optional_projection(projection),
        "options": (
            {"includeSimilarity": include_similarity}
            if include_similarity is not None
            else None
        ),
        "sort": sort,
    }
    command = {
        "findOne": {
            part_name: part_value
            for part_name, part_value in candidate_parts.items()
            if part_value is not None
        }
    }
    response = self._converted_request(
        payload=command,
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    # `or {}` makes a missing or null "data" behave as an empty mapping.
    response_data = response.get("data") or {}
    if "document" in response_data:
        # The entry is None when nothing matched: it is passed through as is.
        return response_data["document"]  # type: ignore[no-any-return]
    raise UnexpectedDataAPIResponseException(
        text="Faulty response from findOne API command.",
        raw_response=response,
    )

def find_one_and_delete(self, filter: FilterType, *, projection: ProjectionType | None = None, sort: SortType | None = None, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> Optional[~DOC]

Find a document in the collection and delete it. The deleted document, however, is the return value of the method.

Args

filter: a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
projection: it controls which parts of the document are returned. It can be an allow-list: {"f1": True, "f2": True}, or a deny-list: {"fx": False, "fy": False}, but not a mixture (except for the _id and other special fields, which can be associated to both True or False independently of the rest of the specification). The special star-projections {"*": True} and {"*": False} have the effect of returning the whole document and {} respectively. For lists in documents, slice directives can be passed to select portions of the list: for instance, {"array": {"$slice": 2}}, {"array": {"$slice": -2}}, {"array": {"$slice": [4, 2]}} or {"array": {"$slice": [-4, 2]}}. An iterable over strings will be treated implicitly as an allow-list. The default projection (used if this parameter is not passed) does not necessarily include "special" fields such as $vector or $vectorize. See the Data API documentation for more on projections.
sort: with this dictionary parameter one can control the sorting order of the documents matching the filter, effectively determining what document will come first and hence be the deleted one. See the find method for more on sorting.
general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Returns

Either the document (or a projection thereof, as requested), or None if no matches were found in the first place.

Example

>>> my_coll.insert_many(
...     [
...         {"species": "swan", "class": "Aves"},
...         {"species": "frog", "class": "Amphibia"},
...     ],
... )
CollectionInsertManyResult(...)
>>> my_coll.find_one_and_delete(
...     {"species": {"$ne": "frog"}},
...     projection=["species"],
... )
{'_id': '5997fb48-...', 'species': 'swan'}
>>> my_coll.find_one_and_delete({"species": {"$ne": "frog"}})
>>> # (returns None for no matches)

Expand source code

def find_one_and_delete(
    self,
    filter: FilterType,
    *,
    projection: ProjectionType | None = None,
    sort: SortType | None = None,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> DOC | None:
    """
    Find a document in the collection and delete it. The deleted document,
    however, is the return value of the method.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        projection: it controls which parts of the document are returned.
            It can be an allow-list: `{"f1": True, "f2": True}`,
            or a deny-list: `{"fx": False, "fy": False}`, but not a mixture
            (except for the `_id` and other special fields, which can be
            associated to both True or False independently of the rest
            of the specification).
            The special star-projections `{"*": True}` and `{"*": False}`
            have the effect of returning the whole document and `{}` respectively.
            For lists in documents, slice directives can be passed to select
            portions of the list: for instance, `{"array": {"$slice": 2}}`,
            `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
            `{"array": {"$slice": [-4, 2]}}`.
            An iterable over strings will be treated implicitly as an allow-list.
            The default projection (used if this parameter is not passed) does not
            necessarily include "special" fields such as `$vector` or `$vectorize`.
            See the Data API documentation for more on projections.
        sort: with this dictionary parameter one can control the sorting
            order of the documents matching the filter, effectively
            determining what document will come first and hence be the
            deleted one. See the `find` method for more on sorting.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        Either the document (or a projection thereof, as requested), or None
        if no matches were found in the first place.

    Raises:
        UnexpectedDataAPIResponseException: if the API response has no
            "document" entry within "data" and, at the same time, does not
            report a "deletedCount" of zero in its "status".

    Example:
        >>> my_coll.insert_many(
        ...     [
        ...         {"species": "swan", "class": "Aves"},
        ...         {"species": "frog", "class": "Amphibia"},
        ...     ],
        ... )
        CollectionInsertManyResult(...)
        >>> my_coll.find_one_and_delete(
        ...     {"species": {"$ne": "frog"}},
        ...     projection=["species"],
        ... )
        {'_id': '5997fb48-...', 'species': 'swan'}
        >>> my_coll.find_one_and_delete({"species": {"$ne": "frog"}})
        >>> # (returns None for no matches)
    """

    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    _projection = normalize_optional_projection(projection)
    # Only the parts that were actually specified make it to the payload.
    fo_payload = {
        "findOneAndDelete": {
            k: v
            for k, v in {
                "filter": filter,
                "sort": sort,
                "projection": _projection,
            }.items()
            if v is not None
        }
    }
    logger.info(f"findOneAndDelete on '{self.name}'")
    fo_response = self._converted_request(
        payload=fo_payload,
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    logger.info(f"finished findOneAndDelete on '{self.name}'")
    # `or {}` (as opposed to a `.get` default) also covers a "data" key that
    # is present but null: such a response must end up in the checks below
    # rather than fail with a TypeError on the membership test.
    response_data = fo_response.get("data") or {}
    if "document" in response_data:
        return response_data["document"]  # type: ignore[no-any-return]
    # No "document" entry: acceptable only if the response explicitly
    # reports that nothing was deleted.
    deleted_count = (fo_response.get("status") or {}).get("deletedCount")
    if deleted_count == 0:
        return None
    raise UnexpectedDataAPIResponseException(
        text="Faulty response from find_one_and_delete API command.",
        raw_response=fo_response,
    )

def find_one_and_replace(self, filter: FilterType, replacement: DOC, *, projection: ProjectionType | None = None, sort: SortType | None = None, upsert: bool = False, return_document: str = 'before', general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> Optional[~DOC]

Find a document on the collection and replace it entirely with a new one, optionally inserting a new one if no match is found.

Args

filter: a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
replacement: the new document to write into the collection.
projection: it controls which parts of the document are returned. It can be an allow-list: {"f1": True, "f2": True}, or a deny-list: {"fx": False, "fy": False}, but not a mixture (except for the _id and other special fields, which can be associated to both True or False independently of the rest of the specification). The special star-projections {"*": True} and {"*": False} have the effect of returning the whole document and {} respectively. For lists in documents, slice directives can be passed to select portions of the list: for instance, {"array": {"$slice": 2}}, {"array": {"$slice": -2}}, {"array": {"$slice": [4, 2]}} or {"array": {"$slice": [-4, 2]}}. An iterable over strings will be treated implicitly as an allow-list. The default projection (used if this parameter is not passed) does not necessarily include "special" fields such as $vector or $vectorize. See the Data API documentation for more on projections.
sort: with this dictionary parameter one can control the sorting order of the documents matching the filter, effectively determining what document will come first and hence be the replaced one. See the find method for more on sorting. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
upsert: this parameter controls the behavior in absence of matches. If True, replacement is inserted as a new document if no matches are found on the collection. If False, the operation silently does nothing in case of no matches.
return_document: a flag controlling what document is returned: if set to ReturnDocument.BEFORE, or the string "before", the document found on database is returned; if set to ReturnDocument.AFTER, or the string "after", the new document is returned. The default is "before".
general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Returns

A document (or a projection thereof, as required), either the one before the replace operation or the one after that. Alternatively, the method returns None to represent that no matching document was found, or that no replacement was inserted (depending on the return_document parameter).

Example

>>> my_coll.insert_one({"_id": "rule1", "text": "all animals are equal"})
CollectionInsertOneResult(...)
>>> my_coll.find_one_and_replace(
...     {"_id": "rule1"},
...     {"text": "some animals are more equal!"},
... )
{'_id': 'rule1', 'text': 'all animals are equal'}
>>> my_coll.find_one_and_replace(
...     {"text": "some animals are more equal!"},
...     {"text": "and the pigs are the rulers"},
...     return_document=astrapy.constants.ReturnDocument.AFTER,
... )
{'_id': 'rule1', 'text': 'and the pigs are the rulers'}
>>> my_coll.find_one_and_replace(
...     {"_id": "rule2"},
...     {"text": "F=ma^2"},
...     return_document=astrapy.constants.ReturnDocument.AFTER,
... )
>>> # (returns None for no matches)
>>> my_coll.find_one_and_replace(
...     {"_id": "rule2"},
...     {"text": "F=ma"},
...     upsert=True,
...     return_document=astrapy.constants.ReturnDocument.AFTER,
...     projection={"_id": False},
... )
{'text': 'F=ma'}

Expand source code

def find_one_and_replace(
    self,
    filter: FilterType,
    replacement: DOC,
    *,
    projection: ProjectionType | None = None,
    sort: SortType | None = None,
    upsert: bool = False,
    return_document: str = ReturnDocument.BEFORE,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> DOC | None:
    """
    Find a document on the collection and replace it entirely with a new one,
    optionally inserting a new one if no match is found.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        replacement: the new document to write into the collection.
        projection: it controls which parts of the document are returned.
            It can be an allow-list: `{"f1": True, "f2": True}`,
            or a deny-list: `{"fx": False, "fy": False}`, but not a mixture
            (except for the `_id` and other special fields, which can be
            associated to both True or False independently of the rest
            of the specification).
            The special star-projections `{"*": True}` and `{"*": False}`
            have the effect of returning the whole document and `{}` respectively.
            For lists in documents, slice directives can be passed to select
            portions of the list: for instance, `{"array": {"$slice": 2}}`,
            `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
            `{"array": {"$slice": [-4, 2]}}`.
            An iterable over strings will be treated implicitly as an allow-list.
            The default projection (used if this parameter is not passed) does not
            necessarily include "special" fields such as `$vector` or `$vectorize`.
            See the Data API documentation for more on projections.
        sort: with this dictionary parameter one can control the sorting
            order of the documents matching the filter, effectively
            determining what document will come first and hence be the
            replaced one. See the `find` method for more on sorting.
            Vector-based ANN sorting is achieved by providing a "$vector"
            or a "$vectorize" key in `sort`.
        upsert: this parameter controls the behavior in absence of matches.
            If True, `replacement` is inserted as a new document
            if no matches are found on the collection. If False,
            the operation silently does nothing in case of no matches.
        return_document: a flag controlling what document is returned:
            if set to `ReturnDocument.BEFORE`, or the string "before",
            the document found on database is returned; if set to
            `ReturnDocument.AFTER`, or the string "after", the new
            document is returned. The default is "before".
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        A document (or a projection thereof, as required), either the one
        before the replace operation or the one after that.
        Alternatively, the method returns None to represent
        that no matching document was found, or that no replacement
        was inserted (depending on the `return_document` parameter).

    Raises:
        UnexpectedDataAPIResponseException: if the API response carries
            no "document" entry within its "data" part.

    Example:
        >>> my_coll.insert_one({"_id": "rule1", "text": "all animals are equal"})
        CollectionInsertOneResult(...)
        >>> my_coll.find_one_and_replace(
        ...     {"_id": "rule1"},
        ...     {"text": "some animals are more equal!"},
        ... )
        {'_id': 'rule1', 'text': 'all animals are equal'}
        >>> my_coll.find_one_and_replace(
        ...     {"text": "some animals are more equal!"},
        ...     {"text": "and the pigs are the rulers"},
        ...     return_document=astrapy.constants.ReturnDocument.AFTER,
        ... )
        {'_id': 'rule1', 'text': 'and the pigs are the rulers'}
        >>> my_coll.find_one_and_replace(
        ...     {"_id": "rule2"},
        ...     {"text": "F=ma^2"},
        ...     return_document=astrapy.constants.ReturnDocument.AFTER,
        ... )
        >>> # (returns None for no matches)
        >>> my_coll.find_one_and_replace(
        ...     {"_id": "rule2"},
        ...     {"text": "F=ma"},
        ...     upsert=True,
        ...     return_document=astrapy.constants.ReturnDocument.AFTER,
        ...     projection={"_id": False},
        ... )
        {'text': 'F=ma'}
    """

    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # These options are always sent, whatever their value.
    options = {
        "returnDocument": return_document,
        "upsert": upsert,
    }
    # Only the parts that were actually specified make it to the payload.
    fo_payload = {
        "findOneAndReplace": {
            k: v
            for k, v in {
                "filter": filter,
                "projection": normalize_optional_projection(projection),
                "replacement": replacement,
                "options": options,
                "sort": sort,
            }.items()
            if v is not None
        }
    }
    logger.info(f"findOneAndReplace on '{self.name}'")
    fo_response = self._converted_request(
        payload=fo_payload,
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    logger.info(f"finished findOneAndReplace on '{self.name}'")
    # `or {}` (as opposed to a `.get` default) also covers a "data" key that
    # is present but null: such a response must raise the exception below
    # rather than fail with a TypeError on the membership test.
    response_data = fo_response.get("data") or {}
    if "document" not in response_data:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from find_one_and_replace API command.",
            raw_response=fo_response,
        )
    # A None entry is returned as is.
    return response_data["document"]  # type: ignore[no-any-return]

def find_one_and_update(self, filter: FilterType, update: dict[str, Any], *, projection: ProjectionType | None = None, sort: SortType | None = None, upsert: bool = False, return_document: str = 'before', general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> Optional[~DOC]

Find a document on the collection and update it as requested, optionally inserting a new one if no match is found.

Args

filter: a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
update: the update prescription to apply to the document, expressed as a dictionary as per Data API syntax. Examples are: {"$set": {"field": "value"}} {"$inc": {"counter": 10}} {"$unset": {"field": ""}} See the Data API documentation for the full syntax.
projection: it controls which parts of the document are returned. It can be an allow-list: {"f1": True, "f2": True}, or a deny-list: {"fx": False, "fy": False}, but not a mixture (except for the _id and other special fields, which can be associated to both True or False independently of the rest of the specification). The special star-projections {"*": True} and {"*": False} have the effect of returning the whole document and {} respectively. For lists in documents, slice directives can be passed to select portions of the list: for instance, {"array": {"$slice": 2}}, {"array": {"$slice": -2}}, {"array": {"$slice": [4, 2]}} or {"array": {"$slice": [-4, 2]}}. An iterable over strings will be treated implicitly as an allow-list. The default projection (used if this parameter is not passed) does not necessarily include "special" fields such as $vector or $vectorize. See the Data API documentation for more on projections.
sort: with this dictionary parameter one can control the sorting order of the documents matching the filter, effectively determining what document will come first and hence be the updated one. See the find method for more on sorting. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
upsert: this parameter controls the behavior in absence of matches. If True, a new document (resulting from applying the update to an empty document) is inserted if no matches are found on the collection. If False, the operation silently does nothing in case of no matches.
return_document: a flag controlling what document is returned: if set to ReturnDocument.BEFORE, or the string "before", the document found on database is returned; if set to ReturnDocument.AFTER, or the string "after", the new document is returned. The default is "before".
general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

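Returns

A document (or a projection thereof, as required), either the one before the update operation or the one after that. Alternatively, the method returns None to represent that no matching document was found, or that no update was applied (depending on the return_document parameter).
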
Example

>>> my_coll.insert_one({"Marco": "Polo"})
CollectionInsertOneResult(...)
>>> my_coll.find_one_and_update(
...     {"Marco": {"$exists": True}},
...     {"$set": {"title": "Mr."}},
... )
{'_id': 'a80106f2-...', 'Marco': 'Polo'}
>>> my_coll.find_one_and_update(
...     {"title": "Mr."},
...     {"$inc": {"rank": 3}},
...     projection=["title", "rank"],
...     return_document=astrapy.constants.ReturnDocument.AFTER,
... )
{'_id': 'a80106f2-...', 'title': 'Mr.', 'rank': 3}
>>> my_coll.find_one_and_update(
...     {"name": "Johnny"},
...     {"$set": {"rank": 0}},
...     return_document=astrapy.constants.ReturnDocument.AFTER,
... )
>>> # (returns None for no matches)
>>> my_coll.find_one_and_update(
...     {"name": "Johnny"},
...     {"$set": {"rank": 0}},
...     upsert=True,
...     return_document=astrapy.constants.ReturnDocument.AFTER,
... )
{'_id': 'cb4ef2ab-...', 'name': 'Johnny', 'rank': 0}

Expand source code

def find_one_and_update(
    self,
    filter: FilterType,
    update: dict[str, Any],
    *,
    projection: ProjectionType | None = None,
    sort: SortType | None = None,
    upsert: bool = False,
    return_document: str = ReturnDocument.BEFORE,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> DOC | None:
    """
    Find a document on the collection and update it as requested,
    optionally inserting a new one if no match is found.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        update: the update prescription to apply to the document, expressed
            as a dictionary as per Data API syntax. Examples are:
                {"$set": {"field": "value"}}
                {"$inc": {"counter": 10}}
                {"$unset": {"field": ""}}
            See the Data API documentation for the full syntax.
        projection: it controls which parts of the document are returned.
            It can be an allow-list: `{"f1": True, "f2": True}`,
            or a deny-list: `{"fx": False, "fy": False}`, but not a mixture
            (except for the `_id` and other special fields, which can be
            associated to both True or False independently of the rest
            of the specification).
            The special star-projections `{"*": True}` and `{"*": False}`
            have the effect of returning the whole document and `{}` respectively.
            For lists in documents, slice directives can be passed to select
            portions of the list: for instance, `{"array": {"$slice": 2}}`,
            `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
            `{"array": {"$slice": [-4, 2]}}`.
            An iterable over strings will be treated implicitly as an allow-list.
            The default projection (used if this parameter is not passed) does not
            necessarily include "special" fields such as `$vector` or `$vectorize`.
            See the Data API documentation for more on projections.
        sort: with this dictionary parameter one can control the sorting
            order of the documents matching the filter, effectively
            determining what document will come first and hence be the
            updated one. See the `find` method for more on sorting.
            Vector-based ANN sorting is achieved by providing a "$vector"
            or a "$vectorize" key in `sort`.
        upsert: this parameter controls the behavior in absence of matches.
            If True, a new document (resulting from applying the `update`
            to an empty document) is inserted if no matches are found on
            the collection. If False, the operation silently does nothing
            in case of no matches.
        return_document: a flag controlling what document is returned:
            if set to `ReturnDocument.BEFORE`, or the string "before",
            the document found on database is returned; if set to
            `ReturnDocument.AFTER`, or the string "after", the new
            document is returned. The default is "before".
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        A document (or a projection thereof, as required), either the one
        before the update operation or the one after that.
        Alternatively, the method returns None to represent
        that no matching document was found, or that no update
        was applied (depending on the `return_document` parameter).

    Raises:
        UnexpectedDataAPIResponseException: if the API response carries
            no "document" entry within its "data" part.

    Example:
        >>> my_coll.insert_one({"Marco": "Polo"})
        CollectionInsertOneResult(...)
        >>> my_coll.find_one_and_update(
        ...     {"Marco": {"$exists": True}},
        ...     {"$set": {"title": "Mr."}},
        ... )
        {'_id': 'a80106f2-...', 'Marco': 'Polo'}
        >>> my_coll.find_one_and_update(
        ...     {"title": "Mr."},
        ...     {"$inc": {"rank": 3}},
        ...     projection=["title", "rank"],
        ...     return_document=astrapy.constants.ReturnDocument.AFTER,
        ... )
        {'_id': 'a80106f2-...', 'title': 'Mr.', 'rank': 3}
        >>> my_coll.find_one_and_update(
        ...     {"name": "Johnny"},
        ...     {"$set": {"rank": 0}},
        ...     return_document=astrapy.constants.ReturnDocument.AFTER,
        ... )
        >>> # (returns None for no matches)
        >>> my_coll.find_one_and_update(
        ...     {"name": "Johnny"},
        ...     {"$set": {"rank": 0}},
        ...     upsert=True,
        ...     return_document=astrapy.constants.ReturnDocument.AFTER,
        ... )
        {'_id': 'cb4ef2ab-...', 'name': 'Johnny', 'rank': 0}
    """

    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # These options are always sent, whatever their value.
    options = {
        "returnDocument": return_document,
        "upsert": upsert,
    }
    # Only the parts that were actually specified make it to the payload.
    fo_payload = {
        "findOneAndUpdate": {
            k: v
            for k, v in {
                "filter": filter,
                "update": update,
                "options": options,
                "sort": sort,
                "projection": normalize_optional_projection(projection),
            }.items()
            if v is not None
        }
    }
    logger.info(f"findOneAndUpdate on '{self.name}'")
    fo_response = self._converted_request(
        payload=fo_payload,
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    logger.info(f"finished findOneAndUpdate on '{self.name}'")
    # `or {}` (as opposed to a `.get` default) also covers a "data" key that
    # is present but null: such a response must raise the exception below
    # rather than fail with a TypeError on the membership test.
    response_data = fo_response.get("data") or {}
    if "document" not in response_data:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from find_one_and_update API command.",
            raw_response=fo_response,
        )
    # A None entry is returned as is.
    return response_data["document"]  # type: ignore[no-any-return]

def info(self, *, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> CollectionInfo

Information on the collection (name, location, database), in the form of a CollectionInfo object.

Not to be confused with the collection options method (related to the collection internal configuration).

Args

database_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying DevOps API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for database_admin_timeout_ms.
timeout_ms: an alias for database_admin_timeout_ms.

Example

>>> my_coll.info().database_info.region
'eu-west-1'
>>> my_coll.info().full_name
'default_keyspace.my_v_collection'

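Note

The returned CollectionInfo wraps, among other things, the database information: as such, calling this method triggers the same-named method of a Database object (which, in turn, performs an HTTP request to the DevOps API). See the documentation for Database.info() for more details.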

Expand source code

def info(
    self,
    *,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionInfo:
    """
    Return a CollectionInfo object describing this collection: its name,
    keyspace, fully-qualified name and the database it belongs to.

    This is unrelated to the collection `options` method, which reports
    the internal configuration of the collection instead.

    Args:
        database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying DevOps API request.
            If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.

    Returns:
        a CollectionInfo built from the database information and from
        this collection's `keyspace`, `name` and `full_name` attributes.

    Example:
        >>> my_coll.info().database_info.region
        'eu-west-1'
        >>> my_coll.info().full_name
        'default_keyspace.my_v_collection'

    Note:
        the returned CollectionInfo wraps, among other things,
        the database information: as such, calling this method
        triggers the same-named method of a Database object (which, in turn,
        performs an HTTP request to the DevOps API).
        See the documentation for `Database.info()` for more details.
    """

    # All three timeout parameters are forwarded untouched: resolving which
    # one applies is left to the Database-level method.
    db_info = self.database.info(
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    return CollectionInfo(
        database_info=db_info,
        keyspace=self.keyspace,
        name=self.name,
        full_name=self.full_name,
    )

def insert_many(self, documents: Iterable[DOC], *, ordered: bool = False, chunk_size: int | None = None, concurrency: int | None = None, request_timeout_ms: int | None = None, general_method_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> CollectionInsertManyResult

Insert a list of documents into the collection. This is not an atomic operation.

Args

documents: an iterable of dictionaries, each a document to insert. Documents may specify their _id field or leave it out, in which case it will be added automatically.
ordered: if False (default), the insertions can occur in arbitrary order and possibly concurrently. If True, they are processed sequentially. If there are no specific reasons against it, unordered insertions are to be preferred as they complete much faster.
chunk_size: how many documents to include in a single API request. Exceeding the server maximum allowed value results in an error. Leave it unspecified (recommended) to use the system default.
concurrency: maximum number of concurrent requests to the API at a given time. It cannot be more than one for ordered insertions.
general_method_timeout_ms: a timeout, in milliseconds, for the whole requested operation (which may involve multiple API requests). If not passed, the collection-level setting is used instead.
request_timeout_ms: a timeout, in milliseconds, for each API request. If not passed, the collection-level setting is used instead.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a CollectionInsertManyResult object.

Examples

>>> my_coll.count_documents({}, upper_bound=10)
0
>>> my_coll.insert_many(
...     [{"a": 10}, {"a": 5}, {"b": [True, False, False]}],
...     ordered=True,
... )
CollectionInsertManyResult(raw_results=..., inserted_ids=['184bb06f-...', '...', '...'])
>>> my_coll.count_documents({}, upper_bound=100)
3
>>> my_coll.insert_many(
...     [{"seq": i} for i in range(50)],
...     concurrency=5,
... )
CollectionInsertManyResult(raw_results=..., inserted_ids=[... ...])
>>> my_coll.count_documents({}, upper_bound=100)
53
>>> my_coll.insert_many(
...     [
...         {"tag": "a", "$vector": [1, 2]},
...         {"tag": "b", "$vector": [3, 4]},
...     ]
... )
CollectionInsertManyResult(...)

Note

Unordered insertions are executed with some degree of concurrency, so it is usually better to prefer this mode unless the order in the document sequence is important.

Note

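A failure mode for this command is related to certain faulty documents found among those to insert: for example, a document may have an ID already found on the collection, or its vector dimension may not match the collection setting.

For an ordered insertion, the method will raise an exception at the first such faulty document – nevertheless, all documents processed until then will end up being written to the database.

For unordered insertions, if the error stems from faulty documents the insertion proceeds until exhausting the input documents: then, an exception is raised – and all insertable documents will have been written to the database, including those "after" the troublesome ones.

Errors occurring during an insert_many operation, for that reason, may result in a CollectionInsertManyException being raised. This exception allows inspecting the list of document IDs that were successfully inserted, while accessing at the same time the underlying "root errors" that made the full method call fail.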

Expand source code

def insert_many(
    self,
    documents: Iterable[DOC],
    *,
    ordered: bool = False,
    chunk_size: int | None = None,
    concurrency: int | None = None,
    request_timeout_ms: int | None = None,
    general_method_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionInsertManyResult:
    """
    Insert a list of documents into the collection.
    This is not an atomic operation.

    Args:
        documents: an iterable of dictionaries, each a document to insert.
            Documents may specify their `_id` field or leave it out, in which
            case it will be added automatically.
        ordered: if False (default), the insertions can occur in arbitrary order
            and possibly concurrently. If True, they are processed sequentially.
            If there are no specific reasons against it, unordered insertions are to
            be preferred as they complete much faster.
        chunk_size: how many documents to include in a single API request.
            It must be a positive integer.
            Exceeding the server maximum allowed value results in an error.
            Leave it unspecified (recommended) to use the system default.
        concurrency: maximum number of concurrent requests to the API at
            a given time. It cannot be more than one for ordered insertions.
        general_method_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation (which may involve multiple API requests).
            If not passed, the collection-level setting is used instead.
        request_timeout_ms: a timeout, in milliseconds, for each API request.
            If not passed, the collection-level setting is used instead.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a CollectionInsertManyResult object.

    Raises:
        ValueError: if an ordered insertion is requested with a concurrency
            greater than one, or if the chunk size is not a positive integer.
        CollectionInsertManyException: if any of the API responses
            reports errors (see the notes below).

    Examples:
        >>> my_coll.count_documents({}, upper_bound=10)
        0
        >>> my_coll.insert_many(
        ...     [{"a": 10}, {"a": 5}, {"b": [True, False, False]}],
        ...     ordered=True,
        ... )
        CollectionInsertManyResult(raw_results=..., inserted_ids=['184bb06f-...', '...', '...'])
        >>> my_coll.count_documents({}, upper_bound=100)
        3
        >>> my_coll.insert_many(
        ...     [{"seq": i} for i in range(50)],
        ...     concurrency=5,
        ... )
        CollectionInsertManyResult(raw_results=..., inserted_ids=[... ...])
        >>> my_coll.count_documents({}, upper_bound=100)
        53
        >>> my_coll.insert_many(
        ...     [
        ...         {"tag": "a", "$vector": [1, 2]},
        ...         {"tag": "b", "$vector": [3, 4]},
        ...     ]
        ... )
        CollectionInsertManyResult(...)

    Note:
        Unordered insertions are executed with some degree of concurrency,
        so it is usually better to prefer this mode unless the order in the
        document sequence is important.

    Note:
        A failure mode for this command is related to certain faulty documents
        found among those to insert: for example, a document may have an ID
        already found on the collection, or its vector dimension may not
        match the collection setting.

        For an ordered insertion, the method will raise an exception at
        the first such faulty document -- nevertheless, all documents processed
        until then will end up being written to the database.

        For unordered insertions, if the error stems from faulty documents
        the insertion proceeds until exhausting the input documents: then,
        an exception is raised -- and all insertable documents will have been
        written to the database, including those "after" the troublesome ones.

        Errors occurring during an insert_many operation, for that reason,
        may result in a `CollectionInsertManyException` being raised.
        This exception allows inspecting the list of document IDs that were
        successfully inserted, while accessing at the same time the underlying
        "root errors" that made the full method call fail.
    """

    _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
        (general_method_timeout_ms, "general_method_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (
            self.api_options.timeout_options.general_method_timeout_ms,
            "general_method_timeout_ms",
        ),
    )
    _request_timeout_ms, _rt_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    if concurrency is None:
        _concurrency = 1 if ordered else DEFAULT_INSERT_MANY_CONCURRENCY
    else:
        _concurrency = concurrency
    if _concurrency > 1 and ordered:
        raise ValueError("Cannot run ordered insert_many concurrently.")
    if chunk_size is None:
        _chunk_size = DEFAULT_INSERT_MANY_CHUNK_SIZE
    else:
        _chunk_size = chunk_size
    # A negative step would make the chunking range empty, i.e. the call
    # would "succeed" without inserting anything; a zero step would fail
    # with an unrelated-looking range() error. Reject both explicitly.
    if _chunk_size < 1:
        raise ValueError("The chunk size for insert_many must be a positive integer.")

    _documents = list(documents)
    logger.info(f"inserting {len(_documents)} documents in '{self.name}'")
    timeout_manager = MultiCallTimeoutManager(
        overall_timeout_ms=_general_method_timeout_ms,
        timeout_label=_gmt_label,
    )
    options = {"ordered": bool(ordered), "returnDocumentResponses": True}
    # Lazily-sliced chunks: each slice is created only when it is consumed.
    document_chunks = (
        _documents[i : i + _chunk_size]
        for i in range(0, len(_documents), _chunk_size)
    )

    def _insert_chunk(
        document_chunk: list[DOC],
    ) -> tuple[dict[str, Any], dict[str, Any]]:
        # Send one insertMany command; return the (payload, raw response) pair.
        # API errors are not raised here: they are inspected by the caller.
        im_payload = {
            "insertMany": {
                "documents": document_chunk,
                "options": options,
            },
        }
        logger.info(f"insertMany(chunk) on '{self.name}'")
        im_response = self._converted_request(
            payload=im_payload,
            raise_api_errors=False,
            # the per-request cap is applied on top of what is left
            # of the overall method timeout at the time of the request
            timeout_context=timeout_manager.remaining_timeout(
                cap_time_ms=_request_timeout_ms,
                cap_timeout_label=_rt_label,
            ),
        )
        logger.info(f"finished insertMany(chunk) on '{self.name}'")
        return im_payload, im_response

    def _inserted_ids_of(chunk_response: dict[str, Any]) -> list[Any]:
        # Collect the IDs of the documents marked "OK" in one raw response.
        return [
            doc_resp["_id"]
            for doc_resp in (chunk_response.get("status") or {}).get(
                "documentResponses", []
            )
            if doc_resp["status"] == "OK"
        ]

    raw_results: list[dict[str, Any]] = []
    inserted_ids: list[Any] = []
    if ordered:
        for document_chunk in document_chunks:
            im_payload, chunk_response = _insert_chunk(document_chunk)
            # accumulate the results in this call
            inserted_ids += _inserted_ids_of(chunk_response)
            raw_results.append(chunk_response)
            # if errors, quit early
            if chunk_response.get("errors", []):
                response_exception = DataAPIResponseException.from_response(
                    command=im_payload,
                    raw_response=chunk_response,
                )
                raise CollectionInsertManyException(
                    inserted_ids=inserted_ids, exceptions=[response_exception]
                )
    else:
        # unordered: concurrent or not, do all of them and parse the results
        if _concurrency > 1:
            with ThreadPoolExecutor(max_workers=_concurrency) as executor:
                pl_response_pairs = list(
                    executor.map(_insert_chunk, document_chunks)
                )
        else:
            pl_response_pairs = [
                _insert_chunk(document_chunk) for document_chunk in document_chunks
            ]
        # keep these as lists regardless of the execution path
        im_payloads = [im_payload for im_payload, _ in pl_response_pairs]
        raw_results = [im_response for _, im_response in pl_response_pairs]
        inserted_ids = [
            inserted_id
            for chunk_response in raw_results
            for inserted_id in _inserted_ids_of(chunk_response)
        ]

        # check-raise
        response_exceptions = [
            DataAPIResponseException.from_response(
                command=chunk_payload,
                raw_response=chunk_response,
            )
            for chunk_payload, chunk_response in zip(im_payloads, raw_results)
            if chunk_response.get("errors", [])
        ]
        if response_exceptions:
            raise CollectionInsertManyException(
                inserted_ids=inserted_ids,
                exceptions=response_exceptions,
            )

    # return
    full_result = CollectionInsertManyResult(
        raw_results=raw_results,
        inserted_ids=inserted_ids,
    )
    logger.info(
        f"finished inserting {len(_documents)} documents in '{self.name}'"
    )
    return full_result

def insert_one(self, document: DOC, *, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> CollectionInsertOneResult

Insert a single document in the collection in an atomic operation.

Args

document: the dictionary expressing the document to insert. The _id field of the document can be left out, in which case it will be created automatically.
general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a CollectionInsertOneResult object.

Examples

>>> my_coll.count_documents({}, upper_bound=10)
0
>>> my_coll.insert_one(
...     {
...         "age": 30,
...         "name": "Smith",
...         "food": ["pear", "peach"],
...         "likes_fruit": True,
...     },
... )
CollectionInsertOneResult(raw_results=..., inserted_id='ed4587a4-...-...-...')
>>> my_coll.insert_one({"_id": "user-123", "age": 50, "name": "Maccio"})
CollectionInsertOneResult(raw_results=..., inserted_id='user-123')
>>> my_coll.count_documents({}, upper_bound=10)
2

>>> my_coll.insert_one({"tag": "v", "$vector": [10, 11]})
CollectionInsertOneResult(...)

Note

If an _id is explicitly provided, which corresponds to a document that exists already in the collection, an error is raised and the insertion fails.

Expand source code

def insert_one(
    self,
    document: DOC,
    *,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionInsertOneResult:
    """
    Insert a single document in the collection in an atomic operation.

    Args:
        document: the dictionary expressing the document to insert.
            The `_id` field of the document can be left out, in which
            case it will be created automatically.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a CollectionInsertOneResult object.

    Raises:
        UnexpectedDataAPIResponseException: if the API response carries
            no (or an empty) list of inserted IDs in its "status".

    Examples:
        >>> my_coll.count_documents({}, upper_bound=10)
        0
        >>> my_coll.insert_one(
        ...     {
        ...         "age": 30,
        ...         "name": "Smith",
        ...         "food": ["pear", "peach"],
        ...         "likes_fruit": True,
        ...     },
        ... )
        CollectionInsertOneResult(raw_results=..., inserted_id='ed4587a4-...-...-...')
        >>> my_coll.insert_one({"_id": "user-123", "age": 50, "name": "Maccio"})
        CollectionInsertOneResult(raw_results=..., inserted_id='user-123')
        >>> my_coll.count_documents({}, upper_bound=10)
        2

        >>> my_coll.insert_one({"tag": "v", "$vector": [10, 11]})
        CollectionInsertOneResult(...)

    Note:
        If an `_id` is explicitly provided, which corresponds to a document
        that exists already in the collection, an error is raised and
        the insertion fails.
    """

    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    io_payload = {"insertOne": {"document": document}}
    logger.info(f"insertOne on '{self.name}'")
    io_response = self._converted_request(
        payload=io_payload,
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    logger.info(f"finished insertOne on '{self.name}'")
    # `or {}` also covers a "status" key that is present but null, so that
    # such a response is reported as a faulty response and not as a TypeError.
    io_status = io_response.get("status") or {}
    inserted_ids = io_status["insertedIds"] if "insertedIds" in io_status else []
    if not inserted_ids:
        # missing and empty "insertedIds" are both unexpected for this command
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from insert_one API command.",
            raw_response=io_response,
        )
    return CollectionInsertOneResult(
        raw_results=[io_response],
        inserted_id=inserted_ids[0],
    )

def options(self, *, collection_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> CollectionDefinition

Get the collection options, i.e. its configuration as read from the database.

Args

collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for collection_admin_timeout_ms.
timeout_ms: an alias for collection_admin_timeout_ms.

Returns

a CollectionDefinition instance describing the collection. (See also the database list_collections method.)

Example

>>> my_coll.options()
CollectionDefinition(vector=CollectionVectorOptions(dimension=3, metric='cosine'))

Expand source code

def options(
    self,
    *,
    collection_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionDefinition:
    """
    Retrieve the collection options, that is, the collection configuration
    as currently stored on the database.

    Each invocation issues a request to the Data API, with no caching
    involved: this guarantees up-to-date information for usages such as
    real-time collection validation by the application.

    Args:
        collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `collection_admin_timeout_ms`.
        timeout_ms: an alias for `collection_admin_timeout_ms`.

    Returns:
        a CollectionDefinition instance describing the collection.
        (See also the database `list_collections` method.)

    Raises:
        RuntimeError: if no collection with this name is found among
            those listed by the database.

    Example:
        >>> my_coll.options()
        CollectionDefinition(vector=CollectionVectorOptions(dimension=3, metric='cosine'))
    """

    _collection_admin_timeout_ms, _ca_label = _select_singlereq_timeout_ca(
        timeout_options=self.api_options.timeout_options,
        collection_admin_timeout_ms=collection_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info(f"getting collections in search of '{self.name}'")
    listing_timeout_context = _TimeoutContext(
        request_ms=_collection_admin_timeout_ms,
        label=_ca_label,
    )
    all_descriptors = self.database._list_collections_ctx(
        keyspace=None,
        timeout_context=listing_timeout_context,
    )
    # The whole listing is scanned, keeping the descriptors named like this one.
    matching_descriptors = [
        descriptor for descriptor in all_descriptors if descriptor.name == self.name
    ]
    logger.info(f"finished getting collections in search of '{self.name}'")
    if not matching_descriptors:
        raise RuntimeError(
            f"Collection {self.keyspace}.{self.name} not found.",
        )
    return matching_descriptors[0].definition

def replace_one(self, filter: FilterType, replacement: DOC, *, sort: SortType | None = None, upsert: bool = False, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> CollectionUpdateResult

Replace a single document on the collection with a new one, optionally inserting a new one if no match is found.

Args

filter: a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
replacement: the new document to write into the collection.
sort: with this dictionary parameter one can control the sorting order of the documents matching the filter, effectively determining what document will come first and hence be the replaced one. See the find method for more on sorting. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
upsert: this parameter controls the behavior in absence of matches. If True, replacement is inserted as a new document if no matches are found on the collection. If False, the operation silently does nothing in case of no matches.
general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a CollectionUpdateResult object summarizing the outcome of the replace operation.

Example

>>> my_coll.insert_one({"Marco": "Polo"})
CollectionInsertOneResult(...)
>>> my_coll.replace_one({"Marco": {"$exists": True}}, {"Buda": "Pest"})
CollectionUpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1})
>>> my_coll.find_one({"Buda": "Pest"})
{'_id': '8424905a-...', 'Buda': 'Pest'}
>>> my_coll.replace_one({"Mirco": {"$exists": True}}, {"Oh": "yeah?"})
CollectionUpdateResult(raw_results=..., update_info={'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0})
>>> my_coll.replace_one({"Mirco": {"$exists": True}}, {"Oh": "yeah?"}, upsert=True)
CollectionUpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '931b47d6-...'})

Expand source code

def replace_one(
    self,
    filter: FilterType,
    replacement: DOC,
    *,
    sort: SortType | None = None,
    upsert: bool = False,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionUpdateResult:
    """
    Replace one document of the collection with a new one. Optionally,
    the new document is inserted when the filter matches nothing.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        replacement: the new document to write into the collection.
        sort: with this dictionary parameter one can control the sorting
            order of the documents matching the filter, effectively
            determining what document will come first and hence be the
            replaced one. See the `find` method for more on sorting.
            Vector-based ANN sorting is achieved by providing a "$vector"
            or a "$vectorize" key in `sort`.
        upsert: this parameter controls the behavior in absence of matches.
            If True, `replacement` is inserted as a new document
            if no matches are found on the collection. If False,
            the operation silently does nothing in case of no matches.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a CollectionUpdateResult object summarizing the outcome of
        the replace operation.

    Raises:
        UnexpectedDataAPIResponseException: if the API response has
            no "document" entry in its "data" portion.

    Example:
        >>> my_coll.insert_one({"Marco": "Polo"})
        CollectionInsertOneResult(...)
        >>> my_coll.replace_one({"Marco": {"$exists": True}}, {"Buda": "Pest"})
        CollectionUpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1})
        >>> my_coll.find_one({"Buda": "Pest"})
        {'_id': '8424905a-...', 'Buda': 'Pest'}
        >>> my_coll.replace_one({"Mirco": {"$exists": True}}, {"Oh": "yeah?"})
        CollectionUpdateResult(raw_results=..., update_info={'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0})
        >>> my_coll.replace_one({"Mirco": {"$exists": True}}, {"Oh": "yeah?"}, upsert=True)
        CollectionUpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '931b47d6-...'})
    """

    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # Candidate fields of the command, in the order they are sent;
    # those left to None (e.g. an unspecified sort) are omitted altogether.
    command_fields = {
        "filter": filter,
        "replacement": replacement,
        "options": {"upsert": upsert},
        "sort": sort,
    }
    fo_payload = {
        "findOneAndReplace": {
            field_name: field_value
            for field_name, field_value in command_fields.items()
            if field_value is not None
        }
    }
    logger.info(f"findOneAndReplace on '{self.name}'")
    request_timeout_context = _TimeoutContext(
        request_ms=_request_timeout_ms, label=_rt_label
    )
    fo_response = self._converted_request(
        payload=fo_payload,
        timeout_context=request_timeout_context,
    )
    logger.info(f"finished findOneAndReplace on '{self.name}'")
    if "document" not in fo_response.get("data", {}):
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from find_one_and_replace API command.",
            raw_response=fo_response,
        )
    # a missing or null "status" is handled as an empty one
    fo_status = fo_response.get("status") or {}
    return CollectionUpdateResult(
        raw_results=[fo_response],
        update_info=_prepare_update_info([fo_status]),
    )

def to_async(self: Collection[DOC], *, embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = (unset), reranking_api_key: str | RerankingHeadersProvider | UnsetType = (unset), api_options: APIOptions | UnsetType = (unset)) ‑> AsyncCollection[DOC]

Create an AsyncCollection from this one. Save for the arguments explicitly provided as overrides, everything else is kept identical to this collection in the copy (the database is converted into an async object).

Args

embedding_api_key: optional API key(s) for interacting with the collection. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
reranking_api_key: optional API key(s) for interacting with the collection. If a reranker is configured for the collection, and this parameter is not None, Data API calls will include the appropriate reranker-related headers according to this parameter. Reranker services may not necessarily require this setting (e.g. if the service needs no authentication, or one is configured as part of the collection definition relying on a "shared secret"). If a string is passed, it is translated into an instance of RerankingAPIKeyHeaderProvider.
api_options: any additional options to set for the result, in the form of an APIOptions instance (where one can set just the needed attributes). In case the same setting is also provided as named parameter, the latter takes precedence.

Returns

the new copy, an AsyncCollection instance.

Example

>>> asyncio.run(my_coll.to_async().count_documents({},upper_bound=100))
77

Expand source code

def to_async(
    self: Collection[DOC],
    *,
    embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
    reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
    api_options: APIOptions | UnsetType = _UNSET,
) -> AsyncCollection[DOC]:
    """
    Build the asynchronous counterpart of this collection.

    The returned AsyncCollection has the same name and keyspace as this
    collection; its database is obtained by converting this collection's
    database to its async form. API options are inherited from this
    collection, except for whatever is explicitly overridden through
    the arguments below.

    Args:
        embedding_api_key: optional API key(s) for interacting with the collection.
            If an embedding service is configured, and this parameter is not None,
            each Data API call will include the necessary embedding-related headers
            as specified by this parameter. If a string is passed, it translates
            into the one "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            For some vectorize providers/models, if using header-based authentication,
            specialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`
            should be supplied.
        reranking_api_key: optional API key(s) for interacting with the collection.
            If a reranker is configured for the collection, and this parameter
            is not None, Data API calls will include the appropriate
            reranker-related headers according to this parameter. Reranker services
            may not necessarily require this setting (e.g. if the service needs no
            authentication, or one is configured as part of the collection
            definition relying on a "shared secret").
            If a string is passed, it is translated into an instance of
            `astrapy.authentication.RerankingAPIKeyHeaderProvider`.
        api_options: any additional options to set for the result, in the form of
            an APIOptions instance (where one can set just the needed attributes).
            In case the same setting is also provided as named parameter,
            the latter takes precedence.

    Returns:
        the new copy, an AsyncCollection instance.

    Example:
        >>> asyncio.run(my_coll.to_async().count_documents({},upper_bound=100))
        77
    """

    # The named parameters get their own APIOptions layer, which is applied
    # last so that it wins over anything set through `api_options`.
    named_overrides = APIOptions(
        embedding_api_key=embedding_api_key,
        reranking_api_key=reranking_api_key,
    )
    options_with_generic_overrides = self.api_options.with_override(api_options)
    merged_api_options = options_with_generic_overrides.with_override(
        named_overrides
    )
    async_database = self.database.to_async()
    return AsyncCollection(
        database=async_database,
        name=self.name,
        keyspace=self.keyspace,
        api_options=merged_api_options,
    )

def update_many(self, filter: FilterType, update: dict[str, Any], *, upsert: bool = False, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> CollectionUpdateResult

Apply an update operation to all documents matching a condition, optionally inserting one document in the absence of matches.

Args

filter: a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
update: the update prescription to apply to the documents, expressed as a dictionary as per Data API syntax. Examples are: {"$set": {"field": "value"}} {"$inc": {"counter": 10}} {"$unset": {"field": ""}} See the Data API documentation for the full syntax.
upsert: this parameter controls the behavior in absence of matches. If True, a single new document (resulting from applying update to an empty document) is inserted if no matches are found on the collection. If False, the operation silently does nothing in case of no matches.
general_method_timeout_ms: a timeout, in milliseconds, for the whole requested operation (which may involve multiple API requests). This method may entail successive HTTP API requests, depending on the amount of involved documents. If not passed, the collection-level setting is used instead.
request_timeout_ms: a timeout, in milliseconds, for each API request. If not passed, the collection-level setting is used instead.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a CollectionUpdateResult object summarizing the outcome of the update operation.

Example

>>> my_coll.insert_many([{"c": "red"}, {"c": "green"}, {"c": "blue"}])
CollectionInsertManyResult(...)
>>> my_coll.update_many({"c": {"$ne": "green"}}, {"$set": {"nongreen": True}})
CollectionUpdateResult(raw_results=..., update_info={'n': 2, 'updatedExisting': True, 'ok': 1.0, 'nModified': 2})
>>> my_coll.update_many({"c": "orange"}, {"$set": {"is_also_fruit": True}})
CollectionUpdateResult(raw_results=..., update_info={'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0})
>>> my_coll.update_many(
...     {"c": "orange"},
...     {"$set": {"is_also_fruit": True}},
...     upsert=True,
... )
CollectionUpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '46643050-...'})

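Note

Similarly to the case of find (see its docstring for more details), running this command while, at the same time, another process is inserting new documents which match the filter of the update_many can result in an unpredictable fraction of these documents being updated. In other words, it cannot be easily predicted whether a given newly-inserted document will be picked up by the update_many command or not.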

Expand source code

def update_many(
    self,
    filter: FilterType,
    update: dict[str, Any],
    *,
    upsert: bool = False,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionUpdateResult:
    """
    Update every document matching a filter, optionally inserting a single
    new document when nothing matches.

    The operation is carried out as a sequence of `updateMany` API requests:
    as long as a response carries a `nextPageState` in its status, another
    request is issued with that page state, until no further page is signaled.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        update: the update prescription to apply to the documents, expressed
            as a dictionary as per Data API syntax. Examples are:
                {"$set": {"field": "value"}}
                {"$inc": {"counter": 10}}
                {"$unset": {"field": ""}}
            See the Data API documentation for the full syntax.
        upsert: this parameter controls the behavior in absence of matches.
            If True, a single new document (resulting from applying `update`
            to an empty document) is inserted if no matches are found on
            the collection. If False, the operation silently does nothing
            in case of no matches.
        general_method_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation (which may involve multiple API requests).
            This method may entail successive HTTP API requests,
            depending on the amount of involved documents.
            If not passed, the collection-level setting is used instead.
        request_timeout_ms: a timeout, in milliseconds, for each API request.
            If not passed, the collection-level setting is used instead.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a CollectionUpdateResult object summarizing the outcome of
        the update operation.

    Raises:
        CollectionUpdateManyException: if a response reports errors. The
            exception carries the partial result accumulated from the
            requests completed before the failing one.
        UnexpectedDataAPIResponseException: if an error-free response
            lacks the "status" field.

    Example:
        >>> my_coll.insert_many([{"c": "red"}, {"c": "green"}, {"c": "blue"}])
        CollectionInsertManyResult(...)
        >>> my_coll.update_many({"c": {"$ne": "green"}}, {"$set": {"nongreen": True}})
        CollectionUpdateResult(raw_results=..., update_info={'n': 2, 'updatedExisting': True, 'ok': 1.0, 'nModified': 2})
        >>> my_coll.update_many({"c": "orange"}, {"$set": {"is_also_fruit": True}})
        CollectionUpdateResult(raw_results=..., update_info={'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0})
        >>> my_coll.update_many(
        ...     {"c": "orange"},
        ...     {"$set": {"is_also_fruit": True}},
        ...     upsert=True,
        ... )
        CollectionUpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '46643050-...'})

    Note:
        Similarly to the case of `find` (see its docstring for more details),
        running this command while, at the same time, another process is
        inserting new documents which match the filter of the `update_many`
        can result in an unpredictable fraction of these documents being updated.
        In other words, it cannot be easily predicted whether a given
        newly-inserted document will be picked up by the update_many command or not.
    """

    overall_timeout_ms, overall_timeout_label = _first_valid_timeout(
        (general_method_timeout_ms, "general_method_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (
            self.api_options.timeout_options.general_method_timeout_ms,
            "general_method_timeout_ms",
        ),
    )
    per_request_timeout_ms, per_request_timeout_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )

    # Responses and statuses of the successfully-completed requests so far.
    collected_responses: list[dict[str, Any]] = []
    collected_statuses: list[dict[str, Any]] = []
    # Page state to send with the next request (None for the very first one).
    pending_page_state: Any = None

    logger.info(f"starting update_many on '{self.name}'")
    timeout_manager = MultiCallTimeoutManager(
        overall_timeout_ms=overall_timeout_ms,
        timeout_label=overall_timeout_label,
    )
    while True:
        command_options: dict[str, Any] = {"upsert": upsert}
        if pending_page_state is not None:
            command_options["pageState"] = pending_page_state
        command_fields = {
            "filter": filter,
            "update": update,
            "options": command_options,
        }
        # None-valued fields are left out of the command altogether.
        page_payload = {
            "updateMany": {
                field_name: field_value
                for field_name, field_value in command_fields.items()
                if field_value is not None
            }
        }
        logger.info(f"updateMany on '{self.name}'")
        # API errors are not raised by the request itself: they are inspected
        # below so that the partial result can be attached to the exception.
        page_response = self._converted_request(
            payload=page_payload,
            raise_api_errors=False,
            timeout_context=timeout_manager.remaining_timeout(
                cap_time_ms=per_request_timeout_ms,
                cap_timeout_label=per_request_timeout_label,
            ),
        )
        logger.info(f"finished updateMany on '{self.name}'")

        if page_response.get("errors", []):
            # Quit early, reporting what was accomplished before this request.
            raise CollectionUpdateManyException(
                partial_result=CollectionUpdateResult(
                    raw_results=collected_responses,
                    update_info=_prepare_update_info(collected_statuses),
                ),
                cause=DataAPIResponseException.from_response(
                    command=page_payload,
                    raw_response=page_response,
                ),
            )
        if "status" not in page_response:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from update_many API command.",
                raw_response=page_response,
            )

        page_status = page_response.get("status") or {}
        collected_responses.append(page_response)
        collected_statuses.append(page_status)
        pending_page_state = page_status.get("nextPageState")
        if pending_page_state is None:
            # No further page signaled: the operation is complete.
            break

    final_update_info = _prepare_update_info(collected_statuses)
    logger.info(f"finished update_many on '{self.name}'")
    return CollectionUpdateResult(
        raw_results=collected_responses,
        update_info=final_update_info,
    )

def update_one(self, filter: FilterType, update: dict[str, Any], *, sort: SortType | None = None, upsert: bool = False, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> CollectionUpdateResult

Update a single document on the collection as requested, optionally inserting a new one if no match is found.

Args

filter: a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
update: the update prescription to apply to the document, expressed as a dictionary as per Data API syntax. Examples are: {"$set": {"field": "value"}} {"$inc": {"counter": 10}} {"$unset": {"field": ""}} See the Data API documentation for the full syntax.
sort: with this dictionary parameter one can control the sorting order of the documents matching the filter, effectively determining what document will come first and hence be the updated one. See the find method for more on sorting. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
upsert: this parameter controls the behavior in absence of matches. If True, a new document (resulting from applying the update to an empty document) is inserted if no matches are found on the collection. If False, the operation silently does nothing in case of no matches.
general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a CollectionUpdateResult object summarizing the outcome of the update operation.

Example

>>> my_coll.insert_one({"Marco": "Polo"})
CollectionInsertOneResult(...)
>>> my_coll.update_one({"Marco": {"$exists": True}}, {"$inc": {"rank": 3}})
CollectionUpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1})
>>> my_coll.update_one({"Mirko": {"$exists": True}}, {"$inc": {"rank": 3}})
CollectionUpdateResult(raw_results=..., update_info={'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0})
>>> my_coll.update_one({"Mirko": {"$exists": True}}, {"$inc": {"rank": 3}}, upsert=True)
CollectionUpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '2a45ff60-...'})

Expand source code

def update_one(
    self,
    filter: FilterType,
    update: dict[str, Any],
    *,
    sort: SortType | None = None,
    upsert: bool = False,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> CollectionUpdateResult:
    """
    Apply an update to a single document of the collection, optionally
    inserting a new document if the filter matches nothing.

    This is done with a single `updateOne` API request.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        update: the update prescription to apply to the document, expressed
            as a dictionary as per Data API syntax. Examples are:
                {"$set": {"field": "value"}}
                {"$inc": {"counter": 10}}
                {"$unset": {"field": ""}}
            See the Data API documentation for the full syntax.
        sort: with this dictionary parameter one can control the sorting
            order of the documents matching the filter, effectively
            determining what document will come first and hence be the
            updated one. See the `find` method for more on sorting.
            Vector-based ANN sorting is achieved by providing a "$vector"
            or a "$vectorize" key in `sort`.
        upsert: this parameter controls the behavior in absence of matches.
            If True, a new document (resulting from applying the `update`
            to an empty document) is inserted if no matches are found on
            the collection. If False, the operation silently does nothing
            in case of no matches.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a CollectionUpdateResult object summarizing the outcome of
        the update operation.

    Raises:
        UnexpectedDataAPIResponseException: if the API response
            lacks the "status" field.

    Example:
        >>> my_coll.insert_one({"Marco": "Polo"})
        CollectionInsertOneResult(...)
        >>> my_coll.update_one({"Marco": {"$exists": True}}, {"$inc": {"rank": 3}})
        CollectionUpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1})
        >>> my_coll.update_one({"Mirko": {"$exists": True}}, {"$inc": {"rank": 3}})
        CollectionUpdateResult(raw_results=..., update_info={'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0})
        >>> my_coll.update_one({"Mirko": {"$exists": True}}, {"$inc": {"rank": 3}}, upsert=True)
        CollectionUpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '2a45ff60-...'})
    """

    effective_timeout_ms, effective_timeout_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    command_fields = {
        "filter": filter,
        "update": update,
        "options": {
            "upsert": upsert,
        },
        "sort": sort,
    }
    # None-valued fields (e.g. an unspecified sort) are left out of the command.
    command_payload = {
        "updateOne": {
            field_name: field_value
            for field_name, field_value in command_fields.items()
            if field_value is not None
        }
    }
    logger.info(f"updateOne on '{self.name}'")
    single_request_timeout = _TimeoutContext(
        request_ms=effective_timeout_ms, label=effective_timeout_label
    )
    api_response = self._converted_request(
        payload=command_payload,
        timeout_context=single_request_timeout,
    )
    logger.info(f"finished updateOne on '{self.name}'")

    # A response without "status" cannot be turned into an update result.
    if "status" not in api_response:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from updateOne API command.",
            raw_response=api_response,
        )
    response_status = api_response["status"]
    return CollectionUpdateResult(
        raw_results=[api_response],
        update_info=_prepare_update_info([response_status]),
    )

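def with_options(self: Collection[DOC], *, embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = (unset), reranking_api_key: str | RerankingHeadersProvider | UnsetType = (unset), api_options: APIOptions | UnsetType = (unset)) ‑> Collection[DOC]

Create a clone of this collection with some changed attributes.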

Args

embedding_api_key: optional API key(s) for interacting with the collection. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
reranking_api_key: optional API key(s) for interacting with the collection. If a reranker is configured for the collection, and this parameter is not None, Data API calls will include the appropriate reranker-related headers according to this parameter. Reranker services may not necessarily require this setting (e.g. if the service needs no authentication, or one is configured as part of the collection definition relying on a "shared secret"). If a string is passed, it is translated into an instance of RerankingAPIKeyHeaderProvider.
api_options: any additional options to set for the clone, in the form of an APIOptions instance (where one can set just the needed attributes). In case the same setting is also provided as named parameter, the latter takes precedence.

Returns

a new Collection instance.

Example

>>> collection_with_api_key_configured = my_collection.with_options(
...     embedding_api_key="secret-key-0123abcd...",
... )

Expand source code

def with_options(
    self: Collection[DOC],
    *,
    embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
    reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
    api_options: APIOptions | UnsetType = _UNSET,
) -> Collection[DOC]:
    """
    Return a copy of this collection in which the supplied settings
    replace the current ones.

    All arguments are handed over, unchanged, to this collection's
    `_copy` method, whose result is returned.

    Args:
        embedding_api_key: optional API key(s) for interacting with the collection.
            If an embedding service is configured, and this parameter is not None,
            each Data API call will include the necessary embedding-related headers
            as specified by this parameter. If a string is passed, it translates
            into the one "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            For some vectorize providers/models, if using header-based authentication,
            specialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`
            should be supplied.
        reranking_api_key: optional API key(s) for interacting with the collection.
            If a reranker is configured for the collection, and this parameter
            is not None, Data API calls will include the appropriate
            reranker-related headers according to this parameter. Reranker services
            may not necessarily require this setting (e.g. if the service needs no
            authentication, or one is configured as part of the collection
            definition relying on a "shared secret").
            If a string is passed, it is translated into an instance of
            `astrapy.authentication.RerankingAPIKeyHeaderProvider`.
        api_options: any additional options to set for the clone, in the form of
            an APIOptions instance (where one can set just the needed attributes).
            In case the same setting is also provided as named parameter,
            the latter takes precedence.

    Returns:
        a new Collection instance.

    Example:
        >>> collection_with_api_key_configured = my_collection.with_options(
        ...     embedding_api_key="secret-key-0123abcd...",
        ... )
    """

    cloned_collection = self._copy(
        embedding_api_key=embedding_api_key,
        reranking_api_key=reranking_api_key,
        api_options=api_options,
    )
    return cloned_collection

A client for using the Data API. This is the entry point, sitting at the top of the conceptual "client -> database -> collection" hierarchy and of the "client -> admin -> database admin" chain as well.

A client is created first, optionally passing it a suitable Access Token. Starting from the client, then:

- databases (Database and AsyncDatabase) are created for working with data
- AstraDBAdmin objects can be created for admin-level work

Args

token: an Access Token to the database. Example: "AstraCS:xyz...". This can be either a literal token string or a subclass of TokenProvider. Note that generally one should pass the token later, when spawning Database instances from the client (with the get_database method of DataAPIClient); the reason is that the typical tokens are scoped to a single database. However, when performing administrative tasks at the AstraDBAdmin level (such as creating databases), an org-wide token is required – then it makes sense to provide it when creating the DataAPIClient instance.
environment: a string representing the target Data API environment. It can be left unspecified for the default value of Environment.PROD; other values include Environment.OTHER, Environment.DSE.
callers: a list of caller identities, i.e. applications, or frameworks, on behalf of which Data API and DevOps API calls are performed. These end up in the request user-agent. Each caller identity is a ("caller_name", "caller_version") pair.
api_options: a specification - complete or partial - of the API Options to override the system defaults. This allows for a deeper configuration than what the named parameters (token, environment, callers) offer. If this is passed alongside these named parameters, those will take precedence.

Example

>>> from astrapy import DataAPIClient
>>> from astrapy.info import CollectionDefinition
>>> my_client = DataAPIClient()
>>> my_db0 = my_client.get_database(
...     "https://01234567-....apps.astra.datastax.com",
...     token="AstraCS:...",
... )
>>> my_coll = my_db0.create_collection(
...     "movies",
...     definition=(
...         CollectionDefinition.builder()
...         .set_vector_dimension(2)
...         .build()
...     ),
... )
>>> my_coll.insert_one({"title": "The Title", "$vector": [0.1, 0.3]})
>>> my_db1 = my_client.get_database("01234567-...")
>>> my_db2 = my_client.get_database("01234567-...", region="us-east1")
>>> my_adm0 = my_client.get_admin()
>>> my_adm1 = my_client.get_admin(token=more_powerful_token_override)
>>> database_list = my_adm0.list_databases()

Expand source code

class DataAPIClient:
    """
    A client for using the Data API. This is the entry point, sitting
    at the top of the conceptual "client -> database -> collection" hierarchy
    and of the "client -> admin -> database admin" chain as well.

    A client is created first, optionally passing it a suitable Access Token.
    Starting from the client, then:
        - databases (Database and AsyncDatabase) are created for working with data
        - AstraDBAdmin objects can be created for admin-level work

    Args:
        token: an Access Token to the database. Example: `"AstraCS:xyz..."`.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
            Note that generally one should pass the token later, when spawning
            Database instances from the client (with the `get_database` method
            of DataAPIClient); the reason is that the typical tokens are scoped
            to a single database. However, when performing administrative tasks
            at the AstraDBAdmin level (such as creating databases), an org-wide
            token is required -- then it makes sense to provide it when creating
            the DataAPIClient instance.
        environment: a string representing the target Data API environment.
            It can be left unspecified for the default value of `Environment.PROD`;
            other values include `Environment.OTHER`, `Environment.DSE`.
        callers: a list of caller identities, i.e. applications, or frameworks,
            on behalf of which Data API and DevOps API calls are performed.
            These end up in the request user-agent.
            Each caller identity is a ("caller_name", "caller_version") pair.
        api_options: a specification - complete or partial - of the API Options
            to override the system defaults. This allows for a deeper configuration
            than what the named parameters (token, environment, callers) offer.
            If this is passed alongside these named parameters, those will take
            precedence.

    Raises:
        InvalidEnvironmentException: if the (lowercased) `environment` is not
            among `Environment.values`.

    Example:
        >>> from astrapy import DataAPIClient
        >>> from astrapy.info import CollectionDefinition
        >>> my_client = DataAPIClient()
        >>> my_db0 = my_client.get_database(
        ...     "https://01234567-....apps.astra.datastax.com",
        ...     token="AstraCS:...",
        ... )
        >>> my_coll = my_db0.create_collection(
        ...     "movies",
        ...     definition=(
        ...         CollectionDefinition.builder()
        ...         .set_vector_dimension(2)
        ...         .build()
        ...     ),
        ... )
        >>> my_coll.insert_one({"title": "The Title", "$vector": [0.1, 0.3]})
        >>> my_db1 = my_client.get_database(
        ...     "https://01234567-....apps.astra.datastax.com",
        ... )
        >>> my_db2 = my_client.get_database(
        ...     "https://01234567-....apps.astra.datastax.com",
        ...     keyspace="prod_keyspace",
        ... )
        >>> my_adm0 = my_client.get_admin()
        >>> my_adm1 = my_client.get_admin(token=more_powerful_token_override)
        >>> database_list = my_adm0.list_databases()
    """

    def __init__(
        self,
        token: str | TokenProvider | UnsetType = _UNSET,
        *,
        environment: str | UnsetType = _UNSET,
        callers: Sequence[CallerType] | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> None:
        """
        Validate the environment and assemble the client's API options.

        See the class docstring for the meaning of each parameter.
        """

        # this parameter bootstraps the defaults, has a special treatment:
        _environment: str
        if isinstance(environment, UnsetType):
            _environment = Environment.PROD.lower()
        else:
            _environment = environment.lower()
        if _environment not in Environment.values:
            raise InvalidEnvironmentException(
                f"Unsupported `environment` value: '{_environment}'."
            )
        # The named parameters are wrapped in their own APIOptions so that they
        # can be layered last, i.e. win over anything found in `api_options`.
        arg_api_options = APIOptions(
            callers=callers,
            token=token,
        )
        # Layering order: environment defaults <- api_options <- named parameters.
        self.api_options = (
            defaultAPIOptions(_environment)
            .with_override(api_options)
            .with_override(arg_api_options)
        )

    def __repr__(self) -> str:
        """Return the class name followed by the client's API options."""
        return f"{self.__class__.__name__}({self.api_options})"

    def __eq__(self, other: Any) -> bool:
        """
        Compare two clients by token, environment and callers.

        Other settings in the API options do not take part in the comparison.
        Anything that is not a DataAPIClient compares unequal.
        """
        if not isinstance(other, DataAPIClient):
            return False
        return all(
            [
                self.api_options.token == other.api_options.token,
                self.api_options.environment == other.api_options.environment,
                self.api_options.callers == other.api_options.callers,
            ]
        )

    def __getitem__(self, api_endpoint: str) -> Database:
        """Shorthand for `get_database(api_endpoint=api_endpoint)`."""
        return self.get_database(api_endpoint=api_endpoint)

    def _copy(
        self,
        *,
        token: str | TokenProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> DataAPIClient:
        """
        Build a new client from this one's options plus the given overrides.

        Args:
            token: if supplied, replaces the token of the copy.
            api_options: further settings to override in the copy; a `token`
                passed as named parameter takes precedence over these.

        Returns:
            a new DataAPIClient instance.
        """
        arg_api_options = APIOptions(token=token)
        # Layering order: this client's options <- api_options <- named token.
        final_api_options = self.api_options.with_override(api_options).with_override(
            arg_api_options
        )
        # The environment is passed explicitly since the constructor uses it
        # to bootstrap the defaults before applying any override.
        return DataAPIClient(
            token=token,
            environment=final_api_options.environment,
            api_options=final_api_options,
        )

    def with_options(
        self,
        *,
        token: str | TokenProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> DataAPIClient:
        """
        Create a clone of this DataAPIClient with some changed attributes.

        Args:
            token: an Access Token to the database. Example: `"AstraCS:xyz..."`.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            api_options: any additional options to set for the clone, in the form of
                an APIOptions instance (where one can set just the needed attributes).
                In case the same setting is also provided as named parameter,
                the latter takes precedence.

        Returns:
            a new DataAPIClient instance.

        Example:
            >>> other_auth_client = my_client.with_options(
            ...     token="AstraCS:xyz...",
            ... )
        """

        return self._copy(
            token=token,
            api_options=api_options,
        )

    def get_database(
        self,
        api_endpoint: str,
        *,
        token: str | TokenProvider | UnsetType = _UNSET,
        keyspace: str | None = None,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> Database:
        """
        Get a Database object from this client, for doing data-related work.

        Args:
            api_endpoint: the API Endpoint for the target database
                (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`,
                or a custom domain if one is configured for the database).
                The database must exist already for the resulting object
                to be effectively used; in other words, this invocation
                does not create the database, just the object instance.
                Actual admin work can be achieved by using the AstraDBAdmin object.
            token: if supplied, is passed to the Database instead of the client token.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            keyspace: if provided, it is passed to the Database; otherwise
                the Database class will apply an environment-specific default.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults.
                This allows for a deeper configuration of the database, e.g.
                concerning timeouts; if this is passed together with
                the equivalent named parameters, the latter will take precedence
                in their respective settings.

        Returns:
            a Database object with which to work on Data API collections.

        Raises:
            InvalidEnvironmentException: for Astra DB environments, if the
                environment parsed from the API endpoint differs from the one
                in the resulting API options.
            ValueError: for non-Astra environments, if the API endpoint cannot
                be parsed as a generic API URL.

        Example:
            >>> my_db1 = my_client.get_database(
            ...     "https://01234567-...us-west1.apps.astra.datastax.com",
            ... )
            >>> my_db2 = my_client.get_database(
            ...     "https://01234567-...us-west1.apps.astra.datastax.com",
            ...     token="AstraCS:...",
            ...     keyspace="prod_keyspace",
            ... )
            >>> my_coll = my_db1.create_collection(
            ...     "movies",
            ...     definition=(
            ...         CollectionDefinition.builder()
            ...         .set_vector_dimension(2)
            ...         .build()
            ...     ),
            ... )
            >>> my_coll.insert_one({"title": "The Title", "$vector": [0.3, 0.4]})

        Note:
            This method does not perform any admin-level operation through
            the DevOps API. For actual creation of a database, see the
            `create_database` method of class AstraDBAdmin.
        """

        # lazy importing here to avoid circular dependency
        from astrapy import Database

        arg_api_options = APIOptions(token=token)
        # Layering order: client options <- spawn_api_options <- named token.
        resulting_api_options = self.api_options.with_override(
            spawn_api_options
        ).with_override(arg_api_options)

        if resulting_api_options.environment in Environment.astra_db_values:
            parsed_api_endpoint = parse_api_endpoint(api_endpoint)
            if parsed_api_endpoint is None:
                # An endpoint that does not parse is not rejected on Astra DB:
                # an informational message is logged and the endpoint is used as is.
                msg = api_endpoint_parsing_cdinfo_message(api_endpoint)
                logger.info(msg)
            elif parsed_api_endpoint.environment != resulting_api_options.environment:
                raise InvalidEnvironmentException(
                    "Environment mismatch between client and provided "
                    "API endpoint. You can try adding "
                    f'`environment="{parsed_api_endpoint.environment}"` '
                    "to the DataAPIClient creation statement."
                )
            # On Astra DB the endpoint is handed over exactly as supplied.
            database_api_endpoint = api_endpoint
        else:
            parsed_generic_api_endpoint = parse_generic_api_url(api_endpoint)
            if not parsed_generic_api_endpoint:
                msg = generic_api_url_parsing_error_message(api_endpoint)
                raise ValueError(msg)
            # Outside Astra DB the parser's output replaces the supplied endpoint.
            database_api_endpoint = parsed_generic_api_endpoint

        return Database(
            api_endpoint=database_api_endpoint,
            keyspace=keyspace,
            api_options=resulting_api_options,
        )

    def get_async_database(
        self,
        api_endpoint: str,
        *,
        token: str | TokenProvider | UnsetType = _UNSET,
        keyspace: str | None = None,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncDatabase:
        """
        Get an AsyncDatabase object from this client, for doing data-related work.

        Args:
            api_endpoint: the API Endpoint for the target database
                (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`,
                or a custom domain if one is configured for the database).
                The database must exist already for the resulting object
                to be effectively used; in other words, this invocation
                does not create the database, just the object instance.
                Actual admin work can be achieved by using the AstraDBAdmin object.
            token: if supplied, is passed to the Database instead of the client token.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            keyspace: if provided, it is passed to the Database; otherwise
                the Database class will apply an environment-specific default.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults.
                This allows for a deeper configuration of the database, e.g.
                concerning timeouts; if this is passed together with
                the equivalent named parameters, the latter will take precedence
                in their respective settings.

        Returns:
            an AsyncDatabase object with which to work on Data API collections.

        Raises:
            InvalidEnvironmentException: same conditions as `get_database`.
            ValueError: same conditions as `get_database`.

        Example:
            >>> async def create_use_db(cl: DataAPIClient, api_ep: str) -> None:
            ...     async_db = cl.get_async_database(api_ep)
            ...     my_a_coll = await async_db.create_collection(
            ...         "movies",
            ...         definition=(
            ...             CollectionDefinition.builder()
            ...             .set_vector_dimension(2)
            ...             .build()
            ...         )
            ...     )
            ...     await my_a_coll.insert_one({"title": "The Title", "$vector": [0.3, 0.4]})
            ...
            >>> asyncio.run(
            ...   create_use_db(
            ...       my_client,
            ...       "https://01234567-...us-west1.apps.astra.datastax.com",
            ...   )
            ... )

        Note:
            This method does not perform any admin-level operation through
            the DevOps API. For actual creation of a database, see the
            `create_database` method of class AstraDBAdmin.
        """

        # The sync Database is built first (all validation lives there),
        # then converted to its async counterpart.
        return self.get_database(
            api_endpoint=api_endpoint,
            token=token,
            keyspace=keyspace,
            spawn_api_options=spawn_api_options,
        ).to_async()

    def get_database_by_api_endpoint(
        self,
        api_endpoint: str,
        *,
        token: str | TokenProvider | UnsetType = _UNSET,
        keyspace: str | None = None,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> Database:
        """
        Get a Database object from this client, for doing data-related work.

        Note: this is an alias for `get_database` (see).

        Args:
            api_endpoint: the API Endpoint for the target database
                (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`,
                or a custom domain if one is configured for the database).
                The database must exist already for the resulting object
                to be effectively used; in other words, this invocation
                does not create the database, just the object instance.
                Actual admin work can be achieved by using the AstraDBAdmin object.
            token: if supplied, is passed to the Database instead of the client token.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            keyspace: if provided, it is passed to the Database; otherwise
                the Database class will apply an environment-specific default.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults.
                This allows for a deeper configuration of the database, e.g.
                concerning timeouts; if this is passed together with
                the equivalent named parameters, the latter will take precedence
                in their respective settings.

        Returns:
            a Database object with which to work on Data API collections.
        """

        return self.get_database(
            api_endpoint=api_endpoint,
            token=token,
            keyspace=keyspace,
            spawn_api_options=spawn_api_options,
        )

    def get_async_database_by_api_endpoint(
        self,
        api_endpoint: str,
        *,
        token: str | TokenProvider | UnsetType = _UNSET,
        keyspace: str | None = None,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncDatabase:
        """
        Get an AsyncDatabase object from this client, for doing data-related work.

        Note: this is an alias for `get_async_database` (see).

        Args:
            api_endpoint: the API Endpoint for the target database
                (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`,
                or a custom domain if one is configured for the database).
                The database must exist already for the resulting object
                to be effectively used; in other words, this invocation
                does not create the database, just the object instance.
                Actual admin work can be achieved by using the AstraDBAdmin object.
            token: if supplied, is passed to the Database instead of the client token.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            keyspace: if provided, it is passed to the Database; otherwise
                the Database class will apply an environment-specific default.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults.
                This allows for a deeper configuration of the database, e.g.
                concerning timeouts; if this is passed together with
                the equivalent named parameters, the latter will take precedence
                in their respective settings.

        Returns:
            an AsyncDatabase object with which to work on Data API collections.
        """

        return self.get_async_database(
            api_endpoint=api_endpoint,
            token=token,
            keyspace=keyspace,
            spawn_api_options=spawn_api_options,
        )

    def get_admin(
        self,
        *,
        token: str | TokenProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AstraDBAdmin:
        """
        Get an AstraDBAdmin instance corresponding to this client, for
        admin work such as managing databases.

        Args:
            token: if supplied, is passed to the Astra DB Admin instead of the
                client token. This may be useful when switching to a more powerful,
                admin-capable permission set.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults.
                This allows for a deeper configuration of the admin, e.g.
                concerning timeouts; if this is passed together with
                the equivalent named parameters, the latter will take precedence
                in their respective settings.

        Returns:
            An AstraDBAdmin instance, with which to perform management at the
            database level.

        Raises:
            InvalidEnvironmentException: if the environment in the resulting
                API options is not an Astra DB one.

        Example:
            >>> my_adm0 = my_client.get_admin()
            >>> my_adm1 = my_client.get_admin(token=more_powerful_token_override)
            >>> database_list = my_adm0.list_databases()
            >>> my_db_admin = my_adm0.create_database(
            ...     "the_other_database",
            ...     cloud_provider="AWS",
            ...     region="eu-west-1",
            ... )
            >>> my_db_admin.list_keyspaces()
            ['default_keyspace', 'that_other_one']
        """

        # lazy importing here to avoid circular dependency
        from astrapy.admin import AstraDBAdmin

        arg_api_options = APIOptions(token=token)
        # Layering order: client options <- spawn_api_options <- named token.
        resulting_api_options = self.api_options.with_override(
            spawn_api_options
        ).with_override(arg_api_options)

        if resulting_api_options.environment not in Environment.astra_db_values:
            raise InvalidEnvironmentException(
                "Method not supported outside of Astra DB."
            )

        return AstraDBAdmin(api_options=resulting_api_options)

Methods

def get_admin(self, *, token: str | TokenProvider | UnsetType = (unset), spawn_api_options: APIOptions | UnsetType = (unset)) ‑> AstraDBAdmin

Get an AstraDBAdmin instance corresponding to this client, for admin work such as managing databases.

Args

token: if supplied, is passed to the Astra DB Admin instead of the client token. This may be useful when switching to a more powerful, admin-capable permission set. This can be either a literal token string or a subclass of TokenProvider.
spawn_api_options: a specification - complete or partial - of the API Options to override the defaults. This allows for a deeper configuration of the admin, e.g. concerning timeouts; if this is passed together with the equivalent named parameters, the latter will take precedence in their respective settings.

Returns

An AstraDBAdmin instance, with which to perform management at the database level.

Example

>>> my_adm0 = my_client.get_admin()
>>> my_adm1 = my_client.get_admin(token=more_powerful_token_override)
>>> database_list = my_adm0.list_databases()
>>> my_db_admin = my_adm0.create_database(
...     "the_other_database",
...     cloud_provider="AWS",
...     region="eu-west-1",
... )
>>> my_db_admin.list_keyspaces()
['default_keyspace', 'that_other_one']

Expand source code

def get_admin(
    self,
    *,
    token: str | TokenProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> AstraDBAdmin:
    """
    Spawn an AstraDBAdmin bound to this client, to be used for
    administrative tasks such as managing databases.

    Args:
        token: an optional replacement for the client token, handed to the
            Astra DB Admin. Typically useful to switch to a token with a
            broader, admin-capable set of permissions.
            Either a plain token string or a subclass of
            `astrapy.authentication.TokenProvider` is accepted.
        spawn_api_options: a full or partial set of API Options that
            override the ones inherited from the client, for a finer
            configuration of the admin (e.g. its timeouts). Should a setting
            be given both here and through a named parameter, the named
            parameter wins for that setting.

    Returns:
        An AstraDBAdmin instance, with which to perform management at the
        database level.

    Raises:
        InvalidEnvironmentException: when the environment of the resulting
            API options is not an Astra DB one.

    Example:
        >>> my_adm0 = my_client.get_admin()
        >>> my_adm1 = my_client.get_admin(token=more_powerful_token_override)
        >>> database_list = my_adm0.list_databases()
        >>> my_db_admin = my_adm0.create_database(
        ...     "the_other_database",
        ...     cloud_provider="AWS",
        ...     region="eu-west-1",
        ... )
        >>> my_db_admin.list_keyspaces()
        ['default_keyspace', 'that_other_one']
    """

    # imported at call time: a module-level import would be circular
    from astrapy.admin import AstraDBAdmin

    # layering: client options, then spawn_api_options, then the named token
    token_override = APIOptions(token=token)
    admin_api_options = self.api_options.with_override(spawn_api_options)
    admin_api_options = admin_api_options.with_override(token_override)

    targets_astra_db = admin_api_options.environment in Environment.astra_db_values
    if not targets_astra_db:
        raise InvalidEnvironmentException(
            "Method not supported outside of Astra DB."
        )

    return AstraDBAdmin(api_options=admin_api_options)

def get_async_database(self, api_endpoint: str, *, token: str | TokenProvider | UnsetType = (unset), keyspace: str | None = None, spawn_api_options: APIOptions | UnsetType = (unset)) ‑> AsyncDatabase

Get an AsyncDatabase object from this client, for doing data-related work.

Args

api_endpoint: the API Endpoint for the target database (e.g. https://<ID>-<REGION>.apps.astra.datastax.com, or a custom domain if one is configured for the database). The database must exist already for the resulting object to be effectively used; in other words, this invocation does not create the database, just the object instance. Actual admin work can be achieved by using the AstraDBAdmin object.
token: if supplied, is passed to the Database instead of the client token. This can be either a literal token string or a subclass of TokenProvider.
keyspace: if provided, it is passed to the Database; otherwise the Database class will apply an environment-specific default.
spawn_api_options: a specification - complete or partial - of the API Options to override the defaults. This allows for a deeper configuration of the database, e.g. concerning timeouts; if this is passed together with the equivalent named parameters, the latter will take precedence in their respective settings.

Returns

an AsyncDatabase object with which to work on Data API collections.

Example

>>> async def create_use_db(cl: DataAPIClient, api_ep: str) -> None:
...     async_db = cl.get_async_database(api_ep)
...     my_a_coll = await async_db.create_collection(
...         "movies",
...         definition=(
...             CollectionDefinition.builder()
...             .set_vector_dimension(2)
...             .build()
...         )
...     )
...     await my_a_coll.insert_one({"title": "The Title", "$vector": [0.3, 0.4]})
...
>>> asyncio.run(
...   create_use_db(
...       my_client,
...       "https://01234567-...us-west1.apps.astra.datastax.com",
...   )
... )

Note

This method does not perform any admin-level operation through the DevOps API. For actual creation of a database, see the create_database method of class AstraDBAdmin.

Expand source code

def get_async_database(
    self,
    api_endpoint: str,
    *,
    token: str | TokenProvider | UnsetType = _UNSET,
    keyspace: str | None = None,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> AsyncDatabase:
    """
    Spawn an AsyncDatabase from this client, to be used for data-related work.

    The arguments are handed unchanged to `get_database`, and the resulting
    object is converted with its `to_async` method.

    Args:
        api_endpoint: the API Endpoint of the target database, such as
            `https://<ID>-<REGION>.apps.astra.datastax.com` (or a custom
            domain, if the database has one configured).
            No database is created by this call: only the object instance
            is. For the object to be of actual use, the database must exist
            already. Admin operations are the job of the AstraDBAdmin object.
        token: an optional replacement for the client token, handed to the
            Database. Either a plain token string or a subclass of
            `astrapy.authentication.TokenProvider` is accepted.
        keyspace: handed to the Database when given; when omitted, the
            Database class picks an environment-specific default.
        spawn_api_options: a full or partial set of API Options that
            override the ones inherited from the client, for a finer
            configuration of the database (e.g. its timeouts). Should a
            setting be given both here and through a named parameter,
            the named parameter wins for that setting.

    Returns:
        an AsyncDatabase object with which to work on Data API collections.

    Example:
        >>> async def create_use_db(cl: DataAPIClient, api_ep: str) -> None:
        ...     async_db = cl.get_async_database(api_ep)
        ...     my_a_coll = await async_db.create_collection(
        ...         "movies",
        ...         definition=(
        ...             CollectionDefinition.builder()
        ...             .set_vector_dimension(2)
        ...             .build()
        ...         )
        ...     )
        ...     await my_a_coll.insert_one({"title": "The Title", "$vector": [0.3, 0.4]})
        ...
        >>> asyncio.run(
        ...   create_use_db(
        ...       my_client,
        ...       "https://01234567-...us-west1.apps.astra.datastax.com",
        ...   )
        ... )

    Note:
        No admin-level operation through the DevOps API takes place here.
        To actually create a database, refer to the `create_database`
        method of class AstraDBAdmin.
    """

    # build the sync object first, then switch to its async counterpart
    sync_database = self.get_database(
        api_endpoint=api_endpoint,
        token=token,
        keyspace=keyspace,
        spawn_api_options=spawn_api_options,
    )
    return sync_database.to_async()

def get_async_database_by_api_endpoint(self, api_endpoint: str, *, token: str | TokenProvider | UnsetType = (unset), keyspace: str | None = None, spawn_api_options: APIOptions | UnsetType = (unset)) ‑> AsyncDatabase

Get an AsyncDatabase object from this client, for doing data-related work.

Note: this is an alias for get_async_database (see).

Args

api_endpoint: the API Endpoint for the target database (e.g. https://<ID>-<REGION>.apps.astra.datastax.com, or a custom domain if one is configured for the database). The database must exist already for the resulting object to be effectively used; in other words, this invocation does not create the database, just the object instance. Actual admin work can be achieved by using the AstraDBAdmin object.
token: if supplied, is passed to the Database instead of the client token. This can be either a literal token string or a subclass of TokenProvider.
keyspace: if provided, it is passed to the Database; otherwise the Database class will apply an environment-specific default.
spawn_api_options: a specification - complete or partial - of the API Options to override the defaults. This allows for a deeper configuration of the database, e.g. concerning timeouts; if this is passed together with the equivalent named parameters, the latter will take precedence in their respective settings.

Returns

an AsyncDatabase object with which to work on Data API collections.

Expand source code

def get_async_database_by_api_endpoint(
    self,
    api_endpoint: str,
    *,
    token: str | TokenProvider | UnsetType = _UNSET,
    keyspace: str | None = None,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> AsyncDatabase:
    """
    Spawn an AsyncDatabase from this client, to be used for data-related work.

    Note: this method is an alias for `get_async_database`, to which all
    arguments are forwarded as they are.

    Args:
        api_endpoint: the API Endpoint of the target database, such as
            `https://<ID>-<REGION>.apps.astra.datastax.com` (or a custom
            domain, if the database has one configured).
            No database is created by this call: only the object instance
            is. For the object to be of actual use, the database must exist
            already. Admin operations are the job of the AstraDBAdmin object.
        token: an optional replacement for the client token, handed to the
            Database. Either a plain token string or a subclass of
            `astrapy.authentication.TokenProvider` is accepted.
        keyspace: handed to the Database when given; when omitted, the
            Database class picks an environment-specific default.
        spawn_api_options: a full or partial set of API Options that
            override the ones inherited from the client, for a finer
            configuration of the database (e.g. its timeouts). Should a
            setting be given both here and through a named parameter,
            the named parameter wins for that setting.

    Returns:
        an AsyncDatabase object with which to work on Data API collections.
    """

    # pure delegation: nothing is altered on the way to get_async_database
    async_database = self.get_async_database(
        api_endpoint,
        token=token,
        keyspace=keyspace,
        spawn_api_options=spawn_api_options,
    )
    return async_database

def get_database(self, api_endpoint: str, *, token: str | TokenProvider | UnsetType = (unset), keyspace: str | None = None, spawn_api_options: APIOptions | UnsetType = (unset)) ‑> Database

Get a Database object from this client, for doing data-related work.

Args

api_endpoint: the API Endpoint for the target database (e.g. https://<ID>-<REGION>.apps.astra.datastax.com, or a custom domain if one is configured for the database). The database must exist already for the resulting object to be effectively used; in other words, this invocation does not create the database, just the object instance. Actual admin work can be achieved by using the AstraDBAdmin object.
token: if supplied, is passed to the Database instead of the client token. This can be either a literal token string or a subclass of TokenProvider.
keyspace: if provided, it is passed to the Database; otherwise the Database class will apply an environment-specific default.
spawn_api_options: a specification - complete or partial - of the API Options to override the defaults. This allows for a deeper configuration of the database, e.g. concerning timeouts; if this is passed together with the equivalent named parameters, the latter will take precedence in their respective settings.

Returns

a Database object with which to work on Data API collections.

Example

>>> my_db1 = my_client.get_database(
...     "https://01234567-...us-west1.apps.astra.datastax.com",
... )
>>> my_db2 = my_client.get_database(
...     "https://01234567-...us-west1.apps.astra.datastax.com",
...     token="AstraCS:...",
...     keyspace="prod_keyspace",
... )
>>> my_coll = my_db1.create_collection(
...     "movies",
...     definition=(
...         CollectionDefinition.builder()
...         .set_vector_dimension(2)
...         .build()
...     ),
... )
>>> my_coll.insert_one({"title": "The Title", "$vector": [0.3, 0.4]})

Note

This method does not perform any admin-level operation through the DevOps API. For actual creation of a database, see the create_database method of class AstraDBAdmin.

Expand source code

def get_database(
    self,
    api_endpoint: str,
    *,
    token: str | TokenProvider | UnsetType = _UNSET,
    keyspace: str | None = None,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> Database:
    """
    Return a Database, spawned from this client, for data-related work.

    Args:
        api_endpoint: the API Endpoint of the target database
            (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`,
            or a custom domain if one is configured for the database).
            No database is created by this call, only the object instance:
            the database must already exist for the returned object to be
            of any use. Admin work is done through the AstraDBAdmin object.
        token: if supplied, it is passed to the Database in place of the
            client token. Either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        keyspace: if provided, it is passed to the Database; otherwise
            the Database class applies an environment-specific default.
        spawn_api_options: a complete or partial specification of the
            API Options overriding the defaults (e.g. for timeouts).
            When a setting is given both here and through the equivalent
            named parameter, the named parameter takes precedence.

    Returns:
        a Database object with which to work on Data API collections.

    Raises:
        InvalidEnvironmentException: in an Astra DB environment, if the
            environment parsed from `api_endpoint` differs from the one
            in the resulting API options.
        ValueError: in a non-Astra environment, if `api_endpoint` cannot
            be parsed as a generic API URL.

    Example:
        >>> my_db1 = my_client.get_database(
        ...     "https://01234567-...us-west1.apps.astra.datastax.com",
        ... )
        >>> my_db2 = my_client.get_database(
        ...     "https://01234567-...us-west1.apps.astra.datastax.com",
        ...     token="AstraCS:...",
        ...     keyspace="prod_keyspace",
        ... )
        >>> my_coll = my_db1.create_collection(
        ...     "movies",
        ...     definition=(
        ...         CollectionDefinition.builder()
        ...         .set_vector_dimension(2)
        ...         .build()
        ...     ),
        ... )
        >>> my_coll.insert_one({"title": "The Title", "$vector": [0.3, 0.4]})

    Note:
        This method does not perform any admin-level operation through
        the DevOps API. For actual creation of a database, see the
        `create_database` method of class AstraDBAdmin.
    """

    # deferred import: a module-level one would create a circular dependency
    from astrapy import Database

    # layering: client options, then spawn_api_options, then the named token
    token_override = APIOptions(token=token)
    spawn_level_options = self.api_options.with_override(spawn_api_options)
    database_api_options = spawn_level_options.with_override(token_override)

    if database_api_options.environment in Environment.astra_db_values:
        astra_endpoint = parse_api_endpoint(api_endpoint)
        if astra_endpoint is None:
            # not a recognized Astra endpoint: log the informational
            # message and still build the Database on the raw endpoint
            logger.info(api_endpoint_parsing_cdinfo_message(api_endpoint))
        elif astra_endpoint.environment != database_api_options.environment:
            raise InvalidEnvironmentException(
                "Environment mismatch between client and provided "
                "API endpoint. You can try adding "
                f'`environment="{astra_endpoint.environment}"` '
                "to the DataAPIClient creation statement."
            )
        return Database(
            api_endpoint=api_endpoint,
            keyspace=keyspace,
            api_options=database_api_options,
        )

    # non-Astra environments: the endpoint must parse as a generic API URL,
    # and it is the parsed form that gets passed to the Database
    generic_endpoint = parse_generic_api_url(api_endpoint)
    if not generic_endpoint:
        raise ValueError(generic_api_url_parsing_error_message(api_endpoint))
    return Database(
        api_endpoint=generic_endpoint,
        keyspace=keyspace,
        api_options=database_api_options,
    )

def get_database_by_api_endpoint(self, api_endpoint: str, *, token: str | TokenProvider | UnsetType = (unset), keyspace: str | None = None, spawn_api_options: APIOptions | UnsetType = (unset)) ‑> Database

Get a Database object from this client, for doing data-related work.

Note: this is an alias for get_database (see).

Args

api_endpoint: the API Endpoint for the target database (e.g. https://<ID>-<REGION>.apps.astra.datastax.com, or a custom domain if one is configured for the database). The database must exist already for the resulting object to be effectively used; in other words, this invocation does not create the database, just the object instance. Actual admin work can be achieved by using the AstraDBAdmin object.
token: if supplied, is passed to the Database instead of the client token. This can be either a literal token string or a subclass of TokenProvider.
keyspace: if provided, it is passed to the Database; otherwise the Database class will apply an environment-specific default.
spawn_api_options: a specification - complete or partial - of the API Options to override the defaults. This allows for a deeper configuration of the database, e.g. concerning timeouts; if this is passed together with the equivalent named parameters, the latter will take precedence in their respective settings.

Returns

a Database object with which to work on Data API collections.

Expand source code

def get_database_by_api_endpoint(
    self,
    api_endpoint: str,
    *,
    token: str | TokenProvider | UnsetType = _UNSET,
    keyspace: str | None = None,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> Database:
    """
    Return a Database, spawned from this client, for data-related work.

    This method is an alias for `get_database`: every argument is
    forwarded to it unchanged and its return value is handed back as is.

    Args:
        api_endpoint: the API Endpoint of the target database
            (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`,
            or a custom domain if one is configured for the database).
            No database is created by this call, only the object instance:
            the database must already exist for the returned object to be
            of any use. Admin work is done through the AstraDBAdmin object.
        token: if supplied, it is passed to the Database in place of the
            client token. Either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        keyspace: if provided, it is passed to the Database; otherwise
            the Database class applies an environment-specific default.
        spawn_api_options: a complete or partial specification of the
            API Options overriding the defaults (e.g. for timeouts).
            When a setting is given both here and through the equivalent
            named parameter, the named parameter takes precedence.

    Returns:
        a Database object with which to work on Data API collections.
    """

    # pure delegation: no additional logic lives in this alias
    database = self.get_database(
        api_endpoint=api_endpoint,
        token=token,
        keyspace=keyspace,
        spawn_api_options=spawn_api_options,
    )
    return database

def with_options(self, *, token: str | TokenProvider | UnsetType = (unset), api_options: APIOptions | UnsetType = (unset)) ‑> DataAPIClient

Create a clone of this DataAPIClient with some changed attributes.

Args

token: an Access Token to the database. Example: "AstraCS:xyz...". This can be either a literal token string or a subclass of TokenProvider.
api_options: any additional options to set for the clone, in the form of an APIOptions instance (where one can set just the needed attributes). In case the same setting is also provided as named parameter, the latter takes precedence.

Returns

a new DataAPIClient instance.

Example

>>> other_auth_client = my_client.with_options(
...     token="AstraCS:xyz...",
... )

Expand source code

def with_options(
    self,
    *,
    token: str | TokenProvider | UnsetType = _UNSET,
    api_options: APIOptions | UnsetType = _UNSET,
) -> DataAPIClient:
    """
    Return a copy of this DataAPIClient with some attributes replaced.

    Args:
        token: an Access Token to the database. Example: `"AstraCS:xyz..."`.
            Either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        api_options: further options for the clone, expressed as an
            APIOptions instance in which only the needed attributes are set.
            If a setting is also given as a named parameter, the named
            parameter takes precedence.

    Returns:
        a new DataAPIClient instance.

    Example:
        >>> other_auth_client = my_client.with_options(
        ...     token="AstraCS:xyz...",
        ... )
    """

    # the actual cloning is implemented by the private `_copy` helper
    cloned_client = self._copy(token=token, api_options=api_options)
    return cloned_client

class DataAPIDatabaseAdmin (*, api_endpoint: str, api_options: FullAPIOptions, spawner_database: Database | AsyncDatabase | None = None)

An "admin" object for non-Astra Data API environments, to perform administrative tasks at the keyspaces level such as creating/listing/dropping keyspaces.

Conforming to the architecture of non-Astra deployments of the Data API, this object works within the one existing database. It is within that database that the keyspace CRUD operations (and possibly other admin operations) are performed. Since non-Astra environments lack the concept of an overall admin (such as the all-databases AstraDBAdmin class), a DataAPIDatabaseAdmin is generally created by invoking the get_database_admin method of the corresponding Database object (which in turn is spawned by a DataAPIClient).

Args

api_endpoint: the full URI to access the Data API, e.g. "http://localhost:8181".
api_options: a complete specification of the API Options for this instance.
spawner_database: either a Database or an AsyncDatabase instance. This represents the database class which spawns this admin object, so that, if required, a keyspace creation can retroactively "use" the new keyspace in the spawner. Used to enable the Async/Database.get_database_admin().create_keyspace() pattern.

Example

>>> from astrapy import DataAPIClient
>>> from astrapy.constants import Environment
>>> from astrapy.authentication import UsernamePasswordTokenProvider
>>>
>>> token_provider = UsernamePasswordTokenProvider("username", "password")
>>> endpoint = "http://localhost:8181"
>>>
>>> client = DataAPIClient(
...     token=token_provider,
...     environment=Environment.OTHER,
... )
>>> database = client.get_database(endpoint)
>>> admin_for_my_db = database.get_database_admin()
>>>
>>> admin_for_my_db.list_keyspaces()
['keyspace1', 'keyspace2']

Note

a more powerful token may be required than the one sufficient for working in the Database, Collection and Table classes. Check the provided token if "Unauthorized" errors are encountered.

Expand source code

class DataAPIDatabaseAdmin(DatabaseAdmin):
    """
    An "admin" object for non-Astra Data API environments, to perform administrative
    tasks at the keyspaces level such as creating/listing/dropping keyspaces.

    Conforming to the architecture of non-Astra deployments of the Data API,
    this object works within the one existing database. It is within that database
    that the keyspace CRUD operations (and possibly other admin operations)
    are performed. Since non-Astra environment lack the concept of an overall
    admin (such as the all-databases AstraDBAdmin class), a `DataAPIDatabaseAdmin`
    is generally created by invoking the `get_database_admin` method of the
    corresponding `Database` object (which in turn is spawned by a DataAPIClient).

    Args:
        api_endpoint: the full URI to access the Data API,
            e.g. "http://localhost:8181".
        api_options: a complete specification of the API Options for this instance.
        spawner_database: either a Database or an AsyncDatabase instance. This represents
            the database class which spawns this admin object, so that, if required,
            a keyspace creation can retroactively "use" the new keyspace in the spawner.
            Used to enable the Async/Database.get_admin_database().create_keyspace()
            pattern.

    Example:
        >>> from astrapy import DataAPIClient
        >>> from astrapy.constants import Environment
        >>> from astrapy.authentication import UsernamePasswordTokenProvider
        >>>
        >>> token_provider = UsernamePasswordTokenProvider("username", "password")
        >>> endpoint = "http://localhost:8181"
        >>>
        >>> client = DataAPIClient(
        ...     token=token_provider,
        ...     environment=Environment.OTHER,
        ... )
        >>> database = client.get_database(endpoint)
        >>> admin_for_my_db = database.get_database_admin()
        >>>
        >>> admin_for_my_db.list_keyspaces()
        ['keyspace1', 'keyspace2']

    Note:
        a more powerful token may be required than the one sufficient for working
        in the Database, Collection and Table classes. Check the provided token
        if "Unauthorized" errors are encountered.
    """

    def __init__(
        self,
        *,
        api_endpoint: str,
        api_options: FullAPIOptions,
        spawner_database: Database | AsyncDatabase | None = None,
    ) -> None:
        """
        Store the endpoint and options, resolve the spawner database and
        prepare the API commander used by the admin methods.

        Args:
            api_endpoint: the full URI to access the Data API.
            api_options: a complete specification of the API Options.
            spawner_database: the Database or AsyncDatabase that spawned this
                admin. If omitted, a Database is created on the same endpoint
                and options, with no keyspace specified.
        """
        # deferred import: a module-level one would create a circular dependency
        from astrapy.database import Database

        self.api_options = api_options
        self.api_endpoint = api_endpoint

        if spawner_database is None:
            # keyspace=None leaves the per-environment default choice
            # to the Database itself
            spawner_database = Database(
                api_endpoint=self.api_endpoint,
                keyspace=None,
                api_options=self.api_options,
            )
        self.spawner_database = spawner_database

        # although this targets the Data API, it is an admin object and must
        # send the Admin additional headers; these are applied after the auth
        # header, so they win in case of a key clash
        commander_headers = {
            DEFAULT_DATA_API_AUTH_HEADER: self.api_options.token.get_token(),
        }
        commander_headers.update(self.api_options.admin_additional_headers)
        self._commander_headers = commander_headers
        # must come last: the commander is built from the headers set above
        self._api_commander = self._get_api_commander()

    def __repr__(self) -> str:
        parts = [
            f'api_endpoint="{self.api_endpoint}"',
            f"api_options={self.api_options}",
        ]
        return f"{self.__class__.__name__}({', '.join(parts)})"

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, DataAPIDatabaseAdmin):
            return all(
                [
                    self.api_endpoint == other.api_endpoint,
                    self.api_options == other.api_options,
                ]
            )
        else:
            return False

    def _get_api_commander(self) -> APICommander:
        """
        Build the APICommander for this admin.

        The request path is made of the configured API path and API version:
        each is stripped of leading/trailing slashes, those that are None or
        empty are dropped, and the rest is joined under a leading "/".
        """
        url_options = self.api_options.data_api_url_options
        path_segments: list[str] = []
        for raw_segment in (url_options.api_path, url_options.api_version):
            if raw_segment is None:
                continue
            segment = raw_segment.strip("/")
            if segment != "":
                path_segments.append(segment)
        # with no surviving segments the path is just "/"
        commander_path = f"/{'/'.join(path_segments)}"
        return APICommander(
            api_endpoint=self.api_endpoint,
            path=commander_path,
            headers=self._commander_headers,
            callers=self.api_options.callers,
            redacted_header_names=self.api_options.redacted_header_names,
        )

    def _copy(
        self,
        *,
        token: str | TokenProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> DataAPIDatabaseAdmin:
        """
        Create a new DataAPIDatabaseAdmin on the same endpoint and spawner
        database, with this instance's API options overridden first by
        `api_options` and then by the named `token` parameter.
        """
        token_override = APIOptions(token=token)
        # the named parameter is applied last
        merged_options = self.api_options.with_override(api_options)
        merged_options = merged_options.with_override(token_override)
        return DataAPIDatabaseAdmin(
            api_endpoint=self.api_endpoint,
            api_options=merged_options,
            spawner_database=self.spawner_database,
        )

    def with_options(
        self,
        *,
        token: str | TokenProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> DataAPIDatabaseAdmin:
        """
        Return a copy of this DataAPIDatabaseAdmin with some attributes replaced.

        Args:
            token: an access token with enough permission to perform admin tasks.
                Either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            api_options: further options for the clone, expressed as an
                APIOptions instance in which only the needed attributes are set.
                If a setting is also given as a named parameter, the named
                parameter takes precedence.

        Returns:
            a new DataAPIDatabaseAdmin instance.

        Example:
            >>> other_auth_admin = admin_for_my_db.with_options(
            ...     token="<another-admin-token>",
            ... )
        """

        # the actual cloning is implemented by the private `_copy` helper
        cloned_admin = self._copy(token=token, api_options=api_options)
        return cloned_admin

    def list_keyspaces(
        self,
        *,
        keyspace_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[str]:
        """
        Query the API for a list of the keyspaces in the database.

        Args:
            keyspace_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `keyspace_admin_timeout_ms`.
            timeout_ms: an alias for `keyspace_admin_timeout_ms`.

        Returns:
            A list of the keyspaces, each a string, in no particular order.

        Raises:
            UnexpectedDataAPIResponseException: if the response has no
                "keyspaces" entry under "status" (including the cases of
                a missing or null "status").

        Example:
            >>> admin_for_my_db.list_keyspaces()
            ['default_keyspace', 'staging_keyspace']
        """

        _keyspace_admin_timeout_ms, _ka_label = _select_singlereq_timeout_ka(
            timeout_options=self.api_options.timeout_options,
            keyspace_admin_timeout_ms=keyspace_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info("getting list of keyspaces")
        fn_response = self._api_commander.request(
            payload={"findKeyspaces": {}},
            timeout_context=_TimeoutContext(
                request_ms=_keyspace_admin_timeout_ms, label=_ka_label
            ),
        )
        # `or {}` also covers an explicit `"status": null`, which would
        # otherwise make the membership test below raise a TypeError
        response_status = fn_response.get("status") or {}
        if "keyspaces" not in response_status:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from findKeyspaces API command.",
                raw_response=fn_response,
            )
        logger.info("finished getting list of keyspaces")
        return response_status["keyspaces"]  # type: ignore[no-any-return]

    def create_keyspace(
        self,
        name: str,
        *,
        replication_options: dict[str, Any] | None = None,
        update_db_keyspace: bool | None = None,
        keyspace_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
        **kwargs: Any,
    ) -> None:
        """
        Create a keyspace in the database.

        Args:
            name: the keyspace name. If a keyspace with this name exists
                already, the call proceeds as usual, raises no errors
                and amounts to a no-op.
            replication_options: a dictionary specifying the replication
                of the keyspace (across database nodes). If provided,
                it must have a structure similar to:
                `{"class": "SimpleStrategy", "replication_factor": 1}`.
                A None or empty value is left out of the request.
            update_db_keyspace: if True, the `Database` or `AsyncDatabase` class
                that spawned this DatabaseAdmin, if any, gets updated to work on
                the newly-created keyspace starting when this method returns.
            keyspace_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `keyspace_admin_timeout_ms`.
            timeout_ms: an alias for `keyspace_admin_timeout_ms`.
            **kwargs: accepted but not used by this implementation.

        Raises:
            UnexpectedDataAPIResponseException: if the response status
                does not report `"ok": 1`.

        Note:
            a timeout event is no guarantee at all that the
            creation request has not reached the API server and is not going
            to be, in fact, honored.

        Example:
            >>> admin_for_my_db.list_keyspaces()
            ['default_keyspace']
            >>> admin_for_my_db.create_keyspace("that_other_one")
            >>> admin_for_my_db.list_keyspaces()
            ['default_keyspace', 'that_other_one']
        """

        effective_timeout_ms, timeout_label = _select_singlereq_timeout_ka(
            timeout_options=self.api_options.timeout_options,
            keyspace_admin_timeout_ms=keyspace_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # "options" is sent only when there are replication settings to pass
        command_body: dict[str, Any] = {"name": name}
        if replication_options:
            command_body["options"] = {"replication": replication_options}
        logger.info("creating keyspace")
        create_response = self._api_commander.request(
            payload={"createKeyspace": command_body},
            timeout_context=_TimeoutContext(
                request_ms=effective_timeout_ms, label=timeout_label
            ),
        )
        response_status = create_response.get("status") or {}
        if response_status.get("ok") != 1:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from createKeyspace API command.",
                raw_response=create_response,
            )
        logger.info("finished creating keyspace")
        if update_db_keyspace:
            self.spawner_database.use_keyspace(name)

    def drop_keyspace(
        self,
        name: str,
        *,
        keyspace_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Drop (delete) a keyspace from the database.

        Args:
            name: the keyspace to delete. If it does not exist in this database,
                an error is raised.
            keyspace_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `keyspace_admin_timeout_ms`.
            timeout_ms: an alias for `keyspace_admin_timeout_ms`.

        Raises:
            UnexpectedDataAPIResponseException: if the response status
                does not report `"ok": 1`.

        Note:
            a timeout event is no guarantee at all that the
            deletion request has not reached the API server and is not going
            to be, in fact, honored.

        Example:
            >>> admin_for_my_db.list_keyspaces()
            ['default_keyspace', 'that_other_one']
            >>> admin_for_my_db.drop_keyspace("that_other_one")
            >>> admin_for_my_db.list_keyspaces()
            ['default_keyspace']
        """

        effective_timeout_ms, timeout_label = _select_singlereq_timeout_ka(
            timeout_options=self.api_options.timeout_options,
            keyspace_admin_timeout_ms=keyspace_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info("dropping keyspace")
        drop_response = self._api_commander.request(
            payload={"dropKeyspace": {"name": name}},
            timeout_context=_TimeoutContext(
                request_ms=effective_timeout_ms, label=timeout_label
            ),
        )
        response_status = drop_response.get("status") or {}
        if response_status.get("ok") != 1:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from dropKeyspace API command.",
                raw_response=drop_response,
            )
        logger.info("finished dropping keyspace")

    async def async_list_keyspaces(
        self,
        *,
        keyspace_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[str]:
        """
        Query the API for a list of the keyspaces in the database.
        Async version of the method, for use in an asyncio context.

        Args:
            keyspace_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `keyspace_admin_timeout_ms`.
            timeout_ms: an alias for `keyspace_admin_timeout_ms`.

        Returns:
            A list of the keyspaces, each a string, in no particular order.

        Raises:
            UnexpectedDataAPIResponseException: if the response has no
                "keyspaces" entry under "status" (including the cases of
                a missing or null "status").

        Example:
            >>> asyncio.run(admin_for_my_db.async_list_keyspaces())
            ['default_keyspace', 'staging_keyspace']
        """

        _keyspace_admin_timeout_ms, _ka_label = _select_singlereq_timeout_ka(
            timeout_options=self.api_options.timeout_options,
            keyspace_admin_timeout_ms=keyspace_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info("getting list of keyspaces, async")
        fn_response = await self._api_commander.async_request(
            payload={"findKeyspaces": {}},
            timeout_context=_TimeoutContext(
                request_ms=_keyspace_admin_timeout_ms, label=_ka_label
            ),
        )
        # `or {}` also covers an explicit `"status": null`, which would
        # otherwise make the membership test below raise a TypeError
        response_status = fn_response.get("status") or {}
        if "keyspaces" not in response_status:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from findKeyspaces API command.",
                raw_response=fn_response,
            )
        logger.info("finished getting list of keyspaces, async")
        return response_status["keyspaces"]  # type: ignore[no-any-return]

    async def async_create_keyspace(
        self,
        name: str,
        *,
        replication_options: dict[str, Any] | None = None,
        update_db_keyspace: bool | None = None,
        keyspace_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
        **kwargs: Any,
    ) -> None:
        """
        Create a keyspace in the database.
        Async version of the method, for use in an asyncio context.

        Args:
            name: the keyspace name. If a keyspace with this name exists
                already, the call proceeds as usual, raises no errors
                and amounts to a no-op.
            replication_options: a dictionary specifying the replication
                of the keyspace (across database nodes). If provided,
                it must have a structure similar to:
                `{"class": "SimpleStrategy", "replication_factor": 1}`.
                A None or empty value is left out of the request.
            update_db_keyspace: if True, the `Database` or `AsyncDatabase` class
                that spawned this DatabaseAdmin, if any, gets updated to work on
                the newly-created keyspace starting when this method returns.
            keyspace_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `keyspace_admin_timeout_ms`.
            timeout_ms: an alias for `keyspace_admin_timeout_ms`.
            **kwargs: accepted but not used by this implementation.

        Raises:
            UnexpectedDataAPIResponseException: if the response status
                does not report `"ok": 1`.

        Note:
            a timeout event is no guarantee at all that the
            creation request has not reached the API server and is not going
            to be, in fact, honored.

        Example:
            >>> admin_for_my_db.list_keyspaces()
            ['default_keyspace']
            >>> asyncio.run(admin_for_my_db.async_create_keyspace(
            ...     "that_other_one"
            ... ))
            >>> admin_for_my_db.list_keyspaces()
            ['default_keyspace', 'that_other_one']
        """

        effective_timeout_ms, timeout_label = _select_singlereq_timeout_ka(
            timeout_options=self.api_options.timeout_options,
            keyspace_admin_timeout_ms=keyspace_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # "options" is sent only when there are replication settings to pass
        command_body: dict[str, Any] = {"name": name}
        if replication_options:
            command_body["options"] = {"replication": replication_options}
        logger.info("creating keyspace, async")
        create_response = await self._api_commander.async_request(
            payload={"createKeyspace": command_body},
            timeout_context=_TimeoutContext(
                request_ms=effective_timeout_ms, label=timeout_label
            ),
        )
        response_status = create_response.get("status") or {}
        if response_status.get("ok") != 1:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from createKeyspace API command.",
                raw_response=create_response,
            )
        logger.info("finished creating keyspace, async")
        if update_db_keyspace:
            self.spawner_database.use_keyspace(name)

    async def async_drop_keyspace(
        self,
        name: str,
        *,
        keyspace_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Delete a keyspace from the database through a `dropKeyspace` command.
        This is the async flavor of the method, meant for use with asyncio.

        Args:
            name: the name of the keyspace to drop. An error is raised
                if no such keyspace exists in this database.
            keyspace_admin_timeout_ms: a timeout, in milliseconds, for the
                underlying API request. When omitted, the defaults set on this
                object are used. (Only one API request is issued, so all
                timeout parameters have the same meaning here.)
            request_timeout_ms: an alias for `keyspace_admin_timeout_ms`.
            timeout_ms: an alias for `keyspace_admin_timeout_ms`.

        Raises:
            UnexpectedDataAPIResponseException: if the API response does not
                carry a `status` with `ok` equal to 1.

        Note: hitting the timeout does not guarantee that the deletion
        request never reached the API server: it may still end up
        being honored.

        Example:
            >>> admin_for_my_db.list_keyspaces()
            ['that_other_one', 'default_keyspace']
            >>> asyncio.run(admin_for_my_db.async_drop_keyspace(
            ...     "that_other_one"
            ... ))
            >>> admin_for_my_db.list_keyspaces()
            ['default_keyspace']
        """

        # collapse the three equivalent timeout parameters into a single value
        resolved_timeout_ms, timeout_label = _select_singlereq_timeout_ka(
            timeout_options=self.api_options.timeout_options,
            keyspace_admin_timeout_ms=keyspace_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info("dropping keyspace, async")
        drop_payload = {"dropKeyspace": {"name": name}}
        request_context = _TimeoutContext(
            request_ms=resolved_timeout_ms,
            label=timeout_label,
        )
        drop_response = await self._api_commander.async_request(
            payload=drop_payload,
            timeout_context=request_context,
        )
        # a missing or null "status" is treated the same as an empty one
        response_status = drop_response.get("status") or {}
        if response_status.get("ok") != 1:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from dropKeyspace API command.",
                raw_response=drop_response,
            )
        logger.info("finished dropping keyspace, async")

    def get_database(
        self,
        *,
        keyspace: str | None = None,
        token: str | TokenProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> Database:
        """
        Build a Database object targeting the same API endpoint as this
        database admin, for data-level work (e.g. managing collections).

        Args:
            keyspace: an optional keyspace to set in the resulting Database.
                If omitted, the keyspace is left unspecified and must be set
                later with the `use_keyspace` method.
            token: if supplied, it is passed to the Database in place of
                the one set for this object. Useful to work in
                a least-privilege manner, with narrower permissions for
                non-admin work. This can be either a literal token string
                or a subclass of `astrapy.authentication.TokenProvider`.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults inherited from the
                database admin, for a deeper configuration of the resulting
                Database. The `token` parameter is applied as a further
                override on top of these options.

        Returns:
            A Database object, ready to work with data, collections and tables.

        Example:
            >>> my_db = admin_for_my_db.get_database()
            >>> my_db.list_collection_names()
            ['movies', 'another_collection']

        Note:
            creating an instance of Database does not trigger actual creation
            of the database itself, which should exist beforehand.
        """

        # lazy importing here to avoid circular dependency
        from astrapy import Database

        # layered overrides: first the caller-provided options on top of the
        # admin's own, then the `token` parameter on top of the result
        options_with_spawn = self.api_options.with_override(spawn_api_options)
        token_override = APIOptions(token=token)
        database_api_options = options_with_spawn.with_override(token_override)

        return Database(
            api_endpoint=self.api_endpoint,
            keyspace=keyspace,
            api_options=database_api_options,
        )

    def get_async_database(
        self,
        *,
        keyspace: str | None = None,
        token: str | TokenProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncDatabase:
        """
        Build an AsyncDatabase object for data-level work (e.g. managing
        collections). The result is obtained by creating a Database through
        `get_database` with the same arguments and converting it with its
        `to_async` method.

        Args:
            keyspace: an optional keyspace to set in the resulting
                AsyncDatabase. If omitted, the keyspace is left unspecified
                and must be set later with the `use_keyspace` method.
            token: if supplied, it is passed to the AsyncDatabase in place of
                the one set for this object. Useful to work in
                a least-privilege manner, with narrower permissions for
                non-admin work. This can be either a literal token string
                or a subclass of `astrapy.authentication.TokenProvider`.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults inherited from the
                database admin, for a deeper configuration of the resulting
                AsyncDatabase.

        Returns:
            An AsyncDatabase object, ready to work with data, collections
            and tables.

        Note:
            creating an instance of AsyncDatabase does not trigger actual
            creation of the database itself, which should exist beforehand.
        """

        sync_database = self.get_database(
            token=token,
            keyspace=keyspace,
            spawn_api_options=spawn_api_options,
        )
        return sync_database.to_async()

    def find_embedding_providers(
        self,
        *,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> FindEmbeddingProvidersResult:
        """
        Query the API for the full information on available embedding providers.

        Args:
            database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.

        Returns:
            A `FindEmbeddingProvidersResult` object with the complete information
            returned by the API about available embedding providers

        Raises:
            UnexpectedDataAPIResponseException: if the response has no `status`
                (missing or null) or its `status` lacks the
                `embeddingProviders` entry.

        Example (output abridged and indented for clarity):
            >>> admin_for_my_db.find_embedding_providers()
            FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)
            >>> admin_for_my_db.find_embedding_providers().embedding_providers
            {
                'openai': EmbeddingProvider(
                    display_name='OpenAI',
                    models=[
                        EmbeddingProviderModel(name='text-embedding-3-small'),
                        ...
                    ]
                ),
                ...
            }
        """

        _database_admin_timeout_ms, _da_label = _select_singlereq_timeout_da(
            timeout_options=self.api_options.timeout_options,
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info("findEmbeddingProviders")
        fe_response = self._api_commander.request(
            payload={"findEmbeddingProviders": {}},
            timeout_context=_TimeoutContext(
                request_ms=_database_admin_timeout_ms, label=_da_label
            ),
        )
        # `or {}` also covers an explicit `"status": null`, so that a faulty
        # response surfaces as the dedicated exception and not a TypeError
        # (same guard as used by the keyspace create/drop methods).
        fe_status = fe_response.get("status") or {}
        if "embeddingProviders" not in fe_status:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from findEmbeddingProviders API command.",
                raw_response=fe_response,
            )
        logger.info("finished findEmbeddingProviders")
        return FindEmbeddingProvidersResult._from_dict(fe_status)

    async def async_find_embedding_providers(
        self,
        *,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> FindEmbeddingProvidersResult:
        """
        Query the API for the full information on available embedding providers.
        Async version of the method, for use in an asyncio context.

        Args:
            database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.

        Returns:
            A `FindEmbeddingProvidersResult` object with the complete information
            returned by the API about available embedding providers

        Raises:
            UnexpectedDataAPIResponseException: if the response has no `status`
                (missing or null) or its `status` lacks the
                `embeddingProviders` entry.

        Example (output abridged and indented for clarity):
            >>> asyncio.run(admin_for_my_db.async_find_embedding_providers())
            FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)
            >>> asyncio.run(
            ...     admin_for_my_db.async_find_embedding_providers()
            ... ).embedding_providers
            {
                'openai': EmbeddingProvider(
                    display_name='OpenAI',
                    models=[
                        EmbeddingProviderModel(name='text-embedding-3-small'),
                        ...
                    ]
                ),
                ...
            }
        """

        _database_admin_timeout_ms, _da_label = _select_singlereq_timeout_da(
            timeout_options=self.api_options.timeout_options,
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info("findEmbeddingProviders, async")
        fe_response = await self._api_commander.async_request(
            payload={"findEmbeddingProviders": {}},
            timeout_context=_TimeoutContext(
                request_ms=_database_admin_timeout_ms, label=_da_label
            ),
        )
        # `or {}` also covers an explicit `"status": null`, so that a faulty
        # response surfaces as the dedicated exception and not a TypeError
        # (same guard as used by the keyspace create/drop methods).
        fe_status = fe_response.get("status") or {}
        if "embeddingProviders" not in fe_status:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from findEmbeddingProviders API command.",
                raw_response=fe_response,
            )
        logger.info("finished findEmbeddingProviders, async")
        return FindEmbeddingProvidersResult._from_dict(fe_status)

    def find_reranking_providers(
        self,
        *,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> FindRerankingProvidersResult:
        """
        Query the API for the full information on available reranking providers.

        Args:
            database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.

        Returns:
            A `FindRerankingProvidersResult` object with the complete information
            returned by the API about available reranking providers

        Raises:
            UnexpectedDataAPIResponseException: if the response has no `status`
                (missing or null) or its `status` lacks the
                `rerankingProviders` entry.

        Example (output abridged and indented for clarity):
            >>> admin_for_my_db.find_reranking_providers()
            FindRerankingProvidersResult(reranking_providers=nvidia)
            >>> admin_for_my_db.find_reranking_providers().reranking_providers
            {
                'nvidia': RerankingProvider(
                    <Default>
                    display_name='Nvidia',
                    models=[
                        RerankingProviderModel(
                            <Default>
                            name='nvidia/llama-3.2-nv-rerankqa-1b-v2'
                        ),
                        ...
                    ]
                ),
                ...
            }
        """

        _database_admin_timeout_ms, _da_label = _select_singlereq_timeout_da(
            timeout_options=self.api_options.timeout_options,
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info("findRerankingProviders")
        fr_response = self._api_commander.request(
            payload={"findRerankingProviders": {}},
            timeout_context=_TimeoutContext(
                request_ms=_database_admin_timeout_ms, label=_da_label
            ),
        )
        # `or {}` also covers an explicit `"status": null`, so that a faulty
        # response surfaces as the dedicated exception and not a TypeError
        # (same guard as used by the keyspace create/drop methods).
        fr_status = fr_response.get("status") or {}
        if "rerankingProviders" not in fr_status:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from findRerankingProviders API command.",
                raw_response=fr_response,
            )
        logger.info("finished findRerankingProviders")
        return FindRerankingProvidersResult._from_dict(fr_status)

    async def async_find_reranking_providers(
        self,
        *,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> FindRerankingProvidersResult:
        """
        Query the API for the full information on available reranking providers.
        Async version of the method, for use in an asyncio context.

        Args:
            database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.

        Returns:
            A `FindRerankingProvidersResult` object with the complete information
            returned by the API about available reranking providers

        Raises:
            UnexpectedDataAPIResponseException: if the response has no `status`
                (missing or null) or its `status` lacks the
                `rerankingProviders` entry.

        Example (output abridged and indented for clarity):
            >>> asyncio.run(admin_for_my_db.async_find_reranking_providers())
            FindRerankingProvidersResult(reranking_providers=nvidia)
            >>> asyncio.run(
            ...     admin_for_my_db.async_find_reranking_providers()
            ... ).reranking_providers
            {
                'nvidia': RerankingProvider(
                    <Default>
                    display_name='Nvidia',
                    models=[
                        RerankingProviderModel(
                            <Default>
                            name='nvidia/llama-3.2-nv-rerankqa-1b-v2'
                        ),
                        ...
                    ]
                ),
                ...
            }
        """

        _database_admin_timeout_ms, _da_label = _select_singlereq_timeout_da(
            timeout_options=self.api_options.timeout_options,
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info("findRerankingProviders, async")
        fr_response = await self._api_commander.async_request(
            payload={"findRerankingProviders": {}},
            timeout_context=_TimeoutContext(
                request_ms=_database_admin_timeout_ms, label=_da_label
            ),
        )
        # `or {}` also covers an explicit `"status": null`, so that a faulty
        # response surfaces as the dedicated exception and not a TypeError
        # (same guard as used by the keyspace create/drop methods).
        fr_status = fr_response.get("status") or {}
        if "rerankingProviders" not in fr_status:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from findRerankingProviders API command.",
                raw_response=fr_response,
            )
        logger.info("finished findRerankingProviders, async")
        return FindRerankingProvidersResult._from_dict(fr_status)

Ancestors

DatabaseAdmin
abc.ABC

Methods

async def async_create_keyspace(self, name: str, *, replication_options: dict[str, Any] | None = None, update_db_keyspace: bool | None = None, keyspace_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None, **kwargs: Any) ‑> None

Create a keyspace in the database. Async version of the method, for use in an asyncio context.

Args

name: the keyspace name. If supplying a keyspace that exists already, the method call proceeds as usual, no errors are raised, and the whole invocation is a no-op.
replication_options: this dictionary can specify the options about replication of the keyspace (across database nodes). If provided, it must have a structure similar to: {"class": "SimpleStrategy", "replication_factor": 1}.
update_db_keyspace: if True, the Database or AsyncDatabase class that spawned this DatabaseAdmin, if any, gets updated to work on the newly-created keyspace starting when this method returns.
keyspace_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for keyspace_admin_timeout_ms.
timeout_ms: an alias for keyspace_admin_timeout_ms.

Note: a timeout event is no guarantee at all that the creation request has not reached the API server and is not going to be, in fact, honored.

Example

>>> admin_for_my_db.list_keyspaces()
['default_keyspace']
>>> asyncio.run(admin_for_my_db.async_create_keyspace(
...     "that_other_one"
... ))
>>> admin_for_my_db.list_keyspaces()
['default_keyspace', 'that_other_one']

Expand source code

async def async_create_keyspace(
    self,
    name: str,
    *,
    replication_options: dict[str, Any] | None = None,
    update_db_keyspace: bool | None = None,
    keyspace_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
    **kwargs: Any,
) -> None:
    """
    Create a keyspace in the database through a `createKeyspace` command.
    This is the async flavor of the method, meant for use with asyncio.

    Args:
        name: the name of the keyspace to create. Passing the name of a
            keyspace that already exists raises no errors: the call goes
            through as usual and amounts to a no-op.
        replication_options: an optional dictionary with the settings for
            the replication of the keyspace (across database nodes).
            If provided, it must have a structure similar to:
            `{"class": "SimpleStrategy", "replication_factor": 1}`.
            An empty or omitted value results in no options being sent.
        update_db_keyspace: if True, the `Database` or `AsyncDatabase` class
            that spawned this DatabaseAdmin, if any, is switched to the
            newly-created keyspace by the time this method returns.
        keyspace_admin_timeout_ms: a timeout, in milliseconds, for the
            underlying API request. When omitted, the defaults set on this
            object are used. (Only one API request is issued, so all
            timeout parameters have the same meaning here.)
        request_timeout_ms: an alias for `keyspace_admin_timeout_ms`.
        timeout_ms: an alias for `keyspace_admin_timeout_ms`.
        **kwargs: accepted for signature compatibility; not used by
            this method.

    Raises:
        UnexpectedDataAPIResponseException: if the API response does not
            carry a `status` with `ok` equal to 1.

    Note: hitting the timeout does not guarantee that the creation
    request never reached the API server: it may still end up
    being honored.

    Example:
        >>> admin_for_my_db.list_keyspaces()
        ['default_keyspace']
        >>> asyncio.run(admin_for_my_db.async_create_keyspace(
        ...     "that_other_one"
        ... ))
        >>> admin_for_my_db.list_keyspaces()
        ['default_keyspace', 'that_other_one']
    """

    # collapse the three equivalent timeout parameters into a single value
    resolved_timeout_ms, timeout_label = _select_singlereq_timeout_ka(
        timeout_options=self.api_options.timeout_options,
        keyspace_admin_timeout_ms=keyspace_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # the "options" entry is sent only if there is a (non-empty) replication
    create_command = {"name": name}
    if replication_options:
        create_command["options"] = {"replication": replication_options}
    create_payload = {"createKeyspace": create_command}
    logger.info("creating keyspace, async")
    request_context = _TimeoutContext(
        request_ms=resolved_timeout_ms,
        label=timeout_label,
    )
    create_response = await self._api_commander.async_request(
        payload=create_payload,
        timeout_context=request_context,
    )
    # a missing or null "status" is treated the same as an empty one
    response_status = create_response.get("status") or {}
    if response_status.get("ok") != 1:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from createKeyspace API command.",
            raw_response=create_response,
        )
    logger.info("finished creating keyspace, async")
    if update_db_keyspace:
        self.spawner_database.use_keyspace(name)

async def async_drop_keyspace(self, name: str, *, keyspace_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Drop (delete) a keyspace from the database. Async version of the method, for use in an asyncio context.

Args

name: the keyspace to delete. If it does not exist in this database, an error is raised.
keyspace_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for keyspace_admin_timeout_ms.
timeout_ms: an alias for keyspace_admin_timeout_ms.

Note: a timeout event is no guarantee at all that the deletion request has not reached the API server and is not going to be, in fact, honored.

Example

>>> admin_for_my_db.list_keyspaces()
['that_other_one', 'default_keyspace']
>>> asyncio.run(admin_for_my_db.async_drop_keyspace(
...     "that_other_one"
... ))
>>> admin_for_my_db.list_keyspaces()
['default_keyspace']

Expand source code

async def async_drop_keyspace(
    self,
    name: str,
    *,
    keyspace_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Delete a keyspace from the database through a `dropKeyspace` command.
    This is the async flavor of the method, meant for use with asyncio.

    Args:
        name: the name of the keyspace to drop. An error is raised
            if no such keyspace exists in this database.
        keyspace_admin_timeout_ms: a timeout, in milliseconds, for the
            underlying API request. When omitted, the defaults set on this
            object are used. (Only one API request is issued, so all
            timeout parameters have the same meaning here.)
        request_timeout_ms: an alias for `keyspace_admin_timeout_ms`.
        timeout_ms: an alias for `keyspace_admin_timeout_ms`.

    Raises:
        UnexpectedDataAPIResponseException: if the API response does not
            carry a `status` with `ok` equal to 1.

    Note: hitting the timeout does not guarantee that the deletion
    request never reached the API server: it may still end up
    being honored.

    Example:
        >>> admin_for_my_db.list_keyspaces()
        ['that_other_one', 'default_keyspace']
        >>> asyncio.run(admin_for_my_db.async_drop_keyspace(
        ...     "that_other_one"
        ... ))
        >>> admin_for_my_db.list_keyspaces()
        ['default_keyspace']
    """

    # collapse the three equivalent timeout parameters into a single value
    resolved_timeout_ms, timeout_label = _select_singlereq_timeout_ka(
        timeout_options=self.api_options.timeout_options,
        keyspace_admin_timeout_ms=keyspace_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info("dropping keyspace, async")
    drop_payload = {"dropKeyspace": {"name": name}}
    request_context = _TimeoutContext(
        request_ms=resolved_timeout_ms,
        label=timeout_label,
    )
    drop_response = await self._api_commander.async_request(
        payload=drop_payload,
        timeout_context=request_context,
    )
    # a missing or null "status" is treated the same as an empty one
    response_status = drop_response.get("status") or {}
    if response_status.get("ok") != 1:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from dropKeyspace API command.",
            raw_response=drop_response,
        )
    logger.info("finished dropping keyspace, async")

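async def async_find_embedding_providers(self, *, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> FindEmbeddingProvidersResult

Query the API for the full information on available embedding providers. Async version of the method, for use in an asyncio context.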

Args

database_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for database_admin_timeout_ms.
timeout_ms: an alias for database_admin_timeout_ms.

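Returns

A FindEmbeddingProvidersResult object with the complete information returned by the API about available embedding providers.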

Expand source code

async def async_find_embedding_providers(
    self,
    *,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> FindEmbeddingProvidersResult:
    """
    Query the API for the full information on available embedding providers.
    Async version of the method, for use in an asyncio context.

    Args:
        database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.

    Returns:
        A `FindEmbeddingProvidersResult` object with the complete information
        returned by the API about available embedding providers

    Raises:
        UnexpectedDataAPIResponseException: if the response has no `status`
            (missing or null) or its `status` lacks the
            `embeddingProviders` entry.

    Example (output abridged and indented for clarity):
        >>> asyncio.run(admin_for_my_db.async_find_embedding_providers())
        FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)
        >>> asyncio.run(
        ...     admin_for_my_db.async_find_embedding_providers()
        ... ).embedding_providers
        {
            'openai': EmbeddingProvider(
                display_name='OpenAI',
                models=[
                    EmbeddingProviderModel(name='text-embedding-3-small'),
                    ...
                ]
            ),
            ...
        }
    """

    _database_admin_timeout_ms, _da_label = _select_singlereq_timeout_da(
        timeout_options=self.api_options.timeout_options,
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info("findEmbeddingProviders, async")
    fe_response = await self._api_commander.async_request(
        payload={"findEmbeddingProviders": {}},
        timeout_context=_TimeoutContext(
            request_ms=_database_admin_timeout_ms, label=_da_label
        ),
    )
    # `or {}` also covers an explicit `"status": null`, so that a faulty
    # response surfaces as the dedicated exception and not a TypeError
    # (same guard as used by the keyspace create/drop methods).
    fe_status = fe_response.get("status") or {}
    if "embeddingProviders" not in fe_status:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from findEmbeddingProviders API command.",
            raw_response=fe_response,
        )
    logger.info("finished findEmbeddingProviders, async")
    return FindEmbeddingProvidersResult._from_dict(fe_status)

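async def async_find_reranking_providers(self, *, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> FindRerankingProvidersResult

Query the API for the full information on available reranking providers. Async version of the method, for use in an asyncio context.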

Args

database_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for database_admin_timeout_ms.
timeout_ms: an alias for database_admin_timeout_ms.

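Returns

A FindRerankingProvidersResult object with the complete information returned by the API about available reranking providers.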

Expand source code

async def async_find_reranking_providers(
    self,
    *,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> FindRerankingProvidersResult:
    """
    Query the API for the full information on available reranking providers.
    Async version of the method, for use in an asyncio context.

    Args:
        database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.

    Returns:
        A `FindRerankingProvidersResult` object with the complete information
        returned by the API about available reranking providers

    Raises:
        UnexpectedDataAPIResponseException: if the response has no `status`
            (missing or null) or its `status` lacks the
            `rerankingProviders` entry.

    Example (output abridged and indented for clarity):
        >>> asyncio.run(admin_for_my_db.async_find_reranking_providers())
        FindRerankingProvidersResult(reranking_providers=nvidia)
        >>> asyncio.run(
        ...     admin_for_my_db.async_find_reranking_providers()
        ... ).reranking_providers
        {
            'nvidia': RerankingProvider(
                <Default>
                display_name='Nvidia',
                models=[
                    RerankingProviderModel(
                        <Default>
                        name='nvidia/llama-3.2-nv-rerankqa-1b-v2'
                    ),
                    ...
                ]
            ),
            ...
        }
    """

    _database_admin_timeout_ms, _da_label = _select_singlereq_timeout_da(
        timeout_options=self.api_options.timeout_options,
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info("findRerankingProviders, async")
    fr_response = await self._api_commander.async_request(
        payload={"findRerankingProviders": {}},
        timeout_context=_TimeoutContext(
            request_ms=_database_admin_timeout_ms, label=_da_label
        ),
    )
    # `or {}` also covers an explicit `"status": null`, so that a faulty
    # response surfaces as the dedicated exception and not a TypeError
    # (same guard as used by the keyspace create/drop methods).
    fr_status = fr_response.get("status") or {}
    if "rerankingProviders" not in fr_status:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from findRerankingProviders API command.",
            raw_response=fr_response,
        )
    logger.info("finished findRerankingProviders, async")
    return FindRerankingProvidersResult._from_dict(fr_status)

async def async_list_keyspaces(self, *, keyspace_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[str]

Query the API for a list of the keyspaces in the database. Async version of the method, for use in an asyncio context.

Args

keyspace_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for keyspace_admin_timeout_ms.
timeout_ms: an alias for keyspace_admin_timeout_ms.

Returns

A list of the keyspaces, each a string, in no particular order.

Example

>>> asyncio.run(admin_for_my_db.async_list_keyspaces())
['default_keyspace', 'staging_keyspace']

Expand source code

async def async_list_keyspaces(
    self,
    *,
    keyspace_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[str]:
    """
    Query the API for a list of the keyspaces in the database.
    Async version of the method, for use in an asyncio context.

    Args:
        keyspace_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `keyspace_admin_timeout_ms`.
        timeout_ms: an alias for `keyspace_admin_timeout_ms`.

    Returns:
        A list of the keyspaces, each a string, in no particular order.

    Raises:
        UnexpectedDataAPIResponseException: if the API response carries no
            "keyspaces" entry under its "status" field (including the case
            of a missing or null "status").

    Example:
        >>> asyncio.run(admin_for_my_db.async_list_keyspaces())
        ['default_keyspace', 'staging_keyspace']
    """

    _keyspace_admin_timeout_ms, _ka_label = _select_singlereq_timeout_ka(
        timeout_options=self.api_options.timeout_options,
        keyspace_admin_timeout_ms=keyspace_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info("getting list of keyspaces, async")
    fn_response = await self._api_commander.async_request(
        payload={"findKeyspaces": {}},
        timeout_context=_TimeoutContext(
            request_ms=_keyspace_admin_timeout_ms, label=_ka_label
        ),
    )
    # `or {}` (rather than a `.get` default) also covers an explicit null
    # "status", which would otherwise surface as a TypeError on the `in` test.
    fn_status = fn_response.get("status") or {}
    if "keyspaces" not in fn_status:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from findKeyspaces API command.",
            raw_response=fn_response,
        )
    logger.info("finished getting list of keyspaces, async")
    return fn_status["keyspaces"]  # type: ignore[no-any-return]

def create_keyspace(self, name: str, *, replication_options: dict[str, Any] | None = None, update_db_keyspace: bool | None = None, keyspace_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None, **kwargs: Any) ‑> None

Create a keyspace in the database.

Args

name: the keyspace name. If supplying a keyspace that exists already, the method call proceeds as usual, no errors are raised, and the whole invocation is a no-op.
replication_options: this dictionary can specify the options about replication of the keyspace (across database nodes). If provided, it must have a structure similar to: {"class": "SimpleStrategy", "replication_factor": 1}.
update_db_keyspace: if True, the Database or AsyncDatabase class that spawned this DatabaseAdmin, if any, gets updated to work on the newly-created keyspace starting when this method returns.
keyspace_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for keyspace_admin_timeout_ms.
timeout_ms: an alias for keyspace_admin_timeout_ms.

Note: a timeout event is no guarantee at all that the creation request has not reached the API server and is not going to be, in fact, honored.

Example

>>> admin_for_my_db.list_keyspaces()
['default_keyspace']
>>> admin_for_my_db.create_keyspace("that_other_one")
>>> admin_for_my_db.list_keyspaces()
['default_keyspace', 'that_other_one']

Expand source code

def create_keyspace(
    self,
    name: str,
    *,
    replication_options: dict[str, Any] | None = None,
    update_db_keyspace: bool | None = None,
    keyspace_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
    **kwargs: Any,
) -> None:
    """
    Create a keyspace in the database.

    Args:
        name: the keyspace name. If supplying a keyspace that exists
            already, the method call proceeds as usual, no errors are
            raised, and the whole invocation is a no-op.
        replication_options: an optional dictionary with the replication
            settings for the keyspace (across database nodes). When given
            (and non-empty), it is sent as the "replication" option of the
            command and must have a structure similar to:
            `{"class": "SimpleStrategy", "replication_factor": 1}`.
        update_db_keyspace: if True, the `Database` or `AsyncDatabase` class
            that spawned this DatabaseAdmin, if any, gets updated to work on
            the newly-created keyspace starting when this method returns.
        keyspace_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `keyspace_admin_timeout_ms`.
        timeout_ms: an alias for `keyspace_admin_timeout_ms`.
        kwargs: accepted for signature compatibility; not used by this method.

    Raises:
        UnexpectedDataAPIResponseException: if the API response does not
            report `{"status": {"ok": 1}}`.

    Note: a timeout event is no guarantee at all that the
    creation request has not reached the API server and is not going
    to be, in fact, honored.

    Example:
        >>> admin_for_my_db.list_keyspaces()
        ['default_keyspace']
        >>> admin_for_my_db.create_keyspace("that_other_one")
        >>> admin_for_my_db.list_keyspaces()
        ['default_keyspace', 'that_other_one']
    """

    effective_timeout_ms, timeout_label = _select_singlereq_timeout_ka(
        timeout_options=self.api_options.timeout_options,
        keyspace_admin_timeout_ms=keyspace_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )

    # The "options" entry is attached only when there is something to send.
    command_body: dict[str, Any] = {"name": name}
    if replication_options:
        command_body["options"] = {"replication": replication_options}

    logger.info("creating keyspace")
    create_response = self._api_commander.request(
        payload={"createKeyspace": command_body},
        timeout_context=_TimeoutContext(
            request_ms=effective_timeout_ms,
            label=timeout_label,
        ),
    )

    response_status = create_response.get("status") or {}
    if response_status.get("ok") != 1:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from createKeyspace API command.",
            raw_response=create_response,
        )

    logger.info("finished creating keyspace")
    if update_db_keyspace:
        # Point the spawning database at the keyspace just created.
        self.spawner_database.use_keyspace(name)

def drop_keyspace(self, name: str, *, keyspace_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Drop (delete) a keyspace from the database.

Args

name: the keyspace to delete. If it does not exist in this database, an error is raised.
keyspace_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for keyspace_admin_timeout_ms.
timeout_ms: an alias for keyspace_admin_timeout_ms.

Note: a timeout event is no guarantee at all that the deletion request has not reached the API server and is not going to be, in fact, honored.

Example

>>> admin_for_my_db.list_keyspaces()
['default_keyspace', 'that_other_one']
>>> admin_for_my_db.drop_keyspace("that_other_one")
>>> admin_for_my_db.list_keyspaces()
['default_keyspace']

Expand source code

def drop_keyspace(
    self,
    name: str,
    *,
    keyspace_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Drop (delete) a keyspace from the database.

    Args:
        name: the keyspace to delete. If it does not exist in this database,
            an error is raised.
        keyspace_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `keyspace_admin_timeout_ms`.
        timeout_ms: an alias for `keyspace_admin_timeout_ms`.

    Raises:
        UnexpectedDataAPIResponseException: if the API response does not
            report `{"status": {"ok": 1}}`.

    Note: a timeout event is no guarantee at all that the
    deletion request has not reached the API server and is not going
    to be, in fact, honored.

    Example:
        >>> admin_for_my_db.list_keyspaces()
        ['default_keyspace', 'that_other_one']
        >>> admin_for_my_db.drop_keyspace("that_other_one")
        >>> admin_for_my_db.list_keyspaces()
        ['default_keyspace']
    """

    effective_timeout_ms, timeout_label = _select_singlereq_timeout_ka(
        timeout_options=self.api_options.timeout_options,
        keyspace_admin_timeout_ms=keyspace_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )

    logger.info("dropping keyspace")
    drop_response = self._api_commander.request(
        payload={"dropKeyspace": {"name": name}},
        timeout_context=_TimeoutContext(
            request_ms=effective_timeout_ms,
            label=timeout_label,
        ),
    )

    response_status = drop_response.get("status") or {}
    if response_status.get("ok") != 1:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from dropKeyspace API command.",
            raw_response=drop_response,
        )

    logger.info("finished dropping keyspace")

def find_embedding_providers(self, *, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> FindEmbeddingProvidersResult

Query the API for the full information on available embedding providers.

Args

database_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for database_admin_timeout_ms.
timeout_ms: an alias for database_admin_timeout_ms.

Returns

A FindEmbeddingProvidersResult object with the complete information returned by the API about available embedding providers.

Example (output abridged and indented for clarity)

>>> admin_for_my_db.find_embedding_providers()
FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)
>>> admin_for_my_db.find_embedding_providers().embedding_providers
{
    'openai': EmbeddingProvider(
        display_name='OpenAI',
        models=[
            EmbeddingProviderModel(name='text-embedding-3-small'),
            ...
        ]
    ),
    ...
}

Expand source code

def find_embedding_providers(
    self,
    *,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> FindEmbeddingProvidersResult:
    """
    Query the API for the full information on available embedding providers.

    Args:
        database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.

    Returns:
        A `FindEmbeddingProvidersResult` object with the complete information
        returned by the API about available embedding providers.

    Raises:
        UnexpectedDataAPIResponseException: if the API response carries no
            "embeddingProviders" entry under its "status" field (including
            the case of a missing or null "status").

    Example:
        >>> # (output abridged and indented for clarity)
        >>> admin_for_my_db.find_embedding_providers()
        FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)
        >>> admin_for_my_db.find_embedding_providers().embedding_providers
        {
            'openai': EmbeddingProvider(
                display_name='OpenAI',
                models=[
                    EmbeddingProviderModel(name='text-embedding-3-small'),
                    ...
                ]
            ),
            ...
        }
    """

    _database_admin_timeout_ms, _da_label = _select_singlereq_timeout_da(
        timeout_options=self.api_options.timeout_options,
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info("findEmbeddingProviders")
    fe_response = self._api_commander.request(
        payload={"findEmbeddingProviders": {}},
        timeout_context=_TimeoutContext(
            request_ms=_database_admin_timeout_ms, label=_da_label
        ),
    )
    # `or {}` (rather than a `.get` default) also covers an explicit null
    # "status", which would otherwise surface as a TypeError on the `in` test.
    fe_status = fe_response.get("status") or {}
    if "embeddingProviders" not in fe_status:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from findEmbeddingProviders API command.",
            raw_response=fe_response,
        )
    logger.info("finished findEmbeddingProviders")
    return FindEmbeddingProvidersResult._from_dict(fe_status)

def find_reranking_providers(self, *, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> FindRerankingProvidersResult

Query the API for the full information on available reranking providers.

Args

database_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for database_admin_timeout_ms.
timeout_ms: an alias for database_admin_timeout_ms.

Returns

A FindRerankingProvidersResult object with the complete information returned by the API about available reranking providers.

Example (output abridged and indented for clarity)

>>> admin_for_my_db.find_reranking_providers()
FindRerankingProvidersResult(reranking_providers=nvidia)
>>> admin_for_my_db.find_reranking_providers().reranking_providers
{
    'nvidia': RerankingProvider(
        <Default>
        display_name='Nvidia',
        models=[
            RerankingProviderModel(
                <Default>
                name='nvidia/llama-3.2-nv-rerankqa-1b-v2'
            ),
            ...
        ]
    ),
    ...
}

Expand source code

def find_reranking_providers(
    self,
    *,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> FindRerankingProvidersResult:
    """
    Query the API for the full information on available reranking providers.

    Args:
        database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.

    Returns:
        A `FindRerankingProvidersResult` object with the complete information
        returned by the API about available reranking providers.

    Raises:
        UnexpectedDataAPIResponseException: if the API response carries no
            "rerankingProviders" entry under its "status" field (including
            the case of a missing or null "status").

    Example:
        >>> # (output abridged and indented for clarity)
        >>> admin_for_my_db.find_reranking_providers()
        FindRerankingProvidersResult(reranking_providers=nvidia)
        >>> admin_for_my_db.find_reranking_providers().reranking_providers
        {
            'nvidia': RerankingProvider(
                <Default>
                display_name='Nvidia',
                models=[
                    RerankingProviderModel(
                        <Default>
                        name='nvidia/llama-3.2-nv-rerankqa-1b-v2'
                    ),
                    ...
                ]
            ),
            ...
        }
    """

    _database_admin_timeout_ms, _da_label = _select_singlereq_timeout_da(
        timeout_options=self.api_options.timeout_options,
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info("findRerankingProviders")
    fr_response = self._api_commander.request(
        payload={"findRerankingProviders": {}},
        timeout_context=_TimeoutContext(
            request_ms=_database_admin_timeout_ms, label=_da_label
        ),
    )
    # `or {}` (rather than a `.get` default) also covers an explicit null
    # "status", which would otherwise surface as a TypeError on the `in` test.
    fr_status = fr_response.get("status") or {}
    if "rerankingProviders" not in fr_status:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from findRerankingProviders API command.",
            raw_response=fr_response,
        )
    logger.info("finished findRerankingProviders")
    return FindRerankingProvidersResult._from_dict(fr_status)

def get_async_database(self, *, keyspace: str | None = None, token: str | TokenProvider | UnsetType = (unset), spawn_api_options: APIOptions | UnsetType = (unset)) ‑> AsyncDatabase

Create an AsyncDatabase instance for a specific database, to be used when doing data-level work (such as creating/managing collections).

Args

keyspace: an optional keyspace to set in the resulting AsyncDatabase. If not set, the keyspace remains unspecified and must be set later with the use_keyspace method.
token: if supplied, is passed to the AsyncDatabase instead of the one set for this object. Useful if one wants to work in a least-privilege manner, limiting the permissions for non-admin work. This can be either a literal token string or a subclass of TokenProvider.
spawn_api_options: a specification - complete or partial - of the API Options to override the defaults inherited from the database admin. This allows for a deeper configuration of the resulting AsyncDatabase, e.g. concerning timeouts; if this is passed together with the named token parameter, the latter will take precedence.

Returns

An AsyncDatabase object, ready to work with data, collections and tables.

Note

creating an instance of AsyncDatabase does not trigger actual creation of the database itself, which should exist beforehand.

Expand source code

def get_async_database(
    self,
    *,
    keyspace: str | None = None,
    token: str | TokenProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> AsyncDatabase:
    """
    Create an AsyncDatabase instance for a specific database, to be used
    when doing data-level work (such as creating/managing collections).

    This is a thin wrapper: it builds a Database through `get_database`
    with the same arguments and converts it with `to_async()`.

    Args:
        keyspace: an optional keyspace to set in the resulting AsyncDatabase.
            If not set, the keyspace remains unspecified and must be set later
            with the `use_keyspace` method.
        token: if supplied, is passed to the AsyncDatabase instead of
            the one set for this object. Useful if one wants to work in
            a least-privilege manner, limiting the permissions for non-admin work.
            This can be either a literal token string or an instance of
            (a subclass of) `astrapy.authentication.TokenProvider`.
        spawn_api_options: a specification - complete or partial - of the
            API Options to override the defaults inherited from the database admin.
            This allows for a deeper configuration of the resulting
            AsyncDatabase, e.g. concerning timeouts; if this is passed together
            with the named `token` parameter, the latter will take precedence.

    Returns:
        An AsyncDatabase object, ready to work with data, collections and tables.

    Note:
        creating an instance of AsyncDatabase does not trigger actual creation
        of the database itself, which should exist beforehand.
    """

    sync_database = self.get_database(
        token=token,
        keyspace=keyspace,
        spawn_api_options=spawn_api_options,
    )
    return sync_database.to_async()

def get_database(self, *, keyspace: str | None = None, token: str | TokenProvider | UnsetType = (unset), spawn_api_options: APIOptions | UnsetType = (unset)) ‑> Database

Create a Database instance for a specific database, to be used when doing data-level work (such as creating/managing collections).

Args

keyspace: an optional keyspace to set in the resulting Database. If not set, the keyspace remains unspecified and must be set later with the use_keyspace method.
token: if supplied, is passed to the Database instead of the one set for this object. Useful if one wants to work in a least-privilege manner, limiting the permissions for non-admin work. This can be either a literal token string or a subclass of TokenProvider.
spawn_api_options: a specification - complete or partial - of the API Options to override the defaults inherited from the database admin. This allows for a deeper configuration of the resulting Database, e.g. concerning timeouts; if this is passed together with the named token parameter, the latter will take precedence.

Returns

A Database object, ready to work with data, collections and tables.

Example

>>> my_db = admin_for_my_db.get_database()
>>> my_db.list_collection_names()
['movies', 'another_collection']

Note

creating an instance of Database does not trigger actual creation of the database itself, which should exist beforehand.

Expand source code

def get_database(
    self,
    *,
    keyspace: str | None = None,
    token: str | TokenProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> Database:
    """
    Create a Database instance for a specific database, to be used
    when doing data-level work (such as creating/managing collections).

    The resulting Database targets this admin's API endpoint.

    Args:
        keyspace: an optional keyspace to set in the resulting Database.
            If not set, the keyspace remains unspecified and must be set later
            with the `use_keyspace` method.
        token: if supplied, is passed to the Database instead of
            the one set for this object. Useful if one wants to work in
            a least-privilege manner, limiting the permissions for non-admin work.
            This can be either a literal token string or an instance of
            (a subclass of) `astrapy.authentication.TokenProvider`.
        spawn_api_options: a specification - complete or partial - of the
            API Options to override the defaults inherited from the database admin.
            This allows for a deeper configuration of the resulting Database,
            e.g. concerning timeouts; if this is passed together with
            the named `token` parameter, the latter will take precedence.

    Returns:
        A Database object, ready to work with data, collections and tables.

    Example:
        >>> my_db = admin_for_my_db.get_database()
        >>> my_db.list_collection_names()
        ['movies', 'another_collection']

    Note:
        creating an instance of Database does not trigger actual creation
        of the database itself, which should exist beforehand.
    """

    # lazy importing here to avoid circular dependency
    from astrapy import Database

    # Overrides are layered in order: this admin's options, then
    # `spawn_api_options`, then the explicitly-passed `token` last.
    resulting_api_options = self.api_options.with_override(
        spawn_api_options,
    ).with_override(
        APIOptions(
            token=token,
        ),
    )

    return Database(
        api_endpoint=self.api_endpoint,
        keyspace=keyspace,
        api_options=resulting_api_options,
    )

def list_keyspaces(self, *, keyspace_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[str]

Query the API for a list of the keyspaces in the database.

Args

keyspace_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for keyspace_admin_timeout_ms.
timeout_ms: an alias for keyspace_admin_timeout_ms.

Returns

A list of the keyspaces, each a string, in no particular order.

Example

>>> admin_for_my_db.list_keyspaces()
['default_keyspace', 'staging_keyspace']

Expand source code

def list_keyspaces(
    self,
    *,
    keyspace_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[str]:
    """
    Query the API for a list of the keyspaces in the database.

    Args:
        keyspace_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `keyspace_admin_timeout_ms`.
        timeout_ms: an alias for `keyspace_admin_timeout_ms`.

    Returns:
        A list of the keyspaces, each a string, in no particular order.

    Raises:
        UnexpectedDataAPIResponseException: if the API response carries no
            "keyspaces" entry under its "status" field (including the case
            of a missing or null "status").

    Example:
        >>> admin_for_my_db.list_keyspaces()
        ['default_keyspace', 'staging_keyspace']
    """

    _keyspace_admin_timeout_ms, _ka_label = _select_singlereq_timeout_ka(
        timeout_options=self.api_options.timeout_options,
        keyspace_admin_timeout_ms=keyspace_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info("getting list of keyspaces")
    fn_response = self._api_commander.request(
        payload={"findKeyspaces": {}},
        timeout_context=_TimeoutContext(
            request_ms=_keyspace_admin_timeout_ms, label=_ka_label
        ),
    )
    # `or {}` (rather than a `.get` default) also covers an explicit null
    # "status", which would otherwise surface as a TypeError on the `in` test.
    fn_status = fn_response.get("status") or {}
    if "keyspaces" not in fn_status:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from findKeyspaces API command.",
            raw_response=fn_response,
        )
    logger.info("finished getting list of keyspaces")
    return fn_status["keyspaces"]  # type: ignore[no-any-return]

def with_options(self, *, token: str | TokenProvider | UnsetType = (unset), api_options: APIOptions | UnsetType = (unset)) ‑> DataAPIDatabaseAdmin

Create a clone of this DataAPIDatabaseAdmin with some changed attributes.

Args

token: an access token with enough permission to perform admin tasks. This can be either a literal token string or a subclass of TokenProvider.
api_options: any additional options to set for the clone, in the form of an APIOptions instance (where one can set just the needed attributes). In case the same setting is also provided as named parameter, the latter takes precedence.

Returns

a new DataAPIDatabaseAdmin instance.

Example

>>> other_admin_for_my_db = admin_for_my_db.with_options(
...     token="AstraCS:...",
... )

Expand source code

def with_options(
    self,
    *,
    token: str | TokenProvider | UnsetType = _UNSET,
    api_options: APIOptions | UnsetType = _UNSET,
) -> DataAPIDatabaseAdmin:
    """
    Create a clone of this DataAPIDatabaseAdmin with some changed attributes.

    Args:
        token: an access token with enough permission to perform admin tasks.
            This can be either a literal token string or an instance of
            (a subclass of) `astrapy.authentication.TokenProvider`.
        api_options: any additional options to set for the clone, in the form of
            an APIOptions instance (where one can set just the needed attributes).
            In case the same setting is also provided as named parameter,
            the latter takes precedence.

    Returns:
        a new DataAPIDatabaseAdmin instance.

    Example:
        >>> other_admin_for_my_db = admin_for_my_db.with_options(
        ...     token="AstraCS:...",
        ... )
    """

    # Only `token` and `api_options` can be changed here: both are
    # forwarded unchanged to the private `_copy` helper.
    return self._copy(
        token=token,
        api_options=api_options,
    )

class Database (*, api_endpoint: str, keyspace: str | None, api_options: FullAPIOptions)

A Data API database. This is the object for doing database-level DML, such as creating/deleting collections, and for obtaining Collection objects themselves. This class has a synchronous interface.

This class is not meant for direct instantiation by the user, rather it is obtained by invoking methods such as get_database of AstraDBClient.

On Astra DB, a Database comes with an "API Endpoint", which implies a Database object instance reaches a specific region (relevant point in case of multi-region databases).

A Database is also always set with a "working keyspace" on which all data operations are done (unless otherwise specified).

Args

api_endpoint: the full "API Endpoint" string used to reach the Data API. Example: "https://-.apps.astra.datastax.com"
keyspace: this is the keyspace all method calls will target, unless one is explicitly specified in the call. If no keyspace is supplied when creating a Database, on Astra DB the name "default_keyspace" is set, while on other environments the keyspace is left unspecified: in this case, most operations are unavailable until a keyspace is set (through an explicit use_keyspace invocation or equivalent).
api_options: a complete specification of the API Options for this instance.

Example

>>> from astrapy import DataAPIClient
>>> my_client = astrapy.DataAPIClient()
>>> my_db = my_client.get_database(
...     "https://01234567-....apps.astra.datastax.com",
...     token="AstraCS:...",
... )

Note

creating an instance of Database does not trigger actual creation of the database itself, which should exist beforehand. To create databases, see the AstraDBAdmin class.

Expand source code

class Database:
    """
    A Data API database. This is the object for doing database-level
    DML, such as creating/deleting collections, and for obtaining Collection
    objects themselves. This class has a synchronous interface.

    This class is not meant for direct instantiation by the user, rather
    it is obtained by invoking methods such as `get_database`
    of AstraDBClient.

    On Astra DB, a Database comes with an "API Endpoint", which implies
    a Database object instance reaches a specific region (relevant point in
    case of multi-region databases).

    A Database is also always set with a "working keyspace" on which all
    data operations are done (unless otherwise specified).

    Args:
        api_endpoint: the full "API Endpoint" string used to reach the Data API.
            Example: "https://<database_id>-<region>.apps.astra.datastax.com"
        keyspace: this is the keyspace all method calls will target, unless
            one is explicitly specified in the call. If no keyspace is supplied
            when creating a Database, on Astra DB the name "default_keyspace" is set,
            while on other environments the keyspace is left unspecified: in this case,
            most operations are unavailable until a keyspace is set (through an explicit
            `use_keyspace` invocation or equivalent).
        api_options: a complete specification of the API Options for this instance.

    Example:
        >>> from astrapy import DataAPIClient
        >>> my_client = astrapy.DataAPIClient()
        >>> my_db = my_client.get_database(
        ...     "https://01234567-....apps.astra.datastax.com",
        ...     token="AstraCS:...",
        ... )

    Note:
        creating an instance of Database does not trigger actual creation
        of the database itself, which should exist beforehand. To create databases,
        see the AstraDBAdmin class.
    """

    def __init__(
        self,
        *,
        api_endpoint: str,
        keyspace: str | None,
        api_options: FullAPIOptions,
    ) -> None:
        """
        Set up the database object: store the options and the endpoint,
        resolve the working keyspace and prepare the API commander for it.

        Args:
            api_endpoint: the API Endpoint string. Leading and trailing
                slashes are removed before it is stored.
            keyspace: the working keyspace, or None. A None keyspace is replaced
                by the Astra DB default keyspace when the environment in
                `api_options` is one of the Astra DB environments; otherwise
                it is left unset.
            api_options: the complete API Options for this instance.
        """
        # `api_options` is stored first: the statements below read it back
        # through `self`.
        self.api_options = api_options
        self.api_endpoint = api_endpoint.strip("/")

        # A missing keyspace gets a default only on Astra DB.
        self._using_keyspace: str | None = (
            DEFAULT_ASTRA_DB_KEYSPACE
            if (
                keyspace is None
                and self.api_options.environment in Environment.astra_db_values
            )
            else keyspace
        )

        # Authentication header first, then any additional database headers
        # (which overwrite the former in case of a key clash).
        commander_headers = {
            DEFAULT_DATA_API_AUTH_HEADER: self.api_options.token.get_token(),
        }
        commander_headers.update(self.api_options.database_additional_headers)
        self._commander_headers = commander_headers

        # The database name is fetched lazily (see the `name` method).
        self._name: str | None = None
        # This is None as long as no working keyspace is set.
        self._api_commander = self._get_api_commander(keyspace=self.keyspace)

    def __getattr__(self, collection_name: str) -> Collection[DefaultDocumentType]:
        return self.get_collection(name=collection_name)

    def __getitem__(self, collection_name: str) -> Collection[DefaultDocumentType]:
        """
        Indexing-style access to collections: `my_db["coll_name"]` is equivalent
        to `my_db.get_collection("coll_name")`.
        """
        requested_collection = self.get_collection(name=collection_name)
        return requested_collection

    def __repr__(self) -> str:
        ep_desc = f'api_endpoint="{self.api_endpoint}"'
        keyspace_desc: str | None
        if self._using_keyspace is None:
            keyspace_desc = "keyspace not set"
        else:
            keyspace_desc = f'keyspace="{self._using_keyspace}"'
        api_options_desc = f"api_options={self.api_options}"
        parts = [
            pt for pt in [ep_desc, keyspace_desc, api_options_desc] if pt is not None
        ]
        return f"{self.__class__.__name__}({', '.join(parts)})"

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, Database):
            return all(
                [
                    self.api_endpoint == other.api_endpoint,
                    self.keyspace == other.keyspace,
                    self.api_options == other.api_options,
                ]
            )
        else:
            return False

    def _get_api_commander(self, keyspace: str | None) -> APICommander | None:
        """
        Instantiate a new APICommander based on the properties of this class
        and a provided keyspace.

        If keyspace is None, return None (signaling a "keyspace not set").
        """

        if keyspace is None:
            return None
        else:
            base_path_components = [
                comp
                for comp in (
                    ncomp.strip("/")
                    for ncomp in (
                        self.api_options.data_api_url_options.api_path,
                        self.api_options.data_api_url_options.api_version,
                        keyspace,
                    )
                    if ncomp is not None
                )
                if comp != ""
            ]
            base_path = f"/{'/'.join(base_path_components)}"
            api_commander = APICommander(
                api_endpoint=self.api_endpoint,
                path=base_path,
                headers=self._commander_headers,
                callers=self.api_options.callers,
                redacted_header_names=self.api_options.redacted_header_names,
            )
            return api_commander

    def _get_driver_commander(self, keyspace: str | None) -> APICommander:
        """
        Building on _get_api_commander, fall back to class keyspace in
        creating/returning a commander, and in any case raise an error if not set.
        """
        driver_commander: APICommander | None
        if keyspace:
            driver_commander = self._get_api_commander(keyspace=keyspace)
        else:
            driver_commander = self._api_commander
        if driver_commander is None:
            raise ValueError(
                "No keyspace specified. This operation requires a keyspace to "
                "be set, e.g. through the `use_keyspace` method."
            )
        return driver_commander

    def _copy(
        self,
        *,
        keyspace: str | None = None,
        token: str | TokenProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> Database:
        """
        Create a new Database with the same endpoint as this one, optionally
        changing keyspace, token and API options.

        Args:
            keyspace: the working keyspace for the copy. If omitted (or empty),
                the working keyspace of this database is retained.
            token: if provided, it overrides the token in the API options.
            api_options: options to layer on top of those of this database.
                The `token` argument, when given, is applied after these.

        Returns:
            the new `Database` instance.
        """
        token_override = APIOptions(
            token=token,
        )
        # Layering order: this database's options, then `api_options`,
        # finally the explicitly-passed token.
        merged_api_options = self.api_options.with_override(api_options)
        merged_api_options = merged_api_options.with_override(token_override)
        copy_keyspace = keyspace if keyspace else self.keyspace
        return Database(
            api_endpoint=self.api_endpoint,
            keyspace=copy_keyspace,
            api_options=merged_api_options,
        )

    def with_options(
        self,
        *,
        keyspace: str | None = None,
        token: str | TokenProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> Database:
        """
        Return a clone of this database with some attributes changed.

        Args:
            keyspace: the keyspace all method calls on the clone will target,
                unless one is explicitly specified in the call. If omitted,
                the clone keeps the working keyspace of this database.
            token: an Access Token to the database. Example: `"AstraCS:xyz..."`.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            api_options: any additional options to set for the clone, in the form of
                an APIOptions instance (where one can set just the needed attributes).
                In case the same setting is also provided as named parameter,
                the latter takes precedence.

        Returns:
            a new `Database` instance.

        Example:
            >>> my_db_2 = my_db.with_options(
            ...     keyspace="the_other_keyspace",
            ...     token="AstraCS:xyz...",
            ... )
        """

        cloned_database = self._copy(
            keyspace=keyspace,
            token=token,
            api_options=api_options,
        )
        return cloned_database

    def to_async(
        self,
        *,
        keyspace: str | None = None,
        token: str | TokenProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncDatabase:
        """
        Build an AsyncDatabase out of this database. Apart from the arguments
        explicitly passed as overrides, the result has the same endpoint,
        keyspace and options as this database.

        Args:
            keyspace: the keyspace all method calls on the result will target,
                unless one is explicitly specified in the call. If omitted,
                the working keyspace of this database is used.
            token: an Access Token to the database. Example: "AstraCS:xyz..."
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            api_options: any additional options to set for the result, in the form of
                an APIOptions instance (where one can set just the needed attributes).
                In case the same setting is also provided as named parameter,
                the latter takes precedence.

        Returns:
            the new copy, an `AsyncDatabase` instance.

        Example:
            >>> async_database = my_db.to_async()
            >>> asyncio.run(async_database.list_collection_names())
        """

        token_override = APIOptions(
            token=token,
        )
        # Layering order: this database's options, then `api_options`,
        # finally the explicitly-passed token.
        merged_api_options = self.api_options.with_override(api_options)
        merged_api_options = merged_api_options.with_override(token_override)
        async_keyspace = keyspace if keyspace else self.keyspace
        return AsyncDatabase(
            api_endpoint=self.api_endpoint,
            keyspace=async_keyspace,
            api_options=merged_api_options,
        )

    def use_keyspace(self, keyspace: str) -> None:
        """
        Make the provided keyspace the working keyspace of this database.
        The Database instance is changed in place.

        The keyspace is not created by this method: it is expected to exist
        already (created for instance with a `DatabaseAdmin.create_keyspace` call).

        Args:
            keyspace: the new keyspace to use as the database working keyspace.

        Returns:
            None.

        Example:
            >>> my_db.list_collection_names()
            ['coll_1', 'coll_2']
            >>> my_db.use_keyspace("an_empty_keyspace")
            >>> my_db.list_collection_names()
            []
        """
        logger.info(f"switching to keyspace '{keyspace}'")
        self._using_keyspace = keyspace
        # The stored commander targets a specific keyspace: rebuild it.
        refreshed_commander = self._get_api_commander(keyspace=self.keyspace)
        self._api_commander = refreshed_commander

    def info(
        self,
        *,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> AstraDBDatabaseInfo:
        """
        Return additional information on the database, in the form of
        an AstraDBDatabaseInfo instance.

        Some of the returned properties can change throughout the lifetime
        of the database (such as raw_info["keyspaces"]): therefore, every
        invocation of this method issues a new request to the DevOps API.

        Not available outside of Astra DB and when using custom domains.

        Args:
            database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.

        Raises:
            InvalidEnvironmentException: if the environment is not an Astra DB
                one, or if the API endpoint cannot be parsed.
            DevOpsAPIException: if no database info could be fetched.

        Example:
            >>> my_db.info().region
            'eu-west-1'

            >>> my_db.info().raw_info['datacenters'][0]['dateCreated']
            '2023-01-30T12:34:56Z'

        Note:
            see the AstraDBDatabaseInfo documentation for a caveat about the difference
            between the `region` and the `raw["region"]` attributes.
        """

        if self.api_options.environment not in Environment.astra_db_values:
            raise InvalidEnvironmentException(
                "Environments outside of Astra DB are not supported."
            )
        if parse_api_endpoint(self.api_endpoint) is None:
            raise InvalidEnvironmentException(
                "Cannot inspect a nonstandard API endpoint for properties."
            )

        # Only the timeout value is needed here, the label is discarded.
        effective_timeout_ms, _ = _select_singlereq_timeout_da(
            timeout_options=self.api_options.timeout_options,
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info("getting database info")
        fetched_info = fetch_database_info(
            self.api_endpoint,
            keyspace=self.keyspace,
            request_timeout_ms=effective_timeout_ms,
            api_options=self.api_options,
        )
        if fetched_info is None:
            raise DevOpsAPIException("Failure while fetching database info.")
        logger.info("finished getting database info")
        return fetched_info

    @property
    def id(self) -> str:
        """
        The ID of this database, as found in its API endpoint.
        Not available outside of Astra DB and when using custom domains.

        Raises:
            InvalidEnvironmentException: if the environment is not an Astra DB
                one, or if the API endpoint cannot be parsed.

        Example:
            >>> my_db.id
            '01234567-89ab-cdef-0123-456789abcdef'
        """

        if self.api_options.environment not in Environment.astra_db_values:
            raise InvalidEnvironmentException(
                "Database is not in a supported environment for this operation."
            )
        endpoint_parts = parse_api_endpoint(self.api_endpoint)
        if endpoint_parts is None:
            raise InvalidEnvironmentException(
                "Cannot inspect a nonstandard API endpoint for properties."
            )
        return endpoint_parts.database_id

    @property
    def region(self) -> str:
        """
        The region where this database is located, as found in its API endpoint.

        Even for multi-region databases the region is well defined, because
        a Database instance connects to exactly one of the regions
        (as specified by the API Endpoint).

        Not available outside of Astra DB and when using custom domains.

        Raises:
            InvalidEnvironmentException: if the environment is not an Astra DB
                one, or if the API endpoint cannot be parsed.

        Example:
            >>> my_db.region
            'us-west-2'
        """

        if self.api_options.environment not in Environment.astra_db_values:
            raise InvalidEnvironmentException(
                "Database is not in a supported environment for this operation."
            )
        endpoint_parts = parse_api_endpoint(self.api_endpoint)
        if endpoint_parts is None:
            raise InvalidEnvironmentException(
                "Cannot inspect a nonstandard API endpoint for properties."
            )
        return endpoint_parts.region

    def name(self) -> str:
        """
        The name of this database. Note that this bears no unicity guarantees.

        Calling this method the first time involves a request
        to the DevOps API (the resulting database name is then cached).
        See the `info()` method for more details.

        Example:
            >>> my_db.name()
            'the_application_database'
        """

        if self._name is None:
            self._name = self.info().name
        return self._name

    @property
    def keyspace(self) -> str | None:
        """
        The keyspace this database uses as target for all commands when
        no method-call-specific keyspace is specified.

        Returns:
            the working keyspace (a string), or None if not set.

        Example:
            >>> my_db.keyspace
            'the_keyspace'
        """

        return self._using_keyspace

    @overload
    def get_collection(
        self,
        name: str,
        *,
        keyspace: str | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> Collection[DefaultDocumentType]: ...

    @overload
    def get_collection(
        self,
        name: str,
        *,
        document_type: type[DOC],
        keyspace: str | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> Collection[DOC]: ...

    def get_collection(
        self,
        name: str,
        *,
        document_type: type[Any] = DefaultDocumentType,
        keyspace: str | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> Collection[DOC]:
        """
        Spawn a `Collection` object instance representing a collection
        on this database.

        No request is made and nothing changes on the database when
        a `Collection` instance is created: for the instance to be used
        meaningfully, the collection must exist already (for instance,
        it should have been created previously by calling
        the `create_collection` method).

        Args:
            name: the name of the collection.
            document_type: this parameter acts as a formal specifier for the
                type checker and is not used at runtime. If omitted, the resulting
                Collection is implicitly a `Collection[dict[str, Any]]`. If provided,
                it must match the type hint specified in the assignment.
            keyspace: the keyspace containing the collection. If no keyspace
                is specified, the general setting for this database is used.
            embedding_api_key: optional API key(s) for interacting with the collection.
                If an embedding service is configured, and this parameter is not None,
                each Data API call will include the necessary embedding-related headers
                as specified by this parameter. If a string is passed, it translates
                into the one "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
                For some vectorize providers/models, if using header-based
                authentication, specialized subclasses of
                `astrapy.authentication.EmbeddingHeadersProvider` should be supplied.
            reranking_api_key: optional API key(s) for interacting with the collection.
                If a reranker is configured for the collection, and this parameter
                is not None, Data API calls will include the appropriate
                reranker-related headers according to this parameter. Reranker services
                may not necessarily require this setting (e.g. if the service needs no
                authentication, or one is configured as part of the collection
                definition relying on a "shared secret").
                If a string is passed, it is translated into an instance of
                `astrapy.authentication.RerankingAPIKeyHeaderProvider`.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults inherited from the Database.
                This allows for a deeper configuration of the collection, e.g.
                concerning timeouts; if this is passed together with
                the named parameters `embedding_api_key`/`reranking_api_key`,
                the latter will take precedence in their respective settings.

        Returns:
            a `Collection` instance, representing the desired collection
                (but without any form of validation).

        Raises:
            ValueError: if no keyspace is passed and the database has
                no working keyspace set.

        Example:
            >>> my_col = my_db.get_collection("my_collection")
            >>> my_col.count_documents({}, upper_bound=100)
            41

        Note:
            The attribute and indexing syntax forms achieve the same effect
            as this method. In other words, the following are equivalent:
                my_db.get_collection("coll_name")
                my_db.coll_name
                my_db["coll_name"]
        """

        # lazy importing here against circular-import error
        from astrapy.collection import Collection

        # Layering order: database options, then `spawn_api_options`,
        # finally the explicitly-passed API keys.
        spawned_options = self.api_options.with_override(
            spawn_api_options,
        )
        api_key_overrides = APIOptions(
            embedding_api_key=embedding_api_key,
            reranking_api_key=reranking_api_key,
        )
        collection_api_options = spawned_options.with_override(api_key_overrides)

        target_keyspace = keyspace if keyspace else self.keyspace
        if target_keyspace is None:
            raise ValueError(
                "No keyspace specified. This operation requires a keyspace to "
                "be set, e.g. through the `use_keyspace` method."
            )
        return Collection(
            database=self,
            name=name,
            keyspace=target_keyspace,
            api_options=collection_api_options,
        )

    @overload
    def create_collection(
        self,
        name: str,
        *,
        definition: CollectionDefinition | dict[str, Any] | None = None,
        keyspace: str | None = None,
        collection_admin_timeout_ms: int | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> Collection[DefaultDocumentType]: ...

    @overload
    def create_collection(
        self,
        name: str,
        *,
        definition: CollectionDefinition | dict[str, Any] | None = None,
        document_type: type[DOC],
        keyspace: str | None = None,
        collection_admin_timeout_ms: int | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> Collection[DOC]: ...

    def create_collection(
        self,
        name: str,
        *,
        definition: CollectionDefinition | dict[str, Any] | None = None,
        document_type: type[Any] = DefaultDocumentType,
        keyspace: str | None = None,
        collection_admin_timeout_ms: int | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> Collection[DOC]:
        """
        Create a collection on the database and return the Collection
        instance that represents it.

        This is a blocking operation: the method returns when the collection
        is ready to be used. As opposed to the `get_collection` method,
        this method causes the collection to be actually created on DB.

        Args:
            name: the name of the collection.
            definition: a complete definition for the collection. This can be an
                instance of `CollectionDefinition` or an equivalent (nested) dictionary,
                in which case it will be parsed into a `CollectionDefinition`.
                See the `astrapy.info.CollectionDefinition` class and the
                `Collection` class for more details and ways to construct this object.
            document_type: this parameter acts as a formal specifier for the
                type checker. If omitted, the resulting Collection is implicitly
                a `Collection[dict[str, Any]]`. If provided, it must match the
                type hint specified in the assignment.
            keyspace: the keyspace where the collection is to be created.
                If not specified, the general setting for this database is used.
            collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
            embedding_api_key: optional API key(s) for interacting with the collection.
                If an embedding service is configured, and this parameter is not None,
                each Data API call will include the necessary embedding-related headers
                as specified by this parameter. If a string is passed, it translates
                into the one "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
                For some vectorize providers/models, if using header-based authentication,
                specialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`
                should be supplied.
            reranking_api_key: optional API key(s) for interacting with the collection.
                If a reranker is configured for the collection, and this parameter
                is not None, Data API calls will include the appropriate
                reranker-related headers according to this parameter. Reranker services
                may not necessarily require this setting (e.g. if the service needs no
                authentication, or one is configured as part of the collection
                definition relying on a "shared secret").
                If a string is passed, it is translated into an instance of
                `astrapy.authentication.RerankingAPIKeyHeaderProvider`.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults inherited from the Database.
                This allows for a deeper configuration of the returned collection,
                e.g. concerning timeouts; if this is passed together with
                the named parameters `embedding_api_key`/`reranking_api_key`,
                the latter will take precedence in their respective settings.

        Returns:
            a (synchronous) `Collection` instance, representing the
            newly-created collection.

        Raises:
            ValueError: if no keyspace is passed and the database has
                no working keyspace set.
            UnexpectedDataAPIResponseException: if the API response does not
                carry the expected `{"ok": 1}` status.

        Example:
            >>> # Create a collection using the fluent syntax for its definition
            >>> from astrapy.constants import VectorMetric
            >>> from astrapy.info import CollectionDefinition
            >>>
            >>> collection_definition = (
            ...     CollectionDefinition.builder()
            ...     .set_vector_dimension(3)
            ...     .set_vector_metric(VectorMetric.DOT_PRODUCT)
            ...     .set_indexing("deny", ["annotations", "logs"])
            ...     .build()
            ... )
            >>> my_collection = database.create_collection(
            ...     "my_events",
            ...     definition=collection_definition,
            ... )
            >>>
            >>> # Create a collection with the definition as object
            >>> from astrapy.info import CollectionVectorOptions
            >>>
            >>> collection_definition_1 = CollectionDefinition(
            ...     vector=CollectionVectorOptions(
            ...         dimension=3,
            ...         metric=VectorMetric.DOT_PRODUCT,
            ...     ),
            ...     indexing={"deny": ["annotations", "logs"]},
            ... )
            >>> my_collection_1 = database.create_collection(
            ...     "my_events",
            ...     definition=collection_definition_1,
            ... )
            >>>
            >>> # Create a collection with the definition as plain dictionary
            >>> collection_definition_2 = {
            ...     "indexing": {"deny": ["annotations", "logs"]},
            ...     "vector": {
            ...         "dimension": 3,
            ...         "metric": VectorMetric.DOT_PRODUCT,
            ...     },
            ... }
            >>> my_collection_2 = database.create_collection(
            ...     "my_events",
            ...     definition=collection_definition_2,
            ... )
        """

        # Whatever form the definition was passed in, it is normalized
        # to a plain dictionary for the payload.
        normalized_definition = CollectionDefinition.coerce(definition or {})
        options_dict: dict[str, Any] = normalized_definition.as_dict()

        # this method has custom code to pick its timeout:
        # the label is the same whether the value is passed or inherited.
        timeout_label: str = "collection_admin_timeout_ms"
        effective_timeout_ms: int = (
            collection_admin_timeout_ms
            if collection_admin_timeout_ms is not None
            else self.api_options.timeout_options.collection_admin_timeout_ms
        )

        commander = self._get_driver_commander(keyspace=keyspace)

        # Entries that are None or an empty dictionary are left out of the command.
        command_body: dict[str, Any] = {}
        for field_name, field_value in (("name", name), ("options", options_dict)):
            if field_value is not None and field_value != {}:
                command_body[field_name] = field_value
        create_payload = {"createCollection": command_body}

        logger.info(f"createCollection('{name}')")
        create_response = commander.request(
            payload=create_payload,
            timeout_context=_TimeoutContext(
                request_ms=effective_timeout_ms, label=timeout_label
            ),
        )
        if create_response.get("status") != {"ok": 1}:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from createCollection API command.",
                raw_response=create_response,
            )
        logger.info(f"finished createCollection('{name}')")

        # The returned object is spawned exactly as `get_collection` would.
        return self.get_collection(
            name,
            document_type=document_type,
            keyspace=keyspace,
            embedding_api_key=embedding_api_key,
            reranking_api_key=reranking_api_key,
            spawn_api_options=spawn_api_options,
        )

    def drop_collection(
        self,
        name: str,
        *,
        keyspace: str | None = None,
        collection_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Drop a collection from the database, along with all documents therein.

        Args:
            name: the name of the collection to drop.
            keyspace: the keyspace where the collection resides. If not specified,
                the database working keyspace is assumed.
            collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `collection_admin_timeout_ms`.
            timeout_ms: an alias for `collection_admin_timeout_ms`.

        Returns:
            None. A successful return means the API acknowledged the deletion.

        Raises:
            ValueError: if no keyspace is passed and the database has
                no working keyspace set.
            UnexpectedDataAPIResponseException: if the API response does not
                carry the expected `{"ok": 1}` status.

        Example:
            >>> my_db.list_collection_names()
            ['a_collection', 'my_v_col', 'another_col']
            >>> my_db.drop_collection("my_v_col")
            >>> my_db.list_collection_names()
            ['a_collection', 'another_col']
        """

        _collection_admin_timeout_ms, _ca_label = _select_singlereq_timeout_ca(
            timeout_options=self.api_options.timeout_options,
            collection_admin_timeout_ms=collection_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # `_get_driver_commander` itself falls back to the commander for the
        # working keyspace when no keyspace is passed: no need to resolve it here.
        driver_commander = self._get_driver_commander(keyspace=keyspace)
        dc_payload = {"deleteCollection": {"name": name}}
        logger.info(f"deleteCollection('{name}')")
        dc_response = driver_commander.request(
            payload=dc_payload,
            timeout_context=_TimeoutContext(
                request_ms=_collection_admin_timeout_ms, label=_ca_label
            ),
        )
        if dc_response.get("status") != {"ok": 1}:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from deleteCollection API command.",
                raw_response=dc_response,
            )
        logger.info(f"finished deleteCollection('{name}')")
        # The method is declared (and documented) as returning None: the raw
        # status dictionary is deliberately not handed back to the caller.

    def list_collections(
        self,
        *,
        keyspace: str | None = None,
        collection_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[CollectionDescriptor]:
        """
        Return a full description of each collection found in a keyspace
        of this database.

        Args:
            keyspace: the keyspace to inspect. If not specified,
                the general keyspace setting for this database is assumed.
            collection_admin_timeout_ms: a timeout, in milliseconds, for the
                underlying API request. If not provided, this object's defaults
                apply. (A single API request is issued by this method, so all
                timeout parameters are treated the same.)
            request_timeout_ms: an alias for `collection_admin_timeout_ms`.
            timeout_ms: an alias for `collection_admin_timeout_ms`.

        Returns:
            a list of CollectionDescriptor instances, one per collection.

        Example:
            >>> coll_list = my_db.list_collections()
            >>> coll_list
            [CollectionDescriptor(name='my_v_col', options=CollectionDefinition())]
            >>> for coll_dict in my_db.list_collections():
            ...     print(coll_dict)
            ...
            CollectionDescriptor(name='my_v_col', options=CollectionDefinition())
        """

        chosen_timeout_ms, chosen_label = _select_singlereq_timeout_ca(
            timeout_options=self.api_options.timeout_options,
            collection_admin_timeout_ms=collection_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # the actual API call lives in the context-based helper
        request_timeout_context = _TimeoutContext(
            request_ms=chosen_timeout_ms, label=chosen_label
        )
        return self._list_collections_ctx(
            keyspace=keyspace,
            timeout_context=request_timeout_context,
        )

    def _list_collections_ctx(
        self,
        *,
        keyspace: str | None,
        timeout_context: _TimeoutContext,
    ) -> list[CollectionDescriptor]:
        """
        Issue a `findCollections` command (with the "explain" option) and
        convert the response into collection descriptors.

        Args:
            keyspace: the keyspace handed to `_get_driver_commander`.
            timeout_context: the timeout context applied to the API request.

        Returns:
            a list of CollectionDescriptor instances, one for each item
            in the "collections" entry of the response status.

        Raises:
            UnexpectedDataAPIResponseException: if the response status
                lacks a "collections" entry.
        """
        commander = self._get_driver_commander(keyspace=keyspace)
        logger.info("findCollections")
        response = commander.request(
            payload={"findCollections": {"options": {"explain": True}}},
            timeout_context=timeout_context,
        )
        if "collections" not in response.get("status", {}):
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from findCollections API command.",
                raw_response=response,
            )
        logger.info("finished findCollections")
        # each item is a dict, to be marshaled into a "descriptor"
        raw_collections = response["status"]["collections"]
        return [
            CollectionDescriptor._from_dict(raw_collection)
            for raw_collection in raw_collections
        ]

    def list_collection_names(
        self,
        *,
        keyspace: str | None = None,
        collection_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[str]:
        """
        Return the names of the collections found in a keyspace of this database.

        Args:
            keyspace: the keyspace to inspect. If not specified,
                the general keyspace setting for this database is assumed.
            collection_admin_timeout_ms: a timeout, in milliseconds, for the
                underlying API request. If not provided, this object's defaults
                apply. (A single API request is issued by this method, so all
                timeout parameters are treated the same.)
            request_timeout_ms: an alias for `collection_admin_timeout_ms`.
            timeout_ms: an alias for `collection_admin_timeout_ms`.

        Returns:
            a list of the collection names as strings, in no particular order.

        Raises:
            UnexpectedDataAPIResponseException: if the response status
                lacks a "collections" entry.

        Example:
            >>> my_db.list_collection_names()
            ['a_collection', 'another_col']
        """

        chosen_timeout_ms, chosen_label = _select_singlereq_timeout_ca(
            timeout_options=self.api_options.timeout_options,
            collection_admin_timeout_ms=collection_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        commander = self._get_driver_commander(keyspace=keyspace)
        # no "explain" option here: the response status carries just the names
        find_payload: dict[str, Any] = {"findCollections": {}}
        request_timeout_context = _TimeoutContext(
            request_ms=chosen_timeout_ms, label=chosen_label
        )
        logger.info("findCollections")
        response = commander.request(
            payload=find_payload,
            timeout_context=request_timeout_context,
        )
        if "collections" not in response.get("status", {}):
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from findCollections API command.",
                raw_response=response,
            )
        logger.info("finished findCollections")
        return response["status"]["collections"]  # type: ignore[no-any-return]

    # Typing overload: when `row_type` is not passed, the returned Table
    # is typed as `Table[DefaultRowType]`.
    @overload
    def get_table(
        self,
        name: str,
        *,
        keyspace: str | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> Table[DefaultRowType]: ...

    # Typing overload: an explicitly-passed `row_type` determines the type
    # parameter of the returned Table (`Table[ROW]`).
    @overload
    def get_table(
        self,
        name: str,
        *,
        row_type: type[ROW],
        keyspace: str | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> Table[ROW]: ...

    def get_table(
        self,
        name: str,
        *,
        row_type: type[Any] = DefaultRowType,
        keyspace: str | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> Table[ROW]:
        """
        Spawn a `Table` object instance representing a table
        on this database.

        No API request is issued by this method: the `Table` instance is
        simply constructed. Hence, for the returned object to be used
        meaningfully, the table must exist already (for instance, it should
        have been created previously by calling the `create_table` method).

        Args:
            name: the name of the table.
            row_type: this parameter acts as a formal specifier for the type
                checker. If omitted, the resulting Table is implicitly
                a `Table[dict[str, Any]]`. If provided, it must match the type
                hint specified in the assignment. See the examples below.
            keyspace: the keyspace containing the table. If no keyspace
                is specified, the working keyspace of this database is used.
            embedding_api_key: optional API key(s) for interacting with the table.
                If an embedding service is configured, and this parameter is not None,
                each Data API call will include the necessary embedding-related headers
                as specified by this parameter. If a string is passed, it translates
                into the one "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
                For some vectorize providers/models, if using header-based
                authentication, specialized subclasses of
                `astrapy.authentication.EmbeddingHeadersProvider` should be supplied.
            reranking_api_key: optional API key(s) for interacting with the table.
                If a reranker is configured for the table, and this parameter
                is not None, Data API calls will include the appropriate
                reranker-related headers according to this parameter. Reranker services
                may not necessarily require this setting (e.g. if the service needs no
                authentication, or one is configured as part of the table
                definition relying on a "shared secret").
                If a string is passed, it is translated into an instance of
                `astrapy.authentication.RerankingAPIKeyHeaderProvider`.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults inherited from the Database.
                This allows for a deeper configuration of the table, e.g.
                concerning timeouts. The `embedding_api_key` and
                `reranking_api_key` parameters are applied as a further
                override after these options.

        Returns:
            a `Table` instance, representing the desired table
                (but without any form of validation).

        Raises:
            ValueError: if no keyspace is passed and the database has
                no working keyspace set.

        Example:
            >>> # Get a Table object (and read a property of it as an example):
            >>> my_table = database.get_table("games")
            >>> my_table.full_name
            'default_keyspace.games'
            >>>
            >>> # Get a Table object in a specific keyspace,
            >>> # and set an embedding API key to it:
            >>> my_other_table = database.get_table(
            ...     "tournaments",
            ...     keyspace="the_other_keyspace",
            ...     embedding_api_key="secret-012abc...",
            ... )
            >>>
            >>> from astrapy import Table
            >>> MyCustomDictType = dict[str, int]
            >>>
            >>> # Get a Table object typed with a specific type for its rows:
            >>> my_typed_table: Table[MyCustomDictType] = database.get_table(
            ...     "games",
            ...     row_type=MyCustomDictType,
            ... )
        """

        # lazy importing here against circular-import error
        from astrapy.table import Table

        # layer 1: the database options, overridden by the spawn options
        options_with_spawn = self.api_options.with_override(spawn_api_options)
        # layer 2: the API keys passed as named parameters to this method
        key_overrides = APIOptions(
            embedding_api_key=embedding_api_key,
            reranking_api_key=reranking_api_key,
        )
        table_api_options = options_with_spawn.with_override(key_overrides)

        target_keyspace = keyspace if keyspace else self.keyspace
        if target_keyspace is None:
            raise ValueError(
                "No keyspace specified. This operation requires a keyspace to "
                "be set, e.g. through the `use_keyspace` method."
            )
        return Table[ROW](
            database=self,
            name=name,
            keyspace=target_keyspace,
            api_options=table_api_options,
        )

    # Typing overload: when `row_type` is not passed, the returned Table
    # is typed as `Table[DefaultRowType]`.
    @overload
    def create_table(
        self,
        name: str,
        *,
        definition: CreateTableDefinition | dict[str, Any],
        keyspace: str | None = None,
        if_not_exists: bool | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> Table[DefaultRowType]: ...

    # Typing overload: an explicitly-passed `row_type` determines the type
    # parameter of the returned Table (`Table[ROW]`).
    @overload
    def create_table(
        self,
        name: str,
        *,
        definition: CreateTableDefinition | dict[str, Any],
        row_type: type[ROW],
        keyspace: str | None = None,
        if_not_exists: bool | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> Table[ROW]: ...

    def create_table(
        self,
        name: str,
        *,
        definition: CreateTableDefinition | dict[str, Any],
        row_type: type[Any] = DefaultRowType,
        keyspace: str | None = None,
        if_not_exists: bool | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> Table[ROW]:
        """
        Create a table on the database and return the Table
        instance that represents it.

        As opposed to the `get_table` method, this method issues
        a `createTable` API command, and returns only once the API
        has responded with a success status.

        Args:
            name: the name of the table.
            definition: a complete table definition for the table. This can be an
                instance of `CreateTableDefinition` or an equivalent (nested) dictionary,
                in which case it will be parsed into a `CreateTableDefinition`.
                See the `astrapy.info.CreateTableDefinition` class and the
                `Table` class for more details and ways to construct this object.
            row_type: this parameter acts as a formal specifier for the type
                checker. If omitted, the resulting Table is implicitly
                a `Table[dict[str, Any]]`. If provided, it must match the type
                hint specified in the assignment. See the examples below.
            keyspace: the keyspace where the table is to be created.
                If not specified, the general setting for this database is used.
            if_not_exists: if set to True, the command will succeed even if a table
                with the specified name already exists (in which case no actual
                table creation takes place on the database). If left unspecified,
                the option is not sent and the API default behaviour holds.
            table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.
            embedding_api_key: optional API key(s) for interacting with the table.
                If an embedding service is configured, and this parameter is not None,
                each Data API call will include the necessary embedding-related headers
                as specified by this parameter. If a string is passed, it translates
                into the one "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
                For some vectorize providers/models, if using header-based authentication,
                specialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`
                should be supplied.
            reranking_api_key: optional API key(s) for interacting with the table.
                If a reranker is configured for the table, and this parameter
                is not None, Data API calls will include the appropriate
                reranker-related headers according to this parameter. Reranker services
                may not necessarily require this setting (e.g. if the service needs no
                authentication, or one is configured as part of the table
                definition relying on a "shared secret").
                If a string is passed, it is translated into an instance of
                `astrapy.authentication.RerankingAPIKeyHeaderProvider`.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults inherited from the Database.
                This allows for a deeper configuration of the returned table,
                e.g. concerning timeouts. These options are used only when
                spawning the returned `Table` object (through `get_table`).

        Returns:
            a (synchronous) `Table` instance, representing the
            newly-created table.

        Raises:
            UnexpectedDataAPIResponseException: if the "status" of the API
                response is anything other than `{"ok": 1}`.

        Example:
            >>> # Create a table using the fluent syntax for definition
            >>> from astrapy.constants import SortMode
            >>> from astrapy.info import (
            ...     CreateTableDefinition,
            ...     ColumnType,
            ... )
            >>> table_definition = (
            ...     CreateTableDefinition.builder()
            ...     .add_column("match_id", ColumnType.TEXT)
            ...     .add_column("round", ColumnType.INT)
            ...     .add_vector_column("m_vector", dimension=3)
            ...     .add_column("score", ColumnType.INT)
            ...     .add_column("when", ColumnType.TIMESTAMP)
            ...     .add_column("winner", ColumnType.TEXT)
            ...     .add_set_column("fighters", ColumnType.UUID)
            ...     .add_partition_by(["match_id"])
            ...     .add_partition_sort({"round": SortMode.ASCENDING})
            ...     .build()
            ... )
            >>> my_table = database.create_table(
            ...     "games",
            ...     definition=table_definition,
            ... )
            >>>
            >>> # Create a table with the definition as object
            >>> # (and do not raise an error if the table exists already)
            >>> from astrapy.info import (
            ...     CreateTableDefinition,
            ...     TablePrimaryKeyDescriptor,
            ...     TableScalarColumnTypeDescriptor,
            ...     TableValuedColumnType,
            ...     TableValuedColumnTypeDescriptor,
            ...     TableVectorColumnTypeDescriptor,
            ... )
            >>> table_definition_1 = CreateTableDefinition(
            ...     columns={
            ...         "match_id": TableScalarColumnTypeDescriptor(
            ...             ColumnType.TEXT,
            ...         ),
            ...         "round": TableScalarColumnTypeDescriptor(
            ...             ColumnType.INT,
            ...         ),
            ...         "m_vector": TableVectorColumnTypeDescriptor(
            ...             column_type="vector", dimension=3
            ...         ),
            ...         "score": TableScalarColumnTypeDescriptor(
            ...             ColumnType.INT,
            ...         ),
            ...         "when": TableScalarColumnTypeDescriptor(
            ...             ColumnType.TIMESTAMP,
            ...         ),
            ...         "winner": TableScalarColumnTypeDescriptor(
            ...             ColumnType.TEXT,
            ...         ),
            ...         "fighters": TableValuedColumnTypeDescriptor(
            ...             column_type=TableValuedColumnType.SET,
            ...             value_type=ColumnType.UUID,
            ...         ),
            ...     },
            ...     primary_key=TablePrimaryKeyDescriptor(
            ...         partition_by=["match_id"],
            ...         partition_sort={"round": SortMode.ASCENDING},
            ...     ),
            ... )
            >>> my_table_1 = database.create_table(
            ...     "games",
            ...     definition=table_definition_1,
            ...     if_not_exists=True,
            ... )
            >>>
            >>> # Create a table with the definition as plain dictionary
            >>> # (and do not raise an error if the table exists already)
            >>> table_definition_2 = {
            ...     "columns": {
            ...         "match_id": {"type": "text"},
            ...         "round": {"type": "int"},
            ...         "m_vector": {"type": "vector", "dimension": 3},
            ...         "score": {"type": "int"},
            ...         "when": {"type": "timestamp"},
            ...         "winner": {"type": "text"},
            ...         "fighters": {"type": "set", "valueType": "uuid"},
            ...     },
            ...     "primaryKey": {
            ...         "partitionBy": ["match_id"],
            ...         "partitionSort": {"round": 1},
            ...     },
            ... }
            >>> my_table_2 = database.create_table(
            ...     "games",
            ...     definition=table_definition_2,
            ...     if_not_exists=True,
            ... )
        """

        # the "options" part of the command is left empty (and later dropped
        # from the payload) unless the caller explicitly sets if_not_exists
        creation_options: dict[str, bool] = (
            {} if if_not_exists is None else {"ifNotExists": if_not_exists}
        )
        # whatever form the definition comes in, it is sent as a plain dict
        definition_dict: dict[str, Any] = CreateTableDefinition.coerce(
            definition
        ).as_dict()
        chosen_timeout_ms, chosen_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        commander = self._get_driver_commander(keyspace=keyspace)
        command_fields = {
            "name": name,
            "definition": definition_dict,
            "options": creation_options,
        }
        # fields that are None or an empty dict are not sent at all
        create_payload = {
            "createTable": {
                field: value
                for field, value in command_fields.items()
                if value is not None and value != {}
            }
        }
        request_timeout_context = _TimeoutContext(
            request_ms=chosen_timeout_ms, label=chosen_label
        )
        logger.info(f"createTable('{name}')")
        response = commander.request(
            payload=create_payload,
            timeout_context=request_timeout_context,
        )
        if response.get("status") != {"ok": 1}:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from createTable API command.",
                raw_response=response,
            )
        logger.info(f"finished createTable('{name}')")
        # the creation succeeded: spawn the object representing the new table
        return self.get_table(
            name,
            row_type=row_type,
            keyspace=keyspace,
            embedding_api_key=embedding_api_key,
            reranking_api_key=reranking_api_key,
            spawn_api_options=spawn_api_options,
        )

    def drop_table_index(
        self,
        name: str,
        *,
        keyspace: str | None = None,
        if_exists: bool | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Drop (delete) an index (of any kind) from the table it is associated to.

        The method issues a `dropIndex` API command and returns once the API
        has responded with a success status.

        Note:
            Although associated to a table, index names are unique across a keyspace.
            For this reason, no table name is required in this call.

        Args:
            name: the name of the index.
            keyspace: the keyspace to which the index belongs.
                If not specified, the general setting for this database is used.
            if_exists: if passed as True, trying to drop a non-existing index
                will not error, just silently do nothing instead. If not provided,
                the option is not sent and the API default behaviour will hold.
            table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Raises:
            UnexpectedDataAPIResponseException: if the "status" of the API
                response is anything other than `{"ok": 1}`.

        Example:
            >>> # Drop an index from the keyspace:
            >>> database.drop_table_index("score_index")
            >>> # Drop an index, without erroring if it does not exist:
            >>> database.drop_table_index("score_index", if_exists=True)
        """

        chosen_timeout_ms, chosen_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # the "options" part of the command is left empty (and later dropped
        # from the payload) unless the caller explicitly sets if_exists
        drop_options: dict[str, bool] = (
            {} if if_exists is None else {"ifExists": if_exists}
        )
        command_fields = {
            "name": name,
            "options": drop_options,
        }
        # fields that are None or an empty dict are not sent at all
        drop_payload = {
            "dropIndex": {
                field: value
                for field, value in command_fields.items()
                if value is not None and value != {}
            }
        }
        commander = self._get_driver_commander(keyspace=keyspace)
        request_timeout_context = _TimeoutContext(
            request_ms=chosen_timeout_ms, label=chosen_label
        )
        logger.info(f"dropIndex('{name}')")
        response = commander.request(
            payload=drop_payload,
            timeout_context=request_timeout_context,
        )
        if response.get("status") != {"ok": 1}:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from dropIndex API command.",
                raw_response=response,
            )
        logger.info(f"finished dropIndex('{name}')")

    def drop_table(
        self,
        name: str,
        *,
        keyspace: str | None = None,
        if_exists: bool | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Drop a table from the database, along with all rows therein and related indexes.

        Args:
            name: the name of the table to drop.
            keyspace: the keyspace where the table resides. If not specified,
                the database working keyspace is assumed.
            if_exists: if passed as True, trying to drop a non-existing table
                will not error, just silently do nothing instead. If not provided,
                the option is not sent and the API default behaviour will hold.
            table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Raises:
            UnexpectedDataAPIResponseException: if the "status" of the API
                response is anything other than `{"ok": 1}`.

        Example:
            >>> database.list_table_names()
            ['fighters', 'games']
            >>> database.drop_table("fighters")
            >>> database.list_table_names()
            ['games']
            >>> # not erroring because of if_exists:
            >>> database.drop_table("fighters", if_exists=True)
        """

        _table_admin_timeout_ms, _ta_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # an explicitly-passed keyspace wins over the database working keyspace
        _keyspace = keyspace or self.keyspace
        # the "options" part of the command is left empty (and later dropped
        # from the payload) unless the caller explicitly sets if_exists
        dt_options: dict[str, bool] = (
            {} if if_exists is None else {"ifExists": if_exists}
        )
        driver_commander = self._get_driver_commander(keyspace=_keyspace)
        # fields that are None or an empty dict are not sent at all
        dt_payload = {
            "dropTable": {
                k: v
                for k, v in {
                    "name": name,
                    "options": dt_options,
                }.items()
                if v is not None and v != {}
            }
        }
        logger.info(f"dropTable('{name}')")
        dt_response = driver_commander.request(
            payload=dt_payload,
            timeout_context=_TimeoutContext(
                request_ms=_table_admin_timeout_ms, label=_ta_label
            ),
        )
        if dt_response.get("status") != {"ok": 1}:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from dropTable API command.",
                raw_response=dt_response,
            )
        logger.info(f"finished dropTable('{name}')")
        # NOTE: despite the `-> None` annotation, the response status is
        # handed back to the caller; this is kept unchanged for compatibility.
        return dt_response.get("status", {})  # type: ignore[no-any-return]

    def list_tables(
        self,
        *,
        keyspace: str | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[ListTableDescriptor]:
        """
        Return a full description of each table found in a keyspace
        of this database.

        Args:
            keyspace: the keyspace to inspect. If not specified,
                the general keyspace setting for this database is assumed.
            table_admin_timeout_ms: a timeout, in milliseconds, for the
                underlying API request. If not provided, this object's defaults
                apply. (A single API request is issued by this method, so all
                timeout parameters are treated the same.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Returns:
            a list of ListTableDescriptor instances, one for each table.

        Example:
            >>> tables = my_database.list_tables()
            >>> tables
            [ListTableDescriptor(name='fighters', definition=ListTableDefinition(...
            >>> tables[1].name
            'games'
            >>> tables[1].definition.columns
            {'match_id': TableScalarColumnTypeDescriptor(ColumnType.TEXT),...
            >>> tables[1].definition.columns['score']
            TableScalarColumnTypeDescriptor(ColumnType.INT)
            >>> tables[1].definition.primary_key.partition_by
            ['match_id']
            >>> tables[1].definition.primary_key.partition_sort
            {'round': 1}
        """

        chosen_timeout_ms, chosen_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # the actual API call lives in the context-based helper
        request_timeout_context = _TimeoutContext(
            request_ms=chosen_timeout_ms, label=chosen_label
        )
        return self._list_tables_ctx(
            keyspace=keyspace,
            timeout_context=request_timeout_context,
        )

    def _list_tables_ctx(
        self,
        *,
        keyspace: str | None,
        timeout_context: _TimeoutContext,
    ) -> list[ListTableDescriptor]:
        """
        Issue a `listTables` command (with the "explain" option) and
        convert the response into table descriptors.

        Args:
            keyspace: the keyspace handed to `_get_driver_commander`.
            timeout_context: the timeout context applied to the API request.

        Returns:
            a list of ListTableDescriptor instances, one for each item
            in the "tables" entry of the response status.

        Raises:
            UnexpectedDataAPIResponseException: if the response status
                lacks a "tables" entry.
        """
        commander = self._get_driver_commander(keyspace=keyspace)
        logger.info("listTables")
        response = commander.request(
            payload={"listTables": {"options": {"explain": True}}},
            timeout_context=timeout_context,
        )
        if "tables" not in response.get("status", {}):
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from listTables API command.",
                raw_response=response,
            )
        logger.info("finished listTables")
        # each item is a dict, to be marshaled into a "descriptor"
        raw_tables = response["status"]["tables"]
        return [ListTableDescriptor.coerce(raw_table) for raw_table in raw_tables]

    def list_table_names(
        self,
        *,
        keyspace: str | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[str]:
        """
        Return the names of the tables found in a keyspace of this database.

        Args:
            keyspace: the keyspace to be inspected. If not specified,
                the general setting for this database is assumed.
            table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Returns:
            a list of the table names as strings, in no particular order.

        Raises:
            UnexpectedDataAPIResponseException: if the response status
                carries no "tables" entry.

        Example:
            >>> database.list_table_names()
            ['fighters', 'games']
        """

        effective_timeout_ms, timeout_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        commander = self._get_driver_commander(keyspace=keyspace)
        names_payload: dict[str, Any] = {"listTables": {}}
        request_timeout_context = _TimeoutContext(
            request_ms=effective_timeout_ms, label=timeout_label
        )
        logger.info("listTables")
        response = commander.request(
            payload=names_payload,
            timeout_context=request_timeout_context,
        )
        # guard clause: bail out on a response lacking the expected field
        if "tables" not in response.get("status", {}):
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from listTables API command.",
                raw_response=response,
            )
        logger.info("finished listTables")
        table_names: list[str] = response["status"]["tables"]
        return table_names

    def command(
        self,
        body: dict[str, Any],
        *,
        keyspace: str | None | UnsetType = _UNSET,
        collection_or_table_name: str | None = None,
        raise_api_errors: bool = True,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> dict[str, Any]:
        """
        Send a POST request to the Data API for this database with
        an arbitrary, caller-provided payload.

        Args:
            body: a JSON-serializable dictionary, the payload of the request.
            keyspace: the keyspace to use, if any. If a keyspace is employed,
                it is used to construct the full request URL. To run a command
                targeting no specific keyspace (rather, the database as a whole),
                pass an explicit `None`: the request URL will lack the suffix
                "/<keyspace>" component. If unspecified, the working keyspace of
                this database is used. If another keyspace is passed, it will be
                used instead of the database's working one.
            collection_or_table_name: if provided, the name is appended at the end
                of the endpoint. In this way, this method allows collection-
                and table-level arbitrary POST requests as well.
                This parameter cannot be used if `keyspace=None` is explicitly provided.
            raise_api_errors: if True, responses with a nonempty 'errors' field
                result in an astrapy exception being raised.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a dictionary with the response of the HTTP request.

        Raises:
            ValueError: if `collection_or_table_name` is passed together
                with an explicit `keyspace=None`.

        Example:
            >>> my_db.command({"findCollections": {}})
            {'status': {'collections': ['my_coll']}}
            >>> my_db.command({"countDocuments": {}}, collection_or_table_name="my_coll")
            {'status': {'count': 123}}
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # resolve the keyspace: explicit None means "no keyspace at all",
        # while an unset value falls back to this database's working keyspace.
        _keyspace: str | None
        if keyspace is None:
            if collection_or_table_name is not None:
                raise ValueError(
                    "Cannot pass collection_or_table_name to database "
                    "`command` on a no-keyspace command"
                )
            _keyspace = None
        elif isinstance(keyspace, UnsetType):
            _keyspace = self.keyspace
        else:
            _keyspace = keyspace
        # build the ad-hoc-commander path with _keyspace and the coll.or.table:
        # None components are skipped, slashes are trimmed off each piece
        # and pieces that end up empty are dropped.
        base_path_components = [
            comp
            for comp in (
                ncomp.strip("/")
                for ncomp in (
                    self.api_options.data_api_url_options.api_path,
                    self.api_options.data_api_url_options.api_version,
                    _keyspace,
                    collection_or_table_name,
                )
                if ncomp is not None
            )
            if comp != ""
        ]
        base_path = f"/{'/'.join(base_path_components)}"
        command_commander = APICommander(
            api_endpoint=self.api_endpoint,
            path=base_path,
            headers=self._commander_headers,
            callers=self.api_options.callers,
            redacted_header_names=self.api_options.redacted_header_names,
        )

        # the log lines identify the command by the sorted top-level body keys
        _cmd_desc = ",".join(sorted(body.keys()))
        logger.info(f"command={_cmd_desc} on {self.__class__.__name__}")
        req_response = command_commander.request(
            payload=body,
            raise_api_errors=raise_api_errors,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        # completion is logged with a "finished" prefix, so that the start
        # and the end of a command can be told apart in the logs.
        logger.info(f"finished command={_cmd_desc} on {self.__class__.__name__}")
        return req_response

    def get_database_admin(
        self,
        *,
        token: str | TokenProvider | UnsetType = _UNSET,
        spawn_api_options: APIOptions | UnsetType = _UNSET,
    ) -> DatabaseAdmin:
        """
        Spawn a DatabaseAdmin object for this database, to be used
        for admin tasks such as managing keyspaces.

        The concrete subclass of DatabaseAdmin being returned depends
        on the environment where the database resides.

        Args:
            token: an access token with enough permission on the database to
                perform the desired tasks. If omitted (as it can generally be done),
                the token of this Database is used.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            spawn_api_options: a specification - complete or partial - of the
                API Options to override the defaults.
                This allows for a deeper configuration of the database admin, e.g.
                concerning timeouts; if this is passed together with
                the equivalent named parameters, the latter will take precedence
                in their respective settings.

        Returns:
            A DatabaseAdmin instance targeting this database. More precisely,
            for Astra DB an instance of `AstraDBDatabaseAdmin` is returned;
            for other environments, an instance of `DataAPIDatabaseAdmin` is returned.

        Raises:
            InvalidEnvironmentException: in an Astra DB environment, if the
                API endpoint of this database cannot be parsed.

        Example:
            >>> my_db_admin = my_db.get_database_admin()
            >>> if "new_keyspace" not in my_db_admin.list_keyspaces():
            ...     my_db_admin.create_keyspace("new_keyspace")
            >>> my_db_admin.list_keyspaces()
            ['default_keyspace', 'new_keyspace']
        """

        # lazy importing here to avoid circular dependency
        from astrapy.admin.admin import AstraDBDatabaseAdmin, DataAPIDatabaseAdmin

        # layering of the options: this database's own settings first, then
        # the spawn options, and finally the named parameter on top of them.
        named_param_options = APIOptions(token=token)
        spawn_overridden_options = self.api_options.with_override(spawn_api_options)
        admin_api_options = spawn_overridden_options.with_override(
            named_param_options
        )

        if admin_api_options.environment not in Environment.astra_db_values:
            return DataAPIDatabaseAdmin(
                api_endpoint=self.api_endpoint,
                api_options=admin_api_options,
                spawner_database=self,
            )

        # Astra DB from here on: the endpoint must be a parseable one
        if parse_api_endpoint(self.api_endpoint) is None:
            raise InvalidEnvironmentException(
                "Cannot use a nonstandard API endpoint for this operation."
            )
        return AstraDBDatabaseAdmin(
            api_endpoint=self.api_endpoint,
            api_options=admin_api_options,
            spawner_database=self,
        )

Instance variables

var id : str

The ID of this database. Not available outside of Astra DB and when using custom domains.

Example

>>> my_db.id
'01234567-89ab-cdef-0123-456789abcdef'

Expand source code

@property
def id(self) -> str:
    """
    The ID of this database, as extracted from its API endpoint.
    Not available outside of Astra DB and when using custom domains.

    Raises:
        InvalidEnvironmentException: if the environment is not an Astra DB
            one, or if the API endpoint cannot be parsed.

    Example:
        >>> my_db.id
        '01234567-89ab-cdef-0123-456789abcdef'
    """

    if self.api_options.environment not in Environment.astra_db_values:
        raise InvalidEnvironmentException(
            "Database is not in a supported environment for this operation."
        )
    endpoint_info = parse_api_endpoint(self.api_endpoint)
    if endpoint_info is None:
        raise InvalidEnvironmentException(
            "Cannot inspect a nonstandard API endpoint for properties."
        )
    return endpoint_info.database_id

var keyspace : str | None

The keyspace this database uses as target for all commands when no method-call-specific keyspace is specified.

Returns

the working keyspace (a string), or None if not set.

Example

>>> my_db.keyspace
'the_keyspace'

Expand source code

@property
def keyspace(self) -> str | None:
    """
    The working keyspace of this database, i.e. the one targeted by
    commands when no method-call-specific keyspace is specified.

    Returns:
        the working keyspace (a string), or None if not set.

    Example:
        >>> my_db.keyspace
        'the_keyspace'
    """

    working_keyspace = self._using_keyspace
    return working_keyspace

var region : str

The region where this database is located.

The region is still well defined in case of multi-region databases, since a Database instance connects to exactly one of the regions (as specified by the API Endpoint).

Not available outside of Astra DB and when using custom domains.

Example

>>> my_db.region
'us-west-2'

Expand source code

@property
def region(self) -> str:
    """
    The region where this database is located, as extracted
    from its API endpoint.

    For multi-region databases the region is still well defined,
    since a Database instance connects to exactly one of the regions
    (as specified by the API Endpoint).

    Not available outside of Astra DB and when using custom domains.

    Raises:
        InvalidEnvironmentException: if the environment is not an Astra DB
            one, or if the API endpoint cannot be parsed.

    Example:
        >>> my_db.region
        'us-west-2'
    """

    if self.api_options.environment not in Environment.astra_db_values:
        raise InvalidEnvironmentException(
            "Database is not in a supported environment for this operation."
        )
    endpoint_info = parse_api_endpoint(self.api_endpoint)
    if endpoint_info is None:
        raise InvalidEnvironmentException(
            "Cannot inspect a nonstandard API endpoint for properties."
        )
    return endpoint_info.region

Methods

def command(self, body: dict[str, Any], *, keyspace: str | None | UnsetType = (unset), collection_or_table_name: str | None = None, raise_api_errors: bool = True, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> dict[str, typing.Any]

Send a POST request to the Data API for this database with an arbitrary, caller-provided payload.

Args

body: a JSON-serializable dictionary, the payload of the request.
keyspace: the keyspace to use, if any. If a keyspace is employed, it is used to construct the full request URL. To run a command targeting no specific keyspace (rather, the database as a whole), pass an explicit None: the request URL will lack the suffix "/<keyspace>" component. If unspecified, the working keyspace of this database is used. If another keyspace is passed, it will be used instead of the database's working one.
collection_or_table_name: if provided, the name is appended at the end of the endpoint. In this way, this method allows collection- and table-level arbitrary POST requests as well. This parameter cannot be used if keyspace=None is explicitly provided.
raise_api_errors: if True, responses with a nonempty 'errors' field result in an astrapy exception being raised.
general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a dictionary with the response of the HTTP request.

Example

>>> my_db.command({"findCollections": {}})
{'status': {'collections': ['my_coll']}}
>>> my_db.command({"countDocuments": {}}, collection_or_table_name="my_coll")
{'status': {'count': 123}}

Expand source code

def command(
    self,
    body: dict[str, Any],
    *,
    keyspace: str | None | UnsetType = _UNSET,
    collection_or_table_name: str | None = None,
    raise_api_errors: bool = True,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> dict[str, Any]:
    """
    Send a POST request to the Data API for this database with
    an arbitrary, caller-provided payload.

    Args:
        body: a JSON-serializable dictionary, the payload of the request.
        keyspace: the keyspace to use, if any. If a keyspace is employed,
            it is used to construct the full request URL. To run a command
            targeting no specific keyspace (rather, the database as a whole),
            pass an explicit `None`: the request URL will lack the suffix
            "/<keyspace>" component. If unspecified, the working keyspace of
            this database is used. If another keyspace is passed, it will be
            used instead of the database's working one.
        collection_or_table_name: if provided, the name is appended at the end
            of the endpoint. In this way, this method allows collection-
            and table-level arbitrary POST requests as well.
            This parameter cannot be used if `keyspace=None` is explicitly provided.
        raise_api_errors: if True, responses with a nonempty 'errors' field
            result in an astrapy exception being raised.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a dictionary with the response of the HTTP request.

    Raises:
        ValueError: if `collection_or_table_name` is passed together
            with an explicit `keyspace=None`.

    Example:
        >>> my_db.command({"findCollections": {}})
        {'status': {'collections': ['my_coll']}}
        >>> my_db.command({"countDocuments": {}}, collection_or_table_name="my_coll")
        {'status': {'count': 123}}
    """

    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # resolve the keyspace: explicit None means "no keyspace at all",
    # while an unset value falls back to this database's working keyspace.
    _keyspace: str | None
    if keyspace is None:
        if collection_or_table_name is not None:
            raise ValueError(
                "Cannot pass collection_or_table_name to database "
                "`command` on a no-keyspace command"
            )
        _keyspace = None
    elif isinstance(keyspace, UnsetType):
        _keyspace = self.keyspace
    else:
        _keyspace = keyspace
    # build the ad-hoc-commander path with _keyspace and the coll.or.table:
    # None components are skipped, slashes are trimmed off each piece
    # and pieces that end up empty are dropped.
    base_path_components = [
        comp
        for comp in (
            ncomp.strip("/")
            for ncomp in (
                self.api_options.data_api_url_options.api_path,
                self.api_options.data_api_url_options.api_version,
                _keyspace,
                collection_or_table_name,
            )
            if ncomp is not None
        )
        if comp != ""
    ]
    base_path = f"/{'/'.join(base_path_components)}"
    command_commander = APICommander(
        api_endpoint=self.api_endpoint,
        path=base_path,
        headers=self._commander_headers,
        callers=self.api_options.callers,
        redacted_header_names=self.api_options.redacted_header_names,
    )

    # the log lines identify the command by the sorted top-level body keys
    _cmd_desc = ",".join(sorted(body.keys()))
    logger.info(f"command={_cmd_desc} on {self.__class__.__name__}")
    req_response = command_commander.request(
        payload=body,
        raise_api_errors=raise_api_errors,
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    # completion is logged with a "finished" prefix, so that the start
    # and the end of a command can be told apart in the logs.
    logger.info(f"finished command={_cmd_desc} on {self.__class__.__name__}")
    return req_response

def create_collection(self, name: str, *, definition: CollectionDefinition | dict[str, Any] | None = None, document_type: type[Any] = typing.Dict[str, typing.Any], keyspace: str | None = None, collection_admin_timeout_ms: int | None = None, embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = (unset), reranking_api_key: str | RerankingHeadersProvider | UnsetType = (unset), spawn_api_options: APIOptions | UnsetType = (unset)) ‑> Collection[DOC]

Creates a collection on the database and returns the Collection instance that represents it.

Args

name: the name of the collection.
definition: a complete definition for the collection. This can be an instance of CollectionDefinition or an equivalent (nested) dictionary, in which case it will be parsed into a CollectionDefinition. See the CollectionDefinition class and the Collection class for more details and ways to construct this object.
document_type: this parameter acts as a formal specifier for the type checker. If omitted, the resulting Collection is implicitly a Collection[dict[str, Any]]. If provided, it must match the type hint specified in the assignment. See the examples below.
keyspace: the keyspace where the collection is to be created. If not specified, the general setting for this database is used.
collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply.
embedding_api_key: optional API key(s) for interacting with the collection. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
reranking_api_key: optional API key(s) for interacting with the collection. If a reranker is configured for the collection, and this parameter is not None, Data API calls will include the appropriate reranker-related headers according to this parameter. Reranker services may not necessarily require this setting (e.g. if the service needs no authentication, or one is configured as part of the collection definition relying on a "shared secret"). If a string is passed, it is translated into an instance of RerankingAPIKeyHeaderProvider.
spawn_api_options: a specification - complete or partial - of the API Options to override the defaults inherited from the Database. This allows for a deeper configuration of the collection, e.g. concerning timeouts; if this is passed together with the named timeout parameters, the latter will take precedence in their respective settings.

Returns

a (synchronous) Collection instance, representing the newly-created collection.

Example

>>> # Create a collection using the fluent syntax for its definition
>>> from astrapy.constants import VectorMetric
>>> from astrapy.info import CollectionDefinition
>>>
>>> collection_definition = (
...     CollectionDefinition.builder()
...     .set_vector_dimension(3)
...     .set_vector_metric(VectorMetric.DOT_PRODUCT)
...     .set_indexing("deny", ["annotations", "logs"])
...     .build()
... )
>>> my_collection = database.create_collection(
...     "my_events",
...     definition=collection_definition,
... )

>>> # Create a collection with the definition as object
>>> from astrapy.info import CollectionVectorOptions
>>>
>>> collection_definition_1 = CollectionDefinition(
...     vector=CollectionVectorOptions(
...         dimension=3,
...         metric=VectorMetric.DOT_PRODUCT,
...     ),
...     indexing={"deny": ["annotations", "logs"]},
... )
>>> my_collection_1 = database.create_collection(
...     "my_events",
...     definition=collection_definition_1,
... )
>>>

>>> # Create a collection with the definition as plain dictionary
>>> collection_definition_2 = {
...     "indexing": {"deny": ["annotations", "logs"]},
...     "vector": {
...         "dimension": 3,
...         "metric": VectorMetric.DOT_PRODUCT,
...     },
... }
>>> my_collection_2 = database.create_collection(
...     "my_events",
...     definition=collection_definition_2,
... )

Expand source code

def create_collection(
    self,
    name: str,
    *,
    definition: CollectionDefinition | dict[str, Any] | None = None,
    document_type: type[Any] = DefaultDocumentType,
    keyspace: str | None = None,
    collection_admin_timeout_ms: int | None = None,
    embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
    reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> Collection[DOC]:
    """
    Create a collection on the database and return the Collection
    instance that represents it.

    This is a blocking operation: the method returns when the collection
    is ready to be used. As opposed to the `get_collection` method call,
    this method causes the collection to be actually created on DB.

    Args:
        name: the name of the collection.
        definition: a complete definition for the collection. This can be an
            instance of `CollectionDefinition` or an equivalent (nested) dictionary,
            in which case it will be parsed into a `CollectionDefinition`.
            See the `astrapy.info.CollectionDefinition` class and the
            `Collection` class for more details and ways to construct this object.
        document_type: this parameter acts as a formal specifier for the
            type checker. If omitted, the resulting Collection is implicitly
            a `Collection[dict[str, Any]]`. If provided, it must match the
            type hint specified in the assignment.
            See the examples below.
        keyspace: the keyspace where the collection is to be created.
            If not specified, the general setting for this database is used.
        collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
        embedding_api_key: optional API key(s) for interacting with the collection.
            If an embedding service is configured, and this parameter is not None,
            each Data API call will include the necessary embedding-related headers
            as specified by this parameter. If a string is passed, it translates
            into the one "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            For some vectorize providers/models, if using header-based authentication,
            specialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`
            should be supplied.
        reranking_api_key: optional API key(s) for interacting with the collection.
            If a reranker is configured for the collection, and this parameter
            is not None, Data API calls will include the appropriate
            reranker-related headers according to this parameter. Reranker services
            may not necessarily require this setting (e.g. if the service needs no
            authentication, or one is configured as part of the collection
            definition relying on a "shared secret").
            If a string is passed, it is translated into an instance of
            `astrapy.authentication.RerankingAPIKeyHeaderProvider`.
        spawn_api_options: a specification - complete or partial - of the
            API Options to override the defaults inherited from the Database.
            This allows for a deeper configuration of the collection, e.g.
            concerning timeouts; if this is passed together with
            the named timeout parameters, the latter will take precedence
            in their respective settings.

    Returns:
        a (synchronous) `Collection` instance, representing the
        newly-created collection.

    Raises:
        UnexpectedDataAPIResponseException: if the status of the response
            to the createCollection command is not `{"ok": 1}`.

    Example:
        >>> # Create a collection using the fluent syntax for its definition
        >>> from astrapy.constants import VectorMetric
        >>> from astrapy.info import CollectionDefinition
        >>>
        >>> collection_definition = (
        ...     CollectionDefinition.builder()
        ...     .set_vector_dimension(3)
        ...     .set_vector_metric(VectorMetric.DOT_PRODUCT)
        ...     .set_indexing("deny", ["annotations", "logs"])
        ...     .build()
        ... )
        >>> my_collection = database.create_collection(
        ...     "my_events",
        ...     definition=collection_definition,
        ... )
        >>>
        >>> # Create a collection with the definition as object
        >>> from astrapy.info import CollectionVectorOptions
        >>>
        >>> collection_definition_1 = CollectionDefinition(
        ...     vector=CollectionVectorOptions(
        ...         dimension=3,
        ...         metric=VectorMetric.DOT_PRODUCT,
        ...     ),
        ...     indexing={"deny": ["annotations", "logs"]},
        ... )
        >>> my_collection_1 = database.create_collection(
        ...     "my_events",
        ...     definition=collection_definition_1,
        ... )
        >>>
        >>> # Create a collection with the definition as plain dictionary
        >>> collection_definition_2 = {
        ...     "indexing": {"deny": ["annotations", "logs"]},
        ...     "vector": {
        ...         "dimension": 3,
        ...         "metric": VectorMetric.DOT_PRODUCT,
        ...     },
        ... }
        >>> my_collection_2 = database.create_collection(
        ...     "my_events",
        ...     definition=collection_definition_2,
        ... )
    """

    definition_dict: dict[str, Any] = CollectionDefinition.coerce(
        definition or {}
    ).as_dict()
    # this method has custom code to pick its timeout: an explicit argument
    # wins over this database's setting (the label is the same either way).
    timeout_label: str = "collection_admin_timeout_ms"
    effective_timeout_ms: int = (
        collection_admin_timeout_ms
        if collection_admin_timeout_ms is not None
        else self.api_options.timeout_options.collection_admin_timeout_ms
    )
    commander = self._get_driver_commander(keyspace=keyspace)
    # entries that are None or an empty dict are left out of the command
    command_args = {"name": name, "options": definition_dict}
    create_payload = {
        "createCollection": {
            arg_name: arg_value
            for arg_name, arg_value in command_args.items()
            if arg_value is not None and arg_value != {}
        }
    }
    logger.info(f"createCollection('{name}')")
    create_response = commander.request(
        payload=create_payload,
        timeout_context=_TimeoutContext(
            request_ms=effective_timeout_ms, label=timeout_label
        ),
    )
    if create_response.get("status") != {"ok": 1}:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from createCollection API command.",
            raw_response=create_response,
        )
    logger.info(f"finished createCollection('{name}')")
    # the returned object is spawned through the regular getter
    return self.get_collection(
        name,
        document_type=document_type,
        keyspace=keyspace,
        embedding_api_key=embedding_api_key,
        reranking_api_key=reranking_api_key,
        spawn_api_options=spawn_api_options,
    )

def create_table(self, name: str, *, definition: CreateTableDefinition | dict[str, Any], row_type: type[Any] = typing.Dict[str, typing.Any], keyspace: str | None = None, if_not_exists: bool | None = None, table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None, embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = (unset), reranking_api_key: str | RerankingHeadersProvider | UnsetType = (unset), spawn_api_options: APIOptions | UnsetType = (unset)) ‑> Table[ROW]

Creates a table on the database and returns the Table instance that represents it.

This is a blocking operation: the method returns when the table is ready to be used. As opposed to the get_table method call, this method causes the table to be actually created on DB.

Args

name: the name of the table.
definition: a complete table definition for the table. This can be an instance of CreateTableDefinition or an equivalent (nested) dictionary, in which case it will be parsed into a CreateTableDefinition. See the CreateTableDefinition class and the Table class for more details and ways to construct this object.
row_type: this parameter acts as a formal specifier for the type checker. If omitted, the resulting Table is implicitly a Table[dict[str, Any]]. If provided, it must match the type hint specified in the assignment. See the examples below.
keyspace: the keyspace where the table is to be created. If not specified, the general setting for this database is used.
if_not_exists: if set to True, the command will succeed even if a table with the specified name already exists (in which case no actual table creation takes place on the database). Defaults to False, i.e. an error is raised by the API in case of table-name collision.
table_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for table_admin_timeout_ms.
timeout_ms: an alias for table_admin_timeout_ms.
embedding_api_key: optional API key(s) for interacting with the table. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
reranking_api_key: optional API key(s) for interacting with the table. If a reranker is configured for the table, and this parameter is not None, Data API calls will include the appropriate reranker-related headers according to this parameter. Reranker services may not necessarily require this setting (e.g. if the service needs no authentication, or one is configured as part of the table definition relying on a "shared secret"). If a string is passed, it is translated into an instance of RerankingAPIKeyHeaderProvider.
spawn_api_options: a specification - complete or partial - of the API Options to override the defaults inherited from the Database. This allows for a deeper configuration of the table, e.g. concerning timeouts; if this is passed together with the named timeout parameters, the latter will take precedence in their respective settings.

Returns

a (synchronous) Table instance, representing the newly-created table.

Example

>>> # Create a table using the fluent syntax for definition
>>> from astrapy.constants import SortMode
>>> from astrapy.info import (
...     CreateTableDefinition,
...     ColumnType,
... )
>>> table_definition = (
...     CreateTableDefinition.builder()
...     .add_column("match_id", ColumnType.TEXT)
...     .add_column("round", ColumnType.INT)
...     .add_vector_column("m_vector", dimension=3)
...     .add_column("score", ColumnType.INT)
...     .add_column("when", ColumnType.TIMESTAMP)
...     .add_column("winner", ColumnType.TEXT)
...     .add_set_column("fighters", ColumnType.UUID)
...     .add_partition_by(["match_id"])
...     .add_partition_sort({"round": SortMode.ASCENDING})
...     .build()
... )
>>> my_table = database.create_table(
...     "games",
...     definition=table_definition,
... )
>>>
>>> # Create a table with the definition as object
>>> # (and do not raise an error if the table exists already)
>>> from astrapy.info import (
...     CreateTableDefinition,
...     TablePrimaryKeyDescriptor,
...     TableScalarColumnTypeDescriptor,
...     TableValuedColumnType,
...     TableValuedColumnTypeDescriptor,
...     TableVectorColumnTypeDescriptor,
... )
>>> table_definition_1 = CreateTableDefinition(
...     columns={
...         "match_id": TableScalarColumnTypeDescriptor(
...             ColumnType.TEXT,
...         ),
...         "round": TableScalarColumnTypeDescriptor(
...             ColumnType.INT,
...         ),
...         "m_vector": TableVectorColumnTypeDescriptor(
...             column_type="vector", dimension=3
...         ),
...         "score": TableScalarColumnTypeDescriptor(
...             ColumnType.INT,
...         ),
...         "when": TableScalarColumnTypeDescriptor(
...             ColumnType.TIMESTAMP,
...         ),
...         "winner": TableScalarColumnTypeDescriptor(
...             ColumnType.TEXT,
...         ),
...         "fighters": TableValuedColumnTypeDescriptor(
...             column_type=TableValuedColumnType.SET,
...             value_type=ColumnType.UUID,
...         ),
...     },
...     primary_key=TablePrimaryKeyDescriptor(
...         partition_by=["match_id"],
...         partition_sort={"round": SortMode.ASCENDING},
...     ),
... )
>>> my_table_1 = database.create_table(
...     "games",
...     definition=table_definition_1,
...     if_not_exists=True,
... )
>>>
>>> # Create a table with the definition as plain dictionary
>>> # (and do not raise an error if the table exists already)
>>> table_definition_2 = {
...     "columns": {
...         "match_id": {"type": "text"},
...         "round": {"type": "int"},
...         "m_vector": {"type": "vector", "dimension": 3},
...         "score": {"type": "int"},
...         "when": {"type": "timestamp"},
...         "winner": {"type": "text"},
...         "fighters": {"type": "set", "valueType": "uuid"},
...     },
...     "primaryKey": {
...         "partitionBy": ["match_id"],
...         "partitionSort": {"round": 1},
...     },
... }
>>> my_table_2 = database.create_table(
...     "games",
...     definition=table_definition_2,
...     if_not_exists=True,
... )

Expand source code

def create_table(
    self,
    name: str,
    *,
    definition: CreateTableDefinition | dict[str, Any],
    row_type: type[Any] = DefaultRowType,
    keyspace: str | None = None,
    if_not_exists: bool | None = None,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
    embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
    reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> Table[ROW]:
    """
    Create a table on the database and return the `Table` instance
    that represents it.

    The call is blocking: it returns once the `createTable` API command has
    been acknowledged. Unlike `get_table`, which only spawns a client-side
    object, this method actually issues the creation request to the DB.

    Args:
        name: the name of the table.
        definition: the complete definition of the table, either as a
            `CreateTableDefinition` instance or as an equivalent (nested)
            dictionary; a dictionary is parsed into a `CreateTableDefinition`.
            See the `astrapy.info.CreateTableDefinition` class and the
            `Table` class for more details and ways to construct this object.
        row_type: this parameter acts as a formal specifier for the type
            checker. If omitted, the resulting Table is implicitly
            a `Table[dict[str, Any]]`. If provided, it must match the type hint
            specified in the assignment. See the examples below.
        keyspace: the keyspace where the table is to be created.
            If not specified, the general setting for this database is used.
        if_not_exists: if set to True, the command will succeed even if a table
            with the specified name already exists (in which case no actual
            table creation takes place on the database). Defaults to False,
            i.e. an error is raised by the API in case of table-name collision.
            When left as None, no "options" entry is sent with the command.
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.
        embedding_api_key: optional API key(s) for interacting with the table.
            If an embedding service is configured, and this parameter is not None,
            each Data API call will include the necessary embedding-related headers
            as specified by this parameter. If a string is passed, it translates
            into the one "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            For some vectorize providers/models, if using header-based authentication,
            specialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`
            should be supplied.
        reranking_api_key: optional API key(s) for interacting with the table.
            If a reranker is configured for the table, and this parameter
            is not None, Data API calls will include the appropriate
            reranker-related headers according to this parameter. Reranker services
            may not necessarily require this setting (e.g. if the service needs no
            authentication, or one is configured as part of the table
            definition relying on a "shared secret").
            If a string is passed, it is translated into an instance of
            `astrapy.authentication.RerankingAPIKeyHeaderProvider`.
        spawn_api_options: a specification - complete or partial - of the
            API Options to override the defaults inherited from the Database.
            This allows for a deeper configuration of the table, e.g.
            concerning timeouts; if this is passed together with
            the named timeout parameters, the latter will take precedence
            in their respective settings.

    Returns:
        a (synchronous) `Table` instance, representing the
        newly-created table.

    Raises:
        UnexpectedDataAPIResponseException: if the "status" of the API
            response is anything other than `{"ok": 1}`.

    Example:
        >>> # Create a table using the fluent syntax for definition
        >>> from astrapy.constants import SortMode
        >>> from astrapy.info import (
        ...     CreateTableDefinition,
        ...     ColumnType,
        ... )
        >>> table_definition = (
        ...     CreateTableDefinition.builder()
        ...     .add_column("match_id", ColumnType.TEXT)
        ...     .add_column("round", ColumnType.INT)
        ...     .add_vector_column("m_vector", dimension=3)
        ...     .add_column("score", ColumnType.INT)
        ...     .add_column("when", ColumnType.TIMESTAMP)
        ...     .add_column("winner", ColumnType.TEXT)
        ...     .add_set_column("fighters", ColumnType.UUID)
        ...     .add_partition_by(["match_id"])
        ...     .add_partition_sort({"round": SortMode.ASCENDING})
        ...     .build()
        ... )
        >>> my_table = database.create_table(
        ...     "games",
        ...     definition=table_definition,
        ... )
        >>>
        >>> # Create a table with the definition as object
        >>> # (and do not raise an error if the table exists already)
        >>> from astrapy.info import (
        ...     CreateTableDefinition,
        ...     TablePrimaryKeyDescriptor,
        ...     TableScalarColumnTypeDescriptor,
        ...     TableValuedColumnType,
        ...     TableValuedColumnTypeDescriptor,
        ...     TableVectorColumnTypeDescriptor,
        ... )
        >>> table_definition_1 = CreateTableDefinition(
        ...     columns={
        ...         "match_id": TableScalarColumnTypeDescriptor(
        ...             ColumnType.TEXT,
        ...         ),
        ...         "round": TableScalarColumnTypeDescriptor(
        ...             ColumnType.INT,
        ...         ),
        ...         "m_vector": TableVectorColumnTypeDescriptor(
        ...             column_type="vector", dimension=3
        ...         ),
        ...         "score": TableScalarColumnTypeDescriptor(
        ...             ColumnType.INT,
        ...         ),
        ...         "when": TableScalarColumnTypeDescriptor(
        ...             ColumnType.TIMESTAMP,
        ...         ),
        ...         "winner": TableScalarColumnTypeDescriptor(
        ...             ColumnType.TEXT,
        ...         ),
        ...         "fighters": TableValuedColumnTypeDescriptor(
        ...             column_type=TableValuedColumnType.SET,
        ...             value_type=ColumnType.UUID,
        ...         ),
        ...     },
        ...     primary_key=TablePrimaryKeyDescriptor(
        ...         partition_by=["match_id"],
        ...         partition_sort={"round": SortMode.ASCENDING},
        ...     ),
        ... )
        >>> my_table_1 = database.create_table(
        ...     "games",
        ...     definition=table_definition_1,
        ...     if_not_exists=True,
        ... )
        >>>
        >>> # Create a table with the definition as plain dictionary
        >>> # (and do not raise an error if the table exists already)
        >>> table_definition_2 = {
        ...     "columns": {
        ...         "match_id": {"type": "text"},
        ...         "round": {"type": "int"},
        ...         "m_vector": {"type": "vector", "dimension": 3},
        ...         "score": {"type": "int"},
        ...         "when": {"type": "timestamp"},
        ...         "winner": {"type": "text"},
        ...         "fighters": {"type": "set", "valueType": "uuid"},
        ...     },
        ...     "primaryKey": {
        ...         "partitionBy": ["match_id"],
        ...         "partitionSort": {"round": 1},
        ...     },
        ... }
        >>> my_table_2 = database.create_table(
        ...     "games",
        ...     definition=table_definition_2,
        ...     if_not_exists=True,
        ... )
    """

    # "options" is populated only when the caller expressed a preference;
    # an empty dict is filtered out of the payload further down.
    options_part: dict[str, bool] = (
        {"ifNotExists": if_not_exists} if if_not_exists is not None else {}
    )
    # Normalize whatever was passed (object or dict) to its dict form.
    coerced_definition = CreateTableDefinition.coerce(definition)
    definition_part: dict[str, Any] = coerced_definition.as_dict()
    # The three timeout parameters are aliases: a helper picks the effective
    # value together with the label identifying where it came from.
    effective_timeout_ms, timeout_label = _select_singlereq_timeout_ta(
        timeout_options=self.api_options.timeout_options,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    commander = self._get_driver_commander(keyspace=keyspace)
    # Assemble the command body, leaving out None values and empty dicts.
    candidate_fields = {
        "name": name,
        "definition": definition_part,
        "options": options_part,
    }
    command_body = {
        field: value
        for field, value in candidate_fields.items()
        if value is not None and value != {}
    }
    logger.info(f"createTable('{name}')")
    response = commander.request(
        payload={"createTable": command_body},
        timeout_context=_TimeoutContext(
            request_ms=effective_timeout_ms,
            label=timeout_label,
        ),
    )
    # Anything but the exact {"ok": 1} status is treated as a faulty response.
    if response.get("status") != {"ok": 1}:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from createTable API command.",
            raw_response=response,
        )
    logger.info(f"finished createTable('{name}')")
    # Hand back a client-side Table object bound to the table just created.
    return self.get_table(
        name,
        row_type=row_type,
        keyspace=keyspace,
        embedding_api_key=embedding_api_key,
        reranking_api_key=reranking_api_key,
        spawn_api_options=spawn_api_options,
    )

def drop_collection(self, name: str, *, keyspace: str | None = None, collection_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Drop a collection from the database, along with all documents therein.

Args

name: the name of the collection to drop.
keyspace: the keyspace where the collection resides. If not specified, the database working keyspace is assumed.
collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for collection_admin_timeout_ms.
timeout_ms: an alias for collection_admin_timeout_ms.

Example

>>> my_db.list_collection_names()
['a_collection', 'my_v_col', 'another_col']
>>> my_db.drop_collection("my_v_col")
>>> my_db.list_collection_names()
['a_collection', 'another_col']

Expand source code

def drop_collection(
    self,
    name: str,
    *,
    keyspace: str | None = None,
    collection_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Drop a collection from the database, along with all documents therein.

    Args:
        name: the name of the collection to drop.
        keyspace: the keyspace where the collection resides. If not specified,
            the database working keyspace is assumed.
        collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `collection_admin_timeout_ms`.
        timeout_ms: an alias for `collection_admin_timeout_ms`.

    Raises:
        UnexpectedDataAPIResponseException: if the "status" of the API
            response is anything other than `{"ok": 1}`.

    Example:
        >>> my_db.list_collection_names()
        ['a_collection', 'my_v_col', 'another_col']
        >>> my_db.drop_collection("my_v_col")
        >>> my_db.list_collection_names()
        ['a_collection', 'another_col']
    """

    # The three timeout parameters are aliases: a helper picks the effective
    # value together with the label identifying where it came from.
    effective_timeout_ms, timeout_label = _select_singlereq_timeout_ca(
        timeout_options=self.api_options.timeout_options,
        collection_admin_timeout_ms=collection_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # An explicitly passed keyspace wins over the database's working keyspace.
    target_keyspace = keyspace or self.keyspace
    commander = self._get_driver_commander(keyspace=target_keyspace)
    logger.info(f"deleteCollection('{name}')")
    response = commander.request(
        payload={"deleteCollection": {"name": name}},
        timeout_context=_TimeoutContext(
            request_ms=effective_timeout_ms,
            label=timeout_label,
        ),
    )
    # Anything but the exact {"ok": 1} status is treated as a faulty response.
    if response.get("status") != {"ok": 1}:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from deleteCollection API command.",
            raw_response=response,
        )
    logger.info(f"finished deleteCollection('{name}')")
    # NOTE(review): the signature declares `-> None`, yet the response status
    # dict is returned here (hence the type-ignore). Kept as-is to avoid
    # changing what existing callers receive - confirm whether this is intended.
    return response.get("status", {})  # type: ignore[no-any-return]

Drop a table from the database, along with all rows therein and related indexes.

Args

name: the name of the table to drop.
keyspace: the keyspace where the table resides. If not specified, the database working keyspace is assumed.
if_exists: if passed as True, trying to drop a non-existing table will not error, just silently do nothing instead. If not provided, the API default behaviour will hold.
table_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for table_admin_timeout_ms.
timeout_ms: an alias for table_admin_timeout_ms.

Example

>>> database.list_table_names()
['fighters', 'games']
>>> database.drop_table("fighters")
>>> database.list_table_names()
['games']
>>> # not erroring because of if_exists:
>>> database.drop_table("fighters", if_exists=True)

Expand source code

def drop_table(
    self,
    name: str,
    *,
    keyspace: str | None = None,
    if_exists: bool | None = None,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Drop a table from the database, along with all rows therein and related indexes.

    Args:
        name: the name of the table to drop.
        keyspace: the keyspace where the table resides. If not specified,
            the database working keyspace is assumed.
        if_exists: if passed as True, trying to drop a non-existing table
            will not error, just silently do nothing instead. If not provided,
            no "options" entry is sent and the API default behaviour will hold.
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Raises:
        UnexpectedDataAPIResponseException: if the "status" of the API
            response is anything other than `{"ok": 1}`.

    Example:
        >>> database.list_table_names()
        ['fighters', 'games']
        >>> database.drop_table("fighters")
        >>> database.list_table_names()
        ['games']
        >>> # not erroring because of if_exists:
        >>> database.drop_table("fighters", if_exists=True)
    """

    # The three timeout parameters are aliases: a helper picks the effective
    # value together with the label identifying where it came from.
    effective_timeout_ms, timeout_label = _select_singlereq_timeout_ta(
        timeout_options=self.api_options.timeout_options,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # An explicitly passed keyspace wins over the database's working keyspace.
    target_keyspace = keyspace or self.keyspace
    # "options" is populated only when the caller expressed a preference.
    options_part: dict[str, bool] = (
        {"ifExists": if_exists} if if_exists is not None else {}
    )
    commander = self._get_driver_commander(keyspace=target_keyspace)
    # Assemble the command body, leaving out None values and empty dicts.
    candidate_fields = {
        "name": name,
        "options": options_part,
    }
    command_body = {
        field: value
        for field, value in candidate_fields.items()
        if value is not None and value != {}
    }
    logger.info(f"dropTable('{name}')")
    response = commander.request(
        payload={"dropTable": command_body},
        timeout_context=_TimeoutContext(
            request_ms=effective_timeout_ms,
            label=timeout_label,
        ),
    )
    # Anything but the exact {"ok": 1} status is treated as a faulty response.
    if response.get("status") != {"ok": 1}:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from dropTable API command.",
            raw_response=response,
        )
    logger.info(f"finished dropTable('{name}')")
    # NOTE(review): the signature declares `-> None`, yet the response status
    # dict is returned here (hence the type-ignore). Kept as-is to avoid
    # changing what existing callers receive - confirm whether this is intended.
    return response.get("status", {})  # type: ignore[no-any-return]

Drops (deletes) an index (of any kind) from the table it is associated to.

This is a blocking operation: the method returns once the index is deleted.

Note

Although associated to a table, index names are unique across a keyspace. For this reason, no table name is required in this call.

Args

name: the name of the index.
keyspace: the keyspace to which the index belongs. If not specified, the general setting for this database is used.
if_exists: if passed as True, trying to drop a non-existing index will not error, just silently do nothing instead. If not provided, the API default behaviour will hold.
table_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for table_admin_timeout_ms.
timeout_ms: an alias for table_admin_timeout_ms.

Example

>>> # Drop an index from the keyspace:
>>> database.drop_table_index("score_index")
>>> # Drop an index, without erroring if it does not exist:
>>> database.drop_table_index("score_index", if_exists=True)

Expand source code

def drop_table_index(
    self,
    name: str,
    *,
    keyspace: str | None = None,
    if_exists: bool | None = None,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Drop (delete) an index, of any kind, from the table it belongs to.

    This is a blocking operation: the method returns once the index
    is deleted.

    Note:
        Although associated to a table, index names are unique across a keyspace.
        For this reason, no table name is required in this call.

    Args:
        name: the name of the index.
        keyspace: the keyspace to which the index belongs.
            If not specified, the general setting for this database is used.
        if_exists: if passed as True, trying to drop a non-existing index
            will not error, just silently do nothing instead. If not provided,
            no "options" entry is sent and the API default behaviour will hold.
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Raises:
        UnexpectedDataAPIResponseException: if the "status" of the API
            response is anything other than `{"ok": 1}`.

    Example:
        >>> # Drop an index from the keyspace:
        >>> database.drop_table_index("score_index")
        >>> # Drop an index, without erroring if it does not exist:
        >>> database.drop_table_index("score_index", if_exists=True)
    """

    # The three timeout parameters are aliases: a helper picks the effective
    # value together with the label identifying where it came from.
    effective_timeout_ms, timeout_label = _select_singlereq_timeout_ta(
        timeout_options=self.api_options.timeout_options,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # "options" is populated only when the caller expressed a preference.
    options_part: dict[str, bool] = (
        {"ifExists": if_exists} if if_exists is not None else {}
    )
    # Assemble the command body, leaving out None values and empty dicts.
    candidate_fields = {
        "name": name,
        "options": options_part,
    }
    command_body = {
        field: value
        for field, value in candidate_fields.items()
        if value is not None and value != {}
    }
    drop_payload = {"dropIndex": command_body}
    commander = self._get_driver_commander(keyspace=keyspace)
    logger.info(f"dropIndex('{name}')")
    response = commander.request(
        payload=drop_payload,
        timeout_context=_TimeoutContext(
            request_ms=effective_timeout_ms,
            label=timeout_label,
        ),
    )
    # Anything but the exact {"ok": 1} status is treated as a faulty response.
    if response.get("status") != {"ok": 1}:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from dropIndex API command.",
            raw_response=response,
        )
    logger.info(f"finished dropIndex('{name}')")

Spawn a Collection object instance representing a collection on this database.

Creating a Collection instance does not have any effect on the actual state of the database: in other words, for the created Collection instance to be used meaningfully, the collection must exist already (for instance, it should have been created previously by calling the create_collection method).

Args

name: the name of the collection.
document_type: this parameter acts as a formal specifier for the type checker. If omitted, the resulting Collection is implicitly a Collection[dict[str, Any]]. If provided, it must match the type hint specified in the assignment. See the examples below.
keyspace: the keyspace containing the collection. If no keyspace is specified, the general setting for this database is used.
embedding_api_key: optional API key(s) for interacting with the collection. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
reranking_api_key: optional API key(s) for interacting with the collection. If a reranker is configured for the collection, and this parameter is not None, Data API calls will include the appropriate reranker-related headers according to this parameter. Reranker services may not necessarily require this setting (e.g. if the service needs no authentication, or one is configured as part of the collection definition relying on a "shared secret"). If a string is passed, it is translated into an instance of RerankingAPIKeyHeaderProvider.
spawn_api_options: a specification - complete or partial - of the API Options to override the defaults inherited from the Database. This allows for a deeper configuration of the collection, e.g. concerning timeouts; if this is passed together with the named timeout parameters, the latter will take precedence in their respective settings.

Returns

a Collection instance, representing the desired collection (but without any form of validation).

Example

>>> my_col = my_db.get_collection("my_collection")
>>> my_col.count_documents({}, upper_bound=100)
41

Note

The attribute and indexing syntax forms achieve the same effect as this method. In other words, the following are equivalent: my_db.get_collection("coll_name"), my_db.coll_name, and my_db["coll_name"].

Expand source code

def get_collection(
    self,
    name: str,
    *,
    document_type: type[Any] = DefaultDocumentType,
    keyspace: str | None = None,
    embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
    reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> Collection[DOC]:
    """
    Build a `Collection` object bound to a collection of this database.

    No request is sent and nothing is created or checked on the database:
    the returned object is only useful if the collection already exists
    (for instance because `create_collection` was called beforehand).

    Args:
        name: the name of the collection.
        document_type: a formal specifier for the type checker; this method
            does not use it at runtime. If omitted, the resulting Collection
            is implicitly a `Collection[dict[str, Any]]`. If provided,
            it must match the type hint specified in the assignment.
        keyspace: the keyspace containing the collection. If not provided
            (or empty), the keyspace set on this database is used.
        embedding_api_key: optional API key(s) for interacting with the collection.
            If an embedding service is configured, and this parameter is not None,
            each Data API call will include the necessary embedding-related headers
            as specified by this parameter. A plain string translates
            into the one "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            For some vectorize providers/models, if using header-based
            authentication, specialized subclasses of
            `astrapy.authentication.EmbeddingHeadersProvider` should be supplied.
        reranking_api_key: optional API key(s) for interacting with the collection.
            If a reranker is configured for the collection, and this parameter
            is not None, Data API calls will include the appropriate
            reranker-related headers according to this parameter. Reranker services
            may not necessarily require this setting (e.g. if the service needs no
            authentication, or one is configured as part of the collection
            definition relying on a "shared secret").
            A plain string is translated into an instance of
            `astrapy.authentication.RerankingAPIKeyHeaderProvider`.
        spawn_api_options: a specification - complete or partial - of the
            API Options to override the defaults inherited from the Database.
            This allows for a deeper configuration of the collection, e.g.
            concerning timeouts. The named parameters `embedding_api_key` and
            `reranking_api_key` are applied on top of these options, hence
            they take precedence in their respective settings.

    Returns:
        a `Collection` instance, representing the desired collection
            (but without any form of validation).

    Raises:
        ValueError: if no keyspace is passed and none is set on this database.

    Example:
        >>> my_col = my_db.get_collection("my_collection")
        >>> my_col.count_documents({}, upper_bound=100)
        41

    Note:
        The attribute and indexing syntax forms achieve the same effect
        as this method. In other words, the following are equivalent:
            my_db.get_collection("coll_name")
            my_db.coll_name
            my_db["coll_name"]
    """

    # imported at call time: a module-level import would be circular
    from astrapy.collection import Collection

    # Layering: database defaults <- spawn_api_options <- named key parameters.
    spawned_options = self.api_options.with_override(spawn_api_options)
    key_overrides = APIOptions(
        embedding_api_key=embedding_api_key,
        reranking_api_key=reranking_api_key,
    )
    collection_api_options = spawned_options.with_override(key_overrides)

    target_keyspace = keyspace if keyspace else self.keyspace
    if target_keyspace is not None:
        return Collection(
            database=self,
            name=name,
            keyspace=target_keyspace,
            api_options=collection_api_options,
        )
    raise ValueError(
        "No keyspace specified. This operation requires a keyspace to "
        "be set, e.g. through the `use_keyspace` method."
    )

def get_database_admin(self, *, token: str | TokenProvider | UnsetType = (unset), spawn_api_options: APIOptions | UnsetType = (unset)) ‑> DatabaseAdmin

Return a DatabaseAdmin object corresponding to this database, for use in admin tasks such as managing keyspaces.

This method, depending on the environment where the database resides, returns an appropriate subclass of DatabaseAdmin.

Args

token: an access token with enough permission on the database to perform the desired tasks. If omitted (as it can generally be done), the token of this Database is used. This can be either a literal token string or a subclass of TokenProvider.
spawn_api_options: a specification - complete or partial - of the API Options to override the defaults. This allows for a deeper configuration of the database admin, e.g. concerning timeouts; if this is passed together with the equivalent named parameters, the latter will take precedence in their respective settings.

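Returns

A DatabaseAdmin instance targeting this database. More precisely, for Astra DB an instance of AstraDBDatabaseAdmin is returned; for other environments, an instance of DataAPIDatabaseAdmin is returned.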

Example

>>> my_db_admin = my_db.get_database_admin()
>>> if "new_keyspace" not in my_db_admin.list_keyspaces():
...     my_db_admin.create_keyspace("new_keyspace")
>>> my_db_admin.list_keyspaces()
['default_keyspace', 'new_keyspace']

Expand source code

def get_database_admin(
    self,
    *,
    token: str | TokenProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> DatabaseAdmin:
    """
    Create the DatabaseAdmin object for this database, to be used
    for admin tasks such as managing keyspaces.

    The concrete class of the result depends on the environment
    the database lives in (see "Returns").

    Args:
        token: an access token with enough permission on the database to
            perform the desired tasks. If omitted (as it can generally be done),
            the token of this Database is used.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        spawn_api_options: a specification - complete or partial - of the
            API Options to override the defaults.
            This allows for a deeper configuration of the database admin, e.g.
            concerning timeouts. The `token` named parameter is applied on top
            of these options, hence it takes precedence over a token set here.

    Returns:
        A DatabaseAdmin instance targeting this database. More precisely,
        for Astra DB an instance of `AstraDBDatabaseAdmin` is returned;
        for other environments, an instance of `DataAPIDatabaseAdmin` is returned.

    Raises:
        InvalidEnvironmentException: on an Astra DB environment, if the
            API endpoint of this database cannot be parsed as a standard one.

    Example:
        >>> my_db_admin = my_db.get_database_admin()
        >>> if "new_keyspace" not in my_db_admin.list_keyspaces():
        ...     my_db_admin.create_keyspace("new_keyspace")
        >>> my_db_admin.list_keyspaces()
        ['default_keyspace', 'new_keyspace']
    """

    # imported at call time to avoid a circular dependency
    from astrapy.admin.admin import AstraDBDatabaseAdmin, DataAPIDatabaseAdmin

    # Layering: database defaults <- spawn_api_options <- named token.
    token_override = APIOptions(token=token)
    admin_api_options = self.api_options.with_override(
        spawn_api_options
    ).with_override(token_override)

    # Outside of Astra DB no endpoint check applies.
    if admin_api_options.environment not in Environment.astra_db_values:
        return DataAPIDatabaseAdmin(
            api_endpoint=self.api_endpoint,
            api_options=admin_api_options,
            spawner_database=self,
        )

    if parse_api_endpoint(self.api_endpoint) is None:
        raise InvalidEnvironmentException(
            "Cannot use a nonstandard API endpoint for this operation."
        )
    return AstraDBDatabaseAdmin(
        api_endpoint=self.api_endpoint,
        api_options=admin_api_options,
        spawner_database=self,
    )

Spawn a Table object instance representing a table on this database.

Creating a Table instance does not have any effect on the actual state of the database: in other words, for the created Table instance to be used meaningfully, the table must exist already (for instance, it should have been created previously by calling the create_table method).

Args

name: the name of the table.
row_type: this parameter acts as a formal specifier for the type checker. If omitted, the resulting Table is implicitly a Table[dict[str, Any]]. If provided, it must match the type hint specified in the assignment. See the examples below.
keyspace: the keyspace containing the table. If no keyspace is specified, the general setting for this database is used.
embedding_api_key: optional API key(s) for interacting with the table. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
reranking_api_key: optional API key(s) for interacting with the table. If a reranker is configured for the table, and this parameter is not None, Data API calls will include the appropriate reranker-related headers according to this parameter. Reranker services may not necessarily require this setting (e.g. if the service needs no authentication, or one is configured as part of the table definition relying on a "shared secret"). If a string is passed, it is translated into an instance of RerankingAPIKeyHeaderProvider.
spawn_api_options: a specification - complete or partial - of the API Options to override the defaults inherited from the Database. This allows for a deeper configuration of the table, e.g. concerning timeouts; if this is passed together with the named timeout parameters, the latter will take precedence in their respective settings.

Returns

a Table instance, representing the desired table (but without any form of validation).

Example

>>> # Get a Table object (and read a property of it as an example):
>>> my_table = database.get_table("games")
>>> my_table.full_name
'default_keyspace.games'
>>>
>>> # Get a Table object in a specific keyspace,
>>> # and set an embedding API key to it:
>>> my_other_table = database.get_table(
...     "tournaments",
...     keyspace="the_other_keyspace",
...     embedding_api_key="secret-012abc...",
... )
>>>
>>> from astrapy import Table
>>> MyCustomDictType = dict[str, int]
>>>
>>> # Get a Table object typed with a specific type for its rows:
>>> my_typed_table: Table[MyCustomDictType] = database.get_table(
...     "games",
...     row_type=MyCustomDictType,
... )

Expand source code

def get_table(
    self,
    name: str,
    *,
    row_type: type[Any] = DefaultRowType,
    keyspace: str | None = None,
    embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
    reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
    spawn_api_options: APIOptions | UnsetType = _UNSET,
) -> Table[ROW]:
    """
    Build a `Table` object bound to a table of this database.

    No request is sent and nothing is created or checked on the database:
    the returned object is only useful if the table already exists
    (for instance because `create_table` was called beforehand).

    Args:
        name: the name of the table.
        row_type: a formal specifier for the type checker; this method
            does not use it at runtime. If omitted, the resulting Table
            is implicitly a `Table[dict[str, Any]]`. If provided, it must
            match the type hint specified in the assignment.
            See the examples below.
        keyspace: the keyspace containing the table. If not provided
            (or empty), the keyspace set on this database is used.
        embedding_api_key: optional API key(s) for interacting with the table.
            If an embedding service is configured, and this parameter is not None,
            each Data API call will include the necessary embedding-related headers
            as specified by this parameter. A plain string translates
            into the one "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            For some vectorize providers/models, if using header-based
            authentication, specialized subclasses of
            `astrapy.authentication.EmbeddingHeadersProvider` should be supplied.
        reranking_api_key: optional API key(s) for interacting with the table.
            If a reranker is configured for the table, and this parameter
            is not None, Data API calls will include the appropriate
            reranker-related headers according to this parameter. Reranker services
            may not necessarily require this setting (e.g. if the service needs no
            authentication, or one is configured as part of the table
            definition relying on a "shared secret").
            A plain string is translated into an instance of
            `astrapy.authentication.RerankingAPIKeyHeaderProvider`.
        spawn_api_options: a specification - complete or partial - of the
            API Options to override the defaults inherited from the Database.
            This allows for a deeper configuration of the table, e.g.
            concerning timeouts. The named parameters `embedding_api_key` and
            `reranking_api_key` are applied on top of these options, hence
            they take precedence in their respective settings.

    Returns:
        a `Table` instance, representing the desired table
            (but without any form of validation).

    Raises:
        ValueError: if no keyspace is passed and none is set on this database.

    Example:
        >>> # Get a Table object (and read a property of it as an example):
        >>> my_table = database.get_table("games")
        >>> my_table.full_name
        'default_keyspace.games'
        >>>
        >>> # Get a Table object in a specific keyspace,
        >>> # and set an embedding API key to it:
        >>> my_other_table = database.get_table(
        ...     "tournaments",
        ...     keyspace="the_other_keyspace",
        ...     embedding_api_key="secret-012abc...",
        ... )
        >>>
        >>> from astrapy import Table
        >>> MyCustomDictType = dict[str, int]
        >>>
        >>> # Get a Table object typed with a specific type for its rows:
        >>> my_typed_table: Table[MyCustomDictType] = database.get_table(
        ...     "games",
        ...     row_type=MyCustomDictType,
        ... )
    """

    # imported at call time: a module-level import would be circular
    from astrapy.table import Table

    # Layering: database defaults <- spawn_api_options <- named key parameters.
    spawned_options = self.api_options.with_override(spawn_api_options)
    key_overrides = APIOptions(
        embedding_api_key=embedding_api_key,
        reranking_api_key=reranking_api_key,
    )
    table_api_options = spawned_options.with_override(key_overrides)

    target_keyspace = keyspace if keyspace else self.keyspace
    if target_keyspace is not None:
        return Table[ROW](
            database=self,
            name=name,
            keyspace=target_keyspace,
            api_options=table_api_options,
        )
    raise ValueError(
        "No keyspace specified. This operation requires a keyspace to "
        "be set, e.g. through the `use_keyspace` method."
    )

def info(self, *, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> AstraDBDatabaseInfo

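Additional information on the database as an AstraDBDatabaseInfo instance.

Some of the returned properties are dynamic throughout the lifetime of the database (such as raw_info["keyspaces"]). For this reason, each invocation of this method triggers a new request to the DevOps API.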

Not available outside of Astra DB and when using custom domains.

Args

database_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for database_admin_timeout_ms.
timeout_ms: an alias for database_admin_timeout_ms.

Example

>>> my_db.info().region
'eu-west-1'

>>> my_db.info().raw_info['datacenters'][0]['dateCreated']
'2023-01-30T12:34:56Z'

Note

see the AstraDBDatabaseInfo documentation for a caveat about the difference between the region and the raw["region"] attributes.

Expand source code

def info(
    self,
    *,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> AstraDBDatabaseInfo:
    """
    Fetch additional information on the database, returned as
    an AstraDBDatabaseInfo instance.

    Since some of the returned properties may change over the lifetime
    of the database (such as raw_info["keyspaces"]), nothing is cached here:
    each invocation of this method triggers a new request to the DevOps API.

    Not available outside of Astra DB and when using custom domains.

    Args:
        database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.

    Returns:
        the AstraDBDatabaseInfo object obtained for this database.

    Raises:
        InvalidEnvironmentException: if the environment is not an Astra DB one,
            or if the API endpoint cannot be parsed as a standard one.
        DevOpsAPIException: if no database info could be fetched.

    Example:
        >>> my_db.info().region
        'eu-west-1'

        >>> my_db.info().raw_info['datacenters'][0]['dateCreated']
        '2023-01-30T12:34:56Z'

    Note:
        see the AstraDBDatabaseInfo documentation for a caveat about the difference
        between the `region` and the `raw["region"]` attributes.
    """

    # Preconditions: an Astra DB environment with a standard API endpoint.
    if self.api_options.environment not in Environment.astra_db_values:
        raise InvalidEnvironmentException(
            "Environments outside of Astra DB are not supported."
        )
    if parse_api_endpoint(self.api_endpoint) is None:
        raise InvalidEnvironmentException(
            "Cannot inspect a nonstandard API endpoint for properties."
        )

    # Only the resolved timeout value is needed below; the label goes unused.
    resolved_timeout_ms, _timeout_label = _select_singlereq_timeout_da(
        timeout_options=self.api_options.timeout_options,
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info("getting database info")
    fetched_info = fetch_database_info(
        self.api_endpoint,
        keyspace=self.keyspace,
        request_timeout_ms=resolved_timeout_ms,
        api_options=self.api_options,
    )
    if fetched_info is None:
        raise DevOpsAPIException("Failure while fetching database info.")
    logger.info("finished getting database info")
    return fetched_info

def list_collection_names(self, *, keyspace: str | None = None, collection_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[str]

List the names of all collections in a given keyspace of this database.

Args

keyspace: the keyspace to be inspected. If not specified, the general setting for this database is assumed.
collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for collection_admin_timeout_ms.
timeout_ms: an alias for collection_admin_timeout_ms.

Returns

a list of the collection names as strings, in no particular order.

Example

>>> my_db.list_collection_names()
['a_collection', 'another_col']

Expand source code

def list_collection_names(
    self,
    *,
    keyspace: str | None = None,
    collection_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[str]:
    """
    Return the names of all collections found in a keyspace of this database.

    A single `findCollections` command is sent to the API.

    Args:
        keyspace: the keyspace to be inspected. If not specified,
            the general setting for this database is assumed.
        collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `collection_admin_timeout_ms`.
        timeout_ms: an alias for `collection_admin_timeout_ms`.

    Returns:
        a list of the collection names as strings, in no particular order.

    Raises:
        UnexpectedDataAPIResponseException: if the "status" part of the
            API response has no "collections" entry.

    Example:
        >>> my_db.list_collection_names()
        ['a_collection', 'another_col']
    """

    resolved_timeout_ms, timeout_label = _select_singlereq_timeout_ca(
        timeout_options=self.api_options.timeout_options,
        collection_admin_timeout_ms=collection_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    commander = self._get_driver_commander(keyspace=keyspace)
    find_payload: dict[str, Any] = {"findCollections": {}}
    logger.info("findCollections")
    find_response = commander.request(
        payload=find_payload,
        timeout_context=_TimeoutContext(
            request_ms=resolved_timeout_ms,
            label=timeout_label,
        ),
    )

    # A well-formed response carries the names under status.collections.
    if "collections" in find_response.get("status", {}):
        logger.info("finished findCollections")
        return find_response["status"]["collections"]  # type: ignore[no-any-return]
    raise UnexpectedDataAPIResponseException(
        text="Faulty response from findCollections API command.",
        raw_response=find_response,
    )

def list_collections(self, *, keyspace: str | None = None, collection_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[CollectionDescriptor]

List all collections in a given keyspace for this database.

Args

keyspace: the keyspace to be inspected. If not specified, the general setting for this database is assumed.
collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for collection_admin_timeout_ms.
timeout_ms: an alias for collection_admin_timeout_ms.

Returns

a list of CollectionDescriptor instances, one for each collection.

Example

>>> coll_list = my_db.list_collections()
>>> coll_list
[CollectionDescriptor(name='my_v_col', options=CollectionDefinition())]
>>> for coll_dict in my_db.list_collections():
...     print(coll_dict)
...
CollectionDescriptor(name='my_v_col', options=CollectionDefinition())

Expand source code

def list_collections(
    self,
    *,
    keyspace: str | None = None,
    collection_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[CollectionDescriptor]:
    """
    Return a descriptor for each collection found in a keyspace
    of this database.

    The timeout parameters are resolved into a single timeout context,
    then the actual listing is delegated to `_list_collections_ctx`.

    Args:
        keyspace: the keyspace to be inspected. If not specified,
            the general setting for this database is assumed.
        collection_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `collection_admin_timeout_ms`.
        timeout_ms: an alias for `collection_admin_timeout_ms`.

    Returns:
        a list of CollectionDescriptor instances, one for each collection.

    Example:
        >>> coll_list = my_db.list_collections()
        >>> coll_list
        [CollectionDescriptor(name='my_v_col', options=CollectionDefinition())]
        >>> for coll_dict in my_db.list_collections():
        ...     print(coll_dict)
        ...
        CollectionDescriptor(name='my_v_col', options=CollectionDefinition())
    """

    resolved_timeout_ms, timeout_label = _select_singlereq_timeout_ca(
        timeout_options=self.api_options.timeout_options,
        collection_admin_timeout_ms=collection_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    listing_timeout_context = _TimeoutContext(
        request_ms=resolved_timeout_ms,
        label=timeout_label,
    )
    return self._list_collections_ctx(
        keyspace=keyspace,
        timeout_context=listing_timeout_context,
    )

def list_table_names(self, *, keyspace: str | None = None, table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[str]

List the names of all tables in a given keyspace of this database.

Args

keyspace: the keyspace to be inspected. If not specified, the general setting for this database is assumed.
table_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for table_admin_timeout_ms.
timeout_ms: an alias for table_admin_timeout_ms.

Returns

a list of the table names as strings, in no particular order.

Example

>>> database.list_table_names()
['fighters', 'games']

Expand source code

def list_table_names(
    self,
    *,
    keyspace: str | None = None,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[str]:
    """
    Return the names of all tables found in a keyspace of this database.

    A single `listTables` command is sent to the API.

    Args:
        keyspace: the keyspace to be inspected. If not specified,
            the general setting for this database is assumed.
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Returns:
        a list of the table names as strings, in no particular order.

    Raises:
        UnexpectedDataAPIResponseException: if the "status" part of the
            API response has no "tables" entry.

    Example:
        >>> database.list_table_names()
        ['fighters', 'games']
    """

    resolved_timeout_ms, timeout_label = _select_singlereq_timeout_ta(
        timeout_options=self.api_options.timeout_options,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    commander = self._get_driver_commander(keyspace=keyspace)
    list_payload: dict[str, Any] = {"listTables": {}}
    logger.info("listTables")
    list_response = commander.request(
        payload=list_payload,
        timeout_context=_TimeoutContext(
            request_ms=resolved_timeout_ms,
            label=timeout_label,
        ),
    )

    # A well-formed response carries the names under status.tables.
    if "tables" in list_response.get("status", {}):
        logger.info("finished listTables")
        return list_response["status"]["tables"]  # type: ignore[no-any-return]
    raise UnexpectedDataAPIResponseException(
        text="Faulty response from listTables API command.",
        raw_response=list_response,
    )

def list_tables(self, *, keyspace: str | None = None, table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[ListTableDescriptor]

List all tables in a given keyspace for this database.

Args

keyspace: the keyspace to be inspected. If not specified, the general setting for this database is assumed.
table_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for table_admin_timeout_ms.
timeout_ms: an alias for table_admin_timeout_ms.

Returns

a list of ListTableDescriptor instances, one for each table.

Example

>>> tables = my_database.list_tables()
>>> tables
[ListTableDescriptor(name='fighters', definition=ListTableDefinition(...
>>> tables[1].name
'games'
>>> tables[1].definition.columns
{'match_id': TableScalarColumnTypeDescriptor(ColumnType.TEXT),...
>>> tables[1].definition.columns['score']
TableScalarColumnTypeDescriptor(ColumnType.INT)
>>> tables[1].definition.primary_key.partition_by
['match_id']
>>> tables[1].definition.primary_key.partition_sort
{'round': 1}

Expand source code

def list_tables(
    self,
    *,
    keyspace: str | None = None,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[ListTableDescriptor]:
    """
    Return a descriptor for each table found in a keyspace of this database.

    The timeout parameters are resolved into a single timeout context,
    then the actual listing is delegated to `_list_tables_ctx`.

    Args:
        keyspace: the keyspace to be inspected. If not specified,
            the general setting for this database is assumed.
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Returns:
        a list of ListTableDescriptor instances, one for each table.

    Example:
        >>> tables = my_database.list_tables()
        >>> tables
        [ListTableDescriptor(name='fighters', definition=ListTableDefinition(...
        >>> tables[1].name
        'games'
        >>> tables[1].definition.columns
        {'match_id': TableScalarColumnTypeDescriptor(ColumnType.TEXT),...
        >>> tables[1].definition.columns['score']
        TableScalarColumnTypeDescriptor(ColumnType.INT)
        >>> tables[1].definition.primary_key.partition_by
        ['match_id']
        >>> tables[1].definition.primary_key.partition_sort
        {'round': 1}
    """

    resolved_timeout_ms, timeout_label = _select_singlereq_timeout_ta(
        timeout_options=self.api_options.timeout_options,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    listing_timeout_context = _TimeoutContext(
        request_ms=resolved_timeout_ms,
        label=timeout_label,
    )
    return self._list_tables_ctx(
        keyspace=keyspace,
        timeout_context=listing_timeout_context,
    )

def name(self) ‑> str

The name of this database. Note that this bears no unicity guarantees.

Calling this method the first time involves a request to the DevOps API (the resulting database name is then cached). See the info() method for more details.

Example

>>> my_db.name()
'the_application_database'

Expand source code

def name(self) -> str:
    """
    Return the name of this database (which bears no unicity guarantees).

    The name is read from the result of `info()` on the first call and
    stored on this object; later calls return the stored value without
    calling `info()` again. See the `info()` method for more details,
    including the DevOps API request it involves.

    Example:
        >>> my_db.name()
        'the_application_database'
    """

    # Fast path: the name was already resolved by an earlier call.
    if self._name is not None:
        return self._name

    self._name = self.info().name
    return self._name

def to_async(self, *, keyspace: str | None = None, token: str | TokenProvider | UnsetType = (unset), api_options: APIOptions | UnsetType = (unset)) ‑> AsyncDatabase

Create an AsyncDatabase from this one. Save for the arguments explicitly provided as overrides, everything else is kept identical to this database in the copy.

Args

keyspace: this is the keyspace all method calls will target, unless one is explicitly specified in the call. If no keyspace is supplied when creating a Database, the name "default_keyspace" is set.
token: an Access Token to the database. Example: "AstraCS:xyz...". This can be either a literal token string or a subclass of TokenProvider.
api_options: any additional options to set for the result, in the form of an APIOptions instance (where one can set just the needed attributes). In case the same setting is also provided as named parameter, the latter takes precedence.

Returns

the new copy, an AsyncDatabase instance.

Example

>>> async_database = my_db.to_async()
>>> asyncio.run(async_database.list_collection_names())

Expand source code

def to_async(
    self,
    *,
    keyspace: str | None = None,
    token: str | TokenProvider | UnsetType = _UNSET,
    api_options: APIOptions | UnsetType = _UNSET,
) -> AsyncDatabase:
    """
    Build an AsyncDatabase counterpart of this database. Everything that is
    not explicitly overridden through the arguments is carried over unchanged.

    Args:
        keyspace: the keyspace all method calls will target, unless
            one is explicitly specified in the call. If omitted, the keyspace
            of this database is used for the result.
        token: an Access Token to the database. Example: `"AstraCS:xyz..."`.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        api_options: any additional options to set for the result, in the form of
            an APIOptions instance (where one can set just the needed attributes).
            In case the same setting is also provided as named parameter,
            the latter takes precedence.

    Returns:
        the new copy, an `AsyncDatabase` instance.

    Example:
        >>> async_database = my_db.to_async()
        >>> asyncio.run(async_database.list_collection_names())
    """

    # The named `token` parameter is layered on last, so that it wins
    # over whatever `api_options` may carry for the same setting.
    token_override = APIOptions(
        token=token,
    )
    merged_api_options = self.api_options.with_override(api_options)
    merged_api_options = merged_api_options.with_override(token_override)
    return AsyncDatabase(
        api_endpoint=self.api_endpoint,
        keyspace=keyspace if keyspace else self.keyspace,
        api_options=merged_api_options,
    )

def use_keyspace(self, keyspace: str) ‑> None

Switch to a new working keyspace for this database. This method changes (mutates) the Database instance.

Note that this method does not create the keyspace, which should exist already (created for instance with a DatabaseAdmin.create_keyspace call).

Args

keyspace: the new keyspace to use as the database working keyspace.

Returns

None.

Example

>>> my_db.list_collection_names()
['coll_1', 'coll_2']
>>> my_db.use_keyspace("an_empty_keyspace")
>>> my_db.list_collection_names()
[]

Expand source code

def use_keyspace(self, keyspace: str) -> None:
    """
    Make a different keyspace the working keyspace of this database.
    The Database instance is changed in place (i.e. mutated) by this call.

    The keyspace is not created by this method: it is expected to exist
    already (created for instance with a `DatabaseAdmin.create_keyspace` call).

    Args:
        keyspace: the new keyspace to use as the database working keyspace.

    Returns:
        None.

    Example:
        >>> my_db.list_collection_names()
        ['coll_1', 'coll_2']
        >>> my_db.use_keyspace("an_empty_keyspace")
        >>> my_db.list_collection_names()
        []
    """
    logger.info(f"switching to keyspace '{keyspace}'")
    self._using_keyspace = keyspace
    # The commander is rebuilt only after the assignment above:
    # `self.keyspace` is read here, so the statement order must be kept.
    refreshed_commander = self._get_api_commander(keyspace=self.keyspace)
    self._api_commander = refreshed_commander

def with_options(self, *, keyspace: str | None = None, token: str | TokenProvider | UnsetType = (unset), api_options: APIOptions | UnsetType = (unset)) ‑> Database

Create a clone of this database with some changed attributes.

Args

keyspace: this is the keyspace all method calls will target, unless one is explicitly specified in the call. If no keyspace is supplied when creating a Database, the name "default_keyspace" is set.
token: an Access Token to the database. Example: "AstraCS:xyz...". This can be either a literal token string or a subclass of TokenProvider.
api_options: any additional options to set for the clone, in the form of an APIOptions instance (where one can set just the needed attributes). In case the same setting is also provided as named parameter, the latter takes precedence.

Returns

a new Database instance.

Example

>>> my_db_2 = my_db.with_options(
...     keyspace="the_other_keyspace",
...     token="AstraCS:xyz...",
... )

Expand source code

def with_options(
    self,
    *,
    keyspace: str | None = None,
    token: str | TokenProvider | UnsetType = _UNSET,
    api_options: APIOptions | UnsetType = _UNSET,
) -> Database:
    """
    Return a copy of this database where some attributes are changed.

    Args:
        keyspace: the keyspace all method calls will target, unless
            one is explicitly specified in the call.
        token: an Access Token to the database. Example: `"AstraCS:xyz..."`.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        api_options: any additional options to set for the clone, in the form of
            an APIOptions instance (where one can set just the needed attributes).
            In case the same setting is also provided as named parameter,
            the latter takes precedence.

    Returns:
        a new `Database` instance.

    Example:
        >>> my_db_2 = my_db.with_options(
        ...     keyspace="the_other_keyspace",
        ...     token="AstraCS:xyz...",
        ... )
    """

    # Public entry point: the actual cloning is done by `_copy`.
    cloned_database: Database = self._copy(
        keyspace=keyspace,
        token=token,
        api_options=api_options,
    )
    return cloned_database

class Table (*, database: Database, name: str, keyspace: str | None, api_options: FullAPIOptions)

A Data API table, the object to interact with the Data API for structured data, especially for DDL operations. This class has a synchronous interface.

This class is not meant for direct instantiation by the user, rather it is obtained by invoking methods such as get_table of Database, wherefrom the Table inherits its API options such as authentication token and API endpoint. In order to create a table, instead, one should call the create_table method of Database, providing a table definition parameter that can be built in different ways (see the CreateTableDefinition object and examples below).

Args

database: a Database object, instantiated earlier. This represents the database the table belongs to.
name: the table name. This parameter should match an existing table on the database.
keyspace: this is the keyspace to which the table belongs. If nothing is specified, the database's working keyspace is used.
api_options: a complete specification of the API Options for this instance.

Examples

>>> from astrapy import DataAPIClient
>>> client = DataAPIClient()
>>> database = client.get_database(
...     "https://01234567-....apps.astra.datastax.com",
...     token="AstraCS:..."
... )
>>>

>>> # Create a table using the fluent syntax for definition
>>> from astrapy.constants import SortMode
>>> from astrapy.info import (
...     CreateTableDefinition,
...     ColumnType,
... )
>>> table_definition = (
...     CreateTableDefinition.builder()
...     .add_column("match_id", ColumnType.TEXT)
...     .add_column("round", ColumnType.INT)
...     .add_vector_column("m_vector", dimension=3)
...     .add_column("score", ColumnType.INT)
...     .add_column("when", ColumnType.TIMESTAMP)
...     .add_column("winner", ColumnType.TEXT)
...     .add_set_column("fighters", ColumnType.UUID)
...     .add_partition_by(["match_id"])
...     .add_partition_sort({"round": SortMode.ASCENDING})
...     .build()
... )
>>> my_table = database.create_table(
...     "games",
...     definition=table_definition,
... )

>>> # Create a table with the definition as object
>>> # (and do not raise an error if the table exists already)
>>> from astrapy.info import (
...     CreateTableDefinition,
...     TablePrimaryKeyDescriptor,
...     TableScalarColumnTypeDescriptor,
...     TableValuedColumnType,
...     TableValuedColumnTypeDescriptor,
...     TableVectorColumnTypeDescriptor,
... )
>>> table_definition_1 = CreateTableDefinition(
...     columns={
...         "match_id": TableScalarColumnTypeDescriptor(
...             ColumnType.TEXT,
...         ),
...         "round": TableScalarColumnTypeDescriptor(
...             ColumnType.INT,
...         ),
...         "m_vector": TableVectorColumnTypeDescriptor(
...             column_type="vector", dimension=3
...         ),
...         "score": TableScalarColumnTypeDescriptor(
...             ColumnType.INT,
...         ),
...         "when": TableScalarColumnTypeDescriptor(
...             ColumnType.TIMESTAMP,
...         ),
...         "winner": TableScalarColumnTypeDescriptor(
...             ColumnType.TEXT,
...         ),
...         "fighters": TableValuedColumnTypeDescriptor(
...             column_type=TableValuedColumnType.SET,
...             value_type=ColumnType.UUID,
...         ),
...     },
...     primary_key=TablePrimaryKeyDescriptor(
...         partition_by=["match_id"],
...         partition_sort={"round": SortMode.ASCENDING},
...     ),
... )
>>> my_table_1 = database.create_table(
...     "games",
...     definition=table_definition_1,
...     if_not_exists=True,
... )

>>> # Create a table with the definition as plain dictionary
>>> # (and do not raise an error if the table exists already)
>>> table_definition_2 = {
...     "columns": {
...         "match_id": {"type": "text"},
...         "round": {"type": "int"},
...         "m_vector": {"type": "vector", "dimension": 3},
...         "score": {"type": "int"},
...         "when": {"type": "timestamp"},
...         "winner": {"type": "text"},
...         "fighters": {"type": "set", "valueType": "uuid"},
...     },
...     "primaryKey": {
...         "partitionBy": ["match_id"],
...         "partitionSort": {"round": 1},
...     },
... }
>>> my_table_2 = database.create_table(
...     "games",
...     definition=table_definition_2,
...     if_not_exists=True,
... )

>>> # Get a reference to an existing table
>>> # (no checks are performed on DB)
>>> my_table_3 = database.get_table("games")

Note

creating an instance of Table does not trigger, in itself, actual creation of the table on the database. The latter should have been created beforehand, e.g. through the create_table method of a Database.

Expand source code

class Table(Generic[ROW]):
    """
    A Data API table, the object to interact with the Data API for structured data,
    especially for DDL operations. This class has a synchronous interface.

    This class is not meant for direct instantiation by the user, rather
    it is obtained by invoking methods such as `get_table` of Database,
    wherefrom the Table inherits its API options such as authentication
    token and API endpoint.
    In order to create a table, instead, one should call the `create_table`
    method of Database, providing a table definition parameter that can be built
    in different ways (see the `CreateTableDefinition` object and examples below).

    Args:
        database: a Database object, instantiated earlier. This represents
            the database the table belongs to.
        name: the table name. This parameter should match an existing
            table on the database.
        keyspace: this is the keyspace to which the table belongs.
            If nothing is specified, the database's working keyspace is used.
        api_options: a complete specification of the API Options for this instance.

    Examples:
        >>> from astrapy import DataAPIClient
        >>> client = DataAPIClient()
        >>> database = client.get_database(
        ...     "https://01234567-....apps.astra.datastax.com",
        ...     token="AstraCS:..."
        ... )
        >>>

        >>> # Create a table using the fluent syntax for definition
        >>> from astrapy.constants import SortMode
        >>> from astrapy.info import (
        ...     CreateTableDefinition,
        ...     ColumnType,
        ... )
        >>> table_definition = (
        ...     CreateTableDefinition.builder()
        ...     .add_column("match_id", ColumnType.TEXT)
        ...     .add_column("round", ColumnType.INT)
        ...     .add_vector_column("m_vector", dimension=3)
        ...     .add_column("score", ColumnType.INT)
        ...     .add_column("when", ColumnType.TIMESTAMP)
        ...     .add_column("winner", ColumnType.TEXT)
        ...     .add_set_column("fighters", ColumnType.UUID)
        ...     .add_partition_by(["match_id"])
        ...     .add_partition_sort({"round": SortMode.ASCENDING})
        ...     .build()
        ... )
        >>> my_table = database.create_table(
        ...     "games",
        ...     definition=table_definition,
        ... )

        >>> # Create a table with the definition as object
        >>> # (and do not raise an error if the table exists already)
        >>> from astrapy.info import (
        ...     CreateTableDefinition,
        ...     TablePrimaryKeyDescriptor,
        ...     TableScalarColumnTypeDescriptor,
        ...     TableValuedColumnType,
        ...     TableValuedColumnTypeDescriptor,
        ...     TableVectorColumnTypeDescriptor,
        ... )
        >>> table_definition_1 = CreateTableDefinition(
        ...     columns={
        ...         "match_id": TableScalarColumnTypeDescriptor(
        ...             ColumnType.TEXT,
        ...         ),
        ...         "round": TableScalarColumnTypeDescriptor(
        ...             ColumnType.INT,
        ...         ),
        ...         "m_vector": TableVectorColumnTypeDescriptor(
        ...             column_type="vector", dimension=3
        ...         ),
        ...         "score": TableScalarColumnTypeDescriptor(
        ...             ColumnType.INT,
        ...         ),
        ...         "when": TableScalarColumnTypeDescriptor(
        ...             ColumnType.TIMESTAMP,
        ...         ),
        ...         "winner": TableScalarColumnTypeDescriptor(
        ...             ColumnType.TEXT,
        ...         ),
        ...         "fighters": TableValuedColumnTypeDescriptor(
        ...             column_type=TableValuedColumnType.SET,
        ...             value_type=ColumnType.UUID,
        ...         ),
        ...     },
        ...     primary_key=TablePrimaryKeyDescriptor(
        ...         partition_by=["match_id"],
        ...         partition_sort={"round": SortMode.ASCENDING},
        ...     ),
        ... )
        >>> my_table_1 = database.create_table(
        ...     "games",
        ...     definition=table_definition_1,
        ...     if_not_exists=True,
        ... )

        >>> # Create a table with the definition as plain dictionary
        >>> # (and do not raise an error if the table exists already)
        >>> table_definition_2 = {
        ...     "columns": {
        ...         "match_id": {"type": "text"},
        ...         "round": {"type": "int"},
        ...         "m_vector": {"type": "vector", "dimension": 3},
        ...         "score": {"type": "int"},
        ...         "when": {"type": "timestamp"},
        ...         "winner": {"type": "text"},
        ...         "fighters": {"type": "set", "valueType": "uuid"},
        ...     },
        ...     "primaryKey": {
        ...         "partitionBy": ["match_id"],
        ...         "partitionSort": {"round": 1},
        ...     },
        ... }
        >>> my_table_2 = database.create_table(
        ...     "games",
        ...     definition=table_definition_2,
        ...     if_not_exists=True,
        ... )

        >>> # Get a reference to an existing table
        >>> # (no checks are performed on DB)
        >>> my_table_3 = database.get_table("games")

    Note:
        creating an instance of Table does not trigger, in itself, actual
        creation of the table on the database. The latter should have been created
        beforehand, e.g. through the `create_table` method of a Database.
    """

    def __init__(
        self,
        *,
        database: Database,
        name: str,
        keyspace: str | None,
        api_options: FullAPIOptions,
    ) -> None:
        self.api_options = api_options
        self._name = name

        # An explicitly-passed keyspace wins; otherwise the keyspace
        # of the provided database is used. One of the two must be set.
        effective_keyspace = database.keyspace if keyspace is None else keyspace
        if effective_keyspace is None:
            raise ValueError("Attempted to create Table with 'keyspace' unset.")

        # The table keeps its own copy of the database, aligned
        # to the table's keyspace and API options.
        self._database = database._copy(
            keyspace=effective_keyspace,
            api_options=self.api_options,
        )

        # Headers are merged in this order: auth token, embedding headers,
        # reranking headers, additional headers (later entries override
        # earlier ones in case of key collisions).
        self._commander_headers = {
            DEFAULT_DATA_API_AUTH_HEADER: self.api_options.token.get_token(),
            **self.api_options.embedding_api_key.get_headers(),
            **self.api_options.reranking_api_key.get_headers(),
            **self.api_options.database_additional_headers,
        }

        # The commander needs `_database`, `_name` and `_commander_headers`
        # to be in place, hence it is created after them.
        self._api_commander = self._get_api_commander()
        self._converter_agent: _TableConverterAgent[ROW] = _TableConverterAgent(
            options=self.api_options.serdes_options,
        )

    def __repr__(self) -> str:
        """
        Describe the table through its name, keyspace, the API endpoint
        of its database and its API options.
        """
        endpoint_desc = f'database.api_endpoint="{self.database.api_endpoint}"'
        class_name = self.__class__.__name__
        description = f'{class_name}(name="{self.name}", '
        description += f'keyspace="{self.keyspace}", {endpoint_desc}, '
        description += f"api_options={self.api_options})"
        return description

    def __eq__(self, other: Any) -> bool:
        """
        Two tables are equal when their name, their database and
        their API options are equal. Anything that is not a Table
        compares as different.
        """
        if not isinstance(other, Table):
            return False

        # All three comparisons are evaluated before being combined.
        same_name = self._name == other._name
        same_database = self._database == other._database
        same_api_options = self.api_options == other.api_options
        return all((same_name, same_database, same_api_options))

    def _get_api_commander(self) -> APICommander:
        """
        Instantiate a new APICommander based on the properties of this class.

        The request path is assembled from the API path, the API version,
        the keyspace and the table name: items that are None, or that are
        empty once stripped of slashes, are left out.

        Returns:
            a new APICommander targeting this table.

        Raises:
            ValueError: if the table's database has no keyspace set.
        """

        if self._database.keyspace is None:
            raise ValueError(
                "No keyspace specified. Table requires a keyspace to "
                "be set, e.g. through the `keyspace` constructor parameter."
            )

        url_options = self._database.api_options.data_api_url_options
        raw_components = (
            url_options.api_path,
            url_options.api_version,
            self._database.keyspace,
            self._name,
        )
        path_segments: list[str] = []
        for raw_component in raw_components:
            if raw_component is None:
                continue
            # Strip surrounding slashes to avoid doubled separators in the path.
            segment = raw_component.strip("/")
            if segment != "":
                path_segments.append(segment)
        base_path = f"/{'/'.join(path_segments)}"

        return APICommander(
            api_endpoint=self._database.api_endpoint,
            path=base_path,
            headers=self._commander_headers,
            callers=self.api_options.callers,
            redacted_header_names=self.api_options.redacted_header_names,
            handle_decimals_writes=True,
            handle_decimals_reads=True,
        )

    def _copy(
        self: Table[ROW],
        *,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> Table[ROW]:
        # Build a new Table on the same database, name and keyspace, with the
        # API options of this one overridden first by `api_options` and then
        # by the named parameters (which therefore take precedence).
        named_overrides = APIOptions(
            embedding_api_key=embedding_api_key,
            reranking_api_key=reranking_api_key,
        )
        merged_api_options = self.api_options.with_override(api_options)
        merged_api_options = merged_api_options.with_override(named_overrides)
        return Table(
            database=self.database,
            name=self.name,
            keyspace=self.keyspace,
            api_options=merged_api_options,
        )

    def with_options(
        self: Table[ROW],
        *,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> Table[ROW]:
        """
        Return a copy of this table where some attributes are changed.

        Args:
            embedding_api_key: optional API key(s) for interacting with the table.
                If an embedding service is configured, and this parameter is not None,
                each Data API call will include the necessary embedding-related headers
                as specified by this parameter. A plain string is turned into
                the one "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
                For some vectorize providers/models, if using header-based
                authentication, specialized subclasses of
                `astrapy.authentication.EmbeddingHeadersProvider` should be supplied.
            reranking_api_key: optional API key(s) for interacting with the table.
                If a reranker is configured for the table, and this parameter
                is not None, Data API calls will include the appropriate
                reranker-related headers according to this parameter. Reranker
                services may not necessarily require this setting (e.g. if the
                service needs no authentication, or one is configured as part of
                the table definition relying on a "shared secret").
                A plain string is turned into an instance of
                `astrapy.authentication.RerankingAPIKeyHeaderProvider`.
            api_options: any additional options to set for the clone, in the form of
                an APIOptions instance (where one can set just the needed attributes).
                In case the same setting is also provided as named parameter,
                the latter takes precedence.

        Returns:
            a new Table instance.

        Example:
            >>> table_with_api_key_configured = my_table.with_options(
            ...     embedding_api_key="secret-key-0123abcd...",
            ... )
        """

        # Public entry point: the actual cloning is done by `_copy`.
        cloned_table: Table[ROW] = self._copy(
            embedding_api_key=embedding_api_key,
            reranking_api_key=reranking_api_key,
            api_options=api_options,
        )
        return cloned_table

    def to_async(
        self: Table[ROW],
        *,
        embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
        reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
        api_options: APIOptions | UnsetType = _UNSET,
    ) -> AsyncTable[ROW]:
        """
        Build an AsyncTable counterpart of this table. Everything that is not
        explicitly overridden through the arguments is carried over unchanged
        (the database is converted into an async object).

        Args:
            embedding_api_key: optional API key(s) for interacting with the table.
                If an embedding service is configured, and this parameter is not None,
                each Data API call will include the necessary embedding-related headers
                as specified by this parameter. A plain string is turned into
                the one "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
                For some vectorize providers/models, if using header-based
                authentication, specialized subclasses of
                `astrapy.authentication.EmbeddingHeadersProvider` should be supplied.
            reranking_api_key: optional API key(s) for interacting with the table.
                If a reranker is configured for the table, and this parameter
                is not None, Data API calls will include the appropriate
                reranker-related headers according to this parameter. Reranker
                services may not necessarily require this setting (e.g. if the
                service needs no authentication, or one is configured as part of
                the table definition relying on a "shared secret").
                A plain string is turned into an instance of
                `astrapy.authentication.RerankingAPIKeyHeaderProvider`.
            api_options: any additional options to set for the result, in the form of
                an APIOptions instance (where one can set just the needed attributes).
                In case the same setting is also provided as named parameter,
                the latter takes precedence.

        Returns:
            the new copy, an AsyncTable instance.

        Example:
            >>> asyncio.run(my_table.to_async().find_one(
            ...     {"match_id": "fight4"},
            ...     projection={"winner": True},
            ... ))
            {"pk": 1, "column": "value"}
        """

        # Named parameters are layered on last, so that they win over
        # whatever `api_options` may carry for the same settings.
        named_overrides = APIOptions(
            embedding_api_key=embedding_api_key,
            reranking_api_key=reranking_api_key,
        )
        merged_api_options = self.api_options.with_override(api_options)
        merged_api_options = merged_api_options.with_override(named_overrides)
        async_database = self.database.to_async()
        return AsyncTable(
            database=async_database,
            name=self.name,
            keyspace=self.keyspace,
            api_options=merged_api_options,
        )

    def definition(
        self,
        *,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> ListTableDefinition:
        """
        Query the Data API and return a structure defining the table schema.
        If there are no unsupported columns in the table, the return value has
        the same contents as could have been provided to a `create_table` method call.

        The definition is found by listing the tables through this table's
        database and picking the entry whose name matches this table.

        Args:
            table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Returns:
            A `ListTableDefinition` object, available for inspection.

        Raises:
            RuntimeError: if no table with this name is found in the listing.

        Example:
            >>> my_table.definition()
            ListTableDefinition(columns=[match_id,round,fighters, ...  # shortened
        """

        _table_admin_timeout_ms, _ta_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        listing_timeout_context = _TimeoutContext(
            request_ms=_table_admin_timeout_ms,
            label=_ta_label,
        )
        logger.info(f"getting tables in search of '{self.name}'")
        # NOTE(review): keyspace=None presumably makes the listing use the
        # keyspace of this table's database -- confirm against `_list_tables_ctx`.
        all_descriptors = self.database._list_tables_ctx(
            keyspace=None,
            timeout_context=listing_timeout_context,
        )
        matching_descriptors = []
        for table_desc in all_descriptors:
            if table_desc.name == self.name:
                matching_descriptors.append(table_desc)
        logger.info(f"finished getting tables in search of '{self.name}'")
        if not matching_descriptors:
            raise RuntimeError(
                f"Table {self.keyspace}.{self.name} not found.",
            )
        return matching_descriptors[0].definition

    def info(
        self,
        *,
        database_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> TableInfo:
        """
        Return information on the table: the info of its database together
        with the table's keyspace, name and full name. This should not be
        confused with the table definition (i.e. the schema).

        Args:
            database_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying DevOps API request.
                If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `database_admin_timeout_ms`.
            timeout_ms: an alias for `database_admin_timeout_ms`.

        Returns:
            A TableInfo object for inspection.

        Example:
            >>> # Note: output reformatted for clarity.
            >>> my_table.info()
            TableInfo(
                database_info=AstraDBDatabaseInfo(id=..., name=..., ...),
                keyspace='default_keyspace',
                name='games',
                full_name='default_keyspace.games'
            )
        """

        # The timeout parameters are handed over as they are to the database.
        database_info = self.database.info(
            database_admin_timeout_ms=database_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        return TableInfo(
            database_info=database_info,
            keyspace=self.keyspace,
            name=self.name,
            full_name=self.full_name,
        )

    @property
    def database(self) -> Database:
        """
        The Database object this table belongs to.

        Example:
            >>> my_table.database.name()
            'the_db'
        """

        owning_database = self._database
        return owning_database

    @property
    def keyspace(self) -> str:
        """
        The keyspace this table is in, as set on the table's database.

        Raises:
            RuntimeError: if the table's database has no keyspace set.

        Example:
            >>> my_table.keyspace
            'default_keyspace'
        """

        database_keyspace = self.database.keyspace
        if database_keyspace is not None:
            return database_keyspace
        raise RuntimeError("The table's DB is set with keyspace=None")

    @property
    def name(self) -> str:
        """
        The table name, as provided when this object was created.

        Example:
            >>> my_table.name
            'games'
        """

        table_name = self._name
        return table_name

    @property
    def full_name(self) -> str:
        """
        The fully-qualified name of the table within the database:
        keyspace and table name joined by a dot ("keyspace.table_name").

        Example:
            >>> my_table.full_name
            'default_keyspace.my_table'
        """

        qualified_name = f"{self.keyspace}.{self.name}"
        return qualified_name

    def _create_generic_index(
        self,
        i_name: str,
        ci_definition: dict[str, Any],
        ci_command: str,
        if_not_exists: bool | None,
        table_admin_timeout_ms: int | None,
        request_timeout_ms: int | None,
        timeout_ms: int | None,
    ) -> None:
        """
        Send an index-creation command to the API and check its outcome.

        Args:
            i_name: the name of the index, sent as "name" in the payload.
            ci_definition: the index definition, sent as "definition".
            ci_command: the API command name, used as the top-level payload key.
            if_not_exists: if not None, it is sent as the "ifNotExists" option;
                if None, the options are sent empty.
            table_admin_timeout_ms: a timeout, in milliseconds, for the request.
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Raises:
            UnexpectedDataAPIResponseException: if the "status" of the response
                is anything other than `{"ok": 1}`.
        """
        ci_options: dict[str, bool] = (
            {} if if_not_exists is None else {"ifNotExists": if_not_exists}
        )
        _table_admin_timeout_ms, _ta_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        command_body = {
            "name": i_name,
            "definition": ci_definition,
            "options": ci_options,
        }
        ci_payload = {ci_command: command_body}
        request_timeout_context = _TimeoutContext(
            request_ms=_table_admin_timeout_ms, label=_ta_label
        )
        logger.info(f"{ci_command}('{i_name}')")
        ci_response = self._api_commander.request(
            payload=ci_payload,
            timeout_context=request_timeout_context,
        )
        # Anything but a plain "ok" status is treated as a faulty response.
        response_is_ok = ci_response.get("status") == {"ok": 1}
        if not response_is_ok:
            raise UnexpectedDataAPIResponseException(
                text=f"Faulty response from {ci_command} API command.",
                raw_response=ci_response,
            )
        logger.info(f"finished {ci_command}('{i_name}')")

    def create_index(
        self,
        name: str,
        column: str | dict[str, str],
        *,
        options: TableIndexOptions | dict[str, Any] | None = None,
        if_not_exists: bool | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Create an index on one of the non-vector columns of the table.

        The operation is blocking: control returns to the caller only once
        the index has been created and is ready for use.

        To index a vector column, use method `create_vector_index` instead.

        Args:
            name: the index name. Index names must be unique across the keyspace.
            column: the table column to index. Usually a plain string; for a
                map column it can also be an object in one of the two formats
                {"column": "$values"} or {"column": "$keys"}.
            options: index settings (e.g., for a text column, case-sensitivity
                and so on), given either as a `TableIndexOptions` instance or
                as an equivalent dictionary. If omitted, an empty set of
                options is used.
                See the `astrapy.info.TableIndexOptions` class for more details.
            if_not_exists: if True, the command succeeds even when an index
                with this name exists already (and in that case nothing is
                actually created on the database). With the API default of
                False, a name collision results in an error from the API.
            table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults
                apply. (This method issues a single API request, hence all
                timeout parameters are treated the same.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Examples:
            >>> from astrapy.info import TableIndexOptions
            >>>
            >>> # index a column with default settings
            >>> my_table.create_index(
            ...     "score_index",
            ...     "score",
            ... )
            >>>
            >>> # index a textual column, with explicit indexing options
            >>> my_table.create_index(
            ...     "winner_index",
            ...     "winner",
            ...     options=TableIndexOptions(
            ...         ascii=False,
            ...         normalize=True,
            ...         case_sensitive=False,
            ...     ),
            ... )
        """

        index_definition = TableIndexDefinition(
            column=column,
            options=TableIndexOptions.coerce(options or {}),
        )
        definition_dict: dict[str, Any] = index_definition.as_dict()
        return self._create_generic_index(
            i_name=name,
            ci_definition=definition_dict,
            ci_command="createIndex",
            if_not_exists=if_not_exists,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )

    def create_vector_index(
        self,
        name: str,
        column: str,
        *,
        options: TableVectorIndexOptions | dict[str, Any] | None = None,
        if_not_exists: bool | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Create a vector index on a vector column of the table, thereby making
        vector similarity searches possible on that column.

        The operation is blocking: control returns to the caller only once
        the index has been created and is ready for use.

        To index a non-vector column, use method `create_index` instead.

        Args:
            name: the index name. Index names must be unique across the keyspace.
            column: the column, of type "vector", on which to create the index.
            options: settings for the vector index, such as the metric to use
                or, if desired, a "source model" setting. It can be given as
                a `TableVectorIndexOptions` instance or as an equivalent
                dictionary. If omitted, the Data API defaults apply.
                See the `astrapy.info.TableVectorIndexOptions` class
                for more details.
            if_not_exists: if True, the command succeeds even when an index
                with this name exists already (and in that case nothing is
                actually created on the database). With the API default of
                False, a name collision results in an error from the API.
            table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults
                apply. (This method issues a single API request, hence all
                timeout parameters are treated the same.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Example:
            >>> from astrapy.constants import VectorMetric
            >>> from astrapy.info import TableVectorIndexOptions
            >>>
            >>> # a vector index using dot-product similarity
            >>> my_table.create_vector_index(
            ...     "m_vector_index",
            ...     "m_vector",
            ...     options=TableVectorIndexOptions(
            ...         metric=VectorMetric.DOT_PRODUCT,
            ...     ),
            ... )
            >>> # with a source_model specified (the previous statement having
            >>> # succeeded, `if_not_exists` makes this one do nothing):
            >>> my_table.create_vector_index(
            ...     "m_vector_index",
            ...     "m_vector",
            ...     options=TableVectorIndexOptions(
            ...         metric=VectorMetric.DOT_PRODUCT,
            ...         source_model="nv-qa-4",
            ...     ),
            ...     if_not_exists=True,
            ... )
            >>> # relying on the Data API defaults, i.e. cosine similarity
            >>> # (the previous statement having succeeded,
            >>> # `if_not_exists` makes this one do nothing):
            >>> my_table.create_vector_index(
            ...     "m_vector_index",
            ...     "m_vector",
            ...     if_not_exists=True,
            ... )
        """

        vector_index_definition = TableVectorIndexDefinition(
            column=column,
            options=TableVectorIndexOptions.coerce(options),
        )
        definition_dict: dict[str, Any] = vector_index_definition.as_dict()
        return self._create_generic_index(
            i_name=name,
            ci_definition=definition_dict,
            ci_command="createVectorIndex",
            if_not_exists=if_not_exists,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )

    def list_index_names(
        self,
        *,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[str]:
        """
        Return the names of all the indexes that exist on this table.

        Args:
            table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults
                apply. (This method issues a single API request, hence all
                timeout parameters are treated the same.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Returns:
            the index names, as a list of strings in no particular order.

        Raises:
            UnexpectedDataAPIResponseException: if the "status" of the API
                response does not contain an "indexes" entry.

        Example:
            >>> my_table.list_index_names()
            ['m_vector_index', 'winner_index', 'score_index']
        """

        resolved_timeout_ms, timeout_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        list_payload: dict[str, Any] = {"listIndexes": {"options": {}}}
        logger.info("listIndexes")
        list_response = self._api_commander.request(
            payload=list_payload,
            timeout_context=_TimeoutContext(
                request_ms=resolved_timeout_ms, label=timeout_label
            ),
        )
        if "indexes" in list_response.get("status", {}):
            logger.info("finished listIndexes")
            return list_response["status"]["indexes"]  # type: ignore[no-any-return]
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from listIndexes API command.",
            raw_response=list_response,
        )

    def _list_indexes(
        self,
        *,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[TableIndexDescriptor]:
        """
        List the full definitions of all indexes existing on this table.

        WARNING: method not public yet, pending completion of its API.

        Besides the "listIndexes" API request, this method also calls
        `self.definition(...)` to obtain the table columns, which are needed
        to build the index descriptors. The latter call takes place only
        after the "listIndexes" response has been validated.

        Args:
            table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                "listIndexes" API request. If not provided, this object's
                defaults apply. The timeout parameters, as received, are also
                forwarded to the `definition` call.
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Returns:
            a list of `astrapy.info.TableIndexDescriptor` objects in no particular
            order, each providing the details of an index present on the table.

        Raises:
            UnexpectedDataAPIResponseException: if the "status" of the
                "listIndexes" response does not contain an "indexes" entry.

        Example:
            >>> indexes = my_table._list_indexes()
            >>> indexes
            [TableIndexDescriptor(name='m_vector_index', definition=...)...]  # Note: shortened
            >>> indexes[1].definition.column
            'winner'
            >>> indexes[1].definition.options.case_sensitive
            False
        """

        _table_admin_timeout_ms, _ta_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        li_payload: dict[str, Any] = {"listIndexes": {"options": {"explain": True}}}
        logger.info("listIndexes")
        li_response = self._api_commander.request(
            payload=li_payload,
            timeout_context=_TimeoutContext(
                request_ms=_table_admin_timeout_ms, label=_ta_label
            ),
        )
        # Validate before doing anything else: a faulty response must be
        # reported right away, without spending a further call on the table
        # definition (whose own failure could otherwise hide this error).
        if "indexes" not in li_response.get("status", {}):
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from listIndexes API command.",
                raw_response=li_response,
            )
        logger.info("finished listIndexes")

        # The column definitions are required to coerce each raw index object.
        columns = self.definition(
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        ).columns
        return [
            TableIndexDescriptor.coerce(index_object, columns=columns)
            for index_object in li_response["status"]["indexes"]
        ]

    @overload
    def alter(
        self,
        operation: AlterTableOperation | dict[str, Any],
        *,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> Table[DefaultRowType]: ...

    @overload
    def alter(
        self,
        operation: AlterTableOperation | dict[str, Any],
        *,
        row_type: type[NEW_ROW],
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> Table[NEW_ROW]: ...

    def alter(
        self,
        operation: AlterTableOperation | dict[str, Any],
        *,
        row_type: type[Any] = DefaultRowType,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> Table[NEW_ROW]:
        """
        Run one of the available alter-table operations on this table,
        for instance adding or dropping columns.

        The operation is blocking: control returns to the caller only once
        the API has confirmed the alter operation.

        Args:
            operation: which alter operation to perform, and its details,
                as an instance of one of the `astrapy.info.AlterTable*` classes.
                A plain dictionary is also accepted, provided it has the name
                of the alter operation at its top level:
                {"add": {"columns": ...}}.
            row_type: this parameter acts as a formal specifier for the type
                checker and is not otherwise used by the method.
                If omitted, the resulting Table is implicitly
                a `Table[dict[str, Any]]`. If provided, it must match
                the type hint specified in the assignment.
                See the examples below.
            table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults
                apply. (This method issues a single API request, hence all
                timeout parameters are treated the same.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Returns:
            a new `Table` instance, created with the same database, name,
            keyspace and API options as this one.

        Raises:
            UnexpectedDataAPIResponseException: if the "status" of the API
                response is anything other than `{"ok": 1}`.

        Examples:
            >>> from astrapy.info import (
            ...     AlterTableAddColumns,
            ...     AlterTableAddVectorize,
            ...     AlterTableDropColumns,
            ...     AlterTableDropVectorize,
            ...     ColumnType,
            ...     TableScalarColumnTypeDescriptor,
            ...     VectorServiceOptions,
            ... )
            >>>
            >>> # Adding a column
            >>> new_table_1 = my_table.alter(
            ...     AlterTableAddColumns(
            ...         columns={
            ...             "tie_break": TableScalarColumnTypeDescriptor(
            ...                 column_type=ColumnType.BOOLEAN,
            ...             ),
            ...         }
            ...     )
            ... )
            >>>
            >>> # Dropping a column
            >>> new_table_2 = new_table_1.alter(AlterTableDropColumns(
            ...     columns=["tie_break"]
            ... ))
            >>>
            >>> # Adding vectorize to a (vector) column
            >>> new_table_3 = new_table_2.alter(
            ...     AlterTableAddVectorize(
            ...         columns={
            ...             "m_vector": VectorServiceOptions(
            ...                 provider="openai",
            ...                 model_name="text-embedding-3-small",
            ...                 authentication={
            ...                     "providerKey": "ASTRA_KMS_API_KEY_NAME",
            ...                 },
            ...             ),
            ...         }
            ...     )
            ... )
            >>>
            >>> # Dropping vectorize from a (vector) column
            >>> # (this also shows how the type hint is used)
            >>> from typing import TypedDict
            >>> from astrapy import Table
            >>> from astrapy.data_types import (
            ...     DataAPISet,
            ...     DataAPITimestamp,
            ...     DataAPIVector,
            ... )
            >>> from astrapy.ids import UUID
            >>>
            >>> class MyMatch(TypedDict):
            ...     match_id: str
            ...     round: int
            ...     m_vector: DataAPIVector
            ...     score: int
            ...     when: DataAPITimestamp
            ...     winner: str
            ...     fighters: DataAPISet[UUID]
            ...
            >>> new_table_4: Table[MyMatch] = new_table_3.alter(
            ...     AlterTableDropVectorize(columns=["m_vector"]),
            ...     row_type=MyMatch,
            ... )
        """

        # Dictionaries are first turned into a proper operation object.
        normalized_op: AlterTableOperation = (
            operation
            if isinstance(operation, AlterTableOperation)
            else AlterTableOperation.from_full_dict(operation)
        )
        resolved_timeout_ms, timeout_label = _select_singlereq_timeout_ta(
            timeout_options=self.api_options.timeout_options,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        op_name = normalized_op._name
        operation_body = {op_name: normalized_op.as_dict()}
        alter_payload = {"alterTable": {"operation": operation_body}}
        logger.info(f"alterTable({op_name})")
        alter_response = self._api_commander.request(
            payload=alter_payload,
            timeout_context=_TimeoutContext(
                request_ms=resolved_timeout_ms, label=timeout_label
            ),
        )
        if alter_response.get("status") != {"ok": 1}:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from alterTable API command.",
                raw_response=alter_response,
            )
        logger.info(f"finished alterTable({op_name})")
        return Table(
            database=self.database,
            name=self.name,
            keyspace=self.keyspace,
            api_options=self.api_options,
        )

    def insert_one(
        self,
        row: ROW,
        *,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> TableInsertOneResult:
        """
        Insert a single row in the table,
        with implied overwrite in case of primary key collision.

        Inserting a row whose primary key corresponds to an entry already stored
        in the table has the effect of an in-place update: the row is overwritten.
        However, if the row being inserted is partially provided, i.e. some columns
        are not specified, these are left unchanged on the database. To explicitly
        reset them, specify their value as appropriate to their data type,
        i.e. `None`, `{}` or analogous.

        Args:
            row: a dictionary expressing the row to insert. The primary key
                must be specified in full, while any other column may be omitted
                if desired (in which case it is left as is on DB).
                The values for the various columns supplied in the row must
                be of the right data type for the insertion to succeed.
                Non-primary-key columns can also be explicitly set to null.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a TableInsertOneResult object, whose attributes are the primary key
            of the inserted row both in the form of a dictionary and of a tuple.

        Raises:
            UnexpectedDataAPIResponseException: if the "status" of the API
                response has no "insertedIds" entry, if that entry is empty,
                or if the "primaryKeySchema" entry is absent or empty.

        Examples:
            >>> # a full-row insert using astrapy's datatypes
            >>> from astrapy.data_types import (
            ...     DataAPISet,
            ...     DataAPITimestamp,
            ...     DataAPIVector,
            ... )
            >>> from astrapy.ids import UUID
            >>>
            >>> insert_result = my_table.insert_one(
            ...     {
            ...         "match_id": "mtch_0",
            ...         "round": 1,
            ...         "m_vector": DataAPIVector([0.4, -0.6, 0.2]),
            ...         "score": 18,
            ...         "when": DataAPITimestamp.from_string("2024-11-28T11:30:00Z"),
            ...         "winner": "Victor",
            ...         "fighters": DataAPISet([
            ...             UUID("0193539a-2770-8c09-a32a-111111111111"),
            ...         ]),
            ...     },
            ... )
            >>> insert_result.inserted_id
            {'match_id': 'mtch_0', 'round': 1}
            >>> insert_result.inserted_id_tuple
            ('mtch_0', 1)
            >>>
            >>> # a partial-row (which in this case overwrites some of the values)
            >>> my_table.insert_one(
            ...     {
            ...         "match_id": "mtch_0",
            ...         "round": 1,
            ...         "winner": "Victor Vector",
            ...         "fighters": DataAPISet([
            ...             UUID("0193539a-2770-8c09-a32a-111111111111"),
            ...             UUID("0193539a-2880-8875-9f07-222222222222"),
            ...         ]),
            ...     },
            ... )
            TableInsertOneResult(inserted_id={'match_id': 'mtch_0', 'round': 1} ...
            >>>
            >>> # another insertion demonstrating standard-library datatypes in values
            >>> import datetime
            >>>
            >>> my_table.insert_one(
            ...     {
            ...         "match_id": "mtch_0",
            ...         "round": 2,
            ...         "winner": "Angela",
            ...         "score": 25,
            ...         "when": datetime.datetime(
            ...             2024, 7, 13, 12, 55, 30, 889,
            ...             tzinfo=datetime.timezone.utc,
            ...         ),
            ...         "fighters": {
            ...             UUID("019353cb-8e01-8276-a190-333333333333"),
            ...         },
            ...         "m_vector": [0.4, -0.6, 0.2],
            ...     },
            ... )
            TableInsertOneResult(inserted_id={'match_id': 'mtch_0', 'round': 2}, ...
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        io_payload = self._converter_agent.preprocess_payload(
            {"insertOne": {"document": row}},
            map2tuple_checker=map2tuple_checker_insert_one,
        )
        logger.info(f"insertOne on '{self.name}'")
        io_response = self._api_commander.request(
            payload=io_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished insertOne on '{self.name}'")

        io_status = io_response.get("status", {})
        if "insertedIds" not in io_status:
            raise UnexpectedDataAPIResponseException(
                text="Response from insertOne API command missing 'insertedIds'.",
                raw_response=io_response,
            )
        if not io_status["insertedIds"]:
            raise UnexpectedDataAPIResponseException(
                text="Response from insertOne API command has empty 'insertedIds'.",
                raw_response=io_response,
            )
        # Use `.get` here: a response lacking the key altogether must be
        # reported as a faulty API response, not surface as a bare KeyError.
        primary_key_schema = io_status.get("primaryKeySchema")
        if not primary_key_schema:
            raise UnexpectedDataAPIResponseException(
                text=(
                    "Response from insertOne API command has "
                    "empty 'primaryKeySchema'."
                ),
                raw_response=io_response,
            )
        # A single row was sent, hence only the first inserted ID is relevant.
        inserted_id_list = io_status["insertedIds"][0]
        inserted_id_tuple, inserted_id = self._converter_agent.postprocess_key(
            inserted_id_list,
            primary_key_schema_dict=primary_key_schema,
        )
        return TableInsertOneResult(
            raw_results=[io_response],
            inserted_id=inserted_id,
            inserted_id_tuple=inserted_id_tuple,
        )

    def _prepare_keys_from_status(
        self, status: dict[str, Any] | None, raise_on_missing: bool = False
    ) -> tuple[list[dict[str, Any]], list[tuple[Any, ...]]]:
        ids: list[dict[str, Any]]
        id_tuples: list[tuple[Any, ...]]
        if status is None:
            if raise_on_missing:
                raise UnexpectedDataAPIResponseException(
                    text="'status' not found in API response",
                    raw_response=None,
                )
            else:
                ids = []
                id_tuples = []
        else:
            if "documentResponses" not in status:
                raise UnexpectedDataAPIResponseException(
                    text=(
                        "received a 'status' without 'documentResponses' "
                        f"in API response (received: {status})"
                    ),
                    raw_response=None,
                )
            raw_inserted_ids = [
                row_resp["_id"]
                for row_resp in status["documentResponses"]
                if row_resp["status"] == "OK"
            ]
            if raw_inserted_ids:
                if "primaryKeySchema" not in status:
                    raise UnexpectedDataAPIResponseException(
                        text=(
                            "received a 'status' without 'primaryKeySchema' "
                            f"in API response (received: {status})"
                        ),
                        raw_response=None,
                    )
                id_tuples_and_ids = self._converter_agent.postprocess_keys(
                    raw_inserted_ids,
                    primary_key_schema_dict=status["primaryKeySchema"],
                )
                id_tuples = [tpl for tpl, _ in id_tuples_and_ids]
                ids = [id for _, id in id_tuples_and_ids]
            else:
                ids = []
                id_tuples = []
        return ids, id_tuples

    def insert_many(
        self,
        rows: Iterable[ROW],
        *,
        ordered: bool = False,
        chunk_size: int | None = None,
        concurrency: int | None = None,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> TableInsertManyResult:
        """
        Insert a number of rows into the table,
        with implied overwrite in case of primary key collision.

        Inserting rows whose primary key correspond to entries alredy stored
        in the table has the effect of an in-place update: the rows are overwritten.
        However, if the rows being inserted are partially provided, i.e. some columns
        are not specified, these are left unchanged on the database. To explicitly
        reset them, specify their value as appropriate to their data type,
        i.e. `None`, `{}` or analogous.

        Args:
            rows: an iterable of dictionaries, each expressing a row to insert.
                Each row must at least fully specify the primary key column values,
                while any other column may be omitted if desired (in which case
                it is left as is on DB).
                The values for the various columns supplied in each row must
                be of the right data type for the insertion to succeed.
                Non-primary-key columns can also be explicitly set to null.
            ordered: if False (default), the insertions can occur in arbitrary order
                and possibly concurrently. If True, they are processed sequentially.
                If there are no specific reasons against it, unordered insertions
                re to be preferred as they complete much faster.
            chunk_size: how many rows to include in each single API request.
                Exceeding the server maximum allowed value results in an error.
                Leave it unspecified (recommended) to use the system default.
            concurrency: maximum number of concurrent requests to the API at
                a given time. It cannot be more than one for ordered insertions.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                whole operation, which may consist of several API requests.
                If not provided, this object's defaults apply.
            request_timeout_ms: a timeout, in milliseconds, to impose on each
                individual HTTP request to the Data API to accomplish the operation.
                If not provided, this object's defaults apply.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a TableInsertManyResult object, whose attributes are the primary key
            of the inserted rows both in the form of dictionaries and of tuples.

        Examples:
            >>> # Insert complete and partial rows at once (concurrently)
            >>> from astrapy.data_types import (
            ...     DataAPISet,
            ...     DataAPITimestamp,
            ...     DataAPIVector,
            ... )
            >>> from astrapy.ids import UUID
            >>>
            >>> insert_result = my_table.insert_many(
            ...     [
            ...         {
            ...             "match_id": "fight4",
            ...             "round": 1,
            ...             "winner": "Victor",
            ...             "score": 18,
            ...             "when": DataAPITimestamp.from_string(
            ...                 "2024-11-28T11:30:00Z",
            ...             ),
            ...             "fighters": DataAPISet([
            ...                 UUID("0193539a-2770-8c09-a32a-111111111111"),
            ...                 UUID('019353e3-00b4-83f9-a127-222222222222'),
            ...             ]),
            ...             "m_vector": DataAPIVector([0.4, -0.6, 0.2]),
            ...         },
            ...         {"match_id": "fight5", "round": 1, "winner": "Adam"},
            ...         {"match_id": "fight5", "round": 2, "winner": "Betta"},
            ...         {"match_id": "fight5", "round": 3, "winner": "Caio"},
            ...         {
            ...             "match_id": "challenge6",
            ...             "round": 1,
            ...             "winner": "Donna",
            ...             "m_vector": [0.9, -0.1, -0.3],
            ...         },
            ...         {"match_id": "challenge6", "round": 2, "winner": "Erick"},
            ...         {"match_id": "challenge6", "round": 3, "winner": "Fiona"},
            ...         {"match_id": "tournamentA", "round": 1, "winner": "Gael"},
            ...         {"match_id": "tournamentA", "round": 2, "winner": "Hanna"},
            ...         {
            ...             "match_id": "tournamentA",
            ...             "round": 3,
            ...             "winner": "Ian",
            ...             "fighters": DataAPISet([
            ...                 UUID("0193539a-2770-8c09-a32a-111111111111"),
            ...             ]),
            ...         },
            ...         {"match_id": "fight7", "round": 1, "winner": "Joy"},
            ...         {"match_id": "fight7", "round": 2, "winner": "Kevin"},
            ...         {"match_id": "fight7", "round": 3, "winner": "Lauretta"},
            ...     ],
            ...     concurrency=10,
            ...     chunk_size=3,
            ... )
            >>> insert_result.inserted_ids
            [{'match_id': 'fight4', 'round': 1}, {'match_id': 'fight5', ...
            >>> insert_result.inserted_id_tuples
            [('fight4', 1), ('fight5', 1), ('fight5', 2), ('fight5', 3), ...
            >>>
            >>> # Ordered insertion
            >>> # (would stop on first failure; predictable end result on DB)
            >>> my_table.insert_many(
            ...     [
            ...         {"match_id": "fight5", "round": 1, "winner": "Adam0"},
            ...         {"match_id": "fight5", "round": 2, "winner": "Betta0"},
            ...         {"match_id": "fight5", "round": 3, "winner": "Caio0"},
            ...         {"match_id": "fight5", "round": 1, "winner": "Adam Zuul"},
            ...         {"match_id": "fight5", "round": 2, "winner": "Betta Vigo"},
            ...         {"match_id": "fight5", "round": 3, "winner": "Caio Gozer"},
            ...     ],
            ...     ordered=True,
            ... )
            TableInsertManyResult(inserted_ids=[{'match_id': 'fight5', 'round': 1}, ...

        Note:
            Unordered insertions are executed with some degree of concurrency,
            so it is usually better to prefer this mode unless the order in the
            row sequence is important.

        Note:
            A failure mode for this command is related to certain faulty rows
            found among those to insert: validation may fail, for example, if the
            vector length does not match the table schema.

            For an ordered insertion, the method will raise an exception at
            the first such faulty row -- nevertheless, all rows processed
            until then will end up being written to the database.

            For unordered insertions, if the error stems from faulty rows
            the insertion proceeds until exhausting the input rows: then,
            an exception is raised -- and all insertable rows will have been
            written to the database, including those "after" the troublesome ones.

            Errors occurring during an insert_many operation, for that reason,
            may result in a `TableInsertManyException` being raised.
            This exception allows to inspect the list of row IDs that were
            successfully inserted, while accessing at the same time the underlying
            "root errors" that made the full method call to fail.
        """

        _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
            (general_method_timeout_ms, "general_method_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.general_method_timeout_ms,
                "general_method_timeout_ms",
            ),
        )
        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )
        if concurrency is None:
            if ordered:
                _concurrency = 1
            else:
                _concurrency = DEFAULT_INSERT_MANY_CONCURRENCY
        else:
            _concurrency = concurrency
        if _concurrency > 1 and ordered:
            raise ValueError("Cannot run ordered insert_many concurrently.")
        if chunk_size is None:
            _chunk_size = DEFAULT_INSERT_MANY_CHUNK_SIZE
        else:
            _chunk_size = chunk_size
        _rows = list(rows)
        logger.info(f"inserting {len(_rows)} rows in '{self.name}'")
        raw_results: list[dict[str, Any]] = []
        im_payloads: list[dict[str, Any] | None] = []
        timeout_manager = MultiCallTimeoutManager(
            overall_timeout_ms=_general_method_timeout_ms,
            timeout_label=_gmt_label,
        )
        if ordered:
            options = {"ordered": True, "returnDocumentResponses": True}
            inserted_ids: list[Any] = []
            inserted_id_tuples: list[Any] = []
            for i in range(0, len(_rows), _chunk_size):
                im_payload = self._converter_agent.preprocess_payload(
                    {
                        "insertMany": {
                            "documents": _rows[i : i + _chunk_size],
                            "options": options,
                        },
                    },
                    map2tuple_checker=map2tuple_checker_insert_many,
                )
                logger.info(f"insertMany on '{self.name}'")
                chunk_response = self._api_commander.request(
                    payload=im_payload,
                    raise_api_errors=False,
                    timeout_context=timeout_manager.remaining_timeout(
                        cap_time_ms=_request_timeout_ms,
                        cap_timeout_label=_rt_label,
                    ),
                )
                logger.info(f"finished insertMany on '{self.name}'")
                # accumulate the results in this call
                chunk_inserted_ids, chunk_inserted_ids_tuples = (
                    self._prepare_keys_from_status(chunk_response.get("status"))
                )
                inserted_ids += chunk_inserted_ids
                inserted_id_tuples += chunk_inserted_ids_tuples
                raw_results += [chunk_response]
                im_payloads += [im_payload]
                # if errors, quit early
                if chunk_response.get("errors", []):
                    response_exception = DataAPIResponseException.from_response(
                        command=im_payload,
                        raw_response=chunk_response,
                    )
                    raise TableInsertManyException(
                        inserted_ids=inserted_ids,
                        inserted_id_tuples=inserted_id_tuples,
                        exceptions=[response_exception],
                    )

            # return
            full_result = TableInsertManyResult(
                raw_results=raw_results,
                inserted_ids=inserted_ids,
                inserted_id_tuples=inserted_id_tuples,
            )
            logger.info(f"finished inserting {len(_rows)} rows in '{self.name}'")
            return full_result

        else:
            # unordered: concurrent or not, do all of them and parse the results
            options = {"ordered": False, "returnDocumentResponses": True}
            if _concurrency > 1:
                with ThreadPoolExecutor(max_workers=_concurrency) as executor:

                    def _chunk_insertor(
                        row_chunk: list[dict[str, Any]],
                    ) -> tuple[dict[str, Any] | None, dict[str, Any]]:
                        im_payload = self._converter_agent.preprocess_payload(
                            {
                                "insertMany": {
                                    "documents": row_chunk,
                                    "options": options,
                                },
                            },
                            map2tuple_checker=map2tuple_checker_insert_many,
                        )
                        logger.info(f"insertMany(chunk) on '{self.name}'")
                        im_response = self._api_commander.request(
                            payload=im_payload,
                            raise_api_errors=False,
                            timeout_context=timeout_manager.remaining_timeout(
                                cap_time_ms=_request_timeout_ms,
                                cap_timeout_label=_rt_label,
                            ),
                        )
                        logger.info(f"finished insertMany(chunk) on '{self.name}'")
                        return im_payload, im_response

                    raw_pl_results_pairs = list(
                        executor.map(
                            _chunk_insertor,
                            (
                                _rows[i : i + _chunk_size]
                                for i in range(0, len(_rows), _chunk_size)
                            ),
                        )
                    )
                    if raw_pl_results_pairs:
                        im_payloads, raw_results = list(zip(*raw_pl_results_pairs))
                    else:
                        im_payloads, raw_results = [], []

            else:
                for i in range(0, len(_rows), _chunk_size):
                    im_payload = self._converter_agent.preprocess_payload(
                        {
                            "insertMany": {
                                "documents": _rows[i : i + _chunk_size],
                                "options": options,
                            },
                        },
                        map2tuple_checker=map2tuple_checker_insert_many,
                    )
                    logger.info(f"insertMany(chunk) on '{self.name}'")
                    im_response = self._api_commander.request(
                        payload=im_payload,
                        raise_api_errors=False,
                        timeout_context=timeout_manager.remaining_timeout(
                            cap_time_ms=_request_timeout_ms,
                            cap_timeout_label=_rt_label,
                        ),
                    )
                    logger.info(f"finished insertMany(chunk) on '{self.name}'")
                    raw_results.append(im_response)
                    im_payloads.append(im_payload)
            # recast raw_results. Each response has its schema: unfold appropriately
            ids_and_tuples_per_chunk = [
                self._prepare_keys_from_status(chunk_response.get("status"))
                for chunk_response in raw_results
            ]
            inserted_ids = [
                inserted_id
                for chunk_ids, _ in ids_and_tuples_per_chunk
                for inserted_id in chunk_ids
            ]
            inserted_id_tuples = [
                inserted_id_tuple
                for _, chunk_id_tuples in ids_and_tuples_per_chunk
                for inserted_id_tuple in chunk_id_tuples
            ]
            # check-raise
            response_exceptions = [
                DataAPIResponseException.from_response(
                    command=chunk_payload,
                    raw_response=chunk_response,
                )
                for chunk_payload, chunk_response in zip(im_payloads, raw_results)
                if chunk_response.get("errors", [])
            ]
            if response_exceptions:
                raise TableInsertManyException(
                    inserted_ids=inserted_ids,
                    inserted_id_tuples=inserted_id_tuples,
                    exceptions=response_exceptions,
                )

            # return
            full_result = TableInsertManyResult(
                raw_results=raw_results,
                inserted_ids=inserted_ids,
                inserted_id_tuples=inserted_id_tuples,
            )
            logger.info(f"finished inserting {len(_rows)} rows in '{self.name}'")
            return full_result

    # Typing-only overload of `find`: when `row_type` is omitted (i.e. left to
    # its None default), the returned cursor is a `TableFindCursor[ROW, ROW]`,
    # meaning that the items it yields keep the same type as the table rows.
    @overload
    def find(
        self,
        filter: FilterType | None = None,
        *,
        projection: ProjectionType | None = None,
        row_type: None = None,
        skip: int | None = None,
        limit: int | None = None,
        include_similarity: bool | None = None,
        include_sort_vector: bool | None = None,
        sort: SortType | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> TableFindCursor[ROW, ROW]: ...

    # Typing-only overload of `find`: when an explicit `row_type` is passed,
    # the returned cursor is a `TableFindCursor[ROW, ROW2]`, meaning that the
    # items it yields are typed as ROW2 (e.g. to reflect a projection).
    @overload
    def find(
        self,
        filter: FilterType | None = None,
        *,
        projection: ProjectionType | None = None,
        row_type: type[ROW2],
        skip: int | None = None,
        limit: int | None = None,
        include_similarity: bool | None = None,
        include_sort_vector: bool | None = None,
        sort: SortType | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> TableFindCursor[ROW, ROW2]: ...

    def find(
        self,
        filter: FilterType | None = None,
        *,
        projection: ProjectionType | None = None,
        row_type: type[ROW2] | None = None,
        skip: int | None = None,
        limit: int | None = None,
        include_similarity: bool | None = None,
        include_sort_vector: bool | None = None,
        sort: SortType | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> TableFindCursor[ROW, ROW2]:
        """
        Search the table for the rows satisfying the provided filter, returned
        according to the sorting criteria (which may include vector similarity).

        The result is a TableFindCursor, an object representing the stream of
        matching rows: it can be iterated over, or consumed and manipulated in
        several other ways (see the examples below and the `TableFindCursor`
        documentation for details). Since the number of returned items can be
        large, the cursor is lazy: it fetches new data, using the Data API
        pagination mechanism, only while it is being read.

        Calling `.to_list()` on a TableFindCursor makes it consume all rows and
        materialize the entire result set as a list. This is not recommended
        when the number of results is very large.

        Args:
            filter: a dictionary expressing the condition that the returned rows
                must satisfy. The filter can use operators, such as "$eq" for
                equality, and require columns to compare with literal values.
                Simple examples are `{}` (zero filter, not recommended for large
                tables), `{"match_no": 123}` (a shorthand for
                `{"match_no": {"$eq": 123}}`), or
                `{"match_no": 123, "round": "C"}` (multiple conditions are
                implicitly combined with "$and").
                Please consult the Data API documentation for a more detailed
                explanation of table search filters and tips on their usage.
            projection: a prescription of the columns to return for the matching
                rows. The projection can take the form
                `{"column1": True, "column2": True}`, `{"*": True}` (i.e. return
                the whole row), or the complementary form that excludes columns:
                `{"column1": False, "column2": False}`.
                To optimize bandwidth usage, using a projection is recommended,
                especially to avoid unnecessary columns of type vector with
                high-dimensional embeddings.
            row_type: this parameter acts as a formal specifier for the type
                checker. If omitted, the resulting cursor is implicitly a
                `TableFindCursor[ROW, ROW]`, i.e. the items it returns keep
                the same type as the rows in the table. Strictly typed code may
                want to specify this parameter, especially when a projection
                is given.
            skip: if provided, the number of rows that would be obtained first
                in the response and are skipped instead.
            limit: the maximum number of rows to get from the table. The returned
                cursor stops yielding rows when either this number is reached
                or there really are no more matches in the table.
            include_similarity: a boolean to request that the numeric value of
                the similarity be returned as an added "$similarity" key in each
                returned row. It can be used meaningfully only in a vector search
                (see `sort`).
            include_sort_vector: a boolean to request the search query vector.
                If set to True (and if the search is a vector search), calling
                the `get_sort_vector` method on the returned cursor will yield
                the vector used for the ANN search.
            sort: this dictionary parameter controls the order in which the rows
                are returned. It can express either a vector search or
                a regular (ascending/descending, even hierarchical) sorting.
                * For a vector search the parameter takes the form
                `{"vector_column": qv}`, with the query vector `qv` of the
                appropriate type (list of floats or DataAPIVector). If the table
                has automatic embedding generation ("vectorize") enabled on that
                column, the form `{"vectorize_enabled_column": "query text"}`
                is also valid.
                * In the case of non-vector sorting, the parameter specifies the
                column(s) and the ascending/descending ordering required.
                If multiple columns are provided, the sorting applies them
                hierarchically to the rows. Examples are
                `{"score": SortMode.ASCENDING}` (equivalently `{"score": +1}`),
                `{"score": +1, "when": -1}`.
                Note that, depending on the column(s) chosen for sorting, the
                table partitioning structure, and the presence of indexes, the
                sorting may be done in-memory by the API. In that case, there may
                be performance implications and limitations on the number of
                items returned.
                Consult the Data API documentation for more details on this topic.
            request_timeout_ms: a timeout, in milliseconds, to impose on each
                individual HTTP request to the Data API to accomplish the
                operation. If not provided, this object's defaults apply.
            timeout_ms: an alias for `request_timeout_ms`.

        Returns:
            a TableFindCursor object, that can be iterated over (and manipulated
            in several ways). If needed, the cursor handles pagination under
            the hood as the rows are consumed.

        Note:
            Since the rows are retrieved progressively, in chunks, while the
            cursor is being iterated over, the results actually obtained may
            reflect changes occurring to the table contents in real time.

        Examples:
            >>> from astrapy.constants import SortMode
            >>> from astrapy.data_types import DataAPITimestamp, DataAPIVector
            >>>
            >>> # Iterate over results:
            >>> for row in my_table.find({"match_id": "challenge6"}):
            ...     print(f"(R:{row['round']}): winner {row['winner']}")
            ...
            (R:1): winner Donna
            (R:2): winner Erick
            (R:3): winner Fiona
            >>> # Optimize bandwidth using a projection:
            >>> proj = {"round": True, "winner": True}
            >>> for row in my_table.find({"match_id": "challenge6"}, projection=proj):
            ...     print(f"(R:{row['round']}): winner {row['winner']}")
            ...
            (R:1): winner Donna
            (R:2): winner Erick
            (R:3): winner Fiona
            >>> # Filter on the partitioning:
            >>> my_table.find({"match_id": "challenge6"}).to_list()
            [{'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
            >>>
            >>> # Filter on primary key:
            >>> my_table.find({"match_id": "challenge6", "round": 1}).to_list()
            [{'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
            >>>
            >>> # Filter on a regular indexed column:
            >>> my_table.find({"winner": "Caio Gozer"}).to_list()
            [{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
            >>>
            >>> # Non-equality filter on a regular indexed column:
            >>> my_table.find({"score": {"$gte": 15}}).to_list()
            [{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
            >>>
            >>> # Filter on a regular non-indexed column:
            >>> # (not recommended performance-wise)
            >>> my_table.find(
            ...     {"when": {
            ...         "$gte": DataAPITimestamp.from_string("1999-12-31T01:23:44Z")
            ...     }}
            ... ).to_list()
            The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
            [{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
            >>>
            >>> # Empty filter (not recommended performance-wise):
            >>> my_table.find({}).to_list()
            The Data API returned a warning: {'errorCode': 'ZERO_FILTER_OPERATIONS', ...
            [{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
            >>>
            >>> # Filter on the primary key and a regular non-indexed column:
            >>> # (not recommended performance-wise)
            >>> my_table.find(
            ...     {"match_id": "fight5", "round": 3, "winner": "Caio Gozer"}
            ... ).to_list()
            The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
            [{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
            >>>
            >>> # Filter on a regular non-indexed column (and incomplete primary key)
            >>> # (not recommended performance-wise)
            >>> my_table.find({"round": 3, "winner": "Caio Gozer"}).to_list()
            The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
            [{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
            >>>
            >>> # Vector search with "sort" (on an appropriately-indexed vector column):
            >>> my_table.find(
            ...     {},
            ...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
            ...     projection={"winner": True},
            ...     limit=3,
            ... ).to_list()
            [{'winner': 'Donna'}, {'winner': 'Victor'}]
            >>>
            >>> # Hybrid search with vector sort and non-vector filtering:
            >>> my_table.find(
            ...     {"match_id": "fight4"},
            ...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
            ...     projection={"winner": True},
            ...     limit=3,
            ... ).to_list()
            [{'winner': 'Victor'}]
            >>>
            >>> # Return the numeric value of the vector similarity
            >>> # (also demonstrating that one can pass a plain list for a vector):
            >>> my_table.find(
            ...     {},
            ...     sort={"m_vector": [0.2, 0.3, 0.4]},
            ...     projection={"winner": True},
            ...     limit=3,
            ...     include_similarity=True,
            ... ).to_list()
            [{'winner': 'Donna', '$similarity': 0.515}, {'winner': 'Victor', ...
            >>>
            >>> # Non-vector sorting on a 'partitionSort' column:
            >>> my_table.find(
            ...     {"match_id": "fight5"},
            ...     sort={"round": SortMode.DESCENDING},
            ...     projection={"winner": True},
            ... ).to_list()
            [{'winner': 'Caio Gozer'}, {'winner': 'Betta Vigo'}, ...
            >>>
            >>> # Using `skip` and `limit`:
            >>> my_table.find(
            ...     {"match_id": "fight5"},
            ...     sort={"round": SortMode.DESCENDING},
            ...     projection={"winner": True},
            ...     skip=1,
            ...     limit=2,
            ... ).to_list()
            The Data API returned a warning: {'errorCode': 'IN_MEMORY_SORTING...
            [{'winner': 'Betta Vigo'}, {'winner': 'Adam Zuul'}]
            >>>
            >>> # Non-vector sorting on a regular column:
            >>> # (not recommended performance-wise)
            >>> my_table.find(
            ...     {"match_id": "fight5"},
            ...     sort={"winner": SortMode.ASCENDING},
            ...     projection={"winner": True},
            ... ).to_list()
            The Data API returned a warning: {'errorCode': 'IN_MEMORY_SORTING...
            [{'winner': 'Adam Zuul'}, {'winner': 'Betta Vigo'}, ...
            >>>
            >>> # Using `.map()` on a cursor:
            >>> winner_cursor = my_table.find(
            ...     {"match_id": "fight5"},
            ...     sort={"round": SortMode.DESCENDING},
            ...     projection={"winner": True},
            ...     limit=5,
            ... )
            >>> print("/".join(winner_cursor.map(lambda row: row["winner"].upper())))
            CAIO GOZER/BETTA VIGO/ADAM ZUUL
            >>>
            >>> # Some other examples of cursor manipulation
            >>> matches_cursor = my_table.find(
            ...     sort={"m_vector": DataAPIVector([-0.1, 0.15, 0.3])}
            ... )
            >>> matches_cursor.has_next()
            True
            >>> next(matches_cursor)
            {'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
            >>> matches_cursor.consumed
            1
            >>> matches_cursor.rewind()
            >>> matches_cursor.consumed
            0
            >>> matches_cursor.has_next()
            True
            >>> matches_cursor.close()
            >>> try:
            ...     next(matches_cursor)
            ... except StopIteration:
            ...     print("StopIteration triggered.")
            ...
            StopIteration triggered.
        """

        # The cursor class is imported at call time: a module-level import
        # would give rise to circular import issues.
        from astrapy.cursors import TableFindCursor

        # Per-request timeout, by precedence: the explicit `request_timeout_ms`,
        # then its `timeout_ms` alias, then the default from this object's options.
        effective_timeout_ms, effective_timeout_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )

        # Start from a bare cursor bound to this table (no overall timeout is
        # set on it), then apply the search settings one at a time, each step
        # rebinding the name to the cursor returned by the call.
        find_cursor: TableFindCursor[ROW, ROW2] = TableFindCursor(
            table=self,
            request_timeout_ms=effective_timeout_ms,
            overall_timeout_ms=None,
            request_timeout_label=effective_timeout_label,
        )
        find_cursor = find_cursor.filter(filter)
        find_cursor = find_cursor.project(projection)
        find_cursor = find_cursor.skip(skip)
        find_cursor = find_cursor.limit(limit)
        find_cursor = find_cursor.sort(sort)
        find_cursor = find_cursor.include_similarity(include_similarity)
        find_cursor = find_cursor.include_sort_vector(include_sort_vector)
        return find_cursor

    def find_one(
        self,
        filter: FilterType | None = None,
        *,
        projection: ProjectionType | None = None,
        include_similarity: bool | None = None,
        sort: SortType | None = None,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> ROW | None:
        """
        Run a search according to the given filtering and sorting criteria
        and return the top row matching it, or nothing if there are none.

        The parameters are analogous to some of the parameters to the `find` method
        (which has a few more that do not make sense in this case, such as `limit`).

        Args:
            filter: a dictionary expressing which condition the returned row
                must satisfy. The filter can use operators, such as "$eq" for equality,
                and require columns to compare with literal values. Simple examples
                are `{}` (zero filter), `{"match_no": 123}` (a shorthand for
                `{"match_no": {"$eq": 123}}`, or `{"match_no": 123, "round": "C"}`
                (multiple conditions are implicitly combined with "$and").
                Please consult the Data API documentation for a more detailed
                explanation of table search filters and tips on their usage.
            projection: a prescription on which columns to return for the matching row.
                The projection can take the form `{"column1": True, "column2": True}`.
                `{"*": True}` (i.e. return the whole row), or the complementary
                form that excludes columns: `{"column1": False, "column2": False}`.
                To optimize bandwidth usage, it is recommended to use a projection,
                especially to avoid unnecessary columns of type vector with
                high-dimensional embeddings.
            include_similarity: a boolean to request the numeric value of the
                similarity to be returned as an added "$similarity" key in the returned
                row. It can be used meaningfully only in a vector search (see `sort`).
            sort: this dictionary parameter controls the sorting order, hence determines
                which row is being returned.
                The sort parameter can express either a vector search or
                a regular (ascending/descending, even hierarchical) sorting.
                * For a vector search the parameter takes the form
                `{"vector_column": qv}`, with the query vector `qv` of the appropriate
                type (list of floats or DataAPIVector). If the table has automatic
                embedding generation ("vectorize") enabled on that column, the form
                `{"vectorize_enabled_column": "query text"}` is also valid.
                * In the case of non-vector sorting, the parameter specifies the
                column(s) and the ascending/descending ordering required.
                If multiple columns are provided, the sorting applies them
                hierarchically to the rows. Examples are `{"score": SortMode.ASCENDING}`
                (equivalently `{"score": +1}`), `{"score": +1, "when": -1}`.
                Note that, depending on the column(s) chosen for sorting, the table
                partitioning structure, and the presence of indexes, the sorting
                may be done in-memory by the API. In that case, there may be performance
                implications.
                Consult the Data API documentation for more details on this topic.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a dictionary expressing the result if a row is found, otherwise None.

        Examples:
            >>> from astrapy.constants import SortMode
            >>> from astrapy.data_types import DataAPITimestamp, DataAPIVector
            >>>
            >>> # Filter on the partitioning:
            >>> my_table.find_one({"match_id": "challenge6"})
            {'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
            >>>
            >>> # A find with no matches:
            >>> str(my_table.find_one({"match_id": "not_real"}))
            'None'
            >>>
            >>> # Optimize bandwidth using a projection:
            >>> my_table.find_one(
            ...     {"match_id": "challenge6"},
            ...     projection={"round": True, "winner": True},
            ... )
            {'round': 1, 'winner': 'Donna'}
            >>>
            >>> # Filter on primary key:
            >>> my_table.find_one({"match_id": "challenge6", "round": 1})
            {'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
            >>>
            >>> # Filter on a regular indexed column:
            >>> my_table.find_one({"winner": "Caio Gozer"})
            {'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
            >>>
            >>> # Non-equality filter on a regular indexed column:
            >>> my_table.find_one({"score": {"$gte": 15}})
            {'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
            >>>
            >>> # Filter on a regular non-indexed column:
            >>> # (not recommended performance-wise)
            >>> my_table.find_one(
            ...     {"when": {
            ...         "$gte": DataAPITimestamp.from_string("1999-12-31T01:23:44Z")
            ...     }}
            ... )
            The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
            {'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
            >>>
            >>> # Empty filter:
            >>> my_table.find_one({})
            The Data API returned a warning: {'errorCode': 'ZERO_FILTER_OPERATIONS', ...
            {'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
            >>>
            >>> # Filter on the primary key and a regular non-indexed column:
            >>> # (not recommended performance-wise)
            >>> my_table.find_one(
            ...     {"match_id": "fight5", "round": 3, "winner": "Caio Gozer"}
            ... )
            The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
            {'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
            >>>
            >>> # Filter on a regular non-indexed column (and incomplete primary key)
            >>> # (not recommended performance-wise)
            >>> my_table.find_one({"round": 3, "winner": "Caio Gozer"})
            The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
            {'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
            >>>
            >>> # Vector search with "sort" (on an appropriately-indexed vector column):
            >>> my_table.find_one(
            ...     {},
            ...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
            ...     projection={"winner": True},
            ... )
            {'winner': 'Donna'}
            >>>
            >>> # Hybrid search with vector sort and non-vector filtering:
            >>> my_table.find_one(
            ...     {"match_id": "fight4"},
            ...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
            ...     projection={"winner": True},
            ... )
            {'winner': 'Victor'}
            >>>
            >>> # Return the numeric value of the vector similarity
            >>> # (also demonstrating that one can pass a plain list for a vector):
            >>> my_table.find_one(
            ...     {},
            ...     sort={"m_vector": [0.2, 0.3, 0.4]},
            ...     projection={"winner": True},
            ...     include_similarity=True,
            ... )
            {'winner': 'Donna', '$similarity': 0.515}
            >>>
            >>> # Non-vector sorting on a 'partitionSort' column:
            >>> my_table.find_one(
            ...     {"match_id": "fight5"},
            ...     sort={"round": SortMode.DESCENDING},
            ...     projection={"winner": True},
            ... )
            {'winner': 'Caio Gozer'}
            >>>
            >>> # Non-vector sorting on a regular column:
            >>> # (not recommended performance-wise)
            >>> my_table.find_one(
            ...     {"match_id": "fight5"},
            ...     sort={"winner": SortMode.ASCENDING},
            ...     projection={"winner": True},
            ... )
            The Data API returned a warning: {'errorCode': 'IN_MEMORY_SORTING...
            {'winner': 'Adam Zuul'}
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        fo_options = (
            None
            if include_similarity is None
            else {"includeSimilarity": include_similarity}
        )
        fo_payload = self._converter_agent.preprocess_payload(
            {
                "findOne": {
                    k: v
                    for k, v in {
                        "filter": filter,
                        "projection": normalize_optional_projection(projection),
                        "options": fo_options,
                        "sort": sort,
                    }.items()
                    if v is not None
                }
            },
            map2tuple_checker=None,
        )
        fo_response = self._api_commander.request(
            payload=fo_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        if "document" not in (fo_response.get("data") or {}):
            raise UnexpectedDataAPIResponseException(
                text="Response from findOne API command missing 'document'.",
                raw_response=fo_response,
            )
        if "projectionSchema" not in (fo_response.get("status") or {}):
            raise UnexpectedDataAPIResponseException(
                text="Response from findOne API command missing 'projectionSchema'.",
                raw_response=fo_response,
            )
        doc_response = fo_response["data"]["document"]
        if doc_response is None:
            return None
        return self._converter_agent.postprocess_row(
            fo_response["data"]["document"],
            columns_dict=fo_response["status"]["projectionSchema"],
            similarity_pseudocolumn="$similarity" if include_similarity else None,
        )

    def distinct(
        self,
        key: str | Iterable[str | int],
        *,
        filter: FilterType | None = None,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[Any]:
        """
        Collect the distinct values taken by `key` over the rows of the table
        selected by the provided filter, and return them as a list.

        Args:
            key: which field to inspect on each row. Usually this is a plain
                column name; the dot-notation is accepted as well, to reach
                subkeys or indices within lists (for example,
                "map_column.subkey" or "list_column.2").
                When a column name contains literal dots or ampersands,
                the string passed here must be escaped to be treated properly.
                As an alternative, the key can be given as a list of strings
                and numbers, one item per field name/index: in this form no
                escaping is needed. Examples are ["map_column", "subkey"]
                and ["list_column", 2].
                Entries of set and list columns are "unrolled" automatically.
            filter: a dictionary stating the condition that rows must satisfy
                in order to be inspected. Filters may use operators, such as
                "$eq" for equality, to compare columns with literal values.
                Simple examples are `{}` (zero filter), `{"match_no": 123}`
                (a shorthand for `{"match_no": {"$eq": 123}}`), or
                `{"match_no": 123, "round": "C"}` (multiple conditions are
                implicitly combined with "$and").
                Please consult the Data API documentation for a more detailed
                explanation of table search filters and tips on their usage.
            general_method_timeout_ms: a timeout, in milliseconds, for the whole
                requested operation. Since this method relies on a `find`
                cursor, it may entail several successive HTTP API requests,
                depending on the amount of involved rows.
                If not provided, this object's defaults apply.
            request_timeout_ms: a timeout, in milliseconds, for each API request.
                If not provided, this object's defaults apply.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a list with each different value found for `key` across the rows
            matching the filter. No item appears more than once, and items
            are listed in the order they are first encountered.

        Examples:
            >>> my_table.distinct("winner", filter={"match_id": "challenge6"})
            ['Donna', 'Erick', 'Fiona']
            >>>
            >>> # distinct values across the whole table:
            >>> # (not recommended performance-wise)
            >>> my_table.distinct("winner")
            The Data API returned a warning: {'errorCode': 'ZERO_FILTER_OPERATIONS', ...
            ['Victor', 'Adam Zuul', 'Betta Vigo', 'Caio Gozer', 'Donna', 'Erick', ...
            >>>
            >>> # Over a column containing null values
            >>> # (also with composite filter):
            >>> my_table.distinct(
            ...     "score",
            ...     filter={"match_id": {"$in": ["fight4", "tournamentA"]}},
            ... )
            [18, None]
            >>>
            >>> # distinct over a set column (automatically "unrolled"):
            >>> my_table.distinct(
            ...     "fighters",
            ...     filter={"match_id": {"$in": ["fight4", "tournamentA"]}},
            ... )
            [UUID('0193539a-2770-8c09-a32a-111111111111'), UUID('019353e3-00b4-...

        Note:
            `distinct` is a client-side operation: the rows are browsed
            through a `find` cursor and the unique values for `key` are
            accumulated locally. Consequently, a large number of matching rows
            may have performance, latency and ultimately billing implications.

        Note:
            For details on the behaviour of "distinct" in conjunction with
            real-time changes in the table contents, see the
            Note of the `find` command.
        """

        # imported lazily: a module-level import would be circular
        from astrapy.cursors import TableFindCursor

        # overall timeout: explicit parameter, then its alias, then the defaults
        _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
            (general_method_timeout_ms, "general_method_timeout_ms"),
            (timeout_ms, "timeout_ms"),
            (
                self.api_options.timeout_options.general_method_timeout_ms,
                "general_method_timeout_ms",
            ),
        )
        # per-request timeout: explicit parameter, then the defaults
        _request_timeout_ms, _rt_label = _first_valid_timeout(
            (request_timeout_ms, "request_timeout_ms"),
            (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
        )

        # set up the value extractor and the (top-level) key to project on
        extract_values = _create_document_key_extractor(key)
        projected_key = _reduce_distinct_key_to_shallow_safe(key)
        # the cursor's type hint is relaxed, but only within this method body
        rows_cursor: TableFindCursor[dict[str, Any], dict[str, Any]] = (
            TableFindCursor(
                table=self,
                request_timeout_ms=_request_timeout_ms,
                overall_timeout_ms=_general_method_timeout_ms,
                request_timeout_label=_rt_label,
                overall_timeout_label=_gmt_label,
            )  # type: ignore[assignment]
            .filter(filter)
            .project({projected_key: True})
        )

        # exhaust the cursor, keeping the first occurrence of each value;
        # uniqueness is decided on the hash computed for each item.
        serdes_options = self.api_options.serdes_options
        seen_hashes = set()
        unique_values: list[Any] = []
        logger.info(f"running distinct() on '{self.name}'")
        for row in rows_cursor:
            for value in extract_values(row):
                value_hash = _hash_table_document(value, options=serdes_options)
                if value_hash in seen_hashes:
                    continue
                seen_hashes.add(value_hash)
                unique_values.append(value)
        logger.info(f"finished running distinct() on '{self.name}'")
        return unique_values

    def count_documents(
        self,
        filter: FilterType,
        *,
        upper_bound: int,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> int:
        """
        Count the rows in the table matching the specified filter.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"name": "John", "age": 59}
                    {"$and": [{"name": {"$eq": "John"}}, {"age": {"$gt": 58}}]}
                See the Data API documentation for the full set of operators.
            upper_bound: a required ceiling on the result of the count operation.
                If the actual number of rows exceeds this value,
                an exception will be raised.
                Furthermore, if the actual number of rows exceeds the maximum
                count that the Data API can reach (regardless of upper_bound),
                an exception will be raised.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            the exact count of matching rows.

        Raises:
            TooManyRowsToCountException: if the API response flags "moreData"
                (i.e. the server stopped at its own maximum count), or if
                the returned count is greater than `upper_bound`.
            UnexpectedDataAPIResponseException: if the API response
                has no "count" entry in its "status".

        Examples:
            >>> my_table.insert_many([{"seq": i} for i in range(20)])
            TableInsertManyResult(...)
            >>> my_table.count_documents({}, upper_bound=100)
            20
            >>> my_table.count_documents({"seq":{"$gt": 15}}, upper_bound=100)
            4
            >>> my_table.count_documents({}, upper_bound=10)
            Traceback (most recent call last):
                ... ...
            astrapy.exceptions.TooManyRowsToCountException

        Note:
            Count operations are expensive: for this reason, the best practice
            is to provide a reasonable `upper_bound` according to the caller
            expectations. Moreover, indiscriminate usage of count operations
            for sizeable amounts of rows (i.e. in the thousands and more)
            is discouraged in favor of alternative application-specific solutions.
            Keep in mind that the Data API has a hard upper limit on the amount
            of rows it will count, and that an exception will be thrown
            by this method if this limit is encountered.
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # The filter goes through the converter agent, exactly as done for the
        # other filter-accepting commands of this class (findOne, deleteOne,
        # deleteMany), so that its values get the same treatment everywhere.
        cd_payload = self._converter_agent.preprocess_payload(
            {"countDocuments": {"filter": filter}},
            map2tuple_checker=None,
        )
        logger.info(f"countDocuments on '{self.name}'")
        cd_response = self._api_commander.request(
            payload=cd_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished countDocuments on '{self.name}'")
        # `or {}` also covers a "status" that is present but null, which must
        # surface as a faulty response rather than as a TypeError.
        cd_status = cd_response.get("status") or {}
        if "count" not in cd_status:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from countDocuments API command.",
                raw_response=cd_response,
            )
        count: int = cd_status["count"]
        # "moreData" takes precedence over the caller-provided bound:
        if cd_status.get("moreData", False):
            raise TooManyRowsToCountException(
                text=f"Document count exceeds {count}, the maximum allowed by the server",
                server_max_count_exceeded=True,
            )
        if count > upper_bound:
            raise TooManyRowsToCountException(
                text="Document count exceeds required upper bound",
                server_max_count_exceeded=False,
            )
        return count

    def estimated_document_count(
        self,
        *,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> int:
        """
        Query the API server for an estimate of the number of rows in the table.

        Contrary to `count_documents`, this method has no filtering parameters.

        Args:
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a server-provided estimate count of the rows in the table.

        Raises:
            UnexpectedDataAPIResponseException: if the API response
                has no "count" entry in its "status".

        Example:
            >>> my_table.estimated_document_count()
            5820
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        ed_payload: dict[str, Any] = {"estimatedDocumentCount": {}}
        logger.info(f"estimatedDocumentCount on '{self.name}'")
        ed_response = self._api_commander.request(
            payload=ed_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished estimatedDocumentCount on '{self.name}'")
        # `or {}` also covers a "status" that is present but null, which must
        # surface as a faulty response rather than as a TypeError.
        ed_status = ed_response.get("status") or {}
        if "count" not in ed_status:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from estimatedDocumentCount API command.",
                raw_response=ed_response,
            )
        count: int = ed_status["count"]
        return count

    def update_one(
        self,
        filter: FilterType,
        update: dict[str, Any],
        *,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Update a single row of the table, changing some or all of its columns.
        If no row matches, a new row is implicitly inserted.

        Args:
            filter: a predicate expressing the table primary key in full,
                i.e. a dictionary defining values for all columns that form the
                primary key. An example may be `{"match_id": "fight4", "round": 1}`.
            update: the update prescription to apply to the row, expressed
                as a dictionary conforming to the Data API syntax. The update
                operators for tables are `$set` and `$unset` (in particular,
                setting a column to None has the same effect as the $unset operator).
                Examples are `{"$set": {"round": 12}}` and
                `{"$unset": {"winner": "", "score": ""}}`.
                Note that the update operation cannot alter the primary key columns.
                See the Data API documentation for more details.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Raises:
            UnexpectedDataAPIResponseException: if the API response
                carries no "status" entry.

        Examples:
            >>> from astrapy.data_types import DataAPISet
            >>>
            >>> # Set a new value for a column
            >>> my_table.update_one(
            ...     {"match_id": "fight4", "round": 1},
            ...     update={"$set": {"winner": "Winona"}},
            ... )
            >>>
            >>> # Set a new value for a column while unsetting another column
            >>> my_table.update_one(
            ...     {"match_id": "fight4", "round": 1},
            ...     update={"$set": {"winner": None, "score": 24}},
            ... )
            >>>
            >>> # Set a 'set' column to empty
            >>> my_table.update_one(
            ...     {"match_id": "fight4", "round": 1},
            ...     update={"$set": {"fighters": DataAPISet()}},
            ... )
            >>>
            >>> # Set a 'set' column to empty using None
            >>> my_table.update_one(
            ...     {"match_id": "fight4", "round": 1},
            ...     update={"$set": {"fighters": None}},
            ... )
            >>>
            >>> # Set a 'set' column to empty using a regular (empty) set
            >>> my_table.update_one(
            ...     {"match_id": "fight4", "round": 1},
            ...     update={"$set": {"fighters": set()}},
            ... )
            >>>
            >>> # Set a 'set' column to empty using $unset
            >>> my_table.update_one(
            ...     {"match_id": "fight4", "round": 1},
            ...     update={"$unset": {"fighters": None}},
            ... )
            >>>
            >>> # A non-existing primary key creates a new row
            >>> my_table.update_one(
            ...     {"match_id": "bar_fight", "round": 4},
            ...     update={"$set": {"score": 8, "winner": "Jack"}},
            ... )
            >>>
            >>> # Delete column values for a row (they'll read as None now)
            >>> my_table.update_one(
            ...     {"match_id": "challenge6", "round": 2},
            ...     update={"$unset": {"winner": None, "score": None}},
            ... )

        Note:
            a row created entirely with update operations (as opposed to insertions)
            may, correspondingly, be deleted by means of an $unset update on all columns.
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # assemble the command body, leaving out any part passed as None
        command_body: dict[str, Any] = {}
        for part_name, part_value in (("filter", filter), ("update", update)):
            if part_value is not None:
                command_body[part_name] = part_value
        uo_payload = self._converter_agent.preprocess_payload(
            {"updateOne": command_body},
            map2tuple_checker=map2tuple_checker_update_one,
        )
        logger.info(f"updateOne on '{self.name}'")
        uo_response = self._api_commander.request(
            payload=uo_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished updateOne on '{self.name}'")
        if "status" not in uo_response:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from updateOne API command.",
                raw_response=uo_response,
            )
        # a "status" is all that is required: its contents are disregarded
        # and the method simply returns.

    def delete_one(
        self,
        filter: FilterType,
        *,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Delete a row, matching the provided value of the primary key.
        If no row is found with that primary key, the method does nothing.

        Args:
            filter: a predicate expressing the table primary key in full,
                i.e. a dictionary defining values for all columns that form the
                primary key. A row (at most one) is deleted if it matches that primary
                key. An example filter may be `{"match_id": "fight4", "round": 1}`.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Raises:
            UnexpectedDataAPIResponseException: if the "status" of the API
                response does not report a "deletedCount" equal to -1.

        Examples:
            >>> # Count the rows matching a certain filter
            >>> len(my_table.find({"match_id": "fight7"}).to_list())
            3
            >>>
            >>> # Delete a row belonging to the group
            >>> my_table.delete_one({"match_id": "fight7", "round": 2})
            >>>
            >>> # Count again
            >>> len(my_table.find({"match_id": "fight7"}).to_list())
            2
            >>>
            >>> # Attempt the delete again (nothing to delete)
            >>> my_table.delete_one({"match_id": "fight7", "round": 2})
            >>>
            >>> # The count is unchanged
            >>> len(my_table.find({"match_id": "fight7"}).to_list())
            2
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # a None filter is left out of the command altogether
        do_command: dict[str, Any] = {} if filter is None else {"filter": filter}
        do_payload = self._converter_agent.preprocess_payload(
            {"deleteOne": do_command},
            map2tuple_checker=None,
        )
        logger.info(f"deleteOne on '{self.name}'")
        do_response = self._api_commander.request(
            payload=do_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished deleteOne on '{self.name}'")
        # `or {}` also covers a "status" that is present but null, which must
        # surface as a faulty response rather than as an AttributeError.
        do_status = do_response.get("status") or {}
        # The only accepted outcome is deletedCount == -1 (presumably the API's
        # way to say the number of deleted rows is not tracked - TODO confirm).
        if do_status.get("deletedCount") != -1:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from deleteOne API command.",
                raw_response=do_response,
            )

    def delete_many(
        self,
        filter: FilterType,
        *,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Delete all rows matching a provided filter condition.
        This operation can target from a single row to the entirety of the table.

        Args:
            filter: a filter dictionary to specify which row(s) must be deleted.
                1. If the filter is in the form `{"pk1": val1, "pk2": val2 ...}`
                and specifies the primary key in full, at most one row is deleted,
                the one with that primary key.
                2. If the table has "partitionSort" columns, some or all of them
                may be left out (the least significant of them can also employ
                an inequality, or range, predicate): a range of rows, but always
                within a single partition, will be deleted.
                3. If an empty filter, `{}`, is passed, this operation empties
                the table completely. *USE WITH CARE*.
                4. Other kinds of filtering clauses are forbidden.
                In the following examples, the table is partitioned
                by columns ["pa1", "pa2"] and has partitionSort "ps1" and "ps2" in that
                order.
                Valid filter examples:
                - `{"pa1": x, "pa2": y, "ps1": z, "ps2": t}`: deletes one row
                - `{"pa1": x, "pa2": y, "ps1": z}`: deletes multiple rows
                - `{"pa1": x, "pa2": y, "ps1": z, "ps2": {"$lt": q}}`: del. multiple rows
                - `{"pa1": x, "pa2": y}`: deletes all rows in the partition
                - `{}`: empties the table (*CAUTION*)
                Invalid filter examples:
                - `{"pa1": x}`: incomplete partition key
                - `{"pa1": x, "ps1": z}`: incomplete partition key (whatever is added)
                - `{"pa1": x, "pa2": y, "ps1": {"$lt": r}, "ps2": t}`: inequality on
                  a non-least-significant partitionSort column provided.
                - `{"pa1": x, "pa2": y, "ps2": t}`: cannot skip "ps1"
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Raises:
            UnexpectedDataAPIResponseException: if the "status" of the API
                response does not report a "deletedCount" equal to -1.

        Examples:
            >>> # Delete a single row (full primary key specified):
            >>> my_table.delete_many({"match_id": "fight4", "round": 1})
            >>>
            >>> # Delete part of a partition (inequality on the
            >>> # last-mentioned 'partitionSort' column):
            >>> my_table.delete_many({"match_id": "fight5", "round": {"$gte": 5}})
            >>>
            >>> # Delete a whole partition (leave 'partitionSort' unspecified):
            >>> my_table.delete_many({"match_id": "fight7"})
            >>>
            >>> # empty the table entirely with empty filter (*CAUTION*):
            >>> my_table.delete_many({})
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # a None filter is left out of the command altogether
        dm_command: dict[str, Any] = {} if filter is None else {"filter": filter}
        dm_payload = self._converter_agent.preprocess_payload(
            {"deleteMany": dm_command},
            map2tuple_checker=None,
        )
        logger.info(f"deleteMany on '{self.name}'")
        dm_response = self._api_commander.request(
            payload=dm_payload,
            timeout_context=_TimeoutContext(
                request_ms=_request_timeout_ms, label=_rt_label
            ),
        )
        logger.info(f"finished deleteMany on '{self.name}'")
        # `or {}` also covers a "status" that is present but null, which must
        # surface as a faulty response rather than as an AttributeError.
        dm_status = dm_response.get("status") or {}
        # The only accepted outcome is deletedCount == -1 (presumably the API's
        # way to say the number of deleted rows is not tracked - TODO confirm).
        if dm_status.get("deletedCount") != -1:
            raise UnexpectedDataAPIResponseException(
                text="Faulty response from deleteMany API command.",
                raw_response=dm_response,
            )

    def drop(
        self,
        *,
        if_exists: bool | None = None,
        table_admin_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Remove this table from the database, together with all rows it stores.

        The actual work is delegated to the `drop_table` method of the
        database this table belongs to, which receives this table's name
        and all of the parameters given here.

        Args:
            if_exists: if passed as True, trying to drop a non-existing table
                will not error, just silently do nothing instead. If not provided,
                the API default behaviour will hold.
            table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `table_admin_timeout_ms`.
            timeout_ms: an alias for `table_admin_timeout_ms`.

        Example:
            >>> # List tables:
            >>> my_table.database.list_table_names()
            ['games']
            >>>
            >>> # Drop this table:
            >>> my_table.drop()
            >>>
            >>> # List tables again:
            >>> my_table.database.list_table_names()
            []
            >>>
            >>> # Try working on the table now:
            >>> from astrapy.exceptions import DataAPIResponseException
            >>> try:
            ...     my_table.find_one({})
            ... except DataAPIResponseException as err:
            ...     print(str(err))
            ...
            Collection does not exist [...] (COLLECTION_NOT_EXIST)

        Note:
            Use with caution.

        Note:
            After this method succeeds, this object can still be used to
            invoke methods: doing so hardly makes sense, however, since
            the underlying table does not exist anymore.
            It is the developer's responsibility to design a flow that
            never uses a dropped table any further.
        """

        # The table name is needed for both log lines and for the drop itself.
        table_name = self.name
        logger.info(f"dropping table '{table_name}' (self)")
        self.database.drop_table(
            table_name,
            if_exists=if_exists,
            table_admin_timeout_ms=table_admin_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        logger.info(f"finished dropping table '{table_name}' (self)")

    def command(
        self,
        body: dict[str, Any] | None,
        *,
        raise_api_errors: bool = True,
        general_method_timeout_ms: int | None = None,
        request_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> dict[str, Any]:
        """
        Issue a POST request to the Data API, targeting this table, whose
        payload is entirely supplied by the caller.
        The payload is handed to the API commander as it is: no transformation
        or type conversion is applied to it by this method.

        Args:
            body: a JSON-serializable dictionary, the payload of the request.
            raise_api_errors: if True, responses with a nonempty 'errors' field
                result in an astrapy exception being raised.
            general_method_timeout_ms: a timeout, in milliseconds, to impose on the
                underlying API request. If not provided, this object's defaults apply.
                (This method issues a single API request, hence all timeout parameters
                are treated the same.)
            request_timeout_ms: an alias for `general_method_timeout_ms`.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a dictionary with the response of the HTTP request.

        Example:
            >>> my_table.command({
            ...     "findOne": {
            ...         "filter": {"match_id": "fight4"},
            ...         "projection": {"winner": True},
            ...     }
            ... })
            {'data': {'document': {'winner': 'Victor'}}, 'status': ...  # shortened
        """

        _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
            timeout_options=self.api_options.timeout_options,
            general_method_timeout_ms=general_method_timeout_ms,
            request_timeout_ms=request_timeout_ms,
            timeout_ms=timeout_ms,
        )
        # Log-only description: the sorted top-level keys of the payload,
        # or a placeholder when the payload is None or empty.
        _cmd_desc: str = ",".join(sorted(body)) if body else "(none)"
        logger.info(f"command={_cmd_desc} on '{self.name}'")
        _timeout_context = _TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        )
        command_result = self._api_commander.request(
            payload=body,
            raise_api_errors=raise_api_errors,
            timeout_context=_timeout_context,
        )
        logger.info(f"finished command={_cmd_desc} on '{self.name}'")
        return command_result

Ancestors

typing.Generic

Instance variables

var database : Database

a Database object, the database this table belongs to.

Example

>>> my_table.database.name
'the_db'

Expand source code

@property
def database(self) -> Database:
    """
    The Database object this table belongs to.

    Returns:
        the object stored in this table's `_database` attribute.

    Example:
        >>> my_table.database.name
        'the_db'
    """

    owning_database: Database = self._database
    return owning_database

var full_name : str

The fully-qualified table name within the database, in the form "keyspace.table_name".

Example

>>> my_table.full_name
'default_keyspace.my_table'

Expand source code

@property
def full_name(self) -> str:
    """
    The table name qualified by its keyspace, formatted as
    "keyspace.table_name" (this table's `keyspace` and `name`,
    joined by a dot).

    Example:
        >>> my_table.full_name
        'default_keyspace.my_table'
    """

    keyspace_part = self.keyspace
    name_part = self.name
    return f"{keyspace_part}.{name_part}"

var keyspace : str

The keyspace this table is in.

Example

>>> my_table.keyspace
'default_keyspace'

Expand source code

@property
def keyspace(self) -> str:
    """
    The keyspace this table is in, as read from the table's database.

    Returns:
        the `keyspace` attribute of this table's database.

    Raises:
        RuntimeError: if the database has its keyspace set to None.

    Example:
        >>> my_table.keyspace
        'default_keyspace'
    """

    db_keyspace = self.database.keyspace
    if db_keyspace is not None:
        return db_keyspace
    # A table cannot report a keyspace its database does not have.
    raise RuntimeError("The table's DB is set with keyspace=None")

var name : str

The name of this table.

Example

>>> my_table.name
'games'

Expand source code

@property
def name(self) -> str:
    """
    The name of this table.

    Returns:
        the value stored in this table's `_name` attribute.

    Example:
        >>> my_table.name
        'games'
    """

    table_name: str = self._name
    return table_name

Methods

def alter(self, operation: AlterTableOperation | dict[str, Any], *, row_type: type[Any] = typing.Dict[str, typing.Any], table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> Table[~NEW_ROW]

Executes one of the available alter-table operations on this table, such as adding/dropping columns.

This is a blocking operation: the method returns once the Data API has responded to the alter-table request.

Args

operation: an instance of one of the astrapy.info.AlterTable* classes, representing which alter operation to perform and the details thereof. A regular dictionary can also be provided, but then it must have the alter operation name at its top level: {"add": {"columns": …}}.
row_type: this parameter acts as a formal specifier for the type checker. If omitted, the resulting Table is implicitly a Table[dict[str, Any]]. If provided, it must match the type hint specified in the assignment. See the examples below.
table_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for table_admin_timeout_ms.
timeout_ms: an alias for table_admin_timeout_ms.

Examples

>>> from astrapy.info import (
...     AlterTableAddColumns,
...     AlterTableAddVectorize,
...     AlterTableDropColumns,
...     AlterTableDropVectorize,
...     ColumnType,
...     TableScalarColumnTypeDescriptor,
...     VectorServiceOptions,
... )
>>>
>>> # Add a column
>>> new_table_1 = my_table.alter(
...     AlterTableAddColumns(
...         columns={
...             "tie_break": TableScalarColumnTypeDescriptor(
...                 column_type=ColumnType.BOOLEAN,
...             ),
...         }
...     )
... )
>>>
>>> # Drop a column
>>> new_table_2 = new_table_1.alter(AlterTableDropColumns(
...     columns=["tie_break"]
... ))
>>>
>>> # Add vectorize to a (vector) column
>>> new_table_3 = new_table_2.alter(
...     AlterTableAddVectorize(
...         columns={
...             "m_vector": VectorServiceOptions(
...                 provider="openai",
...                 model_name="text-embedding-3-small",
...                 authentication={
...                     "providerKey": "ASTRA_KMS_API_KEY_NAME",
...                 },
...             ),
...         }
...     )
... )
>>>
>>> # Drop vectorize from a (vector) column
>>> # (Also demonstrates type hint usage)
>>> from typing import TypedDict
>>> from astrapy import Table
>>> from astrapy.data_types import (
...     DataAPISet,
...     DataAPITimestamp,
...     DataAPIVector,
... )
>>> from astrapy.ids import UUID
>>>
>>> class MyMatch(TypedDict):
...     match_id: str
...     round: int
...     m_vector: DataAPIVector
...     score: int
...     when: DataAPITimestamp
...     winner: str
...     fighters: DataAPISet[UUID]
...
>>> new_table_4: Table[MyMatch] = new_table_3.alter(
...     AlterTableDropVectorize(columns=["m_vector"]),
...     row_type=MyMatch,
... )

Expand source code

def alter(
    self,
    operation: AlterTableOperation | dict[str, Any],
    *,
    row_type: type[Any] = DefaultRowType,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> Table[NEW_ROW]:
    """
    Run one of the available alter-table operations against this table,
    for instance adding or dropping columns.

    This is a blocking operation: the method returns once the Data API
    has responded to the alter-table request.

    Args:
        operation: an instance of one of the `astrapy.info.AlterTable*` classes,
            representing which alter operation to perform and the details thereof.
            A regular dictionary can also be provided, but then it must have the
            alter operation name at its top level: {"add": {"columns": ...}}.
        row_type: this parameter acts as a formal specifier for the type checker
            (it is not otherwise used by the method body).
            If omitted, the resulting Table is implicitly a `Table[dict[str, Any]]`.
            If provided, it must match the type hint specified in the assignment.
            See the examples below.
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Returns:
        a new Table object, created with the same database, name, keyspace
        and API options as this one.

    Raises:
        UnexpectedDataAPIResponseException: if the "status" of the API
            response is anything other than {"ok": 1}.

    Examples:
        >>> from astrapy.info import (
        ...     AlterTableAddColumns,
        ...     AlterTableAddVectorize,
        ...     AlterTableDropColumns,
        ...     AlterTableDropVectorize,
        ...     ColumnType,
        ...     TableScalarColumnTypeDescriptor,
        ...     VectorServiceOptions,
        ... )
        >>>
        >>> # Add a column
        >>> new_table_1 = my_table.alter(
        ...     AlterTableAddColumns(
        ...         columns={
        ...             "tie_break": TableScalarColumnTypeDescriptor(
        ...                 column_type=ColumnType.BOOLEAN,
        ...             ),
        ...         }
        ...     )
        ... )
        >>>
        >>> # Drop a column
        >>> new_table_2 = new_table_1.alter(AlterTableDropColumns(
        ...     columns=["tie_break"]
        ... ))
        >>>
        >>> # Add vectorize to a (vector) column
        >>> new_table_3 = new_table_2.alter(
        ...     AlterTableAddVectorize(
        ...         columns={
        ...             "m_vector": VectorServiceOptions(
        ...                 provider="openai",
        ...                 model_name="text-embedding-3-small",
        ...                 authentication={
        ...                     "providerKey": "ASTRA_KMS_API_KEY_NAME",
        ...                 },
        ...             ),
        ...         }
        ...     )
        ... )
        >>>
        >>> # Drop vectorize from a (vector) column
        >>> # (Also demonstrates type hint usage)
        >>> from typing import TypedDict
        >>> from astrapy import Table
        >>> from astrapy.data_types import (
        ...     DataAPISet,
        ...     DataAPITimestamp,
        ...     DataAPIVector,
        ... )
        >>> from astrapy.ids import UUID
        >>>
        >>> class MyMatch(TypedDict):
        ...     match_id: str
        ...     round: int
        ...     m_vector: DataAPIVector
        ...     score: int
        ...     when: DataAPITimestamp
        ...     winner: str
        ...     fighters: DataAPISet[UUID]
        ...
        >>> new_table_4: Table[MyMatch] = new_table_3.alter(
        ...     AlterTableDropVectorize(columns=["m_vector"]),
        ...     row_type=MyMatch,
        ... )
    """

    # Accept both a ready-made operation object and its dictionary form.
    alter_operation: AlterTableOperation = (
        operation
        if isinstance(operation, AlterTableOperation)
        else AlterTableOperation.from_full_dict(operation)
    )
    _table_admin_timeout_ms, _ta_label = _select_singlereq_timeout_ta(
        timeout_options=self.api_options.timeout_options,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    operation_name = alter_operation._name
    # The operation body is nested under its own name within "operation".
    operation_spec = {operation_name: alter_operation.as_dict()}
    alter_payload = {"alterTable": {"operation": operation_spec}}
    logger.info(f"alterTable({operation_name})")
    alter_response = self._api_commander.request(
        payload=alter_payload,
        timeout_context=_TimeoutContext(
            request_ms=_table_admin_timeout_ms, label=_ta_label
        ),
    )
    response_status = alter_response.get("status")
    if response_status != {"ok": 1}:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from alterTable API command.",
            raw_response=alter_response,
        )
    logger.info(f"finished alterTable({operation_name})")
    return Table(
        database=self.database,
        name=self.name,
        keyspace=self.keyspace,
        api_options=self.api_options,
    )

Send a POST request to the Data API for this table with an arbitrary, caller-provided payload. No transformations or type conversions are made on the provided payload.

Args

body: a JSON-serializable dictionary, the payload of the request.
raise_api_errors: if True, responses with a nonempty 'errors' field result in an astrapy exception being raised.
general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a dictionary with the response of the HTTP request.

Example

>>> my_table.command({
...     "findOne": {
...         "filter": {"match_id": "fight4"},
...         "projection": {"winner": True},
...     }
... })
{'data': {'document': {'winner': 'Victor'}}, 'status': ...  # shortened

Expand source code

def command(
    self,
    body: dict[str, Any] | None,
    *,
    raise_api_errors: bool = True,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> dict[str, Any]:
    """
    Issue a POST request to the Data API, targeting this table, whose
    payload is entirely supplied by the caller.
    The payload is handed to the API commander as it is: no transformation
    or type conversion is applied to it by this method.

    Args:
        body: a JSON-serializable dictionary, the payload of the request.
        raise_api_errors: if True, responses with a nonempty 'errors' field
            result in an astrapy exception being raised.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a dictionary with the response of the HTTP request.

    Example:
        >>> my_table.command({
        ...     "findOne": {
        ...         "filter": {"match_id": "fight4"},
        ...         "projection": {"winner": True},
        ...     }
        ... })
        {'data': {'document': {'winner': 'Victor'}}, 'status': ...  # shortened
    """

    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # Log-only description: the sorted top-level keys of the payload,
    # or a placeholder when the payload is None or empty.
    _cmd_desc: str = ",".join(sorted(body)) if body else "(none)"
    logger.info(f"command={_cmd_desc} on '{self.name}'")
    _timeout_context = _TimeoutContext(
        request_ms=_request_timeout_ms, label=_rt_label
    )
    command_result = self._api_commander.request(
        payload=body,
        raise_api_errors=raise_api_errors,
        timeout_context=_timeout_context,
    )
    logger.info(f"finished command={_cmd_desc} on '{self.name}'")
    return command_result

def count_documents(self, filter: FilterType, *, upper_bound: int, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> int

Count the rows in the table matching the specified filter.

Args

filter: a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"name": "John", "age": 59} {"$and": [{"name": {"$eq": "John"}}, {"age": {"$gt": 58}}]} See the Data API documentation for the full set of operators.
upper_bound: a required ceiling on the result of the count operation. If the actual number of rows exceeds this value, an exception will be raised. Furthermore, if the actual number of rows exceeds the maximum count that the Data API can reach (regardless of upper_bound), an exception will be raised.
general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Returns

the exact count of matching rows.

Examples

>>> my_table.insert_many([{"seq": i} for i in range(20)])
TableInsertManyResult(...)
>>> my_table.count_documents({}, upper_bound=100)
20
>>> my_table.count_documents({"seq":{"$gt": 15}}, upper_bound=100)
4
>>> my_table.count_documents({}, upper_bound=10)
Traceback (most recent call last):
    ... ...
astrapy.exceptions.TooManyRowsToCountException

Note

Count operations are expensive: for this reason, the best practice is to provide a reasonable upper_bound according to the caller expectations. Moreover, indiscriminate usage of count operations for sizeable amounts of rows (i.e. in the thousands and more) is discouraged in favor of alternative application-specific solutions. Keep in mind that the Data API has a hard upper limit on the amount of rows it will count, and that an exception will be thrown by this method if this limit is encountered.

Expand source code

def count_documents(
    self,
    filter: FilterType,
    *,
    upper_bound: int,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> int:
    """
    Count the rows in the table matching the specified filter.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"name": "John", "age": 59}
                {"$and": [{"name": {"$eq": "John"}}, {"age": {"$gt": 58}}]}
            See the Data API documentation for the full set of operators.
        upper_bound: a required ceiling on the result of the count operation.
            If the actual number of rows exceeds this value,
            an exception will be raised.
            Furthermore, if the actual number of rows exceeds the maximum
            count that the Data API can reach (regardless of upper_bound),
            an exception will be raised.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        the exact count of matching rows.

    Raises:
        TooManyRowsToCountException: if the API response flags "moreData"
            (the server-side counting limit was hit), or if the returned
            count is greater than `upper_bound`.
        UnexpectedDataAPIResponseException: if the API response has
            no "count" within its "status".

    Examples:
        >>> my_table.insert_many([{"seq": i} for i in range(20)])
        TableInsertManyResult(...)
        >>> my_table.count_documents({}, upper_bound=100)
        20
        >>> my_table.count_documents({"seq":{"$gt": 15}}, upper_bound=100)
        4
        >>> my_table.count_documents({}, upper_bound=10)
        Traceback (most recent call last):
            ... ...
        astrapy.exceptions.TooManyRowsToCountException

    Note:
        Count operations are expensive: for this reason, the best practice
        is to provide a reasonable `upper_bound` according to the caller
        expectations. Moreover, indiscriminate usage of count operations
        for sizeable amounts of rows (i.e. in the thousands and more)
        is discouraged in favor of alternative application-specific solutions.
        Keep in mind that the Data API has a hard upper limit on the amount
        of rows it will count, and that an exception will be thrown
        by this method if this limit is encountered.
    """

    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # Route the payload through the converter agent, exactly as delete_many
    # does for its filter, so that the filter gets the same treatment
    # before being sent out.
    # NOTE(review): presumably this is what prepares non-JSON-native filter
    # values for the API -- confirm against the converter agent.
    cd_payload = self._converter_agent.preprocess_payload(
        {"countDocuments": {"filter": filter}},
        map2tuple_checker=None,
    )
    logger.info(f"countDocuments on '{self.name}'")
    cd_response = self._api_commander.request(
        payload=cd_payload,
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    logger.info(f"finished countDocuments on '{self.name}'")
    cd_status = cd_response.get("status", {})
    if "count" not in cd_status:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from countDocuments API command.",
            raw_response=cd_response,
        )
    count: int = cd_status["count"]
    # "moreData" means the server stopped counting at its own ceiling:
    # the returned count is then a lower bound, not the real figure.
    if cd_status.get("moreData", False):
        raise TooManyRowsToCountException(
            text=f"Document count exceeds {count}, the maximum allowed by the server",
            server_max_count_exceeded=True,
        )
    if count > upper_bound:
        raise TooManyRowsToCountException(
            text="Document count exceeds required upper bound",
            server_max_count_exceeded=False,
        )
    return count

Create an index on a non-vector column of the table.

This is a blocking operation: the method returns once the index is created and ready to use.

For creation of a vector index, see method create_vector_index instead.

Args

name: the name of the index. Index names must be unique across the keyspace.
column: the table column on which the index is to be created. For a map column, besides a simple string, it can be an object in one of the two formats {"column": "$values"}, {"column": "$keys"}.
options: if passed, it must be an instance of TableIndexOptions, or an equivalent dictionary, which specifies index settings such as – for a text column – case-sensitivity and so on. See the TableIndexOptions class for more details.
if_not_exists: if set to True, the command will succeed even if an index with the specified name already exists (in which case no actual index creation takes place on the database). The API default of False means that an error is raised by the API in case of name collision.
table_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for table_admin_timeout_ms.
timeout_ms: an alias for table_admin_timeout_ms.

Examples

>>> from astrapy.info import TableIndexOptions
>>>
>>> # create an index on a column
>>> my_table.create_index(
...     "score_index",
...     "score",
... )
>>>
>>> # create an index on a textual column, specifying indexing options
>>> my_table.create_index(
...     "winner_index",
...     "winner",
...     options=TableIndexOptions(
...         ascii=False,
...         normalize=True,
...         case_sensitive=False,
...     ),
... )

Expand source code

def create_index(
    self,
    name: str,
    column: str | dict[str, str],
    *,
    options: TableIndexOptions | dict[str, Any] | None = None,
    if_not_exists: bool | None = None,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Create an index on a non-vector column of the table.

    This is a blocking operation: the method returns once the index
    is created and ready to use.

    To create a vector index, use method `create_vector_index` instead.

    Args:
        name: the name of the index. Index names must be unique across the keyspace.
        column: the table column on which the index is to be created.
            For a map column, besides a simple string, it can be an object
            in one of the two formats {"column": "$values"}, {"column": "$keys"}.
        options: if passed, it must be an instance of `TableIndexOptions`,
            or an equivalent dictionary, which specifies index settings
            such as -- for a text column -- case-sensitivity and so on.
            When omitted, an empty dictionary is used in its place.
            See the `astrapy.info.TableIndexOptions` class for more details.
        if_not_exists: if set to True, the command will succeed even if an index
            with the specified name already exists (in which case no actual
            index creation takes place on the database). The API default of False
            means that an error is raised by the API in case of name collision.
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Examples:
        >>> from astrapy.info import TableIndexOptions
        >>>
        >>> # create an index on a column
        >>> my_table.create_index(
        ...     "score_index",
        ...     "score",
        ... )
        >>>
        >>> # create an index on a textual column, specifying indexing options
        >>> my_table.create_index(
        ...     "winner_index",
        ...     "winner",
        ...     options=TableIndexOptions(
        ...         ascii=False,
        ...         normalize=True,
        ...         case_sensitive=False,
        ...     ),
        ... )
    """

    # Normalize the options (object, dictionary or nothing) to one form.
    index_options = TableIndexOptions.coerce(options or {})
    index_definition = TableIndexDefinition(
        column=column,
        options=index_options,
    )
    ci_definition: dict[str, Any] = index_definition.as_dict()
    # The shared index-creation routine does the rest, for the
    # "createIndex" flavor of the command.
    return self._create_generic_index(
        i_name=name,
        ci_definition=ci_definition,
        ci_command="createIndex",
        if_not_exists=if_not_exists,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )

Create a vector index on a vector column of the table, enabling vector similarity search operations on it.

This is a blocking operation: the method returns once the index is created and ready to use.

For creation of a non-vector index, see method create_index instead.

Args

name: the name of the index. Index names must be unique across the keyspace.
column: the table column, of type "vector" on which to create the index.
options: an instance of TableVectorIndexOptions, or an equivalent dictionary, which specifies settings for the vector index, such as the metric to use or, if desired, a "source model" setting. If omitted, the Data API defaults will apply for the index. See the TableVectorIndexOptions class for more details.
if_not_exists: if set to True, the command will succeed even if an index with the specified name already exists (in which case no actual index creation takes place on the database). The API default of False means that an error is raised by the API in case of name collision.
table_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for table_admin_timeout_ms.
timeout_ms: an alias for table_admin_timeout_ms.

Example

>>> from astrapy.constants import VectorMetric
>>> from astrapy.info import TableVectorIndexOptions
>>>
>>> # create a vector index with dot-product similarity
>>> my_table.create_vector_index(
...     "m_vector_index",
...     "m_vector",
...     options=TableVectorIndexOptions(
...         metric=VectorMetric.DOT_PRODUCT,
...     ),
... )
>>> # specify a source_model (since the previous statement
>>> # succeeded, this will do nothing because of `if_not_exists`):
>>> my_table.create_vector_index(
...     "m_vector_index",
...     "m_vector",
...     options=TableVectorIndexOptions(
...         metric=VectorMetric.DOT_PRODUCT,
...         source_model="nv-qa-4",
...     ),
...     if_not_exists=True,
... )
>>> # leave the settings to the Data API defaults of cosine
>>> # similarity metric (since the previous statement
>>> # succeeded, this will do nothing because of `if_not_exists`):
>>> my_table.create_vector_index(
...     "m_vector_index",
...     "m_vector",
...     if_not_exists=True,
... )

Expand source code

def create_vector_index(
    self,
    name: str,
    column: str,
    *,
    options: TableVectorIndexOptions | dict[str, Any] | None = None,
    if_not_exists: bool | None = None,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Build a vector index over one of the table's vector columns, so that
    vector similarity searches can be run against that column.

    The call blocks: it returns only once the index has been created
    and is ready to use.

    To create an index that is not a vector index, use `create_index`.

    Args:
        name: the name to give the index. Index names must be unique
            across the keyspace.
        column: the name of the column, of type "vector", to be indexed.
        options: settings for the vector index (e.g. the similarity metric
            or, if desired, a "source model"), given either as
            a `TableVectorIndexOptions` instance or as an equivalent dictionary.
            When omitted, the Data API defaults are used for the index.
            See the `astrapy.info.TableVectorIndexOptions` class for details.
        if_not_exists: when True, the command succeeds even if an index by
            this name exists already (and in that case nothing is actually
            created on the database). With the API default of False,
            a name collision results in an error raised by the API.
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Example:
        >>> from astrapy.constants import VectorMetric
        >>> from astrapy.info import TableVectorIndexOptions
        >>>
        >>> # create a vector index with dot-product similarity
        >>> my_table.create_vector_index(
        ...     "m_vector_index",
        ...     "m_vector",
        ...     options=TableVectorIndexOptions(
        ...         metric=VectorMetric.DOT_PRODUCT,
        ...     ),
        ... )
        >>> # specify a source_model (since the previous statement
        >>> # succeeded, this will do nothing because of `if_not_exists`):
        >>> my_table.create_vector_index(
        ...     "m_vector_index",
        ...     "m_vector",
        ...     options=TableVectorIndexOptions(
        ...         metric=VectorMetric.DOT_PRODUCT,
        ...         source_model="nv-qa-4",
        ...     ),
        ...     if_not_exists=True,
        ... )
        >>> # leave the settings to the Data API defaults of cosine
        >>> # similarity metric (since the previous statement
        >>> # succeeded, this will do nothing because of `if_not_exists`):
        >>> my_table.create_vector_index(
        ...     "m_vector_index",
        ...     "m_vector",
        ...     if_not_exists=True,
        ... )
    """

    # normalize the options (object, dict or None) before building the definition
    vector_options = TableVectorIndexOptions.coerce(options)
    index_definition = TableVectorIndexDefinition(
        column=column,
        options=vector_options,
    )
    definition_dict: dict[str, Any] = index_definition.as_dict()
    # the shared index-creation routine is told which API command to issue
    return self._create_generic_index(
        i_name=name,
        ci_definition=definition_dict,
        ci_command="createVectorIndex",
        if_not_exists=if_not_exists,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )

def definition(self, *, table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> ListTableDefinition

Args

table_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for table_admin_timeout_ms.
timeout_ms: an alias for table_admin_timeout_ms.

Returns

A ListTableDefinition object, available for inspection.

Example

>>> my_table.definition()
ListTableDefinition(columns=[match_id,round,fighters, ...  # shortened

Expand source code

def definition(
    self,
    *,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> ListTableDefinition:
    """
    Ask the Data API for the schema of this table and return it as
    a definition structure.
    If there are no unsupported columns in the table, the return value has
    the same contents as could have been provided to a `create_table` method call.

    Args:
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Returns:
        A `ListTableDefinition` object, available for inspection.

    Raises:
        RuntimeError: if no table with this table's name is found among
            the tables listed by the database.

    Example:
        >>> my_table.definition()
        ListTableDefinition(columns=[match_id,round,fighters, ...  # shortened
    """

    resolved_timeout_ms, timeout_label = _select_singlereq_timeout_ta(
        timeout_options=self.api_options.timeout_options,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info(f"getting tables in search of '{self.name}'")
    # list the tables and retain only the descriptor(s) named like this table
    matching_descriptors = []
    for descriptor in self.database._list_tables_ctx(
        keyspace=None,
        timeout_context=_TimeoutContext(
            request_ms=resolved_timeout_ms,
            label=timeout_label,
        ),
    ):
        if descriptor.name == self.name:
            matching_descriptors.append(descriptor)
    logger.info(f"finished getting tables in search of '{self.name}'")
    if not matching_descriptors:
        raise RuntimeError(f"Table {self.keyspace}.{self.name} not found.")
    return matching_descriptors[0].definition

def delete_many(self, filter: FilterType, *, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Delete all rows matching a provided filter condition. This operation can target from a single row to the entirety of the table.

Args

filter: a filter dictionary to specify which row(s) must be deleted. 1. If the filter is in the form {"pk1": val1, "pk2": val2 ...} and specifies the primary key in full, at most one row is deleted, the one with that primary key. 2. If the table has "partitionSort" columns, some or all of them may be left out (the least significant of them can also employ an inequality, or range, predicate): a range of rows, but always within a single partition, will be deleted. 3. If an empty filter, {}, is passed, this operation empties the table completely. USE WITH CARE. 4. Other kinds of filtering clauses are forbidden. In the following examples, the table is partitioned by columns ["pa1", "pa2"] and has partitionSort "ps1" and "ps2" in that order. Valid filter examples: - {"pa1": x, "pa2": y, "ps1": z, "ps2": t}: deletes one row - {"pa1": x, "pa2": y, "ps1": z}: deletes multiple rows - {"pa1": x, "pa2": y, "ps1": z, "ps2": {"$lt": q}}: deletes multiple rows - {"pa1": x, "pa2": y}: deletes all rows in the partition - {}: empties the table (CAUTION) Invalid filter examples: - {"pa1": x}: incomplete partition key - {"pa1": x, "ps1": z}: incomplete partition key (whatever is added) - {"pa1": x, "pa2": y, "ps1": {"$lt": r}, "ps2": t}: inequality on a non-least-significant partitionSort column provided. - {"pa1": x, "pa2": y, "ps2": t}: cannot skip "ps1"
general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Examples

>>> # Delete a single row (full primary key specified):
>>> my_table.delete_many({"match_id": "fight4", "round": 1})
>>>
>>> # Delete part of a partition (inequality on the
>>> # last-mentioned 'partitionSort' column):
>>> my_table.delete_many({"match_id": "fight5", "round": {"$gte": 5}})
>>>
>>> # Delete a whole partition (leave 'partitionSort' unspecified):
>>> my_table.delete_many({"match_id": "fight7"})
>>>
>>> # empty the table entirely with empty filter (*CAUTION*):
>>> my_table.delete_many({})

Expand source code

def delete_many(
    self,
    filter: FilterType,
    *,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Remove every row that satisfies the given filter condition.
    Depending on the filter, the deletion may affect anything from
    a single row to the whole table.

    Args:
        filter: a filter dictionary to specify which row(s) must be deleted.
            1. If the filter is in the form `{"pk1": val1, "pk2": val2 ...}`
            and specifies the primary key in full, at most one row is deleted,
            the one with that primary key.
            2. If the table has "partitionSort" columns, some or all of them
            may be left out (the least significant of them can also employ
            an inequality, or range, predicate): a range of rows, but always
            within a single partition, will be deleted.
            3. If an empty filter, `{}`, is passed, this operation empties
            the table completely. *USE WITH CARE*.
            4. Other kinds of filtering clauses are forbidden.
            In the following examples, the table is partitioned
            by columns ["pa1", "pa2"] and has partitionSort "ps1" and "ps2" in that
            order.
            Valid filter examples:
            - `{"pa1": x, "pa2": y, "ps1": z, "ps2": t}`: deletes one row
            - `{"pa1": x, "pa2": y, "ps1": z}`: deletes multiple rows
            - `{"pa1": x, "pa2": y, "ps1": z, "ps2": {"$lt": q}}`: deletes
              multiple rows
            - `{"pa1": x, "pa2": y}`: deletes all rows in the partition
            - `{}`: empties the table (*CAUTION*)
            Invalid filter examples:
            - `{"pa1": x}`: incomplete partition key
            - `{"pa1": x, "ps1": z}`: incomplete partition key (whatever is added)
            - `{"pa1": x, "pa2": y, "ps1": {"$lt": r}, "ps2": t}`: inequality on
              a non-least-significant partitionSort column provided.
            - `{"pa1": x, "pa2": y, "ps2": t}`: cannot skip "ps1"
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Raises:
        UnexpectedDataAPIResponseException: if the API response does not
            carry a "deletedCount" equal to -1 in its "status".

    Examples:
        >>> # Delete a single row (full primary key specified):
        >>> my_table.delete_many({"match_id": "fight4", "round": 1})
        >>>
        >>> # Delete part of a partition (inequality on the
        >>> # last-mentioned 'partitionSort' column):
        >>> my_table.delete_many({"match_id": "fight5", "round": {"$gte": 5}})
        >>>
        >>> # Delete a whole partition (leave 'partitionSort' unspecified):
        >>> my_table.delete_many({"match_id": "fight7"})
        >>>
        >>> # empty the table entirely with empty filter (*CAUTION*):
        >>> my_table.delete_many({})
    """

    resolved_timeout_ms, timeout_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # the "filter" entry is left out of the command altogether when it is None
    command_body = {} if filter is None else {"filter": filter}
    payload = self._converter_agent.preprocess_payload(
        {"deleteMany": command_body},
        map2tuple_checker=None,
    )
    logger.info(f"deleteMany on '{self.name}'")
    response = self._api_commander.request(
        payload=payload,
        timeout_context=_TimeoutContext(
            request_ms=resolved_timeout_ms,
            label=timeout_label,
        ),
    )
    logger.info(f"finished deleteMany on '{self.name}'")
    # a deletedCount of -1 is the only response accepted as a success
    if response.get("status", {}).get("deletedCount") != -1:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from deleteMany API command.",
            raw_response=response,
        )

def delete_one(self, filter: FilterType, *, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Delete a row, matching the provided value of the primary key. If no row is found with that primary key, the method does nothing.

Args

filter: a predicate expressing the table primary key in full, i.e. a dictionary defining values for all columns that form the primary key. A row (at most one) is deleted if it matches that primary key. An example filter may be {"match_id": "fight4", "round": 1}.
general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Examples

>>> # Count the rows matching a certain filter
>>> len(my_table.find({"match_id": "fight7"}).to_list())
3
>>>
>>> # Delete a row belonging to the group
>>> my_table.delete_one({"match_id": "fight7", "round": 2})
>>>
>>> # Count again
>>> len(my_table.find({"match_id": "fight7"}).to_list())
2
>>>
>>> # Attempt the delete again (nothing to delete)
>>> my_table.delete_one({"match_id": "fight7", "round": 2})
>>>
>>> # The count is unchanged
>>> len(my_table.find({"match_id": "fight7"}).to_list())
2

Expand source code

def delete_one(
    self,
    filter: FilterType,
    *,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Remove the row identified by the given primary key value.
    When no row has that primary key, nothing happens.

    Args:
        filter: a predicate expressing the table primary key in full,
            i.e. a dictionary defining values for all columns that form the
            primary key. A row (at most one) is deleted if it matches that primary
            key. An example filter may be `{"match_id": "fight4", "round": 1}`.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Raises:
        UnexpectedDataAPIResponseException: if the API response does not
            carry a "deletedCount" equal to -1 in its "status".

    Examples:
        >>> # Count the rows matching a certain filter
        >>> len(my_table.find({"match_id": "fight7"}).to_list())
        3
        >>>
        >>> # Delete a row belonging to the group
        >>> my_table.delete_one({"match_id": "fight7", "round": 2})
        >>>
        >>> # Count again
        >>> len(my_table.find({"match_id": "fight7"}).to_list())
        2
        >>>
        >>> # Attempt the delete again (nothing to delete)
        >>> my_table.delete_one({"match_id": "fight7", "round": 2})
        >>>
        >>> # The count is unchanged
        >>> len(my_table.find({"match_id": "fight7"}).to_list())
        2
    """

    resolved_timeout_ms, timeout_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # the "filter" entry is left out of the command altogether when it is None
    command_body = {} if filter is None else {"filter": filter}
    payload = self._converter_agent.preprocess_payload(
        {"deleteOne": command_body},
        map2tuple_checker=None,
    )
    logger.info(f"deleteOne on '{self.name}'")
    response = self._api_commander.request(
        payload=payload,
        timeout_context=_TimeoutContext(
            request_ms=resolved_timeout_ms,
            label=timeout_label,
        ),
    )
    logger.info(f"finished deleteOne on '{self.name}'")
    # a deletedCount of -1 is the only response accepted as a success
    if response.get("status", {}).get("deletedCount") != -1:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from deleteOne API command.",
            raw_response=response,
        )

Return a list of the unique values of key across the rows in the table that match the provided filter.

Args

key: the name of the field whose value is inspected across rows. Keys can be just column names (as is typically the case), but the dot-notation is also accepted to mean subkeys or indices within lists (for example, "map_column.subkey" or "list_column.2"). If a column has literal dots or ampersands in its name, this parameter must be escaped to be treated properly. The key can also be a list of strings and numbers, in which case no escape is necessary: each item in the list is a field name/index, for example ["map_column", "subkey"] or ["list_column", 2]. For set and list columns, individual entries are "unrolled" automatically.
filter: a dictionary expressing which condition the inspected rows must satisfy. The filter can use operators, such as "$eq" for equality, and require columns to compare with literal values. Simple examples are {} (zero filter), {"match_no": 123} (a shorthand for {"match_no": {"$eq": 123}}), or {"match_no": 123, "round": "C"} (multiple conditions are implicitly combined with "$and"). Please consult the Data API documentation for a more detailed explanation of table search filters and tips on their usage.
general_method_timeout_ms: a timeout, in milliseconds, for the whole requested operation (which may involve multiple API requests). This method, being based on find (see) may entail successive HTTP API requests, depending on the amount of involved rows. If not provided, this object's defaults apply.
request_timeout_ms: a timeout, in milliseconds, for each API request. If not provided, this object's defaults apply.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a list of all different values for key found across the rows that match the filter. The result list has no repeated items.

Examples

>>> my_table.distinct("winner", filter={"match_id": "challenge6"})
['Donna', 'Erick', 'Fiona']
>>>
>>> # distinct values across the whole table:
>>> # (not recommended performance-wise)
>>> my_table.distinct("winner")
The Data API returned a warning: {'errorCode': 'ZERO_FILTER_OPERATIONS', ...
['Victor', 'Adam Zuul', 'Betta Vigo', 'Caio Gozer', 'Donna', 'Erick', ...
>>>
>>> # Over a column containing null values
>>> # (also with composite filter):
>>> my_table.distinct(
...     "score",
...     filter={"match_id": {"$in": ["fight4", "tournamentA"]}},
... )
[18, None]
>>>
>>> # distinct over a set column (automatically "unrolled"):
>>> my_table.distinct(
...     "fighters",
...     filter={"match_id": {"$in": ["fight4", "tournamentA"]}},
... )
[UUID('0193539a-2770-8c09-a32a-111111111111'), UUID('019353e3-00b4-...

Note

For details on the behaviour of "distinct" in conjunction with real-time changes in the table contents, see the Note of the find command.

Expand source code

def distinct(
    self,
    key: str | Iterable[str | int],
    *,
    filter: FilterType | None = None,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[Any]:
    """
    Collect the unique values found for `key` among the table rows
    that match the provided filter, and return them as a list.

    Args:
        key: the name of the field whose value is inspected across rows.
            Keys can be just column names (as is typically the case), but
            the dot-notation is also accepted to mean subkeys or indices
            within lists (for example, "map_column.subkey" or "list_column.2").
            If a column has literal dots or ampersands in its name, this
            parameter must be escaped to be treated properly.
            The key can also be a list of strings and numbers, in which case
            no escape is necessary: each item in the list is a field name/index,
            for example ["map_column", "subkey"] or ["list_column", 2].
            For set and list columns, individual entries are "unrolled"
            automatically.
        filter: a dictionary expressing which condition the inspected rows
            must satisfy. The filter can use operators, such as "$eq" for equality,
            and require columns to compare with literal values. Simple examples
            are `{}` (zero filter), `{"match_no": 123}` (a shorthand for
            `{"match_no": {"$eq": 123}}`), or `{"match_no": 123, "round": "C"}`
            (multiple conditions are implicitly combined with "$and").
            Please consult the Data API documentation for a more detailed
            explanation of table search filters and tips on their usage.
        general_method_timeout_ms: a timeout, in milliseconds, for the whole
            requested operation (which may involve multiple API requests).
            This method, being based on `find` (see) may entail successive HTTP API
            requests, depending on the amount of involved rows.
            If not provided, this object's defaults apply.
        request_timeout_ms: a timeout, in milliseconds, for each API request.
            If not provided, this object's defaults apply.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a list of all different values for `key` found across the rows
        that match the filter. The result list has no repeated items.

    Examples:
        >>> my_table.distinct("winner", filter={"match_id": "challenge6"})
        ['Donna', 'Erick', 'Fiona']
        >>>
        >>> # distinct values across the whole table:
        >>> # (not recommended performance-wise)
        >>> my_table.distinct("winner")
        The Data API returned a warning: {'errorCode': 'ZERO_FILTER_OPERATIONS', ...
        ['Victor', 'Adam Zuul', 'Betta Vigo', 'Caio Gozer', 'Donna', 'Erick', ...
        >>>
        >>> # Over a column containing null values
        >>> # (also with composite filter):
        >>> my_table.distinct(
        ...     "score",
        ...     filter={"match_id": {"$in": ["fight4", "tournamentA"]}},
        ... )
        [18, None]
        >>>
        >>> # distinct over a set column (automatically "unrolled"):
        >>> my_table.distinct(
        ...     "fighters",
        ...     filter={"match_id": {"$in": ["fight4", "tournamentA"]}},
        ... )
        [UUID('0193539a-2770-8c09-a32a-111111111111'), UUID('019353e3-00b4-...

    Note:
        It must be kept in mind that `distinct` is a client-side operation,
        which effectively browses all required rows using the logic
        of the `find` method and collects the unique values found for `key`.
        As such, there may be performance, latency and ultimately
        billing implications if the amount of matching rows is large.

    Note:
        For details on the behaviour of "distinct" in conjunction with
        real-time changes in the table contents, see the
        Note of the `find` command.
    """

    # lazy-import here to avoid circular import issues
    from astrapy.cursors import TableFindCursor

    # overall timeout: explicit argument, then its alias, then the object default
    overall_timeout_ms, overall_label = _first_valid_timeout(
        (general_method_timeout_ms, "general_method_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (
            self.api_options.timeout_options.general_method_timeout_ms,
            "general_method_timeout_ms",
        ),
    )
    # per-request timeout: explicit argument, then the object default
    per_request_timeout_ms, per_request_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    # set up the value extractor and the key used in the projection
    extract_values = _create_document_key_extractor(key)
    projection_key = _reduce_distinct_key_to_shallow_safe(key)
    # relaxing the type hint (limited to within this method body)
    rows_cursor: TableFindCursor[dict[str, Any], dict[str, Any]] = (
        TableFindCursor(
            table=self,
            request_timeout_ms=per_request_timeout_ms,
            overall_timeout_ms=overall_timeout_ms,
            request_timeout_label=per_request_label,
            overall_timeout_label=overall_label,
        )  # type: ignore[assignment]
        .filter(filter)
        .project({projection_key: True})
    )
    # walk the cursor, keeping the first occurrence of each value (by its hash)
    seen_hashes = set()
    unique_values: list[Any] = []
    logger.info(f"running distinct() on '{self.name}'")
    for row in rows_cursor:
        for value in extract_values(row):
            value_hash = _hash_table_document(
                value, options=self.api_options.serdes_options
            )
            if value_hash in seen_hashes:
                continue
            seen_hashes.add(value_hash)
            unique_values.append(value)
    logger.info(f"finished running distinct() on '{self.name}'")
    return unique_values

def drop(self, *, if_exists: bool | None = None, table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Drop the table, i.e. delete it from the database along with all the rows stored therein.

Args

if_exists: if passed as True, trying to drop a non-existing table will not error, just silently do nothing instead. If not provided, the API default behaviour will hold.
table_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for table_admin_timeout_ms.
timeout_ms: an alias for table_admin_timeout_ms.

Example

>>> # List tables:
>>> my_table.database.list_table_names()
['games']
>>>
>>> # Drop this table:
>>> my_table.drop()
>>>
>>> # List tables again:
>>> my_table.database.list_table_names()
[]
>>>
>>> # Try working on the table now:
>>> from astrapy.exceptions import DataAPIResponseException
>>> try:
...     my_table.find_one({})
... except DataAPIResponseException as err:
...     print(str(err))
...
Collection does not exist [...] (COLLECTION_NOT_EXIST)

Note

Use with caution.

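Note

Once the method succeeds, methods on this object can still be invoked: however, this hardly makes sense as the underlying actual table is no more. It is the responsibility of the developer to design a correct flow which avoids using a dropped table any further.
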
Expand source code

def drop(
    self,
    *,
    if_exists: bool | None = None,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Delete this table from the database, together with all of the rows
    it contains.

    Args:
        if_exists: if passed as True, trying to drop a non-existing table
            will not error, just silently do nothing instead. If not provided,
            the API default behaviour will hold.
        table_admin_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Example:
        >>> # List tables:
        >>> my_table.database.list_table_names()
        ['games']
        >>>
        >>> # Drop this table:
        >>> my_table.drop()
        >>>
        >>> # List tables again:
        >>> my_table.database.list_table_names()
        []
        >>>
        >>> # Try working on the table now:
        >>> from astrapy.exceptions import DataAPIResponseException
        >>> try:
        ...     my_table.find_one({})
        ... except DataAPIResponseException as err:
        ...     print(str(err))
        ...
        Collection does not exist [...] (COLLECTION_NOT_EXIST)

    Note:
        Use with caution.

    Note:
        Once the method succeeds, methods on this object can still be invoked:
        however, this hardly makes sense as the underlying actual table
        is no more.
        It is the responsibility of the developer to design a correct flow
        which avoids using a dropped table any further.
    """

    table_name = self.name
    logger.info(f"dropping table '{table_name}' (self)")
    # the actual work is delegated to the database this table belongs to
    self.database.drop_table(
        table_name,
        if_exists=if_exists,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info(f"finished dropping table '{table_name}' (self)")

def estimated_document_count(self, *, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> int

Query the API server for an estimate of the document count in the table.

Contrary to count_documents, this method has no filtering parameters.

Args

general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a server-provided estimate count of the documents in the table.

Example

>>> my_table.estimated_document_count()
5820

Expand source code

def estimated_document_count(
    self,
    *,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> int:
    """
    Ask the API server for an estimated count of the documents in the table.

    Unlike `count_documents`, no filtering parameters are accepted here.

    Args:
        general_method_timeout_ms: how long, in milliseconds, the underlying
            API request is allowed to take. When omitted, the defaults set
            on this object are used. (Only one API request is issued by this
            method, therefore all timeout parameters have the same meaning.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        the estimated number of documents in the table, as provided
        by the server.

    Raises:
        UnexpectedDataAPIResponseException: if the API response has no
            "count" entry within its "status" field.

    Example:
        >>> my_table.estimated_document_count()
        5820
    """

    # Resolve which of the equivalent timeout settings applies, along with
    # the label identifying the setting that was picked.
    effective_timeout_ms, timeout_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    command_payload: dict[str, Any] = {"estimatedDocumentCount": {}}

    logger.info(f"estimatedDocumentCount on '{self.name}'")
    timeout_context = _TimeoutContext(
        request_ms=effective_timeout_ms,
        label=timeout_label,
    )
    api_response = self._api_commander.request(
        payload=command_payload,
        timeout_context=timeout_context,
    )
    logger.info(f"finished estimatedDocumentCount on '{self.name}'")

    # A well-formed response carries the estimate under status.count:
    # anything else is reported as a faulty response.
    response_status = api_response.get("status", {})
    if "count" not in response_status:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from estimatedDocumentCount API command.",
            raw_response=api_response,
        )
    estimated_count: int = response_status["count"]
    return estimated_count

Find rows on the table matching the provided filters and according to sorting criteria including vector similarity.

The returned TableFindCursor object, representing the stream of results, can be iterated over, or consumed and manipulated in several other ways (see the examples below and the TableFindCursor documentation for details). Since the amount of returned items can be large, TableFindCursor is a lazy object, that fetches new data while it is being read using the Data API pagination mechanism.

Invoking .to_list() on a TableFindCursor will cause it to consume all rows and materialize the entire result set as a list. This is not recommended if the amount of results is very large.

Args

filter: a dictionary expressing which condition the returned rows must satisfy. The filter can use operators, such as "$eq" for equality, and require columns to compare with literal values. Simple examples are {} (zero filter, not recommended for large tables), {"match_no": 123} (a shorthand for {"match_no": {"$eq": 123}}), or {"match_no": 123, "round": "C"} (multiple conditions are implicitly combined with "$and"). Please consult the Data API documentation for a more detailed explanation of table search filters and tips on their usage.
projection: a prescription on which columns to return for the matching rows. The projection can take the form {"column1": True, "column2": True}, {"*": True} (i.e. return the whole row), or the complementary form that excludes columns: {"column1": False, "column2": False}. To optimize bandwidth usage, it is recommended to use a projection, especially to avoid unnecessary columns of type vector with high-dimensional embeddings.
row_type: this parameter acts as a formal specifier for the type checker. If omitted, the resulting cursor is implicitly a TableFindCursor[ROW, ROW], i.e. maintains the same type for the items it returns as that for the rows in the table. Strictly typed code may want to specify this parameter especially when a projection is given.
skip: if provided, it is a number of rows that would be obtained first in the response and are instead skipped.
limit: a maximum amount of rows to get from the table. The returned cursor will stop yielding rows when either this number is reached or there really are no more matches in the table.
include_similarity: a boolean to request the numeric value of the similarity to be returned as an added "$similarity" key in each returned row. It can be used meaningfully only in a vector search (see sort).
include_sort_vector: a boolean to request the search query vector. If set to True (and if the search is a vector search), calling the get_sort_vector method on the returned cursor will yield the vector used for the ANN search.
sort: this dictionary parameter controls the order in which the rows are returned. The sort parameter can express either a vector search or a regular (ascending/descending, even hierarchical) sorting. * For a vector search the parameter takes the form {"vector_column": qv}, with the query vector qv of the appropriate type (list of floats or DataAPIVector). If the table has automatic embedding generation ("vectorize") enabled on that column, the form {"vectorize_enabled_column": "query text"} is also valid. * In the case of non-vector sorting, the parameter specifies the column(s) and the ascending/descending ordering required. If multiple columns are provided, the sorting applies them hierarchically to the rows. Examples are {"score": SortMode.ASCENDING} (equivalently {"score": +1}), {"score": +1, "when": -1}. Note that, depending on the column(s) chosen for sorting, the table partitioning structure, and the presence of indexes, the sorting may be done in-memory by the API. In that case, there may be performance implications and limitations on the amount of items returned. Consult the Data API documentation for more details on this topic.
request_timeout_ms: a timeout, in milliseconds, to impose on each individual HTTP request to the Data API to accomplish the operation. If not provided, this object's defaults apply.
timeout_ms: an alias for request_timeout_ms.

Returns

a TableFindCursor object, that can be iterated over (and manipulated in several ways). The cursor, if needed, handles pagination under the hood as the rows are consumed.

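Note

As the rows are retrieved in chunks progressively, while the cursor is being iterated over, it is possible that the actual results obtained will reflect changes occurring to the table contents in real time.
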
Examples

>>> # Iterate over results:
>>> for row in my_table.find({"match_id": "challenge6"}):
...     print(f"(R:{row['round']}): winner {row['winner']}")
...
(R:1): winner Donna
(R:2): winner Erick
(R:3): winner Fiona
>>> # Optimize bandwidth using a projection:
>>> proj = {"round": True, "winner": True}
>>> for row in my_table.find({"match_id": "challenge6"}, projection=proj):
...     print(f"(R:{row['round']}): winner {row['winner']}")
...
(R:1): winner Donna
(R:2): winner Erick
(R:3): winner Fiona
>>> # Filter on the partitioning:
>>> my_table.find({"match_id": "challenge6"}).to_list()
[{'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
>>>
>>> # Filter on primary key:
>>> my_table.find({"match_id": "challenge6", "round": 1}).to_list()
[{'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
>>>
>>> # Filter on a regular indexed column:
>>> my_table.find({"winner": "Caio Gozer"}).to_list()
[{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
>>>
>>> # Non-equality filter on a regular indexed column:
>>> my_table.find({"score": {"$gte": 15}}).to_list()
[{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
>>>
>>> # Filter on a regular non-indexed column:
>>> # (not recommended performance-wise)
>>> my_table.find(
...     {"when": {
...         "$gte": DataAPITimestamp.from_string("1999-12-31T01:23:44Z")
...     }}
... ).to_list()
The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
[{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
>>>
>>> # Empty filter (not recommended performance-wise):
>>> my_table.find({}).to_list()
The Data API returned a warning: {'errorCode': 'ZERO_FILTER_OPERATIONS', ...
[{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
>>>
>>> # Filter on the primary key and a regular non-indexed column:
>>> # (not recommended performance-wise)
>>> my_table.find(
...     {"match_id": "fight5", "round": 3, "winner": "Caio Gozer"}
... ).to_list()
The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
[{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
>>>
>>> # Filter on a regular non-indexed column (and incomplete primary key)
>>> # (not recommended performance-wise)
>>> my_table.find({"round": 3, "winner": "Caio Gozer"}).to_list()
The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
[{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
>>>
>>> # Vector search with "sort" (on an appropriately-indexed vector column):
>>> my_table.find(
...     {},
...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
...     projection={"winner": True},
...     limit=3,
... ).to_list()
[{'winner': 'Donna'}, {'winner': 'Victor'}]
>>>
>>> # Hybrid search with vector sort and non-vector filtering:
>>> my_table.find(
...     {"match_id": "fight4"},
...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
...     projection={"winner": True},
...     limit=3,
... ).to_list()
[{'winner': 'Victor'}]
>>>
>>> # Return the numeric value of the vector similarity
>>> # (also demonstrating that one can pass a plain list for a vector):
>>> my_table.find(
...     {},
...     sort={"m_vector": [0.2, 0.3, 0.4]},
...     projection={"winner": True},
...     limit=3,
...     include_similarity=True,
... ).to_list()
[{'winner': 'Donna', '$similarity': 0.515}, {'winner': 'Victor', ...
>>>
>>> # Non-vector sorting on a 'partitionSort' column:
>>> my_table.find(
...     {"match_id": "fight5"},
...     sort={"round": SortMode.DESCENDING},
...     projection={"winner": True},
... ).to_list()
[{'winner': 'Caio Gozer'}, {'winner': 'Betta Vigo'}, ...
>>>
>>> # Using `skip` and `limit`:
>>> my_table.find(
...     {"match_id": "fight5"},
...     sort={"round": SortMode.DESCENDING},
...     projection={"winner": True},
...     skip=1,
...     limit=2,
... ).to_list()
The Data API returned a warning: {'errorCode': 'IN_MEMORY_SORTING...
[{'winner': 'Betta Vigo'}, {'winner': 'Adam Zuul'}]
>>>
>>> # Non-vector sorting on a regular column:
>>> # (not recommended performance-wise)
>>> my_table.find(
...     {"match_id": "fight5"},
...     sort={"winner": SortMode.ASCENDING},
...     projection={"winner": True},
... ).to_list()
The Data API returned a warning: {'errorCode': 'IN_MEMORY_SORTING...
[{'winner': 'Adam Zuul'}, {'winner': 'Betta Vigo'}, ...
>>>
>>> # Using `.map()` on a cursor:
>>> winner_cursor = my_table.find(
...     {"match_id": "fight5"},
...     sort={"round": SortMode.DESCENDING},
...     projection={"winner": True},
...     limit=5,
... )
>>> print("/".join(winner_cursor.map(lambda row: row["winner"].upper())))
CAIO GOZER/BETTA VIGO/ADAM ZUUL
>>>
>>> # Some other examples of cursor manipulation
>>> matches_cursor = my_table.find(
...     sort={"m_vector": DataAPIVector([-0.1, 0.15, 0.3])}
... )
>>> matches_cursor.has_next()
True
>>> next(matches_cursor)
{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
>>> matches_cursor.consumed
1
>>> matches_cursor.rewind()
>>> matches_cursor.consumed
0
>>> matches_cursor.has_next()
True
>>> matches_cursor.close()
>>> try:
...     next(matches_cursor)
... except StopIteration:
...     print("StopIteration triggered.")
...
StopIteration triggered.

Expand source code

def find(
    self,
    filter: FilterType | None = None,
    *,
    projection: ProjectionType | None = None,
    row_type: type[ROW2] | None = None,
    skip: int | None = None,
    limit: int | None = None,
    include_similarity: bool | None = None,
    include_sort_vector: bool | None = None,
    sort: SortType | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> TableFindCursor[ROW, ROW2]:
    """
    Search the table for the rows that satisfy the given filter, returned
    according to the given sorting criteria (vector similarity included).

    The outcome is a TableFindCursor object, which stands for the stream of
    results: it can be iterated over, or consumed and manipulated in several
    other ways (refer to the examples below and to the `TableFindCursor`
    documentation for details). Because the number of returned items may be
    large, a TableFindCursor is lazy: it fetches new data while it is being
    read, relying on the Data API pagination mechanism.

    Calling `.to_list()` on a TableFindCursor makes it consume all rows and
    materialize the whole result set as a list: this is not recommended
    when the amount of results is very large.

    Args:
        filter: a dictionary stating the condition that the returned rows
            must satisfy. Operators, such as "$eq" for equality, can be used
            in the filter to require that columns compare with literal values.
            Simple examples are `{}` (zero filter, not recommended for
            large tables), `{"match_no": 123}` (a shorthand for
            `{"match_no": {"$eq": 123}}`), or `{"match_no": 123, "round": "C"}`
            (multiple conditions are implicitly combined with "$and").
            The Data API documentation offers a more detailed explanation
            of table search filters, along with tips on their usage.
        projection: a prescription on which columns are returned for
            the matching rows. Accepted forms are
            `{"column1": True, "column2": True}`, `{"*": True}` (i.e. return
            the whole row), or the complementary form that excludes
            columns: `{"column1": False, "column2": False}`.
            Using a projection is recommended to optimize bandwidth usage,
            especially to leave out unnecessary columns of type vector with
            high-dimensional embeddings.
        row_type: this parameter acts as a formal specifier for the type
            checker. When omitted, the resulting cursor is implicitly a
            `TableFindCursor[ROW, ROW]`, i.e. the items it returns have
            the same type as the rows in the table. Strictly typed code
            may want to pass this parameter, especially if a projection
            is given.
        skip: if provided, a number of rows that would be obtained first
            in the response and are skipped instead.
        limit: the maximum number of rows to get from the table. The returned
            cursor stops yielding rows as soon as either this number is
            reached or there really are no more matches in the table.
        include_similarity: a boolean requesting that the numeric value of
            the similarity be returned as an added "$similarity" key in each
            returned row. This is meaningful only in a vector search
            (see `sort`).
        include_sort_vector: a boolean requesting the search query vector.
            If set to True (and if the search is a vector search), the
            `get_sort_vector` method of the returned cursor will yield
            the vector used for the ANN search.
        sort: a dictionary controlling the order in which the rows are
            returned. It can express either a vector search or a regular
            (ascending/descending, even hierarchical) sorting.
            * For a vector search the parameter has the form
            `{"vector_column": qv}`, where the query vector `qv` is of the
            appropriate type (list of floats or DataAPIVector). If automatic
            embedding generation ("vectorize") is enabled on that column of
            the table, the form `{"vectorize_enabled_column": "query text"}`
            is valid as well.
            * For a non-vector sorting, the parameter specifies the
            column(s) and the ascending/descending ordering required.
            When multiple columns are provided, the sorting applies them
            hierarchically to the rows. Examples are
            `{"score": SortMode.ASCENDING}` (equivalently `{"score": +1}`),
            `{"score": +1, "when": -1}`.
            Note that, depending on the column(s) chosen for sorting, the table
            partitioning structure, and the presence of indexes, the API
            may perform the sorting in-memory. In that case, there may be
            performance implications and limitations on the amount of
            items returned. The Data API documentation has more details
            on this topic.
        request_timeout_ms: a timeout, in milliseconds, imposed on each
            individual HTTP request made to the Data API to accomplish
            the operation. When omitted, the defaults set on this
            object are used.
        timeout_ms: an alias for `request_timeout_ms`.

    Returns:
        a TableFindCursor object, which can be iterated over (and manipulated
        in several ways). If needed, the cursor handles pagination under
        the hood while the rows are consumed.

    Note:
        Since the rows are retrieved progressively in chunks, as the cursor
        is being iterated over, the results actually obtained may reflect
        changes that occur to the table contents in real time.

    Examples:
        >>> # Iterate over results:
        >>> for row in my_table.find({"match_id": "challenge6"}):
        ...     print(f"(R:{row['round']}): winner {row['winner']}")
        ...
        (R:1): winner Donna
        (R:2): winner Erick
        (R:3): winner Fiona
        >>> # Optimize bandwidth using a projection:
        >>> proj = {"round": True, "winner": True}
        >>> for row in my_table.find({"match_id": "challenge6"}, projection=proj):
        ...     print(f"(R:{row['round']}): winner {row['winner']}")
        ...
        (R:1): winner Donna
        (R:2): winner Erick
        (R:3): winner Fiona
        >>> # Filter on the partitioning:
        >>> my_table.find({"match_id": "challenge6"}).to_list()
        [{'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
        >>>
        >>> # Filter on primary key:
        >>> my_table.find({"match_id": "challenge6", "round": 1}).to_list()
        [{'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
        >>>
        >>> # Filter on a regular indexed column:
        >>> my_table.find({"winner": "Caio Gozer"}).to_list()
        [{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
        >>>
        >>> # Non-equality filter on a regular indexed column:
        >>> my_table.find({"score": {"$gte": 15}}).to_list()
        [{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
        >>>
        >>> # Filter on a regular non-indexed column:
        >>> # (not recommended performance-wise)
        >>> my_table.find(
        ...     {"when": {
        ...         "$gte": DataAPITimestamp.from_string("1999-12-31T01:23:44Z")
        ...     }}
        ... ).to_list()
        The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
        [{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
        >>>
        >>> # Empty filter (not recommended performance-wise):
        >>> my_table.find({}).to_list()
        The Data API returned a warning: {'errorCode': 'ZERO_FILTER_OPERATIONS', ...
        [{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
        >>>
        >>> # Filter on the primary key and a regular non-indexed column:
        >>> # (not recommended performance-wise)
        >>> my_table.find(
        ...     {"match_id": "fight5", "round": 3, "winner": "Caio Gozer"}
        ... ).to_list()
        The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
        [{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
        >>>
        >>> # Filter on a regular non-indexed column (and incomplete primary key)
        >>> # (not recommended performance-wise)
        >>> my_table.find({"round": 3, "winner": "Caio Gozer"}).to_list()
        The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
        [{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
        >>>
        >>> # Vector search with "sort" (on an appropriately-indexed vector column):
        >>> my_table.find(
        ...     {},
        ...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
        ...     projection={"winner": True},
        ...     limit=3,
        ... ).to_list()
        [{'winner': 'Donna'}, {'winner': 'Victor'}]
        >>>
        >>> # Hybrid search with vector sort and non-vector filtering:
        >>> my_table.find(
        ...     {"match_id": "fight4"},
        ...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
        ...     projection={"winner": True},
        ...     limit=3,
        ... ).to_list()
        [{'winner': 'Victor'}]
        >>>
        >>> # Return the numeric value of the vector similarity
        >>> # (also demonstrating that one can pass a plain list for a vector):
        >>> my_table.find(
        ...     {},
        ...     sort={"m_vector": [0.2, 0.3, 0.4]},
        ...     projection={"winner": True},
        ...     limit=3,
        ...     include_similarity=True,
        ... ).to_list()
        [{'winner': 'Donna', '$similarity': 0.515}, {'winner': 'Victor', ...
        >>>
        >>> # Non-vector sorting on a 'partitionSort' column:
        >>> my_table.find(
        ...     {"match_id": "fight5"},
        ...     sort={"round": SortMode.DESCENDING},
        ...     projection={"winner": True},
        ... ).to_list()
        [{'winner': 'Caio Gozer'}, {'winner': 'Betta Vigo'}, ...
        >>>
        >>> # Using `skip` and `limit`:
        >>> my_table.find(
        ...     {"match_id": "fight5"},
        ...     sort={"round": SortMode.DESCENDING},
        ...     projection={"winner": True},
        ...     skip=1,
        ...     limit=2,
        ... ).to_list()
        The Data API returned a warning: {'errorCode': 'IN_MEMORY_SORTING...
        [{'winner': 'Betta Vigo'}, {'winner': 'Adam Zuul'}]
        >>>
        >>> # Non-vector sorting on a regular column:
        >>> # (not recommended performance-wise)
        >>> my_table.find(
        ...     {"match_id": "fight5"},
        ...     sort={"winner": SortMode.ASCENDING},
        ...     projection={"winner": True},
        ... ).to_list()
        The Data API returned a warning: {'errorCode': 'IN_MEMORY_SORTING...
        [{'winner': 'Adam Zuul'}, {'winner': 'Betta Vigo'}, ...
        >>>
        >>> # Using `.map()` on a cursor:
        >>> winner_cursor = my_table.find(
        ...     {"match_id": "fight5"},
        ...     sort={"round": SortMode.DESCENDING},
        ...     projection={"winner": True},
        ...     limit=5,
        ... )
        >>> print("/".join(winner_cursor.map(lambda row: row["winner"].upper())))
        CAIO GOZER/BETTA VIGO/ADAM ZUUL
        >>>
        >>> # Some other examples of cursor manipulation
        >>> matches_cursor = my_table.find(
        ...     sort={"m_vector": DataAPIVector([-0.1, 0.15, 0.3])}
        ... )
        >>> matches_cursor.has_next()
        True
        >>> next(matches_cursor)
        {'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
        >>> matches_cursor.consumed
        1
        >>> matches_cursor.rewind()
        >>> matches_cursor.consumed
        0
        >>> matches_cursor.has_next()
        True
        >>> matches_cursor.close()
        >>> try:
        ...     next(matches_cursor)
        ... except StopIteration:
        ...     print("StopIteration triggered.")
        ...
        StopIteration triggered.
    """

    # the import is deferred to call time to avoid circular import issues
    from astrapy.cursors import TableFindCursor

    # Candidate per-request timeouts, each paired with its label, in the
    # order they are handed to the selection helper: the two method
    # arguments first, then this object's configured default.
    timeout_candidates = (
        (request_timeout_ms, "request_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    effective_timeout_ms, timeout_label = _first_valid_timeout(*timeout_candidates)

    # The search settings are applied through the cursor's own chainable
    # methods; `row_type` is not used at runtime.
    return (
        TableFindCursor(
            table=self,
            request_timeout_ms=effective_timeout_ms,
            overall_timeout_ms=None,
            request_timeout_label=timeout_label,
        )
        .filter(filter)
        .project(projection)
        .skip(skip)
        .limit(limit)
        .sort(sort)
        .include_similarity(include_similarity)
        .include_sort_vector(include_sort_vector)
    )

Run a search according to the given filtering and sorting criteria and return the top row matching it, or nothing if there are none.

The parameters are analogous to some of the parameters to the find method (which has a few more that do not make sense in this case, such as limit).

Args

filter: a dictionary expressing which condition the returned row must satisfy. The filter can use operators, such as "$eq" for equality, and require columns to compare with literal values. Simple examples are {} (zero filter), {"match_no": 123} (a shorthand for {"match_no": {"$eq": 123}}), or {"match_no": 123, "round": "C"} (multiple conditions are implicitly combined with "$and"). Please consult the Data API documentation for a more detailed explanation of table search filters and tips on their usage.
projection: a prescription on which columns to return for the matching row. The projection can take the form {"column1": True, "column2": True}, {"*": True} (i.e. return the whole row), or the complementary form that excludes columns: {"column1": False, "column2": False}. To optimize bandwidth usage, it is recommended to use a projection, especially to avoid unnecessary columns of type vector with high-dimensional embeddings.
include_similarity: a boolean to request the numeric value of the similarity to be returned as an added "$similarity" key in the returned row. It can be used meaningfully only in a vector search (see sort).
sort: this dictionary parameter controls the sorting order, hence determines which row is being returned. The sort parameter can express either a vector search or a regular (ascending/descending, even hierarchical) sorting. * For a vector search the parameter takes the form {"vector_column": qv}, with the query vector qv of the appropriate type (list of floats or DataAPIVector). If the table has automatic embedding generation ("vectorize") enabled on that column, the form {"vectorize_enabled_column": "query text"} is also valid. * In the case of non-vector sorting, the parameter specifies the column(s) and the ascending/descending ordering required. If multiple columns are provided, the sorting applies them hierarchically to the rows. Examples are {"score": SortMode.ASCENDING} (equivalently {"score": +1}), {"score": +1, "when": -1}. Note that, depending on the column(s) chosen for sorting, the table partitioning structure, and the presence of indexes, the sorting may be done in-memory by the API. In that case, there may be performance implications. Consult the Data API documentation for more details on this topic.
general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a dictionary expressing the result if a row is found, otherwise None.

Examples

>>> from astrapy.constants import SortMode
>>> from astrapy.data_types import DataAPITimestamp, DataAPIVector
>>>
>>> # Filter on the partitioning:
>>> my_table.find_one({"match_id": "challenge6"})
{'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
>>>
>>> # A find with no matches:
>>> str(my_table.find_one({"match_id": "not_real"}))
'None'
>>>
>>> # Optimize bandwidth using a projection:
>>> my_table.find_one(
...     {"match_id": "challenge6"},
...     projection={"round": True, "winner": True},
... )
{'round': 1, 'winner': 'Donna'}
>>>
>>> # Filter on primary key:
>>> my_table.find_one({"match_id": "challenge6", "round": 1})
{'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
>>>
>>> # Filter on a regular indexed column:
>>> my_table.find_one({"winner": "Caio Gozer"})
{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
>>>
>>> # Non-equality filter on a regular indexed column:
>>> my_table.find_one({"score": {"$gte": 15}})
{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
>>>
>>> # Filter on a regular non-indexed column:
>>> # (not recommended performance-wise)
>>> my_table.find_one(
...     {"when": {
...         "$gte": DataAPITimestamp.from_string("1999-12-31T01:23:44Z")
...     }}
... )
The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
>>>
>>> # Empty filter:
>>> my_table.find_one({})
The Data API returned a warning: {'errorCode': 'ZERO_FILTER_OPERATIONS', ...
{'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
>>>
>>> # Filter on the primary key and a regular non-indexed column:
>>> # (not recommended performance-wise)
>>> my_table.find_one(
...     {"match_id": "fight5", "round": 3, "winner": "Caio Gozer"}
... )
The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
>>>
>>> # Filter on a regular non-indexed column (and incomplete primary key)
>>> # (not recommended performance-wise)
>>> my_table.find_one({"round": 3, "winner": "Caio Gozer"})
The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
{'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
>>>
>>> # Vector search with "sort" (on an appropriately-indexed vector column):
>>> my_table.find_one(
...     {},
...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
...     projection={"winner": True},
... )
{'winner': 'Donna'}
>>>
>>> # Hybrid search with vector sort and non-vector filtering:
>>> my_table.find_one(
...     {"match_id": "fight4"},
...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
...     projection={"winner": True},
... )
{'winner': 'Victor'}
>>>
>>> # Return the numeric value of the vector similarity
>>> # (also demonstrating that one can pass a plain list for a vector):
>>> my_table.find_one(
...     {},
...     sort={"m_vector": [0.2, 0.3, 0.4]},
...     projection={"winner": True},
...     include_similarity=True,
... )
{'winner': 'Donna', '$similarity': 0.515}
>>>
>>> # Non-vector sorting on a 'partitionSort' column:
>>> my_table.find_one(
...     {"match_id": "fight5"},
...     sort={"round": SortMode.DESCENDING},
...     projection={"winner": True},
... )
{'winner': 'Caio Gozer'}
>>>
>>> # Non-vector sorting on a regular column:
>>> # (not recommended performance-wise)
>>> my_table.find_one(
...     {"match_id": "fight5"},
...     sort={"winner": SortMode.ASCENDING},
...     projection={"winner": True},
... )
The Data API returned a warning: {'errorCode': 'IN_MEMORY_SORTING...
{'winner': 'Adam Zuul'}

Expand source code

def find_one(
    self,
    filter: FilterType | None = None,
    *,
    projection: ProjectionType | None = None,
    include_similarity: bool | None = None,
    sort: SortType | None = None,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> ROW | None:
    """
    Search the table with the supplied filter and sort criteria and return
    the first matching row, or None when no row matches.

    This method accepts a subset of the parameters of `find`: those that
    have no meaning for a single-row result (such as `limit`) are absent.

    Args:
        filter: a dictionary stating the condition the returned row must
            satisfy. Filters may use operators (for instance "$eq" for equality)
            and compare columns with literal values. Simple examples are
            `{}` (zero filter), `{"match_no": 123}` (a shorthand for
            `{"match_no": {"$eq": 123}}`), or `{"match_no": 123, "round": "C"}`
            (multiple conditions are implicitly combined with "$and").
            See the Data API documentation for a thorough explanation
            of table search filters and advice on their usage.
        projection: a specification of the columns to return for the matching
            row. It can be given as `{"column1": True, "column2": True}`,
            as `{"*": True}` (i.e. return the whole row), or in the complementary
            form that excludes columns: `{"column1": False, "column2": False}`.
            Using a projection is recommended to save bandwidth, in particular
            to leave out unneeded vector columns holding high-dimensional
            embeddings.
        include_similarity: a boolean requesting that the numeric similarity
            be added to the returned row under the "$similarity" key.
            This is meaningful only for a vector search (see `sort`).
        sort: a dictionary controlling the sorting order, and therefore which
            row is returned. It can describe either a vector search or
            a regular (ascending/descending, even hierarchical) sorting.
            * For a vector search the parameter has the form
            `{"vector_column": qv}`, where the query vector `qv` is of the
            appropriate type (list of floats or DataAPIVector). If automatic
            embedding generation ("vectorize") is enabled on that column,
            the form `{"vectorize_enabled_column": "query text"}` is valid too.
            * For non-vector sorting, the parameter names the column(s)
            and the ascending/descending ordering required. When several
            columns are given, they are applied hierarchically to the rows.
            Examples are `{"score": SortMode.ASCENDING}`
            (equivalently `{"score": +1}`), `{"score": +1, "when": -1}`.
            Depending on the column(s) used for sorting, the table partitioning
            structure and the available indexes, the API may have to sort
            in-memory, which may have performance implications.
            See the Data API documentation for more details on this topic.
        general_method_timeout_ms: a timeout, in milliseconds, for the
            underlying API request. If omitted, this object's defaults apply.
            (Only one API request is issued by this method, so all timeout
            parameters are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a dictionary expressing the result if a row is found, otherwise None.

    Examples:
        >>> from astrapy.constants import SortMode
        >>> from astrapy.data_types import DataAPITimestamp, DataAPIVector
        >>>
        >>> # Filter on the partitioning:
        >>> my_table.find_one({"match_id": "challenge6"})
        {'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
        >>>
        >>> # A find with no matches:
        >>> str(my_table.find_one({"match_id": "not_real"}))
        'None'
        >>>
        >>> # Optimize bandwidth using a projection:
        >>> my_table.find_one(
        ...     {"match_id": "challenge6"},
        ...     projection={"round": True, "winner": True},
        ... )
        {'round': 1, 'winner': 'Donna'}
        >>>
        >>> # Filter on primary key:
        >>> my_table.find_one({"match_id": "challenge6", "round": 1})
        {'match_id': 'challenge6', 'round': 1, 'fighters': DataAPISet([]), ...
        >>>
        >>> # Filter on a regular indexed column:
        >>> my_table.find_one({"winner": "Caio Gozer"})
        {'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
        >>>
        >>> # Non-equality filter on a regular indexed column:
        >>> my_table.find_one({"score": {"$gte": 15}})
        {'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
        >>>
        >>> # Filter on a regular non-indexed column:
        >>> # (not recommended performance-wise)
        >>> my_table.find_one(
        ...     {"when": {
        ...         "$gte": DataAPITimestamp.from_string("1999-12-31T01:23:44Z")
        ...     }}
        ... )
        The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
        {'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
        >>>
        >>> # Empty filter:
        >>> my_table.find_one({})
        The Data API returned a warning: {'errorCode': 'ZERO_FILTER_OPERATIONS', ...
        {'match_id': 'fight4', 'round': 1, 'fighters': DataAPISet([UUID('0193...
        >>>
        >>> # Filter on the primary key and a regular non-indexed column:
        >>> # (not recommended performance-wise)
        >>> my_table.find_one(
        ...     {"match_id": "fight5", "round": 3, "winner": "Caio Gozer"}
        ... )
        The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
        {'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
        >>>
        >>> # Filter on a regular non-indexed column (and incomplete primary key)
        >>> # (not recommended performance-wise)
        >>> my_table.find_one({"round": 3, "winner": "Caio Gozer"})
        The Data API returned a warning: {'errorCode': 'MISSING_INDEX', ...
        {'match_id': 'fight5', 'round': 3, 'fighters': DataAPISet([]), ...
        >>>
        >>> # Vector search with "sort" (on an appropriately-indexed vector column):
        >>> my_table.find_one(
        ...     {},
        ...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
        ...     projection={"winner": True},
        ... )
        {'winner': 'Donna'}
        >>>
        >>> # Hybrid search with vector sort and non-vector filtering:
        >>> my_table.find_one(
        ...     {"match_id": "fight4"},
        ...     sort={"m_vector": DataAPIVector([0.2, 0.3, 0.4])},
        ...     projection={"winner": True},
        ... )
        {'winner': 'Victor'}
        >>>
        >>> # Return the numeric value of the vector similarity
        >>> # (also demonstrating that one can pass a plain list for a vector):
        >>> my_table.find_one(
        ...     {},
        ...     sort={"m_vector": [0.2, 0.3, 0.4]},
        ...     projection={"winner": True},
        ...     include_similarity=True,
        ... )
        {'winner': 'Donna', '$similarity': 0.515}
        >>>
        >>> # Non-vector sorting on a 'partitionSort' column:
        >>> my_table.find_one(
        ...     {"match_id": "fight5"},
        ...     sort={"round": SortMode.DESCENDING},
        ...     projection={"winner": True},
        ... )
        {'winner': 'Caio Gozer'}
        >>>
        >>> # Non-vector sorting on a regular column:
        >>> # (not recommended performance-wise)
        >>> my_table.find_one(
        ...     {"match_id": "fight5"},
        ...     sort={"winner": SortMode.ASCENDING},
        ...     projection={"winner": True},
        ... )
        The Data API returned a warning: {'errorCode': 'IN_MEMORY_SORTING...
        {'winner': 'Adam Zuul'}
    """

    # One API request only: the three timeout parameters resolve to one value.
    resolved_timeout_ms, timeout_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )

    # "options" is sent only when the caller expressed a similarity preference.
    if include_similarity is None:
        find_one_options = None
    else:
        find_one_options = {"includeSimilarity": include_similarity}

    # Candidate command fields, in the order they are sent; None-valued
    # entries are left out of the command altogether.
    candidate_fields = {
        "filter": filter,
        "projection": normalize_optional_projection(projection),
        "options": find_one_options,
        "sort": sort,
    }
    command_body = {
        field_name: field_value
        for field_name, field_value in candidate_fields.items()
        if field_value is not None
    }
    payload = self._converter_agent.preprocess_payload(
        {"findOne": command_body},
        map2tuple_checker=None,
    )

    response = self._api_commander.request(
        payload=payload,
        timeout_context=_TimeoutContext(
            request_ms=resolved_timeout_ms, label=timeout_label
        ),
    )

    # Both the document slot and the projection schema must be present
    # in the response, even when no row matched (document is then None).
    response_data = response.get("data") or {}
    if "document" not in response_data:
        raise UnexpectedDataAPIResponseException(
            text="Response from findOne API command missing 'document'.",
            raw_response=response,
        )
    response_status = response.get("status") or {}
    if "projectionSchema" not in response_status:
        raise UnexpectedDataAPIResponseException(
            text="Response from findOne API command missing 'projectionSchema'.",
            raw_response=response,
        )

    found_document = response_data["document"]
    if found_document is None:
        return None
    return self._converter_agent.postprocess_row(
        found_document,
        columns_dict=response_status["projectionSchema"],
        similarity_pseudocolumn="$similarity" if include_similarity else None,
    )

def info(self, *, database_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> TableInfo

Return information on the table. This should not be confused with the table definition (i.e. the schema).

Args

database_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying DevOps API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for database_admin_timeout_ms.
timeout_ms: an alias for database_admin_timeout_ms.

Returns

A TableInfo object for inspection.

Example

>>> # Note: output reformatted for clarity.
>>> my_table.info()
TableInfo(
    database_info=AstraDBDatabaseInfo(id=..., name=..., ...),
    keyspace='default_keyspace',
    name='games',
    full_name='default_keyspace.games'
)

Expand source code

def info(
    self,
    *,
    database_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> TableInfo:
    """
    Collect and return information about this table. Note that this is
    not the table definition (i.e. the schema).

    Args:
        database_admin_timeout_ms: a timeout, in milliseconds, for the
            underlying DevOps API request.
            If omitted, this object's defaults apply.
            (Only one API request is issued by this method, so all timeout
            parameters are treated the same.)
        request_timeout_ms: an alias for `database_admin_timeout_ms`.
        timeout_ms: an alias for `database_admin_timeout_ms`.

    Returns:
        A TableInfo object for inspection.

    Example:
        >>> # Note: output reformatted for clarity.
        >>> my_table.info()
        TableInfo(
            database_info=AstraDBDatabaseInfo(id=..., name=..., ...),
            keyspace='default_keyspace',
            name='games',
            full_name='default_keyspace.games'
        )
    """

    # The timeouts are forwarded to the database's own `info` call;
    # the remaining fields are read directly from this table object.
    parent_database_info = self.database.info(
        database_admin_timeout_ms=database_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    return TableInfo(
        database_info=parent_database_info,
        keyspace=self.keyspace,
        name=self.name,
        full_name=self.full_name,
    )

def insert_many(self, rows: Iterable[ROW], *, ordered: bool = False, chunk_size: int | None = None, concurrency: int | None = None, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> TableInsertManyResult

Insert a number of rows into the table, with implied overwrite in case of primary key collision.

Args

rows: an iterable of dictionaries, each expressing a row to insert. Each row must at least fully specify the primary key column values, while any other column may be omitted if desired (in which case it is left as is on DB). The values for the various columns supplied in each row must be of the right data type for the insertion to succeed. Non-primary-key columns can also be explicitly set to null.
ordered: if False (default), the insertions can occur in arbitrary order and possibly concurrently. If True, they are processed sequentially. If there are no specific reasons against it, unordered insertions are to be preferred as they complete much faster.
chunk_size: how many rows to include in each single API request. Exceeding the server maximum allowed value results in an error. Leave it unspecified (recommended) to use the system default.
concurrency: maximum number of concurrent requests to the API at a given time. It cannot be more than one for ordered insertions.
general_method_timeout_ms: a timeout, in milliseconds, to impose on the whole operation, which may consist of several API requests. If not provided, this object's defaults apply.
request_timeout_ms: a timeout, in milliseconds, to impose on each individual HTTP request to the Data API to accomplish the operation. If not provided, this object's defaults apply.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a TableInsertManyResult object, whose attributes are the primary key of the inserted rows both in the form of dictionaries and of tuples.

Examples

>>> # Insert complete and partial rows at once (concurrently)
>>> from astrapy.data_types import (
...     DataAPISet,
...     DataAPITimestamp,
...     DataAPIVector,
... )
>>> from astrapy.ids import UUID
>>>
>>> insert_result = my_table.insert_many(
...     [
...         {
...             "match_id": "fight4",
...             "round": 1,
...             "winner": "Victor",
...             "score": 18,
...             "when": DataAPITimestamp.from_string(
...                 "2024-11-28T11:30:00Z",
...             ),
...             "fighters": DataAPISet([
...                 UUID("0193539a-2770-8c09-a32a-111111111111"),
...                 UUID('019353e3-00b4-83f9-a127-222222222222'),
...             ]),
...             "m_vector": DataAPIVector([0.4, -0.6, 0.2]),
...         },
...         {"match_id": "fight5", "round": 1, "winner": "Adam"},
...         {"match_id": "fight5", "round": 2, "winner": "Betta"},
...         {"match_id": "fight5", "round": 3, "winner": "Caio"},
...         {
...             "match_id": "challenge6",
...             "round": 1,
...             "winner": "Donna",
...             "m_vector": [0.9, -0.1, -0.3],
...         },
...         {"match_id": "challenge6", "round": 2, "winner": "Erick"},
...         {"match_id": "challenge6", "round": 3, "winner": "Fiona"},
...         {"match_id": "tournamentA", "round": 1, "winner": "Gael"},
...         {"match_id": "tournamentA", "round": 2, "winner": "Hanna"},
...         {
...             "match_id": "tournamentA",
...             "round": 3,
...             "winner": "Ian",
...             "fighters": DataAPISet([
...                 UUID("0193539a-2770-8c09-a32a-111111111111"),
...             ]),
...         },
...         {"match_id": "fight7", "round": 1, "winner": "Joy"},
...         {"match_id": "fight7", "round": 2, "winner": "Kevin"},
...         {"match_id": "fight7", "round": 3, "winner": "Lauretta"},
...     ],
...     concurrency=10,
...     chunk_size=3,
... )
>>> insert_result.inserted_ids
[{'match_id': 'fight4', 'round': 1}, {'match_id': 'fight5', ...
>>> insert_result.inserted_id_tuples
[('fight4', 1), ('fight5', 1), ('fight5', 2), ('fight5', 3), ...
>>>
>>> # Ordered insertion
>>> # (would stop on first failure; predictable end result on DB)
>>> my_table.insert_many(
...     [
...         {"match_id": "fight5", "round": 1, "winner": "Adam0"},
...         {"match_id": "fight5", "round": 2, "winner": "Betta0"},
...         {"match_id": "fight5", "round": 3, "winner": "Caio0"},
...         {"match_id": "fight5", "round": 1, "winner": "Adam Zuul"},
...         {"match_id": "fight5", "round": 2, "winner": "Betta Vigo"},
...         {"match_id": "fight5", "round": 3, "winner": "Caio Gozer"},
...     ],
...     ordered=True,
... )
TableInsertManyResult(inserted_ids=[{'match_id': 'fight5', 'round': 1}, ...

Note

Unordered insertions are executed with some degree of concurrency, so it is usually better to prefer this mode unless the order in the row sequence is important.

Note

A failure mode for this command is related to certain faulty rows found among those to insert: validation may fail, for example, if the vector length does not match the table schema.

For an ordered insertion, the method will raise an exception at the first such faulty row – nevertheless, all rows processed until then will end up being written to the database.

Expand source code

def insert_many(
    self,
    rows: Iterable[ROW],
    *,
    ordered: bool = False,
    chunk_size: int | None = None,
    concurrency: int | None = None,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> TableInsertManyResult:
    """
    Insert a number of rows into the table,
    with implied overwrite in case of primary key collision.

    Inserting rows whose primary keys correspond to entries already stored
    in the table has the effect of an in-place update: the rows are overwritten.
    However, if the rows being inserted are partially provided, i.e. some columns
    are not specified, these are left unchanged on the database. To explicitly
    reset them, specify their value as appropriate to their data type,
    i.e. `None`, `{}` or analogous.

    Args:
        rows: an iterable of dictionaries, each expressing a row to insert.
            Each row must at least fully specify the primary key column values,
            while any other column may be omitted if desired (in which case
            it is left as is on DB).
            The values for the various columns supplied in each row must
            be of the right data type for the insertion to succeed.
            Non-primary-key columns can also be explicitly set to null.
        ordered: if False (default), the insertions can occur in arbitrary order
            and possibly concurrently. If True, they are processed sequentially.
            If there are no specific reasons against it, unordered insertions
            are to be preferred as they complete much faster.
        chunk_size: how many rows to include in each single API request.
            It must be a positive integer.
            Exceeding the server maximum allowed value results in an error.
            Leave it unspecified (recommended) to use the system default.
        concurrency: maximum number of concurrent requests to the API at
            a given time. It cannot be more than one for ordered insertions.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            whole operation, which may consist of several API requests.
            If not provided, this object's defaults apply.
        request_timeout_ms: a timeout, in milliseconds, to impose on each
            individual HTTP request to the Data API to accomplish the operation.
            If not provided, this object's defaults apply.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a TableInsertManyResult object, whose attributes are the primary key
        of the inserted rows both in the form of dictionaries and of tuples.

    Raises:
        ValueError: if an ordered insertion is requested with a concurrency
            greater than one, or if the chunk size is not a positive integer.
        TableInsertManyException: if any of the API responses reports errors
            (see the notes below).

    Examples:
        >>> # Insert complete and partial rows at once (concurrently)
        >>> from astrapy.data_types import (
        ...     DataAPISet,
        ...     DataAPITimestamp,
        ...     DataAPIVector,
        ... )
        >>> from astrapy.ids import UUID
        >>>
        >>> insert_result = my_table.insert_many(
        ...     [
        ...         {
        ...             "match_id": "fight4",
        ...             "round": 1,
        ...             "winner": "Victor",
        ...             "score": 18,
        ...             "when": DataAPITimestamp.from_string(
        ...                 "2024-11-28T11:30:00Z",
        ...             ),
        ...             "fighters": DataAPISet([
        ...                 UUID("0193539a-2770-8c09-a32a-111111111111"),
        ...                 UUID('019353e3-00b4-83f9-a127-222222222222'),
        ...             ]),
        ...             "m_vector": DataAPIVector([0.4, -0.6, 0.2]),
        ...         },
        ...         {"match_id": "fight5", "round": 1, "winner": "Adam"},
        ...         {"match_id": "fight5", "round": 2, "winner": "Betta"},
        ...         {"match_id": "fight5", "round": 3, "winner": "Caio"},
        ...         {
        ...             "match_id": "challenge6",
        ...             "round": 1,
        ...             "winner": "Donna",
        ...             "m_vector": [0.9, -0.1, -0.3],
        ...         },
        ...         {"match_id": "challenge6", "round": 2, "winner": "Erick"},
        ...         {"match_id": "challenge6", "round": 3, "winner": "Fiona"},
        ...         {"match_id": "tournamentA", "round": 1, "winner": "Gael"},
        ...         {"match_id": "tournamentA", "round": 2, "winner": "Hanna"},
        ...         {
        ...             "match_id": "tournamentA",
        ...             "round": 3,
        ...             "winner": "Ian",
        ...             "fighters": DataAPISet([
        ...                 UUID("0193539a-2770-8c09-a32a-111111111111"),
        ...             ]),
        ...         },
        ...         {"match_id": "fight7", "round": 1, "winner": "Joy"},
        ...         {"match_id": "fight7", "round": 2, "winner": "Kevin"},
        ...         {"match_id": "fight7", "round": 3, "winner": "Lauretta"},
        ...     ],
        ...     concurrency=10,
        ...     chunk_size=3,
        ... )
        >>> insert_result.inserted_ids
        [{'match_id': 'fight4', 'round': 1}, {'match_id': 'fight5', ...
        >>> insert_result.inserted_id_tuples
        [('fight4', 1), ('fight5', 1), ('fight5', 2), ('fight5', 3), ...
        >>>
        >>> # Ordered insertion
        >>> # (would stop on first failure; predictable end result on DB)
        >>> my_table.insert_many(
        ...     [
        ...         {"match_id": "fight5", "round": 1, "winner": "Adam0"},
        ...         {"match_id": "fight5", "round": 2, "winner": "Betta0"},
        ...         {"match_id": "fight5", "round": 3, "winner": "Caio0"},
        ...         {"match_id": "fight5", "round": 1, "winner": "Adam Zuul"},
        ...         {"match_id": "fight5", "round": 2, "winner": "Betta Vigo"},
        ...         {"match_id": "fight5", "round": 3, "winner": "Caio Gozer"},
        ...     ],
        ...     ordered=True,
        ... )
        TableInsertManyResult(inserted_ids=[{'match_id': 'fight5', 'round': 1}, ...

    Note:
        Unordered insertions are executed with some degree of concurrency,
        so it is usually better to prefer this mode unless the order in the
        row sequence is important.

    Note:
        A failure mode for this command is related to certain faulty rows
        found among those to insert: validation may fail, for example, if the
        vector length does not match the table schema.

        For an ordered insertion, the method will raise an exception at
        the first such faulty row -- nevertheless, all rows processed
        until then will end up being written to the database.

        For unordered insertions, if the error stems from faulty rows
        the insertion proceeds until exhausting the input rows: then,
        an exception is raised -- and all insertable rows will have been
        written to the database, including those "after" the troublesome ones.

        Errors occurring during an insert_many operation, for that reason,
        may result in a `TableInsertManyException` being raised.
        This exception allows to inspect the list of row IDs that were
        successfully inserted, while accessing at the same time the underlying
        "root errors" that made the full method call to fail.
    """

    # Overall timeout: explicit parameter, then its alias, then the defaults.
    _general_method_timeout_ms, _gmt_label = _first_valid_timeout(
        (general_method_timeout_ms, "general_method_timeout_ms"),
        (timeout_ms, "timeout_ms"),
        (
            self.api_options.timeout_options.general_method_timeout_ms,
            "general_method_timeout_ms",
        ),
    )
    # Per-request timeout: explicit parameter, then the defaults.
    _request_timeout_ms, _rt_label = _first_valid_timeout(
        (request_timeout_ms, "request_timeout_ms"),
        (self.api_options.timeout_options.request_timeout_ms, "request_timeout_ms"),
    )
    if concurrency is None:
        if ordered:
            _concurrency = 1
        else:
            _concurrency = DEFAULT_INSERT_MANY_CONCURRENCY
    else:
        _concurrency = concurrency
    if _concurrency > 1 and ordered:
        raise ValueError("Cannot run ordered insert_many concurrently.")
    if chunk_size is None:
        _chunk_size = DEFAULT_INSERT_MANY_CHUNK_SIZE
    else:
        _chunk_size = chunk_size
    # A zero step makes `range` raise a cryptic error, while a negative one
    # yields no chunks at all (i.e. nothing would be inserted, silently):
    # reject both upfront with an explicit message.
    if _chunk_size < 1:
        raise ValueError("The chunk size for insert_many must be a positive integer.")
    # Materialize the input: its length is needed for chunking and logging.
    _rows = list(rows)
    logger.info(f"inserting {len(_rows)} rows in '{self.name}'")
    raw_results: list[dict[str, Any]] = []
    im_payloads: list[dict[str, Any] | None] = []
    # The manager caps each request's timeout by what is left of the overall one.
    timeout_manager = MultiCallTimeoutManager(
        overall_timeout_ms=_general_method_timeout_ms,
        timeout_label=_gmt_label,
    )
    if ordered:
        options = {"ordered": True, "returnDocumentResponses": True}
        inserted_ids: list[Any] = []
        inserted_id_tuples: list[Any] = []
        for i in range(0, len(_rows), _chunk_size):
            im_payload = self._converter_agent.preprocess_payload(
                {
                    "insertMany": {
                        "documents": _rows[i : i + _chunk_size],
                        "options": options,
                    },
                },
                map2tuple_checker=map2tuple_checker_insert_many,
            )
            logger.info(f"insertMany on '{self.name}'")
            # API errors are not raised by the request itself: they are
            # inspected below so that the IDs inserted so far can be reported.
            chunk_response = self._api_commander.request(
                payload=im_payload,
                raise_api_errors=False,
                timeout_context=timeout_manager.remaining_timeout(
                    cap_time_ms=_request_timeout_ms,
                    cap_timeout_label=_rt_label,
                ),
            )
            logger.info(f"finished insertMany on '{self.name}'")
            # accumulate the results in this call
            chunk_inserted_ids, chunk_inserted_ids_tuples = (
                self._prepare_keys_from_status(chunk_response.get("status"))
            )
            inserted_ids += chunk_inserted_ids
            inserted_id_tuples += chunk_inserted_ids_tuples
            raw_results += [chunk_response]
            im_payloads += [im_payload]
            # if errors, quit early
            if chunk_response.get("errors", []):
                response_exception = DataAPIResponseException.from_response(
                    command=im_payload,
                    raw_response=chunk_response,
                )
                raise TableInsertManyException(
                    inserted_ids=inserted_ids,
                    inserted_id_tuples=inserted_id_tuples,
                    exceptions=[response_exception],
                )

        # return
        full_result = TableInsertManyResult(
            raw_results=raw_results,
            inserted_ids=inserted_ids,
            inserted_id_tuples=inserted_id_tuples,
        )
        logger.info(f"finished inserting {len(_rows)} rows in '{self.name}'")
        return full_result

    else:
        # unordered: concurrent or not, do all of them and parse the results
        options = {"ordered": False, "returnDocumentResponses": True}
        if _concurrency > 1:
            with ThreadPoolExecutor(max_workers=_concurrency) as executor:

                def _chunk_insertor(
                    row_chunk: list[dict[str, Any]],
                ) -> tuple[dict[str, Any] | None, dict[str, Any]]:
                    # Build and send the command for one chunk, returning
                    # the (payload, response) pair for later inspection.
                    im_payload = self._converter_agent.preprocess_payload(
                        {
                            "insertMany": {
                                "documents": row_chunk,
                                "options": options,
                            },
                        },
                        map2tuple_checker=map2tuple_checker_insert_many,
                    )
                    logger.info(f"insertMany(chunk) on '{self.name}'")
                    im_response = self._api_commander.request(
                        payload=im_payload,
                        raise_api_errors=False,
                        timeout_context=timeout_manager.remaining_timeout(
                            cap_time_ms=_request_timeout_ms,
                            cap_timeout_label=_rt_label,
                        ),
                    )
                    logger.info(f"finished insertMany(chunk) on '{self.name}'")
                    return im_payload, im_response

                raw_pl_results_pairs = list(
                    executor.map(
                        _chunk_insertor,
                        (
                            _rows[i : i + _chunk_size]
                            for i in range(0, len(_rows), _chunk_size)
                        ),
                    )
                )
                # Split the pairs into two *lists* (not the tuples that
                # `zip(*pairs)` would give), so that the result carries the
                # same types regardless of the concurrency setting.
                # This also covers the no-chunks case with two empty lists.
                im_payloads = [
                    chunk_payload for chunk_payload, _ in raw_pl_results_pairs
                ]
                raw_results = [
                    chunk_response for _, chunk_response in raw_pl_results_pairs
                ]

        else:
            for i in range(0, len(_rows), _chunk_size):
                im_payload = self._converter_agent.preprocess_payload(
                    {
                        "insertMany": {
                            "documents": _rows[i : i + _chunk_size],
                            "options": options,
                        },
                    },
                    map2tuple_checker=map2tuple_checker_insert_many,
                )
                logger.info(f"insertMany(chunk) on '{self.name}'")
                im_response = self._api_commander.request(
                    payload=im_payload,
                    raise_api_errors=False,
                    timeout_context=timeout_manager.remaining_timeout(
                        cap_time_ms=_request_timeout_ms,
                        cap_timeout_label=_rt_label,
                    ),
                )
                logger.info(f"finished insertMany(chunk) on '{self.name}'")
                raw_results.append(im_response)
                im_payloads.append(im_payload)
        # recast raw_results. Each response has its schema: unfold appropriately
        ids_and_tuples_per_chunk = [
            self._prepare_keys_from_status(chunk_response.get("status"))
            for chunk_response in raw_results
        ]
        inserted_ids = [
            inserted_id
            for chunk_ids, _ in ids_and_tuples_per_chunk
            for inserted_id in chunk_ids
        ]
        inserted_id_tuples = [
            inserted_id_tuple
            for _, chunk_id_tuples in ids_and_tuples_per_chunk
            for inserted_id_tuple in chunk_id_tuples
        ]
        # check-raise
        response_exceptions = [
            DataAPIResponseException.from_response(
                command=chunk_payload,
                raw_response=chunk_response,
            )
            for chunk_payload, chunk_response in zip(im_payloads, raw_results)
            if chunk_response.get("errors", [])
        ]
        if response_exceptions:
            raise TableInsertManyException(
                inserted_ids=inserted_ids,
                inserted_id_tuples=inserted_id_tuples,
                exceptions=response_exceptions,
            )

        # return
        full_result = TableInsertManyResult(
            raw_results=raw_results,
            inserted_ids=inserted_ids,
            inserted_id_tuples=inserted_id_tuples,
        )
        logger.info(f"finished inserting {len(_rows)} rows in '{self.name}'")
        return full_result

def insert_one(self, row: ROW, *, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> TableInsertOneResult

Insert a single row in the table, with implied overwrite in case of primary key collision.

Args

row: a dictionary expressing the row to insert. The primary key must be specified in full, while any other column may be omitted if desired (in which case it is left as is on DB). The values for the various columns supplied in the row must be of the right data type for the insertion to succeed. Non-primary-key columns can also be explicitly set to null.
general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a TableInsertOneResult object, whose attributes are the primary key of the inserted row both in the form of a dictionary and of a tuple.

Examples

>>> # a full-row insert using astrapy's datatypes
>>> from astrapy.data_types import (
...     DataAPISet,
...     DataAPITimestamp,
...     DataAPIVector,
... )
>>> from astrapy.ids import UUID
>>>
>>> insert_result = my_table.insert_one(
...     {
...         "match_id": "mtch_0",
...         "round": 1,
...         "m_vector": DataAPIVector([0.4, -0.6, 0.2]),
...         "score": 18,
...         "when": DataAPITimestamp.from_string("2024-11-28T11:30:00Z"),
...         "winner": "Victor",
...         "fighters": DataAPISet([
...             UUID("0193539a-2770-8c09-a32a-111111111111"),
...         ]),
...     },
... )
>>> insert_result.inserted_id
{'match_id': 'mtch_0', 'round': 1}
>>> insert_result.inserted_id_tuple
('mtch_0', 1)
>>>
>>> # a partial-row (which in this case overwrites some of the values)
>>> my_table.insert_one(
...     {
...         "match_id": "mtch_0",
...         "round": 1,
...         "winner": "Victor Vector",
...         "fighters": DataAPISet([
...             UUID("0193539a-2770-8c09-a32a-111111111111"),
...             UUID("0193539a-2880-8875-9f07-222222222222"),
...         ]),
...     },
... )
TableInsertOneResult(inserted_id={'match_id': 'mtch_0', 'round': 1} ...
>>>
>>> # another insertion demonstrating standard-library datatypes in values
>>> import datetime
>>>
>>> my_table.insert_one(
...     {
...         "match_id": "mtch_0",
...         "round": 2,
...         "winner": "Angela",
...         "score": 25,
...         "when": datetime.datetime(
...             2024, 7, 13, 12, 55, 30, 889,
...             tzinfo=datetime.timezone.utc,
...         ),
...         "fighters": {
...             UUID("019353cb-8e01-8276-a190-333333333333"),
...         },
...         "m_vector": [0.4, -0.6, 0.2],
...     },
... )
TableInsertOneResult(inserted_id={'match_id': 'mtch_0', 'round': 2}, ...

Expand source code

def insert_one(
    self,
    row: ROW,
    *,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> TableInsertOneResult:
    """
    Insert a single row in the table,
    with implied overwrite in case of primary key collision.

    Inserting a row whose primary key corresponds to an entry already stored
    in the table has the effect of an in-place update: the row is overwritten.
    However, if the row being inserted is partially provided, i.e. some columns
    are not specified, these are left unchanged on the database. To explicitly
    reset them, specify their value as appropriate to their data type,
    i.e. `None`, `{}` or analogous.

    Args:
        row: a dictionary expressing the row to insert. The primary key
            must be specified in full, while any other column may be omitted
            if desired (in which case it is left as is on DB).
            The values for the various columns supplied in the row must
            be of the right data type for the insertion to succeed.
            Non-primary-key columns can also be explicitly set to null.
        general_method_timeout_ms: a timeout, in milliseconds, to impose on the
            underlying API request. If not provided, this object's defaults apply.
            (This method issues a single API request, hence all timeout parameters
            are treated the same.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a TableInsertOneResult object, whose attributes are the primary key
        of the inserted row both in the form of a dictionary and of a tuple.

    Raises:
        UnexpectedDataAPIResponseException: if the API response status lacks
            'insertedIds', if 'insertedIds' is empty, or if 'primaryKeySchema'
            is missing or empty.

    Examples:
        >>> # a full-row insert using astrapy's datatypes
        >>> from astrapy.data_types import (
        ...     DataAPISet,
        ...     DataAPITimestamp,
        ...     DataAPIVector,
        ... )
        >>> from astrapy.ids import UUID
        >>>
        >>> insert_result = my_table.insert_one(
        ...     {
        ...         "match_id": "mtch_0",
        ...         "round": 1,
        ...         "m_vector": DataAPIVector([0.4, -0.6, 0.2]),
        ...         "score": 18,
        ...         "when": DataAPITimestamp.from_string("2024-11-28T11:30:00Z"),
        ...         "winner": "Victor",
        ...         "fighters": DataAPISet([
        ...             UUID("0193539a-2770-8c09-a32a-111111111111"),
        ...         ]),
        ...     },
        ... )
        >>> insert_result.inserted_id
        {'match_id': 'mtch_0', 'round': 1}
        >>> insert_result.inserted_id_tuple
        ('mtch_0', 1)
        >>>
        >>> # a partial-row (which in this case overwrites some of the values)
        >>> my_table.insert_one(
        ...     {
        ...         "match_id": "mtch_0",
        ...         "round": 1,
        ...         "winner": "Victor Vector",
        ...         "fighters": DataAPISet([
        ...             UUID("0193539a-2770-8c09-a32a-111111111111"),
        ...             UUID("0193539a-2880-8875-9f07-222222222222"),
        ...         ]),
        ...     },
        ... )
        TableInsertOneResult(inserted_id={'match_id': 'mtch_0', 'round': 1} ...
        >>>
        >>> # another insertion demonstrating standard-library datatypes in values
        >>> import datetime
        >>>
        >>> my_table.insert_one(
        ...     {
        ...         "match_id": "mtch_0",
        ...         "round": 2,
        ...         "winner": "Angela",
        ...         "score": 25,
        ...         "when": datetime.datetime(
        ...             2024, 7, 13, 12, 55, 30, 889,
        ...             tzinfo=datetime.timezone.utc,
        ...         ),
        ...         "fighters": {
        ...             UUID("019353cb-8e01-8276-a190-333333333333"),
        ...         },
        ...         "m_vector": [0.4, -0.6, 0.2],
        ...     },
        ... )
        TableInsertOneResult(inserted_id={'match_id': 'mtch_0', 'round': 2}, ...
    """

    _request_timeout_ms, _rt_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    io_payload = self._converter_agent.preprocess_payload(
        {"insertOne": {"document": row}},
        map2tuple_checker=map2tuple_checker_insert_one,
    )
    logger.info(f"insertOne on '{self.name}'")
    io_response = self._api_commander.request(
        payload=io_payload,
        timeout_context=_TimeoutContext(
            request_ms=_request_timeout_ms, label=_rt_label
        ),
    )
    logger.info(f"finished insertOne on '{self.name}'")

    io_status = io_response.get("status", {})
    if "insertedIds" not in io_status:
        raise UnexpectedDataAPIResponseException(
            text="Response from insertOne API command missing 'insertedIds'.",
            raw_response=io_response,
        )
    if not io_status["insertedIds"]:
        raise UnexpectedDataAPIResponseException(
            text="Response from insertOne API command has empty 'insertedIds'.",
            raw_response=io_response,
        )
    # Use .get so that a response lacking the key altogether is reported as
    # an unexpected response (same as an empty schema), not as a bare KeyError.
    primary_key_schema = io_status.get("primaryKeySchema")
    if not primary_key_schema:
        raise UnexpectedDataAPIResponseException(
            text=(
                "Response from insertOne API command has "
                "empty 'primaryKeySchema'."
            ),
            raw_response=io_response,
        )

    # A single row was sent, so only the first inserted id is relevant.
    inserted_id_list = io_status["insertedIds"][0]
    inserted_id_tuple, inserted_id = self._converter_agent.postprocess_key(
        inserted_id_list,
        primary_key_schema_dict=primary_key_schema,
    )
    return TableInsertOneResult(
        raw_results=[io_response],
        inserted_id=inserted_id,
        inserted_id_tuple=inserted_id_tuple,
    )

def list_index_names(self, *, table_admin_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[str]

List the names of all indexes existing on this table.

Args

table_admin_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for table_admin_timeout_ms.
timeout_ms: an alias for table_admin_timeout_ms.

Returns

a list of the index names as strings, in no particular order.

Example

>>> my_table.list_index_names()
['m_vector_index', 'winner_index', 'score_index']

Expand source code

def list_index_names(
    self,
    *,
    table_admin_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[str]:
    """
    Return the names of the indexes currently defined on this table.

    Args:
        table_admin_timeout_ms: a timeout, in milliseconds, for the
            underlying API request. When omitted, the defaults configured
            on this object are used. (A single API request is issued by this
            method, so all timeout parameters are equivalent.)
        request_timeout_ms: an alias for `table_admin_timeout_ms`.
        timeout_ms: an alias for `table_admin_timeout_ms`.

    Returns:
        a list of the index names as strings, in no particular order.

    Raises:
        UnexpectedDataAPIResponseException: if the API response status
            does not contain an 'indexes' entry.

    Example:
        >>> my_table.list_index_names()
        ['m_vector_index', 'winner_index', 'score_index']
    """

    effective_timeout_ms, timeout_label = _select_singlereq_timeout_ta(
        timeout_options=self.api_options.timeout_options,
        table_admin_timeout_ms=table_admin_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    logger.info("listIndexes")
    response = self._api_commander.request(
        payload={"listIndexes": {"options": {}}},
        timeout_context=_TimeoutContext(
            request_ms=effective_timeout_ms, label=timeout_label
        ),
    )
    response_status = response.get("status", {})
    if "indexes" in response_status:
        logger.info("finished listIndexes")
        return response_status["indexes"]  # type: ignore[no-any-return]
    # No 'indexes' entry: the response does not have the expected shape.
    raise UnexpectedDataAPIResponseException(
        text="Faulty response from listIndexes API command.",
        raw_response=response,
    )

Create an AsyncTable from this one. Save for the arguments explicitly provided as overrides, everything else is kept identical to this table in the copy (the database is converted into an async object).

Args

embedding_api_key: optional API key(s) for interacting with the table. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
reranking_api_key: optional API key(s) for interacting with the table. If a reranker is configured for the table, and this parameter is not None, Data API calls will include the appropriate reranker-related headers according to this parameter. Reranker services may not necessarily require this setting (e.g. if the service needs no authentication, or one is configured as part of the table definition relying on a "shared secret"). If a string is passed, it is translated into an instance of RerankingAPIKeyHeaderProvider.
api_options: any additional options to set for the result, in the form of an APIOptions instance (where one can set just the needed attributes). In case the same setting is also provided as named parameter, the latter takes precedence.

Returns

the new copy, an AsyncTable instance.

Example

>>> asyncio.run(my_table.to_async().find_one(
...     {"match_id": "fight4"},
...     projection={"winner": True},
... ))
{"pk": 1, "column": "value"}

Expand source code

def to_async(
    self: Table[ROW],
    *,
    embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
    reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
    api_options: APIOptions | UnsetType = _UNSET,
) -> AsyncTable[ROW]:
    """
    Build an AsyncTable counterpart of this table. Apart from the settings
    explicitly overridden through the arguments, the copy retains this
    table's name, keyspace and options (and its database is converted into
    an async object).

    Args:
        embedding_api_key: optional API key(s) for interacting with the table.
            If an embedding service is configured, and this parameter is not None,
            each Data API call will include the necessary embedding-related headers
            as specified by this parameter. If a string is passed, it translates
            into the one "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            For some vectorize providers/models, if using header-based authentication,
            specialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`
            should be supplied.
        reranking_api_key: optional API key(s) for interacting with the table.
            If a reranker is configured for the table, and this parameter
            is not None, Data API calls will include the appropriate
            reranker-related headers according to this parameter. Reranker services
            may not necessarily require this setting (e.g. if the service needs no
            authentication, or one is configured as part of the table
            definition relying on a "shared secret").
            If a string is passed, it is translated into an instance of
            `astrapy.authentication.RerankingAPIKeyHeaderProvider`.
        api_options: any additional options to set for the result, in the form of
            an APIOptions instance (where one can set just the needed attributes).
            In case the same setting is also provided as named parameter,
            the latter takes precedence.

    Returns:
        the new copy, an AsyncTable instance.

    Example:
        >>> asyncio.run(my_table.to_async().find_one(
        ...     {"match_id": "fight4"},
        ...     projection={"winner": True},
        ... ))
        {"pk": 1, "column": "value"}
    """

    # Options coming from the named parameters: applied last, so that they
    # take precedence over the same settings found in `api_options`.
    named_overrides = APIOptions(
        embedding_api_key=embedding_api_key,
        reranking_api_key=reranking_api_key,
    )
    with_passed_options = self.api_options.with_override(api_options)
    resolved_api_options = with_passed_options.with_override(named_overrides)
    return AsyncTable(
        database=self.database.to_async(),
        name=self.name,
        keyspace=self.keyspace,
        api_options=resolved_api_options,
    )

def update_one(self, filter: FilterType, update: dict[str, Any], *, general_method_timeout_ms: int | None = None, request_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Update a single row on the table, changing some or all of the columns, with the implicit behaviour of inserting a new row if no match is found.

Args

filter: a predicate expressing the table primary key in full, i.e. a dictionary defining values for all columns that form the primary key. An example may be {"match_id": "fight4", "round": 1}.
update: the update prescription to apply to the row, expressed as a dictionary conforming to the Data API syntax. The update operators for tables are $set and $unset (in particular, setting a column to None has the same effect as the $unset operator). Examples are {"$set": {"round": 12}} and {"$unset": {"winner": "", "score": ""}}. Note that the update operation cannot alter the primary key columns. See the Data API documentation for more details.
general_method_timeout_ms: a timeout, in milliseconds, to impose on the underlying API request. If not provided, this object's defaults apply. (This method issues a single API request, hence all timeout parameters are treated the same.)
request_timeout_ms: an alias for general_method_timeout_ms.
timeout_ms: an alias for general_method_timeout_ms.

Examples

>>> from astrapy.data_types import DataAPISet
>>>
>>> # Set a new value for a column
>>> my_table.update_one(
...     {"match_id": "fight4", "round": 1},
...     update={"$set": {"winner": "Winona"}},
... )
>>>
>>> # Set a new value for a column while unsetting another column
>>> my_table.update_one(
...     {"match_id": "fight4", "round": 1},
...     update={"$set": {"winner": None, "score": 24}},
... )
>>>
>>> # Set a 'set' column to empty
>>> my_table.update_one(
...     {"match_id": "fight4", "round": 1},
...     update={"$set": {"fighters": DataAPISet()}},
... )
>>>
>>> # Set a 'set' column to empty using None
>>> my_table.update_one(
...     {"match_id": "fight4", "round": 1},
...     update={"$set": {"fighters": None}},
... )
>>>
>>> # Set a 'set' column to empty using a regular (empty) set
>>> my_table.update_one(
...     {"match_id": "fight4", "round": 1},
...     update={"$set": {"fighters": set()}},
... )
>>>
>>> # Set a 'set' column to empty using $unset
>>> my_table.update_one(
...     {"match_id": "fight4", "round": 1},
...     update={"$unset": {"fighters": None}},
... )
>>>
>>> # A non-existing primary key creates a new row
>>> my_table.update_one(
...     {"match_id": "bar_fight", "round": 4},
...     update={"$set": {"score": 8, "winner": "Jack"}},
... )
>>>
>>> # Delete column values for a row (they'll read as None now)
>>> my_table.update_one(
...     {"match_id": "challenge6", "round": 2},
...     update={"$unset": {"winner": None, "score": None}},
... )

Note

a row created entirely with update operations (as opposed to insertions) may, correspondingly, be deleted by means of an $unset update on all columns.

Expand source code

def update_one(
    self,
    filter: FilterType,
    update: dict[str, Any],
    *,
    general_method_timeout_ms: int | None = None,
    request_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Update a single row on the table, changing some or all of the columns,
    with the implicit behaviour of inserting a new row if no match is found.

    Args:
        filter: a predicate expressing the table primary key in full,
            i.e. a dictionary defining values for all columns that form the
            primary key. An example may be `{"match_id": "fight4", "round": 1}`.
        update: the update prescription to apply to the row, expressed
            as a dictionary conforming to the Data API syntax. The update
            operators for tables are `$set` and `$unset` (in particular,
            setting a column to None has the same effect as the $unset operator).
            Examples are `{"$set": {"round": 12}}` and
            `{"$unset": {"winner": "", "score": ""}}`.
            Note that the update operation cannot alter the primary key columns.
            See the Data API documentation for more details.
        general_method_timeout_ms: a timeout, in milliseconds, for the
            underlying API request. When omitted, the defaults configured
            on this object are used. (A single API request is issued by this
            method, so all timeout parameters are equivalent.)
        request_timeout_ms: an alias for `general_method_timeout_ms`.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Raises:
        UnexpectedDataAPIResponseException: if the API response
            has no 'status' entry.

    Examples:
        >>> from astrapy.data_types import DataAPISet
        >>>
        >>> # Set a new value for a column
        >>> my_table.update_one(
        ...     {"match_id": "fight4", "round": 1},
        ...     update={"$set": {"winner": "Winona"}},
        ... )
        >>>
        >>> # Set a new value for a column while unsetting another column
        >>> my_table.update_one(
        ...     {"match_id": "fight4", "round": 1},
        ...     update={"$set": {"winner": None, "score": 24}},
        ... )
        >>>
        >>> # Set a 'set' column to empty
        >>> my_table.update_one(
        ...     {"match_id": "fight4", "round": 1},
        ...     update={"$set": {"fighters": DataAPISet()}},
        ... )
        >>>
        >>> # Set a 'set' column to empty using None
        >>> my_table.update_one(
        ...     {"match_id": "fight4", "round": 1},
        ...     update={"$set": {"fighters": None}},
        ... )
        >>>
        >>> # Set a 'set' column to empty using a regular (empty) set
        >>> my_table.update_one(
        ...     {"match_id": "fight4", "round": 1},
        ...     update={"$set": {"fighters": set()}},
        ... )
        >>>
        >>> # Set a 'set' column to empty using $unset
        >>> my_table.update_one(
        ...     {"match_id": "fight4", "round": 1},
        ...     update={"$unset": {"fighters": None}},
        ... )
        >>>
        >>> # A non-existing primary key creates a new row
        >>> my_table.update_one(
        ...     {"match_id": "bar_fight", "round": 4},
        ...     update={"$set": {"score": 8, "winner": "Jack"}},
        ... )
        >>>
        >>> # Delete column values for a row (they'll read as None now)
        >>> my_table.update_one(
        ...     {"match_id": "challenge6", "round": 2},
        ...     update={"$unset": {"winner": None, "score": None}},
        ... )

    Note:
        a row created entirely with update operations (as opposed to insertions)
        may, correspondingly, be deleted by means of an $unset update on all columns.
    """

    effective_timeout_ms, timeout_label = _select_singlereq_timeout_gm(
        timeout_options=self.api_options.timeout_options,
        general_method_timeout_ms=general_method_timeout_ms,
        request_timeout_ms=request_timeout_ms,
        timeout_ms=timeout_ms,
    )
    # Assemble the command body, leaving out any part passed as None.
    command_body: dict[str, Any] = {}
    for part_name, part_value in (("filter", filter), ("update", update)):
        if part_value is not None:
            command_body[part_name] = part_value
    request_payload = self._converter_agent.preprocess_payload(
        {"updateOne": command_body},
        map2tuple_checker=map2tuple_checker_update_one,
    )
    logger.info(f"updateOne on '{self.name}'")
    response = self._api_commander.request(
        payload=request_payload,
        timeout_context=_TimeoutContext(
            request_ms=effective_timeout_ms, label=timeout_label
        ),
    )
    logger.info(f"finished updateOne on '{self.name}'")
    if "status" not in response:
        raise UnexpectedDataAPIResponseException(
            text="Faulty response from updateOne API command.",
            raw_response=response,
        )
    # The contents of 'status' are disregarded: nothing is returned.
    return None

Create a clone of this table with some changed attributes.

Args

embedding_api_key: optional API key(s) for interacting with the table. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
reranking_api_key: optional API key(s) for interacting with the table. If a reranker is configured for the table, and this parameter is not None, Data API calls will include the appropriate reranker-related headers according to this parameter. Reranker services may not necessarily require this setting (e.g. if the service needs no authentication, or one is configured as part of the table definition relying on a "shared secret"). If a string is passed, it is translated into an instance of RerankingAPIKeyHeaderProvider.
api_options: any additional options to set for the clone, in the form of an APIOptions instance (where one can set just the needed attributes). In case the same setting is also provided as named parameter, the latter takes precedence.

Returns

a new Table instance.

Example

>>> table_with_api_key_configured = my_table.with_options(
...     embedding_api_key="secret-key-0123abcd...",
... )

Expand source code

def with_options(
    self: Table[ROW],
    *,
    embedding_api_key: str | EmbeddingHeadersProvider | UnsetType = _UNSET,
    reranking_api_key: str | RerankingHeadersProvider | UnsetType = _UNSET,
    api_options: APIOptions | UnsetType = _UNSET,
) -> Table[ROW]:
    """
    Return a copy of this table in which some attributes are changed.

    Args:
        embedding_api_key: optional API key(s) for interacting with the table.
            If an embedding service is configured, and this parameter is not None,
            each Data API call will include the necessary embedding-related headers
            as specified by this parameter. If a string is passed, it translates
            into the one "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            For some vectorize providers/models, if using header-based authentication,
            specialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`
            should be supplied.
        reranking_api_key: optional API key(s) for interacting with the table.
            If a reranker is configured for the table, and this parameter
            is not None, Data API calls will include the appropriate
            reranker-related headers according to this parameter. Reranker services
            may not necessarily require this setting (e.g. if the service needs no
            authentication, or one is configured as part of the table
            definition relying on a "shared secret").
            If a string is passed, it is translated into an instance of
            `astrapy.authentication.RerankingAPIKeyHeaderProvider`.
        api_options: any additional options to set for the clone, in the form of
            an APIOptions instance (where one can set just the needed attributes).
            In case the same setting is also provided as named parameter,
            the latter takes precedence.

    Returns:
        a new Table instance.

    Example:
        >>> table_with_api_key_configured = my_table.with_options(
        ...     embedding_api_key="secret-key-0123abcd...",
        ... )
    """

    # All the work is delegated to `_copy`, which receives the overrides as-is.
    cloned_table = self._copy(
        embedding_api_key=embedding_api_key,
        reranking_api_key=reranking_api_key,
        api_options=api_options,
    )
    return cloned_table