Package astrapy

Expand source code
# Copyright DataStax, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import importlib.metadata
import os

import toml


def get_version() -> str:
    """
    Determine the version string of this package.

    The version is looked up in the metadata of the installed distribution
    (through `importlib.metadata`). If no installed distribution is found,
    the `pyproject.toml` file located one directory above this module is
    parsed instead, and the value at `tool.poetry.version` is used.

    Returns:
        the version as a string, or the literal "unknown" if the
        `pyproject.toml` file is missing or lacks the expected keys.
    """
    try:
        return importlib.metadata.version(__package__)
    except importlib.metadata.PackageNotFoundError:
        # Not installed as a distribution: fall back to the project file,
        # expected in the parent directory of the one containing this module.
        package_dir = os.path.dirname(os.path.realpath(__file__))
        toml_location = os.path.join(package_dir, "..", "pyproject.toml")

        try:
            with open(toml_location, encoding="utf-8") as toml_file:
                parsed_project = toml.loads(toml_file.read())
                poetry_section = parsed_project["tool"]["poetry"]
                return str(poetry_section["version"])
        except (FileNotFoundError, KeyError):
            # No project file, or no version entry in its poetry section.
            return "unknown"


# Package version, computed once when the package is first imported.
# NOTE(review): this is defined before the submodule imports below, presumably
# so that those submodules can read `__version__` while being imported - confirm.
__version__: str = get_version()


# The imports below intentionally come after the `__version__` assignment
# (hence the E402 suppressions); their relative order matters, see further down.
import astrapy.constants  # noqa: E402
import astrapy.cursors  # noqa: E402
import astrapy.ids  # noqa: E402
import astrapy.operations  # noqa: F401, E402
from astrapy.admin import (  # noqa: E402
    AstraDBAdmin,
    AstraDBDatabaseAdmin,
    DataAPIDatabaseAdmin,
)
from astrapy.client import DataAPIClient  # noqa: E402
from astrapy.collection import AsyncCollection, Collection  # noqa: E402

# A circular-import issue requires this to happen at the end of this module:
from astrapy.database import AsyncDatabase, Database  # noqa: E402

# Names exported by `from astrapy import *`; this is the top-level public API.
__all__ = [
    "AstraDBAdmin",
    "AstraDBDatabaseAdmin",
    "AsyncCollection",
    "AsyncDatabase",
    "Collection",
    "Database",
    "DataAPIClient",
    "DataAPIDatabaseAdmin",
    "__version__",
]


# Overrides for the pdoc documentation generator: per pdoc's `__pdoc__`
# convention, a name mapped to False is left out of the generated docs.
__pdoc__ = {
    "api": False,
    "api_commander": False,
    "api_options": False,
    "core": False,
    "db": False,
    "defaults": False,
    "ids": False,
    "meta": False,
    "ops": False,
    "request_tools": False,
    "transform_payload": False,
    "user_agents": False,
}

Sub-modules

astrapy.admin
astrapy.authentication
astrapy.client
astrapy.collection
astrapy.constants
astrapy.cursors
astrapy.database
astrapy.exceptions
astrapy.info
astrapy.operations
astrapy.results

Classes

class AstraDBAdmin (token: str | TokenProvider | None = None, *, environment: str | None = None, callers: Sequence[CallerType] = [], caller_name: str | None = None, caller_version: str | None = None, dev_ops_url: str | None = None, dev_ops_api_version: str | None = None)

An "admin" object, able to perform administrative tasks at the databases level, such as creating, listing or dropping databases.

Args

token
an access token with enough permission to perform admin tasks. This can be either a literal token string or a subclass of TokenProvider.
environment
a label, whose value is one of Environment.PROD (default), Environment.DEV or Environment.TEST.
callers
a list of caller identities, i.e. applications, or frameworks, on behalf of which DevOps API calls are performed. These end up in the request user-agent. Each caller identity is a ("caller_name", "caller_version") pair.
caller_name
DEPRECATED, use callers. Removal 2.0. Name of the application, or framework, on behalf of which the DevOps API calls are performed. This ends up in the request user-agent.
caller_version
version of the caller. DEPRECATED, use callers. Removal 2.0.
dev_ops_url
in case of custom deployments, this can be used to specify the URL to the DevOps API, such as "https://api.astra.datastax.com". Generally it can be omitted. The environment (prod/dev/…) is determined from the API Endpoint.
dev_ops_api_version
this can specify a custom version of the DevOps API (such as "v2"). Generally not needed.

Example

>>> from astrapy import DataAPIClient
>>> my_client = DataAPIClient("AstraCS:...")
>>> my_astra_db_admin = my_client.get_admin()
>>> database_list = my_astra_db_admin.list_databases()
>>> len(database_list)
3
>>> database_list[2].id
'01234567-...'
>>> my_db_admin = my_astra_db_admin.get_database_admin("01234567-...")
>>> my_db_admin.list_keyspaces()
['default_keyspace', 'staging_keyspace']
Expand source code
class AstraDBAdmin:
    """
    An "admin" object, able to perform administrative tasks at the databases
    level, such as creating, listing or dropping databases.

    Args:
        token: an access token with enough permission to perform admin tasks.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        environment: a label, whose value is one of Environment.PROD (default),
            Environment.DEV or Environment.TEST.
        callers: a list of caller identities, i.e. applications, or frameworks,
            on behalf of which DevOps API calls are performed. These end up in
            the request user-agent.
            Each caller identity is a ("caller_name", "caller_version") pair.
        caller_name: *DEPRECATED*, use `callers`. Removal 2.0. Name of the
            application, or framework, on behalf of which the DevOps API calls
            are performed. This ends up in the request user-agent.
        caller_version: version of the caller. *DEPRECATED*, use `callers`. Removal 2.0.
        dev_ops_url: in case of custom deployments, this can be used to specify
            the URL to the DevOps API, such as "https://api.astra.datastax.com".
            Generally it can be omitted. The environment (prod/dev/...) is
            determined from the API Endpoint.
        dev_ops_api_version: this can specify a custom version of the DevOps API
            (such as "v2"). Generally not needed.

    Example:
        >>> from astrapy import DataAPIClient
        >>> my_client = DataAPIClient("AstraCS:...")
        >>> my_astra_db_admin = my_client.get_admin()
        >>> database_list = my_astra_db_admin.list_databases()
        >>> len(database_list)
        3
        >>> database_list[2].id
        '01234567-...'
        >>> my_db_admin = my_astra_db_admin.get_database_admin("01234567-...")
        >>> my_db_admin.list_keyspaces()
        ['default_keyspace', 'staging_keyspace']
    """

    def __init__(
        self,
        token: str | TokenProvider | None = None,
        *,
        environment: str | None = None,
        callers: Sequence[CallerType] = [],
        caller_name: str | None = None,
        caller_version: str | None = None,
        dev_ops_url: str | None = None,
        dev_ops_api_version: str | None = None,
    ) -> None:
        # Reconcile the `callers` list with the deprecated name/version pair.
        callers_param = check_caller_parameters(callers, caller_name, caller_version)
        self.token_provider = coerce_token_provider(token)

        # The environment label is lower-cased; production is the default.
        self.environment = (environment or Environment.PROD).lower()
        if self.environment not in Environment.astra_db_values:
            raise ValueError("Environments outside of Astra DB are not supported.")

        # `dev_ops_url` is the effective URL, while `_dev_ops_url` and
        # `_dev_ops_api_version` retain the raw values passed by the caller.
        self.dev_ops_url = (
            DEV_OPS_URL_ENV_MAP[self.environment]
            if dev_ops_url is None
            else dev_ops_url
        )
        self._dev_ops_url = dev_ops_url
        self._dev_ops_api_version = dev_ops_api_version

        # The authentication header is included only if a token is available.
        auth_headers: dict[str, str | None] = {}
        if self.token_provider:
            token_value = self.token_provider.get_token()
            auth_headers[DEFAULT_DEV_OPS_AUTH_HEADER] = (
                f"{DEFAULT_DEV_OPS_AUTH_PREFIX}{token_value}"
            )
        self._dev_ops_commander_headers = auth_headers

        self.callers = callers_param
        self._dev_ops_api_commander = self._get_dev_ops_api_commander()

    def __repr__(self) -> str:
        """
        Build a short description of this admin: the class name followed by
        the redacted token (if a token is set) and by the environment
        (only when it differs from the production one).
        """
        shown_parts: list[str] = []
        if self.token_provider:
            # never expose the full secret in a repr
            shown_parts.append(f'"{redact_secret(str(self.token_provider), 15)}"')
        if self.environment != Environment.PROD:
            shown_parts.append(f'environment="{self.environment}"')
        return f"{self.__class__.__name__}({', '.join(shown_parts)})"

    def __eq__(self, other: Any) -> bool:
        """
        Compare this admin with another object.

        Args:
            other: the object to compare with.

        Returns:
            True if `other` is an AstraDBAdmin whose token provider,
            environment, DevOps URL settings, DevOps API version, callers and
            API commander all compare equal to this instance's; False otherwise
            (including when `other` is not an AstraDBAdmin).
        """
        if not isinstance(other, AstraDBAdmin):
            return False
        # Each attribute is listed exactly once; the generator lets `all` stop
        # at the first mismatch instead of evaluating every comparison.
        compared_attributes = (
            "token_provider",
            "environment",
            "dev_ops_url",
            "callers",
            "_dev_ops_url",
            "_dev_ops_api_version",
            "_dev_ops_api_commander",
        )
        return all(
            getattr(self, attribute) == getattr(other, attribute)
            for attribute in compared_attributes
        )

    def _get_dev_ops_api_commander(self) -> APICommander:
        """
        Instantiate a new APICommander based on the properties of this class.

        The commander targets `self.dev_ops_url`, i.e. the DevOps URL passed
        to the constructor or, when none was given, the default one for this
        admin's environment. Likewise, a DevOps API version passed to the
        constructor takes precedence over the environment default.

        Returns:
            an APICommander whose path is "<DevOps API version>/databases",
            carrying this admin's headers and caller identities.
        """

        # Honor a custom API version if one was supplied at construction time.
        if self._dev_ops_api_version is not None:
            dev_ops_api_version = self._dev_ops_api_version
        else:
            dev_ops_api_version = DEV_OPS_VERSION_ENV_MAP[self.environment]
        dev_ops_base_path = "/".join([dev_ops_api_version, "databases"])
        dev_ops_commander = APICommander(
            # `self.dev_ops_url` already falls back to the environment default.
            api_endpoint=self.dev_ops_url,
            path=dev_ops_base_path,
            headers=self._dev_ops_commander_headers,
            callers=self.callers,
            dev_ops_api=True,
        )
        return dev_ops_commander

    def _copy(
        self,
        *,
        token: str | TokenProvider | None = None,
        environment: str | None = None,
        callers: Sequence[CallerType] = [],
        caller_name: str | None = None,
        caller_version: str | None = None,
        dev_ops_url: str | None = None,
        dev_ops_api_version: str | None = None,
    ) -> AstraDBAdmin:
        """
        Create a new AstraDBAdmin, taking each setting from the corresponding
        argument when that is truthy and from this instance otherwise.
        """
        callers_param = check_caller_parameters(callers, caller_name, caller_version)
        # Any falsy argument (None, empty list, ...) means "keep the current value".
        effective_token = coerce_token_provider(token) or self.token_provider
        effective_environment = environment or self.environment
        effective_callers = callers_param or self.callers
        effective_url = dev_ops_url or self._dev_ops_url
        effective_api_version = dev_ops_api_version or self._dev_ops_api_version
        return AstraDBAdmin(
            token=effective_token,
            environment=effective_environment,
            callers=effective_callers,
            dev_ops_url=effective_url,
            dev_ops_api_version=effective_api_version,
        )

    def with_options(
        self,
        *,
        token: str | TokenProvider | None = None,
        callers: Sequence[CallerType] = [],
        caller_name: str | None = None,
        caller_version: str | None = None,
    ) -> AstraDBAdmin:
        """
        Return a copy of this AstraDBAdmin where the provided settings
        replace the current ones; settings not provided are left as they are.

        Args:
            token: an Access Token to the database. Example: `"AstraCS:xyz..."`.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            callers: a list of caller identities, i.e. applications, or frameworks,
                on behalf of which DevOps API calls are performed. These end up in
                the request user-agent.
                Each caller identity is a ("caller_name", "caller_version") pair.
            caller_name: *DEPRECATED*, use `callers`. Removal 2.0. Name of the
                application, or framework, on behalf of which the DevOps API calls
                are performed. This ends up in the request user-agent.
            caller_version: version of the caller. *DEPRECATED*, use `callers`.
                Removal 2.0.

        Returns:
            a new AstraDBAdmin instance.

        Example:
            >>> another_astra_db_admin = my_astra_db_admin.with_options(
            ...     callers=[("caller_identity", "1.2.0")],
            ... )
        """

        # The deprecated name/version pair is folded into a single callers list.
        return self._copy(
            token=token,
            callers=check_caller_parameters(callers, caller_name, caller_version),
        )

    @deprecation.deprecated(  # type: ignore[misc]
        deprecated_in="1.5.1",
        removed_in="2.0.0",
        current_version=__version__,
        details=SET_CALLER_DEPRECATION_NOTICE,
    )
    def set_caller(
        self,
        caller_name: str | None = None,
        caller_version: str | None = None,
    ) -> None:
        """
        Set a new identity for the application/framework on behalf of which
        the DevOps API calls will be performed (the "caller").

        This method is deprecated (removal in 2.0.0). It modifies this admin
        object in place: its caller list is replaced and its DevOps API
        commander is rebuilt accordingly.

        Args:
            caller_name: name of the application, or framework, on behalf of which
                the DevOps API calls are performed. This ends up in the request user-agent.
            caller_version: version of the caller.

        Example:
            >>> my_astra_db_admin.set_caller(
            ...     caller_name="the_caller",
            ...     caller_version="0.1.0",
            ... )
        """

        logger.info(f"setting caller to {caller_name}/{caller_version}")
        callers_param = check_caller_parameters([], caller_name, caller_version)
        self.callers = callers_param
        # The commander receives the callers when created, hence is re-created.
        self._dev_ops_api_commander = self._get_dev_ops_api_commander()

    def list_databases(
        self,
        *,
        include: str | None = None,
        provider: str | None = None,
        page_size: int | None = None,
        max_time_ms: int | None = None,
    ) -> CommandCursor[AdminDatabaseInfo]:
        """
        Retrieve the databases through the DevOps API, requesting one page
        after the other until a page shorter than the page size is received.

        Args:
            include: a filter on what databases are to be returned. As per
                DevOps API, defaults to "nonterminated". Pass "all" to include
                the already terminated databases.
            provider: a filter on the cloud provider for the databases.
                As per DevOps API, defaults to "ALL". Pass e.g. "AWS" to
                restrict the results.
            page_size: number of results per page from the DevOps API. Optional.
            max_time_ms: a timeout, in milliseconds, for the API request.
                The same value is applied to each page request individually.

        Returns:
            A CommandCursor to iterate over the detected databases,
            represented as AdminDatabaseInfo objects.

        Raises:
            DevOpsAPIException: if a response is not a list, or if the last
                item of a full page lacks the "id" needed to request the next.

        Example:
            >>> database_cursor = my_astra_db_admin.list_databases()
            >>> database_list = list(database_cursor)
            >>> len(database_list)
            3
            >>> database_list[2].id
            '01234567-...'
            >>> database_list[2].status
            'ACTIVE'
            >>> database_list[2].info.region
            'eu-west-1'
        """

        logger.info("getting databases (DevOps API)")
        candidate_params = {
            "include": include,
            "provider": provider,
            "limit": page_size or DEV_OPS_DEFAULT_DATABASES_PAGE_SIZE,
        }
        # unspecified filters are left out of the request altogether
        base_params = {
            param: value for param, value in candidate_params.items() if value is not None
        }
        pages: list[list[dict[str, Any]]] = []

        logger.info("request 0, getting databases (DevOps API)")
        first_page = self._dev_ops_api_commander.request(
            http_method=HttpMethod.GET,
            request_params=base_params,
            timeout_info=base_timeout_info(max_time_ms),
        )
        if not isinstance(first_page, list):
            raise DevOpsAPIException(
                "Faulty response from get-databases DevOps API command.",
            )
        logger.info("finished request 0, getting databases (DevOps API)")
        pages.append(first_page)

        while True:
            previous_page = pages[-1]
            # a page shorter than the limit is the last one
            if len(previous_page) < base_params["limit"]:
                break
            final_entry = previous_page[-1]
            if "id" not in final_entry:
                raise DevOpsAPIException(
                    "Faulty response from get-databases DevOps API command.",
                )
            # the next page starts right after the last database received
            next_params = {**base_params, "starting_after": final_entry["id"]}
            request_number = len(pages)
            logger.info(
                "request %s, getting databases (DevOps API)",
                request_number,
            )
            next_page = self._dev_ops_api_commander.request(
                http_method=HttpMethod.GET,
                request_params=next_params,
                timeout_info=base_timeout_info(max_time_ms),
            )
            logger.info(
                "finished request %s, getting databases (DevOps API)",
                request_number,
            )
            if not isinstance(next_page, list):
                raise DevOpsAPIException(
                    "Faulty response from get-databases DevOps API command.",
                )
            pages.append(next_page)

        logger.info("finished getting databases (DevOps API)")
        cursor_address = self._dev_ops_api_commander.full_path
        database_infos = [
            _recast_as_admin_database_info(raw_database, environment=self.environment)
            for page in pages
            for raw_database in page
        ]
        return CommandCursor(address=cursor_address, items=database_infos)

    async def async_list_databases(
        self,
        *,
        include: str | None = None,
        provider: str | None = None,
        page_size: int | None = None,
        max_time_ms: int | None = None,
    ) -> CommandCursor[AdminDatabaseInfo]:
        """
        Get the list of databases, as obtained with a request to the DevOps API.
        Async version of the method, for use in an asyncio context.

        Args:
            include: a filter on what databases are to be returned. As per
                DevOps API, defaults to "nonterminated". Pass "all" to include
                the already terminated databases.
            provider: a filter on the cloud provider for the databases.
                As per DevOps API, defaults to "ALL". Pass e.g. "AWS" to
                restrict the results.
            page_size: number of results per page from the DevOps API. Optional.
            max_time_ms: a timeout, in milliseconds, for the API request.
                The same value is applied to each page request individually.

        Returns:
            A CommandCursor to iterate over the detected databases,
            represented as AdminDatabaseInfo objects.
            Note that, once this method has been awaited, the resulting cursor
            is a regular iterable, e.g. for use in ordinary "for" loops.

        Raises:
            DevOpsAPIException: if a response is not a list, or if the last
                item of a full page lacks the "id" needed to request the next.

        Example:
            >>> async def check_if_db_exists(db_id: str) -> bool:
            ...     db_cursor = await my_astra_db_admin.async_list_databases()
            ...     db_list = list(db_cursor)
            ...     return any(db.id == db_id for db in db_list)
            ...
            >>> asyncio.run(check_if_db_exists("xyz"))
            True
            >>> asyncio.run(check_if_db_exists("01234567-..."))
            False
        """

        logger.info("getting databases (DevOps API), async")
        # unspecified filters are left out of the request altogether
        request_params_0 = {
            k: v
            for k, v in {
                "include": include,
                "provider": provider,
                "limit": page_size or DEV_OPS_DEFAULT_DATABASES_PAGE_SIZE,
            }.items()
            if v is not None
        }
        # one item per page received, each page being a list of database dicts
        responses: list[list[dict[str, Any]]] = []
        logger.info("request 0, getting databases (DevOps API), async")
        response_0 = await self._dev_ops_api_commander.async_request(
            http_method=HttpMethod.GET,
            request_params=request_params_0,
            timeout_info=base_timeout_info(max_time_ms),
        )
        if not isinstance(response_0, list):
            raise DevOpsAPIException(
                "Faulty response from get-databases DevOps API command.",
            )
        logger.info("finished request 0, getting databases (DevOps API), async")
        responses += [response_0]
        # a full page suggests there may be more: keep asking for the next one
        while len(responses[-1]) >= request_params_0["limit"]:
            if "id" not in responses[-1][-1]:
                raise DevOpsAPIException(
                    "Faulty response from get-databases DevOps API command.",
                )
            # the next page starts right after the last database received
            last_received_db_id = responses[-1][-1]["id"]
            request_params_n = {
                **request_params_0,
                **{"starting_after": last_received_db_id},
            }
            logger.info(
                "request %s, getting databases (DevOps API), async",
                len(responses),
            )
            response_n = await self._dev_ops_api_commander.async_request(
                http_method=HttpMethod.GET,
                request_params=request_params_n,
                timeout_info=base_timeout_info(max_time_ms),
            )
            logger.info(
                "finished request %s, getting databases (DevOps API), async",
                len(responses),
            )
            if not isinstance(response_n, list):
                raise DevOpsAPIException(
                    "Faulty response from get-databases DevOps API command.",
                )
            responses += [response_n]

        logger.info("finished getting databases (DevOps API), async")
        return CommandCursor(
            address=self._dev_ops_api_commander.full_path,
            items=[
                _recast_as_admin_database_info(
                    db_dict,
                    environment=self.environment,
                )
                for response in responses
                for db_dict in response
            ],
        )

    def database_info(
        self, id: str, *, max_time_ms: int | None = None
    ) -> AdminDatabaseInfo:
        """
        Fetch, with a single DevOps API request, the complete description
        of the database having the provided ID.

        Args:
            id: the ID of the target database, e. g.
                "01234567-89ab-cdef-0123-456789abcdef".
            max_time_ms: a timeout, in milliseconds, for the API request.

        Returns:
            An AdminDatabaseInfo object.

        Example:
            >>> details_of_my_db = my_astra_db_admin.database_info("01234567-...")
            >>> details_of_my_db.id
            '01234567-...'
            >>> details_of_my_db.status
            'ACTIVE'
            >>> details_of_my_db.info.region
            'eu-west-1'
        """

        logger.info(f"getting database info for '{id}' (DevOps API)")
        # the database ID is appended to the commander's base path
        raw_database_info = self._dev_ops_api_commander.request(
            http_method=HttpMethod.GET,
            additional_path=id,
            timeout_info=base_timeout_info(max_time_ms),
        )
        logger.info(f"finished getting database info for '{id}' (DevOps API)")
        admin_database_info = _recast_as_admin_database_info(
            raw_database_info,
            environment=self.environment,
        )
        return admin_database_info

    async def async_database_info(
        self, id: str, *, max_time_ms: int | None = None
    ) -> AdminDatabaseInfo:
        """
        Fetch, with a single DevOps API request, the complete description
        of the database having the provided ID.
        This is the awaitable counterpart of `database_info`, meant to be
        used within an asyncio event loop.

        Args:
            id: the ID of the target database, e. g.
                "01234567-89ab-cdef-0123-456789abcdef".
            max_time_ms: a timeout, in milliseconds, for the API request.

        Returns:
            An AdminDatabaseInfo object.

        Example:
            >>> async def check_if_db_active(db_id: str) -> bool:
            ...     db_info = await my_astra_db_admin.async_database_info(db_id)
            ...     return db_info.status == "ACTIVE"
            ...
            >>> asyncio.run(check_if_db_active("01234567-..."))
            True
        """

        logger.info(f"getting database info for '{id}' (DevOps API), async")
        # the database ID is appended to the commander's base path
        raw_database_info = await self._dev_ops_api_commander.async_request(
            http_method=HttpMethod.GET,
            additional_path=id,
            timeout_info=base_timeout_info(max_time_ms),
        )
        logger.info(f"finished getting database info for '{id}' (DevOps API), async")
        admin_database_info = _recast_as_admin_database_info(
            raw_database_info,
            environment=self.environment,
        )
        return admin_database_info

    def create_database(
        self,
        name: str,
        *,
        cloud_provider: str,
        region: str,
        keyspace: str | None = None,
        namespace: str | None = None,
        wait_until_active: bool = True,
        max_time_ms: int | None = None,
    ) -> AstraDBDatabaseAdmin:
        """
        Issue a database-creation request to the DevOps API and, unless
        instructed otherwise, poll the database status until it leaves
        the PENDING/INITIALIZING stage.

        Args:
            name: the desired name for the database.
            cloud_provider: one of 'aws', 'gcp' or 'azure'.
            region: any of the available cloud regions.
            keyspace: name for the one keyspace the database starts with.
                If omitted, DevOps API will use its default.
            namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
            wait_until_active: if True (default), the method returns only after
                the newly-created database is in ACTIVE state (a few minutes,
                usually). If False, it will return right after issuing the
                creation request to the DevOps API, and it will be responsibility
                of the caller to check the database status before working with it.
            max_time_ms: a timeout, in milliseconds, for the whole requested
                operation to complete.
                Note that a timeout is no guarantee that the creation request
                has not reached the API server.

        Returns:
            An AstraDBDatabaseAdmin instance.

        Raises:
            DevOpsAPIException: if the creation request does not get the
                expected "Created" HTTP status in response, or if, while
                waiting, the database ends up in a status other than ACTIVE.

        Example:
            >>> my_new_db_admin = my_astra_db_admin.create_database(
            ...     "new_database",
            ...     cloud_provider="aws",
            ...     region="ap-south-1",
            ... )
            >>> my_new_db = my_new_db_admin.get_database()
            >>> my_coll = my_new_db.create_collection("movies", dimension=2)
            >>> my_coll.insert_one({"title": "The Title", "$vector": [0.1, 0.2]})
        """

        # reconcile `keyspace` with its deprecated alias `namespace`
        effective_keyspace = check_namespace_keyspace(
            keyspace=keyspace,
            namespace=namespace,
        )

        creation_fields = {
            "name": name,
            "tier": "serverless",
            "cloudProvider": cloud_provider,
            "region": region,
            "capacityUnits": 1,
            "dbType": "vector",
            "keyspace": effective_keyspace,
        }
        # fields without a value (e.g. an unspecified keyspace) are not sent
        creation_payload = {
            field: value for field, value in creation_fields.items() if value is not None
        }
        # a single time budget is shared by the creation request and the polling
        timeouts = MultiCallTimeoutManager(
            overall_max_time_ms=max_time_ms, dev_ops_api=True
        )
        database_label = f"{name}/({cloud_provider}, {region})"
        logger.info(f"creating database {database_label} (DevOps API)")
        creation_response = self._dev_ops_api_commander.raw_request(
            http_method=HttpMethod.POST,
            payload=creation_payload,
            timeout_info=timeouts.remaining_timeout_info(),
        )
        if creation_response.status_code != DEV_OPS_RESPONSE_HTTP_CREATED:
            raise DevOpsAPIException(
                f"DB creation ('{name}') failed: API returned HTTP "
                f"{creation_response.status_code} instead of "
                f"{DEV_OPS_RESPONSE_HTTP_CREATED} - Created."
            )
        # the ID of the new database is read from the "Location" response header
        new_database_id = creation_response.headers["Location"]
        logger.info(f"DevOps API returned from creating database {database_label}")

        if wait_until_active:
            transient_statuses = {
                DEV_OPS_DATABASE_STATUS_PENDING,
                DEV_OPS_DATABASE_STATUS_INITIALIZING,
            }
            # start as if PENDING, so that the status is checked at least once
            observed_status = DEV_OPS_DATABASE_STATUS_PENDING
            while observed_status in transient_statuses:
                logger.info(f"sleeping to poll for status of '{new_database_id}'")
                time.sleep(DEV_OPS_DATABASE_POLL_INTERVAL_S)
                observed_status = self.database_info(
                    id=new_database_id,
                    max_time_ms=timeouts.remaining_timeout_ms(),
                ).status
            if observed_status != DEV_OPS_DATABASE_STATUS_ACTIVE:
                raise DevOpsAPIException(
                    f"Database {name} entered unexpected status {observed_status} after PENDING"
                )

        logger.info(
            f"finished creating database '{new_database_id}' = "
            f"{database_label} (DevOps API)"
        )
        # the returned database admin is spawned from this very admin object
        new_api_endpoint = build_api_endpoint(
            environment=self.environment,
            database_id=new_database_id,
            region=region,
        )
        return AstraDBDatabaseAdmin.from_astra_db_admin(
            api_endpoint=new_api_endpoint,
            astra_db_admin=self,
        )

    async def async_create_database(
        self,
        name: str,
        *,
        cloud_provider: str,
        region: str,
        keyspace: str | None = None,
        namespace: str | None = None,
        wait_until_active: bool = True,
        max_time_ms: int | None = None,
    ) -> AstraDBDatabaseAdmin:
        """
        Create a database as requested, optionally waiting for it to be ready.
        This is an awaitable method suitable for use within an asyncio event loop.

        Args:
            name: the desired name for the database.
            cloud_provider: one of 'aws', 'gcp' or 'azure'.
            region: any of the available cloud regions.
            keyspace: name for the one keyspace the database starts with.
                If omitted, DevOps API will use its default.
            namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
            wait_until_active: if True (default), the method returns only after
                the newly-created database is in ACTIVE state (a few minutes,
                usually). If False, it will return right after issuing the
                creation request to the DevOps API, and it will be responsibility
                of the caller to check the database status before working with it.
            max_time_ms: a timeout, in milliseconds, for the whole requested
                operation to complete.
                Note that a timeout is no guarantee that the creation request
                has not reached the API server.

        Returns:
            An AstraDBDatabaseAdmin instance.

        Example:
            >>> asyncio.run(
            ...     my_astra_db_admin.async_create_database(
            ...         "new_database",
            ...         cloud_provider="aws",
            ...         region="ap-south-1",
            ....    )
            ... )
            AstraDBDatabaseAdmin(id=...)
        """

        keyspace_param = check_namespace_keyspace(
            keyspace=keyspace,
            namespace=namespace,
        )

        cd_payload = {
            k: v
            for k, v in {
                "name": name,
                "tier": "serverless",
                "cloudProvider": cloud_provider,
                "region": region,
                "capacityUnits": 1,
                "dbType": "vector",
                "keyspace": keyspace_param,
            }.items()
            if v is not None
        }
        timeout_manager = MultiCallTimeoutManager(
            overall_max_time_ms=max_time_ms, dev_ops_api=True
        )
        logger.info(
            f"creating database {name}/({cloud_provider}, {region}) "
            "(DevOps API), async"
        )
        cd_raw_response = await self._dev_ops_api_commander.async_raw_request(
            http_method=HttpMethod.POST,
            payload=cd_payload,
            timeout_info=timeout_manager.remaining_timeout_info(),
        )
        if cd_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_CREATED:
            raise DevOpsAPIException(
                f"DB creation ('{name}') failed: API returned HTTP "
                f"{cd_raw_response.status_code} instead of "
                f"{DEV_OPS_RESPONSE_HTTP_CREATED} - Created."
            )
        new_database_id = cd_raw_response.headers["Location"]
        logger.info(
            "DevOps API returned from creating database "
            f"{name}/({cloud_provider}, {region}), async"
        )
        if wait_until_active:
            last_status_seen = DEV_OPS_DATABASE_STATUS_PENDING
            while last_status_seen in {
                DEV_OPS_DATABASE_STATUS_PENDING,
                DEV_OPS_DATABASE_STATUS_INITIALIZING,
            }:
                logger.info(
                    f"sleeping to poll for status of '{new_database_id}', async"
                )
                await asyncio.sleep(DEV_OPS_DATABASE_POLL_INTERVAL_S)
                last_db_info = await self.async_database_info(
                    id=new_database_id,
                    max_time_ms=timeout_manager.remaining_timeout_ms(),
                )
                last_status_seen = last_db_info.status
            if last_status_seen != DEV_OPS_DATABASE_STATUS_ACTIVE:
                raise DevOpsAPIException(
                    f"Database {name} entered unexpected status "
                    f"{last_status_seen} after PENDING"
                )
        # return the database instance
        logger.info(
            f"finished creating database '{new_database_id}' = "
            f"{name}/({cloud_provider}, {region}) (DevOps API), async"
        )
        return AstraDBDatabaseAdmin.from_astra_db_admin(
            api_endpoint=build_api_endpoint(
                environment=self.environment,
                database_id=new_database_id,
                region=region,
            ),
            astra_db_admin=self,
        )

    def drop_database(
        self,
        id: str,
        *,
        wait_until_active: bool = True,
        max_time_ms: int | None = None,
    ) -> dict[str, Any]:
        """
        Drop a database, i.e. delete it completely and permanently with all its data.

        Args:
            id: The ID of the database to drop, e. g.
                "01234567-89ab-cdef-0123-456789abcdef".
            wait_until_active: if True (default), the method returns only after
                the database has actually been deleted (generally a few minutes).
                If False, it will return right after issuing the
                drop request to the DevOps API, and it will be the responsibility
                of the caller to check the database status/availability
                after that, if desired. (Despite its name, for this method
                the flag means "wait until the deletion has completed".)
            max_time_ms: a timeout, in milliseconds, for the whole requested
                operation to complete.
                Note that a timeout is no guarantee that the deletion request
                has not reached the API server.

        Returns:
            A dictionary of the form {"ok": 1} in case of success.
            Otherwise, an exception is raised.

        Raises:
            DevOpsAPIException: if the termination request is not answered with
                the expected "Accepted" HTTP status, or if, while waiting,
                the database is found in a status other than TERMINATING.

        Example:
            >>> database_list_pre = my_astra_db_admin.list_databases()
            >>> len(database_list_pre)
            3
            >>> my_astra_db_admin.drop_database("01234567-...")
            {'ok': 1}
            >>> database_list_post = my_astra_db_admin.list_databases()
            >>> len(database_list_post)
            2
        """

        # a single manager spreads the overall timeout over all the API calls
        timeout_manager = MultiCallTimeoutManager(
            overall_max_time_ms=max_time_ms, dev_ops_api=True
        )
        logger.info(f"dropping database '{id}' (DevOps API)")
        te_raw_response = self._dev_ops_api_commander.raw_request(
            http_method=HttpMethod.POST,
            additional_path=f"{id}/terminate",
            timeout_info=timeout_manager.remaining_timeout_info(),
        )
        if te_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_ACCEPTED:
            raise DevOpsAPIException(
                f"DB deletion ('{id}') failed: API returned HTTP "
                f"{te_raw_response.status_code} instead of "
                f"{DEV_OPS_RESPONSE_HTTP_ACCEPTED} - Accepted"
            )
        logger.info(f"DevOps API returned from dropping database '{id}'")
        if wait_until_active:
            # None is used below to signal "database not listed anymore"
            last_status_seen: str | None = DEV_OPS_DATABASE_STATUS_TERMINATING
            _db_name: str | None = None
            while last_status_seen == DEV_OPS_DATABASE_STATUS_TERMINATING:
                logger.info(f"sleeping to poll for status of '{id}'")
                time.sleep(DEV_OPS_DATABASE_POLL_INTERVAL_S)
                # look for the database among those currently listed
                detected_databases = [
                    a_db_info
                    for a_db_info in self.list_databases(
                        max_time_ms=timeout_manager.remaining_timeout_ms(),
                    )
                    if a_db_info.id == id
                ]
                if detected_databases:
                    last_status_seen = detected_databases[0].status
                    _db_name = detected_databases[0].info.name
                else:
                    last_status_seen = None
            if last_status_seen is not None:
                # still listed, but in a status other than TERMINATING
                _name_desc = f" ({_db_name})" if _db_name else ""
                raise DevOpsAPIException(
                    f"Database {id}{_name_desc} entered unexpected status "
                    f"{last_status_seen} after TERMINATING"
                )
        logger.info(f"finished dropping database '{id}' (DevOps API)")
        return {"ok": 1}

    async def async_drop_database(
        self,
        id: str,
        *,
        wait_until_active: bool = True,
        max_time_ms: int | None = None,
    ) -> dict[str, Any]:
        """
        Drop a database, i.e. delete it completely and permanently with all its data.
        Async version of the method, for use in an asyncio context.

        Args:
            id: The ID of the database to drop, e. g.
                "01234567-89ab-cdef-0123-456789abcdef".
            wait_until_active: if True (default), the method returns only after
                the database has actually been deleted (generally a few minutes).
                If False, it will return right after issuing the
                drop request to the DevOps API, and it will be the responsibility
                of the caller to check the database status/availability
                after that, if desired. (Despite its name, for this method
                the flag means "wait until the deletion has completed".)
            max_time_ms: a timeout, in milliseconds, for the whole requested
                operation to complete.
                Note that a timeout is no guarantee that the deletion request
                has not reached the API server.

        Returns:
            A dictionary of the form {"ok": 1} in case of success.
            Otherwise, an exception is raised.

        Raises:
            DevOpsAPIException: if the termination request is not answered with
                the expected "Accepted" HTTP status, or if, while waiting,
                the database is found in a status other than TERMINATING.

        Example:
            >>> asyncio.run(
            ...     my_astra_db_admin.async_drop_database("01234567-...")
            ... )
            {'ok': 1}
        """

        # a single manager spreads the overall timeout over all the API calls
        timeout_manager = MultiCallTimeoutManager(
            overall_max_time_ms=max_time_ms, dev_ops_api=True
        )
        logger.info(f"dropping database '{id}' (DevOps API), async")
        te_raw_response = await self._dev_ops_api_commander.async_raw_request(
            http_method=HttpMethod.POST,
            additional_path=f"{id}/terminate",
            timeout_info=timeout_manager.remaining_timeout_info(),
        )
        if te_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_ACCEPTED:
            raise DevOpsAPIException(
                f"DB deletion ('{id}') failed: API returned HTTP "
                f"{te_raw_response.status_code} instead of "
                f"{DEV_OPS_RESPONSE_HTTP_ACCEPTED} - Accepted"
            )
        logger.info(f"DevOps API returned from dropping database '{id}', async")
        if wait_until_active:
            # None is used below to signal "database not listed anymore"
            last_status_seen: str | None = DEV_OPS_DATABASE_STATUS_TERMINATING
            _db_name: str | None = None
            while last_status_seen == DEV_OPS_DATABASE_STATUS_TERMINATING:
                logger.info(f"sleeping to poll for status of '{id}', async")
                await asyncio.sleep(DEV_OPS_DATABASE_POLL_INTERVAL_S)
                # look for the database among those currently listed
                detected_databases = [
                    a_db_info
                    for a_db_info in await self.async_list_databases(
                        max_time_ms=timeout_manager.remaining_timeout_ms(),
                    )
                    if a_db_info.id == id
                ]
                if detected_databases:
                    last_status_seen = detected_databases[0].status
                    _db_name = detected_databases[0].info.name
                else:
                    last_status_seen = None
            if last_status_seen is not None:
                # still listed, but in a status other than TERMINATING
                _name_desc = f" ({_db_name})" if _db_name else ""
                raise DevOpsAPIException(
                    f"Database {id}{_name_desc} entered unexpected status "
                    f"{last_status_seen} after TERMINATING"
                )
        logger.info(f"finished dropping database '{id}' (DevOps API), async")
        return {"ok": 1}

    def get_database_admin(
        self,
        api_endpoint_or_id: str | None = None,
        *,
        api_endpoint: str | None = None,
        id: str | None = None,
        region: str | None = None,
        max_time_ms: int | None = None,
    ) -> AstraDBDatabaseAdmin:
        """
        Build an AstraDBDatabaseAdmin object, used for admin work
        scoped to one specific database.

        Args:
            api_endpoint_or_id: positional parameter, a shorthand for either
                `api_endpoint` or `id`. It is an error to supply it along
                with one of those.
            api_endpoint: the API Endpoint of the target database
                (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`).
                This call only builds the object instance: the database
                itself is not created and must already exist for the
                returned object to be of any use.
            id: the ID of the target database, as an alternative
                to providing the API Endpoint.
            region: the region through which to reach the database (which
                must be located there). Allowed only when the database is
                identified by its ID: combining it with an API Endpoint
                raises a ValueError. If omitted in the ID case, an additional
                DevOps API request is made to determine the default region.
            max_time_ms: a timeout, in milliseconds, for the DevOps API
                HTTP request should it be necessary (see the `region` argument).

        Returns:
            An AstraDBDatabaseAdmin instance representing the requested database.

        Raises:
            ValueError: if `region` is given together with an API Endpoint,
                or if neither an API Endpoint nor an ID is supplied.

        Example:
            >>> my_db_admin = my_astra_db_admin.get_database_admin("01234567-...")
            >>> my_db_admin.list_keyspaces()
            ['default_keyspace']
            >>> my_db_admin.create_keyspace("that_other_one")
            {'ok': 1}
            >>> my_db_admin.list_keyspaces()
            ['default_keyspace', 'that_other_one']

        Note:
            This method does not create any database. For actual creation
            of a database, see the `create_database` method.
        """

        endpoint_arg, id_arg = check_id_endpoint_parg_kwargs(
            p_arg=api_endpoint_or_id, api_endpoint=api_endpoint, id=id
        )

        # Case 1: a full API Endpoint is available, nothing needs resolving.
        if endpoint_arg is not None:
            if region is not None:
                raise ValueError(
                    "Parameter `region` not supported with an API endpoint."
                )
            return AstraDBDatabaseAdmin.from_astra_db_admin(
                api_endpoint=endpoint_arg,
                astra_db_admin=self,
                max_time_ms=max_time_ms,
            )

        # Case 2: only a database ID can have been given at this point.
        if id_arg is None:
            raise ValueError("Either `api_endpoint` or `id` must be supplied.")

        resolved_region = normalize_region_for_id(
            database_id=id_arg,
            token_str=self.token_provider.get_token(),
            environment=self.environment,
            region_param=region,
            max_time_ms=max_time_ms,
        )
        target_endpoint = build_api_endpoint(
            environment=self.environment,
            database_id=id_arg,
            region=resolved_region,
        )
        return AstraDBDatabaseAdmin.from_astra_db_admin(
            api_endpoint=target_endpoint,
            astra_db_admin=self,
            max_time_ms=max_time_ms,
        )

    def get_database(
        self,
        api_endpoint_or_id: str | None = None,
        *,
        api_endpoint: str | None = None,
        token: str | TokenProvider | None = None,
        keyspace: str | None = None,
        namespace: str | None = None,
        id: str | None = None,
        region: str | None = None,
        api_path: str | None = None,
        api_version: str | None = None,
        max_time_ms: int | None = None,
    ) -> Database:
        """
        Build a Database instance for a specific database, meant for
        data-level work (such as creating/managing collections).

        Args:
            api_endpoint_or_id: positional parameter, a shorthand for either
                `api_endpoint` or `id`. It is an error to supply it along
                with one of those.
            api_endpoint: the API Endpoint of the target database
                (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`).
                This call only builds the object instance: the database
                itself is not created and must already exist for the
                returned object to be of any use.
            token: if supplied, it is handed to the Database in place of
                the token set for this object. It can be a literal token
                string or a subclass of `astrapy.authentication.TokenProvider`.
            keyspace: the keyspace the resulting Database will primarily
                work on. If not specified, an additional DevOps API call
                is made to find the default keyspace of the target database.
            namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
            id: the ID of the target database, as an alternative
                to providing the API Endpoint.
            region: the region through which to reach the database (which
                must be located there). Allowed only when the database is
                identified by its ID: combining it with an API Endpoint
                raises a ValueError. If omitted in the ID case, an additional
                DevOps API request is made to determine the default region.
            api_path: path to append to the API Endpoint. In typical usage, this
                should be left to its default of "/api/json".
            api_version: version specifier to append to the API path. In typical
                usage, this should be left to its default of "v1".
            max_time_ms: a timeout, in milliseconds, for the DevOps API
                HTTP request should it be necessary (see the `region` argument).

        Returns:
            A Database object ready to be used.

        Raises:
            ValueError: if `region` is given together with an API Endpoint,
                if the API Endpoint cannot be parsed when the keyspace has
                to be looked up, or if neither an API Endpoint nor an ID
                is supplied.

        Example:
            >>> my_db = my_astra_db_admin.get_database(
            ...     "https://<ID>-<REGION>.apps.astra.datastax.com",
            ...     keyspace="my_prod_keyspace",
            ... )
            >>> my_coll = my_db.create_collection("movies", dimension=2)
            >>> my_coll.insert_one({"title": "The Title", "$vector": [0.3, 0.4]})
        """

        endpoint_arg, id_arg = check_id_endpoint_parg_kwargs(
            p_arg=api_endpoint_or_id, api_endpoint=api_endpoint, id=id
        )
        keyspace_param = check_namespace_keyspace(
            keyspace=keyspace,
            namespace=namespace,
        )

        # imported here, and not at module level, to avoid a circular dependency
        from astrapy import Database

        effective_token = coerce_token_provider(token) or self.token_provider

        target_endpoint: str
        resolved_keyspace: str | None
        if endpoint_arg is None:
            # the database is identified by its ID:
            if id_arg is None:
                raise ValueError("Either `api_endpoint` or `id` must be supplied.")
            resolved_region = normalize_region_for_id(
                database_id=id_arg,
                token_str=self.token_provider.get_token(),
                environment=self.environment,
                region_param=region,
                max_time_ms=max_time_ms,
            )
            # a falsy keyspace triggers a lookup of the database's own keyspace
            resolved_keyspace = (
                keyspace_param
                or self.database_info(id_arg, max_time_ms=max_time_ms).info.keyspace
            )
            target_endpoint = build_api_endpoint(
                environment=self.environment,
                database_id=id_arg,
                region=resolved_region,
            )
        else:
            # the database is identified by its API Endpoint:
            if region is not None:
                raise ValueError(
                    "Parameter `region` not supported with an API endpoint."
                )
            if keyspace_param:
                resolved_keyspace = keyspace_param
            else:
                # the database ID, needed for the lookup, comes from the endpoint
                endpoint_parts = parse_api_endpoint(endpoint_arg)
                if endpoint_parts is None:
                    raise ValueError(api_endpoint_parsing_error_message(endpoint_arg))
                looked_up_info = self.database_info(
                    endpoint_parts.database_id,
                    max_time_ms=max_time_ms,
                )
                resolved_keyspace = looked_up_info.info.keyspace
            target_endpoint = endpoint_arg

        return Database(
            api_endpoint=target_endpoint,
            token=effective_token,
            keyspace=resolved_keyspace,
            callers=self.callers,
            environment=self.environment,
            api_path=api_path,
            api_version=api_version,
        )

    def get_async_database(
        self,
        api_endpoint_or_id: str | None = None,
        *,
        api_endpoint: str | None = None,
        token: str | TokenProvider | None = None,
        keyspace: str | None = None,
        namespace: str | None = None,
        id: str | None = None,
        region: str | None = None,
        api_path: str | None = None,
        api_version: str | None = None,
        max_time_ms: int | None = None,
    ) -> AsyncDatabase:
        """
        Create an AsyncDatabase instance for a specific database, to be used
        when doing data-level work (such as creating/managing collections).

        This is a thin wrapper: a Database is obtained through `get_database`
        with the same arguments and then converted with its `to_async` method.

        Args:
            api_endpoint_or_id: positional parameter that can stand for both
                `api_endpoint` and `id`. Passing them together is an error.
            api_endpoint: the API Endpoint for the target database
                (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`).
                The database must exist already for the resulting object
                to be effectively used; in other words, this invocation
                does not create the database, just the object instance.
            token: if supplied, is passed to the Database instead of
                the one set for this object.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            keyspace: used to specify a certain keyspace the resulting
                AsyncDatabase will primarily work on. If not specified, an additional
                DevOps API call reveals the default keyspace for the target database.
            namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
            id: the target database ID. This is alternative to using the API Endpoint.
            region: the region to use for connecting to the database. The
                database must be located in that region. This parameter can be used
                only if the database is specified by its ID (instead of API Endpoint).
                If this parameter is not passed, and cannot be inferred
                from the API endpoint, an additional DevOps API request is made
                to determine the default region and use it subsequently.
            api_path: path to append to the API Endpoint. In typical usage, this
                should be left to its default of "/api/json".
            api_version: version specifier to append to the API path. In typical
                usage, this should be left to its default of "v1".
            max_time_ms: a timeout, in milliseconds, for the DevOps API
                HTTP request should it be necessary (see the `region` argument).
                It is forwarded unchanged to `get_database`.

        Returns:
            An AsyncDatabase object ready to be used.

        Example:
            >>> async def create_use_collection(
            ...     admin: AstraDBAdmin,
            ...     api_endpoint: str,
            ...     keyspace: str,
            ... ) -> None:
            ...     my_async_db = admin.get_async_database(
            ...         api_endpoint,
            ...         keyspace=keyspace,
            ...     )
            ...     a_coll = await my_async_db.create_collection("movies", dimension=2)
            ...     await a_coll.insert_one(
            ...         {"title": "The Title", "$vector": [0.3, 0.4]}
            ...     )
            ...
            >>> asyncio.run(create_use_collection(
            ...     my_admin,
            ...     "https://<ID>-<REGION>.apps.astra.datastax.com",
            ...     "default_keyspace",
            ... ))
            >>>
        """

        # resolve the deprecated `namespace` alias here, so that only
        # `keyspace` needs to be handed over to `get_database`
        keyspace_param = check_namespace_keyspace(
            keyspace=keyspace,
            namespace=namespace,
        )

        return self.get_database(
            api_endpoint_or_id=api_endpoint_or_id,
            api_endpoint=api_endpoint,
            token=token,
            keyspace=keyspace_param,
            id=id,
            region=region,
            api_path=api_path,
            api_version=api_version,
            max_time_ms=max_time_ms,
        ).to_async()

Methods

async def async_create_database(self, name: str, *, cloud_provider: str, region: str, keyspace: str | None = None, namespace: str | None = None, wait_until_active: bool = True, max_time_ms: int | None = None) ‑> AstraDBDatabaseAdmin

Create a database as requested, optionally waiting for it to be ready. This is an awaitable method suitable for use within an asyncio event loop.

Args

name
the desired name for the database.
cloud_provider
one of 'aws', 'gcp' or 'azure'.
region
any of the available cloud regions.
keyspace
name for the one keyspace the database starts with. If omitted, DevOps API will use its default.
namespace
an alias for keyspace. DEPRECATED, removal in 2.0.
wait_until_active
if True (default), the method returns only after the newly-created database is in ACTIVE state (a few minutes, usually). If False, it will return right after issuing the creation request to the DevOps API, and it will be the responsibility of the caller to check the database status before working with it.
max_time_ms
a timeout, in milliseconds, for the whole requested operation to complete. Note that a timeout is no guarantee that the creation request has not reached the API server.

Returns

An AstraDBDatabaseAdmin instance.

Example

>>> asyncio.run(
...     my_astra_db_admin.async_create_database(
...         "new_database",
...         cloud_provider="aws",
...         region="ap-south-1",
...     )
... )
AstraDBDatabaseAdmin(id=...)
Expand source code
async def async_create_database(
    self,
    name: str,
    *,
    cloud_provider: str,
    region: str,
    keyspace: str | None = None,
    namespace: str | None = None,
    wait_until_active: bool = True,
    max_time_ms: int | None = None,
) -> AstraDBDatabaseAdmin:
    """
    Issue a database-creation request and, optionally, wait for the new
    database to become ready. This is an awaitable method, meant to be
    used from within an asyncio event loop.

    Args:
        name: the desired name for the database.
        cloud_provider: one of 'aws', 'gcp' or 'azure'.
        region: any of the available cloud regions.
        keyspace: name for the one keyspace the database starts with.
            If omitted, DevOps API will use its default.
        namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
        wait_until_active: if True (default), the method returns only after
            the newly-created database is in ACTIVE state (a few minutes,
            usually). If False, it returns right after the creation request
            is issued to the DevOps API: checking the database status before
            working with it is then the responsibility of the caller.
        max_time_ms: a timeout, in milliseconds, for the whole requested
            operation to complete.
            Note that a timeout is no guarantee that the creation request
            has not reached the API server.

    Returns:
        An AstraDBDatabaseAdmin instance.

    Raises:
        DevOpsAPIException: if the creation request is not answered with
            the expected "Created" HTTP status, or if, while waiting, the
            database leaves the PENDING/INITIALIZING statuses for anything
            other than ACTIVE.

    Example:
        >>> asyncio.run(
        ...     my_astra_db_admin.async_create_database(
        ...         "new_database",
        ...         cloud_provider="aws",
        ...         region="ap-south-1",
        ...     )
        ... )
        AstraDBDatabaseAdmin(id=...)
    """

    resolved_keyspace = check_namespace_keyspace(
        keyspace=keyspace,
        namespace=namespace,
    )

    # any entry set to None is left out of the request payload altogether
    candidate_fields = {
        "name": name,
        "tier": "serverless",
        "cloudProvider": cloud_provider,
        "region": region,
        "capacityUnits": 1,
        "dbType": "vector",
        "keyspace": resolved_keyspace,
    }
    creation_payload = {
        field: value for field, value in candidate_fields.items() if value is not None
    }

    # one manager spreads the overall timeout over all the API calls below
    timeout_manager = MultiCallTimeoutManager(
        overall_max_time_ms=max_time_ms, dev_ops_api=True
    )
    db_label = f"{name}/({cloud_provider}, {region})"
    logger.info(f"creating database {db_label} (DevOps API), async")
    creation_response = await self._dev_ops_api_commander.async_raw_request(
        http_method=HttpMethod.POST,
        payload=creation_payload,
        timeout_info=timeout_manager.remaining_timeout_info(),
    )
    received_status_code = creation_response.status_code
    if received_status_code != DEV_OPS_RESPONSE_HTTP_CREATED:
        raise DevOpsAPIException(
            f"DB creation ('{name}') failed: API returned HTTP "
            f"{received_status_code} instead of "
            f"{DEV_OPS_RESPONSE_HTTP_CREATED} - Created."
        )
    # the ID of the new database is read from the "Location" response header
    new_database_id = creation_response.headers["Location"]
    logger.info(f"DevOps API returned from creating database {db_label}, async")

    if wait_until_active:
        transient_statuses = {
            DEV_OPS_DATABASE_STATUS_PENDING,
            DEV_OPS_DATABASE_STATUS_INITIALIZING,
        }
        # poll (sleeping first, each time) until a non-transient status shows up
        while True:
            logger.info(f"sleeping to poll for status of '{new_database_id}', async")
            await asyncio.sleep(DEV_OPS_DATABASE_POLL_INTERVAL_S)
            polled_info = await self.async_database_info(
                id=new_database_id,
                max_time_ms=timeout_manager.remaining_timeout_ms(),
            )
            last_status_seen = polled_info.status
            if last_status_seen not in transient_statuses:
                break
        if last_status_seen != DEV_OPS_DATABASE_STATUS_ACTIVE:
            raise DevOpsAPIException(
                f"Database {name} entered unexpected status "
                f"{last_status_seen} after PENDING"
            )

    logger.info(
        f"finished creating database '{new_database_id}' = "
        f"{db_label} (DevOps API), async"
    )
    # wrap the new database into its admin object and return it
    new_database_endpoint = build_api_endpoint(
        environment=self.environment,
        database_id=new_database_id,
        region=region,
    )
    return AstraDBDatabaseAdmin.from_astra_db_admin(
        api_endpoint=new_database_endpoint,
        astra_db_admin=self,
    )
async def async_database_info(self, id: str, *, max_time_ms: int | None = None) ‑> AdminDatabaseInfo

Get the full information on a given database, through a request to the DevOps API. This is an awaitable method suitable for use within an asyncio event loop.

Args

id
the ID of the target database, e. g. "01234567-89ab-cdef-0123-456789abcdef".
max_time_ms
a timeout, in milliseconds, for the API request.

Returns

An AdminDatabaseInfo object.

Example

>>> async def check_if_db_active(db_id: str) -> bool:
...     db_info = await my_astra_db_admin.async_database_info(db_id)
...     return db_info.status == "ACTIVE"
...
>>> asyncio.run(check_if_db_active("01234567-..."))
True
Expand source code
async def async_database_info(
    self, id: str, *, max_time_ms: int | None = None
) -> AdminDatabaseInfo:
    """
    Retrieve the full information on a database with a DevOps API request.
    This is an awaitable method, meant to be used from within
    an asyncio event loop.

    Args:
        id: the ID of the target database, e. g.
            "01234567-89ab-cdef-0123-456789abcdef".
        max_time_ms: a timeout, in milliseconds, for the API request.

    Returns:
        An AdminDatabaseInfo object.

    Example:
        >>> async def check_if_db_active(db_id: str) -> bool:
        ...     db_info = await my_astra_db_admin.async_database_info(db_id)
        ...     return db_info.status == "ACTIVE"
        ...
        >>> asyncio.run(check_if_db_active("01234567-..."))
        True
    """

    logger.info(f"getting database info for '{id}' (DevOps API), async")
    # the database ID is the path suffix for the GET request
    raw_db_info = await self._dev_ops_api_commander.async_request(
        http_method=HttpMethod.GET,
        additional_path=id,
        timeout_info=base_timeout_info(max_time_ms),
    )
    logger.info(f"finished getting database info for '{id}' (DevOps API), async")
    # convert the raw API response into the AdminDatabaseInfo to return
    admin_db_info = _recast_as_admin_database_info(
        raw_db_info,
        environment=self.environment,
    )
    return admin_db_info
async def async_drop_database(self, id: str, *, wait_until_active: bool = True, max_time_ms: int | None = None) ‑> dict[str, typing.Any]

Drop a database, i.e. delete it completely and permanently with all its data. Async version of the method, for use in an asyncio context.

Args

id
The ID of the database to drop, e. g. "01234567-89ab-cdef-0123-456789abcdef".
wait_until_active
if True (default), the method returns only after the database has actually been deleted (generally a few minutes). If False, it will return right after issuing the drop request to the DevOps API, and it will be responsibility of the caller to check the database status/availability after that, if desired.
max_time_ms
a timeout, in milliseconds, for the whole requested operation to complete. Note that a timeout is no guarantee that the deletion request has not reached the API server.

Returns

A dictionary of the form {"ok": 1} in case of success. Otherwise, an exception is raised.

Example

>>> asyncio.run(
...     my_astra_db_admin.async_drop_database("01234567-...")
... )
{'ok': 1}
Expand source code
async def async_drop_database(
    self,
    id: str,
    *,
    wait_until_active: bool = True,
    max_time_ms: int | None = None,
) -> dict[str, Any]:
    """
    Drop a database, i.e. delete it completely and permanently with all its data.
    Async version of the method, for use in an asyncio context.

    Args:
        id: The ID of the database to drop, e. g.
            "01234567-89ab-cdef-0123-456789abcdef".
        wait_until_active: if True (default), the method returns only after
            the database has actually been deleted (generally a few minutes).
            Despite the parameter name, for this method the wait is for the
            database to disappear from the list of databases.
            If False, it will return right after issuing the
            drop request to the DevOps API, and it will be responsibility
            of the caller to check the database status/availability
            after that, if desired.
        max_time_ms: a timeout, in milliseconds, for the whole requested
            operation to complete (the termination request plus any
            subsequent status polling).
            Note that a timeout is no guarantee that the deletion request
            has not reached the API server.

    Returns:
        A dictionary of the form {"ok": 1} in case of success.
        Otherwise, an exception is raised.

    Raises:
        DevOpsAPIException: if the DevOps API does not answer the termination
            request with the expected "accepted" HTTP status, or if, while
            polling, the database is found in a status other than the
            terminating one.

    Example:
        >>> asyncio.run(
        ...     my_astra_db_admin.async_drop_database("01234567-...")
        ... )
        {'ok': 1}
    """

    # A single timeout budget is shared by the termination request and by
    # every polling request issued afterwards.
    timeout_manager = MultiCallTimeoutManager(
        overall_max_time_ms=max_time_ms, dev_ops_api=True
    )
    logger.info(f"dropping database '{id}' (DevOps API), async")
    te_raw_response = await self._dev_ops_api_commander.async_raw_request(
        http_method=HttpMethod.POST,
        additional_path=f"{id}/terminate",
        timeout_info=timeout_manager.remaining_timeout_info(),
    )
    if te_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_ACCEPTED:
        raise DevOpsAPIException(
            f"DB deletion ('{id}') failed: API returned HTTP "
            f"{te_raw_response.status_code} instead of "
            f"{DEV_OPS_RESPONSE_HTTP_ACCEPTED} - Accepted"
        )
    logger.info(f"DevOps API returned from dropping database '{id}', async")
    if wait_until_active:
        # Start from the terminating status so the loop body runs at least once.
        last_status_seen: str | None = DEV_OPS_DATABASE_STATUS_TERMINATING
        _db_name: str | None = None
        while last_status_seen == DEV_OPS_DATABASE_STATUS_TERMINATING:
            logger.info(f"sleeping to poll for status of '{id}', async")
            await asyncio.sleep(DEV_OPS_DATABASE_POLL_INTERVAL_S)
            # Look for the target database among those currently listed.
            detected_databases = [
                a_db_info
                for a_db_info in await self.async_list_databases(
                    max_time_ms=timeout_manager.remaining_timeout_ms(),
                )
                if a_db_info.id == id
            ]
            if detected_databases:
                last_status_seen = detected_databases[0].status
                _db_name = detected_databases[0].info.name
            else:
                # No longer listed: the deletion is considered complete.
                last_status_seen = None
        if last_status_seen is not None:
            # The database is still listed, but in a non-terminating status.
            _name_desc = f" ({_db_name})" if _db_name else ""
            raise DevOpsAPIException(
                f"Database {id}{_name_desc} entered unexpected status "
                f"{last_status_seen} after {DEV_OPS_DATABASE_STATUS_TERMINATING}"
            )
    logger.info(f"finished dropping database '{id}' (DevOps API), async")
    return {"ok": 1}
async def async_list_databases(self, *, include: str | None = None, provider: str | None = None, page_size: int | None = None, max_time_ms: int | None = None) ‑> CommandCursor[AdminDatabaseInfo]

Get the list of databases, as obtained with a request to the DevOps API. Async version of the method, for use in an asyncio context.

Args

include
a filter on what databases are to be returned. As per DevOps API, defaults to "nonterminated". Pass "all" to include the already terminated databases.
provider
a filter on the cloud provider for the databases. As per DevOps API, defaults to "ALL". Pass e.g. "AWS" to restrict the results.
page_size
number of results per page from the DevOps API. Optional.
max_time_ms
a timeout, in milliseconds, for the API request.

Returns

A CommandCursor to iterate over the detected databases, represented as AdminDatabaseInfo objects. Note that the return type is not an awaitable, rather a regular iterable, e.g. for use in ordinary "for" loops.

Example

>>> async def check_if_db_exists(db_id: str) -> bool:
...     db_cursor = await my_astra_db_admin.async_list_databases()
...     db_list = list(db_cursor)
...     return db_id in {db.id for db in db_list}
...
>>> asyncio.run(check_if_db_exists("xyz"))
True
>>> asyncio.run(check_if_db_exists("01234567-..."))
False
Expand source code
async def async_list_databases(
    self,
    *,
    include: str | None = None,
    provider: str | None = None,
    page_size: int | None = None,
    max_time_ms: int | None = None,
) -> CommandCursor[AdminDatabaseInfo]:
    """
    Get the list of databases, as obtained with a request to the DevOps API.
    Async version of the method, for use in an asyncio context.

    Args:
        include: a filter on what databases are to be returned. As per
            DevOps API, defaults to "nonterminated". Pass "all" to include
            the already terminated databases.
        provider: a filter on the cloud provider for the databases.
            As per DevOps API, defaults to "ALL". Pass e.g. "AWS" to
            restrict the results.
        page_size: number of results per page from the DevOps API. Optional.
        max_time_ms: a timeout, in milliseconds, for the API request.
            When several pages are fetched, this value is applied
            to each paginated request separately.

    Returns:
        A CommandCursor to iterate over the detected databases,
        represented as AdminDatabaseInfo objects.
        Note that the return type is not an awaitable, rather
        a regular iterable, e.g. for use in ordinary "for" loops.

    Raises:
        DevOpsAPIException: if a response from the API is not a list, or
            if the last item of a full page lacks the "id" field needed
            to request the following page.

    Example:
        >>> async def check_if_db_exists(db_id: str) -> bool:
        ...     db_cursor = await my_astra_db_admin.async_list_databases()
        ...     db_list = list(db_cursor)
        ...     return db_id in {db.id for db in db_list}
        ...
        >>> asyncio.run(check_if_db_exists("xyz"))
        True
        >>> asyncio.run(check_if_db_exists("01234567-..."))
        False
    """

    logger.info("getting databases (DevOps API), async")
    # Only the parameters that are actually set are sent to the API.
    request_params_0 = {
        k: v
        for k, v in {
            "include": include,
            "provider": provider,
            "limit": page_size or DEV_OPS_DEFAULT_DATABASES_PAGE_SIZE,
        }.items()
        if v is not None
    }
    # One entry per page received, in the order of retrieval.
    responses: list[dict[str, Any]] = []
    logger.info("request 0, getting databases (DevOps API), async")
    response_0 = await self._dev_ops_api_commander.async_request(
        http_method=HttpMethod.GET,
        request_params=request_params_0,
        timeout_info=base_timeout_info(max_time_ms),
    )
    if not isinstance(response_0, list):
        raise DevOpsAPIException(
            "Faulty response from get-databases DevOps API command.",
        )
    logger.info("finished request 0, getting databases (DevOps API), async")
    responses.append(response_0)
    # A full page suggests that more results may follow: keep requesting,
    # using the last received database ID as the pagination marker.
    while len(responses[-1]) >= request_params_0["limit"]:
        if "id" not in responses[-1][-1]:
            raise DevOpsAPIException(
                "Faulty response from get-databases DevOps API command.",
            )
        last_received_db_id = responses[-1][-1]["id"]
        request_params_n = {
            **request_params_0,
            "starting_after": last_received_db_id,
        }
        logger.info(
            "request %s, getting databases (DevOps API), async",
            len(responses),
        )
        response_n = await self._dev_ops_api_commander.async_request(
            http_method=HttpMethod.GET,
            request_params=request_params_n,
            timeout_info=base_timeout_info(max_time_ms),
        )
        logger.info(
            "finished request %s, getting databases (DevOps API), async",
            len(responses),
        )
        if not isinstance(response_n, list):
            raise DevOpsAPIException(
                "Faulty response from get-databases DevOps API command.",
            )
        responses.append(response_n)

    logger.info("finished getting databases (DevOps API), async")
    # Flatten all pages into a single sequence of typed info objects.
    return CommandCursor(
        address=self._dev_ops_api_commander.full_path,
        items=[
            _recast_as_admin_database_info(
                db_dict,
                environment=self.environment,
            )
            for response in responses
            for db_dict in response
        ],
    )
def create_database(self, name: str, *, cloud_provider: str, region: str, keyspace: str | None = None, namespace: str | None = None, wait_until_active: bool = True, max_time_ms: int | None = None) ‑> AstraDBDatabaseAdmin

Create a database as requested, optionally waiting for it to be ready.

Args

name
the desired name for the database.
cloud_provider
one of 'aws', 'gcp' or 'azure'.
region
any of the available cloud regions.
keyspace
name for the one keyspace the database starts with. If omitted, DevOps API will use its default.
namespace
an alias for keyspace. DEPRECATED, removal in 2.0.
wait_until_active
if True (default), the method returns only after the newly-created database is in ACTIVE state (a few minutes, usually). If False, it will return right after issuing the creation request to the DevOps API, and it will be responsibility of the caller to check the database status before working with it.
max_time_ms
a timeout, in milliseconds, for the whole requested operation to complete. Note that a timeout is no guarantee that the creation request has not reached the API server.

Returns

An AstraDBDatabaseAdmin instance.

Example

>>> my_new_db_admin = my_astra_db_admin.create_database(
...     "new_database",
...     cloud_provider="aws",
...     region="ap-south-1",
... )
>>> my_new_db = my_new_db_admin.get_database()
>>> my_coll = my_new_db.create_collection("movies", dimension=2)
>>> my_coll.insert_one({"title": "The Title", "$vector": [0.1, 0.2]})
Expand source code
def create_database(
    self,
    name: str,
    *,
    cloud_provider: str,
    region: str,
    keyspace: str | None = None,
    namespace: str | None = None,
    wait_until_active: bool = True,
    max_time_ms: int | None = None,
) -> AstraDBDatabaseAdmin:
    """
    Request the creation of a new database and, optionally, block until
    the database has become usable.

    Args:
        name: the name to give to the new database.
        cloud_provider: one of 'aws', 'gcp' or 'azure'.
        region: any of the available cloud regions.
        keyspace: name of the single keyspace the database is created with.
            When not provided, the DevOps API applies its own default.
        namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
        wait_until_active: if True (default), the method keeps polling the
            database status and returns only once the database is in
            ACTIVE state (a few minutes, usually). If False, the method
            returns right after the creation request has been issued to
            the DevOps API: it is then up to the caller to verify the
            database status before working with it.
        max_time_ms: a timeout, in milliseconds, for the whole requested
            operation to complete.
            Note that a timeout is no guarantee that the creation request
            has not reached the API server.

    Returns:
        An AstraDBDatabaseAdmin instance.

    Example:
        >>> my_new_db_admin = my_astra_db_admin.create_database(
        ...     "new_database",
        ...     cloud_provider="aws",
        ...     region="ap-south-1",
        ... )
        >>> my_new_db = my_new_db_admin.get_database()
        >>> my_coll = my_new_db.create_collection("movies", dimension=2)
        >>> my_coll.insert_one({"title": "The Title", "$vector": [0.1, 0.2]})
    """

    resolved_keyspace = check_namespace_keyspace(
        keyspace=keyspace,
        namespace=namespace,
    )

    # All candidate payload fields; the unset (None) ones are dropped below.
    candidate_fields = {
        "name": name,
        "tier": "serverless",
        "cloudProvider": cloud_provider,
        "region": region,
        "capacityUnits": 1,
        "dbType": "vector",
        "keyspace": resolved_keyspace,
    }
    creation_payload = {
        field_name: field_value
        for field_name, field_value in candidate_fields.items()
        if field_value is not None
    }
    # One timeout budget covers the creation request and all status polls.
    overall_timeout = MultiCallTimeoutManager(
        overall_max_time_ms=max_time_ms, dev_ops_api=True
    )
    db_label = f"{name}/({cloud_provider}, {region})"
    logger.info(f"creating database {db_label} (DevOps API)")
    creation_response = self._dev_ops_api_commander.raw_request(
        http_method=HttpMethod.POST,
        payload=creation_payload,
        timeout_info=overall_timeout.remaining_timeout_info(),
    )
    if creation_response.status_code != DEV_OPS_RESPONSE_HTTP_CREATED:
        raise DevOpsAPIException(
            f"DB creation ('{name}') failed: API returned HTTP "
            f"{creation_response.status_code} instead of "
            f"{DEV_OPS_RESPONSE_HTTP_CREATED} - Created."
        )
    # The ID of the new database is read from the "Location" response header.
    new_database_id = creation_response.headers["Location"]
    logger.info(f"DevOps API returned from creating database {db_label}")
    if wait_until_active:
        # Statuses in which the database is still being brought up.
        transitional_statuses = {
            DEV_OPS_DATABASE_STATUS_PENDING,
            DEV_OPS_DATABASE_STATUS_INITIALIZING,
        }
        while True:
            logger.info(f"sleeping to poll for status of '{new_database_id}'")
            time.sleep(DEV_OPS_DATABASE_POLL_INTERVAL_S)
            polled_db_info = self.database_info(
                id=new_database_id,
                max_time_ms=overall_timeout.remaining_timeout_ms(),
            )
            current_status = polled_db_info.status
            if current_status not in transitional_statuses:
                break
        # Anything other than ACTIVE at this point is treated as a failure.
        if current_status != DEV_OPS_DATABASE_STATUS_ACTIVE:
            raise DevOpsAPIException(
                f"Database {name} entered unexpected status {current_status} after PENDING"
            )
    # return the database instance
    logger.info(
        f"finished creating database '{new_database_id}' = {db_label} (DevOps API)"
    )
    new_db_api_endpoint = build_api_endpoint(
        environment=self.environment,
        database_id=new_database_id,
        region=region,
    )
    return AstraDBDatabaseAdmin.from_astra_db_admin(
        api_endpoint=new_db_api_endpoint,
        astra_db_admin=self,
    )
def database_info(self, id: str, *, max_time_ms: int | None = None) ‑> AdminDatabaseInfo

Get the full information on a given database, through a request to the DevOps API.

Args

id
the ID of the target database, e. g. "01234567-89ab-cdef-0123-456789abcdef".
max_time_ms
a timeout, in milliseconds, for the API request.

Returns

An AdminDatabaseInfo object.

Example

>>> details_of_my_db = my_astra_db_admin.database_info("01234567-...")
>>> details_of_my_db.id
'01234567-...'
>>> details_of_my_db.status
'ACTIVE'
>>> details_of_my_db.info.region
'eu-west-1'
Expand source code
def database_info(
    self, id: str, *, max_time_ms: int | None = None
) -> AdminDatabaseInfo:
    """
    Retrieve the complete description of one database by querying the DevOps API.

    Args:
        id: identifier of the database to look up, for example
            "01234567-89ab-cdef-0123-456789abcdef".
        max_time_ms: timeout for the API request, expressed in milliseconds.

    Returns:
        An AdminDatabaseInfo object built from the API response.

    Example:
        >>> details_of_my_db = my_astra_db_admin.database_info("01234567-...")
        >>> details_of_my_db.id
        '01234567-...'
        >>> details_of_my_db.status
        'ACTIVE'
        >>> details_of_my_db.info.region
        'eu-west-1'
    """

    logger.info(f"getting database info for '{id}' (DevOps API)")
    # The database ID is appended to the commander's base path for the GET.
    raw_db_description = self._dev_ops_api_commander.request(
        http_method=HttpMethod.GET,
        additional_path=id,
        timeout_info=base_timeout_info(max_time_ms),
    )
    logger.info(f"finished getting database info for '{id}' (DevOps API)")
    # Convert the raw API response into the typed info object.
    admin_db_info = _recast_as_admin_database_info(
        raw_db_description,
        environment=self.environment,
    )
    return admin_db_info
def drop_database(self, id: str, *, wait_until_active: bool = True, max_time_ms: int | None = None) ‑> dict[str, typing.Any]

Drop a database, i.e. delete it completely and permanently with all its data.

Args

id
The ID of the database to drop, e. g. "01234567-89ab-cdef-0123-456789abcdef".
wait_until_active
if True (default), the method returns only after the database has actually been deleted (generally a few minutes). If False, it will return right after issuing the drop request to the DevOps API, and it will be responsibility of the caller to check the database status/availability after that, if desired.
max_time_ms
a timeout, in milliseconds, for the whole requested operation to complete. Note that a timeout is no guarantee that the deletion request has not reached the API server.

Returns

A dictionary of the form {"ok": 1} in case of success. Otherwise, an exception is raised.

Example

>>> database_list_pre = my_astra_db_admin.list_databases()
>>> len(database_list_pre)
3
>>> my_astra_db_admin.drop_database("01234567-...")
{'ok': 1}
>>> database_list_post = my_astra_db_admin.list_databases()
>>> len(database_list_post)
2
Expand source code
def drop_database(
    self,
    id: str,
    *,
    wait_until_active: bool = True,
    max_time_ms: int | None = None,
) -> dict[str, Any]:
    """
    Drop a database, i.e. delete it completely and permanently with all its data.

    Args:
        id: The ID of the database to drop, e. g.
            "01234567-89ab-cdef-0123-456789abcdef".
        wait_until_active: if True (default), the method returns only after
            the database has actually been deleted (generally a few minutes).
            Despite the parameter name, for this method the wait is for the
            database to disappear from the list of databases.
            If False, it will return right after issuing the
            drop request to the DevOps API, and it will be responsibility
            of the caller to check the database status/availability
            after that, if desired.
        max_time_ms: a timeout, in milliseconds, for the whole requested
            operation to complete (the termination request plus any
            subsequent status polling).
            Note that a timeout is no guarantee that the deletion request
            has not reached the API server.

    Returns:
        A dictionary of the form {"ok": 1} in case of success.
        Otherwise, an exception is raised.

    Raises:
        DevOpsAPIException: if the DevOps API does not answer the termination
            request with the expected "accepted" HTTP status, or if, while
            polling, the database is found in a status other than the
            terminating one.

    Example:
        >>> database_list_pre = my_astra_db_admin.list_databases()
        >>> len(database_list_pre)
        3
        >>> my_astra_db_admin.drop_database("01234567-...")
        {'ok': 1}
        >>> database_list_post = my_astra_db_admin.list_databases()
        >>> len(database_list_post)
        2
    """

    # A single timeout budget is shared by the termination request and by
    # every polling request issued afterwards.
    timeout_manager = MultiCallTimeoutManager(
        overall_max_time_ms=max_time_ms, dev_ops_api=True
    )
    logger.info(f"dropping database '{id}' (DevOps API)")
    te_raw_response = self._dev_ops_api_commander.raw_request(
        http_method=HttpMethod.POST,
        additional_path=f"{id}/terminate",
        timeout_info=timeout_manager.remaining_timeout_info(),
    )
    if te_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_ACCEPTED:
        raise DevOpsAPIException(
            f"DB deletion ('{id}') failed: API returned HTTP "
            f"{te_raw_response.status_code} instead of "
            f"{DEV_OPS_RESPONSE_HTTP_ACCEPTED} - Accepted"
        )
    logger.info(f"DevOps API returned from dropping database '{id}'")
    if wait_until_active:
        # Start from the terminating status so the loop body runs at least once.
        last_status_seen: str | None = DEV_OPS_DATABASE_STATUS_TERMINATING
        _db_name: str | None = None
        while last_status_seen == DEV_OPS_DATABASE_STATUS_TERMINATING:
            logger.info(f"sleeping to poll for status of '{id}'")
            time.sleep(DEV_OPS_DATABASE_POLL_INTERVAL_S)
            # Look for the target database among those currently listed.
            detected_databases = [
                a_db_info
                for a_db_info in self.list_databases(
                    max_time_ms=timeout_manager.remaining_timeout_ms(),
                )
                if a_db_info.id == id
            ]
            if detected_databases:
                last_status_seen = detected_databases[0].status
                _db_name = detected_databases[0].info.name
            else:
                # No longer listed: the deletion is considered complete.
                last_status_seen = None
        if last_status_seen is not None:
            # The database is still listed, but in a non-terminating status.
            _name_desc = f" ({_db_name})" if _db_name else ""
            raise DevOpsAPIException(
                f"Database {id}{_name_desc} entered unexpected status "
                f"{last_status_seen} after {DEV_OPS_DATABASE_STATUS_TERMINATING}"
            )
    logger.info(f"finished dropping database '{id}' (DevOps API)")
    return {"ok": 1}
def get_async_database(self, api_endpoint_or_id: str | None = None, *, api_endpoint: str | None = None, token: str | TokenProvider | None = None, keyspace: str | None = None, namespace: str | None = None, id: str | None = None, region: str | None = None, api_path: str | None = None, api_version: str | None = None) ‑> AsyncDatabase

Create an AsyncDatabase instance for a specific database, to be used when doing data-level work (such as creating/managing collections).

Args

api_endpoint_or_id
positional parameter that can stand for both api_endpoint and id. Passing them together is an error.
api_endpoint
the API Endpoint for the target database (e.g. https://<ID>-<REGION>.apps.astra.datastax.com). The database must exist already for the resulting object to be effectively used; in other words, this invocation does not create the database, just the object instance.
token
if supplied, is passed to the Database instead of the one set for this object. This can be either a literal token string or a subclass of TokenProvider.
keyspace
used to specify a certain keyspace the resulting AsyncDatabase will primarily work on. If not specified, an additional DevOps API call reveals the default keyspace for the target database.
namespace
an alias for keyspace. DEPRECATED, removal in 2.0.
id
the target database ID. This is alternative to using the API Endpoint.
region
the region to use for connecting to the database. The database must be located in that region. This parameter can be used only if the database is specified by its ID (instead of API Endpoint). If this parameter is not passed, and cannot be inferred from the API endpoint, an additional DevOps API request is made to determine the default region and use it subsequently.
api_path
path to append to the API Endpoint. In typical usage, this should be left to its default of "/api/json".
api_version
version specifier to append to the API path. In typical usage, this should be left to its default of "v1".
max_time_ms
a timeout, in milliseconds, for the DevOps API HTTP request should it be necessary (see the region argument).

Returns

An AsyncDatabase object ready to be used.

Example

>>> async def create_use_collection(
...     admin: AstraDBAdmin,
...     api_endpoint: str,
...     keyspace: str,
... ) -> None:
...     my_async_db = admin.get_async_database(
...         api_endpoint,
...         keyspace=keyspace,
...     )
...     a_coll = await my_async_db.create_collection("movies", dimension=2)
...     await a_coll.insert_one(
...         {"title": "The Title", "$vector": [0.3, 0.4]}
...     )
...
>>> asyncio.run(create_use_collection(
...     my_admin,
...     "https://<ID>-<REGION>.apps.astra.datastax.com",
...     "default_keyspace",
... ))
>>>
Expand source code
def get_async_database(
    self,
    api_endpoint_or_id: str | None = None,
    *,
    api_endpoint: str | None = None,
    token: str | TokenProvider | None = None,
    keyspace: str | None = None,
    namespace: str | None = None,
    id: str | None = None,
    region: str | None = None,
    api_path: str | None = None,
    api_version: str | None = None,
    max_time_ms: int | None = None,
) -> AsyncDatabase:
    """
    Create an AsyncDatabase instance for a specific database, to be used
    when doing data-level work (such as creating/managing collections).

    This method builds a Database through `get_database`, forwarding all
    arguments to it, and returns the result of its `to_async()` conversion.

    Args:
        api_endpoint_or_id: positional parameter that can stand for both
            `api_endpoint` and `id`. Passing them together is an error.
        api_endpoint: the API Endpoint for the target database
            (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`).
            The database must exist already for the resulting object
            to be effectively used; in other words, this invocation
            does not create the database, just the object instance.
        token: if supplied, is passed to the Database instead of
            the one set for this object.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        keyspace: used to specify a certain keyspace the resulting
            AsyncDatabase will primarily work on. If not specified, an additional
            DevOps API call reveals the default keyspace for the target database.
        namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
        id: the target database ID. This is alternative to using the API Endpoint.
        region: the region to use for connecting to the database. The
            database must be located in that region. This parameter can be used
            only if the database is specified by its ID (instead of API Endpoint).
            If this parameter is not passed, and cannot be inferred
            from the API endpoint, an additional DevOps API request is made
            to determine the default region and use it subsequently.
        api_path: path to append to the API Endpoint. In typical usage, this
            should be left to its default of "/api/json".
        api_version: version specifier to append to the API path. In typical
            usage, this should be left to its default of "v1".
        max_time_ms: a timeout, in milliseconds, for the DevOps API
            HTTP request should it be necessary (see the `region` argument).

    Returns:
        An AsyncDatabase object ready to be used.

    Example:
        >>> async def create_use_collection(
        ...     admin: AstraDBAdmin,
        ...     api_endpoint: str,
        ...     keyspace: str,
        ... ) -> None:
        ...     my_async_db = admin.get_async_database(
        ...         api_endpoint,
        ...         keyspace=keyspace,
        ...     )
        ...     a_coll = await my_async_db.create_collection("movies", dimension=2)
        ...     await a_coll.insert_one(
        ...         {"title": "The Title", "$vector": [0.3, 0.4]}
        ...     )
        ...
        >>> asyncio.run(create_use_collection(
        ...     my_admin,
        ...     "https://<ID>-<REGION>.apps.astra.datastax.com",
        ...     "default_keyspace",
        ... ))
        >>>
    """

    # Reconcile the deprecated `namespace` alias with `keyspace` up front,
    # so that only the resolved value is handed over to `get_database`.
    keyspace_param = check_namespace_keyspace(
        keyspace=keyspace,
        namespace=namespace,
    )

    return self.get_database(
        api_endpoint_or_id=api_endpoint_or_id,
        api_endpoint=api_endpoint,
        token=token,
        keyspace=keyspace_param,
        id=id,
        region=region,
        api_path=api_path,
        api_version=api_version,
        max_time_ms=max_time_ms,
    ).to_async()
def get_database(self, api_endpoint_or_id: str | None = None, *, api_endpoint: str | None = None, token: str | TokenProvider | None = None, keyspace: str | None = None, namespace: str | None = None, id: str | None = None, region: str | None = None, api_path: str | None = None, api_version: str | None = None, max_time_ms: int | None = None) ‑> Database

Create a Database instance for a specific database, to be used when doing data-level work (such as creating/managing collections).

Args

api_endpoint_or_id
positional parameter that can stand for both api_endpoint and id. Passing them together is an error.
api_endpoint
the API Endpoint for the target database (e.g. https://<ID>-<REGION>.apps.astra.datastax.com). The database must exist already for the resulting object to be effectively used; in other words, this invocation does not create the database, just the object instance.
token
if supplied, is passed to the Database instead of the one set for this object. This can be either a literal token string or a subclass of TokenProvider.
keyspace
used to specify a certain keyspace the resulting Database will primarily work on. If not specified, an additional DevOps API call reveals the default keyspace for the target database.
namespace
an alias for keyspace. DEPRECATED, removal in 2.0.
id
the target database ID. This is alternative to using the API Endpoint.
region
the region to use for connecting to the database. The database must be located in that region. This parameter can be used only if the database is specified by its ID (instead of API Endpoint). If this parameter is not passed, and cannot be inferred from the API endpoint, an additional DevOps API request is made to determine the default region and use it subsequently.
api_path
path to append to the API Endpoint. In typical usage, this should be left to its default of "/api/json".
api_version
version specifier to append to the API path. In typical usage, this should be left to its default of "v1".
max_time_ms
a timeout, in milliseconds, for the DevOps API HTTP request should it be necessary (see the region argument).

Returns

A Database object ready to be used.

Example

>>> my_db = my_astra_db_admin.get_database(
...     "https://<ID>-<REGION>.apps.astra.datastax.com",
...     keyspace="my_prod_keyspace",
... )
>>> my_coll = my_db.create_collection("movies", dimension=2)
>>> my_coll.insert_one({"title": "The Title", "$vector": [0.3, 0.4]})
Expand source code
def get_database(
    self,
    api_endpoint_or_id: str | None = None,
    *,
    api_endpoint: str | None = None,
    token: str | TokenProvider | None = None,
    keyspace: str | None = None,
    namespace: str | None = None,
    id: str | None = None,
    region: str | None = None,
    api_path: str | None = None,
    api_version: str | None = None,
    max_time_ms: int | None = None,
) -> Database:
    """
    Create a Database instance for a specific database, to be used
    when doing data-level work (such as creating/managing collections).

    Args:
        api_endpoint_or_id: positional parameter that can stand for both
            `api_endpoint` and `id`. Passing them together is an error.
        api_endpoint: the API Endpoint for the target database
            (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`).
            The database must exist already for the resulting object
            to be effectively used; in other words, this invocation
            does not create the database, just the object instance.
        token: if supplied, is passed to the Database instead of
            the one set for this object.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        keyspace: used to specify a certain keyspace the resulting
            Database will primarily work on. If not specified, an additional
            DevOps API call reveals the default keyspace for the target database.
        namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
        id: the target database ID. This is an alternative to using
            the API Endpoint.
        region: the region to use for connecting to the database. The
            database must be located in that region. This parameter can be used
            only if the database is specified by its ID (instead of API Endpoint).
            If this parameter is not passed, and cannot be inferred
            from the API endpoint, an additional DevOps API request is made
            to determine the default region and use it subsequently.
        api_path: path to append to the API Endpoint. In typical usage, this
            should be left to its default of "/api/json".
        api_version: version specifier to append to the API path. In typical
            usage, this should be left to its default of "v1".
        max_time_ms: a timeout, in milliseconds, for the DevOps API
            HTTP requests should they be necessary (i.e. when the region
            and/or the keyspace must be looked up).

    Returns:
        A Database object ready to be used.

    Raises:
        ValueError: if `region` is passed together with an API endpoint,
            if the API endpoint cannot be parsed (checked only when the
            keyspace has to be looked up), or if neither an API endpoint
            nor an ID is supplied.

    Example:
        >>> my_db = my_astra_db_admin.get_database(
        ...     "https://<ID>-<REGION>.apps.astra.datastax.com",
        ...     keyspace="my_prod_keyspace",
        ... )
        >>> coll = my_db.create_collection("movies", dimension=2)
        >>> coll.insert_one({"title": "The Title", "$vector": [0.3, 0.4]})
    """

    _api_endpoint_p, _id_p = check_id_endpoint_parg_kwargs(
        p_arg=api_endpoint_or_id, api_endpoint=api_endpoint, id=id
    )
    keyspace_param = check_namespace_keyspace(
        keyspace=keyspace,
        namespace=namespace,
    )

    # lazy importing here to avoid circular dependency
    from astrapy import Database

    # an explicitly-passed token takes precedence over this admin's own
    _token = coerce_token_provider(token) or self.token_provider
    _keyspace: str | None
    _resolved_api_endpoint: str
    if _api_endpoint_p is not None:
        # the database is identified by its API endpoint:
        if region is not None:
            raise ValueError(
                "Parameter `region` not supported with an API endpoint."
            )
        if keyspace_param:
            _keyspace = keyspace_param
        else:
            # the database ID is needed to ask for the default keyspace
            parsed_api_endpoint = parse_api_endpoint(_api_endpoint_p)
            if parsed_api_endpoint is None:
                msg = api_endpoint_parsing_error_message(_api_endpoint_p)
                raise ValueError(msg)
            _keyspace = self.database_info(
                parsed_api_endpoint.database_id,
                max_time_ms=max_time_ms,
            ).info.keyspace
        _resolved_api_endpoint = _api_endpoint_p
    else:
        # the database is identified by its ID:
        if _id_p is None:
            raise ValueError("Either `api_endpoint` or `id` must be supplied.")
        # note: the region lookup uses this admin's own token,
        # not the `token` override
        _region = normalize_region_for_id(
            database_id=_id_p,
            token_str=self.token_provider.get_token(),
            environment=self.environment,
            region_param=region,
            max_time_ms=max_time_ms,
        )
        if keyspace_param:
            _keyspace = keyspace_param
        else:
            _keyspace = self.database_info(
                _id_p, max_time_ms=max_time_ms
            ).info.keyspace
        _resolved_api_endpoint = build_api_endpoint(
            environment=self.environment,
            database_id=_id_p,
            region=_region,
        )

    # both cases end up with the same construction, differing only
    # in how the endpoint and the keyspace were determined
    return Database(
        api_endpoint=_resolved_api_endpoint,
        token=_token,
        keyspace=_keyspace,
        callers=self.callers,
        environment=self.environment,
        api_path=api_path,
        api_version=api_version,
    )
def get_database_admin(self, api_endpoint_or_id: str | None = None, *, api_endpoint: str | None = None, id: str | None = None, region: str | None = None, max_time_ms: int | None = None) ‑> AstraDBDatabaseAdmin

Create an AstraDBDatabaseAdmin object for admin work within a certain database.

Args

api_endpoint_or_id
positional parameter that can stand for both api_endpoint and id. Passing them together is an error.
api_endpoint
the API Endpoint for the target database (e.g. https://<ID>-<REGION>.apps.astra.datastax.com). The database must exist already for the resulting object to be effectively used; in other words, this invocation does not create the database, just the object instance.
id
the target database ID. This is an alternative to using the API Endpoint.
region
the region to use for connecting to the database. The database must be located in that region. This parameter can be used only if the database is specified by its ID (instead of API Endpoint). If this parameter is not passed, and cannot be inferred from the API endpoint, an additional DevOps API request is made to determine the default region and use it subsequently.
max_time_ms
a timeout, in milliseconds, for the DevOps API HTTP request should it be necessary (see the region argument).

Returns

An AstraDBDatabaseAdmin instance representing the requested database.

Example

>>> my_db_admin = my_astra_db_admin.get_database_admin("01234567-...")
>>> my_db_admin.list_keyspaces()
['default_keyspace']
>>> my_db_admin.create_keyspace("that_other_one")
{'ok': 1}
>>> my_db_admin.list_keyspaces()
['default_keyspace', 'that_other_one']

Note

This method does not perform any admin-level operation through the DevOps API. For actual creation of a database, see the create_database method.

Expand source code
def get_database_admin(
    self,
    api_endpoint_or_id: str | None = None,
    *,
    api_endpoint: str | None = None,
    id: str | None = None,
    region: str | None = None,
    max_time_ms: int | None = None,
) -> AstraDBDatabaseAdmin:
    """
    Build an AstraDBDatabaseAdmin, i.e. the object for doing admin work
    within one specific database.

    Args:
        api_endpoint_or_id: positional parameter that can stand for both
            `api_endpoint` and `id`. Passing them together is an error.
        api_endpoint: the API Endpoint for the target database
            (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`).
            The database must exist already for the resulting object
            to be effectively used; in other words, this invocation
            does not create the database, just the object instance.
        id: the target database ID. This is an alternative to using
            the API Endpoint.
        region: the region to use for connecting to the database. The
            database must be located in that region. This parameter can be used
            only if the database is specified by its ID (instead of API Endpoint).
            If this parameter is not passed, and cannot be inferred
            from the API endpoint, an additional DevOps API request is made
            to determine the default region and use it subsequently.
        max_time_ms: a timeout, in milliseconds, for the DevOps API
            HTTP request should it be necessary (see the `region` argument).

    Returns:
        An AstraDBDatabaseAdmin instance representing the requested database.

    Raises:
        ValueError: if `region` is given along with an API endpoint, or if
            neither an API endpoint nor a database ID is supplied.

    Example:
        >>> my_db_admin = my_astra_db_admin.get_database_admin("01234567-...")
        >>> my_db_admin.list_keyspaces()
        ['default_keyspace']
        >>> my_db_admin.create_keyspace("that_other_one")
        {'ok': 1}
        >>> my_db_admin.list_keyspaces()
        ['default_keyspace', 'that_other_one']

    Note:
        This method does not perform any admin-level operation through
        the DevOps API. For actual creation of a database, see the
        `create_database` method.
    """

    endpoint_arg, database_id_arg = check_id_endpoint_parg_kwargs(
        p_arg=api_endpoint_or_id, api_endpoint=api_endpoint, id=id
    )

    target_api_endpoint: str
    if endpoint_arg is not None:
        # an API endpoint already pins the region down
        if region is not None:
            raise ValueError(
                "Parameter `region` not supported with an API endpoint."
            )
        target_api_endpoint = endpoint_arg
    else:
        if database_id_arg is None:
            raise ValueError("Either `api_endpoint` or `id` must be supplied.")

        # with an ID, the region is taken from the parameter or looked up
        resolved_region = normalize_region_for_id(
            database_id=database_id_arg,
            token_str=self.token_provider.get_token(),
            environment=self.environment,
            region_param=region,
            max_time_ms=max_time_ms,
        )
        target_api_endpoint = build_api_endpoint(
            environment=self.environment,
            database_id=database_id_arg,
            region=resolved_region,
        )

    # max_time_ms is handed over in both cases
    return AstraDBDatabaseAdmin.from_astra_db_admin(
        api_endpoint=target_api_endpoint,
        astra_db_admin=self,
        max_time_ms=max_time_ms,
    )
def list_databases(self, *, include: str | None = None, provider: str | None = None, page_size: int | None = None, max_time_ms: int | None = None) ‑> CommandCursor[AdminDatabaseInfo]

Get the list of databases, as obtained with a request to the DevOps API.

Args

include
a filter on what databases are to be returned. As per DevOps API, defaults to "nonterminated". Pass "all" to include the already terminated databases.
provider
a filter on the cloud provider for the databases. As per DevOps API, defaults to "ALL". Pass e.g. "AWS" to restrict the results.
page_size
number of results per page from the DevOps API. Optional.
max_time_ms
a timeout, in milliseconds, for the API request.

Returns

A CommandCursor to iterate over the detected databases, represented as AdminDatabaseInfo objects.

Example

>>> database_cursor = my_astra_db_admin.list_databases()
>>> database_list = list(database_cursor)
>>> len(database_list)
3
>>> database_list[2].id
'01234567-...'
>>> database_list[2].status
'ACTIVE'
>>> database_list[2].info.region
'eu-west-1'
Expand source code
def list_databases(
    self,
    *,
    include: str | None = None,
    provider: str | None = None,
    page_size: int | None = None,
    max_time_ms: int | None = None,
) -> CommandCursor[AdminDatabaseInfo]:
    """
    Retrieve the databases by querying the DevOps API, following its
    pagination until a page shorter than the page size is received.

    Args:
        include: a filter on what databases are to be returned. As per
            DevOps API, defaults to "nonterminated". Pass "all" to include
            the already terminated databases.
        provider: a filter on the cloud provider for the databases.
            As per DevOps API, defaults to "ALL". Pass e.g. "AWS" to
            restrict the results.
        page_size: number of results per page from the DevOps API. Optional:
            a falsy value makes the method use
            `DEV_OPS_DEFAULT_DATABASES_PAGE_SIZE`.
        max_time_ms: a timeout, in milliseconds, for the API request
            (the same value is used for each page request).

    Returns:
        A CommandCursor to iterate over the detected databases,
        represented as AdminDatabaseInfo objects.

    Raises:
        DevOpsAPIException: if a response is not a list, or if the last
            item of a full page has no "id" to paginate from.

    Example:
        >>> database_cursor = my_astra_db_admin.list_databases()
        >>> database_list = list(database_cursor)
        >>> len(database_list)
        3
        >>> database_list[2].id
        '01234567-...'
        >>> database_list[2].status
        'ACTIVE'
        >>> database_list[2].info.region
        'eu-west-1'
    """

    logger.info("getting databases (DevOps API)")
    # parameters left to None are not sent at all
    candidate_params = {
        "include": include,
        "provider": provider,
        "limit": page_size or DEV_OPS_DEFAULT_DATABASES_PAGE_SIZE,
    }
    first_page_params = {
        param_name: param_value
        for param_name, param_value in candidate_params.items()
        if param_value is not None
    }
    pages: list[dict[str, Any]] = []

    # the first page is requested without a pagination marker
    logger.info("request 0, getting databases (DevOps API)")
    first_page = self._dev_ops_api_commander.request(
        http_method=HttpMethod.GET,
        request_params=first_page_params,
        timeout_info=base_timeout_info(max_time_ms),
    )
    if not isinstance(first_page, list):
        raise DevOpsAPIException(
            "Faulty response from get-databases DevOps API command.",
        )
    logger.info("finished request 0, getting databases (DevOps API)")
    pages.append(first_page)

    # a full page suggests there may be more: keep asking for the next one
    page_limit = first_page_params["limit"]
    while len(pages[-1]) >= page_limit:
        last_item = pages[-1][-1]
        if "id" not in last_item:
            raise DevOpsAPIException(
                "Faulty response from get-databases DevOps API command.",
            )
        next_page_params = {
            **first_page_params,
            "starting_after": last_item["id"],
        }
        request_index = len(pages)
        logger.info(
            "request %s, getting databases (DevOps API)",
            request_index,
        )
        next_page = self._dev_ops_api_commander.request(
            http_method=HttpMethod.GET,
            request_params=next_page_params,
            timeout_info=base_timeout_info(max_time_ms),
        )
        logger.info(
            "finished request %s, getting databases (DevOps API)",
            request_index,
        )
        if not isinstance(next_page, list):
            raise DevOpsAPIException(
                "Faulty response from get-databases DevOps API command.",
            )
        pages.append(next_page)

    logger.info("finished getting databases (DevOps API)")
    cursor_address = self._dev_ops_api_commander.full_path
    # flatten all pages into a single list of AdminDatabaseInfo items
    database_infos = [
        _recast_as_admin_database_info(
            raw_database,
            environment=self.environment,
        )
        for page in pages
        for raw_database in page
    ]
    return CommandCursor(
        address=cursor_address,
        items=database_infos,
    )
def set_caller(self, caller_name: str | None = None, caller_version: str | None = None) ‑> None

Set a new identity for the application/framework on behalf of which the DevOps API calls will be performed (the "caller").

New objects spawned from this client afterwards will inherit the new settings.

Args

caller_name
name of the application, or framework, on behalf of which the DevOps API calls are performed. This ends up in the request user-agent.
caller_version
version of the caller.

Example

>>> my_astra_db_admin.set_caller(
...     caller_name="the_caller",
...     caller_version="0.1.0",
... )

Deprecated since version: 1.5.1

This will be removed in 2.0.0. Please provide the caller(s) at constructor time through the callers list parameter.

Expand source code
@deprecation.deprecated(  # type: ignore[misc]
    deprecated_in="1.5.1",
    removed_in="2.0.0",
    current_version=__version__,
    details=SET_CALLER_DEPRECATION_NOTICE,
)
def set_caller(
    self,
    caller_name: str | None = None,
    caller_version: str | None = None,
) -> None:
    """
    Set a new identity for the application/framework on behalf of which
    the DevOps API calls will be performed (the "caller").

    New objects spawned from this client afterwards will inherit the new settings.

    This method is deprecated (since 1.5.1, removal in 2.0.0).

    Args:
        caller_name: name of the application, or framework, on behalf of which
            the DevOps API calls are performed. This ends up in the request user-agent.
        caller_version: version of the caller.

    Example:
        >>> my_astra_db_admin.set_caller(
        ...     caller_name="the_caller",
        ...     caller_version="0.1.0",
        ... )
    """

    # lazy %-style arguments: formatting happens only if the record is emitted
    logger.info("setting caller to %s/%s", caller_name, caller_version)
    callers_param = check_caller_parameters([], caller_name, caller_version)
    self.callers = callers_param
    # the commander is rebuilt so that it picks up the new callers
    self._dev_ops_api_commander = self._get_dev_ops_api_commander()
def with_options(self, *, token: str | TokenProvider | None = None, callers: Sequence[CallerType] = [], caller_name: str | None = None, caller_version: str | None = None) ‑> AstraDBAdmin

Create a clone of this AstraDBAdmin with some changed attributes.

Args

token
an Access Token to the database. Example: "AstraCS:xyz...". This can be either a literal token string or a subclass of TokenProvider.
callers
a list of caller identities, i.e. applications, or frameworks, on behalf of which DevOps API calls are performed. These end up in the request user-agent. Each caller identity is a ("caller_name", "caller_version") pair.
caller_name
DEPRECATED, use callers. Removal 2.0. Name of the application, or framework, on behalf of which the DevOps API calls are performed. This ends up in the request user-agent.
caller_version
version of the caller. DEPRECATED, use callers. Removal 2.0.

Returns

a new AstraDBAdmin instance.

Example

>>> another_astra_db_admin = my_astra_db_admin.with_options(
...     callers=[("caller_identity", "1.2.0")],
... )
Expand source code
def with_options(
    self,
    *,
    token: str | TokenProvider | None = None,
    callers: Sequence[CallerType] = [],
    caller_name: str | None = None,
    caller_version: str | None = None,
) -> AstraDBAdmin:
    """
    Return a copy of this AstraDBAdmin, with the supplied attributes
    replacing the current ones.

    Args:
        token: an Access Token to the database. Example: `"AstraCS:xyz..."`.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        callers: a list of caller identities, i.e. applications, or frameworks,
            on behalf of which DevOps API calls are performed. These end up in
            the request user-agent.
            Each caller identity is a ("caller_name", "caller_version") pair.
        caller_name: *DEPRECATED*, use `callers`. Removal 2.0. Name of the
            application, or framework, on behalf of which the DevOps API calls
            are performed. This ends up in the request user-agent.
        caller_version: version of the caller. *DEPRECATED*, use `callers`.
            Removal 2.0.

    Returns:
        a new AstraDBAdmin instance.

    Example:
        >>> another_astra_db_admin = my_astra_db_admin.with_options(
        ...     callers=[("caller_identity", "1.2.0")],
        ... )
    """

    # reconcile the `callers` list with the deprecated name/version pair
    resolved_callers = check_caller_parameters(
        callers,
        caller_name,
        caller_version,
    )
    return self._copy(token=token, callers=resolved_callers)
class AstraDBDatabaseAdmin (api_endpoint: str, *, token: str | TokenProvider | None = None, environment: str | None = None, callers: Sequence[CallerType] = [], caller_name: str | None = None, caller_version: str | None = None, dev_ops_url: str | None = None, dev_ops_api_version: str | None = None, api_path: str | None = None, api_version: str | None = None, spawner_database: Database | AsyncDatabase | None = None, max_time_ms: int | None = None)

An "admin" object, able to perform administrative tasks at the keyspaces level (i.e. within a certain database), such as creating/listing/dropping keyspaces.

This is one layer below the AstraDBAdmin concept, in that it is tied to a single database and enables admin work within it. As such, it is generally created by a method call on an AstraDBAdmin.

Args

api_endpoint
the API Endpoint for the target database (e.g. https://<ID>-<REGION>.apps.astra.datastax.com). The database must exist already for the resulting object to be effectively used; in other words, this invocation does not create the database, just the object instance.
token
an access token with enough permission to perform admin tasks. This can be either a literal token string or a subclass of TokenProvider.
environment
a label, whose value is one of Environment.PROD (default), Environment.DEV or Environment.TEST.
callers
a list of caller identities, i.e. applications, or frameworks, on behalf of which Data API and DevOps API calls are performed. These end up in the request user-agent. Each caller identity is a ("caller_name", "caller_version") pair.
caller_name
DEPRECATED, use callers. Removal 2.0. Name of the application, or framework, on behalf of which the Data API and DevOps API calls are performed. This ends up in the request user-agent.
caller_version
version of the caller. DEPRECATED, use callers. Removal 2.0.
dev_ops_url
in case of custom deployments, this can be used to specify the URL to the DevOps API, such as "https://api.astra.datastax.com". Generally it can be omitted. The environment (prod/dev/…) is determined from the API Endpoint.
dev_ops_api_version
this can specify a custom version of the DevOps API (such as "v2"). Generally not needed.
api_path
path to append to the API Endpoint. In typical usage, this class is created by a method such as Database.get_database_admin(), which passes the matching value. Generally to be left to its Astra DB default of "/api/json".
api_version
version specifier to append to the API path. In typical usage, this class is created by a method such as Database.get_database_admin(), which passes the matching value. Generally to be left to its Astra DB default of "/v1".
spawner_database
either a Database or an AsyncDatabase instance. This represents the database class which spawns this admin object, so that, if required, a keyspace creation can retroactively "use" the new keyspace in the spawner. Used to enable the Async/Database.get_database_admin().create_keyspace() pattern.
max_time_ms
a timeout, in milliseconds, for the DevOps API HTTP request should it be necessary.

Example

>>> from astrapy import DataAPIClient
>>> my_client = DataAPIClient("AstraCS:...")
>>> admin_for_my_db = my_client.get_admin().get_database_admin(
...     "https://<ID>-<REGION>.apps.astra.datastax.com"
... )
>>> admin_for_my_db.list_keyspaces()
['default_keyspace', 'staging_keyspace']
>>> admin_for_my_db.info().status
'ACTIVE'

Note

creating an instance of AstraDBDatabaseAdmin does not trigger actual creation of the database itself, which should exist beforehand. To create databases, see the AstraDBAdmin class.

Expand source code
class AstraDBDatabaseAdmin(DatabaseAdmin):
    """
    An "admin" object, able to perform administrative tasks at the keyspaces level
    (i.e. within a certain database), such as creating/listing/dropping keyspaces.

    This is one layer below the AstraDBAdmin concept, in that it is tied to
    a single database and enables admin work within it. As such, it is generally
    created by a method call on an AstraDBAdmin.

    Args:
        api_endpoint: the API Endpoint for the target database
            (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`).
            The database must exist already for the resulting object
            to be effectively used; in other words, this invocation
            does not create the database, just the object instance.
        token: an access token with enough permission to perform admin tasks.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        environment: a label, whose value is one of Environment.PROD (default),
            Environment.DEV or Environment.TEST.
        callers: a list of caller identities, i.e. applications, or frameworks,
            on behalf of which Data API and DevOps API calls are performed.
            These end up in the request user-agent.
            Each caller identity is a ("caller_name", "caller_version") pair.
        caller_name: *DEPRECATED*, use `callers`. Removal 2.0. Name of the
            application, or framework, on behalf of which the Data API and
            DevOps API calls are performed. This ends up in the request user-agent.
        caller_version: version of the caller. *DEPRECATED*, use `callers`. Removal 2.0.
        dev_ops_url: in case of custom deployments, this can be used to specify
            the URL to the DevOps API, such as "https://api.astra.datastax.com".
            Generally it can be omitted. The environment (prod/dev/...) is
            determined from the API Endpoint.
        dev_ops_api_version: this can specify a custom version of the DevOps API
            (such as "v2"). Generally not needed.
        api_path: path to append to the API Endpoint. In typical usage, this
            class is created by a method such as `Database.get_database_admin()`,
            which passes the matching value. Generally to be left to its Astra DB
            default of "/api/json".
        api_version: version specifier to append to the API path. In typical
            usage, this class is created by a method such as
            `Database.get_database_admin()`, which passes the matching value.
            Generally to be left to its Astra DB default of "/v1".
        spawner_database: either a Database or an AsyncDatabase instance. This represents
            the database class which spawns this admin object, so that, if required,
            a keyspace creation can retroactively "use" the new keyspace in the spawner.
            Used to enable the Async/Database.get_admin_database().create_keyspace() pattern.
        max_time_ms: a timeout, in milliseconds, for the DevOps API
            HTTP request should it be necessary (see the `region` argument).

    Example:
        >>> from astrapy import DataAPIClient
        >>> my_client = DataAPIClient("AstraCS:...")
        >>> admin_for_my_db = my_client.get_admin().get_database_admin(
        ...     "https://<ID>-<REGION>.apps.astra.datastax.com"
        ... )
        >>> admin_for_my_db.list_keyspaces()
        ['default_keyspace', 'staging_keyspace']
        >>> admin_for_my_db.info().status
        'ACTIVE'

    Note:
        creating an instance of AstraDBDatabaseAdmin does not trigger actual creation
        of the database itself, which should exist beforehand. To create databases,
        see the AstraDBAdmin class.
    """

    def __init__(
        self,
        api_endpoint: str,
        *,
        token: str | TokenProvider | None = None,
        environment: str | None = None,
        callers: Sequence[CallerType] = [],
        caller_name: str | None = None,
        caller_version: str | None = None,
        dev_ops_url: str | None = None,
        dev_ops_api_version: str | None = None,
        api_path: str | None = None,
        api_version: str | None = None,
        spawner_database: Database | AsyncDatabase | None = None,
        max_time_ms: int | None = None,
    ) -> None:
        """
        Validate the API endpoint against the environment, then set up the
        Data API commander, the DevOps API commander, the spawner database
        and the associated AstraDBAdmin.

        Note that `max_time_ms` is accepted but not read by this method.

        Raises:
            ValueError: if the API endpoint cannot be parsed, or if the
                environment found in it differs from `environment`.
        """
        # deferred import, needed to avoid a circular dependency
        from astrapy.database import Database

        resolved_callers = check_caller_parameters(
            callers, caller_name, caller_version
        )
        self.token_provider = coerce_token_provider(token)
        self.environment = (environment or Environment.PROD).lower()
        self.api_endpoint = api_endpoint

        # the endpoint must be parseable and must agree with the environment
        endpoint_parts = parse_api_endpoint(self.api_endpoint)
        if endpoint_parts is None:
            raise ValueError(api_endpoint_parsing_error_message(self.api_endpoint))

        self._database_id = endpoint_parts.database_id
        self._region = endpoint_parts.region
        if endpoint_parts.environment != self.environment:
            raise ValueError(
                "Environment mismatch between client and provided "
                "API endpoint. You can try adding "
                f'`environment="{endpoint_parts.environment}"` '
                "to the class constructor."
            )
        self.callers = resolved_callers

        # unspecified path/version fall back to the per-environment defaults
        if api_path is not None:
            self.api_path = api_path
        else:
            self.api_path = API_PATH_ENV_MAP[self.environment]
        if api_version is not None:
            self.api_version = api_version
        else:
            self.api_version = API_VERSION_ENV_MAP[self.environment]

        if spawner_database is None:
            # no spawner given: create a Database, leaving the keyspace
            # to its per-environment default (a task for the Database)
            spawner_database = Database(
                api_endpoint=self.api_endpoint,
                token=self.token_provider,
                keyspace=None,
                callers=self.callers,
                environment=self.environment,
                api_path=self.api_path,
                api_version=self.api_version,
            )
        self.spawner_database = spawner_database

        # API-commander-specific init (for the vectorizeOps invocations)
        self._commander_headers = {
            DEFAULT_DATA_API_AUTH_HEADER: self.token_provider.get_token(),
        }
        self._api_commander = self._get_api_commander()

        # DevOps-API-commander specific init (keyspace CRUD, etc)
        if dev_ops_url is not None:
            raw_dev_ops_url = dev_ops_url
        else:
            raw_dev_ops_url = DEV_OPS_URL_ENV_MAP[self.environment]
        self.dev_ops_url = raw_dev_ops_url.rstrip("/")

        if dev_ops_api_version is not None:
            raw_dev_ops_api_version = dev_ops_api_version
        else:
            raw_dev_ops_api_version = DEV_OPS_VERSION_ENV_MAP[self.environment]
        self.dev_ops_api_version = raw_dev_ops_api_version.strip("/")

        # the auth header is added only when the token provider is truthy
        self._dev_ops_commander_headers: dict[str, str | None] = {}
        if self.token_provider:
            bearer_token = self.token_provider.get_token()
            self._dev_ops_commander_headers = {
                DEFAULT_DEV_OPS_AUTH_HEADER: (
                    f"{DEFAULT_DEV_OPS_AUTH_PREFIX}{bearer_token}"
                ),
            }
        self._dev_ops_api_commander = self._get_dev_ops_api_commander()

        # this class keeps a reference to the AstraDBAdmin associated to this org:
        self._astra_db_admin = AstraDBAdmin(
            token=self.token_provider,
            environment=self.environment,
            callers=self.callers,
            dev_ops_url=self.dev_ops_url,
            dev_ops_api_version=self.dev_ops_api_version,
        )

    def __repr__(self) -> str:
        """
        Return a constructor-like description of this object: the API endpoint
        always, the (redacted) token if one is set, and the environment
        only when it is not the production one.
        """
        described_parts = [f'api_endpoint="{self.api_endpoint}"']
        if self.token_provider:
            # the token is passed through redact_secret before being shown
            redacted_token = redact_secret(str(self.token_provider), 15)
            described_parts.append(f'token="{redacted_token}"')
        if self.environment != Environment.PROD:
            described_parts.append(f'environment="{self.environment}"')
        return f"{self.__class__.__name__}({', '.join(described_parts)})"

    def __eq__(self, other: Any) -> bool:
        """
        Two AstraDBDatabaseAdmin objects are equal when all of their
        configuration attributes listed below compare equal; any object
        of another type is never equal to this one.
        """
        if not isinstance(other, AstraDBDatabaseAdmin):
            return False

        compared_attributes = (
            "token_provider",
            "environment",
            "api_endpoint",
            "callers",
            "api_path",
            "api_version",
            "spawner_database",
            "dev_ops_url",
            "dev_ops_api_version",
        )
        # a list (not a generator): every comparison is carried out
        return all(
            [
                getattr(self, attribute) == getattr(other, attribute)
                for attribute in compared_attributes
            ]
        )

    def _get_api_commander(self) -> APICommander:
        """Build and return a fresh APICommander for Data API calls."""
        # empty (or None) components are left out of the base path
        path_components = [self.api_path, self.api_version]
        return APICommander(
            api_endpoint=self.api_endpoint,
            path="/".join(filter(None, path_components)),
            headers=self._commander_headers,
            callers=self.callers,
        )

    def _get_dev_ops_api_commander(self) -> APICommander:
        """Build and return a fresh APICommander for DevOps API calls."""

        # the commander is rooted at "<version>/databases/<database id>"
        path_segments = (
            self.dev_ops_api_version,
            "databases",
            self._database_id,
        )
        return APICommander(
            api_endpoint=self.dev_ops_url,
            path="/".join(path_segments),
            headers=self._dev_ops_commander_headers,
            callers=self.callers,
            dev_ops_api=True,
        )

    def _copy(
        self,
        api_endpoint: str | None = None,
        *,
        token: str | TokenProvider | None = None,
        environment: str | None = None,
        callers: Sequence[CallerType] = [],
        caller_name: str | None = None,
        caller_version: str | None = None,
        dev_ops_url: str | None = None,
        dev_ops_api_version: str | None = None,
        api_path: str | None = None,
        api_version: str | None = None,
    ) -> AstraDBDatabaseAdmin:
        """
        Create a new AstraDBDatabaseAdmin derived from this one.

        Each argument, when truthy, overrides the corresponding setting of this
        instance; otherwise the current value is carried over. The
        `callers`/`caller_name`/`caller_version` arguments are first combined
        through `check_caller_parameters`. Note that `spawner_database`
        is not forwarded to the new instance.

        Returns:
            a new AstraDBDatabaseAdmin instance.
        """
        resolved_callers = check_caller_parameters(callers, caller_name, caller_version)
        resolved_token = coerce_token_provider(token)
        return AstraDBDatabaseAdmin(
            api_endpoint=api_endpoint if api_endpoint else self.api_endpoint,
            token=resolved_token if resolved_token else self.token_provider,
            environment=environment if environment else self.environment,
            callers=resolved_callers if resolved_callers else self.callers,
            dev_ops_url=dev_ops_url if dev_ops_url else self.dev_ops_url,
            dev_ops_api_version=(
                dev_ops_api_version
                if dev_ops_api_version
                else self.dev_ops_api_version
            ),
            api_path=api_path if api_path else self.api_path,
            api_version=api_version if api_version else self.api_version,
        )

    def with_options(
        self,
        api_endpoint: str | None = None,
        *,
        token: str | TokenProvider | None = None,
        callers: Sequence[CallerType] = [],
        caller_name: str | None = None,
        caller_version: str | None = None,
    ) -> AstraDBDatabaseAdmin:
        """
        Return a copy of this AstraDBDatabaseAdmin with some settings replaced.

        Settings that are not supplied keep the value they have on this instance.

        Args:
            api_endpoint: the API Endpoint of the target database
                (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`).
                No database is created by this call: the database must
                already exist for the returned object to be usable.
            token: an Access Token to the database, e.g. `"AstraCS:xyz..."`.
                Either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider` is accepted.
            callers: a list of caller identities (applications or frameworks)
                on whose behalf the Data API and DevOps API calls are made.
                Each identity is a ("caller_name", "caller_version") pair
                and ends up in the request user-agent.
            caller_name: *DEPRECATED*, use `callers`. Removal 2.0. Name of the
                application, or framework, on whose behalf the Data API and
                DevOps API calls are made. It ends up in the request user-agent.
            caller_version: *DEPRECATED*, use `callers`. Removal 2.0.
                Version of the caller.

        Returns:
            a new AstraDBDatabaseAdmin instance.

        Example:
            >>> admin_for_my_other_db = admin_for_my_db.with_options(
            ...     "https://<ID>-<REGION>.apps.astra.datastax.com",
            ... )
        """

        return self._copy(
            api_endpoint=api_endpoint,
            token=token,
            callers=check_caller_parameters(callers, caller_name, caller_version),
        )

    @deprecation.deprecated(  # type: ignore[misc]
        deprecated_in="1.5.1",
        removed_in="2.0.0",
        current_version=__version__,
        details=SET_CALLER_DEPRECATION_NOTICE,
    )
    def set_caller(
        self,
        caller_name: str | None = None,
        caller_version: str | None = None,
    ) -> None:
        """
        Change, in place, the "caller" identity, i.e. the application/framework
        on whose behalf the API calls are performed.

        Both the Data API and the DevOps API commanders of this object are
        rebuilt afterwards. Objects spawned from this one after the call
        will inherit the new settings.

        Args:
            caller_name: name of the application, or framework, on whose behalf
                the calls are performed. It ends up in the request user-agent.
            caller_version: version of the caller.

        Example:
            >>> admin_for_my_db.set_caller(
            ...     caller_name="the_caller",
            ...     caller_version="0.1.0",
            ... )
        """

        logger.info(f"setting caller to {caller_name}/{caller_version}")
        new_callers = check_caller_parameters([], caller_name, caller_version)
        # a falsy result leaves the current callers untouched
        self.callers = new_callers if new_callers else self.callers
        # the commanders must be re-created to pick up the callers in use
        self._api_commander = self._get_api_commander()
        self._dev_ops_api_commander = self._get_dev_ops_api_commander()

    @property
    def id(self) -> str:
        """
        The ID of this database admin.

        Example:
            >>> my_db_admin.id
            '01234567-89ab-cdef-0123-456789abcdef'
        """
        return self._database_id

    @property
    def region(self) -> str:
        """
        The region for this database admin.

        Example:
            >>> my_db_admin.region
            'us-east-1'
        """
        return self._region

    @staticmethod
    def from_astra_db_admin(
        api_endpoint: str,
        *,
        astra_db_admin: AstraDBAdmin,
        max_time_ms: int | None = None,
    ) -> AstraDBDatabaseAdmin:
        """
        Build an AstraDBDatabaseAdmin for a database, given its API Endpoint
        and an AstraDBAdmin to take the settings from.

        The token provider, environment, callers and DevOps API settings
        are all taken from the supplied AstraDBAdmin.

        Args:
            api_endpoint: the API Endpoint of the target database
                (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`).
                No database is created by this call: the database must
                already exist for the returned object to be usable.
            astra_db_admin: an AstraDBAdmin object that has visibility over
                the target database.
            max_time_ms: a timeout, in milliseconds, for the DevOps API
                HTTP request, should one be necessary. The value is forwarded
                as-is to the AstraDBDatabaseAdmin constructor.

        Returns:
            An AstraDBDatabaseAdmin object, for admin work within the database.

        Example:
            >>> from astrapy import DataAPIClient, AstraDBDatabaseAdmin
            >>> admin_for_my_db = AstraDBDatabaseAdmin.from_astra_db_admin(
            ...     "https://<ID>-<REGION>.apps.astra.datastax.com",
            ...     astra_db_admin=DataAPIClient("AstraCS:...").get_admin(),
            ... )
            >>> admin_for_my_db.list_keyspaces()
            ['default_keyspace', 'staging_keyspace']
            >>> admin_for_my_db.info().status
            'ACTIVE'

        Note:
            Instantiating an AstraDBDatabaseAdmin does not create the database,
            which should exist beforehand. To create databases,
            see the AstraDBAdmin class.
        """

        # settings inherited from the parent admin object
        inherited_settings = {
            "token": astra_db_admin.token_provider,
            "environment": astra_db_admin.environment,
            "callers": astra_db_admin.callers,
            "dev_ops_url": astra_db_admin._dev_ops_url,
            "dev_ops_api_version": astra_db_admin._dev_ops_api_version,
        }
        return AstraDBDatabaseAdmin(
            api_endpoint=api_endpoint,
            max_time_ms=max_time_ms,
            **inherited_settings,
        )

    @staticmethod
    def from_api_endpoint(
        api_endpoint: str,
        *,
        token: str | TokenProvider | None = None,
        callers: Sequence[CallerType] = [],
        caller_name: str | None = None,
        caller_version: str | None = None,
        dev_ops_url: str | None = None,
        dev_ops_api_version: str | None = None,
    ) -> AstraDBDatabaseAdmin:
        """
        Build an AstraDBDatabaseAdmin given an API Endpoint and,
        optionally, a token.

        The environment is obtained by parsing the API Endpoint;
        if the endpoint cannot be parsed, a ValueError is raised.

        Args:
            api_endpoint: the API Endpoint of the target database
                (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`).
                No database is created by this call: the database must
                already exist for the returned object to be usable.
            token: an access token with enough permissions to do admin work.
                Either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider` is accepted.
            callers: a list of caller identities (applications or frameworks)
                on whose behalf the Data API and DevOps API calls are made.
                Each identity is a ("caller_name", "caller_version") pair
                and ends up in the request user-agent.
            caller_name: *DEPRECATED*, use `callers`. Removal 2.0. Name of the
                application, or framework, on whose behalf the Data API and
                DevOps API calls are made. It ends up in the request user-agent.
            caller_version: *DEPRECATED*, use `callers`. Removal 2.0.
                Version of the caller.
            dev_ops_url: for custom deployments, the URL to the DevOps API,
                such as "https://api.astra.datastax.com".
                Generally it can be omitted. The environment (prod/dev/...) is
                determined from the API Endpoint.
            dev_ops_api_version: a custom version of the DevOps API
                (such as "v2"). Generally not needed.

        Returns:
            An AstraDBDatabaseAdmin object, for admin work within the database.

        Raises:
            ValueError: if the API Endpoint cannot be parsed.

        Example:
            >>> from astrapy import AstraDBDatabaseAdmin
            >>> admin_for_my_db = AstraDBDatabaseAdmin.from_api_endpoint(
            ...     api_endpoint="https://01234567-....apps.astra.datastax.com",
            ...     token="AstraCS:...",
            ... )
            >>> admin_for_my_db.list_keyspaces()
            ['default_keyspace', 'another_keyspace']
            >>> admin_for_my_db.info().status
            'ACTIVE'

        Note:
            Instantiating an AstraDBDatabaseAdmin does not create the database,
            which should exist beforehand. To create databases,
            see the AstraDBAdmin class.
        """

        callers_param = check_caller_parameters(callers, caller_name, caller_version)
        endpoint_details = parse_api_endpoint(api_endpoint)
        if not endpoint_details:
            # unparseable endpoint: there is no way to determine the environment
            raise ValueError(api_endpoint_parsing_error_message(api_endpoint))
        return AstraDBDatabaseAdmin(
            api_endpoint=api_endpoint,
            token=token,
            environment=endpoint_details.environment,
            callers=callers_param,
            dev_ops_url=dev_ops_url,
            dev_ops_api_version=dev_ops_api_version,
        )

    def info(self, *, max_time_ms: int | None = None) -> AdminDatabaseInfo:
        """
        Retrieve the full information on this database from the DevOps API.

        The request is delegated to the AstraDBAdmin object
        associated to this instance.

        Args:
            max_time_ms: a timeout, in milliseconds, for the DevOps API request.

        Returns:
            An AdminDatabaseInfo object.

        Example:
            >>> my_db_info = admin_for_my_db.info()
            >>> my_db_info.status
            'ACTIVE'
            >>> my_db_info.info.region
            'us-east1'
        """

        logger.info(f"getting info ('{self._database_id}')")
        database_info = self._astra_db_admin.database_info(
            id=self._database_id,
            max_time_ms=max_time_ms,
        )
        logger.info(f"finished getting info ('{self._database_id}')")
        return database_info

    async def async_info(self, *, max_time_ms: int | None = None) -> AdminDatabaseInfo:
        """
        Retrieve the full information on this database from the DevOps API.
        Async version of the method, for use in an asyncio context.

        The request is delegated to the AstraDBAdmin object
        associated to this instance.

        Args:
            max_time_ms: a timeout, in milliseconds, for the DevOps API request.

        Returns:
            An AdminDatabaseInfo object.

        Example:
            >>> async def wait_until_active(db_admin: AstraDBDatabaseAdmin) -> None:
            ...     while True:
            ...         info = await db_admin.async_info()
            ...         if info.status == "ACTIVE":
            ...             return
            ...
            >>> asyncio.run(wait_until_active(admin_for_my_db))
        """

        logger.info(f"getting info ('{self._database_id}'), async")
        database_info = await self._astra_db_admin.async_database_info(
            id=self._database_id,
            max_time_ms=max_time_ms,
        )
        logger.info(f"finished getting info ('{self._database_id}'), async")
        return database_info

    @deprecation.deprecated(  # type: ignore[misc]
        deprecated_in="1.5.0",
        removed_in="2.0.0",
        current_version=__version__,
        details=NAMESPACE_DEPRECATION_NOTICE_METHOD,
    )
    def list_namespaces(self, *, max_time_ms: int | None = None) -> list[str]:
        """
        Get the list of the namespaces in the database from the DevOps API.

        *DEPRECATED* (removal in 2.0). Switch to the "list_keyspaces" method,
        to which this call is entirely delegated.

        Args:
            max_time_ms: a timeout, in milliseconds, for the DevOps API request.

        Returns:
            A list of the namespaces, each a string, in no particular order.

        Example:
            >>> admin_for_my_db.list_namespaces()
            ['default_keyspace', 'staging_namespace']
        """

        namespaces = self.list_keyspaces(max_time_ms=max_time_ms)
        return namespaces

    def list_keyspaces(self, *, max_time_ms: int | None = None) -> list[str]:
        """
        Get the list of the keyspaces in the database from the DevOps API.

        The list is extracted from the raw database information
        as returned by the `info` method.

        Args:
            max_time_ms: a timeout, in milliseconds, for the DevOps API request.

        Returns:
            A list of the keyspaces, each a string, in no particular order.
            An empty list is returned if the raw information lists no keyspaces.

        Raises:
            DevOpsAPIException: if the database info carries no raw information.

        Example:
            >>> admin_for_my_db.list_keyspaces()
            ['default_keyspace', 'staging_keyspace']
        """

        logger.info(f"getting keyspaces ('{self._database_id}')")
        database_info = self.info(max_time_ms=max_time_ms)
        logger.info(f"finished getting keyspaces ('{self._database_id}')")
        if database_info.raw_info is None:
            raise DevOpsAPIException("Could not get the keyspace list.")
        # a missing or empty "keyspaces" entry results in an empty list
        return database_info.raw_info.get("info", {}).get("keyspaces") or []

    @deprecation.deprecated(  # type: ignore[misc]
        deprecated_in="1.5.0",
        removed_in="2.0.0",
        current_version=__version__,
        details=NAMESPACE_DEPRECATION_NOTICE_METHOD,
    )
    async def async_list_namespaces(
        self, *, max_time_ms: int | None = None
    ) -> list[str]:
        """
        Get the list of the namespaces in the database from the DevOps API.
        Async version of the method, for use in an asyncio context.

        *DEPRECATED* (removal in 2.0). Switch to the "async_list_keyspaces"
        method, to which this call is entirely delegated.

        Args:
            max_time_ms: a timeout, in milliseconds, for the DevOps API request.

        Returns:
            A list of the namespaces, each a string, in no particular order.

        Example:
            >>> async def check_if_ns_exists(
            ...     db_admin: AstraDBDatabaseAdmin, namespace: str
            ... ) -> bool:
            ...     ns_list = await db_admin.async_list_namespaces()
            ...     return namespace in ns_list
            ...
            >>> asyncio.run(check_if_ns_exists(admin_for_my_db, "dragons"))
            False
            >>> asyncio.run(check_if_ns_exists(admin_for_my_db, "app_namespace"))
            True
        """

        namespaces = await self.async_list_keyspaces(max_time_ms=max_time_ms)
        return namespaces

    async def async_list_keyspaces(
        self, *, max_time_ms: int | None = None
    ) -> list[str]:
        """
        Get the list of the keyspaces in the database from the DevOps API.
        Async version of the method, for use in an asyncio context.

        The list is extracted from the raw database information
        as returned by the `async_info` method.

        Args:
            max_time_ms: a timeout, in milliseconds, for the DevOps API request.

        Returns:
            A list of the keyspaces, each a string, in no particular order.
            An empty list is returned if the raw information lists no keyspaces.

        Raises:
            DevOpsAPIException: if the database info carries no raw information.

        Example:
            >>> async def check_if_ks_exists(
            ...     db_admin: AstraDBDatabaseAdmin, keyspace: str
            ... ) -> bool:
            ...     ks_list = await db_admin.async_list_keyspaces()
            ...     return keyspace in ks_list
            ...
            >>> asyncio.run(check_if_ks_exists(admin_for_my_db, "dragons"))
            False
            >>> asyncio.run(check_if_ks_exists(admin_for_my_db, "app_keyspace"))
            True
        """

        logger.info(f"getting keyspaces ('{self._database_id}'), async")
        database_info = await self.async_info(max_time_ms=max_time_ms)
        logger.info(f"finished getting keyspaces ('{self._database_id}'), async")
        if database_info.raw_info is None:
            raise DevOpsAPIException("Could not get the keyspace list.")
        # a missing or empty "keyspaces" entry results in an empty list
        return database_info.raw_info.get("info", {}).get("keyspaces") or []

    @deprecation.deprecated(  # type: ignore[misc]
        deprecated_in="1.5.0",
        removed_in="2.0.0",
        current_version=__version__,
        details=NAMESPACE_DEPRECATION_NOTICE_METHOD,
    )
    def create_namespace(
        self,
        name: str,
        *,
        wait_until_active: bool = True,
        update_db_keyspace: bool | None = None,
        update_db_namespace: bool | None = None,
        max_time_ms: int | None = None,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """
        Create a namespace in this database, optionally waiting
        for the database to be ready again.

        *DEPRECATED* (removal in 2.0). Switch to the "create_keyspace" method,
        to which this call is entirely delegated.

        Args:
            name: the namespace name. Supplying the name of an existing
                namespace raises no errors: the method proceeds as usual
                and the whole invocation is a no-op.
            wait_until_active: if True (default), the method returns only after
                the target database is in ACTIVE state again (a few
                seconds, usually). If False, it returns right after issuing the
                creation request to the DevOps API: it is then up to the caller
                to check the database status/namespace availability
                before working with it.
            update_db_keyspace: if True, the `Database` or `AsyncDatabase` class
                that spawned this DatabaseAdmin, if any, gets updated to work on
                the newly-created keyspace starting when this method returns.
            update_db_namespace: an alias for update_db_keyspace.
                *DEPRECATED* as of v1.5.0, removal in v2.0.0.
            max_time_ms: a timeout, in milliseconds, for the whole requested
                operation to complete.
                Note that a timeout is no guarantee that the creation request
                has not reached the API server.
            **kwargs: any additional keyword arguments are passed, unchanged,
                to "create_keyspace".

        Returns:
            A dictionary of the form {"ok": 1} in case of success.
            Otherwise, an exception is raised.

        Example:
            >>> my_db_admin.list_namespaces()
            ['default_keyspace']
            >>> my_db_admin.create_namespace("that_other_one")
            {'ok': 1}
            >>> my_db_admin.list_namespaces()
            ['default_keyspace', 'that_other_one']
        """

        creation_result = self.create_keyspace(
            name=name,
            wait_until_active=wait_until_active,
            update_db_keyspace=update_db_keyspace,
            update_db_namespace=update_db_namespace,
            max_time_ms=max_time_ms,
            **kwargs,
        )
        return creation_result

    def create_keyspace(
        self,
        name: str,
        *,
        wait_until_active: bool = True,
        update_db_keyspace: bool | None = None,
        update_db_namespace: bool | None = None,
        max_time_ms: int | None = None,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """
        Create a keyspace in this database as requested,
        optionally waiting for it to be ready.

        Args:
            name: the keyspace name. If supplying a keyspace that exists
                already, the method call proceeds as usual, no errors are
                raised, and the whole invocation is a no-op.
            wait_until_active: if True (default), the method returns only after
                the target database is in ACTIVE state again (a few
                seconds, usually). If False, it will return right after issuing the
                creation request to the DevOps API, and it will be responsibility
                of the caller to check the database status/keyspace availability
                before working with it.
            update_db_keyspace: if True, the `Database` or `AsyncDatabase` class
                that spawned this DatabaseAdmin, if any, gets updated to work on
                the newly-created keyspace starting when this method returns.
            update_db_namespace: an alias for update_db_keyspace.
                *DEPRECATED* as of v1.5.0, removal in v2.0.0.
            max_time_ms: a timeout, in milliseconds, for the whole requested
                operation to complete: the creation request, the status polling
                and the final check on the keyspace list all share this budget.
                Note that a timeout is no guarantee that the creation request
                has not reached the API server.
            **kwargs: accepted in the signature, but not used by this method.

        Returns:
            A dictionary of the form {"ok": 1} in case of success.
            Otherwise, an exception is raised.

        Raises:
            DevOpsAPIException: if the DevOps API does not answer the creation
                request with the expected "Created" HTTP status; or, when waiting
                for completion, if the database leaves MAINTENANCE for a status
                other than ACTIVE, or if the keyspace is not found afterwards.

        Example:
            >>> my_db_admin.list_keyspaces()
            ['default_keyspace']
            >>> my_db_admin.create_keyspace("that_other_one")
            {'ok': 1}
            >>> my_db_admin.list_keyspaces()
            ['default_keyspace', 'that_other_one']
        """

        _update_db_keyspace = check_update_db_namespace_keyspace(
            update_db_keyspace=update_db_keyspace,
            update_db_namespace=update_db_namespace,
        )

        # a single timeout manager tracks the time budget across all API calls
        timeout_manager = MultiCallTimeoutManager(
            overall_max_time_ms=max_time_ms, dev_ops_api=True
        )
        logger.info(
            f"creating keyspace '{name}' on '{self._database_id}' (DevOps API)"
        )
        cn_raw_response = self._dev_ops_api_commander.raw_request(
            http_method=HttpMethod.POST,
            additional_path=f"keyspaces/{name}",
            timeout_info=timeout_manager.remaining_timeout_info(),
        )
        if cn_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_CREATED:
            raise DevOpsAPIException(
                f"keyspace creation ('{name}') failed: API returned HTTP "
                f"{cn_raw_response.status_code} instead of "
                f"{DEV_OPS_RESPONSE_HTTP_CREATED} - Created."
            )
        logger.info(
            "DevOps API returned from creating keyspace "
            f"'{name}' on '{self._database_id}'"
        )
        if wait_until_active:
            # poll the database status until it is not in MAINTENANCE anymore
            last_status_seen = DEV_OPS_DATABASE_STATUS_MAINTENANCE
            while last_status_seen == DEV_OPS_DATABASE_STATUS_MAINTENANCE:
                logger.info(f"sleeping to poll for status of '{self._database_id}'")
                time.sleep(DEV_OPS_KEYSPACE_POLL_INTERVAL_S)
                last_status_seen = self.info(
                    max_time_ms=timeout_manager.remaining_timeout_ms(),
                ).status
            if last_status_seen != DEV_OPS_DATABASE_STATUS_ACTIVE:
                raise DevOpsAPIException(
                    f"Database entered unexpected status {last_status_seen} after MAINTENANCE."
                )
            # is the keyspace found? This check is also bound by the overall
            # timeout, like all other API calls made by this method.
            if name not in self.list_keyspaces(
                max_time_ms=timeout_manager.remaining_timeout_ms(),
            ):
                raise DevOpsAPIException("Could not create the keyspace.")
        logger.info(
            f"finished creating keyspace '{name}' on "
            f"'{self._database_id}' (DevOps API)"
        )
        if _update_db_keyspace:
            self.spawner_database.use_keyspace(name)
        return {"ok": 1}

    @deprecation.deprecated(  # type: ignore[misc]
        deprecated_in="1.5.0",
        removed_in="2.0.0",
        current_version=__version__,
        details=NAMESPACE_DEPRECATION_NOTICE_METHOD,
    )
    async def async_create_namespace(
        self,
        name: str,
        *,
        wait_until_active: bool = True,
        update_db_keyspace: bool | None = None,
        update_db_namespace: bool | None = None,
        max_time_ms: int | None = None,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """
        Create a namespace in this database, optionally waiting
        for the database to be ready again.
        Async version of the method, for use in an asyncio context.

        *DEPRECATED* (removal in 2.0). Switch to the "async_create_keyspace"
        method, to which this call is entirely delegated.

        Args:
            name: the namespace name. Supplying the name of an existing
                namespace raises no errors: the method proceeds as usual
                and the whole invocation is a no-op.
            wait_until_active: if True (default), the method returns only after
                the target database is in ACTIVE state again (a few
                seconds, usually). If False, it returns right after issuing the
                creation request to the DevOps API: it is then up to the caller
                to check the database status/namespace availability
                before working with it.
            update_db_keyspace: if True, the `Database` or `AsyncDatabase` class
                that spawned this DatabaseAdmin, if any, gets updated to work on
                the newly-created keyspace starting when this method returns.
            update_db_namespace: an alias for update_db_keyspace.
                *DEPRECATED* as of v1.5.0, removal in v2.0.0.
            max_time_ms: a timeout, in milliseconds, for the whole requested
                operation to complete.
                Note that a timeout is no guarantee that the creation request
                has not reached the API server.
            **kwargs: any additional keyword arguments are passed, unchanged,
                to "async_create_keyspace".

        Returns:
            A dictionary of the form {"ok": 1} in case of success.
            Otherwise, an exception is raised.

        Example:
            >>> asyncio.run(
            ...     my_db_admin.async_create_namespace("app_namespace")
            ... )
            {'ok': 1}
        """

        creation_result = await self.async_create_keyspace(
            name=name,
            wait_until_active=wait_until_active,
            update_db_keyspace=update_db_keyspace,
            update_db_namespace=update_db_namespace,
            max_time_ms=max_time_ms,
            **kwargs,
        )
        return creation_result

    async def async_create_keyspace(
        self,
        name: str,
        *,
        wait_until_active: bool = True,
        update_db_keyspace: bool | None = None,
        update_db_namespace: bool | None = None,
        max_time_ms: int | None = None,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """
        Create a keyspace in this database as requested,
        optionally waiting for it to be ready.
        Async version of the method, for use in an asyncio context.

        Args:
            name: the keyspace name. If supplying a keyspace that exists
                already, the method call proceeds as usual, no errors are
                raised, and the whole invocation is a no-op.
            wait_until_active: if True (default), the method returns only after
                the target database is in ACTIVE state again (a few
                seconds, usually). If False, it will return right after issuing the
                creation request to the DevOps API, and it will be responsibility
                of the caller to check the database status/keyspace availability
                before working with it.
            update_db_keyspace: if True, the `Database` or `AsyncDatabase` class
                that spawned this DatabaseAdmin, if any, gets updated to work on
                the newly-created keyspace starting when this method returns.
            update_db_namespace: an alias for update_db_keyspace.
                *DEPRECATED* as of v1.5.0, removal in v2.0.0.
            max_time_ms: a timeout, in milliseconds, for the whole requested
                operation to complete: the creation request, the status polling
                and the final check on the keyspace list all share this budget.
                Note that a timeout is no guarantee that the creation request
                has not reached the API server.
            **kwargs: accepted in the signature, but not used by this method.

        Returns:
            A dictionary of the form {"ok": 1} in case of success.
            Otherwise, an exception is raised.

        Raises:
            DevOpsAPIException: if the DevOps API does not answer the creation
                request with the expected "Created" HTTP status; or, when waiting
                for completion, if the database leaves MAINTENANCE for a status
                other than ACTIVE, or if the keyspace is not found afterwards.

        Example:
            >>> asyncio.run(
            ...     my_db_admin.async_create_keyspace("app_keyspace")
            ... )
            {'ok': 1}
        """

        _update_db_keyspace = check_update_db_namespace_keyspace(
            update_db_keyspace=update_db_keyspace,
            update_db_namespace=update_db_namespace,
        )

        # a single timeout manager tracks the time budget across all API calls
        timeout_manager = MultiCallTimeoutManager(
            overall_max_time_ms=max_time_ms, dev_ops_api=True
        )
        logger.info(
            f"creating keyspace '{name}' on "
            f"'{self._database_id}' (DevOps API), async"
        )
        cn_raw_response = await self._dev_ops_api_commander.async_raw_request(
            http_method=HttpMethod.POST,
            additional_path=f"keyspaces/{name}",
            timeout_info=timeout_manager.remaining_timeout_info(),
        )
        if cn_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_CREATED:
            raise DevOpsAPIException(
                f"keyspace creation ('{name}') failed: API returned HTTP "
                f"{cn_raw_response.status_code} instead of "
                f"{DEV_OPS_RESPONSE_HTTP_CREATED} - Created."
            )
        logger.info(
            f"DevOps API returned from creating keyspace "
            f"'{name}' on '{self._database_id}', async"
        )
        if wait_until_active:
            # poll the database status until it is not in MAINTENANCE anymore
            last_status_seen = DEV_OPS_DATABASE_STATUS_MAINTENANCE
            while last_status_seen == DEV_OPS_DATABASE_STATUS_MAINTENANCE:
                logger.info(
                    f"sleeping to poll for status of '{self._database_id}', async"
                )
                await asyncio.sleep(DEV_OPS_KEYSPACE_POLL_INTERVAL_S)
                last_db_info = await self.async_info(
                    max_time_ms=timeout_manager.remaining_timeout_ms(),
                )
                last_status_seen = last_db_info.status
            if last_status_seen != DEV_OPS_DATABASE_STATUS_ACTIVE:
                raise DevOpsAPIException(
                    f"Database entered unexpected status {last_status_seen} after MAINTENANCE."
                )
            # is the keyspace found? This check is also bound by the overall
            # timeout, like all other API calls made by this method.
            found_keyspaces = await self.async_list_keyspaces(
                max_time_ms=timeout_manager.remaining_timeout_ms(),
            )
            if name not in found_keyspaces:
                raise DevOpsAPIException("Could not create the keyspace.")
        logger.info(
            f"finished creating keyspace '{name}' on "
            f"'{self._database_id}' (DevOps API), async"
        )
        if _update_db_keyspace:
            self.spawner_database.use_keyspace(name)
        return {"ok": 1}

    @deprecation.deprecated(  # type: ignore[misc]
        deprecated_in="1.5.0",
        removed_in="2.0.0",
        current_version=__version__,
        details=NAMESPACE_DEPRECATION_NOTICE_METHOD,
    )
    def drop_namespace(
        self,
        name: str,
        *,
        wait_until_active: bool = True,
        max_time_ms: int | None = None,
    ) -> dict[str, Any]:
        """
        Delete a namespace from the database, optionally waiting for the database
        to become active again.

        *DEPRECATED* (removal in 2.0). Switch to the "drop_keyspace" method.**

        Args:
            name: the namespace to delete. If it does not exist in this database,
                an error is raised.
            wait_until_active: if True (default), the method returns only after
                the target database is in ACTIVE state again (a few
                seconds, usually). If False, it will return right after issuing the
                deletion request to the DevOps API, and it will be responsibility
                of the caller to check the database status/namespace availability
                before working with it.
            max_time_ms: a timeout, in milliseconds, for the whole requested
                operation to complete.
                Note that a timeout is no guarantee that the deletion request
                has not reached the API server.

        Returns:
            A dictionary of the form {"ok": 1} in case of success.
            Otherwise, an exception is raised.

        Example:
            >>> my_db_admin.list_namespaces()
            ['default_keyspace', 'that_other_one']
            >>> my_db_admin.drop_namespace("that_other_one")
            {'ok': 1}
            >>> my_db_admin.list_namespaces()
            ['default_keyspace']
        """

        return self.drop_keyspace(
            name=name,
            wait_until_active=wait_until_active,
            max_time_ms=max_time_ms,
        )

    def drop_keyspace(
        self,
        name: str,
        *,
        wait_until_active: bool = True,
        max_time_ms: int | None = None,
    ) -> dict[str, Any]:
        """
        Delete a keyspace from the database, optionally waiting for the database
        to become active again.

        Args:
            name: the keyspace to delete. If it does not exist in this database,
                an error is raised.
            wait_until_active: if True (default), the method returns only after
                the target database is in ACTIVE state again (a few
                seconds, usually). If False, it will return right after issuing the
                deletion request to the DevOps API, and it will be responsibility
                of the caller to check the database status/keyspace availability
                before working with it.
            max_time_ms: a timeout, in milliseconds, for the whole requested
                operation to complete.
                Note that a timeout is no guarantee that the deletion request
                has not reached the API server.

        Returns:
            A dictionary of the form {"ok": 1} in case of success.
            Otherwise, an exception is raised.

        Raises:
            DevOpsAPIException: if the DevOps API does not answer the deletion
                request with the expected "Accepted" status code; or, when
                `wait_until_active` is True, if the database leaves MAINTENANCE
                for a status other than ACTIVE, or if the keyspace is still
                listed once the database is ACTIVE again.

        Example:
            >>> my_db_admin.list_keyspaces()
            ['default_keyspace', 'that_other_one']
            >>> my_db_admin.drop_keyspace("that_other_one")
            {'ok': 1}
            >>> my_db_admin.list_keyspaces()
            ['default_keyspace']
        """

        # A single timeout budget is shared by the deletion request
        # and by all subsequent status-polling calls.
        timeout_manager = MultiCallTimeoutManager(
            overall_max_time_ms=max_time_ms, dev_ops_api=True
        )
        logger.info(
            f"dropping keyspace '{name}' on '{self._database_id}' (DevOps API)"
        )
        dk_raw_response = self._dev_ops_api_commander.raw_request(
            http_method=HttpMethod.DELETE,
            additional_path=f"keyspaces/{name}",
            timeout_info=timeout_manager.remaining_timeout_info(),
        )
        if dk_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_ACCEPTED:
            # Report the keyspace name (not the `id` builtin) and the
            # correct label for the expected status code.
            raise DevOpsAPIException(
                f"keyspace deletion ('{name}') failed: API returned HTTP "
                f"{dk_raw_response.status_code} instead of "
                f"{DEV_OPS_RESPONSE_HTTP_ACCEPTED} - Accepted"
            )
        logger.info(
            "DevOps API returned from dropping keyspace "
            f"'{name}' on '{self._database_id}'"
        )
        if wait_until_active:
            # Seeding the status with MAINTENANCE guarantees that the loop
            # sleeps and polls at least once before drawing any conclusion.
            last_status_seen = DEV_OPS_DATABASE_STATUS_MAINTENANCE
            while last_status_seen == DEV_OPS_DATABASE_STATUS_MAINTENANCE:
                logger.info(f"sleeping to poll for status of '{self._database_id}'")
                time.sleep(DEV_OPS_KEYSPACE_POLL_INTERVAL_S)
                last_status_seen = self.info(
                    max_time_ms=timeout_manager.remaining_timeout_ms(),
                ).status
            if last_status_seen != DEV_OPS_DATABASE_STATUS_ACTIVE:
                raise DevOpsAPIException(
                    f"Database entered unexpected status {last_status_seen} after MAINTENANCE."
                )
            # the keyspace must not be listed anymore at this point
            if name in self.list_keyspaces():
                raise DevOpsAPIException("Could not drop the keyspace.")
        logger.info(
            f"finished dropping keyspace '{name}' on "
            f"'{self._database_id}' (DevOps API)"
        )
        return {"ok": 1}

    @deprecation.deprecated(  # type: ignore[misc]
        deprecated_in="1.5.0",
        removed_in="2.0.0",
        current_version=__version__,
        details=NAMESPACE_DEPRECATION_NOTICE_METHOD,
    )
    async def async_drop_namespace(
        self,
        name: str,
        *,
        wait_until_active: bool = True,
        max_time_ms: int | None = None,
    ) -> dict[str, Any]:
        """
        Delete a namespace from the database, optionally waiting for the database
        to become active again.
        Async version of the method, for use in an asyncio context.

        *DEPRECATED* (removal in 2.0). Switch to the "async_drop_namespace" method.**

        Args:
            name: the namespace to delete. If it does not exist in this database,
                an error is raised.
            wait_until_active: if True (default), the method returns only after
                the target database is in ACTIVE state again (a few
                seconds, usually). If False, it will return right after issuing the
                deletion request to the DevOps API, and it will be responsibility
                of the caller to check the database status/namespace availability
                before working with it.
            max_time_ms: a timeout, in milliseconds, for the whole requested
                operation to complete.
                Note that a timeout is no guarantee that the deletion request
                has not reached the API server.

        Returns:
            A dictionary of the form {"ok": 1} in case of success.
            Otherwise, an exception is raised.

        Example:
            >>> asyncio.run(
            ...     my_db_admin.async_drop_namespace("app_namespace")
            ... )
            {'ok': 1}
        """

        return await self.async_drop_keyspace(
            name=name,
            wait_until_active=wait_until_active,
            max_time_ms=max_time_ms,
        )

    async def async_drop_keyspace(
        self,
        name: str,
        *,
        wait_until_active: bool = True,
        max_time_ms: int | None = None,
    ) -> dict[str, Any]:
        """
        Delete a keyspace from the database, optionally waiting for the database
        to become active again.
        Async version of the method, for use in an asyncio context.

        Args:
            name: the keyspace to delete. If it does not exist in this database,
                an error is raised.
            wait_until_active: if True (default), the method returns only after
                the target database is in ACTIVE state again (a few
                seconds, usually). If False, it will return right after issuing the
                deletion request to the DevOps API, and it will be responsibility
                of the caller to check the database status/keyspace availability
                before working with it.
            max_time_ms: a timeout, in milliseconds, for the whole requested
                operation to complete.
                Note that a timeout is no guarantee that the deletion request
                has not reached the API server.

        Returns:
            A dictionary of the form {"ok": 1} in case of success.
            Otherwise, an exception is raised.

        Raises:
            DevOpsAPIException: if the DevOps API does not answer the deletion
                request with the expected "Accepted" status code; or, when
                `wait_until_active` is True, if the database leaves MAINTENANCE
                for a status other than ACTIVE, or if the keyspace is still
                listed once the database is ACTIVE again.

        Example:
            >>> asyncio.run(
            ...     my_db_admin.async_drop_keyspace("app_keyspace")
            ... )
            {'ok': 1}
        """

        # A single timeout budget is shared by the deletion request
        # and by all subsequent status-polling calls.
        timeout_manager = MultiCallTimeoutManager(
            overall_max_time_ms=max_time_ms, dev_ops_api=True
        )
        logger.info(
            f"dropping keyspace '{name}' on "
            f"'{self._database_id}' (DevOps API), async"
        )
        dk_raw_response = await self._dev_ops_api_commander.async_raw_request(
            http_method=HttpMethod.DELETE,
            additional_path=f"keyspaces/{name}",
            timeout_info=timeout_manager.remaining_timeout_info(),
        )
        if dk_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_ACCEPTED:
            # Report the keyspace name (not the `id` builtin) and the
            # correct label for the expected status code.
            raise DevOpsAPIException(
                f"keyspace deletion ('{name}') failed: API returned HTTP "
                f"{dk_raw_response.status_code} instead of "
                f"{DEV_OPS_RESPONSE_HTTP_ACCEPTED} - Accepted"
            )
        logger.info(
            f"DevOps API returned from dropping keyspace "
            f"'{name}' on '{self._database_id}', async"
        )
        if wait_until_active:
            # Seeding the status with MAINTENANCE guarantees that the loop
            # sleeps and polls at least once before drawing any conclusion.
            last_status_seen = DEV_OPS_DATABASE_STATUS_MAINTENANCE
            while last_status_seen == DEV_OPS_DATABASE_STATUS_MAINTENANCE:
                logger.info(
                    f"sleeping to poll for status of '{self._database_id}', async"
                )
                await asyncio.sleep(DEV_OPS_KEYSPACE_POLL_INTERVAL_S)
                last_db_info = await self.async_info(
                    max_time_ms=timeout_manager.remaining_timeout_ms(),
                )
                last_status_seen = last_db_info.status
            if last_status_seen != DEV_OPS_DATABASE_STATUS_ACTIVE:
                raise DevOpsAPIException(
                    f"Database entered unexpected status {last_status_seen} after MAINTENANCE."
                )
            # the keyspace must not be listed anymore at this point
            if name in await self.async_list_keyspaces():
                raise DevOpsAPIException("Could not drop the keyspace.")
        logger.info(
            f"finished dropping keyspace '{name}' on "
            f"'{self._database_id}' (DevOps API), async"
        )
        return {"ok": 1}

    def drop(
        self,
        *,
        wait_until_active: bool = True,
        max_time_ms: int | None = None,
    ) -> dict[str, Any]:
        """
        Drop this database, i.e. delete it completely and permanently with all its data.

        This method wraps the `drop_database` method of the AstraDBAdmin class,
        where more information may be found.

        Args:
            wait_until_active: if True (default), the method returns only after
                the database has actually been deleted (generally a few minutes).
                If False, it will return right after issuing the
                drop request to the DevOps API, and it will be responsibility
                of the caller to check the database status/availability
                after that, if desired.
            max_time_ms: a timeout, in milliseconds, for the whole requested
                operation to complete.
                Note that a timeout is no guarantee that the deletion request
                has not reached the API server.

        Returns:
            A dictionary of the form {"ok": 1} in case of success.
            Otherwise, an exception is raised.

        Example:
            >>> my_db_admin.list_keyspaces()
            ['default_keyspace', 'that_other_one']
            >>> my_db_admin.drop()
            {'ok': 1}
            >>> my_db_admin.list_keyspaces()  # raises a 404 Not Found http error

        Note:
            Once the method succeeds, methods on this object -- such as `info()`,
            or `list_keyspaces()` -- can still be invoked: however, this hardly
            makes sense as the underlying actual database is no more.
            It is responsibility of the developer to design a correct flow
            which avoids using a deceased database any further.
        """

        logger.info(f"dropping this database ('{self._database_id}')")
        # Hold on to the result so that the completion message gets logged
        # before returning (a log statement placed after `return` never runs).
        drop_result = self._astra_db_admin.drop_database(
            id=self._database_id,
            wait_until_active=wait_until_active,
            max_time_ms=max_time_ms,
        )
        logger.info(f"finished dropping this database ('{self._database_id}')")
        return drop_result

    async def async_drop(
        self,
        *,
        wait_until_active: bool = True,
        max_time_ms: int | None = None,
    ) -> dict[str, Any]:
        """
        Drop this database, i.e. delete it completely and permanently with all its data.
        Async version of the method, for use in an asyncio context.

        This method wraps the `async_drop_database` method of the AstraDBAdmin class,
        where more information may be found.

        Args:
            wait_until_active: if True (default), the method returns only after
                the database has actually been deleted (generally a few minutes).
                If False, it will return right after issuing the
                drop request to the DevOps API, and it will be responsibility
                of the caller to check the database status/availability
                after that, if desired.
            max_time_ms: a timeout, in milliseconds, for the whole requested
                operation to complete.
                Note that a timeout is no guarantee that the deletion request
                has not reached the API server.

        Returns:
            A dictionary of the form {"ok": 1} in case of success.
            Otherwise, an exception is raised.

        Example:
            >>> asyncio.run(my_db_admin.async_drop())
            {'ok': 1}

        Note:
            Once the method succeeds, methods on this object -- such as `info()`,
            or `list_keyspaces()` -- can still be invoked: however, this hardly
            makes sense as the underlying actual database is no more.
            It is responsibility of the developer to design a correct flow
            which avoids using a deceased database any further.
        """

        logger.info(f"dropping this database ('{self._database_id}'), async")
        # Hold on to the result so that the completion message gets logged
        # before returning (a log statement placed after `return` never runs).
        drop_result = await self._astra_db_admin.async_drop_database(
            id=self._database_id,
            wait_until_active=wait_until_active,
            max_time_ms=max_time_ms,
        )
        logger.info(f"finished dropping this database ('{self._database_id}'), async")
        return drop_result

    def get_database(
        self,
        *,
        token: str | TokenProvider | None = None,
        keyspace: str | None = None,
        namespace: str | None = None,
        region: str | None = None,
        api_path: str | None = None,
        api_version: str | None = None,
        max_time_ms: int | None = None,
    ) -> Database:
        """
        Spawn a Database object, suitable for data-related work, out of
        this database admin.

        The actual construction is delegated to the `get_database` method
        of the AstraDBAdmin held by this object, which is given
        the API Endpoint of this database admin.

        Args:
            token: if supplied, is passed to the Database instead of
                the one set for this object. Useful if one wants to work in
                a least-privilege manner, limiting the permissions for non-admin work.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            keyspace: an optional keyspace to set in the resulting Database.
                The same default logic as for `AstraDBAdmin.get_database` applies.
            namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
            region: *This parameter is deprecated and should not be used.*
                It is ignored by the method: passing anything other than None
                only results in a deprecation warning being issued.
            api_path: path to append to the API Endpoint. In typical usage, this
                should be left to its default of "/api/json".
            api_version: version specifier to append to the API path. In typical
                usage, this should be left to its default of "v1".
            max_time_ms: a timeout, in milliseconds, handed over as is to the
                `get_database` call on the underlying AstraDBAdmin.

        Returns:
            A Database object, ready to be used for working with data and collections.

        Example:
            >>> my_db = my_db_admin.get_database()
            >>> my_db.list_collection_names()
            ['movies', 'another_collection']

        Note:
            creating an instance of Database does not trigger actual creation
            of the database itself, which should exist beforehand. To create databases,
            see the AstraDBAdmin class.
        """

        # reconcile the `keyspace` parameter with its deprecated alias
        resolved_keyspace = check_namespace_keyspace(
            keyspace=keyspace,
            namespace=namespace,
        )

        # `region` has no effect here: its usage is only flagged to the caller
        if region is not None:
            warnings.warn(
                deprecation.DeprecatedWarning(
                    "The 'region' parameter is deprecated in this method and will be ignored.",
                    deprecated_in="1.3.2",
                    removed_in="2.0.0",
                    details="The database class whose method is invoked already has a region set.",
                ),
                stacklevel=2,
            )

        spawned_database = self._astra_db_admin.get_database(
            api_endpoint=self.api_endpoint,
            token=token,
            keyspace=resolved_keyspace,
            api_path=api_path,
            api_version=api_version,
            max_time_ms=max_time_ms,
        )
        return spawned_database

    def get_async_database(
        self,
        *,
        token: str | TokenProvider | None = None,
        keyspace: str | None = None,
        namespace: str | None = None,
        region: str | None = None,
        api_path: str | None = None,
        api_version: str | None = None,
        max_time_ms: int | None = None,
    ) -> AsyncDatabase:
        """
        Spawn an AsyncDatabase object, suitable for data-related work, out of
        this database admin.

        The implementation obtains a Database through the sync counterpart
        `get_database` and converts it with its `to_async` method. Signature
        and behavior match those of `get_database`: please refer to that
        method for a description of the parameters.
        """

        # reconcile the `keyspace` parameter with its deprecated alias
        resolved_keyspace = check_namespace_keyspace(
            keyspace=keyspace,
            namespace=namespace,
        )

        sync_database = self.get_database(
            token=token,
            keyspace=resolved_keyspace,
            region=region,
            api_path=api_path,
            api_version=api_version,
            max_time_ms=max_time_ms,
        )
        return sync_database.to_async()

    def find_embedding_providers(
        self, *, max_time_ms: int | None = None
    ) -> FindEmbeddingProvidersResult:
        """
        Ask the API for the complete description of the embedding providers
        that are available.

        A `findEmbeddingProviders` command is issued through the API commander
        of this object; the "status" part of the response is then parsed into
        the returned object.

        Args:
            max_time_ms: a timeout, in milliseconds, for the API request.

        Returns:
            A `FindEmbeddingProvidersResult` object with the complete information
            returned by the API about available embedding providers

        Raises:
            DataAPIFaultyResponseException: if the "status" of the API response
                has no "embeddingProviders" entry.

        Example (output abridged and indented for clarity):
            >>> admin_for_my_db.find_embedding_providers()
            FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)
            >>> admin_for_my_db.find_embedding_providers().embedding_providers
            {
                'openai': EmbeddingProvider(
                    display_name='OpenAI',
                    models=[
                        EmbeddingProviderModel(name='text-embedding-3-small'),
                        ...
                    ]
                ),
                ...
            }
        """

        logger.info("findEmbeddingProviders")
        raw_response = self._api_commander.request(
            payload={"findEmbeddingProviders": {}},
            timeout_info=base_timeout_info(max_time_ms),
        )
        # bail out early on a response lacking the expected content
        if "embeddingProviders" not in raw_response.get("status", {}):
            raise DataAPIFaultyResponseException(
                text="Faulty response from findEmbeddingProviders API command.",
                raw_response=raw_response,
            )
        logger.info("finished findEmbeddingProviders")
        return FindEmbeddingProvidersResult.from_dict(raw_response["status"])

    async def async_find_embedding_providers(
        self, *, max_time_ms: int | None = None
    ) -> FindEmbeddingProvidersResult:
        """
        Ask the API for the complete description of the embedding providers
        that are available.
        Async version of the method, for use in an asyncio context.

        A `findEmbeddingProviders` command is issued through the API commander
        of this object; the "status" part of the response is then parsed into
        the returned object.

        Args:
            max_time_ms: a timeout, in milliseconds, for the API request.

        Returns:
            A `FindEmbeddingProvidersResult` object with the complete information
            returned by the API about available embedding providers

        Raises:
            DataAPIFaultyResponseException: if the "status" of the API response
                has no "embeddingProviders" entry.

        Example (output abridged and indented for clarity):
            >>> asyncio.run(admin_for_my_db.async_find_embedding_providers())
            FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)
            >>> asyncio.run(
            ...     admin_for_my_db.async_find_embedding_providers()
            ... ).embedding_providers
            {
                'openai': EmbeddingProvider(
                    display_name='OpenAI',
                    models=[
                        EmbeddingProviderModel(name='text-embedding-3-small'),
                        ...
                    ]
                ),
                ...
            }
        """

        logger.info("findEmbeddingProviders, async")
        raw_response = await self._api_commander.async_request(
            payload={"findEmbeddingProviders": {}},
            timeout_info=base_timeout_info(max_time_ms),
        )
        # bail out early on a response lacking the expected content
        if "embeddingProviders" not in raw_response.get("status", {}):
            raise DataAPIFaultyResponseException(
                text="Faulty response from findEmbeddingProviders API command.",
                raw_response=raw_response,
            )
        logger.info("finished findEmbeddingProviders, async")
        return FindEmbeddingProvidersResult.from_dict(raw_response["status"])

Ancestors

Static methods

def from_api_endpoint(api_endpoint: str, *, token: str | TokenProvider | None = None, callers: Sequence[CallerType] = [], caller_name: str | None = None, caller_version: str | None = None, dev_ops_url: str | None = None, dev_ops_api_version: str | None = None) ‑> AstraDBDatabaseAdmin

Create an AstraDBDatabaseAdmin from an API Endpoint and optionally a token.

Args

api_endpoint
the API Endpoint for the target database (e.g. https://<ID>-<REGION>.apps.astra.datastax.com). The database must exist already for the resulting object to be effectively used; in other words, this invocation does not create the database, just the object instance.
token
an access token with enough permissions to do admin work. This can be either a literal token string or a subclass of TokenProvider.
callers
a list of caller identities, i.e. applications, or frameworks, on behalf of which Data API and DevOps API calls are performed. These end up in the request user-agent. Each caller identity is a ("caller_name", "caller_version") pair.
caller_name
DEPRECATED, use callers. Removal 2.0. Name of the application, or framework, on behalf of which the Data API and DevOps API calls are performed. This ends up in the request user-agent.
caller_version
version of the caller. DEPRECATED, use callers. Removal 2.0.
dev_ops_url
in case of custom deployments, this can be used to specify the URL to the DevOps API, such as "https://api.astra.datastax.com". Generally it can be omitted. The environment (prod/dev/…) is determined from the API Endpoint.
dev_ops_api_version
this can specify a custom version of the DevOps API (such as "v2"). Generally not needed.

Returns

An AstraDBDatabaseAdmin object, for admin work within the database.

Example

>>> from astrapy import AstraDBDatabaseAdmin
>>> admin_for_my_db = AstraDBDatabaseAdmin.from_api_endpoint(
...     api_endpoint="https://01234567-....apps.astra.datastax.com",
...     token="AstraCS:...",
... )
>>> admin_for_my_db.list_keyspaces()
['default_keyspace', 'another_keyspace']
>>> admin_for_my_db.info().status
'ACTIVE'

Note

Creating an instance of AstraDBDatabaseAdmin does not trigger actual creation of the database itself, which should exist beforehand. To create databases, see the AstraDBAdmin class.

Expand source code
@staticmethod
def from_api_endpoint(
    api_endpoint: str,
    *,
    token: str | TokenProvider | None = None,
    callers: Sequence[CallerType] = [],
    caller_name: str | None = None,
    caller_version: str | None = None,
    dev_ops_url: str | None = None,
    dev_ops_api_version: str | None = None,
) -> AstraDBDatabaseAdmin:
    """
    Build an AstraDBDatabaseAdmin starting from an API Endpoint
    and, optionally, a token.

    Args:
        api_endpoint: the API Endpoint for the target database
            (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`).
            The database must exist already for the resulting object
            to be effectively used; in other words, this invocation
            does not create the database, just the object instance.
        token: an access token with enough permissions to do admin work.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        callers: a list of caller identities, i.e. applications, or frameworks,
            on behalf of which Data API and DevOps API calls are performed.
            These end up in the request user-agent.
            Each caller identity is a ("caller_name", "caller_version") pair.
        caller_name: *DEPRECATED*, use `callers`. Removal 2.0. Name of the
            application, or framework, on behalf of which the Data API and
            DevOps API calls are performed. This ends up in the request user-agent.
        caller_version: version of the caller. *DEPRECATED*, use `callers`.
            Removal 2.0.
        dev_ops_url: in case of custom deployments, this can be used to specify
            the URL to the DevOps API, such as "https://api.astra.datastax.com".
            Generally it can be omitted. The environment (prod/dev/...) is
            determined from the API Endpoint.
        dev_ops_api_version: this can specify a custom version of the DevOps API
            (such as "v2"). Generally not needed.

    Returns:
        An AstraDBDatabaseAdmin object, for admin work within the database.

    Raises:
        ValueError: if the provided API Endpoint cannot be parsed.

    Example:
        >>> from astrapy import AstraDBDatabaseAdmin
        >>> admin_for_my_db = AstraDBDatabaseAdmin.from_api_endpoint(
        ...     api_endpoint="https://01234567-....apps.astra.datastax.com",
        ...     token="AstraCS:...",
        ... )
        >>> admin_for_my_db.list_keyspaces()
        ['default_keyspace', 'another_keyspace']
        >>> admin_for_my_db.info().status
        'ACTIVE'

    Note:
        Creating an instance of AstraDBDatabaseAdmin does not trigger actual creation
        of the database itself, which should exist beforehand. To create databases,
        see the AstraDBAdmin class.
    """

    # merge the `callers` parameter with its deprecated name/version variant
    resolved_callers = check_caller_parameters(callers, caller_name, caller_version)
    endpoint_parts = parse_api_endpoint(api_endpoint)
    # an endpoint that cannot be parsed is rejected right away
    if not endpoint_parts:
        raise ValueError(api_endpoint_parsing_error_message(api_endpoint))
    return AstraDBDatabaseAdmin(
        api_endpoint=api_endpoint,
        token=token,
        environment=endpoint_parts.environment,
        callers=resolved_callers,
        dev_ops_url=dev_ops_url,
        dev_ops_api_version=dev_ops_api_version,
    )
def from_astra_db_admin(api_endpoint: str, *, astra_db_admin: AstraDBAdmin, max_time_ms: int | None = None) ‑> AstraDBDatabaseAdmin

Create an AstraDBDatabaseAdmin from an AstraDBAdmin and an API Endpoint.

Args

api_endpoint
the API Endpoint for the target database (e.g. https://<ID>-<REGION>.apps.astra.datastax.com). The database must exist already for the resulting object to be effectively used; in other words, this invocation does not create the database, just the object instance.
astra_db_admin
an AstraDBAdmin object that has visibility over the target database.
max_time_ms
a timeout, in milliseconds, for the DevOps API HTTP request, should one be necessary.

Returns

An AstraDBDatabaseAdmin object, for admin work within the database.

Example

>>> from astrapy import DataAPIClient, AstraDBDatabaseAdmin
>>> admin_for_my_db = AstraDBDatabaseAdmin.from_astra_db_admin(
...     "https://<ID>-<REGION>.apps.astra.datastax.com",
...     astra_db_admin=DataAPIClient("AstraCS:...").get_admin(),
... )
>>> admin_for_my_db.list_keyspaces()
['default_keyspace', 'staging_keyspace']
>>> admin_for_my_db.info().status
'ACTIVE'

Note

Creating an instance of AstraDBDatabaseAdmin does not trigger actual creation of the database itself, which should exist beforehand. To create databases, see the AstraDBAdmin class.

Expand source code
@staticmethod
def from_astra_db_admin(
    api_endpoint: str,
    *,
    astra_db_admin: AstraDBAdmin,
    max_time_ms: int | None = None,
) -> AstraDBDatabaseAdmin:
    """
    Build an AstraDBDatabaseAdmin starting from an API Endpoint and
    an existing AstraDBAdmin.

    Token provider, environment, callers and DevOps API settings of the
    resulting object are all taken from the provided AstraDBAdmin.

    Args:
        api_endpoint: the API Endpoint for the target database
            (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`).
            The database must exist already for the resulting object
            to be effectively used; in other words, this invocation
            does not create the database, just the object instance.
        astra_db_admin: an AstraDBAdmin object that has visibility over
            the target database.
        max_time_ms: a timeout, in milliseconds, handed over as is to
            the AstraDBDatabaseAdmin constructor.

    Returns:
        An AstraDBDatabaseAdmin object, for admin work within the database.

    Example:
        >>> from astrapy import DataAPIClient, AstraDBDatabaseAdmin
        >>> admin_for_my_db = AstraDBDatabaseAdmin.from_astra_db_admin(
        ...     "https://<ID>-<REGION>.apps.astra.datastax.com",
        ...     astra_db_admin=DataAPIClient("AstraCS:...").get_admin(),
        ... )
        >>> admin_for_my_db.list_keyspaces()
        ['default_keyspace', 'staging_keyspace']
        >>> admin_for_my_db.info().status
        'ACTIVE'

    Note:
        Creating an instance of AstraDBDatabaseAdmin does not trigger actual creation
        of the database itself, which should exist beforehand. To create databases,
        see the AstraDBAdmin class.
    """

    database_admin = AstraDBDatabaseAdmin(
        api_endpoint=api_endpoint,
        token=astra_db_admin.token_provider,
        environment=astra_db_admin.environment,
        callers=astra_db_admin.callers,
        dev_ops_url=astra_db_admin._dev_ops_url,
        dev_ops_api_version=astra_db_admin._dev_ops_api_version,
        max_time_ms=max_time_ms,
    )
    return database_admin

Instance variables

var id : str

The ID of this database admin.

Example

>>> my_db_admin.id
'01234567-89ab-cdef-0123-456789abcdef'
Expand source code
@property
def id(self) -> str:
    """
    The database ID this admin object works on.

    Example:
        >>> my_db_admin.id
        '01234567-89ab-cdef-0123-456789abcdef'
    """
    database_id: str = self._database_id
    return database_id
var region : str

The region for this database admin.

Example

>>> my_db_admin.region
'us-east-1'
Expand source code
@property
def region(self) -> str:
    """
    The region this admin object works on.

    Example:
        >>> my_db_admin.region
        'us-east-1'
    """
    database_region: str = self._region
    return database_region

Methods

async def async_create_keyspace(self, name: str, *, wait_until_active: bool = True, update_db_keyspace: bool | None = None, update_db_namespace: bool | None = None, max_time_ms: int | None = None, **kwargs: Any) ‑> dict[str, typing.Any]

Create a keyspace in this database as requested, optionally waiting for it to be ready. Async version of the method, for use in an asyncio context.

Args

name
the keyspace name. If supplying a keyspace that exists already, the method call proceeds as usual, no errors are raised, and the whole invocation is a no-op.
wait_until_active
if True (default), the method returns only after the target database is in ACTIVE state again (a few seconds, usually). If False, it will return right after issuing the creation request to the DevOps API, and it will be responsibility of the caller to check the database status/keyspace availability before working with it.
update_db_keyspace
if True, the Database or AsyncDatabase class that spawned this DatabaseAdmin, if any, gets updated to work on the newly-created keyspace starting when this method returns.
update_db_namespace
an alias for update_db_keyspace. DEPRECATED as of v1.5.0, removal in v2.0.0.
max_time_ms
a timeout, in milliseconds, for the whole requested operation to complete. Note that a timeout is no guarantee that the creation request has not reached the API server.

Returns

A dictionary of the form {"ok": 1} in case of success. Otherwise, an exception is raised.

Example

>>> asyncio.run(
...     my_db_admin.async_create_keyspace("app_keyspace")
... )
{'ok': 1}
Expand source code
async def async_create_keyspace(
    self,
    name: str,
    *,
    wait_until_active: bool = True,
    update_db_keyspace: bool | None = None,
    update_db_namespace: bool | None = None,
    max_time_ms: int | None = None,
    **kwargs: Any,
) -> dict[str, Any]:
    """
    Create a keyspace in this database as requested,
    optionally waiting for it to be ready.
    Async version of the method, for use in an asyncio context.

    Args:
        name: the keyspace name. If supplying a keyspace that exists
            already, the method call proceeds as usual, no errors are
            raised, and the whole invocation is a no-op.
        wait_until_active: if True (default), the method returns only after
            the target database is in ACTIVE state again (a few
            seconds, usually). If False, it will return right after issuing the
            creation request to the DevOps API, and it will be responsibility
            of the caller to check the database status/keyspace availability
            before working with it.
        update_db_keyspace: if True, the `Database` or `AsyncDatabase` class
            that spawned this DatabaseAdmin, if any, gets updated to work on
            the newly-created keyspace starting when this method returns.
        update_db_namespace: an alias for update_db_keyspace.
            *DEPRECATED* as of v1.5.0, removal in v2.0.0.
        max_time_ms: a timeout, in milliseconds, for the whole requested
            operation to complete (creation request, status polling and
            final keyspace check alike).
            Note that a timeout is no guarantee that the creation request
            has not reached the API server.
        kwargs: accepted for signature compatibility; not used by this method.

    Returns:
        A dictionary of the form {"ok": 1} in case of success.
        Otherwise, an exception is raised.

    Raises:
        DevOpsAPIException: if the DevOps API does not answer with the
            expected "Created" status code; or, when waiting for completion,
            if the database leaves MAINTENANCE for a status other than
            ACTIVE, or the keyspace is not found afterwards.

    Example:
        >>> asyncio.run(
        ...     my_db_admin.async_create_keyspace("app_keyspace")
        ... )
        {'ok': 1}
    """

    # Reconcile the current parameter with its deprecated alias.
    _update_db_keyspace = check_update_db_namespace_keyspace(
        update_db_keyspace=update_db_keyspace,
        update_db_namespace=update_db_namespace,
    )

    # A single time budget is shared by all API calls issued below.
    timeout_manager = MultiCallTimeoutManager(
        overall_max_time_ms=max_time_ms, dev_ops_api=True
    )
    logger.info(
        f"creating keyspace '{name}' on "
        f"'{self._database_id}' (DevOps API), async"
    )
    cn_raw_response = await self._dev_ops_api_commander.async_raw_request(
        http_method=HttpMethod.POST,
        additional_path=f"keyspaces/{name}",
        timeout_info=timeout_manager.remaining_timeout_info(),
    )
    if cn_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_CREATED:
        raise DevOpsAPIException(
            f"keyspace creation ('{name}') failed: API returned HTTP "
            f"{cn_raw_response.status_code} instead of "
            f"{DEV_OPS_RESPONSE_HTTP_CREATED} - Created."
        )
    logger.info(
        f"DevOps API returned from creating keyspace "
        f"'{name}' on '{self._database_id}', async"
    )
    if wait_until_active:
        # Starting from MAINTENANCE guarantees at least one sleep-and-poll cycle.
        last_status_seen = DEV_OPS_DATABASE_STATUS_MAINTENANCE
        while last_status_seen == DEV_OPS_DATABASE_STATUS_MAINTENANCE:
            logger.info(
                f"sleeping to poll for status of '{self._database_id}', async"
            )
            await asyncio.sleep(DEV_OPS_KEYSPACE_POLL_INTERVAL_S)
            last_db_info = await self.async_info(
                max_time_ms=timeout_manager.remaining_timeout_ms(),
            )
            last_status_seen = last_db_info.status
        if last_status_seen != DEV_OPS_DATABASE_STATUS_ACTIVE:
            raise DevOpsAPIException(
                f"Database entered unexpected status {last_status_seen} after MAINTENANCE."
            )
        # is the keyspace found?
        # (this last call, too, must stay within the overall time budget)
        found_keyspaces = await self.async_list_keyspaces(
            max_time_ms=timeout_manager.remaining_timeout_ms(),
        )
        if name not in found_keyspaces:
            raise DevOpsAPIException("Could not create the keyspace.")
    logger.info(
        f"finished creating keyspace '{name}' on "
        f"'{self._database_id}' (DevOps API), async"
    )
    if _update_db_keyspace:
        self.spawner_database.use_keyspace(name)
    return {"ok": 1}
async def async_create_namespace(self, name: str, *, wait_until_active: bool = True, update_db_keyspace: bool | None = None, update_db_namespace: bool | None = None, max_time_ms: int | None = None, **kwargs: Any) ‑> dict[str, typing.Any]

Create a namespace in this database as requested, optionally waiting for it to be ready. Async version of the method, for use in an asyncio context.

DEPRECATED (removal in 2.0). Switch to the "async_create_keyspace" method.

Args

name
the namespace name. If supplying a namespace that exists already, the method call proceeds as usual, no errors are raised, and the whole invocation is a no-op.
wait_until_active
if True (default), the method returns only after the target database is in ACTIVE state again (a few seconds, usually). If False, it will return right after issuing the creation request to the DevOps API, and it will be responsibility of the caller to check the database status/namespace availability before working with it.
update_db_keyspace
if True, the Database or AsyncDatabase class that spawned this DatabaseAdmin, if any, gets updated to work on the newly-created keyspace starting when this method returns.
update_db_namespace
an alias for update_db_keyspace. DEPRECATED as of v1.5.0, removal in v2.0.0.
max_time_ms
a timeout, in milliseconds, for the whole requested operation to complete. Note that a timeout is no guarantee that the creation request has not reached the API server.

Returns

A dictionary of the form {"ok": 1} in case of success. Otherwise, an exception is raised.

Example

>>> asyncio.run(
...     my_db_admin.async_create_namespace("app_namespace")
... )
{'ok': 1}

Deprecated since version: 1.5.0

This will be removed in 2.0.0. The term 'namespace' is being replaced by 'keyspace' throughout the Data API and the clients. Please adapt method and parameter names consistently (examples: db_admin.findNamespaces => db_admin.findKeyspaces; collection.namespace => collection.keyspace; database.list_collections(namespace=...) => database.list_collections(keyspace=...)). See https://docs.datastax.com/en/astra-db-serverless/api-reference/client-versions.html#version-1-5 for more information.

Expand source code
@deprecation.deprecated(  # type: ignore[misc]
    deprecated_in="1.5.0",
    removed_in="2.0.0",
    current_version=__version__,
    details=NAMESPACE_DEPRECATION_NOTICE_METHOD,
)
async def async_create_namespace(
    self,
    name: str,
    *,
    wait_until_active: bool = True,
    update_db_keyspace: bool | None = None,
    update_db_namespace: bool | None = None,
    max_time_ms: int | None = None,
    **kwargs: Any,
) -> dict[str, Any]:
    """
    Create a namespace in this database as requested,
    optionally waiting for it to be ready.
    Async version of the method, for use in an asyncio context.

    *DEPRECATED* (removal in 2.0). Switch to the "async_create_keyspace" method.

    This method simply forwards all of its arguments
    to `async_create_keyspace` and returns its result.

    Args:
        name: the namespace name. If supplying a namespace that exists
            already, the method call proceeds as usual, no errors are
            raised, and the whole invocation is a no-op.
        wait_until_active: if True (default), the method returns only after
            the target database is in ACTIVE state again (a few
            seconds, usually). If False, it will return right after issuing the
            creation request to the DevOps API, and it will be responsibility
            of the caller to check the database status/namespace availability
            before working with it.
        update_db_keyspace: if True, the `Database` or `AsyncDatabase` class
            that spawned this DatabaseAdmin, if any, gets updated to work on
            the newly-created keyspace starting when this method returns.
        update_db_namespace: an alias for update_db_keyspace.
            *DEPRECATED* as of v1.5.0, removal in v2.0.0.
        max_time_ms: a timeout, in milliseconds, for the whole requested
            operation to complete.
            Note that a timeout is no guarantee that the creation request
            has not reached the API server.
        kwargs: any further keyword arguments, forwarded unchanged.

    Returns:
        A dictionary of the form {"ok": 1} in case of success.
        Otherwise, an exception is raised.

    Example:
        >>> asyncio.run(
        ...     my_db_admin.async_create_namespace("app_namespace")
        ... )
        {'ok': 1}
    """

    creation_result = await self.async_create_keyspace(
        name=name,
        wait_until_active=wait_until_active,
        update_db_keyspace=update_db_keyspace,
        update_db_namespace=update_db_namespace,
        max_time_ms=max_time_ms,
        **kwargs,
    )
    return creation_result
async def async_drop(self, *, wait_until_active: bool = True, max_time_ms: int | None = None) ‑> dict[str, typing.Any]

Drop this database, i.e. delete it completely and permanently with all its data. Async version of the method, for use in an asyncio context.

This method wraps the drop_database method of the AstraDBAdmin class, where more information may be found.

Args

wait_until_active
if True (default), the method returns only after the database has actually been deleted (generally a few minutes). If False, it will return right after issuing the drop request to the DevOps API, and it will be responsibility of the caller to check the database status/availability after that, if desired.
max_time_ms
a timeout, in milliseconds, for the whole requested operation to complete. Note that a timeout is no guarantee that the deletion request has not reached the API server.

Returns

A dictionary of the form {"ok": 1} in case of success. Otherwise, an exception is raised.

Example

>>> asyncio.run(my_db_admin.async_drop())
{'ok': 1}

Note

Once the method succeeds, methods on this object – such as info(), or list_keyspaces() – can still be invoked: however, this hardly makes sense as the underlying actual database is no more. It is the responsibility of the developer to design a correct flow which avoids using a deceased database any further.

Expand source code
async def async_drop(
    self,
    *,
    wait_until_active: bool = True,
    max_time_ms: int | None = None,
) -> dict[str, Any]:
    """
    Drop this database, i.e. delete it completely and permanently with all its data.
    Async version of the method, for use in an asyncio context.

    This method wraps the `async_drop_database` method of the AstraDBAdmin
    class (the async counterpart of `drop_database`), where more information
    may be found.

    Args:
        wait_until_active: if True (default), the method returns only after
            the database has actually been deleted (generally a few minutes).
            If False, it will return right after issuing the
            drop request to the DevOps API, and it will be responsibility
            of the caller to check the database status/availability
            after that, if desired.
        max_time_ms: a timeout, in milliseconds, for the whole requested
            operation to complete.
            Note that a timeout is no guarantee that the deletion request
            has not reached the API server.

    Returns:
        A dictionary of the form {"ok": 1} in case of success.
        Otherwise, an exception is raised.

    Example:
        >>> asyncio.run(my_db_admin.async_drop())
        {'ok': 1}

    Note:
        Once the method succeeds, methods on this object -- such as `info()`,
        or `list_keyspaces()` -- can still be invoked: however, this hardly
        makes sense as the underlying actual database is no more.
        It is responsibility of the developer to design a correct flow
        which avoids using a deceased database any further.
    """

    logger.info(f"dropping this database ('{self._database_id}'), async")
    # The result is held in a local so that the completion message below
    # is actually emitted before returning.
    drop_result = await self._astra_db_admin.async_drop_database(
        id=self._database_id,
        wait_until_active=wait_until_active,
        max_time_ms=max_time_ms,
    )
    logger.info(f"finished dropping this database ('{self._database_id}'), async")
    return drop_result
async def async_drop_keyspace(self, name: str, *, wait_until_active: bool = True, max_time_ms: int | None = None) ‑> dict[str, typing.Any]

Delete a keyspace from the database, optionally waiting for the database to become active again. Async version of the method, for use in an asyncio context.

Args

name
the keyspace to delete. If it does not exist in this database, an error is raised.
wait_until_active
if True (default), the method returns only after the target database is in ACTIVE state again (a few seconds, usually). If False, it will return right after issuing the deletion request to the DevOps API, and it will be responsibility of the caller to check the database status/keyspace availability before working with it.
max_time_ms
a timeout, in milliseconds, for the whole requested operation to complete. Note that a timeout is no guarantee that the deletion request has not reached the API server.

Returns

A dictionary of the form {"ok": 1} in case of success. Otherwise, an exception is raised.

Example

>>> asyncio.run(
...     my_db_admin.async_drop_keyspace("app_keyspace")
... )
{'ok': 1}
Expand source code
async def async_drop_keyspace(
    self,
    name: str,
    *,
    wait_until_active: bool = True,
    max_time_ms: int | None = None,
) -> dict[str, Any]:
    """
    Delete a keyspace from the database, optionally waiting for the database
    to become active again.
    Async version of the method, for use in an asyncio context.

    Args:
        name: the keyspace to delete. If it does not exist in this database,
            an error is raised.
        wait_until_active: if True (default), the method returns only after
            the target database is in ACTIVE state again (a few
            seconds, usually). If False, it will return right after issuing the
            deletion request to the DevOps API, and it will be responsibility
            of the caller to check the database status/keyspace availability
            before working with it.
        max_time_ms: a timeout, in milliseconds, for the whole requested
            operation to complete (deletion request, status polling and
            final keyspace check alike).
            Note that a timeout is no guarantee that the deletion request
            has not reached the API server.

    Returns:
        A dictionary of the form {"ok": 1} in case of success.
        Otherwise, an exception is raised.

    Raises:
        DevOpsAPIException: if the DevOps API does not answer with the
            expected "Accepted" status code; or, when waiting for completion,
            if the database leaves MAINTENANCE for a status other than
            ACTIVE, or the keyspace is still found afterwards.

    Example:
        >>> asyncio.run(
        ...     my_db_admin.async_drop_keyspace("app_keyspace")
        ... )
        {'ok': 1}
    """

    # A single time budget is shared by all API calls issued below.
    timeout_manager = MultiCallTimeoutManager(
        overall_max_time_ms=max_time_ms, dev_ops_api=True
    )
    logger.info(
        f"dropping keyspace '{name}' on "
        f"'{self._database_id}' (DevOps API), async"
    )
    dk_raw_response = await self._dev_ops_api_commander.async_raw_request(
        http_method=HttpMethod.DELETE,
        additional_path=f"keyspaces/{name}",
        timeout_info=timeout_manager.remaining_timeout_info(),
    )
    if dk_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_ACCEPTED:
        # Report the keyspace name (not the `id` builtin) and the status
        # that was actually expected.
        raise DevOpsAPIException(
            f"keyspace deletion ('{name}') failed: API returned HTTP "
            f"{dk_raw_response.status_code} instead of "
            f"{DEV_OPS_RESPONSE_HTTP_ACCEPTED} - Accepted"
        )
    logger.info(
        f"DevOps API returned from dropping keyspace "
        f"'{name}' on '{self._database_id}', async"
    )
    if wait_until_active:
        # Starting from MAINTENANCE guarantees at least one sleep-and-poll cycle.
        last_status_seen = DEV_OPS_DATABASE_STATUS_MAINTENANCE
        while last_status_seen == DEV_OPS_DATABASE_STATUS_MAINTENANCE:
            logger.info(
                f"sleeping to poll for status of '{self._database_id}', async"
            )
            await asyncio.sleep(DEV_OPS_KEYSPACE_POLL_INTERVAL_S)
            last_db_info = await self.async_info(
                max_time_ms=timeout_manager.remaining_timeout_ms(),
            )
            last_status_seen = last_db_info.status
        if last_status_seen != DEV_OPS_DATABASE_STATUS_ACTIVE:
            raise DevOpsAPIException(
                f"Database entered unexpected status {last_status_seen} after MAINTENANCE."
            )
        # is the keyspace found?
        # (this last call, too, must stay within the overall time budget)
        remaining_keyspaces = await self.async_list_keyspaces(
            max_time_ms=timeout_manager.remaining_timeout_ms(),
        )
        if name in remaining_keyspaces:
            raise DevOpsAPIException("Could not drop the keyspace.")
    logger.info(
        f"finished dropping keyspace '{name}' on "
        f"'{self._database_id}' (DevOps API), async"
    )
    return {"ok": 1}
async def async_drop_namespace(self, name: str, *, wait_until_active: bool = True, max_time_ms: int | None = None) ‑> dict[str, typing.Any]

Delete a namespace from the database, optionally waiting for the database to become active again. Async version of the method, for use in an asyncio context.

DEPRECATED (removal in 2.0). Switch to the "async_drop_keyspace" method.

Args

name
the namespace to delete. If it does not exist in this database, an error is raised.
wait_until_active
if True (default), the method returns only after the target database is in ACTIVE state again (a few seconds, usually). If False, it will return right after issuing the deletion request to the DevOps API, and it will be responsibility of the caller to check the database status/namespace availability before working with it.
max_time_ms
a timeout, in milliseconds, for the whole requested operation to complete. Note that a timeout is no guarantee that the deletion request has not reached the API server.

Returns

A dictionary of the form {"ok": 1} in case of success. Otherwise, an exception is raised.

Example

>>> asyncio.run(
...     my_db_admin.async_drop_namespace("app_namespace")
... )
{'ok': 1}

Deprecated since version: 1.5.0

This will be removed in 2.0.0. The term 'namespace' is being replaced by 'keyspace' throughout the Data API and the clients. Please adapt method and parameter names consistently (examples: db_admin.findNamespaces => db_admin.findKeyspaces; collection.namespace => collection.keyspace; database.list_collections(namespace=...) => database.list_collections(keyspace=...)). See https://docs.datastax.com/en/astra-db-serverless/api-reference/client-versions.html#version-1-5 for more information.

Expand source code
@deprecation.deprecated(  # type: ignore[misc]
    deprecated_in="1.5.0",
    removed_in="2.0.0",
    current_version=__version__,
    details=NAMESPACE_DEPRECATION_NOTICE_METHOD,
)
async def async_drop_namespace(
    self,
    name: str,
    *,
    wait_until_active: bool = True,
    max_time_ms: int | None = None,
) -> dict[str, Any]:
    """
    Delete a namespace from the database, optionally waiting for the database
    to become active again.
    Async version of the method, for use in an asyncio context.

    *DEPRECATED* (removal in 2.0). Switch to the "async_drop_keyspace" method.

    This method simply forwards its arguments
    to `async_drop_keyspace` and returns its result.

    Args:
        name: the namespace to delete. If it does not exist in this database,
            an error is raised.
        wait_until_active: if True (default), the method returns only after
            the target database is in ACTIVE state again (a few
            seconds, usually). If False, it will return right after issuing the
            deletion request to the DevOps API, and it will be responsibility
            of the caller to check the database status/namespace availability
            before working with it.
        max_time_ms: a timeout, in milliseconds, for the whole requested
            operation to complete.
            Note that a timeout is no guarantee that the deletion request
            has not reached the API server.

    Returns:
        A dictionary of the form {"ok": 1} in case of success.
        Otherwise, an exception is raised.

    Example:
        >>> asyncio.run(
        ...     my_db_admin.async_drop_namespace("app_namespace")
        ... )
        {'ok': 1}
    """

    deletion_result = await self.async_drop_keyspace(
        name=name,
        wait_until_active=wait_until_active,
        max_time_ms=max_time_ms,
    )
    return deletion_result
async def async_find_embedding_providers(self, *, max_time_ms: int | None = None) ‑> FindEmbeddingProvidersResult

Query the API for the full information on available embedding providers. Async version of the method, for use in an asyncio context.

Args

max_time_ms
a timeout, in milliseconds, for the DevOps API request.

Returns

A FindEmbeddingProvidersResult object with the complete information returned by the API about available embedding providers.

Example (output abridged and indented for clarity)

>>> admin_for_my_db.find_embedding_providers()
FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)
>>> admin_for_my_db.find_embedding_providers().embedding_providers
{
    'openai': EmbeddingProvider(
        display_name='OpenAI',
        models=[
            EmbeddingProviderModel(name='text-embedding-3-small'),
            ...
        ]
    ),
    ...
}

Expand source code
async def async_find_embedding_providers(
    self, *, max_time_ms: int | None = None
) -> FindEmbeddingProvidersResult:
    """
    Retrieve from the API the full description of the embedding providers
    that are available.
    Async version of the method, for use in an asyncio context.

    Args:
        max_time_ms: a timeout, in milliseconds, for the API request.

    Returns:
        A `FindEmbeddingProvidersResult` object with the complete information
        returned by the API about available embedding providers.

    Raises:
        DataAPIFaultyResponseException: if the API response has no
            "embeddingProviders" entry within its "status".

    Example:
        (output abridged and indented for clarity)

        >>> asyncio.run(admin_for_my_db.async_find_embedding_providers())
        FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)
        >>> asyncio.run(
        ...     admin_for_my_db.async_find_embedding_providers()
        ... ).embedding_providers
        {
            'openai': EmbeddingProvider(
                display_name='OpenAI',
                models=[
                    EmbeddingProviderModel(name='text-embedding-3-small'),
                    ...
                ]
            ),
            ...
        }
    """

    logger.info("findEmbeddingProviders, async")
    api_response = await self._api_commander.async_request(
        payload={"findEmbeddingProviders": {}},
        timeout_info=base_timeout_info(max_time_ms),
    )
    # Success path first: the providers are expected under "status".
    if "embeddingProviders" in api_response.get("status", {}):
        logger.info("finished findEmbeddingProviders, async")
        return FindEmbeddingProvidersResult.from_dict(api_response["status"])
    raise DataAPIFaultyResponseException(
        text="Faulty response from findEmbeddingProviders API command.",
        raw_response=api_response,
    )
async def async_info(self, *, max_time_ms: int | None = None) ‑> AdminDatabaseInfo

Query the DevOps API for the full info on this database. Async version of the method, for use in an asyncio context.

Args

max_time_ms
a timeout, in milliseconds, for the DevOps API request.

Returns

An AdminDatabaseInfo object.

Example

>>> async def wait_until_active(db_admin: AstraDBDatabaseAdmin) -> None:
...     while True:
...         info = await db_admin.async_info()
...         if info.status == "ACTIVE":
...             return
...
>>> asyncio.run(wait_until_active(admin_for_my_db))
Expand source code
async def async_info(self, *, max_time_ms: int | None = None) -> AdminDatabaseInfo:
    """
    Fetch the full info on this database from the DevOps API.
    Async version of the method, for use in an asyncio context.

    The request is delegated to the `async_database_info` method of the
    AstraDBAdmin object held by this instance, using this database's ID.

    Args:
        max_time_ms: a timeout, in milliseconds, for the DevOps API request.

    Returns:
        An AdminDatabaseInfo object.

    Example:
        >>> async def wait_until_active(db_admin: AstraDBDatabaseAdmin) -> None:
        ...     while True:
        ...         info = await db_admin.async_info()
        ...         if info.status == "ACTIVE":
        ...             return
        ...
        >>> asyncio.run(wait_until_active(admin_for_my_db))
    """

    logger.info(f"getting info ('{self._database_id}'), async")
    database_info = await self._astra_db_admin.async_database_info(
        max_time_ms=max_time_ms,
        id=self._database_id,
    )
    logger.info(f"finished getting info ('{self._database_id}'), async")
    return database_info
async def async_list_keyspaces(self, *, max_time_ms: int | None = None) ‑> list[str]

Query the DevOps API for a list of the keyspaces in the database. Async version of the method, for use in an asyncio context.

Args

max_time_ms
a timeout, in milliseconds, for the DevOps API request.

Returns

A list of the keyspaces, each a string, in no particular order.

Example

>>> async def check_if_ks_exists(
...     db_admin: AstraDBDatabaseAdmin, keyspace: str
... ) -> bool:
...     ks_list = await db_admin.async_list_keyspaces()
...     return keyspace in ks_list
...
>>> asyncio.run(check_if_ks_exists(admin_for_my_db, "dragons"))
False
>>> asyncio.run(check_if_ks_exists(admin_for_my_db, "app_keyspace"))
True
Expand source code
async def async_list_keyspaces(
    self, *, max_time_ms: int | None = None
) -> list[str]:
    """
    Get the list of the keyspaces in the database from the DevOps API.
    Async version of the method, for use in an asyncio context.

    The list is read from the database info obtained through `async_info`.

    Args:
        max_time_ms: a timeout, in milliseconds, for the DevOps API request.

    Returns:
        A list of the keyspaces, each a string, in no particular order.
        An empty list is returned if the info lists no keyspaces.

    Raises:
        DevOpsAPIException: if the database info carries no raw info.

    Example:
        >>> async def check_if_ks_exists(
        ...     db_admin: AstraDBDatabaseAdmin, keyspace: str
        ... ) -> bool:
        ...     ks_list = await db_admin.async_list_keyspaces()
        ...     return keyspace in ks_list
        ...
        >>> asyncio.run(check_if_ks_exists(admin_for_my_db, "dragons"))
        False
        >>> asyncio.run(check_if_ks_exists(admin_for_my_db, "app_keyspace"))
        True
    """

    logger.info(f"getting keyspaces ('{self._database_id}'), async")
    database_info = await self.async_info(max_time_ms=max_time_ms)
    logger.info(f"finished getting keyspaces ('{self._database_id}'), async")

    raw_database_info = database_info.raw_info
    if raw_database_info is None:
        raise DevOpsAPIException("Could not get the keyspace list.")

    # A missing (or empty/null) "keyspaces" entry results in an empty list.
    listed_keyspaces = raw_database_info.get("info", {}).get("keyspaces")
    return listed_keyspaces or []
async def async_list_namespaces(self, *, max_time_ms: int | None = None) ‑> list[str]

Query the DevOps API for a list of the namespaces in the database. Async version of the method, for use in an asyncio context.

DEPRECATED (removal in 2.0). Switch to the "async_list_keyspaces" method.

Args

max_time_ms
a timeout, in milliseconds, for the DevOps API request.

Returns

A list of the namespaces, each a string, in no particular order.

Example

>>> async def check_if_ns_exists(
...     db_admin: AstraDBDatabaseAdmin, namespace: str
... ) -> bool:
...     ns_list = await db_admin.async_list_namespaces()
...     return namespace in ns_list
...
>>> asyncio.run(check_if_ns_exists(admin_for_my_db, "dragons"))
False
>>> asyncio.run(check_if_ns_exists(admin_for_my_db, "app_namespace"))
True

Deprecated since version: 1.5.0

This will be removed in 2.0.0. The term 'namespace' is being replaced by 'keyspace' throughout the Data API and the clients. Please adapt method and parameter names consistently (examples: db_admin.findNamespaces => db_admin.findKeyspaces; collection.namespace => collection.keyspace; database.list_collections(namespace=...) => database.list_collections(keyspace=...)). See https://docs.datastax.com/en/astra-db-serverless/api-reference/client-versions.html#version-1-5 for more information.

Expand source code
@deprecation.deprecated(  # type: ignore[misc]
    deprecated_in="1.5.0",
    removed_in="2.0.0",
    current_version=__version__,
    details=NAMESPACE_DEPRECATION_NOTICE_METHOD,
)
async def async_list_namespaces(
    self, *, max_time_ms: int | None = None
) -> list[str]:
    """
    Get the list of the namespaces in the database from the DevOps API.
    Async version of the method, for use in an asyncio context.

    *DEPRECATED* (removal in 2.0). Switch to the "async_list_keyspaces" method.

    This method simply forwards its argument
    to `async_list_keyspaces` and returns its result.

    Args:
        max_time_ms: a timeout, in milliseconds, for the DevOps API request.

    Returns:
        A list of the namespaces, each a string, in no particular order.

    Example:
        >>> async def check_if_ns_exists(
        ...     db_admin: AstraDBDatabaseAdmin, namespace: str
        ... ) -> bool:
        ...     ns_list = await db_admin.async_list_namespaces()
        ...     return namespace in ns_list
        ...
        >>> asyncio.run(check_if_ns_exists(admin_for_my_db, "dragons"))
        False
        >>> asyncio.run(check_if_ns_exists(admin_for_my_db, "app_namespace"))
        True
    """

    namespace_list = await self.async_list_keyspaces(max_time_ms=max_time_ms)
    return namespace_list
def create_keyspace(self, name: str, *, wait_until_active: bool = True, update_db_keyspace: bool | None = None, update_db_namespace: bool | None = None, max_time_ms: int | None = None, **kwargs: Any) ‑> dict[str, typing.Any]

Create a keyspace in this database as requested, optionally waiting for it to be ready.

Args

name
the keyspace name. If supplying a keyspace that exists already, the method call proceeds as usual, no errors are raised, and the whole invocation is a no-op.
wait_until_active
if True (default), the method returns only after the target database is in ACTIVE state again (a few seconds, usually). If False, it will return right after issuing the creation request to the DevOps API, and it will be responsibility of the caller to check the database status/keyspace availability before working with it.
update_db_keyspace
if True, the Database or AsyncDatabase class that spawned this DatabaseAdmin, if any, gets updated to work on the newly-created keyspace starting when this method returns.
update_db_namespace
an alias for update_db_keyspace. DEPRECATED as of v1.5.0, removal in v2.0.0.
max_time_ms
a timeout, in milliseconds, for the whole requested operation to complete. Note that a timeout is no guarantee that the creation request has not reached the API server.

Returns

A dictionary of the form {"ok": 1} in case of success. Otherwise, an exception is raised.

Example

>>> my_db_admin.list_keyspaces()
['default_keyspace']
>>> my_db_admin.create_keyspace("that_other_one")
{'ok': 1}
>>> my_db_admin.list_keyspaces()
['default_keyspace', 'that_other_one']
Expand source code
def create_keyspace(
    self,
    name: str,
    *,
    wait_until_active: bool = True,
    update_db_keyspace: bool | None = None,
    update_db_namespace: bool | None = None,
    max_time_ms: int | None = None,
    **kwargs: Any,
) -> dict[str, Any]:
    """
    Create a keyspace in this database as requested,
    optionally waiting for it to be ready.

    Args:
        name: the keyspace name. If supplying a keyspace that exists
            already, the method call proceeds as usual, no errors are
            raised, and the whole invocation is a no-op.
        wait_until_active: if True (default), the method returns only after
            the target database is in ACTIVE state again (a few
            seconds, usually). If False, it will return right after issuing the
            creation request to the DevOps API, and it will be the responsibility
            of the caller to check the database status/keyspace availability
            before working with it.
        update_db_keyspace: if True, the `Database` or `AsyncDatabase` class
            that spawned this DatabaseAdmin, if any, gets updated to work on
            the newly-created keyspace starting when this method returns.
        update_db_namespace: an alias for update_db_keyspace.
            *DEPRECATED* as of v1.5.0, removal in v2.0.0.
        max_time_ms: a timeout, in milliseconds, for the whole requested
            operation to complete (creation request, status polling and
            the final keyspace check all draw from the same time budget).
            Note that a timeout is no guarantee that the creation request
            has not reached the API server.
        **kwargs: accepted for signature compatibility; not used in the
            body of this method.

    Returns:
        A dictionary of the form {"ok": 1} in case of success.
        Otherwise, an exception is raised.

    Raises:
        DevOpsAPIException: if the DevOps API does not answer with the
            expected "Created" HTTP status; or, when waiting for completion,
            if the database leaves MAINTENANCE for a status other than
            ACTIVE, or if the keyspace is not listed afterwards.

    Example:
        >>> my_db_admin.list_keyspaces()
        ['default_keyspace']
        >>> my_db_admin.create_keyspace("that_other_one")
        {'ok': 1}
        >>> my_db_admin.list_keyspaces()
        ['default_keyspace', 'that_other_one']
    """

    # reconcile the current parameter with its deprecated alias
    _update_db_keyspace = check_update_db_namespace_keyspace(
        update_db_keyspace=update_db_keyspace,
        update_db_namespace=update_db_namespace,
    )

    # a single time budget shared by all the API calls made below
    timeout_manager = MultiCallTimeoutManager(
        overall_max_time_ms=max_time_ms, dev_ops_api=True
    )
    logger.info(f"creating keyspace '{name}' on '{self._database_id}' (DevOps API)")
    cn_raw_response = self._dev_ops_api_commander.raw_request(
        http_method=HttpMethod.POST,
        additional_path=f"keyspaces/{name}",
        timeout_info=timeout_manager.remaining_timeout_info(),
    )
    if cn_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_CREATED:
        raise DevOpsAPIException(
            f"keyspace creation ('{name}') failed: API returned HTTP "
            f"{cn_raw_response.status_code} instead of "
            f"{DEV_OPS_RESPONSE_HTTP_CREATED} - Created."
        )
    logger.info(
        "DevOps API returned from creating keyspace "
        f"'{name}' on '{self._database_id}'"
    )
    if wait_until_active:
        # start from MAINTENANCE so that at least one poll is always performed
        last_status_seen = DEV_OPS_DATABASE_STATUS_MAINTENANCE
        while last_status_seen == DEV_OPS_DATABASE_STATUS_MAINTENANCE:
            logger.info(f"sleeping to poll for status of '{self._database_id}'")
            time.sleep(DEV_OPS_KEYSPACE_POLL_INTERVAL_S)
            last_status_seen = self.info(
                max_time_ms=timeout_manager.remaining_timeout_ms(),
            ).status
        if last_status_seen != DEV_OPS_DATABASE_STATUS_ACTIVE:
            raise DevOpsAPIException(
                f"Database entered unexpected status {last_status_seen} after MAINTENANCE."
            )
        # is the keyspace found? (this check also honors the overall timeout)
        current_keyspaces = self.list_keyspaces(
            max_time_ms=timeout_manager.remaining_timeout_ms(),
        )
        if name not in current_keyspaces:
            raise DevOpsAPIException("Could not create the keyspace.")
    logger.info(
        f"finished creating keyspace '{name}' on "
        f"'{self._database_id}' (DevOps API)"
    )
    if _update_db_keyspace:
        self.spawner_database.use_keyspace(name)
    return {"ok": 1}
def create_namespace(self, name: str, *, wait_until_active: bool = True, update_db_keyspace: bool | None = None, update_db_namespace: bool | None = None, max_time_ms: int | None = None, **kwargs: Any) ‑> dict[str, typing.Any]

Create a namespace in this database as requested, optionally waiting for it to be ready.

DEPRECATED (removal in 2.0). Switch to the "create_keyspace" method.

Args

name
the namespace name. If supplying a namespace that exists already, the method call proceeds as usual, no errors are raised, and the whole invocation is a no-op.
wait_until_active
if True (default), the method returns only after the target database is in ACTIVE state again (a few seconds, usually). If False, it will return right after issuing the creation request to the DevOps API, and it will be responsibility of the caller to check the database status/namespace availability before working with it.
update_db_keyspace
if True, the Database or AsyncDatabase class that spawned this DatabaseAdmin, if any, gets updated to work on the newly-created keyspace starting when this method returns.
update_db_namespace
an alias for update_db_keyspace. DEPRECATED as of v1.5.0, removal in v2.0.0.
max_time_ms
a timeout, in milliseconds, for the whole requested operation to complete. Note that a timeout is no guarantee that the creation request has not reached the API server.

Returns

A dictionary of the form {"ok": 1} in case of success. Otherwise, an exception is raised.

Example

>>> my_db_admin.list_namespaces()
['default_keyspace']
>>> my_db_admin.create_namespace("that_other_one")
{'ok': 1}
>>> my_db_admin.list_namespaces()
['default_keyspace', 'that_other_one']

Deprecated since version: 1.5.0

This will be removed in 2.0.0. The term 'namespace' is being replaced by 'keyspace' throughout the Data API and the clients. Please adapt method and parameter names consistently (examples: db_admin.findNamespaces => db_admin.findKeyspaces; collection.namespace => collection.keyspace; database.list_collections(namespace=...) => database.list_collections(keyspace=...)). See https://docs.datastax.com/en/astra-db-serverless/api-reference/client-versions.html#version-1-5 for more information.

Expand source code
@deprecation.deprecated(  # type: ignore[misc]
    deprecated_in="1.5.0",
    removed_in="2.0.0",
    current_version=__version__,
    details=NAMESPACE_DEPRECATION_NOTICE_METHOD,
)
def create_namespace(
    self,
    name: str,
    *,
    wait_until_active: bool = True,
    update_db_keyspace: bool | None = None,
    update_db_namespace: bool | None = None,
    max_time_ms: int | None = None,
    **kwargs: Any,
) -> dict[str, Any]:
    """
    Create a namespace in this database as requested,
    optionally waiting for it to be ready.

    *DEPRECATED* (removal in 2.0). Switch to the "create_keyspace" method.

    This method forwards all of its arguments, unchanged,
    to `create_keyspace` and returns its result.

    Args:
        name: the namespace name. If supplying a namespace that exists
            already, the method call proceeds as usual, no errors are
            raised, and the whole invocation is a no-op.
        wait_until_active: if True (default), the method returns only after
            the target database is in ACTIVE state again (a few
            seconds, usually). If False, it will return right after issuing the
            creation request to the DevOps API, and it will be the responsibility
            of the caller to check the database status/namespace availability
            before working with it.
        update_db_keyspace: if True, the `Database` or `AsyncDatabase` class
            that spawned this DatabaseAdmin, if any, gets updated to work on
            the newly-created keyspace starting when this method returns.
        update_db_namespace: an alias for update_db_keyspace.
            *DEPRECATED* as of v1.5.0, removal in v2.0.0.
        max_time_ms: a timeout, in milliseconds, for the whole requested
            operation to complete.
            Note that a timeout is no guarantee that the creation request
            has not reached the API server.
        **kwargs: passed through to `create_keyspace`.

    Returns:
        A dictionary of the form {"ok": 1} in case of success.
        Otherwise, an exception is raised.

    Example:
        >>> my_db_admin.list_namespaces()
        ['default_keyspace']
        >>> my_db_admin.create_namespace("that_other_one")
        {'ok': 1}
        >>> my_db_admin.list_namespaces()
        ['default_keyspace', 'that_other_one']
    """

    # pure delegation to the keyspace-named counterpart
    creation_result = self.create_keyspace(
        name=name,
        wait_until_active=wait_until_active,
        update_db_keyspace=update_db_keyspace,
        update_db_namespace=update_db_namespace,
        max_time_ms=max_time_ms,
        **kwargs,
    )
    return creation_result
def drop(self, *, wait_until_active: bool = True, max_time_ms: int | None = None) ‑> dict[str, typing.Any]

Drop this database, i.e. delete it completely and permanently with all its data.

This method wraps the drop_database method of the AstraDBAdmin class, where more information may be found.

Args

wait_until_active
if True (default), the method returns only after the database has actually been deleted (generally a few minutes). If False, it will return right after issuing the drop request to the DevOps API, and it will be responsibility of the caller to check the database status/availability after that, if desired.
max_time_ms
a timeout, in milliseconds, for the whole requested operation to complete. Note that a timeout is no guarantee that the deletion request has not reached the API server.

Returns

A dictionary of the form {"ok": 1} in case of success. Otherwise, an exception is raised.

Example

>>> my_db_admin.list_keyspaces()
['default_keyspace', 'that_other_one']
>>> my_db_admin.drop()
{'ok': 1}
>>> my_db_admin.list_keyspaces()  # raises a 404 Not Found http error

Note

Once the method succeeds, methods on this object – such as info(), or list_keyspaces() – can still be invoked: however, this hardly makes sense as the underlying actual database is no more. It is the responsibility of the developer to design a correct flow which avoids using a deceased database any further.

Expand source code
def drop(
    self,
    *,
    wait_until_active: bool = True,
    max_time_ms: int | None = None,
) -> dict[str, Any]:
    """
    Drop this database, i.e. delete it completely and permanently with all its data.

    This method wraps the `drop_database` method of the AstraDBAdmin class,
    where more information may be found.

    Args:
        wait_until_active: if True (default), the method returns only after
            the database has actually been deleted (generally a few minutes).
            If False, it will return right after issuing the
            drop request to the DevOps API, and it will be the responsibility
            of the caller to check the database status/availability
            after that, if desired.
        max_time_ms: a timeout, in milliseconds, for the whole requested
            operation to complete.
            Note that a timeout is no guarantee that the deletion request
            has not reached the API server.

    Returns:
        A dictionary of the form {"ok": 1} in case of success.
        Otherwise, an exception is raised.

    Example:
        >>> my_db_admin.list_keyspaces()
        ['default_keyspace', 'that_other_one']
        >>> my_db_admin.drop()
        {'ok': 1}
        >>> my_db_admin.list_keyspaces()  # raises a 404 Not Found http error

    Note:
        Once the method succeeds, methods on this object -- such as `info()`,
        or `list_keyspaces()` -- can still be invoked: however, this hardly
        makes sense as the underlying actual database is no more.
        It is the responsibility of the developer to design a correct flow
        which avoids using a deceased database any further.
    """

    logger.info(f"dropping this database ('{self._database_id}')")
    drop_result = self._astra_db_admin.drop_database(
        id=self._database_id,
        wait_until_active=wait_until_active,
        max_time_ms=max_time_ms,
    )
    # log completion *before* returning: placed after the return statement,
    # this line would never be executed.
    logger.info(f"finished dropping this database ('{self._database_id}')")
    return drop_result
def drop_keyspace(self, name: str, *, wait_until_active: bool = True, max_time_ms: int | None = None) ‑> dict[str, typing.Any]

Delete a keyspace from the database, optionally waiting for the database to become active again.

Args

name
the keyspace to delete. If it does not exist in this database, an error is raised.
wait_until_active
if True (default), the method returns only after the target database is in ACTIVE state again (a few seconds, usually). If False, it will return right after issuing the deletion request to the DevOps API, and it will be responsibility of the caller to check the database status/keyspace availability before working with it.
max_time_ms
a timeout, in milliseconds, for the whole requested operation to complete. Note that a timeout is no guarantee that the deletion request has not reached the API server.

Returns

A dictionary of the form {"ok": 1} in case of success. Otherwise, an exception is raised.

Example

>>> my_db_admin.list_keyspaces()
['default_keyspace', 'that_other_one']
>>> my_db_admin.drop_keyspace("that_other_one")
{'ok': 1}
>>> my_db_admin.list_keyspaces()
['default_keyspace']
Expand source code
def drop_keyspace(
    self,
    name: str,
    *,
    wait_until_active: bool = True,
    max_time_ms: int | None = None,
) -> dict[str, Any]:
    """
    Delete a keyspace from the database, optionally waiting for the database
    to become active again.

    Args:
        name: the keyspace to delete. If it does not exist in this database,
            an error is raised.
        wait_until_active: if True (default), the method returns only after
            the target database is in ACTIVE state again (a few
            seconds, usually). If False, it will return right after issuing the
            deletion request to the DevOps API, and it will be the responsibility
            of the caller to check the database status/keyspace availability
            before working with it.
        max_time_ms: a timeout, in milliseconds, for the whole requested
            operation to complete (deletion request, status polling and
            the final keyspace check all draw from the same time budget).
            Note that a timeout is no guarantee that the deletion request
            has not reached the API server.

    Returns:
        A dictionary of the form {"ok": 1} in case of success.
        Otherwise, an exception is raised.

    Raises:
        DevOpsAPIException: if the DevOps API does not answer with the
            expected "Accepted" HTTP status; or, when waiting for completion,
            if the database leaves MAINTENANCE for a status other than
            ACTIVE, or if the keyspace is still listed afterwards.

    Example:
        >>> my_db_admin.list_keyspaces()
        ['default_keyspace', 'that_other_one']
        >>> my_db_admin.drop_keyspace("that_other_one")
        {'ok': 1}
        >>> my_db_admin.list_keyspaces()
        ['default_keyspace']
    """

    # a single time budget shared by all the API calls made below
    timeout_manager = MultiCallTimeoutManager(
        overall_max_time_ms=max_time_ms, dev_ops_api=True
    )
    logger.info(f"dropping keyspace '{name}' on '{self._database_id}' (DevOps API)")
    dk_raw_response = self._dev_ops_api_commander.raw_request(
        http_method=HttpMethod.DELETE,
        additional_path=f"keyspaces/{name}",
        timeout_info=timeout_manager.remaining_timeout_info(),
    )
    if dk_raw_response.status_code != DEV_OPS_RESPONSE_HTTP_ACCEPTED:
        # report the keyspace name (not the `id` builtin) and the status
        # label that matches the expected "Accepted" response.
        raise DevOpsAPIException(
            f"keyspace deletion ('{name}') failed: API returned HTTP "
            f"{dk_raw_response.status_code} instead of "
            f"{DEV_OPS_RESPONSE_HTTP_ACCEPTED} - Accepted."
        )
    logger.info(
        "DevOps API returned from dropping keyspace "
        f"'{name}' on '{self._database_id}'"
    )
    if wait_until_active:
        # start from MAINTENANCE so that at least one poll is always performed
        last_status_seen = DEV_OPS_DATABASE_STATUS_MAINTENANCE
        while last_status_seen == DEV_OPS_DATABASE_STATUS_MAINTENANCE:
            logger.info(f"sleeping to poll for status of '{self._database_id}'")
            time.sleep(DEV_OPS_KEYSPACE_POLL_INTERVAL_S)
            last_status_seen = self.info(
                max_time_ms=timeout_manager.remaining_timeout_ms(),
            ).status
        if last_status_seen != DEV_OPS_DATABASE_STATUS_ACTIVE:
            raise DevOpsAPIException(
                f"Database entered unexpected status {last_status_seen} after MAINTENANCE."
            )
        # is the keyspace found? (this check also honors the overall timeout)
        current_keyspaces = self.list_keyspaces(
            max_time_ms=timeout_manager.remaining_timeout_ms(),
        )
        if name in current_keyspaces:
            raise DevOpsAPIException("Could not drop the keyspace.")
    logger.info(
        f"finished dropping keyspace '{name}' on "
        f"'{self._database_id}' (DevOps API)"
    )
    return {"ok": 1}
def drop_namespace(self, name: str, *, wait_until_active: bool = True, max_time_ms: int | None = None) ‑> dict[str, typing.Any]

Delete a namespace from the database, optionally waiting for the database to become active again.

DEPRECATED (removal in 2.0). Switch to the "drop_keyspace" method.

Args

name
the namespace to delete. If it does not exist in this database, an error is raised.
wait_until_active
if True (default), the method returns only after the target database is in ACTIVE state again (a few seconds, usually). If False, it will return right after issuing the deletion request to the DevOps API, and it will be responsibility of the caller to check the database status/namespace availability before working with it.
max_time_ms
a timeout, in milliseconds, for the whole requested operation to complete. Note that a timeout is no guarantee that the deletion request has not reached the API server.

Returns

A dictionary of the form {"ok": 1} in case of success. Otherwise, an exception is raised.

Example

>>> my_db_admin.list_namespaces()
['default_keyspace', 'that_other_one']
>>> my_db_admin.drop_namespace("that_other_one")
{'ok': 1}
>>> my_db_admin.list_namespaces()
['default_keyspace']

Deprecated since version: 1.5.0

This will be removed in 2.0.0. The term 'namespace' is being replaced by 'keyspace' throughout the Data API and the clients. Please adapt method and parameter names consistently (examples: db_admin.findNamespaces => db_admin.findKeyspaces; collection.namespace => collection.keyspace; database.list_collections(namespace=...) => database.list_collections(keyspace=...)). See https://docs.datastax.com/en/astra-db-serverless/api-reference/client-versions.html#version-1-5 for more information.

Expand source code
@deprecation.deprecated(  # type: ignore[misc]
    deprecated_in="1.5.0",
    removed_in="2.0.0",
    current_version=__version__,
    details=NAMESPACE_DEPRECATION_NOTICE_METHOD,
)
def drop_namespace(
    self,
    name: str,
    *,
    wait_until_active: bool = True,
    max_time_ms: int | None = None,
) -> dict[str, Any]:
    """
    Delete a namespace from the database, optionally waiting for the database
    to become active again.

    *DEPRECATED* (removal in 2.0). Switch to the "drop_keyspace" method.

    This method forwards all of its arguments, unchanged,
    to `drop_keyspace` and returns its result.

    Args:
        name: the namespace to delete. If it does not exist in this database,
            an error is raised.
        wait_until_active: if True (default), the method returns only after
            the target database is in ACTIVE state again (a few
            seconds, usually). If False, it will return right after issuing the
            deletion request to the DevOps API, and it will be the responsibility
            of the caller to check the database status/namespace availability
            before working with it.
        max_time_ms: a timeout, in milliseconds, for the whole requested
            operation to complete.
            Note that a timeout is no guarantee that the deletion request
            has not reached the API server.

    Returns:
        A dictionary of the form {"ok": 1} in case of success.
        Otherwise, an exception is raised.

    Example:
        >>> my_db_admin.list_namespaces()
        ['default_keyspace', 'that_other_one']
        >>> my_db_admin.drop_namespace("that_other_one")
        {'ok': 1}
        >>> my_db_admin.list_namespaces()
        ['default_keyspace']
    """

    # pure delegation to the keyspace-named counterpart
    deletion_result = self.drop_keyspace(
        name=name,
        wait_until_active=wait_until_active,
        max_time_ms=max_time_ms,
    )
    return deletion_result
def find_embedding_providers(self, *, max_time_ms: int | None = None) ‑> FindEmbeddingProvidersResult

Query the API for the full information on available embedding providers.

Args

max_time_ms
a timeout, in milliseconds, for the DevOps API request.

Returns

A FindEmbeddingProvidersResult object with the complete information returned by the API about available embedding providers.

Example (output abridged and indented for clarity)

>>> admin_for_my_db.find_embedding_providers()
FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)
>>> admin_for_my_db.find_embedding_providers().embedding_providers
{
    'openai': EmbeddingProvider(
        display_name='OpenAI',
        models=[
            EmbeddingProviderModel(name='text-embedding-3-small'),
            ...
        ]
    ),
    ...
}

Expand source code
def find_embedding_providers(
    self, *, max_time_ms: int | None = None
) -> FindEmbeddingProvidersResult:
    """
    Ask the API for the complete description of the available
    embedding providers.

    Args:
        max_time_ms: a timeout, in milliseconds, for the API request.

    Returns:
        A `FindEmbeddingProvidersResult` object, built from the "status"
        part of the API response, with the complete information
        returned by the API about available embedding providers.

    Raises:
        DataAPIFaultyResponseException: if the response has no
            "embeddingProviders" entry within its "status".

    Example (output abridged and indented for clarity):
        >>> admin_for_my_db.find_embedding_providers()
        FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)
        >>> admin_for_my_db.find_embedding_providers().embedding_providers
        {
            'openai': EmbeddingProvider(
                display_name='OpenAI',
                models=[
                    EmbeddingProviderModel(name='text-embedding-3-small'),
                    ...
                ]
            ),
            ...
        }
    """

    logger.info("findEmbeddingProviders")
    api_response = self._api_commander.request(
        payload={"findEmbeddingProviders": {}},
        timeout_info=base_timeout_info(max_time_ms),
    )
    # guard clause: bail out early on a response lacking the expected entry
    if "embeddingProviders" not in api_response.get("status", {}):
        raise DataAPIFaultyResponseException(
            text="Faulty response from findEmbeddingProviders API command.",
            raw_response=api_response,
        )
    logger.info("finished findEmbeddingProviders")
    status_payload = api_response["status"]
    return FindEmbeddingProvidersResult.from_dict(status_payload)
def get_async_database(self, *, token: str | TokenProvider | None = None, keyspace: str | None = None, namespace: str | None = None, region: str | None = None, api_path: str | None = None, api_version: str | None = None, max_time_ms: int | None = None) ‑> AsyncDatabase

Create an AsyncDatabase instance out of this class for working with the data in it.

This method has identical behavior and signature as the sync counterpart get_database: please see that one for more details.

Expand source code
def get_async_database(
    self,
    *,
    token: str | TokenProvider | None = None,
    keyspace: str | None = None,
    namespace: str | None = None,
    region: str | None = None,
    api_path: str | None = None,
    api_version: str | None = None,
    max_time_ms: int | None = None,
) -> AsyncDatabase:
    """
    Create an AsyncDatabase instance from this database admin,
    for working with the data in it.

    The behavior and signature are the same as for the sync counterpart
    `get_database` (which this method calls, converting the result
    with `to_async()`): please see that one for more details.
    """

    # reconcile `keyspace` with its alias `namespace`
    resolved_keyspace = check_namespace_keyspace(
        keyspace=keyspace,
        namespace=namespace,
    )

    # obtain the sync database first, then turn it into its async version
    sync_database = self.get_database(
        token=token,
        keyspace=resolved_keyspace,
        region=region,
        api_path=api_path,
        api_version=api_version,
        max_time_ms=max_time_ms,
    )
    return sync_database.to_async()
def get_database(self, *, token: str | TokenProvider | None = None, keyspace: str | None = None, namespace: str | None = None, region: str | None = None, api_path: str | None = None, api_version: str | None = None, max_time_ms: int | None = None) ‑> Database

Create a Database instance from this database admin, for data-related tasks.

Args

token
if supplied, is passed to the Database instead of the one set for this object. Useful if one wants to work in a least-privilege manner, limiting the permissions for non-admin work. This can be either a literal token string or a subclass of TokenProvider.
keyspace
an optional keyspace to set in the resulting Database. The same default logic as for AstraDBAdmin.get_database() applies.
namespace
an alias for keyspace. DEPRECATED, removal in 2.0.
region
This parameter is deprecated and should not be used. Ignored in the method.
api_path
path to append to the API Endpoint. In typical usage, this should be left to its default of "/api/json".
api_version
version specifier to append to the API path. In typical usage, this should be left to its default of "v1".

Returns

A Database object, ready to be used for working with data and collections.

Example

>>> my_db = my_db_admin.get_database()
>>> my_db.list_collection_names()
['movies', 'another_collection']

Note

creating an instance of Database does not trigger actual creation of the database itself, which should exist beforehand. To create databases, see the AstraDBAdmin class.

Expand source code
def get_database(
    self,
    *,
    token: str | TokenProvider | None = None,
    keyspace: str | None = None,
    namespace: str | None = None,
    region: str | None = None,
    api_path: str | None = None,
    api_version: str | None = None,
    max_time_ms: int | None = None,
) -> Database:
    """
    Create a Database instance from this database admin, for data-related tasks.

    Args:
        token: if supplied, is passed to the Database instead of
            the one set for this object. Useful if one wants to work in
            a least-privilege manner, limiting the permissions for non-admin work.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        keyspace: an optional keyspace to set in the resulting Database.
            The same default logic as for `AstraDBAdmin.get_database` applies.
        namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
        region: *This parameter is deprecated and should not be used.*
            Ignored in the method (a deprecation warning is issued
            if a value is passed).
        api_path: path to append to the API Endpoint. In typical usage, this
            should be left to its default of "/api/json".
        api_version: version specifier to append to the API path. In typical
            usage, this should be left to its default of "v1".
        max_time_ms: passed, as is, to the `get_database` method of the
            AstraDBAdmin this database admin relies on.

    Returns:
        A Database object, ready to be used for working with data and collections.

    Example:
        >>> my_db = my_db_admin.get_database()
        >>> my_db.list_collection_names()
        ['movies', 'another_collection']

    Note:
        creating an instance of Database does not trigger actual creation
        of the database itself, which should exist beforehand. To create databases,
        see the AstraDBAdmin class.
    """

    # reconcile `keyspace` with its alias `namespace`
    resolved_keyspace = check_namespace_keyspace(
        keyspace=keyspace,
        namespace=namespace,
    )

    if region is not None:
        # `region` is not used below: just tell the caller it is deprecated.
        warnings.warn(
            deprecation.DeprecatedWarning(
                "The 'region' parameter is deprecated in this method and will be ignored.",
                deprecated_in="1.3.2",
                removed_in="2.0.0",
                details="The database class whose method is invoked already has a region set.",
            ),
            stacklevel=2,
        )

    database = self._astra_db_admin.get_database(
        api_endpoint=self.api_endpoint,
        token=token,
        keyspace=resolved_keyspace,
        api_path=api_path,
        api_version=api_version,
        max_time_ms=max_time_ms,
    )
    return database
def info(self, *, max_time_ms: int | None = None) ‑> AdminDatabaseInfo

Query the DevOps API for the full info on this database.

Args

max_time_ms
a timeout, in milliseconds, for the DevOps API request.

Returns

An AdminDatabaseInfo object.

Example

>>> my_db_info = admin_for_my_db.info()
>>> my_db_info.status
'ACTIVE'
>>> my_db_info.info.region
'us-east1'
Expand source code
def info(self, *, max_time_ms: int | None = None) -> AdminDatabaseInfo:
    """
    Retrieve the full info on this database from the DevOps API.

    The request is delegated to the `database_info` method of the
    AstraDBAdmin this database admin relies on.

    Args:
        max_time_ms: a timeout, in milliseconds, for the DevOps API request.

    Returns:
        An AdminDatabaseInfo object.

    Example:
        >>> my_db_info = admin_for_my_db.info()
        >>> my_db_info.status
        'ACTIVE'
        >>> my_db_info.info.region
        'us-east1'
    """

    database_id = self._database_id
    logger.info(f"getting info ('{database_id}')")
    database_info = self._astra_db_admin.database_info(
        id=database_id,
        max_time_ms=max_time_ms,
    )
    logger.info(f"finished getting info ('{database_id}')")
    return database_info
def list_keyspaces(self, *, max_time_ms: int | None = None) ‑> list[str]

Query the DevOps API for a list of the keyspaces in the database.

Args

max_time_ms
a timeout, in milliseconds, for the DevOps API request.

Returns

A list of the keyspaces, each a string, in no particular order.

Example

>>> admin_for_my_db.list_keyspaces()
['default_keyspace', 'staging_keyspace']
Expand source code
def list_keyspaces(self, *, max_time_ms: int | None = None) -> list[str]:
    """
    Retrieve the list of the keyspaces in the database from the DevOps API.

    The keyspaces are extracted from the raw database information
    obtained through the `info` method.

    Args:
        max_time_ms: a timeout, in milliseconds, for the DevOps API request.

    Returns:
        A list of the keyspaces, each a string, in no particular order.
        An empty list is returned if the database information has
        no (or an empty) "keyspaces" entry.

    Raises:
        DevOpsAPIException: if the database information has no raw
            response attached to it.

    Example:
        >>> admin_for_my_db.list_keyspaces()
        ['default_keyspace', 'staging_keyspace']
    """

    logger.info(f"getting keyspaces ('{self._database_id}')")
    database_info = self.info(max_time_ms=max_time_ms)
    logger.info(f"finished getting keyspaces ('{self._database_id}')")

    raw_info = database_info.raw_info
    if raw_info is None:
        raise DevOpsAPIException("Could not get the keyspace list.")

    # a missing/null "keyspaces" entry is normalized to an empty list
    found_keyspaces = raw_info.get("info", {}).get("keyspaces")
    return found_keyspaces or []
def list_namespaces(self, *, max_time_ms: int | None = None) ‑> list[str]

Query the DevOps API for a list of the namespaces in the database.

DEPRECATED (removal in 2.0). Switch to the "list_keyspaces" method.

Args

max_time_ms
a timeout, in milliseconds, for the DevOps API request.

Returns

A list of the namespaces, each a string, in no particular order.

Example

>>> admin_for_my_db.list_namespaces()
['default_keyspace', 'staging_namespace']

Deprecated since version: 1.5.0

This will be removed in 2.0.0. The term 'namespace' is being replaced by 'keyspace' throughout the Data API and the clients. Please adapt method and parameter names consistently (examples: db_admin.findNamespaces => db_admin.findKeyspaces; collection.namespace => collection.keyspace; database.list_collections(namespace=...) => database.list_collections(keyspace=...)). See https://docs.datastax.com/en/astra-db-serverless/api-reference/client-versions.html#version-1-5 for more information.

Expand source code
@deprecation.deprecated(  # type: ignore[misc]
    deprecated_in="1.5.0",
    removed_in="2.0.0",
    current_version=__version__,
    details=NAMESPACE_DEPRECATION_NOTICE_METHOD,
)
def list_namespaces(self, *, max_time_ms: int | None = None) -> list[str]:
    """
    Retrieve the list of the namespaces in the database from the DevOps API.

    *DEPRECATED* (removal in 2.0). Switch to the "list_keyspaces" method.

    This method simply returns the result of `list_keyspaces`.

    Args:
        max_time_ms: a timeout, in milliseconds, for the DevOps API request.

    Returns:
        A list of the namespaces, each a string, in no particular order.

    Example:
        >>> admin_for_my_db.list_namespaces()
        ['default_keyspace', 'staging_namespace']
    """

    # pure delegation to the keyspace-named counterpart
    found_keyspaces = self.list_keyspaces(max_time_ms=max_time_ms)
    return found_keyspaces
def set_caller(self, caller_name: str | None = None, caller_version: str | None = None) ‑> None

Set a new identity for the application/framework on behalf of which the DevOps API calls will be performed (the "caller").

New objects spawned from this client afterwards will inherit the new settings.

Args

caller_name
name of the application, or framework, on behalf of which the DevOps API calls are performed. This ends up in the request user-agent.
caller_version
version of the caller.

Example

>>> admin_for_my_db.set_caller(
...     caller_name="the_caller",
...     caller_version="0.1.0",
... )

Deprecated since version: 1.5.1

This will be removed in 2.0.0. Please provide the caller(s) at constructor time through the callers list parameter.

Expand source code
@deprecation.deprecated(  # type: ignore[misc]
    deprecated_in="1.5.1",
    removed_in="2.0.0",
    current_version=__version__,
    details=SET_CALLER_DEPRECATION_NOTICE,
)
def set_caller(
    self,
    caller_name: str | None = None,
    caller_version: str | None = None,
) -> None:
    """
    Replace the "caller" identity, i.e. the application/framework on behalf
    of which the DevOps API calls are performed.

    Objects spawned from this one after the call inherit the new settings.

    Args:
        caller_name: name of the application, or framework, on behalf of which
            the DevOps API calls are performed. This ends up in the request user-agent.
        caller_version: version of the caller.

    Example:
        >>> admin_for_my_db.set_caller(
        ...     caller_name="the_caller",
        ...     caller_version="0.1.0",
        ... )
    """

    logger.info(f"setting caller to {caller_name}/{caller_version}")
    new_callers = check_caller_parameters([], caller_name, caller_version)
    # An empty result means "nothing supplied": keep the current callers.
    self.callers = new_callers if new_callers else self.callers
    # Both commanders embed the callers, so they are rebuilt from scratch.
    self._api_commander = self._get_api_commander()
    self._dev_ops_api_commander = self._get_dev_ops_api_commander()
def with_options(self, api_endpoint: str | None = None, *, token: str | TokenProvider | None = None, callers: Sequence[CallerType] = [], caller_name: str | None = None, caller_version: str | None = None) ‑> AstraDBDatabaseAdmin

Create a clone of this AstraDBDatabaseAdmin with some changed attributes.

Args

api_endpoint
the API Endpoint for the target database (e.g. https://<ID>-<REGION>.apps.astra.datastax.com). The database must exist already for the resulting object to be effectively used; in other words, this invocation does not create the database, just the object instance.
token
an Access Token to the database. Example: "AstraCS:xyz...". This can be either a literal token string or a subclass of TokenProvider.
callers
a list of caller identities, i.e. applications, or frameworks, on behalf of which Data API and DevOps API calls are performed. These end up in the request user-agent. Each caller identity is a ("caller_name", "caller_version") pair.
caller_name
DEPRECATED, use callers. Removal 2.0. Name of the application, or framework, on behalf of which the Data API and DevOps API calls are performed. This ends up in the request user-agent.
caller_version
version of the caller. DEPRECATED, use callers. Removal 2.0.

Returns

a new AstraDBDatabaseAdmin instance.

Example

>>> admin_for_my_other_db = admin_for_my_db.with_options(
...     "https://<ID>-<REGION>.apps.astra.datastax.com",
... )
Expand source code
def with_options(
    self,
    api_endpoint: str | None = None,
    *,
    token: str | TokenProvider | None = None,
    callers: Sequence[CallerType] = [],
    caller_name: str | None = None,
    caller_version: str | None = None,
) -> AstraDBDatabaseAdmin:
    """
    Return a copy of this AstraDBDatabaseAdmin, with the provided
    attributes replacing the current ones.

    Args:
        api_endpoint: the API Endpoint for the target database
            (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`).
            This call only builds the object instance and does not create
            any database: the target database must exist already for
            the returned object to be effectively used.
        token: an Access Token to the database. Example: `"AstraCS:xyz..."`.
            Either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        callers: a list of caller identities, i.e. applications, or frameworks,
            on behalf of which Data API and DevOps API calls are performed.
            These end up in the request user-agent.
            Each caller identity is a ("caller_name", "caller_version") pair.
        caller_name: *DEPRECATED*, use `callers`. Removal 2.0. Name of the
            application, or framework, on behalf of which the Data API and
            DevOps API calls are performed. This ends up in the request user-agent.
        caller_version: version of the caller. *DEPRECATED*, use `callers`.
            Removal 2.0.

    Returns:
        a new AstraDBDatabaseAdmin instance.

    Example:
        >>> admin_for_my_other_db = admin_for_my_db.with_options(
        ...     "https://<ID>-<REGION>.apps.astra.datastax.com",
        ... )
    """

    # Merge the new-style and the deprecated caller parameters into one value.
    resolved_callers = check_caller_parameters(callers, caller_name, caller_version)
    clone = self._copy(
        api_endpoint=api_endpoint,
        token=token,
        callers=resolved_callers,
    )
    return clone
class AsyncCollection (database: AsyncDatabase, name: str, *, keyspace: str | None = None, namespace: str | None = None, api_options: CollectionAPIOptions | None = None, callers: Sequence[CallerType] = [], caller_name: str | None = None, caller_version: str | None = None)

A Data API collection, the main object to interact with the Data API, especially for DDL operations. This class has an asynchronous interface for use with asyncio.

An AsyncCollection is spawned from a Database object, from which it inherits the details on how to reach the API server (endpoint, authentication token).

Args

database
a Database object, instantiated earlier. This represents the database the collection belongs to.
name
the collection name. This parameter should match an existing collection on the database.
keyspace
this is the keyspace to which the collection belongs. If not specified, the database's working keyspace is used.
namespace
an alias for keyspace. DEPRECATED, removal in 2.0.
api_options
An instance of astrapy.api_options.CollectionAPIOptions providing the general settings for interacting with the Data API.
callers
a list of caller identities, i.e. applications, or frameworks, on behalf of which the Data API calls are performed. These end up in the request user-agent. Each caller identity is a ("caller_name", "caller_version") pair.
caller_name
DEPRECATED, use callers. Removal 2.0. Name of the application, or framework, on behalf of which the Data API calls are performed. This ends up in the request user-agent.
caller_version
version of the caller. DEPRECATED, use callers. Removal 2.0.

Examples

>>> from astrapy import DataAPIClient, AsyncCollection
>>> my_client = DataAPIClient("AstraCS:...")
>>> my_async_db = my_client.get_async_database(
...    "https://01234567-....apps.astra.datastax.com"
... )
>>> my_async_coll_1 = AsyncCollection(database=my_async_db, name="my_collection")
>>> my_async_coll_2 = asyncio.run(my_async_db.create_collection(
...     "my_v_collection",
...     dimension=3,
...     metric="cosine",
... ))
>>> my_async_coll_3a = asyncio.run(my_async_db.get_collection(
...     "my_already_existing_collection",
... ))
>>> my_async_coll_3b = my_async_db.my_already_existing_collection
>>> my_async_coll_3c = my_async_db["my_already_existing_collection"]

Note

creating an instance of AsyncCollection does not trigger actual creation of the collection on the database. The latter should have been created beforehand, e.g. through the create_collection method of an AsyncDatabase.

Expand source code
class AsyncCollection:
    """
    A Data API collection, the main object to interact with the Data API,
    especially for DDL operations.
    This class has an asynchronous interface for use with asyncio.

    An AsyncCollection is spawned from a Database object, from which it inherits
    the details on how to reach the API server (endpoint, authentication token).

    Args:
        database: a Database object, instantiated earlier. This represents
            the database the collection belongs to.
        name: the collection name. This parameter should match an existing
            collection on the database.
        keyspace: this is the keyspace to which the collection belongs.
            If not specified, the database's working keyspace is used.
        namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
        api_options: An instance of `astrapy.api_options.CollectionAPIOptions`
            providing the general settings for interacting with the Data API.
        callers: a list of caller identities, i.e. applications, or frameworks,
            on behalf of which the Data API calls are performed. These end up
            in the request user-agent.
            Each caller identity is a ("caller_name", "caller_version") pair.
        caller_name: *DEPRECATED*, use `callers`. Removal 2.0. Name of the
            application, or framework, on behalf of which the Data API calls
            are performed. This ends up in the request user-agent.
        caller_version: version of the caller. *DEPRECATED*, use `callers`. Removal 2.0.

    Examples:
        >>> import asyncio
        >>> from astrapy import DataAPIClient, AsyncCollection
        >>> my_client = DataAPIClient("AstraCS:...")
        >>> my_async_db = my_client.get_async_database(
        ...    "https://01234567-....apps.astra.datastax.com"
        ... )
        >>> my_async_coll_1 = AsyncCollection(database=my_async_db, name="my_collection")
        >>> my_async_coll_2 = asyncio.run(my_async_db.create_collection(
        ...     "my_v_collection",
        ...     dimension=3,
        ...     metric="cosine",
        ... ))
        >>> my_async_coll_3a = asyncio.run(my_async_db.get_collection(
        ...     "my_already_existing_collection",
        ... ))
        >>> my_async_coll_3b = my_async_db.my_already_existing_collection
        >>> my_async_coll_3c = my_async_db["my_already_existing_collection"]

    Note:
        creating an instance of AsyncCollection does not trigger actual creation
        of the collection on the database. The latter should have been created
        beforehand, e.g. through the `create_collection` method of an AsyncDatabase.
    """

    def __init__(
        self,
        database: AsyncDatabase,
        name: str,
        *,
        keyspace: str | None = None,
        namespace: str | None = None,
        api_options: CollectionAPIOptions | None = None,
        callers: Sequence[CallerType] = [],
        caller_name: str | None = None,
        caller_version: str | None = None,
    ) -> None:
        # Reconcile new-style parameters with their deprecated counterparts.
        resolved_callers = check_caller_parameters(
            callers, caller_name, caller_version
        )
        requested_keyspace = check_namespace_keyspace(
            keyspace=keyspace,
            namespace=namespace,
        )

        self.api_options = (
            api_options if api_options is not None else CollectionAPIOptions()
        )

        # An explicit keyspace wins; otherwise fall back to the database's one.
        effective_keyspace = requested_keyspace
        if effective_keyspace is None:
            effective_keyspace = database.keyspace
        if effective_keyspace is None:
            raise ValueError(
                "Attempted to create AsyncCollection with 'keyspace' unset."
            )

        # The collection keeps its own copy of the database, bound to the
        # resolved keyspace and callers.
        self._database = database._copy(
            keyspace=effective_keyspace,
            callers=resolved_callers,
        )
        self._name = name

        # Embedding-related headers are layered on top of the auth header.
        embedding_headers = self.api_options.embedding_api_key.get_headers()
        commander_headers = {
            DEFAULT_DATA_API_AUTH_HEADER: self._database.token_provider.get_token(),
        }
        commander_headers.update(embedding_headers)
        self._commander_headers = commander_headers

        self.callers = resolved_callers
        self._api_commander = self._get_api_commander()

    def __repr__(self) -> str:
        # Render as ClassName(field=value, ...), quoting the string fields.
        class_name = self.__class__.__name__
        rendered_fields = ", ".join(
            (
                f'name="{self.name}"',
                f'keyspace="{self.keyspace}"',
                f"database={self.database}",
                f"api_options={self.api_options}",
            )
        )
        return f"{class_name}({rendered_fields})"

    def __eq__(self, other: Any) -> bool:
        # Only another AsyncCollection can compare equal.
        if not isinstance(other, AsyncCollection):
            return False
        # Two collections are equal when both their API commanders and
        # their API options match.
        same_commander = self._api_commander == other._api_commander
        same_options = self.api_options == other.api_options
        return bool(same_commander and same_options)

    def __call__(self, *pargs: Any, **kwargs: Any) -> None:
        # A collection is never callable: raise an explanatory TypeError that
        # names this collection as the missing method on the database class.
        own_class_name = self.__class__.__name__
        collection_name = self.name
        database_class_name = self.database.__class__.__name__
        raise TypeError(
            f"'{own_class_name}' object is not callable. If you "
            f"meant to call the '{collection_name}' method on a "
            f"'{database_class_name}' object "
            "it is failing because no such method exists."
        )

    def _get_api_commander(self) -> APICommander:
        """
        Build a fresh APICommander reflecting the current state of this
        collection (database endpoint and path, keyspace, name, headers, callers).
        """

        db = self._database
        if db.keyspace is None:
            raise ValueError(
                "No keyspace specified. AsyncCollection requires a keyspace to "
                "be set, e.g. through the `keyspace` constructor parameter."
            )

        # Path layout: /<api_path>/<api_version>/<keyspace>/<collection>,
        # where empty components are left out.
        candidate_segments = (
            db.api_path.strip("/"),
            db.api_version.strip("/"),
            db.keyspace,
            self._name,
        )
        path_segments = []
        for segment in candidate_segments:
            if segment != "":
                path_segments.append(segment)
        base_path = "/" + "/".join(path_segments)

        return APICommander(
            api_endpoint=db.api_endpoint,
            path=base_path,
            headers=self._commander_headers,
            callers=self.callers,
        )

    async def __aenter__(self) -> AsyncCollection:
        # Entering the async context does no work: the collection itself
        # is what gets bound by `async with ... as`.
        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None = None,
        exc_value: BaseException | None = None,
        traceback: TracebackType | None = None,
    ) -> None:
        # Leaving the async context is delegated to the API commander, if any.
        commander = self._api_commander
        if commander is None:
            return
        await commander.__aexit__(
            exc_type=exc_type,
            exc_value=exc_value,
            traceback=traceback,
        )

    def _copy(
        self,
        *,
        database: AsyncDatabase | None = None,
        name: str | None = None,
        keyspace: str | None = None,
        namespace: str | None = None,
        api_options: CollectionAPIOptions | None = None,
        callers: Sequence[CallerType] = [],
        caller_name: str | None = None,
        caller_version: str | None = None,
    ) -> AsyncCollection:
        # Reconcile new-style parameters with their deprecated counterparts.
        resolved_callers = check_caller_parameters(
            callers, caller_name, caller_version
        )
        resolved_keyspace = check_namespace_keyspace(
            keyspace=keyspace,
            namespace=namespace,
        )

        # Any attribute not overridden is taken from this collection.
        target_database = database or self.database._copy()
        target_name = name or self.name
        target_keyspace = resolved_keyspace or self.keyspace
        target_options = self.api_options.with_override(api_options)
        target_callers = resolved_callers or self.callers

        return AsyncCollection(
            database=target_database,
            name=target_name,
            keyspace=target_keyspace,
            api_options=target_options,
            callers=target_callers,
        )

    def with_options(
        self,
        *,
        name: str | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | None = None,
        collection_max_time_ms: int | None = None,
        callers: Sequence[CallerType] = [],
        caller_name: str | None = None,
        caller_version: str | None = None,
    ) -> AsyncCollection:
        """
        Return a copy of this collection, with the provided attributes
        replacing the current ones.

        Args:
            name: the name of the collection. Handy to quickly obtain
                AsyncCollection instances, each pointing to a different
                collection existing in the same keyspace.
            embedding_api_key: optional API key(s) for interacting with the collection.
                If an embedding service is configured, and this parameter is not None,
                each Data API call will include the necessary embedding-related headers
                as specified by this parameter. A plain string translates
                into the one "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
                For some vectorize providers/models, if using header-based authentication,
                specialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`
                should be supplied.
            collection_max_time_ms: a default timeout, in millisecond, for the duration
                of each operation on the collection. A timeout passed to an individual
                collection method call takes precedence over this overall default.
                Note that for some methods involving multiple API calls (such as
                `find`, `delete_many`, `insert_many` and so on), it is strongly suggested
                to provide a specific timeout as the default one likely wouldn't make
                much sense.
            callers: a list of caller identities, i.e. applications, or frameworks,
                on behalf of which the Data API calls are performed. These end up
                in the request user-agent.
                Each caller identity is a ("caller_name", "caller_version") pair.
            caller_name: *DEPRECATED*, use `callers`. Removal 2.0. Name of the
                application, or framework, on behalf of which the Data API calls
                are performed. This ends up in the request user-agent.
            caller_version: version of the caller. *DEPRECATED*, use `callers`.
                Removal 2.0.

        Returns:
            a new AsyncCollection instance.

        Example:
            >>> my_other_async_coll = my_async_coll.with_options(
            ...     name="the_other_coll",
            ...     callers=[("caller_identity", "0.1.2")],
            ... )
        """

        resolved_callers = check_caller_parameters(
            callers, caller_name, caller_version
        )
        # Pack the option-like arguments into an options object, which
        # `_copy` then applies as an override of the current options.
        headers_provider = coerce_embedding_headers_provider(embedding_api_key)
        option_overrides = CollectionAPIOptions(
            embedding_api_key=headers_provider,
            max_time_ms=collection_max_time_ms,
        )

        clone = self._copy(
            name=name,
            api_options=option_overrides,
            callers=resolved_callers,
        )
        return clone

    def to_sync(
        self,
        *,
        database: Database | None = None,
        name: str | None = None,
        keyspace: str | None = None,
        namespace: str | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | None = None,
        collection_max_time_ms: int | None = None,
        callers: Sequence[CallerType] = [],
        caller_name: str | None = None,
        caller_version: str | None = None,
    ) -> Collection:
        """
        Build a (synchronous) Collection out of this one. Every attribute
        not explicitly overridden through the arguments is carried over
        unchanged (with the database converted into a sync object).

        Args:
            database: a Database object, instantiated earlier.
                This represents the database the new collection belongs to.
            name: the collection name. This parameter should match an existing
                collection on the database.
            keyspace: this is the keyspace to which the collection belongs.
                If not specified, the database's working keyspace is used.
            namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
            embedding_api_key: optional API key(s) for interacting with the collection.
                If an embedding service is configured, and this parameter is not None,
                each Data API call will include the necessary embedding-related headers
                as specified by this parameter. A plain string translates
                into the one "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
                For some vectorize providers/models, if using header-based authentication,
                specialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`
                should be supplied.
            collection_max_time_ms: a default timeout, in millisecond, for the duration
                of each operation on the collection. A timeout passed to an individual
                collection method call takes precedence over this overall default.
                Note that for some methods involving multiple API calls (such as
                `find`, `delete_many`, `insert_many` and so on), it is strongly suggested
                to provide a specific timeout as the default one likely wouldn't make
                much sense.
            callers: a list of caller identities, i.e. applications, or frameworks,
                on behalf of which the Data API calls are performed. These end up
                in the request user-agent.
                Each caller identity is a ("caller_name", "caller_version") pair.
            caller_name: *DEPRECATED*, use `callers`. Removal 2.0. Name of the
                application, or framework, on behalf of which the Data API calls
                are performed. This ends up in the request user-agent.
            caller_version: version of the caller. *DEPRECATED*, use `callers`.
                Removal 2.0.

        Returns:
            the new copy, a Collection instance.

        Example:
            >>> my_async_coll.to_sync().count_documents({}, upper_bound=100)
            77
        """

        # Reconcile new-style parameters with their deprecated counterparts.
        resolved_callers = check_caller_parameters(
            callers, caller_name, caller_version
        )
        resolved_keyspace = check_namespace_keyspace(
            keyspace=keyspace,
            namespace=namespace,
        )
        headers_provider = coerce_embedding_headers_provider(embedding_api_key)
        option_overrides = CollectionAPIOptions(
            embedding_api_key=headers_provider,
            max_time_ms=collection_max_time_ms,
        )

        # Any attribute not overridden is taken from this collection.
        sync_database = database or self.database.to_sync()
        target_name = name or self.name
        target_keyspace = resolved_keyspace or self.keyspace
        target_options = self.api_options.with_override(option_overrides)
        target_callers = resolved_callers or self.callers

        return Collection(
            database=sync_database,
            name=target_name,
            keyspace=target_keyspace,
            api_options=target_options,
            callers=target_callers,
        )

    @deprecation.deprecated(  # type: ignore[misc]
        deprecated_in="1.5.1",
        removed_in="2.0.0",
        current_version=__version__,
        details=SET_CALLER_DEPRECATION_NOTICE,
    )
    def set_caller(
        self,
        caller_name: str | None = None,
        caller_version: str | None = None,
    ) -> None:
        """
        Replace the "caller" identity, i.e. the application/framework on behalf
        of which the Data API calls are performed.

        Args:
            caller_name: name of the application, or framework, on behalf of which
                the Data API calls are performed. This ends up in the request user-agent.
            caller_version: version of the caller.

        Example:
            >>> my_async_coll.set_caller(caller_name="the_caller", caller_version="0.1.0")
        """

        logger.info(f"setting caller to {caller_name}/{caller_version}")
        new_callers = check_caller_parameters([], caller_name, caller_version)
        # An empty result means "nothing supplied": keep the current callers.
        self.callers = new_callers if new_callers else self.callers
        # The commander embeds the callers, so it is rebuilt from scratch.
        self._api_commander = self._get_api_commander()

    async def options(self, *, max_time_ms: int | None = None) -> CollectionOptions:
        """
        Read the collection options (i.e. its configuration) from the database.

        No caching is involved: each invocation issues a request to
        the Data API, so that the result is up-to-date and suitable
        for usages such as real-time collection validation by the application.

        Args:
            max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
                If not passed, the collection-level setting is used instead.

        Returns:
            a CollectionOptions instance describing the collection.
            (See also the database `list_collections` method.)

        Raises:
            CollectionNotFoundException: if no collection with this name
                is found among those listed by the database.

        Example:
            >>> asyncio.run(my_async_coll.options())
            CollectionOptions(vector=CollectionVectorOptions(dimension=3, metric='cosine'))
        """

        timeout_ms = max_time_ms or self.api_options.max_time_ms
        logger.info(f"getting collections in search of '{self.name}'")
        # Go through the whole listing, retaining the descriptors
        # whose name matches this collection.
        matching_descriptors = []
        async for descriptor in self.database.list_collections(
            max_time_ms=timeout_ms
        ):
            if descriptor.name == self.name:
                matching_descriptors.append(descriptor)
        logger.info(f"finished getting collections in search of '{self.name}'")

        if not matching_descriptors:
            raise CollectionNotFoundException(
                text=f"Collection {self.keyspace}.{self.name} not found.",
                keyspace=self.keyspace,
                collection_name=self.name,
            )
        return matching_descriptors[0].options

    def info(self) -> CollectionInfo:
        """
        Return a CollectionInfo object, collecting information on
        the collection such as name, location and database.

        This is distinct from the collection `options` method, which deals
        with the collection internal configuration.

        Example:
            >>> my_async_coll.info().database_info.region
            'us-east1'
            >>> my_async_coll.info().full_name
            'default_keyspace.my_v_collection'

        Note:
            the returned CollectionInfo wraps, among other things,
            the database information: as such, calling this method
            triggers the same-named method of a Database object (which, in turn,
            performs a HTTP request to the DevOps API).
            See the documentation for `Database.info()` for more details.
        """

        # The database info is retrieved first, before reading anything else.
        database_info = self.database.info()
        # 'namespace' is populated with the very same value as 'keyspace'.
        collection_keyspace = self.keyspace
        return CollectionInfo(
            database_info=database_info,
            keyspace=collection_keyspace,
            namespace=collection_keyspace,
            name=self.name,
            full_name=self.full_name,
        )

    @property
    def database(self) -> AsyncDatabase:
        """
        The AsyncDatabase object this collection belongs to.

        This is the collection's own copy of the database, created at
        construction time with the collection's keyspace and callers.

        Example:
            >>> my_async_coll.database.name
            'quicktest'
        """

        return self._database

    @property
    def namespace(self) -> str:
        """
        The namespace (i.e. keyspace) this collection is in.

        *DEPRECATED* (removal in 2.0). Use the `keyspace` property instead:
        accessing this property emits a deprecation warning and then
        returns the keyspace.

        Example:
            >>> my_async_coll.namespace
            'default_keyspace'
        """

        # stacklevel=2 attributes the warning to the code accessing the property.
        warnings.warn(
            deprecation.DeprecatedWarning(
                "the 'namespace' property",
                deprecated_in="1.5.0",
                removed_in="2.0.0",
                details=NAMESPACE_DEPRECATION_NOTICE_METHOD,
            ),
            stacklevel=2,
        )

        return self.keyspace

    @property
    def keyspace(self) -> str:
        """
        The keyspace this collection is in, as set on the collection's database.

        Raises:
            ValueError: if the collection's database has no keyspace set.

        Example:
            >>> my_async_coll.keyspace
            'default_keyspace'
        """

        database_keyspace = self.database.keyspace
        if database_keyspace is not None:
            return database_keyspace
        raise ValueError("The collection's DB is set with keyspace=None")

    @property
    def name(self) -> str:
        """
        The name of this collection, as provided at construction time.

        Example:
            >>> my_async_coll.name
            'my_v_collection'
        """

        return self._name

    @property
    def full_name(self) -> str:
        """
        The collection name qualified with its keyspace,
        i.e. a string in the form "keyspace.collection_name".

        Example:
            >>> my_async_coll.full_name
            'default_keyspace.my_v_collection'
        """

        keyspace_part = self.keyspace
        name_part = self.name
        return f"{keyspace_part}.{name_part}"

    async def insert_one(
        self,
        document: DocumentType,
        *,
        vector: VectorType | None = None,
        vectorize: str | None = None,
        max_time_ms: int | None = None,
    ) -> InsertOneResult:
        """
        Insert a single document in the collection in an atomic operation.

        Args:
            document: the dictionary expressing the document to insert.
                The `_id` field of the document can be left out, in which
                case it will be created automatically.
            vector: a vector (a list of numbers appropriate for the collection)
                for the document. Passing this parameter is equivalent to
                providing a `$vector` field within the document itself,
                however the two are mutually exclusive.
                *DEPRECATED* (removal in 2.0). Use a `$vector` key in the document instead.
            vectorize: a string to be made into a vector, if such a service
                is configured for the collection. Passing this parameter is
                equivalent to providing a `$vectorize` field in the document itself,
                however the two are mutually exclusive.
                Moreover, this parameter cannot coexist with `vector`.
                *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the document instead.
            max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
                If not passed, the collection-level setting is used instead.

        Returns:
            an InsertOneResult object.

        Raises:
            ValueError: if the API response does not carry a non-empty
                list of inserted IDs in its "status".

        Example:
            >>> async def write_and_count(acol: AsyncCollection) -> None:
            ...     count0 = await acol.count_documents({}, upper_bound=10)
            ...     print("count0", count0)
            ...     await acol.insert_one(
            ...         {
            ...             "age": 30,
            ...             "name": "Smith",
            ...             "food": ["pear", "peach"],
            ...             "likes_fruit": True,
            ...         },
            ...     )
            ...     await acol.insert_one({"_id": "user-123", "age": 50, "name": "Maccio"})
            ...     count1 = await acol.count_documents({}, upper_bound=10)
            ...     print("count1", count1)
            ...
            >>> asyncio.run(write_and_count(my_async_coll))
            count0 0
            count1 2

            >>> asyncio.run(my_async_coll.insert_one({"tag": "v", "$vector": [10, 11]}))
            InsertOneResult(...)

        Note:
            If an `_id` is explicitly provided, which corresponds to a document
            that exists already in the collection, an error is raised and
            the insertion fails.
        """

        check_deprecated_vector_ize(
            vector=vector,
            vectors=None,
            vectorize=vectorize,
            kind="insert",
        )
        _document = _collate_vector_to_document(document, vector, vectorize)
        _max_time_ms = max_time_ms or self.api_options.max_time_ms
        io_payload = {"insertOne": {"document": _document}}
        logger.info(f"insertOne on '{self.name}'")
        io_response = await self._api_commander.async_request(
            payload=io_payload,
            timeout_info=base_timeout_info(_max_time_ms),
        )
        logger.info(f"finished insertOne on '{self.name}'")

        # A missing/null "status", a missing "insertedIds" and an empty
        # "insertedIds" list all denote the same failure: handle them at once.
        inserted_ids = (io_response.get("status") or {}).get("insertedIds")
        if inserted_ids:
            return InsertOneResult(
                raw_results=[io_response],
                inserted_id=inserted_ids[0],
            )
        raise ValueError(
            "Could not complete a insert_one operation. "
            f"(gotten '{json.dumps(io_response)}')"
        )

    async def insert_many(
        self,
        documents: Iterable[DocumentType],
        *,
        vectors: Iterable[VectorType | None] | None = None,
        vectorize: Iterable[str | None] | None = None,
        ordered: bool = False,
        chunk_size: int | None = None,
        concurrency: int | None = None,
        max_time_ms: int | None = None,
    ) -> InsertManyResult:
        """
        Insert a list of documents into the collection.
        This is not an atomic operation.

        Args:
            documents: an iterable of dictionaries, each a document to insert.
                Documents may specify their `_id` field or leave it out, in which
                case it will be added automatically.
            vectors: an optional list of vectors (as many vectors as the provided
                documents) to associate to the documents when inserting.
                Passing vectors this way is indeed equivalent to the "$vector" field
                of the documents, however the two are mutually exclusive.
                *DEPRECATED* (removal in 2.0). Use a `$vector` key in the documents instead.
            vectorize: an optional list of strings to be made into as many vectors
                (one per document), if such a service is configured for the collection.
                Passing this parameter is equivalent to providing a `$vectorize`
                field in the documents themselves, however the two are mutually exclusive.
                *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the documents instead.
            ordered: if False (default), the insertions can occur in arbitrary order
                and possibly concurrently. If True, they are processed sequentially.
                If there are no specific reasons against it, unordered insertions are to
                be preferred as they complete much faster.
            chunk_size: how many documents to include in a single API request.
                It must be a positive integer.
                Exceeding the server maximum allowed value results in an error.
                Leave it unspecified (recommended) to use the system default.
            concurrency: maximum number of concurrent requests to the API at
                a given time. It cannot be more than one for ordered insertions,
                and it must be at least one for unordered insertions.
            max_time_ms: a timeout, in milliseconds, for the operation.
                If not passed, the collection-level setting is used instead:
                If many documents are being inserted, this method corresponds
                to several HTTP requests: in such cases one may want to specify
                a more tolerant timeout here.

        Returns:
            an InsertManyResult object.

        Raises:
            ValueError: if `ordered` is True together with a `concurrency`
                greater than one; if `chunk_size` is less than one; if an
                unordered insertion is requested with a `concurrency`
                less than one.
            InsertManyException: if any of the API responses reports errors.
                The exception carries the partial result accumulated so far.

        Examples:
            >>> async def write_and_count(acol: AsyncCollection) -> None:
            ...     count0 = await acol.count_documents({}, upper_bound=10)
            ...     print("count0", count0)
            ...     im_result1 = await acol.insert_many(
            ...         [
            ...             {"a": 10},
            ...             {"a": 5},
            ...             {"b": [True, False, False]},
            ...         ],
            ...         ordered=True,
            ...     )
            ...     print("inserted1", im_result1.inserted_ids)
            ...     count1 = await acol.count_documents({}, upper_bound=100)
            ...     print("count1", count1)
            ...     await acol.insert_many(
            ...         [{"seq": i} for i in range(50)],
            ...         concurrency=5,
            ...     )
            ...     count2 = await acol.count_documents({}, upper_bound=100)
            ...     print("count2", count2)
            ...
            >>> asyncio.run(write_and_count(my_async_coll))
            count0 0
            inserted1 ['e3c2a684-...', '1de4949f-...', '167dacc3-...']
            count1 3
            count2 53
            >>> asyncio.run(my_async_coll.insert_many(
            ...     [
            ...         {"tag": "a", "$vector": [1, 2]},
            ...         {"tag": "b", "$vector": [3, 4]},
            ...     ]
            ... ))
            InsertManyResult(...)

        Note:
            Unordered insertions are executed with some degree of concurrency,
            so it is usually better to prefer this mode unless the order in the
            document sequence is important.

        Note:
            A failure mode for this command is related to certain faulty documents
            found among those to insert: a document may have an `_id` already
            present on the collection, or its vector dimension may not
            match the collection setting.

            For an ordered insertion, the method will raise an exception at
            the first such faulty document -- nevertheless, all documents processed
            until then will end up being written to the database.

            For unordered insertions, if the error stems from faulty documents
            the insertion proceeds until exhausting the input documents: then,
            an exception is raised -- and all insertable documents will have been
            written to the database, including those "after" the troublesome ones.

            If, on the other hand, there are errors not related to individual
            documents (such as a network connectivity error), the whole
            `insert_many` operation will stop in mid-way, an exception will be raised,
            and only a certain amount of the input documents will
            have made their way to the database.
        """

        check_deprecated_vector_ize(
            vector=None,
            vectors=vectors,
            vectorize=vectorize,
            kind="insert",
        )
        if concurrency is None:
            _concurrency = 1 if ordered else DEFAULT_INSERT_MANY_CONCURRENCY
        else:
            _concurrency = concurrency
        if _concurrency > 1 and ordered:
            raise ValueError("Cannot run ordered insert_many concurrently.")
        if _concurrency < 1 and not ordered:
            # A semaphore created with value zero can never be acquired:
            # without this guard the unordered path would hang indefinitely.
            raise ValueError(
                "The concurrency for an unordered insert_many must be "
                "a positive integer."
            )
        if chunk_size is None:
            _chunk_size = DEFAULT_INSERT_MANY_CHUNK_SIZE
        else:
            _chunk_size = chunk_size
        if _chunk_size < 1:
            # A negative step makes the chunking range empty: nothing would be
            # inserted, yet an (empty) successful result would be returned.
            raise ValueError("The chunk_size for insert_many must be a positive integer.")
        _documents = _collate_vectors_to_documents(documents, vectors, vectorize)
        _max_time_ms = max_time_ms or self.api_options.max_time_ms
        logger.info(f"inserting {len(_documents)} documents in '{self.name}'")
        raw_results: list[dict[str, Any]] = []
        # a single timeout budget is shared by all of the HTTP requests issued here
        timeout_manager = MultiCallTimeoutManager(overall_max_time_ms=_max_time_ms)
        # start offsets of each chunk of documents (a range can be iterated repeatedly)
        chunk_starts = range(0, len(_documents), _chunk_size)
        if ordered:
            options = {"ordered": True}
            inserted_ids: list[Any] = []
            for i in chunk_starts:
                im_payload = {
                    "insertMany": {
                        "documents": _documents[i : i + _chunk_size],
                        "options": options,
                    },
                }
                logger.info(f"insertMany on '{self.name}'")
                chunk_response = await self._api_commander.async_request(
                    payload=im_payload,
                    raise_api_errors=False,
                    timeout_info=timeout_manager.remaining_timeout_info(),
                )
                logger.info(f"finished insertMany on '{self.name}'")
                # accumulate the results in this call
                chunk_inserted_ids = (chunk_response.get("status") or {}).get(
                    "insertedIds", []
                )
                inserted_ids.extend(chunk_inserted_ids)
                raw_results.append(chunk_response)
                # if errors, quit early: later chunks are never sent
                if chunk_response.get("errors", []):
                    partial_result = InsertManyResult(
                        raw_results=raw_results,
                        inserted_ids=inserted_ids,
                    )
                    raise InsertManyException.from_response(
                        command=None,
                        raw_response=chunk_response,
                        partial_result=partial_result,
                    )

            # return
            full_result = InsertManyResult(
                raw_results=raw_results,
                inserted_ids=inserted_ids,
            )
            logger.info(
                f"finished inserting {len(_documents)} documents in '{self.name}'"
            )
            return full_result

        else:
            # unordered: concurrent or not, do all of them and parse the results
            options = {"ordered": False}

            # the semaphore caps the number of in-flight requests
            sem = asyncio.Semaphore(_concurrency)

            async def concurrent_insert_chunk(
                document_chunk: list[DocumentType],
            ) -> dict[str, Any]:
                async with sem:
                    im_payload = {
                        "insertMany": {
                            "documents": document_chunk,
                            "options": options,
                        },
                    }
                    logger.info(f"insertMany(chunk) on '{self.name}'")
                    im_response = await self._api_commander.async_request(
                        payload=im_payload,
                        raise_api_errors=False,
                        timeout_info=timeout_manager.remaining_timeout_info(),
                    )
                    logger.info(f"finished insertMany(chunk) on '{self.name}'")
                    return im_response

            if _concurrency > 1:
                tasks = [
                    asyncio.create_task(
                        concurrent_insert_chunk(_documents[i : i + _chunk_size])
                    )
                    for i in chunk_starts
                ]
                raw_results = await asyncio.gather(*tasks)
            else:
                raw_results = [
                    await concurrent_insert_chunk(_documents[i : i + _chunk_size])
                    for i in chunk_starts
                ]

            # recast raw_results
            inserted_ids = [
                inserted_id
                for chunk_response in raw_results
                for inserted_id in (chunk_response.get("status") or {}).get(
                    "insertedIds", []
                )
            ]

            # check-raise (only after all chunks have been attempted)
            if any(
                chunk_response.get("errors", []) for chunk_response in raw_results
            ):
                partial_result = InsertManyResult(
                    raw_results=raw_results,
                    inserted_ids=inserted_ids,
                )
                raise InsertManyException.from_responses(
                    commands=[None for _ in raw_results],
                    raw_responses=raw_results,
                    partial_result=partial_result,
                )

            # return
            full_result = InsertManyResult(
                raw_results=raw_results,
                inserted_ids=inserted_ids,
            )
            logger.info(
                f"finished inserting {len(_documents)} documents in '{self.name}'"
            )
            return full_result

    def find(
        self,
        filter: FilterType | None = None,
        *,
        projection: ProjectionType | None = None,
        skip: int | None = None,
        limit: int | None = None,
        vector: VectorType | None = None,
        vectorize: str | None = None,
        include_similarity: bool | None = None,
        include_sort_vector: bool | None = None,
        sort: SortType | None = None,
        max_time_ms: int | None = None,
    ) -> AsyncCursor:
        """
        Find documents on the collection, matching a certain provided filter.

        The method returns a cursor that can then be iterated over. Depending
        on the method call pattern, the iteration over all documents can reflect
        collection mutations occurred since the `find` method was called, or not.
        In cases where the cursor reflects mutations in real-time, it will iterate
        over documents in an approximate way (i.e. exhibiting occasional skipped
        or duplicate documents). This happens when making use of the `sort`
        option in a non-vector-search manner.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            projection: it controls which parts of the document are returned.
                It can be an allow-list: `{"f1": True, "f2": True}`,
                or a deny-list: `{"fx": False, "fy": False}`, but not a mixture
                (except for the `_id` and other special fields, which can be
                associated to both True or False independently of the rest
                of the specification).
                The special star-projections `{"*": True}` and `{"*": False}`
                have the effect of returning the whole document and `{}` respectively.
                For lists in documents, slice directives can be passed to select
                portions of the list: for instance, `{"array": {"$slice": 2}}`,
                `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
                `{"array": {"$slice": [-4, 2]}}`.
                An iterable over strings will be treated implicitly as an allow-list.
                The default projection (used if this parameter is not passed) does not
                necessarily include "special" fields such as `$vector` or `$vectorize`.
                See the Data API documentation for more on projections.
            skip: with this integer parameter, what would be the first `skip`
                documents returned by the query are discarded, and the results
                start from the (skip+1)-th document.
                This parameter can be used only in conjunction with an explicit
                `sort` criterion of the ascending/descending type (i.e. it cannot
                be used when not sorting, nor with vector-based ANN search).
            limit: this (integer) parameter sets a limit over how many documents
                are returned. Once `limit` is reached (or the cursor is exhausted
                for lack of matching documents), nothing more is returned.
            vector: a suitable vector, i.e. a list of float numbers of the appropriate
                dimensionality, to perform vector search (i.e. ANN,
                or "approximate nearest-neighbours" search).
                When running similarity search on a collection, no other sorting
                criteria can be specified. Moreover, there is an upper bound
                to the number of documents that can be returned. For details,
                see the Note about upper bounds and the Data API documentation.
                *DEPRECATED* (removal in 2.0). Use a `$vector` key in the
                sort clause dict instead.
            vectorize: a string to be made into a vector to perform vector search.
                This can be supplied in (exclusive) alternative to `vector`,
                provided such a service is configured for the collection,
                and achieves the same effect.
                *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the
                sort clause dict instead.
            include_similarity: a boolean to request the numeric value of the
                similarity to be returned as an added "$similarity" key in each
                returned document. Can only be used for vector ANN search, i.e.
                when either `vector` is supplied or the `sort` parameter has the
                shape {"$vector": ...}.
            include_sort_vector: a boolean to request the query vector used in
                this search. If set to True (and if the invocation is a vector
                search), calling the `get_sort_vector` method on the returned
                cursor will yield the vector used for the ANN search.
            sort: with this dictionary parameter one can control the order
                the documents are returned. See the Note about sorting, as well as
                the one about upper bounds, for details.
                Vector-based ANN sorting is achieved by providing a "$vector"
                or a "$vectorize" key in `sort`.
            max_time_ms: a timeout, in milliseconds, for each single one
                of the underlying HTTP requests used to fetch documents as the
                cursor is iterated over.
                If not passed, the collection-level setting is used instead.

        Returns:
            an AsyncCursor object representing iterations over the matching documents
            (see the AsyncCursor object for how to use it. The simplest thing is to
            run an async for loop: `async for document in collection.find(...):`).

        Raises:
            ValueError: if `include_similarity` is passed while the resulting
                sort clause is not a vector sort.

        Examples:
            >>> async def run_finds(acol: AsyncCollection) -> None:
            ...     filter = {"seq": {"$exists": True}}
            ...     print("find results 1:")
            ...     async for doc in acol.find(filter, projection={"seq": True}, limit=5):
            ...         print(doc["seq"])
            ...     async_cursor1 = acol.find(
            ...         {},
            ...         limit=4,
            ...         sort={"seq": astrapy.constants.SortDocuments.DESCENDING},
            ...     )
            ...     ids = [doc["_id"] async for doc in async_cursor1]
            ...     print("find results 2:", ids)
            ...     async_cursor2 = acol.find({}, limit=3)
            ...     seqs = await async_cursor2.distinct("seq")
            ...     print("distinct results 3:", seqs)
            ...
            >>> asyncio.run(run_finds(my_async_coll))
            find results 1:
            48
            35
            7
            11
            13
            find results 2: ['d656cd9d-...', '479c7ce8-...', '96dc87fd-...', '83f0a21f-...']
            distinct results 3: [48, 35, 7]

            >>> async def run_vector_finds(acol: AsyncCollection) -> list[str]:
            ...     await acol.insert_many([
            ...         {"tag": "A", "$vector": [4, 5]},
            ...         {"tag": "B", "$vector": [3, 4]},
            ...         {"tag": "C", "$vector": [3, 2]},
            ...         {"tag": "D", "$vector": [4, 1]},
            ...         {"tag": "E", "$vector": [2, 5]},
            ...     ])
            ...     ann_tags = [
            ...         document["tag"]
            ...         async for document in acol.find(
            ...             {},
            ...             sort={"$vector": [3, 3]},
            ...             limit=3,
            ...         )
            ...     ]
            ...     return ann_tags
            ...
            >>> asyncio.run(run_vector_finds(my_async_coll))
            ['A', 'B', 'C']
            >>> # (assuming the collection has metric VectorMetric.COSINE)

            >>> async_cursor = my_async_coll.find(
            ...     sort={"$vector": [3, 3]},
            ...     limit=3,
            ...     include_sort_vector=True,
            ... )
            >>> asyncio.run(async_cursor.get_sort_vector())
            [3.0, 3.0]
            >>> asyncio.run(async_cursor.__anext__())
            {'_id': 'b13ce177-738e-47ec-bce1-77738ee7ec93', 'tag': 'A'}
            >>> asyncio.run(async_cursor.get_sort_vector())
            [3.0, 3.0]

        Note:
            The following are example values for the `sort` parameter.
            When no particular order is required:
                sort={}
            When sorting by a certain value in ascending/descending order:
                sort={"field": SortDocuments.ASCENDING}
                sort={"field": SortDocuments.DESCENDING}
            When sorting first by "field" and then by "subfield"
            (while modern Python versions preserve the order of dictionaries,
            it is suggested for clarity to employ a `collections.OrderedDict`
            in these cases):
                sort={
                    "field": SortDocuments.ASCENDING,
                    "subfield": SortDocuments.ASCENDING,
                }
            When running a vector similarity (ANN) search:
                sort={"$vector": [0.4, 0.15, -0.5]}

        Note:
            Some combinations of arguments impose an implicit upper bound on the
            number of documents that are returned by the Data API. More specifically:
            (a) Vector ANN searches cannot return more than a number of documents
            that at the time of writing is set to 1000 items.
            (b) When using a sort criterion of the ascending/descending type,
            the Data API will return a smaller number of documents, set to 20
            at the time of writing, and stop there. The returned documents are
            the top results across the whole collection according to the requested
            criterion.
            These provisions should be kept in mind even when subsequently running
            a command such as `.distinct()` on a cursor.

        Note:
            When not specifying sorting criteria at all (by vector or otherwise),
            the cursor can scroll through an arbitrary number of documents as
            the Data API and the client periodically exchange new chunks of documents.
            It should be noted that the behavior of the cursor in the case documents
            have been added/removed after the `find` was started depends on database
            internals and it is not guaranteed, nor excluded, that such "real-time"
            changes in the data would be picked up by the cursor.
        """

        check_deprecated_vector_ize(
            vector=vector,
            vectors=None,
            vectorize=vectorize,
            kind="find",
        )
        # merge the (deprecated) vector/vectorize arguments into the sort clause
        effective_sort = _collate_vector_to_sort(sort, vector, vectorize)
        per_request_timeout_ms = max_time_ms or self.api_options.max_time_ms
        # similarity scores only make sense when running a vector search
        if include_similarity is not None:
            if not _is_vector_sort(effective_sort):
                raise ValueError(
                    "Cannot use `include_similarity` unless for vector search."
                )
        # build the cursor and configure it one setting at a time
        cursor = AsyncCursor(
            collection=self,
            filter=filter,
            projection=projection,
            max_time_ms=per_request_timeout_ms,
            overall_max_time_ms=None,
        )
        cursor = cursor.skip(skip)
        cursor = cursor.limit(limit)
        cursor = cursor.sort(effective_sort)
        cursor = cursor.include_similarity(include_similarity)
        cursor = cursor.include_sort_vector(include_sort_vector)
        return cursor

    async def find_one(
        self,
        filter: FilterType | None = None,
        *,
        projection: ProjectionType | None = None,
        vector: VectorType | None = None,
        vectorize: str | None = None,
        include_similarity: bool | None = None,
        sort: SortType | None = None,
        max_time_ms: int | None = None,
    ) -> DocumentType | None:
        """
        Run a search and return the first document in the collection that
        matches the provided filter, if any is found.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            projection: it controls which parts of the document are returned.
                It can be an allow-list: `{"f1": True, "f2": True}`,
                or a deny-list: `{"fx": False, "fy": False}`, but not a mixture
                (except for the `_id` and other special fields, which can be
                associated to both True or False independently of the rest
                of the specification).
                The special star-projections `{"*": True}` and `{"*": False}`
                have the effect of returning the whole document and `{}` respectively.
                For lists in documents, slice directives can be passed to select
                portions of the list: for instance, `{"array": {"$slice": 2}}`,
                `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
                `{"array": {"$slice": [-4, 2]}}`.
                An iterable over strings will be treated implicitly as an allow-list.
                The default projection (used if this parameter is not passed) does not
                necessarily include "special" fields such as `$vector` or `$vectorize`.
                See the Data API documentation for more on projections.
            vector: a suitable vector, i.e. a list of float numbers of the appropriate
                dimensionality, to perform vector search (i.e. ANN,
                or "approximate nearest-neighbours" search), extracting the most
                similar document in the collection matching the filter.
                *DEPRECATED* (removal in 2.0). Use a `$vector` key in the
                sort clause dict instead.
            vectorize: a string to be made into a vector to perform vector search.
                Using vectorize assumes a suitable service is configured for the collection.
                *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the
                sort clause dict instead.
            include_similarity: a boolean to request the numeric value of the
                similarity to be returned as an added "$similarity" key in the
                returned document. Can only be used for vector ANN search, i.e.
                when either `vector` is supplied or the `sort` parameter has the
                shape {"$vector": ...}.
            sort: with this dictionary parameter one can control the order
                the documents are returned. See the Note about sorting for details.
                Vector-based ANN sorting is achieved by providing a "$vector"
                or a "$vectorize" key in `sort`.
            max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
                If not passed, the collection-level setting is used instead.

        Returns:
            a dictionary expressing the required document, otherwise None.

        Example:
            >>> async def demo_find_one(acol: AsyncCollection) -> None:
            ...     print("Count:", await acol.count_documents({}, upper_bound=100))
            ...     result0 = await acol.find_one({})
            ...     print("result0", result0)
            ...     result1 = await acol.find_one({"seq": 10})
            ...     print("result1", result1)
            ...     result2 = await acol.find_one({"seq": 1011})
            ...     print("result2", result2)
            ...     result3 = await acol.find_one({}, projection={"seq": False})
            ...     print("result3", result3)
            ...     result4 = await acol.find_one(
            ...         {},
            ...         sort={"seq": astrapy.constants.SortDocuments.DESCENDING},
            ...     )
            ...     print("result4", result4)
            ...
            >>>
            >>> asyncio.run(demo_find_one(my_async_coll))
            Count: 50
            result0 {'_id': '479c7ce8-...', 'seq': 48}
            result1 {'_id': '93e992c4-...', 'seq': 10}
            result2 None
            result3 {'_id': '479c7ce8-...'}
            result4 {'_id': 'd656cd9d-...', 'seq': 49}

            >>> asyncio.run(my_async_coll.find_one(
            ...     {},
            ...     sort={"$vector": [1, 0]},
            ...     projection={"*": True},
            ... ))
            {'_id': '...', 'tag': 'D', '$vector': [4.0, 1.0]}

        Note:
            See the `find` method for more details on the accepted parameters
            (whereas `skip` and `limit` are not valid parameters for `find_one`).
        """

        check_deprecated_vector_ize(
            vector=vector, vectors=None, vectorize=vectorize, kind="find"
        )
        request_timeout_ms = max_time_ms or self.api_options.max_time_ms
        # delegate to `find`, capping the cursor at a single document
        single_document_cursor = self.find(
            filter=filter,
            projection=projection,
            skip=None,
            limit=1,
            vector=vector,
            vectorize=vectorize,
            include_similarity=include_similarity,
            sort=sort,
            max_time_ms=request_timeout_ms,
        )
        try:
            return await single_document_cursor.__anext__()
        except StopAsyncIteration:
            # an exhausted cursor means no document matches
            return None

    async def distinct(
        self,
        key: str,
        *,
        filter: FilterType | None = None,
        max_time_ms: int | None = None,
    ) -> list[Any]:
        """
        Return a list of the unique values of `key` across the documents
        in the collection that match the provided filter.

        Args:
            key: the name of the field whose value is inspected across documents.
                Keys can use dot-notation to descend to deeper document levels.
                Example of acceptable `key` values:
                    "field"
                    "field.subfield"
                    "field.3"
                    "field.3.subfield"
                If lists are encountered and no numeric index is specified,
                all items in the list are visited.
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            max_time_ms: a timeout, in milliseconds, with the same meaning as for `find`.
                If not passed, the collection-level setting is used instead.

        Returns:
            a list of all different values for `key` found across the documents
            that match the filter. The result list has no repeated items.

        Example:
            >>> async def run_distinct(acol: AsyncCollection) -> None:
            ...     await acol.insert_many(
            ...         [
            ...             {"name": "Marco", "food": ["apple", "orange"], "city": "Helsinki"},
            ...             {"name": "Emma", "food": {"likes_fruit": True, "allergies": []}},
            ...         ]
            ...     )
            ...     distinct0 = await acol.distinct("name")
            ...     print("distinct('name')", distinct0)
            ...     distinct1 = await acol.distinct("city")
            ...     print("distinct('city')", distinct1)
            ...     distinct2 = await acol.distinct("food")
            ...     print("distinct('food')", distinct2)
            ...     distinct3 = await acol.distinct("food.1")
            ...     print("distinct('food.1')", distinct3)
            ...     distinct4 = await acol.distinct("food.allergies")
            ...     print("distinct('food.allergies')", distinct4)
            ...     distinct5 = await acol.distinct("food.likes_fruit")
            ...     print("distinct('food.likes_fruit')", distinct5)
            ...
            >>> asyncio.run(run_distinct(my_async_coll))
            distinct('name') ['Emma', 'Marco']
            distinct('city') ['Helsinki']
            distinct('food') [{'likes_fruit': True, 'allergies': []}, 'apple', 'orange']
            distinct('food.1') ['orange']
            distinct('food.allergies') []
            distinct('food.likes_fruit') [True]

        Note:
            It must be kept in mind that `distinct` is a client-side operation,
            which effectively browses all required documents using the logic
            of the `find` method and collects the unique values found for `key`.
            As such, there may be performance, latency and ultimately
            billing implications if the amount of matching documents is large.

        Note:
            For details on the behaviour of "distinct" in conjunction with
            real-time changes in the collection contents, see the
            Note of the `find` command.
        """

        # here the timeout is handed to the cursor as an overall budget,
        # with no per-request timeout set
        overall_timeout_ms = max_time_ms or self.api_options.max_time_ms
        # only the inspected key is requested through the projection
        key_projection = {key: True}
        cursor = AsyncCursor(
            collection=self,
            filter=filter,
            projection=key_projection,
            max_time_ms=None,
            overall_max_time_ms=overall_timeout_ms,
        )
        distinct_values = await cursor.distinct(key)
        return distinct_values

    async def count_documents(
        self,
        filter: FilterType,
        *,
        upper_bound: int,
        max_time_ms: int | None = None,
    ) -> int:
        """
        Return the exact number of documents in the collection that satisfy
        the given filter.

        Args:
            filter: a dictionary expressing a predicate in the Data API
                filter syntax. For instance:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                The Data API documentation lists the full set of operators.
            upper_bound: a mandatory ceiling on the result of the count.
                An exception is raised when the actual number of documents
                is greater than this value.
                An exception is likewise raised, regardless of `upper_bound`,
                when the actual number of documents is beyond the maximum
                count that the Data API can reach.
            max_time_ms: a timeout, in milliseconds, for the underlying HTTP
                request. When omitted (or falsy, such as 0), the
                collection-level setting is used instead.

        Returns:
            the exact count of matching documents.

        Raises:
            TooManyDocumentsToCountException: if the server flags "moreData"
                in its response status (its own maximum count was exceeded),
                or if the returned count is greater than `upper_bound`.
            DataAPIFaultyResponseException: if the response status carries
                no "count" entry.

        Example:
            >>> async def do_count_docs(acol: AsyncCollection) -> None:
            ...     await acol.insert_many([{"seq": i} for i in range(20)])
            ...     count0 = await acol.count_documents({}, upper_bound=100)
            ...     print("count0", count0)
            ...     count1 = await acol.count_documents({"seq":{"$gt": 15}}, upper_bound=100)
            ...     print("count1", count1)
            ...     count2 = await acol.count_documents({}, upper_bound=10)
            ...     print("count2", count2)
            ...
            >>> asyncio.run(do_count_docs(my_async_coll))
            count0 20
            count1 4
            Traceback (most recent call last):
                ... ...
            astrapy.exceptions.TooManyDocumentsToCountException

        Note:
            Counting is an expensive operation: the best practice is therefore
            to supply a reasonable `upper_bound`, in line with what the caller
            expects. Moreover, indiscriminate usage of count operations for
            sizeable amounts of documents (i.e. in the thousands and more)
            is discouraged in favor of alternative application-specific
            solutions. Keep in mind that the Data API has a hard upper limit
            on the amount of documents it will count, and that this method
            throws an exception if that limit is encountered.
        """

        timeout_ms = max_time_ms or self.api_options.max_time_ms
        command = {"countDocuments": {"filter": filter}}
        logger.info(f"countDocuments on '{self.name}'")
        response = await self._api_commander.async_request(
            payload=command,
            timeout_info=base_timeout_info(timeout_ms),
        )
        logger.info(f"finished countDocuments on '{self.name}'")

        status = response.get("status", {})
        # Without a "count" in the status, the response cannot be interpreted.
        if "count" not in status:
            raise DataAPIFaultyResponseException(
                text="Faulty response from count_documents API command.",
                raw_response=response,
            )

        count: int = status["count"]
        # "moreData" is the server's way to say it stopped counting at its cap.
        if status.get("moreData", False):
            raise TooManyDocumentsToCountException(
                text=f"Document count exceeds {count}, the maximum allowed by the server",
                server_max_count_exceeded=True,
            )
        # The caller-provided ceiling is enforced on the client side.
        if count > upper_bound:
            raise TooManyDocumentsToCountException(
                text="Document count exceeds required upper bound",
                server_max_count_exceeded=False,
            )
        return count

    async def estimated_document_count(
        self,
        *,
        max_time_ms: int | None = None,
    ) -> int:
        """
        Ask the API server for an estimate of how many documents
        the collection contains.

        Unlike `count_documents`, this method accepts no filtering parameters.

        Args:
            max_time_ms: a timeout, in milliseconds, for the underlying HTTP
                request. When omitted (or falsy, such as 0), the
                collection-level setting is used instead.

        Returns:
            a server-provided estimate count of the documents in the collection.

        Raises:
            DataAPIFaultyResponseException: if the response status carries
                no "count" entry.

        Example:
            >>> asyncio.run(my_async_coll.estimated_document_count())
            35700
        """
        timeout_ms = max_time_ms or self.api_options.max_time_ms
        command: dict[str, Any] = {"estimatedDocumentCount": {}}
        logger.info(f"estimatedDocumentCount on '{self.name}'")
        response = await self._api_commander.async_request(
            payload=command,
            timeout_info=base_timeout_info(timeout_ms),
        )
        logger.info(f"finished estimatedDocumentCount on '{self.name}'")

        status = response.get("status", {})
        # Without a "count" in the status, the response cannot be interpreted.
        if "count" not in status:
            raise DataAPIFaultyResponseException(
                text="Faulty response from estimated_document_count API command.",
                raw_response=response,
            )
        count: int = status["count"]
        return count

    async def find_one_and_replace(
        self,
        filter: FilterType,
        replacement: DocumentType,
        *,
        projection: ProjectionType | None = None,
        vector: VectorType | None = None,
        vectorize: str | None = None,
        sort: SortType | None = None,
        upsert: bool = False,
        return_document: str = ReturnDocument.BEFORE,
        max_time_ms: int | None = None,
    ) -> DocumentType | None:
        """
        Locate one document on the collection and overwrite it entirely with
        a new one, optionally inserting the new document if nothing matches.

        Args:
            filter: a dictionary expressing a predicate in the Data API
                filter syntax. For instance:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                The Data API documentation lists the full set of operators.
            replacement: the new document to write into the collection.
            projection: selects which parts of the document are returned.
                It may be an allow-list, `{"f1": True, "f2": True}`, or a
                deny-list, `{"fx": False, "fy": False}`, but the two cannot
                be mixed (with the exception of `_id` and other special
                fields, which may be set to True or False independently
                of the rest of the specification).
                The special star-projections `{"*": True}` and `{"*": False}`
                result in the whole document and in `{}` respectively.
                Slice directives select portions of lists within documents,
                for instance `{"array": {"$slice": 2}}`,
                `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
                `{"array": {"$slice": [-4, 2]}}`.
                An iterable over strings is implicitly treated as an allow-list.
                When this parameter is not passed, the default projection
                does not necessarily include "special" fields such as
                `$vector` or `$vectorize`.
                See the Data API documentation for more on projections.
            vector: a suitable vector, i.e. a list of float numbers of the
                appropriate dimensionality, to employ vector search (i.e. ANN,
                or "approximate nearest-neighbours" search) as the sorting
                criterion. The matched document (if any) is then the one
                most similar to the provided vector.
                *DEPRECATED* (removal in 2.0). Use a `$vector` key in the
                sort clause dict instead.
            vectorize: a string to be turned into a vector to perform vector
                search. Using vectorize assumes a suitable service is
                configured for the collection.
                *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the
                sort clause dict instead.
            sort: a dictionary controlling the ordering of the documents that
                match the filter, which effectively determines what document
                comes first and hence is the replaced one.
                See the `find` method for more on sorting.
                Vector-based ANN sorting is achieved by providing a "$vector"
                or a "$vectorize" key in `sort`.
            upsert: controls the behavior in absence of matches.
                If True, `replacement` is inserted as a new document when no
                matches are found on the collection. If False, the operation
                silently does nothing in case of no matches.
            return_document: a flag controlling what document is returned:
                with `ReturnDocument.BEFORE`, or the string "before",
                the document found on database is returned; with
                `ReturnDocument.AFTER`, or the string "after", the new
                document is returned. The default is "before".
            max_time_ms: a timeout, in milliseconds, for the underlying HTTP
                request. When omitted (or falsy, such as 0), the
                collection-level setting is used instead.

        Returns:
            A document, either the one before the replace operation or the
            one after that. Alternatively, the method returns None to represent
            that no matching document was found, or that no replacement
            was inserted (depending on the `return_document` parameter).

        Raises:
            DataAPIFaultyResponseException: if the response "data" carries
                no "document" entry.

        Example:
            >>> async def do_find_one_and_replace(acol: AsyncCollection) -> None:
            ...     await acol.insert_one({"_id": "rule1", "text": "all animals are equal"})
            ...     result0 = await acol.find_one_and_replace(
            ...         {"_id": "rule1"},
            ...         {"text": "some animals are more equal!"},
            ...     )
            ...     print("result0", result0)
            ...     result1 = await acol.find_one_and_replace(
            ...         {"text": "some animals are more equal!"},
            ...         {"text": "and the pigs are the rulers"},
            ...         return_document=astrapy.constants.ReturnDocument.AFTER,
            ...     )
            ...     print("result1", result1)
            ...     result2 = await acol.find_one_and_replace(
            ...         {"_id": "rule2"},
            ...         {"text": "F=ma^2"},
            ...         return_document=astrapy.constants.ReturnDocument.AFTER,
            ...     )
            ...     print("result2", result2)
            ...     result3 = await acol.find_one_and_replace(
            ...         {"_id": "rule2"},
            ...         {"text": "F=ma"},
            ...         upsert=True,
            ...         return_document=astrapy.constants.ReturnDocument.AFTER,
            ...         projection={"_id": False},
            ...     )
            ...     print("result3", result3)
            ...
            >>> asyncio.run(do_find_one_and_replace(my_async_coll))
            result0 {'_id': 'rule1', 'text': 'all animals are equal'}
            result1 {'_id': 'rule1', 'text': 'and the pigs are the rulers'}
            result2 None
            result3 {'text': 'F=ma'}
        """

        check_deprecated_vector_ize(
            vector=vector,
            vectors=None,
            vectorize=vectorize,
            kind="find",
        )
        # Fold the deprecated vector/vectorize parameters into the sort clause.
        sort_clause = _collate_vector_to_sort(sort, vector, vectorize)
        timeout_ms = max_time_ms or self.api_options.max_time_ms
        command_options = {
            "returnDocument": return_document,
            "upsert": upsert,
        }
        candidate_fields = {
            "filter": filter,
            "projection": normalize_optional_projection(projection),
            "replacement": replacement,
            "options": command_options,
            "sort": sort_clause,
        }
        # Only the fields that are actually set make it into the command.
        command = {
            "findOneAndReplace": {
                field: value
                for field, value in candidate_fields.items()
                if value is not None
            }
        }
        logger.info(f"findOneAndReplace on '{self.name}'")
        response = await self._api_commander.async_request(
            payload=command,
            timeout_info=base_timeout_info(timeout_ms),
        )
        logger.info(f"finished findOneAndReplace on '{self.name}'")

        data = response.get("data", {})
        if "document" not in data:
            raise DataAPIFaultyResponseException(
                text="Faulty response from find_one_and_replace API command.",
                raw_response=response,
            )
        # A null document is passed through as None.
        return data.get("document")  # type: ignore[no-any-return]

    async def replace_one(
        self,
        filter: FilterType,
        replacement: DocumentType,
        *,
        vector: VectorType | None = None,
        vectorize: str | None = None,
        sort: SortType | None = None,
        upsert: bool = False,
        max_time_ms: int | None = None,
    ) -> UpdateResult:
        """
        Overwrite a single document on the collection with a new one,
        optionally inserting the new document if nothing matches.

        Args:
            filter: a dictionary expressing a predicate in the Data API
                filter syntax. For instance:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                The Data API documentation lists the full set of operators.
            replacement: the new document to write into the collection.
            vector: a suitable vector, i.e. a list of float numbers of the
                appropriate dimensionality, to employ vector search (i.e. ANN,
                or "approximate nearest-neighbours" search) as the sorting
                criterion. The matched document (if any) is then the one
                most similar to the provided vector.
                *DEPRECATED* (removal in 2.0). Use a `$vector` key in the
                sort clause dict instead.
            vectorize: a string to be turned into a vector to perform vector
                search. Using vectorize assumes a suitable service is
                configured for the collection.
                *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the
                sort clause dict instead.
            sort: a dictionary controlling the ordering of the documents that
                match the filter, which effectively determines what document
                comes first and hence is the replaced one.
                See the `find` method for more on sorting.
                Vector-based ANN sorting is achieved by providing a "$vector"
                or a "$vectorize" key in `sort`.
            upsert: controls the behavior in absence of matches.
                If True, `replacement` is inserted as a new document when no
                matches are found on the collection. If False, the operation
                silently does nothing in case of no matches.
            max_time_ms: a timeout, in milliseconds, for the underlying HTTP
                request. When omitted (or falsy, such as 0), the
                collection-level setting is used instead.

        Returns:
            an UpdateResult object summarizing the outcome of the replace operation.

        Raises:
            DataAPIFaultyResponseException: if the response "data" carries
                no "document" entry.

        Example:
            >>> async def do_replace_one(acol: AsyncCollection) -> None:
            ...     await acol.insert_one({"Marco": "Polo"})
            ...     result0 = await acol.replace_one(
            ...         {"Marco": {"$exists": True}},
            ...         {"Buda": "Pest"},
            ...     )
            ...     print("result0.update_info", result0.update_info)
            ...     doc1 = await acol.find_one({"Buda": "Pest"})
            ...     print("doc1", doc1)
            ...     result1 = await acol.replace_one(
            ...         {"Mirco": {"$exists": True}},
            ...         {"Oh": "yeah?"},
            ...     )
            ...     print("result1.update_info", result1.update_info)
            ...     result2 = await acol.replace_one(
            ...         {"Mirco": {"$exists": True}},
            ...         {"Oh": "yeah?"},
            ...         upsert=True,
            ...     )
            ...     print("result2.update_info", result2.update_info)
            ...
            >>> asyncio.run(do_replace_one(my_async_coll))
            result0.update_info {'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1}
            doc1 {'_id': '6e669a5a-...', 'Buda': 'Pest'}
            result1.update_info {'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0}
            result2.update_info {'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '30e34e00-...'}
        """

        check_deprecated_vector_ize(
            vector=vector,
            vectors=None,
            vectorize=vectorize,
            kind="find",
        )
        # Fold the deprecated vector/vectorize parameters into the sort clause.
        sort_clause = _collate_vector_to_sort(sort, vector, vectorize)
        timeout_ms = max_time_ms or self.api_options.max_time_ms
        candidate_fields = {
            "filter": filter,
            "replacement": replacement,
            "options": {"upsert": upsert},
            "sort": sort_clause,
        }
        # The replacement is carried out through the findOneAndReplace command;
        # only the fields that are actually set make it into the payload.
        command = {
            "findOneAndReplace": {
                field: value
                for field, value in candidate_fields.items()
                if value is not None
            }
        }
        logger.info(f"findOneAndReplace on '{self.name}'")
        response = await self._api_commander.async_request(
            payload=command,
            timeout_info=base_timeout_info(timeout_ms),
        )
        logger.info(f"finished findOneAndReplace on '{self.name}'")

        if "document" not in response.get("data", {}):
            raise DataAPIFaultyResponseException(
                text="Faulty response from find_one_and_replace API command.",
                raw_response=response,
            )
        # A missing or empty status is treated as an empty dictionary.
        status = response.get("status") or {}
        return UpdateResult(
            raw_results=[response],
            update_info=_prepare_update_info([status]),
        )

    async def find_one_and_update(
        self,
        filter: FilterType,
        update: dict[str, Any],
        *,
        projection: ProjectionType | None = None,
        vector: VectorType | None = None,
        vectorize: str | None = None,
        sort: SortType | None = None,
        upsert: bool = False,
        return_document: str = ReturnDocument.BEFORE,
        max_time_ms: int | None = None,
    ) -> DocumentType | None:
        """
        Locate one document on the collection and apply the requested update
        to it, optionally inserting a new document if nothing matches.

        Args:
            filter: a dictionary expressing a predicate in the Data API
                filter syntax. For instance:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                The Data API documentation lists the full set of operators.
            update: the update prescription to apply to the document, given
                as a dictionary in the Data API syntax. For instance:
                    {"$set": {"field": "value"}}
                    {"$inc": {"counter": 10}}
                    {"$unset": {"field": ""}}
                See the Data API documentation for the full syntax.
            projection: selects which parts of the document are returned.
                It may be an allow-list, `{"f1": True, "f2": True}`, or a
                deny-list, `{"fx": False, "fy": False}`, but the two cannot
                be mixed (with the exception of `_id` and other special
                fields, which may be set to True or False independently
                of the rest of the specification).
                The special star-projections `{"*": True}` and `{"*": False}`
                result in the whole document and in `{}` respectively.
                Slice directives select portions of lists within documents,
                for instance `{"array": {"$slice": 2}}`,
                `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
                `{"array": {"$slice": [-4, 2]}}`.
                An iterable over strings is implicitly treated as an allow-list.
                When this parameter is not passed, the default projection
                does not necessarily include "special" fields such as
                `$vector` or `$vectorize`.
                See the Data API documentation for more on projections.
            vector: a suitable vector, i.e. a list of float numbers of the
                appropriate dimensionality, to employ vector search (i.e. ANN,
                or "approximate nearest-neighbours" search) as the sorting
                criterion. The matched document (if any) is then the one
                most similar to the provided vector.
                *DEPRECATED* (removal in 2.0). Use a `$vector` key in the
                sort clause dict instead.
            vectorize: a string to be turned into a vector to perform vector
                search. Using vectorize assumes a suitable service is
                configured for the collection.
                *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the
                sort clause dict instead.
            sort: a dictionary controlling the ordering of the documents that
                match the filter, which effectively determines what document
                comes first and hence is the updated one.
                See the `find` method for more on sorting.
                Vector-based ANN sorting is achieved by providing a "$vector"
                or a "$vectorize" key in `sort`.
            upsert: controls the behavior in absence of matches.
                If True, a new document (resulting from applying the `update`
                to an empty document) is inserted when no matches are found
                on the collection. If False, the operation silently does
                nothing in case of no matches.
            return_document: a flag controlling what document is returned:
                with `ReturnDocument.BEFORE`, or the string "before",
                the document found on database is returned; with
                `ReturnDocument.AFTER`, or the string "after", the new
                document is returned. The default is "before".
            max_time_ms: a timeout, in milliseconds, for the underlying HTTP
                request. When omitted (or falsy, such as 0), the
                collection-level setting is used instead.

        Returns:
            A document (or a projection thereof, as required), either the one
            before the update operation or the one after that.
            Alternatively, the method returns None to represent
            that no matching document was found, or that no update
            was applied (depending on the `return_document` parameter).

        Raises:
            DataAPIFaultyResponseException: if the response "data" carries
                no "document" entry.

        Example:
            >>> async def do_find_one_and_update(acol: AsyncCollection) -> None:
            ...     await acol.insert_one({"Marco": "Polo"})
            ...     result0 = await acol.find_one_and_update(
            ...         {"Marco": {"$exists": True}},
            ...         {"$set": {"title": "Mr."}},
            ...     )
            ...     print("result0", result0)
            ...     result1 = await acol.find_one_and_update(
            ...         {"title": "Mr."},
            ...         {"$inc": {"rank": 3}},
            ...         projection=["title", "rank"],
            ...         return_document=astrapy.constants.ReturnDocument.AFTER,
            ...     )
            ...     print("result1", result1)
            ...     result2 = await acol.find_one_and_update(
            ...         {"name": "Johnny"},
            ...         {"$set": {"rank": 0}},
            ...         return_document=astrapy.constants.ReturnDocument.AFTER,
            ...     )
            ...     print("result2", result2)
            ...     result3 = await acol.find_one_and_update(
            ...         {"name": "Johnny"},
            ...         {"$set": {"rank": 0}},
            ...         upsert=True,
            ...         return_document=astrapy.constants.ReturnDocument.AFTER,
            ...     )
            ...     print("result3", result3)
            ...
            >>> asyncio.run(do_find_one_and_update(my_async_coll))
            result0 {'_id': 'f7c936d3-b0a0-45eb-a676-e2829662a57c', 'Marco': 'Polo'}
            result1 {'_id': 'f7c936d3-b0a0-45eb-a676-e2829662a57c', 'title': 'Mr.', 'rank': 3}
            result2 None
            result3 {'_id': 'db3d678d-14d4-4caa-82d2-d5fb77dab7ec', 'name': 'Johnny', 'rank': 0}
        """

        check_deprecated_vector_ize(
            vector=vector,
            vectors=None,
            vectorize=vectorize,
            kind="find",
        )
        # Fold the deprecated vector/vectorize parameters into the sort clause.
        sort_clause = _collate_vector_to_sort(sort, vector, vectorize)
        timeout_ms = max_time_ms or self.api_options.max_time_ms
        command_options = {
            "returnDocument": return_document,
            "upsert": upsert,
        }
        candidate_fields = {
            "filter": filter,
            "update": update,
            "options": command_options,
            "sort": sort_clause,
            "projection": normalize_optional_projection(projection),
        }
        # Only the fields that are actually set make it into the command.
        command = {
            "findOneAndUpdate": {
                field: value
                for field, value in candidate_fields.items()
                if value is not None
            }
        }
        logger.info(f"findOneAndUpdate on '{self.name}'")
        response = await self._api_commander.async_request(
            payload=command,
            timeout_info=base_timeout_info(timeout_ms),
        )
        logger.info(f"finished findOneAndUpdate on '{self.name}'")

        data = response.get("data", {})
        if "document" not in data:
            raise DataAPIFaultyResponseException(
                text="Faulty response from find_one_and_update API command.",
                raw_response=response,
            )
        # A null document is passed through as None.
        return data.get("document")  # type: ignore[no-any-return]

    async def update_one(
        self,
        filter: FilterType,
        update: dict[str, Any],
        *,
        vector: VectorType | None = None,
        vectorize: str | None = None,
        sort: SortType | None = None,
        upsert: bool = False,
        max_time_ms: int | None = None,
    ) -> UpdateResult:
        """
        Update a single document on the collection as requested,
        optionally inserting a new one if no match is found.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            update: the update prescription to apply to the document, expressed
                as a dictionary as per Data API syntax. Examples are:
                    {"$set": {"field": "value}}
                    {"$inc": {"counter": 10}}
                    {"$unset": {"field": ""}}
                See the Data API documentation for the full syntax.
            vector: a suitable vector, i.e. a list of float numbers of the appropriate
                dimensionality, to use vector search (i.e. ANN,
                or "approximate nearest-neighbours" search), as the sorting criterion.
                In this way, the matched document (if any) will be the one
                that is most similar to the provided vector.
                *DEPRECATED* (removal in 2.0). Use a `$vector` key in the
                sort clause dict instead.
            vectorize: a string to be made into a vector to perform vector search.
                Using vectorize assumes a suitable service is configured for the collection.
                *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the
                sort clause dict instead.
            sort: with this dictionary parameter one can control the sorting
                order of the documents matching the filter, effectively
                determining what document will come first and hence be the
                replaced one. See the `find` method for more on sorting.
                Vector-based ANN sorting is achieved by providing a "$vector"
                or a "$vectorize" key in `sort`.
            upsert: this parameter controls the behavior in absence of matches.
                If True, a new document (resulting from applying the `update`
                to an empty document) is inserted if no matches are found on
                the collection. If False, the operation silently does nothing
                in case of no matches.
            max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
                If not passed, the collection-level setting is used instead.

        Returns:
            an UpdateResult object summarizing the outcome of the update operation.

        Example:
            >>> async def do_update_one(acol: AsyncCollection) -> None:
            ...     await acol.insert_one({"Marco": "Polo"})
            ...     result0 = await acol.update_one(
            ...         {"Marco": {"$exists": True}},
            ...         {"$inc": {"rank": 3}},
            ...     )
            ...     print("result0.update_info", result0.update_info)
            ...     result1 = await acol.update_one(
            ...         {"Mirko": {"$exists": True}},
            ...         {"$inc": {"rank": 3}},
            ...     )
            ...     print("result1.update_info", result1.update_info)
            ...     result2 = await acol.update_one(
            ...         {"Mirko": {"$exists": True}},
            ...         {"$inc": {"rank": 3}},
            ...         upsert=True,
            ...     )
            ...     print("result2.update_info", result2.update_info)
            ...
            >>> asyncio.run(do_update_one(my_async_coll))
            result0.update_info {'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1})
            result1.update_info {'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0})
            result2.update_info {'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '75748092-...'}
        """

        check_deprecated_vector_ize(
            vector=vector,
            vectors=None,
            vectorize=vectorize,
            kind="find",
        )
        _sort = _collate_vector_to_sort(sort, vector, vectorize)
        options = {
            "upsert": upsert,
        }
        _max_time_ms = max_time_ms or self.api_options.max_time_ms
        uo_payload = {
            "updateOne": {
                k: v
                for k, v in {
                    "filter": filter,
                    "update": update,
                    "options": options,
                    "sort": _sort,
                }.items()
                if v is not None
            }
        }
        logger.info(f"updateOne on '{self.name}'")
        uo_response = await self._api_commander.async_request(
            payload=uo_payload,
            timeout_info=base_timeout_info(_max_time_ms),
        )
        logger.info(f"finished updateOne on '{self.name}'")
        if "status" in uo_response:
            uo_status = uo_response["status"]
            _update_info = _prepare_update_info([uo_status])
            return UpdateResult(
                raw_results=[uo_response],
                update_info=_update_info,
            )
        else:
            raise DataAPIFaultyResponseException(
                text="Faulty response from update_one API command.",
                raw_response=uo_response,
            )

    async def update_many(
        self,
        filter: FilterType,
        update: dict[str, Any],
        *,
        upsert: bool = False,
        max_time_ms: int | None = None,
    ) -> UpdateResult:
        """
        Apply an update operation to all documents matching a condition,
        optionally inserting one document in absence of matches.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            update: the update prescription to apply to the documents, expressed
                as a dictionary as per Data API syntax. Examples are:
                    {"$set": {"field": "value"}}
                    {"$inc": {"counter": 10}}
                    {"$unset": {"field": ""}}
                See the Data API documentation for the full syntax.
            upsert: this parameter controls the behavior in absence of matches.
                If True, a single new document (resulting from applying `update`
                to an empty document) is inserted if no matches are found on
                the collection. If False, the operation silently does nothing
                in case of no matches.
            max_time_ms: a timeout, in milliseconds, for the operation.
                If not passed, the collection-level setting is used instead:
                if a large number of document updates is anticipated, it is suggested
                to specify a larger timeout than in most other operations as the
                update will span several HTTP calls to the API in sequence.

        Returns:
            an UpdateResult object summarizing the outcome of the update operation.

        Raises:
            UpdateManyException: if any of the API responses carries a non-empty
                "errors" field. The exception is built with all responses received
                so far and a partial UpdateResult covering the successful ones.
            DataAPIFaultyResponseException: if an error-free API response
                lacks the "status" field.

        Example:
            >>> async def do_update_many(acol: AsyncCollection) -> None:
            ...     await acol.insert_many([{"c": "red"}, {"c": "green"}, {"c": "blue"}])
            ...     result0 = await acol.update_many(
            ...         {"c": {"$ne": "green"}},
            ...         {"$set": {"nongreen": True}},
            ...     )
            ...     print("result0.update_info", result0.update_info)
            ...     result1 = await acol.update_many(
            ...         {"c": "orange"},
            ...         {"$set": {"is_also_fruit": True}},
            ...     )
            ...     print("result1.update_info", result1.update_info)
            ...     result2 = await acol.update_many(
            ...         {"c": "orange"},
            ...         {"$set": {"is_also_fruit": True}},
            ...         upsert=True,
            ...     )
            ...     print("result2.update_info", result2.update_info)
            ...
            >>> asyncio.run(do_update_many(my_async_coll))
            result0.update_info {'n': 2, 'updatedExisting': True, 'ok': 1.0, 'nModified': 2}
            result1.update_info {'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0}
            result2.update_info {'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '79ffd5a3-ab99-4dff-a2a5-4aaa0e59e854'}

        Note:
            Similarly to the case of `find` (see its docstring for more details),
            running this command while, at the same time, another process is
            inserting new documents which match the filter of the `update_many`
            can result in an unpredictable fraction of these documents being updated.
            In other words, it cannot be easily predicted whether a given
            newly-inserted document will be picked up by the update_many command or not.
        """

        api_options = {
            "upsert": upsert,
        }
        # the page state, when present, is merged into the options of the next call
        page_state_options: dict[str, str] = {}
        um_responses: list[dict[str, Any]] = []
        um_statuses: list[dict[str, Any]] = []
        must_proceed = True
        _max_time_ms = max_time_ms or self.api_options.max_time_ms
        logger.info(f"starting update_many on '{self.name}'")
        # a single time budget is shared by all HTTP calls of this method invocation
        timeout_manager = MultiCallTimeoutManager(overall_max_time_ms=_max_time_ms)
        while must_proceed:
            options = {**api_options, **page_state_options}
            this_um_payload = {
                "updateMany": {
                    k: v
                    for k, v in {
                        "filter": filter,
                        "update": update,
                        "options": options,
                    }.items()
                    if v is not None
                }
            }
            logger.info(f"updateMany on '{self.name}'")
            # API errors must come back within the response (and not be raised
            # by the commander), otherwise the partial-result handling just
            # below would never run. The sibling `delete_many` method passes
            # the same flag for the same reason.
            this_um_response = await self._api_commander.async_request(
                payload=this_um_payload,
                raise_api_errors=False,
                timeout_info=timeout_manager.remaining_timeout_info(),
            )
            logger.info(f"finished updateMany on '{self.name}'")
            this_um_status = this_um_response.get("status") or {}
            #
            # if errors, quit early
            if this_um_response.get("errors", []):
                # the partial result covers only the calls that succeeded ...
                partial_update_info = _prepare_update_info(um_statuses)
                partial_result = UpdateResult(
                    raw_results=um_responses,
                    update_info=partial_update_info,
                )
                # ... while the exception also gets the failing response
                all_um_responses = um_responses + [this_um_response]
                raise UpdateManyException.from_responses(
                    commands=[None for _ in all_um_responses],
                    raw_responses=all_um_responses,
                    partial_result=partial_result,
                )
            else:
                if "status" not in this_um_response:
                    raise DataAPIFaultyResponseException(
                        text="Faulty response from update_many API command.",
                        raw_response=this_um_response,
                    )
                um_responses.append(this_um_response)
                um_statuses.append(this_um_status)
                # a "nextPageState" in the status means there is more to update
                next_page_state = this_um_status.get("nextPageState")
                if next_page_state is not None:
                    must_proceed = True
                    page_state_options = {"pageState": next_page_state}
                else:
                    must_proceed = False
                    page_state_options = {}

        update_info = _prepare_update_info(um_statuses)
        logger.info(f"finished update_many on '{self.name}'")
        return UpdateResult(
            raw_results=um_responses,
            update_info=update_info,
        )

    async def find_one_and_delete(
        self,
        filter: FilterType,
        *,
        projection: ProjectionType | None = None,
        vector: VectorType | None = None,
        vectorize: str | None = None,
        sort: SortType | None = None,
        max_time_ms: int | None = None,
    ) -> DocumentType | None:
        """
        Locate a single document on the collection, delete it, and hand
        the just-deleted document back to the caller as the return value.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            projection: determines which parts of the document are returned.
                It is either an allow-list, `{"f1": True, "f2": True}`,
                or a deny-list, `{"fx": False, "fy": False}`; the two cannot
                be mixed (with the exception of `_id` and other special fields,
                which can be set to True or False independently of the rest
                of the specification).
                The special star-projections `{"*": True}` and `{"*": False}`
                result in the whole document and in `{}` respectively.
                For lists in documents, slice directives select portions
                of the list: for instance, `{"array": {"$slice": 2}}`,
                `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
                `{"array": {"$slice": [-4, 2]}}`.
                An iterable over strings is implicitly treated as an allow-list.
                The default projection (used if this parameter is not passed) does not
                necessarily include "special" fields such as `$vector` or `$vectorize`.
                See the Data API documentation for more on projections.
            vector: a suitable vector, i.e. a list of float numbers of the appropriate
                dimensionality, to use vector search (i.e. ANN,
                or "approximate nearest-neighbours" search), as the sorting criterion.
                In this way, the matched document (if any) will be the one
                that is most similar to the provided vector.
                *DEPRECATED* (removal in 2.0). Use a `$vector` key in the
                sort clause dict instead.
            vectorize: a string to be made into a vector to perform vector search.
                Using vectorize assumes a suitable service is configured for the collection.
                *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the
                sort clause dict instead.
            sort: a dictionary controlling the ordering of the documents that
                match the filter: it effectively decides which document
                comes first and is therefore the deleted one.
                See the `find` method for more on sorting.
                Vector-based ANN sorting is achieved by providing a "$vector"
                or a "$vectorize" key in `sort`.
            max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
                If not passed, the collection-level setting is used instead.

        Returns:
            Either the document (or a projection thereof, as requested), or None
            if no matches were found in the first place.

        Example:
            >>> async def do_find_one_and_delete(acol: AsyncCollection) -> None:
            ...     await acol.insert_many(
            ...         [
            ...             {"species": "swan", "class": "Aves"},
            ...             {"species": "frog", "class": "Amphibia"},
            ...         ],
            ...     )
            ...     delete_result0 = await acol.find_one_and_delete(
            ...         {"species": {"$ne": "frog"}},
            ...         projection=["species"],
            ...     )
            ...     print("delete_result0", delete_result0)
            ...     delete_result1 = await acol.find_one_and_delete(
            ...         {"species": {"$ne": "frog"}},
            ...     )
            ...     print("delete_result1", delete_result1)
            ...
            >>> asyncio.run(do_find_one_and_delete(my_async_coll))
            delete_result0 {'_id': 'f335cd0f-...', 'species': 'swan'}
            delete_result1 None
        """

        check_deprecated_vector_ize(
            vector=vector,
            vectors=None,
            vectorize=vectorize,
            kind="find",
        )
        effective_sort = _collate_vector_to_sort(sort, vector, vectorize)
        effective_projection = normalize_optional_projection(projection)
        timeout_ms = max_time_ms or self.api_options.max_time_ms

        # clauses whose value is None are left out of the command altogether
        command_body: dict[str, Any] = {}
        for clause_name, clause_value in (
            ("filter", filter),
            ("sort", effective_sort),
            ("projection", effective_projection),
        ):
            if clause_value is not None:
                command_body[clause_name] = clause_value

        logger.info(f"findOneAndDelete on '{self.name}'")
        response = await self._api_commander.async_request(
            payload={"findOneAndDelete": command_body},
            timeout_info=base_timeout_info(timeout_ms),
        )
        logger.info(f"finished findOneAndDelete on '{self.name}'")

        response_data = response.get("data", {})
        if "document" in response_data:
            return response_data["document"]  # type: ignore[no-any-return]
        # no document returned: legitimate only if the API reports zero deletions
        if response.get("status", {}).get("deletedCount") == 0:
            return None
        raise DataAPIFaultyResponseException(
            text="Faulty response from find_one_and_delete API command.",
            raw_response=response,
        )

    async def delete_one(
        self,
        filter: FilterType,
        *,
        vector: VectorType | None = None,
        vectorize: str | None = None,
        sort: SortType | None = None,
        max_time_ms: int | None = None,
    ) -> DeleteResult:
        """
        Delete a single document that matches the provided filter.
        No matter how many documents satisfy the filter, this method
        removes one document at most.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            vector: a suitable vector, i.e. a list of float numbers of the appropriate
                dimensionality, to use vector search (i.e. ANN,
                or "approximate nearest-neighbours" search), as the sorting criterion.
                In this way, the matched document (if any) will be the one
                that is most similar to the provided vector.
                *DEPRECATED* (removal in 2.0). Use a `$vector` key in the
                sort clause dict instead.
            vectorize: a string to be made into a vector to perform vector search.
                Using vectorize assumes a suitable service is configured for the collection.
                *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the
                sort clause dict instead.
            sort: a dictionary controlling the ordering of the documents that
                match the filter: it effectively decides which document
                comes first and is therefore the deleted one.
                See the `find` method for more on sorting.
                Vector-based ANN sorting is achieved by providing a "$vector"
                or a "$vectorize" key in `sort`.
            max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
                If not passed, the collection-level setting is used instead.

        Returns:
            a DeleteResult object summarizing the outcome of the delete operation.

        Example:
            >>> async def do_delete_one(acol: AsyncCollection) -> None:
            ...     await acol.insert_many([{"seq": 1}, {"seq": 0}, {"seq": 2}])
            ...     delete_result0 = await acol.delete_one({"seq": 1})
            ...     print("delete_result0.deleted_count", delete_result0.deleted_count)
            ...     distinct1 = await acol.distinct("seq")
            ...     print("distinct1", distinct1)
            ...     delete_result2 = await acol.delete_one(
            ...         {"seq": {"$exists": True}},
            ...         sort={"seq": astrapy.constants.SortDocuments.DESCENDING},
            ...     )
            ...     print("delete_result2.deleted_count", delete_result2.deleted_count)
            ...     distinct3 = await acol.distinct("seq")
            ...     print("distinct3", distinct3)
            ...     delete_result4 = await acol.delete_one({"seq": 2})
            ...     print("delete_result4.deleted_count", delete_result4.deleted_count)
            ...
            >>> asyncio.run(do_delete_one(my_async_coll))
            delete_result0.deleted_count 1
            distinct1 [0, 2]
            delete_result2.deleted_count 1
            distinct3 [0]
            delete_result4.deleted_count 0
        """

        check_deprecated_vector_ize(
            vector=vector,
            vectors=None,
            vectorize=vectorize,
            kind="find",
        )
        effective_sort = _collate_vector_to_sort(sort, vector, vectorize)
        timeout_ms = max_time_ms or self.api_options.max_time_ms

        # only the clauses actually provided (i.e. not None) enter the command
        command_body: dict[str, Any] = {}
        if filter is not None:
            command_body["filter"] = filter
        if effective_sort is not None:
            command_body["sort"] = effective_sort

        logger.info(f"deleteOne on '{self.name}'")
        response = await self._api_commander.async_request(
            payload={"deleteOne": command_body},
            timeout_info=base_timeout_info(timeout_ms),
        )
        logger.info(f"finished deleteOne on '{self.name}'")

        status = response.get("status", {})
        # a response without a deleted-documents count is considered malformed
        if "deletedCount" not in status:
            raise DataAPIFaultyResponseException(
                text="Faulty response from delete_one API command.",
                raw_response=response,
            )
        return DeleteResult(
            deleted_count=status["deletedCount"],
            raw_results=[response],
        )

    async def delete_many(
        self,
        filter: FilterType,
        *,
        max_time_ms: int | None = None,
    ) -> DeleteResult:
        """
        Delete every document that matches the provided filter.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
                Passing an empty filter, `{}`, completely erases all contents
                of the collection.
            max_time_ms: a timeout, in milliseconds, for the operation.
                If not passed, the collection-level setting is used instead:
                keep in mind that this method entails successive HTTP requests
                to the API, depending on how many documents are to be deleted.
                For this reason, in most cases it is suggested to relax the
                timeout compared to other method calls.

        Returns:
            a DeleteResult object summarizing the outcome of the delete operation.

        Example:
            >>> async def do_delete_many(acol: AsyncCollection) -> None:
            ...     await acol.insert_many([{"seq": 1}, {"seq": 0}, {"seq": 2}])
            ...     delete_result0 = await acol.delete_many({"seq": {"$lte": 1}})
            ...     print("delete_result0.deleted_count", delete_result0.deleted_count)
            ...     distinct1 = await acol.distinct("seq")
            ...     print("distinct1", distinct1)
            ...     delete_result2 = await acol.delete_many({"seq": {"$lte": 1}})
            ...     print("delete_result2.deleted_count", delete_result2.deleted_count)
            ...
            >>> asyncio.run(do_delete_many(my_async_coll))
            delete_result0.deleted_count 2
            distinct1 [2]
            delete_result2.deleted_count 0

        Note:
            This operation is in general not atomic. Depending on the amount
            of matching documents, it can keep running (in a blocking way)
            for a macroscopic time. In that case, new documents that are
            meanwhile inserted (e.g. from another process/application) will be
            deleted during the execution of this method call until the
            collection is devoid of matches.
            An exception is the `filter={}` case, whereby the operation is atomic.
        """
        collected_responses: list[dict[str, Any]] = []
        total_deleted = 0
        timeout_ms = max_time_ms or self.api_options.max_time_ms
        # one overall time budget, shared by all of the HTTP requests below
        timeout_manager = MultiCallTimeoutManager(overall_max_time_ms=timeout_ms)
        # the very same command is re-sent until the API signals no more data
        payload = {"deleteMany": {"filter": filter}}
        logger.info(f"starting delete_many on '{self.name}'")
        while True:
            logger.info(f"deleteMany on '{self.name}'")
            response = await self._api_commander.async_request(
                payload=payload,
                raise_api_errors=False,
                timeout_info=timeout_manager.remaining_timeout_info(),
            )
            logger.info(f"finished deleteMany on '{self.name}'")

            # on errors, stop right away and report what was deleted so far
            if response.get("errors", []):
                partial_result = DeleteResult(
                    deleted_count=total_deleted,
                    raw_results=collected_responses,
                )
                responses_with_failure = collected_responses + [response]
                raise DeleteManyException.from_responses(
                    commands=[None] * len(responses_with_failure),
                    raw_responses=responses_with_failure,
                    partial_result=partial_result,
                )

            status = response.get("status", {})
            batch_deleted = status.get("deletedCount")
            if batch_deleted is None:
                raise DataAPIFaultyResponseException(
                    text="Faulty response from delete_many API command.",
                    raw_response=response,
                )
            collected_responses.append(response)
            total_deleted += batch_deleted
            if not status.get("moreData", False):
                break

        logger.info(f"finished delete_many on '{self.name}'")
        return DeleteResult(
            deleted_count=total_deleted,
            raw_results=collected_responses,
        )

    @deprecation.deprecated(  # type: ignore[misc]
        deprecated_in="1.3.0",
        removed_in="2.0.0",
        current_version=__version__,
        details="Use delete_many with filter={} instead.",
    )
    async def delete_all(self, *, max_time_ms: int | None = None) -> dict[str, Any]:
        """
        Remove every document from the collection.

        This deprecated method delegates to `delete_many` with an empty filter.

        Args:
            max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
                If not passed, the collection-level setting is used instead.

        Returns:
            a dictionary of the form {"ok": 1} to signal successful deletion.

        Example:
            >>> async def do_delete_all(acol: AsyncCollection) -> None:
            ...     distinct0 = await acol.distinct("seq")
            ...     print("distinct0", distinct0)
            ...     count1 = await acol.count_documents({}, upper_bound=100)
            ...     print("count1", count1)
            ...     delete_result2 = await acol.delete_all()
            ...     print("delete_result2", delete_result2)
            ...     count3 = await acol.count_documents({}, upper_bound=100)
            ...     print("count3", count3)
            ...
            >>> asyncio.run(do_delete_all(my_async_coll))
            distinct0 [4, 2, 3, 0, 1]
            count1 5
            delete_result2 {'ok': 1}
            count3 0

        Note:
            Use with caution.
        """
        outcome = await self.delete_many(filter={}, max_time_ms=max_time_ms)
        # success is recognized by a reported deleted count of exactly -1:
        # any other value is treated as an unexpected response
        if outcome.deleted_count != -1:
            raise DataAPIFaultyResponseException(
                text="Unexpected response from collection.delete_many({}).",
                raw_response=None,
            )
        return {"ok": 1}

    @deprecation.deprecated(  # type: ignore[misc]
        deprecated_in="1.5.0",
        removed_in="2.0.0",
        current_version=__version__,
        details=(
            "Please switch to managing sequences of DML operations "
            "in app code instead."
        ),
    )
    async def bulk_write(
        self,
        requests: Iterable[AsyncBaseOperation],
        *,
        ordered: bool = False,
        concurrency: int | None = None,
        max_time_ms: int | None = None,
    ) -> BulkWriteResult:
        """
        Execute an arbitrary amount of operations such as inserts, updates, deletes
        either sequentially or concurrently.

        This method does not execute atomically, i.e. individual operations are
        each performed in the same way as the corresponding collection method,
        and each one is a different and unrelated database mutation.

        Args:
            requests: an iterable over concrete subclasses of `BaseOperation`,
                such as `AsyncInsertMany` or `AsyncReplaceOne`. Each such object
                represents an operation ready to be executed on a collection,
                and is instantiated by passing the same parameters as one
                would the corresponding collection method.
            ordered: whether to launch the `requests` one after the other or
                in arbitrary order, possibly in a concurrent fashion. For
                performance reasons, False (default) should be preferred
                when compatible with the needs of the application flow.
            concurrency: maximum number of concurrent operations executing at
                a given time. It cannot be more than one for ordered bulk writes.
            max_time_ms: a timeout, in milliseconds, for the whole bulk write.
                Remember that, if the method call times out, then there's no
                guarantee about what portion of the bulk write has been received
                and successfully executed by the Data API.
                If not passed, the collection-level setting is used instead:
                in most cases, however, one should pass a relaxed timeout
                if longer sequences of operations are to be executed in bulk.

        Returns:
            A single BulkWriteResult summarizing the whole list of requested
            operations. The keys in the map attributes of BulkWriteResult
            (when present) are the integer indices of the corresponding operation
            in the `requests` iterable.

        Example:
            >>> from astrapy.operations import AsyncInsertMany, AsyncReplaceOne, AsyncOperation
            >>> from astrapy.results import BulkWriteResult
            >>>
            >>> async def do_bulk_write(
            ...     acol: AsyncCollection,
            ...     async_operations: List[AsyncOperation],
            ... ) -> BulkWriteResult:
            ...     bw_result = await acol.bulk_write(async_operations)
            ...     count0 = await acol.count_documents({}, upper_bound=100)
            ...     print("count0", count0)
            ...     distinct0 = await acol.distinct("replaced")
            ...     print("distinct0", distinct0)
            ...     return bw_result
            ...
            >>> op1 = AsyncInsertMany([{"a": 1}, {"a": 2}])
            >>> op2 = AsyncReplaceOne(
            ...     {"z": 9},
            ...     replacement={"z": 9, "replaced": True},
            ...     upsert=True,
            ... )
            >>> result = asyncio.run(do_bulk_write(my_async_coll, [op1, op2]))
            count0 3
            distinct0 [True]
            >>> print("result", result)
            result BulkWriteResult(bulk_api_results={0: ..., 1: ...}, deleted_count=0, inserted_count=3, matched_count=0, modified_count=0, upserted_count=1, upserted_ids={1: 'ccd0a800-...'})
        """

        # lazy importing here against circular-import error
        from astrapy.operations import reduce_bulk_write_results

        if concurrency is None:
            if ordered:
                _concurrency = 1
            else:
                _concurrency = DEFAULT_BULK_WRITE_CONCURRENCY
        else:
            _concurrency = concurrency
        if _concurrency > 1 and ordered:
            raise ValueError("Cannot run ordered bulk_write concurrently.")
        _max_time_ms = max_time_ms or self.api_options.max_time_ms
        logger.info(f"startng a bulk write on '{self.name}'")
        timeout_manager = MultiCallTimeoutManager(overall_max_time_ms=_max_time_ms)
        if ordered:
            bulk_write_results: list[BulkWriteResult] = []
            for operation_i, operation in enumerate(requests):
                try:
                    this_bw_result = await operation.execute(
                        self,
                        index_in_bulk_write=operation_i,
                        bulk_write_timeout_ms=timeout_manager.remaining_timeout_ms(),
                    )
                    bulk_write_results.append(this_bw_result)
                except CumulativeOperationException as exc:
                    partial_result = exc.partial_result
                    partial_bw_result = reduce_bulk_write_results(
                        bulk_write_results
                        + [
                            partial_result.to_bulk_write_result(
                                index_in_bulk_write=operation_i
                            )
                        ]
                    )
                    dar_exception = exc.data_api_response_exception()
                    raise BulkWriteException(
                        text=dar_exception.text,
                        error_descriptors=dar_exception.error_descriptors,
                        detailed_error_descriptors=dar_exception.detailed_error_descriptors,
                        partial_result=partial_bw_result,
                        exceptions=[dar_exception],
                    )
                except DataAPIResponseException as exc:
                    # the cumulative exceptions, with their
                    # partially-done-info, are handled above:
                    # here it's just one-shot d.a.r. exceptions
                    partial_bw_result = reduce_bulk_write_results(bulk_write_results)
                    dar_exception = exc.data_api_response_exception()
                    raise BulkWriteException(
                        text=dar_exception.text,
                        error_descriptors=dar_exception.error_descriptors,
                        detailed_error_descriptors=dar_exception.detailed_error_descriptors,
                        partial_result=partial_bw_result,
                        exceptions=[dar_exception],
                    )
            full_bw_result = reduce_bulk_write_results(bulk_write_results)
            logger.info(f"finished a bulk write on '{self.name}'")
            return full_bw_result
        else:
            sem = asyncio.Semaphore(_concurrency)

            async def _concurrent_execute_as_either(
                operation: AsyncBaseOperation, operation_i: int
            ) -> tuple[BulkWriteResult | None, DataAPIResponseException | None]:
                async with sem:
                    try:
                        ex_result = await operation.execute(
                            self,
                            index_in_bulk_write=operation_i,
                            bulk_write_timeout_ms=timeout_manager.remaining_timeout_ms(),
                        )
                        return (ex_result, None)
                    except DataAPIResponseException as exc:
                        return (None, exc)

            tasks = [
                asyncio.create_task(
                    _concurrent_execute_as_either(operation, operation_i)
                )
                for operation_i, operation in enumerate(requests)
            ]
            bulk_write_either_results = await asyncio.gather(*tasks)
            # regroup
            bulk_write_successes = [bwr for bwr, _ in bulk_write_either_results if bwr]
            bulk_write_failures = [bwf for _, bwf in bulk_write_either_results if bwf]
            if bulk_write_failures:
                # extract and cumulate
                partial_results_from_failures = [
                    failure.partial_result.to_bulk_write_result(
                        index_in_bulk_write=operation_i
                    )
                    for failure in bulk_write_failures
                    if isinstance(failure, CumulativeOperationException)
                ]
                partial_bw_result = reduce_bulk_write_results(
                    bulk_write_successes + partial_results_from_failures
                )
                # raise and recast the first exception
                all_dar_exceptions = [
                    bw_failure.data_api_response_exception()
                    for bw_failure in bulk_write_failures
                ]
                dar_exception = all_dar_exceptions[0]
                raise BulkWriteException(
                    text=dar_exception.text,
                    error_descriptors=dar_exception.error_descriptors,
                    detailed_error_descriptors=dar_exception.detailed_error_descriptors,
                    partial_result=partial_bw_result,
                    exceptions=all_dar_exceptions,
                )
            else:
                logger.info(f"finished a bulk write on '{self.name}'")
                return reduce_bulk_write_results(bulk_write_successes)

    async def drop(self, *, max_time_ms: int | None = None) -> dict[str, Any]:
        """
        Drop the collection, i.e. delete it from the database along with
        all the documents it contains. The deletion itself is delegated to
        the `drop_collection` method of the database this collection
        belongs to.

        Args:
            max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
                If not passed, the collection-level setting is used instead.
                Remember there is no guarantee that a request that has
                timed out is not in fact honored.

        Returns:
            a dictionary of the form {"ok": 1} to signal successful deletion.

        Example:
            >>> async def drop_and_check(acol: AsyncCollection) -> None:
            ...     doc0 = await acol.find_one({})
            ...     print("doc0", doc0)
            ...     drop_result = await acol.drop()
            ...     print("drop_result", drop_result)
            ...     doc1 = await acol.find_one({})
            ...
            >>> asyncio.run(drop_and_check(my_async_coll))
            doc0 {'_id': '...', 'z': -10}
            drop_result {'ok': 1}
            Traceback (most recent call last):
                ... ...
            astrapy.exceptions.DataAPIResponseException: Collection does not exist, collection name: my_collection

        Note:
            Use with caution.

        Note:
            Once the method succeeds, methods on this object can still be invoked:
            however, this hardly makes sense as the underlying actual collection
            is no more.
            It is the responsibility of the developer to design a correct flow
            which avoids using a dropped collection any further.
        """

        # a falsy max_time_ms (None or 0) falls back to the collection-level setting
        timeout_ms = max_time_ms or self.api_options.max_time_ms
        logger.info(f"dropping collection '{self.name}' (self)")
        outcome = await self.database.drop_collection(self, max_time_ms=timeout_ms)
        logger.info(f"finished dropping collection '{self.name}' (self)")
        return outcome

    async def command(
        self,
        body: dict[str, Any],
        *,
        raise_api_errors: bool = True,
        max_time_ms: int | None = None,
    ) -> dict[str, Any]:
        """
        Issue an arbitrary, caller-provided payload to the Data API
        for this collection, as a POST request.

        Args:
            body: a JSON-serializable dictionary, the payload of the request.
            raise_api_errors: if True, responses with a nonempty 'errors' field
                result in an astrapy exception being raised.
            max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
                If not passed, the collection-level setting is used instead.

        Returns:
            a dictionary with the response of the HTTP request.

        Example:
            >>> asyncio.run(my_async_coll.command({"countDocuments": {}}))
            {'status': {'count': 123}}
        """

        # a falsy max_time_ms (None or 0) falls back to the collection-level setting
        request_timeout_ms = max_time_ms or self.api_options.max_time_ms
        # the sorted top-level keys of the payload only serve to label the log lines
        command_names = ",".join(sorted(body))
        logger.info(f"command={command_names} on '{self.name}'")
        response = await self._api_commander.async_request(
            payload=body,
            raise_api_errors=raise_api_errors,
            timeout_info=base_timeout_info(request_timeout_ms),
        )
        logger.info(f"finished command={command_names} on '{self.name}'")
        return response

Instance variables

var database : AsyncDatabase

a Database object, the database this collection belongs to.

Example

>>> my_async_coll.database.name
'quicktest'
Expand source code
@property
def database(self) -> AsyncDatabase:
    """
    The AsyncDatabase object this collection belongs to.

    Returns:
        the database instance stored on this collection (`_database`).

    Example:
        >>> my_async_coll.database.name
        'quicktest'
    """

    return self._database
var full_name : str

The fully-qualified collection name within the database, in the form "keyspace.collection_name".

Example

>>> my_async_coll.full_name
'default_keyspace.my_v_collection'
Expand source code
@property
def full_name(self) -> str:
    """
    The fully-qualified name of this collection, built by joining
    its keyspace and its name with a dot: "keyspace.collection_name".

    Returns:
        the dot-separated "keyspace.collection_name" string.

    Example:
        >>> my_async_coll.full_name
        'default_keyspace.my_v_collection'
    """

    return "{}.{}".format(self.keyspace, self.name)
var keyspace : str

The keyspace this collection is in.

Example

>>> my_async_coll.keyspace
'default_keyspace'
Expand source code
@property
def keyspace(self) -> str:
    """
    The keyspace this collection is in, as read from its database.

    Returns:
        the keyspace set on the database this collection belongs to.

    Raises:
        ValueError: if the database has its keyspace set to None.

    Example:
        >>> my_async_coll.keyspace
        'default_keyspace'
    """

    db_keyspace = self.database.keyspace
    if db_keyspace is not None:
        return db_keyspace
    # a collection cannot report a keyspace its database does not have
    raise ValueError("The collection's DB is set with keyspace=None")
var name : str

The name of this collection.

Example

>>> my_async_coll.name
'my_v_collection'
Expand source code
@property
def name(self) -> str:
    """
    The name of this collection.

    Returns:
        the collection name stored on this object (`_name`).

    Example:
        >>> my_async_coll.name
        'my_v_collection'
    """

    return self._name
var namespace : str

The namespace this collection is in.

DEPRECATED (removal in 2.0). Switch to the "keyspace" property.

Example

>>> my_async_coll.namespace
'default_keyspace'
Expand source code
@property
def namespace(self) -> str:
    """
    The namespace this collection is in: a deprecated alias
    of the "keyspace" property, whose value it returns.

    *DEPRECATED* (removal in 2.0). Switch to the "keyspace" property.

    Returns:
        the same value as the "keyspace" property.

    Example:
        >>> my_async_coll.namespace
        'default_keyspace'
    """

    # stacklevel=2 attributes the warning to the code accessing the property
    warnings.warn(
        deprecation.DeprecatedWarning(
            "the 'namespace' property",
            deprecated_in="1.5.0",
            removed_in="2.0.0",
            details=NAMESPACE_DEPRECATION_NOTICE_METHOD,
        ),
        stacklevel=2,
    )
    return self.keyspace

Methods

async def bulk_write(self, requests: Iterable[AsyncBaseOperation], *, ordered: bool = False, concurrency: int | None = None, max_time_ms: int | None = None) ‑> BulkWriteResult

Execute an arbitrary amount of operations such as inserts, updates, deletes either sequentially or concurrently.

This method does not execute atomically, i.e. individual operations are each performed in the same way as the corresponding collection method, and each one is a different and unrelated database mutation.

Args

requests
an iterable over concrete subclasses of BaseOperation, such as AsyncInsertMany or AsyncReplaceOne. Each such object represents an operation ready to be executed on a collection, and is instantiated by passing the same parameters as one would the corresponding collection method.
ordered
whether to launch the requests one after the other or in arbitrary order, possibly in a concurrent fashion. For performance reasons, False (default) should be preferred when compatible with the needs of the application flow.
concurrency
maximum number of concurrent operations executing at a given time. It cannot be more than one for ordered bulk writes.
max_time_ms
a timeout, in milliseconds, for the whole bulk write. Remember that, if the method call times out, then there's no guarantee about what portion of the bulk write has been received and successfully executed by the Data API. If not passed, the collection-level setting is used instead: in most cases, however, one should pass a relaxed timeout if longer sequences of operations are to be executed in bulk.

Returns

A single BulkWriteResult summarizing the whole list of requested operations. The keys in the map attributes of BulkWriteResult (when present) are the integer indices of the corresponding operation in the requests iterable.

Example

>>> from astrapy.operations import AsyncInsertMany, AsyncReplaceOne, AsyncOperation
>>> from astrapy.results import BulkWriteResult
>>>
>>> async def do_bulk_write(
...     acol: AsyncCollection,
...     async_operations: List[AsyncOperation],
... ) -> BulkWriteResult:
...     bw_result = await acol.bulk_write(async_operations)
...     count0 = await acol.count_documents({}, upper_bound=100)
...     print("count0", count0)
...     distinct0 = await acol.distinct("replaced")
...     print("distinct0", distinct0)
...     return bw_result
...
>>> op1 = AsyncInsertMany([{"a": 1}, {"a": 2}])
>>> op2 = AsyncReplaceOne(
...     {"z": 9},
...     replacement={"z": 9, "replaced": True},
...     upsert=True,
... )
>>> result = asyncio.run(do_bulk_write(my_async_coll, [op1, op2]))
count0 3
distinct0 [True]
>>> print("result", result)
result BulkWriteResult(bulk_api_results={0: ..., 1: ...}, deleted_count=0, inserted_count=3, matched_count=0, modified_count=0, upserted_count=1, upserted_ids={1: 'ccd0a800-...'})

Deprecated since version: 1.5.0

This will be removed in 2.0.0. Please switch to managing sequences of DML operations in app code instead.

Expand source code
@deprecation.deprecated(  # type: ignore[misc]
    deprecated_in="1.5.0",
    removed_in="2.0.0",
    current_version=__version__,
    details=(
        "Please switch to managing sequences of DML operations "
        "in app code instead."
    ),
)
async def bulk_write(
    self,
    requests: Iterable[AsyncBaseOperation],
    *,
    ordered: bool = False,
    concurrency: int | None = None,
    max_time_ms: int | None = None,
) -> BulkWriteResult:
    """
    Execute an arbitrary amount of operations such as inserts, updates, deletes
    either sequentially or concurrently.

    This method does not execute atomically, i.e. individual operations are
    each performed in the same way as the corresponding collection method,
    and each one is a different and unrelated database mutation.

    Args:
        requests: an iterable over concrete subclasses of `BaseOperation`,
            such as `AsyncInsertMany` or `AsyncReplaceOne`. Each such object
            represents an operation ready to be executed on a collection,
            and is instantiated by passing the same parameters as one
            would the corresponding collection method.
        ordered: whether to launch the `requests` one after the other or
            in arbitrary order, possibly in a concurrent fashion. For
            performance reasons, False (default) should be preferred
            when compatible with the needs of the application flow.
        concurrency: maximum number of concurrent operations executing at
            a given time. It cannot be more than one for ordered bulk writes.
        max_time_ms: a timeout, in milliseconds, for the whole bulk write.
            Remember that, if the method call times out, then there's no
            guarantee about what portion of the bulk write has been received
            and successfully executed by the Data API.
            If not passed, the collection-level setting is used instead:
            in most cases, however, one should pass a relaxed timeout
            if longer sequences of operations are to be executed in bulk.

    Returns:
        A single BulkWriteResult summarizing the whole list of requested
        operations. The keys in the map attributes of BulkWriteResult
        (when present) are the integer indices of the corresponding operation
        in the `requests` iterable.

    Raises:
        ValueError: if `ordered` is True and a concurrency above one is requested.
        BulkWriteException: if one or more operations fail with a
            DataAPIResponseException. The exception carries the partial
            result accumulated from the successful operations (plus the
            partial results of failed cumulative operations) and the list
            of underlying exceptions.

    Example:
        >>> from astrapy.operations import AsyncInsertMany, AsyncReplaceOne, AsyncOperation
        >>> from astrapy.results import BulkWriteResult
        >>>
        >>> async def do_bulk_write(
        ...     acol: AsyncCollection,
        ...     async_operations: List[AsyncOperation],
        ... ) -> BulkWriteResult:
        ...     bw_result = await acol.bulk_write(async_operations)
        ...     count0 = await acol.count_documents({}, upper_bound=100)
        ...     print("count0", count0)
        ...     distinct0 = await acol.distinct("replaced")
        ...     print("distinct0", distinct0)
        ...     return bw_result
        ...
        >>> op1 = AsyncInsertMany([{"a": 1}, {"a": 2}])
        >>> op2 = AsyncReplaceOne(
        ...     {"z": 9},
        ...     replacement={"z": 9, "replaced": True},
        ...     upsert=True,
        ... )
        >>> result = asyncio.run(do_bulk_write(my_async_coll, [op1, op2]))
        count0 3
        distinct0 [True]
        >>> print("result", result)
        result BulkWriteResult(bulk_api_results={0: ..., 1: ...}, deleted_count=0, inserted_count=3, matched_count=0, modified_count=0, upserted_count=1, upserted_ids={1: 'ccd0a800-...'})
    """

    # lazy importing here against circular-import error
    from astrapy.operations import reduce_bulk_write_results

    if concurrency is None:
        _concurrency = 1 if ordered else DEFAULT_BULK_WRITE_CONCURRENCY
    else:
        _concurrency = concurrency
    if _concurrency > 1 and ordered:
        raise ValueError("Cannot run ordered bulk_write concurrently.")
    _max_time_ms = max_time_ms or self.api_options.max_time_ms
    logger.info(f"startng a bulk write on '{self.name}'")
    # a single timeout manager is shared by all operations, so that each of them
    # is handed whatever is left of the overall time budget when it starts
    timeout_manager = MultiCallTimeoutManager(overall_max_time_ms=_max_time_ms)
    if ordered:
        bulk_write_results: list[BulkWriteResult] = []
        for operation_i, operation in enumerate(requests):
            try:
                this_bw_result = await operation.execute(
                    self,
                    index_in_bulk_write=operation_i,
                    bulk_write_timeout_ms=timeout_manager.remaining_timeout_ms(),
                )
                bulk_write_results.append(this_bw_result)
            except CumulativeOperationException as exc:
                # the failed operation was partially executed: its partial
                # result is merged with those of the operations before it
                partial_result = exc.partial_result
                partial_bw_result = reduce_bulk_write_results(
                    bulk_write_results
                    + [
                        partial_result.to_bulk_write_result(
                            index_in_bulk_write=operation_i
                        )
                    ]
                )
                dar_exception = exc.data_api_response_exception()
                raise BulkWriteException(
                    text=dar_exception.text,
                    error_descriptors=dar_exception.error_descriptors,
                    detailed_error_descriptors=dar_exception.detailed_error_descriptors,
                    partial_result=partial_bw_result,
                    exceptions=[dar_exception],
                )
            except DataAPIResponseException as exc:
                # the cumulative exceptions, with their
                # partially-done-info, are handled above:
                # here it's just one-shot d.a.r. exceptions
                partial_bw_result = reduce_bulk_write_results(bulk_write_results)
                dar_exception = exc.data_api_response_exception()
                raise BulkWriteException(
                    text=dar_exception.text,
                    error_descriptors=dar_exception.error_descriptors,
                    detailed_error_descriptors=dar_exception.detailed_error_descriptors,
                    partial_result=partial_bw_result,
                    exceptions=[dar_exception],
                )
        full_bw_result = reduce_bulk_write_results(bulk_write_results)
        logger.info(f"finished a bulk write on '{self.name}'")
        return full_bw_result
    else:
        # the semaphore caps the number of operations in flight at any time
        sem = asyncio.Semaphore(_concurrency)

        async def _concurrent_execute_as_either(
            operation: AsyncBaseOperation, operation_i: int
        ) -> tuple[BulkWriteResult | None, DataAPIResponseException | None]:
            # returns (result, None) on success and (None, exception) on an
            # API failure, so that one failure does not abort the other tasks
            async with sem:
                try:
                    ex_result = await operation.execute(
                        self,
                        index_in_bulk_write=operation_i,
                        bulk_write_timeout_ms=timeout_manager.remaining_timeout_ms(),
                    )
                    return (ex_result, None)
                except DataAPIResponseException as exc:
                    return (None, exc)

        tasks = [
            asyncio.create_task(
                _concurrent_execute_as_either(operation, operation_i)
            )
            for operation_i, operation in enumerate(requests)
        ]
        bulk_write_either_results = await asyncio.gather(*tasks)
        # regroup: asyncio.gather returns the results in the same order as
        # the tasks it was given, hence the position of an item in the
        # list is the index of its operation in `requests`
        bulk_write_successes = [
            bwr for bwr, _ in bulk_write_either_results if bwr is not None
        ]
        indexed_bulk_write_failures = [
            (failed_operation_i, bwf)
            for failed_operation_i, (_, bwf) in enumerate(bulk_write_either_results)
            if bwf is not None
        ]
        if indexed_bulk_write_failures:
            # extract and cumulate: each partial result must be labeled
            # with the index of the very operation that produced it
            partial_results_from_failures = [
                failure.partial_result.to_bulk_write_result(
                    index_in_bulk_write=failed_operation_i
                )
                for failed_operation_i, failure in indexed_bulk_write_failures
                if isinstance(failure, CumulativeOperationException)
            ]
            partial_bw_result = reduce_bulk_write_results(
                bulk_write_successes + partial_results_from_failures
            )
            # raise and recast the first exception
            all_dar_exceptions = [
                bw_failure.data_api_response_exception()
                for _, bw_failure in indexed_bulk_write_failures
            ]
            dar_exception = all_dar_exceptions[0]
            raise BulkWriteException(
                text=dar_exception.text,
                error_descriptors=dar_exception.error_descriptors,
                detailed_error_descriptors=dar_exception.detailed_error_descriptors,
                partial_result=partial_bw_result,
                exceptions=all_dar_exceptions,
            )
        else:
            logger.info(f"finished a bulk write on '{self.name}'")
            return reduce_bulk_write_results(bulk_write_successes)
async def command(self, body: dict[str, Any], *, raise_api_errors: bool = True, max_time_ms: int | None = None) ‑> dict[str, typing.Any]

Send a POST request to the Data API for this collection with an arbitrary, caller-provided payload.

Args

body
a JSON-serializable dictionary, the payload of the request.
raise_api_errors
if True, responses with a nonempty 'errors' field result in an astrapy exception being raised.
max_time_ms
a timeout, in milliseconds, for the underlying HTTP request. If not passed, the collection-level setting is used instead.

Returns

a dictionary with the response of the HTTP request.

Example

>>> asyncio.run(my_async_coll.command({"countDocuments": {}}))
{'status': {'count': 123}}
Expand source code
async def command(
    self,
    body: dict[str, Any],
    *,
    raise_api_errors: bool = True,
    max_time_ms: int | None = None,
) -> dict[str, Any]:
    """
    Issue an arbitrary, caller-provided payload to the Data API
    for this collection, as a POST request.

    Args:
        body: a JSON-serializable dictionary, the payload of the request.
        raise_api_errors: if True, responses with a nonempty 'errors' field
            result in an astrapy exception being raised.
        max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
            If not passed, the collection-level setting is used instead.

    Returns:
        a dictionary with the response of the HTTP request.

    Example:
        >>> asyncio.run(my_async_coll.command({"countDocuments": {}}))
        {'status': {'count': 123}}
    """

    # a falsy max_time_ms (None or 0) falls back to the collection-level setting
    request_timeout_ms = max_time_ms or self.api_options.max_time_ms
    # the sorted top-level keys of the payload only serve to label the log lines
    command_names = ",".join(sorted(body))
    logger.info(f"command={command_names} on '{self.name}'")
    response = await self._api_commander.async_request(
        payload=body,
        raise_api_errors=raise_api_errors,
        timeout_info=base_timeout_info(request_timeout_ms),
    )
    logger.info(f"finished command={command_names} on '{self.name}'")
    return response
async def count_documents(self, filter: FilterType, *, upper_bound: int, max_time_ms: int | None = None) ‑> int

Count the documents in the collection matching the specified filter.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {}; {"name": "John"}; {"price": {"$lt": 100}}; {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}. See the Data API documentation for the full set of operators.
upper_bound
a required ceiling on the result of the count operation. If the actual number of documents exceeds this value, an exception will be raised. Furthermore, if the actual number of documents exceeds the maximum count that the Data API can reach (regardless of upper_bound), an exception will be raised.
max_time_ms
a timeout, in milliseconds, for the underlying HTTP request. If not passed, the collection-level setting is used instead.

Returns

the exact count of matching documents.

Example

>>> async def do_count_docs(acol: AsyncCollection) -> None:
...     await acol.insert_many([{"seq": i} for i in range(20)])
...     count0 = await acol.count_documents({}, upper_bound=100)
...     print("count0", count0)
...     count1 = await acol.count_documents({"seq":{"$gt": 15}}, upper_bound=100)
...     print("count1", count1)
...     count2 = await acol.count_documents({}, upper_bound=10)
...     print("count2", count2)
...
>>> asyncio.run(do_count_docs(my_async_coll))
count0 20
count1 4
Traceback (most recent call last):
    ... ...
astrapy.exceptions.TooManyDocumentsToCountException

Note

Count operations are expensive: for this reason, the best practice is to provide a reasonable upper_bound according to the caller expectations. Moreover, indiscriminate usage of count operations for sizeable amounts of documents (i.e. in the thousands and more) is discouraged in favor of alternative application-specific solutions. Keep in mind that the Data API has a hard upper limit on the amount of documents it will count, and that an exception will be thrown by this method if this limit is encountered.

Expand source code
async def count_documents(
    self,
    filter: FilterType,
    *,
    upper_bound: int,
    max_time_ms: int | None = None,
) -> int:
    """
    Count the documents in the collection matching the specified filter.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        upper_bound: a required ceiling on the result of the count operation.
            If the actual number of documents exceeds this value,
            an exception will be raised.
            Furthermore, if the actual number of documents exceeds the maximum
            count that the Data API can reach (regardless of upper_bound),
            an exception will be raised.
        max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
            If not passed, the collection-level setting is used instead.

    Returns:
        the exact count of matching documents.

    Raises:
        TooManyDocumentsToCountException: if the response signals that more
            documents exist than the server counted, or if the count
            is greater than `upper_bound`.
        DataAPIFaultyResponseException: if the response carries no count.

    Example:
        >>> async def do_count_docs(acol: AsyncCollection) -> None:
        ...     await acol.insert_many([{"seq": i} for i in range(20)])
        ...     count0 = await acol.count_documents({}, upper_bound=100)
        ...     print("count0", count0)
        ...     count1 = await acol.count_documents({"seq":{"$gt": 15}}, upper_bound=100)
        ...     print("count1", count1)
        ...     count2 = await acol.count_documents({}, upper_bound=10)
        ...     print("count2", count2)
        ...
        >>> asyncio.run(do_count_docs(my_async_coll))
        count0 20
        count1 4
        Traceback (most recent call last):
            ... ...
        astrapy.exceptions.TooManyDocumentsToCountException

    Note:
        Count operations are expensive: for this reason, the best practice
        is to provide a reasonable `upper_bound` according to the caller
        expectations. Moreover, indiscriminate usage of count operations
        for sizeable amounts of documents (i.e. in the thousands and more)
        is discouraged in favor of alternative application-specific solutions.
        Keep in mind that the Data API has a hard upper limit on the amount
        of documents it will count, and that an exception will be thrown
        by this method if this limit is encountered.
    """

    request_timeout_ms = max_time_ms or self.api_options.max_time_ms
    logger.info(f"countDocuments on '{self.name}'")
    cd_response = await self._api_commander.async_request(
        payload={"countDocuments": {"filter": filter}},
        timeout_info=base_timeout_info(request_timeout_ms),
    )
    logger.info(f"finished countDocuments on '{self.name}'")

    status = cd_response.get("status", {})
    if "count" not in status:
        raise DataAPIFaultyResponseException(
            text="Faulty response from count_documents API command.",
            raw_response=cd_response,
        )

    count: int = status["count"]
    # 'moreData' flags that the server stopped counting at its own ceiling
    if status.get("moreData", False):
        raise TooManyDocumentsToCountException(
            text=f"Document count exceeds {count}, the maximum allowed by the server",
            server_max_count_exceeded=True,
        )
    if count > upper_bound:
        raise TooManyDocumentsToCountException(
            text="Document count exceeds required upper bound",
            server_max_count_exceeded=False,
        )
    return count
async def delete_all(self, *, max_time_ms: int | None = None) ‑> dict[str, typing.Any]

Delete all documents in a collection.

Args

max_time_ms
a timeout, in milliseconds, for the underlying HTTP request. If not passed, the collection-level setting is used instead.

Returns

a dictionary of the form {"ok": 1} to signal successful deletion.

Example

>>> async def do_delete_all(acol: AsyncCollection) -> None:
...     distinct0 = await acol.distinct("seq")
...     print("distinct0", distinct0)
...     count1 = await acol.count_documents({}, upper_bound=100)
...     print("count1", count1)
...     delete_result2 = await acol.delete_all()
...     print("delete_result2", delete_result2)
...     count3 = await acol.count_documents({}, upper_bound=100)
...     print("count3", count3)
...
>>> asyncio.run(do_delete_all(my_async_coll))
distinct0 [4, 2, 3, 0, 1]
count1 5
delete_result2 {'ok': 1}
count3 0

Note

Use with caution.

Deprecated since version: 1.3.0

This will be removed in 2.0.0. Use delete_many with filter={} instead.

Expand source code
@deprecation.deprecated(  # type: ignore[misc]
    deprecated_in="1.3.0",
    removed_in="2.0.0",
    current_version=__version__,
    details="Use delete_many with filter={} instead.",
)
async def delete_all(self, *, max_time_ms: int | None = None) -> dict[str, Any]:
    """
    Delete all documents in a collection, by delegating
    to `delete_many` with an empty filter.

    Args:
        max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
            If not passed, the collection-level setting is used instead.

    Returns:
        a dictionary of the form {"ok": 1} to signal successful deletion.

    Raises:
        DataAPIFaultyResponseException: if the result of `delete_many`
            does not report a `deleted_count` of -1.

    Example:
        >>> async def do_delete_all(acol: AsyncCollection) -> None:
        ...     distinct0 = await acol.distinct("seq")
        ...     print("distinct0", distinct0)
        ...     count1 = await acol.count_documents({}, upper_bound=100)
        ...     print("count1", count1)
        ...     delete_result2 = await acol.delete_all()
        ...     print("delete_result2", delete_result2)
        ...     count3 = await acol.count_documents({}, upper_bound=100)
        ...     print("count3", count3)
        ...
        >>> asyncio.run(do_delete_all(my_async_coll))
        distinct0 [4, 2, 3, 0, 1]
        count1 5
        delete_result2 {'ok': 1}
        count3 0

    Note:
        Use with caution.
    """
    deletion_outcome = await self.delete_many(filter={}, max_time_ms=max_time_ms)
    # a deleted_count of -1 is the only outcome accepted here as success
    if deletion_outcome.deleted_count != -1:
        raise DataAPIFaultyResponseException(
            text="Unexpected response from collection.delete_many({}).",
            raw_response=None,
        )
    return {"ok": 1}
async def delete_many(self, filter: FilterType, *, max_time_ms: int | None = None) ‑> DeleteResult

Delete all documents matching a provided filter.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators. Passing an empty filter, {}, completely erases all contents of the collection.
max_time_ms
a timeout, in milliseconds, for the operation. If not passed, the collection-level setting is used instead: keep in mind that this method entails successive HTTP requests to the API, depending on how many documents are to be deleted. For this reason, in most cases it is suggested to relax the timeout compared to other method calls.

Returns

a DeleteResult object summarizing the outcome of the delete operation.

Example

>>> async def do_delete_many(acol: AsyncCollection) -> None:
...     await acol.insert_many([{"seq": 1}, {"seq": 0}, {"seq": 2}])
...     delete_result0 = await acol.delete_many({"seq": {"$lte": 1}})
...     print("delete_result0.deleted_count", delete_result0.deleted_count)
...     distinct1 = await acol.distinct("seq")
...     print("distinct1", distinct1)
...     delete_result2 = await acol.delete_many({"seq": {"$lte": 1}})
...     print("delete_result2.deleted_count", delete_result2.deleted_count)
...
>>> asyncio.run(do_delete_many(my_async_coll))
delete_result0.deleted_count 2
distinct1 [2]
delete_result2.deleted_count 0

Note

This operation is in general not atomic. Depending on the amount of matching documents, it can keep running (in a blocking way) for a macroscopic time. In that case, new documents that are meanwhile inserted (e.g. from another process/application) will be deleted during the execution of this method call until the collection is devoid of matches. An exception is the filter={} case, whereby the operation is atomic.

Expand source code
async def delete_many(
    self,
    filter: FilterType,
    *,
    max_time_ms: int | None = None,
) -> DeleteResult:
    """
    Delete every document that satisfies the given filter.

    The method keeps issuing `deleteMany` commands to the API for as long
    as the responses signal that more matching data is left, accumulating
    the per-call deleted counts and raw responses along the way.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
            An empty filter, `{}`, wipes the whole collection.
        max_time_ms: a timeout, in milliseconds, covering the operation
            as a whole. If not passed, the collection-level setting applies.
            Since several HTTP requests may be needed (depending on how
            many documents must go), a more generous timeout than for
            other methods is usually advisable.

    Returns:
        a DeleteResult object with the total deleted count and the list
        of raw API responses collected during the operation.

    Raises:
        DeleteManyException: if an API response contains errors. The
            exception carries a partial result describing the deletions
            performed before the failing call.
        DataAPIFaultyResponseException: if a response lacks the
            `deletedCount` entry in its status.

    Example:
        >>> async def do_delete_many(acol: AsyncCollection) -> None:
        ...     await acol.insert_many([{"seq": 1}, {"seq": 0}, {"seq": 2}])
        ...     delete_result0 = await acol.delete_many({"seq": {"$lte": 1}})
        ...     print("delete_result0.deleted_count", delete_result0.deleted_count)
        ...     distinct1 = await acol.distinct("seq")
        ...     print("distinct1", distinct1)
        ...     delete_result2 = await acol.delete_many({"seq": {"$lte": 1}})
        ...     print("delete_result2.deleted_count", delete_result2.deleted_count)
        ...
        >>> asyncio.run(do_delete_many(my_async_coll))
        delete_result0.deleted_count 2
        distinct1 [2]
        delete_result2.deleted_count 0

    Note:
        This operation is in general not atomic. Depending on the amount
        of matching documents, it can keep running (in a blocking way)
        for a macroscopic time. In that case, new documents that are
        meanwhile inserted (e.g. from another process/application) will be
        deleted during the execution of this method call until the
        collection is devoid of matches.
        An exception is the `filter={}` case, whereby the operation is atomic.
    """
    overall_timeout_ms = max_time_ms or self.api_options.max_time_ms
    # a single time budget is shared by all API calls issued below
    deadline = MultiCallTimeoutManager(overall_max_time_ms=overall_timeout_ms)
    command = {"deleteMany": {"filter": filter}}
    collected: list[dict[str, Any]] = []
    total_deleted = 0
    logger.info(f"starting delete_many on '{self.name}'")
    has_more = True
    while has_more:
        logger.info(f"deleteMany on '{self.name}'")
        response = await self._api_commander.async_request(
            payload=command,
            raise_api_errors=False,
            timeout_info=deadline.remaining_timeout_info(),
        )
        logger.info(f"finished deleteMany on '{self.name}'")

        # errors in the response: stop right away, reporting what was
        # accomplished by the calls that came before this one
        if response.get("errors", []):
            every_response = collected + [response]
            done_so_far = DeleteResult(
                deleted_count=total_deleted,
                raw_results=collected,
            )
            raise DeleteManyException.from_responses(
                commands=[None] * len(every_response),
                raw_responses=every_response,
                partial_result=done_so_far,
            )

        batch_deleted = response.get("status", {}).get("deletedCount")
        if batch_deleted is None:
            raise DataAPIFaultyResponseException(
                text="Faulty response from delete_many API command.",
                raw_response=response,
            )

        collected.append(response)
        total_deleted += batch_deleted
        # the API flags through "moreData" whether another round is needed
        has_more = response.get("status", {}).get("moreData", False)

    logger.info(f"finished delete_many on '{self.name}'")
    return DeleteResult(
        deleted_count=total_deleted,
        raw_results=collected,
    )
async def delete_one(self, filter: FilterType, *, vector: VectorType | None = None, vectorize: str | None = None, sort: SortType | None = None, max_time_ms: int | None = None) ‑> DeleteResult

Delete one document matching a provided filter. This method never deletes more than a single document, regardless of the number of matches to the provided filters.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
vector
a suitable vector, i.e. a list of float numbers of the appropriate dimensionality, to use vector search (i.e. ANN, or "approximate nearest-neighbours" search), as the sorting criterion. In this way, the matched document (if any) will be the one that is most similar to the provided vector. DEPRECATED (removal in 2.0). Use a $vector key in the sort clause dict instead.
vectorize
a string to be made into a vector to perform vector search. Using vectorize assumes a suitable service is configured for the collection. DEPRECATED (removal in 2.0). Use a $vectorize key in the sort clause dict instead.
sort
with this dictionary parameter one can control the sorting order of the documents matching the filter, effectively determining what document will come first and hence be the deleted one. See the find method for more on sorting. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
max_time_ms
a timeout, in milliseconds, for the underlying HTTP request. If not passed, the collection-level setting is used instead.

Returns

a DeleteResult object summarizing the outcome of the delete operation.

Example

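>>> async def do_delete_one(acol: AsyncCollection) -> None:
...     await acol.insert_many([{"seq": 1}, {"seq": 0}, {"seq": 2}])
...     delete_result0 = await acol.delete_one({"seq": 1})
...     print("delete_result0.deleted_count", delete_result0.deleted_count)
...     distinct1 = await acol.distinct("seq")
...     print("distinct1", distinct1)
...     delete_result2 = await acol.delete_one(
...         {"seq": {"$exists": True}},
...         sort={"seq": astrapy.constants.SortDocuments.DESCENDING},
...     )
...     print("delete_result2.deleted_count", delete_result2.deleted_count)
...     distinct3 = await acol.distinct("seq")
...     print("distinct3", distinct3)
...     delete_result4 = await acol.delete_one({"seq": 2})
...     print("delete_result4.deleted_count", delete_result4.deleted_count)
...
>>> asyncio.run(do_delete_one(my_async_coll))
delete_result0.deleted_count 1
distinct1 [0, 2]
delete_result2.deleted_count 1
distinct3 [0]
delete_result4.deleted_count 0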
Expand source code
async def delete_one(
    self,
    filter: FilterType,
    *,
    vector: VectorType | None = None,
    vectorize: str | None = None,
    sort: SortType | None = None,
    max_time_ms: int | None = None,
) -> DeleteResult:
    """
    Delete a single document among those matching the given filter.
    No matter how many documents satisfy the filter, at most one
    of them is deleted by a call to this method.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        vector: a suitable vector, i.e. a list of float numbers of the appropriate
            dimensionality, to use vector search (i.e. ANN,
            or "approximate nearest-neighbours" search), as the sorting criterion.
            In this way, the matched document (if any) will be the one
            that is most similar to the provided vector.
            *DEPRECATED* (removal in 2.0). Use a `$vector` key in the
            sort clause dict instead.
        vectorize: a string to be made into a vector to perform vector search.
            Using vectorize assumes a suitable service is configured for the collection.
            *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the
            sort clause dict instead.
        sort: a dictionary controlling the order of the documents matching
            the filter: this effectively decides which document comes
            first and is therefore the deleted one. See the `find` method
            for more on sorting.
            Vector-based ANN sorting is achieved by providing a "$vector"
            or a "$vectorize" key in `sort`.
        max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
            If not passed, the collection-level setting is used instead.

    Returns:
        a DeleteResult object summarizing the outcome of the delete operation.

    Raises:
        DataAPIFaultyResponseException: if the API response has no
            `deletedCount` entry in its status.

    Example:
        >>> async def do_delete_one(acol: AsyncCollection) -> None:
        ...     await acol.insert_many([{"seq": 1}, {"seq": 0}, {"seq": 2}])
        ...     delete_result0 = await acol.delete_one({"seq": 1})
        ...     print("delete_result0.deleted_count", delete_result0.deleted_count)
        ...     distinct1 = await acol.distinct("seq")
        ...     print("distinct1", distinct1)
        ...     delete_result2 = await acol.delete_one(
        ...         {"seq": {"$exists": True}},
        ...         sort={"seq": astrapy.constants.SortDocuments.DESCENDING},
        ...     )
        ...     print("delete_result2.deleted_count", delete_result2.deleted_count)
        ...     distinct3 = await acol.distinct("seq")
        ...     print("distinct3", distinct3)
        ...     delete_result4 = await acol.delete_one({"seq": 2})
        ...     print("delete_result4.deleted_count", delete_result4.deleted_count)
        ...
        >>> asyncio.run(do_delete_one(my_async_coll))
        delete_result0.deleted_count 1
        distinct1 [0, 2]
        delete_result2.deleted_count 1
        distinct3 [0]
        delete_result4.deleted_count 0
    """

    check_deprecated_vector_ize(
        vector=vector,
        vectors=None,
        vectorize=vectorize,
        kind="find",
    )
    # the deprecated vector/vectorize parameters get merged into the sort clause
    effective_sort = _collate_vector_to_sort(sort, vector, vectorize)
    timeout_ms = max_time_ms or self.api_options.max_time_ms

    # only the clauses that are actually set make it into the command
    command_body: dict[str, Any] = {}
    if filter is not None:
        command_body["filter"] = filter
    if effective_sort is not None:
        command_body["sort"] = effective_sort

    logger.info(f"deleteOne on '{self.name}'")
    response = await self._api_commander.async_request(
        payload={"deleteOne": command_body},
        timeout_info=base_timeout_info(timeout_ms),
    )
    logger.info(f"finished deleteOne on '{self.name}'")

    if "deletedCount" not in response.get("status", {}):
        raise DataAPIFaultyResponseException(
            text="Faulty response from delete_one API command.",
            raw_response=response,
        )
    return DeleteResult(
        deleted_count=response["status"]["deletedCount"],
        raw_results=[response],
    )
async def distinct(self, key: str, *, filter: FilterType | None = None, max_time_ms: int | None = None) ‑> list[typing.Any]

Return a list of the unique values of key across the documents in the collection that match the provided filter.

Args

key
the name of the field whose value is inspected across documents. Keys can use dot-notation to descend to deeper document levels. Example of acceptable key values: "field" "field.subfield" "field.3" "field.3.subfield" If lists are encountered and no numeric index is specified, all items in the list are visited.
filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
max_time_ms
a timeout, in milliseconds, with the same meaning as for find. If not passed, the collection-level setting is used instead.

Returns

a list of all different values for key found across the documents that match the filter. The result list has no repeated items.

Example

>>> async def run_distinct(acol: AsyncCollection) -> None:
...     await acol.insert_many(
...         [
...             {"name": "Marco", "food": ["apple", "orange"], "city": "Helsinki"},
...             {"name": "Emma", "food": {"likes_fruit": True, "allergies": []}},
...         ]
...     )
...     distinct0 = await acol.distinct("name")
...     print("distinct('name')", distinct0)
...     distinct1 = await acol.distinct("city")
...     print("distinct('city')", distinct1)
...     distinct2 = await acol.distinct("food")
...     print("distinct('food')", distinct2)
...     distinct3 = await acol.distinct("food.1")
...     print("distinct('food.1')", distinct3)
...     distinct4 = await acol.distinct("food.allergies")
...     print("distinct('food.allergies')", distinct4)
...     distinct5 = await acol.distinct("food.likes_fruit")
...     print("distinct('food.likes_fruit')", distinct5)
...
>>> asyncio.run(run_distinct(my_async_coll))
distinct('name') ['Emma', 'Marco']
distinct('city') ['Helsinki']
distinct('food') [{'likes_fruit': True, 'allergies': []}, 'apple', 'orange']
distinct('food.1') ['orange']
distinct('food.allergies') []
distinct('food.likes_fruit') [True]

Note

It must be kept in mind that distinct is a client-side operation, which effectively browses all required documents using the logic of the find method and collects the unique values found for key. As such, there may be performance, latency and ultimately billing implications if the amount of matching documents is large.

Note

For details on the behaviour of "distinct" in conjunction with real-time changes in the collection contents, see the Note of the find command.

Expand source code
async def distinct(
    self,
    key: str,
    *,
    filter: FilterType | None = None,
    max_time_ms: int | None = None,
) -> list[Any]:
    """
    Collect the unique values found for `key` across the documents
    of the collection that match the provided filter.

    Internally, an AsyncCursor restricted to the `key` projection is
    created over the matching documents, and the result of its own
    `distinct` method is returned.

    Args:
        key: the name of the field whose value is inspected across documents.
            Keys can use dot-notation to descend to deeper document levels.
            Example of acceptable `key` values:
                "field"
                "field.subfield"
                "field.3"
                "field.3.subfield"
            If lists are encountered and no numeric index is specified,
            all items in the list are visited.
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        max_time_ms: a timeout, in milliseconds. It is handed to the
            underlying cursor as its `overall_max_time_ms`, while the
            cursor's own `max_time_ms` is left unset.
            If not passed, the collection-level setting is used instead.

    Returns:
        a list of all different values for `key` found across the documents
        that match the filter. The result list has no repeated items.

    Example:
        >>> async def run_distinct(acol: AsyncCollection) -> None:
        ...     await acol.insert_many(
        ...         [
        ...             {"name": "Marco", "food": ["apple", "orange"], "city": "Helsinki"},
        ...             {"name": "Emma", "food": {"likes_fruit": True, "allergies": []}},
        ...         ]
        ...     )
        ...     distinct0 = await acol.distinct("name")
        ...     print("distinct('name')", distinct0)
        ...     distinct1 = await acol.distinct("city")
        ...     print("distinct('city')", distinct1)
        ...     distinct2 = await acol.distinct("food")
        ...     print("distinct('food')", distinct2)
        ...     distinct3 = await acol.distinct("food.1")
        ...     print("distinct('food.1')", distinct3)
        ...     distinct4 = await acol.distinct("food.allergies")
        ...     print("distinct('food.allergies')", distinct4)
        ...     distinct5 = await acol.distinct("food.likes_fruit")
        ...     print("distinct('food.likes_fruit')", distinct5)
        ...
        >>> asyncio.run(run_distinct(my_async_coll))
        distinct('name') ['Emma', 'Marco']
        distinct('city') ['Helsinki']
        distinct('food') [{'likes_fruit': True, 'allergies': []}, 'apple', 'orange']
        distinct('food.1') ['orange']
        distinct('food.allergies') []
        distinct('food.likes_fruit') [True]

    Note:
        It must be kept in mind that `distinct` is a client-side operation,
        which effectively browses all required documents using the logic
        of the `find` method and collects the unique values found for `key`.
        As such, there may be performance, latency and ultimately
        billing implications if the amount of matching documents is large.

    Note:
        For details on the behaviour of "distinct" in conjunction with
        real-time changes in the collection contents, see the
        Note of the `find` command.
    """

    overall_timeout_ms = max_time_ms or self.api_options.max_time_ms
    # only the field under inspection is requested from the API
    key_projection = {key: True}
    scan_cursor = AsyncCursor(
        collection=self,
        filter=filter,
        projection=key_projection,
        max_time_ms=None,
        overall_max_time_ms=overall_timeout_ms,
    )
    unique_values = await scan_cursor.distinct(key)
    return unique_values
async def drop(self, *, max_time_ms: int | None = None) ‑> dict[str, typing.Any]

Drop the collection, i.e. delete it from the database along with all the documents it contains.

Args

max_time_ms
a timeout, in milliseconds, for the underlying HTTP request. If not passed, the collection-level setting is used instead. Remember there is no guarantee that a request that has timed out is not in fact honored.

Returns

a dictionary of the form {"ok": 1} to signal successful deletion.

Example

>>> async def drop_and_check(acol: AsyncCollection) -> None:
...     doc0 = await acol.find_one({})
...     print("doc0", doc0)
...     drop_result = await acol.drop()
...     print("drop_result", drop_result)
...     doc1 = await acol.find_one({})
...
>>> asyncio.run(drop_and_check(my_async_coll))
doc0 {'_id': '...', 'z': -10}
drop_result {'ok': 1}
Traceback (most recent call last):
    ... ...
astrapy.exceptions.DataAPIResponseException: Collection does not exist, collection name: my_collection

Note

Use with caution.

Note

Once the method succeeds, methods on this object can still be invoked: however, this hardly makes sense as the underlying actual collection is no more. It is the responsibility of the developer to design a correct flow which avoids using a deceased collection any further.

Expand source code
async def drop(self, *, max_time_ms: int | None = None) -> dict[str, Any]:
    """
    Drop this collection: the collection itself, together with all of
    the documents stored in it, is deleted from the database.

    The actual work is delegated to the `drop_collection` method of the
    database this collection belongs to.

    Args:
        max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
            If not passed, the collection-level setting is used instead.
            Remember there is no guarantee that a request that has
            timed out is not in fact honored.

    Returns:
        a dictionary of the form {"ok": 1} to signal successful deletion.

    Example:
        >>> async def drop_and_check(acol: AsyncCollection) -> None:
        ...     doc0 = await acol.find_one({})
        ...     print("doc0", doc0)
        ...     drop_result = await acol.drop()
        ...     print("drop_result", drop_result)
        ...     doc1 = await acol.find_one({})
        ...
        >>> asyncio.run(drop_and_check(my_async_coll))
        doc0 {'_id': '...', 'z': -10}
        drop_result {'ok': 1}
        Traceback (most recent call last):
            ... ...
        astrapy.exceptions.DataAPIResponseException: Collection does not exist, collection name: my_collection

    Note:
        Use with caution.

    Note:
        After a successful drop, methods on this object can still be
        invoked: doing so, however, hardly makes sense, since the actual
        underlying collection is gone.
        It is the responsibility of the developer to design a correct flow
        which avoids using a deceased collection any further.
    """

    timeout_ms = max_time_ms or self.api_options.max_time_ms
    logger.info(f"dropping collection '{self.name}' (self)")
    # the owning database performs the drop; its result is passed through
    outcome = await self.database.drop_collection(self, max_time_ms=timeout_ms)
    logger.info(f"finished dropping collection '{self.name}' (self)")
    return outcome
async def estimated_document_count(self, *, max_time_ms: int | None = None) ‑> int

Query the API server for an estimate of the document count in the collection.

Contrary to count_documents, this method has no filtering parameters.

Args

max_time_ms
a timeout, in milliseconds, for the underlying HTTP request. If not passed, the collection-level setting is used instead.

Returns

a server-provided estimate count of the documents in the collection.

Example

>>> asyncio.run(my_async_coll.estimated_document_count())
35700
Expand source code
async def estimated_document_count(
    self,
    *,
    max_time_ms: int | None = None,
) -> int:
    """
    Ask the API server for an estimate of how many documents
    the collection contains.

    Unlike `count_documents`, this method accepts no filtering parameters.

    Args:
        max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
            If not passed, the collection-level setting is used instead.

    Returns:
        a server-provided estimate count of the documents in the collection.

    Raises:
        DataAPIFaultyResponseException: if the API response has no
            `count` entry in its status.

    Example:
        >>> asyncio.run(my_async_coll.estimated_document_count())
        35700
    """
    timeout_ms = max_time_ms or self.api_options.max_time_ms
    command: dict[str, Any] = {"estimatedDocumentCount": {}}
    logger.info(f"estimatedDocumentCount on '{self.name}'")
    response = await self._api_commander.async_request(
        payload=command,
        timeout_info=base_timeout_info(timeout_ms),
    )
    logger.info(f"finished estimatedDocumentCount on '{self.name}'")
    if "count" not in response.get("status", {}):
        raise DataAPIFaultyResponseException(
            text="Faulty response from estimated_document_count API command.",
            raw_response=response,
        )
    estimate: int = response["status"]["count"]
    return estimate
def find(self, filter: FilterType | None = None, *, projection: ProjectionType | None = None, skip: int | None = None, limit: int | None = None, vector: VectorType | None = None, vectorize: str | None = None, include_similarity: bool | None = None, include_sort_vector: bool | None = None, sort: SortType | None = None, max_time_ms: int | None = None) ‑> AsyncCursor

Find documents on the collection, matching a certain provided filter.

The method returns a Cursor that can then be iterated over. Depending on the method call pattern, the iteration over all documents can reflect collection mutations occurred since the find method was called, or not. In cases where the cursor reflects mutations in real-time, it will iterate over documents in an approximate way (i.e. exhibiting occasional skipped or duplicate documents). This happens when making use of the sort option in a non-vector-search manner.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
projection
it controls which parts of the document are returned. It can be an allow-list: {"f1": True, "f2": True}, or a deny-list: {"fx": False, "fy": False}, but not a mixture (except for the _id and other special fields, which can be associated to both True or False independently of the rest of the specification). The special star-projections {"*": True} and {"*": False} have the effect of returning the whole document and {} respectively. For lists in documents, slice directives can be passed to select portions of the list: for instance, {"array": {"$slice": 2}}, {"array": {"$slice": -2}}, {"array": {"$slice": [4, 2]}} or {"array": {"$slice": [-4, 2]}}. An iterable over strings will be treated implicitly as an allow-list. The default projection (used if this parameter is not passed) does not necessarily include "special" fields such as $vector or $vectorize. See the Data API documentation for more on projections.
skip
with this integer parameter, what would be the first skip documents returned by the query are discarded, and the results start from the (skip+1)-th document. This parameter can be used only in conjunction with an explicit sort criterion of the ascending/descending type (i.e. it cannot be used when not sorting, nor with vector-based ANN search).
limit
this (integer) parameter sets a limit over how many documents are returned. Once limit is reached (or the cursor is exhausted for lack of matching documents), nothing more is returned.
vector
a suitable vector, i.e. a list of float numbers of the appropriate dimensionality, to perform vector search (i.e. ANN, or "approximate nearest-neighbours" search). When running similarity search on a collection, no other sorting criteria can be specified. Moreover, there is an upper bound to the number of documents that can be returned. For details, see the Note about upper bounds and the Data API documentation. DEPRECATED (removal in 2.0). Use a $vector key in the sort clause dict instead.
vectorize
a string to be made into a vector to perform vector search. This can be supplied in (exclusive) alternative to vector, provided such a service is configured for the collection, and achieves the same effect. DEPRECATED (removal in 2.0). Use a $vectorize key in the sort clause dict instead.
include_similarity
a boolean to request the numeric value of the similarity to be returned as an added "$similarity" key in each returned document. Can only be used for vector ANN search, i.e. when either vector is supplied or the sort parameter has the shape {"$vector": …}.
include_sort_vector
a boolean to request query vector used in this search. If set to True (and if the invocation is a vector search), calling the get_sort_vector method on the returned cursor will yield the vector used for the ANN search.
sort
with this dictionary parameter one can control the order the documents are returned. See the Note about sorting, as well as the one about upper bounds, for details. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
max_time_ms
a timeout, in milliseconds, for each single one of the underlying HTTP requests used to fetch documents as the cursor is iterated over. If not passed, the collection-level setting is used instead.

Returns

an AsyncCursor object representing iterations over the matching documents
(see the AsyncCursor object for how to use it. The simplest thing is to
run an async for loop:
async for document in collection.find(...):).

Examples

>>> async def run_finds(acol: AsyncCollection) -> None:
...             filter = {"seq": {"$exists": True}}
...             print("find results 1:")
...             async for doc in acol.find(filter, projection={"seq": True}, limit=5):
...                 print(doc["seq"])
...             async_cursor1 = acol.find(
...                 {},
...                 limit=4,
...                 sort={"seq": astrapy.constants.SortDocuments.DESCENDING},
...             )
...             ids = [doc["_id"] async for doc in async_cursor1]
...             print("find results 2:", ids)
...             async_cursor2 = acol.find({}, limit=3)
...             seqs = await async_cursor2.distinct("seq")
...             print("distinct results 3:", seqs)
...
>>> asyncio.run(run_finds(my_async_coll))
find results 1:
48
35
7
11
13
find results 2: ['d656cd9d-...', '479c7ce8-...', '96dc87fd-...', '83f0a21f-...']
distinct results 3: [48, 35, 7]
>>> async def run_vector_finds(acol: AsyncCollection) -> None:
...     await acol.insert_many([
...         {"tag": "A", "$vector": [4, 5]},
...         {"tag": "B", "$vector": [3, 4]},
...         {"tag": "C", "$vector": [3, 2]},
...         {"tag": "D", "$vector": [4, 1]},
...         {"tag": "E", "$vector": [2, 5]},
...     ])
...     ann_tags = [
...         document["tag"]
...         async for document in acol.find(
...             {},
...             sort={"$vector": [3, 3]},
...             limit=3,
...         )
...     ]
...     return ann_tags
...
>>> asyncio.run(run_vector_finds(my_async_coll))
['A', 'B', 'C']
>>> # (assuming the collection has metric VectorMetric.COSINE)
>>> async_cursor = my_async_coll.find(
...     sort={"$vector": [3, 3]},
...     limit=3,
...     include_sort_vector=True,
... )
>>> asyncio.run(async_cursor.get_sort_vector())
[3.0, 3.0]
>>> asyncio.run(async_cursor.__anext__())
{'_id': 'b13ce177-738e-47ec-bce1-77738ee7ec93', 'tag': 'A'}
>>> asyncio.run(async_cursor.get_sort_vector())
[3.0, 3.0]

Note

The following are example values for the sort parameter. When no particular order is required: sort={} When sorting by a certain value in ascending/descending order: sort={"field": SortDocuments.ASCENDING} sort={"field": SortDocuments.DESCENDING} When sorting first by "field" and then by "subfield" (while modern Python versions preserve the order of dictionaries, it is suggested for clarity to employ a collections.OrderedDict in these cases): sort={ "field": SortDocuments.ASCENDING, "subfield": SortDocuments.ASCENDING, } When running a vector similarity (ANN) search: sort={"$vector": [0.4, 0.15, -0.5]}

Note

Some combinations of arguments impose an implicit upper bound on the number of documents that are returned by the Data API. More specifically: (a) Vector ANN searches cannot return more than a number of documents that at the time of writing is set to 1000 items. (b) When using a sort criterion of the ascending/descending type, the Data API will return a smaller number of documents, set to 20 at the time of writing, and stop there. The returned documents are the top results across the whole collection according to the requested criterion. These provisions should be kept in mind even when subsequently running a command such as .distinct() on a cursor.

Note

When not specifying sorting criteria at all (by vector or otherwise), the cursor can scroll through an arbitrary number of documents as the Data API and the client periodically exchange new chunks of documents. It should be noted that the behavior of the cursor in the case documents have been added/removed after the find was started depends on database internals and it is not guaranteed, nor excluded, that such "real-time" changes in the data would be picked up by the cursor.

Expand source code
def find(
    self,
    filter: FilterType | None = None,
    *,
    projection: ProjectionType | None = None,
    skip: int | None = None,
    limit: int | None = None,
    vector: VectorType | None = None,
    vectorize: str | None = None,
    include_similarity: bool | None = None,
    include_sort_vector: bool | None = None,
    sort: SortType | None = None,
    max_time_ms: int | None = None,
) -> AsyncCursor:
    """
    Query the collection for the documents satisfying a given filter.

    The return value is an AsyncCursor, meant to be iterated over in order
    to consume the results. Depending on how the method is invoked, changes
    made to the collection after `find` was called may or may not be seen
    while iterating. When the cursor does track mutations in real time,
    it walks the documents in an approximate fashion (that is, documents
    may occasionally be skipped or returned twice). This occurs when
    the `sort` option is used in a non-vector-search manner.

    Args:
        filter: a dictionary expressing a predicate in the Data API filter
            syntax. For instance:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            The complete list of operators is found in the Data API
            documentation.
        projection: determines what portions of each document are returned.
            It is either an allow-list, such as `{"f1": True, "f2": True}`,
            or a deny-list, such as `{"fx": False, "fy": False}`; the two
            forms cannot be mixed (with the exception of `_id` and other
            special fields, which may be set to True or False regardless
            of the rest of the specification).
            The star-projections `{"*": True}` and `{"*": False}` result in
            returning the full document and `{}`, respectively.
            List fields can be partially returned through slice directives,
            e.g. `{"array": {"$slice": 2}}`, `{"array": {"$slice": -2}}`,
            `{"array": {"$slice": [4, 2]}}` or `{"array": {"$slice": [-4, 2]}}`.
            An iterable of strings is implicitly taken as an allow-list.
            When this parameter is omitted, the default projection does not
            necessarily include "special" fields such as `$vector`
            or `$vectorize`.
            More about projections is found in the Data API documentation.
        skip: an integer: the first `skip` documents that the query would
            otherwise return are dropped, so that the results begin with
            the (skip+1)-th document.
            It is usable only together with an explicit `sort` criterion
            of the ascending/descending type (in other words, neither
            when no sorting is requested nor for vector-based ANN search).
        limit: an integer capping the number of returned documents.
            Nothing more is returned once `limit` is reached (or once
            the matching documents run out and the cursor is exhausted).
        vector: a vector, i.e. a list of floats of the right dimensionality,
            used to run a vector search (also called ANN,
            or "approximate nearest-neighbours" search).
            No other sorting criteria can be given when running a similarity
            search on a collection. Also, the number of documents that
            can be returned has an upper bound: see the Note about
            upper bounds and the Data API documentation for details.
            *DEPRECATED* (removal in 2.0). Use a `$vector` key in the
            sort clause dict instead.
        vectorize: a string that gets turned into a vector in order to run
            a vector search. It is an (exclusive) alternative to `vector`,
            with the same effect, and requires that such a service
            is configured for the collection.
            *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the
            sort clause dict instead.
        include_similarity: a boolean asking for the numeric similarity
            to be added, under the "$similarity" key, to each returned
            document. It is allowed only for vector ANN search, that is,
            when `vector` is passed or when `sort` has the form
            {"$vector": ...}.
        include_sort_vector: a boolean asking for the query vector used in
            this search. When True (and provided this is a vector search),
            the `get_sort_vector` method of the returned cursor will yield
            the vector used for the ANN search.
        sort: a dictionary controlling the order in which documents are
            returned. Details are in the Note about sorting and in the one
            about upper bounds.
            A "$vector" or a "$vectorize" key in `sort` requests
            vector-based ANN sorting.
        max_time_ms: a timeout, in milliseconds, applied to each one
            of the underlying HTTP requests that fetch documents while
            the cursor is being iterated over.
            The collection-level setting is used if this is not passed.

    Returns:
        an AsyncCursor object representing iterations over the matching
        documents (refer to the AsyncCursor object for its usage. The simplest
        approach is a loop: `async for document in collection.find(...):`).

    Examples:
        >>> async def run_finds(acol: AsyncCollection) -> None:
        ...     filter = {"seq": {"$exists": True}}
        ...     print("find results 1:")
        ...     async for doc in acol.find(filter, projection={"seq": True}, limit=5):
        ...         print(doc["seq"])
        ...     async_cursor1 = acol.find(
        ...         {},
        ...         limit=4,
        ...         sort={"seq": astrapy.constants.SortDocuments.DESCENDING},
        ...     )
        ...     ids = [doc["_id"] async for doc in async_cursor1]
        ...     print("find results 2:", ids)
        ...     async_cursor2 = acol.find({}, limit=3)
        ...     seqs = await async_cursor2.distinct("seq")
        ...     print("distinct results 3:", seqs)
        ...
        >>> asyncio.run(run_finds(my_async_coll))
        find results 1:
        48
        35
        7
        11
        13
        find results 2: ['d656cd9d-...', '479c7ce8-...', '96dc87fd-...', '83f0a21f-...']
        distinct results 3: [48, 35, 7]

        >>> async def run_vector_finds(acol: AsyncCollection) -> list[str]:
        ...     await acol.insert_many([
        ...         {"tag": "A", "$vector": [4, 5]},
        ...         {"tag": "B", "$vector": [3, 4]},
        ...         {"tag": "C", "$vector": [3, 2]},
        ...         {"tag": "D", "$vector": [4, 1]},
        ...         {"tag": "E", "$vector": [2, 5]},
        ...     ])
        ...     ann_tags = [
        ...         document["tag"]
        ...         async for document in acol.find(
        ...             {},
        ...             sort={"$vector": [3, 3]},
        ...             limit=3,
        ...         )
        ...     ]
        ...     return ann_tags
        ...
        >>> asyncio.run(run_vector_finds(my_async_coll))
        ['A', 'B', 'C']
        >>> # (assuming the collection has metric VectorMetric.COSINE)

        >>> async_cursor = my_async_coll.find(
        ...     sort={"$vector": [3, 3]},
        ...     limit=3,
        ...     include_sort_vector=True,
        ... )
        >>> asyncio.run(async_cursor.get_sort_vector())
        [3.0, 3.0]
        >>> asyncio.run(async_cursor.__anext__())
        {'_id': 'b13ce177-738e-47ec-bce1-77738ee7ec93', 'tag': 'A'}
        >>> asyncio.run(async_cursor.get_sort_vector())
        [3.0, 3.0]

    Note:
        Here are some sample values for the `sort` parameter.
        If no particular order is needed:
            sort={}
        To sort by a certain value, ascending or descending:
            sort={"field": SortDocuments.ASCENDING}
            sort={"field": SortDocuments.DESCENDING}
        To sort by "field" first and by "subfield" next
        (although modern Python versions keep dictionaries ordered,
        using a `collections.OrderedDict` is suggested for clarity
        in such cases):
            sort={
                "field": SortDocuments.ASCENDING,
                "subfield": SortDocuments.ASCENDING,
            }
        To run a vector similarity (ANN) search:
            sort={"$vector": [0.4, 0.15, -0.5]}

    Note:
        Certain combinations of arguments implicitly cap the number
        of documents returned by the Data API. In particular:
        (a) Vector ANN searches cannot return more than a number of documents
        that at the time of writing is set to 1000 items.
        (b) With a sort criterion of the ascending/descending type,
        the Data API returns a smaller number of documents, set to 20
        at the time of writing, and stops there. The documents returned
        are the top results over the whole collection according to
        the requested criterion.
        These limits must be kept in mind also when later running
        a command such as `.distinct()` on a cursor.

    Note:
        If no sorting criteria are given at all (by vector or otherwise),
        the cursor can scroll through an arbitrary number of documents, with
        the Data API and the client periodically exchanging new chunks
        of documents.
        Note that, when documents are added/removed after the `find`
        was started, the behavior of the cursor depends on database
        internals: it is neither guaranteed nor excluded that such
        "real-time" changes in the data are picked up by the cursor.
    """

    check_deprecated_vector_ize(
        vector=vector,
        vectors=None,
        vectorize=vectorize,
        kind="find",
    )
    # fold the deprecated vector/vectorize arguments into the sort clause
    effective_sort = _collate_vector_to_sort(sort, vector, vectorize)
    request_timeout_ms = max_time_ms or self.api_options.max_time_ms
    if include_similarity is not None:
        # similarity can be requested only when the sort is a vector search
        if not _is_vector_sort(effective_sort):
            raise ValueError(
                "Cannot use `include_similarity` unless for vector search."
            )
    cursor = AsyncCursor(
        collection=self,
        filter=filter,
        projection=projection,
        max_time_ms=request_timeout_ms,
        overall_max_time_ms=None,
    )
    # each setter returns a cursor: apply them in the same order as a chain
    cursor = cursor.skip(skip)
    cursor = cursor.limit(limit)
    cursor = cursor.sort(effective_sort)
    cursor = cursor.include_similarity(include_similarity)
    return cursor.include_sort_vector(include_sort_vector)
async def find_one(self, filter: FilterType | None = None, *, projection: ProjectionType | None = None, vector: VectorType | None = None, vectorize: str | None = None, include_similarity: bool | None = None, sort: SortType | None = None, max_time_ms: int | None = None) ‑> Optional[Dict[str, Any]]

Run a search, returning the first document in the collection that matches provided filters, if any is found.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
projection
it controls which parts of the document are returned. It can be an allow-list: {"f1": True, "f2": True}, or a deny-list: {"fx": False, "fy": False}, but not a mixture (except for the _id and other special fields, which can be associated to both True or False independently of the rest of the specification). The special star-projections {"*": True} and {"*": False} have the effect of returning the whole document and {} respectively. For lists in documents, slice directives can be passed to select portions of the list: for instance, {"array": {"$slice": 2}}, {"array": {"$slice": -2}}, {"array": {"$slice": [4, 2]}} or {"array": {"$slice": [-4, 2]}}. An iterable over strings will be treated implicitly as an allow-list. The default projection (used if this parameter is not passed) does not necessarily include "special" fields such as $vector or $vectorize. See the Data API documentation for more on projections.
vector
a suitable vector, i.e. a list of float numbers of the appropriate dimensionality, to perform vector search (i.e. ANN, or "approximate nearest-neighbours" search), extracting the most similar document in the collection matching the filter. DEPRECATED (removal in 2.0). Use a $vector key in the sort clause dict instead.
vectorize
a string to be made into a vector to perform vector search. Using vectorize assumes a suitable service is configured for the collection. DEPRECATED (removal in 2.0). Use a $vectorize key in the sort clause dict instead.
include_similarity
a boolean to request the numeric value of the similarity to be returned as an added "$similarity" key in the returned document. Can only be used for vector ANN search, i.e. when either vector is supplied or the sort parameter has the shape {"$vector": …}.
sort
with this dictionary parameter one can control the order the documents are returned. See the Note about sorting for details. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
max_time_ms
a timeout, in milliseconds, for the underlying HTTP request. If not passed, the collection-level setting is used instead.

Returns

a dictionary expressing the required document, otherwise None.

Example

>>> async def demo_find_one(acol: AsyncCollection) -> None:
...     print("Count:", await acol.count_documents({}, upper_bound=100))
...     result0 = await acol.find_one({})
...     print("result0", result0)
...     result1 = await acol.find_one({"seq": 10})
...     print("result1", result1)
...     result2 = await acol.find_one({"seq": 1011})
...     print("result2", result2)
...     result3 = await acol.find_one({}, projection={"seq": False})
...     print("result3", result3)
...     result4 = await acol.find_one(
...         {},
...         sort={"seq": astrapy.constants.SortDocuments.DESCENDING},
...     )
...     print("result4", result4)
...
>>>
>>> asyncio.run(demo_find_one(my_async_coll))
Count: 50
result0 {'_id': '479c7ce8-...', 'seq': 48}
result1 {'_id': '93e992c4-...', 'seq': 10}
result2 None
result3 {'_id': '479c7ce8-...'}
result4 {'_id': 'd656cd9d-...', 'seq': 49}
>>> asyncio.run(my_async_coll.find_one(
...     {},
...     sort={"$vector": [1, 0]},
...     projection={"*": True},
... ))
{'_id': '...', 'tag': 'D', '$vector': [4.0, 1.0]}

Note

See the find method for more details on the accepted parameters (whereas skip and limit are not valid parameters for find_one).

Expand source code
async def find_one(
    self,
    filter: FilterType | None = None,
    *,
    projection: ProjectionType | None = None,
    vector: VectorType | None = None,
    vectorize: str | None = None,
    include_similarity: bool | None = None,
    sort: SortType | None = None,
    max_time_ms: int | None = None,
) -> DocumentType | None:
    """
    Search the collection and return the first document matching
    the provided filters, if there is one.

    Args:
        filter: a dictionary expressing a predicate in the Data API filter
            syntax. For instance:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            The complete list of operators is found in the Data API
            documentation.
        projection: determines what portions of the document are returned.
            It is either an allow-list, such as `{"f1": True, "f2": True}`,
            or a deny-list, such as `{"fx": False, "fy": False}`; the two
            forms cannot be mixed (with the exception of `_id` and other
            special fields, which may be set to True or False regardless
            of the rest of the specification).
            The star-projections `{"*": True}` and `{"*": False}` result in
            returning the full document and `{}`, respectively.
            List fields can be partially returned through slice directives,
            e.g. `{"array": {"$slice": 2}}`, `{"array": {"$slice": -2}}`,
            `{"array": {"$slice": [4, 2]}}` or `{"array": {"$slice": [-4, 2]}}`.
            An iterable of strings is implicitly taken as an allow-list.
            When this parameter is omitted, the default projection does not
            necessarily include "special" fields such as `$vector`
            or `$vectorize`.
            More about projections is found in the Data API documentation.
        vector: a vector, i.e. a list of floats of the right dimensionality,
            used to run a vector search (also called ANN,
            or "approximate nearest-neighbours" search), which picks,
            among the documents matching the filter, the most similar one.
            *DEPRECATED* (removal in 2.0). Use a `$vector` key in the
            sort clause dict instead.
        vectorize: a string that gets turned into a vector in order to run
            a vector search. Its usage assumes that a suitable service
            is configured for the collection.
            *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the
            sort clause dict instead.
        include_similarity: a boolean asking for the numeric similarity
            to be added, under the "$similarity" key, to the returned
            document. It is allowed only for vector ANN search, that is,
            when `vector` is passed or when `sort` has the form
            {"$vector": ...}.
        sort: a dictionary controlling the order in which documents are
            returned. Details are in the Note about sorting.
            A "$vector" or a "$vectorize" key in `sort` requests
            vector-based ANN sorting.
        max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
            The collection-level setting is used if this is not passed.

    Returns:
        the required document, expressed as a dictionary, or None
        if no document is found.

    Example:
        >>> async def demo_find_one(acol: AsyncCollection) -> None:
        ...     print("Count:", await acol.count_documents({}, upper_bound=100))
        ...     result0 = await acol.find_one({})
        ...     print("result0", result0)
        ...     result1 = await acol.find_one({"seq": 10})
        ...     print("result1", result1)
        ...     result2 = await acol.find_one({"seq": 1011})
        ...     print("result2", result2)
        ...     result3 = await acol.find_one({}, projection={"seq": False})
        ...     print("result3", result3)
        ...     result4 = await acol.find_one(
        ...         {},
        ...         sort={"seq": astrapy.constants.SortDocuments.DESCENDING},
        ...     )
        ...     print("result4", result4)
        ...
        >>>
        >>> asyncio.run(demo_find_one(my_async_coll))
        Count: 50
        result0 {'_id': '479c7ce8-...', 'seq': 48}
        result1 {'_id': '93e992c4-...', 'seq': 10}
        result2 None
        result3 {'_id': '479c7ce8-...'}
        result4 {'_id': 'd656cd9d-...', 'seq': 49}

        >>> asyncio.run(my_async_coll.find_one(
        ...     {},
        ...     sort={"$vector": [1, 0]},
        ...     projection={"*": True},
        ... ))
        {'_id': '...', 'tag': 'D', '$vector': [4.0, 1.0]}

    Note:
        The `find` method has more details on the accepted parameters
        (`skip` and `limit`, however, are not valid parameters for `find_one`).
    """

    check_deprecated_vector_ize(
        vector=vector, vectors=None, vectorize=vectorize, kind="find"
    )
    request_timeout_ms = max_time_ms or self.api_options.max_time_ms
    # a `find` capped at one result: its first item, if any, is the answer
    single_result_cursor = self.find(
        filter=filter,
        projection=projection,
        skip=None,
        limit=1,
        vector=vector,
        vectorize=vectorize,
        include_similarity=include_similarity,
        sort=sort,
        max_time_ms=request_timeout_ms,
    )
    try:
        return await single_result_cursor.__anext__()
    except StopAsyncIteration:
        # the cursor had nothing to yield: no document was found
        return None
async def find_one_and_delete(self, filter: FilterType, *, projection: ProjectionType | None = None, vector: VectorType | None = None, vectorize: str | None = None, sort: SortType | None = None, max_time_ms: int | None = None) ‑> Optional[Dict[str, Any]]

Find a document in the collection and delete it. The deleted document, however, is the return value of the method.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
projection
it controls which parts of the document are returned. It can be an allow-list: {"f1": True, "f2": True}, or a deny-list: {"fx": False, "fy": False}, but not a mixture (except for the _id and other special fields, which can be associated to both True or False independently of the rest of the specification). The special star-projections {"*": True} and {"*": False} have the effect of returning the whole document and {} respectively. For lists in documents, slice directives can be passed to select portions of the list: for instance, {"array": {"$slice": 2}}, {"array": {"$slice": -2}}, {"array": {"$slice": [4, 2]}} or {"array": {"$slice": [-4, 2]}}. An iterable over strings will be treated implicitly as an allow-list. The default projection (used if this parameter is not passed) does not necessarily include "special" fields such as $vector or $vectorize. See the Data API documentation for more on projections.
vector
a suitable vector, i.e. a list of float numbers of the appropriate dimensionality, to use vector search (i.e. ANN, or "approximate nearest-neighbours" search), as the sorting criterion. In this way, the matched document (if any) will be the one that is most similar to the provided vector. DEPRECATED (removal in 2.0). Use a $vector key in the sort clause dict instead.
vectorize
a string to be made into a vector to perform vector search. Using vectorize assumes a suitable service is configured for the collection. DEPRECATED (removal in 2.0). Use a $vectorize key in the sort clause dict instead.
sort
with this dictionary parameter one can control the sorting order of the documents matching the filter, effectively determining what document will come first and hence be the deleted one. See the find method for more on sorting. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
max_time_ms
a timeout, in milliseconds, for the underlying HTTP request. If not passed, the collection-level setting is used instead.

Returns

Either the document (or a projection thereof, as requested), or None if no matches were found in the first place.

Example

>>> async def do_find_one_and_delete(acol: AsyncCollection) -> None:
...     await acol.insert_many(
...         [
...             {"species": "swan", "class": "Aves"},
...             {"species": "frog", "class": "Amphibia"},
...         ],
...     )
...     delete_result0 = await acol.find_one_and_delete(
...         {"species": {"$ne": "frog"}},
...         projection=["species"],
...     )
...     print("delete_result0", delete_result0)
...     delete_result1 = await acol.find_one_and_delete(
...         {"species": {"$ne": "frog"}},
...     )
...     print("delete_result1", delete_result1)
...
>>> asyncio.run(do_find_one_and_delete(my_async_coll))
delete_result0 {'_id': 'f335cd0f-...', 'species': 'swan'}
delete_result1 None
Expand source code
async def find_one_and_delete(
    self,
    filter: FilterType,
    *,
    projection: ProjectionType | None = None,
    vector: VectorType | None = None,
    vectorize: str | None = None,
    sort: SortType | None = None,
    max_time_ms: int | None = None,
) -> DocumentType | None:
    """
    Locate a document in the collection and delete it. The method returns
    the document that has been deleted.

    Args:
        filter: a dictionary expressing a predicate in the Data API filter
            syntax. For instance:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            The complete list of operators is found in the Data API
            documentation.
        projection: determines what portions of the document are returned.
            It is either an allow-list, such as `{"f1": True, "f2": True}`,
            or a deny-list, such as `{"fx": False, "fy": False}`; the two
            forms cannot be mixed (with the exception of `_id` and other
            special fields, which may be set to True or False regardless
            of the rest of the specification).
            The star-projections `{"*": True}` and `{"*": False}` result in
            returning the full document and `{}`, respectively.
            List fields can be partially returned through slice directives,
            e.g. `{"array": {"$slice": 2}}`, `{"array": {"$slice": -2}}`,
            `{"array": {"$slice": [4, 2]}}` or `{"array": {"$slice": [-4, 2]}}`.
            An iterable of strings is implicitly taken as an allow-list.
            When this parameter is omitted, the default projection does not
            necessarily include "special" fields such as `$vector`
            or `$vectorize`.
            More about projections is found in the Data API documentation.
        vector: a vector, i.e. a list of floats of the right dimensionality,
            employing vector search (also called ANN,
            or "approximate nearest-neighbours" search) as the sorting
            criterion. The matched document (if any) is then the one
            most similar to the provided vector.
            *DEPRECATED* (removal in 2.0). Use a `$vector` key in the
            sort clause dict instead.
        vectorize: a string that gets turned into a vector in order to run
            a vector search. Its usage assumes that a suitable service
            is configured for the collection.
            *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the
            sort clause dict instead.
        sort: a dictionary controlling the sorting order of the documents
            that match the filter: this effectively determines which
            document comes first and is therefore the deleted one.
            The `find` method has more on sorting.
            A "$vector" or a "$vectorize" key in `sort` requests
            vector-based ANN sorting.
        max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
            The collection-level setting is used if this is not passed.

    Returns:
        The document (or, if so requested, a projection of it); None
        if no matches were found in the first place.

    Raises:
        DataAPIFaultyResponseException: if the API response carries neither
            a document nor a `deletedCount` status equal to zero.

    Example:
        >>> async def do_find_one_and_delete(acol: AsyncCollection) -> None:
        ...     await acol.insert_many(
        ...         [
        ...             {"species": "swan", "class": "Aves"},
        ...             {"species": "frog", "class": "Amphibia"},
        ...         ],
        ...     )
        ...     delete_result0 = await acol.find_one_and_delete(
        ...         {"species": {"$ne": "frog"}},
        ...         projection=["species"],
        ...     )
        ...     print("delete_result0", delete_result0)
        ...     delete_result1 = await acol.find_one_and_delete(
        ...         {"species": {"$ne": "frog"}},
        ...     )
        ...     print("delete_result1", delete_result1)
        ...
        >>> asyncio.run(do_find_one_and_delete(my_async_coll))
        delete_result0 {'_id': 'f335cd0f-...', 'species': 'swan'}
        delete_result1 None
    """

    check_deprecated_vector_ize(
        vector=vector, vectors=None, vectorize=vectorize, kind="find"
    )
    # fold the deprecated vector/vectorize arguments into the sort clause
    effective_sort = _collate_vector_to_sort(sort, vector, vectorize)
    normalized_projection = normalize_optional_projection(projection)
    request_timeout_ms = max_time_ms or self.api_options.max_time_ms
    command_options = {
        "filter": filter,
        "sort": effective_sort,
        "projection": normalized_projection,
    }
    # options left to None are omitted from the command altogether
    fo_payload = {
        "findOneAndDelete": {
            option_name: option_value
            for option_name, option_value in command_options.items()
            if option_value is not None
        }
    }
    logger.info(f"findOneAndDelete on '{self.name}'")
    api_response = await self._api_commander.async_request(
        payload=fo_payload,
        timeout_info=base_timeout_info(request_timeout_ms),
    )
    logger.info(f"finished findOneAndDelete on '{self.name}'")
    if "document" in api_response.get("data", {}):
        return api_response["data"]["document"]  # type: ignore[no-any-return]
    # no document in the response: legitimate only if nothing was deleted
    if api_response.get("status", {}).get("deletedCount") == 0:
        return None
    raise DataAPIFaultyResponseException(
        text="Faulty response from find_one_and_delete API command.",
        raw_response=api_response,
    )
async def find_one_and_replace(self, filter: FilterType, replacement: DocumentType, *, projection: ProjectionType | None = None, vector: VectorType | None = None, vectorize: str | None = None, sort: SortType | None = None, upsert: bool = False, return_document: str = 'before', max_time_ms: int | None = None) ‑> Optional[Dict[str, Any]]

Find a document on the collection and replace it entirely with a new one, optionally inserting a new one if no match is found.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
replacement
the new document to write into the collection.
projection
it controls which parts of the document are returned. It can be an allow-list: {"f1": True, "f2": True}, or a deny-list: {"fx": False, "fy": False}, but not a mixture (except for the _id and other special fields, which can be associated to both True or False independently of the rest of the specification). The special star-projections {"*": True} and {"*": False} have the effect of returning the whole document and {} respectively. For lists in documents, slice directives can be passed to select portions of the list: for instance, {"array": {"$slice": 2}}, {"array": {"$slice": -2}}, {"array": {"$slice": [4, 2]}} or {"array": {"$slice": [-4, 2]}}. An iterable over strings will be treated implicitly as an allow-list. The default projection (used if this parameter is not passed) does not necessarily include "special" fields such as $vector or $vectorize. See the Data API documentation for more on projections.
vector
a suitable vector, i.e. a list of float numbers of the appropriate dimensionality, to use vector search (i.e. ANN, or "approximate nearest-neighbours" search), as the sorting criterion. In this way, the matched document (if any) will be the one that is most similar to the provided vector. DEPRECATED (removal in 2.0). Use a $vector key in the sort clause dict instead.
vectorize
a string to be made into a vector to perform vector search. Using vectorize assumes a suitable service is configured for the collection. DEPRECATED (removal in 2.0). Use a $vectorize key in the sort clause dict instead.
sort
with this dictionary parameter one can control the sorting order of the documents matching the filter, effectively determining what document will come first and hence be the replaced one. See the find method for more on sorting. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
upsert
this parameter controls the behavior in absence of matches. If True, replacement is inserted as a new document if no matches are found on the collection. If False, the operation silently does nothing in case of no matches.
return_document
a flag controlling what document is returned: if set to ReturnDocument.BEFORE, or the string "before", the document found on database is returned; if set to ReturnDocument.AFTER, or the string "after", the new document is returned. The default is "before".
max_time_ms
a timeout, in milliseconds, for the underlying HTTP request. If not passed, the collection-level setting is used instead.

Returns

A document, either the one before the replace operation or the one after that. Alternatively, the method returns None to represent that no matching document was found, or that no replacement was inserted (depending on the return_document parameter).

Example

>>> async def do_find_one_and_replace(acol: AsyncCollection) -> None:
...             await acol.insert_one({"_id": "rule1", "text": "all animals are equal"})
...             result0 = await acol.find_one_and_replace(
...                 {"_id": "rule1"},
...                 {"text": "some animals are more equal!"},
...             )
...             print("result0", result0)
...             result1 = await acol.find_one_and_replace(
...                 {"text": "some animals are more equal!"},
...                 {"text": "and the pigs are the rulers"},
...                 return_document=astrapy.constants.ReturnDocument.AFTER,
...             )
...             print("result1", result1)
...             result2 = await acol.find_one_and_replace(
...                 {"_id": "rule2"},
...                 {"text": "F=ma^2"},
...                 return_document=astrapy.constants.ReturnDocument.AFTER,
...             )
...             print("result2", result2)
...             result3 = await acol.find_one_and_replace(
...                 {"_id": "rule2"},
...                 {"text": "F=ma"},
...                 upsert=True,
...                 return_document=astrapy.constants.ReturnDocument.AFTER,
...                 projection={"_id": False},
...             )
...             print("result3", result3)
...
>>> asyncio.run(do_find_one_and_replace(my_async_coll))
result0 {'_id': 'rule1', 'text': 'all animals are equal'}
result1 {'_id': 'rule1', 'text': 'and the pigs are the rulers'}
result2 None
result3 {'text': 'F=ma'}
Expand source code
async def find_one_and_replace(
    self,
    filter: FilterType,
    replacement: DocumentType,
    *,
    projection: ProjectionType | None = None,
    vector: VectorType | None = None,
    vectorize: str | None = None,
    sort: SortType | None = None,
    upsert: bool = False,
    return_document: str = ReturnDocument.BEFORE,
    max_time_ms: int | None = None,
) -> DocumentType | None:
    """
    Locate one document on the collection and overwrite it completely with
    a new document; if requested, insert the new document when nothing
    matches the filter.

    Args:
        filter: a dictionary predicate, written in the Data API filter
            syntax, selecting the candidate documents. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            The Data API documentation lists the full set of operators.
        replacement: the document that takes the place of the matched one.
        projection: determines which parts of the document are returned.
            It is either an allow-list, `{"f1": True, "f2": True}`,
            or a deny-list, `{"fx": False, "fy": False}`; the two cannot be
            mixed (with the exception of `_id` and other special fields,
            which may be set to True or False regardless of the rest
            of the specification).
            The star-projections `{"*": True}` and `{"*": False}` result in
            the whole document and in `{}` being returned, respectively.
            Portions of a list in the document can be selected with slice
            directives such as `{"array": {"$slice": 2}}`,
            `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
            `{"array": {"$slice": [-4, 2]}}`.
            An iterable over strings is implicitly treated as an allow-list.
            When this parameter is omitted, the default projection does not
            necessarily include "special" fields such as `$vector`
            or `$vectorize`.
            See the Data API documentation for more on projections.
        vector: a list of float numbers, of the appropriate dimensionality,
            used as the sorting criterion through vector search (i.e. ANN,
            or "approximate nearest-neighbours" search): the matched
            document (if any) is then the one most similar to this vector.
            *DEPRECATED* (removal in 2.0). Use a `$vector` key in the
            sort clause dict instead.
        vectorize: a string to be turned into a vector in order to run
            a vector search. This requires a suitable service to be
            configured for the collection.
            *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the
            sort clause dict instead.
        sort: a dictionary controlling the ordering of the documents that
            match the filter, which in turn decides which document comes
            first and is therefore the replaced one. See the `find` method
            for more on sorting. Vector-based ANN sorting is obtained
            with a "$vector" or a "$vectorize" key in `sort`.
        upsert: what to do when no document matches. If True,
            `replacement` is inserted as a new document; if False,
            the operation silently does nothing.
        return_document: selects which document is returned:
            with `ReturnDocument.BEFORE` (or the string "before"),
            the document as found on database; with
            `ReturnDocument.AFTER` (or the string "after"), the new
            document. The default is "before".
        max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
            When omitted, the collection-level setting applies.

    Returns:
        A document: the one prior to the replacement or the one resulting
        from it. None is returned instead when no matching document was
        found, or when no replacement was inserted (depending on
        the `return_document` parameter).

    Raises:
        DataAPIFaultyResponseException: if the API response has no
            "document" entry in its "data" part.

    Example:
        >>> async def do_find_one_and_replace(acol: AsyncCollection) -> None:
        ...     await acol.insert_one({"_id": "rule1", "text": "all animals are equal"})
        ...     result0 = await acol.find_one_and_replace(
        ...         {"_id": "rule1"},
        ...         {"text": "some animals are more equal!"},
        ...     )
        ...     print("result0", result0)
        ...     result1 = await acol.find_one_and_replace(
        ...         {"text": "some animals are more equal!"},
        ...         {"text": "and the pigs are the rulers"},
        ...         return_document=astrapy.constants.ReturnDocument.AFTER,
        ...     )
        ...     print("result1", result1)
        ...     result2 = await acol.find_one_and_replace(
        ...         {"_id": "rule2"},
        ...         {"text": "F=ma^2"},
        ...         return_document=astrapy.constants.ReturnDocument.AFTER,
        ...     )
        ...     print("result2", result2)
        ...     result3 = await acol.find_one_and_replace(
        ...         {"_id": "rule2"},
        ...         {"text": "F=ma"},
        ...         upsert=True,
        ...         return_document=astrapy.constants.ReturnDocument.AFTER,
        ...         projection={"_id": False},
        ...     )
        ...     print("result3", result3)
        ...
        >>> asyncio.run(do_find_one_and_replace(my_async_coll))
        result0 {'_id': 'rule1', 'text': 'all animals are equal'}
        result1 {'_id': 'rule1', 'text': 'and the pigs are the rulers'}
        result2 None
        result3 {'text': 'F=ma'}
    """

    check_deprecated_vector_ize(
        vector=vector,
        vectors=None,
        vectorize=vectorize,
        kind="find",
    )
    # Fold the (deprecated) vector/vectorize arguments into the sort clause.
    effective_sort = _collate_vector_to_sort(sort, vector, vectorize)
    # A per-call timeout, when given (and nonzero), overrides the collection default.
    timeout_ms = max_time_ms or self.api_options.max_time_ms
    command_body = {
        "filter": filter,
        "projection": normalize_optional_projection(projection),
        "replacement": replacement,
        "options": {
            "returnDocument": return_document,
            "upsert": upsert,
        },
        "sort": effective_sort,
    }
    # Entries left at None are omitted from the command altogether.
    request_payload = {
        "findOneAndReplace": {
            key: value for key, value in command_body.items() if value is not None
        }
    }
    logger.info(f"findOneAndReplace on '{self.name}'")
    api_response = await self._api_commander.async_request(
        payload=request_payload,
        timeout_info=base_timeout_info(timeout_ms),
    )
    logger.info(f"finished findOneAndReplace on '{self.name}'")
    response_data = api_response.get("data", {})
    if "document" not in response_data:
        raise DataAPIFaultyResponseException(
            text="Faulty response from find_one_and_replace API command.",
            raw_response=api_response,
        )
    # The "document" entry may itself be null, in which case None is returned.
    return response_data.get("document")  # type: ignore[no-any-return]
async def find_one_and_update(self, filter: FilterType, update: dict[str, Any], *, projection: ProjectionType | None = None, vector: VectorType | None = None, vectorize: str | None = None, sort: SortType | None = None, upsert: bool = False, return_document: str = 'before', max_time_ms: int | None = None) ‑> Optional[Dict[str, Any]]

Find a document on the collection and update it as requested, optionally inserting a new one if no match is found.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
update
the update prescription to apply to the document, expressed as a dictionary as per Data API syntax. Examples are: {"$set": {"field": "value"}} {"$inc": {"counter": 10}} {"$unset": {"field": ""}} See the Data API documentation for the full syntax.
projection
it controls which parts of the document are returned. It can be an allow-list: {"f1": True, "f2": True}, or a deny-list: {"fx": False, "fy": False}, but not a mixture (except for the _id and other special fields, which can be associated to both True or False independently of the rest of the specification). The special star-projections {"*": True} and {"*": False} have the effect of returning the whole document and {} respectively. For lists in documents, slice directives can be passed to select portions of the list: for instance, {"array": {"$slice": 2}}, {"array": {"$slice": -2}}, {"array": {"$slice": [4, 2]}} or {"array": {"$slice": [-4, 2]}}. An iterable over strings will be treated implicitly as an allow-list. The default projection (used if this parameter is not passed) does not necessarily include "special" fields such as $vector or $vectorize. See the Data API documentation for more on projections.
vector
a suitable vector, i.e. a list of float numbers of the appropriate dimensionality, to use vector search (i.e. ANN, or "approximate nearest-neighbours" search), as the sorting criterion. In this way, the matched document (if any) will be the one that is most similar to the provided vector. DEPRECATED (removal in 2.0). Use a $vector key in the sort clause dict instead.
vectorize
a string to be made into a vector to perform vector search. Using vectorize assumes a suitable service is configured for the collection. DEPRECATED (removal in 2.0). Use a $vectorize key in the sort clause dict instead.
sort
with this dictionary parameter one can control the sorting order of the documents matching the filter, effectively determining what document will come first and hence be the updated one. See the find method for more on sorting. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
upsert
this parameter controls the behavior in absence of matches. If True, a new document (resulting from applying the update to an empty document) is inserted if no matches are found on the collection. If False, the operation silently does nothing in case of no matches.
return_document
a flag controlling what document is returned: if set to ReturnDocument.BEFORE, or the string "before", the document found on database is returned; if set to ReturnDocument.AFTER, or the string "after", the new document is returned. The default is "before".
max_time_ms
a timeout, in milliseconds, for the underlying HTTP request. If not passed, the collection-level setting is used instead.

Returns

A document (or a projection thereof, as required), either the one before the update operation or the one after that. Alternatively, the method returns None to represent that no matching document was found, or that no update was applied (depending on the return_document parameter).

Example

>>> async def do_find_one_and_update(acol: AsyncCollection) -> None:
...     await acol.insert_one({"Marco": "Polo"})
...     result0 = await acol.find_one_and_update(
...         {"Marco": {"$exists": True}},
...         {"$set": {"title": "Mr."}},
...     )
...     print("result0", result0)
...     result1 = await acol.find_one_and_update(
...         {"title": "Mr."},
...         {"$inc": {"rank": 3}},
...         projection=["title", "rank"],
...         return_document=astrapy.constants.ReturnDocument.AFTER,
...     )
...     print("result1", result1)
...     result2 = await acol.find_one_and_update(
...         {"name": "Johnny"},
...         {"$set": {"rank": 0}},
...         return_document=astrapy.constants.ReturnDocument.AFTER,
...     )
...     print("result2", result2)
...     result3 = await acol.find_one_and_update(
...         {"name": "Johnny"},
...         {"$set": {"rank": 0}},
...         upsert=True,
...         return_document=astrapy.constants.ReturnDocument.AFTER,
...     )
...     print("result3", result3)
...
>>> asyncio.run(do_find_one_and_update(my_async_coll))
result0 {'_id': 'f7c936d3-b0a0-45eb-a676-e2829662a57c', 'Marco': 'Polo'}
result1 {'_id': 'f7c936d3-b0a0-45eb-a676-e2829662a57c', 'title': 'Mr.', 'rank': 3}
result2 None
result3 {'_id': 'db3d678d-14d4-4caa-82d2-d5fb77dab7ec', 'name': 'Johnny', 'rank': 0}
Expand source code
async def find_one_and_update(
    self,
    filter: FilterType,
    update: dict[str, Any],
    *,
    projection: ProjectionType | None = None,
    vector: VectorType | None = None,
    vectorize: str | None = None,
    sort: SortType | None = None,
    upsert: bool = False,
    return_document: str = ReturnDocument.BEFORE,
    max_time_ms: int | None = None,
) -> DocumentType | None:
    """
    Locate one document on the collection and apply the requested update
    to it; if requested, insert a new document when nothing matches
    the filter.

    Args:
        filter: a dictionary predicate, written in the Data API filter
            syntax, selecting the candidate documents. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            The Data API documentation lists the full set of operators.
        update: the update prescription for the document, given as
            a dictionary following the Data API syntax. Examples are:
                {"$set": {"field": "value"}}
                {"$inc": {"counter": 10}}
                {"$unset": {"field": ""}}
            The Data API documentation describes the full syntax.
        projection: determines which parts of the document are returned.
            It is either an allow-list, `{"f1": True, "f2": True}`,
            or a deny-list, `{"fx": False, "fy": False}`; the two cannot be
            mixed (with the exception of `_id` and other special fields,
            which may be set to True or False regardless of the rest
            of the specification).
            The star-projections `{"*": True}` and `{"*": False}` result in
            the whole document and in `{}` being returned, respectively.
            Portions of a list in the document can be selected with slice
            directives such as `{"array": {"$slice": 2}}`,
            `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
            `{"array": {"$slice": [-4, 2]}}`.
            An iterable over strings is implicitly treated as an allow-list.
            When this parameter is omitted, the default projection does not
            necessarily include "special" fields such as `$vector`
            or `$vectorize`.
            See the Data API documentation for more on projections.
        vector: a list of float numbers, of the appropriate dimensionality,
            used as the sorting criterion through vector search (i.e. ANN,
            or "approximate nearest-neighbours" search): the matched
            document (if any) is then the one most similar to this vector.
            *DEPRECATED* (removal in 2.0). Use a `$vector` key in the
            sort clause dict instead.
        vectorize: a string to be turned into a vector in order to run
            a vector search. This requires a suitable service to be
            configured for the collection.
            *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the
            sort clause dict instead.
        sort: a dictionary controlling the ordering of the documents that
            match the filter, which in turn decides which document comes
            first and is therefore the updated one. See the `find` method
            for more on sorting. Vector-based ANN sorting is obtained
            with a "$vector" or a "$vectorize" key in `sort`.
        upsert: what to do when no document matches. If True, a new
            document (the outcome of applying `update` to an empty
            document) is inserted; if False, the operation silently
            does nothing.
        return_document: selects which document is returned:
            with `ReturnDocument.BEFORE` (or the string "before"),
            the document as found on database; with
            `ReturnDocument.AFTER` (or the string "after"), the new
            document. The default is "before".
        max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
            When omitted, the collection-level setting applies.

    Returns:
        A document (or a projection thereof, as required): the one prior
        to the update or the one resulting from it.
        None is returned instead when no matching document was found,
        or when no update was applied (depending on
        the `return_document` parameter).

    Raises:
        DataAPIFaultyResponseException: if the API response has no
            "document" entry in its "data" part.

    Example:
        >>> async def do_find_one_and_update(acol: AsyncCollection) -> None:
        ...     await acol.insert_one({"Marco": "Polo"})
        ...     result0 = await acol.find_one_and_update(
        ...         {"Marco": {"$exists": True}},
        ...         {"$set": {"title": "Mr."}},
        ...     )
        ...     print("result0", result0)
        ...     result1 = await acol.find_one_and_update(
        ...         {"title": "Mr."},
        ...         {"$inc": {"rank": 3}},
        ...         projection=["title", "rank"],
        ...         return_document=astrapy.constants.ReturnDocument.AFTER,
        ...     )
        ...     print("result1", result1)
        ...     result2 = await acol.find_one_and_update(
        ...         {"name": "Johnny"},
        ...         {"$set": {"rank": 0}},
        ...         return_document=astrapy.constants.ReturnDocument.AFTER,
        ...     )
        ...     print("result2", result2)
        ...     result3 = await acol.find_one_and_update(
        ...         {"name": "Johnny"},
        ...         {"$set": {"rank": 0}},
        ...         upsert=True,
        ...         return_document=astrapy.constants.ReturnDocument.AFTER,
        ...     )
        ...     print("result3", result3)
        ...
        >>> asyncio.run(do_find_one_and_update(my_async_coll))
        result0 {'_id': 'f7c936d3-b0a0-45eb-a676-e2829662a57c', 'Marco': 'Polo'}
        result1 {'_id': 'f7c936d3-b0a0-45eb-a676-e2829662a57c', 'title': 'Mr.', 'rank': 3}
        result2 None
        result3 {'_id': 'db3d678d-14d4-4caa-82d2-d5fb77dab7ec', 'name': 'Johnny', 'rank': 0}
    """

    check_deprecated_vector_ize(
        vector=vector,
        vectors=None,
        vectorize=vectorize,
        kind="find",
    )
    # Fold the (deprecated) vector/vectorize arguments into the sort clause.
    effective_sort = _collate_vector_to_sort(sort, vector, vectorize)
    # A per-call timeout, when given (and nonzero), overrides the collection default.
    timeout_ms = max_time_ms or self.api_options.max_time_ms
    command_body = {
        "filter": filter,
        "update": update,
        "options": {
            "returnDocument": return_document,
            "upsert": upsert,
        },
        "sort": effective_sort,
        "projection": normalize_optional_projection(projection),
    }
    # Entries left at None are omitted from the command altogether.
    request_payload = {
        "findOneAndUpdate": {
            key: value for key, value in command_body.items() if value is not None
        }
    }
    logger.info(f"findOneAndUpdate on '{self.name}'")
    api_response = await self._api_commander.async_request(
        payload=request_payload,
        timeout_info=base_timeout_info(timeout_ms),
    )
    logger.info(f"finished findOneAndUpdate on '{self.name}'")
    response_data = api_response.get("data", {})
    if "document" not in response_data:
        raise DataAPIFaultyResponseException(
            text="Faulty response from find_one_and_update API command.",
            raw_response=api_response,
        )
    # The "document" entry may itself be null, in which case None is returned.
    return response_data.get("document")  # type: ignore[no-any-return]
def info(self) ‑> CollectionInfo

Information on the collection (name, location, database), in the form of a CollectionInfo object.

Not to be confused with the collection options method (related to the collection internal configuration).

Example

>>> my_async_coll.info().database_info.region
'us-east1'
>>> my_async_coll.info().full_name
'default_keyspace.my_v_collection'

Note

the returned CollectionInfo wraps, among other things, the database information: as such, calling this method triggers the same-named method of a Database object (which, in turn, performs an HTTP request to the DevOps API). See the documentation for Database.info() for more details.

Expand source code
def info(self) -> CollectionInfo:
    """
    Return a CollectionInfo object describing the collection
    (its name, location and database).

    This is distinct from the collection `options` method, which
    concerns the internal configuration of the collection.

    Example:
        >>> my_async_coll.info().database_info.region
        'us-east1'
        >>> my_async_coll.info().full_name
        'default_keyspace.my_v_collection'

    Note:
        the database information is part of the returned CollectionInfo.
        For this reason, this method invokes the same-named method
        of a Database object (which, in turn, performs an HTTP request
        to the DevOps API).
        See the documentation for `Database.info()` for more details.
    """

    # Resolve the database-level information first (delegated to the Database).
    db_info = self.database.info()
    # `namespace` is filled with the same value as `keyspace`.
    return CollectionInfo(
        database_info=db_info,
        keyspace=self.keyspace,
        namespace=self.keyspace,
        name=self.name,
        full_name=self.full_name,
    )
async def insert_many(self, documents: Iterable[DocumentType], *, vectors: Iterable[VectorType | None] | None = None, vectorize: Iterable[str | None] | None = None, ordered: bool = False, chunk_size: int | None = None, concurrency: int | None = None, max_time_ms: int | None = None) ‑> InsertManyResult

Insert a list of documents into the collection. This is not an atomic operation.

Args

documents
an iterable of dictionaries, each a document to insert. Documents may specify their _id field or leave it out, in which case it will be added automatically.
vectors
an optional list of vectors (as many vectors as the provided documents) to associate to the documents when inserting. Passing vectors this way is indeed equivalent to the "$vector" field of the documents, however the two are mutually exclusive. DEPRECATED (removal in 2.0). Use a $vector key in the documents instead.
vectorize
an optional list of strings to be made into as many vectors (one per document), if such a service is configured for the collection. Passing this parameter is equivalent to providing a $vectorize field in the documents themselves, however the two are mutually exclusive. DEPRECATED (removal in 2.0). Use a $vectorize key in the documents instead.
ordered
if False (default), the insertions can occur in arbitrary order and possibly concurrently. If True, they are processed sequentially. If there are no specific reasons against it, unordered insertions are to be preferred as they complete much faster.
chunk_size
how many documents to include in a single API request. Exceeding the server maximum allowed value results in an error. Leave it unspecified (recommended) to use the system default.
concurrency
maximum number of concurrent requests to the API at a given time. It cannot be more than one for ordered insertions.
max_time_ms
a timeout, in milliseconds, for the operation. If not passed, the collection-level setting is used instead. If many documents are being inserted, this method corresponds to several HTTP requests: in such cases one may want to specify a more tolerant timeout here.

Returns

an InsertManyResult object.

Examples

>>> async def write_and_count(acol: AsyncCollection) -> None:
...             count0 = await acol.count_documents({}, upper_bound=10)
...             print("count0", count0)
...             im_result1 = await acol.insert_many(
...                 [
...                     {"a": 10},
...                     {"a": 5},
...                     {"b": [True, False, False]},
...                 ],
...                 ordered=True,
...             )
...             print("inserted1", im_result1.inserted_ids)
...             count1 = await acol.count_documents({}, upper_bound=100)
...             print("count1", count1)
...             await acol.insert_many(
...                 [{"seq": i} for i in range(50)],
...                 concurrency=5,
...             )
...             count2 = await acol.count_documents({}, upper_bound=100)
...             print("count2", count2)
...
>>> asyncio.run(write_and_count(my_async_coll))
count0 0
inserted1 ['e3c2a684-...', '1de4949f-...', '167dacc3-...']
count1 3
count2 53
>>> asyncio.run(my_async_coll.insert_many(
...     [
...         {"tag": "a", "$vector": [1, 2]},
...         {"tag": "b", "$vector": [3, 4]},
...     ]
... ))
InsertManyResult(...)

Note

Unordered insertions are executed with some degree of concurrency, so it is usually better to prefer this mode unless the order in the document sequence is important.

Note

A failure mode for this command is related to certain faulty documents found among those to insert: a document may have an _id already present on the collection, or its vector dimension may not match the collection setting.

For an ordered insertion, the method will raise an exception at the first such faulty document – nevertheless, all documents processed until then will end up being written to the database.

For unordered insertions, if the error stems from faulty documents the insertion proceeds until exhausting the input documents: then, an exception is raised – and all insertable documents will have been written to the database, including those "after" the troublesome ones.

If, on the other hand, there are errors not related to individual documents (such as a network connectivity error), the whole insert_many operation will stop in mid-way, an exception will be raised, and only a certain amount of the input documents will have made their way to the database.

Expand source code
async def insert_many(
    self,
    documents: Iterable[DocumentType],
    *,
    vectors: Iterable[VectorType | None] | None = None,
    vectorize: Iterable[str | None] | None = None,
    ordered: bool = False,
    chunk_size: int | None = None,
    concurrency: int | None = None,
    max_time_ms: int | None = None,
) -> InsertManyResult:
    """
    Insert a list of documents into the collection.
    This is not an atomic operation.

    Args:
        documents: an iterable of dictionaries, each a document to insert.
            Documents may specify their `_id` field or leave it out, in which
            case it will be added automatically.
        vectors: an optional list of vectors (as many vectors as the provided
            documents) to associate to the documents when inserting.
            Passing vectors this way is indeed equivalent to the "$vector" field
            of the documents, however the two are mutually exclusive.
            *DEPRECATED* (removal in 2.0). Use a `$vector` key in the documents instead.
        vectorize: an optional list of strings to be made into as many vectors
            (one per document), if such a service is configured for the collection.
            Passing this parameter is equivalent to providing a `$vectorize`
            field in the documents themselves, however the two are mutually exclusive.
            *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the documents instead.
        ordered: if False (default), the insertions can occur in arbitrary order
            and possibly concurrently. If True, they are processed sequentially.
            If there are no specific reasons against it, unordered insertions are to
            be preferred as they complete much faster.
        chunk_size: how many documents to include in a single API request.
            Must be a positive integer. Exceeding the server maximum allowed
            value results in an error.
            Leave it unspecified (recommended) to use the system default.
        concurrency: maximum number of concurrent requests to the API at
            a given time. It cannot be more than one for ordered insertions,
            and must be at least one for unordered insertions.
        max_time_ms: a timeout, in milliseconds, for the operation.
            If not passed, the collection-level setting is used instead.
            If many documents are being inserted, this method corresponds
            to several HTTP requests: in such cases one may want to specify
            a more tolerant timeout here.

    Returns:
        an InsertManyResult object.

    Raises:
        ValueError: if `ordered` is True and a concurrency greater than one
            is requested, if an unordered insertion is requested with
            a concurrency lower than one, or if the chunk size is not positive.
        InsertManyException: if any of the API responses reports errors.
            The exception carries a partial result with the documents
            inserted so far.

    Examples:
        >>> async def write_and_count(acol: AsyncCollection) -> None:
        ...     count0 = await acol.count_documents({}, upper_bound=10)
        ...     print("count0", count0)
        ...     im_result1 = await acol.insert_many(
        ...         [
        ...             {"a": 10},
        ...             {"a": 5},
        ...             {"b": [True, False, False]},
        ...         ],
        ...         ordered=True,
        ...     )
        ...     print("inserted1", im_result1.inserted_ids)
        ...     count1 = await acol.count_documents({}, upper_bound=100)
        ...     print("count1", count1)
        ...     await acol.insert_many(
        ...         [{"seq": i} for i in range(50)],
        ...         concurrency=5,
        ...     )
        ...     count2 = await acol.count_documents({}, upper_bound=100)
        ...     print("count2", count2)
        ...
        >>> asyncio.run(write_and_count(my_async_coll))
        count0 0
        inserted1 ['e3c2a684-...', '1de4949f-...', '167dacc3-...']
        count1 3
        count2 53
        >>> asyncio.run(my_async_coll.insert_many(
        ...     [
        ...         {"tag": "a", "$vector": [1, 2]},
        ...         {"tag": "b", "$vector": [3, 4]},
        ...     ]
        ... ))
        InsertManyResult(...)

    Note:
        Unordered insertions are executed with some degree of concurrency,
        so it is usually better to prefer this mode unless the order in the
        document sequence is important.

    Note:
        A failure mode for this command is related to certain faulty documents
        found among those to insert: a document may have an `_id` already
        present on the collection, or its vector dimension may not
        match the collection setting.

        For an ordered insertion, the method will raise an exception at
        the first such faulty document -- nevertheless, all documents processed
        until then will end up being written to the database.

        For unordered insertions, if the error stems from faulty documents
        the insertion proceeds until exhausting the input documents: then,
        an exception is raised -- and all insertable documents will have been
        written to the database, including those "after" the troublesome ones.

        If, on the other hand, there are errors not related to individual
        documents (such as a network connectivity error), the whole
        `insert_many` operation will stop in mid-way, an exception will be raised,
        and only a certain amount of the input documents will
        have made their way to the database.
    """

    check_deprecated_vector_ize(
        vector=None,
        vectors=vectors,
        vectorize=vectorize,
        kind="insert",
    )
    if concurrency is None:
        _concurrency = 1 if ordered else DEFAULT_INSERT_MANY_CONCURRENCY
    else:
        _concurrency = concurrency
    if _concurrency > 1 and ordered:
        raise ValueError("Cannot run ordered insert_many concurrently.")
    if not ordered and _concurrency < 1:
        # A zero-valued semaphore would never be acquirable: without this
        # guard, an unordered insertion with concurrency=0 waits forever.
        raise ValueError(
            "The concurrency for an unordered insert_many must be at least 1."
        )
    if chunk_size is None:
        _chunk_size = DEFAULT_INSERT_MANY_CHUNK_SIZE
    else:
        _chunk_size = chunk_size
    if _chunk_size < 1:
        # A negative step would yield an empty chunk range, i.e. the method
        # would silently insert nothing while reporting success.
        raise ValueError("The chunk size for insert_many must be at least 1.")
    _documents = _collate_vectors_to_documents(documents, vectors, vectorize)
    _max_time_ms = max_time_ms or self.api_options.max_time_ms
    logger.info(f"inserting {len(_documents)} documents in '{self.name}'")
    raw_results: list[dict[str, Any]] = []
    # one timeout manager is shared by all chunk requests of this call
    timeout_manager = MultiCallTimeoutManager(overall_max_time_ms=_max_time_ms)
    if ordered:
        options = {"ordered": True}
        inserted_ids: list[Any] = []
        for i in range(0, len(_documents), _chunk_size):
            im_payload = {
                "insertMany": {
                    "documents": _documents[i : i + _chunk_size],
                    "options": options,
                },
            }
            logger.info(f"insertMany on '{self.name}'")
            chunk_response = await self._api_commander.async_request(
                payload=im_payload,
                raise_api_errors=False,
                timeout_info=timeout_manager.remaining_timeout_info(),
            )
            logger.info(f"finished insertMany on '{self.name}'")
            # accumulate the results in this call
            chunk_inserted_ids = (chunk_response.get("status") or {}).get(
                "insertedIds", []
            )
            inserted_ids += chunk_inserted_ids
            raw_results += [chunk_response]
            # if errors, quit early
            if chunk_response.get("errors", []):
                partial_result = InsertManyResult(
                    raw_results=raw_results,
                    inserted_ids=inserted_ids,
                )
                raise InsertManyException.from_response(
                    command=None,
                    raw_response=chunk_response,
                    partial_result=partial_result,
                )

        # return
        full_result = InsertManyResult(
            raw_results=raw_results,
            inserted_ids=inserted_ids,
        )
        logger.info(
            f"finished inserting {len(_documents)} documents in '{self.name}'"
        )
        return full_result

    else:
        # unordered: concurrent or not, do all of them and parse the results
        options = {"ordered": False}

        # the semaphore caps the number of in-flight chunk requests
        sem = asyncio.Semaphore(_concurrency)

        async def concurrent_insert_chunk(
            document_chunk: list[DocumentType],
        ) -> dict[str, Any]:
            async with sem:
                im_payload = {
                    "insertMany": {
                        "documents": document_chunk,
                        "options": options,
                    },
                }
                logger.info(f"insertMany(chunk) on '{self.name}'")
                im_response = await self._api_commander.async_request(
                    payload=im_payload,
                    raise_api_errors=False,
                    timeout_info=timeout_manager.remaining_timeout_info(),
                )
                logger.info(f"finished insertMany(chunk) on '{self.name}'")
                return im_response

        if _concurrency > 1:
            tasks = [
                asyncio.create_task(
                    concurrent_insert_chunk(_documents[i : i + _chunk_size])
                )
                for i in range(0, len(_documents), _chunk_size)
            ]
            raw_results = await asyncio.gather(*tasks)
        else:
            raw_results = [
                await concurrent_insert_chunk(_documents[i : i + _chunk_size])
                for i in range(0, len(_documents), _chunk_size)
            ]

        # recast raw_results
        inserted_ids = [
            inserted_id
            for chunk_response in raw_results
            for inserted_id in (chunk_response.get("status") or {}).get(
                "insertedIds", []
            )
        ]

        # check-raise
        if any(chunk_response.get("errors", []) for chunk_response in raw_results):
            partial_result = InsertManyResult(
                raw_results=raw_results,
                inserted_ids=inserted_ids,
            )
            raise InsertManyException.from_responses(
                commands=[None for _ in raw_results],
                raw_responses=raw_results,
                partial_result=partial_result,
            )

        # return
        full_result = InsertManyResult(
            raw_results=raw_results,
            inserted_ids=inserted_ids,
        )
        logger.info(
            f"finished inserting {len(_documents)} documents in '{self.name}'"
        )
        return full_result
async def insert_one(self, document: DocumentType, *, vector: VectorType | None = None, vectorize: str | None = None, max_time_ms: int | None = None) ‑> InsertOneResult

Insert a single document in the collection in an atomic operation.

Args

document
the dictionary expressing the document to insert. The _id field of the document can be left out, in which case it will be created automatically.
vector
a vector (a list of numbers appropriate for the collection) for the document. Passing this parameter is equivalent to providing a $vector field within the document itself, however the two are mutually exclusive. DEPRECATED (removal in 2.0). Use a $vector key in the document instead.
vectorize
a string to be made into a vector, if such a service is configured for the collection. Passing this parameter is equivalent to providing a $vectorize field in the document itself, however the two are mutually exclusive. Moreover, this parameter cannot coexist with vector. DEPRECATED (removal in 2.0). Use a $vectorize key in the document instead.
max_time_ms
a timeout, in milliseconds, for the underlying HTTP request. If not passed, the collection-level setting is used instead.

Returns

an InsertOneResult object.

Example

>>> async def write_and_count(acol: AsyncCollection) -> None:
...     count0 = await acol.count_documents({}, upper_bound=10)
...     print("count0", count0)
...     await acol.insert_one(
...         {
...             "age": 30,
...             "name": "Smith",
...             "food": ["pear", "peach"],
...             "likes_fruit": True,
...         },
...     )
...     await acol.insert_one({"_id": "user-123", "age": 50, "name": "Maccio"})
...     count1 = await acol.count_documents({}, upper_bound=10)
...     print("count1", count1)
...
>>> asyncio.run(write_and_count(my_async_coll))
count0 0
count1 2
>>> asyncio.run(my_async_coll.insert_one({"tag": "v", "$vector": [10, 11]}))
InsertOneResult(...)

Note

If an _id is explicitly provided, which corresponds to a document that exists already in the collection, an error is raised and the insertion fails.

Expand source code
async def insert_one(
    self,
    document: DocumentType,
    *,
    vector: VectorType | None = None,
    vectorize: str | None = None,
    max_time_ms: int | None = None,
) -> InsertOneResult:
    """
    Insert a single document in the collection in an atomic operation.

    Args:
        document: the dictionary expressing the document to insert.
            The `_id` field of the document can be left out, in which
            case it will be created automatically.
        vector: a vector (a list of numbers appropriate for the collection)
            for the document. Passing this parameter is equivalent to
            providing a `$vector` field within the document itself,
            however the two are mutually exclusive.
            *DEPRECATED* (removal in 2.0). Use a `$vector` key in the document instead.
        vectorize: a string to be made into a vector, if such a service
            is configured for the collection. Passing this parameter is
            equivalent to providing a `$vectorize` field in the document itself,
            however the two are mutually exclusive.
            Moreover, this parameter cannot coexist with `vector`.
            *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the document instead.
        max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
            If not passed, the collection-level setting is used instead.

    Returns:
        an InsertOneResult object.

    Raises:
        ValueError: if the API response does not report any inserted ID.

    Example:
        >>> async def write_and_count(acol: AsyncCollection) -> None:
        ...     count0 = await acol.count_documents({}, upper_bound=10)
        ...     print("count0", count0)
        ...     await acol.insert_one(
        ...         {
        ...             "age": 30,
        ...             "name": "Smith",
        ...             "food": ["pear", "peach"],
        ...             "likes_fruit": True,
        ...         },
        ...     )
        ...     await acol.insert_one({"_id": "user-123", "age": 50, "name": "Maccio"})
        ...     count1 = await acol.count_documents({}, upper_bound=10)
        ...     print("count1", count1)
        ...
        >>> asyncio.run(write_and_count(my_async_coll))
        count0 0
        count1 2

        >>> asyncio.run(my_async_coll.insert_one({"tag": "v", "$vector": [10, 11]}))
        InsertOneResult(...)

    Note:
        If an `_id` is explicitly provided, which corresponds to a document
        that exists already in the collection, an error is raised and
        the insertion fails.
    """

    check_deprecated_vector_ize(
        vector=vector,
        vectors=None,
        vectorize=vectorize,
        kind="insert",
    )
    _document = _collate_vector_to_document(document, vector, vectorize)
    _max_time_ms = max_time_ms or self.api_options.max_time_ms
    io_payload = {"insertOne": {"document": _document}}
    logger.info(f"insertOne on '{self.name}'")
    io_response = await self._api_commander.async_request(
        payload=io_payload,
        timeout_info=base_timeout_info(_max_time_ms),
    )
    logger.info(f"finished insertOne on '{self.name}'")
    # `status` may be absent or null in the response: fall back to an empty
    # mapping so that both cases lead to the explicit error below.
    inserted_ids = (io_response.get("status") or {}).get("insertedIds")
    if not inserted_ids:
        # covers a missing "insertedIds" key as well as an empty list
        raise ValueError(
            "Could not complete a insert_one operation. "
            f"(gotten '{json.dumps(io_response)}')"
        )
    return InsertOneResult(
        raw_results=[io_response],
        inserted_id=inserted_ids[0],
    )
async def options(self, *, max_time_ms: int | None = None) ‑> CollectionOptions

Get the collection options, i.e. its configuration as read from the database.

The method issues a request to the Data API each time it is invoked, without caching mechanisms: this ensures up-to-date information for usages such as real-time collection validation by the application.

Args

max_time_ms
a timeout, in milliseconds, for the underlying HTTP request. If not passed, the collection-level setting is used instead.

Returns

a CollectionOptions instance describing the collection. (See also the database list_collections method.)

Example

>>> asyncio.run(my_async_coll.options())
CollectionOptions(vector=CollectionVectorOptions(dimension=3, metric='cosine'))
Expand source code
async def options(self, *, max_time_ms: int | None = None) -> CollectionOptions:
    """
    Return the configuration of this collection as currently stored
    on the database.

    Every invocation lists the collections of the database anew and picks
    the one whose name matches this collection: nothing is cached by this
    method, so the returned options reflect what the listing reports at
    call time (useful e.g. for real-time collection validation).

    Args:
        max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
            If not passed, the collection-level setting is used instead.

    Returns:
        a CollectionOptions instance describing the collection.
        (See also the database `list_collections` method.)

    Raises:
        CollectionNotFoundException: if no collection with this name
            is found among those listed by the database.

    Example:
        >>> asyncio.run(my_async_coll.options())
        CollectionOptions(vector=CollectionVectorOptions(dimension=3, metric='cosine'))
    """

    _max_time_ms = max_time_ms or self.api_options.max_time_ms
    logger.info(f"getting collections in search of '{self.name}'")
    # Walk the whole listing (no early exit) and keep the name matches.
    matching_descriptors = []
    async for descriptor in self.database.list_collections(
        max_time_ms=_max_time_ms
    ):
        if descriptor.name == self.name:
            matching_descriptors.append(descriptor)
    logger.info(f"finished getting collections in search of '{self.name}'")
    if not matching_descriptors:
        raise CollectionNotFoundException(
            text=f"Collection {self.keyspace}.{self.name} not found.",
            keyspace=self.keyspace,
            collection_name=self.name,
        )
    return matching_descriptors[0].options
async def replace_one(self, filter: FilterType, replacement: DocumentType, *, vector: VectorType | None = None, vectorize: str | None = None, sort: SortType | None = None, upsert: bool = False, max_time_ms: int | None = None) ‑> UpdateResult

Replace a single document on the collection with a new one, optionally inserting a new one if no match is found.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
replacement
the new document to write into the collection.
vector
a suitable vector, i.e. a list of float numbers of the appropriate dimensionality, to use vector search (i.e. ANN, or "approximate nearest-neighbours" search), as the sorting criterion. In this way, the matched document (if any) will be the one that is most similar to the provided vector. DEPRECATED (removal in 2.0). Use a $vector key in the sort clause dict instead.
vectorize
a string to be made into a vector to perform vector search. Using vectorize assumes a suitable service is configured for the collection. DEPRECATED (removal in 2.0). Use a $vectorize key in the sort clause dict instead.
sort
with this dictionary parameter one can control the sorting order of the documents matching the filter, effectively determining what document will come first and hence be the replaced one. See the find method for more on sorting. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
upsert
this parameter controls the behavior in absence of matches. If True, replacement is inserted as a new document if no matches are found on the collection. If False, the operation silently does nothing in case of no matches.
max_time_ms
a timeout, in milliseconds, for the underlying HTTP request. If not passed, the collection-level setting is used instead.

Returns

an UpdateResult object summarizing the outcome of the replace operation.

Example

>>> async def do_replace_one(acol: AsyncCollection) -> None:
...     await acol.insert_one({"Marco": "Polo"})
...     result0 = await acol.replace_one(
...         {"Marco": {"$exists": True}},
...         {"Buda": "Pest"},
...     )
...     print("result0.update_info", result0.update_info)
...     doc1 = await acol.find_one({"Buda": "Pest"})
...     print("doc1", doc1)
...     result1 = await acol.replace_one(
...         {"Mirco": {"$exists": True}},
...         {"Oh": "yeah?"},
...     )
...     print("result1.update_info", result1.update_info)
...     result2 = await acol.replace_one(
...         {"Mirco": {"$exists": True}},
...         {"Oh": "yeah?"},
...         upsert=True,
...     )
...     print("result2.update_info", result2.update_info)
...
>>> asyncio.run(do_replace_one(my_async_coll))
result0.update_info {'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1}
doc1 {'_id': '6e669a5a-...', 'Buda': 'Pest'}
result1.update_info {'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0}
result2.update_info {'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '30e34e00-...'}
Expand source code
async def replace_one(
    self,
    filter: FilterType,
    replacement: DocumentType,
    *,
    vector: VectorType | None = None,
    vectorize: str | None = None,
    sort: SortType | None = None,
    upsert: bool = False,
    max_time_ms: int | None = None,
) -> UpdateResult:
    """
    Replace a single document on the collection with a new one,
    optionally inserting a new one if no match is found.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        replacement: the new document to write into the collection.
        vector: a suitable vector, i.e. a list of float numbers of the appropriate
            dimensionality, to use vector search (i.e. ANN,
            or "approximate nearest-neighbours" search), as the sorting criterion.
            In this way, the matched document (if any) will be the one
            that is most similar to the provided vector.
            *DEPRECATED* (removal in 2.0). Use a `$vector` key in the
            sort clause dict instead.
        vectorize: a string to be made into a vector to perform vector search.
            Using vectorize assumes a suitable service is configured for the collection.
            *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the
            sort clause dict instead.
        sort: with this dictionary parameter one can control the sorting
            order of the documents matching the filter, effectively
            determining what document will come first and hence be the
            replaced one. See the `find` method for more on sorting.
            Vector-based ANN sorting is achieved by providing a "$vector"
            or a "$vectorize" key in `sort`.
        upsert: this parameter controls the behavior in absence of matches.
            If True, `replacement` is inserted as a new document
            if no matches are found on the collection. If False,
            the operation silently does nothing in case of no matches.
        max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
            If not passed, the collection-level setting is used instead.

    Returns:
        an UpdateResult object summarizing the outcome of the replace operation.

    Raises:
        DataAPIFaultyResponseException: if the API response carries no
            "document" entry in its "data" section.

    Example:
        >>> async def do_replace_one(acol: AsyncCollection) -> None:
        ...     await acol.insert_one({"Marco": "Polo"})
        ...     result0 = await acol.replace_one(
        ...         {"Marco": {"$exists": True}},
        ...         {"Buda": "Pest"},
        ...     )
        ...     print("result0.update_info", result0.update_info)
        ...     doc1 = await acol.find_one({"Buda": "Pest"})
        ...     print("doc1", doc1)
        ...     result1 = await acol.replace_one(
        ...         {"Mirco": {"$exists": True}},
        ...         {"Oh": "yeah?"},
        ...     )
        ...     print("result1.update_info", result1.update_info)
        ...     result2 = await acol.replace_one(
        ...         {"Mirco": {"$exists": True}},
        ...         {"Oh": "yeah?"},
        ...         upsert=True,
        ...     )
        ...     print("result2.update_info", result2.update_info)
        ...
        >>> asyncio.run(do_replace_one(my_async_coll))
        result0.update_info {'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1}
        doc1 {'_id': '6e669a5a-...', 'Buda': 'Pest'}
        result1.update_info {'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0}
        result2.update_info {'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '30e34e00-...'}
    """

    check_deprecated_vector_ize(
        vector=vector,
        vectors=None,
        vectorize=vectorize,
        kind="find",
    )
    _sort = _collate_vector_to_sort(sort, vector, vectorize)
    options = {
        "upsert": upsert,
    }
    _max_time_ms = max_time_ms or self.api_options.max_time_ms
    # entries whose value is None (e.g. an unspecified sort) are left out
    fo_payload = {
        "findOneAndReplace": {
            k: v
            for k, v in {
                "filter": filter,
                "replacement": replacement,
                "options": options,
                "sort": _sort,
            }.items()
            if v is not None
        }
    }
    logger.info(f"findOneAndReplace on '{self.name}'")
    fo_response = await self._api_commander.async_request(
        payload=fo_payload,
        timeout_info=base_timeout_info(_max_time_ms),
    )
    logger.info(f"finished findOneAndReplace on '{self.name}'")
    # Null-safe access, same as for "status" below: a null "data" must lead
    # to the faulty-response exception rather than to an opaque TypeError.
    if "document" not in (fo_response.get("data") or {}):
        raise DataAPIFaultyResponseException(
            text="Faulty response from find_one_and_replace API command.",
            raw_response=fo_response,
        )
    fo_status = fo_response.get("status") or {}
    _update_info = _prepare_update_info([fo_status])
    return UpdateResult(
        raw_results=[fo_response],
        update_info=_update_info,
    )
def set_caller(self, caller_name: str | None = None, caller_version: str | None = None) ‑> None

Set a new identity for the application/framework on behalf of which the Data API calls are performed (the "caller").

Args

caller_name
name of the application, or framework, on behalf of which the Data API calls are performed. This ends up in the request user-agent.
caller_version
version of the caller.

Example

>>> my_coll.set_caller(caller_name="the_caller", caller_version="0.1.0")

Deprecated since version: 1.5.1

This will be removed in 2.0.0. Please provide the caller(s) at constructor time through the callers list parameter.

Expand source code
@deprecation.deprecated(  # type: ignore[misc]
    deprecated_in="1.5.1",
    removed_in="2.0.0",
    current_version=__version__,
    details=SET_CALLER_DEPRECATION_NOTICE,
)
def set_caller(
    self,
    caller_name: str | None = None,
    caller_version: str | None = None,
) -> None:
    """
    Replace the "caller" identity of this collection, i.e. the
    application/framework on whose behalf the Data API calls are issued.

    The API commander of the collection is re-created afterwards.

    Args:
        caller_name: name of the application, or framework, on behalf of which
            the Data API calls are performed. This ends up in the request user-agent.
        caller_version: version of the caller.

    Example:
        >>> my_coll.set_caller(caller_name="the_caller", caller_version="0.1.0")
    """

    logger.info(f"setting caller to {caller_name}/{caller_version}")
    new_callers = check_caller_parameters([], caller_name, caller_version)
    # an empty outcome leaves the current callers in place
    self.callers = new_callers if new_callers else self.callers
    self._api_commander = self._get_api_commander()
def to_sync(self, *, database: Database | None = None, name: str | None = None, keyspace: str | None = None, namespace: str | None = None, embedding_api_key: str | EmbeddingHeadersProvider | None = None, collection_max_time_ms: int | None = None, callers: Sequence[CallerType] = [], caller_name: str | None = None, caller_version: str | None = None) ‑> Collection

Create a Collection from this one. Save for the arguments explicitly provided as overrides, everything else is kept identical to this collection in the copy (the database is converted into a sync object).

Args

database
a Database object, instantiated earlier. This represents the database the new collection belongs to.
name
the collection name. This parameter should match an existing collection on the database.
keyspace
this is the keyspace to which the collection belongs. If not specified, the database's working keyspace is used.
namespace
an alias for keyspace. DEPRECATED, removal in 2.0.
embedding_api_key
optional API key(s) for interacting with the collection. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
collection_max_time_ms
a default timeout, in milliseconds, for the duration of each operation on the collection. Individual timeouts can be provided to each collection method call and will take precedence, with this value being an overall default. Note that for some methods involving multiple API calls (such as find, delete_many, insert_many and so on), it is strongly suggested to provide a specific timeout as the default one likely wouldn't make much sense.
callers
a list of caller identities, i.e. applications, or frameworks, on behalf of which the Data API calls are performed. These end up in the request user-agent. Each caller identity is a ("caller_name", "caller_version") pair.
caller_name
DEPRECATED, use callers. Removal 2.0. Name of the application, or framework, on behalf of which the Data API calls are performed. This ends up in the request user-agent.
caller_version
version of the caller. DEPRECATED, use callers. Removal 2.0.

Returns

the new copy, a Collection instance.

Example

>>> my_async_coll.to_sync().count_documents({}, upper_bound=100)
77
Expand source code
def to_sync(
    self,
    *,
    database: Database | None = None,
    name: str | None = None,
    keyspace: str | None = None,
    namespace: str | None = None,
    embedding_api_key: str | EmbeddingHeadersProvider | None = None,
    collection_max_time_ms: int | None = None,
    callers: Sequence[CallerType] = [],
    caller_name: str | None = None,
    caller_version: str | None = None,
) -> Collection:
    """
    Build a synchronous Collection mirroring this asynchronous one.

    Every attribute not explicitly overridden through the arguments
    is carried over unchanged from this collection (with its database
    being turned into the corresponding sync object).

    Args:
        database: a Database object, instantiated earlier. This is the
            database the new collection belongs to. If omitted, the sync
            counterpart of this collection's database is used.
        name: the collection name. This parameter should match an existing
            collection on the database.
        keyspace: the keyspace to which the collection belongs.
            If not specified, the database's working keyspace is used.
        namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
        embedding_api_key: optional API key(s) for interacting with the collection.
            If an embedding service is configured, and this parameter is not None,
            each Data API call will include the necessary embedding-related headers
            as specified by this parameter. A plain string translates
            into the one "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            For some vectorize providers/models, if using header-based authentication,
            specialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`
            should be supplied.
        collection_max_time_ms: a default timeout, in milliseconds, for the
            duration of each operation on the collection. Timeouts passed to
            individual method calls take precedence over this overall default.
            Note that for some methods involving multiple API calls (such as
            `find`, `delete_many`, `insert_many` and so on), it is strongly suggested
            to provide a specific timeout as the default one likely wouldn't make
            much sense.
        callers: a list of caller identities, i.e. applications, or frameworks,
            on behalf of which the Data API calls are performed. These end up
            in the request user-agent.
            Each caller identity is a ("caller_name", "caller_version") pair.
        caller_name: *DEPRECATED*, use `callers`. Removal 2.0. Name of the
            application, or framework, on behalf of which the Data API calls
            are performed. This ends up in the request user-agent.
        caller_version: version of the caller. *DEPRECATED*, use `callers`.
            Removal 2.0.

    Returns:
        the new copy, a Collection instance.

    Example:
        >>> my_async_coll.to_sync().count_documents({}, upper_bound=100)
        77
    """

    # reconcile the deprecated aliases with their current counterparts
    resolved_callers = check_caller_parameters(callers, caller_name, caller_version)
    resolved_keyspace = check_namespace_keyspace(keyspace=keyspace, namespace=namespace)

    # options explicitly requested in this call, layered later on top of
    # the ones this collection already carries
    embedding_headers = coerce_embedding_headers_provider(embedding_api_key)
    override_options = CollectionAPIOptions(
        embedding_api_key=embedding_headers,
        max_time_ms=collection_max_time_ms,
    )

    # anything not provided (or falsy) falls back to this collection's value
    sync_database = database or self.database.to_sync()
    sync_name = name or self.name
    sync_keyspace = resolved_keyspace or self.keyspace
    sync_api_options = self.api_options.with_override(override_options)
    sync_callers = resolved_callers or self.callers

    return Collection(
        database=sync_database,
        name=sync_name,
        keyspace=sync_keyspace,
        api_options=sync_api_options,
        callers=sync_callers,
    )
async def update_many(self, filter: FilterType, update: dict[str, Any], *, upsert: bool = False, max_time_ms: int | None = None) ‑> UpdateResult

Apply an update operation to all documents matching a condition, optionally inserting one document in absence of matches.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
update
the update prescription to apply to the documents, expressed as a dictionary as per Data API syntax. Examples are: {"$set": {"field": "value"}} {"$inc": {"counter": 10}} {"$unset": {"field": ""}} See the Data API documentation for the full syntax.
upsert
this parameter controls the behavior in absence of matches. If True, a single new document (resulting from applying update to an empty document) is inserted if no matches are found on the collection. If False, the operation silently does nothing in case of no matches.
max_time_ms
a timeout, in milliseconds, for the operation. If not passed, the collection-level setting is used instead: if a large number of document updates is anticipated, it is suggested to specify a larger timeout than in most other operations as the update will span several HTTP calls to the API in sequence.

Returns

an UpdateResult object summarizing the outcome of the update operation.

Example

>>> async def do_update_many(acol: AsyncCollection) -> None:
...     await acol.insert_many([{"c": "red"}, {"c": "green"}, {"c": "blue"}])
...     result0 = await acol.update_many(
...         {"c": {"$ne": "green"}},
...         {"$set": {"nongreen": True}},
...     )
...     print("result0.update_info", result0.update_info)
...     result1 = await acol.update_many(
...         {"c": "orange"},
...         {"$set": {"is_also_fruit": True}},
...     )
...     print("result1.update_info", result1.update_info)
...     result2 = await acol.update_many(
...         {"c": "orange"},
...         {"$set": {"is_also_fruit": True}},
...         upsert=True,
...     )
...     print("result2.update_info", result2.update_info)
...
>>> asyncio.run(do_update_many(my_async_coll))
result0.update_info {'n': 2, 'updatedExisting': True, 'ok': 1.0, 'nModified': 2}
result1.update_info {'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0}
result2.update_info {'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '79ffd5a3-ab99-4dff-a2a5-4aaa0e59e854'}

Note

Similarly to the case of find (see its docstring for more details), running this command while, at the same time, another process is inserting new documents which match the filter of the update_many can result in an unpredictable fraction of these documents being updated. In other words, it cannot be easily predicted whether a given newly-inserted document will be picked up by the update_many command or not.

Expand source code
async def update_many(
    self,
    filter: FilterType,
    update: dict[str, Any],
    *,
    upsert: bool = False,
    max_time_ms: int | None = None,
) -> UpdateResult:
    """
    Apply an update operation to all documents matching a condition,
    optionally inserting one document in absence of matches.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        update: the update prescription to apply to the documents, expressed
            as a dictionary as per Data API syntax. Examples are:
                {"$set": {"field": "value"}}
                {"$inc": {"counter": 10}}
                {"$unset": {"field": ""}}
            See the Data API documentation for the full syntax.
        upsert: this parameter controls the behavior in absence of matches.
            If True, a single new document (resulting from applying `update`
            to an empty document) is inserted if no matches are found on
            the collection. If False, the operation silently does nothing
            in case of no matches.
        max_time_ms: a timeout, in milliseconds, for the operation.
            If not passed, the collection-level setting is used instead:
            if a large number of document updates is anticipated, it is suggested
            to specify a larger timeout than in most other operations as the
            update will span several HTTP calls to the API in sequence.

    Returns:
        an UpdateResult object summarizing the outcome of the update operation.

    Raises:
        UpdateManyException: if an API response carries a non-empty "errors"
            entry. The exception is built with the responses received so far
            and a partial result covering the calls completed before the
            failing one.
        DataAPIFaultyResponseException: if an API response has neither
            errors nor a "status" entry.

    Example:
        >>> async def do_update_many(acol: AsyncCollection) -> None:
        ...     await acol.insert_many([{"c": "red"}, {"c": "green"}, {"c": "blue"}])
        ...     result0 = await acol.update_many(
        ...         {"c": {"$ne": "green"}},
        ...         {"$set": {"nongreen": True}},
        ...     )
        ...     print("result0.update_info", result0.update_info)
        ...     result1 = await acol.update_many(
        ...         {"c": "orange"},
        ...         {"$set": {"is_also_fruit": True}},
        ...     )
        ...     print("result1.update_info", result1.update_info)
        ...     result2 = await acol.update_many(
        ...         {"c": "orange"},
        ...         {"$set": {"is_also_fruit": True}},
        ...         upsert=True,
        ...     )
        ...     print("result2.update_info", result2.update_info)
        ...
        >>> asyncio.run(do_update_many(my_async_coll))
        result0.update_info {'n': 2, 'updatedExisting': True, 'ok': 1.0, 'nModified': 2}
        result1.update_info {'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0}
        result2.update_info {'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '79ffd5a3-ab99-4dff-a2a5-4aaa0e59e854'}

    Note:
        Similarly to the case of `find` (see its docstring for more details),
        running this command while, at the same time, another process is
        inserting new documents which match the filter of the `update_many`
        can result in an unpredictable fraction of these documents being updated.
        In other words, it cannot be easily predicted whether a given
        newly-inserted document will be picked up by the update_many command or not.
    """

    base_options = {"upsert": upsert}
    # responses and statuses of the successful calls, in request order
    collected_responses: list[dict[str, Any]] = []
    collected_statuses: list[dict[str, Any]] = []
    overall_timeout_ms = max_time_ms or self.api_options.max_time_ms
    logger.info(f"starting update_many on '{self.name}'")
    # a single time budget shared by all the paginated calls below
    timeout_manager = MultiCallTimeoutManager(overall_max_time_ms=overall_timeout_ms)

    # page state handed back by the previous call (None on the first one)
    page_state = None
    while True:
        call_options: dict[str, Any] = dict(base_options)
        if page_state is not None:
            call_options["pageState"] = page_state

        # leave out of the command whatever part is None
        command_body: dict[str, Any] = {}
        for part_name, part_value in (
            ("filter", filter),
            ("update", update),
            ("options", call_options),
        ):
            if part_value is not None:
                command_body[part_name] = part_value
        call_payload = {"updateMany": command_body}

        logger.info(f"updateMany on '{self.name}'")
        call_response = await self._api_commander.async_request(
            payload=call_payload,
            timeout_info=timeout_manager.remaining_timeout_info(),
        )
        logger.info(f"finished updateMany on '{self.name}'")
        call_status = call_response.get("status") or {}

        # errors reported: stop right away, surfacing what was done so far
        if call_response.get("errors", []):
            partial_update_info = _prepare_update_info(collected_statuses)
            partial_result = UpdateResult(
                raw_results=collected_responses,
                update_info=partial_update_info,
            )
            responses_so_far = collected_responses + [call_response]
            raise UpdateManyException.from_responses(
                commands=[None] * len(responses_so_far),
                raw_responses=responses_so_far,
                partial_result=partial_result,
            )

        if "status" not in call_response:
            raise DataAPIFaultyResponseException(
                text="Faulty response from update_many API command.",
                raw_response=call_response,
            )

        collected_responses.append(call_response)
        collected_statuses.append(call_status)

        # keep paging for as long as the API returns a next page state
        page_state = call_status.get("nextPageState")
        if page_state is None:
            break

    final_update_info = _prepare_update_info(collected_statuses)
    logger.info(f"finished update_many on '{self.name}'")
    return UpdateResult(
        raw_results=collected_responses,
        update_info=final_update_info,
    )
async def update_one(self, filter: FilterType, update: dict[str, Any], *, vector: VectorType | None = None, vectorize: str | None = None, sort: SortType | None = None, upsert: bool = False, max_time_ms: int | None = None) ‑> UpdateResult

Update a single document on the collection as requested, optionally inserting a new one if no match is found.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
update
the update prescription to apply to the document, expressed as a dictionary as per Data API syntax. Examples are: {"$set": {"field": "value"}} {"$inc": {"counter": 10}} {"$unset": {"field": ""}} See the Data API documentation for the full syntax.
vector
a suitable vector, i.e. a list of float numbers of the appropriate dimensionality, to use vector search (i.e. ANN, or "approximate nearest-neighbours" search), as the sorting criterion. In this way, the matched document (if any) will be the one that is most similar to the provided vector. DEPRECATED (removal in 2.0). Use a $vector key in the sort clause dict instead.
vectorize
a string to be made into a vector to perform vector search. Using vectorize assumes a suitable service is configured for the collection. DEPRECATED (removal in 2.0). Use a $vectorize key in the sort clause dict instead.
sort
with this dictionary parameter one can control the sorting order of the documents matching the filter, effectively determining what document will come first and hence be the updated one. See the find method for more on sorting. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
upsert
this parameter controls the behavior in absence of matches. If True, a new document (resulting from applying the update to an empty document) is inserted if no matches are found on the collection. If False, the operation silently does nothing in case of no matches.
max_time_ms
a timeout, in milliseconds, for the underlying HTTP request. If not passed, the collection-level setting is used instead.

Returns

an UpdateResult object summarizing the outcome of the update operation.

Example

>>> async def do_update_one(acol: AsyncCollection) -> None:
...     await acol.insert_one({"Marco": "Polo"})
...     result0 = await acol.update_one(
...         {"Marco": {"$exists": True}},
...         {"$inc": {"rank": 3}},
...     )
...     print("result0.update_info", result0.update_info)
...     result1 = await acol.update_one(
...         {"Mirko": {"$exists": True}},
...         {"$inc": {"rank": 3}},
...     )
...     print("result1.update_info", result1.update_info)
...     result2 = await acol.update_one(
...         {"Mirko": {"$exists": True}},
...         {"$inc": {"rank": 3}},
...         upsert=True,
...     )
...     print("result2.update_info", result2.update_info)
...
>>> asyncio.run(do_update_one(my_async_coll))
result0.update_info {'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1}
result1.update_info {'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0}
result2.update_info {'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '75748092-...'}
Expand source code
async def update_one(
    self,
    filter: FilterType,
    update: dict[str, Any],
    *,
    vector: VectorType | None = None,
    vectorize: str | None = None,
    sort: SortType | None = None,
    upsert: bool = False,
    max_time_ms: int | None = None,
) -> UpdateResult:
    """
    Update a single document on the collection as requested,
    optionally inserting a new one if no match is found.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        update: the update prescription to apply to the document, expressed
            as a dictionary as per Data API syntax. Examples are:
                {"$set": {"field": "value"}}
                {"$inc": {"counter": 10}}
                {"$unset": {"field": ""}}
            See the Data API documentation for the full syntax.
        vector: a suitable vector, i.e. a list of float numbers of the appropriate
            dimensionality, to use vector search (i.e. ANN,
            or "approximate nearest-neighbours" search), as the sorting criterion.
            In this way, the matched document (if any) will be the one
            that is most similar to the provided vector.
            *DEPRECATED* (removal in 2.0). Use a `$vector` key in the
            sort clause dict instead.
        vectorize: a string to be made into a vector to perform vector search.
            Using vectorize assumes a suitable service is configured for the collection.
            *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the
            sort clause dict instead.
        sort: with this dictionary parameter one can control the sorting
            order of the documents matching the filter, effectively
            determining what document will come first and hence be the
            updated one. See the `find` method for more on sorting.
            Vector-based ANN sorting is achieved by providing a "$vector"
            or a "$vectorize" key in `sort`.
        upsert: this parameter controls the behavior in absence of matches.
            If True, a new document (resulting from applying the `update`
            to an empty document) is inserted if no matches are found on
            the collection. If False, the operation silently does nothing
            in case of no matches.
        max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
            If not passed, the collection-level setting is used instead.

    Returns:
        an UpdateResult object summarizing the outcome of the update operation.

    Raises:
        DataAPIFaultyResponseException: if the API response lacks
            a "status" entry.

    Example:
        >>> async def do_update_one(acol: AsyncCollection) -> None:
        ...     await acol.insert_one({"Marco": "Polo"})
        ...     result0 = await acol.update_one(
        ...         {"Marco": {"$exists": True}},
        ...         {"$inc": {"rank": 3}},
        ...     )
        ...     print("result0.update_info", result0.update_info)
        ...     result1 = await acol.update_one(
        ...         {"Mirko": {"$exists": True}},
        ...         {"$inc": {"rank": 3}},
        ...     )
        ...     print("result1.update_info", result1.update_info)
        ...     result2 = await acol.update_one(
        ...         {"Mirko": {"$exists": True}},
        ...         {"$inc": {"rank": 3}},
        ...         upsert=True,
        ...     )
        ...     print("result2.update_info", result2.update_info)
        ...
        >>> asyncio.run(do_update_one(my_async_coll))
        result0.update_info {'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1}
        result1.update_info {'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0}
        result2.update_info {'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '75748092-...'}
    """

    # the deprecated vector/vectorize parameters are checked first, then
    # merged with the sort parameter into a single sort clause
    check_deprecated_vector_ize(
        vector=vector,
        vectors=None,
        vectorize=vectorize,
        kind="find",
    )
    sort_clause = _collate_vector_to_sort(sort, vector, vectorize)
    request_timeout_ms = max_time_ms or self.api_options.max_time_ms

    # assemble the command, leaving out whatever part is None
    candidate_parts = {
        "filter": filter,
        "update": update,
        "options": {"upsert": upsert},
        "sort": sort_clause,
    }
    command_body: dict[str, Any] = {}
    for part_name, part_value in candidate_parts.items():
        if part_value is not None:
            command_body[part_name] = part_value
    request_payload = {"updateOne": command_body}

    logger.info(f"updateOne on '{self.name}'")
    api_response = await self._api_commander.async_request(
        payload=request_payload,
        timeout_info=base_timeout_info(request_timeout_ms),
    )
    logger.info(f"finished updateOne on '{self.name}'")

    # a response without a status cannot be made into a result
    if "status" not in api_response:
        raise DataAPIFaultyResponseException(
            text="Faulty response from update_one API command.",
            raw_response=api_response,
        )

    response_status = api_response["status"]
    return UpdateResult(
        raw_results=[api_response],
        update_info=_prepare_update_info([response_status]),
    )
def with_options(self, *, name: str | None = None, embedding_api_key: str | EmbeddingHeadersProvider | None = None, collection_max_time_ms: int | None = None, callers: Sequence[CallerType] = [], caller_name: str | None = None, caller_version: str | None = None) ‑> AsyncCollection

Create a clone of this collection with some changed attributes.

Args

name
the name of the collection. This parameter is useful to quickly spawn AsyncCollection instances each pointing to a different collection existing in the same keyspace.
embedding_api_key
optional API key(s) for interacting with the collection. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
collection_max_time_ms
a default timeout, in milliseconds, for the duration of each operation on the collection. Individual timeouts can be provided to each collection method call and will take precedence, with this value being an overall default. Note that for some methods involving multiple API calls (such as find, delete_many, insert_many and so on), it is strongly suggested to provide a specific timeout as the default one likely wouldn't make much sense.
callers
a list of caller identities, i.e. applications, or frameworks, on behalf of which the Data API calls are performed. These end up in the request user-agent. Each caller identity is a ("caller_name", "caller_version") pair.
caller_name
DEPRECATED, use callers. Removal 2.0. Name of the application, or framework, on behalf of which the Data API calls are performed. This ends up in the request user-agent.
caller_version
version of the caller. DEPRECATED, use callers. Removal 2.0.

Returns

a new AsyncCollection instance.

Example

>>> my_other_async_coll = my_async_coll.with_options(
...     name="the_other_coll",
...     callers=[("caller_identity", "0.1.2")],
... )
Expand source code
def with_options(
    self,
    *,
    name: str | None = None,
    embedding_api_key: str | EmbeddingHeadersProvider | None = None,
    collection_max_time_ms: int | None = None,
    callers: Sequence[CallerType] = [],
    caller_name: str | None = None,
    caller_version: str | None = None,
) -> AsyncCollection:
    """
    Return a copy of this collection where some attributes are changed.

    Args:
        name: the name of the collection. This parameter is useful to
            quickly spawn AsyncCollection instances each pointing to a different
            collection existing in the same keyspace.
        embedding_api_key: optional API key(s) for interacting with the collection.
            If an embedding service is configured, and this parameter is not None,
            each Data API call will include the necessary embedding-related headers
            as specified by this parameter. A plain string translates
            into the one "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            For some vectorize providers/models, if using header-based authentication,
            specialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`
            should be supplied.
        collection_max_time_ms: a default timeout, in milliseconds, for the
            duration of each operation on the collection. Timeouts passed to
            individual method calls take precedence over this overall default.
            Note that for some methods involving multiple API calls (such as
            `find`, `delete_many`, `insert_many` and so on), it is strongly suggested
            to provide a specific timeout as the default one likely wouldn't make
            much sense.
        callers: a list of caller identities, i.e. applications, or frameworks,
            on behalf of which the Data API calls are performed. These end up
            in the request user-agent.
            Each caller identity is a ("caller_name", "caller_version") pair.
        caller_name: *DEPRECATED*, use `callers`. Removal 2.0. Name of the
            application, or framework, on behalf of which the Data API calls
            are performed. This ends up in the request user-agent.
        caller_version: version of the caller. *DEPRECATED*, use `callers`.
            Removal 2.0.

    Returns:
        a new AsyncCollection instance.

    Example:
        >>> my_other_async_coll = my_async_coll.with_options(
        ...     name="the_other_coll",
        ...     callers=[("caller_identity", "0.1.2")],
        ... )
    """

    # reconcile the deprecated caller_name/caller_version with `callers`
    resolved_callers = check_caller_parameters(callers, caller_name, caller_version)

    # only the options requested in this call; the rest is left to `_copy`
    embedding_headers = coerce_embedding_headers_provider(embedding_api_key)
    requested_options = CollectionAPIOptions(
        embedding_api_key=embedding_headers,
        max_time_ms=collection_max_time_ms,
    )

    return self._copy(
        name=name,
        api_options=requested_options,
        callers=resolved_callers,
    )
class AsyncDatabase (api_endpoint: str, token: str | TokenProvider | None = None, *, keyspace: str | None = None, namespace: str | None = None, callers: Sequence[CallerType] = [], caller_name: str | None = None, caller_version: str | None = None, environment: str | None = None, api_path: str | None = None, api_version: str | None = None)

A Data API database. This is the object for doing database-level DML, such as creating/deleting collections, and for obtaining Collection objects themselves. This class has an asynchronous interface.

The usual way of obtaining one AsyncDatabase is through the get_async_database method of a DataAPIClient.

On Astra DB, an AsyncDatabase comes with an "API Endpoint", which implies an AsyncDatabase object instance reaches a specific region (relevant point in case of multi-region databases).

Args

api_endpoint
the full "API Endpoint" string used to reach the Data API. Example: "https://<database_id>-<region>.apps.astra.datastax.com"
token
an Access Token to the database. Example: "AstraCS:xyz…" This can be either a literal token string or a subclass of TokenProvider.
keyspace
this is the keyspace all method calls will target, unless one is explicitly specified in the call. If no keyspace is supplied when creating a Database, on Astra DB the name "default_keyspace" is set, while on other environments the keyspace is left unspecified: in this case, most operations are unavailable until a keyspace is set (through an explicit use_keyspace invocation or equivalent).
namespace
an alias for keyspace. DEPRECATED, removal in 2.0.
callers
a list of caller identities, i.e. applications, or frameworks, on behalf of which the Data API calls are performed. These end up in the request user-agent. Each caller identity is a ("caller_name", "caller_version") pair.
caller_name
DEPRECATED, use callers. Removal 2.0. Name of the application, or framework, on behalf of which the Data API calls are performed. This ends up in the request user-agent.
caller_version
version of the caller. DEPRECATED, use callers. Removal 2.0.
environment
a string representing the target Data API environment. It can be left unspecified for the default value of Environment.PROD; other values include Environment.OTHER, Environment.DSE.
api_path
path to append to the API Endpoint. In typical usage, this should be left to its default (sensibly chosen based on the environment).
api_version
version specifier to append to the API path. In typical usage, this should be left to its default of "v1".

Example

>>> from astrapy import DataAPIClient
>>> my_client = DataAPIClient("AstraCS:...")
>>> my_db = my_client.get_async_database(
...    "https://01234567-....apps.astra.datastax.com"
... )

Note

creating an instance of AsyncDatabase does not trigger actual creation of the database itself, which should exist beforehand. To create databases, see the AstraDBAdmin class.

Expand source code
class AsyncDatabase:
    """
    A Data API database. This is the object for doing database-level
    DDL, such as creating/deleting collections, and for obtaining AsyncCollection
    objects themselves. This class has an asynchronous interface.

    The usual way of obtaining one AsyncDatabase is through the `get_async_database`
    method of a `DataAPIClient`.

    On Astra DB, an AsyncDatabase comes with an "API Endpoint", which implies
    an AsyncDatabase object instance reaches a specific region (relevant point in
    case of multi-region databases).

    Args:
        api_endpoint: the full "API Endpoint" string used to reach the Data API.
            Example: "https://<database_id>-<region>.apps.astra.datastax.com"
        token: an Access Token to the database. Example: "AstraCS:xyz..."
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        keyspace: this is the keyspace all method calls will target, unless
            one is explicitly specified in the call. If no keyspace is supplied
            when creating a Database, on Astra DB the name "default_keyspace" is set,
            while on other environments the keyspace is left unspecified: in this case,
            most operations are unavailable until a keyspace is set (through an explicit
            `use_keyspace` invocation or equivalent).
        namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
        callers: a list of caller identities, i.e. applications, or frameworks,
            on behalf of which the Data API calls are performed. These end up
            in the request user-agent.
            Each caller identity is a ("caller_name", "caller_version") pair.
        caller_name: *DEPRECATED*, use `callers`. Removal 2.0. Name of the
            application, or framework, on behalf of which the Data API calls
            are performed. This ends up in the request user-agent.
        caller_version: version of the caller. *DEPRECATED*, use `callers`. Removal 2.0.
        environment: a string representing the target Data API environment.
            It can be left unspecified for the default value of `Environment.PROD`;
            other values include `Environment.OTHER`, `Environment.DSE`.
        api_path: path to append to the API Endpoint. In typical usage, this
            should be left to its default (sensibly chosen based on the environment).
        api_version: version specifier to append to the API path. In typical
            usage, this should be left to its default of "v1".

    Example:
        >>> from astrapy import DataAPIClient
        >>> my_client = DataAPIClient("AstraCS:...")
        >>> my_db = my_client.get_async_database(
        ...    "https://01234567-....apps.astra.datastax.com"
        ... )

    Note:
        Creating an instance of AsyncDatabase does not trigger actual creation
        of the database itself, which should exist beforehand. To create databases,
        see the AstraDBAdmin class.
    """

    def __init__(
        self,
        api_endpoint: str,
        token: str | TokenProvider | None = None,
        *,
        keyspace: str | None = None,
        namespace: str | None = None,
        callers: Sequence[CallerType] = [],
        caller_name: str | None = None,
        caller_version: str | None = None,
        environment: str | None = None,
        api_path: str | None = None,
        api_version: str | None = None,
    ) -> None:
        # Reconcile `callers` and `keyspace` with their deprecated aliases
        # (the reconciliation itself is delegated to the check_* helpers).
        resolved_callers = check_caller_parameters(callers, caller_name, caller_version)
        resolved_keyspace = check_namespace_keyspace(
            keyspace=keyspace,
            namespace=namespace,
        )
        self.environment = (environment or Environment.PROD).lower()

        # Explicitly passed values win; otherwise the per-environment
        # defaults are looked up.
        chosen_api_path: str | None = (
            API_PATH_ENV_MAP[self.environment] if api_path is None else api_path
        )
        chosen_api_version: str | None = (
            API_VERSION_ENV_MAP[self.environment]
            if api_version is None
            else api_version
        )

        self.token_provider = coerce_token_provider(token)
        self.api_endpoint = api_endpoint.strip("/")
        self.api_path = chosen_api_path
        self.api_version = chosen_api_version

        # Only on Astra DB does a missing keyspace fall back to the default
        # one; in every other case the (possibly None) given value is kept.
        self._using_keyspace: str | None
        if (
            resolved_keyspace is not None
            or self.environment not in Environment.astra_db_values
        ):
            self._using_keyspace = resolved_keyspace
        else:
            self._using_keyspace = DEFAULT_ASTRA_DB_KEYSPACE

        # The token is read from the provider once, here, and stored in the
        # headers handed to every commander built by this instance.
        self._commander_headers = {
            DEFAULT_DATA_API_AUTH_HEADER: self.token_provider.get_token(),
        }

        self.callers = resolved_callers
        self._api_commander = self._get_api_commander(keyspace=self.keyspace)
        # Database name, filled in lazily by `name()`.
        self._name: str | None = None

    def __getattr__(self, collection_name: str) -> AsyncCollection:
        return self.to_sync().get_collection(name=collection_name).to_async()

    def __getitem__(self, collection_name: str) -> AsyncCollection:
        return self.to_sync().get_collection(name=collection_name).to_async()

    def __repr__(self) -> str:
        """
        Return a short description of this database: endpoint, redacted
        token (only when the token provider is truthy) and working keyspace.
        """
        pieces = [f'api_endpoint="{self.api_endpoint}"']
        if self.token_provider:
            # never print the token in full
            pieces.append(f'token="{redact_secret(str(self.token_provider), 15)}"')
        current_keyspace = self.keyspace
        if current_keyspace is None:
            pieces.append("keyspace not set")
        else:
            pieces.append(f'keyspace="{current_keyspace}"')
        return f"{self.__class__.__name__}({', '.join(pieces)})"

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, AsyncDatabase):
            return all(
                [
                    self.token_provider == other.token_provider,
                    self.api_endpoint == other.api_endpoint,
                    self.api_path == other.api_path,
                    self.api_version == other.api_version,
                    self.keyspace == other.keyspace,
                    self.callers == other.callers,
                    self.api_commander == other.api_commander,
                ]
            )
        else:
            return False

    def _get_api_commander(self, keyspace: str | None) -> APICommander | None:
        """
        Instantiate a new APICommander based on the properties of this class
        and a provided keyspace.

        If keyspace is None, return None (signaling a "keyspace not set").
        """

        if keyspace is None:
            return None
        else:
            base_path_components = [
                comp
                for comp in (
                    self.api_path.strip("/"),
                    self.api_version.strip("/"),
                    keyspace,
                )
                if comp != ""
            ]
            base_path = f"/{'/'.join(base_path_components)}"
            api_commander = APICommander(
                api_endpoint=self.api_endpoint,
                path=base_path,
                headers=self._commander_headers,
                callers=self.callers,
            )
            return api_commander

    def _get_driver_commander(self, keyspace: str | None) -> APICommander:
        """
        Building on _get_api_commander, fall back to class keyspace in
        creating/returning a commander, and in any case raise an error if not set.
        """
        driver_commander: APICommander | None
        if keyspace:
            driver_commander = self._get_api_commander(keyspace=keyspace)
        else:
            driver_commander = self._api_commander
        if driver_commander is None:
            raise ValueError(
                "No keyspace specified. This operation requires a keyspace to "
                "be set, e.g. through the `use_keyspace` method."
            )
        return driver_commander

    async def __aenter__(self) -> AsyncDatabase:
        """
        Enter the async context: the context value is this same instance.
        """
        return self

    async def __aexit__(
        self,
        exc_type: type[BaseException] | None = None,
        exc_value: BaseException | None = None,
        traceback: TracebackType | None = None,
    ) -> None:
        if self._api_commander is not None:
            await self._api_commander.__aexit__(
                exc_type=exc_type,
                exc_value=exc_value,
                traceback=traceback,
            )

    def _copy(
        self,
        *,
        api_endpoint: str | None = None,
        token: str | TokenProvider | None = None,
        keyspace: str | None = None,
        namespace: str | None = None,
        callers: Sequence[CallerType] = [],
        caller_name: str | None = None,
        caller_version: str | None = None,
        environment: str | None = None,
        api_path: str | None = None,
        api_version: str | None = None,
    ) -> AsyncDatabase:
        """
        Create a new AsyncDatabase from this one. Each argument that is
        passed with a truthy value overrides the corresponding setting;
        everything else is taken from this instance.
        """
        resolved_callers = check_caller_parameters(callers, caller_name, caller_version)
        resolved_keyspace = check_namespace_keyspace(
            keyspace=keyspace,
            namespace=namespace,
        )

        # Falsy overrides (None, "", []) fall back to the current settings.
        new_endpoint = api_endpoint or self.api_endpoint
        new_token = coerce_token_provider(token) or self.token_provider
        new_keyspace = resolved_keyspace or self.keyspace
        new_callers = resolved_callers or self.callers
        new_environment = environment or self.environment
        new_api_path = api_path or self.api_path
        new_api_version = api_version or self.api_version

        return AsyncDatabase(
            api_endpoint=new_endpoint,
            token=new_token,
            keyspace=new_keyspace,
            callers=new_callers,
            environment=new_environment,
            api_path=new_api_path,
            api_version=new_api_version,
        )

    def with_options(
        self,
        *,
        keyspace: str | None = None,
        namespace: str | None = None,
        callers: Sequence[CallerType] = [],
        caller_name: str | None = None,
        caller_version: str | None = None,
    ) -> AsyncDatabase:
        """
        Create a clone of this database with some changed attributes.

        Args:
            keyspace: the working keyspace for the clone, i.e. the keyspace
                all its method calls will target unless one is explicitly
                specified in the call. If omitted, the keyspace of this
                database is kept.
            namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
            callers: a list of caller identities, i.e. applications, or frameworks,
                on behalf of which the Data API calls are performed. These end up
                in the request user-agent.
                Each caller identity is a ("caller_name", "caller_version") pair.
                If omitted, the callers of this database are kept.
            caller_name: *DEPRECATED*, use `callers`. Removal 2.0. Name of the
                application, or framework, on behalf of which the Data API calls
                are performed. This ends up in the request user-agent.
            caller_version: version of the caller. *DEPRECATED*, use `callers`.
                Removal 2.0.

        Returns:
            a new `AsyncDatabase` instance.

        Example:
            >>> my_async_db_2 = my_async_db.with_options(
            ...     keyspace="the_other_keyspace",
            ...     callers=[("the_caller", "0.1.0")],
            ... )
        """

        # Deprecated aliases are resolved here, so that `_copy` only
        # receives the current-style parameters.
        resolved_callers = check_caller_parameters(callers, caller_name, caller_version)
        resolved_keyspace = check_namespace_keyspace(
            keyspace=keyspace,
            namespace=namespace,
        )
        return self._copy(keyspace=resolved_keyspace, callers=resolved_callers)

    def to_sync(
        self,
        *,
        api_endpoint: str | None = None,
        token: str | TokenProvider | None = None,
        keyspace: str | None = None,
        namespace: str | None = None,
        callers: Sequence[CallerType] = [],
        caller_name: str | None = None,
        caller_version: str | None = None,
        environment: str | None = None,
        api_path: str | None = None,
        api_version: str | None = None,
    ) -> Database:
        """
        Create a (synchronous) Database from this one. Save for the arguments
        explicitly provided as overrides, everything else is kept identical
        to this database in the copy.

        Args:
            api_endpoint: the full "API Endpoint" string used to reach the Data API.
                Example: "https://<database_id>-<region>.apps.astra.datastax.com"
            token: an Access Token to the database. Example: "AstraCS:xyz..."
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            keyspace: the working keyspace for the copy, i.e. the keyspace
                all its method calls will target unless one is explicitly
                specified in the call. If omitted, the keyspace of this
                database is used.
            namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
            callers: a list of caller identities, i.e. applications, or frameworks,
                on behalf of which the Data API calls are performed. These end up
                in the request user-agent.
                Each caller identity is a ("caller_name", "caller_version") pair.
            caller_name: *DEPRECATED*, use `callers`. Removal 2.0. Name of the
                application, or framework, on behalf of which the Data API calls
                are performed. This ends up in the request user-agent.
            caller_version: version of the caller. *DEPRECATED*, use `callers`.
                Removal 2.0.
            environment: a string representing the target Data API environment.
                Values are, for example, `Environment.PROD`, `Environment.OTHER`,
                or `Environment.DSE`.
            api_path: path to append to the API Endpoint. If omitted, the
                API path of this database is used.
            api_version: version specifier to append to the API path. If
                omitted, the API version of this database is used.

        Returns:
            the new copy, a `Database` instance.

        Example:
            >>> my_sync_db = my_async_db.to_sync()
            >>> my_sync_db.list_collection_names()
            ['a_collection', 'another_collection']
        """

        resolved_callers = check_caller_parameters(callers, caller_name, caller_version)
        resolved_keyspace = check_namespace_keyspace(
            keyspace=keyspace,
            namespace=namespace,
        )

        # Falsy overrides (None, "", []) fall back to the current settings.
        new_endpoint = api_endpoint or self.api_endpoint
        new_token = coerce_token_provider(token) or self.token_provider
        new_keyspace = resolved_keyspace or self.keyspace
        new_callers = resolved_callers or self.callers
        new_environment = environment or self.environment
        new_api_path = api_path or self.api_path
        new_api_version = api_version or self.api_version

        return Database(
            api_endpoint=new_endpoint,
            token=new_token,
            keyspace=new_keyspace,
            callers=new_callers,
            environment=new_environment,
            api_path=new_api_path,
            api_version=new_api_version,
        )

    @deprecation.deprecated(  # type: ignore[misc]
        deprecated_in="1.5.1",
        removed_in="2.0.0",
        current_version=__version__,
        details=SET_CALLER_DEPRECATION_NOTICE,
    )
    def set_caller(
        self,
        caller_name: str | None = None,
        caller_version: str | None = None,
    ) -> None:
        """
        Replace the identity of the application/framework on behalf of which
        the Data API calls are performed (the "caller").
        This method changes (mutates) the AsyncDatabase instance.

        Args:
            caller_name: name of the application, or framework, on behalf of which
                the Data API calls are performed. This ends up in the request user-agent.
            caller_version: version of the caller.

        Example:
            >>> my_db.set_caller(caller_name="the_caller", caller_version="0.1.0")
        """

        logger.info(f"setting caller to {caller_name}/{caller_version}")
        self.callers = check_caller_parameters([], caller_name, caller_version)
        # the commander embeds the callers, hence it must be rebuilt
        rebuilt_commander = self._get_api_commander(keyspace=self.keyspace)
        self._api_commander = rebuilt_commander

    @deprecation.deprecated(  # type: ignore[misc]
        deprecated_in="1.5.0",
        removed_in="2.0.0",
        current_version=__version__,
        details=NAMESPACE_DEPRECATION_NOTICE_METHOD,
    )
    def use_namespace(self, namespace: str) -> None:
        """
        Switch to a new working namespace for this database.
        This method changes (mutates) the AsyncDatabase instance.

        *DEPRECATED* (removal in 2.0). Switch to the "use_keyspace" method.

        Note that this method does not create the namespace, which should exist
        already (created for instance with a `DatabaseAdmin.async_create_namespace` call).

        Args:
            namespace: the new namespace to use as the database working namespace.

        Returns:
            None.

        Example:
            >>> asyncio.run(my_async_db.list_collection_names())
            ['coll_1', 'coll_2']
            >>> my_async_db.use_namespace("an_empty_namespace")
            >>> asyncio.run(my_async_db.list_collection_names())
            []
        """
        # plain delegation: "namespace" is the former name of "keyspace"
        self.use_keyspace(keyspace=namespace)

    def use_keyspace(self, keyspace: str) -> None:
        """
        Make the given keyspace the working keyspace of this database.
        This method changes (mutates) the AsyncDatabase instance.

        The keyspace is not created by this method: it should exist
        already (created for instance with a
        `DatabaseAdmin.async_create_keyspace` call).

        Args:
            keyspace: the new keyspace to use as the database working keyspace.

        Returns:
            None.

        Example:
            >>> asyncio.run(my_async_db.list_collection_names())
            ['coll_1', 'coll_2']
            >>> my_async_db.use_keyspace("an_empty_keyspace")
            >>> asyncio.run(my_async_db.list_collection_names())
            []
        """
        logger.info(f"switching to keyspace '{keyspace}'")
        self._using_keyspace = keyspace
        # the commander path includes the keyspace, hence it must be rebuilt
        rebuilt_commander = self._get_api_commander(keyspace=self.keyspace)
        self._api_commander = rebuilt_commander

    def info(self) -> DatabaseInfo:
        """
        Additional information on the database as a DatabaseInfo instance.

        Some of the returned properties are dynamic throughout the lifetime
        of the database (such as raw_info["keyspaces"]). For this reason,
        each invocation of this method triggers a new request to the DevOps API.

        Returns:
            a DatabaseInfo instance describing this database.

        Raises:
            DevOpsAPIException: if no database info could be obtained
                for this database.

        Example:
            >>> my_async_db.info().region
            'eu-west-1'

            >>> my_async_db.info().raw_info['datacenters'][0]['dateCreated']
            '2023-01-30T12:34:56Z'

        Note:
            see the DatabaseInfo documentation for a caveat about the difference
            between the `region` and the `raw_info["region"]` attributes.
        """

        logger.info("getting database info")
        fetched_info = fetch_database_info(
            self.api_endpoint,
            token=self.token_provider.get_token(),
            keyspace=self.keyspace,
        )
        if fetched_info is None:
            raise DevOpsAPIException(
                "Database is not in a supported environment for this operation."
            )
        logger.info("finished getting database info")
        return fetched_info

    @property
    def id(self) -> str:
        """
        The ID of this database, as extracted from its API endpoint.

        Raises:
            DevOpsAPIException: if the API endpoint cannot be parsed.

        Example:
            >>> my_async_db.id
            '01234567-89ab-cdef-0123-456789abcdef'
        """

        endpoint_parts = parse_api_endpoint(self.api_endpoint)
        if endpoint_parts is None:
            raise DevOpsAPIException(
                "Database is not in a supported environment for this operation."
            )
        return endpoint_parts.database_id

    def name(self) -> str:
        """
        The name of this database. Note that this bears no unicity guarantees.

        Calling this method the first time involves a request
        to the DevOps API (the resulting database name is then cached).
        See the `info()` method for more details.

        Example:
            >>> my_async_db.name()
            'the_application_database'
        """

        if self._name is None:
            self._name = self.info().name
        return self._name

    @property
    def namespace(self) -> str | None:
        """
        The namespace this database uses as target for all commands when
        no method-call-specific namespace is specified.

        *DEPRECATED* (removal in 2.0). Switch to the "keyspace" property.

        Accessing this property emits a deprecation warning.

        Returns:
            the working namespace (a string), or None if not set.

        Example:
            >>> my_async_db.namespace
            'the_keyspace'
        """

        # stacklevel=2 attributes the warning to the code reading the property
        warnings.warn(
            deprecation.DeprecatedWarning(
                "the 'namespace' property",
                deprecated_in="1.5.0",
                removed_in="2.0.0",
                details=NAMESPACE_DEPRECATION_NOTICE_METHOD,
            ),
            stacklevel=2,
        )
        return self.keyspace

    @property
    def keyspace(self) -> str | None:
        """
        The keyspace this database uses as target for all commands when
        no method-call-specific keyspace is specified.

        The value is determined when the instance is created and is
        replaced by calls to `use_keyspace`.

        Returns:
            the working keyspace (a string), or None if not set.

        Example:
            >>> my_async_db.keyspace
            'the_keyspace'
        """

        return self._using_keyspace

    async def get_collection(
        self,
        name: str,
        *,
        keyspace: str | None = None,
        namespace: str | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | None = None,
        collection_max_time_ms: int | None = None,
    ) -> AsyncCollection:
        """
        Spawn an `AsyncCollection` object instance representing a collection
        on this database.

        Creating an `AsyncCollection` instance does not have any effect on the
        actual state of the database: in other words, for the created
        `AsyncCollection` instance to be used meaningfully, the collection
        must exist already (for instance, it should have been created
        previously by calling the `create_collection` method).

        Args:
            name: the name of the collection.
            keyspace: the keyspace containing the collection. If no keyspace
                is specified, the setting for this database is used.
            namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
            embedding_api_key: optional API key(s) for interacting with the collection.
                If an embedding service is configured, and this parameter is not None,
                each Data API call will include the necessary embedding-related headers
                as specified by this parameter. If a string is passed, it translates
                into the one "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
                For some vectorize providers/models, if using header-based
                authentication, specialized subclasses of
                `astrapy.authentication.EmbeddingHeadersProvider` should be supplied.
            collection_max_time_ms: a default timeout, in millisecond, for the duration
                of each operation on the collection. Individual timeouts can be
                provided to each collection method call and will take precedence, with
                this value being an overall default.
                Note that for some methods involving multiple API calls (such as `find`,
                `delete_many`, `insert_many` and so on), it is strongly suggested
                to provide a specific timeout as the default one likely wouldn't make
                much sense.

        Returns:
            an `AsyncCollection` instance, representing the desired collection
                (but without any form of validation).

        Raises:
            ValueError: if no keyspace is passed and none is set on this database.

        Example:
            >>> async def count_docs(adb: AsyncDatabase, c_name: str) -> int:
            ...    async_col = await adb.get_collection(c_name)
            ...    return await async_col.count_documents({}, upper_bound=100)
            ...
            >>> asyncio.run(count_docs(my_async_db, "my_collection"))
            45

        Note: the attribute and indexing syntax forms achieve the same effect
            as this method, returning an AsyncCollection, albeit
            in a synchronous way. In other words, the following are equivalent:
                await my_async_db.get_collection("coll_name")
                my_async_db.coll_name
                my_async_db["coll_name"]
        """

        requested_keyspace = check_namespace_keyspace(
            keyspace=keyspace,
            namespace=namespace,
        )

        # imported here, and not at module level, to avoid a circular import
        from astrapy.collection import AsyncCollection

        # an explicitly passed keyspace wins over the database-level one
        target_keyspace = requested_keyspace or self.keyspace
        if target_keyspace is None:
            raise ValueError(
                "No keyspace specified. This operation requires a keyspace to "
                "be set, e.g. through the `use_keyspace` method."
            )

        collection_api_options = CollectionAPIOptions(
            embedding_api_key=coerce_embedding_headers_provider(embedding_api_key),
            max_time_ms=collection_max_time_ms,
        )
        return AsyncCollection(
            self,
            name,
            keyspace=target_keyspace,
            api_options=collection_api_options,
            callers=self.callers,
        )

    async def create_collection(
        self,
        name: str,
        *,
        keyspace: str | None = None,
        namespace: str | None = None,
        dimension: int | None = None,
        metric: str | None = None,
        service: CollectionVectorServiceOptions | dict[str, Any] | None = None,
        indexing: dict[str, Any] | None = None,
        default_id_type: str | None = None,
        additional_options: dict[str, Any] | None = None,
        check_exists: bool | None = None,
        max_time_ms: int | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | None = None,
        collection_max_time_ms: int | None = None,
    ) -> AsyncCollection:
        """
        Create a collection on the database and return the AsyncCollection
        instance that represents it.

        This is a blocking operation: the method returns when the collection
        is ready to be used. As opposed to the `get_collection` method,
        this method causes the collection to be actually created on DB.

        Args:
            name: the name of the collection.
            keyspace: the keyspace where the collection is to be created.
                If not specified, the general setting for this database is used.
            namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
            dimension: for vector collections, the dimension of the vectors
                (i.e. the number of their components).
            metric: the similarity metric used for vector searches.
                Allowed values are `VectorMetric.DOT_PRODUCT`, `VectorMetric.EUCLIDEAN`
                or `VectorMetric.COSINE` (default).
            service: a dictionary describing a service for
                embedding computation, e.g. `{"provider": "ab", "modelName": "xy"}`.
                Alternatively, a CollectionVectorServiceOptions object to the same effect.
            indexing: optional specification of the indexing options for
                the collection, in the form of a dictionary such as
                    {"deny": [...]}
                or
                    {"allow": [...]}
            default_id_type: this sets what type of IDs the API server will
                generate when inserting documents that do not specify their
                `_id` field explicitly. Can be set to any of the values
                `DefaultIdType.UUID`, `DefaultIdType.OBJECTID`,
                `DefaultIdType.UUIDV6`, `DefaultIdType.UUIDV7`,
                `DefaultIdType.DEFAULT`.
            additional_options: any further set of key-value pairs that will
                be added to the "options" part of the payload when sending
                the Data API command to create a collection.
            check_exists: whether to run an existence check for the collection
                name before attempting to create the collection (if left
                unspecified, the check is performed):
                If check_exists is True, an error is raised when creating
                an existing collection.
                If it is False, the creation is attempted. In this case, for
                preexisting collections, the command will succeed or fail
                depending on whether the options match or not.
            max_time_ms: an overall timeout, in milliseconds, shared by the
                (optional) existence check and the creation request.
            embedding_api_key: optional API key(s) for interacting with the collection.
                If an embedding service is configured, and this parameter is not None,
                each Data API call will include the necessary embedding-related headers
                as specified by this parameter. If a string is passed, it translates
                into the one "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
                For some vectorize providers/models, if using header-based authentication,
                specialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`
                should be supplied.
            collection_max_time_ms: a default timeout, in millisecond, for the duration of each
                operation on the collection. Individual timeouts can be provided to
                each collection method call and will take precedence, with this value
                being an overall default.
                Note that for some methods involving multiple API calls (such as
                `find`, `delete_many`, `insert_many` and so on), it is strongly suggested
                to provide a specific timeout as the default one likely wouldn't make
                much sense.

        Returns:
            an `AsyncCollection` instance, representing the newly-created collection.

        Raises:
            CollectionAlreadyExistsException: if the existence check is enabled
                and a collection with this name is found.
            ValueError: if no keyspace is passed and none is set on this database.

        Example:
            >>> async def create_and_insert(adb: AsyncDatabase) -> InsertOneResult:
            ...     new_a_col = await adb.create_collection("my_v_col", dimension=3)
            ...     return await new_a_col.insert_one(
            ...         {"name": "the_row", "$vector": [0.4, 0.5, 0.7]},
            ...     )
            ...
            >>> asyncio.run(create_and_insert(my_async_db))
            InsertOneResult(raw_results=..., inserted_id='08f05ecf-...-...-...')

        Note:
            A collection is considered a vector collection if at least one of
            `dimension` or `service` are provided and not null. In that case,
            and only in that case, is `metric` an accepted parameter.
            Note, moreover, that if passing both these parameters, then
            the dimension must be compatible with the chosen service.
        """

        keyspace_param = check_namespace_keyspace(
            keyspace=keyspace,
            namespace=namespace,
        )
        cc_options = _normalize_create_collection_options(
            dimension=dimension,
            metric=metric,
            service=service,
            indexing=indexing,
            default_id_type=default_id_type,
            additional_options=additional_options,
        )

        # One timeout budget is shared by the existence check and the
        # creation request that follows it.
        timeout_manager = MultiCallTimeoutManager(overall_max_time_ms=max_time_ms)

        # An unspecified `check_exists` means "do check".
        _check_exists = True if check_exists is None else check_exists
        if _check_exists:
            logger.info(f"checking collection existence for '{name}'")
            existing_names = await self.list_collection_names(
                keyspace=keyspace_param,
                max_time_ms=timeout_manager.remaining_timeout_ms(),
            )
            if name in existing_names:
                raise CollectionAlreadyExistsException(
                    text=f"Collection {name} already exists",
                    keyspace=keyspace_param or self.keyspace or "(unspecified)",
                    collection_name=name,
                )

        driver_commander = self._get_driver_commander(keyspace=keyspace_param)
        cc_payload = {"createCollection": {"name": name, "options": cc_options}}
        logger.info(f"createCollection('{name}')")
        await driver_commander.async_request(
            payload=cc_payload,
            timeout_info=timeout_manager.remaining_timeout_info(),
        )
        # Distinct "finished" message, so that the log tells the start of the
        # request apart from its completion.
        logger.info(f"finished createCollection('{name}')")
        return await self.get_collection(
            name,
            keyspace=keyspace_param,
            embedding_api_key=coerce_embedding_headers_provider(embedding_api_key),
            collection_max_time_ms=collection_max_time_ms,
        )

    async def drop_collection(
        self,
        name_or_collection: str | AsyncCollection,
        *,
        max_time_ms: int | None = None,
    ) -> dict[str, Any]:
        """
        Remove a collection, and every document stored in it, from the database.

        Args:
            name_or_collection: the collection to drop, given either as its
                name (a string) or as an `AsyncCollection` instance.
            max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.

        Returns:
            the "status" part of the API response: a dictionary in the
            form {"ok": 1} if the command succeeds.

        Example:
            >>> asyncio.run(my_async_db.list_collection_names())
            ['a_collection', 'my_v_col', 'another_col']
            >>> asyncio.run(my_async_db.drop_collection("my_v_col"))
            {'ok': 1}
            >>> asyncio.run(my_async_db.list_collection_names())
            ['a_collection', 'another_col']

        Note:
            when a plain collection name is passed, the collection is looked for
            in the keyspace set at database instance level; when an
            `AsyncCollection` is passed, the keyspace of that collection is used.
        """

        # imported locally to sidestep a circular import between modules
        from astrapy.collection import AsyncCollection

        if isinstance(name_or_collection, AsyncCollection):
            target_keyspace: str | None = name_or_collection.keyspace
            target_name: str = name_or_collection.name
        else:
            target_keyspace = self.keyspace
            target_name = name_or_collection

        commander = self._get_driver_commander(keyspace=target_keyspace)
        logger.info(f"deleteCollection('{target_name}')")
        response = await commander.async_request(
            payload={"deleteCollection": {"name": target_name}},
            timeout_info=base_timeout_info(max_time_ms),
        )
        logger.info(f"finished deleteCollection('{target_name}')")
        # a response lacking "status" yields an empty dictionary
        return response.get("status", {})  # type: ignore[no-any-return]

    def list_collections(
        self,
        *,
        keyspace: str | None = None,
        namespace: str | None = None,
        max_time_ms: int | None = None,
    ) -> AsyncCommandCursor[CollectionDescriptor]:
        """
        Enumerate the collections found in a keyspace of this database.

        The `findCollections` request is issued when this method is called
        (through the commander's non-awaited `request` method); the returned
        cursor merely wraps the already-retrieved descriptors.

        Args:
            keyspace: the keyspace to be inspected. If not specified,
                the general setting for this database is assumed.
            namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
            max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.

        Returns:
            an `AsyncCommandCursor` to iterate over CollectionDescriptor instances,
            each corresponding to a collection.

        Raises:
            DataAPIFaultyResponseException: if the API response has no
                "collections" entry within its "status" field.

        Example:
            >>> async def a_list_colls(adb: AsyncDatabase) -> None:
            ...     a_ccur = adb.list_collections()
            ...     print("* a_ccur:", a_ccur)
            ...     print("* list:", [coll async for coll in a_ccur])
            ...     async for coll in adb.list_collections():
            ...         print("* coll:", coll)
            ...
            >>> asyncio.run(a_list_colls(my_async_db))
            * a_ccur: <astrapy.cursors.AsyncCommandCursor object at ...>
            * list: [CollectionDescriptor(name='my_v_col', options=CollectionOptions())]
            * coll: CollectionDescriptor(name='my_v_col', options=CollectionOptions())
        """

        keyspace_param = check_namespace_keyspace(
            keyspace=keyspace,
            namespace=namespace,
        )
        commander = self._get_driver_commander(keyspace=keyspace_param)

        logger.info("findCollections")
        # "explain" is requested so that full descriptions, not bare names, come back
        response = commander.request(
            payload={"findCollections": {"options": {"explain": True}}},
            timeout_info=base_timeout_info(max_time_ms),
        )

        status = response.get("status", {})
        if "collections" not in status:
            raise DataAPIFaultyResponseException(
                text="Faulty response from get_collections API command.",
                raw_response=response,
            )

        logger.info("finished findCollections")
        cursor_address = commander.full_path
        # each item is a dict, marshaled here into a descriptor object
        descriptors = [
            CollectionDescriptor.from_dict(raw_descriptor)
            for raw_descriptor in status["collections"]
        ]
        return AsyncCommandCursor(address=cursor_address, items=descriptors)

    async def list_collection_names(
        self,
        *,
        keyspace: str | None = None,
        namespace: str | None = None,
        max_time_ms: int | None = None,
    ) -> list[str]:
        """
        Retrieve the names of the collections found in a keyspace of this database.

        Args:
            keyspace: the keyspace to be inspected. If not specified,
                the general setting for this database is assumed.
            namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
            max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.

        Returns:
            a list of the collection names as strings, in no particular order.

        Raises:
            DataAPIFaultyResponseException: if the API response has no
                "collections" entry within its "status" field.

        Example:
            >>> asyncio.run(my_async_db.list_collection_names())
            ['a_collection', 'another_col']
        """

        keyspace_param = check_namespace_keyspace(
            keyspace=keyspace,
            namespace=namespace,
        )
        commander = self._get_driver_commander(keyspace=keyspace_param)

        find_payload: dict[str, Any] = {"findCollections": {}}
        logger.info("findCollections")
        response = await commander.async_request(
            payload=find_payload,
            timeout_info=base_timeout_info(max_time_ms),
        )

        status = response.get("status", {})
        if "collections" not in status:
            raise DataAPIFaultyResponseException(
                text="Faulty response from get_collections API command.",
                raw_response=response,
            )

        # the "collections" entry is handed back to the caller as is
        logger.info("finished findCollections")
        return status["collections"]  # type: ignore[no-any-return]

    async def command(
        self,
        body: dict[str, Any],
        *,
        keyspace: str | None = None,
        namespace: str | None = None,
        collection_name: str | None = None,
        raise_api_errors: bool = True,
        max_time_ms: int | None = None,
    ) -> dict[str, Any]:
        """
        Send a POST request to the Data API for this database with
        an arbitrary, caller-provided payload.

        Args:
            body: a JSON-serializable dictionary, the payload of the request.
            keyspace: the keyspace to use. Requests always target a keyspace:
                if not specified, the general setting for this database is assumed.
            namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
            collection_name: if provided, the collection name is appended at the end
                of the endpoint. In this way, this method allows collection-level
                arbitrary POST requests as well. In that case the request is
                delegated to the `command` method of the collection object.
            raise_api_errors: if True, responses with a nonempty 'errors' field
                result in an astrapy exception being raised.
            max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.

        Returns:
            a dictionary with the response of the HTTP request.

        Example:
            >>> asyncio.run(my_async_db.command({"findCollections": {}}))
            {'status': {'collections': ['my_coll']}}
            >>> asyncio.run(my_async_db.command(
            ...     {"countDocuments": {}},
            ...     collection_name="my_coll",
            ... ))
            {'status': {'count': 123}}
        """

        keyspace_param = check_namespace_keyspace(
            keyspace=keyspace,
            namespace=namespace,
        )

        if collection_name:
            # if keyspace and collection_name both passed, a new database is needed
            # (a copy bound to the requested keyspace) to spawn the collection from
            _database: AsyncDatabase
            if keyspace_param:
                _database = self._copy(keyspace=keyspace_param)
            else:
                _database = self
            logger.info(f"deferring to collection '{collection_name}' for command.")
            _collection = await _database.get_collection(collection_name)
            coll_req_response = await _collection.command(
                body=body,
                raise_api_errors=raise_api_errors,
                max_time_ms=max_time_ms,
            )
            logger.info(
                f"finished deferring to collection '{collection_name}' for command."
            )
            return coll_req_response
        else:
            driver_commander = self._get_driver_commander(keyspace=keyspace_param)
            # the log lines identify the command by its (sorted) top-level keys
            _cmd_desc = ",".join(sorted(body.keys()))
            logger.info(f"command={_cmd_desc} on {self.__class__.__name__}")
            req_response = await driver_commander.async_request(
                payload=body,
                raise_api_errors=raise_api_errors,
                timeout_info=base_timeout_info(max_time_ms),
            )
            # distinct "finished" message, so that start and end can be told apart
            logger.info(f"finished command={_cmd_desc} on {self.__class__.__name__}")
            return req_response

    def get_database_admin(
        self,
        *,
        token: str | TokenProvider | None = None,
        dev_ops_url: str | None = None,
        dev_ops_api_version: str | None = None,
    ) -> DatabaseAdmin:
        """
        Build a DatabaseAdmin object for this database, to be used for
        admin tasks such as managing keyspaces.

        The concrete DatabaseAdmin subclass being returned depends on the
        environment where the database resides.

        Args:
            token: an access token with enough permission on the database to
                perform the desired tasks. If omitted (as it can generally be done),
                the token of this Database is used.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            dev_ops_url: in case of custom deployments, this can be used to specify
                the URL to the DevOps API, such as "https://api.astra.datastax.com".
                Generally it can be omitted. The environment (prod/dev/...) is
                determined from the API Endpoint.
                Note that this parameter is allowed only for Astra DB environments.
            dev_ops_api_version: this can specify a custom version of the DevOps API
                (such as "v2"). Generally not needed.
                Note that this parameter is allowed only for Astra DB environments.

        Returns:
            A DatabaseAdmin instance targeting this database. More precisely,
            for Astra DB an instance of `AstraDBDatabaseAdmin` is returned;
            for other environments, an instance of `DataAPIDatabaseAdmin` is returned.

        Raises:
            ValueError: if `dev_ops_url` or `dev_ops_api_version` is passed
                for a database that is not in an Astra DB environment.

        Example:
            >>> my_db_admin = my_async_db.get_database_admin()
            >>> if "new_keyspace" not in my_db_admin.list_keyspaces():
            ...     my_db_admin.create_keyspace("new_keyspace")
            >>> my_db_admin.list_keyspaces()
            ['default_keyspace', 'new_keyspace']
        """

        # imported locally: a module-level import would be circular
        from astrapy.admin import AstraDBDatabaseAdmin, DataAPIDatabaseAdmin

        if self.environment not in Environment.astra_db_values:
            # the DevOps-related settings are meaningful for Astra DB only
            if dev_ops_url is not None:
                raise ValueError(
                    "Parameter `dev_ops_url` not supported outside of Astra DB."
                )
            if dev_ops_api_version is not None:
                raise ValueError(
                    "Parameter `dev_ops_api_version` not supported outside of Astra DB."
                )
            return DataAPIDatabaseAdmin(
                api_endpoint=self.api_endpoint,
                token=coerce_token_provider(token) or self.token_provider,
                environment=self.environment,
                api_path=self.api_path,
                api_version=self.api_version,
                callers=self.callers,
                spawner_database=self,
            )

        return AstraDBDatabaseAdmin(
            api_endpoint=self.api_endpoint,
            token=coerce_token_provider(token) or self.token_provider,
            environment=self.environment,
            callers=self.callers,
            dev_ops_url=dev_ops_url,
            dev_ops_api_version=dev_ops_api_version,
            spawner_database=self,
        )

Instance variables

var id : str

The ID of this database.

Example

>>> my_async_db.id
'01234567-89ab-cdef-0123-456789abcdef'
Expand source code
@property
def id(self) -> str:
    """
    The database ID, as extracted from the API endpoint of this database.

    Raises:
        DevOpsAPIException: if the API endpoint cannot be parsed
            (i.e. `parse_api_endpoint` returns None for it).

    Example:
        >>> my_async_db.id
        '01234567-89ab-cdef-0123-456789abcdef'
    """

    endpoint_info = parse_api_endpoint(self.api_endpoint)
    if endpoint_info is None:
        raise DevOpsAPIException(
            "Database is not in a supported environment for this operation."
        )
    return endpoint_info.database_id
var keyspace : str | None

The keyspace this database uses as target for all commands when no method-call-specific keyspace is specified.

Returns

the working keyspace (a string), or None if not set.

Example

>>> my_async_db.keyspace
'the_keyspace'
Expand source code
@property
def keyspace(self) -> str | None:
    """
    The working keyspace of this database, i.e. the one targeted by
    commands for which no method-call-specific keyspace is given.

    Returns:
        the working keyspace (a string), or None if not set.

    Example:
        >>> my_async_db.keyspace
        'the_keyspace'
    """

    working_keyspace = self._using_keyspace
    return working_keyspace
var namespace : str | None

The namespace this database uses as target for all commands when no method-call-specific namespace is specified.

DEPRECATED (removal in 2.0). Switch to the "keyspace" property.

Returns

the working namespace (a string), or None if not set.

Example

>>> my_async_db.namespace
'the_keyspace'
Expand source code
@property
def namespace(self) -> str | None:
    """
    The working namespace of this database, i.e. the one targeted by
    commands for which no method-call-specific namespace is given.

    *DEPRECATED* (removal in 2.0). Switch to the "keyspace" property.
    Accessing this property issues a deprecation warning and then
    returns the value of the `keyspace` property.

    Returns:
        the working namespace (a string), or None if not set.

    Example:
        >>> my_async_db.namespace
        'the_keyspace'
    """

    # stacklevel=2 attributes the warning to the code accessing the property
    warnings.warn(
        deprecation.DeprecatedWarning(
            "the 'namespace' property",
            deprecated_in="1.5.0",
            removed_in="2.0.0",
            details=NAMESPACE_DEPRECATION_NOTICE_METHOD,
        ),
        stacklevel=2,
    )
    return self.keyspace

Methods

async def command(self, body: dict[str, Any], *, keyspace: str | None = None, namespace: str | None = None, collection_name: str | None = None, raise_api_errors: bool = True, max_time_ms: int | None = None) ‑> dict[str, typing.Any]

Send a POST request to the Data API for this database with an arbitrary, caller-provided payload.

Args

body
a JSON-serializable dictionary, the payload of the request.
keyspace
the keyspace to use. Requests always target a keyspace: if not specified, the general setting for this database is assumed.
namespace
an alias for keyspace. DEPRECATED, removal in 2.0.
collection_name
if provided, the collection name is appended at the end of the endpoint. In this way, this method allows collection-level arbitrary POST requests as well.
raise_api_errors
if True, responses with a nonempty 'errors' field result in an astrapy exception being raised.
max_time_ms
a timeout, in milliseconds, for the underlying HTTP request.

Returns

a dictionary with the response of the HTTP request.

Example

>>> asyncio.run(my_async_db.command({"findCollections": {}}))
{'status': {'collections': ['my_coll']}}
>>> asyncio.run(my_async_db.command(
...     {"countDocuments": {}},
...     collection_name="my_coll",
... ))
{'status': {'count': 123}}
Expand source code
async def command(
    self,
    body: dict[str, Any],
    *,
    keyspace: str | None = None,
    namespace: str | None = None,
    collection_name: str | None = None,
    raise_api_errors: bool = True,
    max_time_ms: int | None = None,
) -> dict[str, Any]:
    """
    Send a POST request to the Data API for this database with
    an arbitrary, caller-provided payload.

    Args:
        body: a JSON-serializable dictionary, the payload of the request.
        keyspace: the keyspace to use. Requests always target a keyspace:
            if not specified, the general setting for this database is assumed.
        namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
        collection_name: if provided, the collection name is appended at the end
            of the endpoint. In this way, this method allows collection-level
            arbitrary POST requests as well. In that case the request is
            delegated to the `command` method of the collection object.
        raise_api_errors: if True, responses with a nonempty 'errors' field
            result in an astrapy exception being raised.
        max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.

    Returns:
        a dictionary with the response of the HTTP request.

    Example:
        >>> asyncio.run(my_async_db.command({"findCollections": {}}))
        {'status': {'collections': ['my_coll']}}
        >>> asyncio.run(my_async_db.command(
        ...     {"countDocuments": {}},
        ...     collection_name="my_coll",
        ... ))
        {'status': {'count': 123}}
    """

    keyspace_param = check_namespace_keyspace(
        keyspace=keyspace,
        namespace=namespace,
    )

    if collection_name:
        # if keyspace and collection_name both passed, a new database is needed
        # (a copy bound to the requested keyspace) to spawn the collection from
        _database: AsyncDatabase
        if keyspace_param:
            _database = self._copy(keyspace=keyspace_param)
        else:
            _database = self
        logger.info(f"deferring to collection '{collection_name}' for command.")
        _collection = await _database.get_collection(collection_name)
        coll_req_response = await _collection.command(
            body=body,
            raise_api_errors=raise_api_errors,
            max_time_ms=max_time_ms,
        )
        logger.info(
            f"finished deferring to collection '{collection_name}' for command."
        )
        return coll_req_response
    else:
        driver_commander = self._get_driver_commander(keyspace=keyspace_param)
        # the log lines identify the command by its (sorted) top-level keys
        _cmd_desc = ",".join(sorted(body.keys()))
        logger.info(f"command={_cmd_desc} on {self.__class__.__name__}")
        req_response = await driver_commander.async_request(
            payload=body,
            raise_api_errors=raise_api_errors,
            timeout_info=base_timeout_info(max_time_ms),
        )
        # distinct "finished" message, so that start and end can be told apart
        logger.info(f"finished command={_cmd_desc} on {self.__class__.__name__}")
        return req_response
async def create_collection(self, name: str, *, keyspace: str | None = None, namespace: str | None = None, dimension: int | None = None, metric: str | None = None, service: CollectionVectorServiceOptions | dict[str, Any] | None = None, indexing: dict[str, Any] | None = None, default_id_type: str | None = None, additional_options: dict[str, Any] | None = None, check_exists: bool | None = None, max_time_ms: int | None = None, embedding_api_key: str | EmbeddingHeadersProvider | None = None, collection_max_time_ms: int | None = None) ‑> AsyncCollection

Creates a collection on the database and returns the AsyncCollection instance that represents it.

This is a blocking operation: the method returns when the collection is ready to be used. As opposed to the get_collection method, this method causes the collection to be actually created on DB.

Args

name
the name of the collection.
keyspace
the keyspace where the collection is to be created. If not specified, the general setting for this database is used.
namespace
an alias for keyspace. DEPRECATED, removal in 2.0.
dimension
for vector collections, the dimension of the vectors (i.e. the number of their components).
metric
the similarity metric used for vector searches. Allowed values are VectorMetric.DOT_PRODUCT, VectorMetric.EUCLIDEAN or VectorMetric.COSINE (default).
service
a dictionary describing a service for embedding computation, e.g. {"provider": "ab", "modelName": "xy"}. Alternatively, a CollectionVectorServiceOptions object to the same effect.
indexing
optional specification of the indexing options for the collection, in the form of a dictionary such as {"deny": […]} or {"allow": […]}
default_id_type
this sets what type of IDs the API server will generate when inserting documents that do not specify their _id field explicitly. Can be set to any of the values DefaultIdType.UUID, DefaultIdType.OBJECTID, DefaultIdType.UUIDV6, DefaultIdType.UUIDV7, DefaultIdType.DEFAULT.
additional_options
any further set of key-value pairs that will be added to the "options" part of the payload when sending the Data API command to create a collection.
check_exists
whether to run an existence check for the collection name before attempting to create the collection: If check_exists is True, an error is raised when creating an existing collection. If it is False, the creation is attempted. In this case, for preexisting collections, the command will succeed or fail depending on whether the options match or not.
max_time_ms
a timeout, in milliseconds, for the underlying HTTP request.
embedding_api_key
optional API key(s) for interacting with the collection. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
collection_max_time_ms
a default timeout, in milliseconds, for the duration of each operation on the collection. Individual timeouts can be provided to each collection method call and will take precedence, with this value being an overall default. Note that for some methods involving multiple API calls (such as find, delete_many, insert_many and so on), it is strongly suggested to provide a specific timeout as the default one likely wouldn't make much sense.

Returns

an AsyncCollection instance, representing the newly-created collection.

Example

>>> async def create_and_insert(adb: AsyncDatabase) -> Dict[str, Any]:
...     new_a_col = await adb.create_collection("my_v_col", dimension=3)
...     return await new_a_col.insert_one(
...         {"name": "the_row", "$vector": [0.4, 0.5, 0.7]},
...     )
...
>>> asyncio.run(create_and_insert(my_async_db))
InsertOneResult(raw_results=..., inserted_id='08f05ecf-...-...-...')

Note

A collection is considered a vector collection if at least one of dimension or service are provided and not null. In that case, and only in that case, is metric an accepted parameter. Note, moreover, that if passing both these parameters, then the dimension must be compatible with the chosen service.

Expand source code
async def create_collection(
    self,
    name: str,
    *,
    keyspace: str | None = None,
    namespace: str | None = None,
    dimension: int | None = None,
    metric: str | None = None,
    service: CollectionVectorServiceOptions | dict[str, Any] | None = None,
    indexing: dict[str, Any] | None = None,
    default_id_type: str | None = None,
    additional_options: dict[str, Any] | None = None,
    check_exists: bool | None = None,
    max_time_ms: int | None = None,
    embedding_api_key: str | EmbeddingHeadersProvider | None = None,
    collection_max_time_ms: int | None = None,
) -> AsyncCollection:
    """
    Creates a collection on the database and returns the AsyncCollection
    instance that represents it.

    This is a blocking operation: the method returns when the collection
    is ready to be used. As opposed to the `get_collection` method,
    this method causes the collection to be actually created on DB.

    Args:
        name: the name of the collection.
        keyspace: the keyspace where the collection is to be created.
            If not specified, the general setting for this database is used.
        namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
        dimension: for vector collections, the dimension of the vectors
            (i.e. the number of their components).
        metric: the similarity metric used for vector searches.
            Allowed values are `VectorMetric.DOT_PRODUCT`, `VectorMetric.EUCLIDEAN`
            or `VectorMetric.COSINE` (default).
        service: a dictionary describing a service for
            embedding computation, e.g. `{"provider": "ab", "modelName": "xy"}`.
            Alternatively, a CollectionVectorServiceOptions object to the same effect.
        indexing: optional specification of the indexing options for
            the collection, in the form of a dictionary such as
                {"deny": [...]}
            or
                {"allow": [...]}
        default_id_type: this sets what type of IDs the API server will
            generate when inserting documents that do not specify their
            `_id` field explicitly. Can be set to any of the values
            `DefaultIdType.UUID`, `DefaultIdType.OBJECTID`,
            `DefaultIdType.UUIDV6`, `DefaultIdType.UUIDV7`,
            `DefaultIdType.DEFAULT`.
        additional_options: any further set of key-value pairs that will
            be added to the "options" part of the payload when sending
            the Data API command to create a collection.
        check_exists: whether to run an existence check for the collection
            name before attempting to create the collection:
            If check_exists is True (or left to None, the default),
            an error is raised when creating an existing collection.
            If it is False, the creation is attempted. In this case, for
            preexisting collections, the command will succeed or fail
            depending on whether the options match or not.
        max_time_ms: a timeout, in milliseconds, for the whole operation:
            the same time budget is shared by the existence check
            (if performed) and the subsequent creation request.
        embedding_api_key: optional API key(s) for interacting with the collection.
            If an embedding service is configured, and this parameter is not None,
            each Data API call will include the necessary embedding-related headers
            as specified by this parameter. If a string is passed, it translates
            into the one "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            For some vectorize providers/models, if using header-based authentication,
            specialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`
            should be supplied.
        collection_max_time_ms: a default timeout, in milliseconds, for the
            duration of each operation on the collection. Individual timeouts
            can be provided to each collection method call and will take
            precedence, with this value being an overall default.
            Note that for some methods involving multiple API calls (such as
            `find`, `delete_many`, `insert_many` and so on), it is strongly suggested
            to provide a specific timeout as the default one likely wouldn't make
            much sense.

    Returns:
        an `AsyncCollection` instance, representing the newly-created collection.

    Raises:
        CollectionAlreadyExistsException: if the existence check is performed
            and a collection with the given name is found in the keyspace.

    Example:
        >>> async def create_and_insert(adb: AsyncDatabase) -> InsertOneResult:
        ...     new_a_col = await adb.create_collection("my_v_col", dimension=3)
        ...     return await new_a_col.insert_one(
        ...         {"name": "the_row", "$vector": [0.4, 0.5, 0.7]},
        ...     )
        ...
        >>> asyncio.run(create_and_insert(my_async_db))
        InsertOneResult(raw_results=..., inserted_id='08f05ecf-...-...-...')

    Note:
        A collection is considered a vector collection if at least one of
        `dimension` or `service` are provided and not null. In that case,
        and only in that case, is `metric` an accepted parameter.
        Note, moreover, that if passing both these parameters, then
        the dimension must be compatible with the chosen service.
    """

    keyspace_param = check_namespace_keyspace(
        keyspace=keyspace,
        namespace=namespace,
    )
    cc_options = _normalize_create_collection_options(
        dimension=dimension,
        metric=metric,
        service=service,
        indexing=indexing,
        default_id_type=default_id_type,
        additional_options=additional_options,
    )

    # a single time budget is spread over the (up to) two API calls below
    timeout_manager = MultiCallTimeoutManager(overall_max_time_ms=max_time_ms)

    # an unspecified `check_exists` means the check is performed
    if check_exists is None or check_exists:
        logger.info(f"checking collection existence for '{name}'")
        existing_names = await self.list_collection_names(
            keyspace=keyspace_param,
            max_time_ms=timeout_manager.remaining_timeout_ms(),
        )
        if name in existing_names:
            raise CollectionAlreadyExistsException(
                text=f"Collection {name} already exists",
                keyspace=keyspace_param or self.keyspace or "(unspecified)",
                collection_name=name,
            )

    driver_commander = self._get_driver_commander(keyspace=keyspace_param)
    cc_payload = {"createCollection": {"name": name, "options": cc_options}}
    logger.info(f"createCollection('{name}')")
    await driver_commander.async_request(
        payload=cc_payload,
        timeout_info=timeout_manager.remaining_timeout_info(),
    )
    # distinct "finished" message, so that start and end can be told apart
    logger.info(f"finished createCollection('{name}')")
    return await self.get_collection(
        name,
        keyspace=keyspace_param,
        embedding_api_key=coerce_embedding_headers_provider(embedding_api_key),
        collection_max_time_ms=collection_max_time_ms,
    )
async def drop_collection(self, name_or_collection: str | AsyncCollection, *, max_time_ms: int | None = None) ‑> dict[str, Any]

Drop a collection from the database, along with all documents therein.

Args

name_or_collection
either the name of a collection or an AsyncCollection instance.
max_time_ms
a timeout, in milliseconds, for the underlying HTTP request.

Returns

a dictionary in the form {"ok": 1} if the command succeeds.

Example

>>> asyncio.run(my_async_db.list_collection_names())
['a_collection', 'my_v_col', 'another_col']
>>> asyncio.run(my_async_db.drop_collection("my_v_col"))
{'ok': 1}
>>> asyncio.run(my_async_db.list_collection_names())
['a_collection', 'another_col']

Note

when providing a collection name, it is assumed that the collection is to be found in the keyspace that was set at database instance level.

Expand source code
async def drop_collection(
    self,
    name_or_collection: str | AsyncCollection,
    *,
    max_time_ms: int | None = None,
) -> dict[str, Any]:
    """
    Delete a collection, and every document stored in it, from the database.

    Args:
        name_or_collection: the collection to drop, given either as its name
            or as an `AsyncCollection` instance.
        max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.

    Returns:
        the "status" part of the API response, i.e. a dictionary
        in the form {"ok": 1} if the command succeeds.

    Example:
        >>> asyncio.run(my_async_db.list_collection_names())
        ['a_collection', 'my_v_col', 'another_col']
        >>> asyncio.run(my_async_db.drop_collection("my_v_col"))
        {'ok': 1}
        >>> asyncio.run(my_async_db.list_collection_names())
        ['a_collection', 'another_col']

    Note:
        a collection passed by name is looked up in the working keyspace
        of this database; an `AsyncCollection` instance, conversely, carries
        its own keyspace, which is the one targeted by the deletion.
    """

    # imported lazily: a module-level import would be circular
    from astrapy.collection import AsyncCollection

    target_keyspace: str | None
    target_name: str
    if isinstance(name_or_collection, AsyncCollection):
        target_keyspace, target_name = (
            name_or_collection.keyspace,
            name_or_collection.name,
        )
    else:
        target_keyspace, target_name = self.keyspace, name_or_collection

    commander = self._get_driver_commander(keyspace=target_keyspace)
    logger.info(f"deleteCollection('{target_name}')")
    response = await commander.async_request(
        payload={"deleteCollection": {"name": target_name}},
        timeout_info=base_timeout_info(max_time_ms),
    )
    logger.info(f"finished deleteCollection('{target_name}')")
    # a response lacking the "status" entry yields an empty dictionary
    return response.get("status", {})  # type: ignore[no-any-return]
async def get_collection(self, name: str, *, keyspace: str | None = None, namespace: str | None = None, embedding_api_key: str | EmbeddingHeadersProvider | None = None, collection_max_time_ms: int | None = None) ‑> AsyncCollection

Spawn an AsyncCollection object instance representing a collection on this database.

Creating an AsyncCollection instance does not have any effect on the actual state of the database: in other words, for the created AsyncCollection instance to be used meaningfully, the collection must exist already (for instance, it should have been created previously by calling the create_collection method).

Args

name
the name of the collection.
keyspace
the keyspace containing the collection. If no keyspace is specified, the setting for this database is used.
namespace
an alias for keyspace. DEPRECATED, removal in 2.0.
embedding_api_key
optional API key(s) for interacting with the collection. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
collection_max_time_ms
a default timeout, in milliseconds, for the duration of each operation on the collection. Individual timeouts can be provided to each collection method call and will take precedence, with this value being an overall default. Note that for some methods involving multiple API calls (such as find, delete_many, insert_many and so on), it is strongly suggested to provide a specific timeout as the default one likely wouldn't make much sense.

Returns

an AsyncCollection instance, representing the desired collection (but without any form of validation).

Example

>>> async def count_docs(adb: AsyncDatabase, c_name: str) -> int:
...    async_col = await adb.get_collection(c_name)
...    return await async_col.count_documents({}, upper_bound=100)
...
>>> asyncio.run(count_docs(my_async_db, "my_collection"))
45

Note: the attribute and indexing syntax forms achieve the same effect as this method, returning an AsyncCollection, albeit in a synchronous way. In other words, the following three forms are equivalent: await my_async_db.get_collection("coll_name"); my_async_db.coll_name; my_async_db["coll_name"].

Expand source code
async def get_collection(
    self,
    name: str,
    *,
    keyspace: str | None = None,
    namespace: str | None = None,
    embedding_api_key: str | EmbeddingHeadersProvider | None = None,
    collection_max_time_ms: int | None = None,
) -> AsyncCollection:
    """
    Build an `AsyncCollection` object that represents a collection
    of this database.

    No request is sent to the database by this method and nothing is
    created: for the returned `AsyncCollection` to be of any use, the
    collection must exist already (for instance, as a result of a previous
    call to the `create_collection` method).

    Args:
        name: the name of the collection.
        keyspace: the keyspace containing the collection. If no keyspace
            is specified, the setting for this database is used.
        namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
        embedding_api_key: optional API key(s) for interacting with the collection.
            If an embedding service is configured, and this parameter is not None,
            each Data API call will include the necessary embedding-related headers
            as specified by this parameter. A plain string translates
            into the one "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            For some vectorize providers/models, if using header-based
            authentication, specialized subclasses of
            `astrapy.authentication.EmbeddingHeadersProvider` should be supplied.
        collection_max_time_ms: a default timeout, in milliseconds, for the
            duration of each operation on the collection. Timeouts passed to
            the individual collection method calls take precedence over this
            overall default.
            Note that for some methods involving multiple API calls (such as `find`,
            `delete_many`, `insert_many` and so on), it is strongly suggested
            to provide a specific timeout as the default one likely wouldn't make
            much sense.

    Returns:
        an `AsyncCollection` instance, representing the desired collection
            (but without any form of validation).

    Raises:
        ValueError: if no keyspace is passed and none is set on this database.

    Example:
        >>> async def count_docs(adb: AsyncDatabase, c_name: str) -> int:
        ...    async_col = await adb.get_collection(c_name)
        ...    return await async_col.count_documents({}, upper_bound=100)
        ...
        >>> asyncio.run(count_docs(my_async_db, "my_collection"))
        45

    Note: the attribute and indexing syntax forms achieve the same effect
        as this method, returning an AsyncCollection, albeit
        in a synchronous way. In other words, the following are equivalent:
            await my_async_db.get_collection("coll_name")
            my_async_db.coll_name
            my_async_db["coll_name"]
    """

    # reconcile the `keyspace` parameter with its deprecated alias
    explicit_keyspace = check_namespace_keyspace(
        keyspace=keyspace,
        namespace=namespace,
    )

    # imported lazily: a module-level import would be circular
    from astrapy.collection import AsyncCollection

    # an explicitly-passed keyspace wins over the database-level setting
    effective_keyspace = explicit_keyspace or self.keyspace
    if effective_keyspace is None:
        raise ValueError(
            "No keyspace specified. This operation requires a keyspace to "
            "be set, e.g. through the `use_keyspace` method."
        )

    collection_api_options = CollectionAPIOptions(
        embedding_api_key=coerce_embedding_headers_provider(embedding_api_key),
        max_time_ms=collection_max_time_ms,
    )
    return AsyncCollection(
        self,
        name,
        keyspace=effective_keyspace,
        api_options=collection_api_options,
        callers=self.callers,
    )
def get_database_admin(self, *, token: str | TokenProvider | None = None, dev_ops_url: str | None = None, dev_ops_api_version: str | None = None) ‑> DatabaseAdmin

Return a DatabaseAdmin object corresponding to this database, for use in admin tasks such as managing keyspaces.

This method, depending on the environment where the database resides, returns an appropriate subclass of DatabaseAdmin.

Args

token
an access token with enough permission on the database to perform the desired tasks. If omitted (as it can generally be done), the token of this Database is used. This can be either a literal token string or a subclass of TokenProvider.
dev_ops_url
in case of custom deployments, this can be used to specify the URL to the DevOps API, such as "https://api.astra.datastax.com". Generally it can be omitted. The environment (prod/dev/…) is determined from the API Endpoint. Note that this parameter is allowed only for Astra DB environments.
dev_ops_api_version
this can specify a custom version of the DevOps API (such as "v2"). Generally not needed. Note that this parameter is allowed only for Astra DB environments.

Returns

A DatabaseAdmin instance targeting this database. More precisely, for Astra DB an instance of AstraDBDatabaseAdmin is returned; for other environments, an instance of DataAPIDatabaseAdmin is returned.

Example

>>> my_db_admin = my_async_db.get_database_admin()
>>> if "new_keyspace" not in my_db_admin.list_keyspaces():
...     my_db_admin.create_keyspace("new_keyspace")
>>> my_db_admin.list_keyspaces()
['default_keyspace', 'new_keyspace']
Expand source code
def get_database_admin(
    self,
    *,
    token: str | TokenProvider | None = None,
    dev_ops_url: str | None = None,
    dev_ops_api_version: str | None = None,
) -> DatabaseAdmin:
    """
    Create a DatabaseAdmin object bound to this database, to be used
    for admin tasks such as managing keyspaces.

    The concrete subclass of DatabaseAdmin being returned depends on
    the environment where the database resides.

    Args:
        token: an access token with enough permission on the database to
            perform the desired tasks. If omitted (as can generally be done),
            the token of this Database is used.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        dev_ops_url: in case of custom deployments, this can be used to specify
            the URL to the DevOps API, such as "https://api.astra.datastax.com".
            Generally it can be omitted. The environment (prod/dev/...) is
            determined from the API Endpoint.
            Note that this parameter is allowed only for Astra DB environments.
        dev_ops_api_version: this can specify a custom version of the DevOps API
            (such as "v2"). Generally not needed.
            Note that this parameter is allowed only for Astra DB environments.

    Returns:
        A DatabaseAdmin instance targeting this database. More precisely,
        for Astra DB an instance of `AstraDBDatabaseAdmin` is returned;
        for other environments, an instance of `DataAPIDatabaseAdmin` is returned.

    Raises:
        ValueError: if `dev_ops_url` or `dev_ops_api_version` is passed
            for a database that is not in an Astra DB environment.

    Example:
        >>> my_db_admin = my_async_db.get_database_admin()
        >>> if "new_keyspace" not in my_db_admin.list_keyspaces():
        ...     my_db_admin.create_keyspace("new_keyspace")
        >>> my_db_admin.list_keyspaces()
        ['default_keyspace', 'new_keyspace']
    """

    # imported lazily: a module-level import would be circular
    from astrapy.admin import AstraDBDatabaseAdmin, DataAPIDatabaseAdmin

    # Astra DB: the DevOps-related settings are simply passed through
    if self.environment in Environment.astra_db_values:
        return AstraDBDatabaseAdmin(
            api_endpoint=self.api_endpoint,
            token=coerce_token_provider(token) or self.token_provider,
            environment=self.environment,
            callers=self.callers,
            dev_ops_url=dev_ops_url,
            dev_ops_api_version=dev_ops_api_version,
            spawner_database=self,
        )

    # any other environment: the DevOps-related settings are rejected
    if dev_ops_url is not None:
        raise ValueError("Parameter `dev_ops_url` not supported outside of Astra DB.")
    if dev_ops_api_version is not None:
        raise ValueError(
            "Parameter `dev_ops_api_version` not supported outside of Astra DB."
        )
    return DataAPIDatabaseAdmin(
        api_endpoint=self.api_endpoint,
        token=coerce_token_provider(token) or self.token_provider,
        environment=self.environment,
        api_path=self.api_path,
        api_version=self.api_version,
        callers=self.callers,
        spawner_database=self,
    )
def info(self) ‑> DatabaseInfo

Additional information on the database as a DatabaseInfo instance.

Some of the returned properties are dynamic throughout the lifetime of the database (such as raw_info["keyspaces"]). For this reason, each invocation of this method triggers a new request to the DevOps API.

Example

>>> my_async_db.info().region
'eu-west-1'
>>> my_async_db.info().raw_info['datacenters'][0]['dateCreated']
'2023-01-30T12:34:56Z'

Note

see the DatabaseInfo documentation for a caveat about the difference between the region and the raw_info["region"] attributes.

Expand source code
def info(self) -> DatabaseInfo:
    """
    Retrieve additional information on the database, in the form
    of a DatabaseInfo instance.

    Since some of the returned properties may change during the lifetime
    of the database (such as raw_info["keyspaces"]), nothing is cached:
    each invocation of this method triggers a new request to the DevOps API.

    Raises:
        DevOpsAPIException: if no database information could be obtained
            (the database is not in a supported environment).

    Example:
        >>> my_async_db.info().region
        'eu-west-1'

        >>> my_async_db.info().raw_info['datacenters'][0]['dateCreated']
        '2023-01-30T12:34:56Z'

    Note:
        see the DatabaseInfo documentation for a caveat about the difference
        between the `region` and the `raw_info["region"]` attributes.
    """

    logger.info("getting database info")
    fetched_info = fetch_database_info(
        self.api_endpoint,
        token=self.token_provider.get_token(),
        keyspace=self.keyspace,
    )
    # a None result means that no information is available for this database
    if fetched_info is None:
        raise DevOpsAPIException(
            "Database is not in a supported environment for this operation."
        )
    logger.info("finished getting database info")
    return fetched_info
async def list_collection_names(self, *, keyspace: str | None = None, namespace: str | None = None, max_time_ms: int | None = None) ‑> list[str]

List the names of all collections in a given keyspace of this database.

Args

keyspace
the keyspace to be inspected. If not specified, the general setting for this database is assumed.
namespace
an alias for keyspace. DEPRECATED, removal in 2.0.
max_time_ms
a timeout, in milliseconds, for the underlying HTTP request.

Returns

a list of the collection names as strings, in no particular order.

Example

>>> asyncio.run(my_async_db.list_collection_names())
['a_collection', 'another_col']
Expand source code
async def list_collection_names(
    self,
    *,
    keyspace: str | None = None,
    namespace: str | None = None,
    max_time_ms: int | None = None,
) -> list[str]:
    """
    Return the names of all collections found in a keyspace of this database.

    Args:
        keyspace: the keyspace to be inspected. If not specified,
            the general setting for this database is assumed.
        namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
        max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.

    Returns:
        a list of the collection names as strings, in no particular order.

    Raises:
        DataAPIFaultyResponseException: if the API response carries
            no "collections" entry in its "status" part.

    Example:
        >>> asyncio.run(my_async_db.list_collection_names())
        ['a_collection', 'another_col']
    """

    # reconcile the `keyspace` parameter with its deprecated alias
    target_keyspace = check_namespace_keyspace(
        keyspace=keyspace,
        namespace=namespace,
    )

    commander = self._get_driver_commander(keyspace=target_keyspace)
    find_payload: dict[str, Any] = {"findCollections": {}}
    logger.info("findCollections")
    response = await commander.async_request(
        payload=find_payload,
        timeout_info=base_timeout_info(max_time_ms),
    )

    response_status = response.get("status", {})
    if "collections" not in response_status:
        raise DataAPIFaultyResponseException(
            text="Faulty response from get_collections API command.",
            raw_response=response,
        )
    # the "collections" entry is handed back to the caller as it is
    logger.info("finished findCollections")
    return response_status["collections"]  # type: ignore[no-any-return]
def list_collections(self, *, keyspace: str | None = None, namespace: str | None = None, max_time_ms: int | None = None) ‑> AsyncCommandCursor[CollectionDescriptor]

List all collections in a given keyspace for this database.

Args

keyspace
the keyspace to be inspected. If not specified, the general setting for this database is assumed.
namespace
an alias for keyspace. DEPRECATED, removal in 2.0.
max_time_ms
a timeout, in milliseconds, for the underlying HTTP request.

Returns

an AsyncCommandCursor to iterate over CollectionDescriptor instances, each corresponding to a collection.

Example

>>> async def a_list_colls(adb: AsyncDatabase) -> None:
...     a_ccur = adb.list_collections()
...     print("* a_ccur:", a_ccur)
...     print("* list:", [coll async for coll in a_ccur])
...     async for coll in adb.list_collections():
...         print("* coll:", coll)
...
>>> asyncio.run(a_list_colls(my_async_db))
* a_ccur: <astrapy.cursors.AsyncCommandCursor object at ...>
* list: [CollectionDescriptor(name='my_v_col', options=CollectionOptions())]
* coll: CollectionDescriptor(name='my_v_col', options=CollectionOptions())
Expand source code
def list_collections(
    self,
    *,
    keyspace: str | None = None,
    namespace: str | None = None,
    max_time_ms: int | None = None,
) -> AsyncCommandCursor[CollectionDescriptor]:
    """
    Return a cursor over all collections in a given keyspace of this database.

    Args:
        keyspace: the keyspace to be inspected. If not specified,
            the general setting for this database is assumed.
        namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
        max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.

    Returns:
        an `AsyncCommandCursor` to iterate over CollectionDescriptor instances,
        each corresponding to a collection.

    Raises:
        DataAPIFaultyResponseException: if the API response carries
            no "collections" entry in its "status" part.

    Example:
        >>> async def a_list_colls(adb: AsyncDatabase) -> None:
        ...     a_ccur = adb.list_collections()
        ...     print("* a_ccur:", a_ccur)
        ...     print("* list:", [coll async for coll in a_ccur])
        ...     async for coll in adb.list_collections():
        ...         print("* coll:", coll)
        ...
        >>> asyncio.run(a_list_colls(my_async_db))
        * a_ccur: <astrapy.cursors.AsyncCommandCursor object at ...>
        * list: [CollectionDescriptor(name='my_v_col', options=CollectionOptions())]
        * coll: CollectionDescriptor(name='my_v_col', options=CollectionOptions())

    Note:
        this is a regular (non-`async`) method: the request to the API is
        issued synchronously while the method runs, and the returned cursor
        is constructed over the items retrieved by that request.
    """

    # reconcile the `keyspace` parameter with its deprecated alias
    target_keyspace = check_namespace_keyspace(
        keyspace=keyspace,
        namespace=namespace,
    )

    commander = self._get_driver_commander(keyspace=target_keyspace)
    # "explain" is set in the options: each returned item is then a dict
    # that can be marshaled into a descriptor
    find_payload = {"findCollections": {"options": {"explain": True}}}
    logger.info("findCollections")
    response = commander.request(
        payload=find_payload,
        timeout_info=base_timeout_info(max_time_ms),
    )

    if "collections" not in response.get("status", {}):
        raise DataAPIFaultyResponseException(
            text="Faulty response from get_collections API command.",
            raw_response=response,
        )
    logger.info("finished findCollections")
    cursor_address = commander.full_path
    descriptors = [
        CollectionDescriptor.from_dict(collection_dict)
        for collection_dict in response["status"]["collections"]
    ]
    return AsyncCommandCursor(address=cursor_address, items=descriptors)
def name(self) ‑> str

The name of this database. Note that this bears no unicity guarantees.

Calling this method the first time involves a request to the DevOps API (the resulting database name is then cached). See the info() method for more details.

Example

>>> my_async_db.name()
'the_application_database'
Expand source code
def name(self) -> str:
    """
    Return the name of this database. Note that the name bears
    no unicity guarantees.

    The name is obtained through the `info()` method the first time it is
    needed (which involves a request to the DevOps API) and is then
    cached on the instance: later calls return the stored value.
    See the `info()` method for more details.

    Example:
        >>> my_async_db.name()
        'the_application_database'
    """

    known_name = self._name
    if known_name is None:
        # first call: look the name up and keep it for the next calls
        known_name = self.info().name
        self._name = known_name
    return known_name
def set_caller(self, caller_name: str | None = None, caller_version: str | None = None) ‑> None

Set a new identity for the application/framework on behalf of which the Data API calls are performed (the "caller").

Args

caller_name
name of the application, or framework, on behalf of which the Data API calls are performed. This ends up in the request user-agent.
caller_version
version of the caller.

Example

>>> my_db.set_caller(caller_name="the_caller", caller_version="0.1.0")

Deprecated since version: 1.5.1

This will be removed in 2.0.0. Please provide the caller(s) at constructor time through the callers list parameter.

Expand source code
@deprecation.deprecated(  # type: ignore[misc]
    deprecated_in="1.5.1",
    removed_in="2.0.0",
    current_version=__version__,
    details=SET_CALLER_DEPRECATION_NOTICE,
)
def set_caller(
    self,
    caller_name: str | None = None,
    caller_version: str | None = None,
) -> None:
    """
    Replace the identity of the application/framework on behalf of which
    the Data API calls are performed (the "caller").
    This method changes (mutates) the database instance.

    Args:
        caller_name: name of the application, or framework, on behalf of which
            the Data API calls are performed. This ends up in the request user-agent.
        caller_version: version of the caller.

    Example:
        >>> my_db.set_caller(caller_name="the_caller", caller_version="0.1.0")
    """

    logger.info(f"setting caller to {caller_name}/{caller_version}")
    # the new callers are built from an empty list plus the name/version pair,
    # so that whatever callers were set before are discarded
    self.callers = check_caller_parameters([], caller_name, caller_version)
    # the API commander is re-created only after the callers have been replaced
    self._api_commander = self._get_api_commander(keyspace=self.keyspace)
def to_sync(self, *, api_endpoint: str | None = None, token: str | TokenProvider | None = None, keyspace: str | None = None, namespace: str | None = None, callers: Sequence[CallerType] = [], caller_name: str | None = None, caller_version: str | None = None, environment: str | None = None, api_path: str | None = None, api_version: str | None = None) ‑> Database

Create a (synchronous) Database from this one. Save for the arguments explicitly provided as overrides, everything else is kept identical to this database in the copy.

Args

api_endpoint
the full "API Endpoint" string used to reach the Data API. Example: "https://<database_id>-<region>.apps.astra.datastax.com"
token
an Access Token to the database. Example: "AstraCS:xyz…" This can be either a literal token string or a subclass of TokenProvider.
keyspace
this is the keyspace all method calls will target, unless one is explicitly specified in the call. If no keyspace is supplied when creating a Database, the name "default_keyspace" is set.
namespace
an alias for keyspace. DEPRECATED, removal in 2.0.
callers
a list of caller identities, i.e. applications, or frameworks, on behalf of which the Data API calls are performed. These end up in the request user-agent. Each caller identity is a ("caller_name", "caller_version") pair.
caller_name
DEPRECATED, use callers. Removal 2.0. Name of the application, or framework, on behalf of which the Data API calls are performed. This ends up in the request user-agent.
caller_version
version of the caller. DEPRECATED, use callers. Removal 2.0.
environment
a string representing the target Data API environment. Values are, for example, Environment.PROD, Environment.OTHER, or Environment.DSE.
api_path
path to append to the API Endpoint. In typical usage, this should be left to its default of "/api/json".
api_version
version specifier to append to the API path. In typical usage, this should be left to its default of "v1".

Returns

the new copy, a Database instance.

Example

>>> my_sync_db = my_async_db.to_sync()
>>> my_sync_db.list_collection_names()
['a_collection', 'another_collection']
Expand source code
def to_sync(
    self,
    *,
    api_endpoint: str | None = None,
    token: str | TokenProvider | None = None,
    keyspace: str | None = None,
    namespace: str | None = None,
    callers: Sequence[CallerType] = [],
    caller_name: str | None = None,
    caller_version: str | None = None,
    environment: str | None = None,
    api_path: str | None = None,
    api_version: str | None = None,
) -> Database:
    """
    Create a (synchronous) Database from this one. Each setting not
    explicitly provided as an override is carried over unchanged from
    this database to the copy.

    Args:
        api_endpoint: the full "API Endpoint" string used to reach the Data API.
            Example: "https://<database_id>-<region>.apps.astra.datastax.com"
        token: an Access Token to the database. Example: "AstraCS:xyz..."
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        keyspace: this is the keyspace all method calls will target, unless
            one is explicitly specified in the call. If omitted, the working
            keyspace of this database is used for the copy as well.
        namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
        callers: a list of caller identities, i.e. applications, or frameworks,
            on behalf of which the Data API calls are performed. These end up
            in the request user-agent.
            Each caller identity is a ("caller_name", "caller_version") pair.
        caller_name: *DEPRECATED*, use `callers`. Removal 2.0. Name of the
            application, or framework, on behalf of which the Data API calls
            are performed. This ends up in the request user-agent.
        caller_version: version of the caller. *DEPRECATED*, use `callers`.
            Removal 2.0.
        environment: a string representing the target Data API environment.
            Values are, for example, `Environment.PROD`, `Environment.OTHER`,
            or `Environment.DSE`.
        api_path: path to append to the API Endpoint. In typical usage, this
            should be left to its default of "/api/json".
        api_version: version specifier to append to the API path. In typical
            usage, this should be left to its default of "v1".

    Returns:
        the new copy, a `Database` instance.

    Example:
        >>> my_sync_db = my_async_db.to_sync()
        >>> my_sync_db.list_collection_names()
        ['a_collection', 'another_collection']
    """

    # reconcile the current parameters with their deprecated counterparts
    explicit_callers = check_caller_parameters(callers, caller_name, caller_version)
    explicit_keyspace = check_namespace_keyspace(
        keyspace=keyspace,
        namespace=namespace,
    )

    # each override is applied with `or`: a missing (or otherwise falsy)
    # value falls back to the corresponding setting of this database
    sync_api_endpoint = api_endpoint or self.api_endpoint
    sync_token = coerce_token_provider(token) or self.token_provider
    sync_keyspace = explicit_keyspace or self.keyspace
    sync_callers = explicit_callers or self.callers
    sync_environment = environment or self.environment
    sync_api_path = api_path or self.api_path
    sync_api_version = api_version or self.api_version

    return Database(
        api_endpoint=sync_api_endpoint,
        token=sync_token,
        keyspace=sync_keyspace,
        callers=sync_callers,
        environment=sync_environment,
        api_path=sync_api_path,
        api_version=sync_api_version,
    )
def use_keyspace(self, keyspace: str) ‑> None

Switch to a new working keyspace for this database. This method changes (mutates) the AsyncDatabase instance.

Note that this method does not create the keyspace, which should exist already (created for instance with a DatabaseAdmin.async_create_keyspace call).

Args

keyspace
the new keyspace to use as the database working keyspace.

Returns

None.

Example

>>> asyncio.run(my_async_db.list_collection_names())
['coll_1', 'coll_2']
>>> my_async_db.use_keyspace("an_empty_keyspace")
>>> asyncio.run(my_async_db.list_collection_names())
[]
Expand source code
def use_keyspace(self, keyspace: str) -> None:
    """
    Switch to a new working keyspace for this database.
    This method changes (mutates) the AsyncDatabase instance.

    Note that this method does not create the keyspace, which should exist
    already (created for instance with a `DatabaseAdmin.async_create_keyspace` call).

    Args:
        keyspace: the new keyspace to use as the database working keyspace.

    Returns:
        None.

    Example:
        >>> asyncio.run(my_async_db.list_collection_names())
        ['coll_1', 'coll_2']
        >>> my_async_db.use_keyspace("an_empty_keyspace")
        >>> asyncio.run(my_async_db.list_collection_names())
        []
    """
    logger.info(f"switching to keyspace '{keyspace}'")
    # The new keyspace is recorded first ...
    self._using_keyspace = keyspace
    # ... and only then is the API commander re-created from `self.keyspace`.
    # NOTE(review): presumably `self.keyspace` reflects `_using_keyspace`, which
    # is why the order of these two statements matters -- confirm against the
    # definition of the `keyspace` attribute, which is not part of this excerpt.
    self._api_commander = self._get_api_commander(keyspace=self.keyspace)
def use_namespace(self, namespace: str) ‑> None

Switch to a new working namespace for this database. This method changes (mutates) the AsyncDatabase instance.

DEPRECATED (removal in 2.0). Switch to the "use_keyspace" method.

Note that this method does not create the namespace, which should exist already (created for instance with a DatabaseAdmin.async_create_namespace call).

Args

namespace
the new namespace to use as the database working namespace.

Returns

None.

Example

>>> asyncio.run(my_async_db.list_collection_names())
['coll_1', 'coll_2']
>>> my_async_db.use_namespace("an_empty_namespace")
>>> asyncio.run(my_async_db.list_collection_names())
[]

Deprecated since version: 1.5.0

This will be removed in 2.0.0. The term 'namespace' is being replaced by 'keyspace' throughout the Data API and the clients. Please adapt method and parameter names consistently (examples: db_admin.findNamespaces => db_admin.findKeyspaces; collection.namespace => collection.keyspace; database.list_collections(namespace=...) => database.list_collections(keyspace=...)). See https://docs.datastax.com/en/astra-db-serverless/api-reference/client-versions.html#version-1-5 for more information.

Expand source code
@deprecation.deprecated(  # type: ignore[misc]
    deprecated_in="1.5.0",
    removed_in="2.0.0",
    current_version=__version__,
    details=NAMESPACE_DEPRECATION_NOTICE_METHOD,
)
def use_namespace(self, namespace: str) -> None:
    """
    Switch to a new working namespace for this database.
    This method changes (mutates) the AsyncDatabase instance.

    *DEPRECATED* (removal in 2.0). Switch to the "use_keyspace" method,
    to which this method simply delegates.

    Note that this method does not create the namespace, which should exist
    already (created for instance with a `DatabaseAdmin.async_create_namespace` call).

    Args:
        namespace: the new namespace to use as the database working namespace.

    Returns:
        None.

    Example:
        >>> asyncio.run(my_async_db.list_collection_names())
        ['coll_1', 'coll_2']
        >>> my_async_db.use_namespace("an_empty_namespace")
        >>> asyncio.run(my_async_db.list_collection_names())
        []
    """
    # "namespace" is the legacy name for "keyspace": hand over to `use_keyspace`
    return self.use_keyspace(keyspace=namespace)
def with_options(self, *, keyspace: str | None = None, namespace: str | None = None, callers: Sequence[CallerType] = [], caller_name: str | None = None, caller_version: str | None = None) ‑> AsyncDatabase

Create a clone of this database with some changed attributes.

Args

keyspace
this is the keyspace all method calls will target, unless one is explicitly specified in the call. If no keyspace is supplied when creating a Database, the name "default_keyspace" is set.
namespace
an alias for keyspace. DEPRECATED, removal in 2.0.
callers
a list of caller identities, i.e. applications, or frameworks, on behalf of which the Data API calls are performed. These end up in the request user-agent. Each caller identity is a ("caller_name", "caller_version") pair.
caller_name
DEPRECATED, use callers. Removal 2.0. Name of the application, or framework, on behalf of which the Data API calls are performed. This ends up in the request user-agent.
caller_version
version of the caller. DEPRECATED, use callers. Removal 2.0.

Returns

a new AsyncDatabase instance.

Example

>>> my_async_db_2 = my_async_db.with_options(
...     keyspace="the_other_keyspace",
...     callers=[("the_caller", "0.1.0")],
... )
Expand source code
def with_options(
    self,
    *,
    keyspace: str | None = None,
    namespace: str | None = None,
    callers: Sequence[CallerType] = [],
    caller_name: str | None = None,
    caller_version: str | None = None,
) -> AsyncDatabase:
    """
    Return a copy of this database, with some of its attributes replaced.

    Args:
        keyspace: the keyspace that method calls will target, unless a call
            explicitly specifies another one. If no keyspace is supplied
            when creating a Database, the name "default_keyspace" is set.
        namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
        callers: a list of caller identities, i.e. applications, or frameworks,
            on behalf of which the Data API calls are performed. These end up
            in the request user-agent.
            Each caller identity is a ("caller_name", "caller_version") pair.
        caller_name: *DEPRECATED*, use `callers`. Removal 2.0. Name of the
            application, or framework, on behalf of which the Data API calls
            are performed. This ends up in the request user-agent.
        caller_version: version of the caller. *DEPRECATED*, use `callers`.
            Removal 2.0.

    Returns:
        a new `AsyncDatabase` instance.

    Example:
        >>> my_async_db_2 = my_async_db.with_options(
        ...     keyspace="the_other_keyspace",
        ...     callers=[("the_caller", "0.1.0")],
        ... )
    """

    # The caller check runs before the keyspace/namespace check.
    resolved_callers = check_caller_parameters(callers, caller_name, caller_version)
    resolved_keyspace = check_namespace_keyspace(
        keyspace=keyspace,
        namespace=namespace,
    )

    clone = self._copy(
        keyspace=resolved_keyspace,
        callers=resolved_callers,
    )
    return clone
class Collection (database: Database, name: str, *, keyspace: str | None = None, namespace: str | None = None, api_options: CollectionAPIOptions | None = None, callers: Sequence[CallerType] = [], caller_name: str | None = None, caller_version: str | None = None)

A Data API collection, the main object to interact with the Data API, especially for DDL operations. This class has a synchronous interface.

A Collection is spawned from a Database object, from which it inherits the details on how to reach the API server (endpoint, authentication token).

Args

database
a Database object, instantiated earlier. This represents the database the collection belongs to.
name
the collection name. This parameter should match an existing collection on the database.
keyspace
this is the keyspace to which the collection belongs. If not specified, the database's working keyspace is used.
namespace
an alias for keyspace. DEPRECATED, removal in 2.0.
api_options
An instance of astrapy.api_options.CollectionAPIOptions providing the general settings for interacting with the Data API.
callers
a list of caller identities, i.e. applications, or frameworks, on behalf of which the Data API calls are performed. These end up in the request user-agent. Each caller identity is a ("caller_name", "caller_version") pair.
caller_name
DEPRECATED, use callers. Removal 2.0. Name of the application, or framework, on behalf of which the Data API calls are performed. This ends up in the request user-agent.
caller_version
version of the caller. DEPRECATED, use callers. Removal 2.0.

Examples

>>> from astrapy import DataAPIClient, Collection
>>> my_client = astrapy.DataAPIClient("AstraCS:...")
>>> my_db = my_client.get_database(
...    "https://01234567-....apps.astra.datastax.com"
... )
>>> my_coll_1 = Collection(database=my_db, name="my_collection")
>>> my_coll_2 = my_db.create_collection(
...     "my_v_collection",
...     dimension=3,
...     metric="cosine",
... )
>>> my_coll_3a = my_db.get_collection("my_already_existing_collection")
>>> my_coll_3b = my_db.my_already_existing_collection
>>> my_coll_3c = my_db["my_already_existing_collection"]

Note

creating an instance of Collection does not trigger actual creation of the collection on the database. The latter should have been created beforehand, e.g. through the create_collection method of a Database.

Expand source code
class Collection:
    """
    A Data API collection, the main object to interact with the Data API,
    especially for DDL operations.
    This class has a synchronous interface.

    A Collection is spawned from a Database object, from which it inherits
    the details on how to reach the API server (endpoint, authentication token).

    Args:
        database: a Database object, instantiated earlier. This represents
            the database the collection belongs to.
        name: the collection name. This parameter should match an existing
            collection on the database.
        keyspace: this is the keyspace to which the collection belongs.
            If not specified, the database's working keyspace is used.
        namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
        api_options: An instance of `astrapy.api_options.CollectionAPIOptions`
            providing the general settings for interacting with the Data API.
        callers: a list of caller identities, i.e. applications, or frameworks,
            on behalf of which the Data API calls are performed. These end up
            in the request user-agent.
            Each caller identity is a ("caller_name", "caller_version") pair.
        caller_name: *DEPRECATED*, use `callers`. Removal 2.0. Name of the
            application, or framework, on behalf of which the Data API calls
            are performed. This ends up in the request user-agent.
        caller_version: version of the caller. *DEPRECATED*, use `callers`. Removal 2.0.

    Examples:
        >>> from astrapy import DataAPIClient, Collection
        >>> my_client = astrapy.DataAPIClient("AstraCS:...")
        >>> my_db = my_client.get_database(
        ...    "https://01234567-....apps.astra.datastax.com"
        ... )
        >>> my_coll_1 = Collection(database=my_db, name="my_collection")
        >>> my_coll_2 = my_db.create_collection(
        ...     "my_v_collection",
        ...     dimension=3,
        ...     metric="cosine",
        ... )
        >>> my_coll_3a = my_db.get_collection("my_already_existing_collection")
        >>> my_coll_3b = my_db.my_already_existing_collection
        >>> my_coll_3c = my_db["my_already_existing_collection"]

    Note:
        creating an instance of Collection does not trigger actual creation
        of the collection on the database. The latter should have been created
        beforehand, e.g. through the `create_collection` method of a Database.
    """

    def __init__(
        self,
        database: Database,
        name: str,
        *,
        keyspace: str | None = None,
        namespace: str | None = None,
        api_options: CollectionAPIOptions | None = None,
        callers: Sequence[CallerType] = [],
        caller_name: str | None = None,
        caller_version: str | None = None,
    ) -> None:
        # Reconcile the caller and keyspace/namespace parameters first.
        resolved_callers = check_caller_parameters(
            callers, caller_name, caller_version
        )
        resolved_keyspace = check_namespace_keyspace(
            keyspace=keyspace,
            namespace=namespace,
        )

        # Fall back to default API options when none are provided.
        self.api_options = (
            CollectionAPIOptions() if api_options is None else api_options
        )

        # An explicit keyspace wins over the database's working keyspace.
        if resolved_keyspace is not None:
            target_keyspace = resolved_keyspace
        else:
            target_keyspace = database.keyspace
        if target_keyspace is None:
            raise ValueError("Attempted to create Collection with 'keyspace' unset.")

        # The collection keeps its own copy of the database, bound to the
        # keyspace and callers resolved above.
        self._database = database._copy(
            keyspace=target_keyspace,
            callers=resolved_callers,
        )
        self._name = name

        # Embedding-related headers are listed after the auth header, so
        # on a key clash the embedding header value is the one retained.
        embedding_headers = self.api_options.embedding_api_key.get_headers()
        self._commander_headers = {
            DEFAULT_DATA_API_AUTH_HEADER: self._database.token_provider.get_token(),
            **embedding_headers,
        }

        self.callers = resolved_callers
        self._api_commander = self._get_api_commander()

    def __repr__(self) -> str:
        return (
            f'{self.__class__.__name__}(name="{self.name}", '
            f'keyspace="{self.keyspace}", database={self.database}, '
            f"api_options={self.api_options})"
        )

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, Collection):
            return all(
                [
                    self._api_commander == other._api_commander,
                    self.api_options == other.api_options,
                ]
            )
        else:
            return False

    def __call__(self, *pargs: Any, **kwargs: Any) -> None:
        raise TypeError(
            f"'{self.__class__.__name__}' object is not callable. If you "
            f"meant to call the '{self.name}' method on a "
            f"'{self.database.__class__.__name__}' object "
            "it is failing because no such method exists."
        )

    def _get_api_commander(self) -> APICommander:
        """Instantiate a new APICommander based on the properties of this class."""

        if self._database.keyspace is None:
            raise ValueError(
                "No keyspace specified. Collection requires a keyspace to "
                "be set, e.g. through the `keyspace` constructor parameter."
            )

        base_path_components = [
            comp
            for comp in (
                self._database.api_path.strip("/"),
                self._database.api_version.strip("/"),
                self._database.keyspace,
                self._name,
            )
            if comp != ""
        ]
        base_path = f"/{'/'.join(base_path_components)}"
        api_commander = APICommander(
            api_endpoint=self._database.api_endpoint,
            path=base_path,
            headers=self._commander_headers,
            callers=self.callers,
        )
        return api_commander

    def _copy(
        self,
        *,
        database: Database | None = None,
        name: str | None = None,
        keyspace: str | None = None,
        namespace: str | None = None,
        api_options: CollectionAPIOptions | None = None,
        callers: Sequence[CallerType] = [],
        caller_name: str | None = None,
        caller_version: str | None = None,
    ) -> Collection:
        """
        Create a new Collection from this one. Each setting that is not
        provided (or is falsy) is taken from this collection; the passed
        API options are applied as an override on top of the current ones.
        """
        resolved_callers = check_caller_parameters(
            callers, caller_name, caller_version
        )
        resolved_keyspace = check_namespace_keyspace(
            keyspace=keyspace,
            namespace=namespace,
        )

        # Resolve each setting in turn, falling back to this collection's own.
        new_database = database or self.database._copy()
        new_name = name or self.name
        new_keyspace = resolved_keyspace or self.keyspace
        new_api_options = self.api_options.with_override(api_options)
        new_callers = resolved_callers or self.callers

        return Collection(
            database=new_database,
            name=new_name,
            keyspace=new_keyspace,
            api_options=new_api_options,
            callers=new_callers,
        )

    def with_options(
        self,
        *,
        name: str | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | None = None,
        collection_max_time_ms: int | None = None,
        callers: Sequence[CallerType] = [],
        caller_name: str | None = None,
        caller_version: str | None = None,
    ) -> Collection:
        """
        Return a copy of this collection, with some of its attributes replaced.

        Args:
            name: the name of the collection. Useful to quickly spawn
                Collection instances, each pointing to a different collection
                existing in the same keyspace.
            embedding_api_key: optional API key(s) for interacting with the
                collection. If an embedding service is configured, and this
                parameter is not None, each Data API call will include the
                necessary embedding-related headers as specified by this
                parameter. If a string is passed, it translates into the one
                "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
                For some vectorize providers/models, if using header-based
                authentication, specialized subclasses of
                `astrapy.authentication.EmbeddingHeadersProvider`
                should be supplied.
            collection_max_time_ms: a default timeout, in milliseconds, for the
                duration of each operation on the collection. Individual
                timeouts can be provided to each collection method call and
                will take precedence, with this value being an overall default.
                Note that for some methods involving multiple API calls (such
                as `find`, `delete_many`, `insert_many` and so on), it is
                strongly suggested to provide a specific timeout as the default
                one likely wouldn't make much sense.
            callers: a list of caller identities, i.e. applications, or
                frameworks, on behalf of which the Data API calls are
                performed. These end up in the request user-agent.
                Each caller identity is a ("caller_name", "caller_version") pair.
            caller_name: *DEPRECATED*, use `callers`. Removal 2.0. Name of the
                application, or framework, on behalf of which the Data API
                calls are performed. This ends up in the request user-agent.
            caller_version: version of the caller. *DEPRECATED*, use `callers`.
                Removal 2.0.

        Returns:
            a new Collection instance.

        Example:
            >>> my_other_coll = my_coll.with_options(
            ...     name="the_other_coll",
            ...     callers=[("caller_identity", "0.1.2")],
            ... )
        """

        resolved_callers = check_caller_parameters(
            callers, caller_name, caller_version
        )
        # Only the settings passed here go into the override options object.
        headers_provider = coerce_embedding_headers_provider(embedding_api_key)
        override_options = CollectionAPIOptions(
            embedding_api_key=headers_provider,
            max_time_ms=collection_max_time_ms,
        )

        clone = self._copy(
            name=name,
            api_options=override_options,
            callers=resolved_callers,
        )
        return clone

    def to_async(
        self,
        *,
        database: AsyncDatabase | None = None,
        name: str | None = None,
        keyspace: str | None = None,
        namespace: str | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | None = None,
        collection_max_time_ms: int | None = None,
        callers: Sequence[CallerType] = [],
        caller_name: str | None = None,
        caller_version: str | None = None,
    ) -> AsyncCollection:
        """
        Build an AsyncCollection out of this collection. Apart from the
        arguments explicitly passed as overrides, the copy keeps everything
        identical to this collection (with the database being converted
        into an async object).

        Args:
            database: an AsyncDatabase object, instantiated earlier.
                This represents the database the new collection belongs to.
            name: the collection name. This parameter should match an existing
                collection on the database.
            keyspace: this is the keyspace to which the collection belongs.
                If not specified, the database's working keyspace is used.
            namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
            embedding_api_key: optional API key(s) for interacting with the
                collection. If an embedding service is configured, and this
                parameter is not None, each Data API call will include the
                necessary embedding-related headers as specified by this
                parameter. If a string is passed, it translates into the one
                "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
                For some vectorize providers/models, if using header-based
                authentication, specialized subclasses of
                `astrapy.authentication.EmbeddingHeadersProvider`
                should be supplied.
            collection_max_time_ms: a default timeout, in milliseconds, for the
                duration of each operation on the collection. Individual
                timeouts can be provided to each collection method call and
                will take precedence, with this value being an overall default.
                Note that for some methods involving multiple API calls (such
                as `find`, `delete_many`, `insert_many` and so on), it is
                strongly suggested to provide a specific timeout as the default
                one likely wouldn't make much sense.
            callers: a list of caller identities, i.e. applications, or
                frameworks, on behalf of which the Data API calls are
                performed. These end up in the request user-agent.
                Each caller identity is a ("caller_name", "caller_version") pair.
            caller_name: *DEPRECATED*, use `callers`. Removal 2.0. Name of the
                application, or framework, on behalf of which the Data API
                calls are performed. This ends up in the request user-agent.
            caller_version: version of the caller. *DEPRECATED*, use `callers`.
                Removal 2.0.

        Returns:
            the new copy, an AsyncCollection instance.

        Example:
            >>> asyncio.run(my_coll.to_async().count_documents({},upper_bound=100))
            77
        """

        resolved_callers = check_caller_parameters(
            callers, caller_name, caller_version
        )
        resolved_keyspace = check_namespace_keyspace(
            keyspace=keyspace,
            namespace=namespace,
        )
        headers_provider = coerce_embedding_headers_provider(embedding_api_key)
        override_options = CollectionAPIOptions(
            embedding_api_key=headers_provider,
            max_time_ms=collection_max_time_ms,
        )

        # Resolve each setting in turn, falling back to this collection's own.
        async_database = database or self.database.to_async()
        new_name = name or self.name
        new_keyspace = resolved_keyspace or self.keyspace
        new_api_options = self.api_options.with_override(override_options)
        new_callers = resolved_callers or self.callers

        return AsyncCollection(
            database=async_database,
            name=new_name,
            keyspace=new_keyspace,
            api_options=new_api_options,
            callers=new_callers,
        )

    @deprecation.deprecated(  # type: ignore[misc]
        deprecated_in="1.5.1",
        removed_in="2.0.0",
        current_version=__version__,
        details=SET_CALLER_DEPRECATION_NOTICE,
    )
    def set_caller(
        self,
        caller_name: str | None = None,
        caller_version: str | None = None,
    ) -> None:
        """
        Set a new identity for the application/framework on behalf of which
        the Data API calls are performed (the "caller").
        This method changes (mutates) the Collection instance.

        *DEPRECATED* since 1.5.1 (removal in 2.0.0).

        Args:
            caller_name: name of the application, or framework, on behalf of which
                the Data API calls are performed. This ends up in the request user-agent.
            caller_version: version of the caller.

        Returns:
            None.

        Example:
            >>> my_coll.set_caller(caller_name="the_caller", caller_version="0.1.0")
        """

        logger.info(f"setting caller to {caller_name}/{caller_version}")
        callers_param = check_caller_parameters([], caller_name, caller_version)
        # If the check yields an empty (falsy) result, keep the current callers.
        self.callers = callers_param or self.callers
        # Rebuild the commander so that it is created with the updated callers.
        self._api_commander = self._get_api_commander()

    def options(self, *, max_time_ms: int | None = None) -> CollectionOptions:
        """
        Return the options of this collection, i.e. its configuration
        as found by listing the collections of the database.

        The listing is requested anew each time this method is invoked,
        without caching mechanisms: this ensures up-to-date information
        for usages such as real-time collection validation by the application.

        Args:
            max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
                If not passed, the collection-level setting is used instead.

        Returns:
            a CollectionOptions instance describing the collection.
            (See also the database `list_collections` method.)

        Raises:
            CollectionNotFoundException: if no collection with this name
                is found in the listing.

        Example:
            >>> my_coll.options()
            CollectionOptions(vector=CollectionVectorOptions(dimension=3, metric='cosine'))
        """

        timeout_ms = max_time_ms or self.api_options.max_time_ms
        logger.info(f"getting collections in search of '{self.name}'")
        # Go through the whole listing, retaining the same-name descriptors.
        matching_descriptors = []
        for descriptor in self.database.list_collections(max_time_ms=timeout_ms):
            if descriptor.name == self.name:
                matching_descriptors.append(descriptor)
        logger.info(f"finished getting collections in search of '{self.name}'")

        if not matching_descriptors:
            raise CollectionNotFoundException(
                text=f"Collection {self.keyspace}.{self.name} not found.",
                keyspace=self.keyspace,
                collection_name=self.name,
            )
        return matching_descriptors[0].options

    def info(self) -> CollectionInfo:
        """
        Return a CollectionInfo object with information on the collection
        (name, location, database).

        This should not be confused with the collection `options` method
        (which is about the collection internal configuration).

        Example:
            >>> my_coll.info().database_info.region
            'eu-west-1'
            >>> my_coll.info().full_name
            'default_keyspace.my_v_collection'

        Note:
            the returned CollectionInfo wraps, among other things,
            the database information: as such, calling this method
            triggers the same-named method of a Database object (which, in turn,
            performs a HTTP request to the DevOps API).
            See the documentation for `Database.info()` for more details.
        """

        database_info = self.database.info()
        own_keyspace = self.keyspace
        # The 'namespace' field is filled with the same value as 'keyspace'.
        return CollectionInfo(
            database_info=database_info,
            keyspace=own_keyspace,
            namespace=own_keyspace,
            name=self.name,
            full_name=self.full_name,
        )

    @property
    def database(self) -> Database:
        """
        A Database object, the database this collection belongs to.

        This is the object stored by the collection at construction time.

        Example:
            >>> my_coll.database.name
            'the_application_database'
        """

        return self._database

    @property
    def namespace(self) -> str:
        """
        The namespace this collection is in.

        *DEPRECATED* (removal in 2.0). Switch to the "keyspace" property.

        Accessing this property emits a deprecation warning and then
        returns the same value as the `keyspace` property.

        Example:
            >>> my_coll.namespace
            'default_keyspace'
        """

        # stacklevel=2 attributes the warning to the code reading the property.
        warnings.warn(
            deprecation.DeprecatedWarning(
                "the 'namespace' property",
                deprecated_in="1.5.0",
                removed_in="2.0.0",
                details=NAMESPACE_DEPRECATION_NOTICE_METHOD,
            ),
            stacklevel=2,
        )
        return self.keyspace

    @property
    def keyspace(self) -> str:
        """
        The keyspace this collection is in.

        Example:
            >>> my_coll.keyspace
            'default_keyspace'
        """

        _keyspace = self.database.keyspace
        if _keyspace is None:
            raise ValueError("The collection's DB is set with keyspace=None")
        return _keyspace

    @property
    def name(self) -> str:
        """
        The name of this collection, as set when the instance was created.

        Example:
            >>> my_coll.name
            'my_v_collection'
        """

        return self._name

    @property
    def full_name(self) -> str:
        """
        The fully-qualified collection name within the database,
        in the form "keyspace.collection_name".

        Example:
            >>> my_coll.full_name
            'default_keyspace.my_v_collection'
        """

        return f"{self.keyspace}.{self.name}"

    def insert_one(
        self,
        document: DocumentType,
        *,
        vector: VectorType | None = None,
        vectorize: str | None = None,
        max_time_ms: int | None = None,
    ) -> InsertOneResult:
        """
        Insert a single document in the collection in an atomic operation.

        Args:
            document: the dictionary expressing the document to insert.
                The `_id` field of the document can be left out, in which
                case it will be created automatically.
            vector: a vector (a list of numbers appropriate for the collection)
                for the document. Passing this parameter is equivalent to
                providing a `$vector` field within the document itself,
                however the two are mutually exclusive.
                *DEPRECATED* (removal in 2.0). Use a `$vector` key in the document instead.
            vectorize: a string to be made into a vector, if such a service
                is configured for the collection. Passing this parameter is
                equivalent to providing a `$vectorize` field in the document itself,
                however the two are mutually exclusive.
                Moreover, this parameter cannot coexist with `vector`.
                *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the document instead.
            max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
                If not passed, the collection-level setting is used instead.

        Returns:
            an InsertOneResult object.

        Raises:
            DataAPIFaultyResponseException: if the API response status has
                no "insertedIds" entry, or has an empty one.

        Examples:
            >>> my_coll.count_documents({}, upper_bound=10)
            0
            >>> my_coll.insert_one(
            ...     {
            ...         "age": 30,
            ...         "name": "Smith",
            ...         "food": ["pear", "peach"],
            ...         "likes_fruit": True,
            ...     },
            ... )
            InsertOneResult(raw_results=..., inserted_id='ed4587a4-...-...-...')
            >>> my_coll.insert_one({"_id": "user-123", "age": 50, "name": "Maccio"})
            InsertOneResult(raw_results=..., inserted_id='user-123')
            >>> my_coll.count_documents({}, upper_bound=10)
            2

            >>> my_coll.insert_one({"tag": "v", "$vector": [10, 11]})
            InsertOneResult(...)

        Note:
            If an `_id` is explicitly provided, which corresponds to a document
            that exists already in the collection, an error is raised and
            the insertion fails.
        """

        check_deprecated_vector_ize(
            vector=vector, vectors=None, vectorize=vectorize, kind="insert"
        )
        full_document = _collate_vector_to_document(document, vector, vectorize)
        timeout_ms = max_time_ms or self.api_options.max_time_ms
        command_payload = {"insertOne": {"document": full_document}}
        logger.info(f"insertOne on '{self.name}'")
        response = self._api_commander.request(
            payload=command_payload,
            timeout_info=base_timeout_info(timeout_ms),
        )
        logger.info(f"finished insertOne on '{self.name}'")

        # Success requires a non-empty "insertedIds" entry in the status;
        # a missing and an empty entry both count as a faulty response.
        status = response.get("status", {})
        if "insertedIds" in status and status["insertedIds"]:
            first_inserted_id = status["insertedIds"][0]
            return InsertOneResult(
                raw_results=[response],
                inserted_id=first_inserted_id,
            )
        raise DataAPIFaultyResponseException(
            text="Faulty response from insert_one API command.",
            raw_response=response,
        )

    def insert_many(
        self,
        documents: Iterable[DocumentType],
        *,
        vectors: Iterable[VectorType | None] | None = None,
        vectorize: Iterable[str | None] | None = None,
        ordered: bool = False,
        chunk_size: int | None = None,
        concurrency: int | None = None,
        max_time_ms: int | None = None,
    ) -> InsertManyResult:
        """
        Insert a list of documents into the collection.
        This is not an atomic operation.

        Args:
            documents: an iterable of dictionaries, each a document to insert.
                Documents may specify their `_id` field or leave it out, in which
                case it will be added automatically.
            vectors: an optional list of vectors (as many vectors as the provided
                documents) to associate to the documents when inserting.
                Passing vectors this way is indeed equivalent to the "$vector" field
                of the documents, however the two are mutually exclusive.
                *DEPRECATED* (removal in 2.0). Use a `$vector` key in the documents instead.
            vectorize: an optional list of strings to be made into as many vectors
                (one per document), if such a service is configured for the collection.
                Passing this parameter is equivalent to providing a `$vectorize`
                field in the documents themselves, however the two are mutually exclusive.
                *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the documents instead.
            ordered: if False (default), the insertions can occur in arbitrary order
                and possibly concurrently. If True, they are processed sequentially.
                If there are no specific reasons against it, unordered insertions are to
                be preferred as they complete much faster.
            chunk_size: how many documents to include in a single API request.
                Exceeding the server maximum allowed value results in an error.
                Leave it unspecified (recommended) to use the system default.
            concurrency: maximum number of concurrent requests to the API at
                a given time. It cannot be more than one for ordered insertions.
            max_time_ms: a timeout, in milliseconds, for the operation.
                If not passed, the collection-level setting is used instead:
                If many documents are being inserted, this method corresponds
                to several HTTP requests: in such cases one may want to specify
                a more tolerant timeout here.

        Returns:
            an InsertManyResult object.

        Examples:
            >>> my_coll.count_documents({}, upper_bound=10)
            0
            >>> my_coll.insert_many(
            ...     [{"a": 10}, {"a": 5}, {"b": [True, False, False]}],
            ...     ordered=True,
            ... )
            InsertManyResult(raw_results=..., inserted_ids=['184bb06f-...', '...', '...'])
            >>> my_coll.count_documents({}, upper_bound=100)
            3
            >>> my_coll.insert_many(
            ...     [{"seq": i} for i in range(50)],
            ...     concurrency=5,
            ... )
            InsertManyResult(raw_results=..., inserted_ids=[... ...])
            >>> my_coll.count_documents({}, upper_bound=100)
            53
            >>> my_coll.insert_many(
            ...     [
            ...         {"tag": "a", "$vector": [1, 2]},
            ...         {"tag": "b", "$vector": [3, 4]},
            ...     ]
            ... )
            InsertManyResult(...)

        Note:
            Unordered insertions are executed with some degree of concurrency,
            so it is usually better to prefer this mode unless the order in the
            document sequence is important.

        Note:
            A failure mode for this command is related to certain faulty documents
            found among those to insert: a document may have an `_id` already
            present on the collection, or its vector dimension may not
            match the collection setting.

            For an ordered insertion, the method will raise an exception at
            the first such faulty document -- nevertheless, all documents processed
            until then will end up being written to the database.

            For unordered insertions, if the error stems from faulty documents
            the insertion proceeds until exhausting the input documents: then,
            an exception is raised -- and all insertable documents will have been
            written to the database, including those "after" the troublesome ones.

            If, on the other hand, there are errors not related to individual
            documents (such as a network connectivity error), the whole
            `insert_many` operation will stop in mid-way, an exception will be raised,
            and only a certain amount of the input documents will
            have made their way to the database.
        """

        check_deprecated_vector_ize(
            vector=None, vectors=vectors, vectorize=vectorize, kind="insert"
        )
        if concurrency is None:
            if ordered:
                _concurrency = 1
            else:
                _concurrency = DEFAULT_INSERT_MANY_CONCURRENCY
        else:
            _concurrency = concurrency
        if _concurrency > 1 and ordered:
            raise ValueError("Cannot run ordered insert_many concurrently.")
        if chunk_size is None:
            _chunk_size = DEFAULT_INSERT_MANY_CHUNK_SIZE
        else:
            _chunk_size = chunk_size
        _documents = _collate_vectors_to_documents(documents, vectors, vectorize)
        _max_time_ms = max_time_ms or self.api_options.max_time_ms
        logger.info(f"inserting {len(_documents)} documents in '{self.name}'")
        raw_results: list[dict[str, Any]] = []
        timeout_manager = MultiCallTimeoutManager(overall_max_time_ms=_max_time_ms)
        if ordered:
            options = {"ordered": True}
            inserted_ids: list[Any] = []
            for i in range(0, len(_documents), _chunk_size):
                im_payload = {
                    "insertMany": {
                        "documents": _documents[i : i + _chunk_size],
                        "options": options,
                    },
                }
                logger.info(f"insertMany on '{self.name}'")
                chunk_response = self._api_commander.request(
                    payload=im_payload,
                    raise_api_errors=False,
                    timeout_info=timeout_manager.remaining_timeout_info(),
                )
                logger.info(f"finished insertMany on '{self.name}'")
                # accumulate the results in this call
                chunk_inserted_ids = (chunk_response.get("status") or {}).get(
                    "insertedIds", []
                )
                inserted_ids += chunk_inserted_ids
                raw_results += [chunk_response]
                # if errors, quit early
                if chunk_response.get("errors", []):
                    partial_result = InsertManyResult(
                        raw_results=raw_results,
                        inserted_ids=inserted_ids,
                    )
                    raise InsertManyException.from_response(
                        command=None,
                        raw_response=chunk_response,
                        partial_result=partial_result,
                    )

            # return
            full_result = InsertManyResult(
                raw_results=raw_results,
                inserted_ids=inserted_ids,
            )
            logger.info(
                f"finished inserting {len(_documents)} documents in '{self.name}'"
            )
            return full_result

        else:
            # unordered: concurrent or not, do all of them and parse the results
            options = {"ordered": False}
            if _concurrency > 1:
                with ThreadPoolExecutor(max_workers=_concurrency) as executor:

                    def _chunk_insertor(
                        document_chunk: list[dict[str, Any]],
                    ) -> dict[str, Any]:
                        im_payload = {
                            "insertMany": {
                                "documents": document_chunk,
                                "options": options,
                            },
                        }
                        logger.info(f"insertMany(chunk) on '{self.name}'")
                        im_response = self._api_commander.request(
                            payload=im_payload,
                            raise_api_errors=False,
                            timeout_info=timeout_manager.remaining_timeout_info(),
                        )
                        logger.info(f"finished insertMany(chunk) on '{self.name}'")
                        return im_response

                    raw_results = list(
                        executor.map(
                            _chunk_insertor,
                            (
                                _documents[i : i + _chunk_size]
                                for i in range(0, len(_documents), _chunk_size)
                            ),
                        )
                    )
            else:
                for i in range(0, len(_documents), _chunk_size):
                    im_payload = {
                        "insertMany": {
                            "documents": _documents[i : i + _chunk_size],
                            "options": options,
                        },
                    }
                    logger.info(f"insertMany(chunk) on '{self.name}'")
                    im_response = self._api_commander.request(
                        payload=im_payload,
                        raise_api_errors=False,
                        timeout_info=timeout_manager.remaining_timeout_info(),
                    )
                    logger.info(f"finished insertMany(chunk) on '{self.name}'")
                    raw_results.append(im_response)
            # recast raw_results
            inserted_ids = [
                inserted_id
                for chunk_response in raw_results
                for inserted_id in (chunk_response.get("status") or {}).get(
                    "insertedIds", []
                )
            ]

            # check-raise
            if any(
                [chunk_response.get("errors", []) for chunk_response in raw_results]
            ):
                partial_result = InsertManyResult(
                    raw_results=raw_results,
                    inserted_ids=inserted_ids,
                )
                raise InsertManyException.from_responses(
                    commands=[None for _ in raw_results],
                    raw_responses=raw_results,
                    partial_result=partial_result,
                )

            # return
            full_result = InsertManyResult(
                raw_results=raw_results,
                inserted_ids=inserted_ids,
            )
            logger.info(
                f"finished inserting {len(_documents)} documents in '{self.name}'"
            )
            return full_result

    def find(
        self,
        filter: FilterType | None = None,
        *,
        projection: ProjectionType | None = None,
        skip: int | None = None,
        limit: int | None = None,
        vector: VectorType | None = None,
        vectorize: str | None = None,
        include_similarity: bool | None = None,
        include_sort_vector: bool | None = None,
        sort: SortType | None = None,
        max_time_ms: int | None = None,
    ) -> Cursor:
        """
        Search the collection for the documents satisfying a provided filter.

        What is returned is a Cursor, to be consumed by iterating over it.
        Depending on how the method is invoked, changes made to the collection
        after `find` was called may or may not surface during the iteration.
        When the cursor does pick up such changes in real time, the iteration
        over the documents is approximate (i.e. a document may occasionally
        be skipped or returned twice). This happens when making use of the `sort`
        option in a non-vector-search manner.

        Args:
            filter: a dictionary expressing a predicate in the Data API
                filter syntax. For example:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                The complete list of operators is found in the Data API documentation.
            projection: determines which portions of each document are returned.
                It is either an allow-list, such as `{"f1": True, "f2": True}`,
                or a deny-list, such as `{"fx": False, "fy": False}`: the two
                cannot be mixed (with the exception of `_id` and other special
                fields, which may be set to True or False regardless of the
                rest of the specification).
                The star-projections `{"*": True}` and `{"*": False}` result,
                respectively, in the whole document and in `{}` being returned.
                List fields can be sliced with directives such as
                `{"array": {"$slice": 2}}`, `{"array": {"$slice": -2}}`,
                `{"array": {"$slice": [4, 2]}}` or `{"array": {"$slice": [-4, 2]}}`.
                An iterable over strings is implicitly interpreted as an allow-list.
                When this parameter is omitted, the default projection does not
                necessarily include "special" fields such as `$vector` or `$vectorize`.
                More on projections can be found in the Data API documentation.
            skip: an integer number of documents to discard from the beginning
                of what the query would return: the results then start from
                the (skip+1)-th document.
                It is usable only together with an explicit `sort` criterion
                of the ascending/descending type (that is, neither without
                sorting nor with vector-based ANN search).
            limit: an integer ceiling on the number of returned documents.
                After `limit` documents are returned (or once there are no more
                matching documents to return), the cursor yields nothing else.
            vector: a list of float numbers, of the appropriate dimensionality,
                used to run a vector search (i.e. ANN,
                or "approximate nearest-neighbours" search).
                A similarity search on a collection is incompatible with any
                other sorting criterion; additionally, the number of documents
                that can be returned is bounded. See the Note about upper
                bounds and the Data API documentation for details.
                *DEPRECATED* (removal in 2.0). Use a `$vector` key in the
                sort clause dict instead.
            vectorize: a string that gets converted to a vector, which is then
                used for vector search. It is an (exclusive) alternative to
                `vector` with the same effect, and requires that such
                a service is configured for the collection.
                *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the
                sort clause dict instead.
            include_similarity: a boolean controlling whether each returned
                document carries an additional "$similarity" key with the numeric
                similarity value. It is allowed only for vector ANN search, i.e.
                when either `vector` is supplied or the `sort` parameter has the
                shape {"$vector": ...}.
            include_sort_vector: a boolean to request query vector used in this search.
                When True (and provided this is a vector search), the vector
                used for the ANN search can be retrieved from the returned
                cursor through its `get_sort_vector` method.
            sort: a dictionary governing the order in which documents are
                returned. Details are in the Note about sorting and in the
                one about upper bounds.
                A "$vector" or a "$vectorize" key in `sort` results in
                vector-based ANN sorting.
            max_time_ms: a timeout, in milliseconds, applied to each individual
                HTTP request issued to fetch documents while the cursor
                is being iterated over.
                If not passed, the collection-level setting is used instead.

        Returns:
            a Cursor object representing iterations over the matching documents
            (see the Cursor object for how to use it. The simplest thing is to
            run a for loop: `for document in collection.find(...):`).

        Examples:
            >>> filter = {"seq": {"$exists": True}}
            >>> for doc in my_coll.find(filter, projection={"seq": True}, limit=5):
            ...     print(doc["seq"])
            ...
            37
            35
            10
            36
            27
            >>> cursor1 = my_coll.find(
            ...     {},
            ...     limit=4,
            ...     sort={"seq": astrapy.constants.SortDocuments.DESCENDING},
            ... )
            >>> [doc["_id"] for doc in cursor1]
            ['97e85f81-...', '1581efe4-...', '...', '...']
            >>> cursor2 = my_coll.find({}, limit=3)
            >>> cursor2.distinct("seq")
            [37, 35, 10]

            >>> my_coll.insert_many([
            ...     {"tag": "A", "$vector": [4, 5]},
            ...     {"tag": "B", "$vector": [3, 4]},
            ...     {"tag": "C", "$vector": [3, 2]},
            ...     {"tag": "D", "$vector": [4, 1]},
            ...     {"tag": "E", "$vector": [2, 5]},
            ... ])
            >>> ann_tags = [
            ...     document["tag"]
            ...     for document in my_coll.find(
            ...         {},
            ...         sort={"$vector": [3, 3]},
            ...         limit=3,
            ...     )
            ... ]
            >>> ann_tags
            ['A', 'B', 'C']
            >>> # (assuming the collection has metric VectorMetric.COSINE)

            >>> cursor = my_coll.find(
            ...     sort={"$vector": [3, 3]},
            ...     limit=3,
            ...     include_sort_vector=True,
            ... )
            >>> cursor.get_sort_vector()
            [3.0, 3.0]
            >>> matches = list(cursor)
            >>> cursor.get_sort_vector()
            [3.0, 3.0]

        Note:
            Here are some sample values for the `sort` parameter.
            If the order of the results is irrelevant:
                sort={}  # (default when parameter not provided)
            To sort by a field value, ascending or descending:
                sort={"field": SortDocuments.ASCENDING}
                sort={"field": SortDocuments.DESCENDING}
            To sort by "field" first, and by "subfield" next
            (dictionaries do preserve their order in modern Python versions,
            but for clarity a `collections.OrderedDict` is suggested
            in these cases):
                sort={
                    "field": SortDocuments.ASCENDING,
                    "subfield": SortDocuments.ASCENDING,
                }
            To run a vector similarity (ANN) search:
                sort={"$vector": [0.4, 0.15, -0.5]}

        Note:
            Certain combinations of arguments implicitly cap the number of
            documents that the Data API returns. In particular:
            (a) Vector ANN searches cannot return more than a number of documents
            that at the time of writing is set to 1000 items.
            (b) With a sort criterion of the ascending/descending type,
            the Data API returns a smaller number of documents (20 at the
            time of writing) and then stops. These documents are the top
            results across the whole collection according to the requested
            criterion.
            The above applies also when a command such as `.distinct()`
            is subsequently run on a cursor.

        Note:
            If no sorting criteria are given at all (by vector or otherwise),
            the cursor can scroll through an arbitrary number of documents as
            the Data API and the client periodically exchange new chunks of documents.
            How the cursor behaves when documents are added/removed after the
            `find` was started depends on database internals: it is neither
            guaranteed nor excluded that such "real-time" changes in the data
            would be picked up by the cursor.
        """

        check_deprecated_vector_ize(
            vector=vector, vectors=None, vectorize=vectorize, kind="find"
        )
        # combine `sort` with the `vector`/`vectorize` arguments into one clause
        effective_sort = _collate_vector_to_sort(sort, vector, vectorize)
        request_timeout_ms = max_time_ms or self.api_options.max_time_ms

        similarity_was_set = include_similarity is not None
        if similarity_was_set and not _is_vector_sort(effective_sort):
            raise ValueError(
                "Cannot use `include_similarity` unless for vector search."
            )

        # create the cursor, then configure it one setting at a time
        cursor = Cursor(
            collection=self,
            filter=filter,
            projection=projection,
            max_time_ms=request_timeout_ms,
            overall_max_time_ms=None,
        )
        cursor = cursor.skip(skip)
        cursor = cursor.limit(limit)
        cursor = cursor.sort(effective_sort)
        cursor = cursor.include_similarity(include_similarity)
        cursor = cursor.include_sort_vector(include_sort_vector)
        return cursor

    def find_one(
        self,
        filter: FilterType | None = None,
        *,
        projection: ProjectionType | None = None,
        vector: VectorType | None = None,
        vectorize: str | None = None,
        include_similarity: bool | None = None,
        sort: SortType | None = None,
        max_time_ms: int | None = None,
    ) -> DocumentType | None:
        """
        Search the collection and return the first document matching
        the provided filter, or None if there are no matches.

        Args:
            filter: a dictionary expressing a predicate in the Data API
                filter syntax. For example:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                The complete list of operators is found in the Data API documentation.
            projection: determines which portions of the document are returned.
                It is either an allow-list, such as `{"f1": True, "f2": True}`,
                or a deny-list, such as `{"fx": False, "fy": False}`: the two
                cannot be mixed (with the exception of `_id` and other special
                fields, which may be set to True or False regardless of the
                rest of the specification).
                The star-projections `{"*": True}` and `{"*": False}` result,
                respectively, in the whole document and in `{}` being returned.
                List fields can be sliced with directives such as
                `{"array": {"$slice": 2}}`, `{"array": {"$slice": -2}}`,
                `{"array": {"$slice": [4, 2]}}` or `{"array": {"$slice": [-4, 2]}}`.
                An iterable over strings is implicitly interpreted as an allow-list.
                When this parameter is omitted, the default projection does not
                necessarily include "special" fields such as `$vector` or `$vectorize`.
                More on projections can be found in the Data API documentation.
            vector: a list of float numbers, of the appropriate dimensionality,
                used to run a vector search (i.e. ANN,
                or "approximate nearest-neighbours" search): the document
                returned is the most similar one, in the collection, among
                those matching the filter.
                *DEPRECATED* (removal in 2.0). Use a `$vector` key in the
                sort clause dict instead.
            vectorize: a string that gets converted to a vector, which is then
                used for vector search. This requires that a suitable service
                is configured for the collection.
                *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the
                sort clause dict instead.
            include_similarity: a boolean controlling whether the returned
                document carries an additional "$similarity" key with the numeric
                similarity value. It is allowed only for vector ANN search, i.e.
                when either `vector` is supplied or the `sort` parameter has the
                shape {"$vector": ...}.
            sort: a dictionary governing the order in which documents are
                considered. Details are in the Note about sorting.
                A "$vector" or a "$vectorize" key in `sort` results in
                vector-based ANN sorting.
            max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
                If not passed, the collection-level setting is used instead.

        Returns:
            the required document as a dictionary if one is found, None otherwise.

        Examples:
            >>> my_coll.find_one({})
            {'_id': '68d1e515-...', 'seq': 37}
            >>> my_coll.find_one({"seq": 10})
            {'_id': 'd560e217-...', 'seq': 10}
            >>> my_coll.find_one({"seq": 1011})
            >>> # (returns None for no matches)
            >>> my_coll.find_one({}, projection={"seq": False})
            {'_id': '68d1e515-...'}
            >>> my_coll.find_one(
            ...     {},
            ...     sort={"seq": astrapy.constants.SortDocuments.DESCENDING},
            ... )
            {'_id': '97e85f81-...', 'seq': 69}
            >>> my_coll.find_one({}, sort={"$vector": [1, 0]}, projection={"*": True})
            {'_id': '...', 'tag': 'D', '$vector': [4.0, 1.0]}

        Note:
            The accepted parameters are described in more detail in the `find`
            method (bearing in mind that `skip` and `limit` are not valid
            parameters for `find_one`).
        """

        check_deprecated_vector_ize(
            vector=vector, vectors=None, vectorize=vectorize, kind="find"
        )
        # a one-document `find` does the work: its first item, if any, is the result
        single_doc_cursor = self.find(
            filter=filter,
            projection=projection,
            skip=None,
            limit=1,
            vector=vector,
            vectorize=vectorize,
            include_similarity=include_similarity,
            sort=sort,
            max_time_ms=max_time_ms or self.api_options.max_time_ms,
        )
        # an exhausted cursor (no matches) translates to None
        return next(single_doc_cursor, None)

    def distinct(
        self,
        key: str,
        *,
        filter: FilterType | None = None,
        max_time_ms: int | None = None,
    ) -> list[Any]:
        """
        Collect the unique values that `key` takes across the documents
        of the collection matching the provided filter, and return them as a list.

        Args:
            key: the name of the field to inspect in each document.
                Dot-notation is supported to reach deeper levels of the document.
                These are examples of valid `key` values:
                    "field"
                    "field.subfield"
                    "field.3"
                    "field.3.subfield"
                When a list is encountered and no numeric index is given,
                all items in the list are visited.
            filter: a dictionary expressing a predicate in the Data API
                filter syntax. For example:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                The complete list of operators is found in the Data API documentation.
            max_time_ms: a timeout, in milliseconds, with the same meaning as for `find`.
                If not passed, the collection-level setting is used instead.

        Returns:
            a list, free of repeated items, with all different values
            of `key` found across the documents that match the filter.

        Example:
            >>> my_coll.insert_many(
            ...     [
            ...         {"name": "Marco", "food": ["apple", "orange"], "city": "Helsinki"},
            ...         {"name": "Emma", "food": {"likes_fruit": True, "allergies": []}},
            ...     ]
            ... )
            InsertManyResult(raw_results=..., inserted_ids=['c5b99f37-...', 'd6416321-...'])
            >>> my_coll.distinct("name")
            ['Marco', 'Emma']
            >>> my_coll.distinct("city")
            ['Helsinki']
            >>> my_coll.distinct("food")
            ['apple', 'orange', {'likes_fruit': True, 'allergies': []}]
            >>> my_coll.distinct("food.1")
            ['orange']
            >>> my_coll.distinct("food.allergies")
            []
            >>> my_coll.distinct("food.likes_fruit")
            [True]

        Note:
            Keep in mind that `distinct` runs client-side: it browses
            all required documents with the logic of the `find` method,
            collecting the unique values found for `key` along the way.
            Therefore, a large amount of matching documents may have
            performance, latency and ultimately billing implications.

        Note:
            For details on the behaviour of "distinct" in conjunction with
            real-time changes in the collection contents, see the
            Note of the `find` command.
        """

        overall_timeout_ms = max_time_ms or self.api_options.max_time_ms
        # the timeout is handed to the cursor as an overall budget
        # (`overall_max_time_ms`), with no per-request `max_time_ms`
        return Cursor(
            collection=self,
            filter=filter,
            projection={key: True},
            max_time_ms=None,
            overall_max_time_ms=overall_timeout_ms,
        ).distinct(key)

    def count_documents(
        self,
        filter: FilterType,
        *,
        upper_bound: int,
        max_time_ms: int | None = None,
    ) -> int:
        """
        Count the documents in the collection matching the specified filter.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            upper_bound: a required ceiling on the result of the count operation.
                If the actual number of documents exceeds this value,
                an exception will be raised.
                Furthermore, if the actual number of documents exceeds the maximum
                count that the Data API can reach (regardless of upper_bound),
                an exception will be raised.
            max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
                If not passed, the collection-level setting is used instead.

        Returns:
            the exact count of matching documents.

        Raises:
            TooManyDocumentsToCountException: if the response flags "moreData"
                (the server-side maximum count was exceeded), or if the
                returned count is greater than `upper_bound`.
            DataAPIFaultyResponseException: if the response carries no "count"
                within its "status" (including a missing or null "status").

        Example:
            >>> my_coll.insert_many([{"seq": i} for i in range(20)])
            InsertManyResult(...)
            >>> my_coll.count_documents({}, upper_bound=100)
            20
            >>> my_coll.count_documents({"seq":{"$gt": 15}}, upper_bound=100)
            4
            >>> my_coll.count_documents({}, upper_bound=10)
            Traceback (most recent call last):
                ... ...
            astrapy.exceptions.TooManyDocumentsToCountException

        Note:
            Count operations are expensive: for this reason, the best practice
            is to provide a reasonable `upper_bound` according to the caller
            expectations. Moreover, indiscriminate usage of count operations
            for sizeable amounts of documents (i.e. in the thousands and more)
            is discouraged in favor of alternative application-specific solutions.
            Keep in mind that the Data API has a hard upper limit on the amount
            of documents it will count, and that an exception will be thrown
            by this method if this limit is encountered.
        """

        _max_time_ms = max_time_ms or self.api_options.max_time_ms
        cd_payload = {"countDocuments": {"filter": filter}}
        logger.info(f"countDocuments on '{self.name}'")
        cd_response = self._api_commander.request(
            payload=cd_payload,
            timeout_info=base_timeout_info(_max_time_ms),
        )
        logger.info(f"finished countDocuments on '{self.name}'")

        # `or {}` also covers an explicit null "status": such a response must be
        # reported as faulty, not fail with a TypeError on the membership test.
        cd_status = cd_response.get("status") or {}
        if "count" not in cd_status:
            raise DataAPIFaultyResponseException(
                text="Faulty response from count_documents API command.",
                raw_response=cd_response,
            )

        count: int = cd_status["count"]
        # "moreData" takes precedence over the caller-provided bound:
        # in that case `count` is the server maximum, not the actual count.
        if cd_status.get("moreData", False):
            raise TooManyDocumentsToCountException(
                text=f"Document count exceeds {count}, the maximum allowed by the server",
                server_max_count_exceeded=True,
            )
        if count > upper_bound:
            raise TooManyDocumentsToCountException(
                text="Document count exceeds required upper bound",
                server_max_count_exceeded=False,
            )
        return count

    def estimated_document_count(
        self,
        *,
        max_time_ms: int | None = None,
    ) -> int:
        """
        Ask the API server for an approximate count of the collection's documents.

        Unlike `count_documents`, this method accepts no filtering parameters.

        Args:
            max_time_ms: timeout, in milliseconds, for the underlying HTTP request.
                When omitted, the collection-level setting applies.

        Returns:
            the server-provided estimate of the number of documents
            in the collection.

        Raises:
            DataAPIFaultyResponseException: if the response has no "count"
                entry within its "status" section.

        Example:
            >>> my_coll.estimated_document_count()
            35700
        """
        _max_time_ms = max_time_ms or self.api_options.max_time_ms
        # The command takes no arguments: its body is an empty mapping.
        ed_payload: dict[str, Any] = {"estimatedDocumentCount": {}}
        logger.info(f"estimatedDocumentCount on '{self.name}'")
        ed_response = self._api_commander.request(
            payload=ed_payload,
            timeout_info=base_timeout_info(_max_time_ms),
        )
        logger.info(f"finished estimatedDocumentCount on '{self.name}'")
        status_section = ed_response.get("status", {})
        if "count" not in status_section:
            raise DataAPIFaultyResponseException(
                text="Faulty response from estimated_document_count API command.",
                raw_response=ed_response,
            )
        estimate: int = status_section["count"]
        return estimate

    def find_one_and_replace(
        self,
        filter: FilterType,
        replacement: DocumentType,
        *,
        projection: ProjectionType | None = None,
        vector: VectorType | None = None,
        vectorize: str | None = None,
        sort: SortType | None = None,
        upsert: bool = False,
        return_document: str = ReturnDocument.BEFORE,
        max_time_ms: int | None = None,
    ) -> DocumentType | None:
        """
        Locate one document on the collection and swap it, in its entirety,
        for a new one. Optionally, insert the new document when nothing matches.

        Args:
            filter: a dictionary expressing a predicate in the Data API
                filter syntax. For example:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                The full set of operators is found in the Data API documentation.
            replacement: the new document that will be written to the collection.
            projection: determines which parts of the document are returned.
                It is either an allow-list, `{"f1": True, "f2": True}`,
                or a deny-list, `{"fx": False, "fy": False}`; the two forms
                cannot be mixed (with the exception of `_id` and other special
                fields, which may be set to True or False regardless of
                the rest of the specification).
                The star-projections `{"*": True}` and `{"*": False}` result
                in the whole document and in `{}` being returned, respectively.
                Portions of a list in the document can be selected with
                slice directives, such as `{"array": {"$slice": 2}}`,
                `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
                `{"array": {"$slice": [-4, 2]}}`.
                An iterable over strings is implicitly taken as an allow-list.
                When this parameter is not passed, the default projection
                applies, which does not necessarily include "special" fields
                such as `$vector` or `$vectorize`.
                More on projections is found in the Data API documentation.
            vector: a list of float numbers, of the appropriate dimensionality,
                to use as sorting criterion through vector search (i.e. ANN,
                or "approximate nearest-neighbours" search). The matched
                document (if any) is then the one most similar to this vector.
                *DEPRECATED* (removal in 2.0). Use a `$vector` key in the
                sort clause dict instead.
            vectorize: a string to turn into a vector in order to run a
                vector search. This requires a suitable service to be
                configured for the collection.
                *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the
                sort clause dict instead.
            sort: a dictionary controlling the order of the documents that
                match the filter, thereby determining which document comes
                first and hence is the replaced one. See the `find` method
                for more on sorting. Vector-based ANN sorting is obtained
                by providing a "$vector" or a "$vectorize" key in `sort`.
            upsert: what to do when no document matches.
                If True, `replacement` is inserted as a new document.
                If False, the operation silently does nothing.
            return_document: a flag selecting which document is returned.
                With `ReturnDocument.BEFORE`, or the string "before",
                the document found on database is returned; with
                `ReturnDocument.AFTER`, or the string "after", the new
                document is returned. The default is "before".
            max_time_ms: timeout, in milliseconds, for the underlying HTTP request.
                When omitted, the collection-level setting applies.

        Returns:
            A document (or a projection of it, as requested): either the one
            preceding the replace operation or the one resulting from it.
            None is returned instead to signal that no matching document
            was found, or that no replacement was inserted (depending on
            the `return_document` parameter).

        Raises:
            DataAPIFaultyResponseException: if the response has no "document"
                entry within its "data" section.

        Example:
            >>> my_coll.insert_one({"_id": "rule1", "text": "all animals are equal"})
            InsertOneResult(...)
            >>> my_coll.find_one_and_replace(
            ...     {"_id": "rule1"},
            ...     {"text": "some animals are more equal!"},
            ... )
            {'_id': 'rule1', 'text': 'all animals are equal'}
            >>> my_coll.find_one_and_replace(
            ...     {"text": "some animals are more equal!"},
            ...     {"text": "and the pigs are the rulers"},
            ...     return_document=astrapy.constants.ReturnDocument.AFTER,
            ... )
            {'_id': 'rule1', 'text': 'and the pigs are the rulers'}
            >>> my_coll.find_one_and_replace(
            ...     {"_id": "rule2"},
            ...     {"text": "F=ma^2"},
            ...     return_document=astrapy.constants.ReturnDocument.AFTER,
            ... )
            >>> # (returns None for no matches)
            >>> my_coll.find_one_and_replace(
            ...     {"_id": "rule2"},
            ...     {"text": "F=ma"},
            ...     upsert=True,
            ...     return_document=astrapy.constants.ReturnDocument.AFTER,
            ...     projection={"_id": False},
            ... )
            {'text': 'F=ma'}
        """

        check_deprecated_vector_ize(
            vector=vector, vectors=None, vectorize=vectorize, kind="find"
        )
        sort_clause = _collate_vector_to_sort(sort, vector, vectorize)
        command_options = {
            "returnDocument": return_document,
            "upsert": upsert,
        }
        _max_time_ms = max_time_ms or self.api_options.max_time_ms
        command_body = {
            "filter": filter,
            "projection": normalize_optional_projection(projection),
            "replacement": replacement,
            "options": command_options,
            "sort": sort_clause,
        }
        # Entries whose value is None are left out of the command altogether.
        fo_payload = {
            "findOneAndReplace": {
                key: value for key, value in command_body.items() if value is not None
            }
        }
        logger.info(f"findOneAndReplace on '{self.name}'")
        fo_response = self._api_commander.request(
            payload=fo_payload,
            timeout_info=base_timeout_info(_max_time_ms),
        )
        logger.info(f"finished findOneAndReplace on '{self.name}'")
        data_section = fo_response.get("data", {})
        if "document" not in data_section:
            raise DataAPIFaultyResponseException(
                text="Faulty response from find_one_and_replace API command.",
                raw_response=fo_response,
            )
        # A null "document" is passed through as None, i.e. nothing to return.
        return data_section.get("document")  # type: ignore[no-any-return]

    def replace_one(
        self,
        filter: FilterType,
        replacement: DocumentType,
        *,
        vector: VectorType | None = None,
        vectorize: str | None = None,
        sort: SortType | None = None,
        upsert: bool = False,
        max_time_ms: int | None = None,
    ) -> UpdateResult:
        """
        Swap a single document on the collection for a new one. Optionally,
        insert the new document when nothing matches.

        Args:
            filter: a dictionary expressing a predicate in the Data API
                filter syntax. For example:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                The full set of operators is found in the Data API documentation.
            replacement: the new document that will be written to the collection.
            vector: a list of float numbers, of the appropriate dimensionality,
                to use as sorting criterion through vector search (i.e. ANN,
                or "approximate nearest-neighbours" search). The matched
                document (if any) is then the one most similar to this vector.
                *DEPRECATED* (removal in 2.0). Use a `$vector` key in the
                sort clause dict instead.
            vectorize: a string to turn into a vector in order to run a
                vector search. This requires a suitable service to be
                configured for the collection.
                *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the
                sort clause dict instead.
            sort: a dictionary controlling the order of the documents that
                match the filter, thereby determining which document comes
                first and hence is the replaced one. See the `find` method
                for more on sorting. Vector-based ANN sorting is obtained
                by providing a "$vector" or a "$vectorize" key in `sort`.
            upsert: what to do when no document matches.
                If True, `replacement` is inserted as a new document.
                If False, the operation silently does nothing.
            max_time_ms: timeout, in milliseconds, for the underlying HTTP request.
                When omitted, the collection-level setting applies.

        Returns:
            an UpdateResult object summarizing the outcome of the replace operation.

        Raises:
            DataAPIFaultyResponseException: if the response has no "document"
                entry within its "data" section.

        Example:
            >>> my_coll.insert_one({"Marco": "Polo"})
            InsertOneResult(...)
            >>> my_coll.replace_one({"Marco": {"$exists": True}}, {"Buda": "Pest"})
            UpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1})
            >>> my_coll.find_one({"Buda": "Pest"})
            {'_id': '8424905a-...', 'Buda': 'Pest'}
            >>> my_coll.replace_one({"Mirco": {"$exists": True}}, {"Oh": "yeah?"})
            UpdateResult(raw_results=..., update_info={'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0})
            >>> my_coll.replace_one({"Mirco": {"$exists": True}}, {"Oh": "yeah?"}, upsert=True)
            UpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '931b47d6-...'})
        """

        check_deprecated_vector_ize(
            vector=vector, vectors=None, vectorize=vectorize, kind="find"
        )
        sort_clause = _collate_vector_to_sort(sort, vector, vectorize)
        command_options = {"upsert": upsert}
        _max_time_ms = max_time_ms or self.api_options.max_time_ms
        command_body = {
            "filter": filter,
            "replacement": replacement,
            "options": command_options,
            "sort": sort_clause,
        }
        # This method is carried out through the findOneAndReplace API command;
        # entries whose value is None are left out of the command altogether.
        fo_payload = {
            "findOneAndReplace": {
                key: value for key, value in command_body.items() if value is not None
            }
        }
        logger.info(f"findOneAndReplace on '{self.name}'")
        fo_response = self._api_commander.request(
            payload=fo_payload,
            timeout_info=base_timeout_info(_max_time_ms),
        )
        logger.info(f"finished findOneAndReplace on '{self.name}'")
        if "document" not in fo_response.get("data", {}):
            raise DataAPIFaultyResponseException(
                text="Faulty response from find_one_and_replace API command.",
                raw_response=fo_response,
            )
        # A missing (or null) status is treated as an empty one.
        fo_status = fo_response.get("status") or {}
        return UpdateResult(
            raw_results=[fo_response],
            update_info=_prepare_update_info([fo_status]),
        )

    def find_one_and_update(
        self,
        filter: FilterType,
        update: dict[str, Any],
        *,
        projection: ProjectionType | None = None,
        vector: VectorType | None = None,
        vectorize: str | None = None,
        sort: SortType | None = None,
        upsert: bool = False,
        return_document: str = ReturnDocument.BEFORE,
        max_time_ms: int | None = None,
    ) -> DocumentType | None:
        """
        Locate one document on the collection and apply the requested update
        to it. Optionally, insert a new document when nothing matches.

        Args:
            filter: a dictionary expressing a predicate in the Data API
                filter syntax. For example:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                The full set of operators is found in the Data API documentation.
            update: a dictionary, in the Data API syntax, prescribing the update
                to apply to the document. For example:
                    {"$set": {"field": "value"}}
                    {"$inc": {"counter": 10}}
                    {"$unset": {"field": ""}}
                The full syntax is found in the Data API documentation.
            projection: determines which parts of the document are returned.
                It is either an allow-list, `{"f1": True, "f2": True}`,
                or a deny-list, `{"fx": False, "fy": False}`; the two forms
                cannot be mixed (with the exception of `_id` and other special
                fields, which may be set to True or False regardless of
                the rest of the specification).
                The star-projections `{"*": True}` and `{"*": False}` result
                in the whole document and in `{}` being returned, respectively.
                Portions of a list in the document can be selected with
                slice directives, such as `{"array": {"$slice": 2}}`,
                `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
                `{"array": {"$slice": [-4, 2]}}`.
                An iterable over strings is implicitly taken as an allow-list.
                When this parameter is not passed, the default projection
                applies, which does not necessarily include "special" fields
                such as `$vector` or `$vectorize`.
                More on projections is found in the Data API documentation.
            vector: a list of float numbers, of the appropriate dimensionality,
                to use as sorting criterion through vector search (i.e. ANN,
                or "approximate nearest-neighbours" search). The matched
                document (if any) is then the one most similar to this vector.
                *DEPRECATED* (removal in 2.0). Use a `$vector` key in the
                sort clause dict instead.
            vectorize: a string to turn into a vector in order to run a
                vector search. This requires a suitable service to be
                configured for the collection.
                *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the
                sort clause dict instead.
            sort: a dictionary controlling the order of the documents that
                match the filter, thereby determining which document comes
                first and hence is the updated one. See the `find` method
                for more on sorting. Vector-based ANN sorting is obtained
                by providing a "$vector" or a "$vectorize" key in `sort`.
            upsert: what to do when no document matches.
                If True, a new document (the result of applying `update`
                to an empty document) is inserted.
                If False, the operation silently does nothing.
            return_document: a flag selecting which document is returned.
                With `ReturnDocument.BEFORE`, or the string "before",
                the document found on database is returned; with
                `ReturnDocument.AFTER`, or the string "after", the new
                document is returned. The default is "before".
            max_time_ms: timeout, in milliseconds, for the underlying HTTP request.
                When omitted, the collection-level setting applies.

        Returns:
            A document (or a projection of it, as requested): either the one
            preceding the update operation or the one resulting from it.
            None is returned instead to signal that no matching document
            was found, or that no update was applied (depending on
            the `return_document` parameter).

        Raises:
            DataAPIFaultyResponseException: if the response has no "document"
                entry within its "data" section.

        Example:
            >>> my_coll.insert_one({"Marco": "Polo"})
            InsertOneResult(...)
            >>> my_coll.find_one_and_update(
            ...     {"Marco": {"$exists": True}},
            ...     {"$set": {"title": "Mr."}},
            ... )
            {'_id': 'a80106f2-...', 'Marco': 'Polo'}
            >>> my_coll.find_one_and_update(
            ...     {"title": "Mr."},
            ...     {"$inc": {"rank": 3}},
            ...     projection=["title", "rank"],
            ...     return_document=astrapy.constants.ReturnDocument.AFTER,
            ... )
            {'_id': 'a80106f2-...', 'title': 'Mr.', 'rank': 3}
            >>> my_coll.find_one_and_update(
            ...     {"name": "Johnny"},
            ...     {"$set": {"rank": 0}},
            ...     return_document=astrapy.constants.ReturnDocument.AFTER,
            ... )
            >>> # (returns None for no matches)
            >>> my_coll.find_one_and_update(
            ...     {"name": "Johnny"},
            ...     {"$set": {"rank": 0}},
            ...     upsert=True,
            ...     return_document=astrapy.constants.ReturnDocument.AFTER,
            ... )
            {'_id': 'cb4ef2ab-...', 'name': 'Johnny', 'rank': 0}
        """

        check_deprecated_vector_ize(
            vector=vector, vectors=None, vectorize=vectorize, kind="find"
        )
        sort_clause = _collate_vector_to_sort(sort, vector, vectorize)
        command_options = {
            "returnDocument": return_document,
            "upsert": upsert,
        }
        _max_time_ms = max_time_ms or self.api_options.max_time_ms
        command_body = {
            "filter": filter,
            "update": update,
            "options": command_options,
            "sort": sort_clause,
            "projection": normalize_optional_projection(projection),
        }
        # Entries whose value is None are left out of the command altogether.
        fo_payload = {
            "findOneAndUpdate": {
                key: value for key, value in command_body.items() if value is not None
            }
        }
        logger.info(f"findOneAndUpdate on '{self.name}'")
        fo_response = self._api_commander.request(
            payload=fo_payload,
            timeout_info=base_timeout_info(_max_time_ms),
        )
        logger.info(f"finished findOneAndUpdate on '{self.name}'")
        data_section = fo_response.get("data", {})
        if "document" not in data_section:
            raise DataAPIFaultyResponseException(
                text="Faulty response from find_one_and_update API command.",
                raw_response=fo_response,
            )
        # A null "document" is passed through as None, i.e. nothing to return.
        return data_section.get("document")  # type: ignore[no-any-return]

    def update_one(
        self,
        filter: FilterType,
        update: dict[str, Any],
        *,
        vector: VectorType | None = None,
        vectorize: str | None = None,
        sort: SortType | None = None,
        upsert: bool = False,
        max_time_ms: int | None = None,
    ) -> UpdateResult:
        """
        Apply the requested update to a single document on the collection.
        Optionally, insert a new document when nothing matches.

        Args:
            filter: a dictionary expressing a predicate in the Data API
                filter syntax. For example:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                The full set of operators is found in the Data API documentation.
            update: a dictionary, in the Data API syntax, prescribing the update
                to apply to the document. For example:
                    {"$set": {"field": "value"}}
                    {"$inc": {"counter": 10}}
                    {"$unset": {"field": ""}}
                The full syntax is found in the Data API documentation.
            vector: a list of float numbers, of the appropriate dimensionality,
                to use as sorting criterion through vector search (i.e. ANN,
                or "approximate nearest-neighbours" search). The matched
                document (if any) is then the one most similar to this vector.
                *DEPRECATED* (removal in 2.0). Use a `$vector` key in the
                sort clause dict instead.
            vectorize: a string to turn into a vector in order to run a
                vector search. This requires a suitable service to be
                configured for the collection.
                *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the
                sort clause dict instead.
            sort: a dictionary controlling the order of the documents that
                match the filter, thereby determining which document comes
                first and hence is the updated one. See the `find` method
                for more on sorting. Vector-based ANN sorting is obtained
                by providing a "$vector" or a "$vectorize" key in `sort`.
            upsert: what to do when no document matches.
                If True, a new document (the result of applying `update`
                to an empty document) is inserted.
                If False, the operation silently does nothing.
            max_time_ms: timeout, in milliseconds, for the underlying HTTP request.
                When omitted, the collection-level setting applies.

        Returns:
            an UpdateResult object summarizing the outcome of the update operation.

        Raises:
            DataAPIFaultyResponseException: if the response has no "status" section.

        Example:
            >>> my_coll.insert_one({"Marco": "Polo"})
            InsertOneResult(...)
            >>> my_coll.update_one({"Marco": {"$exists": True}}, {"$inc": {"rank": 3}})
            UpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1})
            >>> my_coll.update_one({"Mirko": {"$exists": True}}, {"$inc": {"rank": 3}})
            UpdateResult(raw_results=..., update_info={'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0})
            >>> my_coll.update_one({"Mirko": {"$exists": True}}, {"$inc": {"rank": 3}}, upsert=True)
            UpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '2a45ff60-...'})
        """

        check_deprecated_vector_ize(
            vector=vector, vectors=None, vectorize=vectorize, kind="find"
        )
        sort_clause = _collate_vector_to_sort(sort, vector, vectorize)
        command_options = {"upsert": upsert}
        _max_time_ms = max_time_ms or self.api_options.max_time_ms
        command_body = {
            "filter": filter,
            "update": update,
            "options": command_options,
            "sort": sort_clause,
        }
        # Entries whose value is None are left out of the command altogether.
        uo_payload = {
            "updateOne": {
                key: value for key, value in command_body.items() if value is not None
            }
        }
        logger.info(f"updateOne on '{self.name}'")
        uo_response = self._api_commander.request(
            payload=uo_payload,
            timeout_info=base_timeout_info(_max_time_ms),
        )
        logger.info(f"finished updateOne on '{self.name}'")
        if "status" not in uo_response:
            raise DataAPIFaultyResponseException(
                text="Faulty response from update_one API command.",
                raw_response=uo_response,
            )
        return UpdateResult(
            raw_results=[uo_response],
            update_info=_prepare_update_info([uo_response["status"]]),
        )

    def update_many(
        self,
        filter: FilterType,
        update: dict[str, Any],
        *,
        upsert: bool = False,
        max_time_ms: int | None = None,
    ) -> UpdateResult:
        """
        Apply an update operation to every document matching a condition.
        Optionally, insert a single document when nothing matches.

        Args:
            filter: a dictionary expressing a predicate in the Data API
                filter syntax. For example:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                The full set of operators is found in the Data API documentation.
            update: a dictionary, in the Data API syntax, prescribing the update
                to apply to the documents. For example:
                    {"$set": {"field": "value"}}
                    {"$inc": {"counter": 10}}
                    {"$unset": {"field": ""}}
                The full syntax is found in the Data API documentation.
            upsert: what to do when no document matches.
                If True, a single new document (the result of applying `update`
                to an empty document) is inserted.
                If False, the operation silently does nothing.
            max_time_ms: timeout, in milliseconds, for the whole operation.
                When omitted, the collection-level setting applies.
                Since the update spans several HTTP calls to the API in
                sequence, a timeout larger than for most other operations
                is advisable if many document updates are anticipated.

        Returns:
            an UpdateResult object summarizing the outcome of the update operation.

        Raises:
            UpdateManyException: if one of the API responses reports errors.
                The exception carries the partial result, built from the
                responses received before the failing one.
            DataAPIFaultyResponseException: if an error-free API response
                has no "status" section.

        Example:
            >>> my_coll.insert_many([{"c": "red"}, {"c": "green"}, {"c": "blue"}])
            InsertManyResult(...)
            >>> my_coll.update_many({"c": {"$ne": "green"}}, {"$set": {"nongreen": True}})
            UpdateResult(raw_results=..., update_info={'n': 2, 'updatedExisting': True, 'ok': 1.0, 'nModified': 2})
            >>> my_coll.update_many({"c": "orange"}, {"$set": {"is_also_fruit": True}})
            UpdateResult(raw_results=..., update_info={'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0})
            >>> my_coll.update_many(
            ...     {"c": "orange"},
            ...     {"$set": {"is_also_fruit": True}},
            ...     upsert=True,
            ... )
            UpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '46643050-...'})

        Note:
            As is the case for `find` (whose docstring has more details),
            if another process inserts new documents matching the filter
            while this command is running, an unpredictable fraction of those
            documents may end up being updated. In other words, whether a given
            newly-inserted document is picked up by update_many or not
            cannot be easily predicted.
        """

        base_options = {
            "upsert": upsert,
        }
        paging_options: dict[str, str] = {}
        collected_responses: list[dict[str, Any]] = []
        collected_statuses: list[dict[str, Any]] = []
        _max_time_ms = max_time_ms or self.api_options.max_time_ms
        logger.info(f"starting update_many on '{self.name}'")
        timeout_manager = MultiCallTimeoutManager(overall_max_time_ms=_max_time_ms)
        # One API call per page: keep going for as long as the response
        # provides a "nextPageState" to resume from.
        while True:
            command_body = {
                "filter": filter,
                "update": update,
                "options": {**base_options, **paging_options},
            }
            page_payload = {
                "updateMany": {
                    key: value
                    for key, value in command_body.items()
                    if value is not None
                }
            }
            logger.info(f"updateMany on '{self.name}'")
            page_response = self._api_commander.request(
                payload=page_payload,
                timeout_info=timeout_manager.remaining_timeout_info(),
            )
            logger.info(f"finished updateMany on '{self.name}'")
            page_status = page_response.get("status") or {}

            if page_response.get("errors", []):
                # Errors stop the pagination at once: the partial result
                # covers only the pages completed before this one.
                partial_result = UpdateResult(
                    raw_results=collected_responses,
                    update_info=_prepare_update_info(collected_statuses),
                )
                responses_so_far = [*collected_responses, page_response]
                raise UpdateManyException.from_responses(
                    commands=[None for _ in responses_so_far],
                    raw_responses=responses_so_far,
                    partial_result=partial_result,
                )

            if "status" not in page_response:
                raise DataAPIFaultyResponseException(
                    text="Faulty response from update_many API command.",
                    raw_response=page_response,
                )

            collected_responses.append(page_response)
            collected_statuses.append(page_status)
            next_page_state = page_status.get("nextPageState")
            if next_page_state is None:
                break
            paging_options = {"pageState": next_page_state}

        final_update_info = _prepare_update_info(collected_statuses)
        logger.info(f"finished update_many on '{self.name}'")
        return UpdateResult(
            raw_results=collected_responses,
            update_info=final_update_info,
        )

    def find_one_and_delete(
        self,
        filter: FilterType,
        *,
        projection: ProjectionType | None = None,
        vector: VectorType | None = None,
        vectorize: str | None = None,
        sort: SortType | None = None,
        max_time_ms: int | None = None,
    ) -> DocumentType | None:
        """
        Locate one document matching the filter, remove it from the collection
        and hand the removed document back to the caller.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            projection: selects which parts of the document are returned.
                It can be an allow-list: `{"f1": True, "f2": True}`,
                or a deny-list: `{"fx": False, "fy": False}`, but not a mixture
                (except for the `_id` and other special fields, which can be
                associated to both True or False independently of the rest
                of the specification).
                The special star-projections `{"*": True}` and `{"*": False}`
                have the effect of returning the whole document and `{}` respectively.
                For lists in documents, slice directives can be passed to select
                portions of the list: for instance, `{"array": {"$slice": 2}}`,
                `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
                `{"array": {"$slice": [-4, 2]}}`.
                An iterable over strings will be treated implicitly as an allow-list.
                The default projection (used if this parameter is not passed) does not
                necessarily include "special" fields such as `$vector` or `$vectorize`.
                See the Data API documentation for more on projections.
            vector: a list of float numbers of the appropriate dimensionality,
                used as the sorting criterion through vector search (i.e. ANN,
                or "approximate nearest-neighbours" search), so that the matched
                document (if any) is the one most similar to the provided vector.
                This parameter cannot be used together with `sort`.
                See the `find` method for more details on this parameter.
                *DEPRECATED* (removal in 2.0). Use a `$vector` key in the
                sort clause dict instead.
            vectorize: a string to be made into a vector to perform vector search.
                This can be supplied in (exclusive) alternative to `vector`,
                provided such a service is configured for the collection,
                and achieves the same effect.
                *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the
                sort clause dict instead.
            sort: a dictionary controlling the ordering of the documents
                matching the filter, which determines what document comes
                first and hence is the deleted one.
                See the `find` method for more on sorting.
            max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
                If not passed, the collection-level setting is used instead.

        Returns:
            The deleted document (or a projection thereof, as requested),
            or None if the filter matched nothing.

        Raises:
            DataAPIFaultyResponseException: if the response carries neither
                a document nor a `deletedCount` equal to zero.

        Example:
            >>> my_coll.insert_many(
            ...     [
            ...         {"species": "swan", "class": "Aves"},
            ...         {"species": "frog", "class": "Amphibia"},
            ...     ],
            ... )
            InsertManyResult(...)
            >>> my_coll.find_one_and_delete(
            ...     {"species": {"$ne": "frog"}},
            ...     projection=["species"],
            ... )
            {'_id': '5997fb48-...', 'species': 'swan'}
            >>> my_coll.find_one_and_delete({"species": {"$ne": "frog"}})
            >>> # (returns None for no matches)
        """

        check_deprecated_vector_ize(
            vector=vector, vectors=None, vectorize=vectorize, kind="find"
        )
        effective_sort = _collate_vector_to_sort(sort, vector, vectorize)
        effective_projection = normalize_optional_projection(projection)
        timeout_ms = max_time_ms or self.api_options.max_time_ms

        # Only the clauses that are actually set are sent along with the command.
        command_body: dict[str, Any] = {}
        for clause_name, clause_value in (
            ("filter", filter),
            ("sort", effective_sort),
            ("projection", effective_projection),
        ):
            if clause_value is not None:
                command_body[clause_name] = clause_value

        logger.info(f"findOneAndDelete on '{self.name}'")
        response = self._api_commander.request(
            payload={"findOneAndDelete": command_body},
            timeout_info=base_timeout_info(timeout_ms),
        )
        logger.info(f"finished findOneAndDelete on '{self.name}'")

        if "document" in response.get("data", {}):
            return response["data"]["document"]  # type: ignore[no-any-return]

        # No document in the response: acceptable only if nothing was deleted.
        if response.get("status", {}).get("deletedCount") == 0:
            return None

        raise DataAPIFaultyResponseException(
            text="Faulty response from find_one_and_delete API command.",
            raw_response=response,
        )

    def delete_one(
        self,
        filter: FilterType,
        *,
        vector: VectorType | None = None,
        vectorize: str | None = None,
        sort: SortType | None = None,
        max_time_ms: int | None = None,
    ) -> DeleteResult:
        """
        Remove a single document among those matching the given filter.
        At most one document is deleted by this method, no matter how many
        documents satisfy the filter.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
            vector: a list of float numbers of the appropriate dimensionality,
                used as the sorting criterion through vector search (i.e. ANN,
                or "approximate nearest-neighbours" search), so that the matched
                document (if any) is the one most similar to the provided vector.
                This parameter cannot be used together with `sort`.
                See the `find` method for more details on this parameter.
                *DEPRECATED* (removal in 2.0). Use a `$vector` key in the
                sort clause dict instead.
            vectorize: a string to be made into a vector to perform vector search.
                This can be supplied in (exclusive) alternative to `vector`,
                provided such a service is configured for the collection,
                and achieves the same effect.
                *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the
                sort clause dict instead.
            sort: a dictionary controlling the ordering of the documents
                matching the filter, which determines what document comes
                first and hence is the deleted one.
                See the `find` method for more on sorting.
            max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
                If not passed, the collection-level setting is used instead.

        Returns:
            a DeleteResult object summarizing the outcome of the delete operation.

        Raises:
            DataAPIFaultyResponseException: if the response status does not
                contain a `deletedCount` entry.

        Example:
            >>> my_coll.insert_many([{"seq": 1}, {"seq": 0}, {"seq": 2}])
            InsertManyResult(...)
            >>> my_coll.delete_one({"seq": 1})
            DeleteResult(raw_results=..., deleted_count=1)
            >>> my_coll.distinct("seq")
            [0, 2]
            >>> my_coll.delete_one(
            ...     {"seq": {"$exists": True}},
            ...     sort={"seq": astrapy.constants.SortDocuments.DESCENDING},
            ... )
            DeleteResult(raw_results=..., deleted_count=1)
            >>> my_coll.distinct("seq")
            [0]
            >>> my_coll.delete_one({"seq": 2})
            DeleteResult(raw_results=..., deleted_count=0)
        """

        check_deprecated_vector_ize(
            vector=vector, vectors=None, vectorize=vectorize, kind="find"
        )
        effective_sort = _collate_vector_to_sort(sort, vector, vectorize)
        timeout_ms = max_time_ms or self.api_options.max_time_ms

        # The sort clause is omitted from the command when not specified.
        command_body: dict[str, Any] = {}
        for clause_name, clause_value in (
            ("filter", filter),
            ("sort", effective_sort),
        ):
            if clause_value is not None:
                command_body[clause_name] = clause_value

        logger.info(f"deleteOne on '{self.name}'")
        response = self._api_commander.request(
            payload={"deleteOne": command_body},
            timeout_info=base_timeout_info(timeout_ms),
        )
        logger.info(f"finished deleteOne on '{self.name}'")

        response_status = response.get("status", {})
        if "deletedCount" not in response_status:
            raise DataAPIFaultyResponseException(
                text="Faulty response from delete_one API command.",
                raw_response=response,
            )
        return DeleteResult(
            deleted_count=response_status["deletedCount"],
            raw_results=[response],
        )

    def delete_many(
        self,
        filter: FilterType,
        *,
        max_time_ms: int | None = None,
    ) -> DeleteResult:
        """
        Remove every document that satisfies the given filter.

        Args:
            filter: a predicate expressed as a dictionary according to the
                Data API filter syntax. Examples are:
                    {}
                    {"name": "John"}
                    {"price": {"$lt": 100}}
                    {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
                See the Data API documentation for the full set of operators.
                Passing an empty filter, `{}`, completely erases all contents
                of the collection.
            max_time_ms: a timeout, in milliseconds, for the operation.
                If not passed, the collection-level setting is used instead:
                keep in mind that this method entails successive HTTP requests
                to the API, depending on how many documents are to be deleted.
                For this reason, in most cases it is suggested to relax the
                timeout compared to other method calls.

        Returns:
            a DeleteResult object summarizing the outcome of the delete operation.

        Raises:
            DeleteManyException: if one of the API responses reports errors.
                The exception carries the partial result accumulated so far.
            DataAPIFaultyResponseException: if an error-free response lacks
                a `deletedCount` in its status.

        Example:
            >>> my_coll.insert_many([{"seq": 1}, {"seq": 0}, {"seq": 2}])
            InsertManyResult(...)
            >>> my_coll.delete_many({"seq": {"$lte": 1}})
            DeleteResult(raw_results=..., deleted_count=2)
            >>> my_coll.distinct("seq")
            [2]
            >>> my_coll.delete_many({"seq": {"$lte": 1}})
            DeleteResult(raw_results=..., deleted_count=0)

        Note:
            This operation is in general not atomic. Depending on the amount
            of matching documents, it can keep running (in a blocking way)
            for a macroscopic time. In that case, new documents that are
            meanwhile inserted (e.g. from another process/application) will be
            deleted during the execution of this method call until the
            collection is devoid of matches.
            An exception is the `filter={}` case, whereby the operation is atomic.
        """
        collected_responses: list[dict[str, Any]] = []
        total_deleted = 0
        overall_timeout_ms = max_time_ms or self.api_options.max_time_ms
        timeout_manager = MultiCallTimeoutManager(
            overall_max_time_ms=overall_timeout_ms
        )
        # The very same command is re-issued until the API reports no more data.
        delete_command = {"deleteMany": {"filter": filter}}
        logger.info(f"starting delete_many on '{self.name}'")
        while True:
            logger.info(f"deleteMany on '{self.name}'")
            response = self._api_commander.request(
                payload=delete_command,
                raise_api_errors=False,
                timeout_info=timeout_manager.remaining_timeout_info(),
            )
            logger.info(f"finished deleteMany on '{self.name}'")

            # API errors are inspected here (not raised by the request itself)
            # so that the partial outcome can be attached to the exception.
            if response.get("errors", []):
                responses_so_far = collected_responses + [response]
                raise DeleteManyException.from_responses(
                    commands=[None] * len(responses_so_far),
                    raw_responses=responses_so_far,
                    partial_result=DeleteResult(
                        deleted_count=total_deleted,
                        raw_results=collected_responses,
                    ),
                )

            response_status = response.get("status", {})
            batch_deleted = response_status.get("deletedCount")
            if batch_deleted is None:
                raise DataAPIFaultyResponseException(
                    text="Faulty response from delete_many API command.",
                    raw_response=response,
                )
            collected_responses.append(response)
            total_deleted += batch_deleted
            if not response_status.get("moreData", False):
                break

        logger.info(f"finished delete_many on '{self.name}'")
        return DeleteResult(
            deleted_count=total_deleted,
            raw_results=collected_responses,
        )

    @deprecation.deprecated(  # type: ignore[misc]
        deprecated_in="1.3.0",
        removed_in="2.0.0",
        current_version=__version__,
        details="Use delete_many with filter={} instead.",
    )
    def delete_all(self, *, max_time_ms: int | None = None) -> dict[str, Any]:
        """
        Erase every document stored in the collection.

        This is a thin wrapper around `delete_many` with an empty filter.

        Args:
            max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
                If not passed, the collection-level setting is used instead.

        Returns:
            a dictionary of the form {"ok": 1} to signal successful deletion.

        Raises:
            DataAPIFaultyResponseException: if the deleted count reported
                by `delete_many` is anything other than -1.

        Example:
            >>> my_coll.distinct("seq")
            [2, 1, 0]
            >>> my_coll.count_documents({}, upper_bound=100)
            4
            >>> my_coll.delete_all()
            {'ok': 1}
            >>> my_coll.count_documents({}, upper_bound=100)
            0

        Note:
            Use with caution.
        """
        outcome = self.delete_many(filter={}, max_time_ms=max_time_ms)
        # A deleted count of -1 is the only value accepted as success here
        # (presumably the API's marker for an unfiltered deletion - TODO confirm).
        if outcome.deleted_count != -1:
            raise DataAPIFaultyResponseException(
                text="Unexpected response from collection.delete_many({}).",
                raw_response=None,
            )
        return {"ok": 1}

    @deprecation.deprecated(  # type: ignore[misc]
        deprecated_in="1.5.0",
        removed_in="2.0.0",
        current_version=__version__,
        details=(
            "Please switch to managing sequences of DML operations "
            "in app code instead."
        ),
    )
    def bulk_write(
        self,
        requests: Iterable[BaseOperation],
        *,
        ordered: bool = False,
        concurrency: int | None = None,
        max_time_ms: int | None = None,
    ) -> BulkWriteResult:
        """
        Execute an arbitrary amount of operations such as inserts, updates, deletes
        either sequentially or concurrently.

        This method does not execute atomically, i.e. individual operations are
        each performed in the same way as the corresponding collection method,
        and each one is a different and unrelated database mutation.

        Args:
            requests: an iterable over concrete subclasses of `BaseOperation`,
                such as `InsertMany` or `ReplaceOne`. Each such object
                represents an operation ready to be executed on a collection,
                and is instantiated by passing the same parameters as one
                would the corresponding collection method.
            ordered: whether to launch the `requests` one after the other or
                in arbitrary order, possibly in a concurrent fashion. For
                performance reasons, False (default) should be preferred
                when compatible with the needs of the application flow.
            concurrency: maximum number of concurrent operations executing at
                a given time. It cannot be more than one for ordered bulk writes.
            max_time_ms: a timeout, in milliseconds, for the whole bulk write.
                Remember that, if the method call times out, then there's no
                guarantee about what portion of the bulk write has been received
                and successfully executed by the Data API.
                If not passed, the collection-level setting is used instead:
                in most cases, however, one should pass a relaxed timeout
                if longer sequences of operations are to be executed in bulk.

        Returns:
            A single BulkWriteResult summarizing the whole list of requested
            operations. The keys in the map attributes of BulkWriteResult
            (when present) are the integer indices of the corresponding operation
            in the `requests` iterable.

        Raises:
            ValueError: if `ordered` is True and a concurrency greater
                than one is requested.
            BulkWriteException: if any operation fails with a
                DataAPIResponseException. The exception carries the partial
                result of the operations (or portions thereof) that succeeded.
                Ordered bulk writes stop at the first failure; unordered ones
                run all operations and report every failure.

        Example:
            >>> from astrapy.operations import InsertMany, ReplaceOne
            >>> op1 = InsertMany([{"a": 1}, {"a": 2}])
            >>> op2 = ReplaceOne({"z": 9}, replacement={"z": 9, "replaced": True}, upsert=True)
            >>> my_coll.bulk_write([op1, op2])
            BulkWriteResult(bulk_api_results={0: ..., 1: ...}, deleted_count=0, inserted_count=3, matched_count=0, modified_count=0, upserted_count=1, upserted_ids={1: '2addd676-...'})
            >>> my_coll.count_documents({}, upper_bound=100)
            3
            >>> my_coll.distinct("replaced")
            [True]
        """

        # lazy importing here against circular-import error
        from astrapy.operations import reduce_bulk_write_results

        if concurrency is None:
            if ordered:
                _concurrency = 1
            else:
                _concurrency = DEFAULT_BULK_WRITE_CONCURRENCY
        else:
            _concurrency = concurrency
        if _concurrency > 1 and ordered:
            raise ValueError("Cannot run ordered bulk_write concurrently.")
        _max_time_ms = max_time_ms or self.api_options.max_time_ms
        logger.info(f"startng a bulk write on '{self.name}'")
        timeout_manager = MultiCallTimeoutManager(overall_max_time_ms=_max_time_ms)
        if ordered:
            bulk_write_results: list[BulkWriteResult] = []
            for operation_i, operation in enumerate(requests):
                try:
                    this_bw_result = operation.execute(
                        self,
                        index_in_bulk_write=operation_i,
                        bulk_write_timeout_ms=timeout_manager.remaining_timeout_ms(),
                    )
                    bulk_write_results.append(this_bw_result)
                except CumulativeOperationException as exc:
                    # the failed operation may have been partially applied:
                    # its partial result is merged with the earlier successes
                    partial_result = exc.partial_result
                    partial_bw_result = reduce_bulk_write_results(
                        bulk_write_results
                        + [
                            partial_result.to_bulk_write_result(
                                index_in_bulk_write=operation_i
                            )
                        ]
                    )
                    dar_exception = exc.data_api_response_exception()
                    raise BulkWriteException(
                        text=dar_exception.text,
                        error_descriptors=dar_exception.error_descriptors,
                        detailed_error_descriptors=dar_exception.detailed_error_descriptors,
                        partial_result=partial_bw_result,
                        exceptions=[dar_exception],
                    )
                except DataAPIResponseException as exc:
                    # the cumulative exceptions, with their
                    # partially-done-info, are handled above:
                    # here it's just one-shot d.a.r. exceptions
                    partial_bw_result = reduce_bulk_write_results(bulk_write_results)
                    dar_exception = exc.data_api_response_exception()
                    raise BulkWriteException(
                        text=dar_exception.text,
                        error_descriptors=dar_exception.error_descriptors,
                        detailed_error_descriptors=dar_exception.detailed_error_descriptors,
                        partial_result=partial_bw_result,
                        exceptions=[dar_exception],
                    )
            full_bw_result = reduce_bulk_write_results(bulk_write_results)
            logger.info(f"finished a bulk write on '{self.name}'")
            return full_bw_result
        else:

            def _execute_as_either(
                operation: BaseOperation, operation_i: int
            ) -> tuple[BulkWriteResult | None, DataAPIResponseException | None]:
                # run one operation, returning (result, None) on success
                # and (None, exception) on a Data API response failure
                try:
                    ex_result = operation.execute(
                        self,
                        index_in_bulk_write=operation_i,
                        bulk_write_timeout_ms=timeout_manager.remaining_timeout_ms(),
                    )
                    return (ex_result, None)
                except DataAPIResponseException as exc:
                    return (None, exc)

            with ThreadPoolExecutor(max_workers=_concurrency) as executor:
                bulk_write_either_futures = [
                    executor.submit(
                        _execute_as_either,
                        operation,
                        operation_i,
                    )
                    for operation_i, operation in enumerate(requests)
                ]
                bulk_write_either_results = [
                    bulk_write_either_future.result()
                    for bulk_write_either_future in bulk_write_either_futures
                ]
                # regroup
                bulk_write_successes = [
                    bwr for bwr, _ in bulk_write_either_results if bwr
                ]
                # Each failure is kept together with the index of the operation
                # that produced it: the futures (hence the results) are listed
                # in the same order as the requests. The index variable of the
                # submitting comprehension above is not visible here, so the
                # position must be recovered explicitly.
                indexed_bulk_write_failures = [
                    (failed_operation_i, bwf)
                    for failed_operation_i, (_, bwf) in enumerate(
                        bulk_write_either_results
                    )
                    if bwf
                ]
                if indexed_bulk_write_failures:
                    # extract and cumulate
                    partial_results_from_failures = [
                        failure.partial_result.to_bulk_write_result(
                            index_in_bulk_write=failed_operation_i
                        )
                        for failed_operation_i, failure in indexed_bulk_write_failures
                        if isinstance(failure, CumulativeOperationException)
                    ]
                    partial_bw_result = reduce_bulk_write_results(
                        bulk_write_successes + partial_results_from_failures
                    )
                    # raise and recast the first exception
                    all_dar_exceptions = [
                        bw_failure.data_api_response_exception()
                        for _, bw_failure in indexed_bulk_write_failures
                    ]
                    dar_exception = all_dar_exceptions[0]
                    raise BulkWriteException(
                        text=dar_exception.text,
                        error_descriptors=dar_exception.error_descriptors,
                        detailed_error_descriptors=dar_exception.detailed_error_descriptors,
                        partial_result=partial_bw_result,
                        exceptions=all_dar_exceptions,
                    )
                else:
                    logger.info(f"finished a bulk write on '{self.name}'")
                    return reduce_bulk_write_results(bulk_write_successes)

    def drop(self, *, max_time_ms: int | None = None) -> dict[str, Any]:
        """
        Remove this collection from its database, together with
        all the documents stored in it.

        Args:
            max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
                If not passed, the collection-level setting is used instead.
                Remember there is no guarantee that a request that has
                timed out is not in fact honored.

        Returns:
            a dictionary of the form {"ok": 1} to signal successful deletion.

        Example:
            >>> my_coll.find_one({})
            {'_id': '...', 'a': 100}
            >>> my_coll.drop()
            {'ok': 1}
            >>> my_coll.find_one({})
            Traceback (most recent call last):
                ... ...
            astrapy.exceptions.DataAPIResponseException: Collection does not exist, collection name: my_collection

        Note:
            Use with caution.

        Note:
            Once the method succeeds, methods on this object can still be invoked:
            however, this hardly makes sense as the underlying actual collection
            is no more.
            It is responsibility of the developer to design a correct flow
            which avoids using a deceased collection any further.
        """

        timeout_ms = max_time_ms or self.api_options.max_time_ms
        logger.info(f"dropping collection '{self.name}' (self)")
        # The actual work is delegated to the database this collection belongs to.
        outcome = self.database.drop_collection(
            self,
            max_time_ms=timeout_ms,
        )
        logger.info(f"finished dropping collection '{self.name}' (self)")
        return outcome

    def command(
        self,
        body: dict[str, Any],
        *,
        raise_api_errors: bool = True,
        max_time_ms: int | None = None,
    ) -> dict[str, Any]:
        """
        Issue a POST request to the Data API for this collection, using
        a payload supplied as-is by the caller.

        Args:
            body: a JSON-serializable dictionary, the payload of the request.
            raise_api_errors: if True, responses with a nonempty 'errors' field
                result in an astrapy exception being raised.
            max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
                If not passed, the collection-level setting is used instead.

        Returns:
            a dictionary with the response of the HTTP request.

        Example:
            >>> my_coll.command({"countDocuments": {}})
            {'status': {'count': 123}}
        """

        timeout_ms = max_time_ms or self.api_options.max_time_ms
        # Only the (sorted) top-level keys of the payload appear in the logs.
        logged_command_names = ",".join(sorted(body))
        logger.info(f"command={logged_command_names} on '{self.name}'")
        response = self._api_commander.request(
            payload=body,
            raise_api_errors=raise_api_errors,
            timeout_info=base_timeout_info(timeout_ms),
        )
        logger.info(f"finished command={logged_command_names} on '{self.name}'")
        return response

Instance variables

var database : Database

a Database object, the database this collection belongs to.

Example

>>> my_coll.database.name
'the_application_database'
Expand source code
@property
def database(self) -> Database:
    """
    The Database object that this collection belongs to.

    Example:
        >>> my_coll.database.name
        'the_application_database'
    """

    owning_database = self._database
    return owning_database
var full_name : str

The fully-qualified collection name within the database, in the form "keyspace.collection_name".

Example

>>> my_coll.full_name
'default_keyspace.my_v_collection'
Expand source code
@property
def full_name(self) -> str:
    """
    The collection name qualified by its keyspace, i.e. a string
    of the form "keyspace.collection_name".

    Example:
        >>> my_coll.full_name
        'default_keyspace.my_v_collection'
    """

    return "{}.{}".format(self.keyspace, self.name)
var keyspace : str

The keyspace this collection is in.

Example

>>> my_coll.keyspace
'default_keyspace'
Expand source code
@property
def keyspace(self) -> str:
    """
    The keyspace hosting this collection, as set on its database.

    Raises:
        ValueError: if the database has no keyspace set (i.e. it is None).

    Example:
        >>> my_coll.keyspace
        'default_keyspace'
    """

    database_keyspace = self.database.keyspace
    if database_keyspace is not None:
        return database_keyspace
    raise ValueError("The collection's DB is set with keyspace=None")
var name : str

The name of this collection.

Example

>>> my_coll.name
'my_v_collection'
Expand source code
@property
def name(self) -> str:
    """
    The name identifying this collection.

    Example:
        >>> my_coll.name
        'my_v_collection'
    """

    collection_name = self._name
    return collection_name
var namespace : str

The namespace this collection is in.

DEPRECATED (removal in 2.0). Switch to the "keyspace" property.

Example

>>> my_coll.namespace
'default_keyspace'
Expand source code
@property
def namespace(self) -> str:
    """
    The namespace this collection is in. The returned value is the one
    of the `keyspace` property.

    *DEPRECATED* (removal in 2.0). Switch to the "keyspace" property.

    Example:
        >>> my_coll.namespace
        'default_keyspace'
    """

    # Emit the deprecation notice first, then defer to the replacement property.
    warnings.warn(
        deprecation.DeprecatedWarning(
            "the 'namespace' property",
            deprecated_in="1.5.0",
            removed_in="2.0.0",
            details=NAMESPACE_DEPRECATION_NOTICE_METHOD,
        ),
        stacklevel=2,
    )
    return self.keyspace

Methods

def bulk_write(self, requests: Iterable[BaseOperation], *, ordered: bool = False, concurrency: int | None = None, max_time_ms: int | None = None) ‑> BulkWriteResult

Execute an arbitrary number of operations, such as inserts, updates and deletes, either sequentially or concurrently.

This method does not execute atomically, i.e. individual operations are each performed in the same way as the corresponding collection method, and each one is a different and unrelated database mutation.

Args

requests
an iterable over concrete subclasses of BaseOperation, such as InsertMany or ReplaceOne. Each such object represents an operation ready to be executed on a collection, and is instantiated by passing the same parameters as one would the corresponding collection method.
ordered
whether to launch the requests one after the other or in arbitrary order, possibly in a concurrent fashion. For performance reasons, False (default) should be preferred when compatible with the needs of the application flow.
concurrency
maximum number of concurrent operations executing at a given time. It cannot be more than one for ordered bulk writes.
max_time_ms
a timeout, in milliseconds, for the whole bulk write. Remember that, if the method call times out, then there's no guarantee about what portion of the bulk write has been received and successfully executed by the Data API. If not passed, the collection-level setting is used instead: in most cases, however, one should pass a relaxed timeout if longer sequences of operations are to be executed in bulk.

Returns

A single BulkWriteResult summarizing the whole list of requested operations. The keys in the map attributes of BulkWriteResult (when present) are the integer indices of the corresponding operation in the requests iterable.

Example

>>> from astrapy.operations import InsertMany, ReplaceOne
>>> op1 = InsertMany([{"a": 1}, {"a": 2}])
>>> op2 = ReplaceOne({"z": 9}, replacement={"z": 9, "replaced": True}, upsert=True)
>>> my_coll.bulk_write([op1, op2])
BulkWriteResult(bulk_api_results={0: ..., 1: ...}, deleted_count=0, inserted_count=3, matched_count=0, modified_count=0, upserted_count=1, upserted_ids={1: '2addd676-...'})
>>> my_coll.count_documents({}, upper_bound=100)
3
>>> my_coll.distinct("replaced")
[True]

Deprecated since version: 1.5.0

This will be removed in 2.0.0. Please switch to managing sequences of DML operations in app code instead.

Expand source code
@deprecation.deprecated(  # type: ignore[misc]
    deprecated_in="1.5.0",
    removed_in="2.0.0",
    current_version=__version__,
    details=(
        "Please switch to managing sequences of DML operations "
        "in app code instead."
    ),
)
def bulk_write(
    self,
    requests: Iterable[BaseOperation],
    *,
    ordered: bool = False,
    concurrency: int | None = None,
    max_time_ms: int | None = None,
) -> BulkWriteResult:
    """
    Execute an arbitrary number of operations, such as inserts, updates
    and deletes, either sequentially or concurrently.

    This method does not execute atomically, i.e. individual operations are
    each performed in the same way as the corresponding collection method,
    and each one is a different and unrelated database mutation.

    Args:
        requests: an iterable over concrete subclasses of `BaseOperation`,
            such as `InsertMany` or `ReplaceOne`. Each such object
            represents an operation ready to be executed on a collection,
            and is instantiated by passing the same parameters as one
            would the corresponding collection method.
        ordered: whether to launch the `requests` one after the other or
            in arbitrary order, possibly in a concurrent fashion. For
            performance reasons, False (default) should be preferred
            when compatible with the needs of the application flow.
        concurrency: maximum number of concurrent operations executing at
            a given time. It cannot be more than one for ordered bulk writes.
            If not passed, it is 1 for ordered bulk writes and
            `DEFAULT_BULK_WRITE_CONCURRENCY` otherwise.
        max_time_ms: a timeout, in milliseconds, for the whole bulk write.
            Remember that, if the method call times out, then there's no
            guarantee about what portion of the bulk write has been received
            and successfully executed by the Data API.
            If not passed, the collection-level setting is used instead:
            in most cases, however, one should pass a relaxed timeout
            if longer sequences of operations are to be executed in bulk.

    Returns:
        A single BulkWriteResult summarizing the whole list of requested
        operations. The keys in the map attributes of BulkWriteResult
        (when present) are the integer indices of the corresponding operation
        in the `requests` iterable.

    Raises:
        ValueError: if `ordered` is True and a concurrency greater than one
            is requested.
        BulkWriteException: if any operation fails with a
            `DataAPIResponseException`. Its `partial_result` combines the
            results of the operations that succeeded with whatever partial
            result the failed operations carry. In the ordered case the
            first failure stops the bulk write; in the unordered case
            all operations are submitted and all failures are collected.

    Example:
        >>> from astrapy.operations import InsertMany, ReplaceOne
        >>> op1 = InsertMany([{"a": 1}, {"a": 2}])
        >>> op2 = ReplaceOne({"z": 9}, replacement={"z": 9, "replaced": True}, upsert=True)
        >>> my_coll.bulk_write([op1, op2])
        BulkWriteResult(bulk_api_results={0: ..., 1: ...}, deleted_count=0, inserted_count=3, matched_count=0, modified_count=0, upserted_count=1, upserted_ids={1: '2addd676-...'})
        >>> my_coll.count_documents({}, upper_bound=100)
        3
        >>> my_coll.distinct("replaced")
        [True]
    """

    # lazy importing here against circular-import error
    from astrapy.operations import reduce_bulk_write_results

    if concurrency is None:
        if ordered:
            _concurrency = 1
        else:
            _concurrency = DEFAULT_BULK_WRITE_CONCURRENCY
    else:
        _concurrency = concurrency
    if _concurrency > 1 and ordered:
        raise ValueError("Cannot run ordered bulk_write concurrently.")
    _max_time_ms = max_time_ms or self.api_options.max_time_ms
    logger.info(f"starting a bulk write on '{self.name}'")
    # a single manager hands out the time still available to each operation
    timeout_manager = MultiCallTimeoutManager(overall_max_time_ms=_max_time_ms)
    if ordered:
        bulk_write_results: list[BulkWriteResult] = []
        for operation_i, operation in enumerate(requests):
            try:
                this_bw_result = operation.execute(
                    self,
                    index_in_bulk_write=operation_i,
                    bulk_write_timeout_ms=timeout_manager.remaining_timeout_ms(),
                )
                bulk_write_results.append(this_bw_result)
            except CumulativeOperationException as exc:
                # the failed operation may have been partially carried out:
                # its partial result is merged with the previous successes
                partial_result = exc.partial_result
                partial_bw_result = reduce_bulk_write_results(
                    bulk_write_results
                    + [
                        partial_result.to_bulk_write_result(
                            index_in_bulk_write=operation_i
                        )
                    ]
                )
                dar_exception = exc.data_api_response_exception()
                raise BulkWriteException(
                    text=dar_exception.text,
                    error_descriptors=dar_exception.error_descriptors,
                    detailed_error_descriptors=dar_exception.detailed_error_descriptors,
                    partial_result=partial_bw_result,
                    exceptions=[dar_exception],
                )
            except DataAPIResponseException as exc:
                # the cumulative exceptions, with their
                # partially-done-info, are handled above:
                # here it's just one-shot d.a.r. exceptions
                partial_bw_result = reduce_bulk_write_results(bulk_write_results)
                dar_exception = exc.data_api_response_exception()
                raise BulkWriteException(
                    text=dar_exception.text,
                    error_descriptors=dar_exception.error_descriptors,
                    detailed_error_descriptors=dar_exception.detailed_error_descriptors,
                    partial_result=partial_bw_result,
                    exceptions=[dar_exception],
                )
        full_bw_result = reduce_bulk_write_results(bulk_write_results)
        logger.info(f"finished a bulk write on '{self.name}'")
        return full_bw_result
    else:

        def _execute_as_either(
            operation: BaseOperation, operation_i: int
        ) -> tuple[BulkWriteResult | None, DataAPIResponseException | None]:
            # run one operation, returning (result, None) or (None, exception)
            # so that a failure does not prevent collecting the other outcomes
            try:
                ex_result = operation.execute(
                    self,
                    index_in_bulk_write=operation_i,
                    bulk_write_timeout_ms=timeout_manager.remaining_timeout_ms(),
                )
                return (ex_result, None)
            except DataAPIResponseException as exc:
                return (None, exc)

        with ThreadPoolExecutor(max_workers=_concurrency) as executor:
            bulk_write_either_futures = [
                executor.submit(
                    _execute_as_either,
                    operation,
                    operation_i,
                )
                for operation_i, operation in enumerate(requests)
            ]
            # futures are resolved in submission order, hence the position
            # of an outcome in this list is the index of its operation
            bulk_write_either_results = [
                bulk_write_either_future.result()
                for bulk_write_either_future in bulk_write_either_futures
            ]
            # regroup
            bulk_write_successes = [
                bwr for bwr, _ in bulk_write_either_results if bwr is not None
            ]
            # failures are kept together with the index of their operation,
            # which is needed to label the partial results below
            indexed_bulk_write_failures = [
                (failure_i, bwf)
                for failure_i, (_, bwf) in enumerate(bulk_write_either_results)
                if bwf is not None
            ]
            if indexed_bulk_write_failures:
                # extract and cumulate
                partial_results_from_failures = [
                    failure.partial_result.to_bulk_write_result(
                        index_in_bulk_write=failure_i
                    )
                    for failure_i, failure in indexed_bulk_write_failures
                    if isinstance(failure, CumulativeOperationException)
                ]
                partial_bw_result = reduce_bulk_write_results(
                    bulk_write_successes + partial_results_from_failures
                )
                # raise and recast the first exception
                all_dar_exceptions = [
                    bw_failure.data_api_response_exception()
                    for _, bw_failure in indexed_bulk_write_failures
                ]
                dar_exception = all_dar_exceptions[0]
                raise BulkWriteException(
                    text=dar_exception.text,
                    error_descriptors=dar_exception.error_descriptors,
                    detailed_error_descriptors=dar_exception.detailed_error_descriptors,
                    partial_result=partial_bw_result,
                    exceptions=all_dar_exceptions,
                )
            else:
                logger.info(f"finished a bulk write on '{self.name}'")
                return reduce_bulk_write_results(bulk_write_successes)
def command(self, body: dict[str, Any], *, raise_api_errors: bool = True, max_time_ms: int | None = None) ‑> dict[str, typing.Any]

Send a POST request to the Data API for this collection with an arbitrary, caller-provided payload.

Args

body
a JSON-serializable dictionary, the payload of the request.
raise_api_errors
if True, responses with a nonempty 'errors' field result in an astrapy exception being raised.
max_time_ms
a timeout, in milliseconds, for the underlying HTTP request. If not passed, the collection-level setting is used instead.

Returns

a dictionary with the response of the HTTP request.

Example

>>> my_coll.command({"countDocuments": {}})
{'status': {'count': 123}}
Expand source code
def command(
    self,
    body: dict[str, Any],
    *,
    raise_api_errors: bool = True,
    max_time_ms: int | None = None,
) -> dict[str, Any]:
    """
    Issue a POST request to the Data API, targeting this collection,
    with a payload entirely supplied by the caller.

    Args:
        body: a JSON-serializable dictionary, the payload of the request.
        raise_api_errors: if True, responses with a nonempty 'errors' field
            result in an astrapy exception being raised.
        max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
            If not passed, the collection-level setting is used instead.

    Returns:
        a dictionary with the response of the HTTP request.

    Example:
        >>> my_coll.command({"countDocuments": {}})
        {'status': {'count': 123}}
    """

    timeout_ms = max_time_ms or self.api_options.max_time_ms
    # the sorted top-level keys of the payload identify the command in the logs
    command_names = ",".join(sorted(body))
    logger.info(f"command={command_names} on '{self.name}'")
    response = self._api_commander.request(
        payload=body,
        raise_api_errors=raise_api_errors,
        timeout_info=base_timeout_info(timeout_ms),
    )
    logger.info(f"finished command={command_names} on '{self.name}'")
    return response
def count_documents(self, filter: FilterType, *, upper_bound: int, max_time_ms: int | None = None) ‑> int

Count the documents in the collection matching the specified filter.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {}, {"name": "John"}, {"price": {"$lt": 100}}, {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}. See the Data API documentation for the full set of operators.
upper_bound
a required ceiling on the result of the count operation. If the actual number of documents exceeds this value, an exception will be raised. Furthermore, if the actual number of documents exceeds the maximum count that the Data API can reach (regardless of upper_bound), an exception will be raised.
max_time_ms
a timeout, in milliseconds, for the underlying HTTP request. If not passed, the collection-level setting is used instead.

Returns

the exact count of matching documents.

Example

>>> my_coll.insert_many([{"seq": i} for i in range(20)])
InsertManyResult(...)
>>> my_coll.count_documents({}, upper_bound=100)
20
>>> my_coll.count_documents({"seq":{"$gt": 15}}, upper_bound=100)
4
>>> my_coll.count_documents({}, upper_bound=10)
Traceback (most recent call last):
    ... ...
astrapy.exceptions.TooManyDocumentsToCountException

Note

Count operations are expensive: for this reason, the best practice is to provide a reasonable upper_bound according to the caller expectations. Moreover, indiscriminate usage of count operations for sizeable amounts of documents (i.e. in the thousands and more) is discouraged in favor of alternative application-specific solutions. Keep in mind that the Data API has a hard upper limit on the amount of documents it will count, and that an exception will be thrown by this method if this limit is encountered.

Expand source code
def count_documents(
    self,
    filter: FilterType,
    *,
    upper_bound: int,
    max_time_ms: int | None = None,
) -> int:
    """
    Return how many documents in the collection match a given filter.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        upper_bound: a required ceiling on the result of the count operation.
            If the actual number of documents exceeds this value,
            an exception will be raised.
            Furthermore, if the actual number of documents exceeds the maximum
            count that the Data API can reach (regardless of upper_bound),
            an exception will be raised.
        max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
            If not passed, the collection-level setting is used instead.

    Returns:
        the exact count of matching documents.

    Raises:
        TooManyDocumentsToCountException: if the response signals that there
            are more documents than the server can count, or if the count
            is greater than `upper_bound`.
        DataAPIFaultyResponseException: if the response carries no count.

    Example:
        >>> my_coll.insert_many([{"seq": i} for i in range(20)])
        InsertManyResult(...)
        >>> my_coll.count_documents({}, upper_bound=100)
        20
        >>> my_coll.count_documents({"seq":{"$gt": 15}}, upper_bound=100)
        4
        >>> my_coll.count_documents({}, upper_bound=10)
        Traceback (most recent call last):
            ... ...
        astrapy.exceptions.TooManyDocumentsToCountException

    Note:
        Count operations are expensive: for this reason, the best practice
        is to provide a reasonable `upper_bound` according to the caller
        expectations. Moreover, indiscriminate usage of count operations
        for sizeable amounts of documents (i.e. in the thousands and more)
        is discouraged in favor of alternative application-specific solutions.
        Keep in mind that the Data API has a hard upper limit on the amount
        of documents it will count, and that an exception will be thrown
        by this method if this limit is encountered.
    """

    timeout_ms = max_time_ms or self.api_options.max_time_ms
    payload = {"countDocuments": {"filter": filter}}
    logger.info(f"countDocuments on '{self.name}'")
    response = self._api_commander.request(
        payload=payload,
        timeout_info=base_timeout_info(timeout_ms),
    )
    logger.info(f"finished countDocuments on '{self.name}'")

    status = response.get("status", {})
    if "count" not in status:
        raise DataAPIFaultyResponseException(
            text="Faulty response from count_documents API command.",
            raw_response=response,
        )

    count: int = status["count"]
    # a "moreData" flag means the server stopped counting at its own ceiling
    if status.get("moreData", False):
        raise TooManyDocumentsToCountException(
            text=f"Document count exceeds {count}, the maximum allowed by the server",
            server_max_count_exceeded=True,
        )
    if count > upper_bound:
        raise TooManyDocumentsToCountException(
            text="Document count exceeds required upper bound",
            server_max_count_exceeded=False,
        )
    return count
def delete_all(self, *, max_time_ms: int | None = None) ‑> dict[str, typing.Any]

Delete all documents in a collection.

Args

max_time_ms
a timeout, in milliseconds, for the underlying HTTP request. If not passed, the collection-level setting is used instead.

Returns

a dictionary of the form {"ok": 1} to signal successful deletion.

Example

>>> my_coll.distinct("seq")
[2, 1, 0]
>>> my_coll.count_documents({}, upper_bound=100)
4
>>> my_coll.delete_all()
{'ok': 1}
>>> my_coll.count_documents({}, upper_bound=100)
0

Note

Use with caution.

Deprecated since version: 1.3.0

This will be removed in 2.0.0. Use delete_many with filter={} instead.

Expand source code
@deprecation.deprecated(  # type: ignore[misc]
    deprecated_in="1.3.0",
    removed_in="2.0.0",
    current_version=__version__,
    details="Use delete_many with filter={} instead.",
)
def delete_all(self, *, max_time_ms: int | None = None) -> dict[str, Any]:
    """
    Remove every document from the collection. This is carried out
    by calling `delete_many` with an empty filter.

    Args:
        max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
            If not passed, the collection-level setting is used instead.

    Returns:
        a dictionary of the form {"ok": 1} to signal successful deletion.

    Raises:
        DataAPIFaultyResponseException: if the deleted count reported
            by `delete_many` is not the expected one.

    Example:
        >>> my_coll.distinct("seq")
        [2, 1, 0]
        >>> my_coll.count_documents({}, upper_bound=100)
        4
        >>> my_coll.delete_all()
        {'ok': 1}
        >>> my_coll.count_documents({}, upper_bound=100)
        0

    Note:
        Use with caution.
    """
    deletion_result = self.delete_many(filter={}, max_time_ms=max_time_ms)
    # NOTE(review): a deleted count of -1 is what is taken as success here;
    # presumably the API reports -1 for a whole-collection deletion - confirm.
    if deletion_result.deleted_count != -1:
        raise DataAPIFaultyResponseException(
            text="Unexpected response from collection.delete_many({}).",
            raw_response=None,
        )
    return {"ok": 1}
def delete_many(self, filter: FilterType, *, max_time_ms: int | None = None) ‑> DeleteResult

Delete all documents matching a provided filter.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators. Passing an empty filter, {}, completely erases all contents of the collection.
max_time_ms
a timeout, in milliseconds, for the operation. If not passed, the collection-level setting is used instead: keep in mind that this method entails successive HTTP requests to the API, depending on how many documents are to be deleted. For this reason, in most cases it is suggested to relax the timeout compared to other method calls.

Returns

a DeleteResult object summarizing the outcome of the delete operation.

Example

>>> my_coll.insert_many([{"seq": 1}, {"seq": 0}, {"seq": 2}])
InsertManyResult(...)
>>> my_coll.delete_many({"seq": {"$lte": 1}})
DeleteResult(raw_results=..., deleted_count=2)
>>> my_coll.distinct("seq")
[2]
>>> my_coll.delete_many({"seq": {"$lte": 1}})
DeleteResult(raw_results=..., deleted_count=0)

Note

This operation is in general not atomic. Depending on the amount of matching documents, it can keep running (in a blocking way) for a macroscopic time. In that case, new documents that are meanwhile inserted (e.g. from another process/application) will be deleted during the execution of this method call until the collection is devoid of matches. An exception is the filter={} case, whereby the operation is atomic.

Expand source code
def delete_many(
    self,
    filter: FilterType,
    *,
    max_time_ms: int | None = None,
) -> DeleteResult:
    """
    Delete every document that matches the provided filter, issuing
    as many successive API requests as needed.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
            Passing an empty filter, `{}`, completely erases all contents
            of the collection.
        max_time_ms: a timeout, in milliseconds, for the operation.
            If not passed, the collection-level setting is used instead:
            keep in mind that this method entails successive HTTP requests
            to the API, depending on how many documents are to be deleted.
            For this reason, in most cases it is suggested to relax the
            timeout compared to other method calls.

    Returns:
        a DeleteResult object summarizing the outcome of the delete operation.

    Raises:
        DeleteManyException: if a response comes back with a nonempty
            'errors' field. The exception carries a partial result covering
            the requests completed before the failing one.
        DataAPIFaultyResponseException: if a response has no deleted count.

    Example:
        >>> my_coll.insert_many([{"seq": 1}, {"seq": 0}, {"seq": 2}])
        InsertManyResult(...)
        >>> my_coll.delete_many({"seq": {"$lte": 1}})
        DeleteResult(raw_results=..., deleted_count=2)
        >>> my_coll.distinct("seq")
        [2]
        >>> my_coll.delete_many({"seq": {"$lte": 1}})
        DeleteResult(raw_results=..., deleted_count=0)

    Note:
        This operation is in general not atomic. Depending on the amount
        of matching documents, it can keep running (in a blocking way)
        for a macroscopic time. In that case, new documents that are
        meanwhile inserted (e.g. from another process/application) will be
        deleted during the execution of this method call until the
        collection is devoid of matches.
        An exception is the `filter={}` case, whereby the operation is atomic.
    """
    responses: list[dict[str, Any]] = []
    total_deleted = 0
    timeout_ms = max_time_ms or self.api_options.max_time_ms
    # one overall time budget is shared by all requests of this call
    timeout_manager = MultiCallTimeoutManager(overall_max_time_ms=timeout_ms)
    payload = {"deleteMany": {"filter": filter}}
    logger.info(f"starting delete_many on '{self.name}'")
    while True:
        logger.info(f"deleteMany on '{self.name}'")
        response = self._api_commander.request(
            payload=payload,
            raise_api_errors=False,
            timeout_info=timeout_manager.remaining_timeout_info(),
        )
        logger.info(f"finished deleteMany on '{self.name}'")

        # errors are inspected here (raise_api_errors=False above), so that
        # the exception can report what was deleted before the failure
        if response.get("errors", []):
            result_so_far = DeleteResult(
                deleted_count=total_deleted,
                raw_results=responses,
            )
            responses_with_failure = responses + [response]
            raise DeleteManyException.from_responses(
                commands=[None] * len(responses_with_failure),
                raw_responses=responses_with_failure,
                partial_result=result_so_far,
            )

        status = response.get("status", {})
        batch_deleted = status.get("deletedCount")
        if batch_deleted is None:
            raise DataAPIFaultyResponseException(
                text="Faulty response from delete_many API command.",
                raw_response=response,
            )
        responses.append(response)
        total_deleted += batch_deleted
        # keep going for as long as the API signals more documents to delete
        if not status.get("moreData", False):
            break

    logger.info(f"finished delete_many on '{self.name}'")
    return DeleteResult(
        deleted_count=total_deleted,
        raw_results=responses,
    )
def delete_one(self, filter: FilterType, *, vector: VectorType | None = None, vectorize: str | None = None, sort: SortType | None = None, max_time_ms: int | None = None) ‑> DeleteResult

Delete one document matching a provided filter. This method never deletes more than a single document, regardless of the number of matches to the provided filters.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
vector
a suitable vector, i.e. a list of float numbers of the appropriate dimensionality, to use vector search (i.e. ANN, or "approximate nearest-neighbours" search), as the sorting criterion. In this way, the matched document (if any) will be the one that is most similar to the provided vector. This parameter cannot be used together with sort. See the find method for more details on this parameter. DEPRECATED (removal in 2.0). Use a $vector key in the sort clause dict instead.
vectorize
a string to be made into a vector to perform vector search. This can be supplied in (exclusive) alternative to vector, provided such a service is configured for the collection, and achieves the same effect. DEPRECATED (removal in 2.0). Use a $vectorize key in the sort clause dict instead.
sort
with this dictionary parameter one can control the sorting order of the documents matching the filter, effectively determining what document will come first and hence be the deleted one. See the find method for more on sorting.
max_time_ms
a timeout, in milliseconds, for the underlying HTTP request. If not passed, the collection-level setting is used instead.

Returns

a DeleteResult object summarizing the outcome of the delete operation.

Example

>>> my_coll.insert_many([{"seq": 1}, {"seq": 0}, {"seq": 2}])
InsertManyResult(...)
>>> my_coll.delete_one({"seq": 1})
DeleteResult(raw_results=..., deleted_count=1)
>>> my_coll.distinct("seq")
[0, 2]
>>> my_coll.delete_one(
...     {"seq": {"$exists": True}},
...     sort={"seq": astrapy.constants.SortDocuments.DESCENDING},
... )
DeleteResult(raw_results=..., deleted_count=1)
>>> my_coll.distinct("seq")
[0]
>>> my_coll.delete_one({"seq": 2})
DeleteResult(raw_results=..., deleted_count=0)
Expand source code
def delete_one(
    self,
    filter: FilterType,
    *,
    vector: VectorType | None = None,
    vectorize: str | None = None,
    sort: SortType | None = None,
    max_time_ms: int | None = None,
) -> DeleteResult:
    """
    Delete a single document matching the provided filter.
    At most one document is deleted by this method, no matter how many
    documents match the filter.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        vector: a suitable vector, i.e. a list of float numbers of the appropriate
            dimensionality, to use vector search (i.e. ANN,
            or "approximate nearest-neighbours" search), as the sorting criterion.
            In this way, the matched document (if any) will be the one
            that is most similar to the provided vector.
            This parameter cannot be used together with `sort`.
            See the `find` method for more details on this parameter.
            *DEPRECATED* (removal in 2.0). Use a `$vector` key in the
            sort clause dict instead.
        vectorize: a string to be made into a vector to perform vector search.
            This can be supplied in (exclusive) alternative to `vector`,
            provided such a service is configured for the collection,
            and achieves the same effect.
            *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the
            sort clause dict instead.
        sort: with this dictionary parameter one can control the sorting
            order of the documents matching the filter, effectively
            determining what document will come first and hence be the
            deleted one. See the `find` method for more on sorting.
        max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
            If not passed, the collection-level setting is used instead.

    Returns:
        a DeleteResult object summarizing the outcome of the delete operation.

    Raises:
        DataAPIFaultyResponseException: if the response has no deleted count.

    Example:
        >>> my_coll.insert_many([{"seq": 1}, {"seq": 0}, {"seq": 2}])
        InsertManyResult(...)
        >>> my_coll.delete_one({"seq": 1})
        DeleteResult(raw_results=..., deleted_count=1)
        >>> my_coll.distinct("seq")
        [0, 2]
        >>> my_coll.delete_one(
        ...     {"seq": {"$exists": True}},
        ...     sort={"seq": astrapy.constants.SortDocuments.DESCENDING},
        ... )
        DeleteResult(raw_results=..., deleted_count=1)
        >>> my_coll.distinct("seq")
        [0]
        >>> my_coll.delete_one({"seq": 2})
        DeleteResult(raw_results=..., deleted_count=0)
    """

    check_deprecated_vector_ize(
        vector=vector, vectors=None, vectorize=vectorize, kind="find"
    )
    # the deprecated vector/vectorize parameters are merged into the sort clause
    effective_sort = _collate_vector_to_sort(sort, vector, vectorize)
    timeout_ms = max_time_ms or self.api_options.max_time_ms

    # only the clauses that are actually set make it to the payload
    delete_one_clauses: dict[str, Any] = {}
    if filter is not None:
        delete_one_clauses["filter"] = filter
    if effective_sort is not None:
        delete_one_clauses["sort"] = effective_sort
    payload = {"deleteOne": delete_one_clauses}

    logger.info(f"deleteOne on '{self.name}'")
    response = self._api_commander.request(
        payload=payload,
        timeout_info=base_timeout_info(timeout_ms),
    )
    logger.info(f"finished deleteOne on '{self.name}'")

    status = response.get("status", {})
    if "deletedCount" not in status:
        raise DataAPIFaultyResponseException(
            text="Faulty response from delete_one API command.",
            raw_response=response,
        )
    return DeleteResult(
        deleted_count=status["deletedCount"],
        raw_results=[response],
    )
def distinct(self, key: str, *, filter: FilterType | None = None, max_time_ms: int | None = None) ‑> list[typing.Any]

Return a list of the unique values of key across the documents in the collection that match the provided filter.

Args

key
the name of the field whose value is inspected across documents. Keys can use dot-notation to descend to deeper document levels. Example of acceptable key values: "field" "field.subfield" "field.3" "field.3.subfield" If lists are encountered and no numeric index is specified, all items in the list are visited.
filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
max_time_ms
a timeout, in milliseconds, with the same meaning as for find. If not passed, the collection-level setting is used instead.

Returns

a list of all different values for key found across the documents that match the filter. The result list has no repeated items.

Example

>>> my_coll.insert_many(
...     [
...         {"name": "Marco", "food": ["apple", "orange"], "city": "Helsinki"},
...         {"name": "Emma", "food": {"likes_fruit": True, "allergies": []}},
...     ]
... )
InsertManyResult(raw_results=..., inserted_ids=['c5b99f37-...', 'd6416321-...'])
>>> my_coll.distinct("name")
['Marco', 'Emma']
>>> my_coll.distinct("city")
['Helsinki']
>>> my_coll.distinct("food")
['apple', 'orange', {'likes_fruit': True, 'allergies': []}]
>>> my_coll.distinct("food.1")
['orange']
>>> my_coll.distinct("food.allergies")
[]
>>> my_coll.distinct("food.likes_fruit")
[True]

Note

It must be kept in mind that distinct is a client-side operation, which effectively browses all required documents using the logic of the find method and collects the unique values found for key. As such, there may be performance, latency and ultimately billing implications if the amount of matching documents is large.

Note

For details on the behaviour of "distinct" in conjunction with real-time changes in the collection contents, see the Note of the find command.

Expand source code
def distinct(
    self,
    key: str,
    *,
    filter: FilterType | None = None,
    max_time_ms: int | None = None,
) -> list[Any]:
    """
    Collect the distinct values found under `key` among the documents
    of the collection that satisfy the given filter.

    Args:
        key: the name of the field to inspect in each document.
            Dot-notation can be used to reach nested levels of the documents.
            Examples of valid `key` values:
                "field"
                "field.subfield"
                "field.3"
                "field.3.subfield"
            When a list is met and no numeric index is given,
            every item of the list is visited.
        filter: a predicate, expressed as a dictionary following the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        max_time_ms: a timeout, in milliseconds. It is handed to the
            underlying cursor as its overall time budget
            (`overall_max_time_ms`), while the cursor's own `max_time_ms`
            is left unset.
            If not passed, the collection-level setting is used instead.

    Returns:
        a list with all the different values for `key` found across the
        documents matching the filter. No item is repeated in the result.

    Example:
        >>> my_coll.insert_many(
        ...     [
        ...         {"name": "Marco", "food": ["apple", "orange"], "city": "Helsinki"},
        ...         {"name": "Emma", "food": {"likes_fruit": True, "allergies": []}},
        ...     ]
        ... )
        InsertManyResult(raw_results=..., inserted_ids=['c5b99f37-...', 'd6416321-...'])
        >>> my_coll.distinct("name")
        ['Marco', 'Emma']
        >>> my_coll.distinct("city")
        ['Helsinki']
        >>> my_coll.distinct("food")
        ['apple', 'orange', {'likes_fruit': True, 'allergies': []}]
        >>> my_coll.distinct("food.1")
        ['orange']
        >>> my_coll.distinct("food.allergies")
        []
        >>> my_coll.distinct("food.likes_fruit")
        [True]

    Note:
        Keep in mind that `distinct` is a client-side operation: it browses
        all required documents with the logic of the `find` method and
        gathers the unique values found for `key`.
        Hence there may be performance, latency and ultimately
        billing implications when the number of matching documents is large.

    Note:
        For details on how "distinct" behaves when the collection contents
        change in real time, see the Note of the `find` command.
    """

    overall_timeout_ms = (
        max_time_ms if max_time_ms else self.api_options.max_time_ms
    )
    # Only `key` is requested through the projection; the unique-value
    # collection itself is delegated to the cursor.
    return Cursor(
        collection=self,
        filter=filter,
        projection={key: True},
        max_time_ms=None,
        overall_max_time_ms=overall_timeout_ms,
    ).distinct(key)
def drop(self, *, max_time_ms: int | None = None) ‑> dict[str, typing.Any]

Drop the collection, i.e. delete it from the database along with all the documents it contains.

Args

max_time_ms
a timeout, in milliseconds, for the underlying HTTP request. If not passed, the collection-level setting is used instead. Remember there is no guarantee that a request that has timed out is not in fact honored.

Returns

a dictionary of the form {"ok": 1} to signal successful deletion.

Example

>>> my_coll.find_one({})
{'_id': '...', 'a': 100}
>>> my_coll.drop()
{'ok': 1}
>>> my_coll.find_one({})
Traceback (most recent call last):
    ... ...
astrapy.exceptions.DataAPIResponseException: Collection does not exist, collection name: my_collection

Note

Use with caution.

Note

Once the method succeeds, methods on this object can still be invoked: however, this hardly makes sense as the underlying actual collection is no more. It is the responsibility of the developer to design a correct flow which avoids using a deceased collection any further.

Expand source code
def drop(self, *, max_time_ms: int | None = None) -> dict[str, Any]:
    """
    Drop the collection, that is, remove it from the database together
    with all the documents stored in it.

    Args:
        max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
            If not passed, the collection-level setting is used instead.
            Remember there is no guarantee that a request that has
            timed out is not in fact honored.

    Returns:
        a dictionary of the form {"ok": 1} to signal successful deletion.

    Example:
        >>> my_coll.find_one({})
        {'_id': '...', 'a': 100}
        >>> my_coll.drop()
        {'ok': 1}
        >>> my_coll.find_one({})
        Traceback (most recent call last):
            ... ...
        astrapy.exceptions.DataAPIResponseException: Collection does not exist, collection name: my_collection

    Note:
        Use with caution.

    Note:
        After the method succeeds, methods on this object can still be
        invoked: this hardly makes sense, however, since the underlying
        actual collection is no more.
        It is the responsibility of the developer to design a correct flow
        which avoids using a deceased collection any further.
    """

    timeout_ms = max_time_ms if max_time_ms else self.api_options.max_time_ms
    logger.info(f"dropping collection '{self.name}' (self)")
    # The actual deletion is delegated to the owning database object.
    outcome = self.database.drop_collection(self, max_time_ms=timeout_ms)
    logger.info(f"finished dropping collection '{self.name}' (self)")
    return outcome
def estimated_document_count(self, *, max_time_ms: int | None = None) ‑> int

Query the API server for an estimate of the document count in the collection.

Contrary to count_documents, this method has no filtering parameters.

Args

max_time_ms
a timeout, in milliseconds, for the underlying HTTP request. If not passed, the collection-level setting is used instead.

Returns

a server-provided estimate count of the documents in the collection.

Example

>>> my_coll.estimated_document_count()
35700
Expand source code
def estimated_document_count(
    self,
    *,
    max_time_ms: int | None = None,
) -> int:
    """
    Ask the API server for an estimate of how many documents
    the collection contains.

    Unlike `count_documents`, this method takes no filtering parameters.

    Args:
        max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
            If not passed, the collection-level setting is used instead.

    Returns:
        a server-provided estimate count of the documents in the collection.

    Raises:
        DataAPIFaultyResponseException: if the "status" part of the API
            response carries no "count" entry.

    Example:
        >>> my_coll.estimated_document_count()
        35700
    """
    timeout_ms = max_time_ms if max_time_ms else self.api_options.max_time_ms
    command: dict[str, Any] = {"estimatedDocumentCount": {}}
    logger.info(f"estimatedDocumentCount on '{self.name}'")
    response = self._api_commander.request(
        payload=command,
        timeout_info=base_timeout_info(timeout_ms),
    )
    logger.info(f"finished estimatedDocumentCount on '{self.name}'")
    # Guard clause: a response without a count in its status is unusable.
    if "count" not in response.get("status", {}):
        raise DataAPIFaultyResponseException(
            text="Faulty response from estimated_document_count API command.",
            raw_response=response,
        )
    estimate: int = response["status"]["count"]
    return estimate
def find(self, filter: FilterType | None = None, *, projection: ProjectionType | None = None, skip: int | None = None, limit: int | None = None, vector: VectorType | None = None, vectorize: str | None = None, include_similarity: bool | None = None, include_sort_vector: bool | None = None, sort: SortType | None = None, max_time_ms: int | None = None) ‑> Cursor

Find documents on the collection, matching a certain provided filter.

The method returns a Cursor that can then be iterated over. Depending on the method call pattern, the iteration over all documents can reflect collection mutations that occurred since the find method was called, or not. In cases where the cursor reflects mutations in real-time, it will iterate over documents in an approximate way (i.e. exhibiting occasional skipped or duplicate documents). This happens when making use of the sort option in a non-vector-search manner.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
projection
it controls which parts of the document are returned. It can be an allow-list: {"f1": True, "f2": True}, or a deny-list: {"fx": False, "fy": False}, but not a mixture (except for the _id and other special fields, which can be associated to both True or False independently of the rest of the specification). The special star-projections {"*": True} and {"*": False} have the effect of returning the whole document and {} respectively. For lists in documents, slice directives can be passed to select portions of the list: for instance, {"array": {"$slice": 2}}, {"array": {"$slice": -2}}, {"array": {"$slice": [4, 2]}} or {"array": {"$slice": [-4, 2]}}. An iterable over strings will be treated implicitly as an allow-list. The default projection (used if this parameter is not passed) does not necessarily include "special" fields such as $vector or $vectorize. See the Data API documentation for more on projections.
skip
with this integer parameter, what would be the first skip documents returned by the query are discarded, and the results start from the (skip+1)-th document. This parameter can be used only in conjunction with an explicit sort criterion of the ascending/descending type (i.e. it cannot be used when not sorting, nor with vector-based ANN search).
limit
this (integer) parameter sets a limit over how many documents are returned. Once limit is reached (or the cursor is exhausted for lack of matching documents), nothing more is returned.
vector
a suitable vector, i.e. a list of float numbers of the appropriate dimensionality, to perform vector search (i.e. ANN, or "approximate nearest-neighbours" search). When running similarity search on a collection, no other sorting criteria can be specified. Moreover, there is an upper bound to the number of documents that can be returned. For details, see the Note about upper bounds and the Data API documentation. DEPRECATED (removal in 2.0). Use a $vector key in the sort clause dict instead.
vectorize
a string to be made into a vector to perform vector search. This can be supplied in (exclusive) alternative to vector, provided such a service is configured for the collection, and achieves the same effect. DEPRECATED (removal in 2.0). Use a $vectorize key in the sort clause dict instead.
include_similarity
a boolean to request the numeric value of the similarity to be returned as an added "$similarity" key in each returned document. Can only be used for vector ANN search, i.e. when either vector is supplied or the sort parameter has the shape {"$vector": …}.
include_sort_vector
a boolean to request the query vector used in this search. If set to True (and if the invocation is a vector search), calling the get_sort_vector method on the returned cursor will yield the vector used for the ANN search.
sort
with this dictionary parameter one can control the order the documents are returned. See the Note about sorting, as well as the one about upper bounds, for details. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
max_time_ms
a timeout, in milliseconds, for each single one of the underlying HTTP requests used to fetch documents as the cursor is iterated over. If not passed, the collection-level setting is used instead.

Returns

a Cursor object representing iterations over the matching documents
(see the Cursor object for how to use it. The simplest thing is to
run a for loop
for document in collection.find(...):).

Examples

>>> filter = {"seq": {"$exists": True}}
>>> for doc in my_coll.find(filter, projection={"seq": True}, limit=5):
...     print(doc["seq"])
...
37
35
10
36
27
>>> cursor1 = my_coll.find(
...     {},
...     limit=4,
...     sort={"seq": astrapy.constants.SortDocuments.DESCENDING},
... )
>>> [doc["_id"] for doc in cursor1]
['97e85f81-...', '1581efe4-...', '...', '...']
>>> cursor2 = my_coll.find({}, limit=3)
>>> cursor2.distinct("seq")
[37, 35, 10]
>>> my_coll.insert_many([
...     {"tag": "A", "$vector": [4, 5]},
...     {"tag": "B", "$vector": [3, 4]},
...     {"tag": "C", "$vector": [3, 2]},
...     {"tag": "D", "$vector": [4, 1]},
...     {"tag": "E", "$vector": [2, 5]},
... ])
>>> ann_tags = [
...     document["tag"]
...     for document in my_coll.find(
...         {},
...         sort={"$vector": [3, 3]},
...         limit=3,
...     )
... ]
>>> ann_tags
['A', 'B', 'C']
>>> # (assuming the collection has metric VectorMetric.COSINE)
>>> cursor = my_coll.find(
...     sort={"$vector": [3, 3]},
...     limit=3,
...     include_sort_vector=True,
... )
>>> cursor.get_sort_vector()
[3.0, 3.0]
>>> matches = list(cursor)
>>> cursor.get_sort_vector()
[3.0, 3.0]

Note

The following are example values for the sort parameter. When no particular order is required: sort={} # (default when parameter not provided) When sorting by a certain value in ascending/descending order: sort={"field": SortDocuments.ASCENDING} sort={"field": SortDocuments.DESCENDING} When sorting first by "field" and then by "subfield" (while modern Python versions preserve the order of dictionaries, it is suggested for clarity to employ a collections.OrderedDict in these cases): sort={ "field": SortDocuments.ASCENDING, "subfield": SortDocuments.ASCENDING, } When running a vector similarity (ANN) search: sort={"$vector": [0.4, 0.15, -0.5]}

Note

Some combinations of arguments impose an implicit upper bound on the number of documents that are returned by the Data API. More specifically: (a) Vector ANN searches cannot return more than a number of documents that at the time of writing is set to 1000 items. (b) When using a sort criterion of the ascending/descending type, the Data API will return a smaller number of documents, set to 20 at the time of writing, and stop there. The returned documents are the top results across the whole collection according to the requested criterion. These provisions should be kept in mind even when subsequently running a command such as .distinct() on a cursor.

Note

When not specifying sorting criteria at all (by vector or otherwise), the cursor can scroll through an arbitrary number of documents as the Data API and the client periodically exchange new chunks of documents. It should be noted that the behavior of the cursor in the case documents have been added/removed after the find was started depends on database internals and it is not guaranteed, nor excluded, that such "real-time" changes in the data would be picked up by the cursor.

Expand source code
def find(
    self,
    filter: FilterType | None = None,
    *,
    projection: ProjectionType | None = None,
    skip: int | None = None,
    limit: int | None = None,
    vector: VectorType | None = None,
    vectorize: str | None = None,
    include_similarity: bool | None = None,
    include_sort_vector: bool | None = None,
    sort: SortType | None = None,
    max_time_ms: int | None = None,
) -> Cursor:
    """
    Find documents on the collection, matching a certain provided filter.

    The method returns a Cursor that can then be iterated over. Depending
    on the method call pattern, the iteration over all documents may or may
    not reflect collection mutations that occurred since the `find` method
    was called. In cases where the cursor reflects mutations in real-time,
    it will iterate over documents in an approximate way (i.e. exhibiting
    occasional skipped or duplicate documents). This happens when making
    use of the `sort` option in a non-vector-search manner.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        projection: it controls which parts of the document are returned.
            It can be an allow-list: `{"f1": True, "f2": True}`,
            or a deny-list: `{"fx": False, "fy": False}`, but not a mixture
            (except for the `_id` and other special fields, which can be
            associated to both True or False independently of the rest
            of the specification).
            The special star-projections `{"*": True}` and `{"*": False}`
            have the effect of returning the whole document and `{}` respectively.
            For lists in documents, slice directives can be passed to select
            portions of the list: for instance, `{"array": {"$slice": 2}}`,
            `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
            `{"array": {"$slice": [-4, 2]}}`.
            An iterable over strings will be treated implicitly as an allow-list.
            The default projection (used if this parameter is not passed) does not
            necessarily include "special" fields such as `$vector` or `$vectorize`.
            See the Data API documentation for more on projections.
        skip: with this integer parameter, what would be the first `skip`
            documents returned by the query are discarded, and the results
            start from the (skip+1)-th document.
            This parameter can be used only in conjunction with an explicit
            `sort` criterion of the ascending/descending type (i.e. it cannot
            be used when not sorting, nor with vector-based ANN search).
        limit: this (integer) parameter sets a limit over how many documents
            are returned. Once `limit` is reached (or the cursor is exhausted
            for lack of matching documents), nothing more is returned.
        vector: a suitable vector, i.e. a list of float numbers of the appropriate
            dimensionality, to perform vector search (i.e. ANN,
            or "approximate nearest-neighbours" search).
            When running similarity search on a collection, no other sorting
            criteria can be specified. Moreover, there is an upper bound
            to the number of documents that can be returned. For details,
            see the Note about upper bounds and the Data API documentation.
            *DEPRECATED* (removal in 2.0). Use a `$vector` key in the
            sort clause dict instead.
        vectorize: a string to be made into a vector to perform vector search.
            This can be supplied in (exclusive) alternative to `vector`,
            provided such a service is configured for the collection,
            and achieves the same effect.
            *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the
            sort clause dict instead.
        include_similarity: a boolean to request the numeric value of the
            similarity to be returned as an added "$similarity" key in each
            returned document. Can only be used for vector ANN search, i.e.
            when either `vector` is supplied or the `sort` parameter has the
            shape {"$vector": ...}.
        include_sort_vector: a boolean to request the query vector used in
            this search. If set to True (and if the invocation is a vector
            search), calling the `get_sort_vector` method on the returned
            cursor will yield the vector used for the ANN search.
        sort: with this dictionary parameter one can control the order
            the documents are returned. See the Note about sorting, as well as
            the one about upper bounds, for details.
            Vector-based ANN sorting is achieved by providing a "$vector"
            or a "$vectorize" key in `sort`.
        max_time_ms: a timeout, in milliseconds, for each single one
            of the underlying HTTP requests used to fetch documents as the
            cursor is iterated over.
            If not passed, the collection-level setting is used instead.

    Returns:
        a Cursor object representing iterations over the matching documents
        (see the Cursor object for how to use it. The simplest thing is to
        run a for loop: `for document in collection.find(...):`).

    Raises:
        ValueError: if `include_similarity` is passed (with any value other
            than None) while the resulting sort is not a vector search.

    Examples:
        >>> filter = {"seq": {"$exists": True}}
        >>> for doc in my_coll.find(filter, projection={"seq": True}, limit=5):
        ...     print(doc["seq"])
        ...
        37
        35
        10
        36
        27
        >>> cursor1 = my_coll.find(
        ...     {},
        ...     limit=4,
        ...     sort={"seq": astrapy.constants.SortDocuments.DESCENDING},
        ... )
        >>> [doc["_id"] for doc in cursor1]
        ['97e85f81-...', '1581efe4-...', '...', '...']
        >>> cursor2 = my_coll.find({}, limit=3)
        >>> cursor2.distinct("seq")
        [37, 35, 10]

        >>> my_coll.insert_many([
        ...     {"tag": "A", "$vector": [4, 5]},
        ...     {"tag": "B", "$vector": [3, 4]},
        ...     {"tag": "C", "$vector": [3, 2]},
        ...     {"tag": "D", "$vector": [4, 1]},
        ...     {"tag": "E", "$vector": [2, 5]},
        ... ])
        >>> ann_tags = [
        ...     document["tag"]
        ...     for document in my_coll.find(
        ...         {},
        ...         sort={"$vector": [3, 3]},
        ...         limit=3,
        ...     )
        ... ]
        >>> ann_tags
        ['A', 'B', 'C']
        >>> # (assuming the collection has metric VectorMetric.COSINE)

        >>> cursor = my_coll.find(
        ...     sort={"$vector": [3, 3]},
        ...     limit=3,
        ...     include_sort_vector=True,
        ... )
        >>> cursor.get_sort_vector()
        [3.0, 3.0]
        >>> matches = list(cursor)
        >>> cursor.get_sort_vector()
        [3.0, 3.0]

    Note:
        The following are example values for the `sort` parameter.
        When no particular order is required:
            sort={}  # (default when parameter not provided)
        When sorting by a certain value in ascending/descending order:
            sort={"field": SortDocuments.ASCENDING}
            sort={"field": SortDocuments.DESCENDING}
        When sorting first by "field" and then by "subfield"
        (while modern Python versions preserve the order of dictionaries,
        it is suggested for clarity to employ a `collections.OrderedDict`
        in these cases):
            sort={
                "field": SortDocuments.ASCENDING,
                "subfield": SortDocuments.ASCENDING,
            }
        When running a vector similarity (ANN) search:
            sort={"$vector": [0.4, 0.15, -0.5]}

    Note:
        Some combinations of arguments impose an implicit upper bound on the
        number of documents that are returned by the Data API. More specifically:
        (a) Vector ANN searches cannot return more than a number of documents
        that at the time of writing is set to 1000 items.
        (b) When using a sort criterion of the ascending/descending type,
        the Data API will return a smaller number of documents, set to 20
        at the time of writing, and stop there. The returned documents are
        the top results across the whole collection according to the requested
        criterion.
        These provisions should be kept in mind even when subsequently running
        a command such as `.distinct()` on a cursor.

    Note:
        When not specifying sorting criteria at all (by vector or otherwise),
        the cursor can scroll through an arbitrary number of documents as
        the Data API and the client periodically exchange new chunks of documents.
        It should be noted that the behavior of the cursor in the case documents
        have been added/removed after the `find` was started depends on database
        internals and it is not guaranteed, nor excluded, that such "real-time"
        changes in the data would be picked up by the cursor.
    """

    check_deprecated_vector_ize(
        vector=vector, vectors=None, vectorize=vectorize, kind="find"
    )
    # Fold the deprecated `vector`/`vectorize` arguments into the sort clause.
    effective_sort = _collate_vector_to_sort(sort, vector, vectorize)
    request_timeout_ms = (
        max_time_ms if max_time_ms else self.api_options.max_time_ms
    )
    similarity_requested = include_similarity is not None
    if similarity_requested and not _is_vector_sort(effective_sort):
        raise ValueError(
            "Cannot use `include_similarity` unless for vector search."
        )
    # Build the cursor, then apply each option in turn; every call
    # returns the cursor to continue with.
    base_cursor = Cursor(
        collection=self,
        filter=filter,
        projection=projection,
        max_time_ms=request_timeout_ms,
        overall_max_time_ms=None,
    )
    paged_cursor = base_cursor.skip(skip).limit(limit)
    sorted_cursor = paged_cursor.sort(effective_sort)
    with_similarity = sorted_cursor.include_similarity(include_similarity)
    return with_similarity.include_sort_vector(include_sort_vector)
def find_one(self, filter: FilterType | None = None, *, projection: ProjectionType | None = None, vector: VectorType | None = None, vectorize: str | None = None, include_similarity: bool | None = None, sort: SortType | None = None, max_time_ms: int | None = None) ‑> Optional[Dict[str, Any]]

Run a search, returning the first document in the collection that matches provided filters, if any is found.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
projection
it controls which parts of the document are returned. It can be an allow-list: {"f1": True, "f2": True}, or a deny-list: {"fx": False, "fy": False}, but not a mixture (except for the _id and other special fields, which can be associated to both True or False independently of the rest of the specification). The special star-projections {"*": True} and {"*": False} have the effect of returning the whole document and {} respectively. For lists in documents, slice directives can be passed to select portions of the list: for instance, {"array": {"$slice": 2}}, {"array": {"$slice": -2}}, {"array": {"$slice": [4, 2]}} or {"array": {"$slice": [-4, 2]}}. An iterable over strings will be treated implicitly as an allow-list. The default projection (used if this parameter is not passed) does not necessarily include "special" fields such as $vector or $vectorize. See the Data API documentation for more on projections.
vector
a suitable vector, i.e. a list of float numbers of the appropriate dimensionality, to perform vector search (i.e. ANN, or "approximate nearest-neighbours" search), extracting the most similar document in the collection matching the filter. DEPRECATED (removal in 2.0). Use a $vector key in the sort clause dict instead.
vectorize
a string to be made into a vector to perform vector search. Using vectorize assumes a suitable service is configured for the collection. DEPRECATED (removal in 2.0). Use a $vectorize key in the sort clause dict instead.
include_similarity
a boolean to request the numeric value of the similarity to be returned as an added "$similarity" key in the returned document. Can only be used for vector ANN search, i.e. when either vector is supplied or the sort parameter has the shape {"$vector": …}.
sort
with this dictionary parameter one can control the order the documents are returned. See the Note about sorting for details. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
max_time_ms
a timeout, in milliseconds, for the underlying HTTP request. If not passed, the collection-level setting is used instead.

Returns

a dictionary expressing the required document, otherwise None.

Examples

>>> my_coll.find_one({})
{'_id': '68d1e515-...', 'seq': 37}
>>> my_coll.find_one({"seq": 10})
{'_id': 'd560e217-...', 'seq': 10}
>>> my_coll.find_one({"seq": 1011})
>>> # (returns None for no matches)
>>> my_coll.find_one({}, projection={"seq": False})
{'_id': '68d1e515-...'}
>>> my_coll.find_one(
...     {},
...     sort={"seq": astrapy.constants.SortDocuments.DESCENDING},
... )
{'_id': '97e85f81-...', 'seq': 69}
>>> my_coll.find_one({}, sort={"$vector": [1, 0]}, projection={"*": True})
{'_id': '...', 'tag': 'D', '$vector': [4.0, 1.0]}

Note

See the find method for more details on the accepted parameters (whereas skip and limit are not valid parameters for find_one).

Expand source code
def find_one(
    self,
    filter: FilterType | None = None,
    *,
    projection: ProjectionType | None = None,
    vector: VectorType | None = None,
    vectorize: str | None = None,
    include_similarity: bool | None = None,
    sort: SortType | None = None,
    max_time_ms: int | None = None,
) -> DocumentType | None:
    """
    Search the collection and return the first document matching the
    provided filter, or None when nothing matches.

    Internally this delegates to `find` with a limit of one document and
    consumes the first item of the resulting cursor.

    Args:
        filter: a dictionary predicate written in the Data API filter
            syntax, for example:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            The Data API documentation lists all available operators.
        projection: selects which parts of the document are returned.
            Either an allow-list, `{"f1": True, "f2": True}`, or a
            deny-list, `{"fx": False, "fy": False}`; the two cannot be mixed
            (with the exception of `_id` and other special fields, which
            may be set to True or False regardless of the rest of
            the specification).
            The star-projections `{"*": True}` and `{"*": False}` return
            the whole document and `{}` respectively.
            List fields can be sliced with directives such as
            `{"array": {"$slice": 2}}`, `{"array": {"$slice": -2}}`,
            `{"array": {"$slice": [4, 2]}}` or
            `{"array": {"$slice": [-4, 2]}}`.
            An iterable of strings is implicitly treated as an allow-list.
            When this parameter is omitted, the default projection does not
            necessarily include "special" fields like `$vector`
            or `$vectorize`.
            The Data API documentation has more on projections.
        vector: a list of floats, of the appropriate dimensionality, used
            to run a vector search (ANN, i.e. "approximate
            nearest-neighbours"), so that the most similar document among
            those matching the filter is returned.
            *DEPRECATED* (removal in 2.0). Use a `$vector` key in the
            sort clause dict instead.
        vectorize: a string to be turned into a vector for vector search.
            This assumes a suitable service is configured for the collection.
            *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the
            sort clause dict instead.
        include_similarity: if True, the numeric similarity is added to the
            returned document under a "$similarity" key. Only usable with
            vector ANN search, i.e. when `vector` is passed or `sort`
            has the shape {"$vector": ...}.
        sort: a dictionary controlling the order in which documents are
            considered, hence which one comes first. Vector-based ANN
            sorting is obtained with a "$vector" or a "$vectorize" key
            in `sort`. See the `find` method for details on sorting.
        max_time_ms: timeout, in milliseconds, for the underlying HTTP
            request. The collection-level setting applies if not passed.

    Returns:
        a dictionary expressing the required document, otherwise None.

    Examples:
        >>> my_coll.find_one({})
        {'_id': '68d1e515-...', 'seq': 37}
        >>> my_coll.find_one({"seq": 10})
        {'_id': 'd560e217-...', 'seq': 10}
        >>> my_coll.find_one({"seq": 1011})
        >>> # (returns None for no matches)
        >>> my_coll.find_one({}, projection={"seq": False})
        {'_id': '68d1e515-...'}
        >>> my_coll.find_one(
        ...     {},
        ...     sort={"seq": astrapy.constants.SortDocuments.DESCENDING},
        ... )
        {'_id': '97e85f81-...', 'seq': 69}
        >>> my_coll.find_one({}, sort={"$vector": [1, 0]}, projection={"*": True})
        {'_id': '...', 'tag': 'D', '$vector': [4.0, 1.0]}

    Note:
        The accepted parameters are described in more detail under the
        `find` method (but `skip` and `limit` are not valid parameters
        for `find_one`).
    """

    check_deprecated_vector_ize(
        vector=vector, vectors=None, vectorize=vectorize, kind="find"
    )
    # A falsy per-call timeout (None or 0) falls back to the collection setting.
    effective_timeout_ms = max_time_ms or self.api_options.max_time_ms
    single_doc_cursor = self.find(
        filter=filter,
        projection=projection,
        skip=None,
        limit=1,
        vector=vector,
        vectorize=vectorize,
        include_similarity=include_similarity,
        sort=sort,
        max_time_ms=effective_timeout_ms,
    )
    # An exhausted cursor (no match) yields the None default.
    return next(single_doc_cursor, None)
def find_one_and_delete(self, filter: FilterType, *, projection: ProjectionType | None = None, vector: VectorType | None = None, vectorize: str | None = None, sort: SortType | None = None, max_time_ms: int | None = None) ‑> Optional[Dict[str, Any]]

Find a document in the collection and delete it. The deleted document, however, is the return value of the method.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
projection
it controls which parts of the document are returned. It can be an allow-list: {"f1": True, "f2": True}, or a deny-list: {"fx": False, "fy": False}, but not a mixture (except for the _id and other special fields, which can be associated to both True or False independently of the rest of the specification). The special star-projections {"*": True} and {"*": False} have the effect of returning the whole document and {} respectively. For lists in documents, slice directives can be passed to select portions of the list: for instance, {"array": {"$slice": 2}}, {"array": {"$slice": -2}}, {"array": {"$slice": [4, 2]}} or {"array": {"$slice": [-4, 2]}}. An iterable over strings will be treated implicitly as an allow-list. The default projection (used if this parameter is not passed) does not necessarily include "special" fields such as $vector or $vectorize. See the Data API documentation for more on projections.
vector
a suitable vector, i.e. a list of float numbers of the appropriate dimensionality, to use vector search (i.e. ANN, or "approximate nearest-neighbours" search), as the sorting criterion. In this way, the matched document (if any) will be the one that is most similar to the provided vector. This parameter cannot be used together with sort. See the find method for more details on this parameter. DEPRECATED (removal in 2.0). Use a $vector key in the sort clause dict instead.
vectorize
a string to be made into a vector to perform vector search. This can be supplied in (exclusive) alternative to vector, provided such a service is configured for the collection, and achieves the same effect. DEPRECATED (removal in 2.0). Use a $vectorize key in the sort clause dict instead.
sort
with this dictionary parameter one can control the sorting order of the documents matching the filter, effectively determining what document will come first and hence be the deleted one. See the find method for more on sorting.
max_time_ms
a timeout, in milliseconds, for the underlying HTTP request. If not passed, the collection-level setting is used instead.

Returns

Either the document (or a projection thereof, as requested), or None if no matches were found in the first place.

Example

>>> my_coll.insert_many(
...     [
...         {"species": "swan", "class": "Aves"},
...         {"species": "frog", "class": "Amphibia"},
...     ],
... )
InsertManyResult(...)
>>> my_coll.find_one_and_delete(
...     {"species": {"$ne": "frog"}},
...     projection=["species"],
... )
{'_id': '5997fb48-...', 'species': 'swan'}
>>> my_coll.find_one_and_delete({"species": {"$ne": "frog"}})
>>> # (returns None for no matches)
Expand source code
def find_one_and_delete(
    self,
    filter: FilterType,
    *,
    projection: ProjectionType | None = None,
    vector: VectorType | None = None,
    vectorize: str | None = None,
    sort: SortType | None = None,
    max_time_ms: int | None = None,
) -> DocumentType | None:
    """
    Locate one document in the collection, delete it, and hand the deleted
    document back to the caller as the return value.

    Args:
        filter: a dictionary predicate written in the Data API filter
            syntax, for example:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            The Data API documentation lists all available operators.
        projection: selects which parts of the document are returned.
            Either an allow-list, `{"f1": True, "f2": True}`, or a
            deny-list, `{"fx": False, "fy": False}`; the two cannot be mixed
            (with the exception of `_id` and other special fields, which
            may be set to True or False regardless of the rest of
            the specification).
            The star-projections `{"*": True}` and `{"*": False}` return
            the whole document and `{}` respectively.
            List fields can be sliced with directives such as
            `{"array": {"$slice": 2}}`, `{"array": {"$slice": -2}}`,
            `{"array": {"$slice": [4, 2]}}` or
            `{"array": {"$slice": [-4, 2]}}`.
            An iterable of strings is implicitly treated as an allow-list.
            When this parameter is omitted, the default projection does not
            necessarily include "special" fields like `$vector`
            or `$vectorize`.
            The Data API documentation has more on projections.
        vector: a list of floats, of the appropriate dimensionality, used
            as the sorting criterion through vector search (ANN, i.e.
            "approximate nearest-neighbours"): the matched document, if any,
            is then the one most similar to this vector.
            Cannot be combined with `sort`.
            The `find` method has more details on this parameter.
            *DEPRECATED* (removal in 2.0). Use a `$vector` key in the
            sort clause dict instead.
        vectorize: a string to be turned into a vector for vector search.
            It is an (exclusive) alternative to `vector`, with the same
            effect, provided such a service is configured for
            the collection.
            *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the
            sort clause dict instead.
        sort: a dictionary controlling the ordering of the documents
            matching the filter, which determines the document that comes
            first and is therefore deleted. The `find` method has more
            on sorting.
        max_time_ms: timeout, in milliseconds, for the underlying HTTP
            request. The collection-level setting applies if not passed.

    Returns:
        Either the document (or a projection thereof, as requested), or None
        if no matches were found in the first place.

    Raises:
        DataAPIFaultyResponseException: if the response carries neither a
            "document" entry nor a zero "deletedCount" status.

    Example:
        >>> my_coll.insert_many(
        ...     [
        ...         {"species": "swan", "class": "Aves"},
        ...         {"species": "frog", "class": "Amphibia"},
        ...     ],
        ... )
        InsertManyResult(...)
        >>> my_coll.find_one_and_delete(
        ...     {"species": {"$ne": "frog"}},
        ...     projection=["species"],
        ... )
        {'_id': '5997fb48-...', 'species': 'swan'}
        >>> my_coll.find_one_and_delete({"species": {"$ne": "frog"}})
        >>> # (returns None for no matches)
    """

    check_deprecated_vector_ize(
        vector=vector, vectors=None, vectorize=vectorize, kind="find"
    )
    # Deprecated vector/vectorize arguments are folded into the sort clause.
    command_fields = {
        "filter": filter,
        "sort": _collate_vector_to_sort(sort, vector, vectorize),
        "projection": normalize_optional_projection(projection),
    }
    # A falsy per-call timeout (None or 0) falls back to the collection setting.
    effective_timeout_ms = max_time_ms or self.api_options.max_time_ms
    # Fields left as None are omitted from the command altogether.
    fo_payload = {
        "findOneAndDelete": {
            field_name: field_value
            for field_name, field_value in command_fields.items()
            if field_value is not None
        }
    }
    logger.info(f"findOneAndDelete on '{self.name}'")
    fo_response = self._api_commander.request(
        payload=fo_payload,
        timeout_info=base_timeout_info(effective_timeout_ms),
    )
    logger.info(f"finished findOneAndDelete on '{self.name}'")

    response_data = fo_response.get("data", {})
    if "document" in response_data:
        return response_data["document"]  # type: ignore[no-any-return]

    # No document in the response: legitimate only if nothing was deleted.
    if fo_response.get("status", {}).get("deletedCount") == 0:
        return None

    raise DataAPIFaultyResponseException(
        text="Faulty response from find_one_and_delete API command.",
        raw_response=fo_response,
    )
def find_one_and_replace(self, filter: FilterType, replacement: DocumentType, *, projection: ProjectionType | None = None, vector: VectorType | None = None, vectorize: str | None = None, sort: SortType | None = None, upsert: bool = False, return_document: str = 'before', max_time_ms: int | None = None) ‑> Optional[Dict[str, Any]]

Find a document on the collection and replace it entirely with a new one, optionally inserting a new one if no match is found.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
replacement
the new document to write into the collection.
projection
it controls which parts of the document are returned. It can be an allow-list: {"f1": True, "f2": True}, or a deny-list: {"fx": False, "fy": False}, but not a mixture (except for the _id and other special fields, which can be associated to both True or False independently of the rest of the specification). The special star-projections {"*": True} and {"*": False} have the effect of returning the whole document and {} respectively. For lists in documents, slice directives can be passed to select portions of the list: for instance, {"array": {"$slice": 2}}, {"array": {"$slice": -2}}, {"array": {"$slice": [4, 2]}} or {"array": {"$slice": [-4, 2]}}. An iterable over strings will be treated implicitly as an allow-list. The default projection (used if this parameter is not passed) does not necessarily include "special" fields such as $vector or $vectorize. See the Data API documentation for more on projections.
vector
a suitable vector, i.e. a list of float numbers of the appropriate dimensionality, to use vector search (i.e. ANN, or "approximate nearest-neighbours" search), as the sorting criterion. In this way, the matched document (if any) will be the one that is most similar to the provided vector. DEPRECATED (removal in 2.0). Use a $vector key in the sort clause dict instead.
vectorize
a string to be made into a vector to perform vector search. Using vectorize assumes a suitable service is configured for the collection. DEPRECATED (removal in 2.0). Use a $vectorize key in the sort clause dict instead.
sort
with this dictionary parameter one can control the sorting order of the documents matching the filter, effectively determining what document will come first and hence be the replaced one. See the find method for more on sorting. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
upsert
this parameter controls the behavior in absence of matches. If True, replacement is inserted as a new document if no matches are found on the collection. If False, the operation silently does nothing in case of no matches.
return_document
a flag controlling what document is returned: if set to ReturnDocument.BEFORE, or the string "before", the document found on database is returned; if set to ReturnDocument.AFTER, or the string "after", the new document is returned. The default is "before".
max_time_ms
a timeout, in milliseconds, for the underlying HTTP request. If not passed, the collection-level setting is used instead.

Returns

A document (or a projection thereof, as required), either the one before the replace operation or the one after that. Alternatively, the method returns None to represent that no matching document was found, or that no replacement was inserted (depending on the return_document parameter).

Example

>>> my_coll.insert_one({"_id": "rule1", "text": "all animals are equal"})
InsertOneResult(...)
>>> my_coll.find_one_and_replace(
...     {"_id": "rule1"},
...     {"text": "some animals are more equal!"},
... )
{'_id': 'rule1', 'text': 'all animals are equal'}
>>> my_coll.find_one_and_replace(
...     {"text": "some animals are more equal!"},
...     {"text": "and the pigs are the rulers"},
...     return_document=astrapy.constants.ReturnDocument.AFTER,
... )
{'_id': 'rule1', 'text': 'and the pigs are the rulers'}
>>> my_coll.find_one_and_replace(
...     {"_id": "rule2"},
...     {"text": "F=ma^2"},
...     return_document=astrapy.constants.ReturnDocument.AFTER,
... )
>>> # (returns None for no matches)
>>> my_coll.find_one_and_replace(
...     {"_id": "rule2"},
...     {"text": "F=ma"},
...     upsert=True,
...     return_document=astrapy.constants.ReturnDocument.AFTER,
...     projection={"_id": False},
... )
{'text': 'F=ma'}
Expand source code
def find_one_and_replace(
    self,
    filter: FilterType,
    replacement: DocumentType,
    *,
    projection: ProjectionType | None = None,
    vector: VectorType | None = None,
    vectorize: str | None = None,
    sort: SortType | None = None,
    upsert: bool = False,
    return_document: str = ReturnDocument.BEFORE,
    max_time_ms: int | None = None,
) -> DocumentType | None:
    """
    Locate one document in the collection and overwrite it entirely with a
    new one; optionally, insert the new document when nothing matches.

    Args:
        filter: a dictionary predicate written in the Data API filter
            syntax, for example:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            The Data API documentation lists all available operators.
        replacement: the new document to write into the collection.
        projection: selects which parts of the document are returned.
            Either an allow-list, `{"f1": True, "f2": True}`, or a
            deny-list, `{"fx": False, "fy": False}`; the two cannot be mixed
            (with the exception of `_id` and other special fields, which
            may be set to True or False regardless of the rest of
            the specification).
            The star-projections `{"*": True}` and `{"*": False}` return
            the whole document and `{}` respectively.
            List fields can be sliced with directives such as
            `{"array": {"$slice": 2}}`, `{"array": {"$slice": -2}}`,
            `{"array": {"$slice": [4, 2]}}` or
            `{"array": {"$slice": [-4, 2]}}`.
            An iterable of strings is implicitly treated as an allow-list.
            When this parameter is omitted, the default projection does not
            necessarily include "special" fields like `$vector`
            or `$vectorize`.
            The Data API documentation has more on projections.
        vector: a list of floats, of the appropriate dimensionality, used
            as the sorting criterion through vector search (ANN, i.e.
            "approximate nearest-neighbours"): the matched document, if any,
            is then the one most similar to this vector.
            *DEPRECATED* (removal in 2.0). Use a `$vector` key in the
            sort clause dict instead.
        vectorize: a string to be turned into a vector for vector search.
            This assumes a suitable service is configured for the collection.
            *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the
            sort clause dict instead.
        sort: a dictionary controlling the ordering of the documents
            matching the filter, which determines the document that comes
            first and is therefore replaced. The `find` method has more
            on sorting. Vector-based ANN sorting is obtained with a
            "$vector" or a "$vectorize" key in `sort`.
        upsert: what to do when there are no matches. If True,
            `replacement` is inserted as a new document; if False,
            the operation silently does nothing.
        return_document: which document to return. With
            `ReturnDocument.BEFORE` (or the string "before") the document
            found on database is returned; with `ReturnDocument.AFTER`
            (or the string "after") the new document is returned.
            The default is "before".
        max_time_ms: timeout, in milliseconds, for the underlying HTTP
            request. The collection-level setting applies if not passed.

    Returns:
        A document (or a projection thereof, as required), either the one
        before the replace operation or the one after that.
        Alternatively, the method returns None to represent
        that no matching document was found, or that no replacement
        was inserted (depending on the `return_document` parameter).

    Raises:
        DataAPIFaultyResponseException: if the response has no "document"
            entry in its "data" section.

    Example:
        >>> my_coll.insert_one({"_id": "rule1", "text": "all animals are equal"})
        InsertOneResult(...)
        >>> my_coll.find_one_and_replace(
        ...     {"_id": "rule1"},
        ...     {"text": "some animals are more equal!"},
        ... )
        {'_id': 'rule1', 'text': 'all animals are equal'}
        >>> my_coll.find_one_and_replace(
        ...     {"text": "some animals are more equal!"},
        ...     {"text": "and the pigs are the rulers"},
        ...     return_document=astrapy.constants.ReturnDocument.AFTER,
        ... )
        {'_id': 'rule1', 'text': 'and the pigs are the rulers'}
        >>> my_coll.find_one_and_replace(
        ...     {"_id": "rule2"},
        ...     {"text": "F=ma^2"},
        ...     return_document=astrapy.constants.ReturnDocument.AFTER,
        ... )
        >>> # (returns None for no matches)
        >>> my_coll.find_one_and_replace(
        ...     {"_id": "rule2"},
        ...     {"text": "F=ma"},
        ...     upsert=True,
        ...     return_document=astrapy.constants.ReturnDocument.AFTER,
        ...     projection={"_id": False},
        ... )
        {'text': 'F=ma'}
    """

    check_deprecated_vector_ize(
        vector=vector, vectors=None, vectorize=vectorize, kind="find"
    )
    # Deprecated vector/vectorize arguments are folded into the sort clause.
    ann_aware_sort = _collate_vector_to_sort(sort, vector, vectorize)
    # A falsy per-call timeout (None or 0) falls back to the collection setting.
    effective_timeout_ms = max_time_ms or self.api_options.max_time_ms
    command_fields = {
        "filter": filter,
        "projection": normalize_optional_projection(projection),
        "replacement": replacement,
        "options": {
            "returnDocument": return_document,
            "upsert": upsert,
        },
        "sort": ann_aware_sort,
    }
    # Fields left as None are omitted from the command altogether.
    fo_payload = {
        "findOneAndReplace": {
            field_name: field_value
            for field_name, field_value in command_fields.items()
            if field_value is not None
        }
    }
    logger.info(f"findOneAndReplace on '{self.name}'")
    fo_response = self._api_commander.request(
        payload=fo_payload,
        timeout_info=base_timeout_info(effective_timeout_ms),
    )
    logger.info(f"finished findOneAndReplace on '{self.name}'")

    response_data = fo_response.get("data", {})
    if "document" not in response_data:
        raise DataAPIFaultyResponseException(
            text="Faulty response from find_one_and_replace API command.",
            raw_response=fo_response,
        )
    # The entry may itself be None, which is passed through unchanged.
    return response_data["document"]  # type: ignore[no-any-return]
def find_one_and_update(self, filter: FilterType, update: dict[str, Any], *, projection: ProjectionType | None = None, vector: VectorType | None = None, vectorize: str | None = None, sort: SortType | None = None, upsert: bool = False, return_document: str = 'before', max_time_ms: int | None = None) ‑> Optional[Dict[str, Any]]

Find a document on the collection and update it as requested, optionally inserting a new one if no match is found.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
update
the update prescription to apply to the document, expressed as a dictionary as per Data API syntax. Examples are: {"$set": {"field": "value"}} {"$inc": {"counter": 10}} {"$unset": {"field": ""}} See the Data API documentation for the full syntax.
projection
it controls which parts of the document are returned. It can be an allow-list: {"f1": True, "f2": True}, or a deny-list: {"fx": False, "fy": False}, but not a mixture (except for the _id and other special fields, which can be associated to both True or False independently of the rest of the specification). The special star-projections {"*": True} and {"*": False} have the effect of returning the whole document and {} respectively. For lists in documents, slice directives can be passed to select portions of the list: for instance, {"array": {"$slice": 2}}, {"array": {"$slice": -2}}, {"array": {"$slice": [4, 2]}} or {"array": {"$slice": [-4, 2]}}. An iterable over strings will be treated implicitly as an allow-list. The default projection (used if this parameter is not passed) does not necessarily include "special" fields such as $vector or $vectorize. See the Data API documentation for more on projections.
vector
a suitable vector, i.e. a list of float numbers of the appropriate dimensionality, to use vector search (i.e. ANN, or "approximate nearest-neighbours" search), as the sorting criterion. In this way, the matched document (if any) will be the one that is most similar to the provided vector. DEPRECATED (removal in 2.0). Use a $vector key in the sort clause dict instead.
vectorize
a string to be made into a vector to perform vector search. Using vectorize assumes a suitable service is configured for the collection. DEPRECATED (removal in 2.0). Use a $vectorize key in the sort clause dict instead.
sort
with this dictionary parameter one can control the sorting order of the documents matching the filter, effectively determining what document will come first and hence be the updated one. See the find method for more on sorting. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
upsert
this parameter controls the behavior in absence of matches. If True, a new document (resulting from applying the update to an empty document) is inserted if no matches are found on the collection. If False, the operation silently does nothing in case of no matches.
return_document
a flag controlling what document is returned: if set to ReturnDocument.BEFORE, or the string "before", the document found on database is returned; if set to ReturnDocument.AFTER, or the string "after", the new document is returned. The default is "before".
max_time_ms
a timeout, in milliseconds, for the underlying HTTP request. If not passed, the collection-level setting is used instead.

Returns

A document (or a projection thereof, as required), either the one before the update operation or the one after that. Alternatively, the method returns None to represent that no matching document was found, or that no update was applied (depending on the return_document parameter).

Example

>>> my_coll.insert_one({"Marco": "Polo"})
InsertOneResult(...)
>>> my_coll.find_one_and_update(
...     {"Marco": {"$exists": True}},
...     {"$set": {"title": "Mr."}},
... )
{'_id': 'a80106f2-...', 'Marco': 'Polo'}
>>> my_coll.find_one_and_update(
...     {"title": "Mr."},
...     {"$inc": {"rank": 3}},
...     projection=["title", "rank"],
...     return_document=astrapy.constants.ReturnDocument.AFTER,
... )
{'_id': 'a80106f2-...', 'title': 'Mr.', 'rank': 3}
>>> my_coll.find_one_and_update(
...     {"name": "Johnny"},
...     {"$set": {"rank": 0}},
...     return_document=astrapy.constants.ReturnDocument.AFTER,
... )
>>> # (returns None for no matches)
>>> my_coll.find_one_and_update(
...     {"name": "Johnny"},
...     {"$set": {"rank": 0}},
...     upsert=True,
...     return_document=astrapy.constants.ReturnDocument.AFTER,
... )
{'_id': 'cb4ef2ab-...', 'name': 'Johnny', 'rank': 0}
Expand source code
def find_one_and_update(
    self,
    filter: FilterType,
    update: dict[str, Any],
    *,
    projection: ProjectionType | None = None,
    vector: VectorType | None = None,
    vectorize: str | None = None,
    sort: SortType | None = None,
    upsert: bool = False,
    return_document: str = ReturnDocument.BEFORE,
    max_time_ms: int | None = None,
) -> DocumentType | None:
    """
    Locate one document on the collection and apply an update to it,
    optionally inserting a new document when nothing matches.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        update: the update prescription to apply to the document, expressed
            as a dictionary as per Data API syntax. Examples are:
                {"$set": {"field": "value"}}
                {"$inc": {"counter": 10}}
                {"$unset": {"field": ""}}
            See the Data API documentation for the full syntax.
        projection: it controls which parts of the document are returned.
            It can be an allow-list: `{"f1": True, "f2": True}`,
            or a deny-list: `{"fx": False, "fy": False}`, but not a mixture
            (except for the `_id` and other special fields, which can be
            associated to both True or False independently of the rest
            of the specification).
            The special star-projections `{"*": True}` and `{"*": False}`
            have the effect of returning the whole document and `{}` respectively.
            For lists in documents, slice directives can be passed to select
            portions of the list: for instance, `{"array": {"$slice": 2}}`,
            `{"array": {"$slice": -2}}`, `{"array": {"$slice": [4, 2]}}` or
            `{"array": {"$slice": [-4, 2]}}`.
            An iterable over strings will be treated implicitly as an allow-list.
            The default projection (used if this parameter is not passed) does not
            necessarily include "special" fields such as `$vector` or `$vectorize`.
            See the Data API documentation for more on projections.
        vector: a suitable vector, i.e. a list of float numbers of the appropriate
            dimensionality, to use vector search (i.e. ANN,
            or "approximate nearest-neighbours" search), as the sorting criterion.
            In this way, the matched document (if any) will be the one
            that is most similar to the provided vector.
            *DEPRECATED* (removal in 2.0). Use a `$vector` key in the
            sort clause dict instead.
        vectorize: a string to be made into a vector to perform vector search.
            Using vectorize assumes a suitable service is configured for the collection.
            *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the
            sort clause dict instead.
        sort: with this dictionary parameter one can control the sorting
            order of the documents matching the filter, effectively
            determining what document will come first and hence be the
            updated one. See the `find` method for more on sorting.
            Vector-based ANN sorting is achieved by providing a "$vector"
            or a "$vectorize" key in `sort`.
        upsert: this parameter controls the behavior in absence of matches.
            If True, a new document (resulting from applying the `update`
            to an empty document) is inserted if no matches are found on
            the collection. If False, the operation silently does nothing
            in case of no matches.
        return_document: a flag controlling what document is returned:
            if set to `ReturnDocument.BEFORE`, or the string "before",
            the document found on database is returned; if set to
            `ReturnDocument.AFTER`, or the string "after", the new
            document is returned. The default is "before".
        max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
            If not passed, the collection-level setting is used instead.

    Returns:
        A document (or a projection thereof, as required), either the one
        before the update operation or the one after it.
        Alternatively, the method returns None to represent
        that no matching document was found, or that no update
        was applied (depending on the `return_document` parameter).

    Raises:
        DataAPIFaultyResponseException: if the API response carries
            no "document" entry in its "data" section.

    Example:
        >>> my_coll.insert_one({"Marco": "Polo"})
        InsertOneResult(...)
        >>> my_coll.find_one_and_update(
        ...     {"Marco": {"$exists": True}},
        ...     {"$set": {"title": "Mr."}},
        ... )
        {'_id': 'a80106f2-...', 'Marco': 'Polo'}
        >>> my_coll.find_one_and_update(
        ...     {"title": "Mr."},
        ...     {"$inc": {"rank": 3}},
        ...     projection=["title", "rank"],
        ...     return_document=astrapy.constants.ReturnDocument.AFTER,
        ... )
        {'_id': 'a80106f2-...', 'title': 'Mr.', 'rank': 3}
        >>> my_coll.find_one_and_update(
        ...     {"name": "Johnny"},
        ...     {"$set": {"rank": 0}},
        ...     return_document=astrapy.constants.ReturnDocument.AFTER,
        ... )
        >>> # (returns None for no matches)
        >>> my_coll.find_one_and_update(
        ...     {"name": "Johnny"},
        ...     {"$set": {"rank": 0}},
        ...     upsert=True,
        ...     return_document=astrapy.constants.ReturnDocument.AFTER,
        ... )
        {'_id': 'cb4ef2ab-...', 'name': 'Johnny', 'rank': 0}
    """

    check_deprecated_vector_ize(
        vector=vector, vectors=None, vectorize=vectorize, kind="find"
    )
    # the deprecated vector/vectorize parameters are folded into the sort clause
    effective_sort = _collate_vector_to_sort(sort, vector, vectorize)
    timeout_ms = max_time_ms or self.api_options.max_time_ms
    candidate_fields = {
        "filter": filter,
        "update": update,
        "options": {
            "returnDocument": return_document,
            "upsert": upsert,
        },
        "sort": effective_sort,
        "projection": normalize_optional_projection(projection),
    }
    # only the fields that are actually set make it into the command
    command_body = {}
    for field_name, field_value in candidate_fields.items():
        if field_value is not None:
            command_body[field_name] = field_value
    logger.info(f"findOneAndUpdate on '{self.name}'")
    fo_response = self._api_commander.request(
        payload={"findOneAndUpdate": command_body},
        timeout_info=base_timeout_info(timeout_ms),
    )
    logger.info(f"finished findOneAndUpdate on '{self.name}'")
    response_data = fo_response.get("data", {})
    if "document" not in response_data:
        raise DataAPIFaultyResponseException(
            text="Faulty response from find_one_and_update API command.",
            raw_response=fo_response,
        )
    # a null "document" is passed through as None
    return response_data.get("document")  # type: ignore[no-any-return]
def info(self) ‑> CollectionInfo

Information on the collection (name, location, database), in the form of a CollectionInfo object.

Not to be confused with the collection options method (related to the collection internal configuration).

Example

>>> my_coll.info().database_info.region
'eu-west-1'
>>> my_coll.info().full_name
'default_keyspace.my_v_collection'

Note

the returned CollectionInfo wraps, among other things, the database information: as such, calling this method triggers the same-named method of a Database object (which, in turn, performs an HTTP request to the DevOps API). See the documentation for Database.info() for more details.

Expand source code
def info(self) -> CollectionInfo:
    """
    Describe this collection (name, location, database) by means of
    a CollectionInfo object.

    This is distinct from the collection `options` method, which deals
    with the collection internal configuration.

    Returns:
        a CollectionInfo built from the database info and from this
        collection's keyspace, name and full name. The keyspace is
        supplied under both the `keyspace` and `namespace` fields.

    Example:
        >>> my_coll.info().database_info.region
        'eu-west-1'
        >>> my_coll.info().full_name
        'default_keyspace.my_v_collection'

    Note:
        the returned CollectionInfo wraps, among other things,
        the database information: as such, calling this method
        triggers the same-named method of a Database object (which, in turn,
        performs an HTTP request to the DevOps API).
        See the documentation for `Database.info()` for more details.
    """

    # fetched first: this is the step that delegates to the Database object
    parent_database_info = self.database.info()
    return CollectionInfo(
        database_info=parent_database_info,
        keyspace=self.keyspace,
        namespace=self.keyspace,
        name=self.name,
        full_name=self.full_name,
    )
def insert_many(self, documents: Iterable[DocumentType], *, vectors: Iterable[VectorType | None] | None = None, vectorize: Iterable[str | None] | None = None, ordered: bool = False, chunk_size: int | None = None, concurrency: int | None = None, max_time_ms: int | None = None) ‑> InsertManyResult

Insert a list of documents into the collection. This is not an atomic operation.

Args

documents
an iterable of dictionaries, each a document to insert. Documents may specify their _id field or leave it out, in which case it will be added automatically.
vectors
an optional list of vectors (as many vectors as the provided documents) to associate to the documents when inserting. Passing vectors this way is indeed equivalent to the "$vector" field of the documents, however the two are mutually exclusive. DEPRECATED (removal in 2.0). Use a $vector key in the documents instead.
vectorize
an optional list of strings to be made into as many vectors (one per document), if such a service is configured for the collection. Passing this parameter is equivalent to providing a $vectorize field in the documents themselves, however the two are mutually exclusive. DEPRECATED (removal in 2.0). Use a $vectorize key in the documents instead.
ordered
if False (default), the insertions can occur in arbitrary order and possibly concurrently. If True, they are processed sequentially. If there are no specific reasons against it, unordered insertions are to be preferred as they complete much faster.
chunk_size
how many documents to include in a single API request. Exceeding the server maximum allowed value results in an error. Leave it unspecified (recommended) to use the system default.
concurrency
maximum number of concurrent requests to the API at a given time. It cannot be more than one for ordered insertions.
max_time_ms
a timeout, in milliseconds, for the operation. If not passed, the collection-level setting is used instead. If many documents are being inserted, this method corresponds to several HTTP requests: in such cases one may want to specify a more tolerant timeout here.

Returns

an InsertManyResult object.

Examples

>>> my_coll.count_documents({}, upper_bound=10)
0
>>> my_coll.insert_many(
...     [{"a": 10}, {"a": 5}, {"b": [True, False, False]}],
...     ordered=True,
... )
InsertManyResult(raw_results=..., inserted_ids=['184bb06f-...', '...', '...'])
>>> my_coll.count_documents({}, upper_bound=100)
3
>>> my_coll.insert_many(
...     [{"seq": i} for i in range(50)],
...     concurrency=5,
... )
InsertManyResult(raw_results=..., inserted_ids=[... ...])
>>> my_coll.count_documents({}, upper_bound=100)
53
>>> my_coll.insert_many(
...     [
...         {"tag": "a", "$vector": [1, 2]},
...         {"tag": "b", "$vector": [3, 4]},
...     ]
... )
InsertManyResult(...)

Note

Unordered insertions are executed with some degree of concurrency, so it is usually better to prefer this mode unless the order in the document sequence is important.

Note

A failure mode for this command is related to certain faulty documents found among those to insert: a document may have an _id already present on the collection, or its vector dimension may not match the collection setting.

For an ordered insertion, the method will raise an exception at the first such faulty document – nevertheless, all documents processed until then will end up being written to the database.

For unordered insertions, if the error stems from faulty documents the insertion proceeds until exhausting the input documents: then, an exception is raised – and all insertable documents will have been written to the database, including those "after" the troublesome ones.

If, on the other hand, there are errors not related to individual documents (such as a network connectivity error), the whole insert_many operation will stop in mid-way, an exception will be raised, and only a certain amount of the input documents will have made their way to the database.

Expand source code
def insert_many(
    self,
    documents: Iterable[DocumentType],
    *,
    vectors: Iterable[VectorType | None] | None = None,
    vectorize: Iterable[str | None] | None = None,
    ordered: bool = False,
    chunk_size: int | None = None,
    concurrency: int | None = None,
    max_time_ms: int | None = None,
) -> InsertManyResult:
    """
    Insert several documents into the collection, sending them to the API
    in chunks. This is not an atomic operation.

    Args:
        documents: an iterable of dictionaries, each a document to insert.
            Documents may specify their `_id` field or leave it out, in which
            case it will be added automatically.
        vectors: an optional list of vectors (as many vectors as the provided
            documents) to associate to the documents when inserting.
            Passing vectors this way is equivalent to the "$vector" field
            of the documents, however the two are mutually exclusive.
            *DEPRECATED* (removal in 2.0). Use a `$vector` key in the documents instead.
        vectorize: an optional list of strings to be made into as many vectors
            (one per document), if such a service is configured for the collection.
            Passing this parameter is equivalent to providing a `$vectorize`
            field in the documents themselves, however the two are mutually exclusive.
            *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the documents instead.
        ordered: if False (default), the insertions can occur in arbitrary order
            and possibly concurrently. If True, they are processed sequentially.
            If there are no specific reasons against it, unordered insertions are to
            be preferred as they complete much faster.
        chunk_size: how many documents to include in a single API request.
            Exceeding the server maximum allowed value results in an error.
            Leave it unspecified (recommended) to use the system default.
        concurrency: maximum number of concurrent requests to the API at
            a given time. It cannot be more than one for ordered insertions.
        max_time_ms: a timeout, in milliseconds, for the operation.
            If not passed, the collection-level setting is used instead.
            If many documents are being inserted, this method corresponds
            to several HTTP requests: in such cases one may want to specify
            a more tolerant timeout here.

    Returns:
        an InsertManyResult object.

    Raises:
        ValueError: if an ordered insertion is requested together
            with a concurrency greater than one.
        InsertManyException: if any of the API responses reports errors.
            The exception carries the partial result collected so far.

    Examples:
        >>> my_coll.count_documents({}, upper_bound=10)
        0
        >>> my_coll.insert_many(
        ...     [{"a": 10}, {"a": 5}, {"b": [True, False, False]}],
        ...     ordered=True,
        ... )
        InsertManyResult(raw_results=..., inserted_ids=['184bb06f-...', '...', '...'])
        >>> my_coll.count_documents({}, upper_bound=100)
        3
        >>> my_coll.insert_many(
        ...     [{"seq": i} for i in range(50)],
        ...     concurrency=5,
        ... )
        InsertManyResult(raw_results=..., inserted_ids=[... ...])
        >>> my_coll.count_documents({}, upper_bound=100)
        53
        >>> my_coll.insert_many(
        ...     [
        ...         {"tag": "a", "$vector": [1, 2]},
        ...         {"tag": "b", "$vector": [3, 4]},
        ...     ]
        ... )
        InsertManyResult(...)

    Note:
        Unordered insertions are executed with some degree of concurrency,
        so it is usually better to prefer this mode unless the order in the
        document sequence is important.

    Note:
        A failure mode for this command is related to certain faulty documents
        found among those to insert: a document may have an `_id` already
        present on the collection, or its vector dimension may not
        match the collection setting.

        For an ordered insertion, the method will raise an exception at
        the first such faulty document -- nevertheless, all documents processed
        until then will end up being written to the database.

        For unordered insertions, if the error stems from faulty documents
        the insertion proceeds until exhausting the input documents: then,
        an exception is raised -- and all insertable documents will have been
        written to the database, including those "after" the troublesome ones.

        If, on the other hand, there are errors not related to individual
        documents (such as a network connectivity error), the whole
        `insert_many` operation will stop in mid-way, an exception will be raised,
        and only a certain amount of the input documents will
        have made their way to the database.
    """

    check_deprecated_vector_ize(
        vector=None, vectors=vectors, vectorize=vectorize, kind="insert"
    )
    # an explicit concurrency wins; otherwise ordered insertions go one
    # request at a time and unordered ones use the package default
    if concurrency is not None:
        worker_count = concurrency
    elif ordered:
        worker_count = 1
    else:
        worker_count = DEFAULT_INSERT_MANY_CONCURRENCY
    if worker_count > 1 and ordered:
        raise ValueError("Cannot run ordered insert_many concurrently.")
    batch_size = DEFAULT_INSERT_MANY_CHUNK_SIZE if chunk_size is None else chunk_size
    _documents = _collate_vectors_to_documents(documents, vectors, vectorize)
    overall_timeout_ms = max_time_ms or self.api_options.max_time_ms
    logger.info(f"inserting {len(_documents)} documents in '{self.name}'")
    raw_results: list[dict[str, Any]] = []
    # a single manager is shared by all requests: each one asks it
    # for its timeout info right before being sent
    timeout_manager = MultiCallTimeoutManager(overall_max_time_ms=overall_timeout_ms)
    # consecutive slices of at most `batch_size` documents, consumed once
    document_batches = (
        _documents[start : start + batch_size]
        for start in range(0, len(_documents), batch_size)
    )

    def _send_batch(
        document_chunk: list[dict[str, Any]],
        batch_options: dict[str, bool],
    ) -> dict[str, Any]:
        # API-level errors are not raised here: callers inspect "errors"
        return self._api_commander.request(
            payload={
                "insertMany": {
                    "documents": document_chunk,
                    "options": batch_options,
                },
            },
            raise_api_errors=False,
            timeout_info=timeout_manager.remaining_timeout_info(),
        )

    if ordered:
        ordered_options = {"ordered": True}
        inserted_ids: list[Any] = []
        for document_chunk in document_batches:
            logger.info(f"insertMany on '{self.name}'")
            chunk_response = _send_batch(document_chunk, ordered_options)
            logger.info(f"finished insertMany on '{self.name}'")
            # accumulate what this request managed to insert
            chunk_status = chunk_response.get("status") or {}
            inserted_ids.extend(chunk_status.get("insertedIds", []))
            raw_results.append(chunk_response)
            # stop at the first response reporting errors
            if chunk_response.get("errors", []):
                raise InsertManyException.from_response(
                    command=None,
                    raw_response=chunk_response,
                    partial_result=InsertManyResult(
                        raw_results=raw_results,
                        inserted_ids=inserted_ids,
                    ),
                )

        ordered_result = InsertManyResult(
            raw_results=raw_results,
            inserted_ids=inserted_ids,
        )
        logger.info(
            f"finished inserting {len(_documents)} documents in '{self.name}'"
        )
        return ordered_result

    # unordered: concurrent or not, send every chunk and then parse the results
    unordered_options = {"ordered": False}

    def _insert_unordered_batch(
        document_chunk: list[dict[str, Any]],
    ) -> dict[str, Any]:
        logger.info(f"insertMany(chunk) on '{self.name}'")
        im_response = _send_batch(document_chunk, unordered_options)
        logger.info(f"finished insertMany(chunk) on '{self.name}'")
        return im_response

    if worker_count > 1:
        with ThreadPoolExecutor(max_workers=worker_count) as executor:
            raw_results = list(
                executor.map(_insert_unordered_batch, document_batches)
            )
    else:
        raw_results = [
            _insert_unordered_batch(document_chunk)
            for document_chunk in document_batches
        ]

    # flatten the inserted IDs reported by each response
    inserted_ids = []
    for chunk_response in raw_results:
        chunk_status = chunk_response.get("status") or {}
        inserted_ids.extend(chunk_status.get("insertedIds", []))

    # errors in any response turn the whole call into an exception
    if any(chunk_response.get("errors", []) for chunk_response in raw_results):
        raise InsertManyException.from_responses(
            commands=[None] * len(raw_results),
            raw_responses=raw_results,
            partial_result=InsertManyResult(
                raw_results=raw_results,
                inserted_ids=inserted_ids,
            ),
        )

    unordered_result = InsertManyResult(
        raw_results=raw_results,
        inserted_ids=inserted_ids,
    )
    logger.info(
        f"finished inserting {len(_documents)} documents in '{self.name}'"
    )
    return unordered_result
def insert_one(self, document: DocumentType, *, vector: VectorType | None = None, vectorize: str | None = None, max_time_ms: int | None = None) ‑> InsertOneResult

Insert a single document in the collection in an atomic operation.

Args

document
the dictionary expressing the document to insert. The _id field of the document can be left out, in which case it will be created automatically.
vector
a vector (a list of numbers appropriate for the collection) for the document. Passing this parameter is equivalent to providing a $vector field within the document itself, however the two are mutually exclusive. DEPRECATED (removal in 2.0). Use a $vector key in the document instead.
vectorize
a string to be made into a vector, if such a service is configured for the collection. Passing this parameter is equivalent to providing a $vectorize field in the document itself, however the two are mutually exclusive. Moreover, this parameter cannot coexist with vector. DEPRECATED (removal in 2.0). Use a $vectorize key in the document instead.
max_time_ms
a timeout, in milliseconds, for the underlying HTTP request. If not passed, the collection-level setting is used instead.

Returns

an InsertOneResult object.

Examples

>>> my_coll.count_documents({}, upper_bound=10)
0
>>> my_coll.insert_one(
...     {
...         "age": 30,
...         "name": "Smith",
...         "food": ["pear", "peach"],
...         "likes_fruit": True,
...     },
... )
InsertOneResult(raw_results=..., inserted_id='ed4587a4-...-...-...')
>>> my_coll.insert_one({"_id": "user-123", "age": 50, "name": "Maccio"})
InsertOneResult(raw_results=..., inserted_id='user-123')
>>> my_coll.count_documents({}, upper_bound=10)
2
>>> my_coll.insert_one({"tag": "v", "$vector": [10, 11]})
InsertOneResult(...)

Note

If an _id is explicitly provided, which corresponds to a document that exists already in the collection, an error is raised and the insertion fails.

Expand source code
def insert_one(
    self,
    document: DocumentType,
    *,
    vector: VectorType | None = None,
    vectorize: str | None = None,
    max_time_ms: int | None = None,
) -> InsertOneResult:
    """
    Write one document to the collection, as an atomic operation.

    Args:
        document: the dictionary expressing the document to insert.
            The `_id` field of the document can be left out, in which
            case it will be created automatically.
        vector: a vector (a list of numbers appropriate for the collection)
            for the document. Passing this parameter is equivalent to
            providing a `$vector` field within the document itself,
            however the two are mutually exclusive.
            *DEPRECATED* (removal in 2.0). Use a `$vector` key in the document instead.
        vectorize: a string to be made into a vector, if such a service
            is configured for the collection. Passing this parameter is
            equivalent to providing a `$vectorize` field in the document itself,
            however the two are mutually exclusive.
            Moreover, this parameter cannot coexist with `vector`.
            *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the document instead.
        max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
            If not passed, the collection-level setting is used instead.

    Returns:
        an InsertOneResult object.

    Raises:
        DataAPIFaultyResponseException: if the API response status has
            no "insertedIds" entry, or that entry is empty.

    Examples:
        >>> my_coll.count_documents({}, upper_bound=10)
        0
        >>> my_coll.insert_one(
        ...     {
        ...         "age": 30,
        ...         "name": "Smith",
        ...         "food": ["pear", "peach"],
        ...         "likes_fruit": True,
        ...     },
        ... )
        InsertOneResult(raw_results=..., inserted_id='ed4587a4-...-...-...')
        >>> my_coll.insert_one({"_id": "user-123", "age": 50, "name": "Maccio"})
        InsertOneResult(raw_results=..., inserted_id='user-123')
        >>> my_coll.count_documents({}, upper_bound=10)
        2

        >>> my_coll.insert_one({"tag": "v", "$vector": [10, 11]})
        InsertOneResult(...)

    Note:
        If an `_id` is explicitly provided, which corresponds to a document
        that exists already in the collection, an error is raised and
        the insertion fails.
    """

    check_deprecated_vector_ize(
        vector=vector, vectors=None, vectorize=vectorize, kind="insert"
    )
    # the deprecated vector/vectorize parameters are merged into the document
    document_to_insert = _collate_vector_to_document(document, vector, vectorize)
    timeout_ms = max_time_ms or self.api_options.max_time_ms
    logger.info(f"insertOne on '{self.name}'")
    io_response = self._api_commander.request(
        payload={"insertOne": {"document": document_to_insert}},
        timeout_info=base_timeout_info(timeout_ms),
    )
    logger.info(f"finished insertOne on '{self.name}'")
    response_status = io_response.get("status", {})
    # a missing or empty "insertedIds" entry is treated as a faulty response
    if "insertedIds" not in response_status or not response_status["insertedIds"]:
        raise DataAPIFaultyResponseException(
            text="Faulty response from insert_one API command.",
            raw_response=io_response,
        )
    return InsertOneResult(
        raw_results=[io_response],
        inserted_id=response_status["insertedIds"][0],
    )
def options(self, *, max_time_ms: int | None = None) ‑> CollectionOptions

Get the collection options, i.e. its configuration as read from the database.

The method issues a request to the Data API each time it is invoked, without caching mechanisms: this ensures up-to-date information for usages such as real-time collection validation by the application.

Args

max_time_ms
a timeout, in milliseconds, for the underlying HTTP request. If not passed, the collection-level setting is used instead.

Returns

a CollectionOptions instance describing the collection. (See also the database list_collections method.)

Example

>>> my_coll.options()
CollectionOptions(vector=CollectionVectorOptions(dimension=3, metric='cosine'))
Expand source code
def options(self, *, max_time_ms: int | None = None) -> CollectionOptions:
    """
    Retrieve the collection options, i.e. its configuration as read
    from the database.

    The method issues a request to the Data API each time it is invoked,
    without caching mechanisms: this ensures up-to-date information
    for usages such as real-time collection validation by the application.

    Args:
        max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
            If not passed, the collection-level setting is used instead.

    Returns:
        a CollectionOptions instance describing the collection.
        (See also the database `list_collections` method.)

    Raises:
        CollectionNotFoundException: if no collection with this name
            is found among those listed by the database.

    Example:
        >>> my_coll.options()
        CollectionOptions(vector=CollectionVectorOptions(dimension=3, metric='cosine'))
    """

    timeout_ms = max_time_ms or self.api_options.max_time_ms
    logger.info(f"getting collections in search of '{self.name}'")
    # the whole listing is scanned, keeping the descriptors with this name
    matching_descriptors = []
    for descriptor in self.database.list_collections(max_time_ms=timeout_ms):
        if descriptor.name == self.name:
            matching_descriptors.append(descriptor)
    logger.info(f"finished getting collections in search of '{self.name}'")
    if not matching_descriptors:
        raise CollectionNotFoundException(
            text=f"Collection {self.keyspace}.{self.name} not found.",
            keyspace=self.keyspace,
            collection_name=self.name,
        )
    return matching_descriptors[0].options
def replace_one(self, filter: FilterType, replacement: DocumentType, *, vector: VectorType | None = None, vectorize: str | None = None, sort: SortType | None = None, upsert: bool = False, max_time_ms: int | None = None) ‑> UpdateResult

Replace a single document on the collection with a new one, optionally inserting a new one if no match is found.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
replacement
the new document to write into the collection.
vector
a suitable vector, i.e. a list of float numbers of the appropriate dimensionality, to use vector search (i.e. ANN, or "approximate nearest-neighbours" search), as the sorting criterion. In this way, the matched document (if any) will be the one that is most similar to the provided vector. DEPRECATED (removal in 2.0). Use a $vector key in the sort clause dict instead.
vectorize
a string to be made into a vector to perform vector search. Using vectorize assumes a suitable service is configured for the collection. DEPRECATED (removal in 2.0). Use a $vectorize key in the sort clause dict instead.
sort
with this dictionary parameter one can control the sorting order of the documents matching the filter, effectively determining what document will come first and hence be the replaced one. See the find method for more on sorting. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
upsert
this parameter controls the behavior in absence of matches. If True, replacement is inserted as a new document if no matches are found on the collection. If False, the operation silently does nothing in case of no matches.
max_time_ms
a timeout, in milliseconds, for the underlying HTTP request. If not passed, the collection-level setting is used instead.

Returns

an UpdateResult object summarizing the outcome of the replace operation.

Example

>>> my_coll.insert_one({"Marco": "Polo"})
InsertOneResult(...)
>>> my_coll.replace_one({"Marco": {"$exists": True}}, {"Buda": "Pest"})
UpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1})
>>> my_coll.find_one({"Buda": "Pest"})
{'_id': '8424905a-...', 'Buda': 'Pest'}
>>> my_coll.replace_one({"Mirco": {"$exists": True}}, {"Oh": "yeah?"})
UpdateResult(raw_results=..., update_info={'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0})
>>> my_coll.replace_one({"Mirco": {"$exists": True}}, {"Oh": "yeah?"}, upsert=True)
UpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '931b47d6-...'})
Expand source code
def replace_one(
    self,
    filter: FilterType,
    replacement: DocumentType,
    *,
    vector: VectorType | None = None,
    vectorize: str | None = None,
    sort: SortType | None = None,
    upsert: bool = False,
    max_time_ms: int | None = None,
) -> UpdateResult:
    """
    Replace one document of the collection with a new one; optionally,
    insert the new document if nothing matches the filter.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        replacement: the new document to write into the collection.
        vector: a suitable vector, i.e. a list of float numbers of the
            appropriate dimensionality, used as the sorting criterion through
            vector search (i.e. ANN, or "approximate nearest-neighbours"
            search). The matched document (if any) will then be the one
            most similar to the provided vector.
            *DEPRECATED* (removal in 2.0). Use a `$vector` key in the
            sort clause dict instead.
        vectorize: a string to be made into a vector to perform vector search.
            Using vectorize assumes a suitable service is configured
            for the collection.
            *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the
            sort clause dict instead.
        sort: a dictionary controlling the sorting order of the documents
            matching the filter, which effectively determines what document
            comes first and is therefore the one being replaced.
            See the `find` method for more on sorting.
            Vector-based ANN sorting is achieved by providing a "$vector"
            or a "$vectorize" key in `sort`.
        upsert: this parameter controls the behavior in absence of matches.
            If True, `replacement` is inserted as a new document
            if no matches are found on the collection. If False,
            the operation silently does nothing in case of no matches.
        max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
            If not passed, the collection-level setting is used instead.

    Returns:
        an UpdateResult object summarizing the outcome of the replace operation.

    Raises:
        DataAPIFaultyResponseException: if the API response carries no
            "document" entry within its "data" part.

    Example:
        >>> my_coll.insert_one({"Marco": "Polo"})
        InsertOneResult(...)
        >>> my_coll.replace_one({"Marco": {"$exists": True}}, {"Buda": "Pest"})
        UpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1})
        >>> my_coll.find_one({"Buda": "Pest"})
        {'_id': '8424905a-...', 'Buda': 'Pest'}
        >>> my_coll.replace_one({"Mirco": {"$exists": True}}, {"Oh": "yeah?"})
        UpdateResult(raw_results=..., update_info={'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0})
        >>> my_coll.replace_one({"Mirco": {"$exists": True}}, {"Oh": "yeah?"}, upsert=True)
        UpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '931b47d6-...'})
    """

    check_deprecated_vector_ize(
        vector=vector, vectors=None, vectorize=vectorize, kind="find"
    )
    effective_sort = _collate_vector_to_sort(sort, vector, vectorize)
    timeout_ms = max_time_ms or self.api_options.max_time_ms
    command_body = {
        "filter": filter,
        "replacement": replacement,
        "options": {"upsert": upsert},
        "sort": effective_sort,
    }
    # Entries whose value is None are left out of the command altogether.
    payload = {
        "findOneAndReplace": {
            key: value for key, value in command_body.items() if value is not None
        }
    }
    logger.info(f"findOneAndReplace on '{self.name}'")
    response = self._api_commander.request(
        payload=payload,
        timeout_info=base_timeout_info(timeout_ms),
    )
    logger.info(f"finished findOneAndReplace on '{self.name}'")
    if "document" not in response.get("data", {}):
        raise DataAPIFaultyResponseException(
            text="Faulty response from find_one_and_replace API command.",
            raw_response=response,
        )
    # A missing (or empty) status is handled as an empty dictionary.
    status = response.get("status") or {}
    return UpdateResult(
        raw_results=[response],
        update_info=_prepare_update_info([status]),
    )
def set_caller(self, caller_name: str | None = None, caller_version: str | None = None) ‑> None

Set a new identity for the application/framework on behalf of which the Data API calls are performed (the "caller").

Args

caller_name
name of the application, or framework, on behalf of which the Data API calls are performed. This ends up in the request user-agent.
caller_version
version of the caller.

Example

>>> my_coll.set_caller(caller_name="the_caller", caller_version="0.1.0")

Deprecated since version: 1.5.1

This will be removed in 2.0.0. Please provide the caller(s) at constructor time through the callers list parameter.

Expand source code
@deprecation.deprecated(  # type: ignore[misc]
    deprecated_in="1.5.1",
    removed_in="2.0.0",
    current_version=__version__,
    details=SET_CALLER_DEPRECATION_NOTICE,
)
def set_caller(
    self,
    caller_name: str | None = None,
    caller_version: str | None = None,
) -> None:
    """
    Change the identity of the "caller", i.e. the application/framework
    on behalf of which the Data API calls are performed.

    Args:
        caller_name: name of the application, or framework, on behalf of which
            the Data API calls are performed. This ends up in the request user-agent.
        caller_version: version of the caller.

    Example:
        >>> my_coll.set_caller(caller_name="the_caller", caller_version="0.1.0")
    """

    logger.info(f"setting caller to {caller_name}/{caller_version}")
    new_callers = check_caller_parameters([], caller_name, caller_version)
    # An empty outcome from the parameter check leaves the callers as they are.
    self.callers = new_callers if new_callers else self.callers
    # The API commander is re-created after the callers have been set.
    self._api_commander = self._get_api_commander()
def to_async(self, *, database: AsyncDatabase | None = None, name: str | None = None, keyspace: str | None = None, namespace: str | None = None, embedding_api_key: str | EmbeddingHeadersProvider | None = None, collection_max_time_ms: int | None = None, callers: Sequence[CallerType] = [], caller_name: str | None = None, caller_version: str | None = None) ‑> AsyncCollection

Create an AsyncCollection from this one. Save for the arguments explicitly provided as overrides, everything else is kept identical to this collection in the copy (the database is converted into an async object).

Args

database
an AsyncDatabase object, instantiated earlier. This represents the database the new collection belongs to.
name
the collection name. This parameter should match an existing collection on the database.
keyspace
this is the keyspace to which the collection belongs. If not specified, the database's working keyspace is used.
namespace
an alias for keyspace. DEPRECATED, removal in 2.0.
embedding_api_key
optional API key(s) for interacting with the collection. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
collection_max_time_ms
a default timeout, in milliseconds, for the duration of each operation on the collection. Individual timeouts can be provided to each collection method call and will take precedence, with this value being an overall default. Note that for some methods involving multiple API calls (such as find, delete_many, insert_many and so on), it is strongly suggested to provide a specific timeout as the default one likely wouldn't make much sense.
callers
a list of caller identities, i.e. applications, or frameworks, on behalf of which the Data API calls are performed. These end up in the request user-agent. Each caller identity is a ("caller_name", "caller_version") pair.
caller_name
DEPRECATED, use callers. Removal 2.0. Name of the application, or framework, on behalf of which the Data API calls are performed. This ends up in the request user-agent.
caller_version
version of the caller. DEPRECATED, use callers. Removal 2.0.

Returns

the new copy, an AsyncCollection instance.

Example

>>> asyncio.run(my_coll.to_async().count_documents({},upper_bound=100))
77
Expand source code
def to_async(
    self,
    *,
    database: AsyncDatabase | None = None,
    name: str | None = None,
    keyspace: str | None = None,
    namespace: str | None = None,
    embedding_api_key: str | EmbeddingHeadersProvider | None = None,
    collection_max_time_ms: int | None = None,
    callers: Sequence[CallerType] = [],
    caller_name: str | None = None,
    caller_version: str | None = None,
) -> AsyncCollection:
    """
    Build an AsyncCollection out of this collection. Except for the
    arguments explicitly passed as overrides, the copy retains all settings
    of this collection (with the database converted into an async object).

    Args:
        database: an AsyncDatabase object, instantiated earlier.
            This represents the database the new collection belongs to.
        name: the collection name. This parameter should match an existing
            collection on the database.
        keyspace: this is the keyspace to which the collection belongs.
            If not specified, the database's working keyspace is used.
        namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
        embedding_api_key: optional API key(s) for interacting with the collection.
            If an embedding service is configured, and this parameter is not None,
            each Data API call will include the necessary embedding-related headers
            as specified by this parameter. If a string is passed, it translates
            into the one "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            For some vectorize providers/models, if using header-based authentication,
            specialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`
            should be supplied.
        collection_max_time_ms: a default timeout, in milliseconds, for the
            duration of each operation on the collection. Individual timeouts
            can be provided to each collection method call and will take
            precedence, with this value being an overall default.
            Note that for some methods involving multiple API calls (such as
            `find`, `delete_many`, `insert_many` and so on), it is strongly suggested
            to provide a specific timeout as the default one likely wouldn't make
            much sense.
        callers: a list of caller identities, i.e. applications, or frameworks,
            on behalf of which the Data API calls are performed. These end up
            in the request user-agent.
            Each caller identity is a ("caller_name", "caller_version") pair.
        caller_name: *DEPRECATED*, use `callers`. Removal 2.0. Name of the
            application, or framework, on behalf of which the Data API calls
            are performed. This ends up in the request user-agent.
        caller_version: version of the caller. *DEPRECATED*, use `callers`.
            Removal 2.0.

    Returns:
        the new copy, an AsyncCollection instance.

    Example:
        >>> asyncio.run(my_coll.to_async().count_documents({},upper_bound=100))
        77
    """

    callers_param = check_caller_parameters(callers, caller_name, caller_version)
    keyspace_param = check_namespace_keyspace(
        keyspace=keyspace,
        namespace=namespace,
    )
    # The options passed to this call, to be laid over this collection's own.
    override_options = CollectionAPIOptions(
        embedding_api_key=coerce_embedding_headers_provider(embedding_api_key),
        max_time_ms=collection_max_time_ms,
    )

    # For each setting, a (truthy) override wins over this collection's value.
    target_database = database if database else self.database.to_async()
    target_name = name if name else self.name
    target_keyspace = keyspace_param if keyspace_param else self.keyspace
    target_api_options = self.api_options.with_override(override_options)
    target_callers = callers_param if callers_param else self.callers
    return AsyncCollection(
        database=target_database,
        name=target_name,
        keyspace=target_keyspace,
        api_options=target_api_options,
        callers=target_callers,
    )
def update_many(self, filter: FilterType, update: dict[str, Any], *, upsert: bool = False, max_time_ms: int | None = None) ‑> UpdateResult

Apply an update operation to all documents matching a condition, optionally inserting one document in absence of matches.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
update
the update prescription to apply to the documents, expressed as a dictionary as per Data API syntax. Examples are: {"$set": {"field": "value"}} {"$inc": {"counter": 10}} {"$unset": {"field": ""}} See the Data API documentation for the full syntax.
upsert
this parameter controls the behavior in absence of matches. If True, a single new document (resulting from applying update to an empty document) is inserted if no matches are found on the collection. If False, the operation silently does nothing in case of no matches.
max_time_ms
a timeout, in milliseconds, for the operation. If not passed, the collection-level setting is used instead: if a large number of document updates is anticipated, it is suggested to specify a larger timeout than in most other operations as the update will span several HTTP calls to the API in sequence.

Returns

an UpdateResult object summarizing the outcome of the update operation.

Example

>>> my_coll.insert_many([{"c": "red"}, {"c": "green"}, {"c": "blue"}])
InsertManyResult(...)
>>> my_coll.update_many({"c": {"$ne": "green"}}, {"$set": {"nongreen": True}})
UpdateResult(raw_results=..., update_info={'n': 2, 'updatedExisting': True, 'ok': 1.0, 'nModified': 2})
>>> my_coll.update_many({"c": "orange"}, {"$set": {"is_also_fruit": True}})
UpdateResult(raw_results=..., update_info={'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0})
>>> my_coll.update_many(
...     {"c": "orange"},
...     {"$set": {"is_also_fruit": True}},
...     upsert=True,
... )
UpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '46643050-...'})

Note

Similarly to the case of find (see its docstring for more details), running this command while, at the same time, another process is inserting new documents which match the filter of the update_many can result in an unpredictable fraction of these documents being updated. In other words, it cannot be easily predicted whether a given newly-inserted document will be picked up by the update_many command or not.

Expand source code
def update_many(
    self,
    filter: FilterType,
    update: dict[str, Any],
    *,
    upsert: bool = False,
    max_time_ms: int | None = None,
) -> UpdateResult:
    """
    Apply an update operation to all documents matching a condition,
    optionally inserting one document in absence of matches.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        update: the update prescription to apply to the documents, expressed
            as a dictionary as per Data API syntax. Examples are:
                {"$set": {"field": "value"}}
                {"$inc": {"counter": 10}}
                {"$unset": {"field": ""}}
            See the Data API documentation for the full syntax.
        upsert: this parameter controls the behavior in absence of matches.
            If True, a single new document (resulting from applying `update`
            to an empty document) is inserted if no matches are found on
            the collection. If False, the operation silently does nothing
            in case of no matches.
        max_time_ms: a timeout, in milliseconds, for the operation.
            If not passed, the collection-level setting is used instead:
            if a large number of document updates is anticipated, it is suggested
            to specify a larger timeout than in most other operations as the
            update will span several HTTP calls to the API in sequence.

    Returns:
        an UpdateResult object summarizing the outcome of the update operation.

    Raises:
        UpdateManyException: if one of the API responses reports errors.
            The exception carries a partial result, built from the
            responses received prior to the failing one.
        DataAPIFaultyResponseException: if an API response reports
            no errors but lacks a "status" entry.

    Example:
        >>> my_coll.insert_many([{"c": "red"}, {"c": "green"}, {"c": "blue"}])
        InsertManyResult(...)
        >>> my_coll.update_many({"c": {"$ne": "green"}}, {"$set": {"nongreen": True}})
        UpdateResult(raw_results=..., update_info={'n': 2, 'updatedExisting': True, 'ok': 1.0, 'nModified': 2})
        >>> my_coll.update_many({"c": "orange"}, {"$set": {"is_also_fruit": True}})
        UpdateResult(raw_results=..., update_info={'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0})
        >>> my_coll.update_many(
        ...     {"c": "orange"},
        ...     {"$set": {"is_also_fruit": True}},
        ...     upsert=True,
        ... )
        UpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '46643050-...'})

    Note:
        Similarly to the case of `find` (see its docstring for more details),
        running this command while, at the same time, another process is
        inserting new documents which match the filter of the `update_many`
        can result in an unpredictable fraction of these documents being updated.
        In other words, it cannot be easily predicted whether a given
        newly-inserted document will be picked up by the update_many command or not.
    """

    base_options = {"upsert": upsert}
    collected_responses: list[dict[str, Any]] = []
    collected_statuses: list[dict[str, Any]] = []
    # Page state returned by the latest API call (None before the first call).
    page_state: str | None = None
    timeout_ms = max_time_ms or self.api_options.max_time_ms
    logger.info(f"starting update_many on '{self.name}'")
    # A single overall timeout is shared by all API calls in the loop.
    timeout_manager = MultiCallTimeoutManager(overall_max_time_ms=timeout_ms)
    while True:
        call_options: dict[str, Any] = dict(base_options)
        if page_state is not None:
            call_options["pageState"] = page_state
        command_body = {
            "filter": filter,
            "update": update,
            "options": call_options,
        }
        # Entries whose value is None are left out of the command altogether.
        payload = {
            "updateMany": {
                key: value
                for key, value in command_body.items()
                if value is not None
            }
        }
        logger.info(f"updateMany on '{self.name}'")
        response = self._api_commander.request(
            payload=payload,
            timeout_info=timeout_manager.remaining_timeout_info(),
        )
        logger.info(f"finished updateMany on '{self.name}'")
        status = response.get("status") or {}

        if response.get("errors", []):
            # Quit early: the partial result covers the preceding calls only,
            # while the raw responses passed along include the failing one.
            partial_result = UpdateResult(
                raw_results=collected_responses,
                update_info=_prepare_update_info(collected_statuses),
            )
            responses_so_far = [*collected_responses, response]
            raise UpdateManyException.from_responses(
                commands=[None] * len(responses_so_far),
                raw_responses=responses_so_far,
                partial_result=partial_result,
            )

        if "status" not in response:
            raise DataAPIFaultyResponseException(
                text="Faulty response from update_many API command.",
                raw_response=response,
            )

        collected_responses.append(response)
        collected_statuses.append(status)
        # Another call is issued as long as the API returns a next page state.
        page_state = status.get("nextPageState")
        if page_state is None:
            break

    final_update_info = _prepare_update_info(collected_statuses)
    logger.info(f"finished update_many on '{self.name}'")
    return UpdateResult(
        raw_results=collected_responses,
        update_info=final_update_info,
    )
def update_one(self, filter: FilterType, update: dict[str, Any], *, vector: VectorType | None = None, vectorize: str | None = None, sort: SortType | None = None, upsert: bool = False, max_time_ms: int | None = None) ‑> UpdateResult

Update a single document on the collection as requested, optionally inserting a new one if no match is found.

Args

filter
a predicate expressed as a dictionary according to the Data API filter syntax. Examples are: {} {"name": "John"} {"price": {"$lt": 100}} {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]} See the Data API documentation for the full set of operators.
update
the update prescription to apply to the document, expressed as a dictionary as per Data API syntax. Examples are: {"$set": {"field": "value"}} {"$inc": {"counter": 10}} {"$unset": {"field": ""}} See the Data API documentation for the full syntax.
vector
a suitable vector, i.e. a list of float numbers of the appropriate dimensionality, to use vector search (i.e. ANN, or "approximate nearest-neighbours" search), as the sorting criterion. In this way, the matched document (if any) will be the one that is most similar to the provided vector. DEPRECATED (removal in 2.0). Use a $vector key in the sort clause dict instead.
vectorize
a string to be made into a vector to perform vector search. Using vectorize assumes a suitable service is configured for the collection. DEPRECATED (removal in 2.0). Use a $vectorize key in the sort clause dict instead.
sort
with this dictionary parameter one can control the sorting order of the documents matching the filter, effectively determining what document will come first and hence be the updated one. See the find method for more on sorting. Vector-based ANN sorting is achieved by providing a "$vector" or a "$vectorize" key in sort.
upsert
this parameter controls the behavior in absence of matches. If True, a new document (resulting from applying the update to an empty document) is inserted if no matches are found on the collection. If False, the operation silently does nothing in case of no matches.
max_time_ms
a timeout, in milliseconds, for the underlying HTTP request. If not passed, the collection-level setting is used instead.

Returns

an UpdateResult object summarizing the outcome of the update operation.

Example

>>> my_coll.insert_one({"Marco": "Polo"})
InsertOneResult(...)
>>> my_coll.update_one({"Marco": {"$exists": True}}, {"$inc": {"rank": 3}})
UpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1})
>>> my_coll.update_one({"Mirko": {"$exists": True}}, {"$inc": {"rank": 3}})
UpdateResult(raw_results=..., update_info={'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0})
>>> my_coll.update_one({"Mirko": {"$exists": True}}, {"$inc": {"rank": 3}}, upsert=True)
UpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '2a45ff60-...'})
Expand source code
def update_one(
    self,
    filter: FilterType,
    update: dict[str, Any],
    *,
    vector: VectorType | None = None,
    vectorize: str | None = None,
    sort: SortType | None = None,
    upsert: bool = False,
    max_time_ms: int | None = None,
) -> UpdateResult:
    """
    Apply the requested update to one document of the collection;
    optionally, insert a new document if nothing matches the filter.

    Args:
        filter: a predicate expressed as a dictionary according to the
            Data API filter syntax. Examples are:
                {}
                {"name": "John"}
                {"price": {"$lt": 100}}
                {"$and": [{"name": "John"}, {"price": {"$lt": 100}}]}
            See the Data API documentation for the full set of operators.
        update: the update prescription to apply to the document, expressed
            as a dictionary as per Data API syntax. Examples are:
                {"$set": {"field": "value"}}
                {"$inc": {"counter": 10}}
                {"$unset": {"field": ""}}
            See the Data API documentation for the full syntax.
        vector: a suitable vector, i.e. a list of float numbers of the
            appropriate dimensionality, used as the sorting criterion through
            vector search (i.e. ANN, or "approximate nearest-neighbours"
            search). The matched document (if any) will then be the one
            most similar to the provided vector.
            *DEPRECATED* (removal in 2.0). Use a `$vector` key in the
            sort clause dict instead.
        vectorize: a string to be made into a vector to perform vector search.
            Using vectorize assumes a suitable service is configured
            for the collection.
            *DEPRECATED* (removal in 2.0). Use a `$vectorize` key in the
            sort clause dict instead.
        sort: a dictionary controlling the sorting order of the documents
            matching the filter, which effectively determines what document
            comes first and is therefore the one being updated.
            See the `find` method for more on sorting.
            Vector-based ANN sorting is achieved by providing a "$vector"
            or a "$vectorize" key in `sort`.
        upsert: this parameter controls the behavior in absence of matches.
            If True, a new document (resulting from applying the `update`
            to an empty document) is inserted if no matches are found on
            the collection. If False, the operation silently does nothing
            in case of no matches.
        max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.
            If not passed, the collection-level setting is used instead.

    Returns:
        an UpdateResult object summarizing the outcome of the update operation.

    Raises:
        DataAPIFaultyResponseException: if the API response lacks
            a "status" entry.

    Example:
        >>> my_coll.insert_one({"Marco": "Polo"})
        InsertOneResult(...)
        >>> my_coll.update_one({"Marco": {"$exists": True}}, {"$inc": {"rank": 3}})
        UpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': True, 'ok': 1.0, 'nModified': 1})
        >>> my_coll.update_one({"Mirko": {"$exists": True}}, {"$inc": {"rank": 3}})
        UpdateResult(raw_results=..., update_info={'n': 0, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0})
        >>> my_coll.update_one({"Mirko": {"$exists": True}}, {"$inc": {"rank": 3}}, upsert=True)
        UpdateResult(raw_results=..., update_info={'n': 1, 'updatedExisting': False, 'ok': 1.0, 'nModified': 0, 'upserted': '2a45ff60-...'})
    """

    check_deprecated_vector_ize(
        vector=vector, vectors=None, vectorize=vectorize, kind="find"
    )
    effective_sort = _collate_vector_to_sort(sort, vector, vectorize)
    timeout_ms = max_time_ms or self.api_options.max_time_ms
    command_body = {
        "filter": filter,
        "update": update,
        "options": {"upsert": upsert},
        "sort": effective_sort,
    }
    # Entries whose value is None are left out of the command altogether.
    payload = {
        "updateOne": {
            key: value for key, value in command_body.items() if value is not None
        }
    }
    logger.info(f"updateOne on '{self.name}'")
    response = self._api_commander.request(
        payload=payload,
        timeout_info=base_timeout_info(timeout_ms),
    )
    logger.info(f"finished updateOne on '{self.name}'")
    if "status" not in response:
        raise DataAPIFaultyResponseException(
            text="Faulty response from update_one API command.",
            raw_response=response,
        )
    return UpdateResult(
        raw_results=[response],
        update_info=_prepare_update_info([response["status"]]),
    )
def with_options(self, *, name: str | None = None, embedding_api_key: str | EmbeddingHeadersProvider | None = None, collection_max_time_ms: int | None = None, callers: Sequence[CallerType] = [], caller_name: str | None = None, caller_version: str | None = None) ‑> Collection

Create a clone of this collection with some changed attributes.

Args

name
the name of the collection. This parameter is useful to quickly spawn Collection instances each pointing to a different collection existing in the same keyspace.
embedding_api_key
optional API key(s) for interacting with the collection. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
collection_max_time_ms
a default timeout, in milliseconds, for the duration of each operation on the collection. Individual timeouts can be provided to each collection method call and will take precedence, with this value being an overall default. Note that for some methods involving multiple API calls (such as find, delete_many, insert_many and so on), it is strongly suggested to provide a specific timeout as the default one likely wouldn't make much sense.
callers
a list of caller identities, i.e. applications, or frameworks, on behalf of which the Data API calls are performed. These end up in the request user-agent. Each caller identity is a ("caller_name", "caller_version") pair.
caller_name
DEPRECATED, use callers. Removal 2.0. Name of the application, or framework, on behalf of which the Data API calls are performed. This ends up in the request user-agent.
caller_version
version of the caller. DEPRECATED, use callers. Removal 2.0.

Returns

a new Collection instance.

Example

>>> my_other_coll = my_coll.with_options(
...     name="the_other_coll",
...     callers=[("caller_identity", "0.1.2")],
... )
Expand source code
def with_options(
    self,
    *,
    name: str | None = None,
    embedding_api_key: str | EmbeddingHeadersProvider | None = None,
    collection_max_time_ms: int | None = None,
    callers: Sequence[CallerType] = [],
    caller_name: str | None = None,
    caller_version: str | None = None,
) -> Collection:
    """
    Return a copy of this collection with a subset of its settings overridden.

    Args:
        name: the name of the collection the new instance should target.
            Handy for quickly obtaining Collection instances that point to
            different collections living in the same keyspace.
        embedding_api_key: optional API key(s) for interacting with the collection.
            When an embedding service is configured and this parameter is
            not None, every Data API call carries the embedding-related headers
            this parameter specifies. A plain string translates into the single
            "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            Some vectorize providers/models with header-based authentication
            require a specialized subclass of
            `astrapy.authentication.EmbeddingHeadersProvider` instead.
        collection_max_time_ms: default timeout, in milliseconds, applied to each
            operation on the collection. A timeout passed to an individual
            method call takes precedence over this overall default.
            For methods that may issue several API calls (such as `find`,
            `delete_many`, `insert_many` and so on) passing a specific timeout
            is strongly suggested, since the default one likely wouldn't make
            much sense.
        callers: a list of caller identities, i.e. applications, or frameworks,
            on behalf of which the Data API calls are performed. These end up
            in the request user-agent.
            Each caller identity is a ("caller_name", "caller_version") pair.
        caller_name: *DEPRECATED*, use `callers`. Removal 2.0. Name of the
            application, or framework, on behalf of which the Data API calls
            are performed. This ends up in the request user-agent.
        caller_version: version of the caller. *DEPRECATED*, use `callers`.
            Removal 2.0.

    Returns:
        a new Collection instance.

    Example:
        >>> my_other_coll = my_coll.with_options(
        ...     name="the_other_coll",
        ...     callers=[("caller_identity", "0.1.2")],
        ... )
    """

    # Reconcile the `callers` list with the deprecated name/version pair first.
    resolved_callers = check_caller_parameters(callers, caller_name, caller_version)
    headers_provider = coerce_embedding_headers_provider(embedding_api_key)

    return self._copy(
        name=name,
        api_options=CollectionAPIOptions(
            embedding_api_key=headers_provider,
            max_time_ms=collection_max_time_ms,
        ),
        callers=resolved_callers,
    )
class DataAPIClient (token: str | TokenProvider | None = None, *, environment: str | None = None, callers: Sequence[CallerType] = [], caller_name: str | None = None, caller_version: str | None = None)

A client for using the Data API. This is the main entry point and sits at the top of the conceptual "client -> database -> collection" hierarchy.

A client is created first, optionally passing it a suitable Access Token. Starting from the client, databases (Database and AsyncDatabase) are then created for working with data, and AstraDBAdmin objects can be created for admin-level work.

Args

token
an Access Token to the database. Example: "AstraCS:xyz...". This can be either a literal token string or a subclass of TokenProvider.
environment
a string representing the target Data API environment. It can be left unspecified for the default value of Environment.PROD; other values include Environment.OTHER, Environment.DSE.
callers
a list of caller identities, i.e. applications, or frameworks, on behalf of which Data API and DevOps API calls are performed. These end up in the request user-agent. Each caller identity is a ("caller_name", "caller_version") pair.
caller_name
DEPRECATED, use callers. Removal 2.0. Name of the application, or framework, on behalf of which the Data API and DevOps API calls are performed. This ends up in the request user-agent.
caller_version
version of the caller. DEPRECATED, use callers. Removal 2.0.

Example

>>> from astrapy import DataAPIClient
>>> my_client = DataAPIClient("AstraCS:...")
>>> my_db0 = my_client.get_database(
...     "https://01234567-....apps.astra.datastax.com"
... )
>>> my_coll = my_db0.create_collection("movies", dimension=2)
>>> my_coll.insert_one({"title": "The Title", "$vector": [0.1, 0.3]})
>>> my_db1 = my_client.get_database("01234567-...")
>>> my_db2 = my_client.get_database("01234567-...", region="us-east1")
>>> my_adm0 = my_client.get_admin()
>>> my_adm1 = my_client.get_admin(token=more_powerful_token_override)
>>> database_list = my_adm0.list_databases()
Expand source code
class DataAPIClient:
    """
    A client for using the Data API. This is the main entry point and sits
    at the top of the conceptual "client -> database -> collection" hierarchy.

    A client is created first, optionally passing it a suitable Access Token.
    Starting from the client, then:
        - databases (Database and AsyncDatabase) are created for working with data
        - AstraDBAdmin objects can be created for admin-level work

    Args:
        token: an Access Token to the database. Example: `"AstraCS:xyz..."`.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        environment: a string representing the target Data API environment.
            It can be left unspecified for the default value of `Environment.PROD`;
            other values include `Environment.OTHER`, `Environment.DSE`.
        callers: a list of caller identities, i.e. applications, or frameworks,
            on behalf of which Data API and DevOps API calls are performed.
            These end up in the request user-agent.
            Each caller identity is a ("caller_name", "caller_version") pair.
        caller_name: *DEPRECATED*, use `callers`. Removal 2.0. Name of the
            application, or framework, on behalf of which the Data API and
            DevOps API calls are performed. This ends up in the request user-agent.
        caller_version: version of the caller. *DEPRECATED*, use `callers`.
            Removal 2.0.

    Example:
        >>> from astrapy import DataAPIClient
        >>> my_client = DataAPIClient("AstraCS:...")
        >>> my_db0 = my_client.get_database(
        ...     "https://01234567-....apps.astra.datastax.com"
        ... )
        >>> my_coll = my_db0.create_collection("movies", dimension=2)
        >>> my_coll.insert_one({"title": "The Title", "$vector": [0.1, 0.3]})
        >>> my_db1 = my_client.get_database("01234567-...")
        >>> my_db2 = my_client.get_database("01234567-...", region="us-east1")
        >>> my_adm0 = my_client.get_admin()
        >>> my_adm1 = my_client.get_admin(token=more_powerful_token_override)
        >>> database_list = my_adm0.list_databases()
    """

    def __init__(
        self,
        token: str | TokenProvider | None = None,
        *,
        environment: str | None = None,
        callers: Sequence[CallerType] = [],
        caller_name: str | None = None,
        caller_version: str | None = None,
    ) -> None:
        # Reconcile `callers` with the deprecated caller_name/caller_version pair
        # before touching any instance state.
        resolved_callers = check_caller_parameters(
            callers, caller_name, caller_version
        )
        self.token_provider = coerce_token_provider(token)

        # A missing environment falls back to PROD; the stored value is lowercased.
        chosen_environment = environment or Environment.PROD
        self.environment = chosen_environment.lower()
        if self.environment not in Environment.values:
            raise ValueError(f"Unsupported `environment` value: '{self.environment}'.")

        self.callers = resolved_callers

    def __repr__(self) -> str:
        # Collect only the pieces worth showing: a redacted token (if any) and
        # the environment, the latter only when it is not the PROD one.
        parts: list[str] = []
        if self.token_provider:
            parts.append(f'"{redact_secret(str(self.token_provider), 15)}"')
        if self.environment != Environment.PROD:
            parts.append(f'environment="{self.environment}"')
        return f"{self.__class__.__name__}({', '.join(parts)})"

    def __eq__(self, other: Any) -> bool:
        # Only another DataAPIClient can compare equal.
        if not isinstance(other, DataAPIClient):
            return False
        # All three comparisons are evaluated up front (no short-circuiting).
        same_token = self.token_provider == other.token_provider
        same_environment = self.environment == other.environment
        same_callers = self.callers == other.callers
        return all((same_token, same_environment, same_callers))

    def __getitem__(self, database_id_or_api_endpoint: str) -> Database:
        # Bracket access is shorthand for `get_database`, with the key passed
        # as the `api_endpoint_or_id` argument.
        database = self.get_database(api_endpoint_or_id=database_id_or_api_endpoint)
        return database

    def _copy(
        self,
        *,
        token: str | TokenProvider | None = None,
        environment: str | None = None,
        callers: Sequence[CallerType] = [],
        caller_name: str | None = None,
        caller_version: str | None = None,
    ) -> DataAPIClient:
        # Build a new client; every setting that is not overridden (i.e. is
        # falsy after normalization) is inherited from this instance.
        resolved_callers = check_caller_parameters(
            callers, caller_name, caller_version
        )
        new_token = coerce_token_provider(token) or self.token_provider
        new_environment = environment or self.environment
        new_callers = resolved_callers or self.callers
        return DataAPIClient(
            token=new_token,
            environment=new_environment,
            callers=new_callers,
        )

    def with_options(
        self,
        *,
        token: str | TokenProvider | None = None,
        callers: Sequence[CallerType] = [],
        caller_name: str | None = None,
        caller_version: str | None = None,
    ) -> DataAPIClient:
        """
        Return a copy of this DataAPIClient with some of its attributes changed.

        Args:
            token: an Access Token to the database. Example: `"AstraCS:xyz..."`.
                Either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider` is accepted.
            callers: a list of caller identities, i.e. applications, or frameworks,
                on behalf of which Data API and DevOps API calls are performed.
                These end up in the request user-agent.
                Each caller identity is a ("caller_name", "caller_version") pair.
            caller_name: *DEPRECATED*, use `callers`. Removal 2.0. Name of the
                application, or framework, on behalf of which the Data API and
                DevOps API calls are performed. This ends up in the request
                user-agent.
            caller_version: version of the caller. *DEPRECATED*, use `callers`.
                Removal 2.0.

        Returns:
            a new DataAPIClient instance.

        Example:
            >>> another_client = my_client.with_options(
            ...     callers=[("caller_identity", "1.2.0")],
            ... )
        """

        # Normalize the caller arguments here, so `_copy` receives a plain
        # `callers` list and no deprecated name/version pair.
        resolved_callers = check_caller_parameters(
            callers, caller_name, caller_version
        )
        return self._copy(token=token, callers=resolved_callers)

    @deprecation.deprecated(  # type: ignore[misc]
        deprecated_in="1.5.1",
        removed_in="2.0.0",
        current_version=__version__,
        details=SET_CALLER_DEPRECATION_NOTICE,
    )
    def set_caller(
        self,
        caller_name: str | None = None,
        caller_version: str | None = None,
    ) -> None:
        """
        Replace, in place, the identity of the application/framework on behalf
        of which the API calls are performed (the "caller").

        Objects spawned from this client after this call inherit the new settings.

        Args:
            caller_name: name of the application, or framework, on behalf of which
                the API calls are performed. This ends up in the request user-agent.
            caller_version: version of the caller.

        Example:
            >>> my_client.set_caller(caller_name="the_caller", caller_version="0.1.0")
        """

        logger.info(f"setting caller to {caller_name}/{caller_version}")
        # An empty `callers` list is passed, so only the name/version pair counts.
        self.callers = check_caller_parameters([], caller_name, caller_version)

    def get_database(
        self,
        api_endpoint_or_id: str | None = None,
        *,
        api_endpoint: str | None = None,
        token: str | TokenProvider | None = None,
        keyspace: str | None = None,
        namespace: str | None = None,
        id: str | None = None,
        region: str | None = None,
        api_path: str | None = None,
        api_version: str | None = None,
        max_time_ms: int | None = None,
    ) -> Database:
        """
        Obtain a Database object from this client, for doing data-related work.

        Args:
            api_endpoint_or_id: positional parameter that can stand for both
                `api_endpoint` and `id`. Passing them together is an error.
            api_endpoint: the API Endpoint for the target database
                (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`).
                The database must already exist for the returned object to be
                effectively used: this invocation creates just the object
                instance, not the database. Actual admin work can be achieved
                by using the AstraDBAdmin object.
            token: if supplied, is passed to the Database instead of the client token.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            keyspace: if provided, it is passed to the Database; otherwise
                the Database class will apply an environment-specific default.
            namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
            id: the target database ID. This is alternative to using the API Endpoint.
            region: the region to use for connecting to the database. The
                database must be located in that region. This parameter can be used
                only if the database is specified by its ID (instead of API Endpoint).
                If this parameter is not passed, and cannot be inferred
                from the API endpoint, an additional DevOps API request is made
                to determine the default region and use it subsequently.
            api_path: path to append to the API Endpoint. In typical usage, this
                should be left to its default of "/api/json".
            api_version: version specifier to append to the API path. In typical
                usage, this should be left to its default of "v1".
            max_time_ms: a timeout, in milliseconds, for the DevOps API
                HTTP request should it be necessary (see the `region` argument).
                It is ignored whenever an API Endpoint is provided.

        Returns:
            a Database object with which to work on Data API collections.

        Raises:
            ValueError: if `region` is combined with an API Endpoint; if neither
                an API Endpoint nor an ID is available; or, outside of Astra DB,
                if an ID or a region is passed or the API Endpoint is missing.

        Example:
            >>> my_db0 = my_client.get_database("01234567-...")
            >>> my_db1 = my_client.get_database(
            ...     "https://01234567-...us-west1.apps.astra.datastax.com",
            ... )
            >>> my_db2 = my_client.get_database("01234567-...", token="AstraCS:...")
            >>> my_db3 = my_client.get_database("01234567-...", region="us-west1")
            >>> my_coll = my_db0.create_collection("movies", dimension=2)
            >>> my_coll.insert_one({"title": "The Title", "$vector": [0.3, 0.4]})

        Note:
            This method does not perform any admin-level operation through
            the DevOps API. For actual creation of a database, see the
            `create_database` method of class AstraDBAdmin.
        """

        endpoint_arg, id_arg = check_id_endpoint_parg_kwargs(
            p_arg=api_endpoint_or_id, api_endpoint=api_endpoint, id=id
        )
        check_deprecated_id_region(id_arg, region)
        resolved_keyspace = check_namespace_keyspace(
            keyspace=keyspace,
            namespace=namespace,
        )

        # lazy importing here to avoid circular dependency
        from astrapy import Database

        if self.environment not in Environment.astra_db_values:
            # Outside of Astra DB this call is an alias for
            # get_database_by_api_endpoint: the endpoint is mandatory,
            # IDs and regions are rejected, and max_time_ms is ignored.
            if id_arg is not None:
                raise ValueError("Cannot use a Database ID outside of Astra DB.")
            if region is not None:
                raise ValueError(
                    "Parameter `region` not supported outside of Astra DB."
                )
            if endpoint_arg is None:
                raise ValueError("Parameter `api_endpoint` is required.")
            return self.get_database_by_api_endpoint(
                api_endpoint=endpoint_arg,
                token=token,
                keyspace=resolved_keyspace,
                api_path=api_path,
                api_version=api_version,
            )

        if endpoint_arg is not None:
            # Astra DB, endpoint given: no calls take place, so max_time_ms
            # is ignored; a region cannot be combined with an endpoint.
            if region is not None:
                raise ValueError(
                    "Parameter `region` not supported with an API endpoint."
                )
            return self.get_database_by_api_endpoint(
                api_endpoint=endpoint_arg,
                token=token,
                keyspace=resolved_keyspace,
                api_path=api_path,
                api_version=api_version,
            )

        # Astra DB, no endpoint: the database must be identified by its ID.
        if id_arg is None:
            raise ValueError("Either `api_endpoint` or `id` must be supplied.")
        effective_token = coerce_token_provider(token) or self.token_provider
        resolved_region = normalize_region_for_id(
            database_id=id_arg,
            token_str=effective_token.get_token(),
            environment=self.environment,
            region_param=region,
            max_time_ms=max_time_ms,
        )
        return Database(
            api_endpoint=build_api_endpoint(
                environment=self.environment,
                database_id=id_arg,
                region=resolved_region,
            ),
            token=effective_token,
            keyspace=resolved_keyspace,
            callers=self.callers,
            environment=self.environment,
            api_path=api_path,
            api_version=api_version,
        )

    def get_async_database(
        self,
        api_endpoint_or_id: str | None = None,
        *,
        api_endpoint: str | None = None,
        token: str | TokenProvider | None = None,
        keyspace: str | None = None,
        namespace: str | None = None,
        id: str | None = None,
        region: str | None = None,
        api_path: str | None = None,
        api_version: str | None = None,
        max_time_ms: int | None = None,
    ) -> AsyncDatabase:
        """
        Obtain an AsyncDatabase object from this client, for doing data-related work.

        The arguments are validated, a Database is obtained through
        `get_database` and its async counterpart is returned.

        Args:
            api_endpoint_or_id: positional parameter that can stand for both
                `api_endpoint` and `id`. Passing them together is an error.
            api_endpoint: the API Endpoint for the target database
                (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`).
                The database must already exist for the returned object to be
                effectively used: this invocation creates just the object
                instance, not the database. Actual admin work can be achieved
                by using the AstraDBAdmin object.
            token: if supplied, is passed to the Database instead of the client token.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            keyspace: if provided, it is passed to the Database; otherwise
                the Database class will apply an environment-specific default.
            namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
            id: the target database ID. This is alternative to using the API Endpoint.
            region: the region to use for connecting to the database. The
                database must be located in that region. This parameter can be used
                only if the database is specified by its ID (instead of API Endpoint).
                If this parameter is not passed, and cannot be inferred
                from the API endpoint, an additional DevOps API request is made
                to determine the default region and use it subsequently.
            api_path: path to append to the API Endpoint. In typical usage, this
                should be left to its default of "/api/json".
            api_version: version specifier to append to the API path. In typical
                usage, this should be left to its default of "v1".
            max_time_ms: a timeout, in milliseconds, for the DevOps API
                HTTP request should it be necessary (see the `region` argument).

        Returns:
            an AsyncDatabase object with which to work on Data API collections.

        Example:
            >>> async def create_use_db(cl: DataAPIClient, api_ep: str) -> None:
            ...     async_db = cl.get_async_database(api_ep)
            ...     my_a_coll = await async_db.create_collection("movies", dimension=2)
            ...     await my_a_coll.insert_one({"title": "The Title", "$vector": [0.3, 0.4]})
            ...
            >>> asyncio.run(
            ...   create_use_db(
            ...       my_client,
            ...       "https://01234567-...us-west1.apps.astra.datastax.com",
            ...   )
            ... )

        Note:
            This method does not perform any admin-level operation through
            the DevOps API. For actual creation of a database, see the
            `create_database` method of class AstraDBAdmin.
        """

        endpoint_arg, id_arg = check_id_endpoint_parg_kwargs(
            p_arg=api_endpoint_or_id, api_endpoint=api_endpoint, id=id
        )
        check_deprecated_id_region(id_arg, region)
        resolved_keyspace = check_namespace_keyspace(
            keyspace=keyspace,
            namespace=namespace,
        )
        # Build the sync Database with the normalized arguments, then convert it.
        sync_database = self.get_database(
            api_endpoint=endpoint_arg,
            token=token,
            keyspace=resolved_keyspace,
            id=id_arg,
            region=region,
            api_path=api_path,
            api_version=api_version,
            max_time_ms=max_time_ms,
        )
        return sync_database.to_async()

    def get_database_by_api_endpoint(
        self,
        api_endpoint: str,
        *,
        token: str | TokenProvider | None = None,
        keyspace: str | None = None,
        namespace: str | None = None,
        api_path: str | None = None,
        api_version: str | None = None,
    ) -> Database:
        """
        Obtain a Database object from this client, for doing data-related work.
        The Database is identified by an API Endpoint rather than by ID and region.

        Using this method is generally equivalent to passing an API Endpoint
        as parameter to the `get_database` method (see).

        Args:
            api_endpoint: the full "API Endpoint" string used to reach the Data API.
                Example: "https://DATABASE_ID-REGION.apps.astra.datastax.com"
            token: if supplied, is passed to the Database instead of the client token.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            keyspace: if provided, it is passed to the Database; otherwise
                the Database class will apply an environment-specific default.
            namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
            api_path: path to append to the API Endpoint. In typical usage, this
                should be left to its default of "/api/json".
            api_version: version specifier to append to the API path. In typical
                usage, this should be left to its default of "v1".

        Returns:
            a Database object with which to work on Data API collections.

        Raises:
            ValueError: if the API Endpoint cannot be parsed, or if (on Astra DB)
                the environment it designates differs from the client's one.

        Example:
            >>> my_db0 = my_client.get_database_by_api_endpoint(
            ...     "https://01234567-....apps.astra.datastax.com",
            ... )
            >>> my_db1 = my_client.get_database_by_api_endpoint(
            ...     "https://01234567-....apps.astra.datastax.com",
            ...     token="AstraCS:...",
            ... )
            >>> my_db2 = my_client.get_database_by_api_endpoint(
            ...     "https://01234567-....apps.astra.datastax.com",
            ...     keyspace="the_other_keyspace",
            ... )
            >>> my_coll = my_db0.create_collection("movies", dimension=2)
            >>> my_coll.insert_one({"title": "The Title", "$vector": [0.5, 0.6]})

        Note:
            This method does not perform any admin-level operation through
            the DevOps API. For actual creation of a database, see the
            `create_database` method of class AstraDBAdmin.
        """

        resolved_keyspace = check_namespace_keyspace(
            keyspace=keyspace,
            namespace=namespace,
        )

        # lazy importing here to avoid circular dependency
        from astrapy import Database

        database_endpoint: Any
        if self.environment in Environment.astra_db_values:
            # Astra DB: the endpoint must parse and agree with the client
            # environment; the string is then handed over as received.
            parsed_api_endpoint = parse_api_endpoint(api_endpoint)
            if parsed_api_endpoint is None:
                raise ValueError(api_endpoint_parsing_error_message(api_endpoint))
            if parsed_api_endpoint.environment != self.environment:
                raise ValueError(
                    "Environment mismatch between client and provided "
                    "API endpoint. You can try adding "
                    f'`environment="{parsed_api_endpoint.environment}"` '
                    "to the DataAPIClient creation statement."
                )
            database_endpoint = api_endpoint
        else:
            # Other environments: the result of the generic URL parsing is
            # what gets passed to the Database.
            parsed_generic_api_endpoint = parse_generic_api_url(api_endpoint)
            if not parsed_generic_api_endpoint:
                raise ValueError(generic_api_url_parsing_error_message(api_endpoint))
            database_endpoint = parsed_generic_api_endpoint

        # A token passed here takes precedence over the client's own.
        effective_token = coerce_token_provider(token) or self.token_provider
        return Database(
            api_endpoint=database_endpoint,
            token=effective_token,
            keyspace=resolved_keyspace,
            callers=self.callers,
            environment=self.environment,
            api_path=api_path,
            api_version=api_version,
        )

    def get_async_database_by_api_endpoint(
        self,
        api_endpoint: str,
        *,
        token: str | TokenProvider | None = None,
        keyspace: str | None = None,
        namespace: str | None = None,
        api_path: str | None = None,
        api_version: str | None = None,
    ) -> AsyncDatabase:
        """
        Obtain an AsyncDatabase object from this client, for doing data-related work.
        The Database is identified by an API Endpoint rather than by ID and region.

        Using this method is generally equivalent to passing an API Endpoint
        as parameter to the `get_async_database` method (see).

        Behavior and signature are identical to those of the sync counterpart,
        `get_database_by_api_endpoint`: please refer to that method for
        more details.
        """

        resolved_keyspace = check_namespace_keyspace(
            keyspace=keyspace,
            namespace=namespace,
        )
        # Delegate to the sync method, then convert its result.
        sync_database = self.get_database_by_api_endpoint(
            api_endpoint=api_endpoint,
            token=token,
            keyspace=resolved_keyspace,
            api_path=api_path,
            api_version=api_version,
        )
        return sync_database.to_async()

    def get_admin(
        self,
        *,
        token: str | TokenProvider | None = None,
        dev_ops_url: str | None = None,
        dev_ops_api_version: str | None = None,
    ) -> AstraDBAdmin:
        """
        Obtain an AstraDBAdmin instance corresponding to this client, for
        admin work such as managing databases.

        Args:
            token: if supplied, is passed to the Astra DB Admin instead of the
                client token. This may be useful when switching to a more powerful,
                admin-capable permission set.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            dev_ops_url: in case of custom deployments, this can be used to specify
                the URL to the DevOps API, such as "https://api.astra.datastax.com".
                Generally it can be omitted. The environment (prod/dev/...) is
                determined from the API Endpoint.
            dev_ops_api_version: this can specify a custom version of the DevOps API
                (such as "v2"). Generally not needed.

        Returns:
            An AstraDBAdmin instance, with which to perform management at the
            database level.

        Raises:
            ValueError: if the client environment is not an Astra DB one.

        Example:
            >>> my_adm0 = my_client.get_admin()
            >>> my_adm1 = my_client.get_admin(token=more_powerful_token_override)
            >>> database_list = my_adm0.list_databases()
            >>> my_db_admin = my_adm0.create_database(
            ...     "the_other_database",
            ...     cloud_provider="AWS",
            ...     region="eu-west-1",
            ... )
            >>> my_db_admin.list_keyspaces()
            ['default_keyspace', 'that_other_one']
        """

        # lazy importing here to avoid circular dependency
        from astrapy.admin import AstraDBAdmin

        if self.environment not in Environment.astra_db_values:
            raise ValueError("Method not supported outside of Astra DB.")

        # A token passed here takes precedence over the client's own.
        admin_token = coerce_token_provider(token) or self.token_provider
        return AstraDBAdmin(
            token=admin_token,
            environment=self.environment,
            callers=self.callers,
            dev_ops_url=dev_ops_url,
            dev_ops_api_version=dev_ops_api_version,
        )

Methods

def get_admin(self, *, token: str | TokenProvider | None = None, dev_ops_url: str | None = None, dev_ops_api_version: str | None = None) ‑> AstraDBAdmin

Get an AstraDBAdmin instance corresponding to this client, for admin work such as managing databases.

Args

token
if supplied, is passed to the Astra DB Admin instead of the client token. This may be useful when switching to a more powerful, admin-capable permission set. This can be either a literal token string or a subclass of TokenProvider.
dev_ops_url
in case of custom deployments, this can be used to specify the URL to the DevOps API, such as "https://api.astra.datastax.com". Generally it can be omitted. The environment (prod/dev/…) is determined from the API Endpoint.
dev_ops_api_version
this can specify a custom version of the DevOps API (such as "v2"). Generally not needed.

Returns

An AstraDBAdmin instance, with which to perform management at the database level.

Example

>>> my_adm0 = my_client.get_admin()
>>> my_adm1 = my_client.get_admin(token=more_powerful_token_override)
>>> database_list = my_adm0.list_databases()
>>> my_db_admin = my_adm0.create_database(
...     "the_other_database",
...     cloud_provider="AWS",
...     region="eu-west-1",
... )
>>> my_db_admin.list_keyspaces()
['default_keyspace', 'that_other_one']
Expand source code
def get_admin(
    self,
    *,
    token: str | TokenProvider | None = None,
    dev_ops_url: str | None = None,
    dev_ops_api_version: str | None = None,
) -> AstraDBAdmin:
    """
    Build an AstraDBAdmin bound to this client's settings, to carry out
    administrative tasks such as managing databases.

    Args:
        token: an optional override for the client token, handed to the
            Astra DB Admin in its place. Handy when the admin work requires
            a more powerful, admin-capable permission set.
            Either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider` is accepted.
        dev_ops_url: the URL to the DevOps API, for custom deployments
            (e.g. "https://api.astra.datastax.com"). Generally it can be
            omitted. The environment (prod/dev/...) is determined from
            the API Endpoint.
        dev_ops_api_version: a custom version of the DevOps API
            (such as "v2"). Generally not needed.

    Returns:
        An AstraDBAdmin instance, with which to perform management at the
        database level.

    Raises:
        ValueError: if the environment of this client is not one of
            the Astra DB environments.

    Example:
        >>> my_adm0 = my_client.get_admin()
        >>> my_adm1 = my_client.get_admin(token=more_powerful_token_override)
        >>> database_list = my_adm0.list_databases()
        >>> my_db_admin = my_adm0.create_database(
        ...     "the_other_database",
        ...     cloud_provider="AWS",
        ...     region="eu-west-1",
        ... )
        >>> my_db_admin.list_keyspaces()
        ['default_keyspace', 'that_other_one']
    """

    # deferred import: doing it at module level would be a circular dependency
    from astrapy.admin import AstraDBAdmin

    if self.environment not in Environment.astra_db_values:
        raise ValueError("Method not supported outside of Astra DB.")

    # the client's own token is the fallback when no usable override is given
    admin_token = coerce_token_provider(token) or self.token_provider
    return AstraDBAdmin(
        token=admin_token,
        environment=self.environment,
        callers=self.callers,
        dev_ops_url=dev_ops_url,
        dev_ops_api_version=dev_ops_api_version,
    )
def get_async_database(self, api_endpoint_or_id: str | None = None, *, api_endpoint: str | None = None, token: str | TokenProvider | None = None, keyspace: str | None = None, namespace: str | None = None, id: str | None = None, region: str | None = None, api_path: str | None = None, api_version: str | None = None, max_time_ms: int | None = None) ‑> AsyncDatabase

Get an AsyncDatabase object from this client, for doing data-related work.

Args

api_endpoint_or_id
positional parameter that can stand for both api_endpoint and id. Passing them together is an error.
api_endpoint
the API Endpoint for the target database (e.g. https://<ID>-<REGION>.apps.astra.datastax.com). The database must exist already for the resulting object to be effectively used; in other words, this invocation does not create the database, just the object instance. Actual admin work can be achieved by using the AstraDBAdmin object.
token
if supplied, is passed to the Database instead of the client token. This can be either a literal token string or a subclass of TokenProvider.
keyspace
if provided, it is passed to the Database; otherwise the Database class will apply an environment-specific default.
namespace
an alias for keyspace. DEPRECATED, removal in 2.0.
id
the target database ID. This is alternative to using the API Endpoint.
region
the region to use for connecting to the database. The database must be located in that region. This parameter can be used only if the database is specified by its ID (instead of API Endpoint). If this parameter is not passed, and cannot be inferred from the API endpoint, an additional DevOps API request is made to determine the default region and use it subsequently.
api_path
path to append to the API Endpoint. In typical usage, this should be left to its default of "/api/json".
api_version
version specifier to append to the API path. In typical usage, this should be left to its default of "v1".
max_time_ms
a timeout, in milliseconds, for the DevOps API HTTP request should it be necessary (see the region argument).

Returns

an AsyncDatabase object with which to work on Data API collections.

Example

>>> async def create_use_db(cl: DataAPIClient, api_ep: str) -> None:
...     async_db = cl.get_async_database(api_ep)
...     my_a_coll = await async_db.create_collection("movies", dimension=2)
...     await my_a_coll.insert_one({"title": "The Title", "$vector": [0.3, 0.4]})
...
>>> asyncio.run(
...   create_use_db(
...       my_client,
...       "https://01234567-...us-west1.apps.astra.datastax.com",
...   )
... )

Note

This method does not perform any admin-level operation through the DevOps API. For actual creation of a database, see the create_database method of class AstraDBAdmin.

Expand source code
def get_async_database(
    self,
    api_endpoint_or_id: str | None = None,
    *,
    api_endpoint: str | None = None,
    token: str | TokenProvider | None = None,
    keyspace: str | None = None,
    namespace: str | None = None,
    id: str | None = None,
    region: str | None = None,
    api_path: str | None = None,
    api_version: str | None = None,
    max_time_ms: int | None = None,
) -> AsyncDatabase:
    """
    Obtain an AsyncDatabase from this client, for doing data-related work.

    The parameters are validated here, then the sync `get_database` method
    is invoked and its result is converted to the async flavor.

    Args:
        api_endpoint_or_id: positional parameter standing for either
            `api_endpoint` or `id`. Passing them together is an error.
        api_endpoint: the API Endpoint for the target database
            (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`).
            The database must exist already for the resulting object
            to be effectively used: this invocation creates just the
            object instance, not the database.
            Actual admin work can be achieved by using the AstraDBAdmin object.
        token: if supplied, is passed to the Database instead of the client token.
            Either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider` is accepted.
        keyspace: if provided, it is passed to the Database; otherwise
            the Database class will apply an environment-specific default.
        namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
        id: the target database ID. This is alternative to using the API Endpoint.
        region: the region to use for connecting to the database. The
            database must be located in that region. This parameter can be used
            only if the database is specified by its ID (instead of API Endpoint).
            If this parameter is not passed, and cannot be inferred
            from the API endpoint, an additional DevOps API request is made
            to determine the default region and use it subsequently.
        api_path: path to append to the API Endpoint. In typical usage, this
            should be left to its default of "/api/json".
        api_version: version specifier to append to the API path. In typical
            usage, this should be left to its default of "v1".
        max_time_ms: a timeout, in milliseconds, for the DevOps API
            HTTP request should it be necessary (see the `region` argument).

    Returns:
        an AsyncDatabase object with which to work on Data API collections.

    Example:
        >>> async def create_use_db(cl: DataAPIClient, api_ep: str) -> None:
        ...     async_db = cl.get_async_database(api_ep)
        ...     my_a_coll = await async_db.create_collection("movies", dimension=2)
        ...     await my_a_coll.insert_one({"title": "The Title", "$vector": [0.3, 0.4]})
        ...
        >>> asyncio.run(
        ...   create_use_db(
        ...       my_client,
        ...       "https://01234567-...us-west1.apps.astra.datastax.com",
        ...   )
        ... )

    Note:
        This method does not perform any admin-level operation through
        the DevOps API. For actual creation of a database, see the
        `create_database` method of class AstraDBAdmin.
    """

    # sort out which of endpoint/ID the caller actually provided
    endpoint_arg, id_arg = check_id_endpoint_parg_kwargs(
        p_arg=api_endpoint_or_id,
        api_endpoint=api_endpoint,
        id=id,
    )
    check_deprecated_id_region(id_arg, region)
    # reconcile `keyspace` with its deprecated alias `namespace`
    resolved_keyspace = check_namespace_keyspace(
        keyspace=keyspace,
        namespace=namespace,
    )

    # the sync Database is created first, then turned into its async counterpart
    sync_database = self.get_database(
        api_endpoint=endpoint_arg,
        token=token,
        keyspace=resolved_keyspace,
        id=id_arg,
        region=region,
        api_path=api_path,
        api_version=api_version,
        max_time_ms=max_time_ms,
    )
    return sync_database.to_async()
def get_async_database_by_api_endpoint(self, api_endpoint: str, *, token: str | TokenProvider | None = None, keyspace: str | None = None, namespace: str | None = None, api_path: str | None = None, api_version: str | None = None) ‑> AsyncDatabase

Get an AsyncDatabase object from this client, for doing data-related work. The Database is specified by an API Endpoint instead of the ID and a region.

Note that using this method is generally equivalent to passing an API Endpoint as parameter to the get_async_database method (see).

This method has identical behavior and signature as the sync counterpart get_database_by_api_endpoint: please see that one for more details.

Expand source code
def get_async_database_by_api_endpoint(
    self,
    api_endpoint: str,
    *,
    token: str | TokenProvider | None = None,
    keyspace: str | None = None,
    namespace: str | None = None,
    api_path: str | None = None,
    api_version: str | None = None,
) -> AsyncDatabase:
    """
    Obtain an AsyncDatabase from this client, for doing data-related work.
    The Database is identified by an API Endpoint rather than by
    its ID and a region.

    Calling this method is generally equivalent to passing an API Endpoint
    as parameter to the `get_async_database` method (see).

    Behavior and signature are identical to those of the sync counterpart,
    `get_database_by_api_endpoint`, whose result is converted here to the
    async flavor: please refer to that method for more details.
    """

    # reconcile `keyspace` with its deprecated alias `namespace`
    resolved_keyspace = check_namespace_keyspace(
        keyspace=keyspace,
        namespace=namespace,
    )
    sync_database = self.get_database_by_api_endpoint(
        api_endpoint=api_endpoint,
        token=token,
        keyspace=resolved_keyspace,
        api_path=api_path,
        api_version=api_version,
    )
    return sync_database.to_async()
def get_database(self, api_endpoint_or_id: str | None = None, *, api_endpoint: str | None = None, token: str | TokenProvider | None = None, keyspace: str | None = None, namespace: str | None = None, id: str | None = None, region: str | None = None, api_path: str | None = None, api_version: str | None = None, max_time_ms: int | None = None) ‑> Database

Get a Database object from this client, for doing data-related work.

Args

api_endpoint_or_id
positional parameter that can stand for both api_endpoint and id. Passing them together is an error.
api_endpoint
the API Endpoint for the target database (e.g. https://<ID>-<REGION>.apps.astra.datastax.com). The database must exist already for the resulting object to be effectively used; in other words, this invocation does not create the database, just the object instance. Actual admin work can be achieved by using the AstraDBAdmin object.
token
if supplied, is passed to the Database instead of the client token. This can be either a literal token string or a subclass of TokenProvider.
keyspace
if provided, it is passed to the Database; otherwise the Database class will apply an environment-specific default.
namespace
an alias for keyspace. DEPRECATED, removal in 2.0.
id
the target database ID. This is alternative to using the API Endpoint.
region
the region to use for connecting to the database. The database must be located in that region. This parameter can be used only if the database is specified by its ID (instead of API Endpoint). If this parameter is not passed, and cannot be inferred from the API endpoint, an additional DevOps API request is made to determine the default region and use it subsequently.
api_path
path to append to the API Endpoint. In typical usage, this should be left to its default of "/api/json".
api_version
version specifier to append to the API path. In typical usage, this should be left to its default of "v1".
max_time_ms
a timeout, in milliseconds, for the DevOps API HTTP request should it be necessary (see the region argument).

Returns

a Database object with which to work on Data API collections.

Example

>>> my_db0 = my_client.get_database("01234567-...")
>>> my_db1 = my_client.get_database(
...     "https://01234567-...us-west1.apps.astra.datastax.com",
... )
>>> my_db2 = my_client.get_database("01234567-...", token="AstraCS:...")
>>> my_db3 = my_client.get_database("01234567-...", region="us-west1")
>>> my_coll = my_db0.create_collection("movies", dimension=2)
>>> my_coll.insert_one({"title": "The Title", "$vector": [0.3, 0.4]})

Note

This method does not perform any admin-level operation through the DevOps API. For actual creation of a database, see the create_database method of class AstraDBAdmin.

Expand source code
def get_database(
    self,
    api_endpoint_or_id: str | None = None,
    *,
    api_endpoint: str | None = None,
    token: str | TokenProvider | None = None,
    keyspace: str | None = None,
    namespace: str | None = None,
    id: str | None = None,
    region: str | None = None,
    api_path: str | None = None,
    api_version: str | None = None,
    max_time_ms: int | None = None,
) -> Database:
    """
    Obtain a Database object from this client, for doing data-related work.

    Args:
        api_endpoint_or_id: positional parameter standing for either
            `api_endpoint` or `id`. Passing them together is an error.
        api_endpoint: the API Endpoint for the target database
            (e.g. `https://<ID>-<REGION>.apps.astra.datastax.com`).
            The database must exist already for the resulting object
            to be effectively used: this invocation creates just the
            object instance, not the database.
            Actual admin work can be achieved by using the AstraDBAdmin object.
        token: if supplied, is passed to the Database instead of the client token.
            Either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider` is accepted.
        keyspace: if provided, it is passed to the Database; otherwise
            the Database class will apply an environment-specific default.
        namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
        id: the target database ID. This is alternative to using the API Endpoint.
        region: the region to use for connecting to the database. The
            database must be located in that region. This parameter can be used
            only if the database is specified by its ID (instead of API Endpoint).
            If this parameter is not passed, and cannot be inferred
            from the API endpoint, an additional DevOps API request is made
            to determine the default region and use it subsequently.
        api_path: path to append to the API Endpoint. In typical usage, this
            should be left to its default of "/api/json".
        api_version: version specifier to append to the API path. In typical
            usage, this should be left to its default of "v1".
        max_time_ms: a timeout, in milliseconds, for the DevOps API
            HTTP request should it be necessary (see the `region` argument).

    Returns:
        a Database object with which to work on Data API collections.

    Raises:
        ValueError: outside of Astra DB, if a database ID or a region is
            passed, or if the API endpoint is missing; on Astra DB, if a
            region is passed along with an API endpoint, or if neither
            an API endpoint nor an ID is supplied.

    Example:
        >>> my_db0 = my_client.get_database("01234567-...")
        >>> my_db1 = my_client.get_database(
        ...     "https://01234567-...us-west1.apps.astra.datastax.com",
        ... )
        >>> my_db2 = my_client.get_database("01234567-...", token="AstraCS:...")
        >>> my_db3 = my_client.get_database("01234567-...", region="us-west1")
        >>> my_coll = my_db0.create_collection("movies", dimension=2)
        >>> my_coll.insert_one({"title": "The Title", "$vector": [0.3, 0.4]})

    Note:
        This method does not perform any admin-level operation through
        the DevOps API. For actual creation of a database, see the
        `create_database` method of class AstraDBAdmin.
    """

    # sort out which of endpoint/ID the caller actually provided
    endpoint_arg, id_arg = check_id_endpoint_parg_kwargs(
        p_arg=api_endpoint_or_id,
        api_endpoint=api_endpoint,
        id=id,
    )
    check_deprecated_id_region(id_arg, region)
    # reconcile `keyspace` with its deprecated alias `namespace`
    resolved_keyspace = check_namespace_keyspace(
        keyspace=keyspace,
        namespace=namespace,
    )

    # deferred import: doing it at module level would be a circular dependency
    from astrapy import Database

    if self.environment not in Environment.astra_db_values:
        # Outside of Astra DB this call is just an alias for
        # get_database_by_api_endpoint: the endpoint is mandatory,
        # ID and region are rejected and max_time_ms is ignored.
        if id_arg is not None:
            raise ValueError("Cannot use a Database ID outside of Astra DB.")
        if region is not None:
            raise ValueError(
                "Parameter `region` not supported outside of Astra DB."
            )
        if endpoint_arg is None:
            raise ValueError("Parameter `api_endpoint` is required.")
        return self.get_database_by_api_endpoint(
            api_endpoint=endpoint_arg,
            token=token,
            keyspace=resolved_keyspace,
            api_path=api_path,
            api_version=api_version,
        )

    # From here on, the environment is an Astra DB one.
    if endpoint_arg is not None:
        # an endpoint was passed (possibly through the positional parameter)
        if region is not None:
            raise ValueError(
                "Parameter `region` not supported with an API endpoint."
            )
        # max_time_ms is ignored on this path (no calls take place)
        return self.get_database_by_api_endpoint(
            api_endpoint=endpoint_arg,
            token=token,
            keyspace=resolved_keyspace,
            api_path=api_path,
            api_version=api_version,
        )

    # Only a database ID can be left at this point: the API endpoint
    # is assembled from the ID and the (normalized) region.
    if id_arg is None:
        raise ValueError("Either `api_endpoint` or `id` must be supplied.")
    database_token = coerce_token_provider(token) or self.token_provider
    database_region = normalize_region_for_id(
        database_id=id_arg,
        token_str=database_token.get_token(),
        environment=self.environment,
        region_param=region,
        max_time_ms=max_time_ms,
    )
    assembled_endpoint = build_api_endpoint(
        environment=self.environment,
        database_id=id_arg,
        region=database_region,
    )
    return Database(
        api_endpoint=assembled_endpoint,
        token=database_token,
        keyspace=resolved_keyspace,
        callers=self.callers,
        environment=self.environment,
        api_path=api_path,
        api_version=api_version,
    )
def get_database_by_api_endpoint(self, api_endpoint: str, *, token: str | TokenProvider | None = None, keyspace: str | None = None, namespace: str | None = None, api_path: str | None = None, api_version: str | None = None) ‑> Database

Get a Database object from this client, for doing data-related work. The Database is specified by an API Endpoint instead of the ID and a region.

Note that using this method is generally equivalent to passing an API Endpoint as parameter to the get_database method (see).

Args

api_endpoint
the full "API Endpoint" string used to reach the Data API. Example: "https://DATABASE_ID-REGION.apps.astra.datastax.com"
token
if supplied, is passed to the Database instead of the client token. This can be either a literal token string or a subclass of TokenProvider.
keyspace
if provided, it is passed to the Database; otherwise the Database class will apply an environment-specific default.
namespace
an alias for keyspace. DEPRECATED, removal in 2.0.
api_path
path to append to the API Endpoint. In typical usage, this should be left to its default of "/api/json".
api_version
version specifier to append to the API path. In typical usage, this should be left to its default of "v1".

Returns

a Database object with which to work on Data API collections.

Example

>>> my_db0 = my_client.get_database_by_api_endpoint("01234567-...")
>>> my_db1 = my_client.get_database_by_api_endpoint(
...     "https://01234567-....apps.astra.datastax.com",
...     token="AstraCS:...",
... )
>>> my_db2 = my_client.get_database_by_api_endpoint(
...     "https://01234567-....apps.astra.datastax.com",
...     keyspace="the_other_keyspace",
... )
>>> my_coll = my_db0.create_collection("movies", dimension=2)
>>> my_coll.insert_one({"title": "The Title", "$vector": [0.5, 0.6]})

Note

This method does not perform any admin-level operation through the DevOps API. For actual creation of a database, see the create_database method of class AstraDBAdmin.

Expand source code
def get_database_by_api_endpoint(
    self,
    api_endpoint: str,
    *,
    token: str | TokenProvider | None = None,
    keyspace: str | None = None,
    namespace: str | None = None,
    api_path: str | None = None,
    api_version: str | None = None,
) -> Database:
    """
    Obtain a Database object from this client, for doing data-related work.
    The Database is identified by an API Endpoint rather than by
    its ID and a region.

    Calling this method is generally equivalent to passing an API Endpoint
    as parameter to the `get_database` method (see).

    Args:
        api_endpoint: the full "API Endpoint" string used to reach the Data API.
            Example: "https://DATABASE_ID-REGION.apps.astra.datastax.com"
        token: if supplied, is passed to the Database instead of the client token.
            Either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider` is accepted.
        keyspace: if provided, it is passed to the Database; otherwise
            the Database class will apply an environment-specific default.
        namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
        api_path: path to append to the API Endpoint. In typical usage, this
            should be left to its default of "/api/json".
        api_version: version specifier to append to the API path. In typical
            usage, this should be left to its default of "v1".

    Returns:
        a Database object with which to work on Data API collections.

    Raises:
        ValueError: if the API endpoint cannot be parsed, or (on Astra DB)
            if the environment found in the endpoint is not the same
            as the environment of this client.

    Example:
        >>> my_db0 = my_client.get_database_by_api_endpoint(
        ...     "https://01234567-....apps.astra.datastax.com",
        ... )
        >>> my_db1 = my_client.get_database_by_api_endpoint(
        ...     "https://01234567-....apps.astra.datastax.com",
        ...     token="AstraCS:...",
        ... )
        >>> my_db2 = my_client.get_database_by_api_endpoint(
        ...     "https://01234567-....apps.astra.datastax.com",
        ...     keyspace="the_other_keyspace",
        ... )
        >>> my_coll = my_db0.create_collection("movies", dimension=2)
        >>> my_coll.insert_one({"title": "The Title", "$vector": [0.5, 0.6]})

    Note:
        This method does not perform any admin-level operation through
        the DevOps API. For actual creation of a database, see the
        `create_database` method of class AstraDBAdmin.
    """

    # reconcile `keyspace` with its deprecated alias `namespace`
    resolved_keyspace = check_namespace_keyspace(
        keyspace=keyspace,
        namespace=namespace,
    )

    # deferred import: doing it at module level would be a circular dependency
    from astrapy import Database

    if self.environment in Environment.astra_db_values:
        parsed_api_endpoint = parse_api_endpoint(api_endpoint)
        if parsed_api_endpoint is None:
            raise ValueError(api_endpoint_parsing_error_message(api_endpoint))
        if parsed_api_endpoint.environment != self.environment:
            raise ValueError(
                "Environment mismatch between client and provided "
                "API endpoint. You can try adding "
                f'`environment="{parsed_api_endpoint.environment}"` '
                "to the DataAPIClient creation statement."
            )
        # on Astra DB, the endpoint string is forwarded exactly as received
        database_endpoint = api_endpoint
    else:
        generic_endpoint = parse_generic_api_url(api_endpoint)
        if not generic_endpoint:
            raise ValueError(generic_api_url_parsing_error_message(api_endpoint))
        # elsewhere, it is the outcome of the parsing that gets forwarded
        database_endpoint = generic_endpoint

    # the client's own token is the fallback when no usable override is given
    database_token = coerce_token_provider(token) or self.token_provider
    return Database(
        api_endpoint=database_endpoint,
        token=database_token,
        keyspace=resolved_keyspace,
        callers=self.callers,
        environment=self.environment,
        api_path=api_path,
        api_version=api_version,
    )
def set_caller(self, caller_name: str | None = None, caller_version: str | None = None) ‑> None

Set a new identity for the application/framework on behalf of which the API calls will be performed (the "caller").

New objects spawned from this client afterwards will inherit the new settings.

Args

caller_name
name of the application, or framework, on behalf of which the API calls are performed. This ends up in the request user-agent.
caller_version
version of the caller.

Example

>>> my_client.set_caller(caller_name="the_caller", caller_version="0.1.0")

Deprecated since version: 1.5.1

This will be removed in 2.0.0. Please provide the caller(s) at constructor time through the callers list parameter.

Expand source code
@deprecation.deprecated(  # type: ignore[misc]
    deprecated_in="1.5.1",
    removed_in="2.0.0",
    current_version=__version__,
    details=SET_CALLER_DEPRECATION_NOTICE,
)
def set_caller(
    self,
    caller_name: str | None = None,
    caller_version: str | None = None,
) -> None:
    """
    Replace the "caller" identity of this client, i.e. the application
    or framework on behalf of which the API calls will be performed.

    The `callers` attribute of this client is overwritten; objects spawned
    from this client afterwards will inherit the new settings.

    Args:
        caller_name: name of the application, or framework, on behalf of which
            the API calls are performed. This ends up in the request user-agent.
        caller_version: version of the caller.

    Example:
        >>> my_client.set_caller(caller_name="the_caller", caller_version="0.1.0")
    """

    logger.info(f"setting caller to {caller_name}/{caller_version}")
    # an empty `callers` list is supplied: only the name/version pair counts here
    self.callers = check_caller_parameters([], caller_name, caller_version)
def with_options(self, *, token: str | TokenProvider | None = None, callers: Sequence[CallerType] = [], caller_name: str | None = None, caller_version: str | None = None) ‑> DataAPIClient

Create a clone of this DataAPIClient with some changed attributes.

Args

token
an Access Token to the database. Example: "AstraCS:xyz...". This can be either a literal token string or a subclass of TokenProvider.
callers
a list of caller identities, i.e. applications, or frameworks, on behalf of which Data API and DevOps API calls are performed. These end up in the request user-agent. Each caller identity is a ("caller_name", "caller_version") pair.
caller_name
DEPRECATED, use callers. Removal 2.0. Name of the application, or framework, on behalf of which the Data API and DevOps API calls are performed. This ends up in the request user-agent.
caller_version
version of the caller. DEPRECATED, use callers. Removal 2.0.

Returns

a new DataAPIClient instance.

Example

>>> another_client = my_client.with_options(
...     callers=[("caller_identity", "1.2.0")],
... )
Expand source code
def with_options(
    self,
    *,
    token: str | TokenProvider | None = None,
    callers: Sequence[CallerType] = [],
    caller_name: str | None = None,
    caller_version: str | None = None,
) -> DataAPIClient:
    """
    Return a copy of this DataAPIClient, with some of its attributes changed.

    Args:
        token: an Access Token to the database. Example: `"AstraCS:xyz..."`.
            Either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider` is accepted.
        callers: a list of caller identities, i.e. applications, or frameworks,
            on behalf of which Data API and DevOps API calls are performed.
            These end up in the request user-agent.
            Each caller identity is a ("caller_name", "caller_version") pair.
        caller_name: *DEPRECATED*, use `callers`. Removal 2.0. Name of the
            application, or framework, on behalf of which the Data API and
            DevOps API calls are performed. This ends up in the request user-agent.
        caller_version: version of the caller. *DEPRECATED*, use `callers`.
            Removal 2.0.

    Returns:
        a new DataAPIClient instance.

    Example:
        >>> another_client = my_client.with_options(
        ...     callers=[("caller_identity", "1.2.0")],
        ... )
    """

    # `callers` and the deprecated name/version pair are reconciled by the
    # helper; the actual cloning is left to `_copy`.
    return self._copy(
        token=token,
        callers=check_caller_parameters(callers, caller_name, caller_version),
    )
class DataAPIDatabaseAdmin (api_endpoint: str, *, token: str | TokenProvider | None = None, environment: str | None = None, api_path: str | None = None, api_version: str | None = None, callers: Sequence[CallerType] = [], caller_name: str | None = None, caller_version: str | None = None, spawner_database: Database | AsyncDatabase | None = None)

An "admin" object for non-Astra Data API environments, to perform administrative tasks at the keyspaces level such as creating/listing/dropping keyspaces.

Conforming to the architecture of non-Astra deployments of the Data API, this object works within the one existing database. It is within that database that the keyspace CRUD operations (and possibly other admin operations) are performed. Since non-Astra environments lack the concept of an overall admin (such as the all-databases AstraDBAdmin class), a DataAPIDatabaseAdmin is generally created by invoking the get_database_admin method of the corresponding Database object (which in turn is spawned by a DataAPIClient).

Args

api_endpoint
the full URI to access the Data API, e.g. "http://localhost:8181".
token
an access token with enough permission to perform admin tasks. This can be either a literal token string or a subclass of TokenProvider.
environment
a label, whose value is one of Environment.OTHER (default) or other non-Astra environment values in the Environment enum.
api_path
path to append to the API Endpoint. In typical usage, this class is created by a method such as Database.get_database_admin(), which passes the matching value. Defaults to this portion of the path being absent.
api_version
version specifier to append to the API path. In typical usage, this class is created by a method such as Database.get_database_admin(), which passes the matching value. Defaults to this portion of the path being absent.
callers
a list of caller identities, i.e. applications, or frameworks, on behalf of which Data API calls are performed. These end up in the request user-agent. Each caller identity is a ("caller_name", "caller_version") pair.
caller_name
DEPRECATED, use callers. Removal 2.0. Name of the application, or framework, on behalf of which the Data API calls are performed. This ends up in the request user-agent.
caller_version
version of the caller. DEPRECATED, use callers. Removal 2.0.
spawner_database
either a Database or an AsyncDatabase instance. This represents the database class which spawns this admin object, so that, if required, a keyspace creation can retroactively "use" the new keyspace in the spawner. Used to enable the Async/Database.get_database_admin().create_keyspace() pattern.

Example

>>> from astrapy import DataAPIClient
>>> from astrapy.constants import Environment
>>> from astrapy.authentication import UsernamePasswordTokenProvider
>>>
>>> token_provider = UsernamePasswordTokenProvider("username", "password")
>>> endpoint = "http://localhost:8181"
>>>
>>> client = DataAPIClient(
...     token=token_provider,
...     environment=Environment.OTHER,
... )
>>> database = client.get_database(endpoint)
>>> admin_for_my_db = database.get_database_admin()
>>>
>>> admin_for_my_db.list_keyspaces()
['keyspace1', 'keyspace2']
Expand source code
class DataAPIDatabaseAdmin(DatabaseAdmin):
    """
    An "admin" object for non-Astra Data API environments, to perform administrative
    tasks at the keyspaces level such as creating/listing/dropping keyspaces.

    Conforming to the architecture of non-Astra deployments of the Data API,
    this object works within the one existing database. It is within that database
    that the keyspace CRUD operations (and possibly other admin operations)
    are performed. Since non-Astra environment lack the concept of an overall
    admin (such as the all-databases AstraDBAdmin class), a `DataAPIDatabaseAdmin`
    is generally created by invoking the `get_database_admin` method of the
    corresponding `Database` object (which in turn is spawned by a DataAPIClient).

    Args:
        api_endpoint: the full URI to access the Data API,
            e.g. "http://localhost:8181".
        token: an access token with enough permission to perform admin tasks.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        environment: a label, whose value is one of Environment.OTHER (default)
            or other non-Astra environment values in the `Environment` enum.
        api_path: path to append to the API Endpoint. In typical usage, this
            class is created by a method such as `Database.get_database_admin()`,
            which passes the matching value. Defaults to this portion of the path
            being absent.
        api_version: version specifier to append to the API path. In typical
            usage, this class is created by a method such as
            `Database.get_database_admin()`, which passes the matching value.
            Defaults to this portion of the path being absent.
        callers: a list of caller identities, i.e. applications, or frameworks,
            on behalf of which Data API calls are performed. These end up in the
            request user-agent.
            Each caller identity is a ("caller_name", "caller_version") pair.
        caller_name: *DEPRECATED*, use `callers`. Removal 2.0. Name of the
            application, or framework, on behalf of which the Data API calls
            are performed. This ends up in the request user-agent.
        caller_version: version of the caller. *DEPRECATED*, use `callers`. Removal 2.0.
        spawner_database: either a Database or an AsyncDatabase instance.
            This represents the database class which spawns this admin object, so that,
            if required, a keyspace creation can retroactively "use" the new keyspace
            in the spawner. Used to enable the
            Async/Database.get_admin_database().create_keyspace() pattern.

    Example:
        >>> from astrapy import DataAPIClient
        >>> from astrapy.constants import Environment
        >>> from astrapy.authentication import UsernamePasswordTokenProvider
        >>>
        >>> token_provider = UsernamePasswordTokenProvider("username", "password")
        >>> endpoint = "http://localhost:8181"
        >>>
        >>> client = DataAPIClient(
        >>>     token=token_provider,
        >>>     environment=Environment.OTHER,
        >>> )
        >>> database = client.get_database(endpoint)
        >>> admin_for_my_db = database.get_database_admin()
        >>>
        >>> admin_for_my_db.list_keyspaces()
        ['keyspace1', 'keyspace2']
    """

    def __init__(
        self,
        api_endpoint: str,
        *,
        token: str | TokenProvider | None = None,
        environment: str | None = None,
        api_path: str | None = None,
        api_version: str | None = None,
        callers: Sequence[CallerType] = [],
        caller_name: str | None = None,
        caller_version: str | None = None,
        spawner_database: Database | AsyncDatabase | None = None,
    ) -> None:
        # Deferred import: importing at module level would be circular.
        from astrapy.database import Database

        # Reconcile `callers` with the deprecated caller_name/caller_version pair.
        self.callers = check_caller_parameters(callers, caller_name, caller_version)
        self.api_endpoint = api_endpoint
        # The environment label is stored lowercased; OTHER is the fallback.
        self.environment = (environment or Environment.OTHER).lower()
        self.token_provider = coerce_token_provider(token)
        # A path portion left unspecified is stored as the empty string.
        self.api_path = "" if api_path is None else api_path
        self.api_version = "" if api_version is None else api_version
        # The headers must be in place before the commander is built,
        # since `_get_api_commander` reads them.
        self._commander_headers = {
            DEFAULT_DATA_API_AUTH_HEADER: self.token_provider.get_token(),
        }
        self._api_commander = self._get_api_commander()

        if spawner_database is None:
            # No spawner supplied: create a Database with the same settings.
            # The keyspace is left to its per-environment default
            # (resolving it is a task for the Database).
            spawner_database = Database(
                api_endpoint=self.api_endpoint,
                token=self.token_provider,
                keyspace=None,
                callers=self.callers,
                environment=self.environment,
                api_path=self.api_path,
                api_version=self.api_version,
            )
        self.spawner_database = spawner_database

    def __repr__(self) -> str:
        """
        Describe this object through its endpoint, token and environment.

        The token part is included only when the token provider is truthy,
        and is passed through `redact_secret` rather than shown as is.
        """
        pieces = [f'api_endpoint="{self.api_endpoint}"']
        if self.token_provider:
            redacted_token = redact_secret(str(self.token_provider), 15)
            pieces.append(f'token="{redacted_token}"')
        pieces.append(f'environment="{self.environment}"')
        return f"{self.__class__.__name__}({', '.join(pieces)})"

    def __eq__(self, other: Any) -> bool:
        """
        Compare two admin objects by their environment and API commander.

        Anything that is not a DataAPIDatabaseAdmin compares as different.
        """
        if not isinstance(other, DataAPIDatabaseAdmin):
            return False
        # Both comparisons are carried out before combining the outcome.
        same_environment = self.environment == other.environment
        same_commander = self._api_commander == other._api_commander
        return bool(same_environment) and bool(same_commander)

    def _get_api_commander(self) -> APICommander:
        """
        Create the APICommander matching this object's current settings.

        The request path is the API path and the API version joined by "/",
        with empty components left out.
        """
        nonempty_components = filter(None, [self.api_path, self.api_version])
        request_path = "/".join(nonempty_components)
        return APICommander(
            api_endpoint=self.api_endpoint,
            path=request_path,
            headers=self._commander_headers,
            callers=self.callers,
        )

    def _copy(
        self,
        api_endpoint: str | None = None,
        token: str | TokenProvider | None = None,
        environment: str | None = None,
        api_path: str | None = None,
        api_version: str | None = None,
        callers: Sequence[CallerType] = [],
        caller_name: str | None = None,
        caller_version: str | None = None,
    ) -> DataAPIDatabaseAdmin:
        """
        Create a new DataAPIDatabaseAdmin, overriding selected settings.

        Each setting falls back to the value held by this instance whenever
        the supplied override is falsy (e.g. None, an empty string or an
        empty list of callers). No `spawner_database` is passed to the
        new instance.

        Args:
            api_endpoint: replacement for the API endpoint.
            token: replacement for the token (string or TokenProvider).
            environment: replacement for the environment label.
            api_path: replacement for the API path.
            api_version: replacement for the API version.
            callers: replacement for the list of caller identities.
            caller_name: passed to `check_caller_parameters` with `callers`.
            caller_version: passed to `check_caller_parameters` with `callers`.

        Returns:
            a new DataAPIDatabaseAdmin instance.
        """
        requested_callers = check_caller_parameters(
            callers, caller_name, caller_version
        )
        effective_settings = {
            "api_endpoint": api_endpoint or self.api_endpoint,
            "token": coerce_token_provider(token) or self.token_provider,
            "environment": environment or self.environment,
            "api_path": api_path or self.api_path,
            "api_version": api_version or self.api_version,
            "callers": requested_callers or self.callers,
        }
        return DataAPIDatabaseAdmin(**effective_settings)

    def with_options(
        self,
        *,
        api_endpoint: str | None = None,
        token: str | TokenProvider | None = None,
        callers: Sequence[CallerType] = [],
        caller_name: str | None = None,
        caller_version: str | None = None,
    ) -> DataAPIDatabaseAdmin:
        """
        Build a copy of this DataAPIDatabaseAdmin in which some attributes
        are replaced.

        Args:
            api_endpoint: the full URI to access the Data API,
                e.g. "http://localhost:8181".
            token: an access token with enough permission to perform admin tasks.
                Either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider` is accepted.
            callers: a list of caller identities, i.e. applications, or frameworks,
                on behalf of which Data API calls are performed. These end up in the
                request user-agent.
                Each caller identity is a ("caller_name", "caller_version") pair.
            caller_name: *DEPRECATED*, use `callers`. Removal 2.0. Name of the
                application, or framework, on behalf of which the Data API calls
                are performed. This ends up in the request user-agent.
            caller_version: version of the caller. *DEPRECATED*, use `callers`.
                Removal 2.0.

        Returns:
            a new DataAPIDatabaseAdmin instance.

        Example:
            >>> admin_for_my_other_db = admin_for_my_db.with_options(
            ...     api_endpoint="http://10.1.1.5:8181",
            ... )
        """

        # The new-style `callers` and the deprecated name/version pair are
        # reconciled by the shared helper before being handed to `_copy`.
        return self._copy(
            api_endpoint=api_endpoint,
            token=token,
            callers=check_caller_parameters(callers, caller_name, caller_version),
        )

    @deprecation.deprecated(  # type: ignore[misc]
        deprecated_in="1.5.1",
        removed_in="2.0.0",
        current_version=__version__,
        details=SET_CALLER_DEPRECATION_NOTICE,
    )
    def set_caller(
        self,
        caller_name: str | None = None,
        caller_version: str | None = None,
    ) -> None:
        """
        Replace, in place, the identity of the application/framework on behalf
        of which this admin object performs its Data API calls (the "caller").

        The stored callers are overwritten and the internal API commander
        is created anew, so that subsequent requests use the new identity.

        Args:
            caller_name: name of the application, or framework, on behalf of which
                the API calls are performed. This ends up in the request user-agent.
            caller_version: version of the caller.

        Example:
            >>> admin_for_my_db.set_caller(
            ...     caller_name="the_caller",
            ...     caller_version="0.1.0",
            ... )
        """

        logger.info(f"setting caller to {caller_name}/{caller_version}")
        # An empty `callers` list is passed, so only the name/version pair counts.
        self.callers = check_caller_parameters([], caller_name, caller_version)
        # The commander holds the callers: it must be rebuilt to pick them up.
        self._api_commander = self._get_api_commander()

    @deprecation.deprecated(  # type: ignore[misc]
        deprecated_in="1.5.0",
        removed_in="2.0.0",
        current_version=__version__,
        details=NAMESPACE_DEPRECATION_NOTICE_METHOD,
    )
    def list_namespaces(self, *, max_time_ms: int | None = None) -> list[str]:
        """
        Query the API for a list of the namespaces in the database.

        *DEPRECATED* (removal in 2.0). Switch to the "list_keyspaces" method.

        Args:
            max_time_ms: a timeout, in milliseconds, for the API request.

        Returns:
            A list of the namespaces, each a string, in no particular order.

        Raises:
            DataAPIFaultyResponseException: if the response has no "namespaces"
                entry under "status" (a missing or null "status" included).

        Example:
            >>> admin_for_my_db.list_namespaces()
            ['default_keyspace', 'staging_namespace']
        """
        logger.info("getting list of namespaces")
        fn_response = self._api_commander.request(
            payload={"findNamespaces": {}},
            timeout_info=base_timeout_info(max_time_ms),
        )
        # `.get("status", {})` would return None for an explicit null status,
        # making the membership test raise TypeError: normalize with `or {}`.
        status = fn_response.get("status") or {}
        if "namespaces" not in status:
            raise DataAPIFaultyResponseException(
                text="Faulty response from findNamespaces API command.",
                raw_response=fn_response,
            )
        logger.info("finished getting list of namespaces")
        return status["namespaces"]  # type: ignore[no-any-return]

    def list_keyspaces(self, *, max_time_ms: int | None = None) -> list[str]:
        """
        Query the API for a list of the keyspaces in the database.

        Args:
            max_time_ms: a timeout, in milliseconds, for the API request.

        Returns:
            A list of the keyspaces, each a string, in no particular order.

        Raises:
            DataAPIFaultyResponseException: if the response has no "keyspaces"
                entry under "status" (a missing or null "status" included).

        Example:
            >>> admin_for_my_db.list_keyspaces()
            ['default_keyspace', 'staging_keyspace']
        """
        logger.info("getting list of keyspaces")
        fn_response = self._api_commander.request(
            payload={"findKeyspaces": {}},
            timeout_info=base_timeout_info(max_time_ms),
        )
        # `.get("status", {})` would return None for an explicit null status,
        # making the membership test raise TypeError: normalize with `or {}`.
        status = fn_response.get("status") or {}
        if "keyspaces" not in status:
            raise DataAPIFaultyResponseException(
                text="Faulty response from findKeyspaces API command.",
                raw_response=fn_response,
            )
        logger.info("finished getting list of keyspaces")
        return status["keyspaces"]  # type: ignore[no-any-return]

    @deprecation.deprecated(  # type: ignore[misc]
        deprecated_in="1.5.0",
        removed_in="2.0.0",
        current_version=__version__,
        details=NAMESPACE_DEPRECATION_NOTICE_METHOD,
    )
    def create_namespace(
        self,
        name: str,
        *,
        replication_options: dict[str, Any] | None = None,
        update_db_keyspace: bool | None = None,
        update_db_namespace: bool | None = None,
        max_time_ms: int | None = None,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """
        Create a namespace in the database, returning {'ok': 1} if successful.

        *DEPRECATED* (removal in 2.0). Switch to the "create_keyspace" method.

        Args:
            name: the namespace name. If supplying a namespace that exists
                already, the method call proceeds as usual, no errors are
                raised, and the whole invocation is a no-op.
            replication_options: this dictionary can specify the options about
                replication of the namespace (across database nodes). If provided,
                it must have a structure similar to:
                `{"class": "SimpleStrategy", "replication_factor": 1}`.
            update_db_keyspace: if True, the `Database` or `AsyncDatabase` class
                that spawned this DatabaseAdmin, if any, gets updated to work on
                the newly-created keyspace starting when this method returns.
            update_db_namespace: an alias for update_db_keyspace.
                *DEPRECATED* as of v1.5.0, removal in v2.0.0.
            max_time_ms: a timeout, in milliseconds, for the whole requested
                operation to complete.
                Note that a timeout is no guarantee that the creation request
                has not reached the API server.
            **kwargs: accepted, but not used by this method.

        Returns:
            A dictionary of the form {"ok": 1} in case of success.
            Otherwise, an exception is raised.

        Example:
            >>> admin_for_my_db.list_namespaces()
            ['default_keyspace']
            >>> admin_for_my_db.create_namespace("that_other_one")
            {'ok': 1}
            >>> admin_for_my_db.list_namespaces()
            ['default_keyspace', 'that_other_one']
        """

        # Reconcile the two aliases of the "switch the spawner database" flag.
        switch_spawner_keyspace = check_update_db_namespace_keyspace(
            update_db_keyspace=update_db_keyspace,
            update_db_namespace=update_db_namespace,
        )

        # "options" is sent only if there are replication settings to pass.
        command_body: dict[str, Any] = {"name": name}
        if replication_options:
            command_body["options"] = {"replication": replication_options}

        logger.info("creating namespace")
        response = self._api_commander.request(
            payload={"createNamespace": command_body},
            timeout_info=base_timeout_info(max_time_ms),
        )
        status = response.get("status") or {}
        if status.get("ok") != 1:
            raise DataAPIFaultyResponseException(
                text="Faulty response from createNamespace API command.",
                raw_response=response,
            )
        logger.info("finished creating namespace")
        if switch_spawner_keyspace:
            self.spawner_database.use_keyspace(name)
        # Of the whole status, only the "ok" entry is handed back.
        return {"ok": status["ok"]}

    def create_keyspace(
        self,
        name: str,
        *,
        replication_options: dict[str, Any] | None = None,
        update_db_keyspace: bool | None = None,
        update_db_namespace: bool | None = None,
        max_time_ms: int | None = None,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """
        Create a keyspace in the database, returning {'ok': 1} if successful.

        Args:
            name: the keyspace name. If supplying a keyspace that exists
                already, the method call proceeds as usual, no errors are
                raised, and the whole invocation is a no-op.
            replication_options: this dictionary can specify the options about
                replication of the keyspace (across database nodes). If provided,
                it must have a structure similar to:
                `{"class": "SimpleStrategy", "replication_factor": 1}`.
            update_db_keyspace: if True, the `Database` or `AsyncDatabase` class
                that spawned this DatabaseAdmin, if any, gets updated to work on
                the newly-created keyspace starting when this method returns.
            update_db_namespace: an alias for update_db_keyspace.
                *DEPRECATED* as of v1.5.0, removal in v2.0.0.
            max_time_ms: a timeout, in milliseconds, for the whole requested
                operation to complete.
                Note that a timeout is no guarantee that the creation request
                has not reached the API server.
            **kwargs: accepted, but not used by this method.

        Returns:
            A dictionary of the form {"ok": 1} in case of success.
            Otherwise, an exception is raised.

        Example:
            >>> admin_for_my_db.list_keyspaces()
            ['default_keyspace']
            >>> admin_for_my_db.create_keyspace("that_other_one")
            {'ok': 1}
            >>> admin_for_my_db.list_keyspaces()
            ['default_keyspace', 'that_other_one']
        """

        # Reconcile the two aliases of the "switch the spawner database" flag.
        switch_spawner_keyspace = check_update_db_namespace_keyspace(
            update_db_keyspace=update_db_keyspace,
            update_db_namespace=update_db_namespace,
        )

        # "options" is sent only if there are replication settings to pass.
        command_body: dict[str, Any] = {"name": name}
        if replication_options:
            command_body["options"] = {"replication": replication_options}

        logger.info("creating keyspace")
        response = self._api_commander.request(
            payload={"createKeyspace": command_body},
            timeout_info=base_timeout_info(max_time_ms),
        )
        status = response.get("status") or {}
        if status.get("ok") != 1:
            raise DataAPIFaultyResponseException(
                text="Faulty response from createKeyspace API command.",
                raw_response=response,
            )
        logger.info("finished creating keyspace")
        if switch_spawner_keyspace:
            self.spawner_database.use_keyspace(name)
        # Of the whole status, only the "ok" entry is handed back.
        return {"ok": status["ok"]}

    @deprecation.deprecated(  # type: ignore[misc]
        deprecated_in="1.5.0",
        removed_in="2.0.0",
        current_version=__version__,
        details=NAMESPACE_DEPRECATION_NOTICE_METHOD,
    )
    def drop_namespace(
        self,
        name: str,
        *,
        max_time_ms: int | None = None,
    ) -> dict[str, Any]:
        """
        Drop (delete) a namespace from the database.

        *DEPRECATED* (removal in 2.0). Switch to the "drop_keyspace" method.

        Args:
            name: the namespace to delete. If it does not exist in this database,
                an error is raised.
            max_time_ms: a timeout, in milliseconds, for the whole requested
                operation to complete.
                Note that a timeout is no guarantee that the deletion request
                has not reached the API server.

        Returns:
            A dictionary of the form {"ok": 1} in case of success.
            Otherwise, an exception is raised.

        Example:
            >>> admin_for_my_db.list_namespaces()
            ['default_keyspace', 'that_other_one']
            >>> admin_for_my_db.drop_namespace("that_other_one")
            {'ok': 1}
            >>> admin_for_my_db.list_namespaces()
            ['default_keyspace']
        """
        logger.info("dropping namespace")
        response = self._api_commander.request(
            payload={"dropNamespace": {"name": name}},
            timeout_info=base_timeout_info(max_time_ms),
        )
        status = response.get("status") or {}
        if status.get("ok") != 1:
            raise DataAPIFaultyResponseException(
                text="Faulty response from dropNamespace API command.",
                raw_response=response,
            )
        logger.info("finished dropping namespace")
        # Of the whole status, only the "ok" entry is handed back.
        return {"ok": status["ok"]}

    def drop_keyspace(
        self,
        name: str,
        *,
        max_time_ms: int | None = None,
    ) -> dict[str, Any]:
        """
        Drop (delete) a keyspace from the database.

        Args:
            name: the keyspace to delete. If it does not exist in this database,
                an error is raised.
            max_time_ms: a timeout, in milliseconds, for the whole requested
                operation to complete.
                Note that a timeout is no guarantee that the deletion request
                has not reached the API server.

        Returns:
            A dictionary of the form {"ok": 1} in case of success.
            Otherwise, an exception is raised.

        Example:
            >>> admin_for_my_db.list_keyspaces()
            ['default_keyspace', 'that_other_one']
            >>> admin_for_my_db.drop_keyspace("that_other_one")
            {'ok': 1}
            >>> admin_for_my_db.list_keyspaces()
            ['default_keyspace']
        """
        logger.info("dropping keyspace")
        response = self._api_commander.request(
            payload={"dropKeyspace": {"name": name}},
            timeout_info=base_timeout_info(max_time_ms),
        )
        status = response.get("status") or {}
        if status.get("ok") != 1:
            raise DataAPIFaultyResponseException(
                text="Faulty response from dropKeyspace API command.",
                raw_response=response,
            )
        logger.info("finished dropping keyspace")
        # Of the whole status, only the "ok" entry is handed back.
        return {"ok": status["ok"]}

    @deprecation.deprecated(  # type: ignore[misc]
        deprecated_in="1.5.0",
        removed_in="2.0.0",
        current_version=__version__,
        details=NAMESPACE_DEPRECATION_NOTICE_METHOD,
    )
    async def async_list_namespaces(
        self, *, max_time_ms: int | None = None
    ) -> list[str]:
        """
        Query the API for a list of the namespaces in the database.
        Async version of the method, for use in an asyncio context.

        *DEPRECATED* (removal in 2.0). Switch to the "async_list_keyspaces" method.

        Args:
            max_time_ms: a timeout, in milliseconds, for the API request.

        Returns:
            A list of the namespaces, each a string, in no particular order.

        Raises:
            DataAPIFaultyResponseException: if the response has no "namespaces"
                entry under "status" (a missing or null "status" included).

        Example:
            >>> asyncio.run(admin_for_my_db.async_list_namespaces())
            ['default_keyspace', 'staging_namespace']
        """
        logger.info("getting list of namespaces, async")
        fn_response = await self._api_commander.async_request(
            payload={"findNamespaces": {}},
            timeout_info=base_timeout_info(max_time_ms),
        )
        # `.get("status", {})` would return None for an explicit null status,
        # making the membership test raise TypeError: normalize with `or {}`.
        status = fn_response.get("status") or {}
        if "namespaces" not in status:
            raise DataAPIFaultyResponseException(
                text="Faulty response from findNamespaces API command.",
                raw_response=fn_response,
            )
        logger.info("finished getting list of namespaces, async")
        return status["namespaces"]  # type: ignore[no-any-return]

    async def async_list_keyspaces(
        self, *, max_time_ms: int | None = None
    ) -> list[str]:
        """
        Query the API for a list of the keyspaces in the database.
        Async version of the method, for use in an asyncio context.

        Args:
            max_time_ms: a timeout, in milliseconds, for the API request.

        Returns:
            A list of the keyspaces, each a string, in no particular order.

        Raises:
            DataAPIFaultyResponseException: if the response has no "keyspaces"
                entry under "status" (a missing or null "status" included).

        Example:
            >>> asyncio.run(admin_for_my_db.async_list_keyspaces())
            ['default_keyspace', 'staging_keyspace']
        """
        logger.info("getting list of keyspaces, async")
        fn_response = await self._api_commander.async_request(
            payload={"findKeyspaces": {}},
            timeout_info=base_timeout_info(max_time_ms),
        )
        # `.get("status", {})` would return None for an explicit null status,
        # making the membership test raise TypeError: normalize with `or {}`.
        status = fn_response.get("status") or {}
        if "keyspaces" not in status:
            raise DataAPIFaultyResponseException(
                text="Faulty response from findKeyspaces API command.",
                raw_response=fn_response,
            )
        logger.info("finished getting list of keyspaces, async")
        return status["keyspaces"]  # type: ignore[no-any-return]

    @deprecation.deprecated(  # type: ignore[misc]
        deprecated_in="1.5.0",
        removed_in="2.0.0",
        current_version=__version__,
        details=NAMESPACE_DEPRECATION_NOTICE_METHOD,
    )
    async def async_create_namespace(
        self,
        name: str,
        *,
        replication_options: dict[str, Any] | None = None,
        update_db_keyspace: bool | None = None,
        update_db_namespace: bool | None = None,
        max_time_ms: int | None = None,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """
        Create a namespace in the database, returning {'ok': 1} if successful.
        Async version of the method, for use in an asyncio context.

        *DEPRECATED* (removal in 2.0). Switch to the "async_create_keyspace" method.

        Args:
            name: the namespace name. If supplying a namespace that exists
                already, the method call proceeds as usual, no errors are
                raised, and the whole invocation is a no-op.
            replication_options: this dictionary can specify the options about
                replication of the namespace (across database nodes). If provided,
                it must have a structure similar to:
                `{"class": "SimpleStrategy", "replication_factor": 1}`.
            update_db_keyspace: if True, the `Database` or `AsyncDatabase` class
                that spawned this DatabaseAdmin, if any, gets updated to work on
                the newly-created keyspace starting when this method returns.
            update_db_namespace: an alias for update_db_keyspace.
                *DEPRECATED* as of v1.5.0, removal in v2.0.0.
            max_time_ms: a timeout, in milliseconds, for the whole requested
                operation to complete.
                Note that a timeout is no guarantee that the creation request
                has not reached the API server.
            **kwargs: accepted, but not used by this method.

        Returns:
            A dictionary of the form {"ok": 1} in case of success.
            Otherwise, an exception is raised.

        Example:
            >>> admin_for_my_db.list_namespaces()
            ['default_keyspace']
            >>> asyncio.run(admin_for_my_db.async_create_namespace(
            ...     "that_other_one"
            ... ))
            {'ok': 1}
            >>> admin_for_my_db.list_namespaces()
            ['default_keyspace', 'that_other_one']
        """

        # Reconcile the two aliases of the "switch the spawner database" flag.
        switch_spawner_keyspace = check_update_db_namespace_keyspace(
            update_db_keyspace=update_db_keyspace,
            update_db_namespace=update_db_namespace,
        )

        # "options" is sent only if there are replication settings to pass.
        command_body: dict[str, Any] = {"name": name}
        if replication_options:
            command_body["options"] = {"replication": replication_options}

        logger.info("creating namespace, async")
        response = await self._api_commander.async_request(
            payload={"createNamespace": command_body},
            timeout_info=base_timeout_info(max_time_ms),
        )
        status = response.get("status") or {}
        if status.get("ok") != 1:
            raise DataAPIFaultyResponseException(
                text="Faulty response from createNamespace API command.",
                raw_response=response,
            )
        logger.info("finished creating namespace, async")
        if switch_spawner_keyspace:
            # Called without awaiting, exactly as in the sync method.
            self.spawner_database.use_keyspace(name)
        # Of the whole status, only the "ok" entry is handed back.
        return {"ok": status["ok"]}

    async def async_create_keyspace(
        self,
        name: str,
        *,
        replication_options: dict[str, Any] | None = None,
        update_db_keyspace: bool | None = None,
        update_db_namespace: bool | None = None,
        max_time_ms: int | None = None,
        **kwargs: Any,
    ) -> dict[str, Any]:
        """
        Issue a `createKeyspace` command and return {'ok': 1} on success.
        Async version of the method, for use in an asyncio context.

        Args:
            name: the name of the keyspace to create. Creating a keyspace
                that already exists raises no error and amounts to a no-op.
            replication_options: an optional dictionary describing how the
                keyspace is replicated across database nodes, for example
                `{"class": "SimpleStrategy", "replication_factor": 1}`.
                When omitted or empty, no options are sent with the command.
            update_db_keyspace: if True, the `Database` or `AsyncDatabase`
                that spawned this DatabaseAdmin is switched to the new
                keyspace (through its `use_keyspace` method) before this
                method returns.
            update_db_namespace: an alias for update_db_keyspace.
                *DEPRECATED* as of v1.5.0, removal in v2.0.0.
            max_time_ms: a timeout, in milliseconds, for the whole requested
                operation to complete.
                Note that a timeout is no guarantee that the creation request
                has not reached the API server.
            **kwargs: accepted by the signature; not used by the method body.

        Returns:
            A dictionary of the form {"ok": 1} in case of success.
            Otherwise, an exception is raised.

        Example:
            >>> admin_for_my_db.list_keyspaces()
            ['default_keyspace']
            >>> asyncio.run(admin_for_my_db.async_create_keyspace(
            ...     "that_other_one"
            ... ))
            {'ok': 1}
            >>> admin_for_my_db.list_keyspaces()
            ['default_keyspace', 'that_other_one']
        """

        must_switch_keyspace = check_update_db_namespace_keyspace(
            update_db_keyspace=update_db_keyspace,
            update_db_namespace=update_db_namespace,
        )

        # Only a non-empty replication setting makes it into the command body.
        command_body: dict[str, Any] = {"name": name}
        if replication_options:
            command_body["options"] = {"replication": replication_options}

        logger.info("creating keyspace, async")
        response = await self._api_commander.async_request(
            payload={"createKeyspace": command_body},
            timeout_info=base_timeout_info(max_time_ms),
        )

        # A missing or null "status" is treated like an empty one.
        status = response.get("status") or {}
        if status.get("ok") != 1:
            raise DataAPIFaultyResponseException(
                text="Faulty response from createKeyspace API command.",
                raw_response=response,
            )

        logger.info("finished creating keyspace, async")
        if must_switch_keyspace:
            self.spawner_database.use_keyspace(name)
        # Expose just the "ok" entry of the status to the caller.
        return {"ok": status["ok"]}

    @deprecation.deprecated(  # type: ignore[misc]
        deprecated_in="1.5.0",
        removed_in="2.0.0",
        current_version=__version__,
        details=NAMESPACE_DEPRECATION_NOTICE_METHOD,
    )
    async def async_drop_namespace(
        self,
        name: str,
        *,
        max_time_ms: int | None = None,
    ) -> dict[str, Any]:
        """
        Issue a `dropNamespace` command to delete a namespace from the database.
        Async version of the method, for use in an asyncio context.

        *DEPRECATED* (removal in 2.0). Switch to the "async_drop_keyspace" method.

        Args:
            name: the namespace to delete. If it does not exist in this database,
                an error is raised.
            max_time_ms: a timeout, in milliseconds, for the whole requested
                operation to complete.
                Note that a timeout is no guarantee that the deletion request
                has not reached the API server.

        Returns:
            A dictionary of the form {"ok": 1} in case of success.
            Otherwise, an exception is raised.

        Example:
            >>> admin_for_my_db.list_namespaces()
            ['that_other_one', 'default_keyspace']
            >>> asyncio.run(admin_for_my_db.async_drop_namespace(
            ...     "that_other_one"
            ... ))
            {'ok': 1}
            >>> admin_for_my_db.list_namespaces()
            ['default_keyspace']
        """
        logger.info("dropping namespace, async")
        response = await self._api_commander.async_request(
            payload={"dropNamespace": {"name": name}},
            timeout_info=base_timeout_info(max_time_ms),
        )

        # A missing or null "status" is treated like an empty one.
        status = response.get("status") or {}
        if status.get("ok") != 1:
            raise DataAPIFaultyResponseException(
                text="Faulty response from dropNamespace API command.",
                raw_response=response,
            )

        logger.info("finished dropping namespace, async")
        # Expose just the "ok" entry of the status to the caller.
        return {"ok": status["ok"]}

    async def async_drop_keyspace(
        self,
        name: str,
        *,
        max_time_ms: int | None = None,
    ) -> dict[str, Any]:
        """
        Issue a `dropKeyspace` command to delete a keyspace from the database.
        Async version of the method, for use in an asyncio context.

        Args:
            name: the keyspace to delete. If it does not exist in this database,
                an error is raised.
            max_time_ms: a timeout, in milliseconds, for the whole requested
                operation to complete.
                Note that a timeout is no guarantee that the deletion request
                has not reached the API server.

        Returns:
            A dictionary of the form {"ok": 1} in case of success.
            Otherwise, an exception is raised.

        Example:
            >>> admin_for_my_db.list_keyspaces()
            ['that_other_one', 'default_keyspace']
            >>> asyncio.run(admin_for_my_db.async_drop_keyspace(
            ...     "that_other_one"
            ... ))
            {'ok': 1}
            >>> admin_for_my_db.list_keyspaces()
            ['default_keyspace']
        """
        logger.info("dropping keyspace, async")
        response = await self._api_commander.async_request(
            payload={"dropKeyspace": {"name": name}},
            timeout_info=base_timeout_info(max_time_ms),
        )

        # A missing or null "status" is treated like an empty one.
        status = response.get("status") or {}
        if status.get("ok") != 1:
            raise DataAPIFaultyResponseException(
                text="Faulty response from dropKeyspace API command.",
                raw_response=response,
            )

        logger.info("finished dropping keyspace, async")
        # Expose just the "ok" entry of the status to the caller.
        return {"ok": status["ok"]}

    def get_database(
        self,
        *,
        token: str | TokenProvider | None = None,
        keyspace: str | None = None,
        namespace: str | None = None,
        api_path: str | None = None,
        api_version: str | None = None,
    ) -> Database:
        """
        Build a Database object targeting the same API endpoint as this admin,
        to be used for working with the data in it.

        Args:
            token: an optional replacement for the token set on this object,
                either a literal token string or an instance of a subclass of
                `astrapy.authentication.TokenProvider`. Useful to work in a
                least-privilege manner, limiting the permissions for
                non-admin work. If not supplied, this object's own token
                provider is used.
            keyspace: an optional keyspace to set in the resulting Database.
                If not provided, no keyspace is set, limiting what the Database
                can do until setting it with e.g. a `use_keyspace` method call.
            namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
            api_path: path to append to the API Endpoint. The value (None
                included) is handed to the Database constructor as is;
                in typical usage it should not be specified.
            api_version: version specifier to append to the API path. The value
                (None included) is handed to the Database constructor as is;
                in typical usage it should not be specified.

        Returns:
            A Database object, ready to be used for working with data and collections.

        Example:
            >>> my_db = admin_for_my_db.get_database()
            >>> my_db.list_collection_names()
            ['movies', 'another_collection']

        Note:
            creating an instance of Database does not trigger actual creation
            of the database itself, which should exist beforehand.
        """

        # Reconcile the `keyspace` parameter with its deprecated `namespace` alias.
        resolved_keyspace = check_namespace_keyspace(
            keyspace=keyspace,
            namespace=namespace,
        )

        # lazy importing here to avoid circular dependency
        from astrapy import Database

        # An explicitly-passed token wins over the one this admin was given.
        database_token = coerce_token_provider(token) or self.token_provider

        return Database(
            api_endpoint=self.api_endpoint,
            token=database_token,
            keyspace=resolved_keyspace,
            callers=self.callers,
            environment=self.environment,
            api_path=api_path,
            api_version=api_version,
        )

    def get_async_database(
        self,
        *,
        token: str | TokenProvider | None = None,
        keyspace: str | None = None,
        namespace: str | None = None,
        api_path: str | None = None,
        api_version: str | None = None,
    ) -> AsyncDatabase:
        """
        Build an AsyncDatabase object for the database, to be used
        when doing data-level work (such as creating/managing collections).

        The object is obtained by calling the sync counterpart `get_database`
        and converting its result with `to_async()`. The parameters have the
        same meaning as for `get_database`: please see that one for details.
        """

        # Reconcile the deprecated `namespace` alias here, so that only
        # `keyspace` is forwarded to `get_database`.
        resolved_keyspace = check_namespace_keyspace(
            keyspace=keyspace,
            namespace=namespace,
        )

        sync_database = self.get_database(
            token=token,
            keyspace=resolved_keyspace,
            api_path=api_path,
            api_version=api_version,
        )
        return sync_database.to_async()

    def find_embedding_providers(
        self, *, max_time_ms: int | None = None
    ) -> FindEmbeddingProvidersResult:
        """
        Query the API for the full information on available embedding providers.

        Args:
            max_time_ms: a timeout, in milliseconds, for the API request.

        Returns:
            A `FindEmbeddingProvidersResult` object with the complete information
            returned by the API about available embedding providers.

        Raises:
            DataAPIFaultyResponseException: if the response carries no
                "embeddingProviders" entry in its "status" (including the
                case of a missing or null "status").

        Example:
            >>> # output abridged and indented for clarity
            >>> admin_for_my_db.find_embedding_providers()
            FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)
            >>> admin_for_my_db.find_embedding_providers().embedding_providers
            {
                'openai': EmbeddingProvider(
                    display_name='OpenAI',
                    models=[
                        EmbeddingProviderModel(name='text-embedding-3-small'),
                        ...
                    ]
                ),
                ...
            }
        """

        logger.info("findEmbeddingProviders")
        fe_response = self._api_commander.request(
            payload={"findEmbeddingProviders": {}},
            timeout_info=base_timeout_info(max_time_ms),
        )
        # `or {}` (rather than a .get default) also covers an explicit null
        # "status", which would otherwise make the `in` test raise a TypeError.
        status = fe_response.get("status") or {}
        if "embeddingProviders" not in status:
            raise DataAPIFaultyResponseException(
                text="Faulty response from findEmbeddingProviders API command.",
                raw_response=fe_response,
            )

        logger.info("finished findEmbeddingProviders")
        return FindEmbeddingProvidersResult.from_dict(status)

    async def async_find_embedding_providers(
        self, *, max_time_ms: int | None = None
    ) -> FindEmbeddingProvidersResult:
        """
        Query the API for the full information on available embedding providers.
        Async version of the method, for use in an asyncio context.

        Args:
            max_time_ms: a timeout, in milliseconds, for the API request.

        Returns:
            A `FindEmbeddingProvidersResult` object with the complete information
            returned by the API about available embedding providers.

        Raises:
            DataAPIFaultyResponseException: if the response carries no
                "embeddingProviders" entry in its "status" (including the
                case of a missing or null "status").

        Example:
            >>> # output abridged and indented for clarity
            >>> asyncio.run(admin_for_my_db.async_find_embedding_providers())
            FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)
            >>> asyncio.run(
            ...     admin_for_my_db.async_find_embedding_providers()
            ... ).embedding_providers
            {
                'openai': EmbeddingProvider(
                    display_name='OpenAI',
                    models=[
                        EmbeddingProviderModel(name='text-embedding-3-small'),
                        ...
                    ]
                ),
                ...
            }
        """

        logger.info("findEmbeddingProviders, async")
        fe_response = await self._api_commander.async_request(
            payload={"findEmbeddingProviders": {}},
            timeout_info=base_timeout_info(max_time_ms),
        )
        # `or {}` (rather than a .get default) also covers an explicit null
        # "status", which would otherwise make the `in` test raise a TypeError.
        status = fe_response.get("status") or {}
        if "embeddingProviders" not in status:
            raise DataAPIFaultyResponseException(
                text="Faulty response from findEmbeddingProviders API command.",
                raw_response=fe_response,
            )

        logger.info("finished findEmbeddingProviders, async")
        return FindEmbeddingProvidersResult.from_dict(status)

Ancestors

Methods

async def async_create_keyspace(self, name: str, *, replication_options: dict[str, Any] | None = None, update_db_keyspace: bool | None = None, update_db_namespace: bool | None = None, max_time_ms: int | None = None, **kwargs: Any) ‑> dict[str, typing.Any]

Create a keyspace in the database, returning {'ok': 1} if successful. Async version of the method, for use in an asyncio context.

Args

name
the keyspace name. If supplying a keyspace that exists already, the method call proceeds as usual, no errors are raised, and the whole invocation is a no-op.
replication_options
this dictionary can specify the options about replication of the keyspace (across database nodes). If provided, it must have a structure similar to: {"class": "SimpleStrategy", "replication_factor": 1}.
update_db_keyspace
if True, the Database or AsyncDatabase class that spawned this DatabaseAdmin, if any, gets updated to work on the newly-created keyspace starting when this method returns.
update_db_namespace
an alias for update_db_keyspace. DEPRECATED as of v1.5.0, removal in v2.0.0.
max_time_ms
a timeout, in milliseconds, for the whole requested operation to complete. Note that a timeout is no guarantee that the creation request has not reached the API server.

Returns

A dictionary of the form {"ok": 1} in case of success. Otherwise, an exception is raised.

Example

>>> admin_for_my_db.list_keyspaces()
['default_keyspace']
>>> asyncio.run(admin_for_my_db.async_create_keyspace(
...     "that_other_one"
... ))
{'ok': 1}
>>> admin_for_my_db.list_keyspaces()
['default_keyspace', 'that_other_one']
Expand source code
async def async_create_keyspace(
    self,
    name: str,
    *,
    replication_options: dict[str, Any] | None = None,
    update_db_keyspace: bool | None = None,
    update_db_namespace: bool | None = None,
    max_time_ms: int | None = None,
    **kwargs: Any,
) -> dict[str, Any]:
    """
    Issue a `createKeyspace` command and return {'ok': 1} on success.
    Async version of the method, for use in an asyncio context.

    Args:
        name: the name of the keyspace to create. Creating a keyspace
            that already exists raises no error and amounts to a no-op.
        replication_options: an optional dictionary describing how the
            keyspace is replicated across database nodes, for example
            `{"class": "SimpleStrategy", "replication_factor": 1}`.
            When omitted or empty, no options are sent with the command.
        update_db_keyspace: if True, the `Database` or `AsyncDatabase`
            that spawned this DatabaseAdmin is switched to the new
            keyspace (through its `use_keyspace` method) before this
            method returns.
        update_db_namespace: an alias for update_db_keyspace.
            *DEPRECATED* as of v1.5.0, removal in v2.0.0.
        max_time_ms: a timeout, in milliseconds, for the whole requested
            operation to complete.
            Note that a timeout is no guarantee that the creation request
            has not reached the API server.
        **kwargs: accepted by the signature; not used by the method body.

    Returns:
        A dictionary of the form {"ok": 1} in case of success.
        Otherwise, an exception is raised.

    Example:
        >>> admin_for_my_db.list_keyspaces()
        ['default_keyspace']
        >>> asyncio.run(admin_for_my_db.async_create_keyspace(
        ...     "that_other_one"
        ... ))
        {'ok': 1}
        >>> admin_for_my_db.list_keyspaces()
        ['default_keyspace', 'that_other_one']
    """

    must_switch_keyspace = check_update_db_namespace_keyspace(
        update_db_keyspace=update_db_keyspace,
        update_db_namespace=update_db_namespace,
    )

    # Only a non-empty replication setting makes it into the command body.
    command_body: dict[str, Any] = {"name": name}
    if replication_options:
        command_body["options"] = {"replication": replication_options}

    logger.info("creating keyspace, async")
    response = await self._api_commander.async_request(
        payload={"createKeyspace": command_body},
        timeout_info=base_timeout_info(max_time_ms),
    )

    # A missing or null "status" is treated like an empty one.
    status = response.get("status") or {}
    if status.get("ok") != 1:
        raise DataAPIFaultyResponseException(
            text="Faulty response from createKeyspace API command.",
            raw_response=response,
        )

    logger.info("finished creating keyspace, async")
    if must_switch_keyspace:
        self.spawner_database.use_keyspace(name)
    # Expose just the "ok" entry of the status to the caller.
    return {"ok": status["ok"]}
async def async_create_namespace(self, name: str, *, replication_options: dict[str, Any] | None = None, update_db_keyspace: bool | None = None, update_db_namespace: bool | None = None, max_time_ms: int | None = None, **kwargs: Any) ‑> dict[str, typing.Any]

Create a namespace in the database, returning {'ok': 1} if successful. Async version of the method, for use in an asyncio context.

DEPRECATED (removal in 2.0). Switch to the "async_create_keyspace" method.

Args

name
the namespace name. If supplying a namespace that exists already, the method call proceeds as usual, no errors are raised, and the whole invocation is a no-op.
replication_options
this dictionary can specify the options about replication of the namespace (across database nodes). If provided, it must have a structure similar to: {"class": "SimpleStrategy", "replication_factor": 1}.
update_db_keyspace
if True, the Database or AsyncDatabase class that spawned this DatabaseAdmin, if any, gets updated to work on the newly-created keyspace starting when this method returns.
update_db_namespace
an alias for update_db_keyspace. DEPRECATED as of v1.5.0, removal in v2.0.0.
max_time_ms
a timeout, in milliseconds, for the whole requested operation to complete. Note that a timeout is no guarantee that the creation request has not reached the API server.

Returns

A dictionary of the form {"ok": 1} in case of success. Otherwise, an exception is raised.

Example

>>> admin_for_my_db.list_namespaces()
['default_keyspace']
>>> asyncio.run(admin_for_my_db.async_create_namespace(
...     "that_other_one"
... ))
{'ok': 1}
>>> admin_for_my_db.list_namespaces()
['default_keyspace', 'that_other_one']

Deprecated since version: 1.5.0

This will be removed in 2.0.0. The term 'namespace' is being replaced by 'keyspace' throughout the Data API and the clients. Please adapt method and parameter names consistently (examples: db_admin.findNamespaces => db_admin.findKeyspaces; collection.namespace => collection.keyspace; database.list_collections(namespace=...) => database.list_collections(keyspace=...)). See https://docs.datastax.com/en/astra-db-serverless/api-reference/client-versions.html#version-1-5 for more information.

Expand source code
@deprecation.deprecated(  # type: ignore[misc]
    deprecated_in="1.5.0",
    removed_in="2.0.0",
    current_version=__version__,
    details=NAMESPACE_DEPRECATION_NOTICE_METHOD,
)
async def async_create_namespace(
    self,
    name: str,
    *,
    replication_options: dict[str, Any] | None = None,
    update_db_keyspace: bool | None = None,
    update_db_namespace: bool | None = None,
    max_time_ms: int | None = None,
    **kwargs: Any,
) -> dict[str, Any]:
    """
    Issue a `createNamespace` command and return {'ok': 1} on success.
    Async version of the method, for use in an asyncio context.

    *DEPRECATED* (removal in 2.0). Switch to the "async_create_keyspace" method.

    Args:
        name: the name of the namespace to create. Creating a namespace
            that already exists raises no error and amounts to a no-op.
        replication_options: an optional dictionary describing how the
            namespace is replicated across database nodes, for example
            `{"class": "SimpleStrategy", "replication_factor": 1}`.
            When omitted or empty, no options are sent with the command.
        update_db_keyspace: if True, the `Database` or `AsyncDatabase`
            that spawned this DatabaseAdmin is switched to the new
            keyspace (through its `use_keyspace` method) before this
            method returns.
        update_db_namespace: an alias for update_db_keyspace.
            *DEPRECATED* as of v1.5.0, removal in v2.0.0.
        max_time_ms: a timeout, in milliseconds, for the whole requested
            operation to complete.
            Note that a timeout is no guarantee that the creation request
            has not reached the API server.
        **kwargs: accepted by the signature; not used by the method body.

    Returns:
        A dictionary of the form {"ok": 1} in case of success.
        Otherwise, an exception is raised.

    Example:
        >>> admin_for_my_db.list_namespaces()
        ['default_keyspace']
        >>> asyncio.run(admin_for_my_db.async_create_namespace(
        ...     "that_other_one"
        ... ))
        {'ok': 1}
        >>> admin_for_my_db.list_namespaces()
        ['default_keyspace', 'that_other_one']
    """

    must_switch_keyspace = check_update_db_namespace_keyspace(
        update_db_keyspace=update_db_keyspace,
        update_db_namespace=update_db_namespace,
    )

    # Only a non-empty replication setting makes it into the command body.
    command_body: dict[str, Any] = {"name": name}
    if replication_options:
        command_body["options"] = {"replication": replication_options}

    logger.info("creating namespace, async")
    response = await self._api_commander.async_request(
        payload={"createNamespace": command_body},
        timeout_info=base_timeout_info(max_time_ms),
    )

    # A missing or null "status" is treated like an empty one.
    status = response.get("status") or {}
    if status.get("ok") != 1:
        raise DataAPIFaultyResponseException(
            text="Faulty response from createNamespace API command.",
            raw_response=response,
        )

    logger.info("finished creating namespace, async")
    if must_switch_keyspace:
        self.spawner_database.use_keyspace(name)
    # Expose just the "ok" entry of the status to the caller.
    return {"ok": status["ok"]}
async def async_drop_keyspace(self, name: str, *, max_time_ms: int | None = None) ‑> dict[str, typing.Any]

Drop (delete) a keyspace from the database. Async version of the method, for use in an asyncio context.

Args

name
the keyspace to delete. If it does not exist in this database, an error is raised.
max_time_ms
a timeout, in milliseconds, for the whole requested operation to complete. Note that a timeout is no guarantee that the deletion request has not reached the API server.

Returns

A dictionary of the form {"ok": 1} in case of success. Otherwise, an exception is raised.

Example

>>> admin_for_my_db.list_keyspaces()
['that_other_one', 'default_keyspace']
>>> asyncio.run(admin_for_my_db.async_drop_keyspace(
...     "that_other_one"
... ))
{'ok': 1}
>>> admin_for_my_db.list_keyspaces()
['default_keyspace']
Expand source code
async def async_drop_keyspace(
    self,
    name: str,
    *,
    max_time_ms: int | None = None,
) -> dict[str, Any]:
    """
    Issue a `dropKeyspace` command to delete a keyspace from the database.
    Async version of the method, for use in an asyncio context.

    Args:
        name: the keyspace to delete. If it does not exist in this database,
            an error is raised.
        max_time_ms: a timeout, in milliseconds, for the whole requested
            operation to complete.
            Note that a timeout is no guarantee that the deletion request
            has not reached the API server.

    Returns:
        A dictionary of the form {"ok": 1} in case of success.
        Otherwise, an exception is raised.

    Example:
        >>> admin_for_my_db.list_keyspaces()
        ['that_other_one', 'default_keyspace']
        >>> asyncio.run(admin_for_my_db.async_drop_keyspace(
        ...     "that_other_one"
        ... ))
        {'ok': 1}
        >>> admin_for_my_db.list_keyspaces()
        ['default_keyspace']
    """
    logger.info("dropping keyspace, async")
    response = await self._api_commander.async_request(
        payload={"dropKeyspace": {"name": name}},
        timeout_info=base_timeout_info(max_time_ms),
    )

    # A missing or null "status" is treated like an empty one.
    status = response.get("status") or {}
    if status.get("ok") != 1:
        raise DataAPIFaultyResponseException(
            text="Faulty response from dropKeyspace API command.",
            raw_response=response,
        )

    logger.info("finished dropping keyspace, async")
    # Expose just the "ok" entry of the status to the caller.
    return {"ok": status["ok"]}
async def async_drop_namespace(self, name: str, *, max_time_ms: int | None = None) ‑> dict[str, typing.Any]

Drop (delete) a namespace from the database. Async version of the method, for use in an asyncio context.

DEPRECATED (removal in 2.0). Switch to the "async_drop_keyspace" method.

Args

name
the namespace to delete. If it does not exist in this database, an error is raised.
max_time_ms
a timeout, in milliseconds, for the whole requested operation to complete. Note that a timeout is no guarantee that the deletion request has not reached the API server.

Returns

A dictionary of the form {"ok": 1} in case of success. Otherwise, an exception is raised.

Example

>>> admin_for_my_db.list_namespaces()
['that_other_one', 'default_keyspace']
>>> asyncio.run(admin_for_my_db.async_drop_namespace(
...     "that_other_one"
... ))
{'ok': 1}
>>> admin_for_my_db.list_namespaces()
['default_keyspace']

Deprecated since version: 1.5.0

This will be removed in 2.0.0. The term 'namespace' is being replaced by 'keyspace' throughout the Data API and the clients. Please adapt method and parameter names consistently (examples: db_admin.findNamespaces => db_admin.findKeyspaces; collection.namespace => collection.keyspace; database.list_collections(namespace=...) => database.list_collections(keyspace=...)). See https://docs.datastax.com/en/astra-db-serverless/api-reference/client-versions.html#version-1-5 for more information.

Expand source code
@deprecation.deprecated(  # type: ignore[misc]
    deprecated_in="1.5.0",
    removed_in="2.0.0",
    current_version=__version__,
    details=NAMESPACE_DEPRECATION_NOTICE_METHOD,
)
async def async_drop_namespace(
    self,
    name: str,
    *,
    max_time_ms: int | None = None,
) -> dict[str, Any]:
    """
    Issue a `dropNamespace` command to delete a namespace from the database.
    Async version of the method, for use in an asyncio context.

    *DEPRECATED* (removal in 2.0). Switch to the "async_drop_keyspace" method.

    Args:
        name: the namespace to delete. If it does not exist in this database,
            an error is raised.
        max_time_ms: a timeout, in milliseconds, for the whole requested
            operation to complete.
            Note that a timeout is no guarantee that the deletion request
            has not reached the API server.

    Returns:
        A dictionary of the form {"ok": 1} in case of success.
        Otherwise, an exception is raised.

    Example:
        >>> admin_for_my_db.list_namespaces()
        ['that_other_one', 'default_keyspace']
        >>> asyncio.run(admin_for_my_db.async_drop_namespace(
        ...     "that_other_one"
        ... ))
        {'ok': 1}
        >>> admin_for_my_db.list_namespaces()
        ['default_keyspace']
    """
    logger.info("dropping namespace, async")
    response = await self._api_commander.async_request(
        payload={"dropNamespace": {"name": name}},
        timeout_info=base_timeout_info(max_time_ms),
    )

    # A missing or null "status" is treated like an empty one.
    status = response.get("status") or {}
    if status.get("ok") != 1:
        raise DataAPIFaultyResponseException(
            text="Faulty response from dropNamespace API command.",
            raw_response=response,
        )

    logger.info("finished dropping namespace, async")
    # Expose just the "ok" entry of the status to the caller.
    return {"ok": status["ok"]}
async def async_find_embedding_providers(self, *, max_time_ms: int | None = None) ‑> FindEmbeddingProvidersResult

Query the API for the full information on available embedding providers. Async version of the method, for use in an asyncio context.

Args

max_time_ms
a timeout, in milliseconds, for the DevOps API request.

Returns

A FindEmbeddingProvidersResult object with the complete information returned by the API about available embedding providers.

Example (output abridged and indented for clarity)

>>> asyncio.run(admin_for_my_db.async_find_embedding_providers())
FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)
>>> asyncio.run(
...     admin_for_my_db.async_find_embedding_providers()
... ).embedding_providers
{
    'openai': EmbeddingProvider(
        display_name='OpenAI',
        models=[
            EmbeddingProviderModel(name='text-embedding-3-small'),
            ...
        ]
    ),
    ...
}

Expand source code
async def async_find_embedding_providers(
    self, *, max_time_ms: int | None = None
) -> FindEmbeddingProvidersResult:
    """
    Query the API for the full information on available embedding providers.
    Async version of the method, for use in an asyncio context.

    Args:
        max_time_ms: a timeout, in milliseconds, for the API request.

    Returns:
        A `FindEmbeddingProvidersResult` object with the complete information
        returned by the API about available embedding providers.

    Raises:
        DataAPIFaultyResponseException: if the response carries no
            "embeddingProviders" entry in its "status" (including the
            case of a missing or null "status").

    Example:
        >>> # output abridged and indented for clarity
        >>> asyncio.run(admin_for_my_db.async_find_embedding_providers())
        FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)
        >>> asyncio.run(
        ...     admin_for_my_db.async_find_embedding_providers()
        ... ).embedding_providers
        {
            'openai': EmbeddingProvider(
                display_name='OpenAI',
                models=[
                    EmbeddingProviderModel(name='text-embedding-3-small'),
                    ...
                ]
            ),
            ...
        }
    """

    logger.info("findEmbeddingProviders, async")
    fe_response = await self._api_commander.async_request(
        payload={"findEmbeddingProviders": {}},
        timeout_info=base_timeout_info(max_time_ms),
    )
    # `or {}` (rather than a .get default) also covers an explicit null
    # "status", which would otherwise make the `in` test raise a TypeError.
    status = fe_response.get("status") or {}
    if "embeddingProviders" not in status:
        raise DataAPIFaultyResponseException(
            text="Faulty response from findEmbeddingProviders API command.",
            raw_response=fe_response,
        )

    logger.info("finished findEmbeddingProviders, async")
    return FindEmbeddingProvidersResult.from_dict(status)
async def async_list_keyspaces(self, *, max_time_ms: int | None = None) ‑> list[str]

Query the API for a list of the keyspaces in the database. Async version of the method, for use in an asyncio context.

Args

max_time_ms
a timeout, in milliseconds, for the DevOps API request.

Returns

A list of the keyspaces, each a string, in no particular order.

Example

>>> asyncio.run(admin_for_my_db.async_list_keyspaces())
['default_keyspace', 'staging_keyspace']
Expand source code
async def async_list_keyspaces(
    self, *, max_time_ms: int | None = None
) -> list[str]:
    """
    Query the API for a list of the keyspaces in the database.
    Async version of the method, for use in an asyncio context.

    Args:
        max_time_ms: a timeout, in milliseconds, for the API request.

    Returns:
        A list of the keyspaces, each a string, in no particular order.

    Raises:
        DataAPIFaultyResponseException: if the response "status" is missing,
            null, or lacks the "keyspaces" entry.

    Example:
        >>> asyncio.run(admin_for_my_db.async_list_keyspaces())
        ['default_keyspace', 'staging_keyspace']
    """
    logger.info("getting list of keyspaces, async")
    fn_response = await self._api_commander.async_request(
        payload={"findKeyspaces": {}},
        timeout_info=base_timeout_info(max_time_ms),
    )
    # `or {}`: an explicit null "status" must surface as a faulty response,
    # not as a TypeError from the membership test.
    if "keyspaces" not in (fn_response.get("status") or {}):
        raise DataAPIFaultyResponseException(
            text="Faulty response from findKeyspaces API command.",
            raw_response=fn_response,
        )
    logger.info("finished getting list of keyspaces, async")
    return fn_response["status"]["keyspaces"]  # type: ignore[no-any-return]
async def async_list_namespaces(self, *, max_time_ms: int | None = None) ‑> list[str]

Query the API for a list of the namespaces in the database. Async version of the method, for use in an asyncio context.

DEPRECATED (removal in 2.0). Switch to the "async_list_keyspaces" method.

Args

max_time_ms
a timeout, in milliseconds, for the DevOps API request.

Returns

A list of the namespaces, each a string, in no particular order.

Example

>>> asyncio.run(admin_for_my_db.async_list_namespaces())
['default_keyspace', 'staging_namespace']

Deprecated since version: 1.5.0

This will be removed in 2.0.0. The term 'namespace' is being replaced by 'keyspace' throughout the Data API and the clients. Please adapt method and parameter names consistently (examples: db_admin.findNamespaces => db_admin.findKeyspaces; collection.namespace => collection.keyspace; database.list_collections(namespace=...) => database.list_collections(keyspace=...)). See https://docs.datastax.com/en/astra-db-serverless/api-reference/client-versions.html#version-1-5 for more information.

Expand source code
@deprecation.deprecated(  # type: ignore[misc]
    deprecated_in="1.5.0",
    removed_in="2.0.0",
    current_version=__version__,
    details=NAMESPACE_DEPRECATION_NOTICE_METHOD,
)
async def async_list_namespaces(
    self, *, max_time_ms: int | None = None
) -> list[str]:
    """
    Query the API for a list of the namespaces in the database.
    Async version of the method, for use in an asyncio context.

    *DEPRECATED* (removal in 2.0). Switch to the "async_list_keyspaces" method.

    Args:
        max_time_ms: a timeout, in milliseconds, for the API request.

    Returns:
        A list of the namespaces, each a string, in no particular order.

    Raises:
        DataAPIFaultyResponseException: if the response "status" is missing,
            null, or lacks the "namespaces" entry.

    Example:
        >>> asyncio.run(admin_for_my_db.async_list_namespaces())
        ['default_keyspace', 'staging_namespace']
    """
    logger.info("getting list of namespaces, async")
    fn_response = await self._api_commander.async_request(
        payload={"findNamespaces": {}},
        timeout_info=base_timeout_info(max_time_ms),
    )
    # `or {}`: an explicit null "status" must surface as a faulty response,
    # not as a TypeError from the membership test.
    if "namespaces" not in (fn_response.get("status") or {}):
        raise DataAPIFaultyResponseException(
            text="Faulty response from findNamespaces API command.",
            raw_response=fn_response,
        )
    logger.info("finished getting list of namespaces, async")
    return fn_response["status"]["namespaces"]  # type: ignore[no-any-return]
def create_keyspace(self, name: str, *, replication_options: dict[str, Any] | None = None, update_db_keyspace: bool | None = None, update_db_namespace: bool | None = None, max_time_ms: int | None = None, **kwargs: Any) ‑> dict[str, typing.Any]

Create a keyspace in the database, returning {'ok': 1} if successful.

Args

name
the keyspace name. If supplying a keyspace that exists already, the method call proceeds as usual, no errors are raised, and the whole invocation is a no-op.
replication_options
this dictionary can specify the options about replication of the keyspace (across database nodes). If provided, it must have a structure similar to: {"class": "SimpleStrategy", "replication_factor": 1}.
update_db_keyspace
if True, the Database or AsyncDatabase class that spawned this DatabaseAdmin, if any, gets updated to work on the newly-created keyspace starting when this method returns.
update_db_namespace
an alias for update_db_keyspace. DEPRECATED as of v1.5.0, removal in v2.0.0.
max_time_ms
a timeout, in milliseconds, for the whole requested operation to complete. Note that a timeout is no guarantee that the creation request has not reached the API server.

Returns

A dictionary of the form {"ok": 1} in case of success. Otherwise, an exception is raised.

Example

>>> admin_for_my_db.list_keyspaces()
['default_keyspace']
>>> admin_for_my_db.create_keyspace("that_other_one")
{'ok': 1}
>>> admin_for_my_db.list_keyspaces()
['default_keyspace', 'that_other_one']
Expand source code
def create_keyspace(
    self,
    name: str,
    *,
    replication_options: dict[str, Any] | None = None,
    update_db_keyspace: bool | None = None,
    update_db_namespace: bool | None = None,
    max_time_ms: int | None = None,
    **kwargs: Any,
) -> dict[str, Any]:
    """
    Issue a `createKeyspace` command and return {'ok': 1} on success.

    Args:
        name: the keyspace name. If supplying a keyspace that exists
            already, the method call proceeds as usual, no errors are
            raised, and the whole invocation is a no-op.
        replication_options: an optional dictionary with the replication
            settings of the keyspace (across database nodes), shaped like
            `{"class": "SimpleStrategy", "replication_factor": 1}`.
            When empty or None, no options are sent with the command.
        update_db_keyspace: if True, the `Database` or `AsyncDatabase` class
            that spawned this DatabaseAdmin, if any, gets updated to work on
            the newly-created keyspace starting when this method returns.
        update_db_namespace: an alias for update_db_keyspace.
            *DEPRECATED* as of v1.5.0, removal in v2.0.0.
        max_time_ms: a timeout, in milliseconds, for the whole requested
            operation to complete.
            Note that a timeout is no guarantee that the creation request
            has not reached the API server.

    Returns:
        A dictionary of the form {"ok": 1} in case of success.
        Otherwise, an exception is raised.

    Raises:
        DataAPIFaultyResponseException: if the response status does not
            report `ok` equal to 1.

    Example:
        >>> admin_for_my_db.list_keyspaces()
        ['default_keyspace']
        >>> admin_for_my_db.create_keyspace("that_other_one")
        {'ok': 1}
        >>> admin_for_my_db.list_keyspaces()
        ['default_keyspace', 'that_other_one']
    """

    # Reconcile the current flag with its deprecated alias.
    switch_db_keyspace = check_update_db_namespace_keyspace(
        update_db_keyspace=update_db_keyspace,
        update_db_namespace=update_db_namespace,
    )

    # The "options" entry is attached only when replication settings are given.
    command_body: dict[str, Any] = {"name": name}
    if replication_options:
        command_body["options"] = {"replication": replication_options}

    logger.info("creating keyspace")
    ck_response = self._api_commander.request(
        payload={"createKeyspace": command_body},
        timeout_info=base_timeout_info(max_time_ms),
    )
    ck_status = ck_response.get("status") or {}
    if ck_status.get("ok") != 1:
        raise DataAPIFaultyResponseException(
            text="Faulty response from createKeyspace API command.",
            raw_response=ck_response,
        )

    logger.info("finished creating keyspace")
    if switch_db_keyspace:
        self.spawner_database.use_keyspace(name)
    # Only the "ok" entry of the status is handed back to the caller.
    return {"ok": ck_status["ok"]}
def create_namespace(self, name: str, *, replication_options: dict[str, Any] | None = None, update_db_keyspace: bool | None = None, update_db_namespace: bool | None = None, max_time_ms: int | None = None, **kwargs: Any) ‑> dict[str, typing.Any]

Create a namespace in the database, returning {'ok': 1} if successful.

DEPRECATED (removal in 2.0). Switch to the "create_keyspace" method.

Args

name
the namespace name. If supplying a namespace that exists already, the method call proceeds as usual, no errors are raised, and the whole invocation is a no-op.
replication_options
this dictionary can specify the options about replication of the namespace (across database nodes). If provided, it must have a structure similar to: {"class": "SimpleStrategy", "replication_factor": 1}.
update_db_keyspace
if True, the Database or AsyncDatabase class that spawned this DatabaseAdmin, if any, gets updated to work on the newly-created keyspace starting when this method returns.
update_db_namespace
an alias for update_db_keyspace. DEPRECATED as of v1.5.0, removal in v2.0.0.
max_time_ms
a timeout, in milliseconds, for the whole requested operation to complete. Note that a timeout is no guarantee that the creation request has not reached the API server.

Returns

A dictionary of the form {"ok": 1} in case of success. Otherwise, an exception is raised.

Example

>>> admin_for_my_db.list_namespaces()
['default_keyspace']
>>> admin_for_my_db.create_namespace("that_other_one")
{'ok': 1}
>>> admin_for_my_db.list_namespaces()
['default_keyspace', 'that_other_one']

Deprecated since version: 1.5.0

This will be removed in 2.0.0. The term 'namespace' is being replaced by 'keyspace' throughout the Data API and the clients. Please adapt method and parameter names consistently (examples: db_admin.findNamespaces => db_admin.findKeyspaces; collection.namespace => collection.keyspace; database.list_collections(namespace=...) => database.list_collections(keyspace=...)). See https://docs.datastax.com/en/astra-db-serverless/api-reference/client-versions.html#version-1-5 for more information.

Expand source code
@deprecation.deprecated(  # type: ignore[misc]
    deprecated_in="1.5.0",
    removed_in="2.0.0",
    current_version=__version__,
    details=NAMESPACE_DEPRECATION_NOTICE_METHOD,
)
def create_namespace(
    self,
    name: str,
    *,
    replication_options: dict[str, Any] | None = None,
    update_db_keyspace: bool | None = None,
    update_db_namespace: bool | None = None,
    max_time_ms: int | None = None,
    **kwargs: Any,
) -> dict[str, Any]:
    """
    Issue a `createNamespace` command and return {'ok': 1} on success.

    *DEPRECATED* (removal in 2.0). Switch to the "create_keyspace" method.

    Args:
        name: the namespace name. If supplying a namespace that exists
            already, the method call proceeds as usual, no errors are
            raised, and the whole invocation is a no-op.
        replication_options: an optional dictionary with the replication
            settings of the namespace (across database nodes), shaped like
            `{"class": "SimpleStrategy", "replication_factor": 1}`.
            When empty or None, no options are sent with the command.
        update_db_keyspace: if True, the `Database` or `AsyncDatabase` class
            that spawned this DatabaseAdmin, if any, gets updated to work on
            the newly-created keyspace starting when this method returns.
        update_db_namespace: an alias for update_db_keyspace.
            *DEPRECATED* as of v1.5.0, removal in v2.0.0.
        max_time_ms: a timeout, in milliseconds, for the whole requested
            operation to complete.
            Note that a timeout is no guarantee that the creation request
            has not reached the API server.

    Returns:
        A dictionary of the form {"ok": 1} in case of success.
        Otherwise, an exception is raised.

    Raises:
        DataAPIFaultyResponseException: if the response status does not
            report `ok` equal to 1.

    Example:
        >>> admin_for_my_db.list_namespaces()
        ['default_keyspace']
        >>> admin_for_my_db.create_namespace("that_other_one")
        {'ok': 1}
        >>> admin_for_my_db.list_namespaces()
        ['default_keyspace', 'that_other_one']
    """

    # Reconcile the current flag with its deprecated alias.
    switch_db_keyspace = check_update_db_namespace_keyspace(
        update_db_keyspace=update_db_keyspace,
        update_db_namespace=update_db_namespace,
    )

    # The "options" entry is attached only when replication settings are given.
    command_body: dict[str, Any] = {"name": name}
    if replication_options:
        command_body["options"] = {"replication": replication_options}

    logger.info("creating namespace")
    cn_response = self._api_commander.request(
        payload={"createNamespace": command_body},
        timeout_info=base_timeout_info(max_time_ms),
    )
    cn_status = cn_response.get("status") or {}
    if cn_status.get("ok") != 1:
        raise DataAPIFaultyResponseException(
            text="Faulty response from createNamespace API command.",
            raw_response=cn_response,
        )

    logger.info("finished creating namespace")
    if switch_db_keyspace:
        self.spawner_database.use_keyspace(name)
    # Only the "ok" entry of the status is handed back to the caller.
    return {"ok": cn_status["ok"]}
def drop_keyspace(self, name: str, *, max_time_ms: int | None = None) ‑> dict[str, typing.Any]

Drop (delete) a keyspace from the database.

Args

name
the keyspace to delete. If it does not exist in this database, an error is raised.
max_time_ms
a timeout, in milliseconds, for the whole requested operation to complete. Note that a timeout is no guarantee that the deletion request has not reached the API server.

Returns

A dictionary of the form {"ok": 1} in case of success. Otherwise, an exception is raised.

Example

>>> admin_for_my_db.list_keyspaces()
['default_keyspace', 'that_other_one']
>>> admin_for_my_db.drop_keyspace("that_other_one")
{'ok': 1}
>>> admin_for_my_db.list_keyspaces()
['default_keyspace']
Expand source code
def drop_keyspace(
    self,
    name: str,
    *,
    max_time_ms: int | None = None,
) -> dict[str, Any]:
    """
    Issue a `dropKeyspace` command to delete a keyspace from the database.

    Args:
        name: the keyspace to delete. If it does not exist in this database,
            an error is raised.
        max_time_ms: a timeout, in milliseconds, for the whole requested
            operation to complete.
            Note that a timeout is no guarantee that the deletion request
            has not reached the API server.

    Returns:
        A dictionary of the form {"ok": 1} in case of success.
        Otherwise, an exception is raised.

    Raises:
        DataAPIFaultyResponseException: if the response status does not
            report `ok` equal to 1.

    Example:
        >>> admin_for_my_db.list_keyspaces()
        ['default_keyspace', 'that_other_one']
        >>> admin_for_my_db.drop_keyspace("that_other_one")
        {'ok': 1}
        >>> admin_for_my_db.list_keyspaces()
        ['default_keyspace']
    """
    logger.info("dropping keyspace")
    drop_payload = {"dropKeyspace": {"name": name}}
    dk_response = self._api_commander.request(
        payload=drop_payload,
        timeout_info=base_timeout_info(max_time_ms),
    )
    dk_status = dk_response.get("status") or {}
    if dk_status.get("ok") != 1:
        raise DataAPIFaultyResponseException(
            text="Faulty response from dropKeyspace API command.",
            raw_response=dk_response,
        )

    logger.info("finished dropping keyspace")
    # Only the "ok" entry of the status is handed back to the caller.
    return {"ok": dk_status["ok"]}
def drop_namespace(self, name: str, *, max_time_ms: int | None = None) ‑> dict[str, typing.Any]

Drop (delete) a namespace from the database.

DEPRECATED (removal in 2.0). Switch to the "drop_keyspace" method.

Args

name
the namespace to delete. If it does not exist in this database, an error is raised.
max_time_ms
a timeout, in milliseconds, for the whole requested operation to complete. Note that a timeout is no guarantee that the deletion request has not reached the API server.

Returns

A dictionary of the form {"ok": 1} in case of success. Otherwise, an exception is raised.

Example

>>> admin_for_my_db.list_namespaces()
['default_keyspace', 'that_other_one']
>>> admin_for_my_db.drop_namespace("that_other_one")
{'ok': 1}
>>> admin_for_my_db.list_namespaces()
['default_keyspace']

Deprecated since version: 1.5.0

This will be removed in 2.0.0. The term 'namespace' is being replaced by 'keyspace' throughout the Data API and the clients. Please adapt method and parameter names consistently (examples: db_admin.findNamespaces => db_admin.findKeyspaces; collection.namespace => collection.keyspace; database.list_collections(namespace=...) => database.list_collections(keyspace=...)). See https://docs.datastax.com/en/astra-db-serverless/api-reference/client-versions.html#version-1-5 for more information.

Expand source code
@deprecation.deprecated(  # type: ignore[misc]
    deprecated_in="1.5.0",
    removed_in="2.0.0",
    current_version=__version__,
    details=NAMESPACE_DEPRECATION_NOTICE_METHOD,
)
def drop_namespace(
    self,
    name: str,
    *,
    max_time_ms: int | None = None,
) -> dict[str, Any]:
    """
    Drop (delete) a namespace from the database.

    *DEPRECATED* (removal in 2.0). Switch to the "drop_keyspace" method.

    Args:
        name: the namespace to delete. If it does not exist in this database,
            an error is raised.
        max_time_ms: a timeout, in milliseconds, for the whole requested
            operation to complete.
            Note that a timeout is no guarantee that the deletion request
            has not reached the API server.

    Returns:
        A dictionary of the form {"ok": 1} in case of success.
        Otherwise, an exception is raised.

    Raises:
        DataAPIFaultyResponseException: if the response status does not
            report `ok` equal to 1.

    Example:
        >>> admin_for_my_db.list_namespaces()
        ['default_keyspace', 'that_other_one']
        >>> admin_for_my_db.drop_namespace("that_other_one")
        {'ok': 1}
        >>> admin_for_my_db.list_namespaces()
        ['default_keyspace']
    """
    logger.info("dropping namespace")
    dn_response = self._api_commander.request(
        payload={"dropNamespace": {"name": name}},
        timeout_info=base_timeout_info(max_time_ms),
    )
    # `or {}` covers both a missing and a null "status" in the response.
    if (dn_response.get("status") or {}).get("ok") != 1:
        raise DataAPIFaultyResponseException(
            text="Faulty response from dropNamespace API command.",
            raw_response=dn_response,
        )
    logger.info("finished dropping namespace")
    # Only the "ok" entry of the status is handed back to the caller.
    return {k: v for k, v in dn_response["status"].items() if k == "ok"}
def find_embedding_providers(self, *, max_time_ms: int | None = None) ‑> FindEmbeddingProvidersResult

Query the API for the full information on available embedding providers.

Args

max_time_ms
a timeout, in milliseconds, for the DevOps API request.

Returns

A FindEmbeddingProvidersResult object with the complete information returned by the API about available embedding providers.

Example (output abridged and indented for clarity)

>>> admin_for_my_db.find_embedding_providers()
FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)
>>> admin_for_my_db.find_embedding_providers().embedding_providers
{
    'openai': EmbeddingProvider(
        display_name='OpenAI',
        models=[
            EmbeddingProviderModel(name='text-embedding-3-small'),
            ...
        ]
    ),
    ...
}

Expand source code
def find_embedding_providers(
    self, *, max_time_ms: int | None = None
) -> FindEmbeddingProvidersResult:
    """
    Query the API for the full information on available embedding providers.

    Args:
        max_time_ms: a timeout, in milliseconds, for the API request.

    Returns:
        A `FindEmbeddingProvidersResult` object with the complete information
        returned by the API about available embedding providers.

    Raises:
        DataAPIFaultyResponseException: if the response "status" is missing,
            null, or lacks the "embeddingProviders" entry.

    Example:
        (output abridged and indented for clarity)

        >>> admin_for_my_db.find_embedding_providers()
        FindEmbeddingProvidersResult(embedding_providers=..., openai, ...)
        >>> admin_for_my_db.find_embedding_providers().embedding_providers
        {
            'openai': EmbeddingProvider(
                display_name='OpenAI',
                models=[
                    EmbeddingProviderModel(name='text-embedding-3-small'),
                    ...
                ]
            ),
            ...
        }
    """

    logger.info("findEmbeddingProviders")
    fe_response = self._api_commander.request(
        payload={"findEmbeddingProviders": {}},
        timeout_info=base_timeout_info(max_time_ms),
    )
    # `or {}`: an explicit null "status" must surface as a faulty response,
    # not as a TypeError from the membership test.
    fe_status = fe_response.get("status") or {}
    if "embeddingProviders" not in fe_status:
        raise DataAPIFaultyResponseException(
            text="Faulty response from findEmbeddingProviders API command.",
            raw_response=fe_response,
        )
    logger.info("finished findEmbeddingProviders")
    return FindEmbeddingProvidersResult.from_dict(fe_status)
def get_async_database(self, *, token: str | TokenProvider | None = None, keyspace: str | None = None, namespace: str | None = None, api_path: str | None = None, api_version: str | None = None) ‑> AsyncDatabase

Create an AsyncDatabase instance for the database, to be used when doing data-level work (such as creating/managing collections).

This method has identical behavior and signature as the sync counterpart get_database: please see that one for more details.

Expand source code
def get_async_database(
    self,
    *,
    token: str | TokenProvider | None = None,
    keyspace: str | None = None,
    namespace: str | None = None,
    api_path: str | None = None,
    api_version: str | None = None,
) -> AsyncDatabase:
    """
    Build an AsyncDatabase for this database, for data-level work
    (such as creating/managing collections).

    The instance is obtained by calling `get_database` with the same
    parameters and converting the result with `to_async()`: see the sync
    counterpart for the meaning of each argument.
    """

    # Resolve the `keyspace` parameter against its `namespace` alias.
    resolved_keyspace = check_namespace_keyspace(
        keyspace=keyspace,
        namespace=namespace,
    )

    sync_database = self.get_database(
        token=token,
        keyspace=resolved_keyspace,
        api_path=api_path,
        api_version=api_version,
    )
    return sync_database.to_async()
def get_database(self, *, token: str | TokenProvider | None = None, keyspace: str | None = None, namespace: str | None = None, api_path: str | None = None, api_version: str | None = None) ‑> Database

Create a Database instance out of this class for working with the data in it.

Args

token
if supplied, is passed to the Database instead of the one set for this object. Useful if one wants to work in a least-privilege manner, limiting the permissions for non-admin work. This can be either a literal token string or a subclass of TokenProvider.
keyspace
an optional keyspace to set in the resulting Database. If not provided, no keyspace is set, limiting what the Database can do until setting it with e.g. a use_keyspace method call.
namespace
an alias for keyspace. DEPRECATED, removal in 2.0.
api_path
path to append to the API Endpoint. In typical usage, this should be left to its default of "".
api_version
version specifier to append to the API path. In typical usage, this should be left to its default of "v1".

Returns

A Database object, ready to be used for working with data and collections.

Example

>>> my_db = admin_for_my_db.get_database()
>>> my_db.list_collection_names()
['movies', 'another_collection']

Note

creating an instance of Database does not trigger actual creation of the database itself, which should exist beforehand.

Expand source code
def get_database(
    self,
    *,
    token: str | TokenProvider | None = None,
    keyspace: str | None = None,
    namespace: str | None = None,
    api_path: str | None = None,
    api_version: str | None = None,
) -> Database:
    """
    Spawn a Database instance from this admin object, for working with
    the data in the database.

    Args:
        token: if supplied, is passed to the Database instead of
            the one set for this object. Useful if one wants to work in
            a least-privilege manner, limiting the permissions for non-admin work.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        keyspace: an optional keyspace to set in the resulting Database.
            If not provided, no keyspace is set, limiting what the Database
            can do until setting it with e.g. a `use_keyspace` method call.
        namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
        api_path: path to append to the API Endpoint. In typical usage, this
            should be left to its default of "".
        api_version: version specifier to append to the API path. In typical
            usage, this should be left to its default of "v1".

    Returns:
        A Database object, ready to be used for working with data and collections.

    Example:
        >>> my_db = admin_for_my_db.get_database()
        >>> my_db.list_collection_names()
        ['movies', 'another_collection']

    Note:
        creating an instance of Database does not trigger actual creation
        of the database itself, which should exist beforehand.
    """

    # Resolve the `keyspace` parameter against its `namespace` alias.
    resolved_keyspace = check_namespace_keyspace(
        keyspace=keyspace,
        namespace=namespace,
    )

    # lazy importing here to avoid circular dependency
    from astrapy import Database

    database_settings = {
        "api_endpoint": self.api_endpoint,
        # An explicitly passed token wins over this admin's own provider.
        "token": coerce_token_provider(token) or self.token_provider,
        "keyspace": resolved_keyspace,
        "callers": self.callers,
        "environment": self.environment,
        "api_path": api_path,
        "api_version": api_version,
    }
    return Database(**database_settings)
def list_keyspaces(self, *, max_time_ms: int | None = None) ‑> list[str]

Query the API for a list of the keyspaces in the database.

Args

max_time_ms
a timeout, in milliseconds, for the DevOps API request.

Returns

A list of the keyspaces, each a string, in no particular order.

Example

>>> admin_for_my_db.list_keyspaces()
['default_keyspace', 'staging_keyspace']
Expand source code
def list_keyspaces(self, *, max_time_ms: int | None = None) -> list[str]:
    """
    Query the API for a list of the keyspaces in the database.

    Args:
        max_time_ms: a timeout, in milliseconds, for the API request.

    Returns:
        A list of the keyspaces, each a string, in no particular order.

    Raises:
        DataAPIFaultyResponseException: if the response "status" is missing,
            null, or lacks the "keyspaces" entry.

    Example:
        >>> admin_for_my_db.list_keyspaces()
        ['default_keyspace', 'staging_keyspace']
    """
    logger.info("getting list of keyspaces")
    fn_response = self._api_commander.request(
        payload={"findKeyspaces": {}},
        timeout_info=base_timeout_info(max_time_ms),
    )
    # `or {}`: an explicit null "status" must surface as a faulty response,
    # not as a TypeError from the membership test.
    if "keyspaces" not in (fn_response.get("status") or {}):
        raise DataAPIFaultyResponseException(
            text="Faulty response from findKeyspaces API command.",
            raw_response=fn_response,
        )
    logger.info("finished getting list of keyspaces")
    return fn_response["status"]["keyspaces"]  # type: ignore[no-any-return]
def list_namespaces(self, *, max_time_ms: int | None = None) ‑> list[str]

Query the API for a list of the namespaces in the database.

DEPRECATED (removal in 2.0). Switch to the "list_keyspaces" method.

Args

max_time_ms
a timeout, in milliseconds, for the DevOps API request.

Returns

A list of the namespaces, each a string, in no particular order.

Example

>>> admin_for_my_db.list_namespaces()
['default_keyspace', 'staging_namespace']

Deprecated since version: 1.5.0

This will be removed in 2.0.0. The term 'namespace' is being replaced by 'keyspace' throughout the Data API and the clients. Please adapt method and parameter names consistently (examples: db_admin.findNamespaces => db_admin.findKeyspaces; collection.namespace => collection.keyspace; database.list_collections(namespace=...) => database.list_collections(keyspace=...)). See https://docs.datastax.com/en/astra-db-serverless/api-reference/client-versions.html#version-1-5 for more information.

Expand source code
@deprecation.deprecated(  # type: ignore[misc]
    deprecated_in="1.5.0",
    removed_in="2.0.0",
    current_version=__version__,
    details=NAMESPACE_DEPRECATION_NOTICE_METHOD,
)
def list_namespaces(self, *, max_time_ms: int | None = None) -> list[str]:
    """
    Query the API for a list of the namespaces in the database.

    *DEPRECATED* (removal in 2.0). Switch to the "list_keyspaces" method.

    Args:
        max_time_ms: a timeout, in milliseconds, for the API request.

    Returns:
        A list of the namespaces, each a string, in no particular order.

    Raises:
        DataAPIFaultyResponseException: if the response "status" is missing,
            null, or lacks the "namespaces" entry.

    Example:
        >>> admin_for_my_db.list_namespaces()
        ['default_keyspace', 'staging_namespace']
    """
    logger.info("getting list of namespaces")
    fn_response = self._api_commander.request(
        payload={"findNamespaces": {}},
        timeout_info=base_timeout_info(max_time_ms),
    )
    # `or {}`: an explicit null "status" must surface as a faulty response,
    # not as a TypeError from the membership test.
    if "namespaces" not in (fn_response.get("status") or {}):
        raise DataAPIFaultyResponseException(
            text="Faulty response from findNamespaces API command.",
            raw_response=fn_response,
        )
    logger.info("finished getting list of namespaces")
    return fn_response["status"]["namespaces"]  # type: ignore[no-any-return]
def set_caller(self, caller_name: str | None = None, caller_version: str | None = None) ‑> None

Set a new identity for the application/framework on behalf of which the DevOps API calls will be performed (the "caller").

New objects spawned from this client afterwards will inherit the new settings.

Args

caller_name
name of the application, or framework, on behalf of which the DevOps API calls are performed. This ends up in the request user-agent.
caller_version
version of the caller.

Example

>>> admin_for_my_db.set_caller(
...     caller_name="the_caller",
...     caller_version="0.1.0",
... )

Deprecated since version: 1.5.1

This will be removed in 2.0.0. Please provide the caller(s) at constructor time through the callers list parameter.

Expand source code
@deprecation.deprecated(  # type: ignore[misc]
    deprecated_in="1.5.1",
    removed_in="2.0.0",
    current_version=__version__,
    details=SET_CALLER_DEPRECATION_NOTICE,
)
def set_caller(
    self,
    caller_name: str | None = None,
    caller_version: str | None = None,
) -> None:
    """
    Replace the "caller" identity, i.e. the application/framework on behalf
    of which the API calls are performed, and rebuild the API commander
    so that it reflects the new setting.

    New objects spawned from this client afterwards will inherit the new settings.

    Args:
        caller_name: name of the application, or framework, on behalf of which
            the API calls are performed. This ends up in the request user-agent.
        caller_version: version of the caller.

    Example:
        >>> admin_for_my_db.set_caller(
        ...     caller_name="the_caller",
        ...     caller_version="0.1.0",
        ... )
    """

    logger.info(f"setting caller to {caller_name}/{caller_version}")
    # Start from an empty callers list: the previous identity is replaced.
    self.callers = check_caller_parameters([], caller_name, caller_version)
    # The commander is re-created after the callers attribute has changed.
    self._api_commander = self._get_api_commander()
def with_options(self, *, api_endpoint: str | None = None, token: str | TokenProvider | None = None, callers: Sequence[CallerType] = [], caller_name: str | None = None, caller_version: str | None = None) ‑> DataAPIDatabaseAdmin

Create a clone of this DataAPIDatabaseAdmin with some changed attributes.

Args

api_endpoint
the full URI to access the Data API, e.g. "http://localhost:8181".
token
an access token with enough permission to perform admin tasks. This can be either a literal token string or a subclass of TokenProvider.
callers
a list of caller identities, i.e. applications, or frameworks, on behalf of which Data API calls are performed. These end up in the request user-agent. Each caller identity is a ("caller_name", "caller_version") pair.
caller_name
DEPRECATED, use callers. Removal 2.0. Name of the application, or framework, on behalf of which the Data API calls are performed. This ends up in the request user-agent.
caller_version
version of the caller. DEPRECATED, use callers. Removal 2.0.

Returns

a new DataAPIDatabaseAdmin instance.

Example

>>> admin_for_my_other_db = admin_for_my_db.with_options(
...     api_endpoint="http://10.1.1.5:8181",
... )
Expand source code
def with_options(
    self,
    *,
    api_endpoint: str | None = None,
    token: str | TokenProvider | None = None,
    callers: Sequence[CallerType] = [],
    caller_name: str | None = None,
    caller_version: str | None = None,
) -> DataAPIDatabaseAdmin:
    """
    Return a copy of this DataAPIDatabaseAdmin, overriding selected attributes.

    The caller-related arguments are validated through
    `check_caller_parameters`; the actual cloning is delegated to `_copy`.

    Args:
        api_endpoint: the full URI to access the Data API,
            e.g. "http://localhost:8181".
        token: an access token with enough permission to perform admin tasks.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        callers: a list of caller identities, i.e. applications, or frameworks,
            on behalf of which Data API calls are performed. These end up in the
            request user-agent.
            Each caller identity is a ("caller_name", "caller_version") pair.
        caller_name: *DEPRECATED*, use `callers`. Removal 2.0. Name of the
            application, or framework, on behalf of which the Data API calls
            are performed. This ends up in the request user-agent.
        caller_version: version of the caller. *DEPRECATED*, use `callers`.
            Removal 2.0.

    Returns:
        a new DataAPIDatabaseAdmin instance.

    Example:
        >>> admin_for_my_other_db = admin_for_my_db.with_options(
        ...     api_endpoint="http://10.1.1.5:8181",
        ... )
    """

    return self._copy(
        api_endpoint=api_endpoint,
        token=token,
        callers=check_caller_parameters(callers, caller_name, caller_version),
    )
class Database (api_endpoint: str, token: str | TokenProvider | None = None, *, keyspace: str | None = None, namespace: str | None = None, callers: Sequence[CallerType] = [], caller_name: str | None = None, caller_version: str | None = None, environment: str | None = None, api_path: str | None = None, api_version: str | None = None)

A Data API database. This is the object for doing database-level DML, such as creating/deleting collections, and for obtaining Collection objects themselves. This class has a synchronous interface.

The usual way of obtaining one Database is through the get_database method of a DataAPIClient.

On Astra DB, a Database comes with an "API Endpoint", which implies a Database object instance reaches a specific region (relevant point in case of multi-region databases).

Args

api_endpoint
the full "API Endpoint" string used to reach the Data API. Example: "https://<database_id>-<region>.apps.astra.datastax.com"
token
an Access Token to the database. Example: "AstraCS:xyz…" This can be either a literal token string or a subclass of TokenProvider.
keyspace
this is the keyspace all method calls will target, unless one is explicitly specified in the call. If no keyspace is supplied when creating a Database, on Astra DB the name "default_keyspace" is set, while on other environments the keyspace is left unspecified: in this case, most operations are unavailable until a keyspace is set (through an explicit use_keyspace invocation or equivalent).
namespace
an alias for keyspace. DEPRECATED, removal in 2.0.
callers
a list of caller identities, i.e. applications, or frameworks, on behalf of which the Data API calls are performed. These end up in the request user-agent. Each caller identity is a ("caller_name", "caller_version") pair.
caller_name
DEPRECATED, use callers. Removal 2.0. Name of the application, or framework, on behalf of which the Data API calls are performed. This ends up in the request user-agent.
caller_version
version of the caller. DEPRECATED, use callers. Removal 2.0.
environment
a string representing the target Data API environment. It can be left unspecified for the default value of Environment.PROD; other values include Environment.OTHER, Environment.DSE.
api_path
path to append to the API Endpoint. In typical usage, this should be left to its default (sensibly chosen based on the environment).
api_version
version specifier to append to the API path. In typical usage, this should be left to its default of "v1".

Example

>>> from astrapy import DataAPIClient
>>> my_client = DataAPIClient("AstraCS:...")
>>> my_db = my_client.get_database(
...    "https://01234567-....apps.astra.datastax.com"
... )

Note

creating an instance of Database does not trigger actual creation of the database itself, which should exist beforehand. To create databases, see the AstraDBAdmin class.

Expand source code
class Database:
    """
    A Data API database. This is the object for doing database-level
    DML, such as creating/deleting collections, and for obtaining Collection
    objects themselves. This class has a synchronous interface.

    The usual way of obtaining one Database is through the `get_database`
    method of a `DataAPIClient`.

    On Astra DB, a Database comes with an "API Endpoint", which implies
    a Database object instance reaches a specific region (relevant point in
    case of multi-region databases).

    Args:
        api_endpoint: the full "API Endpoint" string used to reach the Data API.
            Example: "https://<database_id>-<region>.apps.astra.datastax.com"
        token: an Access Token to the database. Example: "AstraCS:xyz..."
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        keyspace: this is the keyspace all method calls will target, unless
            one is explicitly specified in the call. If no keyspace is supplied
            when creating a Database, on Astra DB the name "default_keyspace" is set,
            while on other environments the keyspace is left unspecified: in this case,
            most operations are unavailable until a keyspace is set (through an explicit
            `use_keyspace` invocation or equivalent).
        namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
        callers: a list of caller identities, i.e. applications, or frameworks,
            on behalf of which the Data API calls are performed. These end up
            in the request user-agent.
            Each caller identity is a ("caller_name", "caller_version") pair.
        caller_name: *DEPRECATED*, use `callers`. Removal 2.0. Name of the
            application, or framework, on behalf of which the Data API calls
            are performed. This ends up in the request user-agent.
        caller_version: version of the caller. *DEPRECATED*, use `callers`. Removal 2.0.
        environment: a string representing the target Data API environment.
            It can be left unspecified for the default value of `Environment.PROD`;
            other values include `Environment.OTHER`, `Environment.DSE`.
        api_path: path to append to the API Endpoint. In typical usage, this
            should be left to its default (sensibly chosen based on the environment).
        api_version: version specifier to append to the API path. In typical
            usage, this should be left to its default of "v1".

    Example:
        >>> from astrapy import DataAPIClient
        >>> my_client = astrapy.DataAPIClient("AstraCS:...")
        >>> my_db = my_client.get_database(
        ...    "https://01234567-....apps.astra.datastax.com"
        ... )

    Note:
        creating an instance of Database does not trigger actual creation
        of the database itself, which should exist beforehand. To create databases,
        see the AstraDBAdmin class.
    """

    def __init__(
        self,
        api_endpoint: str,
        token: str | TokenProvider | None = None,
        *,
        keyspace: str | None = None,
        namespace: str | None = None,
        callers: Sequence[CallerType] = [],
        caller_name: str | None = None,
        caller_version: str | None = None,
        environment: str | None = None,
        api_path: str | None = None,
        api_version: str | None = None,
    ) -> None:
        # Reconcile the deprecated aliases (caller_name/caller_version and
        # namespace) with their current counterparts before anything else.
        callers_param = check_caller_parameters(callers, caller_name, caller_version)
        keyspace_param = check_namespace_keyspace(
            keyspace=keyspace,
            namespace=namespace,
        )
        self.environment = (environment or Environment.PROD).lower()

        # An unspecified path/version is looked up in the per-environment maps.
        resolved_api_path: str | None = (
            API_PATH_ENV_MAP[self.environment] if api_path is None else api_path
        )
        resolved_api_version: str | None = (
            API_VERSION_ENV_MAP[self.environment]
            if api_version is None
            else api_version
        )
        self.token_provider = coerce_token_provider(token)
        # Slashes are stripped from both ends of the endpoint.
        self.api_endpoint = api_endpoint.strip("/")
        self.api_path = resolved_api_path
        self.api_version = resolved_api_version

        # On Astra DB environments a missing keyspace gets the default name;
        # elsewhere it is stored as given (possibly None).
        self._using_keyspace: str | None = (
            DEFAULT_ASTRA_DB_KEYSPACE
            if keyspace_param is None
            and self.environment in Environment.astra_db_values
            else keyspace_param
        )

        # The token is read from the provider once, here, and stored in the
        # headers handed to every commander built by this instance.
        self._commander_headers = {
            DEFAULT_DATA_API_AUTH_HEADER: self.token_provider.get_token(),
        }

        self.callers = callers_param
        self._api_commander = self._get_api_commander(keyspace=self.keyspace)
        # Filled in on the first call to `name()`.
        self._name: str | None = None

    def __getattr__(self, collection_name: str) -> Collection:
        return self.get_collection(name=collection_name)

    def __getitem__(self, collection_name: str) -> Collection:
        return self.get_collection(name=collection_name)

    def __repr__(self) -> str:
        """
        Build a description of the form
        `ClassName(api_endpoint="...", token="...", keyspace="...")`.

        The token part is passed through `redact_secret` and is omitted
        altogether when the token provider is falsy; an unset keyspace is
        rendered as the literal text "keyspace not set".
        """
        pieces = [f'api_endpoint="{self.api_endpoint}"']
        if self.token_provider:
            pieces.append(f'token="{redact_secret(str(self.token_provider), 15)}"')
        if self.keyspace is None:
            pieces.append("keyspace not set")
        else:
            pieces.append(f'keyspace="{self.keyspace}"')
        return f"{self.__class__.__name__}({', '.join(pieces)})"

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, Database):
            return all(
                [
                    self.token_provider == other.token_provider,
                    self.api_endpoint == other.api_endpoint,
                    self.api_path == other.api_path,
                    self.api_version == other.api_version,
                    self.keyspace == other.keyspace,
                    self.callers == other.callers,
                    self.api_commander == other.api_commander,
                ]
            )
        else:
            return False

    def _get_api_commander(self, keyspace: str | None) -> APICommander | None:
        """
        Instantiate a new APICommander based on the properties of this class
        and a provided keyspace.

        If keyspace is None, return None (signaling a "keyspace not set").
        """

        if keyspace is None:
            return None
        else:
            base_path_components = [
                comp
                for comp in (
                    self.api_path.strip("/"),
                    self.api_version.strip("/"),
                    keyspace,
                )
                if comp != ""
            ]
            base_path = f"/{'/'.join(base_path_components)}"
            api_commander = APICommander(
                api_endpoint=self.api_endpoint,
                path=base_path,
                headers=self._commander_headers,
                callers=self.callers,
            )
            return api_commander

    def _get_driver_commander(self, keyspace: str | None) -> APICommander:
        """
        Building on _get_api_commander, fall back to class keyspace in
        creating/returning a commander, and in any case raise an error if not set.
        """
        driver_commander: APICommander | None
        if keyspace:
            driver_commander = self._get_api_commander(keyspace=keyspace)
        else:
            driver_commander = self._api_commander
        if driver_commander is None:
            raise ValueError(
                "No keyspace specified. This operation requires a keyspace to "
                "be set, e.g. through the `use_keyspace` method."
            )
        return driver_commander

    def _copy(
        self,
        *,
        api_endpoint: str | None = None,
        token: str | TokenProvider | None = None,
        keyspace: str | None = None,
        namespace: str | None = None,
        callers: Sequence[CallerType] = [],
        caller_name: str | None = None,
        caller_version: str | None = None,
        environment: str | None = None,
        api_path: str | None = None,
        api_version: str | None = None,
    ) -> Database:
        """
        Build a new Database from this one, applying the supplied overrides.

        Every setting falls back to this instance's current value whenever
        the corresponding override is falsy (not just None).
        """
        callers_param = check_caller_parameters(callers, caller_name, caller_version)
        keyspace_param = check_namespace_keyspace(
            keyspace=keyspace,
            namespace=namespace,
        )
        new_endpoint = api_endpoint or self.api_endpoint
        new_token = coerce_token_provider(token) or self.token_provider
        new_keyspace = keyspace_param or self.keyspace
        new_callers = callers_param or self.callers
        new_environment = environment or self.environment
        new_api_path = api_path or self.api_path
        new_api_version = api_version or self.api_version
        return Database(
            api_endpoint=new_endpoint,
            token=new_token,
            keyspace=new_keyspace,
            callers=new_callers,
            environment=new_environment,
            api_path=new_api_path,
            api_version=new_api_version,
        )

    def with_options(
        self,
        *,
        keyspace: str | None = None,
        namespace: str | None = None,
        callers: Sequence[CallerType] = [],
        caller_name: str | None = None,
        caller_version: str | None = None,
    ) -> Database:
        """
        Return a copy of this database, overriding selected attributes.

        Args:
            keyspace: the working keyspace of the new database, i.e. the one
                its method calls target unless one is explicitly specified
                in the call. If omitted, the current keyspace is retained.
            namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
            callers: a list of caller identities, i.e. applications, or frameworks,
                on behalf of which the Data API calls are performed. These end up
                in the request user-agent.
                Each caller identity is a ("caller_name", "caller_version") pair.
                If empty, the current callers are retained.
            caller_name: *DEPRECATED*, use `callers`. Removal 2.0. Name of the
                application, or framework, on behalf of which the Data API calls
                are performed. This ends up in the request user-agent.
            caller_version: version of the caller. *DEPRECATED*, use `callers`.
                Removal 2.0.

        Returns:
            a new `Database` instance.

        Example:
            >>> my_db_2 = my_db.with_options(
            ...     keyspace="the_other_keyspace",
            ...     callers=[("the_caller", "0.1.0")],
            ... )
        """

        # Deprecated aliases are resolved here, callers first, then keyspace;
        # the cloning itself is delegated to `_copy`.
        resolved_callers = check_caller_parameters(
            callers,
            caller_name,
            caller_version,
        )
        resolved_keyspace = check_namespace_keyspace(
            keyspace=keyspace,
            namespace=namespace,
        )
        return self._copy(keyspace=resolved_keyspace, callers=resolved_callers)

    def to_async(
        self,
        *,
        api_endpoint: str | None = None,
        token: str | TokenProvider | None = None,
        keyspace: str | None = None,
        namespace: str | None = None,
        callers: Sequence[CallerType] = [],
        caller_name: str | None = None,
        caller_version: str | None = None,
        environment: str | None = None,
        api_path: str | None = None,
        api_version: str | None = None,
    ) -> AsyncDatabase:
        """
        Create an AsyncDatabase mirroring this database. Any argument
        explicitly provided acts as an override; every other setting is
        taken from this database.

        Args:
            api_endpoint: the full "API Endpoint" string used to reach the Data API.
                Example: "https://<database_id>-<region>.apps.astra.datastax.com"
            token: an Access Token to the database. Example: "AstraCS:xyz..."
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            keyspace: the working keyspace of the new database, i.e. the one
                its method calls target unless one is explicitly specified
                in the call. If omitted, this database's keyspace is reused.
            namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
            callers: a list of caller identities, i.e. applications, or frameworks,
                on behalf of which the Data API calls are performed. These end up
                in the request user-agent.
                Each caller identity is a ("caller_name", "caller_version") pair.
            caller_name: *DEPRECATED*, use `callers`. Removal 2.0. Name of the
                application, or framework, on behalf of which the Data API calls
                are performed. This ends up in the request user-agent.
            caller_version: version of the caller. *DEPRECATED*, use `callers`.
                Removal 2.0.
            environment: a string representing the target Data API environment.
                Values are, for example, `Environment.PROD`, `Environment.OTHER`,
                or `Environment.DSE`.
            api_path: path to append to the API Endpoint. If omitted, this
                database's value is reused.
            api_version: version specifier to append to the API path. If
                omitted, this database's value is reused.

        Returns:
            the new copy, an `AsyncDatabase` instance.

        Example:
            >>> my_async_db = my_db.to_async()
            >>> asyncio.run(my_async_db.list_collection_names())
        """

        callers_param = check_caller_parameters(callers, caller_name, caller_version)
        keyspace_param = check_namespace_keyspace(
            keyspace=keyspace,
            namespace=namespace,
        )
        # Each setting falls back to this instance's value when the override
        # is falsy (not just None).
        new_endpoint = api_endpoint or self.api_endpoint
        new_token = coerce_token_provider(token) or self.token_provider
        new_keyspace = keyspace_param or self.keyspace
        new_callers = callers_param or self.callers
        new_environment = environment or self.environment
        new_api_path = api_path or self.api_path
        new_api_version = api_version or self.api_version
        return AsyncDatabase(
            api_endpoint=new_endpoint,
            token=new_token,
            keyspace=new_keyspace,
            callers=new_callers,
            environment=new_environment,
            api_path=new_api_path,
            api_version=new_api_version,
        )

    @deprecation.deprecated(  # type: ignore[misc]
        deprecated_in="1.5.1",
        removed_in="2.0.0",
        current_version=__version__,
        details=SET_CALLER_DEPRECATION_NOTICE,
    )
    def set_caller(
        self,
        caller_name: str | None = None,
        caller_version: str | None = None,
    ) -> None:
        """
        Replace the "caller" identity of this database, i.e. the application
        or framework on whose behalf the Data API calls are performed.

        The database is mutated in place: its `callers` attribute is
        overwritten and its API commander is rebuilt afterwards.

        *DEPRECATED* since 1.5.1 (removal in 2.0.0).

        Args:
            caller_name: name of the application, or framework, on behalf of which
                the Data API calls are performed. This ends up in the request user-agent.
            caller_version: version of the caller.

        Example:
            >>> my_db.set_caller(caller_name="the_caller", caller_version="0.1.0")
        """

        logger.info(f"setting caller to {caller_name}/{caller_version}")
        # The legacy name/version pair goes through the shared checker, which
        # is given an empty `callers` list here.
        self.callers = check_caller_parameters([], caller_name, caller_version)
        # Re-create the commander for the current keyspace, now that
        # `callers` has been updated.
        self._api_commander = self._get_api_commander(keyspace=self.keyspace)

    @deprecation.deprecated(  # type: ignore[misc]
        deprecated_in="1.5.0",
        removed_in="2.0.0",
        current_version=__version__,
        details=NAMESPACE_DEPRECATION_NOTICE_METHOD,
    )
    def use_namespace(self, namespace: str) -> None:
        """
        Change the working namespace of this database, mutating the instance.

        *DEPRECATED* (removal in 2.0). Switch to the `use_keyspace` method,
        to which this call is forwarded.

        The namespace is not created by this method: it should exist already
        (created for instance with a `DatabaseAdmin.create_namespace` call).

        Args:
            namespace: the new namespace to use as the database working namespace.

        Returns:
            None.

        Example:
            >>> my_db.list_collection_names()
            ['coll_1', 'coll_2']
            >>> my_db.use_namespace("an_empty_namespace")
            >>> my_db.list_collection_names()
            []
        """
        # "namespace" is the legacy name for "keyspace".
        target_keyspace = namespace
        return self.use_keyspace(keyspace=target_keyspace)

    def use_keyspace(self, keyspace: str) -> None:
        """
        Change the working keyspace of this database, mutating the instance.

        The keyspace is not created by this method: it should exist already
        (created for instance with a `DatabaseAdmin.create_keyspace` call).

        Args:
            keyspace: the new keyspace to use as the database working keyspace.

        Returns:
            None.

        Example:
            >>> my_db.list_collection_names()
            ['coll_1', 'coll_2']
            >>> my_db.use_keyspace("an_empty_keyspace")
            >>> my_db.list_collection_names()
            []
        """
        logger.info(f"switching to keyspace '{keyspace}'")
        self._using_keyspace = keyspace
        # The instance-level commander is tied to a keyspace, so it is
        # rebuilt for the one just stored.
        refreshed_commander = self._get_api_commander(keyspace=self.keyspace)
        self._api_commander = refreshed_commander

    def info(self) -> DatabaseInfo:
        """
        Retrieve additional information on the database, as a DatabaseInfo.

        Nothing is cached here: the information is fetched anew at every
        invocation, since some of the returned properties can change during
        the lifetime of the database (such as raw_info["keyspaces"]).

        Raises:
            DevOpsAPIException: if no information could be obtained for this
                database (the fetch returned None).

        Example:
            >>> my_db.info().region
            'eu-west-1'

            >>> my_db.info().raw_info['datacenters'][0]['dateCreated']
            '2023-01-30T12:34:56Z'

        Note:
            see the DatabaseInfo documentation for a caveat about the difference
            between the `region` and the `raw_info["region"]` attributes.
        """

        logger.info("getting database info")
        fetched_info = fetch_database_info(
            self.api_endpoint,
            token=self.token_provider.get_token(),
            keyspace=self.keyspace,
        )
        if fetched_info is None:
            raise DevOpsAPIException(
                "Database is not in a supported environment for this operation."
            )
        logger.info("finished getting database info")
        return fetched_info

    @property
    def id(self) -> str:
        """
        The ID of this database, extracted from its API endpoint.

        Raises:
            DevOpsAPIException: if the API endpoint could not be parsed.

        Example:
            >>> my_db.id
            '01234567-89ab-cdef-0123-456789abcdef'
        """

        endpoint_parts = parse_api_endpoint(self.api_endpoint)
        if endpoint_parts is None:
            raise DevOpsAPIException(
                "Database is not in a supported environment for this operation."
            )
        return endpoint_parts.database_id

    def name(self) -> str:
        """
        The name of this database. Note that this bears no unicity guarantees.

        Calling this method the first time involves a request
        to the DevOps API (the resulting database name is then cached).
        See the `info()` method for more details.

        Example:
            >>> my_db.name()
            'the_application_database'
        """

        if self._name is None:
            self._name = self.info().name
        return self._name

    @property
    def namespace(self) -> str | None:
        """
        The namespace this database uses as target for all commands when
        no method-call-specific namespace is specified.

        *DEPRECATED* (removal in 2.0). Switch to the `keyspace` property,
        whose value this property returns after issuing a deprecation warning.

        Returns:
            the working namespace (a string), or None if not set.

        Example:
            >>> my_db.namespace
            'the_keyspace'
        """

        # stacklevel=2 makes the warning point at the code reading the property
        warnings.warn(
            deprecation.DeprecatedWarning(
                "the 'namespace' property",
                deprecated_in="1.5.0",
                removed_in="2.0.0",
                details=NAMESPACE_DEPRECATION_NOTICE_METHOD,
            ),
            stacklevel=2,
        )

        return self.keyspace

    @property
    def keyspace(self) -> str | None:
        """
        The keyspace this database uses as target for all commands when
        no method-call-specific keyspace is specified.

        Returns:
            the working keyspace (a string), or None if not set.

        Example:
            >>> my_db.keyspace
            'the_keyspace'
        """

        return self._using_keyspace

    def get_collection(
        self,
        name: str,
        *,
        keyspace: str | None = None,
        namespace: str | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | None = None,
        collection_max_time_ms: int | None = None,
    ) -> Collection:
        """
        Create a `Collection` object representing a collection on this database.

        No check or change is made on the actual database by this method:
        for the returned `Collection` to be used meaningfully, the collection
        must exist already (for instance, it should have been created
        previously by calling the `create_collection` method).

        Args:
            name: the name of the collection.
            keyspace: the keyspace containing the collection. If no keyspace
                is specified, the general setting for this database is used.
            namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
            embedding_api_key: optional API key(s) for interacting with the collection.
                If an embedding service is configured, and this parameter is not None,
                each Data API call will include the necessary embedding-related headers
                as specified by this parameter. If a string is passed, it translates
                into the one "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
                For some vectorize providers/models, if using header-based
                authentication, specialized subclasses of
                `astrapy.authentication.EmbeddingHeadersProvider` should be supplied.
            collection_max_time_ms: a default timeout, in milliseconds, for the
                duration of each operation on the collection. Individual timeouts
                can be provided to each collection method call and will take
                precedence, with this value being an overall default.
                Note that for some methods involving multiple API calls (such as `find`,
                `delete_many`, `insert_many` and so on), it is strongly suggested
                to provide a specific timeout as the default one likely wouldn't make
                much sense.

        Returns:
            a `Collection` instance, representing the desired collection
                (but without any form of validation).

        Raises:
            ValueError: if no keyspace is passed and none is set on the database.

        Example:
            >>> my_col = my_db.get_collection("my_collection")
            >>> my_col.count_documents({}, upper_bound=100)
            41

        Note:
            The attribute and indexing syntax forms achieve the same effect
            as this method. In other words, the following are equivalent:
                my_db.get_collection("coll_name")
                my_db.coll_name
                my_db["coll_name"]
        """

        requested_keyspace = check_namespace_keyspace(
            keyspace=keyspace,
            namespace=namespace,
        )

        # lazy importing here against circular-import error
        from astrapy.collection import Collection

        # A keyspace given in the call wins over the database-level one.
        target_keyspace = requested_keyspace or self.keyspace
        if target_keyspace is None:
            raise ValueError(
                "No keyspace specified. This operation requires a keyspace to "
                "be set, e.g. through the `use_keyspace` method."
            )
        collection_options = CollectionAPIOptions(
            embedding_api_key=coerce_embedding_headers_provider(embedding_api_key),
            max_time_ms=collection_max_time_ms,
        )
        return Collection(
            self,
            name,
            keyspace=target_keyspace,
            api_options=collection_options,
            callers=self.callers,
        )

    def create_collection(
        self,
        name: str,
        *,
        keyspace: str | None = None,
        namespace: str | None = None,
        dimension: int | None = None,
        metric: str | None = None,
        service: CollectionVectorServiceOptions | dict[str, Any] | None = None,
        indexing: dict[str, Any] | None = None,
        default_id_type: str | None = None,
        additional_options: dict[str, Any] | None = None,
        check_exists: bool | None = None,
        max_time_ms: int | None = None,
        embedding_api_key: str | EmbeddingHeadersProvider | None = None,
        collection_max_time_ms: int | None = None,
    ) -> Collection:
        """
        Create a collection on the database and return the Collection
        instance that represents it.

        This is a blocking operation: the method returns when the collection
        is ready to be used. Unlike the `get_collection` method, this method
        causes the collection to be actually created on the database.

        Args:
            name: the name of the collection.
            keyspace: the keyspace where the collection is to be created.
                If not specified, the general setting for this database is used.
            namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
            dimension: for vector collections, the dimension of the vectors
                (i.e. the number of their components).
            metric: the similarity metric used for vector searches.
                Allowed values are `VectorMetric.DOT_PRODUCT`, `VectorMetric.EUCLIDEAN`
                or `VectorMetric.COSINE` (default).
            service: a dictionary describing a service for
                embedding computation, e.g. `{"provider": "ab", "modelName": "xy"}`.
                Alternatively, a CollectionVectorServiceOptions object to the same effect.
            indexing: optional specification of the indexing options for
                the collection, in the form of a dictionary such as
                    {"deny": [...]}
                or
                    {"allow": [...]}
            default_id_type: this sets what type of IDs the API server will
                generate when inserting documents that do not specify their
                `_id` field explicitly. Can be set to any of the values
                `DefaultIdType.UUID`, `DefaultIdType.OBJECTID`,
                `DefaultIdType.UUIDV6`, `DefaultIdType.UUIDV7`,
                `DefaultIdType.DEFAULT`.
            additional_options: any further set of key-value pairs that will
                be added to the "options" part of the payload when sending
                the Data API command to create a collection.
            check_exists: whether to run an existence check for the collection
                name before attempting to create the collection.
                If check_exists is True (or left unspecified), an error is
                raised when creating an existing collection.
                If it is False, the creation is attempted. In this case, for
                preexisting collections, the command will succeed or fail
                depending on whether the options match or not.
            max_time_ms: a timeout, in milliseconds, shared between the optional
                existence check and the collection-creation request.
            embedding_api_key: optional API key(s) for interacting with the collection.
                If an embedding service is configured, and this parameter is not None,
                each Data API call will include the necessary embedding-related headers
                as specified by this parameter. If a string is passed, it translates
                into the one "embedding api key" header
                (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
                For some vectorize providers/models, if using header-based authentication,
                specialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`
                should be supplied.
            collection_max_time_ms: a default timeout, in milliseconds, for the
                duration of each operation on the collection. Individual timeouts
                can be provided to each collection method call and will take
                precedence, with this value being an overall default.
                Note that for some methods involving multiple API calls (such as
                `find`, `delete_many`, `insert_many` and so on), it is strongly
                suggested to provide a specific timeout as the default one likely
                wouldn't make much sense.

        Returns:
            a (synchronous) `Collection` instance, representing the
            newly-created collection.

        Raises:
            CollectionAlreadyExistsException: if the existence check is enabled
                and a collection with this name is already listed in the keyspace.

        Example:
            >>> new_col = my_db.create_collection("my_v_col", dimension=3)
            >>> new_col.insert_one({"name": "the_row", "$vector": [0.4, 0.5, 0.7]})
            InsertOneResult(raw_results=..., inserted_id='e22dd65e-...-...-...')

        Note:
            A collection is considered a vector collection if at least one of
            `dimension` or `service` are provided and not null. In that case,
            and only in that case, is `metric` an accepted parameter.
            Note, moreover, that if passing both these parameters, then
            the dimension must be compatible with the chosen service.
        """

        keyspace_param = check_namespace_keyspace(
            keyspace=keyspace,
            namespace=namespace,
        )

        cc_options = _normalize_create_collection_options(
            dimension=dimension,
            metric=metric,
            service=service,
            indexing=indexing,
            default_id_type=default_id_type,
            additional_options=additional_options,
        )

        # a single time budget spans the existence check and the creation call
        timeout_manager = MultiCallTimeoutManager(overall_max_time_ms=max_time_ms)

        # leaving `check_exists` unspecified means the check is performed
        must_check_existence = True if check_exists is None else check_exists
        if must_check_existence:
            logger.info(f"checking collection existence for '{name}'")
            if name in self.list_collection_names(
                keyspace=keyspace_param,
                max_time_ms=timeout_manager.remaining_timeout_ms(),
            ):
                raise CollectionAlreadyExistsException(
                    text=f"Collection {name} already exists",
                    keyspace=keyspace_param or self.keyspace or "(unspecified)",
                    collection_name=name,
                )

        driver_commander = self._get_driver_commander(keyspace=keyspace_param)
        creation_payload = {
            "createCollection": {
                "name": name,
                "options": cc_options,
            },
        }
        logger.info(f"createCollection('{name}')")
        driver_commander.request(
            payload=creation_payload,
            timeout_info=timeout_manager.remaining_timeout_info(),
        )
        logger.info(f"finished createCollection('{name}')")

        # hand back a client-side handle to the collection just created
        headers_provider = coerce_embedding_headers_provider(embedding_api_key)
        return self.get_collection(
            name,
            keyspace=keyspace_param,
            embedding_api_key=headers_provider,
            collection_max_time_ms=collection_max_time_ms,
        )

    def drop_collection(
        self,
        name_or_collection: str | Collection,
        *,
        max_time_ms: int | None = None,
    ) -> dict[str, Any]:
        """
        Drop a collection from the database, along with all documents therein.

        Args:
            name_or_collection: either the name of a collection or
                a `Collection` instance.
            max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.

        Returns:
            the "status" part of the API response, i.e. a dictionary
            in the form {"ok": 1} if the command succeeds.

        Example:
            >>> my_db.list_collection_names()
            ['a_collection', 'my_v_col', 'another_col']
            >>> my_db.drop_collection("my_v_col")
            {'ok': 1}
            >>> my_db.list_collection_names()
            ['a_collection', 'another_col']

        Note:
            when providing a collection name, it is assumed that the collection
            is to be found in the keyspace that was set at database instance level.
            When a `Collection` instance is provided, its own keyspace is used.
        """

        # lazy importing here against circular-import error
        from astrapy.collection import Collection

        target_keyspace: str | None
        target_name: str
        if not isinstance(name_or_collection, Collection):
            # a bare name is resolved against this database's working keyspace
            target_keyspace = self.keyspace
            target_name = name_or_collection
        else:
            target_keyspace = name_or_collection.keyspace
            target_name = name_or_collection.name

        driver_commander = self._get_driver_commander(keyspace=target_keyspace)
        deletion_payload = {"deleteCollection": {"name": target_name}}
        logger.info(f"deleteCollection('{target_name}')")
        deletion_response = driver_commander.request(
            payload=deletion_payload,
            timeout_info=base_timeout_info(max_time_ms),
        )
        logger.info(f"finished deleteCollection('{target_name}')")
        return deletion_response.get("status", {})  # type: ignore[no-any-return]

    def list_collections(
        self,
        *,
        keyspace: str | None = None,
        namespace: str | None = None,
        max_time_ms: int | None = None,
    ) -> CommandCursor[CollectionDescriptor]:
        """
        List all collections in a given keyspace for this database.

        Args:
            keyspace: the keyspace to be inspected. If not specified,
                the general setting for this database is assumed.
            namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
            max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.

        Returns:
            a `CommandCursor` to iterate over CollectionDescriptor instances,
            each corresponding to a collection.

        Raises:
            DataAPIFaultyResponseException: if the API response has no
                "collections" entry in its "status" part.

        Example:
            >>> ccur = my_db.list_collections()
            >>> ccur
            <astrapy.cursors.CommandCursor object at ...>
            >>> list(ccur)
            [CollectionDescriptor(name='my_v_col', options=CollectionOptions())]
            >>> for coll_dict in my_db.list_collections():
            ...     print(coll_dict)
            ...
            CollectionDescriptor(name='my_v_col', options=CollectionOptions())
        """

        keyspace_param = check_namespace_keyspace(
            keyspace=keyspace,
            namespace=namespace,
        )

        driver_commander = self._get_driver_commander(keyspace=keyspace_param)
        # "explain" is requested so that full collection definitions,
        # and not just names, are returned
        find_payload = {"findCollections": {"options": {"explain": True}}}
        logger.info("findCollections")
        find_response = driver_commander.request(
            payload=find_payload,
            timeout_info=base_timeout_info(max_time_ms),
        )

        response_status = find_response.get("status", {})
        if "collections" not in response_status:
            raise DataAPIFaultyResponseException(
                text="Faulty response from get_collections API command.",
                raw_response=find_response,
            )

        logger.info("finished findCollections")
        cursor_address = driver_commander.full_path
        # each returned item is a dict, to be marshaled into a descriptor
        descriptors = [
            CollectionDescriptor.from_dict(raw_collection)
            for raw_collection in response_status["collections"]
        ]
        return CommandCursor(
            address=cursor_address,
            items=descriptors,
        )

    def list_collection_names(
        self,
        *,
        keyspace: str | None = None,
        namespace: str | None = None,
        max_time_ms: int | None = None,
    ) -> list[str]:
        """
        List the names of all collections in a given keyspace of this database.

        Args:
            keyspace: the keyspace to be inspected. If not specified,
                the general setting for this database is assumed.
            namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
            max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.

        Returns:
            a list of the collection names as strings, in no particular order.

        Raises:
            DataAPIFaultyResponseException: if the API response has no
                "collections" entry in its "status" part.

        Example:
            >>> my_db.list_collection_names()
            ['a_collection', 'another_col']
        """

        keyspace_param = check_namespace_keyspace(
            keyspace=keyspace,
            namespace=namespace,
        )

        driver_commander = self._get_driver_commander(keyspace=keyspace_param)
        find_payload: dict[str, Any] = {"findCollections": {}}
        logger.info("findCollections")
        find_response = driver_commander.request(
            payload=find_payload,
            timeout_info=base_timeout_info(max_time_ms),
        )

        response_status = find_response.get("status", {})
        if "collections" not in response_status:
            raise DataAPIFaultyResponseException(
                text="Faulty response from get_collections API command.",
                raw_response=find_response,
            )

        # the "collections" entry is returned to the caller as-is
        logger.info("finished findCollections")
        return response_status["collections"]  # type: ignore[no-any-return]

    def command(
        self,
        body: dict[str, Any],
        *,
        keyspace: str | None = None,
        namespace: str | None = None,
        collection_name: str | None = None,
        raise_api_errors: bool = True,
        max_time_ms: int | None = None,
    ) -> dict[str, Any]:
        """
        Send a POST request to the Data API for this database with
        an arbitrary, caller-provided payload.

        Args:
            body: a JSON-serializable dictionary, the payload of the request.
            keyspace: the keyspace to use. Requests always target a keyspace:
                if not specified, the general setting for this database is assumed.
            namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
            collection_name: if provided, the collection name is appended at the end
                of the endpoint. In this way, this method allows collection-level
                arbitrary POST requests as well.
            raise_api_errors: if True, responses with a nonempty 'errors' field
                result in an astrapy exception being raised.
            max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.

        Returns:
            a dictionary with the response of the HTTP request.

        Example:
            >>> my_db.command({"findCollections": {}})
            {'status': {'collections': ['my_coll']}}
            >>> my_db.command({"countDocuments": {}}, collection_name="my_coll")
            {'status': {'count': 123}}
        """

        keyspace_param = check_namespace_keyspace(
            keyspace=keyspace,
            namespace=namespace,
        )

        if collection_name:
            # Collection-level request: the work is delegated to the `command`
            # method of the collection. If a keyspace is passed as well,
            # a database copy targeting that keyspace is needed, since the
            # collection is obtained here without specifying a keyspace.
            _database: Database
            if keyspace_param:
                _database = self._copy(keyspace=keyspace_param)
            else:
                _database = self
            logger.info(f"deferring to collection '{collection_name}' for command.")
            coll_req_response = _database.get_collection(collection_name).command(
                body=body,
                raise_api_errors=raise_api_errors,
                max_time_ms=max_time_ms,
            )
            logger.info(
                f"finished deferring to collection '{collection_name}' for command."
            )
            return coll_req_response
        else:
            driver_commander = self._get_driver_commander(keyspace=keyspace_param)
            # the sorted top-level keys of the payload identify the command in logs
            _cmd_desc = ",".join(sorted(body.keys()))
            logger.info(f"command={_cmd_desc} on {self.__class__.__name__}")
            req_response = driver_commander.request(
                payload=body,
                raise_api_errors=raise_api_errors,
                timeout_info=base_timeout_info(max_time_ms),
            )
            # mark completion explicitly, so that the start and the end
            # of the request can be told apart in the logs
            logger.info(f"finished command={_cmd_desc} on {self.__class__.__name__}")
            return req_response

    def get_database_admin(
        self,
        *,
        token: str | TokenProvider | None = None,
        dev_ops_url: str | None = None,
        dev_ops_api_version: str | None = None,
    ) -> DatabaseAdmin:
        """
        Return a DatabaseAdmin object corresponding to this database, for
        use in admin tasks such as managing keyspaces.

        This method, depending on the environment where the database resides,
        returns an appropriate subclass of DatabaseAdmin.

        Args:
            token: an access token with enough permission on the database to
                perform the desired tasks. If omitted (as it can generally be done),
                the token of this Database is used.
                This can be either a literal token string or a subclass of
                `astrapy.authentication.TokenProvider`.
            dev_ops_url: in case of custom deployments, this can be used to specify
                the URL to the DevOps API, such as "https://api.astra.datastax.com".
                Generally it can be omitted. The environment (prod/dev/...) is
                determined from the API Endpoint.
                Note that this parameter is allowed only for Astra DB environments.
            dev_ops_api_version: this can specify a custom version of the DevOps API
                (such as "v2"). Generally not needed.
                Note that this parameter is allowed only for Astra DB environments.

        Returns:
            A DatabaseAdmin instance targeting this database. More precisely,
            for Astra DB an instance of `AstraDBDatabaseAdmin` is returned;
            for other environments, an instance of `DataAPIDatabaseAdmin` is returned.

        Raises:
            ValueError: if `dev_ops_url` or `dev_ops_api_version` is passed
                for a database that is not in an Astra DB environment.

        Example:
            >>> my_db_admin = my_db.get_database_admin()
            >>> if "new_keyspace" not in my_db_admin.list_keyspaces():
            ...     my_db_admin.create_keyspace("new_keyspace")
            >>> my_db_admin.list_keyspaces()
            ['default_keyspace', 'new_keyspace']
        """

        # lazy importing here to avoid circular dependency
        from astrapy.admin import AstraDBDatabaseAdmin, DataAPIDatabaseAdmin

        if self.environment in Environment.astra_db_values:
            return AstraDBDatabaseAdmin(
                api_endpoint=self.api_endpoint,
                # an explicitly-passed token takes precedence over the database's
                token=coerce_token_provider(token) or self.token_provider,
                environment=self.environment,
                callers=self.callers,
                dev_ops_url=dev_ops_url,
                dev_ops_api_version=dev_ops_api_version,
                spawner_database=self,
            )

        # From here on: not an Astra DB environment,
        # hence the DevOps-related parameters are rejected.
        if dev_ops_url is not None:
            raise ValueError(
                "Parameter `dev_ops_url` not supported outside of Astra DB."
            )
        if dev_ops_api_version is not None:
            raise ValueError(
                "Parameter `dev_ops_api_version` not supported outside of Astra DB."
            )
        return DataAPIDatabaseAdmin(
            api_endpoint=self.api_endpoint,
            token=coerce_token_provider(token) or self.token_provider,
            environment=self.environment,
            api_path=self.api_path,
            api_version=self.api_version,
            callers=self.callers,
            spawner_database=self,
        )

Instance variables

var id : str

The ID of this database.

Example

>>> my_db.id
'01234567-89ab-cdef-0123-456789abcdef'
Expand source code
@property
def id(self) -> str:
    """
    The ID of this database, as extracted from its API endpoint.

    Raises:
        DevOpsAPIException: if the API endpoint of the database
            cannot be parsed to obtain a database ID.

    Example:
        >>> my_db.id
        '01234567-89ab-cdef-0123-456789abcdef'
    """

    endpoint_info = parse_api_endpoint(self.api_endpoint)
    if endpoint_info is None:
        # the endpoint could not be parsed: no ID can be determined
        raise DevOpsAPIException(
            "Database is not in a supported environment for this operation."
        )
    return endpoint_info.database_id
var keyspace : str | None

The keyspace this database uses as target for all commands when no method-call-specific keyspace is specified.

Returns

the working keyspace (a string), or None if not set.

Example

>>> my_db.keyspace
'the_keyspace'
Expand source code
@property
def keyspace(self) -> str | None:
    """
    The working keyspace of this database, i.e. the one used as target
    for all commands when no method-call-specific keyspace is specified.

    Returns:
        the working keyspace (a string), or None if not set.

    Example:
        >>> my_db.keyspace
        'the_keyspace'
    """

    working_keyspace: str | None = self._using_keyspace
    return working_keyspace
var namespace : str | None

The namespace this database uses as target for all commands when no method-call-specific namespace is specified.

DEPRECATED (removal in 2.0). Switch to the "keyspace" property.

Returns

the working namespace (a string), or None if not set.

Example

>>> my_db.namespace
'the_keyspace'
Expand source code
@property
def namespace(self) -> str | None:
    """
    The namespace this database uses as target for all commands when
    no method-call-specific namespace is specified.

    *DEPRECATED* (removal in 2.0). Switch to the "keyspace" property.

    Accessing this property emits a deprecation warning
    and then returns the value of the `keyspace` property.

    Returns:
        the working namespace (a string), or None if not set.

    Example:
        >>> my_db.namespace
        'the_keyspace'
    """

    # stacklevel=2 attributes the warning to the code accessing the property
    warnings.warn(
        deprecation.DeprecatedWarning(
            "the 'namespace' property",
            deprecated_in="1.5.0",
            removed_in="2.0.0",
            details=NAMESPACE_DEPRECATION_NOTICE_METHOD,
        ),
        stacklevel=2,
    )

    return self.keyspace

Methods

def command(self, body: dict[str, Any], *, keyspace: str | None = None, namespace: str | None = None, collection_name: str | None = None, raise_api_errors: bool = True, max_time_ms: int | None = None) ‑> dict[str, typing.Any]

Send a POST request to the Data API for this database with an arbitrary, caller-provided payload.

Args

body
a JSON-serializable dictionary, the payload of the request.
keyspace
the keyspace to use. Requests always target a keyspace: if not specified, the general setting for this database is assumed.
namespace
an alias for keyspace. DEPRECATED, removal in 2.0.
collection_name
if provided, the collection name is appended at the end of the endpoint. In this way, this method allows collection-level arbitrary POST requests as well.
raise_api_errors
if True, responses with a nonempty 'errors' field result in an astrapy exception being raised.
max_time_ms
a timeout, in milliseconds, for the underlying HTTP request.

Returns

a dictionary with the response of the HTTP request.

Example

>>> my_db.command({"findCollections": {}})
{'status': {'collections': ['my_coll']}}
>>> my_db.command({"countDocuments": {}}, collection_name="my_coll")
{'status': {'count': 123}}
Expand source code
def command(
    self,
    body: dict[str, Any],
    *,
    keyspace: str | None = None,
    namespace: str | None = None,
    collection_name: str | None = None,
    raise_api_errors: bool = True,
    max_time_ms: int | None = None,
) -> dict[str, Any]:
    """
    Send a POST request to the Data API for this database with
    an arbitrary, caller-provided payload.

    Args:
        body: a JSON-serializable dictionary, the payload of the request.
        keyspace: the keyspace to use. Requests always target a keyspace:
            if not specified, the general setting for this database is assumed.
        namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
        collection_name: if provided, the collection name is appended at the end
            of the endpoint. In this way, this method allows collection-level
            arbitrary POST requests as well.
        raise_api_errors: if True, responses with a nonempty 'errors' field
            result in an astrapy exception being raised.
        max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.

    Returns:
        a dictionary with the response of the HTTP request.

    Example:
        >>> my_db.command({"findCollections": {}})
        {'status': {'collections': ['my_coll']}}
        >>> my_db.command({"countDocuments": {}}, collection_name="my_coll")
        {'status': {'count': 123}}
    """

    keyspace_param = check_namespace_keyspace(
        keyspace=keyspace,
        namespace=namespace,
    )

    if collection_name:
        # Collection-level request: the work is delegated to the `command`
        # method of the collection. If a keyspace is passed as well,
        # a database copy targeting that keyspace is needed, since the
        # collection is obtained here without specifying a keyspace.
        _database: Database
        if keyspace_param:
            _database = self._copy(keyspace=keyspace_param)
        else:
            _database = self
        logger.info(f"deferring to collection '{collection_name}' for command.")
        coll_req_response = _database.get_collection(collection_name).command(
            body=body,
            raise_api_errors=raise_api_errors,
            max_time_ms=max_time_ms,
        )
        logger.info(
            f"finished deferring to collection '{collection_name}' for command."
        )
        return coll_req_response
    else:
        driver_commander = self._get_driver_commander(keyspace=keyspace_param)
        # the sorted top-level keys of the payload identify the command in logs
        _cmd_desc = ",".join(sorted(body.keys()))
        logger.info(f"command={_cmd_desc} on {self.__class__.__name__}")
        req_response = driver_commander.request(
            payload=body,
            raise_api_errors=raise_api_errors,
            timeout_info=base_timeout_info(max_time_ms),
        )
        # mark completion explicitly, so that the start and the end
        # of the request can be told apart in the logs
        logger.info(f"finished command={_cmd_desc} on {self.__class__.__name__}")
        return req_response
def create_collection(self, name: str, *, keyspace: str | None = None, namespace: str | None = None, dimension: int | None = None, metric: str | None = None, service: CollectionVectorServiceOptions | dict[str, Any] | None = None, indexing: dict[str, Any] | None = None, default_id_type: str | None = None, additional_options: dict[str, Any] | None = None, check_exists: bool | None = None, max_time_ms: int | None = None, embedding_api_key: str | EmbeddingHeadersProvider | None = None, collection_max_time_ms: int | None = None) ‑> Collection

Creates a collection on the database and returns the Collection instance that represents it.

This is a blocking operation: the method returns when the collection is ready to be used. As opposed to the get_collection method, this method causes the collection to be actually created on DB.

Args

name
the name of the collection.
keyspace
the keyspace where the collection is to be created. If not specified, the general setting for this database is used.
namespace
an alias for keyspace. DEPRECATED, removal in 2.0.
dimension
for vector collections, the dimension of the vectors (i.e. the number of their components).
metric
the similarity metric used for vector searches. Allowed values are VectorMetric.DOT_PRODUCT, VectorMetric.EUCLIDEAN or VectorMetric.COSINE (default).
service
a dictionary describing a service for embedding computation, e.g. {"provider": "ab", "modelName": "xy"}. Alternatively, a CollectionVectorServiceOptions object to the same effect.
indexing
optional specification of the indexing options for the collection, in the form of a dictionary such as {"deny": […]} or {"allow": […]}
default_id_type
this sets what type of IDs the API server will generate when inserting documents that do not specify their _id field explicitly. Can be set to any of the values DefaultIdType.UUID, DefaultIdType.OBJECTID, DefaultIdType.UUIDV6, DefaultIdType.UUIDV7, DefaultIdType.DEFAULT.
additional_options
any further set of key-value pairs that will be added to the "options" part of the payload when sending the Data API command to create a collection.
check_exists
whether to run an existence check for the collection name before attempting to create the collection: If check_exists is True, an error is raised when creating an existing collection. If it is False, the creation is attempted. In this case, for preexisting collections, the command will succeed or fail depending on whether the options match or not.
max_time_ms
a timeout, in milliseconds, for the underlying HTTP request.
embedding_api_key
optional API key(s) for interacting with the collection. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
collection_max_time_ms
a default timeout, in milliseconds, for the duration of each operation on the collection. Individual timeouts can be provided to each collection method call and will take precedence, with this value being an overall default. Note that for some methods involving multiple API calls (such as find, delete_many, insert_many and so on), it is strongly suggested to provide a specific timeout as the default one likely wouldn't make much sense.

Returns

a (synchronous) Collection instance, representing the newly-created collection.

Example

>>> new_col = my_db.create_collection("my_v_col", dimension=3)
>>> new_col.insert_one({"name": "the_row", "$vector": [0.4, 0.5, 0.7]})
InsertOneResult(raw_results=..., inserted_id='e22dd65e-...-...-...')

Note

A collection is considered a vector collection if at least one of dimension or service are provided and not null. In that case, and only in that case, is metric an accepted parameter. Note, moreover, that if passing both these parameters, then the dimension must be compatible with the chosen service.

Expand source code
def create_collection(
    self,
    name: str,
    *,
    keyspace: str | None = None,
    namespace: str | None = None,
    dimension: int | None = None,
    metric: str | None = None,
    service: CollectionVectorServiceOptions | dict[str, Any] | None = None,
    indexing: dict[str, Any] | None = None,
    default_id_type: str | None = None,
    additional_options: dict[str, Any] | None = None,
    check_exists: bool | None = None,
    max_time_ms: int | None = None,
    embedding_api_key: str | EmbeddingHeadersProvider | None = None,
    collection_max_time_ms: int | None = None,
) -> Collection:
    """
    Create a collection on the database and return the `Collection`
    instance that represents it.

    This is a blocking operation: the method returns once the collection
    is ready to be used. Unlike the `get_collection` method, calling
    this method causes the collection to be actually created on the DB.

    Args:
        name: the name of the collection.
        keyspace: the keyspace where the collection is to be created.
            If not specified, the general setting for this database is used.
        namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
        dimension: for vector collections, the dimension of the vectors
            (i.e. the number of their components).
        metric: the similarity metric used for vector searches.
            Allowed values are `VectorMetric.DOT_PRODUCT`, `VectorMetric.EUCLIDEAN`
            or `VectorMetric.COSINE` (default).
        service: a dictionary describing a service for
            embedding computation, e.g. `{"provider": "ab", "modelName": "xy"}`.
            Alternatively, a CollectionVectorServiceOptions object to the same effect.
        indexing: optional specification of the indexing options for
            the collection, in the form of a dictionary such as
                {"deny": [...]}
            or
                {"allow": [...]}
        default_id_type: this sets what type of IDs the API server will
            generate when inserting documents that do not specify their
            `_id` field explicitly. Can be set to any of the values
            `DefaultIdType.UUID`, `DefaultIdType.OBJECTID`,
            `DefaultIdType.UUIDV6`, `DefaultIdType.UUIDV7`,
            `DefaultIdType.DEFAULT`.
        additional_options: any further set of key-value pairs that will
            be added to the "options" part of the payload when sending
            the Data API command to create a collection.
        check_exists: whether to run an existence check for the collection
            name before attempting to create the collection.
            If True (which is also what an omitted value, i.e. None,
            amounts to), an error is raised when the collection exists already.
            If False, the creation is attempted regardless. In this case, for
            preexisting collections, the command will succeed or fail
            depending on whether the options match or not.
        max_time_ms: a timeout, in milliseconds, for the whole operation
            (the optional existence check and the creation request share it).
        embedding_api_key: optional API key(s) for interacting with the collection.
            If an embedding service is configured, and this parameter is not None,
            each Data API call will include the necessary embedding-related headers
            as specified by this parameter. If a string is passed, it translates
            into the one "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            For some vectorize providers/models, if using header-based authentication,
            specialized subclasses of `astrapy.authentication.EmbeddingHeadersProvider`
            should be supplied.
        collection_max_time_ms: a default timeout, in milliseconds, for the
            duration of each operation on the collection. Individual timeouts
            can be provided to each collection method call and will take
            precedence, with this value being an overall default.
            Note that for some methods involving multiple API calls (such as
            `find`, `delete_many`, `insert_many` and so on), it is strongly
            suggested to provide a specific timeout, as the default one likely
            wouldn't make much sense.

    Returns:
        a (synchronous) `Collection` instance, representing the
        newly-created collection.

    Raises:
        CollectionAlreadyExistsException: if the existence check is enabled
            and a collection with this name is found in the target keyspace.

    Example:
        >>> new_col = my_db.create_collection("my_v_col", dimension=3)
        >>> new_col.insert_one({"name": "the_row", "$vector": [0.4, 0.5, 0.7]})
        InsertOneResult(raw_results=..., inserted_id='e22dd65e-...-...-...')

    Note:
        A collection is considered a vector collection if at least one of
        `dimension` or `service` is provided and not null. In that case,
        and only in that case, is `metric` an accepted parameter.
        Note, moreover, that if passing both these parameters, then
        the dimension must be compatible with the chosen service.
    """

    keyspace_param = check_namespace_keyspace(
        keyspace=keyspace,
        namespace=namespace,
    )

    cc_options = _normalize_create_collection_options(
        dimension=dimension,
        metric=metric,
        service=service,
        indexing=indexing,
        default_id_type=default_id_type,
        additional_options=additional_options,
    )

    # one time budget, shared by the existence check and the creation request
    timeout_manager = MultiCallTimeoutManager(overall_max_time_ms=max_time_ms)

    # an unspecified `check_exists` means the check is performed
    must_check = True if check_exists is None else check_exists
    if must_check:
        logger.info(f"checking collection existence for '{name}'")
        known_names = self.list_collection_names(
            keyspace=keyspace_param,
            max_time_ms=timeout_manager.remaining_timeout_ms(),
        )
        if name in known_names:
            raise CollectionAlreadyExistsException(
                text=f"Collection {name} already exists",
                keyspace=keyspace_param or self.keyspace or "(unspecified)",
                collection_name=name,
            )

    commander = self._get_driver_commander(keyspace=keyspace_param)
    creation_payload = {
        "createCollection": {
            "name": name,
            "options": cc_options,
        },
    }
    logger.info(f"createCollection('{name}')")
    commander.request(
        payload=creation_payload,
        timeout_info=timeout_manager.remaining_timeout_info(),
    )
    logger.info(f"finished createCollection('{name}')")

    # hand back a client-side handle to the collection just created
    headers_provider = coerce_embedding_headers_provider(embedding_api_key)
    return self.get_collection(
        name,
        keyspace=keyspace_param,
        embedding_api_key=headers_provider,
        collection_max_time_ms=collection_max_time_ms,
    )
def drop_collection(self, name_or_collection: str | Collection, *, max_time_ms: int | None = None) ‑> dict[str, Any]

Drop a collection from the database, along with all documents therein.

Args

name_or_collection
either the name of a collection or a Collection instance.
max_time_ms
a timeout, in milliseconds, for the underlying HTTP request.

Returns

a dictionary in the form {"ok": 1} if the command succeeds.

Example

>>> my_db.list_collection_names()
['a_collection', 'my_v_col', 'another_col']
>>> my_db.drop_collection("my_v_col")
{'ok': 1}
>>> my_db.list_collection_names()
['a_collection', 'another_col']

Note

when providing a collection name, it is assumed that the collection is to be found in the keyspace that was set at database instance level.

Expand source code
def drop_collection(
    self,
    name_or_collection: str | Collection,
    *,
    max_time_ms: int | None = None,
) -> dict[str, Any]:
    """
    Drop a collection from the database, along with all documents therein.

    Args:
        name_or_collection: either the name of a collection or
            a `Collection` instance.
        max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.

    Returns:
        a dictionary in the form {"ok": 1} if the command succeeds.

    Example:
        >>> my_db.list_collection_names()
        ['a_collection', 'my_v_col', 'another_col']
        >>> my_db.drop_collection("my_v_col")
        {'ok': 1}
        >>> my_db.list_collection_names()
        ['a_collection', 'another_col']

    Note:
        when providing a collection name, it is assumed that the collection
        is to be found in the keyspace that was set at database instance level.
        When a `Collection` instance is passed, the keyspace of that
        instance is used instead.
    """

    # imported lazily to sidestep a circular import
    from astrapy.collection import Collection

    target_keyspace: str | None
    target_name: str
    if not isinstance(name_or_collection, Collection):
        # a bare name: resolve it against this database's working keyspace
        target_keyspace = self.keyspace
        target_name = name_or_collection
    else:
        target_keyspace = name_or_collection.keyspace
        target_name = name_or_collection.name

    commander = self._get_driver_commander(keyspace=target_keyspace)
    logger.info(f"deleteCollection('{target_name}')")
    response = commander.request(
        payload={"deleteCollection": {"name": target_name}},
        timeout_info=base_timeout_info(max_time_ms),
    )
    logger.info(f"finished deleteCollection('{target_name}')")
    # a response lacking a "status" entry results in an empty dictionary
    return response.get("status", {})  # type: ignore[no-any-return]
def get_collection(self, name: str, *, keyspace: str | None = None, namespace: str | None = None, embedding_api_key: str | EmbeddingHeadersProvider | None = None, collection_max_time_ms: int | None = None) ‑> Collection

Spawn a Collection object instance representing a collection on this database.

Creating a Collection instance does not have any effect on the actual state of the database: in other words, for the created Collection instance to be used meaningfully, the collection must exist already (for instance, it should have been created previously by calling the create_collection method).

Args

name
the name of the collection.
keyspace
the keyspace containing the collection. If no keyspace is specified, the general setting for this database is used.
namespace
an alias for keyspace. DEPRECATED, removal in 2.0.
embedding_api_key
optional API key(s) for interacting with the collection. If an embedding service is configured, and this parameter is not None, each Data API call will include the necessary embedding-related headers as specified by this parameter. If a string is passed, it translates into the one "embedding api key" header (i.e. EmbeddingAPIKeyHeaderProvider). For some vectorize providers/models, if using header-based authentication, specialized subclasses of EmbeddingHeadersProvider should be supplied.
collection_max_time_ms
a default timeout, in milliseconds, for the duration of each operation on the collection. Individual timeouts can be provided to each collection method call and will take precedence, with this value being an overall default. Note that for some methods involving multiple API calls (such as find, delete_many, insert_many and so on), it is strongly suggested to provide a specific timeout, as the default one likely wouldn't make much sense.

Returns

a Collection instance, representing the desired collection (but without any form of validation).

Example

>>> my_col = my_db.get_collection("my_collection")
>>> my_col.count_documents({}, upper_bound=100)
41

Note

The attribute and indexing syntax forms achieve the same effect as this method. In other words, the following three expressions are equivalent: my_db.get_collection("coll_name"), my_db.coll_name, and my_db["coll_name"].

Expand source code
def get_collection(
    self,
    name: str,
    *,
    keyspace: str | None = None,
    namespace: str | None = None,
    embedding_api_key: str | EmbeddingHeadersProvider | None = None,
    collection_max_time_ms: int | None = None,
) -> Collection:
    """
    Spawn a `Collection` object instance representing a collection
    on this database.

    Instantiating a `Collection` has no effect on the actual state of
    the database: for the returned object to be used meaningfully,
    the collection must exist already (for instance, it should have been
    created previously by calling the `create_collection` method).

    Args:
        name: the name of the collection.
        keyspace: the keyspace containing the collection. If no keyspace
            is specified, the general setting for this database is used.
        namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
        embedding_api_key: optional API key(s) for interacting with the collection.
            If an embedding service is configured, and this parameter is not None,
            each Data API call will include the necessary embedding-related headers
            as specified by this parameter. If a string is passed, it translates
            into the one "embedding api key" header
            (i.e. `astrapy.authentication.EmbeddingAPIKeyHeaderProvider`).
            For some vectorize providers/models, if using header-based
            authentication, specialized subclasses of
            `astrapy.authentication.EmbeddingHeadersProvider` should be supplied.
        collection_max_time_ms: a default timeout, in milliseconds, for the
            duration of each operation on the collection. Individual timeouts
            can be provided to each collection method call and will take
            precedence, with this value being an overall default.
            Note that for some methods involving multiple API calls (such as
            `find`, `delete_many`, `insert_many` and so on), it is strongly
            suggested to provide a specific timeout, as the default one likely
            wouldn't make much sense.

    Returns:
        a `Collection` instance, representing the desired collection
        (but without any form of validation).

    Raises:
        ValueError: if no keyspace is passed and none is set on the database.

    Example:
        >>> my_col = my_db.get_collection("my_collection")
        >>> my_col.count_documents({}, upper_bound=100)
        41

    Note:
        The attribute and indexing syntax forms achieve the same effect
        as this method. In other words, the following are equivalent:
            my_db.get_collection("coll_name")
            my_db.coll_name
            my_db["coll_name"]
    """

    keyspace_param = check_namespace_keyspace(
        keyspace=keyspace,
        namespace=namespace,
    )

    # imported lazily to sidestep a circular import
    from astrapy.collection import Collection

    # an explicitly-passed keyspace wins over the database-level setting
    effective_keyspace = keyspace_param or self.keyspace
    if effective_keyspace is None:
        raise ValueError(
            "No keyspace specified. This operation requires a keyspace to "
            "be set, e.g. through the `use_keyspace` method."
        )

    collection_api_options = CollectionAPIOptions(
        embedding_api_key=coerce_embedding_headers_provider(embedding_api_key),
        max_time_ms=collection_max_time_ms,
    )
    return Collection(
        self,
        name,
        keyspace=effective_keyspace,
        api_options=collection_api_options,
        callers=self.callers,
    )
def get_database_admin(self, *, token: str | TokenProvider | None = None, dev_ops_url: str | None = None, dev_ops_api_version: str | None = None) ‑> DatabaseAdmin

Return a DatabaseAdmin object corresponding to this database, for use in admin tasks such as managing keyspaces.

This method, depending on the environment where the database resides, returns an appropriate subclass of DatabaseAdmin.

Args

token
an access token with enough permission on the database to perform the desired tasks. If omitted (as it can generally be done), the token of this Database is used. This can be either a literal token string or a subclass of TokenProvider.
dev_ops_url
in case of custom deployments, this can be used to specify the URL to the DevOps API, such as "https://api.astra.datastax.com". Generally it can be omitted. The environment (prod/dev/…) is determined from the API Endpoint. Note that this parameter is allowed only for Astra DB environments.
dev_ops_api_version
this can specify a custom version of the DevOps API (such as "v2"). Generally not needed. Note that this parameter is allowed only for Astra DB environments.

Returns

A DatabaseAdmin instance targeting this database. More precisely, for Astra DB an instance of AstraDBDatabaseAdmin is returned; for other environments, an instance of DataAPIDatabaseAdmin is returned.

Example

>>> my_db_admin = my_db.get_database_admin()
>>> if "new_keyspace" not in my_db_admin.list_keyspaces():
...     my_db_admin.create_keyspace("new_keyspace")
>>> my_db_admin.list_keyspaces()
['default_keyspace', 'new_keyspace']
Expand source code
def get_database_admin(
    self,
    *,
    token: str | TokenProvider | None = None,
    dev_ops_url: str | None = None,
    dev_ops_api_version: str | None = None,
) -> DatabaseAdmin:
    """
    Return a DatabaseAdmin object corresponding to this database, for
    use in admin tasks such as managing keyspaces.

    Depending on the environment where the database resides, an
    appropriate subclass of DatabaseAdmin is returned.

    Args:
        token: an access token with enough permission on the database to
            perform the desired tasks. If omitted (as it can generally be done),
            the token of this Database is used.
            This can be either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider`.
        dev_ops_url: in case of custom deployments, this can be used to specify
            the URL to the DevOps API, such as "https://api.astra.datastax.com".
            Generally it can be omitted. The environment (prod/dev/...) is
            determined from the API Endpoint.
            Note that this parameter is allowed only for Astra DB environments.
        dev_ops_api_version: this can specify a custom version of the DevOps API
            (such as "v2"). Generally not needed.
            Note that this parameter is allowed only for Astra DB environments.

    Returns:
        A DatabaseAdmin instance targeting this database. More precisely,
        for Astra DB an instance of `AstraDBDatabaseAdmin` is returned;
        for other environments, an instance of `DataAPIDatabaseAdmin` is returned.

    Raises:
        ValueError: if `dev_ops_url` or `dev_ops_api_version` is passed
            for a database that is not in an Astra DB environment.

    Example:
        >>> my_db_admin = my_db.get_database_admin()
        >>> if "new_keyspace" not in my_db_admin.list_keyspaces():
        ...     my_db_admin.create_keyspace("new_keyspace")
        >>> my_db_admin.list_keyspaces()
        ['default_keyspace', 'new_keyspace']
    """

    # imported lazily to sidestep a circular dependency
    from astrapy.admin import AstraDBDatabaseAdmin, DataAPIDatabaseAdmin

    if self.environment not in Environment.astra_db_values:
        # DevOps-related settings are meaningless outside of Astra DB
        if dev_ops_url is not None:
            raise ValueError(
                "Parameter `dev_ops_url` not supported outside of Astra DB."
            )
        if dev_ops_api_version is not None:
            raise ValueError(
                "Parameter `dev_ops_api_version` not supported outside of Astra DB."
            )
        return DataAPIDatabaseAdmin(
            api_endpoint=self.api_endpoint,
            # an explicit token takes precedence over this database's own
            token=coerce_token_provider(token) or self.token_provider,
            environment=self.environment,
            api_path=self.api_path,
            api_version=self.api_version,
            callers=self.callers,
            spawner_database=self,
        )

    return AstraDBDatabaseAdmin(
        api_endpoint=self.api_endpoint,
        # an explicit token takes precedence over this database's own
        token=coerce_token_provider(token) or self.token_provider,
        environment=self.environment,
        callers=self.callers,
        dev_ops_url=dev_ops_url,
        dev_ops_api_version=dev_ops_api_version,
        spawner_database=self,
    )
def info(self) ‑> DatabaseInfo

Additional information on the database as a DatabaseInfo instance.

Some of the returned properties are dynamic throughout the lifetime of the database (such as raw_info["keyspaces"]). For this reason, each invocation of this method triggers a new request to the DevOps API.

Example

>>> my_db.info().region
'eu-west-1'
>>> my_db.info().raw_info['datacenters'][0]['dateCreated']
'2023-01-30T12:34:56Z'

Note

see the DatabaseInfo documentation for a caveat about the difference between the region and the raw_info["region"] attributes.

Expand source code
def info(self) -> DatabaseInfo:
    """
    Additional information on the database as a DatabaseInfo instance.

    Some of the returned properties can change during the lifetime
    of the database (such as raw_info["keyspaces"]). For this reason,
    each invocation of this method triggers a new request to the DevOps API.

    Returns:
        a DatabaseInfo instance describing this database.

    Raises:
        DevOpsAPIException: if no database information can be obtained,
            i.e. the database is not in an environment supporting this operation.

    Example:
        >>> my_db.info().region
        'eu-west-1'

        >>> my_db.info().raw_info['datacenters'][0]['dateCreated']
        '2023-01-30T12:34:56Z'

    Note:
        see the DatabaseInfo documentation for a caveat about the difference
        between the `region` and the `raw_info["region"]` attributes.
    """

    logger.info("getting database info")
    fetched_info = fetch_database_info(
        self.api_endpoint,
        token=self.token_provider.get_token(),
        keyspace=self.keyspace,
    )
    # a missing result signals an environment where this lookup is unavailable
    if fetched_info is None:
        raise DevOpsAPIException(
            "Database is not in a supported environment for this operation."
        )
    logger.info("finished getting database info")
    return fetched_info
def list_collection_names(self, *, keyspace: str | None = None, namespace: str | None = None, max_time_ms: int | None = None) ‑> list[str]

List the names of all collections in a given keyspace of this database.

Args

keyspace
the keyspace to be inspected. If not specified, the general setting for this database is assumed.
namespace
an alias for keyspace. DEPRECATED, removal in 2.0.
max_time_ms
a timeout, in milliseconds, for the underlying HTTP request.

Returns

a list of the collection names as strings, in no particular order.

Example

>>> my_db.list_collection_names()
['a_collection', 'another_col']
Expand source code
def list_collection_names(
    self,
    *,
    keyspace: str | None = None,
    namespace: str | None = None,
    max_time_ms: int | None = None,
) -> list[str]:
    """
    List the names of all collections in a given keyspace of this database.

    Args:
        keyspace: the keyspace to be inspected. If not specified,
            the general setting for this database is assumed.
        namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
        max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.

    Returns:
        a list of the collection names as strings, in no particular order.

    Raises:
        DataAPIFaultyResponseException: if the API response carries
            no "collections" entry within its "status" part.

    Example:
        >>> my_db.list_collection_names()
        ['a_collection', 'another_col']
    """

    keyspace_param = check_namespace_keyspace(
        keyspace=keyspace,
        namespace=namespace,
    )

    commander = self._get_driver_commander(keyspace=keyspace_param)
    find_payload: dict[str, Any] = {"findCollections": {}}
    logger.info("findCollections")
    response = commander.request(
        payload=find_payload,
        timeout_info=base_timeout_info(max_time_ms),
    )

    response_status = response.get("status", {})
    if "collections" not in response_status:
        raise DataAPIFaultyResponseException(
            text="Faulty response from get_collections API command.",
            raw_response=response,
        )

    # without the "explain" option, the API lists the bare collection names
    logger.info("finished findCollections")
    return response_status["collections"]  # type: ignore[no-any-return]
def list_collections(self, *, keyspace: str | None = None, namespace: str | None = None, max_time_ms: int | None = None) ‑> CommandCursor[CollectionDescriptor]

List all collections in a given keyspace for this database.

Args

keyspace
the keyspace to be inspected. If not specified, the general setting for this database is assumed.
namespace
an alias for keyspace. DEPRECATED, removal in 2.0.
max_time_ms
a timeout, in milliseconds, for the underlying HTTP request.

Returns

a CommandCursor to iterate over CollectionDescriptor instances, each corresponding to a collection.

Example

>>> ccur = my_db.list_collections()
>>> ccur
<astrapy.cursors.CommandCursor object at ...>
>>> list(ccur)
[CollectionDescriptor(name='my_v_col', options=CollectionOptions())]
>>> for coll_dict in my_db.list_collections():
...     print(coll_dict)
...
CollectionDescriptor(name='my_v_col', options=CollectionOptions())
Expand source code
def list_collections(
    self,
    *,
    keyspace: str | None = None,
    namespace: str | None = None,
    max_time_ms: int | None = None,
) -> CommandCursor[CollectionDescriptor]:
    """
    List all collections in a given keyspace for this database.

    Args:
        keyspace: the keyspace to be inspected. If not specified,
            the general setting for this database is assumed.
        namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
        max_time_ms: a timeout, in milliseconds, for the underlying HTTP request.

    Returns:
        a `CommandCursor` to iterate over CollectionDescriptor instances,
        each corresponding to a collection.

    Raises:
        DataAPIFaultyResponseException: if the API response carries
            no "collections" entry within its "status" part.

    Example:
        >>> ccur = my_db.list_collections()
        >>> ccur
        <astrapy.cursors.CommandCursor object at ...>
        >>> list(ccur)
        [CollectionDescriptor(name='my_v_col', options=CollectionOptions())]
        >>> for coll_dict in my_db.list_collections():
        ...     print(coll_dict)
        ...
        CollectionDescriptor(name='my_v_col', options=CollectionOptions())
    """

    keyspace_param = check_namespace_keyspace(
        keyspace=keyspace,
        namespace=namespace,
    )

    commander = self._get_driver_commander(keyspace=keyspace_param)
    # "explain" asks the API for full collection definitions, not just names
    find_payload = {"findCollections": {"options": {"explain": True}}}
    logger.info("findCollections")
    response = commander.request(
        payload=find_payload,
        timeout_info=base_timeout_info(max_time_ms),
    )

    if "collections" not in response.get("status", {}):
        raise DataAPIFaultyResponseException(
            text="Faulty response from get_collections API command.",
            raw_response=response,
        )

    # each returned dictionary gets marshaled into a "descriptor" object
    logger.info("finished findCollections")
    raw_collections = response["status"]["collections"]
    return CommandCursor(
        address=commander.full_path,
        items=[
            CollectionDescriptor.from_dict(raw_collection)
            for raw_collection in raw_collections
        ],
    )
def name(self) ‑> str

The name of this database. Note that this bears no uniqueness guarantees.

Calling this method the first time involves a request to the DevOps API (the resulting database name is then cached). See the info() method for more details.

Example

>>> my_db.name()
'the_application_database'
Expand source code
def name(self) -> str:
    """
    The name of this database. Note that this bears no uniqueness guarantees.

    The first call to this method involves a request to the DevOps API,
    after which the resulting database name is cached on the instance.
    See the `info()` method for more details.

    Returns:
        the database name, as a string.

    Example:
        >>> my_db.name()
        'the_application_database'
    """

    known_name = self._name
    if known_name is not None:
        # already retrieved earlier: no API request is needed
        return known_name
    self._name = self.info().name
    return self._name
def set_caller(self, caller_name: str | None = None, caller_version: str | None = None) ‑> None

Set a new identity for the application/framework on behalf of which the Data API calls are performed (the "caller").

Args

caller_name
name of the application, or framework, on behalf of which the Data API calls are performed. This ends up in the request user-agent.
caller_version
version of the caller.

Example

>>> my_db.set_caller(caller_name="the_caller", caller_version="0.1.0")

Deprecated since version: 1.5.1

This will be removed in 2.0.0. Please provide the caller(s) at constructor time through the callers list parameter.

Expand source code
@deprecation.deprecated(  # type: ignore[misc]
    deprecated_in="1.5.1",
    removed_in="2.0.0",
    current_version=__version__,
    details=SET_CALLER_DEPRECATION_NOTICE,
)
def set_caller(
    self,
    caller_name: str | None = None,
    caller_version: str | None = None,
) -> None:
    """
    Set a new identity for the application/framework on behalf of which
    the Data API calls are performed (the "caller").

    The callers stored on this database are overwritten, and the
    API commander is re-created for the current keyspace.

    Args:
        caller_name: name of the application, or framework, on behalf of which
            the Data API calls are performed. This ends up in the request user-agent.
        caller_version: version of the caller.

    Example:
        >>> my_db.set_caller(caller_name="the_caller", caller_version="0.1.0")
    """

    logger.info(f"setting caller to {caller_name}/{caller_version}")
    # no pre-existing callers are carried over: the list is built from scratch
    self.callers = check_caller_parameters([], caller_name, caller_version)
    # the commander must be rebuilt in order to pick up the new callers
    self._api_commander = self._get_api_commander(keyspace=self.keyspace)
def to_async(self, *, api_endpoint: str | None = None, token: str | TokenProvider | None = None, keyspace: str | None = None, namespace: str | None = None, callers: Sequence[CallerType] = [], caller_name: str | None = None, caller_version: str | None = None, environment: str | None = None, api_path: str | None = None, api_version: str | None = None) ‑> AsyncDatabase

Create an AsyncDatabase from this one. Save for the arguments explicitly provided as overrides, everything else is kept identical to this database in the copy.

Args

api_endpoint
the full "API Endpoint" string used to reach the Data API. Example: "https://<database_id>-<region>.apps.astra.datastax.com"
token
an Access Token to the database. Example: "AstraCS:xyz…" This can be either a literal token string or a subclass of TokenProvider.
keyspace
this is the keyspace all method calls will target, unless one is explicitly specified in the call. If no keyspace is supplied when creating a Database, the name "default_keyspace" is set.
namespace
an alias for keyspace. DEPRECATED, removal in 2.0.
callers
a list of caller identities, i.e. applications, or frameworks, on behalf of which the Data API calls are performed. These end up in the request user-agent. Each caller identity is a ("caller_name", "caller_version") pair.
caller_name
DEPRECATED, use callers. Removal 2.0. Name of the application, or framework, on behalf of which the Data API calls are performed. This ends up in the request user-agent.
caller_version
version of the caller. DEPRECATED, use callers. Removal 2.0.
environment
a string representing the target Data API environment. Values are, for example, Environment.PROD, Environment.OTHER, or Environment.DSE.
api_path
path to append to the API Endpoint. In typical usage, this should be left to its default of "/api/json".
api_version
version specifier to append to the API path. In typical usage, this should be left to its default of "v1".

Returns

the new copy, an AsyncDatabase instance.

Example

>>> my_async_db = my_db.to_async()
>>> asyncio.run(my_async_db.list_collection_names())
Expand source code
def to_async(
    self,
    *,
    api_endpoint: str | None = None,
    token: str | TokenProvider | None = None,
    keyspace: str | None = None,
    namespace: str | None = None,
    callers: Sequence[CallerType] = [],
    caller_name: str | None = None,
    caller_version: str | None = None,
    environment: str | None = None,
    api_path: str | None = None,
    api_version: str | None = None,
) -> AsyncDatabase:
    """
    Build the asynchronous counterpart of this database.

    The returned AsyncDatabase mirrors this database's settings, except
    for those arguments that are explicitly passed as overrides. An override
    that is left out (or is otherwise falsy) falls back to the corresponding
    value of this database.

    Args:
        api_endpoint: the full "API Endpoint" string used to reach the Data API.
            Example: "https://<database_id>-<region>.apps.astra.datastax.com"
        token: an Access Token to the database. Example: "AstraCS:xyz..."
            Either a literal token string or a subclass of
            `astrapy.authentication.TokenProvider` is accepted.
        keyspace: the keyspace that all method calls will target, unless
            one is explicitly specified in the call. If no keyspace is supplied
            when creating a Database, the name "default_keyspace" is set.
        namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
        callers: a list of caller identities, i.e. applications, or frameworks,
            on behalf of which the Data API calls are performed. These end up
            in the request user-agent.
            Each caller identity is a ("caller_name", "caller_version") pair.
        caller_name: *DEPRECATED*, use `callers`. Removal 2.0. Name of the
            application, or framework, on behalf of which the Data API calls
            are performed. This ends up in the request user-agent.
        caller_version: version of the caller. *DEPRECATED*, use `callers`.
            Removal 2.0.
        environment: a string representing the target Data API environment.
            Values are, for example, `Environment.PROD`, `Environment.OTHER`,
            or `Environment.DSE`.
        api_path: path to append to the API Endpoint. In typical usage, this
            should be left to its default of "/api/json".
        api_version: version specifier to append to the API path. In typical
            usage, this should be left to its default of "v1".

    Returns:
        the new copy, an `AsyncDatabase` instance.

    Example:
        >>> my_async_db = my_db.to_async()
        >>> asyncio.run(my_async_db.list_collection_names())
    """

    # The current parameters and their deprecated aliases go through the
    # shared checker helpers first: callers, then keyspace/namespace.
    resolved_callers = check_caller_parameters(
        callers,
        caller_name,
        caller_version,
    )
    resolved_keyspace = check_namespace_keyspace(keyspace=keyspace, namespace=namespace)

    # Every setting falls back to this database's own value whenever the
    # override is falsy; the fallback attribute is read only in that case.
    endpoint_to_use = api_endpoint or self.api_endpoint
    token_to_use = coerce_token_provider(token) or self.token_provider
    keyspace_to_use = resolved_keyspace or self.keyspace
    callers_to_use = resolved_callers or self.callers
    environment_to_use = environment or self.environment
    api_path_to_use = api_path or self.api_path
    api_version_to_use = api_version or self.api_version

    return AsyncDatabase(
        api_endpoint=endpoint_to_use,
        token=token_to_use,
        keyspace=keyspace_to_use,
        callers=callers_to_use,
        environment=environment_to_use,
        api_path=api_path_to_use,
        api_version=api_version_to_use,
    )
def use_keyspace(self, keyspace: str) ‑> None

Switch to a new working keyspace for this database. This method changes (mutates) the Database instance.

Note that this method does not create the keyspace, which should exist already (created for instance with a DatabaseAdmin.create_keyspace call).

Args

keyspace
the new keyspace to use as the database working keyspace.

Returns

None.

Example

>>> my_db.list_collection_names()
['coll_1', 'coll_2']
>>> my_db.use_keyspace("an_empty_keyspace")
>>> my_db.list_collection_names()
[]
Expand source code
def use_keyspace(self, keyspace: str) -> None:
    """
    Make a different keyspace the working keyspace of this database.
    The Database instance is changed (mutated) in place by this method.

    The keyspace is not created by this call: it is expected to exist
    already (it may have been created, for instance, through a
    `DatabaseAdmin.create_keyspace` call).

    Args:
        keyspace: the new keyspace to use as the database working keyspace.

    Returns:
        None.

    Example:
        >>> my_db.list_collection_names()
        ['coll_1', 'coll_2']
        >>> my_db.use_keyspace("an_empty_keyspace")
        >>> my_db.list_collection_names()
        []
    """
    switch_message = f"switching to keyspace '{keyspace}'"
    logger.info(switch_message)
    # Store the new keyspace first: the commander below is rebuilt from
    # `self.keyspace`, which is read only after this assignment.
    self._using_keyspace = keyspace
    refreshed_commander = self._get_api_commander(keyspace=self.keyspace)
    self._api_commander = refreshed_commander
def use_namespace(self, namespace: str) ‑> None

Switch to a new working namespace for this database. This method changes (mutates) the Database instance.

DEPRECATED (removal in 2.0). Switch to the "use_keyspace" method.

Note that this method does not create the namespace, which should exist already (created for instance with a DatabaseAdmin.create_namespace call).

Args

namespace
the new namespace to use as the database working namespace.

Returns

None.

Example

>>> my_db.list_collection_names()
['coll_1', 'coll_2']
>>> my_db.use_namespace("an_empty_namespace")
>>> my_db.list_collection_names()
[]

Deprecated since version: 1.5.0

This will be removed in 2.0.0. The term 'namespace' is being replaced by 'keyspace' throughout the Data API and the clients. Please adapt method and parameter names consistently (examples: db_admin.findNamespaces => db_admin.findKeyspaces; collection.namespace => collection.keyspace; database.list_collections(namespace=...) => database.list_collections(keyspace=...)). See https://docs.datastax.com/en/astra-db-serverless/api-reference/client-versions.html#version-1-5 for more information.

Expand source code
@deprecation.deprecated(  # type: ignore[misc]
    deprecated_in="1.5.0",
    removed_in="2.0.0",
    current_version=__version__,
    details=NAMESPACE_DEPRECATION_NOTICE_METHOD,
)
def use_namespace(self, namespace: str) -> None:
    """
    Make a different namespace the working namespace of this database.
    The Database instance is changed (mutated) in place by this method.

    *DEPRECATED* (removal in 2.0). Switch to the "use_keyspace" method.

    The namespace is not created by this call: it is expected to exist
    already (it may have been created, for instance, through a
    `DatabaseAdmin.create_namespace` call).

    Args:
        namespace: the new namespace to use as the database working namespace.

    Returns:
        None.

    Example:
        >>> my_db.list_collection_names()
        ['coll_1', 'coll_2']
        >>> my_db.use_namespace("an_empty_namespace")
        >>> my_db.list_collection_names()
        []
    """
    # Pure alias: the namespace is handed to `use_keyspace` as the keyspace,
    # and whatever that call returns is passed back unchanged.
    target_keyspace = namespace
    return self.use_keyspace(keyspace=target_keyspace)
def with_options(self, *, keyspace: str | None = None, namespace: str | None = None, callers: Sequence[CallerType] = [], caller_name: str | None = None, caller_version: str | None = None) ‑> Database

Create a clone of this database with some changed attributes.

Args

keyspace
this is the keyspace all method calls will target, unless one is explicitly specified in the call. If no keyspace is supplied when creating a Database, the name "default_keyspace" is set.
namespace
an alias for keyspace. DEPRECATED, removal in 2.0.
callers
a list of caller identities, i.e. applications, or frameworks, on behalf of which the Data API calls are performed. These end up in the request user-agent. Each caller identity is a ("caller_name", "caller_version") pair.
caller_name
DEPRECATED, use callers. Removal 2.0. Name of the application, or framework, on behalf of which the Data API calls are performed. This ends up in the request user-agent.
caller_version
version of the caller. DEPRECATED, use callers. Removal 2.0.

Returns

a new Database instance.

Example

>>> my_db_2 = my_db.with_options(
...     keyspace="the_other_keyspace",
...     callers=[("the_caller", "0.1.0")],
... )
Expand source code
def with_options(
    self,
    *,
    keyspace: str | None = None,
    namespace: str | None = None,
    callers: Sequence[CallerType] = [],
    caller_name: str | None = None,
    caller_version: str | None = None,
) -> Database:
    """
    Return a copy of this database in which some attributes are replaced.

    Args:
        keyspace: the keyspace that all method calls will target, unless
            one is explicitly specified in the call. If no keyspace is supplied
            when creating a Database, the name "default_keyspace" is set.
        namespace: an alias for `keyspace`. *DEPRECATED*, removal in 2.0.
        callers: a list of caller identities, i.e. applications, or frameworks,
            on behalf of which the Data API calls are performed. These end up
            in the request user-agent.
            Each caller identity is a ("caller_name", "caller_version") pair.
        caller_name: *DEPRECATED*, use `callers`. Removal 2.0. Name of the
            application, or framework, on behalf of which the Data API calls
            are performed. This ends up in the request user-agent.
        caller_version: version of the caller. *DEPRECATED*, use `callers`.
            Removal 2.0.

    Returns:
        a new `Database` instance.

    Example:
        >>> my_db_2 = my_db.with_options(
        ...     keyspace="the_other_keyspace",
        ...     callers=[("the_caller", "0.1.0")],
        ... )
    """

    # The current parameters and their deprecated aliases go through the
    # shared checker helpers first: callers, then keyspace/namespace.
    resolved_callers = check_caller_parameters(
        callers,
        caller_name,
        caller_version,
    )
    resolved_keyspace = check_namespace_keyspace(keyspace=keyspace, namespace=namespace)

    # The checked values are forwarded as they are to `_copy`, which
    # produces the returned instance.
    return self._copy(keyspace=resolved_keyspace, callers=resolved_callers)