Module `astrapy.cursors`

Expand source code

# Copyright DataStax, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

from astrapy.data.cursors.cursor import (
    AbstractCursor,
    CursorState,
)
from astrapy.data.cursors.farr_cursor import (
    AsyncCollectionFindAndRerankCursor,
    CollectionFindAndRerankCursor,
)
from astrapy.data.cursors.find_cursor import (
    AsyncCollectionFindCursor,
    AsyncTableFindCursor,
    CollectionFindCursor,
    TableFindCursor,
)
from astrapy.data.cursors.pagination import FindAndRerankPage, FindPage
from astrapy.data.cursors.reranked_result import RerankedResult

# Public names of `astrapy.cursors`: each entry below is imported above from the
# `astrapy.data.cursors` subpackage and re-exported from this module.
__all__ = [
    "AbstractCursor",
    "AsyncCollectionFindAndRerankCursor",
    "AsyncCollectionFindCursor",
    "AsyncTableFindCursor",
    "CollectionFindAndRerankCursor",
    "CollectionFindCursor",
    "CursorState",
    "FindAndRerankPage",
    "FindPage",
    "RerankedResult",
    "TableFindCursor",
]

Classes

class AbstractCursor (*, initial_page_state: str | UnsetType)

A cursor obtained from the invocation of a find-type method over a table or a collection. This is the main interface to scroll through the results (resp. rows or documents).

This class is not meant to be directly instantiated by the user, rather it is a superclass capturing some basic mechanisms common to all find cursors.

Cursors provide a seamless interface to the caller code, allowing iteration over results while chunks of new data (pages) are exchanged periodically with the API. For this reason, cursors internally manage a local buffer that is progressively emptied and re-filled with a new page in a manner hidden from the user – although some cursor methods allow peeking into this buffer should it be necessary.

Expand source code

class AbstractCursor(ABC, Generic[TRAW]):
    """
    A cursor obtained from the invocation of a find-type method over a table or
    a collection.
    This is the main interface to scroll through the results (resp. rows or documents).

    This class is not meant to be directly instantiated by the user, rather it
    is a superclass capturing some basic mechanisms common to all find cursors.

    Cursors provide a seamless interface to the caller code, allowing iteration
    over results while chunks of new data (pages) are exchanged periodically with
    the API. For this reason, cursors internally manage a local buffer that is
    progressively emptied and re-filled with a new page in a manner hidden from the
    user -- although some cursor methods allow peeking into this buffer should it
    be necessary.
    """

    # Lifecycle state of the cursor (compared against CursorState members below).
    _state: CursorState
    # Client-side buffer of items not yet consumed.
    _buffer: list[TRAW]
    # Page counter, reset to zero by `rewind`.
    _pages_retrieved: int
    # Number of items handed out to the caller so far.
    _consumed: int
    # Page state to use for the next fetch, or None if there is none.
    _next_page_state: str | None
    # Status of the most recent response, reset to None by `rewind`.
    _last_response_status: dict[str, Any] | None

    def __init__(
        self,
        *,
        initial_page_state: str | UnsetType,
    ) -> None:
        """
        Set up the cursor in its pristine state.

        Args:
            initial_page_state: the page state the first data-fetch should use,
                or the "unset" sentinel to start from the beginning.

        Raises:
            ValueError: if an explicit None is passed as `initial_page_state`.
        """
        # All state attributes are initialized by `rewind`.
        self.rewind(initial_page_state=initial_page_state)

    def _imprint_internal_state(self, other: AbstractCursor[TRAW]) -> None:
        """Mutably copy the internal state of this cursor onto another one."""
        other._state = self._state
        # The buffer list object is shared by reference, not duplicated.
        other._buffer = self._buffer
        other._pages_retrieved = self._pages_retrieved
        other._consumed = self._consumed
        other._next_page_state = self._next_page_state
        other._last_response_status = self._last_response_status

    def _ensure_alive(self) -> None:
        """
        Check that this cursor has not been closed.

        Raises:
            CursorException: if the cursor state is CLOSED.
        """
        if self._state == CursorState.CLOSED:
            raise CursorException(
                text="Cursor is stopped.",
                cursor_state=self._state.value,
            )

    def _ensure_idle(self) -> None:
        """
        Check that this cursor is still in its IDLE state.

        Raises:
            CursorException: if the cursor state is anything other than IDLE.
        """
        if self._state != CursorState.IDLE:
            raise CursorException(
                text="Cursor is not idle anymore.",
                cursor_state=self._state.value,
            )

    @property
    def state(self) -> CursorState:
        """
        The current state of this cursor.

        Returns:
            a value in `astrapy.cursors.CursorState`.
        """

        return self._state

    @property
    def consumed(self) -> int:
        """
        The number of items the cursor has yielded, i.e. how many items
        have already been read by the code consuming the cursor.

        Returns:
            consumed: a non-negative integer, the count of items yielded so far.
        """

        return self._consumed

    @property
    def cursor_id(self) -> int:
        """
        An integer uniquely identifying this cursor.

        Returns:
            cursor_id: an integer number uniquely identifying the cursor.
        """

        return id(self)

    @property
    def buffered_count(self) -> int:
        """
        The number of items (documents, rows) currently stored in the client-side
        buffer of this cursor. Reading this property never triggers new API calls
        to re-fill the buffer.

        Returns:
            buffered_count: a non-negative integer, the amount of items currently
                stored in the local buffer.
        """

        return len(self._buffer)

    def close(self) -> None:
        """
        Close the cursor, regardless of its state. A cursor can be closed at any
        time, possibly discarding the portion of results that has not yet been
        consumed, if any.

        This is an in-place modification of the cursor.
        """

        self._state = CursorState.CLOSED
        self._buffer = []

    def rewind(
        self,
        *,
        initial_page_state: str | UnsetType = _UNSET,
    ) -> None:
        """
        Rewind the cursor, bringing it back to its pristine state of no items
        retrieved/consumed yet, regardless of its current state.
        All cursor settings (filter, mapping, projection, etc) are retained.

        A cursor can be rewound at any time. Keep in mind that, depending on
        changes that occurred on the table or collection in the meantime, the
        results may be different if a cursor is browsed a second time after
        rewinding it.

        Args:
            initial_page_state: if a value is provided, it must be the `next_page_state`
                from the response of `fetch_next_page()` called on a cursor with the
                same settings. In that case, the repositioning of this cursor is such
                that the next data-fetch will specify the page state to the Data API.

        Raises:
            ValueError: if an explicit None is passed as `initial_page_state`.
                In that case the cursor is left untouched.

        This is an in-place modification of the cursor.
        """
        # Validate before mutating anything, so that a rejected call does not
        # leave the cursor in a half-reset condition.
        if initial_page_state is None:
            msg = "Passing an explicit null for initial_page_state is not allowed."
            raise ValueError(msg)
        self._state = CursorState.IDLE
        self._buffer = []
        self._pages_retrieved = 0
        self._consumed = 0
        if isinstance(initial_page_state, UnsetType):
            self._next_page_state = None
        else:
            self._next_page_state = initial_page_state
        self._last_response_status = None

    def consume_buffer(self, n: int | None = None) -> list[TRAW]:
        """
        Consume (return) up to the requested number of buffered items (rows/documents).
        The returned items are marked as consumed, meaning that subsequently consuming
        the cursor will start after those items.

        This method is an in-place modification of the cursor and only concerns
        the local buffer: it never triggers fetching of new pages from the Data API.

        This method can be called regardless of the cursor state; the only error
        it raises is for a negative `n`.

        Args:
            n: amount of items to return. If omitted, the whole buffer is returned.

        Returns:
            list: a list of items (rows/document dictionaries). If there are fewer
                items than requested, the whole buffer is returned without errors:
                in particular, if it is empty (such as when the cursor is closed),
                an empty list is returned.

        Raises:
            ValueError: if `n` is negative.
        """
        _n = n if n is not None else len(self._buffer)
        if _n < 0:
            raise ValueError("A negative amount of items was requested.")
        # Slicing builds new lists: the previous buffer object is never mutated.
        returned, remaining = self._buffer[:_n], self._buffer[_n:]
        self._buffer = remaining
        self._consumed += len(returned)
        return returned

Ancestors

abc.ABC
typing.Generic

Subclasses

Instance variables

var buffered_count : int

The number of items (documents, rows) currently stored in the client-side buffer of this cursor. Reading this property never triggers new API calls to re-fill the buffer.

Returns

buffered_count: a non-negative integer, the amount of items currently stored in the local buffer.

Expand source code

@property
def buffered_count(self) -> int:
    """
    How many items (documents, rows) are currently held in the client-side
    buffer of this cursor. Accessing this property only inspects the local
    buffer and never causes an API call to re-fill it.

    Returns:
        buffered_count: a non-negative integer, the number of items presently
            sitting in the local buffer.
    """

    buffer_size = len(self._buffer)
    return buffer_size

var consumed : int

The number of items the cursor has yielded, i.e. how many items have already been read by the code consuming the cursor.

Returns

consumed: a non-negative integer, the count of items yielded so far.

Expand source code

@property
def consumed(self) -> int:
    """
    The number of items the cursor has yielded, i.e. how many items
    have already been read by the code consuming the cursor.

    Returns:
        consumed: a non-negative integer, the count of items yielded so far.
    """

    yielded_so_far = self._consumed
    return yielded_so_far

var cursor_id : int

An integer uniquely identifying this cursor.

Returns

cursor_id: an integer number uniquely identifying the cursor.

Expand source code

@property
def cursor_id(self) -> int:
    """
    An integer identifier for this cursor, obtained from the built-in `id()`
    of the cursor object.

    Returns:
        cursor_id: an integer number uniquely identifying the cursor.
    """

    identifier = id(self)
    return identifier

var state : CursorState

The current state of this cursor.

Returns

a value in CursorState.

Expand source code

@property
def state(self) -> CursorState:
    """
    The state this cursor is currently in.

    Returns:
        a value in `astrapy.cursors.CursorState`.
    """

    current_state = self._state
    return current_state

Methods

def close(self) ‑> None

Close the cursor, regardless of its state. A cursor can be closed at any time, possibly discarding the portion of results that has not yet been consumed, if any.

This is an in-place modification of the cursor.

Expand source code

def close(self) -> None:
    """
    Close the cursor, whatever state it is in. Closing is possible at any
    time; any results still waiting in the local buffer are discarded.

    This is an in-place modification of the cursor.
    """

    # Mark as closed and drop whatever is left in the buffer, in one step.
    self._state, self._buffer = CursorState.CLOSED, []

def consume_buffer(self, n: int | None = None) ‑> list[~TRAW]

Consume (return) up to the requested number of buffered items (rows/documents). The returned items are marked as consumed, meaning that subsequently consuming the cursor will start after those items.

This method is an in-place modification of the cursor and only concerns the local buffer: it never triggers fetching of new pages from the Data API.

This method can be called regardless of the cursor state without exceptions being raised.

Args

n: amount of items to return. If omitted, the whole buffer is returned.

Returns

list: a list of items (rows/document dictionaries). If there are fewer items than requested, the whole buffer is returned without errors: in particular, if it is empty (such as when the cursor is closed), an empty list is returned.

Expand source code

def consume_buffer(self, n: int | None = None) -> list[TRAW]:
    """
    Take (and return) at most `n` items (rows/documents) from the local buffer.
    The items handed back count as consumed: further consumption of the cursor
    resumes right after them.

    This is an in-place modification of the cursor that only touches the local
    buffer: no new page is ever fetched from the Data API as a consequence.

    The method may be called in any cursor state; a negative `n` is the only
    condition for which it raises.

    Args:
        n: amount of items to return. If omitted, the whole buffer is returned.

    Returns:
        list: a list of items (rows/document dictionaries). When the buffer holds
            fewer items than requested, all of them are returned without errors;
            an empty buffer (such as that of a closed cursor) yields an empty list.

    Raises:
        ValueError: if `n` is negative.
    """
    if n is None:
        amount = len(self._buffer)
    else:
        amount = n
    if amount < 0:
        raise ValueError("A negative amount of items was requested.")
    # Slices create fresh lists, so the former buffer object is left unmodified.
    taken = self._buffer[:amount]
    self._buffer = self._buffer[amount:]
    self._consumed += len(taken)
    return taken

def rewind(self, *, initial_page_state: str | UnsetType = (unset)) ‑> None

Rewind the cursor, bringing it back to its pristine state of no items retrieved/consumed yet, regardless of its current state. All cursor settings (filter, mapping, projection, etc) are retained.

A cursor can be rewound at any time. Keep in mind that, depending on changes that occurred on the table or collection in the meantime, the results may be different if a cursor is browsed a second time after rewinding it.

Args

initial_page_state: if a value is provided, it must be the next_page_state from the response of fetch_next_page() called on a cursor with the same settings. In that case, the repositioning of this cursor is such that the next data-fetch will specify the page state to the Data API.

This is an in-place modification of the cursor.

Expand source code

def rewind(
    self,
    *,
    initial_page_state: str | UnsetType = _UNSET,
) -> None:
    """
    Rewind the cursor, bringing it back to its pristine state of no items
    retrieved/consumed yet, regardless of its current state.
    All cursor settings (filter, mapping, projection, etc) are retained.

    A cursor can be rewound at any time. Keep in mind that, depending on
    changes that occurred on the table or collection in the meantime, the
    results may be different if a cursor is browsed a second time after
    rewinding it.

    Args:
        initial_page_state: if a value is provided, it must be the `next_page_state`
            from the response of `fetch_next_page()` called on a cursor with the
            same settings. In that case, the repositioning of this cursor is such
            that the next data-fetch will specify the page state to the Data API.

    Raises:
        ValueError: if an explicit None is passed as `initial_page_state`.
            In that case the cursor is left untouched.

    This is an in-place modification of the cursor.
    """
    # Validate before mutating anything, so that a rejected call does not
    # leave the cursor in a half-reset condition.
    if initial_page_state is None:
        msg = "Passing an explicit null for initial_page_state is not allowed."
        raise ValueError(msg)
    self._state = CursorState.IDLE
    self._buffer = []
    self._pages_retrieved = 0
    self._consumed = 0
    if isinstance(initial_page_state, UnsetType):
        self._next_page_state = None
    else:
        self._next_page_state = initial_page_state
    self._last_response_status = None

An asynchronous cursor over documents, as returned by a find_and_rerank invocation on an AsyncCollection. A cursor can be iterated over, materialized into a list, and queried/manipulated in various ways.

Some cursor operations mutate it in-place (such as consuming its documents), others return a new cursor without changing the original one. See the documentation for the various methods and the AsyncCollection find_and_rerank method for more details and usage patterns.

This cursor has two type parameters: TRAW and T. The first is the type of the "raw" documents as they are found on the collection, the second is the type of the items after the optional mapping function (see the .map() method). If no mapping is specified, T = RerankedResult[TRAW]: the items yielded by the cursor are a RerankedResult wrapping the type (possibly after projection) of the documents found on the collection: in other words, such a cursor returns the documents, as they come back from the API, with their associated scores from the find-and-rerank operation. In general, consuming a cursor returns items of type T, except for the consume_buffer primitive that draws directly from the buffer and always returns items of type RerankedResult[TRAW].

This class is the async counterpart of the CollectionFindAndRerankCursor, for use with asyncio. Other than the async interface, its behavior is identical: please refer to the documentation for CollectionFindAndRerankCursor for examples and details.

Expand source code

class AsyncCollectionFindAndRerankCursor(
    Generic[TRAW, T], AbstractCursor[RerankedResult[TRAW]]
):
    """
    An asynchronous cursor over documents, as returned by a `find_and_rerank` invocation
    on an AsyncCollection. A cursor can be iterated over, materialized into a list,
    and queried/manipulated in various ways.

    Some cursor operations mutate it in-place (such as consuming its documents),
    other return a new cursor without changing the original one. See the documentation
    for the various methods and the AsyncCollection `find_and_rerank` method for more
    details and usage patterns.

    This cursor has two type parameters: TRAW and T. The first is the type
    of the "raw" documents as they are found on the collection, the second
    is the type of the items after the optional mapping function (see the `.map()`
    method).
    If no mapping is specified, `T = RerankedResult[TRAW]`: the items yielded by
    the cursor are a `RerankedResult` wrapping the type (possibly after projection)
    of the documents found on the collection: in other words, such a cursor returns
    the documents, as they come back from the API, with their associated scores
    from the find-and-rerank operation.
    In general, consuming a cursor returns items of type T, except for the
    `consume_buffer` primitive that draws directly from the buffer and always
    returns items of type RerankedResult[TRAW].

    This class is the async counterpart of the CollectionFindAndRerankCursor, for use
    with asyncio. Other than the async interface, its behavior is identical: please
    refer to the documentation for `CollectionFindAndRerankCursor` for examples
    and details.
    """

    _query_engine: _CollectionFindAndRerankQueryEngine[TRAW]
    _request_timeout_ms: int | None
    _overall_timeout_ms: int | None
    _request_timeout_label: str | None
    _overall_timeout_label: str | None
    _timeout_manager: MultiCallTimeoutManager
    _filter: FilterType | None
    _projection: ProjectionType | None
    _sort: HybridSortType | None
    _limit: int | None
    _hybrid_limits: int | dict[str, int] | None
    _initial_page_state: str | UnsetType
    _include_scores: bool | None
    _include_sort_vector: bool | None
    _rerank_on: str | None
    _rerank_query: str | None
    _mapper: Callable[[RerankedResult[TRAW]], T] | None

    def __init__(
        self,
        *,
        collection: AsyncCollection[TRAW],
        request_timeout_ms: int | None,
        overall_timeout_ms: int | None,
        request_timeout_label: str | None = None,
        overall_timeout_label: str | None = None,
        filter: FilterType | None = None,
        projection: ProjectionType | None = None,
        sort: HybridSortType | None = None,
        limit: int | None = None,
        hybrid_limits: int | dict[str, int] | None = None,
        initial_page_state: str | UnsetType = _UNSET,
        include_scores: bool | None = None,
        include_sort_vector: bool | None = None,
        rerank_on: str | None = None,
        rerank_query: str | None = None,
        mapper: Callable[[RerankedResult[TRAW]], T] | None = None,
    ) -> None:
        """
        Store the cursor settings, build the query engine bound to the given
        async collection, initialize the base cursor state and create the
        timeout manager for the overall timeout.

        The `filter`, `sort` and `hybrid_limits` arguments are deep-copied
        before being stored; the other settings are stored as passed.
        """
        # Private copies of the mutable query settings.
        filter_copy = deepcopy(filter)
        sort_copy = deepcopy(sort)
        hybrid_limits_copy = deepcopy(hybrid_limits)

        # Query settings.
        self._filter = filter_copy
        self._projection = projection
        self._sort = sort_copy
        self._limit = limit
        self._hybrid_limits = hybrid_limits_copy
        self._initial_page_state = initial_page_state
        self._include_scores = include_scores
        self._include_sort_vector = include_sort_vector
        self._rerank_on = rerank_on
        self._rerank_query = rerank_query
        self._mapper = mapper

        # Timeout settings.
        self._request_timeout_ms = request_timeout_ms
        self._overall_timeout_ms = overall_timeout_ms
        self._request_timeout_label = request_timeout_label
        self._overall_timeout_label = overall_timeout_label

        # This is the async cursor: only the async collection slot is filled.
        engine_settings = {
            "collection": None,
            "async_collection": collection,
            "filter": self._filter,
            "projection": self._projection,
            "sort": self._sort,
            "limit": self._limit,
            "hybrid_limits": self._hybrid_limits,
            "include_scores": self._include_scores,
            "include_sort_vector": self._include_sort_vector,
            "rerank_on": self._rerank_on,
            "rerank_query": self._rerank_query,
        }
        self._query_engine = _CollectionFindAndRerankQueryEngine(**engine_settings)

        # Base-class initialization comes before the timeout manager is created.
        AbstractCursor.__init__(self, initial_page_state=initial_page_state)
        self._timeout_manager = MultiCallTimeoutManager(
            overall_timeout_ms=self._overall_timeout_ms,
            timeout_label=self._overall_timeout_label,
        )

    def _copy(
        self: AsyncCollectionFindAndRerankCursor[TRAW, T],
        *,
        request_timeout_ms: int | None | UnsetType = _UNSET,
        overall_timeout_ms: int | None | UnsetType = _UNSET,
        request_timeout_label: str | None | UnsetType = _UNSET,
        overall_timeout_label: str | None | UnsetType = _UNSET,
        filter: FilterType | None | UnsetType = _UNSET,
        projection: ProjectionType | None | UnsetType = _UNSET,
        sort: dict[str, Any] | None | UnsetType = _UNSET,
        limit: int | None | UnsetType = _UNSET,
        hybrid_limits: int | dict[str, int] | None | UnsetType = _UNSET,
        initial_page_state: str | None | UnsetType = _UNSET,
        include_scores: bool | None | UnsetType = _UNSET,
        include_sort_vector: bool | None | UnsetType = _UNSET,
        rerank_on: str | None | UnsetType = _UNSET,
        rerank_query: str | None | UnsetType = _UNSET,
    ) -> AsyncCollectionFindAndRerankCursor[TRAW, T]:
        """
        Build a new cursor on the same async collection, taking each setting
        from this cursor unless an override is supplied.

        Any argument left at the "unset" sentinel keeps this cursor's value;
        any other value (None included) replaces it. The one exception is
        `initial_page_state`, where an explicit None means "no page state".
        The mapper is always carried over unchanged.

        Returns:
            a newly-constructed AsyncCollectionFindAndRerankCursor.

        Raises:
            RuntimeError: if the query engine has no async collection.
        """
        source_collection = self._query_engine.async_collection
        if source_collection is None:
            raise RuntimeError("Query engine has no async collection.")

        def _pick(override: Any, current: Any) -> Any:
            # An unset override means "keep the value this cursor already has".
            return current if isinstance(override, UnsetType) else override

        # special treatment: passing None erases (hence we must supply unset and not None):
        if isinstance(initial_page_state, UnsetType):
            new_page_state = self._initial_page_state
        elif initial_page_state is None:
            new_page_state = _UNSET
        else:
            new_page_state = initial_page_state

        return AsyncCollectionFindAndRerankCursor(
            collection=source_collection,
            request_timeout_ms=_pick(request_timeout_ms, self._request_timeout_ms),
            overall_timeout_ms=_pick(overall_timeout_ms, self._overall_timeout_ms),
            request_timeout_label=_pick(
                request_timeout_label, self._request_timeout_label
            ),
            overall_timeout_label=_pick(
                overall_timeout_label, self._overall_timeout_label
            ),
            filter=_pick(filter, self._filter),
            projection=_pick(projection, self._projection),
            sort=_pick(sort, self._sort),
            limit=_pick(limit, self._limit),
            hybrid_limits=_pick(hybrid_limits, self._hybrid_limits),
            initial_page_state=new_page_state,
            include_scores=_pick(include_scores, self._include_scores),
            include_sort_vector=_pick(include_sort_vector, self._include_sort_vector),
            rerank_on=_pick(rerank_on, self._rerank_on),
            rerank_query=_pick(rerank_query, self._rerank_query),
            mapper=self._mapper,
        )

    async def _try_ensure_fill_buffer(self) -> None:
        """
        Make a best-effort attempt at having a non-empty buffer.

        A page is fetched only when the cursor is not closed, the buffer is
        empty, and either a next-page state is available or the cursor is
        still IDLE (i.e. the first page). In all other cases nothing happens.
        This method never changes the cursor state.
        """

        # Nothing to do for a closed cursor or when items are still buffered.
        if self._state == CursorState.CLOSED or self._buffer:
            return
        # No further page to request: the cursor has started and is exhausted.
        if self._next_page_state is None and self._state != CursorState.IDLE:
            return

        # Each request is capped by what remains of the overall timeout.
        request_timeout_context = self._timeout_manager.remaining_timeout(
            cap_time_ms=self._request_timeout_ms,
            cap_timeout_label=self._request_timeout_label,
        )
        page_items, following_page_state, response_status = (
            await self._query_engine._async_fetch_page(
                page_state=self._next_page_state,
                timeout_context=request_timeout_context,
            )
        )
        self._next_page_state = following_page_state
        self._last_response_status = response_status
        self._pages_retrieved += 1
        self._buffer = page_items

    def __repr__(self) -> str:
        """Describe the cursor: class name, data source name, state, items consumed."""
        class_name = self.__class__.__name__
        source_name = self.data_source.name
        state_label = self._state.value
        consumed_count = self.consumed
        return (
            f'{class_name}("{source_name}", '
            f"{state_label}, "
            f"consumed so far: {consumed_count})"
        )

    def __aiter__(
        self: AsyncCollectionFindAndRerankCursor[TRAW, T],
    ) -> AsyncCollectionFindAndRerankCursor[TRAW, T]:
        """
        Return the cursor itself as its own asynchronous iterator.

        Raises:
            CursorException: if the cursor is closed.
        """
        # Starting an iteration over a closed cursor is rejected.
        self._ensure_alive()
        async_iterator = self
        return async_iterator

    async def __anext__(self) -> T:
        """
        Yield the next item, fetching a new page if the buffer is empty.

        When no more items are available the cursor becomes CLOSED and the
        iteration stops. If a mapper is set, it is applied to the item taken
        from the buffer before returning it.

        Raises:
            StopAsyncIteration: if the cursor is closed or has run out of items.
        """
        if self.state == CursorState.CLOSED:
            raise StopAsyncIteration
        await self._try_ensure_fill_buffer()
        if self._buffer:
            self._state = CursorState.STARTED
            # Take the head of the buffer; the item counts as consumed even
            # before the mapper (if any) runs on it.
            head_item = self._buffer[0]
            self._buffer = self._buffer[1:]
            self._consumed += 1
            if self._mapper is None:
                return cast(T, head_item)
            return cast(T, self._mapper(head_item))
        # Still empty after the fill attempt: the cursor is exhausted.
        self._state = CursorState.CLOSED
        raise StopAsyncIteration

    @property
    def data_source(self) -> AsyncCollection[TRAW]:
        """
        The AsyncCollection this cursor was created from by means of a
        `find_and_rerank` operation.

        Returns:
            an AsyncCollection instance.

        Raises:
            RuntimeError: if the query engine has no async collection.
        """

        source_collection = self._query_engine.async_collection
        if source_collection is not None:
            return source_collection
        raise RuntimeError("Query engine has no async collection.")

    def clone(self) -> AsyncCollectionFindAndRerankCursor[TRAW, T]:
        """
        Create a copy of this cursor with:
        - the same parameters (timeouts, filter, projection, mapper, etc)
        - and the cursor is rewound to its pristine IDLE state.

        For usage examples, please refer to the same method of the
        equivalent synchronous CollectionFindAndRerankCursor class, and apply
        the necessary adaptations to the async interface.

        Returns:
            a new AsyncCollectionFindAndRerankCursor, similar to this one but
            rewound to its initial state.

        Raises:
            RuntimeError: if the query engine has no async collection.
        """

        # With no overrides, `_copy` re-creates a cursor from exactly the
        # settings stored on this one (mapper and initial page state included).
        return self._copy()

    def filter(
        self, filter: FilterType | None
    ) -> AsyncCollectionFindAndRerankCursor[TRAW, T]:
        """
        Produce a copy of this cursor that uses a different filter.
        Only a cursor still in the IDLE state can be altered this way.

        Rather than calling this method directly, one usually specifies the
        filter as an argument to the AsyncCollection `find_and_rerank` method.

        Args:
            filter: a new filter setting to apply to the returned new cursor.

        Returns:
            a new AsyncCollectionFindAndRerankCursor with the same settings as this one,
                except for `filter` which is the provided value.

        Raises:
            CursorException: if the cursor is not in the IDLE state.
        """

        self._ensure_idle()
        filtered_cursor = self._copy(filter=filter)
        return filtered_cursor

    def project(
        self, projection: ProjectionType | None
    ) -> AsyncCollectionFindAndRerankCursor[TRAW, T]:
        """
        Produce a copy of this cursor that uses a different projection.
        Only a cursor still in the IDLE state, and with no mapping set on it,
        can be altered this way.

        Rather than calling this method directly, one usually specifies the
        projection as an argument to the AsyncCollection `find_and_rerank` method.

        Args:
            projection: a new projection setting to apply to the returned new cursor.

        Returns:
            a new AsyncCollectionFindAndRerankCursor with the same settings as this one,
                except for `projection` which is the provided value.

        Raises:
            CursorException: if the cursor is not in the IDLE state, or if a
                mapping function has already been set.
        """

        self._ensure_idle()
        if self._mapper is None:
            return self._copy(projection=projection)
        # A mapper is already in place, so the projection cannot be changed.
        raise CursorException(
            "Cannot set projection after map.",
            cursor_state=self._state.value,
        )

    def sort(
        self, sort: HybridSortType | None
    ) -> AsyncCollectionFindAndRerankCursor[TRAW, T]:
        """
        Produce a copy of this cursor that uses a different sort setting.
        Only a cursor still in the IDLE state can be altered this way.

        Rather than calling this method directly, one usually specifies the
        sort as an argument to the AsyncCollection `find_and_rerank` method.

        Args:
            sort: a new sort setting to apply to the returned new cursor.

        Returns:
            a new AsyncCollectionFindAndRerankCursor with the same settings as this one,
                except for `sort` which is the provided value.

        Raises:
            CursorException: if the cursor is not in the IDLE state.
        """

        self._ensure_idle()
        sorted_cursor = self._copy(sort=sort)
        return sorted_cursor

    def limit(self, limit: int | None) -> AsyncCollectionFindAndRerankCursor[TRAW, T]:
        """
        Produce a copy of this cursor that uses a different limit.
        Only a cursor still in the IDLE state can be altered this way.

        Rather than calling this method directly, one usually specifies the
        limit as an argument to the AsyncCollection `find_and_rerank` method.

        Args:
            limit: a new limit setting to apply to the returned new cursor.

        Returns:
            a new AsyncCollectionFindAndRerankCursor with the same settings as this one,
                except for `limit` which is the provided value.

        Raises:
            CursorException: if the cursor is not in the IDLE state.
        """

        self._ensure_idle()
        limited_cursor = self._copy(limit=limit)
        return limited_cursor

    def hybrid_limits(
        self, hybrid_limits: int | dict[str, int] | None
    ) -> AsyncCollectionFindAndRerankCursor[TRAW, T]:
        """
        Produce a copy of this cursor that differs only in its hybrid_limits setting.

        The cursor must still be in the IDLE state for this call to be allowed;
        the check is performed before the copy is created.

        Most callers do not need this method: the same setting is normally
        supplied as an argument to the AsyncCollection `find_and_rerank` method.

        Args:
            hybrid_limits: the hybrid_limits setting (a single integer, a dict
                of string keys to integers, or None) for the returned cursor.

        Returns:
            a new AsyncCollectionFindAndRerankCursor whose settings match this
                cursor's, with `hybrid_limits` replaced by the provided value.
        """

        self._ensure_idle()
        relimited_cursor = self._copy(hybrid_limits=hybrid_limits)
        return relimited_cursor

    def initial_page_state(
        self, initial_page_state: str | UnsetType
    ) -> AsyncCollectionFindAndRerankCursor[TRAW, T]:
        """
        Produce a copy of this cursor that differs only in its
        initial_page_state setting.

        The cursor must still be in the IDLE state for this call to be allowed;
        the check is performed before the copy is created.

        This builder is the counterpart of `fetch_next_page`: the page state
        string found in a page returned by that method can be supplied here
        to obtain a cursor set to start from the following page.

        Args:
            initial_page_state: the initial_page_state setting that the returned
                cursor will use. An explicit None is not an accepted value and
                is documented to raise an error (the check itself is not
                performed in this method's own body).

        Returns:
            a new AsyncCollectionFindAndRerankCursor whose settings match this
                cursor's, with `initial_page_state` replaced by the provided value.
        """

        self._ensure_idle()
        repositioned_cursor = self._copy(initial_page_state=initial_page_state)
        return repositioned_cursor

    def include_scores(
        self, include_scores: bool | None
    ) -> AsyncCollectionFindAndRerankCursor[TRAW, T]:
        """
        Produce a copy of this cursor that differs only in its include_scores setting.

        The cursor must still be in the IDLE state for this call to be allowed;
        the check is performed before the copy is created.

        Most callers do not need this method: the same setting is normally
        supplied as an argument to the AsyncCollection `find_and_rerank` method.

        Args:
            include_scores: the include_scores setting that the returned
                cursor will use.

        Returns:
            a new AsyncCollectionFindAndRerankCursor whose settings match this
                cursor's, with `include_scores` replaced by the provided value.
        """

        self._ensure_idle()
        rescored_cursor = self._copy(include_scores=include_scores)
        return rescored_cursor

    def include_sort_vector(
        self, include_sort_vector: bool | None
    ) -> AsyncCollectionFindAndRerankCursor[TRAW, T]:
        """
        Produce a copy of this cursor that differs only in its
        include_sort_vector setting.

        The cursor must still be in the IDLE state for this call to be allowed;
        the check is performed before the copy is created.

        Most callers do not need this method: the same setting is normally
        supplied as an argument to the AsyncCollection `find_and_rerank` method.

        Args:
            include_sort_vector: the include_sort_vector setting that the
                returned cursor will use.

        Returns:
            a new AsyncCollectionFindAndRerankCursor whose settings match this
                cursor's, with `include_sort_vector` replaced by the provided value.
        """

        self._ensure_idle()
        adjusted_cursor = self._copy(include_sort_vector=include_sort_vector)
        return adjusted_cursor

    def rerank_on(
        self, rerank_on: str | None
    ) -> AsyncCollectionFindAndRerankCursor[TRAW, T]:
        """
        Produce a copy of this cursor that differs only in its rerank_on setting.

        The cursor must still be in the IDLE state for this call to be allowed;
        the check is performed before the copy is created.

        Most callers do not need this method: the same setting is normally
        supplied as an argument to the AsyncCollection `find_and_rerank` method.

        Args:
            rerank_on: the rerank_on setting that the returned cursor will use.

        Returns:
            a new AsyncCollectionFindAndRerankCursor whose settings match this
                cursor's, with `rerank_on` replaced by the provided value.
        """

        self._ensure_idle()
        retargeted_cursor = self._copy(rerank_on=rerank_on)
        return retargeted_cursor

    def rerank_query(
        self, rerank_query: str | None
    ) -> AsyncCollectionFindAndRerankCursor[TRAW, T]:
        """
        Produce a copy of this cursor that differs only in its rerank_query setting.

        The cursor must still be in the IDLE state for this call to be allowed;
        the check is performed before the copy is created.

        Most callers do not need this method: the same setting is normally
        supplied as an argument to the AsyncCollection `find_and_rerank` method.

        Args:
            rerank_query: the rerank_query setting that the returned cursor
                will use.

        Returns:
            a new AsyncCollectionFindAndRerankCursor whose settings match this
                cursor's, with `rerank_query` replaced by the provided value.
        """

        self._ensure_idle()
        requeried_cursor = self._copy(rerank_query=rerank_query)
        return requeried_cursor

    def map(
        self, mapper: Callable[[T], TNEW]
    ) -> AsyncCollectionFindAndRerankCursor[TRAW, TNEW]:
        """
        Build a new cursor that applies a transformation to every item it returns.

        If this cursor already carries a mapping function, the new cursor
        applies the existing mapping first and then `mapper` on its output
        (i.e. the two functions are composed).

        The cursor must still be in the IDLE state for this call to be allowed.

        For usage examples, please refer to the same method of the
        equivalent synchronous CollectionFindCursor class, and apply the necessary
        adaptations to the async interface.

        Args:
            mapper: a function of signature T => TNEW, turning the items returned
                by this cursor into something else.
                When this cursor has no mapping yet, the function receives
                a `RerankedResult[TRAW]`, with TRAW being the type of
                the documents from the collection.

        Returns:
            a new AsyncCollectionFindAndRerankCursor carrying the (possibly
                composed) mapping function and otherwise the same settings
                as this cursor.

        Raises:
            RuntimeError: if the query engine has no async collection.
        """
        self._ensure_idle()
        source_collection = self._query_engine.async_collection
        if source_collection is None:
            raise RuntimeError("Query engine has no async collection.")

        effective_mapper: Callable[[RerankedResult[TRAW]], TNEW]
        if self._mapper is None:
            # No prior mapping: the cursor items are RerankedResult[TRAW], so the
            # supplied mapper is used directly (the cast is for type checkers only).
            effective_mapper = cast(Callable[[RerankedResult[TRAW]], TNEW], mapper)
        else:

            def _chained(document: RerankedResult[TRAW]) -> TNEW:
                # Pre-existing mapping first, then the newly-supplied one.
                already_mapped = self._mapper(document)  # type: ignore[misc]
                return mapper(already_mapped)

            effective_mapper = _chained

        # A fresh cursor is constructed explicitly (rather than through `_copy`)
        # since the item type of the result differs from this cursor's.
        mapped_cursor = AsyncCollectionFindAndRerankCursor(
            collection=source_collection,
            request_timeout_ms=self._request_timeout_ms,
            overall_timeout_ms=self._overall_timeout_ms,
            request_timeout_label=self._request_timeout_label,
            overall_timeout_label=self._overall_timeout_label,
            filter=self._filter,
            projection=self._projection,
            sort=self._sort,
            limit=self._limit,
            hybrid_limits=self._hybrid_limits,
            initial_page_state=self._initial_page_state,
            include_scores=self._include_scores,
            include_sort_vector=self._include_sort_vector,
            rerank_on=self._rerank_on,
            rerank_query=self._rerank_query,
            mapper=effective_mapper,
        )
        return mapped_cursor

    async def for_each(
        self,
        function: Callable[[T], bool | None] | Callable[[T], Awaitable[bool | None]],
        *,
        general_method_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Run a callback (a regular function or a coroutine function) on every
        document still to be consumed from the cursor.

        This method raises an error when invoked on a CLOSED cursor.

        Whatever the callback returns is ignored, with a single exception:
        a return value that is the boolean `False` makes the method stop early,
        in which case the cursor is left partially consumed (ACTIVE state).
        Without an early stop, the cursor is exhausted and ends up in
        the CLOSED state.

        For usage examples, please refer to the same method of the
        equivalent synchronous CollectionFindCursor class, and apply the necessary
        adaptations to the async interface.

        Args:
            function: the callback (function or coroutine function), accepting
                as its only parameter an item of the type returned by the cursor.
                It is called once for each document yielded by the cursor.
                Returning `False` interrupts the `for_each` call, which then
                returns without consuming any further document.
            general_method_timeout_ms: a timeout, in milliseconds, for the whole
                duration of this method. If not provided, there is no such timeout.
                Note that the per-request timeout set on the cursor still applies.
            timeout_ms: an alias for `general_method_timeout_ms`.
        """

        self._ensure_alive()
        request_ms, overall_ms = _revise_timeouts_for_cursor_copy(
            new_general_method_timeout_ms=general_method_timeout_ms,
            new_timeout_ms=timeout_ms,
            old_request_timeout_ms=self._request_timeout_ms,
        )
        # The iteration runs on a copy carrying the revised timeouts.
        # `_imprint_internal_state` is defined outside this view; presumably it
        # transfers the consumption state from its receiver onto its argument.
        working_cursor = self._copy(
            request_timeout_ms=request_ms,
            overall_timeout_ms=overall_ms,
        )
        self._imprint_internal_state(working_cursor)
        callback_is_async = iscoroutinefunction(function)
        async for item in working_cursor:
            outcome = function(item)
            if callback_is_async:
                outcome = await outcome  # type: ignore[misc]
            # Only the literal `False` asks for an early stop (None, 0, ... do not).
            if outcome is False:
                break
        # Hand the state of the working copy back to this cursor. Note that this
        # line is not reached if the callback or the iteration raises.
        working_cursor._imprint_internal_state(self)

    async def to_list(
        self,
        *,
        general_method_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[T]:
        """
        Collect into a list all the documents still to be consumed from the cursor.

        This method raises an error when invoked on a CLOSED cursor.

        On an IDLE cursor the list contains every document returned by the
        `find_and_rerank` operation; on a cursor that has already been partly
        consumed, the documents consumed so far are not part of the list.

        This method is discouraged when a very large number of results is
        expected, as it would require many data exchanges with the Data API
        and possibly a massive amount of memory to build the list. A lazy
        pattern, iterating over the cursor and consuming the documents one
        by one, is preferable in such cases.

        For usage examples, please refer to the same method of the
        equivalent synchronous CollectionFindCursor class, and apply the necessary
        adaptations to the async interface.

        Args:
            general_method_timeout_ms: a timeout, in milliseconds, for the whole
                duration of this method. If not provided, there is no such timeout.
                Note that the per-request timeout set on the cursor still applies.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a list of documents (or of whatever the mapping function returns,
                if one is set): all the items that were still to be consumed
                on the cursor at the time `to_list` is called.
        """

        self._ensure_alive()
        request_ms, overall_ms = _revise_timeouts_for_cursor_copy(
            new_general_method_timeout_ms=general_method_timeout_ms,
            new_timeout_ms=timeout_ms,
            old_request_timeout_ms=self._request_timeout_ms,
        )
        # The iteration runs on a copy carrying the revised timeouts.
        # `_imprint_internal_state` is defined outside this view; presumably it
        # transfers the consumption state from its receiver onto its argument.
        working_cursor = self._copy(
            request_timeout_ms=request_ms,
            overall_timeout_ms=overall_ms,
        )
        self._imprint_internal_state(working_cursor)
        collected: list[T] = []
        async for item in working_cursor:
            collected.append(item)
        # Hand the state of the working copy back to this cursor. Note that this
        # line is not reached if the iteration raises.
        working_cursor._imprint_internal_state(self)
        return collected

    async def has_next(self) -> bool:
        """
        Tell whether there are further documents to be returned by the cursor.

        The method is safe to call in any cursor state; for a CLOSED cursor
        the answer is always False (and no fetch is attempted).

        If the local buffer is empty, calling this method may trigger
        the fetch of a new page.

        On an IDLE cursor, `has_next` triggers the first page fetch, but the
        cursor remains IDLE until items are actually consumed.

        Returns:
            True if at least one more item is available for consumption,
                False otherwise (which includes the case of a CLOSED cursor).
        """

        cursor_is_closed = self._state == CursorState.CLOSED
        if not cursor_is_closed:
            await self._try_ensure_fill_buffer()
            buffered_count = len(self._buffer)
            return buffered_count > 0
        return False

    async def get_sort_vector(self) -> list[float] | DataAPIVector | None:
        """
        Return the query vector employed by the vector (ANN) search that ran
        as part of the search expressed by this cursor, when applicable.

        On an IDLE cursor, `get_sort_vector` triggers the first page fetch,
        but the cursor remains IDLE until items are actually consumed.

        The method can be called on a CLOSED cursor as well: the result is
        then either None or the sort vector used in the search.

        Returns:
            the query vector used in the search, provided it was requested
                by passing `include_sort_vector=True` to the `find_and_rerank`
                call that originated the cursor.
                None is returned when the sort vector is not available.
                Otherwise, the vector comes as either a DataAPIVector or
                a plain list of numbers, depending on the setting for
                `APIOptions.serdes_options`.
        """

        await self._try_ensure_fill_buffer()
        response_status = self._last_response_status
        if not response_status:
            # No (or empty) status from the last response: nothing to extract.
            return None
        return _ensure_vector(
            response_status.get("sortVector"),
            self.data_source.api_options.serdes_options,
        )

    async def fetch_next_page(self) -> FindAndRerankPage[T]:
        """
        Get one whole page of results from the Data API and return it in a
        single object, along with the associated "out-of-band" information.

        This is the way to consume a cursor for callers that need to work
        explicitly page by page. It goes together with creating cursors
        'set to start from a certain page', which is done through the
        `initial_page_state` constructor parameter/builder method: typically,
        the initial page state supplied there comes from a previously-consumed
        page of the same find operation, and is the page state string found
        in the `FindAndRerankPage` object returned by this method.

        This method cannot be used while items from regular iteration are still
        sitting in the cursor buffer: in that case an error is raised.

        Note: As long as the findAndRerank Data API command does not paginate
        its results, returning all results at once, this method is of little interest.

        Returns:
            a `FindAndRerankPage` object for the full Data API response, with
            the resulting `RerankedResult` items (or whatever the cursor mapping
            function returns, if one is defined), the state to use to query
            for the next page (a string) and the sort vector if requested
            and applicable.

        Raises:
            CursorException: if the cursor buffer is not empty when
                the method is called.
        """

        self._ensure_alive()
        if self._buffer:
            raise CursorException(
                text="Paginated retrieval cannot be mixed with regular cursor iteration.",
                cursor_state=self._state.value,
            )

        await self._try_ensure_fill_buffer()

        # Both values are read before draining the buffer through `__anext__`.
        upcoming_page_state = self._next_page_state
        page_size = len(self._buffer)
        page_items: list[T] = [await self.__anext__() for _ in range(page_size)]

        response_status = self._last_response_status
        page_sort_vector: list[float] | DataAPIVector | None = (
            _ensure_vector(
                response_status.get("sortVector"),
                self.data_source.api_options.serdes_options,
            )
            if response_status
            else None
        )

        return FindAndRerankPage(
            results=page_items,
            next_page_state=upcoming_page_state,
            sort_vector=page_sort_vector,
        )

Ancestors

AbstractCursor
abc.ABC
typing.Generic

Instance variables

var data_source : AsyncCollection[~TRAW]

The AsyncCollection object that originated this cursor through a find_and_rerank operation.

Returns

an AsyncCollection instance.

Expand source code

@property
def data_source(self) -> AsyncCollection[TRAW]:
    """
    The AsyncCollection that originated this cursor by means of a
    `find_and_rerank` operation.

    Returns:
        an AsyncCollection instance.

    Raises:
        RuntimeError: if the query engine has no async collection.
    """

    originating_collection = self._query_engine.async_collection
    if originating_collection is None:
        raise RuntimeError("Query engine has no async collection.")
    return originating_collection

Methods

def clone(self) ‑> AsyncCollectionFindAndRerankCursor[~TRAW, ~T]

Create a copy of this cursor that keeps the same parameters (timeouts, filter, projection, etc.) and is rewound to its pristine IDLE state.

For usage examples, please refer to the same method of the equivalent synchronous CollectionFindCursor class, and apply the necessary adaptations to the async interface.

Returns

a new AsyncCollectionFindAndRerankCursor, similar to this one but rewound to its initial state.

Expand source code

def clone(self) -> AsyncCollectionFindAndRerankCursor[TRAW, T]:
    """
    Build a new cursor that:
    - carries the same parameters as this one (timeouts, filter, projection,
      etc, including the mapping function if any);
    - is in its pristine IDLE state, i.e. it is rewound to the start.

    For usage examples, please refer to the same method of the
    equivalent synchronous CollectionFindCursor class, and apply the necessary
    adaptations to the async interface.

    Returns:
        a new AsyncCollectionFindAndRerankCursor, with the same settings as
        this one but rewound to its initial state.

    Raises:
        RuntimeError: if the query engine has no async collection.
    """

    source_collection = self._query_engine.async_collection
    if source_collection is None:
        raise RuntimeError("Query engine has no async collection.")
    # A brand new cursor is constructed from this cursor's settings only.
    rewound_cursor = AsyncCollectionFindAndRerankCursor(
        collection=source_collection,
        request_timeout_ms=self._request_timeout_ms,
        overall_timeout_ms=self._overall_timeout_ms,
        request_timeout_label=self._request_timeout_label,
        overall_timeout_label=self._overall_timeout_label,
        filter=self._filter,
        projection=self._projection,
        sort=self._sort,
        limit=self._limit,
        hybrid_limits=self._hybrid_limits,
        initial_page_state=self._initial_page_state,
        include_scores=self._include_scores,
        include_sort_vector=self._include_sort_vector,
        rerank_on=self._rerank_on,
        rerank_query=self._rerank_query,
        mapper=self._mapper,
    )
    return rewound_cursor

async def fetch_next_page(self) ‑> FindAndRerankPage[~T]

Retrieve a single, whole page of results from the Data API and return it at once, together with associated "out-of-band" information.

This method is meant to be the way a cursor is consumed when the caller needs to explicitly operate on a page-by-page basis, and is to be paired with creation of cursor objects 'set to start from a certain page' via the initial_page_state constructor parameter/builder method. In this case, the supplied initial page state typically comes from having consumed a previous page, for the same find operation: the page state, a string, is found within the FindAndRerankPage object returned by this method.

Note: As long as the findAndRerank Data API command does not paginate its results, returning all results at once, this method is of little interest.

Returns

a FindAndRerankPage object for the full Data API response, including the resulting RerankedResult items (or suitable objects from the cursor mapping function, if one is defined), as well as the state to use to query for the next page (a string) and the sort vector if requested and applicable.

Expand source code

async def fetch_next_page(self) -> FindAndRerankPage[T]:
    """
    Get one whole page of results from the Data API and return it in a
    single object, along with the associated "out-of-band" information.

    This is the way to consume a cursor for callers that need to work
    explicitly page by page. It goes together with creating cursors
    'set to start from a certain page', which is done through the
    `initial_page_state` constructor parameter/builder method: typically,
    the initial page state supplied there comes from a previously-consumed
    page of the same find operation, and is the page state string found
    in the `FindAndRerankPage` object returned by this method.

    This method cannot be used while items from regular iteration are still
    sitting in the cursor buffer: in that case an error is raised.

    Note: As long as the findAndRerank Data API command does not paginate
    its results, returning all results at once, this method is of little interest.

    Returns:
        a `FindAndRerankPage` object for the full Data API response, with
        the resulting `RerankedResult` items (or whatever the cursor mapping
        function returns, if one is defined), the state to use to query
        for the next page (a string) and the sort vector if requested
        and applicable.

    Raises:
        CursorException: if the cursor buffer is not empty when
            the method is called.
    """

    self._ensure_alive()
    if self._buffer:
        raise CursorException(
            text="Paginated retrieval cannot be mixed with regular cursor iteration.",
            cursor_state=self._state.value,
        )

    await self._try_ensure_fill_buffer()

    # Both values are read before draining the buffer through `__anext__`.
    upcoming_page_state = self._next_page_state
    page_size = len(self._buffer)
    page_items: list[T] = [await self.__anext__() for _ in range(page_size)]

    response_status = self._last_response_status
    page_sort_vector: list[float] | DataAPIVector | None = (
        _ensure_vector(
            response_status.get("sortVector"),
            self.data_source.api_options.serdes_options,
        )
        if response_status
        else None
    )

    return FindAndRerankPage(
        results=page_items,
        next_page_state=upcoming_page_state,
        sort_vector=page_sort_vector,
    )

def filter(self, filter: FilterType | None) ‑> AsyncCollectionFindAndRerankCursor[~TRAW, ~T]

Return a copy of this cursor with a new filter setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the AsyncCollection find_and_rerank method.

Args

filter: a new filter setting to apply to the returned new cursor.

Returns

a new AsyncCollectionFindAndRerankCursor with the same settings as this one, except for filter which is the provided value.

Expand source code

def filter(
    self, filter: FilterType | None
) -> AsyncCollectionFindAndRerankCursor[TRAW, T]:
    """
    Produce a copy of this cursor that differs only in its filter setting.

    The cursor must still be in the IDLE state for this call to be allowed;
    the check is performed before the copy is created.

    Most callers do not need this method: the same setting is normally
    supplied as an argument to the AsyncCollection `find_and_rerank` method.

    Args:
        filter: the filter setting that the returned cursor will use.

    Returns:
        a new AsyncCollectionFindAndRerankCursor whose settings match this
            cursor's, with `filter` replaced by the provided value.
    """

    self._ensure_idle()
    refiltered_cursor = self._copy(filter=filter)
    return refiltered_cursor

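async def for_each(self, function: Callable[[~T], bool | None] | Callable[[~T], Awaitable[bool | None]], *, general_method_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Consume the remaining documents in the cursor, invoking a provided callback function – or coroutine – on each of them.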

Calling this method on a CLOSED cursor results in an error.

The callback function can return any value. The return value is generally discarded, with the following exception: if the function returns the boolean False, it is taken to signify that the method should quit early, leaving the cursor half-consumed (ACTIVE state). If this does not occur, this method results in the cursor entering CLOSED state once it is exhausted.

For usage examples, please refer to the same method of the equivalent synchronous CollectionFindCursor class, and apply the necessary adaptations to the async interface.

Args

function: a callback function, or a coroutine, whose only parameter is of the type returned by the cursor. This callback is invoked once per each document yielded by the cursor. If the callback returns a False, the for_each invocation stops early and returns without consuming further documents.
general_method_timeout_ms: a timeout, in milliseconds, for the whole duration of this method. If not provided, there is no such timeout. Note that the per-request timeout set on the cursor still applies.
timeout_ms: an alias for general_method_timeout_ms.

Expand source code

async def for_each(
    self,
    function: Callable[[T], bool | None] | Callable[[T], Awaitable[bool | None]],
    *,
    general_method_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Run a callback (a regular function or a coroutine function) on every
    document still to be consumed from the cursor.

    This method raises an error when invoked on a CLOSED cursor.

    Whatever the callback returns is ignored, with a single exception:
    a return value that is the boolean `False` makes the method stop early,
    in which case the cursor is left partially consumed (ACTIVE state).
    Without an early stop, the cursor is exhausted and ends up in
    the CLOSED state.

    For usage examples, please refer to the same method of the
    equivalent synchronous CollectionFindCursor class, and apply the necessary
    adaptations to the async interface.

    Args:
        function: the callback (function or coroutine function), accepting
            as its only parameter an item of the type returned by the cursor.
            It is called once for each document yielded by the cursor.
            Returning `False` interrupts the `for_each` call, which then
            returns without consuming any further document.
        general_method_timeout_ms: a timeout, in milliseconds, for the whole
            duration of this method. If not provided, there is no such timeout.
            Note that the per-request timeout set on the cursor still applies.
        timeout_ms: an alias for `general_method_timeout_ms`.
    """

    self._ensure_alive()
    request_ms, overall_ms = _revise_timeouts_for_cursor_copy(
        new_general_method_timeout_ms=general_method_timeout_ms,
        new_timeout_ms=timeout_ms,
        old_request_timeout_ms=self._request_timeout_ms,
    )
    # The iteration runs on a copy carrying the revised timeouts.
    # `_imprint_internal_state` is defined outside this view; presumably it
    # transfers the consumption state from its receiver onto its argument.
    working_cursor = self._copy(
        request_timeout_ms=request_ms,
        overall_timeout_ms=overall_ms,
    )
    self._imprint_internal_state(working_cursor)
    callback_is_async = iscoroutinefunction(function)
    async for item in working_cursor:
        outcome = function(item)
        if callback_is_async:
            outcome = await outcome  # type: ignore[misc]
        # Only the literal `False` asks for an early stop (None, 0, ... do not).
        if outcome is False:
            break
    # Hand the state of the working copy back to this cursor. Note that this
    # line is not reached if the callback or the iteration raises.
    working_cursor._imprint_internal_state(self)

async def get_sort_vector(self) ‑> list[float] | DataAPIVector | None

Return the query vector used in the vector (ANN) search that was run as part of the search expressed by this cursor, if applicable.

Calling get_sort_vector on an IDLE cursor triggers the first page fetch, but the cursor stays in the IDLE state until actual consumption starts.

The method can be invoked on a CLOSED cursor and will return either None or the sort vector used in the search.

Returns

the query vector used in the search, if it was requested by passing include_sort_vector=True to the find_and_rerank call that originated the cursor. If the sort vector is not available, None is returned. Otherwise, the vector is returned as either a DataAPIVector or a plain list of numbers, depending on the setting for APIOptions.serdes_options.

Expand source code

async def get_sort_vector(self) -> list[float] | DataAPIVector | None:
    """
    Return the query vector employed by the vector (ANN) search that ran
    as part of the search expressed by this cursor, when applicable.

    On an IDLE cursor, `get_sort_vector` triggers the first page fetch,
    but the cursor remains IDLE until items are actually consumed.

    The method can be called on a CLOSED cursor as well: the result is
    then either None or the sort vector used in the search.

    Returns:
        the query vector used in the search, provided it was requested
            by passing `include_sort_vector=True` to the `find_and_rerank`
            call that originated the cursor.
            None is returned when the sort vector is not available.
            Otherwise, the vector comes as either a DataAPIVector or
            a plain list of numbers, depending on the setting for
            `APIOptions.serdes_options`.
    """

    await self._try_ensure_fill_buffer()
    response_status = self._last_response_status
    if not response_status:
        # No (or empty) status from the last response: nothing to extract.
        return None
    return _ensure_vector(
        response_status.get("sortVector"),
        self.data_source.api_options.serdes_options,
    )

async def has_next(self) ‑> bool

Whether the cursor actually has more documents to return.

has_next can be called on any cursor, but on a CLOSED cursor will always return False.

This method can trigger the fetch operation of a new page, if the current buffer is empty.

Calling has_next on an IDLE cursor triggers the first page fetch, but the cursor stays in the IDLE state until actual consumption starts.

Returns

a boolean value of True if there is at least one further item available to consume; False otherwise (including the case of CLOSED cursor).

Expand source code

async def has_next(self) -> bool:
    """
    Tell whether there are further documents to be returned by the cursor.

    The method is safe to call in any cursor state; for a CLOSED cursor
    the answer is always False (and no fetch is attempted).

    If the local buffer is empty, calling this method may trigger
    the fetch of a new page.

    On an IDLE cursor, `has_next` triggers the first page fetch, but the
    cursor remains IDLE until items are actually consumed.

    Returns:
        True if at least one more item is available for consumption,
            False otherwise (which includes the case of a CLOSED cursor).
    """

    cursor_is_closed = self._state == CursorState.CLOSED
    if not cursor_is_closed:
        await self._try_ensure_fill_buffer()
        buffered_count = len(self._buffer)
        return buffered_count > 0
    return False

def hybrid_limits(self, hybrid_limits: int | dict[str, int] | None) ‑> AsyncCollectionFindAndRerankCursor[~TRAW, ~T]

Return a copy of this cursor with a new hybrid_limits setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the AsyncCollection find_and_rerank method.

Args

hybrid_limits: a new setting to apply to the returned new cursor.

Returns

a new AsyncCollectionFindAndRerankCursor with the same settings as this one, except for hybrid_limits which is the provided value.

Expand source code

def hybrid_limits(
    self, hybrid_limits: int | dict[str, int] | None
) -> AsyncCollectionFindAndRerankCursor[TRAW, T]:
    """
    Build a new cursor, identical to this one except for `hybrid_limits`.
    The cursor must still be in the IDLE state for this to be allowed.

    Most of the time there is no need to call this method directly: the
    same effect is obtained by passing the desired arguments to the
    AsyncCollection `find_and_rerank` method.

    Args:
        hybrid_limits: the hybrid_limits setting for the new cursor.

    Returns:
        a new AsyncCollectionFindAndRerankCursor carrying over all settings
            of this one, with `hybrid_limits` replaced by the provided value.
    """

    # settings can be altered only before the cursor has been started
    self._ensure_idle()
    updated_cursor = self._copy(hybrid_limits=hybrid_limits)
    return updated_cursor

def include_scores(self, include_scores: bool | None) ‑> AsyncCollectionFindAndRerankCursor[~TRAW, ~T]

Return a copy of this cursor with a new include_scores setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the AsyncCollection find_and_rerank method.

Args

include_scores: a new include_scores setting to apply to the returned new cursor.

Returns

a new AsyncCollectionFindAndRerankCursor with the same settings as this one, except for include_scores which is the provided value.

Expand source code

def include_scores(
    self, include_scores: bool | None
) -> AsyncCollectionFindAndRerankCursor[TRAW, T]:
    """
    Build a new cursor, identical to this one except for `include_scores`.
    The cursor must still be in the IDLE state for this to be allowed.

    Most of the time there is no need to call this method directly: the
    same effect is obtained by passing the desired arguments to the
    AsyncCollection `find_and_rerank` method.

    Args:
        include_scores: the include_scores setting for the new cursor.

    Returns:
        a new AsyncCollectionFindAndRerankCursor carrying over all settings
            of this one, with `include_scores` replaced by the provided value.
    """

    # settings can be altered only before the cursor has been started
    self._ensure_idle()
    updated_cursor = self._copy(include_scores=include_scores)
    return updated_cursor

def include_sort_vector(self, include_sort_vector: bool | None) ‑> AsyncCollectionFindAndRerankCursor[~TRAW, ~T]

Return a copy of this cursor with a new include_sort_vector setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the AsyncCollection find_and_rerank method.

Args

include_sort_vector: a new include_sort_vector setting to apply to the returned new cursor.

Returns

a new AsyncCollectionFindAndRerankCursor with the same settings as this one, except for include_sort_vector which is the provided value.

Expand source code

def include_sort_vector(
    self, include_sort_vector: bool | None
) -> AsyncCollectionFindAndRerankCursor[TRAW, T]:
    """
    Build a new cursor, identical to this one except for `include_sort_vector`.
    The cursor must still be in the IDLE state for this to be allowed.

    Most of the time there is no need to call this method directly: the
    same effect is obtained by passing the desired arguments to the
    AsyncCollection `find_and_rerank` method.

    Args:
        include_sort_vector: the include_sort_vector setting
            for the new cursor.

    Returns:
        a new AsyncCollectionFindAndRerankCursor carrying over all settings
            of this one, with `include_sort_vector` replaced by the
            provided value.
    """

    # settings can be altered only before the cursor has been started
    self._ensure_idle()
    updated_cursor = self._copy(include_sort_vector=include_sort_vector)
    return updated_cursor

def initial_page_state(self, initial_page_state: str | UnsetType) ‑> AsyncCollectionFindAndRerankCursor[~TRAW, ~T]

Return a copy of this cursor with a new initial_page_state setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the AsyncCollection find_and_rerank method.

Args

initial_page_state: a new initial_page_state setting to apply to the returned new cursor. Passing an explicit None raises an error.

Returns

a new AsyncCollectionFindAndRerankCursor with the same settings as this one, except for initial_page_state which is the provided value.

Expand source code

def initial_page_state(
    self, initial_page_state: str | UnsetType
) -> AsyncCollectionFindAndRerankCursor[TRAW, T]:
    """
    Build a new cursor, identical to this one except for `initial_page_state`.
    The cursor must still be in the IDLE state for this to be allowed.

    Most of the time there is no need to call this method directly: the
    same effect is obtained by passing the desired arguments to the
    AsyncCollection `find_and_rerank` method.

    Args:
        initial_page_state: the initial_page_state setting for the new cursor.
            Passing an explicit None raises an error.

    Returns:
        a new AsyncCollectionFindAndRerankCursor carrying over all settings
            of this one, with `initial_page_state` replaced by the
            provided value.
    """

    # settings can be altered only before the cursor has been started
    self._ensure_idle()
    updated_cursor = self._copy(initial_page_state=initial_page_state)
    return updated_cursor

def limit(self, limit: int | None) ‑> AsyncCollectionFindAndRerankCursor[~TRAW, ~T]

Return a copy of this cursor with a new limit setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the AsyncCollection find_and_rerank method.

Args

limit: a new limit setting to apply to the returned new cursor.

Returns

a new AsyncCollectionFindAndRerankCursor with the same settings as this one, except for limit which is the provided value.

Expand source code

def limit(self, limit: int | None) -> AsyncCollectionFindAndRerankCursor[TRAW, T]:
    """
    Build a new cursor, identical to this one except for `limit`.
    The cursor must still be in the IDLE state for this to be allowed.

    Most of the time there is no need to call this method directly: the
    same effect is obtained by passing the desired arguments to the
    AsyncCollection `find_and_rerank` method.

    Args:
        limit: the limit setting for the new cursor.

    Returns:
        a new AsyncCollectionFindAndRerankCursor carrying over all settings
            of this one, with `limit` replaced by the provided value.
    """

    # settings can be altered only before the cursor has been started
    self._ensure_idle()
    updated_cursor = self._copy(limit=limit)
    return updated_cursor

def map(self, mapper: Callable[[T], TNEW]) ‑> AsyncCollectionFindAndRerankCursor[~TRAW, ~TNEW]

Return a copy of this cursor with a mapping function to transform the returned items. Calling this method on a cursor with a mapping already set results in the mapping functions being composed.

This operation is allowed only if the cursor state is still IDLE.

For usage examples, please refer to the same method of the equivalent synchronous CollectionFindCursor class, and apply the necessary adaptations to the async interface.

Args

mapper: a function transforming the objects returned by the cursor into something else (i.e. a function T => TNEW). If the map is imposed on a cursor without mapping yet, its input argument must be a RerankedResult[TRAW], where TRAW stands for the type of the documents from the collection.

Returns

a new AsyncCollectionFindAndRerankCursor with a new mapping function on the results, possibly composed with any pre-existing mapping function.

Expand source code

def map(
    self, mapper: Callable[[T], TNEW]
) -> AsyncCollectionFindAndRerankCursor[TRAW, TNEW]:
    """
    Build a new cursor that applies a mapping function to the items it
    returns. If this cursor already has a mapping set, the new cursor
    applies the existing mapping first and then the provided one
    (i.e. the two functions are composed).

    The cursor must still be in the IDLE state for this to be allowed.

    For usage examples, please refer to the same method of the
    equivalent synchronous CollectionFindCursor class, and apply the necessary
    adaptations to the async interface.

    Args:
        mapper: a function turning the items returned by this cursor into
            something else (i.e. a function T => TNEW).
            When applied to a cursor with no mapping yet, the function
            receives a `RerankedResult[TRAW]`, with TRAW being the type
            of the documents from the collection.

    Returns:
        a new AsyncCollectionFindAndRerankCursor with the same settings as
            this one and a mapping function which is the provided one,
            composed with the pre-existing mapping if there is one.

    Raises:
        RuntimeError: if the query engine holds no async collection.
    """
    self._ensure_idle()
    async_collection = self._query_engine.async_collection
    if async_collection is None:
        raise RuntimeError("Query engine has no async collection.")
    composite_mapper: Callable[[RerankedResult[TRAW]], TNEW]
    if self._mapper is None:
        # no previous mapping: the provided function acts on the raw results
        composite_mapper = cast(Callable[[RerankedResult[TRAW]], TNEW], mapper)
    else:

        def _composite(document: RerankedResult[TRAW]) -> TNEW:
            # previous mapping first, then the newly-provided one
            return mapper(self._mapper(document))  # type: ignore[misc]

        composite_mapper = _composite
    return AsyncCollectionFindAndRerankCursor(
        collection=async_collection,
        request_timeout_ms=self._request_timeout_ms,
        overall_timeout_ms=self._overall_timeout_ms,
        request_timeout_label=self._request_timeout_label,
        overall_timeout_label=self._overall_timeout_label,
        filter=self._filter,
        projection=self._projection,
        sort=self._sort,
        limit=self._limit,
        hybrid_limits=self._hybrid_limits,
        initial_page_state=self._initial_page_state,
        include_scores=self._include_scores,
        include_sort_vector=self._include_sort_vector,
        rerank_on=self._rerank_on,
        rerank_query=self._rerank_query,
        mapper=composite_mapper,
    )

def project(self, projection: ProjectionType | None) ‑> AsyncCollectionFindAndRerankCursor[~TRAW, ~T]

Return a copy of this cursor with a new projection setting. This operation is allowed only if the cursor state is still IDLE and if no mapping has been set on it.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the AsyncCollection find_and_rerank method.

Args

projection: a new projection setting to apply to the returned new cursor.

Returns

a new AsyncCollectionFindAndRerankCursor with the same settings as this one, except for projection which is the provided value.

Expand source code

def project(
    self, projection: ProjectionType | None
) -> AsyncCollectionFindAndRerankCursor[TRAW, T]:
    """
    Build a new cursor, identical to this one except for `projection`.
    For this to be allowed, the cursor must still be in the IDLE state
    and must have no mapping function set.

    Most of the time there is no need to call this method directly: the
    same effect is obtained by passing the desired arguments to the
    AsyncCollection `find_and_rerank` method.

    Args:
        projection: the projection setting for the new cursor.

    Returns:
        a new AsyncCollectionFindAndRerankCursor carrying over all settings
            of this one, with `projection` replaced by the provided value.

    Raises:
        CursorException: if a mapping function is already set on the cursor.
    """

    self._ensure_idle()
    if self._mapper is None:
        return self._copy(projection=projection)
    # a mapper is in place: changing the projection is refused
    raise CursorException(
        "Cannot set projection after map.",
        cursor_state=self._state.value,
    )

def rerank_on(self, rerank_on: str | None) ‑> AsyncCollectionFindAndRerankCursor[~TRAW, ~T]

Return a copy of this cursor with a new rerank_on setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the AsyncCollection find_and_rerank method.

Args

rerank_on: a new setting to apply to the returned new cursor.

Returns

a new AsyncCollectionFindAndRerankCursor with the same settings as this one, except for rerank_on which is the provided value.

Expand source code

def rerank_on(
    self, rerank_on: str | None
) -> AsyncCollectionFindAndRerankCursor[TRAW, T]:
    """
    Build a new cursor, identical to this one except for `rerank_on`.
    The cursor must still be in the IDLE state for this to be allowed.

    Most of the time there is no need to call this method directly: the
    same effect is obtained by passing the desired arguments to the
    AsyncCollection `find_and_rerank` method.

    Args:
        rerank_on: the rerank_on setting for the new cursor.

    Returns:
        a new AsyncCollectionFindAndRerankCursor carrying over all settings
            of this one, with `rerank_on` replaced by the provided value.
    """

    # settings can be altered only before the cursor has been started
    self._ensure_idle()
    updated_cursor = self._copy(rerank_on=rerank_on)
    return updated_cursor

def rerank_query(self, rerank_query: str | None) ‑> AsyncCollectionFindAndRerankCursor[~TRAW, ~T]

Return a copy of this cursor with a new rerank_query setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the AsyncCollection find_and_rerank method.

Args

rerank_query: a new setting to apply to the returned new cursor.

Returns

a new AsyncCollectionFindAndRerankCursor with the same settings as this one, except for rerank_query which is the provided value.

Expand source code

def rerank_query(
    self, rerank_query: str | None
) -> AsyncCollectionFindAndRerankCursor[TRAW, T]:
    """
    Build a new cursor, identical to this one except for `rerank_query`.
    The cursor must still be in the IDLE state for this to be allowed.

    Most of the time there is no need to call this method directly: the
    same effect is obtained by passing the desired arguments to the
    AsyncCollection `find_and_rerank` method.

    Args:
        rerank_query: the rerank_query setting for the new cursor.

    Returns:
        a new AsyncCollectionFindAndRerankCursor carrying over all settings
            of this one, with `rerank_query` replaced by the provided value.
    """

    # settings can be altered only before the cursor has been started
    self._ensure_idle()
    updated_cursor = self._copy(rerank_query=rerank_query)
    return updated_cursor

def sort(self, sort: HybridSortType | None) ‑> AsyncCollectionFindAndRerankCursor[~TRAW, ~T]

Return a copy of this cursor with a new sort setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the AsyncCollection find_and_rerank method.

Args

sort: a new sort setting to apply to the returned new cursor.

Returns

a new AsyncCollectionFindAndRerankCursor with the same settings as this one, except for sort which is the provided value.

Expand source code

def sort(
    self, sort: HybridSortType | None
) -> AsyncCollectionFindAndRerankCursor[TRAW, T]:
    """
    Build a new cursor, identical to this one except for `sort`.
    The cursor must still be in the IDLE state for this to be allowed.

    Most of the time there is no need to call this method directly: the
    same effect is obtained by passing the desired arguments to the
    AsyncCollection `find_and_rerank` method.

    Args:
        sort: the sort setting for the new cursor.

    Returns:
        a new AsyncCollectionFindAndRerankCursor carrying over all settings
            of this one, with `sort` replaced by the provided value.
    """

    # settings can be altered only before the cursor has been started
    self._ensure_idle()
    updated_cursor = self._copy(sort=sort)
    return updated_cursor

async def to_list(self, *, general_method_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[~T]

Materialize all documents that remain to be consumed from a cursor into a list.

Calling this method on a CLOSED cursor results in an error.

If the cursor is IDLE, the result will be the whole set of documents returned by the find_and_rerank operation; otherwise, the documents already consumed by the cursor will not be in the resulting list.

Calling this method is not recommended if a huge list of results is anticipated: it would involve a large number of data exchanges with the Data API and possibly a massive memory usage to construct the list. In such cases, a lazy pattern of iterating and consuming the documents is to be preferred.

For usage examples, please refer to the same method of the equivalent synchronous CollectionFindCursor class, and apply the necessary adaptations to the async interface.

Args

general_method_timeout_ms: a timeout, in milliseconds, for the whole duration of this method. If not provided, there is no such timeout. Note that the per-request timeout set on the cursor still applies.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a list of documents (or other values depending on the mapping function, if one is set). These are all items that were left to be consumed on the cursor when to_list is called.

Expand source code

async def to_list(
    self,
    *,
    general_method_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[T]:
    """
    Consume everything that is left on the cursor and return it as a list.

    This method raises an error if invoked on a CLOSED cursor.

    On an IDLE cursor, the returned list is the full set of documents coming
    from the `find_and_rerank` operation; on a cursor that has already been
    partly consumed, the documents consumed so far are not part of the list.

    This method should be avoided when a very large number of results is
    expected: many data exchanges with the Data API would be necessary, and
    building the list may require a massive amount of memory. In those cases,
    lazily iterating over the cursor and consuming the documents as they come
    is the preferable approach.

    For usage examples, please refer to the same method of the
    equivalent synchronous CollectionFindCursor class, and apply the necessary
    adaptations to the async interface.

    Args:
        general_method_timeout_ms: a timeout, in milliseconds, for the whole
            duration of this method. If not provided, there is no such timeout.
            Note that the per-request timeout set on the cursor still applies.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a list of documents (or of other values, if a mapping function
            is set on the cursor): all the items that were still to be
            consumed on the cursor at the time `to_list` is called.
    """

    self._ensure_alive()
    revised_request_ms, revised_overall_ms = _revise_timeouts_for_cursor_copy(
        new_general_method_timeout_ms=general_method_timeout_ms,
        new_timeout_ms=timeout_ms,
        old_request_timeout_ms=self._request_timeout_ms,
    )
    # the iteration runs on a copy, so that the timeouts revised for this
    # method call can be applied without altering this cursor's settings
    working_cursor = self._copy(
        request_timeout_ms=revised_request_ms,
        overall_timeout_ms=revised_overall_ms,
    )
    self._imprint_internal_state(working_cursor)
    collected: list[T] = []
    async for item in working_cursor:
        collected.append(item)
    # once done, the internal state of the copy is imprinted back onto this cursor
    working_cursor._imprint_internal_state(self)
    return collected

Inherited members

AbstractCursor:
- buffered_count
- close
- consume_buffer
- consumed
- cursor_id
- rewind
- state

An asynchronous cursor over documents, as returned by a find invocation on an AsyncCollection. A cursor can be iterated over, materialized into a list, and queried/manipulated in various ways.

A cursor has two type parameters: TRAW and T. The first is the type of the "raw" documents as they are obtained from the Data API, the second is the type of the items after the optional mapping function (see the .map() method). If there is no mapping, TRAW = T. In general, consuming a cursor returns items of type T, except for the consume_buffer primitive that draws directly from the buffer and always returns items of type TRAW.

This class is the async counterpart of the CollectionFindCursor, for use with asyncio. Other than the async interface, its behavior is identical: please refer to the documentation for CollectionFindCursor for examples and details.

Expand source code

class AsyncCollectionFindCursor(Generic[TRAW, T], AbstractCursor[TRAW]):
    """
    An asynchronous cursor over documents, as returned by a `find` invocation on
    an AsyncCollection. A cursor can be iterated over, materialized into a list,
    and queried/manipulated in various ways.

    Some cursor operations mutate it in-place (such as consuming its documents),
    other return a new cursor without changing the original one. See the documentation
    for the various methods and the AsyncCollection `find` method for more details
    and usage patterns.

    A cursor has two type parameters: TRAW and T. The first is the type of the "raw"
    documents as they are obtained from the Data API, the second is the type of the
    items after the optional mapping function (see the `.map()` method). If there is
    no mapping, TRAW = T. In general, consuming a cursor returns items of type T,
    except for the `consume_buffer` primitive that draws directly from the buffer
    and always returns items of type TRAW.

    This class is the async counterpart of the CollectionFindCursor, for use with
    asyncio. Other than the async interface, its behavior is identical: please refer
    to the documentation for `CollectionFindCursor` for examples and details.
    """

    _query_engine: _CollectionFindQueryEngine[TRAW]
    _request_timeout_ms: int | None
    _overall_timeout_ms: int | None
    _request_timeout_label: str | None
    _overall_timeout_label: str | None
    _timeout_manager: MultiCallTimeoutManager
    _filter: FilterType | None
    _projection: ProjectionType | None
    _sort: dict[str, Any] | None
    _limit: int | None
    _initial_page_state: str | UnsetType
    _include_similarity: bool | None
    _include_sort_vector: bool | None
    _skip: int | None
    _mapper: Callable[[TRAW], T] | None

    def __init__(
        self,
        *,
        collection: AsyncCollection[TRAW],
        request_timeout_ms: int | None,
        overall_timeout_ms: int | None,
        request_timeout_label: str | None = None,
        overall_timeout_label: str | None = None,
        filter: FilterType | None = None,
        projection: ProjectionType | None = None,
        sort: dict[str, Any] | None = None,
        limit: int | None = None,
        initial_page_state: str | UnsetType = _UNSET,
        include_similarity: bool | None = None,
        include_sort_vector: bool | None = None,
        skip: int | None = None,
        mapper: Callable[[TRAW], T] | None = None,
    ) -> None:
        """
        Initialize the cursor: store its settings, create the query engine
        bound to the provided async collection, run the AbstractCursor
        initialization and set up the timeout manager.

        Args:
            collection: the AsyncCollection the `find` is run against.
            request_timeout_ms: per-request timeout, stored on the cursor.
            overall_timeout_ms: overall timeout, given to the timeout manager.
            request_timeout_label: label associated to the request timeout.
            overall_timeout_label: label associated to the overall timeout.
            filter: the filter for the query (a deep copy is stored).
            projection: the projection for the query (stored as is).
            sort: the sort clause for the query (a deep copy is stored).
            limit: the limit setting for the query.
            initial_page_state: passed on to the AbstractCursor initializer.
            include_similarity: the include_similarity setting for the query.
            include_sort_vector: the include_sort_vector setting for the query.
            skip: the skip setting for the query.
            mapper: optional function applied to each item when consuming.
        """
        # query-defining settings; filter and sort are kept as private deep copies
        self._filter = deepcopy(filter)
        self._sort = deepcopy(sort)
        self._projection = projection
        self._limit = limit
        self._skip = skip
        self._include_similarity = include_similarity
        self._include_sort_vector = include_sort_vector
        self._initial_page_state = initial_page_state
        self._mapper = mapper
        # timeout-related settings
        self._request_timeout_ms = request_timeout_ms
        self._request_timeout_label = request_timeout_label
        self._overall_timeout_ms = overall_timeout_ms
        self._overall_timeout_label = overall_timeout_label
        # async flavor of the engine: the sync collection slot is left empty
        self._query_engine = _CollectionFindQueryEngine(
            collection=None,
            async_collection=collection,
            filter=self._filter,
            projection=self._projection,
            sort=self._sort,
            limit=self._limit,
            include_similarity=self._include_similarity,
            include_sort_vector=self._include_sort_vector,
            skip=self._skip,
        )
        AbstractCursor.__init__(self, initial_page_state=initial_page_state)
        self._timeout_manager = MultiCallTimeoutManager(
            overall_timeout_ms=self._overall_timeout_ms,
            timeout_label=self._overall_timeout_label,
        )

    def _copy(
        self: AsyncCollectionFindCursor[TRAW, T],
        *,
        request_timeout_ms: int | None | UnsetType = _UNSET,
        overall_timeout_ms: int | None | UnsetType = _UNSET,
        request_timeout_label: str | None | UnsetType = _UNSET,
        overall_timeout_label: str | None | UnsetType = _UNSET,
        filter: FilterType | None | UnsetType = _UNSET,
        projection: ProjectionType | None | UnsetType = _UNSET,
        sort: dict[str, Any] | None | UnsetType = _UNSET,
        limit: int | None | UnsetType = _UNSET,
        initial_page_state: str | None | UnsetType = _UNSET,
        include_similarity: bool | None | UnsetType = _UNSET,
        include_sort_vector: bool | None | UnsetType = _UNSET,
        skip: int | None | UnsetType = _UNSET,
    ) -> AsyncCollectionFindCursor[TRAW, T]:
        """
        Create a new cursor on the same async collection, with the settings
        of this one except for those explicitly overridden.

        Each parameter left unset falls back to the value stored on this
        cursor; any other value (None included) replaces it. The only
        exception is `initial_page_state`, for which None means "erase":
        in that case the new cursor receives the unset marker instead.
        The mapper is always carried over unchanged.

        Returns:
            a new AsyncCollectionFindCursor with the resulting settings.

        Raises:
            RuntimeError: if the query engine holds no async collection.
        """
        async_collection = self._query_engine.async_collection
        if async_collection is None:
            raise RuntimeError("Query engine has no async collection.")

        def _pick(override: Any, current: Any) -> Any:
            # an unset override means "keep the value of this cursor"
            return current if isinstance(override, UnsetType) else override

        # special treatment: passing None erases (hence we must supply unset and not None):
        new_initial_page_state: str | UnsetType
        if isinstance(initial_page_state, UnsetType):
            new_initial_page_state = self._initial_page_state
        elif initial_page_state is None:
            new_initial_page_state = _UNSET
        else:
            new_initial_page_state = initial_page_state

        return AsyncCollectionFindCursor(
            collection=async_collection,
            request_timeout_ms=_pick(request_timeout_ms, self._request_timeout_ms),
            overall_timeout_ms=_pick(overall_timeout_ms, self._overall_timeout_ms),
            request_timeout_label=_pick(
                request_timeout_label, self._request_timeout_label
            ),
            overall_timeout_label=_pick(
                overall_timeout_label, self._overall_timeout_label
            ),
            filter=_pick(filter, self._filter),
            projection=_pick(projection, self._projection),
            sort=_pick(sort, self._sort),
            limit=_pick(limit, self._limit),
            initial_page_state=new_initial_page_state,
            include_similarity=_pick(include_similarity, self._include_similarity),
            include_sort_vector=_pick(
                include_sort_vector, self._include_sort_vector
            ),
            skip=_pick(skip, self._skip),
            mapper=self._mapper,
        )

    async def _try_ensure_fill_buffer(self) -> None:
        """
        Fetch the next page into the buffer, if the buffer is empty and
        there is a page to fetch. In all other cases, silently do nothing.
        This method never changes the cursor state.
        """

        if self._state == CursorState.CLOSED:
            return
        if self._buffer:
            # there still are items to consume locally
            return
        if self._next_page_state is None and self._state != CursorState.IDLE:
            # no further page is known, and this is not the very first fetch
            return
        timeout_context = self._timeout_manager.remaining_timeout(
            cap_time_ms=self._request_timeout_ms,
            cap_timeout_label=self._request_timeout_label,
        )
        fetched = await self._query_engine._async_fetch_page(
            page_state=self._next_page_state,
            timeout_context=timeout_context,
        )
        new_buffer, next_page_state, resp_status = fetched
        self._next_page_state = next_page_state
        self._last_response_status = resp_status
        self._pages_retrieved += 1
        self._buffer = new_buffer

    def __repr__(self) -> str:
        """
        Short description of the cursor: class name, name of the originating
        data source, current state and number of items consumed so far.
        """
        class_name = self.__class__.__name__
        source_name = self.data_source.name
        state_value = self._state.value
        return (
            f'{class_name}("{source_name}", {state_value}, '
            f"consumed so far: {self.consumed})"
        )

    def __aiter__(
        self: AsyncCollectionFindCursor[TRAW, T],
    ) -> AsyncCollectionFindCursor[TRAW, T]:
        """
        Make the cursor usable in `async for` constructs: the cursor is its
        own asynchronous iterator.

        Returns:
            this very cursor, after `_ensure_alive` has been run on it.
        """
        # check performed before any iteration is started on the cursor
        self._ensure_alive()
        return self

    async def __anext__(self) -> T:
        """
        Return the next item from the cursor, fetching a new page if needed.

        The first item consumed brings the cursor to the STARTED state; when
        nothing is left to consume, the cursor is set to CLOSED and the
        iteration ends.

        Returns:
            the next item, transformed by the mapping function if one is set.

        Raises:
            StopAsyncIteration: if the cursor is CLOSED or has no more items.
        """
        if self._state == CursorState.CLOSED:
            raise StopAsyncIteration
        await self._try_ensure_fill_buffer()
        if not self._buffer:
            # nothing could be fetched: the cursor is exhausted
            self._state = CursorState.CLOSED
            raise StopAsyncIteration
        self._state = CursorState.STARTED
        # consume one item from buffer
        raw_item = self._buffer[0]
        self._buffer = self._buffer[1:]
        self._consumed += 1
        if self._mapper is None:
            return cast(T, raw_item)
        return cast(T, self._mapper(raw_item))

    @property
    def data_source(self) -> AsyncCollection[TRAW]:
        """
        The AsyncCollection this cursor originates from, i.e. the one
        a `find` operation was invoked on.

        Returns:
            an AsyncCollection instance.

        Raises:
            RuntimeError: if the query engine holds no async collection.
        """

        source_collection = self._query_engine.async_collection
        if source_collection is None:
            raise RuntimeError("Query engine has no async collection.")
        return source_collection

    def clone(self) -> AsyncCollectionFindCursor[TRAW, T]:
        """
        Return a brand new cursor that:
        - has the same parameters as this one (timeouts, filter, projection, etc)
        - and is in its pristine IDLE state, regardless of the state of this one.

        For usage examples, please refer to the same method of the
        equivalent synchronous CollectionFindCursor class, and apply the necessary
        adaptations to the async interface.

        Returns:
            a new AsyncCollectionFindCursor, similar to this one but
            rewound to its initial state.

        Raises:
            RuntimeError: if the query engine holds no async collection.
        """

        async_collection = self._query_engine.async_collection
        if async_collection is None:
            raise RuntimeError("Query engine has no async collection.")
        # a cursor built from scratch with the stored settings (mapper included)
        fresh_cursor: AsyncCollectionFindCursor[TRAW, T] = AsyncCollectionFindCursor(
            collection=async_collection,
            request_timeout_ms=self._request_timeout_ms,
            overall_timeout_ms=self._overall_timeout_ms,
            request_timeout_label=self._request_timeout_label,
            overall_timeout_label=self._overall_timeout_label,
            filter=self._filter,
            projection=self._projection,
            sort=self._sort,
            limit=self._limit,
            initial_page_state=self._initial_page_state,
            include_similarity=self._include_similarity,
            include_sort_vector=self._include_sort_vector,
            skip=self._skip,
            mapper=self._mapper,
        )
        return fresh_cursor

    def filter(self, filter: FilterType | None) -> AsyncCollectionFindCursor[TRAW, T]:
        """
        Build a new cursor, identical to this one except for `filter`.
        The cursor must still be in the IDLE state for this to be allowed.

        Most of the time there is no need to call this method directly: the
        same effect is obtained by passing the desired arguments to the
        AsyncCollection `find` method.

        Args:
            filter: the filter setting for the new cursor.

        Returns:
            a new AsyncCollectionFindCursor carrying over all settings
                of this one, with `filter` replaced by the provided value.
        """

        # settings can be altered only before the cursor has been started
        self._ensure_idle()
        updated_cursor = self._copy(filter=filter)
        return updated_cursor

    def project(
        self, projection: ProjectionType | None
    ) -> AsyncCollectionFindCursor[TRAW, T]:
        """
        Build a new cursor, identical to this one except for `projection`.
        For this to be allowed, the cursor must still be in the IDLE state
        and must have no mapping function set.

        Most of the time there is no need to call this method directly: the
        same effect is obtained by passing the desired arguments to the
        AsyncCollection `find` method.

        Args:
            projection: the projection setting for the new cursor.

        Returns:
            a new AsyncCollectionFindCursor carrying over all settings
                of this one, with `projection` replaced by the provided value.

        Raises:
            CursorException: if a mapping function is already set on the cursor.
        """

        self._ensure_idle()
        if self._mapper is None:
            return self._copy(projection=projection)
        # a mapper is in place: changing the projection is refused
        raise CursorException(
            "Cannot set projection after map.",
            cursor_state=self._state.value,
        )

    def sort(self, sort: dict[str, Any] | None) -> AsyncCollectionFindCursor[TRAW, T]:
        """
        Build a new cursor, identical to this one except for `sort`.
        The cursor must still be in the IDLE state for this to be allowed.

        Most of the time there is no need to call this method directly: the
        same effect is obtained by passing the desired arguments to the
        AsyncCollection `find` method.

        Args:
            sort: the sort setting for the new cursor.

        Returns:
            a new AsyncCollectionFindCursor carrying over all settings
                of this one, with `sort` replaced by the provided value.
        """

        # settings can be altered only before the cursor has been started
        self._ensure_idle()
        updated_cursor = self._copy(sort=sort)
        return updated_cursor

    def limit(self, limit: int | None) -> AsyncCollectionFindCursor[TRAW, T]:
        """
        Build a new cursor identical to this one, except for its limit.

        The call is accepted only while the cursor is still in the IDLE state.
        In most cases the limit is supplied directly as an argument to the
        AsyncCollection `find` method, rather than through this method.

        Args:
            limit: the limit setting that the returned cursor will use.

        Returns:
            a new AsyncCollectionFindCursor carrying over all settings of this
                cursor, with `limit` replaced by the provided value.
        """

        # Builder methods are permitted on IDLE cursors only.
        self._ensure_idle()
        limited_cursor = self._copy(limit=limit)
        return limited_cursor

    def initial_page_state(
        self, initial_page_state: str | UnsetType
    ) -> AsyncCollectionFindCursor[TRAW, T]:
        """
        Build a new cursor identical to this one, except for its
        initial_page_state.

        The call is accepted only while the cursor is still in the IDLE state.
        In most cases the initial page state is supplied directly as an argument
        to the AsyncCollection `find` method, rather than through this method.

        Args:
            initial_page_state: the initial_page_state setting that the returned
                cursor will use. Passing an explicit None raises an error.

        Returns:
            a new AsyncCollectionFindCursor carrying over all settings of this
                cursor, with `initial_page_state` replaced by the provided value.
        """

        # Builder methods are permitted on IDLE cursors only.
        self._ensure_idle()
        repositioned_cursor = self._copy(initial_page_state=initial_page_state)
        return repositioned_cursor

    def include_similarity(
        self, include_similarity: bool | None
    ) -> AsyncCollectionFindCursor[TRAW, T]:
        """
        Build a new cursor identical to this one, except for its
        include_similarity flag.

        The call is accepted only while the cursor is still in the IDLE state
        and provided no mapping function has been attached to it.
        In most cases the flag is supplied directly as an argument to the
        AsyncCollection `find` method, rather than through this method.

        Args:
            include_similarity: the include_similarity setting that the
                returned cursor will use.

        Returns:
            a new AsyncCollectionFindCursor carrying over all settings of this
                cursor, with `include_similarity` replaced by the provided value.

        Raises:
            CursorException: if a mapping function is already set on the cursor.
        """

        # Builder methods are permitted on IDLE cursors only.
        self._ensure_idle()
        if self._mapper is None:
            return self._copy(include_similarity=include_similarity)
        # A mapper is in place: changing this flag is refused.
        raise CursorException(
            "Cannot set include_similarity after map.",
            cursor_state=self._state.value,
        )

    def include_sort_vector(
        self, include_sort_vector: bool | None
    ) -> AsyncCollectionFindCursor[TRAW, T]:
        """
        Build a new cursor identical to this one, except for its
        include_sort_vector flag.

        The call is accepted only while the cursor is still in the IDLE state.
        In most cases the flag is supplied directly as an argument to the
        AsyncCollection `find` method, rather than through this method.

        Args:
            include_sort_vector: the include_sort_vector setting that the
                returned cursor will use.

        Returns:
            a new AsyncCollectionFindCursor carrying over all settings of this
                cursor, with `include_sort_vector` replaced by the provided value.
        """

        # Builder methods are permitted on IDLE cursors only.
        self._ensure_idle()
        reconfigured_cursor = self._copy(include_sort_vector=include_sort_vector)
        return reconfigured_cursor

    def skip(self, skip: int | None) -> AsyncCollectionFindCursor[TRAW, T]:
        """
        Build a new cursor identical to this one, except for its skip setting.

        The call is accepted only while the cursor is still in the IDLE state.
        In most cases the skip is supplied directly as an argument to the
        AsyncCollection `find` method, rather than through this method.

        Args:
            skip: the skip setting that the returned cursor will use.

        Returns:
            a new AsyncCollectionFindCursor carrying over all settings of this
                cursor, with `skip` replaced by the provided value.
        """

        # Builder methods are permitted on IDLE cursors only.
        self._ensure_idle()
        skipping_cursor = self._copy(skip=skip)
        return skipping_cursor

    def map(self, mapper: Callable[[T], TNEW]) -> AsyncCollectionFindCursor[TRAW, TNEW]:
        """
        Build a new cursor that applies a mapping function to every item it
        returns. If this cursor already has a mapping function, the new one is
        composed with it (the existing mapping runs first, then `mapper`).

        The call is accepted only while the cursor is still in the IDLE state.

        Usage examples can be found on the same method of the synchronous
        CollectionFindCursor class; they carry over to this class once adapted
        to the async interface.

        Args:
            mapper: a function turning the objects returned by this cursor
                into something else (i.e. a function T => TNEW).

        Returns:
            a new AsyncCollectionFindCursor whose results go through the new
                mapping function, composed with any pre-existing one.

        Raises:
            RuntimeError: if the query engine holds no async collection.
        """

        self._ensure_idle()
        source_collection = self._query_engine.async_collection
        if source_collection is None:
            raise RuntimeError("Query engine has no async collection.")

        full_mapper: Callable[[TRAW], TNEW]
        if self._mapper is None:
            # No previous mapping: the raw documents go straight into `mapper`.
            full_mapper = cast(Callable[[TRAW], TNEW], mapper)
        else:

            def _chained(document: TRAW) -> TNEW:
                # Existing mapping first, then the newly supplied one.
                return mapper(self._mapper(document))  # type: ignore[misc]

            full_mapper = _chained

        return AsyncCollectionFindCursor(
            collection=source_collection,
            request_timeout_ms=self._request_timeout_ms,
            overall_timeout_ms=self._overall_timeout_ms,
            request_timeout_label=self._request_timeout_label,
            overall_timeout_label=self._overall_timeout_label,
            filter=self._filter,
            projection=self._projection,
            sort=self._sort,
            limit=self._limit,
            initial_page_state=self._initial_page_state,
            include_similarity=self._include_similarity,
            include_sort_vector=self._include_sort_vector,
            skip=self._skip,
            mapper=full_mapper,
        )

    async def for_each(
        self,
        function: Callable[[T], bool | None] | Callable[[T], Awaitable[bool | None]],
        *,
        general_method_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Run a callback (a plain function or a coroutine) on every document that
        is still to be consumed from this cursor.

        A CLOSED cursor cannot be used: calling this method on it is an error.

        Whatever the callback returns is ignored, unless it is exactly the
        boolean `False`: in that case the iteration stops right away and the
        cursor is left half-consumed (ACTIVE state). When no early exit takes
        place, the cursor is exhausted and ends up in the CLOSED state.

        Usage examples can be found on the same method of the synchronous
        CollectionFindCursor class; they carry over to this class once adapted
        to the async interface.

        Args:
            function: a callback function, or a coroutine, taking a single
                argument of the type returned by the cursor.
                It is invoked once for each document yielded by the cursor.
                If it returns `False`, the `for_each` invocation stops early
                and returns without consuming further documents.
            general_method_timeout_ms: a timeout, in milliseconds, for the whole
                duration of this method. If not provided, there is no such timeout.
                Note that the per-request timeout set on the cursor still applies.
            timeout_ms: an alias for `general_method_timeout_ms`.
        """

        self._ensure_alive()
        request_ms, overall_ms = _revise_timeouts_for_cursor_copy(
            new_general_method_timeout_ms=general_method_timeout_ms,
            new_timeout_ms=timeout_ms,
            old_request_timeout_ms=self._request_timeout_ms,
        )
        # The iteration runs on a copy configured with the revised timeouts.
        working_cursor = self._copy(
            request_timeout_ms=request_ms,
            overall_timeout_ms=overall_ms,
        )
        # NOTE(review): presumably transfers this cursor's progress onto the
        # copy -- confirm against `_imprint_internal_state`.
        self._imprint_internal_state(working_cursor)
        callback_is_coroutine = iscoroutinefunction(function)
        async for document in working_cursor:
            outcome = function(document)
            if callback_is_coroutine:
                outcome = await outcome  # type: ignore[misc]
            # Only an explicit `False` requests an early exit.
            if outcome is False:
                break
        # NOTE(review): presumably hands the copy's progress back to this cursor.
        working_cursor._imprint_internal_state(self)

    async def to_list(
        self,
        *,
        general_method_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[T]:
        """
        Collect into a list all documents that are still to be consumed from
        this cursor.

        A CLOSED cursor cannot be used: calling this method on it is an error.

        On an IDLE cursor the list holds the whole result set of the `find`
        operation; otherwise, documents the cursor has already consumed are
        not part of the returned list.

        This method is discouraged when a very large result set is expected:
        it would require many data exchanges with the Data API and possibly a
        massive memory usage to build the list. In such cases, lazily iterating
        over the cursor and consuming the documents is to be preferred.

        Usage examples can be found on the same method of the synchronous
        CollectionFindCursor class; they carry over to this class once adapted
        to the async interface.

        Args:
            general_method_timeout_ms: a timeout, in milliseconds, for the whole
                duration of this method. If not provided, there is no such timeout.
                Note that the per-request timeout set on the cursor still applies.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a list of documents (or other values depending on the mapping
                function, if one is set): all the items that were left to
                consume on the cursor when `to_list` was called.
        """

        self._ensure_alive()
        request_ms, overall_ms = _revise_timeouts_for_cursor_copy(
            new_general_method_timeout_ms=general_method_timeout_ms,
            new_timeout_ms=timeout_ms,
            old_request_timeout_ms=self._request_timeout_ms,
        )
        # The iteration runs on a copy configured with the revised timeouts.
        working_cursor = self._copy(
            request_timeout_ms=request_ms,
            overall_timeout_ms=overall_ms,
        )
        # NOTE(review): presumably transfers this cursor's progress onto the
        # copy -- confirm against `_imprint_internal_state`.
        self._imprint_internal_state(working_cursor)
        collected: list[T] = []
        async for item in working_cursor:
            collected.append(item)
        # NOTE(review): presumably hands the copy's progress back to this cursor.
        working_cursor._imprint_internal_state(self)
        return collected

    async def has_next(self) -> bool:
        """
        Tell whether the cursor still has documents to return.

        This method can be called on a cursor in any state; for a CLOSED
        cursor the answer is always False.

        If the current buffer is empty, calling this method can trigger the
        fetch of a new page.

        On an IDLE cursor, `has_next` triggers the first page fetch, but the
        cursor stays in the IDLE state until actual consumption starts.

        Returns:
            True if at least one more item is available to consume; False
                otherwise (which includes the case of a CLOSED cursor).
        """

        cursor_is_closed = self._state == CursorState.CLOSED
        if not cursor_is_closed:
            await self._try_ensure_fill_buffer()
            buffered_count = len(self._buffer)
            return buffered_count > 0
        return False

    async def get_sort_vector(self) -> list[float] | DataAPIVector | None:
        """
        Give back the query vector of the vector (ANN) search that originated
        this cursor, when applicable. None is returned if this is not an ANN
        search or if it was run without the `include_sort_vector` flag.

        On an IDLE cursor, `get_sort_vector` triggers the first page fetch,
        but the cursor stays in the IDLE state until actual consumption starts.

        The method can also be invoked on a CLOSED cursor, returning either
        None or the sort vector used in the search.

        Returns:
            the query vector used in the search if this was a
                vector search (otherwise None). The vector is returned either
                as a DataAPIVector or a plain list of numbers depending on the
                `APIOptions.serdes_options` that apply. The query vector is
                available also for vectorize-based ANN searches.
        """

        await self._try_ensure_fill_buffer()
        response_status = self._last_response_status
        if not response_status:
            # No status information is available to read a vector from.
            return None
        return _ensure_vector(
            response_status.get("sortVector"),
            self.data_source.api_options.serdes_options,
        )

    async def fetch_next_page(self) -> FindPage[T]:
        """
        Fetch one whole page of results from the Data API and return it in a
        single object, along with the associated "out-of-band" information.

        This is the way to consume a cursor when the caller needs to work
        explicitly page by page. It pairs with the creation of cursors 'set to
        start from a certain page' through the `initial_page_state` constructor
        parameter/builder method: the page state to supply (a string) typically
        comes from the `FindPage` returned by this method for a previous page
        of the same find operation.

        Returns:
            a `FindPage` object expressing the full Data API response, including
            the resulting documents (after applying the cursor mapping function,
            if one is defined), as well as the state to use to query for the next
            page (a string) and the sort vector if requested and applicable.

        Raises:
            CursorException: if the buffer still holds items, i.e. paginated
                retrieval is being mixed with regular cursor iteration.
        """

        self._ensure_alive()
        if self._buffer:
            text = "Paginated retrieval cannot be mixed with regular cursor iteration."
            raise CursorException(
                text=text,
                cursor_state=self._state.value,
            )

        await self._try_ensure_fill_buffer()

        # The buffer length and the next-page state are read up front, before
        # the buffered items are consumed one by one through `__anext__`.
        page_size = len(self._buffer)
        following_page_state = self._next_page_state
        page_items: list[T] = [await self.__anext__() for _ in range(page_size)]

        response_status = self._last_response_status
        page_sort_vector: list[float] | DataAPIVector | None = (
            _ensure_vector(
                response_status.get("sortVector"),
                self.data_source.api_options.serdes_options,
            )
            if response_status
            else None
        )

        return FindPage(
            results=page_items,
            next_page_state=following_page_state,
            sort_vector=page_sort_vector,
        )

Ancestors

AbstractCursor
abc.ABC
typing.Generic

Instance variables

var data_source : AsyncCollection[~TRAW]

The AsyncCollection object that originated this cursor through a find operation.

Returns

an AsyncCollection instance.

Expand source code

@property
def data_source(self) -> AsyncCollection[TRAW]:
    """
    The AsyncCollection object from which this cursor was obtained
    through a `find` operation.

    Returns:
        an AsyncCollection instance.

    Raises:
        RuntimeError: if the query engine holds no async collection.
    """

    originating_collection = self._query_engine.async_collection
    if originating_collection is None:
        raise RuntimeError("Query engine has no async collection.")
    return originating_collection

Methods

def clone(self) ‑> AsyncCollectionFindCursor[~TRAW, ~T]

Create a copy of this cursor with the same parameters (timeouts, filter, projection, etc.), rewound to its pristine IDLE state.

For usage examples, please refer to the same method of the equivalent synchronous CollectionFindCursor class, and apply the necessary adaptations to the async interface.

Returns

a new AsyncCollectionFindCursor, similar to this one but rewound to its initial state.

Expand source code

def clone(self) -> AsyncCollectionFindCursor[TRAW, T]:
    """
    Produce a fresh cursor that:
    - has the same parameters as this one (timeouts, filter, projection, etc)
    - and starts from the pristine IDLE state, i.e. it is rewound.

    Usage examples can be found on the same method of the synchronous
    CollectionFindCursor class; they carry over to this class once adapted
    to the async interface.

    Returns:
        a new AsyncCollectionFindCursor, similar to this one but
        rewound to its initial state.

    Raises:
        RuntimeError: if the query engine holds no async collection.
    """

    source_collection = self._query_engine.async_collection
    if source_collection is None:
        raise RuntimeError("Query engine has no async collection.")
    # Every setting, including the mapping function, is carried over as is.
    return AsyncCollectionFindCursor(
        collection=source_collection,
        request_timeout_ms=self._request_timeout_ms,
        overall_timeout_ms=self._overall_timeout_ms,
        request_timeout_label=self._request_timeout_label,
        overall_timeout_label=self._overall_timeout_label,
        filter=self._filter,
        projection=self._projection,
        sort=self._sort,
        limit=self._limit,
        initial_page_state=self._initial_page_state,
        include_similarity=self._include_similarity,
        include_sort_vector=self._include_sort_vector,
        skip=self._skip,
        mapper=self._mapper,
    )

async def fetch_next_page(self) ‑> FindPage[~T]

Retrieve a single, whole page of results from the Data API and return it at once, together with associated "out-of-band" information.

Returns

a FindPage object expressing the full Data API response, including the resulting documents (after applying the cursor mapping function, if one is defined), as well as the state to use to query for the next page (a string) and the sort vector if requested and applicable.

Expand source code

async def fetch_next_page(self) -> FindPage[T]:
    """
    Fetch one whole page of results from the Data API and return it in a
    single object, along with the associated "out-of-band" information.

    This is the way to consume a cursor when the caller needs to work
    explicitly page by page. It pairs with the creation of cursors 'set to
    start from a certain page' through the `initial_page_state` constructor
    parameter/builder method: the page state to supply (a string) typically
    comes from the `FindPage` returned by this method for a previous page
    of the same find operation.

    Returns:
        a `FindPage` object expressing the full Data API response, including
        the resulting documents (after applying the cursor mapping function,
        if one is defined), as well as the state to use to query for the next
        page (a string) and the sort vector if requested and applicable.

    Raises:
        CursorException: if the buffer still holds items, i.e. paginated
            retrieval is being mixed with regular cursor iteration.
    """

    self._ensure_alive()
    if self._buffer:
        text = "Paginated retrieval cannot be mixed with regular cursor iteration."
        raise CursorException(
            text=text,
            cursor_state=self._state.value,
        )

    await self._try_ensure_fill_buffer()

    # The buffer length and the next-page state are read up front, before
    # the buffered items are consumed one by one through `__anext__`.
    page_size = len(self._buffer)
    following_page_state = self._next_page_state
    page_items: list[T] = [await self.__anext__() for _ in range(page_size)]

    response_status = self._last_response_status
    page_sort_vector: list[float] | DataAPIVector | None = (
        _ensure_vector(
            response_status.get("sortVector"),
            self.data_source.api_options.serdes_options,
        )
        if response_status
        else None
    )

    return FindPage(
        results=page_items,
        next_page_state=following_page_state,
        sort_vector=page_sort_vector,
    )

def filter(self, filter: FilterType | None) ‑> AsyncCollectionFindCursor[~TRAW, ~T]

Return a copy of this cursor with a new filter setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the AsyncCollection find method.

Args

filter: a new filter setting to apply to the returned new cursor.

Returns

a new AsyncCollectionFindCursor with the same settings as this one, except for filter which is the provided value.

Expand source code

def filter(self, filter: FilterType | None) -> AsyncCollectionFindCursor[TRAW, T]:
    """
    Build a new cursor identical to this one, except for its filter.

    The call is accepted only while the cursor is still in the IDLE state.
    In most cases the filter is supplied directly as an argument to the
    AsyncCollection `find` method, rather than through this method.

    Args:
        filter: the filter setting that the returned cursor will use.

    Returns:
        a new AsyncCollectionFindCursor carrying over all settings of this
            cursor, with `filter` replaced by the provided value.
    """

    # Builder methods are permitted on IDLE cursors only.
    self._ensure_idle()
    filtered_cursor = self._copy(filter=filter)
    return filtered_cursor

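async def for_each(self, function: Callable[[T], bool | None] | Callable[[T], Awaitable[bool | None]], *, general_method_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Consume the remaining documents in the cursor, invoking a provided callback function – or coroutine – on each of them.

Calling this method on a CLOSED cursor results in an error.

The callback function can return any value. The return value is generally discarded, with the following exception: if the function returns the boolean False, it is taken to signify that the method should quit early, leaving the cursor half-consumed (ACTIVE state). If this does not occur, this method results in the cursor entering CLOSED state once it is exhausted.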

For usage examples, please refer to the same method of the equivalent synchronous CollectionFindCursor class, and apply the necessary adaptations to the async interface.

Args

function: a callback function, or a coroutine, whose only parameter is of the type returned by the cursor. This callback is invoked once for each document yielded by the cursor. If the callback returns False, the for_each invocation stops early and returns without consuming further documents.
general_method_timeout_ms: a timeout, in milliseconds, for the whole duration of this method. If not provided, there is no such timeout. Note that the per-request timeout set on the cursor still applies.
timeout_ms: an alias for general_method_timeout_ms.

Expand source code

async def for_each(
    self,
    function: Callable[[T], bool | None] | Callable[[T], Awaitable[bool | None]],
    *,
    general_method_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Run a callback (a plain function or a coroutine) on every document that
    is still to be consumed from this cursor.

    A CLOSED cursor cannot be used: calling this method on it is an error.

    Whatever the callback returns is ignored, unless it is exactly the
    boolean `False`: in that case the iteration stops right away and the
    cursor is left half-consumed (ACTIVE state). When no early exit takes
    place, the cursor is exhausted and ends up in the CLOSED state.

    Usage examples can be found on the same method of the synchronous
    CollectionFindCursor class; they carry over to this class once adapted
    to the async interface.

    Args:
        function: a callback function, or a coroutine, taking a single
            argument of the type returned by the cursor.
            It is invoked once for each document yielded by the cursor.
            If it returns `False`, the `for_each` invocation stops early
            and returns without consuming further documents.
        general_method_timeout_ms: a timeout, in milliseconds, for the whole
            duration of this method. If not provided, there is no such timeout.
            Note that the per-request timeout set on the cursor still applies.
        timeout_ms: an alias for `general_method_timeout_ms`.
    """

    self._ensure_alive()
    request_ms, overall_ms = _revise_timeouts_for_cursor_copy(
        new_general_method_timeout_ms=general_method_timeout_ms,
        new_timeout_ms=timeout_ms,
        old_request_timeout_ms=self._request_timeout_ms,
    )
    # The iteration runs on a copy configured with the revised timeouts.
    working_cursor = self._copy(
        request_timeout_ms=request_ms,
        overall_timeout_ms=overall_ms,
    )
    # NOTE(review): presumably transfers this cursor's progress onto the
    # copy -- confirm against `_imprint_internal_state`.
    self._imprint_internal_state(working_cursor)
    callback_is_coroutine = iscoroutinefunction(function)
    async for document in working_cursor:
        outcome = function(document)
        if callback_is_coroutine:
            outcome = await outcome  # type: ignore[misc]
        # Only an explicit `False` requests an early exit.
        if outcome is False:
            break
    # NOTE(review): presumably hands the copy's progress back to this cursor.
    working_cursor._imprint_internal_state(self)

async def get_sort_vector(self) ‑> list[float] | DataAPIVector | None

Return the query vector used in the vector (ANN) search that originated this cursor, if applicable. If this is not an ANN search, or it was invoked without the include_sort_vector flag, return None.

Calling get_sort_vector on an IDLE cursor triggers the first page fetch, but the cursor stays in the IDLE state until actual consumption starts.

The method can be invoked on a CLOSED cursor and will return either None or the sort vector used in the search.

Returns

the query vector used in the search if this was a vector search (otherwise None). The vector is returned either as a DataAPIVector or a plain list of numbers depending on the APIOptions.serdes_options that apply. The query vector is available also for vectorize-based ANN searches.

Expand source code

async def get_sort_vector(self) -> list[float] | DataAPIVector | None:
    """
    Give back the query vector of the vector (ANN) search that originated
    this cursor, when applicable. None is returned if this is not an ANN
    search or if it was run without the `include_sort_vector` flag.

    On an IDLE cursor, `get_sort_vector` triggers the first page fetch,
    but the cursor stays in the IDLE state until actual consumption starts.

    The method can also be invoked on a CLOSED cursor, returning either
    None or the sort vector used in the search.

    Returns:
        the query vector used in the search if this was a
            vector search (otherwise None). The vector is returned either
            as a DataAPIVector or a plain list of numbers depending on the
            `APIOptions.serdes_options` that apply. The query vector is
            available also for vectorize-based ANN searches.
    """

    await self._try_ensure_fill_buffer()
    response_status = self._last_response_status
    if not response_status:
        # No status information is available to read a vector from.
        return None
    return _ensure_vector(
        response_status.get("sortVector"),
        self.data_source.api_options.serdes_options,
    )

async def has_next(self) ‑> bool

Whether the cursor actually has more documents to return.

has_next can be called on any cursor, but on a CLOSED cursor will always return False.

This method can trigger the fetch operation of a new page, if the current buffer is empty.

Calling has_next on an IDLE cursor triggers the first page fetch, but the cursor stays in the IDLE state until actual consumption starts.

Returns

a boolean value of True if there is at least one further item available to consume; False otherwise (including the case of CLOSED cursor).

Expand source code

async def has_next(self) -> bool:
    """
    Tell whether the cursor still has documents to return.

    This method can be called on a cursor in any state; for a CLOSED
    cursor the answer is always False.

    If the current buffer is empty, calling this method can trigger the
    fetch of a new page.

    On an IDLE cursor, `has_next` triggers the first page fetch, but the
    cursor stays in the IDLE state until actual consumption starts.

    Returns:
        True if at least one more item is available to consume; False
            otherwise (which includes the case of a CLOSED cursor).
    """

    cursor_is_closed = self._state == CursorState.CLOSED
    if not cursor_is_closed:
        await self._try_ensure_fill_buffer()
        buffered_count = len(self._buffer)
        return buffered_count > 0
    return False

def include_similarity(self, include_similarity: bool | None) ‑> AsyncCollectionFindCursor[~TRAW, ~T]

Return a copy of this cursor with a new include_similarity setting. This operation is allowed only if the cursor state is still IDLE and if no mapping has been set on it.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the AsyncCollection find method.

Args

include_similarity: a new include_similarity setting to apply to the returned new cursor.

Returns

a new AsyncCollectionFindCursor with the same settings as this one, except for include_similarity which is the provided value.

Expand source code

def include_similarity(
    self, include_similarity: bool | None
) -> AsyncCollectionFindCursor[TRAW, T]:
    """
    Build a new cursor identical to this one, except for its
    include_similarity flag.

    The call is accepted only while the cursor is still in the IDLE state
    and provided no mapping function has been attached to it.
    In most cases the flag is supplied directly as an argument to the
    AsyncCollection `find` method, rather than through this method.

    Args:
        include_similarity: the include_similarity setting that the
            returned cursor will use.

    Returns:
        a new AsyncCollectionFindCursor carrying over all settings of this
            cursor, with `include_similarity` replaced by the provided value.

    Raises:
        CursorException: if a mapping function is already set on the cursor.
    """

    # Builder methods are permitted on IDLE cursors only.
    self._ensure_idle()
    if self._mapper is None:
        return self._copy(include_similarity=include_similarity)
    # A mapper is in place: changing this flag is refused.
    raise CursorException(
        "Cannot set include_similarity after map.",
        cursor_state=self._state.value,
    )

def include_sort_vector(self, include_sort_vector: bool | None) ‑> AsyncCollectionFindCursor[~TRAW, ~T]

Return a copy of this cursor with a new include_sort_vector setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the AsyncCollection find method.

Args

include_sort_vector: a new include_sort_vector setting to apply to the returned new cursor.

Returns

a new AsyncCollectionFindCursor with the same settings as this one, except for include_sort_vector which is the provided value.

Expand source code

def include_sort_vector(
    self, include_sort_vector: bool | None
) -> AsyncCollectionFindCursor[TRAW, T]:
    """
    Build a new cursor identical to this one, except for its
    include_sort_vector flag.

    The call is accepted only while the cursor is still in the IDLE state.
    In most cases the flag is supplied directly as an argument to the
    AsyncCollection `find` method, rather than through this method.

    Args:
        include_sort_vector: the include_sort_vector setting that the
            returned cursor will use.

    Returns:
        a new AsyncCollectionFindCursor carrying over all settings of this
            cursor, with `include_sort_vector` replaced by the provided value.
    """

    # Builder methods are permitted on IDLE cursors only.
    self._ensure_idle()
    reconfigured_cursor = self._copy(include_sort_vector=include_sort_vector)
    return reconfigured_cursor

def initial_page_state(self, initial_page_state: str | UnsetType) ‑> AsyncCollectionFindCursor[~TRAW, ~T]

Return a copy of this cursor with a new initial_page_state setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the AsyncCollection find method.

Args

initial_page_state: a new initial_page_state setting to apply to the returned new cursor. Passing an explicit None raises an error.

Returns

a new AsyncCollectionFindCursor with the same settings as this one, except for initial_page_state which is the provided value.

Expand source code

def initial_page_state(
    self, initial_page_state: str | UnsetType
) -> AsyncCollectionFindCursor[TRAW, T]:
    """
    Build a new cursor that differs from this one only in its
    initial_page_state setting. The cursor must still be in the IDLE state
    for this operation to be allowed.

    Rather than calling this method directly, callers usually supply the
    corresponding argument to the AsyncCollection `find` method.

    Args:
        initial_page_state: the initial_page_state setting for the new cursor.
            Passing an explicit None is documented as raising an error
            (NOTE(review): that check is not part of this method's body;
            confirm where it is enforced).

    Returns:
        a new AsyncCollectionFindCursor, identical to this one except
            for `initial_page_state`, which takes the provided value.
    """

    # the idle-state check comes first, before any copy is attempted
    self._ensure_idle()
    updated_cursor = self._copy(initial_page_state=initial_page_state)
    return updated_cursor

def limit(self, limit: int | None) ‑> AsyncCollectionFindCursor[~TRAW, ~T]

Return a copy of this cursor with a new limit setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the AsyncCollection find method.

Args

limit: a new limit setting to apply to the returned new cursor.

Returns

a new AsyncCollectionFindCursor with the same settings as this one, except for limit which is the provided value.

Expand source code

def limit(self, limit: int | None) -> AsyncCollectionFindCursor[TRAW, T]:
    """
    Build a new cursor that differs from this one only in its limit setting.
    The cursor must still be in the IDLE state for this operation
    to be allowed.

    Rather than calling this method directly, callers usually supply the
    corresponding argument to the AsyncCollection `find` method.

    Args:
        limit: the limit setting for the new cursor.

    Returns:
        a new AsyncCollectionFindCursor, identical to this one except
            for `limit`, which takes the provided value.
    """

    # the idle-state check comes first, before any copy is attempted
    self._ensure_idle()
    updated_cursor = self._copy(limit=limit)
    return updated_cursor

def map(self, mapper: Callable[[T], TNEW]) ‑> AsyncCollectionFindCursor[~TRAW, ~TNEW]

Return a copy of this cursor with a mapping function to transform the returned items. Calling this method on a cursor with a mapping already set results in the mapping functions being composed.

This operation is allowed only if the cursor state is still IDLE.

For usage examples, please refer to the same method of the equivalent synchronous CollectionFindCursor class, and apply the necessary adaptations to the async interface.

Args

mapper: a function transforming the objects returned by the cursor into something else (i.e. a function T => TNEW).

Returns

a new AsyncCollectionFindCursor with a new mapping function on the results, possibly composed with any pre-existing mapping function.

Expand source code

def map(self, mapper: Callable[[T], TNEW]) -> AsyncCollectionFindCursor[TRAW, TNEW]:
    """
    Build a new cursor whose returned items are transformed by a mapping
    function. If this cursor already carries a mapping, the new function
    is applied on top of it (i.e. the two are composed).

    The cursor must still be in the IDLE state for this operation
    to be allowed.

    For usage examples, please refer to the same method of the
    equivalent synchronous CollectionFindCursor class, and apply the necessary
    adaptations to the async interface.

    Args:
        mapper: a function transforming the objects returned by the cursor
            into something else (i.e. a function T => TNEW).

    Returns:
        a new AsyncCollectionFindCursor with a new mapping function on the results,
            possibly composed with any pre-existing mapping function.
    """

    self._ensure_idle()
    source_collection = self._query_engine.async_collection
    if source_collection is None:
        raise RuntimeError("Query engine has no async collection.")

    full_mapper: Callable[[TRAW], TNEW]
    if self._mapper is None:
        # no mapping yet: the provided function acts directly on raw items
        full_mapper = cast(Callable[[TRAW], TNEW], mapper)
    else:

        def _chained(raw_document: TRAW) -> TNEW:
            # pre-existing mapping first, then the newly-provided one
            return mapper(self._mapper(raw_document))  # type: ignore[misc]

        full_mapper = _chained

    return AsyncCollectionFindCursor(
        collection=source_collection,
        request_timeout_ms=self._request_timeout_ms,
        overall_timeout_ms=self._overall_timeout_ms,
        request_timeout_label=self._request_timeout_label,
        overall_timeout_label=self._overall_timeout_label,
        filter=self._filter,
        projection=self._projection,
        sort=self._sort,
        limit=self._limit,
        initial_page_state=self._initial_page_state,
        include_similarity=self._include_similarity,
        include_sort_vector=self._include_sort_vector,
        skip=self._skip,
        mapper=full_mapper,
    )

def project(self, projection: ProjectionType | None) ‑> AsyncCollectionFindCursor[~TRAW, ~T]

Return a copy of this cursor with a new projection setting. This operation is allowed only if the cursor state is still IDLE and if no mapping has been set on it.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the AsyncCollection find method.

Args

projection: a new projection setting to apply to the returned new cursor.

Returns

a new AsyncCollectionFindCursor with the same settings as this one, except for projection which is the provided value.

Expand source code

def project(
    self, projection: ProjectionType | None
) -> AsyncCollectionFindCursor[TRAW, T]:
    """
    Build a new cursor that differs from this one only in its projection
    setting. This is allowed only while the cursor is still in the IDLE state
    and provided no mapping function has been set on it.

    Rather than calling this method directly, callers usually supply the
    corresponding argument to the AsyncCollection `find` method.

    Args:
        projection: the projection setting for the new cursor.

    Returns:
        a new AsyncCollectionFindCursor, identical to this one except
            for `projection`, which takes the provided value.

    Raises:
        CursorException: if a mapping function is already set on this cursor.
    """

    self._ensure_idle()
    if self._mapper is None:
        return self._copy(projection=projection)
    # a mapping is in place: changing the projection is refused
    raise CursorException(
        "Cannot set projection after map.",
        cursor_state=self._state.value,
    )

def skip(self, skip: int | None) ‑> AsyncCollectionFindCursor[~TRAW, ~T]

Return a copy of this cursor with a new skip setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the AsyncCollection find method.

Args

skip: a new skip setting to apply to the returned new cursor.

Returns

a new AsyncCollectionFindCursor with the same settings as this one, except for skip which is the provided value.

Expand source code

def skip(self, skip: int | None) -> AsyncCollectionFindCursor[TRAW, T]:
    """
    Build a new cursor that differs from this one only in its skip setting.
    The cursor must still be in the IDLE state for this operation
    to be allowed.

    Rather than calling this method directly, callers usually supply the
    corresponding argument to the AsyncCollection `find` method.

    Args:
        skip: the skip setting for the new cursor.

    Returns:
        a new AsyncCollectionFindCursor, identical to this one except
            for `skip`, which takes the provided value.
    """

    # the idle-state check comes first, before any copy is attempted
    self._ensure_idle()
    updated_cursor = self._copy(skip=skip)
    return updated_cursor

def sort(self, sort: dict[str, Any] | None) ‑> AsyncCollectionFindCursor[~TRAW, ~T]

Return a copy of this cursor with a new sort setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the AsyncCollection find method.

Args

sort: a new sort setting to apply to the returned new cursor.

Returns

a new AsyncCollectionFindCursor with the same settings as this one, except for sort which is the provided value.

Expand source code

def sort(self, sort: dict[str, Any] | None) -> AsyncCollectionFindCursor[TRAW, T]:
    """
    Build a new cursor that differs from this one only in its sort setting.
    The cursor must still be in the IDLE state for this operation
    to be allowed.

    Rather than calling this method directly, callers usually supply the
    corresponding argument to the AsyncCollection `find` method.

    Args:
        sort: the sort setting for the new cursor.

    Returns:
        a new AsyncCollectionFindCursor, identical to this one except
            for `sort`, which takes the provided value.
    """

    # the idle-state check comes first, before any copy is attempted
    self._ensure_idle()
    updated_cursor = self._copy(sort=sort)
    return updated_cursor

async def to_list(self, *, general_method_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[~T]

Materialize all documents that remain to be consumed from a cursor into a list.

Calling this method on a CLOSED cursor results in an error.

If the cursor is IDLE, the result will be the whole set of documents returned by the find operation; otherwise, the documents already consumed by the cursor will not be in the resulting list.

For usage examples, please refer to the same method of the equivalent synchronous CollectionFindCursor class, and apply the necessary adaptations to the async interface.

Args

general_method_timeout_ms: a timeout, in milliseconds, for the whole duration of this method. If not provided, there is no such timeout. Note that the per-request timeout set on the cursor still applies.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a list of documents (or other values depending on the mapping function, if one is set). These are all items that were left to be consumed on the cursor when to_list is called.

Expand source code

async def to_list(
    self,
    *,
    general_method_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[T]:
    """
    Collect into a list all documents still to be consumed from this cursor.

    Calling this method on a CLOSED cursor results in an error.

    On an IDLE cursor, the result is the full set of documents returned
    by the `find` operation; otherwise, documents the cursor has already
    consumed are not part of the resulting list.

    This method is not advisable when a very large result set is expected:
    it would require many data exchanges with the Data API and possibly
    a large amount of memory to build the list. In such cases, lazily
    iterating over the cursor and consuming documents one by one is preferable.

    For usage examples, please refer to the same method of the
    equivalent synchronous CollectionFindCursor class, and apply the necessary
    adaptations to the async interface.

    Args:
        general_method_timeout_ms: a timeout, in milliseconds, for the whole
            duration of this method. If not provided, there is no such timeout.
            Note that the per-request timeout set on the cursor still applies.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a list of documents (or other values depending on the mapping
            function, if one is set). These are all items that were left
            to be consumed on the cursor when `to_list` is called.
    """

    self._ensure_alive()
    revised_request_ms, revised_overall_ms = _revise_timeouts_for_cursor_copy(
        new_general_method_timeout_ms=general_method_timeout_ms,
        new_timeout_ms=timeout_ms,
        old_request_timeout_ms=self._request_timeout_ms,
    )
    # the actual iteration runs on a copy carrying the revised timeouts ...
    working_cursor = self._copy(
        request_timeout_ms=revised_request_ms,
        overall_timeout_ms=revised_overall_ms,
    )
    # ... which first receives this cursor's internal state
    self._imprint_internal_state(working_cursor)
    collected: list[T] = []
    async for item in working_cursor:
        collected.append(item)
    # finally, the copy's internal state is imprinted back onto this cursor
    working_cursor._imprint_internal_state(self)
    return collected

Inherited members

AbstractCursor:
- buffered_count
- close
- consume_buffer
- consumed
- cursor_id
- rewind
- state

An asynchronous cursor over rows, as returned by a find invocation on an AsyncTable. A cursor can be iterated over, materialized into a list, and queried/manipulated in various ways.

Some cursor operations mutate it in-place (such as consuming its rows), others return a new cursor without changing the original one. See the documentation for the various methods and the AsyncTable find method for more details and usage patterns.

A cursor has two type parameters: TRAW and T. The first is the type of the "raw" rows as they are obtained from the Data API, the second is the type of the items after the optional mapping function (see the .map() method). If there is no mapping, TRAW = T. In general, consuming a cursor returns items of type T, except for the consume_buffer primitive that draws directly from the buffer and always returns items of type TRAW.

This class is the async counterpart of the TableFindCursor, for use with asyncio. Other than the async interface, its behavior is identical: please refer to the documentation for TableFindCursor for examples and details.

Expand source code

class AsyncTableFindCursor(Generic[TRAW, T], AbstractCursor[TRAW]):
    """
    An asynchronous cursor over rows, as returned by a `find` invocation on
    an AsyncTable. A cursor can be iterated over, materialized into a list,
    and queried/manipulated in various ways.

    Some cursor operations mutate it in-place (such as consuming its rows),
    others return a new cursor without changing the original one. See the
    documentation for the various methods and the AsyncTable `find` method
    for more details and usage patterns.

    A cursor has two type parameters: TRAW and T. The first is the type of the "raw"
    rows as they are obtained from the Data API, the second is the type of the
    items after the optional mapping function (see the `.map()` method). If there is
    no mapping, TRAW = T. In general, consuming a cursor returns items of type T,
    except for the `consume_buffer` primitive that draws directly from the buffer
    and always returns items of type TRAW.

    This class is the async counterpart of the TableFindCursor, for use with
    asyncio. Other than the async interface, its behavior is identical: please refer
    to the documentation for `TableFindCursor` for examples and details.
    """

    # Instance attributes, all assigned in `__init__`:
    # the query engine, the timeout settings and their manager,
    # the find parameters, and the optional mapping function.
    _query_engine: _TableFindQueryEngine[TRAW]
    _request_timeout_ms: int | None
    _overall_timeout_ms: int | None
    _request_timeout_label: str | None
    _overall_timeout_label: str | None
    _timeout_manager: MultiCallTimeoutManager
    _filter: FilterType | None
    _projection: ProjectionType | None
    _sort: dict[str, Any] | None
    _limit: int | None
    _initial_page_state: str | UnsetType
    _include_similarity: bool | None
    _include_sort_vector: bool | None
    _skip: int | None
    _mapper: Callable[[TRAW], T] | None

    def __init__(
        self,
        *,
        table: AsyncTable[TRAW],
        request_timeout_ms: int | None,
        overall_timeout_ms: int | None,
        request_timeout_label: str | None = None,
        overall_timeout_label: str | None = None,
        filter: FilterType | None = None,
        projection: ProjectionType | None = None,
        sort: dict[str, Any] | None = None,
        limit: int | None = None,
        initial_page_state: str | UnsetType = _UNSET,
        include_similarity: bool | None = None,
        include_sort_vector: bool | None = None,
        skip: int | None = None,
        mapper: Callable[[TRAW], T] | None = None,
    ) -> None:
        """
        Set up a cursor over the rows of an AsyncTable.

        The filter and sort settings are deep-copied; all other settings
        (including the projection) are stored as provided. A table query
        engine bound to the async table is created from the find parameters,
        the AbstractCursor initialization is run with the initial page state,
        and a timeout manager is created for the overall timeout.

        Args:
            table: the AsyncTable this cursor reads from.
            request_timeout_ms: the per-request timeout, in milliseconds.
            overall_timeout_ms: the overall timeout, in milliseconds,
                handed to the timeout manager.
            request_timeout_label: a label associated to the request timeout.
            overall_timeout_label: a label associated to the overall timeout.
            filter: the filter setting for the find operation.
            projection: the projection setting for the find operation.
            sort: the sort setting for the find operation.
            limit: the limit setting for the find operation.
            initial_page_state: the page state the cursor starts from, if set.
            include_similarity: the include_similarity setting.
            include_sort_vector: the include_sort_vector setting.
            skip: the skip setting for the find operation.
            mapper: an optional function applied to each raw row
                before it is returned to the caller.
        """

        # find parameters (filter and sort are copied, the rest stored as-is)
        self._filter = deepcopy(filter)
        self._sort = deepcopy(sort)
        self._projection = projection
        self._limit = limit
        self._skip = skip
        self._include_similarity = include_similarity
        self._include_sort_vector = include_sort_vector
        self._initial_page_state = initial_page_state
        self._mapper = mapper

        # timeout settings
        self._request_timeout_ms = request_timeout_ms
        self._request_timeout_label = request_timeout_label
        self._overall_timeout_ms = overall_timeout_ms
        self._overall_timeout_label = overall_timeout_label

        # the engine is given the async table only (no sync table)
        self._query_engine = _TableFindQueryEngine(
            table=None,
            async_table=table,
            filter=self._filter,
            projection=self._projection,
            sort=self._sort,
            limit=self._limit,
            include_similarity=self._include_similarity,
            include_sort_vector=self._include_sort_vector,
            skip=self._skip,
        )
        AbstractCursor.__init__(self, initial_page_state=initial_page_state)
        self._timeout_manager = MultiCallTimeoutManager(
            overall_timeout_ms=self._overall_timeout_ms,
            timeout_label=self._overall_timeout_label,
        )

    def _copy(
        self: AsyncTableFindCursor[TRAW, T],
        *,
        request_timeout_ms: int | None | UnsetType = _UNSET,
        overall_timeout_ms: int | None | UnsetType = _UNSET,
        request_timeout_label: str | None | UnsetType = _UNSET,
        overall_timeout_label: str | None | UnsetType = _UNSET,
        filter: FilterType | None | UnsetType = _UNSET,
        projection: ProjectionType | None | UnsetType = _UNSET,
        sort: dict[str, Any] | None | UnsetType = _UNSET,
        limit: int | None | UnsetType = _UNSET,
        initial_page_state: str | None | UnsetType = _UNSET,
        include_similarity: bool | None | UnsetType = _UNSET,
        include_sort_vector: bool | None | UnsetType = _UNSET,
        skip: int | None | UnsetType = _UNSET,
    ) -> AsyncTableFindCursor[TRAW, T]:
        """
        Create a new AsyncTableFindCursor on the same async table, overriding
        only the settings explicitly passed to this method.

        Each parameter left unset keeps the value stored on this cursor;
        any other value (None included) replaces it. The one exception is
        `initial_page_state`, for which an explicit None results in the
        unset sentinel being passed to the new cursor.
        The mapping function is always carried over unchanged.

        Returns:
            a new AsyncTableFindCursor with the resulting settings.

        Raises:
            RuntimeError: if the query engine has no async table.
        """

        source_table = self._query_engine.async_table
        if source_table is None:
            raise RuntimeError("Query engine has no async table.")

        new_initial_page_state: str | UnsetType
        if isinstance(initial_page_state, UnsetType):
            new_initial_page_state = self._initial_page_state
        elif initial_page_state is None:
            # special treatment: passing None erases
            # (hence we must supply unset and not None)
            new_initial_page_state = _UNSET
        else:
            new_initial_page_state = initial_page_state

        return AsyncTableFindCursor(
            table=source_table,
            request_timeout_ms=(
                request_timeout_ms
                if not isinstance(request_timeout_ms, UnsetType)
                else self._request_timeout_ms
            ),
            overall_timeout_ms=(
                overall_timeout_ms
                if not isinstance(overall_timeout_ms, UnsetType)
                else self._overall_timeout_ms
            ),
            request_timeout_label=(
                request_timeout_label
                if not isinstance(request_timeout_label, UnsetType)
                else self._request_timeout_label
            ),
            overall_timeout_label=(
                overall_timeout_label
                if not isinstance(overall_timeout_label, UnsetType)
                else self._overall_timeout_label
            ),
            filter=filter if not isinstance(filter, UnsetType) else self._filter,
            projection=(
                projection
                if not isinstance(projection, UnsetType)
                else self._projection
            ),
            sort=sort if not isinstance(sort, UnsetType) else self._sort,
            limit=limit if not isinstance(limit, UnsetType) else self._limit,
            initial_page_state=new_initial_page_state,
            include_similarity=(
                include_similarity
                if not isinstance(include_similarity, UnsetType)
                else self._include_similarity
            ),
            include_sort_vector=(
                include_sort_vector
                if not isinstance(include_sort_vector, UnsetType)
                else self._include_sort_vector
            ),
            skip=skip if not isinstance(skip, UnsetType) else self._skip,
            mapper=self._mapper,
        )

    async def _try_ensure_fill_buffer(self) -> None:
        """
        Best-effort attempt at having a non-empty buffer: when the buffer
        is empty and a page can still be requested (either a next-page state
        is known, or the cursor is still IDLE), fetch that page and store it.
        In all other cases, silently do nothing.
        This method never changes the cursor state.
        """

        # nothing to do for a closed cursor or when items are still buffered
        if self._state == CursorState.CLOSED or self._buffer:
            return
        may_fetch = (
            self._next_page_state is not None or self._state == CursorState.IDLE
        )
        if not may_fetch:
            return

        # the request timeout caps whatever remains of the overall timeout
        request_timeout_context = self._timeout_manager.remaining_timeout(
            cap_time_ms=self._request_timeout_ms,
            cap_timeout_label=self._request_timeout_label,
        )
        fetched = await self._query_engine._async_fetch_page(
            page_state=self._next_page_state,
            timeout_context=request_timeout_context,
        )
        page_items, following_page_state, response_status = fetched
        self._next_page_state = following_page_state
        self._last_response_status = response_status
        self._pages_retrieved += 1
        self._buffer = page_items

    def __repr__(self) -> str:
        """
        Return a short description of the cursor: class name, name of the
        originating table, current state and number of items consumed so far.
        """

        class_name = self.__class__.__name__
        source_name = self.data_source.name
        state_value = self._state.value
        consumed_count = self.consumed
        return (
            f'{class_name}("{source_name}", '
            f"{state_value}, "
            f"consumed so far: {consumed_count})"
        )

    def __aiter__(
        self: AsyncTableFindCursor[TRAW, T],
    ) -> AsyncTableFindCursor[TRAW, T]:
        """
        Return the cursor itself as its own asynchronous iterator,
        after running the `_ensure_alive` check.
        """

        self._ensure_alive()
        return self

    async def __anext__(self) -> T:
        """
        Return the next item from the cursor, fetching a new page if needed.

        The cursor becomes STARTED when an item is returned, and CLOSED
        once no more items are available. The mapping function, if one is
        set, is applied to the raw item before returning it.

        Raises:
            StopAsyncIteration: if the cursor is CLOSED or has no items left.
        """

        if self._state == CursorState.CLOSED:
            raise StopAsyncIteration
        await self._try_ensure_fill_buffer()
        if not self._buffer:
            # buffer still empty after the fill attempt: the cursor is exhausted
            self._state = CursorState.CLOSED
            raise StopAsyncIteration
        self._state = CursorState.STARTED
        # take the first buffered item and leave the rest in the buffer
        next_raw = self._buffer[0]
        self._buffer = self._buffer[1:]
        self._consumed += 1
        if self._mapper is None:
            return cast(T, next_raw)
        return cast(T, self._mapper(next_raw))

    @property
    def data_source(self) -> AsyncTable[TRAW]:
        """
        The AsyncTable from which this cursor originated via a `find` operation.

        Returns:
            an AsyncTable instance.

        Raises:
            RuntimeError: if the query engine has no async table.
        """

        async_table = self._query_engine.async_table
        if async_table is None:
            raise RuntimeError("Query engine has no async table.")
        return async_table

    def clone(self) -> AsyncTableFindCursor[TRAW, T]:
        """
        Create a copy of this cursor which:
        - has the same parameters (timeouts, filter, projection, etc)
        - and is a brand new cursor, i.e. in its pristine IDLE state.

        For usage examples, please refer to the same method of the
        equivalent synchronous TableFindCursor class, and apply the necessary
        adaptations to the async interface.

        Returns:
            a new AsyncTableFindCursor, similar to this one but
            rewound to its initial state.

        Raises:
            RuntimeError: if the query engine has no async table.
        """

        # with no overrides, `_copy` builds a new cursor from exactly
        # the settings stored on this one (mapping function included)
        return self._copy()

    def filter(self, filter: FilterType | None) -> AsyncTableFindCursor[TRAW, T]:
        """
        Build a new cursor that differs from this one only in its filter
        setting. The cursor must still be in the IDLE state for this operation
        to be allowed.

        Rather than calling this method directly, callers usually supply the
        corresponding argument to the AsyncTable `find` method.

        Args:
            filter: the filter setting for the new cursor.

        Returns:
            a new AsyncTableFindCursor, identical to this one except
                for `filter`, which takes the provided value.
        """

        # the idle-state check comes first, before any copy is attempted
        self._ensure_idle()
        updated_cursor = self._copy(filter=filter)
        return updated_cursor

    def project(
        self, projection: ProjectionType | None
    ) -> AsyncTableFindCursor[TRAW, T]:
        """
        Build a new cursor that differs from this one only in its projection
        setting. This is allowed only while the cursor is still in the IDLE
        state and provided no mapping function has been set on it.

        Rather than calling this method directly, callers usually supply the
        corresponding argument to the AsyncTable `find` method.

        Args:
            projection: the projection setting for the new cursor.

        Returns:
            a new AsyncTableFindCursor, identical to this one except
                for `projection`, which takes the provided value.

        Raises:
            CursorException: if a mapping function is already set on this cursor.
        """

        self._ensure_idle()
        if self._mapper is None:
            return self._copy(projection=projection)
        # a mapping is in place: changing the projection is refused
        raise CursorException(
            "Cannot set projection after map.",
            cursor_state=self._state.value,
        )

    def sort(self, sort: dict[str, Any] | None) -> AsyncTableFindCursor[TRAW, T]:
        """
        Build a new cursor that differs from this one only in its sort
        setting. The cursor must still be in the IDLE state for this operation
        to be allowed.

        Rather than calling this method directly, callers usually supply the
        corresponding argument to the AsyncTable `find` method.

        Args:
            sort: the sort setting for the new cursor.

        Returns:
            a new AsyncTableFindCursor, identical to this one except
                for `sort`, which takes the provided value.
        """

        # the idle-state check comes first, before any copy is attempted
        self._ensure_idle()
        updated_cursor = self._copy(sort=sort)
        return updated_cursor

    def limit(self, limit: int | None) -> AsyncTableFindCursor[TRAW, T]:
        """
        Build a new cursor that differs from this one only in its limit
        setting. The cursor must still be in the IDLE state for this operation
        to be allowed.

        Rather than calling this method directly, callers usually supply the
        corresponding argument to the AsyncTable `find` method.

        Args:
            limit: the limit setting for the new cursor.

        Returns:
            a new AsyncTableFindCursor, identical to this one except
                for `limit`, which takes the provided value.
        """

        # the idle-state check comes first, before any copy is attempted
        self._ensure_idle()
        updated_cursor = self._copy(limit=limit)
        return updated_cursor

    def initial_page_state(
        self, initial_page_state: str | UnsetType
    ) -> AsyncTableFindCursor[TRAW, T]:
        """
        Build a new cursor that differs from this one only in its
        initial_page_state setting. The cursor must still be in the IDLE state
        for this operation to be allowed.

        Rather than calling this method directly, callers usually supply the
        corresponding argument to the AsyncTable `find` method.

        Args:
            initial_page_state: the initial_page_state setting for the
                new cursor.
                NOTE(review): this was previously documented as raising an
                error on an explicit None, whereas the `_copy` method of this
                class turns None into the unset sentinel. Confirm which
                behavior is intended.

        Returns:
            a new AsyncTableFindCursor, identical to this one except
                for `initial_page_state`, which takes the provided value.
        """

        # the idle-state check comes first, before any copy is attempted
        self._ensure_idle()
        updated_cursor = self._copy(initial_page_state=initial_page_state)
        return updated_cursor

    def include_similarity(
        self, include_similarity: bool | None
    ) -> AsyncTableFindCursor[TRAW, T]:
        """
        Build a new cursor that differs from this one only in its
        include_similarity setting. This is allowed only while the cursor is
        still in the IDLE state and provided no mapping function has been
        set on it.

        Rather than calling this method directly, callers usually supply the
        corresponding argument to the AsyncTable `find` method.

        Args:
            include_similarity: the include_similarity setting
                for the new cursor.

        Returns:
            a new AsyncTableFindCursor, identical to this one except
                for `include_similarity`, which takes the provided value.

        Raises:
            CursorException: if a mapping function is already set on this cursor.
        """

        self._ensure_idle()
        if self._mapper is None:
            return self._copy(include_similarity=include_similarity)
        # a mapping is in place: changing include_similarity is refused
        raise CursorException(
            "Cannot set include_similarity after map.",
            cursor_state=self._state.value,
        )

    def include_sort_vector(
        self, include_sort_vector: bool | None
    ) -> AsyncTableFindCursor[TRAW, T]:
        """
        Build a new cursor that differs from this one only in its
        include_sort_vector setting. The cursor must still be in the IDLE state
        for this operation to be allowed.

        Rather than calling this method directly, callers usually supply the
        corresponding argument to the AsyncTable `find` method.

        Args:
            include_sort_vector: the include_sort_vector setting
                for the new cursor.

        Returns:
            a new AsyncTableFindCursor, identical to this one except
                for `include_sort_vector`, which takes the provided value.
        """

        # the idle-state check comes first, before any copy is attempted
        self._ensure_idle()
        updated_cursor = self._copy(include_sort_vector=include_sort_vector)
        return updated_cursor

    def skip(self, skip: int | None) -> AsyncTableFindCursor[TRAW, T]:
        """
        Build a new cursor that differs from this one only in its skip
        setting. The cursor must still be in the IDLE state for this operation
        to be allowed.

        Rather than calling this method directly, callers usually supply the
        corresponding argument to the AsyncTable `find` method.

        Args:
            skip: the skip setting for the new cursor.

        Returns:
            a new AsyncTableFindCursor, identical to this one except
                for `skip`, which takes the provided value.
        """

        # the idle-state check comes first, before any copy is attempted
        self._ensure_idle()
        updated_cursor = self._copy(skip=skip)
        return updated_cursor

    def map(self, mapper: Callable[[T], TNEW]) -> AsyncTableFindCursor[TRAW, TNEW]:
        """
        Build a new cursor whose returned items are transformed by a mapping
        function. If this cursor already carries a mapping, the new function
        is applied on top of it (i.e. the two are composed).

        The cursor must still be in the IDLE state for this operation
        to be allowed.

        For usage examples, please refer to the same method of the
        equivalent synchronous TableFindCursor class, and apply the necessary
        adaptations to the async interface.

        Args:
            mapper: a function transforming the objects returned by the cursor
                into something else (i.e. a function T => TNEW).

        Returns:
            a new AsyncTableFindCursor with a new mapping function on the results,
                possibly composed with any pre-existing mapping function.
        """

        self._ensure_idle()
        source_table = self._query_engine.async_table
        if source_table is None:
            raise RuntimeError("Query engine has no async table.")

        full_mapper: Callable[[TRAW], TNEW]
        if self._mapper is None:
            # no mapping yet: the provided function acts directly on raw items
            full_mapper = cast(Callable[[TRAW], TNEW], mapper)
        else:

            def _chained(raw_row: TRAW) -> TNEW:
                # pre-existing mapping first, then the newly-provided one
                return mapper(self._mapper(raw_row))  # type: ignore[misc]

            full_mapper = _chained

        return AsyncTableFindCursor(
            table=source_table,
            request_timeout_ms=self._request_timeout_ms,
            overall_timeout_ms=self._overall_timeout_ms,
            request_timeout_label=self._request_timeout_label,
            overall_timeout_label=self._overall_timeout_label,
            filter=self._filter,
            projection=self._projection,
            sort=self._sort,
            limit=self._limit,
            initial_page_state=self._initial_page_state,
            include_similarity=self._include_similarity,
            include_sort_vector=self._include_sort_vector,
            skip=self._skip,
            mapper=full_mapper,
        )

    async def for_each(
        self,
        function: Callable[[T], bool | None] | Callable[[T], Awaitable[bool | None]],
        *,
        general_method_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Run a callback function -- or coroutine -- on each of the rows
        that remain to be consumed from the cursor.

        Calling this method on a CLOSED cursor results in an error.

        The value returned by the callback is ignored, except for the boolean
        `False`: that is taken as a request to stop early, which leaves the
        cursor half-consumed (ACTIVE state). Otherwise the cursor is consumed
        until exhaustion and ends up in the CLOSED state.

        For usage examples, please refer to the same method of the
        equivalent synchronous TableFindCursor class, and apply the necessary
        adaptations to the async interface.

        Args:
            function: a callback function, or a coroutine, whose only parameter
                is of the type returned by the cursor. It is invoked once per
                each row yielded by the cursor. If it returns a `False`,
                the `for_each` invocation stops early and returns without
                consuming further rows.
            general_method_timeout_ms: a timeout, in milliseconds, for the whole
                duration of this method. If not provided, there is no such timeout.
                Note that the per-request timeout set on the cursor still applies.
            timeout_ms: an alias for `general_method_timeout_ms`.
        """

        self._ensure_alive()
        revised_timeouts = _revise_timeouts_for_cursor_copy(
            new_general_method_timeout_ms=general_method_timeout_ms,
            new_timeout_ms=timeout_ms,
            old_request_timeout_ms=self._request_timeout_ms,
        )
        request_ms, overall_ms = revised_timeouts
        # The iteration runs on a copy carrying the revised timeouts: the internal
        # state is imprinted onto the copy first, and back onto this cursor at the end.
        worker = self._copy(
            request_timeout_ms=request_ms,
            overall_timeout_ms=overall_ms,
        )
        self._imprint_internal_state(worker)
        needs_await = iscoroutinefunction(function)
        async for row in worker:
            outcome = (
                await function(row)  # type: ignore[misc]
                if needs_await
                else function(row)
            )
            # Only a literal `False` stops the loop (`None`, `0`, etc. do not).
            if outcome is False:
                break
        worker._imprint_internal_state(self)

    async def to_list(
        self,
        *,
        general_method_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[T]:
        """
        Collect into a list all rows that remain to be consumed from the cursor.

        Calling this method on a CLOSED cursor results in an error.

        On an IDLE cursor, the result is the whole set of rows returned
        by the `find` operation; otherwise, the rows already consumed by the
        cursor are not part of the resulting list.

        Calling this method is not recommended if a huge list of results is anticipated:
        it would involve a large number of data exchanges with the Data API and possibly
        a massive memory usage to construct the list. In such cases, a lazy pattern
        of iterating and consuming the rows is to be preferred.

        For usage examples, please refer to the same method of the
        equivalent synchronous TableFindCursor class, and apply the necessary
        adaptations to the async interface.

        Args:
            general_method_timeout_ms: a timeout, in milliseconds, for the whole
                duration of this method. If not provided, there is no such timeout.
                Note that the per-request timeout set on the cursor still applies.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            list: a list of rows (or other values depending on the mapping
                function, if one is set). These are all items that were left
                to be consumed on the cursor when `to_list` is called.
        """

        self._ensure_alive()
        revised_timeouts = _revise_timeouts_for_cursor_copy(
            new_general_method_timeout_ms=general_method_timeout_ms,
            new_timeout_ms=timeout_ms,
            old_request_timeout_ms=self._request_timeout_ms,
        )
        request_ms, overall_ms = revised_timeouts
        # The iteration runs on a copy carrying the revised timeouts: the internal
        # state is imprinted onto the copy first, and back onto this cursor at the end.
        worker = self._copy(
            request_timeout_ms=request_ms,
            overall_timeout_ms=overall_ms,
        )
        self._imprint_internal_state(worker)
        rows = []
        async for row in worker:
            rows.append(row)
        worker._imprint_internal_state(self)
        return rows

    async def has_next(self) -> bool:
        """
        Tell whether the cursor still has at least one row to return.

        `has_next` can be called on any cursor, but on a CLOSED cursor
        will always return False.

        If the current buffer is empty, this method can trigger the fetch
        operation of a new page.

        Calling `has_next` on an IDLE cursor triggers the first page fetch, but the
        cursor stays in the IDLE state until actual consumption starts.

        Returns:
            a boolean value of True if there is at least one further item
                available to consume; False otherwise (including the case of CLOSED
                cursor).
        """

        is_closed = self._state == CursorState.CLOSED
        if is_closed:
            return False
        await self._try_ensure_fill_buffer()
        buffered_count = len(self._buffer)
        return buffered_count > 0

    async def get_sort_vector(self) -> list[float] | DataAPIVector | None:
        """
        Return the query vector of the vector (ANN) search that originated
        this cursor, if applicable. None is returned if this is not an ANN search,
        or if the search was invoked without the `include_sort_vector` flag.

        Calling `get_sort_vector` on an IDLE cursor triggers the first page fetch,
        but the cursor stays in the IDLE state until actual consumption starts.

        The method can be invoked on a CLOSED cursor and will return either None
        or the sort vector used in the search.

        Returns:
            the query vector used in the search if this was a
                vector search (otherwise None). The vector is returned either
                as a DataAPIVector or a plain list of numbers depending on the
                `APIOptions.serdes_options` that apply. The query vector is available
                also for vectorize-based ANN searches.
        """

        await self._try_ensure_fill_buffer()
        response_status = self._last_response_status
        if not response_status:
            # No (or empty) status from the last response: nothing to return.
            return None
        return _ensure_vector(
            response_status.get("sortVector"),
            self.data_source.api_options.serdes_options,
        )

    async def fetch_next_page(self) -> FindPage[T]:
        """
        Fetch one whole page of results from the Data API and return it at once,
        along with the associated "out-of-band" information.

        This is the way to consume a cursor when the caller needs to explicitly
        work on a page-by-page basis. It is to be paired with the creation
        of cursor objects 'set to start from a certain page' through the
        `initial_page_state` constructor parameter/builder method.
        In this case, the supplied initial page state typically comes from having
        consumed a previous page, for the same find operation: the page state, a string,
        is found within the `FindPage` object returned by this method.

        Returns:
            a `FindPage` object expressing the full Data API response, including
            the resulting rows (after applying the cursor mapping function,
            if one is defined), as well as the state to use to query for the next
            page (a string) and the sort vector if requested and applicable.

        Raises:
            CursorException: if the buffer still holds items when this method
                is called (paginated retrieval mixed with regular iteration).
        """

        self._ensure_alive()
        if self._buffer:
            raise CursorException(
                text="Paginated retrieval cannot be mixed with regular cursor iteration.",
                cursor_state=self._state.value,
            )

        await self._try_ensure_fill_buffer()

        # The page size and the next-page state are read right after the fill
        # and before the items are pulled out through `__anext__`.
        page_size = len(self._buffer)
        upcoming_page_state = self._next_page_state
        page_items: list[T] = [await self.__anext__() for _ in range(page_size)]

        response_status = self._last_response_status
        page_sort_vector: list[float] | DataAPIVector | None = (
            _ensure_vector(
                response_status.get("sortVector"),
                self.data_source.api_options.serdes_options,
            )
            if response_status
            else None
        )

        return FindPage(
            results=page_items,
            next_page_state=upcoming_page_state,
            sort_vector=page_sort_vector,
        )

Ancestors

AbstractCursor
abc.ABC
typing.Generic

Instance variables

var data_source : AsyncTable[~TRAW]

The AsyncTable object that originated this cursor through a find operation.

Returns

an AsyncTable instance.

Expand source code

@property
def data_source(self) -> AsyncTable[TRAW]:
    """
    The AsyncTable object from which this cursor originated
    through a `find` operation.

    Returns:
        an AsyncTable instance.

    Raises:
        RuntimeError: if the query engine has no async table.
    """

    originating_table = self._query_engine.async_table
    if originating_table is None:
        raise RuntimeError("Query engine has no async table.")
    return originating_table

Methods

def clone(self) ‑> AsyncTableFindCursor[~TRAW, ~T]

Create a copy of this cursor with the same parameters (timeouts, filter, projection, etc.), rewound to its pristine IDLE state.

For usage examples, please refer to the same method of the equivalent synchronous TableFindCursor class, and apply the necessary adaptations to the async interface.

Returns

a new AsyncTableFindCursor, similar to this one but rewound to its initial state.

Expand source code

def clone(self) -> AsyncTableFindCursor[TRAW, T]:
    """
    Build a fresh cursor out of this one. The new cursor:
    - has the same parameters (timeouts, filter, projection, etc)
    - is rewound to its pristine IDLE state.

    For usage examples, please refer to the same method of the
    equivalent synchronous TableFindCursor class, and apply the necessary
    adaptations to the async interface.

    Returns:
        a new AsyncTableFindCursor, similar to this one but
        rewound to its initial state.

    Raises:
        RuntimeError: if the query engine has no async table.
    """

    origin_table = self._query_engine.async_table
    if origin_table is None:
        raise RuntimeError("Query engine has no async table.")
    # All settings, including the mapping function, are carried over unchanged.
    return AsyncTableFindCursor(
        table=origin_table,
        request_timeout_ms=self._request_timeout_ms,
        overall_timeout_ms=self._overall_timeout_ms,
        request_timeout_label=self._request_timeout_label,
        overall_timeout_label=self._overall_timeout_label,
        filter=self._filter,
        projection=self._projection,
        sort=self._sort,
        limit=self._limit,
        initial_page_state=self._initial_page_state,
        include_similarity=self._include_similarity,
        include_sort_vector=self._include_sort_vector,
        skip=self._skip,
        mapper=self._mapper,
    )

async def fetch_next_page(self) ‑> FindPage[~T]

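Retrieve a single, whole page of results from the Data API and return it at once, together with associated "out-of-band" information.

This method is meant to be the way a cursor is consumed when the caller needs to explicitly operate on a page-by-page basis, and is to be paired with creation of cursor objects 'set to start from a certain page' via the initial_page_state constructor parameter/builder method. In this case, the supplied initial page state typically comes from having consumed a previous page, for the same find operation: the page state, a string, is found within the FindPage object returned by this method.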

Returns

a FindPage object expressing the full Data API response, including the resulting rows (after applying the cursor mapping function, if one is defined), as well as the state to use to query for the next page (a string) and the sort vector if requested and applicable.

Expand source code

async def fetch_next_page(self) -> FindPage[T]:
    """
    Fetch one whole page of results from the Data API and return it at once,
    along with the associated "out-of-band" information.

    This is the way to consume a cursor when the caller needs to explicitly
    work on a page-by-page basis. It is to be paired with the creation
    of cursor objects 'set to start from a certain page' through the
    `initial_page_state` constructor parameter/builder method.
    In this case, the supplied initial page state typically comes from having
    consumed a previous page, for the same find operation: the page state, a string,
    is found within the `FindPage` object returned by this method.

    Returns:
        a `FindPage` object expressing the full Data API response, including
        the resulting rows (after applying the cursor mapping function,
        if one is defined), as well as the state to use to query for the next
        page (a string) and the sort vector if requested and applicable.

    Raises:
        CursorException: if the buffer still holds items when this method
            is called (paginated retrieval mixed with regular iteration).
    """

    self._ensure_alive()
    if self._buffer:
        raise CursorException(
            text="Paginated retrieval cannot be mixed with regular cursor iteration.",
            cursor_state=self._state.value,
        )

    await self._try_ensure_fill_buffer()

    # The page size and the next-page state are read right after the fill
    # and before the items are pulled out through `__anext__`.
    page_size = len(self._buffer)
    upcoming_page_state = self._next_page_state
    page_items: list[T] = [await self.__anext__() for _ in range(page_size)]

    response_status = self._last_response_status
    page_sort_vector: list[float] | DataAPIVector | None = (
        _ensure_vector(
            response_status.get("sortVector"),
            self.data_source.api_options.serdes_options,
        )
        if response_status
        else None
    )

    return FindPage(
        results=page_items,
        next_page_state=upcoming_page_state,
        sort_vector=page_sort_vector,
    )

def filter(self, filter: FilterType | None) ‑> AsyncTableFindCursor[~TRAW, ~T]

Return a copy of this cursor with a new filter setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the AsyncTable find method.

Args

filter: a new filter setting to apply to the returned new cursor.

Returns

a new AsyncTableFindCursor with the same settings as this one, except for filter which is the provided value.

Expand source code

def filter(self, filter: FilterType | None) -> AsyncTableFindCursor[TRAW, T]:
    """
    Build a new cursor equal to this one except for its filter setting.
    This operation is allowed only if the cursor state is still IDLE.

    Rather than calling this method explicitly, the typical usage consists
    in passing arguments to the AsyncTable `find` method.

    Args:
        filter: the filter setting that the returned new cursor will have.

    Returns:
        a new AsyncTableFindCursor with the same settings as this one,
            except for `filter` which is the provided value.
    """

    self._ensure_idle()
    refiltered_cursor = self._copy(filter=filter)
    return refiltered_cursor

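async def for_each(self, function: Callable[[T], bool | None] | Callable[[T], Awaitable[bool | None]], *, general_method_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Consume the remaining rows in the cursor, invoking a provided callback function – or coroutine – on each of them.

Calling this method on a CLOSED cursor results in an error.

The callback function can return any value. The return value is generally discarded, with the following exception: if the function returns the boolean False, it is taken to signify that the method should quit early, leaving the cursor half-consumed (ACTIVE state). If this does not occur, this method results in the cursor entering CLOSED state once it is exhausted.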

For usage examples, please refer to the same method of the equivalent synchronous TableFindCursor class, and apply the necessary adaptations to the async interface.

Args

function: a callback function, or a coroutine, whose only parameter is of the type returned by the cursor. This callback is invoked once per each row yielded by the cursor. If the callback returns a False, the for_each invocation stops early and returns without consuming further rows.
general_method_timeout_ms: a timeout, in milliseconds, for the whole duration of this method. If not provided, there is no such timeout. Note that the per-request timeout set on the cursor still applies.
timeout_ms: an alias for general_method_timeout_ms.

Expand source code

async def for_each(
    self,
    function: Callable[[T], bool | None] | Callable[[T], Awaitable[bool | None]],
    *,
    general_method_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Run a callback function -- or coroutine -- on each of the rows
    that remain to be consumed from the cursor.

    Calling this method on a CLOSED cursor results in an error.

    The value returned by the callback is ignored, except for the boolean
    `False`: that is taken as a request to stop early, which leaves the
    cursor half-consumed (ACTIVE state). Otherwise the cursor is consumed
    until exhaustion and ends up in the CLOSED state.

    For usage examples, please refer to the same method of the
    equivalent synchronous TableFindCursor class, and apply the necessary
    adaptations to the async interface.

    Args:
        function: a callback function, or a coroutine, whose only parameter
            is of the type returned by the cursor. It is invoked once per
            each row yielded by the cursor. If it returns a `False`,
            the `for_each` invocation stops early and returns without
            consuming further rows.
        general_method_timeout_ms: a timeout, in milliseconds, for the whole
            duration of this method. If not provided, there is no such timeout.
            Note that the per-request timeout set on the cursor still applies.
        timeout_ms: an alias for `general_method_timeout_ms`.
    """

    self._ensure_alive()
    revised_timeouts = _revise_timeouts_for_cursor_copy(
        new_general_method_timeout_ms=general_method_timeout_ms,
        new_timeout_ms=timeout_ms,
        old_request_timeout_ms=self._request_timeout_ms,
    )
    request_ms, overall_ms = revised_timeouts
    # The iteration runs on a copy carrying the revised timeouts: the internal
    # state is imprinted onto the copy first, and back onto this cursor at the end.
    worker = self._copy(
        request_timeout_ms=request_ms,
        overall_timeout_ms=overall_ms,
    )
    self._imprint_internal_state(worker)
    needs_await = iscoroutinefunction(function)
    async for row in worker:
        outcome = (
            await function(row)  # type: ignore[misc]
            if needs_await
            else function(row)
        )
        # Only a literal `False` stops the loop (`None`, `0`, etc. do not).
        if outcome is False:
            break
    worker._imprint_internal_state(self)

async def get_sort_vector(self) ‑> list[float] | DataAPIVector | None

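Return the query vector used in the vector (ANN) search that originated this cursor, if applicable. If this is not an ANN search, or it was invoked without the include_sort_vector flag, return None.

Calling get_sort_vector on an IDLE cursor triggers the first page fetch, but the cursor stays in the IDLE state until actual consumption starts.

The method can be invoked on a CLOSED cursor and will return either None or the sort vector used in the search.

Returns

the query vector used in the search if this was a vector search (otherwise None). The vector is returned either as a DataAPIVector or a plain list of numbers depending on the APIOptions.serdes_options that apply. The query vector is available also for vectorize-based ANN searches.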

Expand source code

async def get_sort_vector(self) -> list[float] | DataAPIVector | None:
    """
    Return the query vector of the vector (ANN) search that originated
    this cursor, if applicable. None is returned if this is not an ANN search,
    or if the search was invoked without the `include_sort_vector` flag.

    Calling `get_sort_vector` on an IDLE cursor triggers the first page fetch,
    but the cursor stays in the IDLE state until actual consumption starts.

    The method can be invoked on a CLOSED cursor and will return either None
    or the sort vector used in the search.

    Returns:
        the query vector used in the search if this was a
            vector search (otherwise None). The vector is returned either
            as a DataAPIVector or a plain list of numbers depending on the
            `APIOptions.serdes_options` that apply. The query vector is available
            also for vectorize-based ANN searches.
    """

    await self._try_ensure_fill_buffer()
    response_status = self._last_response_status
    if not response_status:
        # No (or empty) status from the last response: nothing to return.
        return None
    return _ensure_vector(
        response_status.get("sortVector"),
        self.data_source.api_options.serdes_options,
    )

async def has_next(self) ‑> bool

Whether the cursor actually has more documents to return.

has_next can be called on any cursor, but on a CLOSED cursor will always return False.

This method can trigger the fetch operation of a new page, if the current buffer is empty.

Calling has_next on an IDLE cursor triggers the first page fetch, but the cursor stays in the IDLE state until actual consumption starts.

Returns

a boolean value of True if there is at least one further item available to consume; False otherwise (including the case of CLOSED cursor).

Expand source code

async def has_next(self) -> bool:
    """
    Tell whether the cursor still has at least one row to return.

    `has_next` can be called on any cursor, but on a CLOSED cursor
    will always return False.

    If the current buffer is empty, this method can trigger the fetch
    operation of a new page.

    Calling `has_next` on an IDLE cursor triggers the first page fetch, but the
    cursor stays in the IDLE state until actual consumption starts.

    Returns:
        a boolean value of True if there is at least one further item
            available to consume; False otherwise (including the case of CLOSED
            cursor).
    """

    is_closed = self._state == CursorState.CLOSED
    if is_closed:
        return False
    await self._try_ensure_fill_buffer()
    buffered_count = len(self._buffer)
    return buffered_count > 0

def include_similarity(self, include_similarity: bool | None) ‑> AsyncTableFindCursor[~TRAW, ~T]

Return a copy of this cursor with a new include_similarity setting. This operation is allowed only if the cursor state is still IDLE and if no mapping has been set on it.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the AsyncTable find method.

Args

include_similarity: a new include_similarity setting to apply to the returned new cursor.

Returns

a new AsyncTableFindCursor with the same settings as this one, except for include_similarity which is the provided value.

Expand source code

def include_similarity(
    self, include_similarity: bool | None
) -> AsyncTableFindCursor[TRAW, T]:
    """
    Build a new cursor equal to this one except for its
    include_similarity setting.
    This operation is allowed only if the cursor state is still IDLE and if
    no mapping has been set on it.

    Rather than calling this method explicitly, the typical usage consists
    in passing arguments to the AsyncTable `find` method.

    Args:
        include_similarity: the include_similarity setting that the
            returned new cursor will have.

    Returns:
        a new AsyncTableFindCursor with the same settings as this one,
            except for `include_similarity` which is the provided value.

    Raises:
        CursorException: if a mapping function is already set on the cursor.
    """

    self._ensure_idle()
    if self._mapper is None:
        return self._copy(include_similarity=include_similarity)
    raise CursorException(
        "Cannot set include_similarity after map.",
        cursor_state=self._state.value,
    )

def include_sort_vector(self, include_sort_vector: bool | None) ‑> AsyncTableFindCursor[~TRAW, ~T]

Return a copy of this cursor with a new include_sort_vector setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the AsyncTable find method.

Args

include_sort_vector: a new include_sort_vector setting to apply to the returned new cursor.

Returns

a new AsyncTableFindCursor with the same settings as this one, except for include_sort_vector which is the provided value.

Expand source code

def include_sort_vector(
    self, include_sort_vector: bool | None
) -> AsyncTableFindCursor[TRAW, T]:
    """
    Build a new cursor equal to this one except for its
    include_sort_vector setting.
    This operation is allowed only if the cursor state is still IDLE.

    Rather than calling this method explicitly, the typical usage consists
    in passing arguments to the AsyncTable `find` method.

    Args:
        include_sort_vector: the include_sort_vector setting that the
            returned new cursor will have.

    Returns:
        a new AsyncTableFindCursor with the same settings as this one,
            except for `include_sort_vector` which is the provided value.
    """

    self._ensure_idle()
    reconfigured_cursor = self._copy(include_sort_vector=include_sort_vector)
    return reconfigured_cursor

def initial_page_state(self, initial_page_state: str | UnsetType) ‑> AsyncTableFindCursor[~TRAW, ~T]

Return a copy of this cursor with a new initial_page_state setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the AsyncTable find method.

Args

initial_page_state: a new initial_page_state setting to apply to the returned new cursor. Passing an explicit None raises an error.

Returns

a new AsyncTableFindCursor with the same settings as this one, except for initial_page_state which is the provided value.

Expand source code

def initial_page_state(
    self, initial_page_state: str | UnsetType
) -> AsyncTableFindCursor[TRAW, T]:
    """
    Build a new cursor equal to this one except for its
    initial_page_state setting.
    This operation is allowed only if the cursor state is still IDLE.

    Rather than calling this method explicitly, the typical usage consists
    in passing arguments to the AsyncTable `find` method.

    Args:
        initial_page_state: the initial_page_state setting that the
            returned new cursor will have. Passing an explicit None raises an error.

    Returns:
        a new AsyncTableFindCursor with the same settings as this one,
            except for `initial_page_state` which is the provided value.
    """

    self._ensure_idle()
    repositioned_cursor = self._copy(initial_page_state=initial_page_state)
    return repositioned_cursor

def limit(self, limit: int | None) ‑> AsyncTableFindCursor[~TRAW, ~T]

Return a copy of this cursor with a new limit setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the AsyncTable find method.

Args

limit: a new limit setting to apply to the returned new cursor.

Returns

a new AsyncTableFindCursor with the same settings as this one, except for limit which is the provided value.

Expand source code

def limit(self, limit: int | None) -> AsyncTableFindCursor[TRAW, T]:
    """
    Build a new cursor equal to this one except for its limit setting.
    This operation is allowed only if the cursor state is still IDLE.

    Rather than calling this method explicitly, the typical usage consists
    in passing arguments to the AsyncTable `find` method.

    Args:
        limit: the limit setting that the returned new cursor will have.

    Returns:
        a new AsyncTableFindCursor with the same settings as this one,
            except for `limit` which is the provided value.
    """

    self._ensure_idle()
    limited_cursor = self._copy(limit=limit)
    return limited_cursor

def map(self, mapper: Callable[[T], TNEW]) ‑> AsyncTableFindCursor[~TRAW, ~TNEW]

Return a copy of this cursor with a mapping function to transform the returned items. Calling this method on a cursor with a mapping already set results in the mapping functions being composed.

This operation is allowed only if the cursor state is still IDLE.

For usage examples, please refer to the same method of the equivalent synchronous TableFindCursor class, and apply the necessary adaptations to the async interface.

Args

mapper: a function transforming the objects returned by the cursor into something else (i.e. a function T => TNEW).

Returns

a new AsyncTableFindCursor with a new mapping function on the results, possibly composed with any pre-existing mapping function.

Expand source code

def map(self, mapper: Callable[[T], TNEW]) -> AsyncTableFindCursor[TRAW, TNEW]:
    """
    Produce a new cursor whose returned items are transformed by `mapper`.

    If this cursor already carries a mapping function, the new cursor
    applies the pre-existing mapping first and then `mapper` on its output
    (i.e. the two functions are composed).

    This operation is allowed only if the cursor state is still IDLE.

    For usage examples, please refer to the same method of the
    equivalent synchronous TableFindCursor class, and apply the necessary
    adaptations to the async interface.

    Args:
        mapper: a function turning the items currently returned by the
            cursor into something else (i.e. a function T => TNEW).

    Returns:
        a new AsyncTableFindCursor, with all other settings taken from
            this one, whose mapping function is `mapper`, composed with
            the pre-existing mapping function if there is one.

    Raises:
        RuntimeError: if the query engine has no async table.
    """

    self._ensure_idle()
    origin_table = self._query_engine.async_table
    if origin_table is None:
        raise RuntimeError("Query engine has no async table.")

    full_mapper: Callable[[TRAW], TNEW]
    if self._mapper is None:
        # No previous mapping is set: `mapper` is used as it is.
        full_mapper = cast(Callable[[TRAW], TNEW], mapper)
    else:

        def _chained(raw_item: TRAW) -> TNEW:
            # Pre-existing mapping first, then the newly-provided one.
            return mapper(self._mapper(raw_item))  # type: ignore[misc]

        full_mapper = _chained

    return AsyncTableFindCursor(
        table=origin_table,
        request_timeout_ms=self._request_timeout_ms,
        overall_timeout_ms=self._overall_timeout_ms,
        request_timeout_label=self._request_timeout_label,
        overall_timeout_label=self._overall_timeout_label,
        filter=self._filter,
        projection=self._projection,
        sort=self._sort,
        limit=self._limit,
        initial_page_state=self._initial_page_state,
        include_similarity=self._include_similarity,
        include_sort_vector=self._include_sort_vector,
        skip=self._skip,
        mapper=full_mapper,
    )

def project(self, projection: ProjectionType | None) ‑> AsyncTableFindCursor[~TRAW, ~T]

Return a copy of this cursor with a new projection setting. This operation is allowed only if the cursor state is still IDLE and if no mapping has been set on it.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the AsyncTable find method.

Args

projection: a new projection setting to apply to the returned new cursor.

Returns

a new AsyncTableFindCursor with the same settings as this one, except for projection which is the provided value.

Expand source code

def project(
    self, projection: ProjectionType | None
) -> AsyncTableFindCursor[TRAW, T]:
    """
    Build a new cursor equal to this one except for its projection setting.
    This operation is allowed only if the cursor state is still IDLE and if
    no mapping has been set on it.

    Rather than calling this method explicitly, the typical usage consists
    in passing arguments to the AsyncTable `find` method.

    Args:
        projection: the projection setting that the returned new cursor will have.

    Returns:
        a new AsyncTableFindCursor with the same settings as this one,
            except for `projection` which is the provided value.

    Raises:
        CursorException: if a mapping function is already set on the cursor.
    """

    self._ensure_idle()
    if self._mapper is None:
        return self._copy(projection=projection)
    raise CursorException(
        "Cannot set projection after map.",
        cursor_state=self._state.value,
    )

def skip(self, skip: int | None) ‑> AsyncTableFindCursor[~TRAW, ~T]

Return a copy of this cursor with a new skip setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the AsyncTable find method.

Args

skip: a new skip setting to apply to the returned new cursor.

Returns

a new AsyncTableFindCursor with the same settings as this one, except for skip which is the provided value.

Expand source code

def skip(self, skip: int | None) -> AsyncTableFindCursor[TRAW, T]:
    """
    Build a new cursor that differs from this one only in its skip setting.
    The cursor must still be in IDLE state for this to be allowed.

    Rather than calling this method explicitly, one usually supplies the
    skip value as an argument to the AsyncTable `find` method.

    Args:
        skip: the skip setting for the returned new cursor.

    Returns:
        a new AsyncTableFindCursor carrying the settings of this one,
            with `skip` replaced by the provided value.
    """

    self._ensure_idle()
    skipping_cursor = self._copy(skip=skip)
    return skipping_cursor

def sort(self, sort: dict[str, Any] | None) ‑> AsyncTableFindCursor[~TRAW, ~T]

Return a copy of this cursor with a new sort setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the AsyncTable find method.

Args

sort: a new sort setting to apply to the returned new cursor.

Returns

a new AsyncTableFindCursor with the same settings as this one, except for sort which is the provided value.

Expand source code

def sort(self, sort: dict[str, Any] | None) -> AsyncTableFindCursor[TRAW, T]:
    """
    Build a new cursor that differs from this one only in its sort setting.
    The cursor must still be in IDLE state for this to be allowed.

    Rather than calling this method explicitly, one usually supplies the
    sort clause as an argument to the AsyncTable `find` method.

    Args:
        sort: the sort setting for the returned new cursor.

    Returns:
        a new AsyncTableFindCursor carrying the settings of this one,
            with `sort` replaced by the provided value.
    """

    self._ensure_idle()
    sorted_cursor = self._copy(sort=sort)
    return sorted_cursor

async def to_list(self, *, general_method_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[~T]

Materialize all rows that remain to be consumed from a cursor into a list.

Calling this method on a CLOSED cursor results in an error.

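If the cursor is IDLE, the result will be the whole set of rows returned by the find operation; otherwise, the rows already consumed by the cursor will not be in the resulting list.

Calling this method is not recommended if a huge list of results is anticipated: it would involve a large number of data exchanges with the Data API and possibly a massive memory usage to construct the list. In such cases, a lazy pattern of iterating and consuming the rows is to be preferred.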

For usage examples, please refer to the same method of the equivalent synchronous TableFindCursor class, and apply the necessary adaptations to the async interface.

Args

general_method_timeout_ms: a timeout, in milliseconds, for the whole duration of this method. If not provided, there is no such timeout. Note that the per-request timeout set on the cursor still applies.
timeout_ms: an alias for general_method_timeout_ms.

Returns

list: a list of rows (or other values depending on the mapping function, if one is set). These are all items that were left to be consumed on the cursor when to_list is called.

Expand source code

async def to_list(
    self,
    *,
    general_method_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[T]:
    """
    Collect into a list every row still to be consumed from this cursor.

    A CLOSED cursor cannot be materialized: calling this method on it
    results in an error.

    On an IDLE cursor the list holds the full result set of the `find`
    operation. On a cursor that has already been partly consumed, the
    rows read so far are not part of the returned list.

    This method is best avoided when a very large result set is expected,
    since it would require many data exchanges with the Data API and may use
    a massive amount of memory for the list. Lazily iterating over the cursor
    and consuming the rows one by one is the preferred pattern in that case.

    Usage examples can be found under the same method of the synchronous
    TableFindCursor class: they apply here once adapted to the async interface.

    Args:
        general_method_timeout_ms: a timeout, in milliseconds, for the whole
            duration of this method. If not provided, there is no such timeout.
            Note that the per-request timeout set on the cursor still applies.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        list: a list of rows (or of whatever the mapping function returns,
            if one is set): all the items that were left to be consumed
            on the cursor at the time `to_list` is called.
    """

    self._ensure_alive()
    request_ms, overall_ms = _revise_timeouts_for_cursor_copy(
        new_general_method_timeout_ms=general_method_timeout_ms,
        new_timeout_ms=timeout_ms,
        old_request_timeout_ms=self._request_timeout_ms,
    )
    # the iteration runs on a copy that carries the revised timeouts
    worker = self._copy(
        request_timeout_ms=request_ms,
        overall_timeout_ms=overall_ms,
    )
    # presumably hands this cursor's progress over to the copy -- see
    # `_imprint_internal_state` for the exact fields involved
    self._imprint_internal_state(worker)
    rows: list[T] = []
    async for row in worker:
        rows.append(row)
    # ... and back again, once the copy has been exhausted
    worker._imprint_internal_state(self)
    return rows

Inherited members

AbstractCursor:
- buffered_count
- close
- consume_buffer
- consumed
- cursor_id
- rewind
- state

A synchronous cursor over documents, as returned by a find_and_rerank invocation on a Collection. A cursor can be iterated over, materialized into a list, and queried/manipulated in various ways.

Some cursor operations mutate it in-place (such as consuming its documents), others return a new cursor without changing the original one. See the documentation for the various methods and the Collection find_and_rerank method for more details and usage patterns.

Example

>>> # (this assumes 'vectorize'. See `Collection.find_and_rerank` for more.)
>>> cursor = collection.find_and_rerank(
...     sort={"$hybrid": "Weekdays?"},
...     projection={"wkd": True},
...     limit=5,
...     include_scores=True,
... )
>>> for r_result in cursor:
...     print(f"{r_result.document['wkd']}: {r_result.scores['$rerank']}")
...
Wed: -9.1015625
Mon: -10.2421875
Tue: -10.2421875
Sun: -11.375
Fri: -12.515625

Expand source code

class CollectionFindAndRerankCursor(
    Generic[TRAW, T], AbstractCursor[RerankedResult[TRAW]]
):
    """
    A synchronous cursor over documents, as returned by a `find_and_rerank` invocation
    on a Collection. A cursor can be iterated over, materialized into a list,
    and queried/manipulated in various ways.

    Some cursor operations mutate it in-place (such as consuming its documents),
    other return a new cursor without changing the original one. See the documentation
    for the various methods and the Collection `find_and_rerank` method for more details
    and usage patterns.

    This cursor has two type parameters: TRAW and T. The first is the type
    of the "raw" documents as they are found on the collection, the second
    is the type of the items after the optional mapping function (see the `.map()`
    method).
    If no mapping is specified, `T = RerankedResult[TRAW]`: the items yielded by
    the cursor are a `RerankedResult` wrapping the type (possibly after projection)
    of the documents found on the collection: in other words, such a cursor returns
    the documents, as they come back from the API, with their associated scores
    from the find-and-rerank operation.
    In general, consuming a cursor returns items of type T, except for the
    `consume_buffer` primitive that draws directly from the buffer and always
    returns items of type RerankedResult[TRAW].

    Example:
        >>> # (this assumes 'vectorize'. See `Collection.find_and_rerank` for more.)
        >>> cursor = collection.find_and_rerank(
        ...     sort={"$hybrid": "Weekdays?"},
        ...     projection={"wkd": True},
        ...     limit=5,
        ...     include_scores=True,
        ... )
        >>> for r_result in cursor:
        ...     print(f"{r_result.document['wkd']}: {r_result.scores['$rerank']}")
        ...
        Wed: -9.1015625
        Mon: -10.2421875
        Tue: -10.2421875
        Sun: -11.375
        Fri: -12.515625
    """

    _query_engine: _CollectionFindAndRerankQueryEngine[TRAW]
    _request_timeout_ms: int | None
    _overall_timeout_ms: int | None
    _request_timeout_label: str | None
    _overall_timeout_label: str | None
    _timeout_manager: MultiCallTimeoutManager
    _filter: FilterType | None
    _projection: ProjectionType | None
    _sort: HybridSortType | None
    _limit: int | None
    _hybrid_limits: int | dict[str, int] | None
    _initial_page_state: str | UnsetType
    _include_scores: bool | None
    _include_sort_vector: bool | None
    _rerank_on: str | None
    _rerank_query: str | None
    _mapper: Callable[[RerankedResult[TRAW]], T] | None

    def __init__(
        self,
        *,
        collection: Collection[TRAW],
        request_timeout_ms: int | None,
        overall_timeout_ms: int | None,
        request_timeout_label: str | None = None,
        overall_timeout_label: str | None = None,
        filter: FilterType | None = None,
        projection: ProjectionType | None = None,
        sort: HybridSortType | None = None,
        limit: int | None = None,
        hybrid_limits: int | dict[str, int] | None = None,
        initial_page_state: str | UnsetType = _UNSET,
        include_scores: bool | None = None,
        include_sort_vector: bool | None = None,
        rerank_on: str | None = None,
        rerank_query: str | None = None,
        mapper: Callable[[RerankedResult[TRAW]], T] | None = None,
    ) -> None:
        """
        Store the query settings on the cursor, create the query engine bound
        to `collection`, then initialize the base cursor and the timeout manager.

        The `filter`, `sort` and `hybrid_limits` arguments are deep-copied
        before being stored; all other settings are stored as they are passed.
        """

        # timeout configuration
        self._request_timeout_ms = request_timeout_ms
        self._overall_timeout_ms = overall_timeout_ms
        self._request_timeout_label = request_timeout_label
        self._overall_timeout_label = overall_timeout_label

        # settings stored as deep copies of what the caller provided
        self._filter = deepcopy(filter)
        self._sort = deepcopy(sort)
        self._hybrid_limits = deepcopy(hybrid_limits)

        # settings stored as-is
        self._projection = projection
        self._limit = limit
        self._initial_page_state = initial_page_state
        self._include_scores = include_scores
        self._include_sort_vector = include_sort_vector
        self._rerank_on = rerank_on
        self._rerank_query = rerank_query
        self._mapper = mapper

        # the engine receives the values as stored on the cursor
        # (i.e. the copies, where a copy was made)
        engine = _CollectionFindAndRerankQueryEngine(
            collection=collection,
            async_collection=None,
            filter=self._filter,
            projection=self._projection,
            sort=self._sort,
            limit=self._limit,
            hybrid_limits=self._hybrid_limits,
            include_scores=self._include_scores,
            include_sort_vector=self._include_sort_vector,
            rerank_on=self._rerank_on,
            rerank_query=self._rerank_query,
        )
        self._query_engine = engine

        AbstractCursor.__init__(self, initial_page_state=initial_page_state)

        timeout_manager = MultiCallTimeoutManager(
            overall_timeout_ms=self._overall_timeout_ms,
            timeout_label=self._overall_timeout_label,
        )
        self._timeout_manager = timeout_manager

    def _copy(
        self: CollectionFindAndRerankCursor[TRAW, T],
        *,
        request_timeout_ms: int | None | UnsetType = _UNSET,
        overall_timeout_ms: int | None | UnsetType = _UNSET,
        request_timeout_label: str | None | UnsetType = _UNSET,
        overall_timeout_label: str | None | UnsetType = _UNSET,
        filter: FilterType | None | UnsetType = _UNSET,
        projection: ProjectionType | None | UnsetType = _UNSET,
        sort: HybridSortType | None | UnsetType = _UNSET,
        limit: int | None | UnsetType = _UNSET,
        hybrid_limits: int | dict[str, int] | None | UnsetType = _UNSET,
        initial_page_state: str | None | UnsetType = _UNSET,
        include_scores: bool | None | UnsetType = _UNSET,
        include_sort_vector: bool | None | UnsetType = _UNSET,
        rerank_on: str | None | UnsetType = _UNSET,
        rerank_query: str | None | UnsetType = _UNSET,
    ) -> CollectionFindAndRerankCursor[TRAW, T]:
        """
        Create a new cursor on the same collection, overriding selected settings.

        Each keyword argument left at the unset sentinel keeps the current value
        of this cursor; any other value (None included) replaces it. The mapping
        function is always carried over as it is.

        The `initial_page_state` argument is treated differently: the constructor
        expects the unset sentinel, not None, when there is no initial page state.
        For this reason a None passed here is converted to the sentinel, which
        amounts to erasing the setting on the returned cursor.

        Returns:
            a new CollectionFindAndRerankCursor built with the resulting settings.

        Raises:
            RuntimeError: if the query engine of this cursor has no collection.
        """

        if self._query_engine.collection is None:
            raise RuntimeError("Query engine has no collection.")
        return CollectionFindAndRerankCursor(
            collection=self._query_engine.collection,
            request_timeout_ms=self._request_timeout_ms
            if isinstance(request_timeout_ms, UnsetType)
            else request_timeout_ms,
            overall_timeout_ms=self._overall_timeout_ms
            if isinstance(overall_timeout_ms, UnsetType)
            else overall_timeout_ms,
            request_timeout_label=self._request_timeout_label
            if isinstance(request_timeout_label, UnsetType)
            else request_timeout_label,
            overall_timeout_label=self._overall_timeout_label
            if isinstance(overall_timeout_label, UnsetType)
            else overall_timeout_label,
            filter=self._filter if isinstance(filter, UnsetType) else filter,
            projection=self._projection
            if isinstance(projection, UnsetType)
            else projection,
            sort=self._sort if isinstance(sort, UnsetType) else sort,
            limit=self._limit if isinstance(limit, UnsetType) else limit,
            hybrid_limits=self._hybrid_limits
            if isinstance(hybrid_limits, UnsetType)
            else hybrid_limits,
            # special treatment: passing None erases (hence we must supply unset and not None):
            initial_page_state=self._initial_page_state
            if isinstance(initial_page_state, UnsetType)
            else (initial_page_state if initial_page_state is not None else _UNSET),
            include_scores=self._include_scores
            if isinstance(include_scores, UnsetType)
            else include_scores,
            include_sort_vector=self._include_sort_vector
            if isinstance(include_sort_vector, UnsetType)
            else include_sort_vector,
            rerank_on=self._rerank_on
            if isinstance(rerank_on, UnsetType)
            else rerank_on,
            rerank_query=self._rerank_query
            if isinstance(rerank_query, UnsetType)
            else rerank_query,
            mapper=self._mapper,
        )

    def _try_ensure_fill_buffer(self) -> None:
        """
        Best-effort refill of an empty buffer with the next page of results.

        Nothing is done if the cursor is CLOSED, if the buffer still holds
        items, or if no page can be requested (i.e. there is no next-page
        state and the cursor is not IDLE either).
        The cursor state is never altered by this method: only the buffer
        and the paging bookkeeping are updated.
        """

        if self._state == CursorState.CLOSED:
            return
        if self._buffer:
            return
        # a page is requested either as a continuation (a next-page state is known)
        # or as the very first page (the cursor is still IDLE)
        if self._next_page_state is None and self._state != CursorState.IDLE:
            return

        request_timeout = self._timeout_manager.remaining_timeout(
            cap_time_ms=self._request_timeout_ms,
            cap_timeout_label=self._request_timeout_label,
        )
        page_items, following_page_state, response_status = (
            self._query_engine._fetch_page(
                page_state=self._next_page_state,
                timeout_context=request_timeout,
            )
        )
        self._next_page_state = following_page_state
        self._last_response_status = response_status
        self._pages_retrieved += 1
        self._buffer = page_items

    def __repr__(self) -> str:
        """
        Return a short description of the cursor: class name, name of the
        originating collection, current state and number of consumed items.
        """

        class_name = self.__class__.__name__
        source_name = self.data_source.name
        state_label = self._state.value
        return (
            f'{class_name}("{source_name}", {state_label}, '
            f"consumed so far: {self.consumed})"
        )

    def __iter__(
        self: CollectionFindAndRerankCursor[TRAW, T],
    ) -> CollectionFindAndRerankCursor[TRAW, T]:
        """
        Return the cursor itself as the iterator over its items, after
        running the `_ensure_alive` check on it.
        """

        self._ensure_alive()
        return self

    def __next__(self) -> T:
        """
        Return the next item from the cursor, fetching a new page if needed.

        When nothing is left, the cursor is set to CLOSED and the iteration
        stops. Otherwise the cursor is set to STARTED, one item is removed from
        the front of the buffer and returned, after passing it through the
        mapping function if one is set.

        Raises:
            StopIteration: if the cursor is CLOSED or has no more items.
        """

        if self.state == CursorState.CLOSED:
            raise StopIteration
        self._try_ensure_fill_buffer()
        if not self._buffer:
            # still empty after the refill attempt: the results are exhausted
            self._state = CursorState.CLOSED
            raise StopIteration
        self._state = CursorState.STARTED
        # take the first buffered item, leaving the rest as the new buffer
        first_item = self._buffer[0]
        self._buffer = self._buffer[1:]
        self._consumed += 1
        if self._mapper is None:
            return cast(T, first_item)
        return cast(T, self._mapper(first_item))

    @property
    def data_source(self) -> Collection[TRAW]:
        """
        The Collection this cursor comes from, i.e. the object on which
        the `find_and_rerank` operation was invoked.

        Returns:
            a Collection instance.

        Raises:
            RuntimeError: if the query engine has no collection.
        """

        source_collection = self._query_engine.collection
        if source_collection is None:
            raise RuntimeError("Query engine has no collection.")
        return source_collection

    def clone(self) -> CollectionFindAndRerankCursor[TRAW, T]:
        """
        Create a copy of this cursor with:
        - the same parameters (timeouts, filter, projection, etc)
        - the same mapping function, if one is set
        - and the cursor is rewound to its pristine IDLE state.

        Returns:
            a new CollectionFindAndRerankCursor, with the same settings as this one
            (mapping function included) and rewound to its initial state.

        Raises:
            RuntimeError: if the query engine of this cursor has no collection.

        Example:
            >>> # (this assumes 'vectorize'. See `Collection.find_and_rerank` for more.)
            >>> cursor = collection.find_and_rerank(
            ...     sort={"$hybrid": "Weekdays?"},
            ...     projection={"wkd": True},
            ...     limit=3,
            ... ).map(lambda r_result: r_result.document["wkd"].upper())
            >>> for idx, value in zip([0, 1], cursor):
            ...     print(f"{idx} ==> {value}")
            ...
            0 ==> MON
            1 ==> TUE
            >>> cloned_cursor = cursor.clone()
            >>> for value in cloned_cursor:
            ...     print(f"(cloned) {value}")
            ...
            (cloned) MON
            (cloned) TUE
            (cloned) SUN
            >>>
            >>> print(f"n ==> {next(cursor)}")
            n ==> SUN
        """

        # With no overrides, `_copy` passes every stored setting (mapper
        # included) to the constructor, yielding a brand new cursor.
        return self._copy()

    def filter(
        self, filter: FilterType | None
    ) -> CollectionFindAndRerankCursor[TRAW, T]:
        """
        Build a new cursor that differs from this one only in its filter.
        The cursor must still be in IDLE state for this to be allowed.

        Rather than calling this method explicitly, one usually supplies the
        filter as an argument to the Collection `find_and_rerank` method.

        Args:
            filter: the filter setting for the returned new cursor.

        Returns:
            a new CollectionFindAndRerankCursor carrying the settings of this one,
                with `filter` replaced by the provided value.
        """

        self._ensure_idle()
        filtered_cursor = self._copy(filter=filter)
        return filtered_cursor

    def project(
        self, projection: ProjectionType | None
    ) -> CollectionFindAndRerankCursor[TRAW, T]:
        """
        Build a new cursor that differs from this one only in its projection.

        The cursor must still be in IDLE state and must not have a mapping
        function set on it, otherwise the request is rejected.

        Rather than calling this method explicitly, one usually supplies the
        projection as an argument to the Collection `find_and_rerank` method.

        Args:
            projection: the projection setting for the returned new cursor.

        Returns:
            a new CollectionFindAndRerankCursor carrying the settings of this one,
                with `projection` replaced by the provided value.

        Raises:
            CursorException: if a mapping function is already set on this cursor.
        """

        self._ensure_idle()
        if self._mapper is None:
            return self._copy(projection=projection)
        # a mapper is attached: changing the projection is refused
        raise CursorException(
            "Cannot set projection after map.",
            cursor_state=self._state.value,
        )

    def sort(
        self, sort: HybridSortType | None
    ) -> CollectionFindAndRerankCursor[TRAW, T]:
        """
        Build a new cursor that differs from this one only in its sort setting.
        The cursor must still be in IDLE state for this to be allowed.

        Rather than calling this method explicitly, one usually supplies the
        sort clause as an argument to the Collection `find_and_rerank` method.

        Args:
            sort: the sort setting for the returned new cursor.

        Returns:
            a new CollectionFindAndRerankCursor carrying the settings of this one,
                with `sort` replaced by the provided value.
        """

        self._ensure_idle()
        sorted_cursor = self._copy(sort=sort)
        return sorted_cursor

    def limit(self, limit: int | None) -> CollectionFindAndRerankCursor[TRAW, T]:
        """
        Build a new cursor that differs from this one only in its limit.
        The cursor must still be in IDLE state for this to be allowed.

        Rather than calling this method explicitly, one usually supplies the
        limit as an argument to the Collection `find_and_rerank` method.

        Args:
            limit: the limit setting for the returned new cursor.

        Returns:
            a new CollectionFindAndRerankCursor carrying the settings of this one,
                with `limit` replaced by the provided value.
        """

        self._ensure_idle()
        limited_cursor = self._copy(limit=limit)
        return limited_cursor

    def hybrid_limits(
        self, hybrid_limits: int | dict[str, int] | None
    ) -> CollectionFindAndRerankCursor[TRAW, T]:
        """
        Build a new cursor that differs from this one only in its
        hybrid_limits setting.
        The cursor must still be in IDLE state for this to be allowed.

        Rather than calling this method explicitly, one usually supplies the
        value as an argument to the Collection `find_and_rerank` method.

        Args:
            hybrid_limits: the hybrid_limits setting for the returned new cursor.

        Returns:
            a new CollectionFindAndRerankCursor carrying the settings of this one,
                with `hybrid_limits` replaced by the provided value.
        """

        self._ensure_idle()
        relimited_cursor = self._copy(hybrid_limits=hybrid_limits)
        return relimited_cursor

    def initial_page_state(
        self, initial_page_state: str | UnsetType
    ) -> CollectionFindAndRerankCursor[TRAW, T]:
        """
        Build a new cursor that differs from this one only in its
        initial_page_state setting.
        The cursor must still be in IDLE state for this to be allowed.

        Rather than calling this method explicitly, one usually supplies the
        value as an argument to the Collection `find_and_rerank` method.

        Args:
            initial_page_state: the initial_page_state setting for the
                returned new cursor.

        Returns:
            a new CollectionFindAndRerankCursor carrying the settings of this one,
                with `initial_page_state` replaced by the provided value.
        """

        self._ensure_idle()
        # NOTE(review): None is outside the declared parameter type. The `_copy`
        # helper turns a None into the unset sentinel instead of forwarding it:
        # confirm how an explicit None is meant to be handled here.
        repositioned_cursor = self._copy(initial_page_state=initial_page_state)
        return repositioned_cursor

    def include_scores(
        self, include_scores: bool | None
    ) -> CollectionFindAndRerankCursor[TRAW, T]:
        """
        Build a new cursor that differs from this one only in its
        include_scores setting.
        The cursor must still be in IDLE state for this to be allowed.

        Rather than calling this method explicitly, one usually supplies the
        flag as an argument to the Collection `find_and_rerank` method.

        Args:
            include_scores: the include_scores setting for the
                returned new cursor.

        Returns:
            a new CollectionFindAndRerankCursor carrying the settings of this one,
                with `include_scores` replaced by the provided value.
        """

        self._ensure_idle()
        rescored_cursor = self._copy(include_scores=include_scores)
        return rescored_cursor

    def include_sort_vector(
        self, include_sort_vector: bool | None
    ) -> CollectionFindAndRerankCursor[TRAW, T]:
        """
        Build a new cursor that differs from this one only in its
        include_sort_vector setting.
        The cursor must still be in IDLE state for this to be allowed.

        Rather than calling this method explicitly, one usually supplies the
        flag as an argument to the Collection `find_and_rerank` method.

        Args:
            include_sort_vector: the include_sort_vector setting for the
                returned new cursor.

        Returns:
            a new CollectionFindAndRerankCursor carrying the settings of this one,
                with `include_sort_vector` replaced by the provided value.
        """

        self._ensure_idle()
        updated_cursor = self._copy(include_sort_vector=include_sort_vector)
        return updated_cursor

    def rerank_on(
        self, rerank_on: str | None
    ) -> CollectionFindAndRerankCursor[TRAW, T]:
        """
        Build a new cursor that differs from this one only in its
        rerank_on setting.
        The cursor must still be in IDLE state for this to be allowed.

        Rather than calling this method explicitly, one usually supplies the
        value as an argument to the Collection `find_and_rerank` method.

        Args:
            rerank_on: the rerank_on setting for the returned new cursor.

        Returns:
            a new CollectionFindAndRerankCursor carrying the settings of this one,
                with `rerank_on` replaced by the provided value.
        """

        self._ensure_idle()
        updated_cursor = self._copy(rerank_on=rerank_on)
        return updated_cursor

    def rerank_query(
        self, rerank_query: str | None
    ) -> CollectionFindAndRerankCursor[TRAW, T]:
        """
        Build a new cursor that differs from this one only in its
        rerank_query setting.
        The cursor must still be in IDLE state for this to be allowed.

        Rather than calling this method explicitly, one usually supplies the
        value as an argument to the Collection `find_and_rerank` method.

        Args:
            rerank_query: the rerank_query setting for the returned new cursor.

        Returns:
            a new CollectionFindAndRerankCursor carrying the settings of this one,
                with `rerank_query` replaced by the provided value.
        """

        self._ensure_idle()
        updated_cursor = self._copy(rerank_query=rerank_query)
        return updated_cursor

    def map(
        self, mapper: Callable[[T], TNEW]
    ) -> CollectionFindAndRerankCursor[TRAW, TNEW]:
        """
        Build a new cursor which applies a mapping function to each item
        it returns. If this cursor already has a mapping function, the new
        one is composed with it (i.e. applied to the output of the existing one).

        The cursor must still be in IDLE state for this to be allowed.

        Args:
            mapper: a function turning the items returned by this cursor
                into something else (i.e. a function T => TNEW).
                When no mapping is set yet on the cursor, the function receives
                a `RerankedResult[TRAW]`, with TRAW being the type of
                the documents from the collection.

        Returns:
            a new CollectionFindAndRerankCursor whose items go through the new
                mapping function, composed with the pre-existing one if any.

        Raises:
            RuntimeError: if the query engine of this cursor has no collection.

        Example:
            >>> # (this assumes 'vectorize'. See `Collection.find_and_rerank` for more.)
            >>> cursor = collection.find_and_rerank(
            ...     sort={"$hybrid": "Weekdays?"},
            ...     projection={"wkd": True},
            ...     limit=3,
            ... )
            >>> for r_result in cursor:
            ...     print(r_result.document)
            ...
            {'_id': 'A', 'wkd': 'Mon'}
            {'_id': 'B', 'wkd': 'Tue'}
            {'_id': 'G', 'wkd': 'Sun'}
            >>> cursor_mapped = cursor.clone().map(
            ...     lambda r_result: r_result.document["wkd"]
            ... )
            >>> for value in cursor_mapped:
            ...     print(value)
            ...
            Mon
            Tue
            Sun
            >>> cursor_mapped_twice = cursor_mapped.clone().map(
            ...     lambda wkd: f"<{wkd[:2].lower()}>"
            ... )
            >>> for value in cursor_mapped_twice:
            ...     print(value)
            ...
            <mo>
            <tu>
            <su>
        """
        self._ensure_idle()
        if self._query_engine.collection is None:
            raise RuntimeError("Query engine has no collection.")
        full_mapper: Callable[[RerankedResult[TRAW]], TNEW]
        if self._mapper is None:
            # no mapping yet: the provided function acts on the raw results
            full_mapper = cast(Callable[[RerankedResult[TRAW]], TNEW], mapper)
        else:

            def _chained(r_result: RerankedResult[TRAW]) -> TNEW:
                # existing mapping first, then the newly provided one
                return mapper(self._mapper(r_result))  # type: ignore[misc]

            full_mapper = _chained
        return CollectionFindAndRerankCursor(
            collection=self._query_engine.collection,
            request_timeout_ms=self._request_timeout_ms,
            overall_timeout_ms=self._overall_timeout_ms,
            request_timeout_label=self._request_timeout_label,
            overall_timeout_label=self._overall_timeout_label,
            filter=self._filter,
            projection=self._projection,
            sort=self._sort,
            limit=self._limit,
            hybrid_limits=self._hybrid_limits,
            initial_page_state=self._initial_page_state,
            include_scores=self._include_scores,
            include_sort_vector=self._include_sort_vector,
            rerank_on=self._rerank_on,
            rerank_query=self._rerank_query,
            mapper=full_mapper,
        )

    def for_each(
        self,
        function: Callable[[T], bool | None],
        *,
        general_method_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Run a callback function on each of the documents left to be consumed
        on the cursor, consuming them in the process.

        A CLOSED cursor cannot be used: calling this method on it
        results in an error.

        The value returned by the callback is ignored, with one exception:
        if it is the boolean `False`, the method stops early and the cursor
        is left partially consumed (STARTED state). When no early stop occurs,
        the cursor is exhausted and ends up in CLOSED state.

        Args:
            function: a callback function accepting a single argument of the type
                returned by the cursor. It is called once for each document
                yielded by the cursor. Should it return `False`, the `for_each`
                invocation ends at once without consuming any more documents.
            general_method_timeout_ms: a timeout, in milliseconds, for the whole
                duration of this method. If not provided, there is no such timeout.
                Note that the per-request timeout set on the cursor still applies.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Example:
            >>> # (this assumes 'vectorize'. See `Collection.find_and_rerank` for more.)
            >>> from astrapy.cursors import CursorState, RerankedResult
            >>>
            >>> cursor = collection.find_and_rerank(
            ...     sort={"$hybrid": "Weekdays?"},
            ...     projection={"wkd": True},
            ...     limit=3,
            ... )
            >>> def printer(r_result: RerankedResult):
            ...     print(f"-> {r_result.document['wkd']}")
            ...
            >>> cursor.for_each(printer)
            -> Mon
            -> Tue
            -> Sun
            >>>
            >>> if cursor.state != CursorState.CLOSED:
            ...     print(f"alive: {cursor.to_list()}")
            ... else:
            ...     print("(closed)")
            ...
            (closed)
            >>> cursor2 = cursor.clone()
            >>> def checker(r_result: RerankedResult):
            ...     print(f"-> {r_result.document['wkd']}")
            ...     return r_result.document["wkd"] != "Tue"
            ...
            >>> cursor2.for_each(checker)
            -> Mon
            -> Tue
            >>>
            >>> if cursor2.state != CursorState.CLOSED:
            ...     print(f"alive: {list(cursor2)}")
            ... else:
            ...     print("(closed)")
            ...
            alive: [RerankedResult(document={'_id': 'G', 'wkd': 'Sun'}, scores={})]
        """

        self._ensure_alive()
        request_ms, overall_ms = _revise_timeouts_for_cursor_copy(
            new_general_method_timeout_ms=general_method_timeout_ms,
            new_timeout_ms=timeout_ms,
            old_request_timeout_ms=self._request_timeout_ms,
        )
        # the iteration runs on a copy that carries the revised timeouts
        worker = self._copy(
            request_timeout_ms=request_ms,
            overall_timeout_ms=overall_ms,
        )
        # presumably hands this cursor's progress over to the copy -- see
        # `_imprint_internal_state` for the exact fields involved
        self._imprint_internal_state(worker)
        for item in worker:
            # only a literal False (not just any falsy value) stops the loop
            if function(item) is False:
                break
        # ... and back again, whether the loop ended early or not
        worker._imprint_internal_state(self)

    def to_list(
        self,
        *,
        general_method_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[T]:
        """
        Collect into a list all documents not yet consumed from this cursor.

        The cursor must not be CLOSED when this method is called, otherwise
        an error is raised.

        On an IDLE cursor the returned list is the full set of documents coming
        from the `find_and_rerank` operation. On a cursor that has already been
        partially consumed, the documents read so far are not part of the list.

        This method is best avoided when a very large result set is expected, since
        it may require many round-trips to the Data API and a large amount of memory
        to hold the list. Iterating over the cursor lazily is the recommended
        approach in those cases.

        Args:
            general_method_timeout_ms: a timeout, in milliseconds, covering the
                whole execution of this method. If omitted, no such timeout is set.
                The per-request timeout set on the cursor applies regardless.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a list with all the items that were still to be consumed when
                `to_list` was invoked. The items are documents, or whatever
                the mapping function returns if the cursor has one.

        Example:
            >>> # (this assumes 'vectorize'. See `Collection.find_and_rerank` for more.)
            >>> collection.find_and_rerank(
            ...     sort={"$hybrid": "Weekdays?"},
            ...     projection={"wkd": True},
            ...     limit=4,
            ... ).map(
            ...     lambda r_result: r_result.document["wkd"]
            ... ).to_list()
            ['Wed', 'Mon', 'Tue', 'Sun']
            >>>
            >>> cursor = collection.find_and_rerank(
            ...     sort={"$hybrid": "Weekdays?"},
            ...     projection={"wkd": True},
            ...     limit=4,
            ... ).map(lambda r_result: r_result.document["wkd"])
            >>> print(f"First item: {cursor.__next__()}.")
            First item: Wed.
            >>> cursor.to_list()
            ['Mon', 'Tue', 'Sun']
        """

        self._ensure_alive()
        req_timeout_ms, ovr_timeout_ms = _revise_timeouts_for_cursor_copy(
            new_general_method_timeout_ms=general_method_timeout_ms,
            new_timeout_ms=timeout_ms,
            old_request_timeout_ms=self._request_timeout_ms,
        )
        # The items are drained from a copy configured with the revised timeouts.
        # NOTE(review): `_imprint_internal_state` presumably transfers the caller's
        # internal state onto its argument -- confirm against its definition.
        worker = self._copy(
            request_timeout_ms=req_timeout_ms,
            overall_timeout_ms=ovr_timeout_ms,
        )
        self._imprint_internal_state(worker)
        collected = [item for item in worker]
        # Hand the state reached by the working copy back to this cursor.
        worker._imprint_internal_state(self)
        return collected

    def has_next(self) -> bool:
        """
        Tell whether at least one more document can be obtained from the cursor.

        This method is safe to call in any cursor state; a CLOSED cursor
        always yields False.

        If the local buffer is empty, calling this method may cause a new page
        to be fetched.

        On an IDLE cursor, `has_next` triggers the fetch of the first page; the
        cursor nevertheless remains IDLE until items are actually consumed.

        Returns:
            True if one or more items are still available for consumption.
                False if there are none, which includes the CLOSED cursor case.
        """

        is_closed = self._state == CursorState.CLOSED
        if is_closed:
            # Nothing to fetch, nothing to return.
            return False
        self._try_ensure_fill_buffer()
        buffered_count = len(self._buffer)
        return buffered_count > 0

    def get_sort_vector(self) -> list[float] | DataAPIVector | None:
        """
        Give the query vector employed by the vector (ANN) search that was part
        of the search expressed by this cursor, when there is one.

        On an IDLE cursor, `get_sort_vector` triggers the fetch of the first page;
        the cursor nevertheless remains IDLE until items are actually consumed.

        Calling this method on a CLOSED cursor is allowed: the result is then
        either the sort vector used in the search or None.

        Returns:
            the query vector used in the search, provided it was requested with
                `include_sort_vector=True` in the `find_and_rerank` call the
                cursor comes from.
                None is returned when no sort vector is available.
                When available, the vector is either a DataAPIVector or a plain
                list of numbers, depending on `APIOptions.serdes_options`.
        """

        self._try_ensure_fill_buffer()
        if not self._last_response_status:
            # No (or empty) status from the last response: no vector to report.
            return None
        return _ensure_vector(
            self._last_response_status.get("sortVector"),
            self.data_source.api_options.serdes_options,
        )

    def fetch_next_page(self) -> FindAndRerankPage[T]:
        """
        Get one entire page of results from the Data API in a single call,
        along with the "out-of-band" information that comes with it.

        Use this method to consume a cursor when an explicit page-by-page
        processing is required. It goes together with creating cursors 'set to
        start from a certain page', which is done through the `initial_page_state`
        constructor parameter/builder method. The initial page state to supply
        is usually the one obtained when consuming the previous page of the same
        find operation: it is a string, made available in the `FindAndRerankPage`
        object this method returns.

        This method cannot be used on a cursor that still holds buffered items
        from regular iteration: in that case an error is raised.

        Note: As long as the findAndRerank Data API command does not paginate
        its results, returning all results at once, this method is of little interest.

        Returns:
            a `FindAndRerankPage` object describing the full Data API response:
            the `RerankedResult` items of the page (or the objects produced by the
            cursor mapping function, if there is one), the state (a string) to
            use when asking for the next page, and the sort vector if it was
            requested and is applicable.
        """

        self._ensure_alive()
        if self._buffer:
            # Leftover buffered items reveal that regular iteration is underway.
            error_text = (
                "Paginated retrieval cannot be mixed with regular cursor iteration."
            )
            raise CursorException(
                text=error_text,
                cursor_state=self._state.value,
            )

        self._try_ensure_fill_buffer()

        page_size = len(self._buffer)
        # The next-page state is read before the buffer gets drained below.
        upcoming_page_state = self._next_page_state
        # `range` comes first in the zip so that the iteration stops after exactly
        # `page_size` items, without asking the cursor for one more.
        page_items = [document for _, document in zip(range(page_size), self)]
        last_status = self._last_response_status
        page_sort_vector: list[float] | DataAPIVector | None = (
            _ensure_vector(
                last_status.get("sortVector"),
                self.data_source.api_options.serdes_options,
            )
            if last_status
            else None
        )

        return FindAndRerankPage(
            results=page_items,
            next_page_state=upcoming_page_state,
            sort_vector=page_sort_vector,
        )

Ancestors

AbstractCursor
abc.ABC
typing.Generic

Instance variables

var data_source : Collection[~TRAW]

The Collection object that originated this cursor through a find_and_rerank operation.

Returns

a Collection instance.

Expand source code

@property
def data_source(self) -> Collection[TRAW]:
    """
    The Collection this cursor originates from, i.e. the one on which
    the `find_and_rerank` operation was invoked.

    Returns:
        a Collection instance.

    Raises:
        RuntimeError: if the query engine of this cursor has no collection.
    """

    source_collection = self._query_engine.collection
    if source_collection is not None:
        return source_collection
    raise RuntimeError("Query engine has no collection.")

Methods

def clone(self) ‑> CollectionFindAndRerankCursor[~TRAW, ~T]

Create a copy of this cursor with the same parameters (timeouts, filter, projection, mapping, etc.), rewound to its pristine IDLE state.

Returns

a new CollectionFindAndRerankCursor, similar to this one (including its mapping function, if any) but rewound to its initial state.

Example

>>> # (this assumes 'vectorize'. See <code>Collection.find\_and\_rerank</code> for more.)
>>> cursor = collection.find_and_rerank(
...     sort={"$hybrid": "Weekdays?"},
...     projection={"wkd": True},
...     limit=3,
... ).map(lambda r_result: r_result.document["wkd"].upper())
>>> for idx, value in zip([0, 1], cursor):
...     print(f"{idx} ==> {value}")
...
0 ==> MON
1 ==> TUE
>>> cloned_cursor = cursor.clone()
>>> for value in cloned_cursor:
...     print(f"(cloned) {value}")
...
(cloned) MON
(cloned) TUE
(cloned) SUN
>>>
>>> print(f"n ==> {next(cursor)}")
n ==> SUN

Expand source code

def clone(self) -> CollectionFindAndRerankCursor[TRAW, T]:
    """
    Create a copy of this cursor with:
    - the same parameters (timeouts, filter, projection, etc)
    - the same mapping function, if one is set
    - and the cursor is rewound to its pristine IDLE state.

    Returns:
        a new CollectionFindAndRerankCursor, similar to this one (mapping
        included) but rewound to its initial state.

    Raises:
        RuntimeError: if the query engine of this cursor has no collection.

    Example:
        >>> # (this assumes 'vectorize'. See `Collection.find_and_rerank` for more.)
        >>> cursor = collection.find_and_rerank(
        ...     sort={"$hybrid": "Weekdays?"},
        ...     projection={"wkd": True},
        ...     limit=3,
        ... ).map(lambda r_result: r_result.document["wkd"].upper())
        >>> for idx, value in zip([0, 1], cursor):
        ...     print(f"{idx} ==> {value}")
        ...
        0 ==> MON
        1 ==> TUE
        >>> cloned_cursor = cursor.clone()
        >>> for value in cloned_cursor:
        ...     print(f"(cloned) {value}")
        ...
        (cloned) MON
        (cloned) TUE
        (cloned) SUN
        >>>
        >>> print(f"n ==> {next(cursor)}")
        n ==> SUN
    """

    collection = self._query_engine.collection
    if collection is None:
        raise RuntimeError("Query engine has no collection.")
    # A brand-new cursor is built from the stored settings. The mapper is
    # passed along too, so the clone yields the same mapped items.
    return CollectionFindAndRerankCursor(
        collection=collection,
        request_timeout_ms=self._request_timeout_ms,
        overall_timeout_ms=self._overall_timeout_ms,
        request_timeout_label=self._request_timeout_label,
        overall_timeout_label=self._overall_timeout_label,
        filter=self._filter,
        projection=self._projection,
        sort=self._sort,
        limit=self._limit,
        hybrid_limits=self._hybrid_limits,
        initial_page_state=self._initial_page_state,
        include_scores=self._include_scores,
        include_sort_vector=self._include_sort_vector,
        rerank_on=self._rerank_on,
        rerank_query=self._rerank_query,
        mapper=self._mapper,
    )

def fetch_next_page(self) ‑> FindAndRerankPage[~T]

Retrieve a single, whole page of results from the Data API and return it at once, together with associated "out-of-band" information.

Note: As long as the findAndRerank Data API command does not paginate its results, returning all results at once, this method is of little interest.

Returns

Expand source code

def fetch_next_page(self) -> FindAndRerankPage[T]:
    """
    Get one entire page of results from the Data API in a single call,
    along with the "out-of-band" information that comes with it.

    Use this method to consume a cursor when an explicit page-by-page
    processing is required. It goes together with creating cursors 'set to
    start from a certain page', which is done through the `initial_page_state`
    constructor parameter/builder method. The initial page state to supply
    is usually the one obtained when consuming the previous page of the same
    find operation: it is a string, made available in the `FindAndRerankPage`
    object this method returns.

    This method cannot be used on a cursor that still holds buffered items
    from regular iteration: in that case an error is raised.

    Note: As long as the findAndRerank Data API command does not paginate
    its results, returning all results at once, this method is of little interest.

    Returns:
        a `FindAndRerankPage` object describing the full Data API response:
        the `RerankedResult` items of the page (or the objects produced by the
        cursor mapping function, if there is one), the state (a string) to
        use when asking for the next page, and the sort vector if it was
        requested and is applicable.
    """

    self._ensure_alive()
    if self._buffer:
        # Leftover buffered items reveal that regular iteration is underway.
        error_text = (
            "Paginated retrieval cannot be mixed with regular cursor iteration."
        )
        raise CursorException(
            text=error_text,
            cursor_state=self._state.value,
        )

    self._try_ensure_fill_buffer()

    page_size = len(self._buffer)
    # The next-page state is read before the buffer gets drained below.
    upcoming_page_state = self._next_page_state
    # `range` comes first in the zip so that the iteration stops after exactly
    # `page_size` items, without asking the cursor for one more.
    page_items = [document for _, document in zip(range(page_size), self)]
    last_status = self._last_response_status
    page_sort_vector: list[float] | DataAPIVector | None = (
        _ensure_vector(
            last_status.get("sortVector"),
            self.data_source.api_options.serdes_options,
        )
        if last_status
        else None
    )

    return FindAndRerankPage(
        results=page_items,
        next_page_state=upcoming_page_state,
        sort_vector=page_sort_vector,
    )

def filter(self, filter: FilterType | None) ‑> CollectionFindAndRerankCursor[~TRAW, ~T]

Return a copy of this cursor with a new filter setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the Collection find_and_rerank method.

Args

filter: a new filter setting to apply to the returned new cursor.

Returns

a new CollectionFindAndRerankCursor with the same settings as this one, except for filter which is the provided value.

Expand source code

def filter(
    self, filter: FilterType | None
) -> CollectionFindAndRerankCursor[TRAW, T]:
    """
    Build a copy of this cursor that uses a different filter.
    The cursor must still be in the IDLE state for this to be allowed.

    This method is seldom called explicitly: usually the filter is given
    as an argument to the Collection `find_and_rerank` method.

    Args:
        filter: the filter setting for the new cursor being returned.

    Returns:
        a new CollectionFindAndRerankCursor whose settings are those of this
            cursor, with `filter` replaced by the provided value.
    """

    self._ensure_idle()
    updated_cursor = self._copy(filter=filter)
    return updated_cursor

def for_each(self, function: Callable[[T], bool | None], *, general_method_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Consume the remaining documents in the cursor, invoking a provided callback function on each of them.

Calling this method on a CLOSED cursor results in an error.

Args

function: a callback function whose only parameter is of the type returned by the cursor. This callback is invoked once per each document yielded by the cursor. If the callback returns a False, the for_each invocation stops early and returns without consuming further documents.
general_method_timeout_ms: a timeout, in milliseconds, for the whole duration of this method. If not provided, there is no such timeout. Note that the per-request timeout set on the cursor still applies.
timeout_ms: an alias for general_method_timeout_ms.

Example

>>> # (this assumes 'vectorize'. See <code>Collection.find\_and\_rerank</code> for more.)
>>> from astrapy.cursors import CursorState, RerankedResult
>>>
>>> cursor = collection.find_and_rerank(
...     sort={"$hybrid": "Weekdays?"},
...     projection={"wkd": True},
...     limit=3,
... )
>>> def printer(r_result: RerankedResult):
...     print(f"-> {r_result.document['wkd']}")
...
>>> cursor.for_each(printer)
-> Mon
-> Tue
-> Sun
>>>
>>> if cursor.state != CursorState.CLOSED:
...     print(f"alive: {cursor.to_list()}")
... else:
...     print("(closed)")
...
(closed)
>>> cursor2 = cursor.clone()
>>> def checker(r_result: RerankedResult):
...     print(f"-> {r_result.document['wkd']}")
...     return r_result.document["wkd"] != "Tue"
...
>>> cursor2.for_each(checker)
-> Mon
-> Tue
>>>
>>> if cursor2.state != CursorState.CLOSED:
...     print(f"alive: {list(cursor2)}")
... else:
...     print("(closed)")
...
alive: [RerankedResult(document={'_id': 'G', 'wkd': 'Sun'}, scores={})]

Expand source code

def for_each(
    self,
    function: Callable[[T], bool | None],
    *,
    general_method_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Run a callback on every document still to be consumed from this cursor.

    The cursor must not be CLOSED when this method is called, otherwise
    an error is raised.

    Whatever the callback returns is ignored, with a single exception: a return
    value that is exactly the boolean `False` makes the method stop right away,
    which leaves the cursor partially consumed (ACTIVE state). When no such
    early exit takes place, the cursor is exhausted by this call and ends up
    in the CLOSED state.

    Args:
        function: a callable accepting one argument, of the same type as the
            items yielded by the cursor. It is called once for each document
            coming from the cursor. As soon as it returns `False`, `for_each`
            returns and no further documents are consumed.
        general_method_timeout_ms: a timeout, in milliseconds, covering the
            whole execution of this method. If omitted, no such timeout is set.
            The per-request timeout set on the cursor applies regardless.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Example:
        >>> # (this assumes 'vectorize'. See `Collection.find_and_rerank` for more.)
        >>> from astrapy.cursors import CursorState, RerankedResult
        >>>
        >>> cursor = collection.find_and_rerank(
        ...     sort={"$hybrid": "Weekdays?"},
        ...     projection={"wkd": True},
        ...     limit=3,
        ... )
        >>> def printer(r_result: RerankedResult):
        ...     print(f"-> {r_result.document['wkd']}")
        ...
        >>> cursor.for_each(printer)
        -> Mon
        -> Tue
        -> Sun
        >>>
        >>> if cursor.state != CursorState.CLOSED:
        ...     print(f"alive: {cursor.to_list()}")
        ... else:
        ...     print("(closed)")
        ...
        (closed)
        >>> cursor2 = cursor.clone()
        >>> def checker(r_result: RerankedResult):
        ...     print(f"-> {r_result.document['wkd']}")
        ...     return r_result.document["wkd"] != "Tue"
        ...
        >>> cursor2.for_each(checker)
        -> Mon
        -> Tue
        >>>
        >>> if cursor2.state != CursorState.CLOSED:
        ...     print(f"alive: {list(cursor2)}")
        ... else:
        ...     print("(closed)")
        ...
        alive: [RerankedResult(document={'_id': 'G', 'wkd': 'Sun'}, scores={})]
    """

    self._ensure_alive()
    req_timeout_ms, ovr_timeout_ms = _revise_timeouts_for_cursor_copy(
        new_general_method_timeout_ms=general_method_timeout_ms,
        new_timeout_ms=timeout_ms,
        old_request_timeout_ms=self._request_timeout_ms,
    )
    # The iteration runs on a copy configured with the revised timeouts.
    # NOTE(review): `_imprint_internal_state` presumably transfers the caller's
    # internal state onto its argument -- confirm against its definition.
    worker = self._copy(
        request_timeout_ms=req_timeout_ms,
        overall_timeout_ms=ovr_timeout_ms,
    )
    self._imprint_internal_state(worker)
    for item in worker:
        # Only an actual `False` stops the loop: None, 0, "" etc. do not.
        if function(item) is False:
            break
    # Hand the state reached by the working copy back to this cursor.
    worker._imprint_internal_state(self)

def get_sort_vector(self) ‑> list[float] | DataAPIVector | None

Return the query vector used in the vector (ANN) search that was run as part of the search expressed by this cursor, if applicable.

Calling get_sort_vector on an IDLE cursor triggers the first page fetch, but the cursor stays in the IDLE state until actual consumption starts.

The method can be invoked on a CLOSED cursor and will return either None or the sort vector used in the search.

Returns

Expand source code

def get_sort_vector(self) -> list[float] | DataAPIVector | None:
    """
    Give the query vector employed by the vector (ANN) search that was part
    of the search expressed by this cursor, when there is one.

    On an IDLE cursor, `get_sort_vector` triggers the fetch of the first page;
    the cursor nevertheless remains IDLE until items are actually consumed.

    Calling this method on a CLOSED cursor is allowed: the result is then
    either the sort vector used in the search or None.

    Returns:
        the query vector used in the search, provided it was requested with
            `include_sort_vector=True` in the `find_and_rerank` call the
            cursor comes from.
            None is returned when no sort vector is available.
            When available, the vector is either a DataAPIVector or a plain
            list of numbers, depending on `APIOptions.serdes_options`.
    """

    self._try_ensure_fill_buffer()
    if not self._last_response_status:
        # No (or empty) status from the last response: no vector to report.
        return None
    return _ensure_vector(
        self._last_response_status.get("sortVector"),
        self.data_source.api_options.serdes_options,
    )

def has_next(self) ‑> bool

Whether the cursor actually has more documents to return.

has_next can be called on any cursor, but on a CLOSED cursor will always return False.

This method can trigger the fetch operation of a new page, if the current buffer is empty.

Calling has_next on an IDLE cursor triggers the first page fetch, but the cursor stays in the IDLE state until actual consumption starts.

Returns

a boolean value of True if there is at least one further item available to consume; False otherwise (including the case of CLOSED cursor).

Expand source code

def has_next(self) -> bool:
    """
    Tell whether at least one more document can be obtained from the cursor.

    This method is safe to call in any cursor state; a CLOSED cursor
    always yields False.

    If the local buffer is empty, calling this method may cause a new page
    to be fetched.

    On an IDLE cursor, `has_next` triggers the fetch of the first page; the
    cursor nevertheless remains IDLE until items are actually consumed.

    Returns:
        True if one or more items are still available for consumption.
            False if there are none, which includes the CLOSED cursor case.
    """

    is_closed = self._state == CursorState.CLOSED
    if is_closed:
        # Nothing to fetch, nothing to return.
        return False
    self._try_ensure_fill_buffer()
    buffered_count = len(self._buffer)
    return buffered_count > 0

def hybrid_limits(self, hybrid_limits: int | dict[str, int] | None) ‑> CollectionFindAndRerankCursor[~TRAW, ~T]

Return a copy of this cursor with a new hybrid_limits setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the Collection find_and_rerank method.

Args

hybrid_limits: a new setting to apply to the returned new cursor.

Returns

a new CollectionFindAndRerankCursor with the same settings as this one, except for hybrid_limits which is the provided value.

Expand source code

def hybrid_limits(
    self, hybrid_limits: int | dict[str, int] | None
) -> CollectionFindAndRerankCursor[TRAW, T]:
    """
    Build a copy of this cursor that uses a different hybrid_limits setting.
    The cursor must still be in the IDLE state for this to be allowed.

    This method is seldom called explicitly: usually the setting is given
    as an argument to the Collection `find_and_rerank` method.

    Args:
        hybrid_limits: the hybrid_limits setting for the new cursor being returned.

    Returns:
        a new CollectionFindAndRerankCursor whose settings are those of this
            cursor, with `hybrid_limits` replaced by the provided value.
    """

    self._ensure_idle()
    updated_cursor = self._copy(hybrid_limits=hybrid_limits)
    return updated_cursor

def include_scores(self, include_scores: bool | None) ‑> CollectionFindAndRerankCursor[~TRAW, ~T]

Return a copy of this cursor with a new include_scores setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the Collection find_and_rerank method.

Args

include_scores: a new include_scores setting to apply to the returned new cursor.

Returns

a new CollectionFindAndRerankCursor with the same settings as this one, except for include_scores which is the provided value.

Expand source code

def include_scores(
    self, include_scores: bool | None
) -> CollectionFindAndRerankCursor[TRAW, T]:
    """
    Build a copy of this cursor that uses a different include_scores setting.
    The cursor must still be in the IDLE state for this to be allowed.

    This method is seldom called explicitly: usually the setting is given
    as an argument to the Collection `find_and_rerank` method.

    Args:
        include_scores: the include_scores setting for the new cursor
            being returned.

    Returns:
        a new CollectionFindAndRerankCursor whose settings are those of this
            cursor, with `include_scores` replaced by the provided value.
    """

    self._ensure_idle()
    updated_cursor = self._copy(include_scores=include_scores)
    return updated_cursor

def include_sort_vector(self, include_sort_vector: bool | None) ‑> CollectionFindAndRerankCursor[~TRAW, ~T]

Return a copy of this cursor with a new include_sort_vector setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the Collection find_and_rerank method.

Args

include_sort_vector: a new include_sort_vector setting to apply to the returned new cursor.

Returns

a new CollectionFindAndRerankCursor with the same settings as this one, except for include_sort_vector which is the provided value.

Expand source code

def include_sort_vector(
    self, include_sort_vector: bool | None
) -> CollectionFindAndRerankCursor[TRAW, T]:
    """
    Build a copy of this cursor that uses a different include_sort_vector setting.
    The cursor must still be in the IDLE state for this to be allowed.

    This method is seldom called explicitly: usually the setting is given
    as an argument to the Collection `find_and_rerank` method.

    Args:
        include_sort_vector: the include_sort_vector setting for the new cursor
            being returned.

    Returns:
        a new CollectionFindAndRerankCursor whose settings are those of this
            cursor, with `include_sort_vector` replaced by the provided value.
    """

    self._ensure_idle()
    updated_cursor = self._copy(include_sort_vector=include_sort_vector)
    return updated_cursor

def initial_page_state(self, initial_page_state: str | UnsetType) ‑> CollectionFindAndRerankCursor[~TRAW, ~T]

Return a copy of this cursor with a new initial_page_state setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the Collection find method.

Args

initial_page_state: a new initial_page_state setting to apply to the returned new cursor. Passing an explicit None raises an error.

Returns

a new CollectionFindAndRerankCursor with the same settings as this one, except for initial_page_state which is the provided value.

Expand source code

def initial_page_state(
    self, initial_page_state: str | UnsetType
) -> CollectionFindAndRerankCursor[TRAW, T]:
    """
    Build a copy of this cursor that uses a different initial_page_state setting.
    The cursor must still be in the IDLE state for this to be allowed.

    A cursor 'set to start from a certain page' is meant to be consumed
    page by page (see the `fetch_next_page` method).

    Args:
        initial_page_state: the initial_page_state setting for the new cursor
            being returned. Passing an explicit None raises an error.

    Returns:
        a new CollectionFindAndRerankCursor whose settings are those of this
            cursor, with `initial_page_state` replaced by the provided value.
    """

    self._ensure_idle()
    updated_cursor = self._copy(initial_page_state=initial_page_state)
    return updated_cursor

def limit(self, limit: int | None) ‑> CollectionFindAndRerankCursor[~TRAW, ~T]

Return a copy of this cursor with a new limit setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the Collection find_and_rerank method.

Args

limit: a new limit setting to apply to the returned new cursor.

Returns

a new CollectionFindAndRerankCursor with the same settings as this one, except for limit which is the provided value.

Expand source code

def limit(self, limit: int | None) -> CollectionFindAndRerankCursor[TRAW, T]:
    """
    Build a copy of this cursor that uses a different limit.
    The cursor must still be in the IDLE state for this to be allowed.

    This method is seldom called explicitly: usually the limit is given
    as an argument to the Collection `find_and_rerank` method.

    Args:
        limit: the limit setting for the new cursor being returned.

    Returns:
        a new CollectionFindAndRerankCursor whose settings are those of this
            cursor, with `limit` replaced by the provided value.
    """

    self._ensure_idle()
    updated_cursor = self._copy(limit=limit)
    return updated_cursor

def map(self, mapper: Callable[[T], TNEW]) ‑> CollectionFindAndRerankCursor[~TRAW, ~TNEW]

Return a copy of this cursor with a mapping function to transform the returned items. Calling this method on a cursor with a mapping already set results in the mapping functions being composed.

This operation is allowed only if the cursor state is still IDLE.

Args

mapper: a function transforming the objects returned by the cursor into something else (i.e. a function T => TNEW). If the map is imposed on a cursor without mapping yet, its input argument must be a RerankedResult[TRAW], where TRAW stands for the type of the documents from the collection.

Returns

a new CollectionFindAndRerankCursor with a new mapping function on the results, possibly composed with any pre-existing mapping function.

Example

>>> # (this assumes 'vectorize'. See `Collection.find_and_rerank` for more.)
>>> cursor = collection.find_and_rerank(
...     sort={"$hybrid": "Weekdays?"},
...     projection={"wkd": True},
...     limit=3,
... )
>>> for r_result in cursor:
...     print(r_result.document)
...
{'_id': 'A', 'wkd': 'Mon'}
{'_id': 'B', 'wkd': 'Tue'}
{'_id': 'G', 'wkd': 'Sun'}
>>> cursor_mapped = cursor.clone().map(
...     lambda r_result: r_result.document["wkd"]
... )
>>> for value in cursor_mapped:
...     print(value)
...
Mon
Tue
Sun
>>> cursor_mapped_twice = cursor_mapped.clone().map(
...     lambda wkd: f"<{wkd[:2].lower()}>"
... )
>>> for value in cursor_mapped_twice:
...     print(value)
...
<mo>
<tu>
<su>

Expand source code

def map(
    self, mapper: Callable[[T], TNEW]
) -> CollectionFindAndRerankCursor[TRAW, TNEW]:
    """
    Create a new cursor whose returned items are transformed by a mapping
    function. If this cursor already has a mapping, the new cursor applies
    the existing mapping first and the provided one on its output
    (i.e. the two functions are composed).

    The cursor must still be in the IDLE state for this operation
    to be allowed.

    Args:
        mapper: a function T => TNEW, turning the items returned by this
            cursor into something else. When no mapping is set yet on the
            cursor, the function receives a `RerankedResult[TRAW]`, with TRAW
            being the type of the documents from the collection.

    Returns:
        a new CollectionFindAndRerankCursor equipped with the (possibly
            composed) mapping function; all other settings are carried over.

    Example:
        >>> # (this assumes 'vectorize'. See `Collection.find_and_rerank` for more.)
        >>> cursor = collection.find_and_rerank(
        ...     sort={"$hybrid": "Weekdays?"},
        ...     projection={"wkd": True},
        ...     limit=3,
        ... )
        >>> for r_result in cursor:
        ...     print(r_result.document)
        ...
        {'_id': 'A', 'wkd': 'Mon'}
        {'_id': 'B', 'wkd': 'Tue'}
        {'_id': 'G', 'wkd': 'Sun'}
        >>> cursor_mapped = cursor.clone().map(
        ...     lambda r_result: r_result.document["wkd"]
        ... )
        >>> for value in cursor_mapped:
        ...     print(value)
        ...
        Mon
        Tue
        Sun
        >>> cursor_mapped_twice = cursor_mapped.clone().map(
        ...     lambda wkd: f"<{wkd[:2].lower()}>"
        ... )
        >>> for value in cursor_mapped_twice:
        ...     print(value)
        ...
        <mo>
        <tu>
        <su>
    """
    self._ensure_idle()
    source_collection = self._query_engine.collection
    if source_collection is None:
        raise RuntimeError("Query engine has no collection.")

    full_mapper: Callable[[RerankedResult[TRAW]], TNEW]
    if self._mapper is None:
        # No mapping yet: here T coincides with RerankedResult[TRAW],
        # so the provided function can be used as is.
        full_mapper = cast(Callable[[RerankedResult[TRAW]], TNEW], mapper)
    else:

        def _chained(r_result: RerankedResult[TRAW]) -> TNEW:
            # existing mapping first, then the newly-provided one
            return mapper(self._mapper(r_result))  # type: ignore[misc]

        full_mapper = _chained

    return CollectionFindAndRerankCursor(
        collection=source_collection,
        request_timeout_ms=self._request_timeout_ms,
        overall_timeout_ms=self._overall_timeout_ms,
        request_timeout_label=self._request_timeout_label,
        overall_timeout_label=self._overall_timeout_label,
        filter=self._filter,
        projection=self._projection,
        sort=self._sort,
        limit=self._limit,
        hybrid_limits=self._hybrid_limits,
        initial_page_state=self._initial_page_state,
        include_scores=self._include_scores,
        include_sort_vector=self._include_sort_vector,
        rerank_on=self._rerank_on,
        rerank_query=self._rerank_query,
        mapper=full_mapper,
    )

def project(self, projection: ProjectionType | None) ‑> CollectionFindAndRerankCursor[~TRAW, ~T]

Return a copy of this cursor with a new projection setting. This operation is allowed only if the cursor state is still IDLE and if no mapping has been set on it.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the Collection find_and_rerank method.

Args

projection: a new projection setting to apply to the returned new cursor.

Returns

a new CollectionFindAndRerankCursor with the same settings as this one, except for projection which is the provided value.

Expand source code

def project(
    self, projection: ProjectionType | None
) -> CollectionFindAndRerankCursor[TRAW, T]:
    """
    Create a new cursor that differs from this one only in its projection.

    The cursor must still be in the IDLE state and must not have a mapping
    function set; in the latter case a CursorException is raised.

    Rather than calling this method directly, one usually supplies the
    corresponding argument to the Collection `find_and_rerank` method.

    Args:
        projection: the projection setting for the new cursor.

    Returns:
        a new CollectionFindAndRerankCursor carrying over all settings
            of this one, with `projection` replaced by the provided value.
    """

    self._ensure_idle()
    if self._mapper is None:
        return self._copy(projection=projection)
    # a mapping is in place: changing the projection is refused
    raise CursorException(
        "Cannot set projection after map.",
        cursor_state=self._state.value,
    )

def rerank_on(self, rerank_on: str | None) ‑> CollectionFindAndRerankCursor[~TRAW, ~T]

Return a copy of this cursor with a new rerank_on setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the Collection find_and_rerank method.

Args

rerank_on: a new setting to apply to the returned new cursor.

Returns

a new CollectionFindAndRerankCursor with the same settings as this one, except for rerank_on which is the provided value.

Expand source code

def rerank_on(
    self, rerank_on: str | None
) -> CollectionFindAndRerankCursor[TRAW, T]:
    """
    Create a new cursor that differs from this one only in its
    rerank_on setting.

    The cursor must still be in the IDLE state for this operation
    to be allowed.

    Rather than calling this method directly, one usually supplies the
    corresponding argument to the Collection `find_and_rerank` method.

    Args:
        rerank_on: the rerank_on setting for the new cursor.

    Returns:
        a new CollectionFindAndRerankCursor carrying over all settings
            of this one, with `rerank_on` replaced by the provided value.
    """

    self._ensure_idle()
    updated_cursor = self._copy(rerank_on=rerank_on)
    return updated_cursor

def rerank_query(self, rerank_query: str | None) ‑> CollectionFindAndRerankCursor[~TRAW, ~T]

Return a copy of this cursor with a new rerank_query setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the Collection find_and_rerank method.

Args

rerank_query: a new setting to apply to the returned new cursor.

Returns

a new CollectionFindAndRerankCursor with the same settings as this one, except for rerank_query which is the provided value.

Expand source code

def rerank_query(
    self, rerank_query: str | None
) -> CollectionFindAndRerankCursor[TRAW, T]:
    """
    Create a new cursor that differs from this one only in its
    rerank_query setting.

    The cursor must still be in the IDLE state for this operation
    to be allowed.

    Rather than calling this method directly, one usually supplies the
    corresponding argument to the Collection `find_and_rerank` method.

    Args:
        rerank_query: the rerank_query setting for the new cursor.

    Returns:
        a new CollectionFindAndRerankCursor carrying over all settings
            of this one, with `rerank_query` replaced by the provided value.
    """

    self._ensure_idle()
    updated_cursor = self._copy(rerank_query=rerank_query)
    return updated_cursor

def sort(self, sort: HybridSortType | None) ‑> CollectionFindAndRerankCursor[~TRAW, ~T]

Return a copy of this cursor with a new sort setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the Collection find_and_rerank method.

Args

sort: a new sort setting to apply to the returned new cursor.

Returns

a new CollectionFindAndRerankCursor with the same settings as this one, except for sort which is the provided value.

Expand source code

def sort(
    self, sort: HybridSortType | None
) -> CollectionFindAndRerankCursor[TRAW, T]:
    """
    Create a new cursor that differs from this one only in its sort setting.

    The cursor must still be in the IDLE state for this operation
    to be allowed.

    Rather than calling this method directly, one usually supplies the
    corresponding argument to the Collection `find_and_rerank` method.

    Args:
        sort: the sort setting for the new cursor.

    Returns:
        a new CollectionFindAndRerankCursor carrying over all settings
            of this one, with `sort` replaced by the provided value.
    """

    self._ensure_idle()
    sorted_cursor = self._copy(sort=sort)
    return sorted_cursor

def to_list(self, *, general_method_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[~T]

Materialize all documents that remain to be consumed from a cursor into a list.

Calling this method on a CLOSED cursor results in an error.

Args

general_method_timeout_ms: a timeout, in milliseconds, for the whole duration of this method. If not provided, there is no such timeout. Note that the per-request timeout set on the cursor still applies.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a list of documents (or other values depending on the mapping function, if one is set). These are all items that were left to be consumed on the cursor when to_list is called.

Example

>>> # (this assumes 'vectorize'. See `Collection.find_and_rerank` for more.)
>>> collection.find_and_rerank(
...     sort={"$hybrid": "Weekdays?"},
...     projection={"wkd": True},
...     limit=4,
... ).map(
...     lambda r_result: r_result.document["wkd"]
... ).to_list()
['Wed', 'Mon', 'Tue', 'Sun']
>>>
>>> cursor = collection.find_and_rerank(
...     sort={"$hybrid": "Weekdays?"},
...     projection={"wkd": True},
...     limit=4,
... ).map(lambda r_result: r_result.document["wkd"])
>>> print(f"First item: {cursor.__next__()}.")
First item: Wed.
>>> cursor.to_list()
['Mon', 'Tue', 'Sun']

Expand source code

def to_list(
    self,
    *,
    general_method_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[T]:
    """
    Collect into a list all the items the cursor has not consumed yet.

    A CLOSED cursor cannot be used with this method: an error is raised.

    On an IDLE cursor, the returned list holds the whole result set of the
    `find_and_rerank` operation; on a cursor that has already been partly
    consumed, the items consumed so far are not part of the list.

    This method should be avoided when a very large number of results is
    expected: many data exchanges with the Data API would be necessary and
    the resulting list could take up a huge amount of memory. Lazily
    iterating over the cursor and consuming the items one by one is the
    preferred pattern in those cases.

    Args:
        general_method_timeout_ms: a timeout, in milliseconds, for the whole
            duration of this method. If not provided, there is no such timeout.
            Note that the per-request timeout set on the cursor still applies.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a list with all items that were still to be consumed on the cursor
            at the time of calling `to_list`. The items are documents, or
            other values if a mapping function is set on the cursor.

    Example:
        >>> # (this assumes 'vectorize'. See `Collection.find_and_rerank` for more.)
        >>> collection.find_and_rerank(
        ...     sort={"$hybrid": "Weekdays?"},
        ...     projection={"wkd": True},
        ...     limit=4,
        ... ).map(
        ...     lambda r_result: r_result.document["wkd"]
        ... ).to_list()
        ['Wed', 'Mon', 'Tue', 'Sun']
        >>>
        >>> cursor = collection.find_and_rerank(
        ...     sort={"$hybrid": "Weekdays?"},
        ...     projection={"wkd": True},
        ...     limit=4,
        ... ).map(lambda r_result: r_result.document["wkd"])
        >>> print(f"First item: {cursor.__next__()}.")
        First item: Wed.
        >>> cursor.to_list()
        ['Mon', 'Tue', 'Sun']
    """

    self._ensure_alive()
    revised_request_ms, revised_overall_ms = _revise_timeouts_for_cursor_copy(
        new_general_method_timeout_ms=general_method_timeout_ms,
        new_timeout_ms=timeout_ms,
        old_request_timeout_ms=self._request_timeout_ms,
    )
    # The iteration runs on a temporary copy configured with the revised
    # timeouts, so that the timeout settings of this cursor stay untouched.
    working_cursor = self._copy(
        request_timeout_ms=revised_request_ms,
        overall_timeout_ms=revised_overall_ms,
    )
    # NOTE(review): `_imprint_internal_state` presumably transfers the running
    # state (buffer, page state, counters) onto its argument - confirm against
    # AbstractCursor. It is called in both directions: before the iteration
    # the copy takes over from this cursor, afterwards this cursor receives
    # the final state back from the copy.
    self._imprint_internal_state(working_cursor)
    collected = list(working_cursor)
    working_cursor._imprint_internal_state(self)
    return collected

Inherited members

AbstractCursor:
- buffered_count
- close
- consume_buffer
- consumed
- cursor_id
- rewind
- state

A synchronous cursor over documents, as returned by a find invocation on a Collection. A cursor can be iterated over, materialized into a list, and queried/manipulated in various ways.

Some cursor operations mutate it in-place (such as consuming its documents), others return a new cursor without changing the original one. See the documentation for the various methods and the Collection find method for more details and usage patterns.

Example

>>> cursor = collection.find(
...     {},
...     projection={"seq": True, "_id": False},
...     limit=5,
... )
>>> for document in cursor:
...     print(document)
...
{'seq': 1}
{'seq': 4}
{'seq': 15}
{'seq': 22}
{'seq': 11}

Expand source code

class CollectionFindCursor(Generic[TRAW, T], AbstractCursor[TRAW]):
    """
    A synchronous cursor over documents, as returned by a `find` invocation on
    a Collection. A cursor can be iterated over, materialized into a list,
    and queried/manipulated in various ways.

    Some cursor operations mutate it in-place (such as consuming its documents),
    other return a new cursor without changing the original one. See the documentation
    for the various methods and the Collection `find` method for more details
    and usage patterns.

    A cursor has two type parameters: TRAW and T. The first is the type of the "raw"
    documents as they are obtained from the Data API, the second is the type of the
    items after the optional mapping function (see the `.map()` method). If there is
    no mapping, TRAW = T. In general, consuming a cursor returns items of type T,
    except for the `consume_buffer` primitive that draws directly from the buffer
    and always returns items of type TRAW.

    Example:
        >>> cursor = collection.find(
        ...     {},
        ...     projection={"seq": True, "_id": False},
        ...     limit=5,
        ... )
        >>> for document in cursor:
        ...     print(document)
        ...
        {'seq': 1}
        {'seq': 4}
        {'seq': 15}
        {'seq': 22}
        {'seq': 11}
    """

    _query_engine: _CollectionFindQueryEngine[TRAW]
    _request_timeout_ms: int | None
    _overall_timeout_ms: int | None
    _request_timeout_label: str | None
    _overall_timeout_label: str | None
    _timeout_manager: MultiCallTimeoutManager
    _filter: FilterType | None
    _projection: ProjectionType | None
    _sort: dict[str, Any] | None
    _limit: int | None
    _initial_page_state: str | UnsetType
    _include_similarity: bool | None
    _include_sort_vector: bool | None
    _skip: int | None
    _mapper: Callable[[TRAW], T] | None

    def __init__(
        self,
        *,
        collection: Collection[TRAW],
        request_timeout_ms: int | None,
        overall_timeout_ms: int | None,
        request_timeout_label: str | None = None,
        overall_timeout_label: str | None = None,
        filter: FilterType | None = None,
        projection: ProjectionType | None = None,
        sort: dict[str, Any] | None = None,
        limit: int | None = None,
        initial_page_state: str | UnsetType = _UNSET,
        include_similarity: bool | None = None,
        include_sort_vector: bool | None = None,
        skip: int | None = None,
        mapper: Callable[[TRAW], T] | None = None,
    ) -> None:
        """
        Set up a cursor over the results of a `find` on a collection.

        All arguments are keyword-only and are stored on the cursor.

        Args:
            collection: the Collection the query engine will run against.
            request_timeout_ms: timeout setting used as a cap when
                fetching a page.
            overall_timeout_ms: timeout setting handed to the
                multi-call timeout manager.
            request_timeout_label: label associated to the request timeout.
            overall_timeout_label: label associated to the overall timeout.
            filter: filter for the query (stored as a deep copy).
            projection: projection for the query.
            sort: sort clause for the query (stored as a deep copy).
            limit: limit for the query.
            initial_page_state: initial page state, passed on to
                the AbstractCursor initializer.
            include_similarity: include_similarity setting for the query.
            include_sort_vector: include_sort_vector setting for the query.
            skip: skip setting for the query.
            mapper: optional function applied to each item as it is
                consumed from the cursor.
        """
        # timeout-related settings
        self._request_timeout_ms = request_timeout_ms
        self._overall_timeout_ms = overall_timeout_ms
        self._request_timeout_label = request_timeout_label
        self._overall_timeout_label = overall_timeout_label

        # query settings: filter and sort are kept as deep copies, i.e. as
        # objects independent of the ones supplied by the caller
        self._filter = deepcopy(filter)
        self._sort = deepcopy(sort)
        self._projection = projection
        self._limit = limit
        self._skip = skip
        self._include_similarity = include_similarity
        self._include_sort_vector = include_sort_vector
        self._initial_page_state = initial_page_state
        self._mapper = mapper

        # a synchronous cursor: the query engine gets no async collection
        self._query_engine = _CollectionFindQueryEngine(
            collection=collection,
            async_collection=None,
            filter=self._filter,
            projection=self._projection,
            sort=self._sort,
            limit=self._limit,
            include_similarity=self._include_similarity,
            include_sort_vector=self._include_sort_vector,
            skip=self._skip,
        )
        AbstractCursor.__init__(self, initial_page_state=initial_page_state)
        self._timeout_manager = MultiCallTimeoutManager(
            overall_timeout_ms=self._overall_timeout_ms,
            timeout_label=self._overall_timeout_label,
        )

    def _copy(
        self: CollectionFindCursor[TRAW, T],
        *,
        request_timeout_ms: int | None | UnsetType = _UNSET,
        overall_timeout_ms: int | None | UnsetType = _UNSET,
        request_timeout_label: str | None | UnsetType = _UNSET,
        overall_timeout_label: str | None | UnsetType = _UNSET,
        filter: FilterType | None | UnsetType = _UNSET,
        projection: ProjectionType | None | UnsetType = _UNSET,
        sort: dict[str, Any] | None | UnsetType = _UNSET,
        limit: int | None | UnsetType = _UNSET,
        initial_page_state: str | None | UnsetType = _UNSET,
        include_similarity: bool | None | UnsetType = _UNSET,
        include_sort_vector: bool | None | UnsetType = _UNSET,
        skip: int | None | UnsetType = _UNSET,
    ) -> CollectionFindCursor[TRAW, T]:
        """
        Build a new CollectionFindCursor from the settings of this one,
        with the possibility of overriding any of them.

        Each argument left unset keeps the value stored on this cursor;
        any other value (an explicit None included) replaces it in the
        new cursor. The only exception is `initial_page_state`, for which
        an explicit None results in the unset marker being used.
        The mapping function is always carried over unchanged.

        Returns:
            a newly-constructed CollectionFindCursor.

        Raises:
            RuntimeError: if the query engine has no collection.
        """
        source_collection = self._query_engine.collection
        if source_collection is None:
            raise RuntimeError("Query engine has no collection.")

        def _resolve(candidate: Any, current: Any) -> Any:
            # the unset marker stands for "keep the current value"
            return current if isinstance(candidate, UnsetType) else candidate

        # special treatment: passing None erases (hence the constructor
        # must be given the unset marker and not None)
        new_page_state: str | UnsetType
        if isinstance(initial_page_state, UnsetType):
            new_page_state = self._initial_page_state
        elif initial_page_state is None:
            new_page_state = _UNSET
        else:
            new_page_state = initial_page_state

        return CollectionFindCursor(
            collection=source_collection,
            request_timeout_ms=_resolve(request_timeout_ms, self._request_timeout_ms),
            overall_timeout_ms=_resolve(overall_timeout_ms, self._overall_timeout_ms),
            request_timeout_label=_resolve(
                request_timeout_label, self._request_timeout_label
            ),
            overall_timeout_label=_resolve(
                overall_timeout_label, self._overall_timeout_label
            ),
            filter=_resolve(filter, self._filter),
            projection=_resolve(projection, self._projection),
            sort=_resolve(sort, self._sort),
            limit=_resolve(limit, self._limit),
            initial_page_state=new_page_state,
            include_similarity=_resolve(include_similarity, self._include_similarity),
            include_sort_vector=_resolve(
                include_sort_vector, self._include_sort_vector
            ),
            skip=_resolve(skip, self._skip),
            mapper=self._mapper,
        )

    def _try_ensure_fill_buffer(self) -> None:
        """
        Fetch the next page into the buffer, if the buffer is empty and
        there is a page to fetch. In all other cases, quietly do nothing.
        The cursor state is never altered by this method.
        """

        if self._state == CursorState.CLOSED:
            return
        if self._buffer:
            # there still are items to consume: no fetch needed
            return
        if self._next_page_state is None and self._state != CursorState.IDLE:
            # no further page state, and the first page was already requested
            return

        timeout_context = self._timeout_manager.remaining_timeout(
            cap_time_ms=self._request_timeout_ms,
            cap_timeout_label=self._request_timeout_label,
        )
        fetched_items, fetched_page_state, fetched_status = (
            self._query_engine._fetch_page(
                page_state=self._next_page_state,
                timeout_context=timeout_context,
            )
        )
        self._next_page_state = fetched_page_state
        self._last_response_status = fetched_status
        self._pages_retrieved += 1
        self._buffer = fetched_items

    def __repr__(self) -> str:
        """
        A short description of the cursor: class name, name of the data
        source, state value and number of items consumed so far.
        """
        class_name = self.__class__.__name__
        source_name = self.data_source.name
        state_value = self._state.value
        return (
            f'{class_name}("{source_name}", '
            f"{state_value}, "
            f"consumed so far: {self.consumed})"
        )

    def __iter__(self: CollectionFindCursor[TRAW, T]) -> CollectionFindCursor[TRAW, T]:
        """
        Iterator protocol: the cursor is its own iterator.
        `_ensure_alive` is invoked before returning the cursor.
        """
        self._ensure_alive()
        return self

    def __next__(self) -> T:
        """
        Consume and return the next item, fetching a new page if needed.

        The cursor becomes STARTED when an item is returned, and CLOSED
        once no more items are available.

        Returns:
            the next item, passed through the mapping function if one is set.

        Raises:
            StopIteration: if the cursor is CLOSED or has no items left.
        """
        if self._state == CursorState.CLOSED:
            raise StopIteration
        self._try_ensure_fill_buffer()
        if not self._buffer:
            # nothing left even after trying to fetch: the cursor is exhausted
            self._state = CursorState.CLOSED
            raise StopIteration
        self._state = CursorState.STARTED
        # take the first item off the buffer (the buffer is replaced by
        # a new list with the remaining items, not modified in place)
        raw_item = self._buffer[0]
        self._buffer = self._buffer[1:]
        self._consumed += 1
        if self._mapper is None:
            return cast(T, raw_item)
        return cast(T, self._mapper(raw_item))

    @property
    def data_source(self) -> Collection[TRAW]:
        """
        The Collection this cursor was created from (by means of
        a `find` operation).

        Returns:
            a Collection instance.

        Raises:
            RuntimeError: if the query engine has no collection.
        """

        source_collection = self._query_engine.collection
        if source_collection is None:
            raise RuntimeError("Query engine has no collection.")
        return source_collection

    def clone(self) -> CollectionFindCursor[TRAW, T]:
        """
        Make a brand new cursor out of this one. The new cursor:
        - has the same parameters (timeouts, filter, projection, etc),
          mapping function included;
        - is in its pristine IDLE state, regardless of the state
          of this cursor.

        Returns:
            a new CollectionFindCursor, similar to this one but
            rewound to its initial state.

        Example:
            >>> cursor = collection.find(
            ...     {},
            ...     projection={"seq": True, "_id": False},
            ...     limit=2,
            ... ).map(lambda doc: doc["seq"])
            >>> for value in cursor:
            ...     print(value)
            ...
            1
            4
            >>> cloned_cursor = cursor.clone()
            >>> for document in cloned_cursor:
            ...     print(document)
            ...
            1
            4
        """

        # With no overrides, `_copy` constructs a new cursor passing every
        # stored setting as is (and raises RuntimeError if the query
        # engine has no collection).
        return self._copy()

    def filter(self, filter: FilterType | None) -> CollectionFindCursor[TRAW, T]:
        """
        Create a new cursor that differs from this one only in its filter.

        The cursor must still be in the IDLE state for this operation
        to be allowed.

        Rather than calling this method directly, one usually supplies the
        corresponding argument to the Collection `find` method.

        Args:
            filter: the filter setting for the new cursor.

        Returns:
            a new CollectionFindCursor carrying over all settings of
                this one, with `filter` replaced by the provided value.
        """

        self._ensure_idle()
        filtered_cursor = self._copy(filter=filter)
        return filtered_cursor

    def project(
        self, projection: ProjectionType | None
    ) -> CollectionFindCursor[TRAW, T]:
        """
        Create a new cursor that differs from this one only in its projection.

        The cursor must still be in the IDLE state and must not have a mapping
        function set; in the latter case a CursorException is raised.

        Rather than calling this method directly, one usually supplies the
        corresponding argument to the Collection `find` method.

        Args:
            projection: the projection setting for the new cursor.

        Returns:
            a new CollectionFindCursor carrying over all settings of
                this one, with `projection` replaced by the provided value.
        """

        self._ensure_idle()
        if self._mapper is None:
            return self._copy(projection=projection)
        # a mapping is in place: changing the projection is refused
        raise CursorException(
            "Cannot set projection after map.",
            cursor_state=self._state.value,
        )

    def sort(self, sort: dict[str, Any] | None) -> CollectionFindCursor[TRAW, T]:
        """
        Create a new cursor that differs from this one only in its
        sort setting.

        The cursor must still be in the IDLE state for this operation
        to be allowed.

        Rather than calling this method directly, one usually supplies the
        corresponding argument to the Collection `find` method.

        Args:
            sort: the sort setting for the new cursor.

        Returns:
            a new CollectionFindCursor carrying over all settings of
                this one, with `sort` replaced by the provided value.
        """

        self._ensure_idle()
        sorted_cursor = self._copy(sort=sort)
        return sorted_cursor

    def limit(self, limit: int | None) -> CollectionFindCursor[TRAW, T]:
        """
        Create a new cursor that differs from this one only in its limit.

        The cursor must still be in the IDLE state for this operation
        to be allowed.

        Rather than calling this method directly, one usually supplies the
        corresponding argument to the Collection `find` method.

        Args:
            limit: the limit setting for the new cursor.

        Returns:
            a new CollectionFindCursor carrying over all settings of
                this one, with `limit` replaced by the provided value.
        """

        self._ensure_idle()
        limited_cursor = self._copy(limit=limit)
        return limited_cursor

    def initial_page_state(
        self, initial_page_state: str | UnsetType
    ) -> CollectionFindCursor[TRAW, T]:
        """
        Create a new cursor that differs from this one only in its
        initial_page_state setting.

        The cursor must still be in the IDLE state for this operation
        to be allowed.

        Rather than calling this method directly, one usually supplies the
        corresponding argument to the Collection `find` method.

        Args:
            initial_page_state: the initial_page_state setting for
                the new cursor.

        Returns:
            a new CollectionFindCursor carrying over all settings of this one,
                with `initial_page_state` replaced by the provided value.
        """

        # NOTE(review): this docstring used to state that an explicit None
        # raises an error. The `_copy` method of this class, however, turns
        # a None for this parameter into the unset marker instead of raising:
        # confirm the intended contract for None.
        self._ensure_idle()
        updated_cursor = self._copy(initial_page_state=initial_page_state)
        return updated_cursor

    def include_similarity(
        self, include_similarity: bool | None
    ) -> CollectionFindCursor[TRAW, T]:
        """
        Create a new cursor that differs from this one only in its
        include_similarity setting.

        The cursor must still be in the IDLE state and must not have a mapping
        function set; in the latter case a CursorException is raised.

        Rather than calling this method directly, one usually supplies the
        corresponding argument to the Collection `find` method.

        Args:
            include_similarity: the include_similarity setting for
                the new cursor.

        Returns:
            a new CollectionFindCursor carrying over all settings of this one,
                with `include_similarity` replaced by the provided value.
        """

        self._ensure_idle()
        if self._mapper is None:
            return self._copy(include_similarity=include_similarity)
        # a mapping is in place: changing this setting is refused
        raise CursorException(
            "Cannot set include_similarity after map.",
            cursor_state=self._state.value,
        )

    def include_sort_vector(
        self, include_sort_vector: bool | None
    ) -> CollectionFindCursor[TRAW, T]:
        """
        Create a new cursor that differs from this one only in its
        include_sort_vector setting.

        The cursor must still be in the IDLE state for this operation
        to be allowed.

        Rather than calling this method directly, one usually supplies the
        corresponding argument to the Collection `find` method.

        Args:
            include_sort_vector: the include_sort_vector setting for
                the new cursor.

        Returns:
            a new CollectionFindCursor carrying over all settings of this one,
                with `include_sort_vector` replaced by the provided value.
        """

        self._ensure_idle()
        updated_cursor = self._copy(include_sort_vector=include_sort_vector)
        return updated_cursor

    def skip(self, skip: int | None) -> CollectionFindCursor[TRAW, T]:
        """
        Create a new cursor that differs from this one only in its
        skip setting.

        The cursor must still be in the IDLE state for this operation
        to be allowed.

        Rather than calling this method directly, one usually supplies the
        corresponding argument to the Collection `find` method.

        Args:
            skip: the skip setting for the new cursor.

        Returns:
            a new CollectionFindCursor carrying over all settings of
                this one, with `skip` replaced by the provided value.
        """

        self._ensure_idle()
        skipping_cursor = self._copy(skip=skip)
        return skipping_cursor

    def map(self, mapper: Callable[[T], TNEW]) -> CollectionFindCursor[TRAW, TNEW]:
        """
        Return a copy of this cursor with a mapping function to transform
        the returned items. Calling this method on a cursor with a mapping
        already set results in the mapping functions being composed.

        This operation is allowed only if the cursor state is still IDLE.

        Args:
            mapper: a function transforming the objects returned by the cursor
                into something else (i.e. a function T => TNEW).

        Returns:
            a new CollectionFindCursor with a new mapping function on the results,
                possibly composed with any pre-existing mapping function.

        Raises:
            RuntimeError: if the cursor's query engine has no collection.

        Example:
            >>> cursor = collection.find(
            ...     {},
            ...     projection={"seq": True, "_id": False},
            ...     limit=2,
            ... )
            >>> for doc in cursor:
            ...     print(doc)
            ...
            {'seq': 1}
            {'seq': 4}
            >>> cursor_mapped = collection.find(
            ...     {},
            ...     projection={"seq": True, "_id": False},
            ...     limit=2,
            ... ).map(lambda doc: doc["seq"])
            >>> for value in cursor_mapped:
            ...     print(value)
            ...
            1
            4
            >>>
            >>> cursor_mapped_twice = collection.find(
            ...     {},
            ...     projection={"seq": True, "_id": False},
            ...     limit=2,
            ... ).map(lambda doc: doc["seq"]).map(lambda num: "x" * num)
            >>> for value in cursor_mapped_twice:
            ...     print(value)
            ...
            x
            xxxx
        """
        self._ensure_idle()
        if self._query_engine.collection is None:
            raise RuntimeError("Query engine has no collection.")
        composite_mapper: Callable[[TRAW], TNEW]
        if self._mapper is not None:
            # Bind the pre-existing mapper to a local right away: the closure
            # below then holds only the two callables it composes, rather than
            # a reference to this whole cursor, and it cannot be affected by
            # any later change to `self._mapper`.
            previous_mapper: Callable[[TRAW], T] = self._mapper

            def _composite(document: TRAW) -> TNEW:
                # Pre-existing mapping first, newly supplied mapping second.
                return mapper(previous_mapper(document))

            composite_mapper = _composite
        else:
            # No previous mapping: items reach `mapper` as they come from the
            # API, hence the cast from a T-input to a TRAW-input callable.
            composite_mapper = cast(Callable[[TRAW], TNEW], mapper)
        # The new cursor carries over every setting of this one; only the
        # mapper differs.
        return CollectionFindCursor(
            collection=self._query_engine.collection,
            request_timeout_ms=self._request_timeout_ms,
            overall_timeout_ms=self._overall_timeout_ms,
            request_timeout_label=self._request_timeout_label,
            overall_timeout_label=self._overall_timeout_label,
            filter=self._filter,
            projection=self._projection,
            sort=self._sort,
            limit=self._limit,
            initial_page_state=self._initial_page_state,
            include_similarity=self._include_similarity,
            include_sort_vector=self._include_sort_vector,
            skip=self._skip,
            mapper=composite_mapper,
        )

    def for_each(
        self,
        function: Callable[[T], bool | None],
        *,
        general_method_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Run a callback on every document still to be consumed from the cursor,
        consuming them in the process.

        This method raises an error if the cursor is CLOSED.

        Whatever the callback returns is ignored, with a single exception:
        a return value that is exactly the boolean `False` is a request to stop
        early, in which case the cursor is left half-consumed (ACTIVE state).
        When no early stop occurs, the cursor ends up exhausted and in the
        CLOSED state.

        Args:
            function: a callback accepting one argument, of the type returned by
                the cursor. It is called once for each document the cursor yields.
                Should it return `False`, `for_each` stops right away and returns,
                without consuming any more documents.
            general_method_timeout_ms: a timeout, in milliseconds, for the whole
                duration of this method. If not provided, there is no such timeout.
                Note that the per-request timeout set on the cursor still applies.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Example:
            >>> cursor = collection.find(
            ...     {},
            ...     projection={"seq": True, "_id": False},
            ...     limit=3,
            ... )
            >>> def printer(doc):
            ...     print(f"-> {doc['seq']}")
            ...
            >>> cursor.for_each(printer)
            -> 1
            -> 4
            -> 15
            >>>
            >>> if cursor.state != CursorState.CLOSED:
            ...     print(f"alive: {list(cursor)}")
            ... else:
            ...     print("(closed)")
            ...
            (closed)
            >>> cursor2 = collection.find(
            ...     {},
            ...     projection={"seq": True, "_id": False},
            ...     limit=3,
            ... )
            >>> def checker(doc):
            ...     print(f"-> {doc['seq']}")
            ...     return doc["seq"] != 4
            ...
            >>> cursor2.for_each(checker)
            -> 1
            -> 4
            >>>
            >>> if cursor2.state != CursorState.CLOSED:
            ...     print(f"alive: {list(cursor2)}")
            ... else:
            ...     print("(closed)")
            ...
            alive: [{'seq': 15}]
        """

        self._ensure_alive()
        request_ms, overall_ms = _revise_timeouts_for_cursor_copy(
            new_general_method_timeout_ms=general_method_timeout_ms,
            new_timeout_ms=timeout_ms,
            old_request_timeout_ms=self._request_timeout_ms,
        )
        # The iteration runs on a working copy carrying the revised timeouts;
        # internal state is imprinted onto the copy before the loop and back
        # onto this cursor after it.
        worker = self._copy(
            request_timeout_ms=request_ms,
            overall_timeout_ms=overall_ms,
        )
        self._imprint_internal_state(worker)
        for item in worker:
            # Only a literal `False` stops the loop; other falsy values do not.
            if function(item) is False:
                break
        worker._imprint_internal_state(self)

    def to_list(
        self,
        *,
        general_method_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[T]:
        """
        Collect into a list all documents that the cursor has yet to return.

        This method raises an error if the cursor is CLOSED.

        On an IDLE cursor the outcome is the full set of documents the `find`
        operation returns; on a cursor that has already been partly consumed,
        the documents read so far are not part of the resulting list.

        This method should be avoided when a very large number of results is
        expected: building the list would require many data exchanges with the
        Data API and may use a lot of memory. A lazy pattern, iterating over
        the cursor and consuming documents as they come, is preferable then.

        Args:
            general_method_timeout_ms: a timeout, in milliseconds, for the whole
                duration of this method. If not provided, there is no such timeout.
                Note that the per-request timeout set on the cursor still applies.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            a list of documents (or of other values, if a mapping function
                is set on the cursor): all the items that were still to be
                consumed at the time `to_list` is called.

        Example:
            >>> collection.find(
            ...     {},
            ...     projection={"seq": True, "_id": False},
            ...     limit=3,
            ... ).to_list()
            [{'seq': 1}, {'seq': 4}, {'seq': 15}]
            >>>
            >>> cursor = collection.find(
            ...     {},
            ...     projection={"seq": True, "_id": False},
            ...     limit=5,
            ... ).map(lambda doc: doc["seq"])
            >>>
            >>> first_value = cursor.__next__()
            >>> cursor.to_list()
            [4, 15, 22, 11]
        """

        self._ensure_alive()
        request_ms, overall_ms = _revise_timeouts_for_cursor_copy(
            new_general_method_timeout_ms=general_method_timeout_ms,
            new_timeout_ms=timeout_ms,
            old_request_timeout_ms=self._request_timeout_ms,
        )
        # The iteration runs on a working copy carrying the revised timeouts;
        # internal state is imprinted onto the copy before draining it and back
        # onto this cursor afterwards.
        worker = self._copy(
            request_timeout_ms=request_ms,
            overall_timeout_ms=overall_ms,
        )
        self._imprint_internal_state(worker)
        collected = []
        for item in worker:
            collected.append(item)
        worker._imprint_internal_state(self)
        return collected

    def has_next(self) -> bool:
        """
        Tell whether there are documents still to be returned by the cursor.

        Any cursor accepts a `has_next` call; for a CLOSED cursor the answer
        is always False.

        If the current buffer is empty, calling this method can cause a new
        page to be fetched.

        On an IDLE cursor, `has_next` triggers the fetch of the first page;
        the cursor nevertheless remains IDLE until items start being consumed.

        Returns:
            True if at least one more item is available for consumption;
                False otherwise (a CLOSED cursor falls in the latter case).
        """

        if self._state != CursorState.CLOSED:
            # Make an attempt at refilling the buffer before inspecting it.
            self._try_ensure_fill_buffer()
            return len(self._buffer) > 0
        return False

    def get_sort_vector(self) -> list[float] | DataAPIVector | None:
        """
        Get the query vector of the vector (ANN) search this cursor comes from,
        when there is one. The result is None if the search is not an ANN
        search, or if the `include_sort_vector` flag was not set for it.

        On an IDLE cursor, `get_sort_vector` triggers the fetch of the first
        page; the cursor nevertheless remains IDLE until items start being
        consumed.

        A CLOSED cursor accepts this call as well, returning either None
        or the sort vector used in the search.

        Returns:
            for a vector search, the query vector that was used; None otherwise.
                Depending on the `APIOptions.serdes_options` that apply, the
                vector comes as a DataAPIVector or as a plain list of numbers.
                The query vector is available also for vectorize-based
                ANN searches.
        """

        self._try_ensure_fill_buffer()
        status = self._last_response_status
        if not status:
            # No (or empty) response status: there is no vector to report.
            return None
        return _ensure_vector(
            status.get("sortVector"),
            self.data_source.api_options.serdes_options,
        )

    def fetch_next_page(self) -> FindPage[T]:
        """
        Get one whole page of results from the Data API in a single call,
        along with the "out-of-band" information that comes with it.

        Callers who need to work explicitly page by page should consume
        the cursor through this method. It goes together with the creation of
        cursors 'set to start from a certain page', which is achieved with the
        `initial_page_state` constructor parameter/builder method.
        The initial page state to supply is typically obtained by having
        consumed a previous page of the same find operation: it is a string,
        found within the `FindPage` object that this method returns.

        This method cannot be called while the cursor holds buffered items
        from regular iteration: an error is raised in that case.

        Returns:
            a `FindPage` object expressing the full Data API response: the
            resulting documents (with the cursor mapping function applied,
            if one is defined), the state to use to query for the next page
            (a string), and the sort vector if requested and applicable.

        Example:
            >>> # A cursor to get the first page:
            >>> cursor0 = collection.find({})
            >>> page0 = cursor0.fetch_next_page()
            >>> page0
            FindPage(results=<20 entries>, next_page_state=...)
            >>> page0.results[0]
            {'_id': 40, 'text': 'doc num 40', 'even': True}
            >>> page0.next_page_state
            'CwAAAAECAAAAAjg5APB////rAA=='
            >>>
            >>> # Get the next page through a new cursor:
            >>> cursor1 = collection.find(
            ...     {},
            ...     initial_page_state=page0.next_page_state,
            ... )
            >>> page1 = cursor1.fetch_next_page()
            >>> page1.results[0]
            {'_id': 124, 'text': 'doc num 124', 'even': True}
            >>> page1.next_page_state
            'CgAAAAECAAAAATYA8H///9cA'
            >>>
            >>> # (...)
            >>> # Eventually there's nothing more to retrieve:
            >>> page_N.next_page_state is None
            True
        """

        self._ensure_alive()
        if self._buffer:
            # Leftover buffered items mean regular iteration is under way.
            error_text = (
                "Paginated retrieval cannot be mixed with regular cursor iteration."
            )
            raise CursorException(
                text=error_text,
                cursor_state=self._state.value,
            )

        self._try_ensure_fill_buffer()

        # Page size and next-page state are recorded before consuming items.
        page_size = len(self._buffer)
        following_page_state = self._next_page_state
        # zip over a range of `page_size` draws at most that many items
        # from this cursor.
        page_items = []
        for _, item in zip(range(page_size), self):
            page_items.append(item)

        status = self._last_response_status
        page_sort_vector: list[float] | DataAPIVector | None = None
        if status:
            page_sort_vector = _ensure_vector(
                status.get("sortVector"),
                self.data_source.api_options.serdes_options,
            )

        return FindPage(
            results=page_items,
            next_page_state=following_page_state,
            sort_vector=page_sort_vector,
        )

Ancestors

AbstractCursor
abc.ABC
typing.Generic

Instance variables

var data_source : Collection[~TRAW]

The Collection object that originated this cursor through a find operation.

Returns

a Collection instance.

Expand source code

@property
def data_source(self) -> Collection[TRAW]:
    """
    The Collection whose `find` operation produced this cursor.

    Returns:
        a Collection instance.

    Raises:
        RuntimeError: if the cursor's query engine has no collection.
    """

    collection = self._query_engine.collection
    if collection is None:
        raise RuntimeError("Query engine has no collection.")
    return collection

Methods

def clone(self) ‑> CollectionFindCursor[~TRAW, ~T]

Create a copy of this cursor that has the same parameters (timeouts, filter, projection, etc.) and is rewound to its pristine IDLE state.

Returns

a new CollectionFindCursor, similar to this one but rewound to its initial state.

Example

>>> cursor = collection.find(
...     {},
...     projection={"seq": True, "_id": False},
...     limit=2,
... ).map(lambda doc: doc["seq"])
>>> for value in cursor:
...     print(value)
...
1
4
>>> cloned_cursor = cursor.clone()
>>> for document in cloned_cursor:
...     print(document)
...
1
4

Expand source code

def clone(self) -> CollectionFindCursor[TRAW, T]:
    """
    Make a fresh cursor out of this one. The new cursor:
    - has the same parameters (timeouts, filter, projection, etc)
    - is a newly-built object, i.e. it starts from the pristine IDLE state.

    Returns:
        a new CollectionFindCursor, similar to this one but
        rewound to its initial state.

    Raises:
        RuntimeError: if the cursor's query engine has no collection.

    Example:
        >>> cursor = collection.find(
        ...     {},
        ...     projection={"seq": True, "_id": False},
        ...     limit=2,
        ... ).map(lambda doc: doc["seq"])
        >>> for value in cursor:
        ...     print(value)
        ...
        1
        4
        >>> cloned_cursor = cursor.clone()
        >>> for document in cloned_cursor:
        ...     print(document)
        ...
        1
        4
    """

    source_collection = self._query_engine.collection
    if source_collection is None:
        raise RuntimeError("Query engine has no collection.")
    # Every setting, mapper included, is handed over to a brand new cursor.
    replica = CollectionFindCursor(
        collection=source_collection,
        request_timeout_ms=self._request_timeout_ms,
        overall_timeout_ms=self._overall_timeout_ms,
        request_timeout_label=self._request_timeout_label,
        overall_timeout_label=self._overall_timeout_label,
        filter=self._filter,
        projection=self._projection,
        sort=self._sort,
        limit=self._limit,
        initial_page_state=self._initial_page_state,
        include_similarity=self._include_similarity,
        include_sort_vector=self._include_sort_vector,
        skip=self._skip,
        mapper=self._mapper,
    )
    return replica

def fetch_next_page(self) ‑> FindPage[~T]

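Retrieve a single, whole page of results from the Data API and return it at once, together with associated "out-of-band" information.

This method is meant to be the way a cursor is consumed when the caller needs to explicitly operate on a page-by-page basis, and is to be paired with creation of cursor objects 'set to start from a certain page' via the initial_page_state constructor parameter/builder method. In this case, the supplied initial page state typically comes from having consumed a previous page, for the same find operation: the page state, a string, is found within the FindPage object returned by this method.

Returns

a FindPage object expressing the full Data API response, including the resulting documents (after applying the cursor mapping function, if one is defined), as well as the state to use to query for the next page (a string) and the sort vector if requested and applicable.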

Example

>>> # A cursor to get the first page:
>>> cursor0 = collection.find({})
>>> page0 = cursor0.fetch_next_page()
>>> page0
FindPage(results=<20 entries>, next_page_state=...)
>>> page0.results[0]
{'_id': 40, 'text': 'doc num 40', 'even': True}
>>> page0.next_page_state
'CwAAAAECAAAAAjg5APB////rAA=='
>>>
>>> # Get the next page through a new cursor:
>>> cursor1 = collection.find(
...     {},
...     initial_page_state=page0.next_page_state,
... )
>>> page1 = cursor1.fetch_next_page()
>>> page1.results[0]
{'_id': 124, 'text': 'doc num 124', 'even': True}
>>> page1.next_page_state
'CgAAAAECAAAAATYA8H///9cA'
>>>
>>> # (...)
>>> # Eventually there's nothing more to retrieve:
>>> page_N.next_page_state is None
True

Expand source code

def fetch_next_page(self) -> FindPage[T]:
    """
    Get one whole page of results from the Data API in a single call,
    along with the "out-of-band" information that comes with it.

    Callers who need to work explicitly page by page should consume
    the cursor through this method. It goes together with the creation of
    cursors 'set to start from a certain page', which is achieved with the
    `initial_page_state` constructor parameter/builder method.
    The initial page state to supply is typically obtained by having
    consumed a previous page of the same find operation: it is a string,
    found within the `FindPage` object that this method returns.

    This method cannot be called while the cursor holds buffered items
    from regular iteration: an error is raised in that case.

    Returns:
        a `FindPage` object expressing the full Data API response: the
        resulting documents (with the cursor mapping function applied,
        if one is defined), the state to use to query for the next page
        (a string), and the sort vector if requested and applicable.

    Example:
        >>> # A cursor to get the first page:
        >>> cursor0 = collection.find({})
        >>> page0 = cursor0.fetch_next_page()
        >>> page0
        FindPage(results=<20 entries>, next_page_state=...)
        >>> page0.results[0]
        {'_id': 40, 'text': 'doc num 40', 'even': True}
        >>> page0.next_page_state
        'CwAAAAECAAAAAjg5APB////rAA=='
        >>>
        >>> # Get the next page through a new cursor:
        >>> cursor1 = collection.find(
        ...     {},
        ...     initial_page_state=page0.next_page_state,
        ... )
        >>> page1 = cursor1.fetch_next_page()
        >>> page1.results[0]
        {'_id': 124, 'text': 'doc num 124', 'even': True}
        >>> page1.next_page_state
        'CgAAAAECAAAAATYA8H///9cA'
        >>>
        >>> # (...)
        >>> # Eventually there's nothing more to retrieve:
        >>> page_N.next_page_state is None
        True
    """

    self._ensure_alive()
    if self._buffer:
        # Leftover buffered items mean regular iteration is under way.
        error_text = (
            "Paginated retrieval cannot be mixed with regular cursor iteration."
        )
        raise CursorException(
            text=error_text,
            cursor_state=self._state.value,
        )

    self._try_ensure_fill_buffer()

    # Page size and next-page state are recorded before consuming items.
    page_size = len(self._buffer)
    following_page_state = self._next_page_state
    # zip over a range of `page_size` draws at most that many items
    # from this cursor.
    page_items = []
    for _, item in zip(range(page_size), self):
        page_items.append(item)

    status = self._last_response_status
    page_sort_vector: list[float] | DataAPIVector | None = None
    if status:
        page_sort_vector = _ensure_vector(
            status.get("sortVector"),
            self.data_source.api_options.serdes_options,
        )

    return FindPage(
        results=page_items,
        next_page_state=following_page_state,
        sort_vector=page_sort_vector,
    )

def filter(self, filter: FilterType | None) ‑> CollectionFindCursor[~TRAW, ~T]

Return a copy of this cursor with a new filter setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the Collection find method.

Args

filter: a new filter setting to apply to the returned new cursor.

Returns

a new CollectionFindCursor with the same settings as this one, except for filter which is the provided value.

Expand source code

def filter(self, filter: FilterType | None) -> CollectionFindCursor[TRAW, T]:
    """
    Build a new cursor that differs from this one only in its filter setting.
    The cursor must still be in the IDLE state for this to be allowed.

    Explicit calls to this method are seldom needed: the usual way to
    control this setting is through the arguments of the Collection
    `find` method.

    Args:
        filter: the filter value that the returned cursor will carry.

    Returns:
        a new CollectionFindCursor, a copy of this one where only
            `filter` is replaced by the provided value.
    """

    # The idle check comes first: a started cursor must not be copied.
    self._ensure_idle()
    updated_cursor = self._copy(filter=filter)
    return updated_cursor

def for_each(self, function: Callable[[T], bool | None], *, general_method_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Consume the remaining documents in the cursor, invoking a provided callback function on each of them.

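Calling this method on a CLOSED cursor results in an error.

The callback function can return any value. The return value is generally discarded, with the following exception: if the function returns the boolean False, it is taken to signify that the method should quit early, leaving the cursor half-consumed (ACTIVE state). If this does not occur, this method results in the cursor entering CLOSED state once it is exhausted.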

Args

function: a callback function whose only parameter is of the type returned by the cursor. This callback is invoked once per each document yielded by the cursor. If the callback returns a False, the for_each invocation stops early and returns without consuming further documents.
general_method_timeout_ms: a timeout, in milliseconds, for the whole duration of this method. If not provided, there is no such timeout. Note that the per-request timeout set on the cursor still applies.
timeout_ms: an alias for general_method_timeout_ms.

Example

>>> cursor = collection.find(
...     {},
...     projection={"seq": True, "_id": False},
...     limit=3,
... )
>>> def printer(doc):
...     print(f"-> {doc['seq']}")
...
>>> cursor.for_each(printer)
-> 1
-> 4
-> 15
>>>
>>> if cursor.state != CursorState.CLOSED:
...     print(f"alive: {list(cursor)}")
... else:
...     print("(closed)")
...
(closed)
>>> cursor2 = collection.find(
...     {},
...     projection={"seq": True, "_id": False},
...     limit=3,
... )
>>> def checker(doc):
...     print(f"-> {doc['seq']}")
...     return doc["seq"] != 4
...
>>> cursor2.for_each(checker)
-> 1
-> 4
>>>
>>> if cursor2.state != CursorState.CLOSED:
...     print(f"alive: {list(cursor2)}")
... else:
...     print("(closed)")
...
alive: [{'seq': 15}]

Expand source code

def for_each(
    self,
    function: Callable[[T], bool | None],
    *,
    general_method_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Run a callback on every document still to be consumed from the cursor,
    consuming them in the process.

    This method raises an error if the cursor is CLOSED.

    Whatever the callback returns is ignored, with a single exception:
    a return value that is exactly the boolean `False` is a request to stop
    early, in which case the cursor is left half-consumed (ACTIVE state).
    When no early stop occurs, the cursor ends up exhausted and in the
    CLOSED state.

    Args:
        function: a callback accepting one argument, of the type returned by
            the cursor. It is called once for each document the cursor yields.
            Should it return `False`, `for_each` stops right away and returns,
            without consuming any more documents.
        general_method_timeout_ms: a timeout, in milliseconds, for the whole
            duration of this method. If not provided, there is no such timeout.
            Note that the per-request timeout set on the cursor still applies.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Example:
        >>> cursor = collection.find(
        ...     {},
        ...     projection={"seq": True, "_id": False},
        ...     limit=3,
        ... )
        >>> def printer(doc):
        ...     print(f"-> {doc['seq']}")
        ...
        >>> cursor.for_each(printer)
        -> 1
        -> 4
        -> 15
        >>>
        >>> if cursor.state != CursorState.CLOSED:
        ...     print(f"alive: {list(cursor)}")
        ... else:
        ...     print("(closed)")
        ...
        (closed)
        >>> cursor2 = collection.find(
        ...     {},
        ...     projection={"seq": True, "_id": False},
        ...     limit=3,
        ... )
        >>> def checker(doc):
        ...     print(f"-> {doc['seq']}")
        ...     return doc["seq"] != 4
        ...
        >>> cursor2.for_each(checker)
        -> 1
        -> 4
        >>>
        >>> if cursor2.state != CursorState.CLOSED:
        ...     print(f"alive: {list(cursor2)}")
        ... else:
        ...     print("(closed)")
        ...
        alive: [{'seq': 15}]
    """

    self._ensure_alive()
    request_ms, overall_ms = _revise_timeouts_for_cursor_copy(
        new_general_method_timeout_ms=general_method_timeout_ms,
        new_timeout_ms=timeout_ms,
        old_request_timeout_ms=self._request_timeout_ms,
    )
    # The iteration runs on a working copy carrying the revised timeouts;
    # internal state is imprinted onto the copy before the loop and back
    # onto this cursor after it.
    worker = self._copy(
        request_timeout_ms=request_ms,
        overall_timeout_ms=overall_ms,
    )
    self._imprint_internal_state(worker)
    for item in worker:
        # Only a literal `False` stops the loop; other falsy values do not.
        if function(item) is False:
            break
    worker._imprint_internal_state(self)

def get_sort_vector(self) ‑> list[float] | DataAPIVector | None

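Return the query vector used in the vector (ANN) search that originated this cursor, if applicable. If this is not an ANN search, or it was invoked without the include_sort_vector flag, return None.

Calling get_sort_vector on an IDLE cursor triggers the first page fetch, but the cursor stays in the IDLE state until actual consumption starts.

The method can be invoked on a CLOSED cursor and will return either None or the sort vector used in the search.

Returns

the query vector used in the search if this was a vector search (otherwise None). The vector is returned either as a DataAPIVector or a plain list of numbers depending on the APIOptions.serdes_options that apply. The query vector is available also for vectorize-based ANN searches.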

Expand source code

def get_sort_vector(self) -> list[float] | DataAPIVector | None:
    """
    Get the query vector of the vector (ANN) search this cursor comes from,
    when there is one. The result is None if the search is not an ANN
    search, or if the `include_sort_vector` flag was not set for it.

    On an IDLE cursor, `get_sort_vector` triggers the fetch of the first
    page; the cursor nevertheless remains IDLE until items start being
    consumed.

    A CLOSED cursor accepts this call as well, returning either None
    or the sort vector used in the search.

    Returns:
        for a vector search, the query vector that was used; None otherwise.
            Depending on the `APIOptions.serdes_options` that apply, the
            vector comes as a DataAPIVector or as a plain list of numbers.
            The query vector is available also for vectorize-based
            ANN searches.
    """

    self._try_ensure_fill_buffer()
    status = self._last_response_status
    if not status:
        # No (or empty) response status: there is no vector to report.
        return None
    return _ensure_vector(
        status.get("sortVector"),
        self.data_source.api_options.serdes_options,
    )

def has_next(self) ‑> bool

Whether the cursor actually has more documents to return.

has_next can be called on any cursor, but on a CLOSED cursor will always return False.

This method can trigger the fetch operation of a new page, if the current buffer is empty.

Calling has_next on an IDLE cursor triggers the first page fetch, but the cursor stays in the IDLE state until actual consumption starts.

Returns

a boolean value of True if there is at least one further item available to consume; False otherwise (including the case of CLOSED cursor).

Expand source code

def has_next(self) -> bool:
    """
    Tell whether there are documents still to be returned by the cursor.

    Any cursor accepts a `has_next` call; for a CLOSED cursor the answer
    is always False.

    If the current buffer is empty, calling this method can cause a new
    page to be fetched.

    On an IDLE cursor, `has_next` triggers the fetch of the first page;
    the cursor nevertheless remains IDLE until items start being consumed.

    Returns:
        True if at least one more item is available for consumption;
            False otherwise (a CLOSED cursor falls in the latter case).
    """

    if self._state != CursorState.CLOSED:
        # Make an attempt at refilling the buffer before inspecting it.
        self._try_ensure_fill_buffer()
        return len(self._buffer) > 0
    return False

def include_similarity(self, include_similarity: bool | None) ‑> CollectionFindCursor[~TRAW, ~T]

Return a copy of this cursor with a new include_similarity setting. This operation is allowed only if the cursor state is still IDLE and if no mapping has been set on it.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the Collection find method.

Args

include_similarity: a new include_similarity setting to apply to the returned new cursor.

Returns

a new CollectionFindCursor with the same settings as this one, except for include_similarity which is the provided value.

Expand source code

def include_similarity(
    self, include_similarity: bool | None
) -> CollectionFindCursor[TRAW, T]:
    """
    Build a new cursor that differs from this one only in its
    include_similarity setting.
    Two conditions must hold for this to be allowed: the cursor is still
    in the IDLE state, and no mapping has been set on it.

    Explicit calls to this method are seldom needed: the usual way to
    control this setting is through the arguments of the Collection
    `find` method.

    Args:
        include_similarity: the include_similarity value that the
            returned cursor will carry.

    Returns:
        a new CollectionFindCursor, a copy of this one where only
            `include_similarity` is replaced by the provided value.
    """

    self._ensure_idle()
    if self._mapper is None:
        return self._copy(include_similarity=include_similarity)
    # A mapping is already set on this cursor: the change is refused.
    raise CursorException(
        "Cannot set include_similarity after map.",
        cursor_state=self._state.value,
    )

def include_sort_vector(self, include_sort_vector: bool | None) ‑> CollectionFindCursor[~TRAW, ~T]

Return a copy of this cursor with a new include_sort_vector setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the Collection find method.

Args

include_sort_vector: a new include_sort_vector setting to apply to the returned new cursor.

Returns

a new CollectionFindCursor with the same settings as this one, except for include_sort_vector which is the provided value.

Expand source code

def include_sort_vector(
    self, include_sort_vector: bool | None
) -> CollectionFindCursor[TRAW, T]:
    """
    Derive a new cursor differing from this one only in its
    include_sort_vector setting.

    The operation requires the cursor to be still in the IDLE state.

    Rather than calling this method directly, one usually supplies
    the setting as an argument to the Collection `find` method.

    Args:
        include_sort_vector: the include_sort_vector value for the new cursor.

    Returns:
        a new CollectionFindCursor, equal to this one in every setting
            except `include_sort_vector`, which takes the provided value.
    """

    self._ensure_idle()
    updated_cursor = self._copy(include_sort_vector=include_sort_vector)
    return updated_cursor

def initial_page_state(self, initial_page_state: str | UnsetType) ‑> CollectionFindCursor[~TRAW, ~T]

Return a copy of this cursor with a new initial_page_state setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the Collection find method.

Args

initial_page_state: a new initial_page_state setting to apply to the returned new cursor. Passing an explicit None raises an error.

Returns

a new CollectionFindCursor with the same settings as this one, except for initial_page_state which is the provided value.

Expand source code

def initial_page_state(
    self, initial_page_state: str | UnsetType
) -> CollectionFindCursor[TRAW, T]:
    """
    Derive a new cursor differing from this one only in its
    initial_page_state setting.

    The operation requires the cursor to be still in the IDLE state.

    Rather than calling this method directly, one usually supplies
    the setting as an argument to the Collection `find` method.

    Args:
        initial_page_state: the initial_page_state value for the new cursor.
            Passing an explicit None raises an error.

    Returns:
        a new CollectionFindCursor, equal to this one in every setting
            except `initial_page_state`, which takes the provided value.
    """

    self._ensure_idle()
    updated_cursor = self._copy(initial_page_state=initial_page_state)
    return updated_cursor

def limit(self, limit: int | None) ‑> CollectionFindCursor[~TRAW, ~T]

Return a copy of this cursor with a new limit setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the Collection find method.

Args

limit: a new limit setting to apply to the returned new cursor.

Returns

a new CollectionFindCursor with the same settings as this one, except for limit which is the provided value.

Expand source code

def limit(self, limit: int | None) -> CollectionFindCursor[TRAW, T]:
    """
    Derive a new cursor differing from this one only in its limit setting.

    The operation requires the cursor to be still in the IDLE state.

    Rather than calling this method directly, one usually supplies
    the setting as an argument to the Collection `find` method.

    Args:
        limit: the limit value for the new cursor.

    Returns:
        a new CollectionFindCursor, equal to this one in every setting
            except `limit`, which takes the provided value.
    """

    self._ensure_idle()
    updated_cursor = self._copy(limit=limit)
    return updated_cursor

def map(self, mapper: Callable[[T], TNEW]) ‑> CollectionFindCursor[~TRAW, ~TNEW]

Return a copy of this cursor with a mapping function to transform the returned items. Calling this method on a cursor with a mapping already set results in the mapping functions being composed.

This operation is allowed only if the cursor state is still IDLE.

Args

mapper: a function transforming the objects returned by the cursor into something else (i.e. a function T => TNEW).

Returns

a new CollectionFindCursor with a new mapping function on the results, possibly composed with any pre-existing mapping function.

Example

>>> cursor = collection.find(
...     {},
...     projection={"seq": True, "_id": False},
...     limit=2,
... )
>>> for doc in cursor:
...     print(doc)
...
{'seq': 1}
{'seq': 4}
>>> cursor_mapped = collection.find(
...     {},
...     projection={"seq": True, "_id": False},
...     limit=2,
... ).map(lambda doc: doc["seq"])
>>> for value in cursor_mapped:
...     print(value)
...
1
4
>>>
>>> cursor_mapped_twice = collection.find(
...     {},
...     projection={"seq": True, "_id": False},
...     limit=2,
... ).map(lambda doc: doc["seq"]).map(lambda num: "x" * num)
>>> for value in cursor_mapped_twice:
...     print(value)
...
x
xxxx

Expand source code

def map(self, mapper: Callable[[T], TNEW]) -> CollectionFindCursor[TRAW, TNEW]:
    """
    Return a copy of this cursor with a mapping function to transform
    the returned items. Calling this method on a cursor with a mapping
    already set results in the mapping functions being composed.

    This operation is allowed only if the cursor state is still IDLE.

    Args:
        mapper: a function transforming the objects returned by the cursor
            into something else (i.e. a function T => TNEW).

    Returns:
        a new CollectionFindCursor with a new mapping function on the results,
            possibly composed with any pre-existing mapping function.

    Raises:
        RuntimeError: if the underlying query engine has no (synchronous)
            collection to build the new cursor on.

    Example:
        >>> cursor = collection.find(
        ...     {},
        ...     projection={"seq": True, "_id": False},
        ...     limit=2,
        ... )
        >>> for doc in cursor:
        ...     print(doc)
        ...
        {'seq': 1}
        {'seq': 4}
        >>> cursor_mapped = collection.find(
        ...     {},
        ...     projection={"seq": True, "_id": False},
        ...     limit=2,
        ... ).map(lambda doc: doc["seq"])
        >>> for value in cursor_mapped:
        ...     print(value)
        ...
        1
        4
        >>>
        >>> cursor_mapped_twice = collection.find(
        ...     {},
        ...     projection={"seq": True, "_id": False},
        ...     limit=2,
        ... ).map(lambda doc: doc["seq"]).map(lambda num: "x" * num)
        >>> for value in cursor_mapped_twice:
        ...     print(value)
        ...
        x
        xxxx
    """
    self._ensure_idle()
    collection = self._query_engine.collection
    if collection is None:
        raise RuntimeError("Query engine has no collection.")
    # Bind the current mapper once, at composition time: the composed function
    # must not hold on to this whole cursor, nor re-read `self._mapper`
    # (and thus depend on this cursor's later state) for every mapped item.
    previous_mapper = self._mapper
    composite_mapper: Callable[[TRAW], TNEW]
    if previous_mapper is not None:

        def _composite(document: TRAW) -> TNEW:
            # apply the pre-existing mapping first, then the new one
            return mapper(previous_mapper(document))

        composite_mapper = _composite
    else:
        # No prior mapping: in this case T coincides with TRAW.
        composite_mapper = cast(Callable[[TRAW], TNEW], mapper)
    return CollectionFindCursor(
        collection=collection,
        request_timeout_ms=self._request_timeout_ms,
        overall_timeout_ms=self._overall_timeout_ms,
        request_timeout_label=self._request_timeout_label,
        overall_timeout_label=self._overall_timeout_label,
        filter=self._filter,
        projection=self._projection,
        sort=self._sort,
        limit=self._limit,
        initial_page_state=self._initial_page_state,
        include_similarity=self._include_similarity,
        include_sort_vector=self._include_sort_vector,
        skip=self._skip,
        mapper=composite_mapper,
    )

def project(self, projection: ProjectionType | None) ‑> CollectionFindCursor[~TRAW, ~T]

Return a copy of this cursor with a new projection setting. This operation is allowed only if the cursor state is still IDLE and if no mapping has been set on it.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the Collection find method.

Args

projection: a new projection setting to apply to the returned new cursor.

Returns

a new CollectionFindCursor with the same settings as this one, except for projection which is the provided value.

Expand source code

def project(
    self, projection: ProjectionType | None
) -> CollectionFindCursor[TRAW, T]:
    """
    Derive a new cursor differing from this one only in its
    projection setting.

    The operation requires the cursor to be still in the IDLE state and
    to have no mapping function attached.

    Rather than calling this method directly, one usually supplies
    the setting as an argument to the Collection `find` method.

    Args:
        projection: the projection value for the new cursor.

    Returns:
        a new CollectionFindCursor, equal to this one in every setting
            except `projection`, which takes the provided value.

    Raises:
        CursorException: if a mapping function is already set on this cursor.
    """

    self._ensure_idle()
    # Regular case first: with no mapper in place the setting can be changed.
    if self._mapper is None:
        return self._copy(projection=projection)
    raise CursorException(
        "Cannot set projection after map.",
        cursor_state=self._state.value,
    )

def skip(self, skip: int | None) ‑> CollectionFindCursor[~TRAW, ~T]

Return a copy of this cursor with a new skip setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the Collection find method.

Args

skip: a new skip setting to apply to the returned new cursor.

Returns

a new CollectionFindCursor with the same settings as this one, except for skip which is the provided value.

Expand source code

def skip(self, skip: int | None) -> CollectionFindCursor[TRAW, T]:
    """
    Derive a new cursor differing from this one only in its skip setting.

    The operation requires the cursor to be still in the IDLE state.

    Rather than calling this method directly, one usually supplies
    the setting as an argument to the Collection `find` method.

    Args:
        skip: the skip value for the new cursor.

    Returns:
        a new CollectionFindCursor, equal to this one in every setting
            except `skip`, which takes the provided value.
    """

    self._ensure_idle()
    updated_cursor = self._copy(skip=skip)
    return updated_cursor

def sort(self, sort: dict[str, Any] | None) ‑> CollectionFindCursor[~TRAW, ~T]

Return a copy of this cursor with a new sort setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the Collection find method.

Args

sort: a new sort setting to apply to the returned new cursor.

Returns

a new CollectionFindCursor with the same settings as this one, except for sort which is the provided value.

Expand source code

def sort(self, sort: dict[str, Any] | None) -> CollectionFindCursor[TRAW, T]:
    """
    Derive a new cursor differing from this one only in its sort setting.

    The operation requires the cursor to be still in the IDLE state.

    Rather than calling this method directly, one usually supplies
    the setting as an argument to the Collection `find` method.

    Args:
        sort: the sort value for the new cursor.

    Returns:
        a new CollectionFindCursor, equal to this one in every setting
            except `sort`, which takes the provided value.
    """

    self._ensure_idle()
    updated_cursor = self._copy(sort=sort)
    return updated_cursor

def to_list(self, *, general_method_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[~T]

Materialize all documents that remain to be consumed from a cursor into a list.

Calling this method on a CLOSED cursor results in an error.

If the cursor is IDLE, the result will be the whole set of documents returned by the find operation; otherwise, the documents already consumed by the cursor will not be in the resulting list.

Args

general_method_timeout_ms: a timeout, in milliseconds, for the whole duration of this method. If not provided, there is no such timeout. Note that the per-request timeout set on the cursor still applies.
timeout_ms: an alias for general_method_timeout_ms.

Returns

a list of documents (or other values depending on the mapping function, if one is set). These are all items that were left to be consumed on the cursor when to_list is called.

Example

>>> collection.find(
...     {},
...     projection={"seq": True, "_id": False},
...     limit=3,
... ).to_list()
[{'seq': 1}, {'seq': 4}, {'seq': 15}]
>>>
>>> cursor = collection.find(
...     {},
...     projection={"seq": True, "_id": False},
...     limit=5,
... ).map(lambda doc: doc["seq"])
>>>
>>> first_value = cursor.__next__()
>>> cursor.to_list()
[4, 15, 22, 11]

Expand source code

def to_list(
    self,
    *,
    general_method_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[T]:
    """
    Collect into a list all documents that are still to be consumed
    from this cursor.

    This method cannot be called on a CLOSED cursor (an error is raised).

    For an IDLE cursor, the result is the full set of documents returned
    by the `find` operation; for a cursor that has already been partly
    consumed, the items read so far are not part of the returned list.

    Use of this method is discouraged when a very large result set is
    expected: many requests to the Data API would be needed and the list
    may take up a massive amount of memory. In such cases, lazily iterating
    over the cursor and consuming documents one by one is preferable.

    Args:
        general_method_timeout_ms: a timeout, in milliseconds, for the whole
            duration of this method. If not provided, there is no such timeout.
            Note that the per-request timeout set on the cursor still applies.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        a list of documents (or other values depending on the mapping
            function, if one is set). These are all items that were left
            to be consumed on the cursor when `to_list` is called.

    Example:
        >>> collection.find(
        ...     {},
        ...     projection={"seq": True, "_id": False},
        ...     limit=3,
        ... ).to_list()
        [{'seq': 1}, {'seq': 4}, {'seq': 15}]
        >>>
        >>> cursor = collection.find(
        ...     {},
        ...     projection={"seq": True, "_id": False},
        ...     limit=5,
        ... ).map(lambda doc: doc["seq"])
        >>>
        >>> first_value = cursor.__next__()
        >>> cursor.to_list()
        [4, 15, 22, 11]
    """

    self._ensure_alive()
    # The draining is done on a scratch copy carrying the timeouts
    # that apply to this call only.
    revised_timeouts = _revise_timeouts_for_cursor_copy(
        new_general_method_timeout_ms=general_method_timeout_ms,
        new_timeout_ms=timeout_ms,
        old_request_timeout_ms=self._request_timeout_ms,
    )
    request_ms, overall_ms = revised_timeouts
    scratch_cursor = self._copy(
        request_timeout_ms=request_ms,
        overall_timeout_ms=overall_ms,
    )
    # Hand this cursor's internal state over to the copy ...
    self._imprint_internal_state(scratch_cursor)
    remaining_items = [item for item in scratch_cursor]
    # ... and, once the copy is exhausted, take its state back.
    scratch_cursor._imprint_internal_state(self)
    return remaining_items

Inherited members

AbstractCursor:
- buffered_count
- close
- consume_buffer
- consumed
- cursor_id
- rewind
- state

class CursorState (*args, **kwds)

This enum expresses the possible states for a Cursor.

Values

IDLE: Iteration over results has not started yet (alive=T, started=F)
STARTED: Iteration has started, can still yield results (alive=T, started=T)
CLOSED: Finished/forcibly stopped. Won't return more documents (alive=F)

Expand source code

class CursorState(Enum):
    """
    This enum expresses the possible states for a `Cursor`.

    Each state is described below in terms of two flags: "alive" (the cursor
    may still return items) and "started" (iteration has begun).

    Values:
        IDLE: Iteration over results has not started yet (alive=T, started=F)
        STARTED: Iteration has started, *can* still yield results (alive=T, started=T)
        CLOSED: Finished/forcibly stopped. Won't return more documents (alive=F)
    """

    # Iteration over results has not started yet (alive=T, started=F)
    IDLE = "idle"
    # Iteration has started, *can* still yield results (alive=T, started=T)
    STARTED = "started"
    # Finished/forcibly stopped. Won't return more documents (alive=F)
    CLOSED = "closed"

Ancestors

enum.Enum

Class variables

var CLOSED
var IDLE
var STARTED

class FindAndRerankPage (results: list[TRAW], next_page_state: str | None, sort_vector: list[float] | DataAPIVector | None)

A whole pageful of results from a findAndRerank operation. This object represents the form taken by the returned items when using the pagination interface explicitly.

Attributes

results: the list of entries obtained on the retrieved page (possibly after applying a mapping function, if one is specified in the cursor). In absence of mapping functions, this is a list of RerankedResult objects.
next_page_state: a string encoding the pagination state. If the find-and-rerank operation does not admit any further page, this is returned as None. Otherwise, its value can be used to resume consuming the find_and_rerank results on another cursor instantiated independently later on.
sort_vector: if the find-and-rerank operation was done with the "include sort vector" flag set to True, and the sort criterion is a vector sorting, this contains the query vector used for the search. The query vector is expressed as a list of floats or a DataAPIVector depending on the serdes settings for the collection/table that originated the cursor. If not applicable, this attribute is returned as None.

Expand source code

@dataclass
class FindAndRerankPage(Generic[TRAW]):
    """
    One full page of results of a findAndRerank operation. Items are returned
    in this form when the pagination interface is used explicitly.

    Attributes:
        results: the entries found on the retrieved page (after the mapping
            function has been applied, if the cursor specifies one).
            When no mapping function is set, this is a list
            of RerankedResult objects.
        next_page_state: a string encoding the pagination state. It is None
            if the find-and-rerank operation admits no further pages.
            Otherwise, the value can be used later, on an independently
            instantiated cursor, to resume consuming the `find_and_rerank`
            results.
        sort_vector: the query vector used for the search, available if the
            find-and-rerank operation was run with the "include sort vector"
            flag set to True and the sort criterion is a vector sorting.
            Depending on the serdes settings of the collection/table
            that originated the cursor, the vector is either a list of floats
            or a DataAPIVector. If not applicable, this attribute is None.
    """

    results: list[TRAW]
    next_page_state: str | None
    sort_vector: list[float] | DataAPIVector | None

    def __repr__(self) -> str:
        # The number of entries is always shown; the optional parts are
        # mentioned (with contents elided) only when they are truthy.
        pieces = [f"results=<{len(self.results)} entries>"]
        if self.next_page_state:
            pieces.append("next_page_state=...")
        if self.sort_vector:
            pieces.append("sort_vector=...")
        return f"{self.__class__.__name__}({', '.join(pieces)})"

Ancestors

typing.Generic

Class variables

var next_page_state : str | None
var results : list[~TRAW]
var sort_vector : list[float] | DataAPIVector | None

class FindPage (results: list[TRAW], next_page_state: str | None, sort_vector: list[float] | DataAPIVector | None)

A whole pageful of results from a find operation. This object represents the form taken by the returned items when using the pagination interface explicitly.

Attributes

results: the list of entries obtained on the retrieved page (possibly after applying a mapping function, if one is specified in the cursor).
next_page_state: a string encoding the pagination state. If the find operation does not admit any further page, this is returned as None. Otherwise, its value can be used to resume consuming the find results on another cursor instantiated independently later on.
sort_vector: if the find operation was done with the "include sort vector" flag set to True, and the sort criterion is a vector sorting, this contains the query vector used for the search. The query vector is expressed as a list of floats or a DataAPIVector depending on the serdes settings for the collection/table that originated the cursor. If not applicable, this attribute is returned as None.

Expand source code

@dataclass
class FindPage(Generic[TRAW]):
    """
    One full page of results of a find operation. Items are returned
    in this form when the pagination interface is used explicitly.

    Attributes:
        results: the entries found on the retrieved page (after the mapping
            function has been applied, if the cursor specifies one).
        next_page_state: a string encoding the pagination state. It is None
            if the find operation admits no further pages. Otherwise,
            the value can be used later, on an independently instantiated
            cursor, to resume consuming the `find` results.
        sort_vector: the query vector used for the search, available if the
            find operation was run with the "include sort vector" flag set
            to True and the sort criterion is a vector sorting.
            Depending on the serdes settings of the collection/table
            that originated the cursor, the vector is either a list of floats
            or a DataAPIVector. If not applicable, this attribute is None.
    """

    results: list[TRAW]
    next_page_state: str | None
    sort_vector: list[float] | DataAPIVector | None

    def __repr__(self) -> str:
        # The number of entries is always shown; the optional parts are
        # mentioned (with contents elided) only when they are truthy.
        pieces = [f"results=<{len(self.results)} entries>"]
        if self.next_page_state:
            pieces.append("next_page_state=...")
        if self.sort_vector:
            pieces.append("sort_vector=...")
        return f"{self.__class__.__name__}({', '.join(pieces)})"

Ancestors

typing.Generic

Class variables

var next_page_state : str | None
var results : list[~TRAW]
var sort_vector : list[float] | DataAPIVector | None

class RerankedResult (document: TRAW, scores: dict[str, float | int | None])

A single result coming from a find_and_rerank command, i.e. an item from DB with scores.

Attributes

document: a document/row as returned by find_and_rerank API command.
scores: a dictionary of score labels to score float values, such as {"$rerank": 0.87, "$vector" : 0.65, "$lexical" : 0.91}.

Expand source code

@dataclass
class RerankedResult(Generic[TRAW]):
    """
    A single result coming from a `find_and_rerank` command, i.e. an item
    from DB together with its scores.

    Attributes:
        document: a document/row as returned by `find_and_rerank` API command.
        scores: a dictionary of score labels to score values, such as
            `{"$rerank": 0.87, "$vector" : 0.65, "$lexical" : 0.91}`.
            As per the declared type, a score can be a float, an int or None.
    """

    # the item (document or row) as found on the database
    document: TRAW
    # score label => score value
    scores: dict[str, float | int | None]

Ancestors

typing.Generic

Class variables

var document : ~TRAW
var scores : dict[str, float | int | None]

A synchronous cursor over rows, as returned by a find invocation on a Table. A cursor can be iterated over, materialized into a list, and queried/manipulated in various ways.

Some cursor operations mutate it in-place (such as consuming its rows), others return a new cursor without changing the original one. See the documentation for the various methods and the Table find method for more details and usage patterns.

Example

>>> cursor = my_table.find(
...     {"match_id": "challenge6"},
...     projection={"winner": True},
...     limit=5,
... )
>>> for row in cursor:
...     print(row)
...
{'winner': 'Donna'}
{'winner': 'Erick'}
{'winner': 'Fiona'}
{'winner': 'Georg'}
{'winner': 'Helen'}

Expand source code

class TableFindCursor(Generic[TRAW, T], AbstractCursor[TRAW]):
    """
    A synchronous cursor over rows, as returned by a `find` invocation on
    a Table. A cursor can be iterated over, materialized into a list,
    and queried/manipulated in various ways.

    Some cursor operations mutate it in-place (such as consuming its rows),
    other return a new cursor without changing the original one. See the documentation
    for the various methods and the Table `find` method for more details
    and usage patterns.

    A cursor has two type parameters: TRAW and T. The first is the type of the "raw"
    rows as they are obtained from the Data API, the second is the type of the
    items after the optional mapping function (see the `.map()` method). If there is
    no mapping, TRAW = T. In general, consuming a cursor returns items of type T,
    except for the `consume_buffer` primitive that draws directly from the buffer
    and always returns items of type TRAW.

    Example:
        >>> cursor = my_table.find(
        ...     {"match_id": "challenge6"},
        ...     projection={"winner": True},
        ...     limit=5,
        ... )
        >>> for row in cursor:
        ...     print(row)
        ...
        {'winner': 'Donna'}
        {'winner': 'Erick'}
        {'winner': 'Fiona'}
        {'winner': 'Georg'}
        {'winner': 'Helen'}
    """

    _query_engine: _TableFindQueryEngine[TRAW]
    _request_timeout_ms: int | None
    _overall_timeout_ms: int | None
    _request_timeout_label: str | None
    _overall_timeout_label: str | None
    _timeout_manager: MultiCallTimeoutManager
    _filter: FilterType | None
    _projection: ProjectionType | None
    _sort: dict[str, Any] | None
    _limit: int | None
    _initial_page_state: str | UnsetType
    _include_similarity: bool | None
    _include_sort_vector: bool | None
    _skip: int | None
    _mapper: Callable[[TRAW], T] | None

    def __init__(
        self,
        *,
        table: Table[TRAW],
        request_timeout_ms: int | None,
        overall_timeout_ms: int | None,
        request_timeout_label: str | None = None,
        overall_timeout_label: str | None = None,
        filter: FilterType | None = None,
        projection: ProjectionType | None = None,
        sort: dict[str, Any] | None = None,
        limit: int | None = None,
        initial_page_state: str | UnsetType = _UNSET,
        include_similarity: bool | None = None,
        include_sort_vector: bool | None = None,
        skip: int | None = None,
        mapper: Callable[[TRAW], T] | None = None,
    ) -> None:
        """
        Set up a synchronous find-cursor over the rows of a table.

        All parameters are keyword-only and are stored on the instance;
        the `filter` and `sort` parameters are deep-copied before being stored.

        Args:
            table: the Table the `find` operation targets.
            request_timeout_ms: the timeout setting for each page request.
            overall_timeout_ms: the timeout setting handed to the
                cursor's multi-call timeout manager.
            request_timeout_label: a label associated to the request timeout.
            overall_timeout_label: a label associated to the overall timeout.
            filter: the filter setting for the find operation.
            projection: the projection setting for the find operation.
            sort: the sort setting for the find operation.
            limit: the limit setting for the find operation.
            initial_page_state: the page state the cursor starts from, if any.
            include_similarity: the include_similarity setting.
            include_sort_vector: the include_sort_vector setting.
            skip: the skip setting for the find operation.
            mapper: an optional function applied to each raw row
                before it is returned to the caller.
        """
        # query definition (filter and sort are stored as private deep copies)
        self._filter = deepcopy(filter)
        self._sort = deepcopy(sort)
        self._projection = projection
        self._limit = limit
        self._skip = skip
        self._include_similarity = include_similarity
        self._include_sort_vector = include_sort_vector
        self._initial_page_state = initial_page_state
        self._mapper = mapper
        # timeout settings
        self._request_timeout_ms = request_timeout_ms
        self._request_timeout_label = request_timeout_label
        self._overall_timeout_ms = overall_timeout_ms
        self._overall_timeout_label = overall_timeout_label
        # the engine in charge of fetching pages: synchronous table only
        self._query_engine = _TableFindQueryEngine(
            table=table,
            async_table=None,
            filter=self._filter,
            projection=self._projection,
            sort=self._sort,
            limit=self._limit,
            include_similarity=self._include_similarity,
            include_sort_vector=self._include_sort_vector,
            skip=self._skip,
        )
        AbstractCursor.__init__(self, initial_page_state=initial_page_state)
        self._timeout_manager = MultiCallTimeoutManager(
            overall_timeout_ms=self._overall_timeout_ms,
            timeout_label=self._overall_timeout_label,
        )

    def _copy(
        self: TableFindCursor[TRAW, T],
        *,
        request_timeout_ms: int | None | UnsetType = _UNSET,
        overall_timeout_ms: int | None | UnsetType = _UNSET,
        request_timeout_label: str | None | UnsetType = _UNSET,
        overall_timeout_label: str | None | UnsetType = _UNSET,
        filter: FilterType | None | UnsetType = _UNSET,
        projection: ProjectionType | None | UnsetType = _UNSET,
        sort: dict[str, Any] | None | UnsetType = _UNSET,
        limit: int | None | UnsetType = _UNSET,
        initial_page_state: str | None | UnsetType = _UNSET,
        include_similarity: bool | None | UnsetType = _UNSET,
        include_sort_vector: bool | None | UnsetType = _UNSET,
        skip: int | None | UnsetType = _UNSET,
    ) -> TableFindCursor[TRAW, T]:
        """
        Create a new TableFindCursor on the same table, with the same mapper,
        overriding only the settings that are explicitly passed.

        Any parameter left unset inherits this cursor's current value, while
        an explicit None is passed on as the new value. The exception is
        `initial_page_state`, for which an explicit None results in the
        new cursor receiving the "unset" marker.

        Returns:
            a new TableFindCursor instance.

        Raises:
            RuntimeError: if the query engine has no (synchronous) table.
        """
        source_table = self._query_engine.table
        if source_table is None:
            raise RuntimeError("Query engine has no table.")

        def _pick(override: Any, current: Any) -> Any:
            # an unset override means "inherit the value from this cursor"
            return current if isinstance(override, UnsetType) else override

        # special treatment: passing None erases (hence we must supply unset and not None):
        new_initial_page_state: str | UnsetType
        if isinstance(initial_page_state, UnsetType):
            new_initial_page_state = self._initial_page_state
        elif initial_page_state is None:
            new_initial_page_state = _UNSET
        else:
            new_initial_page_state = initial_page_state

        return TableFindCursor(
            table=source_table,
            request_timeout_ms=_pick(request_timeout_ms, self._request_timeout_ms),
            overall_timeout_ms=_pick(overall_timeout_ms, self._overall_timeout_ms),
            request_timeout_label=_pick(
                request_timeout_label, self._request_timeout_label
            ),
            overall_timeout_label=_pick(
                overall_timeout_label, self._overall_timeout_label
            ),
            filter=_pick(filter, self._filter),
            projection=_pick(projection, self._projection),
            sort=_pick(sort, self._sort),
            limit=_pick(limit, self._limit),
            initial_page_state=new_initial_page_state,
            include_similarity=_pick(include_similarity, self._include_similarity),
            include_sort_vector=_pick(
                include_sort_vector, self._include_sort_vector
            ),
            skip=_pick(skip, self._skip),
            mapper=self._mapper,
        )

    def _try_ensure_fill_buffer(self) -> None:
        """
        Make an attempt at refilling an empty buffer with the next page.

        Nothing is done (and no error is raised) if the cursor is closed,
        if the buffer still has items, or if there is no further page to fetch.
        This method never changes the cursor state.
        """

        if self._state == CursorState.CLOSED:
            return
        if self._buffer:
            # still items to consume locally: no need for a new page
            return
        if self._next_page_state is None and self._state != CursorState.IDLE:
            # iteration has begun and there is no next-page state to follow
            return
        # the request timeout caps whatever is left of the overall timeout
        fetched = self._query_engine._fetch_page(
            page_state=self._next_page_state,
            timeout_context=self._timeout_manager.remaining_timeout(
                cap_time_ms=self._request_timeout_ms,
                cap_timeout_label=self._request_timeout_label,
            ),
        )
        page_items, following_page_state, response_status = fetched
        self._next_page_state = following_page_state
        self._last_response_status = response_status
        self._pages_retrieved += 1
        self._buffer = page_items

    def __repr__(self) -> str:
        """
        A short description of the cursor: class name, name of the
        originating table, current state and count of items consumed so far.
        """
        cursor_kind = self.__class__.__name__
        table_name = self.data_source.name
        return (
            f'{cursor_kind}("{table_name}", {self._state.value}, '
            f"consumed so far: {self.consumed})"
        )

    def __iter__(self: TableFindCursor[TRAW, T]) -> TableFindCursor[TRAW, T]:
        """
        Return the cursor itself as the iterator over its items,
        after checking (through `_ensure_alive`) that it can still be used.
        """
        self._ensure_alive()
        return self

    def __next__(self) -> T:
        """
        Return the next item from the cursor, fetching a new page if needed.

        The item has the mapping function applied to it, if one is set.
        When there are no more items, the cursor is marked as CLOSED.

        Raises:
            StopIteration: if the cursor is closed or has no more items.
        """
        if self._state == CursorState.CLOSED:
            raise StopIteration
        self._try_ensure_fill_buffer()
        if self._buffer:
            self._state = CursorState.STARTED
            # take the first buffered item; the buffer is re-bound to a new
            # list (and not modified in place)
            raw_item = self._buffer[0]
            self._buffer = self._buffer[1:]
            self._consumed += 1
            if self._mapper is None:
                return cast(T, raw_item)
            return cast(T, self._mapper(raw_item))
        # nothing left even after trying to fetch: the cursor is exhausted
        self._state = CursorState.CLOSED
        raise StopIteration

    @property
    def data_source(self) -> Table[TRAW]:
        """
        The Table this cursor was created from by means of a `find` operation.

        Returns:
            a Table instance.

        Raises:
            RuntimeError: if the query engine holds no table.
        """

        source_table = self._query_engine.table
        if source_table is None:
            raise RuntimeError("Query engine has no table.")
        return source_table

    def clone(self) -> TableFindCursor[TRAW, T]:
        """
        Build a brand-new cursor configured exactly like this one.

        The returned cursor has:
        - the same parameters (timeouts, filter, projection, sort, limit,
          initial page state, similarity/sort-vector flags, skip, mapper);
        - its pristine IDLE state, regardless of how far this cursor
          has been consumed.

        Returns:
            a new TableFindCursor, similar to this one but
            rewound to its initial state.

        Raises:
            RuntimeError: if the query engine holds no table.

        Example:
            >>> cursor = my_table.find(
            ...     {"match_id": "challenge6"},
            ...     projection={"winner": True},
            ...     limit=2,
            ... ).map(lambda row: row["winner"])
            >>> for value in cursor:
            ...     print(value)
            ...
            Donna
            Erick
            >>> cloned_cursor = cursor.clone()
            >>> for row in cloned_cursor:
            ...     print(row)
            ...
            Donna
            Erick
        """

        source_table = self._query_engine.table
        if source_table is None:
            raise RuntimeError("Query engine has no table.")
        return TableFindCursor(
            table=source_table,
            # query definition
            filter=self._filter,
            projection=self._projection,
            sort=self._sort,
            limit=self._limit,
            skip=self._skip,
            initial_page_state=self._initial_page_state,
            include_similarity=self._include_similarity,
            include_sort_vector=self._include_sort_vector,
            mapper=self._mapper,
            # timeout settings
            request_timeout_ms=self._request_timeout_ms,
            overall_timeout_ms=self._overall_timeout_ms,
            request_timeout_label=self._request_timeout_label,
            overall_timeout_label=self._overall_timeout_label,
        )

    def filter(self, filter: FilterType | None) -> TableFindCursor[TRAW, T]:
        """
        Derive a new cursor that differs from this one only in its filter.
        The cursor must still be in the IDLE state for this to be allowed.

        Rather than calling this method directly, one usually supplies
        the filter as an argument to the Table `find` method.

        Args:
            filter: the filter setting for the new cursor.

        Returns:
            a new TableFindCursor with the same settings as this one,
                except for `filter` which is the provided value.
        """

        self._ensure_idle()
        filtered_cursor = self._copy(filter=filter)
        return filtered_cursor

    def project(self, projection: ProjectionType | None) -> TableFindCursor[TRAW, T]:
        """
        Derive a new cursor that differs from this one only in its projection.
        The cursor must still be in the IDLE state, and must have no mapping
        function set, for this to be allowed.

        Rather than calling this method directly, one usually supplies
        the projection as an argument to the Table `find` method.

        Args:
            projection: the projection setting for the new cursor.

        Returns:
            a new TableFindCursor with the same settings as this one,
                except for `projection` which is the provided value.

        Raises:
            CursorException: if a mapping function is already set.
        """

        self._ensure_idle()
        if self._mapper is None:
            return self._copy(projection=projection)
        # a projection change after `map` is rejected
        raise CursorException(
            "Cannot set projection after map.",
            cursor_state=self._state.value,
        )

    def sort(self, sort: dict[str, Any] | None) -> TableFindCursor[TRAW, T]:
        """
        Derive a new cursor that differs from this one only in its sort clause.
        The cursor must still be in the IDLE state for this to be allowed.

        Rather than calling this method directly, one usually supplies
        the sort clause as an argument to the Table `find` method.

        Args:
            sort: the sort setting for the new cursor.

        Returns:
            a new TableFindCursor with the same settings as this one,
                except for `sort` which is the provided value.
        """

        self._ensure_idle()
        sorted_cursor = self._copy(sort=sort)
        return sorted_cursor

    def limit(self, limit: int | None) -> TableFindCursor[TRAW, T]:
        """
        Derive a new cursor that differs from this one only in its limit.
        The cursor must still be in the IDLE state for this to be allowed.

        Rather than calling this method directly, one usually supplies
        the limit as an argument to the Table `find` method.

        Args:
            limit: the limit setting for the new cursor.

        Returns:
            a new TableFindCursor with the same settings as this one,
                except for `limit` which is the provided value.
        """

        self._ensure_idle()
        limited_cursor = self._copy(limit=limit)
        return limited_cursor

    def initial_page_state(
        self, initial_page_state: str | UnsetType
    ) -> TableFindCursor[TRAW, T]:
        """
        Derive a new cursor that differs from this one only in its
        initial_page_state. The cursor must still be in the IDLE state
        for this to be allowed.

        Rather than calling this method directly, one usually supplies
        the initial page state as an argument to the Table `find` method.

        Args:
            initial_page_state: the initial_page_state setting for the
                new cursor. Passing an explicit None raises an error.

        Returns:
            a new TableFindCursor with the same settings as this one,
                except for `initial_page_state` which is the provided value.
        """

        self._ensure_idle()
        repositioned_cursor = self._copy(initial_page_state=initial_page_state)
        return repositioned_cursor

    def include_similarity(
        self, include_similarity: bool | None
    ) -> TableFindCursor[TRAW, T]:
        """
        Derive a new cursor that differs from this one only in its
        include_similarity flag. The cursor must still be in the IDLE state,
        and must have no mapping function set, for this to be allowed.

        Rather than calling this method directly, one usually supplies
        the flag as an argument to the Table `find` method.

        Args:
            include_similarity: the include_similarity setting
                for the new cursor.

        Returns:
            a new TableFindCursor with the same settings as this one,
                except for `include_similarity` which is the provided value.

        Raises:
            CursorException: if a mapping function is already set.
        """

        self._ensure_idle()
        if self._mapper is None:
            return self._copy(include_similarity=include_similarity)
        # changing this flag after `map` is rejected
        raise CursorException(
            "Cannot set include_similarity after map.",
            cursor_state=self._state.value,
        )

    def include_sort_vector(
        self, include_sort_vector: bool | None
    ) -> TableFindCursor[TRAW, T]:
        """
        Derive a new cursor that differs from this one only in its
        include_sort_vector flag. The cursor must still be in the IDLE state
        for this to be allowed.

        Rather than calling this method directly, one usually supplies
        the flag as an argument to the Table `find` method.

        Args:
            include_sort_vector: the include_sort_vector setting
                for the new cursor.

        Returns:
            a new TableFindCursor with the same settings as this one,
                except for `include_sort_vector` which is the provided value.
        """

        self._ensure_idle()
        flagged_cursor = self._copy(include_sort_vector=include_sort_vector)
        return flagged_cursor

    def skip(self, skip: int | None) -> TableFindCursor[TRAW, T]:
        """
        Derive a new cursor that differs from this one only in its skip value.
        The cursor must still be in the IDLE state for this to be allowed.

        Rather than calling this method directly, one usually supplies
        the skip value as an argument to the Table `find` method.

        Args:
            skip: the skip setting for the new cursor.

        Returns:
            a new TableFindCursor with the same settings as this one,
                except for `skip` which is the provided value.
        """

        self._ensure_idle()
        skipping_cursor = self._copy(skip=skip)
        return skipping_cursor

    def map(self, mapper: Callable[[T], TNEW]) -> TableFindCursor[TRAW, TNEW]:
        """
        Derive a new cursor whose returned items are transformed by a mapping
        function. If this cursor already has a mapping function, the new cursor
        applies the existing one first and then the one provided here
        (i.e. the functions are composed).

        The cursor must still be in the IDLE state for this to be allowed.

        Args:
            mapper: a function transforming the objects returned by the cursor
                into something else (i.e. a function T => TNEW).

        Returns:
            a new TableFindCursor with a new mapping function on the results,
                possibly composed with any pre-existing mapping function.

        Raises:
            RuntimeError: if the query engine holds no table.

        Example:
            >>> cursor = my_table.find(
            ...     {"match_id": "challenge6"},
            ...     projection={"winner": True},
            ...     limit=2,
            ... )
            >>> for row in cursor:
            ...     print(row)
            ...
            {'winner': 'Donna'}
            {'winner': 'Erick'}
            >>> cursor_mapped = my_table.find(
            ...     {"match_id": "challenge6"},
            ...     projection={"winner": True},
            ...     limit=2,
            ... ).map(lambda row: row["winner"])
            >>> for value in cursor_mapped:
            ...     print(value)
            ...
            Donna
            Erick
            >>> cursor_mapped_twice = my_table.find(
            ...     {"match_id": "challenge6"},
            ...     projection={"winner": True},
            ...     limit=2,
            ... ).map(lambda row: row["winner"]).map(lambda w: w.upper())
            >>> for value in cursor_mapped_twice:
            ...     print(value)
            ...
            DONNA
            ERICK
        """

        self._ensure_idle()
        source_table = self._query_engine.table
        if source_table is None:
            raise RuntimeError("Query engine has no table.")

        composite_mapper: Callable[[TRAW], TNEW]
        if self._mapper is None:
            # no previous mapping: the provided function acts on raw items
            composite_mapper = cast(Callable[[TRAW], TNEW], mapper)
        else:

            def _chained(raw_item: TRAW) -> TNEW:
                # existing mapping first, then the newly-provided one
                return mapper(self._mapper(raw_item))  # type: ignore[misc]

            composite_mapper = _chained

        return TableFindCursor(
            table=source_table,
            # query definition
            filter=self._filter,
            projection=self._projection,
            sort=self._sort,
            limit=self._limit,
            skip=self._skip,
            initial_page_state=self._initial_page_state,
            include_similarity=self._include_similarity,
            include_sort_vector=self._include_sort_vector,
            mapper=composite_mapper,
            # timeout settings
            request_timeout_ms=self._request_timeout_ms,
            overall_timeout_ms=self._overall_timeout_ms,
            request_timeout_label=self._request_timeout_label,
            overall_timeout_label=self._overall_timeout_label,
        )

    def for_each(
        self,
        function: Callable[[T], bool | None],
        *,
        general_method_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> None:
        """
        Run a callback function on each of the rows that remain to be
        consumed from the cursor.

        Calling this method on a CLOSED cursor results in an error.

        Whatever the callback returns is ignored, with a single exception:
        a return value of exactly the boolean `False` makes the method stop
        early, leaving the cursor partially consumed (STARTED state).
        Unless that happens, the cursor is consumed until exhaustion
        and ends up in the CLOSED state.

        Args:
            function: a callback function whose only parameter is of the type
                returned by the cursor. It is invoked once for each row yielded
                by the cursor. If the callback returns `False`, the `for_each`
                invocation stops early and no further rows are consumed.
            general_method_timeout_ms: a timeout, in milliseconds, for the whole
                duration of this method. If not provided, there is no such timeout.
                Note that the per-request timeout set on the cursor still applies.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Example:
            >>> cursor = my_table.find(
            ...     {"match_id": "challenge6"},
            ...     projection={"winner": True},
            ...     limit=3,
            ... )
            >>> def printer(row):
            ...     print(f"-> {row['winner']}")
            ...
            >>> cursor.for_each(printer)
            -> Donna
            -> Erick
            -> Fiona
            >>>
            >>> if cursor.state != CursorState.CLOSED:
            ...     print(f"alive: {list(cursor)}")
            ... else:
            ...     print("(closed)")
            ...
            (closed)
            >>> cursor2 = my_table.find(
            ...     {"match_id": "challenge6"},
            ...     projection={"winner": True},
            ...     limit=3,
            ... )
            >>> def checker(row):
            ...     print(f"-> {row['winner']}")
            ...     return row["winner"] != "Erick"
            ...
            >>> cursor2.for_each(checker)
            -> Donna
            -> Erick
            >>>
            >>> if cursor2.state != CursorState.CLOSED:
            ...     print(f"alive: {list(cursor2)}")
            ... else:
            ...     print("(closed)")
            ...
            alive: [{'winner': 'Fiona'}]
        """

        self._ensure_alive()
        request_ms, overall_ms = _revise_timeouts_for_cursor_copy(
            new_general_method_timeout_ms=general_method_timeout_ms,
            new_timeout_ms=timeout_ms,
            old_request_timeout_ms=self._request_timeout_ms,
        )
        # Iteration runs on a working copy carrying the revised timeouts.
        # This cursor's internal state is imprinted onto the copy beforehand,
        # and the copy's state is imprinted back once the loop is over.
        worker = self._copy(
            request_timeout_ms=request_ms,
            overall_timeout_ms=overall_ms,
        )
        self._imprint_internal_state(worker)
        for item in worker:
            # only a literal `False` stops the loop (not None, 0, etc.)
            if function(item) is False:
                break
        worker._imprint_internal_state(self)

    def to_list(
        self,
        *,
        general_method_timeout_ms: int | None = None,
        timeout_ms: int | None = None,
    ) -> list[T]:
        """
        Collect into a list all rows that are still to be consumed from the cursor.

        Calling this method on a CLOSED cursor results in an error.

        On an IDLE cursor the result is the whole set of rows returned by
        the `find` operation; otherwise, rows that the cursor has already
        consumed are not part of the resulting list.

        This method is not recommended when a huge list of results is expected:
        it would require many data exchanges with the Data API and possibly
        a massive amount of memory to build the list. In such cases, lazily
        iterating over the cursor and consuming the rows is to be preferred.

        Args:
            general_method_timeout_ms: a timeout, in milliseconds, for the whole
                duration of this method. If not provided, there is no such timeout.
                Note that the per-request timeout set on the cursor still applies.
            timeout_ms: an alias for `general_method_timeout_ms`.

        Returns:
            list: a list of rows (or other values depending on the mapping
                function, if one is set). These are all items that were left
                to be consumed on the cursor when `to_list` is called.

        Example:
            >>> my_table.find(
            ...     {"match_id": "challenge6"},
            ...     projection={"winner": True},
            ...     limit=3,
            ... ).to_list()
            [{'winner': 'Donna'}, {'winner': 'Erick'}, {'winner': 'Fiona'}]
            >>>
            >>> cursor = my_table.find(
            ...     {"match_id": "challenge6"},
            ...     projection={"winner": True},
            ...     limit=5,
            ... ).map(lambda doc: doc["winner"])
            >>>
            >>> first_value = cursor.__next__()
            >>> cursor.to_list()
            ['Erick', 'Fiona', 'Georg', 'Helen']
        """

        self._ensure_alive()
        request_ms, overall_ms = _revise_timeouts_for_cursor_copy(
            new_general_method_timeout_ms=general_method_timeout_ms,
            new_timeout_ms=timeout_ms,
            old_request_timeout_ms=self._request_timeout_ms,
        )
        # Iteration runs on a working copy carrying the revised timeouts.
        # This cursor's internal state is imprinted onto the copy beforehand,
        # and the copy's state is imprinted back once it has been drained.
        worker = self._copy(
            request_timeout_ms=request_ms,
            overall_timeout_ms=overall_ms,
        )
        self._imprint_internal_state(worker)
        collected = list(worker)
        worker._imprint_internal_state(self)
        return collected

    def has_next(self) -> bool:
        """
        Tell whether the cursor has at least one more item to return.

        `has_next` can be called on a cursor in any state; on a CLOSED cursor
        the answer is always False.

        If the local buffer is empty, calling this method may trigger
        the fetch of a new page.

        On an IDLE cursor, `has_next` causes the first page to be fetched, yet
        the cursor remains IDLE until items actually start being consumed.

        Returns:
            True if there is at least one further item available to consume;
                False otherwise (including the case of a CLOSED cursor).
        """

        if self._state == CursorState.CLOSED:
            return False
        self._try_ensure_fill_buffer()
        # after the refill attempt, a non-empty buffer means "more to come"
        return bool(self._buffer)

    def get_sort_vector(self) -> list[float] | DataAPIVector | None:
        """
        Return the query vector of the vector (ANN) search this cursor comes
        from, if applicable. For a non-ANN search, or one that was run without
        the `include_sort_vector` flag, the result is None.

        On an IDLE cursor, `get_sort_vector` causes the first page to be fetched,
        yet the cursor remains IDLE until items actually start being consumed.

        This method can be called on a CLOSED cursor as well: it will return
        either None or the sort vector used in the search.

        Returns:
            the query vector used in the search if this was a
                vector search (otherwise None). The vector is returned either
                as a DataAPIVector or a plain list of numbers depending on the
                `APIOptions.serdes_options` that apply. The query vector is
                available also for vectorize-based ANN searches.
        """

        self._try_ensure_fill_buffer()
        if not self._last_response_status:
            # no response status available: nothing to extract a vector from
            return None
        return _ensure_vector(
            self._last_response_status.get("sortVector"),
            self.data_source.api_options.serdes_options,
        )

    def fetch_next_page(self) -> FindPage[T]:
        """
        Get one whole page of results from the Data API and return it all
        at once, along with the associated "out-of-band" information.

        This is the way to consume a cursor when the caller needs to work
        explicitly page by page. It goes together with the creation of cursors
        'set to start from a certain page' through the `initial_page_state`
        constructor parameter/builder method: typically, the initial page state
        passed there is the page state (a string) found in the `FindPage`
        returned by this method for a previous page of the same find operation.

        Returns:
            a `FindPage` object expressing the full Data API response, including
            the resulting rows (after applying the cursor mapping function,
            if one is defined), as well as the state to use to query for the next
            page (a string) and the sort vector if requested and applicable.

        Raises:
            CursorException: if the buffer holds items when this method is
                called, i.e. page-wise retrieval is being mixed with
                regular iteration.

        Example:
            >>> # A cursor to get the first page:
            >>> cursor0 = table.find({})
            >>> page0 = cursor0.fetch_next_page()
            >>> page0
            FindPage(results=<20 entries>, next_page_state=...)
            >>> page0.results[0]
            {'id': 'row_31', 'value': 31}
            >>> page0.next_page_state
            'BXJvd18zAPB////rAA=='
            >>> # Get the next page through a new cursor:
            >>> cursor1 = table.find(
            ...     {},
            ...     initial_page_state=page0.next_page_state,
            ... )
            >>> page1 = cursor1.fetch_next_page()
            >>> page1.results[0]
            {'id': 'row_25', 'value': 25}
            >>> page1.next_page_state
            'BnJvd18zOQDwf///1wA='
            >>>
            >>> # (...)
            >>> # Eventually there's nothing more to retrieve:
            >>> page_N.next_page_state is None
            True
        """

        self._ensure_alive()
        if self._buffer:
            mixing_msg = (
                "Paginated retrieval cannot be mixed with regular cursor iteration."
            )
            raise CursorException(
                text=mixing_msg,
                cursor_state=self._state.value,
            )

        self._try_ensure_fill_buffer()

        # Size and next-page state are read before any item is consumed.
        page_size = len(self._buffer)
        next_state = self._next_page_state
        # `zip` advances `range` first, so exactly `page_size` items are drawn
        # from the cursor (none at all when the page is empty).
        page_items = [item for _, item in zip(range(page_size), self)]

        sort_vector: list[float] | DataAPIVector | None = None
        if self._last_response_status:
            sort_vector = _ensure_vector(
                self._last_response_status.get("sortVector"),
                self.data_source.api_options.serdes_options,
            )

        return FindPage(
            results=page_items,
            next_page_state=next_state,
            sort_vector=sort_vector,
        )

Ancestors

AbstractCursor
abc.ABC
typing.Generic

Instance variables

var data_source : Table[~TRAW]

The Table object that originated this cursor through a find operation.

Returns

a Table instance.

Expand source code

@property
def data_source(self) -> Table[TRAW]:
    """
    The Table this cursor was created from by means of a `find` operation.

    Returns:
        a Table instance.

    Raises:
        RuntimeError: if the query engine holds no table.
    """

    source_table = self._query_engine.table
    if source_table is None:
        raise RuntimeError("Query engine has no table.")
    return source_table

Methods

def clone(self) ‑> TableFindCursor[~TRAW, ~T]

Create a copy of this cursor with the same parameters (timeouts, filter, projection, etc.), rewound to its pristine IDLE state.

Returns

a new TableFindCursor, similar to this one but rewound to its initial state.

Example

>>> cursor = my_table.find(
...     {"match_id": "challenge6"},
...     projection={"winner": True},
...     limit=2,
... ).map(lambda row: row["winner"])
>>> for value in cursor:
...     print(value)
...
Donna
Erick
>>> cloned_cursor = cursor.clone()
>>> for row in cloned_cursor:
...     print(row)
...
Donna
Erick

Expand source code

def clone(self) -> TableFindCursor[TRAW, T]:
    """
    Build a brand-new cursor configured exactly like this one.

    The returned cursor has:
    - the same parameters (timeouts, filter, projection, sort, limit,
      initial page state, similarity/sort-vector flags, skip, mapper);
    - its pristine IDLE state, regardless of how far this cursor
      has been consumed.

    Returns:
        a new TableFindCursor, similar to this one but
        rewound to its initial state.

    Raises:
        RuntimeError: if the query engine holds no table.

    Example:
        >>> cursor = my_table.find(
        ...     {"match_id": "challenge6"},
        ...     projection={"winner": True},
        ...     limit=2,
        ... ).map(lambda row: row["winner"])
        >>> for value in cursor:
        ...     print(value)
        ...
        Donna
        Erick
        >>> cloned_cursor = cursor.clone()
        >>> for row in cloned_cursor:
        ...     print(row)
        ...
        Donna
        Erick
    """

    source_table = self._query_engine.table
    if source_table is None:
        raise RuntimeError("Query engine has no table.")
    return TableFindCursor(
        table=source_table,
        # query definition
        filter=self._filter,
        projection=self._projection,
        sort=self._sort,
        limit=self._limit,
        skip=self._skip,
        initial_page_state=self._initial_page_state,
        include_similarity=self._include_similarity,
        include_sort_vector=self._include_sort_vector,
        mapper=self._mapper,
        # timeout settings
        request_timeout_ms=self._request_timeout_ms,
        overall_timeout_ms=self._overall_timeout_ms,
        request_timeout_label=self._request_timeout_label,
        overall_timeout_label=self._overall_timeout_label,
    )

def fetch_next_page(self) ‑> FindPage[~T]

Retrieve a single, whole page of results from the Data API and return it at once, together with associated "out-of-band" information.

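Returns

a FindPage object expressing the full Data API response, including the resulting rows (after applying the cursor mapping function, if one is defined), as well as the state to use to query for the next page (a string) and the sort vector if requested and applicable.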

Example

>>> # A cursor to get the first page:
>>> cursor0 = table.find({})
>>> page0 = cursor0.fetch_next_page()
>>> page0
FindPage(results=<20 entries>, next_page_state=...)
>>> page0.results[0]
{'id': 'row_31', 'value': 31}
>>> page0.next_page_state
'BXJvd18zAPB////rAA=='
>>> # Get the next page through a new cursor:
>>> cursor1 = table.find(
...     {},
...     initial_page_state=page0.next_page_state,
... )
>>> page1 = cursor1.fetch_next_page()
>>> page1.results[0]
{'id': 'row_25', 'value': 25}
>>> page1.next_page_state
'BnJvd18zOQDwf///1wA='
>>>
>>> # (...)
>>> # Eventually there's nothing more to retrieve:
>>> page_N.next_page_state is None
True

Expand source code

def fetch_next_page(self) -> FindPage[T]:
    """
    Get one whole page of results from the Data API and return it all
    at once, along with the associated "out-of-band" information.

    This is the way to consume a cursor when the caller needs to work
    explicitly page by page. It goes together with the creation of cursors
    'set to start from a certain page' through the `initial_page_state`
    constructor parameter/builder method: typically, the initial page state
    passed there is the page state (a string) found in the `FindPage`
    returned by this method for a previous page of the same find operation.

    Returns:
        a `FindPage` object expressing the full Data API response, including
        the resulting rows (after applying the cursor mapping function,
        if one is defined), as well as the state to use to query for the next
        page (a string) and the sort vector if requested and applicable.

    Raises:
        CursorException: if the buffer holds items when this method is
            called, i.e. page-wise retrieval is being mixed with
            regular iteration.

    Example:
        >>> # A cursor to get the first page:
        >>> cursor0 = table.find({})
        >>> page0 = cursor0.fetch_next_page()
        >>> page0
        FindPage(results=<20 entries>, next_page_state=...)
        >>> page0.results[0]
        {'id': 'row_31', 'value': 31}
        >>> page0.next_page_state
        'BXJvd18zAPB////rAA=='
        >>> # Get the next page through a new cursor:
        >>> cursor1 = table.find(
        ...     {},
        ...     initial_page_state=page0.next_page_state,
        ... )
        >>> page1 = cursor1.fetch_next_page()
        >>> page1.results[0]
        {'id': 'row_25', 'value': 25}
        >>> page1.next_page_state
        'BnJvd18zOQDwf///1wA='
        >>>
        >>> # (...)
        >>> # Eventually there's nothing more to retrieve:
        >>> page_N.next_page_state is None
        True
    """

    self._ensure_alive()
    if self._buffer:
        mixing_msg = (
            "Paginated retrieval cannot be mixed with regular cursor iteration."
        )
        raise CursorException(
            text=mixing_msg,
            cursor_state=self._state.value,
        )

    self._try_ensure_fill_buffer()

    # Size and next-page state are read before any item is consumed.
    page_size = len(self._buffer)
    next_state = self._next_page_state
    # `zip` advances `range` first, so exactly `page_size` items are drawn
    # from the cursor (none at all when the page is empty).
    page_items = [item for _, item in zip(range(page_size), self)]

    sort_vector: list[float] | DataAPIVector | None = None
    if self._last_response_status:
        sort_vector = _ensure_vector(
            self._last_response_status.get("sortVector"),
            self.data_source.api_options.serdes_options,
        )

    return FindPage(
        results=page_items,
        next_page_state=next_state,
        sort_vector=sort_vector,
    )

def filter(self, filter: FilterType | None) ‑> TableFindCursor[~TRAW, ~T]

Return a copy of this cursor with a new filter setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the Table find method.

Args

filter: a new filter setting to apply to the returned new cursor.

Returns

a new TableFindCursor with the same settings as this one, except for filter which is the provided value.

Expand source code

def filter(self, filter: FilterType | None) -> TableFindCursor[TRAW, T]:
    """
    Derive a new cursor that differs from this one only in its filter.
    The cursor must still be in the IDLE state for this to be allowed.

    Rather than calling this method directly, one usually supplies
    the filter as an argument to the Table `find` method.

    Args:
        filter: the filter setting for the new cursor.

    Returns:
        a new TableFindCursor with the same settings as this one,
            except for `filter` which is the provided value.
    """

    self._ensure_idle()
    filtered_cursor = self._copy(filter=filter)
    return filtered_cursor

def for_each(self, function: Callable[[T], bool | None], *, general_method_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> None

Consume the remaining rows in the cursor, invoking a provided callback function on each of them.

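Calling this method on a CLOSED cursor results in an error.

The callback function can return any value. The return value is generally discarded, with the following exception: if the function returns the boolean False, the method quits early, leaving the cursor half-consumed (STARTED state). If this does not occur, this method results in the cursor entering CLOSED state once it is exhausted.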

Args

function: a callback function whose only parameter is of the type returned by the cursor. This callback is invoked once for each row yielded by the cursor. If the callback returns False, the for_each invocation stops early and returns without consuming further rows.
general_method_timeout_ms: a timeout, in milliseconds, for the whole duration of this method. If not provided, there is no such timeout. Note that the per-request timeout set on the cursor still applies.
timeout_ms: an alias for general_method_timeout_ms.

Example

>>> cursor = my_table.find(
...     {"match_id": "challenge6"},
...     projection={"winner": True},
...     limit=3,
... )
>>> def printer(row):
...     print(f"-> {row['winner']}")
...
>>> cursor.for_each(printer)
-> Donna
-> Erick
-> Fiona
>>>
>>> if cursor.state != CursorState.CLOSED:
...     print(f"alive: {list(cursor)}")
... else:
...     print("(closed)")
...
(closed)
>>> cursor2 = my_table.find(
...     {"match_id": "challenge6"},
...     projection={"winner": True},
...     limit=3,
... )
>>> def checker(row):
...     print(f"-> {row['winner']}")
...     return row["winner"] != "Erick"
...
>>> cursor2.for_each(checker)
-> Donna
-> Erick
>>>
>>> if cursor2.state != CursorState.CLOSED:
...     print(f"alive: {list(cursor2)}")
... else:
...     print("(closed)")
...
alive: [{'winner': 'Fiona'}]

Expand source code

def for_each(
    self,
    function: Callable[[T], bool | None],
    *,
    general_method_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> None:
    """
    Run a callback on each of the rows that this cursor has yet to yield.

    The cursor must not be in the CLOSED state, otherwise an error is raised.

    Whatever the callback returns is ignored, unless it is exactly the boolean
    `False`: in that case the loop is interrupted right away and the cursor
    is left partially consumed (ACTIVE state). When no invocation of the
    callback returns `False`, the cursor is exhausted and ends up CLOSED.

    Args:
        function: the callback, accepting a single argument of the type
            yielded by the cursor. It is called once for every row coming
            from the cursor. A return value of `False` makes `for_each`
            stop early, leaving the subsequent rows unconsumed.
        general_method_timeout_ms: a timeout, in milliseconds, for the whole
            duration of this method. If not provided, there is no such timeout.
            Note that the per-request timeout set on the cursor still applies.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Example:
        >>> cursor = my_table.find(
        ...     {"match_id": "challenge6"},
        ...     projection={"winner": True},
        ...     limit=3,
        ... )
        >>> def printer(row):
        ...     print(f"-> {row['winner']}")
        ...
        >>> cursor.for_each(printer)
        -> Donna
        -> Erick
        -> Fiona
        >>>
        >>> if cursor.state != CursorState.CLOSED:
        ...     print(f"alive: {list(cursor)}")
        ... else:
        ...     print("(closed)")
        ...
        (closed)
        >>> cursor2 = my_table.find(
        ...     {"match_id": "challenge6"},
        ...     projection={"winner": True},
        ...     limit=3,
        ... )
        >>> def checker(row):
        ...     print(f"-> {row['winner']}")
        ...     return row["winner"] != "Erick"
        ...
        >>> cursor2.for_each(checker)
        -> Donna
        -> Erick
        >>>
        >>> if cursor2.state != CursorState.CLOSED:
        ...     print(f"alive: {list(cursor2)}")
        ... else:
        ...     print("(closed)")
        ...
        alive: [{'winner': 'Fiona'}]
    """

    self._ensure_alive()
    request_ms, overall_ms = _revise_timeouts_for_cursor_copy(
        new_general_method_timeout_ms=general_method_timeout_ms,
        new_timeout_ms=timeout_ms,
        old_request_timeout_ms=self._request_timeout_ms,
    )
    # The iteration runs on a copy carrying the revised timeouts; the internal
    # state is handed over to the copy first and handed back once done.
    working_cursor = self._copy(
        request_timeout_ms=request_ms,
        overall_timeout_ms=overall_ms,
    )
    self._imprint_internal_state(working_cursor)
    for row in working_cursor:
        # only a literal False stops the loop (None and falsy values do not)
        if function(row) is False:
            break
    working_cursor._imprint_internal_state(self)

def get_sort_vector(self) ‑> list[float] | DataAPIVector | None

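Return the query vector used in the vector (ANN) search that originated this cursor, if applicable. If this is not an ANN search, or it was invoked without the include_sort_vector flag, return None.

Calling get_sort_vector on an IDLE cursor triggers the first page fetch, but the cursor stays in the IDLE state until actual consumption starts.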

The method can be invoked on a CLOSED cursor and will return either None or the sort vector used in the search.

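Returns

the query vector used in the search if this was a vector search (otherwise None). The vector is returned either as a DataAPIVector or a plain list of numbers depending on the APIOptions.serdes_options that apply. The query vector is available also for vectorize-based ANN searches.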

Expand source code

def get_sort_vector(self) -> list[float] | DataAPIVector | None:
    """
    Get the query vector of the vector (ANN) search this cursor comes from,
    when there is one. None is returned if the search is not an ANN search
    or if it was run without the `include_sort_vector` flag.

    If the cursor is IDLE, calling `get_sort_vector` causes the first page
    to be fetched; the cursor nevertheless remains IDLE until items are
    actually consumed from it.

    This method is allowed on a CLOSED cursor as well, in which case it returns
    either None or the sort vector used in the search.

    Returns:
        the query vector used in the search for a vector search, None
            otherwise. Depending on the `APIOptions.serdes_options` in effect,
            the vector is either a DataAPIVector or a plain list of numbers.
            The query vector is available for vectorize-based ANN searches too.
    """

    self._try_ensure_fill_buffer()
    status = self._last_response_status
    if not status:
        # no (or empty) response status: there is no sort vector to report
        return None
    return _ensure_vector(
        status.get("sortVector"),
        self.data_source.api_options.serdes_options,
    )

def has_next(self) ‑> bool

Whether the cursor actually has more documents to return.

has_next can be called on any cursor, but on a CLOSED cursor will always return False.

This method can trigger the fetch operation of a new page, if the current buffer is empty.

Calling has_next on an IDLE cursor triggers the first page fetch, but the cursor stays in the IDLE state until actual consumption starts.

Returns

a boolean value of True if there is at least one further item available to consume; False otherwise (including the case of CLOSED cursor).

Expand source code

def has_next(self) -> bool:
    """
    Tell whether there is at least one more item to get from the cursor.

    Any cursor accepts a call to `has_next`; for a CLOSED cursor the answer
    is always False.

    If the current buffer is empty, this method may trigger the fetch
    of a new page.

    When invoked on an IDLE cursor, `has_next` triggers the first page fetch,
    but the cursor remains IDLE until items are actually consumed from it.

    Returns:
        True if at least one further item is available to consume;
            False in all other cases (a CLOSED cursor being one of them).
    """

    is_closed = self._state == CursorState.CLOSED
    if not is_closed:
        # refill the buffer if needed, then check whether anything is in it
        self._try_ensure_fill_buffer()
        return len(self._buffer) != 0
    return False

def include_similarity(self, include_similarity: bool | None) ‑> TableFindCursor[~TRAW, ~T]

Return a copy of this cursor with a new include_similarity setting. This operation is allowed only if the cursor state is still IDLE and if no mapping has been set on it.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the Table find method.

Args

include_similarity: a new include_similarity setting to apply to the returned new cursor.

Returns

a new TableFindCursor with the same settings as this one, except for include_similarity which is the provided value.

Expand source code

def include_similarity(
    self, include_similarity: bool | None
) -> TableFindCursor[TRAW, T]:
    """
    Create a copy of this cursor differing by its include_similarity setting.
    The cursor must still be in the IDLE state and must have no mapping
    set on it, otherwise the operation is rejected.

    Rather than calling this method explicitly, one would typically provide
    the desired arguments to the Table `find` method.

    Args:
        include_similarity: the include_similarity setting that the new,
            returned cursor will have.

    Returns:
        a new TableFindCursor, identical to this one in all settings
            but `include_similarity`, which is set to the provided value.
    """

    self._ensure_idle()
    if self._mapper is None:
        return self._copy(include_similarity=include_similarity)
    # a mapping is already set on this cursor: the change is refused
    raise CursorException(
        "Cannot set include_similarity after map.",
        cursor_state=self._state.value,
    )

def include_sort_vector(self, include_sort_vector: bool | None) ‑> TableFindCursor[~TRAW, ~T]

Return a copy of this cursor with a new include_sort_vector setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the Table find method.

Args

include_sort_vector: a new include_sort_vector setting to apply to the returned new cursor.

Returns

a new TableFindCursor with the same settings as this one, except for include_sort_vector which is the provided value.

Expand source code

def include_sort_vector(
    self, include_sort_vector: bool | None
) -> TableFindCursor[TRAW, T]:
    """
    Create a copy of this cursor differing by its include_sort_vector setting.
    The cursor must still be in the IDLE state for this to be allowed.

    Rather than calling this method explicitly, one would typically provide
    the desired arguments to the Table `find` method.

    Args:
        include_sort_vector: the include_sort_vector setting that the new,
            returned cursor will have.

    Returns:
        a new TableFindCursor, identical to this one in all settings
            but `include_sort_vector`, which is set to the provided value.
    """

    self._ensure_idle()
    updated_cursor = self._copy(include_sort_vector=include_sort_vector)
    return updated_cursor

def initial_page_state(self, initial_page_state: str | UnsetType) ‑> TableFindCursor[~TRAW, ~T]

Return a copy of this cursor with a new initial_page_state setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the Table find method.

Args

initial_page_state: a new initial_page_state setting to apply to the returned new cursor. Passing an explicit None raises an error.

Returns

a new TableFindCursor with the same settings as this one, except for initial_page_state which is the provided value.

Expand source code

def initial_page_state(
    self, initial_page_state: str | UnsetType
) -> TableFindCursor[TRAW, T]:
    """
    Return a copy of this cursor with a new initial_page_state setting.
    This operation is allowed only if the cursor state is still IDLE.

    Instead of explicitly invoking this method, the typical usage consists
    in passing arguments to the Table `find` method.

    Args:
        initial_page_state: a new initial_page_state setting to apply to the
            returned new cursor. An explicit None is not an acceptable value
            and results in an error.

    Returns:
        a new TableFindCursor with the same settings as this one,
            except for `initial_page_state` which is the provided value.
    """

    self._ensure_idle()
    # NOTE(review): no None check is done here; the error for an explicit None
    # is presumably raised while the copy is being built -- confirm.
    return self._copy(initial_page_state=initial_page_state)

def limit(self, limit: int | None) ‑> TableFindCursor[~TRAW, ~T]

Return a copy of this cursor with a new limit setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the Table find method.

Args

limit: a new limit setting to apply to the returned new cursor.

Returns

a new TableFindCursor with the same settings as this one, except for limit which is the provided value.

Expand source code

def limit(self, limit: int | None) -> TableFindCursor[TRAW, T]:
    """
    Create a copy of this cursor differing by its limit setting.
    The cursor must still be in the IDLE state for this to be allowed.

    Rather than calling this method explicitly, one would typically provide
    the desired arguments to the Table `find` method.

    Args:
        limit: the limit setting that the new, returned cursor will have.

    Returns:
        a new TableFindCursor, identical to this one in all settings
            but `limit`, which is set to the provided value.
    """

    self._ensure_idle()
    updated_cursor = self._copy(limit=limit)
    return updated_cursor

def map(self, mapper: Callable[[T], TNEW]) ‑> TableFindCursor[~TRAW, ~TNEW]

Return a copy of this cursor with a mapping function to transform the returned items. Calling this method on a cursor with a mapping already set results in the mapping functions being composed.

This operation is allowed only if the cursor state is still IDLE.

Args

mapper: a function transforming the objects returned by the cursor into something else (i.e. a function T => TNEW).

Returns

a new TableFindCursor with a new mapping function on the results, possibly composed with any pre-existing mapping function.

Example

>>> cursor = my_table.find(
...     {"match_id": "challenge6"},
...     projection={"winner": True},
...     limit=2,
... )
>>> for row in cursor:
...     print(row)
...
{'winner': 'Donna'}
{'winner': 'Erick'}
>>> cursor_mapped = my_table.find(
...     {"match_id": "challenge6"},
...     projection={"winner": True},
...     limit=2,
... ).map(lambda row: row["winner"])
>>> for value in cursor_mapped:
...     print(value)
...
Donna
Erick
>>> cursor_mapped_twice = my_table.find(
...     {"match_id": "challenge6"},
...     projection={"winner": True},
...     limit=2,
... ).map(lambda row: row["winner"]).map(lambda w: w.upper())
>>> for value in cursor_mapped_twice:
...     print(value)
...
DONNA
ERICK

Expand source code

def map(self, mapper: Callable[[T], TNEW]) -> TableFindCursor[TRAW, TNEW]:
    """
    Create a copy of this cursor that transforms the items it returns
    through a mapping function. If the cursor already has a mapping set,
    the new function is composed with the existing one.

    The cursor must still be in the IDLE state for this to be allowed.

    Args:
        mapper: a function (T => TNEW) that turns the objects returned
            by the cursor into something else.

    Returns:
        a new TableFindCursor whose results go through the new mapping
            function, composed with the pre-existing one if there is any.

    Example:
        >>> cursor = my_table.find(
        ...     {"match_id": "challenge6"},
        ...     projection={"winner": True},
        ...     limit=2,
        ... )
        >>> for row in cursor:
        ...     print(row)
        ...
        {'winner': 'Donna'}
        {'winner': 'Erick'}
        >>> cursor_mapped = my_table.find(
        ...     {"match_id": "challenge6"},
        ...     projection={"winner": True},
        ...     limit=2,
        ... ).map(lambda row: row["winner"])
        >>> for value in cursor_mapped:
        ...     print(value)
        ...
        Donna
        Erick
        >>> cursor_mapped_twice = my_table.find(
        ...     {"match_id": "challenge6"},
        ...     projection={"winner": True},
        ...     limit=2,
        ... ).map(lambda row: row["winner"]).map(lambda w: w.upper())
        >>> for value in cursor_mapped_twice:
        ...     print(value)
        ...
        DONNA
        ERICK
    """

    self._ensure_idle()
    source_table = self._query_engine.table
    if source_table is None:
        raise RuntimeError("Query engine has no table.")

    composite_mapper: Callable[[TRAW], TNEW]
    if self._mapper is None:
        # first mapping on this cursor: the provided function is used as is
        composite_mapper = cast(Callable[[TRAW], TNEW], mapper)
    else:

        def _chained(raw_item: TRAW) -> TNEW:
            # existing mapping first, then the newly-provided one
            return mapper(self._mapper(raw_item))  # type: ignore[misc]

        composite_mapper = _chained

    return TableFindCursor(
        table=source_table,
        request_timeout_ms=self._request_timeout_ms,
        overall_timeout_ms=self._overall_timeout_ms,
        request_timeout_label=self._request_timeout_label,
        overall_timeout_label=self._overall_timeout_label,
        filter=self._filter,
        projection=self._projection,
        sort=self._sort,
        limit=self._limit,
        initial_page_state=self._initial_page_state,
        include_similarity=self._include_similarity,
        include_sort_vector=self._include_sort_vector,
        skip=self._skip,
        mapper=composite_mapper,
    )

def project(self, projection: ProjectionType | None) ‑> TableFindCursor[~TRAW, ~T]

Return a copy of this cursor with a new projection setting. This operation is allowed only if the cursor state is still IDLE and if no mapping has been set on it.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the Table find method.

Args

projection: a new projection setting to apply to the returned new cursor.

Returns

a new TableFindCursor with the same settings as this one, except for projection which is the provided value.

Expand source code

def project(self, projection: ProjectionType | None) -> TableFindCursor[TRAW, T]:
    """
    Create a copy of this cursor differing by its projection setting.
    The cursor must still be in the IDLE state and must have no mapping
    set on it, otherwise the operation is rejected.

    Rather than calling this method explicitly, one would typically provide
    the desired arguments to the Table `find` method.

    Args:
        projection: the projection setting that the new, returned cursor
            will have.

    Returns:
        a new TableFindCursor, identical to this one in all settings
            but `projection`, which is set to the provided value.
    """

    self._ensure_idle()
    if self._mapper is None:
        return self._copy(projection=projection)
    # a mapping is already set on this cursor: the change is refused
    raise CursorException(
        "Cannot set projection after map.",
        cursor_state=self._state.value,
    )

def skip(self, skip: int | None) ‑> TableFindCursor[~TRAW, ~T]

Return a copy of this cursor with a new skip setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the Table find method.

Args

skip: a new skip setting to apply to the returned new cursor.

Returns

a new TableFindCursor with the same settings as this one, except for skip which is the provided value.

Expand source code

def skip(self, skip: int | None) -> TableFindCursor[TRAW, T]:
    """
    Create a copy of this cursor differing by its skip setting.
    The cursor must still be in the IDLE state for this to be allowed.

    Rather than calling this method explicitly, one would typically provide
    the desired arguments to the Table `find` method.

    Args:
        skip: the skip setting that the new, returned cursor will have.

    Returns:
        a new TableFindCursor, identical to this one in all settings
            but `skip`, which is set to the provided value.
    """

    self._ensure_idle()
    updated_cursor = self._copy(skip=skip)
    return updated_cursor

def sort(self, sort: dict[str, Any] | None) ‑> TableFindCursor[~TRAW, ~T]

Return a copy of this cursor with a new sort setting. This operation is allowed only if the cursor state is still IDLE.

Instead of explicitly invoking this method, the typical usage consists in passing arguments to the Table find method.

Args

sort: a new sort setting to apply to the returned new cursor.

Returns

a new TableFindCursor with the same settings as this one, except for sort which is the provided value.

Expand source code

def sort(self, sort: dict[str, Any] | None) -> TableFindCursor[TRAW, T]:
    """
    Create a copy of this cursor differing by its sort setting.
    The cursor must still be in the IDLE state for this to be allowed.

    Rather than calling this method explicitly, one would typically provide
    the desired arguments to the Table `find` method.

    Args:
        sort: the sort setting that the new, returned cursor will have.

    Returns:
        a new TableFindCursor, identical to this one in all settings
            but `sort`, which is set to the provided value.
    """

    self._ensure_idle()
    updated_cursor = self._copy(sort=sort)
    return updated_cursor

def to_list(self, *, general_method_timeout_ms: int | None = None, timeout_ms: int | None = None) ‑> list[~T]

Materialize all rows that remain to be consumed from a cursor into a list.

Calling this method on a CLOSED cursor results in an error.

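If the cursor is IDLE, the result will be the whole set of rows returned by the find operation; otherwise, the rows already consumed by the cursor will not be in the resulting list.

Calling this method is not recommended if a huge list of results is anticipated: it would involve a large number of data exchanges with the Data API and possibly a massive memory usage to construct the list. In such cases, a lazy pattern of iterating and consuming the rows is to be preferred.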

Args

general_method_timeout_ms: a timeout, in milliseconds, for the whole duration of this method. If not provided, there is no such timeout. Note that the per-request timeout set on the cursor still applies.
timeout_ms: an alias for general_method_timeout_ms.

Returns

list: a list of rows (or other values depending on the mapping function, if one is set). These are all items that were left to be consumed on the cursor when to_list is called.

Example

>>> my_table.find(
...     {"match_id": "challenge6"},
...     projection={"winner": True},
...     limit=3,
... ).to_list()
[{'winner': 'Donna'}, {'winner': 'Erick'}, {'winner': 'Fiona'}]
>>>
>>> cursor = my_table.find(
...     {"match_id": "challenge6"},
...     projection={"winner": True},
...     limit=5,
... ).map(lambda doc: doc["winner"])
>>>
>>> first_value = cursor.__next__()
>>> cursor.to_list()
['Erick', 'Fiona', 'Georg', 'Helen']

Expand source code

def to_list(
    self,
    *,
    general_method_timeout_ms: int | None = None,
    timeout_ms: int | None = None,
) -> list[T]:
    """
    Collect into a list all rows that the cursor has yet to yield.

    The cursor must not be in the CLOSED state, otherwise an error is raised.

    For an IDLE cursor, the list contains the whole set of rows returned
    by the `find` operation; otherwise, it does not contain the rows
    that have been consumed from the cursor already.

    This method should be avoided when a huge number of results is expected:
    building the list would require many data exchanges with the Data API and
    possibly a massive amount of memory. A lazy pattern, where rows are
    iterated over and consumed one by one, is preferable in such cases.

    Args:
        general_method_timeout_ms: a timeout, in milliseconds, for the whole
            duration of this method. If not provided, there is no such timeout.
            Note that the per-request timeout set on the cursor still applies.
        timeout_ms: an alias for `general_method_timeout_ms`.

    Returns:
        list: the rows (or whatever the mapping function, if one is set,
            makes of them) that were still to be consumed on the cursor
            at the time `to_list` was called.

    Example:
        >>> my_table.find(
        ...     {"match_id": "challenge6"},
        ...     projection={"winner": True},
        ...     limit=3,
        ... ).to_list()
        [{'winner': 'Donna'}, {'winner': 'Erick'}, {'winner': 'Fiona'}]
        >>>
        >>> cursor = my_table.find(
        ...     {"match_id": "challenge6"},
        ...     projection={"winner": True},
        ...     limit=5,
        ... ).map(lambda doc: doc["winner"])
        >>>
        >>> first_value = cursor.__next__()
        >>> cursor.to_list()
        ['Erick', 'Fiona', 'Georg', 'Helen']
    """

    self._ensure_alive()
    request_ms, overall_ms = _revise_timeouts_for_cursor_copy(
        new_general_method_timeout_ms=general_method_timeout_ms,
        new_timeout_ms=timeout_ms,
        old_request_timeout_ms=self._request_timeout_ms,
    )
    # The iteration runs on a copy carrying the revised timeouts; the internal
    # state is handed over to the copy first and handed back once done.
    working_cursor = self._copy(
        request_timeout_ms=request_ms,
        overall_timeout_ms=overall_ms,
    )
    self._imprint_internal_state(working_cursor)
    collected: list[T] = []
    for item in working_cursor:
        collected.append(item)
    working_cursor._imprint_internal_state(self)
    return collected

Inherited members

AbstractCursor:
- buffered_count
- close
- consume_buffer
- consumed
- cursor_id
- rewind
- state