Sort clauses for collections

Many Data API commands use sort clauses to sort documents by field values, or to perform vector search or hybrid search.

Sort by field values

Use a sort clause to sort documents by field values in ascending or descending order.

When sorting by multiple fields, the order of the fields in the sort clause controls the order of the sorting.

Sort clauses can use only indexed fields. If you apply selective indexing when you create a collection, you cannot reference non-indexed fields in sort queries.

Python
TypeScript
Java
curl

from astrapy import DataAPIClient
from astrapy.constants import SortMode

# Get an existing collection
client = DataAPIClient()
database = client.get_database(
    "API_ENDPOINT",
    token="APPLICATION_TOKEN",
)
collection = database.get_collection("COLLECTION_NAME")

# Find documents that match the filter, sorted by rating (ascending)
# and title (descending). The order of the keys in the sort dictionary
# controls the order of the sorting.
cursor = collection.find(
    {"metadata.language": "English"},
    sort={
        "rating": SortMode.ASCENDING,
        "title": SortMode.DESCENDING,
    },
)

import { DataAPIClient } from "@datastax/astra-db-ts";

// Get an existing collection
const client = new DataAPIClient("APPLICATION_TOKEN");
const database = client.db("API_ENDPOINT");
const collection = database.collection("COLLECTION_NAME");

// Find documents that match the filter, sorted by rating and then title.
// The order of the keys in the sort object controls the order of the sorting.
(async function () {
  const cursor = collection.find(
    { "metadata.language": "English" },
    {
      sort: {
        rating: 1, // ascending
        title: -1, // descending
      },
    },
  );
})();

import com.datastax.astra.client.DataAPIClient;
import com.datastax.astra.client.collections.Collection;
import com.datastax.astra.client.collections.commands.cursor.CollectionFindCursor;
import com.datastax.astra.client.collections.commands.options.CollectionFindOptions;
import com.datastax.astra.client.collections.definition.documents.Document;
import com.datastax.astra.client.core.query.Filter;
import com.datastax.astra.client.core.query.Filters;
import com.datastax.astra.client.core.query.Sort;

/** Example: find documents and sort them by field values. */
public class Example {

  public static void main(String[] args) {
    // Get an existing collection
    Collection<Document> collection =
        new DataAPIClient("APPLICATION_TOKEN")
            .getDatabase("API_ENDPOINT")
            .getCollection("COLLECTION_NAME");

    // Find documents that match the filter, sorted by rating (ascending)
    // and title (descending). The order of the Sort arguments controls
    // the order of the sorting.
    Filter filter = Filters.eq("metadata.language", "English");
    CollectionFindOptions options =
        new CollectionFindOptions().sort(Sort.ascending("rating"), Sort.descending("title"));
    CollectionFindCursor<Document, Document> cursor = collection.find(filter, options);
  }
}

# Find documents that match the filter, sorted by rating and then title.
# In the sort clause, 1 means ascending and -1 means descending.
# JSON has no comment syntax, so the request body must not contain comments.
curl -sS -L -X POST "API_ENDPOINT/api/json/v1/KEYSPACE_NAME/COLLECTION_NAME" \
  --header "Token: APPLICATION_TOKEN" \
  --header "Content-Type: application/json" \
  --data '{
  "find": {
    "filter": { "metadata.language": "English" },
    "sort": {
      "rating": 1,
      "title": -1
    }
  }
}'

Sort by vector similarity (vector search)

To find the documents whose $vector value is most similar to a given vector, use a sort clause with the vector embeddings that you want to match. To find the documents whose $vector value is most similar to the $vector value generated from a given search string, use a sort clause with the search string that you want to vectorize and match. For more information, see Find data with vector search.

Vector search is only available for vector-enabled collections. For more information, see Create a collection that can store vector embeddings and $vector in collections.

Vector search with vectorize is only available for collections that have vectorize enabled. For more information, see Create a collection that can automatically generate vector embeddings and $vectorize in collections.

Example sorting against a search vector

Python
TypeScript
Java
curl

from astrapy import DataAPIClient

# Get an existing collection
client = DataAPIClient()
database = client.get_database(
    "API_ENDPOINT",
    token="APPLICATION_TOKEN",
)
collection = database.get_collection("COLLECTION_NAME")

# Find documents, sorted by similarity of their $vector value
# to the given search vector. The empty filter applies no conditions.
cursor = collection.find({}, sort={"$vector": [0.12, 0.52, 0.32]})

import { DataAPIClient } from "@datastax/astra-db-ts";

// Get an existing collection
const client = new DataAPIClient("APPLICATION_TOKEN");
const database = client.db("API_ENDPOINT");
const collection = database.collection("COLLECTION_NAME");

// Find documents, sorted by similarity of their $vector value
// to the given search vector. The empty filter applies no conditions.
(async function () {
  const cursor = collection.find({}, { sort: { $vector: [0.12, 0.52, 0.32] } });
})();

import com.datastax.astra.client.DataAPIClient;
import com.datastax.astra.client.collections.Collection;
import com.datastax.astra.client.collections.commands.cursor.CollectionFindCursor;
import com.datastax.astra.client.collections.commands.options.CollectionFindOptions;
import com.datastax.astra.client.collections.definition.documents.Document;
import com.datastax.astra.client.core.query.Sort;

/** Example: find documents and sort them by similarity to a search vector. */
public class Example {

  public static void main(String[] args) {
    // Get an existing collection
    Collection<Document> collection =
        new DataAPIClient("APPLICATION_TOKEN")
            .getDatabase("API_ENDPOINT")
            .getCollection("COLLECTION_NAME");

    // Find documents, sorted by similarity of their $vector value
    // to the given search vector. No filter is applied.
    CollectionFindOptions options =
        new CollectionFindOptions().sort(Sort.vector(new float[] {0.12f, 0.52f, 0.32f}));
    CollectionFindCursor<Document, Document> cursor = collection.find(options);
  }
}

You can provide the search vector as an array of floats, or you can use $binary to provide the search vector as a Base64-encoded string. $binary can be more performant.

Array of floats
$binary

# Find documents, sorted by similarity to a search vector given as an array of floats.
# JSON numbers need a digit before the decimal point: write 0.12, not .12.
curl -sS -L -X POST "API_ENDPOINT/api/json/v1/KEYSPACE_NAME/COLLECTION_NAME" \
  --header "Token: APPLICATION_TOKEN" \
  --header "Content-Type: application/json" \
  --data '{
  "find": {
    "sort": { "$vector": [0.12, 0.52, 0.32] }
  }
}'

# Find documents, sorted by similarity to a search vector that is
# provided as a Base64-encoded string through $binary.
curl -sS -L -X POST "API_ENDPOINT/api/json/v1/KEYSPACE_NAME/COLLECTION_NAME" \
  --header "Token: APPLICATION_TOKEN" \
  --header "Content-Type: application/json" \
  --data '{
  "find": {
    "sort": { "$vector": {"$binary": "PfXCjz8FHrg+o9cK"} }
  }
}'

Example sorting against a search string

Python
TypeScript
Java
curl

from astrapy import DataAPIClient

# Get an existing collection
client = DataAPIClient()
database = client.get_database(
    "API_ENDPOINT",
    token="APPLICATION_TOKEN",
)
collection = database.get_collection("COLLECTION_NAME")

# Find documents, sorted by similarity to the vector generated
# from the given search string. The empty filter applies no conditions.
cursor = collection.find({}, sort={"$vectorize": "Text to vectorize"})

import { DataAPIClient } from "@datastax/astra-db-ts";

// Get an existing collection
const client = new DataAPIClient("APPLICATION_TOKEN");
const database = client.db("API_ENDPOINT");
const collection = database.collection("COLLECTION_NAME");

// Find documents, sorted by similarity to the vector generated
// from the given search string. The empty filter applies no conditions.
(async function () {
  const cursor = collection.find(
    {},
    { sort: { $vectorize: "Text to vectorize" } },
  );
})();

import com.datastax.astra.client.DataAPIClient;
import com.datastax.astra.client.collections.Collection;
import com.datastax.astra.client.collections.commands.cursor.CollectionFindCursor;
import com.datastax.astra.client.collections.commands.options.CollectionFindOptions;
import com.datastax.astra.client.collections.definition.documents.Document;
import com.datastax.astra.client.core.query.Sort;

/** Example: find documents and sort them by similarity to a search string. */
public class Example {

  public static void main(String[] args) {
    // Get an existing collection
    Collection<Document> collection =
        new DataAPIClient("APPLICATION_TOKEN")
            .getDatabase("API_ENDPOINT")
            .getCollection("COLLECTION_NAME");

    // Find documents, sorted by similarity to the vector generated
    // from the given search string. No filter is applied.
    CollectionFindOptions options =
        new CollectionFindOptions().sort(Sort.vectorize("Text to vectorize"));
    CollectionFindCursor<Document, Document> cursor = collection.find(options);
  }
}

# Find documents, sorted by similarity to the vector generated
# from the given search string.
curl -sS -L -X POST "API_ENDPOINT/api/json/v1/KEYSPACE_NAME/COLLECTION_NAME" \
  --header "Token: APPLICATION_TOKEN" \
  --header "Content-Type: application/json" \
  --data '{
  "find": {
    "sort": { "$vectorize": "Text to vectorize" }
  }
}'

Sort by vector similarity and keyword relevance (hybrid search)

A hybrid search uses a reranker model to combine results from a vector search and a lexical search. When you find documents via hybrid search, you use a sort clause to specify the queries for the underlying vector search and lexical search.

Python
TypeScript
Java
curl

from astrapy import DataAPIClient

# Get an existing collection
client = DataAPIClient()
database = client.get_database(
    "API_ENDPOINT",
    token="APPLICATION_TOKEN",
)
collection = database.get_collection("COLLECTION_NAME")

# Find documents with a hybrid search.
# $vectorize is the query for the underlying vector search, and
# $lexical is the query for the underlying lexical search.
cursor = collection.find_and_rerank(
    sort={
        "$hybrid": {"$vectorize": "A tree on a hill", "$lexical": "house woods forest"},
    },
)

import { DataAPIClient } from "@datastax/astra-db-ts";

// Get an existing collection
const client = new DataAPIClient();
const db = client.db("API_ENDPOINT", { token: "APPLICATION_TOKEN" });
const collection = db.collection("COLLECTION_NAME");

(async function () {
  // Find documents with a hybrid search.
  // $vectorize is the query for the underlying vector search, and
  // $lexical is the query for the underlying lexical search.
  const cursor = await collection.findAndRerank({}).sort({
    $hybrid: { $vectorize: "A tree on a hill", $lexical: "house woods forest" },
  });
})();

import com.datastax.astra.client.DataAPIClient;
import com.datastax.astra.client.collections.Collection;
import com.datastax.astra.client.collections.commands.cursor.CollectionFindAndRerankCursor;
import com.datastax.astra.client.collections.commands.options.CollectionFindAndRerankOptions;
import com.datastax.astra.client.collections.definition.documents.Document;
import com.datastax.astra.client.core.hybrid.Hybrid;
import com.datastax.astra.client.core.query.Sort;

/** Example: find documents with a hybrid search (vector search plus lexical search). */
public class Example {

  public static void main(String[] args) {
    // Get an existing collection
    Collection<Document> collection =
        new DataAPIClient("APPLICATION_TOKEN")
            .getDatabase("API_ENDPOINT")
            .getCollection("COLLECTION_NAME");

    // Find documents with a hybrid search.
    // vectorize(...) sets the query for the underlying vector search, and
    // lexical(...) sets the query for the underlying lexical search.
    Hybrid hybrid = new Hybrid().vectorize("A house in the woods").lexical("A tree on a hill");
    CollectionFindAndRerankCursor<Document, Document> cursor =
        collection.findAndRerank(new CollectionFindAndRerankOptions().sort(Sort.hybrid(hybrid)));
  }
}

# Find documents with a hybrid search.
# $lexical is the query for the underlying lexical search, and
# $vectorize is the query for the underlying vector search.
curl -sS -L -X POST "API_ENDPOINT/api/json/v1/KEYSPACE_NAME/COLLECTION_NAME" \
  --header "Token: APPLICATION_TOKEN" \
  --header "Content-Type: application/json" \
  --data '{
  "findAndRerank": {
    "sort": {
      "$hybrid": {
        "$lexical": "tree hill grassy",
        "$vectorize": "A house in the woods"
      }
    }
  }
}'

Commands that support sort clauses for collections

There are many Data API commands that support sort clauses for collections.

For more examples of sort clauses, including vector search and hybrid search examples, see the reference for the command that you want to run: