Sort clauses for collections
Many Data API commands use sort clauses to sort documents by field values, or to perform vector search or hybrid search.
You must use & to escape any . or & in field names in a sort clause.
Dot notation, which is used to reference nested fields, should not be escaped.
For more information, see Work with . and & in field names.
Sort by field values
Use a sort clause to sort documents by field values in ascending or descending order.
When sorting by multiple fields, the order of the fields in the sort clause controls the order of the sorting.
Sort clauses can use only indexed fields. If you apply selective indexing when you create a collection, you cannot reference non-indexed fields in sort queries.
You can’t sort the $lexical field in ascending or descending order.
Instead, you must sort by lexicographical match.
-
Python
-
TypeScript
-
Java
-
curl
from astrapy import DataAPIClient
from astrapy.constants import SortMode

# Connect to the database and look up an existing collection
client = DataAPIClient()
database = client.get_database("API_ENDPOINT", token="APPLICATION_TOKEN")
collection = database.get_collection("COLLECTION_NAME")

# Only documents whose nested metadata.language field is "English"
language_filter = {"metadata.language": "English"}

# The order of the keys controls the order of the sorting:
# rating (ascending) is applied first, then title (descending)
sort_order = {
    "rating": SortMode.ASCENDING,
    "title": SortMode.DESCENDING,
}

# Run the query and print each document in sorted order
for document in collection.find(language_filter, sort=sort_order):
    print(document)
import { DataAPIClient } from "@datastax/astra-db-ts";

// Connect to the database and look up an existing collection
const client = new DataAPIClient();
const database = client.db("API_ENDPOINT", { token: "APPLICATION_TOKEN" });
const collection = database.collection("COLLECTION_NAME");

// Print English-language documents, sorted by rating and then by title
async function main() {
  const matches = collection.find(
    { "metadata.language": "English" },
    { sort: { rating: 1, title: -1 } }, // 1 = ascending, -1 = descending
  );

  for await (const match of matches) {
    console.log(match);
  }
}

main();
import com.datastax.astra.client.DataAPIClient;
import com.datastax.astra.client.collections.Collection;
import com.datastax.astra.client.collections.commands.cursor.CollectionFindCursor;
import com.datastax.astra.client.collections.commands.options.CollectionFindOptions;
import com.datastax.astra.client.collections.definition.documents.Document;
import com.datastax.astra.client.core.query.Filter;
import com.datastax.astra.client.core.query.Filters;
import com.datastax.astra.client.core.query.Sort;
/** Finds documents in a collection and prints them sorted by two fields. */
public class Example {
  public static void main(String[] args) {
    // Connect to the database and look up an existing collection
    Collection<Document> collection =
        new DataAPIClient("APPLICATION_TOKEN")
            .getDatabase("API_ENDPOINT")
            .getCollection("COLLECTION_NAME");

    // Only documents whose nested metadata.language field is "English"
    Filter englishOnly = Filters.eq("metadata.language", "English");

    // The order of the sort terms controls the order of the sorting:
    // rating (ascending) is applied first, then title (descending)
    Sort byRating = Sort.ascending("rating");
    Sort byTitle = Sort.descending("title");
    CollectionFindOptions findOptions = new CollectionFindOptions().sort(byRating, byTitle);

    // Run the query and print each document in sorted order
    CollectionFindCursor<Document, Document> results = collection.find(englishOnly, findOptions);
    for (Document match : results) {
      System.out.println(match);
    }
  }
}
# Find English-language documents, sorted by rating (ascending), then title (descending)
curl --silent --show-error --location --request POST "API_ENDPOINT/api/json/v1/KEYSPACE_NAME/COLLECTION_NAME" \
--header "Token: APPLICATION_TOKEN" \
--header "Content-Type: application/json" \
--data '{
"find": {
"filter": { "metadata.language": "English" },
"sort": {
"rating": 1,
"title": -1
}
}
}'
Sort by vector similarity (vector search)
To find the documents whose $vector value is most similar to a given vector, use a sort clause with the vector embeddings that you want to match.
To find the documents whose $vector value is most similar to the $vector value generated from a given search string, use a sort clause with the search string that you want to vectorize and match.
For more information, see Find data with vector search.
Vector search is only available for vector-enabled collections. For more information, see Create a collection that can store vector embeddings and $vector in collections.
Vector search with vectorize is only available for collections that have vectorize enabled. For more information, see Create a collection that can automatically generate vector embeddings and $vectorize in collections.
Example sorting against a search vector
-
Python
-
TypeScript
-
Java
-
curl
from astrapy import DataAPIClient

# Connect to the database and look up an existing collection
client = DataAPIClient()
database = client.get_database("API_ENDPOINT", token="APPLICATION_TOKEN")
collection = database.get_collection("COLLECTION_NAME")

# Vector embeddings to match against each document's $vector value
search_vector = [0.08, -0.62, 0.39]

# Run the vector search with an empty filter and print each document
for document in collection.find({}, sort={"$vector": search_vector}):
    print(document)
import { DataAPIClient } from "@datastax/astra-db-ts";

// Connect to the database and look up an existing collection
const client = new DataAPIClient();
const database = client.db("API_ENDPOINT", { token: "APPLICATION_TOKEN" });
const collection = database.collection("COLLECTION_NAME");

// Print the documents returned by a vector search against a search vector
async function main() {
  // Empty filter; the sort clause carries the vector embeddings to match
  const matches = collection.find({}, { sort: { $vector: [0.08, -0.62, 0.39] } });

  for await (const match of matches) {
    console.log(match);
  }
}

main();
import com.datastax.astra.client.DataAPIClient;
import com.datastax.astra.client.collections.Collection;
import com.datastax.astra.client.collections.commands.cursor.CollectionFindCursor;
import com.datastax.astra.client.collections.commands.options.CollectionFindOptions;
import com.datastax.astra.client.collections.definition.documents.Document;
import com.datastax.astra.client.core.query.Sort;
/** Runs a vector search against a search vector and prints the returned documents. */
public class Example {
  public static void main(String[] args) {
    // Connect to the database and look up an existing collection
    Collection<Document> collection =
        new DataAPIClient("APPLICATION_TOKEN")
            .getDatabase("API_ENDPOINT")
            .getCollection("COLLECTION_NAME");

    // Vector embeddings to match against each document's $vector value
    float[] searchVector = {0.08f, -0.62f, 0.39f};
    Sort bySimilarity = Sort.vector(searchVector);
    CollectionFindOptions findOptions = new CollectionFindOptions().sort(bySimilarity);

    // Run the query and print each returned document
    CollectionFindCursor<Document, Document> results = collection.find(findOptions);
    for (Document match : results) {
      System.out.println(match);
    }
  }
}
You can provide the search vector as an array of floats, or you can use $binary to provide the search vector as a Base64-encoded string.
$binary can be more performant.
Vector binary encodings specification
A d-dimensional vector is a list of d floating-point numbers that can be binary encoded.
To prepare for encoding, the list must be transformed into a sequence of bytes where each float is represented as four bytes in big-endian format.
Then, the byte sequence is Base64-encoded, with = padding, if needed.
For example, here are some vectors and their resulting Base64 encoded strings:
[0.1, -0.2, 0.3] = "PczMzb5MzM0+mZma"
[0.1, 0.2] = "PczMzT5MzM0="
[10, 10.5, 100, -91.19] = "QSAAAEEoAABCyAAAwrZhSA=="
Once encoded, you use $binary to pass the Base64 string to the Data API:
{ "$binary": "BASE64_STRING" }
You can use a script to encode your vectors, for example:
python
import base64
import struct

# Vector to encode for use with $binary
input_vector = [0.1, -0.2, 0.3]
d = len(input_vector)

# ">" selects big-endian byte order; one "f" (4-byte float) per vector component
pack_format = f">{'f' * d}"

# Pack the floats into bytes, then Base64-encode and convert the result to a str
packed_bytes = struct.pack(pack_format, *input_vector)
binary_encode = base64.b64encode(packed_bytes).decode()
-
Array of floats
-
$binary
# Vector search with the search vector given as an array of floats
curl --silent --show-error --location --request POST "API_ENDPOINT/api/json/v1/KEYSPACE_NAME/COLLECTION_NAME" \
--header "Token: APPLICATION_TOKEN" \
--header "Content-Type: application/json" \
--data '{
"find": {
"sort": { "$vector": [0.08, -0.62, 0.39] }
}
}'
# Vector search with the search vector given as a Base64-encoded string via $binary
# (the string below is the binary encoding of [0.08, -0.62, 0.39])
curl --silent --show-error --location --request POST "API_ENDPOINT/api/json/v1/KEYSPACE_NAME/COLLECTION_NAME" \
--header "Token: APPLICATION_TOKEN" \
--header "Content-Type: application/json" \
--data '{
"find": {
"sort": { "$vector": {"$binary": "PaPXCr8euFI+x64U"} }
}
}'
Example sorting against a search string
-
Python
-
TypeScript
-
Java
-
curl
from astrapy import DataAPIClient

# Connect to the database and look up an existing collection
client = DataAPIClient()
database = client.get_database("API_ENDPOINT", token="APPLICATION_TOKEN")
collection = database.get_collection("COLLECTION_NAME")

# Search string to vectorize and match against each document's $vector value
search_text = "Text to vectorize"

# Run the vector search with an empty filter and print each document
for document in collection.find({}, sort={"$vectorize": search_text}):
    print(document)
import { DataAPIClient } from "@datastax/astra-db-ts";

// Connect to the database and look up an existing collection
const client = new DataAPIClient();
const database = client.db("API_ENDPOINT", { token: "APPLICATION_TOKEN" });
const collection = database.collection("COLLECTION_NAME");

// Print the documents returned by a vector search against a search string
async function main() {
  // Empty filter; the sort clause carries the string to vectorize and match
  const matches = collection.find({}, { sort: { $vectorize: "Text to vectorize" } });

  for await (const match of matches) {
    console.log(match);
  }
}

main();
import com.datastax.astra.client.DataAPIClient;
import com.datastax.astra.client.collections.Collection;
import com.datastax.astra.client.collections.commands.cursor.CollectionFindCursor;
import com.datastax.astra.client.collections.commands.options.CollectionFindOptions;
import com.datastax.astra.client.collections.definition.documents.Document;
import com.datastax.astra.client.core.query.Sort;
/** Runs a vector search against a search string and prints the returned documents. */
public class Example {
  public static void main(String[] args) {
    // Connect to the database and look up an existing collection
    Collection<Document> collection =
        new DataAPIClient("APPLICATION_TOKEN")
            .getDatabase("API_ENDPOINT")
            .getCollection("COLLECTION_NAME");

    // Search string to vectorize and match against each document's $vector value
    Sort bySimilarity = Sort.vectorize("Text to vectorize");
    CollectionFindOptions findOptions = new CollectionFindOptions().sort(bySimilarity);

    // Run the query and print each returned document
    CollectionFindCursor<Document, Document> results = collection.find(findOptions);
    for (Document match : results) {
      System.out.println(match);
    }
  }
}
# Vector search with a search string that is vectorized and matched
curl --silent --show-error --location --request POST "API_ENDPOINT/api/json/v1/KEYSPACE_NAME/COLLECTION_NAME" \
--header "Token: APPLICATION_TOKEN" \
--header "Content-Type: application/json" \
--data '{
"find": {
"sort": { "$vectorize": "Text to vectorize" }
}
}'
Sort by lexicographical matching (BM25-based ranking)
Sort on the $lexical field to find the documents whose $lexical field value is most relevant to a given string of space-separated keywords or terms.
This is only available for collections with lexical enabled. For more information, see Create a collection that supports lexicographical matching.
When sorting on the $lexical field, you can’t include additional sort clauses.
-
Python
-
TypeScript
-
Java
-
curl
from astrapy import DataAPIClient

# Connect to the database and look up an existing collection
client = DataAPIClient()
database = client.get_database("API_ENDPOINT", token="APPLICATION_TOKEN")
collection = database.get_collection("COLLECTION_NAME")

# Space-separated keywords to match against each document's $lexical value.
# A $lexical sort can't be combined with additional sort clauses.
lexical_sort = {"$lexical": "tree hill grassy"}

# Run the query and print each returned document
for document in collection.find(sort=lexical_sort):
    print(document)
import { DataAPIClient } from "@datastax/astra-db-ts";

// Connect to the database and look up an existing collection
const client = new DataAPIClient();
const database = client.db("API_ENDPOINT", { token: "APPLICATION_TOKEN" });
const collection = database.collection("COLLECTION_NAME");

// Print the documents returned by a sort on the $lexical field
async function main() {
  // Space-separated keywords to match against each document's $lexical value
  const matches = await collection.find({}).sort({ $lexical: "tree hill grassy" });

  for await (const match of matches) {
    console.log(match);
  }
}

main();
import com.datastax.astra.client.DataAPIClient;
import com.datastax.astra.client.collections.Collection;
import com.datastax.astra.client.collections.commands.cursor.CollectionFindCursor;
import com.datastax.astra.client.collections.commands.options.CollectionFindOptions;
import com.datastax.astra.client.collections.definition.documents.Document;
import com.datastax.astra.client.core.query.Filters;
import com.datastax.astra.client.core.query.Sort;
/** Filters and sorts documents by lexicographical match, then prints them. */
public class Example {
  public static void main(String[] args) {
    // Connect to the database and look up an existing collection
    Collection<Document> collection =
        new DataAPIClient("APPLICATION_TOKEN")
            .getDatabase("API_ENDPOINT")
            .getCollection("COLLECTION_NAME");

    // Space-separated keywords, used for both the filter and the sort below
    String keywords = "tree hill grassy";

    // A $lexical sort can't be combined with additional sort clauses
    Sort byRelevance = Sort.lexical(keywords);
    CollectionFindOptions findOptions = new CollectionFindOptions().sort(byRelevance);

    // Run the query with a match filter on the same keywords and print each document
    CollectionFindCursor<Document, Document> results =
        collection.find(Filters.match(keywords), findOptions);
    for (Document match : results) {
      System.out.println(match);
    }
  }
}
# Sort on the $lexical field with a string of space-separated keywords
curl --silent --show-error --location --request POST "API_ENDPOINT/api/json/v1/KEYSPACE_NAME/COLLECTION_NAME" \
--header "Token: APPLICATION_TOKEN" \
--header "Content-Type: application/json" \
--data '{
"find": {
"sort": {
"$lexical": "tree hill grassy"
}
}
}'
Sort by vector similarity and lexicographical matching (hybrid search)
A hybrid search uses a reranker model to combine results from a vector search and a lexical search. When you find documents via hybrid search, you use a sort clause to specify the queries for the underlying vector search and lexical search.
-
Python
-
TypeScript
-
Java
-
curl
from astrapy import DataAPIClient

# Connect to the database and look up an existing collection
client = DataAPIClient()
database = client.get_database("API_ENDPOINT", token="APPLICATION_TOKEN")
collection = database.get_collection("COLLECTION_NAME")

# Queries for the two underlying searches:
# $vectorize drives the vector search, $lexical drives the lexical search
hybrid_sort = {
    "$hybrid": {
        "$vectorize": "A tree in the woods",
        "$lexical": "house hill grassy",
    },
}

# Run the hybrid search and print the document from each result
for result in collection.find_and_rerank(sort=hybrid_sort):
    print(result.document)
import { DataAPIClient } from "@datastax/astra-db-ts";

// Connect to the database and look up an existing collection
const client = new DataAPIClient();
const database = client.db("API_ENDPOINT", { token: "APPLICATION_TOKEN" });
const collection = database.collection("COLLECTION_NAME");

// Print the document from each result of a hybrid search
async function main() {
  // $vectorize drives the vector search, $lexical drives the lexical search
  const results = await collection.findAndRerank({}).sort({
    $hybrid: {
      $vectorize: "A tree in the woods",
      $lexical: "house hill grassy",
    },
  });

  for await (const result of results) {
    console.log(result.document);
  }
}

main();
import com.datastax.astra.client.DataAPIClient;
import com.datastax.astra.client.collections.Collection;
import com.datastax.astra.client.collections.commands.cursor.CollectionFindAndRerankCursor;
import com.datastax.astra.client.collections.commands.options.CollectionFindAndRerankOptions;
import com.datastax.astra.client.collections.definition.documents.Document;
import com.datastax.astra.client.core.hybrid.Hybrid;
import com.datastax.astra.client.core.query.Sort;
import com.datastax.astra.client.core.rerank.RerankedResult;
/** Runs a hybrid search and prints the document from each reranked result. */
public class Example {
  public static void main(String[] args) {
    // Connect to the database and look up an existing collection
    Collection<Document> collection =
        new DataAPIClient("APPLICATION_TOKEN")
            .getDatabase("API_ENDPOINT")
            .getCollection("COLLECTION_NAME");

    // Queries for the two underlying searches:
    // vectorize drives the vector search, lexical drives the lexical search
    Hybrid hybridQuery =
        new Hybrid().vectorize("A tree in the woods").lexical("house hill grassy");
    CollectionFindAndRerankOptions rerankOptions =
        new CollectionFindAndRerankOptions().sort(Sort.hybrid(hybridQuery));

    // Run the hybrid search and print the document from each result
    CollectionFindAndRerankCursor<Document, Document> results =
        collection.findAndRerank(rerankOptions);
    for (RerankedResult<Document> reranked : results) {
      System.out.println(reranked.getDocument());
    }
  }
}
# Hybrid search: $lexical drives the lexical search, $vectorize drives the vector search
curl --silent --show-error --location --request POST "API_ENDPOINT/api/json/v1/KEYSPACE_NAME/COLLECTION_NAME" \
--header "Token: APPLICATION_TOKEN" \
--header "Content-Type: application/json" \
--data '{
"findAndRerank": {
"sort": {
"$hybrid": {
"$lexical": "house hill grassy",
"$vectorize": "A tree in the woods"
}
}
}
}'
Commands that support sort clauses for collections
There are many Data API commands that support sort clauses for collections.
For more examples of sort clauses, see the reference for the command that you want to run: