Migrate to a new embedding model for a collection

Follow this migration guide if you want to switch embedding models for your collection.

This migration only works for documents that have a $vectorize field.

Create a new collection with the desired embedding provider integration. For examples, see "Create a collection that can automatically generate vector embeddings."

Migrate your documents to the new collection.

Exclude the $vector field from the migrated documents. The embedding provider integration for your new collection will automatically generate vector embeddings based on the $vectorize field and store them in the $vector field. Any documents without a $vectorize field will not have their $vector field automatically populated.

For example:

Python
TypeScript
Java

from astrapy import DataAPIClient

# Connect to the database and look up both collections.
client = DataAPIClient()
database = client.get_database("API_ENDPOINT", token="APPLICATION_TOKEN")

source_collection = database.get_collection("OLD_COLLECTION_NAME")
target_collection = database.get_collection("NEW_COLLECTION_NAME")

# An empty filter selects every document for migration.
match_all = {}

# $vectorize has to be requested explicitly.
# $vector is left out by default, which is what this migration needs.
# _id and every field that doesn't start with $ come back by default.
wanted_fields = {"$vectorize": True}

migrated_count = 0
page_state = None

while True:
    # Only pass a page state once there is one to resume from;
    # the very first request is made without it.
    find_kwargs = {"projection": wanted_fields}
    if page_state:
        find_kwargs["initial_page_state"] = page_state

    page = source_collection.find(match_all, **find_kwargs).fetch_next_page()
    documents, page_state = page.results, page.next_page_state

    if not documents:
        print("✅ No more documents. Migration complete.")
        break

    # Write this page to the new collection.
    # _id and the other field values (excluding $vector) are carried over as-is.
    # $vector is generated automatically from the value of $vectorize.
    target_collection.insert_many(documents)
    migrated_count += len(documents)

    print(f"Migrated {migrated_count} documents. Page state: {page_state}")

    if page_state is None:
        print("✅ Reached final page. Migration complete.")
        break

import {
  DataAPIClient,
  CollectionInsertManyError,
} from "@datastax/astra-db-ts";

const client = new DataAPIClient("APPLICATION_TOKEN");
const database = client.db("API_ENDPOINT");

const oldCollection = database.collection("OLD_COLLECTION_NAME");
const newCollection = database.collection("NEW_COLLECTION_NAME");

let pageState = null;
let migratedCount = 0;

// Use an empty filter to migrate all documents
const filter = {};

// You must explicitly include $vectorize.
// $vector is excluded by default.
// _id and any other fields that don't start with $ are included by default.
const projection = { $vectorize: true };

// Copies the old collection into the new one, one page at a time.
// Each iteration opens a cursor at the saved page state, fetches a single
// page, inserts it, and stops when a page is empty or no next state is returned.
(async function () {
  while (true) {
    // Pass initialPageState only when resuming; the first request has none.
    const cursor = oldCollection.find(filter, {
      projection,
      ...(pageState ? { initialPageState: pageState } : {}),
    });

    const page = await cursor.fetchNextPage();
    const documents = page.result;
    pageState = page.nextPageState;

    if (!documents.length) {
      console.log("✅ No more documents. Migration complete.");
      break;
    }

    // Insert the documents to the new collection.
    // _id and the other field values (excluding $vector) will be the same.
    // $vector will automatically be generated based on the value of $vectorize.
    try {
      await newCollection.insertMany(documents);
      migratedCount += documents.length;
    } catch (error) {
      // Anything other than an insert failure (network, auth, ...) must not be
      // hidden: rethrow so the migration stops instead of reporting false progress.
      if (!(error instanceof CollectionInsertManyError)) {
        throw error;
      }

      // Partial failure: keep going, but count only what was actually written
      // and make the shortfall visible.
      const insertedIds = error.insertedIds();
      migratedCount += insertedIds.length;
      console.error(
        `Failed to insert ${documents.length - insertedIds.length} of ${documents.length} documents on this page.`,
      );
      console.log(insertedIds);
    }

    console.log(
      `Migrated ${migratedCount} documents. Page state: ${pageState}`,
    );

    if (!pageState) {
      console.log("✅ Reached final page. Migration complete.");
      break;
    }
  }
})();

import com.datastax.astra.client.DataAPIClient;
import com.datastax.astra.client.collections.Collection;
import com.datastax.astra.client.collections.commands.options.CollectionFindOptions;
import com.datastax.astra.client.collections.definition.documents.Document;
import com.datastax.astra.client.core.paging.Page;
import com.datastax.astra.client.core.query.Filter;
import com.datastax.astra.client.core.query.Projection;
import com.datastax.astra.client.databases.Database;
import java.util.List;

/**
 * Example that copies documents from one collection to another, page by page, so that the
 * destination collection can generate new embeddings from each document's $vectorize value.
 */
public class Example {

  /**
   * Runs the migration: reads the old collection one page at a time and inserts every page into
   * the new collection until an empty page is returned or no further page state is available.
   *
   * @param args command-line arguments (unused)
   */
  public static void main(String[] args) {

    DataAPIClient client = new DataAPIClient("APPLICATION_TOKEN");
    Database database = client.getDatabase("API_ENDPOINT");

    Collection<Document> source = database.getCollection("OLD_COLLECTION_NAME");
    Collection<Document> target = database.getCollection("NEW_COLLECTION_NAME");

    // No filter is supplied, so all documents are migrated.
    Filter matchAll = null;

    // $vectorize has to be requested explicitly.
    // $vector is left out by default.
    // _id and every field that doesn't start with $ come back by default.
    Projection vectorizeOnly = new Projection("$vectorize", true);

    int totalMigrated = 0;
    String nextState = null;

    for (;;) {
      // nextState is null on the first request and the saved page state afterwards.
      Page<Document> current =
          source.findPage(
              matchAll, new CollectionFindOptions().projection(vectorizeOnly).pageState(nextState));

      List<Document> batch = current.getResults();
      nextState = current.getPageState().orElse(null);

      if (batch == null || batch.isEmpty()) {
        System.out.println("✅ No more documents. Migration complete.");
        break;
      }

      // Write this page to the new collection.
      // _id and the other field values (excluding $vector) are carried over as-is.
      // $vector is generated automatically from the value of $vectorize.
      target.insertMany(batch);
      totalMigrated += batch.size();

      System.out.println("Migrated " + totalMigrated + " documents. Page state: " + nextState);

      if (nextState == null) {
        System.out.println("✅ Reached final page. Migration complete.");
        break;
      }
    }
  }
}

Optionally, delete the collection that stores the old vector embeddings.

Migrate to a new embedding model for a collection

Was this helpful?

Give Feedback