Migrate to a new embedding model for a collection

Follow this migration guide if you want to switch embedding models for your collection.

This migration only works for documents that have a $vectorize field.

  1. Create a new collection with the desired embedding provider integration. For examples, see Create a collection that can automatically generate vector embeddings.

  2. Migrate your documents to the new collection.

    Exclude the $vector field from the migrated documents. The embedding provider integration for your new collection will automatically generate vector embeddings based on the $vectorize field and store them in the $vector field. Any documents without a $vectorize field will not have their $vector field automatically populated.

    For example:

    • Python

    • TypeScript

    • Java

    from astrapy import DataAPIClient

    # Connect to the database and get handles on both collections:
    # the one being migrated from and the one being migrated to.
    database = DataAPIClient("APPLICATION_TOKEN").get_database("API_ENDPOINT")
    source_collection = database.get_collection("OLD_COLLECTION_NAME")
    target_collection = database.get_collection("NEW_COLLECTION_NAME")

    # An empty filter selects every document for migration.
    match_all = {}

    # $vectorize must be requested explicitly.
    # $vector is left out by default.
    # _id and every other field that doesn't start with $ come back by default.
    returned_fields = {"$vectorize": True}

    total_migrated = 0
    resume_state = None

    while True:
        # Only pass a page state once one is available (i.e. after the first page).
        find_kwargs = {"projection": returned_fields}
        if resume_state:
            find_kwargs["initial_page_state"] = resume_state

        batch = source_collection.find(match_all, **find_kwargs).fetch_next_page()
        fetched = batch.results
        resume_state = batch.next_page_state

        if not fetched:
            print("✅ No more documents. Migration complete.")
            break

        # Write the page to the new collection.
        # _id and the other field values (excluding $vector) stay the same.
        # $vector is generated automatically from the value of $vectorize.
        target_collection.insert_many(fetched)
        total_migrated += len(fetched)

        print(f"Migrated {total_migrated} documents. Page state: {resume_state}")

        # No next page state means this was the last page.
        if resume_state is None:
            print("✅ Reached final page. Migration complete.")
            break
    import {
      DataAPIClient,
      CollectionInsertManyError,
    } from "@datastax/astra-db-ts";

    const client = new DataAPIClient("APPLICATION_TOKEN");
    const database = client.db("API_ENDPOINT");

    const oldCollection = database.collection("OLD_COLLECTION_NAME");
    const newCollection = database.collection("NEW_COLLECTION_NAME");

    // Explicitly typed so that the variable can hold either "no page state yet"
    // (null) or the state string returned with each page.
    let pageState: string | null = null;
    let migratedCount = 0;

    // Use an empty filter to migrate all documents
    const filter = {};

    // You must explicitly include $vectorize.
    // $vector is excluded by default.
    // _id and any other fields that don't start with $ are included by default.
    const projection = { $vectorize: true };

    // Copies documents from the old collection to the new one, one page at a time,
    // until a page comes back empty or without a next page state.
    (async function () {
      while (true) {
        // Only pass a page state once one is available (i.e. after the first page).
        const cursor = oldCollection.find(filter, {
          projection,
          ...(pageState ? { initialPageState: pageState } : {}),
        });

        const page = await cursor.fetchNextPage();
        const documents = page.result;
        pageState = page.nextPageState;

        if (!documents.length) {
          console.log("✅ No more documents. Migration complete.");
          break;
        }

        // Insert the documents to the new collection.
        // _id and the other field values (excluding $vector) will be the same.
        // $vector will automatically be generated based on the value of $vectorize.
        let insertedCount = documents.length;
        try {
          await newCollection.insertMany(documents);
        } catch (error) {
          // Anything other than a (possibly partial) insert failure is unexpected:
          // surface it instead of silently continuing with the next page.
          if (!(error instanceof CollectionInsertManyError)) {
            throw error;
          }

          // Some documents on this page may still have been written.
          // Report which ones, and count only those as migrated.
          const insertedIds = error.insertedIds();
          insertedCount = insertedIds.length;
          console.log(insertedIds);
          console.error(
            `Failed to insert ${documents.length - insertedCount} of ${documents.length} documents on this page: ${error.message}`,
          );
        }

        migratedCount += insertedCount;

        console.log(
          `Migrated ${migratedCount} documents. Page state: ${pageState}`,
        );

        // No next page state means this was the last page.
        if (!pageState) {
          console.log("✅ Reached final page. Migration complete.");
          break;
        }
      }
    })();
    import com.datastax.astra.client.DataAPIClient;
    import com.datastax.astra.client.collections.Collection;
    import com.datastax.astra.client.collections.commands.options.CollectionFindOptions;
    import com.datastax.astra.client.collections.definition.documents.Document;
    import com.datastax.astra.client.core.paging.Page;
    import com.datastax.astra.client.core.query.Filter;
    import com.datastax.astra.client.core.query.Projection;
    import com.datastax.astra.client.databases.Database;
    import java.util.List;
    
    /**
     * Example program that copies the documents of one collection into another, page by page,
     * so that the new collection can generate fresh embeddings from each document's $vectorize
     * value.
     */
    public class Example {

      /**
       * Runs the migration: reads pages from the old collection, inserts each page into the new
       * collection, and prints progress until there are no more pages.
       *
       * @param args command-line arguments (unused)
       */
      public static void main(String[] args) {

        DataAPIClient client = new DataAPIClient("APPLICATION_TOKEN");
        Database database = client.getDatabase("API_ENDPOINT");

        Collection<Document> sourceCollection = database.getCollection("OLD_COLLECTION_NAME");
        Collection<Document> targetCollection = database.getCollection("NEW_COLLECTION_NAME");

        // No filter is applied, so all documents are migrated.
        Filter matchAll = null;

        // $vectorize has to be requested explicitly.
        // $vector is left out by default.
        // _id and every other field that doesn't start with $ come back by default.
        Projection returnedFields = new Projection("$vectorize", true);

        int totalMigrated = 0;
        String resumeState = null;

        for (; ; ) {
          // Request the next page, resuming from the state of the previous one
          // (null on the first iteration).
          CollectionFindOptions options =
              new CollectionFindOptions().projection(returnedFields).pageState(resumeState);
          Page<Document> batch = sourceCollection.findPage(matchAll, options);

          List<Document> fetched = batch.getResults();
          resumeState = batch.getPageState().orElse(null);

          boolean nothingFetched = fetched == null || fetched.isEmpty();
          if (nothingFetched) {
            System.out.println("✅ No more documents. Migration complete.");
            break;
          }

          // Write the page to the new collection.
          // _id and the other field values (excluding $vector) stay the same.
          // $vector is generated automatically from the value of $vectorize.
          targetCollection.insertMany(fetched);
          totalMigrated += fetched.size();

          System.out.println(
              "Migrated " + totalMigrated + " documents. Page state: " + resumeState);

          // No page state means this was the last page.
          if (resumeState == null) {
            System.out.println("✅ Reached final page. Migration complete.");
            break;
          }
        }
      }
    }
  3. Optionally, delete the collection that stores the old vector embeddings.

Was this helpful?

Give Feedback

How can we improve the documentation?

© 2025 DataStax, an IBM Company | Privacy policy | Terms of use | Manage Privacy Choices

Apache, Apache Cassandra, Cassandra, Apache Tomcat, Tomcat, Apache Lucene, Apache Solr, Apache Hadoop, Hadoop, Apache Pulsar, Pulsar, Apache Spark, Spark, Apache TinkerPop, TinkerPop, Apache Kafka and Kafka are either registered trademarks or trademarks of the Apache Software Foundation or its subsidiaries in Canada, the United States and/or other countries. Kubernetes is the registered trademark of the Linux Foundation.

General Inquiries: +1 (650) 389-6000, info@datastax.com