Migrate to a new embedding model for a collection
Follow this migration guide if you want to switch embedding models for your collection.
This migration only works for documents that have a $vectorize field.
-
Create a new collection with the desired embedding provider integration. For examples, see Create a collection that can automatically generate vector embeddings.
-
Migrate your documents to the new collection.
Exclude the $vector field from the migrated documents. The embedding provider integration for your new collection will automatically generate vector embeddings based on the $vectorize field and store them in the $vector field. Any documents without a $vectorize field will not have their $vector field automatically populated. For example:
-
Python
-
TypeScript
-
Java
```python
# Copies documents page by page from the old collection to the new collection.
# Only $vectorize (plus _id and regular fields) is read from the old collection.
from astrapy import DataAPIClient

client = DataAPIClient("APPLICATION_TOKEN")
database = client.get_database("API_ENDPOINT")
old_collection = database.get_collection("OLD_COLLECTION_NAME")
new_collection = database.get_collection("NEW_COLLECTION_NAME")

# Page state returned by the previous page; None means "start from the beginning".
page_state = None
migrated_count = 0

# Use an empty filter to migrate all documents
filter = {}

# You must explicitly include $vectorize.
# $vector is excluded by default.
# _id and any other fields that don't start with $ are included by default.
projection = {"$vectorize": True}

while True:
    # Resume from the saved page state when there is one.
    if page_state:
        cursor = old_collection.find(
            filter, projection=projection, initial_page_state=page_state
        )
    else:
        cursor = old_collection.find(filter, projection=projection)

    page = cursor.fetch_next_page()
    documents = page.results
    page_state = page.next_page_state

    if not documents:
        print("✅ No more documents. Migration complete.")
        break

    # Insert the documents to the new collection.
    # _id and the other field values (excluding $vector) will be the same.
    # $vector will automatically be generated based on the value of $vectorize.
    new_collection.insert_many(documents)

    migrated_count += len(documents)
    print(f"Migrated {migrated_count} documents. Page state: {page_state}")

    if page_state is None:
        print("✅ Reached final page. Migration complete.")
        break
```

```typescript
// Copies documents page by page from the old collection to the new collection.
// Only $vectorize (plus _id and regular fields) is read from the old collection.
import {
  DataAPIClient,
  CollectionInsertManyError,
} from "@datastax/astra-db-ts";

const client = new DataAPIClient("APPLICATION_TOKEN");
const database = client.db("API_ENDPOINT");
const oldCollection = database.collection("OLD_COLLECTION_NAME");
const newCollection = database.collection("NEW_COLLECTION_NAME");

// Page state returned by the previous page; null means "start from the beginning".
let pageState = null;
let migratedCount = 0;

// Use an empty filter to migrate all documents
const filter = {};

// You must explicitly include $vectorize.
// $vector is excluded by default.
// _id and any other fields that don't start with $ are included by default.
const projection = { $vectorize: true };

(async function () {
  while (true) {
    // Resume from the saved page state when there is one.
    const cursor = oldCollection.find(filter, {
      projection,
      ...(pageState ? { initialPageState: pageState } : {}),
    });

    const page = await cursor.fetchNextPage();
    const documents = page.result;
    pageState = page.nextPageState;

    if (!documents.length) {
      console.log("✅ No more documents. Migration complete.");
      break;
    }

    // Insert the documents to the new collection.
    // _id and the other field values (excluding $vector) will be the same.
    // $vector will automatically be generated based on the value of $vectorize.
    let insertedCount = documents.length;
    try {
      await newCollection.insertMany(documents);
    } catch (error) {
      // Only a partial insert failure is handled here; anything else
      // (network, auth, ...) must not be silently ignored.
      if (!(error instanceof CollectionInsertManyError)) {
        throw error;
      }
      // Report which documents made it, and count only those as migrated.
      const insertedIds = error.insertedIds();
      console.log(insertedIds);
      insertedCount = insertedIds.length;
    }

    migratedCount += insertedCount;
    console.log(
      `Migrated ${migratedCount} documents. Page state: ${pageState}`,
    );

    if (!pageState) {
      console.log("✅ Reached final page. Migration complete.");
      break;
    }
  }
})();
```

```java
import com.datastax.astra.client.DataAPIClient;
import com.datastax.astra.client.collections.Collection;
import com.datastax.astra.client.collections.commands.options.CollectionFindOptions;
import com.datastax.astra.client.collections.definition.documents.Document;
import com.datastax.astra.client.core.paging.Page;
import com.datastax.astra.client.core.query.Filter;
import com.datastax.astra.client.core.query.Projection;
import com.datastax.astra.client.databases.Database;
import java.util.List;

/**
 * Copies documents page by page from the old collection to the new collection.
 *
 * <p>Only {@code $vectorize} (plus {@code _id} and regular fields) is read from the old
 * collection.
 */
public class Example {

  public static void main(String[] args) {
    Database database =
        new DataAPIClient("APPLICATION_TOKEN").getDatabase("API_ENDPOINT");
    Collection<Document> oldCollection = database.getCollection("OLD_COLLECTION_NAME");
    Collection<Document> newCollection = database.getCollection("NEW_COLLECTION_NAME");

    // Page state returned by the previous page; null means "start from the beginning".
    String pageState = null;
    int migratedCount = 0;

    // Use an empty filter to migrate all documents
    Filter filter = null;

    // You must explicitly include $vectorize.
    // $vector is excluded by default.
    // _id and any other fields that don't start with $ are included by default.
    Projection projection = new Projection("$vectorize", true);

    while (true) {
      Page<Document> page =
          oldCollection.findPage(
              filter,
              new CollectionFindOptions().projection(projection).pageState(pageState));

      List<Document> documents = page.getResults();
      pageState = page.getPageState().orElse(null);

      if (documents == null || documents.isEmpty()) {
        System.out.println("✅ No more documents. Migration complete.");
        break;
      }

      // Insert the documents to the new collection.
      // _id and the other field values (excluding $vector) will be the same.
      // $vector will automatically be generated based on the value of $vectorize.
      newCollection.insertMany(documents);

      migratedCount += documents.size();
      System.out.println(
          "Migrated " + migratedCount + " documents. Page state: " + pageState);

      if (pageState == null) {
        System.out.println("✅ Reached final page. Migration complete.");
        break;
      }
    }
  }
}
```
-
-
Optionally, delete the collection that stores the old vector embeddings.