Migrate to a new embedding model for a table
Follow this migration guide if you want to switch embedding models for a column in your table.
This migration only works if you stored the original text in another column in your table. If you did not store the original text in your table, then you must modify the migration script below to get the original text from another source. If your new embedding model supports a larger context window, then you might also want to re-chunk your data.
-
Add a new vector column with the desired embedding provider integration to your table. For examples, see Add a vector column and configure an embedding provider integration.
-
Populate the new vector column with the contents of the column that stores the original text.
The embedding provider integration for your new column will automatically generate vector embeddings based on the text.
For example:
-
Python
-
TypeScript
-
Java
```python
# Re-embeds every row of a table by copying the original text into a new
# vector column, one page of rows at a time.
from astrapy import DataAPIClient

client = DataAPIClient("APPLICATION_TOKEN")
database = client.get_database("API_ENDPOINT")
table = database.get_table("TABLE_NAME")

page_state = None
migrated_count = 0

# Use an empty filter to find all rows
filter = {}

# You must include ALL primary key columns for your table
primary_key_columns = [
    "PRIMARY_KEY_1",
    "PRIMARY_KEY_2",
]

original_text_column = "NAME_OF_ORIGINAL_TEXT_COLUMN"
new_vector_column = "NAME_OF_NEW_VECTOR_COLUMN"

# The projection should include ALL primary key columns
# and the column that stores the original text
projection = {
    **{column: True for column in primary_key_columns},
    original_text_column: True,
}

while True:
    # Resume from the previous page when there is one
    if page_state:
        cursor = table.find(
            filter, projection=projection, initial_page_state=page_state
        )
    else:
        cursor = table.find(filter, projection=projection)

    page = cursor.fetch_next_page()
    rows = page.results
    page_state = page.next_page_state

    if not rows:
        print("✅ No more rows. Migration complete.")
        break

    # Build the updates
    updated_rows = []
    for row in rows:
        # Rows without original text are skipped
        if text := row.get(original_text_column):
            updated_row = {
                # Include the full primary key
                **{column: row[column] for column in primary_key_columns},
                # Set the new vector column to the original text
                new_vector_column: text,
            }
            updated_rows.append(updated_row)

    # Inserting a row with a primary key that already exists in the table will
    # overwrite the specified column but leave unspecified columns unchanged.
    # Skip the write when no row on this page has original text.
    if updated_rows:
        table.insert_many(updated_rows)

    migrated_count += len(updated_rows)
    print(f"Migrated {migrated_count} rows. Page state: {page_state}")

    if page_state is None:
        print("✅ Reached final page. Migration complete.")
        break
```

```typescript
// Re-embeds every row of a table by copying the original text into a new
// vector column, one page of rows at a time.
import { DataAPIClient, TableInsertManyError } from "@datastax/astra-db-ts";

const client = new DataAPIClient("APPLICATION_TOKEN");
const database = client.db("API_ENDPOINT");
const table = database.table("TABLE_NAME");

let pageState = null;
let migratedCount = 0;

// Use an empty filter to find all rows
const filter = {};

// You must include ALL primary key columns for your table
const primaryKeyColumns = ["PRIMARY_KEY_1", "PRIMARY_KEY_2"];

const originalTextColumn = "NAME_OF_ORIGINAL_TEXT_COLUMN";
const newVectorColumn = "NAME_OF_NEW_VECTOR_COLUMN";

// The projection should include ALL primary key columns
// and the column that stores the original text
const projection = {
  ...Object.fromEntries(primaryKeyColumns.map((column) => [column, true])),
  [originalTextColumn]: true,
};

(async function () {
  while (true) {
    // Resume from the previous page when there is one
    const cursor = table.find(filter, {
      projection,
      ...(pageState ? { initialPageState: pageState } : {}),
    });

    const page = await cursor.fetchNextPage();
    const rows = page.result;
    pageState = page.nextPageState;

    if (!rows.length) {
      console.log("✅ No more rows. Migration complete.");
      break;
    }

    // Build the updates
    const updatedRows = [];
    for (const row of rows) {
      const text = row[originalTextColumn];
      // Rows without original text are skipped
      if (text) {
        const updatedRow = {
          // Include the full primary key
          ...Object.fromEntries(
            primaryKeyColumns.map((column) => [column, row[column]]),
          ),
          // Set the new vector column to the original text
          [newVectorColumn]: text,
        };
        updatedRows.push(updatedRow);
      }
    }

    // Skip the write when no row on this page has original text.
    if (updatedRows.length > 0) {
      try {
        // Inserting a row with a primary key that already exists in the table will
        // overwrite the specified column but leave unspecified columns unchanged.
        // Write the rebuilt rows (not the fetched rows) so that the new vector
        // column is the column that gets populated.
        await table.insertMany(updatedRows);
      } catch (error) {
        if (error instanceof TableInsertManyError) {
          // Report which rows were written before the failure
          console.log(error.insertedIds());
        }
        // Stop instead of reporting rows as migrated when the write failed
        throw error;
      }
    }

    migratedCount += updatedRows.length;
    console.log(
      "Migrated " + migratedCount + " rows. Page state: " + pageState,
    );

    if (!pageState) {
      console.log("✅ Reached final page. Migration complete.");
      break;
    }
  }
})();
```

```java
import com.datastax.astra.client.DataAPIClient;
import com.datastax.astra.client.core.paging.Page;
import com.datastax.astra.client.core.query.Filter;
import com.datastax.astra.client.core.query.Projection;
import com.datastax.astra.client.databases.Database;
import com.datastax.astra.client.tables.Table;
import com.datastax.astra.client.tables.commands.options.TableFindOptions;
import com.datastax.astra.client.tables.definition.rows.Row;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Stream;

/**
 * Re-embeds every row of a table by copying the original text into a new
 * vector column, one page of rows at a time.
 */
public class Example {

  public static void main(String[] args) {
    Database database = new DataAPIClient("APPLICATION_TOKEN").getDatabase("API_ENDPOINT");
    Table<Row> table = database.getTable("TABLE_NAME");

    String pageState = null;
    int migratedCount = 0;

    // Use an empty filter to find all rows
    Filter filter = null;

    // You must include ALL primary key columns for your table
    String[] primaryKeyColumns = new String[] {"PRIMARY_KEY_1", "PRIMARY_KEY_2"};

    String originalTextColumn = "NAME_OF_ORIGINAL_TEXT_COLUMN";
    String newVectorColumn = "NAME_OF_NEW_VECTOR_COLUMN";

    // The projection should include ALL primary key columns
    // and the column that stores the original text
    String[] projectedColumns =
        Stream.concat(Arrays.stream(primaryKeyColumns), Stream.of(originalTextColumn))
            .toArray(String[]::new);

    while (true) {
      // A null page state requests the first page
      Page<Row> page =
          table.findPage(
              filter,
              new TableFindOptions()
                  .projection(Projection.include(projectedColumns))
                  .pageState(pageState));

      List<Row> rows = page.getResults();
      pageState = page.getPageState().orElse(null);

      if (rows == null || rows.isEmpty()) {
        System.out.println("✅ No more rows. Migration complete.");
        break;
      }

      // Build the updates
      List<Row> updatedRows = new ArrayList<>();
      for (Row row : rows) {
        Object text = row.get(originalTextColumn);
        // Rows without original text are skipped
        if (text != null) {
          Row updatedRow = new Row();

          // Include the full primary key
          for (String primaryKeyColumn : primaryKeyColumns) {
            updatedRow.put(primaryKeyColumn, row.get(primaryKeyColumn));
          }

          // Set the new vector column to the original text
          updatedRow.put(newVectorColumn, text);

          updatedRows.add(updatedRow);
        }
      }

      // Inserting a row with a primary key that already exists in the table will
      // overwrite the specified column but leave unspecified columns unchanged.
      // Skip the write when no row on this page has original text.
      if (!updatedRows.isEmpty()) {
        table.insertMany(updatedRows);
      }

      migratedCount += updatedRows.size();
      System.out.println("Migrated " + migratedCount + " rows. Page state: " + pageState);

      if (pageState == null) {
        System.out.println("✅ Reached final page. Migration complete.");
        break;
      }
    }
  }
}
```
-
-
Optionally, delete the column that stores the old vector embeddings.