Migrate to a new embedding model for a table

Follow this migration guide if you want to switch embedding models for a column in your table.

This migration only works if you stored the original text in another column in your table. If you did not store the original text in your table, then you must modify the migration script below to get the original text from another source.

If your new embedding model supports a larger context window, then you might also want to re-chunk your data.

  1. Add a new vector column with the desired embedding provider integration to your table. For examples, see Add a vector column and configure an embedding provider integration.

  2. Populate the new vector column with the contents of the column that stores the original text.

    The embedding provider integration for your new column will automatically generate vector embeddings based on the text.

    For example:

    • Python

    • TypeScript

    • Java

    from astrapy import DataAPIClient

    client = DataAPIClient("APPLICATION_TOKEN")
    database = client.get_database("API_ENDPOINT")

    table = database.get_table("TABLE_NAME")

    page_state = None
    migrated_count = 0

    # An empty filter matches every row in the table.
    row_filter = {}

    # Every primary key column for the table must be listed here.
    primary_key_columns = [
        "PRIMARY_KEY_1",
        "PRIMARY_KEY_2",
    ]

    original_text_column = "NAME_OF_ORIGINAL_TEXT_COLUMN"

    new_vector_column = "NAME_OF_NEW_VECTOR_COLUMN"

    # Project the full primary key plus the column holding the original text.
    projection = {column: True for column in primary_key_columns}
    projection[original_text_column] = True

    while True:
        # Resume from the previous page when a page state was returned.
        find_kwargs = {"projection": projection}
        if page_state:
            find_kwargs["initial_page_state"] = page_state
        cursor = table.find(row_filter, **find_kwargs)

        page = cursor.fetch_next_page()
        rows = page.results
        page_state = page.next_page_state

        if not rows:
            print("✅ No more rows. Migration complete.")
            break

        # Assemble one upsert per row that actually has original text.
        updated_rows = []
        for row in rows:
            text = row.get(original_text_column)
            if text:
                # Carry the full primary key so the insert targets the same row.
                updated_row = {column: row[column] for column in primary_key_columns}
                # The embedding integration vectorizes this text automatically.
                updated_row[new_vector_column] = text
                updated_rows.append(updated_row)

        # Re-inserting an existing primary key overwrites only the columns
        # provided here; all unspecified columns keep their current values.
        table.insert_many(updated_rows)
        migrated_count += len(updated_rows)

        print(f"Migrated {migrated_count} rows. Page state: {page_state}")

        if page_state is None:
            print("✅ Reached final page. Migration complete.")
            break
    import { DataAPIClient, TableInsertManyError } from "@datastax/astra-db-ts";

    const client = new DataAPIClient("APPLICATION_TOKEN");
    const database = client.db("API_ENDPOINT");

    const table = database.table("TABLE_NAME");

    let pageState = null;
    let migratedCount = 0;

    // Use an empty filter to find all rows
    const filter = {};

    // You must include ALL primary key columns for your table
    const primaryKeyColumns = ["PRIMARY_KEY_1", "PRIMARY_KEY_2"];

    const originalTextColumn = "NAME_OF_ORIGINAL_TEXT_COLUMN";

    const newVectorColumn = "NAME_OF_NEW_VECTOR_COLUMN";

    // The projection should include ALL primary key columns
    // and the column that stores the original text
    const projection = {
      ...Object.fromEntries(primaryKeyColumns.map((column) => [column, true])),
      [originalTextColumn]: true,
    };

    (async function () {
      while (true) {
        // Resume from the previous page when a page state was returned.
        const cursor = table.find(filter, {
          projection,
          ...(pageState ? { initialPageState: pageState } : {}),
        });

        const page = await cursor.fetchNextPage();
        const rows = page.result;
        pageState = page.nextPageState;

        if (!rows.length) {
          console.log("✅ No more rows. Migration complete.");
          break;
        }

        // Build one upsert per row that actually has original text.
        const updatedRows = [];
        for (const row of rows) {
          const text = row[originalTextColumn];
          if (text) {
            const updatedRow = {
              // Include the full primary key so the insert targets the same row
              ...Object.fromEntries(
                primaryKeyColumns.map((column) => [column, row[column]]),
              ),

              // Set the new vector column to the original text; the embedding
              // provider integration vectorizes it automatically.
              [newVectorColumn]: text,
            };
            updatedRows.push(updatedRow);
          }
        }

        try {
          // Inserting a row with a primary key that already exists in the table will
          // overwrite the specified column but leave unspecified columns unchanged.
          // Fix: insert the UPDATED rows (primary key + new vector column), not the
          // raw projected rows read from the table.
          await table.insertMany(updatedRows);
        } catch (error) {
          if (error instanceof TableInsertManyError) {
            console.log(error.insertedIds());
          } else {
            // Don't silently swallow unexpected failures.
            throw error;
          }
        }

        // Count only the rows that were actually migrated, matching the
        // Python and Java examples.
        migratedCount += updatedRows.length;

        console.log(
          "Migrated " + migratedCount + " rows. Page state: " + pageState,
        );

        if (!pageState) {
          console.log("✅ Reached final page. Migration complete.");
          break;
        }
      }
    })();
    import com.datastax.astra.client.DataAPIClient;
    import com.datastax.astra.client.core.paging.Page;
    import com.datastax.astra.client.core.query.Filter;
    import com.datastax.astra.client.core.query.Projection;
    import com.datastax.astra.client.databases.Database;
    import com.datastax.astra.client.tables.Table;
    import com.datastax.astra.client.tables.commands.options.TableFindOptions;
    import com.datastax.astra.client.tables.definition.rows.Row;
    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;
    import java.util.stream.Stream;

    /** Migrates a table to a new embedding model by re-populating a vector column. */
    public class Example {

      public static void main(String[] args) {

        Database database = new DataAPIClient("APPLICATION_TOKEN").getDatabase("API_ENDPOINT");

        Table<Row> table = database.getTable("TABLE_NAME");

        String pageState = null;
        int migratedCount = 0;

        // A null filter matches every row in the table.
        Filter filter = null;

        // Every primary key column for the table must be listed here.
        List<String> primaryKeyColumns = Arrays.asList("PRIMARY_KEY_1", "PRIMARY_KEY_2");

        String originalTextColumn = "NAME_OF_ORIGINAL_TEXT_COLUMN";

        String newVectorColumn = "NAME_OF_NEW_VECTOR_COLUMN";

        // Project the full primary key plus the column holding the original text.
        String[] projectedColumns =
            Stream.concat(primaryKeyColumns.stream(), Stream.of(originalTextColumn))
                .toArray(String[]::new);

        while (true) {
          // Resume from the previous page when a page state was returned.
          TableFindOptions options =
              new TableFindOptions()
                  .projection(Projection.include(projectedColumns))
                  .pageState(pageState);

          Page<Row> page = table.findPage(filter, options);

          List<Row> rows = page.getResults();
          pageState = page.getPageState().orElse(null);

          if (rows == null || rows.isEmpty()) {
            System.out.println("✅ No more rows. Migration complete.");
            break;
          }

          // Assemble one upsert per row that actually has original text.
          List<Row> updatedRows = new ArrayList<>();
          for (Row row : rows) {
            Object text = row.get(originalTextColumn);
            if (text == null) {
              continue;
            }

            Row updatedRow = new Row();

            // Carry the full primary key so the insert targets the same row.
            for (String keyColumn : primaryKeyColumns) {
              updatedRow.put(keyColumn, row.get(keyColumn));
            }

            // The embedding integration vectorizes this text automatically.
            updatedRow.put(newVectorColumn, text);

            updatedRows.add(updatedRow);
          }

          // Re-inserting an existing primary key overwrites only the columns
          // provided here; unspecified columns keep their current values.
          table.insertMany(updatedRows);
          migratedCount += updatedRows.size();

          System.out.println("Migrated " + migratedCount + " rows. Page state: " + pageState);

          if (pageState == null) {
            System.out.println("✅ Reached final page. Migration complete.");
            break;
          }
        }
      }
    }
  3. Optionally, delete the column that stores the old vector embeddings.

Was this helpful?

Give Feedback

How can we improve the documentation?

© 2025 DataStax, an IBM Company | Privacy policy | Terms of use | Manage Privacy Choices

Apache, Apache Cassandra, Cassandra, Apache Tomcat, Tomcat, Apache Lucene, Apache Solr, Apache Hadoop, Hadoop, Apache Pulsar, Pulsar, Apache Spark, Spark, Apache TinkerPop, TinkerPop, Apache Kafka and Kafka are either registered trademarks or trademarks of the Apache Software Foundation or its subsidiaries in Canada, the United States and/or other countries. Kubernetes is the registered trademark of the Linux Foundation.

General Inquiries: +1 (650) 389-6000, info@datastax.com