Migrate to a new embedding model for a table

Follow this migration guide if you want to switch embedding models for a column in your table.

This migration only works if you stored the original text in another column in your table. If you did not store the original text in your table, then you must modify the migration script below to get the original text from another source.

If your new embedding model supports a larger context window, then you might also want to re-chunk your data.

Add a new vector column with the desired embedding provider integration to your table. For examples, see Add a vector column and configure an embedding provider integration.

Populate the new vector column with the contents of the column that stores the original text.

The embedding provider integration for your new column will automatically generate vector embeddings based on the text.

For example:

Python
TypeScript
Java

from astrapy import DataAPIClient

client = DataAPIClient("APPLICATION_TOKEN")
database = client.get_database("API_ENDPOINT")

table = database.get_table("TABLE_NAME")

# Page state returned with the previous page; None until a page has been read
page_state = None
# Running total of rows written to the new vector column
migrated_count = 0

# Use an empty filter to find all rows.
# Named row_filter so that it does not shadow Python's built-in filter().
row_filter = {}

# You must include ALL primary key columns for your table
primary_key_columns = [
    "PRIMARY_KEY_1",
    "PRIMARY_KEY_2",
]

original_text_column = "NAME_OF_ORIGINAL_TEXT_COLUMN"

new_vector_column = "NAME_OF_NEW_VECTOR_COLUMN"

# The projection should include ALL primary key columns
# and the column that stores the original text
projection = {
    **{column: True for column in primary_key_columns},
    original_text_column: True,
}

while True:
    # Resume from the previous page state when there is one;
    # otherwise start a fresh find
    if page_state:
        cursor = table.find(
            row_filter, projection=projection, initial_page_state=page_state
        )
    else:
        cursor = table.find(row_filter, projection=projection)

    page = cursor.fetch_next_page()
    rows = page.results
    page_state = page.next_page_state

    if not rows:
        print("✅ No more rows. Migration complete.")
        break

    # Build the updates.
    # Rows whose original text is missing or empty are skipped.
    updated_rows = []
    for row in rows:
        if text := row.get(original_text_column):
            updated_row = {
                # Include the full primary key
                **{column: row[column] for column in primary_key_columns},
                # Set the new vector column to the original text
                new_vector_column: text,
            }
            updated_rows.append(updated_row)

    # Skip the write when no row on this page has text to migrate
    if updated_rows:
        # Inserting a row with a primary key that already exists in the table
        # will overwrite the specified column but leave unspecified columns
        # unchanged.
        table.insert_many(updated_rows)
        migrated_count += len(updated_rows)

    print(f"Migrated {migrated_count} rows. Page state: {page_state}")

    if page_state is None:
        print("✅ Reached final page. Migration complete.")
        break

import { DataAPIClient, TableInsertManyError } from "@datastax/astra-db-ts";

const client = new DataAPIClient();
const database = client.db("API_ENDPOINT", {
  token: "APPLICATION_TOKEN",
});

const table = database.table("TABLE_NAME");

// Page state returned with the previous page; null until a page has been read
let pageState = null;
// Running total of rows written to the new vector column
let migratedCount = 0;

// Use an empty filter to find all rows
const filter = {};

// You must include ALL primary key columns for your table
const primaryKeyColumns = ["PRIMARY_KEY_1", "PRIMARY_KEY_2"];

const originalTextColumn = "NAME_OF_ORIGINAL_TEXT_COLUMN";

const newVectorColumn = "NAME_OF_NEW_VECTOR_COLUMN";

// The projection should include ALL primary key columns
// and the column that stores the original text
const projection = {
  ...Object.fromEntries(primaryKeyColumns.map((column) => [column, true])),
  [originalTextColumn]: true,
};

(async function () {
  while (true) {
    // Resume from the previous page state when there is one
    const cursor = table.find(filter, {
      projection,
      ...(pageState ? { initialPageState: pageState } : {}),
    });

    const page = await cursor.fetchNextPage();
    const rows = page.result;
    pageState = page.nextPageState;

    if (!rows.length) {
      console.log("✅ No more rows. Migration complete.");
      break;
    }

    // Build the updates.
    // Rows whose original text is missing or empty are skipped.
    const updatedRows = [];
    for (const row of rows) {
      const text = row[originalTextColumn];
      if (text) {
        const updatedRow = {
          // Include the full primary key
          ...Object.fromEntries(
            primaryKeyColumns.map((column) => [column, row[column]]),
          ),

          // Set the new vector column to the original text
          [newVectorColumn]: text,
        };
        updatedRows.push(updatedRow);
      }
    }

    // Skip the write when no row on this page has text to migrate
    if (updatedRows.length) {
      try {
        // Insert the rebuilt rows (primary key + new vector column), not the
        // fetched rows, which only carry the original text.
        // Inserting a row with a primary key that already exists in the table will
        // overwrite the specified column but leave unspecified columns unchanged.
        await table.insertMany(updatedRows);
      } catch (error) {
        if (error instanceof TableInsertManyError) {
          // Report which rows were written before the failure
          console.log(error.insertedIds());
        }
        // Stop instead of continuing, so that a failed page is not
        // reported as migrated
        throw error;
      }

      migratedCount += updatedRows.length;
    }

    console.log(
      "Migrated " + migratedCount + " rows. Page state: " + pageState,
    );

    if (!pageState) {
      console.log("✅ Reached final page. Migration complete.");
      break;
    }
  }
})();

import com.datastax.astra.client.DataAPIClient;
import com.datastax.astra.client.core.paging.Page;
import com.datastax.astra.client.core.query.Filter;
import com.datastax.astra.client.core.query.Projection;
import com.datastax.astra.client.databases.Database;
import com.datastax.astra.client.tables.Table;
import com.datastax.astra.client.tables.commands.options.TableFindOptions;
import com.datastax.astra.client.tables.definition.rows.Row;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Stream;

/**
 * Example migration script: reads a table page by page and, for every row that has original
 * text, writes that text into the new vector column.
 */
public class Example {

  /**
   * Runs the migration against the table named by the placeholders below.
   *
   * @param args unused
   */
  public static void main(String[] args) {

    Database database = new DataAPIClient("APPLICATION_TOKEN").getDatabase("API_ENDPOINT");

    Table<Row> table = database.getTable("TABLE_NAME");

    // Page state returned with the previous page; null until a page has been read
    String pageState = null;
    // Running total of rows written to the new vector column
    int migratedCount = 0;

    // Use a null (absent) filter to find all rows
    Filter filter = null;

    // You must include ALL primary key columns for your table
    String[] primaryKeyColumns = new String[] {"PRIMARY_KEY_1", "PRIMARY_KEY_2"};

    String originalTextColumn = "NAME_OF_ORIGINAL_TEXT_COLUMN";

    String newVectorColumn = "NAME_OF_NEW_VECTOR_COLUMN";

    // The projection should include ALL primary key columns
    // and the column that stores the original text
    String[] projectedColumns =
        Stream.concat(Arrays.stream(primaryKeyColumns), Stream.of(originalTextColumn))
            .toArray(String[]::new);

    while (true) {
      Page<Row> page =
          table.findPage(
              filter,
              new TableFindOptions()
                  .projection(Projection.include(projectedColumns))
                  .pageState(pageState));

      List<Row> rows = page.getResults();

      // An absent page state is stored as null and ends the loop below
      pageState = page.getPageState().orElse(null);

      if (rows == null || rows.isEmpty()) {
        System.out.println("✅ No more rows. Migration complete.");
        break;
      }

      // Build the updates.
      // Rows whose original text is null are skipped.
      List<Row> updatedRows = new ArrayList<>();
      for (Row row : rows) {
        Object text = row.get(originalTextColumn);

        if (text != null) {
          Row updatedRow = new Row();

          // Include the full primary key
          for (String primaryKeyColumn : primaryKeyColumns) {
            updatedRow.put(primaryKeyColumn, row.get(primaryKeyColumn));
          }

          // Set the new vector column to the original text
          updatedRow.put(newVectorColumn, text);

          updatedRows.add(updatedRow);
        }
      }

      // Skip the write when no row on this page has text to migrate
      if (!updatedRows.isEmpty()) {
        // Inserting a row with a primary key that already exists in the table will
        // overwrite the specified column but leave unspecified columns unchanged.
        table.insertMany(updatedRows);
        migratedCount += updatedRows.size();
      }

      System.out.println("Migrated " + migratedCount + " rows. Page state: " + pageState);

      if (pageState == null) {
        System.out.println("✅ Reached final page. Migration complete.");
        break;
      }
    }
  }
}

Optionally, delete the column that stores the old vector embeddings.

Migrate to a new embedding model for a table

Was this helpful?

Give Feedback