migrate article embeddings to support multi-model architecture and enhance data integrity
This commit is contained in:
+13
-13
@@ -85,6 +85,19 @@ const upsertQueryEmbedding = db.prepare(`
|
||||
created_at = datetime('now')
|
||||
`);
|
||||
|
||||
// Number of float32 elements in every buffer produced by padEmbeddingForVec0;
// shorter embeddings are zero-padded up to this length.
// NOTE(review): presumably matches the vector dimension declared for the vec0
// table — confirm against the schema.
const VEC0_DIM = 8192;
|
||||
|
||||
/**
 * Serialize an embedding as a buffer of packed 32-bit floats.
 *
 * @param {ArrayLike<number> | Iterable<number>} values - Embedding components.
 * @returns {Buffer} Buffer holding `values` as consecutive float32 elements
 *   (4 bytes per element).
 * @throws {TypeError} If `values` is not an object (null, undefined, number,
 *   string, ...).
 */
function serializeEmbedding(values) {
  // `new Float32Array(x)` treats a number as a *length* and turns
  // null/undefined/strings into an empty array, so a missing or malformed
  // embedding would be silently serialized as an empty or zero-filled blob.
  if (values === null || typeof values !== 'object') {
    throw new TypeError(
      `embedding must be an array or typed array, got ${values === null ? 'null' : typeof values}`,
    );
  }
  const floats = new Float32Array(values);
  return Buffer.from(floats.buffer);
}
|
||||
|
||||
/**
 * Serialize an embedding as a float32 buffer of exactly VEC0_DIM elements.
 *
 * An embedding that is already VEC0_DIM long is serialized unchanged; a
 * shorter one is right-padded with zeros.
 *
 * @param {ArrayLike<number>} values - Embedding components.
 * @returns {Buffer} Buffer of VEC0_DIM * 4 bytes.
 * @throws {RangeError} If `values` has more than VEC0_DIM elements.
 */
function padEmbeddingForVec0(values) {
  const { length } = values;
  if (length === VEC0_DIM) return serializeEmbedding(values);

  // Fail with an actionable message rather than the generic RangeError that
  // TypedArray.prototype.set raises when the source does not fit.
  if (length > VEC0_DIM) {
    throw new RangeError(
      `embedding has ${length} dimensions, exceeding the vec0 limit of ${VEC0_DIM}`,
    );
  }

  // A fresh Float32Array is zero-filled, so only the leading values need copying.
  const padded = new Float32Array(VEC0_DIM);
  padded.set(values);
  return Buffer.from(padded.buffer);
}
|
||||
|
||||
// backfill store from vec0 for any embeddings that predate multi-model support.
|
||||
// only runs when store is completely empty, so we never stamp the wrong model on existing data.
|
||||
try {
|
||||
@@ -193,19 +206,6 @@ function buildEmbeddingInput(article) {
|
||||
return [title, description, content].join('\n\n');
|
||||
}
|
||||
|
||||
// Number of float32 elements in every buffer produced by padEmbeddingForVec0;
// shorter embeddings are zero-padded up to this length.
// NOTE(review): presumably matches the vector dimension declared for the vec0
// table — confirm against the schema.
const VEC0_DIM = 8192;
|
||||
|
||||
/**
 * Serialize an embedding as a buffer of packed 32-bit floats.
 *
 * @param {ArrayLike<number> | Iterable<number>} values - Embedding components.
 * @returns {Buffer} Buffer holding `values` as consecutive float32 elements
 *   (4 bytes per element).
 * @throws {TypeError} If `values` is not an object (null, undefined, number,
 *   string, ...).
 */
function serializeEmbedding(values) {
  // `new Float32Array(x)` treats a number as a *length* and turns
  // null/undefined/strings into an empty array, so a missing or malformed
  // embedding would be silently serialized as an empty or zero-filled blob.
  if (values === null || typeof values !== 'object') {
    throw new TypeError(
      `embedding must be an array or typed array, got ${values === null ? 'null' : typeof values}`,
    );
  }
  const floats = new Float32Array(values);
  return Buffer.from(floats.buffer);
}
|
||||
|
||||
/**
 * Serialize an embedding as a float32 buffer of exactly VEC0_DIM elements.
 *
 * An embedding that is already VEC0_DIM long is serialized unchanged; a
 * shorter one is right-padded with zeros.
 *
 * @param {ArrayLike<number>} values - Embedding components.
 * @returns {Buffer} Buffer of VEC0_DIM * 4 bytes.
 * @throws {RangeError} If `values` has more than VEC0_DIM elements.
 */
function padEmbeddingForVec0(values) {
  const { length } = values;
  if (length === VEC0_DIM) return serializeEmbedding(values);

  // Fail with an actionable message rather than the generic RangeError that
  // TypedArray.prototype.set raises when the source does not fit.
  if (length > VEC0_DIM) {
    throw new RangeError(
      `embedding has ${length} dimensions, exceeding the vec0 limit of ${VEC0_DIM}`,
    );
  }

  // A fresh Float32Array is zero-filled, so only the leading values need copying.
  const padded = new Float32Array(VEC0_DIM);
  padded.set(values);
  return Buffer.from(padded.buffer);
}
|
||||
|
||||
function normalizeQuery(input) {
|
||||
return String(input || '')
|
||||
.trim()
|
||||
|
||||
Reference in New Issue
Block a user