From d0f0495c5c1ab37a1b87d185dbc04646b377d923 Mon Sep 17 00:00:00 2001 From: ImBenji Date: Sat, 18 Apr 2026 17:32:21 +0100 Subject: [PATCH] migrate article embeddings to support multi-model architecture and enhance data integrity --- src/embeddings.js | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/src/embeddings.js b/src/embeddings.js index 40e57c9..c76676b 100644 --- a/src/embeddings.js +++ b/src/embeddings.js @@ -111,6 +111,29 @@ function rebuildVec0IfModelChanged() { SELECT 1 FROM article_embedding_meta WHERE model != ? LIMIT 1 `).get(EMBEDDING_MODEL); + // also sync any store entries that didn't make it into vec0 + const missing = db.prepare(` + SELECT article_id, embedding FROM article_embedding_store + WHERE model = ? + AND NOT EXISTS (SELECT 1 FROM article_embeddings WHERE article_id = article_embedding_store.article_id) + `).all(EMBEDDING_MODEL); + + if (missing.length > 0) { + const insertVec = db.prepare(`INSERT OR IGNORE INTO article_embeddings (article_id, embedding) VALUES (?, ?)`); + const insertMeta = db.prepare(`INSERT OR IGNORE INTO article_embedding_meta (article_id, model) VALUES (?, ?)`); + + const sync = db.transaction(() => { + for (const row of missing) { + const vals = new Float32Array(row.embedding.buffer, row.embedding.byteOffset, row.embedding.byteLength / 4); + insertVec.run(BigInt(row.article_id), padEmbeddingForVec0(vals)); + insertMeta.run(row.article_id, EMBEDDING_MODEL); + } + }); + + sync(); + console.log(`synced ${missing.length} store embeddings into vec0`); + } + if (!stale) return; const storeCount = db.prepare(`