diff --git a/src/embeddings.js b/src/embeddings.js index 40e57c9..c76676b 100644 --- a/src/embeddings.js +++ b/src/embeddings.js @@ -111,6 +111,29 @@ function rebuildVec0IfModelChanged() { SELECT 1 FROM article_embedding_meta WHERE model != ? LIMIT 1 `).get(EMBEDDING_MODEL); + // also sync any store entries that didn't make it into vec0 + const missing = db.prepare(` + SELECT article_id, embedding FROM article_embedding_store + WHERE model = ? + AND NOT EXISTS (SELECT 1 FROM article_embeddings WHERE article_id = article_embedding_store.article_id) + `).all(EMBEDDING_MODEL); + + if (missing.length > 0) { + const insertVec = db.prepare(`INSERT OR IGNORE INTO article_embeddings (article_id, embedding) VALUES (?, ?)`); + const insertMeta = db.prepare(`INSERT OR IGNORE INTO article_embedding_meta (article_id, model) VALUES (?, ?)`); + + const sync = db.transaction(() => { + for (const row of missing) { + const vals = new Float32Array(row.embedding.buffer, row.embedding.byteOffset, row.embedding.byteLength / 4); + insertVec.run(BigInt(row.article_id), padEmbeddingForVec0(vals)); + insertMeta.run(row.article_id, EMBEDDING_MODEL); + } + }); + + sync(); + console.log(`synced ${missing.length} store embeddings into vec0`); + } + if (!stale) return; const storeCount = db.prepare(`