From 1e442df426441922fbefc584bd07e8104472d0e1 Mon Sep 17 00:00:00 2001
From: ImBenji
Date: Sat, 18 Apr 2026 18:32:51 +0100
Subject: [PATCH] migrate article embeddings to support multi-model architecture and enhance data integrity

---
 src/embeddings.js | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/src/embeddings.js b/src/embeddings.js
index ff2f5f6..a2c2f50 100644
--- a/src/embeddings.js
+++ b/src/embeddings.js
@@ -117,6 +117,62 @@ try {
   console.error('embedding store backfill failed:', err);
 }
 
+// probe the API to get the real dimension count for the current model, then purge
+// any store entries that don't match — handles the case where old embeddings
+// got stamped with the wrong model name during migration.
+//
+// best-effort: errors from the probe or the purge are logged, not rethrown.
+// does nothing when no OpenRouter API key is configured, and refuses to purge
+// when the probe does not yield a positive dimension count.
+async function purgeWrongSizeEmbeddings() {
+  const apiKey = config.openRouter && config.openRouter.apiKey
+    ? String(config.openRouter.apiKey).trim()
+    : '';
+
+  if (!apiKey) return;
+
+  try {
+    const probe = await requestEmbedding('probe');
+    const dims = probe ? probe.length : 0;
+
+    // an empty or malformed probe would make every stored row look stale, so
+    // bail out instead of wiping the store on a bad API response
+    if (!Number.isInteger(dims) || dims <= 0) {
+      console.error('embedding size validation skipped: probe returned no usable embedding');
+      return;
+    }
+
+    // assumes embeddings are stored as float32 blobs (4 bytes per dimension) — TODO confirm
+    const expectedBytes = dims * 4;
+
+    const stale = db.prepare(`
+      SELECT article_id FROM article_embedding_store
+      WHERE model = ? AND LENGTH(embedding) != ?
+    `).all(EMBEDDING_MODEL, expectedBytes);
+
+    if (stale.length === 0) return;
+
+    const deleteStore = db.prepare(`DELETE FROM article_embedding_store WHERE article_id = ? AND model = ?`);
+    const deleteMeta = db.prepare(`DELETE FROM article_embedding_meta WHERE article_id = ?`);
+    const deleteVec = db.prepare(`DELETE FROM article_embeddings WHERE article_id = ?`);
+
+    // delete from all three tables inside one transaction
+    db.transaction(() => {
+      for (const row of stale) {
+        deleteStore.run(row.article_id, EMBEDDING_MODEL);
+        deleteMeta.run(row.article_id);
+        deleteVec.run(BigInt(row.article_id));
+      }
+    })();
+
+    console.log(`purged ${stale.length} wrong-size embeddings for model ${EMBEDDING_MODEL} (expected ${dims} dims)`);
+  } catch (err) {
+    console.error('embedding size validation failed:', err);
+  }
+}
+
+purgeWrongSizeEmbeddings();
+
 // if the config model changed, rebuild the vec0 search index from store.
 // only proceeds if the store actually has embeddings for the new model.
 function rebuildVec0IfModelChanged() {