migrate article embeddings to support multi-model architecture and enhance data integrity

This commit is contained in:
ImBenji 2026-04-18 18:32:51 +01:00
parent 3e74bd7286
commit 1e442df426

View file

@ -117,6 +117,47 @@ try {
console.error('embedding store backfill failed:', err);
}
/**
 * Probe the embedding API to learn the real dimension count of the currently
 * configured model, then purge every store entry for that model whose blob
 * size does not match. This handles old embeddings that got stamped with the
 * wrong model name during migration.
 *
 * Nothing is deleted unless the probe yields a positive dimension count: an
 * empty or malformed probe response would otherwise make every stored row
 * look "wrong-sized" and wipe the whole store for the model.
 *
 * Skips silently when no OpenRouter API key is configured. Failures of the
 * probe or of the database work are logged, never thrown.
 *
 * @returns {Promise<void>} resolves once validation (and any purge) is done
 */
async function purgeWrongSizeEmbeddings() {
  const apiKey = config.openRouter && config.openRouter.apiKey
    ? String(config.openRouter.apiKey).trim()
    : '';
  // no key configured: the probe is not attempted, so there is nothing to compare against
  if (!apiKey) return;

  try {
    const probe = await requestEmbedding('probe');

    // guard the destructive path: a zero/undefined length must never be
    // treated as the "expected" size
    const dims = probe == null ? NaN : probe.length;
    if (!Number.isInteger(dims) || dims <= 0) {
      throw new Error(`probe returned no usable embedding (length: ${dims})`);
    }

    // stored blobs are compared at 4 bytes per dimension
    // (presumably float32 — confirm against the code that writes the store)
    const expectedBytes = dims * 4;
    const stale = db.prepare(`
      SELECT article_id FROM article_embedding_store
      WHERE model = ? AND LENGTH(embedding) != ?
    `).all(EMBEDDING_MODEL, expectedBytes);
    if (stale.length === 0) return;

    const deleteStore = db.prepare(`DELETE FROM article_embedding_store WHERE article_id = ? AND model = ?`);
    const deleteMeta = db.prepare(`DELETE FROM article_embedding_meta WHERE article_id = ?`);
    const deleteVec = db.prepare(`DELETE FROM article_embeddings WHERE article_id = ?`);

    // one transaction so store, meta and vec rows are removed together or not at all
    db.transaction(() => {
      for (const row of stale) {
        deleteStore.run(row.article_id, EMBEDDING_MODEL);
        deleteMeta.run(row.article_id);
        // the vec table delete is passed the id as a BigInt
        deleteVec.run(BigInt(row.article_id));
      }
    })();

    console.log(`purged ${stale.length} wrong-size embeddings for model ${EMBEDDING_MODEL} (expected ${dims} dims)`);
  } catch (err) {
    console.error('embedding size validation failed:', err);
  }
}
// kick off the size validation without awaiting it: the function is async, so
// statements that follow run without waiting for the probe request to finish.
// NOTE(review): confirm nothing below depends on the purge having completed.
purgeWrongSizeEmbeddings();
// if the config model changed, rebuild the vec0 search index from store.
// only proceeds if the store actually has embeddings for the new model.
function rebuildVec0IfModelChanged() {