migrate article embeddings to support multi-model architecture and enhance data integrity
This commit is contained in:
parent
3e74bd7286
commit
1e442df426
1 changed files with 41 additions and 0 deletions
|
|
@ -117,6 +117,47 @@ try {
|
||||||
console.error('embedding store backfill failed:', err);
|
console.error('embedding store backfill failed:', err);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Purge stored embeddings whose byte length does not match the current model.
 *
 * Requests a single probe embedding to learn how many dimensions the API
 * actually returns for the current model, then deletes every
 * `article_embedding_store` row stamped with EMBEDDING_MODEL whose blob length
 * differs from the expected size, together with the rows for the same article
 * in `article_embedding_meta` and `article_embeddings`. This handles the case
 * where old embeddings got stamped with the wrong model name during migration.
 *
 * Does nothing when no OpenRouter API key is configured. Errors raised while
 * probing or purging are logged rather than propagated.
 *
 * @returns {Promise<void>}
 */
async function purgeWrongSizeEmbeddings() {
  const apiKey = config.openRouter && config.openRouter.apiKey
    ? String(config.openRouter.apiKey).trim()
    : '';

  // without a key the probe request cannot be made, so there is nothing to compare against
  if (!apiKey) return;

  try {
    const probe = await requestEmbedding('probe');
    const dims = probe == null ? 0 : probe.length;

    // an empty or malformed probe would make the expected size 0 (or NaN) and
    // flag every stored embedding as stale; refuse to purge on that basis
    if (!Number.isInteger(dims) || dims <= 0) {
      throw new Error(`embedding probe returned no usable vector (length: ${dims})`);
    }

    // 4 bytes per dimension: presumably vectors are stored as float32 blobs (confirm against the writer)
    const expectedBytes = dims * 4;

    const stale = db.prepare(`
      SELECT article_id FROM article_embedding_store
      WHERE model = ? AND LENGTH(embedding) != ?
    `).all(EMBEDDING_MODEL, expectedBytes);

    if (stale.length === 0) return;

    const deleteStore = db.prepare(`DELETE FROM article_embedding_store WHERE article_id = ? AND model = ?`);
    const deleteMeta = db.prepare(`DELETE FROM article_embedding_meta WHERE article_id = ?`);
    const deleteVec = db.prepare(`DELETE FROM article_embeddings WHERE article_id = ?`);

    // run all deletes inside one transaction callback so the three tables are purged together
    db.transaction(() => {
      for (const row of stale) {
        deleteStore.run(row.article_id, EMBEDDING_MODEL);
        deleteMeta.run(row.article_id);
        // the article_embeddings delete binds the id as a BigInt, unlike the other two
        deleteVec.run(BigInt(row.article_id));
      }
    })();

    console.log(`purged ${stale.length} wrong-size embeddings for model ${EMBEDDING_MODEL} (expected ${dims} dims)`);
  } catch (err) {
    console.error('embedding size validation failed:', err);
  }
}
|
||||||
|
|
||||||
|
// kick off the purge without awaiting it: its database work happens only after
// the awaited probe request, so synchronous code below this line runs first.
// NOTE(review): confirm nothing that follows depends on the purge having finished.
purgeWrongSizeEmbeddings();
|
||||||
|
|
||||||
// if the config model changed, rebuild the vec0 search index from store.
|
// if the config model changed, rebuild the vec0 search index from store.
|
||||||
// only proceeds if the store actually has embeddings for the new model.
|
// only proceeds if the store actually has embeddings for the new model.
|
||||||
function rebuildVec0IfModelChanged() {
|
function rebuildVec0IfModelChanged() {
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue