migrate article embeddings to support multi-model architecture and enhance data integrity
This commit is contained in:
parent
d0f0495c5c
commit
bdc7325387
1 changed file with 13 additions and 13 deletions
|
|
@@ -85,6 +85,19 @@ const upsertQueryEmbedding = db.prepare(`
|
|||
created_at = datetime('now')
|
||||
`);
|
||||
|
||||
// Fixed embedding width (number of float32 components) that padEmbeddingForVec0 zero-pads shorter vectors up to.
const VEC0_DIM = 8192;
|
||||
|
||||
/**
 * Serializes embedding components as packed 32-bit floats.
 *
 * The input is converted to a new Float32Array (platform byte order) and the
 * returned Buffer is a view over that array's bytes, 4 bytes per component.
 *
 * @param {ArrayLike<number>} values - Embedding components (presumably a plain
 *   array or typed array of numbers; confirm against callers).
 * @returns {Buffer} Buffer of `values.length * 4` bytes.
 */
function serializeEmbedding(values) {
  return Buffer.from(new Float32Array(values).buffer);
}
|
||||
|
||||
/**
 * Serializes an embedding as packed 32-bit floats, zero-padded on the right
 * to a fixed number of dimensions.
 *
 * @param {ArrayLike<number>} values - Embedding components; at most `dim` entries.
 * @param {number} [dim=VEC0_DIM] - Target number of dimensions. Defaults to the
 *   module-level VEC0_DIM, so existing single-argument callers are unaffected.
 * @returns {Buffer} Buffer of `dim * 4` bytes.
 * @throws {RangeError} If `values` has more than `dim` entries.
 */
function padEmbeddingForVec0(values, dim = VEC0_DIM) {
  // Already the target width: no padding needed, serialize directly.
  if (values.length === dim) return serializeEmbedding(values);

  if (values.length > dim) {
    // Fail with a descriptive message rather than the generic RangeError
    // that TypedArray#set raises when the source does not fit.
    throw new RangeError(
      `embedding has ${values.length} dimensions, exceeds maximum of ${dim}`,
    );
  }

  // A new Float32Array is zero-filled, so the untouched tail is the padding.
  const padded = new Float32Array(dim);
  padded.set(values);
  return Buffer.from(padded.buffer);
}
|
||||
|
||||
// backfill store from vec0 for any embeddings that predate multi-model support.
|
||||
// only runs when store is completely empty, so we never stamp the wrong model on existing data.
|
||||
try {
|
||||
|
|
@@ -193,19 +206,6 @@ function buildEmbeddingInput(article) {
|
|||
return [title, description, content].join('\n\n');
|
||||
}
|
||||
|
||||
// Fixed embedding width (number of float32 components) that padEmbeddingForVec0 zero-pads shorter vectors up to.
const VEC0_DIM = 8192;
|
||||
|
||||
/**
 * Serializes embedding components as packed 32-bit floats.
 *
 * The input is converted to a new Float32Array (platform byte order) and the
 * returned Buffer is a view over that array's bytes, 4 bytes per component.
 *
 * @param {ArrayLike<number>} values - Embedding components (presumably a plain
 *   array or typed array of numbers; confirm against callers).
 * @returns {Buffer} Buffer of `values.length * 4` bytes.
 */
function serializeEmbedding(values) {
  return Buffer.from(new Float32Array(values).buffer);
}
|
||||
|
||||
/**
 * Serializes an embedding as packed 32-bit floats, zero-padded on the right
 * to a fixed number of dimensions.
 *
 * @param {ArrayLike<number>} values - Embedding components; at most `dim` entries.
 * @param {number} [dim=VEC0_DIM] - Target number of dimensions. Defaults to the
 *   module-level VEC0_DIM, so existing single-argument callers are unaffected.
 * @returns {Buffer} Buffer of `dim * 4` bytes.
 * @throws {RangeError} If `values` has more than `dim` entries.
 */
function padEmbeddingForVec0(values, dim = VEC0_DIM) {
  // Already the target width: no padding needed, serialize directly.
  if (values.length === dim) return serializeEmbedding(values);

  if (values.length > dim) {
    // Fail with a descriptive message rather than the generic RangeError
    // that TypedArray#set raises when the source does not fit.
    throw new RangeError(
      `embedding has ${values.length} dimensions, exceeds maximum of ${dim}`,
    );
  }

  // A new Float32Array is zero-filled, so the untouched tail is the padding.
  const padded = new Float32Array(dim);
  padded.set(values);
  return Buffer.from(padded.buffer);
}
|
||||
|
||||
function normalizeQuery(input) {
|
||||
return String(input || '')
|
||||
.trim()
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue