// embedding generation and cosine similarity for the intelligence layer

/**
 * Requests an embedding vector for `text` from the OpenRouter embeddings endpoint.
 *
 * @param {string} text - Sent as the `input` field of the request body.
 * @param {{ apiKey: string, embeddingModel: string }} openRouterConfig -
 *   `apiKey` is sent as a bearer token, `embeddingModel` as the `model` field.
 * @returns {Promise<number[]>} `data[0].embedding` from the response payload.
 * @throws {Error} When `response.ok` is false (the message is the API's
 *   `error.message` when the body carries one, otherwise a status-code message),
 *   or when the payload has no non-empty embedding array.
 */
async function generateEmbedding(text, openRouterConfig) {
  const response = await fetch("https://openrouter.ai/api/v1/embeddings", {
    method: "POST",
    headers: {
      "Authorization": `Bearer ${openRouterConfig.apiKey}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({
      model: openRouterConfig.embeddingModel,
      input: text,
    }),
  });

  if (!response.ok) {
    let msg = `embedding request failed with ${response.status}`;
    try {
      const errorPayload = await response.json();
      if (errorPayload?.error?.message) msg = errorPayload.error.message;
    } catch {
      // Deliberately ignored: an error body that is not JSON just means we keep
      // the generic status-code message built above.
    }
    throw new Error(msg);
  }

  const payload = await response.json();
  const embedding = payload?.data?.[0]?.embedding;
  if (!Array.isArray(embedding) || embedding.length === 0) {
    throw new Error("invalid embedding response");
  }
  return embedding;
}

/**
 * Float32 BLOB -> Float32Array.
 *
 * @param {Buffer|Uint8Array} buf - Raw bytes of packed 32-bit floats.
 * @returns {Float32Array} A view over `buf`'s memory when `buf.byteOffset` is
 *   4-byte aligned; otherwise a Float32Array over a private copy of the bytes.
 *   Trailing bytes that do not fill a whole float are dropped.
 */
function blobToFloat32(buf) {
  const width = Float32Array.BYTES_PER_ELEMENT;
  const count = Math.floor(buf.byteLength / width);

  if (buf.byteOffset % width === 0) {
    return new Float32Array(buf.buffer, buf.byteOffset, count);
  }

  // The Float32Array constructor throws a RangeError for a byteOffset that is
  // not a multiple of 4 (e.g. a Buffer sliced out of a larger allocation), so
  // realign by copying the bytes into a fresh buffer first.
  const aligned = new Uint8Array(count * width);
  aligned.set(new Uint8Array(buf.buffer, buf.byteOffset, count * width));
  return new Float32Array(aligned.buffer);
}

/**
 * Cosine similarity of two numeric vectors.
 *
 * If the lengths differ, only the leading `min(a.length, b.length)` components
 * of each vector are compared. Works on typed arrays and plain arrays alike and
 * does not modify either input.
 *
 * @param {ArrayLike<number>} a
 * @param {ArrayLike<number>} b
 * @returns {number} `dot / (|a| * |b|)`, or 0 when either norm is 0.
 */
function cosineSimilarity(a, b) {
  const len = Math.min(a.length, b.length);
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < len; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  const denom = Math.sqrt(normA) * Math.sqrt(normB);
  return denom === 0 ? 0 : dot / denom;
}

/**
 * Generates and stores an embedding for every tracked company that has no
 * stored embedding, or whose stored embedding was produced by a model other
 * than `openRouterConfig.embeddingModel`.
 *
 * Companies are processed one at a time. A failure for one company is logged
 * and does not stop the remaining companies from being processed.
 *
 * @param {object} intelligenceDb - Database handle whose `prepare(sql)` returns
 *   statements with synchronous `all` / `get` / `run` methods.
 * @param {{ apiKey: string, embeddingModel: string }} openRouterConfig
 * @returns {Promise<void>}
 */
async function ensureCompanyEmbeddings(intelligenceDb, openRouterConfig) {
  const companies = intelligenceDb.prepare("SELECT * FROM tracked_companies").all();
  const getStoredModel = intelligenceDb.prepare(
    "SELECT model FROM company_embeddings WHERE company_id = ?"
  );
  const upsertEmbed = intelligenceDb.prepare(`
    INSERT INTO company_embeddings (company_id, embedding, model, generated_at)
    VALUES (?, ?, ?, CURRENT_TIMESTAMP)
    ON CONFLICT(company_id) DO UPDATE SET
      embedding = excluded.embedding,
      model = excluded.model,
      generated_at = excluded.generated_at
  `);

  for (const company of companies) {
    const existing = getStoredModel.get(company.id);
    // An embedding from a different model is not comparable with vectors from
    // the current model, so it is regenerated (the upsert overwrites the row).
    if (existing && existing.model === openRouterConfig.embeddingModel) continue;

    const text = `${company.name} is a company with ticker ${company.ticker}`;
    try {
      const embedding = await generateEmbedding(text, openRouterConfig);
      const buf = Buffer.from(new Float32Array(embedding).buffer);
      upsertEmbed.run(company.id, buf, openRouterConfig.embeddingModel);
      console.log(`[embeddings] generated embedding for ${company.name}`);
    } catch (err) {
      console.error(`[embeddings] failed for ${company.name}:`, err.message);
    }
  }
}

/**
 * Returns the tracked companies whose stored embedding is similar to at least
 * one article embedding of the event.
 *
 * A company matches when the cosine similarity between its embedding and any
 * of the event's article embeddings is >= the threshold. Only article
 * embeddings stored for `config.openRouter.embeddingModel` are loaded, and
 * company embeddings stored under a different model are skipped.
 *
 * @param {Iterable<*>} eventArticleIds - Article ids belonging to the event;
 *   ids without a stored embedding are ignored.
 * @param {object} archiveDb - Database handle holding `article_embedding_store`.
 * @param {object} intelligenceDb - Database handle holding `tracked_companies`
 *   and `company_embeddings`.
 * @param {object} config - Reads `config.intelligence.similarityThreshold`
 *   (default 0.35) and `config.openRouter.embeddingModel`.
 * @returns {Array<{ id: *, name: *, ticker: * }>} Matched companies.
 */
function findMatchedCompaniesByEmbedding(eventArticleIds, archiveDb, intelligenceDb, config) {
  const threshold = config.intelligence?.similarityThreshold ?? 0.35;
  const model = config.openRouter?.embeddingModel;

  // One joined query fetches the company fields together with the embedding,
  // instead of re-querying company_embeddings once per company.
  const companyRows = intelligenceDb.prepare(
    `SELECT tc.id, tc.name, tc.ticker, ce.embedding, ce.model AS embedding_model
       FROM company_embeddings ce
       JOIN tracked_companies tc ON tc.id = ce.company_id`
  ).all();
  if (companyRows.length === 0) return [];

  const articleIds = [...(eventArticleIds ?? [])];
  if (articleIds.length === 0) return [];

  // load article embeddings from archive — only articles that have one
  const getArticleEmbedding = archiveDb.prepare(
    "SELECT embedding FROM article_embedding_store WHERE article_id = ? AND model = ?"
  );
  const articleVectors = [];
  for (const articleId of articleIds) {
    const row = getArticleEmbedding.get(articleId, model);
    if (row?.embedding) articleVectors.push(blobToFloat32(row.embedding));
  }
  if (articleVectors.length === 0) return [];

  const matched = [];
  for (const { embedding, embedding_model: storedModel, ...company } of companyRows) {
    // Vectors from different models are not comparable; skip stale rows.
    if (storedModel !== model || !embedding) continue;

    const companyVec = blobToFloat32(embedding);
    const hit = articleVectors.some(
      (articleVec) => cosineSimilarity(companyVec, articleVec) >= threshold
    );
    if (hit) matched.push(company);
  }
  return matched;
}

module.exports = { generateEmbedding, ensureCompanyEmbeddings, findMatchedCompaniesByEmbedding };