Duriin-API/workers/db.js

327 lines
13 KiB
JavaScript

const Database = require("better-sqlite3");
const sqliteVec = require("sqlite-vec");
let archiveDb = null;
let intelligenceDb = null;
function getArchiveDb(dbPath) {
if (!archiveDb) {
archiveDb = new Database(dbPath, { readonly: true });
sqliteVec.load(archiveDb);
archiveDb.pragma("journal_mode = WAL");
}
return archiveDb;
}
function getIntelligenceDb(dbPath) {
if (!intelligenceDb) {
intelligenceDb = new Database(dbPath);
intelligenceDb.pragma("journal_mode = WAL");
}
return intelligenceDb;
}
function runMigrations(db) {
db.exec(`
CREATE TABLE IF NOT EXISTS cursors (
key TEXT PRIMARY KEY,
value INTEGER
);
CREATE TABLE IF NOT EXISTS tracked_companies (
id INTEGER PRIMARY KEY,
name TEXT,
ticker TEXT,
aliases TEXT
);
CREATE TABLE IF NOT EXISTS article_queue (
id INTEGER PRIMARY KEY,
article_id INTEGER UNIQUE,
status TEXT DEFAULT 'pending',
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
updated_at DATETIME
);
CREATE TABLE IF NOT EXISTS event_knowledge (
id INTEGER PRIMARY KEY,
event_id INTEGER,
company_id INTEGER,
type TEXT,
data TEXT,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS company_embeddings (
company_id INTEGER PRIMARY KEY,
embedding BLOB NOT NULL,
model TEXT NOT NULL,
generated_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS event_predictions (
id INTEGER PRIMARY KEY,
event_id INTEGER,
company_id INTEGER,
type TEXT,
direction TEXT,
magnitude TEXT,
timeframe TEXT,
rationale TEXT,
event_date TEXT,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS company_facts (
id INTEGER PRIMARY KEY,
company_id INTEGER,
type TEXT,
claim TEXT,
confidence TEXT,
confirmation_count INTEGER DEFAULT 1,
first_seen_at DATETIME,
last_seen_at DATETIME,
last_event_id INTEGER,
supporting_event_ids TEXT
);
CREATE UNIQUE INDEX IF NOT EXISTS idx_company_facts_unique ON company_facts (company_id, claim);
CREATE TABLE IF NOT EXISTS company_relationships (
id INTEGER PRIMARY KEY,
from_company_id INTEGER,
relationship_type TEXT,
to_entity TEXT,
to_company_id INTEGER,
confidence TEXT,
confirmation_count INTEGER DEFAULT 1,
first_seen_at DATETIME,
last_seen_at DATETIME,
supporting_event_ids TEXT
);
CREATE UNIQUE INDEX IF NOT EXISTS idx_company_relationships_unique
ON company_relationships (from_company_id, relationship_type, to_entity);
`);
}
function runColumnMigrations(db) {
try { db.exec("ALTER TABLE event_predictions ADD COLUMN event_date TEXT"); } catch (_) {}
try { db.exec("ALTER TABLE event_knowledge ADD COLUMN event_date TEXT"); } catch (_) {}
db.exec(`
CREATE TABLE IF NOT EXISTS worker_events (
id INTEGER PRIMARY KEY,
worker TEXT NOT NULL,
completed_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX IF NOT EXISTS idx_worker_events_lookup ON worker_events (worker, completed_at);
`);
db.exec(`
CREATE TABLE IF NOT EXISTS trade_signals (
id INTEGER PRIMARY KEY,
company_id INTEGER,
signal TEXT,
confidence TEXT,
timeframe TEXT,
risk_level TEXT,
risk_factors TEXT,
summary TEXT,
key_drivers TEXT,
supporting_prediction_ids TEXT,
generated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
window_days INTEGER
);
`);
// prune rows older than 1 hour so the table doesnt grow unbounded
db.exec(`DELETE FROM worker_events WHERE completed_at < datetime('now', '-1 hour')`);
}
function seedCompanies(db) {
const exists = db.prepare("SELECT id FROM tracked_companies WHERE name = ?");
const insert = db.prepare(
"INSERT INTO tracked_companies (name, ticker, aliases) VALUES (?, ?, ?)"
);
const companies = [
// semiconductors
{ name: "NVIDIA", ticker: "NVDA", aliases: ["Nvidia Corporation"] },
{ name: "TSMC", ticker: "TSM", aliases: ["Taiwan Semiconductor", "Taiwan Semiconductor Manufacturing Company"] },
{ name: "ASML", ticker: "ASML", aliases: ["ASML Holding"] },
{ name: "Intel", ticker: "INTC", aliases: ["Intel Corporation"] },
{ name: "AMD", ticker: "AMD", aliases: ["Advanced Micro Devices"] },
{ name: "Qualcomm", ticker: "QCOM", aliases: ["Qualcomm Incorporated"] },
{ name: "Broadcom", ticker: "AVGO", aliases: ["Broadcom Inc"] },
{ name: "Micron", ticker: "MU", aliases: ["Micron Technology"] },
{ name: "Texas Instruments", ticker: "TXN", aliases: ["TI"] },
{ name: "Applied Materials", ticker: "AMAT", aliases: ["Applied Materials Inc"] },
{ name: "Lam Research", ticker: "LRCX", aliases: ["Lam Research Corporation"] },
{ name: "KLA Corporation", ticker: "KLAC", aliases: ["KLA"] },
{ name: "Samsung", ticker: "005930.KS", aliases: ["Samsung Electronics", "Samsung Group"] },
{ name: "SK Hynix", ticker: "000660.KS", aliases: ["Hynix"] },
// big tech / cloud
{ name: "Microsoft", ticker: "MSFT", aliases: ["Microsoft Corporation"] },
{ name: "Apple", ticker: "AAPL", aliases: ["Apple Inc"] },
{ name: "Alphabet", ticker: "GOOGL", aliases: ["Google", "Google LLC", "DeepMind"] },
{ name: "Amazon", ticker: "AMZN", aliases: ["Amazon Web Services", "AWS"] },
{ name: "Meta", ticker: "META", aliases: ["Meta Platforms", "Facebook"] },
{ name: "Tesla", ticker: "TSLA", aliases: ["Tesla Inc", "Tesla Motors"] },
// AI / infrastructure
{ name: "OpenAI", ticker: "OPENAI", aliases: ["Open AI"] },
{ name: "Anthropic", ticker: "ANTHROPIC", aliases: [] },
{ name: "xAI", ticker: "XAI", aliases: ["x.AI"] },
{ name: "Palantir", ticker: "PLTR", aliases: ["Palantir Technologies"] },
{ name: "Super Micro Computer", ticker: "SMCI", aliases: ["Supermicro", "SMCI"] },
{ name: "Arista Networks", ticker: "ANET", aliases: ["Arista"] },
// networking / hardware
{ name: "Cisco", ticker: "CSCO", aliases: ["Cisco Systems"] },
{ name: "Marvell Technology", ticker: "MRVL", aliases: ["Marvell"] },
{ name: "Arm Holdings", ticker: "ARM", aliases: ["Arm", "ARM Ltd"] },
// enterprise software
{ name: "Oracle", ticker: "ORCL", aliases: ["Oracle Corporation"] },
{ name: "Salesforce", ticker: "CRM", aliases: ["Salesforce Inc"] },
{ name: "SAP", ticker: "SAP", aliases: ["SAP SE"] },
{ name: "ServiceNow", ticker: "NOW", aliases: [] },
// storage / infra
{ name: "Western Digital", ticker: "WDC", aliases: ["WD", "Western Digital Corporation"] },
{ name: "Seagate", ticker: "STX", aliases: ["Seagate Technology"] },
{ name: "Pure Storage", ticker: "PSTG", aliases: [] },
// more AI labs
{ name: "Mistral AI", ticker: "MISTRAL", aliases: ["Mistral"] },
{ name: "Cohere", ticker: "COHERE", aliases: [] },
{ name: "DeepSeek", ticker: "DEEPSEEK", aliases: ["DeepSeek AI"] },
{ name: "Stability AI", ticker: "STABILITY", aliases: [] },
{ name: "Inflection AI", ticker: "INFLECTION", aliases: [] },
{ name: "Scale AI", ticker: "SCALEAI", aliases: ["Scale"] },
{ name: "Hugging Face", ticker: "HF", aliases: [] },
{ name: "Cerebras Systems", ticker: "CBRS", aliases: ["Cerebras"] },
{ name: "Groq", ticker: "GROQ", aliases: [] },
// chinese tech
{ name: "Alibaba", ticker: "BABA", aliases: ["Alibaba Group", "Taobao", "Alipay"] },
{ name: "Tencent", ticker: "0700.HK", aliases: ["Tencent Holdings", "WeChat"] },
{ name: "Baidu", ticker: "BIDU", aliases: ["Baidu Inc"] },
{ name: "Huawei", ticker: "HUAWEI", aliases: ["Huawei Technologies"] },
{ name: "ByteDance", ticker: "BYTEDANCE", aliases: ["TikTok", "Douyin"] },
{ name: "Xiaomi", ticker: "1810.HK", aliases: [] },
{ name: "SMIC", ticker: "688981.SS", aliases: ["Semiconductor Manufacturing International Corporation"] },
{ name: "DJI", ticker: "DJI", aliases: ["Da-Jiang Innovations"] },
// defense / aerospace
{ name: "Lockheed Martin", ticker: "LMT", aliases: ["Lockheed"] },
{ name: "Raytheon Technologies", ticker: "RTX", aliases: ["RTX", "Raytheon"] },
{ name: "Northrop Grumman", ticker: "NOC", aliases: ["Northrop"] },
{ name: "Boeing", ticker: "BA", aliases: ["Boeing Company"] },
{ name: "General Dynamics", ticker: "GD", aliases: [] },
{ name: "BAE Systems", ticker: "BA.L", aliases: ["BAE"] },
{ name: "L3Harris Technologies", ticker: "LHX", aliases: ["L3Harris"] },
{ name: "Leidos", ticker: "LDOS", aliases: [] },
{ name: "SAIC", ticker: "SAIC", aliases: ["Science Applications International Corporation"] },
{ name: "Booz Allen Hamilton", ticker: "BAH", aliases: ["Booz Allen"] },
{ name: "Thales Group", ticker: "HO.PA", aliases: ["Thales"] },
{ name: "Airbus", ticker: "AIR.PA", aliases: ["Airbus SE"] },
{ name: "Leonardo", ticker: "LDO.MI", aliases: ["Leonardo SpA"] },
{ name: "Rheinmetall", ticker: "RHM.DE", aliases: [] },
// telecom
{ name: "Ericsson", ticker: "ERIC", aliases: ["Telefonaktiebolaget LM Ericsson"] },
{ name: "Nokia", ticker: "NOK", aliases: ["Nokia Corporation"] },
{ name: "AT&T", ticker: "T", aliases: [] },
{ name: "Verizon", ticker: "VZ", aliases: ["Verizon Communications"] },
{ name: "T-Mobile", ticker: "TMUS", aliases: ["T-Mobile US"] },
{ name: "Deutsche Telekom", ticker: "DTE.DE", aliases: [] },
{ name: "SoftBank", ticker: "9984.T", aliases: ["SoftBank Group"] },
// finance / banking
{ name: "JPMorgan Chase", ticker: "JPM", aliases: ["JPMorgan", "JP Morgan"] },
{ name: "Goldman Sachs", ticker: "GS", aliases: ["Goldman"] },
{ name: "BlackRock", ticker: "BLK", aliases: [] },
{ name: "Visa", ticker: "V", aliases: [] },
{ name: "Mastercard", ticker: "MA", aliases: [] },
{ name: "Morgan Stanley", ticker: "MS", aliases: [] },
{ name: "Citigroup", ticker: "C", aliases: ["Citi"] },
{ name: "Bank of America", ticker: "BAC", aliases: ["BofA"] },
{ name: "HSBC", ticker: "HSBA.L", aliases: ["HSBC Holdings"] },
{ name: "UBS", ticker: "UBS", aliases: [] },
// energy / resources
{ name: "ExxonMobil", ticker: "XOM", aliases: ["Exxon"] },
{ name: "Chevron", ticker: "CVX", aliases: [] },
{ name: "Shell", ticker: "SHEL", aliases: ["Shell plc", "Royal Dutch Shell"] },
{ name: "BP", ticker: "BP", aliases: ["British Petroleum"] },
{ name: "TotalEnergies", ticker: "TTE.PA", aliases: ["Total"] },
{ name: "Saudi Aramco", ticker: "2222.SR", aliases: ["Aramco"] },
{ name: "NextEra Energy", ticker: "NEE", aliases: [] },
// pharma / biotech
{ name: "Pfizer", ticker: "PFE", aliases: [] },
{ name: "Johnson & Johnson", ticker: "JNJ", aliases: ["J&J"] },
{ name: "Moderna", ticker: "MRNA", aliases: [] },
{ name: "AstraZeneca", ticker: "AZN", aliases: [] },
{ name: "Novartis", ticker: "NVS", aliases: [] },
{ name: "Roche", ticker: "ROG.SW", aliases: ["Roche Holding"] },
{ name: "Eli Lilly", ticker: "LLY", aliases: [] },
{ name: "CRISPR Therapeutics", ticker: "CRSP", aliases: [] },
// automotive
{ name: "Toyota", ticker: "TM", aliases: ["Toyota Motor"] },
{ name: "Volkswagen", ticker: "VOW.DE", aliases: ["VW"] },
{ name: "Ford", ticker: "F", aliases: ["Ford Motor"] },
{ name: "General Motors", ticker: "GM", aliases: ["GM"] },
{ name: "BYD", ticker: "002594.SZ", aliases: ["BYD Company"] },
{ name: "Rivian", ticker: "RIVN", aliases: [] },
// media / entertainment
{ name: "Netflix", ticker: "NFLX", aliases: [] },
{ name: "Disney", ticker: "DIS", aliases: ["The Walt Disney Company"] },
{ name: "Comcast", ticker: "CMCSA", aliases: ["NBCUniversal"] },
{ name: "Spotify", ticker: "SPOT", aliases: [] },
{ name: "Warner Bros Discovery", ticker: "WBD", aliases: ["Warner Bros", "HBO"] },
{ name: "News Corp", ticker: "NWSA", aliases: ["Rupert Murdoch"] },
// retail / e-commerce
{ name: "Walmart", ticker: "WMT", aliases: [] },
{ name: "Shopify", ticker: "SHOP", aliases: [] },
{ name: "eBay", ticker: "EBAY", aliases: [] },
// cybersecurity
{ name: "CrowdStrike", ticker: "CRWD", aliases: [] },
{ name: "Palo Alto Networks", ticker: "PANW", aliases: [] },
{ name: "SentinelOne", ticker: "S", aliases: [] },
{ name: "Fortinet", ticker: "FTNT", aliases: [] },
{ name: "Cloudflare", ticker: "NET", aliases: [] },
{ name: "Recorded Future", ticker: "RF", aliases: [] },
// space
{ name: "SpaceX", ticker: "SPACEX", aliases: ["Space Exploration Technologies"] },
{ name: "Blue Origin", ticker: "BLUEORIGIN", aliases: [] },
{ name: "Planet Labs", ticker: "PL", aliases: [] },
// consulting / professional services
{ name: "McKinsey", ticker: "MCKINSEY", aliases: ["McKinsey & Company"] },
{ name: "Accenture", ticker: "ACN", aliases: [] },
{ name: "Deloitte", ticker: "DELOITTE", aliases: [] },
];
let added = 0;
for (const c of companies) {
if (!exists.get(c.name)) {
insert.run(c.name, c.ticker, JSON.stringify(c.aliases));
added++;
}
}
if (added > 0) console.log(`[db] seeded ${added} new tracked companies`);
}
module.exports = { getArchiveDb, getIntelligenceDb, runMigrations, runColumnMigrations, seedCompanies };