const Database = require("better-sqlite3"); const sqliteVec = require("sqlite-vec"); let archiveDb = null; let intelligenceDb = null; function getArchiveDb(dbPath) { if (!archiveDb) { archiveDb = new Database(dbPath, { readonly: true }); sqliteVec.load(archiveDb); archiveDb.pragma("journal_mode = WAL"); } return archiveDb; } function getIntelligenceDb(dbPath) { if (!intelligenceDb) { intelligenceDb = new Database(dbPath); intelligenceDb.pragma("journal_mode = WAL"); } return intelligenceDb; } function runMigrations(db) { db.exec(` CREATE TABLE IF NOT EXISTS cursors ( key TEXT PRIMARY KEY, value INTEGER ); CREATE TABLE IF NOT EXISTS tracked_companies ( id INTEGER PRIMARY KEY, name TEXT, ticker TEXT, aliases TEXT ); CREATE TABLE IF NOT EXISTS article_queue ( id INTEGER PRIMARY KEY, article_id INTEGER UNIQUE, status TEXT DEFAULT 'pending', created_at DATETIME DEFAULT CURRENT_TIMESTAMP, updated_at DATETIME ); CREATE TABLE IF NOT EXISTS event_knowledge ( id INTEGER PRIMARY KEY, event_id INTEGER, company_id INTEGER, type TEXT, data TEXT, created_at DATETIME DEFAULT CURRENT_TIMESTAMP ); CREATE TABLE IF NOT EXISTS company_embeddings ( company_id INTEGER PRIMARY KEY, embedding BLOB NOT NULL, model TEXT NOT NULL, generated_at DATETIME DEFAULT CURRENT_TIMESTAMP ); CREATE TABLE IF NOT EXISTS event_predictions ( id INTEGER PRIMARY KEY, event_id INTEGER, company_id INTEGER, type TEXT, direction TEXT, magnitude TEXT, timeframe TEXT, rationale TEXT, event_date TEXT, created_at DATETIME DEFAULT CURRENT_TIMESTAMP ); CREATE TABLE IF NOT EXISTS company_facts ( id INTEGER PRIMARY KEY, company_id INTEGER, type TEXT, claim TEXT, confidence TEXT, confirmation_count INTEGER DEFAULT 1, first_seen_at DATETIME, last_seen_at DATETIME, last_event_id INTEGER, supporting_event_ids TEXT ); CREATE UNIQUE INDEX IF NOT EXISTS idx_company_facts_unique ON company_facts (company_id, claim); CREATE TABLE IF NOT EXISTS company_relationships ( id INTEGER PRIMARY KEY, from_company_id INTEGER, relationship_type TEXT, to_entity TEXT, to_company_id INTEGER, confidence TEXT, confirmation_count INTEGER DEFAULT 1, first_seen_at DATETIME, last_seen_at DATETIME, supporting_event_ids TEXT ); CREATE UNIQUE INDEX IF NOT EXISTS idx_company_relationships_unique ON company_relationships (from_company_id, relationship_type, to_entity); `); } function runColumnMigrations(db) { try { db.exec("ALTER TABLE event_predictions ADD COLUMN event_date TEXT"); } catch (_) {} try { db.exec("ALTER TABLE event_knowledge ADD COLUMN event_date TEXT"); } catch (_) {} db.exec(` CREATE TABLE IF NOT EXISTS worker_events ( id INTEGER PRIMARY KEY, worker TEXT NOT NULL, completed_at DATETIME DEFAULT CURRENT_TIMESTAMP ); CREATE INDEX IF NOT EXISTS idx_worker_events_lookup ON worker_events (worker, completed_at); `); db.exec(` CREATE TABLE IF NOT EXISTS trade_signals ( id INTEGER PRIMARY KEY, company_id INTEGER, signal TEXT, confidence TEXT, timeframe TEXT, risk_level TEXT, risk_factors TEXT, summary TEXT, key_drivers TEXT, supporting_prediction_ids TEXT, generated_at DATETIME DEFAULT CURRENT_TIMESTAMP, window_days INTEGER ); `); // prune rows older than 1 hour so the table doesnt grow unbounded db.exec(`DELETE FROM worker_events WHERE completed_at < datetime('now', '-1 hour')`); } function seedCompanies(db) { const exists = db.prepare("SELECT id FROM tracked_companies WHERE name = ?"); const insert = db.prepare( "INSERT INTO tracked_companies (name, ticker, aliases) VALUES (?, ?, ?)" ); const companies = [ // semiconductors { name: "NVIDIA", ticker: "NVDA", aliases: ["Nvidia Corporation"] }, { name: "TSMC", ticker: "TSM", aliases: ["Taiwan Semiconductor", "Taiwan Semiconductor Manufacturing Company"] }, { name: "ASML", ticker: "ASML", aliases: ["ASML Holding"] }, { name: "Intel", ticker: "INTC", aliases: ["Intel Corporation"] }, { name: "AMD", ticker: "AMD", aliases: ["Advanced Micro Devices"] }, { name: "Qualcomm", ticker: "QCOM", aliases: ["Qualcomm Incorporated"] }, { name: "Broadcom", ticker: "AVGO", aliases: ["Broadcom Inc"] }, { name: "Micron", ticker: "MU", aliases: ["Micron Technology"] }, { name: "Texas Instruments", ticker: "TXN", aliases: ["TI"] }, { name: "Applied Materials", ticker: "AMAT", aliases: ["Applied Materials Inc"] }, { name: "Lam Research", ticker: "LRCX", aliases: ["Lam Research Corporation"] }, { name: "KLA Corporation", ticker: "KLAC", aliases: ["KLA"] }, { name: "Samsung", ticker: "005930.KS", aliases: ["Samsung Electronics", "Samsung Group"] }, { name: "SK Hynix", ticker: "000660.KS", aliases: ["Hynix"] }, // big tech / cloud { name: "Microsoft", ticker: "MSFT", aliases: ["Microsoft Corporation"] }, { name: "Apple", ticker: "AAPL", aliases: ["Apple Inc"] }, { name: "Alphabet", ticker: "GOOGL", aliases: ["Google", "Google LLC", "DeepMind"] }, { name: "Amazon", ticker: "AMZN", aliases: ["Amazon Web Services", "AWS"] }, { name: "Meta", ticker: "META", aliases: ["Meta Platforms", "Facebook"] }, { name: "Tesla", ticker: "TSLA", aliases: ["Tesla Inc", "Tesla Motors"] }, // AI / infrastructure { name: "OpenAI", ticker: "OPENAI", aliases: ["Open AI"] }, { name: "Anthropic", ticker: "ANTHROPIC", aliases: [] }, { name: "xAI", ticker: "XAI", aliases: ["x.AI"] }, { name: "Palantir", ticker: "PLTR", aliases: ["Palantir Technologies"] }, { name: "Super Micro Computer", ticker: "SMCI", aliases: ["Supermicro", "SMCI"] }, { name: "Arista Networks", ticker: "ANET", aliases: ["Arista"] }, // networking / hardware { name: "Cisco", ticker: "CSCO", aliases: ["Cisco Systems"] }, { name: "Marvell Technology", ticker: "MRVL", aliases: ["Marvell"] }, { name: "Arm Holdings", ticker: "ARM", aliases: ["Arm", "ARM Ltd"] }, // enterprise software { name: "Oracle", ticker: "ORCL", aliases: ["Oracle Corporation"] }, { name: "Salesforce", ticker: "CRM", aliases: ["Salesforce Inc"] }, { name: "SAP", ticker: "SAP", aliases: ["SAP SE"] }, { name: "ServiceNow", ticker: "NOW", aliases: [] }, // storage / infra { name: "Western Digital", ticker: "WDC", aliases: ["WD", "Western Digital Corporation"] }, { name: "Seagate", ticker: "STX", aliases: ["Seagate Technology"] }, { name: "Pure Storage", ticker: "PSTG", aliases: [] }, // more AI labs { name: "Mistral AI", ticker: "MISTRAL", aliases: ["Mistral"] }, { name: "Cohere", ticker: "COHERE", aliases: [] }, { name: "DeepSeek", ticker: "DEEPSEEK", aliases: ["DeepSeek AI"] }, { name: "Stability AI", ticker: "STABILITY", aliases: [] }, { name: "Inflection AI", ticker: "INFLECTION", aliases: [] }, { name: "Scale AI", ticker: "SCALEAI", aliases: ["Scale"] }, { name: "Hugging Face", ticker: "HF", aliases: [] }, { name: "Cerebras Systems", ticker: "CBRS", aliases: ["Cerebras"] }, { name: "Groq", ticker: "GROQ", aliases: [] }, // chinese tech { name: "Alibaba", ticker: "BABA", aliases: ["Alibaba Group", "Taobao", "Alipay"] }, { name: "Tencent", ticker: "0700.HK", aliases: ["Tencent Holdings", "WeChat"] }, { name: "Baidu", ticker: "BIDU", aliases: ["Baidu Inc"] }, { name: "Huawei", ticker: "HUAWEI", aliases: ["Huawei Technologies"] }, { name: "ByteDance", ticker: "BYTEDANCE", aliases: ["TikTok", "Douyin"] }, { name: "Xiaomi", ticker: "1810.HK", aliases: [] }, { name: "SMIC", ticker: "688981.SS", aliases: ["Semiconductor Manufacturing International Corporation"] }, { name: "DJI", ticker: "DJI", aliases: ["Da-Jiang Innovations"] }, // defense / aerospace { name: "Lockheed Martin", ticker: "LMT", aliases: ["Lockheed"] }, { name: "Raytheon Technologies", ticker: "RTX", aliases: ["RTX", "Raytheon"] }, { name: "Northrop Grumman", ticker: "NOC", aliases: ["Northrop"] }, { name: "Boeing", ticker: "BA", aliases: ["Boeing Company"] }, { name: "General Dynamics", ticker: "GD", aliases: [] }, { name: "BAE Systems", ticker: "BA.L", aliases: ["BAE"] }, { name: "L3Harris Technologies", ticker: "LHX", aliases: ["L3Harris"] }, { name: "Leidos", ticker: "LDOS", aliases: [] }, { name: "SAIC", ticker: "SAIC", aliases: ["Science Applications International Corporation"] }, { name: "Booz Allen Hamilton", ticker: "BAH", aliases: ["Booz Allen"] }, { name: "Thales Group", ticker: "HO.PA", aliases: ["Thales"] }, { name: "Airbus", ticker: "AIR.PA", aliases: ["Airbus SE"] }, { name: "Leonardo", ticker: "LDO.MI", aliases: ["Leonardo SpA"] }, { name: "Rheinmetall", ticker: "RHM.DE", aliases: [] }, // telecom { name: "Ericsson", ticker: "ERIC", aliases: ["Telefonaktiebolaget LM Ericsson"] }, { name: "Nokia", ticker: "NOK", aliases: ["Nokia Corporation"] }, { name: "AT&T", ticker: "T", aliases: [] }, { name: "Verizon", ticker: "VZ", aliases: ["Verizon Communications"] }, { name: "T-Mobile", ticker: "TMUS", aliases: ["T-Mobile US"] }, { name: "Deutsche Telekom", ticker: "DTE.DE", aliases: [] }, { name: "SoftBank", ticker: "9984.T", aliases: ["SoftBank Group"] }, // finance / banking { name: "JPMorgan Chase", ticker: "JPM", aliases: ["JPMorgan", "JP Morgan"] }, { name: "Goldman Sachs", ticker: "GS", aliases: ["Goldman"] }, { name: "BlackRock", ticker: "BLK", aliases: [] }, { name: "Visa", ticker: "V", aliases: [] }, { name: "Mastercard", ticker: "MA", aliases: [] }, { name: "Morgan Stanley", ticker: "MS", aliases: [] }, { name: "Citigroup", ticker: "C", aliases: ["Citi"] }, { name: "Bank of America", ticker: "BAC", aliases: ["BofA"] }, { name: "HSBC", ticker: "HSBA.L", aliases: ["HSBC Holdings"] }, { name: "UBS", ticker: "UBS", aliases: [] }, // energy / resources { name: "ExxonMobil", ticker: "XOM", aliases: ["Exxon"] }, { name: "Chevron", ticker: "CVX", aliases: [] }, { name: "Shell", ticker: "SHEL", aliases: ["Shell plc", "Royal Dutch Shell"] }, { name: "BP", ticker: "BP", aliases: ["British Petroleum"] }, { name: "TotalEnergies", ticker: "TTE.PA", aliases: ["Total"] }, { name: "Saudi Aramco", ticker: "2222.SR", aliases: ["Aramco"] }, { name: "NextEra Energy", ticker: "NEE", aliases: [] }, // pharma / biotech { name: "Pfizer", ticker: "PFE", aliases: [] }, { name: "Johnson & Johnson", ticker: "JNJ", aliases: ["J&J"] }, { name: "Moderna", ticker: "MRNA", aliases: [] }, { name: "AstraZeneca", ticker: "AZN", aliases: [] }, { name: "Novartis", ticker: "NVS", aliases: [] }, { name: "Roche", ticker: "ROG.SW", aliases: ["Roche Holding"] }, { name: "Eli Lilly", ticker: "LLY", aliases: [] }, { name: "CRISPR Therapeutics", ticker: "CRSP", aliases: [] }, // automotive { name: "Toyota", ticker: "TM", aliases: ["Toyota Motor"] }, { name: "Volkswagen", ticker: "VOW.DE", aliases: ["VW"] }, { name: "Ford", ticker: "F", aliases: ["Ford Motor"] }, { name: "General Motors", ticker: "GM", aliases: ["GM"] }, { name: "BYD", ticker: "002594.SZ", aliases: ["BYD Company"] }, { name: "Rivian", ticker: "RIVN", aliases: [] }, // media / entertainment { name: "Netflix", ticker: "NFLX", aliases: [] }, { name: "Disney", ticker: "DIS", aliases: ["The Walt Disney Company"] }, { name: "Comcast", ticker: "CMCSA", aliases: ["NBCUniversal"] }, { name: "Spotify", ticker: "SPOT", aliases: [] }, { name: "Warner Bros Discovery", ticker: "WBD", aliases: ["Warner Bros", "HBO"] }, { name: "News Corp", ticker: "NWSA", aliases: ["Rupert Murdoch"] }, // retail / e-commerce { name: "Walmart", ticker: "WMT", aliases: [] }, { name: "Shopify", ticker: "SHOP", aliases: [] }, { name: "eBay", ticker: "EBAY", aliases: [] }, // cybersecurity { name: "CrowdStrike", ticker: "CRWD", aliases: [] }, { name: "Palo Alto Networks", ticker: "PANW", aliases: [] }, { name: "SentinelOne", ticker: "S", aliases: [] }, { name: "Fortinet", ticker: "FTNT", aliases: [] }, { name: "Cloudflare", ticker: "NET", aliases: [] }, { name: "Recorded Future", ticker: "RF", aliases: [] }, // space { name: "SpaceX", ticker: "SPACEX", aliases: ["Space Exploration Technologies"] }, { name: "Blue Origin", ticker: "BLUEORIGIN", aliases: [] }, { name: "Planet Labs", ticker: "PL", aliases: [] }, // consulting / professional services { name: "McKinsey", ticker: "MCKINSEY", aliases: ["McKinsey & Company"] }, { name: "Accenture", ticker: "ACN", aliases: [] }, { name: "Deloitte", ticker: "DELOITTE", aliases: [] }, ]; let added = 0; for (const c of companies) { if (!exists.get(c.name)) { insert.run(c.name, c.ticker, JSON.stringify(c.aliases)); added++; } } if (added > 0) console.log(`[db] seeded ${added} new tracked companies`); } module.exports = { getArchiveDb, getIntelligenceDb, runMigrations, runColumnMigrations, seedCompanies };