const path = require('path');
const Database = require('better-sqlite3');
const sqliteVec = require('sqlite-vec');
const config = require('./config');

/**
 * Database bootstrap module.
 *
 * Opens the SQLite database, loads the sqlite-vec extension, brings the schema
 * up to date (table creation, column additions, one legacy table rebuild,
 * indexes), flags index-like pages, and exports the open connection.
 */

// Column definitions of the `articles` table. Shared by the initial CREATE
// TABLE and by the rebuild below so the two definitions cannot drift apart.
const ARTICLES_COLUMNS_SQL = `
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    title TEXT NOT NULL,
    description TEXT,
    content TEXT,
    image TEXT,
    content_status TEXT,
    content_error TEXT,
    content_attempted_at TEXT,
    is_index_page INTEGER NOT NULL DEFAULT 0,
    url TEXT NOT NULL UNIQUE,
    normalized_title TEXT NOT NULL,
    source TEXT NOT NULL,
    pub_date TEXT,
    ingested_at TEXT NOT NULL DEFAULT (datetime('now'))
`;

// Columns that may be missing from a table created by an earlier schema.
const ADD_COLUMN_STATEMENTS = [
  'ALTER TABLE articles ADD COLUMN image TEXT',
  'ALTER TABLE articles ADD COLUMN content_status TEXT',
  'ALTER TABLE articles ADD COLUMN content_error TEXT',
  'ALTER TABLE articles ADD COLUMN content_attempted_at TEXT',
  'ALTER TABLE articles ADD COLUMN is_index_page INTEGER NOT NULL DEFAULT 0'
];

// The database path is resolved relative to the parent of this file's directory.
const dbPath = path.resolve(__dirname, '..', config.database.path || './archive.sqlite');
const db = new Database(dbPath);

sqliteVec.load(db);
db.pragma('journal_mode = WAL');

db.exec(`
  CREATE TABLE IF NOT EXISTS articles (${ARTICLES_COLUMNS_SQL});
`);

// Add missing columns BEFORE the rebuild: the rebuild's INSERT ... SELECT reads
// these columns from the existing table and would fail with "no such column"
// if they had not been added yet.
for (const statement of ADD_COLUMN_STATEMENTS) {
  try {
    db.exec(statement);
  } catch (error) {
    // The column already exists; anything else is a real failure.
    if (!String(error.message).includes('duplicate column name')) {
      throw error;
    }
  }
}

/**
 * Rebuilds the `articles` table when it still carries a UNIQUE constraint on
 * `normalized_title` alone.
 *
 * Detection looks for an index whose origin is 'u' (created by a UNIQUE
 * constraint) covering exactly the `normalized_title` column. When found, rows
 * are copied into a table created from the current column definitions (which
 * has no such constraint) and the new table replaces the old one.
 *
 * The copy runs inside a better-sqlite3 transaction function, so a failure
 * rolls everything back instead of leaving an open transaction on the
 * connection.
 *
 * @returns {void}
 * @throws {Error} Any SQLite error raised while inspecting or rebuilding.
 */
function rebuildArticlesTableIfNeeded() {
  const indexes = db.prepare(`PRAGMA index_list('articles')`).all();

  const hasUniqueNormalizedTitleIndex = indexes.some((index) => {
    if (index.origin !== 'u' || !index.name) {
      return false;
    }

    // Index names cannot be bound as parameters here, so escape single quotes.
    const escapedName = index.name.replace(/'/g, "''");
    const columns = db.prepare(`PRAGMA index_info('${escapedName}')`).all();
    return columns.length === 1 && columns[0].name === 'normalized_title';
  });

  if (!hasUniqueNormalizedTitleIndex) {
    return;
  }

  const rebuild = db.transaction(() => {
    // is_index_page is written as a literal 0 for every copied row; the UPDATE
    // at the end of this module sets the flag again from URL/title patterns.
    db.exec(`
      CREATE TABLE articles_rebuild (${ARTICLES_COLUMNS_SQL});

      INSERT INTO articles_rebuild (
        id, title, description, content, image,
        content_status, content_error, content_attempted_at,
        is_index_page, url, normalized_title, source, pub_date, ingested_at
      )
      SELECT
        id, title, description, content, image,
        content_status, content_error, content_attempted_at,
        0, url, normalized_title, source, pub_date, ingested_at
      FROM articles;

      DROP TABLE articles;
      ALTER TABLE articles_rebuild RENAME TO articles;
    `);
  });

  rebuild();
}

rebuildArticlesTableIfNeeded();

// Created after the rebuild: dropping the old table also drops its indexes.
db.exec(`
  CREATE INDEX IF NOT EXISTS idx_articles_source ON articles(source);
  CREATE INDEX IF NOT EXISTS idx_articles_pub_date ON articles(pub_date);
  CREATE INDEX IF NOT EXISTS idx_articles_ingested_at ON articles(ingested_at);
  CREATE INDEX IF NOT EXISTS idx_articles_normalized_title ON articles(normalized_title);
`);

// sqlite-vec virtual table holding one 1024-float embedding per article id.
db.exec(`
  CREATE VIRTUAL TABLE IF NOT EXISTS article_embeddings USING vec0(
    article_id INTEGER PRIMARY KEY,
    embedding FLOAT[1024]
  );
`);

// Stored embeddings keyed by the query text.
db.exec(`
  CREATE TABLE IF NOT EXISTS query_embeddings (
    query TEXT PRIMARY KEY,
    embedding BLOB NOT NULL,
    created_at TEXT NOT NULL DEFAULT (datetime('now'))
  );
`);

// Flag rows whose URL or title matches one of the listing-page patterns below.
// Only rows still at is_index_page = 0 are touched, so running this on every
// load does not rewrite rows that are already flagged.
db.exec(`
  UPDATE articles
  SET is_index_page = 1
  WHERE is_index_page = 0
    AND (
      LOWER(url) LIKE '%/category/%'
      OR LOWER(url) LIKE '%/categories/%'
      OR LOWER(url) LIKE '%/tag/%'
      OR LOWER(url) LIKE '%/tags/%'
      OR LOWER(url) LIKE '%/topic/%'
      OR LOWER(url) LIKE '%/topics/%'
      OR LOWER(url) LIKE '%/section/%'
      OR LOWER(url) LIKE '%/sections/%'
      OR LOWER(url) LIKE '%/archive%'
      OR LOWER(url) LIKE '%/archives/%'
      OR LOWER(url) LIKE '%/authors/%'
      OR LOWER(url) LIKE '%/search%'
      OR LOWER(title) LIKE '%category%'
      OR LOWER(title) LIKE '%archives%'
      OR LOWER(title) LIKE '%archive%'
      OR LOWER(title) LIKE '%latest news%'
    )
`);

module.exports = db;