const db = require('./db'); const { normalizeTitle } = require('./dedup'); const { fetchAndStoreContent } = require('./content'); const { markSourceRun } = require('./state'); const insertArticle = db.prepare(` INSERT INTO articles ( title, description, content, image, url, normalized_title, source, pub_date, ingested_at ) VALUES (?, ?, NULL, NULL, ?, ?, ?, ?, ?) `); const findByUrl = db.prepare('SELECT id FROM articles WHERE url = ?'); function normalizePubDate(value) { if (!value) { return null; } if (typeof value === 'number') { const parsed = new Date(value); return Number.isNaN(parsed.getTime()) ? null : parsed.toISOString(); } const input = String(value).trim(); if (!input) { return null; } if (/^\d{8}T\d{6}$/.test(input)) { const normalized = `${input.slice(0, 4)}-${input.slice(4, 6)}-${input.slice(6, 8)}T${input.slice(9, 11)}:${input.slice(11, 13)}:${input.slice(13, 15)}Z`; const parsed = new Date(normalized); return Number.isNaN(parsed.getTime()) ? null : parsed.toISOString(); } if (/^\d{8}T\d{6}Z$/.test(input)) { const normalized = `${input.slice(0, 4)}-${input.slice(4, 6)}-${input.slice(6, 8)}T${input.slice(9, 11)}:${input.slice(11, 13)}:${input.slice(13, 15)}Z`; const parsed = new Date(normalized); return Number.isNaN(parsed.getTime()) ? null : parsed.toISOString(); } if (/^\d{4}-\d{2}-\d{2}$/.test(input)) { return `${input}T00:00:00.000Z`; } const parsed = new Date(input); return Number.isNaN(parsed.getTime()) ? null : parsed.toISOString(); } function ingestArticle(article) { const title = String(article.title || '').trim(); const url = String(article.url || '').trim(); const source = String(article.source || '').trim(); if (!title || !url || !source) { return { inserted: false, reason: 'missing_required_fields' }; } const normalizedTitle = normalizeTitle(title); if (!normalizedTitle) { return { inserted: false, reason: 'empty_normalized_title' }; } const description = article.description == null ? null : String(article.description).trim() || null; const pubDate = normalizePubDate(article.pubDate); const ingestedAt = new Date().toISOString(); try { const result = insertArticle.run( title, description, url, normalizedTitle, source, pubDate, ingestedAt ); fetchAndStoreContent(result.lastInsertRowid, url); return { inserted: true, id: result.lastInsertRowid }; } catch (error) { if (error.code === 'SQLITE_CONSTRAINT_UNIQUE') { const duplicateByUrl = findByUrl.get(url); if (duplicateByUrl) { return { inserted: false, reason: 'duplicate_url', id: duplicateByUrl.id }; } return { inserted: false, reason: 'duplicate' }; } throw error; } } async function ingestBatch(source, articles) { let inserted = 0; for (const article of articles) { const result = ingestArticle({ ...article, source: article.source || source }); if (result.inserted) { inserted += 1; } } markSourceRun(source); return { source, inserted, total: articles.length }; } module.exports = { ingestArticle, ingestBatch, };