const cron = require('node-cron'); const config = require('./config'); const { ingestBatch } = require('./ingest'); const { fetchRssArticles } = require('./sources/rss'); const { fetchGdeltArticles } = require('./sources/gdelt'); const { fetchEdgarArticles } = require('./sources/edgar'); const { fetchAlphaVantageArticles } = require('./sources/alphavantage'); const { fetchFinnhubArticles } = require('./sources/finnhub'); const { fetchCrawlerArticles } = require('./sources/newsCrawler'); const { backfillMissingContent } = require('./content'); const { backfillMissingEmbeddings } = require('./embeddings'); async function runSource(source, fetcher) { try { const articles = await fetcher(); return await ingestBatch(source, articles); } catch (error) { console.error(`${source} ingestion failed:`, error); return { source, inserted: 0, total: 0, error: error.message }; } } async function runAllIngestions() { const results = []; results.push(await runSource('rss', fetchRssArticles)); results.push(await runSource('gdelt', fetchGdeltArticles)); results.push(await runSource('edgar', fetchEdgarArticles)); results.push(await runSource('alphavantage', fetchAlphaVantageArticles)); results.push(await runSource('finnhub', fetchFinnhubArticles)); results.push(await runSource('news_crawler', fetchCrawlerArticles)); try { await backfillMissingContent(); } catch (error) { console.error('content backfill failed:', error); } try { await backfillMissingEmbeddings(); } catch (error) { console.error('embedding backfill failed:', error); } return results; } function startScheduler() { cron.schedule(config.scheduler.rss, async () => { await runSource('rss', fetchRssArticles); }); cron.schedule(config.scheduler.gdelt, async () => { await runSource('gdelt', fetchGdeltArticles); }); cron.schedule(config.scheduler.edgar, async () => { await runSource('edgar', fetchEdgarArticles); }); cron.schedule(config.scheduler.alphaVantage, async () => { await runSource('alphavantage', fetchAlphaVantageArticles); }); cron.schedule(config.scheduler.finnhub, async () => { await runSource('finnhub', fetchFinnhubArticles); }); if (config.scheduler.newsCrawler) { cron.schedule(config.scheduler.newsCrawler, async () => { await runSource('news_crawler', fetchCrawlerArticles); }); } cron.schedule('0 * * * *', async () => { try { await backfillMissingContent(); } catch (error) { console.error('content backfill failed:', error); } try { await backfillMissingEmbeddings(); } catch (error) { console.error('embedding backfill failed:', error); } }); } module.exports = { startScheduler, runAllIngestions, };