// 93 lines · 2.7 KiB · JavaScript
const cron = require('node-cron');
|
|
const config = require('./config');
|
|
const { ingestBatch } = require('./ingest');
|
|
const { fetchRssArticles } = require('./sources/rss');
|
|
const { fetchGdeltArticles } = require('./sources/gdelt');
|
|
const { fetchEdgarArticles } = require('./sources/edgar');
|
|
const { fetchAlphaVantageArticles } = require('./sources/alphavantage');
|
|
const { fetchFinnhubArticles } = require('./sources/finnhub');
|
|
const { fetchCrawlerArticles } = require('./sources/newsCrawler');
|
|
const { backfillMissingContent } = require('./content');
|
|
const { backfillMissingEmbeddings } = require('./embeddings');
|
|
|
|
/**
 * Fetches articles from a single source and passes them to `ingestBatch`.
 *
 * Anything thrown by the fetcher or by `ingestBatch` is logged and turned
 * into a zero-count failure record instead of propagating to the caller.
 *
 * @param {string} source - Source label; forwarded to `ingestBatch`, used in
 *   the log line, and echoed back in the failure record.
 * @param {Function} fetcher - Called with no arguments; its awaited result is
 *   handed to `ingestBatch` as the articles.
 * @returns {Promise<*>} Whatever `ingestBatch` resolves to, or
 *   `{ source, inserted: 0, total: 0, error }` when something was thrown.
 */
async function runSource(source, fetcher) {
  try {
    const articles = await fetcher();
    // `return await` keeps a rejection from ingestBatch inside this try block.
    return await ingestBatch(source, articles);
  } catch (error) {
    console.error(`${source} ingestion failed:`, error);

    // Any value can be thrown (null, undefined, a bare string, ...). Reading
    // `.message` unconditionally would throw from inside this handler for
    // null/undefined and report `undefined` for primitives, so fall back to
    // the stringified value when there is no message to report.
    const hasMessage = error != null && error.message !== undefined;

    return {
      source,
      inserted: 0,
      total: 0,
      error: hasMessage ? error.message : String(error),
    };
  }
}
|
|
|
|
/**
 * Runs every source ingestion one after another, then the two backfill
 * passes (content first, embeddings second).
 *
 * A failing backfill is logged and does not stop the next step or change
 * the returned value.
 *
 * @returns {Promise<Array>} One `runSource` result per source, in the order
 *   rss, gdelt, edgar, alphavantage, finnhub, news_crawler.
 */
async function runAllIngestions() {
  // [source label, fetcher] pairs, listed in execution order.
  const pipeline = [
    ['rss', fetchRssArticles],
    ['gdelt', fetchGdeltArticles],
    ['edgar', fetchEdgarArticles],
    ['alphavantage', fetchAlphaVantageArticles],
    ['finnhub', fetchFinnhubArticles],
    ['news_crawler', fetchCrawlerArticles],
  ];

  // [log prefix on failure, backfill function] pairs, listed in execution order.
  const backfillSteps = [
    ['content backfill failed:', backfillMissingContent],
    ['embedding backfill failed:', backfillMissingEmbeddings],
  ];

  const results = [];

  // Each source is awaited before the next one starts.
  for (const [source, fetcher] of pipeline) {
    const outcome = await runSource(source, fetcher);
    results.push(outcome);
  }

  for (const [failureLabel, backfill] of backfillSteps) {
    try {
      await backfill();
    } catch (error) {
      console.error(failureLabel, error);
    }
  }

  return results;
}
|
|
|
|
/**
 * Registers all cron jobs: one ingestion job per source, using the
 * expressions found under `config.scheduler`, plus a backfill job on the
 * fixed expression `'0 * * * *'`.
 *
 * The news-crawler job is registered only when `config.scheduler.newsCrawler`
 * is truthy; the other source jobs are registered unconditionally.
 */
function startScheduler() {
  // Registers a job whose only work is to ingest one source.
  const scheduleSource = (expression, source, fetcher) => {
    cron.schedule(expression, async () => {
      await runSource(source, fetcher);
    });
  };

  scheduleSource(config.scheduler.rss, 'rss', fetchRssArticles);
  scheduleSource(config.scheduler.gdelt, 'gdelt', fetchGdeltArticles);
  scheduleSource(config.scheduler.edgar, 'edgar', fetchEdgarArticles);
  scheduleSource(config.scheduler.alphaVantage, 'alphavantage', fetchAlphaVantageArticles);
  scheduleSource(config.scheduler.finnhub, 'finnhub', fetchFinnhubArticles);

  if (config.scheduler.newsCrawler) {
    scheduleSource(config.scheduler.newsCrawler, 'news_crawler', fetchCrawlerArticles);
  }

  // [log prefix on failure, backfill function] pairs; content runs before embeddings.
  const backfillSteps = [
    ['content backfill failed:', backfillMissingContent],
    ['embedding backfill failed:', backfillMissingEmbeddings],
  ];

  cron.schedule('0 * * * *', async () => {
    // Each step has its own try/catch so a failure in one still lets the next run.
    for (const [failureLabel, backfill] of backfillSteps) {
      try {
        await backfill();
      } catch (error) {
        console.error(failureLabel, error);
      }
    }
  });
}
|
|
|
|
// Exported API: the cron registration entry point and the one-shot full run.
module.exports = { startScheduler, runAllIngestions };
|