From c80558fd506b0e09e6402a2e715a4e01b3d75708 Mon Sep 17 00:00:00 2001 From: ImBenji Date: Fri, 17 Apr 2026 00:11:50 +0100 Subject: [PATCH] update scheduler configuration for news crawler and other sources --- config.json | 4 ++++ src/content.js | 13 +++++++++++-- src/scheduler.js | 2 +- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/config.json b/config.json index 9fbc96b..af2b47a 100644 --- a/config.json +++ b/config.json @@ -658,5 +658,9 @@ "edgar": "15 * * * *", "alphaVantage": "20 * * * *", "finnhub": "25 * * * *" + }, + "contentBackfill": { + "cron": "0 * * * *", + "batchSize": -1 } } diff --git a/src/content.js b/src/content.js index 27f6c73..6f430cc 100644 --- a/src/content.js +++ b/src/content.js @@ -24,6 +24,13 @@ const markContentPending = db.prepare(` SET content_status = NULL, content_error = NULL, content_attempted_at = ? WHERE id = ? `); +const selectAllArticlesMissingContent = db.prepare(` + SELECT id, url + FROM articles + WHERE (content IS NULL OR TRIM(content) = '') + AND (content_status IS NULL OR content_status = 'pending') + ORDER BY ingested_at DESC, id DESC +`); const selectArticlesMissingContent = db.prepare(` SELECT id, url FROM articles @@ -203,7 +210,7 @@ async function fetchAndStoreContent(id, url) { } } -async function backfillMissingContent(limit = 10) { +async function backfillMissingContent(limit = config.contentBackfill.batchSize || 10) { if (contentBackfillRunning) { return; } @@ -211,7 +218,9 @@ async function backfillMissingContent(limit = 10) { contentBackfillRunning = true; try { - const rows = selectArticlesMissingContent.all(limit); + const rows = limit === -1 + ? selectAllArticlesMissingContent.all() + : selectArticlesMissingContent.all(limit); for (const row of rows) { await fetchAndStoreContent(row.id, row.url); diff --git a/src/scheduler.js b/src/scheduler.js index f3b23d1..2dfc6e6 100644 --- a/src/scheduler.js +++ b/src/scheduler.js @@ -82,7 +82,7 @@ function startScheduler() { }); } - cron.schedule('0 * * * *', async () => { + cron.schedule(config.contentBackfill.cron, async () => { try { await backfillMissingContent(); } catch (error) {