From 432c43254b8c9dc5f74f7ff8cff4a5ca76119d97 Mon Sep 17 00:00:00 2001 From: ImBenji Date: Sat, 18 Apr 2026 06:51:36 +0100 Subject: [PATCH] add Google News integration and enhance crawler capabilities --- src/sources/gdelt.js | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/sources/gdelt.js b/src/sources/gdelt.js index cbf4543..4a5345e 100644 --- a/src/sources/gdelt.js +++ b/src/sources/gdelt.js @@ -148,13 +148,24 @@ async function fetchGdeltArticlesBigQuery(onWindow) { const windows = buildWeeklyWindows(); const maxWindowsPerRun = Number(config.gdelt?.maxWindowsPerRun) || 0; const requestDelayMs = Math.max(0, Number(config.gdelt?.requestDelayMs) || 0); + const maxRequestsPerSession = Number(config.gdelt?.bigQueryMaxRequestsPerSession) || 650; const bigquery = getBigQueryClient(); const allArticles = []; + let totalRequests = 0; for (const source of getBackfillSources()) { + if (totalRequests >= maxRequestsPerSession) { + console.warn(`GDELT BigQuery: session request limit (${maxRequestsPerSession}) reached, stopping`); + break; + } + let windowsFetched = 0; for (const window of windows) { + if (totalRequests >= maxRequestsPerSession) { + break; + } + if (maxWindowsPerRun > 0 && windowsFetched >= maxWindowsPerRun) { break; } @@ -167,6 +178,7 @@ async function fetchGdeltArticlesBigQuery(onWindow) { const windowArticles = await fetchWindowBigQuery(source, window, bigquery); markWindowCompleted(source.id, window); windowsFetched += 1; + totalRequests += 1; if (onWindow && windowArticles.length > 0) { await onWindow(windowArticles);