add Google News integration and enhance crawler capabilities
This commit is contained in:
parent
3211c4c29e
commit
432c43254b
1 changed file with 12 additions and 0 deletions
|
|
@@ -148,13 +148,24 @@ async function fetchGdeltArticlesBigQuery(onWindow) {
|
||||||
const windows = buildWeeklyWindows();
|
const windows = buildWeeklyWindows();
|
||||||
const maxWindowsPerRun = Number(config.gdelt?.maxWindowsPerRun) || 0;
|
const maxWindowsPerRun = Number(config.gdelt?.maxWindowsPerRun) || 0;
|
||||||
const requestDelayMs = Math.max(0, Number(config.gdelt?.requestDelayMs) || 0);
|
const requestDelayMs = Math.max(0, Number(config.gdelt?.requestDelayMs) || 0);
|
||||||
|
const maxRequestsPerSession = Number(config.gdelt?.bigQueryMaxRequestsPerSession) || 650;
|
||||||
const bigquery = getBigQueryClient();
|
const bigquery = getBigQueryClient();
|
||||||
const allArticles = [];
|
const allArticles = [];
|
||||||
|
let totalRequests = 0;
|
||||||
|
|
||||||
for (const source of getBackfillSources()) {
|
for (const source of getBackfillSources()) {
|
||||||
|
if (totalRequests >= maxRequestsPerSession) {
|
||||||
|
console.warn(`GDELT BigQuery: session request limit (${maxRequestsPerSession}) reached, stopping`);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
let windowsFetched = 0;
|
let windowsFetched = 0;
|
||||||
|
|
||||||
for (const window of windows) {
|
for (const window of windows) {
|
||||||
|
if (totalRequests >= maxRequestsPerSession) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
if (maxWindowsPerRun > 0 && windowsFetched >= maxWindowsPerRun) {
|
if (maxWindowsPerRun > 0 && windowsFetched >= maxWindowsPerRun) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
@@ -167,6 +178,7 @@ async function fetchGdeltArticlesBigQuery(onWindow) {
|
||||||
const windowArticles = await fetchWindowBigQuery(source, window, bigquery);
|
const windowArticles = await fetchWindowBigQuery(source, window, bigquery);
|
||||||
markWindowCompleted(source.id, window);
|
markWindowCompleted(source.id, window);
|
||||||
windowsFetched += 1;
|
windowsFetched += 1;
|
||||||
|
totalRequests += 1;
|
||||||
|
|
||||||
if (onWindow && windowArticles.length > 0) {
|
if (onWindow && windowArticles.length > 0) {
|
||||||
await onWindow(windowArticles);
|
await onWindow(windowArticles);
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue