enhance article processing by adding language support and adjusting embedding parameters
This commit is contained in:
parent
37d9dfb083
commit
8805d3a3fc
7 changed files with 129 additions and 9 deletions
7
CLAUDE.md
Normal file
7
CLAUDE.md
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
# Database Policy
|
||||||
|
|
||||||
|
When making any changes to the database schema or data, a strict no-data-loss policy must be followed. This means:
|
||||||
|
- Never DROP columns, tables, or indexes that contain data without first migrating that data elsewhere
|
||||||
|
- All schema changes must be additive or safe migrations (e.g. ADD COLUMN, rename via copy+verify+drop)
|
||||||
|
- Always take a backup or verify row counts before and after any bulk UPDATE or DELETE
|
||||||
|
- Destructive operations require explicit user confirmation before executing
|
||||||
|
|
@ -51,7 +51,7 @@
|
||||||
},
|
},
|
||||||
"embeddingBackfill": {
|
"embeddingBackfill": {
|
||||||
"perRound": 256,
|
"perRound": 256,
|
||||||
"batchSize": 16
|
"batchSize": 128
|
||||||
},
|
},
|
||||||
"browser": {
|
"browser": {
|
||||||
"maxConcurrentPages": 8
|
"maxConcurrentPages": 8
|
||||||
|
|
|
||||||
91
sources.json
91
sources.json
|
|
@ -4,6 +4,7 @@
|
||||||
"label": "Al Jazeera",
|
"label": "Al Jazeera",
|
||||||
"feedUrl": "https://www.aljazeera.com/xml/rss/all.xml",
|
"feedUrl": "https://www.aljazeera.com/xml/rss/all.xml",
|
||||||
"website": "aljazeera.com",
|
"website": "aljazeera.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -14,6 +15,7 @@
|
||||||
"bbc.com",
|
"bbc.com",
|
||||||
"bbc.co.uk"
|
"bbc.co.uk"
|
||||||
],
|
],
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -21,6 +23,7 @@
|
||||||
"label": "Business Insider",
|
"label": "Business Insider",
|
||||||
"feedUrl": "https://feeds.businessinsider.com/custom/all",
|
"feedUrl": "https://feeds.businessinsider.com/custom/all",
|
||||||
"website": "businessinsider.com",
|
"website": "businessinsider.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -28,6 +31,7 @@
|
||||||
"label": "Bloomberg Markets",
|
"label": "Bloomberg Markets",
|
||||||
"feedUrl": "https://feeds.bloomberg.com/markets/news.rss",
|
"feedUrl": "https://feeds.bloomberg.com/markets/news.rss",
|
||||||
"website": "bloomberg.com",
|
"website": "bloomberg.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -35,6 +39,7 @@
|
||||||
"label": "CNBC",
|
"label": "CNBC",
|
||||||
"feedUrl": "https://www.cnbc.com/id/100003114/device/rss/rss.html",
|
"feedUrl": "https://www.cnbc.com/id/100003114/device/rss/rss.html",
|
||||||
"website": "cnbc.com",
|
"website": "cnbc.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -42,6 +47,7 @@
|
||||||
"label": "Wall Street Journal",
|
"label": "Wall Street Journal",
|
||||||
"feedUrl": "https://feeds.a.dj.com/rss/RSSMarketsMain.xml",
|
"feedUrl": "https://feeds.a.dj.com/rss/RSSMarketsMain.xml",
|
||||||
"website": "wsj.com",
|
"website": "wsj.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -49,6 +55,7 @@
|
||||||
"label": "MarketWatch",
|
"label": "MarketWatch",
|
||||||
"feedUrl": "https://feeds.marketwatch.com/marketwatch/topstories/",
|
"feedUrl": "https://feeds.marketwatch.com/marketwatch/topstories/",
|
||||||
"website": "marketwatch.com",
|
"website": "marketwatch.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -59,6 +66,7 @@
|
||||||
"finance.yahoo.com",
|
"finance.yahoo.com",
|
||||||
"yahoo.com"
|
"yahoo.com"
|
||||||
],
|
],
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -66,6 +74,7 @@
|
||||||
"label": "Seeking Alpha",
|
"label": "Seeking Alpha",
|
||||||
"feedUrl": "https://seekingalpha.com/feed.xml",
|
"feedUrl": "https://seekingalpha.com/feed.xml",
|
||||||
"website": "seekingalpha.com",
|
"website": "seekingalpha.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -73,6 +82,7 @@
|
||||||
"label": "Financial Times",
|
"label": "Financial Times",
|
||||||
"feedUrl": "https://www.ft.com/?format=rss",
|
"feedUrl": "https://www.ft.com/?format=rss",
|
||||||
"website": "ft.com",
|
"website": "ft.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -80,6 +90,7 @@
|
||||||
"label": "The Economist",
|
"label": "The Economist",
|
||||||
"feedUrl": "https://www.economist.com/finance-and-economics/rss.xml",
|
"feedUrl": "https://www.economist.com/finance-and-economics/rss.xml",
|
||||||
"website": "economist.com",
|
"website": "economist.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -87,6 +98,7 @@
|
||||||
"label": "Fortune",
|
"label": "Fortune",
|
||||||
"feedUrl": "https://fortune.com/feed",
|
"feedUrl": "https://fortune.com/feed",
|
||||||
"website": "fortune.com",
|
"website": "fortune.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -94,6 +106,7 @@
|
||||||
"label": "Forbes Business",
|
"label": "Forbes Business",
|
||||||
"feedUrl": "https://www.forbes.com/business/feed/",
|
"feedUrl": "https://www.forbes.com/business/feed/",
|
||||||
"website": "forbes.com",
|
"website": "forbes.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -101,6 +114,7 @@
|
||||||
"label": "Inc Magazine",
|
"label": "Inc Magazine",
|
||||||
"feedUrl": "https://www.inc.com/rss",
|
"feedUrl": "https://www.inc.com/rss",
|
||||||
"website": "inc.com",
|
"website": "inc.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -108,6 +122,7 @@
|
||||||
"label": "Fast Company",
|
"label": "Fast Company",
|
||||||
"feedUrl": "https://www.fastcompany.com/latest/rss",
|
"feedUrl": "https://www.fastcompany.com/latest/rss",
|
||||||
"website": "fastcompany.com",
|
"website": "fastcompany.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -115,6 +130,7 @@
|
||||||
"label": "Entrepreneur",
|
"label": "Entrepreneur",
|
||||||
"feedUrl": "https://www.entrepreneur.com/latest.rss",
|
"feedUrl": "https://www.entrepreneur.com/latest.rss",
|
||||||
"website": "entrepreneur.com",
|
"website": "entrepreneur.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -122,6 +138,7 @@
|
||||||
"label": "Axios",
|
"label": "Axios",
|
||||||
"feedUrl": "https://api.axios.com/feed/",
|
"feedUrl": "https://api.axios.com/feed/",
|
||||||
"website": "axios.com",
|
"website": "axios.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -129,6 +146,7 @@
|
||||||
"label": "Wired Business",
|
"label": "Wired Business",
|
||||||
"feedUrl": "https://www.wired.com/feed/category/business/latest/rss",
|
"feedUrl": "https://www.wired.com/feed/category/business/latest/rss",
|
||||||
"website": "wired.com",
|
"website": "wired.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -136,6 +154,7 @@
|
||||||
"label": "NPR Business",
|
"label": "NPR Business",
|
||||||
"feedUrl": "https://feeds.npr.org/1006/rss.xml",
|
"feedUrl": "https://feeds.npr.org/1006/rss.xml",
|
||||||
"website": "npr.org",
|
"website": "npr.org",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -143,6 +162,7 @@
|
||||||
"label": "Federal Reserve",
|
"label": "Federal Reserve",
|
||||||
"feedUrl": "https://www.federalreserve.gov/feeds/press_all.xml",
|
"feedUrl": "https://www.federalreserve.gov/feeds/press_all.xml",
|
||||||
"website": "federalreserve.gov",
|
"website": "federalreserve.gov",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -150,6 +170,7 @@
|
||||||
"label": "TechCrunch",
|
"label": "TechCrunch",
|
||||||
"feedUrl": "https://techcrunch.com/feed/",
|
"feedUrl": "https://techcrunch.com/feed/",
|
||||||
"website": "techcrunch.com",
|
"website": "techcrunch.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -157,6 +178,7 @@
|
||||||
"label": "The Verge",
|
"label": "The Verge",
|
||||||
"feedUrl": "https://www.theverge.com/rss/index.xml",
|
"feedUrl": "https://www.theverge.com/rss/index.xml",
|
||||||
"website": "theverge.com",
|
"website": "theverge.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -164,6 +186,7 @@
|
||||||
"label": "Ars Technica",
|
"label": "Ars Technica",
|
||||||
"feedUrl": "https://feeds.arstechnica.com/arstechnica/index",
|
"feedUrl": "https://feeds.arstechnica.com/arstechnica/index",
|
||||||
"website": "arstechnica.com",
|
"website": "arstechnica.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -171,6 +194,7 @@
|
||||||
"label": "Retail Dive",
|
"label": "Retail Dive",
|
||||||
"feedUrl": "https://www.retaildive.com/feeds/news/",
|
"feedUrl": "https://www.retaildive.com/feeds/news/",
|
||||||
"website": "retaildive.com",
|
"website": "retaildive.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -178,6 +202,7 @@
|
||||||
"label": "Manufacturing Dive",
|
"label": "Manufacturing Dive",
|
||||||
"feedUrl": "https://www.manufacturingdive.com/feeds/news/",
|
"feedUrl": "https://www.manufacturingdive.com/feeds/news/",
|
||||||
"website": "manufacturingdive.com",
|
"website": "manufacturingdive.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -185,6 +210,7 @@
|
||||||
"label": "Banking Dive",
|
"label": "Banking Dive",
|
||||||
"feedUrl": "https://www.bankingdive.com/feeds/news/",
|
"feedUrl": "https://www.bankingdive.com/feeds/news/",
|
||||||
"website": "bankingdive.com",
|
"website": "bankingdive.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -192,6 +218,7 @@
|
||||||
"label": "Financial Post CA",
|
"label": "Financial Post CA",
|
||||||
"feedUrl": "https://financialpost.com/feed",
|
"feedUrl": "https://financialpost.com/feed",
|
||||||
"website": "financialpost.com",
|
"website": "financialpost.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -199,6 +226,7 @@
|
||||||
"label": "Globe and Mail",
|
"label": "Globe and Mail",
|
||||||
"feedUrl": "https://www.theglobeandmail.com/arc/outboundfeeds/rss/category/business/",
|
"feedUrl": "https://www.theglobeandmail.com/arc/outboundfeeds/rss/category/business/",
|
||||||
"website": "theglobeandmail.com",
|
"website": "theglobeandmail.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -206,6 +234,7 @@
|
||||||
"label": "Guardian Business",
|
"label": "Guardian Business",
|
||||||
"feedUrl": "https://www.theguardian.com/uk/business/rss",
|
"feedUrl": "https://www.theguardian.com/uk/business/rss",
|
||||||
"website": "theguardian.com",
|
"website": "theguardian.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -213,6 +242,7 @@
|
||||||
"label": "Sky News Business",
|
"label": "Sky News Business",
|
||||||
"feedUrl": "https://feeds.skynews.com/feeds/rss/business.xml",
|
"feedUrl": "https://feeds.skynews.com/feeds/rss/business.xml",
|
||||||
"website": "skynews.com",
|
"website": "skynews.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -220,6 +250,7 @@
|
||||||
"label": "This Is Money",
|
"label": "This Is Money",
|
||||||
"feedUrl": "[FAILED] https://www.thisismoney.co.uk/money/news/index.rss",
|
"feedUrl": "[FAILED] https://www.thisismoney.co.uk/money/news/index.rss",
|
||||||
"website": "thisismoney.co.uk",
|
"website": "thisismoney.co.uk",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -227,6 +258,7 @@
|
||||||
"label": "City A.M.",
|
"label": "City A.M.",
|
||||||
"feedUrl": "https://www.cityam.com/feed/",
|
"feedUrl": "https://www.cityam.com/feed/",
|
||||||
"website": "cityam.com",
|
"website": "cityam.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -234,6 +266,7 @@
|
||||||
"label": "Spiegel Wirtschaft",
|
"label": "Spiegel Wirtschaft",
|
||||||
"feedUrl": "https://www.spiegel.de/wirtschaft/index.rss",
|
"feedUrl": "https://www.spiegel.de/wirtschaft/index.rss",
|
||||||
"website": "spiegel.de",
|
"website": "spiegel.de",
|
||||||
|
"language": "de",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -241,6 +274,7 @@
|
||||||
"label": "Handelsblatt",
|
"label": "Handelsblatt",
|
||||||
"feedUrl": "https://www.handelsblatt.com/contentexport/feed/schlagzeilen",
|
"feedUrl": "https://www.handelsblatt.com/contentexport/feed/schlagzeilen",
|
||||||
"website": "handelsblatt.com",
|
"website": "handelsblatt.com",
|
||||||
|
"language": "de",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -248,6 +282,7 @@
|
||||||
"label": "FAZ Wirtschaft",
|
"label": "FAZ Wirtschaft",
|
||||||
"feedUrl": "https://www.faz.net/rss/aktuell/wirtschaft/",
|
"feedUrl": "https://www.faz.net/rss/aktuell/wirtschaft/",
|
||||||
"website": "faz.net",
|
"website": "faz.net",
|
||||||
|
"language": "de",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -255,6 +290,7 @@
|
||||||
"label": "Die Welt Wirtschaft",
|
"label": "Die Welt Wirtschaft",
|
||||||
"feedUrl": "https://www.welt.de/feeds/section/wirtschaft.rss",
|
"feedUrl": "https://www.welt.de/feeds/section/wirtschaft.rss",
|
||||||
"website": "welt.de",
|
"website": "welt.de",
|
||||||
|
"language": "de",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -262,6 +298,7 @@
|
||||||
"label": "Les Echos",
|
"label": "Les Echos",
|
||||||
"feedUrl": "[FAILED] https://feeds.lesechos.fr/rss/rss_la_une.xml",
|
"feedUrl": "[FAILED] https://feeds.lesechos.fr/rss/rss_la_une.xml",
|
||||||
"website": "lesechos.fr",
|
"website": "lesechos.fr",
|
||||||
|
"language": "fr",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -269,6 +306,7 @@
|
||||||
"label": "Le Monde Economie",
|
"label": "Le Monde Economie",
|
||||||
"feedUrl": "https://www.lemonde.fr/economie/rss_full.xml",
|
"feedUrl": "https://www.lemonde.fr/economie/rss_full.xml",
|
||||||
"website": "lemonde.fr",
|
"website": "lemonde.fr",
|
||||||
|
"language": "fr",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -276,6 +314,7 @@
|
||||||
"label": "BFM Business",
|
"label": "BFM Business",
|
||||||
"feedUrl": "[FAILED] https://bfmbusiness.bfmtv.com/rss/news-flux-rss/",
|
"feedUrl": "[FAILED] https://bfmbusiness.bfmtv.com/rss/news-flux-rss/",
|
||||||
"website": "bfmbusiness.bfmtv.com",
|
"website": "bfmbusiness.bfmtv.com",
|
||||||
|
"language": "fr",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -283,6 +322,7 @@
|
||||||
"label": "El Economista ES",
|
"label": "El Economista ES",
|
||||||
"feedUrl": "[FAILED] https://www.eleconomista.es/rss/rss-de-portada.php",
|
"feedUrl": "[FAILED] https://www.eleconomista.es/rss/rss-de-portada.php",
|
||||||
"website": "eleconomista.es",
|
"website": "eleconomista.es",
|
||||||
|
"language": "es",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -290,6 +330,7 @@
|
||||||
"label": "Expansion ES",
|
"label": "Expansion ES",
|
||||||
"feedUrl": "https://e00-expansion.uecdn.es/rss/portada.xml",
|
"feedUrl": "https://e00-expansion.uecdn.es/rss/portada.xml",
|
||||||
"website": "expansion.com",
|
"website": "expansion.com",
|
||||||
|
"language": "es",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -297,6 +338,7 @@
|
||||||
"label": "Cinco Dias",
|
"label": "Cinco Dias",
|
||||||
"feedUrl": "[FAILED] https://cincodias.elpais.com/rss/cincodias/ultima_hora_mercados.xml",
|
"feedUrl": "[FAILED] https://cincodias.elpais.com/rss/cincodias/ultima_hora_mercados.xml",
|
||||||
"website": "cincodias.elpais.com",
|
"website": "cincodias.elpais.com",
|
||||||
|
"language": "es",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -304,6 +346,7 @@
|
||||||
"label": "Il Sole 24 Ore",
|
"label": "Il Sole 24 Ore",
|
||||||
"feedUrl": "[FAILED] https://www.ilsole24ore.com/rss/economia--finanza.xml",
|
"feedUrl": "[FAILED] https://www.ilsole24ore.com/rss/economia--finanza.xml",
|
||||||
"website": "ilsole24ore.com",
|
"website": "ilsole24ore.com",
|
||||||
|
"language": "it",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -311,6 +354,7 @@
|
||||||
"label": "FD.nl",
|
"label": "FD.nl",
|
||||||
"feedUrl": "[FAILED] https://fd.nl/rss",
|
"feedUrl": "[FAILED] https://fd.nl/rss",
|
||||||
"website": "fd.nl",
|
"website": "fd.nl",
|
||||||
|
"language": "nl",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -318,6 +362,7 @@
|
||||||
"label": "NZZ Wirtschaft",
|
"label": "NZZ Wirtschaft",
|
||||||
"feedUrl": "https://www.nzz.ch/wirtschaft.rss",
|
"feedUrl": "https://www.nzz.ch/wirtschaft.rss",
|
||||||
"website": "nzz.ch",
|
"website": "nzz.ch",
|
||||||
|
"language": "de",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -325,6 +370,7 @@
|
||||||
"label": "Moscow Times",
|
"label": "Moscow Times",
|
||||||
"feedUrl": "https://www.themoscowtimes.com/rss/news",
|
"feedUrl": "https://www.themoscowtimes.com/rss/news",
|
||||||
"website": "themoscowtimes.com",
|
"website": "themoscowtimes.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -332,6 +378,7 @@
|
||||||
"label": "RBC Russia",
|
"label": "RBC Russia",
|
||||||
"feedUrl": "https://rssexport.rbc.ru/rbcnews/news/30/full.rss",
|
"feedUrl": "https://rssexport.rbc.ru/rbcnews/news/30/full.rss",
|
||||||
"website": "rbc.ru",
|
"website": "rbc.ru",
|
||||||
|
"language": "ru",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -339,6 +386,7 @@
|
||||||
"label": "Economic Times India",
|
"label": "Economic Times India",
|
||||||
"feedUrl": "https://economictimes.indiatimes.com/rssfeedstopstories.cms",
|
"feedUrl": "https://economictimes.indiatimes.com/rssfeedstopstories.cms",
|
||||||
"website": "economictimes.indiatimes.com",
|
"website": "economictimes.indiatimes.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -346,6 +394,7 @@
|
||||||
"label": "Business Standard IN",
|
"label": "Business Standard IN",
|
||||||
"feedUrl": "https://www.business-standard.com/rss/home_page_top_stories.rss",
|
"feedUrl": "https://www.business-standard.com/rss/home_page_top_stories.rss",
|
||||||
"website": "business-standard.com",
|
"website": "business-standard.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -353,6 +402,7 @@
|
||||||
"label": "Live Mint",
|
"label": "Live Mint",
|
||||||
"feedUrl": "[FAILED] https://www.livemint.com/rss/headlines",
|
"feedUrl": "[FAILED] https://www.livemint.com/rss/headlines",
|
||||||
"website": "livemint.com",
|
"website": "livemint.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -360,6 +410,7 @@
|
||||||
"label": "Moneycontrol",
|
"label": "Moneycontrol",
|
||||||
"feedUrl": "https://www.moneycontrol.com/rss/MCtopnews.xml",
|
"feedUrl": "https://www.moneycontrol.com/rss/MCtopnews.xml",
|
||||||
"website": "moneycontrol.com",
|
"website": "moneycontrol.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -367,6 +418,7 @@
|
||||||
"label": "Hindu Business Line",
|
"label": "Hindu Business Line",
|
||||||
"feedUrl": "https://www.thehindubusinessline.com/feeder/default.rss",
|
"feedUrl": "https://www.thehindubusinessline.com/feeder/default.rss",
|
||||||
"website": "thehindubusinessline.com",
|
"website": "thehindubusinessline.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -374,6 +426,7 @@
|
||||||
"label": "Caixin Global",
|
"label": "Caixin Global",
|
||||||
"feedUrl": "[FAILED] https://www.caixinglobal.com/rss/newsfeeds/",
|
"feedUrl": "[FAILED] https://www.caixinglobal.com/rss/newsfeeds/",
|
||||||
"website": "caixinglobal.com",
|
"website": "caixinglobal.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -381,6 +434,7 @@
|
||||||
"label": "China Daily Business",
|
"label": "China Daily Business",
|
||||||
"feedUrl": "https://www.chinadaily.com.cn/rss/bizchina_rss.xml",
|
"feedUrl": "https://www.chinadaily.com.cn/rss/bizchina_rss.xml",
|
||||||
"website": "chinadaily.com.cn",
|
"website": "chinadaily.com.cn",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -388,6 +442,7 @@
|
||||||
"label": "Xinhua Business",
|
"label": "Xinhua Business",
|
||||||
"feedUrl": "[FAILED] https://english.news.cn/rss/business.xml",
|
"feedUrl": "[FAILED] https://english.news.cn/rss/business.xml",
|
||||||
"website": "news.cn",
|
"website": "news.cn",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -395,6 +450,7 @@
|
||||||
"label": "South China Morning Post",
|
"label": "South China Morning Post",
|
||||||
"feedUrl": "https://www.scmp.com/rss/91/feed",
|
"feedUrl": "https://www.scmp.com/rss/91/feed",
|
||||||
"website": "scmp.com",
|
"website": "scmp.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -402,6 +458,7 @@
|
||||||
"label": "Nikkei Asia",
|
"label": "Nikkei Asia",
|
||||||
"feedUrl": "https://asia.nikkei.com/rss/feed/nar",
|
"feedUrl": "https://asia.nikkei.com/rss/feed/nar",
|
||||||
"website": "asia.nikkei.com",
|
"website": "asia.nikkei.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -409,6 +466,7 @@
|
||||||
"label": "Japan Times Business",
|
"label": "Japan Times Business",
|
||||||
"feedUrl": "[FAILED] https://www.japantimes.co.jp/feed/business/",
|
"feedUrl": "[FAILED] https://www.japantimes.co.jp/feed/business/",
|
||||||
"website": "japantimes.co.jp",
|
"website": "japantimes.co.jp",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -416,6 +474,7 @@
|
||||||
"label": "Korea Herald",
|
"label": "Korea Herald",
|
||||||
"feedUrl": "https://www.koreaherald.com/rss/010000000000.xml",
|
"feedUrl": "https://www.koreaherald.com/rss/010000000000.xml",
|
||||||
"website": "koreaherald.com",
|
"website": "koreaherald.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -423,6 +482,7 @@
|
||||||
"label": "Korea JoongAng Daily",
|
"label": "Korea JoongAng Daily",
|
||||||
"feedUrl": "[FAILED] https://koreajoongangdaily.joins.com/rss/",
|
"feedUrl": "[FAILED] https://koreajoongangdaily.joins.com/rss/",
|
||||||
"website": "koreajoongangdaily.joins.com",
|
"website": "koreajoongangdaily.joins.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -430,6 +490,7 @@
|
||||||
"label": "Business Times SG",
|
"label": "Business Times SG",
|
||||||
"feedUrl": "https://www.businesstimes.com.sg/rss.xml",
|
"feedUrl": "https://www.businesstimes.com.sg/rss.xml",
|
||||||
"website": "businesstimes.com.sg",
|
"website": "businesstimes.com.sg",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -437,6 +498,7 @@
|
||||||
"label": "Straits Times Business",
|
"label": "Straits Times Business",
|
||||||
"feedUrl": "https://www.straitstimes.com/news/business/rss.xml",
|
"feedUrl": "https://www.straitstimes.com/news/business/rss.xml",
|
||||||
"website": "straitstimes.com",
|
"website": "straitstimes.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -444,6 +506,7 @@
|
||||||
"label": "Channel NewsAsia",
|
"label": "Channel NewsAsia",
|
||||||
"feedUrl": "https://www.channelnewsasia.com/rssfeeds/8395986",
|
"feedUrl": "https://www.channelnewsasia.com/rssfeeds/8395986",
|
||||||
"website": "channelnewsasia.com",
|
"website": "channelnewsasia.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -451,6 +514,7 @@
|
||||||
"label": "Bangkok Post Business",
|
"label": "Bangkok Post Business",
|
||||||
"feedUrl": "https://www.bangkokpost.com/rss/data/business.xml",
|
"feedUrl": "https://www.bangkokpost.com/rss/data/business.xml",
|
||||||
"website": "bangkokpost.com",
|
"website": "bangkokpost.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -458,6 +522,7 @@
|
||||||
"label": "The Star Malaysia",
|
"label": "The Star Malaysia",
|
||||||
"feedUrl": "[FAILED] https://www.thestar.com.my/rss/Business/Business-News",
|
"feedUrl": "[FAILED] https://www.thestar.com.my/rss/Business/Business-News",
|
||||||
"website": "thestar.com.my",
|
"website": "thestar.com.my",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -465,6 +530,7 @@
|
||||||
"label": "Australian Fin Review",
|
"label": "Australian Fin Review",
|
||||||
"feedUrl": "[FAILED] https://www.afr.com/rss",
|
"feedUrl": "[FAILED] https://www.afr.com/rss",
|
||||||
"website": "afr.com",
|
"website": "afr.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -472,6 +538,7 @@
|
||||||
"label": "ABC Business AU",
|
"label": "ABC Business AU",
|
||||||
"feedUrl": "[FAILED] https://www.abc.net.au/news/feed/52278/rss.xml",
|
"feedUrl": "[FAILED] https://www.abc.net.au/news/feed/52278/rss.xml",
|
||||||
"website": "abc.net.au",
|
"website": "abc.net.au",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -479,6 +546,7 @@
|
||||||
"label": "NZ Herald Business",
|
"label": "NZ Herald Business",
|
||||||
"feedUrl": "https://www.nzherald.co.nz/arc/outboundfeeds/rss/section/business/",
|
"feedUrl": "https://www.nzherald.co.nz/arc/outboundfeeds/rss/section/business/",
|
||||||
"website": "nzherald.co.nz",
|
"website": "nzherald.co.nz",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -486,6 +554,7 @@
|
||||||
"label": "Arabian Business",
|
"label": "Arabian Business",
|
||||||
"feedUrl": "[FAILED] https://www.arabianbusiness.com/rss.xml",
|
"feedUrl": "[FAILED] https://www.arabianbusiness.com/rss.xml",
|
||||||
"website": "arabianbusiness.com",
|
"website": "arabianbusiness.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -493,6 +562,7 @@
|
||||||
"label": "Gulf News Business",
|
"label": "Gulf News Business",
|
||||||
"feedUrl": "[FAILED] https://gulfnews.com/rss/business",
|
"feedUrl": "[FAILED] https://gulfnews.com/rss/business",
|
||||||
"website": "gulfnews.com",
|
"website": "gulfnews.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -500,6 +570,7 @@
|
||||||
"label": "Arab News",
|
"label": "Arab News",
|
||||||
"feedUrl": "[FAILED] https://www.arabnews.com/rss/front_page.xml",
|
"feedUrl": "[FAILED] https://www.arabnews.com/rss/front_page.xml",
|
||||||
"website": "arabnews.com",
|
"website": "arabnews.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -507,6 +578,7 @@
|
||||||
"label": "The National UAE",
|
"label": "The National UAE",
|
||||||
"feedUrl": "https://www.thenationalnews.com/arc/outboundfeeds/rss/?outputType=xml",
|
"feedUrl": "https://www.thenationalnews.com/arc/outboundfeeds/rss/?outputType=xml",
|
||||||
"website": "thenationalnews.com",
|
"website": "thenationalnews.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -514,6 +586,7 @@
|
||||||
"label": "BusinessDay Nigeria",
|
"label": "BusinessDay Nigeria",
|
||||||
"feedUrl": "https://businessday.ng/feed/",
|
"feedUrl": "https://businessday.ng/feed/",
|
||||||
"website": "businessday.ng",
|
"website": "businessday.ng",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -521,6 +594,7 @@
|
||||||
"label": "Moneyweb SA",
|
"label": "Moneyweb SA",
|
||||||
"feedUrl": "https://www.moneyweb.co.za/feed/",
|
"feedUrl": "https://www.moneyweb.co.za/feed/",
|
||||||
"website": "moneyweb.co.za",
|
"website": "moneyweb.co.za",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -528,6 +602,7 @@
|
||||||
"label": "BusinessLive SA",
|
"label": "BusinessLive SA",
|
||||||
"feedUrl": "[FAILED] https://www.businesslive.co.za/rss/bd/",
|
"feedUrl": "[FAILED] https://www.businesslive.co.za/rss/bd/",
|
||||||
"website": "businesslive.co.za",
|
"website": "businesslive.co.za",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -535,6 +610,7 @@
|
||||||
"label": "Business Daily Africa",
|
"label": "Business Daily Africa",
|
||||||
"feedUrl": "[FAILED] https://www.businessdailyafrica.com/rss/",
|
"feedUrl": "[FAILED] https://www.businessdailyafrica.com/rss/",
|
||||||
"website": "businessdailyafrica.com",
|
"website": "businessdailyafrica.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -542,6 +618,7 @@
|
||||||
"label": "Vanguard Business NG",
|
"label": "Vanguard Business NG",
|
||||||
"feedUrl": "https://www.vanguardngr.com/category/business/feed/",
|
"feedUrl": "https://www.vanguardngr.com/category/business/feed/",
|
||||||
"website": "vanguardngr.com",
|
"website": "vanguardngr.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -549,6 +626,7 @@
|
||||||
"label": "Folha Mercado BR",
|
"label": "Folha Mercado BR",
|
||||||
"feedUrl": "https://feeds.folha.uol.com.br/mercado/rss091.xml",
|
"feedUrl": "https://feeds.folha.uol.com.br/mercado/rss091.xml",
|
||||||
"website": "folha.uol.com.br",
|
"website": "folha.uol.com.br",
|
||||||
|
"language": "pt",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -556,6 +634,7 @@
|
||||||
"label": "G1 Economia BR",
|
"label": "G1 Economia BR",
|
||||||
"feedUrl": "https://g1.globo.com/dynamo/economia/rss2.xml",
|
"feedUrl": "https://g1.globo.com/dynamo/economia/rss2.xml",
|
||||||
"website": "g1.globo.com",
|
"website": "g1.globo.com",
|
||||||
|
"language": "pt",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -563,6 +642,7 @@
|
||||||
"label": "Exame BR",
|
"label": "Exame BR",
|
||||||
"feedUrl": "https://exame.com/feed/",
|
"feedUrl": "https://exame.com/feed/",
|
||||||
"website": "exame.com",
|
"website": "exame.com",
|
||||||
|
"language": "pt",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -570,6 +650,7 @@
|
||||||
"label": "El Economista MX",
|
"label": "El Economista MX",
|
||||||
"feedUrl": "[FAILED] https://www.eleconomista.com.mx/rss/rss.html",
|
"feedUrl": "[FAILED] https://www.eleconomista.com.mx/rss/rss.html",
|
||||||
"website": "eleconomista.com.mx",
|
"website": "eleconomista.com.mx",
|
||||||
|
"language": "es",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -577,6 +658,7 @@
|
||||||
"label": "Expansion MX",
|
"label": "Expansion MX",
|
||||||
"feedUrl": "https://expansion.mx/rss",
|
"feedUrl": "https://expansion.mx/rss",
|
||||||
"website": "expansion.mx",
|
"website": "expansion.mx",
|
||||||
|
"language": "es",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -584,6 +666,7 @@
|
||||||
"label": "La Nacion AR",
|
"label": "La Nacion AR",
|
||||||
"feedUrl": "https://www.lanacion.com.ar/arc/outboundfeeds/rss/category/economia/",
|
"feedUrl": "https://www.lanacion.com.ar/arc/outboundfeeds/rss/category/economia/",
|
||||||
"website": "lanacion.com.ar",
|
"website": "lanacion.com.ar",
|
||||||
|
"language": "es",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -591,6 +674,7 @@
|
||||||
"label": "Infobae Economia AR",
|
"label": "Infobae Economia AR",
|
||||||
"feedUrl": "[FAILED] https://www.infobae.com/feeds/rss/economia/",
|
"feedUrl": "[FAILED] https://www.infobae.com/feeds/rss/economia/",
|
||||||
"website": "infobae.com",
|
"website": "infobae.com",
|
||||||
|
"language": "es",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -598,6 +682,7 @@
|
||||||
"label": "Portafolio Colombia",
|
"label": "Portafolio Colombia",
|
||||||
"feedUrl": "[FAILED] https://www.portafolio.co/rss/portafolio.xml",
|
"feedUrl": "[FAILED] https://www.portafolio.co/rss/portafolio.xml",
|
||||||
"website": "portafolio.co",
|
"website": "portafolio.co",
|
||||||
|
"language": "es",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -605,6 +690,7 @@
|
||||||
"label": "El Comercio Peru",
|
"label": "El Comercio Peru",
|
||||||
"feedUrl": "[FAILED] https://elcomercio.pe/arc/outboundfeeds/rss/section/economia/",
|
"feedUrl": "[FAILED] https://elcomercio.pe/arc/outboundfeeds/rss/section/economia/",
|
||||||
"website": "elcomercio.pe",
|
"website": "elcomercio.pe",
|
||||||
|
"language": "es",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -615,6 +701,7 @@
|
||||||
"jamaica-gleaner.com",
|
"jamaica-gleaner.com",
|
||||||
"jamaicagleaner.com"
|
"jamaicagleaner.com"
|
||||||
],
|
],
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -622,6 +709,7 @@
|
||||||
"label": "Jamaica Observer",
|
"label": "Jamaica Observer",
|
||||||
"feedUrl": "https://www.jamaicaobserver.com/app/business/",
|
"feedUrl": "https://www.jamaicaobserver.com/app/business/",
|
||||||
"website": "jamaicaobserver.com",
|
"website": "jamaicaobserver.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -629,6 +717,7 @@
|
||||||
"label": "Stabroek News",
|
"label": "Stabroek News",
|
||||||
"feedUrl": "[FAILED] https://www.stabroeknews.com/feed/",
|
"feedUrl": "[FAILED] https://www.stabroeknews.com/feed/",
|
||||||
"website": "stabroeknews.com",
|
"website": "stabroeknews.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -636,6 +725,7 @@
|
||||||
"label": "Nation News Barbados",
|
"label": "Nation News Barbados",
|
||||||
"feedUrl": "[FAILED] https://nationnews.com/rss-feed/",
|
"feedUrl": "[FAILED] https://nationnews.com/rss-feed/",
|
||||||
"website": "nationnews.com",
|
"website": "nationnews.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": true
|
"backfill": true
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -643,6 +733,7 @@
|
||||||
"label": "Google News",
|
"label": "Google News",
|
||||||
"feedUrl": "https://news.google.com/rss?hl=en-GB&gl=GB&ceid=GB:en",
|
"feedUrl": "https://news.google.com/rss?hl=en-GB&gl=GB&ceid=GB:en",
|
||||||
"website": "news.google.com",
|
"website": "news.google.com",
|
||||||
|
"language": "en",
|
||||||
"backfill": false
|
"backfill": false
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
|
||||||
|
|
@ -65,8 +65,8 @@ const markContentPending = db.prepare(`
|
||||||
const selectPartitionedArticlesMissingContent = db.prepare(`
|
const selectPartitionedArticlesMissingContent = db.prepare(`
|
||||||
SELECT id, url, title, description
|
SELECT id, url, title, description
|
||||||
FROM (
|
FROM (
|
||||||
SELECT id, url, title, description, source,
|
SELECT id, url, title, description, source, pub_date_effective,
|
||||||
ROW_NUMBER() OVER (PARTITION BY source ORDER BY ingested_at DESC, id DESC) AS rn
|
ROW_NUMBER() OVER (PARTITION BY source ORDER BY pub_date_effective DESC, id DESC) AS rn
|
||||||
FROM articles
|
FROM articles
|
||||||
WHERE (content IS NULL OR TRIM(content) = '')
|
WHERE (content IS NULL OR TRIM(content) = '')
|
||||||
AND (content_status IS NULL OR content_status = 'pending')
|
AND (content_status IS NULL OR content_status = 'pending')
|
||||||
|
|
@ -74,7 +74,7 @@ const selectPartitionedArticlesMissingContent = db.prepare(`
|
||||||
AND (id % ?) = ?
|
AND (id % ?) = ?
|
||||||
)
|
)
|
||||||
WHERE rn <= ?
|
WHERE rn <= ?
|
||||||
ORDER BY rn, source
|
ORDER BY pub_date_effective DESC, rn, source
|
||||||
`);
|
`);
|
||||||
|
|
||||||
const selectAttemptCount = db.prepare(`
|
const selectAttemptCount = db.prepare(`
|
||||||
|
|
|
||||||
17
src/db.js
17
src/db.js
|
|
@ -288,7 +288,8 @@ for (const statement of [
|
||||||
'ALTER TABLE articles ADD COLUMN content_retry_after TEXT',
|
'ALTER TABLE articles ADD COLUMN content_retry_after TEXT',
|
||||||
'ALTER TABLE articles ADD COLUMN is_index_page INTEGER NOT NULL DEFAULT 0',
|
'ALTER TABLE articles ADD COLUMN is_index_page INTEGER NOT NULL DEFAULT 0',
|
||||||
'ALTER TABLE articles ADD COLUMN has_embedding INTEGER NOT NULL DEFAULT 0',
|
'ALTER TABLE articles ADD COLUMN has_embedding INTEGER NOT NULL DEFAULT 0',
|
||||||
'ALTER TABLE articles ADD COLUMN pub_date_effective TEXT'
|
'ALTER TABLE articles ADD COLUMN pub_date_effective TEXT',
|
||||||
|
'ALTER TABLE articles ADD COLUMN language TEXT'
|
||||||
]) {
|
]) {
|
||||||
try {
|
try {
|
||||||
db.exec(statement);
|
db.exec(statement);
|
||||||
|
|
@ -312,6 +313,20 @@ db.exec(`
|
||||||
WHERE pub_date_effective IS NULL
|
WHERE pub_date_effective IS NULL
|
||||||
`);
|
`);
|
||||||
|
|
||||||
|
// backfill language from sources.json for existing rows
|
||||||
|
{
|
||||||
|
const sources = require('../sources.json');
|
||||||
|
const updateLang = db.prepare(`UPDATE articles SET language = ? WHERE source = ? AND language IS NULL`);
|
||||||
|
const backfillLang = db.transaction(() => {
|
||||||
|
for (const src of sources) {
|
||||||
|
if (src.language) {
|
||||||
|
updateLang.run(src.language, src.id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
backfillLang();
|
||||||
|
}
|
||||||
|
|
||||||
db.exec(`
|
db.exec(`
|
||||||
CREATE INDEX IF NOT EXISTS idx_articles_has_embedding ON articles(has_embedding);
|
CREATE INDEX IF NOT EXISTS idx_articles_has_embedding ON articles(has_embedding);
|
||||||
CREATE INDEX IF NOT EXISTS idx_articles_pub_date_effective ON articles(pub_date_effective DESC);
|
CREATE INDEX IF NOT EXISTS idx_articles_pub_date_effective ON articles(pub_date_effective DESC);
|
||||||
|
|
|
||||||
|
|
@ -73,7 +73,7 @@ const selectArticlesMissingEmbeddings = db.prepare(`
|
||||||
SELECT 1 FROM article_embedding_store s
|
SELECT 1 FROM article_embedding_store s
|
||||||
WHERE s.article_id = a.id AND s.model = ?
|
WHERE s.article_id = a.id AND s.model = ?
|
||||||
)
|
)
|
||||||
ORDER BY a.ingested_at ASC, a.id ASC
|
ORDER BY a.pub_date_effective DESC, a.id DESC
|
||||||
LIMIT ?
|
LIMIT ?
|
||||||
`);
|
`);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,10 @@ const db = require('./db');
|
||||||
const { normalizeTitle } = require('./dedup');
|
const { normalizeTitle } = require('./dedup');
|
||||||
const { markSourceRun } = require('./state');
|
const { markSourceRun } = require('./state');
|
||||||
|
|
||||||
|
const sourcesById = Object.fromEntries(
|
||||||
|
require('../sources.json').map((s) => [s.id, s])
|
||||||
|
);
|
||||||
|
|
||||||
const insertArticle = db.prepare(`
|
const insertArticle = db.prepare(`
|
||||||
INSERT INTO articles (
|
INSERT INTO articles (
|
||||||
title,
|
title,
|
||||||
|
|
@ -13,8 +17,9 @@ const insertArticle = db.prepare(`
|
||||||
source,
|
source,
|
||||||
pub_date,
|
pub_date,
|
||||||
ingested_at,
|
ingested_at,
|
||||||
pub_date_effective
|
pub_date_effective,
|
||||||
) VALUES (?, ?, NULL, ?, ?, ?, ?, ?, ?, ?)
|
language
|
||||||
|
) VALUES (?, ?, NULL, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||||
`);
|
`);
|
||||||
const findByUrl = db.prepare('SELECT id FROM articles WHERE url = ?');
|
const findByUrl = db.prepare('SELECT id FROM articles WHERE url = ?');
|
||||||
const INDEX_PAGE_URL_HINT = /\/(category|categories|tag|tags|topic|topics|section|sections|archive|archives|authors|search)(?:\/|$)/i;
|
const INDEX_PAGE_URL_HINT = /\/(category|categories|tag|tags|topic|topics|section|sections|archive|archives|authors|search)(?:\/|$)/i;
|
||||||
|
|
@ -81,6 +86,7 @@ function ingestArticle(article) {
|
||||||
const isIndexPage = inferIsIndexPage(article, title, url);
|
const isIndexPage = inferIsIndexPage(article, title, url);
|
||||||
const pubDate = normalizePubDate(article.pubDate);
|
const pubDate = normalizePubDate(article.pubDate);
|
||||||
const ingestedAt = new Date().toISOString();
|
const ingestedAt = new Date().toISOString();
|
||||||
|
const language = (sourcesById[source] && sourcesById[source].language) || null;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const result = insertArticle.run(
|
const result = insertArticle.run(
|
||||||
|
|
@ -92,7 +98,8 @@ function ingestArticle(article) {
|
||||||
source,
|
source,
|
||||||
pubDate,
|
pubDate,
|
||||||
ingestedAt,
|
ingestedAt,
|
||||||
pubDate || ingestedAt
|
pubDate || ingestedAt,
|
||||||
|
language
|
||||||
);
|
);
|
||||||
|
|
||||||
// dont kick off the content fetch here — it used to be fire-and-forget which
|
// dont kick off the content fetch here — it used to be fire-and-forget which
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue