2026-02-15 20:33:20 +04:00
import { getCorsHeaders , isDisallowedOrigin } from './_cors.js' ;
2026-03-01 11:53:20 +04:00
import { validateApiKey } from './_api-key.js' ;
import { checkRateLimit } from './_rate-limit.js' ;
refactor(api): extract shared relay helper into _relay.js (#782)
DRY 7 edge functions that duplicated relay proxy boilerplate (~80%
identical: CORS, origin check, OPTIONS, relay URL construction, auth
headers, fetchWithTimeout, error handling).
- New `api/_relay.js` exports createRelayHandler factory + getRelayBaseUrl,
getRelayHeaders, fetchWithTimeout utilities
- Convert ais-snapshot, polymarket, opensky, oref-alerts, telegram-feed
to ~15-25 line config objects via createRelayHandler
- Refactor youtube/live.js and rss-proxy.js to import shared utils only
(complex handlers not suited for factory pattern)
- 30 unit/behavior tests in tests/relay-helper.test.mjs
Net: -501 lines removed, +72 added across 7 prod files. Relay utility
functions exist in 1 place instead of 7.
2026-03-02 19:28:31 +04:00
import { getRelayBaseUrl , getRelayHeaders , fetchWithTimeout } from './_relay.js' ;
2026-03-04 18:42:00 +04:00
import RSS _ALLOWED _DOMAINS from './_rss-allowed-domains.js' ;
2026-03-16 11:52:56 +04:00
import { jsonResponse } from './_json-response.js' ;
2026-02-15 20:33:20 +04:00
2026-01-11 07:34:57 +04:00
// Route-level configuration export requesting the 'edge' runtime.
// NOTE(review): presumably read by the hosting platform (comments in this file mention Vercel edge) — confirm.
export const config = { runtime : 'edge' } ;
2026-03-03 00:25:26 +04:00
// Domains that consistently block Vercel edge IPs — skip direct fetch,
// go straight to Railway relay to avoid wasted invocation + timeout.
// Membership is tested against the exact request hostname (no www normalization).
const RELAY_ONLY_DOMAINS = new Set([
  'rss.cnn.com',
  'www.defensenews.com',
  'layoffs.fyi',
  'news.un.org',
  'www.cisa.gov',
  'www.iaea.org',
  'www.who.int',
  'www.crisisgroup.org',
  'english.alarabiya.net',
  'www.arabnews.com',
  'www.timesofisrael.com',
  'www.scmp.com',
  'kyivindependent.com',
  'www.themoscowtimes.com',
  'feeds.24.com',
  'feeds.capi24.com',
  'islandtimes.org',
  'www.atlanticcouncil.org',
]);
2026-03-15 02:22:28 +04:00
// Request headers used for every direct (non-relay) fetch of a feed URL.
// Frozen because the same object is shared across all requests.
// NOTE(review): the desktop-browser User-Agent presumably avoids bot blocking — confirm intent.
const DIRECT_FETCH_HEADERS = Object.freeze({
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
  'Accept': 'application/rss+xml, application/xml, text/xml, */*',
  'Accept-Language': 'en-US,en;q=0.9',
});
2026-02-24 16:24:20 +00:00
/**
 * Fetch a feed through the relay's `/rss` endpoint instead of hitting the origin directly.
 *
 * @param {string} feedUrl - Absolute feed URL; passed percent-encoded in the `url` query parameter.
 * @param {number} timeoutMs - Timeout forwarded unchanged to `fetchWithTimeout`.
 * @returns {Promise<Response|null>} Whatever `fetchWithTimeout` resolves to, or `null`
 *   when `getRelayBaseUrl()` returns a falsy value (relay not configured).
 */
async function fetchViaRailway(feedUrl, timeoutMs) {
  const relayBaseUrl = getRelayBaseUrl();
  if (!relayBaseUrl) return null;

  // No whitespace may appear inside the literal: any stray space ends up in the URL.
  const relayUrl = `${relayBaseUrl}/rss?url=${encodeURIComponent(feedUrl)}`;

  return fetchWithTimeout(relayUrl, {
    // getRelayHeaders receives the base headers; presumably it adds relay auth headers — confirm in _relay.js.
    headers: getRelayHeaders({
      'Accept': 'application/rss+xml, application/xml, text/xml, */*',
      'User-Agent': 'WorldMonitor-RSS-Proxy/1.0',
    }),
  }, timeoutMs);
}
2026-03-04 18:42:00 +04:00
// Allowed RSS feed domains — shared source of truth (shared/rss-allowed-domains.js).
// Local alias of the default export imported from './_rss-allowed-domains.js'.
const ALLOWED_DOMAINS = RSS_ALLOWED_DOMAINS;
2026-01-11 07:34:57 +04:00
2026-03-15 02:22:28 +04:00
/**
 * Check a hostname against the feed allowlist, tolerating a `www.` prefix mismatch.
 *
 * The hostname is accepted when any of these forms is listed in ALLOWED_DOMAINS:
 * the hostname as given, the hostname with a leading `www.` removed, or the
 * hostname with `www.` prepended.
 *
 * @param {string} hostname - Hostname to check (e.g. `URL#hostname`).
 * @returns {boolean} `true` when one of the three forms is allowlisted.
 */
function isAllowedDomain(hostname) {
  const bare = hostname.replace(/^www\./, '');
  // The literal must contain no whitespace, otherwise the www-form can never match.
  const withWww = hostname.startsWith('www.') ? hostname : `www.${hostname}`;
  return [hostname, bare, withWww].some((candidate) => ALLOWED_DOMAINS.includes(candidate));
}
2026-03-20 12:37:24 +04:00
/**
 * Tell whether a feed URL points at Google News.
 *
 * @param {string} feedUrl - Candidate URL string.
 * @returns {boolean} `true` only when the string parses as a URL whose hostname is
 *   exactly `news.google.com`; `false` for any other host or an unparseable value.
 */
function isGoogleNewsFeedUrl(feedUrl) {
  let target;
  try {
    target = new URL(feedUrl);
  } catch {
    // Unparseable input is simply "not Google News".
    return false;
  }
  return target.hostname === 'news.google.com';
}
2026-01-11 07:34:57 +04:00
/**
 * Edge handler that proxies an allowlisted RSS/Atom feed.
 *
 * Request flow:
 *  1. Reject disallowed origins (403), answer CORS preflight (204), reject non-GET (405).
 *  2. Enforce API key (401) and rate limit (response supplied by `checkRateLimit`).
 *  3. Require a `url` query parameter (400) whose hostname passes `isAllowedDomain` (403).
 *  4. Fetch the feed: relay-only hosts go straight to the relay; other hosts are fetched
 *     directly and fall back to the relay when the direct fetch throws or is non-OK.
 *  5. Return the upstream body and status with cache headers and CORS headers.
 *
 * Redirects on the direct path are followed manually so that EVERY hop is checked
 * against the allowlist; the chain is capped to avoid redirect loops.
 *
 * @param {Request} req - Incoming request; only `method` and `url` are read here, the
 *   object itself is passed to the CORS / API-key / rate-limit helpers.
 * @returns {Promise<Response>} Proxied feed response, or a JSON error response
 *   (502 on fetch failure, 504 when the error is an `AbortError`).
 */
export default async function handler(req) {
  const corsHeaders = getCorsHeaders(req, 'GET, OPTIONS');

  if (isDisallowedOrigin(req)) {
    return jsonResponse({ error: 'Origin not allowed' }, 403, corsHeaders);
  }

  // Handle CORS preflight
  if (req.method === 'OPTIONS') {
    return new Response(null, { status: 204, headers: corsHeaders });
  }

  if (req.method !== 'GET') {
    return jsonResponse({ error: 'Method not allowed' }, 405, corsHeaders);
  }

  const keyCheck = validateApiKey(req);
  if (keyCheck.required && !keyCheck.valid) {
    return jsonResponse({ error: keyCheck.error }, 401, corsHeaders);
  }

  const rateLimitResponse = await checkRateLimit(req, corsHeaders);
  if (rateLimitResponse) return rateLimitResponse;

  const requestUrl = new URL(req.url);
  const feedUrl = requestUrl.searchParams.get('url');
  if (!feedUrl) {
    return jsonResponse({ error: 'Missing url parameter' }, 400, corsHeaders);
  }

  try {
    // An unparseable feed URL throws here and is reported by the catch below.
    const { hostname } = new URL(feedUrl);

    // Security: Check if domain is allowed (normalize www prefix)
    if (!isAllowedDomain(hostname)) {
      return jsonResponse({ error: 'Domain not allowed' }, 403, corsHeaders);
    }

    const isRelayOnly = RELAY_ONLY_DOMAINS.has(hostname);

    // Google News is slow - use longer timeout
    const isGoogleNews = isGoogleNewsFeedUrl(feedUrl);
    const timeout = isGoogleNews ? 20000 : 12000;

    // Upper bound on redirects followed on the direct path (guards against loops).
    const maxRedirectHops = 3;

    // Direct fetch with manual redirect handling. Each Location target is resolved
    // against the URL that issued it and must pass the same www-normalized allowlist
    // check as the initial URL (so canonical redirects such as
    // bbc.co.uk → www.bbc.co.uk are accepted) before it is requested.
    const fetchDirect = async () => {
      let currentUrl = feedUrl;
      for (let hop = 0; hop <= maxRedirectHops; hop += 1) {
        const hopResponse = await fetchWithTimeout(currentUrl, {
          headers: DIRECT_FETCH_HEADERS,
          redirect: 'manual',
        }, timeout);

        const isRedirect = hopResponse.status >= 300 && hopResponse.status < 400;
        const location = isRedirect ? hopResponse.headers.get('location') : null;
        // Non-redirects (and 3xx without a Location header) are returned as-is.
        if (!location) return hopResponse;

        const redirectUrl = new URL(location, currentUrl);
        if (!isAllowedDomain(redirectUrl.hostname)) {
          throw new Error('Redirect to disallowed domain');
        }
        currentUrl = redirectUrl.href;
      }
      throw new Error('Too many redirects');
    };

    let response;
    let usedRelay = false;

    if (isRelayOnly) {
      // Skip direct fetch entirely — these domains block Vercel IPs
      response = await fetchViaRailway(feedUrl, timeout);
      usedRelay = !!response;
      if (!response) throw new Error(`Railway relay unavailable for relay-only domain: ${hostname}`);
    } else {
      try {
        response = await fetchDirect();
      } catch (directError) {
        // Direct fetch failed outright: try the relay, surface the original error if it is unavailable.
        response = await fetchViaRailway(feedUrl, timeout);
        usedRelay = !!response;
        if (!response) throw directError;
      }

      // Direct fetch returned a non-OK status: prefer the relay's answer only when it is OK.
      if (!response.ok && !usedRelay) {
        const relayResponse = await fetchViaRailway(feedUrl, timeout);
        if (relayResponse?.ok) {
          response = relayResponse;
        }
      }
    }

    const data = await response.text();
    const isSuccess = response.status >= 200 && response.status < 300;

    // Relay-only feeds are slow-updating institutional sources — cache longer
    const cdnTtl = isRelayOnly ? 3600 : 900;
    const swr = isRelayOnly ? 7200 : 1800;
    const sie = isRelayOnly ? 14400 : 3600;
    const browserTtl = isRelayOnly ? 600 : 180;

    return new Response(data, {
      status: response.status,
      headers: {
        'Content-Type': response.headers.get('content-type') || 'application/xml',
        // Failures get a short cache lifetime.
        'Cache-Control': isSuccess
          ? `public, max-age=${browserTtl}, s-maxage=${cdnTtl}, stale-while-revalidate=${swr}, stale-if-error=${sie}`
          : 'public, max-age=15, s-maxage=60, stale-while-revalidate=120',
        ...(isSuccess && { 'CDN-Cache-Control': `public, s-maxage=${cdnTtl}, stale-while-revalidate=${swr}, stale-if-error=${sie}` }),
        ...corsHeaders,
      },
    });
  } catch (error) {
    const isTimeout = error.name === 'AbortError';
    console.error('RSS proxy error:', feedUrl, error.message);
    return jsonResponse({
      error: isTimeout ? 'Feed timeout' : 'Failed to fetch feed',
      details: error.message,
      url: feedUrl
    }, isTimeout ? 504 : 502, corsHeaders);
  }
}