"""Aggregate world-news RSS/Atom feeds into geocoded, categorised articles.

The public entry point is :func:`fetch_news`. Results are cached on disk in
``CACHE_FILE`` for ``CACHE_DURATION_SEC`` seconds.
"""
import asyncio
import json
import re
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional
from xml.etree import ElementTree as ET

import httpx

CACHE_FILE = Path(__file__).parent.parent / ".cache" / "news.json"
CACHE_DURATION_SEC = 300  # 5 minutes


def _read_cache_file():
    """Return the decoded contents of ``CACHE_FILE`` regardless of its age.

    Returns ``None`` when the file is missing, unreadable, or not valid JSON.
    """
    try:
        with open(CACHE_FILE, "r", encoding="utf-8") as f:
            return json.load(f)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return None


def _get_cached_news():
    """Return cached articles if the cache file is younger than the TTL.

    Returns ``None`` on a cache miss (no file, file too old, or unreadable).
    """
    try:
        age = datetime.now().timestamp() - CACHE_FILE.stat().st_mtime
    except OSError:
        # No cache file yet (or it cannot be stat'ed): treat as a miss.
        return None
    if age >= CACHE_DURATION_SEC:
        return None
    return _read_cache_file()


def _save_cache(data):
    """Best-effort write of ``data`` as JSON to ``CACHE_FILE``.

    Failures are logged and swallowed so that a cache problem (read-only
    disk, unserialisable value) never discards data the caller already has.
    """
    try:
        CACHE_FILE.parent.mkdir(exist_ok=True, parents=True)
        with open(CACHE_FILE, "w", encoding="utf-8") as f:
            json.dump(data, f)
    except (OSError, TypeError, ValueError) as e:
        print(f"[NEWS] Cache write failed: {e}")


FEEDS = [
    "http://www.aljazeera.com/xml/rss/all.xml",
    "http://feeds.bbci.co.uk/news/world/rss.xml",
    "https://www.reutersagency.com/feed/?best-topics=political-general&post_type=best",
    "https://www.theguardian.com/world/rss",
    "https://feeds.npr.org/1004/rss.xml",
    "https://foreignpolicy.com/feed/",
    "https://www.cnbc.com/id/100727362/device/rss/rss.html",
    "https://rss.nytimes.com/services/xml/rss/nyt/World.xml",
    "https://feeds.washingtonpost.com/rss/world",
]

# Expanded keyword geocoder — covers most geopolitically relevant regions.
# Values are [latitude, longitude].
GEO_DATA = {
    # Middle East
    "Iran": [32.4279, 53.6880],
    "Israel": [31.0461, 34.8516],
    "Gaza": [31.3547, 34.3088],
    "West Bank": [31.9466, 35.3027],
    "Lebanon": [33.8547, 35.8623],
    "Syria": [34.8021, 38.9968],
    "Yemen": [15.5527, 48.5164],
    "Iraq": [33.2232, 43.6793],
    "Saudi Arabia": [23.8859, 45.0792],
    "Jordan": [30.5852, 36.2384],
    "Kuwait": [29.3117, 47.4818],
    "Qatar": [25.3548, 51.1839],
    "UAE": [23.4241, 53.8478],
    "Bahrain": [26.0667, 50.5577],
    "Oman": [21.5126, 55.9233],
    # Europe
    "Ukraine": [48.3794, 31.1656],
    "Russia": [61.5240, 105.3188],
    "Germany": [51.1657, 10.4515],
    "France": [46.2276, 2.2137],
    "UK": [55.3781, -3.4360],
    "Poland": [51.9194, 19.1451],
    "Romania": [45.9432, 24.9668],
    "Finland": [61.9241, 25.7482],
    "Sweden": [60.1282, 18.6435],
    "Norway": [60.4720, 8.4689],
    "NATO": [50.8503, 4.3517],
    "Belarus": [53.7098, 27.9534],
    "Moldova": [47.4116, 28.3699],
    "Georgia": [42.3154, 43.3569],
    "Serbia": [44.0165, 20.9129],
    "Kosovo": [42.6026, 20.9030],
    # Asia-Pacific
    "China": [35.8617, 104.1954],
    "Taiwan": [23.6978, 120.9605],
    "North Korea": [40.3399, 127.5101],
    "South Korea": [35.9078, 127.7669],
    "Japan": [36.2048, 138.2529],
    "India": [20.5937, 78.9629],
    "Pakistan": [30.3753, 69.3451],
    "Afghanistan": [33.9391, 67.7100],
    "Myanmar": [21.9162, 95.9560],
    "Philippines": [12.8797, 121.7740],
    "Vietnam": [14.0583, 108.2772],
    "South China Sea": [12.0000, 113.0000],
    # Americas
    "USA": [37.0902, -95.7129],
    "Mexico": [23.6345, -102.5528],
    "Venezuela": [6.4238, -66.5897],
    "Colombia": [4.5709, -74.2973],
    "Cuba": [21.5218, -77.7812],
    "Nicaragua": [12.8654, -85.2072],
    "Haiti": [18.9712, -72.2852],
    "Brazil": [-14.2350, -51.9253],  # southern hemisphere: latitude is negative
    "Argentina": [-38.4161, -63.6167],
    "Chile": [-35.6751, -71.5430],
    "Peru": [-9.1900, -75.0152],
    "Guyana": [4.8604, -58.9302],
    # Central Asia & Caucasus
    "Kazakhstan": [48.0196, 66.9237],
    "Azerbaijan": [40.1431, 47.5769],
    "Armenia": [40.0691, 45.0382],
    "Nagorno-Karabakh": [39.8177, 46.7528],
    "Uzbekistan": [41.3775, 64.5853],
    "Kyrgyzstan": [41.2044, 74.7661],
    # Specific Conflict Regions & Strategic Spots
    # (Gaza and West Bank are listed once, under Middle East.)
    "Donbas": [48.0159, 37.8028],
    "Kashmir": [34.0837, 74.7973],
    "Sudan": [12.8628, 30.2176],
    "Darfur": [13.4175, 24.3311],
    "Tigray": [14.0323, 38.3166],
    "Somalia": [5.1521, 46.1996],
    "Suez Canal": [29.9329, 32.5539],
    "Panama Canal": [9.1012, -79.6967],
    "Bering Strait": [66.0, -169.0],
    "Malacca": [2.5, 102.0],
    # Cities
    "New York": [40.7128, -74.0060],
    "London": [51.5074, -0.1278],
    "Paris": [48.8566, 2.3522],
    "Brussels": [50.8503, 4.3517],
    "Geneva": [46.2044, 6.1432],
    "Vienna": [48.2082, 16.3738],
    "Istanbul": [41.0082, 28.9784],
    "Kyiv": [50.4501, 30.5234],
    "Moscow": [55.7558, 37.6173],
    "Tehran": [35.6892, 51.3890],
    "Beijing": [39.9042, 116.4074],
    "Tokyo": [35.6762, 139.6503],
    "Seoul": [37.5665, 126.9780],
}

# One precompiled whole-word pattern per region. Names with more words are
# tested first so that "South China Sea" is not shadowed by "China"; within
# the same word count the declaration order of GEO_DATA is kept (sorted() is
# stable). Built once at import: edits to GEO_DATA at runtime are not seen.
_GEO_PATTERNS = [
    (re.compile(r"\b" + re.escape(name) + r"\b", re.IGNORECASE), coords)
    for name, coords in sorted(
        GEO_DATA.items(), key=lambda entry: -len(entry[0].split())
    )
]

EXCLUDE_KEYWORDS = [
    "sport", "football", "soccer", "la liga", "champions league", "cup",
    "match", "olympics", "tennis", "nfl", "nba", "score", "goal",
    "premier league", "formula 1", "f1", "golf", "cricket", "rugby",
    "boxing", "mma", "celebrity", "oscars", "grammy", "fashion", "movie",
    "film", "series", "recipe", "weather forecast", "horoscope",
]

# Keywords this short are too likely to occur inside unrelated words
# ("iss" in "issue", "war" in "warning"), so they must match a whole word.
_WHOLE_WORD_MAX_LEN = 3


def _compile_keywords(keywords: list[str]) -> "re.Pattern[str]":
    """Build one case-insensitive pattern that matches any of ``keywords``.

    Every keyword must start at a word boundary, so "nfl" no longer matches
    inside "conflict" nor "cup" inside "occupation". Longer keywords match
    as word prefixes ("sport" -> "sports", "space" -> "spacecraft");
    keywords of ``_WHOLE_WORD_MAX_LEN`` characters or fewer must match a
    whole word, optionally followed by a plural "s".
    """
    alternatives = []
    for kw in keywords:
        escaped = re.escape(kw)
        if len(kw) <= _WHOLE_WORD_MAX_LEN:
            alternatives.append(escaped + r"s?\b")
        else:
            alternatives.append(escaped)
    if not alternatives:
        # An empty alternation would match everywhere; match nothing instead.
        return re.compile(r"(?!)")
    return re.compile(r"\b(?:" + "|".join(alternatives) + r")", re.IGNORECASE)


# Compiled once at import; runtime edits to EXCLUDE_KEYWORDS are not seen.
_EXCLUDE_RE = _compile_keywords(EXCLUDE_KEYWORDS)

# Checked in order; the first matching rule decides the category.
_CATEGORY_RULES = [
    ("CYBER", _compile_keywords(
        ["cyber", "hacking", "breach", "malware", "botnet"])),
    ("SPACE", _compile_keywords(
        ["satellite", "orbit", "rocket", "launch", "space", "iss"])),
    ("MILITARY", _compile_keywords(
        ["military", "army", "navy", "airforce", "missile", "strike", "war",
         "conflict", "nato", "defense"])),
]
_HIGH_SEVERITY_RE = _compile_keywords(
    ["attack", "strike", "crisis", "invasion", "nuclear", "killed"])
_CRITICAL_SEVERITY_RE = _compile_keywords(
    ["critical", "emergency", "declaration", "imminent"])

_ATOM_NS = '{http://www.w3.org/2005/Atom}'
_MEDIA_NS = '{http://search.yahoo.com/mrss/}'
_TEXT_NAMESPACES = (
    '{http://purl.org/dc/elements/1.1/}',
    '{http://purl.org/rss/1.0/}',
    _ATOM_NS,
)
_IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.gif', '.webp')
_MAX_ITEMS_PER_FEED = 10
_SUMMARY_MAX_CHARS = 200

# Strip HTML tags from RSS descriptions
_TAG_RE = re.compile(r'<[^>]+>')


def _strip_html(s: str) -> str:
    """Remove anything that looks like an HTML tag and trim whitespace."""
    return _TAG_RE.sub('', s).strip()


def _find_text(el: ET.Element, tag: str) -> str:
    """Find text for a tag, checking common RSS/Atom namespaces.

    The un-namespaced tag is tried first, then Dublin Core, RSS 1.0 and
    Atom. Returns the stripped text of the first child that has text, or
    ``''`` when none does.
    """
    node = el.find(tag)
    if node is not None and node.text:
        return node.text.strip()
    for ns in _TEXT_NAMESPACES:
        node = el.find(f'{ns}{tag}')
        if node is not None and node.text:
            return node.text.strip()
    return ''


def _geocode(title: str) -> tuple:
    """Return ``(lat, lon)`` of the first known region named in ``title``.

    Returns ``(None, None)`` when no region from ``GEO_DATA`` is mentioned.
    """
    for pattern, (lat, lon) in _GEO_PATTERNS:
        if pattern.search(title):
            return lat, lon
    return None, None


def _classify(title: str) -> tuple:
    """Return ``(category, severity)`` derived from keywords in ``title``."""
    category = "GEOPOLITICS"
    for name, pattern in _CATEGORY_RULES:
        if pattern.search(title):
            category = name
            break

    severity = "MODERATE"
    if _HIGH_SEVERITY_RE.search(title):
        severity = "HIGH"
    # CRITICAL keywords override HIGH ones.
    if _CRITICAL_SEVERITY_RE.search(title):
        severity = "CRITICAL"
    return category, severity


def _has_image_extension(url: str) -> bool:
    """Tell whether the URL path (query/fragment ignored) ends like an image."""
    path = url.split('?', 1)[0].split('#', 1)[0]
    return path.lower().endswith(_IMAGE_EXTENSIONS)


def _extract_image(item: ET.Element) -> str:
    """Return an image URL from enclosure / Media RSS elements, or ``''``."""
    enc = item.find('enclosure')
    if enc is not None:
        url = enc.get('url', '')
        if url and ('image' in enc.get('type', '') or _has_image_extension(url)):
            return url

    media = item.find(f'{_MEDIA_NS}content')
    if media is not None:
        url = media.get('url', '')
        is_image = (
            media.get('medium') == 'image'
            or media.get('type', '').startswith('image/')
            or _has_image_extension(url)
        )
        if url and is_image:
            return url

    # media:thumbnail is an image by definition, so no type check is needed.
    thumb = item.find(f'{_MEDIA_NS}thumbnail')
    if thumb is not None:
        return thumb.get('url', '')
    return ''


def _feed_title(root: ET.Element) -> str:
    """Return the feed's own title, or ``'Global Intel'`` if it has none."""
    channel = root.find('channel')
    if channel is not None:
        text = channel.findtext('title')
    else:
        # Atom feed: the title is a direct child of the root element.
        text = root.findtext(f'{_ATOM_NS}title')
    return (text or '').strip() or 'Global Intel'


def _parse_item(item: ET.Element, feed_title: str) -> Optional[dict]:
    """Convert one RSS ``<item>`` / Atom ``<entry>`` into an article dict.

    Returns ``None`` when the item has no title or its title matches
    ``EXCLUDE_KEYWORDS``.
    """
    title = _find_text(item, 'title')
    if not title or _EXCLUDE_RE.search(title):
        return None

    lat, lon = _geocode(title)

    link = _find_text(item, 'link')
    if not link:
        # Atom carries the URL in an href attribute, not in element text.
        link_el = item.find(f'{_ATOM_NS}link')
        if link_el is not None:
            link = link_el.get('href', '')

    desc = _find_text(item, 'description') or _find_text(item, 'summary')
    desc = _strip_html(desc)[:_SUMMARY_MAX_CHARS]

    pub_date = (
        _find_text(item, 'pubDate')
        or _find_text(item, 'published')
        or _find_text(item, 'updated')  # Atom entries without <published>
        or _find_text(item, 'date')     # Dublin Core dc:date
        or datetime.now(timezone.utc).isoformat()
    )

    category, severity = _classify(title)

    return {
        "title": title,
        "source": feed_title,
        "url": link,
        "image": _extract_image(item) or None,
        "lat": lat,
        "lon": lon,
        "summary": desc or "No details available.",
        "published_at": pub_date,
        "category": category,
        "severity": severity,
    }


async def _fetch_single_feed(client: httpx.AsyncClient, feed_url: str) -> list[dict]:
    """Fetch a single RSS feed directly and parse XML items.

    Returns at most ``_MAX_ITEMS_PER_FEED`` article dicts. Never raises:
    a non-200 response yields ``[]``, and any error is logged and whatever
    was parsed before it is returned.
    """
    articles: list[dict] = []
    try:
        resp = await client.get(feed_url, timeout=12.0, follow_redirects=True)
        if resp.status_code != 200:
            return []

        # NOTE(security): feeds are third-party input and the stdlib docs
        # warn that xml.etree.ElementTree is not secure against maliciously
        # constructed data. Consider defusedxml if it becomes a dependency.
        root = ET.fromstring(resp.content)
        feed_title = _feed_title(root)

        # Find items — RSS uses <item>, Atom uses <entry>
        items = root.findall('.//item') or root.findall(f'.//{_ATOM_NS}entry')

        for item in items[:_MAX_ITEMS_PER_FEED]:
            article = _parse_item(item, feed_title)
            if article is not None:
                articles.append(article)
    except Exception as e:
        # Boundary handler: one broken feed must not take down the others.
        print(f"[NEWS] Feed error ({feed_url[:60]}): {e}")

    return articles


async def fetch_news():
    """Fetch all RSS feeds in parallel and return combined articles.

    A cache younger than ``CACHE_DURATION_SEC`` is returned without any
    network access. Otherwise every feed in ``FEEDS`` is fetched
    concurrently. If no feed yields anything, the previously cached articles
    (if any) are served and re-saved, so that an outage neither replaces
    good data with an empty list nor triggers a refetch on every call.

    Returns:
        A list of article dicts (possibly empty). Never raises.
    """
    cached = _get_cached_news()
    if cached is not None:
        return cached

    try:
        async with httpx.AsyncClient(
            headers={"User-Agent": "GodsEye/2.0 RSS Reader"},
        ) as client:
            results = await asyncio.gather(
                *[_fetch_single_feed(client, url) for url in FEEDS],
                return_exceptions=True,
            )

        articles: list[dict] = []
        for r in results:
            # return_exceptions=True puts exception objects in results.
            if isinstance(r, list):
                articles.extend(r)

        print(f"[NEWS] Fetched {len(articles)} intelligence items from {len(FEEDS)} feeds.")

        if not articles:
            stale = _read_cache_file()
            if stale:
                articles = stale
        _save_cache(articles)
        return articles

    except Exception as e:
        print(f"[NEWS] Critical error: {e}")
        # fallback to stale cache if error
        stale = _read_cache_file()
        return stale if stale is not None else []