import httpx import asyncio import re from datetime import datetime, timezone from difflib import SequenceMatcher # GDELT DOC 2.0 API — free, no key # Focused specifically on military attacks and kinetic events GDELT_URL = ( "https://api.gdeltproject.org/api/v2/doc/doc?" "query=(missile attack OR rocket attack OR airstrike OR air strike OR " "bombing OR drone strike OR artillery fire OR shelling OR warship attack OR " "naval attack OR military strike OR armed attack OR mortar OR explosion site OR " "IED explosion OR sniper OR combat OR military offensive OR invasion force)" "&mode=artlist&format=json&maxrecords=75&sourcelang=english×pan=12h" ) # Geo-coder: keyword → (lat, lon) GEO_DATA = { # Ukraine conflict zones "Ukraine": [48.3794, 31.1656], "Crimea": [44.9521, 34.1024], "Donbas": [48.0159, 37.8028], "Donbass": [48.0159, 37.8028], "Kherson": [46.6354, 32.6169], "Zaporizhzhia": [47.8388, 35.1396], "Kharkiv": [49.9935, 36.2304], "Kyiv": [50.4501, 30.5234], "Kiev": [50.4501, 30.5234], "Odesa": [46.4825, 30.7233], "Mariupol": [47.0958, 37.5483], "Bakhmut": [48.5963, 38.0000], "Avdiivka": [48.1344, 37.7490], # Russia "Russia": [61.5240, 105.3188], "Moscow": [55.7558, 37.6173], "Belarus": [53.7098, 27.9534], # Middle East "Israel": [31.0461, 34.8516], "Gaza": [31.3547, 34.3088], "West Bank": [31.9466, 35.3027], "Rafah": [31.2969, 34.2455], "Jenin": [32.4607, 35.3027], "Lebanon": [33.8547, 35.8623], "Hezbollah": [33.8547, 35.8623], "Syria": [34.8021, 38.9968], "Damascus": [33.5138, 36.2765], "Aleppo": [36.2021, 37.1343], "Iran": [32.4279, 53.6880], "Tehran": [35.6892, 51.3890], "Iraq": [33.2232, 43.6793], "Baghdad": [33.3152, 44.3661], "Yemen": [15.5527, 48.5164], "Houthi": [15.5527, 48.5164], "Saudi Arabia": [23.8859, 45.0792], # Asia-Pacific "China": [35.8617, 104.1954], "Taiwan": [23.6978, 120.9605], "Taipei": [25.0330, 121.5654], "Taiwan Strait": [24.0000, 119.5000], "North Korea": [40.3399, 127.5101], "Pyongyang": [39.0194, 125.7381], "South Korea": [35.9078, 127.7669], "Japan": [36.2048, 138.2529], "Philippines": [12.8797, 121.7740], "South China Sea": [12.0000, 113.0000], "Myanmar": [21.9162, 95.9560], "India": [20.5937, 78.9629], "Pakistan": [30.3753, 69.3451], "Afghanistan": [33.9391, 67.7100], "Kashmir": [34.0837, 74.7973], # Africa "Sudan": [12.8628, 30.2176], "Ethiopia": [9.1450, 40.4897], "Somalia": [5.1521, 46.1996], "Libya": [26.3351, 17.2283], "Mali": [17.5707, -3.9962], "Niger": [17.6078, 8.0817], "Nigeria": [9.0820, 8.6753], "Congo": [-4.0383, 21.7587], "Sahel": [15.4542, 0.0000], "Burkina Faso": [12.2383, -1.5616], "Mozambique": [-18.6657, 35.5296], # Americas "Venezuela": [6.4238, -66.5897], "Colombia": [4.5709, -74.2973], "Mexico": [23.6345, -102.5528], # Strategic waterways "Red Sea": [20.0000, 38.0000], "Strait of Hormuz": [26.5667, 56.2500], "Bab-el-Mandeb": [12.6, 43.5], "Suez Canal": [30.4550, 32.3500], "Black Sea": [43.4000, 34.0000], "Baltic Sea": [58.0000, 20.0000], "East China Sea": [30.0000, 126.0000], "Sea of Azov": [46.0000, 36.5000], "Persian Gulf": [26.0000, 52.0000], # Palestine "Palestine": [31.9522, 35.2332], "Jerusalem": [31.7683, 35.2137], "Tel Aviv": [32.0853, 34.7818], "Nablus": [32.2211, 35.2544], } # Attack type classifier — ordered by priority (most specific first) # Each entry: (search_term, event_label, severity) ATTACK_CLASSIFIER = [ ("ballistic missile", "BALLISTIC MISSILE", "CRITICAL"), ("cruise missile", "CRUISE MISSILE", "CRITICAL"), ("hypersonic missile", "HYPERSONIC MISSILE", "CRITICAL"), ("missile strike", "MISSILE STRIKE", "CRITICAL"), ("missile attack", "MISSILE ATTACK", "CRITICAL"), ("rocket attack", "ROCKET ATTACK", "CRITICAL"), ("rocket barrage", "ROCKET BARRAGE", "CRITICAL"), ("drone strike", "DRONE STRIKE", "CRITICAL"), ("drone attack", "DRONE ATTACK", "CRITICAL"), ("airstrike", "AIRSTRIKE", "CRITICAL"), ("air strike", "AIRSTRIKE", "CRITICAL"), ("air raid", "AIR RAID", "CRITICAL"), ("bombing", "BOMBING", "CRITICAL"), ("bomb", "EXPLOSION", "HIGH"), ("shelling", "ARTILLERY SHELLING", "HIGH"), ("artillery", "ARTILLERY FIRE", "HIGH"), ("mortar", "MORTAR ATTACK", "HIGH"), ("ied explosion", "IED EXPLOSION", "HIGH"), ("explosion", "EXPLOSION", "HIGH"), ("sniper", "SNIPER ACTIVITY", "MODERATE"), ("naval attack", "NAVAL ATTACK", "HIGH"), ("warship", "NAVAL ACTIVITY", "MODERATE"), ("invasion", "INVASION", "CRITICAL"), ("offensive", "MILITARY OFFENSIVE", "HIGH"), ("combat", "COMBAT", "HIGH"), ("attack", "ATTACK", "HIGH"), ("strike", "STRIKE", "HIGH"), ("troops", "GROUND FORCES", "MODERATE"), ("military", "MILITARY ACTIVITY", "MODERATE"), ] SEVERITY_ORDER = {"CRITICAL": 0, "HIGH": 1, "MODERATE": 2} EXCLUDE_KEYWORDS = [ "sport", "football", "soccer", "la liga", "cup", "olympics", "tennis", "nfl", "nba", "golf", "cricket", "rugby", "celebrity", "fashion", "movie", "film", "oscars", "grammy", "recipe", "horoscope", "box office", "netflix", "rocket launch", "spacex", "starship", "nasa launch", # civilian launches ] def classify_event(title: str) -> tuple[str, str]: """Returns (event_label, severity) for a conflict title.""" tl = title.lower() for term, label, severity in ATTACK_CLASSIFIER: if term in tl: return label, severity return "MILITARY EVENT", "MODERATE" def _title_similarity(a: str, b: str) -> float: return SequenceMatcher(None, a.lower(), b.lower()).ratio() def _parse_gdelt_date(seendate: str) -> str: try: dt = datetime.strptime(seendate, "%Y%m%dT%H%M%SZ") return dt.replace(tzinfo=timezone.utc).isoformat() except Exception: return datetime.now(timezone.utc).isoformat() async def fetch_conflicts() -> list: """ Fetch military attack/conflict events from GDELT DOC API. Returns events classified by attack type and severity. """ try: async with httpx.AsyncClient() as client: # Retry with backoff for 429 rate limits for attempt in range(3): if attempt > 0: await asyncio.sleep(6 * attempt) response = await client.get(GDELT_URL, timeout=12.0) if response.status_code == 429: print(f"[CONFLICTS] GDELT rate limited, retry {attempt+1}/3") continue break if response.status_code != 200: print(f"[CONFLICTS] GDELT returned {response.status_code}") return [] try: data = response.json() if response.text.strip() else {} except Exception: print(f"[CONFLICTS] GDELT returned non-JSON body") return [] raw_articles = data.get("articles", []) if not raw_articles: print("[CONFLICTS] No articles in GDELT response") return [] events = [] for article in raw_articles: title = (article.get("title") or "").strip() if not title: continue tl = title.lower() # Filter out non-conflict content if any(kw in tl for kw in EXCLUDE_KEYWORDS): continue # Geocode lat, lon = None, None for region, coords in GEO_DATA.items(): if re.search(r'\b' + re.escape(region) + r'\b', title, re.IGNORECASE): lat, lon = coords break if lat is None or lon is None: continue if not (-90 <= lat <= 90) or not (-180 <= lon <= 180): continue # Deduplicate by title similarity is_duplicate = any( _title_similarity(title, ev["title"]) > 0.75 for ev in events ) if is_duplicate: continue event_label, severity = classify_event(title) seendate = article.get("seendate", "") published = _parse_gdelt_date(seendate) if seendate else datetime.now(timezone.utc).isoformat() events.append({ "title": title, "url": article.get("url", ""), "image": article.get("urlMobileImage", ""), "source": article.get("domain", "Unknown"), "domain": article.get("sourcecountry", "Unknown"), "lat": lat, "lon": lon, "published_at": published, "event_type": event_label, "severity": severity, "type": "conflict", }) # Sort: CRITICAL first, then by date events.sort(key=lambda e: ( SEVERITY_ORDER.get(e["severity"], 3), e["published_at"] ), reverse=False) events.sort(key=lambda e: SEVERITY_ORDER.get(e["severity"], 3)) events = events[:40] by_sev = {} for e in events: s = e["severity"] by_sev[s] = by_sev.get(s, 0) + 1 print(f"[CONFLICTS] {len(events)} military events — {by_sev} (from {len(raw_articles)} raw)") return events except Exception as e: print(f"[CONFLICTS] Fetch error: {e}") return [] if __name__ == "__main__": result = asyncio.run(fetch_conflicts()) print(f"\nMilitary events: {len(result)}") for ev in result[:15]: print(f" [{ev['severity']:8s}] [{ev['event_type']:22s}] {ev['title'][:70]}")