"""Trend analyzer -- RSS feeds + LLM to identify relevant trending topics.""" from __future__ import annotations import json import logging from typing import Any from xml.etree import ElementTree import httpx logger = logging.getLogger(__name__) # --------------------------------------------------------------------------- # RSS feeds relevant to Sami's brand pillars # --------------------------------------------------------------------------- _RSS_FEEDS: list[dict[str, str]] = [ { "name": "Aviation Security International", "url": "https://www.asi-mag.com/feed/", "category": "aviation_security", }, { "name": "Airport Technology", "url": "https://www.airport-technology.com/feed/", "category": "airport_tech", }, { "name": "Security Today", "url": "https://securitytoday.com/rss-feeds/news.aspx", "category": "security_industry", }, { "name": "GACA News (Saudi)", "url": "https://gaca.gov.sa/web/en/rss", "category": "gaca", }, { "name": "ICAO Newsroom", "url": "https://www.icao.int/Newsroom/Pages/RSS.aspx", "category": "icao", }, ] # System prompt for the trend-analysis LLM call _SYSTEM_PROMPT = """\ You are a content strategist for a Field Services Engineer specializing in airport security equipment (Smiths Detection). Analyze the provided news headlines and identify trending topics that are relevant for LinkedIn content creation. For each trend, return a JSON array of objects with: - "topic": concise topic title - "relevance": "high" | "medium" | "low" - "pillar": one of "tech_insights", "field_life", "professional_growth", "industry_news" - "angle": a brief suggestion for how to turn this into engaging LinkedIn content - "source": the feed or keyword that surfaced it Return ONLY a valid JSON array. Limit to the top 10 most relevant trends. """ async def analyze_trends( llm_client: Any, keywords: list[str], brand_profile: dict, ) -> list[dict]: """Scan RSS feeds and use the LLM to identify relevant trending topics. Parameters ---------- llm_client: An :class:`LLMClient` instance. keywords: Search terms aligned with the brand pillars. brand_profile: Parsed ``brand_profile.yaml`` dict. Returns ------- list[dict] Each dict contains ``topic``, ``relevance``, ``pillar``, ``angle``, ``source``. """ # Step 1: Fetch RSS headlines headlines = await _fetch_rss_headlines() # Step 2: Build LLM prompt personal = brand_profile.get("personal", {}) user_prompt = f"""\ Professional context: - Name: {personal.get('name_en', '')} - Role: {personal.get('title_en', '')} - Specialization: Smiths Detection airport security equipment (HI-SCAN, IONSCAN 600, CTX) - Keywords of interest: {', '.join(keywords)} Recent industry headlines: {_format_headlines(headlines)} Identify the top trending topics relevant to this professional's LinkedIn brand. Return ONLY a valid JSON array. """ response = await llm_client.generate( prompt=user_prompt, system_prompt=_SYSTEM_PROMPT, temperature=0.5, max_tokens=2000, ) trends = _parse_trends_response(response.text) logger.info("Identified %d trends from %d headlines", len(trends), len(headlines)) return trends # --------------------------------------------------------------------------- # RSS fetching # --------------------------------------------------------------------------- async def _fetch_rss_headlines(timeout: float = 15.0) -> list[dict]: """Fetch headlines from all configured RSS feeds. Returns a list of dicts with ``title``, ``link``, ``source``, ``published``. Feeds that fail to load are silently skipped. """ headlines: list[dict] = [] async with httpx.AsyncClient(timeout=timeout, follow_redirects=True) as client: for feed in _RSS_FEEDS: try: resp = await client.get(feed["url"]) resp.raise_for_status() items = _parse_rss_xml(resp.text, source=feed["name"]) headlines.extend(items) logger.debug("Fetched %d items from %s", len(items), feed["name"]) except Exception as exc: logger.warning("RSS fetch failed for %s: %s", feed["name"], exc) return headlines def _parse_rss_xml(xml_text: str, source: str) -> list[dict]: """Parse RSS/Atom XML and extract headline items.""" items: list[dict] = [] try: root = ElementTree.fromstring(xml_text) except ElementTree.ParseError: logger.warning("Failed to parse XML from %s", source) return items # Standard RSS 2.0 for item in root.iter("item"): title_el = item.find("title") link_el = item.find("link") pub_el = item.find("pubDate") if title_el is not None and title_el.text: items.append( { "title": title_el.text.strip(), "link": link_el.text.strip() if link_el is not None and link_el.text else "", "source": source, "published": pub_el.text.strip() if pub_el is not None and pub_el.text else "", } ) # Atom feeds (namespace-aware) atom_ns = "{http://www.w3.org/2005/Atom}" for entry in root.iter(f"{atom_ns}entry"): title_el = entry.find(f"{atom_ns}title") link_el = entry.find(f"{atom_ns}link") pub_el = entry.find(f"{atom_ns}published") or entry.find(f"{atom_ns}updated") if title_el is not None and title_el.text: link_href = "" if link_el is not None: link_href = link_el.get("href", link_el.text or "") items.append( { "title": title_el.text.strip(), "link": link_href.strip() if link_href else "", "source": source, "published": pub_el.text.strip() if pub_el is not None and pub_el.text else "", } ) return items[:20] # Cap per feed to keep prompt manageable # --------------------------------------------------------------------------- # Formatting & parsing # --------------------------------------------------------------------------- def _format_headlines(headlines: list[dict]) -> str: """Format headlines into a numbered list for the LLM prompt.""" if not headlines: return "(No headlines fetched -- generate trends based on domain knowledge.)" lines: list[str] = [] for i, h in enumerate(headlines[:50], start=1): # Cap at 50 total lines.append(f"{i}. [{h['source']}] {h['title']}") return "\n".join(lines) def _parse_trends_response(text: str) -> list[dict]: """Extract a JSON array of trends from the LLM response.""" cleaned = text.strip() # Strip markdown code fences if cleaned.startswith("```"): first_newline = cleaned.index("\n") cleaned = cleaned[first_newline + 1 :] if cleaned.endswith("```"): cleaned = cleaned[: -len("```")].rstrip() try: parsed = json.loads(cleaned) if isinstance(parsed, list): return parsed # Some models wrap in an object if isinstance(parsed, dict) and "trends" in parsed: return parsed["trends"] return [parsed] except json.JSONDecodeError: logger.warning("Failed to parse trend analysis JSON; returning empty list") return []