""" Buying Signal Detector — pure-function extractor that turns raw enrichment data (website html, places info, tech stack hits, contact info) into a list of typed buying-signals with confidence + source_url. Signal types: website_form — has /contact /demo form (= takes inbound) whatsapp_button — wa.me link or WhatsApp widget present booking_link — Calendly / direct booking pages pricing_page — /pricing or /baqat exists careers_hiring — /careers /jobs page or hiring text crm_in_use — HubSpot/Salesforce/Zoho/Bitrix snippets payment_mena — Moyasar/Tap/PayTabs/HyperPay snippets ecom_mena — Salla/Zid/Shopify/WooCommerce chat_widget — Intercom/Drift/Crisp/Tawk/WhatsApp ads_pixel — Meta Pixel / GA4 / Google Tag high_review_count — Google Maps reviews_count >= 50 high_rating — rating >= 4.3 with 20+ reviews multi_branch — multiple cities / branches mentioned new_site_or_redirect — recent rebrand signal sector_urgency — sector inherent: real_estate/events/logistics Output: list[BuyingSignal] with type, confidence (0..1), value, source_url. Used by scoring.compute_lead_score (intent_score + urgency_score lift). """ from __future__ import annotations import re from dataclasses import asdict, dataclass, field from typing import Any # ── Sector urgency tiers (always-on signals derived from the sector itself) ─ HIGH_URGENCY_SECTORS = { "real_estate", "real_estate_developer", "events", "logistics", "hospitality", "hotel", "wedding_hall", } MEDIUM_URGENCY_SECTORS = { "restaurant", "cafe", "fitness_gym", "salon_spa", "training_center", "dental_clinic", "medical_clinic", "cosmetic_clinic", } @dataclass class BuyingSignal: type: str value: str confidence: float # 0.0..1.0 source_url: str | None detected_via: str # rule | wappalyzer | google_places | website_crawl def to_dict(self) -> dict[str, Any]: return asdict(self) # Patterns for HTML/markdown body WHATSAPP_PATTERNS = [ re.compile(r"wa\.me/", re.IGNORECASE), re.compile(r"api\.whatsapp\.com/send", re.IGNORECASE), re.compile(r"whatsapp\.com/send\?", re.IGNORECASE), re.compile(r"chat[-_]?on[-_]?whatsapp", re.IGNORECASE), ] BOOKING_PATTERNS = [ re.compile(r"calendly\.com", re.IGNORECASE), re.compile(r"meetings\.hubspot", re.IGNORECASE), re.compile(r"book[-_]?(?:now|appointment|demo)", re.IGNORECASE), re.compile(r"احجز|حجز[\s-]?موعد", re.IGNORECASE), ] PRICING_PATTERNS = [ re.compile(r"/pricing", re.IGNORECASE), re.compile(r"/plans?", re.IGNORECASE), re.compile(r"/baqat", re.IGNORECASE), # باقات re.compile(r"الأسعار|باقات|الباقات", re.IGNORECASE), ] CAREERS_PATTERNS = [ re.compile(r"/careers", re.IGNORECASE), re.compile(r"/jobs", re.IGNORECASE), re.compile(r"الوظائف|توظيف|نبحث عن", re.IGNORECASE), ] FORM_PATTERNS = [ re.compile(r"