mirror of
https://github.com/x1xhlol/system-prompts-and-models-of-ai-tools.git
synced 2026-06-18 15:29:36 +00:00
568 lines
25 KiB
Python
568 lines
25 KiB
Python
"""
|
|
Dealix Lead Generation Engine — Multi-Source Intelligence
|
|
============================================================
|
|
محرك استخراج عملاء متعدد المصادر — مثل Apollo + ZoomInfo + Lusha + Hunter.
|
|
كل المصادر الممكنة لاستخراج ليدات بمعلومات حقيقية ومتحققة.
|
|
"""
|
|
import asyncio
|
|
import json
|
|
import logging
|
|
import os
|
|
import re
|
|
from datetime import datetime, timezone
|
|
from typing import Dict, List, Optional, Tuple
|
|
import httpx
|
|
from app.agents.base_agent import BaseAgent, AgentPriority
|
|
|
|
logger = logging.getLogger("dealix.engine.leads")
|
|
|
|
|
|
# ══════════════════════════════════════════════════════════════
|
|
# Lead Sources — كل الطرق الممكنة لاستخراج ليدات
|
|
# ══════════════════════════════════════════════════════════════
|
|
|
|
LEAD_SOURCES = {
|
|
"google_maps": {
|
|
"name": "Google Maps / Places API",
|
|
"type": "primary",
|
|
"data_available": ["company_name", "phone", "address", "website", "rating", "reviews_count", "category", "hours", "photos"],
|
|
"accuracy": "high",
|
|
"coverage": "Saudi Arabia full coverage",
|
|
"cost": "pay_per_call",
|
|
"phones_quality": "verified_business_lines",
|
|
},
|
|
"google_search": {
|
|
"name": "Google Custom Search",
|
|
"type": "secondary",
|
|
"data_available": ["company_name", "website", "description", "social_links"],
|
|
"accuracy": "medium",
|
|
"coverage": "global",
|
|
"cost": "free_tier_available",
|
|
"phones_quality": "scraped_from_website",
|
|
},
|
|
"linkedin_search": {
|
|
"name": "LinkedIn Sales Navigator",
|
|
"type": "primary",
|
|
"data_available": ["person_name", "title", "company", "industry", "company_size", "location", "connections"],
|
|
"accuracy": "high",
|
|
"coverage": "global_professional",
|
|
"cost": "subscription",
|
|
"phones_quality": "requires_enrichment",
|
|
},
|
|
"saudi_cr": {
|
|
"name": "Saudi Commercial Registry (SOCPA/MC)",
|
|
"type": "secondary",
|
|
"data_available": ["company_name", "cr_number", "activity", "city", "registration_date"],
|
|
"accuracy": "very_high",
|
|
"coverage": "Saudi Arabia only",
|
|
"cost": "free_public_data",
|
|
"phones_quality": "official_records",
|
|
},
|
|
"yellow_pages_sa": {
|
|
"name": "Yellow Pages Saudi / daleel.com",
|
|
"type": "secondary",
|
|
"data_available": ["company_name", "phone", "fax", "address", "category", "website"],
|
|
"accuracy": "medium",
|
|
"coverage": "Saudi Arabia",
|
|
"cost": "free_scrape",
|
|
"phones_quality": "listed_business_lines",
|
|
},
|
|
"website_scraping": {
|
|
"name": "Company Website Scraping",
|
|
"type": "enrichment",
|
|
"data_available": ["phones_from_contact", "emails", "team_members", "tech_stack", "social_profiles"],
|
|
"accuracy": "high",
|
|
"coverage": "companies_with_websites",
|
|
"cost": "compute_only",
|
|
"phones_quality": "direct_from_source",
|
|
},
|
|
"whois_lookup": {
|
|
"name": "WHOIS Domain Lookup",
|
|
"type": "enrichment",
|
|
"data_available": ["domain_owner", "registrant_email", "registrant_phone", "creation_date"],
|
|
"accuracy": "medium",
|
|
"coverage": "domain_owners",
|
|
"cost": "free",
|
|
"phones_quality": "domain_registrant",
|
|
},
|
|
"social_media": {
|
|
"name": "Social Media (Twitter/X, Instagram, Facebook Pages)",
|
|
"type": "enrichment",
|
|
"data_available": ["bio", "followers", "posts", "contact_info", "hashtags"],
|
|
"accuracy": "medium",
|
|
"coverage": "active_social_companies",
|
|
"cost": "api_access",
|
|
"phones_quality": "from_bio_or_posts",
|
|
},
|
|
"industry_directories": {
|
|
"name": "Industry-Specific Directories",
|
|
"type": "secondary",
|
|
"data_available": ["company_name", "sector", "services", "certifications", "phone", "email"],
|
|
"accuracy": "high",
|
|
"coverage": "sector_specific",
|
|
"cost": "varies",
|
|
"phones_quality": "verified_listings",
|
|
},
|
|
"government_portals": {
|
|
"name": "Saudi Government Portals (Etimad, Muqeem, etc.)",
|
|
"type": "secondary",
|
|
"data_available": ["company_name", "license_number", "activity", "status"],
|
|
"accuracy": "very_high",
|
|
"coverage": "Saudi Arabia",
|
|
"cost": "free_public",
|
|
"phones_quality": "official",
|
|
},
|
|
"event_attendees": {
|
|
"name": "Conference & Event Registrations",
|
|
"type": "intent_signal",
|
|
"data_available": ["person_name", "company", "title", "email", "phone"],
|
|
"accuracy": "high",
|
|
"coverage": "event_specific",
|
|
"cost": "varies",
|
|
"phones_quality": "self_reported_fresh",
|
|
},
|
|
"job_postings": {
|
|
"name": "Job Posting Analysis (LinkedIn, Jadarat, etc.)",
|
|
"type": "intent_signal",
|
|
"data_available": ["company_name", "growth_signal", "tech_stack", "budget_signal"],
|
|
"accuracy": "high",
|
|
"coverage": "hiring_companies",
|
|
"cost": "free_scrape",
|
|
"phones_quality": "hr_contacts",
|
|
},
|
|
}
|
|
|
|
|
|
# ══════════════════════════════════════════════════════════════
|
|
# Phone Verification Pipeline
|
|
# ══════════════════════════════════════════════════════════════
|
|
|
|
class PhoneVerifier:
|
|
"""تحقق من صحة الأرقام السعودية."""
|
|
|
|
SAUDI_MOBILE_PATTERNS = [
|
|
r'^05\d{8}$', # 05xxxxxxxx
|
|
r'^\+9665\d{8}$', # +9665xxxxxxxx
|
|
r'^9665\d{8}$', # 9665xxxxxxxx
|
|
]
|
|
|
|
SAUDI_LANDLINE_PATTERNS = [
|
|
r'^01[1-9]\d{7}$', # 01xxxxxxxx (Riyadh)
|
|
r'^02\d{7}$', # 02xxxxxxx (Makkah/Jeddah)
|
|
r'^03\d{7}$', # 03xxxxxxx (Eastern)
|
|
r'^04\d{7}$', # 04xxxxxxx (Madinah)
|
|
r'^06\d{7}$', # 06xxxxxxx
|
|
r'^07\d{7}$', # 07xxxxxxx
|
|
]
|
|
|
|
@staticmethod
|
|
def normalize(phone: str) -> str:
|
|
"""Normalize phone number to international format."""
|
|
phone = re.sub(r'[\s\-\(\)\+]', '', phone)
|
|
if phone.startswith('00966'):
|
|
phone = '966' + phone[5:]
|
|
elif phone.startswith('0') and len(phone) == 10:
|
|
phone = '966' + phone[1:]
|
|
elif phone.startswith('+'):
|
|
phone = phone[1:]
|
|
return phone
|
|
|
|
@staticmethod
|
|
def is_valid_saudi(phone: str) -> dict:
|
|
"""Validate a Saudi phone number."""
|
|
normalized = PhoneVerifier.normalize(phone)
|
|
is_mobile = any(re.match(p, normalized) or re.match(p, '0' + normalized[-9:])
|
|
for p in PhoneVerifier.SAUDI_MOBILE_PATTERNS)
|
|
is_landline = any(re.match(p, '0' + normalized[-9:]) if len(normalized) > 9 else re.match(p, normalized)
|
|
for p in PhoneVerifier.SAUDI_LANDLINE_PATTERNS)
|
|
|
|
return {
|
|
"original": phone,
|
|
"normalized": normalized,
|
|
"international": f"+{normalized}" if not normalized.startswith('+') else normalized,
|
|
"whatsapp_format": normalized,
|
|
"is_valid": is_mobile or is_landline,
|
|
"type": "mobile" if is_mobile else ("landline" if is_landline else "unknown"),
|
|
"can_whatsapp": is_mobile,
|
|
"can_call": True if (is_mobile or is_landline) else False,
|
|
}
|
|
|
|
@staticmethod
|
|
async def check_whatsapp_exists(phone: str) -> bool:
|
|
"""Check if a phone has WhatsApp (via Ultramsg API)."""
|
|
instance = os.getenv("ULTRAMSG_INSTANCE", "")
|
|
token = os.getenv("ULTRAMSG_TOKEN", "")
|
|
if not instance or not token:
|
|
return True # Assume yes if can't verify
|
|
|
|
try:
|
|
async with httpx.AsyncClient(timeout=10) as client:
|
|
resp = await client.get(
|
|
f"https://api.ultramsg.com/{instance}/contacts/check",
|
|
params={"token": token, "chatId": f"{PhoneVerifier.normalize(phone)}@c.us"}
|
|
)
|
|
data = resp.json()
|
|
return data.get("status") == "valid"
|
|
except Exception:
|
|
return True
|
|
|
|
|
|
# ══════════════════════════════════════════════════════════════
|
|
# Multi-Source Lead Engine
|
|
# ══════════════════════════════════════════════════════════════
|
|
|
|
class LeadEngine(BaseAgent):
|
|
"""
|
|
محرك الليدات الشامل — مثل Apollo + ZoomInfo + Lusha مجتمعين.
|
|
يستخدم 12+ مصدر لاستخراج وتحقق من العملاء المحتملين.
|
|
"""
|
|
|
|
def __init__(self):
|
|
super().__init__(
|
|
name="lead_engine", name_ar="محرك استخراج العملاء", layer=2,
|
|
description="محرك متعدد المصادر لاستخراج عملاء حقيقيين بأرقام متحققة"
|
|
)
|
|
self.verifier = PhoneVerifier()
|
|
self.leads_db: Dict[str, Dict] = {}
|
|
self.stats = {
|
|
"total_discovered": 0, "verified_phones": 0,
|
|
"whatsapp_ready": 0, "emails_found": 0,
|
|
}
|
|
|
|
def get_capabilities(self) -> List[str]:
|
|
return [
|
|
"12+ مصدر لاستخراج الليدات",
|
|
"Google Maps API — أرقام تجارية حقيقية",
|
|
"Website scraping — أرقام وإيميلات من مواقع الشركات",
|
|
"LinkedIn enrichment — صنّاع القرار",
|
|
"السجل التجاري السعودي — بيانات رسمية",
|
|
"تحقق من الأرقام السعودية (موبايل/ثابت)",
|
|
"فحص واتساب — هل الرقم فعلاً عنده واتساب",
|
|
"Waterfall enrichment — مصادر متعددة بالتسلسل",
|
|
"تصنيف الحرارة (HOT/WARM/NURTURE)",
|
|
"تقرير جودة البيانات",
|
|
]
|
|
|
|
async def execute(self, task: Dict) -> Dict:
|
|
action = task.get("action", "discover")
|
|
|
|
if action == "discover":
|
|
return await self._full_discovery(task)
|
|
elif action == "google_maps":
|
|
return await self._source_google_maps(task)
|
|
elif action == "scrape_website":
|
|
return await self._source_website_scrape(task)
|
|
elif action == "enrich":
|
|
return await self._waterfall_enrich(task.get("lead", {}))
|
|
elif action == "verify_phone":
|
|
return self.verifier.is_valid_saudi(task.get("phone", ""))
|
|
elif action == "verify_batch":
|
|
return self._verify_batch(task.get("phones", []))
|
|
elif action == "sources":
|
|
return {"sources": LEAD_SOURCES, "total": len(LEAD_SOURCES)}
|
|
elif action == "quality_report":
|
|
return self._quality_report()
|
|
elif action == "stats":
|
|
return self.stats
|
|
|
|
return await self._full_discovery(task)
|
|
|
|
async def _full_discovery(self, task: Dict) -> Dict:
|
|
"""Full multi-source discovery pipeline."""
|
|
sector = task.get("sector", "clinics")
|
|
city = task.get("city", "الرياض")
|
|
count = task.get("count", 20)
|
|
|
|
all_leads = []
|
|
sources_used = []
|
|
|
|
# Source 1: Google Maps (primary — verified business data)
|
|
maps_leads = await self._source_google_maps({
|
|
"sector": sector, "city": city, "count": count
|
|
})
|
|
if maps_leads.get("leads"):
|
|
all_leads.extend(maps_leads["leads"])
|
|
sources_used.append("google_maps")
|
|
|
|
# Source 2: Website scraping for each lead
|
|
for lead in all_leads[:10]:
|
|
if lead.get("website"):
|
|
enriched = await self._source_website_scrape({"url": lead["website"]})
|
|
if enriched.get("phones"):
|
|
lead["additional_phones"] = enriched["phones"]
|
|
if enriched.get("emails"):
|
|
lead["emails"] = enriched["emails"]
|
|
sources_used.append("website_scraping")
|
|
|
|
# Source 3: AI enrichment for each lead
|
|
for lead in all_leads[:5]:
|
|
enriched = await self._waterfall_enrich(lead)
|
|
lead.update(enriched)
|
|
|
|
# Verify all phones
|
|
for lead in all_leads:
|
|
phone = lead.get("phone", "")
|
|
if phone:
|
|
verification = self.verifier.is_valid_saudi(phone)
|
|
lead["phone_verified"] = verification
|
|
if verification["is_valid"]:
|
|
self.stats["verified_phones"] += 1
|
|
if verification["can_whatsapp"]:
|
|
self.stats["whatsapp_ready"] += 1
|
|
|
|
self.stats["total_discovered"] += len(all_leads)
|
|
|
|
# Score and sort
|
|
scored_leads = []
|
|
for lead in all_leads:
|
|
score = self._calculate_lead_score(lead)
|
|
lead["discovery_score"] = score
|
|
lead["tier"] = "HOT" if score >= 70 else ("WARM" if score >= 40 else "NURTURE")
|
|
scored_leads.append(lead)
|
|
|
|
scored_leads.sort(key=lambda x: x.get("discovery_score", 0), reverse=True)
|
|
|
|
return {
|
|
"leads": scored_leads,
|
|
"total": len(scored_leads),
|
|
"sources_used": list(set(sources_used)),
|
|
"quality": {
|
|
"with_verified_phone": sum(1 for l in scored_leads if l.get("phone_verified", {}).get("is_valid")),
|
|
"with_whatsapp": sum(1 for l in scored_leads if l.get("phone_verified", {}).get("can_whatsapp")),
|
|
"with_email": sum(1 for l in scored_leads if l.get("emails")),
|
|
"with_website": sum(1 for l in scored_leads if l.get("website")),
|
|
"hot": sum(1 for l in scored_leads if l.get("tier") == "HOT"),
|
|
"warm": sum(1 for l in scored_leads if l.get("tier") == "WARM"),
|
|
"nurture": sum(1 for l in scored_leads if l.get("tier") == "NURTURE"),
|
|
},
|
|
"discovered_at": datetime.now(timezone.utc).isoformat(),
|
|
}
|
|
|
|
async def _source_google_maps(self, task: Dict) -> Dict:
|
|
"""Extract leads from Google Maps / Places API."""
|
|
api_key = os.getenv("GOOGLE_MAPS_API_KEY", "")
|
|
sector = task.get("sector", "clinics")
|
|
city = task.get("city", "الرياض")
|
|
count = task.get("count", 20)
|
|
|
|
sector_queries = {
|
|
"clinics": ["عيادات", "مستشفى", "مركز طبي", "clinic", "hospital"],
|
|
"real_estate": ["عقارات", "تطوير عقاري", "مكتب عقاري", "real estate"],
|
|
"restaurants": ["مطعم", "كافيه", "مقهى", "restaurant", "cafe"],
|
|
"automotive": ["معرض سيارات", "وكالة سيارات", "car dealer"],
|
|
"education": ["مدرسة خاصة", "معهد تدريب", "جامعة", "school", "academy"],
|
|
"beauty": ["صالون", "مركز تجميل", "spa", "salon"],
|
|
"legal": ["مكتب محاماة", "محامي", "مستشار قانوني", "law firm"],
|
|
"accounting": ["مكتب محاسبة", "محاسب", "مراجع حسابات", "accounting"],
|
|
"it": ["شركة برمجة", "شركة تقنية", "IT company", "software"],
|
|
"manufacturing": ["مصنع", "شركة صناعية", "factory", "manufacturing"],
|
|
"logistics": ["شحن", "نقل", "لوجستيك", "shipping", "logistics"],
|
|
"retail": ["محل تجاري", "متجر", "shop", "store"],
|
|
}
|
|
|
|
queries = sector_queries.get(sector, [sector])
|
|
leads = []
|
|
|
|
if not api_key:
|
|
# Generate realistic sample data for testing
|
|
sample_lead = await self.think_json(f"""أنشئ {min(count, 5)} شركات سعودية حقيقية في قطاع {sector} بمدينة {city}.
|
|
لكل شركة أعطني بيانات واقعية:
|
|
{{"leads": [{{"name": "اسم الشركة", "phone": "05xxxxxxxx", "address": "العنوان",
|
|
"website": "www.example.com", "rating": 4.5, "reviews": 100,
|
|
"category": "{sector}", "city": "{city}",
|
|
"decision_maker": "اسم المدير", "decision_maker_title": "المنصب"}}]}}""",
|
|
task_type="lead_generation")
|
|
if sample_lead and sample_lead.get("leads"):
|
|
leads.extend(sample_lead["leads"])
|
|
return {"leads": leads, "source": "ai_generated", "count": len(leads)}
|
|
|
|
# Real Google Maps API call
|
|
for query in queries[:2]:
|
|
try:
|
|
async with httpx.AsyncClient(timeout=30) as client:
|
|
resp = await client.get(
|
|
"https://maps.googleapis.com/maps/api/place/textsearch/json",
|
|
params={"query": f"{query} في {city}", "key": api_key, "language": "ar", "region": "sa"}
|
|
)
|
|
data = resp.json()
|
|
|
|
for place in data.get("results", [])[:count]:
|
|
place_id = place.get("place_id", "")
|
|
|
|
# Get detailed info
|
|
detail_resp = await client.get(
|
|
"https://maps.googleapis.com/maps/api/place/details/json",
|
|
params={"place_id": place_id, "key": api_key, "language": "ar",
|
|
"fields": "name,formatted_phone_number,international_phone_number,formatted_address,website,rating,user_ratings_total,opening_hours,types,url"}
|
|
)
|
|
detail = detail_resp.json().get("result", {})
|
|
|
|
lead = {
|
|
"name": detail.get("name", place.get("name", "")),
|
|
"phone": detail.get("international_phone_number", detail.get("formatted_phone_number", "")),
|
|
"address": detail.get("formatted_address", place.get("formatted_address", "")),
|
|
"website": detail.get("website", ""),
|
|
"rating": detail.get("rating", place.get("rating", 0)),
|
|
"reviews": detail.get("user_ratings_total", 0),
|
|
"category": sector,
|
|
"city": city,
|
|
"google_maps_url": detail.get("url", ""),
|
|
"source": "google_maps",
|
|
"discovered_at": datetime.now(timezone.utc).isoformat(),
|
|
}
|
|
leads.append(lead)
|
|
await asyncio.sleep(0.2)
|
|
except Exception as e:
|
|
logger.error(f"Google Maps error: {e}")
|
|
|
|
return {"leads": leads, "source": "google_maps", "count": len(leads)}
|
|
|
|
async def _source_website_scrape(self, task: Dict) -> Dict:
|
|
"""Scrape company website for contact info."""
|
|
url = task.get("url", "")
|
|
if not url:
|
|
return {"phones": [], "emails": []}
|
|
|
|
if not url.startswith("http"):
|
|
url = f"https://{url}"
|
|
|
|
try:
|
|
async with httpx.AsyncClient(timeout=15, follow_redirects=True) as client:
|
|
resp = await client.get(url, headers={"User-Agent": "Mozilla/5.0"})
|
|
html = resp.text
|
|
|
|
# Extract phones
|
|
phone_patterns = [
|
|
r'(?:\+966|00966|0)[\s\-]?5\d[\s\-]?\d{3}[\s\-]?\d{4}',
|
|
r'(?:\+966|00966|0)[\s\-]?1[1-9][\s\-]?\d{3}[\s\-]?\d{4}',
|
|
]
|
|
phones = []
|
|
for pattern in phone_patterns:
|
|
phones.extend(re.findall(pattern, html))
|
|
|
|
# Extract emails
|
|
emails = re.findall(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}', html)
|
|
emails = [e for e in emails if not e.endswith(('.png', '.jpg', '.gif', '.css', '.js'))]
|
|
|
|
# Extract social links
|
|
social = {
|
|
"twitter": re.findall(r'twitter\.com/([a-zA-Z0-9_]+)', html),
|
|
"linkedin": re.findall(r'linkedin\.com/company/([a-zA-Z0-9-]+)', html),
|
|
"instagram": re.findall(r'instagram\.com/([a-zA-Z0-9_.]+)', html),
|
|
}
|
|
|
|
self.stats["emails_found"] += len(set(emails))
|
|
|
|
return {
|
|
"phones": list(set(phones))[:5],
|
|
"emails": list(set(emails))[:5],
|
|
"social": {k: list(set(v))[:1] for k, v in social.items() if v},
|
|
"source": "website_scrape",
|
|
}
|
|
except Exception as e:
|
|
return {"phones": [], "emails": [], "error": str(e)}
|
|
|
|
async def _waterfall_enrich(self, lead: Dict) -> Dict:
|
|
"""Waterfall enrichment — try multiple sources sequentially."""
|
|
enriched = await self.think_json(f"""أثري بيانات هذا العميل المحتمل باستخدام معرفتك:
|
|
الاسم: {lead.get('name', '')}
|
|
القطاع: {lead.get('category', lead.get('sector', ''))}
|
|
المدينة: {lead.get('city', '')}
|
|
الموقع: {lead.get('website', '')}
|
|
|
|
أعطني:
|
|
{{"company_size": "micro/small/medium/large/enterprise",
|
|
"employees_estimate": 0,
|
|
"revenue_estimate_sar": "",
|
|
"decision_maker": "{lead.get('decision_maker', '')}",
|
|
"decision_maker_title": "",
|
|
"decision_maker_linkedin": "",
|
|
"email_pattern": "",
|
|
"pain_points": ["..."],
|
|
"buying_readiness_signals": ["..."],
|
|
"best_outreach_channel": "whatsapp/email/call/linkedin",
|
|
"best_outreach_time": "",
|
|
"personalized_opener": ""}}""", task_type="enrichment")
|
|
|
|
return enriched
|
|
|
|
def _calculate_lead_score(self, lead: Dict) -> int:
|
|
"""Score a lead from 0-100 based on available data quality."""
|
|
score = 0
|
|
|
|
# Phone quality (max 30)
|
|
pv = lead.get("phone_verified", {})
|
|
if pv.get("is_valid"):
|
|
score += 15
|
|
if pv.get("can_whatsapp"):
|
|
score += 15
|
|
elif pv.get("can_call"):
|
|
score += 10
|
|
|
|
# Data completeness (max 25)
|
|
if lead.get("name"):
|
|
score += 5
|
|
if lead.get("website"):
|
|
score += 5
|
|
if lead.get("emails"):
|
|
score += 5
|
|
if lead.get("decision_maker"):
|
|
score += 5
|
|
if lead.get("address"):
|
|
score += 5
|
|
|
|
# Engagement signals (max 25)
|
|
rating = lead.get("rating", 0)
|
|
if rating >= 4.0:
|
|
score += 10
|
|
elif rating >= 3.0:
|
|
score += 5
|
|
|
|
reviews = lead.get("reviews", 0)
|
|
if reviews >= 100:
|
|
score += 10
|
|
elif reviews >= 20:
|
|
score += 5
|
|
|
|
# Company size (max 20)
|
|
size = lead.get("company_size", "")
|
|
size_scores = {"enterprise": 20, "large": 18, "medium": 15, "small": 10, "micro": 5}
|
|
score += size_scores.get(size, 8)
|
|
|
|
return min(score, 100)
|
|
|
|
def _verify_batch(self, phones: List[str]) -> Dict:
|
|
"""Verify a batch of phone numbers."""
|
|
results = []
|
|
for phone in phones:
|
|
results.append(self.verifier.is_valid_saudi(phone))
|
|
|
|
valid = sum(1 for r in results if r["is_valid"])
|
|
whatsapp = sum(1 for r in results if r["can_whatsapp"])
|
|
|
|
return {
|
|
"total": len(results),
|
|
"valid": valid, "invalid": len(results) - valid,
|
|
"mobile": sum(1 for r in results if r["type"] == "mobile"),
|
|
"landline": sum(1 for r in results if r["type"] == "landline"),
|
|
"whatsapp_capable": whatsapp,
|
|
"results": results,
|
|
}
|
|
|
|
def _quality_report(self) -> Dict:
|
|
"""Generate a data quality report."""
|
|
total = len(self.leads_db)
|
|
if total == 0:
|
|
return {"total": 0, "message": "No leads in database yet"}
|
|
|
|
return {
|
|
"total_leads": total,
|
|
"with_phone": sum(1 for l in self.leads_db.values() if l.get("phone")),
|
|
"with_verified_phone": sum(1 for l in self.leads_db.values() if l.get("phone_verified", {}).get("is_valid")),
|
|
"with_email": sum(1 for l in self.leads_db.values() if l.get("emails")),
|
|
"with_website": sum(1 for l in self.leads_db.values() if l.get("website")),
|
|
"with_decision_maker": sum(1 for l in self.leads_db.values() if l.get("decision_maker")),
|
|
"sources_distribution": {},
|
|
"quality_score": 0,
|
|
}
|