mirror of
https://github.com/x1xhlol/system-prompts-and-models-of-ai-tools.git
synced 2026-06-17 23:09:35 +00:00
ADDED MODULES: - intelligence/icp.py: ICP Builder — 34 Saudi industries, Arabic+English queries - intelligence/discovery.py: Multi-source discovery — web search + 34 seed companies - intelligence/enrichment.py: Company/person enrichment — website data + news - intelligence/scoring.py: 5-dimension scoring — Fit/Intent/Access/Value/Urgency - intelligence/entity_resolution.py: Arabic/English dedup + fuzzy matching - intelligence/outreach.py: Arabic-first outreach — WhatsApp/Email/LinkedIn - intelligence/triggers.py: Trigger alerts — funding/hiring/expansion/IPO - intelligence/pipeline.py: End-to-end orchestrator — ICP→Discovery→Score→Brief - routes/intelligence.py: 15 REST endpoints + audit chain integration - DB: 5 new tables — intelligence_leads/runs/watchlist/triggers/entities ARCHITECTURE: - Layer 1: Signal collection (web + curated Saudi B2B DB of 34 companies) - Layer 2: Enrichment (website data, news, tech stack detection) - Layer 3: 5D scoring — Master = 0.30 Fit + 0.25 Intent + 0.15 Access + 0.20 Value + 0.10 Urgency - Layer 4: Entity resolution — Arabic/English fuzzy dedup - Layer 5: Outreach brief — signal-driven WhatsApp/Email/LinkedIn in Arabic MOTION SUPPORT: B2B sales | partnership | channel | tender
159 lines
5.0 KiB
Python
159 lines
5.0 KiB
Python
"""
Lead Intelligence Pipeline — End-to-end orchestrator
ICP → Discovery → Enrichment → Entity Resolution → Scoring → Outreach Brief
One call drives the full flow.
"""
|
|
import time
|
|
import json
|
|
from typing import Dict, Any, List, Optional
|
|
from dataclasses import asdict
|
|
|
|
from app.intelligence.icp import ICPConfig, DEALIX_DEFAULT_ICP
|
|
from app.intelligence.discovery import LeadDiscoveryEngine
|
|
from app.intelligence.enrichment import enrich_batch
|
|
from app.intelligence.scoring import score_batch
|
|
from app.intelligence.entity_resolution import EntityRegistry
|
|
from app.intelligence.outreach import generate_batch_briefs
|
|
|
|
|
|
def run_pipeline(
    icp: Optional[ICPConfig] = None,
    custom_queries: Optional[List[str]] = None,
    motion: str = "sales",
    max_leads: int = 30,
    enrich: bool = True,
    generate_outreach: bool = True,
    score_weights: Optional[Dict[str, float]] = None,
) -> Dict[str, Any]:
    """
    Run the full lead intelligence pipeline in one call.

    Stages, in order: ICP resolution → discovery → entity resolution/dedup →
    enrichment → scoring → tier breakdown → outreach briefs.

    Args:
        icp: ICP configuration. Falls back to ``DEALIX_DEFAULT_ICP`` when
            not provided (or falsy).
        custom_queries: When non-empty, discovery runs exactly these queries
            (``max_per_query=6``); otherwise queries are derived from the ICP
            (``max_per_query=5``).
        motion: Forwarded unchanged to ``generate_batch_briefs``.
        max_leads: Number of deduplicated candidates kept (the dedup result
            is sliced to this length) before enrichment and scoring.
        enrich: When True, candidates go through ``enrich_batch``. When
            False, each candidate is wrapped in an ``EnrichedLead`` built
            only from the fields discovery already produced.
        generate_outreach: When True, outreach briefs are generated from the
            scored leads; otherwise ``outreach_briefs`` is an empty list.
        score_weights: Forwarded to ``score_batch`` as ``weights``.

    Returns:
        {
            "run_id": str,                # "pipeline_<epoch seconds>"
            "icp_used": dict,             # icp.to_dict(), or {} if unavailable
            "total_discovered": int,
            "total_after_dedup": int,     # after dedup AND the max_leads cap
            "total_enriched": int,
            "scored_leads": [...],        # exactly what score_batch returned
            "p1_leads": [...],            # outreach now
            "p2_leads": [...],            # enrich more
            "p3_leads": [...],            # nurture
            "p4_leads": [...],            # archive (also any unrecognised tier)
            "outreach_briefs": [...],     # output of generate_batch_briefs over
                                          # all scored leads; any tier filtering
                                          # happens inside that helper, not here
            "tier_summary": {...},        # per-tier counts
            "pipeline_duration_sec": float,
            "errors": [...],              # nothing is appended here yet, so it is
                                          # currently always empty; stage failures
                                          # propagate as exceptions
        }
    """
    # Wall-clock time is only used to label the run. The duration is measured
    # with a monotonic clock so system clock adjustments cannot skew it.
    run_id = f"pipeline_{int(time.time())}"
    started = time.perf_counter()
    errors: List[Any] = []

    # 1. Resolve ICP
    icp = icp or DEALIX_DEFAULT_ICP

    # 2. Discovery
    engine = LeadDiscoveryEngine(icp=icp)
    if custom_queries:
        candidates = engine.discover(custom_queries, max_per_query=6)
    else:
        candidates = engine.discover_from_icp(icp=icp, max_per_query=5)

    total_discovered = len(candidates)

    # 3. Entity Resolution + Dedup
    registry = EntityRegistry()
    raw_lead_dicts = [
        {
            "id": c.id,
            "company_name": c.company_name,
            "domain": c.domain,
            "source": c.source,
            "source_url": c.source_url,
            "raw_snippet": c.raw_snippet,
            "signals": c.signals,
            "trigger": c.trigger,
            "contact_email": c.contact_email,
            "contact_phone": c.phone,
            "contact_linkedin": c.contact_linkedin,
            "confidence": c.confidence,
            # Carry the original candidate object through dedup so the
            # surviving entries can be mapped back without a second lookup.
            "_candidate": c,
        }
        for c in candidates
    ]
    deduped_dicts = registry.deduplicate_lead_list(raw_lead_dicts)
    deduped_candidates = [d["_candidate"] for d in deduped_dicts[:max_leads]]

    total_after_dedup = len(deduped_candidates)

    # 4. Enrichment
    if enrich:
        enriched_leads = enrich_batch(deduped_candidates, delay=0.2)
    else:
        # Skip enrichment — use candidates as-is
        from app.intelligence.enrichment import EnrichedLead

        enriched_leads = [
            EnrichedLead(
                id=c.id,
                company_name=c.company_name,
                domain=c.domain,
                industry=c.industry,
                region=c.region,
                website=f"https://{c.domain}" if c.domain else "",
                signals=c.signals,
                source=c.source,
                source_url=c.source_url,
                raw_snippet=c.raw_snippet,
                trigger=c.trigger,
                contact_email=c.contact_email,
                contact_phone=c.phone,
                enrichment_confidence=c.confidence,
            )
            for c in deduped_candidates
        ]

    total_enriched = len(enriched_leads)

    # 5. Scoring
    scored = score_batch(enriched_leads, weights=score_weights)

    # 6. Tier breakdown
    # Any tier other than P1/P2/P3 lands in the P4 bucket. Counts are derived
    # from the buckets so the summary can never disagree with the lists (the
    # previous separate counter raised KeyError on an unrecognised tier).
    tier_buckets: Dict[str, List[Any]] = {"P1": [], "P2": [], "P3": [], "P4": []}
    for item in scored:
        tier = item["score"]["tier"]
        tier_buckets.get(tier, tier_buckets["P4"]).append(item)

    # 7. Outreach briefs
    outreach_briefs = []
    if generate_outreach:
        outreach_briefs = generate_batch_briefs(scored, motion=motion)

    duration = round(time.perf_counter() - started, 2)

    return {
        "run_id": run_id,
        "icp_used": icp.to_dict() if hasattr(icp, 'to_dict') else {},
        "total_discovered": total_discovered,
        "total_after_dedup": total_after_dedup,
        "total_enriched": total_enriched,
        "scored_leads": scored,
        "p1_leads": tier_buckets["P1"],
        "p2_leads": tier_buckets["P2"],
        "p3_leads": tier_buckets["P3"],
        "p4_leads": tier_buckets["P4"],
        "outreach_briefs": outreach_briefs,
        "tier_summary": {
            "P1_outreach_now": len(tier_buckets["P1"]),
            "P2_enrich_more": len(tier_buckets["P2"]),
            "P3_nurture": len(tier_buckets["P3"]),
            "P4_archive": len(tier_buckets["P4"]),
        },
        "pipeline_duration_sec": duration,
        "errors": errors,
    }
|