mirror of
https://github.com/x1xhlol/system-prompts-and-models-of-ai-tools.git
synced 2026-06-18 23:39:34 +00:00
129 lines
4.7 KiB
Python
129 lines
4.7 KiB
Python
"""
|
|
Enrichment Agent — augments lead data with public info.
|
|
وكيل الإثراء — يثري بيانات العميل من مصادر عامة.
|
|
|
|
Note: production enrichment typically uses providers like Clearbit, Apollo,
|
|
or company-domain lookups. This agent provides:
|
|
1. Domain-based inference (guess company size / sector from email domain)
|
|
2. LLM-based inference from company name (best effort)
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass, field
|
|
from typing import Any
|
|
|
|
from auto_client_acquisition.agents.intake import Lead
|
|
from core.agents.base import BaseAgent
|
|
from core.config.models import Task
|
|
from core.llm.base import Message
|
|
|
|
|
|
@dataclass
class EnrichmentData:
    """Container for the attributes inferred about a lead during enrichment.

    Every inferred attribute defaults to ``None`` (nothing inferred);
    ``notes`` collects free-text explanations and ``confidence`` holds an
    overall score.
    """

    inferred_sector: str | None = None
    inferred_size: str | None = None
    inferred_region: str | None = None
    website: str | None = None
    linkedin_handle: str | None = None
    # A fresh list per instance, so instances never share notes.
    notes: list[str] = field(default_factory=list)
    confidence: float = 0.0  # 0-1

    def to_dict(self) -> dict[str, Any]:
        """Return the fields as a plain dict, with ``confidence`` rounded to 2 places."""
        # Fields copied verbatim, in output order; ``notes`` is the same
        # list object held by the instance, not a copy.
        passthrough = (
            "inferred_sector",
            "inferred_size",
            "inferred_region",
            "website",
            "linkedin_handle",
            "notes",
        )
        payload: dict[str, Any] = {attr: getattr(self, attr) for attr in passthrough}
        payload["confidence"] = round(self.confidence, 2)
        return payload
|
|
|
|
|
|
# Email-domain fragment -> sector label (extend as needed).
# Entries are consulted in insertion order, so keep more specific keys first
# when adding overlapping ones.
DOMAIN_HINTS: dict[str, str] = {
    # Saudi second-level suffixes
    ".edu.sa": "education",
    ".gov.sa": "government",
    ".med.sa": "healthcare",
    # Individual company domains
    "aramco.com": "oil_gas",
    "sabic.com": "manufacturing",
    "stc.com.sa": "technology",
    "alrajhibank.com.sa": "finance",
    "saudiairlines.com": "tourism",
}
|
|
|
|
|
|
class EnrichmentAgent(BaseAgent):
    """Enrich lead data using heuristics plus LLM inference.

    Signals are applied in order: email-domain hints, phone country prefix,
    then (optionally) an LLM guess from the company name. The LLM step is
    best effort: failures are logged and the heuristic results are kept.
    """

    name = "enrichment"

    @staticmethod
    def _sector_from_domain(domain: str) -> str | None:
        """Return the first ``DOMAIN_HINTS`` sector matching ``domain``, else ``None``.

        Matching is anchored on domain-label boundaries rather than being a
        plain substring test, so ``x.edu.sa.example.com`` or ``notaramco.com``
        do not pick up a sector they have no claim to.
        """
        for key, sector in DOMAIN_HINTS.items():
            if key.startswith("."):
                # Dot-prefixed keys are suffixes, e.g. ".edu.sa".
                matched = domain.endswith(key)
            else:
                # Bare keys are a registered domain: itself or any subdomain.
                matched = domain == key or domain.endswith(f".{key}")
            if matched:
                return sector
        return None

    async def _sector_from_llm(self, company_name: str) -> tuple[str, float] | None:
        """Ask the LLM for a sector guess for ``company_name``.

        Returns:
            ``(sector, confidence)`` with the sector lower-cased and the
            confidence clamped to 0-1, or ``None`` when the call fails, the
            reply is unusable, or the model answered ``other``.
        """
        prompt = (
            f"Given the Saudi/GCC company name '{company_name}', "
            "infer the most likely sector from this list: "
            "technology, real_estate, healthcare, education, logistics, "
            "retail, finance, manufacturing, consulting, construction, "
            "oil_gas, tourism, other. "
            'Respond with JSON: {"sector": str, "confidence": 0-1, "note": str}. '
            "If you don't know, say 'other' with low confidence."
        )
        try:
            response = await self.router.run(
                task=Task.CLASSIFICATION,
                messages=[Message(role="user", content=prompt)],
                max_tokens=200,
                temperature=0.1,
            )
            parsed = self.parse_json_response(response.content)
        except Exception as e:
            # Deliberate best-effort boundary: enrichment must not fail
            # because the LLM call or JSON parsing did.
            self.log.warning("enrichment_llm_failed", error=str(e))
            return None

        if not isinstance(parsed, dict):
            self.log.warning(
                "enrichment_llm_failed",
                error=f"expected a JSON object, got {type(parsed).__name__}",
            )
            return None

        sector = parsed.get("sector")
        if not isinstance(sector, str):
            return None
        sector = sector.strip().lower()
        if not sector or sector == "other":
            return None

        # Validate the score before anything is written to the result, so a
        # malformed value cannot leave a half-applied enrichment behind.
        try:
            score = float(parsed.get("confidence", 0.3))
        except (TypeError, ValueError):
            score = 0.3
        if not score >= 0.0:  # negative or NaN
            score = 0.0
        return sector, min(score, 1.0)

    async def run(
        self,
        *,
        lead: Lead,
        use_llm: bool = True,
        **_: Any,
    ) -> EnrichmentData:
        """Infer sector, region and website for ``lead``.

        Args:
            lead: The lead to enrich; reads ``contact_email``,
                ``contact_phone``, ``company_name`` and ``id``.
            use_llm: When true, fall back to an LLM guess from the company
                name if the email domain did not yield a sector.
            **_: Extra keyword arguments are accepted and ignored.

        Returns:
            An ``EnrichmentData`` whose ``confidence`` is the highest score
            among the signals that fired (0.0 if none did).
        """
        data = EnrichmentData()
        confidence = 0.0

        # 1. Email-domain hints
        email = (lead.contact_email or "").strip()
        # The domain is whatever follows the last "@"; drop a trailing root dot.
        domain = email.rpartition("@")[2].lower().rstrip(".") if "@" in email else ""
        if domain:
            data.website = f"https://{domain}"
            sector = self._sector_from_domain(domain)
            if sector is not None:
                data.inferred_sector = sector
                confidence = max(confidence, 0.8)
                data.notes.append(f"Sector from domain: {domain} → {sector}")
            if domain.endswith((".sa", ".ksa")):
                data.inferred_region = "Saudi Arabia"
                confidence = max(confidence, 0.7)

        # 2. Phone country hint (never overrides a region from the domain)
        phone = (lead.contact_phone or "").strip()
        if phone.startswith("+966"):
            data.inferred_region = data.inferred_region or "Saudi Arabia"
            confidence = max(confidence, 0.7)
        elif phone.startswith("+971"):
            data.inferred_region = data.inferred_region or "UAE"
            confidence = max(confidence, 0.7)

        # 3. LLM inference from company name, only when the domain gave no sector
        if use_llm and lead.company_name and not data.inferred_sector:
            guess = await self._sector_from_llm(lead.company_name)
            if guess is not None:
                sector, llm_confidence = guess
                data.inferred_sector = sector
                confidence = max(confidence, llm_confidence)
                data.notes.append(f"Sector from name: {lead.company_name} → {sector}")

        data.confidence = confidence
        self.log.info(
            "enriched",
            lead_id=lead.id,
            sector=data.inferred_sector,
            region=data.inferred_region,
            confidence=confidence,
        )
        return data
|