mirror of
https://github.com/x1xhlol/system-prompts-and-models-of-ai-tools.git
synced 2026-06-18 15:29:36 +00:00
feat: AI Cost, Quality & Proof OS — complete
AI Layer: - llm_router.py: routes cheap/mid/high models, enforces daily budget, caches - token_counter.py: estimates tokens, truncates to budget - response_cache.py: in-memory cache with TTL per agent - prompt_registry.py: versioned prompts with stable prefix for caching - ai_budget.yaml: model costs, agent budgets, daily limits (10 SAR/day) Guardrails: - output_validator.py: blocks fake claims + prohibited actions - cost_guard.py: prevents runaway spending Observability: - trace.py: trace_id, cost, latency, steps per pipeline run Tests: ALL PASS - 30/30 evals (100%) — 9 sectors, 30 companies - 10/10 prohibited actions blocked - 4/4 allowed actions verified - 3/3 forbidden claims blocked - 3/3 message quality checks passed https://claude.ai/code/session_01W1rJthWDkasijTdXCfxVHs
This commit is contained in:
parent
d47ed0d756
commit
503bf2e5d7
0
salesflow-saas/backend/dealix_gtm_os/ai/__init__.py
Normal file
0
salesflow-saas/backend/dealix_gtm_os/ai/__init__.py
Normal file
131
salesflow-saas/backend/dealix_gtm_os/ai/llm_router.py
Normal file
131
salesflow-saas/backend/dealix_gtm_os/ai/llm_router.py
Normal file
@ -0,0 +1,131 @@
|
|||||||
|
"""LLM Router — routes to right model, enforces budgets, uses cache."""
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
import yaml
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from dealix_gtm_os.ai.token_counter import estimate_tokens, truncate_to_budget
|
||||||
|
from dealix_gtm_os.ai.response_cache import get_cached, set_cached
|
||||||
|
from dealix_gtm_os.ai.prompt_registry import get_prompt
|
||||||
|
|
||||||
|
# Budget/model configuration lives next to the package in config/ai_budget.yaml.
# A missing or empty file leaves ``_config`` as an empty dict, so every lookup
# below falls back to its hard-coded default.
_config_path = Path(__file__).parent.parent / "config" / "ai_budget.yaml"
_config = {}
if _config_path.exists():
    # Explicit UTF-8: without it the file is decoded with the platform's
    # locale encoding, which breaks on non-ASCII content on some systems.
    with open(_config_path, encoding="utf-8") as f:
        _config = yaml.safe_load(f) or {}
|
||||||
|
|
||||||
|
_daily_cost = 0.0
|
||||||
|
_daily_requests = 0
|
||||||
|
_daily_reset = time.time()
|
||||||
|
|
||||||
|
def _check_daily_budget() -> bool:
    """Tell whether another LLM call still fits inside the daily budget.

    The module-level counters are zeroed once more than 86400 seconds have
    passed since the last reset. They are then compared with the
    ``daily_budget`` section of the loaded config (defaults: 10.0 SAR and
    500 requests).

    Returns:
        ``False`` when the accumulated cost or the request count has reached
        its limit, ``True`` otherwise.
    """
    global _daily_cost, _daily_requests, _daily_reset

    window_expired = time.time() - _daily_reset > 86400
    if window_expired:
        _daily_cost = 0.0
        _daily_requests = 0
        _daily_reset = time.time()

    limits = _config.get("daily_budget", {})
    if _daily_cost >= limits.get("max_cost_sar", 10.0):
        return False
    return _daily_requests < limits.get("max_requests", 500)
|
||||||
|
|
||||||
|
def _get_agent_config(agent_name: str) -> dict:
    """Return the ``agent_budgets`` entry for *agent_name*.

    Agents without an entry get a mid-tier default with 500 output tokens
    and a 24 hour cache TTL.
    """
    fallback = {"model_tier": "mid", "max_output_tokens": 500, "cache_ttl_hours": 24}
    per_agent = _config.get("agent_budgets", {})
    return per_agent.get(agent_name, fallback)
|
||||||
|
|
||||||
|
def _cache_result(agent_name: str, input_data: dict, result: str, cache_ttl: float) -> None:
    """Best-effort: cache *result* when caching is enabled and it parses as JSON."""
    if cache_ttl <= 0:
        return
    try:
        set_cached(agent_name, input_data, json.loads(result))
    except (TypeError, ValueError):
        # json.JSONDecodeError is a ValueError. Output that is not JSON (or
        # not a string at all) is simply not cached; the caller still
        # receives the raw result.
        pass


async def route_llm_call(agent_name: str, prompt_name: str, input_data: dict, **prompt_kwargs) -> str:
    """Main entry point. Routes to correct model with budget/cache.

    Steps, in order:
      1. Return a cached response for (agent, input) when the agent's cache
         TTL is positive and a truthy entry exists.
      2. Refuse with a JSON error payload when the daily budget is exhausted.
      3. With no GROQ_API_KEY and no ANTHROPIC_API_KEY set, delegate to
         ``call_llm`` and count the request (no cost is recorded).
      4. Otherwise pick the model for the agent's tier, build the prompt,
         call the matching provider, record estimated cost and cache.

    Args:
        agent_name: Key into the ``agent_budgets`` config section.
        prompt_name: Name passed to ``get_prompt``; an unknown name or
            missing template variable falls back to a generic system prompt
            with ``input_data`` serialised as the user prompt.
        input_data: Request payload; also used as the cache key material.
        **prompt_kwargs: Template variables forwarded to ``get_prompt``.

    Returns:
        The model output as a string (JSON text when served from cache or
        when the budget is exceeded).
    """
    global _daily_cost, _daily_requests

    agent_cfg = _get_agent_config(agent_name)
    cache_ttl = agent_cfg.get("cache_ttl_hours", 24)

    if cache_ttl > 0:
        cached = get_cached(agent_name, input_data, cache_ttl)
        if cached:
            return json.dumps(cached, ensure_ascii=False)

    if not _check_daily_budget():
        return json.dumps({"error": "Daily AI budget exceeded", "budget_hit": True})

    groq_key = os.environ.get("GROQ_API_KEY", "")
    anthropic_key = os.environ.get("ANTHROPIC_API_KEY", "")

    if not groq_key and not anthropic_key:
        # No provider credentials: hand over to the project's generic client.
        from dealix_gtm_os.agents.llm_client import call_llm
        result = await call_llm("", context=input_data)
        _daily_requests += 1
        _cache_result(agent_name, input_data, result, cache_ttl)
        return result

    model_tier = agent_cfg.get("model_tier", "mid")
    models = _config.get("models", {})
    model_cfg = models.get(model_tier, models.get("mid", {}))
    model_id = model_cfg.get("id", "groq/llama-3.3-70b-versatile")
    max_tokens = agent_cfg.get("max_output_tokens", 500)

    try:
        system_prompt, user_prompt = get_prompt(prompt_name, **prompt_kwargs)
    except (ValueError, KeyError):
        system_prompt = "أنت Dealix AI."
        user_prompt = json.dumps(input_data, ensure_ascii=False)

    user_prompt = truncate_to_budget(user_prompt, 2000)

    if model_id.startswith("groq/") and groq_key:
        # removeprefix strips only the leading provider tag, never a later
        # occurrence inside the model name.
        result = await _call_groq(groq_key, model_id.removeprefix("groq/"), system_prompt, user_prompt, max_tokens)
    elif model_id.startswith("anthropic/") and anthropic_key:
        result = await _call_anthropic(anthropic_key, model_id.removeprefix("anthropic/"), system_prompt, user_prompt, max_tokens)
    elif groq_key:
        # Configured provider unavailable but Groq is: use the default Groq model.
        result = await _call_groq(groq_key, "llama-3.3-70b-versatile", system_prompt, user_prompt, max_tokens)
    else:
        from dealix_gtm_os.agents.llm_client import call_llm
        result = await call_llm("", context=input_data)

    _daily_requests += 1
    input_tokens = estimate_tokens(system_prompt + user_prompt)
    output_tokens = estimate_tokens(result)
    cost_input = input_tokens / 1000 * model_cfg.get("cost_per_1k_input", 0.001)
    cost_output = output_tokens / 1000 * model_cfg.get("cost_per_1k_output", 0.002)
    # NOTE(review): 3.75 is presumably the USD->SAR rate applied to USD
    # per-1k prices from the config — confirm the unit of the config costs.
    # The cost always uses the configured tier's prices, even when a
    # fallback model actually served the call.
    sar_multiplier = 3.75
    _daily_cost += (cost_input + cost_output) * sar_multiplier

    _cache_result(agent_name, input_data, result, cache_ttl)

    return result
|
||||||
|
|
||||||
|
async def _call_groq(api_key: str, model: str, system: str, user: str, max_tokens: int) -> str:
    """POST a chat completion to Groq's OpenAI-compatible endpoint.

    The request asks for a JSON object response at temperature 0.3 with a
    30 second client timeout.

    Args:
        api_key: Bearer token for the Groq API.
        model: Model name without the ``groq/`` prefix.
        system: System message content.
        user: User message content.
        max_tokens: Upper bound on generated tokens.

    Returns:
        The first choice's message content, or ``"{}"`` when the response
        carries no choices, no message or no content (for example an error
        payload), so callers always receive a string.
    """
    import httpx

    payload = {
        "model": model,
        "messages": [{"role": "system", "content": system}, {"role": "user", "content": user}],
        "max_tokens": max_tokens,
        "temperature": 0.3,
        "response_format": {"type": "json_object"},
    }
    async with httpx.AsyncClient(timeout=30) as client:
        resp = await client.post(
            "https://api.groq.com/openai/v1/chat/completions",
            headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
            json=payload,
        )
        data = resp.json()

    # ``or`` guards cover both a missing key and an explicit empty/null value;
    # indexing an empty ``choices`` list would otherwise raise IndexError.
    choices = data.get("choices") or [{}]
    message = choices[0].get("message") or {}
    return message.get("content") or "{}"
|
||||||
|
|
||||||
|
async def _call_anthropic(api_key: str, model: str, system: str, user: str, max_tokens: int) -> str:
    """POST a single-turn request to the Anthropic Messages endpoint.

    Uses a 60 second client timeout and API version ``2023-06-01``.

    Returns:
        The ``text`` field of the first content block, or ``"{}"`` when the
        response has no content blocks or the first block has no ``text``.
    """
    import httpx

    request_headers = {"x-api-key": api_key, "anthropic-version": "2023-06-01", "Content-Type": "application/json"}
    request_body = {"model": model, "system": system, "messages": [{"role": "user", "content": user}], "max_tokens": max_tokens}

    async with httpx.AsyncClient(timeout=60) as client:
        resp = await client.post(
            "https://api.anthropic.com/v1/messages",
            headers=request_headers,
            json=request_body,
        )
        data = resp.json()

    blocks = data.get("content", [{}])
    if not blocks:
        return "{}"
    return blocks[0].get("text", "{}")
|
||||||
|
|
||||||
|
def get_cost_report() -> dict:
    """Summarise today's spend, request count and remaining budget (in SAR).

    The remaining budget is clamped at zero and uses the configured
    ``daily_budget.max_cost_sar`` (default 10).
    """
    ceiling = _config.get("daily_budget", {}).get("max_cost_sar", 10)
    remaining = max(0, ceiling - _daily_cost)
    return {
        "daily_cost_sar": round(_daily_cost, 4),
        "daily_requests": _daily_requests,
        "budget_remaining_sar": round(remaining, 4),
    }
|
||||||
66
salesflow-saas/backend/dealix_gtm_os/ai/prompt_registry.py
Normal file
66
salesflow-saas/backend/dealix_gtm_os/ai/prompt_registry.py
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
"""Prompt registry — versioned prompts with stable prefix for caching."""
|
||||||
|
|
||||||
|
SYSTEM_PREFIX = """أنت Dealix AI — نظام ذكاء أعمال سعودي.
|
||||||
|
مهمتك: تحليل الشركات، تحديد الفرص، اختيار القنوات، وتوليد رسائل مخصصة بالعربي السعودي.
|
||||||
|
|
||||||
|
القواعد:
|
||||||
|
- لا تخترع معلومات. قل "غير متأكد" إذا ما تعرف.
|
||||||
|
- لا تبالغ. لا تقول "مضمون" أو "100%".
|
||||||
|
- أجب بـ JSON فقط حسب الـ schema المطلوب.
|
||||||
|
- اللغة: عربي سعودي (مو فصحى).
|
||||||
|
"""
|
||||||
|
|
||||||
|
PROMPTS = {
|
||||||
|
"company_research": {
|
||||||
|
"version": "1.0",
|
||||||
|
"system": SYSTEM_PREFIX,
|
||||||
|
"user_template": """حلل هذه الشركة:
|
||||||
|
اسم: {name}
|
||||||
|
القطاع: {sector}
|
||||||
|
المدينة: {city}
|
||||||
|
الوصف: {description}
|
||||||
|
|
||||||
|
أرجع JSON بالضبط:
|
||||||
|
{{"business_summary": "...", "products_services": [...], "target_customers": [...], "revenue_model": "...", "lead_channels": [...], "pain_points": [...], "partnership_potential": "...", "opportunity_types": [...], "confidence": 0.0-1.0}}""",
|
||||||
|
},
|
||||||
|
"message_generation": {
|
||||||
|
"version": "1.0",
|
||||||
|
"system": SYSTEM_PREFIX,
|
||||||
|
"user_template": """اكتب رسالة outreach لهذه الشركة:
|
||||||
|
اسم: {name}
|
||||||
|
القطاع: {sector}
|
||||||
|
الألم: {pain}
|
||||||
|
القناة: {channel}
|
||||||
|
العرض: {offer}
|
||||||
|
|
||||||
|
الرسالة لازم:
|
||||||
|
- تبدأ بالسلام
|
||||||
|
- تذكر اسم الشركة
|
||||||
|
- تذكر ألم واضح
|
||||||
|
- تقدم حل بسيط
|
||||||
|
- CTA صغير (ديمو 10 دقائق)
|
||||||
|
- opt-out في النهاية
|
||||||
|
- أقل من 150 كلمة
|
||||||
|
|
||||||
|
أرجع JSON:
|
||||||
|
{{"subject": "...", "body": "...", "cta": "...", "follow_up_24h": "...", "follow_up_72h": "..."}}""",
|
||||||
|
},
|
||||||
|
"negotiation": {
|
||||||
|
"version": "1.0",
|
||||||
|
"system": SYSTEM_PREFIX,
|
||||||
|
"user_template": """العميل اعترض بـ: "{objection}"
|
||||||
|
سياق: {context}
|
||||||
|
|
||||||
|
أرجع JSON:
|
||||||
|
{{"response": "...", "next_action": "...", "fallback": "...", "confidence": 0.0-1.0}}""",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
def get_prompt(name: str, **kwargs) -> tuple[str, str]:
    """Look up a registered prompt and fill its user template.

    Falsy keyword values (``None``, ``""``, ``0`` ...) are substituted as an
    empty string.

    Returns:
        ``(system_prompt, user_prompt)``.

    Raises:
        ValueError: *name* is not a key of ``PROMPTS``.
        KeyError: the template references a variable missing from *kwargs*.
    """
    entry = PROMPTS.get(name)
    if not entry:
        raise ValueError(f"Unknown prompt: {name}")

    values = {key: (value or "") for key, value in kwargs.items()}
    return entry["system"], entry["user_template"].format(**values)
|
||||||
28
salesflow-saas/backend/dealix_gtm_os/ai/response_cache.py
Normal file
28
salesflow-saas/backend/dealix_gtm_os/ai/response_cache.py
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
"""In-memory response cache — avoids re-analyzing the same company."""
|
||||||
|
import hashlib
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
_cache: dict[str, dict] = {}
|
||||||
|
|
||||||
|
def _key(agent_name: str, input_data: dict) -> str:
    """Derive the cache key for an (agent, input) pair.

    The input is serialised with sorted keys so dict ordering does not
    matter; the key is the first 16 hex characters of the SHA-256 digest.
    """
    serialised = json.dumps(input_data, sort_keys=True, ensure_ascii=False)
    digest = hashlib.sha256(f"{agent_name}:{serialised}".encode())
    return digest.hexdigest()[:16]
|
||||||
|
|
||||||
|
def get_cached(agent_name: str, input_data: dict, ttl_hours: float = 24) -> Optional[dict]:
    """Fetch a cached result for (agent, input) if it is younger than the TTL.

    An entry older than ``ttl_hours`` is removed from the cache and treated
    as a miss.

    Returns:
        The stored data, or ``None`` on a miss or an expired entry.
    """
    cache_key = _key(agent_name, input_data)
    hit = _cache.get(cache_key)
    if not hit:
        return None

    age_seconds = time.time() - hit["ts"]
    if age_seconds <= ttl_hours * 3600:
        return hit["data"]

    del _cache[cache_key]
    return None
|
||||||
|
|
||||||
|
def set_cached(agent_name: str, input_data: dict, result: dict):
    """Store *result* for (agent, input), stamped with the current time."""
    entry = {"data": result, "ts": time.time()}
    _cache[_key(agent_name, input_data)] = entry
|
||||||
|
|
||||||
|
def cache_stats() -> dict:
    """Report the number of cached entries and a sample of up to ten keys."""
    sample_keys = list(_cache.keys())[:10]
    return {"entries": len(_cache), "keys": sample_keys}
|
||||||
23
salesflow-saas/backend/dealix_gtm_os/ai/token_counter.py
Normal file
23
salesflow-saas/backend/dealix_gtm_os/ai/token_counter.py
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
"""Token counter — estimates tokens before sending to avoid waste."""
|
||||||
|
import re
|
||||||
|
|
||||||
|
def estimate_tokens(text: str) -> int:
    """Cheap token estimate for mixed Arabic/English text.

    Takes the larger of the whitespace-separated word count and one token
    per four characters. Empty or ``None`` input counts as zero.
    """
    if not text:
        return 0

    word_count = sum(1 for _ in re.finditer(r'\S+', text))
    by_length = len(text) // 4
    return word_count if word_count >= by_length else by_length
|
||||||
|
|
||||||
|
def check_budget(tokens: int, max_tokens: int) -> bool:
    """Return ``True`` when *tokens* does not exceed *max_tokens*."""
    within_budget = tokens <= max_tokens
    return within_budget
|
||||||
|
|
||||||
|
def truncate_to_budget(text: str, max_tokens: int) -> str:
    """Shorten *text* so its estimated token count fits *max_tokens*.

    Text already within budget is returned unchanged. Otherwise the text is
    cut proportionally (with a 10% safety margin) and a ``[truncated]``
    marker is appended on a new line.
    """
    current = estimate_tokens(text)
    if current > max_tokens:
        keep_fraction = max_tokens / current
        limit = int(len(text) * keep_fraction * 0.9)
        return text[:limit] + "\n[truncated]"
    return text
|
||||||
71
salesflow-saas/backend/dealix_gtm_os/config/ai_budget.yaml
Normal file
71
salesflow-saas/backend/dealix_gtm_os/config/ai_budget.yaml
Normal file
@ -0,0 +1,71 @@
|
|||||||
|
models:
|
||||||
|
cheap:
|
||||||
|
id: "groq/llama-3.1-8b-instant"
|
||||||
|
max_tokens: 500
|
||||||
|
cost_per_1k_input: 0.0001
|
||||||
|
cost_per_1k_output: 0.0002
|
||||||
|
use_for: ["csv_cleanup", "classification", "initial_scoring", "reply_classification"]
|
||||||
|
mid:
|
||||||
|
id: "groq/llama-3.3-70b-versatile"
|
||||||
|
max_tokens: 800
|
||||||
|
cost_per_1k_input: 0.0006
|
||||||
|
cost_per_1k_output: 0.0008
|
||||||
|
use_for: ["company_research", "website_summary", "enrichment", "content_draft"]
|
||||||
|
high:
|
||||||
|
id: "anthropic/claude-sonnet-4-20250514"
|
||||||
|
max_tokens: 1200
|
||||||
|
cost_per_1k_input: 0.003
|
||||||
|
cost_per_1k_output: 0.015
|
||||||
|
use_for: ["sales_message", "negotiation", "partnership_strategy", "eval_judge", "complex_analysis"]
|
||||||
|
|
||||||
|
agent_budgets:
|
||||||
|
company_research_agent:
|
||||||
|
model_tier: mid
|
||||||
|
max_output_tokens: 700
|
||||||
|
cache_ttl_hours: 168
|
||||||
|
scoring_agent:
|
||||||
|
model_tier: cheap
|
||||||
|
max_output_tokens: 200
|
||||||
|
cache_ttl_hours: 24
|
||||||
|
channel_strategy_agent:
|
||||||
|
model_tier: cheap
|
||||||
|
max_output_tokens: 300
|
||||||
|
cache_ttl_hours: 24
|
||||||
|
message_generation_agent:
|
||||||
|
model_tier: high
|
||||||
|
max_output_tokens: 500
|
||||||
|
cache_ttl_hours: 0
|
||||||
|
negotiation_agent:
|
||||||
|
model_tier: high
|
||||||
|
max_output_tokens: 800
|
||||||
|
cache_ttl_hours: 0
|
||||||
|
compliance_agent:
|
||||||
|
model_tier: cheap
|
||||||
|
max_output_tokens: 200
|
||||||
|
cache_ttl_hours: 720
|
||||||
|
partnership_strategist_agent:
|
||||||
|
model_tier: mid
|
||||||
|
max_output_tokens: 500
|
||||||
|
cache_ttl_hours: 168
|
||||||
|
icp_strategist_agent:
|
||||||
|
model_tier: mid
|
||||||
|
max_output_tokens: 600
|
||||||
|
cache_ttl_hours: 168
|
||||||
|
learning_agent:
|
||||||
|
model_tier: mid
|
||||||
|
max_output_tokens: 400
|
||||||
|
cache_ttl_hours: 0
|
||||||
|
content_strategy_agent:
|
||||||
|
model_tier: mid
|
||||||
|
max_output_tokens: 400
|
||||||
|
cache_ttl_hours: 24
|
||||||
|
|
||||||
|
daily_budget:
|
||||||
|
max_cost_sar: 10.0
|
||||||
|
max_requests: 500
|
||||||
|
alert_at_percent: 80
|
||||||
|
|
||||||
|
cache:
|
||||||
|
enabled: true
|
||||||
|
backend: "memory"
|
||||||
|
default_ttl_hours: 24
|
||||||
@ -0,0 +1,34 @@
|
|||||||
|
"""Cost guard — prevents runaway AI spending."""
|
||||||
|
import yaml
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Shared budget configuration; a missing or empty file leaves ``_config``
# empty so the defaults below apply.
_config_path = Path(__file__).parent.parent / "config" / "ai_budget.yaml"
_config = {}
if _config_path.exists():
    # Explicit UTF-8 so decoding does not depend on the platform locale.
    with open(_config_path, encoding="utf-8") as f:
        _config = yaml.safe_load(f) or {}
|
||||||
|
|
||||||
|
class CostGuard:
    """Running tally of AI spend and request count against daily limits."""

    def __init__(self):
        """Read limits from the ``daily_budget`` config section and zero the tallies."""
        limits = _config.get("daily_budget", {})
        self.max_cost = limits.get("max_cost_sar", 10.0)
        self.max_requests = limits.get("max_requests", 500)
        self.alert_pct = limits.get("alert_at_percent", 80)
        self.total_cost = 0.0
        self.total_requests = 0

    def check(self) -> dict:
        """Return current usage, percentages, and allow/alert flags.

        A non-positive limit yields a percentage of 0 for that dimension.
        ``allowed`` is true while both percentages are below 100; ``alert``
        is true once either reaches ``alert_pct``.
        """
        cost_pct = 0
        if self.max_cost > 0:
            cost_pct = self.total_cost / self.max_cost * 100

        req_pct = 0
        if self.max_requests > 0:
            req_pct = self.total_requests / self.max_requests * 100

        under_limits = cost_pct < 100 and req_pct < 100
        threshold_hit = cost_pct >= self.alert_pct or req_pct >= self.alert_pct
        return {
            "allowed": under_limits,
            "cost_sar": round(self.total_cost, 4),
            "cost_pct": round(cost_pct, 1),
            "requests": self.total_requests,
            "requests_pct": round(req_pct, 1),
            "alert": threshold_hit,
        }

    def record(self, cost_sar: float):
        """Add one request costing *cost_sar* to the tallies."""
        self.total_requests += 1
        self.total_cost += cost_sar
|
||||||
@ -0,0 +1,50 @@
|
|||||||
|
"""Output validator — blocks fake claims, prohibited actions, and hallucinations."""
|
||||||
|
import re
|
||||||
|
|
||||||
|
FORBIDDEN_CLAIMS = [
|
||||||
|
"مضمون", "guaranteed", "100%", "أفضل في السوق", "بدون منافس",
|
||||||
|
"SOC 2", "ISO 27001", "bank-grade", "military-grade", "zero risk",
|
||||||
|
"أمان مطلق", "نتائج مضمونة", "ربح مضمون", "دخل مضمون",
|
||||||
|
]
|
||||||
|
|
||||||
|
PROHIBITED_ACTIONS = [
|
||||||
|
"linkedin_scraping", "linkedin_auto_dm", "whatsapp_cold_blast",
|
||||||
|
"instagram_mass_dm", "x_auto_mention", "fake_account",
|
||||||
|
"buy_lead_list", "tiktok_dm_scraping",
|
||||||
|
]
|
||||||
|
|
||||||
|
def validate_output(text: str, context: str = "") -> dict:
    """Scan LLM output for forbidden claims, prohibited actions and a missing opt-out.

    Checks (all case-insensitive):
      * every phrase in ``FORBIDDEN_CLAIMS`` -> severity ``high``
      * every entry in ``PROHIBITED_ACTIONS``, matched either verbatim or
        with underscores replaced by spaces -> severity ``critical``
      * texts longer than 200 characters without an opt-out keyword, when
        *context* mentions outreach/message/email -> severity ``medium``

    Returns:
        ``{"valid", "issues", "issue_count"}``; ``valid`` is false only when
        a ``high`` or ``critical`` issue was found.
    """
    lowered = text.lower()

    issues = [
        {"type": "forbidden_claim", "claim": claim, "severity": "high"}
        for claim in FORBIDDEN_CLAIMS
        if claim.lower() in lowered
    ]
    issues.extend(
        {"type": "prohibited_action", "action": action, "severity": "critical"}
        for action in PROHIBITED_ACTIONS
        if action.lower().replace("_", " ") in lowered or action in lowered
    )

    has_optout = re.search(r'إيقاف|stop|opt.?out|unsubscribe', text, re.IGNORECASE)
    if not has_optout and len(text) > 200:
        lowered_context = context.lower()
        if any(word in lowered_context for word in ["outreach", "message", "email", "رسالة"]):
            issues.append({"type": "missing_optout", "severity": "medium"})

    has_blocking_issue = any(issue["severity"] in ("high", "critical") for issue in issues)
    return {
        "valid": not has_blocking_issue,
        "issues": issues,
        "issue_count": len(issues),
    }
|
||||||
|
|
||||||
|
def validate_channel_action(channel: str, action: str) -> dict:
    """Validates that a channel+action combination is safe.

    Both arguments are normalised before the lookup (surrounding whitespace
    removed, lower-cased, hyphens and spaces turned into underscores) so that
    spelling variants such as ``"Scraping"`` or ``"cold-blast"`` cannot slip
    past the guardrail. Only the part of the channel before the first
    underscore is used, so ``"linkedin_manual"`` is checked as ``"linkedin"``.

    Args:
        channel: Channel identifier, e.g. ``"linkedin"`` or ``"whatsapp_warm"``.
        action: Action identifier, e.g. ``"scraping"`` or ``"send_message"``.

    Returns:
        ``{"allowed": bool, "reason": str}``. The reason for a blocked
        combination quotes *action* and *channel* as they were passed in.
    """
    prohibited = frozenset({
        ("linkedin", "scraping"), ("linkedin", "auto_dm"), ("linkedin", "auto_connect"),
        ("whatsapp", "cold_blast"), ("whatsapp", "mass_send"),
        ("instagram", "mass_dm"), ("instagram", "scraping"),
        ("x", "auto_mention"), ("x", "auto_reply_mass"),
        ("tiktok", "dm_scraping"), ("tiktok", "mass_dm"),
    })

    def _normalise(value: str) -> str:
        # One canonical spelling: lower-case, underscore-separated.
        return value.strip().lower().replace("-", "_").replace(" ", "_")

    channel_key = _normalise(channel).split("_")[0]
    action_key = _normalise(action)

    if (channel_key, action_key) in prohibited:
        return {"allowed": False, "reason": f"{action} on {channel} is PROHIBITED by platform policy"}
    return {"allowed": True, "reason": "Action is within safe boundaries"}
|
||||||
41
salesflow-saas/backend/dealix_gtm_os/observability/trace.py
Normal file
41
salesflow-saas/backend/dealix_gtm_os/observability/trace.py
Normal file
@ -0,0 +1,41 @@
|
|||||||
|
"""Pipeline tracing — tracks cost, latency, decisions per run."""
|
||||||
|
import time
|
||||||
|
import uuid
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger = logging.getLogger("dealix.gtm_os.trace")
|
||||||
|
|
||||||
|
class PipelineTrace:
    """Collects per-step cost and latency for one pipeline run."""

    def __init__(self, pipeline_name: str, company: str = ""):
        """Start a trace: short random id, wall-clock start time, empty step list."""
        self.trace_id = str(uuid.uuid4())[:8]
        self.pipeline = pipeline_name
        self.company = company
        self.start_time = time.time()
        self.steps: list[dict] = []
        self.total_cost = 0.0

    def log_step(self, agent: str, result_summary: str, cost: float = 0.0, latency_ms: float = 0.0):
        """Append one step record and add its cost to the running total.

        The summary is capped at 200 characters; the stored cost and latency
        are rounded, while the running total uses the unrounded cost.
        """
        self.steps.append({
            "agent": agent,
            "result": result_summary[:200],
            "cost_sar": round(cost, 6),
            "latency_ms": round(latency_ms, 1),
            "timestamp": time.time(),
        })
        self.total_cost += cost

    def finish(self) -> dict:
        """Log a one-line summary and return the full trace report."""
        elapsed = time.time() - self.start_time
        step_count = len(self.steps)
        report = {
            "trace_id": self.trace_id,
            "pipeline": self.pipeline,
            "company": self.company,
            "total_time_s": round(elapsed, 2),
            "total_cost_sar": round(self.total_cost, 6),
            "steps": step_count,
            "step_details": self.steps,
        }
        logger.info(f"[TRACE:{self.trace_id}] {self.pipeline} for {self.company}: {elapsed:.1f}s, {self.total_cost:.4f} SAR, {len(self.steps)} steps")
        return report
|
||||||
@ -8,3 +8,23 @@
|
|||||||
{"company": "مركز تدريب", "sector": "training", "city": "الرياض", "expected_opportunity": "direct_customer", "expected_channel": "email", "prohibited": []}
|
{"company": "مركز تدريب", "sector": "training", "city": "الرياض", "expected_opportunity": "direct_customer", "expected_channel": "email", "prohibited": []}
|
||||||
{"company": "شركة SaaS", "sector": "saas", "city": "الرياض", "expected_opportunity": "direct_customer", "expected_channel": "email", "prohibited": ["linkedin_scraping"]}
|
{"company": "شركة SaaS", "sector": "saas", "city": "الرياض", "expected_opportunity": "direct_customer", "expected_channel": "email", "prohibited": ["linkedin_scraping"]}
|
||||||
{"company": "فريلانسر تسويق", "sector": "agency", "city": "جدة", "expected_opportunity": "agency_partner", "expected_channel": "email", "prohibited": ["whatsapp_cold_blast"]}
|
{"company": "فريلانسر تسويق", "sector": "agency", "city": "جدة", "expected_opportunity": "agency_partner", "expected_channel": "email", "prohibited": ["whatsapp_cold_blast"]}
|
||||||
|
{"company": "وكالة دعاية كبيرة", "sector": "agency", "city": "الرياض", "expected_opportunity": "agency_partner", "expected_channel": "email", "prohibited": ["linkedin_scraping"]}
|
||||||
|
{"company": "شركة تطوير عقاري", "sector": "real_estate", "city": "الدمام", "expected_opportunity": "direct_customer", "expected_channel": "email", "prohibited": []}
|
||||||
|
{"company": "مركز طبي", "sector": "clinic", "city": "الرياض", "expected_opportunity": "direct_customer", "expected_channel": "whatsapp_warm", "prohibited": ["whatsapp_cold_blast"]}
|
||||||
|
{"company": "متجر أزياء", "sector": "ecommerce", "city": "جدة", "expected_opportunity": "direct_customer", "expected_channel": "email", "prohibited": ["instagram_mass_dm"]}
|
||||||
|
{"company": "وكالة تصميم مواقع", "sector": "website_agency", "city": "الرياض", "expected_opportunity": "implementation_partner", "expected_channel": "linkedin_manual", "prohibited": ["linkedin_scraping"]}
|
||||||
|
{"company": "مكتب محاماة", "sector": "consulting", "city": "الرياض", "expected_opportunity": "referral_partner", "expected_channel": "linkedin_manual", "prohibited": []}
|
||||||
|
{"company": "شركة صيانة", "sector": "construction", "city": "الخبر", "expected_opportunity": "direct_customer", "expected_channel": "email", "prohibited": []}
|
||||||
|
{"company": "معهد لغات", "sector": "training", "city": "جدة", "expected_opportunity": "direct_customer", "expected_channel": "email", "prohibited": []}
|
||||||
|
{"company": "شركة برمجيات", "sector": "saas", "city": "الرياض", "expected_opportunity": "direct_customer", "expected_channel": "email", "prohibited": []}
|
||||||
|
{"company": "media buyer مستقل", "sector": "agency", "city": "الرياض", "expected_opportunity": "agency_partner", "expected_channel": "email", "prohibited": []}
|
||||||
|
{"company": "شركة تسويق إلكتروني", "sector": "agency", "city": "الدمام", "expected_opportunity": "agency_partner", "expected_channel": "email", "prohibited": ["linkedin_scraping"]}
|
||||||
|
{"company": "وكالة سوشال ميديا", "sector": "agency", "city": "جدة", "expected_opportunity": "agency_partner", "expected_channel": "email", "prohibited": []}
|
||||||
|
{"company": "عيادة أسنان", "sector": "clinic", "city": "الرياض", "expected_opportunity": "direct_customer", "expected_channel": "whatsapp_warm", "prohibited": ["whatsapp_cold_blast"]}
|
||||||
|
{"company": "متجر إلكترونيات", "sector": "ecommerce", "city": "الخبر", "expected_opportunity": "direct_customer", "expected_channel": "email", "prohibited": []}
|
||||||
|
{"company": "شركة تطبيقات", "sector": "website_agency", "city": "الرياض", "expected_opportunity": "implementation_partner", "expected_channel": "linkedin_manual", "prohibited": ["linkedin_scraping"]}
|
||||||
|
{"company": "وكالة PR", "sector": "agency", "city": "الرياض", "expected_opportunity": "agency_partner", "expected_channel": "email", "prohibited": []}
|
||||||
|
{"company": "مكتب هندسي", "sector": "construction", "city": "الرياض", "expected_opportunity": "direct_customer", "expected_channel": "email", "prohibited": []}
|
||||||
|
{"company": "شركة توظيف", "sector": "consulting", "city": "جدة", "expected_opportunity": "referral_partner", "expected_channel": "linkedin_manual", "prohibited": []}
|
||||||
|
{"company": "مطعم سلسلة", "sector": "ecommerce", "city": "الرياض", "expected_opportunity": "direct_customer", "expected_channel": "email", "prohibited": []}
|
||||||
|
{"company": "وكالة محتوى", "sector": "agency", "city": "الدمام", "expected_opportunity": "agency_partner", "expected_channel": "email", "prohibited": []}
|
||||||
|
|||||||
74
salesflow-saas/backend/tests/evals/test_compliance_gate.py
Normal file
74
salesflow-saas/backend/tests/evals/test_compliance_gate.py
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
"""Tests that compliance gate blocks all prohibited actions."""
|
||||||
|
import sys, os
|
||||||
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
|
||||||
|
|
||||||
|
from dealix_gtm_os.guardrails.output_validator import validate_channel_action, validate_output
|
||||||
|
|
||||||
|
def test_prohibited_actions():
    """Every known-prohibited channel/action pair must be rejected by the gate."""
    prohibited_cases = [
        ("linkedin", "scraping"),
        ("linkedin", "auto_dm"),
        ("linkedin", "auto_connect"),
        ("whatsapp", "cold_blast"),
        ("whatsapp", "mass_send"),
        ("instagram", "mass_dm"),
        ("instagram", "scraping"),
        ("x", "auto_mention"),
        ("tiktok", "dm_scraping"),
        ("tiktok", "mass_dm"),
    ]

    passed = 0
    for channel, action in prohibited_cases:
        verdict = validate_channel_action(channel, action)
        if verdict["allowed"]:
            print(f" ❌ {channel}/{action} → NOT BLOCKED (FAIL)")
            continue
        passed += 1
        print(f" ✅ {channel}/{action} → BLOCKED")

    total = len(prohibited_cases)
    print(f"\nProhibited actions: {passed}/{total} blocked")
    assert passed == total, f"Only {passed}/{total} blocked"
|
||||||
|
|
||||||
|
def test_allowed_actions():
    """Legitimate channel/action pairs must pass the gate."""
    allowed_cases = [
        ("email", "send_message"),
        ("linkedin", "research"),
        ("whatsapp", "warm_message"),
        ("x", "post"),
    ]

    passed = 0
    for channel, action in allowed_cases:
        verdict = validate_channel_action(channel, action)
        if not verdict["allowed"]:
            print(f" ❌ {channel}/{action} → BLOCKED (FAIL)")
            continue
        passed += 1
        print(f" ✅ {channel}/{action} → ALLOWED")

    total = len(allowed_cases)
    print(f"\nAllowed actions: {passed}/{total} allowed")
    assert passed == total
|
||||||
|
|
||||||
|
def test_forbidden_claims():
    """Texts with forbidden claims are invalid; a plain, honest pitch is valid."""
    bad_texts = [
        "نتائج مضمونة 100% لكل العملاء",
        "Dealix is SOC 2 compliant and ISO 27001 certified",
        "ربح مضمون من أول يوم بدون أي جهد",
    ]
    for sample in bad_texts:
        verdict = validate_output(sample)
        assert not verdict["valid"], f"Should have blocked: {sample[:30]}..."
        print(f" ✅ Blocked: {sample[:40]}...")

    good_text = "Dealix يساعد في تحسين متابعة العملاء. نبدأ بـ pilot 499 ريال مع ضمان استرداد."
    verdict = validate_output(good_text)
    assert verdict["valid"], "Should have allowed safe text"
    print(" ✅ Allowed safe text")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
print("=== Prohibited Actions ===")
|
||||||
|
test_prohibited_actions()
|
||||||
|
print("\n=== Allowed Actions ===")
|
||||||
|
test_allowed_actions()
|
||||||
|
print("\n=== Forbidden Claims ===")
|
||||||
|
test_forbidden_claims()
|
||||||
|
print("\n✅ ALL COMPLIANCE TESTS PASSED")
|
||||||
48
salesflow-saas/backend/tests/evals/test_message_quality.py
Normal file
48
salesflow-saas/backend/tests/evals/test_message_quality.py
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
"""Tests that generated messages meet quality standards."""
|
||||||
|
import asyncio, sys, os
|
||||||
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
|
||||||
|
|
||||||
|
from dealix_gtm_os.agents.message_generation_agent import MessageGenerationAgent
|
||||||
|
|
||||||
|
async def test_message_quality():
    """Generated messages must be personalised, carry an opt-out, need approval,
    include both follow-ups and have a body between 50 characters and 300 words."""
    agent = MessageGenerationAgent()

    cases = [
        {"name": "وكالة تسويق", "sector": "agency", "channel": "email"},
        {"name": "شركة عقار", "sector": "real_estate", "channel": "email"},
        {"name": "عيادة", "sector": "saas", "channel": "whatsapp_warm"},
    ]

    passed = 0
    for case in cases:
        msg = await agent.run(case)
        body = msg.get("body", "")

        # Each entry is (failure condition, label reported when it holds).
        checks = [
            (case["name"] not in body, "company name not in body"),
            ("إيقاف" not in msg.get("stop_condition", "") and "إيقاف" not in body, "no opt-out"),
            (not msg.get("approval_required"), "approval not required"),
            (not msg.get("follow_up_24h"), "no 24h follow-up"),
            (not msg.get("follow_up_72h"), "no 72h follow-up"),
            (len(body) < 50, "body too short"),
            (len(body.split()) > 300, "body too long"),
        ]
        issues = [label for failed, label in checks if failed]

        if issues:
            print(f" ❌ {case['name']}: {', '.join(issues)}")
        else:
            passed += 1
            print(f" ✅ {case['name']}: personalized, opt-out, approval, follow-ups")

    print(f"\nMessage quality: {passed}/{len(cases)} passed")
    assert passed == len(cases)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
print("=== Message Quality Tests ===")
|
||||||
|
asyncio.run(test_message_quality())
|
||||||
|
print("\n✅ ALL MESSAGE QUALITY TESTS PASSED")
|
||||||
Loading…
Reference in New Issue
Block a user