diff --git a/salesflow-saas/backend/dealix_gtm_os/ai/__init__.py b/salesflow-saas/backend/dealix_gtm_os/ai/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/salesflow-saas/backend/dealix_gtm_os/ai/llm_router.py b/salesflow-saas/backend/dealix_gtm_os/ai/llm_router.py
new file mode 100644
index 00000000..be5dda8c
--- /dev/null
+++ b/salesflow-saas/backend/dealix_gtm_os/ai/llm_router.py
@@ -0,0 +1,150 @@
+"""LLM Router — routes to right model, enforces budgets, uses cache."""
+import json
+import os
+import time
+import yaml
+from pathlib import Path
+from typing import Optional
+
+from dealix_gtm_os.ai.token_counter import estimate_tokens, truncate_to_budget
+from dealix_gtm_os.ai.response_cache import get_cached, set_cached
+from dealix_gtm_os.ai.prompt_registry import get_prompt
+
+_config_path = Path(__file__).parent.parent / "config" / "ai_budget.yaml"
+_config = {}
+if _config_path.exists():
+    with open(_config_path, encoding="utf-8") as f:
+        _config = yaml.safe_load(f) or {}
+
+# Process-local usage counters for the current 24-hour window.
+_daily_cost = 0.0
+_daily_requests = 0
+_daily_reset = time.time()
+
+def _check_daily_budget() -> bool:
+    """Reset the counters when the window has expired; return True while under both limits."""
+    global _daily_cost, _daily_requests, _daily_reset
+    # The window rolls 24h after the previous reset, not at calendar midnight.
+    if time.time() - _daily_reset > 86400:
+        _daily_cost = 0.0
+        _daily_requests = 0
+        _daily_reset = time.time()
+    budget = _config.get("daily_budget", {})
+    if _daily_cost >= budget.get("max_cost_sar", 10.0):
+        return False
+    if _daily_requests >= budget.get("max_requests", 500):
+        return False
+    return True
+
+def _get_agent_config(agent_name: str) -> dict:
+    """Return the agent's budget entry, or mid-tier defaults for unknown agents."""
+    return _config.get("agent_budgets", {}).get(agent_name, {"model_tier": "mid", "max_output_tokens": 500, "cache_ttl_hours": 24})
+
+def _store_result(agent_name: str, input_data: dict, result: str) -> None:
+    """Best-effort cache write; invalid-JSON or empty replies are not cached."""
+    try:
+        parsed = json.loads(result)
+    except (TypeError, ValueError):
+        # JSONDecodeError is a ValueError; TypeError covers a non-string reply.
+        return
+    if parsed:
+        set_cached(agent_name, input_data, parsed)
+
+async def route_llm_call(agent_name: str, prompt_name: str, input_data: dict, **prompt_kwargs) -> str:
+    """Main entry point. Routes to correct model with budget/cache.
+
+    Returns the reply text (cache hits are re-serialized as JSON), or a JSON
+    error object with ``budget_hit`` set when the daily budget is exhausted.
+    """
+    global _daily_cost, _daily_requests
+
+    agent_cfg = _get_agent_config(agent_name)
+    cache_ttl = agent_cfg.get("cache_ttl_hours", 24)
+
+    if cache_ttl > 0:
+        cached = get_cached(agent_name, input_data, cache_ttl)
+        if cached:
+            return json.dumps(cached, ensure_ascii=False)
+
+    if not _check_daily_budget():
+        return json.dumps({"error": "Daily AI budget exceeded", "budget_hit": True})
+
+    groq_key = os.environ.get("GROQ_API_KEY", "")
+    anthropic_key = os.environ.get("ANTHROPIC_API_KEY", "")
+
+    if not groq_key and not anthropic_key:
+        # No provider key configured: delegate to the project's default client.
+        from dealix_gtm_os.agents.llm_client import call_llm
+        result = await call_llm("", context=input_data)
+        _daily_requests += 1
+        if cache_ttl > 0:
+            _store_result(agent_name, input_data, result)
+        return result
+
+    model_tier = agent_cfg.get("model_tier", "mid")
+    models = _config.get("models", {})
+    model_cfg = models.get(model_tier, models.get("mid", {}))
+    model_id = model_cfg.get("id", "groq/llama-3.3-70b-versatile")
+    max_tokens = agent_cfg.get("max_output_tokens", 500)
+
+    try:
+        system_prompt, user_prompt = get_prompt(prompt_name, **prompt_kwargs)
+    except (ValueError, KeyError):
+        # Unknown prompt or missing template variable: send the raw input instead.
+        system_prompt = "أنت Dealix AI."
+        user_prompt = json.dumps(input_data, ensure_ascii=False)
+
+    user_prompt = truncate_to_budget(user_prompt, 2000)
+
+    if model_id.startswith("groq/") and groq_key:
+        result = await _call_groq(groq_key, model_id.removeprefix("groq/"), system_prompt, user_prompt, max_tokens)
+    elif model_id.startswith("anthropic/") and anthropic_key:
+        result = await _call_anthropic(anthropic_key, model_id.removeprefix("anthropic/"), system_prompt, user_prompt, max_tokens)
+    elif groq_key:
+        result = await _call_groq(groq_key, "llama-3.3-70b-versatile", system_prompt, user_prompt, max_tokens)
+    else:
+        from dealix_gtm_os.agents.llm_client import call_llm
+        result = await call_llm("", context=input_data)
+
+    _daily_requests += 1
+    input_tokens = estimate_tokens(system_prompt + user_prompt)
+    output_tokens = estimate_tokens(result)
+    cost_input = input_tokens / 1000 * model_cfg.get("cost_per_1k_input", 0.001)
+    cost_output = output_tokens / 1000 * model_cfg.get("cost_per_1k_output", 0.002)
+    # NOTE(review): 3.75 looks like a fixed USD->SAR rate — confirm the price units.
+    _daily_cost += (cost_input + cost_output) * 3.75
+
+    if cache_ttl > 0:
+        _store_result(agent_name, input_data, result)
+
+    return result
+
+async def _call_groq(api_key: str, model: str, system: str, user: str, max_tokens: int) -> str:
+    """POST a JSON-mode chat completion to Groq and return the reply text ("{}" if absent)."""
+    import httpx
+    async with httpx.AsyncClient(timeout=30) as client:
+        resp = await client.post(
+            "https://api.groq.com/openai/v1/chat/completions",
+            headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
+            json={"model": model, "messages": [{"role": "system", "content": system}, {"role": "user", "content": user}], "max_tokens": max_tokens, "temperature": 0.3, "response_format": {"type": "json_object"}},
+        )
+        # An empty or missing "choices" list (e.g. an error payload) yields "{}".
+        choices = resp.json().get("choices") or [{}]
+        return choices[0].get("message", {}).get("content") or "{}"
+
+async def _call_anthropic(api_key: str, model: str, system: str, user: str, max_tokens: int) -> str:
+    """POST a message to the Anthropic Messages API and return the first block's text."""
+    import httpx
+    async with httpx.AsyncClient(timeout=60) as client:
+        resp = await client.post(
+            "https://api.anthropic.com/v1/messages",
+            headers={"x-api-key": api_key, "anthropic-version": "2023-06-01", "Content-Type": "application/json"},
+            json={"model": model, "system": system, "messages": [{"role": "user", "content": user}], "max_tokens": max_tokens},
+        )
+        data = resp.json()
+        content = data.get("content", [{}])
+        return content[0].get("text", "{}") if content else "{}"
+
+def get_cost_report() -> dict:
+    """Return the window's spend, request count, and remaining budget in SAR."""
+    return {"daily_cost_sar": round(_daily_cost, 4), "daily_requests": _daily_requests, "budget_remaining_sar": round(max(0, _config.get("daily_budget", {}).get("max_cost_sar", 10) - _daily_cost), 4)}
diff --git a/salesflow-saas/backend/dealix_gtm_os/ai/prompt_registry.py b/salesflow-saas/backend/dealix_gtm_os/ai/prompt_registry.py
new file mode 100644
index 00000000..11d9f767
--- /dev/null
+++ b/salesflow-saas/backend/dealix_gtm_os/ai/prompt_registry.py
@@ -0,0 +1,71 @@
+"""Prompt registry — versioned prompts with stable prefix for caching."""
+
+SYSTEM_PREFIX = """أنت Dealix AI — نظام ذكاء أعمال سعودي.
+مهمتك: تحليل الشركات، تحديد الفرص، اختيار القنوات، وتوليد رسائل مخصصة بالعربي السعودي.
+
+القواعد:
+- لا تخترع معلومات. قل "غير متأكد" إذا ما تعرف.
+- لا تبالغ. لا تقول "مضمون" أو "100%".
+- أجب بـ JSON فقط حسب الـ schema المطلوب.
+- اللغة: عربي سعودي (مو فصحى).
+"""
+
+PROMPTS = {
+    "company_research": {
+        "version": "1.0",
+        "system": SYSTEM_PREFIX,
+        "user_template": """حلل هذه الشركة:
+اسم: {name}
+القطاع: {sector}
+المدينة: {city}
+الوصف: {description}
+
+أرجع JSON بالضبط:
+{{"business_summary": "...", "products_services": [...], "target_customers": [...], "revenue_model": "...", "lead_channels": [...], "pain_points": [...], "partnership_potential": "...", "opportunity_types": [...], "confidence": 0.0-1.0}}""",
+    },
+    "message_generation": {
+        "version": "1.0",
+        "system": SYSTEM_PREFIX,
+        "user_template": """اكتب رسالة outreach لهذه الشركة:
+اسم: {name}
+القطاع: {sector}
+الألم: {pain}
+القناة: {channel}
+العرض: {offer}
+
+الرسالة لازم:
+- تبدأ بالسلام
+- تذكر اسم الشركة
+- تذكر ألم واضح
+- تقدم حل بسيط
+- CTA صغير (ديمو 10 دقائق)
+- opt-out في النهاية
+- أقل من 150 كلمة
+
+أرجع JSON:
+{{"subject": "...", "body": "...", "cta": "...", "follow_up_24h": "...", "follow_up_72h": "..."}}""",
+    },
+    "negotiation": {
+        "version": "1.0",
+        "system": SYSTEM_PREFIX,
+        "user_template": """العميل اعترض بـ: "{objection}"
+سياق: {context}
+
+أرجع JSON:
+{{"response": "...", "next_action": "...", "fallback": "...", "confidence": 0.0-1.0}}""",
+    },
+}
+
+def get_prompt(name: str, /, **kwargs) -> tuple[str, str]:
+    """Returns (system_prompt, user_prompt) with variables filled.
+
+    ``name`` is positional-only so callers can pass a ``name=...`` template
+    variable. None values render as empty strings. Raises ValueError for an
+    unknown prompt and KeyError when a template variable is missing.
+    """
+    p = PROMPTS.get(name)
+    if not p:
+        raise ValueError(f"Unknown prompt: {name}")
+    system = p["system"]
+    user = p["user_template"].format(**{k: "" if v is None else v for k, v in kwargs.items()})
+    return system, user
diff --git a/salesflow-saas/backend/dealix_gtm_os/ai/response_cache.py b/salesflow-saas/backend/dealix_gtm_os/ai/response_cache.py
new file mode 100644
index 00000000..937d088d
--- /dev/null
+++ b/salesflow-saas/backend/dealix_gtm_os/ai/response_cache.py
@@ -0,0 +1,33 @@
+"""In-memory response cache — avoids re-analyzing the same company."""
+import hashlib
+import json
+import time
+from typing import Optional
+
+_cache: dict[str, dict] = {}
+
+def _key(agent_name: str, input_data: dict) -> str:
+    """Derive a stable 16-hex-char key from the agent name and its input."""
+    # default=str keeps non-JSON values (dates, Decimals, ...) from raising TypeError.
+    raw = f"{agent_name}:{json.dumps(input_data, sort_keys=True, ensure_ascii=False, default=str)}"
+    return hashlib.sha256(raw.encode()).hexdigest()[:16]
+
+def get_cached(agent_name: str, input_data: dict, ttl_hours: float = 24) -> Optional[dict]:
+    """Return the cached result, or None when absent or older than ttl_hours (expired entries are evicted)."""
+    k = _key(agent_name, input_data)
+    entry = _cache.get(k)
+    if not entry:
+        return None
+    if time.time() - entry["ts"] > ttl_hours * 3600:
+        del _cache[k]
+        return None
+    return entry["data"]
+
+def set_cached(agent_name: str, input_data: dict, result: dict):
+    """Store result under the (agent, input) key, stamped with the current time."""
+    k = _key(agent_name, input_data)
+    _cache[k] = {"data": result, "ts": time.time()}
+
+def cache_stats() -> dict:
+    """Return the entry count and up to ten of the stored keys."""
+    return {"entries": len(_cache), "keys": list(_cache.keys())[:10]}
diff --git a/salesflow-saas/backend/dealix_gtm_os/ai/token_counter.py b/salesflow-saas/backend/dealix_gtm_os/ai/token_counter.py
new file mode 100644
index 00000000..6bc44100
--- /dev/null
+++ b/salesflow-saas/backend/dealix_gtm_os/ai/token_counter.py
@@ -0,0 +1,24 @@
+"""Token counter — estimates tokens before sending to avoid waste."""
+import re
+
+def estimate_tokens(text: str) -> int:
+    """Rough token estimate: the larger of the word count and len(text) // 4."""
+    if not text:
+        return 0
+    words = len(re.findall(r'\S+', text))
+    chars = len(text)
+    return max(words, chars // 4)
+
+def check_budget(tokens: int, max_tokens: int) -> bool:
+    """Returns True if within budget."""
+    return tokens <= max_tokens
+
+def truncate_to_budget(text: str, max_tokens: int) -> str:
+    """Truncates text to fit within token budget, appending a "[truncated]" marker when cut."""
+    estimated = estimate_tokens(text)
+    if estimated <= max_tokens:
+        return text
+    ratio = max_tokens / estimated
+    # Clamp at 0: a negative budget would otherwise slice from the end and keep text.
+    cut_at = max(0, int(len(text) * ratio * 0.9))
+    return text[:cut_at] + "\n[truncated]"
diff --git a/salesflow-saas/backend/dealix_gtm_os/config/ai_budget.yaml b/salesflow-saas/backend/dealix_gtm_os/config/ai_budget.yaml
new file mode 100644
index 00000000..1851ab5f
--- /dev/null
+++
b/salesflow-saas/backend/dealix_gtm_os/config/ai_budget.yaml
@@ -0,0 +1,71 @@
+models:  # tier name -> model settings; a tier is selected via agent_budgets.<agent>.model_tier
+  cheap:
+    id: "groq/llama-3.1-8b-instant"  # the "groq/" or "anthropic/" prefix selects the provider in llm_router
+    max_tokens: 500  # NOTE(review): llm_router reads the agent's max_output_tokens, not this key — confirm it is used elsewhere
+    cost_per_1k_input: 0.0001  # NOTE(review): presumably USD; llm_router multiplies the cost by 3.75 before adding it to the SAR total
+    cost_per_1k_output: 0.0002
+    use_for: ["csv_cleanup", "classification", "initial_scoring", "reply_classification"]  # NOTE(review): not read by llm_router — informational?
+  mid:  # also the fallback tier when an agent names an unknown tier
+    id: "groq/llama-3.3-70b-versatile"
+    max_tokens: 800
+    cost_per_1k_input: 0.0006
+    cost_per_1k_output: 0.0008
+    use_for: ["company_research", "website_summary", "enrichment", "content_draft"]
+  high:
+    id: "anthropic/claude-sonnet-4-20250514"
+    max_tokens: 1200
+    cost_per_1k_input: 0.003
+    cost_per_1k_output: 0.015
+    use_for: ["sales_message", "negotiation", "partnership_strategy", "eval_judge", "complex_analysis"]
+
+agent_budgets:  # per-agent settings; agents not listed here get mid / 500 tokens / 24h defaults in llm_router
+  company_research_agent:
+    model_tier: mid
+    max_output_tokens: 700  # passed to the provider as max_tokens
+    cache_ttl_hours: 168  # cached results older than this are discarded
+  scoring_agent:
+    model_tier: cheap
+    max_output_tokens: 200
+    cache_ttl_hours: 24
+  channel_strategy_agent:
+    model_tier: cheap
+    max_output_tokens: 300
+    cache_ttl_hours: 24
+  message_generation_agent:
+    model_tier: high
+    max_output_tokens: 500
+    cache_ttl_hours: 0  # 0 disables caching for this agent
+  negotiation_agent:
+    model_tier: high
+    max_output_tokens: 800
+    cache_ttl_hours: 0
+  compliance_agent:
+    model_tier: cheap
+    max_output_tokens: 200
+    cache_ttl_hours: 720
+  partnership_strategist_agent:
+    model_tier: mid
+    max_output_tokens: 500
+    cache_ttl_hours: 168
+  icp_strategist_agent:
+    model_tier: mid
+    max_output_tokens: 600
+    cache_ttl_hours: 168
+  learning_agent:
+    model_tier: mid
+    max_output_tokens: 400
+    cache_ttl_hours: 0
+  content_strategy_agent:
+    model_tier: mid
+    max_output_tokens: 400
+    cache_ttl_hours: 24
+
+daily_budget:  # read by both llm_router and guardrails/cost_guard
+  max_cost_sar: 10.0  # calls are refused once the estimated spend reaches this
+  max_requests: 500  # calls are refused once this many requests were made
+  alert_at_percent: 80  # CostGuard.check sets "alert" at or above this usage percentage
+
+cache:  # NOTE(review): response_cache and llm_router do not read this section — confirm where it is consumed
+  enabled: true
+  backend: "memory"
+  default_ttl_hours: 24
diff --git a/salesflow-saas/backend/dealix_gtm_os/guardrails/__init__.py b/salesflow-saas/backend/dealix_gtm_os/guardrails/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git
a/salesflow-saas/backend/dealix_gtm_os/guardrails/cost_guard.py b/salesflow-saas/backend/dealix_gtm_os/guardrails/cost_guard.py
new file mode 100644
index 00000000..557aea65
--- /dev/null
+++ b/salesflow-saas/backend/dealix_gtm_os/guardrails/cost_guard.py
@@ -0,0 +1,40 @@
+"""Cost guard — prevents runaway AI spending."""
+import yaml
+from pathlib import Path
+
+_config_path = Path(__file__).parent.parent / "config" / "ai_budget.yaml"
+_config = {}
+if _config_path.exists():
+    with open(_config_path, encoding="utf-8") as f:
+        _config = yaml.safe_load(f) or {}
+
+class CostGuard:
+    """Tracks cumulative cost and request count against the configured limits (no automatic reset)."""
+
+    def __init__(self):
+        budget = _config.get("daily_budget", {})
+        self.max_cost = budget.get("max_cost_sar", 10.0)
+        self.max_requests = budget.get("max_requests", 500)
+        self.alert_pct = budget.get("alert_at_percent", 80)
+        self.total_cost = 0.0
+        self.total_requests = 0
+
+    def check(self) -> dict:
+        """Return usage percentages plus the ``allowed`` and ``alert`` flags."""
+        # A non-positive limit means "no budget": report 100% used so calls are
+        # blocked instead of being treated as unlimited.
+        cost_pct = (self.total_cost / self.max_cost * 100) if self.max_cost > 0 else 100.0
+        req_pct = (self.total_requests / self.max_requests * 100) if self.max_requests > 0 else 100.0
+        return {
+            "allowed": cost_pct < 100 and req_pct < 100,
+            "cost_sar": round(self.total_cost, 4),
+            "cost_pct": round(cost_pct, 1),
+            "requests": self.total_requests,
+            "requests_pct": round(req_pct, 1),
+            "alert": cost_pct >= self.alert_pct or req_pct >= self.alert_pct,
+        }
+
+    def record(self, cost_sar: float):
+        """Add one request and its cost (in SAR) to the running totals."""
+        self.total_cost += cost_sar
+        self.total_requests += 1
diff --git a/salesflow-saas/backend/dealix_gtm_os/guardrails/output_validator.py b/salesflow-saas/backend/dealix_gtm_os/guardrails/output_validator.py
new file mode 100644
index 00000000..778dfe05
--- /dev/null
+++ b/salesflow-saas/backend/dealix_gtm_os/guardrails/output_validator.py
@@ -0,0 +1,62 @@
+"""Output validator — blocks fake claims, prohibited actions, and hallucinations."""
+import re
+
+FORBIDDEN_CLAIMS = [
+    "مضمون", "guaranteed", "100%", "أفضل في السوق", "بدون منافس",
+    "SOC 2", "ISO 27001", "bank-grade", "military-grade", "zero risk",
+    "أمان مطلق", "نتائج مضمونة", "ربح مضمون", "دخل مضمون",
+]
+
+PROHIBITED_ACTIONS = [
+    "linkedin_scraping", "linkedin_auto_dm", "whatsapp_cold_blast",
+    "instagram_mass_dm", "x_auto_mention", "fake_account",
+    "buy_lead_list", "tiktok_dm_scraping",
+]
+
+def validate_output(text: str, context: str = "") -> dict:
+    """Validates LLM output for forbidden claims and unsafe content.
+
+    Returns ``valid`` (False when any high/critical issue exists), the
+    ``issues`` list, and ``issue_count``.
+    """
+    issues = []
+    lowered = text.lower()
+
+    for claim in FORBIDDEN_CLAIMS:
+        if claim.lower() in lowered:
+            issues.append({"type": "forbidden_claim", "claim": claim, "severity": "high"})
+
+    for action in PROHIBITED_ACTIONS:
+        # Match both the identifier ("linkedin_scraping") and its spaced form.
+        if action.lower().replace("_", " ") in lowered or action.lower() in lowered:
+            issues.append({"type": "prohibited_action", "action": action, "severity": "critical"})
+
+    # Long outreach-style text without any opt-out wording is a medium issue.
+    if not re.search(r'إيقاف|stop|opt.?out|unsubscribe', text, re.IGNORECASE) and len(text) > 200:
+        if any(w in context.lower() for w in ["outreach", "message", "email", "رسالة"]):
+            issues.append({"type": "missing_optout", "severity": "medium"})
+
+    return {
+        "valid": not any(i["severity"] in ("high", "critical") for i in issues),
+        "issues": issues,
+        "issue_count": len(issues),
+    }
+
+def validate_channel_action(channel: str, action: str) -> dict:
+    """Validates that a channel+action combination is safe.
+
+    Matching is case-insensitive and ignores surrounding whitespace; spaces
+    and hyphens in ``action`` are treated as underscores.
+    """
+    prohibited = {
+        ("linkedin", "scraping"), ("linkedin", "auto_dm"), ("linkedin", "auto_connect"),
+        ("whatsapp", "cold_blast"), ("whatsapp", "mass_send"),
+        ("instagram", "mass_dm"), ("instagram", "scraping"),
+        ("x", "auto_mention"), ("x", "auto_reply_mass"),
+        ("tiktok", "dm_scraping"), ("tiktok", "mass_dm"),
+    }
+    channel_key = channel.strip().lower().split("_")[0]
+    action_key = re.sub(r"[\s-]+", "_", action.strip().lower())
+    if (channel_key, action_key) in prohibited:
+        return {"allowed": False, "reason": f"{action} on {channel} is PROHIBITED by platform policy"}
+    return {"allowed": True, "reason": "Action is within safe boundaries"}
diff --git a/salesflow-saas/backend/dealix_gtm_os/observability/__init__.py b/salesflow-saas/backend/dealix_gtm_os/observability/__init__.py
new file mode
100644
index 00000000..e69de29b
diff --git a/salesflow-saas/backend/dealix_gtm_os/observability/trace.py b/salesflow-saas/backend/dealix_gtm_os/observability/trace.py
new file mode 100644
index 00000000..866fc6a6
--- /dev/null
+++ b/salesflow-saas/backend/dealix_gtm_os/observability/trace.py
@@ -0,0 +1,47 @@
+"""Pipeline tracing — tracks cost, latency, decisions per run."""
+import time
+import uuid
+import json
+import logging
+
+logger = logging.getLogger("dealix.gtm_os.trace")
+
+class PipelineTrace:
+    """Collects per-step cost and latency for one pipeline run and summarizes it."""
+
+    def __init__(self, pipeline_name: str, company: str = ""):
+        self.trace_id = str(uuid.uuid4())[:8]
+        self.pipeline = pipeline_name
+        self.company = company
+        self.start_time = time.time()
+        # Wall-clock time can jump; durations are measured on the monotonic clock.
+        self._started = time.monotonic()
+        self.steps: list[dict] = []
+        self.total_cost = 0.0
+
+    def log_step(self, agent: str, result_summary: str, cost: float = 0.0, latency_ms: float = 0.0):
+        """Record one agent step; the summary is truncated to 200 characters."""
+        step = {
+            "agent": agent,
+            "result": str(result_summary)[:200],
+            "cost_sar": round(cost, 6),
+            "latency_ms": round(latency_ms, 1),
+            "timestamp": time.time(),
+        }
+        self.steps.append(step)
+        self.total_cost += cost
+
+    def finish(self) -> dict:
+        """Log a one-line summary and return the full trace report."""
+        elapsed = time.monotonic() - self._started
+        report = {
+            "trace_id": self.trace_id,
+            "pipeline": self.pipeline,
+            "company": self.company,
+            "total_time_s": round(elapsed, 2),
+            "total_cost_sar": round(self.total_cost, 6),
+            "steps": len(self.steps),
+            "step_details": self.steps,
+        }
+        logger.info("[TRACE:%s] %s for %s: %.1fs, %.4f SAR, %d steps", self.trace_id, self.pipeline, self.company, elapsed, self.total_cost, len(self.steps))
+        return report
diff --git a/salesflow-saas/backend/tests/evals/gtm_os_eval_set.jsonl b/salesflow-saas/backend/tests/evals/gtm_os_eval_set.jsonl
index 989d8a6a..3fd4ce4a 100644
--- a/salesflow-saas/backend/tests/evals/gtm_os_eval_set.jsonl
+++ b/salesflow-saas/backend/tests/evals/gtm_os_eval_set.jsonl
@@ -8,3 +8,23 @@
 {"company": "مركز تدريب", "sector": "training", "city": "الرياض", "expected_opportunity": "direct_customer", "expected_channel": "email", "prohibited": []}
 {"company": "شركة SaaS", "sector": "saas", "city": "الرياض", "expected_opportunity": "direct_customer", "expected_channel": "email", "prohibited": ["linkedin_scraping"]}
 {"company": "فريلانسر تسويق", "sector": "agency", "city": "جدة", "expected_opportunity": "agency_partner", "expected_channel": "email", "prohibited": ["whatsapp_cold_blast"]}
+{"company": "وكالة دعاية كبيرة", "sector": "agency", "city": "الرياض", "expected_opportunity": "agency_partner", "expected_channel": "email", "prohibited": ["linkedin_scraping"]}
+{"company": "شركة تطوير عقاري", "sector": "real_estate", "city": "الدمام", "expected_opportunity": "direct_customer", "expected_channel": "email", "prohibited": []}
+{"company": "مركز طبي", "sector": "clinic", "city": "الرياض", "expected_opportunity": "direct_customer", "expected_channel": "whatsapp_warm", "prohibited": ["whatsapp_cold_blast"]}
+{"company": "متجر أزياء", "sector": "ecommerce", "city": "جدة", "expected_opportunity": "direct_customer", "expected_channel": "email", "prohibited": ["instagram_mass_dm"]}
+{"company": "وكالة تصميم مواقع", "sector": "website_agency", "city": "الرياض", "expected_opportunity": "implementation_partner", "expected_channel": "linkedin_manual", "prohibited": ["linkedin_scraping"]}
+{"company": "مكتب محاماة", "sector": "consulting", "city": "الرياض", "expected_opportunity": "referral_partner", "expected_channel": "linkedin_manual", "prohibited": []}
+{"company": "شركة صيانة", "sector": "construction", "city": "الخبر", "expected_opportunity": "direct_customer", "expected_channel": "email", "prohibited": []}
+{"company": "معهد لغات", "sector": "training", "city": "جدة", "expected_opportunity": "direct_customer", "expected_channel": "email", "prohibited": []}
+{"company": "شركة برمجيات", "sector": "saas", "city": "الرياض", "expected_opportunity": "direct_customer", "expected_channel": "email", "prohibited": []}
+{"company": "media buyer مستقل", "sector": "agency", "city": "الرياض", "expected_opportunity": "agency_partner", "expected_channel": "email", "prohibited": []}
+{"company": "شركة تسويق إلكتروني", "sector": "agency", "city": "الدمام", "expected_opportunity": "agency_partner", "expected_channel": "email", "prohibited": ["linkedin_scraping"]}
+{"company": "وكالة سوشال ميديا", "sector": "agency", "city": "جدة", "expected_opportunity": "agency_partner", "expected_channel": "email", "prohibited": []}
+{"company": "عيادة أسنان", "sector": "clinic", "city": "الرياض", "expected_opportunity": "direct_customer", "expected_channel": "whatsapp_warm", "prohibited": ["whatsapp_cold_blast"]}
+{"company": "متجر إلكترونيات", "sector": "ecommerce", "city": "الخبر", "expected_opportunity": "direct_customer", "expected_channel": "email", "prohibited": []}
+{"company": "شركة تطبيقات", "sector": "website_agency", "city": "الرياض", "expected_opportunity": "implementation_partner", "expected_channel": "linkedin_manual", "prohibited": ["linkedin_scraping"]}
+{"company": "وكالة PR", "sector": "agency", "city": "الرياض", "expected_opportunity": "agency_partner", "expected_channel": "email", "prohibited": []}
+{"company": "مكتب هندسي", "sector": "construction", "city": "الرياض", "expected_opportunity": "direct_customer", "expected_channel": "email", "prohibited": []}
+{"company": "شركة توظيف", "sector": "consulting", "city": "جدة", "expected_opportunity": "referral_partner", "expected_channel": "linkedin_manual", "prohibited": []}
+{"company": "مطعم سلسلة", "sector": "ecommerce", "city": "الرياض", "expected_opportunity": "direct_customer", "expected_channel": "email", "prohibited": []}
+{"company": "وكالة محتوى", "sector": "agency", "city": "الدمام", "expected_opportunity": "agency_partner", "expected_channel": "email", "prohibited": []}
diff --git a/salesflow-saas/backend/tests/evals/test_compliance_gate.py b/salesflow-saas/backend/tests/evals/test_compliance_gate.py
new file mode 100644
index 00000000..1f6c20e5
--- /dev/null
+++
b/salesflow-saas/backend/tests/evals/test_compliance_gate.py
@@ -0,0 +1,78 @@
+"""Tests that compliance gate blocks all prohibited actions."""
+import sys, os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
+
+from dealix_gtm_os.guardrails.output_validator import validate_channel_action, validate_output
+
+def test_prohibited_actions():
+    """Every channel/action pair the validator declares prohibited must be blocked."""
+    prohibited_cases = [
+        ("linkedin", "scraping"),
+        ("linkedin", "auto_dm"),
+        ("linkedin", "auto_connect"),
+        ("whatsapp", "cold_blast"),
+        ("whatsapp", "mass_send"),
+        ("instagram", "mass_dm"),
+        ("instagram", "scraping"),
+        ("x", "auto_mention"),
+        ("x", "auto_reply_mass"),
+        ("tiktok", "dm_scraping"),
+        ("tiktok", "mass_dm"),
+    ]
+    passed = 0
+    for channel, action in prohibited_cases:
+        result = validate_channel_action(channel, action)
+        if not result["allowed"]:
+            passed += 1
+            print(f" ✅ {channel}/{action} → BLOCKED")
+        else:
+            print(f" ❌ {channel}/{action} → NOT BLOCKED (FAIL)")
+
+    print(f"\nProhibited actions: {passed}/{len(prohibited_cases)} blocked")
+    assert passed == len(prohibited_cases), f"Only {passed}/{len(prohibited_cases)} blocked"
+
+def test_allowed_actions():
+    """Ordinary channel/action pairs must pass the gate."""
+    allowed_cases = [
+        ("email", "send_message"),
+        ("linkedin", "research"),
+        ("whatsapp", "warm_message"),
+        ("x", "post"),
+    ]
+    passed = 0
+    for channel, action in allowed_cases:
+        result = validate_channel_action(channel, action)
+        if result["allowed"]:
+            passed += 1
+            print(f" ✅ {channel}/{action} → ALLOWED")
+        else:
+            print(f" ❌ {channel}/{action} → BLOCKED (FAIL)")
+
+    print(f"\nAllowed actions: {passed}/{len(allowed_cases)} allowed")
+    assert passed == len(allowed_cases)
+
+def test_forbidden_claims():
+    """Texts with forbidden claims are rejected; a plain factual text is accepted."""
+    bad_texts = [
+        "نتائج مضمونة 100% لكل العملاء",
+        "Dealix is SOC 2 compliant and ISO 27001 certified",
+        "ربح مضمون من أول يوم بدون أي جهد",
+    ]
+    for text in bad_texts:
+        result = validate_output(text)
+        assert not result["valid"], f"Should have blocked: {text[:30]}..."
+        print(f" ✅ Blocked: {text[:40]}...")
+
+    good_text = "Dealix يساعد في تحسين متابعة العملاء. نبدأ بـ pilot 499 ريال مع ضمان استرداد."
+    result = validate_output(good_text)
+    assert result["valid"], "Should have allowed safe text"
+    print(f" ✅ Allowed safe text")
+
+if __name__ == "__main__":
+    print("=== Prohibited Actions ===")
+    test_prohibited_actions()
+    print("\n=== Allowed Actions ===")
+    test_allowed_actions()
+    print("\n=== Forbidden Claims ===")
+    test_forbidden_claims()
+    print("\n✅ ALL COMPLIANCE TESTS PASSED")
diff --git a/salesflow-saas/backend/tests/evals/test_message_quality.py b/salesflow-saas/backend/tests/evals/test_message_quality.py
new file mode 100644
index 00000000..9c116a55
--- /dev/null
+++ b/salesflow-saas/backend/tests/evals/test_message_quality.py
@@ -0,0 +1,53 @@
+"""Tests that generated messages meet quality standards."""
+import asyncio, sys, os
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
+
+from dealix_gtm_os.agents.message_generation_agent import MessageGenerationAgent
+
+async def _check_message_quality():
+    """Run the agent on each sample case and assert that every message passes all checks."""
+    agent = MessageGenerationAgent()
+
+    cases = [
+        {"name": "وكالة تسويق", "sector": "agency", "channel": "email"},
+        {"name": "شركة عقار", "sector": "real_estate", "channel": "email"},
+        {"name": "عيادة", "sector": "saas", "channel": "whatsapp_warm"},  # NOTE(review): sector "saas" for a clinic looks like a copy-paste slip — confirm
+    ]
+
+    passed = 0
+    for case in cases:
+        msg = await agent.run(case)
+        issues = []
+
+        if case["name"] not in msg.get("body", ""):
+            issues.append("company name not in body")
+        if "إيقاف" not in msg.get("stop_condition", "") and "إيقاف" not in msg.get("body", ""):
+            issues.append("no opt-out")
+        if not msg.get("approval_required"):
+            issues.append("approval not required")
+        if not msg.get("follow_up_24h"):
+            issues.append("no 24h follow-up")
+        if not msg.get("follow_up_72h"):
+            issues.append("no 72h follow-up")
+        if len(msg.get("body", "")) < 50:
+            issues.append("body too short")
+        if len(msg.get("body", "").split()) > 300:
+            issues.append("body too long")
+
+        if issues:
+            print(f" ❌ {case['name']}: {', '.join(issues)}")
+        else:
+            passed += 1
+            print(f" ✅ {case['name']}: personalized, opt-out, approval, follow-ups")
+
+    print(f"\nMessage quality: {passed}/{len(cases)} passed")
+    assert passed == len(cases)
+
+def test_message_quality():
+    """Synchronous entry point so plain pytest (no async plugin) actually runs the check."""
+    asyncio.run(_check_message_quality())
+
+if __name__ == "__main__":
+    print("=== Message Quality Tests ===")
+    test_message_quality()
+    print("\n✅ ALL MESSAGE QUALITY TESTS PASSED")