mirror of
https://github.com/x1xhlol/system-prompts-and-models-of-ai-tools.git
synced 2026-06-19 07:49:34 +00:00
AI Layer:
- llm_router.py: routes cheap/mid/high models, enforces daily budget, caches
- token_counter.py: estimates tokens, truncates to budget
- response_cache.py: in-memory cache with TTL per agent
- prompt_registry.py: versioned prompts with stable prefix for caching
- ai_budget.yaml: model costs, agent budgets, daily limits (10 SAR/day)

Guardrails:
- output_validator.py: blocks fake claims + prohibited actions
- cost_guard.py: prevents runaway spending

Observability:
- trace.py: trace_id, cost, latency, steps per pipeline run

Tests: ALL PASS
- 30/30 evals (100%) — 9 sectors, 30 companies
- 10/10 prohibited actions blocked
- 4/4 allowed actions verified
- 3/3 forbidden claims blocked
- 3/3 message quality checks passed

https://claude.ai/code/session_01W1rJthWDkasijTdXCfxVHs
51 lines
2.3 KiB
Python
51 lines
2.3 KiB
Python
"""Output validator — blocks fake claims, prohibited actions, and hallucinations."""
|
|
import re
|
|
|
|
# Marketing/compliance phrases that must never appear in generated text.
# validate_output() matches each entry case-insensitively as a plain substring.
FORBIDDEN_CLAIMS = [
    "مضمون",          # "guaranteed"
    "guaranteed",
    "100%",
    "أفضل في السوق",  # "best in the market"
    "بدون منافس",     # "without a competitor"
    "SOC 2",
    "ISO 27001",
    "bank-grade",
    "military-grade",
    "zero risk",
    "أمان مطلق",      # "absolute security"
    "نتائج مضمونة",   # "guaranteed results"
    "ربح مضمون",      # "guaranteed profit"
    "دخل مضمون",      # "guaranteed income"
]
|
|
|
|
# Action identifiers that generated text must never propose.
# validate_output() looks for each identifier both verbatim and with its
# underscores replaced by spaces.
PROHIBITED_ACTIONS = [
    "linkedin_scraping",
    "linkedin_auto_dm",
    "whatsapp_cold_blast",
    "instagram_mass_dm",
    "x_auto_mention",
    "fake_account",
    "buy_lead_list",
    "tiktok_dm_scraping",
]
|
|
|
|
def validate_output(text: str, context: str = "") -> dict:
    """Validate LLM output for forbidden claims and unsafe content.

    Three checks are run:

    * every entry of ``FORBIDDEN_CLAIMS`` found in ``text`` (case-insensitive
      substring) adds a ``forbidden_claim`` issue with severity ``high``;
    * every entry of ``PROHIBITED_ACTIONS`` found in ``text`` adds a
      ``prohibited_action`` issue with severity ``critical``;
    * a ``missing_optout`` issue (severity ``medium``) is added when ``text``
      is longer than 200 characters, contains no opt-out wording, and
      ``context`` mentions outreach/message/email.

    Args:
        text: Generated text to inspect. ``None`` is treated as empty.
        context: Free-form description of how the text is used; only
            consulted for the opt-out check. ``None`` is treated as empty.

    Returns:
        A dict with ``valid`` (``False`` when any ``high``/``critical`` issue
        exists), ``issues`` (list of issue dicts) and ``issue_count``.
    """
    # Tolerate None so a missing upstream value does not crash the guardrail.
    text = text or ""
    context = context or ""

    # Lower-case once instead of once per loop iteration.
    lowered_text = text.lower()
    # Collapse runs of "_", "-" and whitespace into one space so that
    # "linkedin_scraping", "linkedin-scraping" and "linkedin  scraping" all
    # match the same prohibited action instead of slipping past the check.
    separator_runs = re.compile(r"[\s_\-]+")
    normalized_text = separator_runs.sub(" ", lowered_text)

    issues = []

    for claim in FORBIDDEN_CLAIMS:
        if claim.lower() in lowered_text:
            issues.append({"type": "forbidden_claim", "claim": claim, "severity": "high"})

    for action in PROHIBITED_ACTIONS:
        if separator_runs.sub(" ", action.lower()) in normalized_text:
            issues.append({"type": "prohibited_action", "action": action, "severity": "critical"})

    # Long outreach-style messages are expected to carry opt-out wording.
    has_optout = re.search(r'إيقاف|stop|opt.?out|unsubscribe', text, re.IGNORECASE)
    if not has_optout and len(text) > 200:
        lowered_context = context.lower()
        if any(w in lowered_context for w in ["outreach", "message", "email", "رسالة"]):
            issues.append({"type": "missing_optout", "severity": "medium"})

    # Only high/critical issues invalidate the output; medium ones are advisory.
    blocking = [i for i in issues if i["severity"] in ("high", "critical")]
    return {
        "valid": not blocking,
        "issues": issues,
        "issue_count": len(issues),
    }
|
|
|
|
# (channel, action) pairs that are never allowed. Built once at import time
# rather than on every call.
_PROHIBITED_CHANNEL_ACTIONS = frozenset({
    ("linkedin", "scraping"), ("linkedin", "auto_dm"), ("linkedin", "auto_connect"),
    ("whatsapp", "cold_blast"), ("whatsapp", "mass_send"),
    ("instagram", "mass_dm"), ("instagram", "scraping"),
    ("x", "auto_mention"), ("x", "auto_reply_mass"),
    ("tiktok", "dm_scraping"), ("tiktok", "mass_dm"),
})


def validate_channel_action(channel: str, action: str) -> dict:
    """Validate that a channel+action combination is safe.

    Args:
        channel: Channel name. Only the part before the first underscore is
            used, compared case-insensitively (so ``"LinkedIn_ads"`` is
            checked as ``"linkedin"``).
        action: Action name. Compared case-insensitively, ignoring surrounding
            whitespace and treating ``-`` and spaces like ``_``.

    Returns:
        A dict with ``allowed`` (bool) and a human-readable ``reason``.
    """
    channel_key = channel.strip().split("_")[0].lower()
    # Normalise the action the same way the channel is normalised; otherwise
    # "Scraping" or "mass-dm" would slip past the prohibited list.
    action_key = action.strip().lower().replace("-", "_").replace(" ", "_")

    if (channel_key, action_key) in _PROHIBITED_CHANNEL_ACTIONS:
        return {"allowed": False, "reason": f"{action} on {channel} is PROHIBITED by platform policy"}
    return {"allowed": True, "reason": "Action is within safe boundaries"}
|