mirror of
https://github.com/x1xhlol/system-prompts-and-models-of-ai-tools.git
synced 2026-06-17 23:09:35 +00:00
New agents: partnership_strategist, negotiation (10 objections), crm_revenue (16 statuses), learning, web_search, enrichment, campaign_orchestrator, competitor_intelligence, content_strategy New engines: - scoring/scoring_engine.py: unified scoring with 9 sector defaults - compliance/compliance_engine.py: channel policy + daily limits + stop words Evals: 10/10 PASS (100%) - Agency → email + agency_partner ✅ - Real estate → email + direct_customer ✅ - Clinic → whatsapp_warm ✅ - Ecommerce → email ✅ - Website agency → linkedin_manual + implementation_partner ✅ - Consulting → linkedin_manual + referral_partner ✅ - All: compliance=allowed, opt-out present, no prohibited actions https://claude.ai/code/session_01W1rJthWDkasijTdXCfxVHs
62 lines
2.0 KiB
Python
62 lines
2.0 KiB
Python
"""GTM OS evaluation tests — verifies intelligence quality."""
|
|
import asyncio
|
|
import json
|
|
import sys
|
|
import os
|
|
# Put the directory two levels above this file's directory at the front of
# sys.path. NOTE(review): presumably this makes the dealix_gtm_os package
# importable when the file is run directly as a script — confirm against the
# repository layout.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
|
|
|
|
from dealix_gtm_os.agents.supervisor_agent import SupervisorAgent
|
|
|
|
# Path of the eval-case file, located in the same directory as this module.
# run_evals() reads it as JSON Lines: one JSON object per non-blank line.
EVAL_FILE = os.path.join(os.path.dirname(__file__), "gtm_os_eval_set.jsonl")
|
|
|
|
async def run_evals():
    """Run every eval case through the supervisor and print a pass/fail report.

    Each non-blank line of ``EVAL_FILE`` is parsed as one JSON case. The
    case's ``company``, ``sector`` and ``city`` are passed to
    ``SupervisorAgent.run`` and the result is checked for:

    * ``channel_plan.primary_channel`` equal to the case's ``expected_channel``
    * ``compliance.allowed`` being truthy
    * the opt-out word "إيقاف" appearing in ``message.stop_condition``

    A mismatch on ``expected_opportunity`` is advisory only: it is printed as
    a warning but never fails the case. Cases without that key skip the check.

    Returns:
        bool: True when at least 80% of the cases passed. An empty eval set
        returns False, since nothing was verified.
    """
    supervisor = SupervisorAgent()

    # The eval data and the opt-out check involve Arabic text, so decode
    # explicitly as UTF-8 instead of relying on the platform default encoding.
    with open(EVAL_FILE, encoding="utf-8") as f:
        cases = [json.loads(line) for line in f if line.strip()]

    passed = 0
    failed = 0
    total = len(cases)

    for case in cases:
        result = await supervisor.run({
            "name": case["company"],
            "sector": case["sector"],
            "city": case["city"],
        })

        channel = result["channel_plan"]["primary_channel"]
        compliance = result["compliance"]["allowed"]
        opportunity = result["intelligence"].get("opportunity_types", [])
        has_optout = "إيقاف" in result["message"].get("stop_condition", "")

        errors = []
        if case["expected_channel"] != channel:
            errors.append(f"channel: expected {case['expected_channel']}, got {channel}")
        if not compliance:
            errors.append("compliance: should be allowed but was denied")
        if not has_optout:
            errors.append("missing opt-out in message")

        if errors:
            failed += 1
            print(f" ❌ {case['company']}: {'; '.join(errors)}")
        else:
            passed += 1
            print(f" ✅ {case['company']}: channel={channel}, compliant={compliance}")

        # Opportunity matching is advisory: surface a mismatch so it is not
        # silently lost, but never count it against the case.
        expected_opportunity = case.get("expected_opportunity")
        if expected_opportunity is not None and expected_opportunity not in opportunity:
            print(
                f"    ⚠️ advisory: expected opportunity "
                f"{expected_opportunity!r} not in {opportunity}"
            )

    # Guard the ratio: an empty eval file would otherwise divide by zero.
    pass_rate = passed / total if total else 0.0
    if not total:
        print(f"No eval cases found in {EVAL_FILE}")

    print(f"\n{'=' * 40}")
    print(f"Results: {passed}/{total} passed ({pass_rate*100:.0f}%)")
    print(f"Failed: {failed}")

    success = pass_rate >= 0.8
    if success:
        print("VERDICT: ✅ PASS (≥80%)")
    else:
        print("VERDICT: ❌ FAIL (<80%)")
    return success
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the async eval suite to completion and turn its
    # boolean outcome into a process exit status (0 for truthy, 1 otherwise).
    evals_ok = asyncio.run(run_evals())
    exit_status = 0 if evals_ok else 1
    sys.exit(exit_status)
|