system-prompts-and-models-o.../salesflow-saas/backend/app/services/tool_receipts.py
Claude 35e857ec52
feat: Add knowledge brain, memory engine, tool receipts, session continuity
Final layer integration (Second Brain + MemPalace + ToolProof + Claude Code):

- knowledge_brain.py: Project wiki ingest, query, lint, promote raw→wiki (560 lines)
- memory_engine.py: Pluggable memory with Redis + File adapters, evaluator (615 lines)
- tool_receipts.py: Signed receipts, pre-execution policy, trust analytics (417 lines)
- session_continuity.py: AI session state management, restore prompts (478 lines)
- glossary.md: 30+ bilingual terms (Arabic/English)
- master-index.md: Top-level index linking all wiki/memory sections

https://claude.ai/code/session_01LsnvBa7HwF5hs99VZbgLGj
2026-04-11 08:19:56 +00:00

418 lines
16 KiB
Python

"""
Tool Receipts — Dealix ToolProof Enhancement
Signed execution receipts, pre-execution policy, and trust analytics.
Extends tool_verification.py with cryptographic receipts and policy enforcement.
"""
import hashlib
import logging
import uuid
from collections import defaultdict
from datetime import datetime, timedelta, timezone
from enum import Enum
from typing import Any, Optional
from pydantic import BaseModel, Field
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Enums
# ---------------------------------------------------------------------------
class PolicyDecisionType(str, Enum):
ALLOW = "allow"
BLOCK = "block"
HOLD = "hold"
class VerificationVerdict(str, Enum):
VERIFIED = "verified"
PARTIALLY_VERIFIED = "partially_verified"
UNVERIFIED = "unverified"
CONTRADICTED = "contradicted"
BLOCKED = "blocked"
# ---------------------------------------------------------------------------
# Models — نماذج البيانات
# ---------------------------------------------------------------------------
class PolicyDecision(BaseModel):
"""Pre-execution policy decision — قرار السياسة قبل التنفيذ"""
decision: PolicyDecisionType
reason: str
reason_ar: str
tool_name: str
requires_approval_from: Optional[str] = None
pdpl_consent_required: bool = False
budget_remaining: Optional[float] = None
class ToolReceipt(BaseModel):
"""Signed execution receipt — إيصال تنفيذ موقّع"""
receipt_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
run_id: str = ""
session_id: str = ""
agent_id: str = ""
tool_name: str
parameters: dict[str, Any] = {}
timestamp: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
execution_result: str = ""
normalized_result: str = ""
hash_signature: str = ""
policy_decision: PolicyDecisionType = PolicyDecisionType.ALLOW
side_effects: list[str] = []
verification_verdict: VerificationVerdict = VerificationVerdict.UNVERIFIED
cost_estimate: float = 0.0
tenant_id: str = ""
def compute_hash(self) -> str:
"""Generate SHA-256 hash of (tool_name + params + result + timestamp)."""
payload = (
f"{self.tool_name}|"
f"{_stable_dict_str(self.parameters)}|"
f"{self.execution_result}|"
f"{self.timestamp.isoformat()}"
)
self.hash_signature = hashlib.sha256(payload.encode("utf-8")).hexdigest()
return self.hash_signature
def normalize_result(self) -> str:
"""Normalize execution result for comparison."""
raw = self.execution_result.lower().strip()
for noise in ["ok", "success", "done", "completed", "تم", "نجح"]:
raw = raw.replace(noise, "SUCCESS")
for err in ["error", "fail", "exception", "خطأ", "فشل"]:
raw = raw.replace(err, "ERROR")
self.normalized_result = raw
return raw
def _stable_dict_str(d: dict) -> str:
"""Deterministic string representation of a dict for hashing."""
return "|".join(f"{k}={v}" for k, v in sorted(d.items()))
# ---------------------------------------------------------------------------
# Pre-Execution Policy — سياسة ما قبل التنفيذ
# ---------------------------------------------------------------------------
class PreExecutionPolicy:
"""
Decide allow/block/hold before tool execution.
تحديد السماح/الحظر/التعليق قبل تنفيذ الأداة.
"""
SAFE_TOOLS: set[str] = {
"read_file", "search", "query_db_readonly", "get_status",
"list_leads", "get_deal", "get_pipeline", "view_analytics",
"check_consent", "get_sequence_status",
}
HOLD_TOOLS: set[str] = {
"send_message", "send_whatsapp", "send_email", "send_sms",
"update_deal", "assign_lead", "create_proposal",
"change_stage", "update_score", "create_sequence",
"schedule_meeting", "update_territory",
}
BLOCK_TOOLS: set[str] = {
"delete_tenant", "drop_table", "bulk_delete", "export_all_data",
"reset_database", "delete_all_leads", "purge_audit_log",
"disable_pdpl", "bypass_consent", "modify_permissions_bulk",
}
MESSAGING_TOOLS: set[str] = {
"send_message", "send_whatsapp", "send_email", "send_sms",
}
ROLE_PERMISSIONS: dict[str, set[str]] = {
"owner": SAFE_TOOLS | HOLD_TOOLS,
"admin": SAFE_TOOLS | HOLD_TOOLS,
"manager": SAFE_TOOLS | {"update_deal", "assign_lead", "create_proposal", "change_stage"},
"sales_rep": SAFE_TOOLS | {"send_message", "send_whatsapp", "send_email", "update_deal"},
"viewer": SAFE_TOOLS,
}
DEFAULT_BUDGET_LIMIT: float = 100.0 # SAR per session
def __init__(self):
self._session_costs: dict[str, float] = defaultdict(float)
def evaluate(
self,
tool_name: str,
params: dict[str, Any],
user_context: dict[str, Any],
) -> PolicyDecision:
"""
Check tool against policy classes, role, PDPL, and budget.
فحص الأداة مقابل فئات السياسة والدور والموافقة والميزانية.
"""
user_role = user_context.get("role", "viewer")
session_id = user_context.get("session_id", "unknown")
has_consent = user_context.get("has_consent", False)
budget_limit = user_context.get("budget_limit", self.DEFAULT_BUDGET_LIMIT)
# Class C — absolute block
if tool_name in self.BLOCK_TOOLS:
logger.warning(
"محظور: أداة %s محظورة بالكامل (المستخدم: %s)",
tool_name, user_context.get("user_id", "unknown"),
)
return PolicyDecision(
decision=PolicyDecisionType.BLOCK,
reason=f"Tool '{tool_name}' is in the BLOCK list. This action is forbidden.",
reason_ar=f"الأداة '{tool_name}' محظورة. هذا الإجراء ممنوع.",
tool_name=tool_name,
)
# Role check
allowed_tools = self.ROLE_PERMISSIONS.get(user_role, self.SAFE_TOOLS)
if tool_name not in allowed_tools and tool_name not in self.SAFE_TOOLS:
return PolicyDecision(
decision=PolicyDecisionType.BLOCK,
reason=f"Role '{user_role}' lacks permission for tool '{tool_name}'.",
reason_ar=f"الدور '{user_role}' لا يملك صلاحية استخدام الأداة '{tool_name}'.",
tool_name=tool_name,
)
# PDPL consent check for messaging
if tool_name in self.MESSAGING_TOOLS and not has_consent:
return PolicyDecision(
decision=PolicyDecisionType.BLOCK,
reason="PDPL consent required before sending messages.",
reason_ar="مطلوب موافقة نظام حماية البيانات قبل إرسال الرسائل.",
tool_name=tool_name,
pdpl_consent_required=True,
)
# Budget check
estimated_cost = self._estimate_cost(tool_name, params)
current_spent = self._session_costs[session_id]
if current_spent + estimated_cost > budget_limit:
return PolicyDecision(
decision=PolicyDecisionType.HOLD,
reason=f"Budget limit would be exceeded. Spent: {current_spent:.2f}, "
f"estimated: {estimated_cost:.2f}, limit: {budget_limit:.2f} SAR.",
reason_ar=f"سيتم تجاوز حد الميزانية. المصروف: {current_spent:.2f}، "
f"التقدير: {estimated_cost:.2f}، الحد: {budget_limit:.2f} ريال.",
tool_name=tool_name,
budget_remaining=budget_limit - current_spent,
)
# Hold tools need approval
if tool_name in self.HOLD_TOOLS:
approver = "manager" if user_role == "sales_rep" else "admin"
return PolicyDecision(
decision=PolicyDecisionType.HOLD,
reason=f"Tool '{tool_name}' requires approval before execution.",
reason_ar=f"الأداة '{tool_name}' تتطلب موافقة قبل التنفيذ.",
tool_name=tool_name,
requires_approval_from=approver,
)
# Safe tools — allow
return PolicyDecision(
decision=PolicyDecisionType.ALLOW,
reason=f"Tool '{tool_name}' is safe for execution.",
reason_ar=f"الأداة '{tool_name}' آمنة للتنفيذ.",
tool_name=tool_name,
)
def record_cost(self, session_id: str, cost: float) -> None:
"""Record actual cost for budget tracking."""
self._session_costs[session_id] += cost
def _estimate_cost(self, tool_name: str, params: dict) -> float:
"""Estimate cost in SAR for a tool call."""
cost_map = {
"send_whatsapp": 0.15,
"send_sms": 0.08,
"send_email": 0.02,
"send_message": 0.10,
"create_proposal": 0.50,
"query_db_readonly": 0.001,
"search": 0.001,
}
base = cost_map.get(tool_name, 0.01)
# Bulk operations cost more
if "count" in params or "bulk" in tool_name:
base *= params.get("count", 1)
return base
# ---------------------------------------------------------------------------
# Receipt Store — مخزن الإيصالات
# ---------------------------------------------------------------------------
class ReceiptStore:
"""In-memory receipt storage with query capabilities."""
def __init__(self, max_size: int = 50000):
self._receipts: list[ToolReceipt] = []
self._max_size = max_size
def store(self, receipt: ToolReceipt) -> str:
"""Store a receipt and return its ID."""
receipt.compute_hash()
receipt.normalize_result()
self._receipts.append(receipt)
if len(self._receipts) > self._max_size:
self._receipts = self._receipts[-self._max_size:]
logger.info(
"إيصال محفوظ: %s أداة=%s حكم=%s",
receipt.receipt_id, receipt.tool_name, receipt.verification_verdict.value,
)
return receipt.receipt_id
def get(self, receipt_id: str) -> Optional[ToolReceipt]:
for r in self._receipts:
if r.receipt_id == receipt_id:
return r
return None
def query(
self,
agent_id: str = None,
tool_name: str = None,
verdict: VerificationVerdict = None,
since: datetime = None,
limit: int = 100,
) -> list[ToolReceipt]:
results = self._receipts
if agent_id:
results = [r for r in results if r.agent_id == agent_id]
if tool_name:
results = [r for r in results if r.tool_name == tool_name]
if verdict:
results = [r for r in results if r.verification_verdict == verdict]
if since:
results = [r for r in results if r.timestamp >= since]
return results[-limit:]
# ---------------------------------------------------------------------------
# Trust Analytics — تحليلات الثقة
# ---------------------------------------------------------------------------
class TrustAnalytics:
"""
Track trust metrics across agent workflows.
تتبع مقاييس الثقة عبر سير عمل الوكلاء.
"""
def __init__(self, store: ReceiptStore):
self._store = store
def get_trust_score(self, agent_id: str) -> float:
"""
Trust score 0-1 for an agent based on verification history.
درجة الثقة 0-1 للوكيل بناءً على سجل التحقق.
"""
receipts = self._store.query(agent_id=agent_id, limit=500)
if not receipts:
return 0.5 # Neutral for unknown agents
weights = {
VerificationVerdict.VERIFIED: 1.0,
VerificationVerdict.PARTIALLY_VERIFIED: 0.6,
VerificationVerdict.UNVERIFIED: 0.3,
VerificationVerdict.CONTRADICTED: 0.0,
VerificationVerdict.BLOCKED: 0.2,
}
total_weight = sum(weights.get(r.verification_verdict, 0.3) for r in receipts)
return round(total_weight / len(receipts), 4)
def get_contradiction_rate(self, agent_id: str) -> float:
"""
Contradiction rate for an agent.
معدل التناقض للوكيل.
"""
receipts = self._store.query(agent_id=agent_id, limit=500)
if not receipts:
return 0.0
contradictions = sum(
1 for r in receipts
if r.verification_verdict == VerificationVerdict.CONTRADICTED
)
return round(contradictions / len(receipts), 4)
def get_cost_by_agent(self, period_days: int = 30) -> dict[str, float]:
"""
Total cost per agent in period.
إجمالي التكلفة لكل وكيل خلال الفترة.
"""
since = datetime.now(timezone.utc) - timedelta(days=period_days)
receipts = self._store.query(since=since, limit=50000)
costs: dict[str, float] = defaultdict(float)
for r in receipts:
costs[r.agent_id] += r.cost_estimate
return {k: round(v, 4) for k, v in costs.items()}
def get_blocked_attempts(self, period_days: int = 30) -> list[ToolReceipt]:
"""
All blocked tool attempts in period.
جميع محاولات الأدوات المحظورة خلال الفترة.
"""
since = datetime.now(timezone.utc) - timedelta(days=period_days)
return self._store.query(
verdict=VerificationVerdict.BLOCKED, since=since, limit=1000
)
def get_hallucination_suspects(self) -> list[ToolReceipt]:
"""
Claims without matching receipts — possible hallucinations.
ادعاءات بدون إيصالات مطابقة — هلوسات محتملة.
"""
all_receipts = self._store.query(limit=5000)
suspects = []
for r in all_receipts:
if r.verification_verdict == VerificationVerdict.CONTRADICTED:
suspects.append(r)
elif (
r.verification_verdict == VerificationVerdict.UNVERIFIED
and r.execution_result == ""
and r.tool_name not in PreExecutionPolicy.SAFE_TOOLS
):
suspects.append(r)
return suspects
def get_summary(self, agent_id: str = None) -> dict[str, Any]:
"""
Overall trust summary.
ملخص الثقة العام.
"""
receipts = self._store.query(agent_id=agent_id, limit=10000)
total = len(receipts)
if total == 0:
return {"total": 0, "trust_score": 0.5, "message_ar": "لا توجد بيانات"}
by_verdict: dict[str, int] = defaultdict(int)
total_cost = 0.0
for r in receipts:
by_verdict[r.verification_verdict.value] += 1
total_cost += r.cost_estimate
trust = self.get_trust_score(agent_id) if agent_id else 0.5
return {
"total": total,
"by_verdict": dict(by_verdict),
"trust_score": trust,
"total_cost_sar": round(total_cost, 2),
"contradiction_rate": round(
by_verdict.get("contradicted", 0) / total * 100, 2
),
"message_ar": f"إجمالي العمليات: {total}، درجة الثقة: {trust:.2f}",
}
# ---------------------------------------------------------------------------
# Global singletons
# ---------------------------------------------------------------------------
pre_execution_policy = PreExecutionPolicy()
receipt_store = ReceiptStore()
trust_analytics = TrustAnalytics(receipt_store)