mirror of
https://github.com/x1xhlol/system-prompts-and-models-of-ai-tools.git
synced 2026-06-18 23:39:34 +00:00
Phase 1-6 implementation for Dealix AI Revenue OS: - AI Arabic Engine: NLP (arabic_nlp.py), lead scoring (lead_scoring.py) - PDPL Compliance: consent manager, data rights handler, consent model - Sequence Engine: multi-channel sequences with WhatsApp/Email/SMS - CPQ System: quote engine, AI proposal generator - Security Gate: pre-release checks, PDPL message validation - Tool Verification: agent action audit trail - Project Operating Files: AGENTS.md, CLAUDE.md - Project Memory: architecture, ADRs, provider routing, PDPL checklist - Design System: IBM Plex Sans Arabic tokens, RTL-safe components - Sequence/Consent models for database https://claude.ai/code/session_01LsnvBa7HwF5hs99VZbgLGj
177 lines
5.9 KiB
Python
177 lines
5.9 KiB
Python
"""
|
|
Tool Verification Layer — Dealix AI Revenue OS

Records what agents intended, claimed, and actually executed.
Provides an evidence-based audit trail for all AI actions.
|
|
"""
|
|
import logging
|
|
import uuid
|
|
from datetime import datetime, timezone
|
|
from enum import Enum
|
|
from typing import Any, Optional
|
|
|
|
from pydantic import BaseModel, Field
|
|
|
|
# Module-scoped logger; ToolVerifier emits its START / CLAIM / EXEC events
# through it at INFO level.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class VerificationStatus(str, Enum):
    """Outcome of comparing an agent's claimed result with the observed one.

    Members are also ``str`` instances, so they compare equal to their
    plain string values.
    """

    # The claim matches the observed result closely enough.
    VERIFIED = "verified"
    # The execution reported errors, or the claim only loosely matches.
    PARTIALLY_VERIFIED = "partially_verified"
    # A claim or result is missing, or the two barely overlap.
    UNVERIFIED = "unverified"
    # The agent claimed success but the execution returned an error.
    CONTRADICTED = "contradicted"
    # Default state of a call that has not been verified yet.
    PENDING = "pending"
|
|
|
|
|
|
class ToolCall(BaseModel):
    """Audit record for a single agent tool invocation.

    Captures what the agent intended to do, what it claimed happened, and
    what was actually observed, together with timing and the verification
    outcome.
    """

    # Unique identifier for this call (random UUID4 string).
    request_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    agent_id: str
    agent_name: str

    # What the agent set out to do and with which arguments.
    intended_action: str
    intended_params: dict[str, Any] = Field(default_factory=dict)

    # What the agent reported vs. what the execution produced.
    claimed_result: Optional[str] = None
    actual_result: Optional[str] = None
    actual_side_effects: list[str] = Field(default_factory=list)

    # Verification outcome and human-readable reasons for any mismatch.
    status: VerificationStatus = VerificationStatus.PENDING
    contradiction_flags: list[str] = Field(default_factory=list)

    # Timing: started_at is stamped in UTC when the record is created;
    # completed_at and duration_ms stay None until execution is recorded.
    started_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    completed_at: Optional[datetime] = None
    duration_ms: Optional[int] = None

    tenant_id: Optional[str] = None
    # Free-form extra data attached to the record.
    metadata: dict[str, Any] = Field(default_factory=dict)
|
|
|
|
|
|
class ToolVerifier:
    """
    Verification layer between agents and tools.

    Records intent vs claim vs execution evidence for every tool call in a
    bounded in-memory log, and classifies each call by comparing the
    agent's claimed result with the result actually observed.
    """

    def __init__(self):
        # Chronological record of calls; the oldest entries are dropped once
        # the log grows past _max_log_size.
        self._log: list[ToolCall] = []
        self._max_log_size = 10000

    def start_call(
        self,
        agent_id: str,
        agent_name: str,
        intended_action: str,
        intended_params: Optional[dict[str, Any]] = None,
        tenant_id: Optional[str] = None,
    ) -> ToolCall:
        """Open a new audit record for an action an agent is about to take.

        Args:
            agent_id: Identifier of the calling agent.
            agent_name: Name of the calling agent (used in log output).
            intended_action: Name of the action the agent intends to run.
            intended_params: Arguments for the action; ``None`` means none.
            tenant_id: Optional tenant the call belongs to.

        Returns:
            The newly created ``ToolCall`` (status ``PENDING``), which has
            also been appended to the internal log.
        """
        call = ToolCall(
            agent_id=agent_id,
            agent_name=agent_name,
            intended_action=intended_action,
            intended_params=intended_params or {},
            tenant_id=tenant_id,
        )
        self._log.append(call)
        if len(self._log) > self._max_log_size:
            # Keep only the most recent entries.
            self._log = self._log[-self._max_log_size:]
        logger.info(
            "[ToolVerify] START %s: agent=%s action=%s",
            call.request_id,
            agent_name,
            intended_action,
        )
        return call

    def record_claim(self, call: ToolCall, claimed_result: str) -> None:
        """Store what the agent says the tool call produced.

        Only the first 200 characters of the claim are written to the log
        output; the full text is kept on ``call``.
        """
        call.claimed_result = claimed_result
        logger.info(
            "[ToolVerify] CLAIM %s: %s", call.request_id, claimed_result[:200]
        )

    def record_execution(
        self,
        call: ToolCall,
        actual_result: str,
        side_effects: Optional[list[str]] = None,
    ) -> None:
        """Store the observed result, stamp timing, and verify the call.

        Args:
            call: Record previously returned by ``start_call``.
            actual_result: What the execution actually produced.
            side_effects: Observed side effects; ``None`` means none.
        """
        call.actual_result = actual_result
        call.actual_side_effects = side_effects or []
        call.completed_at = datetime.now(timezone.utc)
        elapsed = call.completed_at - call.started_at
        call.duration_ms = int(elapsed.total_seconds() * 1000)
        self._verify(call)
        # Log the enum's value explicitly: formatting a str-mixin Enum
        # member directly renders differently across Python versions.
        logger.info(
            "[ToolVerify] EXEC %s: status=%s duration=%sms",
            call.request_id,
            call.status.value,
            call.duration_ms,
        )

    @staticmethod
    def _add_flag(call: ToolCall, message: str) -> None:
        """Append ``message`` to the call's flags unless already present."""
        if message not in call.contradiction_flags:
            call.contradiction_flags.append(message)

    def _verify(self, call: ToolCall) -> None:
        """Set ``call.status`` by comparing claimed and actual results.

        The comparison is a case-insensitive text heuristic:

        * missing or blank claim/result -> ``UNVERIFIED``
        * result mentions "error" and claim mentions "success"
          -> ``CONTRADICTED``
        * result mentions "error" otherwise -> ``PARTIALLY_VERIFIED``
        * claim is contained in the result -> ``VERIFIED``
        * otherwise, by the share of the claim's distinct words that also
          appear in the result: > 50% ``VERIFIED``, > 20%
          ``PARTIALLY_VERIFIED``, else ``UNVERIFIED``.
        """
        if not call.claimed_result or not call.actual_result:
            call.status = VerificationStatus.UNVERIFIED
            return

        claimed = call.claimed_result.lower().strip()
        actual = call.actual_result.lower().strip()

        # Whitespace-only text carries no evidence; without this guard an
        # empty claim would count as "contained in" any result.
        if not claimed or not actual:
            call.status = VerificationStatus.UNVERIFIED
            return

        if "error" in actual:
            if "success" in claimed:
                call.status = VerificationStatus.CONTRADICTED
                self._add_flag(
                    call, "Agent claimed success but execution returned error"
                )
            else:
                call.status = VerificationStatus.PARTIALLY_VERIFIED
                self._add_flag(call, "Execution had errors")
            return

        # Equality is a special case of containment.
        if claimed in actual:
            call.status = VerificationStatus.VERIFIED
            return

        claimed_words = set(claimed.split())
        actual_words = set(actual.split())
        # claimed is non-blank here, so claimed_words is never empty.
        ratio = len(claimed_words & actual_words) / len(claimed_words)
        if ratio > 0.5:
            call.status = VerificationStatus.VERIFIED
        elif ratio > 0.2:
            call.status = VerificationStatus.PARTIALLY_VERIFIED
        else:
            call.status = VerificationStatus.UNVERIFIED

    def get_log(
        self,
        agent_id: Optional[str] = None,
        status: Optional[VerificationStatus] = None,
        tenant_id: Optional[str] = None,
        limit: int = 100,
    ) -> list[ToolCall]:
        """Return the most recent calls matching the given filters.

        Args:
            agent_id: Keep only calls from this agent; falsy disables it.
            status: Keep only calls with this status; falsy disables it.
            tenant_id: Keep only calls of this tenant; falsy disables it.
            limit: Maximum number of calls to return; values <= 0 yield an
                empty list.

        Returns:
            A new list with up to ``limit`` matching calls, oldest first.
        """
        if limit <= 0:
            # results[-0:] would otherwise return the whole log.
            return []
        results = self._log
        if agent_id:
            results = [c for c in results if c.agent_id == agent_id]
        if status:
            results = [c for c in results if c.status == status]
        if tenant_id:
            results = [c for c in results if c.tenant_id == tenant_id]
        return results[-limit:]

    def get_contradictions(
        self, tenant_id: Optional[str] = None
    ) -> list[ToolCall]:
        """Return recent calls whose status is ``CONTRADICTED``.

        Uses ``get_log``'s default limit, optionally narrowed to one tenant.
        """
        return self.get_log(
            status=VerificationStatus.CONTRADICTED, tenant_id=tenant_id
        )

    def get_stats(self, tenant_id: Optional[str] = None) -> dict[str, Any]:
        """Summarise the logged calls, optionally for a single tenant.

        Returns:
            ``{"total": 0}`` when there are no matching calls; otherwise a
            dict with ``total``, ``by_status`` (count per status),
            ``avg_duration_ms`` (over calls that have a duration), and
            ``contradiction_rate`` / ``verification_rate`` as percentages
            of ``total``.
        """
        # The log never exceeds _max_log_size, so this covers every entry.
        calls = self.get_log(tenant_id=tenant_id, limit=self._max_log_size)
        total = len(calls)
        if total == 0:
            return {"total": 0}

        by_status: dict[VerificationStatus, int] = {}
        for call in calls:
            by_status[call.status] = by_status.get(call.status, 0) + 1

        # A duration of 0 ms is a valid measurement; only skip calls that
        # have not completed.
        durations = [c.duration_ms for c in calls if c.duration_ms is not None]
        avg_duration = sum(durations) / len(durations) if durations else 0

        contradicted = by_status.get(VerificationStatus.CONTRADICTED, 0)
        verified = by_status.get(VerificationStatus.VERIFIED, 0)
        return {
            "total": total,
            "by_status": by_status,
            "avg_duration_ms": round(avg_duration, 1),
            "contradiction_rate": round(contradicted / total * 100, 2),
            "verification_rate": round(verified / total * 100, 2),
        }
|
|
|
|
|
|
# Global singleton
|
|
# Created once at import time. ToolVerifier keeps its call log in a plain
# in-memory list, so this instance's records live only in the current process.
tool_verifier = ToolVerifier()
|