mirror of
https://github.com/x1xhlol/system-prompts-and-models-of-ai-tools.git
synced 2026-06-17 23:09:35 +00:00
Hermes Fusion Supreme integration: - execution_router.py: Agent-level backend routing (Claude/OpenClaude/Goose/Internal) with fallback chains, cost estimation, health tracking - shannon_security.py: Staging-only white-box pentesting lane (auth, injection, tenant isolation, PDPL compliance checks) - self_improvement.py: Bounded inspect→measure→propose→verify→apply cycle (max 5 proposals, max 2 auto-applies for trivial fixes) - observability.py: Cost tracking, performance metrics, health monitoring, Arabic executive summaries, anomaly detection - hermes.py: Full API (execute, profiles, cost, health, improvements, security scans, session restore — 18 endpoints) https://claude.ai/code/session_01LsnvBa7HwF5hs99VZbgLGj
193 lines
7.3 KiB
Python
193 lines
7.3 KiB
Python
"""
|
|
Observability Service — Dealix AI Revenue OS
|
|
Cost tracking, workflow metrics, health monitoring, and Arabic executive summaries.
|
|
"""
|
|
import logging
|
|
from datetime import datetime, timezone, timedelta
|
|
from typing import Optional
|
|
|
|
from pydantic import BaseModel, Field
|
|
|
|
# Module-level logger named after this module (``__name__``).
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class WorkflowMetric(BaseModel):
    """A single recorded workflow execution.

    Instances are stored by ``ObservabilityService`` and aggregated into the
    cost, performance, health and anomaly reports.  Only ``workflow_name``,
    ``profile_id`` and ``backend`` are required; every other field has a
    default.
    """

    # Grouping keys: cost reports aggregate by each of these three fields.
    workflow_name: str
    profile_id: str
    backend: str

    # Used to place the metric inside a reporting window.  Defaults to the
    # current time as a timezone-aware UTC datetime.
    started_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))

    # Duration in milliseconds (averaged and percentile-ranked by reports).
    duration_ms: int = 0

    # Not read by any report in this module; stored for consumers elsewhere.
    token_count: int = 0

    # Summed by the cost reports and the hourly cost-spike check.
    estimated_cost_usd: float = 0.0

    # ``False`` marks the run as an error in success/error-rate calculations.
    success: bool = True

    # Optional error text, surfaced under "recent_errors" in the
    # performance report.
    error: Optional[str] = None
|
|
|
|
|
|
class ObservabilityService:
    """Track cost, performance, and health across all agent workflows.

    Metrics are held in an in-memory list in insertion order; when the list
    grows beyond ``max_metrics`` the oldest entries are dropped.

    Every report works on a rolling window that ends "now" (UTC):
    ``"hourly"`` = 1 hour, ``"daily"`` = 1 day, ``"weekly"`` = 7 days and
    ``"monthly"`` = 30 days.  Any other period name falls back to the 1-day
    window (the report still echoes the name it was given).
    """

    # Rolling window length for each recognised period name.
    _PERIOD_WINDOWS = {
        "hourly": timedelta(hours=1),
        "daily": timedelta(days=1),
        "weekly": timedelta(weeks=1),
        "monthly": timedelta(days=30),
    }
    # Window applied when the period name is not recognised.
    _DEFAULT_WINDOW = timedelta(days=1)

    # Arabic labels used in the executive summary header.
    _PERIOD_LABELS_AR = {
        "hourly": "الساعة الأخيرة",
        "daily": "اليوم",
        "weekly": "هذا الأسبوع",
        "monthly": "هذا الشهر",
    }

    # Anomaly thresholds (all evaluated over the last hour).
    _HOURLY_COST_LIMIT_USD = 5.0   # cost spike when hourly cost exceeds this
    _ERROR_SPIKE_SAMPLE_FLOOR = 5  # need MORE than this many runs to judge
    _ERROR_SPIKE_RATIO = 0.3       # error spike when failure ratio exceeds this

    def __init__(self, max_metrics: int = 50000) -> None:
        """Create an empty store.

        Args:
            max_metrics: Maximum number of metrics retained; older entries
                are discarded first.
        """
        self._metrics: list[WorkflowMetric] = []
        self._max_metrics = max_metrics

    async def record_workflow(self, metric: WorkflowMetric) -> None:
        """Append ``metric`` to the store and drop the oldest overflow."""
        self._metrics.append(metric)

        # Trim in place from the front.  Computing the overflow explicitly
        # also handles a cap of 0, where slicing with ``[-0:]`` would keep
        # the whole list.
        overflow = len(self._metrics) - self._max_metrics
        if overflow > 0:
            del self._metrics[:overflow]

        # Lazy %-formatting: the message is only built if DEBUG is enabled.
        logger.debug(
            "Recorded: %s cost=$%.4f %s",
            metric.workflow_name,
            metric.estimated_cost_usd,
            "OK" if metric.success else "FAIL",
        )

    @staticmethod
    def _as_utc(moment: datetime) -> datetime:
        """Return ``moment`` as an aware datetime.

        Naive datetimes cannot be compared with the aware cutoff, so they
        are interpreted as UTC.  NOTE(review): treating naive values as UTC
        is an assumption -- confirm against the producers of these metrics.
        """
        if moment.utcoffset() is None:
            return moment.replace(tzinfo=timezone.utc)
        return moment

    def _filter_by_period(
        self, period: str, metrics: Optional[list[WorkflowMetric]] = None
    ) -> list[WorkflowMetric]:
        """Return the metrics whose ``started_at`` falls in the period window.

        Args:
            period: ``"hourly"``, ``"daily"``, ``"weekly"`` or ``"monthly"``;
                anything else uses the 1-day window.
            metrics: Metrics to filter.  ``None`` means the service's own
                store; an explicitly passed empty list stays empty.
        """
        # ``is None`` rather than truthiness, so ``[]`` is respected.
        source = self._metrics if metrics is None else metrics
        window = self._PERIOD_WINDOWS.get(period, self._DEFAULT_WINDOW)
        cutoff = datetime.now(timezone.utc) - window
        return [m for m in source if self._as_utc(m.started_at) >= cutoff]

    async def get_cost_report(
        self, period: str = "daily", profile: Optional[str] = None
    ) -> dict:
        """Summarise estimated cost for ``period``.

        Args:
            period: Reporting window name.
            profile: When given (and non-empty), only metrics with this
                ``profile_id`` are included.

        Returns:
            A dict with the total cost, the workflow count, cost broken down
            by profile and by backend, and the five most expensive workflow
            names.  Costs are rounded to 4 decimal places.
        """
        selected = self._filter_by_period(period)
        if profile:
            selected = [m for m in selected if m.profile_id == profile]

        total_cost = sum(m.estimated_cost_usd for m in selected)

        by_profile: dict[str, float] = {}
        by_backend: dict[str, float] = {}
        by_workflow: dict[str, float] = {}
        for m in selected:
            cost = m.estimated_cost_usd
            by_profile[m.profile_id] = by_profile.get(m.profile_id, 0) + cost
            by_backend[m.backend] = by_backend.get(m.backend, 0) + cost
            by_workflow[m.workflow_name] = by_workflow.get(m.workflow_name, 0) + cost

        top_expensive = sorted(
            by_workflow.items(), key=lambda item: item[1], reverse=True
        )[:5]

        return {
            "period": period,
            "total_cost_usd": round(total_cost, 4),
            "total_workflows": len(selected),
            "by_profile": {k: round(v, 4) for k, v in by_profile.items()},
            "by_backend": {k: round(v, 4) for k, v in by_backend.items()},
            "top_expensive": [
                {"name": k, "cost": round(v, 4)} for k, v in top_expensive
            ],
        }

    async def get_performance_report(self, period: str = "daily") -> dict:
        """Summarise durations and error rates for ``period``.

        Returns:
            A dict with workflow count, success/error rates (percent, one
            decimal), average and 95th-percentile duration in ms, and the
            last five failed runs.  With no data every numeric field is 0
            and ``recent_errors`` is empty; the legacy ``"total": 0`` key is
            also present in that case for older callers.
        """
        window = self._filter_by_period(period)
        total = len(window)

        if total == 0:
            # Same keys as the populated report so callers can index
            # unconditionally; "total" is kept for backward compatibility.
            return {
                "period": period,
                "total": 0,
                "total_workflows": 0,
                "success_rate": 0,
                "avg_duration_ms": 0,
                "p95_duration_ms": 0,
                "error_count": 0,
                "error_rate": 0,
                "recent_errors": [],
            }

        durations = sorted(m.duration_ms for m in window)
        failures = [m for m in window if not m.success]
        succeeded = total - len(failures)

        # Nearest-rank percentile: rank = ceil(0.95 * n), index = rank - 1.
        # Integer arithmetic avoids float rounding in the ceiling.
        p95_idx = (total * 95 + 99) // 100 - 1

        return {
            "period": period,
            "total_workflows": total,
            "success_rate": round(succeeded / total * 100, 1),
            "avg_duration_ms": round(sum(durations) / total),
            "p95_duration_ms": durations[p95_idx],
            "error_count": len(failures),
            "error_rate": round(len(failures) / total * 100, 1),
            "recent_errors": [
                {
                    "workflow": e.workflow_name,
                    "error": e.error,
                    "at": e.started_at.isoformat(),
                }
                # Last five in insertion order.
                for e in failures[-5:]
            ],
        }

    async def get_health_report(self) -> dict:
        """Report overall and per-backend health over the last day.

        Overall health is ``"healthy"`` when there is no data or when more
        than 90% of runs succeeded, otherwise ``"degraded"``.
        """
        daily = self._filter_by_period("daily")
        total = len(daily)
        succeeded = sum(1 for m in daily if m.success)

        # Group once instead of rescanning all metrics for every backend.
        runs_by_backend: dict[str, list[WorkflowMetric]] = {}
        for m in daily:
            runs_by_backend.setdefault(m.backend, []).append(m)

        backend_health = {}
        for name, runs in runs_by_backend.items():
            ok = sum(1 for m in runs if m.success)
            backend_health[name] = {
                "total": len(runs),
                "success_rate": round(ok / len(runs) * 100, 1),
                "avg_duration_ms": round(
                    sum(m.duration_ms for m in runs) / len(runs)
                ),
            }

        is_healthy = total == 0 or succeeded / total > 0.9
        return {
            "overall_health": "healthy" if is_healthy else "degraded",
            "workflows_today": total,
            "success_rate": round(succeeded / total * 100, 1) if total else 100,
            "total_cost_today_usd": round(
                sum(m.estimated_cost_usd for m in daily), 4
            ),
            "backends": backend_health,
        }

    async def get_executive_summary(self, period: str = "weekly") -> str:
        """Build a short Arabic summary for ``period``.

        The summary lists the number of runs, the success rate (whole
        percent, 100 when there is no data), the total cost, and either the
        number of failed runs or a "no critical errors" line.  Unrecognised
        period names are shown verbatim in the header.
        """
        window = self._filter_by_period(period)
        total = len(window)
        failures = [m for m in window if not m.success]
        succeeded = total - len(failures)
        cost = sum(m.estimated_cost_usd for m in window)
        success_rate = round(succeeded / total * 100) if total else 100

        period_ar = self._PERIOD_LABELS_AR.get(period, period)

        lines = [
            f"📊 ملخص {period_ar}:\n",
            f"• {total} مهمة منفذة\n",
            f"• {success_rate}% نسبة النجاح\n",
            f"• ${cost:.2f} التكلفة الإجمالية\n",
        ]
        if failures:
            lines.append(f"• {len(failures)} خطأ يحتاج مراجعة\n")
        else:
            lines.append("• لا أخطاء حرجة ✅\n")

        return "".join(lines)

    async def detect_anomalies(self) -> list[dict]:
        """Check the last hour of metrics for cost and error spikes.

        Returns:
            A list of anomaly dicts (``type``, ``severity``, Arabic
            ``message``, ``value``).  ``cost_spike`` is raised when the
            hourly cost exceeds the cost limit; ``error_spike`` when there
            are more runs than the sample floor and the failure ratio
            exceeds the configured ratio.  Empty when nothing is wrong.
        """
        hourly = self._filter_by_period("hourly")
        if not hourly:
            return []

        anomalies = []

        hourly_cost = sum(m.estimated_cost_usd for m in hourly)
        if hourly_cost > self._HOURLY_COST_LIMIT_USD:
            anomalies.append({
                "type": "cost_spike",
                "severity": "high",
                "message": f"تكلفة الساعة الأخيرة ${hourly_cost:.2f} — أعلى من الحد الطبيعي",
                "value": hourly_cost,
            })

        hourly_errors = sum(1 for m in hourly if not m.success)
        enough_samples = len(hourly) > self._ERROR_SPIKE_SAMPLE_FLOOR
        if enough_samples and hourly_errors / len(hourly) > self._ERROR_SPIKE_RATIO:
            anomalies.append({
                "type": "error_spike",
                "severity": "critical",
                "message": f"معدل أخطاء مرتفع: {hourly_errors}/{len(hourly)} في الساعة الأخيرة",
                "value": hourly_errors,
            })

        return anomalies
|
|
|
|
|
|
# Module-level ObservabilityService instance, created at import time.
observability = ObservabilityService()
|