mirror of
https://github.com/x1xhlol/system-prompts-and-models-of-ai-tools.git
synced 2026-06-17 23:09:35 +00:00
256 lines
8.5 KiB
Python
256 lines
8.5 KiB
Python
"""
|
|
Approval SLA: auto-escalation metadata on pending rows + breach alerts (webhook / Slack).
|
|
|
|
Persists escalation state under ApprovalRequest.payload["_dealix_sla"].
|
|
Aggregated breach notifications use a per-tenant cooldown to avoid spam.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
from collections import Counter
|
|
from datetime import datetime, timedelta, timezone
|
|
from typing import Any, Dict, List, Optional
|
|
from uuid import UUID
|
|
|
|
import httpx
|
|
from sqlalchemy import select
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
from sqlalchemy.orm.attributes import flag_modified
|
|
|
|
from app.config import get_settings
|
|
from app.models.operations import ApprovalRequest
|
|
from app.services.operations_hub import emit_domain_event
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
SLA_KEY = "_dealix_sla"
|
|
|
|
# tenant_id -> last aggregate breach alert (UTC)
|
|
_last_aggregate_breach_alert: Dict[str, datetime] = {}
|
|
|
|
|
|
def _hours_between(now: datetime, then: Optional[datetime]) -> float:
|
|
if not then:
|
|
return 0.0
|
|
return max(0.0, (now - then).total_seconds() / 3600.0)
|
|
|
|
|
|
def _escalation_level(age_h: float, warn_h: int, breach_h: int, l3_mult: float) -> int:
|
|
if age_h < warn_h:
|
|
return 0
|
|
if age_h < breach_h:
|
|
return 1
|
|
breach_m = max(float(breach_h), float(warn_h))
|
|
if age_h < breach_m * max(l3_mult, 1.01):
|
|
return 2
|
|
return 3
|
|
|
|
|
|
def _level_label_ar(level: int) -> str:
|
|
return {
|
|
0: "ضمن المهلة",
|
|
1: "تحذير — يقترب من تجاوز SLA",
|
|
2: "تجاوز SLA — يتطلب اهتماماً فورياً",
|
|
3: "تصعيد حرج — تدخل المالك/الإدارة",
|
|
}.get(level, "غير معروف")
|
|
|
|
|
|
async def refresh_pending_escalations(db: AsyncSession, tenant_id: UUID) -> Dict[str, Any]:
|
|
"""
|
|
Update _dealix_sla on each pending approval; emit domain events when level increases.
|
|
"""
|
|
s = get_settings()
|
|
now = datetime.now(timezone.utc)
|
|
warn_h = max(1, int(s.OPENCLAW_APPROVAL_SLA_HOURS_WARN))
|
|
breach_h = max(warn_h, int(s.OPENCLAW_APPROVAL_SLA_HOURS_BREACH))
|
|
l3_mult = max(1.01, float(s.OPENCLAW_APPROVAL_ESCALATION_L3_MULTIPLIER))
|
|
|
|
q = await db.execute(
|
|
select(ApprovalRequest).where(
|
|
ApprovalRequest.tenant_id == tenant_id,
|
|
ApprovalRequest.status == "pending",
|
|
)
|
|
)
|
|
rows: List[ApprovalRequest] = list(q.scalars().all())
|
|
counts: Counter[int] = Counter()
|
|
bumped = 0
|
|
|
|
for row in rows:
|
|
age_h = _hours_between(now, row.created_at)
|
|
level = _escalation_level(age_h, warn_h, breach_h, l3_mult)
|
|
counts[level] += 1
|
|
|
|
base = dict(row.payload) if isinstance(row.payload, dict) else {}
|
|
prev = base.get(SLA_KEY) if isinstance(base.get(SLA_KEY), dict) else {}
|
|
prev_level = int(prev.get("escalation_level", 0) or 0)
|
|
|
|
sla_block = {
|
|
**prev,
|
|
"escalation_level": level,
|
|
"escalation_label_ar": _level_label_ar(level),
|
|
"age_hours": round(age_h, 2),
|
|
"warn_threshold_hours": warn_h,
|
|
"breach_threshold_hours": breach_h,
|
|
"updated_at": now.isoformat(),
|
|
}
|
|
|
|
if level != prev_level:
|
|
sla_block["escalation_changed_at"] = now.isoformat()
|
|
|
|
base[SLA_KEY] = sla_block
|
|
row.payload = base
|
|
flag_modified(row, "payload")
|
|
|
|
if level > prev_level:
|
|
bumped += 1
|
|
await emit_domain_event(
|
|
db,
|
|
tenant_id=tenant_id,
|
|
event_type="approval.sla_escalated",
|
|
payload={
|
|
"approval_id": str(row.id),
|
|
"from_level": prev_level,
|
|
"to_level": level,
|
|
"age_hours": round(age_h, 2),
|
|
},
|
|
source="sla_escalation",
|
|
)
|
|
|
|
by_level = {str(k): int(counts.get(k, 0)) for k in range(4)}
|
|
return {
|
|
"pending_escalation_total": len(rows),
|
|
"by_level": by_level,
|
|
"events_emitted": bumped,
|
|
}
|
|
|
|
|
|
async def maybe_dispatch_sla_breach_alerts(
|
|
db: AsyncSession,
|
|
tenant_id: UUID,
|
|
*,
|
|
tenant_id_str: str,
|
|
metrics: Dict[str, Any],
|
|
) -> Dict[str, Any]:
|
|
"""
|
|
If breach count > 0 and alerts enabled, POST to webhook and/or Slack (respecting cooldown).
|
|
"""
|
|
s = get_settings()
|
|
out: Dict[str, Any] = {
|
|
"attempted": False,
|
|
"skipped_reason": None,
|
|
"webhook_ok": None,
|
|
"slack_ok": None,
|
|
"cooldown_minutes": int(s.OPENCLAW_SLA_ALERT_COOLDOWN_MINUTES),
|
|
}
|
|
|
|
if not s.OPENCLAW_SLA_ALERTS_ENABLED:
|
|
out["skipped_reason"] = "alerts_disabled"
|
|
return out
|
|
|
|
breach_n = int(metrics.get("pending_breach_count") or 0)
|
|
if breach_n <= 0:
|
|
out["skipped_reason"] = "no_breach"
|
|
return out
|
|
|
|
webhook_url = (s.OPENCLAW_SLA_WEBHOOK_URL or "").strip()
|
|
slack_url = (s.OPENCLAW_SLA_SLACK_WEBHOOK_URL or "").strip()
|
|
if not webhook_url and not slack_url:
|
|
out["skipped_reason"] = "no_webhook_configured"
|
|
return out
|
|
|
|
now = datetime.now(timezone.utc)
|
|
cool = timedelta(minutes=max(5, int(s.OPENCLAW_SLA_ALERT_COOLDOWN_MINUTES)))
|
|
last = _last_aggregate_breach_alert.get(tenant_id_str)
|
|
if last and (now - last) < cool:
|
|
out["skipped_reason"] = "cooldown"
|
|
out["next_eligible_at"] = (last + cool).isoformat()
|
|
return out
|
|
|
|
payload = {
|
|
"event": "approval_sla.breach",
|
|
"tenant_id": tenant_id_str,
|
|
"pending_breach_count": breach_n,
|
|
"pending_warn_count": int(metrics.get("pending_warn_count") or 0),
|
|
"breach_threshold_hours": metrics.get("breach_threshold_hours"),
|
|
"warn_threshold_hours": metrics.get("warn_threshold_hours"),
|
|
"health": metrics.get("health"),
|
|
"timestamp": now.isoformat(),
|
|
"source": "dealix",
|
|
}
|
|
|
|
out["attempted"] = True
|
|
timeout = httpx.Timeout(12.0)
|
|
|
|
async with httpx.AsyncClient(timeout=timeout) as client:
|
|
if webhook_url:
|
|
try:
|
|
r = await client.post(webhook_url, json=payload)
|
|
out["webhook_ok"] = 200 <= r.status_code < 300
|
|
if not out["webhook_ok"]:
|
|
out["webhook_status"] = r.status_code
|
|
except Exception as e:
|
|
logger.warning("SLA webhook failed: %s", e)
|
|
out["webhook_ok"] = False
|
|
out["webhook_error"] = str(e)[:200]
|
|
|
|
if slack_url:
|
|
text = (
|
|
f":rotating_light: *Approval SLA breach* — tenant `{tenant_id_str[:8]}…`\n"
|
|
f"*Pending breach:* {breach_n} (warn: {metrics.get('pending_warn_count', 0)})\n"
|
|
f"*Thresholds:* warn {metrics.get('warn_threshold_hours')}h / breach {metrics.get('breach_threshold_hours')}h\n"
|
|
f"*Time:* {now.isoformat()}"
|
|
)
|
|
slack_body = {"text": text}
|
|
try:
|
|
r2 = await client.post(slack_url, json=slack_body)
|
|
out["slack_ok"] = 200 <= r2.status_code < 300
|
|
if not out["slack_ok"]:
|
|
out["slack_status"] = r2.status_code
|
|
except Exception as e:
|
|
logger.warning("SLA Slack webhook failed: %s", e)
|
|
out["slack_ok"] = False
|
|
out["slack_error"] = str(e)[:200]
|
|
|
|
delivered = bool(out.get("webhook_ok")) or bool(out.get("slack_ok"))
|
|
if not delivered:
|
|
out["skipped_reason"] = "delivery_failed"
|
|
out["attempted"] = True
|
|
return out
|
|
|
|
_last_aggregate_breach_alert[tenant_id_str] = now
|
|
out["dispatched_at"] = now.isoformat()
|
|
|
|
# Mark pending breach rows with last aggregate notify (audit in payload)
|
|
q = await db.execute(
|
|
select(ApprovalRequest).where(
|
|
ApprovalRequest.tenant_id == tenant_id,
|
|
ApprovalRequest.status == "pending",
|
|
)
|
|
)
|
|
breach_h = max(1, int(s.OPENCLAW_APPROVAL_SLA_HOURS_BREACH))
|
|
for row in q.scalars().all():
|
|
age_h = _hours_between(now, row.created_at)
|
|
if age_h < breach_h:
|
|
continue
|
|
base = dict(row.payload) if isinstance(row.payload, dict) else {}
|
|
sla = dict(base.get(SLA_KEY) or {})
|
|
sla["last_aggregate_breach_alert_at"] = now.isoformat()
|
|
base[SLA_KEY] = sla
|
|
row.payload = base
|
|
flag_modified(row, "payload")
|
|
|
|
await emit_domain_event(
|
|
db,
|
|
tenant_id=tenant_id,
|
|
event_type="approval.sla_breach_notified",
|
|
payload={
|
|
"pending_breach_count": breach_n,
|
|
"webhook_ok": out.get("webhook_ok"),
|
|
"slack_ok": out.get("slack_ok"),
|
|
},
|
|
source="sla_alerts",
|
|
)
|
|
|
|
return out
|