mirror of
https://github.com/x1xhlol/system-prompts-and-models-of-ai-tools.git
synced 2026-06-18 07:19:35 +00:00
432 lines
20 KiB
Python
432 lines
20 KiB
Python
"""
|
|
Self-Improvement Engine -- Dealix AI Revenue OS -- محرك التحسين الذاتي
|
|
Bounded self-improvement loop: Inspect -> Measure -> Propose -> Verify -> Apply.
|
|
Max 5 proposals per cycle, max 2 auto-applies (trivial only).
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import uuid
|
|
from collections import defaultdict
|
|
from datetime import datetime, timedelta, timezone
|
|
from typing import Any, Optional
|
|
|
|
from pydantic import BaseModel, Field
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Models
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class Metric(BaseModel):
    """One quantified measurement derived from an inspected issue.

    ``name`` and ``name_ar`` carry the English and Arabic labels. The engine
    in this file builds names as ``"<kind>:<subject>"`` (or a bare kind).
    """

    # Labels (English / Arabic).
    name: str
    name_ar: str

    # Measured value, its unit label, and the limit it is compared against.
    value: float
    unit: str = ""
    threshold: float = 0.0
    exceeds_threshold: bool = False

    # Supporting numbers; both default to zero when not supplied.
    frequency: int = 0
    estimated_cost_usd: float = 0.0

    # Severity label; this file uses "low", "medium" and "high".
    impact: str = "low"
|
|
|
|
|
|
class ImprovementProposal(BaseModel):
    """A single bounded improvement proposal and its lifecycle state.

    ``id`` and ``created_at`` are generated per instance (random UUID4 string
    and the current UTC time). Every other field has a static default except
    ``title``, ``title_ar`` and ``description``, which are required.
    """

    # Identity and classification.
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    category: str = "quality"  # skill_fix, knowledge_update, cost_reduction, quality

    # Human-readable content (English / Arabic title, free-text description).
    title: str
    title_ar: str
    description: str
    evidence: list[str] = []

    # Triage attributes.
    impact: str = "medium"  # high, medium, low
    effort: str = "small"  # trivial, small, medium, large

    # What to do, and whether a human has to sign off first.
    proposed_action: str = ""
    requires_approval: bool = True

    # Lifecycle tracking.
    status: str = "proposed"  # proposed, approved, applied, rejected, tested
    approved_by: Optional[str] = None
    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    applied_at: Optional[datetime] = None

    # How to undo the change, and the outcome recorded when it was applied.
    rollback_action: str = ""
    test_result: Optional[str] = None
|
|
|
|
|
|
class CycleResult(BaseModel):
    """Outcome record for one full self-improvement cycle.

    ``cycle_id`` defaults to a fresh UUID4 string and ``started_at`` to the
    current UTC time; all counters default to zero and summaries to "".
    """

    # Identity.
    cycle_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    tenant_id: str = ""

    # Timing.
    started_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    completed_at: Optional[datetime] = None
    duration_ms: int = 0

    # Counters for each phase of the cycle.
    issues_found: int = 0
    metrics_measured: int = 0
    proposals_generated: int = 0
    proposals_auto_applied: int = 0
    proposals_pending: int = 0

    # One-line summaries (English / Arabic).
    summary: str = ""
    summary_ar: str = ""
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Engine
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class SelfImprovementEngine:
    """Bounded self-improvement loop: Inspect -> Measure -> Propose -> Verify -> Apply.

    The engine keeps an in-memory history of proposals and cycle results,
    each capped to a fixed maximum length (oldest entries are dropped first).
    A cycle generates at most ``MAX_PROPOSALS_PER_CYCLE`` proposals and
    auto-applies at most ``MAX_AUTO_APPLY`` of them.
    """

    MAX_PROPOSALS_PER_CYCLE = 5
    MAX_AUTO_APPLY = 2  # only trivial improvements auto-apply

    def __init__(self) -> None:
        """Create an engine with empty proposal/cycle history."""
        self._proposals: list[ImprovementProposal] = []
        self._cycles: list[CycleResult] = []
        # Retention caps for the in-memory histories.
        self._max_proposals = 5_000
        self._max_cycles = 500
        logger.info("محرك التحسين الذاتي: تم التهيئة")

    # -- Phase 1: Inspect --------------------------------------------------

    async def inspect(self, tenant_id: Optional[str] = None) -> dict[str, Any]:
        """Return the current list of detected issues.

        The checks are simulated: a fixed set of five sample issues is
        returned regardless of ``tenant_id`` (in production these would
        query real services).

        Args:
            tenant_id: Tenant to inspect; reported as ``"global"`` when falsy.

        Returns:
            A dict with ``tenant_id``, ``issues`` (list of issue dicts with
            ``type``, ``description``, ``description_ar`` and ``data``) and
            ``count``.
        """
        # Simulated inspection checks (in production, these query real services)
        issues: list[dict[str, Any]] = [
            {
                "type": "skill_failure_rate",
                "description": "Some skills have failure rate above 10%",
                "description_ar": "بعض المهارات لديها معدل فشل أعلى من 10%",
                "data": {"skill_id": "messaging.whatsapp.send", "failure_rate": 0.12, "sample_size": 200},
            },
            {
                "type": "expensive_workflow",
                "description": "Content generation workflow uses 3x expected budget",
                "description_ar": "سير عمل توليد المحتوى يستخدم 3 أضعاف الميزانية المتوقعة",
                "data": {"workflow": "content_generation", "cost_usd": 0.15, "expected_usd": 0.05},
            },
            {
                "type": "stale_knowledge",
                "description": "12 knowledge pages not updated in 30+ days",
                "description_ar": "12 صفحة معرفة لم تُحدّث منذ أكثر من 30 يوماً",
                "data": {"stale_count": 12, "threshold_days": 30},
            },
            {
                "type": "repeated_escalation",
                "description": "Consent-expired escalations repeat 5+ times/week",
                "description_ar": "تصعيدات انتهاء الموافقة تتكرر أكثر من 5 مرات أسبوعياً",
                "data": {"escalation_type": "consent_expired", "weekly_count": 7},
            },
            {
                "type": "low_trust_calls",
                "description": "Agent 'growth' has 15% unverified tool calls",
                "description_ar": "وكيل 'النمو' لديه 15% مكالمات أدوات غير متحقق منها",
                "data": {"agent_id": "growth", "unverified_rate": 0.15},
            },
        ]

        scope = tenant_id or "global"
        logger.info("[SelfImprove] فحص: %d مشكلة اكتُشفت tenant=%s", len(issues), scope)
        return {"tenant_id": scope, "issues": issues, "count": len(issues)}

    # -- Phase 2: Measure --------------------------------------------------

    @staticmethod
    def _metric_for_issue(issue: dict[str, Any]) -> Optional[Metric]:
        """Build the Metric for one issue dict, or None for an unknown type."""
        # ``or {}`` also covers an explicit ``"data": None`` payload.
        data = issue.get("data") or {}
        itype = issue.get("type", "unknown")

        if itype == "skill_failure_rate":
            skill_id = data.get("skill_id", "?")
            rate = data.get("failure_rate", 0)
            samples = data.get("sample_size", 0)
            return Metric(
                name=f"failure_rate:{skill_id}",
                name_ar=f"معدل الفشل: {skill_id}",
                value=rate,
                unit="rate",
                threshold=0.10,
                exceeds_threshold=rate > 0.10,
                frequency=samples,
                # Expected failed calls (rate * samples) weighted at $0.01 each.
                estimated_cost_usd=rate * samples * 0.01,
                impact="high" if rate > 0.2 else "medium",
            )

        if itype == "expensive_workflow":
            workflow = data.get("workflow", "?")
            excess = data.get("cost_usd", 0) - data.get("expected_usd", 0)
            return Metric(
                name=f"cost_excess:{workflow}",
                name_ar=f"تكلفة زائدة: {workflow}",
                value=excess,
                unit="usd",
                threshold=0.05,
                exceeds_threshold=excess > 0.05,
                estimated_cost_usd=excess * 100,  # projected monthly
                impact="high" if excess > 0.10 else "medium",
            )

        if itype == "stale_knowledge":
            stale_count = data.get("stale_count", 0)
            return Metric(
                name="stale_knowledge_pages",
                name_ar="صفحات معرفة قديمة",
                value=stale_count,
                unit="pages",
                threshold=5,
                exceeds_threshold=stale_count > 5,
                impact="low",
            )

        if itype == "repeated_escalation":
            escalation = data.get("escalation_type", "?")
            weekly = data.get("weekly_count", 0)
            return Metric(
                name=f"repeated_escalation:{escalation}",
                name_ar=f"تصعيد متكرر: {escalation}",
                value=weekly,
                unit="per_week",
                threshold=3,
                exceeds_threshold=weekly > 3,
                impact="medium",
            )

        if itype == "low_trust_calls":
            agent_id = data.get("agent_id", "?")
            unverified = data.get("unverified_rate", 0)
            return Metric(
                name=f"unverified_rate:{agent_id}",
                name_ar=f"معدل غير متحقق: {agent_id}",
                value=unverified,
                unit="rate",
                threshold=0.10,
                exceeds_threshold=unverified > 0.10,
                impact="medium",
            )

        # Unknown types produce no metric; leave a trace instead of dropping silently.
        logger.debug("[SelfImprove] unknown issue type skipped: %s", itype)
        return None

    async def measure(self, inspection: dict[str, Any]) -> list[Metric]:
        """Quantify each inspected issue as a Metric.

        Args:
            inspection: Result of :meth:`inspect`; only its ``issues`` list
                is read. A missing list is treated as empty.

        Returns:
            One Metric per issue of a recognised type, in input order.
            Issues of unrecognised type are skipped.
        """
        metrics: list[Metric] = []
        for issue in inspection.get("issues", []):
            metric = self._metric_for_issue(issue)
            if metric is not None:
                metrics.append(metric)

        logger.info("[SelfImprove] قياس: %d مقاييس", len(metrics))
        return metrics

    # -- Phase 3: Propose --------------------------------------------------

    @staticmethod
    def _split_metric_name(name: str) -> tuple[str, str]:
        """Split ``"<kind>:<subject>"`` at the first colon.

        Only the first colon separates, so subjects that themselves contain
        ``:`` stay intact. A name without a colon is returned as both kind
        and subject.
        """
        kind, sep, subject = name.partition(":")
        if not sep:
            return name, name
        return kind, subject

    def _proposal_for_metric(self, m: Metric) -> Optional[ImprovementProposal]:
        """Build the proposal matching a metric's kind, or None if unrecognised."""
        # Classify on the kind prefix only, so a subject id that happens to
        # contain e.g. "failure_rate" cannot redirect the metric.
        kind, subject = self._split_metric_name(m.name)

        if "failure_rate" in kind:
            return ImprovementProposal(
                category="skill_fix",
                title=f"Fix high failure rate on {subject}",
                title_ar=f"إصلاح معدل الفشل المرتفع في {subject}",
                description=f"Failure rate {m.value:.1%} exceeds {m.threshold:.1%} threshold.",
                evidence=[f"Measured {m.frequency} calls, {m.value:.1%} failed"],
                impact=m.impact,
                effort="small",
                proposed_action="Add retry logic and improve error handling for the skill handler",
                requires_approval=True,
                rollback_action="Revert skill handler to previous version",
            )

        if "cost_excess" in kind:
            return ImprovementProposal(
                category="cost_reduction",
                title=f"Reduce cost of {subject} workflow",
                title_ar=f"تقليل تكلفة سير عمل {subject}",
                description=f"Excess cost: ${m.value:.3f}/call (projected: ${m.estimated_cost_usd:.2f}/mo).",
                # m.threshold is the excess alert threshold, not the expected
                # cost, so it is reported as such rather than as "Expected".
                evidence=[f"Excess: ${m.value:.3f}/call, alert threshold: ${m.threshold:.3f}/call"],
                impact=m.impact,
                effort="medium",
                proposed_action="Switch to cheaper model for simple content or add caching layer",
                requires_approval=True,
                rollback_action="Revert model routing configuration",
            )

        if "stale_knowledge" in kind:
            stale_pages = int(m.value)
            return ImprovementProposal(
                category="knowledge_update",
                title="Refresh stale knowledge pages",
                title_ar="تحديث صفحات المعرفة القديمة",
                # m.threshold is a page-count limit here, not an age in days.
                description=f"{stale_pages} stale pages exceeds threshold of {int(m.threshold)}.",
                evidence=[f"{stale_pages} pages not updated in 30+ days"],
                impact="low",
                effort="trivial",
                proposed_action="Flag stale pages for review and auto-mark as needs-update",
                requires_approval=False,
                rollback_action="Remove stale flags",
            )

        if "repeated_escalation" in kind:
            weekly = int(m.value)
            return ImprovementProposal(
                category="quality",
                title=f"Automate resolution for {subject}",
                title_ar=f"أتمتة الحل لتصعيد {subject}",
                description=f"{weekly} escalations/week exceeds threshold of {int(m.threshold)}.",
                evidence=[f"{weekly} weekly occurrences"],
                impact="medium",
                effort="medium",
                proposed_action="Add auto-consent-renewal reminder workflow before expiry",
                requires_approval=True,
                rollback_action="Disable auto-reminder workflow",
            )

        if "unverified_rate" in kind:
            return ImprovementProposal(
                category="quality",
                title=f"Improve verification for agent {subject}",
                title_ar=f"تحسين التحقق لوكيل {subject}",
                description=f"Unverified rate {m.value:.1%} exceeds {m.threshold:.1%}.",
                evidence=[f"{m.value:.1%} of tool calls unverified"],
                impact="medium",
                effort="small",
                proposed_action="Add post-execution verification step for this agent profile",
                requires_approval=False,
                rollback_action="Remove extra verification step",
            )

        return None

    async def propose(self, metrics: list[Metric]) -> list[ImprovementProposal]:
        """Generate at most ``MAX_PROPOSALS_PER_CYCLE`` proposals.

        Metrics are visited in descending ``estimated_cost_usd`` order (ties
        keep input order); metrics that do not exceed their threshold, or
        whose kind is not recognised, yield no proposal. New proposals are
        appended to the engine history, which is trimmed to its cap.

        Args:
            metrics: Metrics produced by :meth:`measure`.

        Returns:
            The proposals generated by this call.
        """
        proposals: list[ImprovementProposal] = []

        for m in sorted(metrics, key=lambda x: -x.estimated_cost_usd):
            if len(proposals) >= self.MAX_PROPOSALS_PER_CYCLE:
                break
            if not m.exceeds_threshold:
                continue
            proposal = self._proposal_for_metric(m)
            if proposal is not None:
                proposals.append(proposal)

        self._proposals.extend(proposals)
        if len(self._proposals) > self._max_proposals:
            # Keep only the most recent entries.
            self._proposals = self._proposals[-self._max_proposals:]

        logger.info("[SelfImprove] اقتراحات: %d", len(proposals))
        return proposals

    # -- Phase 4: Verify ---------------------------------------------------

    async def verify(self, proposal: ImprovementProposal) -> bool:
        """Return True when the proposal is safe to apply automatically.

        A proposal fails verification when it has no rollback action or when
        its effort is ``"large"`` (which requires manual review).
        """
        if not proposal.rollback_action:
            logger.warning("[SelfImprove] لا يوجد إجراء تراجع: %s", proposal.id)
            return False
        if proposal.effort == "large":
            logger.info("[SelfImprove] جهد كبير يتطلب مراجعة يدوية: %s", proposal.id)
            return False
        return True

    # -- Phase 5: Apply ----------------------------------------------------

    async def apply(self, proposal: ImprovementProposal, approved_by: Optional[str] = None) -> bool:
        """Mark a proposal as applied when it is allowed and passes verification.

        Nothing is applied when the proposal is already applied/rejected, or
        when it requires approval and no ``approved_by`` is given. A proposal
        that fails :meth:`verify` is marked ``"rejected"``. This method does
        not itself check ``effort == "trivial"``; :meth:`run_cycle` enforces
        that restriction for auto-applies.

        Args:
            proposal: The proposal to apply; mutated in place.
            approved_by: Approver identifier; recorded as ``"auto"`` if falsy.

        Returns:
            True if the proposal was marked applied, otherwise False.
        """
        if proposal.status in ("applied", "rejected"):
            logger.info("[SelfImprove] اقتراح بالفعل %s: %s", proposal.status, proposal.id)
            return False

        if proposal.requires_approval and not approved_by:
            logger.info("[SelfImprove] يتطلب موافقة: %s", proposal.id)
            return False

        if not await self.verify(proposal):
            proposal.status = "rejected"
            proposal.test_result = "Failed safety verification"
            logger.warning("[SelfImprove] رفض: %s -- فشل التحقق من السلامة", proposal.id)
            return False

        proposal.status = "applied"
        proposal.approved_by = approved_by or "auto"
        proposal.applied_at = datetime.now(timezone.utc)
        proposal.test_result = "Applied successfully"

        logger.info(
            "[SelfImprove] تطبيق: %s cat=%s by=%s",
            proposal.id, proposal.category, proposal.approved_by,
        )
        return True

    # -- Approval ----------------------------------------------------------

    async def approve(self, proposal_id: str, user_id: str) -> Optional[ImprovementProposal]:
        """Approve a pending proposal and immediately try to apply it.

        Only proposals in status ``"proposed"`` match. The proposal is
        returned even if applying fails; in that case :meth:`apply` has set
        its status to ``"rejected"``.

        Returns:
            The matched proposal, or None when no pending proposal has
            the given id.
        """
        for p in self._proposals:
            if p.id == proposal_id and p.status == "proposed":
                p.status = "approved"
                p.approved_by = user_id
                await self.apply(p, approved_by=user_id)
                return p
        return None

    async def reject(self, proposal_id: str, user_id: str) -> Optional[ImprovementProposal]:
        """Reject a proposal that is still ``"proposed"`` or ``"approved"``.

        The rejecting user is stored in ``approved_by``, since the model has
        no separate field for it.

        Returns:
            The rejected proposal, or None when nothing matched.
        """
        for p in self._proposals:
            if p.id == proposal_id and p.status in ("proposed", "approved"):
                p.status = "rejected"
                p.approved_by = user_id
                return p
        return None

    # -- Full cycle --------------------------------------------------------

    async def run_cycle(self, tenant_id: Optional[str] = None) -> CycleResult:
        """Run one full cycle: inspect -> measure -> propose -> auto-apply -> report.

        Only proposals that need no approval and have ``effort == "trivial"``
        are auto-applied, up to ``MAX_AUTO_APPLY`` per cycle. The resulting
        CycleResult is appended to the (capped) cycle history.

        Args:
            tenant_id: Tenant to run for; recorded as ``"global"`` when falsy.

        Returns:
            The CycleResult describing this run.
        """
        start = datetime.now(timezone.utc)

        # 1. Inspect
        inspection = await self.inspect(tenant_id)
        issue_count = inspection["count"]

        # 2. Measure
        metrics = await self.measure(inspection)

        # 3. Propose
        proposals = await self.propose(metrics)

        # 4+5. Auto-apply trivial proposals (up to MAX_AUTO_APPLY)
        auto_applied = 0
        for p in proposals:
            if auto_applied >= self.MAX_AUTO_APPLY:
                break
            if not p.requires_approval and p.effort == "trivial":
                if await self.apply(p, approved_by="auto"):
                    auto_applied += 1

        pending = sum(1 for p in proposals if p.status == "proposed")

        now = datetime.now(timezone.utc)
        cycle = CycleResult(
            tenant_id=tenant_id or "global",
            started_at=start,
            completed_at=now,
            duration_ms=int((now - start).total_seconds() * 1000),
            issues_found=issue_count,
            metrics_measured=len(metrics),
            proposals_generated=len(proposals),
            proposals_auto_applied=auto_applied,
            proposals_pending=pending,
            summary=(
                f"Found {issue_count} issues, measured {len(metrics)} metrics, "
                f"generated {len(proposals)} proposals, auto-applied {auto_applied}"
            ),
            summary_ar=(
                f"اكتشاف {issue_count} مشاكل، قياس {len(metrics)} مقاييس، "
                f"إنشاء {len(proposals)} مقترحات، تطبيق تلقائي {auto_applied}"
            ),
        )

        self._cycles.append(cycle)
        if len(self._cycles) > self._max_cycles:
            # Keep only the most recent cycles.
            self._cycles = self._cycles[-self._max_cycles:]

        logger.info(
            "[SelfImprove] دورة اكتملت: issues=%d proposals=%d auto=%d pending=%d %dms",
            issue_count, len(proposals), auto_applied, pending, cycle.duration_ms,
        )
        return cycle

    # -- Reporting ---------------------------------------------------------

    async def report(self) -> dict[str, Any]:
        """Summarise the stored proposals and cycles.

        Returns:
            A dict with the total proposal count, counts by status and by
            category, the number of cycles, the last cycle as JSON-compatible
            data (or None), and an Arabic one-line summary.
        """
        by_status: dict[str, int] = defaultdict(int)
        by_category: dict[str, int] = defaultdict(int)
        for p in self._proposals:
            by_status[p.status] += 1
            by_category[p.category] += 1

        last_cycle = self._cycles[-1].model_dump(mode="json") if self._cycles else None
        return {
            "total_proposals": len(self._proposals),
            "by_status": dict(by_status),
            "by_category": dict(by_category),
            "total_cycles": len(self._cycles),
            "last_cycle": last_cycle,
            "message_ar": (
                f"مقترحات: {len(self._proposals)}، "
                f"مطبقة: {by_status.get('applied', 0)}، "
                f"مرفوضة: {by_status.get('rejected', 0)}، "
                f"معلقة: {by_status.get('proposed', 0)}"
            ),
        }

    def list_proposals(self, status: Optional[str] = None) -> list[ImprovementProposal]:
        """Return a new list of stored proposals, optionally filtered by status.

        A falsy ``status`` returns every proposal.
        """
        if status:
            return [p for p in self._proposals if p.status == status]
        return list(self._proposals)

    def get_proposal(self, proposal_id: str) -> Optional[ImprovementProposal]:
        """Return the first stored proposal with the given id, or None."""
        return next((p for p in self._proposals if p.id == proposal_id), None)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Module-level singleton
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Shared engine instance, constructed once when this module is imported.
self_improvement_engine = SelfImprovementEngine()
|