"""
Self-Improvement Engine — Dealix AI Revenue OS
Bounded cycle: inspect → measure → propose → verify → apply → report.
Max 5 proposals per cycle, max 2 auto-applies (trivial only).
"""

import logging
from datetime import datetime, timezone
from enum import Enum
from typing import Optional

from pydantic import BaseModel, Field

logger = logging.getLogger(__name__)


class ImprovementCategory(str, Enum):
    """Kind of improvement a proposal represents."""

    SKILL_FIX = "skill_fix"
    KNOWLEDGE_UPDATE = "knowledge_update"
    COST_REDUCTION = "cost_reduction"
    QUALITY = "quality"
    PERFORMANCE = "performance"
    SECURITY = "security"


class ImprovementStatus(str, Enum):
    """Lifecycle state of a proposal.

    The engine in this module only ever assigns PROPOSED (the model default),
    APPLIED and REJECTED; the remaining members are not set by code here.
    """

    PROPOSED = "proposed"
    APPROVED = "approved"
    APPLIED = "applied"
    REJECTED = "rejected"
    TESTED = "tested"
    FAILED = "failed"


class ImprovementProposal(BaseModel):
    """A single suggested change, with its evidence and approval state."""

    id: str
    category: ImprovementCategory
    title: str
    title_ar: str  # Arabic rendering of ``title``
    description: str
    # default_factory makes the per-instance list explicit instead of relying
    # on the framework copying a shared mutable default.
    evidence: list[str] = Field(default_factory=list)
    impact: str = "medium"  # high, medium, low
    effort: str = "small"  # trivial, small, medium, large
    proposed_action: str
    requires_approval: bool = True
    status: ImprovementStatus = ImprovementStatus.PROPOSED
    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    applied_at: Optional[datetime] = None
    approved_by: Optional[str] = None


class Metric(BaseModel):
    """One measured value, with its trend and severity labels."""

    name: str
    value: float
    unit: str
    trend: str = "stable"  # improving, degrading, stable
    severity: str = "info"  # critical, warning, info


class CycleResult(BaseModel):
    """Report produced by one ``SelfImprovementEngine.run_cycle`` call."""

    cycle_id: str
    inspected_areas: list[str]
    metrics: list[Metric]
    proposals: list[ImprovementProposal]
    auto_applied: int  # proposals applied without human approval this cycle
    awaiting_approval: int  # proposals still in PROPOSED state after the cycle
    started_at: datetime
    completed_at: datetime
    summary: str
    summary_ar: str  # Arabic rendering of ``summary``


class SelfImprovementEngine:
    """In-memory engine running the inspect → measure → propose → apply loop.

    All state (proposals, metric history, counters) lives on the instance and
    is not persisted by this class.
    """

    MAX_PROPOSALS_PER_CYCLE = 5
    MAX_AUTO_APPLY = 2

    def __init__(self):
        self._proposals: list[ImprovementProposal] = []
        # Number of completed/started run_cycle() calls; drives ``cycle_id``.
        self._cycle_count = 0
        # Separate sequence for proposal ids so that ids stay gap-free and the
        # cycle counter is not advanced by proposal creation.
        self._proposal_count = 0
        self._metrics_history: list[dict] = []

    async def inspect(self, tenant_id: Optional[str] = None) -> dict:
        """Return the checklist of areas this engine inspects.

        The checklist is static: ``tenant_id`` is accepted but not used by the
        current implementation.

        Returns:
            Mapping of area key to a dict with ``check`` (Arabic label),
            ``description`` and ``action`` strings.
        """
        issues = {
            "skill_failures": {
                "check": "مهارات فاشلة",
                "description": "Skills with >20% failure rate in last 7 days",
                "action": "Review and fix or disable failing skills",
            },
            "expensive_workflows": {
                "check": "سير عمل مكلف",
                "description": "Workflows costing >$1/run",
                "action": "Optimize prompts or switch to cheaper model",
            },
            "stale_knowledge": {
                "check": "معرفة قديمة",
                "description": "Wiki pages not updated in 30+ days",
                "action": "Review and update or archive",
            },
            "repeated_escalations": {
                "check": "تصعيدات متكررة",
                "description": "Same escalation reason >5 times in 7 days",
                "action": "Automate the resolution or improve the workflow",
            },
            "low_trust_calls": {
                "check": "استدعاءات منخفضة الثقة",
                "description": "Tool calls with <50% verification rate",
                "action": "Add better verification or restrict the tool",
            },
        }
        logger.info("Self-improvement inspection: %d areas checked", len(issues))
        return issues

    async def measure(self, inspection: dict) -> list[Metric]:
        """Produce the metric snapshot for a cycle and record it in history.

        The values are hard-coded placeholders in this implementation;
        ``inspection`` is accepted but not consulted.

        Returns:
            The list of metrics, which is also appended (as plain dicts, with a
            UTC ISO timestamp) to the in-memory metrics history.
        """
        metrics = [
            Metric(name="skill_success_rate", value=87.5, unit="%", trend="stable"),
            Metric(name="avg_workflow_cost", value=0.12, unit="USD", trend="improving"),
            Metric(
                name="knowledge_freshness",
                value=72.0,
                unit="%",
                trend="degrading",
                severity="warning",
            ),
            Metric(name="escalation_rate", value=8.3, unit="%", trend="stable"),
            Metric(name="tool_trust_score", value=91.0, unit="%", trend="improving"),
            Metric(name="avg_response_time", value=1.2, unit="seconds", trend="stable"),
        ]
        self._metrics_history.append({
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "metrics": [m.model_dump() for m in metrics],
        })
        return metrics

    async def propose(self, metrics: list[Metric]) -> list[ImprovementProposal]:
        """Create proposals for metrics flagged as ``warning`` or ``degrading``.

        At most ``MAX_PROPOSALS_PER_CYCLE`` proposals are produced; they are
        stored on the engine and returned.
        """
        proposals: list[ImprovementProposal] = []
        for metric in metrics:
            # Stop before creating anything past the cap so no proposal id is
            # consumed by a proposal that would be discarded.
            if len(proposals) >= self.MAX_PROPOSALS_PER_CYCLE:
                break
            if metric.severity != "warning" and metric.trend != "degrading":
                continue
            proposal = self._create_proposal(metric)
            if proposal is not None:
                proposals.append(proposal)
        self._proposals.extend(proposals)
        return proposals

    def _next_proposal_id(self) -> str:
        """Allocate the next sequential proposal id (``IMP-0001``, ...)."""
        self._proposal_count += 1
        return f"IMP-{self._proposal_count:04d}"

    def _create_proposal(self, metric: Metric) -> Optional[ImprovementProposal]:
        """Map a flagged metric to a proposal, or ``None`` if no rule matches.

        An id is allocated only when a proposal is actually created.
        """
        if metric.name == "knowledge_freshness" and metric.value < 80:
            return ImprovementProposal(
                id=self._next_proposal_id(),
                category=ImprovementCategory.KNOWLEDGE_UPDATE,
                title="Update stale wiki pages",
                title_ar="تحديث صفحات الويكي القديمة",
                description=f"Knowledge freshness at {metric.value}%, below 80% threshold",
                evidence=[f"{metric.name}={metric.value}{metric.unit}"],
                impact="medium",
                effort="trivial",
                proposed_action="Run knowledge_brain.lint() and update flagged pages",
                requires_approval=False,
            )

        if metric.name == "avg_workflow_cost" and metric.value > 0.50:
            return ImprovementProposal(
                id=self._next_proposal_id(),
                category=ImprovementCategory.COST_REDUCTION,
                title="Optimize expensive workflows",
                title_ar="تحسين سير العمل المكلف",
                description=f"Average workflow cost ${metric.value}, above $0.50 threshold",
                evidence=[f"{metric.name}=${metric.value}"],
                impact="high",
                effort="medium",
                proposed_action="Switch to Groq for classification tasks, reduce prompt tokens",
                requires_approval=True,
            )

        return None

    async def verify(self, proposal: ImprovementProposal) -> bool:
        """Return whether a proposal is eligible to be applied automatically.

        Security proposals never verify. Otherwise a proposal verifies when its
        effort is ``trivial`` or ``small``.
        """
        # Checked first: security changes always need approval, even when the
        # proposal is marked trivial and approval-free.
        if proposal.category == ImprovementCategory.SECURITY:
            return False
        if proposal.effort == "trivial" and not proposal.requires_approval:
            return True
        return proposal.effort in ("trivial", "small")

    async def apply(
        self, proposal_id: str, approved_by: Optional[str] = None
    ) -> bool:
        """Mark a stored proposal as applied.

        Args:
            proposal_id: Id of a proposal previously returned by ``propose``.
            approved_by: Approver name; required when the proposal has
                ``requires_approval`` set. Recorded as ``"auto"`` when omitted.

        Returns:
            ``True`` if the proposal is applied (including when it already
            was), ``False`` if it is unknown or approval is missing.
        """
        proposal = next((p for p in self._proposals if p.id == proposal_id), None)
        if proposal is None:
            return False
        if proposal.requires_approval and not approved_by:
            logger.warning("Proposal %s requires approval", proposal_id)
            return False
        if proposal.status == ImprovementStatus.APPLIED:
            # Idempotent: keep the original applied_at / approved_by record
            # instead of overwriting it on a repeated call.
            return True
        # NOTE(review): a REJECTED proposal can still be applied here; confirm
        # whether that override is intended.
        proposal.status = ImprovementStatus.APPLIED
        proposal.applied_at = datetime.now(timezone.utc)
        proposal.approved_by = approved_by or "auto"
        logger.info("Self-improvement applied: %s", proposal.title)
        return True

    async def reject(self, proposal_id: str, reason: str = "") -> bool:
        """Mark a stored proposal as rejected.

        Returns:
            ``True`` if the proposal was found, ``False`` otherwise. ``reason``
            is only logged, not stored.
        """
        proposal = next((p for p in self._proposals if p.id == proposal_id), None)
        if proposal is None:
            return False
        proposal.status = ImprovementStatus.REJECTED
        logger.info("Self-improvement rejected: %s — %s", proposal.title, reason)
        return True

    async def run_cycle(self, tenant_id: Optional[str] = None) -> CycleResult:
        """Run one full cycle and return its report.

        Steps: inspect, measure, propose, then auto-apply up to
        ``MAX_AUTO_APPLY`` proposals that verify and need no approval.
        """
        started_at = datetime.now(timezone.utc)
        # Advance the cycle counter once per cycle so every report gets a
        # distinct id, whether or not any proposal is generated.
        self._cycle_count += 1

        inspection = await self.inspect(tenant_id)
        metrics = await self.measure(inspection)
        proposals = await self.propose(metrics)

        auto_applied = 0
        for proposal in proposals:
            if auto_applied >= self.MAX_AUTO_APPLY:
                break
            if proposal.requires_approval:
                continue
            # NOTE(review): verify() also accepts "small" effort, while the
            # module docstring says auto-apply is "trivial only" — confirm.
            if not await self.verify(proposal):
                continue
            # Count only applications that actually succeeded.
            if await self.apply(proposal.id):
                auto_applied += 1

        awaiting = sum(
            1 for p in proposals
            if p.status == ImprovementStatus.PROPOSED
        )

        summary = (
            f"Cycle complete: {len(metrics)} metrics, {len(proposals)} proposals, "
            f"{auto_applied} auto-applied, {awaiting} awaiting approval"
        )
        summary_ar = (
            f"اكتملت الدورة: {len(metrics)} مقاييس، {len(proposals)} مقترحات، "
            f"{auto_applied} تطبيق تلقائي، {awaiting} بانتظار الموافقة"
        )

        return CycleResult(
            cycle_id=f"CYCLE-{self._cycle_count}",
            inspected_areas=list(inspection.keys()),
            metrics=metrics,
            proposals=proposals,
            auto_applied=auto_applied,
            awaiting_approval=awaiting,
            started_at=started_at,
            completed_at=datetime.now(timezone.utc),
            summary=summary,
            summary_ar=summary_ar,
        )

    async def get_proposals(
        self, status: Optional[ImprovementStatus] = None
    ) -> list[ImprovementProposal]:
        """Return stored proposals, optionally filtered by ``status``.

        A new list is returned in both cases, so callers cannot alter the
        engine's internal list by mutating the result.
        """
        if status is not None:
            return [p for p in self._proposals if p.status == status]
        return list(self._proposals)

    async def get_metrics_history(self) -> list[dict]:
        """Return a shallow copy of the recorded metric snapshots."""
        return list(self._metrics_history)


self_improvement = SelfImprovementEngine()