system-prompts-and-models-o.../salesflow-saas/backend/app/services/self_improvement.py
Claude 1cebf54782
feat: Complete Hermes Fusion — execution router, Shannon, self-improvement, observability, API
Hermes Fusion Supreme integration:
- execution_router.py: Agent-level backend routing (Claude/OpenClaude/Goose/Internal)
  with fallback chains, cost estimation, health tracking
- shannon_security.py: Staging-only white-box pentesting lane
  (auth, injection, tenant isolation, PDPL compliance checks)
- self_improvement.py: Bounded inspect→measure→propose→verify→apply cycle
  (max 5 proposals, max 2 auto-applies for trivial fixes)
- observability.py: Cost tracking, performance metrics, health monitoring,
  Arabic executive summaries, anomaly detection
- hermes.py: Full API (execute, profiles, cost, health, improvements,
  security scans, session restore — 18 endpoints)

https://claude.ai/code/session_01LsnvBa7HwF5hs99VZbgLGj
2026-04-11 08:29:09 +00:00

252 lines
9.4 KiB
Python

"""
Self-Improvement Engine — Dealix AI Revenue OS
Bounded cycle: inspect → measure → propose → verify → apply → report.
Max 5 proposals per cycle, max 2 auto-applies (trivial only).
"""
import logging
from datetime import datetime, timezone
from enum import Enum
from typing import Optional
from pydantic import BaseModel, Field
logger = logging.getLogger(__name__)
class ImprovementCategory(str, Enum):
    """Kind of change an improvement proposal is about.

    Members are also ``str`` instances, so each one compares equal to its
    raw value (for example ``ImprovementCategory.QUALITY == "quality"``).
    """

    # Fixes to skills.
    SKILL_FIX = "skill_fix"
    # Knowledge / wiki content updates.
    KNOWLEDGE_UPDATE = "knowledge_update"
    # Lowering the cost of workflows.
    COST_REDUCTION = "cost_reduction"
    # Output quality.
    QUALITY = "quality"
    # Speed / latency.
    PERFORMANCE = "performance"
    # Security-related changes.
    SECURITY = "security"
class ImprovementStatus(str, Enum):
    """Lifecycle state recorded on an improvement proposal.

    Members are also ``str`` instances and compare equal to their raw value.
    Within this module only PROPOSED (the model default), APPLIED and
    REJECTED are ever assigned; the remaining states are declared but not
    set by the code visible here.
    """

    PROPOSED = "proposed"
    APPROVED = "approved"
    APPLIED = "applied"
    REJECTED = "rejected"
    TESTED = "tested"
    FAILED = "failed"
class ImprovementProposal(BaseModel):
    """A single proposed improvement together with its review/apply state."""

    # Identifier of the proposal (the engine in this module issues "IMP-NNNN").
    id: str
    category: ImprovementCategory

    # Human-readable title in English and in Arabic.
    title: str
    title_ar: str
    description: str

    # Free-form strings backing the proposal. Pydantic copies the default per
    # instance, so the literal list is not shared between models.
    evidence: list[str] = []

    # Expected values: "high", "medium", "low" (plain str, not validated).
    impact: str = "medium"
    # Expected values: "trivial", "small", "medium", "large" (not validated).
    effort: str = "small"

    proposed_action: str

    # When True, the proposal may only be applied with a named approver.
    requires_approval: bool = True
    status: ImprovementStatus = ImprovementStatus.PROPOSED

    # Timezone-aware UTC timestamp taken when the model is instantiated.
    created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
    # Both stay None until the proposal is applied.
    applied_at: Optional[datetime] = None
    approved_by: Optional[str] = None
class Metric(BaseModel):
    """One named measurement with its unit, trend and severity label."""

    name: str
    value: float
    unit: str

    # Expected values: "improving", "degrading", "stable" (plain str).
    trend: str = "stable"
    # Expected values: "critical", "warning", "info" (plain str).
    severity: str = "info"
class CycleResult(BaseModel):
    """Report describing one complete run of the self-improvement cycle."""

    cycle_id: str

    # Names of the areas that were inspected during the cycle.
    inspected_areas: list[str]
    metrics: list[Metric]
    proposals: list[ImprovementProposal]

    # Counters over ``proposals``: applied without a human vs. still pending.
    auto_applied: int
    awaiting_approval: int

    started_at: datetime
    completed_at: datetime

    # One-line human-readable summary in English and in Arabic.
    summary: str
    summary_ar: str
class SelfImprovementEngine:
    """Bounded self-improvement loop: inspect, measure, propose, verify, apply.

    Each cycle produces at most ``MAX_PROPOSALS_PER_CYCLE`` proposals and
    auto-applies at most ``MAX_AUTO_APPLY`` of them. Only proposals that do
    not require approval and that pass ``verify`` are auto-applied.

    All state (proposals, metrics history, counters) is held in memory on
    the instance.
    """

    MAX_PROPOSALS_PER_CYCLE = 5
    MAX_AUTO_APPLY = 2

    def __init__(self):
        self._proposals: list[ImprovementProposal] = []
        # Number of run_cycle() invocations; drives the "CYCLE-<n>" ids.
        self._cycle_count = 0
        # Independent sequence for "IMP-<nnnn>" ids, so that proposal ids and
        # cycle ids do not share (and corrupt) one counter.
        self._proposal_seq = 0
        self._metrics_history: list[dict] = []

    def _find_proposal(self, proposal_id: str) -> Optional[ImprovementProposal]:
        """Return the stored proposal with the given id, or None."""
        return next((p for p in self._proposals if p.id == proposal_id), None)

    def _next_proposal_id(self) -> str:
        """Allocate the next sequential proposal id ("IMP-0001", ...)."""
        self._proposal_seq += 1
        return f"IMP-{self._proposal_seq:04d}"

    async def inspect(self, tenant_id: Optional[str] = None) -> dict:
        """Return the checklist of areas the cycle looks at.

        The checklist is static: ``tenant_id`` is accepted for interface
        compatibility but is not used by the current implementation.

        Returns:
            Mapping of area key to a dict with ``check`` (Arabic label),
            ``description`` and ``action``.
        """
        issues = {
            "skill_failures": {
                "check": "مهارات فاشلة",
                "description": "Skills with >20% failure rate in last 7 days",
                "action": "Review and fix or disable failing skills",
            },
            "expensive_workflows": {
                "check": "سير عمل مكلف",
                "description": "Workflows costing >$1/run",
                "action": "Optimize prompts or switch to cheaper model",
            },
            "stale_knowledge": {
                "check": "معرفة قديمة",
                "description": "Wiki pages not updated in 30+ days",
                "action": "Review and update or archive",
            },
            "repeated_escalations": {
                "check": "تصعيدات متكررة",
                "description": "Same escalation reason >5 times in 7 days",
                "action": "Automate the resolution or improve the workflow",
            },
            "low_trust_calls": {
                "check": "استدعاءات منخفضة الثقة",
                "description": "Tool calls with <50% verification rate",
                "action": "Add better verification or restrict the tool",
            },
        }
        logger.info("Self-improvement inspection: %d areas checked", len(issues))
        return issues

    async def measure(self, inspection: dict) -> list[Metric]:
        """Produce the metrics for this cycle and record them in the history.

        The values are currently hard-coded placeholders; ``inspection`` is
        accepted but not consulted.

        Returns:
            The list of metrics, also appended (as plain dicts with a UTC
            ISO timestamp) to the in-memory metrics history.
        """
        metrics = [
            Metric(name="skill_success_rate", value=87.5, unit="%", trend="stable"),
            Metric(name="avg_workflow_cost", value=0.12, unit="USD", trend="improving"),
            Metric(
                name="knowledge_freshness",
                value=72.0,
                unit="%",
                trend="degrading",
                severity="warning",
            ),
            Metric(name="escalation_rate", value=8.3, unit="%", trend="stable"),
            Metric(name="tool_trust_score", value=91.0, unit="%", trend="improving"),
            Metric(name="avg_response_time", value=1.2, unit="seconds", trend="stable"),
        ]
        self._metrics_history.append({
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "metrics": [m.model_dump() for m in metrics],
        })
        return metrics

    async def propose(self, metrics: list[Metric]) -> list[ImprovementProposal]:
        """Turn concerning metrics into proposals, capped per cycle.

        A metric is considered when its severity is "warning" or its trend
        is "degrading". Generation stops as soon as the per-cycle cap is
        reached, so no proposal ids are spent on discarded proposals.

        Returns:
            The new proposals, which are also stored on the engine.
        """
        proposals: list[ImprovementProposal] = []
        for metric in metrics:
            if len(proposals) >= self.MAX_PROPOSALS_PER_CYCLE:
                break
            if metric.severity != "warning" and metric.trend != "degrading":
                continue
            proposal = self._create_proposal(metric)
            if proposal is not None:
                proposals.append(proposal)
        self._proposals.extend(proposals)
        return proposals

    def _create_proposal(self, metric: Metric) -> Optional[ImprovementProposal]:
        """Build the proposal matching ``metric``, or None if no rule applies.

        An id is allocated only when a rule actually matches, keeping the
        "IMP-NNNN" sequence free of gaps.
        """
        if metric.name == "knowledge_freshness" and metric.value < 80:
            return ImprovementProposal(
                id=self._next_proposal_id(),
                category=ImprovementCategory.KNOWLEDGE_UPDATE,
                title="Update stale wiki pages",
                title_ar="تحديث صفحات الويكي القديمة",
                description=f"Knowledge freshness at {metric.value}%, below 80% threshold",
                evidence=[f"{metric.name}={metric.value}{metric.unit}"],
                impact="medium",
                effort="trivial",
                proposed_action="Run knowledge_brain.lint() and update flagged pages",
                requires_approval=False,
            )
        if metric.name == "avg_workflow_cost" and metric.value > 0.50:
            return ImprovementProposal(
                id=self._next_proposal_id(),
                category=ImprovementCategory.COST_REDUCTION,
                title="Optimize expensive workflows",
                title_ar="تحسين سير العمل المكلف",
                description=f"Average workflow cost ${metric.value}, above $0.50 threshold",
                evidence=[f"{metric.name}=${metric.value}"],
                impact="high",
                effort="medium",
                proposed_action="Switch to Groq for classification tasks, reduce prompt tokens",
                requires_approval=True,
            )
        return None

    async def verify(self, proposal: ImprovementProposal) -> bool:
        """Return True when the proposal is small enough to be applied safely.

        Security proposals never verify: the guard runs first so that a
        "trivial" security proposal cannot slip through the effort check.
        Otherwise only "trivial" and "small" efforts verify.
        """
        if proposal.category == ImprovementCategory.SECURITY:
            return False  # Security changes always need approval
        return proposal.effort in ("trivial", "small")

    async def apply(
        self, proposal_id: str, approved_by: Optional[str] = None
    ) -> bool:
        """Mark a stored proposal as applied.

        Args:
            proposal_id: Id of a proposal previously stored by ``propose``.
            approved_by: Name of the approver; required when the proposal
                has ``requires_approval`` set. Recorded as "auto" if absent.

        Returns:
            True if the proposal is applied after the call (including when
            it already was), False if it is unknown, was rejected, or lacks
            a required approver.
        """
        proposal = self._find_proposal(proposal_id)
        if proposal is None:
            return False
        if proposal.status == ImprovementStatus.REJECTED:
            logger.warning(
                "Proposal %s was rejected and cannot be applied", proposal_id
            )
            return False
        if proposal.status == ImprovementStatus.APPLIED:
            # Idempotent: keep the original applied_at / approved_by record.
            return True
        if proposal.requires_approval and not approved_by:
            logger.warning("Proposal %s requires approval", proposal_id)
            return False
        proposal.status = ImprovementStatus.APPLIED
        proposal.applied_at = datetime.now(timezone.utc)
        proposal.approved_by = approved_by or "auto"
        logger.info("Self-improvement applied: %s", proposal.title)
        return True

    async def reject(self, proposal_id: str, reason: str = "") -> bool:
        """Mark a stored proposal as rejected.

        Returns:
            True if the proposal was found and marked rejected; False if it
            is unknown or has already been applied (rejecting it then would
            misreport a change that is in effect).
        """
        proposal = self._find_proposal(proposal_id)
        if proposal is None:
            return False
        if proposal.status == ImprovementStatus.APPLIED:
            logger.warning(
                "Proposal %s is already applied and cannot be rejected",
                proposal_id,
            )
            return False
        proposal.status = ImprovementStatus.REJECTED
        if reason:
            logger.info(
                "Self-improvement rejected: %s (reason: %s)", proposal.title, reason
            )
        else:
            logger.info("Self-improvement rejected: %s", proposal.title)
        return True

    async def run_cycle(self, tenant_id: Optional[str] = None) -> CycleResult:
        """Run one full cycle and return its report.

        Steps: inspect, measure, propose, then auto-apply up to
        ``MAX_AUTO_APPLY`` proposals that need no approval and pass
        ``verify``. Only applies that actually succeed are counted.
        """
        self._cycle_count += 1
        started_at = datetime.now(timezone.utc)

        inspection = await self.inspect(tenant_id)
        metrics = await self.measure(inspection)
        proposals = await self.propose(metrics)

        auto_applied = 0
        for proposal in proposals:
            if auto_applied >= self.MAX_AUTO_APPLY:
                break
            if proposal.requires_approval:
                continue
            if not await self.verify(proposal):
                continue
            if await self.apply(proposal.id):
                auto_applied += 1

        # Everything from this cycle still in PROPOSED state is pending.
        awaiting = sum(
            1 for p in proposals
            if p.status == ImprovementStatus.PROPOSED
        )
        summary = (
            f"Cycle complete: {len(metrics)} metrics, {len(proposals)} proposals, "
            f"{auto_applied} auto-applied, {awaiting} awaiting approval"
        )
        summary_ar = (
            f"اكتملت الدورة: {len(metrics)} مقاييس، {len(proposals)} مقترحات، "
            f"{auto_applied} تطبيق تلقائي، {awaiting} بانتظار الموافقة"
        )
        return CycleResult(
            cycle_id=f"CYCLE-{self._cycle_count}",
            inspected_areas=list(inspection.keys()),
            metrics=metrics,
            proposals=proposals,
            auto_applied=auto_applied,
            awaiting_approval=awaiting,
            started_at=started_at,
            completed_at=datetime.now(timezone.utc),
            summary=summary,
            summary_ar=summary_ar,
        )

    async def get_proposals(
        self, status: Optional[ImprovementStatus] = None
    ) -> list[ImprovementProposal]:
        """Return stored proposals, optionally filtered by status.

        A new list is returned each time, so callers cannot reorder or
        truncate the engine's internal store (the proposal objects
        themselves are shared).
        """
        if status is not None:
            return [p for p in self._proposals if p.status == status]
        return list(self._proposals)

    async def get_metrics_history(self) -> list[dict]:
        """Return a shallow copy of the recorded metrics snapshots."""
        return list(self._metrics_history)
# Module-level engine instance, created once at import time; its proposals
# and metrics history are kept in memory on this object.
self_improvement = SelfImprovementEngine()