mirror of
https://github.com/x1xhlol/system-prompts-and-models-of-ai-tools.git
synced 2026-06-19 15:59:37 +00:00
AI Layer:
- llm_router.py: routes cheap/mid/high models, enforces daily budget, caches
- token_counter.py: estimates tokens, truncates to budget
- response_cache.py: in-memory cache with TTL per agent
- prompt_registry.py: versioned prompts with stable prefix for caching
- ai_budget.yaml: model costs, agent budgets, daily limits (10 SAR/day)

Guardrails:
- output_validator.py: blocks fake claims + prohibited actions
- cost_guard.py: prevents runaway spending

Observability:
- trace.py: trace_id, cost, latency, steps per pipeline run

Tests: ALL PASS
- 30/30 evals (100%) — 9 sectors, 30 companies
- 10/10 prohibited actions blocked
- 4/4 allowed actions verified
- 3/3 forbidden claims blocked
- 3/3 message quality checks passed

https://claude.ai/code/session_01W1rJthWDkasijTdXCfxVHs
24 lines
784 B
Python
24 lines
784 B
Python
"""Token counter — estimates tokens before sending to avoid waste."""
|
|
import re
|
|
|
|
def estimate_tokens(text: str) -> int:
|
|
"""Rough token estimate: ~4 chars per token for mixed Arabic/English."""
|
|
if not text:
|
|
return 0
|
|
words = len(re.findall(r'\S+', text))
|
|
chars = len(text)
|
|
return max(words, chars // 4)
|
|
|
|
def check_budget(tokens: int, max_tokens: int) -> bool:
    """Tell whether a token count fits the budget.

    Returns True when *tokens* does not exceed *max_tokens* (a count equal
    to the limit still fits), and False otherwise.
    """
    fits = tokens <= max_tokens
    return fits
|
|
|
|
def truncate_to_budget(text: str, max_tokens: int) -> str:
    """Truncate *text* so its estimated token count fits *max_tokens*.

    Text that already fits is returned unchanged. Otherwise a prefix of
    the text is kept, sized proportionally to the budget with a 10%
    safety margin, and the marker "\\n[truncated]" is appended. The
    result (prefix plus marker) is re-estimated and the prefix is shrunk
    further until it actually fits.

    Returns an empty string when the budget is zero or negative, or when
    it is too small to hold even the marker.
    """
    marker = "\n[truncated]"

    # A non-positive budget can hold nothing. Handling it up front also
    # avoids a negative slice index (which would keep the *start* of the
    # text almost whole) and a division by zero for empty text.
    if max_tokens <= 0:
        return ""

    estimated = estimate_tokens(text)
    if estimated <= max_tokens:
        return text

    # First guess: scale the character length by the budget ratio and
    # keep 10% headroom because the estimate is only approximate.
    cut_at = int(len(text) * (max_tokens / estimated) * 0.9)

    while True:
        candidate = text[:cut_at] + marker
        if estimate_tokens(candidate) <= max_tokens:
            return candidate
        if cut_at == 0:
            # Even the bare marker exceeds the budget.
            return ""
        # Shrink geometrically; integer math guarantees strict decrease.
        cut_at = cut_at * 9 // 10
|