system-prompts-and-models-o.../personal-brand-engine/llm/client.py
VoXc2 4bb2442313
Add Personal Brand Engine - 7 AI Agents Automation System
Complete AI-powered personal brand automation for Sami Assiri.\n\n7 agents: LinkedIn, Email, Social Media, WhatsApp, CV Optimizer, Content Strategist, Opportunity Scout.\nInfra: FastAPI + APScheduler + Docker + Ollama/Groq LLM + GitHub Pages landing page.\n83 files, ~10K lines. Cost: $0-5/month.
2026-03-30 11:45:48 +03:00

178 lines
5.6 KiB
Python

"""Unified LLM client with Ollama -> Groq -> OpenAI fallback chain."""
from __future__ import annotations
import logging
from dataclasses import dataclass
import httpx
logger = logging.getLogger(__name__)
@dataclass
class LLMResponse:
text: str
model: str
provider: str
tokens_used: int = 0
class LLMClient:
"""Unified LLM client that tries providers in order: Ollama -> Groq -> OpenAI."""
def __init__(
self,
ollama_base_url: str = "http://localhost:11434",
ollama_model: str = "qwen2.5:7b",
groq_api_key: str = "",
groq_model: str = "llama-3.1-70b-versatile",
openai_api_key: str = "",
openai_model: str = "gpt-4o-mini",
):
self.ollama_base_url = ollama_base_url.rstrip("/")
self.ollama_model = ollama_model
self.groq_api_key = groq_api_key
self.groq_model = groq_model
self.openai_api_key = openai_api_key
self.openai_model = openai_model
self._http = httpx.AsyncClient(timeout=120.0)
async def generate(
self,
prompt: str,
system_prompt: str = "",
temperature: float = 0.7,
max_tokens: int = 2000,
) -> LLMResponse:
"""Generate text using the first available provider."""
errors = []
# Try Ollama first (free, local)
try:
return await self._ollama_generate(prompt, system_prompt, temperature)
except Exception as e:
errors.append(f"Ollama: {e}")
logger.debug("Ollama unavailable: %s", e)
# Try Groq (free tier)
if self.groq_api_key:
try:
return await self._groq_generate(prompt, system_prompt, temperature, max_tokens)
except Exception as e:
errors.append(f"Groq: {e}")
logger.debug("Groq failed: %s", e)
# Try OpenAI (paid)
if self.openai_api_key:
try:
return await self._openai_generate(prompt, system_prompt, temperature, max_tokens)
except Exception as e:
errors.append(f"OpenAI: {e}")
logger.debug("OpenAI failed: %s", e)
raise RuntimeError(f"All LLM providers failed: {'; '.join(errors)}")
async def _ollama_generate(
self, prompt: str, system_prompt: str, temperature: float
) -> LLMResponse:
messages = []
if system_prompt:
messages.append({"role": "system", "content": system_prompt})
messages.append({"role": "user", "content": prompt})
resp = await self._http.post(
f"{self.ollama_base_url}/api/chat",
json={
"model": self.ollama_model,
"messages": messages,
"stream": False,
"options": {"temperature": temperature},
},
)
resp.raise_for_status()
data = resp.json()
return LLMResponse(
text=data["message"]["content"],
model=self.ollama_model,
provider="ollama",
tokens_used=data.get("eval_count", 0),
)
async def _groq_generate(
self, prompt: str, system_prompt: str, temperature: float, max_tokens: int
) -> LLMResponse:
messages = []
if system_prompt:
messages.append({"role": "system", "content": system_prompt})
messages.append({"role": "user", "content": prompt})
resp = await self._http.post(
"https://api.groq.com/openai/v1/chat/completions",
headers={"Authorization": f"Bearer {self.groq_api_key}"},
json={
"model": self.groq_model,
"messages": messages,
"temperature": temperature,
"max_tokens": max_tokens,
},
)
resp.raise_for_status()
data = resp.json()
return LLMResponse(
text=data["choices"][0]["message"]["content"],
model=self.groq_model,
provider="groq",
tokens_used=data.get("usage", {}).get("total_tokens", 0),
)
async def _openai_generate(
self, prompt: str, system_prompt: str, temperature: float, max_tokens: int
) -> LLMResponse:
messages = []
if system_prompt:
messages.append({"role": "system", "content": system_prompt})
messages.append({"role": "user", "content": prompt})
resp = await self._http.post(
"https://api.openai.com/v1/chat/completions",
headers={"Authorization": f"Bearer {self.openai_api_key}"},
json={
"model": self.openai_model,
"messages": messages,
"temperature": temperature,
"max_tokens": max_tokens,
},
)
resp.raise_for_status()
data = resp.json()
return LLMResponse(
text=data["choices"][0]["message"]["content"],
model=self.openai_model,
provider="openai",
tokens_used=data.get("usage", {}).get("total_tokens", 0),
)
async def close(self):
await self._http.aclose()
_client: LLMClient | None = None
def get_llm_client() -> LLMClient:
"""Get or create the singleton LLM client."""
global _client
if _client is None:
from config.settings import get_settings
s = get_settings()
_client = LLMClient(
ollama_base_url=s.ollama_base_url,
ollama_model=s.ollama_model,
groq_api_key=s.groq_api_key,
groq_model=s.groq_model,
openai_api_key=s.openai_api_key,
openai_model=s.openai_model,
)
return _client