system-prompts-and-models-o.../dealix/api/routers/data.py

"""
Data Lake + Lead Graph ingestion router.

Endpoints:
    POST /api/v1/data/import                    — register a dataset (JSON rows)
    POST /api/v1/data/import/{id}/normalize     — normalize raw rows
    POST /api/v1/data/import/{id}/dedupe        — match + merge into accounts
    POST /api/v1/data/import/{id}/enrich        — run enrichment for new accounts
    GET  /api/v1/data/import/{id}/report        — totals + per-row counts
    POST /api/v1/data/suppression               — add opt-out email/phone/domain
    GET  /api/v1/data/suppression               — list suppression rows
    GET  /api/v1/data/imports                   — list all imports
    GET  /api/v1/data/accounts                  — list accounts (paginated)
    GET  /api/v1/data/accounts/{id}             — single account + signals
    POST /api/v1/data/accounts/{id}/score       — recompute score from current data
    GET  /api/v1/data/sources/catalog           — compliance-graded data source catalog

Ingestion is *append-only*. Raw rows are kept; normalization writes
new account/contact/signal records but never deletes raw_lead_rows.

PDPL compliance:
- Every import declares allowed_use, source_type, consent_status, risk_level.
- Suppression list is checked at outreach time, not at ingest time.
"""

from __future__ import annotations

import logging
import uuid
from datetime import datetime, timezone
from typing import Any

from fastapi import APIRouter, Body, HTTPException
from sqlalchemy import select

from auto_client_acquisition.pipelines.dedupe import build_index, find_match
from auto_client_acquisition.pipelines.enrichment import enrich_account
from auto_client_acquisition.pipelines.normalize import (
    fuzzy_company_key,
    is_acceptable,
    normalize_row,
)
from auto_client_acquisition.pipelines.scoring import (
    compute_data_quality,
    compute_lead_score,
)
from db.models import (
    AccountRecord,
    ContactRecord,
    LeadScoreRecord,
    RawLeadImport,
    RawLeadRow,
    SignalRecord,
    SuppressionRecord,
)
from db.session import async_session_factory

# Every route declared in this module is registered on this router, under the
# "/api/v1/data" prefix and the "data" tag.
router = APIRouter(prefix="/api/v1/data", tags=["data"])
# Module-level logger, named after this module.
log = logging.getLogger(__name__)


# ── Data Source Catalog (compliance-graded) ──────────────────────
# Static list served verbatim by the /sources/catalog endpoint.
#
# Keys present on every entry: key, name_ar, name_en, url, rating, access_method.
# Keys present on only some entries:
#   coverage_city    — a list of city slugs, or the string "all"
#   coverage_sector  — a list of sector slugs, or the string "all"
#   ingest_strategy  — short slug describing how the source may be ingested
#   note             — free-text guidance
# Consumers must therefore tolerate missing keys and both list/str values.
# "rating" takes the values "green", "yellow" and "red" in this list.
SAUDI_DATA_SOURCE_CATALOG: list[dict[str, Any]] = [
    {
        "key": "riyadh_chamber",
        "name_ar": "غرفة الرياض — دليل الأعضاء",
        "name_en": "Riyadh Chamber of Commerce Member Directory",
        "url": "https://chamber.org.sa",
        "rating": "green",
        "access_method": "public_web",
        "coverage_city": ["riyadh"],
        "coverage_sector": "all",
        "ingest_strategy": "crawl_with_requests_bs4_provider",
    },
    {
        "key": "jeddah_chamber",
        "name_ar": "غرفة جدة — دليل الأعضاء",
        "name_en": "Jeddah Chamber of Commerce Member Directory",
        "url": "https://jcci.org.sa",
        "rating": "green",
        "access_method": "public_web",
        "coverage_city": ["jeddah"],
        "coverage_sector": "all",
    },
    {
        "key": "eastern_chamber",
        "name_ar": "غرفة الشرقية",
        "name_en": "Asharqia Chamber",
        "url": "https://chamber.org.sa/eastern",
        "rating": "green",
        "access_method": "public_web",
        "coverage_city": ["dammam", "khobar", "jubail"],
        "coverage_sector": "all",
    },
    {
        "key": "data_gov_sa",
        "name_ar": "بوابة البيانات المفتوحة (سدايا)",
        "name_en": "SDAIA Open Data Portal",
        "url": "https://data.gov.sa",
        "rating": "green",
        "access_method": "public_dataset_download",
        "coverage_city": "all",
        "coverage_sector": "all",
    },
    {
        "key": "google_places",
        "name_ar": "Google Places (Maps API)",
        "name_en": "Google Places via MapsProvider chain",
        "url": "internal:auto_client_acquisition.providers.maps",
        "rating": "green",
        "access_method": "api_with_key",
        "coverage_city": "all",
        "coverage_sector": "all",
        "ingest_strategy": "store_place_id_only_per_terms",
    },
    {
        "key": "saudi_contractors_authority",
        "name_ar": "هيئة المقاولين السعودية",
        "name_en": "Saudi Contractors Authority Registry",
        "url": "https://sca.org.sa",
        "rating": "green",
        "access_method": "public_web",
        "coverage_sector": ["construction"],
    },
    {
        "key": "saudi_tourism_authority",
        "name_ar": "هيئة السياحة السعودية",
        "name_en": "Saudi Tourism Authority Registry",
        "url": "https://scth.gov.sa",
        "rating": "green",
        "access_method": "public_web",
        "coverage_sector": ["hospitality_events"],
    },
    {
        "key": "linkedin",
        "name_ar": "LinkedIn",
        "name_en": "LinkedIn",
        "url": "https://www.linkedin.com",
        "rating": "red",
        "access_method": "scraping_prohibited",
        "ingest_strategy": "manual_research_only_no_bulk_ingest",
        "note": "Dealix uses LinkedIn for human research + human send only — never for data ingestion.",
    },
    # NOTE(review): the key below mentions "linkedin"/"chamber" but both names
    # describe paid B2B data vendors — confirm whether the key is referenced
    # elsewhere before renaming it.
    {
        "key": "linkedin_chamber_other_yellow",
        "name_ar": "أدلة تجارية مدفوعة",
        "name_en": "Paid B2B Data Vendors (general)",
        "url": "various",
        "rating": "yellow",
        "access_method": "purchase_with_documentation",
        "ingest_strategy": "audit_lead_file_first_then_import",
        "note": "Demand source documentation, allowed_use, last_updated, sample of 100 rows before paying.",
    },
]


def _new_id(prefix: str = "") -> str:
    """Return a new random identifier, optionally led by ``prefix``.

    The random part is the first 24 hex characters of a UUID4.
    """
    token = uuid.uuid4().hex[:24]
    if not prefix:
        return token
    return "{}{}".format(prefix, token)


def _utcnow() -> datetime:
    """Return the current UTC time as a naive ``datetime`` (tzinfo removed)."""
    aware_now = datetime.now(tz=timezone.utc)
    naive_now = aware_now.replace(tzinfo=None)
    return naive_now


async def _safe_commit(session, *objs: Any) -> bool:
    """Add ``objs`` to ``session`` and commit, reporting success as a bool.

    This is a best-effort helper: it never raises. Any failure while adding or
    committing is logged and followed by a rollback attempt.

    Args:
        session: object exposing ``add(obj)`` plus awaitable ``commit()`` and
            ``rollback()``.
        *objs: records to add before committing; may be empty.

    Returns:
        True when the commit succeeded, False otherwise.
    """
    try:
        for obj in objs:
            session.add(obj)
        await session.commit()
    except Exception as exc:  # noqa: BLE001 - deliberate best-effort boundary
        log.warning("data_router_commit_failed err=%s", exc)
        try:
            await session.rollback()
        except Exception as rollback_exc:  # noqa: BLE001
            # A failed rollback must not mask the commit failure, but it should
            # leave a trace instead of vanishing silently.
            log.warning("data_router_rollback_failed err=%s", rollback_exc)
        return False
    return True


# ── Source catalog ────────────────────────────────────────────────
@router.get("/sources/catalog")
async def list_data_sources() -> dict[str, Any]:
    """Return the static data source catalog with a legend for its ratings."""
    legend = {
        "green": "public + clearly permissive — direct ingest",
        "yellow": "public but ToS-sensitive — lookup-only, manual approval",
        "red": "scraping forbidden / paywalled-without-allowed-use — DO NOT INGEST",
    }
    payload: dict[str, Any] = {"count": len(SAUDI_DATA_SOURCE_CATALOG)}
    payload["rating_legend"] = legend
    payload["sources"] = SAUDI_DATA_SOURCE_CATALOG
    payload["doc"] = "See docs/ops/SAUDI_DATA_SOURCE_CATALOG.md for ingestion strategy per source."
    return payload


# ── Import: register a dataset ────────────────────────────────────
@router.post("/import")
async def create_import(body: dict[str, Any] = Body(...)) -> dict[str, Any]:
    """
    Register a raw dataset and store every submitted row for later normalization.

    Body fields:
        source_name     required, non-blank string
        source_type     required; one of owned / public / paid / partner /
                        google_maps / google_search / manual
        rows            required, non-empty list of at most 10000 entries;
                        entries that are not dicts are wrapped as {"value": entry}
        allowed_use     optional, defaults to "business_contact_research_only"
        consent_status  optional, defaults to "unknown"
        risk_level      optional, defaults to "medium"
        file_name, imported_by, notes    optional, stored as given

    Raises HTTPException(400) when a required field is missing or invalid.
    If the commit fails, the response has status "skipped_db_unreachable"
    rather than an error.
    """
    accepted_source_types = (
        "owned", "public", "paid", "partner",
        "google_maps", "google_search", "manual",
    )

    name = str(body.get("source_name") or "").strip()
    kind = str(body.get("source_type") or "").strip()
    submitted = body.get("rows")

    # Validation guards, in the same order the error codes are documented.
    if not name:
        raise HTTPException(400, "source_name_required")
    if kind not in accepted_source_types:
        raise HTTPException(400, "source_type_invalid")
    if not (isinstance(submitted, list) and submitted):
        raise HTTPException(400, "rows_required: provide a non-empty list of dicts")
    row_total = len(submitted)
    if row_total > 10000:
        raise HTTPException(400, "too_many_rows: max 10000 per import; split into batches")

    import_id = _new_id("imp_")

    # The import header goes first, followed by one raw row per submitted entry.
    pending: list[Any] = [
        RawLeadImport(
            id=import_id,
            source_name=name,
            source_type=kind,
            file_name=body.get("file_name"),
            imported_by=body.get("imported_by"),
            allowed_use=str(body.get("allowed_use") or "business_contact_research_only"),
            consent_status=str(body.get("consent_status") or "unknown"),
            risk_level=str(body.get("risk_level") or "medium"),
            rows_total=row_total,
            notes=body.get("notes"),
            status="raw",
        )
    ]
    for entry in submitted:
        payload = entry if isinstance(entry, dict) else {"value": entry}
        pending.append(
            RawLeadRow(
                id=_new_id("rr_"),
                import_id=import_id,
                raw_json=payload,
                normalized_status="pending",
            )
        )

    async with async_session_factory() as session:
        committed = await _safe_commit(session, *pending)

    if not committed:
        return {
            "import_id": import_id,
            "status": "skipped_db_unreachable",
            "rows_total": row_total,
        }
    return {
        "import_id": import_id,
        "status": "raw",
        "rows_total": row_total,
        "next_action": f"POST /api/v1/data/import/{import_id}/normalize",
    }


# ── Normalize ─────────────────────────────────────────────────────
def _clip_or_none(value: Any, limit: int) -> Any:
    """Return ``value[:limit]`` for a truthy value, otherwise ``None``."""
    return value[:limit] if value else None


@router.post("/import/{import_id}/normalize")
async def normalize_import(import_id: str) -> dict[str, Any]:
    """
    Normalize the pending raw rows of an import into account/contact records.

    Each row whose ``normalized_status`` is "pending" is passed through
    ``normalize_row`` and ``is_acceptable``. Accepted rows get a new
    AccountRecord (plus a ContactRecord when an email, phone or contact name
    is present) and are marked "ok". Rejected rows are marked "rejected" with
    the reason stored in ``row.error``. Rows that are not pending are skipped.

    The counters persisted on the import are recomputed from the status of
    *all* its rows, so calling this endpoint again with nothing pending does
    not reset them to zero. The counters in the response describe this call.

    Raises HTTPException(404) when the import does not exist.
    Returns a "skipped_db_unreachable" / "commit_failed" status dict when the
    database read or the final commit fails.
    """
    async with async_session_factory() as session:
        try:
            imp_rec = (await session.execute(
                select(RawLeadImport).where(RawLeadImport.id == import_id)
            )).scalar_one_or_none()
            if not imp_rec:
                raise HTTPException(404, "import_not_found")
            # Fetched inside the guard so a DB failure here is reported the
            # same way as a failure of the import lookup.
            rows = (await session.execute(
                select(RawLeadRow).where(RawLeadRow.import_id == import_id)
            )).scalars().all()
        except HTTPException:
            raise
        except Exception as exc:  # noqa: BLE001
            return {"status": "skipped_db_unreachable", "error": str(exc)}

        normalized_count = 0
        rejected_count = 0
        accounts_created = 0

        for row in rows:
            if row.normalized_status != "pending":
                continue
            try:
                normalized = normalize_row(row.raw_json or {})
            except Exception as exc:  # noqa: BLE001
                row.normalized_status = "rejected"
                row.error = f"normalize_error: {exc}"
                rejected_count += 1
                continue

            ok, reason = is_acceptable(normalized)
            if not ok:
                row.normalized_status = "rejected"
                row.error = reason or "unacceptable"
                rejected_count += 1
                continue

            # Create AccountRecord stub (dedupe runs in next step)
            acc_id = _new_id("acc_")
            session.add(AccountRecord(
                id=acc_id,
                company_name=normalized["company_name"][:255],
                normalized_name=normalized["normalized_name"][:255],
                domain=normalized["domain"],
                website=_clip_or_none(normalized["website"], 500),
                city=_clip_or_none(normalized["city"], 128),
                country=_clip_or_none(normalized["country"], 64) or "SA",
                sector=_clip_or_none(normalized["sector"], 64),
                google_place_id=_clip_or_none(normalized["google_place_id"], 128),
                source_count=1,
                best_source=imp_rec.source_type,
                risk_level=imp_rec.risk_level,
                status="new",
                extra={
                    "import_id": import_id,
                    "source_url": normalized["source_url"],
                    "raw_keys": normalized["raw_keys"],
                    "allowed_use": imp_rec.allowed_use,
                    "consent_status": imp_rec.consent_status,
                },
            ))
            accounts_created += 1

            # Optional contact
            if normalized["email"] or normalized["phone"] or normalized["contact_name"]:
                session.add(ContactRecord(
                    id=_new_id("ct_"),
                    account_id=acc_id,
                    name=_clip_or_none(normalized["contact_name"], 255),
                    role=_clip_or_none(normalized["role"], 128),
                    email=_clip_or_none(normalized["email"], 255),
                    phone=_clip_or_none(normalized["phone"], 32),
                    source=imp_rec.source_type,
                    consent_status=imp_rec.consent_status,
                    opt_out=False,
                    risk_level=imp_rec.risk_level,
                ))

            row.normalized_status = "ok"
            row.account_id = acc_id
            normalized_count += 1

        # Persist totals over every row of the import, not just this call's
        # work; otherwise a repeat call would overwrite them with zeros.
        imp_rec.rows_normalized = sum(1 for r in rows if r.normalized_status == "ok")
        imp_rec.rows_rejected = sum(1 for r in rows if r.normalized_status == "rejected")
        imp_rec.status = "normalized"
        imp_rec.updated_at = _utcnow()

        try:
            await session.commit()
        except Exception as exc:  # noqa: BLE001
            await session.rollback()
            return {"status": "commit_failed", "error": str(exc)}

    return {
        "import_id": import_id,
        "status": "normalized",
        "rows_normalized": normalized_count,
        "rows_rejected": rejected_count,
        "accounts_created": accounts_created,
        "next_action": f"POST /api/v1/data/import/{import_id}/dedupe",
    }


# ── Dedupe ────────────────────────────────────────────────────────
@router.post("/import/{import_id}/dedupe")
async def dedupe_import(import_id: str) -> dict[str, Any]:
    """
    Match accounts created by this import against the existing graph.

    Accounts whose ``extra["import_id"]`` equals ``import_id`` are compared,
    via ``build_index`` / ``find_match``, with every other account. On a
    match, the matched account's ``source_count`` is incremented, the import's
    source type is recorded in its ``extra["sources"]``, and the new account
    is marked "merged_into" with a pointer to the match.

    Accounts already marked "merged_into" (from an earlier call) are counted
    as merged but not merged again, so repeating the call does not inflate
    ``source_count``.

    Raises HTTPException(404) when the import does not exist.
    Returns a "skipped_db_unreachable" / "commit_failed" status dict when the
    database read or the final commit fails.
    """
    async with async_session_factory() as session:
        try:
            imp_rec = (await session.execute(
                select(RawLeadImport).where(RawLeadImport.id == import_id)
            )).scalar_one_or_none()
            if not imp_rec:
                raise HTTPException(404, "import_not_found")
            all_accounts = (await session.execute(select(AccountRecord))).scalars().all()
        except HTTPException:
            raise
        except Exception as exc:  # noqa: BLE001
            return {"status": "skipped_db_unreachable", "error": str(exc)}

        # Split into this import's accounts vs everything else. The id set is
        # built once, and the "existing" side is keyed by id for O(1) lookup.
        new_for_import = [
            a for a in all_accounts if (a.extra or {}).get("import_id") == import_id
        ]
        new_ids = {a.id for a in new_for_import}
        existing_by_id = {a.id: a for a in all_accounts if a.id not in new_ids}

        idx = build_index([
            {
                "id": a.id, "company_name": a.company_name,
                "normalized_name": a.normalized_name, "domain": a.domain,
                "website": a.website, "city": a.city,
                "phone": None, "email": None,  # not on AccountRecord directly
                "google_place_id": a.google_place_id,
            }
            for a in existing_by_id.values()
        ])

        merged_count = 0
        kept_count = 0
        for acc in new_for_import:
            if acc.status == "merged_into":
                # Merged by a previous call: keep it in the total, but do not
                # bump the canonical account's source_count a second time.
                merged_count += 1
                continue

            candidate = {
                "company_name": acc.company_name,
                "normalized_name": acc.normalized_name,
                "domain": acc.domain,
                "phone": None,
                "email": None,
                "google_place_id": acc.google_place_id,
                "city": acc.city,
            }
            match_id, match_kind = find_match(candidate, idx)
            target = existing_by_id.get(match_id) if match_id else None
            if target is None:
                # No match, or a match id that cannot be resolved to an
                # account: the account stays in the graph as its own entry.
                kept_count += 1
                continue

            # Merge: increment source_count on the canonical, mark this one as duplicate
            target.source_count = (target.source_count or 1) + 1
            target_extra = dict(target.extra or {})
            sources = list(target_extra.get("sources", []))
            if imp_rec.source_type not in sources:
                sources.append(imp_rec.source_type)
                target_extra["sources"] = sources
            # Reassign instead of mutating in place so the change is a new
            # value on the attribute.
            target.extra = target_extra
            acc.status = "merged_into"
            acc.extra = {**(acc.extra or {}), "merged_into": match_id, "match_kind": match_kind}
            merged_count += 1

        imp_rec.rows_duplicate = merged_count
        imp_rec.status = "deduped"
        imp_rec.updated_at = _utcnow()

        try:
            await session.commit()
        except Exception as exc:  # noqa: BLE001
            await session.rollback()
            return {"status": "commit_failed", "error": str(exc)}

    return {
        "import_id": import_id,
        "status": "deduped",
        "merged": merged_count,
        "new_accounts": kept_count,
        "next_action": f"POST /api/v1/data/import/{import_id}/enrich",
    }


# ── Enrich ────────────────────────────────────────────────────────
def _float_or_default(value: Any, default: float) -> float:
    """Coerce ``value`` to float, using ``default`` when it is missing or unparseable."""
    if value is None or value == "":
        return default
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


@router.post("/import/{import_id}/enrich")
async def enrich_import(import_id: str, body: dict[str, Any] = Body(default={})) -> dict[str, Any]:
    """
    Run the enrichment pipeline against new accounts created by this import.

    Body:
        enrichment_level: basic / standard / deep (default: standard)
        max_accounts: int in 1..200 (default 25 — cap to avoid runaway API calls)

    For each selected account, ``enrich_account`` is awaited. Its signals,
    score and contacts are stored, and the account is marked "enriched". An
    account whose enrichment raises is logged and skipped.

    The ``max_accounts`` cap is applied *after* filtering to this import, so
    "new" accounts belonging to other imports cannot use up the quota.
    Contacts are skipped when they have no value or when the same email/phone
    is already stored for the account.

    Raises HTTPException(400) when ``max_accounts`` is not an integer or is
    outside 1..200.
    Returns a "skipped_db_unreachable" / "commit_failed" status dict when the
    database read or the final commit fails.
    """
    level = str(body.get("enrichment_level") or "standard")
    try:
        max_accounts = int(body.get("max_accounts") or 25)
    except (TypeError, ValueError):
        raise HTTPException(400, "max_accounts_invalid: expected an integer") from None
    if max_accounts < 1 or max_accounts > 200:
        raise HTTPException(400, "max_accounts_out_of_range: 1..200")

    async with async_session_factory() as session:
        try:
            new_accounts = (await session.execute(
                select(AccountRecord).where(AccountRecord.status == "new")
            )).scalars().all()

            # Filter to this import first, then apply the cap.
            from_this_import = [
                a for a in new_accounts
                if (a.extra or {}).get("import_id") == import_id
            ][:max_accounts]

            known_contacts: list[Any] = []
            if from_this_import:
                known_contacts = (await session.execute(
                    select(ContactRecord).where(
                        ContactRecord.account_id.in_([a.id for a in from_this_import])
                    )
                )).scalars().all()
        except Exception as exc:  # noqa: BLE001
            return {"status": "skipped_db_unreachable", "error": str(exc)}

        # account_id -> {("email", value), ("phone", value)} already stored.
        seen: dict[str, set[tuple[str, str]]] = {}
        for known in known_contacts:
            bucket = seen.setdefault(known.account_id, set())
            if known.email:
                bucket.add(("email", str(known.email).strip().lower()))
            if known.phone:
                bucket.add(("phone", str(known.phone).strip()))

        enriched = 0
        for acc in from_this_import:
            account_dict = {
                "id": acc.id,
                "company_name": acc.company_name,
                "domain": acc.domain,
                "website": acc.website,
                "city": acc.city,
                "country": acc.country,
                "sector": acc.sector,
                "google_place_id": acc.google_place_id,
                "best_source": acc.best_source,
                "source_type": acc.best_source,
                "allowed_use": (acc.extra or {}).get("allowed_use"),
                "risk_level": acc.risk_level,
            }
            try:
                result = await enrich_account(account_dict, enrichment_level=level)
            except Exception as exc:  # noqa: BLE001
                log.warning("enrich_failed acc=%s err=%s", acc.id, exc)
                continue

            # Persist signals
            for s in result.get("signals") or []:
                session.add(SignalRecord(
                    id=_new_id("sig_"),
                    account_id=acc.id,
                    signal_type=str(s.get("signal_type") or "tech")[:64],
                    signal_value=str(s.get("signal_value") or "")[:500],
                    source_url=s.get("source_url"),
                    # Only a missing confidence falls back to 0.5; an explicit
                    # 0.0 is kept.
                    confidence=_float_or_default(s.get("confidence"), 0.5),
                ))

            # Persist score
            sc = result.get("score") or {}
            session.add(LeadScoreRecord(
                id=_new_id("ls_"),
                account_id=acc.id,
                fit_score=_float_or_default(sc.get("fit"), 0.0),
                intent_score=_float_or_default(sc.get("intent"), 0.0),
                urgency_score=_float_or_default(sc.get("urgency"), 0.0),
                risk_score=_float_or_default(sc.get("risk"), 0.0),
                total_score=_float_or_default(sc.get("total"), 0.0),
                priority=str(sc.get("priority") or "P3")[:8],
                recommended_channel=sc.get("recommended_channel"),
                reason=sc.get("reason"),
            ))

            # Update account with crawled domain + DQ score
            if result.get("domain") and not acc.domain:
                acc.domain = result["domain"]
                acc.website = f"https://{result['domain']}"
            acc.data_quality_score = _float_or_default(
                (result.get("data_quality") or {}).get("score"), 0.0
            )
            acc.status = "enriched"
            acc.updated_at = _utcnow()

            # Persist new contacts, skipping empty values and any email/phone
            # already stored for this account.
            bucket = seen.setdefault(acc.id, set())
            for c in result.get("contacts") or []:
                value = c.get("value")
                if not value:
                    continue
                kind = c.get("type")
                if kind == "email":
                    marker = ("email", str(value).strip().lower())
                    if marker in bucket:
                        continue
                    bucket.add(marker)
                    session.add(ContactRecord(
                        id=_new_id("ct_"),
                        account_id=acc.id,
                        name=c.get("name"),
                        role=c.get("role"),
                        email=value,
                        source=c.get("source") or "enrichment",
                        consent_status="legitimate_interest",
                        opt_out=False,
                        risk_level=acc.risk_level,
                    ))
                elif kind in ("phone", "whatsapp"):
                    marker = ("phone", str(value).strip())
                    if marker in bucket:
                        continue
                    bucket.add(marker)
                    session.add(ContactRecord(
                        id=_new_id("ct_"),
                        account_id=acc.id,
                        name=None,
                        role=None,
                        phone=value,
                        source=c.get("source") or "enrichment",
                        consent_status="legitimate_interest",
                        opt_out=False,
                        risk_level=acc.risk_level,
                    ))

            enriched += 1

        try:
            await session.commit()
        except Exception as exc:  # noqa: BLE001
            await session.rollback()
            return {"status": "commit_failed", "error": str(exc)}

    return {
        "import_id": import_id,
        "status": "enriched",
        "accounts_enriched": enriched,
        "level": level,
        "next_action": f"GET /api/v1/data/import/{import_id}/report",
    }


# ── Report ────────────────────────────────────────────────────────
@router.get("/import/{import_id}/report")
async def import_report(import_id: str) -> dict[str, Any]:
    """
    Summarize an import: row counters, account count and priority distribution.

    Only the most recent LeadScoreRecord per account (by ``created_at``) is
    counted, so an account scored several times appears once in
    ``scored_accounts`` and ``priority_distribution``.

    Raises HTTPException(404) when the import does not exist.
    Returns a "skipped_db_unreachable" status dict when the import or account
    query fails. If only the score query fails, the failure is logged and the
    report is returned with empty score statistics.
    """
    async with async_session_factory() as session:
        try:
            imp = (await session.execute(
                select(RawLeadImport).where(RawLeadImport.id == import_id)
            )).scalar_one_or_none()
            if not imp:
                raise HTTPException(404, "import_not_found")
            accounts = (await session.execute(select(AccountRecord))).scalars().all()
        except HTTPException:
            raise
        except Exception as exc:  # noqa: BLE001
            return {"status": "skipped_db_unreachable", "error": str(exc)}

        related = [a for a in accounts if (a.extra or {}).get("import_id") == import_id]

        scores: list[Any] = []
        if related:  # skip the query entirely instead of sending an empty IN ()
            try:
                scores = (await session.execute(
                    select(LeadScoreRecord).where(
                        LeadScoreRecord.account_id.in_([a.id for a in related])
                    )
                )).scalars().all()
            except Exception as exc:  # noqa: BLE001
                log.warning("import_report_scores_failed import=%s err=%s", import_id, exc)
                scores = []

        # Keep only the latest score per account.
        latest_by_account: dict[str, Any] = {}
        for s in scores:
            current = latest_by_account.get(s.account_id)
            if current is None or (
                s.created_at is not None
                and (current.created_at is None or s.created_at > current.created_at)
            ):
                latest_by_account[s.account_id] = s

        priority_counts: dict[str, int] = {}
        for s in latest_by_account.values():
            priority_counts[s.priority] = priority_counts.get(s.priority, 0) + 1

        return {
            "import_id": import_id,
            "source_name": imp.source_name,
            "source_type": imp.source_type,
            "status": imp.status,
            "rows_total": imp.rows_total,
            "rows_normalized": imp.rows_normalized,
            "rows_rejected": imp.rows_rejected,
            "rows_duplicate": imp.rows_duplicate,
            "accounts_in_graph_from_this_import": len(related),
            "scored_accounts": len(latest_by_account),
            "priority_distribution": priority_counts,
            "allowed_use": imp.allowed_use,
            "consent_status": imp.consent_status,
            "risk_level": imp.risk_level,
        }


# ── Suppression list ──────────────────────────────────────────────
@router.post("/suppression")
async def add_suppression(body: dict[str, Any] = Body(...)) -> dict[str, Any]:
    """
    Add a suppression entry. At least one of email/phone/domain required.
    Body: {email?, phone?, domain?, reason?}

    Values are stripped (email and domain are also lower-cased) *before*
    validation, so a whitespace-only value counts as missing instead of being
    stored as an empty string. ``reason`` defaults to "opt_out" and is cut to
    128 characters.

    Raises HTTPException(400) when no usable email, phone or domain is given.
    The response "status" is "ok" on success, or "skipped_db_unreachable"
    when the commit fails.
    """

    def _clean(value: Any, *, lower: bool) -> str | None:
        """Strip (and optionally lower-case) ``value``; blank values become None."""
        if not value:
            return None
        text = str(value).strip()
        if lower:
            text = text.lower()
        return text or None

    email = _clean(body.get("email"), lower=True)
    phone = _clean(body.get("phone"), lower=False)
    domain = _clean(body.get("domain"), lower=True)
    if not (email or phone or domain):
        raise HTTPException(400, "at_least_one_of_email_phone_domain_required")

    rec = SuppressionRecord(
        id=_new_id("sup_"),
        email=email,
        phone=phone,
        domain=domain,
        reason=str(body.get("reason") or "opt_out")[:128],
    )
    async with async_session_factory() as session:
        ok = await _safe_commit(session, rec)
    return {
        "id": rec.id,
        "email": rec.email, "phone": rec.phone, "domain": rec.domain,
        "reason": rec.reason,
        "status": "ok" if ok else "skipped_db_unreachable",
    }


@router.get("/suppression")
async def list_suppression(limit: int = 200) -> dict[str, Any]:
    """
    List suppression rows.

    ``limit`` is clamped to 0..1000, so a negative value is never passed to
    the SQL LIMIT clause. Returns a "skipped_db_unreachable" status dict with
    an empty ``items`` list when the query fails.
    """
    capped = max(0, min(1000, limit))
    async with async_session_factory() as session:
        try:
            rows = (await session.execute(
                select(SuppressionRecord).limit(capped)
            )).scalars().all()
        except Exception as exc:  # noqa: BLE001
            return {"status": "skipped_db_unreachable", "error": str(exc), "items": []}
        return {
            "status": "ok",
            "count": len(rows),
            "items": [
                {
                    "id": r.id, "email": r.email, "phone": r.phone, "domain": r.domain,
                    "reason": r.reason,
                    # A row without a timestamp serializes as null instead of
                    # failing the whole listing.
                    "created_at": r.created_at.isoformat() if r.created_at else None,
                }
                for r in rows
            ],
        }


# ── Listings ──────────────────────────────────────────────────────
@router.get("/imports")
async def list_imports(limit: int = 50) -> dict[str, Any]:
    """
    List imports, newest first (ordered by ``created_at`` descending).

    ``limit`` is clamped to 0..500, so a negative value is never passed to
    the SQL LIMIT clause. Returns a "skipped_db_unreachable" status dict with
    an empty ``items`` list when the query fails.
    """
    capped = max(0, min(500, limit))
    async with async_session_factory() as session:
        try:
            rows = (await session.execute(
                select(RawLeadImport).order_by(RawLeadImport.created_at.desc()).limit(capped)
            )).scalars().all()
        except Exception as exc:  # noqa: BLE001
            return {"status": "skipped_db_unreachable", "error": str(exc), "items": []}
        return {
            "count": len(rows),
            "items": [
                {
                    "id": r.id, "source_name": r.source_name, "source_type": r.source_type,
                    "status": r.status, "rows_total": r.rows_total,
                    "rows_normalized": r.rows_normalized, "rows_rejected": r.rows_rejected,
                    "rows_duplicate": r.rows_duplicate,
                    "risk_level": r.risk_level,
                    # A row without a timestamp serializes as null instead of
                    # failing the whole listing.
                    "created_at": r.created_at.isoformat() if r.created_at else None,
                }
                for r in rows
            ],
        }


@router.get("/accounts")
async def list_accounts(
    limit: int = 50,
    sector: str | None = None,
    city: str | None = None,
    status: str | None = None,
    priority: str | None = None,
) -> dict[str, Any]:
    """
    List accounts ordered by data quality score (descending), each with its
    latest lead score.

    ``sector``, ``city`` and ``status`` are exact-match SQL filters. ``limit``
    is clamped to 0..500, so a negative value is never passed to the SQL
    LIMIT clause.

    NOTE(review): ``priority`` is applied in Python *after* the SQL limit, so
    a filtered request can return fewer than ``limit`` items even when more
    matching accounts exist. Fixing this needs a join against the latest
    score per account; confirm the intended semantics before changing it.

    Returns a "skipped_db_unreachable" status dict with an empty ``items``
    list when a query fails.
    """
    capped = max(0, min(500, limit))
    async with async_session_factory() as session:
        try:
            q = select(AccountRecord)
            if sector:
                q = q.where(AccountRecord.sector == sector)
            if city:
                q = q.where(AccountRecord.city == city)
            if status:
                q = q.where(AccountRecord.status == status)
            q = q.order_by(AccountRecord.data_quality_score.desc()).limit(capped)
            rows = (await session.execute(q)).scalars().all()

            # account_id -> most recent LeadScoreRecord (by created_at).
            score_map: dict[str, LeadScoreRecord] = {}
            if rows:
                ids = [r.id for r in rows]
                scores = (await session.execute(
                    select(LeadScoreRecord).where(LeadScoreRecord.account_id.in_(ids))
                )).scalars().all()
                for s in scores:
                    if s.account_id not in score_map or s.created_at > score_map[s.account_id].created_at:
                        score_map[s.account_id] = s
        except Exception as exc:  # noqa: BLE001
            return {"status": "skipped_db_unreachable", "error": str(exc), "items": []}

        items = []
        for a in rows:
            s = score_map.get(a.id)
            # With a priority filter, unscored accounts are excluded too.
            if priority and (not s or s.priority != priority):
                continue
            items.append({
                "id": a.id, "company_name": a.company_name, "domain": a.domain,
                "website": a.website, "city": a.city, "sector": a.sector,
                "google_place_id": a.google_place_id, "source_count": a.source_count,
                "best_source": a.best_source, "status": a.status,
                "data_quality_score": a.data_quality_score, "risk_level": a.risk_level,
                "score": {
                    "fit": s.fit_score, "intent": s.intent_score,
                    "total": s.total_score, "priority": s.priority,
                    "recommended_channel": s.recommended_channel,
                } if s else None,
            })
        return {"count": len(items), "items": items}


@router.get("/accounts/{account_id}")
async def get_account(account_id: str) -> dict[str, Any]:
    """
    Return one account with its contacts, signals and most recent score.

    Raises HTTPException(404) when the account does not exist.
    Returns a "skipped_db_unreachable" status dict when a query fails.
    ``score`` is null when the account has never been scored.
    """
    async with async_session_factory() as session:
        try:
            account_stmt = select(AccountRecord).where(AccountRecord.id == account_id)
            acc = (await session.execute(account_stmt)).scalar_one_or_none()
            if not acc:
                raise HTTPException(404, "account_not_found")

            contact_stmt = select(ContactRecord).where(ContactRecord.account_id == account_id)
            contact_rows = (await session.execute(contact_stmt)).scalars().all()

            signal_stmt = select(SignalRecord).where(SignalRecord.account_id == account_id)
            signal_rows = (await session.execute(signal_stmt)).scalars().all()

            # Newest score first, and only that one row is fetched.
            score_stmt = (
                select(LeadScoreRecord)
                .where(LeadScoreRecord.account_id == account_id)
                .order_by(LeadScoreRecord.created_at.desc())
                .limit(1)
            )
            score_rows = (await session.execute(score_stmt)).scalars().all()
        except HTTPException:
            raise
        except Exception as exc:  # noqa: BLE001
            return {"status": "skipped_db_unreachable", "error": str(exc)}

        account_payload = {
            "id": acc.id, "company_name": acc.company_name,
            "domain": acc.domain, "website": acc.website,
            "city": acc.city, "country": acc.country, "sector": acc.sector,
            "google_place_id": acc.google_place_id,
            "source_count": acc.source_count, "best_source": acc.best_source,
            "status": acc.status, "data_quality_score": acc.data_quality_score,
            "risk_level": acc.risk_level, "extra": acc.extra,
            "created_at": acc.created_at.isoformat(),
            "updated_at": acc.updated_at.isoformat(),
        }

        contact_payload = []
        for contact in contact_rows:
            contact_payload.append({
                "id": contact.id, "name": contact.name, "role": contact.role,
                "email": contact.email, "phone": contact.phone,
                "source": contact.source, "consent_status": contact.consent_status,
                "opt_out": contact.opt_out, "risk_level": contact.risk_level,
            })

        signal_payload = []
        for signal in signal_rows:
            signal_payload.append({
                "id": signal.id, "type": signal.signal_type,
                "value": signal.signal_value, "source_url": signal.source_url,
                "confidence": signal.confidence,
                "detected_at": signal.detected_at.isoformat(),
            })

        score_payload = None
        if score_rows:
            newest = score_rows[0]
            score_payload = {
                "fit": newest.fit_score, "intent": newest.intent_score,
                "urgency": newest.urgency_score, "risk": newest.risk_score,
                "total": newest.total_score, "priority": newest.priority,
                "recommended_channel": newest.recommended_channel,
                "reason": newest.reason,
            }

        return {
            "account": account_payload,
            "contacts": contact_payload,
            "signals": signal_payload,
            "score": score_payload,
        }


@router.post("/accounts/{account_id}/score")
async def score_account(account_id: str) -> dict[str, Any]:
    """
    Recompute an account's lead score and data quality score from stored data.

    Loads the account with its contacts and signals, calls
    ``compute_lead_score`` and ``compute_data_quality``, stores a new
    LeadScoreRecord and updates the account's ``data_quality_score``.

    Raises HTTPException(404) when the account does not exist.
    Returns a "skipped_db_unreachable" / "commit_failed" status dict when the
    database read or the commit fails.
    """
    async with async_session_factory() as session:
        try:
            acc = (
                await session.execute(
                    select(AccountRecord).where(AccountRecord.id == account_id)
                )
            ).scalar_one_or_none()
            if not acc:
                raise HTTPException(404, "account_not_found")
            contacts = (
                await session.execute(
                    select(ContactRecord).where(ContactRecord.account_id == account_id)
                )
            ).scalars().all()
            signals = (
                await session.execute(
                    select(SignalRecord).where(SignalRecord.account_id == account_id)
                )
            ).scalars().all()
        except HTTPException:
            raise
        except Exception as exc:  # noqa: BLE001
            return {"status": "skipped_db_unreachable", "error": str(exc)}

        # First non-empty email / phone across the account's contacts.
        primary_email = None
        for contact in contacts:
            if contact.email:
                primary_email = contact.email
                break
        primary_phone = None
        for contact in contacts:
            if contact.phone:
                primary_phone = contact.phone
                break

        opted_out = any(contact.opt_out for contact in contacts)

        # NOTE(review): "signals" holds the SignalRecord rows themselves, while
        # compute_lead_score receives plain dicts — confirm which form
        # compute_data_quality expects.
        account_dict = {
            "id": acc.id, "company_name": acc.company_name, "domain": acc.domain,
            "website": acc.website, "city": acc.city, "country": acc.country,
            "sector": acc.sector, "google_place_id": acc.google_place_id,
            "best_source": acc.best_source, "source_count": acc.source_count,
            "risk_level": acc.risk_level, "email": primary_email, "phone": primary_phone,
            "allowed_use": (acc.extra or {}).get("allowed_use"),
            "opt_out": opted_out,
            "signals": signals,
        }

        signal_dicts = []
        for signal in signals:
            signal_dicts.append({
                "signal_type": signal.signal_type,
                "signal_value": signal.signal_value,
                "confidence": signal.confidence,
            })

        sb = compute_lead_score(account_dict, signals=signal_dicts, technologies=[])
        dq, _unused_reasons = compute_data_quality(account_dict)

        score_payload = {
            "fit": sb.fit, "intent": sb.intent, "urgency": sb.urgency,
            "risk": sb.risk, "total": sb.total, "priority": sb.priority,
            "recommended_channel": sb.recommended_channel, "reason": sb.reason,
        }

        new_score = LeadScoreRecord(
            id=_new_id("ls_"),
            account_id=account_id,
            fit_score=sb.fit,
            intent_score=sb.intent,
            urgency_score=sb.urgency,
            risk_score=sb.risk,
            total_score=sb.total,
            priority=sb.priority,
            recommended_channel=sb.recommended_channel,
            reason=sb.reason,
        )
        # The account is already attached to the session, so these attribute
        # changes are committed together with the new score record.
        acc.data_quality_score = dq
        acc.updated_at = _utcnow()

        committed = await _safe_commit(session, new_score)
        if not committed:
            return {"status": "commit_failed"}

    return {
        "account_id": account_id,
        "score": score_payload,
        "data_quality_score": dq,
    }