"""Transcript parser — accepts Google Meet entries OR plain text."""

from __future__ import annotations

import re
from typing import Any


def _first_nonblank(entry: Any, keys: tuple[str, ...]) -> str:
    """Return the first usable value found under *keys*, stringified and stripped.

    A value is usable when it is truthy and still non-empty after
    ``str(value).strip()``. Returns ``""`` when no key yields one.
    """
    for key in keys:
        value = entry.get(key)
        if not value:
            continue
        cleaned = str(value).strip()
        if cleaned:
            return cleaned
    return ""


def parse_transcript_entries(entries: list[dict[str, Any]] | str) -> dict[str, Any]:
    """
    Normalize a transcript into a flat list of speaker turns.

    Accepts either:
      - a list of Google-Meet-shaped entries [{"participantId", "text", ...}], or
      - a plain string transcript with "Speaker: text" lines.

    Plain-string input:
      - Blank lines are skipped.
      - A line is split at its first colon when the part before it is 1-40
        characters long and some text follows; that part becomes the speaker.
        Note this also applies to lines such as "10:30 start".
      - Any other line is kept as a turn with speaker "?".

    List input:
      - The speaker is the first non-blank value among "participant",
        "participantId" and "speaker" (stripped), falling back to "?".
      - The text is the first non-blank value among "text" and "content".
      - Entries with no text are skipped, as are entries that are not
        dict-like (e.g. None), so one malformed item does not abort parsing.
      - A falsy ``entries`` (None, empty list) yields an empty result.

    Returns:
        {
          "speaker_turns": [{"speaker", "text"}],
          "speakers":      [str],   # unique speakers, sorted
          "total_chars":   int,     # sum of len(text) over all turns
          "total_turns":   int,
        }
    """
    speaker_turns: list[dict[str, str]] = []

    if isinstance(entries, str):
        # Compile once instead of re-resolving the pattern for every line.
        line_re = re.compile(r"^([^:]{1,40}):\s*(.+)$")
        for raw in entries.splitlines():
            line = raw.strip()
            if not line:
                continue
            m = line_re.match(line)
            if m:
                speaker_turns.append({"speaker": m.group(1).strip(),
                                      "text": m.group(2).strip()})
            else:
                speaker_turns.append({"speaker": "?", "text": line})
    else:
        for entry in entries or []:
            # Duck-typed check: anything exposing .get() is accepted, anything
            # else (None, bare strings, numbers) is ignored rather than raising.
            if not callable(getattr(entry, "get", None)):
                continue
            text = _first_nonblank(entry, ("text", "content"))
            if not text:
                continue
            # Strip the speaker so " Alice " and "Alice" count as one speaker,
            # matching what the plain-text branch already does.
            speaker = _first_nonblank(
                entry, ("participant", "participantId", "speaker")
            ) or "?"
            speaker_turns.append({"speaker": speaker, "text": text})

    speakers = sorted({t["speaker"] for t in speaker_turns})
    total_chars = sum(len(t["text"]) for t in speaker_turns)
    return {
        "speaker_turns": speaker_turns,
        "speakers": speakers,
        "total_chars": total_chars,
        "total_turns": len(speaker_turns),
    }


def summarize_meeting(parsed: dict[str, Any]) -> dict[str, Any]:
    """
    Build an Arabic summary skeleton from already-parsed speaker turns.

    Purely rule-based and deterministic (no LLM involved).

    Reads "speaker_turns", "speakers" and "total_turns" from *parsed*
    (missing keys default to [], [] and 0 respectively).

    Returns:
        {
          "summary_ar":             [str],  # header lines + up to 5 bullet lines
          "speakers":               [str],  # passed through from *parsed*
          "candidate_questions_ar": [str],  # up to 5 question-like turns
          "approval_required":      True,
        }
    """
    all_turns = parsed.get("speaker_turns", [])
    participant_names = parsed.get("speakers", [])

    # Topic candidates: the five longest turns, each clipped to 200 chars.
    # The sort is stable, so equally long turns keep their transcript order.
    longest_first = sorted(
        all_turns, key=lambda turn: len(turn["text"]), reverse=True
    )
    highlights = [turn["text"][:200] for turn in longest_first[:5]]

    # Question candidates: a turn counts when it contains the Arabic question
    # mark anywhere, or ends (ignoring trailing whitespace) with "?".
    asked: list[str] = []
    for turn in all_turns:
        utterance = turn["text"]
        if "؟" not in utterance and not utterance.rstrip().endswith("?"):
            continue
        asked.append(utterance[:200])
        if len(asked) >= 5:
            break

    summary_lines = [
        f"شارك في الاجتماع {len(participant_names)} متحدث.",
        f"إجمالي عدد الأدوار الكلامية: {parsed.get('total_turns', 0)}.",
        "أبرز نقاط النقاش (مرشحة آلياً، تحتاج مراجعة):",
    ]
    summary_lines.extend(f"• {line}" for line in highlights)

    return {
        "summary_ar": summary_lines,
        "speakers": participant_names,
        "candidate_questions_ar": asked,
        "approval_required": True,
    }