""" Saudi B2B Benchmarks Engine — anonymized cross-customer percentiles. Used by: 1. Subscriber dashboard — "your reply rate is at the 67th percentile in your sector" 2. Public Saudi B2B Pulse (lead magnet, monthly free report) 3. Sector Intelligence API — sells data insights to consultancies Privacy: NEVER returns individual customer rows. Minimum 5 customers per sector before publishing a benchmark to prevent re-identification. Pure-function — takes pre-aggregated input, computes percentiles + insights. """ from __future__ import annotations from dataclasses import asdict, dataclass, field from typing import Any MIN_COHORT_SIZE = 5 # Privacy guarantee: never publish below this @dataclass class SectorBenchmark: sector: str cohort_size: int # n customers metric: str # reply_rate / response_time / conversion_rate / etc. p25: float p50: float p75: float p90: float sample_period_days: int def to_dict(self) -> dict[str, Any]: return asdict(self) @dataclass class CustomerComparison: customer_id: str sector: str metric: str customer_value: float sector_p50: float sector_p90: float customer_percentile: int # 0-100 insight: str def to_dict(self) -> dict[str, Any]: return asdict(self) def percentile(values: list[float], p: float) -> float: """Linear-interpolated percentile. p in [0, 100].""" if not values: return 0.0 sorted_v = sorted(values) if len(sorted_v) == 1: return sorted_v[0] rank = (p / 100) * (len(sorted_v) - 1) lower = int(rank) upper = min(lower + 1, len(sorted_v) - 1) frac = rank - lower return sorted_v[lower] + (sorted_v[upper] - sorted_v[lower]) * frac def compute_sector_benchmark( sector: str, metric: str, customer_values: list[float], sample_period_days: int = 30, ) -> SectorBenchmark | None: """ Returns a sector benchmark for `metric` if cohort >= MIN_COHORT_SIZE. Returns None if too few customers (privacy). """ if len(customer_values) < MIN_COHORT_SIZE: return None return SectorBenchmark( sector=sector, cohort_size=len(customer_values), metric=metric, p25=round(percentile(customer_values, 25), 2), p50=round(percentile(customer_values, 50), 2), p75=round(percentile(customer_values, 75), 2), p90=round(percentile(customer_values, 90), 2), sample_period_days=sample_period_days, ) def compare_customer( *, customer_id: str, sector: str, metric: str, customer_value: float, sector_values: list[float], ) -> CustomerComparison | None: """ Where does this customer rank in their sector cohort? Returns None if cohort too small. """ if len(sector_values) < MIN_COHORT_SIZE: return None sorted_v = sorted(sector_values) # rank = how many values are <= customer_value below = sum(1 for v in sorted_v if v <= customer_value) pct = round((below / len(sorted_v)) * 100) p50 = percentile(sector_values, 50) p90 = percentile(sector_values, 90) if pct >= 90: insight = f"Top 10% in {sector} — you're outperforming peers significantly" elif pct >= 75: insight = f"Top quartile in {sector} — strong performance" elif pct >= 50: insight = f"Above median in {sector} — solid ground" elif pct >= 25: insight = f"Bottom half in {sector} — opportunity to improve {metric}" else: insight = f"Bottom quartile in {sector} — review {metric} strategy with CSM" return CustomerComparison( customer_id=customer_id, sector=sector, metric=metric, customer_value=round(customer_value, 2), sector_p50=round(p50, 2), sector_p90=round(p90, 2), customer_percentile=pct, insight=insight, ) def saudi_b2b_pulse( *, sector_data: dict[str, dict[str, list[float]]], ) -> dict[str, Any]: """ Build the monthly free 'Saudi B2B Pulse' report. sector_data shape: { "real_estate": { "reply_rate": [4.5, 6.2, ...], "response_time_min": [12, 8, ...] }, ... } Returns publishable report (no individual customers, only percentiles). """ benchmarks: list[dict[str, Any]] = [] insights: list[str] = [] for sector, metrics in sector_data.items(): for metric, values in metrics.items(): bench = compute_sector_benchmark(sector, metric, values) if bench: benchmarks.append(bench.to_dict()) # Trend insights (high-level, non-identifying) sector_count = len(sector_data) insights.append(f"{sector_count} Saudi B2B sectors covered this month") # Find sector with best reply rate best_sector = None best_p50 = 0 for b in benchmarks: if b["metric"] == "reply_rate" and b["p50"] > best_p50: best_p50 = b["p50"] best_sector = b["sector"] if best_sector: insights.append( f"Best-performing sector by reply rate: {best_sector} (median {best_p50:.1f}%)" ) return { "report_name": "Saudi B2B Pulse", "month_summary": insights, "min_cohort_for_publication": MIN_COHORT_SIZE, "sectors_covered": sector_count, "benchmarks": benchmarks, "methodology": ( "Aggregated anonymized data from Dealix subscribers. Sectors with " f"fewer than {MIN_COHORT_SIZE} customers are excluded for privacy. " "Percentiles use linear interpolation. No individual customer data " "is exposed." ), }