system-prompts-and-models-o.../dealix/auto_client_acquisition/revenue_memory/retention.py
2026-05-01 14:03:52 +03:00

116 lines
3.9 KiB
Python

"""
Retention policy — PDPL-compliant data lifecycle for the event store.
Saudi PDPL requires retention to be limited to what's necessary for the
declared purpose. We separate events into 3 retention tiers:
- operational (90 days) — high-volume signals, opens, clicks
- business_record (3y) — leads, deals, customer events
- legal_hold (7y nominal; never auto-expired by this module) — compliance events (consent, opt-out, DSR)
`apply_retention()` is the function the cron runs daily. It NEVER deletes
legal_hold events. Operational events get tombstoned (replaced with a
minimal stub event) — preserving the audit trail without keeping raw payload.
"""
from __future__ import annotations
from datetime import datetime, timedelta, timezone
from typing import Any
from auto_client_acquisition.revenue_memory.events import RevenueEvent
# Event types that belong to the short-lived "operational" tier
# (high-volume engagement and signal events).
OPERATIONAL_TYPES: tuple[str, ...] = (
    "message.opened",
    "message.clicked",
    "signal.detected",
    "signal.expired",
)

# Compliance event types that belong to the "legal_hold" tier.
LEGAL_HOLD_TYPES: tuple[str, ...] = (
    "compliance.consent_recorded",
    "compliance.opt_out_received",
    "compliance.dsr_received",
    "compliance.dsr_completed",
    "compliance.blocked",
)

# Retention windows per tier, expressed in days (a year is counted as 365 days).
RETENTION_DAYS_OPERATIONAL = 90
RETENTION_DAYS_BUSINESS = 3 * 365
# NOTE(review): not read by any function visible in this file — legal-hold
# events are never expired by `is_expired`. Confirm whether it is used elsewhere.
RETENTION_DAYS_LEGAL = 7 * 365
def classify_retention_tier(event_type: str) -> str:
    """Map an event type string to its retention tier name.

    Returns ``"legal_hold"`` when the type is listed in ``LEGAL_HOLD_TYPES``,
    ``"operational"`` when it is listed in ``OPERATIONAL_TYPES``, and
    ``"business_record"`` for every other type. Legal hold is checked first,
    so it wins if a type were ever listed in both tuples.

    Matching is exact: a type such as ``"message.opened.tombstoned"`` (the
    form produced by ``tombstone_event``) is in neither tuple and therefore
    falls through to ``"business_record"``.
    """
    precedence = (
        ("legal_hold", LEGAL_HOLD_TYPES),
        ("operational", OPERATIONAL_TYPES),
    )
    for tier_name, member_types in precedence:
        if event_type in member_types:
            return tier_name
    return "business_record"
def _as_naive_utc(moment: datetime) -> datetime:
    """Return *moment* as a naive datetime expressed in UTC.

    Naive values are returned unchanged and are taken to already be in UTC,
    matching the naive-UTC default that ``apply_retention`` builds for ``now``.
    Aware values are converted to UTC and then stripped of their tzinfo.
    """
    if moment.tzinfo is None or moment.utcoffset() is None:
        return moment
    return moment.astimezone(timezone.utc).replace(tzinfo=None)


def is_expired(event: RevenueEvent, *, now: datetime) -> bool:
    """Return whether *event* has passed its retention window as of *now*.

    - ``legal_hold`` events never expire here; ``RETENTION_DAYS_LEGAL`` is not
      consulted. NOTE(review): the original comment attributes this to PDPL
      record-keeping for consents / opt-outs / DSR receipts — confirm the
      legal basis with compliance.
    - ``operational`` events use ``RETENTION_DAYS_OPERATIONAL``.
    - everything else uses ``RETENTION_DAYS_BUSINESS``.

    Args:
        event: The event to check; its ``event_type`` and ``occurred_at``
            attributes are read.
        now: The reference instant. May be naive or timezone-aware.

    Both ``now`` and ``event.occurred_at`` are normalised to naive UTC before
    subtracting, so mixing an aware value with a naive one no longer raises
    ``TypeError``. Results for inputs that are both naive or both aware are
    unchanged.
    """
    tier = classify_retention_tier(event.event_type)
    if tier == "legal_hold":
        return False  # never expires — preserves the audit trail forever

    window_days = (
        RETENTION_DAYS_OPERATIONAL
        if tier == "operational"
        else RETENTION_DAYS_BUSINESS
    )
    age = _as_naive_utc(now) - _as_naive_utc(event.occurred_at)
    # timedelta.days counts whole days only, so an event is expired once it
    # is at least (window_days + 1) full days old; future-dated events have a
    # negative day count and are never expired.
    return age.days > window_days
def tombstone_event(event: RevenueEvent) -> RevenueEvent:
    """Build a tombstone copy of *event*: same envelope, stub payload.

    The returned ``RevenueEvent`` carries over the identifiers, timestamp,
    subject, causation/correlation ids and actor of the original. Its
    ``event_type`` gets a ``.tombstoned`` suffix and its payload is replaced
    by a fixed two-key marker dict, so the original payload is not carried
    into the new object. The input event is not modified.
    """
    stub_payload: dict[str, Any] = {
        "_tombstoned": True,
        "reason": "retention_policy",
    }
    tombstoned_type = f"{event.event_type}.tombstoned"
    return RevenueEvent(
        event_id=event.event_id,
        event_type=tombstoned_type,
        customer_id=event.customer_id,
        occurred_at=event.occurred_at,
        subject_type=event.subject_type,
        subject_id=event.subject_id,
        payload=stub_payload,
        causation_id=event.causation_id,
        correlation_id=event.correlation_id,
        actor=event.actor,
    )
def apply_retention(
    events: list[RevenueEvent], *, now: datetime | None = None
) -> tuple[list[RevenueEvent], list[str]]:
    """Run the retention policy over *events* and report the outcome.

    Args:
        events: Events to evaluate. The list itself is not mutated.
        now: Reference instant for expiry checks. When omitted, the current
            UTC time is used as a naive datetime.

    Returns:
        A pair ``(survivors, purged_ids)``:

        - ``survivors`` holds, in input order, every legal-hold event
          (untouched), every event that has not expired (untouched), and a
          tombstoned replacement for each expired operational event.
        - ``purged_ids`` holds the ``event_id`` of each expired
          business-record event; those events are left out of ``survivors``.
    """
    reference = now
    if not reference:
        # Same naive-UTC default as before: take aware UTC "now", drop tzinfo.
        reference = datetime.now(timezone.utc).replace(tzinfo=None)

    survivors: list[RevenueEvent] = []
    purged_ids: list[str] = []
    for event in events:
        tier = classify_retention_tier(event.event_type)
        # Legal-hold events short-circuit before any expiry check is made.
        if tier == "legal_hold" or not is_expired(event, now=reference):
            survivors.append(event)
        elif tier == "operational":
            # Expired operational event: keep the envelope, drop the payload.
            survivors.append(tombstone_event(event))
        else:
            # Expired business record: reported for physical deletion.
            purged_ids.append(event.event_id)
    return survivors, purged_ids
def retention_summary(events: list[RevenueEvent]) -> dict[str, int]:
    """Count how many of *events* fall into each retention tier.

    Returns a dict with exactly the keys ``"operational"``,
    ``"business_record"`` and ``"legal_hold"`` (in that order); tiers with no
    events map to ``0``. The original docstring notes this feeds the Trust
    Center display.
    """
    tiers_seen = [classify_retention_tier(event.event_type) for event in events]
    tier_names = ("operational", "business_record", "legal_hold")
    return {name: tiers_seen.count(name) for name in tier_names}