system-prompts-and-models-o.../dealix/scripts/seed_demo_data.py
2026-05-01 14:03:52 +03:00

356 lines
16 KiB
Python

#!/usr/bin/env python3
"""
seed_demo_data.py — populate a fresh Dealix Postgres with realistic Saudi
demo data so dashboards look alive on first deploy.
Inserts (idempotent — uses fixed IDs):
- 10 sample AccountRecord rows across 5 sectors
- 12 ContactRecord rows (mix of business + personal emails)
- 18 SignalRecord rows (whatsapp/forms/booking/CRM)
- 10 LeadScoreRecord rows
- 6 GmailDraftRecord rows
- 4 LinkedInDraftRecord rows
- 8 EmailSendLog rows (mix of sent/replied/bounced)
- 3 SuppressionRecord rows
- 1 RawLeadImport with 12 RawLeadRow (NOTE: planned only — main() does not insert these yet; they are only covered by the purge)
Use:
python scripts/seed_demo_data.py # local DB (DATABASE_URL)
DEALIX_DEMO_PURGE=true python scripts/seed_demo_data.py # purges then reseeds
Safety: only deletes rows whose id starts with "demo_" — never touches real data.
"""
from __future__ import annotations
import asyncio
import os
import sys
from datetime import datetime, timedelta, timezone
from pathlib import Path
# Make src importable when running from project root
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from sqlalchemy import select
# These imports will fail without proper env — that's intentional
try:
from db.session import async_session_factory, init_db
from db.models import (
AccountRecord, ContactRecord, SignalRecord, LeadScoreRecord,
GmailDraftRecord, LinkedInDraftRecord, EmailSendLog,
SuppressionRecord, RawLeadImport, RawLeadRow,
)
except ImportError as e:
print(f"ERROR: cannot import models. Run from project root with DATABASE_URL set.\n {e}")
sys.exit(2)
def _utcnow() -> datetime:
return datetime.now(timezone.utc).replace(tzinfo=None)
# (sector_key, Arabic display label) pairs for the five demo sectors.
# NOTE(review): SECTORS is not referenced by the seeding code visible in this
# file; the sector keys are repeated literally in SAMPLE_ACCOUNTS.
SECTORS = [
("real_estate_developer", "تطوير عقاري"),
("hospitality", "فنادق وقاعات"),
("logistics", "شحن ولوجستيات"),
("training_center", "مراكز تدريب"),
("marketing_agency", "وكالة تسويق"),
]
# Demo accounts. Tuple layout, as unpacked by main():
#   (account_id, company_name, domain, city, sector_key, data_quality_score)
# Every id carries the "demo_" prefix that the purge step filters on.
SAMPLE_ACCOUNTS = [
("demo_acc_001", "شركة الراجحي للتطوير العقاري", "alrajhi-development.sa", "Riyadh", "real_estate_developer", 85),
("demo_acc_002", "شركة الإعمار للمشاريع", "emaar-projects.sa", "Riyadh", "real_estate_developer", 78),
("demo_acc_003", "فندق برج الرياض", "burjriyadh.com", "Riyadh", "hospitality", 72),
("demo_acc_004", "قاعة الياسمين للحفلات", "yasmin-events.sa", "Jeddah", "hospitality", 68),
("demo_acc_005", "شركة خليج الشحن السعودي", "gulf-shipping.sa", "Dammam", "logistics", 75),
("demo_acc_006", "نقل الإمداد السريع", "fastlogistics.sa", "Jeddah", "logistics", 70),
("demo_acc_007", "أكاديمية المعرفة للتدريب", "knowledge-academy.sa", "Riyadh", "training_center", 65),
("demo_acc_008", "مركز تطوير القيادات", "leadership-saudi.sa", "Riyadh", "training_center", 62),
("demo_acc_009", "وكالة فعل التسويق الرقمي", "fail-marketing.sa", "Riyadh", "marketing_agency", 80),
("demo_acc_010", "وكالة Rafal للإبداع", "rafal-creative.sa", "Jeddah", "marketing_agency", 76),
]
# Demo contacts. Tuple layout, as unpacked by main():
#   (contact_id, account_id, name, role, email, phone)
# account_id refers to an id in SAMPLE_ACCOUNTS; role may be None.
# Several accounts have more than one contact and demo_acc_010's contact is
# the 10th row, so list positions here do NOT line up with SAMPLE_ACCOUNTS.
SAMPLE_CONTACTS = [
("demo_ct_001", "demo_acc_001", "عبدالله الزهراني", "Sales Director", "abdullah@alrajhi-development.sa", "+966500001001"),
("demo_ct_002", "demo_acc_001", "منى السبيعي", "Marketing Lead", "mona@alrajhi-development.sa", "+966500001002"),
("demo_ct_003", "demo_acc_002", "خالد العتيبي", "Founder", "khalid@emaar-projects.sa", "+966500001003"),
("demo_ct_004", "demo_acc_003", "Mr. Ahmed", "GM", "gm@burjriyadh.com", "+966500001004"),
("demo_ct_005", "demo_acc_004", "نوال الحربي", "Events Manager", "nawal@yasmin-events.sa", "+966500001005"),
("demo_ct_006", "demo_acc_005", "Ali Al-Qahtani", "Operations", "ali@gulf-shipping.sa", "+966500001006"),
("demo_ct_007", "demo_acc_006", "Sultan Al-Mutairi", "Sales", "sultan@fastlogistics.sa", "+966500001007"),
("demo_ct_008", "demo_acc_007", "Layla Al-Ghamdi", "Director", "layla@knowledge-academy.sa", "+966500001008"),
("demo_ct_009", "demo_acc_009", "Yousef Al-Dosari", "CEO", "yousef@fail-marketing.sa", "+966500001009"),
("demo_ct_010", "demo_acc_010", "Rana Khaled", "Partner", "rana@rafal-creative.sa", "+966500001010"),
# 2 personal-email demos (will be demoted to phone-only at outreach time)
("demo_ct_011", "demo_acc_006", "Saad Personal", None, "saad.personal@gmail.com", "+966500001011"),
("demo_ct_012", "demo_acc_008", "Trainer", None, "trainer.demo@hotmail.com", "+966500001012"),
]
# Demo signals. Tuple layout, as unpacked by main():
#   (signal_id, account_id, signal_type, signal_value, confidence)
# confidence is a float stored as-is on SignalRecord.confidence.
SAMPLE_SIGNALS = [
("demo_sig_001", "demo_acc_001", "whatsapp_button", "wa.me/966500001001", 0.9),
("demo_sig_002", "demo_acc_001", "website_form", "/contact", 0.85),
("demo_sig_003", "demo_acc_001", "booking_link", "/معاينة", 0.8),
("demo_sig_004", "demo_acc_001", "high_review_count", "127", 0.75),
("demo_sig_005", "demo_acc_002", "website_form", "/quote-request", 0.85),
("demo_sig_006", "demo_acc_002", "careers_hiring", "/careers", 0.7),
("demo_sig_007", "demo_acc_003", "booking_link", "/reservations", 0.9),
("demo_sig_008", "demo_acc_003", "high_rating", "4.6", 0.7),
("demo_sig_009", "demo_acc_004", "whatsapp_button", "wa.me/966500001005", 0.85),
("demo_sig_010", "demo_acc_005", "website_form", "/rfq", 0.85),
("demo_sig_011", "demo_acc_005", "crm_in_use", "hubspot", 0.9),
("demo_sig_012", "demo_acc_006", "ads_pixel", "meta_pixel", 0.7),
("demo_sig_013", "demo_acc_007", "pricing_page", "/الباقات", 0.7),
("demo_sig_014", "demo_acc_008", "booking_link", "/تسجيل", 0.8),
("demo_sig_015", "demo_acc_009", "ecom_mena", "salla", 0.8),
("demo_sig_016", "demo_acc_010", "chat_widget", "intercom", 0.75),
("demo_sig_017", "demo_acc_001", "multi_branch", "3", 0.85),
("demo_sig_018", "demo_acc_003", "sector_urgency", "high:hospitality", 0.85),
]
async def _purge_demo() -> int:
    """Delete rows whose id starts with the literal prefix ``demo_``.

    Each model is handled best-effort: if selecting or deleting its rows
    raises, a warning is printed and the purge moves on to the next model.
    All deletions are committed in one transaction at the end.

    Returns:
        The number of rows deleted, or 0 when the final commit failed and the
        deletions were rolled back.
    """
    # AccountRecord is purged last, after the rows that carry an account_id.
    # NOTE(review): presumably this ordering respects foreign keys — confirm
    # against the model definitions.
    purge_order = [
        EmailSendLog, GmailDraftRecord, LinkedInDraftRecord,
        LeadScoreRecord, SignalRecord, ContactRecord,
        SuppressionRecord, RawLeadRow, RawLeadImport, AccountRecord,
    ]
    deleted = 0
    async with async_session_factory() as session:
        for model in purge_order:
            try:
                # autoescape=True makes the "_" in the prefix literal. A plain
                # LIKE 'demo_%' treats "_" as a single-character wildcard and
                # would also match non-demo ids such as "demo1..." or
                # "demography...".
                demo_rows = (await session.execute(
                    select(model).where(
                        model.id.startswith("demo_", autoescape=True)
                    )
                )).scalars().all()
                for row in demo_rows:
                    await session.delete(row)
                    deleted += 1
            except Exception as exc:  # noqa: BLE001
                print(f" warn: skip {model.__name__}: {exc}")
        try:
            await session.commit()
        except Exception as exc:  # noqa: BLE001
            # Nothing was persisted, so do not report the pending deletions
            # as purged.
            await session.rollback()
            print(f" warn: purge commit failed, rolled back: {exc}")
            return 0
    return deleted
async def _exists(session, model, row_id: str) -> bool:
    """Return True when a ``model`` row whose ``id`` equals ``row_id`` is already stored."""
    found = (await session.execute(
        select(model).where(model.id == row_id)
    )).scalar_one_or_none()
    return found is not None


def _owning_account(contact: tuple) -> tuple:
    """Return the SAMPLE_ACCOUNTS row that a SAMPLE_CONTACTS row points at.

    Raises:
        KeyError: if the contact's account id is not present in SAMPLE_ACCOUNTS.
    """
    accounts_by_id = {row[0]: row for row in SAMPLE_ACCOUNTS}
    return accounts_by_id[contact[1]]


async def _seed_accounts(session) -> None:
    """Queue every SAMPLE_ACCOUNTS row that is not stored yet."""
    for aid, name, domain, city, sector, dq in SAMPLE_ACCOUNTS:
        if await _exists(session, AccountRecord, aid):
            continue
        session.add(AccountRecord(
            id=aid, company_name=name,
            normalized_name=name.lower().replace(" ", "")[:120],
            domain=domain, website=f"https://{domain}",
            city=city, country="SA", sector=sector,
            google_place_id=f"ChIJ_demo_{aid[-3:]}",
            source_count=2, best_source="seed_demo",
            risk_level="medium", status="enriched",
            data_quality_score=float(dq),
            extra={
                "allowed_use": "business_contact_research_only",
                "consent_status": "legitimate_interest_business_directory",
                "is_demo": True,
            },
        ))
    print(f" ✓ accounts: {len(SAMPLE_ACCOUNTS)} prepared")


async def _seed_contacts(session) -> None:
    """Queue every SAMPLE_CONTACTS row that is not stored yet."""
    personal_domains = ("@gmail.com", "@hotmail.com")
    for cid, acc_id, name, role, email, phone in SAMPLE_CONTACTS:
        if await _exists(session, ContactRecord, cid):
            continue
        # Contacts on a personal mailbox are flagged with a higher risk level.
        is_personal = bool(email) and any(
            dom in email.lower() for dom in personal_domains
        )
        session.add(ContactRecord(
            id=cid, account_id=acc_id,
            name=name, role=role, email=email, phone=phone,
            source="seed_demo",
            consent_status="legitimate_interest",
            opt_out=False,
            risk_level="high" if is_personal else "medium",
        ))
    print(f" ✓ contacts: {len(SAMPLE_CONTACTS)} prepared")


async def _seed_signals(session, now: datetime) -> None:
    """Queue every SAMPLE_SIGNALS row that is not stored yet, dated two days before ``now``."""
    for sid, acc_id, stype, sval, conf in SAMPLE_SIGNALS:
        if await _exists(session, SignalRecord, sid):
            continue
        session.add(SignalRecord(
            id=sid, account_id=acc_id,
            signal_type=stype, signal_value=sval,
            source_url=f"https://demo.example.sa/{sid}",
            confidence=conf,
            detected_at=now - timedelta(days=2),
        ))
    print(f" ✓ signals: {len(SAMPLE_SIGNALS)} prepared")


async def _seed_lead_scores(session) -> None:
    """Queue one LeadScoreRecord per sample account, with scores rising by list position."""
    for i, account in enumerate(SAMPLE_ACCOUNTS):
        score_id = f"demo_ls_{i+1:03d}"
        if await _exists(session, LeadScoreRecord, score_id):
            continue
        fit = 25 + (i * 2)  # 25-43
        intent = 18 + (i * 1.5)
        urgency = 15 + i
        risk = 10
        total = fit + intent + urgency + 8 - (risk * 0.1)
        priority = "P0" if total >= 80 else "P1" if total >= 65 else "P2"
        session.add(LeadScoreRecord(
            id=score_id, account_id=account[0],
            fit_score=fit, intent_score=intent, urgency_score=urgency,
            risk_score=risk, total_score=total, priority=priority,
            recommended_channel="phone_task" if i % 2 == 0 else "email_warm",
            # Separator added: the original ran intent and priority together
            # (e.g. "intent=18.0P1").
            reason=f"demo: fit={fit} intent={intent} priority={priority}",
        ))
    print(f" ✓ lead_scores: {len(SAMPLE_ACCOUNTS)} prepared")


async def _seed_gmail_drafts(session) -> None:
    """Queue six Gmail drafts, one for each of the first six sample contacts."""
    for i, contact in enumerate(SAMPLE_CONTACTS[:6]):
        draft_id = f"demo_gd_{i+1:03d}"
        if await _exists(session, GmailDraftRecord, draft_id):
            continue
        # Use the contact's own account so the recipient and the account
        # agree; pairing both lists by position mixes up companies.
        account = _owning_account(contact)
        acc_id, acc_name = account[0], account[1]
        session.add(GmailDraftRecord(
            id=draft_id, account_id=acc_id, queue_id=None,
            to_email=contact[4], subject=f"Dealix — تجربة لـ {acc_name[:50]}",
            body_plain=f"السلام عليكم،\n\nنرى لشركتكم فرصة في {acc_name}. Pilot 7 أيام بـ 499 ريال — استرجاع كامل لو لم نرد على lead عربي.\n\nسامي\nDealix",
            sender_email="sami@dealix.me", status="created",
        ))
    print(" ✓ gmail_drafts: 6 prepared")


async def _seed_linkedin_drafts(session) -> None:
    """Queue four LinkedIn drafts, one for each of the first four sample accounts."""
    for i, account in enumerate(SAMPLE_ACCOUNTS[:4]):
        draft_id = f"demo_ld_{i+1:03d}"
        if await _exists(session, LinkedInDraftRecord, draft_id):
            continue
        acc_id, acc_name, _domain, city, sector, _dq = account
        session.add(LinkedInDraftRecord(
            id=draft_id, account_id=acc_id,
            company_name=acc_name[:200], contact_name=None,
            profile_search_query=f'"{acc_name}" {city} site:linkedin.com',
            company_context=f"Saudi {sector} in {city}",
            reason_for_outreach="signal: high inbound + WhatsApp present",
            message_ar=f"أهلاً [اسم المسؤول]، نقترح Pilot 499 ريال على {acc_name}.",
            followup_day_3="متابعة سريعة — هل أعرض على فريقكم؟",
            followup_day_7="آخر متابعة. لو لاحقاً يناسب، أنا هنا.",
            status="draft",
        ))
    print(" ✓ linkedin_drafts: 4 prepared")


async def _seed_email_log(session, now: datetime) -> None:
    """Queue eight EmailSendLog rows mixing sent/replied/bounced/queued over the last days."""
    # (status, days_ago) — days_ago positions the send time relative to ``now``.
    statuses = [
        ("sent", 2),
        ("sent", 1),
        ("replied", 0),  # today reply
        ("replied", 1),
        ("bounced", 1),
        ("sent", 3),
        ("sent", 2),
        ("queued", 0),
    ]
    reply_labels = ["interested", "ask_price", "ask_demo"]
    for i, (status, days_ago) in enumerate(statuses):
        log_id = f"demo_es_{i+1:03d}"
        if await _exists(session, EmailSendLog, log_id):
            continue
        contact = SAMPLE_CONTACTS[i % len(SAMPLE_CONTACTS)]
        # Derive the account from the contact so to_email belongs to account_id.
        account = _owning_account(contact)
        is_reply = status == "replied"
        is_queued = status == "queued"
        session.add(EmailSendLog(
            id=log_id, account_id=account[0], queue_id=None,
            to_email=contact[4], subject=f"Pilot Dealix — {account[1][:40]}",
            body_preview="نقترح Pilot 7 أيام...",
            sender_email="sami@dealix.me",
            status=status,
            # Queued mail has not been handed to Gmail, so it has no message
            # id and no send time.
            gmail_message_id=None if is_queued else f"demo_msg_{i:04d}",
            bounce_reason="bounced_test_only" if status == "bounced" else None,
            reply_classification=reply_labels[i % 3] if is_reply else None,
            reply_received_at=(now - timedelta(hours=days_ago * 4)) if is_reply else None,
            sent_at=None if is_queued else now - timedelta(days=days_ago, hours=i),
            sequence_step=0,
            compliance_check={"allowed": True, "blocked_reasons": []},
        ))
    print(f" ✓ email_send_log: {len(statuses)} prepared")


async def _seed_suppressions(session) -> None:
    """Queue three SuppressionRecord rows: one email, one phone, one domain."""
    # (suppression_id, email, phone, domain, reason) — exactly one target per row.
    sup_rows = [
        ("demo_sup_001", "test-stop@example.com", None, None, "opt_out_via_reply"),
        ("demo_sup_002", None, "+966500099999", None, "manual_complaint"),
        ("demo_sup_003", None, None, "blocked-domain.com", "domain_blocked"),
    ]
    for sup_id, email, phone, domain, reason in sup_rows:
        if await _exists(session, SuppressionRecord, sup_id):
            continue
        session.add(SuppressionRecord(
            id=sup_id, email=email, phone=phone, domain=domain, reason=reason,
        ))
    print(f" ✓ suppression: {len(sup_rows)} prepared")


async def main() -> int:
    """Seed the database with the demo data set and return a process exit code.

    Steps: call ``init_db()``, optionally purge existing demo rows when the
    ``DEALIX_DEMO_PURGE`` environment variable is ``true``/``1``/``yes``, then
    queue every demo row whose fixed id is not stored yet and commit them in a
    single transaction. Rows that already exist are skipped, so re-running the
    script is safe.

    Returns:
        0 on success, 2 when ``init_db()`` raises, 3 when the commit fails
        (the transaction is rolled back in that case).
    """
    print("📦 Dealix demo data seeder")
    print(f" DATABASE_URL set: {bool(os.getenv('DATABASE_URL'))}")
    # Try init_db (creates tables if not exist)
    try:
        await init_db()
        print(" ✓ DB tables ensured")
    except Exception as exc:  # noqa: BLE001
        print(f" ✗ init_db failed: {exc}")
        return 2
    if os.getenv("DEALIX_DEMO_PURGE", "").lower() in {"true", "1", "yes"}:
        print("\n🗑️ purging existing demo_* rows...")
        purged = await _purge_demo()
        print(f" purged {purged} rows")
    # One shared timestamp keeps all relative dates consistent within a run.
    now = _utcnow()
    async with async_session_factory() as session:
        await _seed_accounts(session)
        await _seed_contacts(session)
        await _seed_signals(session, now)
        await _seed_lead_scores(session)
        await _seed_gmail_drafts(session)
        await _seed_linkedin_drafts(session)
        await _seed_email_log(session, now)
        await _seed_suppressions(session)
        # Commit
        try:
            await session.commit()
            print("\n✅ all demo data committed")
        except Exception as exc:  # noqa: BLE001
            await session.rollback()
            print(f"\n❌ commit failed: {exc}")
            return 3
    print("\n📊 Verify with:")
    print(" GET /api/v1/dashboard/dominance")
    print(" GET /api/v1/data/accounts?limit=10")
    print(" GET /api/v1/gmail/drafts/today")
    print(" GET /api/v1/linkedin/drafts/today")
    return 0
if __name__ == "__main__":
    # Run the async seeder to completion and use its return value as the
    # process exit status.
    exit_code = asyncio.run(main())
    sys.exit(exit_code)