mirror of
https://github.com/x1xhlol/system-prompts-and-models-of-ai-tools.git
synced 2026-06-18 15:29:36 +00:00
356 lines
16 KiB
Python
356 lines
16 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
seed_demo_data.py — populate a fresh Dealix Postgres with realistic Saudi
|
|
demo data so dashboards look alive on first deploy.
|
|
|
|
Inserts (idempotent — uses fixed IDs):
|
|
- 10 sample AccountRecord rows across 5 sectors
|
|
- 12 ContactRecord rows (mix of business + personal emails)
|
|
- 18 SignalRecord rows (whatsapp/forms/booking/CRM)
|
|
- 10 LeadScoreRecord rows
|
|
- 6 GmailDraftRecord rows
|
|
- 4 LinkedInDraftRecord rows
|
|
- 8 EmailSendLog rows (mix of sent/replied/bounced/queued)
|
|
- 3 SuppressionRecord rows
|
|
- 1 RawLeadImport with 12 RawLeadRow (NOTE: not inserted by main() yet; these tables are only covered by the purge)
|
|
|
|
Use:
|
|
python scripts/seed_demo_data.py # local DB (DATABASE_URL)
|
|
DEALIX_DEMO_PURGE=true python scripts/seed_demo_data.py # purges then reseeds
|
|
|
|
Safety: only deletes rows whose id starts with "demo_" — never touches real data.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import os
|
|
import sys
|
|
from datetime import datetime, timedelta, timezone
|
|
from pathlib import Path
|
|
|
|
# Make src importable when running from project root
|
|
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
|
|
|
from sqlalchemy import select
|
|
|
|
# These imports will fail without proper env — that's intentional
|
|
try:
|
|
from db.session import async_session_factory, init_db
|
|
from db.models import (
|
|
AccountRecord, ContactRecord, SignalRecord, LeadScoreRecord,
|
|
GmailDraftRecord, LinkedInDraftRecord, EmailSendLog,
|
|
SuppressionRecord, RawLeadImport, RawLeadRow,
|
|
)
|
|
except ImportError as e:
|
|
print(f"ERROR: cannot import models. Run from project root with DATABASE_URL set.\n {e}")
|
|
sys.exit(2)
|
|
|
|
|
|
def _utcnow() -> datetime:
    """Return the current UTC time as a naive ``datetime`` (tzinfo removed)."""
    aware_now = datetime.now(tz=timezone.utc)
    # Strip the tzinfo so the value is a naive datetime carrying UTC wall time.
    return aware_now.replace(tzinfo=None)
|
|
|
|
|
|
# Sector catalogue as (sector key, Arabic label) pairs. The keys are the same
# strings used in the sector column of SAMPLE_ACCOUNTS.
# NOTE(review): SECTORS is not referenced anywhere else in the visible script.
SECTORS = [
|
|
("real_estate_developer", "تطوير عقاري"),
|
|
("hospitality", "فنادق وقاعات"),
|
|
("logistics", "شحن ولوجستيات"),
|
|
("training_center", "مراكز تدريب"),
|
|
("marketing_agency", "وكالة تسويق"),
|
|
]
|
|
|
|
# Demo accounts. Each tuple is unpacked by main() as
# (id, company_name, domain, city, sector, data_quality_score).
# Every id carries the "demo_" prefix that the purge step filters on.
SAMPLE_ACCOUNTS = [
|
|
("demo_acc_001", "شركة الراجحي للتطوير العقاري", "alrajhi-development.sa", "Riyadh", "real_estate_developer", 85),
|
|
("demo_acc_002", "شركة الإعمار للمشاريع", "emaar-projects.sa", "Riyadh", "real_estate_developer", 78),
|
|
("demo_acc_003", "فندق برج الرياض", "burjriyadh.com", "Riyadh", "hospitality", 72),
|
|
("demo_acc_004", "قاعة الياسمين للحفلات", "yasmin-events.sa", "Jeddah", "hospitality", 68),
|
|
("demo_acc_005", "شركة خليج الشحن السعودي", "gulf-shipping.sa", "Dammam", "logistics", 75),
|
|
("demo_acc_006", "نقل الإمداد السريع", "fastlogistics.sa", "Jeddah", "logistics", 70),
|
|
("demo_acc_007", "أكاديمية المعرفة للتدريب", "knowledge-academy.sa", "Riyadh", "training_center", 65),
|
|
("demo_acc_008", "مركز تطوير القيادات", "leadership-saudi.sa", "Riyadh", "training_center", 62),
|
|
("demo_acc_009", "وكالة فعل التسويق الرقمي", "fail-marketing.sa", "Riyadh", "marketing_agency", 80),
|
|
("demo_acc_010", "وكالة Rafal للإبداع", "rafal-creative.sa", "Jeddah", "marketing_agency", 76),
|
|
]
|
|
|
|
# Demo contacts. Each tuple is unpacked by main() as
# (id, account_id, name, role, email, phone); account_id refers to an id in
# SAMPLE_ACCOUNTS. An account may have several contacts (demo_acc_001 has two),
# so this list is NOT index-aligned with SAMPLE_ACCOUNTS.
SAMPLE_CONTACTS = [
|
|
("demo_ct_001", "demo_acc_001", "عبدالله الزهراني", "Sales Director", "abdullah@alrajhi-development.sa", "+966500001001"),
|
|
("demo_ct_002", "demo_acc_001", "منى السبيعي", "Marketing Lead", "mona@alrajhi-development.sa", "+966500001002"),
|
|
("demo_ct_003", "demo_acc_002", "خالد العتيبي", "Founder", "khalid@emaar-projects.sa", "+966500001003"),
|
|
("demo_ct_004", "demo_acc_003", "Mr. Ahmed", "GM", "gm@burjriyadh.com", "+966500001004"),
|
|
("demo_ct_005", "demo_acc_004", "نوال الحربي", "Events Manager", "nawal@yasmin-events.sa", "+966500001005"),
|
|
("demo_ct_006", "demo_acc_005", "Ali Al-Qahtani", "Operations", "ali@gulf-shipping.sa", "+966500001006"),
|
|
("demo_ct_007", "demo_acc_006", "Sultan Al-Mutairi", "Sales", "sultan@fastlogistics.sa", "+966500001007"),
|
|
("demo_ct_008", "demo_acc_007", "Layla Al-Ghamdi", "Director", "layla@knowledge-academy.sa", "+966500001008"),
|
|
("demo_ct_009", "demo_acc_009", "Yousef Al-Dosari", "CEO", "yousef@fail-marketing.sa", "+966500001009"),
|
|
("demo_ct_010", "demo_acc_010", "Rana Khaled", "Partner", "rana@rafal-creative.sa", "+966500001010"),
|
|
# 2 personal-email demos (will be demoted to phone-only at outreach time)
|
|
("demo_ct_011", "demo_acc_006", "Saad Personal", None, "saad.personal@gmail.com", "+966500001011"),
|
|
("demo_ct_012", "demo_acc_008", "Trainer", None, "trainer.demo@hotmail.com", "+966500001012"),
|
|
]
|
|
|
|
# Demo signals. Each tuple is unpacked by main() as
# (id, account_id, signal_type, signal_value, confidence); account_id refers to
# an id in SAMPLE_ACCOUNTS and signal_value is always a string here.
SAMPLE_SIGNALS = [
|
|
("demo_sig_001", "demo_acc_001", "whatsapp_button", "wa.me/966500001001", 0.9),
|
|
("demo_sig_002", "demo_acc_001", "website_form", "/contact", 0.85),
|
|
("demo_sig_003", "demo_acc_001", "booking_link", "/معاينة", 0.8),
|
|
("demo_sig_004", "demo_acc_001", "high_review_count", "127", 0.75),
|
|
("demo_sig_005", "demo_acc_002", "website_form", "/quote-request", 0.85),
|
|
("demo_sig_006", "demo_acc_002", "careers_hiring", "/careers", 0.7),
|
|
("demo_sig_007", "demo_acc_003", "booking_link", "/reservations", 0.9),
|
|
("demo_sig_008", "demo_acc_003", "high_rating", "4.6", 0.7),
|
|
("demo_sig_009", "demo_acc_004", "whatsapp_button", "wa.me/966500001005", 0.85),
|
|
("demo_sig_010", "demo_acc_005", "website_form", "/rfq", 0.85),
|
|
("demo_sig_011", "demo_acc_005", "crm_in_use", "hubspot", 0.9),
|
|
("demo_sig_012", "demo_acc_006", "ads_pixel", "meta_pixel", 0.7),
|
|
("demo_sig_013", "demo_acc_007", "pricing_page", "/الباقات", 0.7),
|
|
("demo_sig_014", "demo_acc_008", "booking_link", "/تسجيل", 0.8),
|
|
("demo_sig_015", "demo_acc_009", "ecom_mena", "salla", 0.8),
|
|
("demo_sig_016", "demo_acc_010", "chat_widget", "intercom", 0.75),
|
|
("demo_sig_017", "demo_acc_001", "multi_branch", "3", 0.85),
|
|
("demo_sig_018", "demo_acc_003", "sector_urgency", "high:hospitality", 0.85),
|
|
]
|
|
|
|
|
|
async def _purge_demo() -> int:
    """Delete rows whose id starts with the literal prefix ``demo_``.

    Models are visited in the fixed order listed below (AccountRecord last,
    RawLeadRow before RawLeadImport) — presumably so dependent rows go before
    the rows they reference; confirm against the schema.

    A failure while handling one model is reported and that model is skipped
    (best-effort). If the final commit fails, the transaction is rolled back,
    the error is printed and 0 is returned, because nothing was deleted.

    Returns:
        Number of rows deleted, or 0 when the commit failed.
    """
    count = 0
    async with async_session_factory() as session:
        for model in [
            EmailSendLog, GmailDraftRecord, LinkedInDraftRecord,
            LeadScoreRecord, SignalRecord, ContactRecord,
            SuppressionRecord, RawLeadRow, RawLeadImport, AccountRecord,
        ]:
            try:
                # autoescape=True escapes the "_" in the prefix. In a plain
                # LIKE pattern "_" matches ANY single character, so
                # LIKE 'demo_%' would also match ids such as "demographics_1".
                rows = (await session.execute(
                    select(model).where(
                        model.id.startswith("demo_", autoescape=True)
                    )
                )).scalars().all()
                for row in rows:
                    await session.delete(row)
                    count += 1
            except Exception as exc:  # noqa: BLE001
                print(f" warn: skip {model.__name__}: {exc}")
        try:
            await session.commit()
        except Exception as exc:  # noqa: BLE001
            await session.rollback()
            # Do not report rows as purged when the delete never landed.
            print(f" ✗ purge commit failed, nothing deleted: {exc}")
            return 0
    return count
|
|
|
|
|
|
async def _is_seeded(session, model, row_id: str) -> bool:
    """Return True when a row of *model* whose id equals *row_id* already exists."""
    found = (await session.execute(
        select(model).where(model.id == row_id)
    )).scalar_one_or_none()
    return found is not None


async def _seed_accounts(session) -> None:
    """Add one AccountRecord per SAMPLE_ACCOUNTS entry that is not present yet."""
    for aid, name, domain, city, sector, dq in SAMPLE_ACCOUNTS:
        if await _is_seeded(session, AccountRecord, aid):
            continue
        session.add(AccountRecord(
            id=aid, company_name=name,
            normalized_name=name.lower().replace(" ", "")[:120],
            domain=domain, website=f"https://{domain}",
            city=city, country="SA", sector=sector,
            google_place_id=f"ChIJ_demo_{aid[-3:]}",
            source_count=2, best_source="seed_demo",
            risk_level="medium", status="enriched",
            data_quality_score=float(dq),
            extra={
                "allowed_use": "business_contact_research_only",
                "consent_status": "legitimate_interest_business_directory",
                "is_demo": True,
            },
        ))
    print(f" ✓ accounts: {len(SAMPLE_ACCOUNTS)} prepared")


async def _seed_contacts(session) -> None:
    """Add one ContactRecord per SAMPLE_CONTACTS entry that is not present yet."""
    for cid, acc_id, name, role, email, phone in SAMPLE_CONTACTS:
        if await _is_seeded(session, ContactRecord, cid):
            continue
        # Contacts on gmail/hotmail addresses are flagged with a higher risk level.
        is_personal = bool(email) and any(
            p in email.lower() for p in ["@gmail.com", "@hotmail.com"]
        )
        session.add(ContactRecord(
            id=cid, account_id=acc_id,
            name=name, role=role, email=email, phone=phone,
            source="seed_demo",
            consent_status="legitimate_interest",
            opt_out=False,
            risk_level="high" if is_personal else "medium",
        ))
    print(f" ✓ contacts: {len(SAMPLE_CONTACTS)} prepared")


async def _seed_signals(session, now: datetime) -> None:
    """Add one SignalRecord per SAMPLE_SIGNALS entry, detected two days before *now*."""
    for sid, acc_id, stype, sval, conf in SAMPLE_SIGNALS:
        if await _is_seeded(session, SignalRecord, sid):
            continue
        session.add(SignalRecord(
            id=sid, account_id=acc_id,
            signal_type=stype, signal_value=sval,
            source_url=f"https://demo.example.sa/{sid}",
            confidence=conf,
            detected_at=now - timedelta(days=2),
        ))
    print(f" ✓ signals: {len(SAMPLE_SIGNALS)} prepared")


async def _seed_lead_scores(session) -> None:
    """Add one LeadScoreRecord per account, with scores derived from its list index."""
    for i, (aid, *_) in enumerate(SAMPLE_ACCOUNTS):
        score_id = f"demo_ls_{i+1:03d}"
        if await _is_seeded(session, LeadScoreRecord, score_id):
            continue
        fit = 25 + (i * 2)  # 25-43
        intent = 18 + (i * 1.5)
        urgency = 15 + i
        risk = 10
        # NOTE(review): total reaches 105.5 for the last account — confirm
        # whether total_score is expected to stay within 0-100.
        total = fit + intent + urgency + 8 - (risk * 0.1)
        priority = "P0" if total >= 80 else "P1" if total >= 65 else "P2"
        session.add(LeadScoreRecord(
            id=score_id, account_id=aid,
            fit_score=fit, intent_score=intent, urgency_score=urgency,
            risk_score=risk, total_score=total, priority=priority,
            recommended_channel="phone_task" if i % 2 == 0 else "email_warm",
            reason=f"demo: fit={fit} intent={intent} → {priority}",
        ))
    print(f" ✓ lead_scores: {len(SAMPLE_ACCOUNTS)} prepared")


async def _seed_gmail_drafts(session) -> None:
    """Add six GmailDraftRecord rows addressed to the first six sample contacts."""
    accounts_by_id = {acc[0]: acc for acc in SAMPLE_ACCOUNTS}
    for i, contact in enumerate(SAMPLE_CONTACTS[:6]):
        draft_id = f"demo_gd_{i+1:03d}"
        if await _is_seeded(session, GmailDraftRecord, draft_id):
            continue
        _, acc_id, _, _, email, _ = contact
        # Resolve the account through the contact's own account_id.
        # SAMPLE_CONTACTS is not index-aligned with SAMPLE_ACCOUNTS, so pairing
        # the two lists by position would address a draft about one company
        # to a contact at another.
        company = accounts_by_id[acc_id][1]
        session.add(GmailDraftRecord(
            id=draft_id, account_id=acc_id, queue_id=None,
            to_email=email, subject=f"Dealix — تجربة لـ {company[:50]}",
            body_plain=f"السلام عليكم،\n\nنرى لشركتكم فرصة في {company}. Pilot 7 أيام بـ 499 ريال — استرجاع كامل لو لم نرد على lead عربي.\n\nسامي\nDealix",
            sender_email="sami@dealix.me", status="created",
        ))
    print(" ✓ gmail_drafts: 6 prepared")


async def _seed_linkedin_drafts(session) -> None:
    """Add four LinkedInDraftRecord rows for the first four sample accounts."""
    for i, (aid, company, _, city, sector, _) in enumerate(SAMPLE_ACCOUNTS[:4]):
        draft_id = f"demo_ld_{i+1:03d}"
        if await _is_seeded(session, LinkedInDraftRecord, draft_id):
            continue
        session.add(LinkedInDraftRecord(
            id=draft_id, account_id=aid,
            company_name=company[:200], contact_name=None,
            profile_search_query=f'"{company}" {city} site:linkedin.com',
            company_context=f"Saudi {sector} in {city}",
            reason_for_outreach="signal: high inbound + WhatsApp present",
            message_ar=f"أهلاً [اسم المسؤول]، نقترح Pilot 499 ريال على {company}.",
            followup_day_3="متابعة سريعة — هل أعرض على فريقكم؟",
            followup_day_7="آخر متابعة. لو لاحقاً يناسب، أنا هنا.",
            status="draft",
        ))
    print(" ✓ linkedin_drafts: 4 prepared")


async def _seed_email_log(session, now: datetime) -> None:
    """Add eight EmailSendLog rows (sent/replied/bounced/queued) dated relative to *now*."""
    # (status, days_ago) — days_ago offsets sent_at back from *now*.
    statuses = [
        ("sent", 2),
        ("sent", 1),
        ("replied", 0),  # today reply
        ("replied", 1),
        ("bounced", 1),
        ("sent", 3),
        ("sent", 2),
        ("queued", 0),
    ]
    accounts_by_id = {acc[0]: acc for acc in SAMPLE_ACCOUNTS}
    for i, (status, days_ago) in enumerate(statuses):
        log_id = f"demo_es_{i+1:03d}"
        if await _is_seeded(session, EmailSendLog, log_id):
            continue
        contact = SAMPLE_CONTACTS[i % len(SAMPLE_CONTACTS)]
        # Derive the account from the contact (see _seed_gmail_drafts) so
        # account_id, subject and to_email all refer to the same company.
        acc_id = contact[1]
        company = accounts_by_id[acc_id][1]
        sent_at = now - timedelta(days=days_ago, hours=i)
        replied_at = (now - timedelta(hours=days_ago * 4)) if status == "replied" else None
        classification = None
        if status == "replied":
            classification = ["interested", "ask_price", "ask_demo"][i % 3]
        session.add(EmailSendLog(
            id=log_id, account_id=acc_id, queue_id=None,
            to_email=contact[4], subject=f"Pilot Dealix — {company[:40]}",
            body_preview="نقترح Pilot 7 أيام...",
            sender_email="sami@dealix.me",
            status=status,
            # Queued rows have not been sent yet: no message id, no sent_at.
            gmail_message_id=f"demo_msg_{i:04d}" if status != "queued" else None,
            bounce_reason="bounced_test_only" if status == "bounced" else None,
            reply_classification=classification,
            reply_received_at=replied_at,
            sent_at=sent_at if status != "queued" else None,
            sequence_step=0,
            compliance_check={"allowed": True, "blocked_reasons": []},
        ))
    print(f" ✓ email_send_log: {len(statuses)} prepared")


async def _seed_suppressions(session) -> None:
    """Add three SuppressionRecord rows: one by email, one by phone, one by domain."""
    sup_rows = [
        ("demo_sup_001", "test-stop@example.com", None, None, "opt_out_via_reply"),
        ("demo_sup_002", None, "+966500099999", None, "manual_complaint"),
        ("demo_sup_003", None, None, "blocked-domain.com", "domain_blocked"),
    ]
    for sid, email, phone, domain, reason in sup_rows:
        if await _is_seeded(session, SuppressionRecord, sid):
            continue
        session.add(SuppressionRecord(
            id=sid, email=email, phone=phone, domain=domain, reason=reason,
        ))
    print(f" ✓ suppression: {len(sup_rows)} prepared")


async def main() -> int:
    """Seed the demo rows and return a process exit code.

    Steps: call ``init_db()``, optionally purge existing demo rows when the
    ``DEALIX_DEMO_PURGE`` environment variable is ``true``/``1``/``yes``
    (case-insensitive), then add every demo row whose fixed id is not present
    yet and commit them in a single transaction.

    Returns:
        0 on success, 2 when ``init_db()`` raises, 3 when the commit fails
        (the transaction is rolled back in that case).
    """
    print("📦 Dealix demo data seeder")
    print(f" DATABASE_URL set: {bool(os.getenv('DATABASE_URL'))}")

    # Try init_db (per the original comment: creates tables if they do not exist)
    try:
        await init_db()
        print(" ✓ DB tables ensured")
    except Exception as exc:  # noqa: BLE001
        print(f" ✗ init_db failed: {exc}")
        return 2

    if os.getenv("DEALIX_DEMO_PURGE", "").lower() in {"true", "1", "yes"}:
        print("\n🗑️ purging existing demo_* rows...")
        purged = await _purge_demo()
        print(f" purged {purged} rows")

    # One shared timestamp so all relative dates in this run are consistent.
    now = _utcnow()

    async with async_session_factory() as session:
        await _seed_accounts(session)
        await _seed_contacts(session)
        await _seed_signals(session, now)
        await _seed_lead_scores(session)
        await _seed_gmail_drafts(session)
        await _seed_linkedin_drafts(session)
        await _seed_email_log(session, now)
        await _seed_suppressions(session)

        # Commit
        try:
            await session.commit()
            print("\n✅ all demo data committed")
        except Exception as exc:  # noqa: BLE001
            await session.rollback()
            print(f"\n❌ commit failed: {exc}")
            return 3

    print("\n📊 Verify with:")
    print(" GET /api/v1/dashboard/dominance")
    print(" GET /api/v1/data/accounts?limit=10")
    print(" GET /api/v1/gmail/drafts/today")
    print(" GET /api/v1/linkedin/drafts/today")
    return 0
|
|
|
|
|
|
# Script entry point: run the async seeder and exit with main()'s return code.
if __name__ == "__main__":
|
|
raise SystemExit(asyncio.run(main()))
|