mirror of
https://github.com/x1xhlol/system-prompts-and-models-of-ai-tools.git
synced 2026-06-18 15:29:36 +00:00
180 lines
6.2 KiB
Python
180 lines
6.2 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Dealix Prospect CLI — run tech detection + lead scoring on any domain or CSV.
|
|
|
|
Usage:
|
|
# Single domain
|
|
python scripts/dealix_prospect.py foodics.com
|
|
|
|
# Bulk from CSV (CSV must have a 'domain' column)
|
|
python scripts/dealix_prospect.py --csv leads.csv --out enriched.csv
|
|
|
|
# Bulk via live Dealix API
|
|
python scripts/dealix_prospect.py --api https://web-dealix.up.railway.app foodics.com salla.sa
|
|
|
|
Works offline (no API keys) — uses the Dealix native tech detector.
|
|
Outputs: JSON to stdout, or CSV with added columns if --out given.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import asyncio
|
|
import csv
|
|
import json
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Put the directory two levels above this file at the front of the import
# path, so the project package below resolves when the script is run directly.
ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(ROOT))

# The detector lives in the project package; without it nothing below can
# work, so report the import failure on stderr and stop with exit status 2.
try:
    from auto_client_acquisition.connectors.tech_detect import detect_stack
except ImportError as exc:
    print(f"ERROR: {exc}. Run from project root or install project.", file=sys.stderr)
    raise SystemExit(2)
|
|
|
|
|
|
async def detect_one(domain: str, timeout: float = 10.0) -> dict:
    """Run the local tech detector on a single domain.

    Args:
        domain: Domain handed to ``detect_stack`` unchanged.
        timeout: Forwarded as ``detect_stack``'s ``timeout`` argument.

    Returns:
        The dictionary produced by calling ``to_dict()`` on the detector result.
    """
    # NOTE(review): presumably these paths are probed in addition to the
    # site's main page — confirm against tech_detect.detect_stack.
    extra_paths = ["/careers", "/about"]
    detection = await detect_stack(domain, timeout=timeout, extra_paths=extra_paths)
    return detection.to_dict()
|
|
|
|
|
|
async def detect_api(domain: str, api_base: str) -> dict:
    """Ask a remote Dealix API to run tech enrichment for one domain.

    POSTs ``{"domain": domain}`` to ``<api_base>/api/v1/prospect/enrich-tech``
    (a trailing slash on ``api_base`` is tolerated) with a 30-second timeout.

    Args:
        domain: Domain to enrich.
        api_base: Base URL of the API, e.g. ``https://web-dealix.up.railway.app``.

    Returns:
        The decoded JSON body of a successful response.

    Raises:
        httpx.HTTPStatusError: If the API answers with a 4xx/5xx status.
        Transport errors and JSON decoding errors propagate unchanged.
    """
    # Imported here so httpx is only needed when the API mode is actually used.
    import httpx

    url = f"{api_base.rstrip('/')}/api/v1/prospect/enrich-tech"
    async with httpx.AsyncClient() as client:
        response = await client.post(url, json={"domain": domain}, timeout=30.0)
        # Without this check an error payload (or error page) would be handed
        # back as if it were a detection result.
        response.raise_for_status()
        return response.json()
|
|
|
|
|
|
async def run_bulk(domains: list[str], concurrency: int, api_base: str | None) -> dict[str, dict]:
    """Detect many domains concurrently and collect the results by domain.

    Args:
        domains: Domains to process. Repeated entries are processed once,
            since the result mapping is keyed by domain anyway.
        concurrency: Maximum number of detections in flight. Values below 1
            are treated as 1.
        api_base: When truthy, each domain is sent to ``detect_api`` with this
            base URL; otherwise ``detect_one`` (local detector) is used.

    Returns:
        Mapping of domain to its detection dict, in first-seen order. A domain
        whose detection raised is mapped to
        ``{"status": "error", "error": <message>, "domain": <domain>}`` instead
        of aborting the whole batch.
    """
    # Semaphore(0) would park every worker forever and a negative value raises
    # ValueError, so never go below one slot.
    sem = asyncio.Semaphore(max(1, concurrency))
    # dict.fromkeys de-duplicates while preserving the caller's order.
    unique_domains = list(dict.fromkeys(domains))

    async def _one(d: str) -> tuple[str, dict]:
        async with sem:
            try:
                if api_base:
                    res = await detect_api(d, api_base)
                else:
                    res = await detect_one(d)
            except Exception as exc:  # noqa: BLE001 - best effort per domain
                return d, {"status": "error", "error": str(exc), "domain": d}
            return d, res

    pairs = await asyncio.gather(*(_one(d) for d in unique_domains))
    return dict(pairs)
|
|
|
|
|
|
def _read_csv_domains(path: Path) -> list[tuple[int, dict]]:
    """Read a CSV and return the rows whose domain cell looks like a domain.

    The domain column is the header equal to ``domain`` (case-insensitive),
    or failing that the first header that contains ``domain``.

    Args:
        path: CSV file to read; the first row is treated as the header.

    Returns:
        ``(row_index, row_dict)`` pairs, where ``row_index`` is the 0-based
        position among the data rows. Only rows whose stripped domain cell is
        non-empty and contains a ``.`` are kept. The row dicts are returned as
        read (cell values are not stripped). An empty file yields ``[]``.

    Raises:
        SystemExit: With code 3 (after an error message on stderr) when the
            file has data rows but no domain-like column.
    """
    with open(path, newline="") as f:
        reader = csv.DictReader(f)
        rows = list(reader)
        # Use the header itself rather than the keys of the first row: a first
        # row with surplus cells carries DictReader's None rest-key, and
        # calling .lower() on it would crash the column search below.
        fieldnames = list(reader.fieldnames or [])
    if not rows:
        return []

    col = next((c for c in fieldnames if c.lower() == "domain"), None)
    if col is None:
        col = next((c for c in fieldnames if "domain" in c.lower()), None)
    if not col:
        print(f"ERROR: CSV has no 'domain' column. Columns: {fieldnames}", file=sys.stderr)
        sys.exit(3)

    out: list[tuple[int, dict]] = []
    for i, row in enumerate(rows):
        # Short rows are padded with None by DictReader, hence the `or ""`.
        d = (row.get(col) or "").strip()
        if d and "." in d:
            out.append((i, row))
    return out
|
|
|
|
|
|
def _flatten(detection: dict) -> dict:
    """Reduce a detection result to the flat columns appended to the CSV.

    Args:
        detection: Detection dict. Reads the ``status`` key, the ``name`` of
            each entry under ``tools`` and the ``evidence`` of each entry
            under ``signals``. Missing or null sections are treated as empty.

    Returns:
        A dict with ``tech_status``, ``tech_tools`` (first 8 tool names joined
        by ``"; "``), ``tech_signals`` (first 5 evidence strings joined by
        ``"; "``) and ``tech_tools_count`` (total number of tool entries, not
        capped at 8).
    """

    def _texts(section: str, key: str) -> list[str]:
        # A null value (e.g. JSON null from the API) is treated like a missing
        # key; otherwise "; ".join() would raise TypeError on None.
        return [str(item.get(key) or "") for item in detection.get(section) or []]

    tools = _texts("tools", "name")
    signals = _texts("signals", "evidence")
    return {
        "tech_status": detection.get("status", ""),
        "tech_tools": "; ".join(tools[:8]),
        "tech_signals": "; ".join(signals[:5]),
        "tech_tools_count": len(tools),
    }
|
|
|
|
|
|
def _domain_column(fieldnames) -> str | None:
    """Pick the domain column: a name equal to 'domain' (any case), else the first containing it."""
    # Skip non-string keys: DictReader stores surplus cells under a None key.
    names = [c for c in fieldnames if isinstance(c, str)]
    exact = next((c for c in names if c.lower() == "domain"), None)
    if exact is not None:
        return exact
    return next((c for c in names if "domain" in c.lower()), None)


def _row_domain(row: dict, col: str | None) -> str:
    """Return the stripped domain cell of a CSV row, or '' when it is absent."""
    value = row.get(col) if col is not None else None
    return value.strip() if isinstance(value, str) else ""


async def _main(args: argparse.Namespace) -> int:
    """Run the CLI: gather domains, detect them, then emit CSV or JSON.

    Args:
        args: Parsed arguments; reads ``api``, ``csv``, ``out``, ``domains``
            and ``concurrency``.

    Returns:
        Process exit status: 0 on success, 1 when no domains were supplied,
        3 when the ``--csv`` file does not exist.
    """
    api_base = args.api or None

    if args.out and not args.csv:
        # --out is only honoured in CSV mode; say so instead of ignoring it.
        print("WARNING: --out is ignored without --csv; writing JSON to stdout", file=sys.stderr)

    # Source: CSV or positional args
    if args.csv:
        csv_path = Path(args.csv)
        if not csv_path.exists():
            print(f"ERROR: {csv_path} not found", file=sys.stderr)
            return 3
        pairs = _read_csv_domains(csv_path)
        print(f"found {len(pairs)} domains in {csv_path}", file=sys.stderr)
        # Strip here exactly as the write-back lookup below does; otherwise a
        # padded cell is detected under one key and looked up under another.
        col = _domain_column(pairs[0][1]) if pairs else None
        domains = [d for d in (_row_domain(row, col) for _, row in pairs) if d]
    else:
        domains = args.domains

    if not domains:
        print("ERROR: no domains provided. Pass args or use --csv", file=sys.stderr)
        return 1

    print(f"enriching {len(domains)} domain(s) using {'Dealix API' if api_base else 'local detector'}...", file=sys.stderr)
    results = await run_bulk(domains, concurrency=args.concurrency, api_base=api_base)

    if args.csv and args.out:
        # Re-read the CSV so rows without a usable domain are kept in the
        # output too (they simply get empty tech_* columns).
        with open(args.csv, newline="") as f:
            reader = csv.DictReader(f)
            original_rows = list(reader)
        fieldnames = list(original_rows[0].keys())
        col = _domain_column(fieldnames)

        new_fields = ["tech_status", "tech_tools", "tech_signals", "tech_tools_count"]
        for nf in new_fields:
            if nf not in fieldnames:
                fieldnames.append(nf)

        for row in original_rows:
            res = results.get(_row_domain(row, col), {})
            row.update(_flatten(res))

        out_path = Path(args.out)
        with open(out_path, "w", newline="") as f:
            w = csv.DictWriter(f, fieldnames=fieldnames)
            w.writeheader()
            w.writerows(original_rows)
        print(f"✓ wrote {out_path} ({len(original_rows)} rows)", file=sys.stderr)
        return 0

    # Default: JSON to stdout. A single result is printed bare, without the
    # enclosing {domain: result} mapping.
    if len(results) == 1:
        _, v = next(iter(results.items()))
        print(json.dumps(v, indent=2, ensure_ascii=False))
    else:
        print(json.dumps(results, indent=2, ensure_ascii=False))
    return 0
|
|
|
|
|
|
def main(argv: list[str] | None = None) -> None:
    """Parse command-line arguments and run the prospector.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Raises:
        SystemExit: Always. The code is the status returned by ``_main``, or
            2 when argparse rejects the arguments (including a
            ``--concurrency`` below 1).
    """
    ap = argparse.ArgumentParser(
        prog="dealix-prospect",
        description="Dealix Prospector CLI — tech detection + lead scoring for Saudi B2B",
    )
    ap.add_argument("domains", nargs="*", help="one or more domains (e.g. foodics.com salla.sa)")
    ap.add_argument("--csv", help="CSV file with a 'domain' column to enrich")
    ap.add_argument("--out", help="output CSV path (with --csv) — adds tech_status/tech_tools/tech_signals/tech_tools_count")
    ap.add_argument("--api", help="use live Dealix API instead of local (e.g. https://web-dealix.up.railway.app)")
    ap.add_argument("--concurrency", type=int, default=5, help="parallelism (1-10)")
    args = ap.parse_args(argv)
    # Zero workers would wait forever and a negative count crashes when the
    # semaphore is built, so reject both up front with a normal usage error.
    if args.concurrency < 1:
        ap.error("--concurrency must be at least 1")
    sys.exit(asyncio.run(_main(args)))
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|