mirror of
https://github.com/x1xhlol/system-prompts-and-models-of-ai-tools.git
synced 2026-06-18 15:29:36 +00:00
126 lines
5.2 KiB
Bash
126 lines
5.2 KiB
Bash
#!/usr/bin/env bash
|
|
# ─────────────────────────────────────────────────────────────────────────────
|
|
# rollback_drill.sh — Dealix rollback drill (T7 gate)
|
|
# ─────────────────────────────────────────────────────────────────────────────
|
|
#
|
|
# Goal: prove we can roll back from current HEAD to .last_good_sha in <5 min.
|
|
#
|
|
# Modes:
|
|
# --dry-run (default) Print every step; changes no repo or service state (it still writes a log file under /var/log and probes the health URL). Safe in prod.
|
|
# --real Actually perform the rollback. Requires CONFIRM=YES env.
|
|
#
|
|
# Run on the prod server (or matching staging) as root:
|
|
# sudo bash /opt/dealix/scripts/ops/rollback_drill.sh --dry-run
|
|
#
|
|
# Exit codes:
|
|
# 0 = success
|
|
# 1 = preflight failed (also: already on .last_good_sha, unknown mode, or --real without CONFIRM=YES)
|
|
# 2 = rollback failed
|
|
# 3 = health check failed after rollback
|
|
# ─────────────────────────────────────────────────────────────────────────────
|
|
|
|
set -euo pipefail

# Runtime configuration.
#   MODE        first CLI argument; defaults to --dry-run.
#   APP_DIR     git checkout to roll back (env-overridable).
#   SERVICE     systemd unit that is stopped/started (env-overridable).
#   HEALTH_URL  endpoint probed before and after the rollback (env-overridable).
MODE="${1:---dry-run}"
APP_DIR="${APP_DIR:-/opt/dealix}"
SERVICE="${SERVICE:-dealix-api}"
HEALTH_URL="${HEALTH_URL:-http://127.0.0.1:8001/health/deep}"
LAST_GOOD_FILE="${APP_DIR}/.last_good_sha"
# The file name carries a trailing "Z", so the stamp has to be taken in UTC
# (-u); a plain `date` would label local time as UTC.
LOG_FILE="/var/log/dealix_rollback_drill.$(date -u +%Y%m%dT%H%M%SZ).log"
readonly MODE APP_DIR SERVICE HEALTH_URL LAST_GOOD_FILE LOG_FILE
|
|
|
|
# log MESSAGE...
# Joins all arguments into one line, prefixes it with a UTC [HH:MM:SSZ] stamp,
# prints it on stdout and appends the same line to $LOG_FILE.
log() {
  printf '[%s] %s\n' "$(date -u +%H:%M:%SZ)" "$*" | tee -a "$LOG_FILE"
}
|
|
# die MESSAGE [EXIT_CODE]
# Logs "FATAL: MESSAGE" through log() and terminates the script.
#   $1  message to log
#   $2  exit status (default 1)
# Only $1 is logged; the exit code is no longer echoed as part of the message.
die() {
  local message="${1:-}"
  local code="${2:-1}"
  # A logging failure (e.g. unwritable log file) must not replace the
  # requested exit status under `set -e` / `pipefail`.
  log "FATAL: ${message}" || true
  exit "$code"
}
|
|
|
|
# ── Preflight ────────────────────────────────────────────────────────────────
# Validates the checkout and the rollback target, then records the service and
# health state before anything is changed. Every failure here exits with 1.
log "=== Rollback drill start (mode=$MODE) ==="

[[ -d "$APP_DIR/.git" ]] || die "Not a git checkout: $APP_DIR" 1
[[ -f "$LAST_GOOD_FILE" ]] || die "Missing $LAST_GOOD_FILE — cannot roll back" 1

# The full SHA is used for comparison; the short form is only for log lines.
CURRENT_FULL_SHA=$(git -C "$APP_DIR" rev-parse HEAD) \
  || die "Cannot resolve HEAD in $APP_DIR" 1
CURRENT_SHA=$(git -C "$APP_DIR" rev-parse --short HEAD) \
  || die "Cannot resolve HEAD in $APP_DIR" 1

# Strip all whitespace from the file, then keep at most the first 10 characters.
TARGET_SHA=$(tr -d '[:space:]' < "$LAST_GOOD_FILE") \
  || die "Cannot read $LAST_GOOD_FILE" 1
TARGET_SHA="${TARGET_SHA:0:10}"

# An empty or non-hex value would otherwise reach `git reset --hard` unchecked.
[[ "$TARGET_SHA" =~ ^[0-9a-fA-F]{4,}$ ]] \
  || die "Invalid commit id in $LAST_GOOD_FILE: '$TARGET_SHA'" 1

log "Current HEAD: $CURRENT_SHA"
log "Rollback to: $TARGET_SHA"

# HEAD and the target are abbreviated to different lengths, so test whether the
# target is a prefix of the full HEAD SHA instead of comparing for equality.
if [[ "$CURRENT_FULL_SHA" == "${TARGET_SHA,,}"* ]]; then
  die "Already on .last_good_sha — nothing to roll back" 1
fi

# Check service is currently up (informational only; the drill continues).
if systemctl is-active --quiet "$SERVICE"; then
  log "Service $SERVICE is active — proceeding"
else
  log "WARN: Service $SERVICE is NOT active before drill"
fi

# Check current health.
# curl's -w already prints 000 when no HTTP response was received, so the
# fallback replaces the captured value instead of being appended to it.
# --max-time keeps a hung endpoint from stalling the drill.
PRE_HEALTH=$(curl -s -o /dev/null --max-time 10 -w "%{http_code}" "$HEALTH_URL") \
  || PRE_HEALTH="000"
log "Pre-drill /health/deep: HTTP $PRE_HEALTH"
|
|
|
|
# ── Dry run: stop here ───────────────────────────────────────────────────────
# In --dry-run mode, log the plan that --real would execute and exit 0.
if [[ "$MODE" == "--dry-run" ]]; then
  dry_run_plan=(
    ""
    "=== DRY-RUN: would execute the following on --real ==="
    " 1. systemctl stop $SERVICE"
    " 2. (cd $APP_DIR && git fetch origin && git reset --hard $TARGET_SHA)"
    " 3. (cd $APP_DIR && .venv/bin/pip install -q -r requirements.txt)"
    " 4. systemctl start $SERVICE"
    " 5. Wait 10s, then curl $HEALTH_URL"
    " 6. If health != 200 → die 3 (you MUST then reset to $CURRENT_SHA manually)"
    ""
    "Target rollback time: <5 min (most of it is pip install)"
    "=== Dry run complete. No state changed. ==="
  )

  # One log call per entry, in order, so each line gets its own timestamp.
  for plan_line in "${dry_run_plan[@]}"; do
    log "$plan_line"
  done

  exit 0
fi
|
|
|
|
# ── Real rollback ────────────────────────────────────────────────────────────
# Stops the service, hard-resets the checkout to TARGET_SHA, reinstalls the
# Python requirements, restarts the service and verifies the health endpoint.
# A failure in steps 1-4 exits with 2; a non-200 health status exits with 3.
if [[ "$MODE" == "--real" ]]; then
  # Explicit opt-in so a stray --real cannot take the service down.
  if [[ "${CONFIRM:-}" != "YES" ]]; then
    die "Refusing to run --real without CONFIRM=YES in env" 1
  fi

  START_TS=$(date +%s)

  log "STEP 1/5: systemctl stop $SERVICE"
  systemctl stop "$SERVICE" || die "Failed to stop service" 2

  log "STEP 2/5: git reset --hard $TARGET_SHA"
  (cd "$APP_DIR" && git fetch origin && git reset --hard "$TARGET_SHA") \
    || die "git reset failed" 2

  log "STEP 3/5: pip install -r requirements.txt"
  (cd "$APP_DIR" && .venv/bin/pip install -q -r requirements.txt) \
    || die "pip install failed — service still stopped, manual recovery needed" 2

  log "STEP 4/5: systemctl start $SERVICE"
  systemctl start "$SERVICE" || die "Failed to start service after rollback" 2

  log "STEP 5/5: wait + health check"
  sleep 10
  # curl's -w already prints 000 when no HTTP response was received, so the
  # fallback replaces the captured value instead of being appended to it.
  # --max-time keeps a hung service from stalling the drill indefinitely.
  POST_HEALTH=$(curl -s -o /dev/null --max-time 10 -w "%{http_code}" "$HEALTH_URL") \
    || POST_HEALTH="000"

  END_TS=$(date +%s)
  ELAPSED=$((END_TS - START_TS))

  log "Post-rollback /health/deep: HTTP $POST_HEALTH"
  log "Elapsed: ${ELAPSED}s"

  if [[ "$POST_HEALTH" != "200" ]]; then
    die "Health check failed after rollback — $POST_HEALTH" 3
  fi

  # Exceeding the 5-minute target is reported but does not fail the drill.
  if (( ELAPSED > 300 )); then
    log "WARN: Rollback took ${ELAPSED}s (>5min target) — review pip cache"
  fi

  log "=== Rollback complete. HEAD is now $TARGET_SHA. ==="
  log "Remember to:"
  log " * Update .last_good_sha if this rollback is now the stable version"
  log " * Investigate the bad commit that triggered the rollback"
  log " * File an incident post-mortem in docs/incidents/"
  exit 0
fi
|
|
|
|
# Reached only when MODE matched neither --dry-run nor --real.
die "Unknown mode: ${MODE} (expected --dry-run or --real)" 1
|