From dd8d2ccc9e8dcb4866185a7780beebfeab52a447 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Thu, 23 Apr 2026 13:33:09 +0000
Subject: [PATCH] fix(docker): CPU-only torch + multi-stage build to fix
 Railway 4GB limit

Railway build was failing with "Image of size 5.7 GB exceeded limit of
4.0 GB" because sentence-transformers pulled torch with full CUDA/NVIDIA
GPU packages (~3 GB).

Fix: multi-stage Dockerfile that:
1. Installs CPU-only torch first
   (--index-url https://download.pytorch.org/whl/cpu),
   saving ~3 GB (200 MB CPU vs 3.2 GB CUDA)
2. Multi-stage build: builder + runtime (smaller final image)
3. Non-root user (app:1000)
4. tini init for proper signal handling
5. Built-in HEALTHCHECK with 60s start-period
6. railway.toml with healthcheck path and restart policy

Also fixes healthcheck failure: start-period=60s (Docker HEALTHCHECK)
and healthcheckTimeout=120 (railway.toml) give the app time to
initialize before /api/v1/health is required to pass.

Expected image size: ~2 GB (down from 5.7 GB).

https://claude.ai/code/session_01W1rJthWDkasijTdXCfxVHs
---
 salesflow-saas/backend/Dockerfile   | 46 ++++++++++++++++++++++++-----
 salesflow-saas/backend/railway.toml |  9 ++++++
 2 files changed, 47 insertions(+), 8 deletions(-)
 create mode 100644 salesflow-saas/backend/railway.toml

diff --git a/salesflow-saas/backend/Dockerfile b/salesflow-saas/backend/Dockerfile
index 27510ee4..8432bdc5 100644
--- a/salesflow-saas/backend/Dockerfile
+++ b/salesflow-saas/backend/Dockerfile
@@ -1,16 +1,46 @@
-FROM python:3.12-slim
-
-WORKDIR /app
+# ── Stage 1: Builder (toolchain + pip install into /opt/venv; not shipped) ──
+FROM python:3.12-slim AS builder
 
 RUN apt-get update && apt-get install -y --no-install-recommends \
-    gcc libpq-dev curl \
+    build-essential libpq-dev curl \
     && rm -rf /var/lib/apt/lists/*
 
-COPY requirements.txt .
-RUN pip install --no-cache-dir -r requirements.txt
+WORKDIR /build
 
-COPY . .
+RUN python -m venv /opt/venv
+ENV PATH="/opt/venv/bin:$PATH"
+COPY requirements.txt ./
+
+# CPU-only torch first (saves ~3 GB vs the CUDA build). The CPU index is also
+# given to the requirements install so a torch pin there resolves to a +cpu wheel.
+RUN pip install --no-cache-dir --upgrade pip setuptools wheel \
+    && pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu \
+    && pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r requirements.txt
+
+# ── Stage 2: Runtime (slim base + prebuilt venv, no build toolchain) ──
+FROM python:3.12-slim AS runtime
+
+# curl serves the HEALTHCHECK probe and tini is the ENTRYPOINT init.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    curl libpq5 tini \
+    && rm -rf /var/lib/apt/lists/*
+
+# Unprivileged app user/group (uid/gid 1000) that owns and runs the code.
+RUN groupadd --gid 1000 app && useradd --uid 1000 --gid app --shell /bin/bash --create-home app
+
+COPY --from=builder /opt/venv /opt/venv
+ENV PATH="/opt/venv/bin:$PATH" \
+    PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1
+
+WORKDIR /app
+COPY --chown=app:app . .
+USER app
 
 EXPOSE 8000
 
-CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
+# Probe and bind the same port: $PORT when the platform injects one, else 8000.
+HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
+    CMD curl -f "http://localhost:${PORT:-8000}/api/v1/health" || exit 1
+ENTRYPOINT ["tini", "--"]
+CMD ["sh", "-c", "exec uvicorn app.main:app --host 0.0.0.0 --port ${PORT:-8000} --workers 2"]
diff --git a/salesflow-saas/backend/railway.toml b/salesflow-saas/backend/railway.toml
new file mode 100644
index 00000000..66a8bbd5
--- /dev/null
+++ b/salesflow-saas/backend/railway.toml
@@ -0,0 +1,9 @@
+[build]
+dockerfilePath = "Dockerfile"
+[deploy]
+healthcheckPath = "/api/v1/health"
+healthcheckTimeout = 120
+# Exec-form start commands get no shell expansion; sh -c resolves ${PORT:-8000}.
+startCommand = '/bin/sh -c "exec uvicorn app.main:app --host 0.0.0.0 --port ${PORT:-8000} --workers 2"'
+restartPolicyType = "ON_FAILURE"
+restartPolicyMaxRetries = 3