#!/usr/bin/env bash
#
# Launch the application under gunicorn.
#
# Creates the checkpoint directory, caps the numeric-library thread pools,
# then replaces this shell with gunicorn serving `app:app` on port 7860.

# Abort on command failure, unset variables, and failed pipeline stages.
set -euo pipefail

# Ensure checkpoint directory exists
mkdir -p /tmp

# Set CPU threading limits (OpenMP, MKL and OpenBLAS each read their own
# variable, so all three are pinned to the same value).
export OMP_NUM_THREADS=4
export MKL_NUM_THREADS=4
export OPENBLAS_NUM_THREADS=4

# Run app with gunicorn
#   --workers 1        Single worker to avoid loading model multiple times
#   --threads 4        Allow 4 threads per worker for concurrent requests
#   --timeout 120      Allow 2 minutes for inference on CPU
#   --preload          Load application code before forking workers
#                      (more efficient)
#   --access-logfile - Write the access log to stdout
#   --error-logfile -  Write the error log to stderr
#
# `exec` replaces this shell with gunicorn, so gunicorn receives signals
# directly and nothing placed after this command will run.
exec gunicorn \
  --bind 0.0.0.0:7860 \
  --workers 1 \
  --threads 4 \
  --timeout 120 \
  --preload \
  --access-logfile - \
  --error-logfile - \
  app:app