#!/usr/bin/env bash
# Strict mode: abort on command failure, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Make sure /tmp (intended as the checkpoint directory) is present.
# With -p this is a no-op when the directory already exists.
mkdir -p /tmp

# Cap the OpenMP, MKL and OpenBLAS thread pools at 4 threads each. The
# variables are exported so the server process started later inherits them.
export OMP_NUM_THREADS=4 \
       MKL_NUM_THREADS=4 \
       OPENBLAS_NUM_THREADS=4

# Start the application under gunicorn. The arguments are collected in an
# array so that each option can carry its own explanation.
gunicorn_args=(
    # Listen on all interfaces, port 7860.
    --bind 0.0.0.0:7860
    # A single worker, so the model is not loaded multiple times.
    --workers 1
    # Four threads in that worker to serve requests concurrently.
    --threads 4
    # Give CPU inference up to 2 minutes before the worker times out.
    --timeout 120
    # Load the application code before the worker is forked.
    --preload
    # "-" sends the access log to stdout and the error log to stderr.
    --access-logfile -
    --error-logfile -
    # WSGI target: object "app" in module "app".
    app:app
)

# exec replaces this shell with gunicorn, so nothing after this line runs.
exec gunicorn "${gunicorn_args[@]}"