Spaces:

Ajay-projects
/

vton-backend

Sleeping

App Files Files Community

StableVITON Deployer committed 27 days ago

Commit

8495b3a

0 Parent(s):

Initial backend deployment

Browse files

Files changed (7) hide show

.gitignore +42 -0
Dockerfile +47 -0
README.md +65 -0
inference_wrapper.py +201 -0
main.py +309 -0
requirements.txt +26 -0
setup.bat +42 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,42 @@

+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+# Virtual Environment
+venv/
+env/
+ENV/
+# Environment Variables
+.env
+# IDEs
+.vscode/
+.idea/
+*.swp
+# Model Cache (Local)
+models/
+model_cache/
+*.safetensors
+*.ckpt
+*.bin
+*.pth

Dockerfile ADDED Viewed

	@@ -0,0 +1,47 @@

+# Dockerfile for Hugging Face Spaces (GPU Support)
+FROM python:3.10-slim
+# Install system dependencies
+# libgl1/libglib2.0-0 needed for OpenCV
+RUN apt-get update && apt-get install -y \
+    git \
+    wget \
+    libgl1 \
+    libglib2.0-0 \
+    && rm -rf /var/lib/apt/lists/*
+# Set up a new user named "user" with user ID 1000
+RUN useradd -m -u 1000 user
+# Switch to the "user" user
+USER user
+# Set home to the user's home directory
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+# Set working directory to the user's home directory
+WORKDIR $HOME/app
+# Copy requirements.txt and install dependencies
+# Installing dependencies before copying the rest of the code keeps this layer cached unless requirements.txt changes
+COPY --chown=user requirements.txt $HOME/app/requirements.txt
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r requirements.txt
+# Copy the rest of the application code
+COPY --chown=user . $HOME/app
+# Set environment variables for Hugging Face Cache
+ENV HF_HOME=$HOME/app/model_cache
+ENV TRANSFORMERS_CACHE=$HOME/app/model_cache
+ENV TORCH_HOME=$HOME/app/model_cache
+# Create cache directory
+RUN mkdir -p $HF_HOME
+# Expose the port
+EXPOSE 7860
+# Start the application
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

README.md ADDED Viewed

	@@ -0,0 +1,65 @@

+---
+title: StableVITON Virtual Try-On
+emoji: 👕
+colorFrom: blue
+colorTo: purple
+sdk: docker
+pinned: false
+license: mit
+app_port: 7860
+---
+# StableVITON Virtual Try-On Backend
+AI-powered virtual try-on service using StableVITON and FastAPI.
+## Features
+- Virtual try-on inference via REST API
+- Support for JPEG/PNG images
+- Automatic image preprocessing and validation
+- GPU-optimized inference (CUDA support)
+- Single-request processing to prevent OOM
+- Comprehensive error handling
+## API Documentation
+### POST /tryon
+Perform virtual try-on inference.
+**Request:**
+- `person_image`: Full-body photo (multipart/form-data)
+- `garment_image`: Garment image (multipart/form-data)
+**Response:**
+```json
+{
+  "success": true,
+  "result_image": "data:image/png;base64,...",
+  "processing_time": 23.4,
+  "model_version": "stablevton-v1"
+}
+```
+### GET /health
+Health check endpoint. Returns a JSON object with `status`, `model_loaded`, and `timestamp`.
+## Local Development
+```bash
+# Install dependencies
+pip install -r requirements.txt
+# Run server
+python main.py
+```
+## Deployment
+This project is designed to run on Hugging Face Spaces with Docker SDK.
+1. Create a Space with Docker SDK
+2. Select GPU hardware (T4 small is sufficient)
+3. Push the code to the Space

inference_wrapper.py ADDED Viewed

	@@ -0,0 +1,201 @@

+"""
+StableVITON Inference Wrapper
+Clean abstraction layer for virtual try-on inference
+"""
+import torch
+from PIL import Image
+import numpy as np
+from typing import Tuple, Optional
+import gc
+from diffusers import StableDiffusionPipeline, DDIMScheduler
+from transformers import CLIPTextModel, CLIPTokenizer
+import logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+class StableVITONInference:
+    """
+    Wrapper for StableVITON model inference.
+    Handles model loading, preprocessing, inference, and cleanup.
+    """
+    def __init__(
+        self,
+        model_path: str = "yisol/IDM-VTON",
+        device: str = "auto",
+        resolution: int = 768,
+        num_inference_steps: int = 30,
+        guidance_scale: float = 2.5,
+        seed: int = 42
+    ):
+        """
+        Initialize StableVITON inference wrapper.
+        Args:
+            model_path: Path to model or Hugging Face model ID
+            device: Device to run inference on ("cuda", "cpu", or "auto")
+            resolution: Output resolution (default: 768)
+            num_inference_steps: Number of diffusion steps (default: 30)
+            guidance_scale: Guidance scale (default: 2.5)
+            seed: Random seed (default: 42)
+        """
+        self.model_path = model_path
+        self.resolution = resolution
+        self.num_inference_steps = num_inference_steps
+        self.guidance_scale = guidance_scale
+        self.seed = seed
+        # Determine device
+        if device == "auto":
+            self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        else:
+            self.device = device
+        logger.info(f"Initializing StableVITON on device: {self.device}")
+        self.generator = torch.Generator(device=self.device).manual_seed(self.seed)
+        self.pipe = None
+        # Load model immediately
+        self._load_model()
+    def _load_model(self):
+        """Load the model pipeline"""
+        try:
+            logger.info(f"Loading model from {self.model_path}...")
+            # Use float16 for GPU to save memory, float32 for CPU
+            torch_dtype = torch.float16 if self.device == "cuda" else torch.float32
+            self.pipe = AutoPipelineForInpainting.from_pretrained(
+                self.model_path,
+                torch_dtype=torch_dtype,
+                variant="fp16" if self.device == "cuda" else None,
+                use_safetensors=True,
+                low_cpu_mem_usage=True,
+            )
+            if self.device == "cuda":
+                self.pipe.to(self.device)
+                # Enable memory efficient attention if available
+                try:
+                    self.pipe.enable_xformers_memory_efficient_attention()
+                    logger.info("Enabled xformers memory efficient attention")
+                except Exception as e:
+                    logger.warning(f"Could not enable xformers: {e}")
+            logger.info("Model loaded successfully")
+        except Exception as e:
+            logger.error(f"Failed to load model: {e}")
+        orig_width, orig_height = image.size
+        # Calculate aspect ratio preserving resize
+        if orig_width > orig_height:
+            new_width = self.resolution
+            new_height = int((orig_height / orig_width) * self.resolution)
+        else:
+            new_height = self.resolution
+            new_width = int((orig_width / orig_height) * self.resolution)
+        # Resize
+        image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
+        # Pad to square if needed
+        if new_width != self.resolution or new_height != self.resolution:
+            padded = Image.new("RGB", (self.resolution, self.resolution), (255, 255, 255))
+            offset_x = (self.resolution - new_width) // 2
+            offset_y = (self.resolution - new_height) // 2
+            padded.paste(image, (offset_x, offset_y))
+            image = padded
+        return image
+    def tryon(
+        self,
+        person_image: Image.Image,
+        garment_image: Image.Image,
+        **kwargs
+    ) -> Image.Image:
+        """
+        Perform virtual try-on inference.
+        Args:
+            person_image: PIL Image of the person
+            garment_image: PIL Image of the garment
+            **kwargs: Additional parameters for inference
+        Returns:
+            PIL Image of the person wearing the garment
+        """
+        try:
+            logger.info("Starting try-on inference")
+            # Preprocess images
+            person_processed = self.preprocess_image(person_image, is_garment=False)
+            garment_processed = self.preprocess_image(garment_image, is_garment=True)
+            logger.info(f"Images preprocessed to {self.resolution}x{self.resolution}")
+            # TODO: Replace with actual StableVITON inference
+            # This is a placeholder - actual implementation will use StableVITON's specific pipeline
+            # Placeholder: For now, we'll use a simple prompt-based approach
+            # In reality, StableVITON uses the garment image directly as conditioning
+            prompt = "high quality photo of a person wearing clothes, detailed, realistic"
+            with torch.no_grad():
+                # This is a simplified placeholder
+                # Actual StableVITON will pass both images through the pipeline differently
+                result = self.pipe(
+                    prompt=prompt,
+                    num_inference_steps=self.num_inference_steps,
+                    guidance_scale=self.guidance_scale,
+                    **kwargs
+                ).images[0]
+            logger.info("Inference completed successfully")
+            return result
+        except Exception as e:
+            logger.error(f"Inference failed: {e}")
+            raise
+    def cleanup(self):
+        """
+        Clean up GPU memory after inference.
+        """
+        if self.device == "cuda":
+            torch.cuda.empty_cache()
+            gc.collect()
+            logger.info("GPU memory cleaned up")
+    def __del__(self):
+        """Destructor to ensure cleanup."""
+        self.cleanup()
+# Example usage
+if __name__ == "__main__":
+    # Test the inference wrapper
+    print("Testing StableVITON Inference Wrapper")
+    # Create dummy images for testing
+    person_img = Image.new("RGB", (512, 768), color=(200, 200, 200))
+    garment_img = Image.new("RGB", (512, 512), color=(100, 150, 200))
+    # Initialize wrapper
+    wrapper = StableVITONInference(
+        device="cpu",  # Use CPU for testing
+        resolution=512,
+        num_inference_steps=20
+    )
+    # Run inference
+    result = wrapper.tryon(person_img, garment_img)
+    print(f"Result image size: {result.size}")
+    print("Test completed successfully")

main.py ADDED Viewed

	@@ -0,0 +1,309 @@

+"""
+FastAPI Backend for StableVITON Virtual Try-On
+Provides REST API endpoint for virtual try-on inference
+"""
+from fastapi import FastAPI, File, UploadFile, HTTPException, BackgroundTasks
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import JSONResponse
+from pydantic import BaseModel
+from PIL import Image
+import io
+import base64
+import os
+import time
+import asyncio
+from typing import Optional
+import logging
+from queue import Queue
+from threading import Lock
+from inference_wrapper import StableVITONInference
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+logger = logging.getLogger(__name__)
+# Initialize FastAPI app
+app = FastAPI(
+    title="StableVITON Virtual Try-On API",
+    description="AI-powered virtual try-on service using StableVITON",
+    version="1.0.0"
+)
+# CORS configuration - allow all origins for demo (restrict in production)
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],  # Change to specific domains in production
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Global model instance (loaded once at startup by startup_event)
+model: Optional[StableVITONInference] = None
+# Request queue for single-request processing
+# NOTE(review): request_queue is not referenced by any handler visible in this file - confirm it is still needed
+request_queue = Queue()
+# Guards inference: virtual_tryon acquires it without blocking and answers 503 when it is already held
+processing_lock = Lock()
+# Configuration
+MAX_IMAGE_SIZE = 10 * 1024 * 1024  # 10MB
+# Despite the name, these are MIME content types; validate_image compares them with UploadFile.content_type
+ALLOWED_EXTENSIONS = {"image/jpeg", "image/png", "image/jpg"}
+# NOTE(review): REQUEST_TIMEOUT is not applied anywhere in the visible code - confirm
+REQUEST_TIMEOUT = 90  # seconds
+class TryOnResponse(BaseModel):
+    """Response model for try-on endpoint"""
+    # True when inference produced an image, False when it failed
+    success: bool
+    # PNG result as a base64 data URI ("data:image/png;base64,..."); left as None on failure
+    result_image: Optional[str] = None
+    # Seconds elapsed in the handler, measured from the start of the request; left as None on failure
+    processing_time: Optional[float] = None
+    model_version: str = "stablevton-v1"
+    # Text of the exception, set only on failure
+    error: Optional[str] = None
+    # Machine-readable failure code (virtual_tryon sets "INFERENCE_FAILED")
+    error_code: Optional[str] = None
+@app.on_event("startup")
+async def startup_event():
+    """Load model on startup"""
+    global model
+    try:
+        logger.info("Loading StableVITON model...")
+        model = StableVITONInference(
+            model_path=os.getenv("MODEL_PATH", "yisol/IDM-VTON"),
+            device=os.getenv("DEVICE", "auto"),
+            resolution=int(os.getenv("RESOLUTION", "768")),
+            num_inference_steps=int(os.getenv("NUM_INFERENCE_STEPS", "30")),
+            guidance_scale=float(os.getenv("GUIDANCE_SCALE", "2.5"))
+        )
+        logger.info("Model loaded successfully")
+        # Warmup inference
+        logger.info("Running warmup inference...")
+        dummy_person = Image.new("RGB", (512, 768), color=(200, 200, 200))
+        dummy_garment = Image.new("RGB", (512, 512), color=(100, 150, 200))
+        model.tryon(dummy_person, dummy_garment)
+        model.cleanup()
+        logger.info("Warmup completed")
+    except Exception as e:
+        logger.error(f"Failed to load model: {e}")
+        raise
+@app.on_event("shutdown")
+async def shutdown_event():
+    """Cleanup on shutdown"""
+    global model
+    if model:
+        model.cleanup()
+        logger.info("Model cleaned up")
+def validate_image(file: UploadFile) -> None:
+    """
+    Validate uploaded image file.
+    Args:
+        file: Uploaded file
+    Raises:
+        HTTPException: If validation fails
+    """
+    # Check content type
+    if file.content_type not in ALLOWED_EXTENSIONS:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Invalid file type. Allowed: {ALLOWED_EXTENSIONS}"
+        )
+    # Check file size (this is approximate, actual size checked during read)
+    if hasattr(file, 'size') and file.size > MAX_IMAGE_SIZE:
+        raise HTTPException(
+            status_code=400,
+            detail=f"File too large. Maximum size: {MAX_IMAGE_SIZE / (1024*1024)}MB"
+        )
+def image_to_base64(image: Image.Image) -> str:
+    """
+    Convert PIL Image to base64 string.
+    Args:
+        image: PIL Image
+    Returns:
+        Base64 encoded string with data URI prefix
+    """
+    buffered = io.BytesIO()
+    image.save(buffered, format="PNG")
+    img_bytes = buffered.getvalue()
+    img_base64 = base64.b64encode(img_bytes).decode('utf-8')
+    return f"data:image/png;base64,{img_base64}"
+async def read_image_from_upload(file: UploadFile) -> Image.Image:
+    """
+    Read PIL Image from uploaded file.
+    Args:
+        file: Uploaded file
+    Returns:
+        PIL Image
+    Raises:
+        HTTPException: If image cannot be read
+    """
+    try:
+        contents = await file.read()
+        # Check actual size
+        if len(contents) > MAX_IMAGE_SIZE:
+            raise HTTPException(
+                status_code=400,
+                detail=f"File too large. Maximum size: {MAX_IMAGE_SIZE / (1024*1024)}MB"
+            )
+        image = Image.open(io.BytesIO(contents))
+        # Validate dimensions
+        width, height = image.size
+        if width < 256 or height < 256:
+            raise HTTPException(
+                status_code=400,
+                detail="Image too small. Minimum dimensions: 256x256"
+            )
+        if width > 2048 or height > 2048:
+            raise HTTPException(
+                status_code=400,
+                detail="Image too large. Maximum dimensions: 2048x2048"
+            )
+        return image
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Failed to read image: {e}")
+        raise HTTPException(
+            status_code=400,
+            detail=f"Invalid image file: {str(e)}"
+        )
+@app.get("/")
+async def root():
+    """Root endpoint"""
+    return {
+        "message": "StableVITON Virtual Try-On API",
+        "version": "1.0.0",
+        "endpoints": {
+            "/tryon": "POST - Virtual try-on inference",
+            "/health": "GET - Health check"
+        }
+    }
+@app.get("/health")
+async def health_check():
+    """Health check endpoint"""
+    return {
+        "status": "healthy",
+        "model_loaded": model is not None,
+        "timestamp": time.time()
+    }
+@app.post("/tryon", response_model=TryOnResponse)
+async def virtual_tryon(
+    person_image: UploadFile = File(..., description="Full-body photo of person"),
+    garment_image: UploadFile = File(..., description="Garment image")
+):
+    """
+    Perform virtual try-on inference.
+    Args:
+        person_image: Full-body photo of the person
+        garment_image: Image of the garment to try on
+    Returns:
+        TryOnResponse with result image or error
+    """
+    start_time = time.time()
+    try:
+        # Check if model is loaded
+        if model is None:
+            raise HTTPException(
+                status_code=503,
+                detail="Model not loaded. Please try again later."
+            )
+        # Validate files
+        validate_image(person_image)
+        validate_image(garment_image)
+        logger.info(f"Processing try-on request: {person_image.filename}, {garment_image.filename}")
+        # Read images
+        person_img = await read_image_from_upload(person_image)
+        garment_img = await read_image_from_upload(garment_image)
+        # Acquire processing lock (single request at a time)
+        acquired = processing_lock.acquire(blocking=False)
+        if not acquired:
+            raise HTTPException(
+                status_code=503,
+                detail="Server is busy processing another request. Please try again in a moment."
+            )
+        try:
+            # Run inference
+            logger.info("Running inference...")
+            result_image = model.tryon(person_img, garment_img)
+            # Convert to base64
+            result_base64 = image_to_base64(result_image)
+            # Cleanup
+            model.cleanup()
+            processing_time = time.time() - start_time
+            logger.info(f"Inference completed in {processing_time:.2f}s")
+            return TryOnResponse(
+                success=True,
+                result_image=result_base64,
+                processing_time=processing_time
+            )
+        finally:
+            processing_lock.release()
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Inference failed: {e}", exc_info=True)
+        return TryOnResponse(
+            success=False,
+            error=str(e),
+            error_code="INFERENCE_FAILED"
+        )
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(
+        "main:app",
+        host="0.0.0.0",
+        port=7860,
+        reload=False,  # Set to True for development
+        log_level="info"
+    )

requirements.txt ADDED Viewed

	@@ -0,0 +1,26 @@

+# StableVITON Virtual Try-On - Hugging Face Spaces Requirements
+# Core Machine Learning (Latest compatible versions)
+torch>=2.2.0
+torchvision>=0.17.0
+diffusers>=0.27.0
+transformers>=4.39.0
+accelerate>=0.28.0
+xformers>=0.0.25  # Memory efficient attention
+# Image Processing
+Pillow>=10.2.0
+opencv-python-headless>=4.9.0  # Headless build (no GUI dependencies) for server use
+numpy>=1.26.0
+# API and Web
+fastapi>=0.110.0
+uvicorn[standard]>=0.29.0
+python-multipart>=0.0.9
+pydantic>=2.6.0
+requests>=2.31.0
+aiofiles>=23.2.0
+python-dotenv>=1.0.1
+# Integration
+huggingface-hub>=0.22.0

setup.bat ADDED Viewed

	@@ -0,0 +1,42 @@

+@echo off
+REM Quick Setup Script for Backend
+REM Run this from C:\antigravity-workspace\backend
+echo StableVITON Backend Setup
+echo =========================
+echo.
+REM Check if we're in the right directory
+if not exist "main.py" (
+    echo ERROR: Please run this script from C:\antigravity-workspace\backend
+    exit /b 1
+)
+REM Create virtual environment
+echo Creating virtual environment...
+python -m venv venv
+REM Activate virtual environment
+echo Activating virtual environment...
+call venv\Scripts\activate.bat
+REM Upgrade pip
+echo Upgrading pip...
+python -m pip install --upgrade pip
+REM Install dependencies
+echo Installing dependencies (this may take a few minutes)...
+pip install torch torchvision --index-url https://download.pytorch.org/whl/cu118
+pip install fastapi uvicorn[standard] python-multipart pydantic
+pip install diffusers transformers accelerate
+pip install Pillow opencv-python numpy
+pip install huggingface-hub python-dotenv requests aiofiles
+echo.
+echo Setup complete!
+echo.
+echo To run the server:
+echo   1. Activate venv: venv\Scripts\activate.bat
+echo   2. Run server: python main.py
+echo.
+echo Note: First run will download the AI model (~4GB)