AiCoderv2 committed on
Commit
3da4f52
verified
1 Parent(s): e165789

Deploy Gradio app with multiple files

Files changed (2)
  1. app.py +339 -0
  2. requirements.txt +24 -0
app.py ADDED
@@ -0,0 +1,339 @@
+ import gradio as gr
+ import spaces
+ import torch
+ import numpy as np
+ from diffusers import DiffusionPipeline
+ from diffusers.models import AutoencoderKL
+ from diffusers.schedulers import EulerDiscreteScheduler
+ from diffusers.utils import export_to_video  # writes generated frames to an .mp4 for gr.Video
+ import os
+ import time
+ from PIL import Image
+ from typing import Generator, Optional, Tuple
+ import gc
+
+ # Model configuration
+ MODEL_ID = "cerspense/zeroscope_v2_576w"  # ~2.5GB text-to-video model with good quality
+ VAE_ID = "madebyollin/sdxl-vae-fp16-fix"  # compact fp16-safe VAE
+ SCHEDULER = "EulerDiscreteScheduler"
+
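+ # Ahead-of-time (AoT) flow used below: run one real forward pass under
+ # spaces.aoti_capture to record the denoiser's example inputs, torch.export
+ # the module with those inputs, compile the exported program, and patch the
+ # compiled module back into the pipeline. The spaces.aoti_* helpers are
+ # assumed here to follow the ZeroGPU AoT API.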
+ @spaces.GPU(duration=1500)  # generous window for the one-time AoT compilation pass
+ def compile_model():
+     """Compile the text-to-video model for optimal performance."""
+     print("🚀 Compiling model for ahead-of-time optimization...")
+
+     # Load components
+     vae = AutoencoderKL.from_pretrained(VAE_ID, torch_dtype=torch.float16)
+     scheduler = EulerDiscreteScheduler.from_pretrained(MODEL_ID, subfolder="scheduler")
+
+     # Create pipeline with optimization
+     pipe = DiffusionPipeline.from_pretrained(
+         MODEL_ID,
+         vae=vae,
+         scheduler=scheduler,
+         torch_dtype=torch.float16,
+         variant="fp16",
+         use_safetensors=True
+     )
+
+     # Enable memory-efficient attention and slicing
+     pipe.enable_model_cpu_offload()
+     pipe.enable_vae_slicing()
+     pipe.enable_attention_slicing()
+
+     # AoT compilation for a 1.3x-1.8x speedup; zeroscope is a UNet-based
+     # pipeline, so the module to capture and export is pipe.unet
+     with spaces.aoti_capture(pipe.unet) as call:
+         pipe("test prompt for compilation", num_frames=6)
+
+     exported = torch.export.export(
+         pipe.unet,
+         args=call.args,
+         kwargs=call.kwargs,
+     )
+
+     compiled_model = spaces.aoti_compile(exported)
+     spaces.aoti_apply(compiled_model, pipe.unet)
+
+     return pipe
+
+ # Initialize (and compile) the model once at startup
+ print("🔄 Loading text-to-video model...")
+ pipe = compile_model()
+ # enable_model_cpu_offload() already manages device placement, so the
+ # pipeline must not also be moved wholesale with pipe.to('cuda')
+
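+ # Per-request GPU allocation: on ZeroGPU, @spaces.GPU attaches a GPU for the
+ # duration of each generate_video call and releases it afterwards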
+ @spaces.GPU
+ def generate_video(
+     prompt: str,
+     num_frames: int = 8,
+     width: int = 576,
+     height: int = 320,
+     num_inference_steps: int = 25,
+     guidance_scale: float = 17.5,
+     progress: gr.Progress = gr.Progress()
+ ) -> Generator[Tuple[str, Optional[str]], None, None]:
+     """
+     Generate a video from a text prompt using the compiled model.
+
+     Args:
+         prompt: Text description for video generation
+         num_frames: Number of frames in the video (6-16)
+         width: Video width (576 recommended for quality)
+         height: Video height (320 recommended for quality)
+         num_inference_steps: Diffusion steps (20-30 recommended)
+         guidance_scale: CFG scale (15-20 recommended)
+
+     Yields:
+         Tuple of (status_message, video_path or None)
+     """
+     try:
+         # Clear GPU cache for optimal performance
+         if torch.cuda.is_available():
+             torch.cuda.empty_cache()
+         gc.collect()
+
+         # Validate parameters
+         prompt = prompt.strip()
+         if not prompt:
+             yield "❌ Please enter a text prompt", None
+             return
+
+         if not 6 <= num_frames <= 16:
+             yield "❌ Number of frames must be between 6 and 16", None
+             return
+
+         if not 200 <= width <= 1024:
+             yield "❌ Width must be between 200 and 1024", None
+             return
+
+         if not 200 <= height <= 1024:
+             yield "❌ Height must be between 200 and 1024", None
+             return
+
+         yield "🎬 Initializing video generation...", None
+
+         # Track diffusion progress through gr.Progress: a pipeline callback
+         # cannot `yield` into this generator, so report steps via the
+         # progress bar instead
+         def progress_callback(step, timestep, latents):
+             progress((step + 1) / num_inference_steps,
+                      desc=f"🎨 Generating... step {step + 1}/{num_inference_steps}")
+
+         # Generate video frames
+         yield "🔥 Generating video frames...", None
+         start_time = time.time()
+
+         # Run inference with optimized settings
+         with torch.inference_mode():
+             result = pipe(
+                 prompt=prompt,
+                 num_frames=num_frames,
+                 width=width,
+                 height=height,
+                 num_inference_steps=num_inference_steps,
+                 guidance_scale=guidance_scale,
+                 callback=progress_callback,
+                 callback_steps=1
+             )
+
+         # Extract frames and write them to an .mp4 so gr.Video can play them
+         frames = result.frames[0]  # first batch of frames
+         video_path = export_to_video(frames, fps=8)
+         generation_time = time.time() - start_time
+
+         yield f"✅ Video generated in {generation_time:.1f}s!", video_path
+
+     except Exception as e:
+         error_msg = f"❌ Generation failed: {str(e)}"
+         yield error_msg, None
+         print(f"Error: {e}")
+
+     finally:
+         # Clean up
+         if torch.cuda.is_available():
+             torch.cuda.empty_cache()
+         gc.collect()
+
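+ # Shared defaults: restored by the Clear button, which unpacks these values
+ # in order onto the settings sliders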
+ def get_recommended_settings() -> dict:
+     """Get recommended generation settings"""
+     return {
+         "num_frames": 8,
+         "width": 576,
+         "height": 320,
+         "num_inference_steps": 25,
+         "guidance_scale": 17.5
+     }
+
+ # Create the Gradio interface
+ def create_demo():
+     """Create the main Gradio demo"""
+
+     # gr.Blocks takes no `description` argument; the header HTML below
+     # carries the tagline instead
+     with gr.Blocks(
+         title="🚀 Lightning Text-to-Video Generator",
+         theme=gr.themes.Soft()
+     ) as demo:
+
+         # Header with anycoder attribution
+         gr.HTML("""
+         <div style="text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 10px; margin-bottom: 20px;">
+             <h1 style="color: white; margin: 0; font-size: 2.5em;">🎬 Lightning Text-to-Video Generator</h1>
+             <p style="color: white; margin: 10px 0; font-size: 1.2em;">Transform your ideas into stunning videos instantly</p>
+             <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #FFD700; text-decoration: none; font-size: 1.1em; font-weight: bold;">
+                 ⭐ Built with anycoder
+             </a>
+         </div>
+         """)
188
+
189
+ with gr.Row():
190
+ with gr.Column(scale=1):
191
+ gr.HTML("<h3>πŸ“ Text Prompt</h3>")
192
+ prompt_input = gr.Textbox(
193
+ label="Describe your video",
194
+ placeholder="A majestic dragon flying over a mystical forest at sunset, with glowing particles falling from the sky",
195
+ lines=4,
196
+ max_length=500
197
+ )
198
+
199
+ # Quick presets
200
+ gr.HTML("<h3>🎯 Quick Presets</h3>")
201
+ with gr.Row():
202
+ preset_btn1 = gr.Button("🌊 Nature Scene", variant="secondary", size="sm")
203
+ preset_btn2 = gr.Button("πŸ™οΈ Urban Scene", variant="secondary", size="sm")
204
+ preset_btn3 = gr.Button("πŸš€ Sci-Fi", variant="secondary", size="sm")
205
+ preset_btn4 = gr.Button("🎭 Fantasy", variant="secondary", size="sm")
206
+
+                 # Advanced settings
+                 with gr.Accordion("⚙️ Advanced Settings", open=False):
+                     num_frames = gr.Slider(
+                         minimum=6, maximum=16, value=8, step=1,
+                         label="Number of Frames",
+                         info="More frames = longer video but slower generation"
+                     )
+
+                     with gr.Row():
+                         width = gr.Slider(
+                             minimum=200, maximum=1024, value=576, step=64,
+                             label="Width",
+                             info="Video width (576px recommended)"
+                         )
+                         height = gr.Slider(
+                             minimum=200, maximum=1024, value=320, step=64,
+                             label="Height",
+                             info="Video height (320px recommended)"
+                         )
+
+                     num_inference_steps = gr.Slider(
+                         minimum=15, maximum=50, value=25, step=5,
+                         label="Generation Steps",
+                         info="More steps = better quality but slower"
+                     )
+
+                     guidance_scale = gr.Slider(
+                         minimum=5, maximum=25, value=17.5, step=0.5,
+                         label="Guidance Scale",
+                         info="How closely to follow the prompt (15-20 recommended)"
+                     )
238
+
239
+ # Action buttons
240
+ with gr.Row():
241
+ generate_btn = gr.Button("πŸš€ Generate Video", variant="primary", size="lg")
242
+ clear_btn = gr.Button("πŸ—‘οΈ Clear", variant="secondary")
243
+
244
+ # Quick settings
245
+ with gr.Row():
246
+ quality_btn = gr.Button("⚑ Fast", variant="secondary", size="sm")
247
+ quality_btn2 = gr.Button("🎨 High Quality", variant="secondary", size="sm")
248
+
249
+ # Status display
250
+ status = gr.HTML("<p style='color: #666;'>Ready to generate your video!</p>")
251
+
252
+ with gr.Column(scale=1):
253
+ gr.HTML("<h3>πŸŽ₯ Generated Video</h3>")
254
+ video_output = gr.Video(
255
+ label="Your Generated Video",
256
+ format="mp4",
257
+ loop=True,
258
+ autoplay=True,
259
+ height=400
260
+ )
261
+
262
+ # Info panel
263
+ info_panel = gr.HTML("""
264
+ <div style="padding: 15px; background: #f8f9fa; border-radius: 8px; margin-top: 10px;">
265
+ <h4>πŸ’‘ Tips for Better Results:</h4>
266
+ <ul style="color: #555; font-size: 0.9em;">
267
+ <li>Be specific and descriptive in your prompts</li>
268
+ <li>Use adjectives to describe style, lighting, mood</li>
269
+ <li>Include camera movements (pan, zoom, rotate)</li>
270
+ <li>Fast mode: 6-8 frames, 15-20 steps</li>
271
+ <li>High quality: 10-12 frames, 25-30 steps</li>
272
+ </ul>
273
+ </div>
274
+ """)
275
+
276
+ # Preset prompt handlers
277
+ preset_prompts = {
278
+ preset_btn1: "A serene mountain landscape with flowing river, golden hour lighting, birds flying in the sky",
279
+ preset_btn2: "A bustling city street at night with neon lights, cars driving by, people walking",
280
+ preset_btn3: "A futuristic spaceship flying through a galaxy with colorful nebulas and distant stars",
281
+ preset_btn4: "A magical forest with glowing mushrooms, fairy lights dancing, mystical creatures moving"
282
+ }
283
+
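+         # `text=preset_text` binds the current loop value as a default
+         # argument; a plain closure would late-bind and leave every button
+         # pasting the last preset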
+         for btn, preset_text in preset_prompts.items():
+             btn.click(
+                 lambda text=preset_text: gr.update(value=text),
+                 outputs=prompt_input
+             )
289
+
290
+ # Quality settings
291
+ def apply_fast_settings():
292
+ return 6, 512, 288, 15, 15.0
293
+
294
+ def apply_quality_settings():
295
+ return 12, 576, 320, 30, 18.0
296
+
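+         # Each returned tuple maps positionally onto the outputs list:
+         # (num_frames, width, height, num_inference_steps, guidance_scale)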
+         quality_btn.click(apply_fast_settings, outputs=[num_frames, width, height, num_inference_steps, guidance_scale])
+         quality_btn2.click(apply_quality_settings, outputs=[num_frames, width, height, num_inference_steps, guidance_scale])
+
+         # Main generation handler: Gradio streams updates only when the
+         # handler itself is a generator, so delegate with `yield from`
+         def handle_generate(prompt, num_frames, width, height, steps, guidance):
+             yield from generate_video(prompt, num_frames, width, height, steps, guidance)
+
+         # Connect events
+         generate_btn.click(
+             handle_generate,
+             inputs=[prompt_input, num_frames, width, height, num_inference_steps, guidance_scale],
+             outputs=[status, video_output]
+         )
+
+         def clear_all():
+             return "", None, *get_recommended_settings().values(), "🗑️ Cleared! Ready for new generation."
+
+         clear_btn.click(
+             clear_all,
+             outputs=[prompt_input, video_output, num_frames, width, height, num_inference_steps, guidance_scale, status]
+         )
323
+
324
+ return demo
325
+
326
+ # Create and launch the demo
327
+ if __name__ == "__main__":
328
+ demo = create_demo()
329
+
330
+ # Launch with optimized settings
331
+ demo.launch(
332
+ server_name="0.0.0.0",
333
+ server_port=7860,
334
+ share=True,
335
+ show_error=True,
336
+ quiet=False,
337
+ max_threads=40,
338
+ concurrency_limit=10
339
+ )
requirements.txt ADDED
@@ -0,0 +1,24 @@
+ gradio[webrtc]==4.29.0
+ spaces==0.20.0
+ torch>=2.0.0
+ torchvision>=0.15.0
+ torchaudio>=2.0.0
+ diffusers==0.27.0
+ transformers==4.40.0
+ accelerate==0.27.0
+ safetensors==0.4.2
+ xformers==0.0.24
+ pillow>=10.0.0
+ numpy>=1.24.0
+ opencv-python>=4.8.0
+ einops>=0.7.0
+ triton>=2.0.0
+ # Key dependencies:
+ # spaces (for ZeroGPU optimization)
+ # torch (>=2.0.0 for diffusion models)
+ # diffusers (for the text-to-video pipeline)
+ # transformers (for model components)
+ # accelerate (for memory optimization)
+ # pillow (for image handling)
+ # numpy (for array operations)
+ # opencv-python (for video processing)