Spaces:
Build error
import gradio as gr
import spaces
import torch
import time
import gc
from typing import Generator, Optional, Tuple
from diffusers import DiffusionPipeline
from diffusers.models import AutoencoderKL
from diffusers.schedulers import EulerDiscreteScheduler
from diffusers.utils import export_to_video
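# Assumed dependencies (a requirements.txt sketch; exact pins are guesses):
#   gradio>=4.36   # Textbox(max_length=...) and queue(default_concurrency_limit=...)
#   spaces         # ZeroGPU decorator and AoT helpers
#   torch, diffusers>=0.24, accelerate, safetensors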
# Model configuration
MODEL_ID = "cerspense/zeroscope_v2_576w"  # ~2.5GB model with good quality
VAE_ID = "madebyollin/sdxl-vae-fp16-fix"  # Compact VAE
SCHEDULER = "EulerDiscreteScheduler"

# Ahead-of-time (AoT) compilation of the denoising model
def compile_model():
    """Compile the text-to-video model for optimal performance."""
    print("🚀 Compiling model for ahead-of-time optimization...")
    # Load components
    vae = AutoencoderKL.from_pretrained(VAE_ID, torch_dtype=torch.float16)
    scheduler = EulerDiscreteScheduler.from_pretrained(MODEL_ID, subfolder="scheduler")

    # Create the pipeline with the swapped-in components
    pipe = DiffusionPipeline.from_pretrained(
        MODEL_ID,
        vae=vae,
        scheduler=scheduler,
        torch_dtype=torch.float16,
        variant="fp16",
        use_safetensors=True
    )
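    # Caveat (an assumption worth verifying): the SDXL VAE loads because it is
    # also an AutoencoderKL, but it was trained for SDXL images; dropping the
    # vae= override and keeping the model's bundled VAE is the safer default.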
    # Enable memory-efficient attention and slicing
    pipe.enable_model_cpu_offload()
    pipe.enable_vae_slicing()
    pipe.enable_attention_slicing()

    # AoT compilation for a 1.3x-1.8x speedup. zeroscope is UNet-based,
    # so capture and compile pipe.unet (this pipeline has no .transformer).
    with spaces.aoti_capture(pipe.unet) as call:
        pipe("test prompt for compilation", num_frames=6)
    exported = torch.export.export(
        pipe.unet,
        args=call.args,
        kwargs=call.kwargs,
    )
    compiled_unet = spaces.aoti_compile(exported)
    spaces.aoti_apply(compiled_unet, pipe.unet)
    return pipe
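# NOTE: the spaces.aoti_* helpers assume a recent `spaces` package on ZeroGPU
# hardware, where compilation is typically run inside a @spaces.GPU function
# at startup. A defensive variant would wrap the capture/export/compile steps
# in try/except and fall back to the uncompiled pipeline if anything fails.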
# Initialize the model
print("🚀 Loading text-to-video model...")
pipe = compile_model()
# enable_model_cpu_offload() manages device placement itself, so do not
# follow it with pipe.to('cuda'); diffusers rejects that combination.
@spaces.GPU  # Assumed ZeroGPU Space: the handler needs GPU access at call time
def generate_video(
    prompt: str,
    num_frames: int = 8,
    width: int = 576,
    height: int = 320,
    num_inference_steps: int = 25,
    guidance_scale: float = 17.5,
    progress: gr.Progress = gr.Progress()
) -> Generator[Tuple[str, Optional[str]], None, None]:
| """ | |
| Generate a video from text prompt using the compiled model. | |
| Args: | |
| prompt: Text description for video generation | |
| num_frames: Number of frames in the video (6-16) | |
| width: Video width (576 recommended for quality) | |
| height: Video height (320 recommended for quality) | |
| num_inference_steps: Diffusion steps (20-30 recommended) | |
| guidance_scale: CFG scale (15-20 recommended) | |
| Yields: | |
| Tuple of (status_message, video_data) | |
| """ | |
    try:
        # Clear GPU cache for optimal performance
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()

        # Validate parameters
        prompt = prompt.strip()
        if not prompt:
            yield "❌ Please enter a text prompt", None
            return
        if not 6 <= num_frames <= 16:
            yield "❌ Number of frames must be between 6 and 16", None
            return
        if not 200 <= width <= 1024:
            yield "❌ Width must be between 200 and 1024", None
            return
        if not 200 <= height <= 1024:
            yield "❌ Height must be between 200 and 1024", None
            return

        yield "🎬 Initializing video generation...", None

        # Progress tracking. A diffusers callback cannot `yield` back into
        # this generator, so report step progress through gr.Progress instead.
        total_steps = num_inference_steps

        def progress_callback(step, timestep, latents):
            progress((step + 1) / total_steps,
                     desc=f"🎨 Generating video... ({step + 1}/{total_steps} steps)")
        # Generate video frames
        yield "🎥 Generating video frames...", None
        start_time = time.time()

        # Run inference with optimized settings
        with torch.inference_mode():
            result = pipe(
                prompt=prompt,
                num_frames=num_frames,
                width=width,
                height=height,
                num_inference_steps=num_inference_steps,
                guidance_scale=guidance_scale,
                callback=progress_callback,
                callback_steps=1
            )

        # gr.Video expects a file path, not raw frames, so encode to mp4 first
        frames = result.frames[0]  # First batch of frames
        video_path = export_to_video(frames, fps=8)
        generation_time = time.time() - start_time
        yield f"✅ Video generated in {generation_time:.1f}s!", video_path
    except Exception as e:
        error_msg = f"❌ Generation failed: {e}"
        yield error_msg, None
        print(f"Error: {e}")
    finally:
        # Clean up
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()
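# Example (outside Gradio): drain the generator and keep the last video path.
#   path = None
#   for status, video in generate_video("a cat surfing a wave at sunset"):
#       print(status)
#       path = video or path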
def get_recommended_settings() -> dict:
    """Get the recommended generation settings."""
    return {
        "num_frames": 8,
        "width": 576,
        "height": 320,
        "num_inference_steps": 25,
        "guidance_scale": 17.5
    }
# Create the Gradio interface
def create_demo():
    """Create the main Gradio demo."""
    # gr.Blocks has no `description` parameter; the tagline lives in the
    # HTML header below instead.
    with gr.Blocks(
        title="🎬 Lightning Text-to-Video Generator",
        theme=gr.themes.Soft()
    ) as demo:
        # Header with anycoder attribution
        gr.HTML("""
        <div style="text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 10px; margin-bottom: 20px;">
            <h1 style="color: white; margin: 0; font-size: 2.5em;">🎬 Lightning Text-to-Video Generator</h1>
            <p style="color: white; margin: 10px 0; font-size: 1.2em;">Transform your ideas into stunning videos instantly</p>
            <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #FFD700; text-decoration: none; font-size: 1.1em; font-weight: bold;">
                ⭐ Built with anycoder
            </a>
        </div>
        """)
        with gr.Row():
            with gr.Column(scale=1):
                gr.HTML("<h3>📝 Text Prompt</h3>")
                prompt_input = gr.Textbox(
                    label="Describe your video",
                    placeholder="A majestic dragon flying over a mystical forest at sunset, with glowing particles falling from the sky",
                    lines=4,
                    max_length=500
                )

                # Quick presets
                gr.HTML("<h3>🎯 Quick Presets</h3>")
                with gr.Row():
                    preset_btn1 = gr.Button("🌄 Nature Scene", variant="secondary", size="sm")
                    preset_btn2 = gr.Button("🏙️ Urban Scene", variant="secondary", size="sm")
                    preset_btn3 = gr.Button("🚀 Sci-Fi", variant="secondary", size="sm")
                    preset_btn4 = gr.Button("🐉 Fantasy", variant="secondary", size="sm")
                # Advanced settings
                with gr.Accordion("⚙️ Advanced Settings", open=False):
                    num_frames = gr.Slider(
                        minimum=6, maximum=16, value=8, step=1,
                        label="Number of Frames",
                        info="More frames = longer video but slower generation"
                    )
                    with gr.Row():
                        # minimum=256 keeps the 64px step aligned with the
                        # 576/320 defaults (200 + n*64 never reaches 576)
                        width = gr.Slider(
                            minimum=256, maximum=1024, value=576, step=64,
                            label="Width",
                            info="Video width (576px recommended)"
                        )
                        height = gr.Slider(
                            minimum=256, maximum=1024, value=320, step=64,
                            label="Height",
                            info="Video height (320px recommended)"
                        )
                    num_inference_steps = gr.Slider(
                        minimum=15, maximum=50, value=25, step=5,
                        label="Generation Steps",
                        info="More steps = better quality but slower"
                    )
                    guidance_scale = gr.Slider(
                        minimum=5, maximum=25, value=17.5, step=0.5,
                        label="Guidance Scale",
                        info="How closely to follow the prompt (15-20 recommended)"
                    )
                # Action buttons
                with gr.Row():
                    generate_btn = gr.Button("🚀 Generate Video", variant="primary", size="lg")
                    clear_btn = gr.Button("🗑️ Clear", variant="secondary")

                # Quick settings
                with gr.Row():
                    quality_btn = gr.Button("⚡ Fast", variant="secondary", size="sm")
                    quality_btn2 = gr.Button("🎨 High Quality", variant="secondary", size="sm")

                # Status display
                status = gr.HTML("<p style='color: #666;'>Ready to generate your video!</p>")
            with gr.Column(scale=1):
                gr.HTML("<h3>🎥 Generated Video</h3>")
                video_output = gr.Video(
                    label="Your Generated Video",
                    format="mp4",
                    loop=True,
                    autoplay=True,
                    height=400
                )
                # Info panel
                info_panel = gr.HTML("""
                <div style="padding: 15px; background: #f8f9fa; border-radius: 8px; margin-top: 10px;">
                    <h4>💡 Tips for Better Results:</h4>
                    <ul style="color: #555; font-size: 0.9em;">
                        <li>Be specific and descriptive in your prompts</li>
                        <li>Use adjectives to describe style, lighting, mood</li>
                        <li>Include camera movements (pan, zoom, rotate)</li>
                        <li>Fast mode: 6-8 frames, 15-20 steps</li>
                        <li>High quality: 10-12 frames, 25-30 steps</li>
                    </ul>
                </div>
                """)
        # Preset prompt handlers
        preset_prompts = {
            preset_btn1: "A serene mountain landscape with flowing river, golden hour lighting, birds flying in the sky",
            preset_btn2: "A bustling city street at night with neon lights, cars driving by, people walking",
            preset_btn3: "A futuristic spaceship flying through a galaxy with colorful nebulas and distant stars",
            preset_btn4: "A magical forest with glowing mushrooms, fairy lights dancing, mystical creatures moving"
        }
        for btn, preset_text in preset_prompts.items():
            btn.click(
                lambda text=preset_text: gr.update(value=text),
                outputs=prompt_input
            )
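        # The text=preset_text default binds the current loop value; a bare
        # closure over preset_text would make every button use the last preset.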
        # Quality settings
        def apply_fast_settings():
            return 6, 512, 288, 15, 15.0

        def apply_quality_settings():
            return 12, 576, 320, 30, 18.0

        quality_btn.click(apply_fast_settings, outputs=[num_frames, width, height, num_inference_steps, guidance_scale])
        quality_btn2.click(apply_quality_settings, outputs=[num_frames, width, height, num_inference_steps, guidance_scale])
        # Main generation handler: stream (status, video) updates from the
        # generate_video generator straight to the UI. The original returned
        # the inner generator function uncalled, so Gradio never iterated it.
        def handle_generate(prompt, num_frames, width, height, steps, guidance):
            yield from generate_video(prompt, num_frames, width, height, steps, guidance)

        # Connect events
        generate_btn.click(
            handle_generate,
            inputs=[prompt_input, num_frames, width, height, num_inference_steps, guidance_scale],
            outputs=[status, video_output]
        )
        def clear_all():
            return "", None, *get_recommended_settings().values(), "🗑️ Cleared! Ready for a new generation."

        clear_btn.click(
            clear_all,
            outputs=[prompt_input, video_output, num_frames, width, height, num_inference_steps, guidance_scale, status]
        )

    return demo
# Create and launch the demo
if __name__ == "__main__":
    demo = create_demo()
    # The queue is required for streaming generator handlers; concurrency is
    # configured on the queue, not via a launch() keyword.
    demo.queue(default_concurrency_limit=10)
    # Launch with optimized settings
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
        show_error=True,
        quiet=False,
        max_threads=40
    )
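# To run locally (assuming a CUDA GPU and the dependencies sketched above):
#   python app.py
# then open http://localhost:7860 in a browser.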