import gradio as gr
import spaces
import torch
import numpy as np
from diffusers import DiffusionPipeline
from diffusers.models import AutoencoderKL
from diffusers.schedulers import EulerDiscreteScheduler
from diffusers.utils import export_to_video
import os
import time
from PIL import Image
from typing import Generator, Optional, Tuple
import gc
# Model configuration
MODEL_ID = "cerspense/zeroscope_v2_576w" # 2.5GB model with good quality
VAE_ID = "madebyollin/sdxl-vae-fp16-fix" # Compact VAE
SCHEDULER = "EulerDiscreteScheduler"
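
# Assumption: the SDXL fp16-fix VAE is treated here as a drop-in AutoencoderKL
# replacement for zeroscope's bundled VAE; if decoded frames look wrong,
# drop the vae= argument below to fall back to the pipeline's own VAE.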
@spaces.GPU(duration=1500)  # Extended duration so AoT compilation fits in one GPU window
def compile_model():
"""Compile the text-to-video model for optimal performance"""
print("π Compiling model for ahead-of-time optimization...")
# Load components
vae = AutoencoderKL.from_pretrained(VAE_ID, torch_dtype=torch.float16)
scheduler = EulerDiscreteScheduler.from_pretrained(MODEL_ID, subfolder="scheduler")
# Create pipeline with optimization
pipe = DiffusionPipeline.from_pretrained(
MODEL_ID,
vae=vae,
scheduler=scheduler,
torch_dtype=torch.float16,
variant="fp16",
use_safetensors=True
)
# Enable memory efficient attention and compile
pipe.enable_model_cpu_offload()
pipe.enable_vae_slicing()
pipe.enable_attention_slicing()
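    # Roughly what these three calls trade off:
    # - enable_model_cpu_offload(): keeps submodules on CPU and moves each one
    #   to the GPU only while it runs, cutting peak VRAM at some latency cost
    # - enable_vae_slicing(): decodes latents one slice at a time
    # - enable_attention_slicing(): computes attention in chunks to lower
    #   peak memory during the denoiser's forward pass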
    # AoT compilation for a 1.3x-1.8x speedup; zeroscope's denoiser is a
    # UNet3DConditionModel exposed as pipe.unet
    with spaces.aoti_capture(pipe.unet) as call:
        pipe("test prompt for compilation", num_frames=6)
    exported = torch.export.export(
        pipe.unet,
        args=call.args,
        kwargs=call.kwargs,
    )
    compiled_model = spaces.aoti_compile(exported)
    spaces.aoti_apply(compiled_model, pipe.unet)
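    # The flow above: aoti_capture records the args/kwargs of one real denoiser
    # forward pass, torch.export.export traces the module with those inputs,
    # aoti_compile builds an AOTInductor artifact, and aoti_apply patches it
    # into the pipeline. One caveat (an assumption worth testing): the exported
    # graph may specialize to the captured shapes, so capturing with
    # num_frames=6 while serving other frame counts can require dynamic shapes.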
return pipe
# Initialize the model
print("π Loading text-to-video model...")
pipe = compile_model()
pipe.to('cuda')
@spaces.GPU
def generate_video(
prompt: str,
num_frames: int = 8,
width: int = 576,
height: int = 320,
num_inference_steps: int = 25,
guidance_scale: float = 17.5,
progress: gr.Progress = gr.Progress()
) -> Generator[Tuple[str, Optional[str]], None, None]:
"""
Generate a video from text prompt using the compiled model.
Args:
prompt: Text description for video generation
num_frames: Number of frames in the video (6-16)
width: Video width (576 recommended for quality)
height: Video height (320 recommended for quality)
num_inference_steps: Diffusion steps (20-30 recommended)
guidance_scale: CFG scale (15-20 recommended)
Yields:
        Tuple of (status_message, video_file_path or None)
"""
try:
# Clear GPU cache for optimal performance
if torch.cuda.is_available():
torch.cuda.empty_cache()
gc.collect()
# Validate parameters
prompt = prompt.strip()
if not prompt:
yield "β Please enter a text prompt", None
return
if not 6 <= num_frames <= 16:
yield "β Number of frames must be between 6-16", None
return
if not 200 <= width <= 1024:
yield "β Width must be between 200-1024", None
return
if not 200 <= height <= 1024:
yield "β Height must be between 200-1024", None
return
yield "π¬ Initializing video generation...", None
        # Report denoising progress through Gradio's Progress API; the
        # old-style diffusers callback fires once per inference step
        total_steps = num_inference_steps
        current_step = 0
        def progress_callback(step, timestep, latents):
            nonlocal current_step
            current_step += 1
            progress(current_step / total_steps,
                     desc=f"🎨 Generating video... ({current_step}/{total_steps} steps)")
# Generate video frames
yield "π₯ Generating video frames...", None
start_time = time.time()
# Run inference with optimized settings
with torch.inference_mode():
result = pipe(
prompt=prompt,
num_frames=num_frames,
width=width,
height=height,
num_inference_steps=num_inference_steps,
guidance_scale=guidance_scale,
callback=progress_callback,
callback_steps=1
)
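            # Assumption: callback/callback_steps target older diffusers
            # releases; newer versions replace them with callback_on_step_end.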
        # Extract the frames and write them to an MP4, since gr.Video expects
        # a file path rather than a raw frame array (fps=8 is an assumed
        # playback rate; adjust to taste)
        frames = result.frames[0]  # first batch of frames
        video_path = export_to_video(frames, fps=8)
        generation_time = time.time() - start_time
        yield f"✅ Video generated in {generation_time:.1f}s!", video_path
except Exception as e:
error_msg = f"β Generation failed: {str(e)}"
yield error_msg, None
print(f"Error: {e}")
finally:
# Clean up
if torch.cuda.is_available():
torch.cuda.empty_cache()
gc.collect()
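
# Hypothetical direct-usage sketch (bypassing the UI), using only names
# defined in this file:
#   for status, video_path in generate_video("a cat surfing a wave"):
#       print(status, video_path)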
def get_recommended_settings() -> dict:
"""Get recommended generation settings"""
return {
"num_frames": 8,
"width": 576,
"height": 320,
"num_inference_steps": 25,
"guidance_scale": 17.5
}
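
# Note: the insertion order of the dict above matters; clear_all() below
# unpacks .values() positionally into the frames/width/height/steps/guidance sliders.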
# Create the Gradio interface
def create_demo():
"""Create the main Gradio demo"""
    with gr.Blocks(
        title="🚀 Lightning Text-to-Video Generator",
        theme=gr.themes.Soft()
    ) as demo:
# Header with anycoder attribution
gr.HTML("""
<div style="text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 10px; margin-bottom: 20px;">
<h1 style="color: white; margin: 0; font-size: 2.5em;">π¬ Lightning Text-to-Video Generator</h1>
<p style="color: white; margin: 10px 0; font-size: 1.2em;">Transform your ideas into stunning videos instantly</p>
<a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #FFD700; text-decoration: none; font-size: 1.1em; font-weight: bold;">
                ⭐ Built with anycoder
</a>
</div>
""")
with gr.Row():
with gr.Column(scale=1):
gr.HTML("<h3>π Text Prompt</h3>")
prompt_input = gr.Textbox(
label="Describe your video",
placeholder="A majestic dragon flying over a mystical forest at sunset, with glowing particles falling from the sky",
lines=4,
max_length=500
)
# Quick presets
gr.HTML("<h3>π― Quick Presets</h3>")
with gr.Row():
preset_btn1 = gr.Button("π Nature Scene", variant="secondary", size="sm")
preset_btn2 = gr.Button("ποΈ Urban Scene", variant="secondary", size="sm")
preset_btn3 = gr.Button("π Sci-Fi", variant="secondary", size="sm")
preset_btn4 = gr.Button("π Fantasy", variant="secondary", size="sm")
# Advanced settings
with gr.Accordion("βοΈ Advanced Settings", open=False):
num_frames = gr.Slider(
minimum=6, maximum=16, value=8, step=1,
label="Number of Frames",
info="More frames = longer video but slower generation"
)
with gr.Row():
width = gr.Slider(
minimum=200, maximum=1024, value=576, step=64,
label="Width",
info="Video width (576px recommended)"
)
height = gr.Slider(
minimum=200, maximum=1024, value=320, step=64,
label="Height",
info="Video height (320px recommended)"
)
num_inference_steps = gr.Slider(
minimum=15, maximum=50, value=25, step=5,
label="Generation Steps",
info="More steps = better quality but slower"
)
guidance_scale = gr.Slider(
minimum=5, maximum=25, value=17.5, step=0.5,
label="Guidance Scale",
info="How closely to follow the prompt (15-20 recommended)"
)
# Action buttons
with gr.Row():
generate_btn = gr.Button("π Generate Video", variant="primary", size="lg")
clear_btn = gr.Button("ποΈ Clear", variant="secondary")
# Quick settings
with gr.Row():
quality_btn = gr.Button("β‘ Fast", variant="secondary", size="sm")
quality_btn2 = gr.Button("π¨ High Quality", variant="secondary", size="sm")
# Status display
status = gr.HTML("<p style='color: #666;'>Ready to generate your video!</p>")
with gr.Column(scale=1):
gr.HTML("<h3>π₯ Generated Video</h3>")
video_output = gr.Video(
label="Your Generated Video",
format="mp4",
loop=True,
autoplay=True,
height=400
)
# Info panel
info_panel = gr.HTML("""
<div style="padding: 15px; background: #f8f9fa; border-radius: 8px; margin-top: 10px;">
<h4>π‘ Tips for Better Results:</h4>
<ul style="color: #555; font-size: 0.9em;">
<li>Be specific and descriptive in your prompts</li>
<li>Use adjectives to describe style, lighting, mood</li>
<li>Include camera movements (pan, zoom, rotate)</li>
<li>Fast mode: 6-8 frames, 15-20 steps</li>
<li>High quality: 10-12 frames, 25-30 steps</li>
</ul>
</div>
""")
# Preset prompt handlers
preset_prompts = {
preset_btn1: "A serene mountain landscape with flowing river, golden hour lighting, birds flying in the sky",
preset_btn2: "A bustling city street at night with neon lights, cars driving by, people walking",
preset_btn3: "A futuristic spaceship flying through a galaxy with colorful nebulas and distant stars",
preset_btn4: "A magical forest with glowing mushrooms, fairy lights dancing, mystical creatures moving"
}
for btn, preset_text in preset_prompts.items():
btn.click(
lambda text=preset_text: gr.update(value=text),
outputs=prompt_input
)
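        # The text=preset_text default argument freezes each preset string at
        # definition time; a bare closure over the loop variable would bind
        # every button to the last preset.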
# Quality settings
def apply_fast_settings():
return 6, 512, 288, 15, 15.0
def apply_quality_settings():
return 12, 576, 320, 30, 18.0
quality_btn.click(apply_fast_settings, outputs=[num_frames, width, height, num_inference_steps, guidance_scale])
quality_btn2.click(apply_quality_settings, outputs=[num_frames, width, height, num_inference_steps, guidance_scale])
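        # Gradio maps each value of the returned tuple positionally onto the
        # components in outputs=, so the 5-tuples above must match the slider
        # order exactly.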
        # Main generation handler: delegate to the generate_video generator so
        # each yielded (status, video) pair streams straight to the UI
        def handle_generate(prompt, num_frames, width, height, steps, guidance):
            yield from generate_video(prompt, num_frames, width, height, steps, guidance)
# Connect events
generate_btn.click(
handle_generate,
inputs=[prompt_input, num_frames, width, height, num_inference_steps, guidance_scale],
outputs=[status, video_output]
)
def clear_all():
return "", None, *get_recommended_settings().values(), "ποΈ Cleared! Ready for new generation."
clear_btn.click(
clear_all,
outputs=[prompt_input, video_output, num_frames, width, height, num_inference_steps, guidance_scale, status]
)
return demo
# Create and launch the demo
if __name__ == "__main__":
demo = create_demo()
    # Launch with optimized settings; enable the queue so generator handlers
    # can stream incremental updates (concurrency is a queue() setting,
    # not a launch() one)
    demo.queue(default_concurrency_limit=10)
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
        show_error=True,
        quiet=False,
        max_threads=40
    )