AiCoderv2 committed on
Commit
3da4f52
verified
1 Parent(s): e165789

Deploy Gradio app with multiple files

Files changed (2)
  1. app.py +339 -0
  2. requirements.txt +24 -0
app.py ADDED
@@ -0,0 +1,339 @@
+ import gradio as gr
+ import spaces
+ import torch
+ import numpy as np
+ from diffusers import DiffusionPipeline
+ from diffusers.models import AutoencoderKL
+ from diffusers.schedulers import EulerDiscreteScheduler
+ from diffusers.utils import export_to_video  # writes generated frames to an .mp4 for gr.Video
+ import os
+ import time
+ from PIL import Image
+ from typing import Generator, Optional, Tuple
+ import gc
+
+ # Model configuration
+ MODEL_ID = "cerspense/zeroscope_v2_576w"  # ~2.5GB text-to-video model with good quality
+ VAE_ID = "madebyollin/sdxl-vae-fp16-fix"  # compact fp16-safe VAE
+ SCHEDULER = "EulerDiscreteScheduler"
+
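+ # Ahead-of-time (AoT) flow used below: run one real forward pass under
+ # spaces.aoti_capture to record the denoiser's example inputs, torch.export
+ # the module with those inputs, compile the exported program, and patch the
+ # compiled module back into the pipeline. The spaces.aoti_* helpers are
+ # assumed here to follow the ZeroGPU AoT API.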
+ @spaces.GPU(duration=1500)  # generous window for the one-time AoT compilation pass
+ def compile_model():
+     """Compile the text-to-video model for optimal performance."""
+     print("🚀 Compiling model for ahead-of-time optimization...")
+
+     # Load components
+     vae = AutoencoderKL.from_pretrained(VAE_ID, torch_dtype=torch.float16)
+     scheduler = EulerDiscreteScheduler.from_pretrained(MODEL_ID, subfolder="scheduler")
+
+     # Create pipeline with optimization
+     pipe = DiffusionPipeline.from_pretrained(
+         MODEL_ID,
+         vae=vae,
+         scheduler=scheduler,
+         torch_dtype=torch.float16,
+         variant="fp16",
+         use_safetensors=True
+     )
+
+     # Enable memory-efficient attention and slicing
+     pipe.enable_model_cpu_offload()
+     pipe.enable_vae_slicing()
+     pipe.enable_attention_slicing()
+
+     # AoT compilation for a 1.3x-1.8x speedup; zeroscope is a UNet-based
+     # pipeline, so the module to capture and export is pipe.unet
+     with spaces.aoti_capture(pipe.unet) as call:
+         pipe("test prompt for compilation", num_frames=6)
+
+     exported = torch.export.export(
+         pipe.unet,
+         args=call.args,
+         kwargs=call.kwargs,
+     )
+
+     compiled_model = spaces.aoti_compile(exported)
+     spaces.aoti_apply(compiled_model, pipe.unet)
+
+     return pipe
+
+ # Initialize (and compile) the model once at startup
+ print("🔄 Loading text-to-video model...")
+ pipe = compile_model()
+ # enable_model_cpu_offload() already manages device placement, so the
+ # pipeline must not also be moved wholesale with pipe.to('cuda')
+
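+ # Per-request GPU allocation: on ZeroGPU, @spaces.GPU attaches a GPU for the
+ # duration of each generate_video call and releases it afterwards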
+ @spaces.GPU
+ def generate_video(
+     prompt: str,
+     num_frames: int = 8,
+     width: int = 576,
+     height: int = 320,
+     num_inference_steps: int = 25,
+     guidance_scale: float = 17.5,
+     progress: gr.Progress = gr.Progress()
+ ) -> Generator[Tuple[str, Optional[str]], None, None]:
+     """
+     Generate a video from a text prompt using the compiled model.
+
+     Args:
+         prompt: Text description for video generation
+         num_frames: Number of frames in the video (6-16)
+         width: Video width (576 recommended for quality)
+         height: Video height (320 recommended for quality)
+         num_inference_steps: Diffusion steps (20-30 recommended)
+         guidance_scale: CFG scale (15-20 recommended)
+
+     Yields:
+         Tuple of (status_message, video_path or None)
+     """
+     try:
+         # Clear GPU cache for optimal performance
+         if torch.cuda.is_available():
+             torch.cuda.empty_cache()
+         gc.collect()
+
+         # Validate parameters
+         prompt = prompt.strip()
+         if not prompt:
+             yield "❌ Please enter a text prompt", None
+             return
+
+         if not 6 <= num_frames <= 16:
+             yield "❌ Number of frames must be between 6 and 16", None
+             return
+
+         if not 200 <= width <= 1024:
+             yield "❌ Width must be between 200 and 1024", None
+             return
+
+         if not 200 <= height <= 1024:
+             yield "❌ Height must be between 200 and 1024", None
+             return
+
+         yield "🎬 Initializing video generation...", None
+
+         # Track diffusion progress through gr.Progress: a pipeline callback
+         # cannot `yield` into this generator, so report steps via the
+         # progress bar instead
+         def progress_callback(step, timestep, latents):
+             progress((step + 1) / num_inference_steps,
+                      desc=f"🎨 Generating... step {step + 1}/{num_inference_steps}")
+
+         # Generate video frames
+         yield "🔥 Generating video frames...", None
+         start_time = time.time()
+
+         # Run inference with optimized settings
+         with torch.inference_mode():
+             result = pipe(
+                 prompt=prompt,
+                 num_frames=num_frames,
+                 width=width,
+                 height=height,
+                 num_inference_steps=num_inference_steps,
+                 guidance_scale=guidance_scale,
+                 callback=progress_callback,
+                 callback_steps=1
+             )
+
+         # Extract frames and write them to an .mp4 so gr.Video can play them
+         frames = result.frames[0]  # first batch of frames
+         video_path = export_to_video(frames, fps=8)
+         generation_time = time.time() - start_time
+
+         yield f"✅ Video generated in {generation_time:.1f}s!", video_path
+
+     except Exception as e:
+         error_msg = f"❌ Generation failed: {str(e)}"
+         yield error_msg, None
+         print(f"Error: {e}")
+
+     finally:
+         # Clean up
+         if torch.cuda.is_available():
+             torch.cuda.empty_cache()
+         gc.collect()
+
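+ # Shared defaults: restored by the Clear button, which unpacks these values
+ # in order onto the settings sliders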
+ def get_recommended_settings() -> dict:
+     """Get recommended generation settings"""
+     return {
+         "num_frames": 8,
+         "width": 576,
+         "height": 320,
+         "num_inference_steps": 25,
+         "guidance_scale": 17.5
+     }
+
+ # Create the Gradio interface
+ def create_demo():
+     """Create the main Gradio demo"""
+
+     # gr.Blocks takes no `description` argument; the header HTML below
+     # carries the tagline instead
+     with gr.Blocks(
+         title="🚀 Lightning Text-to-Video Generator",
+         theme=gr.themes.Soft()
+     ) as demo:
+
+         # Header with anycoder attribution
+         gr.HTML("""
+         <div style="text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 10px; margin-bottom: 20px;">
+             <h1 style="color: white; margin: 0; font-size: 2.5em;">🎬 Lightning Text-to-Video Generator</h1>
+             <p style="color: white; margin: 10px 0; font-size: 1.2em;">Transform your ideas into stunning videos instantly</p>
+             <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #FFD700; text-decoration: none; font-size: 1.1em; font-weight: bold;">
+                 ⭐ Built with anycoder
+             </a>
+         </div>
+         """)
188
+
189
+ with gr.Row():
190
+ with gr.Column(scale=1):
191
+ gr.HTML("<h3>πŸ“ Text Prompt</h3>")
192
+ prompt_input = gr.Textbox(
193
+ label="Describe your video",
194
+ placeholder="A majestic dragon flying over a mystical forest at sunset, with glowing particles falling from the sky",
195
+ lines=4,
196
+ max_length=500
197
+ )
198
+
199
+ # Quick presets
200
+ gr.HTML("<h3>🎯 Quick Presets</h3>")
201
+ with gr.Row():
202
+ preset_btn1 = gr.Button("🌊 Nature Scene", variant="secondary", size="sm")
203
+ preset_btn2 = gr.Button("πŸ™οΈ Urban Scene", variant="secondary", size="sm")
204
+ preset_btn3 = gr.Button("πŸš€ Sci-Fi", variant="secondary", size="sm")
205
+ preset_btn4 = gr.Button("🎭 Fantasy", variant="secondary", size="sm")
206
+
+                 # Advanced settings
+                 with gr.Accordion("⚙️ Advanced Settings", open=False):
+                     num_frames = gr.Slider(
+                         minimum=6, maximum=16, value=8, step=1,
+                         label="Number of Frames",
+                         info="More frames = longer video but slower generation"
+                     )
+
+                     with gr.Row():
+                         width = gr.Slider(
+                             minimum=200, maximum=1024, value=576, step=64,
+                             label="Width",
+                             info="Video width (576px recommended)"
+                         )
+                         height = gr.Slider(
+                             minimum=200, maximum=1024, value=320, step=64,
+                             label="Height",
+                             info="Video height (320px recommended)"
+                         )
+
+                     num_inference_steps = gr.Slider(
+                         minimum=15, maximum=50, value=25, step=5,
+                         label="Generation Steps",
+                         info="More steps = better quality but slower"
+                     )
+
+                     guidance_scale = gr.Slider(
+                         minimum=5, maximum=25, value=17.5, step=0.5,
+                         label="Guidance Scale",
+                         info="How closely to follow the prompt (15-20 recommended)"
+                     )
238
+
239
+ # Action buttons
240
+ with gr.Row():
241
+ generate_btn = gr.Button("πŸš€ Generate Video", variant="primary", size="lg")
242
+ clear_btn = gr.Button("πŸ—‘οΈ Clear", variant="secondary")
243
+
244
+ # Quick settings
245
+ with gr.Row():
246
+ quality_btn = gr.Button("⚑ Fast", variant="secondary", size="sm")
247
+ quality_btn2 = gr.Button("🎨 High Quality", variant="secondary", size="sm")
248
+
249
+ # Status display
250
+ status = gr.HTML("<p style='color: #666;'>Ready to generate your video!</p>")
251
+
252
+ with gr.Column(scale=1):
253
+ gr.HTML("<h3>πŸŽ₯ Generated Video</h3>")
254
+ video_output = gr.Video(
255
+ label="Your Generated Video",
256
+ format="mp4",
257
+ loop=True,
258
+ autoplay=True,
259
+ height=400
260
+ )
261
+
262
+ # Info panel
263
+ info_panel = gr.HTML("""
264
+ <div style="padding: 15px; background: #f8f9fa; border-radius: 8px; margin-top: 10px;">
265
+ <h4>πŸ’‘ Tips for Better Results:</h4>
266
+ <ul style="color: #555; font-size: 0.9em;">
267
+ <li>Be specific and descriptive in your prompts</li>
268
+ <li>Use adjectives to describe style, lighting, mood</li>
269
+ <li>Include camera movements (pan, zoom, rotate)</li>
270
+ <li>Fast mode: 6-8 frames, 15-20 steps</li>
271
+ <li>High quality: 10-12 frames, 25-30 steps</li>
272
+ </ul>
273
+ </div>
274
+ """)
275
+
276
+ # Preset prompt handlers
277
+ preset_prompts = {
278
+ preset_btn1: "A serene mountain landscape with flowing river, golden hour lighting, birds flying in the sky",
279
+ preset_btn2: "A bustling city street at night with neon lights, cars driving by, people walking",
280
+ preset_btn3: "A futuristic spaceship flying through a galaxy with colorful nebulas and distant stars",
281
+ preset_btn4: "A magical forest with glowing mushrooms, fairy lights dancing, mystical creatures moving"
282
+ }
283
+
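+         # `text=preset_text` binds the current loop value as a default
+         # argument; a plain closure would late-bind and leave every button
+         # pasting the last preset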
+         for btn, preset_text in preset_prompts.items():
+             btn.click(
+                 lambda text=preset_text: gr.update(value=text),
+                 outputs=prompt_input
+             )
289
+
290
+ # Quality settings
291
+ def apply_fast_settings():
292
+ return 6, 512, 288, 15, 15.0
293
+
294
+ def apply_quality_settings():
295
+ return 12, 576, 320, 30, 18.0
296
+
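+         # Each returned tuple maps positionally onto the outputs list:
+         # (num_frames, width, height, num_inference_steps, guidance_scale)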
+         quality_btn.click(apply_fast_settings, outputs=[num_frames, width, height, num_inference_steps, guidance_scale])
+         quality_btn2.click(apply_quality_settings, outputs=[num_frames, width, height, num_inference_steps, guidance_scale])
+
+         # Main generation handler: Gradio streams updates only when the
+         # handler itself is a generator, so delegate with `yield from`
+         def handle_generate(prompt, num_frames, width, height, steps, guidance):
+             yield from generate_video(prompt, num_frames, width, height, steps, guidance)
+
+         # Connect events
+         generate_btn.click(
+             handle_generate,
+             inputs=[prompt_input, num_frames, width, height, num_inference_steps, guidance_scale],
+             outputs=[status, video_output]
+         )
+
+         def clear_all():
+             return "", None, *get_recommended_settings().values(), "🗑️ Cleared! Ready for new generation."
+
+         clear_btn.click(
+             clear_all,
+             outputs=[prompt_input, video_output, num_frames, width, height, num_inference_steps, guidance_scale, status]
+         )
323
+
324
+ return demo
325
+
326
+ # Create and launch the demo
327
+ if __name__ == "__main__":
328
+ demo = create_demo()
329
+
330
+ # Launch with optimized settings
331
+ demo.launch(
332
+ server_name="0.0.0.0",
333
+ server_port=7860,
334
+ share=True,
335
+ show_error=True,
336
+ quiet=False,
337
+ max_threads=40,
338
+ concurrency_limit=10
339
+ )
requirements.txt ADDED
@@ -0,0 +1,24 @@
+ gradio[webrtc]==4.29.0
+ spaces==0.20.0
+ torch>=2.0.0
+ torchvision>=0.15.0
+ torchaudio>=2.0.0
+ diffusers==0.27.0
+ transformers==4.40.0
+ accelerate==0.27.0
+ safetensors==0.4.2
+ xformers==0.0.24
+ pillow>=10.0.0
+ numpy>=1.24.0
+ opencv-python>=4.8.0
+ einops>=0.7.0
+ triton>=2.0.0
+ # Key dependencies:
+ # spaces (for ZeroGPU optimization)
+ # torch (>=2.0.0 for diffusion models)
+ # diffusers (for the text-to-video pipeline)
+ # transformers (for model components)
+ # accelerate (for memory optimization)
+ # pillow (for image handling)
+ # numpy (for array operations)
+ # opencv-python (for video processing)