import gradio as gr
import spaces
import torch
import numpy as np
from diffusers import DiffusionPipeline
from diffusers.models import AutoencoderKL
from diffusers.schedulers import EulerDiscreteScheduler
from diffusers.utils import export_to_video
import os
import time
from PIL import Image
from typing import Generator, Optional, Tuple
import gc
# Model configuration
MODEL_ID = "cerspense/zeroscope_v2_576w" # 2.5GB model with good quality
VAE_ID = "madebyollin/sdxl-vae-fp16-fix" # Compact VAE
SCHEDULER = "EulerDiscreteScheduler"
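# Note: zeroscope_v2_576w is a UNet-based text-to-video diffusion model tuned for 576x320
# output, which is why those dimensions are the recommended defaults below. Pairing it with
# the SDXL fp16-fix VAE is a choice made by this app, not something the base model requires.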
@spaces.GPU(duration=1500)  # extended duration so ahead-of-time compilation can finish at startup
def compile_model():
"""Compile the text-to-video model for optimal performance"""
print("πŸš€ Compiling model for ahead-of-time optimization...")
# Load components
vae = AutoencoderKL.from_pretrained(VAE_ID, torch_dtype=torch.float16)
scheduler = EulerDiscreteScheduler.from_pretrained(MODEL_ID, subfolder="scheduler")
# Create pipeline with optimization
pipe = DiffusionPipeline.from_pretrained(
MODEL_ID,
vae=vae,
scheduler=scheduler,
torch_dtype=torch.float16,
variant="fp16",
use_safetensors=True
)
    # Memory optimizations: CPU offload plus VAE/attention slicing reduce peak VRAM
pipe.enable_model_cpu_offload()
pipe.enable_vae_slicing()
pipe.enable_attention_slicing()
    # AoT compilation for a 1.3x-1.8x speedup: capture one example call, export the module,
    # compile it, then patch the compiled artifact back into the pipeline.
    # zeroscope_v2_576w is UNet-based, so the module to compile is pipe.unet (there is no transformer).
    with spaces.aoti_capture(pipe.unet) as call:
        pipe("test prompt for compilation", num_frames=6)

    exported = torch.export.export(
        pipe.unet,
        args=call.args,
        kwargs=call.kwargs,
    )
    compiled_model = spaces.aoti_compile(exported)
    spaces.aoti_apply(compiled_model, pipe.unet)

    return pipe
# Initialize and compile the model once at startup; enable_model_cpu_offload() inside
# compile_model() already manages device placement, so no explicit pipe.to('cuda') is needed here.
print("πŸ”„ Loading text-to-video model...")
pipe = compile_model()
@spaces.GPU
def generate_video(
prompt: str,
num_frames: int = 8,
width: int = 576,
height: int = 320,
num_inference_steps: int = 25,
guidance_scale: float = 17.5,
progress: gr.Progress = gr.Progress()
) -> Generator[Tuple[str, Optional[str]], None, None]:
"""
Generate a video from text prompt using the compiled model.
Args:
prompt: Text description for video generation
num_frames: Number of frames in the video (6-16)
width: Video width (576 recommended for quality)
height: Video height (320 recommended for quality)
num_inference_steps: Diffusion steps (20-30 recommended)
guidance_scale: CFG scale (15-20 recommended)
Yields:
        Tuple of (status_message, path to the generated mp4 file or None)
"""
try:
# Clear GPU cache for optimal performance
if torch.cuda.is_available():
torch.cuda.empty_cache()
gc.collect()
# Validate parameters
prompt = prompt.strip()
if not prompt:
yield "❌ Please enter a text prompt", None
return
if not 6 <= num_frames <= 16:
yield "❌ Number of frames must be between 6-16", None
return
if not 200 <= width <= 1024:
yield "❌ Width must be between 200-1024", None
return
if not 200 <= height <= 1024:
yield "❌ Height must be between 200-1024", None
return
yield "🎬 Initializing video generation...", None
        # Report denoising progress through Gradio's progress bar; a pipeline callback runs
        # inside pipe() and cannot yield to this outer generator directly.
        def progress_callback(step, timestep, latents):
            progress((step + 1) / num_inference_steps, desc=f"Denoising step {step + 1}/{num_inference_steps}")
# Generate video frames
yield "πŸ”₯ Generating video frames...", None
start_time = time.time()
# Run inference with optimized settings
with torch.inference_mode():
result = pipe(
prompt=prompt,
num_frames=num_frames,
width=width,
height=height,
num_inference_steps=num_inference_steps,
guidance_scale=guidance_scale,
callback=progress_callback,
callback_steps=1
)
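            # Note: callback/callback_steps is the legacy diffusers callback API; newer releases
            # deprecate it in favor of callback_on_step_end but generally still accept it.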
        # Convert the generated frames to an mp4 file so gr.Video can display it
        frames = result.frames[0]  # first (and only) video in the batch
        video_path = export_to_video(frames, fps=8)
        generation_time = time.time() - start_time
        yield f"βœ… Video generated in {generation_time:.1f}s!", video_path
except Exception as e:
error_msg = f"❌ Generation failed: {str(e)}"
yield error_msg, None
print(f"Error: {e}")
finally:
# Clean up
if torch.cuda.is_available():
torch.cuda.empty_cache()
gc.collect()
def get_recommended_settings() -> dict:
"""Get recommended generation settings"""
return {
"num_frames": 8,
"width": 576,
"height": 320,
"num_inference_steps": 25,
"guidance_scale": 17.5
}
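# Illustrative sketch (not executed): the recommended settings map directly onto pipeline
# keyword arguments, so an equivalent direct call would look roughly like this
# (the prompt and output filename are made up for the example):
#
#     settings = get_recommended_settings()
#     result = pipe("a red fox running through fresh snow", **settings)
#     export_to_video(result.frames[0], "fox.mp4", fps=8)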
# Create the Gradio interface
def create_demo():
"""Create the main Gradio demo"""
    with gr.Blocks(
        title="πŸš€ Lightning Text-to-Video Generator",
        theme=gr.themes.Soft()
    ) as demo:
# Header with anycoder attribution
gr.HTML("""
<div style="text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); border-radius: 10px; margin-bottom: 20px;">
<h1 style="color: white; margin: 0; font-size: 2.5em;">🎬 Lightning Text-to-Video Generator</h1>
<p style="color: white; margin: 10px 0; font-size: 1.2em;">Transform your ideas into stunning videos instantly</p>
<a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #FFD700; text-decoration: none; font-size: 1.1em; font-weight: bold;">
⭐ Built with anycoder
</a>
</div>
""")
with gr.Row():
with gr.Column(scale=1):
gr.HTML("<h3>πŸ“ Text Prompt</h3>")
prompt_input = gr.Textbox(
label="Describe your video",
placeholder="A majestic dragon flying over a mystical forest at sunset, with glowing particles falling from the sky",
lines=4,
max_length=500
)
# Quick presets
gr.HTML("<h3>🎯 Quick Presets</h3>")
with gr.Row():
preset_btn1 = gr.Button("🌊 Nature Scene", variant="secondary", size="sm")
preset_btn2 = gr.Button("πŸ™οΈ Urban Scene", variant="secondary", size="sm")
preset_btn3 = gr.Button("πŸš€ Sci-Fi", variant="secondary", size="sm")
preset_btn4 = gr.Button("🎭 Fantasy", variant="secondary", size="sm")
# Advanced settings
with gr.Accordion("βš™οΈ Advanced Settings", open=False):
num_frames = gr.Slider(
minimum=6, maximum=16, value=8, step=1,
label="Number of Frames",
info="More frames = longer video but slower generation"
)
with gr.Row():
width = gr.Slider(
minimum=200, maximum=1024, value=576, step=64,
label="Width",
info="Video width (576px recommended)"
)
height = gr.Slider(
minimum=200, maximum=1024, value=320, step=64,
label="Height",
info="Video height (320px recommended)"
)
num_inference_steps = gr.Slider(
minimum=15, maximum=50, value=25, step=5,
label="Generation Steps",
info="More steps = better quality but slower"
)
guidance_scale = gr.Slider(
minimum=5, maximum=25, value=17.5, step=0.5,
label="Guidance Scale",
info="How closely to follow the prompt (15-20 recommended)"
)
# Action buttons
with gr.Row():
generate_btn = gr.Button("πŸš€ Generate Video", variant="primary", size="lg")
clear_btn = gr.Button("πŸ—‘οΈ Clear", variant="secondary")
# Quick settings
with gr.Row():
quality_btn = gr.Button("⚑ Fast", variant="secondary", size="sm")
quality_btn2 = gr.Button("🎨 High Quality", variant="secondary", size="sm")
# Status display
status = gr.HTML("<p style='color: #666;'>Ready to generate your video!</p>")
with gr.Column(scale=1):
gr.HTML("<h3>πŸŽ₯ Generated Video</h3>")
video_output = gr.Video(
label="Your Generated Video",
format="mp4",
loop=True,
autoplay=True,
height=400
)
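                # gr.Video displays a file path; generate_video yields the mp4 path produced
                # by export_to_video, which is what ends up here.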
# Info panel
info_panel = gr.HTML("""
<div style="padding: 15px; background: #f8f9fa; border-radius: 8px; margin-top: 10px;">
<h4>πŸ’‘ Tips for Better Results:</h4>
<ul style="color: #555; font-size: 0.9em;">
<li>Be specific and descriptive in your prompts</li>
<li>Use adjectives to describe style, lighting, mood</li>
<li>Include camera movements (pan, zoom, rotate)</li>
<li>Fast mode: 6-8 frames, 15-20 steps</li>
<li>High quality: 10-12 frames, 25-30 steps</li>
</ul>
</div>
""")
# Preset prompt handlers
preset_prompts = {
preset_btn1: "A serene mountain landscape with flowing river, golden hour lighting, birds flying in the sky",
preset_btn2: "A bustling city street at night with neon lights, cars driving by, people walking",
preset_btn3: "A futuristic spaceship flying through a galaxy with colorful nebulas and distant stars",
preset_btn4: "A magical forest with glowing mushrooms, fairy lights dancing, mystical creatures moving"
}
for btn, preset_text in preset_prompts.items():
btn.click(
lambda text=preset_text: gr.update(value=text),
outputs=prompt_input
)
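        # The default argument (text=preset_text) captures each preset at definition time;
        # a plain closure over preset_text would late-bind and make every button insert the last preset.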
# Quality settings
def apply_fast_settings():
return 6, 512, 288, 15, 15.0
def apply_quality_settings():
return 12, 576, 320, 30, 18.0
quality_btn.click(apply_fast_settings, outputs=[num_frames, width, height, num_inference_steps, guidance_scale])
quality_btn2.click(apply_quality_settings, outputs=[num_frames, width, height, num_inference_steps, guidance_scale])
        # Main generation handler: stream (status, video) updates straight from generate_video.
        # gr.Progress is injected into this registered event handler and passed through.
        def handle_generate(prompt, num_frames, width, height, steps, guidance, progress=gr.Progress()):
            yield from generate_video(prompt, num_frames, width, height, steps, guidance, progress=progress)
# Connect events
generate_btn.click(
handle_generate,
inputs=[prompt_input, num_frames, width, height, num_inference_steps, guidance_scale],
outputs=[status, video_output]
)
def clear_all():
return "", None, *get_recommended_settings().values(), "πŸ—‘οΈ Cleared! Ready for new generation."
clear_btn.click(
clear_all,
outputs=[prompt_input, video_output, num_frames, width, height, num_inference_steps, guidance_scale, status]
)
return demo
# Create and launch the demo
if __name__ == "__main__":
demo = create_demo()
    # Queue requests so concurrency is limited (concurrency_limit is a queue setting in
    # Gradio 4, not a launch() argument), then launch with optimized settings
    demo.queue(default_concurrency_limit=10)
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True,
        quiet=False,
        max_threads=40
    )