import os
import torch
import soundfile as sf
from huggingface_hub import login
from diffusers import StableAudioPipeline
import gradio as gr
import spaces
# Authenticate with the Hugging Face Hub. The token must be supplied via the
# HF_TOKEN secret configured in the Space settings — fail fast if it is absent.
if (HUGGINGFACE_TOKEN := os.getenv("HF_TOKEN")) is None:
    raise ValueError("Missing Hugging Face token. Please set it in Spaces Secrets.")
login(HUGGINGFACE_TOKEN)
# Pick the compute device: prefer CUDA with float16 (halves GPU memory use),
# otherwise fall back to CPU with full-precision float32.
if torch.cuda.is_available():
    device, torch_dtype = "cuda", torch.float16
else:
    device, torch_dtype = "cpu", torch.float32

# Load the Stable Audio Open pipeline and move it onto the chosen device.
pipe = StableAudioPipeline.from_pretrained(
    "stabilityai/stable-audio-open-1.0",
    torch_dtype=torch_dtype,
).to(device)
# Function to generate audio
@spaces.GPU
def generate_audio(prompt, negative_prompt, duration, diffusion_steps, seed):
    """Generate an audio clip with Stable Audio Open and return the WAV path.

    Args:
        prompt: Text description of the desired sound.
        negative_prompt: Text describing qualities to steer away from.
        duration: Clip length in seconds (passed as ``audio_end_in_s``).
        diffusion_steps: Number of denoising steps; more steps is slower but
            typically higher quality.
        seed: Random seed for reproducibility. May arrive as a float from the
            Gradio ``Number`` widget.

    Returns:
        Filesystem path of the generated ``.wav`` file.
    """
    import tempfile

    # gr.Number delivers floats (e.g. 42.0) but manual_seed() requires an int;
    # without the cast this raises a TypeError at generation time.
    generator = torch.Generator(device).manual_seed(int(seed))
    audio_output = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=int(diffusion_steps),  # Number of diffusion steps
        audio_end_in_s=duration,
        num_waveforms_per_prompt=1,
        generator=generator,
    ).audios
    # Transpose to (frames, channels) as soundfile expects; move off GPU first.
    output_audio = audio_output[0].T.float().cpu().numpy()
    # Unique temp file per request so concurrent users cannot overwrite each
    # other's output (a fixed "output.wav" is a race under parallel requests).
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_file = tmp.name
    sf.write(output_file, output_audio, pipe.vae.sampling_rate)
    return output_file
# ---- Gradio interface: prompt/parameter inputs on top, audio output below ----
with gr.Blocks() as demo:
    gr.Markdown("## π§ Stable Audio Open - Audio Generation πΌ")
    gr.Markdown("### Adjust prompts, duration, and diffusion steps to control the generation!")

    # Text prompts steering the generation.
    with gr.Row():
        prompt_box = gr.Textbox(label="Prompt", value="The sound of a hammer hitting a wooden surface.")
        negative_box = gr.Textbox(label="Negative Prompt", value="Low quality.")

    # Numeric generation parameters.
    with gr.Row():
        duration_slider = gr.Slider(minimum=1, maximum=10, step=0.5, value=1, label="Duration (seconds)")
        steps_slider = gr.Slider(minimum=1, maximum=500, step=10, value=10, label="Diffusion Steps")
    with gr.Row():
        seed_box = gr.Number(label="Random Seed", value=42)

    # Trigger button and the audio player that receives the result file.
    run_button = gr.Button("Generate Audio")
    audio_out = gr.Audio(label="Generated Audio", type="filepath")

    # Wire the button to the generation function.
    run_button.click(
        generate_audio,
        inputs=[prompt_box, negative_box, duration_slider, steps_slider, seed_box],
        outputs=audio_out,
    )

# Start the app server.
demo.launch()