Spaces:
Sleeping
Sleeping
import logging
import os
import tempfile

import google.generativeai as genai
import gradio as gr
import soundfile as sf
import torch
from dotenv import load_dotenv
from speechbrain.pretrained import EncoderDecoderASR, Tacotron2, HIFIGAN
# Credentials: load_dotenv() is called first so that a key kept in a local
# .env file is visible in the environment before it is handed to the client.
load_dotenv()
genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))

# Gemini model handle used for every reply
# ("models/gemini-1.5-pro" can be substituted if needed).
gemini = genai.GenerativeModel("models/gemini-1.5-flash")

# SpeechBrain models, created once at import time and shared by all requests.
# Each one is given its own `savedir` so the three do not share a directory.

# Speech -> text
asr_model = EncoderDecoderASR.from_hparams(
    savedir="tmp_asr",
    source="speechbrain/asr-transformer-transformerlm-librispeech",
)

# Text -> mel spectrogram
tacotron2 = Tacotron2.from_hparams(
    savedir="tmp_tts",
    source="speechbrain/tts-tacotron2-ljspeech",
)

# Mel spectrogram -> waveform
hifigan = HIFIGAN.from_hparams(
    savedir="tmp_hifigan",
    source="speechbrain/tts-hifigan-ljspeech",
)
# Voice Agent Function
def voice_agent(audio_path):
    """Transcribe a spoken prompt, ask Gemini for a reply, and voice the reply.

    Args:
        audio_path: Filesystem path of the recorded or uploaded audio, or
            ``None`` when no audio was provided.

    Returns:
        A ``(reply_text, reply_audio_path)`` tuple. On success the second
        item is the path of a WAV file written for this call only. When no
        audio was given, or any step fails, the first item is a user-facing
        message and the second item is ``None``.

    Raises:
        Nothing derived from ``Exception``: failures are logged and reported
        through the returned message so the UI always gets a displayable
        result.
    """
    # NOTE(review): the leading "β" in the two messages below looks like a
    # mis-decoded emoji; it is kept byte-for-byte until the intended glyph is
    # confirmed against the deployed file.
    if audio_path is None:
        return "β No audio received.", None

    try:
        # Transcribe speech
        user_input = asr_model.transcribe_file(audio_path)
        if not user_input.strip():
            # Do not spend an API call on an empty prompt; report it through
            # the same error path as every other failure.
            raise ValueError("No speech was recognized in the audio.")

        # Gemini response
        reply_text = gemini.generate_content(user_input).text.strip()

        # Convert reply to speech
        mel_output, _, _ = tacotron2.encode_text(reply_text)
        waveform = hifigan.decode_batch(mel_output).squeeze()

        # Reserve a unique file per call so simultaneous requests cannot
        # overwrite each other's audio. The file is deliberately left on disk
        # (delete=False) because the caller reads it after this returns.
        with tempfile.NamedTemporaryFile(
            prefix="reply_", suffix=".wav", delete=False
        ) as reply_file:
            reply_path = reply_file.name

        # detach()/cpu() make the conversion safe even if the tensor lives on
        # an accelerator or is attached to an autograd graph.
        # 22050 is the sample rate written to the WAV header — presumably the
        # vocoder's output rate; confirm if the TTS models are swapped.
        sf.write(reply_path, waveform.detach().cpu().numpy(), 22050)
        return reply_text, reply_path
    except Exception as e:
        # UI boundary: keep the traceback in the logs, show a short message.
        logging.getLogger(__name__).exception(
            "voice_agent failed for input %r", audio_path
        )
        return f"β Error: {str(e)}", None
# Gradio UI
# NOTE(review): the leading glyphs in the labels and title look like
# mis-decoded emoji; they are reproduced as-is — confirm against the
# deployed file before changing them.

# Input: the recording/upload is handed to voice_agent as a file path.
_voice_input = gr.Audio(type="filepath", label="ποΈ Record or Upload Your Voice")

# Outputs, in the order voice_agent returns them: reply text, reply audio.
_reply_outputs = [
    gr.Text(label="π€ Gemini's Reply"),
    gr.Audio(label="π AI Voice Reply"),
]

iface = gr.Interface(
    title="π§ Voice AI Agent: SpeechBrain + Gemini",
    description="Talk to the AI! Free voice assistant using SpeechBrain + Gemini. Entirely open-source and runs on Hugging Face.",
    fn=voice_agent,
    inputs=_voice_input,
    outputs=_reply_outputs,
    live=True,
)

# Start the web app as soon as the module is executed.
iface.launch()