"""
Voice Agent Gradio Application
Web interface for the Voice Agent with microphone support
"""

import asyncio
import logging
import os

import gradio as gr

from .voice_agent import VoiceAgent


class VoiceApp:
    """Gradio web application for Voice Agent.

    Builds the Blocks UI once at construction time (``self.interface``) and
    forwards chat text and audio events to a ``VoiceAgent`` instance.
    """

    def __init__(self):
        self.agent = VoiceAgent()
        self.conversation_history = []

        # Set up logging
        logging.basicConfig(level=logging.INFO)

        # Create the interface
        self.interface = self._create_interface()

    def _create_interface(self):
        """Create the Gradio interface.

        Returns:
            The ``gr.Blocks`` app with all components and event handlers wired.
        """
        with gr.Blocks(
            title="🎤 Voice Agent - Secure AI Suite",
            theme=gr.themes.Soft(
                primary_hue="orange",
                secondary_hue="gray",
                neutral_hue="slate"
            ),
            css="""
            .container { max-width: 1200px; margin: auto; }
            .chatbot { height: 500px; }
            .status-card { background: linear-gradient(90deg, #fa709a 0%, #fee140 100%); color: white; }
            .tool-card { border: 2px solid #e2e8f0; border-radius: 8px; padding: 12px; margin: 8px 0; }
            .audio-controls { text-align: center; padding: 20px; background: #f8fafc; border-radius: 8px; }
            """
        ) as app:

            # NOTE(review): the HTML snippets below carry no tags, so the
            # .status-card / .tool-card / .audio-controls rules defined in
            # the CSS above are not referenced anywhere -- confirm the
            # intended markup.

            # Header
            gr.HTML("""

            🎤 Voice Agent

            Speech-to-AI & Text-to-Speech with Multi-modal Processing

            🔐 Secure AI Agents Suite

            """)

            with gr.Row():
                # Left column - Voice interface
                with gr.Column(scale=2):
                    gr.HTML("""

                    🎙️ Voice Interaction

                    """)

                    # Audio input/output section
                    with gr.Column():
                        gr.HTML("\n")
                        gr.HTML("""

                        🎙️ Record Your Voice

                        """)
                        audio_input = gr.Audio(
                            label="Click to record or upload audio file",
                            type="filepath",
                            format="mp3",
                            elem_classes=["audio-input"]
                        )

                        gr.HTML("""

                        🗣️ AI Response (Audio)

                        """)
                        audio_output = gr.Audio(
                            label="AI response will appear here",
                            type="numpy",
                            elem_classes=["audio-output"]
                        )
                        gr.HTML("\n")

                    gr.HTML("""

                    💬 Text Chat with Voice Features

                    """)
                    # avatar_images is intentionally not passed: Gradio expects
                    # image file paths or URLs there, not emoji text.
                    chatbot = gr.Chatbot(
                        label="Voice Assistant Chat",
                        height=300,
                        elem_classes=["chatbot"]
                    )

                    with gr.Row():
                        msg_input = gr.Textbox(
                            placeholder="Type or use voice input. Try: 'Transcribe this audio' or 'Say hello in a female voice'...",
                            lines=2,
                            max_lines=4,
                            label="Your Message"
                        )
                        with gr.Column(scale=0):
                            send_btn = gr.Button("Send", variant="primary")
                            clear_btn = gr.Button("Clear", variant="secondary")

                # Right column - Voice Tools and Settings
                with gr.Column(scale=1):
                    gr.HTML("""

                    🛠️ Voice Services

                    """)

                    tools_info = gr.HTML("""

                    🎙️ Speech-to-Text

                    • Whisper transcription
                    • Multi-language support
                    • High accuracy

                    🗣️ Text-to-Speech

                    • ElevenLabs synthesis
                    • Natural voices
                    • Emotional expression

                    💬 Voice Conversation

                    • Full-duplex chat
                    • Real-time processing
                    • Context awareness

                    🌍 Multilingual

                    • 5+ languages
                    • Auto-detection
                    • Cultural adaptation

                    """)

                    gr.HTML("""

                    🎛️ Voice Settings

                    """)
                    with gr.Row():
                        voice_select = gr.Dropdown(
                            choices=["Adam (Male)", "Rachel (Female)", "Cloyd (Deep)", "Custom"],
                            value="Adam (Male)",
                            label="Voice Selection"
                        )
                        speed_slider = gr.Slider(0.5, 2.0, value=1.0, step=0.1, label="Speech Speed")

                    gr.HTML("""

                    📊 System Status

                    """)
                    status_display = gr.HTML()

            # Prompt sent to the agent for every recorded/uploaded clip; it is
            # also shown as the user side of the resulting chat turn.
            audio_prompt = "process this audio file"

            # Event handlers
            def user(user_message, history):
                """Queue the typed message as a pending turn and clear the textbox.

                A pending turn is a ``(message, None)`` pair; ``bot_response``
                fills in the second slot. Blank input leaves the history as is.
                """
                history = history or []
                if not (user_message or "").strip():
                    return history, ""

                # Add user message to history
                history.append((user_message, None))
                return history, ""

            async def bot_response(history):
                """Answer the pending turn at the end of ``history``.

                The message is read from the history rather than from the
                textbox, because ``user`` has already cleared the textbox by
                the time this chained step runs.
                """
                history = history or []
                # Nothing pending: empty chat, or the last turn already has a reply.
                if not history or history[-1][1] is not None:
                    return history

                pending_message = history[-1][0]

                # Get response from agent
                response = await self.agent.handle_user_input(pending_message)

                # Add bot response to history
                history[-1] = (pending_message, response)
                return history

            async def process_audio(audio_file, history):
                """Process uploaded or recorded audio.

                Returns the clip for the output player together with the chat
                history extended by one turn holding the agent's reply (or the
                error text if the agent call failed).
                """
                history = history or []
                if not audio_file:
                    # Recorder was cleared: nothing to process, keep the chat untouched.
                    return None, history

                try:
                    # NOTE(review): the file path itself is not forwarded to the
                    # agent -- confirm how VoiceAgent expects to receive audio.
                    response = await self.agent.handle_user_input(audio_prompt)
                except Exception as e:
                    response = f"Error processing audio: {str(e)}"

                # The Chatbot output needs a list of turns, not a bare string.
                history.append((audio_prompt, response))
                return audio_file, history

            async def text_to_speech(text, voice_style, speed):
                """Convert text to speech.

                Returns ``(audio_path, agent_response)``, or ``(None, message)``
                on blank input or failure.
                """
                if not text.strip():
                    return None, "No text provided"

                try:
                    # Process with voice synthesis
                    voice_prompt = f"speak: {text} with {voice_style} voice at {speed}x speed"
                    response = await self.agent.handle_user_input(voice_prompt)

                    # Generate mock audio file path
                    # NOTE(review): no file is written at this path here; confirm
                    # the agent creates it before wiring this to an Audio output.
                    audio_path = f"temp_audio_{hash(text)}.mp3"
                    return audio_path, response
                except Exception as e:
                    return None, f"Error generating speech: {str(e)}"

            def clear_conversation():
                """Clear conversation history."""
                return []

            def update_status():
                """Update status display."""
                status = self.agent.get_status()
                voice_settings = self.agent.config.get("voice_settings", {})

                return f"""

                ✅ Voice System Status

                Agent: {status['name']}

                Status: {status['status']}

                Whisper: {voice_settings.get('whisper_model', 'whisper-1')}

                ElevenLabs: Active

                Languages: 5+ supported

                Security: {'🛡️ Enabled' if status['security_enabled'] else '❌ Disabled'}

                """

            # Connect events: the Send button and Enter in the textbox share
            # the same two-step chain (queue the message, then answer it).
            for trigger in (send_btn.click, msg_input.submit):
                trigger(
                    user,
                    inputs=[msg_input, chatbot],
                    outputs=[chatbot, msg_input]
                ).then(
                    bot_response,
                    inputs=[chatbot],
                    outputs=[chatbot]
                )

            # Audio processing
            audio_input.change(
                process_audio,
                inputs=[audio_input, chatbot],
                outputs=[audio_output, chatbot]
            )

            # Text-to-speech generation
            # NOTE(review): not connected to any event yet; voice_select and
            # speed_slider are likewise unused.
            async def generate_speech(text, voice, speed):
                # Must await: returning the bare coroutine would hand Gradio a
                # coroutine object instead of the (audio, message) pair.
                return await text_to_speech(text, voice, speed)

            clear_btn.click(clear_conversation, outputs=chatbot)

            # Initial status update
            app.load(update_status, outputs=status_display)

        return app

    def launch(self, **kwargs):
        """Launch the Gradio application.

        Args:
            **kwargs: Extra options for ``Blocks.launch``. They take
                precedence over the defaults below, so callers can override
                e.g. ``server_port`` without a duplicate-keyword error.
        """
        options = {
            "server_name": "0.0.0.0",
            "server_port": 7863,
            "share": False,
            "show_error": True,
            "quiet": False,
        }
        options.update(kwargs)
        self.interface.launch(**options)


# Example usage and quick commands
EXAMPLE_QUERIES = [
    "Transcribe this audio file",
    "Say 'Hello, welcome to our voice AI' in a female voice",
    "Start a voice conversation",
    "Analyze the sentiment of this audio",
    "Search for meeting recordings about project updates",
    "Enable multilingual voice mode"
]


def main():
    """Main function to run the Voice Agent app."""
    print("🎤 Starting Voice Agent...")
    print("🎙️ Initializing Whisper (Speech-to-Text)...")
    print("🗣️ Loading ElevenLabs (Text-to-Speech)...")
    print("🧠 Connecting AI models (GPT-4o, Gemini)...")
    print("🌍 Setting up multilingual support...")

    app = VoiceApp()

    print("\n" + "="*60)
    print("🎤 VOICE AGENT - SPEECH PROCESSING SUITE")
    print("="*60)
    print("\n💡 Example voice requests you can try:")
    for i, query in enumerate(EXAMPLE_QUERIES, 1):
        print(f" {i}. {query}")

    print("\n🎙️ Features:")
    print(" • Record your voice or upload audio files")
    print(" • Convert text to natural-sounding speech")
    print(" • Full voice conversations with AI")
    print(" • Multi-language support (English, Spanish, Nepali, etc.)")

    print("\n🌐 Starting Gradio server...")
    print("🔗 Open your browser to: http://localhost:7863")
    print("\n" + "="*60)

    app.launch()


if __name__ == "__main__":
    main()