bskrishna2006 committed on
Commit
0254d02
·
1 Parent(s): b4562f5

Add audio transcription endpoints for Railway integration

Browse files
Files changed (1) hide show
  1. app.py +174 -0
app.py CHANGED
@@ -135,6 +135,180 @@ def warmup_models():
135
  }), 500
136
 
137
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  # =============================================================================
139
  # TRANSCRIPT ENDPOINTS
140
  # =============================================================================
 
135
  }), 500
136
 
137
 
138
+ # =============================================================================
139
+ # AUDIO TRANSCRIPTION ENDPOINTS (for Railway integration)
140
+ # =============================================================================
141
+
142
@app.route('/api/transcribe-audio', methods=['POST'])
def transcribe_audio():
    """
    Transcribe audio using Whisper.

    Expects a JSON body with 'audio_base64' (base64-encoded audio data,
    assumed to be WAV — TODO confirm with the Railway sender), sent by
    the Railway backend.

    Returns:
        200 with {'success', 'transcript', 'language', 'word_count'}.
        400 if the payload is missing or the base64 data is malformed.
        500 if transcription itself fails.
    """
    # Consolidate stdlib imports at the top of the handler instead of
    # scattering them through the body (the original imported os inside
    # the finally block).
    import base64
    import binascii
    import os
    import tempfile

    try:
        data = request.get_json()

        if not data or 'audio_base64' not in data:
            return jsonify({
                'error': 'Missing audio',
                'message': 'Please provide audio_base64'
            }), 400

        # Malformed base64 is a client error: return 400 rather than
        # letting binascii.Error fall through to the generic 500 handler.
        try:
            audio_data = base64.b64decode(data['audio_base64'])
        except (binascii.Error, ValueError) as decode_err:
            return jsonify({
                'error': 'Invalid audio',
                'message': f'audio_base64 is not valid base64: {decode_err}'
            }), 400

        # Whisper needs a real file path, so persist the bytes to a
        # temp file; delete=False because we reopen it by path below.
        with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
            f.write(audio_data)
            audio_path = f.name

        try:
            # Transcribe with Whisper
            from services.speech_to_text import SpeechToTextService
            stt = SpeechToTextService()
            result = stt.transcribe_audio(audio_path)

            return jsonify({
                'success': True,
                'transcript': result['text'],
                'language': result['language'],
                'word_count': len(result['text'].split())
            }), 200

        finally:
            # Always remove the temp file, even if transcription raised.
            if os.path.exists(audio_path):
                os.remove(audio_path)

    except Exception as e:
        # logger.exception records the full traceback, not just the message.
        logger.exception(f"Audio transcription failed: {e}")
        return jsonify({
            'error': 'Transcription failed',
            'message': str(e)
        }), 500
193
+
194
+
195
@app.route('/api/process-audio', methods=['POST'])
def process_audio():
    """
    Full audio pipeline: Whisper transcription → translation → summary.

    Expects a JSON body from the Railway backend with:
        audio_base64 (required): base64-encoded audio data (assumed WAV —
            TODO confirm with the Railway sender).
        video_id (optional, default 'unknown'): identifier echoed back.
        summary_type (optional, default 'general'): passed to the summarizer.
        target_language (optional, default 'eng'): language for the summary.

    Returns:
        200 with transcript, summary and statistics on success.
        400 if the payload is missing or the base64 data is malformed.
        500 if any pipeline stage fails.
    """
    # Consolidate stdlib imports at the top of the handler instead of
    # scattering them through the body (the original imported os inside
    # the finally block).
    import base64
    import binascii
    import os
    import tempfile

    try:
        data = request.get_json()

        if not data or 'audio_base64' not in data:
            return jsonify({
                'error': 'Missing audio',
                'message': 'Please provide audio_base64'
            }), 400

        video_id = data.get('video_id', 'unknown')
        summary_type = data.get('summary_type', 'general')
        target_language = data.get('target_language', 'eng')

        # Malformed base64 is a client error: return 400 rather than
        # letting binascii.Error fall through to the generic 500 handler.
        try:
            audio_data = base64.b64decode(data['audio_base64'])
        except (binascii.Error, ValueError) as decode_err:
            return jsonify({
                'error': 'Invalid audio',
                'message': f'audio_base64 is not valid base64: {decode_err}'
            }), 400

        # Whisper needs a real file path, so persist the bytes to a
        # temp file; delete=False because we reopen it by path below.
        with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
            f.write(audio_data)
            audio_path = f.name

        try:
            # Step 1: Transcribe with Whisper
            logger.info("Transcribing audio with Whisper...")
            from services.speech_to_text import SpeechToTextService
            stt = SpeechToTextService()
            whisper_result = stt.transcribe_audio(audio_path)

            transcript = whisper_result['text']
            original_language = whisper_result['language']
            original_word_count = len(transcript.split())

            logger.info(f"Transcription complete. Language: {original_language}")

            # Step 2: Translate to English if needed
            english_transcript = transcript

            if not is_english(original_language):
                logger.info("Translating to English...")
                translation_service = get_translation_service()
                english_transcript = translation_service.translate_to_english(
                    transcript,
                    original_language
                )

            # Step 3: Summarize (always over the English text)
            logger.info("Generating summary...")
            summary = summarizer_service.summarize(
                text=english_transcript,
                summary_type=summary_type,
                chunk_size=2500,
                max_tokens=500
            )

            # Step 4: Translate summary to target language if needed
            final_summary = summary
            summary_language = "eng"

            if not is_english(target_language):
                logger.info(f"Translating summary to {target_language}...")
                translation_service = get_translation_service()
                final_summary = translation_service.translate_from_english(summary, target_language)
                summary_language = target_language

            # Statistics; guard the ratio against an empty transcript.
            summary_word_count = len(final_summary.split())
            compression_ratio = (summary_word_count / original_word_count) * 100 if original_word_count > 0 else 0

            response = {
                'success': True,
                'video_id': video_id,
                'original_language': original_language,
                'original_language_name': get_language_name(original_language),
                'transcript': transcript,
                'transcript_source': 'whisper',
                'summary': final_summary,
                'summary_language': summary_language,
                'summary_language_name': get_language_name(summary_language),
                'statistics': {
                    'original_word_count': original_word_count,
                    'summary_word_count': summary_word_count,
                    'compression_ratio': round(compression_ratio, 1),
                    # ~200 words/minute reading speed, minimum 1 minute.
                    'reading_time_minutes': max(1, summary_word_count // 200)
                }
            }

            # Expose intermediate English artifacts only when a
            # translation actually happened.
            if not is_english(original_language):
                response['english_transcript'] = english_transcript
            if not is_english(target_language):
                response['english_summary'] = summary

            logger.info("Audio processing complete!")
            return jsonify(response), 200

        finally:
            # Always remove the temp file, even if a pipeline stage raised.
            if os.path.exists(audio_path):
                os.remove(audio_path)

    except Exception as e:
        # logger.exception records the full traceback, not just the message.
        logger.exception(f"Audio processing failed: {e}")
        return jsonify({
            'error': 'Processing failed',
            'message': str(e)
        }), 500
310
+
311
+
312
  # =============================================================================
313
  # TRANSCRIPT ENDPOINTS
314
  # =============================================================================