bskrishna2006
committed on
Commit
·
0254d02
1
Parent(s):
b4562f5
Add audio transcription endpoints for Railway integration
Browse files
app.py
CHANGED
|
@@ -135,6 +135,180 @@ def warmup_models():
|
|
| 135 |
}), 500
|
| 136 |
|
| 137 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
# =============================================================================
|
| 139 |
# TRANSCRIPT ENDPOINTS
|
| 140 |
# =============================================================================
|
|
|
|
| 135 |
}), 500
|
| 136 |
|
| 137 |
|
| 138 |
+
# =============================================================================
|
| 139 |
+
# AUDIO TRANSCRIPTION ENDPOINTS (for Railway integration)
|
| 140 |
+
# =============================================================================
|
| 141 |
+
|
| 142 |
+
@app.route('/api/transcribe-audio', methods=['POST'])
def transcribe_audio():
    """
    Transcribe audio using Whisper.

    Receives audio as base64 from Railway backend.

    Request JSON:
        audio_base64: base64-encoded audio bytes (required).

    Returns:
        (response, status) tuple:
        - 200 with 'success', 'transcript', 'language' and 'word_count'.
        - 400 when the body is not a JSON object, lacks 'audio_base64',
          or the payload is not valid / is empty base64.
        - 500 when writing the temp file or the transcription itself fails.
    """
    import base64
    import os
    import tempfile

    # silent=True: a malformed or non-JSON body yields None instead of raising,
    # so it is reported as a client error (400) rather than a 500.
    data = request.get_json(silent=True)

    if not isinstance(data, dict) or 'audio_base64' not in data:
        return jsonify({
            'error': 'Missing audio',
            'message': 'Please provide audio_base64'
        }), 400

    # Decode audio; bad input is the caller's fault, not a server failure.
    # binascii.Error is a ValueError subclass, TypeError covers non-string values.
    try:
        audio_data = base64.b64decode(data['audio_base64'])
    except (TypeError, ValueError):
        return jsonify({
            'error': 'Invalid audio',
            'message': 'audio_base64 is not valid base64'
        }), 400

    if not audio_data:
        return jsonify({
            'error': 'Invalid audio',
            'message': 'audio_base64 decoded to empty audio'
        }), 400

    audio_path = None
    try:
        # Save to temp file. The path is recorded before writing so the
        # finally-clause can remove the file even if the write fails.
        with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
            audio_path = f.name
            f.write(audio_data)

        # Transcribe with Whisper (imported lazily, as in the original code)
        from services.speech_to_text import SpeechToTextService
        stt = SpeechToTextService()
        result = stt.transcribe_audio(audio_path)

        transcript = result['text']
        return jsonify({
            'success': True,
            'transcript': transcript,
            'language': result['language'],
            'word_count': len(transcript.split())
        }), 200

    except Exception as e:
        # logger.exception keeps the traceback for debugging
        logger.exception("Audio transcription failed: %s", e)
        return jsonify({
            'error': 'Transcription failed',
            'message': str(e)
        }), 500

    finally:
        # Cleanup is best-effort: a failed delete must not replace the
        # response that was already computed.
        if audio_path is not None:
            try:
                os.remove(audio_path)
            except FileNotFoundError:
                pass
            except OSError as cleanup_error:
                logger.warning("Could not remove temp audio file %s: %s",
                               audio_path, cleanup_error)
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
@app.route('/api/process-audio', methods=['POST'])
def process_audio():
    """
    Full pipeline for audio: Whisper transcription → Translation → Summary.

    Receives audio as base64 from Railway backend.

    Request JSON:
        audio_base64: base64-encoded audio bytes (required).
        video_id: identifier echoed back in the response (default 'unknown').
        summary_type: passed to the summarizer (default 'general').
        target_language: language code for the summary (default 'eng').

    Returns:
        (response, status) tuple:
        - 200 with transcript, summary, language info and word statistics;
          'english_transcript' / 'english_summary' are added only when a
          translation step was performed.
        - 400 when the body is not a JSON object, lacks 'audio_base64',
          or the payload is not valid / is empty base64.
        - 500 when any pipeline step fails.
    """
    import base64
    import os
    import tempfile

    # silent=True: a malformed or non-JSON body yields None instead of raising,
    # so it is reported as a client error (400) rather than a 500.
    data = request.get_json(silent=True)

    if not isinstance(data, dict) or 'audio_base64' not in data:
        return jsonify({
            'error': 'Missing audio',
            'message': 'Please provide audio_base64'
        }), 400

    video_id = data.get('video_id', 'unknown')
    # `or` also covers an explicit JSON null / empty string, which would
    # otherwise bypass the .get() default and reach the services as None.
    summary_type = data.get('summary_type') or 'general'
    target_language = data.get('target_language') or 'eng'

    # Decode audio; bad input is the caller's fault, not a server failure.
    # binascii.Error is a ValueError subclass, TypeError covers non-string values.
    try:
        audio_data = base64.b64decode(data['audio_base64'])
    except (TypeError, ValueError):
        return jsonify({
            'error': 'Invalid audio',
            'message': 'audio_base64 is not valid base64'
        }), 400

    if not audio_data:
        return jsonify({
            'error': 'Invalid audio',
            'message': 'audio_base64 decoded to empty audio'
        }), 400

    audio_path = None
    try:
        # Save to temp file. The path is recorded before writing so the
        # finally-clause can remove the file even if the write fails.
        with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
            audio_path = f.name
            f.write(audio_data)

        # Step 1: Transcribe with Whisper (imported lazily, as in the original)
        logger.info("Transcribing audio with Whisper...")
        from services.speech_to_text import SpeechToTextService
        stt = SpeechToTextService()
        whisper_result = stt.transcribe_audio(audio_path)

        transcript = whisper_result['text']
        original_language = whisper_result['language']
        original_word_count = len(transcript.split())

        logger.info(f"Transcription complete. Language: {original_language}")

        # Evaluate the language checks once and reuse them below
        source_needs_translation = not is_english(original_language)
        target_needs_translation = not is_english(target_language)

        # Step 2: Translate to English if needed
        english_transcript = transcript
        if source_needs_translation:
            logger.info("Translating to English...")
            translation_service = get_translation_service()
            english_transcript = translation_service.translate_to_english(
                transcript,
                original_language
            )

        # Step 3: Summarize
        logger.info("Generating summary...")
        summary = summarizer_service.summarize(
            text=english_transcript,
            summary_type=summary_type,
            chunk_size=2500,
            max_tokens=500
        )

        # Step 4: Translate summary to target language if needed
        final_summary = summary
        summary_language = "eng"
        if target_needs_translation:
            logger.info(f"Translating summary to {target_language}...")
            translation_service = get_translation_service()
            final_summary = translation_service.translate_from_english(summary, target_language)
            summary_language = target_language

        # Calculate statistics (guard against an empty transcript)
        summary_word_count = len(final_summary.split())
        if original_word_count > 0:
            compression_ratio = (summary_word_count / original_word_count) * 100
        else:
            compression_ratio = 0

        response = {
            'success': True,
            'video_id': video_id,
            'original_language': original_language,
            'original_language_name': get_language_name(original_language),
            'transcript': transcript,
            'transcript_source': 'whisper',
            'summary': final_summary,
            'summary_language': summary_language,
            'summary_language_name': get_language_name(summary_language),
            'statistics': {
                'original_word_count': original_word_count,
                'summary_word_count': summary_word_count,
                'compression_ratio': round(compression_ratio, 1),
                # 200 words per minute, never reported as less than 1 minute
                'reading_time_minutes': max(1, summary_word_count // 200)
            }
        }

        # Include the English intermediates only when they differ from the
        # fields already present in the response.
        if source_needs_translation:
            response['english_transcript'] = english_transcript
        if target_needs_translation:
            response['english_summary'] = summary

        logger.info("Audio processing complete!")
        return jsonify(response), 200

    except Exception as e:
        # logger.exception keeps the traceback for debugging
        logger.exception("Audio processing failed: %s", e)
        return jsonify({
            'error': 'Processing failed',
            'message': str(e)
        }), 500

    finally:
        # Cleanup is best-effort: a failed delete must not replace the
        # response that was already computed.
        if audio_path is not None:
            try:
                os.remove(audio_path)
            except FileNotFoundError:
                pass
            except OSError as cleanup_error:
                logger.warning("Could not remove temp audio file %s: %s",
                               audio_path, cleanup_error)
|
| 310 |
+
|
| 311 |
+
|
| 312 |
# =============================================================================
|
| 313 |
# TRANSCRIPT ENDPOINTS
|
| 314 |
# =============================================================================
|