"""Gradio demo that translates text between Indian languages with IndicTrans2.

The tokenizer and model are loaded once at import time, and a single
``gr.Interface`` named ``demo`` is exposed at module level. Running the file
as a script launches the web UI.
"""

import logging

import gradio as gr
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

logger = logging.getLogger(__name__)

# Model
MODEL_NAME = "ai4bharat/indictrans2-indic-indic-1B"

# Load tokenizer and model.
# NOTE: trust_remote_code=True executes Python code shipped in the model
# repository; consider pinning a `revision` if reproducibility matters.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME, trust_remote_code=True)

# Supported languages: full name -> IndicTrans2 language tag.
# The tags are FLORES-style "<lang>_<Script>" codes rather than bare ISO
# codes; the tokenizer is expected to reject anything else.
# NOTE(review): MODEL_NAME is the Indic->Indic checkpoint, so "English" is
# presumably not a supported direction for it -- confirm against the model
# card, and switch checkpoints or drop the entry if so.
LANGUAGES = {
    "Assamese": "asm_Beng",
    "Bengali": "ben_Beng",
    "Gujarati": "guj_Gujr",
    "Hindi": "hin_Deva",
    "Kannada": "kan_Knda",
    "Malayalam": "mal_Mlym",
    "Marathi": "mar_Deva",
    "Odia": "ory_Orya",
    "Punjabi": "pan_Guru",
    "Tamil": "tam_Taml",
    "Telugu": "tel_Telu",
    "Urdu": "urd_Arab",
    "English": "eng_Latn",
}

# Upper bound on the generated sequence length, in tokens.
MAX_OUTPUT_TOKENS = 512


def translate(text: str, src_lang_name: str, tgt_lang_name: str) -> str:
    """Translate *text* from one supported language to another.

    Args:
        text: The text to translate.
        src_lang_name: Display name of the source language (a key of
            ``LANGUAGES``).
        tgt_lang_name: Display name of the target language (a key of
            ``LANGUAGES``).

    Returns:
        The decoded translation on success. When the input is blank, a
        language is not recognised, or the model call fails, a short
        human-readable message is returned instead; this function never
        raises, so the message is what ends up in the output textbox.
    """
    if not text or not text.strip():
        return "⚠️ Please enter some text to translate."

    # Convert full name to language tag; .get() also covers a cleared
    # dropdown (None) without surfacing a raw KeyError to the user.
    src_lang = LANGUAGES.get(src_lang_name)
    tgt_lang = LANGUAGES.get(tgt_lang_name)
    if src_lang is None or tgt_lang is None:
        return "❌ Error: please choose a supported source and target language."

    # Nothing to translate; skip the (expensive) model call.
    if src_lang == tgt_lang:
        return text

    try:
        # The model input is "<src_tag> <tgt_tag> <sentence>": two
        # space-separated language tags followed by the text.
        # NOTE(review): the model card additionally runs the text through
        # IndicTransToolkit's IndicProcessor before tokenizing and after
        # decoding; that step is not applied here -- confirm whether output
        # quality/script is acceptable without it.
        formatted_text = f"{src_lang} {tgt_lang} {text.strip()}"
        inputs = tokenizer(formatted_text, return_tensors="pt")

        # Inference only: no gradients are needed.
        with torch.no_grad():
            output_tokens = model.generate(**inputs, max_length=MAX_OUTPUT_TOKENS)

        return tokenizer.decode(output_tokens[0], skip_special_tokens=True)
    except Exception as e:  # UI boundary: report the failure, don't crash.
        logger.exception(
            "Translation failed (%s -> %s)", src_lang_name, tgt_lang_name
        )
        return f"❌ Error: {e}"


# Gradio interface
LANGUAGE_NAMES = list(LANGUAGES)

demo = gr.Interface(
    fn=translate,
    inputs=[
        gr.Textbox(label="Text", placeholder="Enter your text here..."),
        gr.Dropdown(label="Source Language", choices=LANGUAGE_NAMES, value="Tamil"),
        gr.Dropdown(label="Target Language", choices=LANGUAGE_NAMES, value="English"),
    ],
    outputs=gr.Textbox(label="Translated Text"),
    title="IndicTrans2 Language Translator",
    description=(
        "🌐 Translate text between Indian languages (and English) using "
        "[ai4bharat/indictrans2-indic-indic-1B](https://huggingface.co/ai4bharat/indictrans2-indic-indic-1B)."
    ),
)

if __name__ == "__main__":
    demo.launch()