dilaksh06 committed on
Commit
6e4990c
·
1 Parent(s): 96d63fd
Files changed (2) hide show
  1. app.py +17 -59
  2. requirements.txt +3 -5
app.py CHANGED
@@ -1,45 +1,23 @@
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
3
 
4
- # -------------------------------
5
- # CONFIGURATION
6
- # -------------------------------
7
- MODEL_NAME = "ai4bharat/indictrans2-m2m-1B"
8
 
9
- # -------------------------------
10
- # LOAD MODEL & TOKENIZER
11
- # -------------------------------
12
- print("📥 Loading model... This may take a moment.")
13
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
14
  model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME, trust_remote_code=True)
15
- print("✅ Model loaded successfully!")
16
 
17
- # -------------------------------
18
- # TRANSLATION FUNCTION
19
- # -------------------------------
20
  def translate(text: str, src_lang: str, tgt_lang: str) -> str:
21
- """
22
- Translate text from `src_lang` to `tgt_lang` using IndicTrans2.
23
-
24
- Args:
25
- text (str): Input text to translate.
26
- src_lang (str): Source language code (e.g., 'ta', 'en', 'hi').
27
- tgt_lang (str): Target language code (e.g., 'en', 'ta', 'fr').
28
-
29
- Returns:
30
- str: Translated text or error message.
31
- """
32
  if not text.strip():
33
  return "⚠️ Please enter some text to translate."
34
 
35
- if not src_lang.strip() or not tgt_lang.strip():
36
- return "⚠️ Please provide both source and target language codes."
37
-
38
  src_lang = src_lang.strip().lower()
39
  tgt_lang = tgt_lang.strip().lower()
40
 
41
  try:
42
- # Format input for IndicTrans2
43
  formatted_text = f"{src_lang}>>{tgt_lang} {text}"
44
  inputs = tokenizer(formatted_text, return_tensors="pt")
45
 
@@ -47,45 +25,25 @@ def translate(text: str, src_lang: str, tgt_lang: str) -> str:
47
  output_tokens = model.generate(**inputs, max_length=512)
48
  translation = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
49
 
50
- return translation.strip()
51
-
52
  except Exception as e:
53
- return f"❌ Error during translation: {str(e)}"
54
 
55
- # -------------------------------
56
- # GRADIO UI
57
- # -------------------------------
58
  demo = gr.Interface(
59
  fn=translate,
60
  inputs=[
61
- gr.Textbox(
62
- label="Text",
63
- placeholder="Enter your text here...",
64
- lines=4
65
- ),
66
- gr.Textbox(
67
- label="Source Language Code (e.g., ta, en, hi)",
68
- placeholder="ta"
69
- ),
70
- gr.Textbox(
71
- label="Target Language Code (e.g., en, ta, fr)",
72
- placeholder="en"
73
- )
74
  ],
75
- outputs=gr.Textbox(
76
- label="Translated Text",
77
- lines=4
78
- ),
79
- title="🌐 IndicTrans2 Language Translator",
80
  description=(
81
- "Translate between Indian and international languages using "
82
- "[ai4bharat/indictrans2-m2m-1B](https://huggingface.co/ai4bharat/indictrans2-m2m-1B)."
83
- ),
84
- allow_flagging="never"
85
  )
86
 
87
- # -------------------------------
88
- # RUN APP
89
- # -------------------------------
90
  if __name__ == "__main__":
91
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
3
 
4
+ # ✅ Use valid model
5
+ MODEL_NAME = "ai4bharat/indictrans2-indic-indic-1B"
 
 
6
 
7
+ # Load tokenizer and model
 
 
 
8
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
9
  model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME, trust_remote_code=True)
 
10
 
 
 
 
11
  def translate(text: str, src_lang: str, tgt_lang: str) -> str:
12
+ """Translate text from src_lang to tgt_lang using IndicTrans2."""
 
 
 
 
 
 
 
 
 
 
13
  if not text.strip():
14
  return "⚠️ Please enter some text to translate."
15
 
 
 
 
16
  src_lang = src_lang.strip().lower()
17
  tgt_lang = tgt_lang.strip().lower()
18
 
19
  try:
20
+ # Format input as required by IndicTrans2
21
  formatted_text = f"{src_lang}>>{tgt_lang} {text}"
22
  inputs = tokenizer(formatted_text, return_tensors="pt")
23
 
 
25
  output_tokens = model.generate(**inputs, max_length=512)
26
  translation = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
27
 
28
+ return translation
 
29
  except Exception as e:
30
+ return f"❌ Error: {str(e)}"
31
 
32
+ # Gradio interface
 
 
33
  demo = gr.Interface(
34
  fn=translate,
35
  inputs=[
36
+ gr.Textbox(label="Text", placeholder="Enter your text here..."),
37
+ gr.Textbox(label="Source Language Code (e.g., ta, hi, kn)", placeholder="ta"),
38
+ gr.Textbox(label="Target Language Code (e.g., en, hi, kn)", placeholder="en")
 
 
 
 
 
 
 
 
 
 
39
  ],
40
+ outputs=gr.Textbox(label="Translated Text"),
41
+ title="IndicTrans2 Language Translator",
 
 
 
42
  description=(
43
+ "🌐 Translate text between Indian languages using "
44
+ "[ai4bharat/indictrans2-indic-indic-1B](https://huggingface.co/ai4bharat/indictrans2-indic-indic-1B)."
45
+ )
 
46
  )
47
 
 
 
 
48
  if __name__ == "__main__":
49
+ demo.launch()
requirements.txt CHANGED
@@ -1,5 +1,3 @@
1
- transformers>=4.39.0
2
- torch>=2.0.0
3
- sentencepiece
4
- protobuf
5
- gradio>=4.0.0
 
1
+ gradio>=5.0
2
+ transformers>=4.40
3
+ torch>=2.1