Update app.py
app.py
CHANGED
```diff
@@ -2,7 +2,6 @@ import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 import os
-from compressed_tensors import load_compressed_model
 
 # Set cache directory for Spaces
 os.environ['TRANSFORMERS_CACHE'] = '/tmp/cache'
@@ -15,8 +14,8 @@ class HunyuanTranslator:
         self._load_model()
 
     def _load_model(self):
-        """Load the pre-quantized FP8 model
-        print("Loading Hunyuan-MT FP8 model
+        """Load the pre-quantized FP8 model"""
+        print("Loading Hunyuan-MT FP8 model...")
 
         try:
             # Load tokenizer first
@@ -26,34 +25,23 @@ class HunyuanTranslator:
                 trust_remote_code=True
             )
 
-            #
-
-            self.model =
+            # For Compressed Tensors models, use the standard from_pretrained
+            # The quantization is automatically handled by the model files
+            self.model = AutoModelForCausalLM.from_pretrained(
                 self.model_name,
-
-                torch_dtype=torch.float16,
-                trust_remote_code=True
+                device_map="auto",
+                torch_dtype=torch.float16,  # Use fp16 as base dtype
+                trust_remote_code=True,
+                cache_dir='/tmp/cache'
             )
 
-            print("FP8 model loaded successfully
+            print("FP8 model loaded successfully!")
             print(f"Model device: {self.model.device}")
             print(f"Model dtype: {next(self.model.parameters()).dtype}")
 
         except Exception as e:
-            print(f"Error loading model
-
-            try:
-                print("Trying standard loading as fallback...")
-                self.model = AutoModelForCausalLM.from_pretrained(
-                    self.model_name,
-                    device_map="auto",
-                    torch_dtype=torch.float16,
-                    trust_remote_code=True,
-                    cache_dir='/tmp/cache'
-                )
-                print("Model loaded successfully with standard method!")
-            except Exception as e2:
-                raise Exception(f"Both Compressed Tensors and standard loading failed: {e2}")
+            print(f"Error loading model: {e}")
+            raise Exception(f"Could not load the Hunyuan-MT model: {str(e)}")
 
     def translate_ja_to_en(self, input_text: str) -> str:
         """Translate Japanese to English using FP8 model"""
```
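The net effect of this hunk is that the pre-quantized FP8 checkpoint now goes through the stock `transformers` loader, with no `compressed_tensors` import and no fallback path. A standalone sketch of the new load path, assuming a recent `transformers` build with compressed-tensors quantization support (the `MODEL_NAME` constant is just a local name for the repo id used in app.py):

```python
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Writable cache for Hugging Face Spaces, as set in app.py
os.environ['TRANSFORMERS_CACHE'] = '/tmp/cache'

MODEL_NAME = "tencent/Hunyuan-MT-7B-fp8"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)

# No compressed_tensors import needed: the checkpoint's bundled
# quantization config tells from_pretrained how to handle the FP8 weights.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",          # place layers on GPU when one is available
    torch_dtype=torch.float16,  # base dtype for non-quantized tensors
    trust_remote_code=True,
    cache_dir='/tmp/cache',
)

print(f"Model device: {model.device}")
print(f"Model dtype: {next(model.parameters()).dtype}")
```

Depending on the `transformers` version, the `compressed-tensors` package may still need to be installed as a backend, even though the code no longer imports it directly.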
```diff
@@ -136,7 +124,7 @@ def create_translation_interface():
         print(f"Failed to initialize translator: {e}")
 
         def translate_function(input_text):
-            return f"Model initialization failed: {str(e)}\n\nPlease check
+            return f"Model initialization failed: {str(e)}\n\nPlease check the Space logs for details."
 
     # Custom CSS for better appearance
     custom_css = """
@@ -168,7 +156,7 @@ def create_translation_interface():
         gr.Markdown(
             """
             # 🇯🇵 → 🇺🇸 Japanese to English Translation
-            **Model:** `tencent/Hunyuan-MT-7B-fp8` • **Technology:**
+            **Model:** `tencent/Hunyuan-MT-7B-fp8` • **Technology:** FP8 Quantization
 
             *Fast, high-quality Japanese to English translation using optimized FP8 model*
             """
@@ -227,7 +215,7 @@ def create_translation_interface():
             inputs=input_text,
             outputs=output_text,
             fn=translate_function,
-            cache_examples=
+            cache_examples=False,
             label="Click any example to try:"
         )
 
@@ -260,15 +248,14 @@ def create_translation_interface():
 
     **Model Details:**
     - **Base Model**: Hunyuan-MT 7B
-    - **Quantization**: FP8 (8-bit floating point)
+    - **Quantization**: FP8 (8-bit floating point)
     - **Memory Usage**: ~3-4GB
     - **Specialization**: Japanese → English translation
 
     **Optimization Features:**
     - ✅ FP8 quantization for faster inference
-    - ✅ Compressed Tensors for efficient storage
     - ✅ GPU acceleration support
-    - ✅
+    - ✅ Efficient memory usage
 
     **Usage Tips:**
    - Keep inputs under 1500 characters for best results
```
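One note on the `cache_examples=False` change above: when example caching is enabled, Gradio pre-runs `fn` on every example, which on a Space would invoke the 7B model during startup or build. A minimal sketch of how the flag sits in the `gr.Examples` wiring; the component names, the stub `translate_function`, and the example sentence are placeholders rather than the app's actual values:

```python
import gradio as gr

def translate_function(input_text: str) -> str:
    # Stub: the real app delegates to HunyuanTranslator.translate_ja_to_en
    return f"(translation of) {input_text}"

with gr.Blocks() as demo:
    input_text = gr.Textbox(label="Japanese text", lines=4)
    output_text = gr.Textbox(label="English translation", lines=4)

    gr.Examples(
        examples=["こんにちは、元気ですか？"],  # placeholder example sentence
        inputs=input_text,
        outputs=output_text,
        fn=translate_function,
        cache_examples=False,  # don't pre-run fn on every example at startup
        label="Click any example to try:",
    )

demo.launch()
```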