Image_Inversion / translator.py
IdlecloudX's picture
Update translator.py
329cf76 verified
"""
translator.py
腾讯云 (批量接口+动态分包+多账号轮询) + 百度翻译 API 封装
⚠️ 需在 HF 空间的 “Variables” 页设置以下环境变量
------------------------------------------------------------------
TENCENT_CREDENTIALS_JSON 形如:
[
{"secret_id": "AKIDxxxx", "secret_key": "yyyy"},
{"secret_id": "AKIDaaaa", "secret_key": "bbbb"}
]
TENCENT_SECRET_ID (兼容旧配置) 单个 SecretId
TENCENT_SECRET_KEY (兼容旧配置) 单个 SecretKey
------------------------------------------------------------------
BAIDU_CREDENTIALS_JSON 形如:
[
{"app_id": "xxxx", "secret_key": "yyyy"}
]
------------------------------------------------------------------
"""
import hashlib, hmac, json, os, random, time
from datetime import datetime
from typing import List, Sequence, Optional, Dict, Any
import requests
# --- Tencent Cloud (TMT) configuration -------------------------------------
# Endpoint of the Tencent translation API; can be overridden via environment.
TENCENT_TRANSLATE_URL = os.getenv("TENCENT_TRANSLATE_URL", "https://tmt.tencentcloudapi.com")
# Credential pool parsed from a JSON array of {"secret_id", "secret_key"} objects.
_tencent_creds_list = json.loads(os.getenv("TENCENT_CREDENTIALS_JSON", "[]"))
# Round-robin cursor into the Tencent credential pool.
_tencent_idx: int = 0

# Backward compatibility with the old single-account configuration: add the
# legacy pair to the pool unless an entry with the same secret_id is present.
_legacy_id = os.getenv("TENCENT_SECRET_ID")
_legacy_key = os.getenv("TENCENT_SECRET_KEY")
if (
    _legacy_id
    and _legacy_key
    and all(c.get("secret_id") != _legacy_id for c in _tencent_creds_list)
):
    _tencent_creds_list.append({"secret_id": _legacy_id, "secret_key": _legacy_key})

# --- Baidu Translate configuration -----------------------------------------
BAIDU_TRANSLATE_URL = os.getenv("BAIDU_TRANSLATE_URL", "https://fanyi-api.baidu.com/api/trans/vip/translate")
# Credential pool parsed from a JSON array of {"app_id", "secret_key"} objects.
_baidu_creds_list = json.loads(os.getenv("BAIDU_CREDENTIALS_JSON", "[]"))
# Global index used for round-robin polling of the Baidu pool.
_baidu_idx: int = 0
# 腾讯云翻译逻辑 (批量接口 TextTranslateBatch)
def _sign(key: bytes, msg: str) -> bytes:
    """Return the raw HMAC-SHA256 digest of *msg* (UTF-8 encoded) keyed by *key*."""
    mac = hmac.new(key, digestmod=hashlib.sha256)
    mac.update(msg.encode("utf-8"))
    return mac.digest()
def _tc3_signature(secret_key: str, date: str, service: str, string_to_sign: str) -> str:
    """Derive the TC3 signing key and return the hex signature of *string_to_sign*.

    The key starts as ``"TC3" + secret_key`` and is chained through HMAC-SHA256
    over the date, the service name and the literal ``"tc3_request"``; the
    final key signs *string_to_sign* and the hex digest is returned.
    """
    derived_key = ("TC3" + secret_key).encode()
    for component in (date, service, "tc3_request"):
        derived_key = _sign(derived_key, component)
    signer = hmac.new(derived_key, string_to_sign.encode("utf-8"), hashlib.sha256)
    return signer.hexdigest()
def _call_tencent_batch_once(cred: Dict[str, str], text_list: List[str], src: str, tgt: str) -> List[str]:
    """Call the Tencent Cloud ``TextTranslateBatch`` action once with one credential.

    Builds a TC3-HMAC-SHA256 signed JSON POST request and sends it to
    ``TENCENT_TRANSLATE_URL``.

    Args:
        cred: Mapping with the keys ``"secret_id"`` and ``"secret_key"``.
        text_list: Texts sent as ``SourceTextList``.
        src: Source language code sent as ``Source``.
        tgt: Target language code sent as ``Target``.

    Returns:
        The ``Response.TargetTextList`` value of the JSON reply.

    Raises:
        requests.HTTPError: If the HTTP status indicates failure.
        Exception: If the reply carries a ``Response.Error`` object.
        KeyError: If the credential or the reply lacks an expected key.
    """
    secret_id = cred["secret_id"]
    secret_key = cred["secret_key"]

    service = "tmt"
    host = "tmt.tencentcloudapi.com"
    action = "TextTranslateBatch"
    version = "2018-03-21"
    region = "ap-beijing"
    algorithm = "TC3-HMAC-SHA256"
    signed_headers = "content-type;host;x-tc-action"

    ts = int(time.time())
    # The credential scope uses the UTC calendar date of the timestamp.
    # time.gmtime is used instead of the deprecated datetime.utcfromtimestamp.
    date = time.strftime("%Y-%m-%d", time.gmtime(ts))

    payload = {
        "SourceTextList": text_list,
        "Source": src,
        "Target": tgt,
        "ProjectId": 0,
    }
    # Encode exactly once: the bytes that are hashed for the signature must be
    # the bytes put on the wire. Passing a str body would leave the encoding
    # to the HTTP stack, which may not be UTF-8 and can fail on non-Latin text.
    payload_bytes = json.dumps(payload, ensure_ascii=False).encode("utf-8")

    canonical_request = "\n".join([
        "POST",
        "/",
        "",
        f"content-type:application/json; charset=utf-8\nhost:{host}\nx-tc-action:{action.lower()}\n",
        signed_headers,
        hashlib.sha256(payload_bytes).hexdigest(),
    ])

    credential_scope = f"{date}/{service}/tc3_request"
    string_to_sign = "\n".join([
        algorithm,
        str(ts),
        credential_scope,
        hashlib.sha256(canonical_request.encode()).hexdigest(),
    ])

    signature = _tc3_signature(secret_key, date, service, string_to_sign)
    authorization = (
        f"{algorithm} Credential={secret_id}/{credential_scope}, "
        f"SignedHeaders={signed_headers}, Signature={signature}"
    )

    headers = {
        "Authorization": authorization,
        "Content-Type": "application/json; charset=utf-8",
        "Host": host,
        "X-TC-Action": action,
        "X-TC-Timestamp": str(ts),
        "X-TC-Version": version,
        "X-TC-Region": region,
    }

    resp = requests.post(TENCENT_TRANSLATE_URL, headers=headers, data=payload_bytes, timeout=8)
    resp.raise_for_status()
    data = resp.json()

    # Business errors come back inside the "Response" object of the reply.
    if "Response" in data and "Error" in data["Response"]:
        err_code = data["Response"]["Error"].get("Code", "")
        err_msg = data["Response"]["Error"].get("Message", "")
        raise Exception(f"Tencent Biz Error: {err_code} - {err_msg}")

    return data["Response"]["TargetTextList"]
def _translate_with_tencent_pool(texts: Sequence[str], src="auto", tgt="zh") -> Optional[List[str]]:
    """Translate *texts* through Tencent Cloud using the credential pool.

    1. Dynamic batching: a batch is closed before an item that would push its
       accumulated character count above 5000 (safety margin under the
       6000 limit noted in the original code), or once it holds 50 items.
    2. Credential polling: each batch is tried with successive credentials,
       at most once per credential in the pool.

    Returns the concatenated translations, or ``None`` when the pool is empty
    or when any batch fails with every credential.
    """
    global _tencent_idx, _tencent_creds_list

    if not _tencent_creds_list:
        return None

    # Safety thresholds.
    MAX_CHARS_PER_BATCH = 5000  # limit noted as 6000; keep a 1000-char buffer
    MAX_ITEMS_PER_BATCH = 50  # avoid overly large arrays per request

    batches = []
    pending = []
    pending_chars = 0
    for item in texts:
        size = len(item)
        over_budget = pending_chars + size > MAX_CHARS_PER_BATCH
        batch_full = len(pending) >= MAX_ITEMS_PER_BATCH
        # Close the current batch first if adding this item would exceed a limit.
        if pending and (over_budget or batch_full):
            batches.append(pending)
            pending, pending_chars = [], 0
        pending.append(item)
        pending_chars += size
    # Flush whatever is left as the final batch.
    if pending:
        batches.append(pending)

    translated = []
    for batch in batches:
        for _ in range(len(_tencent_creds_list)):
            cred = _tencent_creds_list[_tencent_idx]
            _tencent_idx = (_tencent_idx + 1) % len(_tencent_creds_list)
            try:
                translated.extend(_call_tencent_batch_once(cred, list(batch), src, tgt))
            except Exception as e:
                safe_id = cred['secret_id'][:4] + "****"
                print(f"[translator] Tencent ID {safe_id} failed on batch: {e}. Switching...")
            else:
                break  # success: stop retrying this batch
        else:
            # No credential succeeded. One failed batch degrades the whole
            # request so the result stays consistent.
            print("[translator] All Tencent credentials failed for a batch. Falling back to Baidu.")
            return None

    return translated
# 百度翻译逻辑
def _translate_with_baidu_pool(texts: Sequence[str], src="auto", tgt="zh") -> Optional[List[str]]:
    """Translate *texts* with one Baidu credential chosen round-robin.

    All items are joined with ``"\\n"`` into a single query, so the reply is
    expected to hold one result per line.

    Args:
        texts: Texts to translate.
        src: Source language code sent as ``from``.
        tgt: Target language code sent as ``to``.

    Returns:
        One translation per input item, or ``None`` when no credential is
        configured, the request fails, the API reports an error, or the number
        of results differs from the number of inputs.

    Raises:
        KeyError: If the selected credential lacks ``"app_id"`` or
            ``"secret_key"`` (raised before the request is attempted).
    """
    global _baidu_idx, _baidu_creds_list

    if not _baidu_creds_list:
        return None

    cred = _baidu_creds_list[_baidu_idx]
    _baidu_idx = (_baidu_idx + 1) % len(_baidu_creds_list)
    app_id, secret_key = cred["app_id"], cred["secret_key"]

    salt = random.randint(32768, 65536)
    # Line breaks delimit items in the joined query; flatten any break inside
    # an item so each input maps to exactly one line (and one result).
    lines = [" ".join(text.splitlines()) for text in texts]
    query = "\n".join(lines)
    sign = hashlib.md5((app_id + query + str(salt) + secret_key).encode()).hexdigest()
    params = {
        "q": query, "from": src, "to": tgt,
        "appid": app_id, "salt": salt, "sign": sign,
    }

    try:
        # Form-encoded POST keeps large batches out of the URL, which has a
        # practical length limit.
        resp = requests.post(BAIDU_TRANSLATE_URL, data=params, timeout=8)
        resp.raise_for_status()
        data = resp.json()
        if "error_code" in data:
            raise Exception(f"Baidu Biz Error: {data['error_code']} - {data.get('error_msg')}")
        results = [item["dst"] for item in data["trans_result"]]
        # A differing count means results no longer line up with the inputs;
        # treat it as a failure instead of returning misaligned translations.
        if len(results) != len(lines):
            raise Exception(
                f"Baidu result count mismatch: expected {len(lines)}, got {len(results)}"
            )
        return results
    except Exception as e:
        print(f"[translator] Baidu API error → {e}")
        return None
# 对外统一入口
def translate_texts(texts: Sequence[str],
                    src_lang: str = "auto",
                    tgt_lang: str = "zh") -> List[str]:
    """Translate *texts*, trying Tencent Cloud first and Baidu second.

    Logic:
        1. Try Tencent Cloud (batch API + multi-account polling).
        2. On failure, degrade to Baidu.
        3. If that fails too, return the original texts.

    A provider result is accepted only when it contains exactly one entry per
    input item, so the returned list always lines up with *texts*.

    Args:
        texts: Texts to translate.
        src_lang: Source language code passed to the providers.
        tgt_lang: Target language code passed to the providers.

    Returns:
        A list with one entry per input item: the translations, or a copy of
        the original texts when no provider produced a usable result. An empty
        input yields an empty list.
    """
    if not texts:
        return []

    expected = len(texts)

    # 1. Prefer Tencent Cloud.
    out = _translate_with_tencent_pool(texts, src_lang, tgt_lang)
    if out is not None and len(out) != expected:
        print(f"[translator] Tencent returned {len(out)} results for {expected} inputs. Falling back to Baidu.")
        out = None

    # 2. Degrade to Baidu on failure.
    if out is None:
        out = _translate_with_baidu_pool(texts, src_lang, tgt_lang)
        if out is not None and len(out) != expected:
            print(f"[translator] Baidu returned {len(out)} results for {expected} inputs. Returning original texts.")
            out = None

    # 3. Still nothing usable: hand back the untranslated input.
    if out is None:
        return list(texts)
    return out