Spaces:
Running
Running
"""
translator.py
Wrapper around the Tencent Cloud translation API (batch endpoint + dynamic
batching + multi-account rotation) and the Baidu translation API.
⚠️ The following environment variables must be set on the "Variables" page of
the HF Space:
------------------------------------------------------------------
TENCENT_CREDENTIALS_JSON  a JSON array such as:
    [
      {"secret_id": "AKIDxxxx", "secret_key": "yyyy"},
      {"secret_id": "AKIDaaaa", "secret_key": "bbbb"}
    ]
TENCENT_SECRET_ID   (legacy configuration) a single SecretId
TENCENT_SECRET_KEY  (legacy configuration) a single SecretKey
------------------------------------------------------------------
BAIDU_CREDENTIALS_JSON  a JSON array such as:
    [
      {"app_id": "xxxx", "secret_key": "yyyy"}
    ]
------------------------------------------------------------------
"""
| import hashlib, hmac, json, os, random, time | |
| from datetime import datetime | |
| from typing import List, Sequence, Optional, Dict, Any | |
| import requests | |
# Tencent Cloud account pool, parsed from the JSON array in TENCENT_CREDENTIALS_JSON.
_tencent_creds_list = json.loads(os.getenv("TENCENT_CREDENTIALS_JSON", "[]"))

# Backward compatibility with the old single-account configuration: when both
# legacy variables are set and that SecretId is not already in the pool, add it.
_legacy_id = os.getenv("TENCENT_SECRET_ID")
_legacy_key = os.getenv("TENCENT_SECRET_KEY")
if (
    _legacy_id
    and _legacy_key
    and all(c.get("secret_id") != _legacy_id for c in _tencent_creds_list)
):
    _tencent_creds_list.append({"secret_id": _legacy_id, "secret_key": _legacy_key})

TENCENT_TRANSLATE_URL = os.getenv("TENCENT_TRANSLATE_URL", "https://tmt.tencentcloudapi.com")
# Global index of the next Tencent credential to use (round-robin).
_tencent_idx: int = 0

BAIDU_TRANSLATE_URL = os.getenv("BAIDU_TRANSLATE_URL", "https://fanyi-api.baidu.com/api/trans/vip/translate")
# Baidu account pool, parsed from the JSON array in BAIDU_CREDENTIALS_JSON.
_baidu_creds_list = json.loads(os.getenv("BAIDU_CREDENTIALS_JSON", "[]"))
# Global index of the next Baidu credential to use (round-robin).
_baidu_idx: int = 0
# Tencent Cloud translation logic (batch endpoint: TextTranslateBatch)
| def _sign(key: bytes, msg: str) -> bytes: | |
| return hmac.new(key, msg.encode("utf-8"), hashlib.sha256).digest() | |
def _tc3_signature(secret_key: str, date: str, service: str, string_to_sign: str) -> str:
    """Derive the TC3 signing key and return the hex signature of ``string_to_sign``.

    The key is derived by chaining HMAC-SHA256 over ``date``, ``service`` and the
    literal ``"tc3_request"``, starting from ``"TC3" + secret_key``; the final
    HMAC over ``string_to_sign`` is returned as a lowercase hex string.
    """
    derived_key = ("TC3" + secret_key).encode()
    for component in (date, service, "tc3_request"):
        derived_key = _sign(derived_key, component)
    return _sign(derived_key, string_to_sign).hex()
def _call_tencent_batch_once(cred: Dict[str, str], text_list: List[str], src: str, tgt: str) -> List[str]:
    """Call the Tencent Cloud ``TextTranslateBatch`` API once with one credential.

    The request is signed with the TC3-HMAC-SHA256 scheme over the
    ``content-type``, ``host`` and ``x-tc-action`` headers and the SHA-256 hash
    of the JSON payload, then POSTed to ``TENCENT_TRANSLATE_URL``.

    Args:
        cred: Mapping holding ``"secret_id"`` and ``"secret_key"``.
        text_list: Texts to translate in this single request.
        src: Source language code passed as ``Source``.
        tgt: Target language code passed as ``Target``.

    Returns:
        The ``Response.TargetTextList`` value from the API response.

    Raises:
        KeyError: If ``cred`` lacks a required key or the response lacks
            ``Response`` / ``TargetTextList``.
        requests.HTTPError: If the HTTP status indicates failure.
        RuntimeError: If the response carries a ``Response.Error`` object.
    """
    secret_id = cred["secret_id"]
    secret_key = cred["secret_key"]

    service = "tmt"
    host = "tmt.tencentcloudapi.com"
    action = "TextTranslateBatch"
    version = "2018-03-21"
    region = "ap-beijing"
    algorithm = "TC3-HMAC-SHA256"
    content_type = "application/json; charset=utf-8"
    signed_headers = "content-type;host;x-tc-action"

    ts = int(time.time())
    # UTC date for the credential scope. time.gmtime replaces
    # datetime.utcfromtimestamp, which is deprecated since Python 3.12.
    date = time.strftime("%Y-%m-%d", time.gmtime(ts))

    payload = {
        "SourceTextList": text_list,
        "Source": src,
        "Target": tgt,
        "ProjectId": 0,
    }
    # Encode exactly once: the bytes that are hashed for the signature must be
    # the very bytes sent on the wire. Passing a str to requests would leave
    # the encoding of non-ASCII text to the HTTP stack.
    payload_bytes = json.dumps(payload, ensure_ascii=False).encode("utf-8")

    canonical_request = "\n".join([
        "POST",
        "/",
        "",
        f"content-type:{content_type}\nhost:{host}\nx-tc-action:{action.lower()}\n",
        signed_headers,
        hashlib.sha256(payload_bytes).hexdigest(),
    ])
    credential_scope = f"{date}/{service}/tc3_request"
    string_to_sign = "\n".join([
        algorithm,
        str(ts),
        credential_scope,
        hashlib.sha256(canonical_request.encode()).hexdigest(),
    ])
    signature = _tc3_signature(secret_key, date, service, string_to_sign)
    authorization = (
        f"{algorithm} Credential={secret_id}/{credential_scope}, "
        f"SignedHeaders={signed_headers}, Signature={signature}"
    )
    headers = {
        "Authorization": authorization,
        "Content-Type": content_type,
        "Host": host,
        "X-TC-Action": action,
        "X-TC-Timestamp": str(ts),
        "X-TC-Version": version,
        "X-TC-Region": region,
    }

    resp = requests.post(TENCENT_TRANSLATE_URL, headers=headers, data=payload_bytes, timeout=8)
    resp.raise_for_status()
    data = resp.json()

    # Business-level failures arrive with a successful HTTP status and an
    # "Error" object inside "Response".
    if "Response" in data and "Error" in data["Response"]:
        error = data["Response"]["Error"]
        err_code = error.get("Code", "")
        err_msg = error.get("Message", "")
        raise RuntimeError(f"Tencent Biz Error: {err_code} - {err_msg}")
    return data["Response"]["TargetTextList"]
def _split_into_batches(texts: Sequence[str], max_chars: int, max_items: int) -> List[List[str]]:
    """Greedily group ``texts``, in order, into batches within both limits.

    A batch is closed before adding a text that would push its total character
    count above ``max_chars`` or when it already holds ``max_items`` texts. A
    single text longer than ``max_chars`` still gets a batch of its own.
    """
    batches: List[List[str]] = []
    current: List[str] = []
    current_chars = 0
    for text in texts:
        text_len = len(text)
        if current and (current_chars + text_len > max_chars or len(current) >= max_items):
            batches.append(current)
            current, current_chars = [], 0
        current.append(text)
        current_chars += text_len
    if current:
        batches.append(current)
    return batches


def _translate_with_tencent_pool(texts: Sequence[str], src="auto", tgt="zh") -> Optional[List[str]]:
    """Translate ``texts`` through the Tencent Cloud account pool.

    1. Dynamic batching: texts are grouped so each request stays within a
       character budget and an item-count budget (see the constants below).
    2. Account rotation: each batch is tried with every configured credential
       in round-robin order until one succeeds.

    Args:
        texts: Texts to translate, in order.
        src: Source language code.
        tgt: Target language code.

    Returns:
        The translations in input order, or ``None`` when no Tencent
        credentials are configured or any batch failed with every credential.
        A partial result is never returned, so the caller can fall back as a whole.
    """
    global _tencent_idx
    if not _tencent_creds_list:
        return None

    # Safety thresholds. The original author notes an official limit of 6000
    # characters per request and keeps a 1000-character buffer.
    MAX_CHARS_PER_BATCH = 5000
    MAX_ITEMS_PER_BATCH = 50  # avoid oversized arrays in a single request

    all_results: List[str] = []
    for chunk in _split_into_batches(texts, MAX_CHARS_PER_BATCH, MAX_ITEMS_PER_BATCH):
        for _ in range(len(_tencent_creds_list)):
            cred = _tencent_creds_list[_tencent_idx]
            _tencent_idx = (_tencent_idx + 1) % len(_tencent_creds_list)
            try:
                res = _call_tencent_batch_once(cred, list(chunk), src, tgt)
                # A response of the wrong size would silently misalign every
                # later translation, so treat it as a failed attempt.
                if len(res) != len(chunk):
                    raise ValueError(f"expected {len(chunk)} translations, got {len(res)}")
            except Exception as e:
                # Use .get so a malformed credential entry cannot raise again
                # inside this handler and escape the function.
                raw_id = cred.get("secret_id", "") if isinstance(cred, dict) else ""
                safe_id = str(raw_id)[:4] + "****"
                print(f"[translator] Tencent ID {safe_id} failed on batch: {e}. Switching...")
                continue
            all_results.extend(res)
            break  # success: stop retrying this batch
        else:
            # Every credential failed for this batch: degrade as a whole.
            print("[translator] All Tencent credentials failed for a batch. Falling back to Baidu.")
            return None
    return all_results
# Baidu translation logic
def _translate_with_baidu_pool(texts: Sequence[str], src="auto", tgt="zh") -> Optional[List[str]]:
    """Translate ``texts`` through the Baidu account pool.

    All texts are joined with newlines into one query and sent with the next
    credential in round-robin order. If a request fails, the remaining
    credentials are tried in turn, mirroring the Tencent pool.

    Args:
        texts: Texts to translate, in order.
        src: Source language code sent as ``from``.
        tgt: Target language code sent as ``to``.

    Returns:
        One translation per input text, or ``None`` when no Baidu credentials
        are configured, every credential failed, or the number of returned
        segments does not match the number of inputs.
    """
    global _baidu_idx
    if not _baidu_creds_list:
        return None

    query = "\n".join(texts)
    for _ in range(len(_baidu_creds_list)):
        cred = _baidu_creds_list[_baidu_idx]
        _baidu_idx = (_baidu_idx + 1) % len(_baidu_creds_list)
        try:
            # Credential parsing and signing sit inside the try so a malformed
            # entry (missing key, numeric app_id) fails only this attempt.
            app_id, secret_key = str(cred["app_id"]), str(cred["secret_key"])
            salt = random.randint(32768, 65536)
            sign = hashlib.md5((app_id + query + str(salt) + secret_key).encode()).hexdigest()
            params = {
                "q": query, "from": src, "to": tgt,
                "appid": app_id, "salt": salt, "sign": sign,
            }
            resp = requests.get(BAIDU_TRANSLATE_URL, params=params, timeout=8)
            resp.raise_for_status()
            data = resp.json()
            if "error_code" in data:
                raise RuntimeError(f"Baidu Biz Error: {data['error_code']} - {data.get('error_msg')}")
            results = [item["dst"] for item in data["trans_result"]]
        except Exception as e:
            print(f"[translator] Baidu API error → {e}")
            continue

        # Inputs were joined with "\n", so a text that itself contains a
        # newline or is empty can change the segment count. That does not
        # depend on the credential, so stop here rather than retrying.
        if len(results) != len(texts):
            print(
                f"[translator] Baidu returned {len(results)} results for "
                f"{len(texts)} texts; discarding misaligned output."
            )
            return None
        return results
    return None
# Unified public entry point
def translate_texts(texts: Sequence[str],
                    src_lang: str = "auto",
                    tgt_lang: str = "zh") -> List[str]:
    """Translate ``texts``, preferring Tencent Cloud and falling back to Baidu.

    Order of attempts:
      1. Tencent Cloud (batch endpoint + multi-account rotation).
      2. Baidu, only when the Tencent attempt returned ``None``.
      3. If no usable result was produced, the original texts are returned.

    An empty ``texts`` yields an empty list without calling any backend.
    """
    if not texts:
        return []

    translated = None
    for backend in (_translate_with_tencent_pool, _translate_with_baidu_pool):
        translated = backend(texts, src_lang, tgt_lang)
        if translated is not None:
            break  # this backend answered; do not consult the next one

    if translated:
        return translated
    return list(texts)