本脚本调用有道翻译的 JSON 接口实现翻译;缺点是大批量翻译时容易触发反爬虫规则,实测连续翻译 500 条以上就容易触发。
解决方法:
1.换IP(有效),本人采用的是跳板路由+VPN来解决
2.使用代理(未测试)
使用方法:
将需要翻译的句子按每句一行的格式存入 hack.txt,然后运行脚本即可;翻译结果会追加写入 hack2.txt。
代码:
import requests,re
from fake_useragent import UserAgent
import json
# Script setup: load the sentences to translate and prepare the request
# target and headers used by the translation loop below.

# fake_useragent generator; a random User-Agent string is drawn from it below.
a = UserAgent()
input_s = []
# Not used in the visible part of the script; kept in case other code reads it.
ends = []

# Collect every line of hack.txt (trailing newline included) as one sentence.
# The context manager closes the file handle, which the original leaked.
with open('hack.txt') as lines:
    for line in lines:
        print(line)
        input_s.append(line)

# requests only accepts absolute URLs, so the bare path
# '/translate?...' failed before any request was sent.
# NOTE(review): the host is restored from the header's statement that this
# script uses Youdao translation -- confirm the endpoint is still correct.
url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'
headers = {
    'User-Agent': a.random
}
# Translate each collected sentence and append the result to hack2.txt.
# Reads the module-level names input_s, url and headers defined above.
for sentence in input_s:  # renamed from `input`, which shadowed the builtin
    # Form fields posted to the translation endpoint. salt/sign/ts/bv are
    # fixed values that are sent unchanged with every request.
    data = {
        'i': sentence,
        'from': 'AUTO',
        'to': 'AUTO',
        'smartresult': 'dict',
        'client': 'fanyideskweb',
        'salt': '15598805838805',
        'sign': '601e9410133b355529e58d23a6c60578',
        'ts': '1559880583880',
        'bv': '565657d9b2f836d2c4c3a1fd81d7b3c3;',
        'doctype': 'json',
        'version': '2.1',
        'keyfrom': 'fanyi.web',
        'action': 'FY_BY_CLICKBUTTION'
    }
    # A timeout keeps one stalled connection from hanging the whole batch.
    response = requests.post(url, data=data, headers=headers, timeout=30)
    # Convert the response body (decoded as UTF-8) into a dict.
    dict_json = json.loads(response.content.decode())
    # Print the parsed response for inspection.
    print(dict_json)

    # Extract the translation. The first entry of 'translateResult' is a
    # list of segments, each carrying its translated text under 'tgt'.
    # Joining all of them replaces the nested bare-except ladder that
    # handled exactly 1, 2 or 3 segments and dropped any further ones.
    try:
        segments = dict_json['translateResult'][0]
        translated = ','.join([segment['tgt'] for segment in segments])
    except (KeyError, IndexError, TypeError) as err:
        # The original also stopped here, but with an opaque KeyError raised
        # inside an exception handler. Per the header notes, this usually
        # means the anti-crawler rule has been triggered.
        raise RuntimeError(
            'response contains no translation (possibly blocked by the '
            'anti-crawler rule): %r' % (dict_json,)
        ) from err

    print('翻译结果是:', translated)
    # Append mode keeps the results of earlier sentences and earlier runs.
    with open('hack2.txt', 'a+') as okok:
        okok.write(translated + ' ')