def __init__(self, service_urls=None, user_agent=DEFAULT_USER_AGENT,
             proxies=None, timeout=None):
    """Create the HTTP session, pick the service hosts and build the token acquirer.

    :param service_urls: hosts to use; ``['translate.google.com']`` is used
                         when this is ``None`` or empty.
    :param user_agent: value sent in the ``User-Agent`` header.
    :param proxies: assigned to ``session.proxies`` when not ``None``.
    :param timeout: when not ``None``, a ``TimeoutAdapter(timeout)`` is
                    mounted for both ``https://`` and ``http://``.
    """
    session = requests.Session()
    self.session = session

    if proxies is not None:
        session.proxies = proxies
    session.headers.update({'User-Agent': user_agent})

    if timeout is not None:
        # One adapter instance per scheme, https first.
        for scheme in ('https://', 'http://'):
            session.mount(scheme, TimeoutAdapter(timeout))

    self.service_urls = service_urls or ['translate.google.com']
    # Tokens are acquired against the first configured host.
    self.token_acquirer = TokenAcquirer(session=session,
                                        host=self.service_urls[0])

    # Use the HTTP/2 adapter when the optional ``hyper`` package is installed.
    try:  # pragma: nocover
        from hyper.contrib import HTTP20Adapter
        session.mount(urls.BASE, HTTP20Adapter())
    except ImportError:  # pragma: nocover
        pass
def print_analise(self):
    """Translate the entered phrase to English, classify it and show the verdict.

    The phrase is read from ``self.frase``, its language is detected, it is
    translated to English, and the classifier result is written to
    ``self.mensagem["text"]`` as ``POSITIVO`` or ``NEGATIVO``.
    """
    # Detect the language of the phrase and translate it.
    frase_use = self.frase.get()
    translator = Translator()
    idioma = translator.detect(frase_use)

    # The token itself is not used; the call is kept for its side effects.
    TokenAcquirer().do(frase_use)

    frase_traduzida = translator.translate(frase_use, src=idioma.lang, dest="en")

    resultado = print_resultado(
        analisar_texto(classificador, vectorizer, frase_traduzida.text))
    self.mensagem["text"] = 'POSITIVO' if resultado == "Positivo" else "NEGATIVO"
def trad(string):
    """Return *string* translated from English (``en``) to Portuguese (``pt``)."""
    translator = Translator()
    # The token is not used here; the call is kept for its side effects.
    TokenAcquirer().do(string)
    return translator.translate(string, src='en', dest='pt').text
def __init__(self, service_urls=DEFAULT_CLIENT_SERVICE_URLS,
             user_agent=DEFAULT_USER_AGENT,
             raise_exception=DEFAULT_RAISE_EXCEPTION,
             proxies: typing.Dict[str, httpcore.AsyncHTTPTransport] = None,
             timeout: Timeout = None,
             http2=True,
             use_fallback=False):
    """Configure the async HTTP client, service hosts and token acquirer.

    :param service_urls: hosts to use unless ``use_fallback`` is set.
    :param user_agent: value sent in the ``User-Agent`` header.
    :param raise_exception: stored on the instance as ``raise_exception``.
    :param proxies: assigned to ``client.proxies`` when not ``None``.
    :param timeout: assigned to ``client.timeout`` when not ``None``.
    :param http2: forwarded to ``httpx.AsyncClient``.
    :param use_fallback: when true, ``DEFAULT_FALLBACK_SERVICE_URLS`` and the
                         ``gtx`` client type replace the defaults.
    """
    client = httpx.AsyncClient(http2=http2)
    self.client = client

    if proxies is not None:  # pragma: nocover
        client.proxies = proxies

    client.headers.update({
        'User-Agent': user_agent,
        'Referer': 'https://translate.google.com',
    })

    if timeout is not None:
        client.timeout = timeout

    if use_fallback:
        hosts, client_type = DEFAULT_FALLBACK_SERVICE_URLS, 'gtx'
    else:
        # Default way of working: use the values supplied by the caller.
        hosts, client_type = service_urls, 'tw-ob'
    self.service_urls = hosts
    self.client_type = client_type

    self.token_acquirer = TokenAcquirer(client=client, host=self.service_urls[0])
    self.raise_exception = raise_exception
def __init__(self, service_url: str = "translate.google.cn"):
    """Store the service host and build a token acquirer for it.

    :param service_url: host to use; replaced by ``translate.google.cn``
                        when it is not listed in ``constants.SERVICE_URLS``.
    """
    from constants import SERVICE_URLS

    fallback_host = "translate.google.cn"
    self.service_url = service_url if service_url in SERVICE_URLS else fallback_host
    self.token_tool = TokenAcquirer(None, host=self.service_url)
def __init__(self, settings):
    """Initialise the parent with *settings* and prepare translation state.

    The ``WEBSITE`` setting (default ``"baidu,qq,google"``) is split on
    commas into ``self.web_site``. A shared ``requests`` session backs the
    token acquirer, which targets ``translate.google.cn``.
    """
    super(Translate, self).__init__(settings)

    configured_sites = self.settings.get("WEBSITE", "baidu,qq,google")
    self.web_site = configured_sites.split(",")
    self.site_count = 0

    session = requests.Session()
    self.session = session
    self.acquirer = TokenAcquirer(session=session, host="translate.google.cn")
def __init__(self, user_agent=DEFAULT_USER_AGENT):
    """Create the HTTP session and the token acquirer that shares it.

    :param user_agent: value sent in the ``User-Agent`` header.
    """
    session = requests.Session()
    session.headers.update({'User-Agent': user_agent})
    self.session = session
    self.token_acquirer = TokenAcquirer(session=session)

    # Use the HTTP/2 adapter when the optional ``hyper`` package is installed.
    try:  # pragma: nocover
        from hyper.contrib import HTTP20Adapter
        session.mount(urls.BASE, HTTP20Adapter())
    except ImportError:  # pragma: nocover
        pass
def func(l):
    """Translate each string in *l* and append ``source,translation`` rows to a CSV file.

    The target language is ``sys.argv[1]``. The output file is
    ``Data/<stem>_<lang>_tags.csv``, where ``<stem>`` is ``sys.argv[2]`` up to
    its first dot. Requests go through the local SOCKS5 proxy on port 9150.

    :param l: iterable of strings to translate.
    :return: dict mapping each input string to its translation, or to ``''``
             when acquiring the token, the request or parsing the response
             failed for that string.
    :raises OSError: if the CSV file cannot be opened or written.
    """
    proxies = {
        'http': 'socks5://127.0.0.1:9150',
        'https': 'socks5://127.0.0.1:9150',
    }
    target_lang = sys.argv[1]
    name = sys.argv[2].split('.')[0]
    csv_path = "Data/" + name + "_" + target_lang + "_tags.csv"
    # Fixed part of the query; the per-item values go through ``params`` so
    # that requests URL-encodes them (text containing '&', '#', '+' used to
    # corrupt the query string).
    url = ('https://translate.google.co.in/translate_a/single?client=webapp&sl=auto'
           '&hl=en&dt=at&dt=bd&dt=ex&dt=ld&dt=md&dt=qca&dt=rw&dt=rm&dt=ss&dt=t'
           '&otf=2&ssel=0&tsel=0&xid=1791807&kc=1')

    dic = {}
    for text in l:
        try:
            tk = TokenAcquirer().do(text)
            r = requests.get(
                url,
                params={'tl': target_lang, 'tk': tk, 'q': text},
                proxies=proxies)
            translated = r.json()[0][0][0]
            row = text.strip() + ',' + translated + '\n'
        except Exception:
            # Best effort: one failing item must not stop the batch.
            dic[text] = ''
            continue

        dic[text] = translated
        # The context manager closes the handle even if the write fails.
        with open(csv_path, "a") as f:
            f.write(row)
    return dic
def get_translate(phrase):
    """Translate *phrase* to Simplified Chinese and return it as a JSON response.

    The source language is auto-detected by the service (``sl=auto``).

    :param phrase: raw (not URL-encoded) text to translate.
    :return: ``jsonify({"result": <translation of the first sentence>})``.
    :raises Exception: any error from token acquisition, the HTTP request or
                       response parsing propagates to the caller. The
                       original ``except: raise`` did the same, and its
                       fallback result was unreachable.
    """
    from urllib.parse import quote

    from googletrans.gtoken import TokenAcquirer
    import requests
    import json

    acquirer = TokenAcquirer()
    tk = acquirer.do(phrase)
    url_base = "https://translate.googleapis.com/translate_a/single?client=gtx&sl=auto&tl=zh-CN&hl=zh-CN&dt=t&dt=bd&dj=1&source=input&tk={}&q={}"
    # Percent-encode the phrase so characters such as '&' or '#' cannot
    # truncate or alter the query string.
    url = url_base.format(tk, quote(phrase, safe=""))
    print(url)

    response = requests.get(url)
    jobj = json.loads(response.text)
    trans = jobj["sentences"][0]['trans']
    return jsonify({"result": trans})
def __init__(self, web_site, proxy_list="./proxy.list", proxy_auth=None, retry_times=10,
             translate_timeout=5, load_module=None):
    """Store the configuration, load the proxy list and register custom site functions.

    :param web_site: comma-separated names of the translation backends.
    :param proxy_list: path of a text file with one proxy per line; blank
                       lines and lines starting with ``#`` are ignored.
                       ``self.proxy_list`` is only assigned when the file exists.
    :param proxy_auth: stored as ``self.proxy_auth``.
    :param retry_times: stored as ``self.retry_times``.
    :param translate_timeout: stored as ``self.translate_timeout``.
    :param load_module: optional module name; every callable found in it is
                        registered in ``self.site_func`` under its own name.
    """
    self.web_site = web_site.split(",")
    self.proxy = {}
    self.proxy_auth = proxy_auth
    self.retry_times = retry_times
    self.translate_timeout = translate_timeout
    self.site_func = dict()
    self.session = requests.Session()
    self.acquirer = TokenAcquirer(session=self.session)

    if os.path.exists(proxy_list):
        # Context manager so the file handle is closed deterministically.
        with open(proxy_list) as proxy_file:
            entries = (line.strip() for line in proxy_file)
            self.proxy_list = [
                entry for entry in entries
                if entry and not entry.startswith("#")]

    if load_module:
        # Make modules in the current working directory importable.
        sys.path.insert(0, os.getcwd())
        attr = vars(__import__(load_module, fromlist=load_module))
        for k, v in attr.items():
            if callable(v):
                self.site_func[k] = v
def __init__(self, service_urls=None, user_agent=DEFAULT_USER_AGENT,
             raise_exception=DEFAULT_RAISE_EXCEPTION,
             proxies: typing.Dict[str, httpcore.SyncHTTPTransport] = None,
             timeout: Timeout = None,
             http2=True):
    """Configure the HTTP client, service hosts, client type and token acquirer.

    :param service_urls: hosts to use. When ``None`` or empty,
                         ``['translate.google.com']`` with the ``webapp``
                         client is used. When any entry contains
                         ``googleapis``, the client API
                         (``translate.googleapis.com``, client type ``gtx``)
                         is forced.
    :param user_agent: value sent in the ``User-Agent`` header.
    :param raise_exception: stored on the instance as ``raise_exception``.
    :param proxies: assigned to ``client.proxies`` when not ``None``.
    :param timeout: assigned to ``client.timeout`` when not ``None``.
    :param http2: forwarded to ``httpx.Client``.
    """
    self.client = httpx.Client(http2=http2)
    if proxies is not None:  # pragma: nocover
        self.client.proxies = proxies

    self.client.headers.update({
        'User-Agent': user_agent,
    })

    if timeout is not None:
        self.client.timeout = timeout

    if service_urls:
        # Default way of working: use the values supplied by the caller.
        self.service_urls = service_urls
        self.client_type = 'webapp'
        self.token_acquirer = TokenAcquirer(client=self.client, host=self.service_urls[0])

        # If any service url points to the client API, force it as the
        # default client. The previous loop iterated over every url but
        # always inspected service_urls[0], so later entries were ignored.
        if any('googleapis' in url for url in service_urls):
            self.service_urls = ['translate.googleapis.com']
            self.client_type = 'gtx'
    else:
        # None or an empty sequence: fall back to the public web host
        # (an empty list used to raise IndexError).
        self.service_urls = ['translate.google.com']
        self.client_type = 'webapp'
        self.token_acquirer = TokenAcquirer(client=self.client, host=self.service_urls[0])

    self.raise_exception = raise_exception
def __init__(self, service_urls=None, user_agent=DEFAULT_USER_AGENT,
             raise_exception=DEFAULT_RAISE_EXCEPTION,
             proxies: typing.Dict[str, httpcore.SyncHTTPTransport] = None,
             timeout: Timeout = None):
    """Configure the HTTP client, service hosts and token acquirer.

    :param service_urls: hosts to use; ``['translate.google.com']`` is used
                         when this is ``None`` or empty.
    :param user_agent: value sent in the ``User-Agent`` header.
    :param raise_exception: stored on the instance as ``raise_exception``.
    :param proxies: assigned to ``client.proxies`` when not ``None``.
    :param timeout: assigned to ``client.timeout`` when not ``None``.
    """
    client = httpx.Client()
    self.client = client

    if proxies is not None:  # pragma: nocover
        client.proxies = proxies
    client.headers.update({'User-Agent': user_agent})
    if timeout is not None:
        client.timeout = timeout

    self.service_urls = service_urls or ['translate.google.com']
    # Tokens are acquired against the first configured host.
    self.token_acquirer = TokenAcquirer(client=client, host=self.service_urls[0])
    self.raise_exception = raise_exception
def destPronounceButton(self):
    """Toggle playback of the pronunciation of the translated text.

    Nothing happens unless a global ``translatedText`` exists and the output
    text box is non-empty. When ``self.boolDest`` is true, a text-to-speech
    URL is built, playback starts through VLC, the button shows the "stop"
    icon and ``self.boolDest`` becomes ``False``. Otherwise the current
    player is stopped, the "headphones" icon is restored and
    ``self.boolDest`` becomes ``True``.

    Failures are logged and suppressed so a broken TTS request cannot take
    the UI down. The original code notes that this temporarily does not work
    because of a ``googletrans`` library issue.
    """
    import logging

    global p2  # player shared with the stop branch and other handlers

    def _set_icon(file_name):
        # Swap the button icon for the themed image ``file_name``.
        icon = QtGui.QIcon()
        icon.addPixmap(
            QtGui.QPixmap("../icons/%s/%s" % (self.theme, file_name)),
            QtGui.QIcon.Normal, QtGui.QIcon.Off)
        self.ui.destPronounceButton.setIcon(icon)

    try:
        if 'translatedText' not in globals():
            return
        if self.ui.outputText.toPlainText().strip() == '':
            return

        if self.boolDest:
            token = TokenAcquirer().do(translatedText.text)
            # URL that plays the sound of the destination (translated) text.
            self.audioUrl = f"https://translate.google.com/translate_tts?ie=UTF-8&q={urllib.parse.quote(translatedText.text)}"\
                            f"&tl={translatedText.dest}&tk={token}&client=webapp"

            p2 = vlc.MediaPlayer(self.audioUrl)
            eventP = p2.event_manager()
            eventP.event_attach(
                vlc.EventType.MediaPlayerEndReached, self.eventEnd2)
            p2.audio_set_volume(100)
            p2.play()

            _set_icon("stop.png")
            self.boolDest = False  # button is now "ready to stop"
        else:
            # Per the original note, stop() only works reliably when
            # triggered right away (probably due to how the URL is streamed).
            p2.stop()
            _set_icon("headphones.png")
            self.boolDest = True  # button is back to "ready to play"
    except Exception:
        # Was a bare ``except: pass``. Keep the best-effort behaviour but
        # stop swallowing KeyboardInterrupt/SystemExit, and leave a trace.
        logging.getLogger(__name__).exception("destPronounceButton failed")
class GoogleVoice(object):
    """Download Google Translate text-to-speech audio and splice MP3 files."""

    def __init__(self, service_url: str = "translate.google.cn"):
        """Store the service host and build a token acquirer for it.

        :param service_url: host to use; replaced by ``translate.google.cn``
                            when it is not listed in ``constants.SERVICE_URLS``.
        """
        from constants import SERVICE_URLS

        default_host = "translate.google.cn"
        self.service_url = service_url if service_url in SERVICE_URLS else default_host
        self.token_tool = TokenAcquirer(None, host=self.service_url)

    @staticmethod
    def splicing_audio(file_list: list, output_file) -> None:
        """Concatenate the MP3 files in *file_list* and export the result as MP3.

        :param file_list: paths of the MP3 files, in playback order.
        :param output_file: destination passed to ``AudioSegment.export``.
        """
        try:
            combined = AudioSegment.empty()
            for path in file_list:
                combined += AudioSegment.from_file(path, "mp3")
                # NOTE(review): one second of silence is assumed to follow
                # every clip - confirm against the original layout.
                combined += AudioSegment.silent(duration=1000)
            combined.export(output_file, format="mp3")
        except BaseException as exc:
            raise exc

    def get_token(self, text: str) -> str:
        """Return the token computed by the token acquirer for *text*."""
        token = self.token_tool.do(text)
        return token

    def output_voice(self, text: str, output_file: str = "Output.mp3", language: str = "zh-cn") -> None:
        """Fetch the spoken form of *text* and save it under ``Output/``.

        :param text: text to synthesise; ``%20`` sequences are replaced by
                     spaces before it is placed in the query.
        :param output_file: file name inside the ``Output`` directory.
        :param language: value of the ``tl`` query parameter.
        :raises ConnectionError: re-raised with a fixed message when a
                                 ``ConnectionError`` occurs.
        """
        target_path = f"Output/{output_file}"
        try:
            query_text = text.replace("%20", " ")
            url = "https://{}/translate_tts?ie=UTF-8&q={}&tl={}&total=1&idx=0&textlen={}&tk={}&client=webapp".format(
                self.service_url, query_text, language, str(len(text)), self.get_token(text))
            response = requests.get(url, timeout=3000)
            with open(target_path, "wb") as handle:
                for chunk in response.iter_content(chunk_size=1024):
                    if not chunk:
                        continue
                    handle.write(chunk)
        except ConnectionError:
            raise ConnectionError("请求失败")
class Translator(object):
    """Client for the Google Translate ajax API.

    Create an instance and call :meth:`translate` or :meth:`detect`.

    :param service_urls: Google Translate hosts. One is picked at random per
                         request when several are given. For example
                         ``['translate.google.com', 'translate.google.co.kr']``
    :type service_urls: a sequence of strings

    :param user_agent: the User-Agent header to send when making requests.
    :type user_agent: :class:`str`
    """

    def __init__(self, service_urls=None, user_agent=DEFAULT_USER_AGENT):
        session = requests.Session()
        session.headers.update({'User-Agent': user_agent})
        self.session = session

        self.token_acquirer = TokenAcquirer(session=session)
        self.service_urls = service_urls or ['translate.google.com']

        # Use the HTTP/2 adapter when the optional ``hyper`` package is installed.
        try:  # pragma: nocover
            from hyper.contrib import HTTP20Adapter
            session.mount(urls.BASE, HTTP20Adapter())
        except ImportError:  # pragma: nocover
            pass

    def _pick_service_url(self):
        """Return the only configured host, or a random one when there are several."""
        hosts = self.service_urls
        return hosts[0] if len(hosts) == 1 else random.choice(hosts)

    def _translate(self, text, dest='en', src='auto'):
        """Validate the language codes, call the service and return the parsed payload.

        :raises ValueError: when *src* (unless ``'auto'``) or *dest* is found
                            neither in ``LANGUAGES`` nor in ``SPECIAL_CASES``.
        """
        if src != 'auto' and src not in LANGUAGES:
            if src not in SPECIAL_CASES:
                raise ValueError('invalid source language')
            src = SPECIAL_CASES[src]

        if dest not in LANGUAGES:
            if dest not in SPECIAL_CASES:
                raise ValueError('invalid destination language')
            dest = SPECIAL_CASES[dest]

        if not PY3 and isinstance(text, str):  # pragma: nocover
            text = text.decode('utf-8')

        token = self.token_acquirer.do(text)
        params = utils.build_params(query=text, src=src, dest=dest,
                                    token=token)
        url = urls.TRANSLATE.format(host=self._pick_service_url())
        response = self.session.get(url, params=params)
        return utils.format_json(response.text)

    def translate(self, text, dest='en', src='auto'):
        """Translate text from the source language to the destination language.

        :param text: The source text to translate. Passing a :class:`list`
                     translates every item and returns a list of results.
        :type text: UTF-8 :class:`str`; :class:`unicode`; :class:`list` of strings

        :param dest: The language to translate the source text into.
                     The value should be one of the language codes listed in
                     :const:`googletrans.LANGUAGES`.
        :type dest: :class:`str`; :class:`unicode`

        :param src: The language of the source text.
                    The value should be one of the language codes listed in
                    :const:`googletrans.LANGUAGES`.
                    With the default ``'auto'``, the language reported by the
                    service is used.
        :type src: :class:`str`; :class:`unicode`

        :rtype: Translated
        :rtype: :class:`list` (when a list is passed)

        Basic usage:
            >>> from googletrans import Translator
            >>> translator = Translator()
            >>> translator.translate('안녕하세요.')
            <Translated src=ko dest=en text=Good evening. pronunciation=Good evening.>
            >>> translator.translate('안녕하세요.', dest='ja')
            <Translated src=ko dest=ja text=こんにちは。 pronunciation=Kon'nichiwa.>
            >>> translator.translate('veritas lux mea', src='la')
            <Translated src=la dest=en text=The truth is my light pronunciation=The truth is my light>

        Advanced usage:
            >>> translations = translator.translate(['The quick brown fox', 'jumps over', 'the lazy dog'], dest='ko')
            >>> for translation in translations:
            ...    print(translation.origin, ' -> ', translation.text)
            The quick brown fox  ->  빠른 갈색 여우
            jumps over  ->  이상 점프
            the lazy dog  ->  게으른 개
        """
        if isinstance(text, list):
            return [self.translate(item, dest=dest, src=src) for item in text]

        origin = text
        data = self._translate(text, dest, src)

        # This parsing follows the current response layout and has to be
        # updated when the format changes.
        translated = ''.join(segment[0] or '' for segment in data[0])

        # The source language actually recognised by the service, which
        # matters when ``src`` was 'auto'.
        try:
            src = data[-1][0][0]
        except Exception:  # pragma: nocover
            pass

        pron = origin
        try:
            pron = data[0][1][-1]
        except Exception:  # pragma: nocover
            pass

        if not PY3 and isinstance(pron, unicode) and isinstance(origin, str):  # pragma: nocover
            origin = origin.decode('utf-8')
        if dest in EXCLUDES and pron == origin:
            pron = translated

        # Python 2.x compatibility: normalise byte strings to unicode.
        if not PY3:  # pragma: nocover
            if isinstance(src, str):
                src = src.decode('utf-8')
            if isinstance(dest, str):
                dest = dest.decode('utf-8')
            if isinstance(translated, str):
                translated = translated.decode('utf-8')

        # Bundle the final values into a new Translated object.
        return Translated(src=src, dest=dest, origin=origin,
                          text=translated, pronunciation=pron)

    def detect(self, text):
        """Detect the language of the input text.

        :param text: The source text whose language you want to identify.
                     Passing a :class:`list` detects every item and returns
                     a list of results.
        :type text: UTF-8 :class:`str`; :class:`unicode`; :class:`list` of strings

        :rtype: Detected
        :rtype: :class:`list` (when a list is passed)

        Basic usage:
            >>> from googletrans import Translator
            >>> translator = Translator()
            >>> translator.detect('이 문장은 한글로 쓰여졌습니다.')
            <Detected lang=ko confidence=0.27041003>
            >>> translator.detect('この文章は日本語で書かれました。')
            <Detected lang=ja confidence=0.64889508>
            >>> translator.detect('This sentence is written in English.')
            <Detected lang=en confidence=0.22348526>
            >>> translator.detect('Tiu frazo estas skribita en Esperanto.')
            <Detected lang=eo confidence=0.10538048>

        Advanced usage:
            >>> langs = translator.detect(['한국어', '日本語', 'English', 'le français'])
            >>> for lang in langs:
            ...    print(lang.lang, lang.confidence)
            ko 1
            ja 0.92929292
            en 0.96954316
            fr 0.043500196
        """
        if isinstance(text, list):
            return [self.detect(item) for item in text]

        data = self._translate(text, dest='en', src='auto')

        # Language and confidence come from entry 8 of the payload; the
        # defaults below are kept if that entry is missing or malformed.
        src = ''
        confidence = 0.0
        try:
            src = ''.join(data[8][0])
            confidence = data[8][-2][0]
        except Exception:  # pragma: nocover
            pass

        return Detected(lang=src, confidence=confidence)
# Print the Google Translate token computed for a fixed sample phrase.
from googletrans.gtoken import TokenAcquirer

texto = 'Olá!'
acquirer = TokenAcquirer()
tk = acquirer.do(texto)
print(tk)
class Translate:
    """Translate the text portions of an HTML snippet through web translation sites."""

    # Class-level default: a single ``None`` entry means "no proxy configured".
    proxy_list = [None]
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.76 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        "Accept-Language": "en-US,en;q=0.5",
    }

    def __init__(self, web_site, proxy_list="./proxy.list", proxy_auth=None, retry_times=10,
                 translate_timeout=5, load_module=None):
        """Store the configuration, load the proxy list and register custom site functions.

        :param web_site: comma-separated names of the translation backends.
        :param proxy_list: path of a text file with one proxy per line; blank
                           lines and lines starting with ``#`` are ignored.
                           The class-level default is kept when the file
                           does not exist.
        :param proxy_auth: optional ``user:password`` prefix for the proxy URL.
        :param retry_times: maximum number of counted failures before giving up.
        :param translate_timeout: timeout passed to every HTTP request.
        :param load_module: optional module name; every callable found in it
                            is registered as a site function under its own name.
        """
        self.web_site = web_site.split(",")
        self.proxy = {}
        self.proxy_auth = proxy_auth
        self.retry_times = retry_times
        self.translate_timeout = translate_timeout
        self.site_func = dict()
        self.session = requests.Session()
        self.acquirer = TokenAcquirer(session=self.session)

        if os.path.exists(proxy_list):
            # Context manager so the file handle is closed deterministically.
            with open(proxy_list) as proxy_file:
                entries = (line.strip() for line in proxy_file)
                self.proxy_list = [
                    entry for entry in entries
                    if entry and not entry.startswith("#")]

        if load_module:
            # Make modules in the current working directory importable.
            sys.path.insert(0, os.getcwd())
            attr = vars(__import__(load_module, fromlist=load_module))
            for k, v in attr.items():
                if callable(v):
                    self.site_func[k] = v

    def __getattr__(self, item):
        """Expose registered site functions as methods bound through ``self=``."""
        # Read the instance dict directly: going through ``self.site_func``
        # would re-enter __getattr__ forever when the attribute is not set
        # yet (e.g. on an instance created without __init__).
        site_func = self.__dict__.get("site_func", {})
        if item in site_func:
            return partial(site_func[item], self=self)
        raise AttributeError(item)

    def proxy_choice(self):
        """Return a ``requests`` proxy mapping for a random proxy.

        :return: ``{"http": "http://[auth@]proxy"}``, or an empty dict when
                 no proxy is configured.
        """
        if not self.proxy_list or not self.proxy_list[0]:
            return {}
        auth = "%s@" % self.proxy_auth if self.proxy_auth else ""
        return {"http": "http://%s%s" % (auth, random.choice(self.proxy_list))}

    def trans_error_handler(self, func_name, retry_time, e, *args, **kwargs):
        """Error handler used by the retry wrapper: log and rotate the proxy.

        :param func_name: name of the function being retried
        :param retry_time: number of the current retry
        :param e: the exception that triggered the retry
        :param args: positional arguments of the retried call; ``args[1]`` is
                     the proxies dict, which is updated in place
        :param kwargs: keyword arguments of the retried call
        :return: when a true value is returned, the failure is not counted
                 against the retry limit (this handler returns ``None``)
        """
        print("Error in %s for retry %s times. Error: %s"%(func_name, retry_time, e))
        # ``dict.update(None)`` raises TypeError. Without the fallback, a
        # setup with no proxies aborted the retry loop on the first failure.
        args[1].update(self.proxy_choice() or {})

    def translate(self, src):
        """Main entry point: translate the text found between the tags of *src*.

        :param src: source markup
        :return: the markup with its text parts translated, or *src* itself
                 when there is nothing to translate or every retry failed
        """
        try:
            # Collect the characters between '>' and '<', join them with
            # newlines and translate them in one go.
            pattern = re.compile(r"(?:^|(?<=>))([\s\S]*?)(?:(?=<)|$)")
            ls = re.findall(pattern, src.replace("\n", ""))
            src_data = "\n".join(x.strip("\t ") for x in ls if x.strip())
            if src_data.strip():
                # Escape percent signs in the source.
                src_escape = src.replace("%", "%%")
                # Replace every extracted part with `%s`; parts without
                # real content (blank) are dropped.
                src_template = re.sub(pattern, lambda x: "%s" if x.group(1).strip() else "", src_escape)
                return self.retry_wrapper(self.retry_times, self.trans_error_handler)(
                    self._translate)(src_data, self.proxy or self.proxy_choice() or self.proxy, src_template)
            else:
                return src
        except Exception:
            print("Error in translate, finally, we could not get the translate result. src: %s, Error: %s"%(
                src, traceback.format_exc()))
            return src

    def _translate(self, src, proxies, src_template):
        """Dispatch to a randomly chosen backend from ``self.web_site``."""
        return getattr(self, random.choice(self.web_site).strip())(src, proxies, src_template)

    def youdao(self, src_data, proxies, src_template):
        """
        Youdao translation backend (deprecated)
        :param src_data: raw text
        :param proxies: proxies
        :param src_template: template of the raw markup
        :return: result
        """
        url = "http://fanyi.youdao.com/translate"
        resp = requests.post(url=url, data={
            'keyfrom': 'fanyi.web',
            'i': src_data,
            'doctype': 'json',
            'action': 'FY_BY_CLICKBUTTON',
            'ue': 'UTF-8',
            'xmlVersion': '1.8',
            'type': 'AUTO',
            'typoResult': 'true'}, headers=self.headers,
            timeout=self.translate_timeout, proxies=proxies)
        return src_template % tuple(map(lambda y: "".join(
            map(lambda x: x["tgt"], y)), json.loads(resp.text)["translateResult"]))

    def baidu(self, src_data, proxies, src_template):
        """
        Baidu translation backend; Baidu translates at most 5000 characters
        :param src_data: raw text
        :param proxies: proxies
        :param src_template: template of the raw markup
        :return: result
        """
        url = "http://fanyi.baidu.com/v2transapi"
        resp = requests.post(url=url, data={
            'from': 'en',
            'to': 'zh',
            'transtype': 'realtime',
            'query': src_data,
            'simple_means_flag': 3}, headers=self.headers,
            timeout=self.translate_timeout, proxies=proxies)
        return src_template % tuple(
            "".join(map(lambda x: x["src_str"], json.loads(resp.text)["trans_result"]['phonetic'])).split("\n"))

    def qq(self, src_data, proxies, src_template):
        """
        Tencent translation backend; Tencent translates at most 2000 characters
        :param src_data: raw text
        :param proxies: proxies
        :param src_template: template of the raw markup
        :return: result
        """
        url = 'http://fanyi.qq.com/api/translate'
        resp = requests.post(
            url, data={'source': 'auto', 'target': 'en', 'sourceText': src_data},
            headers=self.headers, timeout=self.translate_timeout, proxies=proxies)
        print(resp.text)
        return src_template % tuple(
            record["targetText"] for record in json.loads(resp.text)["records"]
            if record.get("sourceText") != "\n")

    def google(self, src_data, proxies, src_template):
        """
        Google translation backend (translate.google.cn), using a generated token
        :param src_data: raw text
        :param proxies: proxies
        :param src_template: template of the raw markup
        :return: result
        """
        url = 'https://translate.google.cn/translate_a/single'
        data = {
            'client': 't',
            'sl': "auto",
            'tl': "zh",
            'hl': "zh",
            'dt': ['at', 'bd', 'ex', 'ld', 'md', 'qca', 'rw', 'rm', 'ss', 't'],
            'ie': 'UTF-8',
            'oe': 'UTF-8',
            'otf': 1,
            'ssel': 0,
            'tsel': 0,
            'tk': self.acquirer.do(src_data),
            'q': src_data,
        }
        resp = self.session.get(url, params=data, headers=self.headers,
                                timeout=self.translate_timeout, proxies=proxies)
        return self.merge_conflict(src_template, [line[0] for line in json.loads(resp.text)[0]])

    @staticmethod
    def merge_conflict(src_template, returns):
        """Fill *src_template* with as many items of *returns* as it has ``%s`` slots."""
        return src_template % tuple(returns[:src_template.count("%s")])

    @staticmethod
    def retry_wrapper(retry_times, error_handler=None):
        """
        Retry decorator
        :param retry_times: number of counted failures after which the last
                            exception is re-raised
        :param error_handler: called as ``error_handler(name, count, exc,
                              *args, **kwargs)`` on every failure; a true
                              return value means the failure is not counted
        :return: a decorator
        """
        def out_wrapper(func):
            @wraps(func)
            def wrapper(*args, **kwargs):
                count = 0
                while True:
                    try:
                        return func(*args, **kwargs)
                    except Exception as e:
                        count += 1
                        if error_handler:
                            result = error_handler(func.__name__, count, e, *args, **kwargs)
                            if result:
                                count -= 1
                        if count >= retry_times:
                            raise
            return wrapper
        return out_wrapper

    @classmethod
    def parse_args(cls):
        """Parse the command line, build an instance and return the translated source."""
        parser = ArgumentParser()
        parser.add_argument("-ws", "--web-site", default="baidu,qq,google",
                            help="Which site do you want to use for translating, split by `,`?")
        parser.add_argument("-pl", "--proxy-list",
                            help="The proxy.list contains proxy to use for translating. default: ./proxy.list")
        parser.add_argument("-pa", "--proxy-auth",
                            help="Proxy password if have. eg. user:password")
        parser.add_argument("-rt", "--retry-times", type=int, default=10,
                            help="If translate failed retry times. default: 10")
        parser.add_argument("-tt", "--translate-timeout", type=int, default=5,
                            help="Translate timeout. default: 5")
        parser.add_argument("-lm", "--load-module",
                            help="The module contains custom web site functions which may use for translating. eg: trans.google")
        parser.add_argument("src", nargs="+", help="The html you want to translate. ")
        data = vars(parser.parse_args())
        src = data.pop("src")
        # Only options with a true value are forwarded, so the constructor
        # defaults apply to everything left unset.
        return cls(**dict(filter(lambda x: x[1], data.items()))).translate(" ".join(src))
class Translator(object):
    """Client for the Google Translate ajax API.

    Create an instance and call :meth:`translate` or :meth:`detect`.

    :param user_agent: the User-Agent header to send when making requests.
    """

    def __init__(self, user_agent=DEFAULT_USER_AGENT):
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': user_agent,
        })
        self.token_acquirer = TokenAcquirer(session=self.session)

        # Use the HTTP/2 adapter when the optional ``hyper`` package is installed.
        try:  # pragma: nocover
            from hyper.contrib import HTTP20Adapter
            self.session.mount(urls.BASE, HTTP20Adapter())
        except ImportError:  # pragma: nocover
            pass

    def _translate(self, text, dest='en', src='auto'):
        """Validate the language codes, call the service and return the parsed payload.

        :raises ValueError: when *src* (unless ``'auto'``) or *dest* is found
                            neither in ``LANGUAGES`` nor in ``SPECIAL_CASES``.
        """
        if src != 'auto' and src not in LANGUAGES:
            if src in SPECIAL_CASES:
                src = SPECIAL_CASES[src]
            else:
                raise ValueError('invalid source language')

        if dest not in LANGUAGES:
            if dest in SPECIAL_CASES:
                dest = SPECIAL_CASES[dest]
            else:
                raise ValueError('invalid destination language')

        if not PY3 and isinstance(text, str):  # pragma: nocover
            text = text.decode('utf-8')

        token = self.token_acquirer.do(text)
        params = utils.build_params(query=text, src=src, dest=dest,
                                    token=token)
        r = self.session.get(urls.TRANSLATE, params=params)

        data = utils.format_json(r.text)
        return data

    def translate(self, text, dest='en', src='auto'):
        """
        Translate the passed text into destination language.

        Basic usage:
            >>> from googletrans import Translator
            >>> translator = Translator()
            >>> translator.translate('안녕하세요.')
            <Translated src=ko dest=en text=Good evening. pronunciation=Good evening.>
            >>> translator.translate('안녕하세요.', dest='ja')
            <Translated src=ko dest=ja text=こんにちは。 pronunciation=Kon'nichiwa.>
            >>> translator.translate('veritas lux mea', src='la')
            <Translated src=la dest=en text=The truth is my light pronunciation=The truth is my light>

        Advanced usage:
            >>> translations = translator.translate(['The quick brown fox', 'jumps over', 'the lazy dog'], dest='ko')
            >>> for translation in translations:
            ...    print(translation.origin, ' -> ', translation.text)
            The quick brown fox  ->  빠른 갈색 여우
            jumps over  ->  이상 점프
            the lazy dog  ->  게으른 개

        :param text: the text you want to translate.
            you can pass this parameter as a list object, as shown in the advanced usage above.
        :param dest: the destination language you want to translate. (default: en)
        :param src: the source language you want to translate. (default: auto)

        :rtype: Translated
        :rtype: list (when list is passed)
        """
        if isinstance(text, list):
            return [self.translate(item, dest=dest, src=src) for item in text]

        origin = text
        data = self._translate(text, dest, src)

        # The service returns one entry per segment of the input. Join them
        # all: taking only data[0][0][0] dropped everything after the first
        # segment. Entries without a text (None) contribute nothing.
        # This parsing has to be updated when the response format changes.
        translated = ''.join(segment[0] or '' for segment in data[0])

        # The source language actually recognised by the service, which
        # matters when ``src`` was 'auto'.
        try:
            src = data[-1][0][0]
        except Exception:  # pragma: nocover
            pass

        pron = origin
        try:
            pron = data[0][1][-1]
        except Exception:  # pragma: nocover
            pass

        if not PY3 and isinstance(pron, unicode) and isinstance(origin, str):  # pragma: nocover
            origin = origin.decode('utf-8')
        if dest in EXCLUDES and pron == origin:
            pron = translated

        # Python 2.x compatibility: normalise byte strings to unicode.
        if not PY3:  # pragma: nocover
            if isinstance(src, str):
                src = src.decode('utf-8')
            if isinstance(dest, str):
                dest = dest.decode('utf-8')
            if isinstance(translated, str):
                translated = translated.decode('utf-8')

        # Bundle the final values into a new Translated object.
        result = Translated(src=src, dest=dest, origin=origin,
                            text=translated, pronunciation=pron)
        return result

    def detect(self, text):
        """
        Detect the language of a text.

        Basic usage:
            >>> from googletrans import Translator
            >>> translator = Translator()
            >>> translator.detect('이 문장은 한글로 쓰여졌습니다.')
            <Detected lang=ko confidence=0.27041003>
            >>> translator.detect('この文章は日本語で書かれました。')
            <Detected lang=ja confidence=0.64889508>
            >>> translator.detect('This sentence is written in English.')
            <Detected lang=en confidence=0.22348526>
            >>> translator.detect('Tiu frazo estas skribita en Esperanto.')
            <Detected lang=eo confidence=0.10538048>

        Advanced usage:
            >>> langs = translator.detect(['한국어', '日本語', 'English', 'le français'])
            >>> for lang in langs:
            ...    print(lang.lang, lang.confidence)
            ko 1
            ja 0.92929292
            en 0.96954316
            fr 0.043500196

        :param text: the text you want to detect.

        :rtype: Detected
        :rtype: list (when list is passed)
        """
        if isinstance(text, list):
            return [self.detect(item) for item in text]

        data = self._translate(text, dest='en', src='auto')

        # Language and confidence come from entry 8 of the payload; the
        # defaults below are kept if that entry is missing or malformed.
        src = ''
        confidence = 0.0
        try:
            src = ''.join(data[8][0])
            confidence = data[8][-2][0]
        except Exception:  # pragma: nocover
            pass
        result = Detected(lang=src, confidence=confidence)

        return result
#! /home/nautilis/local/env_py3/bin/python
# Print the Google Translate token for the phrase given as the first
# command-line argument.
from googletrans.gtoken import TokenAcquirer
import sys

# Exit with a usage message instead of an IndexError traceback when the
# phrase argument is missing.
if len(sys.argv) < 2:
    sys.exit("usage: %s PHRASE" % sys.argv[0])

acquirer = TokenAcquirer()
phrase = sys.argv[1]
tk = acquirer.do(phrase)
print(tk)
class Translator:
    """Google Translate ajax API implementation class

    You have to create an instance of Translator to use this API.
    Every method that performs a request is a coroutine and must be awaited.

    :param service_urls: google translate url list. URLs will be used randomly.
                         For example ``['translate.google.com', 'translate.google.co.kr']``
                         To preferably use the non webapp api, service url should be
                         translate.googleapis.com
    :type service_urls: a sequence of strings

    :param user_agent: the User-Agent header to send when making requests.
    :type user_agent: :class:`str`

    :param raise_exception: if `True` then raise exception if smth will go wrong
    :type raise_exception: boolean

    :param proxies: proxies configuration.
                    Dictionary mapping protocol or protocol and host to the URL of the proxy
                    For example ``{'http': 'foo.bar:3128', 'http://host.name': 'foo.bar:4012'}``
    :type proxies: dictionary

    :param timeout: Definition of timeout for httpx library.
                    Will be used for every request.
    :type timeout: number or a double of numbers

    :param http2: whether to use HTTP2 (default: True)

    :param use_fallback: when true, ``DEFAULT_FALLBACK_SERVICE_URLS`` and the
                         ``'gtx'`` client type are used instead of
                         ``service_urls`` and ``'tw-ob'``
    """

    def __init__(self, service_urls=DEFAULT_CLIENT_SERVICE_URLS, user_agent=DEFAULT_USER_AGENT,
                 raise_exception=DEFAULT_RAISE_EXCEPTION,
                 proxies: typing.Dict[str, httpcore.AsyncHTTPTransport] = None,
                 timeout: Timeout = None,
                 http2=True,
                 use_fallback=False):

        self.client = httpx.AsyncClient(http2=http2)
        if proxies is not None:  # pragma: nocover
            self.client.proxies = proxies

        self.client.headers.update({
            'User-Agent': user_agent,
            'Referer': 'https://translate.google.com',
        })

        if timeout is not None:
            self.client.timeout = timeout

        if use_fallback:
            self.service_urls = DEFAULT_FALLBACK_SERVICE_URLS
            self.client_type = 'gtx'
        else:
            # default way of working: use the defined values from user app
            self.service_urls = service_urls
            self.client_type = 'tw-ob'
            self.token_acquirer = TokenAcquirer(
                client=self.client, host=self.service_urls[0])

        self.raise_exception = raise_exception

    def _build_rpc_request(self, text: str, dest: str, src: str):
        """Serialize the ``f.req`` payload for the RPC endpoint as compact JSON."""
        return json.dumps([[
            [
                RPC_ID,
                json.dumps([[text, src, dest, True], [None]], separators=(',', ':')),
                None,
                'generic',
            ],
        ]], separators=(',', ':'))

    def _pick_service_url(self):
        """Return the only configured service URL, or a random one of several."""
        if len(self.service_urls) == 1:
            return self.service_urls[0]
        return random.choice(self.service_urls)

    async def _translate(self, text: str, dest: str, src: str):
        """POST the RPC translation request.

        :returns: ``(response_text, response)``
        :raises Exception: on a non-200 status when ``raise_exception`` is set
        """
        url = urls.TRANSLATE_RPC.format(host=self._pick_service_url())
        data = {
            'f.req': self._build_rpc_request(text, dest, src),
        }
        params = {
            'rpcids': RPC_ID,
            'bl': 'boq_translate-webserver_20201207.13_p0',
            'soc-app': 1,
            'soc-platform': 1,
            'soc-device': 1,
            'rt': 'c',
        }
        r = await self.client.post(url, params=params, data=data)

        # Fixed: the attribute is ``raise_exception``; the former
        # ``raise_Exception`` spelling raised AttributeError on any non-200.
        if r.status_code != 200 and self.raise_exception:
            raise Exception('Unexpected status code "{}" from {}'.format(
                r.status_code, self.service_urls))

        return r.text, r

    async def _translate_legacy(self, text, dest, src, override):
        """GET the legacy translation endpoint.

        :param override: extra parameters forwarded to ``utils.build_params``
        :returns: ``(data, response)``; on a non-200 status without
                  ``raise_exception`` the data is a copy of ``DUMMY_DATA``
                  carrying ``text`` as the translation
        :raises Exception: on a non-200 status when ``raise_exception`` is set
        """
        token = ''  # dummy default value here as it is not used by api client
        if self.client_type == 'webapp':
            token = self.token_acquirer.do(text)

        params = utils.build_params(client=self.client_type, query=text, src=src, dest=dest,
                                    token=token, override=override)

        url = urls.TRANSLATE.format(host=self._pick_service_url())
        r = await self.client.get(url, params=params)

        if r.status_code == 200:
            data = utils.format_json(r.text)
            return data, r

        if self.raise_exception:
            raise Exception('Unexpected status code "{}" from {}'.format(
                r.status_code, self.service_urls))

        # Work on a private copy: writing into the module-level DUMMY_DATA
        # would leak this text into every previously returned fallback result.
        import copy
        fallback = copy.deepcopy(DUMMY_DATA)
        fallback[0][0][0] = text
        return fallback, r

    def _parse_extra_data(self, data):
        """Map known positions of the legacy response to named entries.

        Missing or falsy positions are reported as ``None``.
        """
        response_parts_name_mapping = {
            0: 'translation',
            1: 'all-translations',
            2: 'original-language',
            5: 'possible-translations',
            6: 'confidence',
            7: 'possible-mistakes',
            8: 'language',
            11: 'synonyms',
            12: 'definitions',
            13: 'examples',
            14: 'see-also',
        }

        extra = {}

        for index, category in response_parts_name_mapping.items():
            extra[category] = data[index] if (
                index < len(data) and data[index]) else None

        return extra

    async def translate(self, text: str, dest='en', src='auto'):
        """Translate ``text`` through the RPC endpoint.

        :param text: the source text to be translated
        :param dest: destination language code or name (default: ``en``)
        :param src: source language code or name (default: ``auto``)
        :rtype: Translated
        :raises ValueError: if ``src`` or ``dest`` is not a known language
        """
        dest = dest.lower().split('_', 1)[0]
        src = src.lower().split('_', 1)[0]

        if src != 'auto' and src not in LANGUAGES:
            if src in SPECIAL_CASES:
                src = SPECIAL_CASES[src]
            elif src in LANGCODES:
                src = LANGCODES[src]
            else:
                raise ValueError('invalid source language')

        if dest not in LANGUAGES:
            if dest in SPECIAL_CASES:
                dest = SPECIAL_CASES[dest]
            elif dest in LANGCODES:
                dest = LANGCODES[dest]
            else:
                raise ValueError('invalid destination language')

        origin = text
        data, response = await self._translate(text, dest, src)

        # Collect the response lines starting at the one that mentions RPC_ID
        # and stop once the square brackets outside of strings are balanced.
        token_found = False
        square_bracket_counts = [0, 0]
        resp = ''
        for line in data.split('\n'):
            token_found = token_found or f'"{RPC_ID}"' in line[:30]
            if not token_found:
                continue

            is_in_string = False
            for index, char in enumerate(line):
                if char == '\"' and line[max(0, index - 1)] != '\\':
                    is_in_string = not is_in_string
                if not is_in_string:
                    if char == '[':
                        square_bracket_counts[0] += 1
                    elif char == ']':
                        square_bracket_counts[1] += 1

            resp += line
            if square_bracket_counts[0] == square_bracket_counts[1]:
                break

        data = json.loads(resp)
        parsed = json.loads(data[0][2])
        # not sure
        should_spacing = parsed[1][0][0][3]
        translated_parts = [
            TranslatedPart(part[0], part[1] if len(part) >= 2 else [])
            for part in parsed[1][0][0][5]
        ]
        translated = (' ' if should_spacing else '').join(
            part.text for part in translated_parts)

        # The lookups below are best effort: the response layout is not
        # guaranteed, so a missing field keeps the previous value.
        if src == 'auto':
            try:
                src = parsed[2]
            except Exception:
                pass
        if src == 'auto':
            try:
                src = parsed[0][2]
            except Exception:
                pass

        # currently not available
        confidence = None

        origin_pronunciation = None
        try:
            origin_pronunciation = parsed[0][0]
        except Exception:
            pass

        pronunciation = None
        try:
            pronunciation = parsed[1][0][0][1]
        except Exception:
            pass

        extra_data = {
            'confidence': confidence,
            'parts': translated_parts,
            'origin_pronunciation': origin_pronunciation,
            'parsed': parsed,
        }
        result = Translated(src=src, dest=dest, origin=origin,
                            text=translated, pronunciation=pronunciation,
                            parts=translated_parts,
                            extra_data=extra_data,
                            response=response)
        return result

    async def translate_legacy(self, text, dest='en', src='auto', **kwargs):
        """Translate text from source language to destination language

        :param text: The source text(s) to be translated. Batch translation is supported via sequence input.
        :type text: UTF-8 :class:`str`; :class:`unicode`; string sequence (list, tuple, iterator, generator)

        :param dest: The language to translate the source text into.
                     The value should be one of the language codes listed in :const:`googletrans.LANGUAGES`
                     or one of the language names listed in :const:`googletrans.LANGCODES`.
        :type dest: :class:`str`; :class:`unicode`

        :param src: The language of the source text.
                    The value should be one of the language codes listed in :const:`googletrans.LANGUAGES`
                    or one of the language names listed in :const:`googletrans.LANGCODES`.
                    If a language is not specified,
                    the system will attempt to identify the source language automatically.
        :type src: :class:`str`; :class:`unicode`

        :param kwargs: extra request parameters forwarded to the legacy endpoint

        :rtype: Translated
        :rtype: :class:`list` (when a list is passed)

        :raises ValueError: if ``src`` or ``dest`` is not a known language

        Basic usage:
            >>> from googletrans import Translator
            >>> translator = Translator()
            >>> await translator.translate_legacy('안녕하세요.')
            <Translated src=ko dest=en text=Good evening. pronunciation=Good evening.>
            >>> await translator.translate_legacy('안녕하세요.', dest='ja')
            <Translated src=ko dest=ja text=こんにちは。 pronunciation=Kon'nichiwa.>
            >>> await translator.translate_legacy('veritas lux mea', src='la')
            <Translated src=la dest=en text=The truth is my light pronunciation=The truth is my light>

        Advanced usage:
            >>> translations = await translator.translate_legacy(['The quick brown fox', 'jumps over', 'the lazy dog'], dest='ko')
            >>> for translation in translations:
            ...    print(translation.origin, ' -> ', translation.text)
            The quick brown fox  ->  빠른 갈색 여우
            jumps over  ->  이상 점프
            the lazy dog  ->  게으른 개
        """
        dest = dest.lower().split('_', 1)[0]
        src = src.lower().split('_', 1)[0]

        if src != 'auto' and src not in LANGUAGES:
            if src in SPECIAL_CASES:
                src = SPECIAL_CASES[src]
            elif src in LANGCODES:
                src = LANGCODES[src]
            else:
                raise ValueError('invalid source language')

        if dest not in LANGUAGES:
            if dest in SPECIAL_CASES:
                dest = SPECIAL_CASES[dest]
            elif dest in LANGCODES:
                dest = LANGCODES[dest]
            else:
                raise ValueError('invalid destination language')

        if isinstance(text, list):
            result = []
            for item in text:
                # Fixed: the coroutine must be awaited, otherwise the list
                # would hold un-run coroutine objects instead of results.
                translated = await self.translate_legacy(
                    item, dest=dest, src=src, **kwargs)
                result.append(translated)
            return result

        origin = text
        # Fixed: call the request helper (and await it); the previous code
        # re-entered translate_legacy itself and unpacked a coroutine.
        data, response = await self._translate_legacy(text, dest, src, kwargs)

        # this code will be updated when the format is changed.
        translated = ''.join([d[0] if d[0] else '' for d in data[0]])

        extra_data = self._parse_extra_data(data)

        # actual source language that will be recognized by Google Translator when the
        # src passed is equal to auto.
        try:
            src = data[2]
        except Exception:  # pragma: nocover
            pass

        pron = origin
        try:
            pron = data[0][1][-2]
        except Exception:  # pragma: nocover
            pass

        if pron is None:
            try:
                pron = data[0][1][2]
            except Exception:  # pragma: nocover
                pass

        if dest in EXCLUDES and pron == origin:
            pron = translated

        # put final values into a new Translated object
        result = Translated(src=src, dest=dest, origin=origin,
                            text=translated, pronunciation=pron,
                            extra_data=extra_data,
                            response=response)

        return result

    async def detect(self, text: str):
        """Detect the language of ``text`` by translating it to English.

        The detected language is the ``src`` reported by :meth:`translate`.

        :rtype: Detected
        """
        translated = await self.translate(text, src='auto', dest='en')
        result = Detected(lang=translated.src,
                          confidence=translated.extra_data.get('confidence', None),
                          response=translated._response)
        return result

    async def detect_legacy(self, text, **kwargs):
        """Detect language of the input text

        :param text: The source text(s) whose language you want to identify.
                     Batch detection is supported via sequence input.
        :type text: UTF-8 :class:`str`; :class:`unicode`; string sequence (list, tuple, iterator, generator)

        :param kwargs: extra request parameters forwarded to the legacy endpoint

        :rtype: Detected
        :rtype: :class:`list` (when a list is passed)

        Basic usage:
            >>> from googletrans import Translator
            >>> translator = Translator()
            >>> await translator.detect_legacy('이 문장은 한글로 쓰여졌습니다.')
            <Detected lang=ko confidence=0.27041003>
            >>> await translator.detect_legacy('この文章は日本語で書かれました。')
            <Detected lang=ja confidence=0.64889508>
            >>> await translator.detect_legacy('This sentence is written in English.')
            <Detected lang=en confidence=0.22348526>
            >>> await translator.detect_legacy('Tiu frazo estas skribita en Esperanto.')
            <Detected lang=eo confidence=0.10538048>

        Advanced usage:
            >>> langs = await translator.detect_legacy(['한국어', '日本語', 'English', 'le français'])
            >>> for lang in langs:
            ...    print(lang.lang, lang.confidence)
            ko 1
            ja 0.92929292
            en 0.96954316
            fr 0.043500196
        """
        if isinstance(text, list):
            result = []
            for item in text:
                # Fixed: stay on the legacy endpoint and keep the caller's
                # kwargs for every item, as the single-text path does.
                lang = await self.detect_legacy(item, **kwargs)
                result.append(lang)
            return result

        data, response = await self._translate_legacy(text, 'en', 'auto', kwargs)

        # actual source language that will be recognized by Google Translator when the
        # src passed is equal to auto.
        src = ''
        confidence = 0.0
        try:
            if len(data[8][0]) > 1:
                src = data[8][0]
                confidence = data[8][-2]
            else:
                src = ''.join(data[8][0])
                confidence = data[8][-2][0]
        except Exception:  # pragma: nocover
            pass
        result = Detected(lang=src, confidence=confidence, response=response)

        return result
def get_token(string):
    """Return the token a freshly created ``TokenAcquirer`` yields for ``string``."""
    return TokenAcquirer().do(string)
무료지만, 하루 사용 가능 횟수 제한됨 """
# NOTE(review): the line above is the tail of a string whose opening quotes
# lie outside this excerpt; in English it reads "Free, but the number of uses
# per day is limited."
import googletrans
from googletrans import Translator

# Sample Korean sentence used for every call below.
test_text = "안녕하세요 구글 번역기"

# Translate the sample from Korean ('ko') to English ('en') and print the
# result object, its text and its pronunciation.
translator = Translator()
trans_result = translator.translate(text=test_text, src='ko', dest='en')
print(trans_result)
print(trans_result.text)
print(trans_result.pronunciation)

# Print every value of the googletrans.LANGUAGES mapping.
support_lang = googletrans.LANGUAGES
for lang in support_lang:
    print(support_lang[lang])

# Print the language detection result for the sample sentence.
print(translator.detect(test_text))

# Rebind the translator with an explicit list of service URLs.
translator = Translator(
    service_urls=['translate.google.com', 'translate.google.co.kr'])

from googletrans.gtoken import TokenAcquirer

# Compute and print the request token for the sample sentence.
acquier = TokenAcquirer()
token = acquier.do(test_text)
print(token)

# The trailing string is a note about Google Cloud Translation; in English:
# "API officially provided by Google; free below 500,000 characters,
# $20 per 100,000 characters", followed by a reference link.
""" Google Cloud Translation (https://cloud.google.com/translate/docs/) 구글에서 공식적으로 제공하는 API 500,000 글자 미만 공짜, 100,000 자 당 $20 https://codechacha.com/ko/python-google-translate/ """
class Translate(ProxyPool):
    """
    Translation helper that rotates between several translation web sites.

    The sites are read from the ``WEBSITE`` setting (comma separated,
    default ``"baidu,qq,google"``); each name must match a method of this
    class.
    """

    def __init__(self, settings):
        super(Translate, self).__init__(settings)
        self.web_site = self.settings.get("WEBSITE", "baidu,qq,google").split(",")
        self.site_count = 0
        self.session = requests.Session()
        self.acquirer = TokenAcquirer(session=self.session, host="translate.google.cn")

    def trans_error_handler(self, func_name, retry_time, e, *args, **kwargs):
        """
        Error handler passed to ``retry_wrapper``.

        Logs the failure and refreshes the proxy mapping in place.

        :param func_name: name of the function being retried
        :param retry_time: number of the current retry
        :param e: the exception that triggered the retry
        :param args: positional arguments of the retried call
        :param kwargs: keyword arguments of the retried call
        :return: None. The original note says that returning True would keep
                 the exception from counting toward the retry limit.
        """
        self.logger.error("Error in %s for retry %s times: %s" % (func_name, retry_time, e))
        # Refresh the proxy: args[1] is the proxies argument of the retried
        # ``_translate`` call (see ``translate``), updated in place.
        args[1].update(self.proxy_choice())

    def site_choice(self):
        """
        Pick the next translation site in round-robin order.

        :return: site name
        """
        self.site_count += 1
        return self.web_site[self.site_count % len(self.web_site)]

    def translate(self, src):
        """
        Main entry point.

        Text found between ``>`` and ``<`` is extracted, translated, and put
        back into the original markup.

        :param src: source text
        :return: translated text, or ``src`` unchanged when there is nothing
                 to translate or the translation finally failed
        """
        with ExceptContext(errback=lambda func_name, *args: self.logger.error(
                "Error in translate, finally, we could not get the translate result. src: %s, Error: %s" % (
                    src, "".join(traceback.format_exception(*args)))) is None):
            # Find the text between '>' and '<' and join the pieces with
            # newlines so that they are translated in a single request.
            pattern = re.compile(r"(?:^|(?<=>))([\s\S]*?)(?:(?=<)|$)")
            ls = re.findall(pattern, src.replace("\n", ""))
            src_data = "\n".join(x.strip("\t ") for x in ls if x.strip())
            if src_data.strip():
                # Escape '%' in the source because the template is filled
                # with the ``%`` operator.
                src_escape = src.replace("%", "%%")
                # Replace every extracted piece by `%s`; pieces without real
                # content (blank) are dropped.
                src_template = re.sub(
                    pattern, lambda x: "%s" if x.group(1).strip() else "", src_escape)
                return retry_wrapper(
                    self.settings.get_int("TRANSLATE_RETRY_TIME", 10),
                    error_handler=self.trans_error_handler)(
                    self._translate)(src_data, self.proxy or self.proxy_choice(), src_template)
        return src

    def _translate(self, src, proxies, src_template):
        """Dispatch to the method named after the next site in the rotation."""
        return getattr(self, self.site_choice().strip())(src, proxies, src_template)

    def youdao(self, src_data, proxies, src_template):
        """
        Youdao implementation, deprecated.

        :param src_data: raw text to translate
        :param proxies: proxies
        :param src_template: template the translated pieces are put into
        :return: result
        """
        warnings.warn("youdao is a deprecated alias, use other site instead.", DeprecationWarning, 2)
        url = "http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule"  # "http://fanyi.youdao.com/translate"
        self.logger.debug("Process request %s with proxy: %s" % (url, proxies))
        resp = requests.post(url=url, data={
            'keyfrom': 'fanyi.web',
            'i': src_data,
            'doctype': 'json',
            'action': 'FY_BY_CLICKBUTTON',
            'ue': 'UTF-8',
            'xmlVersion': '1.8',
            'type': 'AUTO',
            'typoResult': 'true'
        }, headers=self.settings.get("HEADERS"),
            timeout=self.settings.get_int("TRANSLATE_TIMEOUT", 5), proxies=proxies)
        return src_template % tuple(
            map(lambda y: "".join(map(lambda x: x["tgt"], y)),
                json.loads(resp.text)["translateResult"]))

    def qq(self, src_data, proxies, src_template, url='http://fanyi.qq.com/api/translate'):
        """
        Tencent implementation; per the original note it translates at most
        2000 characters.

        :param url: translation api
        :param src_data: raw text to translate
        :param proxies: proxies
        :param src_template: template the translated pieces are put into
        :return: result
        """
        self.logger.debug("Process request %s with proxy: %s" % (url, proxies))
        resp = requests.post(url, data={
            'source': 'auto',
            'target': 'en',
            'sourceText': src_data
        }, headers=self.settings.get("HEADERS"),
            timeout=self.settings.get_int("TRANSLATE_TIMEOUT", 5), proxies=proxies)
        # Records whose source is just the newline separator are skipped.
        return src_template % tuple(
            record["targetText"] for record in json.loads(resp.text)["records"]
            if record.get("sourceText") != "\n")

    def baidu(self, src_data, proxies, src_template,
              url="http://fanyi.baidu.com/v2transapi",
              select_url="http://fanyi.baidu.com/langdetect"):
        """
        Baidu implementation; per the original note it translates at most
        5000 characters.

        :param src_data: raw text to translate
        :param proxies: proxies
        :param src_template: template the translated pieces are put into
        :param url: translation api
        :param select_url: language detection api
        :return: result
        """
        self.logger.debug("Select lang. ")
        # Fixed: the detection request now uses the same timeout and proxies
        # as the translation request. Without a timeout requests waits
        # forever, which would also stall the surrounding retry logic.
        resp = requests.post(
            url=select_url, data={"query": src_data[:50]},
            timeout=self.settings.get_int("TRANSLATE_TIMEOUT", 5), proxies=proxies)
        try:
            lan = json.loads(resp.text)["lan"]
        except (ValueError, KeyError, TypeError):
            # Unparseable or unexpected detection answer: assume English.
            lan = "en"
        self.logger.debug("Process request %s with proxy: %s" % (url, proxies))
        resp = requests.post(url=url, data={
            'from': lan,
            'to': 'zh',
            'transtype': 'realtime',
            'query': src_data,
            'simple_means_flag': 3
        }, headers=self.settings.get("HEADERS"),
            timeout=self.settings.get_int("TRANSLATE_TIMEOUT", 5), proxies=proxies)
        result = src_template % tuple("".join(
            map(lambda x: x["src_str"],
                json.loads(resp.text)["trans_result"]['phonetic'])).split("\n"))
        return result

    def google(self, src_data, proxies, src_template,
               url='https://translate.google.cn/translate_a/single'):
        """
        Google implementation.

        :param url: translation api
        :param src_data: raw text to translate
        :param proxies: proxies
        :param src_template: template the translated pieces are put into
        :return: result
        """
        self.logger.debug("Process request %s with proxy: %s" % (url, proxies))
        resp = self.session.get(
            url=url,
            params={
                'client': 't',
                'sl': "auto",
                'tl': "zh",
                'hl': "zh",
                'dt': ['at', 'bd', 'ex', 'ld', 'md', 'qca', 'rw', 'rm', 'ss', 't'],
                'ie': 'UTF-8',
                'oe': 'UTF-8',
                'otf': 1,
                'ssel': 0,
                'tsel': 0,
                'tk': self.acquirer.do(src_data),
                'q': src_data,
            },
            headers=self.settings.get("HEADERS"),
            timeout=self.settings.get_int("TRANSLATE_TIMEOUT", 5),
            proxies=proxies)
        return self.merge_conflict(
            src_template, [line[0] for line in json.loads(resp.text)[0]])

    @staticmethod
    def merge_conflict(src_template, returns):
        """Fill the template with as many results as it has ``%s`` slots."""
        return src_template % tuple(returns[:src_template.count("%s")])
def get_token(text):
    """Return the token a freshly created ``TokenAcquirer`` yields for ``text``."""
    token_source = TokenAcquirer()
    token = token_source.do(text)
    return token
class Translator:
    """Google Translate ajax API implementation class

    You have to create an instance of Translator to use this API

    :param service_urls: google translate url list. URLs will be used randomly.
                         For example ``['translate.google.com', 'translate.google.co.kr']``
    :type service_urls: a sequence of strings

    :param user_agent: the User-Agent header to send when making requests.
    :type user_agent: :class:`str`

    :param proxies: proxies configuration.
                    Dictionary mapping protocol or protocol and host to the URL of the proxy
                    For example ``{'http': 'foo.bar:3128', 'http://host.name': 'foo.bar:4012'}``
    :type proxies: dictionary

    :param timeout: Definition of timeout for Requests library.
                    Will be used by every request.
    :type timeout: number or a double of numbers
    """

    def __init__(self, service_urls=None, user_agent=DEFAULT_USER_AGENT,
                 proxies=None, timeout=None):

        self.session = requests.Session()
        if proxies is not None:
            self.session.proxies = proxies
        self.session.headers.update({
            'User-Agent': user_agent,
        })
        if timeout is not None:
            self.session.mount('https://', TimeoutAdapter(timeout))
            self.session.mount('http://', TimeoutAdapter(timeout))

        self.service_urls = service_urls or ['translate.google.com']
        self.token_acquirer = TokenAcquirer(session=self.session, host=self.service_urls[0])

        # Use HTTP2 Adapter if hyper is installed
        try:  # pragma: nocover
            from hyper.contrib import HTTP20Adapter
            self.session.mount(urls.BASE, HTTP20Adapter())
        except ImportError:  # pragma: nocover
            pass

    def _pick_service_url(self):
        """Return the only configured service URL, or a random one of several."""
        if len(self.service_urls) == 1:
            return self.service_urls[0]
        return random.choice(self.service_urls)

    def _translate(self, text, dest, src, override):
        """Request a translation and return the parsed response data.

        :param override: extra parameters forwarded to ``utils.build_params``
        """
        token = self.token_acquirer.do(text)
        params = utils.build_params(query=text, src=src, dest=dest,
                                    token=token, override=override)

        url = urls.TRANSLATE.format(host=self._pick_service_url())
        r = self.session.get(url, params=params)

        data = utils.format_json(r.text)
        return data

    def _parse_extra_data(self, data):
        """Map known positions of the response to named entries.

        Missing or falsy positions are reported as ``None``.
        """
        response_parts_name_mapping = {
            0: 'translation',
            1: 'all-translations',
            2: 'original-language',
            5: 'possible-translations',
            6: 'confidence',
            7: 'possible-mistakes',
            8: 'language',
            11: 'synonyms',
            12: 'definitions',
            13: 'examples',
            14: 'see-also',
        }

        extra = {}

        for index, category in response_parts_name_mapping.items():
            extra[category] = data[index] if (index < len(data) and data[index]) else None

        return extra

    def translate(self, text, dest='en', src='auto', **kwargs):
        """Translate text from source language to destination language

        :param text: The source text(s) to be translated. Batch translation is supported via sequence input.
        :type text: UTF-8 :class:`str`; :class:`unicode`; string sequence (list, tuple, iterator, generator)

        :param dest: The language to translate the source text into.
                     The value should be one of the language codes listed in :const:`googletrans.LANGUAGES`
                     or one of the language names listed in :const:`googletrans.LANGCODES`.
        :type dest: :class:`str`; :class:`unicode`

        :param src: The language of the source text.
                    The value should be one of the language codes listed in :const:`googletrans.LANGUAGES`
                    or one of the language names listed in :const:`googletrans.LANGCODES`.
                    If a language is not specified,
                    the system will attempt to identify the source language automatically.
        :type src: :class:`str`; :class:`unicode`

        :param kwargs: extra request parameters forwarded to the endpoint

        :rtype: Translated
        :rtype: :class:`list` (when a list is passed)

        :raises ValueError: if ``src`` or ``dest`` is not a known language

        Basic usage:
            >>> from googletrans import Translator
            >>> translator = Translator()
            >>> translator.translate('안녕하세요.')
            <Translated src=ko dest=en text=Good evening. pronunciation=Good evening.>
            >>> translator.translate('안녕하세요.', dest='ja')
            <Translated src=ko dest=ja text=こんにちは。 pronunciation=Kon'nichiwa.>
            >>> translator.translate('veritas lux mea', src='la')
            <Translated src=la dest=en text=The truth is my light pronunciation=The truth is my light>

        Advanced usage:
            >>> translations = translator.translate(['The quick brown fox', 'jumps over', 'the lazy dog'], dest='ko')
            >>> for translation in translations:
            ...    print(translation.origin, ' -> ', translation.text)
            The quick brown fox  ->  빠른 갈색 여우
            jumps over  ->  이상 점프
            the lazy dog  ->  게으른 개
        """
        dest = dest.lower().split('_', 1)[0]
        src = src.lower().split('_', 1)[0]

        if src != 'auto' and src not in LANGUAGES:
            if src in SPECIAL_CASES:
                src = SPECIAL_CASES[src]
            elif src in LANGCODES:
                src = LANGCODES[src]
            else:
                raise ValueError('invalid source language')

        if dest not in LANGUAGES:
            if dest in SPECIAL_CASES:
                dest = SPECIAL_CASES[dest]
            elif dest in LANGCODES:
                dest = LANGCODES[dest]
            else:
                raise ValueError('invalid destination language')

        if isinstance(text, list):
            result = []
            for item in text:
                translated = self.translate(item, dest=dest, src=src, **kwargs)
                result.append(translated)
            return result

        origin = text
        data = self._translate(text, dest, src, kwargs)

        # this code will be updated when the format is changed.
        translated = ''.join([d[0] if d[0] else '' for d in data[0]])

        extra_data = self._parse_extra_data(data)

        # actual source language that will be recognized by Google Translator when the
        # src passed is equal to auto.
        try:
            src = data[2]
        except Exception:  # pragma: nocover
            pass

        pron = origin
        try:
            pron = data[0][1][-2]
        except Exception:  # pragma: nocover
            pass

        if pron is None:
            try:
                pron = data[0][1][2]
            except Exception:  # pragma: nocover
                # Was a bare ``except:``, which also swallowed
                # KeyboardInterrupt and SystemExit.
                pass

        if dest in EXCLUDES and pron == origin:
            pron = translated

        # put final values into a new Translated object
        result = Translated(src=src, dest=dest, origin=origin,
                            text=translated, pronunciation=pron, extra_data=extra_data)

        return result

    def detect(self, text, **kwargs):
        """Detect language of the input text

        :param text: The source text(s) whose language you want to identify.
                     Batch detection is supported via sequence input.
        :type text: UTF-8 :class:`str`; :class:`unicode`; string sequence (list, tuple, iterator, generator)

        :param kwargs: extra request parameters forwarded to the endpoint

        :rtype: Detected
        :rtype: :class:`list` (when a list is passed)

        Basic usage:
            >>> from googletrans import Translator
            >>> translator = Translator()
            >>> translator.detect('이 문장은 한글로 쓰여졌습니다.')
            <Detected lang=ko confidence=0.27041003>
            >>> translator.detect('この文章は日本語で書かれました。')
            <Detected lang=ja confidence=0.64889508>
            >>> translator.detect('This sentence is written in English.')
            <Detected lang=en confidence=0.22348526>
            >>> translator.detect('Tiu frazo estas skribita en Esperanto.')
            <Detected lang=eo confidence=0.10538048>

        Advanced usage:
            >>> langs = translator.detect(['한국어', '日本語', 'English', 'le français'])
            >>> for lang in langs:
            ...    print(lang.lang, lang.confidence)
            ko 1
            ja 0.92929292
            en 0.96954316
            fr 0.043500196
        """
        if isinstance(text, list):
            result = []
            for item in text:
                # Fixed: forward the caller's kwargs for every item, as the
                # single-text path and ``translate`` already do.
                lang = self.detect(item, **kwargs)
                result.append(lang)
            return result

        data = self._translate(text, 'en', 'auto', kwargs)

        # actual source language that will be recognized by Google Translator when the
        # src passed is equal to auto.
        src = ''
        confidence = 0.0
        try:
            src = ''.join(data[8][0])
            confidence = data[8][-2][0]
        except Exception:  # pragma: nocover
            pass
        result = Detected(lang=src, confidence=confidence)

        return result
class GoogleScrawler:
    """Fetch and unpack Google Translate information about a single word.

    Every getter is best effort: when anything goes wrong the caught
    exception object is returned instead of being raised.
    """

    def __init__(self, word):
        self.word = word
        self.acquirer = TokenAcquirer()

    def get_info_of_word(self):
        """Return a dict describing the word, or the exception that occurred.

        The dict has the keys ``word``, ``word_type``, ``meaning``,
        ``pronounce``, ``synonyms`` and ``examples``, e.g. the word "dog" with
        type "noun", meaning "con cho" and example "I have a pretty dog".
        """
        try:
            token = self.acquirer.do(self.word)
            response = requests.get(SCRAW_URL % (token, self.word))
            if response.status_code != 200:
                raise Exception("Scrawl translation from Google translate unsuccessfully!\nSTATUS_CODE = %s\n"
                                "resp.text = \n word = %s" % (response.status_code, self.word))
            payload = response.json()
            return {
                'word': self.word,
                'word_type': self.get_word_type(payload),
                'meaning': self.get_meaning_of_word(payload),
                'pronounce': self.get_pronounce_of_word(payload),
                'synonyms': self.get_synonyms_of_word(payload),
                'examples': self.get_examples_of_word(payload),
            }
        except Exception as error:
            return error

    def get_word_type(self, raw_info):
        """Return the word types (noun, verb, ...) as one comma separated str."""
        try:
            return ', '.join([entry[0] for entry in raw_info[1]])
        except Exception as error:
            return error

    def get_meaning_of_word(self, raw_info):
        """Return the meanings as one str, e.g. "con cho(danh tu), di theo(dong tu)"."""
        try:
            return ', '.join(
                [entry[1][0] + "(" + entry[0] + ")" for entry in raw_info[1]])
        except Exception as error:
            return error

    def get_pronounce_of_word(self, raw_info):
        """Return the last element of ``raw_info[0][1]`` as the pronunciation."""
        try:
            return raw_info[0][1][-1]
        except Exception as error:
            return error

    def get_synonyms_of_word(self, raw_info):
        """Return up to ``SYNONYMS_NUMS`` synonyms per group as one str."""
        try:
            rendered = []
            for group in raw_info[11]:
                picked = []
                for position, candidate in enumerate(group[1][0][0], start=1):
                    if position > SYNONYMS_NUMS:
                        break
                    picked.append(candidate)
                joined = ', '.join(picked)
                rendered.append("%s(%s)" % (group[0], joined))
            return ', '.join(rendered)
        except Exception as error:
            return error

    def get_examples_of_word(self, raw_info):
        """Return up to ``EXAMPLES_NUMS`` cleaned examples as one str."""
        try:
            collected = []
            for position, example in enumerate(raw_info[13][0], start=1):
                if position > EXAMPLES_NUMS:
                    break
                collected.append(erase_tab(example[0]))
            return ', '.join(collected)
        except Exception as error:
            return error
def _xr(a, b):
    """Fold the operation string ``b`` into the integer ``a``.

    ``b`` is read in groups of three characters: an operator ("+" adds and
    masks to 32 bits, anything else XORs), a direction ("+" uses ``rshift``,
    anything else shifts left) and a shift amount (a digit, or a letter from
    "a" on, mapped via its code point minus 87).
    """
    for offset in range(0, len(b) - 2, 3):
        amount = b[offset + 2]
        if "a" <= amount:
            shift = ord(amount[0]) - 87
        else:
            shift = int(amount)

        if "+" == b[offset + 1]:
            delta = rshift(a, shift)
        else:
            delta = a << shift

        if "+" == b[offset]:
            a = (a + delta) & 4294967295
        else:
            a = a ^ delta
    return a


# Scratch statements that exercise the token code with a fixed sample text.
text = "ted"
conta = TokenAcquirer()
# conta._update()
tk = conta.acquire(text)
up = update()
simples = tex_letras(text)
cliente = httpx.Client()
""" Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36 """
url = "https://translate.google.com.br/?sl=en&tl=pt&text=Hi&op=translate"
user_agent = {"User-agent": "Mozilla/5.0"}
som.setProperty('rate', 150) som.setProperty('volume', 1) som.say("digite uma frase") som.runAndWait() som.runAndWait() else: som.setProperty('rate', 150) som.setProperty('volume', 1) som.say("Digite uma frase") som.runAndWait() else: som.setProperty('rate', 150) som.setProperty('volume', 1) som.say("Digite uma frase") som.runAndWait() r = 'S' while r == 'S': texto_pt = input("frase: ") acquirer = TokenAcquirer() acquirer.do(texto_pt) texto_en = translator.translate(texto_pt, src="pt", dest="en") print("Original text:", texto_pt) print("translation:", texto_en.text) r = str(input("Quer continuar ? [S/N]: ")).upper() print('Fim')
def __init__(self, word):
    """Store ``word`` and create the ``TokenAcquirer`` kept on the instance."""
    self.word = word
    self.acquirer = TokenAcquirer()
class Translator(object):
    """Google Translate ajax API implementation class

    You have to create an instance of Translator to use this API

    :param service_urls: google translate url list. URLs will be used randomly.
                         For example ``['translate.google.com', 'translate.google.co.kr']``
    :type service_urls: a sequence of strings

    :param user_agent: the User-Agent header to send when making requests.
    :type user_agent: :class:`str`

    :param proxies: proxies configuration.
                    Dictionary mapping protocol or protocol and host to the URL of the proxy
                    For example ``{'http': 'foo.bar:3128', 'http://host.name': 'foo.bar:4012'}``
    :type proxies: dictionary

    :param timeout: Definition of timeout for Requests library.
                    Will be used by every request.
    :type timeout: number or a double of numbers
    """

    def __init__(self, service_urls=None, user_agent=DEFAULT_USER_AGENT,
                 proxies=None, timeout=None):
        self.session = requests.Session()
        if proxies is not None:
            self.session.proxies = proxies
        self.session.headers.update({
            'User-Agent': user_agent,
        })
        if timeout is not None:
            self.session.mount('https://', TimeoutAdapter(timeout))
            self.session.mount('http://', TimeoutAdapter(timeout))

        self.service_urls = service_urls or ['translate.google.com']
        # The token acquirer shares this session and uses the first service URL as host.
        self.token_acquirer = TokenAcquirer(session=self.session, host=self.service_urls[0])

        # Use HTTP2 Adapter if hyper is installed
        try:  # pragma: nocover
            from hyper.contrib import HTTP20Adapter
            self.session.mount(urls.BASE, HTTP20Adapter())
        except ImportError:  # pragma: nocover
            pass

    @staticmethod
    def _is_batch(text):
        """Tell whether ``text`` is a batch of texts rather than a single text.

        Lists and tuples count as batches, and so does any iterator or
        generator (an object exposing ``__next__``, or ``next`` on Python 2).
        Plain strings expose neither, so they are treated as a single text.
        """
        if isinstance(text, (list, tuple)):
            return True
        return hasattr(text, '__next__') or hasattr(text, 'next')

    def _pick_service_url(self):
        """Return the only configured service URL, or a random one of several."""
        if len(self.service_urls) == 1:
            return self.service_urls[0]
        return random.choice(self.service_urls)

    def _translate(self, text, dest, src):
        """Request a translation of ``text`` and return the parsed response data.

        :param text: the single text to translate.
        :param dest: destination language code.
        :param src: source language code.
        :return: the result of ``utils.format_json`` applied to the response text.
        """
        if not PY3 and isinstance(text, str):  # pragma: nocover
            text = text.decode('utf-8')

        token = self.token_acquirer.do(text)
        params = utils.build_params(query=text, src=src, dest=dest,
                                    token=token)
        url = urls.TRANSLATE.format(host=self._pick_service_url())
        r = self.session.get(url, params=params)

        data = utils.format_json(r.text)
        return data

    def _parse_extra_data(self, data):
        """Map known positions of the response ``data`` to named entries.

        A position that lies past the end of ``data`` or holds a falsy value
        is reported as ``None``.

        :param data: the parsed response sequence.
        :return: dictionary keyed by category name.
        """
        response_parts_name_mapping = {
            0: 'translation',
            1: 'all-translations',
            2: 'original-language',
            5: 'possible-translations',
            6: 'confidence',
            7: 'possible-mistakes',
            8: 'language',
            11: 'synonyms',
            12: 'definitions',
            13: 'examples',
            14: 'see-also',
        }

        extra = {}

        for index, category in response_parts_name_mapping.items():
            extra[category] = data[index] if (index < len(data) and data[index]) else None

        return extra

    def translate(self, text, dest='en', src='auto'):
        """Translate text from source language to destination language

        :param text: The source text(s) to be translated. Batch translation is supported via sequence input.
        :type text: UTF-8 :class:`str`; :class:`unicode`; string sequence (list, tuple, iterator, generator)

        :param dest: The language to translate the source text into.
                     The value should be one of the language codes listed in :const:`googletrans.LANGUAGES`
                     or one of the language names listed in :const:`googletrans.LANGCODES`.
        :type dest: :class:`str`; :class:`unicode`

        :param src: The language of the source text.
                    The value should be one of the language codes listed in :const:`googletrans.LANGUAGES`
                    or one of the language names listed in :const:`googletrans.LANGCODES`.
                    If a language is not specified,
                    the system will attempt to identify the source language automatically.
        :type src: :class:`str`; :class:`unicode`

        :raises ValueError: when ``src`` or ``dest`` is not a known language.

        :rtype: Translated
        :rtype: :class:`list` (when a sequence is passed)

        Basic usage:
            >>> from googletrans import Translator
            >>> translator = Translator()
            >>> translator.translate('안녕하세요.')
            <Translated src=ko dest=en text=Good evening. pronunciation=Good evening.>
            >>> translator.translate('안녕하세요.', dest='ja')
            <Translated src=ko dest=ja text=こんにちは。 pronunciation=Kon'nichiwa.>
            >>> translator.translate('veritas lux mea', src='la')
            <Translated src=la dest=en text=The truth is my light pronunciation=The truth is my light>

        Advanced usage:
            >>> translations = translator.translate(['The quick brown fox', 'jumps over', 'the lazy dog'], dest='ko')
            >>> for translation in translations:
            ...    print(translation.origin, ' -> ', translation.text)
            The quick brown fox  ->  빠른 갈색 여우
            jumps over  ->  이상 점프
            the lazy dog  ->  게으른 개
        """
        # Only the part before the first underscore of each language value is used.
        dest = dest.lower().split('_', 1)[0]
        src = src.lower().split('_', 1)[0]

        if src != 'auto' and src not in LANGUAGES:
            if src in SPECIAL_CASES:
                src = SPECIAL_CASES[src]
            elif src in LANGCODES:
                src = LANGCODES[src]
            else:
                raise ValueError('invalid source language')

        if dest not in LANGUAGES:
            if dest in SPECIAL_CASES:
                dest = SPECIAL_CASES[dest]
            elif dest in LANGCODES:
                dest = LANGCODES[dest]
            else:
                raise ValueError('invalid destination language')

        # Batch input: translate every item with the already normalized languages.
        if self._is_batch(text):
            return [self.translate(item, dest=dest, src=src) for item in text]

        origin = text
        data = self._translate(text, dest, src)

        # this code will be updated when the format is changed.
        translated = ''.join([d[0] if d[0] else '' for d in data[0]])

        extra_data = self._parse_extra_data(data)

        # actual source language that will be recognized by Google Translator when the
        # src passed is equal to auto.
        try:
            src = data[2]
        except Exception:  # pragma: nocover
            pass

        # Fall back to the original text when the response carries no pronunciation.
        pron = origin
        try:
            pron = data[0][1][-2]
        except Exception:  # pragma: nocover
            pass
        if not PY3 and isinstance(pron, unicode) and isinstance(origin, str):  # pragma: nocover
            origin = origin.decode('utf-8')
        if dest in EXCLUDES and pron == origin:
            pron = translated

        # for python 2.x compatibility
        if not PY3:  # pragma: nocover
            if isinstance(src, str):
                src = src.decode('utf-8')
            if isinstance(dest, str):
                dest = dest.decode('utf-8')
            if isinstance(translated, str):
                translated = translated.decode('utf-8')

        # put final values into a new Translated object
        result = Translated(src=src, dest=dest, origin=origin,
                            text=translated, pronunciation=pron, extra_data=extra_data)

        return result

    def detect(self, text):
        """Detect language of the input text

        :param text: The source text(s) whose language you want to identify.
                     Batch detection is supported via sequence input.
        :type text: UTF-8 :class:`str`; :class:`unicode`; string sequence (list, tuple, iterator, generator)

        :rtype: Detected
        :rtype: :class:`list` (when a sequence is passed)

        Basic usage:
            >>> from googletrans import Translator
            >>> translator = Translator()
            >>> translator.detect('이 문장은 한글로 쓰여졌습니다.')
            <Detected lang=ko confidence=0.27041003>
            >>> translator.detect('この文章は日本語で書かれました。')
            <Detected lang=ja confidence=0.64889508>
            >>> translator.detect('This sentence is written in English.')
            <Detected lang=en confidence=0.22348526>
            >>> translator.detect('Tiu frazo estas skribita en Esperanto.')
            <Detected lang=eo confidence=0.10538048>

        Advanced usage:
            >>> langs = translator.detect(['한국어', '日本語', 'English', 'le français'])
            >>> for lang in langs:
            ...    print(lang.lang, lang.confidence)
            ko 1
            ja 0.92929292
            en 0.96954316
            fr 0.043500196
        """
        # Batch input: detect every item separately.
        if self._is_batch(text):
            return [self.detect(item) for item in text]

        data = self._translate(text, dest='en', src='auto')

        # actual source language that will be recognized by Google Translator when the
        # src passed is equal to auto.
        src = ''
        confidence = 0.0
        try:
            src = ''.join(data[8][0])
            confidence = data[8][-2][0]
        except Exception:  # pragma: nocover
            pass
        result = Detected(lang=src, confidence=confidence)

        return result
'hl': dest, 'dt': ['at', 'bd', 'ex', 'ld', 'md', 'qca', 'rw', 'rm', 'ss', 't'], 'ie': 'UTF-8', 'oe': 'UTF-8', 'otf': 1, 'ssel': 0, 'tsel': 0, 'tk': token, 'q': query, } return params session = requests.Session() service_urls = 'translate.google.com' token_acquirer = TokenAcquirer(session=session, host=service_urls) def _pick_service_url(): return service_urls # if len(service_urls) == 1: # return service_urls[0] # return random.choice(self.service_urls) def _translate(text, dest, src): token = token_acquirer.do(text) params = build_params(query=text, src=src, dest=dest, token=token) url = 'https://{host}/translate_a/single'.format(host=_pick_service_url()) r = session.get(url, params=params) print(url)