def _tokenize(self, text):
    """Split ``text`` into parts that fit within the TTS size limit.

    The text is stripped and passed through every callable in
    ``self.pre_processor_funcs`` (in order). If the result is no longer
    than ``self.GOOGLE_TTS_MAX_CHARS`` (measured with ``_len``) it is
    returned as a single cleaned token. Otherwise it is split with
    ``self.tokenizer_func``, cleaned with ``_clean_tokens``, and every
    token is passed to ``_minimize`` with a space delimiter and the same
    maximum length.

    Args:
        text: The text to tokenize.

    Returns:
        list: The resulting tokens. On the tokenized path, empty tokens
        are filtered out after minimizing.
    """
    # Pre-clean
    text = text.strip()

    # Apply pre-processors
    for pp in self.pre_processor_funcs:
        log.debug("pre-processing: %s", pp)
        text = pp(text)

    # Short enough for a single request: no need to tokenize
    if _len(text) <= self.GOOGLE_TTS_MAX_CHARS:
        return _clean_tokens([text])

    # Tokenize
    log.debug("tokenizing: %s", self.tokenizer_func)
    tokens = self.tokenizer_func(text)

    # Clean
    tokens = _clean_tokens(tokens)

    # Minimize
    min_tokens = []
    for t in tokens:
        min_tokens += _minimize(t, ' ', self.GOOGLE_TTS_MAX_CHARS)

    # Filter empty tokens, post-minimize. Return the *filtered* list so
    # that callers never receive empty strings produced by minimizing.
    return [t for t in min_tokens if t]
def url(self):
    """Build the Google TTS request URL for ``self.text``.

    Returns:
        str: ``self.GOOGLE_TTS_URL`` followed by ``?`` and the
        URL-encoded query parameters.

    Raises:
        gTTSError: When calculating the token raises a
            ``requests.exceptions.RequestException``.
    """
    try:
        # Calculate token
        token_value = self.token.calculate_token(self.text)
    except requests.exceptions.RequestException as err:  # pragma: no cover
        reason = str(err)
        log.debug(reason, exc_info=True)
        raise gTTSError(
            "Connection error during token calculation: %s" % reason)

    # Ordered (key, value) pairs; urlencode emits them in this order.
    query_items = [
        ('ie', 'UTF-8'),
        ('q', self.text),
        ('tl', self.lang),
        ('ttsspeed', self.speed),
        ('total', 1),
        ('idx', 0),
        ('client', 'tw-ob'),
        ('textlen', _len(self.text)),
        ('tk', token_value),
    ]
    query_string = urllib.parse.urlencode(query_items)
    return self.GOOGLE_TTS_URL + "?" + query_string
def _prepare_requests(self):
    """Build the TTS API request(s) without sending them.

    ``self.text`` is tokenized and one ``GET`` request is prepared per
    resulting part.

    Returns:
        list: The prepared requests (results of
        ``requests.Request.prepare()``), one per text part, in order.

    Raises:
        AssertionError: When tokenizing yields no text parts.
        gTTSError: When calculating a token raises a
            ``requests.exceptions.RequestException``.
    """
    # TTS API URL
    endpoint = _translate_url(tld=self.tld, path="translate_tts")

    parts = self._tokenize(self.text)
    part_count = len(parts)
    log.debug("text_parts: %i", part_count)
    assert parts, 'No text to send to TTS API'

    prepared = []
    for position, chunk in enumerate(parts):
        try:
            # Calculate token
            token_value = self.token.calculate_token(chunk)
        except requests.exceptions.RequestException as err:  # pragma: no cover
            reason = str(err)
            log.debug(reason, exc_info=True)
            raise gTTSError(
                "Connection error during token calculation: %s" % reason)

        params = {
            'ie': 'UTF-8',
            'q': chunk,
            'tl': self.lang,
            'ttsspeed': self.speed,
            'total': part_count,
            'idx': position,
            'client': 'tw-ob',
            'textlen': _len(chunk),
            'tk': token_value,
        }
        log.debug("payload-%i: %s", position, params)

        # Build the request, then prepare it (nothing is sent here)
        request = requests.Request(
            method='GET',
            url=endpoint,
            params=params,
            headers=self.GOOGLE_TTS_HEADERS,
        )
        prepared.append(request.prepare())

    return prepared
def stream_to_fps(self, fps):
    """Stream the TTS API response bytes to one or more file-like objects.

    ``self.text`` is tokenized; for every part a streaming ``GET`` request
    is made and each received chunk is written to every target.

    Args:
        fps: A single file-like object, or a list of them. Anything that
            is not a ``list`` is treated as a single target.

    Raises:
        AssertionError: When tokenizing yields no text parts.
        gtts.gTTSError: When token calculation or the API request fails.
    """
    targets = fps if isinstance(fps, list) else [fps]

    # Requests are made with verify=False below; silence the resulting
    # urllib3 insecure-request warning.
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    parts = self._tokenize(self.text)
    assert parts, 'No text to send to TTS API'
    part_count = len(parts)

    for position, chunk in enumerate(parts):
        try:
            # Calculate token
            token_value = self.token.calculate_token(chunk)
        except requests.exceptions.RequestException as err:  # pragma: no cover
            raise gtts.gTTSError(
                "Connection error during token calculation: %s" % str(err))

        params = {
            'ie': 'UTF-8',
            'q': chunk,
            'tl': self.lang,
            'ttsspeed': self.speed,
            'total': part_count,
            'idx': position,
            'client': 'tw-ob',
            'textlen': _len(chunk),
            'tk': token_value,
        }

        # Pre-bind so the HTTPError handler can always reference it
        response = None
        try:
            # Request
            response = requests.get(
                self.GOOGLE_TTS_URL,
                params=params,
                headers=self.GOOGLE_TTS_HEADERS,
                proxies=urllib.request.getproxies(),
                verify=False,
                stream=True,
            )
            response.raise_for_status()
        except requests.exceptions.HTTPError:
            # Request successful, bad response
            raise gtts.gTTSError(tts=self, response=response)
        except requests.exceptions.RequestException as err:  # pragma: no cover
            # Request failed
            raise gtts.gTTSError(str(err))

        # Fan every received chunk out to all targets
        for data in response.iter_content(chunk_size=1024):
            for sink in targets:
                sink.write(data)
def write_to_fp(self, fp):
    """Perform the TTS API request(s) and write the bytes to ``fp``.

    ``self.text`` is tokenized; one ``GET`` request is made per part and
    its content is written to ``fp`` in 1024-byte chunks.

    Args:
        fp (file object): Any file-like object to write the ``mp3`` to.

    Raises:
        :class:`gTTSError`: When there's an error with the API request
            or with the token calculation.
        TypeError: When ``fp`` is not a file-like object or it does not
            take bytes.
        AssertionError: When tokenizing yields no text parts.
    """
    # When disabling ssl verify in requests (for proxies and firewalls),
    # urllib3 prints an insecure warning on stdout. We disable that.
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    parts = self._tokenize(self.text)
    part_count = len(parts)
    log.debug("text_parts: %i", part_count)
    assert parts, 'No text to send to TTS API'

    for position, chunk in enumerate(parts):
        try:
            # Calculate token
            token_value = self.token.calculate_token(chunk)
        except requests.exceptions.RequestException as err:  # pragma: no cover
            reason = str(err)
            log.debug(reason, exc_info=True)
            raise gTTSError(
                "Connection error during token calculation: %s" % reason)

        params = {
            'ie': 'UTF-8',
            'q': chunk,
            'tl': self.lang,
            'ttsspeed': self.speed,
            'total': part_count,
            'idx': position,
            'client': 'tw-ob',
            'textlen': _len(chunk),
            'tk': token_value,
        }
        log.debug("payload-%i: %s", position, params)

        try:
            # Request
            response = requests.get(
                self.GOOGLE_TTS_URL,
                params=params,
                headers=self.GOOGLE_TTS_HEADERS,
                proxies=urllib.request.getproxies(),
                verify=False,
            )

            log.debug("headers-%i: %s", position, response.request.headers)
            log.debug("url-%i: %s", position, response.request.url)
            log.debug("status-%i: %s", position, response.status_code)

            response.raise_for_status()
        except requests.exceptions.HTTPError:
            # Request successful, bad response
            raise gTTSError(tts=self, response=response)
        except requests.exceptions.RequestException as err:  # pragma: no cover
            # Request failed
            raise gTTSError(str(err))

        try:
            # Write
            for data in response.iter_content(chunk_size=1024):
                fp.write(data)
            log.debug("part-%i written to %s", position, fp)
        except (AttributeError, TypeError) as err:
            raise TypeError(
                "'fp' is not a file-like object or it does not take bytes: %s" %
                str(err))
def test_len_unicode():
    """``_len`` reports 10 for a ten-character non-ASCII string."""
    sample = u"但在一个重要的任务上"
    expected_length = 10
    assert _len(sample) == expected_length
def test_len_ascii():
    """``_len`` reports 45 for a 45-character ASCII sentence."""
    sample = "Bacon ipsum dolor sit amet flank corned beef."
    expected_length = 45
    assert _len(sample) == expected_length
def test_unicode(self):
    # A ten-character non-ASCII string must be measured as 10 by _len.
    sample = u"但在一个重要的任务上"
    measured = _len(sample)
    self.assertEqual(measured, 10)
def test_ascii(self):
    # A 45-character ASCII sentence must be measured as 45 by _len.
    sample = "Bacon ipsum dolor sit amet flank corned beef."
    measured = _len(sample)
    self.assertEqual(measured, 45)