def _get_subtitle_from_archive(archive, subtitle):
    _valid_names = []
    for name in archive.namelist():
        # discard hidden files and non-subtitle files
        if not os.path.split(name)[-1].startswith(".") and name.lower().endswith(
            SUBTITLE_EXTENSIONS
        ):
            _valid_names.append(name)

    # archive with only 1 subtitle
    if len(_valid_names) == 1:
        logger.debug(
            f"returning from archive: {_valid_names[0]} (single subtitle file)"
        )
        return archive.read(_valid_names[0])

    # in archives with more than 1 subtitle (season pack) we try to guess
    # the best subtitle file
    _scores = get_scores(subtitle.video)
    _max_score = 0
    _max_name = ""
    for name in _valid_names:
        _guess = guessit(name)
        if "season" not in _guess:
            _guess["season"] = -1
        if "episode" not in _guess:
            _guess["episode"] = -1

        if isinstance(subtitle.video, Episode):
            logger.debug("guessing %s", name)
            logger.debug(
                f"subtitle S{_guess['season']}E{_guess['episode']} video "
                f"S{subtitle.video.season}E{subtitle.video.episode}"
            )

            if (
                subtitle.video.episode != _guess["episode"]
                or subtitle.video.season != _guess["season"]
            ):
                logger.debug("subtitle does not match video, skipping")
                continue

        matches = set()
        matches |= guess_matches(subtitle.video, _guess)
        _score = sum(_scores.get(match, 0) for match in matches)
        logger.debug("srt matches: %s, score %d", matches, _score)
        if _score > _max_score:
            _max_score = _score
            _max_name = name
            logger.debug(f"new max: {name} {_score}")

    if _max_score > 0:
        logger.debug(f"returning from archive: {_max_name} scored {_max_score}")
        return archive.read(_max_name)

    raise APIThrottled("Cannot find the subtitle in the compressed file")
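# A minimal standalone sketch of the season-pack matching used above: guessit
# parses season/episode out of each candidate file name, and names that do not
# match the wanted episode are skipped. The helper name and file names below
# are made up for illustration; only the guessit library itself is real.
from guessit import guessit as _guessit_example

def _pick_episode_subtitle_example(names, season, episode):
    for name in names:
        guess = _guessit_example(name)
        if guess.get("season") == season and guess.get("episode") == episode:
            return name
    return None

# _pick_episode_subtitle_example(
#     ["Show.S02E04.srt", "Show.S02E05.srt"], season=2, episode=5
# ) -> "Show.S02E05.srt"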
def _get_download_link(self, subtitle): response = self.session.get(subtitle.page_link, timeout=20) self._check_response(response) try: page_soup = ParserBeautifulSoup( response.content.decode("utf-8", "ignore"), ["lxml", "html.parser"]) links_soup = page_soup.find_all("a", {"class": "detalle_link"}) for link_soup in links_soup: if link_soup["href"].startswith("bajar"): return self.server_url + link_soup["href"] links_soup = page_soup.find_all("a", {"class": "link1"}) for link_soup in links_soup: if "bajar.php" in link_soup["href"]: return link_soup["href"] except Exception as e: raise APIThrottled(f"Error parsing download link: {e}") raise APIThrottled("Download link not found")
def _get_download_link(self, subtitle):
    response = self.session.get(subtitle.page_link, timeout=20)
    self._check_response(response)
    try:
        page_soup = ParserBeautifulSoup(
            response.content.decode('iso-8859-1', 'ignore'),
            ['lxml', 'html.parser'])
        links_soup = page_soup.find_all("a", {'class': 'detalle_link'})
        for link_soup in links_soup:
            if link_soup['href'].startswith('bajar'):
                return self.server_url + link_soup['href']
        links_soup = page_soup.find_all("a", {'class': 'link1'})
        for link_soup in links_soup:
            if "bajar.php" in link_soup['href']:
                return link_soup['href']
    except Exception as e:
        raise APIThrottled('Error parsing download link: ' + str(e))

    raise APIThrottled('Download link not found')
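# The scraping in both _get_download_link variants reduces to this
# BeautifulSoup pattern, shown here against an inline HTML snippet instead of
# a live page. The snippet and base URL are made up for illustration.
from bs4 import BeautifulSoup

_html_example = '<a class="detalle_link" href="bajar.php?id=1">download</a>'
_soup_example = BeautifulSoup(_html_example, "html.parser")
for _a in _soup_example.find_all("a", {"class": "detalle_link"}):
    if _a["href"].startswith("bajar"):
        print("https://example.invalid/" + _a["href"])  # hypothetical server_url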
def _get_archive(self, content):
    archive_stream = io.BytesIO(content)
    if rarfile.is_rarfile(archive_stream):
        logger.debug("Identified rar archive")
        archive = rarfile.RarFile(archive_stream)
    elif zipfile.is_zipfile(archive_stream):
        logger.debug("Identified zip archive")
        archive = zipfile.ZipFile(archive_stream)
    else:
        raise APIThrottled("Unsupported compressed format")

    return archive
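# Quick self-contained check of the zip branch of _get_archive: build an
# archive in memory and confirm zipfile detects it from the raw bytes. The
# rar branch works the same way but needs a real .rar payload and the unrar
# backend, so it is not reproduced here.
import io
import zipfile

_buf_example = io.BytesIO()
with zipfile.ZipFile(_buf_example, "w") as _zf:
    _zf.writestr("sub.srt", "1\n00:00:01,000 --> 00:00:02,000\nHello\n")
assert zipfile.is_zipfile(io.BytesIO(_buf_example.getvalue()))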
def get_file(self, archive): for name in archive.namelist(): if os.path.split(name)[-1].startswith("."): continue if not name.lower().endswith(SUBTITLE_EXTENSIONS): continue if ("[eng]" in name.lower() or ".en." in name.lower() or ".eng." in name.lower()): continue logger.debug("Returning from archive: {}".format(name)) return archive.read(name) raise APIThrottled("Can not find the subtitle in the compressed file")
def get_file(self, archive): for name in archive.namelist(): if os.path.split(name)[-1].startswith("."): continue if not name.lower().endswith(SUBTITLE_EXTENSIONS): continue logger.debug(f"Returning from archive: {name}") return archive.read(name) raise APIThrottled("Can not find the subtitle in the zip file")
def download_subtitle(self, subtitle):
    logger.info("Downloading subtitle %r", subtitle)
    download_url_ = self.scrape_download_url(subtitle.sub_dict)
    if not download_url_:
        raise APIThrottled("Can't scrape download url")

    response = self.session.get(download_url_, timeout=10, allow_redirects=True)
    self._check_response(response)
    subtitle.content = fix_line_ending(response.content)
def _get_subtitle_from_archive(archive, subtitle):
    _valid_names = []
    for name in archive.namelist():
        # discard hidden files and non-subtitle files
        if not os.path.split(name)[-1].startswith('.') and name.lower().endswith(
                SUBTITLE_EXTENSIONS):
            _valid_names.append(name)

    # archive with only 1 subtitle
    if len(_valid_names) == 1:
        logger.debug("returning from archive: {} (single subtitle file)".format(
            _valid_names[0]))
        return archive.read(_valid_names[0])

    # in archives with more than 1 subtitle (season pack) we try to guess
    # the best subtitle file
    _scores = get_scores(subtitle.video)
    _max_score = 0
    _max_name = ""
    for name in _valid_names:
        _guess = guessit(name)
        if 'season' not in _guess:
            _guess['season'] = -1
        if 'episode' not in _guess:
            _guess['episode'] = -1

        if isinstance(subtitle.video, Episode):
            logger.debug("guessing %s", name)
            logger.debug("subtitle S{}E{} video S{}E{}".format(
                _guess['season'], _guess['episode'],
                subtitle.video.season, subtitle.video.episode))

            if (subtitle.video.episode != _guess['episode']
                    or subtitle.video.season != _guess['season']):
                logger.debug('subtitle does not match video, skipping')
                continue

        matches = set()
        matches |= guess_matches(subtitle.video, _guess)
        _score = sum(_scores.get(match, 0) for match in matches)
        logger.debug('srt matches: %s, score %d', matches, _score)
        if _score > _max_score:
            _max_score = _score
            _max_name = name
            logger.debug("new max: {} {}".format(name, _score))

    if _max_score > 0:
        logger.debug("returning from archive: {} scored {}".format(
            _max_name, _max_score))
        return archive.read(_max_name)

    raise APIThrottled('Cannot find the subtitle in the compressed file')
def _get_subtitle_from_archive(self, archive):
    for name in archive.namelist():
        # discard hidden files
        if os.path.split(name)[-1].startswith('.'):
            continue
        # discard non-subtitle files
        if not name.lower().endswith(SUBTITLE_EXTENSIONS):
            continue

        return archive.read(name)

    raise APIThrottled('Cannot find the subtitle in the compressed file')
def _get_archive(content):
    # open the archive
    archive_stream = io.BytesIO(content)
    if rarfile.is_rarfile(archive_stream):
        logger.debug('Identified rar archive')
        archive = rarfile.RarFile(archive_stream)
    elif zipfile.is_zipfile(archive_stream):
        logger.debug('Identified zip archive')
        archive = zipfile.ZipFile(archive_stream)
    else:
        raise APIThrottled('Unsupported compressed format')

    return archive
def download_subtitle(self, subtitle): logger.debug(f"Downloading subtitle {subtitle.download_url}") response = self.session.get(subtitle.download_url, allow_redirects=True, timeout=10) response.raise_for_status() stream = io.BytesIO(response.content) if is_zipfile(stream): logger.debug("Zip file found") subtitle_ = self.get_file(ZipFile(stream)) subtitle.content = fix_line_ending(subtitle_) else: raise APIThrottled(f"Unknown file type: {subtitle.download_url}")
def download_subtitle(self, subtitle): logger.debug(f"Downloading subtitle {subtitle.download_url}") response = self.session.get(subtitle.download_url, allow_redirects=True, timeout=10) response.raise_for_status() if subtitle.file_type.endswith(".zip"): logger.debug("Zip file found") subtitle_ = self.get_file(ZipFile(io.BytesIO(response.content))) elif subtitle.file_type.endswith(".srt"): logger.debug("Srt file found") subtitle_ = response.content else: raise APIThrottled(f"Unknown file type: {subtitle.file_type}") subtitle.content = fix_line_ending(subtitle_)
def _get_subtitle_from_archive(self, archive, subtitle):
    _max_score = 0
    _max_name = ""
    _scores = get_scores(subtitle.video)

    for name in archive.namelist():
        # discard hidden files
        if os.path.split(name)[-1].startswith('.'):
            continue
        # discard non-subtitle files
        if not name.lower().endswith(SUBTITLE_EXTENSIONS):
            continue

        _guess = guessit(name)
        if isinstance(subtitle.video, Episode):
            logger.debug("guessing %s", name)
            logger.debug("subtitle S{}E{} video S{}E{}".format(
                _guess.get('season'), _guess.get('episode'),
                subtitle.video.season, subtitle.video.episode))

            # use .get() so names without season/episode info do not raise
            # a KeyError; they simply fail to match and are skipped
            if (subtitle.video.episode != _guess.get('episode')
                    or subtitle.video.season != _guess.get('season')):
                logger.debug('subtitle does not match video, skipping')
                continue

        matches = set()
        matches |= guess_matches(subtitle.video, _guess)
        _score = sum(_scores.get(match, 0) for match in matches)
        logger.debug('srt matches: %s, score %d', matches, _score)
        if _score > _max_score:
            _max_name = name
            _max_score = _score
            logger.debug("new max: {} {}".format(name, _score))

    if _max_score > 0:
        logger.debug("returning from archive: {} scored {}".format(
            _max_name, _max_score))
        return archive.read(_max_name)

    raise APIThrottled('Cannot find the subtitle in the compressed file')
def checked(fn, raise_api_limit=False, validate_token=False, validate_json=False,
            json_key_name=None, validate_content=False):
    """Run :fn: and check the response status before returning it.

    :param fn: the function making an API call to OpenSubtitles.com.
    :param raise_api_limit: if True we wait a little bit longer before running
        the call again.
    :param validate_token: test if the token is valid and return 401 if not.
    :param validate_json: test if the response is valid JSON.
    :param json_key_name: test if the returned JSON contains a specific key.
    :param validate_content: test if the response has content (used with
        download).
    :return: the response.
    """
    response = None
    try:
        try:
            response = fn()
        except APIThrottled:
            if not raise_api_limit:
                logger.info("API request limit hit, waiting and trying again once.")
                time.sleep(2)
                return checked(fn, raise_api_limit=True)
            raise
        except (ConnectionError, Timeout, ReadTimeout) as e:
            # response is still None at this point, so report the underlying
            # error instead of dereferencing it
            raise ServiceUnavailable(f'Connection error or empty response: {e}')
        except Exception:
            logging.exception('Unhandled exception raised.')
            raise ProviderError('Unhandled exception raised. Check log.')
        else:
            status_code = response.status_code
    except (APIThrottled, ServiceUnavailable, ProviderError):
        # let the deliberate errors raised above reach the caller instead of
        # being swallowed by the generic handler below
        raise
    except Exception:
        status_code = None
    else:
        if status_code == 401:
            if validate_token:
                return 401
            else:
                raise AuthenticationError(f'Login failed: {response.reason}')
        elif status_code == 403:
            raise ProviderError("There seems to be a problem with the Bazarr API key")
        elif status_code == 406:
            raise DownloadLimitExceeded("Daily download limit reached")
        elif status_code == 410:
            raise ProviderError("Download has expired")
        elif status_code == 429:
            raise TooManyRequests()
        elif status_code == 502:
            # this one should deal with Bad Gateway issues on their side
            raise APIThrottled()
        elif 500 <= status_code <= 599:
            raise ProviderError(response.reason)

    if status_code != 200:
        raise ProviderError(f'Bad status code: {status_code}')

    if validate_json:
        try:
            json_test = response.json()
        except JSONDecodeError:
            raise ProviderError('Invalid JSON returned by provider')
        else:
            if json_key_name not in json_test:
                raise ProviderError(
                    f'Invalid JSON returned by provider: '
                    f'no {json_key_name} key in returned json.')

    if validate_content:
        if not hasattr(response, 'content'):
            logging.error('Download link returned no content attribute.')
            return False
        elif not response.content:
            logging.error(
                f'This download link returned empty content: {response.url}')
            return False

    return response
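# A hedged usage sketch of checked(): wrap the actual API call in a lambda so
# it can be retried once on APIThrottled, and let the validators inspect the
# result. The URL and JSON key name below are made up for illustration.
#
# response = checked(
#     lambda: session.get("https://api.example.invalid/subtitles", timeout=30),
#     validate_json=True,
#     json_key_name="data",
# )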