def parse_results(self, video, film):
    """Convert the raw API subtitle entries of *film* into SubsceneSubtitle objects."""
    found = []
    is_episode = isinstance(video, Episode)
    for raw in film.subtitles:
        try:
            sub = SubsceneSubtitle.from_api(raw)
        except NotImplementedError as err:
            # entry type not supported by the wrapper; log and move on
            logger.info(err)
            continue
        sub.asked_for_release_group = video.release_group
        if is_episode:
            sub.asked_for_episode = video.episode
        if self.only_foreign:
            sub.language = Language.rebuild(sub.language, forced=True)
        # set subtitle language to hi if it's hearing_impaired
        if sub.hearing_impaired:
            sub.language = Language.rebuild(sub.language, hi=True)
        logger.debug('Found subtitle %r', sub)
        found.append(sub)
    return found
def _get_language_obj(languages):
    """Build a set of Language objects from (lang, hi, forced) string triplets.

    Each triplet carries its flags as the strings "True"/"False"; the hi and
    forced flags are folded into the Language object itself.
    """
    if not isinstance(languages, (set, list)):
        languages = [languages]

    language_set = set()
    for item in languages:
        lang, hi_item, forced_item = item
        want_hi = hi_item == "True"
        # Always use alpha2 in API Request
        lang_obj = _get_lang_obj(alpha3_from_alpha2(lang))
        if forced_item == "True":
            lang_obj = Language.rebuild(lang_obj, forced=True)
        if want_hi:
            lang_obj = Language.rebuild(lang_obj, hi=True)
        language_set.add(lang_obj)
    return language_set
def get_embedded_subtitle_streams(part, requested_language=None, skip_duplicate_unknown=True, skip_unknown=False):
    """Collect the text-based embedded subtitle streams of a media *part*.

    :param part: media part whose ``streams`` attribute is scanned
    :param requested_language: if set, stop at (and return) the first stream matching it
    :param skip_duplicate_unknown: only keep the first stream with an unknown language
    :param skip_unknown: when the requested language is not found, do NOT fall
        back to the unknown-language streams
    :return: list of dicts with keys "stream", "is_unknown", "language", "is_forced"
    """
    streams = []
    streams_unknown = []
    all_streams = []
    has_unknown = False
    found_requested_language = False
    update_stream_info(part)
    for stream in part.streams:
        # subtitle stream
        if stream.stream_type == 3 and not stream.stream_key and stream.codec in TEXT_SUBTITLE_EXTS:
            is_forced = is_stream_forced(stream)
            language = helpers.get_language_from_stream(stream.language_code)
            if language:
                # carry the forced flag on the language object itself
                language = Language.rebuild(language, forced=is_forced)
            is_unknown = False
            found_requested_language = requested_language and requested_language == language
            stream_data = None
            if not language:
                # only consider first unknown subtitle stream
                if config.treat_und_as_first:
                    if has_unknown and skip_duplicate_unknown:
                        Log.Debug("skipping duplicate unknown")
                        continue
                    # treat the unknown stream as the first configured language
                    language = Language.rebuild(list(config.lang_list)[0], forced=is_forced)
                else:
                    language = None
                is_unknown = True
                has_unknown = True
                stream_data = {"stream": stream, "is_unknown": is_unknown, "language": language,
                               "is_forced": is_forced}
                streams_unknown.append(stream_data)
            if not requested_language or found_requested_language:
                stream_data = {"stream": stream, "is_unknown": is_unknown, "language": language,
                               "is_forced": is_forced}
                streams.append(stream_data)
                if found_requested_language:
                    # first match wins
                    break
            if stream_data:
                all_streams.append(stream_data)
    if requested_language:
        # requested language not found: optionally fall back to unknown streams
        if streams_unknown and not found_requested_language and not skip_unknown:
            streams = streams_unknown
    else:
        # no specific language requested: return everything collected
        streams = all_streams
    return streams
def translate_subtitles_file(video_path, source_srt_file, to_lang, forced, hi):
    """Translate an SRT file to *to_lang* using Google Translate.

    :param video_path: path of the related video (used to compute the destination path)
    :param source_srt_file: path of the subtitles file to translate
    :param to_lang: target language as an alpha2 code
    :param forced: whether the destination subtitles are tagged forced
    :param hi: whether the destination subtitles are tagged hearing-impaired
    :return: destination subtitles path on success, False when translation fails
    """
    to_lang = alpha3_from_alpha2(to_lang)
    lang_obj = Language(to_lang)
    if forced:
        lang_obj = Language.rebuild(lang_obj, forced=True)
    if hi:
        lang_obj = Language.rebuild(lang_obj, hi=True)

    logging.debug('BAZARR is translating in {0} this subtitles {1}'.format(lang_obj, source_srt_file))

    # Google Translate request size limit
    max_characters = 5000

    dest_srt_file = get_subtitle_path(video_path, language=lang_obj, extension='.srt', forced_tag=forced, hi_tag=hi)

    subs = pysubs2.load(source_srt_file, encoding='utf-8')
    lines_list = [x.plaintext for x in subs]
    # join the cues with a separator that survives translation so they can be split back
    joined_lines_str = '\n\n\n'.join(lines_list)

    logging.debug('BAZARR splitting subtitles into {} characters blocks'.format(max_characters))
    lines_block_list = []
    translated_lines_list = []
    while len(joined_lines_str):
        partial_lines_str = joined_lines_str[:max_characters]

        if len(joined_lines_str) > max_characters:
            # cut on a cue boundary so no cue is split across two requests
            new_partial_lines_str = partial_lines_str.rsplit('\n\n\n', 1)[0]
        else:
            new_partial_lines_str = partial_lines_str

        if not new_partial_lines_str:
            # pathological case (separator at the very start of the remainder):
            # fall back to the raw slice to guarantee forward progress
            new_partial_lines_str = partial_lines_str

        lines_block_list.append(new_partial_lines_str)
        # drop the consumed prefix by slicing; the previous str.replace() removed
        # EVERY occurrence of the block, corrupting repeated subtitle text
        joined_lines_str = joined_lines_str[len(new_partial_lines_str):]

    logging.debug('BAZARR is sending {} blocks to Google Translate'.format(len(lines_block_list)))
    for block_str in lines_block_list:
        try:
            translated_partial_srt_text = GoogleTranslator(source='auto',
                                                           target=lang_obj.basename).translate(text=block_str)
        except Exception:
            # narrowed from a bare except: still best-effort, but no longer
            # swallows SystemExit/KeyboardInterrupt
            return False
        else:
            translated_partial_srt_list = translated_partial_srt_text.split('\n\n\n')
            translated_lines_list += translated_partial_srt_list

    logging.debug('BAZARR saving translated subtitles to {}'.format(dest_srt_file))
    for i, line in enumerate(subs):
        line.plaintext = translated_lines_list[i]
    subs.save(dest_srt_file)

    return dest_srt_file
def get_embedded_subtitle_streams(part, requested_language=None, skip_duplicate_unknown=True):
    """Collect text-based embedded subtitle streams of *part* as dicts.

    When *requested_language* is given, the scan stops at the first match;
    if it is never found, the unknown-language streams are returned instead.
    """
    def pack(stream, language, is_forced, is_unknown):
        return {"stream": stream, "is_unknown": is_unknown, "language": language, "is_forced": is_forced}

    matching = []
    unknown = []
    seen_unknown = False
    hit_requested = False
    for stream in part.streams:
        # only non-burnt-in, text-based subtitle streams
        if stream.stream_type != 3 or stream.stream_key or stream.codec not in TEXT_SUBTITLE_EXTS:
            continue
        is_forced = helpers.is_stream_forced(stream)
        language = helpers.get_language_from_stream(stream.language_code)
        if language:
            language = Language.rebuild(language, forced=is_forced)
        is_unknown = False
        hit_requested = requested_language and requested_language == language
        if not language and config.treat_und_as_first:
            # only consider the first unknown subtitle stream
            if seen_unknown and skip_duplicate_unknown:
                continue
            language = Language.rebuild(list(config.lang_list)[0], forced=is_forced)
            is_unknown = True
            seen_unknown = True
            unknown.append(pack(stream, language, is_forced, is_unknown))
        if not requested_language or hit_requested:
            matching.append(pack(stream, language, is_forced, is_unknown))
            if hit_requested:
                break
    if unknown and not hit_requested:
        matching = unknown
    return matching
def load(self, fn=None, content=None, language=None, encoding="utf-8"):
    """
    Parse a subtitle file or an in-memory subtitle string into ``self.f``.

    :param encoding: used for decoding the content when fn is given, not used in case content is given
    :param language: babelfish.Language language of the subtitle
    :param fn: filename
    :param content: unicode
    :return: truthiness of ``self.f`` — True when a pysubs2 file was loaded
    """
    if language:
        # store a non-forced variant of the language on the instance
        self.language = Language.rebuild(language, forced=False)
    self.initialized_mods = {}
    try:
        if fn:
            self.f = pysubs2.load(fn, encoding=encoding)
        elif content:
            self.f = pysubs2.SSAFile.from_string(content)
    except (IOError,
            UnicodeDecodeError,
            pysubs2.exceptions.UnknownFPSError,
            pysubs2.exceptions.UnknownFormatIdentifierError,
            pysubs2.exceptions.FormatAutodetectionError):
        if fn:
            logger.exception("Couldn't load subtitle: %s: %s", fn, traceback.format_exc())
        elif content:
            logger.exception("Couldn't load subtitle: %s", traceback.format_exc())
    # NOTE(review): if loading raised (or neither fn nor content was given) and
    # ``self.f`` was never set elsewhere, this raises AttributeError — confirm
    # callers always pass one of fn/content or pre-set self.f
    return bool(self.f)
def __init__(self, language, forced, hearing_impaired, page_link, file_id, releases, uploader, title, year,
             hash_matched, file_hash=None, season=None, episode=None):
    """Store the metadata of a single provider search result."""
    # fold the HI/forced flags into the language object itself
    self.language = Language.rebuild(language, hi=hearing_impaired, forced=forced)
    self.hearing_impaired = hearing_impaired
    self.forced = forced
    self.title = title
    self.year = year
    self.season = season
    self.episode = episode
    self.releases = releases
    self.release_info = releases
    self.file_id = file_id
    self.page_link = page_link
    self.download_link = None
    self.uploader = uploader
    self.matches = None
    self.hash = file_hash
    self.encoding = 'utf-8'
    self.hash_matched = hash_matched
def guess_external_subtitles(dest_folder, subtitles):
    """Fill in missing languages in *subtitles* (filename -> language mapping)
    by analysing the content of each subtitle file with an unknown language.

    NOTE(review): Python 2 era code — relies on ``dict.iteritems()`` and
    ``str.decode()``; confirm it only runs under Python 2.
    """
    for subtitle, language in subtitles.iteritems():
        if not language:
            subtitle_path = os.path.join(dest_folder, subtitle)
            if os.path.exists(subtitle_path) and os.path.splitext(
                    subtitle_path)[1] in core.SUBTITLE_EXTENSIONS:
                logging.debug("BAZARR falling back to file content analysis to detect language.")
                detected_language = None
                with open(subtitle_path, 'r') as f:
                    # only sample the first 100 lines for performance
                    text = ' '.join(list(islice(f, 100)))
                try:
                    # sniff the encoding before decoding the sampled text
                    encoding = UnicodeDammit(text)
                    text = text.decode(encoding.original_encoding)
                    detected_language = langdetect.detect(text)
                except Exception as e:
                    logging.exception(
                        'BAZARR Error trying to detect language for this subtitles file: ' +
                        subtitle_path + ' You should try to delete this subtitles file manually and ask '
                        'Bazarr to download it again.')
                else:
                    if detected_language:
                        logging.debug(
                            "BAZARR external subtitles detected and guessed this language: " +
                            str(detected_language))
                        try:
                            # best effort: silently skip languages babelfish can't parse
                            subtitles[subtitle] = Language.rebuild(
                                Language.fromietf(detected_language))
                        except:
                            pass
    return subtitles
def _parse_row(self, row, languages, server_url):
    """Turn one HTML result row into a single-element list of YifySubtitle (or [])."""
    cells = row.findAll('td')
    rating = int(cells[0].text)
    sub_lang = cells[1].text
    release = re.sub(r'^subtitle ', '', cells[2].text)
    sub_link = cells[2].find('a').get('href')
    page_link = server_url + sub_link
    sub_link = re.sub(r'^/subtitles/', server_url + '/subtitle/', sub_link) + '.zip'
    hi = bool(cells[3].find('span', {'class': 'hi-subtitle'}))
    uploader = cells[4].text

    _, alpha, country = next(x for x in self.YifyLanguages if x[0] == sub_lang)
    lang = Language(alpha, country)
    # set subtitle language to hi if it's hearing_impaired
    if hi:
        lang = Language.rebuild(lang, hi=True)

    if not (languages & {lang}):
        return []
    return [YifySubtitle(lang, page_link, release, uploader, sub_link, rating, hi)]
def load(self, fn=None, content=None, language=None, encoding="utf-8"):
    """
    Parse a subtitle file or an in-memory subtitle string into ``self.f``.

    :param encoding: used for decoding the content when fn is given, not used in case content is given
    :param language: babelfish.Language language of the subtitle
    :param fn: filename
    :param content: unicode
    :return: truthiness of ``self.f`` — True when a pysubs2 file was loaded
    """
    if language:
        # store a non-forced variant of the language on the instance
        self.language = Language.rebuild(language, forced=False)
    self.initialized_mods = {}
    try:
        if fn:
            self.f = pysubs2.load(fn, encoding=encoding)
        elif content:
            self.f = pysubs2.SSAFile.from_string(content)
    except (IOError,
            UnicodeDecodeError,
            pysubs2.exceptions.UnknownFPSError,
            pysubs2.exceptions.UnknownFormatIdentifierError,
            pysubs2.exceptions.FormatAutodetectionError):
        if fn:
            logger.exception("Couldn't load subtitle: %s: %s", fn, traceback.format_exc())
        elif content:
            logger.exception("Couldn't load subtitle: %s", traceback.format_exc())
    # NOTE(review): if loading raised (or neither fn nor content was given) and
    # ``self.f`` was never set elsewhere, this raises AttributeError — confirm
    # callers always pass one of fn/content or pre-set self.f
    return bool(self.f)
def _search_external_subtitles(path, languages=None, only_one=False, scandir_generic=False):
    """Scan the directory of *path* for external subtitle files matching its basename.

    :param path: path of the video file whose siblings are scanned
    :param languages: candidate languages; the first one is used for untagged
        files when *only_one* is True
    :param only_one: single-language mode — untagged files get the first language
    :param scandir_generic: use the generic (fallback) scandir implementation
    :return: dict mapping subtitle filename -> Language (``Language('und')`` when
        undetected; NOTE(review): None is stored when the language code fails to
        parse — confirm callers tolerate None values)
    """
    dirpath, filename = os.path.split(path)
    dirpath = dirpath or '.'
    fileroot, fileext = os.path.splitext(filename)
    subtitles = {}
    _scandir = _scandir_generic if scandir_generic else scandir
    for entry in _scandir(dirpath):
        if not entry.name and not scandir_generic:
            # scandir failed to produce a name; retry once with the generic version
            logger.debug('Could not determine the name of the file, retrying with scandir_generic')
            return _search_external_subtitles(path, languages, only_one, True)
        if not entry.is_file(follow_symlinks=False):
            continue
        p = entry.name

        # keep only valid subtitle filenames
        if not p.lower().startswith(fileroot.lower()) or not p.lower().endswith(SUBTITLE_EXTENSIONS):
            continue

        p_root, p_ext = os.path.splitext(p)
        if not INCLUDE_EXOTIC_SUBS and p_ext not in (".srt", ".ass", ".ssa", ".vtt"):
            continue

        # extract potential forced/normal/default tag
        # fixme: duplicate from subtitlehelpers
        split_tag = p_root.rsplit('.', 1)
        adv_tag = None
        if len(split_tag) > 1:
            adv_tag = split_tag[1].lower()
            if adv_tag in ['forced', 'normal', 'default', 'embedded', 'embedded-forced', 'custom']:
                p_root = split_tag[0]

        forced = False
        if adv_tag:
            forced = "forced" in adv_tag

        # extract the potential language code
        language_code = p_root[len(fileroot):].replace('_', '-')[1:]

        # default language is undefined
        language = Language('und')

        # attempt to parse
        if language_code:
            try:
                language = Language.fromietf(language_code)
                # mutate the flag in place rather than rebuilding the object
                language.forced = forced
            except ValueError:
                logger.error('Cannot parse language code %r', language_code)
                language = None
        elif not language_code and only_one:
            language = Language.rebuild(list(languages)[0], forced=forced)

        subtitles[p] = language

    logger.debug('Found subtitles %r', subtitles)

    return subtitles
def _search_external_subtitles(path, languages=None, only_one=False, scandir_generic=False):
    """Scan the directory of *path* for external subtitle files matching its basename.

    :param path: path of the video file whose siblings are scanned
    :param languages: candidate languages; the first one is used for untagged
        files when *only_one* is True
    :param only_one: single-language mode — untagged files get the first language
    :param scandir_generic: use the generic (fallback) scandir implementation
    :return: dict mapping subtitle filename -> Language (``Language('und')`` when
        undetected; NOTE(review): None is stored when the language code fails to
        parse — confirm callers tolerate None values)
    """
    dirpath, filename = os.path.split(path)
    dirpath = dirpath or '.'
    fileroot, fileext = os.path.splitext(filename)
    subtitles = {}
    _scandir = _scandir_generic if scandir_generic else scandir
    for entry in _scandir(dirpath):
        if not entry.name and not scandir_generic:
            # scandir failed to produce a name; retry once with the generic version
            logger.debug('Could not determine the name of the file, retrying with scandir_generic')
            return _search_external_subtitles(path, languages, only_one, True)
        if not entry.is_file(follow_symlinks=False):
            continue
        p = entry.name

        # keep only valid subtitle filenames
        if not p.lower().startswith(fileroot.lower()) or not p.lower().endswith(SUBTITLE_EXTENSIONS):
            continue

        p_root, p_ext = os.path.splitext(p)
        if not INCLUDE_EXOTIC_SUBS and p_ext not in (".srt", ".ass", ".ssa", ".vtt"):
            continue

        # extract potential forced/normal/default tag
        # fixme: duplicate from subtitlehelpers
        split_tag = p_root.rsplit('.', 1)
        adv_tag = None
        if len(split_tag) > 1:
            adv_tag = split_tag[1].lower()
            if adv_tag in ['forced', 'normal', 'default', 'embedded', 'embedded-forced', 'custom']:
                p_root = split_tag[0]

        forced = False
        if adv_tag:
            forced = "forced" in adv_tag

        # extract the potential language code
        language_code = p_root[len(fileroot):].replace('_', '-')[1:]

        # default language is undefined
        language = Language('und')

        # attempt to parse
        if language_code:
            try:
                language = Language.fromietf(language_code)
                # mutate the flag in place rather than rebuilding the object
                language.forced = forced
            except ValueError:
                logger.error('Cannot parse language code %r', language_code)
                language = None
        elif not language_code and only_one:
            language = Language.rebuild(list(languages)[0], forced=forced)

        subtitles[p] = language

    logger.debug('Found subtitles %r', subtitles)

    return subtitles
def test_list_subtitles_also_forced(video_single_language):
    """Requesting a language plus its forced variant must still return non-forced results."""
    with EmbeddedSubtitlesProvider() as provider:
        plain = Language.fromalpha2("en")
        forced = Language.rebuild(plain, forced=True)
        results = provider.list_subtitles(video_single_language, {plain, forced})
        assert any(sub.language == plain for sub in results)
        assert any(not sub.language.forced for sub in results)
def __init__(self, stream, container, matches):
    """Wrap an embedded FFprobe subtitle stream as a provider subtitle."""
    disposition = stream.disposition
    super().__init__(stream.language, disposition.hearing_impaired)
    self.forced = disposition.forced
    # a forced disposition is reflected on the language object as well
    if self.forced:
        self.language = Language.rebuild(stream.language, forced=True)
    self.stream: FFprobeSubtitleStream = stream
    self.container: FFprobeVideoContainer = container
    self._matches: set = matches
    self.page_link = self.container.path
    self.release_info = os.path.basename(self.page_link)
def guess_external_subtitles(dest_folder, subtitles):
    """Fill in missing languages in *subtitles* (filename -> language mapping)
    by sniffing the encoding of each unknown file and guessing its language.
    """
    for subtitle, language in six.iteritems(subtitles):
        if not language:
            subtitle_path = os.path.join(dest_folder, subtitle)
            if os.path.exists(subtitle_path) and os.path.splitext(
                    subtitle_path)[1] in core.SUBTITLE_EXTENSIONS:
                logging.debug("BAZARR falling back to file content analysis to detect language.")
                detected_language = None

                # to improve performance, skip detection of files larger that 5M
                if os.path.getsize(subtitle_path) > 5 * 1024 * 1024:
                    logging.debug("BAZARR subtitles file is too large to be text based. Skipping this file: " +
                                  subtitle_path)
                    continue

                with open(subtitle_path, 'rb') as f:
                    text = f.read()

                try:
                    # to improve performance, use only the first 32K to detect encoding
                    guess = chardet.detect(text[:32768])
                    logging.debug('BAZARR detected encoding %r', guess)
                    if guess["confidence"] < 0.6:
                        # too uncertain: treat the file as non-text
                        raise UnicodeError
                    if guess["confidence"] < 0.8 or guess["encoding"] == "ascii":
                        # low-confidence / ascii guesses are decoded as utf-8 instead
                        guess["encoding"] = "utf-8"
                    text = text.decode(guess["encoding"])
                    detected_language = guess_language(text)
                except UnicodeError:
                    logging.exception("BAZARR subtitles file doesn't seems to be text based. Skipping this file: " +
                                      subtitle_path)
                except:
                    logging.exception(
                        'BAZARR Error trying to detect language for this subtitles file: ' +
                        subtitle_path + ' You should try to delete this subtitles file manually and ask '
                        'Bazarr to download it again.')
                else:
                    if detected_language:
                        logging.debug("BAZARR external subtitles detected and guessed this language: " +
                                      str(detected_language))
                        try:
                            # best effort: silently skip languages babelfish can't parse
                            subtitles[subtitle] = Language.rebuild(
                                Language.fromietf(detected_language))
                        except:
                            pass
    return subtitles
def get_embedded_subtitle_streams(part, requested_language=None, skip_duplicate_unknown=True, skip_unknown=False):
    """Return the text-based embedded subtitle streams of *part* as a list of dicts.

    With a *requested_language*, the scan stops at the first match; when the
    language is never found, the unknown-language streams are returned unless
    *skip_unknown* is set.
    """
    collected = []
    unknown_streams = []
    unknown_seen = False
    requested_found = False
    for stream in part.streams:
        # skip anything that isn't a non-burnt-in text subtitle stream
        if not (stream.stream_type == 3 and not stream.stream_key and stream.codec in TEXT_SUBTITLE_EXTS):
            continue
        forced_flag = helpers.is_stream_forced(stream)
        lang = helpers.get_language_from_stream(stream.language_code)
        if lang:
            lang = Language.rebuild(lang, forced=forced_flag)
        unknown_flag = False
        requested_found = requested_language and requested_language == lang
        if not lang and config.treat_und_as_first:
            # only consider first unknown subtitle stream
            if unknown_seen and skip_duplicate_unknown:
                continue
            lang = Language.rebuild(list(config.lang_list)[0], forced=forced_flag)
            unknown_flag = True
            unknown_seen = True
            unknown_streams.append({"stream": stream, "is_unknown": unknown_flag,
                                    "language": lang, "is_forced": forced_flag})
        if not requested_language or requested_found:
            collected.append({"stream": stream, "is_unknown": unknown_flag,
                              "language": lang, "is_forced": forced_flag})
            if requested_found:
                break
    if unknown_streams and not requested_found and not skip_unknown:
        collected = unknown_streams
    return collected
def _get_language_obj(profile_id):
    """Resolve a language profile id into (expanded_set, initial_set) of Languages.

    The expanded set additionally contains the opposite HI variant of every
    non-forced language so both HI and non-HI subtitles can be searched.
    """
    initial_language_set = set()
    # the profile's 'items' holds dicts of (id, lang, forced, hi)
    for item in get_profiles_list(profile_id=int(profile_id))['items']:
        lang_obj = _get_lang_obj(alpha3_from_alpha2(item['language']))
        if item['forced'] == "True":
            lang_obj = Language.rebuild(lang_obj, forced=True)
        if item['hi'] == "True":
            lang_obj = Language.rebuild(lang_obj, hi=True)
        initial_language_set.add(lang_obj)

    language_set = initial_language_set.copy()
    for language in list(language_set):
        if language.forced:
            # forced entries get no HI counterpart
            continue
        language_set.add(Language.rebuild(language, hi=not language.hi))
    return language_set, initial_language_set
def parse_results(self, video, film):
    """Build SubsceneSubtitle objects for every subtitle entry of *film*."""
    results = []
    for entry in film.subtitles:
        sub = SubsceneSubtitle.from_api(entry)
        sub.asked_for_release_group = video.release_group
        if isinstance(video, Episode):
            sub.asked_for_episode = video.episode
        if self.only_foreign:
            # foreign-only mode: tag the language as forced
            sub.language = Language.rebuild(sub.language, forced=True)
        logger.debug('Found subtitle %r', sub)
        results.append(sub)
    return results
def parse_results(self, video, film):
    """Convert the film's raw subtitle entries into SubsceneSubtitle instances."""
    is_episode = isinstance(video, Episode)
    collected = []
    for raw in film.subtitles:
        subtitle = SubsceneSubtitle.from_api(raw)
        subtitle.asked_for_release_group = video.release_group
        if is_episode:
            subtitle.asked_for_episode = video.episode
        if self.only_foreign:
            # the provider was asked for foreign-only subs, flag them forced
            subtitle.language = Language.rebuild(subtitle.language, forced=True)
        collected.append(subtitle)
        logger.debug('Found subtitle %r', subtitle)
    return collected
def download_best_subtitles(video_part_map, min_score=0, throttle_time=None, providers=None):
    """Download the best subtitles for every video in *video_part_map* that is
    missing languages.

    :param video_part_map: mapping of video -> media part
    :param min_score: minimum subliminal score to accept a subtitle
    :param throttle_time: passed through to subliminal's downloader
    :param providers: provider list override; defaults to config.providers
    :return: subliminal's download result, or None when nothing is missing
    """
    hearing_impaired = Prefs['subtitles.search.hearingImpaired']
    languages = set([Language.rebuild(l) for l in config.lang_list])
    if not languages:
        return

    use_videos = []
    missing_languages = set()
    for video, part in video_part_map.iteritems():
        if not video.ignore_all:
            p_missing_languages = get_missing_languages(video, part)
        else:
            # NOTE(review): ignored videos fall back to the FULL language set
            # here — confirm this inversion is intentional
            p_missing_languages = languages

        if p_missing_languages:
            Log.Info(u"%s has missing languages: %s", os.path.basename(video.name), p_missing_languages)
            refine_video(video, refiner_settings=config.refiner_settings)
            use_videos.append(video)
            missing_languages.update(p_missing_languages)

    # prepare blacklist
    blacklist = get_blacklist_from_part_map(video_part_map, languages)

    if use_videos and missing_languages:
        Log.Debug("Download best subtitles using settings: min_score: %s, hearing_impaired: %s, languages: %s" %
                  (min_score, hearing_impaired, missing_languages))

        return subliminal.download_best_subtitles(set(use_videos), missing_languages, min_score,
                                                  hearing_impaired,
                                                  providers=providers or config.providers,
                                                  provider_configs=config.provider_settings,
                                                  pool_class=config.provider_pool,
                                                  compute_score=compute_score,
                                                  throttle_time=throttle_time,
                                                  blacklist=blacklist,
                                                  throttle_callback=config.provider_throttle,
                                                  pre_download_hook=pre_download_hook,
                                                  post_download_hook=post_download_hook,
                                                  language_hook=language_hook)
    Log.Debug("All languages for all requested videos exist. Doing nothing.")
def __init__(self, language, hearing_impaired=False, page_link=None, encoding=None, mods=None):
    """Initialise the subtitle, folding the HI flag into the language object."""
    # set subtitle language to hi if it's hearing_impaired
    if hearing_impaired:
        language = Language.rebuild(language, hi=True)
    super(Subtitle, self).__init__(language, hearing_impaired=hearing_impaired,
                                   page_link=page_link, encoding=encoding)
    self._is_valid = False
    self.mods = mods
def guess_external_subtitles(dest_folder, subtitles):
    """Fill in missing languages in *subtitles* (filename -> language mapping)
    by reading each unknown file and guessing its language from the text.
    Works on Python 2 and 3 via ``six``.
    """
    for subtitle, language in six.iteritems(subtitles):
        if not language:
            subtitle_path = os.path.join(dest_folder, subtitle)
            if os.path.exists(subtitle_path) and os.path.splitext(
                    subtitle_path)[1] in core.SUBTITLE_EXTENSIONS:
                logging.debug("BAZARR falling back to file content analysis to detect language.")
                if is_binary(subtitle_path):
                    # binary files cannot be a text subtitle
                    logging.debug("BAZARR subtitles file doesn't seems to be text based. Skipping this file: " +
                                  subtitle_path)
                    continue
                detected_language = None

                if six.PY3:
                    with open(subtitle_path, 'r', errors='ignore') as f:
                        text = f.read()
                else:
                    with open(subtitle_path, 'r') as f:
                        text = f.read()

                try:
                    # sniff the encoding; decoding is only needed on Python 2
                    encoding = UnicodeDammit(text)
                    if six.PY2:
                        text = text.decode(encoding.original_encoding)
                    detected_language = guess_language(text)
                except Exception as e:
                    logging.exception(
                        'BAZARR Error trying to detect language for this subtitles file: ' +
                        subtitle_path + ' You should try to delete this subtitles file manually and ask '
                        'Bazarr to download it again.')
                else:
                    if detected_language:
                        logging.debug("BAZARR external subtitles detected and guessed this language: " + str(
                            detected_language))
                        try:
                            # best effort: silently skip languages babelfish can't parse
                            subtitles[subtitle] = Language.rebuild(
                                Language.fromietf(detected_language))
                        except:
                            pass
    return subtitles
def test_list_subtitles_only_forced(video_single_language):
    """A forced-only request against a video without forced streams yields nothing."""
    with EmbeddedSubtitlesProvider() as provider:
        forced_english = Language.rebuild(Language.fromalpha2("en"), forced=True)
        subs = provider.list_subtitles(video_single_language, {forced_english})
        assert len(subs) == 0
def query(self, languages, hash=None, size=None, imdb_id=None, query=None, season=None, episode=None, tag=None,
          use_tag_search=False, only_foreign=False, also_foreign=False):
    """Search OpenSubtitles with every available criterion and build subtitle objects.

    :param languages: set of babelfish Languages to search for
    :param hash: OpenSubtitles moviehash (used together with *size*)
    :param size: file size in bytes for hash-based search
    :param imdb_id: 'tt'-prefixed IMDB id
    :param query: iterable of free-text query strings
    :param season: season number for episode searches
    :param episode: episode number for episode searches
    :param tag: release tag for tag-based search (enabled via *use_tag_search*)
    :param only_foreign: return only foreign/forced subtitles
    :param also_foreign: include foreign/forced subtitles alongside normal ones
    :raises ValueError: when no search criterion can be built
    :return: list of subtitle objects
    """
    # fill the search criteria
    criteria = []
    if hash and size:
        criteria.append({'moviehash': hash, 'moviebytesize': str(size)})
    if use_tag_search and tag:
        criteria.append({'tag': tag})
    if imdb_id:
        if season and episode:
            criteria.append({'imdbid': imdb_id[2:], 'season': season, 'episode': episode})
        else:
            criteria.append({'imdbid': imdb_id[2:]})
    if query and season and episode:
        for q in query:
            # apostrophes break the search backend; strip them
            criteria.append({'query': q.replace('\'', ''), 'season': season, 'episode': episode})
    elif query:
        for q in query:
            criteria.append({'query': q.replace('\'', '')})
    if not criteria:
        raise ValueError('Not enough information')

    # add the language
    for criterion in criteria:
        criterion['sublanguageid'] = ','.join(sorted(l.opensubtitles for l in languages))

    # query the server
    logger.info('Searching subtitles %r', criteria)
    response = self.use_token_or_login(
        lambda: self.retry(lambda: checked(lambda: self.server.SearchSubtitles(self.token, criteria)))
    )
    subtitles = []

    # exit if no data
    if not response['data']:
        logger.info('No subtitles found')
        return subtitles

    # loop over subtitle items
    for subtitle_item in response['data']:
        _subtitle_item = subtitle_item

        # in case OS messes their API results up again, check whether we've got a dict or a string as subtitle_item
        if hasattr(_subtitle_item, "startswith"):
            _subtitle_item = response["data"][subtitle_item]

        # read the item
        language = Language.fromopensubtitles(_subtitle_item['SubLanguageID'])
        hearing_impaired = bool(int(_subtitle_item['SubHearingImpaired']))
        page_link = _subtitle_item['SubtitlesLink']
        subtitle_id = int(_subtitle_item['IDSubtitleFile'])
        matched_by = _subtitle_item['MatchedBy']
        movie_kind = _subtitle_item['MovieKind']
        hash = _subtitle_item['MovieHash']
        movie_name = _subtitle_item['MovieName']
        movie_release_name = _subtitle_item['MovieReleaseName']
        movie_year = int(_subtitle_item['MovieYear']) if _subtitle_item['MovieYear'] else None
        movie_imdb_id = 'tt' + _subtitle_item['IDMovieImdb']
        movie_fps = _subtitle_item.get('MovieFPS')
        series_season = int(_subtitle_item['SeriesSeason']) if _subtitle_item['SeriesSeason'] else None
        series_episode = int(_subtitle_item['SeriesEpisode']) if _subtitle_item['SeriesEpisode'] else None
        filename = _subtitle_item['SubFileName']
        encoding = _subtitle_item.get('SubEncoding') or None
        foreign_parts_only = bool(int(_subtitle_item.get('SubForeignPartsOnly', 0)))

        # foreign/forced subtitles only wanted
        if only_foreign and not foreign_parts_only:
            continue

        # foreign/forced not wanted
        elif not only_foreign and not also_foreign and foreign_parts_only:
            continue

        # set subtitle language to forced if it's foreign_parts_only
        elif (also_foreign or only_foreign) and foreign_parts_only:
            language = Language.rebuild(language, forced=True)

        if language not in languages:
            continue

        query_parameters = _subtitle_item.get("QueryParameters")

        subtitle = self.subtitle_class(language, hearing_impaired, page_link, subtitle_id, matched_by,
                                       movie_kind, hash, movie_name, movie_release_name, movie_year,
                                       movie_imdb_id, series_season, series_episode, query_parameters,
                                       filename, encoding, movie_fps, skip_wrong_fps=self.skip_wrong_fps)
        logger.debug('Found subtitle %r by %s', subtitle, matched_by)
        subtitles.append(subtitle)

    return subtitles
def get_lang_list(self, provider=None, ordered=False):
    """Return the configured subtitle languages as a set (or list when *ordered*).

    Provider-specific advanced settings take precedence; otherwise the three
    Plex language preferences plus the custom list are combined, with forced
    variants appended according to the 'when_forced' preference.
    """
    # advanced settings
    if provider and self.advanced.providers and provider in self.advanced.providers:
        adv_languages = self.advanced.providers[provider].get("languages", None)
        if adv_languages:
            adv_out = set()
            for adv_lang in adv_languages:
                adv_lang = adv_lang.strip()
                # try IETF first, then the plain language name; skip unparseable entries
                try:
                    real_lang = Language.fromietf(adv_lang)
                except:
                    try:
                        real_lang = Language.fromname(adv_lang)
                    except:
                        continue
                adv_out.update({real_lang})

            # fallback to default languages if no valid language was found in advanced settings
            if adv_out:
                return adv_out

    l = [Language.fromietf(Prefs["langPref1a"])]
    lang_custom = Prefs["langPrefCustom"].strip()

    if Prefs['subtitles.only_one']:
        # single-language mode: only the primary preference matters
        return set(l) if not ordered else l

    if Prefs["langPref2a"] != "None":
        try:
            l.append(Language.fromietf(Prefs["langPref2a"]))
        except:
            pass

    if Prefs["langPref3a"] != "None":
        try:
            l.append(Language.fromietf(Prefs["langPref3a"]))
        except:
            pass

    if len(lang_custom) and lang_custom != "None":
        for lang in lang_custom.split(u","):
            lang = lang.strip()
            try:
                real_lang = Language.fromietf(lang)
            except:
                try:
                    real_lang = Language.fromname(lang)
                except:
                    continue
            l.append(real_lang)

    if self.forced_also:
        if Prefs["subtitles.when_forced"] == "Always":
            # add a forced variant of every configured language
            for lang in list(l):
                l.append(Language.rebuild(lang, forced=True))
        else:
            # add a forced variant for exactly one of the three preference slots
            for (setting, index) in (("Only for Subtitle Language (1)", 0),
                                     ("Only for Subtitle Language (2)", 1),
                                     ("Only for Subtitle Language (3)", 2)):
                if Prefs["subtitles.when_forced"] == setting:
                    try:
                        l.append(Language.rebuild(list(l)[index], forced=True))
                        break
                    except:
                        pass

    elif self.forced_only:
        # mutate in place: every language becomes forced
        for lang in l:
            lang.forced = True

    if not self.normal_subs:
        # drop all non-forced languages
        for lang in l[:]:
            if not lang.forced:
                l.remove(lang)

    return set(l) if not ordered else l
def query(self, language, keyword, video, season=None, episode=None, year=None, only_foreign=False,
          also_foreign=False):
    """Search Podnapisi for *keyword* in *language* and build subtitle objects.

    :param language: babelfish Language to search for (exact match required)
    :param keyword: search keyword (series/movie title)
    :param video: the video being searched for (used for release-group matching)
    :param season: season number for episode searches
    :param episode: episode number for episode searches
    :param year: release year filter
    :param only_foreign: return only foreign/forced subtitles
    :param also_foreign: include foreign/forced subtitles alongside normal ones
    :return: list of subtitle objects
    """
    search_language = str(language).lower()

    # sr-Cyrl specialcase
    if search_language == "sr-cyrl":
        search_language = "sr"

    # set parameters, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164#p212652
    params = {'sXML': 1, 'sL': search_language, 'sK': keyword}

    is_episode = False
    if season and episode:
        is_episode = True
        params['sTS'] = season
        params['sTE'] = episode

    if year:
        params['sY'] = year

    # loop over paginated results
    logger.info('Searching subtitles %r', params)
    subtitles = []
    pids = set()
    while True:
        # query the server
        content = None
        try:
            content = self.session.get(self.server_url + 'search/old', params=params, timeout=10).content
            xml = etree.fromstring(content)
        except etree.ParseError:
            logger.error("Wrong data returned: %r", content)
            break

        # exit if no results
        if not int(xml.find('pagination/results').text):
            logger.debug('No subtitles found')
            break

        # loop over subtitles
        for subtitle_xml in xml.findall('subtitle'):
            # read xml elements
            pid = subtitle_xml.find('pid').text
            # ignore duplicates, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164&start=10#p213321
            if pid in pids:
                continue

            _language = Language.fromietf(subtitle_xml.find('language').text)
            # flags: 'n' marks hearing-impaired, 'f' marks foreign/forced
            hearing_impaired = 'n' in (subtitle_xml.find('flags').text or '')
            foreign = 'f' in (subtitle_xml.find('flags').text or '')
            if only_foreign and not foreign:
                continue

            elif not only_foreign and not also_foreign and foreign:
                continue

            elif also_foreign and foreign:
                _language = Language.rebuild(_language, forced=True)

            # set subtitle language to hi if it's hearing_impaired
            if hearing_impaired:
                _language = Language.rebuild(_language, hi=True)

            if language != _language:
                continue

            page_link = subtitle_xml.find('url').text
            releases = []
            if subtitle_xml.find('release').text:
                for release in subtitle_xml.find('release').text.split():
                    releases.append(re.sub(r'\.+$', '', release))  # remove trailing dots
            title = subtitle_xml.find('title').text
            r_season = int(subtitle_xml.find('tvSeason').text)
            r_episode = int(subtitle_xml.find('tvEpisode').text)
            r_year = int(subtitle_xml.find('year').text)

            if is_episode:
                subtitle = self.subtitle_class(_language, hearing_impaired, page_link, pid, releases, title,
                                               season=r_season, episode=r_episode, year=r_year,
                                               asked_for_release_group=video.release_group,
                                               asked_for_episode=episode)
            else:
                subtitle = self.subtitle_class(_language, hearing_impaired, page_link, pid, releases, title,
                                               year=r_year,
                                               asked_for_release_group=video.release_group)

            logger.debug('Found subtitle %r', subtitle)
            subtitles.append(subtitle)
            pids.add(pid)

        # stop on last page
        if int(xml.find('pagination/current').text) >= int(xml.find('pagination/count').text):
            break

        # increment current page
        params['page'] = int(xml.find('pagination/current').text) + 1
        logger.debug('Getting page %d', params['page'])
        xml = None

    return subtitles
class OpenSubtitlesComProvider(ProviderRetryMixin, Provider):
    """OpenSubtitlesCom Provider (REST API at opensubtitles.com/api/v1)."""
    server_url = 'https://www.opensubtitles.com/api/v1/'

    languages = {Language.fromopensubtitles(l) for l in language_converters['szopensubtitles'].codes}
    languages.update(set(Language.rebuild(l, forced=True) for l in languages))

    def __init__(self, username=None, password=None, use_hash=True, api_key=None):
        """Store credentials and prepare the HTTP session.

        :raises ConfigurationError: if api_key or username/password are missing.
        """
        if not api_key:
            raise ConfigurationError('Api_key must be specified')

        if not all((username, password)):
            raise ConfigurationError('Username and password must be specified')

        self.session = Session()
        self.session.headers = {'User-Agent': os.environ.get("SZ_USER_AGENT", "Sub-Zero/2"),
                                'Api-Key': api_key,
                                'Content-Type': 'application/json'}
        self.token = None
        self.username = username
        self.password = password
        self.video = None
        self.use_hash = use_hash

    def initialize(self):
        """Reuse a cached auth token if available, otherwise log in."""
        self.token = region.get("oscom_token")
        if self.token:
            self.session.headers.update({'Authorization': 'Beaker ' + self.token})
            return True
        else:
            self.login()

    def terminate(self):
        self.session.close()

    def login(self):
        """Authenticate against the API and cache the returned token.

        :return: True on successful login.
        :raises ServiceUnavailable: if the login endpoint cannot be reached.
        :raises AuthenticationError: on a 401 response.
        :raises TooManyRequests: on a 429 response.
        :raises ProviderError: on invalid JSON or any other status code.

        Fix: the original wrapped this in ``finally: return False``, which
        swallowed every raised exception and made even a successful login
        return False; it also referenced the unbound ``r`` in the except
        branch. Both issues are corrected here.
        """
        try:
            r = self.session.post(self.server_url + 'login',
                                  json={"username": self.username, "password": self.password},
                                  allow_redirects=False,
                                  timeout=10)
        except (ConnectionError, Timeout, ReadTimeout) as e:
            # 'r' does not exist when the request itself failed
            raise ServiceUnavailable('Unknown Error, empty response: %r' % e)

        if r.status_code == 200:
            try:
                self.token = r.json()['token']
            except ValueError:
                raise ProviderError('Invalid JSON returned by provider')
            self.session.headers.update({'Authorization': 'Beaker ' + self.token})
            region.set("oscom_token", self.token)
            return True
        elif r.status_code == 401:
            raise AuthenticationError('Login failed: {}'.format(r.reason))
        elif r.status_code == 429:
            raise TooManyRequests()
        else:
            raise ProviderError('Bad status code: {}'.format(r.status_code))

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
    def search_titles(self, title):
        """Resolve a title (by IMDB id when known, else by name) to a feature id.

        :return: the provider's title id, or None when nothing matched.

        Fix: status-code checks now run *before* raise_for_status(); in the
        original, raise_for_status() raised first and the 401 re-login and
        429 handling were dead code (the 429 check was also duplicated).
        """
        title_id = None
        imdb_id = None
        if isinstance(self.video, Episode) and self.video.series_imdb_id:
            imdb_id = self.video.series_imdb_id
        elif isinstance(self.video, Movie) and self.video.imdb_id:
            imdb_id = self.video.imdb_id

        if imdb_id:
            parameters = {'imdb_id': imdb_id}
            logger.debug('Searching using this IMDB id: {}'.format(imdb_id))
        else:
            parameters = {'query': title}
            logger.debug('Searching using this title: {}'.format(title))

        results = self.session.get(self.server_url + 'features', params=parameters, timeout=10)

        if results.status_code == 401:
            # stale token: clear the cache, re-login and retry once
            logger.debug('Authentification failed: clearing cache and attempting to login.')
            region.delete("oscom_token")
            self.login()
            results = self.session.get(self.server_url + 'features', params=parameters, timeout=10)
        if results.status_code == 429:
            raise TooManyRequests()
        results.raise_for_status()

        # deserialize results
        try:
            results_dict = results.json()['data']
        except ValueError:
            raise ProviderError('Invalid JSON returned by provider')

        # pick the first result whose title (and year, when known) matches
        for result in results_dict:
            if title.lower() == result['attributes']['title'].lower() and \
                    (not self.video.year or self.video.year == int(result['attributes']['year'])):
                title_id = result['id']
                break

        if title_id:
            logger.debug('Found this title ID: {}'.format(title_id))
            return title_id
        logger.debug('No match found for {}'.format(title))

    def query(self, languages, video):
        """Search subtitles for ``video`` in the requested ``languages``.

        :return: list of OpenSubtitlesComSubtitle objects (possibly empty).
        """
        self.video = video
        if self.use_hash:
            file_hash = self.video.hashes.get('opensubtitlescom')  # renamed from 'hash' (shadowed builtin)
            logger.debug('Searching using this hash: {}'.format(file_hash))
        else:
            file_hash = None

        if isinstance(self.video, Episode):
            title = self.video.series
        else:
            title = self.video.title

        title_id = self.search_titles(title)
        if not title_id:
            return []

        lang_strings = [str(lang) for lang in languages]
        langs = ','.join(lang_strings)
        logger.debug('Searching for this languages: {}'.format(lang_strings))

        # query the server
        if isinstance(self.video, Episode):
            res = self.session.get(self.server_url + 'subtitles',
                                   params={'parent_feature_id': title_id,
                                           'languages': langs,
                                           'episode_number': self.video.episode,
                                           'season_number': self.video.season,
                                           'moviehash': file_hash},
                                   timeout=10)
        else:
            res = self.session.get(self.server_url + 'subtitles',
                                   params={'id': title_id, 'languages': langs, 'moviehash': file_hash},
                                   timeout=10)
        # check rate-limiting before the generic raise so TooManyRequests wins
        if res.status_code == 429:
            raise TooManyRequests()
        res.raise_for_status()

        subtitles = []

        try:
            result = res.json()
        except ValueError:
            raise ProviderError('Invalid JSON returned by provider')

        logger.debug('Query returned {} subtitles'.format(len(result['data'])))

        if len(result['data']):
            for item in result['data']:
                feature = item['attributes']['feature_details']
                season_number = feature.get('season_number')
                episode_number = feature.get('episode_number')
                moviehash_match = item['attributes'].get('moviehash_match', False)

                if len(item['attributes']['files']):
                    subtitle = OpenSubtitlesComSubtitle(
                        language=Language.fromietf(item['attributes']['language']),
                        hearing_impaired=item['attributes']['hearing_impaired'],
                        page_link=item['attributes']['url'],
                        file_id=item['attributes']['files'][0]['file_id'],
                        releases=item['attributes']['release'],
                        uploader=item['attributes']['uploader']['name'],
                        title=feature['movie_name'],
                        year=feature['year'],
                        season=season_number,
                        episode=episode_number,
                        hash_matched=moviehash_match
                    )
                    subtitle.get_matches(self.video)
                    subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        return self.query(languages, video)

    def download_subtitle(self, subtitle):
        """Request a download link for ``subtitle`` and fetch its content.

        Fix: rate-limit / quota checks now precede raise_for_status() (they
        were unreachable after it), and the second pair of checks inspects
        the download response ``r`` instead of re-checking ``res``.
        """
        logger.info('Downloading subtitle %r', subtitle)
        headers = {'Accept': 'application/json', 'Content-Type': 'application/json'}
        res = self.session.post(self.server_url + 'download',
                                json={'file_id': subtitle.file_id, 'sub_format': 'srt'},
                                headers=headers,
                                timeout=10)
        if res.status_code == 429:
            raise TooManyRequests()
        elif res.status_code == 406:
            raise DownloadLimitExceeded("Daily download limit reached")
        res.raise_for_status()

        try:
            subtitle.download_link = res.json()['link']
        except ValueError:
            raise ProviderError('Invalid JSON returned by provider')

        r = self.session.get(subtitle.download_link, timeout=10)
        if r.status_code == 429:
            raise TooManyRequests()
        elif r.status_code == 406:
            raise DownloadLimitExceeded("Daily download limit reached")
        r.raise_for_status()

        subtitle_content = r.content
        if subtitle_content:
            subtitle.content = fix_line_ending(subtitle_content)
        else:
            logger.debug('Could not download subtitle from {}'.format(subtitle.download_link))
class OpenSubtitlesProvider(ProviderRetryMixin, _OpenSubtitlesProvider):
    """OpenSubtitles XML-RPC provider with forced/HI support, tag search,
    VIP server fallback and token caching."""
    # default feature flags; several are overridden per-instance in __init__
    only_foreign = False
    also_foreign = False
    subtitle_class = OpenSubtitlesSubtitle
    hash_verifiable = True
    hearing_impaired_verifiable = True
    skip_wrong_fps = True
    is_vip = False
    use_ssl = True
    timeout = 15

    # scheme-less URLs; the scheme is prepended in __init__ based on use_ssl
    default_url = "//api.opensubtitles.org/xml-rpc"
    vip_url = "//vip-api.opensubtitles.org/xml-rpc"

    languages = {Language.fromopensubtitles(l) for l in language_converters['szopensubtitles'].codes}
    languages.update(set(Language.rebuild(l, forced=True) for l in languages))
    languages.update(set(Language.rebuild(l, hi=True) for l in languages))

    def __init__(self, username=None, password=None, use_tag_search=False, only_foreign=False,
                 also_foreign=False, skip_wrong_fps=True, is_vip=False, use_ssl=True, timeout=15):
        """Configure the provider; credentials are optional but must come in pairs.

        :raises ConfigurationError: if only one of username/password is given.
        """
        if any((username, password)) and not all((username, password)):
            raise ConfigurationError('Username and password must be specified')

        self.username = username or ''
        self.password = password or ''
        self.use_tag_search = use_tag_search
        self.only_foreign = only_foreign
        self.also_foreign = also_foreign
        self.skip_wrong_fps = skip_wrong_fps
        self.token = None
        self.is_vip = is_vip
        self.use_ssl = use_ssl
        self.timeout = timeout

        logger.debug("Using timeout: %d", timeout)

        if use_ssl:
            logger.debug("Using HTTPS connection")

        self.default_url = ("https:" if use_ssl else "http:") + self.default_url
        self.vip_url = ("https:" if use_ssl else "http:") + self.vip_url

        if use_tag_search:
            logger.info("Using tag/exact filename search")

        if only_foreign:
            logger.info("Only searching for foreign/forced subtitles")

    def get_server_proxy(self, url, timeout=None):
        """Build an XML-RPC ServerProxy with the custom requests-based transport."""
        return ServerProxy(url, SubZeroRequestsTransport(use_https=self.use_ssl,
                                                         timeout=timeout or self.timeout,
                                                         user_agent=os.environ.get("SZ_USER_AGENT",
                                                                                   "Sub-Zero/2")))

    def log_in_url(self, server_url):
        """Log in against a specific server URL and cache token + URL in the region."""
        self.token = None
        self.server = self.get_server_proxy(server_url)

        response = self.retry(
            lambda: checked(
                lambda: self.server.LogIn(self.username, self.password, 'eng',
                                          os.environ.get("SZ_USER_AGENT", "Sub-Zero/2"))))

        self.token = response['token']
        # mask all but the first 10 characters of the token in logs
        logger.debug('Logged in with token %r', self.token[:10] + "X" * (len(self.token) - 10))

        region.set("os_token", bytearray(self.token, encoding='utf-8'))
        region.set("os_server_url", bytearray(server_url, encoding='utf-8'))

    def log_in(self):
        """Log in, preferring the VIP server and falling back to the default one.

        :raises Unauthorized: if no login attempt produced a token.
        """
        logger.info('Logging in')

        try:
            self.log_in_url(self.vip_url if self.is_vip else self.default_url)

        except Unauthorized:
            if self.is_vip:
                logger.info("VIP server login failed, falling back")
                try:
                    self.log_in_url(self.default_url)
                except Unauthorized:
                    pass

        if not self.token:
            logger.error("Login failed, please check your credentials")
            raise Unauthorized

    def use_token_or_login(self, func):
        """Run ``func``, logging in first (or re-logging-in on Unauthorized)."""
        if not self.token:
            self.log_in()
            return func()
        try:
            return func()
        except Unauthorized:
            # cached token likely expired; refresh and retry once
            self.log_in()
            return func()

    def initialize(self):
        """Restore a previously cached token/server URL, if any."""
        token_cache = region.get("os_token")
        url_cache = region.get("os_server_url")

        if token_cache is not NO_VALUE and url_cache is not NO_VALUE:
            self.token = token_cache.decode("utf-8")
            self.server = self.get_server_proxy(url_cache.decode("utf-8"))
            logger.debug("Using previous login token: %r",
                         self.token[:10] + "X" * (len(self.token) - 10))
        else:
            self.server = None
            self.token = None

    def terminate(self):
        self.server = None
        self.token = None

    def list_subtitles(self, video, languages):
        """
        :param video:
        :param languages:
        :return:

        patch: query movies even if hash is known; add tag parameter
        """
        season = episode = None

        if isinstance(video, Episode):
            query = [video.series] + video.alternative_series
            season = video.season
            # NOTE(review): 'episode = episode =' is a harmless duplicated assignment
            episode = episode = min(video.episode) if isinstance(video.episode, list) else video.episode

            if video.is_special:
                # specials are searched by "<series> <title>" without S/E numbers
                season = None
                episode = None
                query = [u"%s %s" % (series, video.title)
                         for series in [video.series] + video.alternative_series]
                logger.info("%s: Searching for special: %r", self.__class__, query)
        # elif ('opensubtitles' not in video.hashes or not video.size) and not video.imdb_id:
        #     query = video.name.split(os.sep)[-1]
        else:
            query = [video.title] + video.alternative_titles

        if isinstance(video, Episode):
            imdb_id = video.series_imdb_id
        else:
            imdb_id = video.imdb_id

        return self.query(video, languages, hash=video.hashes.get('opensubtitles'), size=video.size,
                          imdb_id=imdb_id, query=query, season=season, episode=episode,
                          tag=video.original_name, use_tag_search=self.use_tag_search,
                          only_foreign=self.only_foreign, also_foreign=self.also_foreign)

    def query(self, video, languages, hash=None, size=None, imdb_id=None, query=None, season=None,
              episode=None, tag=None, use_tag_search=False, only_foreign=False, also_foreign=False):
        """Run a SearchSubtitles query built from hash/tag/imdb criteria.

        :return: list of subtitle objects; empty when nothing matched.
        :raises ValueError: when no search criterion could be built.
        """
        # fill the search criteria
        criteria = []
        if hash and size:
            criteria.append({'moviehash': hash, 'moviebytesize': str(size)})
        if use_tag_search and tag:
            criteria.append({'tag': tag})
        if imdb_id:
            # imdb_id arrives as 'tt1234567'; the API expects the bare digits
            if season and episode:
                criteria.append({'imdbid': imdb_id[2:], 'season': season, 'episode': episode})
            else:
                criteria.append({'imdbid': imdb_id[2:]})
        # Commented out after the issue with episode released after October 17th 2020.
        # if query and season and episode:
        #     for q in query:
        #         criteria.append({'query': q.replace('\'', ''), 'season': season, 'episode': episode})
        # elif query:
        #     for q in query:
        #         criteria.append({'query': q.replace('\'', '')})
        if not criteria:
            raise ValueError('Not enough information')

        # add the language
        for criterion in criteria:
            criterion['sublanguageid'] = ','.join(sorted(l.opensubtitles for l in languages))

        # query the server
        logger.info('Searching subtitles %r', criteria)
        response = self.use_token_or_login(
            lambda: self.retry(
                lambda: checked(lambda: self.server.SearchSubtitles(self.token, criteria))))

        subtitles = []

        # exit if no data
        if not response['data']:
            logger.info('No subtitles found')
            return subtitles

        # loop over subtitle items
        for subtitle_item in response['data']:
            _subtitle_item = subtitle_item

            # in case OS messes their API results up again, check whether we've got a dict or a
            # string as subtitle_item
            if hasattr(_subtitle_item, "startswith"):
                _subtitle_item = response["data"][subtitle_item]

            # read the item
            language = Language.fromopensubtitles(_subtitle_item['SubLanguageID'])
            hearing_impaired = bool(int(_subtitle_item['SubHearingImpaired']))
            page_link = _subtitle_item['SubtitlesLink']
            subtitle_id = int(_subtitle_item['IDSubtitleFile'])
            matched_by = _subtitle_item['MatchedBy']
            movie_kind = _subtitle_item['MovieKind']
            hash = _subtitle_item['MovieHash']
            movie_name = _subtitle_item['MovieName']
            movie_release_name = _subtitle_item['MovieReleaseName']
            movie_year = int(_subtitle_item['MovieYear']) if _subtitle_item['MovieYear'] else None
            if season or episode:
                movie_imdb_id = 'tt' + _subtitle_item['SeriesIMDBParent']
            else:
                movie_imdb_id = 'tt' + _subtitle_item['IDMovieImdb']
            movie_fps = _subtitle_item.get('MovieFPS')
            series_season = int(_subtitle_item['SeriesSeason']) if _subtitle_item['SeriesSeason'] else None
            series_episode = int(_subtitle_item['SeriesEpisode']) if _subtitle_item['SeriesEpisode'] else None
            filename = _subtitle_item['SubFileName']
            encoding = _subtitle_item.get('SubEncoding') or None

            foreign_parts_only = bool(int(_subtitle_item.get('SubForeignPartsOnly', 0)))

            # foreign/forced subtitles only wanted
            if only_foreign and not foreign_parts_only:
                continue
            # foreign/forced not wanted
            elif not only_foreign and not also_foreign and foreign_parts_only:
                continue
            # set subtitle language to forced if it's foreign_parts_only
            elif (also_foreign or only_foreign) and foreign_parts_only:
                language = Language.rebuild(language, forced=True)

            # set subtitle language to hi if it's hearing_impaired
            if hearing_impaired:
                language = Language.rebuild(language, hi=True)

            if language not in languages:
                continue

            # compare against the video's imdb id with leading zeros stripped
            if video.imdb_id and (movie_imdb_id != re.sub("(?<![^a-zA-Z])0+", "", video.imdb_id)):
                continue

            query_parameters = _subtitle_item.get("QueryParameters")

            subtitle = self.subtitle_class(language, hearing_impaired, page_link, subtitle_id,
                                           matched_by, movie_kind, hash, movie_name,
                                           movie_release_name, movie_year, movie_imdb_id,
                                           series_season, series_episode, query_parameters,
                                           filename, encoding, movie_fps,
                                           skip_wrong_fps=self.skip_wrong_fps)
            subtitle.uploader = _subtitle_item['UserNickName'] if _subtitle_item['UserNickName'] else 'anonymous'
            logger.debug('Found subtitle %r by %s', subtitle, matched_by)
            subtitles.append(subtitle)

        return subtitles

    def download_subtitle(self, subtitle):
        """Download, base64-decode and gzip-decompress the subtitle content."""
        logger.info('Downloading subtitle %r', subtitle)
        response = self.use_token_or_login(
            lambda: checked(
                lambda: self.server.DownloadSubtitles(self.token, [str(subtitle.subtitle_id)])))
        # wbits=47 enables automatic zlib/gzip header detection
        subtitle.content = fix_line_ending(
            zlib.decompress(base64.b64decode(response['data'][0]['data']), 47))
class SubsceneProvider(Provider, ProviderSubtitleArchiveMixin):
    """
    This currently only searches for the filename on SubScene. It doesn't open every found subtitle
    page to avoid massive hammering, thus it can't determine whether a subtitle is only-foreign or
    not.
    """
    subtitle_class = SubsceneSubtitle
    languages = supported_languages
    languages.update(set(Language.rebuild(l, forced=True) for l in languages))

    session = None
    skip_wrong_fps = False
    hearing_impaired_verifiable = True
    only_foreign = False
    username = None
    password = None

    search_throttle = 5  # seconds

    def __init__(self, only_foreign=False, username=None, password=None):
        """:raises ConfigurationError: if username or password is missing."""
        if not all((username, password)):
            raise ConfigurationError('Username and password must be specified')

        self.only_foreign = only_foreign
        self.username = username
        self.password = password

    def initialize(self):
        logger.info("Creating session")
        self.session = RetryingCFSession()

    def login(self):
        """Perform the multi-step subscene.com login and persist selected cookies.

        :raises ServiceUnavailable: when the login page reports a server error.
        :raises ProviderError: on any other failure in the login flow.
        """
        r = self.session.get("https://subscene.com/account/login")
        if "Server Error" in r.content:
            logger.error("Login unavailable; Maintenance?")
            raise ServiceUnavailable("Login unavailable; Maintenance?")

        # the login form parameters are embedded as JSON in a script tag
        match = re.search(r"<script id='modelJson' type='application/json'>\s*(.+)\s*</script>",
                          r.content)

        if match:
            h = HTMLParser.HTMLParser()
            data = json.loads(h.unescape(match.group(1)))
            login_url = urlparse.urljoin(data["siteUrl"], data["loginUrl"])
            time.sleep(1.0)

            r = self.session.post(login_url,
                                  {
                                      "username": self.username,
                                      "password": self.password,
                                      data["antiForgery"]["name"]: data["antiForgery"]["value"]
                                  })
            # scrape the OpenID-style token form that must be posted back to the site
            pep_content = re.search(r"<form method=\"post\" action=\"https://subscene\.com/\">"
                                    r".+name=\"id_token\".+?value=\"(?P<id_token>.+?)\".*?"
                                    r"access_token\".+?value=\"(?P<access_token>.+?)\".+?"
                                    r"token_type.+?value=\"(?P<token_type>.+?)\".+?"
                                    r"expires_in.+?value=\"(?P<expires_in>.+?)\".+?"
                                    r"scope.+?value=\"(?P<scope>.+?)\".+?"
                                    r"state.+?value=\"(?P<state>.+?)\".+?"
                                    r"session_state.+?value=\"(?P<session_state>.+?)\"",
                                    r.content, re.MULTILINE | re.DOTALL)

            if pep_content:
                r = self.session.post(SITE_DOMAIN, pep_content.groupdict())
                try:
                    r.raise_for_status()
                except Exception:
                    raise ProviderError("Something went wrong when trying to log in: %s",
                                        traceback.format_exc())
                else:
                    # keep only the session-relevant cookies for the persistent cache
                    cj = self.session.cookies.copy()
                    store_cks = ("scene", "idsrv", "idsrv.xsrf", "idsvr.clients", "idsvr.session",
                                 "idsvr.username")
                    for cn in self.session.cookies.iterkeys():
                        if cn not in store_cks:
                            del cj[cn]

                    logger.debug("Storing cookies: %r", cj)
                    region.set("subscene_cookies2", cj)
                    return
        raise ProviderError("Something went wrong when trying to log in #1")

    def terminate(self):
        logger.info("Closing session")
        self.session.close()

    def _create_filters(self, languages):
        """Build the cookie-based search filters for the requested languages."""
        self.filters = dict(HearingImpaired="2")
        if self.only_foreign:
            self.filters["ForeignOnly"] = "True"
            logger.info("Only searching for foreign/forced subtitles")

        self.filters["LanguageFilter"] = ",".join((str(language_ids[l.alpha3])
                                                   for l in languages if l.alpha3 in language_ids))

        logger.debug("Filter created: '%s'" % self.filters)

    def _enable_filters(self):
        # subscene stores search filters in cookies rather than query parameters
        self.session.cookies.update(self.filters)
        logger.debug("Filters applied")

    def list_subtitles(self, video, languages):
        if not video.original_name:
            logger.info("Skipping search because we don't know the original release name")
            return []

        self._create_filters(languages)
        self._enable_filters()
        return [s for s in self.query(video) if s.language in languages]

    def download_subtitle(self, subtitle):
        """Download the subtitle archive, reusing cached pack data when possible."""
        if subtitle.pack_data:
            logger.info("Using previously downloaded pack data")
            archive = ZipFile(io.BytesIO(subtitle.pack_data))
            subtitle.pack_data = None

            try:
                subtitle.content = self.get_subtitle_from_archive(subtitle, archive)
                return
            except ProviderError:
                # cached pack did not contain the subtitle; fall through to a fresh download
                pass

        # open the archive
        r = self.session.get(subtitle.get_download_link(self.session), timeout=10)
        r.raise_for_status()
        archive_stream = io.BytesIO(r.content)
        archive = ZipFile(archive_stream)

        subtitle.content = self.get_subtitle_from_archive(subtitle, archive)

        # store archive as pack_data for later caching
        subtitle.pack_data = r.content

    def parse_results(self, video, film):
        """Convert API film results into subtitle objects.

        Fix: 'except NotImplementedError, e:' used the Python-2-only comma
        syntax; changed to 'as e', which is valid on both Python 2.6+ and 3
        and matches the form used elsewhere in this codebase.
        """
        subtitles = []
        for s in film.subtitles:
            try:
                subtitle = SubsceneSubtitle.from_api(s)
            except NotImplementedError as e:
                logger.info(e)
                continue
            subtitle.asked_for_release_group = video.release_group
            if isinstance(video, Episode):
                subtitle.asked_for_episode = video.episode

            if self.only_foreign:
                subtitle.language = Language.rebuild(subtitle.language, forced=True)

            subtitles.append(subtitle)
            logger.debug('Found subtitle %r', subtitle)

        return subtitles
def query(self, language, keyword, video, season=None, episode=None, year=None, only_foreign=False,
          also_foreign=False):
    """Search the provider's legacy XML endpoint and return matching subtitles.

    Same pagination/filter flow as the HI-aware variant of this method, but
    without rebuilding the language for hearing-impaired entries.

    :param language: the exact Language (incl. forced flag) being requested.
    :param keyword: search keyword (series/movie title).
    :param video: the Video object (used for release-group matching).
    :param season: season number for episode searches.
    :param episode: episode number for episode searches.
    :param year: optional release year filter.
    :param only_foreign: return only foreign/forced subtitles.
    :param also_foreign: include foreign/forced subtitles alongside normal ones.
    :return: list of subtitle objects built via ``self.subtitle_class``.
    """
    search_language = str(language).lower()

    # sr-Cyrl special case: the endpoint only understands the plain "sr" code
    if search_language == "sr-cyrl":
        search_language = "sr"

    # set parameters, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164#p212652
    params = {'sXML': 1, 'sL': search_language, 'sK': keyword}

    is_episode = False
    if season and episode:
        is_episode = True
        params['sTS'] = season
        params['sTE'] = episode
    if year:
        params['sY'] = year

    # loop over paginated results
    logger.info('Searching subtitles %r', params)
    subtitles = []
    pids = set()  # seen pid values, used to drop duplicate entries
    while True:
        # query the server; keep raw bytes around so parse failures can be logged
        content = None
        try:
            content = self.session.get(self.server_url + 'search/old', params=params, timeout=10).content
            xml = etree.fromstring(content)
        except etree.ParseError:
            logger.error("Wrong data returned: %r", content)
            break

        # exit if no results
        if not int(xml.find('pagination/results').text):
            logger.debug('No subtitles found')
            break

        # loop over subtitles
        for subtitle_xml in xml.findall('subtitle'):
            # read xml elements
            pid = subtitle_xml.find('pid').text
            # ignore duplicates, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164&start=10#p213321
            if pid in pids:
                continue

            _language = Language.fromietf(subtitle_xml.find('language').text)
            # flags string: 'n' marks hearing-impaired, 'f' marks foreign/forced
            hearing_impaired = 'n' in (subtitle_xml.find('flags').text or '')
            foreign = 'f' in (subtitle_xml.find('flags').text or '')
            if only_foreign and not foreign:
                continue
            elif not only_foreign and not also_foreign and foreign:
                continue
            elif also_foreign and foreign:
                _language = Language.rebuild(_language, forced=True)

            # exact language match required (forced flag included)
            if language != _language:
                continue

            page_link = subtitle_xml.find('url').text
            releases = []
            if subtitle_xml.find('release').text:
                for release in subtitle_xml.find('release').text.split():
                    releases.append(re.sub(r'\.+$', '', release))  # remove trailing dots
            title = subtitle_xml.find('title').text
            r_season = int(subtitle_xml.find('tvSeason').text)
            r_episode = int(subtitle_xml.find('tvEpisode').text)
            r_year = int(subtitle_xml.find('year').text)

            if is_episode:
                subtitle = self.subtitle_class(_language, hearing_impaired, page_link, pid, releases, title,
                                               season=r_season, episode=r_episode, year=r_year,
                                               asked_for_release_group=video.release_group,
                                               asked_for_episode=episode)
            else:
                subtitle = self.subtitle_class(_language, hearing_impaired, page_link, pid, releases, title,
                                               year=r_year,
                                               asked_for_release_group=video.release_group)

            logger.debug('Found subtitle %r', subtitle)
            subtitles.append(subtitle)
            pids.add(pid)

        # stop on last page
        if int(xml.find('pagination/current').text) >= int(xml.find('pagination/count').text):
            break

        # increment current page
        params['page'] = int(xml.find('pagination/current').text) + 1
        logger.debug('Getting page %d', params['page'])
        xml = None

    return subtitles
class TitrariProvider(Provider, ProviderSubtitleArchiveMixin):
    """Subtitle provider scraping titrari.ro search result pages."""
    subtitle_class = TitrariSubtitle
    languages = {Language(lang) for lang in ['ron', 'eng']}
    languages.update(set(Language.rebuild(lang, forced=True) for lang in languages))
    video_types = (Episode, Movie)
    api_url = 'https://www.titrari.ro/'
    query_advanced_search = 'cautarepreaavansata'

    def __init__(self):
        self.session = None

    def initialize(self):
        self.session = Session()
        # Hardcoding the UA to bypass the 30s throttle that titrari.ro uses for IP/UA pair
        self.session.headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, ' \
                                             'like Gecko) Chrome/93.0.4535.2 Safari/537.36'
        # self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]

    def terminate(self):
        self.session.close()

    def query(self, language=None, title=None, imdb_id=None, video=None):
        """Scrape the advanced-search page and return subtitles ordered by downloads.

        Each field is parsed best-effort: bare excepts log and continue so one
        malformed cell does not abort the whole page.
        NOTE(review): if the page_link/uploader parses fail on the very first
        row, those names are unbound when building the subtitle — confirm
        whether that can happen with real pages.
        """
        subtitles = []

        params = self.getQueryParams(imdb_id, title, language)

        search_response = self.session.get(self.api_url, params=params, timeout=15)
        search_response.raise_for_status()

        if not search_response.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(search_response.content.decode('utf-8', 'ignore'),
                                   ['lxml', 'html.parser'])

        # loop over subtitle cells
        rows = soup.select('td[rowspan="5"]')
        for index, row in enumerate(rows):
            result_anchor_el = row.select_one('a')

            # Download link
            href = result_anchor_el.get('href')
            download_link = self.api_url + href

            fullTitle = row.parent.select('h1 a')[0].text

            # Get title (text before the "(year)" suffix)
            try:
                title = fullTitle.split("(")[0]
            except:
                logger.error("Error parsing title")

            # Get downloads count
            downloads = 0
            try:
                downloads = int(row.parent.parent.select('span')[index].text[12:])
            except:
                logger.error("Error parsing downloads")

            # Get year from the "(year)" suffix of the full title
            try:
                year = int(fullTitle.split("(")[1].split(")")[0])
            except:
                year = None
                logger.error("Error parsing year")

            # Get imdbId
            sub_imdb_id = self.getImdbIdFromSubtitle(row)

            comments = ''
            try:
                comments = row.parent.parent.select('.comment')[1].text
            except:
                logger.error("Error parsing comments")

            # Get page_link
            try:
                page_link = self.api_url + row.parent.select('h1 a')[0].get('href')
            except:
                logger.error("Error parsing page_link")

            # Get uploader
            try:
                uploader = row.parent.select('td.row1.stanga a')[-1].text
            except:
                logger.error("Error parsing uploader")

            episode_number = video.episode if isinstance(video, Episode) else None
            subtitle = self.subtitle_class(language, download_link, index, comments, title,
                                           sub_imdb_id, page_link, uploader, year, downloads,
                                           isinstance(video, Episode), episode_number)
            logger.debug('Found subtitle %r', str(subtitle))
            subtitles.append(subtitle)

        ordered_subs = self.order(subtitles)

        return ordered_subs

    @staticmethod
    def order(subtitles):
        """Sort subtitles by download count, highest first."""
        logger.debug("Sorting by download count...")
        sorted_subs = sorted(subtitles, key=lambda s: s.download_count, reverse=True)
        return sorted_subs

    @staticmethod
    def getImdbIdFromSubtitle(row):
        """Extract the IMDB id ('tt...') from the row's IMDB icon link, or None."""
        imdbId = None
        try:
            imdbId = row.parent.parent.find_all(src=re.compile("imdb"))[0].parent.get('href').split("tt")[-1]
        except:
            logger.error("Error parsing imdb id")
        if imdbId is not None:
            return "tt" + imdbId
        else:
            return None

    # titrari.ro seems to require all parameters now
    #  z2 = comment (empty)
    #  z3 = fps (-1: any, 0: N/A, 1: 23.97 FPS etc.)
    #  z4 = CD count (-1: any)
    #  z5 = imdb_id (empty or integer)
    #  z6 = sort order (0: unsorted, 1: by date, 2: by name)
    #  z7 = title (empty or string)
    #  z8 = language (-1: all, 1: ron, 2: eng)
    #  z9 = genre (All: all, Action: action etc.)
    #  z11 = type (0: any, 1: movie, 2: series)
    def getQueryParams(self, imdb_id, title, language):
        """Build the advanced-search query dict; IMDB id takes priority over title."""
        queryParams = {
            'page': self.query_advanced_search,
            'z7': '',
            'z2': '',
            'z5': '',
            'z3': '-1',
            'z4': '-1',
            'z8': '-1',
            'z9': 'All',
            'z11': '0',
            'z6': '0'
        }
        if imdb_id is not None:
            queryParams["z5"] = imdb_id
        elif title is not None:
            queryParams["z7"] = title

        if language == 'ro':
            queryParams["z8"] = '1'
        elif language == 'en':
            queryParams["z8"] = '2'

        return queryParams

    def list_subtitles(self, video, languages):
        title = fix_inconsistent_naming(video.title)
        imdb_id = None
        try:
            # strip the 'tt' prefix; series use the parent series imdb id
            if isinstance(video, Episode):
                imdb_id = video.series_imdb_id[2:]
            else:
                imdb_id = video.imdb_id[2:]
        except:
            logger.error('Error parsing imdb_id from video object {}'.format(str(video)))

        subtitles = [s for lang in languages
                     for s in self.query(lang, title, imdb_id, video)]
        return subtitles

    def download_subtitle(self, subtitle):
        """Download the subtitle; content may be a RAR/ZIP archive or a bare file."""
        r = self.session.get(subtitle.download_link, headers={'Referer': self.api_url}, timeout=10)
        r.raise_for_status()

        # open the archive
        archive_stream = io.BytesIO(r.content)
        if is_rarfile(archive_stream):
            logger.debug('Archive identified as RAR')
            archive = RarFile(archive_stream)
        elif is_zipfile(archive_stream):
            logger.debug('Archive identified as ZIP')
            archive = ZipFile(archive_stream)
        else:
            # not an archive; accept the raw payload only if it validates as a subtitle
            subtitle.content = r.content
            if subtitle.is_valid():
                return
            subtitle.content = None

            raise ProviderError('Unidentified archive type')

        if subtitle.is_episode:
            subtitle.content = self._get_subtitle_from_archive(subtitle, archive)
        else:
            subtitle.content = self.get_subtitle_from_archive(subtitle, archive)

    @staticmethod
    def _get_subtitle_from_archive(subtitle, archive):
        """Pick the archive member matching the desired episode number, or None."""
        for name in archive.namelist():
            # discard hidden files
            if os.path.split(name)[-1].startswith('.'):
                continue

            # discard non-subtitle files
            if not name.lower().endswith(SUBTITLE_EXTENSIONS):
                continue

            _guess = guessit(name)
            if subtitle.desired_episode == _guess['episode']:
                return archive.read(name)

        return None
class SubtitrarinoiProvider(Provider, ProviderSubtitleArchiveMixin):
    """Subtitle provider scraping subtitrari-noi.ro via its AJAX search endpoint."""
    subtitle_class = SubtitrarinoiSubtitle
    languages = {Language(lang) for lang in ['ron']}
    languages.update(set(Language.rebuild(lang, forced=True) for lang in languages))
    video_types = (Episode, Movie)
    server_url = 'https://www.subtitrari-noi.ro/'
    api_url = server_url + 'paginare_filme.php'

    def __init__(self):
        self.session = None

    def initialize(self):
        self.session = Session()
        # fixed UA plus AJAX headers so the endpoint serves results
        self.session.headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4535.2 Safari/537.36'
        self.session.headers['X-Requested-With'] = 'XMLHttpRequest'
        self.session.headers['Referer'] = self.server_url

    def terminate(self):
        self.session.close()

    def query(self, languages=None, title=None, imdb_id=None, video=None):
        """POST the search and scrape the result blocks into subtitles.

        Field parsing is best-effort: bare excepts log and continue so one
        malformed block does not abort the whole page.
        NOTE(review): if uploader/page_link parses fail on the very first
        row, those names are unbound when building the subtitle — confirm
        whether that can happen with real pages.
        """
        subtitles = []
        params = self.getQueryParams(imdb_id, title)
        search_response = self.session.post(self.api_url, data=params, timeout=15)
        search_response.raise_for_status()

        soup = ParserBeautifulSoup(search_response.content.decode('utf-8', 'ignore'),
                                   ['lxml', 'html.parser'])

        # loop over subtitle cells
        rows = soup.select('div[id="round"]')

        if len(rows) == 0:
            logger.debug('No data returned from provider')
            return []

        # release comments are outside of the parent for the sub details itself, so we just map it
        # to another list
        comment_rows = soup.findAll('div', attrs={'class': None, 'id': None, 'align': None})

        for index, row in enumerate(rows):
            result_anchor_el = row.select_one('.buton').select('a')

            # Download link
            href = result_anchor_el[0]['href']
            download_link = self.server_url + href

            fullTitle = row.select_one('#content-main a').text

            # Get title (text before the "(year)" suffix)
            try:
                title = fullTitle.split("(")[0]
            except:
                logger.error("Error parsing title")

            # Get Uploader
            try:
                uploader = row.select('#content-main p')[4].text[10:]
            except:
                logger.error("Error parsing uploader")

            # Get downloads count
            downloads = 0
            try:
                downloads = int(row.select_one('#content-right p').text[12:])
            except:
                logger.error("Error parsing downloads")

            # Get year from the "(year)" suffix of the full title
            try:
                year = int(fullTitle.split("(")[1].split(")")[0])
            except:
                year = None
                logger.error("Error parsing year")

            # Get imdbId
            sub_imdb_id = self.getImdbIdFromSubtitle(row)

            comments = ''
            try:
                comments = comment_rows[index].text
                logger.debug('Comments: {}'.format(comments))
            except:
                logger.error("Error parsing comments")

            # Get Page Link
            try:
                page_link = row.select_one('#content-main a')['href']
            except:
                logger.error("Error parsing page_link")

            episode_number = video.episode if isinstance(video, Episode) else None
            subtitle = self.subtitle_class(next(iter(languages)), download_link, index, comments,
                                           title, sub_imdb_id, uploader, page_link, year, downloads,
                                           isinstance(video, Episode), episode_number)
            logger.debug('Found subtitle %r', str(subtitle))
            subtitles.append(subtitle)

        ordered_subs = self.order(subtitles)
        return ordered_subs

    @staticmethod
    def order(subtitles):
        """Sort subtitles by download count, highest first."""
        logger.debug("Sorting by download count...")
        sorted_subs = sorted(subtitles, key=lambda s: s.download_count, reverse=True)
        return sorted_subs

    @staticmethod
    def getImdbIdFromSubtitle(row):
        """Extract the IMDB id ('tt...') from the row's IMDB icon link, or None."""
        imdbId = None
        try:
            imdbId = row.select('div[id=content-right] a')[-1].find_all(
                src=re.compile("imdb"))[0].parent.get('href').split("tt")[-1]
        except:
            logger.error("Error parsing imdb id")
        if imdbId is not None:
            return "tt" + imdbId
        else:
            return None

    # subtitrari-noi.ro params
    # info: there seems to be no way to do an advanced search by imdb_id or title
    # the page seems to populate both "search_q" and "cautare" with the same value
    # search_q = ?
    # cautare = search string
    # tip = type of search (0: premiere - doesn't return anything, 1: series only, 2: both, I
    #       think, not sure on that)
    # an = year
    # gen = genre
    def getQueryParams(self, imdb_id, title):
        """Build the search POST data; IMDB id takes priority over title.

        NOTE(review): if both imdb_id and title are None, the final lookup of
        queryParams["cautare"] raises KeyError — confirm callers always pass one.
        """
        queryParams = {
            'search_q': '1',
            'tip': '2',
            'an': 'Toti anii',
            'gen': 'Toate',
        }
        if imdb_id is not None:
            queryParams["cautare"] = imdb_id
        elif title is not None:
            queryParams["cautare"] = title

        queryParams["query_q"] = queryParams["cautare"]

        return queryParams

    def list_subtitles(self, video, languages):
        title = fix_inconsistent_naming(video.title)
        imdb_id = None
        try:
            # strip the 'tt' prefix; series use the parent series imdb id
            if isinstance(video, Episode):
                imdb_id = video.series_imdb_id[2:]
            else:
                imdb_id = video.imdb_id[2:]
        except:
            logger.error('Error parsing imdb_id from video object {}'.format(str(video)))

        subtitles = [s for s in self.query(languages, title, imdb_id, video)]
        return subtitles

    def download_subtitle(self, subtitle):
        """Download the subtitle; content may be a RAR/ZIP archive or a bare file."""
        r = self.session.get(subtitle.download_link, headers={'Referer': self.api_url}, timeout=10)
        r.raise_for_status()

        # open the archive
        archive_stream = io.BytesIO(r.content)
        if is_rarfile(archive_stream):
            logger.debug('Archive identified as rar')
            archive = RarFile(archive_stream)
        elif is_zipfile(archive_stream):
            logger.debug('Archive identified as zip')
            archive = ZipFile(archive_stream)
        else:
            # not an archive; accept the raw payload only if it validates as a subtitle
            subtitle.content = r.content
            if subtitle.is_valid():
                return
            subtitle.content = None
            raise ProviderError('Unidentified archive type')

        if subtitle.is_episode:
            subtitle.content = self._get_subtitle_from_archive(subtitle, archive)
        else:
            subtitle.content = self.get_subtitle_from_archive(subtitle, archive)

    @staticmethod
    def _get_subtitle_from_archive(subtitle, archive):
        """Pick the archive member matching the desired episode number, or None."""
        for name in archive.namelist():
            # discard hidden files
            if os.path.split(name)[-1].startswith('.'):
                continue

            # discard non-subtitle files
            if not name.lower().endswith(SUBTITLE_EXTENSIONS):
                continue

            _guess = guessit(name)
            if subtitle.desired_episode == _guess['episode']:
                return archive.read(name)

        return None
def manual_upload_subtitle(path, language, forced, hi, title, scene_name, media_type, subtitle, audio_language):
    """Save a user-uploaded subtitle for a media item and notify downstream services.

    Writes the uploaded subtitle next to the media file (honoring the configured
    target folder, chmod and subzero mods), syncs it against the media, runs the
    optional post-processing command and notifies Sonarr/Radarr plus the UI
    event stream.

    :param path: full path of the media file on this system
    :param language: alpha2 code of the subtitle language
    :param forced: True if this is a forced subtitle
    :param hi: True if this is a hearing-impaired subtitle
    :param title: media title (not referenced in this body)
    :param scene_name: scene release name (not referenced in this body)
    :param media_type: 'series' for Sonarr items, anything else for Radarr movies
    :param subtitle: uploaded file object (must provide .read() and .filename)
    :param audio_language: audio language name of the media
    :return: (message, reversed_path, reversed_subtitles_path) on success, or
             None when saving failed or no matching database row was found
    """
    logging.debug('BAZARR Manually uploading subtitles for this file: ' + path)

    single = settings.general.getboolean('single_language')

    use_postprocessing = settings.general.getboolean('use_postprocessing')
    postprocessing_cmd = settings.general.postprocessing_cmd

    # chmod is stored as an octal string; only honored on non-Windows when enabled
    chmod = int(settings.general.chmod, 8) if not sys.platform.startswith(
        'win') and settings.general.getboolean('chmod_enabled') else None

    # work with alpha3 codes from here on
    language = alpha3_from_alpha2(language)

    # custom languages (defined by CustomLanguage) supply their own subzero language object
    custom = CustomLanguage.from_value(language, "alpha3")
    if custom is None:
        lang_obj = Language(language)
    else:
        lang_obj = custom.subzero_language()

    if forced:
        lang_obj = Language.rebuild(lang_obj, forced=True)

    sub = Subtitle(
        lang_obj,
        mods=get_array_from(settings.general.subzero_mods)
    )

    sub.content = subtitle.read()
    if not sub.is_valid():
        # NOTE(review): logging.exception outside an except block logs a bogus
        # "NoneType: None" traceback - logging.error looks intended. Also note
        # the invalid upload is still saved below; only the mods are dropped.
        logging.exception('BAZARR Invalid subtitle file: ' + subtitle.filename)
        sub.mods = None

    if settings.general.getboolean('utf8_encode'):
        sub.set_encoding("utf-8")

    saved_subtitles = []
    try:
        saved_subtitles = save_subtitles(path,
                                         [sub],
                                         single=single,
                                         tags=None,  # fixme
                                         directory=get_target_folder(path),
                                         chmod=chmod,
                                         # formats=("srt", "vtt")
                                         path_decoder=force_unicode)
    except Exception:
        logging.exception('BAZARR Error saving Subtitles file to disk for this file:' + path)
        return

    if len(saved_subtitles) < 1:
        # NOTE(review): logging.exception here too is outside an except block
        logging.exception('BAZARR Error saving Subtitles file to disk for this file:' + path)
        return

    subtitle_path = saved_subtitles[0].storage_path

    # human-readable modifier for the UI message (HI wins over forced)
    if hi:
        modifier_string = " HI"
    elif forced:
        modifier_string = " forced"
    else:
        modifier_string = ""
    message = language_from_alpha3(language) + modifier_string + " Subtitles manually uploaded."

    # machine-readable modifier appended to the language codes (e.g. "eng:hi")
    if hi:
        modifier_code = ":hi"
    elif forced:
        modifier_code = ":forced"
    else:
        modifier_code = ""
    uploaded_language_code3 = language + modifier_code
    uploaded_language = language_from_alpha3(language) + modifier_string
    uploaded_language_code2 = alpha2_from_alpha3(language) + modifier_code
    audio_language_code2 = alpha2_from_language(audio_language)
    audio_language_code3 = alpha3_from_language(audio_language)

    if media_type == 'series':
        episode_metadata = TableEpisodes.select(TableEpisodes.sonarrSeriesId,
                                                TableEpisodes.sonarrEpisodeId) \
            .where(TableEpisodes.path == path_mappings.path_replace_reverse(path)) \
            .dicts() \
            .get_or_none()
        if not episode_metadata:
            # file unknown to the Sonarr episodes table; nothing to update
            return
        series_id = episode_metadata['sonarrSeriesId']
        episode_id = episode_metadata['sonarrEpisodeId']
        sync_subtitles(video_path=path, srt_path=subtitle_path, srt_lang=uploaded_language_code2,
                       media_type=media_type, percent_score=100,
                       sonarr_series_id=episode_metadata['sonarrSeriesId'], forced=forced,
                       sonarr_episode_id=episode_metadata['sonarrEpisodeId'])
    else:
        movie_metadata = TableMovies.select(TableMovies.radarrId) \
            .where(TableMovies.path == path_mappings.path_replace_reverse_movie(path)) \
            .dicts() \
            .get_or_none()
        if not movie_metadata:
            # file unknown to the Radarr movies table; nothing to update
            return
        series_id = ""
        episode_id = movie_metadata['radarrId']
        sync_subtitles(video_path=path, srt_path=subtitle_path, srt_lang=uploaded_language_code2,
                       media_type=media_type, percent_score=100, radarr_id=movie_metadata['radarrId'],
                       forced=forced)

    if use_postprocessing:
        command = pp_replace(postprocessing_cmd, path, subtitle_path, uploaded_language,
                             uploaded_language_code2, uploaded_language_code3, audio_language,
                             audio_language_code2, audio_language_code3, forced, 100, "1", "manual",
                             series_id, episode_id, hi=hi)
        postprocessing(command, path)

    if media_type == 'series':
        reversed_path = path_mappings.path_replace_reverse(path)
        reversed_subtitles_path = path_mappings.path_replace_reverse(subtitle_path)
        notify_sonarr(episode_metadata['sonarrSeriesId'])
        event_stream(type='series', action='update', payload=episode_metadata['sonarrSeriesId'])
        event_stream(type='episode-wanted', action='delete', payload=episode_metadata['sonarrEpisodeId'])
    else:
        reversed_path = path_mappings.path_replace_reverse_movie(path)
        reversed_subtitles_path = path_mappings.path_replace_reverse_movie(subtitle_path)
        notify_radarr(movie_metadata['radarrId'])
        event_stream(type='movie', action='update', payload=movie_metadata['radarrId'])
        event_stream(type='movie-wanted', action='delete', payload=movie_metadata['radarrId'])

    return message, reversed_path, reversed_subtitles_path
def item_discover_missing_subs(rating_key, kind="show", added_at=None, section_title=None, internal=False,
                               external=True, languages=()):
    """Determine which of the wanted languages have no subtitle yet for a Plex item.

    Inspects, per media part: subtitles previously downloaded by us (own
    external files on disk), plus the item's internal (embedded) and external
    streams reported by Plex, and compares the union against ``languages``.

    :param rating_key: Plex rating key of the item
    :param kind: "show" (episode, adds season/show to display title) or movie
    :param added_at: passed through untouched into the return tuple
    :param section_title: library section name, used for the display title
    :param internal: count embedded subtitle streams as existing
    :param external: count external subtitle streams as existing
    :param languages: iterable of Language objects we want subtitles for
    :return: (added_at, item_id, item_title, item, missing) where missing is a
             set of Language objects still lacking a subtitle
    """
    item_id = int(rating_key)
    item = get_item(rating_key)

    if kind == "show":
        item_title = get_plex_item_display_title(item, kind, parent=item.season, section_title=section_title,
                                                 parent_title=item.show.title)
    else:
        item_title = get_plex_item_display_title(item, kind, section_title=section_title)

    subtitle_storage = get_subtitle_storage()
    stored_subs = subtitle_storage.load(rating_key)
    subtitle_storage.destroy()

    subtitle_target_dir, tdir_is_absolute = config.subtitle_sub_dir

    missing = set()
    # strip forced/hi modifiers so comparisons work on the plain language
    languages_set = set([Language.rebuild(l) for l in languages])
    for media in item.media:
        existing_subs = {"internal": [], "external": [], "own_external": [], "count": 0}
        for part in media.parts:

            # did we already download an external subtitle before?
            if subtitle_target_dir and stored_subs:
                for language in languages_set:
                    if has_external_subtitle(part.id, stored_subs, language):
                        # check the existence of the actual subtitle file

                        # get media filename without extension
                        part_basename = os.path.splitext(os.path.basename(part.file))[0]

                        # compute target directory for subtitle
                        # fixme: move to central location
                        if tdir_is_absolute:
                            possible_subtitle_path_base = subtitle_target_dir
                        else:
                            possible_subtitle_path_base = os.path.join(os.path.dirname(part.file),
                                                                       subtitle_target_dir)

                        possible_subtitle_path_base = os.path.realpath(possible_subtitle_path_base)

                        # folder actually exists?
                        if not os.path.isdir(possible_subtitle_path_base):
                            continue

                        found_any = False
                        for ext in config.subtitle_formats:
                            # "only one" mode stores files without the language tag in the name
                            if cast_bool(Prefs['subtitles.only_one']):
                                possible_subtitle_path = os.path.join(possible_subtitle_path_base,
                                                                      u"%s.%s" % (part_basename, ext))
                            else:
                                possible_subtitle_path = os.path.join(possible_subtitle_path_base,
                                                                      u"%s.%s.%s" % (part_basename, language, ext))

                            # check for subtitle existence
                            if os.path.isfile(possible_subtitle_path):
                                found_any = True
                                Log.Debug(u"Found: %s", possible_subtitle_path)
                                break

                        if found_any:
                            existing_subs["own_external"].append(language)
                            existing_subs["count"] = existing_subs["count"] + 1

            # Plex-reported subtitle streams (stream_type 3)
            for stream in part.streams:
                if stream.stream_type == 3:
                    is_forced = is_stream_forced(stream)
                    # streams with an index are embedded in the container
                    if stream.index:
                        key = "internal"
                    else:
                        key = "external"

                    if not config.exotic_ext and stream.codec.lower() not in TEXT_SUBTITLE_EXTS:
                        continue

                    # treat unknown language as lang1?
                    if not stream.language_code and config.treat_und_as_first:
                        lang = Language.rebuild(list(config.lang_list)[0])

                    # we can't parse empty language codes
                    elif not stream.language_code or not stream.codec:
                        continue

                    else:
                        # parse with internal language parser first
                        try:
                            lang = get_language_from_stream(stream.language_code)
                            if not lang:
                                if config.treat_und_as_first:
                                    lang = Language.rebuild(list(config.lang_list)[0])
                                else:
                                    continue
                        except (ValueError, LanguageReverseError):
                            continue

                    if lang:
                        # Log.Debug("Found babelfish language: %r", lang)
                        lang.forced = is_forced
                        existing_subs[key].append(lang)
                        existing_subs["count"] = existing_subs["count"] + 1

        # default: everything is missing until proven otherwise below
        missing_from_part = set([Language.rebuild(l) for l in languages])
        if existing_subs["count"]:
            # fixme: this is actually somewhat broken with IETF, as Plex doesn't store the country portion
            # (pt instead of pt-BR) inside the database. So it might actually download pt-BR if there's a local pt-BR
            # subtitle but not our own.
            existing_flat = set((existing_subs["internal"] if internal else []) +
                                (existing_subs["external"] if external else []) +
                                existing_subs["own_external"])
            check_languages = set([Language.rebuild(l) for l in languages])
            alpha3_map = {}
            if config.ietf_as_alpha3:
                # strip the country part on both sides, remembering it so it can
                # be restored on the missing languages afterwards
                for language in existing_flat:
                    if language.country:
                        alpha3_map[language.alpha3] = language.country
                        language.country = None

                for language in check_languages:
                    if language.country:
                        alpha3_map[language.alpha3] = language.country
                        language.country = None

            # compare sets of strings, not sets of different Language instances
            check_languages_str = set(str(l) for l in check_languages)
            existing_flat_str = set(str(l) for l in existing_flat)

            if check_languages_str.issubset(existing_flat_str) or \
                    (len(existing_flat) >= 1 and Prefs['subtitles.only_one']):
                # all subs found
                #Log.Info(u"All subtitles exist for '%s'", item_title)
                continue

            missing_from_part = set(Language.fromietf(l) for l in check_languages_str - existing_flat_str)
            if config.ietf_as_alpha3:
                # restore the country portion stripped above
                for language in missing_from_part:
                    language.country = alpha3_map.get(language.alpha3, None)

        if missing_from_part:
            Log.Info(u"Subs still missing for '%s' (%s: %s): %s", item_title, rating_key, media.id,
                     missing_from_part)
            missing.update(missing_from_part)

    if missing:
        # deduplicate
        missing = set(Language.fromietf(la) for la in set(str(l) for l in missing))

    return added_at, item_id, item_title, item, missing
def query(self, video, languages, hash=None, size=None, imdb_id=None, query=None, season=None, episode=None,
          tag=None, use_tag_search=False, only_foreign=False, also_foreign=False):
    """Query the OpenSubtitles XML-RPC API and return matching subtitles.

    Builds search criteria from (hash, size), tag and/or imdb id - at least one
    must be available - and converts each API result into a subtitle object,
    applying foreign/forced and hearing-impaired filtering on the way.

    :param video: the video being searched for (used for imdb id validation)
    :param languages: set of Language objects to search for
    :param hash: OpenSubtitles moviehash, used together with size
    :param size: file size in bytes, used together with hash
    :param imdb_id: imdb id including the "tt" prefix
    :param query: text queries (currently disabled, see comment below)
    :param season: season number for episode searches
    :param episode: episode number for episode searches
    :param tag: release tag, used when use_tag_search is enabled
    :param use_tag_search: enable searching by tag
    :param only_foreign: return only foreign/forced subtitles
    :param also_foreign: include foreign/forced subtitles alongside normal ones
    :raises ValueError: when no usable search criteria could be built
    :return: list of subtitle objects
    """
    # fill the search criteria
    criteria = []
    if hash and size:
        criteria.append({'moviehash': hash, 'moviebytesize': str(size)})
    if use_tag_search and tag:
        criteria.append({'tag': tag})
    if imdb_id:
        if season and episode:
            criteria.append({
                'imdbid': imdb_id[2:],
                'season': season,
                'episode': episode
            })
        else:
            criteria.append({'imdbid': imdb_id[2:]})
    # Commented out after the issue with episode released after October 17th 2020.
    # if query and season and episode:
    #     for q in query:
    #         criteria.append({'query': q.replace('\'', ''), 'season': season, 'episode': episode})
    # elif query:
    #     for q in query:
    #         criteria.append({'query': q.replace('\'', '')})
    if not criteria:
        raise ValueError('Not enough information')

    # add the language
    for criterion in criteria:
        criterion['sublanguageid'] = ','.join(
            sorted(l.opensubtitles for l in languages))

    # query the server
    logger.info('Searching subtitles %r', criteria)
    response = self.use_token_or_login(lambda: self.retry(lambda: checked(
        lambda: self.server.SearchSubtitles(self.token, criteria))))

    subtitles = []

    # exit if no data
    if not response['data']:
        logger.info('No subtitles found')
        return subtitles

    # loop over subtitle items
    for subtitle_item in response['data']:
        _subtitle_item = subtitle_item

        # in case OS messes their API results up again, check whether we've got a dict or a string as subtitle_item
        if hasattr(_subtitle_item, "startswith"):
            _subtitle_item = response["data"][subtitle_item]

        # read the item
        language = Language.fromopensubtitles(_subtitle_item['SubLanguageID'])
        hearing_impaired = bool(int(_subtitle_item['SubHearingImpaired']))
        page_link = _subtitle_item['SubtitlesLink']
        subtitle_id = int(_subtitle_item['IDSubtitleFile'])
        matched_by = _subtitle_item['MatchedBy']
        movie_kind = _subtitle_item['MovieKind']
        hash = _subtitle_item['MovieHash']
        movie_name = _subtitle_item['MovieName']
        movie_release_name = _subtitle_item['MovieReleaseName']
        movie_year = int(_subtitle_item['MovieYear']) if _subtitle_item['MovieYear'] else None
        # episode results carry the series' imdb id in a separate field
        if season or episode:
            movie_imdb_id = 'tt' + _subtitle_item['SeriesIMDBParent']
        else:
            movie_imdb_id = 'tt' + _subtitle_item['IDMovieImdb']
        movie_fps = _subtitle_item.get('MovieFPS')
        series_season = int(_subtitle_item['SeriesSeason']) if _subtitle_item['SeriesSeason'] else None
        series_episode = int(_subtitle_item['SeriesEpisode']) if _subtitle_item['SeriesEpisode'] else None
        filename = _subtitle_item['SubFileName']
        encoding = _subtitle_item.get('SubEncoding') or None
        foreign_parts_only = bool(int(_subtitle_item.get('SubForeignPartsOnly', 0)))

        # foreign/forced subtitles only wanted
        if only_foreign and not foreign_parts_only:
            continue

        # foreign/forced not wanted
        elif not only_foreign and not also_foreign and foreign_parts_only:
            continue

        # set subtitle language to forced if it's foreign_parts_only
        elif (also_foreign or only_foreign) and foreign_parts_only:
            language = Language.rebuild(language, forced=True)

        # set subtitle language to hi if it's hearing_impaired
        if hearing_impaired:
            language = Language.rebuild(language, hi=True)

        if language not in languages:
            continue

        # compare against the video's imdb id with leading zeros stripped
        if video.imdb_id and (movie_imdb_id != re.sub("(?<![^a-zA-Z])0+", "", video.imdb_id)):
            continue

        query_parameters = _subtitle_item.get("QueryParameters")

        subtitle = self.subtitle_class(language, hearing_impaired, page_link, subtitle_id, matched_by,
                                       movie_kind, hash, movie_name, movie_release_name, movie_year,
                                       movie_imdb_id, series_season, series_episode, query_parameters,
                                       filename, encoding, movie_fps, skip_wrong_fps=self.skip_wrong_fps)
        subtitle.uploader = _subtitle_item['UserNickName'] if _subtitle_item['UserNickName'] else 'anonymous'
        logger.debug('Found subtitle %r by %s', subtitle, matched_by)
        subtitles.append(subtitle)

    return subtitles
class YifySubtitlesProvider(Provider):
    """YIFY Subtitles Provider.

    Scrapes yifysubtitles.org/.com movie pages by imdb id; movies only.
    """

    # (site display name, alpha3 code, country) - the display name is used to
    # map the site's language column back to a Language object in _parse_row
    YifyLanguages = [('Albanian', 'sqi', None), ('Arabic', 'ara', None), ('Bengali', 'ben', None),
                     ('Brazilian Portuguese', 'por', 'BR'), ('Bulgarian', 'bul', None), ('Chinese', 'zho', None),
                     ('Croatian', 'hrv', None), ('Czech', 'ces', None), ('Danish', 'dan', None),
                     ('Dutch', 'nld', None), ('English', 'eng', None), ('Farsi/Persian', 'fas', None),
                     ('Finnish', 'fin', None), ('French', 'fra', None), ('German', 'deu', None),
                     ('Greek', 'ell', None), ('Hebrew', 'heb', None), ('Hungarian', 'hun', None),
                     ('Indonesian', 'ind', None), ('Italian', 'ita', None), ('Japanese', 'jpn', None),
                     ('Korean', 'kor', None), ('Lithuanian', 'lit', None), ('Macedonian', 'mkd', None),
                     ('Malay', 'msa', None), ('Norwegian', 'nor', None), ('Polish', 'pol', None),
                     ('Portuguese', 'por', None), ('Romanian', 'ron', None), ('Russian', 'rus', None),
                     ('Serbian', 'srp', None), ('Slovenian', 'slv', None), ('Spanish', 'spa', None),
                     ('Swedish', 'swe', None), ('Thai', 'tha', None), ('Turkish', 'tur', None),
                     ('Urdu', 'urd', None), ('Vietnamese', 'vie', None)]

    # advertise both the plain and the hearing-impaired variant of each language
    languages = {Language(l, c) for (_, l, c) in YifyLanguages}
    languages.update(set(Language.rebuild(l, hi=True) for l in languages))

    server_urls = [
        'https://yifysubtitles.org',
        'https://www.yifysubtitles.com'
    ]

    video_types = (Movie, )

    def initialize(self):
        # fresh session with a random user agent and browser-like headers
        self.session = Session()
        self.session.headers['User-Agent'] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)]
        self.session.headers["Accept"] = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
        self.session.headers["Accept-Language"] = "en-US,en;q=0.5"
        self.session.headers["Accept-Encoding"] = "gzip, deflate"
        self.session.headers["DNT"] = "1"
        self.session.headers["Connection"] = "keep-alive"
        self.session.headers["Upgrade-Insecure-Requests"] = "1"
        self.session.headers["Cache-Control"] = "max-age=0"

    def terminate(self):
        self.session.close()

    def _parse_row(self, row, languages, server_url):
        """Parse one result table row into a list with zero or one YifySubtitle.

        Returns [] when the row's language is not among the requested ones.
        Raises StopIteration (via next()) for languages not in YifyLanguages;
        the caller swallows per-row exceptions.
        """
        td = row.findAll('td')
        rating = int(td[0].text)
        sub_lang = td[1].text
        release = re.sub(r'^subtitle ', '', td[2].text)
        sub_link = td[2].find('a').get('href')
        page_link = server_url + sub_link
        # turn the detail-page link into the direct zip download link
        sub_link = re.sub(r'^/subtitles/', server_url + '/subtitle/', sub_link) + '.zip'
        hi = True if td[3].find('span', {'class': 'hi-subtitle'}) else False
        uploader = td[4].text

        _, l, c = next(x for x in self.YifyLanguages if x[0] == sub_lang)
        lang = Language(l, c)

        # set subtitle language to hi if it's hearing_impaired
        if hi:
            lang = Language.rebuild(lang, hi=True)

        if languages & {lang}:
            return [YifySubtitle(lang, page_link, release, uploader, sub_link, rating, hi)]

        return []

    def query(self, languages, imdb_id):
        """Fetch and parse the movie page for imdb_id, trying each mirror in turn."""
        subtitles = []
        logger.info('Searching subtitle %r', imdb_id)

        for server_url in self.server_urls:
            response = self.session.get(server_url + '/movie-imdb/' + imdb_id,
                                        allow_redirects=False, timeout=10,
                                        headers={'Referer': server_url})
            # stop at the first mirror that answers with a real page
            if response.status_code == 200:
                break

        response.raise_for_status()
        if response.status_code != 200:
            logger.debug('No subtitles found')
            return subtitles

        soup = BeautifulSoup(response.content, 'lxml')
        tbl = soup.find('table', {'class': 'other-subs'})
        tbl_body = tbl.find('tbody') if tbl else None
        rows = tbl_body.findAll('tr') if tbl_body else []

        for row in rows:
            try:
                subtitles = subtitles + self._parse_row(row, languages, server_url)
            except Exception as e:
                # NOTE(review): malformed rows are silently dropped; at least a
                # debug log of `e` would make scraping breakage visible
                pass

        # best-rated first
        subtitles.sort(key=lambda x: x.rating, reverse=True)
        return subtitles

    def list_subtitles(self, video, languages):
        return self.query(languages, video.imdb_id) if isinstance(video, Movie) and video.imdb_id else []

    def download_subtitle(self, subtitle):
        """Download the subtitle zip (served from a region cache when possible)."""
        logger.info('Downloading subtitle %r', subtitle.sub_link)

        # cache the whole HTTP response keyed by the download link
        cache_key = sha1(subtitle.sub_link.encode("utf-8")).digest()
        request = region.get(cache_key)
        if request is NO_VALUE:
            request = self.session.get(subtitle.sub_link, headers={'Referer': subtitle.page_link})
            request.raise_for_status()
            region.set(cache_key, request)
        else:
            logger.info('Cache file: %s', codecs.encode(cache_key, 'hex_codec').decode('utf-8'))

        archive_stream = io.BytesIO(request.content)
        if is_zipfile(archive_stream):
            self._process_archive(ZipFile(archive_stream), subtitle)
        else:
            # don't keep a broken payload cached
            logger.error('Ignore unsupported archive %r', request.headers)
            region.delete(cache_key)

    def _process_archive(self, archive_stream, subtitle):
        """Set subtitle.content from the first valid .srt/.sub member of the zip."""
        for file_name in archive_stream.namelist():
            if file_name.lower().endswith(('.srt', '.sub')):
                logger.info('Found subtitle file %r', file_name)
                subtitle.content = fix_line_ending(archive_stream.read(file_name))
                if subtitle.is_valid():
                    return
class PodnapisiProvider(_PodnapisiProvider, ProviderSubtitleArchiveMixin):
    """Podnapisi.net provider using the legacy XML search endpoint.

    Extends the base subliminal provider with forced/foreign and
    hearing-impaired handling.
    """

    # all alpha2 languages plus a few explicit script/country variants;
    # advertise forced and hearing-impaired variants of each as well
    languages = ({Language('por', 'BR'), Language('srp', script='Latn'), Language('srp', script='Cyrl')} |
                 {Language.fromalpha2(l) for l in language_converters['alpha2'].codes})
    languages.update(set(Language.rebuild(l, forced=True) for l in languages))
    languages.update(set(Language.rebuild(l, hi=True) for l in languages))

    video_types = (Episode, Movie)

    server_url = 'https://podnapisi.net/subtitles/'
    only_foreign = False
    also_foreign = False
    verify_ssl = True
    subtitle_class = PodnapisiSubtitle
    hearing_impaired_verifiable = True

    def __init__(self, only_foreign=False, also_foreign=False, verify_ssl=True):
        self.only_foreign = only_foreign
        self.also_foreign = also_foreign
        self.verify_ssl = verify_ssl

        if only_foreign:
            logger.info("Only searching for foreign/forced subtitles")

        super(PodnapisiProvider, self).__init__()

    def initialize(self):
        super().initialize()
        # custom adapter (e.g. TLS tweaks) for the podnapisi host
        self.session.mount('https://', PodnapisiAdapter())
        self.session.verify = self.verify_ssl

    def list_subtitles(self, video, languages):
        if video.is_special:
            logger.info("%s can't search for specials right now, skipping", self)
            return []

        season = episode = None
        if isinstance(video, Episode):
            titles = [fix_inconsistent_naming(title) for title in [video.series] + video.alternative_series]
            season = video.season
            episode = video.episode
        else:
            titles = [video.title] + video.alternative_titles

        # try each known title until one of them yields results
        for title in titles:
            subtitles = [s for l in languages for s in
                         self.query(l, title, video, season=season, episode=episode, year=video.year,
                                    only_foreign=self.only_foreign, also_foreign=self.also_foreign)]
            if subtitles:
                return subtitles

        return []

    def query(self, language, keyword, video, season=None, episode=None, year=None, only_foreign=False,
              also_foreign=False):
        """Search the paginated XML endpoint for one language/keyword combination."""
        search_language = str(language).lower()

        # sr-Cyrl specialcase
        if search_language == "sr-cyrl":
            search_language = "sr"

        # set parameters, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164#p212652
        params = {'sXML': 1, 'sL': search_language, 'sK': keyword}

        is_episode = False
        if season and episode:
            is_episode = True
            params['sTS'] = season
            params['sTE'] = episode

        if year:
            params['sY'] = year

        # loop over paginated results
        logger.info('Searching subtitles %r', params)
        subtitles = []
        pids = set()
        while True:
            # query the server
            content = None
            try:
                content = self.session.get(self.server_url + 'search/old', params=params, timeout=10).content
                xml = etree.fromstring(content)
            except etree.ParseError:
                logger.error("Wrong data returned: %r", content)
                break

            # exit if no results
            if not int(xml.find('pagination/results').text):
                logger.debug('No subtitles found')
                break

            # loop over subtitles
            for subtitle_xml in xml.findall('subtitle'):
                # read xml elements
                pid = subtitle_xml.find('pid').text
                # ignore duplicates, see http://www.podnapisi.net/forum/viewtopic.php?f=62&t=26164&start=10#p213321
                if pid in pids:
                    continue

                _language = Language.fromietf(subtitle_xml.find('language').text)
                # flags: 'n' marks hearing-impaired, 'f' marks foreign/forced
                hearing_impaired = 'n' in (subtitle_xml.find('flags').text or '')
                foreign = 'f' in (subtitle_xml.find('flags').text or '')

                if only_foreign and not foreign:
                    continue

                elif not only_foreign and not also_foreign and foreign:
                    continue

                elif also_foreign and foreign:
                    _language = Language.rebuild(_language, forced=True)

                # set subtitle language to hi if it's hearing_impaired
                if hearing_impaired:
                    _language = Language.rebuild(_language, hi=True)

                if language != _language:
                    continue

                page_link = subtitle_xml.find('url').text
                releases = []
                if subtitle_xml.find('release').text:
                    for release in subtitle_xml.find('release').text.split():
                        releases.append(re.sub(r'\.+$', '', release))  # remove trailing dots
                title = subtitle_xml.find('title').text
                r_season = int(subtitle_xml.find('tvSeason').text)
                r_episode = int(subtitle_xml.find('tvEpisode').text)
                r_year = int(subtitle_xml.find('year').text)

                if is_episode:
                    subtitle = self.subtitle_class(_language, hearing_impaired, page_link, pid, releases, title,
                                                   season=r_season, episode=r_episode, year=r_year,
                                                   asked_for_release_group=video.release_group,
                                                   asked_for_episode=episode)
                else:
                    subtitle = self.subtitle_class(_language, hearing_impaired, page_link, pid, releases, title,
                                                   year=r_year,
                                                   asked_for_release_group=video.release_group)

                logger.debug('Found subtitle %r', subtitle)
                subtitles.append(subtitle)
                pids.add(pid)

            # stop on last page
            if int(xml.find('pagination/current').text) >= int(xml.find('pagination/count').text):
                break

            # increment current page
            params['page'] = int(xml.find('pagination/current').text) + 1
            logger.debug('Getting page %d', params['page'])
            xml = None

        return subtitles

    def download_subtitle(self, subtitle):
        # download as a zip
        logger.info('Downloading subtitle %r', subtitle)
        r = self.session.get(self.server_url + subtitle.pid + '/download', params={'container': 'zip'},
                             timeout=10)
        r.raise_for_status()

        # open the zip
        with ZipFile(io.BytesIO(r.content)) as zf:
            subtitle.content = self.get_subtitle_from_archive(subtitle, zf)
def guess_external_subtitles(dest_folder, subtitles):
    """Fill in missing language / hearing-impaired info for external subtitle files.

    For each entry whose language is unknown, the subtitle file's content is
    decoded (utf-8 first, then a charset guess) and its language detected.
    Afterwards, subtitles not already flagged HI are scanned for the
    hearing-impaired pattern (``hi_regex``) and re-flagged when it matches.

    :param dest_folder: directory containing the subtitle files
    :param subtitles: dict mapping subtitle filename -> Language (or falsy when
                      unknown); mutated in place
    :return: the (mutated) subtitles dict
    """
    for subtitle, language in subtitles.items():
        if not language:
            subtitle_path = os.path.join(dest_folder, subtitle)
            if os.path.exists(subtitle_path) and os.path.splitext(subtitle_path)[1] in \
                    core.SUBTITLE_EXTENSIONS:
                logging.debug("BAZARR falling back to file content analysis to detect language.")
                detected_language = None

                # to improve performance, skip detection of files larger that 1M
                if os.path.getsize(subtitle_path) > 1 * 1024 * 1024:
                    logging.debug("BAZARR subtitles file is too large to be text based. Skipping this file: " +
                                  subtitle_path)
                    continue

                with open(subtitle_path, 'rb') as f:
                    text = f.read()

                try:
                    text = text.decode('utf-8')
                    detected_language = guess_language(text)
                except UnicodeDecodeError:
                    # not utf-8: guess the encoding before detecting the language
                    detector = Detector()
                    try:
                        guess = detector.detect(text)
                    except Exception:
                        logging.debug("BAZARR skipping this subtitles because we can't guess the encoding. "
                                      "It's probably a binary file: " + subtitle_path)
                        continue
                    else:
                        logging.debug('BAZARR detected encoding %r', guess)
                        try:
                            text = text.decode(guess)
                        except Exception:
                            logging.debug("BAZARR skipping this subtitles because we can't decode the file using the "
                                          "guessed encoding. It's probably a binary file: " + subtitle_path)
                            continue
                    detected_language = guess_language(text)
                except Exception:
                    logging.debug('BAZARR was unable to detect encoding for this subtitles file: %r',
                                  subtitle_path)
                finally:
                    if detected_language:
                        logging.debug("BAZARR external subtitles detected and guessed this language: " +
                                      str(detected_language))
                        try:
                            subtitles[subtitle] = Language.rebuild(Language.fromietf(detected_language),
                                                                   forced=False, hi=False)
                        except Exception:
                            pass

        # Detect hearing-impaired external subtitles not identified in filename
        # fix: language detection above may have failed, leaving the value None;
        # the original unconditionally accessed .hi and raised AttributeError
        if subtitles[subtitle] and not subtitles[subtitle].hi:
            subtitle_path = os.path.join(dest_folder, subtitle)

            # to improve performance, skip detection of files larger that 1M
            if os.path.getsize(subtitle_path) > 1 * 1024 * 1024:
                logging.debug("BAZARR subtitles file is too large to be text based. Skipping this file: " +
                              subtitle_path)
                continue

            with open(subtitle_path, 'rb') as f:
                text = f.read()

            try:
                text = text.decode('utf-8')
            except UnicodeDecodeError:
                detector = Detector()
                try:
                    guess = detector.detect(text)
                except Exception:
                    logging.debug("BAZARR skipping this subtitles because we can't guess the encoding. "
                                  "It's probably a binary file: " + subtitle_path)
                    continue
                else:
                    logging.debug('BAZARR detected encoding %r', guess)
                    try:
                        text = text.decode(guess)
                    except Exception:
                        logging.debug("BAZARR skipping this subtitles because we can't decode the file using the "
                                      "guessed encoding. It's probably a binary file: " + subtitle_path)
                        continue

            if bool(re.search(hi_regex, text)):
                subtitles[subtitle] = Language.rebuild(subtitles[subtitle], forced=False, hi=True)
    return subtitles
def prepare_video(pms_video_info, ignore_all=False, hints=None, rating_key=None, providers=None,
                  skip_hashing=False):
    """
    returns a subliminal/guessit-refined parsed video
    :param pms_video_info: dict with Plex media info; must contain "plex_part"
    :param ignore_all: force refresh - skip scanning existing external/embedded subtitles
    :param hints: guessit hints forwarded to parse_video
    :param rating_key: Plex rating key of the item being refreshed
    :param providers: provider pool forwarded to parse_video
    :param skip_hashing: skip file hashing (also skipped in low impact mode)
    :return: the parsed video object, or None when guessing the file failed
    """
    embedded_subtitles = not ignore_all and Prefs['subtitles.scan.embedded']
    external_subtitles = not ignore_all and Prefs['subtitles.scan.external']
    plex_part = pms_video_info["plex_part"]

    if ignore_all:
        Log.Debug("Force refresh intended.")

    Log.Debug("Detecting streams: %s, external_subtitles=%s, embedded_subtitles=%s" % (
        plex_part.file, external_subtitles, embedded_subtitles))

    known_embedded = []
    parts = []
    # collect all media parts of the item and find the one matching plex_part
    for media in list(Plex["library"].metadata(rating_key))[0].media:
        parts += media.parts

    plexpy_part = None
    for part in parts:
        if int(part.id) == int(plex_part.id):
            plexpy_part = part

    # embedded subtitles
    # fixme: skip the whole scanning process if known_embedded == wanted languages?
    audio_languages = []
    if plexpy_part:
        for stream in plexpy_part.streams:
            # audio stream
            if stream.stream_type == 2:
                lang = None
                try:
                    lang = language_from_stream(stream.language_code)
                except LanguageError:
                    Log.Debug("Couldn't detect embedded audio stream language: %s", stream.language_code)

                # treat unknown language as lang1?
                if not lang and config.treat_und_as_first:
                    lang = Language.rebuild(list(config.lang_list)[0])
                audio_languages.append(lang)

            # subtitle stream
            elif stream.stream_type == 3 and embedded_subtitles:
                is_forced = helpers.is_stream_forced(stream)

                # only consider forced streams when forced handling is enabled
                if ((config.forced_only or config.forced_also) and is_forced) or not is_forced:
                    # embedded subtitle
                    # fixme: tap into external subtitles here instead of scanning for ourselves later?
                    if stream.codec and getattr(stream, "index", None):
                        if config.exotic_ext or stream.codec.lower() in config.text_based_formats:
                            lang = None
                            try:
                                lang = language_from_stream(stream.language_code)
                            except LanguageError:
                                Log.Debug("Couldn't detect embedded subtitle stream language: %s",
                                          stream.language_code)

                            # treat unknown language as lang1?
                            if not lang and config.treat_und_as_first:
                                lang = Language.rebuild(list(config.lang_list)[0])

                            if lang:
                                if is_forced:
                                    lang.forced = True
                                known_embedded.append(lang)
    else:
        Log.Warn("Part %s missing of %s, not able to scan internal streams", plex_part.id, rating_key)

    # metadata subtitles (sideloaded via the agent)
    known_metadata_subs = set()
    meta_subs = get_subtitles_from_metadata(plex_part)
    for language, subList in meta_subs.iteritems():
        try:
            lang = Language.fromietf(Locale.Language.Match(language))
        except LanguageError:
            if config.treat_und_as_first:
                lang = Language.rebuild(list(config.lang_list)[0])
            else:
                continue

        if subList:
            for key in subList:
                # keys starting with subzero_md_forced mark forced variants
                if key.startswith("subzero_md_forced"):
                    lang = Language.rebuild(lang, forced=True)

                known_metadata_subs.add(lang)
                Log.Debug("Found metadata subtitle %r:%s for %s", lang, key, plex_part.file)

    Log.Debug("Known metadata subtitles: %r", known_metadata_subs)
    Log.Debug("Known embedded subtitles: %r", known_embedded)

    subtitle_storage = get_subtitle_storage()
    stored_subs = subtitle_storage.load(rating_key)
    subtitle_storage.destroy()

    try:
        # get basic video info scan (filename)
        video = parse_video(plex_part.file, hints, skip_hashing=config.low_impact_mode or skip_hashing,
                            providers=providers)

        # set stream languages
        if audio_languages:
            video.audio_languages = audio_languages
            Log.Info("Found audio streams: %s" % ", ".join([str(l) for l in audio_languages]))

        if not ignore_all:
            set_existing_languages(video, pms_video_info, external_subtitles=external_subtitles,
                                   embedded_subtitles=embedded_subtitles, known_embedded=known_embedded,
                                   stored_subs=stored_subs, languages=config.lang_list,
                                   only_one=config.only_one, known_metadata_subs=known_metadata_subs)

        # add video fps info
        video.fps = plex_part.fps
        return video

    except ValueError:
        # parse_video couldn't make sense of the filename; caller gets None
        Log.Warn("File could not be guessed: %s: %s", plex_part.file, traceback.format_exc())
def ItemDetailsMenu(rating_key, title=None, base_title=None, item_title=None, randomize=None, header=None,
                    message=None):
    """
    displays the item details menu of an item that doesn't contain any deeper tree, such as a movie or an episode

    Builds a menu with: an optional "back to season" entry (episodes only), refresh and
    force-refresh actions, per-part/per-language subtitle management entries, an embedded-subtitle
    listing (when the Plex transcoder is available), and include/exclude options.

    :param rating_key: Plex rating key of the item to display
    :param title: display title of the item
    :param base_title: parent title to prefix (e.g. the show title for an episode)
    :param item_title: plain item title used in action labels and callbacks
    :param randomize: cache-busting timestamp passed by the menu framework (unused directly)
    :param header: optional header text for the container
    :param message: optional message text for the container
    :return: SubFolderObjectContainer with the item's detail entries
    """
    # local import to avoid a circular import with interface.main
    from interface.main import InclExclMenu

    title = unicode(base_title) + " > " + unicode(title) if base_title else unicode(title)
    item = plex_item = get_item(rating_key)
    current_kind = get_item_kind_from_rating_key(rating_key)

    # refresh timeout in seconds; converted to milliseconds where passed to RefreshItem
    timeout = 30

    oc = SubFolderObjectContainer(title2=title, replace_parent=True, header=header, message=message)

    # Plex returned nothing for the rating key — show a self-refreshing placeholder entry
    # instead of an empty container, then bail out.
    if not item:
        oc.add(DirectoryObject(
            key=Callback(ItemDetailsMenu, rating_key=rating_key, title=title, base_title=base_title,
                         item_title=item_title, randomize=timestamp()),
            title=_(u"Item not found: %s!", item_title),
            summary=_("Plex didn't return any information about the item, please refresh it and come back later"),
            thumb=default_thumb
        ))
        return oc

    # add back to season for episode
    if current_kind == "episode":
        # local import to avoid a circular import with interface.menu
        from interface.menu import MetadataMenu
        show = get_item(item.show.rating_key)
        season = get_item(item.season.rating_key)

        oc.add(DirectoryObject(
            key=Callback(MetadataMenu, rating_key=season.rating_key, title=season.title, base_title=show.title,
                         previous_item_type="show", previous_rating_key=show.rating_key, display_items=True,
                         randomize=timestamp()),
            title=_(u"< Back to %s", season.title),
            summary=_("Back to %s > %s", show.title, season.title),
            thumb=season.thumb or default_thumb
        ))

    # normal refresh: respects known subtitles, only fills gaps / picks up new on-disk subs
    oc.add(DirectoryObject(
        key=Callback(RefreshItem, rating_key=rating_key, item_title=item_title, randomize=timestamp(),
                     timeout=timeout * 1000),
        title=_(u"Refresh: %s", item_title),
        summary=_("Refreshes %(the_movie_series_season_episode)s, possibly searching for missing and picking up "
                  "new subtitles on disk",
                  the_movie_series_season_episode=_(u"the %s" % current_kind)),
        thumb=item.thumb or default_thumb
    ))

    # forced refresh: ignores known subtitles and searches from scratch
    oc.add(DirectoryObject(
        key=Callback(RefreshItem, rating_key=rating_key, item_title=item_title, force=True, randomize=timestamp(),
                     timeout=timeout * 1000),
        title=_(u"Force-find subtitles: %(item_title)s", item_title=item_title),
        summary=_("Issues a forced refresh, ignoring known subtitles and searching for new ones"),
        thumb=item.thumb or default_thumb
    ))

    # get stored subtitle info for item id
    subtitle_storage = get_subtitle_storage()
    stored_subs = subtitle_storage.load_or_new(item)

    # look for subtitles for all available media parts and all of their languages
    has_multiple_parts = len(plex_item.media) > 1
    part_index = 0
    for media in plex_item.media:
        for part in media.parts:
            filename = os.path.basename(part.file)
            # part's file is gone (unmounted share, deleted file) — nothing to manage for it
            if not os.path.exists(part.file):
                continue

            part_id = str(part.id)
            part_index += 1

            # with multiple parts, prefix menu entries with the part number and summaries with
            # the part's filename so entries are distinguishable
            part_index_addon = u""
            part_summary_addon = u""
            if has_multiple_parts:
                part_index_addon = _(u"File %(file_part_index)s: ", file_part_index=part_index)
                part_summary_addon = u"%s " % filename

            # iterate through all configured languages
            for lang in config.lang_list:
                # get corresponding stored subtitle data for that media part (physical media item), for language
                current_sub = stored_subs.get_any(part_id, lang)
                current_sub_id = None
                current_sub_provider_name = None

                summary = _(u"%(part_summary)sNo current subtitle in storage", part_summary=part_summary_addon)
                current_score = None
                if current_sub:
                    # a stored subtitle exists: surface its provider/date/mode/score details
                    # and offer the management menu for it
                    current_sub_id = current_sub.id
                    current_sub_provider_name = current_sub.provider_name
                    current_score = current_sub.score

                    summary = _(u"%(part_summary)sCurrent subtitle: %(provider_name)s (added: %(date_added)s, "
                                u"%(mode)s), Language: %(language)s, Score: %(score)i, Storage: %(storage_type)s",
                                part_summary=part_summary_addon,
                                provider_name=_(current_sub.provider_name),
                                date_added=df(current_sub.date_added),
                                mode=_(current_sub.mode_verbose),
                                language=display_language(lang),
                                score=current_sub.score,
                                storage_type=current_sub.storage_type)

                    oc.add(DirectoryObject(
                        key=Callback(SubtitleOptionsMenu, rating_key=rating_key, part_id=part_id, title=title,
                                     item_title=item_title, language=lang, language_name=display_language(lang),
                                     current_id=current_sub_id, item_type=plex_item.type, filename=filename,
                                     current_data=summary, randomize=timestamp(),
                                     current_provider=current_sub_provider_name,
                                     current_score=current_score),
                        title=_(u"%(part_summary)sManage %(language)s subtitle", part_summary=part_index_addon,
                                language=display_language(lang)),
                        summary=summary
                    ))
                else:
                    # no stored subtitle for this part/language: offer the search/list menu instead
                    oc.add(DirectoryObject(
                        key=Callback(ListAvailableSubsForItemMenu, rating_key=rating_key, part_id=part_id,
                                     title=title, item_title=item_title, language=lang,
                                     language_name=display_language(lang), current_id=current_sub_id,
                                     item_type=plex_item.type, filename=filename, current_data=summary,
                                     randomize=timestamp(), current_provider=current_sub_provider_name,
                                     current_score=current_score),
                        title=_(u"%(part_summary)sList %(language)s subtitles", part_summary=part_index_addon,
                                language=display_language(lang)),
                        summary=summary
                    ))

            if config.plex_transcoder:
                # embedded subtitles
                # count the part's text-based embedded subtitle streams so we can offer extraction
                embedded_count = 0
                embedded_langs = []
                for stream in part.streams:
                    # subtitle stream
                    # stream_type 3 == subtitle; stream_key unset == embedded (not a sidecar);
                    # only text-based codecs are extractable
                    if stream.stream_type == 3 and not stream.stream_key and stream.codec in TEXT_SUBTITLE_EXTS:
                        lang = get_language_from_stream(stream.language_code)
                        is_forced = is_stream_forced(stream)
                        # undetermined stream language may be treated as the first configured language
                        if not lang and config.treat_und_as_first:
                            lang = list(config.lang_list)[0]

                        if lang:
                            lang = Language.rebuild(lang, forced=is_forced)
                            embedded_langs.append(lang)
                            embedded_count += 1

                if embedded_count:
                    oc.add(DirectoryObject(
                        key=Callback(ListEmbeddedSubsForItemMenu, rating_key=rating_key, part_id=part_id,
                                     title=title, item_type=plex_item.type, item_title=item_title,
                                     base_title=base_title, randomize=timestamp()),
                        title=_(u"%(part_summary)sEmbedded subtitles (%(languages)s)",
                                part_summary=part_index_addon,
                                # OrderedDict.fromkeys de-duplicates languages while preserving order
                                languages=", ".join(display_language(l) for l in
                                                    list(OrderedDict.fromkeys(embedded_langs)))),
                        summary=_(u"Extract embedded subtitle streams")
                    ))

    # for episodes, include/exclude entries are titled after the item itself, not the passed label
    ignore_title = item_title
    if current_kind == "episode":
        ignore_title = get_item_title(item)

    add_incl_excl_options(oc, "videos", title=ignore_title, rating_key=rating_key, callback_menu=InclExclMenu)

    # release the storage handle opened above
    subtitle_storage.destroy()

    return oc