def load_language_in_db(): # Get languages list in langs tuple langs = [[lang.alpha_3, lang.alpha_2, lang.name] for lang in pycountry.languages if hasattr(lang, 'alpha_2')] # Insert standard languages in database table TableSettingsLanguages.insert_many(langs, fields=[TableSettingsLanguages.code3, TableSettingsLanguages.code2, TableSettingsLanguages.name]) \ .on_conflict(action='IGNORE') \ .execute() # Update standard languages with code3b if available langs = [[lang.bibliographic, lang.alpha_3] for lang in pycountry.languages if hasattr(lang, 'alpha_2') and hasattr(lang, 'bibliographic')] # Update languages in database table for lang in langs: TableSettingsLanguages.update({TableSettingsLanguages.code3b: lang[0]}) \ .where(TableSettingsLanguages.code3 == lang[1]) \ .execute() # Insert custom languages in database table CustomLanguage.register(TableSettingsLanguages) # Create languages dictionary for faster conversion than calling database create_languages_dict()
def _handle_alpha3(detected_language: dict): alpha3 = detected_language["language"].alpha3 custom = CustomLanguage.from_value(alpha3, "official_alpha3") if custom and custom.ffprobe_found(detected_language): logger.debug("Custom embedded language found: %s", custom.name) return custom.alpha3 return alpha3
def get_language_set(): languages = TableSettingsLanguages.select(TableSettingsLanguages.code3) \ .where(TableSettingsLanguages.enabled == 1).dicts() language_set = set() for lang in languages: custom = CustomLanguage.from_value(lang["code3"], "alpha3") if custom is None: language_set.add(Language(lang["code3"])) else: language_set.add(custom.subzero_language()) return language_set
def subtitles_apply_mods(language, subtitle_path, mods): language = alpha3_from_alpha2(language) custom = CustomLanguage.from_value(language, "alpha3") if custom is None: lang_obj = Language(language) else: lang_obj = custom.subzero_language() sub = Subtitle(lang_obj, mods=mods) with open(subtitle_path, 'rb') as f: sub.content = f.read() if not sub.is_valid(): logging.exception('BAZARR Invalid subtitle file: ' + subtitle_path) return content = sub.get_modified_content() if content: if os.path.exists(subtitle_path): os.remove(subtitle_path) with open(subtitle_path, 'wb') as f: f.write(content)
def manual_upload_subtitle(path, language, forced, hi, title, scene_name, media_type, subtitle, audio_language): logging.debug('BAZARR Manually uploading subtitles for this file: ' + path) single = settings.general.getboolean('single_language') use_postprocessing = settings.general.getboolean('use_postprocessing') postprocessing_cmd = settings.general.postprocessing_cmd chmod = int(settings.general.chmod, 8) if not sys.platform.startswith( 'win') and settings.general.getboolean('chmod_enabled') else None language = alpha3_from_alpha2(language) custom = CustomLanguage.from_value(language, "alpha3") if custom is None: lang_obj = Language(language) else: lang_obj = custom.subzero_language() if forced: lang_obj = Language.rebuild(lang_obj, forced=True) sub = Subtitle( lang_obj, mods=get_array_from(settings.general.subzero_mods) ) sub.content = subtitle.read() if not sub.is_valid(): logging.exception('BAZARR Invalid subtitle file: ' + subtitle.filename) sub.mods = None if settings.general.getboolean('utf8_encode'): sub.set_encoding("utf-8") saved_subtitles = [] try: saved_subtitles = save_subtitles(path, [sub], single=single, tags=None, # fixme directory=get_target_folder(path), chmod=chmod, # formats=("srt", "vtt") path_decoder=force_unicode) except Exception: logging.exception('BAZARR Error saving Subtitles file to disk for this file:' + path) return if len(saved_subtitles) < 1: logging.exception('BAZARR Error saving Subtitles file to disk for this file:' + path) return subtitle_path = saved_subtitles[0].storage_path if hi: modifier_string = " HI" elif forced: modifier_string = " forced" else: modifier_string = "" message = language_from_alpha3(language) + modifier_string + " Subtitles manually uploaded." if hi: modifier_code = ":hi" elif forced: modifier_code = ":forced" else: modifier_code = "" uploaded_language_code3 = language + modifier_code uploaded_language = language_from_alpha3(language) + modifier_string uploaded_language_code2 = alpha2_from_alpha3(language) + modifier_code audio_language_code2 = alpha2_from_language(audio_language) audio_language_code3 = alpha3_from_language(audio_language) if media_type == 'series': episode_metadata = TableEpisodes.select(TableEpisodes.sonarrSeriesId, TableEpisodes.sonarrEpisodeId) \ .where(TableEpisodes.path == path_mappings.path_replace_reverse(path)) \ .dicts() \ .get_or_none() if not episode_metadata: return series_id = episode_metadata['sonarrSeriesId'] episode_id = episode_metadata['sonarrEpisodeId'] sync_subtitles(video_path=path, srt_path=subtitle_path, srt_lang=uploaded_language_code2, media_type=media_type, percent_score=100, sonarr_series_id=episode_metadata['sonarrSeriesId'], forced=forced, sonarr_episode_id=episode_metadata['sonarrEpisodeId']) else: movie_metadata = TableMovies.select(TableMovies.radarrId) \ .where(TableMovies.path == path_mappings.path_replace_reverse_movie(path)) \ .dicts() \ .get_or_none() if not movie_metadata: return series_id = "" episode_id = movie_metadata['radarrId'] sync_subtitles(video_path=path, srt_path=subtitle_path, srt_lang=uploaded_language_code2, media_type=media_type, percent_score=100, radarr_id=movie_metadata['radarrId'], forced=forced) if use_postprocessing: command = pp_replace(postprocessing_cmd, path, subtitle_path, uploaded_language, uploaded_language_code2, uploaded_language_code3, audio_language, audio_language_code2, audio_language_code3, forced, 100, "1", "manual", series_id, episode_id, hi=hi) postprocessing(command, path) if media_type == 'series': reversed_path = path_mappings.path_replace_reverse(path) reversed_subtitles_path = path_mappings.path_replace_reverse(subtitle_path) notify_sonarr(episode_metadata['sonarrSeriesId']) event_stream(type='series', action='update', payload=episode_metadata['sonarrSeriesId']) event_stream(type='episode-wanted', action='delete', payload=episode_metadata['sonarrEpisodeId']) else: reversed_path = path_mappings.path_replace_reverse_movie(path) reversed_subtitles_path = path_mappings.path_replace_reverse_movie(subtitle_path) notify_radarr(movie_metadata['radarrId']) event_stream(type='movie', action='update', payload=movie_metadata['radarrId']) event_stream(type='movie-wanted', action='delete', payload=movie_metadata['radarrId']) return message, reversed_path, reversed_subtitles_path
def _get_lang_obj(alpha3): sub = CustomLanguage.from_value(alpha3, "alpha3") if sub is None: return Language(alpha3) return sub.subzero_language()
def _get_download_code3(subtitle): custom = CustomLanguage.from_value(subtitle.language, "language") if custom is None: return subtitle.language.alpha3 return custom.alpha3
def translate_subtitles_file(video_path, source_srt_file, to_lang, forced, hi): language_code_convert_dict = { 'he': 'iw', 'zt': 'zh-cn', 'zh': 'zh-tw', } to_lang = alpha3_from_alpha2(to_lang) lang_obj = CustomLanguage.from_value(to_lang, "alpha3") if not lang_obj: lang_obj = Language(to_lang) if forced: lang_obj = Language.rebuild(lang_obj, forced=True) if hi: lang_obj = Language.rebuild(lang_obj, hi=True) logging.debug('BAZARR is translating in {0} this subtitles {1}'.format( lang_obj, source_srt_file)) max_characters = 5000 dest_srt_file = get_subtitle_path( video_path, language=lang_obj if isinstance(lang_obj, Language) else lang_obj.subzero_language(), extension='.srt', forced_tag=forced, hi_tag=hi) subs = pysubs2.load(source_srt_file, encoding='utf-8') lines_list = [x.plaintext for x in subs] joined_lines_str = '\n\n\n'.join(lines_list) logging.debug( 'BAZARR splitting subtitles into {} characters blocks'.format( max_characters)) lines_block_list = [] translated_lines_list = [] while len(joined_lines_str): partial_lines_str = joined_lines_str[:max_characters] if len(joined_lines_str) > max_characters: new_partial_lines_str = partial_lines_str.rsplit('\n\n\n', 1)[0] else: new_partial_lines_str = partial_lines_str lines_block_list.append(new_partial_lines_str) joined_lines_str = joined_lines_str.replace(new_partial_lines_str, '') logging.debug('BAZARR is sending {} blocks to Google Translate'.format( len(lines_block_list))) for block_str in lines_block_list: empty_first_line = False if block_str.startswith('\n\n\n'): # This happens when the first line of text in a subtitles file is an empty string empty_first_line = True try: translated_partial_srt_text = GoogleTranslator( source='auto', target=language_code_convert_dict.get( lang_obj.alpha2, lang_obj.alpha2)).translate(text=block_str) except Exception: logging.exception( f'BAZARR Unable to translate subtitles {source_srt_file}') return False else: if empty_first_line: # GoogleTranslate remove new lines at the beginning of the string, so we add it back. translated_partial_srt_text = '\n\n\n' + translated_partial_srt_text translated_partial_srt_list = translated_partial_srt_text.split( '\n\n\n') translated_lines_list += translated_partial_srt_list logging.debug( 'BAZARR saving translated subtitles to {}'.format(dest_srt_file)) for i, line in enumerate(subs): try: line.plaintext = translated_lines_list[i] except IndexError: logging.error( f'BAZARR is unable to translate malformed subtitles: {source_srt_file}' ) return False subs.save(dest_srt_file) return dest_srt_file
def translate_subtitles_file(video_path, source_srt_file, to_lang, forced, hi): language_code_convert_dict = { 'he': 'iw', 'zt': 'zh-cn', 'zh': 'zh-tw', } to_lang = alpha3_from_alpha2(to_lang) lang_obj = CustomLanguage.from_value(to_lang, "alpha3") if not lang_obj: lang_obj = Language(to_lang) if forced: lang_obj = Language.rebuild(lang_obj, forced=True) if hi: lang_obj = Language.rebuild(lang_obj, hi=True) logging.debug('BAZARR is translating in {0} this subtitles {1}'.format( lang_obj, source_srt_file)) max_characters = 5000 dest_srt_file = get_subtitle_path( video_path, language=lang_obj if isinstance(lang_obj, Language) else lang_obj.subzero_language(), extension='.srt', forced_tag=forced, hi_tag=hi) subs = pysubs2.load(source_srt_file, encoding='utf-8') lines_list = [x.plaintext for x in subs] joined_lines_str = '\n\n\n'.join(lines_list) logging.debug( 'BAZARR splitting subtitles into {} characters blocks'.format( max_characters)) lines_block_list = [] translated_lines_list = [] while len(joined_lines_str): partial_lines_str = joined_lines_str[:max_characters] if len(joined_lines_str) > max_characters: new_partial_lines_str = partial_lines_str.rsplit('\n\n\n', 1)[0] else: new_partial_lines_str = partial_lines_str lines_block_list.append(new_partial_lines_str) joined_lines_str = joined_lines_str.replace(new_partial_lines_str, '') logging.debug('BAZARR is sending {} blocks to Google Translate'.format( len(lines_block_list))) for block_str in lines_block_list: try: translated_partial_srt_text = GoogleTranslator( source='auto', target=language_code_convert_dict.get( lang_obj.alpha2, lang_obj.alpha2)).translate(text=block_str) except: return False else: translated_partial_srt_list = translated_partial_srt_text.split( '\n\n\n') translated_lines_list += translated_partial_srt_list logging.debug( 'BAZARR saving translated subtitles to {}'.format(dest_srt_file)) for i, line in enumerate(subs): line.plaintext = translated_lines_list[i] subs.save(dest_srt_file) return dest_srt_file
def store_subtitles(original_path, reversed_path, use_cache=True): logging.debug('BAZARR started subtitles indexing for this file: ' + reversed_path) actual_subtitles = [] if os.path.exists(reversed_path): if settings.general.getboolean('use_embedded_subs'): logging.debug("BAZARR is trying to index embedded subtitles.") item = TableEpisodes.select(TableEpisodes.episode_file_id, TableEpisodes.file_size)\ .where(TableEpisodes.path == original_path)\ .dicts()\ .get_or_none() if not item: logging.exception(f"BAZARR error when trying to select this episode from database: {reversed_path}") else: try: subtitle_languages = embedded_subs_reader(reversed_path, file_size=item['file_size'], episode_file_id=item['episode_file_id'], use_cache=use_cache) for subtitle_language, subtitle_forced, subtitle_hi, subtitle_codec in subtitle_languages: try: if (settings.general.getboolean("ignore_pgs_subs") and subtitle_codec.lower() == "pgs") or \ (settings.general.getboolean("ignore_vobsub_subs") and subtitle_codec.lower() == "vobsub") or \ (settings.general.getboolean("ignore_ass_subs") and subtitle_codec.lower() == "ass"): logging.debug("BAZARR skipping %s sub for language: %s" % (subtitle_codec, alpha2_from_alpha3(subtitle_language))) continue if alpha2_from_alpha3(subtitle_language) is not None: lang = str(alpha2_from_alpha3(subtitle_language)) if subtitle_forced: lang = lang + ":forced" if subtitle_hi: lang = lang + ":hi" logging.debug("BAZARR embedded subtitles detected: " + lang) actual_subtitles.append([lang, None]) except Exception as error: logging.debug("BAZARR unable to index this unrecognized language: %s (%s)", subtitle_language, error) except Exception: logging.exception( "BAZARR error when trying to analyze this %s file: %s" % (os.path.splitext(reversed_path)[1], reversed_path)) pass try: dest_folder = get_subtitle_destination_folder() core.CUSTOM_PATHS = [dest_folder] if dest_folder else [] subtitles = search_external_subtitles(reversed_path, languages=get_language_set(), only_one=settings.general.getboolean('single_language')) full_dest_folder_path = os.path.dirname(reversed_path) if dest_folder: if settings.general.subfolder == "absolute": full_dest_folder_path = dest_folder elif settings.general.subfolder == "relative": full_dest_folder_path = os.path.join(os.path.dirname(reversed_path), dest_folder) subtitles = guess_external_subtitles(full_dest_folder_path, subtitles) except Exception: logging.exception("BAZARR unable to index external subtitles.") else: for subtitle, language in subtitles.items(): valid_language = False if language: if hasattr(language, 'alpha3'): valid_language = alpha2_from_alpha3(language.alpha3) else: logging.debug(f"Skipping subtitles because we are unable to define language: {subtitle}") continue if not valid_language: logging.debug(f'{language.alpha3} is an unsupported language code.') continue subtitle_path = get_external_subtitles_path(reversed_path, subtitle) custom = CustomLanguage.found_external(subtitle, subtitle_path) if custom is not None: actual_subtitles.append([custom, path_mappings.path_replace_reverse(subtitle_path)]) elif str(language) != 'und': if language.forced: language_str = str(language) elif language.hi: language_str = str(language) + ':hi' else: language_str = str(language) logging.debug("BAZARR external subtitles detected: " + language_str) actual_subtitles.append([language_str, path_mappings.path_replace_reverse(subtitle_path)]) TableEpisodes.update({TableEpisodes.subtitles: str(actual_subtitles)})\ .where(TableEpisodes.path == original_path)\ .execute() matching_episodes = TableEpisodes.select(TableEpisodes.sonarrEpisodeId, TableEpisodes.sonarrSeriesId)\ .where(TableEpisodes.path == original_path)\ .dicts() for episode in matching_episodes: if episode: logging.debug("BAZARR storing those languages to DB: " + str(actual_subtitles)) list_missing_subtitles(epno=episode['sonarrEpisodeId']) else: logging.debug("BAZARR haven't been able to update existing subtitles to DB : " + str(actual_subtitles)) else: logging.debug("BAZARR this file doesn't seems to exist or isn't accessible.") logging.debug('BAZARR ended subtitles indexing for this file: ' + reversed_path) return actual_subtitles