def get(id):
    """Download every page image of an ONB (digital.onb.ac.at) document."""
    # First, normalizing id: slashes would break the paths/URLs built below.
    id = id.replace('/', '_')

    # The viewer flavour is encoded in the identifier prefix.
    flavour = None
    for prefix, candidate in (("ABO", "OnbViewer"), ("DTL", "RepViewer")):
        if id.startswith(prefix):
            flavour = candidate
            break
    if flavour is None:
        raise RuntimeError(f"Can not determine flavour for {id}")

    # Second, obtaining JSESSIONID cookie value: the image endpoints require
    # the session cookie handed out by the viewer page.
    viewer_url = f"http://digital.onb.ac.at/{flavour}/viewer.faces?doc={id}"
    cookies = requests.get(viewer_url).cookies

    metadata_url = f"http://digital.onb.ac.at/{flavour}/service/viewer/imageData?doc={id}&from=1&to=1000"
    metadata = utils.get_json(metadata_url, cookies=cookies)
    image_data = metadata["imageData"]
    output_folder = utils.make_output_folder("onb", id)
    print(f"Going to download {len(image_data)} images")

    for entry in image_data:
        query_args = entry["queryArgs"]
        image_id = entry["imageID"]
        image_url = f"http://digital.onb.ac.at/{flavour}/image?{query_args}&s=1.0&q=100"
        output_filename = utils.make_output_filename(output_folder, image_id, extension=None)
        if os.path.isfile(output_filename):
            print(f"Skip downloading existing image {image_id}")
            continue
        print(f"Downloading {image_id}")
        utils.get_binary(output_filename, image_url, cookies=cookies)
def sync(self, video_path, srt_path, srt_lang, media_type, sonarr_series_id=None, sonarr_episode_id=None, radarr_id=None):
    """Run ffsubsync on *srt_path* against *video_path* and, on success,
    replace the original subtitle file with the synchronized version.

    In debug mode the raw result is returned without touching files.
    A history entry is written for the episode or movie on success.
    """
    self.reference = video_path
    self.srtin = srt_path
    # Synced output lands next to the input: foo.srt -> foo.synced.srt.
    self.srtout = '{}.synced.srt'.format(os.path.splitext(self.srtin)[0])
    self.args = None
    ffprobe_exe = get_binary('ffprobe')
    if not ffprobe_exe:
        logging.debug('BAZARR FFprobe not found!')
        return
    else:
        logging.debug('BAZARR FFprobe used is %s', ffprobe_exe)
    ffmpeg_exe = get_binary('ffmpeg')
    if not ffmpeg_exe:
        logging.debug('BAZARR FFmpeg not found!')
        return
    else:
        logging.debug('BAZARR FFmpeg used is %s', ffmpeg_exe)
    self.ffmpeg_path = os.path.dirname(ffmpeg_exe)
    try:
        # Build the ffsubsync command line and run it in-process.
        unparsed_args = [self.reference, '-i', self.srtin, '-o', self.srtout, '--ffmpegpath', self.ffmpeg_path,
                         '--vad', self.vad, '--log-dir-path', self.log_dir_path]
        if settings.subsync.getboolean('debug'):
            unparsed_args.append('--make-test-case')
        parser = make_parser()
        self.args = parser.parse_args(args=unparsed_args)
        result = run(self.args)
    except Exception as e:
        logging.exception('BAZARR an exception occurs during the synchronization process for this subtitles: '
                          '{0}'.format(self.srtin))
    else:
        if settings.subsync.getboolean('debug'):
            # Debug mode: keep both the original and the synced file.
            return result
        if os.path.isfile(self.srtout):
            if not settings.subsync.getboolean('debug'):
                # Swap the synced file in place of the original subtitles.
                os.remove(self.srtin)
                os.rename(self.srtout, self.srtin)
                offset_seconds = result['offset_seconds'] or 0
                framerate_scale_factor = result['framerate_scale_factor'] or 0
                message = "{0} subtitles synchronization ended with an offset of {1} seconds and a framerate " \
                          "scale factor of {2}.".format(language_from_alpha2(srt_lang), offset_seconds,
                                                        "{:.2f}".format(framerate_scale_factor))
                if media_type == 'series':
                    history_log(action=5, sonarr_series_id=sonarr_series_id, sonarr_episode_id=sonarr_episode_id,
                                description=message,
                                video_path=path_mappings.path_replace_reverse(self.reference),
                                language=srt_lang, subtitles_path=srt_path)
                else:
                    history_log_movie(action=5, radarr_id=radarr_id, description=message,
                                      video_path=path_mappings.path_replace_reverse_movie(self.reference),
                                      language=srt_lang, subtitles_path=srt_path)
        else:
            logging.error('BAZARR unable to sync subtitles: {0}'.format(self.srtin))
        # NOTE(review): on the exception path nothing is returned (implicit
        # None); `result` is only defined and returned on the success path.
        return result
def get(id):
    """Download every page image of a kramerius.difmoe.eu item given its UUID."""
    children_url = f"https://kramerius.difmoe.eu/search/api/v5.0/item/uuid:{id}/children"
    children = utils.get_json(children_url)
    print(f"Downloading {len(children)} images from kramerius.difmoe.eu")
    output_folder = utils.make_output_folder("difmoe", id)
    page = 0
    for entry in children:
        page += 1
        child_pid = entry["pid"]
        image_url = f"https://kramerius.difmoe.eu/search/img?pid={child_pid}&stream=IMG_FULL"
        destination = utils.make_output_filename(output_folder, page=page, extension="jpg")
        utils.get_binary(destination, image_url)
def get_book(id):
    """Download a book from the HAB (diglib.hab.de) digital library.

    Pages are probed sequentially from 1 up to 999; the end of the book is
    detected when a download fails (utils.get_binary raising ValueError),
    at which point the loop stops. Already-downloaded pages are skipped so
    the function can resume an interrupted run.
    """
    output_folder = utils.make_output_folder("hab", id)
    # FIX: removed the dead `page = 0` seed — the loop variable below rebinds
    # it immediately, so the assignment had no effect.
    for page in range(1, 1000):
        url = f"http://diglib.hab.de/{id}/max/{page:05d}.jpg"
        output_filename = utils.make_output_filename(output_folder, page=page, extension="jpg")
        if os.path.exists(output_filename):
            print(f"Skip downloading existing page #{page:05d}")
            continue
        try:
            print(f"Downloading page #{page:05d} from {url}")
            utils.get_binary(output_filename, url)
        except ValueError:
            # Presumably raised by utils.get_binary on a missing page — end of book.
            break
def get(id):
    """Download a document from the Fulda library viewer (fuldig.hs-fulda.de)."""
    output_folder = utils.make_output_folder("fulda", id)
    # it looks like Fulda library does not use manifest.json, hence it is not
    # possible to guess the number of pages in advance — probe until a
    # download fails.
    for page in range(1, 1000):
        output_filename = utils.make_output_filename(output_folder, page, extension="jpg")
        if os.path.exists(output_filename):
            print(f"Skip downloading existing page #{page:08d}")
            continue
        image_url = f"https://fuldig.hs-fulda.de/viewer/rest/image/{id}/{page:08d}.tif/full/10000,/0/default.jpg"
        print(f"Downloading page {page} to {output_filename}")
        try:
            utils.get_binary(output_filename, image_url)
        except ValueError:
            break
def get(id):
    """Download all page scans of a HathiTrust volume identified by *id*."""
    output_folder = utils.make_output_folder("hathitrust", id)
    # The meta endpoint reports the page count up front.
    metadata = utils.get_json(f"https://babel.hathitrust.org/cgi/imgsrv/meta?id={id}")
    total_pages = metadata["total_items"]
    print(f"Going to download {total_pages} pages to {output_folder}")
    for page in range(1, total_pages + 1):
        output_filename = utils.make_output_filename(output_folder, page, extension="jpg")
        if os.path.exists(output_filename):
            print(f"Skip downloading existing page #{page:08d}")
            continue
        url = f"https://babel.hathitrust.org/cgi/imgsrv/image?id={id};seq={page};width=1000000"
        print(f"Downloading page {page} to {output_filename}")
        utils.get_binary(output_filename, url)
def download_book_fast(manifest_url, output_folder):
    """
    Downloads entire book via IIIF protocol. Issues single request per image,
    but might be unsupported by certain backends.

    API is documented here: http://iiif.io/about/
    """
    canvases = utils.get_json(manifest_url)["sequences"][0]["canvases"]
    # NOTE: page numbering here is 0-based (enumerate default), unlike the
    # 1-based downloaders elsewhere — kept as-is so existing downloads with
    # 0-based filenames are still detected and skipped.
    for page, canvas in enumerate(canvases):
        output_filename = utils.make_output_filename(output_folder, page, extension="jpg")
        if os.path.isfile(output_filename):
            print(f"Skip downloading existing page #{page:04d}")
            continue
        # The last image entry is used, matching the original behaviour.
        full_url = canvas["images"][-1]["resource"]["@id"]
        print(f"Downloading page #{page:04d} from {full_url}")
        utils.get_binary(output_filename, full_url)
def init_binaries():
    """Locate the unrar executable, wire it into rarfile, and return its path."""
    from utils import get_binary

    unrar = get_binary("unrar")
    # Register the binary in both the active and the "original" slot.
    rarfile.UNRAR_TOOL = unrar
    rarfile.ORIG_UNRAR_TOOL = unrar
    try:
        rarfile.custom_check([rarfile.UNRAR_TOOL], True)
    except Exception:
        # Best effort: a failed self-check is logged but not fatal.
        logging.debug("custom check failed for: %s", unrar)
    logging.debug("Using UnRAR from: %s", unrar)
    return unrar
def init_binaries():
    """Locate unrar, configure the rarfile module, and return the binary path.

    If rarfile's self-check for the configured binary fails, the module's
    original OPEN/EXTRACT/TEST argument sets are restored as a fallback.
    """
    exe = get_binary("unrar")
    rarfile.UNRAR_TOOL = exe
    rarfile.ORIG_UNRAR_TOOL = exe
    try:
        rarfile.custom_check([rarfile.UNRAR_TOOL], True)
    except Exception:
        # FIX: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; narrowed to Exception.
        logging.debug("custom check failed for: %s", exe)
        # Fall back to rarfile's stock argument sets.
        rarfile.OPEN_ARGS = rarfile.ORIG_OPEN_ARGS
        rarfile.EXTRACT_ARGS = rarfile.ORIG_EXTRACT_ARGS
        rarfile.TEST_ARGS = rarfile.ORIG_TEST_ARGS
    logging.info("Using UnRAR from: %s", exe)
    unrar = exe
    return unrar
def list_languages(self, file):
    """Return [language, forced, hearing_impaired, codec] for every subtitle
    track embedded in *file*, preferring ffprobe and falling back to enzyme's
    built-in MKV parser when ffprobe is unavailable."""
    from utils import get_binary
    self.ffprobe = get_binary("ffprobe")
    subtitles_list = []
    if self.ffprobe:
        api.initialize({'provider': 'ffmpeg', 'ffmpeg': self.ffprobe})
        data = api.know(file)
        # Track-name markers used to refine the detected language.
        traditional_chinese = ["cht", "tc", "traditional", "zht", "hant", "big5", u"繁", u"雙語"]
        brazilian_portuguese = ["pt-br", "pob", "pb", "brazilian", "brasil", "brazil"]
        if 'subtitle' in data:
            for track in data['subtitle']:
                if 'language' not in track:
                    continue
                language = track['language'].alpha3
                track_name = track['name'].lower() if 'name' in track else None
                # Refine generic Chinese/Portuguese to zht/pob via name hints.
                if language == 'zho' and track_name is not None:
                    if any(marker in track_name for marker in traditional_chinese):
                        language = 'zht'
                if language == 'por' and track_name is not None:
                    if any(marker in track_name for marker in brazilian_portuguese):
                        language = 'pob'
                forced = track.get('forced', False)
                hearing_impaired = track.get('hearing_impaired', False)
                codec = track.get('format')
                subtitles_list.append([language, forced, hearing_impaired, codec])
    elif os.path.splitext(file)[1] == '.mkv':
        with open(file, 'rb') as f:
            try:
                mkv = enzyme.MKV(f)
            except MalformedMKVError:
                logging.error('BAZARR cannot analyze this MKV with our built-in MKV parser, you should install ffmpeg: ' + file)
            else:
                for subtitle_track in mkv.subtitle_tracks:
                    name = subtitle_track.name
                    hearing_impaired = bool(name) and 'sdh' in name.lower()
                    subtitles_list.append([subtitle_track.language, subtitle_track.forced, hearing_impaired,
                                           subtitle_track.codec_id])
    return subtitles_list
def list_languages(self, file):
    """Detect embedded subtitle tracks in *file*; returns [language, forced, codec]
    triples, using ffprobe when available and enzyme for MKV otherwise."""
    from utils import get_binary
    self.ffprobe = get_binary("ffprobe")
    subtitles_list = []
    if self.ffprobe:
        api.initialize({'provider': 'ffmpeg', 'ffmpeg': self.ffprobe})
        data = api.know(file)
        if 'subtitle' in data:
            for track in data['subtitle']:
                if 'language' not in track:
                    continue
                subtitles_list.append([
                    track['language'].alpha3,
                    track.get('forced'),   # None when the flag is absent
                    track.get('format'),   # None when the codec is unknown
                ])
    elif os.path.splitext(file)[1] == '.mkv':
        with open(file, 'rb') as f:
            try:
                mkv = enzyme.MKV(f)
            except MalformedMKVError:
                logging.error(
                    'BAZARR cannot analyze this MKV with our built-in MKV parser, you should install ffmpeg: ' + file)
            else:
                for subtitle_track in mkv.subtitle_tracks:
                    subtitles_list.append([
                        subtitle_track.language,
                        subtitle_track.forced,
                        subtitle_track.codec_id,
                    ])
    return subtitles_list
def get(id):
    """Download a book from internetculturale.it, probing pages sequentially."""
    full_id = f"oai:www.internetculturale.sbn.it/{id}"
    # FIXME: this xpath is just broken — the magparser endpoint would give us
    # the page count up front, but until the xpath is fixed we probe pages:
    # metadata_url = f"http://www.internetculturale.it/jmms/magparser?id={full_id}&teca=MagTeca+-+ICCU&mode=all"
    # metadata = utils.get_xml(metadata_url)
    # page_nodes = metadata.findall("./package/medias/media[1]/pages")
    # page_count = int(page_nodes[0].attrib("count"))
    page_url_base = f"http://www.internetculturale.it/jmms/objdownload?id={full_id}&teca=MagTeca%20-%20ICCU&resource=img&mode=raw"
    output_folder = utils.make_output_folder("iculturale", id)
    for page in range(1, 1000):
        page_url = f"{page_url_base}&start={page}"
        print(f"Downloading page #{page} from {page_url}")
        output_filename = utils.make_output_filename(output_folder, page=page, extension="jpg")
        if os.path.exists(output_filename):
            print(f"Skip downloading existing page #{page:08d}")
            continue
        # A zero-length response marks the end of the book; drop the empty
        # file and stop.
        if utils.get_binary(output_filename, page_url) == 0:
            os.remove(output_filename)
            break
def get_providers_auth():
    """Assemble per-provider credentials and options from application settings."""
    return {
        'addic7ed': {
            'username': settings.addic7ed.username,
            'password': settings.addic7ed.password,
            'is_vip': settings.addic7ed.getboolean('vip'),
        },
        'opensubtitles': {
            'username': settings.opensubtitles.username,
            'password': settings.opensubtitles.password,
            'use_tag_search': settings.opensubtitles.getboolean('use_tag_search'),
            'only_foreign': False,  # fixme
            'also_foreign': False,  # fixme
            'is_vip': settings.opensubtitles.getboolean('vip'),
            'use_ssl': settings.opensubtitles.getboolean('ssl'),
            'timeout': int(settings.opensubtitles.timeout) or 15,
            'skip_wrong_fps': settings.opensubtitles.getboolean('skip_wrong_fps'),
        },
        'opensubtitlescom': {
            'username': settings.opensubtitlescom.username,
            'password': settings.opensubtitlescom.password,
            'use_hash': settings.opensubtitlescom.getboolean('use_hash'),
            'api_key': 's38zmzVlW7IlYruWi7mHwDYl2SfMQoC1',
        },
        'podnapisi': {
            'only_foreign': False,  # fixme
            'also_foreign': False,  # fixme
            'verify_ssl': settings.podnapisi.getboolean('verify_ssl'),
        },
        'subscene': {
            'username': settings.subscene.username,
            'password': settings.subscene.password,
            'only_foreign': False,  # fixme
        },
        'legendasdivx': {
            'username': settings.legendasdivx.username,
            'password': settings.legendasdivx.password,
            'skip_wrong_fps': settings.legendasdivx.getboolean('skip_wrong_fps'),
        },
        'legendastv': {
            'username': settings.legendastv.username,
            'password': settings.legendastv.password,
            'featured_only': settings.legendastv.getboolean('featured_only'),
        },
        'xsubs': {
            'username': settings.xsubs.username,
            'password': settings.xsubs.password,
        },
        'assrt': {
            'token': settings.assrt.token,
        },
        'napisy24': {
            'username': settings.napisy24.username,
            'password': settings.napisy24.password,
        },
        'betaseries': {
            'token': settings.betaseries.token,
        },
        'titulky': {
            'username': settings.titulky.username,
            'password': settings.titulky.password,
            'skip_wrong_fps': settings.titulky.getboolean('skip_wrong_fps'),
            'approved_only': settings.titulky.getboolean('approved_only'),
            'multithreading': settings.titulky.getboolean('multithreading'),
        },
        'titlovi': {
            'username': settings.titlovi.username,
            'password': settings.titlovi.password,
        },
        'ktuvit': {
            'email': settings.ktuvit.email,
            'hashed_password': settings.ktuvit.hashed_password,
        },
        'embeddedsubtitles': {
            'include_ass': settings.embeddedsubtitles.getboolean('include_ass'),
            'include_srt': settings.embeddedsubtitles.getboolean('include_srt'),
            'cache_dir': os.path.join(args.config_dir, "cache"),
            'ffprobe_path': get_binary("ffprobe"),
            'ffmpeg_path': get_binary("ffmpeg"),
        },
    }
def __init__(self):
    # Resolve the ffprobe executable path once so later calls can reuse it;
    # may be None/falsy if get_binary cannot find it.
    self.ffprobe = get_binary("ffprobe")
def sync(self, video_path, srt_path, srt_lang, media_type, sonarr_series_id=None, sonarr_episode_id=None, radarr_id=None):
    """Synchronize *srt_path* against a reference stream chosen from the video.

    Reference selection order: embedded English subtitle track, any embedded
    subtitle track, audio track matching the subtitle language, English audio
    track, then the first audio track. Raises NoAudioTrack when the media has
    neither subtitle nor audio streams. Writes a history entry on success.
    """
    self.reference = video_path
    self.srtin = srt_path
    self.srtout = None
    ffprobe_exe = get_binary('ffprobe')
    if not ffprobe_exe:
        logging.debug('BAZARR FFprobe not found!')
        return
    else:
        logging.debug('BAZARR FFprobe used is %s', ffprobe_exe)
    api.initialize({'provider': 'ffmpeg', 'ffmpeg': ffprobe_exe})
    data = api.know(self.reference)
    using_what = None
    if 'subtitle' in data:
        # Prefer an embedded English subtitle track as the sync reference.
        for i, embedded_subs in enumerate(data['subtitle']):
            if 'language' in embedded_subs:
                language = embedded_subs['language'].alpha3
                if language == "eng":
                    using_what = "English embedded subtitle track"
                    self.reference_stream = "s:{}".format(i)
                    break
        # NOTE(review): assumes self.reference_stream is pre-initialized
        # (e.g. to None in __init__) — otherwise this attribute read would
        # raise AttributeError; confirm against the class definition.
        if not self.reference_stream:
            # Fall back to the first embedded subtitle track; `embedded_subs`
            # here is whatever the loop last iterated over.
            using_what = "{0} embedded subtitle track".format(
                language_from_alpha3(embedded_subs['language'].alpha3) or 'unknown language embedded subtitles '
                                                                          'track')
            self.reference_stream = "s:0"
    elif 'audio' in data:
        # First pass: an audio track in the same language as the subtitles.
        audio_tracks = data['audio']
        for i, audio_track in enumerate(audio_tracks):
            if 'language' in audio_track:
                language = audio_track['language'].alpha3
                if language == srt_lang:
                    using_what = "{0} audio track".format(language_from_alpha3(audio_track['language'].alpha3)
                                                          or 'unknown language audio track')
                    self.reference_stream = "a:{}".format(i)
                    break
        if not self.reference_stream:
            # Second pass: fall back to an English audio track.
            audio_tracks = data['audio']
            for i, audio_track in enumerate(audio_tracks):
                if 'language' in audio_track:
                    language = audio_track['language'].alpha3
                    if language == "eng":
                        using_what = "English audio track"
                        self.reference_stream = "a:{}".format(i)
                        break
            if not self.reference_stream:
                # Last resort: the first audio track.
                using_what = "first audio track"
                self.reference_stream = "a:0"
    else:
        raise NoAudioTrack
    ffmpeg_exe = get_binary('ffmpeg')
    if not ffmpeg_exe:
        logging.debug('BAZARR FFmpeg not found!')
        return
    else:
        logging.debug('BAZARR FFmpeg used is %s', ffmpeg_exe)
    self.ffmpeg_path = os.path.dirname(ffmpeg_exe)
    try:
        result = run(self)
    except Exception as e:
        logging.error('BAZARR an exception occurs during the synchronization process for this subtitles: '
                      + self.srtin)
    else:
        if result['sync_was_successful']:
            message = "{0} subtitles synchronization ended with an offset of {1} seconds and a framerate scale " \
                      "factor of {2} using {3} (0:{4}).".format(language_from_alpha3(srt_lang),
                                                                result['offset_seconds'],
                                                                result['framerate_scale_factor'], using_what,
                                                                self.reference_stream)
            if media_type == 'series':
                history_log(action=5, sonarr_series_id=sonarr_series_id, sonarr_episode_id=sonarr_episode_id,
                            description=message,
                            video_path=path_mappings.path_replace_reverse(self.reference),
                            language=alpha2_from_alpha3(srt_lang), subtitles_path=srt_path)
            else:
                history_log_movie(action=5, radarr_id=radarr_id, description=message,
                                  video_path=path_mappings.path_replace_reverse_movie(self.reference),
                                  language=alpha2_from_alpha3(srt_lang), subtitles_path=srt_path)
        else:
            logging.error('BAZARR unable to sync subtitles using {0}({1}): {2}'.format(using_what,
                                                                                       self.reference_stream,
                                                                                       self.srtin))
        return result
def parse_video_metadata(file, file_size, episode_file_id=None, movie_file_id=None):
    """Return ffprobe/enzyme metadata for *file*, backed by a per-file DB cache.

    The returned dict always contains the keys 'ffprobe', 'enzyme', 'file_id'
    and 'file_size'. Freshly probed results are pickled back into the
    episodes/movies table so later calls for an unchanged file can skip
    probing.
    """
    # Define default data keys value
    data = {
        'ffprobe': {},
        'enzyme': {},
        'file_id': episode_file_id if episode_file_id else movie_file_id,
        'file_size': file_size
    }

    # Get the actual cache value from database (FIX: comment typo "form")
    if episode_file_id:
        cache_key = database.execute(
            'SELECT ffprobe_cache FROM table_episodes WHERE episode_file_id=? AND file_size=?',
            (episode_file_id, file_size), only_one=True)
    elif movie_file_id:
        cache_key = database.execute(
            'SELECT ffprobe_cache FROM table_movies WHERE movie_file_id=? AND file_size=?',
            (movie_file_id, file_size), only_one=True)
    else:
        cache_key = None

    # check if we have a value for that cache key
    if not isinstance(cache_key, dict):
        # NOTE(review): when no cached row exists the defaults are returned
        # without probing the file — confirm this early return is intended.
        return data
    else:
        try:
            # Unpickle ffprobe cache
            cached_value = pickle.loads(cache_key['ffprobe_cache'])
        except Exception:
            # FIX: was a bare `except:` that also caught SystemExit and
            # KeyboardInterrupt; a corrupt cache simply falls through to
            # re-probing below.
            pass
        else:
            # Check if file size and file id matches and if so, we return the cached value
            if cached_value['file_size'] == file_size and cached_value[
                    'file_id'] in [episode_file_id, movie_file_id]:
                return cached_value

    # if not, we retrieve the metadata from the file
    from utils import get_binary
    ffprobe_path = get_binary("ffprobe")

    # if we have ffprobe available
    if ffprobe_path:
        api.initialize({'provider': 'ffmpeg', 'ffmpeg': ffprobe_path})
        data['ffprobe'] = api.know(file)
    # if not, we use enzyme for mkv files (FIX: comment typo "nto")
    else:
        if os.path.splitext(file)[1] == '.mkv':
            with open(file, 'rb') as f:
                try:
                    mkv = enzyme.MKV(f)
                except MalformedMKVError:
                    logging.error(
                        'BAZARR cannot analyze this MKV with our built-in MKV parser, you should install '
                        'ffmpeg/ffprobe: ' + file)
                else:
                    data['enzyme'] = mkv

    # we write to db the result and return the newly cached ffprobe dict
    if episode_file_id:
        database.execute(
            'UPDATE table_episodes SET ffprobe_cache=? WHERE episode_file_id=?',
            (pickle.dumps(data, pickle.HIGHEST_PROTOCOL), episode_file_id))
    elif movie_file_id:
        database.execute(
            'UPDATE table_movies SET ffprobe_cache=? WHERE movie_file_id=?',
            (pickle.dumps(data, pickle.HIGHEST_PROTOCOL), movie_file_id))
    return data
def parse_video_metadata(file, file_size, episode_file_id=None, movie_file_id=None, use_cache=True):
    """Return ffprobe/enzyme metadata for *file*, backed by a per-file DB cache.

    The returned dict always contains the keys 'ffprobe', 'enzyme', 'file_id'
    and 'file_size'. When *use_cache* is true, a matching pickled cache entry
    (same file_size and file_id) is returned as-is; otherwise the file is
    probed and the result is written back to the episodes/movies table.
    """
    # Define default data keys value
    data = {
        "ffprobe": {},
        "enzyme": {},
        "file_id": episode_file_id or movie_file_id,
        "file_size": file_size,
    }

    if use_cache:
        # Get the actual cache value from database
        if episode_file_id:
            cache_key = TableEpisodes.select(TableEpisodes.ffprobe_cache)\
                .where(TableEpisodes.path == path_mappings.path_replace_reverse(file))\
                .dicts()\
                .get()
        elif movie_file_id:
            cache_key = TableMovies.select(TableMovies.ffprobe_cache)\
                .where(TableMovies.path == path_mappings.path_replace_reverse_movie(file))\
                .dicts()\
                .get()
        else:
            cache_key = None

        # check if we have a value for that cache key
        try:
            # Unpickle ffprobe cache
            cached_value = pickle.loads(cache_key['ffprobe_cache'])
        except Exception:
            # FIX: was a bare `except:` that also caught SystemExit and
            # KeyboardInterrupt; a missing/corrupt cache falls through to
            # re-probing below.
            pass
        else:
            # Check if file size and file id matches and if so, we return the cached value
            if cached_value['file_size'] == file_size and cached_value[
                    'file_id'] in [episode_file_id, movie_file_id]:
                return cached_value

    # if not, we retrieve the metadata from the file
    from utils import get_binary
    ffprobe_path = get_binary("ffprobe")

    # if we have ffprobe available
    if ffprobe_path:
        api.initialize({"provider": "ffmpeg", "ffmpeg": ffprobe_path})
        data["ffprobe"] = api.know(file)
    # if not, we use enzyme for mkv files
    else:
        if os.path.splitext(file)[1] == ".mkv":
            with open(file, "rb") as f:
                try:
                    mkv = enzyme.MKV(f)
                except MalformedMKVError:
                    logger.error(
                        "BAZARR cannot analyze this MKV with our built-in MKV parser, you should install "
                        "ffmpeg/ffprobe: " + file)
                else:
                    data["enzyme"] = mkv

    # we write to db the result and return the newly cached ffprobe dict
    if episode_file_id:
        TableEpisodes.update({TableEpisodes.ffprobe_cache: pickle.dumps(data, pickle.HIGHEST_PROTOCOL)})\
            .where(TableEpisodes.path == path_mappings.path_replace_reverse(file))\
            .execute()
    elif movie_file_id:
        # FIX: the update dict wrongly keyed on TableEpisodes.ffprobe_cache
        # while updating TableMovies — movies' cache was never written
        # correctly; now keyed on TableMovies.ffprobe_cache.
        TableMovies.update({TableMovies.ffprobe_cache: pickle.dumps(data, pickle.HIGHEST_PROTOCOL)})\
            .where(TableMovies.path == path_mappings.path_replace_reverse_movie(file))\
            .execute()
    return data
def sync(self, video_path, srt_path, srt_lang, media_type, sonarr_series_id=None, sonarr_episode_id=None, radarr_id=None):
    """Synchronize *srt_path* in place (--overwrite-input) against the video
    and record a history entry when the sync succeeds."""
    self.reference = video_path
    self.srtin = srt_path
    self.srtout = None
    self.args = None

    # Both ffprobe and ffmpeg must be available; bail out quietly otherwise.
    ffprobe_exe = get_binary('ffprobe')
    if not ffprobe_exe:
        logging.debug('BAZARR FFprobe not found!')
        return
    logging.debug('BAZARR FFprobe used is %s', ffprobe_exe)

    ffmpeg_exe = get_binary('ffmpeg')
    if not ffmpeg_exe:
        logging.debug('BAZARR FFmpeg not found!')
        return
    logging.debug('BAZARR FFmpeg used is %s', ffmpeg_exe)

    self.ffmpeg_path = os.path.dirname(ffmpeg_exe)
    try:
        # Assemble the ffsubsync command line and execute it in-process.
        cli = [self.reference, '-i', self.srtin, '--overwrite-input',
               '--ffmpegpath', self.ffmpeg_path, '--vad', self.vad]
        self.args = make_parser().parse_args(args=cli)
        result = run(self.args)
    except Exception:
        logging.exception(
            'BAZARR an exception occurs during the synchronization process for this subtitles: '
            '{0}'.format(self.srtin))
    else:
        if result['sync_was_successful']:
            scale = "{:.2f}".format(result['framerate_scale_factor'])
            message = "{0} subtitles synchronization ended with an offset of {1} seconds and a framerate scale " \
                      "factor of {2}.".format(language_from_alpha3(srt_lang), result['offset_seconds'], scale)
            if media_type == 'series':
                history_log(action=5, sonarr_series_id=sonarr_series_id,
                            sonarr_episode_id=sonarr_episode_id, description=message,
                            video_path=path_mappings.path_replace_reverse(self.reference),
                            language=alpha2_from_alpha3(srt_lang), subtitles_path=srt_path)
            else:
                history_log_movie(action=5, radarr_id=radarr_id, description=message,
                                  video_path=path_mappings.path_replace_reverse_movie(self.reference),
                                  language=alpha2_from_alpha3(srt_lang), subtitles_path=srt_path)
        else:
            logging.error('BAZARR unable to sync subtitles: {0}'.format(self.srtin))
        return result
logging.info("Using %s again after %s, (disabled because: %s)", provider, throttle_desc, reason) del tp[provider] set_throttled_providers(str(tp)) # if forced only is enabled: # fixme: Prepared for forced only implementation to remove providers with don't support forced only subtitles # for provider in providers_list: # if provider in PROVIDERS_FORCED_OFF: # providers_list.remove(provider) if not providers_list: providers_list = None return providers_list _FFPROBE_BINARY = get_binary("ffprobe") _FFMPEG_BINARY = get_binary("ffmpeg") def get_providers_auth(): return { 'addic7ed': { 'username': settings.addic7ed.username, 'password': settings.addic7ed.password, 'cookies': settings.addic7ed.cookies, 'user_agent': settings.addic7ed.user_agent, 'is_vip': settings.addic7ed.getboolean('vip'), }, 'opensubtitles': { 'username': settings.opensubtitles.username, 'password': settings.opensubtitles.password,