def download(obj, provider, language, age, directory, encoding, single, force, hearing_impaired, min_score, max_workers, verbose, path): """Download best subtitles. PATH can be an directory containing videos, a video file path or a video file name. It can be used multiple times. If an existing subtitle is detected (external or embedded) in the correct language, the download is skipped for the associated video. """ # process parameters language = set(language) # scan videos videos = [] ignored_videos = [] errored_paths = [] with click.progressbar(path, label='Collecting videos', item_show_func=lambda p: p or '') as bar: for p in bar: logger.debug('Collecting path %s', p) # non-existing if not os.path.exists(p): try: video = Video.fromname(p) except: logger.exception('Unexpected error while collecting non-existing path %s', p) errored_paths.append(p) continue videos.append(video) continue # directories if os.path.isdir(p): try: scanned_videos = scan_videos(p, subtitles=not force, embedded_subtitles=not force, subtitles_dir=directory, age=age) except: logger.exception('Unexpected error while collecting directory path %s', p) errored_paths.append(p) continue for video in scanned_videos: if check_video(video, languages=language, age=age, undefined=single): videos.append(video) else: ignored_videos.append(video) continue # other inputs try: video = scan_video(p, subtitles=not force, embedded_subtitles=not force, subtitles_dir=directory) except: logger.exception('Unexpected error while collecting path %s', p) errored_paths.append(p) continue if check_video(video, languages=language, age=age, undefined=single): videos.append(video) else: ignored_videos.append(video) # output errored paths if verbose > 0: for p in errored_paths: click.secho('%s errored' % p, fg='red') # output ignored videos if verbose > 1: for video in ignored_videos: click.secho('%s ignored - subtitles: %s / age: %d day%s' % ( os.path.split(video.name)[1], ', '.join(str(s) for s in video.subtitle_languages) or 'none', video.age.days, 's' if video.age.days > 1 else '' ), fg='yellow') # report collected videos click.echo('%s video%s collected / %s video%s ignored / %s error%s' % ( click.style(str(len(videos)), bold=True, fg='green' if videos else None), 's' if len(videos) > 1 else '', click.style(str(len(ignored_videos)), bold=True, fg='yellow' if ignored_videos else None), 's' if len(ignored_videos) > 1 else '', click.style(str(len(errored_paths)), bold=True, fg='red' if errored_paths else None), 's' if len(errored_paths) > 1 else '', )) # exit if no video collected if not videos: return # download best subtitles downloaded_subtitles = defaultdict(list) with AsyncProviderPool(max_workers=max_workers, providers=provider, provider_configs=obj['provider_configs']) as p: with click.progressbar(videos, label='Downloading subtitles', item_show_func=lambda v: os.path.split(v.name)[1] if v is not None else '') as bar: for v in bar: scores = get_scores(v) subtitles = p.download_best_subtitles(p.list_subtitles(v, language - v.subtitle_languages), v, language, min_score=scores['hash'] * min_score / 100, hearing_impaired=hearing_impaired, only_one=single) downloaded_subtitles[v] = subtitles # save subtitles total_subtitles = 0 for v, subtitles in downloaded_subtitles.items(): saved_subtitles = save_subtitles(v, subtitles, single=single, directory=directory, encoding=encoding) total_subtitles += len(saved_subtitles) if verbose > 0: click.echo('%s subtitle%s downloaded for %s' % (click.style(str(len(saved_subtitles)), bold=True), 's' if len(saved_subtitles) > 1 else '', os.path.split(v.name)[1])) if verbose > 1: for s in saved_subtitles: matches = s.get_matches(v) score = compute_score(s, v) # score color score_color = None scores = get_scores(v) if isinstance(v, Movie): if score < scores['title']: score_color = 'red' elif score < scores['title'] + scores['year'] + scores['release_group']: score_color = 'yellow' else: score_color = 'green' elif isinstance(v, Episode): if score < scores['series'] + scores['season'] + scores['episode']: score_color = 'red' elif score < scores['series'] + scores['season'] + scores['episode'] + scores['release_group']: score_color = 'yellow' else: score_color = 'green' # scale score from 0 to 100 taking out preferences scaled_score = score if s.hearing_impaired == hearing_impaired: scaled_score -= scores['hearing_impaired'] scaled_score *= 100 / scores['hash'] # echo some nice colored output click.echo(' - [{score}] {language} subtitle from {provider_name} (match on {matches})'.format( score=click.style('{:5.1f}'.format(scaled_score), fg=score_color, bold=score >= scores['hash']), language=s.language.name if s.language.country is None else '%s (%s)' % (s.language.name, s.language.country.name), provider_name=s.provider_name, matches=', '.join(sorted(matches, key=scores.get, reverse=True)) )) if verbose == 0: click.echo('Downloaded %s subtitle%s' % (click.style(str(total_subtitles), bold=True), 's' if total_subtitles > 1 else ''))
def compute_score(matches, subtitle, video, hearing_impaired=None, score_obj=None): """Compute the score of the `subtitle` against the `video` with `hearing_impaired` preference. patch: - remove upper bounds of score - re-add matches argument and remove get_matches from here :func:`compute_score` uses the :meth:`Subtitle.get_matches <subliminal.subtitle.Subtitle.get_matches>` method and applies the scores (either from :data:`episode_scores` or :data:`movie_scores`) after some processing. :param subtitle: the subtitle to compute the score of. :type subtitle: :class:`~subliminal.subtitle.Subtitle` :param video: the video to compute the score against. :type video: :class:`~subliminal.video.Video` :param bool hearing_impaired: hearing impaired preference. :return: score of the subtitle. :rtype: int """ logger.info('%r: Computing score for video %r with %r', subtitle, video, dict(hearing_impaired=hearing_impaired)) if score_obj is not None: scores = score_obj.scores score_obj.check_custom_profiles(subtitle, matches) else: scores = get_scores(video) is_episode = isinstance(video, Episode) is_movie = isinstance(video, Movie) episode_hash_valid_if = {"series", "season", "episode", "source"} movie_hash_valid_if = {"video_codec", "source"} orig_matches = matches.copy() # on hash match, discard everything else if subtitle.hash_verifiable and 'hash' in matches: # hash is error-prone, try to fix that hash_valid_if = episode_hash_valid_if if is_episode else movie_hash_valid_if # don't validate hashes of specials, as season and episode tend to be wrong if is_movie or not video.is_special: if hash_valid_if <= set(matches): # series, season and episode matched, hash is valid logger.debug( '%r: Using valid hash, as %s are correct (%r) and (%r)', subtitle, hash_valid_if, matches, video) matches &= {'hash'} else: # no match, invalidate hash logger.debug( '%r: Ignoring hash as other matches are wrong (missing: %r) and (%r)', subtitle, hash_valid_if - matches, video) matches -= {"hash"} elif 'hash' in matches: logger.debug('%r: Hash not verifiable for this provider. Keeping it', subtitle) matches &= {'hash'} # handle equivalent matches eq_matches = set() if is_episode: _episode_checks(video, eq_matches, matches) elif is_movie and 'imdb_id' in matches: logger.debug('Adding imdb_id match equivalents') eq_matches |= {'title', 'year'} matches |= eq_matches # handle hearing impaired if hearing_impaired is not None and subtitle.hearing_impaired == hearing_impaired: logger.debug('Matched hearing_impaired') matches.add('hearing_impaired') orig_matches.add('hearing_impaired') # compute the score score = sum((scores.get(match, 0) for match in matches)) logger.info('%r: Computed score %r with final matches %r', subtitle, score, matches) score_without_hash = sum((scores.get(match, 0) for match in orig_matches | eq_matches if match != "hash")) return score, score_without_hash
def download(obj, provider, language, age, directory, encoding, single, force, hearing_impaired, min_score, max_workers, verbose, path): """Download best subtitles. PATH can be an directory containing videos, a video file path or a video file name. It can be used multiple times. If an existing subtitle is detected (external or embedded) in the correct language, the download is skipped for the associated video. """ # process parameters language = set(language) # scan videos videos = [] ignored_videos = [] errored_paths = [] with click.progressbar(path, label='Collecting videos', item_show_func=lambda p: p or '') as bar: for p in bar: logger.debug('Collecting path %s', p) # non-existing if not os.path.exists(p): try: video = Video.fromname(p) except: logger.exception( 'Unexpected error while collecting non-existing path %s', p) errored_paths.append(p) continue videos.append(video) continue # directories if os.path.isdir(p): try: scanned_videos = scan_videos(p, subtitles=not force, embedded_subtitles=not force, subtitles_dir=directory, age=age) except: logger.exception( 'Unexpected error while collecting directory path %s', p) errored_paths.append(p) continue for video in scanned_videos: if check_video(video, languages=language, age=age, undefined=single): videos.append(video) else: ignored_videos.append(video) continue # other inputs try: video = scan_video(p, subtitles=not force, embedded_subtitles=not force, subtitles_dir=directory) except: logger.exception('Unexpected error while collecting path %s', p) errored_paths.append(p) continue if check_video(video, languages=language, age=age, undefined=single): videos.append(video) else: ignored_videos.append(video) # output errored paths if verbose > 0: for p in errored_paths: click.secho('%s errored' % p, fg='red') # output ignored videos if verbose > 1: for video in ignored_videos: click.secho( '%s ignored - subtitles: %s / age: %d day%s' % (os.path.split(video.name)[1], ', '.join(str(s) for s in video.subtitle_languages) or 'none', video.age.days, 's' if video.age.days > 1 else ''), fg='yellow') # report collected videos click.echo('%s video%s collected / %s video%s ignored / %s error%s' % ( click.style( str(len(videos)), bold=True, fg='green' if videos else None), 's' if len(videos) > 1 else '', click.style(str(len(ignored_videos)), bold=True, fg='yellow' if ignored_videos else None), 's' if len(ignored_videos) > 1 else '', click.style(str(len(errored_paths)), bold=True, fg='red' if errored_paths else None), 's' if len(errored_paths) > 1 else '', )) # exit if no video collected if not videos: return # download best subtitles downloaded_subtitles = defaultdict(list) with AsyncProviderPool(max_workers=max_workers, providers=provider, provider_configs=obj['provider_configs']) as p: with click.progressbar( videos, label='Downloading subtitles', item_show_func=lambda v: os.path.split(v.name)[1] if v is not None else '') as bar: for v in bar: scores = get_scores(v) subtitles = p.download_best_subtitles( p.list_subtitles(v, language - v.subtitle_languages), v, language, min_score=scores['hash'] * min_score / 100, hearing_impaired=hearing_impaired, only_one=single) downloaded_subtitles[v] = subtitles # save subtitles total_subtitles = 0 for v, subtitles in downloaded_subtitles.items(): saved_subtitles = save_subtitles(v, subtitles, single=single, directory=directory, encoding=encoding) total_subtitles += len(saved_subtitles) if verbose > 0: click.echo( '%s subtitle%s downloaded for %s' % (click.style(str(len(saved_subtitles)), bold=True), 's' if len(saved_subtitles) > 1 else '', os.path.split(v.name)[1])) if verbose > 1: for s in saved_subtitles: matches = s.get_matches(v) score = compute_score(s, v) # score color score_color = None scores = get_scores(v) if isinstance(v, Movie): if score < scores['title']: score_color = 'red' elif score < scores['title'] + scores['year'] + scores[ 'release_group']: score_color = 'yellow' else: score_color = 'green' elif isinstance(v, Episode): if score < scores['series'] + scores['season'] + scores[ 'episode']: score_color = 'red' elif score < scores['series'] + scores['season'] + scores[ 'episode'] + scores['release_group']: score_color = 'yellow' else: score_color = 'green' # scale score from 0 to 100 taking out preferences scaled_score = score if s.hearing_impaired == hearing_impaired: scaled_score -= scores['hearing_impaired'] scaled_score *= 100 / scores['hash'] # echo some nice colored output click.echo( ' - [{score}] {language} subtitle from {provider_name} (match on {matches})' .format(score=click.style('{:5.1f}'.format(scaled_score), fg=score_color, bold=score >= scores['hash']), language=s.language.name if s.language.country is None else '%s (%s)' % (s.language.name, s.language.country.name), provider_name=s.provider_name, matches=', '.join( sorted(matches, key=scores.get, reverse=True)))) if verbose == 0: click.echo('Downloaded %s subtitle%s' % (click.style(str(total_subtitles), bold=True), 's' if total_subtitles > 1 else ''))