def subtitle_language():
    """Subtitle language property.

    :return:
    :rtype: Rebulk
    """
    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE | re.UNICODE, abbreviations=[alt_dash])
    rebulk.defaults(name='subtitle_language', validator=seps_surround)

    # special handling
    rebulk.regex(r'Legenda(?:s|do)?@PT-?BR', value=babelfish.Language('por', 'BR'))
    rebulk.regex(r'Legenda(?:s|do)?@PT(?!-?BR)', value=babelfish.Language('por'))

    rebulk.regex('Subtitulado@ESP(?:a[nñ]ol)?@Spanish', 'Subtitulado@ESP(?:a[nñ]ol)?',
                 value=babelfish.Language('spa'),
                 conflict_solver=lambda match, other: other if other.name == 'language' else '__default__')

    # undefined language
    rebulk.regex('Subtitles', 'Legenda(?:s|do)', 'Subbed', 'Sub(?:title)?s?@Latino',
                 value='und', formatter=babelfish.Language, tags='subtitle.undefined')

    rebulk.rules(RemoveSubtitleUndefined)

    return rebulk

def getEmbeddedLanguages(video_path):
    embedded_subtitle_languages = set()
    try:
        with io.open(video_path, 'rb') as f:
            mkv = MKV(f)
            if mkv.subtitle_tracks:
                for st in mkv.subtitle_tracks:
                    if st.language:
                        try:
                            embedded_subtitle_languages.add(babelfish.Language.fromalpha3b(st.language))
                        except babelfish.Error:
                            sickrage.LOGGER.debug('Embedded subtitle track is not a valid language')
                            embedded_subtitle_languages.add(babelfish.Language('und'))
                    elif st.name:
                        try:
                            embedded_subtitle_languages.add(babelfish.Language.fromname(st.name))
                        except babelfish.Error:
                            sickrage.LOGGER.debug('Embedded subtitle track is not a valid language')
                            embedded_subtitle_languages.add(babelfish.Language('und'))
                    else:
                        embedded_subtitle_languages.add(babelfish.Language('und'))
            else:
                sickrage.LOGGER.debug('MKV has no subtitle track')
    except MalformedMKVError:
        sickrage.LOGGER.info('MKV seems to be malformed ( %s ), ignoring embedded subtitles' % video_path)

    return embedded_subtitle_languages

def scan_subtitle_languages(path):
    language_extensions = tuple('.' + c for c in babelfish.language_converters['opensubtitles'].codes)
    dirpath, filename = os.path.split(path)
    subtitles = set()
    for p in os.listdir(dirpath):
        if not isinstance(p, bytes) and p.startswith(os.path.splitext(filename)[0]) \
                and p.endswith(subliminal.SUBTITLE_EXTENSIONS):
            root = os.path.splitext(p)[0]
            # use == for integer comparison; `is` only works by accident for small ints
            if root.endswith(language_extensions) and len(root.rsplit('.', 1)[1]) == 2:
                subtitles.add(babelfish.Language.fromopensubtitles(root[-2:]))
            elif root.endswith(language_extensions) and len(root.rsplit('.', 1)[1]) == 3:
                subtitles.add(babelfish.Language.fromopensubtitles(root[-3:]))
            elif root.endswith('pt-BR') and len(root.rsplit('.', 1)[1]) == 5:
                subtitles.add(babelfish.Language.fromopensubtitles('pob'))
            else:
                subtitles.add(babelfish.Language('und'))
    return subtitles

def download_subtitles(movie_title):
    # Create a subliminal.Video object from the movie title.
    video = subliminal.Video.fromname(movie_title)
    print("Downloading subtitles for '", movie_title, "'...", sep='')

    # Download subtitles for the created video object. If several are
    # available, the subtitles with the highest score are chosen. All available
    # providers are used for searching.
    best_subtitles = subliminal.download_best_subtitles({video}, {babelfish.Language('eng')})
    if not best_subtitles[video]:
        print("No subtitles found for '", movie_title, "'...", sep='')
        return []

    # This line can enable saving downloaded files for further use. The default
    # directory is the directory where the running script is located.
    # Note: when the script is running in non-sudo mode on Linux,
    # downloaded files will be saved in the user's home directory.
    # subliminal.save_subtitles(video, [best_subtitles[video][0]])

    # Convert the list of subtitles to a string, so the pysrt module can then
    # convert it to its own format.
    subtitles_string = ''
    for item in [best_subtitles[video][0]]:
        subtitles_string += item.text

    # Convert the string to pysrt's own representation (subtitle items with
    # text and timing) and return it.
    return pysrt.from_string(subtitles_string)

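# Example usage of the helper above (a minimal sketch; the title is hypothetical
# and a provider hit is not guaranteed, in which case an empty list comes back):
subs = download_subtitles('Big Buck Bunny')
for item in subs[:3]:
    print(item.text)
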
def download_subtitles(directory):
    name = 'dogpile.cache.dbm'
    cache_file = get_cache_file(name)
    region.configure('dogpile.cache.dbm', arguments={'filename': cache_file})
    videos = scan_videos(directory)
    subtitles = download_best_subtitles(videos, {babelfish.Language('eng')})
    for video in videos:
        save_subtitles(video, subtitles[video], single=True)

def language():
    """Language property.

    :return:
    :rtype: Rebulk
    """
    rebulk = Rebulk().regex_defaults(flags=re.IGNORECASE, abbreviations=[dash])
    rebulk.defaults(name='language', validator=seps_surround)

    rebulk.regex('SPANISH-?AUDIO', r'(?:Espa[.]ol-)?castellano', value=babelfish.Language('spa'))
    rebulk.regex('german-dubbed', 'dubbed-german', value=babelfish.Language('deu'))
    rebulk.regex('english-dubbed', value=babelfish.Language('eng'))
    rebulk.regex('dublado', value='und', formatter=babelfish.Language)

    return rebulk

def downloadSubtitle_OpenSubtitles_heb(videoname, folder):
    video = subliminal.Video.fromname(videoname)
    best_subtitles = subliminal.download_best_subtitles(
        [video], {babelfish.Language('heb')}, providers=None)
    best_subtitle = best_subtitles[video][0]
    file_name = folder + "/" + videoname + ".srt"
    subtitle = best_subtitle.content
    with open(file_name, 'wb') as f:
        f.write(subtitle)

def download(self, imdb_id):
    subtitles = self.provider.query(set([babelfish.Language('eng')]), imdb_id=imdb_id)
    # import pdb; pdb.set_trace()
    for subtitle in subtitles:
        try:
            return self.provider.download_subtitle(subtitle)
        except Exception as e:
            print(e)
            continue

def __init__(self, video_obj, lang=babelfish.Language("eng")):
    """
    Class constructor which receives as input a video object of a movie or
    TV series episode and the language of the video.

    :param video_obj: video object that contains a movie's or TV series episode's details
    :param lang: the language of the video as a babelfish object
    :return: None
    """
    self._video_obj = video_obj
    self._lang = lang

def get_path(alpha3t_code):
    language = babelfish.Language.fromalpha3t(alpha3t_code)
    output_path = output_directory
    directory = 'values-' + language.alpha2
    if language == babelfish.Language('eng'):
        directory = 'values'
    path = output_path + os.path.sep + directory + os.path.sep
    if not os.path.exists(path):
        os.makedirs(path)
    return path + 'strings.xml'

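# A quick sketch of the mapping (assumes the module-level `output_directory`
# is defined, e.g. an Android project's res/ folder; the path below is
# hypothetical):
output_directory = '/tmp/res'
print(get_path('fra'))  # /tmp/res/values-fr/strings.xml
print(get_path('eng'))  # /tmp/res/values/strings.xml
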
def download_best_subtitles(videos, languages, providers=None, provider_configs=None, min_score=0,
                            hearing_impaired=None, single=False):
    """Download the best subtitles for `videos` with the given `languages` using the specified `providers`

    :param videos: videos to download subtitles for
    :type videos: set of :class:`~subliminal.video.Video`
    :param languages: languages of subtitles to download
    :type languages: set of :class:`babelfish.Language`
    :param providers: providers to use for the search, if not all
    :type providers: list of string or None
    :param provider_configs: configuration for providers
    :type provider_configs: dict of provider name => provider constructor kwargs or None
    :param int min_score: minimum score for subtitles to download
    :param bool hearing_impaired: download hearing impaired subtitles; None ignores the flag
    :param bool single: do not download for videos with an undetermined subtitle language detected
    """
    downloaded_subtitles = collections.defaultdict(list)
    with ProviderPool(providers, provider_configs) as pp:
        for video in videos:
            # filter
            if single and babelfish.Language('und') in video.subtitle_languages:
                logger.debug('Skipping video %r: undetermined language found', video)
                continue

            # list
            logger.info('Listing subtitles for %r', video)
            video_subtitles = pp.list_subtitles(video, languages)
            logger.info('Found %d subtitles total', len(video_subtitles))

            # download
            downloaded_languages = set()
            for subtitle, score in sorted([(s, s.compute_score(video)) for s in video_subtitles],
                                          key=operator.itemgetter(1), reverse=True):
                if score < min_score:
                    logger.info('No subtitle with score >= %d', min_score)
                    break
                # Ignore the hearing_impaired check if None
                if hearing_impaired is not None:
                    # Skip the subtitle if the hearing_impaired flag does not match
                    if subtitle.hearing_impaired != hearing_impaired:
                        logger.debug('Skipping subtitle: hearing impaired != %r', hearing_impaired)
                        continue
                if subtitle.language in downloaded_languages:
                    logger.debug('Skipping subtitle: %r already downloaded', subtitle.language)
                    continue
                logger.info('Downloading subtitle %r with score %d', subtitle, score)
                if pp.download_subtitle(subtitle):
                    downloaded_languages.add(subtitle.language)
                    downloaded_subtitles[video].append(subtitle)
                if single or downloaded_languages == languages:
                    logger.debug('All languages downloaded')
                    break
    return downloaded_subtitles

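# Usage sketch for the pool-based downloader above (paths and scores are
# illustrative; assumes scan_video/save_subtitles helpers like the ones
# elsewhere in this collection):
videos = {scan_video('/data/movies/Some.Movie.2014.720p.mkv')}
subs = download_best_subtitles(videos, {babelfish.Language('eng'), babelfish.Language('fra')},
                               min_score=50, hearing_impaired=False)
for video, subtitles in subs.items():
    save_subtitles(video, subtitles)
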
def download_subtitles(directory):
    if not directory:
        directory = os.getcwd()
    logger.info('Downloading subtitles for videos in {}'.format(directory))
    backend = 'dogpile.cache.dbm'
    cache_file = u.get_cache_file('subliminal.cache')
    region.configure(backend, arguments={'filename': cache_file})
    videos = scan_videos(directory)
    subtitles = download_best_subtitles(videos, {babelfish.Language('eng')})
    for video in videos:
        save_subtitles(video, subtitles[video], single=True)

def download_sub(self, lang='eng'):
    prov_conf = {'opensubtitles': {'username': '******', 'password': '******'}}
    logging.info("{}: Downloading subtitles...".format(self.filename))
    vid = scan_video(self.path)
    best_subs = download_best_subtitles({vid}, {babelfish.Language(lang)},
                                        only_one=True, provider_configs=prov_conf)
    if best_subs[vid]:
        sub = best_subs[vid][0]
        save_subtitles(vid, [sub], single=True)
        logging.info("{}: Subtitles successfully downloaded.".format(self.filename))
    else:
        logging.error("{}: No subtitles found online.".format(self.filename))

def execute(args):
    """Executes SubLime with the given arguments."""
    videos = []
    selected_languages = [
        babelfish.Language(selected_lang)
        for selected_lang in args.selected_languages
    ]

    # List of filenames directly given by the user
    if args.video_files:
        videos = [
            VideoFactory.make_from_filename(video_filename)
            for video_filename in args.video_files
        ]
    # Or list of filenames built by walking through directories
    elif args.directories:
        for movie_dir in args.directories:
            for root, _, files in os.walk(movie_dir):
                for name in files:
                    video_filename = os.path.join(root, name)
                    video = VideoFactory.make_from_filename(video_filename)
                    if video:
                        videos.append(video)

    # Inform the user when a subtitle already exists for a video
    for video in videos:
        for selected_lang in selected_languages:
            if video.has_subtitle(selected_lang):
                video_type = video.__class__.__name__
                video_name = os.path.basename(video.filename)
                if not args.force:
                    LOG.warning(
                        "{} named {} already has a subtitle "
                        "for {} and nothing will happen for it! "
                        "Use option '-f --force' to replace.".format(
                            video_type, video_name, selected_lang.name))
                else:
                    LOG.info(
                        'Replacing {} subtitle for {} named {}.'.format(
                            selected_lang.name, video_type, video_name))
                    video.languages_to_download.append(selected_lang)
            else:
                video.languages_to_download.append(selected_lang)

    # Search subtitles for the videos
    for sub_server in SubtitleProvider.get_providers():
        sub_server.connect()
        sub_server.download_subtitles(
            videos, selected_languages, args.rename,
            args.rename_pattern, args.underscore)
        sub_server.disconnect()

def execute(self, props, pv_props, context):
    """Language detection using name."""
    if 'language' in props:
        return

    if 'name' in props:
        name = props.get('name', '')
        match = self.name_re.match(name)
        if match:
            try:
                return babelfish.Language.fromname(match.group('name'))
            except babelfish.Error:
                pass
        logger.info('Invalid %s: %r', self.description, name)
        return babelfish.Language('und')

def subtitlesLanguages(video_path):
    """Return a list of detected subtitle languages for the given video file"""
    resultList = []
    should_save_subtitles = None
    embedded_subtitle_languages = set()

    if not sickrage.EMBEDDED_SUBTITLES_ALL and video_path.endswith('.mkv'):
        embedded_subtitle_languages = getEmbeddedLanguages(video_path.encode(sickrage.SYS_ENCODING))

    # Search subtitles with the absolute path
    if os.path.isabs(sickrage.SUBTITLES_DIR):
        video_path = os.path.join(sickrage.SUBTITLES_DIR, os.path.basename(video_path))
    # Search subtitles with the relative path
    elif sickrage.SUBTITLES_DIR:
        check_subtitles_path = os.path.join(os.path.dirname(video_path), sickrage.SUBTITLES_DIR)
        if not os.path.exists(check_subtitles_path):
            getSubtitlesPath(video_path)
        video_path = os.path.join(os.path.dirname(video_path), sickrage.SUBTITLES_DIR,
                                  os.path.basename(video_path))
    else:
        video_path = os.path.join(os.path.dirname(video_path), os.path.basename(video_path))

    if not sickrage.EMBEDDED_SUBTITLES_ALL and video_path.endswith('.mkv'):
        external_subtitle_languages = scan_subtitle_languages(video_path)
        subtitle_languages = external_subtitle_languages.union(embedded_subtitle_languages)
        if not sickrage.SUBTITLES_MULTI:
            currentWantedLanguages = wantedLanguages()
            if len(currentWantedLanguages) == 1 and babelfish.Language('und') in external_subtitle_languages:
                if embedded_subtitle_languages not in currentWantedLanguages \
                        and babelfish.Language('und') in embedded_subtitle_languages:
                    subtitle_languages.add(fromietf(currentWantedLanguages[0]))
                    should_save_subtitles = True
                elif embedded_subtitle_languages not in currentWantedLanguages \
                        and babelfish.Language('und') not in embedded_subtitle_languages:
                    subtitle_languages.remove(babelfish.Language('und'))
                    subtitle_languages.add(fromietf(currentWantedLanguages[0]))
                    should_save_subtitles = True
    else:
        subtitle_languages = scan_subtitle_languages(video_path)
        if not sickrage.SUBTITLES_MULTI:
            if len(wantedLanguages()) == 1 and babelfish.Language('und') in subtitle_languages:
                subtitle_languages.remove(babelfish.Language('und'))
                subtitle_languages.add(fromietf(wantedLanguages()[0]))
                should_save_subtitles = True

    for language in subtitle_languages:
        if hasattr(language, 'opensubtitles') and language.opensubtitles:
            resultList.append(language.opensubtitles)
        elif hasattr(language, 'alpha3b') and language.alpha3b:
            resultList.append(language.alpha3b)
        elif hasattr(language, 'alpha3t') and language.alpha3t:
            resultList.append(language.alpha3t)
        elif hasattr(language, 'alpha2') and language.alpha2:
            resultList.append(language.alpha2)

    return sorted(resultList), should_save_subtitles

def handle(self, value, context: typing.MutableMapping):
    """Handle languages."""
    try:
        if len(value) == 3:
            return babelfish.Language.fromalpha3b(value)
        return babelfish.Language.fromietf(value)
    except (babelfish.Error, ValueError):
        pass

    try:
        return babelfish.Language.fromname(value)
    except babelfish.Error:
        pass

    self.report(value, context)
    return babelfish.Language('und')

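# The fallback chain above leans on plain babelfish constructors; for reference,
# this is what each branch resolves (inputs chosen for illustration):
babelfish.Language.fromalpha3b('fre')   # 3-letter bibliographic code -> French
babelfish.Language.fromietf('pt-BR')    # IETF tag with country       -> Portuguese (Brazil)
babelfish.Language.fromname('English')  # full English name           -> English
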
def scan_subtitle_languages(path):
    """Search for subtitles with alpha2 extension from a video `path` and return their language

    :param string path: path to the video
    :return: found subtitle languages
    :rtype: set

    """
    language_extensions = tuple('.' + c for c in babelfish.language_converters['alpha2'].codes)
    dirpath, filename = os.path.split(path)
    subtitles = set()
    for p in os.listdir(dirpath):
        if not isinstance(p, bytes) and p.startswith(os.path.splitext(filename)[0]) \
                and p.endswith(SUBTITLE_EXTENSIONS):
            if os.path.splitext(p)[0].endswith(language_extensions):
                subtitles.add(babelfish.Language.fromalpha2(os.path.splitext(p)[0][-2:]))
            else:
                subtitles.add(babelfish.Language('und'))
    logger.debug('Found subtitles %r', subtitles)
    return subtitles

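# Self-contained sketch of the naming convention the scanner expects (file
# names are hypothetical):
import os
import tempfile

tmp = tempfile.mkdtemp()
for name in ('Show.S01E01.mkv', 'Show.S01E01.en.srt', 'Show.S01E01.fr.srt', 'Show.S01E01.srt'):
    open(os.path.join(tmp, name), 'w').close()
print(scan_subtitle_languages(os.path.join(tmp, 'Show.S01E01.mkv')))
# -> languages 'en' and 'fr' from the suffixed files, plus 'und' for the bare .srt
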
def as_language(l):
    '''Converts the language string into a :py:class:`babelfish.Language` object

    This method tries a conversion using the following techniques:

    1. Tries the IETF standard
    2. Tries the ISO 639 3-letter B standard
    3. Finally, if everything else fails, tries a simple call to the base class
       with the input string.

    An exception (from :py:mod:`babelfish`) is raised in case of problems.

    Parameters:

      l (str): An ISO 639 3-letter string for the language to convert. This
        method also accepts 2-letter or 2+2-letter identifiers

    Returns:

      babelfish.Language: A language object with the normalized language
      definition

    Raises:

      ValueError: If it cannot convert the language

    '''
    try:
        return babelfish.Language.fromietf(l)
    except Exception:
        pass

    try:
        return babelfish.Language.fromalpha3b(l)
    except Exception:
        return babelfish.Language(l)

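# Accepted spellings, for illustration:
as_language('en')     # IETF / alpha2      -> English
as_language('pt-BR')  # IETF with country  -> Portuguese (Brazil)
as_language('fre')    # ISO 639-2/B        -> French
as_language('eng')    # plain alpha3       -> English
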
def setUp(self):
    self.languages = ['eng', 'fra']
    self.babel_languages = [babelfish.Language(code) for code in self.languages]
    self.mock_hashcode = lambda filepath: "8fcf0167e19c41be"

    self.video_filename = os.path.join(get_exe_dir(), 'Tests', 'Fixtures', 'movie.avi')
    self.video2_filename = os.path.join(get_exe_dir(), 'Tests', 'Fixtures', 'small.mp4')

    self.expected_french_subtitle_filename = \
        os.path.join(get_exe_dir(), 'Tests', 'Fixtures', 'movie.fr.srt')
    self.expected_english_subtitle_filename = \
        os.path.join(get_exe_dir(), 'Tests', 'Fixtures', 'movie.en.srt')

    self.expected_renamed_video_filename = os.path.join(
        get_exe_dir(), 'Tests', 'Fixtures', 'Louie_S01E01_Pilot.avi')
    self.expected_renamed_french_subtitle_filename = os.path.join(
        get_exe_dir(), 'Tests', 'Fixtures', 'Louie_S01E01_Pilot.fr.srt')
    self.expected_renamed_english_subtitle_filename = os.path.join(
        get_exe_dir(), 'Tests', 'Fixtures', 'Louie_S01E01_Pilot.en.srt')

def guess_video_metadata(filename):
    """Gets the video metadata properties out of a given file. The file needs to
    exist on the filesystem to be able to be analyzed. An empty guess is
    returned otherwise.

    You need to have the Enzyme python package installed for this to work."""
    result = Guess()

    def found(prop, value):
        result[prop] = value
        log.debug('Found with enzyme %s: %s' % (prop, value))

    # first get the size of the file, in bytes
    try:
        size = os.stat(filename).st_size
        found('fileSize', size)
    except Exception as e:
        log.error('Cannot get video file size: %s' % e)
        # file probably does not exist, we might as well return now
        return result

    # then get additional metadata from the file using enzyme, if available
    try:
        import enzyme

        with open(filename) as f:
            mkv = enzyme.MKV(f)

            found('duration', mkv.info.duration.total_seconds())

            if mkv.video_tracks:
                video_track = mkv.video_tracks[0]

                # resolution
                if video_track.height in (480, 720, 1080):
                    if video_track.interlaced:
                        found('screenSize', '%di' % video_track.height)
                    else:
                        found('screenSize', '%dp' % video_track.height)
                else:
                    # TODO: do we want this?
                    # found('screenSize', '%dx%d' % (video_track.width, video_track.height))
                    pass

                # video codec
                if video_track.codec_id == 'V_MPEG4/ISO/AVC':
                    found('videoCodec', 'h264')
                elif video_track.codec_id == 'V_MPEG4/ISO/SP':
                    found('videoCodec', 'DivX')
                elif video_track.codec_id == 'V_MPEG4/ISO/ASP':
                    found('videoCodec', 'XviD')
            else:
                log.warning('MKV has no video track')

            if mkv.audio_tracks:
                audio_track = mkv.audio_tracks[0]
                # audio codec
                if audio_track.codec_id == 'A_AC3':
                    found('audioCodec', 'AC3')
                elif audio_track.codec_id == 'A_DTS':
                    found('audioCodec', 'DTS')
                elif audio_track.codec_id == 'A_AAC':
                    found('audioCodec', 'AAC')
            else:
                log.warning('MKV has no audio track')

            if mkv.subtitle_tracks:
                embedded_subtitle_languages = set()
                for st in mkv.subtitle_tracks:
                    try:
                        if st.language:
                            lang = babelfish.Language.fromalpha3b(st.language)
                        elif st.name:
                            lang = babelfish.Language.fromname(st.name)
                        else:
                            lang = babelfish.Language('und')
                    except babelfish.Error:
                        lang = babelfish.Language('und')
                    embedded_subtitle_languages.add(lang)

                found('subtitleLanguage', embedded_subtitle_languages)
            else:
                log.debug('MKV has no subtitle track')

        return result

    except ImportError:
        log.error('Cannot get video file metadata, missing dependency: enzyme')
        log.error('Please install it from PyPI, by doing eg: pip install enzyme')
        return result

    except IOError as e:
        log.error('Could not open file: %s' % filename)
        log.error('Make sure it exists and is available for reading on the filesystem')
        log.error('Error: %s' % e)
        return result

    except enzyme.Error as e:
        log.error('Cannot guess video file metadata')
        log.error('enzyme.Error while reading file: %s' % filename)
        log.error('Error: %s' % e)
        return result

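# Usage sketch (hypothetical path; as the docstring notes, the enzyme package
# must be installed and the file must exist):
metadata = guess_video_metadata('/data/movies/Some.Movie.2014.1080p.mkv')
print(metadata.get('screenSize'), metadata.get('videoCodec'), metadata.get('subtitleLanguage'))
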
def convert(self, alpha3, country=None, script=None):
    return str(babelfish.Language(alpha3, country, script))

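# Sketch of what the converter returns: str() of a babelfish Language is its
# short tag, so a ('por', 'BR') pair comes out as a country-qualified code
# (exact formatting depends on the installed babelfish version):
print(str(babelfish.Language('por', 'BR')))  # e.g. 'pt-BR'
print(str(babelfish.Language('eng')))        # e.g. 'en'
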
    rebulk.string(*lang_suffixes, name="language.suffix", ignore_case=True, private=True,
                  validator=seps_surround, tags=['format-suffix'])
    rebulk.functional(find_languages, properties={'language': [None]})
    rebulk.rules(SubtitlePrefixLanguageRule, SubtitleSuffixLanguageRule, SubtitleExtensionRule)

    return rebulk


COMMON_WORDS_STRICT = frozenset(['brazil'])

UNDETERMINED = babelfish.Language('und')

SYN = {
    ('ell', None): ['gr', 'greek'],
    ('spa', None): ['esp', 'español', 'espanol'],
    ('fra', None): ['français', 'vf', 'vff', 'vfi', 'vfq'],
    ('swe', None): ['se'],
    ('por', 'BR'): ['po', 'pb', 'pob', 'ptbr', 'br', 'brazilian'],
    ('cat', None): ['català', 'castellano', 'espanol castellano', 'español castellano'],
    ('ces', None): ['cz'],
    ('ukr', None): ['ua'],
    ('zho', None): ['cn'],
    ('jpn', None): ['jp'],
    ('hrv', None): ['scr'],
    ('mul', None): ['multi', 'dl'],
def get_default_transcripts(self, **kwargs):
    """
    Fetch transcripts list from Wistia API.

    Urls of transcripts are to be fetched later on with separate API calls.
    References:
        https://wistia.com/doc/data-api#captions_index
        https://wistia.com/doc/data-api#captions_show

    Arguments:
        kwargs (dict): Key-value pairs with video_id, fetched from video xblock,
            and token, fetched from Wistia API.
    Returns:
        list: List of dicts of transcripts. Example:
        [
            {
                'lang': 'en',
                'label': 'English',
                'url': 'default_url_to_be_replaced',
                'source': 'default'
            },
            # ...
        ]
    """
    video_id = kwargs.get('video_id')
    token = kwargs.get('token')
    url = self.captions_api['url'].format(token=token, media_id=video_id)
    message = _('Success.')
    self.default_transcripts = []

    # Fetch available transcripts' languages (codes and English labels), and assign their urls.
    try:
        # get all languages caps data:
        response = requests.get('https://{}'.format(url))
    except IOError as exc:
        # Probably, the current API has changed
        message = _(
            'No timed transcript may be fetched from a video platform.\nError details: {}'
        ).format(exc)
        log.exception("Transcripts INDEX request failure.")
        return self.default_transcripts, message

    # If a video does not exist, the response will be an empty HTTP 404 Not Found.
    # Reference: https://wistia.com/doc/data-api#captions_index
    if response.status_code == http_client.NOT_FOUND:
        message = _("Wistia video {} doesn't exist.").format(video_id)
        return self.default_transcripts, message

    # Fetch other failure cases:
    if not response.ok:
        message = _("Invalid request.")
        return self.default_transcripts, message

    try:
        wistia_data = response.json()
    except ValueError:
        wistia_data = ''

    # No transcripts case, see: wistia.com/doc/data-api#captions_index
    if not wistia_data:
        message = _(
            "For now, video platform doesn't have any timed transcript for this video."
        )
        return self.default_transcripts, message

    transcripts_data = [[el.get('language'), el.get('english_name')] for el in wistia_data]

    # Populate default_transcripts
    for lang_code, lang_label in transcripts_data:
        download_url = self.captions_api['download_url'].format(
            media_id=video_id, lang_code=lang_code, token=token)
        # Wistia's API uses ISO-639-2, so "lang_code" is a 3-character code, e.g. "eng".
        # Reference: https://wistia.com/doc/data-api#captions_show
        # Convert from ISO-639-2 to ISO-639-1; reference: https://pythonhosted.org/babelfish/
        try:
            lang_code = babelfish.Language(lang_code).alpha2
        except ValueError:
            # In case of B or T codes, e.g. 'fre'.
            # Reference: https://en.wikipedia.org/wiki/List_of_ISO_639-2_codes
            lang_code = babelfish.Language.fromalpha3b(lang_code).alpha2  # pylint: disable=no-member
        lang_label = self.get_transcript_language_parameters(lang_code)[1]
        self.default_transcripts.append({
            'lang': lang_code,
            'label': lang_label,
            'url': download_url,
            'source': TranscriptSource.DEFAULT,
        })

    return self.default_transcripts, message

class PodnapisiProvider(Provider):
    languages = set([babelfish.Language('por', 'BR')]) | set([babelfish.Language(l) for l in [
        'ara', 'aze', 'ben', 'bos', 'bul', 'cat', 'ces', 'dan', 'deu', 'ell', 'eng', 'eus', 'fas',
        'fin', 'fra', 'glg', 'heb', 'hrv', 'hun', 'hye', 'ind', 'ita', 'jpn', 'kor', 'mkd', 'msa',
        'nld', 'nor', 'pol', 'por', 'ron', 'rus', 'slk', 'slv', 'spa', 'sqi', 'srp', 'swe', 'tha',
        'tur', 'ukr', 'vie', 'zho',
    ]])
    video_types = (Episode, Movie)
    server = 'http://simple.podnapisi.net'
    pre_link_re = re.compile(r'^.*(?P<link>/ppodnapisi/predownload/i/\d+/k/.*$)')
    link_re = re.compile(r'^.*(?P<link>/[a-zA-Z]{2}/ppodnapisi/download/i/\d+/k/.*$)')
    headers = {}

    def initialize(self):
        self.session = requests.Session()
        self.headers = {
            'User-Agent': self.random_user_agent,
            'Referer': '%s/subtitles/search/advanced' % self.server,
        }

    def terminate(self):
        self.session.close()

    def get(self, url, params=None, headers=None, is_xml=False):
        """Make a GET request on `url` with the given parameters

        :param string url: part of the URL to reach with the leading slash
        :param dict params: params of the request
        :param dict headers: headers of the request
        :param bool is_xml: whether the response content is XML or not
        :return: the response
        :rtype: :class:`xml.etree.ElementTree.Element` or :class:`bs4.BeautifulSoup`
        :raise: :class:`~subliminal.exceptions.ProviderNotAvailable`

        """
        prefix_url = ''
        url_result = URL_RE.search(url)
        if url_result and url_result.group(2) is None:
            prefix_url = self.server

        # Update url
        url = '%s%s' % (prefix_url, url)

        # Handle headers
        self.session.headers = self.headers
        # Apply over-ride
        if headers:
            self.session.headers.update(headers)

        self.last_url = None
        try:
            r = self.session.get(
                url,
                params=params,
                headers=headers,
                timeout=10,
            )
            # store last url
            self.last_url = r.url
        except requests.Timeout:
            raise ProviderNotAvailable('Timeout after 10 seconds')

        if r.status_code != 200:
            raise ProviderNotAvailable('Request failed with status code %d' % r.status_code)

        if is_xml:
            return xml.etree.ElementTree.fromstring(r.content)
        else:
            return bs4.BeautifulSoup(r.content, ['permissive'])

    def query(self, language, series=None, season=None, episode=None, title=None, year=None):
        """Performs a query for a show on Podnapisi.net"""
        # Track page count (for multipage fetches)
        page = 1

        # parameter listing
        params = {'language': language.alpha2, 'page': str(page)}
        if series and season and episode:
            params['keywords'] = sanitize_string(series, strip_date=True)
            params['seasons'] = season
            params['episodes'] = episode
            if not year:
                year = extract_title_year(series)
            if year:
                params['year'] = year
        elif title:
            params['keywords'] = sanitize_string(title)
            if year:
                params['year'] = year
        else:
            raise ValueError('Missing parameters series and season and episode or title')

        logger.debug('Searching series %r', params)
        subtitles = []

        # Initial fetch
        preload = self.get(
            '/subtitles/search/advanced',
            params=params,
        )
        preload_url = self.last_url

        # Fetch tracking details
        verify = self.get(
            '/forum/app.php/track',
            params=dict([('path', quote('/subtitles/search/advanced', ''))] + params.items()),
            headers={
                'Referer': preload_url,
            },
        )

        # Reload page
        soup = self.get(
            '/subtitles/search/advanced',
            params=params,
            headers={
                'Referer': preload_url,
            },
        )

        # Get page information
        pages = soup.find('div', class_='panel-body')
        pages = pages.find('ul', class_='pagination')
        if pages:
            bullets = pages('li')
            pages = int(bullets[-2][0].a.string)
        else:
            pages = 1

        logger.debug('Podnapisi page matches: %r' % pages)

        while page < 10:
            # Set a hard cap on page count at 10, there is really
            # no reason to turn up more content than that
            for row in soup('tr', class_='subtitle-entry'):
                cells = row('td')

                # common error checking on matched results
                if not cells:
                    continue
                if len(cells) < 1:
                    continue

                # Acquire flags
                flags = []
                flag_entries = cells[0].find_all('i')
                for entry in flag_entries:
                    try:
                        if entry['data-toggle'] != 'tooltip':
                            continue
                    except KeyError:
                        continue
                    try:
                        flags += [e.lower() for e in entry['class'] if e != 'flag']
                    except KeyError:
                        continue

                # convert list
                flags = set(flags)

                # Get hearing impaired flag
                hearing_impaired = ('text-cc' in flags)

                # Get link
                link = cells[0].find('a', rel='nofollow')['href']

                # Get ID
                id = link[11:-9]

                # Get releases (if defined)
                releases = cells[0].find('span', class_='release')
                if not releases:
                    # Fall back to general name
                    releases = [
                        str(cells[0].find('a', href=link[:-9]).string.strip()),
                    ]
                elif 'title' in releases:
                    # Store title
                    releases = [
                        str(releases['title'].string.strip()),
                    ]
                else:
                    # store name
                    try:
                        releases = [
                            str(releases.string.strip()),
                        ]
                    except UnicodeError:
                        releases = [
                            releases.string.decode(
                                detect(releases.string, language.alpha2)['encoding'],
                                'replace',
                            ),
                        ]

                # attempt to match against multi listings (if they exist)
                multi_release = cells[0].find_all('div', class_='release')
                if len(multi_release):
                    for r in multi_release:
                        releases.append(r.get_text())

                if isinstance(releases, basestring):
                    releases = [
                        releases,
                    ]

                # Simplify list by making it unique
                releases = list(set(releases))

                if series and season and episode:
                    try:
                        subtitles.append(PodnapisiSubtitle(
                            language, id, releases, hearing_impaired, link,
                            series=series, season=season, episode=episode,
                        ))
                    except AttributeError:
                        # there simply wasn't enough information on the TV show;
                        # gracefully handle this instead of crashing :)
                        continue
                elif title:
                    try:
                        subtitles.append(PodnapisiSubtitle(
                            language, id, releases, hearing_impaired, link,
                            title=title, year=year,
                        ))
                    except AttributeError:
                        # there simply wasn't enough information on the movie;
                        # gracefully handle this instead of crashing :)
                        continue

            # Handle multiple pages
            page += 1
            if page > pages:
                # We're done
                break

            # Store new page
            params['page'] = str(page)
            soup = self.get('/subtitles/search/advanced', params)

        return subtitles

    def list_subtitles(self, video, languages):
        if isinstance(video, Episode):
            return [s for l in languages
                    for s in self.query(l, series=video.series, season=video.season,
                                        episode=video.episode)]
        elif isinstance(video, Movie):
            return [s for l in languages
                    for s in self.query(l, title=video.title, year=video.year)]

    def download_subtitle(self, subtitle):
        try:
            r = self.session.get(self.server + subtitle.link, timeout=10)
            logger.debug('Download URL: %s' % (self.server + subtitle.link))
        except requests.Timeout:
            raise ProviderNotAvailable('Timeout after 10 seconds')
        if r.status_code != 200:
            raise ProviderNotAvailable('Request failed with status code %d' % r.status_code)

        with contextlib.closing(zipfile.ZipFile(io.BytesIO(r.content))) as zf:
            if len(zf.namelist()) > 1:
                raise ProviderError('More than one file to unzip')
            subtitle_bytes = zf.read(zf.namelist()[0])

        subtitle_text = subtitle_bytes.decode(
            detect(subtitle_bytes, subtitle.language.alpha2)['encoding'], 'replace')
        if not is_valid_subtitle(subtitle_text):
            raise InvalidSubtitle
        return subtitle_text

class Addic7edProvider(Provider):
    languages = {babelfish.Language('por', 'BR')} | {babelfish.Language(l) for l in [
        'ara', 'aze', 'ben', 'bos', 'bul', 'cat', 'ces', 'dan', 'deu', 'ell', 'eng', 'eus', 'fas',
        'fin', 'fra', 'glg', 'heb', 'hrv', 'hun', 'hye', 'ind', 'ita', 'jpn', 'kor', 'mkd', 'msa',
        'nld', 'nor', 'pol', 'por', 'ron', 'rus', 'slk', 'slv', 'spa', 'sqi', 'srp', 'swe', 'tha',
        'tur', 'ukr', 'vie', 'zho',
    ]}
    video_types = (Episode,)
    server = 'http://www.addic7ed.com'

    def __init__(self, username=None, password=None):
        if username is not None and password is None or username is None and password is not None:
            raise ConfigurationError('Username and password must be specified')
        self.username = username
        self.password = password
        self.logged_in = False

    def initialize(self):
        self.session = requests.Session()
        self.session.headers = {'User-Agent': 'Subliminal/%s' % __version__.split('-')[0]}
        # login
        if self.username is not None and self.password is not None:
            logger.debug('Logging in')
            data = {'username': self.username, 'password': self.password, 'Submit': 'Log in'}
            r = self.session.post(self.server + '/dologin.php', data, timeout=10, allow_redirects=False)
            if r.status_code == 302:
                logger.info('Logged in')
                self.logged_in = True
            else:
                raise AuthenticationError(self.username)

    def terminate(self):
        # logout
        if self.logged_in:
            r = self.session.get(self.server + '/logout.php', timeout=10)
            logger.info('Logged out')
            if r.status_code != 200:
                raise ProviderError('Request failed with status code %d' % r.status_code)
        self.session.close()

    def get(self, url, params=None):
        """Make a GET request on `url` with the given parameters

        :param string url: part of the URL to reach with the leading slash
        :param params: params of the request
        :return: the response
        :rtype: :class:`bs4.BeautifulSoup`

        """
        r = self.session.get(self.server + url, params=params, timeout=10)
        if r.status_code != 200:
            raise ProviderError('Request failed with status code %d' % r.status_code)
        return bs4.BeautifulSoup(r.content, ['permissive'])

    # @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
    def get_show_ids(self):
        """Load the shows page with default series to show ids mapping

        :return: series to show ids
        :rtype: dict

        """
        soup = self.get('/shows.php')
        show_ids = {}
        for html_show in soup.select('td.version > h3 > a[href^="/show/"]'):
            show_ids[html_show.string.lower()] = int(html_show['href'][6:])
        return show_ids

    # @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
    def find_show_id(self, series, year=None):
        """Find the show id from the `series` with optional `year`

        Use this only if the show id cannot be found with :meth:`get_show_ids`

        :param string series: series of the episode in lowercase
        :param year: year of the series, if any
        :type year: int or None
        :return: the show id, if any
        :rtype: int or None

        """
        series_year = series
        if year is not None:
            series_year += ' (%d)' % year
        params = {'search': series_year, 'Submit': 'Search'}
        logger.debug('Searching series %r', params)
        suggested_shows = self.get('/search.php', params).select('span.titulo > a[href^="/show/"]')
        if not suggested_shows:
            logger.info('Series %r not found', series_year)
            return None
        return int(suggested_shows[0]['href'][6:])

    def query(self, series, season, year=None):
        show_ids = self.get_show_ids()
        show_id = None
        if year is not None:  # search with the year
            series_year = '%s (%d)' % (series.lower(), year)
            if series_year in show_ids:
                show_id = show_ids[series_year]
            else:
                show_id = self.find_show_id(series.lower(), year)
        if show_id is None:  # search without the year
            year = None
            if series.lower() in show_ids:
                show_id = show_ids[series.lower()]
            else:
                show_id = self.find_show_id(series.lower())
        if show_id is None:
            return []
        params = {'show_id': show_id, 'season': season}
        logger.debug('Searching subtitles %r', params)
        link = '/show/{show_id}&season={season}'.format(**params)
        soup = self.get(link)
        subtitles = []
        for row in soup('tr', class_='epeven completed'):
            cells = row('td')
            if cells[5].string != 'Completed':
                continue
            if not cells[3].string:
                continue
            subtitles.append(Addic7edSubtitle(babelfish.Language.fromaddic7ed(cells[3].string),
                                              series, season, int(cells[1].string), cells[2].string,
                                              year, cells[4].string, bool(cells[6].string),
                                              cells[9].a['href'], self.server + cells[2].a['href']))
        return subtitles

    def list_subtitles(self, video, languages):
        return [s for s in self.query(video.series, video.season, video.year)
                if s.language in languages and s.episode == video.episode]

    def download_subtitle(self, subtitle):
        r = self.session.get(self.server + subtitle.download_link, timeout=10,
                             headers={'Referer': subtitle.page_link})
        if r.status_code != 200:
            raise ProviderError('Request failed with status code %d' % r.status_code)
        if r.headers['Content-Type'] == 'text/html':
            raise DownloadLimitExceeded
        subtitle.content = fix_line_endings(r.content)

    rebulk.functional(find_languages,
                      properties={'language': [None]},
                      disabled=lambda context: not context.get('allowed_languages'))
    rebulk.rules(SubtitleExtensionRule,
                 SubtitlePrefixLanguageRule,
                 SubtitleSuffixLanguageRule,
                 RemoveLanguage,
                 RemoveInvalidLanguages(common_words),
                 RemoveUndeterminedLanguages)

    babelfish.language_converters['guessit'] = GuessitConverter(config['synonyms'])

    return rebulk


UNDETERMINED = babelfish.Language('und')
MULTIPLE = babelfish.Language('mul')
NON_SPECIFIC_LANGUAGES = frozenset([UNDETERMINED, MULTIPLE])


class GuessitConverter(babelfish.LanguageReverseConverter):  # pylint: disable=missing-docstring
    _with_country_regexp = re.compile(r'(.*)\((.*)\)')
    _with_country_regexp2 = re.compile(r'(.*)-(.*)')

    def __init__(self, synonyms):
        self.guessit_exceptions = {}

        for code, synlist in synonyms.items():
            if '_' in code:
                (alpha3, country) = code.split('_')
            else:
                (alpha3, country) = (code, None)
def rescan_dir():
    '''Sync the sqlite DB with the contents of the "movies" directory.'''
    db_movies = [m.title for m in Movie.select(Movie.title)]

    print "Rescanning movie directory..."

    # Add all new movies to the DB
    for item in os.listdir(movies_dir):
        abs_path = os.path.join(movies_dir, item)
        path = item
        if os.path.isfile(abs_path):
            continue
        if item in db_movies:
            '''
            # Check if the file extension is still the same
            video = [f for f in os.listdir(abs_path) if f.endswith(MOVIE_EXT)][0]
            m = Movie.select().where(Movie.title == item).get()
            if m.video != video:
                print "New file extension for %s" % item
            '''
            db_movies.remove(item)
            continue

        # Create a new movie
        files = os.listdir(abs_path)
        subtitles = [f for f in files if f.endswith(SUBTITLE_EXT)]
        video = [f for f in files if f.endswith(MOVIE_EXT)]

        if not video:
            print "No movie file found for '%s'. Please add and rescan!" % item
            continue
        else:
            video = os.path.join(path, video[0])

        if not subtitles:
            import subliminal as sb
            import babelfish
            print "Downloading subtitle for '%s'." % item
            v = sb.Video.fromname(_full_path(video))
            titles = sb.download_best_subtitles([v], {babelfish.Language(SUB_LANGUAGE)})
            sb.save_subtitles(v, titles[v])
            files = os.listdir(abs_path)
            subtitles = [f for f in files if f.endswith(SUBTITLE_EXT)]
            if not subtitles:
                print "Subtitles could not be loaded. Add manually and rescan!"
                continue

        subtitles = os.path.join(path, subtitles[0])
        m = Movie.create(title=item, subtitles=subtitles, video=video)
        m.save()

    # Remove gone movies from the DB
    for movie in db_movies:
        print "Removing '%s'." % movie
        m = Movie.get(Movie.title == movie)
        m.delete_instance(recursive=True)

def scan_video(path, subtitles=True, embedded_subtitles=True, original=None):
    """Scan a video and its subtitle languages from a video `path`

    :param string path: absolute path to the video
    :param bool subtitles: scan for subtitles with the same name
    :param bool embedded_subtitles: scan for embedded subtitles
    :return: the scanned video
    :rtype: :class:`Video`
    :raise: ValueError if cannot guess enough information from the path

    """
    if not original:
        original = path
    dirpath, filename = os.path.split(path)
    logger.info('Scanning video %r in %r', filename, dirpath)
    video = Video.fromguess(path, guessit.guess_file_info(original, 'autodetect'))
    video.size = os.path.getsize(path)
    if video.size > 10485760:
        logger.debug('Size is %d', video.size)
        video.hashes['opensubtitles'] = hash_opensubtitles(path)
        video.hashes['thesubdb'] = hash_thesubdb(path)
        logger.debug('Computed hashes %r', video.hashes)
    else:
        logger.warning('Size is lower than 10MB: hashes not computed')
    if subtitles:
        video.subtitle_languages |= scan_subtitle_languages(path)
    # enzyme
    try:
        if filename.endswith('.mkv'):
            with open(path, 'rb') as f:
                mkv = enzyme.MKV(f)
            if mkv.video_tracks:
                video_track = mkv.video_tracks[0]
                # resolution
                if video_track.height in (480, 720, 1080):
                    if video_track.interlaced:
                        video.resolution = '%di' % video_track.height
                        logger.debug('Found resolution %s with enzyme', video.resolution)
                    else:
                        video.resolution = '%dp' % video_track.height
                        logger.debug('Found resolution %s with enzyme', video.resolution)
                # video codec
                if video_track.codec_id == 'V_MPEG4/ISO/AVC':
                    video.video_codec = 'h264'
                    logger.debug('Found video_codec %s with enzyme', video.video_codec)
                elif video_track.codec_id == 'V_MPEG4/ISO/SP':
                    video.video_codec = 'DivX'
                    logger.debug('Found video_codec %s with enzyme', video.video_codec)
                elif video_track.codec_id == 'V_MPEG4/ISO/ASP':
                    video.video_codec = 'XviD'
                    logger.debug('Found video_codec %s with enzyme', video.video_codec)
            else:
                logger.warning('MKV has no video track')
            if mkv.audio_tracks:
                audio_track = mkv.audio_tracks[0]
                # audio codec
                if audio_track.codec_id == 'A_AC3':
                    video.audio_codec = 'AC3'
                    logger.debug('Found audio_codec %s with enzyme', video.audio_codec)
                elif audio_track.codec_id == 'A_DTS':
                    video.audio_codec = 'DTS'
                    logger.debug('Found audio_codec %s with enzyme', video.audio_codec)
                elif audio_track.codec_id == 'A_AAC':
                    video.audio_codec = 'AAC'
                    logger.debug('Found audio_codec %s with enzyme', video.audio_codec)
            else:
                logger.warning('MKV has no audio track')
            if mkv.subtitle_tracks:
                # embedded subtitles
                if embedded_subtitles:
                    embedded_subtitle_languages = set()
                    for st in mkv.subtitle_tracks:
                        if st.language:
                            try:
                                embedded_subtitle_languages.add(babelfish.Language.fromalpha3b(st.language))
                            except babelfish.Error:
                                logger.error('Embedded subtitle track language %r is not a valid language',
                                             st.language)
                                embedded_subtitle_languages.add(babelfish.Language('und'))
                        elif st.name:
                            try:
                                embedded_subtitle_languages.add(babelfish.Language.fromname(st.name))
                            except babelfish.Error:
                                logger.debug('Embedded subtitle track name %r is not a valid language', st.name)
                                embedded_subtitle_languages.add(babelfish.Language('und'))
                        else:
                            embedded_subtitle_languages.add(babelfish.Language('und'))
                    logger.debug('Found embedded subtitle %r with enzyme', embedded_subtitle_languages)
                    video.subtitle_languages |= embedded_subtitle_languages
            else:
                logger.debug('MKV has no subtitle track')
    except enzyme.Error:
        logger.exception('Parsing video metadata with enzyme failed')
    return video

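# Usage sketch (hypothetical path; guessit, enzyme and babelfish are assumed to
# be importable as in the module this function comes from):
video = scan_video('/data/series/Show.S01E01.720p.x264.mkv')
print(video.resolution, video.video_codec, sorted(str(l) for l in video.subtitle_languages))
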
def get_best_subtitles_links(videos, languages, providers=None, provider_configs=None, min_score=0,
                             hearing_impaired=False, single=False):
    """Get the links of the best subtitles for `videos` with the given `languages` using the specified `providers`

    :param videos: videos to get subtitle links for
    :type videos: set of :class:`~subliminal.video.Video`
    :param languages: languages of subtitles to get links for
    :type languages: set of :class:`babelfish.Language`
    :param providers: providers to use for the search, if not all
    :type providers: list of string or None
    :param provider_configs: configuration for providers
    :type provider_configs: dict of provider name => provider constructor kwargs or None
    :param int min_score: minimum score for subtitles to get links for
    :param bool hearing_impaired: get links for hearing impaired subtitles
    :param bool single: do not get links for videos with an undetermined subtitle language detected
    """
    best_subtitles_links = collections.defaultdict(list)
    with ProviderPool(providers, provider_configs) as pp:
        for video in videos:
            # filter
            if single and babelfish.Language('und') in video.subtitle_languages:
                logger.debug('Skipping video %r: undetermined language found', video)
                continue

            # list
            logger.info('Listing subtitles for %r', video)
            video_subtitles = pp.list_subtitles(video, languages)
            logger.info('Found %d subtitles total', len(video_subtitles))

            # get links
            done_languages = set()
            for subtitle, score in sorted([(s, s.compute_score(video)) for s in video_subtitles],
                                          key=operator.itemgetter(1), reverse=True):
                if score < min_score:
                    logger.info('No subtitle with score >= %d', min_score)
                    break
                if subtitle.hearing_impaired != hearing_impaired:
                    logger.debug('Skipping subtitle: hearing impaired != %r', hearing_impaired)
                    continue
                if subtitle.language in done_languages:
                    logger.debug('Skipping subtitle: %r already downloaded', subtitle.language)
                    continue
                if subtitle.page_link is not None:
                    done_languages.add(subtitle.language)
                    best_subtitles_links[video.name].append({
                        'language': str(subtitle.language),
                        'link': subtitle.page_link,
                        'score': score
                    })
                if single or done_languages == languages:
                    logger.debug('All languages downloaded')
                    break
    return best_subtitles_links

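# Usage sketch (hypothetical path; provider results and network access are not
# guaranteed):
videos = {scan_video('/data/movies/Some.Movie.2014.720p.mkv')}
links = get_best_subtitles_links(videos, {babelfish.Language('eng')}, min_score=30)
for name, entries in links.items():
    for entry in entries:
        print(name, entry['language'], entry['score'], entry['link'])
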