def get_matches(self, video):
    """Return the set of match names between `video` and this subtitle."""
    matches = set()

    if isinstance(video, Episode):
        # series: compare the sanitized title against the series and all alternate names
        candidate_names = [video.series] + video.alternative_series
        if video.series and sanitize(self.title) in (sanitize(n) for n in candidate_names):
            matches.add("series")
        if video.season and self.season == video.season:
            matches.add("season")
        if video.episode and self.episode == video.episode:
            matches.add("episode")
        if video.series_imdb_id and self.imdb_id == video.series_imdb_id:
            matches.add("series_imdb_id")
        # let guessit derive further matches from the release name
        matches |= guess_matches(video, guessit(self.release, {"type": "episode"}))
    elif isinstance(video, Movie):
        matches |= guess_matches(video, guessit(self.release, {"type": "movie"}))
        candidate_names = [video.title] + video.alternative_titles
        if video.title and sanitize(self.title) in (sanitize(n) for n in candidate_names):
            matches.add("title")

    return matches
def get_matches(self, video, hearing_impaired=False):
    """Return match names between `video` and this OpenSubtitles result.

    Extends the base matcher with guessit matches from the release and file
    names, provider-specific series/title comparison, FPS-mismatch handling,
    tag-match promotion to hash, and an imdb-implies-year rule.

    Fix: ``float(self.fps)`` raised an uncaught ``TypeError`` when the
    provider returned no fps (``None``); the comparison also now uses the
    parsed float value instead of the raw attribute.
    """
    matches = super(OpenSubtitlesSubtitle, self).get_matches(video)

    type_ = "episode" if isinstance(video, Episode) else "movie"
    matches |= guess_matches(video, guessit(self.movie_release_name, {'type': type_}))
    matches |= guess_matches(video, guessit(self.filename, {'type': type_}))

    # episode
    if type_ == "episode" and self.movie_kind == "episode":
        # series
        if fix_tv_naming(video.series) and (sanitize(self.series_name) in (
                sanitize(name) for name in
                [fix_tv_naming(video.series)] + video.alternative_series)):
            matches.add('series')
    # movie
    elif type_ == "movie" and self.movie_kind == "movie":
        # title
        if fix_movie_naming(video.title) and (sanitize(self.movie_name) in (
                sanitize(name) for name in
                [fix_movie_naming(video.title)] + video.alternative_titles)):
            matches.add('title')

    sub_fps = None
    try:
        sub_fps = float(self.fps)
    except (ValueError, TypeError):
        # fps may be missing (None) or non-numeric; both leave sub_fps as None
        pass

    # video has fps info, sub also, and they differ
    if video.fps and sub_fps and not framerate_equal(video.fps, sub_fps):
        self.wrong_fps = True
        if self.skip_wrong_fps:
            logger.debug(
                "Wrong FPS (expected: %s, got: %s, lowering score massively)",
                video.fps, self.fps)
            # fixme: may be too harsh
            return set()
        else:
            logger.debug("Wrong FPS (expected: %s, got: %s, continuing)",
                         video.fps, self.fps)

    # matched by tag?
    if self.matched_by == "tag":
        # treat a tag match equally to a hash match
        logger.debug(
            "Subtitle matched by tag, treating it as a hash-match. Tag: '%s'",
            self.query_parameters.get("tag", None))
        matches.add("hash")

    # imdb_id match so we'll consider year as matching
    if self.movie_imdb_id and video.imdb_id and (self.movie_imdb_id == video.imdb_id):
        matches.add("year")

    return matches
def get_matches(self, video):
    """Return the set of match names between `video` and this ScrewZira subtitle."""
    matches = set()
    logger.debug("--ScrewZiraSubtitle--\n{}".format(self.__dict__))

    if isinstance(video, Episode):
        # exact (sanitized) series-name comparison
        if video.series and sanitize(self.series) == sanitize(video.series):
            matches.add('series')
        if video.season and self.season == video.season:
            matches.add('season')
        if video.episode and self.episode == video.episode:
            matches.add('episode')
        matches |= guess_matches(video, guessit(self.release, {'type': 'episode'}))
    elif isinstance(video, Movie):
        # the provider stores the movie name in `series`; check all known titles
        names = [video.title] + video.alternative_titles
        if video.title and sanitize(self.series) in (sanitize(n) for n in names):
            matches.add('title')
        if video.year and self.year == video.year:
            matches.add('year')
        matches |= guess_matches(video, guessit(self.release, {'type': 'movie'}))

    logger.debug("ScrewZira subtitle criteria match:\n{}".format(matches))
    return matches
def query(self, language, video):
    """Search subs.sab.bz for subtitles matching `video` in `language`.

    Returns a list of subtitle objects collected from the first 20 result
    rows, each tagged with the uploader name when available.
    """
    subtitles = []
    isEpisode = isinstance(video, Episode)
    params = {
        'act': 'search',
        'movie': '',
        'select-language': '2',
        'upldr': '',
        'yr': '',
        'release': ''
    }

    if isEpisode:
        # query is "<series> <season> <episode>" with zero-padded numbers
        params['movie'] = "%s %02d %02d" % (sanitize(
            fix_tv_naming(video.series), {'\''}), video.season, video.episode)
    else:
        params['yr'] = video.year
        params['movie'] = sanitize(video.title, {'\''})

    if language == 'en' or language == 'eng':
        # site language id 1 selects English; '2' is the default (presumably Bulgarian)
        params['select-language'] = 1

    logger.info('Searching subtitle %r', params)
    response = self.session.post('http://subs.sab.bz/index.php?',
                                 params=params,
                                 allow_redirects=False,
                                 timeout=10,
                                 headers={
                                     'Referer': 'http://subs.sab.bz/',
                                 })

    response.raise_for_status()

    if response.status_code != 200:
        # raise_for_status only raises for 4xx/5xx; with allow_redirects=False a
        # 3xx lands here and is treated as "no results"
        logger.debug('No subtitles found')
        return subtitles

    soup = BeautifulSoup(response.content, 'lxml')
    rows = soup.findAll('tr', {'class': 'subs-row'})

    # Search on first 20 rows only
    for row in rows[:20]:
        a_element_wrapper = row.find('td', {'class': 'c2field'})
        if a_element_wrapper:
            element = a_element_wrapper.find('a')
            if element:
                link = element.get('href')
                # the uploader profile anchor contains "showuser=" in its href
                element = row.find('a', href=re.compile(r'.*showuser=.*'))
                uploader = element.get_text() if element else None
                logger.info('Found subtitle link %r', link)
                sub = self.download_archive_and_add_subtitle_files(
                    link, language, video)
                for s in sub:
                    s.uploader = uploader
                subtitles = subtitles + sub
    return subtitles
def get_matches(self, video):
    """Return match names between `video` and this subtitle.

    Fix: the series/title comparisons originally parsed as
    ``(A and B) or C`` because of ``and``/``or`` precedence, so a match on
    ``alt_title`` was accepted even when the video had no series/title set.
    Parentheses now enforce the intended ``A and (B or C)``.
    """
    matches = set()

    # handle movies and series separately
    if isinstance(video, Episode):
        # series: match either the primary or the alternate subtitle title
        if video.series and (
                sanitize(self.title) == fix_inconsistent_naming(video.series)
                or sanitize(self.alt_title) == fix_inconsistent_naming(video.series)):
            matches.add('series')
        # year: a subtitle with no year matches an original series
        if video.original_series and self.year is None or video.year and video.year == self.year:
            matches.add('year')
        # season
        if video.season and self.season == video.season:
            matches.add('season')
        # episode
        if video.episode and self.episode == video.episode:
            matches.add('episode')
    # movie
    elif isinstance(video, Movie):
        # title: match either the primary or the alternate subtitle title
        if video.title and (
                sanitize(self.title) == fix_inconsistent_naming(video.title)
                or sanitize(self.alt_title) == fix_inconsistent_naming(video.title)):
            matches.add('title')
        # year
        if video.year and self.year == video.year:
            matches.add('year')

    # rest is same for both groups
    # release_group
    if (video.release_group and self.releases and
            any(r in sanitize_release_group(self.releases)
                for r in get_equivalent_release_groups(
                    sanitize_release_group(video.release_group)))):
        matches.add('release_group')
    # resolution
    if video.resolution and self.releases and video.resolution in self.releases.lower():
        matches.add('resolution')
    # source
    if video.source and self.releases and video.source.lower() in self.releases.lower():
        matches.add('source')
    # other properties
    matches |= guess_matches(video, guessit(self.releases))

    self.matches = matches

    return matches
def get_show_id(self, series, year=None, country_code=None, ignore_cache=False):
    """Get the best matching show id for `series`, `year` and `country_code`.

    First search in the result of :meth:`_get_show_ids` and fallback on a
    search with :meth:`_search_show_id`.

    :param str series: series of the episode.
    :param year: year of the series, if any.
    :type year: int
    :param country_code: country code of the series, if any.
    :type country_code: str
    :param bool ignore_cache: force a refresh of the cached show-id mapping.
    :return: the show id, if found.
    :rtype: int
    """
    show_id = None
    # try several normalizations of the series name: as-is, dots stripped, "&" -> "and"
    ids_to_look_for = {sanitize(series).lower(), sanitize(series.replace(".", "")).lower(),
                       sanitize(series.replace("&", "and")).lower()}
    show_ids = self._get_show_ids()
    if ignore_cache or not show_ids:
        # bypass the cached mapping and query the site again
        show_ids = self._get_show_ids.refresh(self)

    logger.debug("Trying show ids: %s", ids_to_look_for)
    for series_sanitized in ids_to_look_for:
        # attempt with country
        if not show_id and country_code:
            logger.debug('Getting show id with country')
            show_id = show_ids.get('%s %s' % (series_sanitized, country_code.lower()))

        # attempt with year
        if not show_id and year:
            logger.debug('Getting show id with year')
            show_id = show_ids.get('%s %d' % (series_sanitized, year))

        # attempt clean
        if not show_id:
            logger.debug('Getting show id')
            show_id = show_ids.get(series_sanitized)

    if not show_id:
        now = datetime.datetime.now()
        last_fetch = region.get(self.last_show_ids_fetch_key)

        # re-fetch show ids once per day if any show ID not found
        if not ignore_cache and last_fetch != NO_VALUE and last_fetch + datetime.timedelta(days=1) < now:
            logger.info("Show id not found; re-fetching show ids")
            return self.get_show_id(series, year=year, country_code=country_code, ignore_cache=True)
        logger.debug("Not refreshing show ids, as the last fetch has been too recent")

    # search as last resort
    # broken right now
    # if not show_id:
    #     logger.warning('Series %s not found in show ids', series)
    #     show_id = self._search_show_id(series)

    return show_id
def query(self, language, video):
    """Search yavka.net and return the subtitles found for `video`."""
    subtitles = []
    params = {'s': '', 'y': '', 'u': '', 'l': 'BG', 'i': ''}

    if isinstance(video, Episode):
        params['s'] = "%s s%02de%02d" % (sanitize(
            video.series, {'\''}), video.season, video.episode)
    else:
        params['y'] = video.year
        params['s'] = sanitize(video.title, {'\''})

    # map two- and three-letter language codes onto the site's language filter;
    # anything unrecognized keeps the default 'BG'
    for codes, site_code in ((('en', 'eng'), 'EN'), (('ru', 'rus'), 'RU'),
                             (('es', 'spa'), 'ES'), (('it', 'ita'), 'IT')):
        if language in codes:
            params['l'] = site_code
            break

    logger.info('Searching subtitle %r', params)
    response = self.session.get('http://yavka.net/subtitles.php',
                                params=params,
                                allow_redirects=False,
                                timeout=10,
                                headers={
                                    'Referer': 'http://yavka.net/',
                                })
    response.raise_for_status()

    if response.status_code != 200:
        logger.debug('No subtitles found')
        return subtitles

    soup = BeautifulSoup(response.content, 'lxml')
    rows = soup.findAll('tr', {'class': 'info'})

    # Search on first 20 rows only
    for row in rows[:20]:
        anchor = row.find('a', {'class': 'selector'})
        if anchor:
            link = anchor.get('href')
            uploader_anchor = row.find('a', {'class': 'click'})
            uploader = uploader_anchor.get_text() if uploader_anchor else None
            logger.info('Found subtitle link %r', link)
            found = self.download_archive_and_add_subtitle_files(
                'http://yavka.net/' + link, language, video)
            for s in found:
                s.uploader = uploader
            subtitles = subtitles + found

    return subtitles
def get_matches(self, video):
    """Return match names between `video` and this subtitle.

    patch: set guessit to single_value

    Fix: removed a stray dead ``pass`` statement left in the episode branch.

    :param video: the video being matched.
    :return: the set of matched property names.
    """
    matches = set()
    # episode
    if isinstance(video, Episode):
        # series — the name itself is not compared; presumably results are
        # already filtered by series upstream (TODO confirm against the provider)
        if video.series:
            matches.add('series')
        # year
        if video.original_series and self.year is None or video.year and video.year == self.year:
            matches.add('year')
        # season
        if video.season and self.season == video.season:
            matches.add('season')
        # episode
        if video.episode and self.episode == video.episode:
            matches.add('episode')
        # guess
        matches |= guess_matches(
            video, guessit(self.version, {
                'type': 'episode',
                "single_value": True
            }))
    # movie
    elif isinstance(video, Movie):
        # title
        if video.title and (sanitize(self.title) in (
                sanitize(name) for name in [video.title] + video.alternative_titles)):
            matches.add('title')
        # year
        if video.year and self.year == video.year:
            matches.add('year')
        # guess
        matches |= guess_matches(
            video, guessit(self.version, {
                'type': 'movie',
                "single_value": True
            }))

    self.matches = matches
    return matches
def get_matches(self, video):
    """Return the set of match names between `video` and this subtitle."""
    matches = set()
    comments = self.comments

    # year and release group are checked the same way for movies and episodes
    if video.year and self.year == video.year:
        matches.add('year')
    if video.release_group and video.release_group in comments:
        matches.add('release_group')

    if isinstance(video, Movie):
        if video.title and sanitize(self.title) == fix_inconsistent_naming(video.title):
            matches.add('title')
        if video.imdb_id and self.imdb_id == video.imdb_id:
            matches.add('imdb_id')
        # let guessit extract the remaining properties from the comments
        matches |= guess_matches(video, guessit(comments, {"type": "movie"}))
    else:
        # strip a trailing " - Sezonul N" suffix before comparing series names
        trimmed = re.sub(r'\s-\sSezonul\s\d+$', '', self.title.rstrip())
        if video.series and fix_inconsistent_naming(video.series) == sanitize(trimmed):
            matches.add('series')
        if video.series_imdb_id and self.imdb_id == video.series_imdb_id:
            matches.add('imdb_id')
        if f"Sezonul {video.season}" in comments:
            matches.add('season')
        # a matching imdb id plus season is trusted as an episode match
        if {"imdb_id", "season"}.issubset(matches):
            matches.add('episode')
        matches |= guess_matches(video, guessit(comments, {"type": "episode"}))

    self.matches = matches
    return matches
def _search_show_id(self, series, year=None):
    """Search the show id from the `series` and `year`.

    :param str series: series of the episode.
    :param year: year of the series, if any.
    :type year: int
    :return: the show id, if found.
    :rtype: int
    """
    # addic7ed doesn't support search with quotes
    series = series.replace('\'', ' ')

    # build the params
    series_year = '%s %d' % (series, year) if year is not None else series
    params = {'search': series_year, 'Submit': 'Search'}

    # make the search
    logger.info('Searching show ids with %r', params)
    r = self.session.get(self.server_url + 'search.php', params=params, timeout=10)
    r.raise_for_status()

    if r.status_code == 304:
        raise TooManyRequests()

    soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

    suggestion = None

    # get the suggestion
    try:
        suggestion = soup.select('span.titulo > a[href^="/show/"]')
        if not suggestion:
            logger.warning('Show id not found: no suggestion')
            return None
        if not sanitize(suggestion[0].i.text.replace('\'', ' '),
                        default_characters=self.sanitize_characters) == \
                sanitize(series_year, default_characters=self.sanitize_characters):
            logger.warning('Show id not found: suggestion does not match')
            return None
        # the show id is the numeric tail of the "/show/<id>" href
        show_id = int(suggestion[0]['href'][6:])
        logger.debug('Found show id %d', show_id)

        return show_id
    finally:
        # fix: `suggestion` is a bs4 ResultSet (a list subclass) which has no
        # `decompose()` method; calling it raised AttributeError inside this
        # `finally` and clobbered the function's return value. Decompose each
        # tag individually instead.
        if suggestion:
            for tag in suggestion:
                tag.decompose()
        soup.decompose()
        soup = None
def get_movie_id(self, movie, year=None):
    """Get the best matching movie id for `movie`, `year`.

    Fixes: guard against a missing results table (previously raised
    AttributeError on ``None.find_all``); use the module-level ``logger``
    instead of the root logger for the final debug message, consistent with
    the rest of the provider.

    :param str movie: movie.
    :param year: year of the movie, if any.
    :type year: int
    :return: the movie id, if found.
    :rtype: int
    """
    movie_id = None

    # get the movie id
    logger.info('Getting movie id')

    r = self.session.get(self.server_url + 'search.php?search=' + quote_plus(movie), timeout=10)
    r.raise_for_status()

    soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser'])

    # populate the movie id
    movies_table = soup.find('table', {'class': 'tabel'})
    movies = movies_table.find_all('tr') if movies_table else []

    for item in movies:
        link = item.find('a', href=True)
        if link:
            if link['href'].startswith('movie/'):
                # href looks like "movie/<id>"
                splitted_uri = link['href'].split('/')
                if len(splitted_uri) == 2:
                    media_id = splitted_uri[1]
                else:
                    continue

                media_title = link.text
                # entries are titled "<name> (<year>)"
                match = re.search(r'(.+)\s\((\d{4})\)$', media_title)
                if match:
                    media_name = match.group(1)
                    media_year = match.group(2)
                    if sanitize(media_name.lower()) == sanitize(movie.lower()) and media_year == str(year):
                        movie_id = media_id

    soup.decompose()
    soup = None

    logger.debug(f'Found this movie id: {movie_id}')

    if not movie_id:
        logger.debug(f"Addic7ed: Cannot find this movie with guessed year {year}: {movie}")

    return movie_id
def get_matches(self, video):
    """Return match names between `video` and this subtitle.

    Fix: parenthesized the series/title comparisons — ``and``/``or``
    precedence made them parse as ``(A and B) or C``, so an ``alt_title``
    match was accepted even when the video had no series/title set.
    """
    matches = set()
    type_ = "movie" if isinstance(video, Movie) else "episode"

    # handle movies and series separately
    if type_ == "episode":
        # series: match either the primary or the alternate subtitle title
        if video.series and (
                sanitize(self.title) == fix_inconsistent_naming(video.series)
                or sanitize(self.alt_title) == fix_inconsistent_naming(video.series)):
            matches.add('series')
        # year: a subtitle with no year matches an original series
        if video.original_series and self.year is None or video.year and video.year == self.year:
            matches.add('year')
        # season
        if video.season and self.season == video.season:
            matches.add('season')
        # episode
        if video.episode and self.episode == video.episode:
            matches.add('episode')
    # movie
    else:
        # title
        if video.title and (
                sanitize(self.title) == fix_inconsistent_naming(video.title)
                or sanitize(self.alt_title) == fix_inconsistent_naming(video.title)):
            matches.add('title')
        # year
        if video.year and self.year == video.year:
            matches.add('year')

    # rest is same for both groups
    # release_group
    if (video.release_group and self.releases and
            any(r in sanitize_release_group(self.releases)
                for r in get_equivalent_release_groups(
                    sanitize_release_group(video.release_group)))):
        matches.add('release_group')

    matches |= guess_matches(video, guessit(self.releases, {"type": type_}))

    self.matches = matches
    return matches
def _get_show_ids(self):
    """Get the ``dict`` of show ids per series by querying the `shows.php` page.

    :return: show id per series, lower case and without quotes.
    :rtype: dict

    # patch: add punctuation cleaning
    """
    # fetch the show listing page
    logger.info('Getting show ids')
    r = self.session.get(self.server_url + 'shows.php', timeout=10)
    r.raise_for_status()
    soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

    # build the mapping of sanitized show name -> numeric id
    show_ids = {}
    for anchor in soup.select('td.version > h3 > a[href^="/show/"]'):
        cleaned = sanitize(anchor.text, default_characters=self.sanitize_characters)
        try:
            sid = int(anchor['href'][6:])
        except ValueError:
            # href does not end in a numeric id; skip this entry
            continue

        show_ids[cleaned] = sid

        # when the name carries a year, also index it without the year
        year_match = series_year_re.match(cleaned)
        if year_match and year_match.group(2) and year_match.group(1) not in show_ids:
            show_ids[year_match.group(1)] = sid

    soup.decompose()
    soup = None

    logger.debug('Found %d show ids', len(show_ids))

    return show_ids
def get_matches(self, video):
    """Return match names between `video` and this subtitle.

    Fix: parenthesized the series/title comparisons — ``and``/``or``
    precedence made them parse as ``(A and B) or C``, so an ``alt_title``
    match was accepted even when the video had no series/title set.
    """
    matches = set()
    # handle movies and series separately
    if isinstance(video, Episode):
        # series: match either the primary or the alternate subtitle title
        if video.series and (
                sanitize(self.title) == fix_inconsistent_naming(video.series)
                or sanitize(self.alt_title) == fix_inconsistent_naming(video.series)):
            matches.add('series')
        # year: a subtitle with no year matches an original series
        if video.original_series and self.year is None or video.year and video.year == self.year:
            matches.add('year')
        # season
        if video.season and self.season == video.season:
            matches.add('season')
        # episode
        if video.episode and self.episode == video.episode:
            matches.add('episode')
    # movie
    elif isinstance(video, Movie):
        # title
        if video.title and (
                sanitize(self.title) == fix_inconsistent_naming(video.title)
                or sanitize(self.alt_title) == fix_inconsistent_naming(video.title)):
            matches.add('title')
        # year
        if video.year and self.year == video.year:
            matches.add('year')

    # rest is same for both groups
    # release_group
    if (video.release_group and self.releases and
            any(r in sanitize_release_group(self.releases)
                for r in get_equivalent_release_groups(sanitize_release_group(video.release_group)))):
        matches.add('release_group')
    # resolution
    if video.resolution and self.releases and video.resolution in self.releases.lower():
        matches.add('resolution')
    # format
    if video.format and self.releases and video.format.lower() in self.releases.lower():
        matches.add('format')
    # other properties
    matches |= guess_matches(video, guessit(self.releases))

    self.matches = matches
    return matches
def get_matches(self, video, hearing_impaired=False):
    """Return match names between `video` and this OpenSubtitles result.

    Fix: ``float(self.fps)`` raised an uncaught ``TypeError`` when the
    provider returned no fps (``None``); the FPS comparison now also uses
    the parsed float value instead of the raw attribute.
    """
    matches = super(OpenSubtitlesSubtitle, self).get_matches(video)

    # episode
    if isinstance(video, Episode) and self.movie_kind == 'episode':
        # series
        if video.series and (sanitize(self.series_name) in (
                sanitize(name) for name in [video.series] + video.alternative_series)):
            matches.add('series')
    # movie
    elif isinstance(video, Movie) and self.movie_kind == 'movie':
        # title
        if video.title and (sanitize(self.movie_name) in (
                sanitize(name) for name in [video.title] + video.alternative_titles)):
            matches.add('title')

    sub_fps = None
    try:
        sub_fps = float(self.fps)
    except (ValueError, TypeError):
        # fps may be missing (None) or non-numeric; both leave sub_fps as None
        pass

    # video has fps info, sub also, and they differ
    if video.fps and sub_fps and not framerate_equal(video.fps, sub_fps):
        self.wrong_fps = True
        if self.skip_wrong_fps:
            logger.debug(
                "Wrong FPS (expected: %s, got: %s, lowering score massively)",
                video.fps, self.fps)
            # fixme: may be too harsh
            return set()
        else:
            logger.debug("Wrong FPS (expected: %s, got: %s, continuing)",
                         video.fps, self.fps)

    # matched by tag?
    if self.matched_by == "tag":
        # treat a tag match equally to a hash match
        logger.debug(
            "Subtitle matched by tag, treating it as a hash-match. Tag: '%s'",
            self.query_parameters.get("tag", None))
        matches.add("hash")

    return matches
def query(self, language, video):
    """Search subsunacs.net and return the subtitles found for `video`."""
    results = []
    params = {
        'm': '',
        'l': 0,
        'c': '',
        'y': '',
        'action': " Търси ",
        'a': '',
        'd': '',
        'u': '',
        'g': '',
        't': '',
        'imdbcheck': 1
    }

    if isinstance(video, Episode):
        params['m'] = "%s %02d %02d" % (sanitize(
            video.series), video.season, video.episode)
    else:
        params['y'] = video.year
        params['m'] = video.title

    if language in ('en', 'eng'):
        params['l'] = 1

    logger.info('Searching subtitle %r', params)
    response = self.session.post('https://subsunacs.net/search.php',
                                 params=params,
                                 allow_redirects=False,
                                 timeout=10,
                                 headers={
                                     'Referer': 'https://subsunacs.net/index.php',
                                 })
    response.raise_for_status()

    if response.status_code != 200:
        logger.debug('No subtitles found')
        return results

    soup = BeautifulSoup(response.content, 'html.parser')
    cells = soup.findAll('td', {'class': 'tdMovie'})

    # Search on first 10 rows only
    for cell in cells[:10]:
        anchor = cell.find('a', {'class': 'tooltip'})
        if anchor:
            link = anchor.get('href')
            logger.info('Found subtitle link %r', link)
            results = results + self.download_archive_and_add_subtitle_files(
                'https://subsunacs.net' + link, language, video)

    return results
def get_matches(self, video, hearing_impaired=False):
    """Return match names between `video` and this OpenSubtitles result.

    Fix: ``float(self.fps)`` raised an uncaught ``TypeError`` when the
    provider returned no fps (``None``); the FPS comparison now also uses
    the parsed float value instead of the raw attribute.
    """
    matches = super(OpenSubtitlesSubtitle, self).get_matches(video)

    # episode
    if isinstance(video, Episode) and self.movie_kind == 'episode':
        # series
        if video.series and (sanitize(self.series_name) in (
                sanitize(name) for name in [video.series] + video.alternative_series)):
            matches.add('series')
    # movie
    elif isinstance(video, Movie) and self.movie_kind == 'movie':
        # title
        if video.title and (sanitize(self.movie_name) in (
                sanitize(name) for name in [video.title] + video.alternative_titles)):
            matches.add('title')

    sub_fps = None
    try:
        sub_fps = float(self.fps)
    except (ValueError, TypeError):
        # fps may be missing (None) or non-numeric; both leave sub_fps as None
        pass

    # video has fps info, sub also, and they differ
    if video.fps and sub_fps and not framerate_equal(video.fps, sub_fps):
        self.wrong_fps = True
        if self.skip_wrong_fps:
            logger.debug("Wrong FPS (expected: %s, got: %s, lowering score massively)",
                         video.fps, self.fps)
            # fixme: may be too harsh
            return set()
        else:
            logger.debug("Wrong FPS (expected: %s, got: %s, continuing)",
                         video.fps, self.fps)

    # matched by tag?
    if self.matched_by == "tag":
        # treat a tag match equally to a hash match
        logger.debug("Subtitle matched by tag, treating it as a hash-match. Tag: '%s'",
                     self.query_parameters.get("tag", None))
        matches.add("hash")

    return matches
def _get_show_ids(self): """Get the ``dict`` of show ids per series by querying the `shows.php` page. :return: show id per series, lower case and without quotes. :rtype: dict # patch: add punctuation cleaning """ # get the show page logger.info('Getting show ids') region.set(self.last_show_ids_fetch_key, datetime.datetime.now()) r = self.session.get(self.server_url + 'shows.php', timeout=10) r.raise_for_status() # LXML parser seems to fail when parsing Addic7ed.com HTML markup. # Last known version to work properly is 3.6.4 (next version, 3.7.0, fails) # Assuming the site's markup is bad, and stripping it down to only contain what's needed. show_cells = re.findall(show_cells_re, r.content) if show_cells: soup = ParserBeautifulSoup( b''.join(show_cells).decode('utf-8', 'ignore'), ['lxml', 'html.parser']) else: # If RegEx fails, fall back to original r.text and use 'html.parser' soup = ParserBeautifulSoup(r.text, ['html.parser']) # populate the show ids show_ids = {} shows = soup.select('td > h3 > a[href^="/show/"]') for show in shows: show_clean = sanitize(show.text, default_characters=self.sanitize_characters) try: show_id = int(show['href'][6:]) except ValueError: continue show_ids[show_clean] = show_id match = series_year_re.match(show_clean) if match and match.group(2) and match.group(1) not in show_ids: # year found, also add it without year show_ids[match.group(1)] = show_id soup.decompose() soup = None logger.debug('Found %d show ids', len(show_ids)) if not show_ids: raise Exception("Addic7ed: No show IDs found!") return show_ids
def get_matches(self, video):
    """Return the set of match names between `video` and this subtitle."""
    matches = set()

    if isinstance(video, Episode):
        # series: check the sanitized title against every known series name
        series_names = [video.series] + video.alternative_series
        if video.series and sanitize(self.title) in (sanitize(n) for n in series_names):
            matches.add('series')
        if video.season and self.season == video.season:
            matches.add('season')
        if video.episode and self.episode == video.episode:
            matches.add('episode')
        if video.series_imdb_id and self.imdb_id == video.series_imdb_id:
            matches.add('series_imdb_id')
        # partial guess: information absent from the release name is not penalized
        matches |= guess_matches(video, guessit(self.release, {'type': 'episode'}), partial=True)
    elif isinstance(video, Movie):
        matches |= guess_matches(video, guessit(self.release, {'type': 'movie'}), partial=True)
        title_names = [video.title] + video.alternative_titles
        if video.title and sanitize(self.title) in (sanitize(n) for n in title_names):
            matches.add('title')

    return matches
def _get_show_ids(self): """Get the ``dict`` of show ids per series by querying the `shows.php` page. :return: show id per series, lower case and without quotes. :rtype: dict # patch: add punctuation cleaning """ # get the show page logger.info('Getting show ids') region.set(self.last_show_ids_fetch_key, datetime.datetime.now()) r = self.session.get(self.server_url, timeout=10) r.raise_for_status() soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'), ['lxml', 'html.parser']) # populate the show ids show_ids = {} shows = soup.find(id='qsShow') for show in shows: if hasattr(show, 'attrs'): try: show_id = int(show.attrs['value']) except ValueError: continue if show_id != 0: show_clean = sanitize(show.text, default_characters=self.sanitize_characters) show_ids[show_clean] = show_id match = series_year_re.match(show_clean) if match and match.group(2) and match.group(1) not in show_ids: # year found, also add it without year show_ids[match.group(1)] = show_id soup.decompose() soup = None logger.debug('Found %d show ids', len(show_ids)) if not show_ids: raise Exception("Addic7ed: No show IDs found!") return show_ids
def _get_show_ids(self):
    """Get the ``dict`` of show ids per series by querying the `shows.php` page.

    :return: show id per series, lower case and without quotes.
    :rtype: dict

    # patch: add punctuation cleaning
    """
    # fetch the show listing page
    logger.info('Getting show ids')
    r = self.session.get(self.server_url + 'shows.php', timeout=10)
    r.raise_for_status()

    # LXML parser seems to fail when parsing Addic7ed.com HTML markup.
    # Last known version to work properly is 3.6.4 (next version, 3.7.0, fails)
    # Assuming the site's markup is bad, and stripping it down to only contain what's needed.
    show_cells = re.findall(show_cells_re, r.content)
    if show_cells:
        soup = ParserBeautifulSoup(b''.join(show_cells), ['lxml', 'html.parser'])
    else:
        # If RegEx fails, fall back to original r.content and use 'html.parser'
        soup = ParserBeautifulSoup(r.content, ['html.parser'])

    # build the mapping of sanitized show name -> numeric id
    show_ids = {}
    for anchor in soup.select('td > h3 > a[href^="/show/"]'):
        name = sanitize(anchor.text, default_characters=self.sanitize_characters)
        try:
            sid = int(anchor['href'][6:])
        except ValueError:
            # href does not end in a numeric id; skip this entry
            continue

        show_ids[name] = sid

        # when the name carries a year, also index it without the year
        year_match = series_year_re.match(name)
        if year_match and year_match.group(2) and year_match.group(1) not in show_ids:
            show_ids[year_match.group(1)] = sid

    soup.decompose()
    soup = None

    logger.debug('Found %d show ids', len(show_ids))

    return show_ids
def get_show_id(self, series, year=None, country_code=None):
    """Get the best matching show id for `series`, `year` and `country_code`.

    First search in the result of :meth:`_get_show_ids` and fallback on a
    search with :meth:`_search_show_id`.

    :param str series: series of the episode.
    :param year: year of the series, if any.
    :type year: int
    :param country_code: country code of the series, if any.
    :type country_code: str
    :return: the show id, if found.
    :rtype: int
    """
    key = sanitize(series).lower()
    show_ids = self._get_show_ids()
    show_id = None

    # most specific first: with country code appended
    if country_code:
        logger.debug('Getting show id with country')
        show_id = show_ids.get('%s %s' % (key, country_code.lower()))

    # then with the year appended
    if not show_id and year:
        logger.debug('Getting show id with year')
        show_id = show_ids.get('%s %d' % (key, year))

    # finally the bare sanitized name
    if not show_id:
        logger.debug('Getting show id')
        show_id = show_ids.get(key)

    # search as last resort
    # broken right now
    # if not show_id:
    #     logger.warning('Series %s not found in show ids', series)
    #     show_id = self._search_show_id(series)

    return show_id
def get_matches(self, video):
    """Return the set of match names between `video` and this movie subtitle."""
    matches = set()

    if isinstance(video, Movie):
        # title
        if video.title and sanitize(self.title) == fix_inconsistent_naming(video.title):
            matches.add('title')
        # year
        if video.year and self.year == video.year:
            matches.add('year')
        # imdb id
        if video.imdb_id and self.imdb_id == video.imdb_id:
            matches.add('imdb_id')
        # release group and resolution are looked up in the free-text comments
        if video.release_group and video.release_group in self.comments:
            matches.add('release_group')
        if video.resolution and video.resolution.lower() in self.comments:
            matches.add('resolution')

    self.matches = matches
    return matches
def query(self, language, video):
    """Search subsunacs.net for subtitles matching `video` in `language`.

    Returns a list of subtitle objects collected from the first 20 result
    rows, each tagged with the uploader name when available.
    """
    subtitles = []
    isEpisode = isinstance(video, Episode)
    params = {
        'm': '',
        'l': 0,
        'c': '',
        'y': '',
        'action': " Търси ",
        'a': '',
        'd': '',
        'u': '',
        'g': '',
        't': '',
        'imdbcheck': 1
    }

    if isEpisode:
        # query is "<series> <season> <episode>" with zero-padded numbers
        params['m'] = "%s %02d %02d" % (sanitize(
            fix_tv_naming(video.series), {'\''}), video.season, video.episode)
    else:
        params['y'] = video.year
        params['m'] = sanitize(video.title, {'\''})

    if language == 'en' or language == 'eng':
        # site language id 1 selects English; 0 is the default (presumably Bulgarian)
        params['l'] = 1

    logger.info('Searching subtitle %r', params)
    response = self.session.post('https://subsunacs.net/search.php',
                                 params=params,
                                 allow_redirects=False,
                                 timeout=10,
                                 headers={
                                     'Referer': 'https://subsunacs.net/index.php',
                                 })

    response.raise_for_status()

    if response.status_code != 200:
        # raise_for_status only raises for 4xx/5xx; with allow_redirects=False a
        # 3xx lands here and is treated as "no results"
        logger.debug('No subtitles found')
        return subtitles

    soup = BeautifulSoup(response.content, 'lxml')
    rows = soup.findAll('tr', onmouseover=True)

    # Search on first 20 rows only
    for row in rows[:20]:
        a_element_wrapper = row.find('td', {'class': 'tdMovie'})
        if a_element_wrapper:
            element = a_element_wrapper.find('a', {'class': 'tooltip'})
            if element:
                link = element.get('href')
                # uploader profile anchors match /search.php?t=1&memid=... or ...&u=...
                element = row.find(
                    'a',
                    href=re.compile(r'.*/search\.php\?t=1\&(memid|u)=.*'))
                uploader = element.get_text() if element else None
                logger.info('Found subtitle link %r', link)
                sub = self.download_archive_and_add_subtitle_files(
                    'https://subsunacs.net' + link, language, video)
                for s in sub:
                    s.uploader = uploader
                subtitles = subtitles + sub
    return subtitles
def get_matches(self, video):
    """Return match names between `video` and this subtitle.

    Fix: the video_codec section appended the codec aliases ("x264"/"x265")
    to `formats` — the *source* alias list — and then scanned `formats`, so a
    source string found in the filename wrongly produced a 'video_codec'
    match while the codec itself was never checked. The codec aliases now
    live in their own list.
    """
    matches = set()
    subtitle_filename = self.release.lower()

    # episode
    if isinstance(video, Episode):
        # series
        if video.series and (sanitize(self.title) in (
                sanitize(name) for name in [video.series] + video.alternative_series)):
            matches.add('series')
        # season
        if video.season and self.season == video.season:
            matches.add('season')
        # episode
        if video.episode and self.episode == video.episode:
            matches.add('episode')
        # imdb_id
        if video.series_imdb_id and self.imdb_id == video.series_imdb_id:
            matches.add('series_imdb_id')
        # guess
        matches |= guess_matches(video, guessit(self.release, {'type': 'episode'}), partial=True)
    # movie
    elif isinstance(video, Movie):
        matches |= guess_matches(video, guessit(self.release, {'type': 'movie'}), partial=True)
        # title
        if video.title and (sanitize(self.title) in (
                sanitize(name) for name in [video.title] + video.alternative_titles)):
            matches.add('title')

    # release_group
    if video.release_group and video.release_group.lower() in subtitle_filename:
        matches.add('release_group')
    # resolution
    if video.resolution and video.resolution.lower() in subtitle_filename:
        matches.add('resolution')

    # source: expand the generic "web" source to its common filename spellings
    if video.source:
        formats = [video.source.lower()]
        if formats[0] == "web":
            formats.append("webdl")
            formats.append("webrip")
            formats.append("web ")
        for frmt in formats:
            if frmt in subtitle_filename:
                matches.add('source')
                break

    # video_codec: also accept the common encoder names
    if video.video_codec:
        video_codecs = [video.video_codec.lower()]
        if video_codecs[0] == "h.264":
            video_codecs.append("x264")
        elif video_codecs[0] == "h.265":
            video_codecs.append("x265")
        for vc in video_codecs:
            if vc in subtitle_filename:
                matches.add('video_codec')
                break

    return matches
def guess_matches(video, guess, partial=False):
    """Get matches between a `video` and a `guess`.

    If a guess is `partial`, the absence information won't be counted as a match.

    Patch: add multiple release group and formats handling

    :param video: the video.
    :type video: :class:`~subliminal.video.Video`
    :param guess: the guess.
    :type guess: dict
    :param bool partial: whether or not the guess is partial.
    :return: matches between the `video` and the `guess`.
    :rtype: set
    """
    matches = set()
    if isinstance(video, Episode):
        # series
        if video.series and 'title' in guess:
            titles = guess["title"]
            if not isinstance(titles, list):
                titles = [titles]
            for title in titles:
                if sanitize(title) in (
                        sanitize(name)
                        for name in [video.series] + video.alternative_series):
                    matches.add('series')
        # title
        if video.title and 'episode_title' in guess and sanitize(
                guess['episode_title']) == sanitize(video.title):
            matches.add('title')
        # season
        if video.season and 'season' in guess and guess[
                'season'] == video.season:
            matches.add('season')
        # episode
        # Currently we only have single-ep support (guessit returns a multi-ep as a list with int values)
        # Most providers only support single-ep, so make sure it contains only 1 episode
        # In case of multi-ep, take the lowest episode (subtitles will normally be available on lowest episode number)
        if video.episode and 'episode' in guess:
            episode_guess = guess['episode']
            episode = min(episode_guess) if episode_guess and isinstance(
                episode_guess, list) else episode_guess
            if episode == video.episode:
                matches.add('episode')
        # year
        if video.year and 'year' in guess and guess['year'] == video.year:
            matches.add('year')
        # count "no year" as an information
        if not partial and video.original_series and 'year' not in guess:
            matches.add('year')
    elif isinstance(video, Movie):
        # year
        if video.year and 'year' in guess and guess['year'] == video.year:
            matches.add('year')
        # title
        if video.title and 'title' in guess and sanitize(guess['title']) in (
                sanitize(name)
                for name in [video.title] + video.alternative_titles):
            matches.add('title')

    # release_group
    if 'release_group' in guess:
        release_groups = guess["release_group"]
        if not isinstance(release_groups, list):
            release_groups = [release_groups]
        if video.release_group:
            for release_group in release_groups:
                if (sanitize_release_group(release_group)
                        in get_equivalent_release_groups(
                            sanitize_release_group(video.release_group))):
                    matches.add('release_group')
                    break
    # source
    if 'source' in guess:
        formats = guess["source"]
        if not isinstance(formats, list):
            formats = [formats]
        if video.source:
            video_format = video.source.lower()
            # BUGFIX: when a source is not in MERGED_FORMATS, fall back to the
            # lowercased raw strings on BOTH sides. Previously the whole
            # guess["source"] value (possibly a list) was compared against the
            # merged video format, so unmerged sources could never match.
            _video_gen_format = MERGED_FORMATS_REV.get(video_format,
                                                       video_format)
            matched = False
            for frmt in formats:
                _guess_gen_frmt = MERGED_FORMATS_REV.get(frmt.lower(),
                                                         frmt.lower())
                if _guess_gen_frmt == _video_gen_format:
                    matched = True
                    matches.add('source')
                    break

            logger.debug("Source match found? %s: %s -> %s", matched,
                         video.source, formats)

        if "release_group" in matches and "source" not in matches:
            logger.info(
                "Release group matched but source didn't. Removing release group match."
            )
            matches.remove("release_group")

    guess.update({"resolution": guess.get("screen_size")})

    # Solve match keys for potential lists
    for key in ("video_codec", "audio_codec", "edition", "streaming_service",
                "resolution"):
        if _has_match(video, guess, key):
            matches.add(key)

    # Add streaming service match for non-web sources
    if video.source and video.source != "Web":
        matches.add("streaming_service")
    # As edition tags are rare, add edition match if the video doesn't have an edition
    if not video.edition:
        matches.add("edition")

    return matches
def _search_show_id(self, series, year=None):
    """Search the show id from the `series` and `year`.

    :param str series: series of the episode.
    :param year: year of the series, if any.
    :type year: int
    :return: the show id, if found.
    :rtype: int
    """
    # addic7ed doesn't support search with quotes
    series = series.replace('\'', ' ')

    # build the params
    series_year = '%s %d' % (series, year) if year is not None else series
    params = {'search': series_year, 'Submit': 'Search'}

    # make the search
    logger.info('Searching show ids with %r', params)

    # currently addic7ed searches via srch.php from the front page, then a re-search is needed which calls
    # search.php
    for endpoint in ("srch.php", "search.php",):
        headers = None
        if endpoint == "search.php":
            headers = {
                "referer": self.server_url + "srch.php"
            }
        r = self.session.get(self.server_url + endpoint,
                             params=params,
                             timeout=10,
                             headers=headers)
        r.raise_for_status()

        # BUGFIX: use r.text (str), not r.content (bytes) — on Python 3
        # `"..." not in r.content` raises TypeError (str vs bytes).
        if r.text and "Sorry, your search" not in r.text:
            break

        time.sleep(4)

    if r.status_code == 304:
        raise TooManyRequests()

    soup = ParserBeautifulSoup(r.text, ['lxml', 'html.parser'])

    suggestion = None

    # get the suggestion
    try:
        suggestion = soup.select('span.titulo > a[href^="/show/"]')
        if not suggestion:
            logger.warning('Show id not found: no suggestion')
            return None
        if not sanitize(suggestion[0].i.text.replace('\'', ' '),
                        default_characters=self.sanitize_characters) == \
                sanitize(series_year, default_characters=self.sanitize_characters):
            logger.warning('Show id not found: suggestion does not match')
            return None
        show_id = int(suggestion[0]['href'][6:])
        logger.debug('Found show id %d', show_id)

        return show_id
    finally:
        soup.decompose()
        soup = None
def _search_show_id(self, series, year=None):
    """Search the show id from the `series` and `year`.

    :param str series: series of the episode.
    :param year: year of the series, if any.
    :type year: int
    :return: the show id, if found.
    :rtype: int
    """
    # quotes are not supported by addic7ed's search
    series = series.replace('\'', ' ')

    # the search string includes the year when one is given
    series_year = series if year is None else '%s %d' % (series, year)
    params = {'search': series_year, 'Submit': 'Search'}

    logger.info('Searching show ids with %r', params)

    # addic7ed first searches via srch.php from the front page; a re-search
    # through search.php (with a referer) may then be required
    for endpoint in ("srch.php", "search.php"):
        headers = None
        if endpoint == "search.php":
            headers = {"referer": self.server_url + "srch.php"}
        r = self.session.get(self.server_url + endpoint,
                             params=params,
                             timeout=10,
                             headers=headers)
        r.raise_for_status()

        if r.text and "Sorry, your search" not in r.text:
            break

        time.sleep(4)

    if r.status_code == 304:
        raise TooManyRequests()

    soup = ParserBeautifulSoup(r.text, ['lxml', 'html.parser'])
    try:
        candidates = soup.select('span.titulo > a[href^="/show/"]')
        if not candidates:
            logger.warning('Show id not found: no suggestion')
            return None

        wanted = sanitize(series_year,
                          default_characters=self.sanitize_characters)
        found = sanitize(candidates[0].i.text.replace('\'', ' '),
                         default_characters=self.sanitize_characters)
        if found != wanted:
            logger.warning('Show id not found: suggestion does not match')
            return None

        show_id = int(candidates[0]['href'][6:])
        logger.debug('Found show id %d', show_id)
        return show_id
    finally:
        soup.decompose()
        soup = None
def query(self, language, video):
    """Search subs.sab.bz and return the subtitles found for `video`.

    :param language: language code; 'en'/'eng' selects English, anything
        else searches the default (Bulgarian) listing.
    :param video: the video (Episode or Movie) being searched for.
    :return: list of subtitle objects found.
    """
    subtitles = []
    isEpisode = isinstance(video, Episode)
    params = {
        'act': 'search',
        'movie': '',
        'select-language': '2',
        'upldr': '',
        'yr': '',
        'release': ''
    }
    if isEpisode:
        params['movie'] = "%s %02d %02d" % (sanitize(
            fix_tv_naming(video.series), {'\''}), video.season, video.episode)
    else:
        params['yr'] = video.year
        params['movie'] = sanitize(video.title, {'\''})

    if language == 'en' or language == 'eng':
        params['select-language'] = 1

    logger.info('Searching subtitle %r', params)
    response = self.session.post('http://subs.sab.bz/index.php?',
                                 params=params,
                                 allow_redirects=False,
                                 timeout=10,
                                 headers={
                                     'Referer': 'http://subs.sab.bz/',
                                 })

    response.raise_for_status()

    if response.status_code != 200:
        logger.debug('No subtitles found')
        return subtitles

    soup = BeautifulSoup(response.content, 'lxml')
    rows = soup.findAll('tr', {'class': 'subs-row'})

    # Search on first 25 rows only
    for row in rows[:25]:
        a_element_wrapper = row.find('td', {'class': 'c2field'})
        if a_element_wrapper:
            element = a_element_wrapper.find('a')
            if element:
                link = element.get('href')
                notes = element.get('onmouseover')
                title = element.get_text()

                # The metadata cells are scraped best-effort: a missing or
                # malformed cell must not abort the whole search.
                # BUGFIX: bare `except:` also swallowed SystemExit and
                # KeyboardInterrupt; catch Exception instead.
                try:
                    year = int(str(element.next_sibling).strip(' ()'))
                except Exception:
                    year = None

                td = row.findAll('td')

                try:
                    num_cds = int(td[6].get_text())
                except Exception:
                    num_cds = None

                try:
                    fps = float(td[7].get_text())
                except Exception:
                    fps = None

                try:
                    uploader = td[8].get_text()
                except Exception:
                    uploader = None

                try:
                    imdb_id = re.findall(r'imdb.com/title/(tt\d+)/?$',
                                         td[9].find('a').get('href'))[0]
                except Exception:
                    imdb_id = None

                logger.info('Found subtitle link %r', link)
                sub = self.download_archive_and_add_subtitle_files(
                    link, language, video, fps, num_cds)
                for s in sub:
                    s.title = title
                    s.notes = notes
                    s.year = year
                    s.uploader = uploader
                    s.imdb_id = imdb_id
                    s.single_file = True if len(
                        sub) == 1 and num_cds == 1 else False
                subtitles = subtitles + sub
    return subtitles
def query(self, language, video):
    """Search yavka.net and return the subtitles found for `video`.

    :param language: language code; 'en'/'ru'/'es'/'it' (and their 3-letter
        forms) select the respective language, otherwise Bulgarian ('BG').
    :param video: the video (Episode or Movie) being searched for.
    :return: list of subtitle objects found.
    """
    subtitles = []
    isEpisode = isinstance(video, Episode)
    params = {'s': '', 'y': '', 'u': '', 'l': 'BG', 'i': ''}
    if isEpisode:
        params['s'] = "%s s%02de%02d" % (sanitize(
            video.series, {'\''}), video.season, video.episode)
    else:
        params['y'] = video.year
        params['s'] = sanitize(video.title, {'\''})

    if language == 'en' or language == 'eng':
        params['l'] = 'EN'
    elif language == 'ru' or language == 'rus':
        params['l'] = 'RU'
    elif language == 'es' or language == 'spa':
        params['l'] = 'ES'
    elif language == 'it' or language == 'ita':
        params['l'] = 'IT'

    logger.info('Searching subtitle %r', params)
    response = self.session.get('http://yavka.net/subtitles.php',
                                params=params,
                                allow_redirects=False,
                                timeout=10,
                                headers={
                                    'Referer': 'http://yavka.net/',
                                })

    response.raise_for_status()

    if response.status_code != 200:
        logger.debug('No subtitles found')
        return subtitles

    soup = BeautifulSoup(response.content, 'lxml')
    rows = soup.findAll('tr')

    # Search on first 25 rows only
    for row in rows[:25]:
        element = row.find('a', {'class': 'selector'})
        if element:
            link = element.get('href')
            # NOTE(review): the [A-z] range in this pattern also covers a few
            # punctuation characters; kept as-is to preserve behavior.
            notes = re.sub(r'(?s)<p.*><img [A-z0-9=\'/\. :;#]*>(.*)</p>',
                           r"\1", element.get('content'))
            title = element.get_text()

            # Metadata is scraped best-effort: a missing cell must not abort
            # the search. BUGFIX: bare `except:` also swallowed SystemExit
            # and KeyboardInterrupt; catch Exception instead.
            try:
                year = int(
                    element.find_next_sibling('span').text.strip('()'))
            except Exception:
                year = None

            try:
                fps = float(
                    row.find('span', {
                        'title': 'Кадри в секунда'
                    }).text.strip())
            except Exception:
                fps = None

            element = row.find('a', {'class': 'click'})
            uploader = element.get_text() if element else None
            logger.info('Found subtitle link %r', link)
            sub = self.download_archive_and_add_subtitle_files(
                'http://yavka.net/' + link, language, video, fps)
            for s in sub:
                s.title = title
                s.notes = notes
                s.year = year
                s.uploader = uploader
                s.single_file = True if len(sub) == 1 else False
            subtitles = subtitles + sub
    return subtitles
def guess_matches(video, guess, partial=False):
    """Get matches between a `video` and a `guess`.

    If a guess is `partial`, the absence information won't be counted as a match.

    Patch: add multiple release group and formats handling

    :param video: the video.
    :type video: :class:`~subliminal.video.Video`
    :param guess: the guess.
    :type guess: dict
    :param bool partial: whether or not the guess is partial.
    :return: matches between the `video` and the `guess`.
    :rtype: set
    """
    matches = set()

    if isinstance(video, Episode):
        # series: any guessed title may match the series or an alternative name
        if video.series and 'title' in guess:
            candidates = guess["title"]
            if not isinstance(candidates, list):
                candidates = [candidates]
            known_names = [video.series] + video.alternative_series
            for candidate in candidates:
                if sanitize(candidate) in (sanitize(n) for n in known_names):
                    matches.add('series')
        # episode title
        if video.title and 'episode_title' in guess:
            if sanitize(guess['episode_title']) == sanitize(video.title):
                matches.add('title')
        # season
        if video.season and guess.get('season') == video.season:
            matches.add('season')
        # episode
        # Currently we only have single-ep support (guessit returns a multi-ep as a list with int values)
        # Most providers only support single-ep, so make sure it contains only 1 episode
        # In case of multi-ep, take the lowest episode (subtitles will normally be available on lowest episode number)
        if video.episode and 'episode' in guess:
            ep = guess['episode']
            if isinstance(ep, list) and ep:
                ep = min(ep)
            if ep == video.episode:
                matches.add('episode')
        # year
        if video.year and guess.get('year') == video.year:
            matches.add('year')
        # count "no year" as an information
        if not partial and video.original_series and 'year' not in guess:
            matches.add('year')
    elif isinstance(video, Movie):
        # year
        if video.year and guess.get('year') == video.year:
            matches.add('year')
        # title
        if video.title and 'title' in guess:
            known_names = [video.title] + video.alternative_titles
            if sanitize(guess['title']) in (sanitize(n) for n in known_names):
                matches.add('title')

    # release_group
    if 'release_group' in guess:
        groups = guess["release_group"]
        if not isinstance(groups, list):
            groups = [groups]
        if video.release_group:
            equivalents = get_equivalent_release_groups(
                sanitize_release_group(video.release_group))
            for group in groups:
                if sanitize_release_group(group) in equivalents:
                    matches.add('release_group')
                    break

    # resolution
    if video.resolution and guess.get('screen_size') == video.resolution:
        matches.add('resolution')

    # format
    if 'format' in guess:
        guessed_formats = guess["format"]
        if not isinstance(guessed_formats, list):
            guessed_formats = [guessed_formats]
        if video.format:
            video_format = video.format
            if video_format in ("HDTV", "SDTV", "TV"):
                logger.debug("Treating HDTV/SDTV the same")
                video_format = "TV"
            for candidate in guessed_formats:
                normalized = "TV" if candidate in ("HDTV", "SDTV") else candidate
                if normalized.lower() == video_format.lower():
                    matches.add('format')
                    break

    # video_codec
    if video.video_codec and guess.get('video_codec') == video.video_codec:
        matches.add('video_codec')

    # audio_codec
    if video.audio_codec and guess.get('audio_codec') == video.audio_codec:
        matches.add('audio_codec')

    return matches
def query(self, language, video):
    """Search subsunacs.net and return the subtitles found for `video`.

    :param language: language code; 'en'/'eng' selects English, anything
        else searches the default (Bulgarian) listing.
    :param video: the video (Episode or Movie) being searched for.
    :return: list of subtitle objects found.
    """
    subtitles = []
    isEpisode = isinstance(video, Episode)
    params = {
        'm': '',
        'l': 0,
        'c': '',
        'y': '',
        'action': " Търси ",
        'a': '',
        'd': '',
        'u': '',
        'g': '',
        't': '',
        'imdbcheck': 1
    }
    if isEpisode:
        params['m'] = "%s %02d %02d" % (sanitize(
            fix_tv_naming(video.series), {'\''}), video.season, video.episode)
    else:
        params['y'] = video.year
        params['m'] = sanitize(fix_movie_naming(video.title), {'\''})

    if language == 'en' or language == 'eng':
        params['l'] = 1

    logger.info('Searching subtitle %r', params)
    response = self.session.post('https://subsunacs.net/search.php',
                                 params=params,
                                 allow_redirects=False,
                                 timeout=10,
                                 headers={
                                     'Referer': 'https://subsunacs.net/index.php',
                                 })

    response.raise_for_status()

    if response.status_code != 200:
        logger.debug('No subtitles found')
        return subtitles

    soup = BeautifulSoup(response.content, 'lxml')
    rows = soup.findAll('tr', onmouseover=True)

    # Search on first 20 rows only
    for row in rows[:20]:
        a_element_wrapper = row.find('td', {'class': 'tdMovie'})
        if a_element_wrapper:
            element = a_element_wrapper.find('a', {'class': 'tooltip'})
            if element:
                link = element.get('href')
                notes = re.sub(r'(<img.*)(src=")(/)(.*.jpg">)', r"",
                               element.get('title'))
                title = element.get_text()

                # The metadata cells are scraped best-effort: a missing or
                # malformed cell must not abort the whole search.
                # BUGFIX: bare `except:` also swallowed SystemExit and
                # KeyboardInterrupt; catch Exception instead.
                try:
                    year = int(
                        element.find_next_sibling('span', {
                            'class': 'smGray'
                        }).text.strip('\xa0()'))
                except Exception:
                    year = None

                td = row.findAll('td')

                try:
                    num_cds = int(td[1].get_text())
                except Exception:
                    num_cds = None

                try:
                    fps = float(td[2].get_text())
                except Exception:
                    fps = None

                try:
                    rating = float(td[3].find('img').get('title'))
                except Exception:
                    rating = None

                try:
                    uploader = td[5].get_text()
                except Exception:
                    uploader = None

                logger.info('Found subtitle link %r', link)
                sub = self.download_archive_and_add_subtitle_files(
                    'https://subsunacs.net' + link, language, video, fps,
                    num_cds)
                for s in sub:
                    s.title = title
                    s.notes = notes
                    s.year = year
                    s.rating = rating
                    s.uploader = uploader
                    s.single_file = True if len(
                        sub) == 1 and num_cds == 1 else False
                subtitles = subtitles + sub
    return subtitles
def guess_matches(video, guess, partial=False):
    """Get matches between a `video` and a `guess`.

    If a guess is `partial`, the absence information won't be counted as a match.

    Patch: add multiple release group and formats handling

    :param video: the video.
    :type video: :class:`~subliminal.video.Video`
    :param guess: the guess.
    :type guess: dict
    :param bool partial: whether or not the guess is partial.
    :return: matches between the `video` and the `guess`.
    :rtype: set
    """
    matches = set()
    if isinstance(video, Episode):
        # series
        if video.series and 'title' in guess:
            titles = guess["title"]
            # BUGFIX: `types.ListType` is Python 2 only (removed in Python 3,
            # where it raises AttributeError); use the `list` builtin instead.
            if not isinstance(titles, list):
                titles = [titles]
            for title in titles:
                if sanitize(title) in (
                        sanitize(name)
                        for name in [video.series] + video.alternative_series):
                    matches.add('series')
        # title
        if video.title and 'episode_title' in guess and sanitize(
                guess['episode_title']) == sanitize(video.title):
            matches.add('title')
        # season
        if video.season and 'season' in guess and guess[
                'season'] == video.season:
            matches.add('season')
        # episode
        # Currently we only have single-ep support (guessit returns a multi-ep as a list with int values)
        # Most providers only support single-ep, so make sure it contains only 1 episode
        # In case of multi-ep, take the lowest episode (subtitles will normally be available on lowest episode number)
        if video.episode and 'episode' in guess:
            episode_guess = guess['episode']
            episode = min(episode_guess) if episode_guess and isinstance(
                episode_guess, list) else episode_guess
            if episode == video.episode:
                matches.add('episode')
        # year
        if video.year and 'year' in guess and guess['year'] == video.year:
            matches.add('year')
        # count "no year" as an information
        if not partial and video.original_series and 'year' not in guess:
            matches.add('year')
    elif isinstance(video, Movie):
        # year
        if video.year and 'year' in guess and guess['year'] == video.year:
            matches.add('year')
        # title
        if video.title and 'title' in guess and sanitize(guess['title']) in (
                sanitize(name)
                for name in [video.title] + video.alternative_titles):
            matches.add('title')

    # release_group
    if 'release_group' in guess:
        release_groups = guess["release_group"]
        if not isinstance(release_groups, list):
            release_groups = [release_groups]
        if video.release_group:
            for release_group in release_groups:
                if (sanitize_release_group(release_group)
                        in get_equivalent_release_groups(
                            sanitize_release_group(video.release_group))):
                    matches.add('release_group')
                    break

    # resolution
    if video.resolution and 'screen_size' in guess and guess[
            'screen_size'] == video.resolution:
        matches.add('resolution')

    # format
    if 'format' in guess:
        formats = guess["format"]
        if not isinstance(formats, list):
            formats = [formats]
        if video.format:
            video_format = video.format
            if video_format in ("HDTV", "SDTV", "TV"):
                video_format = "TV"
                logger.debug("Treating HDTV/SDTV the same")
            for frmt in formats:
                if frmt in ("HDTV", "SDTV"):
                    frmt = "TV"
                if frmt.lower() == video_format.lower():
                    matches.add('format')
                    break

    # video_codec
    if video.video_codec and 'video_codec' in guess and guess[
            'video_codec'] == video.video_codec:
        matches.add('video_codec')

    # audio_codec
    if video.audio_codec and 'audio_codec' in guess and guess[
            'audio_codec'] == video.audio_codec:
        matches.add('audio_codec')

    return matches