class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
    subtitle_class = TitloviSubtitle
    languages = {Language.fromtitlovi(l) for l in language_converters['titlovi'].codes} | {Language.fromietf('sr')}
    server_url = 'http://titlovi.com'
    search_url = server_url + '/titlovi/?'
    download_url = server_url + '/download/?type=1&mediaid='

    def initialize(self):
        self.session = Session()

    def terminate(self):
        self.session.close()

    def query(self, languages, title, season=None, episode=None, year=None, video=None):
        items_per_page = 10
        current_page = 1

        # convert list of languages into search string
        langs = '|'.join(map(str, [l.titlovi if l != Language.fromietf('sr') else 'cirilica' for l in languages]))

        # set query params
        params = {'prijevod': title, 'jezik': langs}
        is_episode = False
        if season and episode:
            is_episode = True
            params['s'] = season
            params['e'] = episode
        if year:
            params['g'] = year

        # loop through paginated results
        logger.info('Searching subtitles %r', params)
        subtitles = []

        while True:
            # query the server
            try:
                r = self.session.get(self.search_url, params=params, timeout=10)
                r.raise_for_status()

                soup = BeautifulSoup(r.content, 'lxml')

                # number of results
                result_count = int(soup.select_one('.results_count b').string)
            except:
                result_count = None

            # exit if no results
            if not result_count:
                if not subtitles:
                    logger.debug('No subtitles found')
                else:
                    logger.debug("No more subtitles found")
                break

            # number of pages with results
            pages = int(math.ceil(result_count / float(items_per_page)))

            # get current page
            if 'pg' in params:
                current_page = int(params['pg'])

            try:
                sublist = soup.select('section.titlovi > ul.titlovi > li')
                for sub in sublist:
                    # subtitle id
                    sid = sub.find(attrs={'data-id': True}).attrs['data-id']
                    # get download link
                    download_link = self.download_url + sid
                    # title and alternate title
                    match = title_re.search(sub.a.string)
                    if match:
                        _title = match.group('title')
                        alt_title = match.group('altitle')
                    else:
                        continue

                    # page link
                    page_link = self.server_url + sub.a.attrs['href']
                    # subtitle language
                    match = lang_re.search(sub.select_one('.lang').attrs['src'])
                    if match:
                        try:
                            lang = Language.fromtitlovi(match.group('lang'))
                            script = match.group('script')
                            if script:
                                lang.script = Script(script)
                        except ValueError:
                            continue

                    # release year or series start year
                    match = year_re.search(sub.find(attrs={'data-id': True}).parent.i.string)
                    if match:
                        r_year = int(match.group('year'))
                    # fps
                    match = fps_re.search(sub.select_one('.fps').string)
                    if match:
                        fps = match.group('fps')
                    # releases
                    releases = str(sub.select_one('.fps').parent.contents[0].string)

                    # handle movies and series separately
                    if is_episode:
                        # season and episode info
                        sxe = sub.select_one('.s0xe0y').string
                        r_season = None
                        r_episode = None
                        if sxe:
                            match = season_re.search(sxe)
                            if match:
                                r_season = int(match.group('season'))
                            match = episode_re.search(sxe)
                            if match:
                                r_episode = int(match.group('episode'))

                        subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
                                                       alt_title=alt_title, season=r_season, episode=r_episode,
                                                       year=r_year, fps=fps,
                                                       asked_for_release_group=video.release_group,
                                                       asked_for_episode=episode)
                    else:
                        subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
                                                       alt_title=alt_title, year=r_year, fps=fps,
                                                       asked_for_release_group=video.release_group)
                    logger.debug('Found subtitle %r', subtitle)

                    # prime our matches so we can use the values later
                    subtitle.get_matches(video)

                    # add found subtitles
                    subtitles.append(subtitle)
            finally:
                soup.decompose()

            # stop on last page
            if current_page >= pages:
                break

            # increment current page
            params['pg'] = current_page + 1
            logger.debug('Getting page %d', params['pg'])

        return subtitles

    def list_subtitles(self, video, languages):
        season = episode = None
        if isinstance(video, Episode):
            title = video.series
            season = video.season
            episode = video.episode
        else:
            title = video.title

        return [s for s in self.query(languages, fix_inconsistent_naming(title), season=season, episode=episode,
                                      year=video.year, video=video)]

    def download_subtitle(self, subtitle):
        r = self.session.get(subtitle.download_link, timeout=10)
        r.raise_for_status()

        # open the archive
        archive_stream = io.BytesIO(r.content)
        if is_rarfile(archive_stream):
            logger.debug('Archive identified as rar')
            archive = RarFile(archive_stream)
        elif is_zipfile(archive_stream):
            logger.debug('Archive identified as zip')
            archive = ZipFile(archive_stream)
        else:
            raise ProviderError('Unidentified archive type')

        subtitle.content = self.get_subtitle_from_archive(subtitle, archive)
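
# A minimal standalone sketch of the query string the legacy scraping endpoint above is
# given (parameter names 'prijevod', 'jezik', 's', 'e', 'g' and 'pg' are taken directly
# from query()); the language string and values are illustrative only.
def build_search_params(title, langs, season=None, episode=None, year=None, page=None):
    params = {'prijevod': title, 'jezik': langs}
    if season and episode:
        params['s'] = season
        params['e'] = episode
    if year:
        params['g'] = year
    if page:
        params['pg'] = page
    return params

# example (hypothetical values; 'cirilica' is the code the provider sends for Serbian Cyrillic):
# build_search_params('the expanse', 'cirilica', season=2, episode=3, page=2)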
class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
    subtitle_class = TitloviSubtitle
    languages = {Language.fromtitlovi(l) for l in language_converters['titlovi'].codes} | {Language.fromietf('sr-Latn')}
    api_url = 'https://kodi.titlovi.com/api/subtitles'
    api_gettoken_url = api_url + '/gettoken'
    api_search_url = api_url + '/search'

    def __init__(self, username=None, password=None):
        if not all((username, password)):
            raise ConfigurationError('Username and password must be specified')

        self.username = username
        self.password = password

        self.session = None
        self.user_id = None
        self.login_token = None
        self.token_exp = None

    def initialize(self):
        self.session = RetryingCFSession()
        #load_verification("titlovi", self.session)

        token = region.get("titlovi_token")
        if token is not NO_VALUE:
            self.user_id, self.login_token, self.token_exp = token
            if datetime.now() > self.token_exp:
                logger.debug('Token expired')
                self.log_in()
            else:
                logger.debug('Use cached token')
        else:
            logger.debug('Token not found in cache')
            self.log_in()

    def log_in(self):
        login_params = dict(username=self.username, password=self.password, json=True)
        try:
            response = self.session.post(self.api_gettoken_url, params=login_params)
            if response.status_code == request_codes.ok:
                resp_json = response.json()
                self.login_token = resp_json.get('Token')
                self.user_id = resp_json.get('UserId')
                self.token_exp = dateutil.parser.parse(resp_json.get('ExpirationDate'))

                region.set("titlovi_token", [self.user_id, self.login_token, self.token_exp])
                logger.debug('New token obtained')

            elif response.status_code == request_codes.unauthorized:
                raise AuthenticationError('Login failed')

        except RequestException as e:
            logger.error(e)

    def terminate(self):
        self.session.close()

    @region.cache_on_arguments(expiration_time=SHOW_EXPIRATION_TIME)
    def get_result(self, search_url, search_params):
        return self.session.get(search_url, params=search_params)

    def query(self, languages, title, season=None, episode=None, year=None, imdb_id=None, video=None):
        search_params = dict()

        used_languages = languages
        lang_strings = [str(lang) for lang in used_languages]

        # handle possible duplicate use of Serbian Latin
        if "sr" in lang_strings and "sr-Latn" in lang_strings:
            logger.info('Duplicate entries <Language [sr]> and <Language [sr-Latn]> found, filtering languages')
            used_languages = [l for l in used_languages if l != Language.fromietf('sr-Latn')]
            logger.info('Filtered language list %r', used_languages)

        # convert list of languages into search string
        langs = '|'.join(map(str, [l.titlovi for l in used_languages]))

        # set query params
        search_params['query'] = title
        search_params['lang'] = langs
        is_episode = False
        if season and episode:
            is_episode = True
            #search_params['season'] = season
            #search_params['episode'] = episode
        #if year:
        #    search_params['year'] = year
        if imdb_id:
            search_params['imdbID'] = imdb_id

        # loop through paginated results
        logger.info('Searching subtitles %r', search_params)
        subtitles = []
        query_results = []

        try:
            search_params['token'] = self.login_token
            search_params['userid'] = self.user_id
            search_params['json'] = True

            #response = self.get_result(search_url=self.api_search_url, search_params=search_params)
            response = self.get_result(self.api_search_url, search_params)
            resp_json = response.json()
            if resp_json['SubtitleResults']:
                query_results.extend(resp_json['SubtitleResults'])

        except Exception as e:
            logger.error(e)

        for sub in query_results:
            # title and alternate title
            match = title_re.search(sub.get('Title'))
            if match:
                _title = match.group('title')
                alt_title = match.group('altitle')
            else:
                continue

            # handle movies and series separately
            if is_episode:
                # skip if season and episode number does not match
                if season and season != sub.get('Season'):
                    continue
                elif episode and episode != sub.get('Episode'):
                    continue

                subtitle = self.subtitle_class(Language.fromtitlovi(sub.get('Lang')), sub.get('Link'), sub.get('Id'),
                                               sub.get('Release'), _title, alt_title=alt_title,
                                               season=sub.get('Season'), episode=sub.get('Episode'),
                                               year=sub.get('Year'), rating=sub.get('Rating'),
                                               download_count=sub.get('DownloadCount'),
                                               asked_for_release_group=video.release_group,
                                               asked_for_episode=episode)
            else:
                subtitle = self.subtitle_class(Language.fromtitlovi(sub.get('Lang')), sub.get('Link'), sub.get('Id'),
                                               sub.get('Release'), _title, alt_title=alt_title,
                                               year=sub.get('Year'), rating=sub.get('Rating'),
                                               download_count=sub.get('DownloadCount'),
                                               asked_for_release_group=video.release_group)
            logger.debug('Found subtitle %r', subtitle)

            # prime our matches so we can use the values later
            subtitle.get_matches(video)

            # add found subtitles
            subtitles.append(subtitle)

        return subtitles

    def list_subtitles(self, video, languages):
        season = episode = None
        if isinstance(video, Episode):
            title = video.series
            season = video.season
            episode = video.episode
        else:
            title = video.title

        return [s for s in self.query(languages, fix_inconsistent_naming(title), season=season, episode=episode,
                                      year=video.year, imdb_id=video.imdb_id, video=video)]

    def download_subtitle(self, subtitle):
        r = self.session.get(subtitle.download_link, timeout=10)
        r.raise_for_status()

        # open the archive
        archive_stream = io.BytesIO(r.content)
        if is_rarfile(archive_stream):
            logger.debug('Archive identified as rar')
            archive = RarFile(archive_stream)
        elif is_zipfile(archive_stream):
            logger.debug('Archive identified as zip')
            archive = ZipFile(archive_stream)
        else:
            subtitle.content = r.content
            if subtitle.is_valid():
                return
            subtitle.content = None

            raise ProviderError('Unidentified archive type')

        subs_in_archive = archive.namelist()

        # if Serbian lat and cyr versions are packed together, try to find right version
        if len(subs_in_archive) > 1 and (subtitle.language == 'sr' or subtitle.language == 'sr-Cyrl'):
            self.get_subtitle_from_bundled_archive(subtitle, subs_in_archive, archive)
        else:
            # use default method for everything else
            subtitle.content = self.get_subtitle_from_archive(subtitle, archive)

    def get_subtitle_from_bundled_archive(self, subtitle, subs_in_archive, archive):
        sr_lat_subs = []
        sr_cyr_subs = []
        sub_to_extract = None

        for sub_name in subs_in_archive:
            _sub_name = sub_name.lower()

            if not ('.cyr' in _sub_name or '.cir' in _sub_name or 'cyr)' in _sub_name):
                sr_lat_subs.append(sub_name)

            if ('.cyr' in sub_name or '.cir' in _sub_name) and '.lat' not in _sub_name.lower():
                sr_cyr_subs.append(sub_name)

        if subtitle.language == 'sr':
            if len(sr_lat_subs) > 0:
                sub_to_extract = sr_lat_subs[0]

        if subtitle.language == 'sr-Cyrl':
            if len(sr_cyr_subs) > 0:
                sub_to_extract = sr_cyr_subs[0]

        logger.info(u'Using %s from the archive', sub_to_extract)
        subtitle.content = fix_line_ending(archive.read(sub_to_extract))
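
# A minimal standalone sketch of the token + search flow the class above wraps, using plain
# requests. Only the endpoints and parameter names visible in the provider code are used;
# the credentials are hypothetical and the live API behaviour is not verified here.
import requests

API_URL = 'https://kodi.titlovi.com/api/subtitles'

def search_titlovi(username, password, query, langs='sr'):
    session = requests.Session()

    # obtain a token; the provider caches Token, UserId and ExpirationDate from this response
    token_resp = session.post(API_URL + '/gettoken',
                              params={'username': username, 'password': password, 'json': True})
    token_resp.raise_for_status()
    auth = token_resp.json()

    # run a search with the same parameter names the provider sends
    search_resp = session.get(API_URL + '/search',
                              params={'token': auth.get('Token'), 'userid': auth.get('UserId'),
                                      'json': True, 'query': query, 'lang': langs})
    search_resp.raise_for_status()
    return search_resp.json().get('SubtitleResults', [])

# example call (hypothetical credentials):
# results = search_titlovi('user', 'pass', 'the expanse')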
class TitloviProvider(Provider, ProviderSubtitleArchiveMixin):
    subtitle_class = TitloviSubtitle
    languages = {Language.fromtitlovi(l) for l in language_converters['titlovi'].codes} | {Language.fromietf('sr-Latn')}
    server_url = 'https://titlovi.com'
    search_url = server_url + '/titlovi/?'
    download_url = server_url + '/download/?type=1&mediaid='

    def initialize(self):
        self.session = RetryingCFSession()
        #load_verification("titlovi", self.session)

    def terminate(self):
        self.session.close()

    def query(self, languages, title, season=None, episode=None, year=None, video=None):
        items_per_page = 10
        current_page = 1

        used_languages = languages
        lang_strings = [str(lang) for lang in used_languages]

        # handle possible duplicate use of Serbian Latin
        if "sr" in lang_strings and "sr-Latn" in lang_strings:
            logger.info('Duplicate entries <Language [sr]> and <Language [sr-Latn]> found, filtering languages')
            used_languages = filter(lambda l: l != Language.fromietf('sr-Latn'), used_languages)
            logger.info('Filtered language list %r', used_languages)

        # convert list of languages into search string
        langs = '|'.join(map(str, [l.titlovi for l in used_languages]))

        # set query params
        params = {'prijevod': title, 'jezik': langs}
        is_episode = False
        if season and episode:
            is_episode = True
            params['s'] = season
            params['e'] = episode
        if year:
            params['g'] = year

        # loop through paginated results
        logger.info('Searching subtitles %r', params)
        subtitles = []

        while True:
            # query the server
            try:
                r = self.session.get(self.search_url, params=params, timeout=10)
                r.raise_for_status()
            except RequestException as e:
                logger.exception('RequestException %s', e)
                break
            else:
                try:
                    soup = BeautifulSoup(r.content, 'lxml')

                    # number of results
                    result_count = int(soup.select_one('.results_count b').string)
                except:
                    result_count = None

                # exit if no results
                if not result_count:
                    if not subtitles:
                        logger.debug('No subtitles found')
                    else:
                        logger.debug("No more subtitles found")
                    break

                # number of pages with results
                pages = int(math.ceil(result_count / float(items_per_page)))

                # get current page
                if 'pg' in params:
                    current_page = int(params['pg'])

                try:
                    sublist = soup.select('section.titlovi > ul.titlovi > li.subtitleContainer.canEdit')
                    for sub in sublist:
                        # subtitle id
                        sid = sub.find(attrs={'data-id': True}).attrs['data-id']
                        # get download link
                        download_link = self.download_url + sid
                        # title and alternate title
                        match = title_re.search(sub.a.string)
                        if match:
                            _title = match.group('title')
                            alt_title = match.group('altitle')
                        else:
                            continue

                        # page link
                        page_link = self.server_url + sub.a.attrs['href']
                        # subtitle language
                        _lang = sub.select_one('.lang')
                        match = lang_re.search(_lang.attrs.get('src', _lang.attrs.get('cfsrc', '')))
                        if match:
                            try:
                                # decode language
                                lang = Language.fromtitlovi(match.group('lang') + match.group('script'))
                            except ValueError:
                                continue

                        # release year or series start year
                        match = year_re.search(sub.find(attrs={'data-id': True}).parent.i.string)
                        if match:
                            r_year = int(match.group('year'))
                        # fps
                        match = fps_re.search(sub.select_one('.fps').string)
                        if match:
                            fps = match.group('fps')
                        # releases
                        releases = str(sub.select_one('.fps').parent.contents[0].string)

                        # handle movies and series separately
                        if is_episode:
                            # season and episode info
                            sxe = sub.select_one('.s0xe0y').string
                            r_season = None
                            r_episode = None
                            if sxe:
                                match = season_re.search(sxe)
                                if match:
                                    r_season = int(match.group('season'))
                                match = episode_re.search(sxe)
                                if match:
                                    r_episode = int(match.group('episode'))

                            subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
                                                           alt_title=alt_title, season=r_season, episode=r_episode,
                                                           year=r_year, fps=fps,
                                                           asked_for_release_group=video.release_group,
                                                           asked_for_episode=episode)
                        else:
                            subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
                                                           alt_title=alt_title, year=r_year, fps=fps,
                                                           asked_for_release_group=video.release_group)
                        logger.debug('Found subtitle %r', subtitle)

                        # prime our matches so we can use the values later
                        subtitle.get_matches(video)

                        # add found subtitles
                        subtitles.append(subtitle)
                finally:
                    soup.decompose()

                # stop on last page
                if current_page >= pages:
                    break

                # increment current page
                params['pg'] = current_page + 1
                logger.debug('Getting page %d', params['pg'])

        return subtitles

    def list_subtitles(self, video, languages):
        season = episode = None
        if isinstance(video, Episode):
            title = video.series
            season = video.season
            episode = video.episode
        else:
            title = video.title

        return [s for s in self.query(languages, fix_inconsistent_naming(title), season=season, episode=episode,
                                      year=video.year, video=video)]

    def download_subtitle(self, subtitle):
        r = self.session.get(subtitle.download_link, timeout=10)
        r.raise_for_status()

        # open the archive
        archive_stream = io.BytesIO(r.content)
        if is_rarfile(archive_stream):
            logger.debug('Archive identified as rar')
            archive = RarFile(archive_stream)
        elif is_zipfile(archive_stream):
            logger.debug('Archive identified as zip')
            archive = ZipFile(archive_stream)
        else:
            subtitle.content = r.content
            if subtitle.is_valid():
                return
            subtitle.content = None

            raise ProviderError('Unidentified archive type')

        subs_in_archive = archive.namelist()

        # if Serbian lat and cyr versions are packed together, try to find right version
        if len(subs_in_archive) > 1 and (subtitle.language == 'sr' or subtitle.language == 'sr-Cyrl'):
            self.get_subtitle_from_bundled_archive(subtitle, subs_in_archive, archive)
        else:
            # use default method for everything else
            subtitle.content = self.get_subtitle_from_archive(subtitle, archive)

    def get_subtitle_from_bundled_archive(self, subtitle, subs_in_archive, archive):
        sr_lat_subs = []
        sr_cyr_subs = []
        sub_to_extract = None

        for sub_name in subs_in_archive:
            if not ('.cyr' in sub_name or '.cir' in sub_name):
                sr_lat_subs.append(sub_name)

            if ('.cyr' in sub_name or '.cir' in sub_name) and '.lat' not in sub_name:
                sr_cyr_subs.append(sub_name)

        if subtitle.language == 'sr':
            if len(sr_lat_subs) > 0:
                sub_to_extract = sr_lat_subs[0]

        if subtitle.language == 'sr-Cyrl':
            if len(sr_cyr_subs) > 0:
                sub_to_extract = sr_cyr_subs[0]

        logger.info(u'Using %s from the archive', sub_to_extract)
        subtitle.content = fix_line_ending(archive.read(sub_to_extract))
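
# A self-contained sketch of the Latin/Cyrillic selection heuristic used by
# get_subtitle_from_bundled_archive above, operating on a plain list of member names so it
# can be exercised without a real archive. The filenames and the helper name are illustrative,
# and the simplified marker list ('.cyr', '.cir') follows the version of the method just shown.
def pick_subtitle_name(names, want_cyrillic=False):
    """Return the archive member matching the requested Serbian script, or None."""
    cyr_markers = ('.cyr', '.cir')
    latin = [n for n in names if not any(m in n.lower() for m in cyr_markers)]
    cyrillic = [n for n in names
                if any(m in n.lower() for m in cyr_markers) and '.lat' not in n.lower()]
    candidates = cyrillic if want_cyrillic else latin
    return candidates[0] if candidates else None

# example (hypothetical archive contents):
# pick_subtitle_name(['Movie.2019.lat.srt', 'Movie.2019.cir.srt'], want_cyrillic=True)
# -> 'Movie.2019.cir.srt'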
def query(self, languages, title, season=None, episode=None, year=None, video=None):
    items_per_page = 10
    current_page = 1

    used_languages = languages
    lang_strings = [str(lang) for lang in used_languages]

    # handle possible duplicate use of Serbian Latin
    if "sr" in lang_strings and "sr-Latn" in lang_strings:
        logger.info('Duplicate entries <Language [sr]> and <Language [sr-Latn]> found, filtering languages')
        used_languages = filter(lambda l: l != Language.fromietf('sr-Latn'), used_languages)
        logger.info('Filtered language list %r', used_languages)

    # convert list of languages into search string
    langs = '|'.join(map(str, [l.titlovi for l in used_languages]))

    # set query params
    params = {'prijevod': title, 'jezik': langs}
    is_episode = False
    if season and episode:
        is_episode = True
        params['s'] = season
        params['e'] = episode
    if year:
        params['g'] = year

    # loop through paginated results
    logger.info('Searching subtitles %r', params)
    subtitles = []

    while True:
        # query the server
        try:
            r = self.session.get(self.search_url, params=params, timeout=10)
            r.raise_for_status()
        except RequestException as e:
            captcha_passed = False
            if e.response.status_code == 403 and "data-sitekey" in e.response.content:
                logger.info('titlovi: Solving captcha. This might take a couple of minutes, but should only '
                            'happen once every so often')

                site_key = re.search(r'data-sitekey="(.+?)"', e.response.content).group(1)
                challenge_s = re.search(r'type="hidden" name="s" value="(.+?)"', e.response.content).group(1)
                challenge_ray = re.search(r'data-ray="(.+?)"', e.response.content).group(1)
                if not all([site_key, challenge_s, challenge_ray]):
                    raise Exception("titlovi: Captcha site-key not found!")

                pitcher = pitchers.get_pitcher()("titlovi", e.request.url, site_key,
                                                 user_agent=self.session.headers["User-Agent"],
                                                 cookies=self.session.cookies.get_dict(),
                                                 is_invisible=True)

                result = pitcher.throw()
                if not result:
                    raise Exception("titlovi: Couldn't solve captcha!")

                s_params = {
                    "s": challenge_s,
                    "id": challenge_ray,
                    "g-recaptcha-response": result,
                }
                r = self.session.get(self.server_url + "/cdn-cgi/l/chk_captcha", params=s_params, timeout=10,
                                     allow_redirects=False)
                r.raise_for_status()
                r = self.session.get(self.search_url, params=params, timeout=10)
                r.raise_for_status()
                store_verification("titlovi", self.session)
                captcha_passed = True

            if not captcha_passed:
                logger.exception('RequestException %s', e)
                break
        else:
            try:
                soup = BeautifulSoup(r.content, 'lxml')

                # number of results
                result_count = int(soup.select_one('.results_count b').string)
            except:
                result_count = None

            # exit if no results
            if not result_count:
                if not subtitles:
                    logger.debug('No subtitles found')
                else:
                    logger.debug("No more subtitles found")
                break

            # number of pages with results
            pages = int(math.ceil(result_count / float(items_per_page)))

            # get current page
            if 'pg' in params:
                current_page = int(params['pg'])

            try:
                sublist = soup.select('section.titlovi > ul.titlovi > li.subtitleContainer.canEdit')
                for sub in sublist:
                    # subtitle id
                    sid = sub.find(attrs={'data-id': True}).attrs['data-id']
                    # get download link
                    download_link = self.download_url + sid
                    # title and alternate title
                    match = title_re.search(sub.a.string)
                    if match:
                        _title = match.group('title')
                        alt_title = match.group('altitle')
                    else:
                        continue

                    # page link
                    page_link = self.server_url + sub.a.attrs['href']
                    # subtitle language
                    match = lang_re.search(sub.select_one('.lang').attrs['src'])
                    if match:
                        try:
                            # decode language
                            lang = Language.fromtitlovi(match.group('lang') + match.group('script'))
                        except ValueError:
                            continue

                    # release year or series start year
                    match = year_re.search(sub.find(attrs={'data-id': True}).parent.i.string)
                    if match:
                        r_year = int(match.group('year'))
                    # fps
                    match = fps_re.search(sub.select_one('.fps').string)
                    if match:
                        fps = match.group('fps')
                    # releases
                    releases = str(sub.select_one('.fps').parent.contents[0].string)

                    # handle movies and series separately
                    if is_episode:
                        # season and episode info
                        sxe = sub.select_one('.s0xe0y').string
                        r_season = None
                        r_episode = None
                        if sxe:
                            match = season_re.search(sxe)
                            if match:
                                r_season = int(match.group('season'))
                            match = episode_re.search(sxe)
                            if match:
                                r_episode = int(match.group('episode'))

                        subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
                                                       alt_title=alt_title, season=r_season, episode=r_episode,
                                                       year=r_year, fps=fps,
                                                       asked_for_release_group=video.release_group,
                                                       asked_for_episode=episode)
                    else:
                        subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
                                                       alt_title=alt_title, year=r_year, fps=fps,
                                                       asked_for_release_group=video.release_group)
                    logger.debug('Found subtitle %r', subtitle)

                    # prime our matches so we can use the values later
                    subtitle.get_matches(video)

                    # add found subtitles
                    subtitles.append(subtitle)
            finally:
                soup.decompose()

            # stop on last page
            if current_page >= pages:
                break

            # increment current page
            params['pg'] = current_page + 1
            logger.debug('Getting page %d', params['pg'])

    return subtitles
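
# A minimal sketch of the Serbian-Latin de-duplication done at the top of query() above,
# shown with plain babelfish Language objects (the provider's Language.fromietf calls map to
# these). The helper name is illustrative; the titlovi converter itself is assumed to be
# registered elsewhere, so only the IETF comparison is demonstrated.
from babelfish import Language

def drop_duplicate_serbian(languages):
    lang_strings = [str(lang) for lang in languages]
    if "sr" in lang_strings and "sr-Latn" in lang_strings:
        # keep plain 'sr' and drop the redundant Latin-script entry
        return [l for l in languages if l != Language.fromietf('sr-Latn')]
    return list(languages)

# example:
# drop_duplicate_serbian({Language.fromietf('sr'), Language.fromietf('sr-Latn')})
# -> [<Language [sr]>]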
def query(self, languages, title, season=None, episode=None, year=None, video=None):
    items_per_page = 10
    current_page = 1

    used_languages = languages
    lang_strings = [str(lang) for lang in used_languages]

    # handle possible duplicate use of Serbian Latin
    if "sr" in lang_strings and "sr-Latn" in lang_strings:
        logger.info('Duplicate entries <Language [sr]> and <Language [sr-Latn]> found, filtering languages')
        used_languages = filter(lambda l: l != Language.fromietf('sr-Latn'), used_languages)
        logger.info('Filtered language list %r', used_languages)

    # convert list of languages into search string
    langs = '|'.join(map(str, [l.titlovi for l in used_languages]))

    # set query params
    params = {'prijevod': title, 'jezik': langs}
    is_episode = False
    if season and episode:
        is_episode = True
        params['s'] = season
        params['e'] = episode
    if year:
        params['g'] = year

    # loop through paginated results
    logger.info('Searching subtitles %r', params)
    subtitles = []

    while True:
        # query the server
        try:
            r = self.session.get(self.search_url, params=params, timeout=10)
            r.raise_for_status()
        except RequestException as e:
            logger.exception('RequestException %s', e)
            break
        else:
            try:
                soup = BeautifulSoup(r.content, 'lxml')

                # number of results
                result_count = int(soup.select_one('.results_count b').string)
            except:
                result_count = None

            # exit if no results
            if not result_count:
                if not subtitles:
                    logger.debug('No subtitles found')
                else:
                    logger.debug("No more subtitles found")
                break

            # number of pages with results
            pages = int(math.ceil(result_count / float(items_per_page)))

            # get current page
            if 'pg' in params:
                current_page = int(params['pg'])

            try:
                sublist = soup.select('section.titlovi > ul.titlovi > li.subtitleContainer.canEdit')
                for sub in sublist:
                    # subtitle id
                    sid = sub.find(attrs={'data-id': True}).attrs['data-id']
                    # get download link
                    download_link = self.download_url + sid
                    # title and alternate title
                    match = title_re.search(sub.a.string)
                    if match:
                        _title = match.group('title')
                        alt_title = match.group('altitle')
                    else:
                        continue

                    # page link
                    page_link = self.server_url + sub.a.attrs['href']
                    # subtitle language
                    match = lang_re.search(sub.select_one('.lang').attrs['src'])
                    if match:
                        try:
                            # decode language
                            lang = Language.fromtitlovi(match.group('lang') + match.group('script'))
                        except ValueError:
                            continue

                    # release year or series start year
                    match = year_re.search(sub.find(attrs={'data-id': True}).parent.i.string)
                    if match:
                        r_year = int(match.group('year'))
                    # fps
                    match = fps_re.search(sub.select_one('.fps').string)
                    if match:
                        fps = match.group('fps')
                    # releases
                    releases = str(sub.select_one('.fps').parent.contents[0].string)

                    # handle movies and series separately
                    if is_episode:
                        # season and episode info
                        sxe = sub.select_one('.s0xe0y').string
                        r_season = None
                        r_episode = None
                        if sxe:
                            match = season_re.search(sxe)
                            if match:
                                r_season = int(match.group('season'))
                            match = episode_re.search(sxe)
                            if match:
                                r_episode = int(match.group('episode'))

                        subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
                                                       alt_title=alt_title, season=r_season, episode=r_episode,
                                                       year=r_year, fps=fps,
                                                       asked_for_release_group=video.release_group,
                                                       asked_for_episode=episode)
                    else:
                        subtitle = self.subtitle_class(lang, page_link, download_link, sid, releases, _title,
                                                       alt_title=alt_title, year=r_year, fps=fps,
                                                       asked_for_release_group=video.release_group)
                    logger.debug('Found subtitle %r', subtitle)

                    # prime our matches so we can use the values later
                    subtitle.get_matches(video)

                    # add found subtitles
                    subtitles.append(subtitle)
            finally:
                soup.decompose()

            # stop on last page
            if current_page >= pages:
                break

            # increment current page
            params['pg'] = current_page + 1
            logger.debug('Getting page %d', params['pg'])

    return subtitles