def get_series_episodes(self, series_id):
    doc = self._get_series_doc(series_id)
    episodes = []
    with Timer(logger=self.log, name='Parsing episodes of series with ID %d' % series_id):
        body = doc.find('div', {'class': 'mid'})
        series_title, original_title = parse_title(body.find('h1').first.text)
        image = self.BASE_URL + body.find('img').attr('src')
        icon = image.replace('/posters/poster_', '/icons/cat_')
        # Each 't_row' div holds a single episode row
        episode_divs = body.find('div', {'class': 't_row.*?'})
        series_poster = None
        for ep in episode_divs:
            title_td = ep.find('td', {'class': 't_episode_title'})
            episode_title, orig_title = parse_title(title_td.text)
            onclick = title_td.attr('onClick')
            release_date = ep.find('span', {'class': 'micro'}).find('span')[0].text
            release_date = str_to_date(release_date, '%d.%m.%Y %H:%M') if release_date else None
            _, season_number, episode_number = parse_onclick(onclick)
            poster = poster_url(original_title, season_number)
            # Remember the first poster seen; it is not used further in this method
            if not series_poster:
                series_poster = poster
            episode = Episode(series_id, series_title, season_number, episode_number,
                              episode_title, orig_title, release_date, icon, poster, image)
            episodes.append(episode)
    self.log.info("Got %d episode(s) successfully" % len(episodes))
    self.log.debug(repr(episodes).decode("unicode-escape"))
    return episodes
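
# Illustrative usage sketch (not part of the original module): `scraper` stands
# for an instance of the enclosing scraper class, the series ID is an arbitrary
# example, and Episode is assumed to expose its constructor arguments
# (season_number, episode_number, ...) as attributes.
def _example_list_episodes(scraper):
    episodes = scraper.get_series_episodes(1356)  # hypothetical series ID
    for ep in episodes:
        scraper.log.info("S%sE%s" % (ep.season_number, ep.episode_number))
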
def browse_episodes(self, skip=0):
    self.ensure_authorized()
    doc = self.fetchDom(self.BASE_URL + "/browse.php", {'o': skip})
    with Timer(logger=self.log, name='Parsing episodes list'):
        body = doc.find('div', {'class': 'content_body'})
        series_titles = body.find('span', {'style': 'font-family:arial;.*?'}).strings
        titles = body.find('span', {'class': 'torrent_title'}).strings
        episode_titles, original_titles = zip(*[parse_title(t) for t in titles])
        # Every third <b> tag, starting from the second, holds a release date
        release_dates = body.find('b').strings[1::3]
        release_dates = [str_to_date(d, '%d.%m.%Y %H:%M') for d in release_dates]
        # There are more episodes while the selected page is not the last one
        selected_page = body.find('span', {'class': 'd_pages_link_selected'}).text
        last_page = body.find('a', {'class': 'd_pages_link'}).last.text
        self.has_more = int(selected_page) < int(last_page)
        icons = body.find('img', {'class': 'category_icon'}).attrs('src')
        onclicks = body.find('a', {'href': 'javascript:{};'}).attrs('onClick')
        series_ids, season_numbers, episode_numbers = \
            zip(*[parse_onclick(s or "") for s in onclicks])
        # Derive each poster slug from the icon path and the season number
        posters = [poster_url(i[0][18:-5], i[1]) for i in zip(icons, season_numbers)]
        icons = [self.BASE_URL + url for url in icons]
        images = [url.replace('/icons/cat_', '/posters/poster_') for url in icons]
        data = zip(series_ids, series_titles, season_numbers, episode_numbers,
                   episode_titles, original_titles, release_dates, icons, posters, images)
        # Skip entries without a parsed series ID
        episodes = [Episode(*e) for e in data if e[0]]
    self.log.info("Got %d episode(s) successfully" % len(episodes))
    self.log.debug(repr(episodes).decode("unicode-escape"))
    return episodes
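
# Illustrative pagination sketch (not part of the original module): the method
# sets `has_more` on the scraper after each call, so a caller can walk pages by
# advancing the `skip` offset. Advancing by the number of episodes returned is
# an assumption; the real step depends on the site's paging.
def _example_browse_all(scraper):
    skip = 0
    all_episodes = []
    while True:
        page = scraper.browse_episodes(skip=skip)
        all_episodes.extend(page)
        if not page or not scraper.has_more:
            break
        skip += len(page)
    return all_episodes
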
def fetch(self, url, params=None, data=None, **request_params):
    try:
        with Timer(logger=self.log, name='Fetching URL %s with params %r' % (url, params)):
            # POST when form data is supplied, otherwise GET
            response = self.session.request('post' if data else 'get', url,
                                            params=params, data=data, **request_params)
            response.raise_for_status()
            if data:
                # Persist session cookies after a state-changing POST (e.g. login)
                self.save_cookies()
            return response
    except Timeout as e:
        raise LocalizedError(32000, "Timeout while fetching URL: %s (%%s)" % url,
                             plugin.get_string(30000), cause=e)
    except NoValidProxiesFound as e:
        raise LocalizedError(32005, "Can't find anonymous proxy", cause=e)
    except RequestException as e:
        raise LocalizedError(32001, "Can't fetch URL: %s (%%s)" % url,
                             plugin.get_string(30000), cause=e)
    except ProxyListException as e:
        plugin.set_setting('use-proxy', 0)
        raise LocalizedError(32004, "Can't load anonymous proxy list", cause=e)
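
# Illustrative caller sketch (not part of the original module): fetch() wraps
# every transport failure in LocalizedError, so one except clause is enough;
# the URL and params here are made up.
def _example_fetch_page(scraper):
    try:
        response = scraper.fetch(scraper.BASE_URL + "/browse.php", params={'o': 0})
        return response.text
    except LocalizedError as e:
        scraper.log.error("Fetch failed: %r" % e)
        return None
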
def authorize(self):
    with Timer(logger=self.log, name='Authorization'):
        try:
            # Drop any stale cookies for the domain; KeyError means there were none
            self.session.cookies.clear('.lostfilm.tv')
        except KeyError:
            pass
        response = self.fetch(url=self.POST_URL, data=self.login_data)
        parsed_response = json.loads(response.text)
        if 'error' in parsed_response and parsed_response['error'] == 2:
            raise ScraperError(32003, "Authorization failed", check_settings=True)
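
# Illustrative caller sketch (not part of the original module): authorize()
# raises ScraperError when the site reports bad credentials (error code 2),
# which lets a caller distinguish wrong settings from the transport errors
# raised by fetch().
def _example_login(scraper):
    try:
        scraper.authorize()
        return True
    except ScraperError:
        return False  # credentials rejected; the user should check settings
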
def get_series_episodes_bulk(self, series_ids):
    """
    :rtype: dict[int, list[Episode]]
    """
    if not series_ids:
        return {}
    results = {}
    with Timer(logger=self.log, name="Bulk fetching series episodes with IDs " +
                                     ", ".join(str(i) for i in series_ids)):
        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            # Map each future back to the series ID it was submitted for
            futures = dict((executor.submit(self.get_series_episodes, _id), _id)
                           for _id in series_ids)
            for future in as_completed(futures):
                _id = futures[future]
                results[_id] = future.result()
    return results
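
# Illustrative usage sketch (not part of the original module): results come
# back keyed by series ID regardless of the order in which worker threads
# finish; the IDs below are arbitrary examples.
def _example_bulk_episodes(scraper):
    episodes_by_id = scraper.get_series_episodes_bulk([1356, 1425])
    for series_id, episodes in episodes_by_id.items():
        scraper.log.info("Series %d: %d episode(s)" % (series_id, len(episodes)))
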
def get_torrent_links(self, series_id, season_number, episode_number):
    doc = self.fetchDom(self.BASE_URL + '/nrdr.php', {
        'c': series_id,
        's': season_number,
        'e': episode_number
    })
    links = []
    with Timer(logger=self.log, name='Parsing torrent links'):
        urls = doc.find('a', {'style': 'font-size:18px;.*?'}).attrs('href')
        table = doc.find('table')
        qualities = table.find('img', {'src': 'img/search_.+?'}).attrs('src')
        # Strip the 'img/search_' prefix (11 chars) and the 4-char extension
        qualities = [s[11:-4] for s in qualities]
        # 'Размер' is Russian for 'Size'
        sizes = re.findall('Размер: (.+)\.', table.text)
        for url, qua, size in zip(urls, qualities, sizes):
            links.append(TorrentLink(Quality.find(qua), url, parse_size(size)))
    self.log.info("Got %d link(s) successfully" % len(links))
    self.log.debug(repr(links).decode("unicode-escape"))
    return links
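
# Illustrative usage sketch (not part of the original module): picks the
# largest of the returned torrents. It assumes TorrentLink exposes `size` and
# `url` attributes named after its constructor arguments, which is an
# assumption of this sketch, as are the ID and numbers passed in.
def _example_pick_largest_link(scraper):
    links = scraper.get_torrent_links(1356, 2, 5)
    if not links:
        return None
    return max(links, key=lambda link: link.size).url
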
def get_series_bulk(self, series_ids):
    """
    :rtype: dict[int, Series]
    """
    if not series_ids:
        return {}
    cached_details = self.series_cache.keys()
    not_cached_ids = [_id for _id in series_ids if _id not in cached_details]
    results = dict((_id, self.series_cache[_id])
                   for _id in series_ids if _id in cached_details)
    if not_cached_ids:
        with Timer(logger=self.log, name="Bulk fetching series with IDs " +
                                         ", ".join(str(i) for i in not_cached_ids)):
            with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
                futures = [executor.submit(self.get_series_info, _id) for _id in not_cached_ids]
                for future in as_completed(futures):
                    result = future.result()
                    self.series_cache[result.id] = results[result.id] = result
    return results
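
# Illustrative cache behaviour sketch (not part of the original module): a
# second call with the same ID is served from `series_cache`, so only the
# first call spawns worker threads, and both calls return the same object.
def _example_cached_lookup(scraper):
    first = scraper.get_series_bulk([1356])   # fetched over the network
    second = scraper.get_series_bulk([1356])  # served from series_cache
    return first[1356] is second[1356]        # True: same cached Series object
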
def get_series_info(self, series_id):
    doc = self._get_series_doc(series_id)
    with Timer(logger=self.log, name='Parsing series info with ID %d' % series_id):
        body = doc.find('div', {'class': 'mid'})
        series_title, original_title = parse_title(body.find('h1').first.text)
        image = self.BASE_URL + body.find('img').attr('src')
        icon = image.replace('/posters/poster_', '/icons/cat_')
        info = body.find('div').first.text.replace("\xa0", "")
        # The patterns below match the Russian field labels used on the site:
        # Страна = Country, Год выхода = Release year, Жанр = Genre,
        # Количество сезонов = Number of seasons, О сериале = About the series,
        # Актеры = Actors, Режиссеры = Directors, Сценаристы = Writers,
        # Сюжет = Plot.
        res = re.search('Страна: (.+)\r\n', info)
        country = res.group(1) if res else None
        res = re.search('Год выхода: (.+)\r\n', info)
        year = res.group(1) if res else None
        res = re.search('Жанр: (.+)\r\n', info)
        genres = res.group(1).split(', ') if res else None
        res = re.search('Количество сезонов: (.+)\r\n', info)
        seasons_count = int(res.group(1)) if res else 0
        res = re.search('О сериале[^\r\n]+\s*(.+?)($|\r\n)', info, re.S | re.M)
        about = res.group(1) if res else None
        res = re.search('Актеры:\s*(.+?)($|\r\n)', info, re.S | re.M)
        actors = [parse_title(t) for t in res.group(1).split(', ')] if res else None
        res = re.search('Режиссеры:\s*(.+?)($|\r\n)', info, re.S | re.M)
        producers = res.group(1).split(', ') if res else None
        res = re.search('Сценаристы:\s*(.+?)($|\r\n)', info, re.S | re.M)
        writers = res.group(1).split(', ') if res else None
        res = re.search('Сюжет:\s*(.+?)($|\r\n)', info, re.S | re.M)
        plot = res.group(1) if res else None
        # Every 't_row' div is an episode row; rows labeled 'Сезон полностью'
        # ('Complete season') are full-season torrents, excluded from the count
        episodes_count = len(body.find('div', {'class': 't_row.*?'})) - \
            len(body.find('label', {'title': 'Сезон полностью'}))
        poster = poster_url(original_title, seasons_count)
        series = Series(series_id, series_title, original_title, image, icon, poster,
                        country, year, genres, about, actors, producers, writers,
                        plot, seasons_count, episodes_count)
    self.log.info("Parsed '%s' series info successfully" % series_title)
    self.log.debug(repr(series).decode("unicode-escape"))
    return series
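
# Illustrative usage sketch (not part of the original module): `.id` is known
# from get_series_bulk(); `seasons_count` and `episodes_count` are assumed to
# be exposed as attributes named after the Series constructor arguments.
def _example_series_summary(scraper):
    series = scraper.get_series_info(1356)  # hypothetical series ID
    scraper.log.info("Series %d: %d season(s), %d episode(s)"
                     % (series.id, series.seasons_count, series.episodes_count))
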