def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Find candidate pages for a TV show or a movie.

    TV shows: the show page is addressed directly from the slugged title
    and is reported only when it contains at least one ``post-N`` div.
    Movies: the site search (``s`` parameter) is queried and a post is kept
    when its parsed title overlaps the requested title and the years are
    compatible. Any other video type yields an empty list.

    Returns a list of dicts with 'url', 'title' and 'year' keys.
    """
    results = []
    if video_type == VIDEO_TYPES.TVSHOW and title:
        show_path = '/tv-show/%s/' % (scraper_utils.to_slug(title))
        show_url = scraper_utils.urljoin(self.base_url, show_path)
        html = self._http_get(show_url, require_debrid=True, cache_limit=24)
        if dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            results.append({
                'url': scraper_utils.pathify_url(show_url),
                'title': scraper_utils.cleanse_title(title),
                'year': ''
            })
        return results

    if video_type != VIDEO_TYPES.MOVIE:
        return results

    query = re.sub('[^A-Za-z0-9 ]', '', title.lower())
    html = self._http_get(self.base_url, params={'s': query}, require_debrid=True, cache_limit=1)
    wanted = scraper_utils.normalize_title(title)
    for _attrs, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
        heading = re.search('<h\d+[^>]*>\s*<a\s+href="([^"]+)[^>]*>(.*?)</a>', post)
        if not heading:
            continue
        post_url, raw_title = heading.groups()
        # Skip TV posts and anything the scraper considers too old.
        if '/tv-show/' in post or self.__too_old(post):
            continue

        plain_title = re.sub('<[^>]*>', '', raw_title)
        meta = scraper_utils.parse_movie_link(plain_title)
        full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
        match_year = meta['year']
        found = scraper_utils.normalize_title(meta['title'])
        titles_overlap = found in wanted or wanted in found
        year_ok = not year or not match_year or year == match_year
        if titles_overlap and year_ok:
            results.append({
                'url': scraper_utils.pathify_url(post_url),
                'title': scraper_utils.cleanse_title(full_title),
                'year': match_year
            })
    return results
def _get_episode_url(self, show_url, video):
    """Locate the page URL of one episode on a show page.

    Tries, in order: an ``-SxxEyy`` link pattern, an air-date match
    (when the 'airdate-fallback' setting is 'true'), and an episode-title
    match (when title matching is forced or 'title-fallback' is 'true').
    The first two are skipped when title matching is forced.

    Returns the pathified episode URL, or None when nothing matches.
    """
    page_url = scraper_utils.urljoin(self.base_url, show_url)
    html = self._http_get(page_url, cache_limit=2)
    if not html:
        return None

    force_title = scraper_utils.force_title(video)
    episodes = dom_parser2.parse_dom(html, 'div', {'class': 'el-item'})

    if not force_title:
        episode_pattern = 'href="([^"]*-[sS]%02d[eE]%02d(?!\d)[^"]*)' % (int(video.season), int(video.episode))
        hit = re.search(episode_pattern, html)
        if hit:
            return scraper_utils.pathify_url(hit.group(1))

        if kodi.get_setting('airdate-fallback') == 'true' and video.ep_airdate:
            # The site lists dates as DD-MM-YYYY.
            wanted_date = '%02d-%02d-%d' % (video.ep_airdate.day, video.ep_airdate.month, video.ep_airdate.year)
            for entry in episodes:
                fragment = entry.content
                links = dom_parser2.parse_dom(fragment, 'a', req='href')
                dates = dom_parser2.parse_dom(fragment, 'div', {'class': 'date'})
                if not (links and dates):
                    continue
                if wanted_date == dates[0].content.strip():
                    return scraper_utils.pathify_url(links[0].attrs['href'])

    use_title = force_title or kodi.get_setting('title-fallback') == 'true'
    if use_title and video.ep_title:
        wanted_title = scraper_utils.normalize_title(video.ep_title)
        for entry in episodes:
            fragment = entry.content
            links = dom_parser2.parse_dom(fragment, 'a', req='href')
            names = dom_parser2.parse_dom(fragment, 'div', {'class': 'e-name'})
            if links and names and wanted_title == scraper_utils.normalize_title(names[0].content):
                return scraper_utils.pathify_url(links[0].attrs['href'])
    return None
def _get_episode_url(self, show_url, video):
    """Walk the paginated post listing looking for the requested episode.

    Starting at ``show_url``, each page's ``post-N`` divs are inspected.
    Paging follows the 'nextpostslink' anchor and stops as soon as a post
    is judged too old. Posts that do not mention ``show_url`` are ignored.
    A post matches via ``release_check`` on its first link text, or, when
    title matching is forced, via the text between ``</strong>`` and
    ``</p>`` (only if 'title-fallback' is 'true' and a title is known).

    Returns the pathified post URL, or None when no post matches.
    """
    force_title = scraper_utils.force_title(video)
    title_fallback = kodi.get_setting('title-fallback') == 'true'
    norm_title = scraper_utils.normalize_title(video.ep_title)

    pending = [show_url]
    hit_old_post = False
    while pending and not hit_old_post:
        listing_url = scraper_utils.urljoin(self.base_url, pending[0])
        html = self._http_get(listing_url, require_debrid=True, cache_limit=1)
        for _attrs, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            if self.__too_old(post):
                hit_old_post = True
                break
            if show_url not in post:
                continue

            links = dom_parser2.parse_dom(post, 'a', req='href')
            if not links:
                continue
            post_url, post_title = links[0].attrs['href'], links[0].content

            if force_title:
                if title_fallback and norm_title:
                    found = re.search('</strong>(.*?)</p>', post)
                    if found and norm_title == scraper_utils.normalize_title(found.group(1)):
                        return scraper_utils.pathify_url(post_url)
            elif scraper_utils.release_check(video, post_title, require_title=False):
                return scraper_utils.pathify_url(post_url)

        next_links = dom_parser2.parse_dom(html, 'a', {'class': 'nextpostslink'}, req='href')
        pending = [next_links[0].attrs['href']] if next_links else []
    return None
def _default_get_episode_url(self, html, video, episode_pattern, title_pattern='', airdate_pattern=''):
    """Generic episode lookup driven by caller-supplied regex patterns.

    ``html`` may be a string or a sequence whose first element exposes a
    ``content`` attribute. Unless title matching is forced, the episode
    pattern is tried first (group 1 is the URL), then the air-date pattern
    after its ``{year}``, ``{month}``, ``{p_month}``, ``{month_name}``,
    ``{short_month}``, ``{day}`` and ``{p_day}`` placeholders are filled in
    (only when 'airdate-fallback' is 'true'). Finally the title pattern
    (named groups ``title`` and ``url``) is tried when title matching is
    forced or 'title-fallback' is 'true'.

    Returns the pathified URL of the first match, or None.
    """
    logger.log('Default Episode Url: |%s|%s|' % (self.get_name(), video), log_utils.LOGDEBUG)
    if not html:
        return None

    # Accept parse_dom-style results: unwrap the first element's content.
    try:
        html = html[0].content
    except AttributeError:
        pass

    flags = re.DOTALL | re.I
    force_title = scraper_utils.force_title(video)
    if force_title:
        logger.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)
    else:
        if episode_pattern:
            hit = re.search(episode_pattern, html, flags)
            if hit:
                return scraper_utils.pathify_url(hit.group(1))

        if kodi.get_setting('airdate-fallback') == 'true' and airdate_pattern and video.ep_airdate:
            airdate = video.ep_airdate
            substitutions = (
                ('{year}', str(airdate.year)),
                ('{month}', str(airdate.month)),
                ('{p_month}', '%02d' % (airdate.month)),
                ('{month_name}', MONTHS[airdate.month - 1]),
                ('{short_month}', SHORT_MONS[airdate.month - 1]),
                ('{day}', str(airdate.day)),
                ('{p_day}', '%02d' % (airdate.day)),
            )
            for placeholder, value in substitutions:
                airdate_pattern = airdate_pattern.replace(placeholder, value)
            logger.log('Air Date Pattern: %s' % (airdate_pattern), log_utils.LOGDEBUG)
            hit = re.search(airdate_pattern, html, flags)
            if hit:
                return scraper_utils.pathify_url(hit.group(1))

    use_title = force_title or kodi.get_setting('title-fallback') == 'true'
    if not (use_title and video.ep_title and title_pattern):
        return None

    wanted = scraper_utils.normalize_title(video.ep_title)
    for found in re.finditer(title_pattern, html, flags):
        fields = found.groupdict()
        if wanted == scraper_utils.normalize_title(fields['title']):
            return scraper_utils.pathify_url(fields['url'])
    return None
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search posts under '/search/<title>' for shows or movies.

    TV shows: every post in the matching category contributes the link and
    text of its first 'TAGS:' anchor, de-duplicated by URL.
    Movies: ``<h2>`` headings are paired positionally with ``post-N`` divs;
    a pair is kept when the post is in the movie category, is not too old,
    and its parsed title/year are compatible with the request.

    Returns a list of dicts with 'url', 'title' and 'year' keys.
    """
    results = []
    search_url = '/search/' + urllib.quote_plus(title)
    html = self._http_get(search_url, require_debrid=True, cache_limit=1)

    if video_type == VIDEO_TYPES.TVSHOW:
        seen = set()
        for _attr, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            if CATEGORIES[video_type] not in post:
                continue
            tag = re.search('<span>\s*TAGS:\s*</span>\s*<a\s+href="([^"]+)[^>]+>([^<]+)', post, re.I)
            if not tag:
                continue
            show_url, show_title = tag.groups()
            if show_url in seen:
                continue
            seen.add(show_url)
            results.append({
                'url': scraper_utils.pathify_url(show_url),
                'title': scraper_utils.cleanse_title(show_title),
                'year': ''
            })
    elif video_type == VIDEO_TYPES.MOVIE:
        wanted = scraper_utils.normalize_title(title)
        headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
        bodies = []
        for node in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            bodies.append(node.content)

        for (post_url, post_title), body in zip(headings, bodies):
            if CATEGORIES[video_type] not in body or self.__too_old(body):
                continue
            meta = scraper_utils.parse_movie_link(post_title)
            full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
            match_year = meta['year']
            found = scraper_utils.normalize_title(meta['title'])
            titles_overlap = found in wanted or wanted in found
            year_ok = not year or not match_year or year == match_year
            if titles_overlap and year_ok:
                results.append({
                    'url': scraper_utils.pathify_url(post_url),
                    'title': scraper_utils.cleanse_title(full_title),
                    'year': match_year
                })
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Query the '/typeahead/<title>' JSON endpoint.

    Items are kept when their 'type' equals 'series' (for TV shows) or
    'movie' (for everything else) and both 'title' and 'link' are set.
    The endpoint provides no year here, so 'year' is always ''.

    Returns a list of dicts with 'title', 'year' and 'url' keys.
    """
    results = []
    if video_type == VIDEO_TYPES.TVSHOW:
        media_type = 'series'
    else:
        media_type = 'movie'

    endpoint = '/typeahead/%s' % (urllib.quote(title))
    search_url = scraper_utils.urljoin(self.base_url, endpoint)
    headers = {'Referer': self.base_url}
    headers.update(XHR)
    html = self._http_get(search_url, headers=headers, require_debrid=True, cache_limit=.5)

    for item in scraper_utils.parse_json(html, search_url):
        match_title = item.get('title')
        match_url = item.get('link')
        match_year = ''
        if item.get('type') != media_type or not match_title or not match_url:
            continue
        if not year or not match_year or year == match_year:
            results.append({
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year,
                'url': scraper_utils.pathify_url(match_url)
            })
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Query the '/search/searchBoxSuggestion' JSON endpoint (top 8 hits).

    Each suggestion's 'Value' is split into title and year; the item's
    'ReleaseYear' (stringified) takes precedence over the year found in
    the title. The result URL is an '/ontology/EntityDetails' link built
    from the raw 'Value'.

    Returns a list of dicts with 'title', 'year' and 'url' keys.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search/searchBoxSuggestion')
    html = self._http_get(search_url, params={'top': 8, 'query': title}, cache_limit=8)
    for item in scraper_utils.parse_json(html, search_url):
        entityName = item.get('Value', '')
        if not entityName:
            continue

        match_title, title_year = scraper_utils.extra_year(entityName)
        match_year = str(item.get('ReleaseYear', '')) or title_year
        query = urllib.urlencode({'entityName': entityName, 'ignoreMediaLinkError': 'false'})
        match_url = '/ontology/EntityDetails?' + query
        if not year or not match_year or year == match_year:
            results.append({
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year,
                'url': scraper_utils.pathify_url(match_url)
            })
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Filter the '/ajax_submit.php' listing by normalized title.

    No query is sent to the site; every 'category-post' div is read and
    kept when the requested title is contained in the post's ``<h3>``
    text. Each URL is considered only once, whether or not it matched.

    Returns a list of dicts with 'url', 'title' and 'year' ('' always).
    """
    results = []
    visited = set()
    listing_url = scraper_utils.urljoin(self.base_url, '/ajax_submit.php')
    html = self._http_get(listing_url, cache_limit=48)
    wanted = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'category-post'}):
        links = dom_parser2.parse_dom(item, 'a', req='href')
        headings = dom_parser2.parse_dom(item, 'h3')
        if not (links and headings):
            continue

        match_url = scraper_utils.pathify_url(links[0].attrs['href'])
        match_title = headings[0].content
        if match_url in visited:
            continue
        visited.add(match_url)

        if wanted in scraper_utils.normalize_title(match_title):
            results.append({
                'url': match_url,
                'title': scraper_utils.cleanse_title(match_title),
                'year': ''
            })
    return results
def search(self, video_type, title, year, season=''):
    """Search '/movies/search' for movies or individual seasons.

    An item whose title contains 'S<n>' / 'Season <n>' is treated as a
    season entry. For season searches, a season entry must carry the
    requested season number; an entry without a season marker is accepted
    only as season 1 and only when its title and year agree with the
    request. For other searches, only entries without a season marker are
    accepted and the year is split out of the title.

    Returns a list of dicts with 'url' (pointing at 'watching.html'),
    'title' and 'year' keys.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/movies/search')
    html = self._http_get(search_url, params={'s': title}, cache_limit=8)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'item_movie'}):
        match = dom_parser2.parse_dom(item, 'a', req=['href', 'title'])
        if not match:
            continue

        match_title_year = match[0].attrs['title']
        match_url = match[0].attrs['href']
        is_season = re.search('S(?:eason\s+)?(\d+)', match_title_year, re.I)
        # Fixed: the opening parenthesis used to sit after '==', which
        # compared video_type against a bool and rejected every movie.
        match_vt = ((video_type == VIDEO_TYPES.MOVIE and not is_season) or
                    (video_type == VIDEO_TYPES.SEASON and is_season))
        match_year = ''
        if video_type == VIDEO_TYPES.SEASON:
            if not season and not match_vt:
                continue

            if match_vt:
                if season and int(is_season.group(1)) != int(season):
                    continue
            else:
                # No season marker: only plausible as the first season.
                if season and int(season) != 1:
                    continue
                site_title, site_year = scraper_utils.extra_year(match_title_year)
                if scraper_utils.normalize_title(site_title) not in scraper_utils.normalize_title(title) or year != site_year:
                    continue

            match_title = match_title_year
        else:
            if not match_vt:
                continue
            match_title, match_year = scraper_utils.extra_year(match_title_year)

        match_url = scraper_utils.urljoin(match_url, 'watching.html')
        if not year or not match_year or year == match_year:
            result = {
                'url': scraper_utils.pathify_url(match_url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year
            }
            results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search via the '/bestmatch-fund-movies-<slug>.html' page.

    The slug is the title with spaces turned into hyphens, everything
    except letters, digits and hyphens removed, and lower-cased. Each
    'thumbsTitle' div's first link supplies the URL and a "title (year)"
    text.

    Returns a list of dicts with 'url', 'title' and 'year' keys.
    """
    results = []
    slug = re.sub('[^A-Za-z0-9-]', '', title.replace(' ', '-')).lower()
    url_template = scraper_utils.urljoin(self.base_url, '/bestmatch-fund-movies-%s.html')
    search_url = url_template % (slug)
    html = self._http_get(search_url, cache_limit=1)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'thumbsTitle'}):
        links = dom_parser2.parse_dom(item, 'a', req='href')
        if not links:
            continue

        first = links[0]
        match_url = first.attrs['href']
        match_title, match_year = scraper_utils.extra_year(first.content)
        if not year or not match_year or year == match_year:
            results.append({
                'url': scraper_utils.pathify_url(match_url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year
            })
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search '/search/<title>' and report at most one hit.

    Only the first link and first image inside the 'who-likes' div are
    read; the image's ``alt`` text supplies "title (year)".

    Returns a list (empty or single-element) of dicts with 'title',
    'year' and 'url' keys.
    """
    results = []
    search_path = '/search/%s' % (urllib.quote(title))
    search_url = scraper_utils.urljoin(self.base_url, search_path)
    html = self._http_get(search_url, cache_limit=8)

    containers = dom_parser2.parse_dom(html, 'div', {'id': 'who-likes'})
    if not containers:
        return results
    fragment = containers[0].content

    links = dom_parser2.parse_dom(fragment, 'a', req='href')
    images = dom_parser2.parse_dom(fragment, 'img', req='alt')
    if not (links and images):
        return results

    match_url = links[0].attrs['href']
    match_title, match_year = scraper_utils.extra_year(images[0].attrs['alt'])
    if not year or not match_year or year == match_year:
        results.append({
            'title': scraper_utils.cleanse_title(match_title),
            'year': match_year,
            'url': scraper_utils.pathify_url(match_url)
        })
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search '/search' with a lower-cased query and a movies/series type.

    Each 'one_movie-item' div is classified as a movie when it has no
    'movie-series' div and as a TV show when that div reads 'TV SERIE';
    items whose classification differs from ``video_type`` are dropped.
    The year is taken from a '-YYYY-' segment of the item URL when present.

    Returns a list of dicts with 'url', 'title' and 'year' keys.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search')
    if video_type == VIDEO_TYPES.MOVIE:
        search_type = 'movies'
    else:
        search_type = 'series'
    html = self._http_get(search_url, params={'query': title.lower(), 'type': search_type}, cache_limit=8)

    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'one_movie-item'}):
        links = dom_parser2.parse_dom(item, 'a', req='href')
        images = dom_parser2.parse_dom(item, 'img', req='alt')
        item_kind = dom_parser2.parse_dom(item, 'div', {'class': 'movie-series'})
        if not item_kind:
            item_kind = VIDEO_TYPES.MOVIE
        elif item_kind[0].content == 'TV SERIE':
            item_kind = VIDEO_TYPES.TVSHOW
        # Any other label leaves item_kind as the parse result, which never
        # equals a video type, so the item is skipped below.

        if not (links and images and video_type == item_kind):
            continue

        match_url = links[0].attrs['href']
        match_title = images[0].attrs['alt']
        url_year = re.search('-(\d{4})-', match_url)
        match_year = url_year.group(1) if url_year else ''
        if not year or not match_year or year == match_year:
            results.append({
                'url': scraper_utils.pathify_url(match_url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year
            })
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Submit the search form on '/index.php' and list every hit.

    Results are read from 'short-film' divs inside the 'dle-content'
    container; the URL and title come from the ``<h5><a>`` element's
    ``href`` and ``title`` attributes. No title or year filtering is done.

    Returns a list of dicts with 'url', 'title' and 'year' ('' always).
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/index.php')
    form = {'subaction': 'search', 'story': title, 'do': 'search'}
    html = self._http_get(search_url, params={'do': 'search'}, data=form,
                          headers={'Referer': search_url}, cache_limit=1)

    containers = dom_parser2.parse_dom(html, 'div', {'id': 'dle-content'})
    if not containers:
        return results

    for _attrs, item in dom_parser2.parse_dom(containers[0].content, 'div', {'class': 'short-film'}):
        heading = re.search('<h5><a\s+href="([^"]+)[^>]+title="([^"]+)', item)
        if heading:
            url, match_title = heading.groups('')
            results.append({
                'url': scraper_utils.pathify_url(url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': ''
            })
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Run the site search (``s`` parameter) and list links found in ``<h2>``.

    Non-ASCII characters are stripped from the link text. The year is
    taken from a trailing four-digit number in the text; failing that,
    from a trailing '-YYYY' in the link URL; otherwise it is ''.

    Returns a list of dicts with 'title', 'year' and 'url' keys.
    """
    results = []
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=8)
    for _attrs, item in dom_parser2.parse_dom(html, 'h2'):
        for attrs, match_title_year in dom_parser2.parse_dom(item, 'a', req=['href']):
            match_url = attrs['href']
            match_title_year = re.sub('[^\x00-\x7F]', '', match_title_year)
            match = re.search('(.*?)\s+(\d{4})$', match_title_year)
            if match:
                match_title, match_year = match.groups()
            else:
                # Fixed: this branch used match.groups(1), which yields a
                # tuple, and left match_title unassigned (NameError or a
                # stale title from the previous link).
                match_title = match_title_year
                url_year = re.search('-(\d{4})/?$', match_url)
                match_year = url_year.group(1) if url_year else ''

            if not year or not match_year or year == match_year:
                result = {
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year,
                    'url': scraper_utils.pathify_url(match_url)
                }
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search '/search?q=' and list every 'movie_item' article.

    The first anchor carrying both ``href`` and ``data-title`` supplies
    the URL and title. No year is available, so 'year' is always ''.

    Returns a list of dicts with 'title', 'year' and 'url' keys.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search?q=')
    html = self._http_get(search_url, params={'search': title, 've': 1},
                          headers={'Referer': ''}, cache_limit=8)
    for _attrs, article in dom_parser2.parse_dom(html, 'article', {'class': 'movie_item'}):
        anchors = dom_parser2.parse_dom(article, 'a', req=['href', 'data-title'])
        if not anchors:
            continue

        anchor_attrs = anchors[0].attrs
        match_url = anchor_attrs['href']
        match_title = anchor_attrs['data-title']
        match_year = ''
        if not year or not match_year or year == match_year:
            results.append({
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year,
                'url': scraper_utils.pathify_url(match_url)
            })
    return results
def get_sources(self, video):
    """Collect direct stream links from the video's page.

    Every anchor whose ``href`` contains MOVIE_URL becomes a hoster entry.
    Quality is derived from the height parsed out of the link, and a
    User-Agent header is appended to the stream URL. A 'format' key is
    added when the parsed link metadata has one.

    Returns a list of hoster dicts; empty when the video has no URL or is
    marked FORCE_NO_MATCH.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=8)
    for attrs, _content in dom_parser2.parse_dom(html, 'a', req='href'):
        link = attrs['href']
        if MOVIE_URL not in link:
            continue

        meta = scraper_utils.parse_movie_link(link)
        ua_suffix = scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        stream_url = scraper_utils.pathify_url(link) + ua_suffix
        quality = scraper_utils.height_get_quality(meta['height'])
        hoster = {
            'multi-part': False,
            'host': scraper_utils.get_direct_hostname(self, stream_url),
            'class': self,
            'quality': quality,
            'views': None,
            'rating': None,
            'url': stream_url,
            'direct': True
        }
        if 'format' in meta:
            hoster['format'] = meta['format']
        hosters.append(hoster)
    return hosters
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Query the '/ajax/search.php' JSON endpoint.

    Items are kept when their upper-cased 'meta' field starts with
    'TV SHOW' (for TV show / episode searches) or 'MOVIE' (otherwise).
    No year filtering is done; 'year' is always ''.

    Returns a list of dicts with 'title', 'url' and 'year' keys.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/ajax/search.php')
    # Millisecond timestamp sent along with the query.
    timestamp = int(time.time() * 1000)
    form = {'q': title, 'limit': 100, 'timestamp': timestamp, 'verifiedCheck': ''}
    html = self._http_get(search_url, data=form, headers=XHR, cache_limit=1)

    wants_show = video_type in [VIDEO_TYPES.TVSHOW, VIDEO_TYPES.EPISODE]
    media_type = 'TV SHOW' if wants_show else 'MOVIE'

    for item in scraper_utils.parse_json(html, search_url):
        if item['meta'].upper().startswith(media_type):
            results.append({
                'title': scraper_utils.cleanse_title(item['title']),
                'url': scraper_utils.pathify_url(item['permalink']),
                'year': ''
            })
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search with the site's keyword filter and list 'recent-item' divs.

    A hit whose link contains '/series/' is treated as a series: it is
    dropped for movie searches, and non-series hits are dropped for TV
    show searches. The div's ``title`` attribute supplies "title (year)".

    Returns a list of dicts with 'title', 'year' and 'url' keys.
    """
    results = []
    query = {'c': 'movie', 'm': 'filter', 'keyword': title}
    html = self._http_get(self.base_url, params=query, cache_limit=8)
    for attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'recent-item'}, req='title'):
        links = dom_parser2.parse_dom(item, 'a', req='href')
        if not links:
            continue

        match_url = links[0].attrs['href']
        is_series = re.search('/series/', match_url, re.I)
        movie_mismatch = video_type == VIDEO_TYPES.MOVIE and is_series
        show_mismatch = video_type == VIDEO_TYPES.TVSHOW and not is_series
        if movie_mismatch or show_mismatch:
            continue

        match_title, match_year = scraper_utils.extra_year(attrs['title'])
        if not year or not match_year or year == match_year:
            results.append({
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year,
                'url': scraper_utils.pathify_url(match_url)
            })
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search '/search.php' using movie- or series-specific parameters.

    Hits are the ``<li>`` items of the 'cbp-rfgrid' list; each item's first
    anchor with both ``title`` and ``href`` supplies "title (year)" and
    the URL.

    Returns a list of dicts with 'url', 'title' and 'year' keys.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search.php')
    if video_type == VIDEO_TYPES.MOVIE:
        params = {
            'all': 'all',
            'searchin': 'mov',
            'subtitles': '',
            'imdbfrom': '',
            'yearrange': '',
            'keywords': title
        }
    else:
        params = {'all': 'all', 'vselect': 'ser', 'keywords': title}

    html = self._http_get(search_url, params=params, cache_limit=8)
    grid = dom_parser2.parse_dom(html, 'ul', {'class': 'cbp-rfgrid'})
    if not grid:
        return results

    # The parse results are handed straight back to parse_dom, exactly as
    # the original did (no unwrapping of .content here).
    for entry in dom_parser2.parse_dom(grid, 'li'):
        anchors = dom_parser2.parse_dom(entry, 'a', req=['title', 'href'])
        if not anchors:
            continue

        anchor_attrs = anchors[0].attrs
        match_url = anchor_attrs['href']
        match_title, match_year = scraper_utils.extra_year(anchor_attrs['title'])
        if not year or not match_year or year == match_year:
            results.append({
                'url': scraper_utils.pathify_url(match_url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year
            })
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Run the site search (``s`` parameter) and list movie hits.

    Only 'result-item' divs that carry a ``span.movies`` marker and a
    'title' div with a link are considered. The year comes from the link
    text when it has one, otherwise from the item's ``span.year``.

    Returns a list of dicts with 'title', 'year' and 'url' keys.
    """
    results = []
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=8)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'result-item'}):
        match = dom_parser2.parse_dom(item, 'div', {'class': 'title'})
        is_movie = dom_parser2.parse_dom(item, 'span', {'class': 'movies'})
        # Fixed: these guards used to `return results`, so one non-movie or
        # malformed item discarded every hit listed after it.
        if not is_movie or not match:
            continue

        match = dom_parser2.parse_dom(match[0].content, 'a', req='href')
        if not match:
            continue

        match_url, match_title_year = match[0].attrs['href'], match[0].content
        match_title, match_year = scraper_utils.extra_year(match_title_year)
        if not match_year:
            match_year = dom_parser2.parse_dom(item, 'span', {'class': 'year'})
            match_year = match_year[0].content if match_year else ''

        if not year or not match_year or year == match_year:
            result = {
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year,
                'url': scraper_utils.pathify_url(match_url)
            }
            results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Run the site search (``s`` parameter) and list 'movie' divs.

    Links matching 'season-N-episodeN' are skipped. The image ``alt``
    text supplies "title (year)"; when it has no year, the item's
    'year' div is used, falling back to ''.

    Returns a list of dicts with 'url', 'title' and 'year' keys.
    """
    results = []
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=4)
    for _attrs, movie in dom_parser2.parse_dom(html, 'div', {'class': 'movie'}):
        match_url = dom_parser2.parse_dom(movie, 'a', req='href')
        match_title_year = dom_parser2.parse_dom(movie, 'img', req='alt')
        if not (match_url and match_title_year):
            continue

        match_url = match_url[0].attrs['href']
        if re.search('season-\d+-episode\d+', match_url):
            continue

        match_title_year = match_title_year[0].attrs['alt']
        match_title, match_year = scraper_utils.extra_year(match_title_year)
        if not match_year:
            # Explicit emptiness check instead of a bare `except:` that
            # would also have hidden unrelated errors.
            year_div = dom_parser2.parse_dom(movie, 'div', {'class': 'year'})
            match_year = year_div[0].content if year_div else ''

        if not year or not match_year or year == match_year:
            result = {
                'url': scraper_utils.pathify_url(match_url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year
            }
            results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search '/advanced-search/' and list 'datos' divs.

    A hit is a TV show when its link contains '/tvshows/'; hits of the
    wrong kind for a movie or TV show search are skipped. A hit is kept
    when its normalized title is contained in the normalized requested
    title and the years are compatible.

    Returns a list of dicts with 'title', 'year' and 'url' keys.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/advanced-search/')
    headers = {'Referer': self.base_url}
    params = {'search_query': title, 'orderby': '', 'order': '', 'wpas': 1}
    html = self._http_get(search_url, params=params, headers=headers, cache_limit=8)
    wanted = scraper_utils.normalize_title(title)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'datos'}):
        links = dom_parser2.parse_dom(item, 'a', req='href')
        if not links:
            continue

        match_url = links[0].attrs['href']
        is_tvshow = '/tvshows/' in match_url
        show_for_movie = is_tvshow and video_type == VIDEO_TYPES.MOVIE
        movie_for_show = not is_tvshow and video_type == VIDEO_TYPES.TVSHOW
        if show_for_movie or movie_for_show:
            continue

        match_title, match_year = scraper_utils.extra_year(links[0].content)
        if scraper_utils.normalize_title(match_title) not in wanted:
            continue
        if not year or not match_year or year == match_year:
            results.append({
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year,
                'url': scraper_utils.pathify_url(match_url)
            })
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Scan three listing pages for links whose text contains the title.

    '/latest-added/', '/popular-today/' and '/most-popular/' are fetched
    in that order; within each page's 'home' div every anchor's text is
    split into title and year. Matching URLs are reported once across all
    pages.

    Returns a list of dicts with 'url', 'title' and 'year' keys.
    """
    results = []
    reported = set()
    for page in ['/latest-added/', '/popular-today/', '/most-popular/']:
        page_url = scraper_utils.urljoin(self.base_url, page)
        html = self._http_get(page_url, cache_limit=24)
        containers = dom_parser2.parse_dom(html, 'div', {'class': 'home'})
        if not containers:
            continue

        wanted = scraper_utils.normalize_title(title)
        for attrs, link_text in dom_parser2.parse_dom(containers[0].content, 'a', req='href'):
            match_title, match_year = scraper_utils.extra_year(link_text)
            if wanted not in scraper_utils.normalize_title(match_title):
                continue
            if year and match_year and year != match_year:
                continue

            match_url = scraper_utils.pathify_url(attrs['href'])
            if match_url in reported:
                continue
            reported.add(match_url)
            results.append({
                'url': match_url,
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year
            })
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """POST the title to '/searchtest.php' and list matching links.

    Within each ``<a href="#">`` element, an inner ``href`` starting with
    'watch-movie-' (movie searches) or 'watch-tvshow-' (otherwise) is
    extracted and turned into a title by ``__make_title``. A hit is kept
    when the requested title is contained in it. No year is available.

    Returns a list of dicts with 'url', 'title' and 'year' ('' always).
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/searchtest.php')
    html = self._http_get(search_url, data={'searchapi': title},
                          headers={'Referer': self.base_url}, cache_limit=2)
    query_type = 'watch-movie-' if video_type == VIDEO_TYPES.MOVIE else 'watch-tvshow-'
    wanted = scraper_utils.normalize_title(title)
    link_pattern = 'href="(%s[^"]+)' % (query_type)
    for _attrs, item in dom_parser2.parse_dom(html, 'a', {'href': '#'}):
        found = re.search(link_pattern, item)
        if not found:
            continue

        link = found.group(1)
        match_title = self.__make_title(link, query_type)
        match_year = ''
        if wanted not in scraper_utils.normalize_title(match_title):
            continue
        # match_year is always '', so the int() comparison is never reached.
        if not year or not match_year or int(year) == int(match_year):
            results.append({
                'url': scraper_utils.pathify_url(link),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year
            })
    return results
def _get_episode_url(self, show_url, video):
    """Find an episode via its season page, falling back to the front page.

    The show page is searched for a '<season>. Sezon' link; if found, that
    season page is searched for a '-<season>-sezon-<episode>-bolum' link
    through ``_default_get_episode_url``. When that yields nothing, the
    site's front page is searched for the same kind of link prefixed with
    the last non-empty path segment of the show URL.

    Returns the episode URL, or None when neither lookup succeeds.
    """
    show_url = scraper_utils.urljoin(self.base_url, show_url)
    show_html = self._http_get(show_url, cache_limit=8)
    season_pattern = '''href=['"]([^'"]+)[^>]+>\s*%s\.\s*Sezon<''' % (video.season)
    season_link = re.search(season_pattern, show_html)
    if season_link:
        episode_pattern = '''href=['"]([^'"]+-%s-sezon-%s-bolum[^'"]*)''' % (video.season, video.episode)
        season_url = scraper_utils.urljoin(self.base_url, season_link.group(1))
        season_html = self._http_get(season_url, cache_limit=2)
        ep_url = self._default_get_episode_url(season_html, video, episode_pattern)
        if ep_url:
            return ep_url

    # front page fallback
    front_html = self._http_get(self.base_url, cache_limit=2)
    # Last non-empty path segment of the show URL ('' if there is none).
    slug = next((part for part in reversed(show_url.split('/')) if part), '')
    ep_url_frag = 'href="([^"]+/{slug}-{season}-sezon-{episode}-bolum[^"]*)'.format(
        slug=slug, season=video.season, episode=video.episode)
    front_link = re.search(ep_url_frag, front_html)
    if front_link:
        return scraper_utils.pathify_url(front_link.group(1))
    return None
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search '/search/<title>' and list titled links in 'list' divs.

    Only 'list' divs containing a 'lists_titles' div are read. A link
    whose tag-stripped text contains an open-ended year such as '(2010-)'
    is treated as a TV show; hits of the wrong kind for a movie or TV show
    search are skipped.

    Returns a list of dicts with 'url', 'title' and 'year' keys.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search/')
    search_url = scraper_utils.urljoin(search_url, urllib.quote_plus(title))
    html = self._http_get(search_url, require_debrid=True, cache_limit=8)
    for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'list'}):
        if not dom_parser2.parse_dom(fragment, 'div', {'class': 'lists_titles'}):
            continue

        for attrs, match_title_year in dom_parser2.parse_dom(fragment, 'a', {'class': 'title'}, req='href'):
            match_url = attrs['href']
            match_title_year = re.sub('</?[^>]*>', '', match_title_year)
            # Fixed: the pattern was the malformed '\(d{4|-\)', which only
            # worked because its second alternative matched any '-)'.
            is_show = re.search(r'\(\d{4}-\)', match_title_year)
            if (is_show and video_type == VIDEO_TYPES.MOVIE) or (not is_show and video_type == VIDEO_TYPES.TVSHOW):
                continue

            match_title, match_year = scraper_utils.extra_year(match_title_year)
            if not year or not match_year or year == match_year:
                result = {
                    'url': scraper_utils.pathify_url(match_url),
                    'title': scraper_utils.cleanse_title(match_title),
                    'year': match_year
                }
                results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search '/search' (``q`` parameter) and list 'movie-item' divs.

    The anchor with ``itemprop="url"`` supplies the URL and a
    "title (year)" text; when the text has no year, the item's
    'overlay-year' div is used, falling back to ''.

    Returns a list of dicts with 'title', 'year' and 'url' keys.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search')
    html = self._http_get(search_url, params={'q': title}, cache_limit=8)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'movie-item'}):
        match = dom_parser2.parse_dom(item, 'a', {'itemprop': 'url'}, req='href')
        if not match:
            continue

        match_url, match_title_year = match[0].attrs['href'], match[0].content
        match_title, match_year = scraper_utils.extra_year(match_title_year)
        if not match_year:
            # Explicit emptiness check instead of a bare `except:` that
            # would also have hidden unrelated errors.
            overlay = dom_parser2.parse_dom(item, 'div', {'class': 'overlay-year'})
            match_year = overlay[0].content if overlay else ''

        if not year or not match_year or year == match_year:
            result = {
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year,
                'url': scraper_utils.pathify_url(match_url)
            }
            results.append(result)
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Run the site search (``s`` parameter) and list 'browse-movie-top' divs.

    The first link supplies the URL and a "title (year)" text; when the
    text has no year, the stripped 'browse-movie-year' div is used if
    present. '?watching' is appended to every result URL.

    Returns a list of dicts with 'title', 'year' and 'url' keys.
    """
    results = []
    html = self._http_get(self.base_url, params={'s': title}, cache_limit=8)
    for _attrs, item in dom_parser2.parse_dom(html, 'div', {'class': 'browse-movie-top'}):
        links = dom_parser2.parse_dom(item, 'a', req='href')
        if not links:
            continue

        first = links[0]
        match_title, match_year = scraper_utils.extra_year(first.content)
        if not match_year:
            year_divs = dom_parser2.parse_dom(item, 'div', {'class': 'browse-movie-year'})
            if year_divs:
                match_year = year_divs[0].content.strip()

        match_url = first.attrs['href'] + '?watching'
        if not year or not match_year or year == match_year:
            results.append({
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year,
                'url': scraper_utils.pathify_url(match_url)
            })
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """POST a query to '/search' and list links from the result table.

    'searchin' is 'm' for movie searches and 't' otherwise. Links are read
    from the first ``<table>`` inside the 'search-page' div; the link text
    supplies "title (year)".

    Returns a list of dicts with 'url', 'title' and 'year' keys.
    """
    results = []
    if video_type == VIDEO_TYPES.MOVIE:
        search_in = 'm'
    else:
        search_in = 't'
    search_url = scraper_utils.urljoin(self.base_url, '/search')
    form = {'searchquery': title, 'searchin': search_in}
    html = self._http_get(search_url, data=form, cache_limit=8)

    page = dom_parser2.parse_dom(html, 'div', {'class': 'search-page'})
    if not page:
        return results
    tables = dom_parser2.parse_dom(page[0].content, 'table')
    if not tables:
        return results

    for attrs, link_text in dom_parser2.parse_dom(tables[0].content, 'a', req='href'):
        match_title, match_year = scraper_utils.extra_year(link_text)
        if year and match_year and year != match_year:
            continue
        results.append({
            'url': scraper_utils.pathify_url(attrs['href']),
            'title': scraper_utils.cleanse_title(match_title),
            'year': match_year
        })
    return results
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Query the '/suggest.php' AJAX endpoint for TV shows.

    Every anchor in the response is a hit: tags are stripped from its text
    to form the title, and the year is taken from a trailing '(YYYY)' in
    the link URL when present.

    Returns a list of dicts with 'url', 'title' and 'year' keys.
    """
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/suggest.php')
    headers = {'Referer': self.base_url}
    headers.update(XHR)
    params = {'ajax': 1, 's': title, 'type': 'TVShows'}
    # Fixed: the Referer/XHR headers were built but never sent.
    html = self._http_get(search_url, params=params, headers=headers, cache_limit=8)
    for attrs, match_title in dom_parser2.parse_dom(html, 'a', req='href'):
        match_url = attrs['href']
        match_title = re.sub('</?[^>]*>', '', match_title)
        match = re.search('\((\d{4})\)$', match_url)
        match_year = match.group(1) if match else ''

        if not year or not match_year or year == match_year:
            result = {
                'url': scraper_utils.pathify_url(match_url),
                'title': scraper_utils.cleanse_title(match_title),
                'year': match_year
            }
            results.append(result)
    return results