def __get_links_from_json2(self, url, page_url, video_type):
    """Pull direct stream links out of the site's JSON playlist endpoint.

    Returns a dict mapping stream_url -> {'quality': ..., 'direct': True}.
    Any malformed/missing playlist is swallowed and logged, yielding {}.
    """
    sources = {}
    request_headers = {'Referer': page_url}
    request_headers.update(XHR)
    html = self._http_get(url, headers=request_headers, cache_limit=0)
    js_data = scraper_utils.parse_json(html, url)
    try:
        # Only the first playlist entry is consulted; an empty playlist
        # raises IndexError, which the except below treats as "no sources".
        entries = js_data.get('playlist', [])[0].get('sources', [])
        for entry in entries:
            stream_url = entry['file']
            if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            elif 'label' in entry:
                quality = scraper_utils.height_get_quality(entry['label'])
            elif video_type == VIDEO_TYPES.MOVIE:
                meta = scraper_utils.parse_movie_link(stream_url)
                quality = scraper_utils.height_get_quality(meta['height'])
            else:
                meta = scraper_utils.parse_episode_link(stream_url)
                quality = scraper_utils.height_get_quality(meta['height'])
            sources[stream_url] = {'quality': quality, 'direct': True}
            logger.log('Adding stream: %s Quality: %s' % (stream_url, quality),
                       log_utils.LOGDEBUG)
    except Exception as e:
        logger.log('Exception during yesmovies extract: %s' % (e),
                   log_utils.LOGDEBUG)
    return sources
def get_sources(self, video):
    """Scrape anchor tags on the video page and keep direct movie links.

    Returns a list of hoster dicts; empty when there is no matching page.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=8)
    for attrs, _content in dom_parser2.parse_dom(html, 'a', req='href'):
        link = attrs['href']
        if MOVIE_URL not in link:
            continue
        meta = scraper_utils.parse_movie_link(link)
        # Direct links need a UA header appended so the player can fetch them.
        ua_suffix = scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        stream_url = scraper_utils.pathify_url(link) + ua_suffix
        hoster = {
            'multi-part': False,
            'host': scraper_utils.get_direct_hostname(self, stream_url),
            'class': self,
            'quality': scraper_utils.height_get_quality(meta['height']),
            'views': None,
            'rating': None,
            'url': stream_url,
            'direct': True,
        }
        if 'format' in meta:
            hoster['format'] = meta['format']
        hosters.append(hoster)
    return hosters
def __get_links(self, url, video):
    """Run every search type against the API and collect unique hosters.

    Skips multi-host posts, rar archives, duplicates, and titles that fail
    the release check. Returns a list of hoster dicts.
    """
    hosters = []
    seen_urls = set()
    for search_type in SEARCH_TYPES:
        search_url, params = self.__translate_search(url, search_type)
        if not search_url:
            continue
        html = self._http_get(search_url, params=params, cache_limit=.5)
        js_result = scraper_utils.parse_json(html, search_url)
        if js_result.get('status') != 'success':
            message = js_result.get('message', 'Unknown Error')
            logger.log('Pron API Error: |%s|%s|: %s' % (search_url, params, message),
                       log_utils.LOGWARNING)
            continue
        for result in js_result['result']:
            stream_url = result['hosterurls'][0]['url']
            if len(result['hosterurls']) > 1:
                continue
            if result['extension'] == 'rar':
                continue
            if stream_url in seen_urls:
                continue
            if not scraper_utils.release_check(video, result['title']):
                continue
            host = urlparse.urlsplit(stream_url).hostname
            quality = scraper_utils.get_quality(
                video, host, self._get_title_quality(result['title']))
            hoster = {'multi-part': False, 'class': self, 'views': None,
                      'url': stream_url, 'rating': None, 'host': host,
                      'quality': quality, 'direct': False}
            hoster['extra'] = scraper_utils.cleanse_title(result['title'])
            if video.video_type == VIDEO_TYPES.MOVIE:
                meta = scraper_utils.parse_movie_link(hoster['extra'])
            else:
                meta = scraper_utils.parse_episode_link(hoster['extra'])
            if 'format' in meta:
                hoster['format'] = meta['format']
            hosters.append(hoster)
            seen_urls.add(stream_url)
    return hosters
def get_sources(self, video):
    """Collect hoster links from the entry-content of a debrid release post.

    Returns a list of hoster dicts; empty when there is no matching page
    or no post body.
    """
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, require_debrid=True, cache_limit=.5)
    post = dom_parser2.parse_dom(html, 'div', {'class': 'entry-content'})
    if not post:
        return hosters
    # BUG FIX: the original pattern ended with a lazy '.+?' and no terminator,
    # so it captured exactly one character after 'http(s)://' and truncated
    # every URL. Capture up to the closing quote or tag delimiter instead.
    for match in re.finditer('(?:href="|>)(https?://[^"<]+)', post[0].content):
        stream_url = match.group(1)
        if scraper_utils.excluded_link(stream_url) or 'imdb.com' in stream_url:
            continue
        host = urlparse.urlparse(stream_url).hostname
        if video.video_type == VIDEO_TYPES.MOVIE:
            meta = scraper_utils.parse_movie_link(stream_url)
        else:
            meta = scraper_utils.parse_episode_link(stream_url)
        quality = scraper_utils.height_get_quality(meta['height'])
        hoster = {'multi-part': False, 'host': host, 'class': self,
                  'views': None, 'url': stream_url, 'rating': None,
                  'quality': quality, 'direct': False}
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Turn the release links parsed from the post body into hoster dicts."""
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, require_debrid=True, cache_limit=.5)
    for source, value in self.__get_post_links(html).iteritems():
        if scraper_utils.excluded_link(source):
            continue
        host = urlparse.urlparse(source).hostname
        release = value['release']
        # Quality is derived from the release name, not the link itself.
        if video.video_type == VIDEO_TYPES.MOVIE:
            meta = scraper_utils.parse_movie_link(release)
        else:
            meta = scraper_utils.parse_episode_link(release)
        hoster = {'multi-part': False, 'host': host, 'class': self,
                  'views': None, 'url': source, 'rating': None,
                  'quality': scraper_utils.height_get_quality(meta['height']),
                  'direct': False}
        if 'format' in meta:
            hoster['format'] = meta['format']
        hosters.append(hoster)
    return hosters
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search the site: slug probe for TV shows, site search for movies.

    Returns a list of {'url', 'title', 'year'} result dicts.
    """
    results = []
    if video_type == VIDEO_TYPES.TVSHOW and title:
        # BUG FIX: the original format string had no %s placeholder, so the
        # '%' operator raised TypeError before any request was made.
        test_url = '/category/tv-shows/%s/' % (scraper_utils.to_slug(title))
        test_url = scraper_utils.urljoin(self.base_url, test_url)
        html = self._http_get(test_url, require_debrid=True, cache_limit=24)
        posts = dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})
        if posts:
            result = {'url': scraper_utils.pathify_url(test_url),
                      'title': scraper_utils.cleanse_title(title),
                      'year': ''}
            results.append(result)
    elif video_type == VIDEO_TYPES.MOVIE:
        # BUG FIX: the original re.sub pattern was a character class built from
        # a forum path ('[/forum/7-1080p-...]'), which deleted those individual
        # letters/digits from the search title and broke matching. Strip
        # non-alphanumerics instead, matching the sibling scrapers in this file.
        search_title = re.sub('[^A-Za-z0-9 ]', '', title.lower())
        html = self._http_get(self.base_url, params={'s': search_title},
                              require_debrid=True, cache_limit=1)
        norm_title = scraper_utils.normalize_title(title)
        for _attrs, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            match = re.search('<h\d+[^>]*>\s*<a\s+href="([^"]+)[^>]*>(.*?)</a>', post)
            if not match:
                continue
            post_url, post_title = match.groups()
            # Skip TV posts and posts older than the configured filter window.
            if '/tv-show/' in post or self.__too_old(post):
                continue
            post_title = re.sub('<[^>]*>', '', post_title)
            meta = scraper_utils.parse_movie_link(post_title)
            full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
            match_year = meta['year']
            match_norm_title = scraper_utils.normalize_title(meta['title'])
            title_ok = match_norm_title in norm_title or norm_title in match_norm_title
            year_ok = not year or not match_year or year == match_year
            if title_ok and year_ok:
                result = {'url': scraper_utils.pathify_url(post_url),
                          'title': scraper_utils.cleanse_title(full_title),
                          'year': match_year}
                results.append(result)
    return results
def __get_release(self, html, video):
    """Pick the page URL of the best release from the releases/episodes list.

    Honors the '<scraper>-select' setting: 0 means take the first release
    that passes the checks; any other value keeps the highest-quality one
    (ranked via Q_ORDER). Returns the chosen page URL, or None.
    """
    try:
        select = int(kodi.get_setting('%s-select' % (self.get_name())))
    except:
        # Missing/non-numeric setting falls back to "first acceptable".
        select = 0
    # Movies are listed under ul#releases, episodes under ul#episodes.
    ul_id = 'releases' if video.video_type == VIDEO_TYPES.MOVIE else 'episodes'
    fragment = dom_parser2.parse_dom(html, 'ul', {'id': ul_id})
    if fragment:
        best_qorder = 0
        best_page = None
        for _attrs, item in dom_parser2.parse_dom(fragment[0].content, 'li'):
            match = dom_parser2.parse_dom(item, 'span', req=['href', 'title'])
            if not match:
                match = dom_parser2.parse_dom(item, 'a', req=['href', 'title'])
                if not match: continue
            page_url, release = match[0].attrs['href'], match[0].attrs['title']
            match = dom_parser2.parse_dom(item, 'span', {'class': 'time'})
            # Items appear newest-first: once one is too old, stop scanning.
            if match and self.__too_old(match[0].content): break
            # Strip a leading "[group]" tag from the release name.
            release = re.sub('^\[[^\]]*\]\s*', '', release)
            if video.video_type == VIDEO_TYPES.MOVIE:
                meta = scraper_utils.parse_movie_link(release)
            else:
                # Episodes must also pass the S/E match (title not required).
                if not scraper_utils.release_check(video, release, require_title=False): continue
                meta = scraper_utils.parse_episode_link(release)
            if select == 0:
                # "First" mode: accept the first release that made it this far.
                best_page = page_url
                break
            else:
                # "Best" mode: track the highest Q_ORDER seen so far.
                quality = scraper_utils.height_get_quality(meta['height'])
                logger.log('result: |%s|%s|%s|' % (page_url, quality, Q_ORDER[quality]), log_utils.LOGDEBUG)
                if Q_ORDER[quality] > best_qorder:
                    logger.log('Setting best as: |%s|%s|%s|' % (page_url, quality, Q_ORDER[quality]), log_utils.LOGDEBUG)
                    best_page = page_url
                    best_qorder = Q_ORDER[quality]
        return best_page
def __get_quality(self, item, video):
    """Derive a quality from explicit dimensions, else from the file name.

    Preference order: width, height, parsed name; defaults to HIGH when
    the item carries no usable metadata.
    """
    width = item.get('width')
    if width:
        return scraper_utils.width_get_quality(width)
    height = item.get('height')
    if height:
        return scraper_utils.height_get_quality(height)
    if 'name' in item:
        if video.video_type == VIDEO_TYPES.MOVIE:
            meta = scraper_utils.parse_movie_link(item['name'])
        else:
            meta = scraper_utils.parse_episode_link(item['name'])
        return scraper_utils.height_get_quality(meta['height'])
    return QUALITIES.HIGH
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search posts: TV shows via their TAGS links, movies via h2 headings.

    Returns a list of {'url', 'title', 'year'} result dicts.
    """
    results = []
    search_url = '/search/' + urllib.quote_plus(title)
    html = self._http_get(search_url, require_debrid=False, cache_limit=1)
    if video_type == VIDEO_TYPES.TVSHOW:
        seen_urls = {}
        for _attr, post in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')}):
            if CATEGORIES[video_type] not in post:
                continue
            match = re.search('<span>\s*TAGS:\s*</span>\s*<a\s+href="([^"]+)[^>]+>([^<]+)', post, re.I)
            if not match:
                continue
            show_url, match_title = match.groups()
            if show_url in seen_urls:
                continue
            result = {'url': scraper_utils.pathify_url(show_url),
                      'title': scraper_utils.cleanse_title(match_title),
                      'year': ''}
            seen_urls[show_url] = result
            results.append(result)
    elif video_type == VIDEO_TYPES.MOVIE:
        norm_title = scraper_utils.normalize_title(title)
        headings = re.findall('<h2>\s*<a\s+href="([^"]+)[^>]+>(.*?)</a>', html)
        posts = [r.content for r in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})]
        # Headings and post bodies are parallel lists for the same posts.
        for (post_url, post_title), post in zip(headings, posts):
            if CATEGORIES[video_type] not in post or self.__too_old(post):
                continue
            meta = scraper_utils.parse_movie_link(post_title)
            full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
            match_year = meta['year']
            match_norm_title = scraper_utils.normalize_title(meta['title'])
            title_ok = match_norm_title in norm_title or norm_title in match_norm_title
            year_ok = not year or not match_year or year == match_year
            if title_ok and year_ok:
                results.append({'url': scraper_utils.pathify_url(post_url),
                                'title': scraper_utils.cleanse_title(full_title),
                                'year': match_year})
    return results
def get_sources(self, video):
    """Extract hoster links from the lead paragraph of the post content.

    Only anchors inside the '<p><strong>...' span before the first script
    tag are considered. Returns a list of hoster dicts.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, require_debrid=True, cache_limit=.5)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'post-cont'})
    if not fragment:
        return hosters
    match = re.search('<p>\s*<strong>(.*?)<script', fragment[0].content, re.DOTALL)
    if not match:
        return hosters
    for attrs, _content in dom_parser2.parse_dom(match.group(1), 'a', req='href'):
        link = attrs['href']
        if scraper_utils.excluded_link(link):
            continue
        if video.video_type == VIDEO_TYPES.MOVIE:
            meta = scraper_utils.parse_movie_link(link)
        else:
            meta = scraper_utils.parse_episode_link(link)
        host = urlparse.urlparse(link).hostname
        base_quality = scraper_utils.height_get_quality(meta['height'])
        hoster = {'multi-part': False,
                  'host': host,
                  'class': self,
                  'views': None,
                  'url': link,
                  'rating': None,
                  'quality': scraper_utils.get_quality(video, host, base_quality),
                  'direct': False}
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Extract direct <source src=...> stream URLs from the player fragment.

    When the screen fragment references an external script, that script is
    fetched and scanned instead of the fragment itself. Returns a list of
    direct hoster dicts.
    """
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        html = self._http_get(source_url, cache_limit=.5)
        fragment = dom_parser.parse_dom(html, 'div', {'class': '[^"]*screen[^"]*'})
        if fragment:
            js_src = dom_parser.parse_dom(fragment[0], 'script', ret='src')
            if js_src:
                # Player markup usually lives in an external JS file.
                js_url = urlparse.urljoin(self.base_url, js_src[0])
                html = self._http_get(js_url, cache_limit=.5)
            else:
                html = fragment[0]
            for match in re.finditer('<source[^>]+src="([^"]+)', html):
                stream_url = match.group(1)
                host = self._get_direct_hostname(stream_url)
                if host == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                else:
                    # NOTE(review): this tuple-unpacks parse_movie_link while
                    # sibling scrapers in this file index its result as a dict
                    # (meta['height']) -- looks like an older helper version;
                    # confirm against the scraper_utils in use.
                    _, _, height, _ = scraper_utils.parse_movie_link(
                        stream_url)
                    quality = scraper_utils.height_get_quality(height)
                    # Non-gvideo hosts need a UA header appended for playback.
                    stream_url += '|User-Agent=%s' % (
                        scraper_utils.get_ua())
                hoster = {
                    'multi-part': False,
                    'host': host,
                    'class': self,
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'url': stream_url,
                    'direct': True
                }
                hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Scan entry-content paragraphs for '<a href>label' pairs, build hosters.

    Archive parts, rars, samples and nfo files are filtered out by label.
    """
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, require_debrid=True, cache_limit=.5)
        post = dom_parser.parse_dom(html, 'div', {'class': 'entry-content'})
        if post:
            for p in dom_parser.parse_dom(post[0], 'p'):
                for match in re.finditer('href="([^"]+)[^>]+>([^<]+)', p):
                    stream_url, q_str = match.groups()
                    is_junk = (re.search('\.part\.?\d+', q_str, re.I)
                               or '.rar' in q_str
                               or 'sample' in q_str
                               or q_str.endswith('.nfo'))
                    if is_junk:
                        continue
                    host = urlparse.urlparse(stream_url).hostname
                    # Quality comes from the link label, not the URL.
                    if video.video_type == VIDEO_TYPES.MOVIE:
                        meta = scraper_utils.parse_movie_link(q_str)
                    else:
                        meta = scraper_utils.parse_episode_link(q_str)
                    hoster = {'multi-part': False,
                              'host': host,
                              'class': self,
                              'views': None,
                              'url': stream_url,
                              'rating': None,
                              'quality': scraper_utils.height_get_quality(meta['height']),
                              'direct': False}
                    if 'format' in meta:
                        hoster['format'] = meta['format']
                    hosters.append(hoster)
    return hosters
def _blog_proc_results(self, html, post_pattern, date_format, video_type, title, year):
    """Filter blog-style post matches down to results for the given title.

    post_pattern must capture named groups 'post_title' and 'url', and may
    capture 'quality' and 'date'. date_format is a strptime format for the
    date group. Posts older than the '<scraper>-filter' setting (days) are
    dropped. Returns a list of {'url', 'title', 'year', 'quality'} dicts.
    """
    results = []
    search_date = ''
    search_sxe = ''
    if video_type == VIDEO_TYPES.EPISODE:
        # Episode titles arrive as either "Show S01E02" or "Show 2017.01.02".
        match = re.search('(.*?)\s*(S\d+E\d+)\s*', title)
        if match:
            show_title, search_sxe = match.groups()
        else:
            match = re.search('(.*?)\s*(\d{4})[._ -]?(\d{2})[._ -]?(\d{2})\s*', title)
            if match:
                show_title, search_year, search_month, search_day = match.groups()
                search_date = '%s-%s-%s' % (search_year, search_month, search_day)
                search_date = scraper_utils.to_datetime(search_date, "%Y-%m-%d").date()
            else:
                show_title = title
    else:
        show_title = title
    # Hoisted out of the loop: both are invariant across posts.
    try:
        filter_days = int(kodi.get_setting('%s-filter' % (self.get_name())))
    except ValueError:
        filter_days = 0
    norm_title = scraper_utils.normalize_title(show_title)
    today = datetime.date.today()
    for match in re.finditer(post_pattern, html, re.DOTALL):
        post_data = match.groupdict()
        post_title = post_data['post_title']
        post_title = re.sub('<[^>]*>', '', post_title)
        if 'quality' in post_data:
            post_title += '- [%s]' % (post_data['quality'])
        if filter_days and date_format and 'date' in post_data:
            post_data['date'] = post_data['date'].strip()
            post_date = scraper_utils.to_datetime(post_data['date'], date_format).date()
            if not post_date:
                # BUG FIX: the original message had four %s placeholders but
                # only three arguments, raising TypeError instead of logging.
                logger.log('Failed date Check in %s: |%s|%s|' % (
                    self.get_name(), post_data['date'], date_format),
                    log_utils.LOGWARNING)
                post_date = today
            if today - post_date > datetime.timedelta(days=filter_days):
                continue
        match_year = ''
        match_date = ''
        match_sxe = ''
        match_title = full_title = post_title
        if video_type == VIDEO_TYPES.MOVIE:
            meta = scraper_utils.parse_movie_link(post_title)
            match_year = meta['year']
        else:
            meta = scraper_utils.parse_episode_link(post_title)
            match_sxe = 'S%02dE%02d' % (int(meta['season']), int(meta['episode']))
            match_date = meta['airdate']
            match_title = meta['title']
            full_title = '%s (%sp) [%s]' % (meta['title'], meta['height'], meta['extra'])
        match_norm_title = scraper_utils.normalize_title(match_title)
        title_match = norm_title and (match_norm_title in norm_title or norm_title in match_norm_title)
        year_match = not year or not match_year or year == match_year
        sxe_match = not search_sxe or (search_sxe == match_sxe)
        date_match = not search_date or (search_date == match_date)
        logger.log(
            'Blog Results: |%s|%s|%s| - |%s|%s|%s| - |%s|%s|%s| - |%s|%s|%s| (%s)'
            % (match_norm_title, norm_title, title_match, year, match_year,
               year_match, search_date, match_date, date_match, search_sxe,
               match_sxe, sxe_match, self.get_name()), log_utils.LOGDEBUG)
        if title_match and year_match and date_match and sxe_match:
            quality = scraper_utils.height_get_quality(meta['height'])
            result = {'url': scraper_utils.pathify_url(post_data['url']),
                      'title': scraper_utils.cleanse_title(full_title),
                      'year': match_year,
                      'quality': quality}
            results.append(result)
    return results
def __get_links(self, url, video):
    """Search furk.net and convert acceptable video files into hoster dicts.

    Each rejection reason is tracked in `checks` so excluded items can be
    logged with the exact reasons that fired.
    """
    hosters = []
    search_url = scraper_utils.urljoin(self.base_url, SEARCH_URL)
    query = self.__translate_search(url)
    result = self._http_get(search_url, data=query, allow_redirect=False, cache_limit=.5)
    for item in result.get('files', []):
        checks = [
            item.get('type', '').upper() != 'VIDEO',
            item.get('is_ready') != '1',
            item.get('av_result') in ['warning', 'infected'],
            'video_info' not in item,
            bool(item.get('video_info')) and not re.search(
                '#0:(0|1)(\((eng|und)\))?:\s*Audio:', item['video_info'], re.I),
            not scraper_utils.release_check(video, item['name']),
        ]
        if any(checks):
            logger.log('Furk.net result excluded: %s - |%s|' % (checks, item['name']),
                       log_utils.LOGDEBUG)
            continue
        match = re.search('(\d{3,})\s*x\s*(\d{3,})', item['video_info'])
        if match:
            width, _height = match.groups()
            quality = scraper_utils.width_get_quality(width)
        else:
            if video.video_type == VIDEO_TYPES.MOVIE:
                meta = scraper_utils.parse_movie_link(item['name'])
            else:
                meta = scraper_utils.parse_episode_link(item['name'])
            quality = scraper_utils.height_get_quality(meta['height'])
        if 'url_pls' not in item:
            logger.log('Furk.net result skipped - no playlist: |%s|' % (json.dumps(item)),
                       log_utils.LOGDEBUG)
            continue
        size_gb = scraper_utils.format_size(int(item['size']), 'B')
        if self.max_bytes and int(item['size']) > self.max_bytes:
            logger.log('Result skipped, Too big: |%s| - %s (%s) > %s (%sGB)' % (
                item['name'], item['size'], size_gb, self.max_bytes, self.max_gb))
            continue
        stream_url = item['url_pls']
        hoster = {'multi-part': False,
                  'class': self,
                  'views': None,
                  'url': stream_url,
                  'rating': None,
                  'host': scraper_utils.get_direct_hostname(self, stream_url),
                  'quality': quality,
                  'direct': True}
        hoster['size'] = size_gb
        hoster['extra'] = item['name']
        hosters.append(hoster)
    return hosters
def __get_links(self, url, video):
    """Search the EasyNews index and build direct, authorized stream links.

    Returns a list of direct hoster dicts; items failing any of the six
    exclusion checks are logged and skipped.
    """
    hosters = []
    search_url, params = self.__translate_search(url)
    html = self._http_get(search_url, params=params, cache_limit=.5)
    js_result = scraper_utils.parse_json(html, search_url)
    down_url = js_result.get('downURL')
    dl_farm = js_result.get('dlFarm')
    dl_port = js_result.get('dlPort')
    for item in js_result.get('data', []):
        # EasyNews returns positional fields keyed by string indices:
        # '0' hash, '4' size, '10' title, '11' extension, '14' duration
        # (presumed from usage below -- TODO confirm against the API docs).
        post_hash, size, post_title, ext, duration = item['0'], item[
            '4'], item['10'], item['11'], item['14']
        # Each element of `checks` flags one reason to reject the post.
        checks = [False] * 6
        if not scraper_utils.release_check(video, post_title): checks[0] = True
        # Reject posts whose audio languages exclude English.
        if 'alangs' in item and item['alangs'] and 'eng' not in item[
                'alangs']: checks[1] = True
        # Reject very short posts: seconds-long, or under six minutes.
        if re.match('^\d+s', duration) or re.match('^[0-5]m', duration): checks[2] = True
        if 'passwd' in item and item['passwd']: checks[3] = True
        if 'virus' in item and item['virus']: checks[4] = True
        if 'type' in item and item['type'].upper() != 'VIDEO': checks[5] = True
        if any(checks):
            logger.log(
                'EasyNews Post excluded: %s - |%s|' % (checks, item),
                log_utils.LOGDEBUG)
            continue
        # Build the farm/port download path and append the auth header.
        stream_url = down_url + urllib.quote(
            '/%s/%s/%s%s/%s%s' % (dl_farm, dl_port, post_hash, ext, post_title, ext))
        stream_url = stream_url + '|Authorization=%s' % (urllib.quote(
            self.auth))
        host = scraper_utils.get_direct_hostname(self, stream_url)
        # Prefer the explicit width for quality; fall back to parsing the name.
        quality = None
        if 'width' in item:
            try:
                width = int(item['width'])
            except:
                width = 0
            if width:
                quality = scraper_utils.width_get_quality(width)
        if quality is None:
            if video.video_type == VIDEO_TYPES.MOVIE:
                meta = scraper_utils.parse_movie_link(post_title)
            else:
                meta = scraper_utils.parse_episode_link(post_title)
            quality = scraper_utils.height_get_quality(meta['height'])
        # Enforce the user's max-size limit when configured.
        if self.max_bytes:
            match = re.search('([\d.]+)\s+(.*)', size)
            if match:
                size_bytes = scraper_utils.to_bytes(*match.groups())
                if size_bytes > self.max_bytes:
                    logger.log(
                        'Result skipped, Too big: |%s| - %s (%s) > %s (%s GB)' %
                        (post_title, size_bytes, size, self.max_bytes,
                         self.max_gb))
                    continue
        hoster = {
            'multi-part': False,
            'class': self,
            'views': None,
            'url': stream_url,
            'rating': None,
            'host': host,
            'quality': quality,
            'direct': True
        }
        if any(i for i in ['X265', 'HEVC'] if i in post_title.upper()):
            hoster['format'] = 'x265'
        if size: hoster['size'] = size
        if post_title: hoster['extra'] = post_title
        hosters.append(hoster)
    return hosters
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    """Search: probe the show slug page for TV, use the site search for movies.

    Returns a list of {'url', 'title', 'year'} result dicts.
    """
    results = []
    if video_type == VIDEO_TYPES.TVSHOW and title:
        test_url = '/show/%s/' % (scraper_utils.to_slug(title))
        test_url = scraper_utils.urljoin(self.base_url, test_url)
        html = self._http_get(test_url, require_debrid=True, cache_limit=24)
        posts = dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})
        if posts and CATEGORIES[video_type] in posts[0].content:
            match = re.search(
                '<div[^>]*>\s*show\s+name:.*?<a\s+href="([^"]+)[^>]+>(?!Season\s+\d+)([^<]+)',
                posts[0].content, re.I)
            if match:
                show_url, match_title = match.groups()
                results.append({'url': scraper_utils.pathify_url(show_url),
                                'title': scraper_utils.cleanse_title(match_title),
                                'year': ''})
    elif video_type == VIDEO_TYPES.MOVIE:
        search_url = scraper_utils.urljoin(self.base_url, '/search/%s/')
        search_title = re.sub('[^A-Za-z0-9 ]', '', title.lower())
        search_url = search_url % (urllib.quote_plus(search_title))
        headers = {'User-Agent': LOCAL_UA}
        html = self._http_get(search_url, headers=headers, require_debrid=True, cache_limit=1)
        headings = re.findall('<h2>\s*<a\s+href="([^"]+).*?">(.*?)</a>', html)
        posts = [r.content for r in dom_parser2.parse_dom(html, 'div', {'id': re.compile('post-\d+')})]
        norm_title = scraper_utils.normalize_title(title)
        # Headings and post bodies are parallel lists for the same posts.
        for (post_url, post_title), post in zip(headings, posts):
            # Skip episode posts and posts older than the filter window.
            if re.search('[._ -]S\d+E\d+[._ -]', post_title, re.I) or self.__too_old(post):
                continue
            clean_title = re.sub('<[^>]*>', '', post_title)
            meta = scraper_utils.parse_movie_link(clean_title)
            full_title = '%s [%s] (%sp)' % (meta['title'], meta['extra'], meta['height'])
            match_year = meta['year']
            match_norm_title = scraper_utils.normalize_title(meta['title'])
            title_ok = match_norm_title in norm_title or norm_title in match_norm_title
            year_ok = not year or not match_year or year == match_year
            if title_ok and year_ok:
                results.append({'url': scraper_utils.pathify_url(post_url),
                                'title': scraper_utils.cleanse_title(full_title),
                                'year': match_year})
    return results