def __get_links_from_xml(self, url, video, page_url, cookies):
    """Fetch a jwplayer RSS feed and map each stream URL to its quality.

    Returns a dict of {stream_url: {'quality': ..., 'direct': True}}.
    """
    sources = {}
    try:
        xml = self._http_get(url, cookies=cookies, headers={'Referer': page_url}, cache_limit=.5)
        root = ET.fromstring(xml)
        for item in root.findall('.//item'):
            title = item.find('title').text
            # The feed flags an unavailable video with an 'Oops!' title entry.
            if title and title.upper() == 'OOPS!':
                continue
            for source in item.findall('{http://rss.jwpcdn.com/}source'):
                stream_url = source.get('file')
                label = source.get('label')
                # Prefer the strongest quality signal available for this source.
                if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                elif label:
                    quality = scraper_utils.height_get_quality(label)
                elif title:
                    quality = scraper_utils.blog_get_quality(video, title, '')
                else:
                    quality = scraper_utils.blog_get_quality(video, stream_url, '')
                sources[stream_url] = {'quality': quality, 'direct': True}
                logger.log('Adding stream: %s Quality: %s' % (stream_url, quality), log_utils.LOGDEBUG)
    except Exception as e:
        logger.log('Exception during YesMovies XML Parse: %s' % (e), log_utils.LOGWARNING)
    return sources
def __get_links_from_xml(self, url, video, page_url, cookies):
    """Parse the jwplayer RSS feed at url into a {stream_url: info} map."""
    sources = {}
    try:
        headers = {'Referer': page_url}
        xml = self._http_get(url, cookies=cookies, headers=headers, cache_limit=.5)
        for item in ET.fromstring(xml).findall('.//item'):
            title = item.find('title').text
            if title and title.upper() == 'OOPS!':
                continue  # placeholder entry for an unavailable video
            for src in item.findall('{http://rss.jwpcdn.com/}source'):
                stream_url = src.get('file')
                label = src.get('label')
                # Quality resolution order: gvideo URL, explicit label,
                # then the item/stream text as a last resort.
                if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                elif label:
                    quality = scraper_utils.height_get_quality(label)
                elif title:
                    quality = scraper_utils.blog_get_quality(video, title, '')
                else:
                    quality = scraper_utils.blog_get_quality(video, stream_url, '')
                sources[stream_url] = {'quality': quality, 'direct': True}
                logger.log('Adding stream: %s Quality: %s' % (stream_url, quality), log_utils.LOGDEBUG)
    except Exception as e:
        logger.log('Exception during YesMovies XML Parse: %s' % (e), log_utils.LOGWARNING)
    return sources
def get_sources(self, video):
    """Scrape the matched page for a single decoded stream link."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    match = re.search('<p\s+rel="tag">Quality:\s*(.*?)</p>', html, re.I)
    # Fall back to HDRIP when the page exposes no quality tag.
    q_str = match.group(1) if match else 'HDRIP'
    stream_url = self.__decode(html)
    if stream_url:
        host = urlparse.urlparse(stream_url).hostname
        hosters.append({'multi-part': False, 'host': host, 'class': self,
                        'quality': scraper_utils.blog_get_quality(video, q_str, host),
                        'views': None, 'rating': None, 'url': stream_url, 'direct': False})
    return hosters
def get_sources(self, video):
    """Collect hoster links from the post body, one entry per href."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    html = self._http_get(source_url, require_debrid=False, cache_limit=.5)
    if video.video_type == VIDEO_TYPES.MOVIE:
        pattern = '<singlelink>(.*?)(?=<hr\s*/>|download>|thanks_button_div)'
    else:
        pattern = '<hr\s*/>\s*<strong>(.*?)</strong>.*?<singlelink>(.*?)(?=<hr\s*/>|download>|thanks_button_div)'
    for section in re.finditer(pattern, html, re.DOTALL):
        if video.video_type == VIDEO_TYPES.MOVIE:
            links = section.group(1)
            # Movies carry no per-section title; fall back to the page heading.
            title_match = re.search('<h2>\s*<a[^>]+>(.*?)</a>', html)
            title = title_match.group(1) if title_match else ''
        else:
            title, links = section.groups()
        for link in re.finditer('href="([^"]+)', links):
            stream_url = link.group(1).lower()
            if any(excluded in stream_url for excluded in EXCLUDE_LINKS):
                continue
            host = urlparse.urlparse(stream_url).hostname
            hosters.append({'multi-part': False, 'host': host, 'class': self,
                            'views': None, 'url': stream_url, 'rating': None,
                            'quality': scraper_utils.blog_get_quality(video, title, host),
                            'direct': False})
    return hosters
def get_sources(self, video):
    """Scrape embedded iframes in the tab container into hoster dicts.

    Returns a list of hoster dicts; views/rating are filled in when the
    page exposes them.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    fragment = dom_parser.parse_dom(html, 'div', {'class': 'tab_container'})
    if not fragment:
        return hosters
    q_str = 'HDRIP'
    match = re.search('>Quality(.*?)<br\s*/?>', html, re.I)
    if match:
        q_str = match.group(1)
        # FIX: re.sub()'s 4th positional argument is count, not flags; the
        # original passed re.I | re.U as count, so the flags were silently
        # ignored and the number of substitutions was capped.
        q_str = re.sub('(</?strong[^>]*>|:|\s)', '', q_str, flags=re.I | re.U)
    for source in dom_parser.parse_dom(fragment[0], 'iframe', ret='src'):
        host = urlparse.urlparse(source).hostname
        quality = scraper_utils.blog_get_quality(video, q_str, host)
        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality,
                  'views': None, 'rating': None, 'url': source, 'direct': False}
        match = re.search('class="views-infos">(\d+).*?class="rating">(\d+)%', html, re.DOTALL)
        if match:
            hoster['views'] = int(match.group(1))
            hoster['rating'] = match.group(2)
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Turn post links into hoster dicts, tagging x265/HEVC releases."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, require_debrid=True, cache_limit=.5)
    for stream_url, values in self.__get_post_links(html).iteritems():
        if scraper_utils.excluded_link(stream_url):
            continue
        host = urlparse.urlparse(stream_url).hostname
        release = values['release']
        hoster = {'multi-part': False, 'host': host, 'class': self, 'views': None,
                  'url': stream_url, 'rating': None,
                  'quality': scraper_utils.blog_get_quality(video, release, host),
                  'direct': False}
        # Flag HEVC encodes so the UI can surface the codec.
        if 'X265' in release or 'HEVC' in release:
            hoster['format'] = 'x265'
        hosters.append(hoster)
    return hosters
def __get_post_links(self, html, video):
    """Extract {stream_url: quality} pairs from a post's download section."""
    sources = {}
    post = dom_parser2.parse_dom(html, 'div', {'class': 'postContent'})
    if not post:
        return sources
    content = post[0].content
    results = re.findall('<p\s+style="text-align:\s*center;">(?:\s*<strong>)*(.*?)<br(.*?)</p>', content, re.DOTALL)
    if not results:
        # Alternate post layout: explicit Release Name / Download markers.
        match = re.search('>Release Name\s*:(.*?)<br', content, re.I)
        release = match.group(1) if match else ''
        match = re.search('>Download\s*:(.*?)</p>', content, re.DOTALL | re.I)
        links = match.group(1) if match else ''
        results = [(release, links)]
    for release, links in results:
        release = re.sub('</?[^>]*>', '', release)  # strip markup from the release name
        for attrs, hostname in dom_parser2.parse_dom(links, 'a', req='href'):
            if hostname.upper() in ['TORRENT SEARCH', 'VIP FILE']:
                continue  # navigation links, not real hosters
            stream_url = attrs['href']
            host = urlparse.urlparse(stream_url).hostname
            sources[stream_url] = scraper_utils.blog_get_quality(video, release, host)
    return sources
def get_sources(self, video):
    """Scrape iframe sources from each tab_content div into hoster dicts.

    Keeps the original control flow of dropping sources when the page does
    not expose views/rating counters.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    q_str = 'HDRIP'
    match = re.search('>Quality(.*?)<br\s*/?>', html, re.I)
    if match:
        q_str = match.group(1)
        # FIX: re.sub()'s 4th positional argument is count, not flags; the
        # original passed re.I | re.U there, so the flags never applied and
        # the substitution count was silently capped.
        q_str = re.sub('(</?strong[^>]*>|:|\s)', '', q_str, flags=re.I | re.U)
    for _attr, content in dom_parser2.parse_dom(html, 'div', {'class': 'tab_content'}):
        for attrs, _content in dom_parser2.parse_dom(content, 'iframe', req='src'):
            source = attrs['src']
            host = urlparse.urlparse(source).hostname
            quality = scraper_utils.blog_get_quality(video, q_str, host)
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality,
                      'views': None, 'rating': None, 'url': source, 'direct': False}
            match = re.search('class="views-infos">(\d+).*?class="rating">(\d+)%', html, re.DOTALL)
            if not match:
                continue
            hoster['views'] = int(match.group(1))
            hoster['rating'] = match.group(2)
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Scrape the single video-embed iframe into a hoster dict."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'video-embed'})
    if not fragment:
        return hosters
    iframe_url = dom_parser2.parse_dom(fragment[0].content, 'iframe', req='src')
    if not iframe_url:
        return hosters
    stream_url = iframe_url[0].attrs['src']
    host = urlparse.urlparse(stream_url).hostname
    q_str = 'HDRIP'
    match = re.search('>Quality(.*?)<br\s*/>', html, re.I)
    if match:
        q_str = match.group(1)
        q_str = q_str.decode('utf-8').encode('ascii', 'ignore')
        # FIX: re.sub()'s 4th positional argument is count, not flags; the
        # original passed re.I | re.U there, so the flags never applied and
        # the substitution count was silently capped.
        q_str = re.sub('(</?strong[^>]*>|:|\s)', '', q_str, flags=re.I | re.U)
    hoster = {'multi-part': False, 'host': host, 'class': self,
              'quality': scraper_utils.blog_get_quality(video, q_str, host),
              'views': None, 'rating': None, 'url': stream_url, 'direct': False}
    match = re.search('class="views-infos">(\d+).*?class="rating">(\d+)%', html, re.DOTALL)
    if match:
        hoster['views'] = int(match.group(1))
        hoster['rating'] = match.group(2)
    hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Build hoster dicts from td_cols links, skipping rar archives."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    q_str = ''
    quality = None
    # Prefer an exact category -> quality mapping; otherwise fall back to
    # parsing the release string.
    match = re.search('>Category.*?td_col">([^<]+)', html)
    if match:
        quality = QUALITY_MAP.get(match.group(1).upper(), None)
    else:
        match = re.search('>Release.*?td_col">([^<]+)', html)
        if match:
            q_str = match.group(1).upper()
    for match in re.finditer("td_cols.*?href='([^']+)", html):
        stream_url = match.group(1)
        if re.search('\.rar(\.|$)', stream_url):
            continue  # archives are not playable streams
        host = urlparse.urlsplit(stream_url).hostname
        hoster = {'multi-part': False, 'class': self, 'views': None,
                  'url': stream_url, 'rating': None, 'direct': False, 'host': host}
        if quality is None:
            hoster['quality'] = scraper_utils.blog_get_quality(video, q_str, host)
        else:
            hoster['quality'] = scraper_utils.get_quality(video, host, quality)
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Harvest hoster links from singlelink sections of the post."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    is_movie = video.video_type == VIDEO_TYPES.MOVIE
    if is_movie:
        pattern = '<singlelink>(.*?)(?=<hr\s*/>|download>|thanks_button_div)'
    else:
        pattern = '<hr\s*/>\s*<strong>(.*?)</strong>.*?<singlelink>(.*?)(?=<hr\s*/>|download>|thanks_button_div)'
    for section in re.finditer(pattern, html, re.DOTALL):
        if is_movie:
            links = section.group(1)
            # Movie pages have no per-section title; use the page heading.
            heading = re.search('<h2>\s*<a[^>]+>(.*?)</a>', html)
            title = heading.group(1) if heading else ''
        else:
            title, links = section.groups()
        for link in re.finditer('href="([^"]+)', links):
            stream_url = link.group(1).lower()
            if any(token in stream_url for token in EXCLUDE_LINKS):
                continue
            host = urlparse.urlparse(stream_url).hostname
            hosters.append({'multi-part': False, 'host': host, 'class': self, 'views': None,
                            'url': stream_url, 'rating': None,
                            'quality': scraper_utils.blog_get_quality(video, title, host),
                            'direct': False})
    return hosters
def get_sources(self, video):
    """Build hoster dicts from pipe-delimited link rows, skipping adf.ly."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, require_debrid=True, cache_limit=.5)
    # The entry title doubles as the release/quality string.
    title_match = re.search('class="entry-title">([^<]+)', html)
    q_str = title_match.group(1) if title_match else ''
    for match in re.finditer('href="?([^" ]+)(?:[^>]+>){2}\s+\|', html, re.DOTALL):
        stream_url = match.group(1)
        if 'adf.ly' in stream_url:
            continue  # ad-gateway links are never real hosters
        host = urlparse.urlsplit(stream_url).hostname
        hosters.append({'multi-part': False, 'class': self, 'views': None,
                        'url': stream_url, 'rating': None,
                        'quality': scraper_utils.blog_get_quality(video, q_str, host),
                        'direct': False, 'host': host})
    return hosters
def __get_links_from_xml(self, xml, video):
    """Parse a jwplayer RSS document into {stream_url: source info}."""
    sources = {}
    try:
        root = ET.fromstring(xml)
        for item in root.findall('.//item'):
            title = item.find('title').text
            for src in item.findall('{http://rss.jwpcdn.com/}source'):
                stream_url = src.get('file')
                label = src.get('label')
                # gvideo URLs and explicit labels beat the item title.
                if self._get_direct_hostname(stream_url) == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                elif label:
                    quality = scraper_utils.height_get_quality(label)
                else:
                    quality = scraper_utils.blog_get_quality(video, title, '')
                sources[stream_url] = {'quality': quality, 'direct': True}
                log_utils.log('Adding stream: %s Quality: %s' % (stream_url, quality), log_utils.LOGDEBUG)
    except Exception as e:
        log_utils.log('Exception during 123Movies XML Parse: %s' % (e), log_utils.LOGWARNING)
    return sources
def get_sources(self, video):
    """Collect embed iframes plus labeled download links as sources."""
    sources = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return sources
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, require_debrid=True, cache_limit=8)
    # Embedded players: one iframe per stb-container div.
    for div in dom_parser2.parse_dom(html, 'div', {'id': re.compile('stb-container-\d+')}):
        iframes = dom_parser2.parse_dom(div.content, 'iframe', req='src')
        if not iframes:
            continue
        stream_url = iframes[0].attrs['src']
        host = urlparse.urlparse(stream_url).hostname
        sources.append({'multi-part': False, 'url': stream_url, 'host': host, 'class': self,
                        'quality': QUALITIES.HIGH, 'views': None, 'rating': None,
                        'direct': False})
    fragment = dom_parser2.parse_dom(html, 'div', {'class': "stb-download-body_box"})
    if not fragment:
        return sources
    # Download section: pair each quality label with its 'download now' link.
    labels = dom_parser2.parse_dom(fragment[0].content, 'a', {'href': '#'})
    stream_urls = [anchor for anchor in dom_parser2.parse_dom(fragment[0].content, 'a', req='href')
                   if anchor.content.lower() == 'download now']
    for label, anchor in zip(labels, stream_urls):
        stream_url = anchor.attrs['href']
        q_str = re.sub('</?[^>]*>', '', label.content)
        host = urlparse.urlparse(stream_url).hostname
        sources.append({'multi-part': False, 'url': stream_url, 'host': host, 'class': self,
                        'quality': scraper_utils.blog_get_quality(video, q_str, host),
                        'views': None, 'rating': None, 'direct': False})
    return sources
def get_sources(self, video):
    """Gather hoster links from each info2 span section of the page."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = urlparse.urljoin(self.base_url, source_url)
    headers = {'User-Agent': LOCAL_UA}
    html = self._http_get(url, require_debrid=True, headers=headers, cache_limit=.5)
    for section in re.finditer("<span\s+class='info2'(.*?)(<span\s+class='info|<hr\s*/>)", html, re.DOTALL):
        for link in re.finditer('href="([^"]+)', section.group(1)):
            stream_url = link.group(1)
            host = urlparse.urlparse(stream_url).hostname
            # The URL itself carries the release string used for quality.
            hosters.append({'multi-part': False, 'host': host, 'class': self, 'views': None,
                            'url': stream_url, 'rating': None,
                            'quality': scraper_utils.blog_get_quality(video, stream_url, host),
                            'direct': False})
    return hosters
def get_sources(self, video):
    """Combine tab-content iframes and link tables, deduped by URL."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.25)
    # Quality string comes from the calishow badge or an embed tab label.
    q_str = ''
    match = re.search('class="calishow">([^<]+)', html)
    if not match:
        match = re.search('<a[^>]*href="#embed\d*"[^>]+>([^<]+)', html)
    if match:
        q_str = match.group(1)
    fragment = dom_parser.parse_dom(html, 'div', {'class': 'tab-content'})
    if fragment:
        for source in dom_parser.parse_dom(fragment[0], 'iframe', ret='src'):
            host = urlparse.urlparse(source).hostname
            hosters.append({'multi-part': False, 'host': host, 'class': self,
                            'quality': scraper_utils.blog_get_quality(video, q_str, host),
                            'views': None, 'rating': None, 'url': source, 'direct': False})
    for div_id in ('olmt', 'dlnmt'):
        fragment = dom_parser.parse_dom(html, 'div', {'id': div_id})
        if fragment:
            hosters += self.__get_links(video, fragment[0])
    # Dedupe by stream url, keeping the last occurrence of each.
    return dict((stream['url'], stream) for stream in hosters).values()
def __get_episode_sources(self, source_url, video):
    """Build direct hash-id sources for episode links whose hash is ready."""
    hosters = []
    links = self.__find_episode(source_url, video)
    if not links:
        return hosters
    hash_data = self.__get_hash_data([link[0] for link in links])
    for link in links:
        try:
            status = hash_data['hashes'][link[0]]['status']
        except KeyError:
            status = ''
        if status.lower() != 'finished':
            continue  # hash not cached yet; nothing to stream
        stream_url = 'hash_id=%s' % (link[0])
        hoster = {'multi-part': False, 'class': self, 'views': None, 'url': stream_url,
                  'rating': None, 'host': self._get_direct_hostname(stream_url),
                  'quality': scraper_utils.blog_get_quality(video, link[1], ''),
                  'direct': True, 'extra': link[1]}
        hosters.append(hoster)
    return hosters
def __get_comment_links(self, comment, video):
    """Map every href found in a comment to its blog-derived quality."""
    sources = {}
    for link in re.finditer('href="([^"]+)', comment):
        stream_url = link.group(1)
        host = urlparse.urlparse(stream_url).hostname
        sources[stream_url] = scraper_utils.blog_get_quality(video, stream_url, host)
    return sources
def __get_comment_links(self, comment, video):
    """Map each anchor href in a comment to its blog-derived quality."""
    sources = {}
    anchors = dom_parser2.parse_dom(comment, 'a', req='href')
    for attrs, _ in anchors:
        link = attrs['href']
        hostname = urlparse.urlparse(link).hostname
        quality = scraper_utils.blog_get_quality(video, link, hostname)
        sources[link] = quality
    return sources
def __get_links(self, video, fragment):
    """Turn each table row (link + quality cell) into a hoster dict."""
    hosters = []
    for row in re.finditer('href="([^"]+).*?<td>(.*?)</td>\s*</tr>', fragment, re.DOTALL):
        stream_url, q_str = row.groups()
        host = urlparse.urlparse(stream_url).hostname
        hosters.append({'multi-part': False, 'host': host, 'class': self,
                        'quality': scraper_utils.blog_get_quality(video, q_str, host),
                        'views': None, 'rating': None, 'url': stream_url, 'direct': False})
    return hosters
def _blog_get_url(self, video, delim="."):
    """Resolve the scraper-relative url for video.

    Consults the local related-url cache first; on a miss, searches the
    site (with an airdate fallback for episodes), optionally picks the
    best-quality result, and stores the winner back in the cache.
    """
    url = None
    self.create_db_connection()
    result = self.db_connection.get_related_url(
        video.video_type, video.title, video.year, self.get_name(), video.season, video.episode
    )
    if result:
        url = result[0][0]
        log_utils.log(
            "Got local related url: |%s|%s|%s|%s|%s|"
            % (video.video_type, video.title, video.year, self.get_name(), url)
        )
    else:
        select = int(kodi.get_setting("%s-select" % (self.get_name())))
        if video.video_type == VIDEO_TYPES.EPISODE:
            temp_title = re.sub("[^A-Za-z0-9 ]", "", video.title)
            if scraper_utils.force_title(video):
                # Title-forced episodes need an episode title to search on.
                if not video.ep_title:
                    return None
                search_title = "%s %s" % (temp_title, video.ep_title)
                fallback_search = ""
            else:
                search_title = "%s S%02dE%02d" % (temp_title, int(video.season), int(video.episode))
                if isinstance(video.ep_airdate, datetime.date):
                    fallback_search = "%s %s" % (
                        temp_title,
                        video.ep_airdate.strftime("%Y{0}%m{0}%d".format(delim)),
                    )
                else:
                    fallback_search = ""
        else:
            search_title = "%s %s" % (video.title, video.year)
            fallback_search = ""
        results = self.search(video.video_type, search_title, video.year)
        if not results and fallback_search:
            results = self.search(video.video_type, fallback_search, video.year)
        if results:
            # TODO: First result isn't always the most recent...
            best_result = results[0]
            if select != 0:
                # Scan every result's trailing [quality] tag and keep the
                # highest-ranked one.
                best_qorder = 0
                for result in results:
                    match = re.search("\[(.*)\]$", result["title"])
                    if match:
                        q_str = match.group(1)
                        quality = scraper_utils.blog_get_quality(video, q_str, "")
                        if Q_ORDER[quality] > best_qorder:
                            best_result = result
                            best_qorder = Q_ORDER[quality]
            url = best_result["url"]
            self.db_connection.set_related_url(video.video_type, video.title, video.year, self.get_name(), url)
    return url
def __get_links(self, video, views, html, q_str):
    """Build hoster dicts for every absolute link in a list item."""
    hosters = []
    for match in re.finditer('li>\s*<a\s+href="(http[^"]+)', html, re.DOTALL):
        stream_url = match.group(1)
        host = urlparse.urlsplit(stream_url).hostname
        hosters.append({'multi-part': False, 'class': self, 'views': views,
                        'url': stream_url, 'rating': None,
                        'quality': scraper_utils.blog_get_quality(video, q_str, host),
                        'direct': False, 'host': host})
    return hosters
def _blog_get_url(self, video, delim='.'):
    """Resolve the scraper-relative url for video.

    Checks the local related-url cache first; on a miss, searches the site
    (with an airdate-based fallback for episodes), optionally picks the
    best-quality result, and caches the winner with season/episode keys.
    """
    result = self.db_connection().get_related_url(video.video_type, video.title, video.year,
                                                  self.get_name(), video.season, video.episode)
    if result:
        url = result[0][0]
        logger.log('Got local related url: |%s|%s|%s|%s|%s|' % (video.video_type, video.title, video.year, self.get_name(), url), log_utils.LOGDEBUG)
        return url
    url = None
    try:
        select = int(kodi.get_setting('%s-select' % (self.get_name())))
    except:
        select = 0
    if video.video_type == VIDEO_TYPES.EPISODE:
        temp_title = re.sub('[^A-Za-z0-9 ]', '', video.title)
        if scraper_utils.force_title(video):
            # Title-forced episodes need an episode title to search on.
            if not video.ep_title:
                return None
            search_title = '%s %s' % (temp_title, video.ep_title)
            fallback_search = ''
        else:
            search_title = '%s S%02dE%02d' % (temp_title, int(video.season), int(video.episode))
            if isinstance(video.ep_airdate, datetime.date):
                fallback_search = '%s %s' % (temp_title, video.ep_airdate.strftime('%Y{0}%m{0}%d'.format(delim)))
            else:
                fallback_search = ''
    else:
        search_title = video.title
        fallback_search = ''
    results = self.search(video.video_type, search_title, video.year)
    if not results and fallback_search:
        results = self.search(video.video_type, fallback_search, video.year)
    if results:
        # TODO: First result isn't always the most recent...
        best_result = results[0]
        if select != 0:
            # Rank results by explicit quality, then (NNNp) height, then the
            # trailing [quality] tag, keeping the best-ordered one.
            best_qorder = 0
            for result in results:
                if 'quality' in result:
                    quality = result['quality']
                else:
                    match = re.search('\((\d+p)\)', result['title'])
                    if match:
                        quality = scraper_utils.height_get_quality(match.group(1))
                    else:
                        match = re.search('\[(.*)\]$', result['title'])
                        q_str = match.group(1) if match else ''
                        quality = scraper_utils.blog_get_quality(video, q_str, '')
                logger.log('result: |%s|%s|%s|' % (result, quality, Q_ORDER[quality]), log_utils.LOGDEBUG)
                if Q_ORDER[quality] > best_qorder:
                    logger.log('Setting best as: |%s|%s|%s|' % (result, quality, Q_ORDER[quality]), log_utils.LOGDEBUG)
                    best_result = result
                    best_qorder = Q_ORDER[quality]
        url = best_result['url']
        self.db_connection().set_related_url(video.video_type, video.title, video.year, self.get_name(), url, video.season, video.episode)
    return url
def __get_post_links(self, html, video):
    """Collect {stream_url: quality} from centered paragraphs in the post."""
    sources = {}
    post = dom_parser.parse_dom(html, 'div', {'class': 'postContent'})
    if not post:
        return sources
    for para in re.finditer('<p\s+style="text-align:\s*center;">(?:\s*<strong>)*([^<]+)(.*?)</p>', post[0], re.DOTALL):
        release, links = para.groups()
        for link in re.finditer('href="([^"]+)">([^<]+)', links):
            stream_url, hostname = link.groups()
            if hostname.upper() in ['TORRENT SEARCH', 'VIP FILE']:
                continue  # navigation links, not real hosters
            host = urlparse.urlparse(stream_url).hostname
            sources[stream_url] = scraper_utils.blog_get_quality(video, release, host)
    return sources
def get_sources(self, video):
    """Collect hoster links from each info2 section of the page."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    for section in re.finditer("<span\s+class='info2'(.*?)(<span\s+class='info|<hr\s*/>)", html, re.DOTALL):
        for link in re.finditer('href="([^"]+)', section.group(1)):
            stream_url = link.group(1)
            host = urlparse.urlparse(stream_url).hostname
            # The URL itself serves as the release string for quality.
            hosters.append({'multi-part': False, 'host': host, 'class': self, 'views': None,
                            'url': stream_url, 'rating': None,
                            'quality': scraper_utils.blog_get_quality(video, stream_url, host),
                            'direct': False})
    return hosters
def get_sources(self, video):
    """Scrape video-embed iframes into hoster dicts with views/rating."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    q_str = ''
    match = re.search('>quality(.*?)<br\s*/>', html, re.I)
    if match:
        q_str = match.group(1)
        q_str = q_str.decode('utf-8').encode('ascii', 'ignore')
        # FIX: re.sub()'s 4th positional argument is count, not flags; the
        # original passed re.I | re.U there, so the flags never applied and
        # the substitution count was silently capped.
        q_str = re.sub('(</?strong[^>]*>|:|\s)', '', q_str, flags=re.I | re.U)
    fragment = dom_parser.parse_dom(html, 'div', {'class': 'video-embed'})
    if not fragment:
        return hosters
    for frame in re.finditer('<iframe[^>]+src="([^"]+)', fragment[0], re.I):
        stream_url = frame.group(1)
        host = urlparse.urlparse(stream_url).hostname
        hoster = {'multi-part': False, 'host': host, 'class': self,
                  'quality': scraper_utils.blog_get_quality(video, q_str, host),
                  'views': None, 'rating': None, 'url': stream_url, 'direct': False}
        stats = re.search('class="views-infos">(\d+).*?class="rating">(\d+)%', html, re.DOTALL)
        if stats:
            hoster['views'] = int(stats.group(1))
            hoster['rating'] = stats.group(2)
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Turn post links into hoster dicts, tagging x265/HEVC releases."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(url, require_debrid=True, cache_limit=.5)
    sources = self.__get_post_links(html)
    for stream_url in sources:
        release = sources[stream_url]['release']
        host = urlparse.urlparse(stream_url).hostname
        hoster = {'multi-part': False, 'host': host, 'class': self, 'views': None,
                  'url': stream_url, 'rating': None,
                  'quality': scraper_utils.blog_get_quality(video, release, host),
                  'direct': False}
        # Flag HEVC encodes so the UI can surface the codec.
        if 'X265' in release or 'HEVC' in release:
            hoster['format'] = 'x265'
        hosters.append(hoster)
    return hosters
def __get_episode_sources(self, source_url, video):
    """Create direct sources for episode hashes whose status is finished."""
    hosters = []
    links = self.__find_episode(source_url, video)
    if not links:
        return hosters
    hash_data = self.__get_hash_data([link[0] for link in links])
    for link in links:
        try:
            status = hash_data['hashes'][link[0]]['status']
        except KeyError:
            status = ''
        if status.lower() != 'finished':
            continue  # only cached/finished hashes are streamable
        stream_url = 'hash_id=%s' % (link[0])
        hosters.append({'multi-part': False, 'class': self, 'views': None,
                        'url': stream_url, 'rating': None,
                        'host': self._get_direct_hostname(stream_url),
                        'quality': scraper_utils.blog_get_quality(video, link[1], ''),
                        'direct': True, 'extra': link[1]})
    return hosters
def _blog_get_url(self, video, delim='.'):
    """Resolve the scraper-relative url for video.

    Prefers the local related-url cache; otherwise searches the site (with
    an airdate fallback for episodes), optionally chooses the best-quality
    result, and caches the winner.
    """
    self.create_db_connection()
    result = self.db_connection.get_related_url(video.video_type, video.title, video.year,
                                                self.get_name(), video.season, video.episode)
    if result:
        url = result[0][0]
        log_utils.log('Got local related url: |%s|%s|%s|%s|%s|' % (video.video_type, video.title, video.year, self.get_name(), url))
        return url
    url = None
    select = int(kodi.get_setting('%s-select' % (self.get_name())))
    if video.video_type == VIDEO_TYPES.EPISODE:
        temp_title = re.sub('[^A-Za-z0-9 ]', '', video.title)
        if scraper_utils.force_title(video):
            # Title-forced episodes need an episode title to search on.
            if not video.ep_title:
                return None
            search_title = '%s %s' % (temp_title, video.ep_title)
            fallback_search = ''
        else:
            search_title = '%s S%02dE%02d' % (temp_title, int(video.season), int(video.episode))
            if isinstance(video.ep_airdate, datetime.date):
                fallback_search = '%s %s' % (temp_title, video.ep_airdate.strftime('%Y{0}%m{0}%d'.format(delim)))
            else:
                fallback_search = ''
    else:
        search_title = '%s %s' % (video.title, video.year)
        fallback_search = ''
    results = self.search(video.video_type, search_title, video.year)
    if not results and fallback_search:
        results = self.search(video.video_type, fallback_search, video.year)
    if results:
        # TODO: First result isn't always the most recent...
        best_result = results[0]
        if select != 0:
            # Keep the result whose trailing [quality] tag ranks highest.
            best_qorder = 0
            for result in results:
                match = re.search('\[(.*)\]$', result['title'])
                if match:
                    q_str = match.group(1)
                    quality = scraper_utils.blog_get_quality(video, q_str, '')
                    log_utils.log('result: |%s|%s|%s|%s|' % (result, q_str, quality, Q_ORDER[quality]), log_utils.LOGDEBUG)
                    if Q_ORDER[quality] > best_qorder:
                        log_utils.log('Setting best as: |%s|%s|%s|%s|' % (result, q_str, quality, Q_ORDER[quality]), log_utils.LOGDEBUG)
                        best_result = result
                        best_qorder = Q_ORDER[quality]
        url = best_result['url']
        self.db_connection.set_related_url(video.video_type, video.title, video.year, self.get_name(), url)
    return url
def get_sources(self, video):
    """Merge tab-content iframes and the olmt/dlnmt link tables, deduped."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.25)
    q_str = ''
    badge = re.search('class="calishow">([^<]+)', html)
    if badge:
        q_str = badge.group(1)
    else:
        badge = re.search('<a[^>]*href="#embed\d*"[^>]+>([^<]+)', html)
        if badge:
            q_str = badge.group(1)
    fragment = dom_parser.parse_dom(html, 'div', {'class': 'tab-content'})
    if fragment:
        for source in dom_parser.parse_dom(fragment[0], 'iframe', ret='src'):
            host = urlparse.urlparse(source).hostname
            quality = scraper_utils.blog_get_quality(video, q_str, host)
            hosters.append({'multi-part': False, 'host': host, 'class': self,
                            'quality': quality, 'views': None, 'rating': None,
                            'url': source, 'direct': False})
    for div_id in ('olmt', 'dlnmt'):
        fragment = dom_parser.parse_dom(html, 'div', {'id': div_id})
        if fragment:
            hosters += self.__get_links(video, fragment[0])
    # Dedupe by stream url, keeping the last occurrence of each.
    return dict((stream['url'], stream) for stream in hosters).values()
def __get_post_links(self, html, video):
    """Map each download link in the post's centered paragraphs to quality."""
    sources = {}
    post = dom_parser.parse_dom(html, 'div', {'class': 'postContent'})
    if not post:
        return sources
    paragraphs = re.finditer('<p\s+style="text-align:\s*center;">(?:\s*<strong>)*([^<]+)(.*?)</p>', post[0], re.DOTALL)
    for para in paragraphs:
        release, links = para.groups()
        for anchor in re.finditer('href="([^"]+)">([^<]+)', links):
            stream_url, hostname = anchor.groups()
            if hostname.upper() in ['TORRENT SEARCH', 'VIP FILE']:
                continue  # navigation links, not real hosters
            host = urlparse.urlparse(stream_url).hostname
            sources[stream_url] = scraper_utils.blog_get_quality(video, release, host)
    return sources
def __get_links(self, video, views, html, q_str):
    """Build hoster dicts for every absolute list-item link in html."""
    hosters = []
    for match in re.finditer('li>\s*<a\s+href="(http[^"]+)', html, re.DOTALL):
        stream_url = match.group(1)
        host = urlparse.urlsplit(stream_url).hostname
        quality = scraper_utils.blog_get_quality(video, q_str, host)
        hosters.append({"multi-part": False, "class": self, "views": views,
                        "url": stream_url, "rating": None, "quality": quality,
                        "direct": False, "host": host})
    return hosters
def get_sources(self, video):
    """Parse the download-body box into quality-labeled source dicts."""
    sources = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return sources
    url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(url, require_debrid=True, cache_limit=8)
    fragment = dom_parser.parse_dom(html, 'div', {'class': "[^']*stb-download-body_box[^']*"})
    if not fragment:
        return sources
    # Each colored button holds the quality label; the links follow it.
    pattern = '<a[^>]*style="[^"]*background-color: #33809e[^>]*>(?:<b>)?([^<]+)(.*?)(?=<a[^>]*class="fasc-button|$)'
    for section in re.finditer(pattern, fragment[0], re.DOTALL):
        q_str, links = section.groups()
        for stream_url in dom_parser.parse_dom(links, 'a', ret='href'):
            host = urlparse.urlparse(stream_url).hostname
            sources.append({'multi-part': False, 'url': stream_url, 'host': host,
                            'class': self,
                            'quality': scraper_utils.blog_get_quality(video, q_str, host),
                            'views': None, 'rating': None, 'direct': False})
    return sources
def get_sources(self, video):
    """Decode the page's embedded stream link into a single hoster dict."""
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = urlparse.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    quality_match = re.search('<p\s+rel="tag">Quality:\s*(.*?)</p>', html, re.I)
    # Default to HDRIP when no explicit quality tag is present.
    q_str = quality_match.group(1) if quality_match else 'HDRIP'
    stream_url = self.__decode(html)
    if stream_url:
        host = urlparse.urlparse(stream_url).hostname
        quality = scraper_utils.blog_get_quality(video, q_str, host)
        hosters.append({'multi-part': False, 'host': host, 'class': self,
                        'quality': quality, 'views': None, 'rating': None,
                        'url': stream_url, 'direct': False})
    return hosters
def __get_links(self, video, fragment):
    """Convert table rows of (link, quality cell) into hoster dicts."""
    hosters = []
    rows = re.finditer('href="([^"]+).*?<td>(.*?)</td>\s*</tr>', fragment, re.DOTALL)
    for row in rows:
        stream_url = row.group(1)
        q_str = row.group(2)
        host = urlparse.urlparse(stream_url).hostname
        quality = scraper_utils.blog_get_quality(video, q_str, host)
        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality,
                  'views': None, 'rating': None, 'url': stream_url, 'direct': False}
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Scrape the embedded iframe player and return a list of hoster dicts.

    Reads quality from the ">Quality ...<br/>" blurb (default HDRIP) and, when
    available, attaches view count and rating percentage to the hoster.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'video-embed'})
    if not fragment:
        return hosters
    iframe_url = dom_parser2.parse_dom(fragment[0].content, 'iframe', req='src')
    if not iframe_url:
        return hosters

    stream_url = iframe_url[0].attrs['src']
    host = urlparse.urlparse(stream_url).hostname
    q_str = 'HDRIP'
    match = re.search('>Quality(.*?)<br\s*/>', html, re.I)
    if match:
        q_str = match.group(1)
        q_str = q_str.decode('utf-8').encode('ascii', 'ignore')
        # BUG FIX: the 4th positional argument of re.sub is *count*, not
        # *flags*; the original passed re.I | re.U there, so the flags were
        # never applied (and substitutions were capped at 34).
        q_str = re.sub('(</?strong[^>]*>|:|\s)', '', q_str, flags=re.I | re.U)
    hoster = {
        'multi-part': False,
        'host': host,
        'class': self,
        'quality': scraper_utils.blog_get_quality(video, q_str, host),
        'views': None,
        'rating': None,
        'url': stream_url,
        'direct': False
    }
    # Optional metadata: views count and rating percentage.
    match = re.search('class="views-infos">(\d+).*?class="rating">(\d+)%', html, re.DOTALL)
    if match:
        hoster['views'] = int(match.group(1))
        hoster['rating'] = match.group(2)
    hosters.append(hoster)
    return hosters
def __get_links_from_xml(self, xml, video):
    """Parse a JWPlayer RSS feed and return {stream_url: {'quality', 'direct'}}.

    Quality resolution order: gvideo URL heuristics, then the source's
    ``label`` attribute, then the item title, then the stream URL itself.
    Parse failures are logged and an empty/partial dict is returned.
    """
    sources = {}
    try:
        root = ET.fromstring(xml)
        for item in root.findall('.//item'):
            title = item.find('title').text
            for source in item.findall('{http://rss.jwpcdn.com/}source'):
                stream_url = source.get('file')
                label = source.get('label')
                if self._get_direct_hostname(stream_url) == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                elif label:
                    quality = scraper_utils.height_get_quality(label)
                elif title:
                    quality = scraper_utils.blog_get_quality(video, title, '')
                else:
                    # BUG FIX: <title> can be empty/missing (title is None);
                    # fall back to the stream URL instead of passing None,
                    # matching the sibling helper elsewhere in this file.
                    quality = scraper_utils.blog_get_quality(video, stream_url, '')
                sources[stream_url] = {'quality': quality, 'direct': True}
                log_utils.log('Adding stream: %s Quality: %s' % (stream_url, quality), log_utils.LOGDEBUG)
    except Exception as e:
        log_utils.log('Exception during 123Movies XML Parse: %s' % (e), log_utils.LOGWARNING)
    return sources
def __get_post_links(self, html, video):
    """Collect {stream_url: quality} pairs from the post body of a release page.

    Prefers centered "release<br>links" paragraphs; falls back to explicit
    "Release Name:" / "Download:" sections. Torrent-search and VIP-file
    links are skipped.
    """
    sources = {}
    post = dom_parser.parse_dom(html, 'div', {'class': 'postContent'})
    if not post:
        return sources

    content = post[0]
    results = re.findall('<p\s+style="text-align:\s*center;">(?:\s*<strong>)*(.*?)<br(.*?)</p>', content, re.DOTALL)
    if not results:
        # Fallback layout: separate release-name and download sections.
        release_match = re.search('>Release Name\s*:(.*?)<br', content, re.I)
        links_match = re.search('>Download\s*:(.*?)</p>', content, re.DOTALL | re.I)
        release = release_match.group(1) if release_match else ''
        links = links_match.group(1) if links_match else ''
        results = [(release, links)]

    for release, links in results:
        release = re.sub('</?[^>]*>', '', release)  # strip markup from the release name
        for link in re.finditer('href="([^"]+)">([^<]+)', links):
            stream_url, link_text = link.groups()
            if link_text.upper() in ['TORRENT SEARCH', 'VIP FILE']:
                continue
            host = urlparse.urlparse(stream_url).hostname
            sources[stream_url] = scraper_utils.blog_get_quality(video, release, host)
    return sources
def get_sources(self, video):
    """Return hoster dicts for every iframe found inside video-embed divs.

    The page-level "Quality:" string (padded with spaces) is applied to all
    streams found on the page.
    """
    hosters = []
    page_url = self.get_url(video)
    if not page_url or page_url == FORCE_NO_MATCH:
        return hosters

    html = self._http_get(urlparse.urljoin(self.base_url, page_url), cache_limit=.5)
    quality_match = re.search('<b>\s*Quality:\s*</b>([^<]+)', html)
    q_str = quality_match.group(1) if quality_match else ''
    q_str = ' %s ' % (q_str)

    for embed in dom_parser.parse_dom(html, 'div', {'class': 'video-embed'}):
        for iframe in re.finditer('<iframe[^>]*src="([^"]+)', embed, re.I):
            stream_url = iframe.group(1)
            hostname = urlparse.urlparse(stream_url).hostname
            hosters.append({
                'multi-part': False,
                'host': hostname,
                'class': self,
                'quality': scraper_utils.blog_get_quality(video, q_str, hostname),
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': False,
            })
    return hosters
def get_sources(self, video):
    """Return hoster dicts for debrid links on the page, skipping adf.ly shorteners.

    Quality is inferred from the entry title for every link found.
    """
    hosters = []
    page_url = self.get_url(video)
    if not page_url or page_url == FORCE_NO_MATCH:
        return hosters

    html = self._http_get(urlparse.urljoin(self.base_url, page_url), require_debrid=True, cache_limit=.5)
    title_match = re.search('class="entry-title">([^<]+)', html)
    q_str = title_match.group(1) if title_match else ''

    for link in re.finditer('href="?([^" ]+)(?:[^>]+>){2}\s+\|', html, re.DOTALL):
        stream_url = link.group(1)
        if 'adf.ly' in stream_url:
            continue
        hostname = urlparse.urlsplit(stream_url).hostname
        hosters.append({
            'multi-part': False,
            'class': self,
            'views': None,
            'url': stream_url,
            'rating': None,
            'host': hostname,
            'quality': scraper_utils.blog_get_quality(video, q_str, hostname),
            'direct': False,
        })
    return hosters
def get_sources(self, video):
    """Return hoster dicts for iframes inside the fstory-video container.

    The quality string is taken from the first <li> whose "type" span
    mentions "quality"; empty when no such row exists.
    """
    hosters = []
    page_url = self.get_url(video)
    if not page_url or page_url == FORCE_NO_MATCH:
        return hosters

    html = self._http_get(urlparse.urljoin(self.base_url, page_url), cache_limit=.5)

    # Scan the info list for the quality row; default to '' if absent.
    q_str = ''
    for row in dom_parser.parse_dom(html, 'li'):
        label = dom_parser.parse_dom(row, 'span', {'class': 'type'})
        value = dom_parser.parse_dom(row, 'p', {'class': 'text'})
        if label and value and 'quality' in label[0].lower():
            q_str = value[0]
            break

    fragment = dom_parser.parse_dom(html, 'div', {'id': 'fstory-video'})
    if not fragment:
        return hosters
    for iframe in re.finditer('<iframe[^>]*src="([^"]+)', fragment[0], re.I):
        stream_url = iframe.group(1)
        hostname = urlparse.urlparse(stream_url).hostname
        hosters.append({
            'multi-part': False,
            'host': hostname,
            'class': self,
            'quality': scraper_utils.blog_get_quality(video, q_str, hostname),
            'views': None,
            'rating': None,
            'url': stream_url,
            'direct': False,
        })
    return hosters
def get_sources(self, video):
    """Return hoster dicts for iframes inside the fstory-video container.

    Quality text comes from the first <span> inside that container; empty
    when none is present.
    """
    hosters = []
    page_url = self.get_url(video)
    if not page_url or page_url == FORCE_NO_MATCH:
        return hosters

    html = self._http_get(urlparse.urljoin(self.base_url, page_url), cache_limit=.5)
    fragment = dom_parser.parse_dom(html, 'div', {'id': 'fstory-video'})
    if not fragment:
        return hosters

    quality_match = re.search('<span>([^<]+)', fragment[0])
    q_str = quality_match.group(1) if quality_match else ''
    for iframe in re.finditer('<iframe[^>]*src="([^"]+)', fragment[0], re.I):
        stream_url = iframe.group(1)
        hostname = urlparse.urlparse(stream_url).hostname
        hosters.append({
            'multi-part': False,
            'host': hostname,
            'class': self,
            'quality': scraper_utils.blog_get_quality(video, q_str, hostname),
            'views': None,
            'rating': None,
            'url': stream_url,
            'direct': False,
        })
    return hosters
def get_sources(self, video):
    """Scrape the video-embed iframe and return a list of hoster dicts.

    Reads quality from the ">Quality ...<br/>" blurb (default HDRIP) and,
    when present, attaches view count and rating percentage.
    """
    hosters = []
    source_url = self.get_url(video)
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        match = re.search('class="video-embed".*?src="([^"]+)', html, re.DOTALL)
        if match:
            stream_url = match.group(1)
            host = urlparse.urlparse(stream_url).hostname
            q_str = 'HDRIP'
            match = re.search('>Quality(.*?)<br\s*/>', html, re.I)
            if match:
                q_str = match.group(1)
                q_str = q_str.decode('utf-8').encode('ascii', 'ignore')
                # BUG FIX: the 4th positional argument of re.sub is *count*,
                # not *flags*; the original passed re.I | re.U there, so the
                # flags were never applied (and substitutions were capped at 34).
                q_str = re.sub('(</?strong[^>]*>|:|\s)', '', q_str, flags=re.I | re.U)
            hoster = {
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': scraper_utils.blog_get_quality(video, q_str, host),
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': False
            }
            # Optional metadata: views count and rating percentage.
            match = re.search('class="views-infos">(\d+).*?class="rating">(\d+)%', html, re.DOTALL)
            if match:
                hoster['views'] = int(match.group(1))
                hoster['rating'] = match.group(2)
            hosters.append(hoster)
    return hosters