def get_sources(self, video):
    """Scrape the 'Version N' links on the video's page into hoster dicts.

    Returns an empty list when no source page is known for the video.
    """
    hosters = []
    page_link = self.get_url(video)
    if not page_link or page_link == FORCE_NO_MATCH:
        return hosters

    page_url = urlparse.urljoin(self.base_url, page_link)
    html = self._http_get(page_url, cache_limit=.5)
    for version_match in re.finditer('<a[^>]+href="([^"]+)[^>]+>(Version \d+)<', html):
        stream_url, version = version_match.groups()
        # strip any 'embed.' prefix so the plain host name is reported
        host = urlparse.urlsplit(stream_url).hostname.replace('embed.', '')
        hosters.append({
            'multi-part': False,
            'host': host,
            'class': self,
            'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
            'views': None,
            'rating': None,
            'url': stream_url,
            'direct': False,
            'version': version,
        })
    return hosters
def get_sources(self, video):
    """Collect hoster links from the table cells on the video's page.

    Returns an empty list when no source page is known for the video.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    link_pattern = '<td>\s*<a\s+href="([^"]+)(?:[^>]+>){2}\s*(?: )*\s*([^<]+)'
    for link in re.finditer(link_pattern, html):
        stream_url, host = link.groups()
        hosters.append({
            'multi-part': False,
            'host': host,
            'class': self,
            'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
            'views': None,
            'rating': None,
            'url': stream_url,
            'direct': False,
        })
    return hosters
def get_sources(self, video):
    """Scrape the 'tdhost' table cells for hoster links (default HIGH quality).

    Returns an empty list when no source page is known for the video.
    """
    hosters = []
    page_link = self.get_url(video)
    if not page_link or page_link == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, page_link)
    html = self._http_get(page_url, cache_limit=.5)
    for _attrs, cell in dom_parser2.parse_dom(html, 'td', {'class': 'tdhost'}):
        anchors = dom_parser2.parse_dom(cell, 'a', req='href')
        if not anchors:
            continue
        stream_url = anchors[0].attrs['href']
        host_name = urlparse.urlparse(stream_url).hostname
        hosters.append({
            'multi-part': False,
            'host': host_name,
            'class': self,
            'url': stream_url,
            'quality': scraper_utils.get_quality(video, host_name, QUALITIES.HIGH),
            'views': None,
            'rating': None,
            'direct': False,
        })
    return hosters
def get_sources(self, video):
    """Scrape base64-obfuscated 'gtfo' redirect links into hoster dicts.

    'Part N' links are handled specially: when a 'Part 2' link is seen, the
    'Part 1' hoster appended on the previous iteration is removed again so
    split videos don't surface an unplayable first half.

    Returns an empty list when no source page is known for the video.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    pattern = 'href="[^"]+gtfo=([^&"]+)[^>]+>([^<]+)'
    for match in re.finditer(pattern, html, re.DOTALL | re.I):
        url, link_name = match.groups()
        url = url.decode('base-64')  # target URL is base64-encoded in the query string
        host = urlparse.urlsplit(url).hostname
        match = re.search('Part\s+(\d+)', link_name)
        if match:
            if match.group(1) == '2':
                # BUG FIX: guard so an orphaned 'Part 2' link can't raise
                # IndexError (or delete an unrelated hoster) when no
                # 'Part 1' entry was appended before it.
                if hosters:
                    del hosters[-1]  # remove Part 1 previous link added
                continue
        source = {
            'multi-part': False,
            'url': url,
            'host': host,
            'class': self,
            'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
            'views': None,
            'rating': None,
            'direct': False
        }
        hosters.append(source)
    return hosters
def get_sources(self, video):
    """Scrape host/rating/quality rows from the page's links table.

    Returns a list of hoster dicts; empty when no source page is known.
    """
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        # outer regex isolates each <tr id="link_..."> row
        pattern = '<tr\s+id="link_(.*?)</tr>'
        for match in re.finditer(pattern, html, re.DOTALL):
            link = match.group(1)
            # inner regex pulls url, host, rating text, and quality cell from the row
            link_pattern = 'href="([^"]+)">\s*([^<]+).*?class="text">\s*([^<%]+).*?class="qualityCell[^>]*>\s*([^<]+)'
            link_match = re.search(link_pattern, link, re.DOTALL)
            if link_match:
                url, host, rating, quality = link_match.groups()
                host = host.strip()
                quality = quality.upper().strip()
                if rating == 'n/a':
                    rating = None  # site shows 'n/a' when a link is unrated
                url = url.replace('/show/', '/play/')  # convert preview path to playable path
                quality = QUALITY_MAP.get(quality, QUALITIES.MEDIUM)
                hoster = {
                    'multi-part': False,
                    'url': url,
                    'host': host,
                    'class': self,
                    'quality': scraper_utils.get_quality(video, host, quality),
                    'views': None,
                    'rating': rating,
                    'direct': False
                }
                hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Scrape versioned hosters; 'rating' is views relative to the most-viewed link.

    Host and URL values are base64-encoded in the page markup. Returns a list
    of hoster dicts; empty when no source page is known.
    """
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        container_pattern = r'<table[^>]+class="movie_version[ "][^>]*>(.*?)</table>'
        item_pattern = (
            r'quality_(?!sponsored|unknown|play)([^>]*)></span>.*?'
            r'url=([^&]+)&(?:amp;)?domain=([^&]+)&(?:amp;)?(.*?)'
            r'"version_veiws"> ([\d]+) views</')
        max_index = 0
        max_views = -1
        for container in re.finditer(container_pattern, html, re.DOTALL | re.IGNORECASE):
            for i, source in enumerate(re.finditer(item_pattern, container.group(1), re.DOTALL)):
                qual, url, host, parts, views = source.groups()
                # this base64 host value is a blacklisted promo entry
                if host == 'ZnJhbWVndGZv':
                    continue  # filter out promo hosts
                item = {
                    'host': host.decode('base-64'),
                    'url': url.decode('base-64'),
                    'class': self,
                    'direct': False
                }
                # star icon marks links the site verified as working
                item['verified'] = source.group(0).find('star.gif') > -1
                item['quality'] = scraper_utils.get_quality(video, item['host'], QUALITY_MAP.get(qual.upper()))
                item['views'] = int(views)
                if item['views'] > max_views:
                    # NOTE(review): i restarts per container while hosters spans
                    # all containers — max_index may mis-address hosters when
                    # more than one container matches; confirm before relying on it
                    max_index = i
                    max_views = item['views']
                # rating = percentage of the most-viewed link seen so far
                if max_views > 0:
                    item['rating'] = item['views'] * 100 / max_views
                else:
                    item['rating'] = None
                pattern = r'<a href=".*?url=(.*?)&(?:amp;)?.*?".*?>(part \d*)</a>'
                other_parts = re.findall(pattern, parts, re.DOTALL | re.I)
                if other_parts:
                    item['multi-part'] = True
                    item['parts'] = [part[0].decode('base-64') for part in other_parts]
                else:
                    item['multi-part'] = False
                hosters.append(item)
        # re-normalize ratings that were assigned before the max-viewed item was seen
        if max_views > 0:
            for i in xrange(0, max_index):
                hosters[i]['rating'] = hosters[i]['views'] * 100 / max_views
    return hosters
def get_sources(self, video):
    """Scrape embedded players and additional links, rated at the page's best quality.

    The page-level quality is the highest 'movies-quality' tag found in the
    entry div. Returns a list of hoster dicts; empty when no source page is known.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    best_quality = QUALITIES.HIGH
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'entry'})
    if fragment:
        for match in re.finditer('href="[^"]*/movies-quality/[^"]*[^>]*>([^<]+)', fragment[0].content, re.I):
            quality = Q_MAP.get(match.group(1).upper(), QUALITIES.HIGH)
            if Q_ORDER[quality] > Q_ORDER[best_quality]:
                best_quality = quality

    sources = []
    # player iframes are stored (HTML-escaped) in each anchor's data-vid attribute
    for attrs, _content in dom_parser2.parse_dom(html, 'a', req='data-vid'):
        try:
            vid_url = dom_parser2.parse_dom(scraper_utils.cleanse_title(attrs['data-vid']), 'iframe', req='src')
            sources.append(vid_url[0])
        except:
            pass

    fragment = dom_parser2.parse_dom(html, 'table', {'class': 'additional-links'})
    if fragment:
        sources += dom_parser2.parse_dom(fragment[0].content, 'a', req='href')

    for stream_url in sources:
        # entries are either <a href> or <iframe src> nodes
        stream_url = stream_url.attrs.get('href') or stream_url.attrs.get('src')
        host = urlparse.urlparse(stream_url).hostname
        quality = scraper_utils.get_quality(video, host, best_quality)
        hoster = {
            'multi-part': False,
            'host': host,
            'class': self,
            'quality': quality,
            'views': None,
            'rating': None,
            'url': stream_url,
            'direct': False
        }
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Scrape server-name / play-link pairs; Google links expand to gvideo streams.

    Returns a list of hoster dicts; empty when no source page is known.
    """
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=10)
    hosts = [r.content for r in dom_parser2.parse_dom(html, 'p', {'class': 'server_servername'})]
    links = [r.content for r in dom_parser2.parse_dom(html, 'p', {'class': 'server_play'})]
    for host, link_frag in zip(hosts, links):
        stream_url = dom_parser2.parse_dom(link_frag, 'a', req='href')
        if not stream_url:
            continue
        stream_url = stream_url[0].attrs['href']
        # Strip the 'Server ...' / '... Link N' decorations from the label.
        # BUG FIX: re.sub's 4th positional argument is count, not flags —
        # re.I was silently being passed as count=2, so the prefix strip was
        # case-sensitive. Pass flags by keyword.
        host = re.sub('^Server\s*', '', host, flags=re.I)
        host = re.sub('\s*Link\s+\d+', '', host)
        if host.lower() == 'google':
            sources = self.__get_gvideo_links(stream_url)
        else:
            sources = [{'host': host, 'link': stream_url}]
        for source in sources:
            # NOTE(review): hostname/quality are derived from the outer
            # stream_url rather than source['link'] — confirm that is intended
            host = scraper_utils.get_direct_hostname(self, stream_url)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
                stream_url = source['link'] + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
                direct = True
            else:
                stream_url = scraper_utils.pathify_url(source['link'])
                host = HOST_SUB.get(source['host'].lower(), source['host'])
                quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
                direct = False
            hoster = {
                'multi-part': False,
                'url': stream_url,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'direct': direct
            }
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Scrape responsive-embed iframes; known direct hosts expand to stream lists.

    Movies default to 720p, episodes to HIGH. Returns a list of hoster dicts;
    empty when no source page is known.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    page_quality = QUALITIES.HD720 if video.video_type == VIDEO_TYPES.MOVIE else QUALITIES.HIGH
    for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'embed-responsive'}):
        iframe_url = dom_parser2.parse_dom(fragment, 'iframe', req='data-src')
        if iframe_url:
            iframe_url = iframe_url[0].attrs['data-src']
            iframe_host = urlparse.urlparse(iframe_url).hostname
            if iframe_host in DIRECT_HOSTS:
                sources = self.__parse_streams(iframe_url, url)
            else:
                sources = {iframe_url: {'quality': scraper_utils.get_quality(video, iframe_host, page_quality), 'direct': False}}
            for source in sources:
                quality = sources[source]['quality']
                direct = sources[source]['direct']
                if direct:
                    host = scraper_utils.get_direct_hostname(self, source)
                    # direct streams need a User-Agent header appended for playback
                    stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
                else:
                    host = urlparse.urlparse(source).hostname
                    stream_url = source
                hoster = {
                    'multi-part': False,
                    'url': stream_url,
                    'host': host,
                    'class': self,
                    'quality': quality,
                    'views': None,
                    'rating': None,
                    'direct': direct
                }
                hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Scrape voted hoster links; 'rating' is the up-vote percentage.

    Page quality is detected from known quality icons first, then from the
    quality label text. Returns a list of source dicts; empty when no source
    page is known.
    """
    source_url = self.get_url(video)
    sources = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        quality = None
        for key in QUALITY_ICONS:
            if key in html:
                quality = QUALITY_ICONS[key]
                break
        if quality is None:
            # the 'qaulity' alternative matches a misspelling that appears on the site
            match = re.search('(?:qaulity|quality):\s*<span[^>]*>(.*?)</span>', html, re.DOTALL | re.I)
            if match:
                quality = QUALITY_MAP.get(match.group(1).upper())
        pattern = '''href="[^"]+url=([^&]+)&domain=([^"&]+).*?fa-thumbs-o-up">\s*([^<]+).*?vote_bad_embedid_\d+'>([^<]+)'''
        for match in re.finditer(pattern, html, re.I | re.DOTALL):
            url, host, up, down = match.groups()
            # keep only the digits from the vote-count text
            up = ''.join([c for c in up if c in string.digits])
            down = ''.join([c for c in down if c in string.digits])
            # url and host are base64-encoded in the redirect query string
            url = url.decode('base-64')
            host = host.decode('base-64')
            # skip ad match
            if host.upper() == 'HDSTREAM':
                continue
            up = int(up)
            down = int(down)
            source = {
                'multi-part': False,
                'url': url,
                'host': host,
                'class': self,
                'quality': scraper_utils.get_quality(video, host, quality),
                'up': up,
                'down': down,
                'direct': False
            }
            rating = up * 100 / (up + down) if (up > 0 or down > 0) else None
            source['rating'] = rating
            source['views'] = up + down
            sources.append(source)
    return sources
def get_sources(self, video):
    """Scrape the debrid links table; gvideo links expand to direct streams.

    Non-direct link quality is derived from the release name in the row.
    Returns a list of hoster dicts; empty when no source page is known.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, require_debrid=True, cache_limit=.5)
    fragment = dom_parser2.parse_dom(html, 'table', {'class': 'links-table'})
    if not fragment:
        return hosters
    for _attrs, row in dom_parser2.parse_dom(fragment[0].content, 'tr'):
        # first group: URL bound to the play button; second: release-name cell
        match = re.search("playVideo\.bind\(.*?'([^']+)(?:[^>]*>){2}(.*?)</td>", row, re.DOTALL)
        if not match:
            continue
        stream_url, release = match.groups()
        if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
            sources = scraper_utils.parse_google(self, stream_url)
        else:
            sources = [stream_url]
        for source in sources:
            host = scraper_utils.get_direct_hostname(self, source)
            if host == 'gvideo':
                quality = scraper_utils.gv_get_quality(source)
                direct = True
            else:
                host = urlparse.urlparse(source).hostname
                # derive quality from the release name (e.g. '...720p...')
                if video.video_type == VIDEO_TYPES.MOVIE:
                    meta = scraper_utils.parse_movie_link(release)
                else:
                    meta = scraper_utils.parse_episode_link(release)
                base_quality = scraper_utils.height_get_quality(meta['height'])
                quality = scraper_utils.get_quality(video, host, base_quality)
                direct = False
            hoster = {
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': source,
                'direct': direct
            }
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Scrape GK-player links; the page 'status' field supplies the default quality.

    Returns a list of hoster dicts; empty when no source page is known.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    page_quality = dom_parser2.parse_dom(html, 'dd', {'class': 'status'})
    if page_quality:
        page_quality = QUALITY_MAP.get(page_quality[0].content, QUALITIES.HIGH)
    else:
        page_quality = QUALITIES.HIGH
    if video.video_type == VIDEO_TYPES.EPISODE:
        # episode links live inside the servers-list fragment only
        fragment = dom_parser2.parse_dom(html, 'div', {'id': 'servers-list'})
        gk_html = fragment[0].content if fragment else ''
    else:
        gk_html = html
    link_url = scraper_utils.urljoin(self.base_url, LINK_URL)
    player_url = scraper_utils.urljoin(self.base_url, PLAYER_URL)
    for stream_url, quality in scraper_utils.get_gk_links(self, gk_html, page_url, page_quality, link_url, player_url).iteritems():
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            direct = True
            quality = quality
        else:
            host = urlparse.urlparse(stream_url).hostname
            quality = scraper_utils.get_quality(video, host, quality)
            direct = False
        if host is not None:
            # playback needs the User-Agent appended to the stream URL
            stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
            hoster = {
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': direct
            }
            hosters.append(hoster)
    return hosters
def __get_post_links(self, html, video):
    """Map stream URLs found in the post's <h2> headings to their qualities.

    Returns a dict of {stream_url: quality}; empty when no post article matches.
    """
    sources = {}
    post = dom_parser.parse_dom(html, 'article', {'id': 'post-\d+'})
    if post:
        for fragment in dom_parser.parse_dom(post[0], 'h2'):
            for match in re.finditer('href="([^"]+)', fragment):
                stream_url = match.group(1)
                # quality is inferred from the release height encoded in the link text
                meta = scraper_utils.parse_episode_link(stream_url)
                release_quality = scraper_utils.height_get_quality(meta['height'])
                host = urlparse.urlparse(stream_url).hostname
                quality = scraper_utils.get_quality(video, host, release_quality)
                sources[stream_url] = quality
    return sources
def __add_sources(self, sources, video, quality=QUALITIES.HIGH):
    """Convert a list of raw source URLs into hoster dicts.

    gvideo URLs become direct streams with a User-Agent appended; any other
    URL is treated as a hosted link rated against the current quality.
    """
    results = []
    for source in sources:
        direct = self._get_direct_hostname(source) == 'gvideo'
        if direct:
            host = self._get_direct_hostname(source)
            quality = scraper_utils.gv_get_quality(source)
            stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
        else:
            host = urlparse.urlparse(source).hostname
            # note: quality carries over between iterations by design of the original
            quality = scraper_utils.get_quality(video, host, quality)
            stream_url = source
        results.append({
            'multi-part': False,
            'host': host,
            'class': self,
            'quality': quality,
            'views': None,
            'rating': None,
            'url': stream_url,
            'direct': direct,
        })
    return results
def get_sources(self, video):
    """Scrape 'ldr-item' entries, including click-count views and a 0-100 rating.

    Returns a list of hoster dicts; empty when no source page is known.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.25)
    for _attrs, link in dom_parser2.parse_dom(html, 'div', {'class': 'ldr-item'}):
        stream_url = dom_parser2.parse_dom(link, 'a', req='data-actuallink')

        try:
            watched = dom_parser2.parse_dom(link, 'div', {'class': 'click-count'})
            match = re.search(' (\d+) ', watched[0].content)
            views = match.group(1)
        except:
            views = None

        try:
            score = dom_parser2.parse_dom(link, 'div', {'class': 'point'})
            score = int(score[0].content)
            # site score is 0-10; scale to the 0-100 rating convention
            rating = score * 10 if score else None
        except:
            rating = None

        if stream_url:
            stream_url = stream_url[0].attrs['data-actuallink'].strip()
            host = urlparse.urlparse(stream_url).hostname
            quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
            hoster = {
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': quality,
                'views': views,
                'rating': rating,
                'url': stream_url,
                'direct': False
            }
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """POST the page's movie id to the play_online XHR and scrape iframe hosters.

    Returns a list of hoster dicts; empty when no source page or movie id is found.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=8)
    movie_id = dom_parser2.parse_dom(html, 'input', {'id': 'movie_id'}, req='value')
    if not movie_id:
        return hosters

    # form fields mirror what the site's own player sends ('aspectration'
    # misspelling is the literal field name the endpoint expects)
    data = {
        'movie': movie_id[0].attrs['value'],
        'starttime': 'undefined',
        'pageevent': 0,
        'aspectration': ''
    }
    xhr_url = scraper_utils.urljoin(self.base_url, '/movies/play_online')
    headers = {'Referer': page_url}
    headers.update(XHR)
    html = self._http_get(xhr_url, data=data, headers=headers, cache_limit=.5)
    best_quality, _sources = self.__get_direct(html, page_url)
    for attrs, _content in dom_parser2.parse_dom(html, 'iframe', req='src'):
        stream_url = attrs['src']
        host = urlparse.urlparse(stream_url).hostname
        quality = scraper_utils.get_quality(video, host, best_quality)
        hoster = {
            'multi-part': False,
            'url': stream_url,
            'host': host,
            'class': self,
            'quality': quality,
            'views': None,
            'rating': None,
            'direct': False
        }
        hosters.append(hoster)
    return hosters
def __get_links(self, url, video):
    """Query the search API once per search type and build hosters from the JSON.

    Skips multi-part results, rar archives, duplicates, and titles that fail the
    release check. Returns a list of hoster dicts.
    """
    hosters = []
    seen_urls = set()
    for search_type in SEARCH_TYPES:
        search_url, params = self.__translate_search(url, search_type)
        if not search_url:
            continue
        html = self._http_get(search_url, params=params, cache_limit=.5)
        js_result = scraper_utils.parse_json(html, search_url)
        # NOTE(review): 'sucsess' looks misspelled but may be the API's literal
        # status string — confirm against a live response before "fixing"
        if js_result.get('status') != 'sucsess':
            continue
        for result in js_result['post']:
            stream_url = result['hosterurls'][0]['url']
            if len(result['hosterurls']) > 1:
                continue  # skip multi-part results
            if result['extension'] == 'rar':
                continue  # archives aren't streamable
            if stream_url in seen_urls:
                continue
            if scraper_utils.release_check(video, result['title']):
                host = urlparse.urlsplit(stream_url).hostname
                quality = scraper_utils.get_quality(video, host, self._get_title_quality(result['title']))
                hoster = {
                    'multi-part': False,
                    'class': self,
                    'views': None,
                    'url': stream_url,
                    'rating': None,
                    'host': host,
                    'quality': quality,
                    'direct': False
                }
                hoster['extra'] = scraper_utils.cleanse_title(result['title'])
                if video.video_type == VIDEO_TYPES.MOVIE:
                    meta = scraper_utils.parse_movie_link(hoster['extra'])
                else:
                    meta = scraper_utils.parse_episode_link(hoster['extra'])
                if 'format' in meta:
                    hoster['format'] = meta['format']
                hosters.append(hoster)
                seen_urls.add(stream_url)
    return hosters
def get_sources(self, video):
    """Follow 'Watch it here' redirects to the final post, then scrape tab iframes.

    Falls back to the alternate domain (BASE_URL2) when the primary fetch
    returns nothing. Returns a list of hoster dicts; empty when no source page
    is known.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    entry = ''
    while True:
        html = self._http_get(url, cache_limit=.5)
        if not html:
            # primary domain failed; retry the same path on the alternate domain
            url = scraper_utils.urljoin(BASE_URL2, source_url)
            html = self._http_get(url, cache_limit=.5)
        entry = dom_parser2.parse_dom(html, 'div', {'class': 'entry'})
        if entry:
            entry = entry[0].content
            match = re.search('Watch it here\s*:.*?href="([^"]+)', entry, re.I)
            if not match:
                break  # no further redirect: this entry holds the links
            url = match.group(1)
        else:
            entry = ''
            break

    for _attribs, tab in dom_parser2.parse_dom(entry, 'div', {'class': 'postTabs_divs'}):
        match = dom_parser2.parse_dom(tab, 'iframe', req='src')
        if not match:
            continue
        link = match[0].attrs['src']
        host = urlparse.urlparse(link).hostname
        hoster = {
            'multi-part': False,
            'host': host,
            'class': self,
            'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH),
            'views': None,
            'rating': None,
            'url': link,
            'direct': False
        }
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Scrape English-language host links; attaches subtitle info when flagged.

    Each language section is identified by its flag div; only 'english'
    sections are used. Returns a list of hoster dicts; empty when no source
    page is known.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    headers = {'Referer': self.base_url}
    html = self._http_get(url, headers=headers, cache_limit=.5)
    for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'lang'}):
        section_label = dom_parser2.parse_dom(fragment, 'div', {'title': re.compile('Language Flag\s+[^"]*')})
        lang, subs = self.__get_section_label(section_label)
        if lang.lower() == 'english':
            # the anchor's content is the host name
            for attrs, host in dom_parser2.parse_dom(fragment, 'a', {'class': 'p1'}, req='href'):
                stream_url = attrs['href']
                quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
                hoster = {
                    'multi-part': False,
                    'url': stream_url,
                    'class': self,
                    'quality': quality,
                    'host': host,
                    'rating': None,
                    'views': None,
                    'direct': False
                }
                if subs:
                    hoster['subs'] = subs
                hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Scrape download links from the 'td_cols' cells.

    Quality comes from the Category cell when it maps to a known quality;
    otherwise the raw Release string is kept for blog-style quality parsing.
    Returns a list of hoster dicts; empty when no source page is known.
    """
    # (removed an unused cfscrape.create_scraper() local that was never referenced)
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        q_str = ''
        quality = None
        match = re.search('>Category.*?td_col">([^<]+)', html)
        if match:
            quality = QUALITY_MAP.get(match.group(1).upper(), None)
        else:
            match = re.search('>Release.*?td_col">([^<]+)', html)
            if match:
                q_str = match.group(1).upper()
        # BUG FIX: the pattern previously ended with a lazy (.+?) and nothing
        # after it, so it always captured exactly one character of the URL.
        # Capture everything up to the closing quote instead.
        pattern = "td_cols.+?href='([^']+)"
        for match in re.finditer(pattern, html):
            url = match.group(1)
            if re.search('\.rar(\.|$)', url):
                continue  # skip archive links
            hoster = {
                'multi-part': False,
                'class': self,
                'views': None,
                'url': url,
                'rating': None,
                'direct': False
            }
            hoster['host'] = urlparse.urlsplit(url).hostname
            if quality is None:
                hoster['quality'] = scraper_utils.blog_get_quality(video, q_str, hoster['host'])
            else:
                hoster['quality'] = scraper_utils.get_quality(video, hoster['host'], quality)
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Scrape GK links (with a second-strategy fallback) into hoster dicts.

    The sources mapping's value is a quality for direct/external links but a
    host name for site-relative links. Returns a list of hoster dicts.
    """
    source_url = self.get_url(video)
    hosters = []
    sources = {}
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        sources = self.__get_gk_links(html, url)
        if not sources:
            sources = self.__get_gk_links2(html)

    for source in sources:
        host = self._get_direct_hostname(source)
        if host == 'gvideo':
            direct = True
            quality = sources[source]
            # direct playback needs the User-Agent appended to the URL
            stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        else:
            direct = False
            stream_url = source
            if self.base_url in source:
                # site-relative links map to their host name instead of a quality
                host = sources[source]
                quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
            else:
                host = urlparse.urlparse(source).hostname
                quality = sources[source]
        hoster = {
            'multi-part': False,
            'host': host,
            'class': self,
            'quality': quality,
            'views': None,
            'rating': None,
            'url': stream_url,
            'direct': direct
        }
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Scrape 'Watch Link N' rows, emitting extra hosters for multi-part links.

    Returns a list of hoster dicts; empty when no source page is known.
    """
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    pattern = 'href="([^"]+)">Watch (Link \d+)(.*?)</td>\s*<td[^>]*>(.*?)</td>.*?<td[^>]*id="lv_\d+"[^>]*>([^<]+)'
    for match in re.finditer(pattern, html, re.DOTALL):
        stream_url, label, part_str, q_str, views = match.groups()
        q_str = q_str.strip().upper()
        parts = re.findall('href="([^"]+)">(Part\s+\d+)<', part_str, re.DOTALL)
        if parts:
            multipart = True
        else:
            multipart = False
        host = urlparse.urlparse(stream_url).hostname
        if host is None:
            continue
        quality = scraper_utils.get_quality(video, host, QUALITY_MAP.get(q_str, QUALITIES.HIGH))
        hoster = {
            'multi-part': multipart,
            'host': host,
            'class': self,
            'quality': quality,
            'views': views,
            'rating': None,
            'url': stream_url,
            'direct': False
        }
        hoster['extra'] = label
        hosters.append(hoster)
        # each additional part becomes its own hoster entry cloned from the first
        for part in parts:
            stream_url, part_label = part
            part_hoster = hoster.copy()
            part_hoster['part_label'] = part_label
            part_hoster['url'] = stream_url
            hosters.append(part_hoster)
    return hosters
def get_sources(self, video):
    """Scrape link rows; 'rating' is the link age scaled 0-100 across the page.

    Host names are decoded from the host icon file names; age falls back from
    the linkdate span to the third table cell. Returns a list of hoster dicts.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    max_age = 0
    now = min_age = int(time.time())
    for _attrs, row in dom_parser2.parse_dom(html, 'tr', {'id': re.compile('pt\d+')}):
        stream_url = dom_parser2.parse_dom(row, 'a', {'class': 'spf-link'}, req='href')
        host = dom_parser2.parse_dom(row, 'img', {'alt': ''}, req='src')
        data = dom_parser2.parse_dom(row, 'td')
        if stream_url and host:
            stream_url = stream_url[0].attrs['href']
            # host name is encoded in the icon's file name (…/<host>.gif)
            match = re.search('.*/(.*?)\.gif', host[0].attrs['src'])
            host = match.group(1) if match else ''
            age = dom_parser2.parse_dom(row, 'span', {'class': 'linkdate'})
            try:
                age = age[0].content
            except:
                try:
                    age = data[2].content
                except:
                    age = 0
            try:
                quality = data[3].content
            except:
                quality = 'HDTV'
            age = self.__get_age(now, age)
            if age > max_age:
                max_age = age
            if age < min_age:
                min_age = age
            hoster = {
                'multi-part': False,
                'class': self,
                'url': scraper_utils.pathify_url(stream_url),
                'host': host,
                'age': age,
                'views': None,
                'rating': None,
                'direct': False
            }
            quality = QUALITY_MAP.get(quality.upper(), QUALITIES.HIGH)
            hoster['quality'] = scraper_utils.get_quality(video, host, quality)
            hosters.append(hoster)

    # normalize ages into a 0-100 rating (Python 2 integer division)
    unit = (max_age - min_age) / 100
    if unit > 0:
        for hoster in hosters:
            hoster['rating'] = (hoster['age'] - min_age) / unit
    return hosters
def get_sources(self, video):
    """Scrape alternative-column host links from the sources page.

    Returns a list of hoster dicts; empty when no source page is known.
    """
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        fragment = dom_parser.parse_dom(html, 'div', {'class': 'alternativesc'})
        if fragment:
            for item in dom_parser.parse_dom(fragment[0], 'div', {'class': 'altercolumn'}):
                link = dom_parser.parse_dom(item, 'a', {'class': 'altercolumnlink'}, ret='href')
                host = dom_parser.parse_dom(item, 'span')
                if link and host:
                    link = link[0]
                    if not link.startswith('http'):
                        # NOTE(review): relative links are joined onto source_url
                        # rather than base_url — confirm this is intended
                        link = source_url + link
                    host = host[0]
                    quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
                    hoster = {
                        'multi-part': False,
                        'host': host,
                        'class': self,
                        'quality': quality,
                        'views': None,
                        'rating': None,
                        'url': link,
                        'direct': False
                    }
                    hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Fetch embeds from the site's token-authenticated AJAX endpoint.

    Posts the page's 'elid' plus a bearer token; the response is scanned for
    IFRAME sources. Returns a list of source dicts; empty when no source page
    or token is available.
    """
    sources = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return sources
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    if video.video_type == VIDEO_TYPES.MOVIE:
        action = 'getMovieEmb'
    else:
        action = 'getEpisodeEmb'
    match = re.search('elid\s*=\s*"([^"]+)', html)
    if self.__token is None:
        self.__get_token()
    if match and self.__token is not None:
        # elid payload: current timestamp, base64-encoded, then url-quoted
        elid = urllib.quote(base64.encodestring(str(int(time.time()))).strip())
        data = {'action': action, 'idEl': match.group(1), 'token': self.__token, 'elid': elid}
        ajax_url = scraper_utils.urljoin(self.base_url, EMBED_URL)
        headers = {'Authorization': 'Bearer %s' % (self.__get_bearer()), 'Referer': page_url}
        headers.update(XHR)
        html = self._http_get(ajax_url, data=data, headers=headers, cache_limit=.5)
        # the JSON response escapes quotes and slashes; undo before regexing
        html = html.replace('\\"', '"').replace('\\/', '/')
        pattern = '<IFRAME\s+SRC="([^"]+)'
        for match in re.finditer(pattern, html, re.DOTALL | re.I):
            url = match.group(1)
            host = scraper_utils.get_direct_hostname(self, url)
            if host == 'gvideo':
                direct = True
                quality = scraper_utils.gv_get_quality(url)
            else:
                if 'vk.com' in url and url.endswith('oid='):
                    continue  # skip bad vk.com links
                direct = False
                host = urlparse.urlparse(url).hostname
                quality = scraper_utils.get_quality(video, host, QUALITIES.HD720)
            source = {
                'multi-part': False,
                'url': url,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'direct': direct
            }
            sources.append(source)
    return sources
def get_sources(self, video):
    """Scrape 'movieplay' iframes; miradetodo embeds are resolved further.

    miradetodo pages are fetched and mined with the GK/amazon/sources-list
    helpers; any other iframe is kept as a non-direct hosted link. Returns a
    list of hoster dicts; empty when no source page is known.
    """
    source_url = self.get_url(video)
    hosters = []
    sources = {}
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    for _attrs, fragment in dom_parser2.parse_dom(html, 'div', {'class': 'movieplay'}):
        for attrs, _content in dom_parser2.parse_dom(fragment, 'iframe', req='src') + dom_parser2.parse_dom(fragment, 'iframe', req='data-lazy-src'):
            # prefer src; fall back to the lazy-load attribute
            iframe_url = attrs.get('src', '')
            if not iframe_url.startswith('http'):
                iframe_url = attrs.get('data-lazy-src', '')
                if not iframe_url.startswith('http'):
                    continue
            if 'miradetodo' in iframe_url:
                # follow the embed page, then its nav link, before mining links
                html = self._http_get(iframe_url, cache_limit=.5)
                fragment = dom_parser2.parse_dom(html, 'nav', {'class': 'nav'})
                if fragment:
                    stream_url = dom_parser2.parse_dom(fragment[0].content, 'a', req='href')
                    if stream_url:
                        html = self._http_get(stream_url[0].attrs['href'], cache_limit=.5)
                sources.update(self.__get_gk_links(html))
                sources.update(self.__get_gk_links2(html))
                sources.update(self.__get_amazon_links(html))
                sources.update(scraper_utils.parse_sources_list(self, html))
            else:
                host = urlparse.urlparse(iframe_url).hostname
                source = {'quality': scraper_utils.get_quality(video, host, QUALITIES.HIGH), 'direct': False}
                sources.update({iframe_url: source})

    for source in sources:
        # playback needs the User-Agent appended to the stream URL
        stream_url = source + '|User-Agent=%s' % (scraper_utils.get_ua())
        direct = sources[source]['direct']
        quality = sources[source]['quality']
        host = scraper_utils.get_direct_hostname(self, source) if direct else urlparse.urlparse(source).hostname
        hoster = {
            'multi-part': False,
            'url': stream_url,
            'class': self,
            'quality': quality,
            'host': host,
            'rating': None,
            'views': None,
            'direct': direct
        }
        hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Scrape debrid release links from the post body, rated by release height.

    Returns an empty list when no source page is known for the video.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    post_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(post_url, require_debrid=True, cache_limit=.5)
    fragment = dom_parser2.parse_dom(html, 'div', {'class': 'post-cont'})
    if not fragment:
        return hosters
    body = re.search('<p>\s*<strong>(.*?)<script', fragment[0].content, re.DOTALL)
    if not body:
        return hosters

    is_movie = video.video_type == VIDEO_TYPES.MOVIE
    for anchor, _content in dom_parser2.parse_dom(body.group(1), 'a', req='href'):
        link = anchor['href']
        if scraper_utils.excluded_link(link):
            continue
        # quality comes from the release height encoded in the link itself
        meta = scraper_utils.parse_movie_link(link) if is_movie else scraper_utils.parse_episode_link(link)
        host_name = urlparse.urlparse(link).hostname
        hosters.append({
            'multi-part': False,
            'host': host_name,
            'class': self,
            'views': None,
            'url': link,
            'rating': None,
            'quality': scraper_utils.get_quality(video, host_name, scraper_utils.height_get_quality(meta['height'])),
            'direct': False,
        })
    return hosters
def get_sources(self, video):
    """Scrape 'linktr' rows; the stream URL comes from the anchor or a redirect span.

    Rows without an http anchor fall back to the span id (a redirect token)
    and read the host name from an <h9> tag. Returns a list of hoster dicts.
    """
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    for _attrs, row in dom_parser2.parse_dom(html, 'tr', {'id': 'linktr'}):
        redirect = dom_parser2.parse_dom(row, 'span', req='id')
        link = dom_parser2.parse_dom(row, 'a', req='href')
        if link and link[0].attrs['href'].startswith('http'):
            stream_url = link[0].attrs['href']
        elif redirect:
            stream_url = redirect[0].attrs['id']
        else:
            stream_url = ''
        if stream_url.startswith('http'):
            host = urlparse.urlparse(stream_url).hostname
        else:
            host = dom_parser2.parse_dom(row, 'h9')
            host = host[0].content if host else ''
        if stream_url and host:
            quality = scraper_utils.get_quality(video, host, QUALITIES.HIGH)
            hoster = {
                'multi-part': False,
                'host': host,
                'class': self,
                'quality': quality,
                'views': None,
                'rating': None,
                'url': stream_url,
                'direct': False
            }
            hosters.append(hoster)
    return hosters
def get_sources(self, video):
    """Scrape iframes from the story-video div; quality read from the info list.

    Returns a list of hoster dicts; empty when no source page or video div exists.
    """
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters
    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, cache_limit=.5)
    # find the 'Quality' row of the info list; the for-else defaults q_str
    # when no such row exists
    for _attrs, item in dom_parser2.parse_dom(html, 'li'):
        label = dom_parser2.parse_dom(item, 'span', {'class': 'type'})
        value = dom_parser2.parse_dom(item, 'p', {'class': 'text'})
        if label and value and 'quality' in label[0].content.lower():
            q_str = value[0].content.upper()
            break
    else:
        q_str = ''
    fragment = dom_parser2.parse_dom(html, 'div', {'id': 'fstory-video'})
    if not fragment:
        return hosters
    for match in re.finditer('<iframe[^>]*src="([^"]+)', fragment[0].content, re.I):
        stream_url = match.group(1)
        host = urlparse.urlparse(stream_url).hostname
        quality = scraper_utils.get_quality(video, host, QUALITY_MAP.get(q_str, QUALITIES.HIGH))
        hoster = {
            'multi-part': False,
            'host': host,
            'class': self,
            'quality': quality,
            'views': None,
            'rating': None,
            'url': stream_url,
            'direct': False
        }
        hosters.append(hoster)
    return hosters
def __get_post_links(self, html, video):
    """Map links grouped under each <strong> release heading to their qualities.

    Returns a dict of {link: quality}; empty when no post content div matches.
    """
    sources = {}
    post = dom_parser2.parse_dom(html, 'div', {'class': 'postContent'})
    if post:
        post = post[0].content
        # each release block runs from one <strong> up to the next (or EOF)
        for fragment in re.finditer('(<strong>.*?)(?=<strong>|$)', post, re.DOTALL):
            fragment = fragment.group(1)
            release = dom_parser2.parse_dom(fragment, 'strong')
            if release:
                release = release[0].content
                # quality is inferred from the release height in the heading text
                meta = scraper_utils.parse_episode_link(release)
                release_quality = scraper_utils.height_get_quality(meta['height'])
                for attrs, _content in dom_parser2.parse_dom(fragment, 'a', req='href'):
                    link = attrs['href']
                    host = urlparse.urlparse(link).hostname
                    quality = scraper_utils.get_quality(video, host, release_quality)
                    sources[link] = quality
    return sources