def __get_links_from_json2(self, url, page_url, video_type): sources = {} headers = {'Referer': page_url} headers.update(XHR) html = self._http_get(url, headers=headers, cache_limit=0) js_data = scraper_utils.parse_json(html, url) try: playlist = js_data.get('playlist', []) for source in playlist[0].get('sources', []): stream_url = source['file'] if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo': quality = scraper_utils.gv_get_quality(stream_url) elif 'label' in source: quality = scraper_utils.height_get_quality(source['label']) else: if video_type == VIDEO_TYPES.MOVIE: meta = scraper_utils.parse_movie_link(stream_url) else: meta = scraper_utils.parse_episode_link(stream_url) quality = scraper_utils.height_get_quality(meta['height']) sources[stream_url] = {'quality': quality, 'direct': True} logger.log( 'Adding stream: %s Quality: %s' % (stream_url, quality), log_utils.LOGDEBUG) except Exception as e: logger.log('Exception during yesmovies extract: %s' % (e), log_utils.LOGDEBUG) return sources
def get_sources(self, video): source_url = self.get_url(video) hosters = [] if not source_url or source_url == FORCE_NO_MATCH: return hosters page_url = scraper_utils.urljoin(self.base_url, source_url) html = self._http_get(page_url, cache_limit=.5) fragment = dom_parser2.parse_dom(html, 'div', {'class': 'film-container'}) if fragment: iframe_url = dom_parser2.parse_dom(fragment[0].content, 'iframe', req='src') if iframe_url: iframe_url = scraper_utils.urljoin(self.base_url, iframe_url[0].attrs['src']) headers = {'Referer': page_url} html = self._http_get(iframe_url, headers=headers, cache_limit=.5) sources = scraper_utils.parse_sources_list(self, html) for source in sources: quality = sources[source]['quality'] host = scraper_utils.get_direct_hostname(self, source) stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': iframe_url}) hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True} match = re.search('(\d+[a-z]bps)', source) if match: hoster['extra'] = match.group(1) hosters.append(hoster) hosters.sort(key=lambda x: x.get('extra', ''), reverse=True) return hosters
def get_sources(self, video): hosters = [] source_url = self.get_url(video) if not source_url or source_url == FORCE_NO_MATCH: return hosters page_url = scraper_utils.urljoin(self.base_url, source_url) html = self._http_get(page_url, cache_limit=8) for attrs, _content in dom_parser2.parse_dom(html, 'a', req='href'): stream_url = attrs['href'] if MOVIE_URL in stream_url: meta = scraper_utils.parse_movie_link(stream_url) stream_url = scraper_utils.pathify_url( stream_url) + scraper_utils.append_headers( {'User-Agent': scraper_utils.get_ua()}) quality = scraper_utils.height_get_quality(meta['height']) hoster = { 'multi-part': False, 'host': scraper_utils.get_direct_hostname(self, stream_url), 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True } if 'format' in meta: hoster['format'] = meta['format'] hosters.append(hoster) return hosters
def __get_cloud_links(self, html, page_url, sub): hosters = [] html = html.replace('\\"', '"').replace('\\/', '/') match = re.search("dizi_kapak_getir\('([^']+)", html) if match: ep_id = match.group(1) for attrs, _content in dom_parser2.parse_dom(html, 'script', {'data-cfasync': 'false'}, req='src'): script_url = attrs['src'] html = self._http_get(script_url, cache_limit=24) match1 = re.search("var\s+kapak_url\s*=\s*'([^']+)", html) match2 = re.search("var\s+aCtkp\s*=\s*'([^']+)", html) if match1 and match2: link_url = '%s?fileid=%s&access_token=%s' % (match1.group(1), ep_id, match2.group(1)) headers = {'Referer': page_url} html = self._http_get(link_url, headers=headers, cache_limit=.5) js_data = scraper_utils.parse_json(html, link_url) for variant in js_data.get('variants', {}): stream_host = random.choice(variant.get('hosts', [])) if stream_host: stream_url = stream_host + variant['path'] + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url}) if not stream_url.startswith('http'): stream_url = 'http://' + stream_url host = scraper_utils.get_direct_hostname(self, stream_url) if 'width' in variant: quality = scraper_utils.width_get_quality(variant['width']) elif 'height' in variant: quality = scraper_utils.height_get_quality(variant['height']) else: quality = QUALITIES.HIGH hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True} hoster['subs'] = sub hosters.append(hoster) return hosters
def get_sources(self, video): source_url = self.get_url(video) hosters = [] if not source_url or source_url == FORCE_NO_MATCH: return hosters url = scraper_utils.urljoin(self.base_url, source_url) html = self._http_get(url, cache_limit=.5) sources = self.__get_posts(html) sources.update(self.__get_ajax(html, url)) sources.update(self.__get_embedded(html, url)) for source in sources: stream_url = source + scraper_utils.append_headers( {'User-Agent': scraper_utils.get_ua()}) host = scraper_utils.get_direct_hostname(self, source) hoster = { 'multi-part': False, 'host': host, 'class': self, 'quality': sources[source], 'views': None, 'rating': None, 'url': stream_url, 'direct': True, 'subs': 'Turkish subtitles' } hosters.append(hoster) return hosters
def __get_episode_sources(self, source_url, video): hosters = [] links = self.__find_episode(source_url, video) if not links: return hosters hash_data = self.__get_hash_data([link[0] for link in links]) for link in links: try: status = hash_data['hashes'][link[0]]['status'] except KeyError: status = '' if status.lower() != 'finished': continue stream_url = 'hash_id=%s' % (link[0]) host = scraper_utils.get_direct_hostname(self, stream_url) quality = scraper_utils.blog_get_quality(video, link[1], '') hoster = { 'multi-part': False, 'class': self, 'views': None, 'url': stream_url, 'rating': None, 'host': host, 'quality': quality, 'direct': True } hoster['extra'] = link[1] hosters.append(hoster) return hosters
def __get_king_links(self, iframe_url): hosters = [] match = re.search('v=(.*)', iframe_url) if match: data = {'ID': match.group(1)} headers = {'Referer': iframe_url} headers.update(XHR) xhr_url = iframe_url.split('?')[0] html = self._http_get(xhr_url, params={'p': 'GetVideoSources'}, data=data, headers=headers, cache_limit=.5) js_data = scraper_utils.parse_json(html, xhr_url) try: for source in js_data['VideoSources']: stream_url = source['file'] + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()}) host = scraper_utils.get_direct_hostname(self, source['file']) label = source.get('label', '') if host == 'gvideo': quality = scraper_utils.gv_get_quality(source['file']) elif re.search('\d+p?', label): quality = scraper_utils.height_get_quality(label) else: quality = QUALITY_MAP.get(label, QUALITIES.HIGH) hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True, 'subs': 'Turkish Subtitles'} hosters.append(hoster) except: pass return hosters
def get_sources(self, video): hosters = [] sources = [] source_url = self.get_url(video) if not source_url or source_url == FORCE_NO_MATCH: return hosters page_url = scraper_utils.urljoin(self.base_url, source_url) html = self._http_get(page_url, cache_limit=.25) match = re.search('var\s+view_id\s*=\s*"([^"]+)', html) if not match: return hosters view_id = match.group(1) for lang in ['or', 'tr']: subs = True if lang == 'tr' else False view_data = {'id': view_id, 'tip': 'view', 'dil': lang} html = self._http_get(self.ajax_url, data=view_data, headers=XHR, cache_limit=.25) html = html.strip() html = re.sub(r'\\n|\\t', '', html) match = re.search('var\s+sources\s*=\s*(\[.*?\])', html) if match: raw_data = match.group(1) raw_data = raw_data.replace('\\', '') else: raw_data = html js_data = scraper_utils.parse_json(raw_data, self.ajax_url) if 'data' not in js_data: continue src = dom_parser2.parse_dom(js_data['data'], 'iframe', req='src') if not src: continue html = self._http_get(src[0].attrs['src'], cache_limit=.25) for attrs, _content in dom_parser2.parse_dom(html, 'iframe', req='src'): src = attrs['src'] if not src.startswith('http'): continue sources.append({'label': '720p', 'file': src, 'direct': False, 'subs': subs}) sources += [{'file': url, 'subs': subs} for url in scraper_utils.parse_sources_list(self, html).iterkeys()] if sources: break for source in sources: direct = source.get('direct', True) stream_url = source['file'] + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()}) if direct: host = scraper_utils.get_direct_hostname(self, stream_url) if host == 'gvideo': quality = scraper_utils.gv_get_quality(stream_url) elif 'label' in source: quality = scraper_utils.height_get_quality(source['label']) else: continue else: host = urlparse.urlparse(stream_url).hostname quality = scraper_utils.height_get_quality(source['label']) hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct} if source.get('subs'): hoster['subs'] = 'Turkish Subtitles' hosters.append(hoster) return hosters
def get_sources(self, video): source_url = self.get_url(video) hosters = [] sources = {} if not source_url or source_url == FORCE_NO_MATCH: return hosters page_url = scraper_utils.urljoin(self.base_url, source_url) html = self._http_get(page_url, cache_limit=.5) for _attrs, div in dom_parser2.parse_dom(html, 'div', {'class': 'tabcontent'}): for attrs, _content in dom_parser2.parse_dom(div, 'source', req='src'): source = attrs['src'] + scraper_utils.append_headers( { 'User-Agent': scraper_utils.get_ua(), 'Referer': page_url }) sources[source] = {'quality': None, 'direct': True} iframe_url = dom_parser2.parse_dom(div, 'iframe', req='src') if iframe_url: iframe_url = iframe_url[0].attrs['src'] if 'songs2dl' in iframe_url: headers = {'Referer': page_url} iframe_html = self._http_get(iframe_url, headers=headers, cache_limit=1) sources.update( scraper_utils.parse_sources_list(self, iframe_html)) else: sources[iframe_url] = {'quality': None, 'direct': False} sources.update(self.__get_mirror_links(html, video)) page_quality = self.__get_best_quality(sources) for source, values in sources.iteritems(): direct = values['direct'] if direct: host = scraper_utils.get_direct_hostname(self, source) else: host = urlparse.urlparse(source).hostname if values['quality'] is None: values['quality'] = page_quality hoster = { 'multi-part': False, 'host': host, 'class': self, 'views': None, 'url': source, 'rating': None, 'quality': values['quality'], 'direct': direct } hosters.append(hoster) return hosters
def __get_embed_links(self, html): hosters = [] sources = scraper_utils.parse_sources_list(self, html) for source in sources: quality = source['quality'] stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()}) hoster = {'multi-part': False, 'host': scraper_utils.get_direct_hostname(self, source), 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True, 'subs': 'Turkish Subtitles'} hosters.append(hoster) return hosters
def __get_embedded_links(self, html, sub): hosters = [] html = html.replace('\\"', '"').replace('\\/', '/') sources = scraper_utils.parse_sources_list(self, html) for source in sources: host = scraper_utils.get_direct_hostname(self, source) quality = sources[source]['quality'] direct = sources[source]['direct'] hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': source, 'direct': direct} hoster['subs'] = sub hosters.append(hoster) return hosters
def get_sources(self, video): source_url = self.get_url(video) hosters = [] if not source_url or source_url == FORCE_NO_MATCH: return hosters url = scraper_utils.urljoin(self.base_url, source_url) html = self._http_get(url, cache_limit=8) hosts = [ r.content for r in dom_parser2.parse_dom( html, 'p', {'class': 'server_servername'}) ] links = [ r.content for r in dom_parser2.parse_dom(html, 'p', {'class': 'server_play'}) ] for host, link_frag in zip(hosts, links): stream_url = dom_parser2.parse_dom(link_frag, 'a', req='href') if not stream_url: continue stream_url = stream_url[0].attrs['href'] host = re.sub('^Server\s*', '', host, re.I) host = re.sub('\s*Link\s+\d+', '', host) if host.lower() == 'google': sources = self.__get_gvideo_links(stream_url) else: sources = [{'host': host, 'link': stream_url}] for source in sources: host = scraper_utils.get_direct_hostname(self, stream_url) if host == 'gvideo': quality = scraper_utils.gv_get_quality(stream_url) stream_url = source['link'] + scraper_utils.append_headers( {'User-Agent': scraper_utils.get_ua()}) direct = True else: stream_url = scraper_utils.pathify_url(source['link']) host = HOST_SUB.get(source['host'].lower(), source['host']) quality = scraper_utils.get_quality( video, host, QUALITIES.HIGH) direct = False hoster = { 'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct } hosters.append(hoster) return hosters
def get_sources(self, video): hosters = [] source_url = self.get_url(video) if not source_url or source_url == FORCE_NO_MATCH: return hosters url = scraper_utils.urljoin(self.base_url, source_url) html = self._http_get(url, cache_limit=.5) page_quality = QUALITIES.HD720 if video.video_type == VIDEO_TYPES.MOVIE else QUALITIES.HIGH for _attrs, fragment in dom_parser2.parse_dom( html, 'div', {'class': 'embed-responsive'}): iframe_url = dom_parser2.parse_dom(fragment, 'iframe', req='data-src') if iframe_url: iframe_url = iframe_url[0].attrs['data-src'] iframe_host = urlparse.urlparse(iframe_url).hostname if iframe_host in DIRECT_HOSTS: sources = self.__parse_streams(iframe_url, url) else: sources = { iframe_url: { 'quality': scraper_utils.get_quality(video, iframe_host, page_quality), 'direct': False } } for source in sources: quality = sources[source]['quality'] direct = sources[source]['direct'] if direct: host = scraper_utils.get_direct_hostname(self, source) stream_url = source + scraper_utils.append_headers( {'User-Agent': scraper_utils.get_ua()}) else: host = urlparse.urlparse(source).hostname stream_url = source hoster = { 'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': direct } hosters.append(hoster) return hosters
def get_sources(self, video): hosters = [] source_url = self.get_url(video) if not source_url or source_url == FORCE_NO_MATCH: return hosters page_url = scraper_utils.urljoin(self.base_url, source_url) html = self._http_get(page_url, cache_limit=.5) page_quality = dom_parser2.parse_dom(html, 'dd', {'class': 'status'}) if page_quality: page_quality = QUALITY_MAP.get(page_quality[0].content, QUALITIES.HIGH) else: page_quality = QUALITIES.HIGH if video.video_type == VIDEO_TYPES.EPISODE: fragment = dom_parser2.parse_dom(html, 'div', {'id': 'servers-list'}) gk_html = fragment[0].content if fragment else '' else: gk_html = html link_url = scraper_utils.urljoin(self.base_url, LINK_URL) player_url = scraper_utils.urljoin(self.base_url, PLAYER_URL) for stream_url, quality in scraper_utils.get_gk_links( self, gk_html, page_url, page_quality, link_url, player_url).iteritems(): host = scraper_utils.get_direct_hostname(self, stream_url) if host == 'gvideo': direct = True quality = quality else: host = urlparse.urlparse(stream_url).hostname quality = scraper_utils.get_quality(video, host, quality) direct = False if host is not None: stream_url += scraper_utils.append_headers( {'User-Agent': scraper_utils.get_ua()}) hoster = { 'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct } hosters.append(hoster) return hosters
def get_sources(self, video): hosters = [] source_url = self.get_url(video) if not source_url or source_url == FORCE_NO_MATCH: return hosters for stream in self.__get_videos(source_url, video): if video.video_type == VIDEO_TYPES.EPISODE and not scraper_utils.release_check(video, stream['name']): continue host = scraper_utils.get_direct_hostname(self, stream['url']) hoster = {'multi-part': False, 'class': self, 'views': None, 'url': stream['url'], 'rating': None, 'host': host, 'quality': stream['quality'], 'direct': True} if 'size' in stream: hoster['size'] = scraper_utils.format_size(stream['size']) if 'name' in stream: hoster['extra'] = stream['name'] hosters.append(hoster) return hosters
def __get_movie_sources(self, source_url): hosters = [] query = kodi.parse_query(urlparse.urlparse(source_url).query) movie_id = query.get('movie_id') or self.__get_movie_id(source_url) if not movie_id: return hosters details_url = scraper_utils.urljoin(self.movie_base_url, MOVIE_DETAILS_URL) detail_data = self._json_get(details_url, params={'movie_id': movie_id}, cache_limit=24) try: torrents = detail_data['data']['movie']['torrents'] except KeyError: torrents = [] try: hashes = [torrent['hash'].lower() for torrent in torrents] except KeyError: hashes = [] hash_data = self.__get_hash_data(hashes) for torrent in torrents: hash_id = torrent['hash'].lower() try: status = hash_data['hashes'][hash_id]['status'] except KeyError: status = '' if status.lower() != 'finished': continue stream_url = 'hash_id=%s' % (hash_id) host = scraper_utils.get_direct_hostname(self, stream_url) quality = QUALITY_MAP.get(torrent['quality'], QUALITIES.HD720) hoster = { 'multi-part': False, 'class': self, 'views': None, 'url': stream_url, 'rating': None, 'host': host, 'quality': quality, 'direct': True } if 'size_bytes' in torrent: hoster['size'] = scraper_utils.format_size( torrent['size_bytes'], 'B') if torrent['quality'] == '3D': hoster['3D'] = True hosters.append(hoster) return hosters
def get_sources(self, video): source_url = self.get_url(video) hosters = [] if not source_url or source_url == FORCE_NO_MATCH: return hosters page_url = scraper_utils.urljoin(self.base_url, source_url) html = self._http_get(page_url, cache_limit=.5) if video.video_type == VIDEO_TYPES.EPISODE: gk_html = ''.join( match.group(0) for match in re.finditer( '<a[^>]*>(%s|Server \d+)</a>' % (video.episode), html, re.I)) else: gk_html = html link_url = scraper_utils.urljoin(self.base_url, LINK_URL) player_url = scraper_utils.urljoin(self.base_url, PLAYER_URL) sources = scraper_utils.get_gk_links(self, gk_html, page_url, QUALITIES.HIGH, link_url, player_url) sources.update(self.__get_ht_links(html, page_url)) for stream_url, quality in sources.iteritems(): host = scraper_utils.get_direct_hostname(self, stream_url) if host == 'gvideo': direct = True else: host = urlparse.urlparse(stream_url).hostname direct = False if host is None: continue stream_url += scraper_utils.append_headers( {'User-Agent': scraper_utils.get_ua()}) hoster = { 'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': direct } hosters.append(hoster) return hosters
def get_sources(self, video): hosters = [] source_url = self.get_url(video) if not source_url or source_url == FORCE_NO_MATCH: return hosters page_url = scraper_utils.urljoin(self.base_url, source_url) html = self._http_get(page_url, cache_limit=0) match = re.search('var\s*video_id\s*=\s*"([^"]+)', html) if not match: return hosters video_id = match.group(1) headers = {'Referer': page_url} headers.update(XHR) # _html = self._http_get(scraper_utils.urljoin(self.base_url, 'av'), headers=headers, method='POST', cache_limit=0) vid_url = scraper_utils.urljoin(self.base_url, VIDEO_URL) html = self._http_get(vid_url, data={'v': video_id}, headers=headers, cache_limit=0) for source, value in scraper_utils.parse_json(html, vid_url).iteritems(): match = re.search('url=(.*)', value) if not match: continue stream_url = urllib.unquote(match.group(1)) host = scraper_utils.get_direct_hostname(self, stream_url) if host == 'gvideo': quality = scraper_utils.gv_get_quality(stream_url) else: quality = scraper_utils.height_get_quality(source) stream_url += scraper_utils.append_headers( {'User-Agent': scraper_utils.get_ua()}) hoster = { 'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True } hosters.append(hoster) return hosters
def get_sources(self, video): hosters = [] source_url = self.get_url(video) if not source_url or source_url == FORCE_NO_MATCH: return hosters url = scraper_utils.urljoin(self.base_url, source_url) html = self._http_get(url, cache_limit=8) fragment = dom_parser2.parse_dom(html, 'div', {'class': 'playex'}) if fragment: html = fragment[0].content links = scraper_utils.parse_sources_list(self, html) for link in links: stream_url = link if self.base_url in link: redir_url = self._http_get(link, headers={'Referer': url}, allow_redirect=False, method='HEAD') if redir_url.startswith('http'): stream_url = redir_url host = scraper_utils.get_direct_hostname(self, stream_url) if host == 'gvideo': quality = scraper_utils.gv_get_quality(stream_url) else: quality = links[link]['quality'] stream_url += scraper_utils.append_headers({ 'User-Agent': scraper_utils.get_ua(), 'Referer': url }) source = { 'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True } hosters.append(source) return hosters
def __get_ajax(self, html, page_url): sources = {} pattern = '\$\.ajax\(\s*"([^"]+)' match = re.search(pattern, html) if not match: return sources post_url = match.group(1) headers = {'Referer': page_url} html = self._http_get(post_url, headers=headers, cache_limit=.5) js_result = scraper_utils.parse_json(html, post_url) for key in js_result: stream_url = js_result[key] host = scraper_utils.get_direct_hostname(self, stream_url) if host == 'gvideo': quality = scraper_utils.gv_get_quality(stream_url) else: quality = scraper_utils.height_get_quality(key) sources[stream_url] = quality return sources
def __get_posts(self, html): sources = {} pattern = '\$\.post\("([^"]+)"\s*,\s*\{(.*?)\}' match = re.search(pattern, html) if not match: return sources post_url, post_data = match.groups() data = self.__get_data(post_data) html = self._http_get(post_url, data=data, cache_limit=.5) js_result = scraper_utils.parse_json(html, post_url) for key in js_result: stream_url = js_result[key] host = scraper_utils.get_direct_hostname(self, stream_url) if host == 'gvideo': quality = scraper_utils.gv_get_quality(stream_url) else: quality = scraper_utils.height_get_quality(key) sources[stream_url] = quality return sources
def __get_json_links(self, html, sub): hosters = [] js_data = scraper_utils.parse_json(html) if 'sources' in js_data: for source in js_data.get('sources', []): stream_url = source.get('file') if stream_url is None: continue host = scraper_utils.get_direct_hostname(self, stream_url) if host == 'gvideo': quality = scraper_utils.gv_get_quality(stream_url) elif 'label' in source: quality = scraper_utils.height_get_quality(source['label']) else: quality = QUALITIES.HIGH hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'url': stream_url, 'direct': True} hoster['subs'] = sub hosters.append(hoster) return hosters
def __get_direct(self, html, page_url): sources = [] best_quality = QUALITIES.HIGH match = re.search('''['"]?sources["']?\s*:\s*\[(.*?)\}\s*,?\s*\]''', html, re.DOTALL) if match: files = re.findall('''['"]?file['"]?\s*:\s*(.*?)['"]([^'"]+)''', match.group(1), re.DOTALL) labels = re.findall('''['"]?label['"]?\s*:\s*['"]([^'"]*)''', match.group(1), re.DOTALL) for stream, label in map(None, files, labels): func, stream_url = stream if 'atob' in func: stream_url = base64.b64decode(stream_url) stream_url = stream_url.replace(' ', '%20') host = scraper_utils.get_direct_hostname(self, stream_url) label = re.sub(re.compile('\s*HD', re.I), '', label) quality = scraper_utils.height_get_quality(label) if Q_ORDER[quality] > Q_ORDER[best_quality]: best_quality = quality stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua(), 'Referer': page_url}) source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True} sources.append(source) return best_quality, sources
def __get_linked(self, html): sources = {} match = re.search('dizi=([^"]+)', html) if not match: return sources html = self._http_get(AJAX_URL, params={'dizi': match.group(1)}, headers=XHR, cache_limit=.5) js_result = scraper_utils.parse_json(html, AJAX_URL) for source in js_result.get('success', []): stream_url = source.get('src') if stream_url is None: continue if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo': quality = scraper_utils.gv_get_quality(stream_url) elif 'label' in source: quality = scraper_utils.height_get_quality(source['label']) else: quality = QUALITIES.HIGH sources[stream_url] = quality return sources
def __get_ht_links(self, html, page_url): sources = {} match = re.search('Htplugins_Make_Player\("([^"]+)', html) if match: data = {'data': match.group(1)} url = scraper_utils.urljoin(self.base_url, LINK_URL2) headers = {'Referer': page_url} html = self._http_get(url, data=data, headers=headers, cache_limit=.25) js_data = scraper_utils.parse_json(html, url) if 'l' in js_data: for link in js_data['l']: if scraper_utils.get_direct_hostname(self, link) == 'gvideo': quality = scraper_utils.gv_get_quality(link) else: quality = QUALITIES.HIGH sources[link] = quality return sources
def get_sources(self, video): sources = [] source_url = self.get_url(video) if not source_url or source_url == FORCE_NO_MATCH: return sources object_id = self.__extract_id(source_url) if object_id is None: return sources source_url = TITLE_URL.format(id=object_id) page_url = scraper_utils.urljoin(self.base_url, source_url) html = self._authed_http_get(page_url, cache_limit=.5) js_data = scraper_utils.parse_json(html, page_url) if video.video_type == VIDEO_TYPES.MOVIE: links = js_data.get('links', {}) else: links = self.__episode_match(js_data, video) prefix = js_data.get('domain', {}).get('prefix') suffix = js_data.get('domain', {}).get('suffix') for key, path in links.get('links', {}).iteritems(): for mirror in sorted(list(set(links.get('mirrors', [])))): stream_url = TEMPLATE.format(prefix=prefix, mirror=mirror, suffix=suffix, path=path) host = scraper_utils.get_direct_hostname(self, stream_url) quality = Q_MAP.get(key, QUALITIES.HIGH) source = { 'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality, 'views': None, 'rating': None, 'direct': True } source['version'] = '(Mirror %d)' % (mirror) sources.append(source) return sources
def __get_links_from_xml(self, url, video, page_url, cookies): sources = {} try: headers = {'Referer': page_url} xml = self._http_get(url, cookies=cookies, headers=headers, cache_limit=.5) root = ET.fromstring(xml) for item in root.findall('.//item'): title = item.find('title').text if title and title.upper() == 'OOPS!': continue for source in item.findall( '{https://yesmovies.to/ajax/movie_sources/'): stream_url = source.get('file') label = source.get('label') if scraper_utils.get_direct_hostname( self, stream_url) == 'gvideo': quality = scraper_utils.gv_get_quality(stream_url) elif label: quality = scraper_utils.height_get_quality(label) elif title: quality = scraper_utils.blog_get_quality( video, title, '') else: quality = scraper_utils.blog_get_quality( video, stream_url, '') sources[stream_url] = {'quality': quality, 'direct': True} logger.log( 'Adding stream: %s Quality: %s' % (stream_url, quality), log_utils.LOGDEBUG) except Exception as e: logger.log('Exception during YesMovies XML Parse: %s' % (e), log_utils.LOGWARNING) return sources
def get_sources(self, video): source_url = self.get_url(video) hosters = [] sources = [] if not source_url or source_url == FORCE_NO_MATCH: return hosters page_url = scraper_utils.urljoin(self.base_url, source_url) html = self._http_get(page_url, cache_limit=1) iframes = dom_parser2.parse_dom(html, 'iframe', req='src') for attrs, _content in iframes: iframe_url = attrs['src'] if 'docs.google.com' in iframe_url: sources = scraper_utils.parse_google(self, iframe_url) break else: iframe_url = scraper_utils.urljoin(self.base_url, iframe_url) html = self._http_get(iframe_url, cache_limit=1) iframes += dom_parser2.parse_dom(html, 'iframe', req='src') for source in sources: host = scraper_utils.get_direct_hostname(self, source) hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': scraper_utils.gv_get_quality(source), 'views': None, 'rating': None, 'url': source, 'direct': True} hosters.append(hoster) return hosters
def __get_links(self, url, video): hosters = [] search_url = scraper_utils.urljoin(self.base_url, SEARCH_URL) query = self.__translate_search(url) result = self._http_get(search_url, data=query, allow_redirect=False, cache_limit=.5) for item in result.get('files', []): checks = [False] * 6 if item.get('type', '').upper() != 'VIDEO': checks[0] = True if item.get('is_ready') != '1': checks[1] = True if item.get('av_result') in ['warning', 'infected']: checks[2] = True if 'video_info' not in item: checks[3] = True if item.get('video_info') and not re.search( '#0:(0|1)(\((eng|und)\))?:\s*Audio:', item['video_info'], re.I): checks[4] = True if not scraper_utils.release_check(video, item['name']): checks[5] = True if any(checks): logger.log( 'Furk.net result excluded: %s - |%s|' % (checks, item['name']), log_utils.LOGDEBUG) continue match = re.search('(\d{3,})\s*x\s*(\d{3,})', item['video_info']) if match: width, _height = match.groups() quality = scraper_utils.width_get_quality(width) else: if video.video_type == VIDEO_TYPES.MOVIE: meta = scraper_utils.parse_movie_link(item['name']) else: meta = scraper_utils.parse_episode_link(item['name']) quality = scraper_utils.height_get_quality(meta['height']) if 'url_pls' in item: size_gb = scraper_utils.format_size(int(item['size']), 'B') if self.max_bytes and int(item['size']) > self.max_bytes: logger.log( 'Result skipped, Too big: |%s| - %s (%s) > %s (%sGB)' % (item['name'], item['size'], size_gb, self.max_bytes, self.max_gb)) continue stream_url = item['url_pls'] host = scraper_utils.get_direct_hostname(self, stream_url) hoster = { 'multi-part': False, 'class': self, 'views': None, 'url': stream_url, 'rating': None, 'host': host, 'quality': quality, 'direct': True } hoster['size'] = size_gb hoster['extra'] = item['name'] hosters.append(hoster) else: logger.log( 'Furk.net result skipped - no playlist: |%s|' % (json.dumps(item)), log_utils.LOGDEBUG) return hosters
def get_sources(self, video): hosters = [] sources = {} source_url = self.get_url(video) if not source_url or source_url == FORCE_NO_MATCH: return hosters page_url = scraper_utils.urljoin(self.base_url, source_url) movie_id, watching_url, html = self.__get_source_page( video.video_type, page_url) links = [] for match in dom_parser2.parse_dom(html, 'li', {'class': 'ep-item'}, req=['data-id', 'data-server']): label = dom_parser2.parse_dom(match.content, 'a', req='title') if not label: continue if video.video_type == VIDEO_TYPES.EPISODE and not self.__episode_match( video, label[0].attrs['title']): continue links.append((match.attrs['data-server'], match.attrs['data-id'])) for link_type, link_id in links: if link_type in ['12', '13', '14', '15']: url = scraper_utils.urljoin( self.base_url, PLAYLIST_URL1.format(ep_id=link_id)) sources.update(self.__get_link_from_json(url)) elif kodi.get_setting('scraper_url'): url = scraper_utils.urljoin( self.base_url, PLAYLIST_URL2.format(ep_id=link_id)) params = self.__get_params(movie_id, link_id, watching_url) if params is not None: url += '?' + urllib.urlencode(params) sources.update( self.__get_links_from_json2(url, page_url, video.video_type)) for source in sources: if not source.lower().startswith('http'): continue if sources[source]['direct']: host = scraper_utils.get_direct_hostname(self, source) if host != 'gvideo': stream_url = source + scraper_utils.append_headers( { 'User-Agent': scraper_utils.get_ua(), 'Referer': page_url }) else: stream_url = source else: host = urlparse.urlparse(source).hostname stream_url = source hoster = { 'multi-part': False, 'host': host, 'class': self, 'quality': sources[source]['quality'], 'views': None, 'rating': None, 'url': stream_url, 'direct': sources[source]['direct'] } hosters.append(hoster) return hosters