def _get_episode_url(self, show_url, video):
    params = scraper_utils.parse_query(show_url)
    cmd = ('{"jsonrpc": "2.0", "method": "VideoLibrary.GetEpisodes", "params": {"tvshowid": %s, "season": %s, '
           '"filter": {"field": "%s", "operator": "is", "value": "%s"}, '
           '"limits": {"start": 0, "end": 25}, '
           '"properties": ["title", "season", "episode", "file", "streamdetails"], '
           '"sort": {"order": "ascending", "method": "label", "ignorearticle": true}}, "id": "libTvShows"}')
    base_url = 'video_type=%s&id=%s'
    episodes = []
    force_title = scraper_utils.force_title(video)
    if not force_title:
        run = cmd % (params['id'], video.season, 'episode', video.episode)
        meta = xbmc.executeJSONRPC(run)
        meta = scraper_utils.parse_json(meta)
        logger.log('Episode Meta: %s' % (meta), log_utils.LOGDEBUG)
        if 'result' in meta and 'episodes' in meta['result']:
            episodes = meta['result']['episodes']
    else:
        logger.log('Skipping S&E matching as title search is forced on: %s' % (video.trakt_id), log_utils.LOGDEBUG)

    if (force_title or kodi.get_setting('title-fallback') == 'true') and video.ep_title and not episodes:
        run = cmd % (params['id'], video.season, 'title', video.ep_title)
        meta = xbmc.executeJSONRPC(run)
        meta = scraper_utils.parse_json(meta)
        logger.log('Episode Title Meta: %s' % (meta), log_utils.LOGDEBUG)
        if 'result' in meta and 'episodes' in meta['result']:
            episodes = meta['result']['episodes']

    for episode in episodes:
        if episode['file'].endswith('.strm'):
            continue
        return base_url % (video.video_type, episode['episodeid'])
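# A minimal sketch of the JSON-RPC round trip above (illustration only; the
# response shape is assumed from how the code indexes meta['result']['episodes']
# and each episode's 'file'/'episodeid' keys; all values are hypothetical):
#
#   run = cmd % (16, 2, 'episode', 5)
#   meta = scraper_utils.parse_json(xbmc.executeJSONRPC(run))
#   # meta resembles:
#   # {"jsonrpc": "2.0", "id": "libTvShows",
#   #  "result": {"episodes": [{"episodeid": 101, "season": 2, "episode": 5,
#   #                           "title": "...", "file": "/tv/show/s02e05.mkv",
#   #                           "streamdetails": {...}}],
#   #             "limits": {"start": 0, "end": 1, "total": 1}}}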
def get_sources(self, video):
    hosters = []
    sources = {}
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    page_url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(page_url, cache_limit=.5)
    match = re.search("load_player\('([^']+)", html)
    if not match:
        return hosters

    headers = {'Referer': page_url, 'Server': 'cloudflare-nginx', 'Accept': 'text/html, */*; q=0.01',
               'Accept-Language': 'en-US,en;q=0.5', 'Accept-Formating': 'application/json, text/javascript',
               'Accept-Encoding': 'gzip, deflate'}
    headers.update(XHR)
    params = {'id': match.group(1)}
    player_url = scraper_utils.urljoin(self.base_url, PLAYER_URL)
    html = self._http_get(player_url, params=params, headers=headers, cache_limit=1)
    js_data = scraper_utils.parse_json(html, player_url)
    pl_url = js_data.get('value') or js_data.get('download')
    if not pl_url:
        return hosters

    headers = {'Referer': page_url}
    if pl_url.startswith('//'):
        pl_url = 'https:' + pl_url
    html = self._http_get(pl_url, headers=headers, allow_redirect=False, cache_limit=0)
    if html.startswith('http'):
        streams = [(html, '')]
    else:
        js_data = scraper_utils.parse_json(html, pl_url)
        try:
            streams = [(source['file'], source.get('label', '')) for source in js_data['playlist'][0]['sources']]
        except:
            streams = []

    for stream_url, label in streams:
        if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
            sources[stream_url] = {'quality': scraper_utils.gv_get_quality(stream_url), 'direct': True}
        else:
            if label:
                quality = scraper_utils.height_get_quality(label)
            else:
                quality = QUALITIES.HIGH
            sources[stream_url] = {'quality': quality, 'direct': False}

    for source, value in sources.iteritems():
        direct = value['direct']
        quality = value['quality']
        if direct:
            host = scraper_utils.get_direct_hostname(self, source)
        else:
            host = urlparse.urlparse(source).hostname
        stream_url = source + scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None,
                  'rating': None, 'url': stream_url, 'direct': direct}
        hosters.append(hoster)
    return hosters
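# The append_headers call above relies on Kodi's convention of passing HTTP
# headers after a '|' in the stream URL. A minimal sketch of the assumed output
# format (the real helper lives in scraper_utils):
def _append_headers_sketch(headers):
    import urllib
    return '|' + '&'.join('%s=%s' % (key, urllib.quote_plus(str(value))) for key, value in headers.items())

# _append_headers_sketch({'User-Agent': 'Mozilla/5.0'}) -> '|User-Agent=Mozilla%2F5.0'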
def __get_gk_links(self, html, page_url):
    sources = {}
    for link in dom_parser.parse_dom(html, 'div', {'class': '[^"]*server_line[^"]*'}):
        film_id = dom_parser.parse_dom(link, 'a', ret='data-film')
        name_id = dom_parser.parse_dom(link, 'a', ret='data-name')
        server_id = dom_parser.parse_dom(link, 'a', ret='data-server')
        if film_id and name_id and server_id:
            data = {'ipplugins': 1, 'ip_film': film_id[0], 'ip_server': server_id[0], 'ip_name': name_id[0]}
            headers = dict(XHR)  # copy so the shared XHR headers aren't mutated
            headers['Referer'] = page_url
            url = urlparse.urljoin(self.base_url, LINK_URL)
            html = self._http_get(url, data=data, headers=headers, cache_limit=.25)
            js_data = scraper_utils.parse_json(html, url)
            if 's' in js_data:
                url = urlparse.urljoin(self.base_url, LINK_URL2)
                params = {'u': js_data['s'], 'w': '100%', 'h': 420}
                html = self._http_get(url, params=params, data=data, headers=headers, cache_limit=.25)
                js_data = scraper_utils.parse_json(html, url)
                if 'data' in js_data and js_data['data']:
                    if isinstance(js_data['data'], basestring):
                        sources[js_data['data']] = QUALITIES.HIGH
                    else:
                        for link in js_data['data']:
                            stream_url = link['files']
                            if self._get_direct_hostname(stream_url) == 'gvideo':
                                quality = scraper_utils.gv_get_quality(stream_url)
                            elif 'quality' in link:
                                quality = scraper_utils.height_get_quality(link['quality'])
                            else:
                                quality = QUALITIES.HIGH
                            sources[stream_url] = quality
    return sources
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    self.__get_token()
    if self.__token is None:
        return results

    search_url, u = self.__get_search_url()
    search_url = scraper_utils.urljoin(API_BASE_URL, search_url)
    timestamp = int(time.time() * 1000)
    s = self.__get_s()
    query = {'q': title, 'limit': '100', 'timestamp': timestamp, 'verifiedCheck': self.__token,
             'set': s, 'rt': self.__get_rt(self.__token + s), 'sl': self.__get_sl(u)}
    headers = {'Referer': self.base_url}
    html = self._http_get(search_url, data=query, headers=headers, cache_limit=1)
    if video_type in [VIDEO_TYPES.TVSHOW, VIDEO_TYPES.EPISODE]:
        media_type = 'TV SHOW'
    else:
        media_type = 'MOVIE'

    for item in scraper_utils.parse_json(html, search_url):
        if not item['meta'].upper().startswith(media_type):
            continue
        match_year = str(item['year']) if 'year' in item and item['year'] else ''
        if not year or not match_year or year == match_year:
            result = {'title': scraper_utils.cleanse_title(item['title']),
                      'url': scraper_utils.pathify_url(item['permalink'].replace('/show/', '/tv-show/')),
                      'year': match_year}
            results.append(result)
    return results
def get_sources(self, video):
    hosters = []
    source_url = self.get_url(video)
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    params = scraper_utils.parse_query(source_url)
    if video.video_type == VIDEO_TYPES.MOVIE:
        cmd = ('{"jsonrpc": "2.0", "method": "VideoLibrary.GetMovieDetails", "params": {"movieid": %s, '
               '"properties": ["file", "playcount", "streamdetails"]}, "id": "libMovies"}')
        result_key = 'moviedetails'
    else:
        cmd = ('{"jsonrpc": "2.0", "method": "VideoLibrary.GetEpisodeDetails", "params": {"episodeid": %s, '
               '"properties": ["file", "playcount", "streamdetails"]}, "id": "libTvShows"}')
        result_key = 'episodedetails'

    run = cmd % (params['id'])
    meta = xbmc.executeJSONRPC(run)
    meta = scraper_utils.parse_json(meta)
    logger.log('Source Meta: %s' % (meta), log_utils.LOGDEBUG)
    if result_key in meta.get('result', {}):
        details = meta['result'][result_key]
        def_quality = [item[0] for item in sorted(SORT_KEYS['quality'].items(), key=lambda x: x[1])][self.def_quality]
        host = {'multi-part': False, 'class': self, 'url': details['file'], 'host': 'XBMC Library',
                'quality': def_quality, 'views': details['playcount'], 'rating': None, 'direct': True}
        stream_details = details['streamdetails']
        if len(stream_details['video']) > 0 and 'width' in stream_details['video'][0]:
            host['quality'] = scraper_utils.width_get_quality(stream_details['video'][0]['width'])
        hosters.append(host)
    return hosters
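# A worked sketch of the def_quality lookup above. SORT_KEYS['quality'] is
# assumed to map quality names to sort ranks (the values below are hypothetical);
# sorting the items by rank and indexing with self.def_quality picks the
# user-configured default quality:
_EXAMPLE_SORT_KEYS = {'quality': {'Low': 0, 'Medium': 1, 'High': 2, 'HD720': 3, 'HD1080': 4}}
_ranked = [item[0] for item in sorted(_EXAMPLE_SORT_KEYS['quality'].items(), key=lambda x: x[1])]
# _ranked == ['Low', 'Medium', 'High', 'HD720', 'HD1080']; def_quality == 3 -> 'HD720'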
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    media_type = 'series' if video_type == VIDEO_TYPES.TVSHOW else 'movie'
    search_url = scraper_utils.urljoin(self.base_url, '/typeahead/%s' % (urllib.quote(title)))
    headers = {'Referer': self.base_url}
    headers.update(XHR)
    html = self._http_get(search_url, headers=headers, require_debrid=True, cache_limit=.5)
    for item in scraper_utils.parse_json(html, search_url):
        match_title = item.get('title')
        match_url = item.get('link')
        match_year = ''  # the typeahead endpoint doesn't return a year, so the year check below is vacuous
        if item.get('type') == media_type and match_title and match_url:
            if not year or not match_year or year == match_year:
                result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year,
                          'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results
def __get_movie_sources(self, page_url):
    sources = []
    headers = {'Referer': ''}
    html = self._http_get(page_url, headers=headers, cache_limit=.5)
    match = re.search('APP_PATH\+"([^"]+)"\+([^"]+)\+"([^"]+)"', html)
    if match:
        url1, var, url2 = match.groups()
        match = re.search("%s\s*=\s*'([^']+)" % (var), html)
        if match:
            headers = {'Referer': page_url}
            headers.update(XHR)
            contents_url = '/' + url1 + match.group(1) + url2
            contents_url = scraper_utils.urljoin(self.base_url, contents_url)
            js_data = scraper_utils.parse_json(self._http_get(contents_url, headers=headers, cache_limit=2), contents_url)
            if js_data:
                sources = [item['src'] for item in js_data if 'src' in item]

    match = re.search("openloadLink\s*=\s*'([^']+)", html, re.I)
    if match:
        sources.append(match.group(1))
    return sources
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/search/searchBoxSuggestion')
    html = self._http_get(search_url, params={'top': 8, 'query': title}, cache_limit=8)
    js_data = scraper_utils.parse_json(html, search_url)
    for item in js_data:
        entityName = match_title_year = item.get('Value', '')
        if entityName:
            match_title, match_year2 = scraper_utils.extra_year(match_title_year)
            match_year = str(item.get('ReleaseYear', ''))
            if not match_year:
                match_year = match_year2
            match_url = '/ontology/EntityDetails?' + urllib.urlencode(
                {'entityName': entityName, 'ignoreMediaLinkError': 'false'})
            if not year or not match_year or year == match_year:
                result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year,
                          'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results
def __grab_links(self, grab_url, query, referer):
    sources = {}
    try:
        query['mobile'] = '0'
        query.update(self.__get_token(query))
        grab_url = grab_url + '?' + urllib.urlencode(query)
        headers = dict(XHR)  # copy so the shared XHR headers aren't mutated
        headers['Referer'] = referer
        html = self._http_get(grab_url, headers=headers, cache_limit=.5)
        js_data = scraper_utils.parse_json(html, grab_url)
        if 'data' in js_data:
            for link in js_data['data']:
                stream_url = link['file']
                if self._get_direct_hostname(stream_url) == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                elif 'label' in link:
                    quality = scraper_utils.height_get_quality(link['label'])
                else:
                    quality = QUALITIES.HIGH
                sources[stream_url] = {'direct': False, 'quality': quality}
    except Exception as e:
        log_utils.log('9Movies Link Parse Error: %s' % (e), log_utils.LOGWARNING)
    return sources
def __get_json_links(self, html, sub):
    hosters = []
    js_data = scraper_utils.parse_json(html)
    for source in js_data.get('sources', []):
        stream_url = source.get('file')
        if stream_url is None:
            continue
        host = scraper_utils.get_direct_hostname(self, stream_url)
        if host == 'gvideo':
            quality = scraper_utils.gv_get_quality(stream_url)
        elif 'label' in source:
            quality = scraper_utils.height_get_quality(source['label'])
        else:
            quality = QUALITIES.HIGH
        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality, 'views': None,
                  'rating': None, 'url': stream_url, 'direct': True}
        hoster['subs'] = sub
        hosters.append(hoster)
    return hosters
def __get_source_page(self, video_type, page_url):
    match = re.search('/movie/(.*?)-(\d+)\.html', page_url)
    if not match:
        return '', '', ''

    slug, movie_id = match.groups()
    vid_type = 'movie' if video_type == VIDEO_TYPES.MOVIE else 'series'
    qp_url = QP_URL.format(slug=slug, movie_id=movie_id, vid_type=vid_type)
    qp_url = scraper_utils.urljoin(self.base_url, qp_url)
    headers = {'Referer': scraper_utils.urljoin(self.base_url, page_url)}
    headers.update(XHR)
    html = self._http_get(qp_url, headers=headers, cache_limit=8)
    watching_url = dom_parser2.parse_dom(html, 'a', {'title': re.compile('View all episodes')}, req='href')
    if not watching_url:
        return '', '', ''

    watching_url = watching_url[0].attrs['href']
    page_html = self._http_get(watching_url, headers={'Referer': scraper_utils.urljoin(self.base_url, page_url)},
                               cache_limit=8)
    # request the hidden images; the site appears to expect these fetches before serving source links
    for attrs, _content in dom_parser2.parse_dom(page_html, 'img', {'class': 'hidden'}, req='src'):
        _img = self._http_get(attrs['src'], headers={'Referer': watching_url}, cache_limit=8)

    sl_url = SL_URL.format(movie_id=movie_id)
    sl_url = scraper_utils.urljoin(self.base_url, sl_url)
    html = self._http_get(sl_url, headers=headers, cache_limit=8)
    js_data = scraper_utils.parse_json(html, sl_url)
    try:
        html = js_data['html']
    except:
        html = ''
    return movie_id, watching_url, html
def __movie_search(self, title, year):
    results = []
    search_url = base64.decodestring(MOVIE_SEARCH_URL) % (urllib.quote_plus(title))
    html = self._http_get(search_url, cache_limit=1)
    js_data = scraper_utils.parse_json(html)
    if 'results' in js_data:
        norm_title = scraper_utils.normalize_title(title)
        for item in js_data['results']:
            match_url = urllib.unquote(item['url'])
            if '/buscar/' not in match_url:
                continue
            match_title_year = item['titleNoFormatting']
            match_title_year = re.sub('^Ver\s+', '', match_title_year)  # strip the leading Spanish "Ver" ("Watch")
            match = re.search('(.*?)(?:\s+\(?(\d{4})\)?)', match_title_year)
            if match:
                match_title, match_year = match.groups()
            else:
                match_title = match_title_year
                match_year = ''
            if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year,
                          'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results
def _http_get(self, url, params=None, data=None, allow_redirect=True, cache_limit=8):
    if not self.username or not self.password:
        return {}

    if data is None:
        data = {}
    data.update({'customer_id': self.username, 'pin': self.password})
    result = super(self.__class__, self)._http_get(url, params=params, data=data, allow_redirect=allow_redirect,
                                                   cache_limit=cache_limit)
    js_result = scraper_utils.parse_json(result, url)
    if 'status' in js_result and js_result['status'] == 'error':
        logger.log('Premiumize V2 Scraper Error: %s - (%s)' % (url, js_result.get('message', 'Unknown Error')),
                   log_utils.LOGWARNING)
        js_result = {}
    return js_result
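# Usage sketch (hypothetical endpoint): the account credentials ride along as
# POST fields on every call, and any {"status": "error", ...} response collapses
# to {}, so callers can chain .get() without per-call error handling:
#
#   js_result = self._http_get('https://api.example.com/api/transfer/list')
#   for transfer in js_result.get('transfers', []):
#       ...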
def __get_gk_links(self, html, url):
    hosters = []
    for match in re.finditer('gkpluginsphp.*?link\s*:\s*"([^"]+)', html):
        data = {'link': match.group(1)}
        headers = dict(XHR)  # copy so the shared XHR headers aren't mutated
        headers['Referer'] = url
        gk_url = urlparse.urljoin(self.base_url, GK_URL)
        html = self._http_get(gk_url, data=data, headers=headers, cache_limit=.5)
        js_result = scraper_utils.parse_json(html, gk_url)
        if 'link' in js_result and 'func' not in js_result:
            if isinstance(js_result['link'], list):
                sources = dict((link['link'], scraper_utils.height_get_quality(link.get('label', 700)))
                               for link in js_result['link'])
            else:
                sources = {js_result['link']: QUALITIES.HD720}

            for source in sources:
                if source:
                    hoster = {'multi-part': False, 'url': source, 'class': self, 'quality': sources[source],
                              'host': self._get_direct_hostname(source), 'rating': None, 'views': None,
                              'direct': True}
                    hosters.append(hoster)
    return hosters
def __get_params(self, grab_url, episode_id, movie_id, page_url):
    hash_id, token, ts = None, None, None
    url = scraper_utils.urljoin(grab_url, '/token_v2.php', replace_path=True)
    headers = {'Referer': page_url}
    params = {'eid': episode_id, 'mid': movie_id, '_': int(time.time() * 1000)}
    html = self._http_get(url, params=params, headers=headers, cache_limit=0)
    if aa_decoder.is_aaencoded(html):
        html = aa_decoder.decode(html)
        match1 = re.search("hash\s*=\s*'([^']+)", html)
        match2 = re.search("token\s*=\s*'([^']+)", html)
        match3 = re.search("_\s*=\s*'([^']+)", html)
        if match1 and match2 and match3:
            hash_id = match1.group(1)
            token = match2.group(1)
            ts = match3.group(1)
    else:
        js_data = scraper_utils.parse_json(html, url)
        hash_id, token, ts = js_data.get('hash'), js_data.get('token'), js_data.get('_')
    return hash_id, token, ts
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        page_url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(page_url, cache_limit=.5)
        match = re.search('var\s*video_id="([^"]+)', html)
        if match:
            video_id = match.group(1)
            data = {'v': video_id}
            headers = {'Referer': page_url}
            headers.update(XHR)
            html = self._http_get(self.info_url, data=data, headers=headers, cache_limit=0)
            sources = scraper_utils.parse_json(html, self.info_url)
            for source in sources:
                match = re.search('url=(.*)', sources[source])
                if match:
                    stream_url = urllib.unquote(match.group(1))
                    host = self._get_direct_hostname(stream_url)
                    if host == 'gvideo':
                        quality = scraper_utils.gv_get_quality(stream_url)
                    else:
                        quality = scraper_utils.height_get_quality(source)
                    stream_url += scraper_utils.append_headers({'User-Agent': scraper_utils.get_ua()})
                    hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality,
                              'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                    hosters.append(hoster)
    return hosters
def __get_gk_links(self, link, iframe_url):
    sources = {}
    data = {'link': link}
    headers = dict(XHR)  # copy so the shared XHR headers aren't mutated
    headers.update({'Referer': iframe_url, 'User-Agent': USER_AGENT})
    html = self._http_get(GK_URL, data=data, headers=headers, cache_limit=.25)
    js_data = scraper_utils.parse_json(html, GK_URL)
    if 'link' in js_data:
        if isinstance(js_data['link'], basestring):
            stream_url = js_data['link']
            if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
                for source in scraper_utils.parse_google(self, stream_url):
                    sources[source] = {'quality': scraper_utils.gv_get_quality(source), 'direct': True}
            else:
                sources[stream_url] = {'quality': QUALITIES.HIGH, 'direct': False}
        else:
            for link in js_data['link']:
                stream_url = link['link']
                if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
                    quality = scraper_utils.gv_get_quality(stream_url)
                elif 'label' in link:
                    quality = scraper_utils.height_get_quality(link['label'])
                else:
                    quality = QUALITIES.HIGH
                sources[stream_url] = {'quality': quality, 'direct': True}
    return sources
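# The gkplugins-style endpoint above is assumed to answer in one of two shapes
# (hypothetical values), which is why the code branches on
# isinstance(js_data['link'], basestring):
#
#   {"link": "https://host.example/embed/xyz"}                         # single embed URL
#   {"link": [{"link": "https://...720.mp4", "label": "720p"}, ...]}   # list of direct files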
def __search(self, video_type, title, year, season=''):
    results = []
    search_url = SEARCH_URL % (urllib.quote_plus(title))
    html = self._http_get(search_url, cache_limit=1)
    js_data = scraper_utils.parse_json(html)
    norm_title = scraper_utils.normalize_title(title)
    for item in js_data.get('results', []):
        if '/watch/' not in item['url'].lower():
            continue
        is_season = re.search('Season\s+(\d+)', item['titleNoFormatting'], re.IGNORECASE)
        if (not is_season and video_type == VIDEO_TYPES.MOVIE) or (is_season and video_type == VIDEO_TYPES.SEASON):
            match_title_year = item['titleNoFormatting']
            match_title_year = re.sub('^Watch\s+', '', match_title_year)
            match_url = item['url']
            match_year = ''
            if video_type == VIDEO_TYPES.MOVIE:
                match = re.search('(.*?)(?:\s+\(?(\d{4})\)?)', match_title_year)
                if match:
                    match_title, match_year = match.groups()
                else:
                    match_title = match_title_year
            else:
                if season and int(is_season.group(1)) != int(season):
                    continue
                match = re.search('(.*?)\s+\(\d{4}\)', match_title_year)
                if match:
                    match_title = match.group(1)
                else:
                    match_title = match_title_year

            if norm_title in scraper_utils.normalize_title(match_title) and (not year or not match_year or year == match_year):
                result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year,
                          'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results
def __get_link_from_json(self, url):
    sources = {}
    html = self._http_get(url, cache_limit=.5)
    js_result = scraper_utils.parse_json(html, url)
    if 'src' in js_result:
        sources[js_result['src']] = {'quality': QUALITIES.HIGH, 'direct': False}
    return sources
def __get_cloud_links(self, html, page_url, sub):
    hosters = []
    html = html.replace('\\"', '"').replace('\\/', '/')
    match = re.search("dizi_kapak_getir\('([^']+)", html)
    if match:
        ep_id = match.group(1)
        for attrs, _content in dom_parser2.parse_dom(html, 'script', {'data-cfasync': 'false'}, req='src'):
            script_url = attrs['src']
            html = self._http_get(script_url, cache_limit=24)
            match1 = re.search("var\s+kapak_url\s*=\s*'([^']+)", html)
            match2 = re.search("var\s+aCtkp\s*=\s*'([^']+)", html)
            if match1 and match2:
                link_url = '%s?fileid=%s&access_token=%s' % (match1.group(1), ep_id, match2.group(1))
                headers = {'Referer': page_url}
                html = self._http_get(link_url, headers=headers, cache_limit=.5)
                js_data = scraper_utils.parse_json(html, link_url)
                for variant in js_data.get('variants', []):
                    hosts = variant.get('hosts', [])
                    stream_host = random.choice(hosts) if hosts else None  # guard: random.choice raises on an empty list
                    if stream_host:
                        stream_url = stream_host + variant['path'] + scraper_utils.append_headers(
                            {'User-Agent': scraper_utils.get_ua(), 'Referer': page_url})
                        if not stream_url.startswith('http'):
                            stream_url = 'http://' + stream_url
                        host = scraper_utils.get_direct_hostname(self, stream_url)
                        if 'width' in variant:
                            quality = scraper_utils.width_get_quality(variant['width'])
                        elif 'height' in variant:
                            quality = scraper_utils.height_get_quality(variant['height'])
                        else:
                            quality = QUALITIES.HIGH
                        hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality,
                                  'views': None, 'rating': None, 'url': stream_url, 'direct': True}
                        hoster['subs'] = sub
                        hosters.append(hoster)
    return hosters
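# A sketch of the variants payload assumed above (hypothetical values): each
# variant lists mirror hosts plus a shared path, and one host is picked at
# random for load balancing:
#
#   {"variants": [{"hosts": ["cdn1.example", "cdn2.example"],
#                  "path": "/video/abc.mp4", "width": 1280}]}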
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if not source_url or source_url == FORCE_NO_MATCH:
        return hosters

    url = scraper_utils.urljoin(self.base_url, source_url)
    html = self._http_get(url, headers=XHR, cache_limit=8)
    js_data = scraper_utils.parse_json(html, url)
    quality = Q_MAP.get(js_data.get('Key', {}).get('MovieDefinition'), QUALITIES.HIGH)
    value = js_data.get('Value', {})
    stream_url = value.get('VideoLink')
    if stream_url and value.get('ProviderSource', '').lower() == 'youtube':
        host = 'youtube.com'
        source = {'multi-part': False, 'url': stream_url, 'host': host, 'class': self, 'quality': quality,
                  'views': None, 'rating': None, 'direct': False}
        hosters.append(source)
    return hosters
def __get_gk_links(self, html, page_url):
    sources = {}
    match = re.search('{link\s*:\s*"([^"]+)', html)
    if match:
        data = {'link': match.group(1)}
        url = urlparse.urljoin(self.base_url, LINK_URL)
        headers = {'Referer': page_url}
        html = self._http_get(url, data=data, headers=headers, cache_limit=.25)
        js_data = scraper_utils.parse_json(html, url)
        if 'link' in js_data:
            for link in js_data['link']:
                if link.get('type') == 'mp4' and 'link' in link:
                    if self._get_direct_hostname(link['link']) == 'gvideo':
                        quality = scraper_utils.gv_get_quality(link['link'])
                    elif 'label' in link:
                        quality = scraper_utils.height_get_quality(link['label'])
                    else:
                        quality = QUALITIES.HIGH
                    sources[link['link']] = quality
    return sources
def __get_links_from_json2(self, url, page_url, video_type):
    sources = {}
    headers = {'Referer': page_url}
    headers.update(XHR)
    html = self._http_get(url, headers=headers, cache_limit=0)
    js_data = scraper_utils.parse_json(html, url)
    try:
        playlist = js_data.get('playlist', [])
        for source in playlist[0].get('sources', []):
            stream_url = source['file']
            if scraper_utils.get_direct_hostname(self, stream_url) == 'gvideo':
                quality = scraper_utils.gv_get_quality(stream_url)
            elif 'label' in source:
                quality = scraper_utils.height_get_quality(source['label'])
            else:
                if video_type == VIDEO_TYPES.MOVIE:
                    meta = scraper_utils.parse_movie_link(stream_url)
                else:
                    meta = scraper_utils.parse_episode_link(stream_url)
                quality = scraper_utils.height_get_quality(meta['height'])
            sources[stream_url] = {'quality': quality, 'direct': True}
            logger.log('Adding stream: %s Quality: %s' % (stream_url, quality), log_utils.LOGDEBUG)
    except Exception as e:
        logger.log('Exception during ymovies extract: %s' % (e), log_utils.LOGDEBUG)
    return sources
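# The JSON handled above follows the common jwplayer setup shape, which the code
# indexes directly (values below are hypothetical):
#
#   {"playlist": [{"sources": [{"file": "https://...720.mp4", "label": "720p"},
#                              {"file": "https://...480.mp4", "label": "480p"}]}]}
#
# When no label is present, quality falls back to parsing the filename via
# parse_movie_link/parse_episode_link.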
def __get_gk_links(self, html):
    sources = {}
    match = re.search('{link\s*:\s*"([^"]+)', html)
    if match:
        iframe_url = match.group(1)
        data = {'link': iframe_url}
        headers = {'Referer': iframe_url}
        html = self._http_get(self.gk_url, data=data, headers=headers, cache_limit=.5)
        js_data = scraper_utils.parse_json(html, self.gk_url)
        links = js_data.get('link', [])
        if isinstance(links, basestring):
            links = [{'link': links}]

        for link in links:
            stream_url = link['link']
            if scraper_utils.get_direct_hostname(self, stream_url) == 'openload.co':
                quality = scraper_utils.gv_get_quality(stream_url)
                direct = True
            elif 'label' in link:
                quality = scraper_utils.height_get_quality(link['label'])
                direct = True
            else:
                quality = QUALITIES.HIGH
                direct = False
            sources[stream_url] = {'quality': quality, 'direct': direct}
    return sources
def __get_ok(self, embed, flashvars):
    hosters = []
    link = flashvars[0].attrs['value']
    match = re.search('metadataUrl=([^"]+)', link)
    if match:
        referer = scraper_utils.cleanse_title(urllib.unquote(embed[0].attrs['data']))
        ok_url = scraper_utils.cleanse_title(urllib.unquote(match.group(1)))
        html = self._http_get(ok_url, data='ok', headers={'Referer': referer}, cache_limit=.25)
        js_data = scraper_utils.parse_json(html, ok_url)
        stream_url = js_data.get('movie', {}).get('url')
        if stream_url is not None:
            host = urlparse.urlparse(stream_url).hostname
            hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': QUALITIES.HD720,
                      'views': None, 'rating': None, 'url': stream_url, 'direct': False,
                      'subs': 'Turkish Subtitles'}
            hosters.append(hoster)
    return hosters
def __get_framed_streams(self, vid_url, cookies, html, page_url):
    streams = {}
    iframe_url = dom_parser2.parse_dom(html, 'iframe', req='src')
    if not iframe_url:
        raise scraper.ScrapeError('No Iframe in: %s' % (vid_url))

    iframe_url = iframe_url[0].attrs['src']
    html = self._http_get(iframe_url, headers={'Referer': page_url}, cache_limit=.02)
    match = re.search('getScript\("([^"]+)', html)
    if not match:
        raise scraper.ScrapeError('No Script in: %s' % (iframe_url))

    script_url = match.group(1)
    html = self._http_get(script_url, headers={'Referer': iframe_url}, cache_limit=.02)
    match = re.search("responseJson\s*=\s*'([^']+)", html)
    if not match:
        raise scraper.ScrapeError('No JSON in: %s' % (script_url))

    js_data = scraper_utils.parse_json(match.group(1), script_url)
    media = js_data.get('medias', {})
    if media:
        headers = {'Referer': page_url}
        headers.update(XHR)
        data = {'data': json.dumps({'medias': media, 'original': ''})}
        vid_html = self._http_get(vid_url, data=data, headers=headers, cookies=cookies, cache_limit=.02)
        streams.update(self.__get_js_sources(vid_html, vid_url, cookies, page_url, allow_framed=False))
    return streams
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    search_url = scraper_utils.urljoin(self.base_url, '/ajax/search.php')
    timestamp = int(time.time() * 1000)
    query = {'q': title, 'limit': 100, 'timestamp': timestamp, 'verifiedCheck': ''}
    html = self._http_get(search_url, data=query, headers=XHR, cache_limit=1)
    if video_type in [VIDEO_TYPES.TVSHOW, VIDEO_TYPES.EPISODE]:
        media_type = 'TV SHOW'
    else:
        media_type = 'MOVIE'

    js_data = scraper_utils.parse_json(html, search_url)
    for item in js_data:
        if not item['meta'].upper().startswith(media_type):
            continue
        result = {'title': scraper_utils.cleanse_title(item['title']),
                  'url': scraper_utils.pathify_url(item['permalink']), 'year': ''}
        results.append(result)
    return results
def __get_ajax_sources(self, html, page_url):
    hosters = []
    match = re.search('''url\s*:\s*"([^"]+)"\s*,\s*data:'id=''', html)
    if match:
        ajax_url = match.group(1)
        for data_id in re.findall("kaynakdegis\('([^']+)", html):
            url = scraper_utils.urljoin(self.base_url, ajax_url)
            data = {'id': data_id}
            headers = {'Referer': page_url}
            headers.update(XHR)
            result = self._http_get(url, data=data, headers=headers, cache_limit=.5)
            js_data = scraper_utils.parse_json(result, url)
            if 'iframe' in js_data:
                if self.base_url in js_data['iframe']:
                    hosters += self.__get_iframe_sources(js_data['iframe'], page_url)
                else:
                    hosters.append(self.__create_source(js_data['iframe'], 720, page_url, direct=False))
            else:
                hosters += self.__get_js_sources(js_data, page_url)
    return hosters
def search(self, video_type, title, year, season=''):  # @UnusedVariable
    results = []
    search_url = scraper_utils.urljoin(self.base_url, SEARCH_URL)
    referer = scraper_utils.urljoin(self.base_url, '/search/?q=%s')
    referer = referer % (urllib.quote_plus(title))
    headers = {'Referer': referer}
    headers.update(XHR)
    params = {'searchTerm': title, 'type': SEARCH_TYPES[video_type], 'limit': 500}
    html = self._http_get(search_url, params=params, headers=headers, auth=False, cache_limit=2)
    js_data = scraper_utils.parse_json(html, search_url)
    if 'results' in js_data:
        for item in js_data['results']:
            match_year = str(item.get('year', ''))
            match_url = item.get('permalink', '')
            match_title = item.get('title', '')
            if not year or not match_year or year == match_year:
                result = {'title': scraper_utils.cleanse_title(match_title), 'year': match_year,
                          'url': scraper_utils.pathify_url(match_url)}
                results.append(result)
    return results
def get_sources(self, video):
    source_url = self.get_url(video)
    hosters = []
    if source_url and source_url != FORCE_NO_MATCH:
        url = urlparse.urljoin(self.base_url, source_url)
        html = self._http_get(url, cache_limit=.5)
        for server_list in dom_parser.parse_dom(html, 'ul', {'class': 'episodes'}):
            labels = dom_parser.parse_dom(server_list, 'a')
            hash_ids = dom_parser.parse_dom(server_list, 'a', ret='data-id')
            for label, hash_id in zip(labels, hash_ids):
                if video.video_type == VIDEO_TYPES.EPISODE and not self.__episode_match(label, video.episode):
                    continue

                hash_url = urlparse.urljoin(self.base_url, HASH_URL)
                query = {'id': hash_id, 'update': '0'}
                query.update(self.__get_token(query))
                hash_url = hash_url + '?' + urllib.urlencode(query)
                headers = dict(XHR)  # copy so the shared XHR headers aren't mutated
                headers['Referer'] = url
                html = self._http_get(hash_url, headers=headers, cache_limit=.5)
                js_data = scraper_utils.parse_json(html, hash_url)
                sources = {}
                link_type = js_data.get('type')
                target = js_data.get('target')
                grabber = js_data.get('grabber')
                params = js_data.get('params')
                if link_type == 'iframe' and target:
                    sources[target] = {'direct': False, 'quality': QUALITIES.HD720}
                elif grabber and params:
                    sources = self.__grab_links(grabber, params, url)

                for source in sources:
                    direct = sources[source]['direct']
                    quality = sources[source]['quality']
                    if direct:
                        host = self._get_direct_hostname(source)
                    else:
                        host = urlparse.urlparse(source).hostname
                    hoster = {'multi-part': False, 'host': host, 'class': self, 'quality': quality,
                              'views': None, 'rating': None, 'url': source, 'direct': direct}
                    hosters.append(hoster)
    return hosters
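# The hash endpoint above is assumed to answer in one of two shapes
# (hypothetical values), which selects between the iframe branch and
# __grab_links:
#
#   {"type": "iframe", "target": "https://host.example/embed/xyz"}            # hosted embed
#   {"grabber": "https://grabber.example/grabber-api/", "params": {"id": 1}}  # direct-link grabber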