def sources(self, url):
    sources = []
    try:
        if url is None:
            return sources
        headers = {'User-Agent': random_agent()}
        html = BeautifulSoup(requests.get(url, headers=headers, timeout=30).content)
        r = html.findAll('div', attrs={'class': 'site'})
        for container in r:
            r_url = container.findAll('a')[0]['data-actuallink'].encode('utf-8')
            host = re.findall('([\w]+[.][\w]+)$', urlparse.urlparse(r_url.strip().lower()).netloc)[0]
            host = replaceHTMLCodes(host)
            host = host.encode('utf-8')
            sources.append({
                'source': host,
                'quality': 'SD',
                'scraper': self.name,
                'url': r_url,
                'direct': False
            })
    except:
        pass
    return sources
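# --- Illustrative aside (not part of the scraper): how the host extraction in
# sources() above behaves. The sample link is fabricated; the urlparse/re calls
# mirror exactly what the method does. Standalone Python 2 snippet.
import re
import urlparse

sample_link = 'http://www.examplehost.com/embed/abc123'  # hypothetical link
netloc = urlparse.urlparse(sample_link.strip().lower()).netloc  # 'www.examplehost.com'
host = re.findall('([\w]+[.][\w]+)$', netloc)[0]
print(host)  # -> 'examplehost.com' (the trailing domain.tld pair of the netloc)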
def get(url, check, headers=None, data=None):
    if headers is None:
        headers = {'User-Agent': random_agent()}
    # Try the URL directly first.
    try:
        request = urllib2.Request(url, headers=headers, data=data)
        html = urllib2.urlopen(request, timeout=10).read()
        if check in str(html):
            return html
    except:
        pass
    # Fall back to fetching through a web proxy (two attempts).
    try:
        new_url = get_proxy_url() % urllib.quote_plus(url)
        headers['Referer'] = 'http://%s/' % urlparse.urlparse(new_url).netloc
        request = urllib2.Request(new_url, headers=headers)
        response = urllib2.urlopen(request, timeout=10)
        html = response.read()
        response.close()
        if check in html:
            return html
    except:
        pass
    try:
        new_url = get_proxy_url() % urllib.quote_plus(url)
        headers['Referer'] = 'http://%s/' % urlparse.urlparse(new_url).netloc
        request = urllib2.Request(new_url, headers=headers)
        html = urllib2.urlopen(request, timeout=10).read()
        if check in html:
            return html
    except:
        pass
    return None
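# --- Assumed contract (sketch, not the real implementation): get_proxy_url()
# is defined elsewhere in this module. The proxy fallbacks above only require
# that it return a format string with a single '%s' slot for the quoted target
# URL, e.g. something shaped like:
#
#     def get_proxy_url():
#         return 'http://someproxy.example/browse.php?u=%s'   # hypothetical value
#
# so that get_proxy_url() % urllib.quote_plus(url) yields a fetchable proxy URL.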
def scrape_movie(self, title, year, imdb):
    try:
        # print("ONEMOVIES")
        headers = {'User-Agent': random_agent()}
        # print("ONEMOVIES", headers)
        query = self.search_link % (urllib.quote_plus(title.replace("'", " ")))
        query = urlparse.urljoin(self.base_link, query)
        cleaned_title = clean_title(title)
        # print("ONEMOVIES", query)
        html = BeautifulSoup(requests.get(query, headers=headers, timeout=30).content)
        containers = html.findAll('div', attrs={'class': 'ml-item'})
        for result in containers:
            links = result.findAll('a')
            # print("ONEMOVIES", links)
            for link in links:
                link_title = str(link['title'])
                href = str(link['href'])
                info = str(link['data-url'])
                # print("ONEMOVIES", link_title, href, info)
                if clean_title(link_title) == cleaned_title:
                    html = requests.get(info, headers=headers).content
                    pattern = '<div class="jt-info">%s</div>' % year
                    match = re.findall(pattern, html)
                    if match:
                        # print("ONEMOVIES MATCH", href)
                        return self.sources(replaceHTMLCodes(href))
    except:
        pass
    return []
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False):
    try:
        if imdb is None:
            # TODO get imdb from title
            return
        imdb_title = self.get_imdb_title(imdb)
        headers = {'User-Agent': random_agent()}
        show_url = urlparse.urljoin(
            self.base_link,
            self.tv_link % imdb_title.replace(": ", "-").replace(' ', '-').replace(':', '-')
        ).replace('\'', '').lower()
        show_url += "/"
        # remove accents
        show_url = str(''.join((c for c in unicodedata.normalize('NFD', show_url.decode("utf-8"))
                                if unicodedata.category(c) != 'Mn')))
        html = BeautifulSoup(requests.get(show_url, headers=headers).content)
        season_containers = html.findAll('div', attrs={'class': 'Season container clear'})
        for season_container in season_containers:
            try:
                links = season_container.findAll("a")
                for link in links:
                    try:
                        link_title = link.findAll("small")[0].text
                        if ('season %s' % season in link_title.lower()
                                and 'episode %s' % episode in link_title.lower()):
                            return self.sources(link["href"])
                    except:
                        continue
            except:
                continue
    except:
        pass
    return []
def scrape_movie(self, title, year, imdb):
    try:
        print("MOVIEXK")
        headers = {'User-Agent': random_agent()}
        query = self.search_link % (urllib.quote_plus(title) + "+" + str(year))
        query = urlparse.urljoin(self.base_link, query)
        cleaned_title = clean_title(title)
        html = BeautifulSoup(requests.get(query, headers=headers, timeout=30).content)
        containers = html.findAll('div', attrs={'class': 'inner'})
        for container in containers:
            print("MOVIEXK r1", container)
            movie_link = container.findAll('a')[0]
            r_href = movie_link['href']
            print("MOVIEXK r2", r_href)
            r_title = movie_link['title']
            link_year = container.findAll('span', attrs={'class': 'year'})[0].findAll('a')[0].text
            print("MOVIEXK r3", r_title)
            print("MOVIEXK RESULTS", r_title, r_href)
            if str(year) == link_year:
                if cleaned_title in clean_title(r_title):
                    redirect = requests.get(r_href, headers=headers, timeout=30).text
                    r_url = re.findall('<a href="(.*?)" class="btn-watch"', redirect)[0]
                    r_url = r_url.encode('utf-8')
                    print("MOVIEXK PLAY URL", r_url)
                    return self.sources(replaceHTMLCodes(r_url))
    except:
        pass
    return []
def sources(self, url):
    sources = []
    try:
        if url is None:
            return sources
        absolute_url = urlparse.urljoin(self.base_link, url)
        referer_url = url.replace('watching.html', '') + 'watching.html'
        headers = {'User-Agent': random_agent()}
        post = requests.get(absolute_url, headers=headers, timeout=30).content
        post = re.findall('movie=(\d+)', post)[0]
        post = {'id': post, 'episode_id': '0', 'link_id': '0', 'from': 'v3'}
        headers = {'X-Requested-With': 'XMLHttpRequest',
                   'Accept-Formating': 'application/json, text/javascript',
                   'Server': 'cloudflare-nginx',
                   'Referer': referer_url,
                   'User-Agent': random_agent()}
        load_episode_url = urlparse.urljoin(self.base_link, '/ajax/movie/load_episodes')
        html = BeautifulSoup(requests.post(load_episode_url, data=post, headers=headers).content)
        pattern = re.compile("load_player\(\s*'([^']+)'\s*,\s*'?(\d+)\s*'?")
        links = html.findAll('a', attrs={'onclick': pattern})
        for link in links:
            info = re.findall(pattern, link['onclick'])[0]  # (id, quality) quality can be 0
            try:
                play = urlparse.urljoin(self.base_link, '/ajax/movie/load_player_v2')
                post = {'id': info[0], 'quality': info[1]}
                player_url = requests.post(play, data=post, headers=headers).content
                json_url = json.loads(player_url)['link']
                response = proxy.get_raw(json_url, headers=headers)
                video_url = response.geturl()
                # The proxy wraps the real URL in a 'u' (or 'q') query parameter.
                try:
                    unproxied_video_url = urlparse.parse_qs(urlparse.urlparse(video_url).query)['u'][0]
                except:
                    pass
                try:
                    unproxied_video_url = urlparse.parse_qs(urlparse.urlparse(video_url).query)['q'][0]
                except:
                    pass
                if 'openload.' in unproxied_video_url:
                    sources.append({'source': 'openload.co', 'quality': 'HD',
                                    'scraper': self.name, 'url': unproxied_video_url,
                                    'direct': False})
                else:
                    sources.append({'source': 'google video',
                                    'quality': googletag(unproxied_video_url)[0]['quality'],
                                    'scraper': self.name, 'url': unproxied_video_url,
                                    'direct': True})
            except:
                continue
        return sources
    except:
        return sources
def scrape_movie(self, title, year, imdb):
    try:
        title = title.translate(None, '\/:*?"\'<>|!,').replace(' ', '-').replace('--', '-').lower()
        headers = {'User-Agent': random_agent()}
        search_url = urlparse.urljoin(self.base_link, self.moviesearch_hd_link % (title, year))
        html = None
        try:
            prehtml = self.scraper.get(search_url, headers=headers, timeout=30)
            if prehtml.status_code != 404:
                html = BeautifulSoup(prehtml.content)
        except:
            pass
        if html is None:
            search_url = urlparse.urljoin(self.base_link, self.moviesearch_sd_link % (title, year))
            html = BeautifulSoup(self.scraper.get(search_url, headers=headers, timeout=30).content)
        if html is None:
            raise Exception()
        return self.sources(search_url)
    except:
        pass
    return []
def sources(self, url):
    sources = []
    try:
        if url is None:
            return sources
        referer = urlparse.urljoin(self.base_link, url)
        headers = {'X-Requested-With': 'XMLHttpRequest',
                   'Referer': referer,
                   'User-Agent': random_agent()}
        post = urlparse.parse_qs(urlparse.urlparse(referer).query).values()[0][0]
        post = {'v': post}
        url = urlparse.urljoin(self.base_link, '/video_info/iframe')
        html = requests.post(url, data=post, headers=headers).content
        quality_url_pairs = re.findall('"(\d+)"\s*:\s*"([^"]+)', html)
        for pair in quality_url_pairs:
            quality = pair[0]
            url = urllib.unquote(pair[1].split('url=')[-1])
            sources.append({
                'source': 'google video',
                'quality': quality,
                'scraper': self.name,
                'url': url,
                'direct': True
            })
    except:
        pass
    return sources
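# --- Illustrative aside: the '"(\d+)"\s*:\s*"([^"]+)' pattern above expects a
# /video_info/iframe response shaped roughly like the fabricated payload below
# (quality label -> proxied URL). Standalone snippet; the payload is invented.
import re
import urllib

payload = '{"360":"http://proxy.example/?url=http%3A%2F%2Fvideo.example%2Fv.mp4"}'  # fabricated
for quality, link in re.findall('"(\d+)"\s*:\s*"([^"]+)', payload):
    print(quality, urllib.unquote(link.split('url=')[-1]))
# -> 360 http://video.example/v.mp4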
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb):
    try:
        headers = {'User-Agent': random_agent()}
        query = "%s+season+%s" % (urllib.quote_plus(title), season)
        query = self.search_link % query
        query = urlparse.urljoin(self.base_link, query)
        cleaned_title = clean_title(title)
        checkseason = cleaned_title + "season" + season
        # print("ONEMOVIES", query, checkseason)
        html = BeautifulSoup(requests.get(query, headers=headers, timeout=30).content)
        containers = html.findAll('div', attrs={'class': 'ml-item'})
        for result in containers:
            links = result.findAll('a')
            # print("ONEMOVIES", links)
            for link in links:
                link_title = str(link['title'])
                href = str(link['href'])
                # print("ONEMOVIES", link_title, href)
                if clean_title(link_title) == checkseason:
                    ep_id = '?episode=%01d' % int(episode)
                    href = href + ep_id
                    # print("ONEMOVIES Passed", href)
                    return self.sources(replaceHTMLCodes(href))
    except:
        pass
    return []
def sources(self, url):
    sources = []
    try:
        if url is None:
            return sources
        headers = {'User-Agent': random_agent()}
        html = BeautifulSoup(requests.get(url, headers=headers, timeout=30).content)
        r = html.findAll('source')
        for r_source in r:
            url = r_source['src'].encode('utf-8')
            if 'google' not in url:
                try:
                    req = requests.head(url, headers=headers)
                    if req.headers['Location'] != "":
                        url = req.headers['Location']
                        url = url.replace('https://', 'http://').replace(':443/', '/')
                except:
                    pass
            if 'google' in url:
                quality = r_source['data-res'].encode('utf-8')
                if "1080" in quality:
                    quality = "1080"
                elif "720" in quality:
                    quality = "720"
                else:
                    quality = "SD"
                print("MOVIEXK SOURCES", url, quality)
                sources.append({'source': 'google video', 'quality': quality,
                                'scraper': self.name, 'url': url, 'direct': True})
            else:
                sources.append({'source': 'moviexk', 'quality': 'SD',
                                'scraper': self.name, 'url': url, 'direct': True})
    except:
        pass
    return sources
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb):
    headers = {'User-Agent': random_agent()}
    q = (title.translate(None, '\/:*?"\'<>|!,')).replace(' ', '-').replace('--', '-').lower()
    query = urlparse.urljoin(self.base_link, self.tv_search_link % q)
    cleaned_title = clean_title(title)
    html = BeautifulSoup(requests.get(query, headers=headers, timeout=30).content)
    links = html.findAll('a', attrs={'class': 'top-h1'})
    show_url = None
    for link in links:
        link_title = link.text
        if cleaned_title == clean_title(link_title):
            show_url = link["href"]
            break
    if show_url:
        html = BeautifulSoup(requests.get(show_url, headers=headers, timeout=30).content)
        link_container = html.findAll("div", attrs={'class': 'bottom'})[-1]
        episode_links = link_container.findAll("a")
        episode_format1 = "S%02dE%02d" % (int(season), int(episode))
        episode_format2 = "S%02d-E%02d" % (int(season), int(episode))
        for episode_link in episode_links:
            button = episode_link.contents[0]
            episode_text = button.text
            if episode_format1 in episode_text or episode_format2 in episode_text:
                episode_url = episode_link["href"]
                return self.sources(episode_url, "SD")
    return []
def sources(self, url, quality):
    sources = []
    try:
        headers = {'User-Agent': random_agent()}
        song_id = re.findall('-(\d+).html', url)[0]
        query = self.sources_link % song_id
        query = urlparse.urljoin(self.base_link, query)
        # print("ONEMUSIC SONG ID", song_id, query)
        response = requests.get(query, headers=headers).content
        source_json = json.loads(response)
        songs_json = source_json['sources']
        for item in songs_json:
            hdmusic = item['link_320'].encode('utf-8').replace(' ', '%20')
            sdmusic = item['link_128'].encode('utf-8').replace(' ', '%20')
            if "/mobile/" not in hdmusic:
                sources.append({'source': 'mp3', 'quality': 'HD',
                                'scraper': self.name, 'url': hdmusic, 'direct': True})
            if "mobile" not in sdmusic:
                sources.append({'source': 'mp3', 'quality': 'SD',
                                'scraper': self.name, 'url': sdmusic, 'direct': True})
        # print("ONEMUSIC SOURCES", sources)
    except:
        pass
    return sources
def scrape_movie(self, title, year, imdb):
    try:
        headers = {'User-Agent': random_agent()}
        query = urlparse.urljoin(self.base_link, self.search_link)
        query = query % urllib.quote_plus(title)
        # print ("XMOVIES query", query)
        cleaned_title = clean_title(title)
        html = BeautifulSoup(requests.get(query, headers=headers, timeout=30).content)
        containers = html.findAll('div', attrs={'class': 'item_movie'})
        # print ("XMOVIES r1", containers)
        for container in containers:
            try:
                links = container.findAll('h2', attrs={'class': 'tit'})[0]
                r = links.findAll('a')
                for link in r:
                    link_title = link['title'].encode('utf-8')
                    href = link['href'].encode('utf-8')
                    if len(link_title) > 0 and len(href) > 0:
                        parsed = re.findall('(.+?) \((\d{4})', link_title)
                        parsed_title = parsed[0][0]
                        parsed_year = parsed[0][1]
                        if (cleaned_title.lower() == clean_title(parsed_title).lower()
                                and year == parsed_year):
                            if "http:" not in href:
                                href = "http:" + href
                            return self.sources(replaceHTMLCodes(href))
            except:
                pass
    except:
        pass
    return []
def sources(self, url):
    sources = []
    try:
        if url is None:
            return sources
        referer = urlparse.urljoin(self.base_link, url)
        headers = {'Referer': referer, 'User-Agent': random_agent()}
        html = requests.get(referer, headers=headers, timeout=30).content
        player_id = re.compile('var\s*view_id\s*=\s*"(\d*)"').findall(html)[0]
        player_url = self.player_link % player_id
        player_html = requests.get(player_url, headers=headers, timeout=30).content
        player_html_parsed = BeautifulSoup(player_html)
        try:
            video_url = player_html_parsed.findAll('iframe')[-1]['src']
            if 'openload' in video_url:
                host = 'openload.co'
                direct = False
                video_url = [{'url': video_url, 'quality': 'HD'}]
            elif 'ok.ru' in video_url:
                host = 'vk'
                direct = True
                video_url = odnoklassniki(video_url)
            elif 'vk.com' in video_url:
                host = 'vk'
                direct = True
                video_url = vk(video_url)
            else:
                raise Exception()
            for i in video_url:
                sources.append({'source': host, 'quality': i['quality'],
                                'scraper': self.name, 'url': i['url'], 'direct': direct})
        except:
            pass
        try:
            links = re.compile('"?file"?\s*:\s*"([^"]+)"\s*,\s*"?label"?\s*:\s*"(\d+)p?"').findall(player_html)
            for link in links:
                sources.append({'source': 'google video', 'quality': link[1],
                                'scraper': self.name, 'url': link[0], 'direct': True})
        except:
            pass
    except:
        pass
    return sources
def get_imdb_title(self, imdb):
    headers = {'User-Agent': random_agent(), 'Accept-Language': 'es-es'}
    html = BeautifulSoup(requests.get('http://www.imdb.com/title/%s' % imdb, headers=headers).content)
    html_title = html.findAll('title')[0].text.encode('utf-8')
    imdb_title = re.sub('(?:\(||\(TV Series\s|\s)\d{4}.+', '', html_title).strip()
    return imdb_title
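# --- Illustrative aside: what the re.sub in get_imdb_title() does to a
# typical IMDb <title> string (sample input fabricated for the example):
import re

html_title = 'The Matrix (1999) - IMDb'
print(re.sub('(?:\(||\(TV Series\s|\s)\d{4}.+', '', html_title).strip())
# -> 'The Matrix' (everything from the year onward is stripped)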
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb):
    try:
        print("MOVIEXK")
        headers = {'User-Agent': random_agent()}
        query = self.search_link % (urllib.quote_plus(title))
        query = urlparse.urljoin(self.base_link, query)
        cleaned_title = clean_title(title)
        ep_id = int(episode)
        season_id = int(season)
        html = BeautifulSoup(requests.get(query, headers=headers, timeout=30).content)
        containers = html.findAll('div', attrs={'class': 'inner'})
        for container in containers:
            print("MOVIEXK r1", container)
            show_link = container.findAll('a')[0]
            r_href = show_link['href']
            print("MOVIEXK r2", r_href)
            r_title = show_link['title']
            print("MOVIEXK r3", r_title)
            print("MOVIEXK r4", r_title, r_href)
            if cleaned_title in clean_title(r_title) and "tv" in r_title.lower():
                redirect = requests.get(r_href, headers=headers, timeout=30).text
                r_url = re.findall('<a href="(.*?)" class="btn-watch"', redirect)[0]
                r_url = r_url.encode('utf-8')
                links = BeautifulSoup(requests.get(r_url, headers=headers, timeout=30).content)
                ep_items = links.findAll('ul', attrs={'class': 'episodelist'})
                for items in ep_items:
                    ep_links = items.findAll('a')
                    for r in ep_links:
                        print("MOVIEXK r5", r)
                        ep_url = r['href'].encode('utf-8')
                        ep_title = r['title'].encode('utf-8')
                        print("MOVIEXK r6", ep_url, ep_title)
                        clean_ep_title = clean_title(ep_title)
                        if ("s%02de%02d" % (season_id, ep_id) in clean_ep_title
                                or "s%02d%02d" % (season_id, ep_id) in clean_ep_title
                                or "s%02d%d" % (season_id, ep_id) in clean_ep_title
                                or "epse%d%d" % (season_id, ep_id) in clean_ep_title):
                            return self.sources(replaceHTMLCodes(ep_url))
    except:
        pass
    return []
def Sources(self, url):
    sources = []
    try:
        for movielink, referer in self.url:
            try:
                # print("CMOVIES SOURCE LINKS", movielink)
                pages = requests.get(movielink).text
                scripts = re.findall('hash\s*:\s*"([^"]+)', pages)[0]
                # print("CMOVIES SERVER SCRIPT", scripts)
                if scripts:
                    token = self.__get_token()
                    key = hashlib.md5('(*&^%$#@!' + scripts[46:58]).hexdigest()
                    cookie = '%s=%s' % (key, token)
                    stream_url = self.stream_link % (scripts, hashlib.md5('!@#$%^&*(' + token).hexdigest())
                    # print("CMOVIES PLAYABLE LINKS", stream_url)
                    headers = {'Referer': referer,
                               'User-Agent': random_agent(),
                               'Cookie': cookie}
                    req = requests.get(stream_url, headers=headers, timeout=5).json()
                    playlist = req['playlist'][0]['sources']
                    # print playlist
                    for item in playlist:
                        url = item['file'].encode('utf-8')
                        r_quality = item['label'].encode('utf-8')
                        if r_quality in ['1080', '1080p', '1080P']:
                            quality = "1080p"
                        elif r_quality in ['720', '720p', '720P']:
                            quality = "HD"
                        else:
                            quality = "SD"
                        # print("CMOVIES playlist", quality, url)
                        sources.append({
                            'source': 'gvideo',
                            'quality': quality,
                            'scraper': 'Watch5s',
                            'url': url,
                            'direct': True
                        })
            except:
                pass
    except:
        pass
    return sources
def scrape_movie(self, title, year, imdb):
    try:
        headers = {'User-Agent': random_agent()}
        q = (title.translate(None, '\/:*?"\'<>|!,')).replace(' ', '-').replace('--', '-').lower()
        query = urlparse.urljoin(self.base_link, self.movie_search_link % q)
        cleaned_title = clean_title(title)
        html = requests.get(query, headers=headers, timeout=30).content
        containers = re.compile('<a class="top-item".*href="(.*?)"><cite>(.*?)</cite></a>').findall(html)
        for href, link_title in containers:
            parsed = re.findall('(.+?) \((\d{4})', link_title)
            parsed_title = parsed[0][0]
            parsed_year = parsed[0][1]
            if cleaned_title == clean_title(parsed_title) and year == parsed_year:
                try:
                    headers = {'User-Agent': random_agent()}
                    html = requests.get(href, headers=headers, timeout=30).content
                    parsed_html = BeautifulSoup(html)
                    quality_title = parsed_html.findAll("h3", attrs={'title': re.compile("Quality of ")})[0]
                    quality = quality_title.findAll('span')[0].text
                    match = re.search('href="([^"]+-full-movie-[^"]+)', html)
                    if match:
                        url = match.group(1)
                        return self.sources(url, "SD")
                except:
                    pass
    except:
        pass
    return []
def get_raw(url, headers=None, data=None):
    if headers is None:
        headers = {'User-Agent': random_agent()}
    try:
        new_url = get_proxy_url() % urllib.quote_plus(url)
        headers['Referer'] = 'http://%s/' % urlparse.urlparse(new_url).netloc
        request = urllib2.Request(new_url, headers=headers)
        response = urllib2.urlopen(request, timeout=10)
        return response
    except:
        pass
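# --- Hedged usage sketch: get_raw() returns the raw urllib2 response (or None
# on failure) rather than the body, so callers can read the post-redirect URL,
# as the proxy.get_raw(...).geturl() call in the load_player sources() method
# above does. The URL below is hypothetical:
#
#     response = get_raw('http://example.com/video')
#     if response is not None:
#         final_url = response.geturl()  # URL after the proxy's redirects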
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb):
    try:
        headers = {'User-Agent': random_agent()}
        query = self.search_link % (urllib.quote_plus(title))
        query = urlparse.urljoin(self.base_link, query)
        cleaned_title = clean_title(title)
        ep_id = int(episode)
        season_id = int(season)
        html = requests.get(query, headers=headers, timeout=30).json()
        results = html['series']
        for item in results:
            r_title = item['label'].encode('utf-8')
            r_link = item['seo'].encode('utf-8')
            if cleaned_title == clean_title(r_title):
                r_page = self.base_link + "/" + r_link
                # print("WATCHEPISODES r1", r_title, r_page)
                r_html = BeautifulSoup(requests.get(r_page, headers=headers, timeout=30).content)
                r = r_html.findAll('div', attrs={'class': re.compile('\s*el-item\s*')})
                for container in r:
                    try:
                        r_href = container.findAll('a')[0]['href'].encode('utf-8')
                        r_title = container.findAll('a')[0]['title'].encode('utf-8')
                        # print("WATCHEPISODES r3", r_href, r_title)
                        episode_check = "[sS]%02d[eE]%02d" % (int(season), int(episode))
                        match = re.search(episode_check, r_title)
                        if match:
                            # print("WATCHEPISODES PASSED EPISODE", r_href)
                            return self.sources(replaceHTMLCodes(r_href))
                        match2 = re.search(episode_check, r_href)
                        if match2:
                            # print("WATCHEPISODES PASSED EPISODE", r_href)
                            return self.sources(replaceHTMLCodes(r_href))
                    except:
                        pass
    except:
        pass
    return []
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb):
    try:
        for try_year in [str(year), str(int(year) - 1)]:
            tvshowtitle = '%s %s: Season %s' % (title, try_year, season)
            headers = {'X-Requested-With': 'XMLHttpRequest', 'User-Agent': random_agent()}
            post = {
                'aspp': tvshowtitle,
                'action': 'ajaxsearchpro_search',
                'options': 'qtranslate_lang=0&set_exactonly=checked&set_intitle=None&customset[]=post',
                'asid': '4',
                'asp_inst_id': '4_1'
            }
            url = urlparse.urljoin(self.base_link, self.tvsearch_link)
            html = BeautifulSoup(self.scraper.post(url, data=post, headers=headers, timeout=30).content)
            links = html.findAll('a', attrs={'class': 'asp_res_url'})
            show_url = None
            for link in links:
                href = link["href"]
                link_tvshowtitle = re.findall('(.+?: Season \d+)', link.contents[0].strip())[0]
                if (title.lower() in link_tvshowtitle.lower()
                        and str(season) in link_tvshowtitle
                        and try_year in link_tvshowtitle):
                    show_url = href
                    break
            if show_url is None:
                continue
            episode_url = show_url + '?episode=%01d' % int(episode)
            return self.sources(episode_url)
    except:
        pass
    return []
def scrape_movie(self, title, year, imdb):
    try:
        headers = {'User-Agent': random_agent()}
        query = self.search_link % (urllib.quote_plus(title))
        query = urlparse.urljoin(self.base_link, query)
        cleaned_title = clean_title(title)
        html = BeautifulSoup(requests.get(query, headers=headers, timeout=30).content)
        containers = html.findAll('div', attrs={'class': 'cell_container'})
        for container in containers:
            links = container.findAll('a')
            for link in links:
                link_title = link['title']
                href = link['href']
                if len(link_title) > 0 and len(href) > 0:
                    parsed = re.findall('(.+?) \((\d{4})', link_title)
                    parsed_title = parsed[0][0]
                    parsed_year = parsed[0][1]
                    if cleaned_title == clean_title(parsed_title) and year == parsed_year:
                        return self.sources(replaceHTMLCodes(href))
    except:
        pass
    return []
def sources(self, url):
    # print '::::::::::::::' + url
    sources = []
    try:
        if url is None:
            return sources
        count = 0
        headers = {'User-Agent': random_agent()}
        html = requests.get(url, headers=headers, timeout=30).content
        r = re.compile('<div class="ll-item">.+?<a href="(.+?)"', re.DOTALL).findall(html)
        for url in r:
            if count >= 10:  # cap the number of host pages fetched
                break
            count += 1
            page = requests.get(url).content
            host_urls = re.compile('<div class="wb-main">.+?<a rel="nofollow" target="_blank" href="(.+?)"',
                                   re.DOTALL).findall(page)
            for final_url in host_urls:
                holster = final_url.split('//')[1].replace('www.', '')
                holster = holster.split('/')[0].split('.')[0].title()
                sources.append({'source': holster, 'quality': 'SD',
                                'scraper': self.name, 'url': final_url, 'direct': False})
    except:
        pass
    return sources
def scrape_movie(self, title, year, imdb):
    try:
        # print("MOVIEGO INIT")
        headers = {'User-Agent': random_agent()}
        searchquery = self.search_link % (urllib.quote_plus(title), year)
        query = urlparse.urljoin(self.base_link, searchquery)
        cleaned_title = clean_title(title)
        html = BeautifulSoup(requests.get(query, headers=headers).content)
        containers = html.findAll('div', attrs={'class': 'short_content'})
        # print("MOVIEGO MOVIES", containers)
        for items in containers:
            href = items.findAll('a')[0]['href']
            result_title = items.findAll('div', attrs={'class': 'short_header'})[0]
            if year in str(result_title):
                result_title = normalize(str(result_title))
                if result_title == cleaned_title:
                    return self.sources(replaceHTMLCodes(href))
    except:
        pass
    return []
def scrape_music(self, title, artist, debrid=False):
    try:
        # print("ONEMUSIC")
        headers = {'User-Agent': random_agent()}
        query = self.search_link % (urllib.quote_plus(title.replace("'", "")))
        query = urlparse.urljoin(self.base_link, query)
        # print("ONEMUSIC", query)
        artist_name = clean_title(artist)
        song_name = clean_title(title)
        # print("ONEMUSIC ARTIST", artist_name)
        html = BeautifulSoup(requests.get(query, headers=headers, timeout=30).content)
        self.musiclist = []
        containers = html.findAll('div', attrs={'class': 'sr-songs-list'})
        for blocks in containers:
            song_block = blocks.findAll('div', attrs={'class': 'item-caption'})
            for item in song_block:
                href = item.findAll('a')[0]['href'].encode('utf-8')
                song_title = item.findAll('a')[0]['title'].encode('utf-8')
                if clean_title(song_title) == song_name:
                    artist_block = item.findAll('span', attrs={'class': 'singer'})[0]
                    artist = artist_block.findAll('a')[0]['title'].encode('utf-8')
                    artist = clean_title(artist)
                    print("ONEMUSIC", href, song_title, artist_name)
                    if artist == artist_name:
                        print("ONEMUSIC PASSED", href, song_title, artist)
                        return self.sources(href, "HD")
    except:
        pass
    return []
import xbmc
import json
import re
import urllib
import urlparse

import requests
from BeautifulSoup import BeautifulSoup as BS

from nanscrapers.common import clean_title, random_agent, replaceHTMLCodes
from ..scraper import Scraper

session = requests.Session()
headers = {"User-Agent": random_agent()}


class BeeMP3(Scraper):
    domains = ['beemp3']
    name = "BeeMP3"

    def __init__(self):
        self.base_link = 'https://beemp3.unblocked.bid'
        self.search_link = '/search?query=%s&field=artist'

    def scrape_music(self, title, artist, debrid=False):
        try:
            query = self.search_link % (urllib.quote_plus(artist))
            query = urlparse.urljoin(self.base_link, query)
            html = BS(session.get(query, headers=headers).content)
            result = self.process_results_page(html, title, artist, query)
            if result:
                return result
        except:
            pass
        return []
def sources(self, url):
    sources = []
    try:
        if url is None:
            return sources
        absolute_url = urlparse.urljoin(self.base_link, url)
        headers = {'User-Agent': random_agent()}
        html = BeautifulSoup(requests.get(absolute_url, headers=headers, timeout=30).content)
        pages = []
        embed = html.findAll('div', attrs={'id': 'embed'})[0]
        pages.append(embed.findAll('iframe')[0]["src"])
        for page in pages:
            try:
                if not page.startswith('http'):
                    page = 'http:%s' % page
                html = BeautifulSoup(requests.get(page, headers=headers, timeout=30).content)
                # captions = html.findAll(text=re.compile('kind\s*:\s*(?:\'|\")captions(?:\'|\")'))
                # if not captions: break
                # pcloud-hosted variants
                try:
                    link_text = html.findAll(text=re.compile('url\s*:\s*\'(http(?:s|)://api.pcloud.com/.+?)\''))[0]
                    link = re.findall('url\s*:\s*\'(http(?:s|)://api.pcloud.com/.+?)\'', link_text)[0]
                    variants = json.loads(requests.get(link, headers=headers, timeout=30).content)['variants']
                    for variant in variants:
                        if 'hosts' in variant and 'path' in variant and 'height' in variant:
                            video_url = '%s%s' % (variant['hosts'][0], variant['path'])
                            height = variant['height']
                            if not video_url.startswith('http'):
                                video_url = 'http://%s' % video_url
                            sources.append({'source': 'cdn', 'quality': str(height),
                                            'scraper': self.name, 'url': video_url, 'direct': False})
                except:
                    pass
                # JW Player style file/label pairs
                try:
                    links_text = html.findAll(
                        text=re.compile('"?file"?\s*:\s*"(.+?)"\s*,\s*"?label"?\s*:\s*"(.+?)"'))
                    for link_text in links_text:
                        try:
                            links = re.findall('"?file"?\s*:\s*"([^"]+)"\s*,\s*"?label"?\s*:\s*"(\d+)p?[^"]*"',
                                               link_text)
                            for link in links:
                                video_url = link[0]
                                if not video_url.startswith('http'):
                                    video_url = 'http:%s' % video_url
                                try:
                                    req = requests.head(video_url, headers=headers)
                                    if req.headers['Location'] != "":
                                        video_url = req.headers['Location']
                                except:
                                    pass
                                quality = link[1]
                                sources.append({'source': 'google video', 'quality': quality,
                                                'scraper': self.name, 'url': video_url, 'direct': True})
                        except:
                            continue
                except:
                    pass
            except:
                pass
    except:
        pass
    return sources
import re
import time

import requests
import xbmc
import xbmcaddon

from nanscrapers.common import clean_title, clean_search, random_agent, send_log, error_log
from ..scraper import Scraper

dev_log = xbmcaddon.Addon('script.module.nanscrapers').getSetting("dev_log")
headers = {"User-Agent": random_agent()}


class freemusic(Scraper):
    domains = ['freemusicdownloads']
    name = "Freemusic"
    sources = []

    def __init__(self):
        self.base_link = 'http://down.freemusicdownloads.world/'
        self.sources = []
        if dev_log == 'true':
            self.start_time = time.time()

    def scrape_music(self, title, artist, debrid=False):
        try:
            song_search = clean_title(title.lower()).replace(' ', '+')
            artist_search = clean_title(artist.lower()).replace(' ', '+')
            start_url = '%sresults?search_query=%s+%s' % (self.base_link, artist_search, song_search)
            html = requests.get(start_url, headers=headers, timeout=20).content
            match = re.compile('<h4 class="card-title">.+?</i>(.+?)</h4>.+?id="(.+?)"', re.DOTALL).findall(html)
            count = 0
            for m, link in match:
def sources(self, url):
    sources = []
    try:
        # print("ONEMOVIES SOURCES", url)
        if url is None:
            return sources
        referer = url
        headers = {'User-Agent': random_agent()}
        url = url.replace('/watching.html', '')
        html = requests.get(url, headers=headers).content
        try:
            url, episode = re.findall('(.+?)\?episode=(\d*)$', url)[0]
        except:
            episode = None
        vid_id = re.findall('-(\d+)', url)[-1]
        # print ("ONEMOVIES", vid_id)
        quality = re.findall('<span class="quality">(.*?)</span>', html)
        quality = quality[0].lower() if quality else ''
        if quality == 'cam' or quality == 'ts':
            quality = 'CAM'
        elif quality == 'hd':
            quality = '720'
        else:
            quality = '480'
        try:
            headers = {'X-Requested-With': 'XMLHttpRequest',
                       'Referer': referer,
                       'User-Agent': random_agent()}
            u = urlparse.urljoin(self.base_link, self.server_link % vid_id)
            # print("SERVERS", u)
            r = BeautifulSoup(requests.get(u, headers=headers).content)
            # print("SERVERS", r)
            containers = r.findAll('div', attrs={'class': 'les-content'})
            for result in containers:
                links = result.findAll('a')
                # print("ONEMOVIES", links)
                for link in links:
                    title = str(link['title'])
                    # print("ONEMOVIES TITLE", title)
                    if episode is not None:
                        title = re.findall('Episode\s+(\d+):', title)[0]
                        title = '%01d' % int(title)
                        if title == episode:
                            episode_id = str(link['episode-id'])
                            # print("ONEMOVIES EPISODE", episode_id)
                        else:
                            continue
                    else:
                        episode_id = str(link['episode-id'])
                    onclick = str(link['onclick'])
                    key_gen = ''.join(random.choice(string.ascii_lowercase + string.digits) for x in range(16))
                    ################# FIX FROM MUCKY DUCK & XUNITY TALK ################
                    key = '87wwxtp3dqii'
                    key2 = '7bcq9826avrbi6m49vd7shxkn985mhod'
                    cookie = hashlib.md5(episode_id + key).hexdigest() + '=%s' % key_gen
                    a = episode_id + key2
                    b = key_gen
                    i = b[-1]
                    h = b[:-1]
                    b = i + h + i + h + i + h
                    hash_id = uncensored(a, b)
                    ################# FIX FROM MUCKY DUCK & XUNITY TALK ################
                    serverurl = self.base_link + '/ajax/v2_get_sources/' + episode_id + '?hash=' + urllib.quote(hash_id)
                    # print ("playurl ONEMOVIES", serverurl)
                    headers = {'Accept-Language': 'en-US',
                               'Cookie': cookie,
                               'Referer': referer,
                               'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.89 Safari/537.36',
                               'X-Requested-With': 'XMLHttpRequest'}
                    result = requests.get(serverurl, headers=headers).content
                    result = result.replace('\\', '')
                    # print ("ONEMOVIES Result", result)
                    url = re.findall('"?file"?\s*:\s*"(.+?)"', result)
                    url = [googletag(i) for i in url]
                    url = [i[0] for i in url if len(i) > 0]
                    u = []
                    try:
                        u += [[i for i in url if i['quality'] == '1080p'][0]]
                    except:
                        pass
                    try:
                        u += [[i for i in url if i['quality'] == '720'][0]]
                    except:
                        pass
                    try:
                        u += [[i for i in url if i['quality'] == '480'][0]]
                    except:
                        pass
                    url = replaceHTMLCodes(u[0]['url'])
                    quality = googletag(url)[0]['quality']
                    # print ("ONEMOVIES PLAY URL", quality, url)
                    sources.append({'source': 'google video', 'quality': quality,
                                    'scraper': self.name, 'url': url, 'direct': True})
        except:
            pass
    except:
        pass
    return sources
def sources(self, url):
    sources = []
    try:
        if url is None:
            return sources
        if self.base_link not in url:
            url = urlparse.urljoin(self.base_link, url)
        content = re.compile('(.+?)\?episode=\d*$').findall(url)
        video_type = 'movie' if len(content) == 0 else 'episode'
        try:
            url, episode = re.compile('(.+?)\?episode=(\d*)$').findall(url)[0]
        except:
            pass
        headers = {'User-Agent': random_agent()}
        html = self.scraper.get(url, headers=headers, timeout=30).content
        try:
            compressedstream = StringIO.StringIO(html)
            html = gzip.GzipFile(fileobj=compressedstream).read()
            html = BeautifulSoup(html)
        except:
            html = BeautifulSoup(html)
        links = html.findAll('a', attrs={'target': 'EZWebPlayer'})
        for link in links:
            href = replaceHTMLCodes(link['href'])
            if "get.php" not in href:
                continue
            if video_type == 'episode':
                link_episode_number = re.compile('(\d+)').findall(link.string)
                if len(link_episode_number) > 0:
                    link_episode_number = link_episode_number[-1]
                    if not link_episode_number == '%01d' % int(episode):
                        continue
            referer = url
            headers = {'User-Agent': random_agent(), 'Referer': referer}
            html = self.scraper.get(href, headers=headers, timeout=30).content
            source = re.findall('sources\s*:\s*\[(.+?)\]', html)[0]
            files = re.findall('"file"\s*:\s*"(.+?)".+?"label"\s*:\s*"(.+?)"', source)
            if files:
                quality_url_pairs = [{'url': file[0], 'quality': file[1][:-1]} for file in files]
            else:
                files = re.findall('"file"\s*:\s*"(.+?)".+?}', source)
                quality_url_pairs = [{'url': file, 'quality': "SD"} for file in files]
            for pair in quality_url_pairs:
                sources.append({'source': 'google video', 'quality': pair['quality'],
                                'scraper': self.name, 'url': pair['url'], 'direct': True})
    except:
        pass
    return sources
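# --- Illustrative aside: the file/label extraction above applied to a
# fabricated EZWebPlayer-style sources blob (not real site output):
import re

source = '{"file":"http://video.example/a.mp4","label":"720p"},{"file":"http://video.example/b.mp4","label":"360p"}'
files = re.findall('"file"\s*:\s*"(.+?)".+?"label"\s*:\s*"(.+?)"', source)
print(files)  # [('http://video.example/a.mp4', '720p'), ('http://video.example/b.mp4', '360p')]
# sources() then drops the trailing 'p' via file[1][:-1] to get '720' / '360'.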
def sources(self, url):
    sources = []
    alt_links = []
    play_links = []
    try:
        if url is None:
            return sources
        headers = {'User-Agent': random_agent()}
        mainpage = requests.get(url, headers=headers).content
        html = BeautifulSoup(mainpage)
        try:
            film_quality = re.findall('<div class="poster-qulabel">(.*?)</div>', mainpage)[0]
            print("MOVIEGO film_quality", film_quality)
            if "1080" in film_quality:
                quality = "1080"
            elif "720" in film_quality:
                quality = "720"
            else:
                quality = "SD"
            url = re.findall('file:\s+"([^"]+)"', mainpage)[0]
            url = url.encode('utf-8')
            sources.append({'source': 'CDN', 'quality': quality,
                            'scraper': self.name, 'url': url, 'direct': True})
        except:
            pass
        iframe = html.findAll("iframe")[0]
        original_frame = iframe['src']
        iframe_html = BeautifulSoup(requests.get(iframe["src"], headers=headers).content)
        scripts = iframe_html.findAll("script")
        unpacked_script = ""
        for script in scripts:
            try:
                unpacked_script += unpack(script.text)
            except:
                pass
        try:
            alternative_links = re.findall('Alternative (\d+)<', unpacked_script)
            for alts in alternative_links:
                alt_links.append(alts)
        except:
            pass
        # print ("MOVIEGO ALTS", alt_links)
        links = re.findall('<source src="(.*?)"', unpacked_script)
        if links:
            for link_url in links:
                if "google" in link_url:
                    play_links.append(link_url.replace(' ', ''))
    except:
        pass
    try:
        for ids in alt_links:
            headers = {'User-Agent': random_agent()}
            alt_frames = original_frame + "?source=a" + ids
            alt_iframe_html = BeautifulSoup(requests.get(alt_frames, headers=headers).content)
            alt_scripts = alt_iframe_html.findAll("script")
            unpacked_script = ""
            for script in alt_scripts:
                try:
                    unpacked_script += unpack(script.text)
                except:
                    pass
            links = re.findall('<source src="(.*?)"', unpacked_script)
            if links:
                for link_url in links:
                    if "google" in link_url:
                        play_links.append(link_url.replace(' ', ''))
    except:
        pass
    ############# DUPLICATES CHECK ################
    try:
        seen = []
        for url in play_links:
            if url not in seen:
                seen.append(url)
                print("MOVIEGO PLAY url", url)
                quality = googletag(url)[0]['quality']
                url = url.encode('utf-8')
                sources.append({'source': 'google video', 'quality': quality,
                                'scraper': self.name, 'url': url, 'direct': True})
    except:
        pass
    return sources
def sources(self, url, quality):
    sources = []
    try:
        headers = {'User-Agent': random_agent(),
                   'X-Requested-With': 'XMLHttpRequest',
                   'Referer': url}
        html = BeautifulSoup(requests.get(url, headers=headers, timeout=30).content)
        servers = html.findAll("span", attrs={'class': re.compile(".*?btn-eps.*?")})
        for server in servers:
            try:
                server_url = '/demo.php?v=%s' % server["link"]
                server_url = urlparse.urljoin(self.base_link, server_url)
                server_html = requests.get(server_url, headers=headers, timeout=30).content
                links = []
                try:
                    links.extend(re.findall(r'sources: \[ \{file: "(.*?)"', server_html, re.I | re.DOTALL))
                except:
                    pass
                try:
                    links.extend(re.findall(r'<source.*?src="(.*?)"', server_html, re.I | re.DOTALL))
                except:
                    pass
                try:
                    links.extend(re.findall(r'<iframe.*?src="(.*?)"', server_html, re.I | re.DOTALL))
                except:
                    pass
                for link in links:
                    try:
                        link_source = link.replace('../view.php?', 'view.php?').replace('./view.php?', 'view.php?')
                        if not link_source.startswith('http'):
                            link_source = urlparse.urljoin(self.base_link, link_source)
                        if "m4u" in link_source:
                            try:
                                req = requests.head(link_source, headers=headers)
                                if req.headers['Location'] != "":
                                    link_source = req.headers['Location']
                            except:
                                pass
                        if 'google' in link_source:
                            quality = googletag(link_source)[0]['quality']
                            sources.append({'source': 'google video', 'quality': quality,
                                            'scraper': self.name, 'url': link_source, 'direct': True})
                        elif 'openload.co' in link_source:
                            sources.append({'source': 'openload.co', 'quality': quality,
                                            'scraper': self.name, 'url': link_source, 'direct': False})
                        else:
                            sources.append({'source': 'M4U', 'quality': quality,
                                            'scraper': self.name, 'url': link_source, 'direct': True})
                    except:
                        continue
            except:
                continue
    except:
        pass
    return sources
def sources(self, url):
    sources = []
    try:
        if not url.startswith('http://'):
            url = urlparse.urljoin(self.base_link, url)
        headers = {'User-Agent': random_agent()}
        html = BeautifulSoup(requests.get(url, headers=headers).content)
        headers['Referer'] = url
        player_iframe_url = html.findAll("iframe")[0]["src"]
        html = BeautifulSoup(requests.get(player_iframe_url, headers=headers).content)
        buttons = html.findAll('div', attrs={'id': 'botones'})[0]
        player_links = buttons.findAll('a')
        for player_link in player_links:
            try:
                href = player_link["href"]
                if "thevideos.tv" in href:
                    sources.append({'source': 'thevideos.tv', 'quality': 'SD',
                                    'scraper': self.name, 'url': href, 'direct': False})
                    continue
                elif "openload.co" in href:
                    sources.append({'source': 'openload.co', 'quality': 'SD',
                                    'scraper': self.name, 'url': href, 'direct': False})
                    continue
                elif "pelispedia" in href:
                    headers["Referer"] = player_iframe_url
                    html = requests.get(href, headers=headers).content
                    try:
                        html_sources = re.findall('sources\s*:\s*\[(.+?)\]', html)
                        for source in html_sources:
                            files = re.findall('"file"\s*:\s*"(.+?)"', source)
                            for file in files:
                                file = file.split()[0].replace('\\/', '/')
                                sources.append({'source': 'google video',
                                                'quality': googletag(file)[0]['quality'],
                                                'scraper': self.name, 'url': file, 'direct': True})
                    except:
                        pass
                    try:
                        headers["Referer"] = href
                        headers['X-Requested-With'] = 'XMLHttpRequest'
                        gks_url = urlparse.urljoin(self.base_link, '/Pe_flv_flsh/plugins/gkpluginsphp.php')
                        post = {'link': re.findall('gkpluginsphp.*?link\s*:\s*"([^"]+)', html)[0]}
                        episode_link = json.loads(requests.post(gks_url, data=post, headers=headers).content)['link']
                        sources.append({'source': 'google video', 'quality': 'SD',
                                        'scraper': self.name, 'url': episode_link, 'direct': True})
                    except:
                        pass
                    try:
                        headers['X-Requested-With'] = 'XMLHttpRequest'
                        post_parameters = re.findall('var\s+parametros\s*=\s*"([^"]+)', html)[0]
                        post_pic = urlparse.parse_qs(urlparse.urlparse(post_parameters).query)['pic'][0]
                        post = {'sou': 'pic', 'fv': '21', 'url': post_pic}
                        protected_url = urlparse.urljoin(self.base_link, '/Pe_Player_Html5/pk/pk/plugins/protected.php')
                        episode_link = json.loads(requests.post(protected_url, data=post, headers=headers).content)[0]["link"]
                        sources.append({'source': 'cdn', 'quality': 'SD',
                                        'scraper': self.name, 'url': episode_link, 'direct': True})
                    except:
                        pass
            except:
                continue
        return sources
    except:
        pass
    return sources