import re
import json
import base64
import urllib
import urlparse

# Project modules (client, cleantitle, logger, jsunpack, urlresolver,
# EnkDekoder, BeautifulSoup, ...) are provided by the surrounding add-on
# package and are assumed to be importable here.


def get(url, check, headers=None, data=None):
    if headers is None:
        headers = {'User-Agent': client.randomagent()}

    # Try the URL directly first.
    try:
        html = client.request(url, headers=headers, post=data)
        if check in str(html):
            return html
    except:
        pass

    # Fall back to the proxy, retrying once on failure.
    for _ in range(2):
        try:
            new_url = get_proxy_url() % urllib.quote_plus(url)
            headers['Referer'] = 'http://%s/' % urlparse.urlparse(new_url).netloc
            html = client.request(new_url, headers=headers, timeout=10)
            if check in str(html):
                return html
        except:
            pass

    return None
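
# Usage sketch (the URL and marker below are placeholders, not from the
# add-on): fetch a page and keep it only when the marker string confirms
# the expected content survived the proxy round-trip.
def _get_example():
    html = get('http://www.example.com/watch/123', check='entry-content')
    if html is None:
        return []
    return client.parseDOM(html, 'a', ret='href')
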
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        query = cleantitle.get(title)
        query = '/watch?v=%s_%s' % (query.replace(' ', '_'), year)
        query = urlparse.urljoin(self.base_link, query)
        headers = {'User-Agent': self.userAgent}

        result = client.request(query, headers=headers)
        varid = re.compile('var frame_url = "(.+?)"', re.DOTALL).findall(result)[0]
        varid = 'http:' + varid.replace('/embed/', '/streamdrive/info/')
        res_chk = re.compile('class="title"><h1>(.+?)</h1>', re.DOTALL).findall(result)[0]

        holder = client.request(varid, headers=headers)
        links = re.compile('"src":"(.+?)"', re.DOTALL).findall(holder)

        # Infer quality from the page title rather than the stream URLs.
        if '1080' in res_chk:
            res = '1080p'
        elif '720' in res_chk:
            res = '720p'
        else:
            res = 'DVD'

        for link in links:
            link = link.replace('\\/redirect?url=', '')
            link = urllib.unquote(link).decode('utf8')
            self.srcs.append({'source': 'Googlelink', 'parts': '1',
                              'quality': res, 'scraper': self.name,
                              'url': link, 'direct': False})
        return self.srcs
    except Exception as e:
        logger.error('[%s] Exception : %s' % (self.__class__, e))
        return []
def resolve(url, allowDebrid=False):
    u = url
    url = False

    # Custom resolvers shipped with the add-on.
    try:
        host = client.host(u)
        r = [i['class'] for i in info() if host in i['host']][0]
        r = __import__(r, globals(), locals(), [], -1)
        url = r.resolve(u)
        if url == False:
            raise Exception()
    except:
        pass

    # Fall back to urlresolver.
    try:
        if not url == False:
            raise Exception()
        logger.debug('Trying URL Resolver for %s' % u, __name__)
        hmf = urlresolver.HostedMediaFile(url=u, include_disabled=True,
                                          include_universal=allowDebrid)
        if hmf.valid_url() == True:
            url = hmf.resolve()
        else:
            url = False
    except:
        pass

    if url == False:
        return False

    # Split the optional '|key=value' header suffix off the resolved URL.
    try:
        headers = url.rsplit('|', 1)[1]
    except:
        headers = ''
    if ' ' in headers:
        headers = urllib.quote_plus(headers).replace('%3D', '=').replace('%26', '&')
    headers = dict(urlparse.parse_qsl(headers))

    # Sanity-check that the stream actually responds before handing it back.
    if url.startswith('http') and '.m3u8' in url:
        result = client.request(url.split('|')[0], headers=headers,
                                output='geturl', timeout='20')
        if result == None:
            return False
    elif url.startswith('http'):
        result = client.request(url.split('|')[0], headers=headers,
                                output='chunk', timeout='20')
        if result == None:
            logger.debug('Resolved %s but unable to play' % url, __name__)
            return False

    return url
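
# Sketch of the intended call pattern (the hoster URL is a placeholder):
# resolve() hands back a direct, playable URL, or False when every resolver
# fails or the stream does not respond.
def _resolve_example():
    stream = resolve('http://somehost.example/embed-abc123.html')
    if stream == False:
        logger.debug('Could not resolve stream', __name__)
    return stream
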
def sources(self, url):
    logger.debug('SOURCES URL %s' % url, __name__)
    try:
        srcs = []
        if url == None:
            return srcs
        # url = urlparse.urljoin(self.base_link, url)
        try:
            result = client.request(url, referer=self.base_link)
        except:
            result = ''
        result = result.decode('iso-8859-1').encode('utf-8')
        result = result.replace('\n', '').replace('\t', '')
        result = client.parseDOM(result, "div",
                                 attrs={"class": "entry-content clearfix single-post-content"})
        result = client.parseDOM(result, "p", attrs={"style": "text-align: center;"})
        items = client.parseDOM(result, "a", ret="href")
        for item in items:
            try:
                url = item
                if 'digibolly.se' in url:
                    result = client.request(url)
                    url = re.findall('<iframe src="(.+?)"', result, re.IGNORECASE)[0]
                host = client.host(url)
                srcs.append({'source': host, 'parts': '1', 'quality': 'HD',
                             'scraper': self.name, 'url': url, 'direct': False})
            except:
                pass
        logger.debug('SOURCES [%s]' % srcs, __name__)
        return srcs
    except Exception as e:
        logger.error('[%s] Exception : %s' % (self.__class__, e))
        return srcs
def sources(self, url):
    logger.debug('SOURCES URL %s' % url, __name__)
    try:
        srcs = []
        if url == None:
            return srcs
        url = urlparse.urljoin(self.base_link, url)
        try:
            result = client.request(url, referer=self.base_link)
        except:
            result = ''
        result = result.decode('iso-8859-1').encode('utf-8')
        result = result.replace('\n', '').replace('\t', '')
        items = client.parseDOM(result, "div", attrs={"class": "entry-content"})
        for item in items:
            try:
                url = re.compile('(SRC|src|data-config)=[\'|\"](.+?)[\'|\"]').findall(item)[0][1]
                host = client.host(url)
                srcs.append({'source': host, 'parts': '1', 'quality': 'HD',
                             'scraper': self.name, 'url': url, 'direct': False})
            except:
                pass
        logger.debug('SOURCES [%s]' % srcs, __name__)
        return srcs
    except Exception as e:
        logger.error('[%s] Exception : %s' % (self.__class__, e))
        return srcs
def sources(self, url):
    logger.debug('SOURCES URL %s' % url, __name__)
    try:
        if url == None:
            return self.srcs
        try:
            result = client.request(url, referer=self.base_link)
        except:
            result = ''
        items = client.parseDOM(result, "source", ret="src")
        for item in items:
            try:
                url = item
                host = client.host(url)
                self.srcs.append({'source': host, 'parts': '1', 'quality': 'HD',
                                  'scraper': self.name, 'url': url, 'direct': False})
            except:
                pass
        logger.debug('SOURCES [%s]' % self.srcs, __name__)
        return self.srcs
    except Exception as e:
        logger.error('[%s] Exception : %s' % (self.__class__, e))
        return self.srcs
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        t = cleantitle.get(title)
        try:
            query = '%s %s' % (title, year)
            query = base64.b64decode(self.search_link) % urllib.quote_plus(query)
            result = client.request(query)
            result = json.loads(result)['items']
            r = [(i['link'], i['title']) for i in result]
            # Keep entries whose title carries a year, drop duplicates,
            # then match against the cleaned title.
            r = [(i[0], re.compile('(.+?) [\d{4}|(\d{4})]').findall(i[1])) for i in r]
            r = [(i[0], i[1][0]) for i in r if len(i[1]) > 0]
            r = [x for y, x in enumerate(r) if x not in r[:y]]
            r = [i for i in r if t == cleantitle.get(i[1])]
            # u = [i[0] for i in r][0]
            if not r:
                raise Exception()
        except:
            return []
        return self.sources(r)
    except Exception as e:
        logger.error('[%s] Exception : %s' % (self.__class__, e))
        return []
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False):
    try:
        query = '%s %s' % (title, episode)
        query = self.search_link % (urllib.quote_plus(query))
        try:
            result = client.request(self.base_link + query)
        except:
            result = ''
        result = result.decode('iso-8859-1').encode('utf-8')
        result = result.replace('\n', '').replace('\t', '')
        items = client.parseDOM(result, 'content:encoded')[0]
        items = re.compile('class=\"single-heading\">(.+?)<span').findall(items)
        for item in items:
            self.source(item)
        return self.srcs
    except:
        return self.srcs
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        query = '%s %s' % (title, year)
        query = self.search_link % (urllib.quote_plus(query))
        query = urlparse.urljoin(self.base_link, query)
        result = client.request(query, error=True)
        items = client.parseDOM(result, "item")
        cleanedTitle = cleantitle.get(title)
        for item in items:
            linkTitle = client.parseDOM(item, "title")[0]
            if cleanedTitle in cleantitle.get(linkTitle):
                url = client.parseDOM(item, "a", attrs={"rel": "nofollow"}, ret="href")[0]
                break
        # If nothing matched, `url` is unbound and the handler below returns [].
        return self.sources(client.replaceHTMLCodes(url))
    except Exception as e:
        logger.error('[%s] Exception : %s' % (self.__class__, e))
        return []
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        # Look up the film's primary language on IMDb to pick the matching
        # section of the site.
        langMap = {'hi': 'hindi', 'ta': 'tamil', 'te': 'telugu',
                   'ml': 'malayalam', 'kn': 'kannada', 'bn': 'bengali',
                   'mr': 'marathi', 'pa': 'punjabi'}
        lang = 'http://www.imdb.com/title/%s/' % imdb
        lang = client.request(lang)
        lang = re.findall('href\s*=\s*[\'|\"](.+?)[\'|\"]', lang)
        lang = [i for i in lang if 'primary_language' in i]
        lang = [urlparse.parse_qs(urlparse.urlparse(i).query) for i in lang]
        lang = [i['primary_language'] for i in lang if 'primary_language' in i]
        lang = langMap[lang[0][0]]

        q = self.search_link % (lang, urllib.quote_plus(title))
        q = urlparse.urljoin(self.base_link, q)
        t = cleantitle.get(title)

        # Narrow the search results down to an exact title/year match.
        r = self.request(q)
        r = client.parseDOM(r, 'li')
        r = [(client.parseDOM(i, 'a', ret='href'), client.parseDOM(i, 'h3'),
              client.parseDOM(i, 'div', attrs={'class': 'info'})) for i in r]
        r = [(i[0][0], i[1][0], i[2][0]) for i in r if i[0] and i[1] and i[2]]
        r = [(re.findall('(\d+)', i[0]), i[1], re.findall('(\d{4})', i[2])) for i in r]
        r = [(i[0][0], i[1], i[2][0]) for i in r if i[0] and i[2]]
        r = [i[0] for i in r if t == cleantitle.get(i[1]) and year == i[2]][0]

        url = str(r)
        return self.sources(client.replaceHTMLCodes(url))
    except Exception as e:
        logger.error('[%s] Exception : %s' % (self.__class__, e))
        return []
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        query = '%s %s' % (title, year)
        query = self.search_link % (urllib.quote_plus(query))
        query = urlparse.urljoin(self.base_link, query)
        result = client.request(query)
        result = result.decode('iso-8859-1').encode('utf-8')
        items = client.parseDOM(result, "item")
        cleanedTitle = cleantitle.get(title)
        for item in items:
            linkTitle = client.parseDOM(item, "title")[0]
            if cleanedTitle == cleantitle.get(linkTitle):
                url = client.parseDOM(item, "link")[0]
                break
        return self.sources(client.replaceHTMLCodes(url))
    except Exception as e:
        logger.error('[%s] Exception : %s' % (self.__class__, e))
        return []
def scrape_movie(self, title, year, imdb, debrid=False):
    try:
        query = self.search_link % (urllib.quote_plus(title))
        query = urlparse.urljoin(self.base_link, query)
        cleanedTitle = cleantitle.get(title)
        result = client.request(query)
        result = result.decode('iso-8859-1').encode('utf-8')
        items = client.parseDOM(result, "item")
        for item in items:
            linkTitle = client.parseDOM(item, 'title')[0]
            try:
                # The year captured by the regex is not checked; matching
                # is on title alone.
                parsedTitle = re.compile('(.+?) \((\d{4})\) ').findall(linkTitle)[0][0]
            except:
                parsedTitle = ''
            if cleanedTitle == cleantitle.get(parsedTitle):
                url = client.parseDOM(item, "link")[0]
                return self.sources(client.replaceHTMLCodes(url))
    except Exception as e:
        logger.error('[%s] Exception : %s' % (self.__class__, e))
    return []
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False):
    try:
        query = '%s %s' % (title, episode)
        query = self.search_link % (urllib.quote_plus(query))
        result = client.request(self.base_link + query)
        result = result.decode('iso-8859-1').encode('utf-8')
        result = result.replace('\n', '').replace('\t', '')
        items = client.parseDOM(result, 'item')
        cleanedTitle = cleantitle.get('%s %s' % (title, episode))
        for item in items:
            linkTitle = client.parseDOM(item, 'title')[0]
            linkTitle = cleantitle.get(linkTitle).replace('watchonlineepisodehd', '')
            if cleanedTitle == linkTitle:
                url = client.parseDOM(item, "link")[0]
                break
        return self.sources(client.replaceHTMLCodes(url))
    except:
        return self.srcs
def resolve(url):
    try:
        result = client.request(url)
        dek = EnkDekoder.dekode(result)
        if not dek == None:
            url = client.parseDOM(dek, "param", attrs={"name": "flashvars"}, ret="value")[0]
        else:
            dek = result
            url = re.compile('file*:*"(http.+?)"').findall(dek)[0]
        if re.search(';video_url', url):
            url = re.findall(';video_url=(.+?)&', url)[0]
        elif re.search('iframe src=', url):
            url = re.findall('<iframe src="(.+?)"', url)[0]
        url = url.replace('_ipod.mp4', '.flv')
        url = url.replace('preview', 'edit')
        logger.debug('URL [%s]' % url, __name__)
        return url
    except:
        return False
def sources(self, url):
    logger.debug('SOURCES URL %s' % url, __name__)
    srcs = []
    try:
        quality = 'HD'
        try:
            result = client.request(url)
        except:
            result = ''
        result = result.decode('iso-8859-1').encode('utf-8')
        result = result.replace('\n', '').replace('\t', '')
        result = client.parseDOM(result, "div", attrs={"class": "single-post-video"})[0]
        items = re.compile('(SRC|src|data-config)=[\'|\"](.+?)[\'|\"]').findall(result)
        for item in items:
            # Skip thumbnail images; only keep embeddable players.
            if item[1].endswith('png'):
                continue
            host = client.host(item[1])
            url = item[1]
            parts = [url]
            srcs.append({'source': host, 'parts': len(parts), 'quality': quality,
                         'scraper': self.name, 'url': "##".join(parts),
                         'direct': False})
        logger.debug('SOURCES [%s]' % srcs, __name__)
        return srcs
    except:
        return srcs
def getTVShowPosterFromGoogle(self, showName, retry):
    # Despite the name, this queries the Bing Image Search API; the old
    # Google Images endpoint is kept below for reference.
    if retry == 0:
        return ''
    # baseURL = 'https://ajax.googleapis.com/ajax/services/search/images?v=1.0&q={query}'
    keyBing = 'btcCcvQ4Sfo9P2Q7u62eOREA1NfLEQPezqCNb+2LVhY'  # get a Bing key from: https://datamarket.azure.com/account/keys
    # '[:-1]' strips the trailing '\n' that .encode('base64') adds.
    credentialBing = 'Basic ' + (':%s' % keyBing).encode('base64')[:-1]
    headers = {'Authorization': credentialBing}
    baseURL = 'https://api.datamarket.azure.com/Bing/Search/v1/Image?Query=%27{query}%27&$format=json'
    query = showName.lower() + ' poster'
    url = baseURL.format(query=urllib.quote_plus(query))
    try:
        iconImage = None
        result = client.request(url, headers=headers)
        results = json.loads(result)['d']['results']
        for image_info in results:
            iconImage = image_info['MediaUrl']
            break
        if iconImage is not None:
            return iconImage
        return '0'
    except:
        return self.getTVShowPosterFromGoogle(showName, retry - 1)
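
# Usage sketch (the show name is a placeholder): retry up to three times.
# Callers get a poster URL, '0' when the search succeeds but finds nothing,
# or '' once the retries are exhausted.
def _poster_example(scraper):
    poster = scraper.getTVShowPosterFromGoogle('some show title', retry=3)
    return poster if poster not in ('', '0') else None
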
def resolve(url):
    try:
        result = client.request(url)
        url = re.findall('file: "(.+?)"', result)[0]
        logger.debug('URL [%s]' % url, __name__)
        return url
    except:
        return False
def resolve(url):
    try:
        url = 'http://playu.net/embed-%s.html' % str(getVideoID(url))
        result = client.request(url)
        url = re.findall('file: "(.+?)"', result)[0]
        logger.debug('URL [%s]' % url, __name__)
        return url
    except:
        return False
def sources(self, url):
    try:
        logger.debug('SOURCES URL %s' % url, __name__)
        if url == None:
            return []

        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        cleanedTitle = cleantitle.get(title)

        # Episodes match on 'SxxEyy', films on the year.
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        query = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)

        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)

        r = client.request(url)
        posts = client.parseDOM(r, 'item')

        items = []
        for post in posts:
            try:
                t = client.parseDOM(post, 'title')[0]
                post = post.replace('\n', '').replace('\t', '')
                post = re.compile('<span style="color: #ff0000">Single Link</b></span><br />(.+?)<span style="color: #ff0000">').findall(post)[0]
                u = re.findall('<a href="(http(?:s|)://.+?)">', post)
                items += [(t, i) for i in u]
            except:
                pass

        for item in items:
            try:
                name = client.replaceHTMLCodes(item[0])
                linkTitle = re.sub('(\.|\(|\[|\s)(\d{4}|S\d*E\d*|S\d*|3D)(\.|\)|\]|\s|)(.+|)', '', name)
                if not cleanedTitle == cleantitle.get(linkTitle):
                    raise Exception()
                year = re.findall('[\.|\(|\[|\s](\d{4}|S\d*E\d*|S\d*)[\.|\)|\]|\s]', name)[-1].upper()
                if not year == hdlr:
                    raise Exception()
                self.source(item)
            except:
                pass

        logger.debug('SOURCES [%s]' % self.srcs, __name__)
        return self.srcs
    except:
        return self.srcs
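
# Sketch of the querystring this sources() expects, derived from the
# parse_qs call above (titles and numbers are placeholders):
#   movie:   'title=Example+Movie&year=2016'
#   episode: 'tvshowtitle=Example+Show&season=1&episode=2'
def _sources_example(scraper):
    q = urllib.urlencode({'tvshowtitle': 'Example Show', 'season': '1', 'episode': '2'})
    return scraper.sources(q)
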
def source(self, url):
    logger.debug('SOURCES URL %s' % url, __name__)
    try:
        srcs = []
        if url == None:
            return srcs
        result, response_code, response_headers, headers, cookie = client.request(url, output='extended')
        result = result.replace('\n', '').replace('\t', '').replace('\r', '')
        referer = headers.get('Referer')

        result = client.parseDOM(result, 'div', attrs={"class": "detail ls_item"})[0]
        link = client.parseDOM(result, 'div', attrs={"class": "loaer_detai"})[0]
        link = client.parseDOM(link, 'a', ret='href')[0]
        link = urlparse.urljoin(referer, link)

        result = client.request(link)
        result = re.compile('sources:\s\[(.+?)\]').findall(result)[0]
        result = json.loads('[%s]' % result)

        for item in result:
            url = item.get('file')
            label = item.get('label')
            if '1080p' in label:
                quality = '1080p'
            elif '720p' in label:
                quality = 'HD'
            elif '360p' in label:
                quality = 'SD'
            else:
                quality = 'SCR'
            host = client.host(url)
            srcs.append({'source': host, 'parts': '1', 'quality': quality,
                         'scraper': self.name, 'url': url, 'direct': False})
        logger.debug('SOURCES [%s]' % srcs, __name__)
        return srcs
    except:
        return srcs
def source(self, item):
    try:
        quality = 'HD' if '720p' in item else 'SD'
        urls = client.parseDOM(item, "a", ret="href")
        # Each link points at an info page; pull the real player URL out of
        # each one before joining the parts.
        for j in range(0, len(urls)):
            videoID = self.getVideoID(urls[j])
            result = client.request(self.info_link % videoID)
            result = result.decode('iso-8859-1').encode('utf-8')
            item = client.parseDOM(result, name="div",
                                   attrs={"style": "float:none;height:700px;margin-left:200px"})[0]
            rUrl = re.compile('(SRC|src|data-config)=[\'|\"](.+?)[\'|\"]').findall(item)[0][1]
            if not rUrl.startswith('http:') and not rUrl.startswith('https:'):
                rUrl = '%s%s' % ('http:', rUrl)
            urls[j] = rUrl
        host = client.host(urls[0])
        url = "##".join(urls)
        self.srcs.append({'source': host, 'parts': str(len(urls)),
                          'quality': quality, 'scraper': self.name,
                          'url': url, 'direct': False})
    except Exception as e:
        logger.error(e)
    return self.srcs
def get_raw(url, headers=None, data=None):
    if headers is None:
        headers = {'User-Agent': client.randomagent()}
    try:
        # Always go through the proxy; returns the raw response body.
        new_url = get_proxy_url() % urllib.quote_plus(url)
        headers['Referer'] = 'http://%s/' % urlparse.urlparse(new_url).netloc
        response = client.request(new_url, headers=headers, timeout=10)
        return response
    except:
        return None
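
# Usage sketch: unlike get(), get_raw() performs no content check, so callers
# must validate the body themselves (URL and marker are placeholders).
def _get_raw_example():
    body = get_raw('http://www.example.com/feed/')
    return body if body and '<rss' in body else None
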
def resolve(url):
    try:
        hdUrl = None
        try:
            result = client.request(url)
            rUrl = client.parseDOM(result, name="source", ret="src")[0]
            videoId = getVideoID(rUrl)
            rUrl = 'http://www.apnasave.in/media/player/config_embed.php?vkey=%s' % videoId
            result = client.request(rUrl)
            # Prefer the HD stream; fall back to the standard source.
            try:
                hdUrl = client.parseDOM(result, name="hd")[0]
                url = hdUrl
            except:
                pass
            if hdUrl == None:
                url = client.parseDOM(result, name="src")[0]
        except:
            pass
        logger.debug('URL [%s]' % url, __name__)
        return url
    except:
        return False
def resolve(url):
    try:
        result = client.request(url)
        url = re.findall('sources: \[(.+?)\]', result)[0]
        # Pick the first mp4 entry out of the sources list.
        for i in url.split(','):
            i = i.replace('\"', '')
            if 'mp4' in i:
                url = i
                break
        logger.debug('URL [%s]' % url, __name__)
        return url
    except:
        return False
def resolve(url):
    # Prefer the highest available resolution.
    res = ['720', '480', '360', '240']
    try:
        result = client.request(url)
        for r in res:
            try:
                url = client.parseDOM(result, name="source", attrs={"res": r}, ret="src")[0]
                break
            except:
                pass
        logger.debug('URL [%s]' % url, __name__)
        return url
    except:
        return False
def source(self, item):
    # item is a (host-label, links-html) pair, as produced by the zip() in
    # the sources() method below.
    title = item[0]
    links = item[1]
    quality = 'HD' if '720p' in title else 'SD'

    srcs = []
    urls = []
    host = ''
    for part in client.parseDOM(links, "a", ret="href"):
        try:
            part = client.request(part)
            part = part.decode('iso-8859-1').encode('utf-8')
            part = client.parseDOM(part, "td",
                                   attrs={"style": "vertical-align:middle;text-align:center;"})[0]
            tUrl = re.compile('(SRC|src|data-config)=[\'|\"](.+?)[\'|\"]').findall(part)[0][1]
            host = client.host(tUrl)
            urls.append(tUrl)
        except Exception as e:
            logger.error(e)

    url = "##".join(urls)
    srcs.append({'source': host, 'parts': len(urls), 'quality': quality,
                 'scraper': self.name, 'url': url, 'direct': False})
    return srcs
def resolve(url):
    try:
        result = client.request(url)
        # Unpack p.a.c.k.e.d JavaScript if present; otherwise scan the page
        # itself for the file URL.
        packed = re.search('(eval\(function.*?)\s*</script>', result, re.DOTALL)
        if packed:
            js = jsunpack.unpack(packed.group(1))
        else:
            js = result
        link = re.search('file\s*:\s*[\'|"]([^\'|"]+)', js)
        url = link.group(1) if link else None
        logger.debug('URL [%s]' % url, __name__)
        return url
    except:
        return False
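
# Usage sketch: jsunpack reverses Dean Edwards-style 'eval(function(p,a,c,k,e,d)'
# packing, which many hoster embed pages use to hide the 'file:' URL.
def _packed_example():
    stream = resolve('http://somehost.example/embed-xyz789.html')  # placeholder URL
    return stream  # direct file URL, None if no 'file:' match, False on error
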
def sources(self, url):
    logger.debug('SOURCES URL %s' % url, __name__)
    try:
        if url == None:
            return self.srcs
        result = client.request(url)
        result = result.decode('iso-8859-1').encode('utf-8')
        result = result.replace('\n', '')
        result = client.parseDOM(result, "div", attrs={"class": "post-content bottom"})[0]

        # Pair each red host label with the paragraph of links that follows it.
        items = client.parseDOM(result, "p")
        hosts = client.parseDOM(result, "span", attrs={"style": "color: red;"})
        links = [item for item in items if 'a href' in item]
        for item in zip(hosts, links):
            self.srcs.extend(self.source(item))

        logger.debug('SOURCES [%s]' % self.srcs, __name__)
        return self.srcs
    except Exception as e:
        logger.error(e)
        return self.srcs
def sources(self, url):
    logger.debug('SOURCES URL %s' % url, __name__)
    try:
        srcs = []
        if url == None:
            return srcs
        url = urlparse.urljoin(self.base_link, url)
        try:
            result = client.request(url, referer=self.base_link)
        except:
            result = ''
        result = result.decode('iso-8859-1').encode('utf-8')
        result = result.replace('\n', '').replace('\t', '')
        result = client.parseDOM(result, "div", attrs={"id": "list-dl"})
        items = client.parseDOM(result, "a", ret="href")
        for item in items:
            try:
                url = item
                host = client.host(url)
                srcs.append({'source': host, 'parts': '1', 'quality': 'HD',
                             'scraper': self.name, 'url': url, 'direct': False})
            except:
                pass
        logger.debug('SOURCES [%s]' % srcs, __name__)
        return srcs
    except Exception as e:
        logger.error('[%s] Exception : %s' % (self.__class__, e))
        return srcs
def sources(self, url):
    logger.debug('SOURCES URL %s' % url, __name__)
    try:
        srcs = []
        if url == None:
            return srcs
        quality = 'HD' if 'hd' in url.lower() else 'SD'
        html = client.request(url)
        # Only parse the entry <div>; the BS3-style SoupStrainer keeps the
        # parse cheap.
        mlink = SoupStrainer("div", {"class": "entry"})
        videoclass = BeautifulSoup(html, parseOnlyThese=mlink)
        try:
            links = videoclass.findAll('iframe')
            for link in links:
                url = link.get('src')
                host = client.host(url)
                srcs.append({'source': host, 'parts': '1', 'quality': quality,
                             'scraper': self.name, 'url': url, 'direct': False})
        except:
            pass
        return srcs
    except:
        return srcs