def scrape_movie(self, title, year, imdb, debrid=False): try: start_time = time.time() search_id = clean_search(title.lower()) start_url = '%s/search?query=%s' % (self.base_link, urllib.quote_plus(search_id)) #print 'scraperchk - scrape_movie - start_url: ' + start_url html = client.request(start_url) posts = client.parseDOM(html, 'div', attrs={'class': 'one_movie-item'}) for post in posts: data = dom.parse_dom(post, 'a', req='href', attrs={'class': 'movie-title'})[0] if not clean_title(title) == clean_title(data.content): continue qual = client.parseDOM(post, 'span', attrs={'data-title': 'Quality'})[0] qual = client.replaceHTMLCodes(qual) item_url = urlparse.urljoin(self.base_link, data.attrs['href']) self.get_source(item_url, title, year, start_time, qual) return self.sources except Exception, argument: if dev_log == 'true': error_log(self.name, argument) return self.sources
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False): try: start_time = time.time() hdlr = 'S%02dE%02d' % (int(season), int(episode)) search_id = '%s %s' % (title, hdlr) start_url = self.base_link + self.search_link % urllib.quote_plus( search_id) html = client.request(start_url) items = client.parseDOM(html, 'item') for item in items: name = client.parseDOM(item, 'title')[0] name = client.replaceHTMLCodes(name) t = name.split(hdlr)[0] if not clean_title(title) == clean_title(t): continue if not hdlr in name: continue self.get_source(item, title, year, season, episode, debrid, start_time) #print self.sources return self.sources except Exception, argument: if dev_log == 'true': error_log(self.name, argument)
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False): try: start_time = time.time() hdlr = 'S%02dE%02d' % (int(season), int(episode)) query = clean_search(title) query = urllib.quote_plus(query + ' ' + hdlr) urls = [] for link in self.search_links: try: url = urlparse.urljoin(self.base_link, link % query) url = urlparse.urljoin(self.base_link, url) r = client.request(url) posts = client.parseDOM(r, 'tbody') posts = client.parseDOM(posts, 'tr') urls += [(client.parseDOM(i, 'button', ret='data-clipboard-text')[0]) for i in posts if i] except: pass count = 0 for url in urls: name = url.split('/')[-1].lower() name = client.replaceHTMLCodes(name).replace('%20', '') if 'movies' in url: continue if any(x in url for x in ['italian', 'dubbed', 'teaser', 'subs', 'sub', 'dub', 'samples', 'extras', 'french', 'trailer', 'trailers', 'sample']): continue t = re.sub('(\.|\(|\[|\s)(S\d+E\d+|S\d+)(\.|\)|\]|\s)(.+|)', '', name, flags=re.I) if clean_title(t) not in clean_title(title): continue y = re.findall('[\.|\(|\[|\s](S\d+E\d+|S\d+)[\.|\)|\]|\s]', name, re.I)[-1].upper() if not y == hdlr: continue res, info = quality_tags.get_release_quality(name, url) if any(x in url for x in ['hastidl', '1tehmovies', '62.210.103.107', '79.127', '213.32.113.82', 'dl5.downloadha', '89.163.255.42', '185.56.20.142', 's1.0music', 'dl3.yoozdl', 'dl4.lavinmovie.net', 'dl6.lavinmovie.net', 'dl3.upload08.com', 'dl8.uploadt.com', '163.172.6.218', 'samba.allunix.ru', 'server417']): count += 1 url += '|User-Agent=%s&Referer=%s' % (client.agent(), self.base_link) url = urllib.quote(url, '|:?/&+=_-') self.sources.append( {'source': 'DirectLink', 'quality': res, 'scraper': self.name, 'url': url, 'direct': True}) if dev_log == 'true': end_time = time.time() - start_time send_log(self.name, end_time, count, title, year) return self.sources except Exception, argument: if dev_log == 'true': error_log(self.name, argument) return self.sources #filepursuit().scrape_movie('Black 
Panther', '2018', '')
def get_source(self, item_url, title, year, start_time, qual): try: #print 'PASSEDURL >>>>>>'+item_url count = 0 OPEN = client.request(item_url) frame = client.parseDOM(OPEN, 'iframe', ret='src')[0] if 'openload' in frame: count += 1 self.sources.append( {'source': 'openload', 'quality': qual, 'scraper': self.name, 'url': frame, 'direct': False}) extra_links = re.findall('''window.open\(['"]([^'"]+)['"]\).+?server:([^<]+)''', OPEN, re.DOTALL) for link, host in extra_links: if not filter_host(host.replace(' ', '')): continue link = client.replaceHTMLCodes(link).encode('utf-8') link = urlparse.urljoin(self.base_link, link) count += 1 self.sources.append({'source': host, 'quality': qual, 'scraper': self.name, 'url': link, 'direct': False}) if dev_log == 'true': end_time = time.time() - start_time send_log(self.name, end_time, count, title, year) except Exception, argument: if dev_log == 'true': error_log(self.name, argument)
def get_source(self, item_url, title, year, season, episode, start_time):
    """Replay the page's hidden download form (FName/FSize/FSID) against
    /thanks-for-downloading/ and record the meta-refresh target as a
    direct stream URL.

    Fix: the request headers used the key 'User_Agent' (underscore), which
    is not a valid HTTP header name and is ignored by servers; corrected
    to 'User-Agent'.
    """
    count = 0
    try:
        if item_url is None:
            return self.sources
        # quality is advertised as free text "Quality : ...<br"
        qual = re.search('Quality\s*:(.+?)<br', item_url, re.DOTALL).groups()[0]
        qual = re.sub('<.+?>', '', qual)
        qual, info = quality_tags.get_release_quality(qual, qual)
        headers = {
            'Origin': self.base_link,
            'Referer': client.parseDOM(item_url, 'link')[0],
            'X-Requested-With': 'XMLHttpRequest',
            'User-Agent': client.agent()  # fix: was 'User_Agent'
        }
        # hidden form fields describing the file to fetch
        fn = client.parseDOM(item_url, 'input', attrs={'name': 'FName'}, ret='value')[0]
        fs = client.parseDOM(item_url, 'input', attrs={'name': 'FSize'}, ret='value')[0]
        fsid = client.parseDOM(item_url, 'input', attrs={'name': 'FSID'}, ret='value')[0]
        post_url = self.base_link + '/thanks-for-downloading/'
        form_data = {'FName': fn, 'FSize': fs, 'FSID': fsid}
        link = client.request(post_url, post=form_data, headers=headers)
        # the response redirects via <meta http-equiv="refresh" content="...url=...">
        stream_url = client.parseDOM(link, 'meta', attrs={'http-equiv': 'refresh'}, ret='content')[0]
        stream_url = client.replaceHTMLCodes(stream_url).split('url=')[-1]
        stream_url += '|User-Agent=%s' % urllib.quote(client.agent())
        count += 1
        self.sources.append({
            'source': 'DirectLink', 'quality': qual, 'scraper': self.name,
            'url': stream_url, 'direct': True
        })
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title + ' | ' + stream_url, year,
                     season=season, episode=episode)
    except:
        # best-effort: any parsing/network failure yields no sources
        pass
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False): try: start_time = time.time() #season_pull = '%02d' % int(season) #"0%s"%season if len(season)<2 else season #episode_pull = '%02d' % int(episode) #"0%s"%episode if len(episode)<2 else episode sepi = 'S%02dE%02d' % (int(season), int(episode)) search_id = '%s %s' % (title, sepi) movie_url = self.base_link + self.search_link % urllib.quote_plus( search_id) #print ' ##MOVIE URL## %s' % movie_url r = client.request(movie_url) items = client.parseDOM(r, 'article', attrs={'id': 'post-\d+'}) for item in items: name = client.parseDOM(item, 'a')[0] name = client.replaceHTMLCodes(name) t = re.sub( '(\.|\(|\[|\s)(\d{4}|S\d+E\d+|S\d+|3D)(\.|\)|\]|\s|)(.+|)', '', name, flags=re.I) if not clean_title(title).lower() in clean_title(t).lower(): continue y = re.findall('[\.|\(|\[|\s](S\d*E\d*|S\d*)[\.|\)|\]|\s]', name, flags=re.I)[-1].upper() if y not in sepi: continue link = client.parseDOM(item, 'a', ret='href')[0] if not y == sepi: link = link else: link += '2' if link.endswith('/') else '/2' #print ' ##final Item to pass## %s' % link self.get_source(link, title, year, season, episode, start_time) return self.sources except Exception, argument: if dev_log == 'true': error_log(self.name, argument)
def scrape_movie(self, title, year, imdb, debrid=False): try: start_time = time.time() search_id = '%s %s' % (title, year) movie_url = self.base_link + self.search_link % urllib.quote_plus( search_id) r = client.request(movie_url) items = client.parseDOM(r, 'article', attrs={'id': 'post-\d+'}) #xbmc.log('@#@ITEMS:%s' % items, xbmc.LOGNOTICE) links = [] for item in items: name = client.parseDOM(item, 'a')[0] name = client.replaceHTMLCodes(name) t = re.sub( '(\.|\(|\[|\s)(\d{4}|S\d+E\d+|S\d+|3D)(\.|\)|\]|\s|)(.+|)', '', name, flags=re.I) if not clean_title(title) == clean_title(t): continue if not year in name: continue link = client.parseDOM(item, 'a', ret='href')[0] link += '/2/' links.append(link) #xbmc.log('@#@LINKS:%s' % links, xbmc.LOGNOTICE) threads = [] for i in links: threads.append( workers.Thread(self.get_source, i, title, year, '', '', str(start_time))) [i.start() for i in threads] alive = [x for x in threads if x.is_alive() is True] while alive: alive = [x for x in threads if x.is_alive() is True] time.sleep(0.1) return self.sources except Exception, argument: if dev_log == 'true': error_log(self.name, argument)
def get_sources(self, episode_url, title, year, season, episode, start_time): try: r = client.request(episode_url) links = client.parseDOM(r, 'div', attrs={'class': 'host-link'}) try: links += client.parseDOM(r, 'IFRAME', ret='SRC')[0] except: pass count = 0 for link in links: host = client.parseDOM(link, 'span')[0] host = client.replaceHTMLCodes(host) host = host.encode('utf-8') if not filter_host(host): continue # icon('ciaHR0cDovL3d3dy5zcGVlZHZpZC5uZXQvMGZvcjBqbTYwcDdzd') # icon(\'ciaHR0cDovL3d3dy5zcGVlZHZpZC5uZXQvMGZvcjBqbTYwcDdzd\') url = re.findall('''icon\(.+?(\w+).+?\)''', link, re.DOTALL)[0] url = urlparse.urljoin(self.base_link, '/cale/%s' % url) count += 1 self.sources.append({ 'source': host, 'quality': 'DVD', 'scraper': self.name, 'url': url, 'direct': False }) if dev_log == 'true': end_time = time.time() - start_time send_log(self.name, end_time, count, title, year, season=season, episode=episode) except Exception, argument: if dev_log == 'true': error_log(self.name, argument) return self.sources
def get_source(self,url, title, year, season, episode, start_time):
    # Scrape a hdpopcorns post page: read the hidden per-quality form fields,
    # re-post them to select-movie-quality.php and record the resulting
    # 720p/1080p direct download links in self.sources.
    try:
        scraper = cfscrape.create_scraper()  # session that can pass Cloudflare checks
        headers = {'Origin': 'http://hdpopcorns.com', 'Referer': url,
                   'X-Requested-With': 'XMLHttpRequest', 'User-Agent': client.agent()}
        count = 0
        data = scraper.get(url, headers=headers).content
        data = client.parseDOM(data, 'div', attrs={'class': 'thecontent'})[0]
        # hidden inputs carrying file name / size / id for each quality variant
        FN720p = client.parseDOM(data, 'input', ret='value', attrs={'name': 'FileName720p'})[0]
        FS720p = client.parseDOM(data, 'input', ret='value', attrs={'name': 'FileSize720p'})[0]
        FSID720p = client.parseDOM(data, 'input', ret='value', attrs={'name': 'FSID720p'})[0]
        FN1080p = client.parseDOM(data, 'input', ret='value', attrs={'name': 'FileName1080p'})[0]
        FS1080p = client.parseDOM(data, 'input', ret='value', attrs={'name': 'FileSize1080p'})[0]
        FSID1080p = client.parseDOM(data, 'input', ret='value', attrs={'name': 'FSID1080p'})[0]
        # 'x'/'y' presumably mimic the submit button's click coordinates — confirm
        post = {'FileName720p': FN720p, 'FileSize720p': FS720p, 'FSID720p': FSID720p,
                'FileName1080p': FN1080p, 'FileSize1080p': FS1080p, 'FSID1080p': FSID1080p,
                'x': 173, 'y': 22}
        data = scraper.post('%s/select-movie-quality.php' % self.base_link, data=post).content
        # one div per offered quality, each holding the final download anchor
        data = client.parseDOM(data, 'div', attrs={'id': 'btn_\d+p'})
        u = [client.parseDOM(i, 'a', ret='href')[0] for i in data]
        for url in u:
            quality, info = quality_tags.get_release_quality(url, url)
            url = client.replaceHTMLCodes(url)
            url = url.encode('utf-8')
            count += 1
            self.sources.append(
                {'source': 'DirectLink', 'quality': quality, 'scraper': self.name,
                 'url': url, 'direct': True})
        if dev_log=='true':
            end_time = time.time() - start_time
            send_log(self.name,end_time,count,title,year, season=season,episode=episode)
    except:
        # best-effort scraper: any failure simply yields no sources
        pass
#hdpopcorn().scrape_movie('Blade Runner 2049', '2017', '', False) title contains 2 years
#hdpopcorn().scrape_movie('Deadpool 2', '2018', '', False) title contains number
def get_sources(self, url, title, year, season, episode, start_time): try: if url is None: return self.sources count = 0 url, hdlr = url[0], url[1] main = [] try: headers = { 'User-Agent': client.agent(), 'Referer': self.base_link } scraper = cfscrape.create_scraper() data = scraper.get(url, headers=headers).content main = dom.parse_dom(data, 'div', {'class': 'postContent'}) main = [i.content for i in main] comments = dom.parse_dom(data, 'div', {'class': re.compile('content')}) main += [i.content for i in comments] except: pass for con in main: try: frames = client.parseDOM(con, 'a', ret='href') for link in frames: if 'youtube' in link: continue if any(x in link for x in ['.rar', '.zip', '.iso']) or any( link.endswith(x) for x in ['.rar', '.zip', '.iso']): continue host = re.findall( '([\w]+[.][\w]+)$', urlparse.urlparse(link.strip().lower()).netloc)[0] host = client.replaceHTMLCodes(host) host = host.encode('utf-8') if not hdlr.lower() in link.lower(): continue quality, info = quality_tags.get_release_quality( link, link) if link in str(self.sources): continue rd_domains = get_rd_domains() if host in rd_domains: count += 1 self.sources.append({ 'source': host, 'quality': quality, 'scraper': self.name, 'url': link, 'direct': False, 'debridonly': True }) except: pass if dev_log == 'true': end_time = time.time() - start_time send_log(self.name, end_time, count, title, year, season=season, episode=episode) # xbmc.log('@#@SOURCES:%s' % self._sources, xbmc.LOGNOTICE) except Exception, argument: if dev_log == 'true': error_log(self.name, argument) return self.sources #Releasebb().scrape_movie('Black Panther', '2018', '', True)
def _get_sources(self, link, title, year, _type, season, episode, start_time):
    # Resolve the site's search-form action token, run the prepared query and
    # harvest the result table rows into self.sources. `_type` switches the
    # name/year matching between 'movie' and episode mode.
    try:
        squery = self.query.replace('%20', '+')
        self.headers = {
            'User-Agent': self.ua,
            'Referer': self.search_referer.format(squery)
        }
        # landing page is cached (ttl 8 — presumably hours; confirm against the
        # cache helper) because the form action token rarely changes
        srch = cache.get(client.request, 8, self.base_link)
        srch = client.parseDOM(srch, 'form', ret='action', attrs={'name': 'frm'})[0]
        srch = srch[1:] if srch.startswith('/') else srch
        link = urlparse.urljoin(self.base_link, link % (srch, self.query))
        r = client.request(link, headers=self.headers)
        posts = client.parseDOM(r, 'tbody')[0]
        posts = client.parseDOM(posts, 'tr')
        # per row: (download href, display name, referer href)
        urls = [(client.parseDOM(i, 'a', ret='href')[1],
                 client.parseDOM(i, 'a')[1],
                 client.parseDOM(i, 'a', ret='href', attrs={'id': 'refer.+?'})[0])
                for i in posts if i]
        count = 0
        for url, name, host in urls:
            name = client.replaceHTMLCodes(name).replace('%20', ' ').replace(
                '%27', "'")
            # drop alternate-language cuts, samples and trailers
            if any(x in url.lower() for x in [
                    'italian', 'teaser', 'bonus.disc', 'subs', 'sub',
                    'samples', 'extras', 'french', 'trailer', 'trailers',
                    'sample'
            ]):
                continue
            if _type == 'movie':
                # title portion precedes the year in the release name
                t = name.split(year)[0]
                if clean_title(t) not in clean_title(title):
                    continue
                y = re.findall(
                    '[\.|\(|\[|\s\_|\-](\d{4})[\.|\)|\]|\s|\_|\-]',
                    name, re.I)[-1].upper()
                if not year == y:
                    continue
            else:
                hdlr = 'S%02dE%02d' % (int(season), int(episode))
                # title portion precedes the SxxEyy tag
                t = name.split(hdlr)[0]
                if clean_title(t) not in clean_title(title):
                    continue
                y = re.findall(
                    '[\.|\(|\[|\s|\_](S\d+E\d+|S\d+)[\.|\)|\]|\s|\_]',
                    name, re.I)[-1].upper()
                if not y == hdlr:
                    continue
            quality, info = quality_tags.get_release_quality(name, url)
            info = ' | '.join(info)
            res = '{0} | {1}'.format(quality, info)
            count += 1
            url = urlparse.urljoin(self.base_link, url) if url.startswith('/') else url
            host = host.split('/')[2]  # netloc portion of the referer link
            self.sources.append({
                'source': host,
                'quality': res,
                'scraper': self.name,
                'url': url,
                'direct': True
            })
        if dev_log == 'true':
            end_time = time.time() - start_time
            send_log(self.name, end_time, count, title, year)
    except:
        # best-effort scraper: any failure simply yields no sources
        pass
def scrape_episode(self, title, show_year, year, season, episode, imdb, tvdb, debrid=False): try: start_time = time.time() hdlr = 'S%02dE%02d' % (int(season), int(episode)) query = clean_search(title) query = urllib.quote_plus(query + ' ' + hdlr).replace('+', '%20') urls = [] for link in self.search_links: try: url = urlparse.urljoin(self.base_link, link % query) url = urlparse.urljoin(self.base_link, url) r = client.request(url) posts = client.parseDOM(r, 'tbody') posts = client.parseDOM(posts, 'tr') urls += [(client.parseDOM(i, 'button', ret='data-clipboard-text')[0]) for i in posts if i] except BaseException: return count = 0 for url in urls: name = url.split('/')[-1].lower() name = client.replaceHTMLCodes(name).replace('%20', '').replace( '%27', "'") if 'movies' in url: continue if any(x in url for x in [ 'italian', 'dubbed', 'teaser', 'subs', 'sub', 'dub', 'samples', 'extras', 'french', 'trailer', 'trailers', 'sample' ]): continue t = re.sub('(\.|\(|\[|\s)(S\d+E\d+|S\d+)(\.|\)|\]|\s)(.+|)', '', name, flags=re.I) if clean_title(t) not in clean_title(title): continue y = re.findall('[\.|\(|\[|\s](S\d+E\d+|S\d+)[\.|\)|\]|\s]', name, re.I)[-1].upper() if not y == hdlr: continue res, info = quality_tags.get_release_quality(name, url) count += 1 url += '|User-Agent=%s&Referer=%s' % (client.agent(), self.base_link) url = urllib.quote(url, '|%:?/&+=_-') host = url.split('/')[2] self.sources.append({ 'source': host, 'quality': res, 'scraper': self.name, 'url': url, 'direct': True }) if dev_log == 'true': end_time = time.time() - start_time send_log(self.name, end_time, count, title, year) return self.sources except Exception, argument: if dev_log == 'true': error_log(self.name, argument) return self.sources #filepursuit().scrape_movie('Black Panther', '2018', '')