def _get_sources(self, name, url): try: headers = {'User-Agent': client.agent()} r = self.scraper.get(url, headers=headers).content name = client.replaceHTMLCodes(name) l = dom_parser.parse_dom(r, 'div', {'class': 'ppu2h'}) s = '' for i in l: s += i.content urls = re.findall( r'''((?:http|ftp|https)://[\w_-]+(?:(?:\.[\w_-]+)+)[\w.,@?^=%&:/~+#-]*[\w@?^=%&/~+#-])''', i.content, flags=re.MULTILINE | re.DOTALL) urls = [ i for i in urls if '.rar' not in i or '.zip' not in i or '.iso' not in i or '.idx' not in i or '.sub' not in i ] for url in urls: if url in str(self.sources): continue valid, host = source_utils.is_host_valid(url, self.hostDict) if not valid: continue host = client.replaceHTMLCodes(host) host = host.encode('utf-8') quality, info = source_utils.get_release_quality(name, url) try: size = re.findall( '((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', name)[0] div = 1 if size.endswith(('GB', 'GiB')) else 1024 size = float(re.sub('[^0-9|/.|/,]', '', size)) / div size = '%.2f GB' % size info.append(size) except: pass info = ' | '.join(info) self.sources.append({ 'source': host, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True }) except: source_utils.scraper_error('RAPIDMOVIEZ') pass
def sources(self, url, hostDict, hostprDict): try: self.sources = [] if url is None: return self.sources if debrid.status() is False: raise Exception() self.hostDict = hostDict + hostprDict data = urlparse.parse_qs(url) data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data]) title = data['tvshowtitle'] if 'tvshowtitle' in data else data[ 'title'] # title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU') hdlr = data['year'] hdlr2 = 'S%02dE%02d' % (int(data['season']), int( data['episode'])) if 'tvshowtitle' in data else '' imdb = data['imdb'] url = self.search(title, hdlr) headers = {'User-Agent': client.agent()} r = self.scraper.get(url, headers=headers).content if hdlr2 == '': r = dom_parser.parse_dom(r, 'ul', {'id': 'releases'})[0] else: r = dom_parser.parse_dom(r, 'ul', {'id': 'episodes'})[0] r = dom_parser.parse_dom(r.content, 'a', req=['href']) r = [(i.content, urlparse.urljoin(self.base_link, i.attrs['href'])) for i in r if i and i.content != 'Watch'] if hdlr2 != '': r = [(i[0], i[1]) for i in r if hdlr2.lower() in i[0].lower()] threads = [] for i in r: threads.append(workers.Thread(self._get_sources, i[0], i[1])) [i.start() for i in threads] # [i.join() for i in threads] alive = [x for x in threads if x.is_alive() is True] while alive: alive = [x for x in threads if x.is_alive() is True] time.sleep(0.1) return self.sources except: source_utils.scraper_error('RAPIDMOVIEZ') return self.sources
def sources(self, url, hostDict, hostprDict): sources = [] try: if not url: return sources data = urlparse.parse_qs(url) data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data]) title = data['title'] year = data['year'] t = title + year query = '%s' % data['title'] query = re.sub(r'(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query) url = self.search_link.format(urllib.quote_plus(query)) url = urlparse.urljoin(self.base_link, url) r = client.request(url) items = client.parseDOM(r, 'li') items = [(dom.parse_dom(i, 'a', req='href')[0]) for i in items if year in i] items = [(i.attrs['href'], re.sub('<.+?>|\n', '', i.content).strip()) for i in items] item = [ i[0].replace('movie', 'view') for i in items if cleantitle.get(t) == cleantitle.get(i[1]) ][0] html = client.request(item) streams = re.findall('sources\:\s*\[(.+?)\]\,', html, re.DOTALL)[0] streams = re.findall( 'file:\s*[\'"](.+?)[\'"].+?label:\s*[\'"](.+?)[\'"]', streams, re.DOTALL) for link, label in streams: quality = source_utils.get_release_quality(label, label)[0] link += '|User-Agent=%s&Referer=%s' % (urllib.quote( client.agent()), item) sources.append({ 'source': 'Direct', 'quality': quality, 'language': 'en', 'url': link, 'direct': True, 'debridonly': False }) return sources except BaseException: return sources
def _get_items(self, url): try: headers = {'User-Agent': client.agent()} r = client.request(url, headers=headers) posts = client.parseDOM(r, 'table', attrs={'class': 'table2'})[0] posts = client.parseDOM(posts, 'tr') for post in posts: data = client.parseDOM(post, 'a', ret='href')[1] if '/search/' in data: continue # Remove non-ASCII characters...freakin limetorrents try: data = data.encode('ascii', 'ignore') except: pass # some broken links with withespace data = re.sub('\s', '', data).strip() link = urlparse.urljoin(self.base_link, data) name = client.parseDOM(post, 'a')[1] t = name.split(self.hdlr)[0].replace(self.year, '').replace('(', '').replace(')', '').replace('&', 'and') if cleantitle.get(t) != cleantitle.get(self.title): continue if self.hdlr not in name: continue try: size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0] div = 1 if size.endswith('GB') else 1024 size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div size = '%.2f GB' % size except: size = '0' pass self.items.append((name, link, size)) return self.items except: source_utils.scraper_error('LIMETORRENTS') return self.items
def _get_items(self, url): items = [] try: headers = {'User-Agent': client.agent()} r = client.request(url, headers=headers) posts = client.parseDOM(r, 'tr', attrs={'class': 't-row'}) posts = [i for i in posts if not 'racker:' in i] for post in posts: ref = client.parseDOM(post, 'a', ret='href') url = [i for i in ref if 'magnet:' in i][0] if any(x in url.lower() for x in [ 'french', 'italian', 'spanish', 'truefrench', 'dublado', 'dubbed' ]): continue name = client.parseDOM(post, 'a', ret='title')[0] t = name.split(self.hdlr)[0].replace(self.year, '').replace( '(', '').replace(')', '').replace('&', 'and') if cleantitle.get(t) != cleantitle.get(self.title): continue if self.hdlr not in name: continue try: size = re.findall( '((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0] div = 1 if size.endswith('GB') else 1024 size = float( re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div size = '%.2f GB' % size except: size = '0' pass items.append((name, url, size)) return items except: source_utils.scraper_error('GLODLS') return items
def _get_items(self, url): try: headers = {'User-Agent': client.agent()} r = client.request(url, headers=headers) posts = client.parseDOM(r, 'tr', attrs={'id': 'torrent_latest_torrents'}) for post in posts: ref = client.parseDOM(post, 'a', attrs={'title': 'Torrent magnet link'}, ret='href')[0] link = urllib.unquote(ref).decode('utf8').replace( 'https://mylink.me.uk/?url=', '').replace('https://mylink.cx/?url=', '') name = urllib.unquote_plus( re.search('dn=([^&]+)', link).groups()[0]) t = name.split(self.hdlr)[0].replace(self.year, '').replace( '(', '').replace(')', '').replace('&', 'and') if cleantitle.get(t) != cleantitle.get(self.title): continue if self.hdlr not in name: continue try: size = re.findall( '((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0] div = 1 if size.endswith('GB') else 1024 size = float( re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div size = '%.2f GB' % size except: size = '0' pass self.items.append((name, link, size)) return self.items except: source_utils.scraper_error('KICKASS2') return self.items
def searchMovie(self, title, year): try: query = self.search_link % urllib.quote_plus( cleantitle.getsearch(title + ' ' + year)) url = urlparse.urljoin(self.base_link, query) headers = {'User-Agent': client.agent(), 'Referer': self.base_link} scraper = cfscrape.create_scraper() r = scraper.get(url, headers=headers).content r = client.parseDOM(r, 'item') r = [(client.parseDOM(i, 'title')[0], i) for i in r if i] r = [ i[1] for i in r if cleantitle.get(title) in cleantitle.get(i[0]) and year in i[0] ] return r[0] except BaseException: return
def _get_items(self, url): try: headers = {'User-Agent': client.agent()} r = client.request(url, headers=headers) posts = client.parseDOM(r, 'tr', attrs={'id': 'torrent_latest_torrents'}) for post in posts: data = client.parseDOM(post, 'a', attrs={'title': 'Torrent magnet link'}, ret='href')[0] link = urllib.unquote(data).decode('utf8').replace( 'https://mylink.me.uk/?url=', '') name = urllib.unquote_plus( re.search('dn=([^&]+)', link).groups()[0]) t = name.split(self.hdlr)[0] if not cleantitle.get(re.sub('(|)', '', t)) == cleantitle.get( self.title): continue try: y = re.findall( '[\.|\(|\[|\s|\_|\-](S\d+E\d+|S\d+)[\.|\)|\]|\s|\_|\-]', name, re.I)[-1].upper() except BaseException: y = re.findall( '[\.|\(|\[|\s\_|\-](\d{4})[\.|\)|\]|\s\_|\-]', name, re.I)[-1].upper() if not y == self.hdlr: continue try: size = re.findall( '((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0] div = 1 if size.endswith('GB') else 1024 size = float( re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div size = '%.2f GB' % size except BaseException: size = '0' self.items.append((name, link, size)) return self.items except BaseException: return self.items
def _get_items(self, url): try: headers = {'User-Agent': client.agent()} r = client.request(url, headers=headers) if '<tbody' not in r: return self.items posts = client.parseDOM(r, 'tbody')[0] posts = client.parseDOM(posts, 'tr') for post in posts: data = client.parseDOM(post, 'a', ret='href')[1] link = urlparse.urljoin(self.base_link, data) name = client.parseDOM(post, 'a')[1] t = name.split(self.hdlr)[0].replace(self.year, '').replace( '(', '').replace(')', '').replace('&', 'and') if cleantitle.get(t) != cleantitle.get(self.title): continue if self.hdlr not in name: raise Exception() try: size = re.findall( '((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0] div = 1 if size.endswith('GB') else 1024 size = float( re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div size = '%.2f GB' % size except: size = '0' pass self.items.append((name, link, size)) return self.items except: source_utils.scraper_error('1337X') return self.items
def sources(self, url, hostDict, hostprDict): sources = [] try: if url is None: return sources data = urlparse.parse_qs(url) data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data]) title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title'] imdb = data['imdb'] if not 'tvshowtitle' in data: url = urlparse.urljoin(self.base_link, 'movie.php?imdbid=%s' % imdb) data = self.scraper(url, referer=self.base_link).content links = client.parseDOM(data, 'source', ret='src') else: tm_api = '9c61dcfae7065994ce1008535ece53eb' tm_url = 'http://api.themoviedb.org/3/find/%s?api_key=%s&external_source=imdb_id' % (imdb, tm_api) tm_id = self.scraper.get(tm_url).content import json tm_id = json.loads(tm_id)['tv_results'][0]['id'] tm_eplink = 'https://api.themoviedb.org/3/tv/%d/season/%d/episode/%d?api_key=%s' % \ (int(tm_id), int(data['season']), int(data['episode']), tm_api) tm_epiid = self.scraper.get(tm_eplink).content tm_epiid = json.loads(tm_epiid)['id'] url = urlparse.urljoin(self.base_link, 'single-episode.php?epiid=%s&tvid=%s' % (tm_epiid, tm_id)) data = self.scraper.get(url, referer=self.base_link).content links = client.parseDOM(data, 'source', ret='src') for link in links: link = link.replace('/playlist.m3u8', '') link = 'http:{0}'.format(link) if link.startswith('//') else link link = urllib.quote(link, ':/_+!@#$%^&*-') link = link.replace(' ', '%20') + '|User-Agent={0}&Referer={1}'.format( urllib.quote(client.agent()), url) sources.append( {'source': 'DL', 'quality': '720p', 'language': 'en', 'url': link, 'direct': True, 'debridonly': False}) return sources except Exception: return sources
def sources(self, url, hostDict, hostprDict): try: self._sources = [] if url is None: return self._sources if debrid.status() is False: return self._sources data = urlparse.parse_qs(url) data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data]) self.title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title'] self.title = self.title.replace('&', 'and').replace('Special Victims Unit', 'SVU') self.hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year'] self.year = data['year'] query = '%s %s' % (self.title, self.hdlr) query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query) if 'tvshowtitle' in data: url = self.search.format('8', urllib.quote(query)) else: url = self.search.format('4', urllib.quote(query)) # log_utils.log('url = %s' % url, log_utils.LOGDEBUG) headers = {'User-Agent': client.agent()} _html = client.request(url, headers=headers) threads = [] for i in re.findall(r'<item>(.+?)</item>', _html, re.DOTALL): threads.append(workers.Thread(self._get_items, i)) [i.start() for i in threads] [i.join() for i in threads] return self._sources except: source_utils.scraper_error('TORRENTDOWNLOADS') return self._sources
def search(self, title, year): try: url = urlparse.urljoin( self.base_link, self.search_link % (urllib.quote_plus(title))) headers = {'User-Agent': client.agent()} r = self.scraper.get(url, headers=headers).content r = dom_parser2.parse_dom(r, 'div', {'class': 'list_items'})[0] r = dom_parser2.parse_dom(r.content, 'li') r = [(dom_parser2.parse_dom(i, 'a', {'class': 'title'})) for i in r] r = [(i[0].attrs['href'], i[0].content) for i in r] r = [(urlparse.urljoin(self.base_link, i[0])) for i in r if cleantitle.get(title) in cleantitle.get(i[1]) and year in i[1]] if r: return r[0] else: return except: return
def _get_items(self, url): items = [] try: headers = {'User-Agent': client.agent()} r = client.request(url, headers=headers) posts = client.parseDOM(r, 'tr', attrs={'class': 't-row'}) posts = [i for i in posts if not 'racker:' in i] for post in posts: data = client.parseDOM(post, 'a', ret='href') url = [i for i in data if 'magnet:' in i][0] name = client.parseDOM(post, 'a', ret='title')[0] t = name.split(self.hdlr)[0] if not cleantitle.get(re.sub('(|)', '', t)) == cleantitle.get( self.title): continue try: y = re.findall( '[\.|\(|\[|\s|\_|\-](S\d+E\d+|S\d+)[\.|\)|\]|\s|\_|\-]', name, re.I)[-1].upper() except BaseException: y = re.findall( '[\.|\(|\[|\s\_|\-](\d{4})[\.|\)|\]|\s\_|\-]', name, re.I)[-1].upper() if not y == self.hdlr: continue try: size = re.findall( '((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0] div = 1 if size.endswith('GB') else 1024 size = float( re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div size = '%.2f GB' % size except BaseException: size = '0' items.append((name, url, size)) return items except BaseException: return items
def _get_items(self, url): try: headers = {'User-Agent': client.agent()} r = client.request(url, headers=headers) posts = client.parseDOM(r, 'tbody')[0] posts = client.parseDOM(posts, 'tr') for post in posts: data = dom.parse_dom(post, 'a', req='href')[1] link = urlparse.urljoin(self.base_link, data.attrs['href']) name = data.content t = name.split(self.hdlr)[0] if not cleantitle.get(re.sub('(|)', '', t)) == cleantitle.get( self.title): continue try: y = re.findall( '[\.|\(|\[|\s|\_|\-](S\d+E\d+|S\d+)[\.|\)|\]|\s|\_|\-]', name, re.I)[-1].upper() except BaseException: y = re.findall( '[\.|\(|\[|\s\_|\-](\d{4})[\.|\)|\]|\s\_|\-]', name, re.I)[-1].upper() if not y == self.hdlr: continue try: size = re.findall( '((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0] div = 1 if size.endswith('GB') else 1024 size = float( re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div size = '%.2f GB' % size except BaseException: size = '0' self.items.append((name, link, size)) return self.items except BaseException: return self.items
def searchShow(self, title, season): try: sea = '%s season %d' % (title, int(season)) query = self.search_link % urllib.quote_plus( cleantitle.getsearch(sea)) url = urlparse.urljoin(self.base_link, query) headers = {'User-Agent': client.agent(), 'Referer': self.base_link} scraper = cfscrape.create_scraper() r = scraper.get(url, headers=headers).content # r = client.request(url) r = client.parseDOM(r, 'item') r = [(client.parseDOM(i, 'title')[0], i) for i in r if i] r = [ i[1] for i in r if sea.lower() in i[0].replace(' ', ' ').lower() ] links = re.findall('''<h4>(EP\d+)</h4>.+?src="(.+?)"''', r[0], re.I | re.DOTALL) links = [(i[0], i[1].lstrip()) for i in links if i] return links except BaseException: return
def sources(self, url, hostDict, hostprDict): try: self._sources = [] if url is None: return self._sources if debrid.status() is False: raise Exception() data = urlparse.parse_qs(url) data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data]) self.title = data[ 'tvshowtitle'] if 'tvshowtitle' in data else data['title'] self.hdlr = 'S%02dE%02d' % ( int(data['season']), int(data['episode']) ) if 'tvshowtitle' in data else data['year'] self.hostDict = hostprDict + hostDict items = [] urls = [] posts = [] links = [] if 'tvshowtitle' not in data: url = urlparse.urljoin(self.base_link, self.search_link % data['imdb']) r = client.request(url, headers={'User-Agent': client.agent()}) posts = client.parseDOM(r, 'item') else: url = urlparse.urljoin( self.base_link, self.search_link % (cleantitle.geturl(self.title).replace('-', '+') + '+' + self.hdlr)) r = client.request(url, headers={'User-Agent': client.agent()}) posts = client.parseDOM(r, 'item') if not posts: return self._sources for post in posts: try: t = client.parseDOM(post, 'title')[0] u = client.parseDOM(post, 'link')[0] s = re.search( '((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post) s = s.groups()[0] if s else '0' items += [(t, u, s)] except BaseException: pass items = set(items) threads = [] for i in items: threads.append(workers.Thread(self._get_sources, i)) [i.start() for i in threads] [i.join() for i in threads] return self._sources except BaseException: return self._sources
def google(url): try: if any(x in url for x in ['youtube.', 'docid=']): url = 'https://drive.google.com/file/d/%s/view' % \ re.compile('docid=([\w-]+)').findall(url)[0] netloc = urlparse.urlparse(url.strip().lower()).netloc netloc = netloc.split('.google')[0] if netloc == 'docs' or netloc == 'drive': url = url.split('/preview', 1)[0] url = url.replace('drive.google.com', 'docs.google.com') headers = {'User-Agent': client.agent()} result = client.request(url, output='extended', headers=headers) try: headers['Cookie'] = result[2]['Set-Cookie'] except: pass result = result[0] if netloc == 'docs' or netloc == 'drive': result = re.compile('"fmt_stream_map",(".+?")').findall(result)[0] result = json.loads(result) result = [i.split('|')[-1] for i in result.split(',')] result = sum([googletag(i, append_height=True) for i in result], []) elif netloc == 'photos': result = result.replace('\r', '').replace('\n', '').replace('\t', '') result = re.compile('"\d*/\d*x\d*.+?","(.+?)"').findall(result)[0] result = result.replace('\\u003d', '=').replace('\\u0026', '&') result = re.compile('url=(.+?)&').findall(result) result = [urllib.unquote(i) for i in result] result = sum([googletag(i, append_height=True) for i in result], []) elif netloc == 'picasaweb': id = re.compile('#(\d*)').findall(url)[0] result = re.search('feedPreload:\s*(.*}]}})},', result, re.DOTALL).group(1) result = json.loads(result)['feed']['entry'] if len(result) > 1: result = [ i for i in result if str(id) in i['link'][0]['href'] ][0] elif len(result) == 1: result = result[0] result = result['media']['content'] result = [i['url'] for i in result if 'video' in i['type']] result = sum([googletag(i, append_height=True) for i in result], []) elif netloc == 'plus': id = urlparse.urlparse(url).path.split('/')[-1] result = result.replace('\r', '').replace('\n', '').replace('\t', '') result = result.split('"%s"' % id)[-1].split(']]')[0] result = result.replace('\\u003d', '=').replace('\\u0026', '&') result = re.compile('url=(.+?)&').findall(result) result = [urllib.unquote(i) for i in result] result = sum([googletag(i, append_height=True) for i in result], []) result = sorted(result, key=lambda i: i.get('height', 0), reverse=True) url = [] for q in ['4K', '1440p', '1080p', 'HD', 'SD']: try: url += [[i for i in result if i.get('quality') == q][0]] except: pass for i in url: i.pop('height', None) i.update({'url': i['url'] + '|%s' % urllib.urlencode(headers)}) if not url: return return url except: return
def sources(self, url, hostDict, hostprDict): try: sources = [] if url is None: return sources if debrid.status() is False: raise Exception() data = urlparse.parse_qs(url) data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data]) url = self.searchMovie(data['title'], data['year']) if url is None: return sources r = client.request(url) data = client.parseDOM(r, 'div', attrs={'class': 'playex'})[0] frames = client.parseDOM(data, 'iframe', ret='src') frames += re.compile('''<iframe\s*src=['"](.+?)['"]''', re.DOTALL).findall(data) quality = client.parseDOM(r, 'span', attrs={'class': 'qualityx'})[0] for frame in frames: url = frame.split('=')[1] if frame.startswith('<') else frame url = client.replaceHTMLCodes(url) url = url.encode('utf-8') valid, host = source_utils.is_host_valid(url, hostDict) if valid: quality, info = source_utils.get_release_quality( quality, url) info = ' | '.join(info) sources.append({ 'source': host, 'quality': quality, 'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': False }) elif url.endswith('mp4'): url += '|User-Agent=%s' % urllib.quote_plus(client.agent()) sources.append({ 'source': 'MP4', 'quality': quality, 'language': 'en', 'url': url, 'direct': True, 'debridonly': False }) elif 'mystream' in url: data = client.request(url) links = dom_parser2.parse_dom(data, 'source', req=['src', 'label']) for link in links: label = link.attrs['label'] url = link.attrs[ 'src'] + '|User-Agent=%s' % urllib.quote_plus( client.agent()) sources.append({ 'source': 'MYSTREAM', 'quality': label, 'language': 'en', 'url': url, 'direct': True, 'debridonly': False }) else: continue return sources except Exception: return sources