def _get_items(self, url):
    try:
        headers = {'User-Agent': client.agent()}
        r = self.scraper.get(url, headers=headers).content
        posts = client.parseDOM(r, 'table', attrs={'class': 'table2'})[0]
        posts = client.parseDOM(posts, 'tr')
        for post in posts:
            data = client.parseDOM(post, 'a', ret='href')[1]
            if '/search/' in data:
                continue
            try:
                data = data.encode('ascii', 'ignore')
            except:
                pass
            data = re.sub('\s', '', data).strip()
            link = urlparse.urljoin(self.base_link, data)
            name = client.parseDOM(post, 'a')[1]
            name = urllib.unquote_plus(name)
            name = re.sub('[^A-Za-z0-9]+', '.', name).lstrip('.')
            if source_utils.remove_lang(name):
                continue
            match = source_utils.check_title(self.title, name, self.hdlr, self.year)
            if not match:
                continue
            try:
                seeders = int(client.parseDOM(post, 'td', attrs={'class': 'tdseed'})[0].replace(',', ''))
                if self.min_seeders > seeders:
                    continue
            except:
                seeders = 0
            try:
                size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                dsize, isize = source_utils._size(size)
            except:
                isize = '0'
                dsize = 0
            self.items.append((name, link, isize, dsize, seeders))
        return self.items
    except:
        source_utils.scraper_error('LIMETORRENTS')
        return self.items
def sources(self, url, hostDict, hostprDict):
    try:
        self.sources = []
        if url is None:
            return self.sources
        if debrid.status() is False:
            raise Exception()
        self.hostDict = hostDict + hostprDict
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        title = title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        hdlr = data['year']
        hdlr2 = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else ''
        imdb = data['imdb']
        url = self.search(title, hdlr)
        headers = {'User-Agent': client.agent()}
        r = self.scraper.get(url, headers=headers).content
        if hdlr2 == '':
            r = dom_parser.parse_dom(r, 'ul', {'id': 'releases'})[0]
        else:
            r = dom_parser.parse_dom(r, 'ul', {'id': 'episodes'})[0]
        r = dom_parser.parse_dom(r.content, 'a', req=['href'])
        r = [(i.content, urlparse.urljoin(self.base_link, i.attrs['href'])) for i in r if i and i.content != 'Watch']
        if hdlr2 != '':
            r = [(i[0], i[1]) for i in r if hdlr2.lower() in i[0].lower()]
        threads = []
        for i in r:
            threads.append(workers.Thread(self._get_sources, i[0], i[1]))
        [i.start() for i in threads]
        # [i.join() for i in threads]
        alive = [x for x in threads if x.is_alive() is True]
        while alive:
            alive = [x for x in threads if x.is_alive() is True]
            time.sleep(0.1)
        return self.sources
    except:
        source_utils.scraper_error('RAPIDMOVIEZ')
        return self.sources
def _get_items(self, url):
    items = []
    try:
        headers = {'User-Agent': client.agent()}
        r = client.request(url, headers=headers)
        posts = client.parseDOM(r, 'tr', attrs={'class': 't-row'})
        posts = [i for i in posts if 'racker:' not in i]
        for post in posts:
            ref = client.parseDOM(post, 'a', ret='href')
            url = [i for i in ref if 'magnet:' in i][0]
            name = client.parseDOM(post, 'a', ret='title')[0]
            name = urllib.unquote_plus(name)
            name = re.sub('[^A-Za-z0-9]+', '.', name).lstrip('.')
            if source_utils.remove_lang(name):
                continue
            match = source_utils.check_title(self.title, name, self.hdlr, self.year)
            if not match:
                continue
            try:
                seeders = int(re.findall("<td.*?<font color='green'><b>([0-9]+|[0-9]+,[0-9]+)</b>", post)[0].replace(',', ''))
                if self.min_seeders > seeders:
                    continue
            except:
                seeders = 0
            try:
                size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                dsize, isize = source_utils._size(size)
            except:
                isize = '0'
                dsize = 0
            items.append((name, url, isize, dsize, seeders))
        return items
    except:
        source_utils.scraper_error('GLODLS')
        return items
def _get_items(self, url):
    try:
        headers = {'User-Agent': client.agent()}
        r = client.request(url, headers=headers)
        posts = client.parseDOM(r, 'tr', attrs={'id': 'torrent_latest_torrents'})
        for post in posts:
            data = client.parseDOM(post, 'a', attrs={'title': 'Torrent magnet link'}, ret='href')[0]
            link = urllib.unquote(data).decode('utf8').replace('https://mylink.me.uk/?url=', '')
            name = urllib.unquote_plus(re.search('dn=([^&]+)', link).groups()[0])
            t = name.split(self.hdlr)[0]
            # strip parentheses before comparing cleaned titles
            if not cleantitle.get(re.sub('[()]', '', t)) == cleantitle.get(self.title):
                continue
            try:
                y = re.findall('[\.|\(|\[|\s|\_|\-](S\d+E\d+|S\d+)[\.|\)|\]|\s|\_|\-]', name, re.I)[-1].upper()
            except BaseException:
                y = re.findall('[\.|\(|\[|\s\_|\-](\d{4})[\.|\)|\]|\s\_|\-]', name, re.I)[-1].upper()
            if not y == self.hdlr:
                continue
            try:
                size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                div = 1 if size.endswith('GB') else 1024
                size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
                size = '%.2f GB' % size
            except BaseException:
                size = '0'
            self.items.append((name, link, size))
        return self.items
    except BaseException:
        return self.items
def _get_sources(self, name, url):
    try:
        headers = {'User-Agent': client.agent()}
        r = self.scraper.get(url, headers=headers).content
        name = client.replaceHTMLCodes(name)
        l = dom_parser.parse_dom(r, 'div', {'class': 'ppu2h'})
        s = ''
        for i in l:
            s += i.content
        urls = re.findall(r'''((?:http|ftp|https)://[\w_-]+(?:(?:\.[\w_-]+)+)[\w.,@?^=%&:/~+#-]*[\w@?^=%&/~+#-])''', i.content, flags=re.MULTILINE | re.DOTALL)
        # skip archive and subtitle links
        urls = [i for i in urls if not any(x in i for x in ('.rar', '.zip', '.iso', '.idx', '.sub'))]
        for url in urls:
            if url in str(self.sources):
                continue
            valid, host = source_utils.is_host_valid(url, self.hostDict)
            if not valid:
                continue
            host = client.replaceHTMLCodes(host)
            host = host.encode('utf-8')
            quality, info = source_utils.get_release_quality(name, url)
            try:
                size = re.findall('((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', name)[0]
                div = 1 if size.endswith(('GB', 'GiB')) else 1024
                size = float(re.sub('[^0-9|/.|/,]', '', size)) / div
                size = '%.2f GB' % size
                info.append(size)
            except BaseException:
                pass
            info = ' | '.join(info)
            self.sources.append({'source': host, 'quality': quality, 'language': 'en', 'url': url,
                                 'info': info, 'direct': False, 'debridonly': True})
    except:
        pass
def _get_sources(self, name, url):
    try:
        headers = {'User-Agent': client.agent()}
        r = self.scraper.get(url, headers=headers).content
        name = client.replaceHTMLCodes(name)
        if name.startswith('['):
            name = name.split(']')[1]
        name = name.strip().replace(' ', '.')
        l = dom_parser.parse_dom(r, 'div', {'class': 'ppu2h'})
        if l == []:
            return
        s = ''
        for i in l:
            s += i.content
        urls = re.findall(r'''((?:http|ftp|https)://[\w_-]+(?:(?:\.[\w_-]+)+)[\w.,@?^=%&:/~+#-]*[\w@?^=%&/~+#-])''', i.content, flags=re.MULTILINE | re.DOTALL)
        # skip archive and subtitle links
        urls = [i for i in urls if not any(x in i for x in ('.rar', '.zip', '.iso', '.idx', '.sub'))]
        for url in urls:
            if url in str(self.sources):
                continue
            valid, host = source_utils.is_host_valid(url, self.hostDict)
            if not valid:
                continue
            host = client.replaceHTMLCodes(host)
            host = host.encode('utf-8')
            quality, info = source_utils.get_release_quality(name, url)
            try:
                size = re.findall('((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', name)[0]
                dsize, isize = source_utils._size(size)
                info.insert(0, isize)
            except:
                dsize = 0
            fileType = source_utils.getFileType(name)
            info.append(fileType)
            info = ' | '.join(info) if fileType else info[0]
            self.sources.append({'source': host, 'quality': quality, 'language': 'en', 'url': url, 'info': info,
                                 'direct': False, 'debridonly': True, 'size': dsize})
    except:
        source_utils.scraper_error('RAPIDMOVIEZ')
def _get_items(self, url):
    items = []
    try:
        headers = {'User-Agent': client.agent()}
        r = client.request(url, headers=headers)
        posts = client.parseDOM(r, 'tr', attrs={'class': 't-row'})
        posts = [i for i in posts if 'racker:' not in i]
        for post in posts:
            ref = client.parseDOM(post, 'a', ret='href')
            url = [i for i in ref if 'magnet:' in i][0]
            if any(x in url.lower() for x in ['french', 'italian', 'spanish', 'truefrench', 'dublado', 'dubbed']):
                continue
            name = client.parseDOM(post, 'a', ret='title')[0]
            t = name.split(self.hdlr)[0].replace(self.year, '').replace('(', '').replace(')', '').replace('&', 'and')
            if cleantitle.get(t) != cleantitle.get(self.title):
                continue
            if self.hdlr not in name:
                continue
            try:
                size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                div = 1 if size.endswith('GB') else 1024
                size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
                size = '%.2f GB' % size
            except:
                size = '0'
            items.append((name, url, size))
        return items
    except:
        source_utils.scraper_error('GLODLS')
        return items
def _get_items(self, url):
    try:
        headers = {'User-Agent': client.agent()}
        r = client.request(url, headers=headers)
        if '<tbody' not in r:
            return self.items
        posts = client.parseDOM(r, 'tbody')[0]
        posts = client.parseDOM(posts, 'tr')
        for post in posts:
            data = client.parseDOM(post, 'a', ret='href')[1]
            link = urlparse.urljoin(self.base_link, data)
            name = client.parseDOM(post, 'a')[1]
            name = urllib.unquote_plus(name).replace(' ', '.')
            if source_utils.remove_lang(name):
                continue
            t = name.split(self.hdlr)[0].replace(self.year, '').replace('(', '').replace(')', '').replace('&', 'and').replace('.US.', '.').replace('.us.', '.')
            if cleantitle.get(t) != cleantitle.get(self.title):
                continue
            if self.hdlr not in name:
                continue
            try:
                size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                div = 1 if size.endswith('GB') else 1024
                size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
                size = '%.2f GB' % size
            except:
                size = '0'
            self.items.append((name, link, size))
        return self.items
    except:
        source_utils.scraper_error('1337X')
        return self.items
def _get_items(self, url):
    try:
        headers = {'User-Agent': client.agent()}
        r = client.request(url, headers=headers)
        if '<tbody' not in r:
            return self.items
        posts = client.parseDOM(r, 'tbody')[0]
        posts = client.parseDOM(posts, 'tr')
        for post in posts:
            data = client.parseDOM(post, 'a', ret='href')[1]
            link = urljoin(self.base_link, data)
            try:
                seeders = int(client.parseDOM(post, 'td', attrs={'class': 'coll-2 seeds'})[0].replace(',', ''))
                if self.min_seeders > seeders:
                    continue
            except:
                seeders = 0
            name = client.parseDOM(post, 'a')[1]
            name = unquote_plus(name)
            name = re.sub('[^A-Za-z0-9]+', '.', name).lstrip('.')
            if source_utils.remove_lang(name):
                continue
            match = source_utils.check_title(self.title, name, self.hdlr, self.year)
            if not match:
                continue
            try:
                size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                dsize, isize = source_utils._size(size)
            except:
                isize = '0'
                dsize = 0
            self.items.append((name, link, isize, dsize, seeders))
        return self.items
    except:
        source_utils.scraper_error('1337X')
        return self.items
def sources(self, url, hostDict, hostprDict):
    try:
        self._sources = []
        if url is None:
            return self._sources
        if debrid.status() is False:
            return self._sources
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        self.title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        self.title = self.title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        self.hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        self.year = data['year']
        query = '%s %s' % (self.title, self.hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        if 'tvshowtitle' in data:
            url = self.search.format('8', urllib.quote(query))
        else:
            url = self.search.format('4', urllib.quote(query))
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
        headers = {'User-Agent': client.agent()}
        _html = client.request(url, headers=headers)
        threads = []
        for i in re.findall(r'<item>(.+?)</item>', _html, re.DOTALL):
            threads.append(workers.Thread(self._get_items, i))
        [i.start() for i in threads]
        [i.join() for i in threads]
        return self._sources
    except:
        source_utils.scraper_error('TORRENTDOWNLOADS')
        return self._sources
def searchMovie(self, title, year):
    try:
        query = self.search_link % urllib.quote_plus(cleantitle.getsearch(title + ' ' + year))
        url = urlparse.urljoin(self.base_link, query)
        headers = {'User-Agent': client.agent(), 'Referer': self.base_link}
        scraper = cfscrape.create_scraper()
        r = scraper.get(url, headers=headers).content
        r = client.parseDOM(r, 'item')
        r = [(client.parseDOM(i, 'title')[0], i) for i in r if i]
        r = [i[1] for i in r if cleantitle.get(title) in cleantitle.get(i[0]) and year in i[0]]
        return r[0]
    except BaseException:
        return
def search(self, title, year):
    try:
        url = urlparse.urljoin(self.base_link, self.search_link % (urllib.quote_plus(title)))
        headers = {'User-Agent': client.agent()}
        r = self.scraper.get(url, headers=headers).content
        r = dom_parser.parse_dom(r, 'div', {'class': 'list_items'})[0]
        r = dom_parser.parse_dom(r.content, 'li')
        r = [(dom_parser.parse_dom(i, 'a', {'class': 'title'})) for i in r]
        r = [(i[0].attrs['href'], i[0].content) for i in r]
        r = [(urlparse.urljoin(self.base_link, i[0])) for i in r if cleantitle.get(title) in cleantitle.get(i[1]) and year in i[1]]
        if r:
            return r[0]
        else:
            return
    except:
        return
def _get_items(self, url):
    items = []
    try:
        headers = {'User-Agent': client.agent()}
        r = client.request(url, headers=headers)
        posts = client.parseDOM(r, 'tr', attrs={'class': 't-row'})
        posts = [i for i in posts if 'racker:' not in i]
        for post in posts:
            data = client.parseDOM(post, 'a', ret='href')
            url = [i for i in data if 'magnet:' in i][0]
            name = client.parseDOM(post, 'a', ret='title')[0]
            t = name.split(self.hdlr)[0]
            # strip parentheses before comparing cleaned titles
            if not cleantitle.get(re.sub('[()]', '', t)) == cleantitle.get(self.title):
                continue
            try:
                y = re.findall('[\.|\(|\[|\s|\_|\-](S\d+E\d+|S\d+)[\.|\)|\]|\s|\_|\-]', name, re.I)[-1].upper()
            except BaseException:
                y = re.findall('[\.|\(|\[|\s\_|\-](\d{4})[\.|\)|\]|\s\_|\-]', name, re.I)[-1].upper()
            if not y == self.hdlr:
                continue
            try:
                size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                div = 1 if size.endswith('GB') else 1024
                size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
                size = '%.2f GB' % size
            except BaseException:
                size = '0'
            items.append((name, url, size))
        return items
    except BaseException:
        return items
def _get_items(self, url):
    try:
        headers = {'User-Agent': client.agent()}
        r = client.request(url, headers=headers)
        posts = client.parseDOM(r, 'tr', attrs={'id': 'torrent_latest_torrents'})
        for post in posts:
            ref = client.parseDOM(post, 'a', attrs={'title': 'Torrent magnet link'}, ret='href')[0]
            try:
                link = unquote(ref).decode('utf8').replace('https://mylink.me.uk/?url=', '').replace('https://mylink.cx/?url=', '')
            except:
                link = unquote(ref).replace('https://mylink.me.uk/?url=', '').replace('https://mylink.cx/?url=', '')
            name = unquote_plus(re.search('dn=([^&]+)', link).groups()[0])
            name = re.sub('[^A-Za-z0-9]+', '.', name).lstrip('.')
            if source_utils.remove_lang(name):
                continue
            match = source_utils.check_title(self.title, name, self.hdlr, self.year)
            if not match:
                continue
            try:
                seeders = int(re.findall('<td class="green center">([0-9]+|[0-9]+,[0-9]+)</td>', post, re.DOTALL)[0].replace(',', ''))
                if self.min_seeders > seeders:
                    continue
            except:
                seeders = 0
            try:
                size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                dsize, isize = source_utils._size(size)
            except:
                isize = '0'
                dsize = 0
            self.items.append((name, link, isize, dsize, seeders))
        return self.items
    except:
        source_utils.scraper_error('KICKASS2')
        return self.items
def sources(self, url, hostDict, hostprDict):
    sources = []
    try:
        if not url:
            return sources
        data = parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['title']
        year = data['year']
        t = title + year
        query = '%s' % data['title']
        query = re.sub(r'(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
        url = self.search_link.format(quote_plus(query))
        url = urljoin(self.base_link, url)
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
        r = client.request(url)
        if r is None:
            return sources
        if 'Not Found' in r:
            return sources
        items = client.parseDOM(r, 'li')
        items = [(dom.parse_dom(i, 'a', req='href')[0]) for i in items if year in i]
        items = [(i.attrs['href'], re.sub('<.+?>|\n', '', i.content).strip()) for i in items]
        item = [i[0].replace('movie', 'view') for i in items if cleantitle.get(t) == cleantitle.get(i[1])][0]
        html = client.request(item)
        streams = re.findall('sources\:\s*\[(.+?)\]\,', html, re.DOTALL)[0]
        streams = re.findall('src:\s*[\'"](.+?)[\'"].+?size:\s*[\'"](.+?)[\'"]', streams, re.DOTALL)
        for link, label in streams:
            quality = source_utils.get_release_quality(label, label)[0]
            link += '|User-Agent=%s&Referer=%s' % (quote(client.agent()), item)
            sources.append({'source': 'direct', 'quality': quality, 'info': '', 'language': 'en',
                            'url': link, 'direct': True, 'debridonly': False})
        return sources
    except:
        source_utils.scraper_error('IWAATCH')
        return sources
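# Illustrative sketch (not part of the scrapers above): the sources() entry
# points receive `url` as a plugin-style query string of metadata rather than
# a web address. A minimal round trip using only the standard library; the
# key/values below are made-up examples.
from urllib.parse import parse_qs, urlencode

meta = {'title': 'Example Movie', 'year': '2019', 'imdb': 'tt0000000'}
qs = urlencode(meta)

# The same unpacking idiom used at the top of each sources() method.
data = parse_qs(qs)
data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])

title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
hdlr = data['year']  # movies key off the year; episodes use 'S%02dE%02d'
print(title, hdlr)   # -> Example Movie 2019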
def _get_items(self, url):
    try:
        headers = {'User-Agent': client.agent()}
        r = client.request(url, headers=headers)
        posts = client.parseDOM(r, 'tbody')[0]
        posts = client.parseDOM(posts, 'tr')
        for post in posts:
            data = dom.parse_dom(post, 'a', req='href')[1]
            link = urlparse.urljoin(self.base_link, data.attrs['href'])
            name = data.content
            t = name.split(self.hdlr)[0]
            # strip parentheses before comparing cleaned titles
            if not cleantitle.get(re.sub('[()]', '', t)) == cleantitle.get(self.title):
                continue
            # try:
            #     y = re.findall('[\.|\(|\[|\s|\_|\-](S\d+E\d+|S\d+)[\.|\)|\]|\s|\_|\-]', name, re.I)[-1].upper()
            # except BaseException:
            #     y = re.findall('[\.|\(|\[|\s\_|\-](\d{4})[\.|\)|\]|\s\_|\-]', name, re.I)[-1].upper()
            # if not y == self.hdlr:
            #     continue
            if self.hdlr not in name:
                raise Exception()
            try:
                size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                div = 1 if size.endswith('GB') else 1024
                size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
                size = '%.2f GB' % size
            except BaseException:
                size = '0'
            self.items.append((name, link, size))
        return self.items
    except BaseException:
        return self.items
def _get_items(self, url):
    items = []
    try:
        headers = {'User-Agent': client.agent()}
        r = client.request(url, headers=headers)
        posts = client.parseDOM(r, 'tr', attrs={'class': 't-row'})
        posts = [i for i in posts if 'racker:' not in i]
        for post in posts:
            ref = client.parseDOM(post, 'a', ret='href')
            url = [i for i in ref if 'magnet:' in i][0]
            name = client.parseDOM(post, 'a', ret='title')[0]
            name = urllib.unquote_plus(name).replace(' ', '.')
            if source_utils.remove_lang(name):
                continue
            t = name.split(self.hdlr)[0].replace(self.year, '').replace('(', '').replace(')', '').replace('&', 'and').replace('.US.', '.').replace('.us.', '.')
            if cleantitle.get(t) != cleantitle.get(self.title):
                continue
            if self.hdlr not in name:
                continue
            try:
                size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                dsize, isize = source_utils._size(size)
            except:
                isize = '0'
                dsize = 0
            items.append((name, url, isize, dsize))
        return items
    except:
        source_utils.scraper_error('GLODLS')
        return items
def sources(self, url, hostDict, hostprDict):
    try:
        self._sources = []
        if url is None:
            return self._sources
        if debrid.status() is False:
            raise Exception()
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        query = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
        query = cleantitle.geturl(query)
        url = urlparse.urljoin(self.base_link, query)
        headers = {'User-Agent': client.agent()}
        r = client.request(url, headers=headers)
        posts = dom_parser2.parse_dom(r, 'li', {'class': re.compile('.+?'), 'id': re.compile('comment-.+?')})
        self.hostDict = hostDict + hostprDict
        threads = []
        for i in posts:
            threads.append(workers.Thread(self._get_sources, i.content))
        [i.start() for i in threads]
        [i.join() for i in threads]
        return self._sources
    except Exception:
        return self._sources
def search(self, title, year):
    try:
        url = urlparse.urljoin(self.base_link, self.search_link % (urllib.quote_plus(title)))
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
        headers = {'User-Agent': client.agent()}
        r = self.scraper.get(url, headers=headers).content
        # switch to client.parseDOM() to rid import
        r = dom_parser.parse_dom(r, 'div', {'class': 'list_items'})[0]
        r = dom_parser.parse_dom(r.content, 'li')
        r = [(dom_parser.parse_dom(i, 'a', {'class': 'title'})) for i in r]
        r = [(i[0].attrs['href'], i[0].content) for i in r]
        r = [(urlparse.urljoin(self.base_link, i[0])) for i in r if cleantitle.get(title) in cleantitle.get(i[1]) and year in i[1]]
        if r:
            return r[0]
        else:
            return
    except:
        source_utils.scraper_error('RAPIDMOVIEZ')
        return
def searchShow(self, title, season):
    try:
        sea = '%s season %d' % (title, int(season))
        query = self.search_link % urllib.quote_plus(cleantitle.getsearch(sea))
        url = urlparse.urljoin(self.base_link, query)
        headers = {'User-Agent': client.agent(), 'Referer': self.base_link}
        scraper = cfscrape.create_scraper()
        r = scraper.get(url, headers=headers).content
        # r = client.request(url)
        r = client.parseDOM(r, 'item')
        r = [(client.parseDOM(i, 'title')[0], i) for i in r if i]
        r = [i[1] for i in r if sea.lower() in i[0].replace('&nbsp;', ' ').lower()]
        links = re.findall('''<h4>(EP\d+)</h4>.+?src="(.+?)"''', r[0], re.I | re.DOTALL)
        links = [(i[0], i[1].lstrip()) for i in links if i]
        return links
    except BaseException:
        return
def _get_items(self, url):
    try:
        headers = {'User-Agent': client.agent()}
        r = client.request(url, headers=headers)
        posts = client.parseDOM(r, 'tr', attrs={'id': 'torrent_latest_torrents'})
        for post in posts:
            ref = client.parseDOM(post, 'a', attrs={'title': 'Torrent magnet link'}, ret='href')[0]
            link = urllib.unquote(ref).decode('utf8').replace('https://mylink.me.uk/?url=', '').replace('https://mylink.cx/?url=', '')
            name = urllib.unquote_plus(re.search('dn=([^&]+)', link).groups()[0])
            name = name.replace(' ', '.')
            if source_utils.remove_lang(name):
                continue
            t = name.split(self.hdlr)[0].replace(self.year, '').replace('(', '').replace(')', '').replace('&', 'and').replace('.US.', '.').replace('.us.', '.')
            if cleantitle.get(t) != cleantitle.get(self.title):
                continue
            if self.hdlr not in name:
                continue
            try:
                size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                dsize, isize = source_utils._size(size)
            except:
                isize = '0'
                dsize = 0
            self.items.append((name, link, isize, dsize))
        return self.items
    except:
        source_utils.scraper_error('KICKASS2')
        return self.items
def sources(self, url, hostDict, hostprDict):
    try:
        self._sources = []
        if url is None:
            return self._sources
        if debrid.status() is False:
            raise Exception()
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        self.title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        self.hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        query = '%s S%02dE%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
        query = re.sub(r'(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
        if 'tvshowtitle' in data:
            url = self.search.format('8', urllib.quote(query))
        else:
            url = self.search.format('4', urllib.quote(query))
        self.hostDict = hostDict + hostprDict
        headers = {'User-Agent': client.agent()}
        _html = client.request(url, headers=headers)
        threads = []
        for i in re.findall(r'<item>(.+?)</item>', _html, re.DOTALL):
            threads.append(workers.Thread(self._get_items, i))
        [i.start() for i in threads]
        [i.join() for i in threads]
        return self._sources
    except BaseException:
        return self._sources
def sources(self, url, hostDict, hostprDict):
    sources = []
    try:
        if url is None:
            return sources
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        imdb = data['imdb']
        url = urlparse.urljoin(self.base_link, 'player/play.php?imdb=%s' % imdb)
        data = client.request(url, referer=self.base_link)
        links = dom.parse_dom(data, 'jwplayer:source', req=['file', 'label'])
        for link in links:
            url = link.attrs['file']
            url = url.replace(' ', '%20') + '|User-Agent={0}&Referer={1}'.format(urllib.quote(client.agent()), url)
            quality, info = source_utils.get_release_quality(link.attrs['label'])
            sources.append({'source': 'GVIDEO', 'quality': quality, 'language': 'en', 'url': url,
                            'direct': True, 'debridonly': False})
        return sources
    except BaseException:
        return sources
def get_sources_packs(self, link):
    # log_utils.log('link = %s' % link, __name__, log_utils.LOGDEBUG)
    try:
        headers = {'User-Agent': client.agent()}
        r = client.request(link, headers=headers)
        if not r:
            return
        posts = client.parseDOM(r, 'tr', attrs={'id': 'torrent_latest_torrents'})
        for post in posts:
            ref = client.parseDOM(post, 'a', attrs={'title': 'Torrent magnet link'}, ret='href')[0]
            link = ref.split('url=')[1]
            url = unquote_plus(link).replace('&amp;', '&').replace(' ', '.')
            url = url.split('&tr')[0]
            hash = re.compile('btih:(.*?)&').findall(url)[0]
            name = unquote_plus(url.split('&dn=')[1])
            name = source_utils.clean_name(self.title, name)
            if source_utils.remove_lang(name):
                continue
            if not self.search_series:
                if not self.bypass_filter:
                    if not source_utils.filter_season_pack(self.title, self.aliases, self.year, self.season_x, name):
                        continue
                package = 'season'
            elif self.search_series:
                if not self.bypass_filter:
                    valid, last_season = source_utils.filter_show_pack(self.title, self.aliases, self.imdb, self.year, self.season_x, name, self.total_seasons)
                    if not valid:
                        continue
                else:
                    last_season = self.total_seasons
                package = 'show'
            try:
                seeders = int(re.findall('<td class="green center">([0-9]+|[0-9]+,[0-9]+)</td>', post, re.DOTALL)[0].replace(',', ''))
                if self.min_seeders > seeders:
                    continue
            except:
                seeders = 0
            quality, info = source_utils.get_release_quality(name, url)
            try:
                size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                dsize, isize = source_utils._size(size)
                info.insert(0, isize)
            except:
                dsize = 0
            info = ' | '.join(info)
            item = {'source': 'torrent', 'seeders': seeders, 'hash': hash, 'name': name, 'quality': quality,
                    'language': 'en', 'url': url, 'info': info, 'direct': False, 'debridonly': True,
                    'size': dsize, 'package': package}
            if self.search_series:
                item.update({'last_season': last_season})
            self.sources.append(item)
    except:
        source_utils.scraper_error('KICKASS2')
def sources(self, url, hostDict, hostprDict):
    try:
        sources = []
        if url is None:
            return sources
        scraper = cfscrape.create_scraper()
        headers = {'User-Agent': client.agent(), 'Referer': self.base_link}
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        year = data['year']
        query = '%s' % data['tvshowtitle'] if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)
        r = scraper.get(url, headers=headers).content
        posts = client.parseDOM(r, 'div', attrs={'class': 'ml-item'})
        items = []
        for post in posts:
            try:
                t = client.parseDOM(post, 'a', ret='oldtitle')[0]
                u = client.parseDOM(post, 'a', ret='href')[0]
                try:
                    y = re.findall('[\.|\(|\[|\s](\d{4})[\.|\)|\]|\s]', t, re.I)[-1].upper()
                except BaseException:
                    y = client.parseDOM(post, 'a', attrs={'rel': 'tag'})[0]
                items += [(t, u, y)]
            except BaseException:
                pass
        urls = []
        for item in items:
            try:
                link = item[1] if item[1].startswith('http') else 'https:%s' % item[1]
                t = re.sub('(\.|\(|\[|\s)(\d{4}|S\d+E\d+|S\d+|3D)(\.|\)|\]|\s|)(.+|)', '', item[0], flags=re.I)
                if not cleantitle.get(t) == cleantitle.get(title):
                    raise Exception()
                if not item[2] == year:
                    raise Exception()
                if 'series' in link:
                    r = client.request(link)
                    sep = 'season-%d-episode-%d' % (int(data['season']), int(data['episode']))
                    url = client.parseDOM(r, 'a', ret='href')
                    url = [i for i in url if sep in i][0]
                else:
                    url = link
                r = scraper.get(url, headers=headers).content
                data = client.parseDOM(r, 'div', attrs={'id': 'list-dl'})
                urls = client.parseDOM(r, 'source', ret='src')
                urls += client.parseDOM(data, 'a', ret='href')
            except BaseException:
                pass
        for item in urls:
            try:
                if 'ouo.io' in item:
                    url = item.split('?s=')[1]
                elif 'linkshrink' in item:
                    url = item.split('=')[1]
                else:
                    url = item
                if 'openload' in url:
                    raise Exception()
                url = 'https:%s' % url if not url.startswith('http') else url
                quality, info = source_utils.get_release_quality(url, url)
                info = ' | '.join(info)
                url = urllib.quote(url, '?:/.-_')
                sources.append({'source': 'DL', 'quality': quality, 'language': 'en', 'url': url,
                                'info': info, 'direct': False, 'debridonly': False})
            except BaseException:
                pass
        return sources
    except BaseException:
        return sources
def google(url):
    try:
        if any(x in url for x in ['youtube.', 'docid=']):
            url = 'https://drive.google.com/file/d/%s/view' % re.compile('docid=([\w-]+)').findall(url)[0]
        netloc = urlparse.urlparse(url.strip().lower()).netloc
        netloc = netloc.split('.google')[0]
        if netloc == 'docs' or netloc == 'drive':
            url = url.split('/preview', 1)[0]
            url = url.replace('drive.google.com', 'docs.google.com')
        headers = {'User-Agent': client.agent()}
        result = client.request(url, output='extended', headers=headers)
        try:
            headers['Cookie'] = result[2]['Set-Cookie']
        except:
            pass
        result = result[0]
        if netloc == 'docs' or netloc == 'drive':
            result = re.compile('"fmt_stream_map",(".+?")').findall(result)[0]
            result = json.loads(result)
            result = [i.split('|')[-1] for i in result.split(',')]
            result = sum([googletag(i, append_height=True) for i in result], [])
        elif netloc == 'photos':
            result = result.replace('\r', '').replace('\n', '').replace('\t', '')
            result = re.compile('"\d*/\d*x\d*.+?","(.+?)"').findall(result)[0]
            result = result.replace('\\u003d', '=').replace('\\u0026', '&')
            result = re.compile('url=(.+?)&').findall(result)
            result = [urllib.unquote(i) for i in result]
            result = sum([googletag(i, append_height=True) for i in result], [])
        elif netloc == 'picasaweb':
            id = re.compile('#(\d*)').findall(url)[0]
            result = re.search('feedPreload:\s*(.*}]}})},', result, re.DOTALL).group(1)
            result = json.loads(result)['feed']['entry']
            if len(result) > 1:
                result = [i for i in result if str(id) in i['link'][0]['href']][0]
            elif len(result) == 1:
                result = result[0]
            result = result['media']['content']
            result = [i['url'] for i in result if 'video' in i['type']]
            result = sum([googletag(i, append_height=True) for i in result], [])
        elif netloc == 'plus':
            id = urlparse.urlparse(url).path.split('/')[-1]
            result = result.replace('\r', '').replace('\n', '').replace('\t', '')
            result = result.split('"%s"' % id)[-1].split(']]')[0]
            result = result.replace('\\u003d', '=').replace('\\u0026', '&')
            result = re.compile('url=(.+?)&').findall(result)
            result = [urllib.unquote(i) for i in result]
            result = sum([googletag(i, append_height=True) for i in result], [])
        result = sorted(result, key=lambda i: i.get('height', 0), reverse=True)
        url = []
        for q in ['4K', '1440p', '1080p', 'HD', 'SD']:
            try:
                url += [[i for i in result if i.get('quality') == q][0]]
            except:
                pass
        for i in url:
            i.pop('height', None)
            i.update({'url': i['url'] + '|%s' % urllib.urlencode(headers)})
        if not url:
            return
        return url
    except:
        return
def sources(self, url, hostDict, hostprDict):
    try:
        self._sources = []
        if url is None:
            return self._sources
        if debrid.status() is False:
            raise Exception()
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        self.title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        self.hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        self.hostDict = hostprDict + hostDict
        items = []
        urls = []
        posts = []
        links = []
        if 'tvshowtitle' not in data:
            url = urlparse.urljoin(self.base_link, self.search_link % data['imdb'])
            r = client.request(url, headers={'User-Agent': client.agent()})
            posts = client.parseDOM(r, 'item')
        else:
            url = urlparse.urljoin(self.base_link, self.search_link % (cleantitle.geturl(self.title).replace('-', '+') + '+' + self.hdlr))
            r = client.request(url, headers={'User-Agent': client.agent()})
            posts = client.parseDOM(r, 'item')
        if not posts:
            return self._sources
        for post in posts:
            try:
                t = client.parseDOM(post, 'title')[0]
                u = client.parseDOM(post, 'link')[0]
                s = re.search('((?:\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)
                s = s.groups()[0] if s else '0'
                items += [(t, u, s)]
            except BaseException:
                pass
        items = set(items)
        threads = []
        for i in items:
            threads.append(workers.Thread(self._get_sources, i))
        [i.start() for i in threads]
        [i.join() for i in threads]
        return self._sources
    except BaseException:
        return self._sources
def sources(self, url, hostDict, hostprDict):
    try:
        sources = []
        if url is None:
            return sources
        if debrid.status() is False:
            raise Exception()
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        url = self.searchMovie(data['title'], data['year'])
        if url is None:
            return sources
        r = client.request(url)
        data = client.parseDOM(r, 'div', attrs={'class': 'playex'})[0]
        frames = client.parseDOM(data, 'iframe', ret='src')
        frames += re.compile('''<iframe\s*src=['"](.+?)['"]''', re.DOTALL).findall(data)
        quality = client.parseDOM(r, 'span', attrs={'class': 'qualityx'})[0]
        for frame in frames:
            url = frame.split('=')[1] if frame.startswith('<') else frame
            url = client.replaceHTMLCodes(url)
            url = url.encode('utf-8')
            valid, host = source_utils.is_host_valid(url, hostDict)
            if valid:
                quality, info = source_utils.get_release_quality(quality, url)
                info = ' | '.join(info)
                sources.append({'source': host, 'quality': quality, 'language': 'en', 'url': url,
                                'info': info, 'direct': False, 'debridonly': False})
            elif url.endswith('mp4'):
                url += '|User-Agent=%s' % urllib.quote_plus(client.agent())
                sources.append({'source': 'MP4', 'quality': quality, 'language': 'en', 'url': url,
                                'direct': True, 'debridonly': False})
            elif 'mystream' in url:
                data = client.request(url)
                links = dom_parser2.parse_dom(data, 'source', req=['src', 'label'])
                for link in links:
                    label = link.attrs['label']
                    url = link.attrs['src'] + '|User-Agent=%s' % urllib.quote_plus(client.agent())
                    sources.append({'source': 'MYSTREAM', 'quality': label, 'language': 'en', 'url': url,
                                    'direct': True, 'debridonly': False})
            else:
                continue
        return sources
    except Exception:
        return sources
def google(url, ref=None):
    try:
        if 'lh3.googleusercontent' in url or 'bp.blogspot' in url:
            newheaders = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36',
                'Accept': '*/*',
                'Host': 'lh3.googleusercontent.com',
                'Accept-Language': 'en-US,en;q=0.8,de;q=0.6,es;q=0.4',
                'Accept-Encoding': 'identity;q=1, *;q=0',
                'Referer': ref,
                'Connection': 'Keep-Alive',
                'X-Client-Data': 'CJK2yQEIo7bJAQjEtskBCPqcygEIqZ3KAQjSncoBCKijygE=',
                'Range': 'bytes=0-'}
            resp = client.request(url, headers=newheaders, redirect=False, output='extended', timeout='10')
            loc = resp[2]['Location']
            c = resp[2]['Set-Cookie'].split(';')[0]
            url = '%s|Cookie=%s' % (loc, c)
            return url
        if any(x in url for x in ['youtube.', 'docid=']):
            url = 'https://drive.google.com/file/d/%s/view' % re.compile('docid=([\w-]+)').findall(url)[0]
        netloc = urlparse(url.strip().lower()).netloc
        netloc = netloc.split('.google')[0]
        if netloc == 'docs' or netloc == 'drive':
            url = url.split('/preview', 1)[0]
            url = url.replace('drive.google.com', 'docs.google.com')
        headers = {'User-Agent': client.agent()}
        result = client.request(url, output='extended', headers=headers)
        try:
            headers['Cookie'] = result[2]['Set-Cookie']
        except:
            pass
        result = result[0]
        if netloc == 'docs' or netloc == 'drive':
            result = re.compile('"fmt_stream_map",(".+?")').findall(result)[0]
            result = json.loads(result)
            result = [i.split('|')[-1] for i in result.split(',')]
            result = sum([googletag(i, append_height=True) for i in result], [])
        elif netloc == 'photos':
            result = result.replace('\r', '').replace('\n', '').replace('\t', '')
            result = re.compile('"\d*/\d*x\d*.+?","(.+?)"').findall(result)[0]
            result = result.replace('\\u003d', '=').replace('\\u0026', '&')
            result = re.compile('url=(.+?)&').findall(result)
            result = [unquote(i) for i in result]
            result = sum([googletag(i, append_height=True) for i in result], [])
        elif netloc == 'picasaweb':
            id = re.compile('#(\d*)').findall(url)[0]
            result = re.search('feedPreload:\s*(.*}]}})},', result, re.DOTALL).group(1)
            result = json.loads(result)['feed']['entry']
            if len(result) > 1:
                result = [i for i in result if str(id) in i['link'][0]['href']][0]
            elif len(result) == 1:
                result = result[0]
            result = result['media']['content']
            result = [i['url'] for i in result if 'video' in i['type']]
            result = sum([googletag(i, append_height=True) for i in result], [])
        elif netloc == 'plus':
            id = (urlparse(url).path).split('/')[-1]
            result = result.replace('\r', '').replace('\n', '').replace('\t', '')
            result = result.split('"%s"' % id)[-1].split(']]')[0]
            result = result.replace('\\u003d', '=').replace('\\u0026', '&')
            result = re.compile('url=(.+?)&').findall(result)
            result = [unquote(i) for i in result]
            result = sum([googletag(i, append_height=True) for i in result], [])
        result = sorted(result, key=lambda i: i.get('height', 0), reverse=True)
        url = []
        for q in ['4K', '1440p', '1080p', '720p', 'SD']:
            try:
                url += [[i for i in result if i.get('quality') == q][0]]
            except:
                pass
        for i in url:
            i.pop('height', None)
            i.update({'url': i['url'] + '|%s' % urlencode(headers)})
        if not url:
            return
        return url
    except:
        log_utils.error()
        return
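# Illustrative sketch (not part of the resolvers above): the functions here
# append request headers to the playable URL after a literal '|', e.g.
# 'https://host/file.mp4|Cookie=...&User-Agent=...'. A hypothetical helper,
# assuming only the standard library, to read that convention back out:
from urllib.parse import parse_qsl, urlencode

def split_header_url(link):
    # Split 'url|Header1=...&Header2=...' into the plain URL and a header dict.
    if '|' not in link:
        return link, {}
    plain, params = link.split('|', 1)
    return plain, dict(parse_qsl(params))

# Example round trip using the same convention the google() resolvers emit.
headers = {'User-Agent': 'Mozilla/5.0', 'Cookie': 'SID=abc123'}
link = 'https://example.com/video.mp4|%s' % urlencode(headers)
print(split_header_url(link))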