def searchMovie(self, title, year):
    try:
        query = self.search_link % urllib.quote_plus(cleantitle.getsearch(title))
        url = urlparse.urljoin(self.base_link, query)
        r = client.request(url)
        r = client.parseDOM(r, 'item')
        r = [(client.parseDOM(i, 'title')[0], client.parseDOM(i, 'link')[0]) for i in r if i]
        r = [i[1] for i in r if cleantitle.get(title) == cleantitle.get(i[0])]
        return r[0]
    except Exception:
        return

def sources(self, url, hostDict, hostprDict):
    sources = []
    try:
        if url is None:
            return sources
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0',
            'Referer': 'http://iitv.pl/'
        }
        result = client.request(url, headers=headers)
        result = client.parseDOM(result, 'div', attrs={'class': 'tab-wrapper'})[0]
        # The page exposes three link lists: Polish voice-over (Lektor),
        # Polish subtitles (Napisy) and the English original (ENG).
        variants = (('lecPL', 'pl', 'Lektor'), ('subPL', 'pl', 'Napisy'), ('org', 'en', 'ENG'))
        for list_id, lang, info in variants:
            section = client.parseDOM(result, 'ul', attrs={'id': list_id})
            if not section:
                continue
            for link in client.parseDOM(section, 'a', ret='href'):
                if not str(link).startswith('http://'):
                    continue
                link = self.getlink(link)
                q = source_utils.check_sd_url(link)
                valid, host = source_utils.is_host_valid(link, hostDict)
                if not valid:
                    continue
                sources.append({'source': host, 'quality': q, 'language': lang,
                                'url': link, 'info': info, 'direct': False,
                                'debridonly': False})
        return sources
    except Exception as e:
        print str(e)
        return sources

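# Every sources() implementation in this module returns dicts of the same
# shape; a representative entry (values illustrative only):
# {'source': 'vidoza', 'quality': 'SD', 'language': 'pl',
#  'url': 'http://...', 'info': 'Lektor', 'direct': False, 'debridonly': False}
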
def __get_link(self, link):
    try:
        if not link:
            return
        query = self.get_link % link
        query = urlparse.urljoin(self.base_link, query)
        r = client.request(query)
        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'boton'})
        r = dom_parser.parse_dom(r, 'a', req='href')
        r = r[0].attrs['href']
        return r
    except:
        return

def __get_base_url(self, fallback):
    try:
        for domain in self.domains:
            try:
                url = 'https://%s' % domain
                r = client.request(url, limit=1, timeout='10')
                r = dom_parser.parse_dom(r, 'meta', attrs={'name': 'author'}, req='content')
                if r and 'movie4k' in r[0].attrs.get('content').lower():
                    return url
            except:
                pass
    except:
        pass
    return fallback

def sources(self, url, hostDict, hostprDict):
    sources = []
    try:
        if url is None:
            return sources
        if self.user == '' or self.password == '':
            raise Exception()
        url = urlparse.urljoin(self.base_link, url)
        url = client.request(url, headers=self.headers)
        url = json.loads(url)['url']
        sources.append({'source': 'ororo', 'quality': 'HD', 'language': 'en',
                        'url': url, 'direct': True, 'debridonly': False})
        return sources
    except:
        return sources

def __search(self, titles):
    try:
        query = self.search_link % urllib.quote_plus(cleantitle.query(titles[0]))
        query = urlparse.urljoin(self.base_link, query)
        t = [cleantitle.get(i) for i in set(titles) if i]
        r = client.request(query)
        r = json.loads(r)
        r = [(i.get('id'), i.get('value')) for i in r]
        r = [i[0] for i in r if cleantitle.get(i[1]) in t][0]
        return r
    except:
        return

def _get_items(self, url):
    items = []
    try:
        headers = {'User-Agent': client.agent()}
        r = client.request(url, headers=headers)
        posts = client.parseDOM(r, 'tr', attrs={'class': 't-row'})
        posts = [i for i in posts if 'racker:' not in i]  # skip 'Tracker:' info rows
        for post in posts:
            ref = client.parseDOM(post, 'a', ret='href')
            url = [i for i in ref if 'magnet:' in i][0]
            name = client.parseDOM(post, 'a', ret='title')[0]
            name = urllib.unquote_plus(name).replace(' ', '.')
            if source_utils.remove_lang(name):
                continue
            t = name.split(self.hdlr)[0].replace(self.year, '')\
                .replace('(', '').replace(')', '').replace('&', 'and')\
                .replace('.US.', '.').replace('.us.', '.')
            if cleantitle.get(t) != cleantitle.get(self.title):
                continue
            if self.hdlr not in name:
                continue
            try:
                size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                div = 1 if size.endswith('GB') else 1024
                size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
                size = '%.2f GB' % size
            except:
                size = '0'
            items.append((name, url, size))
        return items
    except:
        source_utils.scraper_error('GLODLS')
        return items

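# The size-parsing block above recurs almost verbatim in the torrent scrapers
# below. A standalone sketch of the same normalisation; the helper name is
# illustrative, not part of the original module:
import re

def _normalize_size(text):
    # first '<n> GiB|MiB|GB|MB' token in `text`, normalised to '<x.xx> GB'
    try:
        size = re.findall(r'((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', text)[0]
        div = 1 if size.endswith(('GB', 'GiB')) else 1024
        return '%.2f GB' % (float(re.sub(r'[^0-9.,]', '', size).replace(',', '.')) / div)
    except Exception:
        return '0'

# e.g. _normalize_size('Size: 716.8 MB')  ->  '0.70 GB'
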
def sources(self, url, hostDict, hostprDict):
    self.sources = []
    try:
        if url is None:
            return self.sources
        if debrid.status() is False:
            return self.sources
        data = parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        self.title = data['tvshowtitle'].lower() if 'tvshowtitle' in data else data['title'].lower()
        self.title = self.title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        self.hdlr = 's%02de%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        self.year = data['year']
        query = '%s %s' % (self.title, self.hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        url = self.search_link % quote_plus(query)
        url = urljoin(self.base_link, url).replace('+', '-')
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
        r = client.request(url, timeout='5')
        if r is None:
            return self.sources
        links = re.findall('<a href="(/torrent/.+?)"', r, re.DOTALL)
        threads = []
        for link in links:
            threads.append(workers.Thread(self.get_sources, link))
        [i.start() for i in threads]
        [i.join() for i in threads]
        return self.sources
    except:
        source_utils.scraper_error('YOURBITTORRENT')
        return self.sources

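# The start/join fan-out above is the threading pattern used by all the
# debrid scrapers in this section. A dependency-free sketch of the same idea
# (threading stands in for the add-on's `workers` module; names illustrative):
import threading

def _fan_out(worker, items):
    # one thread per item; all joined before the caller reads shared state
    threads = [threading.Thread(target=worker, args=(item,)) for item in items]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
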
def sources(self, url, hostDict, hostprDict):
    sources = []
    try:
        if url is None:
            return sources
        if debrid.status() is False:
            raise Exception()
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        query = '%s s%02de%02d' % (data['tvshowtitle'], int(data['season']), int(data['episode'])) \
            if 'tvshowtitle' in data else '%s %s' % (data['title'], data['year'])
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', ' ', query)
        url = urlparse.urljoin(self.base_link, self.search_link.format(query[0].lower(), cleantitle.geturl(query)))
        r = client.request(url)
        r = client.parseDOM(r, 'tbody')[0]
        posts = client.parseDOM(r, 'tr')
        posts = [i for i in posts if 'magnet:' in i]
        for post in posts:
            post = post.replace('&nbsp;', ' ')
            name = client.parseDOM(post, 'a', ret='title')[1]
            t = name.split(hdlr)[0]
            if not cleantitle.get(re.sub('(|)', '', t)) == cleantitle.get(title):
                continue
            try:
                y = re.findall('[\.|\(|\[|\s|\_|\-](S\d+E\d+|S\d+)[\.|\)|\]|\s|\_|\-]', name, re.I)[-1].upper()
            except BaseException:
                y = re.findall('[\.|\(|\[|\s\_|\-](\d{4})[\.|\)|\]|\s\_|\-]', name, re.I)[-1].upper()
            if not y == hdlr:
                continue
            links = client.parseDOM(post, 'a', ret='href')
            magnet = [i.replace('&amp;', '&') for i in links if 'magnet:' in i][0]
            url = magnet.split('&tr')[0]
            quality, info = source_utils.get_release_quality(name, name)
            try:
                size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                div = 1 if size.endswith(('GB', 'GiB')) else 1024
                size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
                size = '%.2f GB' % size
            except BaseException:
                size = '0'
            info.append(size)
            info = ' | '.join(info)
            sources.append({'source': 'Torrent', 'quality': quality, 'language': 'en',
                            'url': url, 'info': info, 'direct': False, 'debridonly': True})
        return sources
    except BaseException:
        return sources

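# `magnet.split('&tr')[0]` keeps the magnet URI but drops its tracker list. A
# short sketch of that trim plus infohash extraction (helper name illustrative):
import re

def _trim_magnet(magnet):
    trimmed = magnet.split('&tr')[0]  # strip '&tr=...' tracker parameters
    btih = re.search('btih:([a-zA-Z0-9]+)', trimmed)
    return trimmed, (btih.group(1).lower() if btih else None)
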
def sources(self, url, hostDict, hostprDict):
    sources = []
    try:
        if not url:
            return sources
        url = urlparse.urljoin(self.base_link, url)
        r = client.request(url)
        r = r.replace('\\"', '"')
        links = dom_parser.parse_dom(r, 'tr', attrs={'id': 'tablemoviesindex2'})
        for i in links:
            try:
                host = dom_parser.parse_dom(i, 'img', req='alt')[0].attrs['alt']
                host = host.split()[0].rsplit('.', 1)[0].strip().lower()
                host = host.encode('utf-8')
                valid, host = source_utils.is_host_valid(host, hostDict)
                if not valid:
                    continue
                url = dom_parser.parse_dom(i, 'a', req='href')[0].attrs['href']
                url = client.replaceHTMLCodes(url)
                url = urlparse.urljoin(self.base_link, url)
                url = url.encode('utf-8')
                sources.append({'source': host, 'quality': 'SD', 'language': 'de',
                                'url': url, 'direct': False, 'debridonly': False})
            except:
                pass
        return sources
    except:
        return sources

def sources(self, url, hostDict, hostprDict):
    sources = []
    try:
        if url is None:
            return sources
        hostDict = [(i.rsplit('.', 1)[0], i) for i in hostDict]
        locDict = [i[0] for i in hostDict]
        r = client.request(urlparse.urljoin(self.base_link, url))
        r = client.parseDOM(r, 'div', attrs={'class': 'filmicerik'})
        r = client.parseDOM(r, 'p')
        r = [(client.parseDOM(i, 'iframe', ret='src'),
              client.parseDOM(i, 'b'),
              client.parseDOM(r, 'span', attrs={'class': 'lg'})) for i in r]
        r = [(i[0], [x.lower().replace('lecteur', '').strip() for x in i[1]], i[2][0])
             for i in r if len(i[0]) > 0 and len(i[1]) > 0 and len(i[2]) > 0]
        r = [(i[0], [[y[1] for y in hostDict if y[0] == x][0] for x in i[1] if x in locDict],
              i[2], re.findall('\((.+?)\)$', i[2])) for i in r]
        r = [(dict(zip(i[0], i[1])), i[3][0] if len(i[2]) > 0 else i[2]) for i in r]
        for links, lang in r:
            for link, host in links.iteritems():
                sources.append({'source': host, 'quality': 'SD', 'language': 'fr',
                                'info': lang, 'url': link, 'direct': False,
                                'debridonly': False})
        return sources
    except:
        return sources

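# hostDict is rebuilt above as (name, domain) pairs so the site's bare host
# labels can be mapped back to resolvable domains. Illustrative values:
# [(i.rsplit('.', 1)[0], i) for i in ['vidoza.net', 'mixdrop.co']]
#     -> [('vidoza', 'vidoza.net'), ('mixdrop', 'mixdrop.co')]
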
def sources(self, url, hostDict, hostprDict):
    self.sources = []
    try:
        if url is None:
            return self.sources
        if debrid.status() is False:
            return self.sources
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        self.title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        self.title = self.title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        self.hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        self.year = data['year']
        query = '%s %s' % (self.title, self.hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
        r = client.request(url)
        links = client.parseDOM(r, 'td', attrs={'nowrap': 'nowrap'})
        threads = []
        for link in links:
            threads.append(workers.Thread(self.get_sources, link))
        [i.start() for i in threads]
        [i.join() for i in threads]
        return self.sources
    except:
        source_utils.scraper_error('ETTV')
        return self.sources

def sources(self, url, hostDict, hostprDict):
    sources = []
    try:
        if not url:
            return sources
        r = client.request(urlparse.urljoin(self.base_link, url))
        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'hosterSiteVideo'})
        r = dom_parser.parse_dom(r, 'li', attrs={'data-lang-key': re.compile('[1|3]')})
        r = [(dom_parser.parse_dom(i, 'a', req='href'),
              dom_parser.parse_dom(i, 'h4'),
              'subbed' if i.attrs['data-lang-key'] == '3' else '') for i in r]
        r = [(i[0][0].attrs['href'], i[1][0].content.lower(), i[2])
             for i in r if len(i[0]) > 0 and len(i[1]) > 0]
        r = [(i[0], i[1], re.findall('(.+?)\s*<br\s*/?>(.+?)$', i[1], re.DOTALL), i[2]) for i in r]
        r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1],
              i[2][0][1] if len(i[2]) > 0 else '', i[3]) for i in r]
        r = [(i[0], i[1], 'HD' if 'hosterhdvideo' in i[2] else 'SD', i[3]) for i in r]
        for link, host, quality, info in r:
            valid, host = source_utils.is_host_valid(host, hostDict)
            if not valid:
                continue
            sources.append({'source': host, 'quality': quality, 'language': 'de',
                            'url': link, 'info': info, 'direct': False,
                            'debridonly': False})
        return sources
    except:
        return sources

def _get_items(self, url):
    try:
        headers = {'User-Agent': client.agent()}
        r = client.request(url, headers=headers)
        posts = client.parseDOM(r, 'tr', attrs={'id': 'torrent_latest_torrents'})
        for post in posts:
            data = client.parseDOM(post, 'a', attrs={'title': 'Torrent magnet link'}, ret='href')[0]
            link = urllib.unquote(data).decode('utf8').replace('https://mylink.me.uk/?url=', '')
            name = urllib.unquote_plus(re.search('dn=([^&]+)', link).groups()[0])
            t = name.split(self.hdlr)[0]
            if not cleantitle.get(re.sub('(|)', '', t)) == cleantitle.get(self.title):
                continue
            try:
                y = re.findall('[\.|\(|\[|\s|\_|\-](S\d+E\d+|S\d+)[\.|\)|\]|\s|\_|\-]', name, re.I)[-1].upper()
            except BaseException:
                y = re.findall('[\.|\(|\[|\s\_|\-](\d{4})[\.|\)|\]|\s\_|\-]', name, re.I)[-1].upper()
            if not y == self.hdlr:
                continue
            try:
                size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                div = 1 if size.endswith('GB') else 1024
                size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
                size = '%.2f GB' % size
            except BaseException:
                size = '0'
            self.items.append((name, link, size))
        return self.items
    except BaseException:
        return self.items

def do_search(self, search_string, title, localtitle, year, search_type):
    r = client.request('http://ekino-tv.pl/se/search?q=%s' %
                       str.lower(search_string + ' HD').replace(' ', '+'))
    r = client.parseDOM(r, 'div', attrs={'class': 'movies-list-item'})
    r = [x.encode('utf-8') for x in r]
    local_simple = cleantitle.get(localtitle)
    title_simple = cleantitle.get(title)
    for row in r:
        row = client.parseDOM(row, 'div', attrs={'class': 'opis-list'})[0]
        title_found = client.parseDOM(row, 'div', attrs={'class': 'title'})[0]
        link = client.parseDOM(title_found, 'a', ret='href')[0]
        if search_type not in link:
            continue
        # repair the site's mojibake before parsing ('Å„' is mis-decoded UTF-8 'ń')
        local_found = client.parseDOM(str(title_found).replace('Å„', 'ń'), 'a')[0]
        for junk in (' ', 'ENG', 'CAM', 'HD', '-'):
            local_found = local_found.replace(junk, '')
        title_found = client.parseDOM(title_found, 'a', attrs={'class': 'blue'})
        title_found = title_found[0] if title_found and title_found[0] else local_found
        local_found = local_found.replace(' ', '')
        title_found = title_found.replace(' ', '')
        year_found = client.parseDOM(row, 'p', attrs={'class': 'cates'})
        if year_found:
            year_found = year_found[0][:4]
        title_match = (cleantitle.get(local_found) == local_simple or
                       cleantitle.get(title_found) == title_simple)
        year_match = (not year_found) or year == year_found
        if title_match and year_match:
            return link

def sources(self, url, hostDict, hostprDict):
    self.sources = []
    try:
        if url is None:
            return self.sources
        if debrid.status() is False:
            return self.sources
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        self.title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        self.title = self.title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        self.hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        self.year = data['year']
        query = '%s %s' % (self.title, self.hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        url = self.search_link % urllib.quote_plus(query)
        url = urlparse.urljoin(self.base_link, url)
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
        r = client.request(url)
        div = client.parseDOM(r, 'div', attrs={'class': 'panel panel-default'})[0]
        table = client.parseDOM(div, 'table', attrs={'class': 'table table-striped table-bordered table-hover table-condensed'})[0]
        links = re.findall('<a href="(.+?)">', table, re.DOTALL)
        # log_utils.log('links = %s' % links, log_utils.LOGDEBUG)
        threads = []
        for link in links:
            threads.append(workers.Thread(self.get_sources, link))
        [i.start() for i in threads]
        [i.join() for i in threads]
        return self.sources
    except:
        source_utils.scraper_error('TORLOCK')
        return self.sources

def sources(self, url, hostDict, hostprDict):
    try:
        sources = []
        r = client.request(url)
        match = re.compile('<iframe class="metaframe rptss" src="(.+?)"').findall(r)
        for url in match:
            sources.append({'source': 'Openload', 'quality': 'HD', 'language': 'en',
                            'url': url, 'direct': False, 'debridonly': False})
    except Exception:
        return
    return sources

def movie(self, imdb, title, localtitle, aliases, year):
    try:
        mtitle = cleantitle.geturl(title).replace('-', '+')
        url = self.base_link + self.search_link % mtitle
        mSearchPage = client.request(url)
        section = client.parseDOM(mSearchPage, 'div', attrs={'class': 'title'})
        for item in section:
            results = re.compile('<a href="(.+?)">(.+?)</a>.+?<span class="year">(.+?)</span>').findall(item)
            for url, mName, mYear in results:
                if cleantitle.get(title) in cleantitle.get(mName):
                    if year in str(mYear):
                        return url
    except:
        return

def __search(self, titles):
    try:
        query = self.search_link % urllib.quote_plus(cleantitle.query(titles[0]))
        query = urlparse.urljoin(self.base_link, query)
        t = [cleantitle.get(i) for i in set(titles) if i]
        r = client.request(query)
        r = dom_parser.parse_dom(r, 'div', attrs={'id': 'container-search'})
        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'movie-cat'})
        r = dom_parser.parse_dom(r, 'h4', attrs={'class': 'title'})
        r = dom_parser.parse_dom(r, 'a', req=['title', 'href'])
        r = [(i.attrs['href'], i.attrs['title']) for i in r]
        r = [i[0] for i in r if cleantitle.get(i[1]) in t][0]
        return source_utils.strip_domain(r)
    except:
        return

def movie(self, imdb, title, localtitle, aliases, year):
    try:
        query = self.search_link % urllib.quote_plus(title)
        query = urlparse.urljoin(self.base_link, query.lower())
        result = client.request(query, referer=self.base_link)
        result = client.parseDOM(result, 'div', attrs={'class': 'index_item.+?'})
        result = [dom.parse_dom(i, 'a', req=['href', 'title'])[0] for i in result if i]
        result = [i.attrs['href'] for i in result
                  if cleantitle.get(title) == cleantitle.get(
                      re.sub('(\.|\(|\[|\s)(\d{4}|S\d+E\d+|S\d+|3D)(\.|\)|\]|\s|)(.+|)', '',
                             i.attrs['title'], flags=re.I))][0]
        url = client.replaceHTMLCodes(result)
        url = url.encode('utf-8')
        return url
    except Exception:
        return

def __search(self, titles, year):
    try:
        query = self.search_link % urllib.quote_plus(cleantitle.getsearch(titles[0] + ' ' + year))
        query = urlparse.urljoin(self.base_link, query)
        t = [cleantitle.get(i) for i in set(titles) if i][0]
        r = client.request(query)
        r = client.parseDOM(r, 'div', attrs={'class': 'bd'})
        for i in r:
            r = dom_parser.parse_dom(i, 'h3')
            r = dom_parser.parse_dom(r, 'a')
            title = r[0][1]
            y = re.findall('(\d{4})', title, re.DOTALL)[0]
            title = cleantitle.get(title.split('(')[0])
            if title in t and year == y:
                return source_utils.strip_domain(r[0][0]['href'])
        return
    except:
        return

def sources(self, url, hostDict, hostprDict):
    sources = []
    try:
        if url is None:
            return sources
        result = client.request(url)
        box_result = client.parseDOM(result, 'li', attrs={'class': 'elemento'})
        if len(box_result) != 0:
            sources = self.get_links_from_box(box_result)
        sources += self.get_from_main_player(result, sources)
        return sources
    except:
        return sources

def __search(self, titles, year):
    try:
        t = [cleantitle.get(i) for i in set(titles) if i]
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']
        r = client.request(urlparse.urljoin(self.base_link, self.search_link),
                           post={'query': cleantitle.query(titles[0])})
        r = dom_parser.parse_dom(r, 'li', attrs={'class': 'entTd'})
        r = dom_parser.parse_dom(r, 'div', attrs={'class': 've-screen'}, req='title')
        r = [(dom_parser.parse_dom(i, 'a', req='href'), i.attrs['title'].split(' - ')[0]) for i in r]
        r = [(i[0][0].attrs['href'], i[1], re.findall('(.+?) \(*(\d{4})', i[1])) for i in r]
        r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0') for i in r]
        r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
        r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y][0]
        return source_utils.strip_domain(r)
    except:
        return

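# The year list above tolerates off-by-one release years; '0' additionally
# matches entries that carry no year at all. Equivalent sketch:
def _year_window(year):
    y = int(year)
    return [str(y), str(y + 1), str(y - 1), '0']
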
def movie(self, imdb, title, localtitle, aliases, year):
    try:
        t = cleantitle.geturl(title).replace('-', '+').replace('++', '+')
        self.title = t
        url = self.base_link + self.search_link % (t, year)
        r = client.request(url)
        u = client.parseDOM(r, 'div', attrs={'class': 'col-md-2 col-sm-2 mrgb'})
        for i in u:
            link = re.compile('<a href="(.+?)"').findall(i)
            for url in link:
                if cleantitle.get(title) not in cleantitle.get(url):
                    continue
                return url
    except:
        source_utils.scraper_error('HDMTO')
        return

def __search(self, imdb):
    try:
        l = ['1', '15']
        r = client.request(urlparse.urljoin(self.base_link, self.search_link % imdb))
        r = dom_parser.parse_dom(r, 'table', attrs={'id': 'RsltTableStatic'})
        r = dom_parser.parse_dom(r, 'tr')
        r = [(dom_parser.parse_dom(i, 'a', req='href'),
              dom_parser.parse_dom(i, 'img', attrs={'alt': 'language'}, req='src')) for i in r]
        r = [(i[0][0].attrs['href'], i[0][0].content, i[1][0].attrs['src']) for i in r if i[0] and i[1]]
        r = [(i[0], i[1], re.findall('.+?(\d+)\.', i[2])) for i in r]
        r = [(i[0], i[1], i[2][0] if len(i[2]) > 0 else '0') for i in r]
        r = sorted(r, key=lambda i: int(i[2]))  # german > german/subbed
        r = [i[0] for i in r if i[2] in l][0]
        return source_utils.strip_domain(r)
    except:
        return

def __search(self, titles):
    try:
        query = self.search_link % quote_plus(quote_plus(cleantitle.query(titles[0])))
        query = urljoin(self.base_link, query)
        t = [cleantitle.get(i) for i in set(titles) if i]
        r = client.request(query)
        r = dom_parser.parse_dom(r, 'ul', attrs={'class': 'coverBox'})
        r = dom_parser.parse_dom(r, 'li')
        r = dom_parser.parse_dom(r, 'span', attrs={'class': 'name'})
        r = dom_parser.parse_dom(r, 'a')
        title = cleantitle.get(r[0][1])
        if title in t:
            return source_utils.strip_domain(r[0][0]['href'])
        return
    except:
        return

def _get_items(self, url):
    items = []
    try:
        headers = {'User-Agent': client.agent()}
        r = client.request(url, headers=headers)
        posts = client.parseDOM(r, 'tr', attrs={'class': 't-row'})
        posts = [i for i in posts if 'racker:' not in i]
        for post in posts:
            data = client.parseDOM(post, 'a', ret='href')
            url = [i for i in data if 'magnet:' in i][0]
            name = client.parseDOM(post, 'a', ret='title')[0]
            t = name.split(self.hdlr)[0]
            if not cleantitle.get(re.sub('(|)', '', t)) == cleantitle.get(self.title):
                continue
            try:
                y = re.findall('[\.|\(|\[|\s|\_|\-](S\d+E\d+|S\d+)[\.|\)|\]|\s|\_|\-]', name, re.I)[-1].upper()
            except BaseException:
                y = re.findall('[\.|\(|\[|\s\_|\-](\d{4})[\.|\)|\]|\s\_|\-]', name, re.I)[-1].upper()
            if not y == self.hdlr:
                continue
            try:
                size = re.findall('((?:\d+\,\d+\.\d+|\d+\.\d+|\d+\,\d+|\d+)\s*(?:GiB|MiB|GB|MB))', post)[0]
                div = 1 if size.endswith('GB') else 1024
                size = float(re.sub('[^0-9|/.|/,]', '', size.replace(',', '.'))) / div
                size = '%.2f GB' % size
            except BaseException:
                size = '0'
            items.append((name, url, size))
        return items
    except BaseException:
        return items

def sources(self, url, hostDict, hostprDict):
    sources = []
    try:
        if url is None:
            return sources
        result = client.request(url)
        result = client.parseDOM(result, 'div', attrs={'id': 'downloads'})[0]
        rows = client.parseDOM(result, 'tr')
        for row in rows:
            try:
                cols = client.parseDOM(row, 'td')
                host = client.parseDOM(cols, 'img', ret='src')[0]
                host = host.rpartition('=')[-1]
                link = client.parseDOM(cols, 'a', ret='href')[0]
                valid, host = source_utils.is_host_valid(host, hostDict)
                if not valid:
                    continue
                q = 'SD'
                if 'Wysoka' in cols[2]:  # 'Wysoka' is Polish for 'high' (quality)
                    q = 'HD'
                lang, info = self.get_lang_by_type(cols[3])
                sources.append({'source': host, 'quality': q, 'language': lang,
                                'url': link, 'info': info, 'direct': False,
                                'debridonly': False})
            except:
                pass
        return sources
    except:
        return sources

def sources(self, url, hostDict, hostprDict):
    self._sources = []
    try:
        if url is None:
            return self._sources
        if debrid.status() is False:
            return self._sources
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        self.title = data['tvshowtitle'] if 'tvshowtitle' in data else data['title']
        self.title = self.title.replace('&', 'and').replace('Special Victims Unit', 'SVU')
        self.hdlr = 'S%02dE%02d' % (int(data['season']), int(data['episode'])) if 'tvshowtitle' in data else data['year']
        self.year = data['year']
        query = '%s %s' % (self.title, self.hdlr)
        query = re.sub('(\\\|/| -|:|;|\*|\?|"|\'|<|>|\|)', '', query)
        if 'tvshowtitle' in data:
            url = self.search.format('8', urllib.quote(query))
        else:
            url = self.search.format('4', urllib.quote(query))
        # log_utils.log('url = %s' % url, log_utils.LOGDEBUG)
        headers = {'User-Agent': client.agent()}
        _html = client.request(url, headers=headers)
        threads = []
        for i in re.findall(r'<item>(.+?)</item>', _html, re.DOTALL):
            threads.append(workers.Thread(self._get_items, i))
        [i.start() for i in threads]
        [i.join() for i in threads]
        return self._sources
    except:
        source_utils.scraper_error('TORRENTDOWNLOADS')
        return self._sources

def episode(self, url, imdb, tvdb, title, premiered, season, episode):
    try:
        if not url:
            return
        s = 'staffel-%s-episode-%s' % (season, episode)
        s = '(?<=<a class="episode-name" href=")(.*?)(?=' + s + ')(.*?)(?=")'
        url = '/serien' + re.sub('\.\w+$', '', url)
        url = urlparse.urljoin(self.base_link, url)
        r = client.request(url, mobile=True)
        p = dom_parser.parse_dom(r, 'div', attrs={'id': 'seasonss'})
        url = re.search(s, p[0][1]).group()
        return url
    except:
        return

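# The lookbehind/lookahead pattern above captures the full href between
# 'href="' and the closing quote, anchored on the staffel/episode slug.
# Illustrative match (pattern built for season 1, episode 2):
#   re.search(s, '<a class="episode-name" href="/serien/x/staffel-1-episode-2">')
#       .group()  ->  '/serien/x/staffel-1-episode-2'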