def search(self, title, localtitle, year, is_movie_search):
    try:
        titles = []
        titles.append(cleantitle.normalize(cleantitle.getsearch(title)))
        titles.append(cleantitle.normalize(cleantitle.getsearch(localtitle)))
        for title in titles:
            url = self.search_link + str(title)
            result = self.session.get(url).content
            result = result.decode('utf-8')
            h = HTMLParser()
            result = h.unescape(result)
            result = client.parseDOM(result, 'div', attrs={'class': 'col-sm-4'})
            for item in result:
                try:
                    link = str(client.parseDOM(item, 'a', ret='href')[0])
                    if link.startswith('//'):
                        link = "https:" + link
                    nazwa = str(client.parseDOM(item, 'a', ret='title')[0])
                    name = cleantitle.normalize(cleantitle.getsearch(nazwa))
                    # the original `replace(" ", " ")` was a no-op; assuming the
                    # intent was to collapse doubled spaces before splitting
                    name = name.replace("  ", " ")
                    title = title.replace("  ", " ")
                    words = title.split(" ")
                    if self.contains_all_words(name, words) and str(year) in link:
                        return link
                except:
                    continue
    except:
        return
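# `contains_all_words` / `contains_word` are used throughout these scrapers but
# are not defined in this section. A minimal sketch of what they plausibly do,
# assuming a simple case-insensitive substring check (hypothetical helpers):
def contains_word(self, name, word):
    # True if `word` occurs anywhere in `name`, ignoring case
    return str(word).lower() in str(name).lower()

def contains_all_words(self, name, words):
    # True only if every word of the search title occurs in the found title
    return all(self.contains_word(name, word) for word in words)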
def search(self, title, localtitle, year, is_movie_search):
    try:
        titles = []
        titles.append(cleantitle.normalize(cleantitle.getsearch(title)))
        titles.append(cleantitle.normalize(cleantitle.getsearch(localtitle)))
        for title in titles:
            url = urljoin(self.base_link, self.search_link)
            url = url % quote(str(title).replace(" ", "_"))
            result = client.request(url)
            result = client.parseDOM(result, 'div', attrs={'class': 'video-clip-wrapper'})
            linki = []
            for item in result:
                try:
                    link = str(client.parseDOM(item, 'a', ret='href')[0])
                    nazwa = str(client.parseDOM(item, 'a', attrs={'class': 'link-title-visit'})[0])
                    name = cleantitle.normalize(cleantitle.getsearch(nazwa))
                    # no-op in the original; assumed double-space collapse
                    name = name.replace("  ", " ")
                    title = title.replace("  ", " ")
                    words = title.split(" ")
                    if self.contains_all_words(name, words) and str(year) in name:
                        linki.append(link)
                except:
                    continue
            return linki
    except:
        return
def search(self, title, localtitle, year, search_type):
    try:
        titles = []
        titles.append(cleantitle.normalize(cleantitle.getsearch(title)))
        titles.append(cleantitle.normalize(cleantitle.getsearch(localtitle)))
        cookies = client.request(self.base_link, output='cookie')
        cache.cache_insert('alltube_cookie', cookies)
        for title in titles:
            r = client.request(urljoin(self.base_link, self.search_link),
                               post={'search': cleantitle.query(title)},
                               headers={'Cookie': cookies})
            r = self.get_rows(r, search_type)
            for row in r:
                url = client.parseDOM(row, 'a', ret='href')[0]
                names_found = client.parseDOM(row, 'h3')[0]
                # 'Zwiastun' is Polish for 'trailer'; skip trailer results
                if names_found.startswith('Zwiastun') and not title.startswith('Zwiastun'):
                    continue
                names_found = names_found.encode('utf-8').split('/')
                names_found = [cleantitle.normalize(cleantitle.getsearch(i)) for i in names_found]
                for name in names_found:
                    # no-op in the original; assumed double-space collapse
                    name = name.replace("  ", " ")
                    title = title.replace("  ", " ")
                    words = title.split(" ")
                    found_year = self.try_read_year(url)
                    if self.contains_all_words(name, words) and (not found_year or found_year == year):
                        return url
    except:
        return
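# `try_read_year` (used above) is also not defined in this section. A hedged
# sketch, assuming it pulls the first plausible 4-digit year out of the result
# URL and returns None when there is none (hypothetical implementation):
def try_read_year(self, url):
    match = re.search(r'\b(19|20)\d{2}\b', url)
    return match.group(0) if match else None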
def search(self, title, localtitle, year):
    try:
        titles = []
        title2 = title.split('.')[0]
        localtitle2 = localtitle.split('.')[0]
        titles.append(cleantitle.normalize(cleantitle.getsearch(title2)))
        titles.append(cleantitle.normalize(cleantitle.getsearch(localtitle2)))
        titles.append(title2)
        titles.append(localtitle2)
        for title in titles:
            title = title.replace(" ", "+")
            result = client.request(self.search_link % title)
            result = client.parseDOM(result, 'div', attrs={'class': 'col-xs-4'})
            for item in result:
                try:
                    # 'rok'/'nazwa' are Polish for 'year'/'title'
                    rok = client.parseDOM(item, 'div', attrs={'class': 'col-sm-8'})
                    rok_nazwa = client.parseDOM(rok, 'p')[0].lower()
                    link = client.parseDOM(item, 'a', ret='href')[0]
                    link = self.base_link + link
                    # spaces were replaced with '+' above, so split on '+';
                    # the original split on ' ' and could never match
                    words = title.lower().split("+")
                    if self.contains_all_words(rok_nazwa, words) and year in rok_nazwa:
                        return link
                except:
                    continue
        return
    except:
        return
def search(self, title, localtitle, year, is_movie_search):
    try:
        titles = []
        titles.append(cleantitle.normalize(cleantitle.getsearch(title)))
        titles.append(cleantitle.normalize(cleantitle.getsearch(localtitle)))
        titles.append(title)
        titles.append(localtitle)
        for title in titles:
            try:
                url = self.search_link + str(title)
                result = self.session.get(url).content
                result = result.decode('utf-8')
                h = HTMLParser()
                result = h.unescape(result)
                result = client.parseDOM(result, 'div', attrs={'class': 'card-body p-2'})
                for item in result:
                    try:
                        nazwa = re.findall('Film online: (.*?)"', item)[0]
                        try:
                            nazwa = re.findall('>(.*?)<', nazwa)[0]
                        except:
                            pass
                        name = cleantitle.normalize(cleantitle.getsearch(nazwa))
                        # 'Rok wydania filmu online' = 'release year of the online film'
                        rok = re.findall('Rok wydania filmu online".*>(.*?)<', item)[0]
                        item = str(item).replace("<span style='color:red'>", "").replace("</span>", "")
                        link = re.findall('href="(.*?)"', item)[0]
                        if link.startswith('//'):
                            link = "https:" + link
                        # no-op in the original; assumed double-space collapse
                        name = name.replace("  ", " ")
                        title = title.replace("  ", " ")
                        # note: inverted relative to the other variants; this
                        # checks every word of the found name against the title
                        words = name.split(" ")
                        if self.contains_all_words(title, words) and str(year) in rok:
                            return link
                    except:
                        continue
            except:
                continue
    except:
        return
def search_ep(self, titles, season, episode, year):
    try:
        searchtitles = titles
        for searchtitle in searchtitles:
            response = requests.get(self.base_link + self.search_serial % searchtitle)
            result = response.content
            h = HTMLParser()
            result = h.unescape(result)
            result = client.parseDOM(result, 'ul', attrs={'class': 'resultsList hits'})
            items = client.parseDOM(result, 'li')
            items = [x for x in items if not str(x).startswith("<a href")]
            orgtitles = []
            for content in items:
                try:
                    orgtitle = str(client.parseDOM(
                        content, 'div',
                        attrs={'class': 'filmPreview__originalTitle'})[0])
                except:
                    orgtitle = "0"
                orgtitles.append(orgtitle)
            ids = client.parseDOM(items, 'data', ret='data-id')
            # note: this shadows the `titles` parameter with the found titles
            titles = client.parseDOM(result, 'data', ret='data-title')
            years = client.parseDOM(result, 'span', attrs={'class': 'filmPreview__year'})
            for item in zip(titles, ids, years, orgtitles):
                f_title = str(item[0])
                f_id = str(item[1])
                f_year = str(item[2])
                f_orgtitle = str(item[3])
                teststring = cleantitle.normalize(cleantitle.getsearch(searchtitle))
                words = cleantitle.normalize(cleantitle.getsearch(f_title)).split(" ")
                # original called the misspelled `contains_all_wors`
                if self.contains_all_words(teststring, words) and year == f_year:
                    return (f_title, f_id, f_year, f_orgtitle, "SERIAL", season, episode)
    except:
        return
def searchMovie(self, title, year, aliases, headers):
    try:
        title = cleantitle.normalize(title)
        url = urljoin(self.base_link, self.search_link % cleantitle.geturl(title))
        r = self.scraper.get(url).content
        r = client.parseDOM(r, 'div', attrs={'class': 'ml-item'})
        r = zip(client.parseDOM(r, 'a', ret='href'),
                client.parseDOM(r, 'a', ret='oldtitle'))
        results = [(i[0], i[1], re.findall('\((\d{4})', i[1])) for i in r]
        try:
            r = [(i[0], i[1], i[2][0]) for i in results if len(i[2]) > 0]
            url = [i[0] for i in r
                   if self.matchAlias(i[1], aliases) and (year == i[2])][0]
        except:
            url = None
        if url is None:
            try:
                url = [i[0] for i in results if self.matchAlias(i[1], aliases)][0]
            except:
                return
        url = urljoin(self.base_link, '%s/watching.html' % url)
        return url
    except:
        source_utils.scraper_error('SERIES9')
        return
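# `matchAlias` is shared by the searchMovie/searchShow variants in this section
# but not shown here. A sketch under the assumption that `aliases` is a list of
# dicts with a 'title' key, as in similar scraper packages (hypothetical):
def matchAlias(self, title, aliases):
    try:
        return any(cleantitle.get(title) == cleantitle.get(alias['title'])
                   for alias in aliases)
    except:
        return False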
def searchMovie(self, title, year, aliases, headers):
    try:
        title = cleantitle.normalize(title)
        url = urlparse.urljoin(
            self.base_link,
            self.search_link % urllib.quote_plus(cleantitle.getsearch(title)))
        s = cfscrape.create_scraper()
        r = s.get(url).content
        r = client.parseDOM(r, 'li', attrs={'class': 'movie-item'})
        r = zip(client.parseDOM(r, 'a', ret='href'),
                client.parseDOM(r, 'a', ret='title'))
        results = [(i[0], i[1]) for i in r]
        try:
            url = [i[0] for i in results if self.matchAlias(i[1], aliases)][0]
        except BaseException:
            url = None
        if url is None:
            url = [i[0] for i in results if self.matchAlias(i[1], aliases)][0]
        return url
    except BaseException:
        return
def searchMovie(self, title, year, aliases, headers):
    try:
        title = cleantitle.normalize(title)
        url = urlparse.urljoin(self.base_link, self.search_link % cleantitle.geturl(title))
        r = client.request(url, headers=headers, timeout='10')
        r = client.parseDOM(r, 'div', attrs={'class': 'ml-item'})
        r = zip(client.parseDOM(r, 'a', ret='href'),
                client.parseDOM(r, 'a', ret='oldtitle'))
        results = [(i[0], i[1], re.findall('\((\d{4})', i[1])) for i in r]
        try:
            r = [(i[0], i[1], i[2][0]) for i in results if len(i[2]) > 0]
            url = [i[0] for i in r
                   if self.matchAlias(i[1], aliases) and (year == i[2])][0]
        except:
            url = None
        if url is None:
            url = [i[0] for i in results if self.matchAlias(i[1], aliases)][0]
        url = urlparse.urljoin(self.base_link, '%s/watching.html' % url)
        return url
    except:
        return
def search_ep(self, titles, season, episode, year):
    try:
        query = 'S{:02d}E{:02d}'.format(int(season), int(episode))
        for title in titles:
            url = self.search_link + str(title)
            result = self.session.get(url).content
            result = result.decode('utf-8')
            h = HTMLParser()
            result = h.unescape(result)
            result = client.parseDOM(result, 'div', attrs={'class': 'card-body p-2'})
            for item in result:
                nazwa = re.findall('Film online: (.*?)"', item)[0]
                name = cleantitle.normalize(cleantitle.getsearch(nazwa))
                rok = re.findall('Rok wydania filmu online".*>(.*?)<', item)[0]
                item = str(item).replace("<span style='color:red'>", "").replace("</span>", "")
                link = re.findall('href="(.*?)"', item)[0]
                if link.startswith('//'):
                    link = "https:" + link
                # no-op in the original; assumed double-space collapse
                name = name.replace("  ", " ")
                title = title.replace("  ", " ")
                words = title.split(" ")
                if self.contains_all_words(name, words) and str(year) in rok:
                    # switch from the movie listing to the series listing
                    # ('filmy'/'seriale' = Polish for 'movies'/'series')
                    content = requests.get(link.replace('filmy', 'seriale')).content
                    content = client.parseDOM(content, 'div', attrs={'class': 'tabela_wiersz mb-1'})
                    for odcinek in content:  # 'odcinek' = 'episode'
                        if query.lower() in odcinek.lower():
                            link = str(client.parseDOM(odcinek, 'a', ret='href')[0])
                            return self.base_link + link
    except:
        return
def searchMovie(self, title, year, aliases, headers):
    try:
        title = cleantitle.normalize(title)
        url = urlparse.urljoin(
            self.base_link,
            self.search_link % urllib.quote_plus(cleantitle.getsearch(title)))
        r = self.scraper.get(url).content
        r = client.parseDOM(r, 'div', attrs={'class': 'ml-item'})
        r = zip(client.parseDOM(r, 'a', ret='href'),
                client.parseDOM(r, 'a', ret='title'))
        results = [(i[0], i[1], re.findall('\((\d{4})', i[1])) for i in r]
        try:
            r = [(i[0], i[1], i[2][0]) for i in results if len(i[2]) > 0]
            url = [i[0] for i in r
                   if self.matchAlias(i[1], aliases) and (year == i[2])][0]
        except Exception:
            url = None
        if url is None:
            url = [i[0] for i in results if self.matchAlias(i[1], aliases)][0]
        return url
    except Exception:
        return
def tvshow(self, imdb, tvdb, tvshowtitle, localtvshowtitle, aliases, year):
    try:
        query = self.search_link % urllib.quote_plus(cleantitle.query(tvshowtitle))
        # request the page up to 4 times to work around their random 404's;
        # responses (legit & bogus 404s) are actually very fast, so the short
        # timeout is probably not important
        for i in range(4):
            result = client.request(query, timeout=3)
            if result is not None:
                break
        t = [tvshowtitle] + source_utils.aliases_to_array(aliases)
        t = [cleantitle.get(i) for i in set(t) if i]
        result = re.compile(
            'itemprop="url"\s+href="([^"]+).*?itemprop="name"\s+class="serie-title">([^<]+)',
            re.DOTALL).findall(result)
        for i in result:
            if cleantitle.get(cleantitle.normalize(i[1])) in t and year in i[1]:
                url = i[0]
                url = url.encode('utf-8')
                # returned 'url' format like: /serie/x_files
                return url
    except:
        return
def tvshow(self, imdb, tvdb, tvshowtitle, localtvshowtitle, aliases, year):
    try:
        query = self.search_link % urllib.quote_plus(cleantitle.query(tvshowtitle))
        for i in range(3):
            result = self.scraper.get(query).content
            if result is not None:
                break
        t = [tvshowtitle] + source_utils.aliases_to_array(aliases)
        t = [cleantitle.get(i) for i in set(t) if i]
        items = dom_parser.parse_dom(result, 'div', attrs={'class': 'result'})
        url = None
        for i in items:
            result = re.findall(r'href="([^"]+)">(.*)<', i.content)
            if re.sub('<[^<]+?>', '', cleantitle.get(cleantitle.normalize(result[0][1]))) in t \
                    and year in result[0][1]:
                url = result[0][0]
            if url is not None:
                break
        url = url.encode('utf-8')
        return url
    except:
        return
def _query(self, url):
    content_type = 'episode' if 'tvshowtitle' in url else 'movie'
    if content_type == 'movie':
        title = cleantitle.normalize(url.get('title'))
        year = int(url.get('year'))
        years = '%s,%s,%s' % (str(year - 1), year, str(year + 1))
        query = '"%s" %s' % (title, years)
    else:
        title = cleantitle.normalize(url.get('tvshowtitle'))
        season = int(url.get('season'))
        episode = int(url.get('episode'))
        query = '%s S%02dE%02d' % (title, season, episode)
    return query
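# Illustrative `_query` inputs/outputs (values are made up; cleantitle.normalize
# is assumed to leave plain ASCII titles unchanged):
#   {'title': 'Blade Runner', 'year': '1982'}                      -> '"Blade Runner" 1981,1982,1983'
#   {'tvshowtitle': 'The Expanse', 'season': '2', 'episode': '5'}  -> 'The Expanse S02E05'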
def searchShow(self, title, season):
    title = cleantitle.normalize(title)
    search = '%s Season %01d' % (title, int(season))
    url = self.search_link % cleantitle.geturl(search)
    r = self.scraper.get(url, params={'link_web': self.base_link}).content
    r = client.parseDOM(r, 'div', attrs={'class': 'ml-item'})
    r = zip(client.parseDOM(r, 'a', ret='href'),
            client.parseDOM(r, 'a', ret='title'))
    r = [(i[0], i[1], re.findall('(.*?)\s+-\s+Season\s+(\d)', i[1])) for i in r]
    r = [(i[0], i[1], i[2][0]) for i in r if len(i[2]) > 0]
    url = [i[0] for i in r
           if cleantitle.get(i[2][0]) == cleantitle.get(title) and i[2][1] == season][0]
    url = urlparse.urljoin(self.base_link, '%s/watching.html' % url)
    return url
def searchShow(self, title, season, aliases, headers):
    try:
        title = cleantitle.normalize(title)
        search = '%s Season %01d' % (title, int(season))
        url = urlparse.urljoin(self.base_link, self.search_link % cleantitle.geturl(search))
        r = client.request(url, headers=headers, timeout='10')
        r = client.parseDOM(r, 'div', attrs={'class': 'ml-item'})
        r = zip(client.parseDOM(r, 'a', ret='href'),
                client.parseDOM(r, 'a', ret='title'))
        r = [(i[0], i[1], re.findall('(.*?)\s+-\s+Season\s+(\d)', i[1])) for i in r]
        r = [(i[0], i[1], i[2][0]) for i in r if len(i[2]) > 0]
        url = [i[0] for i in r
               if self.matchAlias(i[2][0], aliases) and i[2][1] == season][0]
        url = urlparse.urljoin(self.base_link, '%s/watching.html' % url)
        return url
    except:
        return
def tvshow(self, imdb, tvdb, tvshowtitle, localtvshowtitle, aliases, year):
    try:
        query = self.search_link % urllib.quote_plus(cleantitle.query(tvshowtitle))
        result = client.request(query)
        t = [tvshowtitle] + source_utils.aliases_to_array(aliases)
        t = [cleantitle.get(i) for i in set(t) if i]
        result = re.compile(
            'itemprop="url"\s+href="([^"]+).*?itemprop="name"\s+class="serie-title">([^<]+)',
            re.DOTALL).findall(result)
        for i in result:
            if cleantitle.get(cleantitle.normalize(i[1])) in t and year in i[1]:
                url = i[0]
                url = url.encode('utf-8')
                return url
    except:
        return
def searchShow(self, title, season, aliases, headers):
    try:
        title = cleantitle.normalize(title)
        search = '%s Season %01d' % (title, int(season))
        url = urlparse.urljoin(
            self.base_link,
            self.search_link % urllib.quote_plus(cleantitle.getsearch(search)))
        log_utils.log('searchShow URL: %s' % str(url), log_utils.LOGNOTICE)
        r = self.scraper.get(url).content
        r = client.parseDOM(r, 'div', attrs={'class': 'ml-item'})
        r = zip(client.parseDOM(r, 'a', ret='href'),
                client.parseDOM(r, 'a', ret='title'))
        r = [(i[0], i[1], re.findall('(.*?)\s+-\s+Season\s+(\d)', i[1])) for i in r]
        r = [(i[0], i[1], i[2][0]) for i in r if len(i[2]) > 0]
        url = [i[0] for i in r
               if self.matchAlias(i[2][0], aliases) and i[2][1] == season][0]
        return url
    except Exception:
        return
def searchMovie(self, title, year, aliases, headers):
    try:
        title = cleantitle.normalize(title)
        url = urlparse.urljoin(
            self.base_link,
            self.search_link % urllib.quote_plus(cleantitle.getsearch(title)))
        r = self.scraper.get(url, headers=headers).content
        r = client.parseDOM(r, 'div', attrs={'class': 'ml-item'})
        r = zip(client.parseDOM(r, 'a', ret='href'),
                client.parseDOM(r, 'a', ret='title'))
        r = [(i[0], i[1], re.findall('(\d+)', i[0])[0]) for i in r]
        url = None  # initialized so the fallback below cannot raise a NameError
        results = []
        for i in r:
            try:
                info = client.request(
                    urlparse.urljoin(self.base_link, self.info_link % i[2]),
                    headers=headers, timeout='15')
                y = re.findall('<div\s+class="jt-info">(\d{4})', info)[0]
                if self.matchAlias(i[1], aliases) and (year == y):
                    url = i[0]
                    break
                # results.append([i[0], i[1], y])  # disabled; would feed the fallback below
            except:
                pass
        if url is None:
            # fallback: first alias match regardless of year; a no-op while
            # `results` is never populated (IndexError is caught above)
            url = [i[0] for i in results if self.matchAlias(i[1], aliases)][0]
        return url
    except:
        return
def searchMovie(self, title, year):
    title = cleantitle.normalize(title)
    url = self.search_link % cleantitle.geturl(title)
    r = self.scraper.get(url, params={'link_web': self.base_link}).content
    r = client.parseDOM(r, 'div', attrs={'class': 'ml-item'})
    r = zip(client.parseDOM(r, 'a', ret='href'),
            client.parseDOM(r, 'a', ret='title'))
    results = [(i[0], i[1], re.findall('\((\d{4})', i[1])) for i in r]
    try:
        r = [(i[0], i[1], i[2][0]) for i in results if len(i[2]) > 0]
        url = [i[0] for i in r
               if cleantitle.get(i[1]).endswith(cleantitle.get(title)) and (year == i[2])][0]
    except:
        url = None
    try:
        if url is None:
            url = [i[0] for i in results
                   if cleantitle.get(i[1]).endswith(cleantitle.get(title))][0]
    except:
        url = None
    return url
def searchShow(self, title, season, aliases, headers):
    try:
        title = cleantitle.normalize(title)
        search = '%s Season %01d' % (title, int(season))
        url = urlparse.urljoin(
            self.base_link,
            self.search_link % urllib.quote_plus(cleantitle.getsearch(search)))
        s = cfscrape.create_scraper()
        r = s.get(url).content
        r = client.parseDOM(r, 'li', attrs={'class': 'movie-item'})
        r = zip(client.parseDOM(r, 'a', ret='href'),
                client.parseDOM(r, 'a', ret='title'))
        r = [(i[0], i[1], re.findall('(.*?)\s+-\s+Season\s+(\d)', i[1])) for i in r]
        r = [(i[0], i[1], i[2][0]) for i in r if len(i[2]) > 0]
        url = [i[0] for i in r
               if self.matchAlias(i[2][0], aliases) and i[2][1] == season][0]
        return url
    except BaseException:
        return
def sources(self, url, hostDict, hostprDict):
    api_key = self.get_api()
    if not api_key:
        return
    sources = []
    try:
        content_type = 'episode' if 'tvshowtitle' in url else 'movie'
        match = 'extended'
        moderated = 'no' if content_type == 'episode' else 'yes'
        search_in = ''
        if content_type == 'movie':
            title = cleantitle.normalize(url.get('title'))
            year = url.get('year')
            query = '@name+%s+%s+@files+%s+%s' % (title, year, title, year)
        elif content_type == 'episode':
            title = cleantitle.normalize(url.get('tvshowtitle'))
            season = int(url['season'])
            episode = int(url['episode'])
            seasEpList = self._seas_ep_query_list(season, episode)
            query = '@name+%s+@files+%s+|+%s+|+%s+|+%s+|+%s' % (
                title, seasEpList[0], seasEpList[1], seasEpList[2],
                seasEpList[3], seasEpList[4])
        s = requests.Session()
        link = self.base_link + self.search_link % (api_key, query, match, moderated, search_in)
        p = s.get(link)
        p = json.loads(p.text)
        if p['status'] != 'ok':
            return
        files = p['files']
        for i in files:
            if i['is_ready'] == '1' and i['type'] == 'video':
                try:
                    source = 'SINGLE'
                    if int(i['files_num_video']) > 3:
                        source = 'PACK [B](x%02d)[/B]' % int(i['files_num_video'])
                    file_name = i['name']
                    file_id = i['id']
                    file_dl = i['url_dl']
                    size = float(i['size']) / 1073741824  # bytes -> GiB
                    if content_type == 'episode':
                        url = json.dumps({'content': 'episode', 'file_id': file_id,
                                          'season': season, 'episode': episode})
                    else:
                        url = json.dumps({'content': 'movie', 'file_id': file_id,
                                          'title': title, 'year': year})
                    quality = source_utils.get_release_quality(file_name, file_dl)[0]
                    info = source_utils.getFileType(file_name)
                    info = '%.2f GB | %s | %s' % (size, info, file_name.replace('.', ' ').upper())
                    sources.append({'source': source, 'quality': quality, 'language': "en",
                                    'url': url, 'info': info, 'direct': True,
                                    'debridonly': False})
                except:
                    pass
            else:
                continue
        return sources
    except:
        print("Unexpected error in Furk Script: source", sys.exc_info()[0])
        exc_type, exc_obj, exc_tb = sys.exc_info()
        print(exc_type, exc_tb.tb_lineno)
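# `_seas_ep_query_list` is not defined in this section. A hypothetical sketch,
# assuming it returns five common season/episode naming variants so the Furk
# query also matches files named e.g. '2x05' or '205' (the exact variants the
# original used are unknown):
def _seas_ep_query_list(self, season, episode):
    return ['S%02dE%02d' % (season, episode),
            '%dx%02d' % (season, episode),
            'S%02d.E%02d' % (season, episode),
            'Season.%d.Episode.%d' % (season, episode),
            '%d%02d' % (season, episode)]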
def sources(self, url, hostDict, hostprDict):
    sources = []
    is_anime = url[3]
    try:
        titles = []
        titles.append(url[0])
        titles.append(url[1])
        try:
            year = url[2]
        except:
            year = ''
        for url_single in titles:
            url_single = cleantitle.normalize(cleantitle.getsearch(url_single))
            words = url_single.split(' ')
            search_url = urlparse.urljoin(self.base_link, self.search_link) % (url_single + " " + year)
            cookies = client.request(self.base_link, output='cookie')
            verifyGet = client.request(self.verify, cookie=cookies)
            cookies = cookies + ";tmvh=" + self.crazy_cookie_hash(verifyGet)
            cache.cache_insert('szukajka_cookie', cookies)
            result = client.request(search_url, cookie=cookies)
            result = client.parseDOM(result, 'div', attrs={'class': 'element'})
            for el in result:
                found_title = str(client.parseDOM(el, 'div', attrs={'class': 'title'})[0]) \
                    .lower().replace("_", " ").replace(".", " ").replace("-", " ")
                if is_anime:
                    numbers = [int(s) for s in found_title.split() if s.isdigit()]
                    if int(words[-1]) not in numbers:
                        continue
                # the original `("zwiastun" or "trailer") in ...` only ever
                # tested "zwiastun" (Polish for 'trailer'); check both words
                if "zwiastun" in found_title or "trailer" in found_title:
                    continue
                if len(words) >= 4 or is_anime:
                    if not self.contains_all_words(found_title, words):
                        continue
                else:
                    if not self.contains_all_words(found_title, words) or year not in found_title:
                        continue
                q = 'SD'
                if self.contains_word(found_title, '1080p') or self.contains_word(found_title, 'FHD'):
                    q = '1080p'
                elif self.contains_word(found_title, '720p'):
                    q = 'HD'
                link = client.parseDOM(el, 'a', attrs={'class': 'link'}, ret='href')[0]
                transl_type = client.parseDOM(el, 'span', attrs={'class': 'version'})[0]
                transl_type = transl_type.split(' ')[-1]
                host = client.parseDOM(el, 'span', attrs={'class': 'host'})[0]
                host = host.split(' ')[-1]
                lang, info = self.get_lang_by_type(transl_type)
                sources.append({'source': host, 'quality': q, 'language': lang,
                                'url': link, 'info': info, 'direct': False,
                                'debridonly': False})
        return sources
    except Exception as e:
        print(str(e))
        return sources
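# `get_lang_by_type` maps the release's Polish translation tag to a
# (language, info) pair. Not defined in this section; a hedged sketch assuming
# the tags usual on Polish sites ('Lektor' = voice-over, 'Dubbing' = dubbed,
# 'Napisy' = subtitles):
def get_lang_by_type(self, lang_type):
    if 'Lektor' in lang_type:
        return 'pl', 'Lektor'
    if 'Dubbing' in lang_type:
        return 'pl', 'Dubbing'
    if 'Napisy' in lang_type:
        return 'pl', 'Napisy'
    return 'en', None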