def search(self, title, localtitle, year):
    """Search the site for a movie page matching *title*/*localtitle* and *year*.

    Builds four query variants (normalized and raw, original and localized,
    each truncated at the first '.'), queries self.search_link with each, and
    returns the absolute page URL of the first result whose description row
    contains every word of the query plus the year. Returns None on no match
    or on any scrape failure (errors are swallowed by design).
    """
    try:
        titles = []
        # Truncate at the first dot — presumably to strip a subtitle/suffix;
        # TODO(review): confirm this is intended for titles containing dots.
        title2 = title.split('.')[0]
        localtitle2 = localtitle.split('.')[0]
        titles.append(cleantitle.normalize(cleantitle.getsearch(title2)))
        titles.append(
            cleantitle.normalize(cleantitle.getsearch(localtitle2)))
        titles.append(title2)
        titles.append(localtitle2)
        for title in titles:
            # '+'-join the words for the site's query-string format.
            title = title.replace(" ", "+")
            result = client.request(self.search_link % title)
            result = client.parseDOM(result, 'div',
                                     attrs={'class': 'col-xs-4'})
            for item in result:
                try:
                    # Result tile: description column holds "<p>name ... year</p>".
                    rok = client.parseDOM(item, 'div',
                                          attrs={'class': 'col-sm-8'})
                    rok_nazwa = client.parseDOM(rok, 'p')[0].lower()
                    link = client.parseDOM(item, 'a', ret='href')[0]
                    link = self.base_link + link
                    # NOTE(review): `title` was rebound with '+' separators above,
                    # so splitting on ' ' yields one '+'-joined token — verify the
                    # match still behaves as intended.
                    words = title.lower().split(" ")
                    if self.contains_all_words(
                            rok_nazwa, words) and year in rok_nazwa:
                        return link
                except:
                    # Malformed tile — skip it and keep scanning.
                    continue
        return
    except:
        # Best-effort scraper: any failure means "not found".
        return
def search(self, title, localtitle, year, search_type):
    """Search the site (POST form) for *title*/*localtitle* and return the
    first matching result URL, or None.

    Fetches a session cookie first and caches it under 'alltube_cookie' so
    other methods can reuse it. A result row matches when one of its
    '/'-separated names contains every word of the query and its page year
    (via self.try_read_year) is absent or equals *year*.
    """
    try:
        titles = []
        titles.append(cleantitle.normalize(cleantitle.getsearch(title)))
        titles.append(cleantitle.normalize(cleantitle.getsearch(localtitle)))
        # Site requires a session cookie; cache it for sibling methods.
        cookies = client.request(self.base_link, output='cookie')
        cache.cache_insert('alltube_cookie', cookies)
        for title in titles:
            r = client.request(urlparse.urljoin(self.base_link, self.search_link),
                               post={'search': cleantitle.query(title)},
                               headers={'Cookie': cookies})
            r = self.get_rows(r, search_type)
            for row in r:
                url = client.parseDOM(row, 'a', ret='href')[0]
                names_found = client.parseDOM(row, 'h3')[0]
                # Skip trailer entries unless the query itself asks for one.
                if names_found.startswith('Zwiastun') and not title.startswith('Zwiastun'):
                    continue
                # A row may list several alternate names separated by '/'.
                names_found = names_found.encode('utf-8').split('/')
                names_found = [cleantitle.normalize(cleantitle.getsearch(i)) for i in names_found]
                for name in names_found:
                    # NOTE(review): both replace() calls swap a space for a space —
                    # a no-op as written; possibly a lost non-breaking space
                    # (U+00A0). TODO confirm against the original source.
                    name = name.replace(" ", " ")
                    title = title.replace(" ", " ")
                    words = title.split(" ")
                    found_year = self.try_read_year(url)
                    if self.contains_all_words(name, words) and (not found_year or found_year == year):
                        return url
                    else:
                        continue
                continue
    except:
        # Best-effort scraper: any failure means "not found".
        return
def search_ep(self, title1, title2):
    """Search for episode clips matching *title1* or *title2*.

    Queries self.search_link_ep (words joined with '_') and collects the
    hrefs of every 'video-clip-wrapper' tile whose visible title contains all
    words of the query. Returns the list of links found for the first query
    variant (may be empty), or None on total failure.
    """
    try:
        titles = []
        titles.append(cleantitle.normalize(cleantitle.getsearch(title1)))
        titles.append(cleantitle.normalize(cleantitle.getsearch(title2)))
        for title in titles:
            url = urlparse.urljoin(self.base_link, self.search_link_ep)
            # Site expects underscores between query words.
            url = url % urllib.quote(str(title).replace(" ", "_"))
            result = client.request(url)
            result = client.parseDOM(result, 'div',
                                     attrs={'class': 'video-clip-wrapper'})
            linki = []
            for item in result:
                try:
                    link = str(client.parseDOM(item, 'a', ret='href')[0])
                    nazwa = str(
                        client.parseDOM(
                            item, 'a',
                            attrs={'class': 'link-title-visit'})[0])
                    name = cleantitle.normalize(
                        cleantitle.getsearch(nazwa))
                    # NOTE(review): space-for-space replace is a no-op as
                    # written; possibly a lost U+00A0 — TODO confirm.
                    name = name.replace(" ", " ")
                    title = title.replace(" ", " ")
                    words = title.split(" ")
                    if self.contains_all_words(name, words):
                        linki.append(link)
                except:
                    # Malformed tile — skip it.
                    continue
            # NOTE(review): returns after the first query variant, so *title2*
            # is only ever used if this line is not reached — confirm intent.
            return linki
    except:
        return
def search(self, title, localtitle, year, is_movie_search):
    """Search the site for *title*/*localtitle* and return the first result
    link whose title contains all query words and whose href contains *year*.

    *is_movie_search* is accepted but not used in this method (kept for
    interface parity with sibling scrapers). Returns None on no match or on
    any scrape failure.
    """
    try:
        titles = []
        titles.append(cleantitle.normalize(cleantitle.getsearch(title)))
        titles.append(
            cleantitle.normalize(cleantitle.getsearch(localtitle)))
        for title in titles:
            url = self.search_link + str(title)
            result = self.session.get(url).content
            result = result.decode('utf-8')
            # Unescape HTML entities before DOM parsing.
            h = HTMLParser()
            result = h.unescape(result)
            result = client.parseDOM(result, 'div',
                                     attrs={'class': 'col-sm-4'})
            for item in result:
                try:
                    link = str(client.parseDOM(item, 'a', ret='href')[0])
                    # Protocol-relative hrefs → force https.
                    if link.startswith('//'):
                        link = "https:" + link
                    nazwa = str(client.parseDOM(item, 'a', ret='title')[0])
                    name = cleantitle.normalize(
                        cleantitle.getsearch(nazwa))
                    # NOTE(review): space-for-space replace is a no-op as
                    # written; possibly a lost U+00A0 — TODO confirm.
                    name = name.replace(" ", " ")
                    title = title.replace(" ", " ")
                    words = title.split(" ")
                    # Year is matched against the URL itself on this site.
                    if self.contains_all_words(
                            name, words) and str(year) in link:
                        return link
                except:
                    # Malformed tile — skip it.
                    continue
    except:
        # Best-effort scraper: any failure means "not found".
        return
def search(self, title, localtitle, year, is_movie_search):
    """Search the site's card listing for a movie matching the titles and
    *year*; return the first matching href or None.

    Tries four query variants (normalized and raw). Result names are scraped
    from 'Film online: ...' tooltips, the year from the 'Rok wydania filmu
    online' attribute. *is_movie_search* is accepted but unused here.
    """
    try:
        titles = []
        titles.append(cleantitle.normalize(cleantitle.getsearch(title)))
        titles.append(
            cleantitle.normalize(cleantitle.getsearch(localtitle)))
        titles.append(title)
        titles.append(localtitle)
        for title in titles:
            try:
                url = self.search_link + str(title)
                result = self.session.get(url).content
                result = result.decode('utf-8')
                # Unescape HTML entities before regex/DOM scraping.
                h = HTMLParser()
                result = h.unescape(result)
                result = client.parseDOM(result, 'div',
                                         attrs={'class': 'card-body p-2'})
                for item in result:
                    try:
                        nazwa = re.findall("""Film online: (.*?)\"""", item)[0]
                        # Name may be wrapped in extra markup — unwrap if so.
                        try:
                            nazwa = re.findall(""">(.*?)<""", nazwa)[0]
                        except:
                            pass
                        name = cleantitle.normalize(
                            cleantitle.getsearch(nazwa))
                        rok = re.findall(
                            """Rok wydania filmu online\".*>(.*?)<""",
                            item)[0]
                        # Strip search-term highlighting so the href regex
                        # sees clean markup.
                        item = str(item).replace(
                            "<span style='color:red'>",
                            "").replace("</span>", "")
                        link = re.findall("""href=\"(.*?)\"""", item)[0]
                        if link.startswith('//'):
                            link = "https:" + link
                        # NOTE(review): space-for-space replace is a no-op as
                        # written; possibly a lost U+00A0 — TODO confirm.
                        name = name.replace(" ", " ")
                        title = title.replace(" ", " ")
                        # NOTE(review): unlike the sibling search methods this
                        # splits the *found* name and checks the query against
                        # it (arguments reversed) — confirm this is intended.
                        words = name.split(" ")
                        if self.contains_all_words(
                                title, words) and str(year) in rok:
                            return link
                    except:
                        continue
            except:
                continue
    except:
        return
def search_ep(self, titles, season, episode, year):
    """Search the site for a TV series matching one of *titles* and *year*.

    For each search title, scrapes the 'resultsList hits' block and zips up
    per-result (title, id, year, original-title) tuples. Returns a 7-tuple
    ``(f_title, f_id, f_year, f_orgtitle, "SERIAL", season, episode)`` for
    the first result whose title words all appear in the normalized query
    and whose year equals *year*; None on no match or failure.
    """
    try:
        searchtitles = titles
        for searchtitle in searchtitles:
            response = requests.get(self.base_link +
                                    self.search_serial % searchtitle)
            result = response.content
            # Unescape HTML entities before DOM parsing.
            h = HTMLParser()
            result = h.unescape(result)
            result = client.parseDOM(result, 'ul',
                                     attrs={'class': 'resultsList hits'})
            items = client.parseDOM(result, 'li')
            # Drop bare anchor fragments that are not result tiles.
            items = [x for x in items if not str(x).startswith("<a href")]
            orgtitles = []
            for content in items:
                # "0" is the sentinel for a result with no original title.
                try:
                    orgtitle = str(
                        client.parseDOM(
                            content, 'div',
                            attrs={'class': 'filmPreview__originalTitle'})[0])
                except:
                    orgtitle = "0"
                orgtitles.append(orgtitle)
            ids = client.parseDOM(items, 'data', ret='data-id')
            titles = client.parseDOM(result, 'data', ret='data-title')
            years = client.parseDOM(result, 'span',
                                    attrs={'class': 'filmPreview__year'})
            # Invariant per searchtitle — hoisted out of the result loop.
            teststring = cleantitle.normalize(
                cleantitle.getsearch(searchtitle))
            for item in zip(titles, ids, years, orgtitles):
                f_title = str(item[0])
                f_id = str(item[1])
                f_year = str(item[2])
                f_orgtitle = str(item[3])
                words = cleantitle.normalize(
                    cleantitle.getsearch(f_title)).split(" ")
                # FIX: was self.contains_all_wors (typo) — raised NameError
                # whenever a candidate row was reached; every sibling method
                # uses contains_all_words.
                if self.contains_all_words(teststring,
                                           words) and year == f_year:
                    return (f_title, f_id, f_year, f_orgtitle, "SERIAL",
                            season, episode)
    except:
        # Best-effort scraper: any failure means "not found".
        return
def searchMovie(self, title, year, aliases, headers):
    """Find a movie page URL by title/alias and year.

    Scrapes the 'ml-item' search tiles, then fetches each result's info
    endpoint to read its release year. Returns the first result matching an
    alias AND the year; otherwise falls back to the first alias-only match;
    None when nothing matches or scraping fails.
    """
    try:
        title = cleantitle.normalize(title)
        url = urlparse.urljoin(self.base_link, self.search_link %
                               urllib.quote_plus(cleantitle.getsearch(title)))
        r = self.scraper.get(url, headers=headers).content
        r = client.parseDOM(r, 'div', attrs={'class': 'ml-item'})
        r = zip(client.parseDOM(r, 'a', ret='href'),
                client.parseDOM(r, 'a', ret='title'))
        # Third field: numeric id embedded in the href, used by info_link.
        r = [(i[0], i[1], re.findall('(\d+)', i[0])[0]) for i in r]
        results = []
        # FIX: url must be pre-initialized — with an empty result set the
        # loop never ran and the check below raised NameError.
        url = None
        for i in r:
            try:
                info = client.request(urlparse.urljoin(self.base_link,
                                                       self.info_link % i[2]),
                                      headers=headers, timeout='15')
                y = re.findall('<div\s+class="jt-info">(\d{4})', info)[0]
                # FIX: populate results so the alias-only fallback below can
                # work (previously dead — mirrors sibling searchMovie).
                results.append((i[0], i[1], y))
                if self.matchAlias(i[1], aliases) and (year == y):
                    url = i[0]
                    break
            except:
                # Info endpoint failed for this tile — try the next one.
                continue
        if url is None:
            # No year match: fall back to the first alias match; raises
            # IndexError (→ caught, returns None) when there is none.
            url = [i[0] for i in results if self.matchAlias(i[1], aliases)][0]
        return url
    except:
        # Best-effort scraper: any failure means "not found".
        return
def searchMovie(self, title, year, aliases):
    """Resolve a movie's watch-page URL from the site's search results.

    Matches search tiles against *aliases*, preferring a tile whose
    "(YYYY)" suffix equals *year*; falls back to the first alias match
    regardless of year. Returns the '<href>/watching.html' URL joined onto
    base_link, or None when nothing matches or scraping fails.
    """
    try:
        norm_title = cleantitle.normalize(title)
        search_url = urlparse.urljoin(self.base_link,
                                      self.search_link % cleantitle.geturl(norm_title))
        page = self.scraper.get(search_url).content
        tiles = client.parseDOM(page, 'div', attrs={'class': 'ml-item'})
        hrefs = client.parseDOM(tiles, 'a', ret='href')
        labels = client.parseDOM(tiles, 'a', ret='title')
        # (href, label, [year-from-"(YYYY)"]) for every tile.
        candidates = []
        for href, label in zip(hrefs, labels):
            candidates.append((href, label, re.findall('\((\d{4})', label)))
        try:
            dated = [(href, label, years[0])
                     for href, label, years in candidates if len(years) > 0]
            url = [href for href, label, tile_year in dated
                   if self.matchAlias(label, aliases) and year == tile_year][0]
        except:
            # No exact year match — fall through to alias-only matching.
            url = None
        if url is None:
            # Raises IndexError (→ outer except, None) when nothing matches.
            url = [href for href, label, _ in candidates
                   if self.matchAlias(label, aliases)][0]
        return urlparse.urljoin(self.base_link, '%s/watching.html' % url)
    except:
        return
def tvshow(self, imdb, tvdb, tvshowtitle, localtvshowtitle, aliases, year):
    """Resolve the site URL for a TV show.

    Searches by *tvshowtitle*, retries the request up to 3 times, and
    returns the href of the first 'result' block whose tag-stripped name is
    among the cleaned title/alias set and whose text contains *year*.
    Returns None on no match or failure. *imdb*, *tvdb* and
    *localtvshowtitle* are unused in this method.
    """
    try:
        query = self.search_link % urllib.quote_plus(
            cleantitle.query(tvshowtitle))
        # Flaky endpoint: retry the fetch up to 3 times.
        for i in range(3):
            result = self.scraper.get(query).content
            if not result is None:
                break
        # Acceptable names: the main title plus all aliases, cleaned.
        t = [tvshowtitle] + source_utils.aliases_to_array(aliases)
        t = [cleantitle.get(i) for i in set(t) if i]
        items = dom_parser.parse_dom(result, 'div', attrs={'class': 'result'})
        url = None
        for i in items:
            result = re.findall(r'href="([^"]+)">(.*)<', i.content)
            # Strip residual HTML tags from the candidate name before
            # comparing against the cleaned title set.
            if re.sub('<[^<]+?>', '', cleantitle.get(cleantitle.normalize(result[0][1]))) in t and year in \
                    result[0][1]:
                url = result[0][0]
            if not url is None:
                break
        # NOTE: raises (→ caught, returns None) when no item matched and
        # url is still None.
        url = url.encode('utf-8')
        return url
    except:
        # Best-effort scraper: any failure means "not found".
        return
def searchMovie(self, title, year):
    """Resolve a movie's watch-page URL on series9.

    Prefers a search tile whose cleaned title equals the cleaned query and
    whose "(YYYY)" suffix equals *year*; falls back to a title-only match.
    Raises IndexError when no tile matches at all (propagated to caller).
    """
    norm_title = cleantitle.normalize(title)
    page = self.scraper.get(self.search_link % cleantitle.geturl(norm_title),
                            params={'link_web': self.base_link}).content
    tiles = client.parseDOM(page, 'div', attrs={'class': 'ml-item'})
    results = [(href, label, re.findall('\((\d{4})', label))
               for href, label in zip(client.parseDOM(tiles, 'a', ret='href'),
                                      client.parseDOM(tiles, 'a', ret='title'))]
    # Comparison key is invariant — computed once.
    wanted = cleantitle.get(norm_title)
    try:
        dated = [(href, label, years[0])
                 for href, label, years in results if len(years) > 0]
        url = [href for href, label, tile_year in dated
               if cleantitle.get(label) == wanted and year == tile_year][0]
    except:
        # No exact (title, year) match — log and fall back to title only.
        url = None
        log_utils.log('series9 - Exception: \n' + str(traceback.format_exc()))
    if url is None:
        # Raises IndexError to the caller when nothing matches by title.
        url = [href for href, label, _ in results
               if cleantitle.get(label) == wanted][0]
    return urlparse.urljoin(self.base_link, '%s/watching.html' % url)
def searchShow(self, title, season, aliases, headers):
    """Find the site URL for a show season.

    Searches for "<title> Season <n>" and returns the href of the first
    tile whose label parses as "<name> - Season <d>" with *name* matching an
    alias and the season digit equal to *season*. Returns None on no match
    or scrape failure.
    """
    try:
        query = '%s Season %01d' % (cleantitle.normalize(title), int(season))
        search_url = urlparse.urljoin(
            self.base_link,
            self.search_link % urllib.quote_plus(cleantitle.getsearch(query)))
        page = self.scraper.get(search_url, headers=headers).content
        tiles = client.parseDOM(page, 'div', attrs={'class': 'ml-item'})
        hrefs = client.parseDOM(tiles, 'a', ret='href')
        labels = client.parseDOM(tiles, 'a', ret='title')
        for href, label in zip(hrefs, labels):
            parsed = re.findall('(.*?)\s+-\s+Season\s+(\d)', label)
            if not parsed:
                # Tile label doesn't carry a season marker — skip it.
                continue
            show_name, season_digit = parsed[0]
            if self.matchAlias(show_name, aliases) and season_digit == season:
                return href
        return None
    except:
        return
def searchShow(self, title, season, aliases):
    """Resolve a show season's watch-page URL.

    Searches for "<title> Season <n>", matches tile labels of the form
    "<name> - Season <d>" against *aliases* and *season*, and returns the
    '<href>/watching.html' URL joined onto base_link. Returns None when no
    tile matches or scraping fails.
    """
    try:
        query = '%s Season %01d' % (cleantitle.normalize(title), int(season))
        page = client.request(
            urlparse.urljoin(self.base_link,
                             self.search_link % cleantitle.geturl(query)),
            timeout='10')
        tiles = client.parseDOM(page, 'div', attrs={'class': 'ml-item'})
        hrefs = client.parseDOM(tiles, 'a', ret='href')
        labels = client.parseDOM(tiles, 'a', ret='title')
        for href, label in zip(hrefs, labels):
            parsed = re.findall('(.*?)\s+-\s+Season\s+(\d)', label)
            if not parsed:
                # No "<name> - Season <d>" marker in this label — skip.
                continue
            show_name, season_digit = parsed[0]
            if self.matchAlias(show_name, aliases) and season_digit == season:
                return urlparse.urljoin(self.base_link,
                                        '%s/watching.html' % href)
        return None
    except:
        return
def searchShow(self, title, season):
    """Resolve a show season's watch-page URL (no alias matching).

    Searches for "<title> Season <n>" and requires the tile's parsed show
    name to clean-compare equal to the query title and its season digit to
    equal *season*. Raises IndexError when no tile matches (propagated —
    this method has no try/except of its own).
    """
    norm_title = cleantitle.normalize(title)
    query = '%s Season %01d' % (norm_title, int(season))
    page = self.scraper.get(self.search_link % cleantitle.geturl(query),
                            params={'link_web': self.base_link}).content
    tiles = client.parseDOM(page, 'div', attrs={'class': 'ml-item'})
    pairs = zip(client.parseDOM(tiles, 'a', ret='href'),
                client.parseDOM(tiles, 'a', ret='title'))
    # Comparison key is invariant — computed once.
    wanted = cleantitle.get(norm_title)
    hits = []
    for href, label in pairs:
        parsed = re.findall('(.*?)\s+-\s+Season\s+(\d)', label)
        if parsed and cleantitle.get(parsed[0][0]) == wanted \
                and parsed[0][1] == season:
            hits.append(href)
    # hits[0] raises IndexError when nothing matched, as the original did.
    return urlparse.urljoin(self.base_link, '%s/watching.html' % hits[0])
def search_ep(self, titles, season, episode, year):
    """Find the page URL for a specific episode (SxxEyy).

    For each title in *titles*, searches the site's movie cards, matches a
    series by name and *year*, then fetches its 'seriale' listing page and
    returns the absolute link of the table row containing the SxxEyy tag.
    Returns None on no match or any scrape failure.
    """
    try:
        # Episode tag in the site's listing, e.g. 'S02E05'.
        query = 'S{:02d}E{:02d}'.format(int(season), int(episode))
        for title in titles:
            url = self.search_link + str(title)
            result = self.session.get(url).content
            result = result.decode('utf-8')
            # Unescape HTML entities before regex/DOM scraping.
            h = HTMLParser()
            result = h.unescape(result)
            result = client.parseDOM(result, 'div',
                                     attrs={'class': 'card-body p-2'})
            for item in result:
                nazwa = re.findall("""Film online: (.*?)\"""", item)[0]
                name = cleantitle.normalize(cleantitle.getsearch(nazwa))
                rok = re.findall("""Rok wydania filmu online\".*>(.*?)<""",
                                 item)[0]
                # Strip search-term highlighting so the href regex sees
                # clean markup.
                item = str(item).replace("<span style='color:red'>",
                                         "").replace("</span>", "")
                link = re.findall("""href=\"(.*?)\"""", item)[0]
                if link.startswith('//'):
                    link = "https:" + link
                # NOTE(review): space-for-space replace is a no-op as
                # written; possibly a lost U+00A0 — TODO confirm.
                name = name.replace(" ", " ")
                title = title.replace(" ", " ")
                words = title.split(" ")
                if self.contains_all_words(name, words) and str(year) in rok:
                    # Series listing lives under /seriale/ instead of /filmy/.
                    content = requests.get(link.replace(
                        'filmy', 'seriale')).content
                    content = client.parseDOM(
                        content, 'div',
                        attrs={'class': 'tabela_wiersz mb-1'})
                    for odcinek in content:
                        if query.lower() in odcinek.lower():
                            link = str(
                                client.parseDOM(odcinek, 'a', ret='href')[0])
                            return self.base_link + link
    except:
        # Best-effort scraper: any failure means "not found".
        return
def tvshow(self, imdb, tvdb, tvshowtitle, localtvshowtitle, aliases, year):
    """Resolve the site URL for a TV show via itemprop markup.

    Searches by *tvshowtitle* and scans (url, name) pairs extracted from
    itemprop="url"/"name" attributes; returns the first url whose cleaned
    name is in the title/alias set and whose raw name contains *year*.
    Returns None on no match or failure. *imdb*, *tvdb* and
    *localtvshowtitle* are unused in this method.
    """
    try:
        query = self.search_link % urllib.quote_plus(
            cleantitle.query(tvshowtitle))
        result = client.request(query)
        # Acceptable names: the main title plus all aliases, cleaned.
        t = [tvshowtitle] + source_utils.aliases_to_array(aliases)
        t = [cleantitle.get(i) for i in set(t) if i]
        result = re.compile(
            'itemprop="url"\s+href="([^"]+).*?itemprop="name"\s+class="serie-title">([^<]+)',
            re.DOTALL).findall(result)
        for i in result:
            if cleantitle.get(cleantitle.normalize(
                    i[1])) in t and year in i[1]:
                url = i[0]
                url = url.encode('utf-8')
                return url
    except:
        # Best-effort scraper: any failure means "not found".
        return
def sources(self, url, hostDict, hostprDict):
    """Collect playable sources for a previously resolved item.

    *url* is the tuple built by this scraper's search step:
    (title, localtitle, year, is_anime). Scrapes the site's 'element' result
    tiles for each title variant, filters by word match / year (or episode
    number for anime), classifies quality from the release name, and returns
    a list of source dicts. Always returns the accumulated list, even after
    an error.
    """
    sources = []
    is_anime = url[3]
    try:
        titles = []
        titles.append(url[0])
        titles.append(url[1])
        try:
            year = url[2]
        except:
            year = ''
        for url_single in titles:
            url_single = cleantitle.normalize(
                cleantitle.getsearch(url_single))
            words = url_single.split(' ')
            search_url = urlparse.urljoin(
                self.base_link, self.search_link) % (url_single + " " + year)
            # Site requires a verification cookie hash ('tmvh'); build and
            # cache it for the resolver.
            cookies = client.request(self.base_link, output='cookie')
            verifyGet = client.request(self.verify, cookie=cookies)
            cookies = cookies + ";tmvh=" + self.crazy_cookie_hash(verifyGet)
            cache.cache_insert('szukajka_cookie', cookies)
            result = client.request(search_url, cookie=cookies)
            result = client.parseDOM(result, 'div',
                                     attrs={'class': 'element'})
            for el in result:
                found_title = str(
                    client.parseDOM(
                        el, 'div', attrs={'class': 'title'})[0]).lower().replace(
                            "_", " ").replace(".", " ").replace("-", " ")
                if is_anime:
                    # Anime: match the episode number (last query word)
                    # against any number in the release name.
                    numbers = [
                        int(s) for s in found_title.split() if s.isdigit()
                    ]
                    if not int(words[-1]) in numbers:
                        continue
                # FIX: original `("zwiastun" or "trailer") in ...` evaluated
                # to `"zwiastun" in ...` only — trailers named "trailer"
                # were never filtered out.
                if "zwiastun" in found_title or "trailer" in found_title:
                    continue
                # Long titles / anime: word match alone is enough; short
                # titles must also carry the year to avoid false positives.
                if len(words) >= 4 or is_anime:
                    if not self.contains_all_words(found_title, words):
                        continue
                else:
                    if not self.contains_all_words(
                            found_title, words) or year not in found_title:
                        continue
                # Quality from release-name markers; default SD.
                q = 'SD'
                if self.contains_word(found_title,
                                      '1080p') or self.contains_word(
                                          found_title, 'FHD'):
                    q = '1080p'
                elif self.contains_word(found_title, '720p'):
                    q = 'HD'
                link = client.parseDOM(el, 'a', attrs={'class': 'link'},
                                       ret='href')[0]
                # Last word of the 'version' span is the translation type.
                transl_type = client.parseDOM(el, 'span',
                                              attrs={'class': 'version'})[0]
                transl_type = transl_type.split(' ')[-1]
                # Last word of the 'host' span is the hoster name.
                host = client.parseDOM(el, 'span',
                                       attrs={'class': 'host'})[0]
                host = host.split(' ')[-1]
                lang, info = self.get_lang_by_type(transl_type)
                sources.append({
                    'source': host,
                    'quality': q,
                    'language': lang,
                    'url': link,
                    'info': info,
                    'direct': False,
                    'debridonly': False
                })
            continue
        return sources
    except Exception as e:
        print(str(e))
        # Return whatever was collected before the failure.
        return sources