def __search(self, titles, type, year, season=0, episode=False):
    """Search the site and return the matching relative URL, or None.

    titles: candidate titles (first entry builds the query).
    type: site-specific content-type slug used in the search link.
    year: release year; the query also allows year +/- 1.
    season: season number the result must match (0 for movies).
    episode: when set, additionally resolve the episode link.
    """
    # NOTE: 'type' shadows the builtin name; kept to preserve the signature.
    try:
        # Accept year, year+1 and year-1 to tolerate off-by-one metadata.
        years = [str(year), str(int(year) + 1), str(int(year) - 1)]
        years = ['&veroeffentlichung[]=%s' % i for i in years]
        query = self.search_link % (type, urllib.quote_plus(cleantitle.query(titles[0])))
        query += ''.join(years)
        query = urlparse.urljoin(self.base_link, query)
        t = [cleantitle.get(i) for i in set(titles) if i]
        r = self.__proceed_search(query)
        # Keep the first result whose cleaned title and season both match.
        r = [i[0] for i in r if cleantitle.get(i[1]) in t and int(i[2]) == int(season)][0]
        url = source_utils.strip_domain(r)
        if episode:
            # Resolve the concrete episode link from the show's season list.
            r = client.request(urlparse.urljoin(self.base_link, url))
            r = dom_parser.parse_dom(r, 'div', attrs={'class': 'season-list'})
            r = dom_parser.parse_dom(r, 'li')
            r = dom_parser.parse_dom(r, 'a', req='href')
            r = [i.attrs['href'] for i in r if i and int(i.content) == int(episode)][0]
            url = source_utils.strip_domain(r)
        return url
    except:
        # Best-effort scraper: any failure (no match, parse error) yields None.
        return
def __search(self, titles, year):
    """Search (via cfscrape to pass Cloudflare) and match title and year.

    Returns the first matching relative URL for season entries, otherwise a
    list of matching relative URLs; None on failure.

    Fix: removed a leftover Python-2 debug statement ('print url[0]') and the
    commented-out alternative request line.
    """
    try:
        query = self.search_link % (urllib.quote_plus(cleantitle.getsearch(titles[0])))
        query = urlparse.urljoin(self.base_link, query)
        t = cleantitle.get(titles[0])
        scraper = cfscrape.create_scraper()
        data = scraper.get(query).content
        data = client.parseDOM(data, 'div', attrs={'class': 'result-item'})
        r = dom_parser.parse_dom(data, 'div', attrs={'class': 'title'})
        r = zip(dom_parser.parse_dom(r, 'a'), dom_parser.parse_dom(data, 'span', attrs={'class': 'year'}))
        url = []
        for i in range(len(r)):
            title = cleantitle.get(r[i][0][1])
            # Strip quality/season tags before comparing titles.
            title = re.sub('(\d+p|4k|3d|hd|season\d+)', '', title)
            y = r[i][1][1]
            link = r[i][0][0]['href']
            if 'season' in title:
                continue
            if t == title and y == year:
                if 'season' in link:
                    # Season link: the first hit is authoritative.
                    url.append(source_utils.strip_domain(link))
                    return url[0]
                else:
                    url.append(source_utils.strip_domain(link))
        return url
    except:
        return
def __search(self, titles, year, content):
    """Search and return the relative URL of a movie/series, or None.

    content: 'movies' selects the movies tab, anything else the series tab.
    """
    try:
        query = self.search_link % (urllib.quote_plus(cleantitle.getsearch(titles[0])))
        query = urlparse.urljoin(self.base_link, query)
        t = [cleantitle.get(i) for i in set(titles) if i][0]
        r = client.request(query)
        r = client.parseDOM(r, 'div', attrs={'class': 'tab-content clearfix'})
        # Results are split into separate tabs for movies and series.
        if content == 'movies':
            r = client.parseDOM(r, 'div', attrs={'id': 'movies'})
        else:
            r = client.parseDOM(r, 'div', attrs={'id': 'series'})
        data = dom_parser.parse_dom(r, 'figcaption')
        for i in data:
            title = i[0]['title']
            title = cleantitle.get(title)
            if title in t:
                x = dom_parser.parse_dom(i, 'a', req='href')
                return source_utils.strip_domain(x[0][0]['href'])
            else:
                # Fall back to the detail page: compare its own
                # "<h1>Title (Year)</h1>" header against the request.
                url = dom_parser.parse_dom(i, 'a', req='href')
                data = client.request(url[0][0]['href'])
                data = re.findall('<h1><a.+?">(.+?)\((\d{4})\).*?</a></h1>', data, re.DOTALL)[0]
                if titles[0] in data[0] and year == data[1]:
                    return source_utils.strip_domain(url[0][0]['href'])
        return
    except:
        return
def __search(self, titles, year, season=0, episode=False):
    """Search and return the relative URL of the best match.

    Matches on cleaned title, year (+/- 1, or '0' = unknown) and season;
    when 'episode' is given, the episode link is resolved as well.
    Returns None on any failure.
    """
    try:
        query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0])))
        query = urlparse.urljoin(self.base_link, query)
        t = [cleantitle.get(i) for i in set(titles) if i]
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']
        r = client.request(query)
        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'container'})
        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'ml-item-content'})
        f = []
        for i in r:
            _url = dom_parser.parse_dom(i, 'a', attrs={'class': 'ml-image'}, req='href')[0].attrs['href']
            _title = re.sub('<.+?>|</.+?>', '', dom_parser.parse_dom(i, 'h6')[0].content).strip()
            try:
                # Strip a trailing "Staffel N"/"S N" season suffix from the title.
                _title = re.search('(.*?)\s(?:staf+el|s)\s*(\d+)', _title, re.I).group(1)
            except:
                pass
            _season = '0'
            _year = re.findall('calendar.+?>.+?(\d{4})', ''.join([x.content for x in dom_parser.parse_dom(i, 'ul', attrs={'class': 'item-params'})]))
            _year = _year[0] if len(_year) > 0 else '0'
            if season > 0:
                s = dom_parser.parse_dom(i, 'span', attrs={'class': 'season-label'})
                s = dom_parser.parse_dom(s, 'span', attrs={'class': 'el-num'})
                if s:
                    _season = s[0].content.strip()
            if cleantitle.get(_title) in t and _year in y and int(_season) == int(season):
                f.append((_url, _year))
        r = f
        r = sorted(r, key=lambda i: int(i[1]), reverse=True)  # with year > no year
        r = [i[0] for i in r if r[0]][0]
        url = source_utils.strip_domain(r)
        if episode:
            # Resolve the concrete episode link from the show's season list.
            r = client.request(urlparse.urljoin(self.base_link, url))
            r = dom_parser.parse_dom(r, 'div', attrs={'class': 'season-list'})
            r = dom_parser.parse_dom(r, 'li')
            r = dom_parser.parse_dom(r, 'a', req='href')
            r = [(i.attrs['href'], i.content) for i in r]
            r = [i[0] for i in r if i[1] and int(i[1]) == int(episode)][0]
            url = source_utils.strip_domain(r)
        return url
    except:
        return
def __search(self, titles, year):
    """Search via XHR POST (falling back to the legacy search page) and
    return the relative URL matching localized/original title and year."""
    try:
        url = urlparse.urljoin(self.base_link, self.search_link)
        t = [cleantitle.get(i) for i in set(titles) if i]
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']
        post = {'story': titles[0], 'years_ot': str(int(year) - 1), 'years_do': str(int(year) + 1)}
        r = client.request(url, post=post, XHR=True)
        if len(r) < 1000:
            # A tiny response means no XHR results; retry the old search URL.
            url = urlparse.urljoin(self.base_link, self.search_old % urllib.quote_plus(titles[0]))
            r = client.request(url)
        r = r.decode('cp1251').encode('utf-8')  # site serves Windows-1251
        r = dom_parser.parse_dom(r, 'article')
        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'full'})
        r = [(dom_parser.parse_dom(i, 'a', attrs={'itemprop': 'url'}, req='href'),
              dom_parser.parse_dom(i, 'h3', attrs={'class': 'name'}, req='content'),
              dom_parser.parse_dom(i, 'div', attrs={'class': 'origin-name'}, req='content'),
              dom_parser.parse_dom(i, 'div', attrs={'class': 'year'})) for i in r]
        r = [(i[0][0].attrs['href'], i[1][0].attrs['content'], i[2][0].attrs['content'], dom_parser.parse_dom(i[3], 'a', attrs={'itemprop': 'copyrightYear'})) for i in r if i[0] and i[1] and i[2]]
        r = [(i[0], i[1], i[2], i[3][0].content) for i in r if i[3]]
        # Match either the localized name or the original name, plus the year.
        r = [i[0] for i in r if (cleantitle.get(i[1]) in t or cleantitle.get(i[2]) in t) and i[3] in y][0]
        return source_utils.strip_domain(r)
    except:
        return
def __search(self, titles, year, season='0'):
    """POST a cp1251-encoded search; return the URL matching title, year
    (+/- 1, or unknown) and season."""
    try:
        url = urlparse.urljoin(self.base_link, self.search_link)
        t = [cleantitle.get(i) for i in set(titles) if i]
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']
        post = {'story': utils.uni2cp(titles[0]), 'titleonly': 3, 'do': 'search', 'subaction': 'search', 'search_start': 1, 'full_search': 0, 'result_from': 1}
        r = client.request(url, post=post)
        r = r.decode('cp1251').encode('utf-8')  # site serves Windows-1251
        r = dom_parser.parse_dom(r, 'table', attrs={'class': 'eBlock'})
        r = [(dom_parser.parse_dom(i, 'div', attrs={'class': 'eTitle'}), dom_parser.parse_dom(i[1], 'a', attrs={'href': re.compile('.*\d+_goda/')})) for i in r]
        r = [(dom_parser.parse_dom(i[0][0], 'a', req='href'), [x.content for x in i[1] if re.match('\d{4}', x.content)][0] if i[1] else '0') for i in r if i[0]]
        r = [(i[0][0].attrs['href'], i[0][0].content, i[1]) for i in r if i[0]]
        # Pull "Title (YYYY" out of the link text when present.
        r = [(i[0], i[1], i[2], re.findall('(.+?) \(*(\d{4})', i[1])) for i in r if i]
        r = [(i[0], i[3][0][0] if i[3] else i[1], i[2]) for i in r]
        # Pull "Title N сезон" (= season N) out of the remaining text.
        r = [(i[0], i[1], i[2], re.findall(u'(.+?)\s+(\d+)\s+(?:сезон)', i[1])) for i in r]
        r = [(i[0], i[3][0][0] if len(i[3]) > 0 else i[1], i[2], i[3][0][1] if len(i[3]) > 0 else '0') for i in r]
        r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
        r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y and int(i[3]) == int(season)][0]
        return source_utils.strip_domain(r)
    except:
        return
def __search(self, search_link, imdb, titles):
    """Search by title, then verify the candidate page actually links to the
    given IMDb id; return the relative URL only when the id matches."""
    try:
        query = search_link % (urllib.quote_plus(cleantitle.query(titles[0])))
        query = urlparse.urljoin(self.base_link, query)
        t = [cleantitle.get(i) for i in set(titles) if i]
        r = client.request(query)
        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'big-list'})
        r = dom_parser.parse_dom(r, 'table', attrs={'class': 'row'})
        r = dom_parser.parse_dom(r, 'td', attrs={'class': 'list-name'})
        r = dom_parser.parse_dom(r, 'a', req='href')
        r = [i.attrs['href']for i in r if i and cleantitle.get(i.content) in t][0]
        url = source_utils.strip_domain(r)
        # Confirm the match via the detail page's outbound IMDb link(s).
        r = client.request(urlparse.urljoin(self.base_link, url))
        r = dom_parser.parse_dom(r, 'a', attrs={'href': re.compile('.*/tt\d+.*')}, req='href')
        r = [re.findall('.+?(tt\d+).*?', i.attrs['href']) for i in r]
        r = [i[0] for i in r if i]
        return url if imdb in r else None
    except:
        return
def __search(self, titles, year, season='0'):
    """POST a cp1251-encoded search, match title/year/season against the
    result list, then resolve the matching news id back to its link."""
    try:
        t = [cleantitle.get(i) for i in set(titles) if i]
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']
        post = {'story': utils.uni2cp(titles[0]), 'titleonly': 3, 'do': 'search', 'subaction': 'search', 'search_start': 1, 'full_search': 0, 'result_from': 1}
        html = client.request(self.base_link, post=post)
        html = html.decode('cp1251').encode('utf-8')  # site serves Windows-1251
        r = dom_parser.parse_dom(html, 'div', attrs={'id': re.compile('news-id-\d+')})
        r = [(i.attrs['id'], dom_parser.parse_dom(i, 'a', req='href')) for i in r]
        # Keep only the numeric news id plus the thumbnail's title text.
        r = [(re.sub('[^\d]+', '', i[0]), dom_parser.parse_dom(i[1], 'img', req='title')) for i in r]
        r = [(i[0], i[1][0].attrs['title'], '') for i in r if i[1]]
        # Pull "Title N сезон" (= season N) out of the title when present.
        r = [(i[0], i[1], i[2], re.findall(u'(.+?)\s+(\d+)\s+(?:сезон)', i[1])) for i in r]
        r = [(i[0], i[3][0][0] if len(i[3]) > 0 else i[1], i[2], i[3][0][1] if len(i[3]) > 0 else '0') for i in r]
        # Pull "Title (YYYY" out of the title when present.
        r = [(i[0], i[1], re.findall('(.+?) \(*(\d{4})', i[1]), i[3]) for i in r]
        r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0', i[3]) for i in r]
        # A season request with no explicit season marker implies season 1.
        r = [(i[0], i[1], i[2], '1' if int(season) > 0 and i[3] == '0' else i[3]) for i in r]
        r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
        r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y and int(i[3]) == int(season)][0]
        # Map the chosen news id back to its anchor href.
        r = dom_parser.parse_dom(html, 'a', attrs={'href': re.compile('.*/%s-' % r)}, req='href')[0].attrs['href']
        return source_utils.strip_domain(r)
    except:
        return
def __search(self, titles, year):
    """Search "<title> <year>" and return the URL of the matching card.

    Fix: the result loop previously rebound the 'year' parameter
    ('for title, year in data') and then compared it against its own copy
    ('y = year; ... year == y'), so the year check always passed. The loop
    now uses distinct names and compares the scraped year to the requested
    one.
    """
    try:
        query = self.search_link % (urllib.quote_plus(cleantitle.getsearch(titles[0] + ' ' + year)))
        query = urlparse.urljoin(self.base_link, query)
        t = cleantitle.get(titles[0])
        r = client.request(query)
        r = client.parseDOM(r, 'div', attrs={'class': 'card'})
        r = client.parseDOM(r, 'h3')
        for i in r:
            # Each card <h3> holds "<span>Title</span> ... date">(YYYY".
            data = re.findall('<span.*?>(.+?)</span>.+?date">\s*\((\d{4}).*?</span>', i, re.DOTALL)
            for found_title, found_year in data:
                found_title = cleantitle.get(found_title)
                if found_title in t and found_year == year:
                    url = client.parseDOM(i, 'a', ret='href')[0]
                    return source_utils.strip_domain(url)
        return
    except:
        return
def episode(self, url, imdb, tvdb, title, premiered, season, episode):
    """Resolve an episode URL from a show URL stored as a query string."""
    try:
        if not url:
            return
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        tvshowtitle = data['tvshowtitle']
        localtvshowtitle = data['localtvshowtitle']
        # SECURITY: eval() on the serialized aliases string; it is produced by
        # this add-on itself, but eval of stored strings is still risky.
        aliases = source_utils.aliases_to_array(eval(data['aliases']))
        # Prefer the localized title; fall back to the original title.
        url = self.__search([localtvshowtitle] + aliases, data['year'], season)
        if not url and tvshowtitle != localtvshowtitle:
            url = self.__search([tvshowtitle] + aliases, data['year'], season)
        if not url:
            return
        r = client.request(urlparse.urljoin(self.base_link, url))
        r = dom_parser.parse_dom(r, 'ul', attrs={'class': ['list-inline', 'list-film']})
        r = dom_parser.parse_dom(r, 'li')
        r = dom_parser.parse_dom(r, 'a', req='href')
        r = [(i.attrs['href'], i.content) for i in r if i]
        # Non-numeric link texts are mapped to '0' so they never match.
        r = [(i[0], i[1] if re.compile("^(\d+)$").match(i[1]) else '0') for i in r]
        r = [i[0] for i in r if int(i[1]) == int(episode)][0]
        return source_utils.strip_domain(r)
    except:
        return
def __search(self, titles, year, season='0'):
    """Search; return the URL matching cleaned title, year (+/- 1, or
    unknown) and season."""
    try:
        query = self.search_link % (urllib.quote_plus(titles[0]))
        query = urlparse.urljoin(self.base_link, query)
        t = [cleantitle.get(i) for i in set(titles) if i]
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']
        r = client.request(query)
        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'list_movies'})
        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'item_movie'})
        r = dom_parser.parse_dom(r, 'h2', attrs={'class': 'tit'})
        r = dom_parser.parse_dom(r, 'a', req='href')
        r = [(i.attrs['href'], i.content.lower()) for i in r if i]
        # Split "title (year"; year defaults to '0' when absent.
        r = [(i[0], i[1], re.findall('(.+?) \(*(\d{4})', i[1])) for i in r]
        r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0') for i in r]
        # Split a "title season N"/"title sN" suffix; '0' when absent.
        r = [(i[0], i[1], i[2], re.findall('(.+?)\s+(?:\s*-?\s*(?:season|s))\s*(\d+)', i[1])) for i in r]
        r = [(i[0], i[3][0][0] if len(i[3]) > 0 else i[1], i[2], i[3][0][1] if len(i[3]) > 0 else '0') for i in r]
        # A season request with no explicit season marker implies season 1.
        r = [(i[0], i[1], i[2], '1' if int(season) > 0 and i[3] == '0' else i[3]) for i in r]
        r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
        r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y and int(i[3]) == int(season)][0]
        return source_utils.strip_domain(r)
    except:
        return
def __search(self, titles, year, season='0'):
    """Search; filter series vs. movies via the 'episode' badge, match
    title/year/season, and return the "-stream" variant of the URL."""
    try:
        query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0])))
        query = urlparse.urljoin(self.base_link, query)
        t = [cleantitle.get(i) for i in set(titles) if i]
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']
        r = client.request(query)
        r = dom_parser.parse_dom(r, 'ul', attrs={'class': ['products', 'row']})
        r = dom_parser.parse_dom(r, 'div', attrs={'class': ['box-product', 'clearfix']})
        # The 'episode' badge distinguishes series entries from movie entries.
        if int(season) > 0:
            r = [i for i in r if dom_parser.parse_dom(i, 'div', attrs={'class': 'episode'})]
        else:
            r = [i for i in r if not dom_parser.parse_dom(i, 'div', attrs={'class': 'episode'})]
        r = dom_parser.parse_dom(r, 'h3', attrs={'class': 'title-product'})
        r = dom_parser.parse_dom(r, 'a', req='href')
        r = [(i.attrs['href'], i.content.lower()) for i in r if i]
        # Split "title (year"; year defaults to '0' when absent.
        r = [(i[0], i[1], re.findall('(.+?) \(*(\d{4})', i[1])) for i in r]
        r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0') for i in r]
        # Split a "title staffel N"/"title s N" season suffix; '0' when absent.
        r = [(i[0], i[1], i[2], re.findall('(.+?)\s+(?:staf+el|s)\s+(\d+)', i[1])) for i in r]
        r = [(i[0], i[3][0][0] if len(i[3]) > 0 else i[1], i[2], i[3][0][1] if len(i[3]) > 0 else '0') for i in r]
        # Drop the ' hd' tag; a season request without a marker means season 1.
        r = [(i[0], i[1].replace(' hd', ''), i[2], '1' if int(season) > 0 and i[3] == '0' else i[3]) for i in r]
        r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
        r = [i[0]for i in r if cleantitle.get(i[1]) in t and i[2] in y and int(i[3]) == int(season)][0]
        url = source_utils.strip_domain(r)
        # The info-page URL maps directly onto its stream page.
        url = url.replace('-info', '-stream')
        return url
    except:
        return
def __search(self, titles):
    """Search; return the URL of the first result whose cleaned title matches
    any of the requested titles.

    Fix: the original returned None as soon as the FIRST result failed to
    match ('else: return' inside the loop), never considering the remaining
    results; the loop now checks every result.
    """
    try:
        query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0])))
        query = urlparse.urljoin(self.base_link, query)
        t = [cleantitle.get(i) for i in set(titles) if i]
        r = client.request(query)
        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'nag'})
        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'item-video'})
        r = dom_parser.parse_dom(r, 'h2', attrs={'class': 'entry-title'})
        r = dom_parser.parse_dom(r, 'a', req='href')
        for i in r:
            title = i[1]
            # Remove "*...*" decorations before cleaning the title.
            if re.search('\*(?:.*?)\*', title) is not None:
                title = re.sub('\*(?:.*?)\*', '', title)
            title = cleantitle.get(title)
            if title in t:
                return source_utils.strip_domain(i[0]['href'])
        return
    except:
        return
def __search(self, titles, year):
    """Search a Spanish-language site; verify the year and the original title
    on each result's detail page before returning its URL."""
    try:
        query = self.search_link % (cleantitle.getsearch(titles[0].replace(' ','%20')))
        query = urlparse.urljoin(self.base_link, query)
        t = [cleantitle.get(i) for i in set(titles) if i][0]
        r = client.request(query)
        r = client.parseDOM(r, 'li', attrs={'class': 'item everyone-item over_online haveTooltip'})
        for i in r:
            title = client.parseDOM(i, 'a', ret='title')[0]
            url = client.parseDOM(i, 'a', ret='href')[0]
            # One extra request per result: the detail page carries the year
            # ("Año:") and the original title in parentheses in its <h2>.
            data = client.request(url)
            y = re.findall('<p><span>Año:</span>(\d{4})',data)[0]
            original_t = re.findall('movie-text">.+?h2.+?">\((.+?)\)</h2>',data, re.DOTALL)[0]
            original_t, title = cleantitle.get(original_t), cleantitle.get(title)
            if (t in title or t in original_t) and y == year :
                x = dom_parser.parse_dom(i, 'a', req='href')
                return source_utils.strip_domain(x[0][0]['href'])
        return
    except:
        return
def __search(self, titles, year, content):
    """Cookie-based search flow: prime a year cookie, POST the search, then
    filter the typed result list by title (incl. original title) and year."""
    try:
        t = [cleantitle.get(i) for i in set(titles) if i]
        c = client.request(urlparse.urljoin(self.base_link, self.year_link % int(year)), output='cookie')
        p = urllib.urlencode({'search': cleantitle.query(titles[0])})
        c = client.request(urlparse.urljoin(self.base_link, self.search_link), cookie=c, post=p, output='cookie')
        r = client.request(urlparse.urljoin(self.base_link, self.type_link % content), cookie=c, post=p)
        r = dom_parser.parse_dom(r, 'div', attrs={'id': 'content'})
        r = dom_parser.parse_dom(r, 'tr')
        r = [dom_parser.parse_dom(i, 'td') for i in r]
        r = [dom_parser.parse_dom(i, 'a', req='href') for i in r]
        r = [(i[0].attrs['href'], i[0].content, i[1].content) for i in r if i]
        x = []
        for i in r:
            # "<i>(Original Title" marks an alternative title in the listing;
            # keep both the displayed and the original title for matching.
            if re.search('(?<=<i>\().*$', i[1]):
                x.append((i[0], re.search('(.*?)(?=\s<)', i[1]).group(), re.search('(?<=<i>\().*$', i[1]).group(), i[2]))
            else:
                x.append((i[0], i[1], i[1], i[2]))
        r = [i[0] for i in x if (cleantitle.get(i[1]) in t or cleantitle.get(i[2]) in t) and i[3] == year][0]
        return source_utils.strip_domain(r)
    except:
        return
def __search(self, titles, year, imdb):
    """Search by title/year; disambiguate multiple hits via the IMDb id
    found on each candidate's detail page."""
    try:
        query = self.search_link % (urllib.quote_plus(titles[0]))
        query = urlparse.urljoin(self.base_link, query)
        t = [cleantitle.get(i) for i in set(titles) if i]
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']
        r = client.request(query)
        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'movie_cell'})
        r = [(dom_parser.parse_dom(i, 'div', attrs={'class': 'bottom'}), dom_parser.parse_dom(i, 'div', attrs={'class': 'year'})) for i in r]
        r = [(dom_parser.parse_dom(i[0], 'a', req=['href', 'title']), re.findall('[(](\d{4})[)]', i[1][0].content)) for i in r if i[0] and i[1]]
        r = [(i[0][0].attrs['href'], i[0][0].content, i[1][0]) for i in r if i[0] and i[1]]
        r = [(i[0], i[1].lower(), i[2]) for i in r if i[2] in y]
        r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
        r = [i[0] for i in r if cleantitle.get(i[1]) in t]
        if len(r) > 1:
            # Several title/year matches: pick the candidate whose detail page
            # carries the requested IMDb id. If none does, 'url' stays unbound
            # and the NameError falls through to the bare except (returns None).
            for i in r:
                data = client.request(urlparse.urljoin(self.base_link, i))
                data = dom_parser.parse_dom(data, 'a', attrs={'name': re.compile('.*/tt\d+.*')}, req='name')
                data = [re.findall('.+?(tt\d+).*?', d.attrs['name']) for d in data]
                data = [d[0] for d in data if len(d) > 0 and d[0] == imdb]
                if len(data) >= 1:
                    url = i
        else:
            url = r[0]
        if url:
            return source_utils.strip_domain(url)
    except:
        return
def __search(self, titles, year):
    """Search "<title> <year>"; match the simplified title and the year
    embedded in each cover's anchor title."""
    try:
        query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0])+' '+year))
        query = urlparse.urljoin(self.base_link, query)
        t = [cleantitle.get(i) for i in set(titles) if i][0]
        r = client.request(query)
        r = client.parseDOM(r, 'div', attrs={'class': 'karatula'})
        for i in r:
            title = client.parseDOM(i, 'a', ret='title')[0]
            # The anchor title embeds the year, e.g. "Movie (2019)".
            y = re.findall('(\d{4})',title)[0]
            title = cleantitle.get_simple(title)
            if t in title and y == year :
                x = dom_parser.parse_dom(i, 'a', req='href')
                return source_utils.strip_domain(x[0][0]['href'])
        return
    except:
        return
def __search(self, titles, year):
    """Search "<title> <year>"; compare the first cleaned title word and the
    year scraped from each result's image alt text."""
    try:
        query = self.search_link % (urllib.quote_plus(cleantitle.getsearch(titles[0]+' '+year)))
        query = urlparse.urljoin(self.base_link, query)
        t = [cleantitle.get(i) for i in set(titles) if i][0]
        r = client.request(query)
        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'v_pict'})
        for i in r:
            # The image alt text carries title and year.
            title = re.findall('alt="(.+?)"',i[1], re.DOTALL)[0]
            y = re.findall('(\d{4})', title, re.DOTALL)[0]
            title = re.sub('<\w+>|</\w+>','',title)
            title = cleantitle.get(title)
            # Only the first word token is compared (substring match below).
            title = re.findall('(\w+)', cleantitle.get(title))[0]
            if title in t and year == y:
                url = re.findall('href="(.+?)"',i[1], re.DOTALL)[0]
                return source_utils.strip_domain(url)
        return
    except:
        return
def __search(self, titles, year, episode='0'):
    """Search; for episode requests the query becomes "<title> episode N"."""
    try:
        title = titles[0]
        if int(episode) > 0:
            title += ' episode %s' % episode
        t = [cleantitle.get(i) for i in set(titles) if i]
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']
        r = client.request(urlparse.urljoin(self.base_link, self.search_link) % urllib.quote_plus(cleantitle.query(title)))
        r = dom_parser.parse_dom(r, 'div', attrs={'id': 'entries'})
        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'post'})
        r = dom_parser.parse_dom(r, 'h3', attrs={'class': 'title'})
        r = dom_parser.parse_dom(r, 'a', req='href')
        r = [(i.attrs['href'], i.content.lower()) for i in r if i]
        # Split "title (year"; year defaults to '0' when absent.
        r = [(i[0], i[1], re.findall('(.+?) \(*(\d{4})', i[1])) for i in r]
        r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0') for i in r]
        # Split "title episode N"; '0' when absent.
        r = [(i[0], i[1], i[2], re.findall('(.+?)\s+(?:episode)\s+(\d+)', i[1])) for i in r]
        r = [(i[0], i[3][0][0] if len(i[3]) > 0 else i[1], i[2], i[3][0][1] if len(i[3]) > 0 else '0') for i in r]
        # Drop the ' hd' tag; an unnumbered entry counts as episode 1 when
        # searching for episodes.
        r = [(i[0], i[1].replace(' hd', ''), i[2], '1' if int(episode) > 0 and i[3] == '0' else i[3]) for i in r]
        r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
        r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y and int(i[3]) == int(episode)][0]
        return source_utils.strip_domain(r)
    except:
        return
def episode(self, url, imdb, tvdb, title, premiered, season, episode):
    """Build the "...N-sezon-M-bolum.html" episode URL from the show URL."""
    try:
        if not url:
            return
        base = url.replace('.html', '')
        ep_url = '%s%01d-sezon-%01d-bolum.html' % (base, int(season), int(episode))
        return source_utils.strip_domain(ep_url)
    except:
        # NOTE: returns [] on failure (sibling scrapers return None); both falsy.
        return []
def tvshow(self, imdb, tvdb, tvshowtitle, localtvshowtitle, aliases, year):
    """Look up the show in the cached site index (120 min TTL) by cleaned
    title or alias and return its relative URL; None when not found."""
    try:
        candidates = [tvshowtitle] + source_utils.aliases_to_array(aliases)
        wanted = [cleantitle.get(name) for name in set(candidates) if name]
        for link, name in cache.get(self.sezonlukdizi_tvcache, 120):
            if cleantitle.get(name) in wanted:
                return source_utils.strip_domain(link)
        return
    except:
        return
def tvshow(self, imdb, tvdb, tvshowtitle, localtvshowtitle, aliases, year):
    """Look up the show in the cached site index by title/alias; log failures."""
    try:
        t = [tvshowtitle] + source_utils.aliases_to_array(aliases)
        t = [cleantitle.get(i) for i in set(t) if i]
        # Cached index entries are (url, title) pairs; cache TTL is 120 minutes.
        url = [i[0] for i in cache.get(self.sezonlukdizi_tvcache, 120) if cleantitle.get(i[1]) in t][0]
        return source_utils.strip_domain(url)
    except:
        # Unlike the sibling scrapers this one logs the traceback before
        # returning None.
        failure = traceback.format_exc()
        log_utils.log('SezonlukDizi - Exception: \n' + str(failure))
        return
def __search(self, title):
    """POST the title to the site search; return the first exact cleaned-title
    match as a relative URL, or None."""
    try:
        wanted = cleantitle.get(title)
        html = client.request(urlparse.urljoin(self.base_link, self.search_link), post={'suchbegriff': title})
        anchors = dom_parser.parse_dom(html, 'a', attrs={'class': 'ausgabe_1'}, req='href')
        for anchor in anchors:
            if cleantitle.get(anchor.content) == wanted:
                return source_utils.strip_domain(anchor.attrs['href'])
        return
    except:
        return
def episode(self, url, imdb, tvdb, title, premiered, season, episode):
    """Find the "/saison-S/episode-E/" link on the show page."""
    try:
        if not url:
            return
        url = urlparse.urljoin(self.base_link, url)
        r = client.request(url)
        r = dom_parser.parse_dom(r, 'a', attrs={'class': 'episode-block', 'href': re.compile('.*/saison-%s/episode-%s/.*' % (season, episode))}, req='href')
        r = [i.attrs['href'] for i in r][0]  # maybe also get the VF/VOSTFR to get the VF first
        return source_utils.strip_domain(r)
    except:
        return
def episode(self, url, imdb, tvdb, title, premiered, season, episode):
    """Find the "/episode-N" link inside the show's episodes panel."""
    try:
        if not url:
            return
        r = client.request(urlparse.urljoin(self.base_link, url))
        r = dom_parser.parse_dom(r, 'div', attrs={'id': 'episodes'})
        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'panel-body'})
        r = dom_parser.parse_dom(r, 'a', attrs={'href': re.compile('.*/episode-%s.*' % episode)}, req='href')[0].attrs['href']
        return source_utils.strip_domain(r)
    except:
        return
def episode(self, url, imdb, tvdb, title, premiered, season, episode):
    """Find the "-season-S-episode-E" link in the show's season list."""
    try:
        if not url:
            return
        r = client.request(urlparse.urljoin(self.base_link, url))
        r = dom_parser.parse_dom(r, 'article', {'class': 'SeasonList'})
        r = dom_parser.parse_dom(r, 'ul')
        r = dom_parser.parse_dom(r, 'li')
        # (?!\d) stops episode 1 from also matching episodes 10, 11, ...
        r = dom_parser.parse_dom(r, 'a', attrs={'href': re.compile('[^"]+-season-%s-episode-%s(?!\d)[^"]*' % (season, episode))}, req='href')[0].attrs['href']
        return source_utils.strip_domain(r)
    except:
        return
def __search(self, titles, year):
    """XHR suggest-search; match cleaned title plus "(YYYY" from the entry."""
    try:
        t = [cleantitle.get(i) for i in set(titles) if i]
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']
        r = client.request(urlparse.urljoin(self.base_link, self.search_link), post=urllib.urlencode({'val': cleantitle.query(titles[0])}), XHR=True)
        r = dom_parser.parse_dom(r, 'li')
        r = dom_parser.parse_dom(r, 'a', req='href')
        # Year defaults to '0' (unknown) when the entry text has none.
        r = [(i.attrs['href'], i.content, re.findall('\((\d{4})', i.content)) for i in r]
        r = [(i[0], i[1], i[2][0] if i[2] else '0') for i in r]
        r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y][0]
        return source_utils.strip_domain(r)
    except:
        return
def __search_movie(self, imdb, year, type='filme'):
    """Search by IMDb id (accepting year +/- 1); return the relative URL only
    when the search yields exactly one result."""
    try:
        year_filter = ''.join('&veroeffentlichung[]=%s' % y
                              for y in (str(year), str(int(year) + 1), str(int(year) - 1)))
        query = urlparse.urljoin(self.base_link, self.search_link % (type, imdb) + year_filter)
        results = self.__proceed_search(query)
        # Only an unambiguous single hit is accepted.
        if len(results) == 1:
            return source_utils.strip_domain(results[0][0])
    except:
        return
def __search(self, titles):
    """Query the site's JSON search endpoint; return the first entry whose
    cleaned name matches any requested title, or None."""
    try:
        query = urlparse.urljoin(self.base_link, self.search_link % urllib.quote_plus(cleantitle.query(titles[0])))
        wanted = [cleantitle.get(name) for name in set(titles) if name]
        entries = json.loads(client.request(query, XHR=True))
        for entry in entries:
            if cleantitle.get(entry.get('name')) in wanted:
                return source_utils.strip_domain(entry.get('url'))
        return
    except:
        return
def __get_episode_link(self, url, episode='1'):
    """Return the link for the given episode from the show's episode list."""
    try:
        if not url:
            return
        url = urlparse.urljoin(self.base_link, url)
        r = client.request(url)
        r = dom_parser.parse_dom(r, 'ul', attrs={'class': 'all-episode'})
        r = dom_parser.parse_dom(r, 'li')
        r = dom_parser.parse_dom(r, 'a', attrs={'href': re.compile('.*-episode-%s\.\w+.*?' % episode)}, req='href')[0].attrs['href']
        return source_utils.strip_domain(r)
    except:
        return
def movie(self, imdb, title, localtitle, aliases, year):
    """Search the site for a movie; return its relative URL or None."""
    try:
        query = self.search_link % (urllib.quote_plus(title))
        query = urlparse.urljoin(self.base_link, query)
        t = cleantitle.get(title)
        y = [ '%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0' ]
        r = client.request(query)
        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'item'})
        r = [(dom_parser.parse_dom(i, 'a', attrs={'class': 'cluetip'}, req='href'), dom_parser.parse_dom(i, 'div', attrs={'class': 'description'})) for i in r]
        r = [(i[0][0].attrs['href'], dom_parser.parse_dom(i[1], 'h3', attrs={'class': 'text-nowrap'}), dom_parser.parse_dom(i[1], 'div', attrs={'class': 'meta'})) for i in r if i[0] and i[1]]
        r = [(i[0], i[1][0].content, dom_parser.parse_dom(i[2], 'span', attrs={'class': 'pull-left'})) for i in r if i[0] and i[1] and i[2]]
        # The meta span holds the year; strip any non-digit characters.
        r = [(i[0], i[1], re.sub('[^\d]+', '', i[2][0].content)) for i in r if i[2]]
        r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
        r = [i[0] for i in r if cleantitle.get(i[1]) == t and i[2] in y][0]
        return source_utils.strip_domain(r)
    except:
        return
def __search(self, titles, year):
    """Try each candidate title in turn; return the first URL whose cleaned
    title matches and whose year is exact. Logs a search fault on failure."""
    try:
        t = [cleantitle.get(i) for i in set(titles) if i]
        for title in titles:
            query = self.search_link
            try:
                title.encode('UTF-8')
                query %= urllib.quote_plus(title)
            except:
                # Title not valid UTF-8: the site expects Windows-1252 here.
                query %= title.decode('UTF-8').encode('Windows-1252')
            query = urlparse.urljoin(self.base_link, query)
            r = client.request(query)
            links = dom_parser.parse_dom(r, 'div', attrs={'class': 'search_frame'})
            links = [dom_parser.parse_dom(i, 'a') for i in links]
            links = [(i[1], i[2]) for i in links]
            links = [(i[0].attrs['href'], re.findall('>(.*?)<', i[0].content)[0], i[1].content) for i in links]
            links = sorted(links, key=lambda i: int(i[2]), reverse=True)  # with year > no year
            links = [ i[0] for i in links if cleantitle.get(i[1]) in t and year == i[2] ]
            if len(links) > 0:
                return source_utils.strip_domain(links[0])
        return
    except:
        # Record the failed search in the fault log; never raise to callers.
        try:
            source_faultlog.logFault(__name__, source_faultlog.tagSearch, titles[0])
        except:
            return
        return
def __search(self, titles, imdb, year):
    """Query the JSON search API; match by IMDb id, or by title and year."""
    try:
        query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0])))
        query = urlparse.urljoin(self.base_link, query)
        t = [cleantitle.get(i) for i in set(titles) if i]
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']
        r = client.request(query, XHR=True)
        r = json.loads(r)
        # Posts expose streaming URL, year and IMDb link via custom fields.
        r = [(i.get('title'), i.get('custom_fields', {})) for i in r.get('posts', [])]
        r = [(i[0], i[1]) for i in r if i[0] and i[1]]
        r = [(i[0], i[1].get('Streaming', ['']), i[1].get('Jahr', ['0']), i[1].get('IMDb-Link', [''])) for i in r if i]
        r = [(i[0], i[1][0], i[2][0], re.findall('.+?(tt\d+).*?', i[3][0])) for i in r if i[0] and i[1] and i[2] and i[3]]
        # An IMDb-id hit wins outright; otherwise title + year must match.
        r = [i[1] for i in r if imdb in i[3] or (cleantitle.get(i[0]) in t and i[2] in y)][0]
        return source_utils.strip_domain(r)
    except:
        return
def __search(self, titles, year):
    """Search "<title> <year>" via self.scraper; return the first figcaption
    whose cleaned title matches any requested title."""
    try:
        query = self.search_link % (urllib.quote_plus(
            cleantitle.query(titles[0] + ' ' + year)))
        query = urlparse.urljoin(self.base_link, query)
        t = [cleantitle.get(i) for i in set(titles) if i]
        r = self.scraper.get(query).content
        r = dom_parser.parse_dom(r, 'figure', attrs={'class': 'pretty-figure'})
        r = dom_parser.parse_dom(r, 'figcaption')
        for i in r:
            # Decode HTML entities before cleaning the title.
            title = client.replaceHTMLCodes(i[0]['title'])
            title = cleantitle.get(title)
            if title in t:
                x = dom_parser.parse_dom(i, 'a', req='href')
                return source_utils.strip_domain(x[0][0]['href'])
        return
    except:
        return
def __search(self, titles):
    """Search (query percent-encoded twice, as the site expects) and return
    the first result whose cleaned title matches."""
    try:
        query = self.search_link % (urllib.quote_plus(urllib.quote_plus(cleantitle.query(titles[0]))))
        query = urlparse.urljoin(self.base_link, query)
        t = [cleantitle.get(i) for i in set(titles) if i]
        # 'movlang_de' presumably restricts results to German-language
        # entries — verify against the site's search form.
        post = urllib.urlencode({'movlang_de': '1', 'movlang': ''})
        r = client.request(query, post=post)
        r = dom_parser.parse_dom(r, 'table', attrs={'class': 'table'})
        r = dom_parser.parse_dom(r, 'a', attrs={'class': 'PreviewImage'})
        for x in r:
            title = cleantitle.get(x[1])
            if title in t:
                return source_utils.strip_domain(x[0]['href'])
        return
    except:
        return
def episode(self, url, imdb, tvdb, title, premiered, season, episode):
    """Resolve the "-SxE"-suffixed episode link from the show page."""
    try:
        if not url:
            return
        url = urlparse.urljoin(self.base_link, url)
        # output='geturl' returns the final URL after redirects.
        url = client.request(url, output='geturl')
        # NOTE(review): int comparison; if callers pass season/episode as
        # strings this pilot special-case never triggers — confirm callers.
        if season == 1 and episode == 1:
            season = episode = ''
        r = client.request(url)
        r = dom_parser.parse_dom(r, 'ul', attrs={'class': 'episodios'})
        r = dom_parser.parse_dom(
            r, 'a', attrs={
                'href': re.compile('[^\'"]*%s' % ('-%sx%s' % (season, episode)))
            })[0].attrs['href']
        return source_utils.strip_domain(r)
    except:
        return
def __search(self, titles):
    """Search the movie list; return the first entry whose cleaned title
    matches any requested title (None implicitly when nothing matches)."""
    try:
        query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0])))
        query = urlparse.urljoin(self.base_link, query)
        t = [cleantitle.get(i) for i in set(titles) if i]
        r = client.request(query)
        r = dom_parser.parse_dom(r, 'ul', attrs={'class': 'MovieList'})
        r = dom_parser.parse_dom(r, 'li', attrs={'class': 'TPostMv'})
        r = dom_parser.parse_dom(r, 'a')
        for i in r:
            title = dom_parser.parse_dom(i, 'h2', attrs={'class': 'Title'})
            title = cleantitle.get(title[0][1])
            if title in t:
                return source_utils.strip_domain(i[0]['href'])
    except:
        return
def episode(self, url, imdb, tvdb, title, premiered, season, episode):
    """Resolve the "-SxE"-suffixed episode link (self.scraper variant)."""
    try:
        if not url:
            return
        r = self.scraper.get(url).content
        # NOTE(review): int comparison; if callers pass season/episode as
        # strings this pilot special-case never triggers — confirm callers.
        if season == 1 and episode == 1:
            season = episode = ''
        r = dom_parser.parse_dom(r, 'ul', attrs={'class': 'episodios'})
        r = dom_parser.parse_dom(
            r, 'a', attrs={
                'href': re.compile('[^\'"]*%s' % ('-%sx%s' % (season, episode)))
            })[0].attrs['href']
        return source_utils.strip_domain(r)
    except:
        # NOTE: returns "" on failure (sibling methods return None); both falsy.
        return ""
def __search(self, titles, year):
    """Search; match cleaned title and year against article entries."""
    try:
        query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0])))
        query = urlparse.urljoin(self.base_link, query)
        t = [cleantitle.get(i) for i in set(titles) if i]
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']
        r = client.request(query)
        r = dom_parser.parse_dom(r, 'article')
        r = [(dom_parser.parse_dom(i, 'div', attrs={'class': 'title'}), dom_parser.parse_dom(i, 'span', attrs={'class': 'year'})) for i in r]
        r = [(dom_parser.parse_dom(i[0][0], 'a', req='href'), i[1][0].content) for i in r if i[0] and i[1]]
        r = [(i[0][0].attrs['href'], i[0][0].content, i[1]) for i in r if i[0]]
        r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
        r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y][0]
        return source_utils.strip_domain(r)
    except:
        return
def episode(self, url, imdb, tvdb, title, premiered, season, episode):
    """Resolve the show URL and append season/episode as query parameters."""
    try:
        if not url:
            return
        data = urlparse.parse_qs(url)
        data = dict([(i, data[i][0]) if data[i] else (i, '') for i in data])
        tvshowtitle = data['tvshowtitle']
        # SECURITY: eval() on the serialized aliases string; it is produced by
        # this add-on itself, but eval of stored strings is still risky.
        aliases = source_utils.aliases_to_array(eval(data['aliases']))
        aliases.append(data['localtvshowtitle'])
        url = self.__search([tvshowtitle] + aliases, data['year'], season)
        if not url:
            return
        # NOTE(review): both parameters are joined with '?' (not '&'); the
        # downstream parser apparently expects this exact shape — confirm
        # before normalizing.
        urlWithEpisode = url + "?episode=" + str(
            episode) + "?season=" + str(season)
        return source_utils.strip_domain(urlWithEpisode)
    except:
        return
def __get_episode_link(self, url, episode='1'):
    """Find the link for *episode* in an 'all-episode' listing page."""
    try:
        if not url:
            return
        page = client.request(urlparse.urljoin(self.base_link, url))
        items = dom_parser.parse_dom(page, 'ul', attrs={'class': 'all-episode'})
        items = dom_parser.parse_dom(items, 'li')
        anchor = dom_parser.parse_dom(
            items, 'a',
            attrs={'href': re.compile('.*-episode-%s\.\w+.*?' % episode)},
            req='href')[0]
        return source_utils.strip_domain(anchor.attrs['href'])
    except:
        return
def __search(self, title, year):
    """POST a keyword search (XHR, JSON response) and return the relative
    link whose title matches *title* exactly and whose year is within
    +/- 1 of *year* (or missing, listed as '0').
    """
    try:
        r = urllib.urlencode({'keyword': title})
        r = client.request(urlparse.urljoin(self.base_link, self.search_link), XHR=True, post=r)
        t = cleantitle.get(title)
        y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']
        r = json.loads(r)
        # (link, markup-stripped title) pairs from the JSON payload.
        r = [(i['link'], re.sub('<.+?>|</.+?>', '', i['title'])) for i in r if 'title' in i and 'link' in i]
        # Drop a trailing "Movie N: ..." suffix when present.
        r = [(i[0], i[1], re.findall('(.+?)\s*Movie \d+:.+?$', i[1], re.DOTALL)) for i in r]
        r = [(i[0], i[2][0] if len(i[2]) > 0 else i[1]) for i in r]
        # Split "Title (YYYY)" into title and year; year defaults to '0'.
        r = [(i[0], i[1], re.findall('(.+?) \((\d{4})\)?', i[1])) for i in r]
        r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0') for i in r]
        r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
        r = [i[0] for i in r if t == cleantitle.get(i[1]) and i[2] in y][0]
        return source_utils.strip_domain(r)
    except:
        return
def __search(self, titles, year):
    """Match a title against the cached genre index.

    Accepts exact clean-title matches or looser substring matches, but
    narrows back to exact matches whenever the result is ambiguous.
    """
    try:
        html = cache.get(client.request, 48, urlparse.urljoin(self.base_link, self.search_link))
        wanted = [cleantitle.get(alias) for alias in set(titles) if alias]

        anchors = dom_parser.parse_dom(html, "div", attrs={"class": "genre"})
        anchors = dom_parser.parse_dom(anchors, "a")
        candidates = [(a.attrs["href"], a.content) for a in anchors]
        candidates = [c for c in candidates
                      if cleantitle.get(c[1]) in wanted
                      or any([w in cleantitle.get(c[1]) for w in wanted])]
        if len(candidates) > 1:
            candidates = [c for c in candidates if cleantitle.get(c[1]) in wanted]
        if len(candidates) > 0:
            return source_utils.strip_domain(candidates[0][0])
        raise Exception()
    except:
        return
def __search(self, titles, year):
    """Search "<title> <year>" and scan 'v_pict' blocks for a title/year
    match; returns the relative link of the first hit.
    """
    try:
        query = self.search_link % (urllib.quote_plus(
            cleantitle.getsearch(titles[0] + ' ' + year)))
        query = urlparse.urljoin(self.base_link, query)
        # Only the first alias (after set() de-dup) is used for matching.
        t = [cleantitle.get(i) for i in set(titles) if i][0]
        r = client.request(query)
        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'v_pict'})
        for i in r:
            title = re.findall('alt="(.+?)"', i[1], re.DOTALL)[0]
            y = re.findall('(\d{4})', title, re.DOTALL)[0]
            title = re.sub('<\w+>|</\w+>', '', title)
            title = cleantitle.get(title)
            # NOTE(review): only the first word of the cleaned title is
            # kept and 'in t' is then a substring test against the alias —
            # a very loose match, presumably deliberate for this site;
            # confirm before tightening.
            title = re.findall('(\w+)', cleantitle.get(title))[0]
            if title in t and year == y:
                url = re.findall('href="(.+?)"', i[1], re.DOTALL)[0]
                return source_utils.strip_domain(url)
        return
    except:
        return
def tvshow(self, imdb, tvdb, tvshowtitle, localtvshowtitle, aliases, year):
    """Locate the show page whose title and year both match exactly."""
    try:
        slug = cleantitle.geturl(tvshowtitle)
        search_url = urlparse.urljoin(
            self.base_link, self.search_link % slug.replace('-', '+'))

        html = client.request(search_url)
        items = client.parseDOM(html, 'div', {'class': 'result-item'})
        items = [(dom_parser2.parse_dom(i, 'a', req='href')[0],
                  client.parseDOM(i, 'img', ret='alt')[0],
                  dom_parser2.parse_dom(i, 'span', attrs={'class': 'year'}))
                 for i in items]
        matches = [(i[0].attrs['href'], i[1], i[2][0].content) for i in items
                   if (cleantitle.get(i[1]) == cleantitle.get(tvshowtitle)
                       and i[2][0].content == year)]
        return source_utils.strip_domain(matches[0][0])
    except:
        return
def __search(self, imdb):
    """Search by IMDb id.

    When the search returns several links, each candidate page is fetched
    and checked for an embedded imdbid marker; the last page containing it
    wins. Returns the relative URL, or None when nothing matches.
    """
    try:
        r = client.request(urlparse.urljoin(self.base_link, self.search_link % imdb))
        r = dom_parser.parse_dom(r, 'a', req='href')
        r = [i.attrs['href'] for i in r if i]
        # Fix: 'url' was previously unbound (NameError) when several
        # results existed but none contained the imdbid marker; the bare
        # except masked that as a None return. Initialize it explicitly.
        url = None
        if len(r) > 1:
            for i in r:
                data = client.request(urlparse.urljoin(self.base_link, i))
                data = re.compile('(imdbid\s*[=|:]\s*"%s"\s*,)' % imdb, re.DOTALL).findall(data)
                if len(data) >= 1:
                    url = i
        else:
            # Zero results raises IndexError here, handled by the except.
            url = r[0]
        if url:
            return source_utils.strip_domain(url)
    except:
        return
def __search(self, titles, year, episode='0'):
    """Search; when *episode* > 0 the query becomes "<title> episode N"
    and results must carry the same episode number. Year may match
    +/- 1 or be absent ('0').
    """
    try:
        title = titles[0]
        if int(episode) > 0:
            title += ' episode %s' % episode
        t = [cleantitle.get(i) for i in set(titles) if i]
        y = [
            '%s' % str(year),
            '%s' % str(int(year) + 1),
            '%s' % str(int(year) - 1), '0'
        ]
        r = client.request(
            urlparse.urljoin(self.base_link, self.search_link) %
            urllib.quote_plus(cleantitle.query(title)))
        r = dom_parser.parse_dom(r, 'div', attrs={'id': 'entries'})
        r = dom_parser.parse_dom(r, 'div', attrs={'class': 'post'})
        r = dom_parser.parse_dom(r, 'h3', attrs={'class': 'title'})
        r = dom_parser.parse_dom(r, 'a', req='href')
        r = [(i.attrs['href'], i.content.lower()) for i in r if i]
        # Split off "(YYYY" into a year; default '0' when absent.
        r = [(i[0], i[1], re.findall('(.+?) \(*(\d{4})', i[1])) for i in r]
        r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1],
              i[2][0][1] if len(i[2]) > 0 else '0') for i in r]
        # Split off "episode N"; default '0' when absent.
        r = [(i[0], i[1], i[2], re.findall('(.+?)\s+(?:episode)\s+(\d+)', i[1])) for i in r]
        r = [(i[0], i[3][0][0] if len(i[3]) > 0 else i[1], i[2],
              i[3][0][1] if len(i[3]) > 0 else '0') for i in r]
        # Unnumbered results count as episode 1 when an episode was requested.
        r = [(i[0], i[1].replace(' hd', ''), i[2],
              '1' if int(episode) > 0 and i[3] == '0' else i[3]) for i in r]
        r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
        r = [
            i[0] for i in r
            if cleantitle.get(i[1]) in t and i[2] in y and int(i[3]) == int(episode)
        ][0]
        return source_utils.strip_domain(r)
    except:
        return
def __search(self, titles, year):
    """POST-search with the first alias; match cleaned title and year
    (+/- 1, or '0' when the result has no year). Logs to the fault log
    on failure.
    """
    try:
        t = [cleantitle.get(i) for i in set(titles) if i]
        y = [
            '%s' % str(year),
            '%s' % str(int(year) + 1),
            '%s' % str(int(year) - 1), '0'
        ]
        r = cache.get(client.request, 4,
                      urlparse.urljoin(self.base_link, self.search_link),
                      post={'query': cleantitle.query(titles[0])})
        r = dom_parser.parse_dom(r, 'li', attrs={'class': 'entTd'})
        r = dom_parser.parse_dom(r, 'div', attrs={'class': 've-screen'}, req='title')
        # The tooltip title reads "Name - something"; keep the name part.
        r = [(dom_parser.parse_dom(i, 'a', req='href'),
              i.attrs['title'].split(' - ')[0]) for i in r]
        # Split "(YYYY" into a year; default '0' when absent.
        r = [(i[0][0].attrs['href'], i[1], re.findall('(.+?) \(*(\d{4})', i[1])) for i in r]
        r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1],
              i[2][0][1] if len(i[2]) > 0 else '0') for i in r]
        r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
        r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y]
        if len(r) > 0:
            r = r[0]
        else:
            return
        return source_utils.strip_domain(r)
    except:
        try:
            source_faultlog.logFault(__name__, source_faultlog.tagSearch, titles[0])
        except:
            return
        return
def __search(self, isSerieSearch, titles, isTitleClean):
    """Query each title variant in turn; return the first matching link.

    With isTitleClean, both the wanted set and the queries are passed
    through self.titleclean first.
    """
    try:
        t = [cleantitle.get(i) for i in set(titles) if i]
        if isTitleClean:
            t = [cleantitle.get(self.titleclean(i)) for i in set(titles) if i]
        for title in titles:
            if isTitleClean:
                title = self.titleclean(title)
            query = self.search_link % (urllib.quote_plus(title))
            query = urlparse.urljoin(self.base_link, query)
            oRequest = cRequestHandler(query)
            # Keep raw line structure so the regexes below see the markup as-is.
            oRequest.removeBreakLines(False)
            oRequest.removeNewLines(False)
            r = oRequest.request()
            r = dom_parser.parse_dom(r, 'article')
            r = dom_parser.parse_dom(r, 'a', attrs={'class': 'rb'}, req='href')
            r = [(i.attrs['href'], i.content) for i in r]
            if len(r) > 0:
                if isSerieSearch:
                    # NOTE(review): with isSerieSearch True, and/or
                    # precedence reduces this to the truthiness of the
                    # text before 'S<digit>' — the 'in t' membership is
                    # short-circuited away. Looks suspect; confirm the
                    # intended behaviour before changing.
                    r = [i[0] for i in r if cleantitle.get(i[1]) in t and not isSerieSearch or cleantitle.get(
                        re.findall('(.*?)S\d', i[1])[0]) and isSerieSearch]
                else:
                    r = [i[0] for i in r if cleantitle.get(
                        i[1]) in t and not isSerieSearch]
                if len(r) > 0:
                    url = source_utils.strip_domain(r[0])
                    return url
        return
    except:
        try:
            source_faultlog.logFault(
                __name__, source_faultlog.tagSearch, titles[0])
        except:
            return
        return
def __search(self, isSerieSearch, titles, year):
    """Search the cover-preview table; prefer year matches for movies and
    'Serie' entries for series searches, with a subtitle-free fallback.
    """
    try:
        q = self.search_link % titles[0]
        q = urlparse.urljoin(self.base_link, q)
        t = [cleantitle.get(i) for i in set(titles) if i]
        r = cache.get(self.scraper.get, 4, q).content
        links = dom_parser.parse_dom(
            r, 'tr', attrs={'id': re.compile('coverPreview.+?')})
        tds = [dom_parser.parse_dom(i, 'td') for i in links]
        # Keep rows whose 5th cell mentions 'ger' (presumably the language
        # column — TODO confirm); pair each link with any year found in
        # the 2nd cell. Short rows raise here and bail out via except.
        tuples = [(dom_parser.parse_dom(i[0], 'a')[0], re.findall('>(\d{4})', i[1].content)) for i in tds if 'ger' in i[4].content]
        tuplesSortByYear = [(i[0].attrs['href'], i[0].content) for i in tuples if year in i[1]]
        if len(tuplesSortByYear) > 0 and not isSerieSearch:
            tuples = tuplesSortByYear
        elif isSerieSearch:
            tuples = [(i[0].attrs['href'], i[0].content) for i in tuples if "serie" in i[0].content.lower()]
        else:
            tuples = [(i[0].attrs['href'], i[0].content) for i in tuples]
        urls = [i[0] for i in tuples if cleantitle.get(i[1]) in t]
        # Fallback: anything that is not a subtitle-only ('untertitel') release.
        if len(urls) == 0:
            urls = [i[0] for i in tuples if 'untertitel' not in i[1]]
        if len(urls) > 0:
            return source_utils.strip_domain(urls[0])
    except:
        try:
            source_faultlog.logFault(__name__, source_faultlog.tagSearch, titles[0])
        except:
            return
        return
def __search(self, titles):
    """Try each title against the site's POST search form and return the
    first result whose cleaned title contains any wanted alias.
    """
    try:
        wanted = [cleantitle.get(alias) for alias in set(titles) if alias]
        for title in titles:
            params = {'do': 'search', 'subaction': 'search', 'story': title}
            html = cache.get(client.request, 4, self.base_link,
                             post=params,
                             headers={'Content-Type': 'application/x-www-form-urlencoded'},
                             error=True)
            entries = dom_parser.parse_dom(html, 'div', attrs={'class': 'shortstory-in'})
            entries = [dom_parser.parse_dom(e, 'a')[0] for e in entries]
            entries = [(e.attrs['href'], e.attrs['title']) for e in entries]
            hits = [href for href, name in entries
                    if any(w in cleantitle.get(name) for w in wanted)]
            if len(hits) > 0:
                return source_utils.strip_domain(hits[0])
        return
    except:
        try:
            source_faultlog.logFault(__name__, source_faultlog.tagSearch, titles[0])
        except:
            return
        return
def __search(self, titles, year):
    """Search the panel listing and return the first entry whose cleaned
    title matches an alias and whose year equals *year* exactly.
    """
    try:
        query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0])))
        query = urlparse.urljoin(self.base_link, query)

        wanted = [cleantitle.get(alias) for alias in set(titles) if alias]

        html = client.request(query)
        html = dom_parser.parse_dom(html, 'div', attrs={'id': 'main'})
        panels = dom_parser.parse_dom(html, 'div', attrs={'class': 'panel-body'})

        rows = [(dom_parser.parse_dom(p.content, 'h4', attrs={'class': 'title-list'}),
                 dom_parser.parse_dom(p.content, 'a', attrs={'href': re.compile('.*/year/.*')}))
                for p in panels]
        rows = [(dom_parser.parse_dom(row[0][0].content, 'a', req='href'),
                 row[1][0].content if row[1] else '0')
                for row in rows if row[0]]
        rows = [(row[0][0].attrs['href'], row[0][0].content,
                 re.sub('<.+?>|</.+?>', '', row[1]))
                for row in rows if row[0] and row[1]]
        rows = [(href, name, y.strip()) for href, name, y in rows if y]

        # Entries that carry a year sort ahead of those without one.
        rows = sorted(rows, key=lambda row: int(row[2]), reverse=True)

        match = [href for href, name, y in rows
                 if cleantitle.get(name) in wanted and y == year][0]
        return source_utils.strip_domain(match)
    except:
        return
def __search(self, titles):
    """POST a keyword search; the JSON 'content' field carries an HTML
    result list whose 'ss-title' anchors are matched against the aliases.
    """
    try:
        query = urlparse.urljoin(self.base_link, self.search_link)
        post = urllib.urlencode({'keyword': titles[0]})

        wanted = [cleantitle.get(alias) for alias in set(titles) if alias]

        response = client.request(query, post=post)
        markup = json.loads(response)['content']

        for item in dom_parser.parse_dom(markup, 'li'):
            anchor = dom_parser.parse_dom(item[1], 'a', attrs={'class': 'ss-title'})
            if cleantitle.get(anchor[0][1]) in wanted:
                return source_utils.strip_domain(anchor[0][0]['href'])
    except:
        return
def __search(self, titles, year):
    """Look the title up in the cached genre index (exact clean-title
    matches only); returns "" when nothing matches.
    """
    try:
        html = cache.get(client.request, 4, urlparse.urljoin(self.base_link, self.search_link))
        wanted = [cleantitle.get(alias) for alias in set(titles) if alias]

        anchors = dom_parser.parse_dom(html, "div", attrs={"class": "genre"})
        anchors = dom_parser.parse_dom(anchors, "a")
        hits = [a.attrs["href"] for a in anchors if cleantitle.get(a.content) in wanted]

        if len(hits) > 0:
            return source_utils.strip_domain(hits[0])
        return ""
    except:
        try:
            source_faultlog.logFault(__name__, source_faultlog.tagSearch, titles[0])
        except:
            return
        return
def episode(self, url, imdb, tvdb, title, premiered, season, episode):
    """Pick the episode link whose 'numerando' label equals '<S>x<E>'."""
    try:
        if not url:
            return
        url = urlparse.urljoin(self.base_link, url)
        scraper = cfscrape.create_scraper()
        page = scraper.get(url).content
        page = client.parseDOM(page, 'ul', attrs={'class': 'episodios'})
        links = client.parseDOM(page, 'div', attrs={'class': 'episodiotitle'})
        numbered = zip(client.parseDOM(page, 'div', attrs={'class': 'numerando'}),
                       client.parseDOM(links, 'a', ret='href'))
        target = '%dx%d' % (int(season), int(episode))
        for label, href in numbered:
            if label == target:
                return source_utils.strip_domain(href)
    except:
        return
def episode(self, url, imdb, tvdb, title, premiered, season, episode):
    """Find the row for *season*/*episode* in the show's episodes table."""
    try:
        if not url:
            return
        page = client.request(urlparse.urljoin(self.base_link, url))
        page = dom_parser.parse_dom(page, 'table', attrs={'class': 'episodes'})
        rows = dom_parser.parse_dom(
            page, 'tr', attrs={'class': ['episode', 'season_%s' % season]})
        cells = dom_parser.parse_dom(rows, 'span', attrs={'class': 'normal'})
        pairs = [(dom_parser.parse_dom(c, 'a', req='href'),
                  dom_parser.parse_dom(c, 'b')) for c in cells]
        pairs = [(p[0][0].attrs['href'], p[1][0].content) for p in pairs if p[0] and p[1]]
        hits = [href for href, label in pairs if label.upper() == 'E%s' % episode]
        if len(hits) >= 1:
            return source_utils.strip_domain(hits[0])
    except:
        return
def __get_episode_link(self, url, episode='1'):
    """Resolve the per-episode link from the 'list-espisode' (sic) block,
    matching on a trailing 'EP <n>' label.
    """
    try:
        if not url:
            return
        page = client.request(urlparse.urljoin(self.base_link, url))
        page = dom_parser.parse_dom(page, 'div', attrs={'id': 'list-espisode'})
        page = dom_parser.parse_dom(page, 'div', attrs={'class': 'movie-item-espisode'})
        anchors = dom_parser.parse_dom(page, 'a', req='href')
        entries = [(a.attrs['href'], a.content) for a in anchors]
        entries = [(href, re.findall('EP\s*(\d+)$', label)) for href, label in entries]
        match = [href for href, num in entries
                 if num and int(num[0]) == int(episode)][0]
        return source_utils.strip_domain(match)
    except:
        return
def __search(self, isSerieSearch, titles):
    """Search and match "<name>(<year>" entries; series entries are keyed
    on the text before 'Staffel'. Returns the first matching link.
    """
    try:
        t = [cleantitle.get(i) for i in set(titles) if i]
        url = self.search % titles[0]
        sHtmlContent = cache.get(self.scraper.get, 4, url).content
        search_results = dom_parser.parse_dom(sHtmlContent, 'div', attrs={'class': 'title'})
        search_results = dom_parser.parse_dom(search_results, 'a')
        search_results = [(i.attrs['href'], i.content) for i in search_results]
        # Split "<name>(<year>" into (name, year); entries without that
        # shape raise IndexError and bail out via the except.
        search_results = [(i[0], re.findall('(.*?)\((\d+)', i[1])[0]) for i in search_results]
        # NOTE(review): for serie searches the and/or precedence makes
        # this depend only on the truthiness of the text before 'Staffel',
        # not on membership in t — looks suspect; confirm before changing.
        search_results = [i[0] for i in search_results if cleantitle.get(i[1][0]) in t and not isSerieSearch or isSerieSearch and cleantitle.get(re.findall('(.*?)Staffel', i[1][0])[0])]
        if len(search_results) > 0:
            return source_utils.strip_domain(search_results[0])
        return
    except:
        try:
            source_faultlog.logFault(__name__, source_faultlog.tagSearch, titles[0])
        except:
            return
        return
def episode(self, url, imdb, tvdb, title, premiered, season, episode):
    """Resolve *season*/*episode* to the onclick-target URL.

    Returns "" on failure (unlike most resolvers here, which return None).
    """
    try:
        if not url:
            return
        url = urlparse.urljoin(self.base_link, url)
        r = cache.get(client.request, 4, url)
        seasons = dom_parser.parse_dom(r, "div", attrs={"class": "section-watch-season"})
        # Index from the end of the list — presumably seasons are listed
        # newest-first on the page, so season N is the Nth from the end.
        # TODO confirm the ordering against the live markup.
        seasons = seasons[len(seasons)-int(season)]
        episodes = dom_parser.parse_dom(seasons, "tr")
        episodes = [(dom_parser.parse_dom(i, "th")[0].content, i.attrs["onclick"]) for i in episodes if "onclick" in i.attrs]
        # String comparison: assumes *episode* arrives as a string
        # matching the <th> text exactly — TODO confirm caller's type.
        # The target URL is the first quoted string in the onclick handler.
        episodes = [re.findall("'(.*?)'", i[1])[0] for i in episodes if i[0] == episode][0]
        return source_utils.strip_domain(episodes)
    except:
        try:
            source_faultlog.logFault(__name__, source_faultlog.tagSearch, title)
        except:
            return
        return ""
def __search(self, imdb):
    """Search the static result table by IMDb id, keeping only entries
    whose language-flag image is numbered 1 or 15 and preferring the
    lower number.
    """
    try:
        html = cache.get(self.scraper.get, 4,
                         urlparse.urljoin(self.base_link, self.search_link % imdb)).content
        html = dom_parser.parse_dom(html, 'table', attrs={'id': 'RsltTableStatic'})
        rows = dom_parser.parse_dom(html, 'tr')
        rows = [(dom_parser.parse_dom(row, 'a', req='href'),
                 dom_parser.parse_dom(row, 'img', attrs={'alt': 'language'}, req='src'))
                for row in rows]
        rows = [(row[0][0].attrs['href'], row[0][0].content, row[1][0].attrs['src'])
                for row in rows if row[0] and row[1]]
        # Pull the numeric flag out of the image filename; '0' when absent.
        rows = [(href, name, re.findall('.+?(\d+)\.', src)) for href, name, src in rows]
        rows = [(href, name, flag[0] if len(flag) > 0 else '0') for href, name, flag in rows]
        rows = sorted(rows, key=lambda row: int(row[2]))  # german > german/subbed
        hits = [href for href, name, flag in rows if flag in ['1', '15']]
        if len(hits) > 0:
            return source_utils.strip_domain(hits[0])
        return ""
    except:
        try:
            source_faultlog.logFault(__name__, source_faultlog.tagSearch, imdb)
        except:
            return
        return