def __search(self, titles, year): try: t = [cleantitle.get(i) for i in set(titles) if i] y = [ '%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0' ] r = client.request(urlparse.urljoin(self.base_link, self.search_link), post={'query': cleantitle.query(titles[0])}) r = dom_parser.parse_dom(r, 'li', attrs={'class': 'entTd'}) r = dom_parser.parse_dom(r, 'div', attrs={'class': 've-screen'}, req='title') r = [(dom_parser.parse_dom(i, 'a', req='href'), i.attrs['title'].split(' - ')[0]) for i in r] r = [(i[0][0].attrs['href'], i[1], re.findall('(.+?) \(*(\d{4})', i[1])) for i in r] r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0') for i in r] r = sorted(r, key=lambda i: int(i[2]), reverse=True) # with year > no year r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y][0] return source_utils.strip_domain(r) except: return
def __search(self, titles, year): try: query = self.search_link % (urllib.quote_plus( cleantitle.query(titles[0] + ' ' + year))) query = urlparse.urljoin(self.base_link, query) t = [cleantitle.get(i) for i in set(titles) if i] r = client.request(query) r = dom_parser.parse_dom(r, 'figure', attrs={'class': 'pretty-figure'}) r = dom_parser.parse_dom(r, 'figcaption') for i in r: title = client.replaceHTMLCodes(i[0]['title']) title = cleantitle.get(title) if title in t: x = dom_parser.parse_dom(i, 'a', req='href') return source_utils.strip_domain(x[0][0]['href']) return except: return
def __search(self, search_link, imdb, titles): try: query = search_link % (urllib.quote_plus(cleantitle.query(titles[0]))) query = urlparse.urljoin(self.base_link, query) t = [cleantitle.get(i) for i in set(titles) if i] r = client.request(query) r = dom_parser.parse_dom(r, 'div', attrs={'class': 'big-list'}) r = dom_parser.parse_dom(r, 'table', attrs={'class': 'row'}) r = dom_parser.parse_dom(r, 'td', attrs={'class': 'list-name'}) r = dom_parser.parse_dom(r, 'a', req='href') r = [i.attrs['href']for i in r if i and cleantitle.get(i.content) in t][0] url = source_utils.strip_domain(r) r = client.request(urlparse.urljoin(self.base_link, url)) r = dom_parser.parse_dom(r, 'a', attrs={'href': re.compile('.*/tt\d+.*')}, req='href') r = [re.findall('.+?(tt\d+).*?', i.attrs['href']) for i in r] r = [i[0] for i in r if i] return url if imdb in r else None except: return
def __search(self, titles, imdb, year): try: query = self.search_link % (urllib.quote_plus( cleantitle.query(titles[0]))) query = urlparse.urljoin(self.base_link, query) t = [cleantitle.get(i) for i in set(titles) if i] y = [ '%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0' ] r = client.request(query, XHR=True) r = json.loads(r) r = [(i.get('title'), i.get('custom_fields', {})) for i in r.get('posts', [])] r = [(i[0], i[1]) for i in r if i[0] and i[1]] r = [(i[0], i[1].get('Streaming', ['']), i[1].get('Jahr', ['0']), i[1].get('IMDb-Link', [''])) for i in r if i] r = [(i[0], i[1][0], i[2][0], re.findall('.+?(tt\d+).*?', i[3][0])) for i in r if i[0] and i[1] and i[2] and i[3]] r = [ i[1] for i in r if imdb in i[3] or (cleantitle.get(i[0]) in t and i[2] in y) ][0] return source_utils.strip_domain(r) except: return
def __search(self, titles, year): try: query = self.search_link % (urllib.quote_plus( cleantitle.query(titles[0]))) query = urlparse.urljoin(self.base_link, query) t = [cleantitle.get(i) for i in set(titles) if i] y = [ '%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0' ] r = client.request(query) r = dom_parser.parse_dom(r, 'article') r = [(dom_parser.parse_dom(i, 'div', attrs={'class': 'title'}), dom_parser.parse_dom(i, 'span', attrs={'class': 'year'})) for i in r] r = [(dom_parser.parse_dom(i[0][0], 'a', req='href'), i[1][0].content) for i in r if i[0] and i[1]] r = [(i[0][0].attrs['href'], i[0][0].content, i[1]) for i in r if i[0]] r = sorted(r, key=lambda i: int(i[2]), reverse=True) # with year > no year r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y][0] return source_utils.strip_domain(r) except: return
def __search(self, titles, year, content): try: t = [cleantitle.get(i) for i in set(titles) if i] c = client.request(urlparse.urljoin(self.base_link, self.year_link % int(year)), output='cookie') p = urllib.urlencode({'search': cleantitle.query(titles[0])}) c = client.request(urlparse.urljoin(self.base_link, self.search_link), cookie=c, post=p, output='cookie') r = client.request(urlparse.urljoin(self.base_link, self.type_link % content), cookie=c, post=p) r = dom_parser.parse_dom(r, 'div', attrs={'id': 'content'}) r = dom_parser.parse_dom(r, 'tr') r = [dom_parser.parse_dom(i, 'td') for i in r] r = [dom_parser.parse_dom(i, 'a', req='href') for i in r] r = [(i[0].attrs['href'], i[0].content, i[1].content) for i in r if i] x = [] for i in r: if re.search('(?<=<i>\().*$', i[1]): x.append((i[0], re.search('(.*?)(?=\s<)', i[1]).group(), re.search('(?<=<i>\().*$', i[1]).group(), i[2])) else: x.append((i[0], i[1], i[1], i[2])) r = [i[0] for i in x if (cleantitle.get(i[1]) in t or cleantitle.get(i[2]) in t) and i[3] == year][0] return source_utils.strip_domain(r) except: return
def __search(self, titles): try: query = self.search_link % (urllib.quote_plus( cleantitle.query(titles[0]))) query = urlparse.urljoin(self.base_link, query) t = [cleantitle.get(i) for i in set(titles) if i] r = client.request(query) r = dom_parser.parse_dom(r, 'div', attrs={'class': 'nag'}) r = dom_parser.parse_dom(r, 'div', attrs={'class': 'item-video'}) r = dom_parser.parse_dom(r, 'h2', attrs={'class': 'entry-title'}) r = dom_parser.parse_dom(r, 'a', req='href') for i in r: title = i[1] if re.search('\*(?:.*?)\*', title) is not None: title = re.sub('\*(?:.*?)\*', '', title) title = cleantitle.get(title) if title in t: return source_utils.strip_domain(i[0]['href']) else: return except: return
def __search(self, titles, year, season='0'): try: query = self.search_link % (urllib.quote_plus( cleantitle.query(titles[0]))) query = urlparse.urljoin(self.base_link, query) t = [cleantitle.get(i) for i in set(titles) if i] y = [ '%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0' ] r = client.request(query) r = dom_parser.parse_dom(r, 'ul', attrs={'class': ['products', 'row']}) r = dom_parser.parse_dom( r, 'div', attrs={'class': ['box-product', 'clearfix']}) if int(season) > 0: r = [ i for i in r if dom_parser.parse_dom( i, 'div', attrs={'class': 'episode'}) ] else: r = [ i for i in r if not dom_parser.parse_dom( i, 'div', attrs={'class': 'episode'}) ] r = dom_parser.parse_dom(r, 'h3', attrs={'class': 'title-product'}) r = dom_parser.parse_dom(r, 'a', req='href') r = [(i.attrs['href'], i.content.lower()) for i in r if i] r = [(i[0], i[1], re.findall('(.+?) \(*(\d{4})', i[1])) for i in r] r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0') for i in r] r = [(i[0], i[1], i[2], re.findall('(.+?)\s+(?:staf+el|s)\s+(\d+)', i[1])) for i in r] r = [(i[0], i[3][0][0] if len(i[3]) > 0 else i[1], i[2], i[3][0][1] if len(i[3]) > 0 else '0') for i in r] r = [(i[0], i[1].replace(' hd', ''), i[2], '1' if int(season) > 0 and i[3] == '0' else i[3]) for i in r] r = sorted(r, key=lambda i: int(i[2]), reverse=True) # with year > no year r = [ i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y and int(i[3]) == int(season) ][0] url = source_utils.strip_domain(r) url = url.replace('-info', '-stream') return url except: return
def __search(self, titles, year, imdb): try: query = self.search_link % urllib.quote_plus( cleantitle.query(titles[0])) query = urlparse.urljoin(self.base_link, query) t = [cleantitle.get(i) for i in set(titles) if i] y = [ '%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0' ] r = client.request(query) r = dom_parser.parse_dom(r, 'div', attrs={'class': 'movie_cell'}) r = [(dom_parser.parse_dom(i, 'div', attrs={'class': 'bottom'}), dom_parser.parse_dom(i, 'div', attrs={'class': 'year'})) for i in r] r = [(dom_parser.parse_dom(i[0], 'a', req=['href', 'title']), re.findall('[(](\d{4})[)]', i[1][0].content)) for i in r if i[0] and i[1]] r = [(i[0][0].attrs['href'], i[0][0].content, i[1][0]) for i in r if i[0] and i[1]] r = [(i[0], i[1].lower(), i[2]) for i in r if i[2] in y] r = sorted(r, key=lambda i: int(i[2]), reverse=True) # with year > no year r = [i[0] for i in r if cleantitle.get(i[1]) in t] if len(r) > 1: for i in r: data = client.request(urlparse.urljoin(self.base_link, i)) data = dom_parser.parse_dom( data, 'a', attrs={'name': re.compile('.*/tt\d+.*')}, req='name') data = [ re.findall('.+?(tt\d+).*?', d.attrs['name']) for d in data ] data = [d[0] for d in data if len(d) > 0 and d[0] == imdb] if len(data) >= 1: url = i else: url = r[0] if url: return source_utils.strip_domain(url) except: return
def __search(self, titles, year, season='0'): try: aj = cache.get(self.__get_ajax_object, 24) t = [cleantitle.get(i) for i in set(titles) if i] y = [ '%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0' ] r = client.request(aj.get('ajax_url'), post={ 'action': aj.get('search'), 'nonce': aj.get('snonce'), 'query': cleantitle.query(titles[0]) }) r = dom_parser.parse_dom(r, 'div', attrs={'class': 'search-result'}) r = dom_parser.parse_dom(r, 'div', attrs={'class': 'search-item-content'}) r = dom_parser.parse_dom(r, 'a', req='href') r = [(i.attrs['href'], i.content.lower()) for i in r if i] r = [(i[0], i[1], re.findall('(.+?) \(*(\d{4})', i[1])) for i in r] r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0') for i in r] r = [(i[0], i[1], i[2], re.findall('(.+?)\s+(?:staf+el|s)\s+(\d+)', i[1])) for i in r] r = [(i[0], i[3][0][0] if len(i[3]) > 0 else i[1], i[2], i[3][0][1] if len(i[3]) > 0 else '0') for i in r] r = [(i[0], i[1].replace(' hd', ''), i[2], '1' if int(season) > 0 and i[3] == '0' else i[3]) for i in r] r = sorted(r, key=lambda i: int(i[2]), reverse=True) # with year > no year r = [ i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y and int(i[3]) == int(season) ][0] return source_utils.strip_domain(r) except: return
def __search(self, titles): try: query = self.search_link % urllib.quote_plus( cleantitle.query(titles[0])) query = urlparse.urljoin(self.base_link, query) t = [cleantitle.get(i) for i in set(titles) if i] r = client.request(query) r = json.loads(r) r = [(i.get('id'), i.get('value')) for i in r] r = [i[0] for i in r if cleantitle.get(i[1]) in t][0] return r except: return
def __search(self, titles, year): try: n = cache.get(self.__get_nonce, 24) query = self.search_link % (urllib.quote_plus(cleantitle.query(titles[0])), n) query = urlparse.urljoin(self.base_link, query) t = [cleantitle.get(i) for i in set(titles) if i] y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0'] r = client.request(query) r = json.loads(r) r = [(r[i].get('url'), r[i].get('title'), r[i].get('extra').get('date')) for i in r] r = sorted(r, key=lambda i: int(i[2]), reverse=True) # with year > no year r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y][0] return source_utils.strip_domain(r) except: return
def __search(self, titles, year, season='0'): try: query = self.search_link % urllib.quote_plus( cleantitle.query(titles[0])) query = urlparse.urljoin(self.base_link, query) t = [cleantitle.get(i) for i in set(titles) if i] y = [ '%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0' ] r = client.request(query) r = dom_parser.parse_dom(r, 'article', attrs={'class': 'shortstory'}) r = dom_parser.parse_dom(r, 'div', attrs={'class': 's_info'}) r = dom_parser.parse_dom(r, 'h2') r = dom_parser.parse_dom(r, 'a', req='href') r = [(i.attrs['href'], i.content.lower()) for i in r if i] r = [(i[0], re.sub('<.+?>|</.+?>', '', i[1]), re.findall('(.+?) \(*(\d{4})', i[1])) for i in r] r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0') for i in r] r = [(i[0], i[1], i[2], re.findall('(.+?)(\d+)\s+(?:staf+el|s)', i[1])) for i in r] r = [(i[0], i[3][0][0] if len(i[3]) > 0 else i[1], i[2], i[3][0][1] if len(i[3]) > 0 else '0') for i in r] r = [(i[0], i[1], i[2], '1' if int(season) > 0 and i[3] == '0' else i[3]) for i in r] r = sorted(r, key=lambda i: int(i[2]), reverse=True) # with year > no year r = [ i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y and int(i[3]) == int(season) ][0] return source_utils.strip_domain(r) except: return
def tvshow(self, imdb, tvdb, tvshowtitle, localtvshowtitle, aliases, year): try: query = self.search_link % urllib.quote_plus( cleantitle.query(tvshowtitle)) result = client.request(query) #tvshowtitle = cleantitle.get(tvshowtitle) t = [tvshowtitle] + source_utils.aliases_to_array(aliases) t = [cleantitle.get(i) for i in set(t) if i] result = re.compile( 'itemprop="url"\s+href="([^"]+).*?itemprop="name"\s+class="serie-title">([^<]+)', re.DOTALL).findall(result) for i in result: if cleantitle.get(cleantitle.normalize( i[1])) in t and year in i[1]: url = i[0] url = url.encode('utf-8') return url except: return
def __search(self, titles): try: query = self.search_link % (urllib.quote_plus( cleantitle.query(titles[0]))) query = urlparse.urljoin(self.base_link, query) t = [cleantitle.get(i) for i in set(titles) if i] r = client.request(query) r = dom_parser.parse_dom(r, 'ul', attrs={'class': 'MovieList'}) r = dom_parser.parse_dom(r, 'li', attrs={'class': 'TPostMv'}) r = dom_parser.parse_dom(r, 'a') for i in r: title = dom_parser.parse_dom(i, 'h2', attrs={'class': 'Title'}) title = cleantitle.get(title[0][1]) if title in t: return source_utils.strip_domain(i[0]['href']) except: return
def __search(self, titles): try: query = self.search_link % (urllib.quote_plus( urllib.quote_plus(cleantitle.query(titles[0])))) query = urlparse.urljoin(self.base_link, query) t = [cleantitle.get(i) for i in set(titles) if i] post = urllib.urlencode({'movlang_de': '1', 'movlang': ''}) r = client.request(query, post=post) r = dom_parser.parse_dom(r, 'table', attrs={'class': 'table'}) r = dom_parser.parse_dom(r, 'a', attrs={'class': 'PreviewImage'}) for x in r: title = cleantitle.get(x[1]) if title in t: return source_utils.strip_domain(x[0]['href']) return except: return
def __search(self, titles, year, content_type): try: query = self.search_link % (urllib.quote_plus( cleantitle.query(titles[0])), content_type) query = urlparse.urljoin(self.base_link, query) t = [cleantitle.get(i) for i in set(titles) if i] y = [ '%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0' ] r = client.request(query) r = dom_parser.parse_dom(r, 'div', attrs={'id': 'search'}) r = dom_parser.parse_dom(r, 'table') r = dom_parser.parse_dom(r, 'tr', attrs={'class': re.compile('entry\d+')}) r = [(dom_parser.parse_dom(i, 'a'), dom_parser.parse_dom(i, 'img', attrs={ 'class': 'flag', 'alt': 'de' })) for i in r] r = [i[0] for i in r if i[0] and i[1]] r = [(i[0].attrs['href'], i[0].content) for i in r] r = [(i[0], i[1], re.findall('(.+?) \(*(\d{4})', i[1])) for i in r] r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0') for i in r] r = sorted(r, key=lambda i: int(i[2]), reverse=True) # with year > no year r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y][0] return source_utils.strip_domain(r) except: return
def __search(self, titles, year): try: r = urllib.urlencode({'keyword': titles[0]}) r = client.request(urlparse.urljoin(self.base_link, self.search_link), XHR=True, post=r) if r is None: r = urllib.urlencode({'keyword': cleantitle.query(titles[0])}) r = client.request(urlparse.urljoin(self.base_link, self.search_link), XHR=True, post=r) t = [cleantitle.get(i) for i in set(titles) if i] y = [ '%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0' ] r = json.loads(r) r = [(i['link'], re.sub('<.+?>|</.+?>', '', i['title'])) for i in r if 'title' in i and 'link' in i] r = [(i[0], i[1], re.findall('(.+?)\s*Movie \d+:.+?$', i[1], re.DOTALL)) for i in r] r = [(i[0], i[2][0] if len(i[2]) > 0 else i[1]) for i in r] r = [(i[0], i[1], re.findall('(.+?) \((\d{4})\)?', i[1])) for i in r] r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0') for i in r] r = sorted(r, key=lambda i: int(i[2]), reverse=True) # with year > no year r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y][0] return source_utils.strip_domain(r) except: return
def __search(self, titles, episode): try: query = self.search_link % urllib.quote_plus( cleantitle.query(titles[0]) + ' ' + str(episode)) query = urlparse.urljoin(self.base_link, query) t = [cleantitle.get(i) + str(episode) for i in set(titles) if i] r = client.request(query) r = r.split('</style>')[-1].strip() r = json.loads(r) r = [(i.get('title', {}).get('rendered'), i.get('content', {}).get('rendered')) for i in r] r = [(re.sub('ger (?:sub|dub)', '', i[0], flags=re.I).strip(), i[1]) for i in r if i[0] and i[1]] r = [(i[0], re.findall('(.+?) (\d*)$', i[0]), i[1]) for i in r] r = [ (i[0] if not i[1] else i[1][0][0] + ' ' + str(int(i[1][0][1])), i[2]) for i in r ] r = [ dom_parser.parse_dom(i[1], 'div') for i in r if cleantitle.get(i[0]) in t ] r = [[ x.attrs['href'] for x in dom_parser.parse_dom(i, 'a', req='href') ] + [ x.attrs['src'] for x in dom_parser.parse_dom(i, 'iframe', req='src') ] for i in r] return r[0] except: return
def __search(self, titles, year): try: t = [cleantitle.get(i) for i in set(titles) if i] y = [ '%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0' ] r = client.request( urlparse.urljoin(self.base_link, self.search_link), post=urllib.urlencode({'val': cleantitle.query(titles[0])}), XHR=True) r = dom_parser.parse_dom(r, 'li') r = dom_parser.parse_dom(r, 'a', req='href') r = [(i.attrs['href'], i.content, re.findall('\((\d{4})', i.content)) for i in r] r = [(i[0], i[1], i[2][0] if i[2] else '0') for i in r] r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y][0] return source_utils.strip_domain(r) except: return
def __search(self, titles, year): try: query = self.search_link % (urllib.quote_plus( cleantitle.query(titles[0]))) query = urlparse.urljoin(self.base_link, query) t = [cleantitle.get(i) for i in set(titles) if i] r = client.request(query) r = dom_parser.parse_dom(r, 'div', attrs={'id': 'main'}) r = dom_parser.parse_dom(r, 'div', attrs={'class': 'panel-body'}) r = [ (dom_parser.parse_dom(i.content, 'h4', attrs={'class': 'title-list'}), dom_parser.parse_dom(i.content, 'a', attrs={'href': re.compile('.*/year/.*')})) for i in r ] r = [(dom_parser.parse_dom(i[0][0].content, 'a', req='href'), i[1][0].content if i[1] else '0') for i in r if i[0]] r = [(i[0][0].attrs['href'], i[0][0].content, re.sub('<.+?>|</.+?>', '', i[1])) for i in r if i[0] and i[1]] r = [(i[0], i[1], i[2].strip()) for i in r if i[2]] r = sorted(r, key=lambda i: int(i[2]), reverse=True) # with year > no year r = [ i[0] for i in r if cleantitle.get(i[1]) in t and i[2] == year ][0] return source_utils.strip_domain(r) except: return
def __search(self, titles, year): try: query = self.search_link % urllib.quote_plus(cleantitle.query(titles[0])) query = urlparse.urljoin(self.base_link, query) t = [cleantitle.get(i) for i in set(titles) if i] y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0'] r = client.request(query, XHR=True) if r and r.startswith('{'): r = '[%s]' % r r = json.loads(r) r = [(i['url'], i['name']) for i in r if 'name' in i and 'url' in i] r = [(i[0], i[1], re.findall('(.+?) \(*(\d{4})?\)*$', i[1])) for i in r] r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0') for i in r] r = sorted(r, key=lambda i: int(i[2]), reverse=True) # with year > no year r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y][0] url = source_utils.strip_domain(r) url = url.replace('serien/', '') return url except: return
def __search(self, titles): try: query = self.search_link % (urllib.quote_plus( cleantitle.query(titles[0]))) query = urlparse.urljoin(self.base_link, query) t = [cleantitle.get(i) for i in set(titles) if i] r = client.request(query) r = dom_parser.parse_dom(r, 'article') r = dom_parser.parse_dom(r, 'div', attrs={'class': 'title'}) r = dom_parser.parse_dom(r, 'a', req='href') for i in r: title = client.replaceHTMLCodes(r[0][1]) title = cleantitle.get(title) if title in t: return source_utils.strip_domain(i[0]['href']) return except: return
def __search(self, titles): try: query = self.search_link % (urllib.quote_plus( urllib.quote_plus(cleantitle.query(titles[0])))) query = urlparse.urljoin(self.base_link, query) t = [cleantitle.get(i) for i in set(titles) if i] r = client.request(query) r = dom_parser.parse_dom(r, 'ul', attrs={'class': 'coverBox'}) r = dom_parser.parse_dom(r, 'li') r = dom_parser.parse_dom(r, 'span', attrs={'class': 'name'}) r = dom_parser.parse_dom(r, 'a') title = r[0][1] title = cleantitle.get(title) if title in t: return source_utils.strip_domain(r[0][0]['href']) else: return except: return
def __search(self, imdb, titles, year): try: q = self.search_link % urllib.quote_plus( cleantitle.query(titles[0])) q = urlparse.urljoin(self.base_link, q) t = [cleantitle.get(i) for i in set(titles) if i] y = [ '%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0' ] r = client.request(q) r = dom_parser.parse_dom( r, 'tr', attrs={'id': re.compile('coverPreview.+?')}) r = [(dom_parser.parse_dom(i, 'a', req='href'), dom_parser.parse_dom(i, 'div', attrs={'style': re.compile('.+?')}), dom_parser.parse_dom(i, 'img', req='src')) for i in r] r = [(i[0][0].attrs['href'].strip(), i[0][0].content.strip(), i[1], i[2]) for i in r if i[0] and i[2]] r = [(i[0], i[1], [ x.content for x in i[2] if x.content.isdigit() and len(x.content) == 4 ], i[3]) for i in r] r = [(i[0], i[1], i[2][0] if i[2] else '0', i[3]) for i in r] r = [ i for i in r if any('us_ger_' in x.attrs['src'] for x in i[3]) ] r = [(i[0], i[1], i[2], [ re.findall('(\d+)', x.attrs['src']) for x in i[3] if 'smileys' in x.attrs['src'] ]) for i in r] r = [(i[0], i[1], i[2], [x[0] for x in i[3] if x]) for i in r] r = [(i[0], i[1], i[2], int(i[3][0]) if i[3] else 0) for i in r] r = sorted(r, key=lambda x: x[3])[::-1] r = [(i[0], i[1], i[2], re.findall('\((.+?)\)$', i[1])) for i in r] r = [(i[0], i[1], i[2]) for i in r if not i[3]] r = [i for i in r if i[2] in y] r = sorted(r, key=lambda i: int(i[2]), reverse=True) # with year > no year r = [(client.replaceHTMLCodes(i[0]), i[1], i[2]) for i in r] match = [ i[0] for i in r if cleantitle.get(i[1]) in t and year == i[2] ] match2 = [i[0] for i in r] match2 = [x for y, x in enumerate(match2) if x not in match2[:y]] if match2 == []: return for i in match2[:5]: try: if match: url = match[0] break r = client.request(urlparse.urljoin(self.base_link, i)) r = re.findall('(tt\d+)', r) if imdb in r: url = i break except: pass return source_utils.strip_domain(url) except: return
def __search(self, titles, year, season=0, episode=False): try: query = self.search_link % (urllib.quote_plus( cleantitle.query(titles[0]))) query = urlparse.urljoin(self.base_link, query) t = [cleantitle.get(i) for i in set(titles) if i] y = [ '%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0' ] r = client.request(query) r = dom_parser.parse_dom(r, 'div', attrs={'class': 'container'}) r = dom_parser.parse_dom(r, 'div', attrs={'class': 'ml-item-content'}) f = [] for i in r: _url = dom_parser.parse_dom(i, 'a', attrs={'class': 'ml-image'}, req='href')[0].attrs['href'] _title = re.sub('<.+?>|</.+?>', '', dom_parser.parse_dom(i, 'h6')[0].content).strip() try: _title = re.search('(.*?)\s(?:staf+el|s)\s*(\d+)', _title, re.I).group(1) except: pass _season = '0' _year = re.findall( 'calendar.+?>.+?(\d{4})', ''.join([ x.content for x in dom_parser.parse_dom( i, 'ul', attrs={'class': 'item-params'}) ])) _year = _year[0] if len(_year) > 0 else '0' if season > 0: s = dom_parser.parse_dom(i, 'span', attrs={'class': 'season-label'}) s = dom_parser.parse_dom(s, 'span', attrs={'class': 'el-num'}) if s: _season = s[0].content.strip() if cleantitle.get(_title) in t and _year in y and int( _season) == int(season): f.append((_url, _year)) r = f r = sorted(r, key=lambda i: int(i[1]), reverse=True) # with year > no year r = [i[0] for i in r if r[0]][0] url = source_utils.strip_domain(r) if episode: r = client.request(urlparse.urljoin(self.base_link, url)) r = dom_parser.parse_dom(r, 'div', attrs={'class': 'season-list'}) r = dom_parser.parse_dom(r, 'li') r = dom_parser.parse_dom(r, 'a', req='href') r = [(i.attrs['href'], i.content) for i in r] r = [i[0] for i in r if i[1] and int(i[1]) == int(episode)][0] url = source_utils.strip_domain(r) return url except: return