Exemplo n.º 1
0
    def __search(self, titles, year, season='0'):
        try:
            url = urlparse.urljoin(self.base_link, self.search_link)

            t = [cleantitle.get(i) for i in set(titles) if i]
            y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

            post = {'story': utils.uni2cp(titles[0]), 'titleonly': 3, 'do': 'search', 'subaction': 'search', 'search_start': 1, 'full_search': 0, 'result_from': 1}
            r = client.request(url, post=post)

            r = r.decode('cp1251').encode('utf-8')

            r = dom_parser.parse_dom(r, 'table', attrs={'class': 'eBlock'})
            r = [(dom_parser.parse_dom(i, 'div', attrs={'class': 'eTitle'}), dom_parser.parse_dom(i[1], 'a', attrs={'href': re.compile('.*\d+_goda/')})) for i in r]
            r = [(dom_parser.parse_dom(i[0][0], 'a', req='href'), [x.content for x in i[1] if re.match('\d{4}', x.content)][0] if i[1] else '0') for i in r if i[0]]
            r = [(i[0][0].attrs['href'], i[0][0].content, i[1]) for i in r if i[0]]
            r = [(i[0], i[1], i[2], re.findall('(.+?) \(*(\d{4})', i[1])) for i in r if i]
            r = [(i[0], i[3][0][0] if i[3] else i[1], i[2]) for i in r]
            r = [(i[0], i[1], i[2], re.findall(u'(.+?)\s+(\d+)\s+(?:сезон)', i[1])) for i in r]
            r = [(i[0], i[3][0][0] if len(i[3]) > 0 else i[1], i[2], i[3][0][1] if len(i[3]) > 0 else '0') for i in r]
            r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
            r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y and int(i[3]) == int(season)][0]

            return source_utils.strip_domain(r)
        except:
            return
Exemplo n.º 2
0
    def __search(self, titles, year, season='0'):
        try:
            t = [cleantitle.get(i) for i in set(titles) if i]
            y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

            post = {'story': utils.uni2cp(titles[0]), 'titleonly': 3, 'do': 'search', 'subaction': 'search', 'search_start': 1, 'full_search': 0, 'result_from': 1}
            html = client.request(self.base_link, post=post)

            html = html.decode('cp1251').encode('utf-8')

            r = dom_parser.parse_dom(html, 'div', attrs={'id': re.compile('news-id-\d+')})
            r = [(i.attrs['id'], dom_parser.parse_dom(i, 'a', req='href')) for i in r]
            r = [(re.sub('[^\d]+', '', i[0]), dom_parser.parse_dom(i[1], 'img', req='title')) for i in r]
            r = [(i[0], i[1][0].attrs['title'], '') for i in r if i[1]]
            r = [(i[0], i[1], i[2], re.findall(u'(.+?)\s+(\d+)\s+(?:сезон)', i[1])) for i in r]
            r = [(i[0], i[3][0][0] if len(i[3]) > 0 else i[1], i[2], i[3][0][1] if len(i[3]) > 0 else '0') for i in r]
            r = [(i[0], i[1], re.findall('(.+?) \(*(\d{4})', i[1]), i[3]) for i in r]
            r = [(i[0], i[2][0][0] if len(i[2]) > 0 else i[1], i[2][0][1] if len(i[2]) > 0 else '0', i[3]) for i in r]
            r = [(i[0], i[1], i[2], '1' if int(season) > 0 and i[3] == '0' else i[3]) for i in r]
            r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
            r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y and int(i[3]) == int(season)][0]
            r = dom_parser.parse_dom(html, 'a', attrs={'href': re.compile('.*/%s-' % r)}, req='href')[0].attrs['href']

            return source_utils.strip_domain(r)
        except:
            return
Exemplo n.º 3
0
    def __search(self, titles, year, season='0'):
        try:
            url = urlparse.urljoin(self.base_link, self.search_link)

            t = [cleantitle.get(i) for i in set(titles) if i]
            y = ['%s' % str(year), '%s' % str(int(year) + 1), '%s' % str(int(year) - 1), '0']

            post = {'story': utils.uni2cp(titles[0]), 'titleonly': 3, 'do': 'search', 'subaction': 'search', 'search_start': 1, 'full_search': 0, 'result_from': 1}
            r = client.request(url, post=post)

            r = r.decode('cp1251').encode('utf-8')

            r = dom_parser.parse_dom(r, 'table', attrs={'class': 'eBlock'})
            r = [(dom_parser.parse_dom(i, 'div', attrs={'class': 'eTitle'}), dom_parser.parse_dom(i[1], 'a', attrs={'href': re.compile('.*\d+_goda/')})) for i in r]
            r = [(dom_parser.parse_dom(i[0][0], 'a', req='href'), [x.content for x in i[1] if re.match('\d{4}', x.content)][0] if i[1] else '0') for i in r if i[0]]
            r = [(i[0][0].attrs['href'], i[0][0].content, i[1]) for i in r if i[0]]
            r = [(i[0], i[1], i[2], re.findall('(.+?) \(*(\d{4})', i[1])) for i in r if i]
            r = [(i[0], i[3][0][0] if i[3] else i[1], i[2]) for i in r]
            r = [(i[0], i[1], i[2], re.findall(u'(.+?)\s+(\d+)\s+(?:сезон)', i[1])) for i in r]
            r = [(i[0], i[3][0][0] if len(i[3]) > 0 else i[1], i[2], i[3][0][1] if len(i[3]) > 0 else '0') for i in r]
            r = sorted(r, key=lambda i: int(i[2]), reverse=True)  # with year > no year
            r = [i[0] for i in r if cleantitle.get(i[1]) in t and i[2] in y and int(i[3]) == int(season)][0]

            return source_utils.strip_domain(r)
        except:
            return