Exemplo n.º 1
0
 def __init__(self):
     """Create the kinopoisk.ru scraper: page cache, HTML cleaner, HTTP client."""
     self.cache = Cache('kinopoisk.db')
     self.html = Clear()
     self.http = HTTP()
     # Browser-like headers so the site serves regular desktop pages.
     headers = {}
     headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2'
     headers['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
     headers['Accept-Language'] = 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3'
     headers['Cache-Control'] = 'no-cache'
     headers['Referer'] = 'http://www.kinopoisk.ru/level/7/'
     self.headers = headers
Exemplo n.º 2
0
    def __init__(self):
        """Initialise the TheTVDB client: API key, cache and HTTP session."""
        self.api_key = '1D62F2F90030C444'
        self.cache = Cache('tvdb.db')
        self.http = HTTP()
        # Impersonate a desktop browser for every request.
        headers = {}
        headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2'
        headers['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
        headers['Accept-Language'] = 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3'
        headers['Cache-Control'] = 'no-cache'
        headers['Referer'] = 'http://www.thetvdb.com/'
        self.headers = headers
Exemplo n.º 3
0
 def __init__(self):
     """Prepare tracker settings, captcha-page regexes and the HTTP client."""
     self.setting = Setting()
     # Markers scraped from the login / captcha pages.
     self.re_auth = re.compile(r'profile\.php\?mode=sendpassword"')
     self.re_captcha = re.compile(r'<img src="(\/\/[^\/]+/captcha/[^"]+)"')
     self.re_captcha_sid = re.compile(r'<input type="hidden" name="cap_sid" value="([^"]+)">')
     self.re_captcha_code = re.compile(r'<input type="text" name="(cap_code_[^"]+)"')
     # No captcha challenge has been seen yet.
     self.captcha_sid = self.captcha_code = self.captcha_code_value = None
     self.http = HTTP()
     # Browser-like headers for every request.
     headers = {}
     headers['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36'
     headers['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
     headers['Accept-Language'] = 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3'
     headers['Cache-Control'] = 'no-cache'
     headers['Referer'] = 'http://rutracker.lib/forum/index.php'
     self.headers = headers
Exemplo n.º 4
0
 def __init__(self):
     """Wire up the kinopoisk.ru scraper dependencies."""
     self.cache = Cache('kinopoisk.db')
     self.html = Clear()
     self.http = HTTP()
     # Header set mimicking Firefox 10 on Windows 7.
     self.headers = dict([
         ('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2'),
         ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
         ('Accept-Language', 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3'),
         ('Cache-Control', 'no-cache'),
         ('Referer', 'http://www.kinopoisk.ru/level/7/'),
     ])
Exemplo n.º 5
0
 def __init__(self):
     """Set up tracker settings, the logged-out page marker and HTTP client."""
     self.setting = Setting()
     # Presence of this link on a page means we are NOT logged in.
     self.re_auth = re.compile(r'"profile\.php\?mode=sendpassword"')
     self.http = HTTP()
     # Browser-like headers for every request.
     headers = {}
     headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2'
     headers['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
     headers['Accept-Language'] = 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3'
     headers['Cache-Control'] = 'no-cache'
     headers['Referer'] = 'http://rutracker.org/forum/index.php'
     self.headers = headers
Exemplo n.º 6
0
 def __init__(self):
     """Initialise the TheTVDB scraper: API key, cache and HTTP client."""
     self.api_key = '1D62F2F90030C444'
     self.cache = Cache('tvdb.db')
     self.http = HTTP()
     # Browser-style headers sent with every request.
     self.headers = dict([
         ('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2'),
         ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
         ('Accept-Language', 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3'),
         ('Cache-Control', 'no-cache'),
         ('Referer', 'http://www.thetvdb.com/'),
     ])
Exemplo n.º 7
0
 def __init__(self):
     """Prepare tracker settings, captcha regexes and the HTTP client."""
     self.setting = Setting()
     # Markers scraped from the login / captcha pages.
     self.re_auth = re.compile(r'profile\.php\?mode=sendpassword"')
     self.re_captcha = re.compile(r'<img src="(\/\/[^\/]+/captcha/[^"]+)"')
     self.re_captcha_sid = re.compile(r'<input type="hidden" name="cap_sid" value="([^"]+)">')
     self.re_captcha_code = re.compile(r'<input type="text" name="(cap_code_[^"]+)"')
     # Captcha state: nothing has been solved yet.
     self.captcha_sid = None
     self.captcha_code = None
     self.captcha_code_value = None
     self.http = HTTP()
     # Browser-style headers for every request.
     self.headers = dict([
         ('User-Agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36'),
         ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'),
         ('Accept-Language', 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3'),
         ('Cache-Control', 'no-cache'),
         ('Referer', 'http://rutracker.nl/forum/index.php'),
     ])
Exemplo n.º 8
0
class KinoPoisk:
    """
    
    API:
        scraper  - скрапер
        movie    - профайл фильма
        search   - поиск фильма
        best     - поиск лучших фильмов
        person   - поиск персон
        work     - информация о работах персоны
        
    """
    def __init__(self):
        """Set up cache, HTML cleaner, HTTP client and default headers."""
        self.cache = Cache('kinopoisk.db')
        self.html = Clear()
        self.http = HTTP()
        # Pretend to be a desktop Firefox so the site serves regular pages.
        headers = {}
        headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2'
        headers['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
        headers['Accept-Language'] = 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3'
        headers['Cache-Control'] = 'no-cache'
        headers['Referer'] = 'http://www.kinopoisk.ru/level/7/'
        self.headers = headers

    # API

    def scraper(self, name, year=None, trailer_quality=None):
        """Resolve a film by name (and optional year) and return its profile.

        Returns None when the name cannot be encoded for the site search,
        when no film is found, or on a network error.
        """
        try:
            # The site search expects cp1251-encoded, URL-quoted queries.
            tag = 'scraper:' + urllib.quote_plus(name.encode('windows-1251'))
        except Exception:
            # Narrowed from a bare "except:" which also swallowed
            # KeyboardInterrupt/SystemExit.
            return None

        if year:
            tag += ':' + str(year)

        # Cache maps the (name, year) query to a film id via _scraper.
        movie_id = self.cache.get(tag, self._scraper, name, year)
        if not movie_id:
            return None

        return self.movie(movie_id, trailer_quality)

    def movie(self, id, trailer_quality=None):
        """Fetch a movie profile by id and trim trailers to the quality cap."""
        id = str(id)
        quality_cap = 6 if trailer_quality is None else trailer_quality

        movie = self.cache.get('movie:' + id, self._movie, id)
        if not movie:
            return None

        if movie['trailers']:
            # Keep only clips that have at least one stream within the cap;
            # each kept clip's stream list collapses to its best match.
            kept = []
            for clip in movie['trailers']:
                candidates = [v for v in clip['video'] if v[0] <= quality_cap]
                if candidates:
                    clip['video'] = candidates[-1]
                    kept.append(clip)
            movie['trailers'] = kept

            if kept:
                # Prefer a clip flagged as a real trailer; otherwise take
                # whatever clip comes first.
                flagged = [c for c in kept if c['trailer']]
                chosen = flagged[0] if flagged else kept[0]
                movie['info']['trailer'] = chosen['video'][1]

        return movie

    def search(self, name, trailer_quality=None):
        """Search films by name; returns the same paged dict as _search_movie.

        trailer_quality is accepted for API symmetry but is unused here.
        """
        return self._search_movie(name)

    def best(self, **kwarg):
        """Return ids of top-rated films from the KinoPoisk "navigator" page.

        Keyword options: page, limit, votes, dvd, decade, genre, country,
        rate, mpaa.  Returns {'pages': (total, prev, current, next),
        'data': [film_id, ...]} or None on a network error.
        """
        page = kwarg.get('page', 1)
        limit = kwarg.get('limit', 50)

        url = ('http://www.kinopoisk.ru/top/navigator/m_act%5Bis_film%5D/on/'
               'm_act%5Bnum_vote%5D/' + str(kwarg.get('votes', 100)) + '/')

        if kwarg.get('dvd'):
            url += 'm_act%5Bis_dvd%5D/on/'

        if kwarg.get('decade'):
            url += 'm_act%5Bdecade%5D/' + str(kwarg['decade']) + '/'

        if kwarg.get('genre'):
            url += 'm_act%5Bgenre%5D/' + str(GENRE[kwarg['genre']]) + '/'

        if kwarg.get('country'):
            url += 'm_act%5Bcountry%5D/' + str(kwarg['country']) + '/'

        if kwarg.get('rate'):
            url += 'm_act%5Brating%5D/' + str(kwarg['rate']) + ':/'

        if kwarg.get('mpaa'):
            url += 'm_act%5Bmpaa%5D/' + str(kwarg['mpaa']) + '/'

        url += 'perpage/' + str(limit) + '/order/ex_rating/'

        if page > 1:
            url += 'page/' + str(page) + '/'

        response = self.http.fetch(url, headers=self.headers)
        if response.error:
            return None

        res = {'pages': (1, 0, 1, 0), 'data': []}

        r = re.compile('<div class="pagesFromTo(.+?)<div class="pagesFromTo',
                       re.U | re.S).search(
                           response.body.decode('windows-1251'))
        if r:

            body = r.group(1)

            # Pagination header, e.g. "51&mdash;100 ... 250" -> current page
            # and total count.  Use // explicitly: the pages += 1 fix-up
            # below shows floor division is intended (identical on Py2 ints,
            # and correct under a Python 3 port).
            p = re.compile('>([0-9]+)&mdash;[0-9]+[^0-9]+?([0-9]+)',
                           re.U).search(body)
            if p:
                page = (int(p.group(1)) - 1) // limit + 1
                total = int(p.group(2))
                pages = total // limit
                if limit * pages != total:
                    pages += 1
                res['pages'] = (pages, 0 if page == 1 else page - 1, page,
                                0 if page == pages else page + 1)

            # One "tr_<id>" row per film.
            for film_id in re.compile('<div id="tr_([0-9]+)"',
                                      re.U | re.S).findall(body):
                res['data'].append(int(film_id))

        return res

    def person(self, name):
        """Search for people by name.

        Returns {'pages': (1, 0, 1, 0), 'data': [...]} where each entry has
        id, name, originalname, year and poster, or None on a network error.
        """
        response = self.http.fetch(
            'http://www.kinopoisk.ru/s/type/people/list/1/find/' +
            urllib.quote_plus(name.encode('windows-1251')) +
            '/order/relevant/',
            headers=self.headers)
        if response.error:
            return None

        res = []
        # The result list sits between two "navigator" blocks.
        body = re.compile(
            '<div class="navigator">(.+?)<div class="navigator">',
            re.U | re.S).search(response.body.decode('windows-1251'))
        if body:

            # One block per search hit.
            for block in re.compile('<p class="pic">(.+?)<div class="clear">',
                                    re.U | re.S).findall(body.group(1)):

                id, name, original, year, poster = None, None, None, None, None

                r = re.compile(
                    '<p class="name"><a href="/name/([0-9]+)[^>]+>([^<]+)</a>',
                    re.U | re.S).search(block)
                if r:
                    id = r.group(1)
                    name = r.group(2).strip()

                    if id and name:

                        # Original (latin) name, rendered in gray.
                        r = re.compile('<span class="gray">([^<]+)</span>',
                                       re.U | re.S).search(block)
                        if r:
                            original = r.group(1).strip()
                            if not original:
                                original = None

                        r = re.compile('<span class="year">([0-9]{4})</span>',
                                       re.U | re.S).search(block)
                        if r:
                            year = int(r.group(1))

                        # Only attach a photo URL when the hit has a real one.
                        if block.find('no-poster.gif') == -1:
                            poster = 'http://st.kinopoisk.ru/images/actor/' + id + '.jpg'

                        res.append({
                            'id': int(id),
                            'name': name,
                            'originalname': original,
                            'year': year,
                            'poster': poster
                        })

        return {'pages': (1, 0, 1, 0), 'data': res}

    def work(self, id):
        """Collect film ids for every role a person has worked in.

        Returns a dict mapping role name ('actor', 'director', ...) to a
        list of film ids, or None on a network error.
        """
        response = self.http.fetch('http://www.kinopoisk.ru/name/' + str(id) +
                                   '/',
                                   headers=self.headers)
        if response.error:
            return None

        res = {}

        r = re.compile('id="sort_block">(.+?)<div id="block_right"', re.U
                       | re.S).search(response.body.decode('windows-1251'))
        if r:
            for block in r.group(1).split(
                    u'<tr><td colspan="3" class="specializationBox')[1:]:

                work = None

                # Which known specialization does this block hold?
                for w in ('actor', 'director', 'writer', 'producer',
                          'producer_ussr', 'composer', 'operator', 'editor',
                          'design', 'voice', 'voice_director'):
                    if block.find(u'id="' + w + u'"') != -1:
                        # Fold the USSR producer credit into plain 'producer'.
                        work = 'producer' if w == 'producer_ussr' else w
                        break

                if work:

                    movies = []

                    for id, name in re.compile(
                            '<span class="name"><a href="/film/([0-9]+)/[^>]+>([^<]+?)</a>',
                            re.U).findall(block):
                        # Skip TV (mini-)series markers; keep feature films.
                        for tag in (u'(мини-сериал)', u'(сериал)'):
                            if name.find(tag) != -1:
                                break
                        else:
                            movies.append(int(id))

                    if movies:
                        res.setdefault(work, []).extend(movies)

        return res

    def review(self, id, query):
        """Return cached reviews for a film, or their stats when query=='stat'."""
        # Stats live alongside the full ('all') review set in the cache.
        section = query if query != 'stat' else 'all'
        data = self.cache.get('review:' + str(id) + ':' + section,
                              self._review, id, section)
        return data[query] if data else data

    def countries(self):
        """Return the full (id, name) country list."""
        return COUNTRIES

    def country(self, id, default=None):
        """Map a country id to its name; return *default* when unknown."""
        for country_id, title in COUNTRIES:
            if country_id == id:
                return title
        return default

    # PRIVATE

    def _search_movie(self, name, year=None):
        """Run the site film search.

        Returns {'pages': (1, 0, 1, 0), 'data': [film_id, ...]} or None on
        a network error.
        """
        url = 'http://www.kinopoisk.ru/s/type/film/list/1/find/' + urllib.quote_plus(
            name.encode('windows-1251')) + '/order/relevant'

        if year:
            url += '/m_act%5Byear%5D/' + str(year)
        url += '/m_act%5Btype%5D/film/'

        response = self.http.fetch(url, headers=self.headers)
        if response.error:
            return None

        res = []
        # Hits sit between two "navigator" blocks.
        r = re.compile('<div class="navigator">(.+?)<div class="navigator">',
                       re.U | re.S).search(
                           response.body.decode('windows-1251'))
        if r:
            for id in re.compile(
                    '<p class="name"><a href="/level/1/film/([0-9]+)',
                    re.U | re.S).findall(r.group(1)):
                res.append(int(id))

        return {'pages': (1, 0, 1, 0), 'data': res}

    def _scraper(self, name, year):
        """Cache worker: resolve a film id for (name, year).

        Returns (cache_timeout, film_id_or_None); a False timeout means
        "do not cache".
        """
        ids = self._search_movie(name, year)

        if ids is None:
            # Network failure — never cache.
            return False, None

        if not ids['data']:
            # Negative result: remember it for three days.
            return 259200, None

        # Fresh films may still change on the site, so cache those only
        # for a week; older ones may be kept indefinitely.
        if year and year >= time.gmtime(time.time()).tm_year:
            return 7 * 24 * 60 * 60, ids['data'][0]
        return True, ids['data'][0]

    def _review(self, id, query):
        """Cache worker: scrape up to 200 reviews for a film.

        *query* is 'all', 'good', 'bad' or 'neutral'.  Returns
        (timeout, {'stat': {...}, query: [review_dict, ...]}) or
        (False, None) on a network error.
        """
        url = 'http://www.kinopoisk.ru/film/' + str(id) + '/ord/rating/'
        if query in ('good', 'bad', 'neutral'):
            url += 'status/' + query + '/'
        url += 'perpage/200/'

        response = self.http.fetch(url, headers=self.headers)
        if response.error:
            return False, None

        html = response.body.decode('windows-1251')

        res = {
            'stat': {
                'all': 0,
                'good': 0,
                'bad': 0,
                'neutral': 0
            },
            query: []
        }

        # Per-category review counters shown in the page sidebar.
        r = re.compile('<ul class="resp_type">(.+?)</ul>',
                       re.U | re.S).search(html)
        if r:
            ul = r.group(1)

            for q, t in (('pos', 'good'), ('neg', 'bad'), ('neut', 'neutral')):
                r = re.compile(
                    '<li class="' + q +
                    '"><a href="[^>]+>[^<]+</a><b>([0-9]+)</b></li>',
                    re.U).search(ul)
                if r:
                    res['stat'][t] = int(r.group(1))

            res['stat']['all'] = res['stat']['good'] + res['stat'][
                'bad'] + res['stat']['neutral']

        r = re.compile('<div class="navigator">(.+?)<div class="navigator">',
                       re.U | re.S).search(html)
        if r:

            # One chunk per review entry.
            for block in r.group(1).split('itemprop="reviews"'):

                review = {
                    'nick': None,
                    'count': None,
                    'title': None,
                    'review': None,
                    'time': None
                }

                r = re.compile('itemprop="reviewBody">(.+?)</div>',
                               re.U | re.S).search(block)
                if r:

                    # Convert simple HTML markup into BB-style tags.
                    text = r.group(1)
                    for tag1, tag2 in ((u'<=end=>', u'\n'), (u'<b>', u'[B]'),
                                       (u'</b>', u'[/B]'), (u'<i>', u'[I]'),
                                       (u'</i>', u'[/I]'), (u'<u>', u'[U]'),
                                       (u'</u>', u'[/U]')):
                        text = text.replace(tag1, tag2)

                    r = self.html.text(text)
                    if r:
                        review['review'] = r

                user = None
                r = re.compile(
                    '<p class="profile_name"><s></s><a href="[^>]+>([^<]+)</a></p>'
                ).search(block)
                if r:
                    user = self.html.string(r.group(1))
                else:
                    # Anonymous reviewers have no profile link.
                    r = re.compile('<p class="profile_name"><s></s>([^<]+)</p>'
                                   ).search(block)
                    if r:
                        user = self.html.string(r.group(1))
                if user:
                    review['nick'] = user

                r = re.compile('<p class="sub_title"[^>]+>([^<]+)</p>').search(
                    block)
                if r:
                    title = self.html.string(r.group(1))
                    if title:
                        review['title'] = title

                r = re.compile('<span class="date">([^<]+)</span>',
                               re.U | re.S).search(block)
                if r:
                    review['time'] = r.group(1).replace(u' |', u',')

                r = re.compile(u'<a href="[^>]+>рецензии \(([0-9]+)\)</a>',
                               re.U | re.S).search(block)
                if r:
                    review['count'] = int(r.group(1))

                # Only keep entries with both an author and a body.
                if review['nick'] and review['review']:
                    res[query].append(review)

        return 3600, res  # cache for one hour

    def _movie(self, id):
        """Cache worker: scrape the full movie profile page.

        Returns (cache_timeout, profile_dict) or (False, None) on a network
        error.  The profile has keys: id, thumb, fanart, trailers, info.
        """
        response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/',
                                   headers=self.headers)
        if response.error:
            return False, None

        html = response.body.decode('windows-1251')

        res = {
            'id': int(id),
            'thumb': None,
            'fanart': None,
            'trailers': [],
            'info': {}
        }

        # Title, original title, tagline, MPAA rating, year, top250 position.
        # Runtime is kept as its own info field (otherwise it would hide the
        # file size in the UI, per the original author's note).
        for tag, reg, cb in (
            ('title', '<h1 class="moviename-big" itemprop="name">(.+?)</h1>',
             self.html.string),
            ('originaltitle', 'itemprop="alternativeHeadline">([^<]*)</span>',
             self.html.string),
            ('tagline',
             '<td style="color\: #555">&laquo;(.+?)&raquo;</td></tr>',
             self.html.string), ('mpaa', 'images/mpaa/([^\.]+).gif',
                                 self.html.string),
            ('runtime',
             '<td class="time" id="runtime">[^<]+<span style="color\: #999">/</span>([^<]+)</td>',
             self.html.string),
            ('year', '<a href="/lists/m_act%5Byear%5D/([0-9]+)/"',
             int), ('top250', '<a href="/level/20/#([0-9]+)', int)):
            r = re.compile(reg, re.U).search(html)
            if r:
                value = r.group(1).strip()
                if value:
                    res['info'][tag] = cb(value)

        # Directors, writers, genres — comma-joined name lists.
        for tag, reg in (('director', u'<td itemprop="director">(.+?)</td>'), (
                'writer',
                u'<td class="type">сценарий</td><td[^>]*>(.+?)</td>'),
                         ('genre', u'<span itemprop="genre">(.+?)</span>')):
            r = re.compile(reg, re.U | re.S).search(html)
            if r:
                r2 = []
                for r in re.compile('<a href="[^"]+">([^<]+)</a>',
                                    re.U).findall(r.group(1)):
                    r = self.html.string(r)
                    if r and r != '...':
                        r2.append(r)
                if r2:
                    res['info'][tag] = u', '.join(r2)

        # Plot synopsis.
        r = re.compile(
            '<span class="_reachbanner_"><div class="brand_words film-synopsys" itemprop="description">(.+?)</div></span>',
            re.U).search(html)
        if r:
            plot = self.html.text(r.group(1).replace('<=end=>', '\n'))
            if plot:
                res['info']['plot'] = plot

        # IMDB rating and vote count.
        r = re.compile('IMDb: ([0-9.]+) \(([0-9\s]+)\)</div>',
                       re.U).search(html)
        if r:
            res['info']['rating'] = float(r.group(1).strip())
            res['info']['votes'] = r.group(2).strip()

        # World premiere date, converted to ISO yyyy-mm-dd.
        r = re.compile(u'премьера \(мир\)</td>(.+?)</tr>',
                       re.U | re.S).search(html)
        if r:
            r = re.compile(u'data\-ical\-date="([^"]+)"',
                           re.U | re.S).search(r.group(1))
            if r:
                data = r.group(1).split(' ')
                if len(data) == 3:
                    i = 0
                    # Map the Russian month name to its 2-digit number.
                    for mon in (u'января', u'февраля', u'марта', u'апреля',
                                u'мая', u'июня', u'июля', u'августа',
                                u'сентября', u'октября', u'ноября',
                                u'декабря'):
                        i += 1
                        if mon == data[1]:
                            mon = str(i)
                            if len(mon) == 1:
                                mon = '0' + mon
                            day = data[0]
                            if len(day) == 1:
                                day = '0' + day
                            res['info']['premiered'] = '-'.join(
                                [data[2], mon, day])
                            break

        # Poster image.
        r = re.compile(u'onclick="openImgPopup\(([^\)]+)\)',
                       re.U | re.S).search(html)
        if r:
            poster = r.group(1).replace("'", '').strip()
            if poster:
                res['thumb'] = 'http://kinopoisk.ru' + poster

        # Main cast.
        r = re.compile(u'<h4>В главных ролях:</h4>(.+?)</ul>',
                       re.U | re.S).search(html)
        if r:
            actors = []
            for r in re.compile(
                    '<li itemprop="actors"><a [^>]+>([^<]+)</a></li>',
                    re.U).findall(r.group(1)):
                r = self.html.string(r)
                if r and r != '...':
                    actors.append(r)
            if actors:
                res['info']['cast'] = actors[:]

        menu = re.compile(
            '<ul id="newMenuSub" class="clearfix(.+?)<!\-\- /menu \-\->',
            re.U | re.S).search(html)
        if menu:
            menu = menu.group(1)

            # Fan art (wallpapers page).
            if menu.find('/film/' + id + '/wall/') != -1:
                response = self.http.fetch('http://www.kinopoisk.ru/film/' +
                                           id + '/wall/',
                                           headers=self.headers)
                if not response.error:
                    html = response.body.decode('windows-1251')
                    fanart = re.compile(
                        '<a href="/picture/([0-9]+)/w_size/([0-9]+)/">',
                        re.U).findall(html)
                    if fanart:
                        # NOTE(review): Python 2-only tuple-parameter lambda;
                        # this is a syntax error under Python 3.
                        fanart.sort(cmp=lambda (id1, size1),
                                    (id2, size2): cmp(int(size1), int(size2)))

                        # Prefer the largest size not exceeding 1280.
                        fanart_best = [x for x in fanart if int(x[1]) <= 1280]
                        if fanart_best:
                            fanart = fanart_best

                        response = self.http.fetch(
                            'http://www.kinopoisk.ru/picture/' +
                            fanart[-1][0] + '/w_size/' + fanart[-1][1] + '/',
                            headers=self.headers)
                        if not response.error:
                            html = response.body.decode('windows-1251')
                            r = re.compile('id="image" src="([^"]+)"',
                                           re.U | re.S).search(html)
                            if r:
                                res['fanart'] = r.group(1).strip()
                                if res['fanart'].startswith('//'):
                                    res['fanart'] = 'http:' + res['fanart']

            # No wallpapers found — fall back to film stills.
            if not res['fanart'] and menu.find('/film/' + id +
                                               '/stills/') != -1:
                response = self.http.fetch('http://www.kinopoisk.ru/film/' +
                                           id + '/stills/',
                                           headers=self.headers)
                if not response.error:
                    html = response.body.decode('windows-1251')
                    fanart = re.compile(
                        '<a href="/picture/([0-9]+)/"><img  src="[^<]+</a>[^<]+<b><i>([0-9]+)&times;([0-9]+)</i>',
                        re.U).findall(html)
                    if fanart:
                        # NOTE(review): Python 2-only lambda, see above.
                        fanart.sort(cmp=lambda (id1, size1, t1), (
                            id2, size2, t2): cmp(int(size1), int(size2)))

                        # Prefer the largest landscape still within 1280 wide.
                        fanart_best = [
                            x for x in fanart
                            if int(x[1]) <= 1280 and int(x[1]) > int(x[2])
                        ]
                        if fanart_best:
                            fanart = fanart_best

                        response = self.http.fetch(
                            'http://www.kinopoisk.ru/picture/' +
                            fanart[-1][0] + '/',
                            headers=self.headers)
                        if not response.error:
                            html = response.body.decode('windows-1251')
                            r = re.compile('id="image" src="([^"]+)"',
                                           re.U | re.S).search(html)
                            if r:
                                res['fanart'] = r.group(1).strip()
                                if res['fanart'].startswith('//'):
                                    res['fanart'] = 'http:' + res['fanart']

            # Studios.
            if menu.find('/film/' + id + '/studio/') != -1:
                response = self.http.fetch('http://www.kinopoisk.ru/film/' +
                                           id + '/studio/',
                                           headers=self.headers)
                if not response.error:
                    html = response.body.decode('windows-1251')
                    r = re.compile(u'<b>Производство:</b>(.+?)</table>',
                                   re.U | re.S).search(html)
                    if r:
                        studio = []
                        for r in re.compile(
                                '<a href="/lists/m_act%5Bstudio%5D/[0-9]+/" class="all">(.+?)</a>',
                                re.U).findall(r.group(1)):
                            r = self.html.string(r)
                            if r:
                                studio.append(r)
                        if studio:
                            res['info']['studio'] = u', '.join(studio)

            # Trailers, grouped by language and kind.

            trailers1 = []  # Russian trailers
            trailers2 = []  # other Russian clips
            trailers3 = []  # non-Russian trailers
            trailers4 = []  # other non-Russian clips

            if menu.find('/film/' + id + '/video/') != -1:
                response = self.http.fetch('http://www.kinopoisk.ru/film/' +
                                           id + '/video/',
                                           headers=self.headers)
                if not response.error:
                    html = response.body.decode('windows-1251')

                    for row in re.compile(
                            u'<!-- ролик -->(.+?)<!-- /ролик -->',
                            re.U | re.S).findall(html):

                        # Skip blocks without a watch link.
                        if row.find(u'>СМОТРЕТЬ</a>') != -1:

                            # Is this a Russian clip (flag icon present)?
                            if row.find('class="flag flag2"') == -1:
                                is_ru = False
                            else:
                                is_ru = True

                            # Clip name.
                            r = re.compile(
                                '<a href="/film/' + id +
                                '/video/[0-9]+/[^>]+ class="all">(.+?)</a>',
                                re.U).search(row)
                            if r:
                                name = self.html.string(r.group(1))
                                if name:

                                    trailer = {
                                        'name': name,
                                        'time': None,
                                        'trailer': False,
                                        'ru': is_ru,
                                        'video': []
                                    }

                                    # Trailer or teaser (vs. random clip)?
                                    for token in (u'Трейлер', u'трейлер',
                                                  u'Тизер', u'тизер'):
                                        if name.find(token) != -1:
                                            trailer['trailer'] = True
                                            break

                                    # Clip duration.
                                    r = re.compile(
                                        u'clock.gif"[^>]+></td>\s*<td style="color\: #777">[^0-9]*([0-9\:]+)</td>',
                                        re.U | re.S).search(row)
                                    if r:
                                        trailer['time'] = r.group(1).strip()

                                    # Split available streams by quality;
                                    # HD icons shift the grade up by 3.
                                    for r in re.compile(
                                            'trailer/([1-3])a.gif"(.+?)link=([^"]+)" class="continue">.+?<td style="color\:#777">([^<]+)</td>\s*</tr>',
                                            re.U | re.S).findall(row):
                                        quality = int(r[0])
                                        if r[1].find('icon-hd') != -1:
                                            quality += 3

                                        trailer['video'].append(
                                            (quality, r[2].strip(), r[3]))

                                    if id == '462754':
                                        # Leftover debug hook (disabled).
                                        pass

                                    if trailer['video']:
                                        if trailer['ru']:
                                            if trailer['trailer']:
                                                trailers1.append(trailer)
                                            else:
                                                trailers2.append(trailer)
                                        else:
                                            if trailer['trailer']:
                                                trailers3.append(trailer)
                                            else:
                                                trailers4.append(trailer)

            # Concatenate trailer groups in priority order.
            res['trailers'].extend(trailers1)
            res['trailers'].extend(trailers2)
            res['trailers'].extend(trailers3)
            res['trailers'].extend(trailers4)

        timeout = True
        # Fresh films may still change on the site -> cache only briefly.
        if 'year' not in res['info'] or int(
                res['info']['year']) >= time.gmtime(time.time()).tm_year:
            timeout = 7 * 24 * 60 * 60  # one week

        return timeout, res
Exemplo n.º 9
0
class KinoPoisk:
    """
    KinoPoisk.ru scraper (the site serves windows-1251 encoded pages).
    
    API:
        scraper  - resolve a movie by name/year and return its profile
        movie    - movie profile by kinopoisk id
        search   - search movies by name
        best     - query the "best movies" navigator
        person   - search persons by name
        work     - filmography (works) of a person
        review   - user reviews of a movie
        countries/country - country reference helpers
    """
    
    def __init__(self):
        self.cache = Cache('kinopoisk.db')
        self.html = Clear()
        
        self.http = HTTP()
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3',
            'Cache-Control': 'no-cache',
            'Referer': 'http://www.kinopoisk.ru/level/7/'
        }
    
    
    # API
    
    def scraper(self, name, year=None, trailer_quality=None):
        """Find a movie id by name (cached) and return its full profile, or None."""
        try:
            tag = 'scraper:' + urllib.quote_plus(name.encode('windows-1251'))
        except:
            # the name cannot be represented in windows-1251 - the site search
            # would not accept it either
            return None
        else:
            
            if year:
                tag += ':' + str(year)
            
            id = self.cache.get(tag, self._scraper, name, year)
            if not id:
                return None
            
            return self.movie(id, trailer_quality)
    
    
    def movie(self, id, trailer_quality=None):
        """Return the cached movie profile with trailers trimmed to <= trailer_quality (default 6)."""
        id = str(id)
        
        if trailer_quality is None:
            trailer_quality = 6
        
        movie = self.cache.get('movie:' + id, self._movie, id)
        if not movie:
            return None
        
        if movie['trailers']:
            # for each clip keep the best variant not exceeding the wanted quality
            video = []
            for m in movie['trailers']:
                url = [x for x in m['video'] if x[0] <= trailer_quality]
                if url:
                    m['video'] = url[-1]
                    video.append(m)
            
            movie['trailers'] = video
            
            if movie['trailers']:
                # choose the main trailer: prefer a real trailer/teaser...
                r = [x for x in movie['trailers'] if x['trailer']]
                if r:
                    movie['info']['trailer'] = r[0]['video'][1]
                else:
                    # ...otherwise fall back to the first available clip
                    movie['info']['trailer'] = movie['trailers'][0]['video'][1]
        
        return movie
    
    
    def search(self, name, trailer_quality=None):
        """Search movies by name; returns {'pages': ..., 'data': [ids]} or None."""
        return self._search_movie(name)
    
    
    def best(self, **kwarg):
        """
        Query the "top/navigator" page.
        
        Keyword args: page, limit, votes, dvd, decade, genre, country, rate, mpaa.
        Returns {'pages': (pages, prev, current, next), 'data': [ids]} or None
        on network error.
        """
        page = kwarg.get('page', 1)
        limit = kwarg.get('limit', 50)
        
        url = 'http://www.kinopoisk.ru/top/navigator/m_act%5Bis_film%5D/on/m_act%5Bnum_vote%5D/' + str(kwarg.get('votes', 100)) + '/'
        
        if kwarg.get('dvd'):
            url += 'm_act%5Bis_dvd%5D/on/'
        
        if kwarg.get('decade'):
            url += 'm_act%5Bdecade%5D/' + str(kwarg['decade']) + '/'
        
        if kwarg.get('genre'):
            url += 'm_act%5Bgenre%5D/' + str(GENRE[kwarg['genre']]) + '/'
        
        if kwarg.get('country'):
            url += 'm_act%5Bcountry%5D/' + str(kwarg['country']) + '/'
        
        if kwarg.get('rate'):
            url += 'm_act%5Brating%5D/' + str(kwarg['rate']) + ':/'
        
        if kwarg.get('mpaa'):
            url += 'm_act%5Bmpaa%5D/' + str(kwarg['mpaa']) + '/'
        
        url += 'perpage/' + str(limit) + '/order/ex_rating/'
        
        if page > 1:
            url += 'page/' + str(page) + '/'
        
        response = self.http.fetch(url, headers=self.headers)
        if response.error:
            return None
        
        res = {'pages': (1, 0, 1, 0), 'data': []}
        
        r = re.compile('<div class="pagesFromTo(.+?)<div class="pagesFromTo', re.U|re.S).search(response.body.decode('windows-1251'))
        if r:
            
            body = r.group(1)
            
            # build the pager tuple: (total pages, prev, current, next)
            p = re.compile('>([0-9]+)&mdash;[0-9]+[^0-9]+?([0-9]+)', re.U).search(body)
            if p:
                # integer division on purpose: page/pages are whole numbers
                page = (int(p.group(1)) - 1) // limit + 1
                total = int(p.group(2))
                pages = total // limit
                if limit * pages != total:
                    pages += 1
                res['pages'] = (pages, 0 if page == 1 else page-1, page, 0 if page == pages else page+1)
            
            for id in re.compile('<div id="tr_([0-9]+)"', re.U|re.S).findall(body):
                res['data'].append(int(id))
        
        return res
    
    
    def person(self, name):
        """Search persons by name; returns {'pages': ..., 'data': [person dicts]} or None."""
        response = self.http.fetch('http://www.kinopoisk.ru/s/type/people/list/1/find/' + urllib.quote_plus(name.encode('windows-1251')) + '/order/relevant/', headers=self.headers)
        if response.error:
            return None
        
        res = []
        body = re.compile('<div class="navigator">(.+?)<div class="navigator">', re.U|re.S).search(response.body.decode('windows-1251'))
        if body:
            
            for block in re.compile('<p class="pic">(.+?)<div class="clear">', re.U|re.S).findall(body.group(1)):
                
                id, name, original, year, poster = None, None, None, None, None
                
                r = re.compile('<p class="name"><a href="http://www\.kinopoisk\.ru/level/4/people/([0-9]+)[^>]+>([^<]+)</a>', re.U|re.S).search(block)
                if r:
                    id = r.group(1)
                    name = r.group(2).strip()
                    
                    if id and name:
                        
                        r = re.compile('<span class="gray">([^<]+)</span>', re.U|re.S).search(block)
                        if r:
                            original = r.group(1).strip()
                            if not original:
                                original = None
                        
                        r = re.compile('<span class="year">([0-9]{4})</span>', re.U|re.S).search(block)
                        if r:
                            year = int(r.group(1))
                        
                        # a placeholder image means the person has no photo
                        if block.find('no-poster.gif') == -1:
                            poster = 'http://st.kinopoisk.ru/images/actor/' + id + '.jpg'
                        
                        res.append({'id': int(id), 'name': name, 'originalname': original, 'year': year, 'poster': poster})
                
        return {'pages': (1, 0, 1, 0), 'data': res}
    
    
    def work(self, id):
        """Return a person's filmography as {role: [movie ids]} or None on error."""
        response = self.http.fetch('http://www.kinopoisk.ru/name/' + str(id) + '/', headers=self.headers)
        if response.error:
            return None
        
        res = {}

        r = re.compile('id="sort_block">(.+?)<style>', re.U|re.S).search(response.body.decode('windows-1251'))
        if r:
            for block in r.group(1).split(u'<table cellspacing="0" cellpadding="0" border="0" width="100%">'):
                work = None
                
                for w in ('actor', 'director', 'writer', 'producer', 'producer_ussr', 'composer', 'operator', 'editor', 'design', 'voice', 'voice_director'):
                    if block.find(u'id="' + w + u'"') != -1:
                        # USSR-era producers are folded into the generic producer role
                        work = 'producer' if w == 'producer_ussr' else w
                        break
                
                if work:
                    
                    movies = []
                    
                    for id, name in re.compile('<span class="name"><a href="/film/([0-9]+)/" >([^<]+?)</a>', re.U).findall(block):
                        # skip TV (mini-)series entries; for/else runs only when no tag matched
                        for tag in (u'(мини-сериал)', u'(сериал)'):
                            if name.find(tag) != -1:
                                break
                        else:
                            movies.append(int(id))
                    
                    if movies:
                        res.setdefault(work, []).extend(movies)
            
        return res
    
    
    def review(self, id, query):
        """Return reviews for a movie; query is 'stat', 'good', 'bad' or 'neutral'."""
        # 'stat' is served by the same page as 'all', so cache them together
        query_s = 'all' if query == 'stat' else query
        data = self.cache.get('review:' + str(id) + ':' + query_s, self._review, id, query_s)
        if not data:
            return data
        return data[query]
    
    
    def countries(self):
        """Return the full (id, name) country reference list."""
        return COUNTRIES
    
    def country(self, id, default=None):
        """Return the country name for an id, or `default` if unknown."""
        country = [x[1] for x in COUNTRIES if x[0] == id]
        return country[0] if country else default
        
    
    # PRIVATE
    
    
    def _search_movie(self, name, year=None):
        url = 'http://www.kinopoisk.ru/s/type/film/list/1/find/' + urllib.quote_plus(name.encode('windows-1251')) + '/order/relevant'
        if year:
            url += '/m_act%5Byear%5D/' + str(year)
        url += '/m_act%5Btype%5D/film/'
        
        response = self.http.fetch(url, headers=self.headers)
        if response.error:
            return None
        
        res = []
        r = re.compile('<div class="navigator">(.+?)<div class="navigator">', re.U|re.S).search(response.body.decode('windows-1251'))
        if r:
            for id in re.compile('<p class="name"><a href="/level/1/film/([0-9]+)', re.U|re.S).findall(r.group(1)):
                res.append(int(id))
        
        return {'pages': (1, 0, 1, 0), 'data': res}
    
        
    def _scraper(self, name, year):
        timeout = True
        
        # recent movies are cached for a short time only (the site may still update)
        if year and year >= time.gmtime(time.time()).tm_year:
            timeout = 7*24*60*60  # week
        
        ids = self._search_movie(name, year)
        
        if ids is None:
            # network failure: do not cache
            return False, None
        
        elif not ids['data']:
            # cache the empty result for three days
            return 259200, None
        
        else:
            return timeout, ids['data'][0]
    
    
    def _review(self, id, query):
        url = 'http://www.kinopoisk.ru/film/' + str(id) + '/ord/rating/'
        if query in ('good', 'bad', 'neutral'):
            url += 'status/' + query + '/'
        url += 'perpage/200/'
        
        response = self.http.fetch(url, headers=self.headers)
        if response.error:
            return False, None
        
        html = response.body.decode('windows-1251')
        
        res = {
            'stat': {'all': 0, 'good': 0, 'bad': 0, 'neutral': 0},
            query: []
        }
        
        # review counters per category
        r = re.compile('<ul class="resp_type">(.+?)</ul>', re.U|re.S).search(html)
        if r:
            ul = r.group(1)
            
            for q, t in (('pos', 'good'), ('neg', 'bad'), ('neut', 'neutral')):
                r = re.compile('<li class="' + q + '"><a href="[^>]+>[^<]+</a><b>([0-9]+)</b></li>', re.U).search(ul)
                if r:
                    res['stat'][t] = int(r.group(1))
            
            res['stat']['all'] = res['stat']['good'] + res['stat']['bad'] + res['stat']['neutral']
        
        r = re.compile('<div class="navigator">(.+?)<div class="navigator">', re.U|re.S).search(html)
        if r:
            
            for block in r.group(1).split('itemprop="reviews"'):
                
                review = {
                    'nick': None,
                    'count': None,
                    'title': None,
                    'review': None,
                    'time': None
                }
                
                # review body: convert site markup to simple BB-code tags
                r = re.compile('itemprop="reviewBody">(.+?)</div>', re.U|re.S).search(block)
                if r:
                    
                    text = r.group(1)
                    for tag1, tag2 in ((u'<=end=>', u'\n'), (u'<b>', u'[B]'), (u'</b>', u'[/B]'), (u'<i>', u'[I]'), (u'</i>', u'[/I]'), (u'<u>', u'[U]'), (u'</u>', u'[/U]')):
                        text = text.replace(tag1, tag2)
                        
                    r = self.html.text(text)
                    if r:
                        review['review'] = r
                
                # author nickname: either a profile link or plain text
                user = None
                r = re.compile('<p class="profile_name"><s></s><a href="[^>]+>([^<]+)</a></p>').search(block)
                if r:
                    user = self.html.string(r.group(1))
                else:
                    r = re.compile('<p class="profile_name"><s></s>([^<]+)</p>').search(block)
                    if r:
                        user = self.html.string(r.group(1))
                if user:
                    review['nick'] = user
                            
                
                r = re.compile('<p class="sub_title"[^>]+>([^<]+)</p>').search(block)
                if r:
                    title = self.html.string(r.group(1))
                    if title:
                        review['title'] = title
                
                
                r = re.compile('<span class="date">([^<]+)</span>', re.U|re.S).search(block)
                if r:
                    review['time'] = r.group(1).replace(u' |', u',')
                
                # how many reviews the author has written in total
                r = re.compile(u'<a href="[^>]+>рецензии \(([0-9]+)\)</a>', re.U|re.S).search(block)
                if r:
                    review['count'] = int(r.group(1))
                
                
                if review['nick'] and review['review']:
                    res[query].append(review)
        
        return 3600, res  # one hour
            
    
    def _movie(self, id):
        response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/', headers=self.headers)
        if response.error:
            return False, None
        
        html = response.body.decode('windows-1251')
        
        res = {
            'id': int(id),
            'thumb': None,
            'fanart': None,
            'trailers': [],
            'info': {}
        }
        
        # title, original title, tagline, mpaa, year, top250;
        # runtime is extracted separately so the file size remains visible in UI
        for tag, reg, t in (
            ('title', '<title>(.+?)</title>', 'str'),
            ('originaltitle', 'itemprop="alternativeHeadline">([^<]*)</span>', 'str'),
            ('tagline', '<td style="color\: #555">&laquo;(.+?)&raquo;</td></tr>', 'str'),
            ('mpaa', 'images/mpaa/([^\.]+).gif', 'str'),
            ('runtime', '<td class="time" id="runtime">[^<]+<span style="color\: #999">/</span>([^<]+)</td>', 'str'),
            ('year', '<a href="/lists/m_act%5Byear%5D/([0-9]+)/"', 'int'),
            ('top250', 'Топ250\: <a\shref="/level/20/#([0-9]+)', 'int')
            
            ):
            r = re.compile(reg, re.U).search(html)
            if r:
                value = r.group(1).strip()
                if value:
                    res['info'][tag] = value
                    if t == 'int':
                        res['info'][tag] = int(res['info'][tag])
                    else:
                        res['info'][tag] = self.html.string(res['info'][tag])
        
        # directors, writers, genres
        for tag, reg in (
            ('director', u'<td itemprop="director">(.+?)</td>'),
            ('writer', u'<td class="type">сценарий</td><td[^>]*>(.+?)</td>'),
            ('genre', u'<span itemprop="genre">(.+?)</span>')
            ):
            r = re.compile(reg, re.U|re.S).search(html)
            if r:
                r2 = []
                for r in re.compile('<a href="[^"]+">([^<]+)</a>', re.U).findall(r.group(1)):
                    r = self.html.string(r)
                    if r and r != '...':
                        r2.append(r)
                if r2:
                    res['info'][tag] = u', '.join(r2)
        
        # cast
        r = re.compile(u'<h4>В главных ролях:</h4>(.+?)</ul>', re.U|re.S).search(html)
        if r:
            actors = []
            for r in re.compile('<li itemprop="actors"><a [^>]+>([^<]+)</a></li>', re.U).findall(r.group(1)):
                r = self.html.string(r)
                if r and r != '...':
                    actors.append(r)
            if actors:
                res['info']['cast'] = actors[:]

        # plot
        r = re.compile('<span class="_reachbanner_"><div class="brand_words" itemprop="description">(.+?)</div></span>', re.U).search(html)
        if r:
            plot = self.html.text(r.group(1).replace('<=end=>', '\n'))
            if plot:
                res['info']['plot'] = plot
        
        # IMDB rating and vote count
        r = re.compile('IMDb: ([0-9.]+) \(([0-9\s]+)\)</div>', re.U).search(html)
        if r:
            res['info']['rating'] = float(r.group(1).strip())
            res['info']['votes'] = r.group(2).strip()

        # world premiere date: "<day> <russian month name> <year>" -> YYYY-MM-DD
        r = re.compile(u'премьера \(мир\)</td>(.+?)</tr>', re.U|re.S).search(html)
        if r:
            r = re.compile(u'data\-ical\-date="([^"]+)"', re.U|re.S).search(r.group(1))
            if r:
                data = r.group(1).split(' ')
                if len(data) == 3:
                    i = 0
                    for mon in (u'января', u'февраля', u'марта', u'апреля', u'мая', u'июня', u'июля', u'августа', u'сентября', u'октября', u'ноября', u'декабря'):
                        i += 1
                        if mon == data[1]:
                            mon = str(i)
                            if len(mon) == 1:
                                mon = '0' + mon
                            day = data[0]
                            if len(day) == 1:
                                day = '0' + day
                            res['info']['premiered'] = '-'.join([data[2], mon, day])
                            break

        # poster
        r = re.compile(u'onclick="openImgPopup\(([^\)]+)\)', re.U|re.S).search(html)
        if r:
            poster = r.group(1).replace("'", '').strip()
            if poster:
                res['thumb'] = 'http://kinopoisk.ru' + poster

        # the sub-menu tells us which extra pages (wall/stills/studio/video) exist
        menu = re.compile('<ul id="newMenuSub" class="clearfix(.+?)<!\-\- /menu \-\->', re.U|re.S).search(html)
        if menu:
            menu = menu.group(1)

            # fanart from the wallpapers page
            if menu.find('/film/' + id + '/wall/') != -1:
                response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/wall/', headers=self.headers)
                if not response.error:
                    html = response.body.decode('windows-1251')
                    fanart = re.compile('<a href="/picture/([0-9]+)/w_size/([0-9]+)/">', re.U).findall(html)
                    if fanart:
                        fanart.sort(key=lambda x: int(x[1]))

                        # prefer the largest image not exceeding 1280px
                        fanart_best = [x for x in fanart if int(x[1]) <= 1280]
                        if fanart_best:
                            fanart = fanart_best

                        response = self.http.fetch('http://www.kinopoisk.ru/picture/' + fanart[-1][0] + '/w_size/' + fanart[-1][1] + '/', headers=self.headers)
                        if not response.error:
                            html = response.body.decode('windows-1251')
                            r = re.compile('id="image" src="([^"]+)"', re.U|re.S).search(html)
                            if r:
                                res['fanart'] = r.group(1).strip()
                                

            # no wallpapers - fall back to movie stills
            if not res['fanart'] and menu.find('/film/' + id + '/stills/') != -1:
                response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/stills/', headers=self.headers)
                if not response.error:
                    html = response.body.decode('windows-1251')
                    fanart = re.compile('<a href="/picture/([0-9]+)/"><img  src="[^<]+</a>[^<]+<b><i>([0-9]+)&times;([0-9]+)</i>', re.U).findall(html)
                    if fanart:
                        fanart.sort(key=lambda x: int(x[1]))

                        # prefer landscape stills up to 1280px wide
                        fanart_best = [x for x in fanart if int(x[1]) <= 1280 and int(x[1]) > int(x[2])]
                        if fanart_best:
                            fanart = fanart_best

                        response = self.http.fetch('http://www.kinopoisk.ru/picture/' + fanart[-1][0] + '/', headers=self.headers)
                        if not response.error:
                            html = response.body.decode('windows-1251')
                            r = re.compile('id="image" src="([^"]+)"', re.U|re.S).search(html)
                            if r:
                                res['fanart'] = r.group(1).strip()

            
            # studios
            if menu.find('/film/' + id + '/studio/') != -1:
                response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/studio/', headers=self.headers)
                if not response.error:
                    html = response.body.decode('windows-1251')
                    r = re.compile(u'<b>Производство:</b>(.+?)</table>', re.U|re.S).search(html)
                    if r:
                        studio = []
                        for r in re.compile('<a href="/lists/m_act%5Bstudio%5D/[0-9]+/" class="all">(.+?)</a>', re.U).findall(r.group(1)):
                            r = self.html.string(r)
                            if r:
                                studio.append(r)
                        if studio:
                            res['info']['studio'] = u', '.join(studio)


            # trailers, bucketed so that russian trailers come first
            
            trailers1 = []  # russian trailers
            trailers2 = []  # other russian clips
            trailers3 = []  # trailers
            trailers4 = []  # other clips
            
            if menu.find('/film/' + id + '/video/') != -1:
                response = self.http.fetch('http://www.kinopoisk.ru/film/' + id + '/video/', headers=self.headers)
                if not response.error:
                    html = response.body.decode('windows-1251')
                    
                    for row in re.compile(u'<!-- ролик -->(.+?)<!-- /ролик -->', re.U|re.S).findall(html):
                        
                        # skip blocks without a watch link
                        if row.find(u'>СМОТРЕТЬ</a>') != -1:

                            # russian clip? (flag2 marks russian-language clips)
                            if row.find('class="flag flag2"') == -1:
                                is_ru = False
                            else:
                                is_ru = True
                            
                            # clip title
                            r = re.compile('<a href="/film/' + id + '/video/[0-9]+/[^>]+ class="all">(.+?)</a>', re.U).search(row)
                            if r:
                                name = self.html.string(r.group(1))
                                if name:
                                    
                                    trailer = {
                                        'name': name,
                                        'time': None,
                                        'trailer': False,
                                        'ru': is_ru,
                                        'video': []
                                    }
                                    
                                    # trailer or teaser (as opposed to other clips)?
                                    for token in (u'Трейлер', u'трейлер', u'Тизер', u'тизер'):
                                        if name.find(token) != -1:
                                            trailer['trailer'] = True
                                            break
                                    
                                    # clip duration
                                    r = re.compile(u'clock.gif"[^>]+></td>\s*<td style="color\: #777">[^0-9]*([0-9\:]+)</td>', re.U|re.S).search(row)
                                    if r:
                                        trailer['time'] = r.group(1).strip()
                                    
                                    # quality variants: 1-3 from the icon, +3 for HD
                                    for r in re.compile('trailer/([1-3])a.gif"(.+?)link=([^"]+)" class="continue">.+?<td style="color\:#777">([^<]+)</td>\s*</tr>', re.U|re.S).findall(row):
                                        quality = int(r[0])
                                        if r[1].find('icon-hd') != -1:
                                            quality += 3
                                        
                                        trailer['video'].append((quality, r[2].strip(), r[3]))

                                    if trailer['video']:
                                        if trailer['ru']:
                                            if trailer['trailer']:
                                                trailers1.append(trailer)
                                            else:
                                                trailers2.append(trailer)
                                        else:
                                            if trailer['trailer']:
                                                trailers3.append(trailer)
                                            else:
                                                trailers4.append(trailer)
            
            # concatenate the buckets in priority order
            res['trailers'].extend(trailers1)
            res['trailers'].extend(trailers2)
            res['trailers'].extend(trailers3)
            res['trailers'].extend(trailers4)
        
        timeout = True
        # recent movies are cached for a short time only (the site may still update)
        if 'year' not in res['info'] or int(res['info']['year']) >= time.gmtime(time.time()).tm_year:
            timeout = 7*24*60*60  # week
        
        return timeout, res
# Exemplo n.º 10
class TvDb:
    """
    TheTVDB.com scraper (XML API, key-based).
    
    API:
        scraper  - resolve a series by name/year and return its profile
        search   - search series by name
        movie    - series profile by tvdb id
    """

    def __init__(self):
        self.api_key = '1D62F2F90030C444'

        self.cache = Cache('tvdb.db')

        self.http = HTTP()
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3',
            'Cache-Control': 'no-cache',
            'Referer': 'http://www.thetvdb.com/'
        }

    # API

    def scraper(self, name, year=None):
        """Find a series id by name (cached) and return its full profile, or None."""
        try:
            tag = 'scraper:' + urllib.quote_plus(name.encode('utf8'))
        except:
            # the name cannot be encoded - nothing to search for
            return None
        else:

            if year:
                tag += ':' + str(year)

            id = self.cache.get(tag, self._scraper, name, year)
            if not id:
                return None

            return self.movie(id)

    def search(self, name):
        """Search series by name; returns {'pages': ..., 'data': [ids]} or None."""
        return self._search(name)

    def movie(self, id):
        """Return the cached series profile for a tvdb id."""
        id = str(id)
        return self.cache.get('movie:' + id, self._movie, id)

    def _movie(self, id):
        # download the zipped russian-language XML bundle into a temp dir
        dirname = tempfile.mkdtemp()
        response = self.http.fetch('http://www.thetvdb.com/api/' +
                                   self.api_key + '/series/' + id +
                                   '/all/ru.zip',
                                   headers=self.headers,
                                   download=os.path.join(dirname, 'movie.zip'))
        if response.error:
            self._movie_clear(dirname)
            return False, None

        try:
            filezip = zipfile.ZipFile(os.path.join(dirname, 'movie.zip'), 'r')
            try:
                filezip.extractall(dirname)
            finally:
                filezip.close()
            # read and close explicitly: the original leaked the file handle
            f = open(os.path.join(dirname, 'ru.xml'), 'rb')
            try:
                movie = f.read().decode('utf8')
            finally:
                f.close()
        except Exception:
            self._movie_clear(dirname)
            return False, None

        self._movie_clear(dirname)

        body = re.compile(r'<Series>(.+?)</Series>', re.U | re.S).search(movie)
        if not body:
            return False, None

        body = body.group(1)

        res = {
            'id': int(id),
            'thumb': None,
            'fanart': None,
            'trailers': [],
            'info': {}
        }

        # directors and writers: |-separated lists that repeat per episode,
        # deduplicated via a dict (preserves first-seen order in CPython)
        for tag in ('Director', 'Writer'):
            people = {}
            for raw in re.compile(r'<' + tag + r'>([^<]+)</' + tag + r'>',
                                  re.U | re.S).findall(movie):
                for person in raw.split('|'):
                    person = person.strip()
                    if person:
                        people[person] = 1
            if people:
                res['info'][tag.lower()] = u', '.join(
                    [x for x in people.keys() if x])

        # scalar/list fields of the <Series> element
        for tag, retag, typeof in (
            ('plot', 'Overview', None),
            ('mpaa', 'ContentRating', None),
            ('premiered', 'FirstAired', None),
            ('studio', 'Network', None),
            ('title', 'SeriesName', None),
            ('runtime', 'Runtime', None),
            ('votes', 'RatingCount', None),
            ('rating', 'Rating', float),
            ('genre', 'Genre', list),
            ('cast', 'Actors', list)
            ):
            r = re.compile(r'<' + retag + r'>([^<]+)</' + retag + r'>',
                           re.U | re.S).search(body)
            if r:
                r = r.group(1).strip()
                if typeof == float:
                    res['info'][tag] = float(r)
                elif typeof == list:
                    res['info'][tag] = [
                        x for x in [x.strip() for x in r.split(u'|')] if x
                    ]
                    if tag == 'genre':
                        res['info'][tag] = u', '.join(res['info'][tag])
                else:
                    res['info'][tag] = r

        # year is derived from the premiere date (YYYY-MM-DD)
        if 'premiered' in res['info']:
            res['info']['year'] = int(res['info']['premiered'].split('-')[0])

        # poster
        r = re.compile(r'<poster>([^<]+)</poster>', re.U | re.S).search(body)
        if r:
            res['thumb'] = 'http://thetvdb.com/banners/' + r.group(1).strip()

        # fanart
        r = re.compile(r'<fanart>([^<]+)</fanart>', re.U | re.S).search(body)
        if r:
            res['fanart'] = 'http://thetvdb.com/banners/' + r.group(1).strip()

        timeout = True
        # recent series are cached for a short time only (the site may still update)
        if 'year' not in res['info'] or int(
                res['info']['year']) >= time.gmtime(time.time()).tm_year:
            timeout = 7 * 24 * 60 * 60  # week

        return timeout, res

    def _movie_clear(self, dirname):
        """Remove the temporary extraction directory and every file in it."""
        for filename in os.listdir(dirname):
            os.unlink(os.path.join(dirname, filename))
        os.rmdir(dirname)

    def _search(self, name):
        response = self.http.fetch(
            'http://www.thetvdb.com/api/GetSeries.php?language=ru&seriesname='
            + urllib.quote_plus(name.encode('utf8')),
            headers=self.headers)
        if response.error:
            return None

        res = []
        rows = re.compile('<Series>(.+?)</Series>',
                          re.U | re.S).findall(response.body.decode('utf8'))
        if rows:

            recmd = re.compile('<seriesid>([0-9]+)</seriesid>', re.U | re.S)

            # only entries with a russian translation are accepted
            for row in [
                    x for x in rows if x.find(u'<language>ru</language>') != -1
            ]:
                r = recmd.search(row)
                if r:
                    res.append(int(r.group(1)))

        return {'pages': (1, 0, 1, 0), 'data': res}

    def _scraper(self, name, year):
        timeout = True

        # recent series are cached for a short time only (the site may still update)
        if year and year >= time.gmtime(time.time()).tm_year:
            timeout = 7 * 24 * 60 * 60  # week

        ids = self._search(name)

        if ids is None:
            # network failure: do not cache
            return False, None

        elif not ids['data']:
            # cache the empty result for three days
            return 259200, None

        else:
            return timeout, ids['data'][0]
# Exemplo n.º 11
class RuTrackerHTTP:
    def __init__(self):
        self.setting = Setting()
        self.re_auth = re.compile(r'"profile\.php\?mode=sendpassword"')
        self.http = HTTP()
        
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3',
            'Cache-Control': 'no-cache',
            'Referer': 'http://rutracker.org/forum/index.php'
        }
    
    def guest(self, url):
        response = self.http.fetch(url, headers=self.headers)
        if response.error:
            return None
        else:
            body = response.body.decode('windows-1251')
            if body.find(u'>форум временно отключен</p>') != -1:
                return 0
            return body
    
    def get(self, url):
        return self._fetch('GET', url)
    
    def post(self, url, params):
        return self._fetch('POST', url, params)
    
    def download(self, id):
        id = str(id)
        
        # проверяем авторизацию
        html = self.get('http://rutracker.org/forum/viewtopic.php?t=' + id)
        if not html:
            return html
        
        # хакаем куки
        cookies = cookielib.MozillaCookieJar()
        cookies.load(self.http.request.cookies)
        cookies.set_cookie(cookielib.Cookie(version=0, name='bb_dl', value=id, port=None, port_specified=False, domain='.rutracker.org', domain_specified=False, domain_initial_dot=False, path='/', path_specified=True, secure=False, expires=None, discard=True, comment=None, comment_url=None, rest={'HttpOnly': None}, rfc2109=False))
        cookies.save(self.http.request.cookies, ignore_discard=True, ignore_expires=True)
        
        # тянем торрент
        response = self.http.fetch('http://dl.rutracker.org/forum/dl.php?t=' + id, cookies='rutracker.moz', headers=self.headers, method='POST')
        if response.error:
            return None
        else:
            return response.body
    
    
    def _fetch(self, method, url, params=None):
        while True:
            response = self.http.fetch(url, cookies='rutracker.moz', headers=self.headers, method=method, params=params)
            if response.error:
                return None
            else:
                body = response.body.decode('windows-1251')
                if body.find(u'>форум временно отключен</p>') != -1:
                    return 0
                if not self.re_auth.search(body):
                    return body
                else:
                    xbmc.log('RUTRACKER: Request auth', xbmc.LOGDEBUG)
                    auth = self._auth()
                    if not auth:
                        return auth
        
    def _auth(self):
        while True:
            login = self.setting['rutracker_login']
            password = self.setting['rutracker_password']
            if not login or not password:
                login, password = self._setting(login, password)
                if not login:
                    return False
            
            response = self.http.fetch('http://login.rutracker.org/forum/login.php', cookies='rutracker.moz', headers=self.headers, method='POST', params={'login_username': login, 'login_password': password, 'login': r'Вход'})
            if response.error:
                return False
            else:
                body = response.body.decode('windows-1251')
                if body.find(u'>форум временно отключен</p>') != -1:
                    return 0
                if not self.re_auth.search(body):
                    return True
                else:
                    login, password = self._setting(login, password)
                    if not login:
                        return False
            
    
    def _setting(self, login, password):
        self.setting.dialog()
        login2 = self.setting['rutracker_login']
        password2 = self.setting['rutracker_password']
        if login == login2 and password == password2:
            return None, None
        else:
            return login2, password2
# Exemplo n.º 12
# 0
class TvDb:
    """
    
    API:
        scraper  - scraper entry point (name [+ year] -> movie profile)
        search   - series search
        movie    - movie profile by series id
        
    """
    
    def __init__(self):
        self.api_key = '1D62F2F90030C444'
        
        self.cache = Cache('tvdb.db')
        
        self.http = HTTP()
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:10.0.2) Gecko/20100101 Firefox/10.0.2',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3',
            'Cache-Control': 'no-cache',
            'Referer': 'http://www.thetvdb.com/'
        }
        
        
    # API
    
    def scraper(self, name, year=None):
        """Resolve *name* to a series id (via the cache) and return its
        movie profile, or None when nothing could be resolved."""
        try:
            tag = 'scraper:' + urllib.quote_plus(name.encode('utf8'))
        except:
            return None
        else:
            
            if year:
                tag += ':' + str(year)
            
            id = self.cache.get(tag, self._scraper, name, year)
            if not id:
                return None
            
            return self.movie(id)
        
    def search(self, name):
        """Uncached series search; see _search for the result shape."""
        return self._search(name)
    
    
    def movie(self, id):
        """Return the (cached) movie profile dict for series *id*."""
        id = str(id)
        return self.cache.get('movie:' + id, self._movie, id)
    
    
    def _movie(self, id):
        """Download and parse the TheTVDB 'all/ru' zip for series *id*.

        Returns (cache_timeout, profile_dict) for the cache layer, or
        (False, None) when the download/parse failed (not cached)."""
        dirname = tempfile.mkdtemp()
        response = self.http.fetch('http://www.thetvdb.com/api/' + self.api_key + '/series/' + id + '/all/ru.zip', headers=self.headers, download=os.path.join(dirname, 'movie.zip'))
        if response.error:
            self._movie_clear(dirname)
            return False, None
        
        try:
            filezip = zipfile.ZipFile(os.path.join(dirname, 'movie.zip'), 'r')
            filezip.extractall(dirname)
            filezip.close()
            # NOTE(review): Python 2 file() handle is never closed explicitly;
            # relies on CPython refcounting to release it
            movie = file(os.path.join(dirname, 'ru.xml'), 'rb').read().decode('utf8')
        except:
            self._movie_clear(dirname)
            return False, None
        
        self._movie_clear(dirname)
        
        body = re.compile(r'<Series>(.+?)</Series>', re.U|re.S).search(movie)
        if not body:
            return False, None
        
        body = body.group(1)
        
        res = {
            'id': int(id),
            'thumb': None,
            'fanart': None,
            'trailers': [],
            'info': {}
        }
        
        # directors and writers: gathered from the whole document (episode
        # tags included), split on '|', de-duplicated via a dict
        for tag in ('Director', 'Writer'):
            people = {}
            people_list = []
            [people_list.extend(x.split('|')) for x in re.compile(r'<' + tag + r'>([^<]+)</' + tag + r'>', re.U|re.S).findall(movie)]
            [people.update({x: 1}) for x in [x.strip() for x in people_list] if x]
            if people:
                res['info'][tag.lower()] = u', '.join([x for x in people.keys() if x])
        
        # scalar / list fields from the <Series> header block
        for tag, retag, typeof in (
                    ('plot', 'Overview', None),
                    ('mpaa', 'ContentRating', None),
                    ('premiered', 'FirstAired', None),
                    ('studio', 'Network', None),
                    ('title', 'SeriesName', None),
                    ('runtime', 'Runtime', None),
                    ('votes', 'RatingCount', None),
                    ('rating', 'Rating', float),
                    ('genre', 'Genre', list),
                    ('cast', 'Actors', list)
                    ):
            r = re.compile(r'<' + retag + r'>([^<]+)</' + retag + r'>', re.U|re.S).search(body)
            if r:
                r = r.group(1).strip()
                if typeof == float:
                    res['info'][tag] = float(r)
                elif typeof == list:
                    res['info'][tag] = [x for x in [x.strip() for x in r.split(u'|')] if x]
                    if tag == 'genre':
                        res['info'][tag] = u', '.join(res['info'][tag])
                else:
                    res['info'][tag] = r
        
        # year: taken from the first-aired date (YYYY-MM-DD)
        if 'premiered' in res['info']:
            res['info']['year'] = int(res['info']['premiered'].split('-')[0])
        
        # poster
        r = re.compile(r'<poster>([^<]+)</poster>', re.U|re.S).search(body)
        if r:
            res['thumb'] = 'http://thetvdb.com/banners/' + r.group(1).strip()
        
        # fanart
        r = re.compile(r'<fanart>([^<]+)</fanart>', re.U|re.S).search(body)
        if r:
            res['fanart'] = 'http://thetvdb.com/banners/' + r.group(1).strip()
        
        timeout = True
        # recent shows are cached only briefly — the site may still update them
        if 'year' not in res['info'] or int(res['info']['year']) >= time.gmtime(time.time()).tm_year:
            timeout = 7*24*60*60 # one week
        
        return timeout, res
            
    
    def _movie_clear(self, dirname):
        """Remove every file in *dirname*, then the directory itself."""
        for filename in os.listdir(dirname):
            try:
                os.unlink(os.path.join(dirname, filename))
            except:
                raise
        try:
            os.rmdir(dirname)
        except:
            raise
        
    
    def _search(self, name):
        """Query the GetSeries API; return None on transport error, else
        a dict with a dummy page marker and Russian-language series ids."""
        response = self.http.fetch('http://www.thetvdb.com/api/GetSeries.php?language=ru&seriesname=' + urllib.quote_plus(name.encode('utf8')), headers=self.headers)
        if response.error:
            return None
        
        res = []
        rows = re.compile('<Series>(.+?)</Series>', re.U|re.S).findall(response.body.decode('utf8'))
        if rows:
            
            recmd = re.compile('<seriesid>([0-9]+)</seriesid>', re.U|re.S)
            
            for row in [x for x in rows if x.find(u'<language>ru</language>') != -1]:
                r = recmd.search(row)
                if r:
                    res.append(int(r.group(1)))
                
        return {'pages': (1, 0, 1, 0), 'data': res}
    
    
    def _scraper(self, name, year):
        """Resolve *name* to the first matching series id; returns a
        (cache_timeout, id_or_None) pair for the cache layer."""
        timeout = True
        
        # recent shows are cached only briefly — the site may still update them
        if year and year >= time.gmtime(time.time()).tm_year:
            timeout = 7*24*60*60 # one week
        
        ids = self._search(name)
        
        if ids is None:
            return False, None
        
        elif not ids['data']:
            # remember the empty result for three days
            return 259200, None
        
        else:
            return timeout, ids['data'][0]
# Exemplo n.º 13
# 0
class RuTrackerHTTP:
    """HTTP wrapper for rutracker.lib with transparent authorization and
    captcha handling.

    Return convention shared by guest/get/post:
        None    - transport error (or user cancelled auth);
        0       - the forum reports it is temporarily disabled;
        unicode - the decoded page body on success.
    download() returns the raw .torrent bytes instead of a page body.
    """
    def __init__(self):
        # the "send password" link is only rendered for guests, so a match
        # against a page body means the current session is NOT authorized
        self.setting = Setting()
        self.re_auth = re.compile(r'profile\.php\?mode=sendpassword"')
        self.re_captcha = re.compile(r'<img src="(\/\/[^\/]+/captcha/[^"]+)"')
        self.re_captcha_sid = re.compile(
            r'<input type="hidden" name="cap_sid" value="([^"]+)">')
        self.re_captcha_code = re.compile(
            r'<input type="text" name="(cap_code_[^"]+)"')
        # captcha state carried between _auth() loop iterations
        self.captcha_sid = None
        self.captcha_code = None
        self.captcha_code_value = None
        self.http = HTTP()

        self.headers = {
            'User-Agent':
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36',
            'Accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3',
            'Cache-Control': 'no-cache',
            'Referer': 'http://rutracker.lib/forum/index.php'
        }

    def guest(self, url):
        """Fetch *url* anonymously (no cookie jar, no auth retry)."""
        response = self.http.fetch(url, headers=self.headers)
        if response.error:
            return None
        else:
            body = response.body.decode('windows-1251')
            if body.find(u'>форум временно отключен</p>') != -1:
                return 0
            return body

    def get(self, url):
        """Authorized GET; see the class docstring for return values."""
        return self._fetch('GET', url)

    def post(self, url, params):
        """Authorized POST; see the class docstring for return values."""
        return self._fetch('POST', url, params)

    def download(self, id):
        """Download the .torrent payload of topic *id*; None/0 on failure."""
        id = str(id)

        # make sure the session is authorized first
        html = self.get('http://rutracker.lib/forum/viewtopic.php?t=' + id)
        if not html:
            return html

        # hack the cookie jar: the download endpoint expects a bb_dl
        # cookie carrying the topic id
        cookies = cookielib.MozillaCookieJar()
        cookies.load(self.http.request.cookies)
        cookies.set_cookie(
            cookielib.Cookie(version=0,
                             name='bb_dl',
                             value=id,
                             port=None,
                             port_specified=False,
                             domain='.rutracker.lib',
                             domain_specified=False,
                             domain_initial_dot=False,
                             path='/',
                             path_specified=True,
                             secure=False,
                             expires=None,
                             discard=True,
                             comment=None,
                             comment_url=None,
                             rest={'HttpOnly': None},
                             rfc2109=False))
        cookies.save(self.http.request.cookies,
                     ignore_discard=True,
                     ignore_expires=True)

        # pull the torrent itself
        response = self.http.fetch('http://rutracker.lib/forum/dl.php?t=' + id,
                                   cookies='rutracker.moz',
                                   headers=self.headers,
                                   method='POST')
        if response.error:
            return None
        else:
            return response.body

    def _fetch(self, method, url, params=None):
        """Fetch *url* with the session cookie jar, re-authorizing and
        retrying for as long as the page still asks for login."""
        while True:
            response = self.http.fetch(url,
                                       cookies='rutracker.moz',
                                       headers=self.headers,
                                       method=method,
                                       params=params)
            if response.error:
                return None
            else:
                body = response.body.decode('windows-1251')
                if body.find(u'>форум временно отключен</p>') != -1:
                    return 0
                if not self.re_auth.search(body):
                    return body
                xbmc.log('RUTRACKER: Request auth', xbmc.LOGDEBUG)
                auth = self._auth()
                if not auth:
                    return auth

    def _auth(self):
        """Log in, solving a captcha if the forum demands one and asking
        the user for fresh credentials on failure.  Returns True on
        success, None on error/cancel, 0 if the forum is disabled."""
        self.captcha_sid, self.captcha_code, self.captcha_code_value = None, None, None
        while True:
            login = self.setting['rutracker_login']
            password = self.setting['rutracker_password']
            if not login or not password:
                self.setting.dialog()
                login = self.setting['rutracker_login']
                password = self.setting['rutracker_password']
                if not login or not password:
                    return None

            # NOTE(review): the default submit value is lowercase 'вход'
            # while the captcha retry uses 'Вход' — looks inconsistent;
            # confirm against the live login form
            params = {
                'login_username': login,
                'login_password': password,
                'login': r'вход'
            }
            if self.captcha_sid:
                params['login'] = r'Вход'
                params['cap_sid'] = self.captcha_sid
                params[self.captcha_code] = self.captcha_code_value

            response = self.http.fetch('http://rutracker.lib/forum/login.php',
                                       cookies='rutracker.moz',
                                       headers=self.headers,
                                       method='POST',
                                       params=params)
            self.captcha_sid, self.captcha_code, self.captcha_code_value = None, None, None
            if response.error:
                return None

            body = response.body.decode('windows-1251')

            if body.find(u'>форум временно отключен</p>') != -1:
                return 0

            if not self.re_auth.search(body):
                return True

            # still not authorized: check whether a captcha is demanded
            r = self.re_captcha.search(body)
            if r:
                r_sid = self.re_captcha_sid.search(body)
                if not r_sid:
                    return None
                self.captcha_sid = r_sid.group(1)
                r_code = self.re_captcha_code.search(body)
                if not r_code:
                    return None
                self.captcha_code = r_code.group(1)
                self.captcha_code_value = self._captcha('http:' + r.group(1))
                if not self.captcha_code_value:
                    return None

            # get login
            k = xbmc.Keyboard('', 'Enter login')
            k.doModal()
            if k.isConfirmed():
                login = k.getText()
            else:
                return None

            # get password
            k = xbmc.Keyboard('', 'Enter password', True)
            k.doModal()
            if k.isConfirmed():
                password = k.getText()
            else:
                return None

            if not login or not password:
                return None

            self.setting['rutracker_login'] = login
            self.setting['rutracker_password'] = password

    def _captcha(self, captcha):
        """Download the captcha image at *captcha*, display it over the
        current window and ask the user to type the code.  Returns the
        entered code, or None on error/cancel/empty input."""
        response = self.http.fetch(captcha, headers=self.headers, method='GET')
        if response.error:
            return

        import tempfile
        filename = tempfile.gettempdir() + '/captcha'
        file(filename, 'wb').write(response.body)

        win = xbmcgui.Window(xbmcgui.getCurrentWindowId())

        # width = 120px, height = 72px
        image = xbmcgui.ControlImage(win.getWidth() / 2 - int(120 / 2), 20,
                                     120, 72, filename)
        win.addControl(image)
        k = xbmc.Keyboard('', 'Enter captcha code')
        k.doModal()
        code = k.getText() if k.isConfirmed() else None
        win.removeControl(image)
        return code if code else None
# Exemplo n.º 14
# 0
class RuTrackerHTTP:
    """HTTP wrapper for rutracker.nl with transparent authorization and
    captcha handling.

    Return convention shared by guest/get/post:
        None    - transport error (or user cancelled auth);
        0       - the forum reports it is temporarily disabled;
        unicode - the decoded page body on success.
    download() returns the raw .torrent bytes instead of a page body.
    """
    def __init__(self):
        # the "send password" link is only rendered for guests, so a match
        # against a page body means the current session is NOT authorized
        self.setting = Setting()
        self.re_auth = re.compile(r'profile\.php\?mode=sendpassword"')
        self.re_captcha = re.compile(r'<img src="(\/\/[^\/]+/captcha/[^"]+)"')
        self.re_captcha_sid = re.compile(r'<input type="hidden" name="cap_sid" value="([^"]+)">')
        self.re_captcha_code = re.compile(r'<input type="text" name="(cap_code_[^"]+)"')
        # captcha state carried between _auth() loop iterations
        self.captcha_sid = None
        self.captcha_code = None
        self.captcha_code_value = None
        self.http = HTTP()
        
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'ru-ru,ru;q=0.8,en-us;q=0.5,en;q=0.3',
            'Cache-Control': 'no-cache',
            'Referer': 'http://rutracker.nl/forum/index.php'
        }
    
    def guest(self, url):
        """Fetch *url* anonymously (no cookie jar, no auth retry)."""
        response = self.http.fetch(url, headers=self.headers)
        if response.error:
            return None
        else:
            body = response.body.decode('windows-1251')
            if body.find(u'>форум временно отключен</p>') != -1:
                return 0
            return body
    
    def get(self, url):
        """Authorized GET; see the class docstring for return values."""
        return self._fetch('GET', url)
    
    def post(self, url, params):
        """Authorized POST; see the class docstring for return values."""
        return self._fetch('POST', url, params)
    
    def download(self, id):
        """Download the .torrent payload of topic *id*; None/0 on failure."""
        id = str(id)

        # make sure the session is authorized first
        html = self.get('http://rutracker.nl/forum/viewtopic.php?t=' + id)
        if not html:
            return html

        # hack the cookie jar: the download endpoint expects a bb_dl
        # cookie carrying the topic id
        cookies = cookielib.MozillaCookieJar()
        cookies.load(self.http.request.cookies)
        cookies.set_cookie(cookielib.Cookie(version=0, name='bb_dl', value=id, port=None, port_specified=False, domain='.rutracker.nl', domain_specified=False, domain_initial_dot=False, path='/', path_specified=True, secure=False, expires=None, discard=True, comment=None, comment_url=None, rest={'HttpOnly': None}, rfc2109=False))
        cookies.save(self.http.request.cookies, ignore_discard=True, ignore_expires=True)

        # pull the torrent itself
        response = self.http.fetch('http://rutracker.nl/forum/dl.php?t=' + id, cookies='rutracker.moz', headers=self.headers, method='POST')
        if response.error:
            return None
        else:
            return response.body
    
    
    def _fetch(self, method, url, params=None):
        """Fetch *url* with the session cookie jar, re-authorizing and
        retrying for as long as the page still asks for login."""
        while True:
            response = self.http.fetch(url, cookies='rutracker.moz', headers=self.headers, method=method, params=params)
            if response.error:
                return None
            else:
                body = response.body.decode('windows-1251')
                if body.find(u'>форум временно отключен</p>') != -1:
                    return 0
                if not self.re_auth.search(body):
                    return body
                xbmc.log('RUTRACKER: Request auth', xbmc.LOGDEBUG)
                auth = self._auth()
                if not auth:
                    return auth
        
    def _auth(self):
        """Log in, solving a captcha if the forum demands one and asking
        the user for fresh credentials on failure.  Returns True on
        success, None on error/cancel, 0 if the forum is disabled."""
        self.captcha_sid, self.captcha_code, self.captcha_code_value = None, None, None
        while True:
            login = self.setting['rutracker_login']
            password = self.setting['rutracker_password']
            if not login or not password:
                self.setting.dialog()
                login = self.setting['rutracker_login']
                password = self.setting['rutracker_password']
                if not login or not password:
                    return None

            # NOTE(review): the default submit value is lowercase 'вход'
            # while the captcha retry uses 'Вход' — looks inconsistent;
            # confirm against the live login form
            params = {'login_username': login, 'login_password': password, 'login': r'вход'}
            if self.captcha_sid:
                params['login'] = r'Вход'
                params['cap_sid'] = self.captcha_sid
                params[self.captcha_code] = self.captcha_code_value

            response = self.http.fetch('http://rutracker.nl/forum/login.php', cookies='rutracker.moz', headers=self.headers, method='POST', params=params)
            self.captcha_sid, self.captcha_code, self.captcha_code_value = None, None, None
            if response.error:
                return None

            body = response.body.decode('windows-1251')

            if body.find(u'>форум временно отключен</p>') != -1:
                return 0

            if not self.re_auth.search(body):
                return True

            # still not authorized: check whether a captcha is demanded
            r = self.re_captcha.search(body)
            if r:
                r_sid = self.re_captcha_sid.search(body)
                if not r_sid:
                    return None
                self.captcha_sid = r_sid.group(1)
                r_code = self.re_captcha_code.search(body)
                if not r_code:
                    return None
                self.captcha_code = r_code.group(1)
                self.captcha_code_value = self._captcha('http:' + r.group(1))
                if not self.captcha_code_value:
                    return None

            # get login
            k = xbmc.Keyboard('', 'Enter login')
            k.doModal()
            if k.isConfirmed():
                login = k.getText()
            else:
                return None

            # get password
            k = xbmc.Keyboard('', 'Enter password', True)
            k.doModal()
            if k.isConfirmed():
                password = k.getText()
            else:
                return None

            if not login or not password:
                return None

            self.setting['rutracker_login'] = login
            self.setting['rutracker_password'] = password


    def _captcha(self, captcha):
        """Download the captcha image at *captcha*, display it over the
        current window and ask the user to type the code.  Returns the
        entered code, or None on error/cancel/empty input."""
        response = self.http.fetch(captcha, headers=self.headers, method='GET')
        if response.error:
            return

        import tempfile
        filename = tempfile.gettempdir() + '/captcha'
        file(filename, 'wb').write(response.body)

        win = xbmcgui.Window(xbmcgui.getCurrentWindowId())

        # width = 120px, height = 72px
        image = xbmcgui.ControlImage(win.getWidth()/2 - int(120/2), 20, 120, 72, filename)
        win.addControl(image)
        k = xbmc.Keyboard('', 'Enter captcha code')
        k.doModal()
        code = k.getText() if k.isConfirmed() else None
        win.removeControl(image)
        return code if code else None