Example #1
    def get_download_info(self, url, cat, season, episode):
        if LostFilmShow.get_seasons_url(url) is None:
            return None

        def parse_download(table):
            quality = table.find('div', class_="inner-box--label").text.strip()
            download_url = table.find('a').attrs['href']

            return LostFileDownloadInfo(LostFilmQuality.parse(quality),
                                        download_url)

        cookies = self.get_cookies()

        download_redirect_url = self.download_url_pattern.format(
            cat=cat, season=season, episode=episode)
        download_redirect = scraper.get(
            download_redirect_url,
            headers=self._headers,
            cookies=cookies,
            **self.tracker_settings.get_requests_kwargs())

        soup = get_soup(download_redirect.text)
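        # the redirect page only contains a <meta http-equiv="refresh"> tag whose content
        # looks like "0; url=<path>"; take the part after "url=" as the download page URL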
        meta_content = soup.find('meta').attrs['content']
        download_page_url = meta_content.split(';')[1].strip()[4:]

        download_page = scraper.get(
            download_page_url,
            headers=self._headers,
            **self.tracker_settings.get_requests_kwargs())

        soup = get_soup(download_page.text)
        return list(
            map(parse_download, soup.find_all('div',
                                              class_='inner-box--item')))
Example #2
    def get_download_info(self, url, season, episode):
        match = self._regex.match(url)

        if match is None:
            return None

        def parse_download(table):
            quality = table.find('img').attrs['src'][11:-4]
            download_url = table.find('a').attrs['href']
            return {
                'quality': self._parse_quality(quality),
                'download_url': download_url
            }

        cat = int(match.group('cat'))

        cookies = self.get_cookies()

        download_redirect_url = self.download_url_pattern.format(cat=cat, season=season, episode=episode)
        download_redirect = requests.get(download_redirect_url, headers=self._headers, cookies=cookies,
                                         timeout=self.tracker_settings.requests_timeout)

        soup = get_soup(download_redirect.text)
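        # the redirect page holds a <meta http-equiv="refresh"> tag; extract the target
        # URL from its content attribute ("0; url=...")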
        meta_content = soup.find('meta').attrs['content']
        download_page_url = meta_content.split(';')[1].strip()[4:]

        download_page = requests.get(download_page_url, headers=self._headers,
                                     timeout=self.tracker_settings.requests_timeout)

        soup = get_soup(download_page.text)
        return list(map(parse_download, soup.find_all('table')[2:]))
Example #3
    def parse_url(self, url):
        if not self.can_parse_url(url):
            return None

        parsed_url = urlparse(url)
        match = self._regex.match(parsed_url.path)
        if match is None:
            return None

        r = requests.get(url,
                         allow_redirects=True,
                         timeout=self.tracker_settings.requests_timeout)
        soup = get_soup(r.content)
        if soup.h2 is None:
            # rutracker doesn't return 404 for a non-existing topic;
            # it returns a regular page with the text 'Тема не найдена',
            # so we detect this case by the missing heading of the requested topic
            return None
        title = soup.h2.string.strip()
        if title.lower().startswith(self.title_header_start):
            title = title[len(self.title_header_start):].strip()
        if title.lower().endswith(self.title_header_end):
            title = title[:-len(self.title_header_end)].strip()

        return self._get_title(title)
Example #4
    def get_download_url(self, url, vformat):
        if not self.can_parse_url(url):
            return None

        match = self._tracker_regex.match(url)
        if match is None:
            return None

        r = requests.get(url,
                         allow_redirects=True,
                         **self.tracker_settings.get_requests_kwargs())
        soup = get_soup(r.text)

        flist = self._find_format_list(soup)

        try:
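            # default to the last torrent-download-link on the page; if the requested
            # format is present in the page's format list, use the link at that index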
            torrent_idx = -1

            if flist is not None and vformat is not None:
                torrent_idx = flist.index(vformat) if vformat in flist else -1

            a = soup.find_all("a", class_="torrent-download-link")[torrent_idx]
        except IndexError:
            return None

        return None if a is None else "https://www.anilibria.tv" + a["href"]
Example #5
    def login(cls, username, password):
        login_url = "https://login1.bogi.ru/login.php?referer=https%3A%2F%2Fwww.lostfilm.tv%2F"
        profile_url = 'http://www.lostfilm.tv/my.php'
        search_usess_re = re.compile(u'\(usess=([a-f0-9]{32})\)', re.IGNORECASE)

        cls_params = {'login': username, 'password': password}

        s = Session()
        # login over bogi.ru
        params = {"login": username, "password": password}
        r1 = s.post(login_url, params, verify=False)
        # in case of failed login, bogi redirects to:
        # http://www.lostfilm.tv/blg.php?code=6&text=incorrect%20login/password
        if r1.request.url != login_url:
            raise Exception('Can\'t login into lostfilm.tv')

        soup = get_soup(r1.text)
        inputs = soup.findAll("input")
        action = soup.find("form")['action']
        cparams = dict([(i['name'], i['value']) for i in inputs if 'value' in i.attrs])
        cls_params['bogi_uid'] = cparams['uid']
        cls_params['email'] = cparams['email']
        s.post(action, cparams, verify=False, allow_redirects=False)
        r3 = s.get(profile_url)
        cls_params['uid'] = s.cookies['uid']
        cls_params['_pass'] = s.cookies['pass']
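        # the profile page embeds the usess token as '(usess=<32 hex chars>)'; extract it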
        cls_params['usess'] = search_usess_re.findall(r3.text)[0]

        return cls(**cls_params)
Example #6
    def parse_url(self, url, parse_series=False):
        """
        :rtype: requests.Response | LostFilmShow
        """
        name, url = LostFilmShow.get_seasons_url_info(url)
        if url is None:
            return None

        response = scraper.get(url,
                               headers=self._headers,
                               allow_redirects=False,
                               **self.tracker_settings.get_requests_kwargs())
        if response.status_code != 200 or response.url != url \
            or '<meta http-equiv="refresh" content="0; url=/">' in response.text:
            return response
        # lxml has issues parsing lostfilm pages on Windows, so fall back to html5lib there
        soup = get_soup(response.text,
                        'html5lib' if sys.platform == 'win32' else None)
        title_block = soup.find('div', class_='title-block')
        follow_show = title_block.find(
            'div', onclick=self._follow_show_re).attrs['onclick']
        follow_show_match = self._follow_show_re.match(follow_show)

        result = LostFilmShow(
            original_name=title_block.find('h2', class_='title-en').text,
            russian_name=title_block.find('h1', class_='title-ru').text,
            url_name=name,
            cat=int(follow_show_match.group('cat')))
        if parse_series:
            for season in self._parse_series(soup):
                result.add_season(season)
        return result
Example #7
 def get_download_url(self, url):
     cookies = self.get_cookies()
     page = requests.get(url,
                         cookies=cookies,
                         timeout=self.tracker_settings.requests_timeout)
     page_soup = get_soup(page.content)
     download = page_soup.find("a", {"class": "genmed"})
     return "http://tapochek.net/" + download.attrs['href']
Example #8
 def get_download_url(self, url):
     cookies = self.get_cookies()
     page = requests.get(url,
                         cookies=cookies,
                         **self.tracker_settings.get_requests_kwargs())
     page_soup = get_soup(page.content)
     download = page_soup.find("a", href=re.compile("download"))
     return "http://tapochek.net/" + download.attrs['href']
Example #9
 def get_download_url(self, url, vformat):
     cookies = self.get_cookies()
     page = requests.get(url, cookies=cookies, **self.tracker_settings.get_requests_kwargs())
     page_soup = get_soup(page.content)
     flist = self._find_format_list(page_soup)
     for f in flist:
         if f.text.strip() == vformat:
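             # the format link is an in-page anchor ("#<id>"); strip the leading '#'
             # and find the torrent link inside the tab with that id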
             href = f['href'][1:]
             at = page_soup.select_one('div[class="torrent"] div#'+href+' a')
             return self.root_url + at['href']
     return None
Example #10
 def get_download_url(self, url):
     cookies = self.get_cookies()
     page = requests.get(url, cookies=cookies, **self.tracker_settings.get_requests_kwargs())
     page_soup = get_soup(page.text, 'html5lib' if sys.platform == 'win32' else None)
     anchors = page_soup.find_all("a")
     da = list(filter(lambda tag: tag.has_attr('href') and tag.attrs['href'].startswith("download.php?id="),
                      anchors))
     # not a free torrent
     if len(da) == 0:
         return None
     download_url = 'http://' + self.tracker_domains[0] + '/forum/' + da[0].attrs['href']
     return download_url
Example #11
 def get_download_url(self, url):
     cookies = self.get_cookies()
     page = requests.get(url, cookies=cookies, **self.tracker_settings.get_requests_kwargs())
     page_soup = get_soup(page.text, 'html5lib' if sys.platform == 'win32' else None)
     anchors = page_soup.find_all("a")
     da = list(filter(lambda tag: tag.has_attr('href') and tag.attrs['href'].startswith("download.php?id="),
                      anchors))
     # not a free torrent
     if len(da) == 0:
         return None
     download_url = 'https://' + self.tracker_domains[0] + '/forum/' + da[0].attrs['href']
     return download_url
Example #12
    def test_default_not_lxml_parser(self):
        lxml_module = sys.modules.get('lxml', None)
        if 'lxml' in sys.modules:
            del sys.modules['lxml']

        try:
            soup = get_soup(self.CONTENT)

            self.assertIsNotNone(soup)
            self.assertTrue(isinstance(soup.builder, HTMLParserTreeBuilder))
        finally:
            if lxml_module:
                sys.modules['lxml'] = lxml_module
Example #13
    def parse_url(self, url):
        match = self._regex.match(url)
        if match is None:
            return None

        r = requests.get(url, allow_redirects=False, timeout=self.tracker_settings.requests_timeout)

        soup = get_soup(r.text)
        if soup.h1 is None:
            # rutracker doesn't return 404 for a non-existing topic;
            # it returns a regular page with the text 'Тема не найдена',
            # so we detect this case by the missing heading of the requested topic
            return None
        title = soup.h1.text.strip()

        return {'original_name': title}
Example #14
    def parse_url(self, url):
        if not self.can_parse_url(url):
            return None

        r = requests.get(url, **self.tracker_settings.get_requests_kwargs())
        if r.status_code != 200 or (r.url != url and not self.can_parse_url(r.url)):
            return None
        r.encoding = 'utf-8'
        soup = get_soup(r.text)
        title = soup.title.string.strip()
        for title_header in self.title_headers:
            if title.lower().startswith(title_header):
                title = title[len(title_header):].strip()
                break

        return self._get_title(title)
Example #15
    def parse_url(self, url):
        if not self.can_parse_url(url):
            return None

        r = requests.get(url, **self.tracker_settings.get_requests_kwargs())
        if r.status_code != 200 or (r.url != url and not self.can_parse_url(r.url)):
            return None
        r.encoding = 'utf-8'
        soup = get_soup(r.text)
        title = soup.title.string.strip()
        for title_header in self.title_headers:
            if title.lower().startswith(title_header):
                title = title[len(title_header):].strip()
                break

        return self._get_title(title)
Example #16
    def parse_url(self, url):
        match = self._regex.match(url)
        if match is None:
            return None

        r = requests.get(url, allow_redirects=True, **self.tracker_settings.get_requests_kwargs())

        soup = get_soup(r.content)
        if soup.h1 is None:
            # rutracker doesn't return 404 for a non-existing topic;
            # it returns a regular page with the text 'Тема не найдена',
            # so we detect this case by the missing heading of the requested topic
            return None
        title = soup.h1.text.strip()

        return {'original_name': title}
Example #17
    def get_download_url(self, url):
        if not self.can_parse_url(url):
            return None

        match = self._tracker_regex.match(url)
        if match is None:
            return None

        r = requests.get(url, allow_redirects=True, **self.tracker_settings.get_requests_kwargs())
        soup = get_soup(r.text)

        try:
            a = soup.find_all("a", class_="torrent-download-link")[-1]
        except IndexError:
            return None

        return None if a is None else "https://www."+self.tracker_domain+a["href"]
Example #18
    def parse_url(self, url, parse_series=False):
        match = self._regex.match(url)
        if match is None:
            return None

        r = requests.get(url, headers=self._headers, allow_redirects=False,
                         timeout=self.tracker_settings.requests_timeout)
        if r.status_code != 200:
            return r
        # lxml has issues parsing lostfilm pages on Windows, so fall back to html5lib there
        soup = get_soup(r.text, 'html5lib' if sys.platform == 'win32' else None)
        title = soup.find('div', class_='mid').find('h1').string
        result = self._parse_title(title)
        result['cat'] = int(match.group('cat'))
        if parse_series:
            result.update(self._parse_series(soup))
        return result
Example #19
    def parse_url(self, url):
        if not self.can_parse_url(url):
            return None

        match = self._tracker_regex.match(url)
        if match is None:
            return None

        r = requests.get(url, allow_redirects=True, **self.tracker_settings.get_requests_kwargs())
        soup = get_soup(r.text)

        if not soup.title.string.endswith(self.title_end):
            return None

        title = soup.title.string[:-len(self.title_end)].strip()

        return {'original_name': title}
Example #20
    def parse_url(self, url):
        url = self.get_url(url)
        if not url or not self.can_parse_url(url):
            return None
        parsed_url = urlparse(url)
        if not parsed_url.path == '/forum/viewtopic.php':
            return None

        r = requests.get(url, allow_redirects=False, **self.tracker_settings.get_requests_kwargs())
        if r.status_code != 200:
            return None
        soup = get_soup(r.text)
        title = soup.title.string.strip()
        for title_header in self.title_headers:
            if title.lower().endswith(title_header):
                title = title[:-len(title_header)].strip()
                break

        return self._get_title(title)
Example #21
    def get_last_torrent_update(self, url):
        response = requests.get(url,
                                **self.tracker_settings.get_requests_kwargs())
        response.raise_for_status()

        soup = get_soup(response.text)
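        # the last update time is printed inside the "mn1_content" block in a <b> element
        # whose text matches last_update_text_re; group(1) holds the timestamp itself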
        content = soup.find("div", {"class": "mn1_content"})
        last_update_text_element = content.find('b',
                                                text=self.last_update_text_re)
        if last_update_text_element is None:
            return None

        last_update_all_text = six.text_type(last_update_text_element.string)
        last_update_text_match = self.last_update_text_re.match(
            last_update_all_text)
        last_update_text = last_update_text_match.group(1)

        parsed_datetime = self.date_parser.parse(last_update_text)
        return parsed_datetime.astimezone(pytz.utc)
Example #22
    def parse_url(self, url):
        url = self.get_url(url)
        if not url or not self.can_parse_url(url):
            return None
        parsed_url = urlparse(url)
        if not parsed_url.path == '/forum/viewtopic.php':
            return None

        r = requests.get(url, allow_redirects=False, **self.tracker_settings.get_requests_kwargs())
        if r.status_code != 200:
            return None
        soup = get_soup(r.text)
        title = soup.title.string.strip()
        for title_header in self.title_headers:
            if title.lower().endswith(title_header):
                title = title[:-len(title_header)].strip()
                break

        return self._get_title(title)
Example #23
    def parse_url(self, url):
        if not self.can_parse_url(url):
            return None
        parsed_url = urlparse(url)
        match = self._regex.match(parsed_url.path)
        if match is None:
            return None

        r = requests.get(url, allow_redirects=False, timeout=self.tracker_settings.requests_timeout)
        if r.status_code != 200:
            return None
        r.encoding = 'utf-8'
        soup = get_soup(r.text)
        title = soup.title.string.strip()
        for title_header in self.title_headers:
            if title.lower().startswith(title_header):
                title = title[len(title_header):].strip()
                break

        return self._get_title(title)
Example #24
    def test_parse_all_series(self):
        pytest.fail("This test need to be updated")

        error_hrefs = []
        lock = Lock()
        queue = Queue()
        tracker = LostFilmTVTracker()
        threads = []

        def process():
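            # worker: pull show URLs from the queue and record any that fail to parse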
            while True:
                try:
                    url = queue.get(False)
                except Empty:
                    return
                try:
                    tracker.parse_url(url, True)
                except Exception:
                    with lock:
                        error_hrefs.append(url)

        all_series = requests.get('http://www.lostfilm.tv/serials.php')
        soup = get_soup(all_series.text, 'html5')

        mid = soup.find('div', class_='mid')
        series = mid.find_all('a', class_='bb_a')
        for s in series:
            queue.put('http://www.lostfilm.tv' + s.attrs['href'])

        for i in range(0, 20):
            t = Thread(target=process)
            threads.append(t)
            t.start()

        for i in range(0, len(threads)):
            threads[i].join()

        for e in error_hrefs:
            print("Error parse: {0}".format(e))

        self.assertEqual(0, len(error_hrefs))
Example #25
    def _get_params(self):
        with DBSession() as db:
            cred = db.query(UTorrentCredentials).first()

            if not cred:
                return False

            if not cred.port:
                cred.port = self.DEFAULT_PORT

            try:
                session = requests.Session()
                session.auth = (cred.username, cred.password)
                target = self.REQUEST_FORMAT.format(cred.host, cred.port)
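                # the uTorrent WebUI requires a CSRF token; token.html returns it inside a <div>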
                response = session.get(target + "token.html",
                                       auth=(cred.username, cred.password))
                soup = get_soup(response.text)
                token = soup.div.text
                return {'session': session, 'target': target, 'token': token}
            except Exception as e:
                return False
Example #26
    def parse_url(self, url):
        match = self._regex.match(url)
        if match is None:
            return None

        r = requests.get(url, allow_redirects=False, **self.tracker_settings.get_requests_kwargs())
        # tr.anidub.com doesn't specify the encoding in the Content-Type header
        r.encoding = 'utf-8'
        soup = get_soup(r.text)
        title = soup.find('span', id='news-title')
        if title is None:
            return None
        title = title.text.strip()
        result = {'original_name': title}
        # Format
        format_list = []
        flist = self._find_format_list(soup)
        for q in flist:
            format_list.append(q.text.strip())
        result['format_list'] = format_list
        return result
Example #27
    def parse_url(self, url):
        match = self._regex.match(url)
        if match is None:
            return None

        # without a trailing slash the response gets mangled
        if not url.endswith("/"):
            url += "/"
        r = requests.get(url, allow_redirects=False, **self.tracker_settings.get_requests_kwargs())

        soup = get_soup(r.content)
        if soup.h1 is None:
            # tapochek doesn't return 404 for a non-existing topic;
            # it returns a regular page with the text 'Тема не найдена',
            # so we detect this case by the missing heading of the requested topic
            return None
        title = soup.title.string.strip()
        if title.lower().endswith(self.title_header):
            title = title[:-len(self.title_header)].strip()

        return {'original_name': title}
Example #28
    def _get_params(self):
        with DBSession() as db:
            cred = db.query(UTorrentCredentials).first()

            if not cred:
                return False

            if not cred.port:
                cred.port = self.DEFAULT_PORT

            try:
                session = requests.Session()
                session.auth = (cred.username, cred.password)
                target = self.REQUEST_FORMAT.format(cred.host, cred.port)
                response = session.get(target + "token.html",
                                       auth=(cred.username, cred.password))
                soup = get_soup(response.text)
                token = soup.div.text
                return {'session': session, 'target': target, 'token': token}
            except Exception as e:
                return False
Example #29
    def test_parse_all_series(self):
        error_hrefs = []
        lock = Lock()
        queue = Queue()
        tracker = LostFilmTVTracker()
        threads = []

        def process():
            while True:
                try:
                    url = queue.get(False)
                except Empty:
                    return
                try:
                    tracker.parse_url(url, True)
                except Exception:
                    with lock:
                        error_hrefs.append(url)

        all_series = requests.get('http://www.lostfilm.tv/serials.php')
        soup = get_soup(all_series.text, 'html5')

        mid = soup.find('div', class_='mid')
        series = mid.find_all('a', class_='bb_a')
        for s in series:
            queue.put('http://www.lostfilm.tv' + s.attrs['href'])

        for i in range(0, 20):
            t = Thread(target=process)
            threads.append(t)
            t.start()

        for i in range(0, len(threads)):
            threads[i].join()

        for e in error_hrefs:
            print("Error parse: {0}".format(e))

        self.assertEqual(0, len(error_hrefs))
Example #30
    def parse_url(self, url):
        if not self.can_parse_url(url):
            return None
        parsed_url = urlparse(url)
        match = self._regex.match(parsed_url.path)
        if match is None:
            return None

        r = requests.get(url,
                         allow_redirects=False,
                         timeout=self.tracker_settings.requests_timeout)
        if r.status_code != 200:
            return None
        r.encoding = 'utf-8'
        soup = get_soup(r.text)
        title = soup.title.string.strip()
        for title_header in self.title_headers:
            if title.lower().startswith(title_header):
                title = title[len(title_header):].strip()
                break

        return self._get_title(title)
Example #31
    def parse_url(self, url):
        match = self._regex.match(url)
        if match is None:
            return None

        # without a trailing slash the response gets mangled
        if not url.endswith("/"):
            url += "/"
        r = requests.get(url,
                         allow_redirects=False,
                         timeout=self.tracker_settings.requests_timeout)

        soup = get_soup(r.content)
        if soup.h1 is None:
            # tapochek doesn't return 404 for a non-existing topic;
            # it returns a regular page with the text 'Тема не найдена',
            # so we detect this case by the missing heading of the requested topic
            return None
        title = soup.title.string.strip()
        if title.lower().endswith(self.title_header):
            title = title[:-len(self.title_header)].strip()

        return {'original_name': title}
Example #32
    def parse_url(self, url):
        if not self.can_parse_url(url):
            return None

        match = self._tracker_regex.match(url)
        if match is None:
            return None

        r = requests.get(url,
                         allow_redirects=True,
                         **self.tracker_settings.get_requests_kwargs())
        soup = get_soup(r.text)

        title = soup.title.string

        if self._title_regex.match(title) is None:
            return None

        format_list = self._find_format_list(soup)
        if format_list is not None:
            format_list.sort()

        return {'original_name': title, 'format_list': format_list}
Example #33
    def parse_url(self, url):
        if not self.can_parse_url(url):
            return None

        parsed_url = urlparse(url)
        match = self._regex.match(parsed_url.path)
        if match is None:
            return None

        r = requests.get(url, allow_redirects=True, **self.tracker_settings.get_requests_kwargs())
        soup = get_soup(r.content)
        if soup.h2 is None:
            # rutracker doesn't return 404 for a non-existing topic;
            # it returns a regular page with the text 'Тема не найдена',
            # so we detect this case by the missing heading of the requested topic
            return None
        title = soup.h2.string.strip()
        if title.lower().startswith(self.title_header_start):
            title = title[len(self.title_header_start):].strip()
        if title.lower().endswith(self.title_header_end):
            title = title[:-len(self.title_header_end)].strip()

        return self._get_title(title)
Example #34
    def login(self, username, password):
        s = Session()
        s.headers.update(self._headers)
        # login over bogi.ru
        params = {"login": username, "password": password}
        r1 = s.post(self.login_url, params, verify=False, timeout=self.tracker_settings.requests_timeout)
        # in case of failed login, bogi redirects to:
        # http://www.lostfilm.tv/blg.php?code=6&text=incorrect%20login/password
        if r1.request.url != self.login_url:
            url = urlparse(r1.url)
            if url.netloc == self.netloc:
                query = parse_qs(url.query)
                code = int(query.get('code', ['-1'])[0])
                text = query.get('text', ["-"])[0]
                r1.encoding = 'windows-1251'
                message = r1.text
                raise LostFilmTVLoginFailedException(code, text, message)
            else:
                raise LostFilmTVLoginFailedException(-1, None, None)

        # callback to lostfilm.tv
        soup = get_soup(r1.text)
        inputs = soup.findAll("input")
        action = soup.find("form")['action']
        cparams = dict([(i['name'], i['value']) for i in inputs if 'value' in i.attrs])
        r2 = s.post(action, cparams, verify=False, allow_redirects=False, timeout=self.tracker_settings.requests_timeout)
        if r2.status_code != 302 or r2.headers.get('location', None) != '/':
            raise LostFilmTVLoginFailedException(-2, None, None)

        # call to profile page
        r3 = s.get(self.profile_url, timeout=self.tracker_settings.requests_timeout)

        # read required params
        self.c_uid = s.cookies['uid']
        self.c_pass = s.cookies['pass']
        self.c_usess = self.search_usess_re.findall(r3.text)[0]
Example #35
    def test_direct_lxml_parser(self):
        soup = get_soup(self.CONTENT, 'lxml')

        self.assertIsNotNone(soup)
        self.assertTrue(isinstance(soup.builder, LXMLTreeBuilder))
Example #36
 def get_download_url(self, url):
     cookies = self.get_cookies()
     page = requests.get(url, cookies=cookies, timeout=self.tracker_settings.requests_timeout)
     page_soup = get_soup(page.content)
     download = page_soup.find("a", {"class": "genmed"})
     return download.attrs['href']
Example #37
def upgrade_0_to_1(operations_factory):
    from monitorrent.settings_manager import SettingsManager
    settings_manager = SettingsManager()
    tracker_settings = None
    with operations_factory() as operations:
        operations.add_column(AnilibriaTvTopic.__tablename__,
                              Column('format', String, nullable=True))
        operations.add_column(AnilibriaTvTopic.__tablename__,
                              Column('format_list', String, nullable=True))
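        # backfill the new columns by re-fetching each anilibria topic page
        # and extracting the available formats from it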
        topic_values = []
        m = MetaData()
        ani_topics = Table(
            AnilibriaTvTopic.__tablename__, m,
            Column("id", Integer, ForeignKey('topics.id'), primary_key=True),
            Column("hash", String, nullable=True),
            Column("format", String, nullable=True),
            Column("format_list", String, nullable=True))
        m1 = MetaData()
        base_topics = Table(Topic.__tablename__, m1,
                            Column("id", Integer, primary_key=True),
                            Column("url", String), Column("type", String),
                            Column('status', String))
        topics = operations.db.query(base_topics).filter(
            base_topics.c.type == PLUGIN_NAME)
        for topic in topics:
            raw_topic = row2dict(topic, base_topics)
            # noinspection PyBroadException
            try:
                if tracker_settings is None:
                    tracker_settings = settings_manager.tracker_settings
                response = requests.get(
                    raw_topic['url'], **tracker_settings.get_requests_kwargs())
                soup = get_soup(response.text)
                format_list = AnilibriaTvTracker._find_format_list(soup)
                format_list.sort()
                topic_values.append({
                    'id': raw_topic['id'],
                    'format_list': ",".join(format_list),
                    'format': format_list[0],
                    'status': Status.Ok.__str__()
                })
            except:
                exc_info = sys.exc_info()
                print(u''.join(traceback.format_exception(*exc_info)))
                topic_values.append({
                    'id': raw_topic['id'],
                    'status': Status.Error.__str__()
                })

        for upd in topic_values:
            if 'format' in upd:
                operations.db.execute(
                    ani_topics.update(
                        whereclause=(ani_topics.c.id == upd['id']),
                        values={
                            'format_list': upd['format_list'],
                            'format': upd['format']
                        }))
            operations.db.execute(
                base_topics.update(whereclause=(base_topics.c.id == upd['id']),
                                   values={'status': upd['status']}))
Example #38
    def test_direct_html5lib_parser(self):
        soup = get_soup(self.CONTENT, 'html5lib')

        self.assertIsNotNone(soup)
        self.assertTrue(isinstance(soup.builder, HTML5TreeBuilder))
Example #39
 def get_download_url(self, url):
     cookies = self.get_cookies()
     page = requests.get(url, cookies=cookies, **self.tracker_settings.get_requests_kwargs())
     page_soup = get_soup(page.content)
     download = page_soup.find("a", {"class": "genmed"})
     return "http://tapochek.net/"+download.attrs['href']
Example #40
 def get_download_url(self, url):
     cookies = self.get_cookies()
     page = requests.get(url, cookies=cookies, **self.tracker_settings.get_requests_kwargs())
     page_soup = get_soup(page.content)
     download = page_soup.find("a", {"class": "genmed"})
     return download.attrs['href']
Example #41
def upgrade_3_to_4(engine, operations_factory):
    # Version 3
    m3 = MetaData()
    lostfilm_series_3 = Table(
        'lostfilmtv_series', m3,
        Column("id", Integer, ForeignKey('topics.id'), primary_key=True),
        Column("search_name", String, nullable=False),
        Column("season", Integer, nullable=True),
        Column("episode", Integer, nullable=True),
        Column("quality", String, nullable=False))
    lostfilm_credentials_3 = Table(
        "lostfilmtv_credentials", m3,
        Column('username', String, primary_key=True),
        Column('password', String, primary_key=True), Column('uid', String),
        Column('pass', String), Column('usess', String),
        Column('default_quality', String, nullable=False, server_default='SD'))

    # Version 4
    m4 = MetaData(engine)
    topic_last = Table('topics', m4,
                       *[c.copy() for c in Topic.__table__.columns])
    lostfilm_series_4 = Table(
        'lostfilmtv_series4', m4,
        Column("id", Integer, ForeignKey('topics.id'), primary_key=True),
        Column("cat", Integer, nullable=False),
        Column("season", Integer, nullable=True),
        Column("episode", Integer, nullable=True),
        Column("quality", String, nullable=False))
    lostfilm_credentials_4 = Table(
        "lostfilmtv_credentials4", m4,
        Column('username', String, primary_key=True),
        Column('password', String, primary_key=True),
        Column('session', String),
        Column('default_quality', String, nullable=False, server_default='SD'))

    cat_re = re.compile(
        six.text_type(
            r'https?://(www|old)\.lostfilm\.tv/browse\.php\?cat=_?(?P<cat>\d+)'
        ), re.UNICODE)

    from monitorrent.settings_manager import SettingsManager
    settings_manager = SettingsManager()

    tracker_settings = None

    with operations_factory() as operations:
        # if a previous run failed, this table may not have been dropped
        if operations.has_table(lostfilm_series_4.name):
            operations.drop_table(lostfilm_series_4.name)
        operations.create_table(lostfilm_series_4)

        lostfilm_topics = operations.db.query(lostfilm_series_3)
        topics = operations.db.query(topic_last)
        topics = [row2dict(t, topic_last) for t in topics]
        topics = {t['id']: t for t in topics}
        for topic in lostfilm_topics:
            raw_lostfilm_topic = row2dict(topic, lostfilm_series_3)
            raw_topic = topics[raw_lostfilm_topic['id']]
            match = cat_re.match(raw_topic['url'])

            topic_values = {}

            if not match:
                print("can't parse old url: {0}".format(raw_topic['url']))
                raw_lostfilm_topic['cat'] = 0
                topic_values['status'] = Status.Error
            else:
                cat = int(match.group('cat'))
                raw_lostfilm_topic['cat'] = cat

                try:
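                    # the old browse.php?cat= page redirects via a <meta refresh> tag;
                    # follow it to discover the new seasons URL for this show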
                    if tracker_settings is None:
                        tracker_settings = settings_manager.tracker_settings

                    old_url = 'https://www.lostfilm.tv/browse.php?cat={0}'.format(
                        cat)
                    url_response = scraper.get(
                        old_url, **tracker_settings.get_requests_kwargs())

                    soup = get_soup(url_response.text)
                    meta_content = soup.find('meta').attrs['content']
                    redirect_url = meta_content.split(';')[1].strip()[4:]

                    if redirect_url.startswith('/'):
                        redirect_url = redirect_url[1:]

                    redirect_url = u'https://www.lostfilm.tv/{0}'.format(
                        redirect_url)
                    url = LostFilmShow.get_seasons_url(redirect_url)

                    if url is None:
                        raise Exception(
                            "Can't parse url from {0} it was redirected to {1}"
                            .format(old_url, redirect_url))

                    topic_values['url'] = url
                except:
                    exc_info = sys.exc_info()
                    print(u''.join(traceback.format_exception(*exc_info)))
                    topic_values['status'] = Status.Error

            operations.db.execute(lostfilm_series_4.insert(),
                                  raw_lostfilm_topic)
            operations.db.execute(
                topic_last.update(
                    whereclause=(topic_last.c.id == raw_topic['id']),
                    values=topic_values))

        # drop original table
        operations.drop_table(lostfilm_series_3.name)
        # rename the newly created table to the original name
        operations.rename_table(lostfilm_series_4.name, lostfilm_series_3.name)

        # if a previous run failed, this table may not have been dropped
        if operations.has_table(lostfilm_credentials_4.name):
            operations.drop_table(lostfilm_credentials_4.name)
        operations.create_table(lostfilm_credentials_4)
        credentials = list(operations.db.query(lostfilm_credentials_3))
        for credential in credentials:
            raw_credential = row2dict(credential, lostfilm_credentials_3)
            operations.db.execute(lostfilm_credentials_4.insert(),
                                  raw_credential)

        # drop original table
        operations.drop_table(lostfilm_credentials_3.name)
        # rename the newly created table to the original name
        operations.rename_table(lostfilm_credentials_4.name,
                                lostfilm_credentials_3.name)