def _search_show_id(self, series, year=None):
    """Look up the show id for *series* (optionally scoped to *year*).

    :param str series: series name to look up.
    :param year: first-air year, if known.
    :type year: int
    :return: the show id when the first suggestion matches, else ``None``.
    :rtype: int
    """
    # quotes break the site's search - replace them with spaces
    cleaned = series.replace('\'', ' ')

    # append the year to the query when one was given
    query = cleaned if year is None else '%s %d' % (cleaned, year)
    params = {'search': query, 'Submit': 'Search'}

    logger.info('Searching show ids with %r', params)
    response = self.session.get(self.server_url + 'srch.php', params=params, timeout=10)
    response.raise_for_status()
    soup = ParserBeautifulSoup(response.content, ['lxml', 'html.parser'])

    # take the first suggestion, if any
    anchors = soup.select('span.titulo > a[href^="/show/"]')
    if not anchors:
        logger.warning('Show id not found: no suggestion')
        return None

    # the suggestion must match the query exactly once sanitized
    if sanitize(anchors[0].i.text.replace('\'', ' ')) != sanitize(query):
        logger.warning('Show id not found: suggestion does not match')
        return None

    found_id = int(anchors[0]['href'][6:])
    logger.debug('Found show id %d', found_id)
    return found_id
def _get_show_ids(self):
    """Get the ``dict`` of show ids per series by querying the `shows.php` page.

    :return: show id per series, lower case and without quotes.
    :rtype: dict
    """
    logger.info('Getting show ids')
    response = self.session.get(self.server_url + 'shows.php', timeout=10)
    response.raise_for_status()

    # LXML parser seems to fail when parsing Addic7ed.com HTML markup.
    # Last known version to work properly is 3.6.4 (next version, 3.7.0, fails)
    # Assuming the site's markup is bad, and stripping it down to only contain what's needed.
    cells = re.findall(show_cells_re, response.content)
    if cells:
        soup = ParserBeautifulSoup(b''.join(cells), ['lxml', 'html.parser'])
    else:
        # If RegEx fails, fall back to original response content and use 'html.parser'
        soup = ParserBeautifulSoup(response.content, ['html.parser'])

    # map sanitized series name -> numeric show id
    ids = {}
    for anchor in soup.select('td.version > h3 > a[href^="/show/"]'):
        ids[sanitize(anchor.text)] = int(anchor['href'][6:])
    logger.debug('Found %d show ids', len(ids))
    return ids
def _search_url_titles(self, title):
    """Search the URL titles by kind for the given `title`.

    :param str title: title to search for.
    :return: the URL titles by kind.
    :rtype: collections.defaultdict
    """
    logger.info('Searching title name for %r', title)
    response = self.session.get(self.server_url + 'subtitle/search/', params={'q': title}, timeout=10)
    response.raise_for_status()

    # a chain of 302s means we landed straight on the subtitles page
    if response.history and all(h.status_code == 302 for h in response.history):
        logger.debug('Redirected to the subtitles page')
        links = [response.url]
    else:
        # otherwise scrape the suggestion links from the search results
        soup = ParserBeautifulSoup(response.content, ['lxml', 'html.parser'])
        links = [a.attrs['href'] for a in soup.select('#processes div.generalWindowTop a')]
        logger.debug('Found %d suggestions', len(links))

    titles_by_kind = defaultdict(list)
    for url in links:
        pieces = url.split('/')
        # .../<kind>/<url-title>/... - group url-titles under their kind
        titles_by_kind[pieces[-3]].append(pieces[-2])

    return titles_by_kind
def query(self, show_id, series, season, episode, year=None):
    """Query subtitles for one episode of show `show_id`.

    :param int show_id: provider id of the show.
    :param str series: series name, forwarded to the subtitle.
    :param int season: season number.
    :param episode: episode number (a list collapses to its minimum).
    :param year: year of the series, if any.
    :return: the subtitles found.
    :rtype: list
    """
    # get the episode ids
    episode_ids = self.get_episode_ids(show_id, season)

    # Provider doesn't store multi episode information
    episode = min(episode) if episode and isinstance(episode, list) else episode

    if episode not in episode_ids:
        logger.error('Episode %d not found', episode)
        return []

    # get the episode page
    logger.info('Getting the page for episode %d', episode_ids[episode])
    r = self.session.get(self.server_url + 'episode-%d.html' % episode_ids[episode], timeout=10)
    # FIX: the response status was never checked before parsing; a 4xx/5xx
    # page would silently yield zero subtitles instead of raising
    r.raise_for_status()
    soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

    # loop over subtitles rows
    subtitles = []
    for row in soup.select('.subtitlen'):
        # read the item; the language code is embedded in the flag image path
        language = Language.fromtvsubtitles(row.h5.img['src'][13:-4])
        subtitle_id = int(row.parent['href'][10:-5])
        page_link = self.server_url + 'subtitle-%d.html' % subtitle_id
        rip = row.find('p', title='rip').text.strip() or None
        release = row.find('h5').text.strip() or None

        subtitle = self.subtitle_class(language, page_link, subtitle_id, series, season,
                                       episode, year, rip, release)
        logger.info('Found subtitle %s', subtitle)
        subtitles.append(subtitle)

    # free the parse tree eagerly - these pages can be large
    soup.decompose()
    soup = None

    return subtitles
def query(self, series, season, episode, year=None, video=None):
    """Query subtitles for an episode of `series`.

    :param str series: series name.
    :param int season: season number.
    :param int episode: episode number.
    :param year: year of the series, if any.
    :param video: the video being searched, forwarded to the row parser.
    :return: the subtitles found.
    :rtype: list
    """
    # Search for s01e03 instead of s1e3
    seasona = "%02d" % season
    episodea = "%02d" % episode
    seriesa = fix_inconsistent_naming(series)
    # BUG FIX: the normalized name was previously discarded because the raw
    # `series` was used on this line instead of `seriesa`.
    seriesa = seriesa.replace(' ', '+')

    # get the episode page
    logger.info('Getting the page for episode %s', episode)
    url = self.server_url + "sorozatok.php?cim=" + seriesa + "&evad=" + str(seasona) + \
        "&resz=" + str(episodea) + "&nyelvtipus=%25&x=24&y=8"
    logger.info('Url %s', url)
    r = self.session.get(url, timeout=10).content

    soup = ParserBeautifulSoup(r, ['lxml'])

    subtitles = []
    for num, temp in enumerate(soup.find_all("table")):
        # only tables carrying the hover background and the info icon
        # are actual subtitle result tables
        if "this.style.backgroundImage='url(css/over2.jpg)" in str(temp) \
                and "css/infooldal.png" in str(temp):
            logger.debug("Found valid table (%d index)", num)
            subtitles += self._loop_over_table(temp, season, episode, video)

    return subtitles
def _get_show_ids(self):
    """Get the ``dict`` of show ids per series by querying the `series.php` page.

    :return: show id per series, lower case and without quotes.
    :rtype: dict
    """
    logger.info('Getting show ids')
    response = self.session.get(self.series_url, timeout=10)
    response.raise_for_status()
    # raise_for_status only raises for 4xx/5xx; treat any non-200 as an error too
    if response.status_code != 200:
        logger.error('Error getting show ids')
        raise ProviderError('Error getting show ids')

    soup = ParserBeautifulSoup(response.content, ['lxml', 'html.parser'])

    # map sanitized series name -> numeric show id
    ids = {}
    for anchor in soup.select('td > a[href^="/show/"]'):
        ids[sanitize(anchor.get_text())] = int(anchor['href'][6:])
    logger.debug('Found %d show ids', len(ids))
    return ids
def _get_suggestions(self, title):
    """Search the show or movie id from the `title` and `year`.

    :param str title: title of the show.
    :return: the show suggestions found.
    :rtype: list of dict
    """
    logger.info('Searching show ids with %r', title)
    response = self.session.get(self.server_url + self.search_url.format(title),
                                headers={'Referer': self.server_url}, timeout=10)
    response.raise_for_status()

    if not response.content:
        logger.debug('No data returned from provider')
        return []

    soup = ParserBeautifulSoup(response.content, ['html.parser'])

    # each <option> of the selector holds one suggestion: value=link, text=title
    suggestions = []
    for option in soup.select('select[name="Mov_sel"] > option[value]'):
        suggestions.append({'link': option.attrs['value'], 'title': option.text})
    logger.debug('Found suggestions: %r', suggestions)
    return suggestions
def _search_movie(self, movie_id):
    """Scrape the subtitle rows for the movie identified by `movie_id`.

    :param movie_id: provider id of the movie.
    :return: list of dicts with ``id``, ``rls`` and ``sub_id`` keys.
    :rtype: list
    """
    url = self.server_url + self.movie_info_url + movie_id
    response = self.session.get(url, timeout=10)
    response.raise_for_status()

    # a nearly-empty body means the provider found nothing
    if len(response.content) < 10:
        logger.debug(
            "Too short content-length in response: [{}]. Treating as No Subtitles Found "
            .format(str(response.content)))
        return []

    found = []
    html = ParserBeautifulSoup(response.content, ["html.parser"])
    for row in html.select("table#subtitlesList tbody > tr"):
        entry = {"id": movie_id}
        for index, column in enumerate(row.find_all("td")):
            if index == 0:
                # first cell holds the release name (first line only)
                entry["rls"] = column.get_text().strip().split("\n")[0]
            if index == 5:
                # sixth cell carries the subtitle id as a data attribute
                entry["sub_id"] = column.find(
                    "a", attrs={"data-subtitle-id": True})["data-subtitle-id"]
        # only keep rows where a subtitle id was actually found
        if 'sub_id' in entry:
            found.append(entry)
    return found
def _parse_episode_page(self, link, year):
    """Parse an episode page into a list of subtitles.

    :param str link: URL of the episode page.
    :param year: year of the video, forwarded to the subtitle.
    :return: the subtitles found.
    :rtype: list
    """
    response = self.session.get(link)
    page = ParserBeautifulSoup(response.content.decode("utf-8", "ignore"), ["html.parser"])
    table_body = page.find("div", class_="subs box clearfix").find("tbody")

    results = []
    for row in table_body.find_all("tr"):
        anchor = row.find("a")
        name = _extract_name(anchor.text)
        # remove ext because it can be an archive type
        name = os.path.splitext(name)[0]

        # default to English; certain flag icons signal a Chinese subtitle
        language = Language("eng")
        for img in row.find("td", class_="tac lang").find_all("img"):
            if ("hongkong" in img.attrs["src"] or "china" in img.attrs["src"]
                    or "jollyroger" in img.attrs["src"]):
                language = Language("zho")
                break

        sub_page_link = urljoin(self.server_url, anchor.attrs["href"])
        # give each subtitle its own session with the episode page as referer
        backup_session = copy.deepcopy(self.session)
        backup_session.headers["Referer"] = link

        results.append(self.subtitle_class(language, sub_page_link, name, backup_session, year))

    return results
def search_show_id(self, series, year=None):
    """Search the show id from the `series` and `year`.

    :param string series: series of the episode.
    :param year: year of the series, if any.
    :type year: int or None
    :return: the show id, if any.
    :rtype: int or None
    """
    logger.info('Searching show id for %r', series)
    response = self.session.post(self.server_url + 'search.php', data={'q': series}, timeout=10)
    response.raise_for_status()

    soup = ParserBeautifulSoup(response.content, ['lxml', 'html.parser'])

    found_id = None
    for anchor in soup.select('div.left li div a[href^="/tvshow-"]'):
        match = link_re.match(anchor.text)
        if not match:
            logger.error('Failed to match %s', anchor.text)
            continue

        if sanitize(match.group('series')).lower() == series.lower():
            # when a year was requested it must agree with the suggestion
            if year is not None and int(match.group('first_year')) != year:
                logger.debug('Year does not match')
                continue
            found_id = int(anchor['href'][8:-5])
            logger.debug('Found show id %d', found_id)
            break

    # free the parse tree eagerly - these pages can be large
    soup.decompose()
    soup = None

    return found_id
def query(self, series, season, episode, year=None):
    """Query subtitles for a single episode of `series`.

    :param str series: series name.
    :param int season: season number.
    :param int episode: episode number.
    :param year: year of the series, if any.
    :return: the subtitles found.
    :rtype: list
    """
    # resolve the show id first
    show_id = self.search_show_id(series, year)
    if show_id is None:
        logger.error('No show id found for %r (%r)', series, {'year': year})
        return []

    # map episode numbers to episode ids (with retries)
    episode_ids = self.retry(lambda: self.get_episode_ids(show_id, season))
    if episode not in episode_ids:
        logger.error('Episode %d not found', episode)
        return []

    # fetch the episode page (with retries)
    logger.info('Getting the page for episode %d', episode_ids[episode])
    response = self.retry(lambda: self.session.get(
        self.server_url + 'episode-%d.html' % episode_ids[episode], timeout=10))
    soup = ParserBeautifulSoup(response.content, ['lxml', 'html.parser'])

    subtitles = []
    for row in soup.select('.subtitlen'):
        # the language code is embedded in the flag image path
        language = Language.fromtvsubtitles(row.h5.img['src'][13:-4])
        subtitle_id = int(row.parent['href'][10:-5])
        page_link = self.server_url + 'subtitle-%d.html' % subtitle_id
        rip = row.find('p', title='rip').text.strip() or None
        release = row.find('p', title='release').text.strip() or None

        subtitle = PatchedTVsubtitlesSubtitle(language, page_link, subtitle_id, series, season,
                                              episode, year, rip, release)
        logger.info('Found subtitle %s', subtitle)
        subtitles.append(subtitle)

    return subtitles
def initialize(self):
    """Create the HTTP session and, when credentials are configured, log in.

    :raises AuthenticationError: when the site rejects the credentials.
    """
    self.session = Session()
    self.session.headers['User-Agent'] = 'Subliminal/%s' % __short_version__

    if self.username and self.password:
        logger.info('Logging in')
        payload = {
            '_method': 'POST',
            'data[User][username]': self.username,
            'data[User][password]': self.password
        }
        response = self.session.post(self.server_url + 'login', payload,
                                     allow_redirects=False, timeout=10)
        raise_for_status(response)

        # the site answers 200 with an inline error message on bad credentials
        soup = ParserBeautifulSoup(response.content, ['html.parser'])
        if soup.find('div', {'class': 'alert-error'},
                     string=re.compile(u'Usuário ou senha inválidos')):
            raise AuthenticationError(self.username)

        logger.debug('Logged in')
        self.logged_in = True
def _get_show_ids(self):
    """Fetch the provider's series list and map sanitized names to show ids.

    :return: show id per sanitized series name (empty list on empty response).
    :rtype: dict
    """
    logger.info('Getting show ids')
    response = self.session.get(self.server_url + self.all_series_url, timeout=10)
    response.raise_for_status()

    if not response.content:
        logger.debug('No data returned from provider')
        return []

    soup = ParserBeautifulSoup(response.content, ['lxml', 'html.parser'])

    ids = {}
    for category in soup.findAll('seriesl'):
        # only the TV-series category is of interest
        if category.attrs['category'] == u'Σειρές':
            for show in category.findAll('series'):
                name = show.text
                cleaned = series_sanitize_re.match(name)
                if cleaned:
                    name = cleaned.group(1)
                ids[sanitize(name)] = int(show['srsid'])
            break
    logger.debug('Found %d show ids', len(ids))
    return ids
def _parse_subtitles_page(self, video, response, language):
    """Extract subtitles from a search-results page.

    :param video: the video being searched, forwarded to the subtitle.
    :param response: HTTP response holding the page (iso-8859-1 encoded).
    :param language: language of the listed subtitles.
    :return: the subtitles found.
    :rtype: list
    """
    page_soup = ParserBeautifulSoup(
        response.content.decode('iso-8859-1', 'ignore'), ['lxml', 'html.parser'])
    title_soups = page_soup.find_all("div", {'id': 'menu_detalle_buscador'})
    body_soups = page_soup.find_all("div", {'id': 'buscador_detalle'})

    subtitles = []
    # titles and bodies are parallel lists - walk them by index
    for idx in range(0, len(title_soups)):
        title_soup, body_soup = title_soups[idx], body_soups[idx]

        # title
        title = title_soup.find("a").text.replace("Subtitulos de ", "")
        page_link = title_soup.find("a")["href"]

        # description
        description = body_soup.find("div", {'id': 'buscador_detalle_sub'}).text
        description = description.replace(",", " ").lower()

        # uploader
        uploader = body_soup.find("a", {'class': 'link1'}).text

        entry = self.subtitle_class(language, video, page_link, title, description, uploader)
        logger.debug('Found subtitle %r', entry)
        subtitles.append(entry)

    return subtitles
def login(self):
    """Log in to the site; return ``True`` on success, ``False`` otherwise."""
    logger.info('Logging in')

    # fetch the login form first to pick up its hidden fields (sid, tokens, ...)
    self.headers['Referer'] = self.site + '/index.php'
    self.session.headers.update(self.headers.items())
    response = self.session.get(self.loginpage)
    page = ParserBeautifulSoup(response.content, ['lxml'])

    form_data = {}
    for field in page.findAll('input'):
        form_data[field.get('name')] = field.get('value')

    form_data['username'] = self.username
    form_data['password'] = self.password
    form_data['autologin'] = '******'
    form_data['viewonline'] = 'on'

    self.headers['Referer'] = self.loginpage
    self.session.headers.update(self.headers.items())
    self.session.post(self.loginpage, form_data)

    try:
        # a PHPSESSID cookie is the proof the login succeeded
        logger.debug('Got session id %s' %
                     self.session.cookies.get_dict()['PHPSESSID'])
    except KeyError as e:
        logger.error(repr(e))
        logger.error("Didn't get session id, check your credentials")
        return False
    except Exception as e:
        logger.error(repr(e))
        logger.error('uncached error #legendasdivx #AA')
        return False

    return True
def _get_show_ids(self):
    """Get the ``dict`` of show ids per series by querying the `shows.php` page.

    :return: show id per series, lower case and without quotes.
    :rtype: dict
    """
    logger.info('Getting show ids')
    response = self.session.get(self.server_url + 'shows.php', timeout=20, cookies=self.cookies)
    response.raise_for_status()

    # LXML parser seems to fail when parsing Addic7ed.com HTML markup.
    # Last known version to work properly is 3.6.4 (next version, 3.7.0, fails)
    # Assuming the site's markup is bad, and stripping it down to only contain what's needed.
    cells = re.findall(show_cells_re, response.content)
    if cells:
        soup = ParserBeautifulSoup(b''.join(cells), ['lxml', 'html.parser'])
    else:
        # If RegEx fails, fall back to original response content and use 'html.parser'
        soup = ParserBeautifulSoup(response.content, ['html.parser'])

    # map sanitized series name -> numeric show id
    ids = {}
    for anchor in soup.select('td.vr > h3 > a[href^="/show/"]'):
        ids[sanitize(anchor.text)] = int(anchor['href'][6:])
    logger.debug('Found %d show ids', len(ids))
    return ids
def search_show_id(self, series, year=None):
    """Search the show id from the `series` and `year`.

    :param string series: series of the episode.
    :param year: year of the series, if any.
    :type year: int or None
    :return: the show id, if any.
    :rtype: int or None
    """
    # normalize punctuation and case once, and reuse it for all comparisons
    wanted = self.clean_punctuation(series).lower()
    logger.info('Searching show id for %r', wanted)
    response = self.session.post(self.server_url + 'search.php', data={'q': wanted}, timeout=10)
    response.raise_for_status()

    soup = ParserBeautifulSoup(response.content, ['lxml', 'html.parser'])

    for anchor in soup.select('div.left li div a[href^="/tvshow-"]'):
        match = link_re.match(self.clean_punctuation(anchor.text))
        if not match:
            logger.error('Failed to match %s', anchor.text)
            continue

        if self.clean_punctuation(match.group('series')).lower() != wanted:
            continue

        # when a year was requested it must agree with the suggestion
        if year is not None and int(match.group('first_year')) != year:
            logger.debug('Year does not match')
            continue

        found_id = int(anchor['href'][8:-5])
        logger.debug('Found show id %d', found_id)
        return found_id

    return None
def _search_url_titles(self, title):
    """Search the URL titles by kind for the given `title`.

    :param str title: title to search for.
    :return: the URL titles by kind.
    :rtype: collections.defaultdict
    """
    logger.info('Searching title name for %r', title)
    r = self.session.get(self.server_url + 'subtitle/search/', params={'q': title}, timeout=10)
    r.raise_for_status()

    # when every hop in the history is a 302, we were redirected straight to
    # the subtitles page, so that page's URL is the single candidate link
    if r.history and all(h.status_code == 302 for h in r.history):
        logger.debug('Redirected to the subtitles page')
        candidate_links = [r.url]
    else:
        soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
        candidate_links = [a.attrs['href']
                           for a in soup.select('#processes div.generalWindowTop a')]
        logger.debug('Found %d suggestions', len(candidate_links))

    url_titles = defaultdict(list)
    for candidate in candidate_links:
        segments = candidate.split('/')
        # .../<kind>/<url-title>/... - group url-titles under their kind
        url_titles[segments[-3]].append(segments[-2])

    return url_titles
def query(self, series, season, episode, year=None):
    """Query subtitles for one episode of `series`.

    :param str series: series name.
    :param int season: season number.
    :param int episode: episode number.
    :param year: year of the series, if any.
    :return: the subtitles found.
    :rtype: list
    """
    # resolve the show id
    show_id = self.get_show_id(series, year)
    if show_id is None:
        logger.error('No show id found for %s (%r)', series, year)
        return []

    # resolve the episode page url
    episode_url = self.get_episode_url(show_id, series, season, episode, year)
    if episode_url is None:
        logger.error('No episode url found for %s, season %d, episode %d', series, season, episode)
        return []

    # fetch the episode page
    r = self.session.get(episode_url, timeout=10)
    r.raise_for_status()
    soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

    # extract the episode title from the page header
    header_re = re.compile('Subt.+tulos de {}(.+){}x{:02d} - (.+)'.format(series, season, episode).lower())
    title = header_re.search(soup.select('#cabecera-subtitulo')[0].get_text().strip().lower()).group(2)

    subtitles = []
    for version_div in soup.find_all('div', attrs={'id': re.compile('version([0-9]+)')}):
        # release group of this version
        release = sanitize_release_group(
            release_pattern.search(version_div.find('p', class_='title-sub').contents[2]).group(1))

        for html_language in version_div.select('ul.sslist'):
            language = Language.fromtusubtitulo(html_language.find_next('b').get_text().strip())
            hearing_impaired = False

            # modify spanish latino subtitle language to only spanish and set hearing_impaired = True
            # because if exists spanish and spanish latino subtitle for the same episode, the score will be
            # higher with spanish subtitle. Spanish subtitle takes priority.
            if language == Language('spa', 'MX'):
                language = Language('spa')
                hearing_impaired = True

            # ignore incomplete subtitles
            status = sanitize(html_language.find_next('li', class_=re.compile('li-estado')).get_text())
            if status != 'completado':
                logger.debug('Ignoring subtitle with status %s', status)
                continue

            # prefer the most updated version; fall back to the original one
            html_status = html_language.select('a[href^="updated/"]')
            if len(html_status) == 0:
                html_status = html_language.select('a[href^="original/"]')

            subtitle_url = self.server_url + html_status[0]['href']
            subtitle = TuSubtituloSubtitle(language, hearing_impaired, episode_url, series, season,
                                           episode, title, year, release, subtitle_url)
            logger.debug('Found subtitle %r', subtitle)
            subtitles.append(subtitle)

    return subtitles
def query(self, series, season, episode, year=None):
    """Query subtitles for one episode of `series`.

    :param str series: series name.
    :param int season: season number.
    :param int episode: episode number.
    :param year: year of the series, if any.
    :return: the subtitles found.
    :rtype: list
    """
    # resolve the episode page url
    episode_url = self._search_url_titles(series, season, episode, year)
    if episode_url is None:
        logger.info(
            f"[{self.provider_name}]: No episode url found for {series}, season {season}, episode {episode}"
        )
        return []

    r = self.session.get(episode_url, headers={"Referer": self.server_url}, timeout=10)
    r.raise_for_status()
    soup = ParserBeautifulSoup(r.content, ["lxml", "html.parser"])

    # extract the episode title from the page heading
    heading = soup.select("#episode_title")[0].get_text().strip().lower()
    title = re.compile("{}(.+){}x{:02d}- (.+)".format(
        series, season, episode).lower()).search(heading).group(2)

    subtitles = []
    for row in soup.find_all("div", attrs={"id": "progress_buttons_row"}):
        # the language label sits in a preceding sibling div
        language = Language.fromsubtitulamos(
            row.find_previous("div", class_="subtitle_language").get_text().strip())
        hearing_impaired = False

        # modify spanish latino subtitle language to only spanish and set hearing_impaired = True
        # because if exists spanish and spanish latino subtitle for the same episode, the score will be
        # higher with spanish subtitle. Spanish subtitle takes priority.
        if language == Language("spa", "MX"):
            language = Language("spa")
            hearing_impaired = True

        # release name of this version
        release = row.find_next("div", class_="version_name").get_text().strip()

        # ignore incomplete subtitles (no download anchor yet)
        status = row.find_next("div", class_="subtitle_buttons").contents[1]
        if status.name != "a":
            logger.debug("Ignoring subtitle in [%s] not finished", language)
            continue

        # read the subtitle url
        subtitle_url = self.server_url + status["href"][1:]
        subtitle = SubtitulamosSubtitle(language, hearing_impaired, episode_url, series, season,
                                        episode, title, year, release, subtitle_url)
        logger.debug("Found subtitle %r", subtitle)
        subtitles.append(subtitle)

    return subtitles
def query(self, languages=None, title=None, imdb_id=None, video=None):
    """Query titrari.ro for subtitles matching `imdb_id` / `title`.

    :param languages: candidate languages; the first one is assigned to results.
    :param title: title to search for.
    :param imdb_id: IMDB id to search for.
    :param video: video used to order the results.
    :return: the subtitles found, ordered for `video`.
    :rtype: list
    """
    subtitles = []

    params = self.getQueryParams(imdb_id, title)
    search_response = self.session.get(self.api_url, params=params, timeout=15)
    search_response.raise_for_status()

    if not search_response.content:
        logger.debug('[#### Provider: titrari.ro] No data returned from provider')
        return []

    soup = ParserBeautifulSoup(search_response.content.decode('utf-8', 'ignore'),
                               ['lxml', 'html.parser'])

    # loop over subtitle cells
    rows = soup.select('td[rowspan=\'5\']')
    for index, row in enumerate(rows):
        result_anchor_el = row.select_one('a')

        # Download link
        href = result_anchor_el.get('href')
        download_link = self.api_url + href

        fullTitle = row.parent.find("h1").find("a").text

        # FIX: the bare `except:` clauses below were narrowed to
        # `except Exception` (a bare except also swallows SystemExit /
        # KeyboardInterrupt), and `downloads` / `comments` now get defaults
        # so a parse failure can no longer leave them unbound and crash the
        # subtitle creation with a NameError.

        # Get title
        try:
            title = fullTitle.split("(")[0]
        except Exception:
            logger.error("[#### Provider: titrari.ro] Error parsing title.")

        # Get downloads count
        downloads = 0
        try:
            downloads = int(row.parent.parent.select("span")[index].text[12:])
        except Exception:
            logger.error("[#### Provider: titrari.ro] Error parsing downloads.")

        # Get year
        try:
            year = int(fullTitle.split("(")[1].split(")")[0])
        except Exception:
            year = None
            logger.error("[#### Provider: titrari.ro] Error parsing year.")

        # Get imdbId
        sub_imdb_id = self.getImdbIdFromSubtitle(row)

        comments = ''
        try:
            comments = row.parent.parent.find_all(
                "td", class_=re.compile("comment"))[index * 2 + 1].text
        except Exception:
            logger.error("Error parsing comments.")

        subtitle = self.subtitle_class(next(iter(languages)), download_link, index, None, title,
                                       sub_imdb_id, year, downloads, comments)
        logger.debug('[#### Provider: titrari.ro] Found subtitle %r', str(subtitle))
        subtitles.append(subtitle)

    ordered_subs = self.order(subtitles, video)

    return ordered_subs
def query(self, title):
    """Search the provider for `title` and parse the resulting rows.

    :param str title: title to search for.
    :return: the subtitles found.
    :rtype: list
    """
    payload = {
        'ajax': '1',
        'sSearch': title,
    }
    response = self.session.post(self.search_url, data=payload, timeout=10)
    response.raise_for_status()

    if not response.content:
        logger.debug('No data returned from provider')
        return []

    soup = ParserBeautifulSoup(response.content.decode('utf-8', 'ignore'),
                               ['lxml', 'html.parser'])

    subtitles = []
    # loop over subtitle cells
    for row in soup.select('tbody > tr'):
        # title: only the anchor's direct text nodes (skip nested tags)
        title_anchor_el = row.select_one('.title > a')
        text_nodes = [node for node in title_anchor_el if isinstance(node, NavigableString)]
        title = text_nodes[0].strip()

        # year
        year = row.select_one('.year').text.strip('()')

        # download link
        download_link = self.server_url + title_anchor_el.get('href')

        # imdb id, taken from the link in the 4th cell
        imdb_link = row.select_one('td:nth-of-type(4)').select_one('a').get('href')
        imdb_id = imdb_link.split('/')[-2]

        # fps
        fps = row.select_one('.fps').text.strip()

        # additional notes
        notes = row.select_one('.notes').text.strip()

        # page link = download link (there is no seperate subtitle page link)
        subtitle = self.subtitle_class(Language.fromalpha2('lv'), download_link, download_link,
                                       title, year, imdb_id, fps, notes)
        logger.debug('nekur: Found subtitle %r', subtitle)
        subtitles.append(subtitle)

    return subtitles
def query(self, series, season, year=None, country=None):
    """Query all subtitles of one season of `series`.

    :param str series: series name.
    :param int season: season number.
    :param year: year of the series, if any.
    :param country: country of the series, if any.
    :return: the subtitles found.
    :rtype: list
    """
    # resolve the show id
    show_id = self.get_show_id(series, year, country)
    if show_id is None:
        logger.error('No show id found for %r (%r)', series, {'year': year, 'country': country})
        return []

    # fetch the season page of the show
    logger.info('Getting the page of show id %d, season %d', show_id, season)
    response = self.session.get(self.server_url + 'show/%d' % show_id,
                                params={'season': season}, timeout=10)
    response.raise_for_status()

    if not response.content:
        # Provider returns a status of 304 Not Modified with an empty content
        # raise_for_status won't raise exception for that status code
        logger.debug('No data returned from provider')
        return []

    soup = ParserBeautifulSoup(response.content, ['lxml', 'html.parser'])

    # the canonical series name and year come from the page header
    match = series_year_re.match(soup.select('#header font')[0].text.strip()[:-10])
    series = match.group('series')
    year = int(match.group('year')) if match.group('year') else None

    subtitles = []
    for row in soup.select('tr.epeven'):
        cells = row('td')

        # ignore incomplete subtitles
        status = cells[5].text
        if status != 'Completed':
            logger.debug('Ignoring subtitle with status %s', status)
            continue

        # read the row's fields
        language = Language.fromaddic7ed(cells[3].text)
        hearing_impaired = bool(cells[6].text)
        page_link = self.server_url + cells[2].a['href'][1:]
        season = int(cells[0].text)
        episode = int(cells[1].text)
        title = cells[2].text
        version = cells[4].text
        download_link = cells[9].a['href'][1:]

        subtitle = self.subtitle_class(language, hearing_impaired, page_link, series, season,
                                       episode, title, year, version, download_link)
        logger.debug('Found subtitle %r', subtitle)
        subtitles.append(subtitle)

    return subtitles
def query(self, series, season, episode, year=None):
    """Query subtitles for one episode of `series`.

    :param str series: series name.
    :param int season: season number.
    :param int episode: episode number.
    :param year: year of the series, if any.
    :return: the subtitles found.
    :rtype: list
    """
    # resolve the episode page url
    episode_url = self._search_url_titles(series, season, episode, year)
    if episode_url is None:
        logger.error('No episode url found for %s, season %d, episode %d',
                     series, season, episode)
        return []

    page = self.session.get(episode_url, headers={'Referer': self.server_url}, timeout=10)
    page.raise_for_status()
    soup = ParserBeautifulSoup(page.content, ['lxml', 'html.parser'])

    # extract the episode title from the page heading
    heading = soup.select('#episode_title')[0].get_text().strip().lower()
    title = re.compile('{}(.+){}x{:02d}- (.+)'.format(
        series, season, episode).lower()).search(heading).group(2)

    subtitles = []
    for row in soup.find_all('div', attrs={'id': 'progress_buttons_row'}):
        # the language label sits in a preceding sibling div
        language = Language.fromsubtitulamos(
            row.find_previous('div', class_='subtitle_language').get_text().strip())
        hearing_impaired = False

        # modify spanish latino subtitle language to only spanish and set hearing_impaired = True
        # because if exists spanish and spanish latino subtitle for the same episode, the score will be
        # higher with spanish subtitle. Spanish subtitle takes priority.
        if language == Language('spa', 'MX'):
            language = Language('spa')
            hearing_impaired = True

        # release name of this version
        release = row.find_next('div', class_='version_name').get_text().strip()

        # ignore incomplete subtitles (no download anchor yet)
        status = row.find_next('div', class_='subtitle_buttons').contents[1]
        if status.name != 'a':
            logger.debug('Ignoring subtitle in [%s] not finished', language)
            continue

        # read the subtitle url
        subtitle_url = self.server_url + status['href'][1:]
        subtitle = SubtitulamosSubtitle(language, hearing_impaired, episode_url, series, season,
                                        episode, title, year, release, subtitle_url)
        logger.debug('Found subtitle %r', subtitle)
        subtitles.append(subtitle)

    return subtitles
def download_subtitle(self, subtitle):
    """Download `subtitle`, working through the site's image-button form."""
    if not isinstance(subtitle, Subs4FreeSubtitle):
        return

    # fetch the subtitle page
    logger.info('Downloading subtitle %r', subtitle)
    r = self.session.get(subtitle.download_link,
                         headers={'Referer': subtitle.page_link}, timeout=10)
    r.raise_for_status()

    if not r.content:
        logger.debug('Unable to download subtitle. No data returned from provider')
        return

    soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
    download_element = soup.select_one('input[name="id"]')
    image_element = soup.select_one('input[type="image"]')
    subtitle_id = download_element['value'] if download_element else None
    # the clickable image's dimensions bound the fake click coordinates
    width = int(str(image_element['width']).strip('px')) if image_element else 0
    height = int(str(image_element['height']).strip('px')) if image_element else 0

    if not subtitle_id:
        logger.debug('Unable to download subtitle. No download link found')
        return

    self.apply_anti_block(subtitle)

    # emulate a click somewhere on the image submit button
    download_url = self.server_url + self.download_url
    r = self.session.post(download_url,
                          data={'id': subtitle_id,
                                'x': random.randint(0, width),
                                'y': random.randint(0, height)},
                          headers={'Referer': subtitle.download_link}, timeout=10)
    r.raise_for_status()

    if not r.content:
        logger.debug('Unable to download subtitle. No data returned from provider')
        return

    # the payload may be an archive or the subtitle itself
    archive = _get_archive(r.content)
    subtitle_content = _get_subtitle_from_archive(archive) if archive else r.content

    if subtitle_content:
        subtitle.content = fix_line_ending(subtitle_content)
    else:
        logger.debug('Could not extract subtitle from %r', archive)
def query(self, series, season, episode, year=None):
    """Query subtitles for one episode of `series`.

    :param str series: series name.
    :param int season: season number.
    :param int episode: episode number.
    :param year: year of the series, if any.
    :return: the subtitles found.
    :rtype: list
    """
    # resolve the show id
    show_id = self.get_show_id(series, year)
    if show_id is None:
        logger.error("No show id found for %s (%r)", series, year)
        return []

    # resolve the episode page url
    episode_url = self.get_episode_url(show_id, series, season, episode, year)
    if episode_url is None:
        logger.info(f"[{self.provider_name}]: No episode url found for {series}, season {season}, episode {episode}")
        return []

    # fetch the episode page
    r = self.session.get(episode_url, timeout=10)
    r.raise_for_status()
    soup = ParserBeautifulSoup(r.content, ["lxml", "html.parser"])

    # extract the episode title from the page header
    header_re = re.compile("Subt.+tulos de {}(.+){}x{:02d} - (.+)".format(series, season, episode).lower())
    title = header_re.search(soup.select("#cabecera-subtitulo")[0].get_text().strip().lower()).group(2)

    subtitles = []
    for version_div in soup.find_all("div", attrs={"id": re.compile("version([0-9]+)")}):
        # release group of this version
        release = sanitize_release_group(
            release_pattern.search(version_div.find("p", class_="title-sub").contents[2]).group(1))

        for html_language in version_div.select("ul.sslist"):
            language = Language.fromtusubtitulo(html_language.find_next("b").get_text().strip())
            hearing_impaired = False

            # modify spanish latino subtitle language to only spanish and set hearing_impaired = True
            # because if exists spanish and spanish latino subtitle for the same episode, the score will be
            # higher with spanish subtitle. Spanish subtitle takes priority.
            if language == Language("spa", "MX"):
                language = Language("spa")
                hearing_impaired = True

            # ignore incomplete subtitles
            status = sanitize(html_language.find_next("li", class_=re.compile("li-estado")).get_text())
            if status != "completado":
                logger.debug("Ignoring subtitle with status %s", status)
                continue

            # prefer the most updated version; fall back to the original one
            html_status = html_language.select('a[href^="updated/"]')
            if len(html_status) == 0:
                html_status = html_language.select('a[href^="original/"]')

            subtitle_url = self.server_url + html_status[0]["href"]
            subtitle = TuSubtituloSubtitle(language, hearing_impaired, episode_url, series, season,
                                           episode, title, year, release, subtitle_url)
            logger.debug("Found subtitle %r", subtitle)
            subtitles.append(subtitle)

    return subtitles
def login(self):
    """Log in to Legendasdivx.pt and cache the session cookies.

    :raises AuthenticationError: when no session id comes back (bad credentials).
    :raises IPAddressBlocked: when the server blocked the client IP.
    :raises TooManyRequests: on any other HTTP error.
    :raises ServiceUnavailable: on any other unexpected error.
    """
    logger.debug('Legendasdivx.pt :: Logging in')
    try:
        # sleep for a 1 second before another request
        sleep(1)
        res = self.session.get(self.loginpage)
        res.raise_for_status()
        bsoup = ParserBeautifulSoup(res.content, ['lxml'])

        _allinputs = bsoup.findAll('input')
        data = {}
        # necessary to set 'sid' for POST request
        for field in _allinputs:
            data[field.get('name')] = field.get('value')

        # sleep for a 1 second before another request
        sleep(1)
        data['username'] = self.username
        data['password'] = self.password
        res = self.session.post(self.loginpage, data)
        res.raise_for_status()

        # make sure we're logged in
        logger.debug('Legendasdivx.pt :: Logged in successfully: PHPSESSID: %s',
                     self.session.cookies.get_dict()['PHPSESSID'])
        cj = self.session.cookies.copy()
        store_cks = ("PHPSESSID", "phpbb3_2z8zs_sid", "phpbb3_2z8zs_k", "phpbb3_2z8zs_u", "lang")
        for cn in iter(self.session.cookies.keys()):
            if cn not in store_cks:
                del cj[cn]

        # store session cookies on cache
        logger.debug("Legendasdivx.pt :: Storing legendasdivx session cookies: %r", cj)
        region.set("legendasdivx_cookies2", cj)

    except KeyError:
        logger.error("Legendasdivx.pt :: Couldn't get session ID, check your credentials")
        raise AuthenticationError(
            "Legendasdivx.pt :: Couldn't get session ID, check your credentials")
    except HTTPError as e:
        if "bloqueado" in res.text.lower():
            logger.error("LegendasDivx.pt :: Your IP is blocked on this server.")
            raise IPAddressBlocked("LegendasDivx.pt :: Your IP is blocked on this server.")
        logger.error("Legendasdivx.pt :: HTTP Error %s", e)
        # FIX: exception constructors do not %-format extra arguments the way
        # logging calls do - the message must be interpolated explicitly,
        # otherwise the raised exception carries an unformatted tuple.
        raise TooManyRequests("Legendasdivx.pt :: HTTP Error %s" % e)
    except Exception as e:
        logger.error("LegendasDivx.pt :: Uncaught error: %r", e)
        raise ServiceUnavailable("LegendasDivx.pt :: Uncaught error: %r" % e)
def _parse_subtitles_page(self, video, response, language):
    """Parse a search-results page into a list of subtitles.

    Each result is split across a title block and a detail block; the two
    lists are walked in lockstep. Forced subtitles, mismatched movie
    titles, multi-disc entries and non-SubRip entries are skipped.
    """
    subtitles = []

    page_soup = ParserBeautifulSoup(
        response.content.decode("utf-8", "ignore"), ["lxml", "html.parser"])
    title_soups = page_soup.find_all("div", {"id": "menu_detalle_buscador"})
    body_soups = page_soup.find_all("div", {"id": "buscador_detalle"})

    episode = isinstance(video, Episode)

    for index, title_soup in enumerate(title_soups):
        body_soup = body_soups[index]

        # title
        title = _clean_title(title_soup.find("a").text)

        # Forced subtitles are not supported
        if title.lower().rstrip().endswith(("forzado", "forzados")):
            logger.debug("Skipping forced subtitles: %s", title)
            continue

        # Check movie title (if the video is a movie)
        if not episode and not _check_movie(video, title):
            continue

        # Data
        datos = body_soup.find("div", {"id": "buscador_detalle_sub_datos"}).text

        # Ignore multi-disc and non-srt subtitles
        if "Cds:</b> 1" not in datos and "SubRip" not in datos:
            continue

        # Spanish (Spain) entries carry the Spain flag icon
        spain = "/pais/7.gif" in datos
        if spain:
            language = Language.fromalpha2("es")
        else:
            language = Language("spa", "MX")

        # description
        sub_details = body_soup.find("div", {"id": "buscador_detalle_sub"}).text
        description = sub_details.replace(",", " ")

        # uploader
        uploader = body_soup.find("a", {"class": "link1"}).text

        download_url = _get_download_url(body_soup)
        page_link = title_soup.find("a")["href"]

        subtitle = self.subtitle_class(language, video, page_link, title,
                                       description, uploader, download_url)

        logger.debug("Found subtitle %r", subtitle)
        subtitles.append(subtitle)

    return subtitles
def query(self, keyword, season=None, episode=None, year=None):
    """Search the provider for subtitles matching `keyword`.

    Appends ``SxxEyy`` for episodes or the year for movies, then walks
    every result page, following the 'Next' pagination link until none
    remains.
    """
    params = keyword
    if season and episode:
        params += ' S{season:02d}E{episode:02d}'.format(season=season, episode=episode)
    elif year:
        params += ' {:4d}'.format(year)

    logger.debug('Searching subtitles %r', params)

    subtitles = []
    search_link = self.server_url + text_type(self.search_url).format(params)

    while True:
        r = self.session.get(search_link, timeout=30)
        r.raise_for_status()

        if not r.content:
            logger.debug('No data returned from provider')
            return []

        soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'),
                                   ['lxml', 'html.parser'])

        # loop over subtitles cells
        for cell in soup.select('td.latest_name > a:nth-of-type(1)'):
            # read the item
            page_link = cell['href']
            subtitle_id = int(page_link.rsplit('/', 2)[1])
            alpha2 = cell.parent.find('img')['src'].split('/')[-1].split('.')[0]
            language = Language.fromalpha2(alpha2)
            version = cell.text.strip() or ""

            subtitle = self.subtitle_class(language, page_link, version,
                                           self.download_url.format(subtitle_id))
            logger.debug('Found subtitle %r', subtitle)
            subtitles.append(subtitle)

        # follow pagination when a 'Next' search link is present
        for anchor in soup.select('td a'):
            if 'Next' in anchor.text and 'search.php' in anchor['href']:
                search_link = self.server_url + anchor['href']
                break
        else:
            break

    return subtitles
def download_subtitle(self, subtitle):
    """Download `subtitle`, resolving the provider's intermediate page.

    The subtitle page is fetched first to locate either a direct link
    (``a.style55ws``) or a POST form whose ``action`` points at the file.
    The payload may be an archive, in which case the subtitle is
    extracted from it.
    """
    if not isinstance(subtitle, Subs4SeriesSubtitle):
        return

    # fetch the page that hosts the actual download target
    logger.info('Downloading subtitle %r', subtitle)
    r = self.session.get(subtitle.download_link,
                         headers={'Referer': subtitle.page_link},
                         timeout=10)
    r.raise_for_status()

    if not r.content:
        logger.debug('Unable to download subtitle. No data returned from provider')
        return

    soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])
    download_element = soup.select_one('a.style55ws')
    if download_element:
        target = download_element['href']
    else:
        download_element = soup.select_one('form[method="post"]')
        target = download_element['action'] if download_element else None

    if not target:
        logger.debug('Unable to download subtitle. No download link found')
        return

    self.apply_anti_block(subtitle)

    download_url = self.server_url + target
    r = self.session.get(download_url,
                         headers={'Referer': subtitle.download_link},
                         timeout=10)
    r.raise_for_status()

    if not r.content:
        logger.debug('Unable to download subtitle. No data returned from provider')
        return

    archive = _get_archive(r.content)
    subtitle_content = _get_subtitle_from_archive(archive) if archive else r.content

    if subtitle_content:
        subtitle.content = fix_line_ending(subtitle_content)
    else:
        logger.debug('Could not extract subtitle from %r', archive)
def query(self, series, season, year=None, country=None):
    """Query the provider for all completed subtitles of a show season.

    :param str series: series name.
    :param int season: season number.
    :param year: year of the series, if any.
    :param country: country of the series, if any.
    :return: the found subtitles.
    :rtype: list
    """
    # get the show id
    show_id = self.get_show_id(series, year, country)
    if show_id is None:
        logger.error('No show id found for %r (%r)', series,
                     {'year': year, 'country': country})
        return []

    # get the page of the season of the show
    logger.info('Getting the page of show id %d, season %d', show_id, season)
    r = self.session.get(self.server_url + 'show/%d' % show_id,
                         params={'season': season}, timeout=10)
    r.raise_for_status()

    if not r.content:
        # Provider returns a status of 304 Not Modified with an empty content
        # raise_for_status won't raise exception for that status code
        logger.debug('No data returned from provider')
        return []

    soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

    # re-read the canonical series name and year from the page header
    header_text = soup.select('#header font')[0].text.strip()[:-10]
    match = series_year_re.match(header_text)
    series = match.group('series')
    year = int(match.group('year')) if match.group('year') else None

    subtitles = []
    # loop over subtitle rows
    for subtitle_row in soup.select('tr.epeven'):
        columns = subtitle_row('td')

        # ignore incomplete subtitles
        status = columns[5].text
        if status != 'Completed':
            logger.debug('Ignoring subtitle with status %s', status)
            continue

        # read the item
        season = int(columns[0].text)
        episode = int(columns[1].text)
        title = columns[2].text
        page_link = self.server_url + columns[2].a['href'][1:]
        language = Language.fromaddic7ed(columns[3].text)
        version = columns[4].text
        hearing_impaired = bool(columns[6].text)
        download_link = columns[9].a['href'][1:]

        subtitle = self.subtitle_class(language, hearing_impaired, page_link, series,
                                       season, episode, title, year, version, download_link)
        logger.debug('Found subtitle %r', subtitle)
        subtitles.append(subtitle)

    return subtitles
def get_episode_url(self, show_id, series, season, episode, year=None):
    """Get the url best matching show id for `series`, `season`, `episode` and `year`.

    :param int show_id: show id of the series
    :param str series: serie of the episode.
    :param int season: season of the episode.
    :param int episode: number of the episode.
    :param int year: year of the series.
    :return: the episode url, if found.
    :rtype: str
    """
    # get the page of the season of the show
    logger.info('Getting the page of show id %d, season %d', show_id, season)
    series_sanitized = sanitize(series)
    episode_url = None

    r = self.session.get(self.subtitles_url,
                         params={'show': show_id, 'season': season},
                         timeout=10)
    r.raise_for_status()
    soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

    # acceptable title forms: "<series> <year> <S>x<EE>" and "<series> <S>x<EE>"
    candidates = (
        sanitize('{} {} {}x{:02d}'.format(series_sanitized, year, season, episode)),
        sanitize('{} {}x{:02d}'.format(series_sanitized, season, episode)),
    )

    # loop over episodes rows
    for html_episode in soup.select('td > a[href*="/episodes/"]'):
        title = sanitize(html_episode.get_text())
        if any(candidate in title for candidate in candidates):
            episode_url = 'https://' + html_episode['href'][2:]
            logger.debug('Subtitle found for %s, season: %d, episode: %d. URL: %s',
                         series, season, episode, episode_url)
            break

    return episode_url
def query(self, movie_id, title, year):
    """Query the provider for subtitles of a movie.

    :param movie_id: provider id of the movie, if already known.
    :param str title: movie title, used to search when no id is given.
    :param int year: year of the movie.
    :return: the found subtitles.
    :rtype: list
    """
    # get the season list of the show
    logger.info('Getting the subtitle list of show id %s', movie_id)
    if movie_id:
        page_link = self.server_url + '/' + movie_id
    else:
        page_link = self.server_url + self.search_url.format(' '.join([title, str(year)]))

    r = self.session.get(page_link, timeout=10)
    r.raise_for_status()

    if not r.content:
        logger.debug('No data returned from provider')
        return []

    soup = ParserBeautifulSoup(r.content, ['html.parser'])

    # extract the year displayed on the page, if any
    year = None
    year_element = soup.select_one('td#dates_header > table div')
    if year_element:
        found = year_re.match(str(year_element.contents[2]).strip())
        if found:
            year = int(found.group(1))

    # extract the title displayed on the page, if any
    title_tag = soup.select_one('td#dates_header > table u')
    show_title = str(title_tag.contents[0]).strip() if title_tag else None

    subtitles = []
    # loop over episode rows
    for details in soup.select('.movie-details'):
        # read common info
        version = details.find('span').text
        download_link = self.server_url + details.find('a')['href']
        uploader = details.select_one('.movie-info').find('p').find('a').text
        language_code = details.select_one('.sprite')['class'][1].split('gif')[0]
        language = Language.fromietf(language_code)

        subtitle = self.subtitle_class(language, page_link, show_title, year,
                                       version, download_link, uploader)
        logger.debug('Found subtitle {!r}'.format(subtitle))
        subtitles.append(subtitle)

    return subtitles
def _parse_subtitles_page(self, video, response, language):
    """Parse a search-results page into a list of subtitles.

    Each result is split across a title block and a detail block, walked
    in lockstep. Entries whose parenthesised year contradicts the video's
    year, multi-disc entries and non-SubRip entries are skipped.

    :param video: the video being searched for (its ``year`` filters results).
    :param response: HTTP response of the search page.
    :param language: initial language (overridden per entry from the flag icon).
    :return: the parsed subtitles.
    :rtype: list
    """
    subtitles = []

    page_soup = ParserBeautifulSoup(
        response.content.decode("utf-8", "ignore"), ["lxml", "html.parser"])
    title_soups = page_soup.find_all("div", {"id": "menu_detalle_buscador"})
    body_soups = page_soup.find_all("div", {"id": "buscador_detalle"})

    for subtitle in range(0, len(title_soups)):
        title_soup, body_soup = title_soups[subtitle], body_soups[subtitle]

        # title
        title = self._clean_title(title_soup.find("a").text)

        # discard subtitles if a year between parenthesis is present in title and doesn't match the one provided
        # in video object
        # BUGFIX: re.match only matches at the start of the string, so a year in
        # a title like "Movie (2020)" was never detected and the filter was dead
        # code; re.search finds the pattern anywhere in the title.
        if re.search(r'(\(\d{4}\))', title):
            if video.year and str(video.year) not in title:
                continue

        # Data
        datos = body_soup.find("div", {"id": "buscador_detalle_sub_datos"}).text

        # Ignore multi-disc and non-srt subtitles
        if not any(item in datos for item in ("Cds:</b> 1", "SubRip")):
            continue

        # Spanish (Spain) entries carry the Spain flag icon
        spain = "/pais/7.gif" in datos
        language = Language.fromalpha2("es") if spain else Language("spa", "MX")

        # description
        sub_details = body_soup.find("div", {"id": "buscador_detalle_sub"}).text
        description = sub_details.replace(",", " ").lower()

        # uploader
        uploader = body_soup.find("a", {"class": "link1"}).text

        page_link = title_soup.find("a")["href"]

        subtitle = self.subtitle_class(language, video, page_link, title,
                                       description, uploader)

        logger.debug("Found subtitle %r", subtitle)
        subtitles.append(subtitle)

    return subtitles
def download_subtitle(self, subtitle):
    """Download `subtitle` from the provider, trying each mirror link.

    The subtitle page lists several candidate links; each is fetched in
    turn until one returns a payload larger than 1 KiB. The payload may
    be a rar/zip archive (the subtitle is extracted from it) or a bare
    subtitle file.
    """
    if isinstance(subtitle, ZimukuSubtitle):
        # download the subtitle
        logger.info('Downloading subtitle %r', subtitle)
        r = self.session.get(subtitle.download_link,
                             headers={'Referer': subtitle.page_link},
                             timeout=30)
        r.raise_for_status()

        if not r.content:
            logger.debug('Unable to download subtitle. No data returned from provider')
            return

        soup = ParserBeautifulSoup(r.content.decode('utf-8', 'ignore'),
                                   ['lxml', 'html.parser'])
        links = soup.find("div", {"class": "clearfix"}).find_all('a')
        # TODO: add settings for choice
        for down_link in links:
            # BUGFIX: do not .encode('utf-8') the href -- concatenating
            # str and bytes raises TypeError on Python 3.
            url = self.server_url + down_link.get('href')
            r = self.session.get(url,
                                 headers={'Referer': subtitle.download_link},
                                 timeout=30)
            r.raise_for_status()

            # a payload bigger than 1 KiB is assumed to be a real file,
            # not an error/redirect stub
            if len(r.content) > 1024:
                break

        archive_stream = io.BytesIO(r.content)
        archive = None
        if rarfile.is_rarfile(archive_stream):
            logger.debug('Identified rar archive')
            archive = rarfile.RarFile(archive_stream)
            subtitle_content = _get_subtitle_from_archive(archive)
        elif zipfile.is_zipfile(archive_stream):
            logger.debug('Identified zip archive')
            archive = zipfile.ZipFile(archive_stream)
            subtitle_content = _get_subtitle_from_archive(archive)
        else:
            subtitle_content = r.content

        if subtitle_content:
            subtitle.content = fix_line_ending(subtitle_content)
        else:
            logger.debug('Could not extract subtitle from %r', archive)
def get_episode_ids(self, show_id, season):
    """Get episode ids from the show id and the season.

    :param int show_id: show id.
    :param int season: season of the episode.
    :return: episode ids per episode number.
    :rtype: dict
    """
    # get the page of the season of the show
    logger.info('Getting the page of show id %d, season %d', show_id, season)
    r = self.session.get(self.server_url + 'tvshow-%d-%d.html' % (show_id, season),
                         timeout=10)
    # BUGFIX: fail fast on HTTP errors instead of silently parsing an error
    # page into an empty episode dict (consistent with the other queries
    # in this provider module, which all call raise_for_status()).
    r.raise_for_status()
    soup = ParserBeautifulSoup(r.content, ['lxml', 'html.parser'])

    # loop over episode rows
    episode_ids = {}
    for row in soup.select('table#table5 tr'):
        # skip rows that do not have a link to the episode page
        if not row('a', href=episode_id_re):
            continue

        # extract data from the cells
        cells = row('td')
        episode = int(cells[0].text.split('x')[1])
        episode_id = int(cells[1].a['href'][8:-5])
        episode_ids[episode] = episode_id

    if episode_ids:
        logger.debug('Found episode ids %r', episode_ids)
    else:
        logger.warning('No episode ids found')

    # free the parse tree eagerly; season pages can be large
    soup.decompose()
    soup = None

    return episode_ids
def query(self, series, season, episode, year=None, video=None):
    """Search the provider for subtitles of a given episode.

    :param str series: series name.
    :param int season: season number.
    :param int episode: episode number.
    :param year: year of the series, if any.
    :param video: the searched video; ``video.release_group`` is forwarded
        to the subtitle (assumes callers always pass a video object --
        TODO confirm, a ``None`` video would raise AttributeError here).
    :return: the found subtitles.
    :rtype: list
    """
    # Search for s01e03 instead of s1e3
    seasona = "%02d" % season
    episodea = "%02d" % episode
    series = fix_inconsistent_naming(series)
    seriesa = series.replace(' ', '+')

    # get the episode page
    logger.info('Getting the page for episode %s', episode)
    url = self.server_url + "sorozatok.php?cim=" + seriesa + "&evad="+str(seasona) + \
          "&resz="+str(episodea)+"&nyelvtipus=%25&x=24&y=8"
    logger.info('Url %s', url)
    r = self.session.get(url, timeout=10).content

    i = 0
    soup = ParserBeautifulSoup(r, ['lxml'])
    # the results listing is the 10th table on the page
    table = soup.find_all("table")[9]

    subtitles = []
    # loop over subtitles rows
    for row in table.find_all("tr"):
        i = i + 1
        # result rows carry a hover background image; the first rows are layout/header
        if "this.style.backgroundImage='url(css/over2.jpg)" in str(row) and i > 5:
            datas = row.find_all("td")

            # Currently subliminal not use these params, but maybe later will come in handy
            # hunagrian_name = re.split('s(\d{1,2})', datas[1].find_all('b')[0].getText())[0]
            # Translator of subtitle
            # sub_translator = datas[3].getText()
            # Posting date of subtitle
            # sub_date = datas[4].getText()

            sub_year = sub_english_name = sub_version = None
            # Handle the case when '(' in subtitle
            # one '(' means the title carries only a release string; two mean
            # "(year)" plus "(release)"
            if datas[1].getText().count('(') == 1:
                sub_english_name = re.split('s(\d{1,2})e(\d{1,2})', datas[1].getText())[3]
            if datas[1].getText().count('(') == 2:
                sub_year = re.findall(r"(?<=\()(\d{4})(?=\))", datas[1].getText().strip())[0]
                sub_english_name = re.split('s(\d{1,2})e(\d{1,2})',
                                            datas[1].getText().split('(')[0])[0]
            if not sub_english_name:
                continue

            # parse season/episode numbers out of the bold "sXXeYY" label
            sub_season = int((re.findall('s(\d{1,2})', datas[1].find_all('b')[0].getText(),
                                         re.VERBOSE)[0])
                             .lstrip('0'))
            sub_episode = int((re.findall('e(\d{1,2})', datas[1].find_all('b')[0].getText(),
                                          re.VERBOSE)[0])
                              .lstrip('0'))

            if sub_season == season and sub_episode == episode:
                # the flag icon path encodes the subtitle language
                sub_language = self.get_language(datas[2].find_all('img')[0]['src'].split('/')[1])
                sub_downloadlink = datas[6].find_all('a')[1]['href']
                sub_id = sub_downloadlink.split('=')[1].split('.')[0]

                # the text between parentheses is the release/version string
                if datas[1].getText().count('(') == 1:
                    sub_version = datas[1].getText().split('(')[1].split(')')[0]
                if datas[1].getText().count('(') == 2:
                    sub_version = datas[1].getText().split('(')[2].split(')')[0]

                # One subtitle can be used for several releases
                sub_releases = [s.strip() for s in sub_version.split(',')]
                subtitle = self.subtitle_class(sub_language, sub_downloadlink, sub_id,
                                               sub_english_name.strip(), sub_season,
                                               sub_episode, sub_version, sub_releases,
                                               sub_year,
                                               asked_for_release_group=video.release_group,
                                               asked_for_episode=episode)
                logger.debug('Found subtitle: %r', subtitle)
                subtitles.append(subtitle)

    return subtitles