示例#1
0
    def init(self):
        """Descramble the stream manifests and build the Stream objects.

        The helper functions called here mutate ``self.vid_info`` and
        ``self.player_config_args`` in place, so no intermediate copies of
        the transformed dictionaries are kept.

        :rtype: None

        """
        logger.info('init started')

        self.vid_info = dict(parse_qsl(self.vid_info))
        if not self.age_restricted:
            player_config = extract.get_ytplayer_config(self.watch_html)
            self.player_config_args = player_config['args']

            # Fix for KeyError: 'title' issue #434 -- when the player config
            # has no title, recover it from the page's <title> element.
            if 'title' not in self.player_config_args:
                open_tag = '<title>'
                lowered_html = self.watch_html.lower()
                start = lowered_html.index(open_tag) + len(open_tag)
                stop = lowered_html.index('</title>')
                page_title = self.watch_html[start:stop].strip()
                # drop a trailing ' - youtube' suffix (case-insensitive)
                suffix_at = page_title.lower().rfind(' - youtube')
                if suffix_at > 0:
                    page_title = page_title[:suffix_at]
                self.player_config_args['title'] = page_title
        else:
            # age restricted videos use the video info as the player config
            self.player_config_args = self.vid_info

        self.vid_descr = extract.get_vid_descr(self.watch_html)

        # https://github.com/nficano/pytube/issues/165
        manifest_keys = ['url_encoded_fmt_stream_map']
        if 'adaptive_fmts' in self.player_config_args:
            manifest_keys += ['adaptive_fmts']

        # unscramble the progressive and adaptive stream manifests.
        for manifest_key in manifest_keys:
            # When age restricted, vid_info *is* player_config_args, so it
            # is descrambled exactly once by the call further down.
            if not (self.age_restricted
                    or manifest_key not in self.vid_info):
                mixins.apply_descrambler(self.vid_info, manifest_key)
            mixins.apply_descrambler(self.player_config_args, manifest_key)

            try:
                mixins.apply_signature(
                    self.player_config_args, manifest_key, self.js,
                )
            except TypeError:
                # re-resolve and re-download the player javascript, then
                # retry the signature step once.
                self.js_url = extract.js_url(
                    self.embed_html,
                    self.age_restricted,
                )
                self.js = request.get(self.js_url)
                mixins.apply_signature(
                    self.player_config_args, manifest_key, self.js,
                )

            # build instances of :class:`Stream <Stream>`
            self.initialize_stream_objects(manifest_key)

        # load the player_response object (contains subtitle information)
        apply_mixin(self.player_config_args, 'player_response', json.loads)

        self.initialize_caption_objects()
        logger.info('init finished successfully')
示例#2
0
def apply_descrambler(stream_data, key):
    """Decode a comma-separated stream manifest in place.

    The string stored at ``stream_data[key]`` is split on commas; every
    piece is parsed as a query string and turned into a ``dict`` whose
    values are additionally passed through ``unquote``. The resulting
    ``list`` of dictionaries replaces the original string.

    :param dict stream_data:
        Dictionary containing query string encoded values.
    :param str key:
        Name of the key in dictionary.
    :rtype: None

    **Example**:

    >>> d = {'foo': 'bar=1&var=test,em=5&t=url%20encoded'}
    >>> apply_descrambler(d, 'foo')
    >>> print(d)
    {'foo': [{'bar': '1', 'var': 'test'}, {'em': '5', 't': 'url encoded'}]}

    """
    decoded_items = []
    for raw_item in stream_data[key].split(','):
        fields = {}
        for name, value in parse_qsl(raw_item):
            fields[name] = unquote(value)
        decoded_items.append(fields)
    stream_data[key] = decoded_items

    logger.debug(
        'applying descrambler\n%s',
        pprint.pformat(stream_data[key], indent=2),
    )
示例#3
0
文件: mixins.py 项目: nficano/pytube
def apply_descrambler(stream_data, key):
    """Replace a query-string encoded manifest with a list of dicts.

    ``stream_data[key]`` is expected to be a string of comma-separated
    query strings. Each one is parsed into a ``dict`` (values run through
    ``unquote``), and the list of those dicts is written back to
    ``stream_data[key]``.

    :param dict stream_data:
        Dictionary containing query string encoded values.
    :param str key:
        Name of the key in dictionary.
    :rtype: None

    **Example**:

    >>> d = {'foo': 'bar=1&var=test,em=5&t=url%20encoded'}
    >>> apply_descrambler(d, 'foo')
    >>> print(d)
    {'foo': [{'bar': '1', 'var': 'test'}, {'em': '5', 't': 'url encoded'}]}

    """
    def _parse_item(encoded):
        # one comma-separated piece -> {name: unquoted value}
        return {name: unquote(value) for name, value in parse_qsl(encoded)}

    pieces = stream_data[key].split(',')
    stream_data[key] = list(map(_parse_item, pieces))

    pretty = pprint.pformat(stream_data[key], indent=2)
    logger.debug('applying descrambler\n%s', pretty)
示例#4
0
    def init(self):
        """Descramble the stream data and build Stream instances.

        The helpers called here transform ``self.vid_info`` and
        ``self.player_config_args`` in place.

        When the player config carries no ``title``, the title is recovered
        from the watch page's ``<title>`` element: with BeautifulSoup when
        it (and its ``lxml`` parser) is usable, otherwise with plain string
        searching.

        :rtype: None
        :raises ValueError:
            If the title has to be recovered by string searching and the
            watch page contains no ``<title>``/``</title>`` tag.

        """
        logger.info('init started')

        self.vid_info = {k: v for k, v in parse_qsl(self.vid_info)}
        if self.age_restricted:
            self.player_config_args = self.vid_info
        else:
            self.player_config_args = extract.get_ytplayer_config(
                self.watch_html, )['args']

            if 'title' not in self.player_config_args:
                title = None
                # BeautifulSoup is an optional dependency: ImportError also
                # covers ModuleNotFoundError and exists on every Python 3.
                try:
                    from bs4 import BeautifulSoup, FeatureNotFound
                except ImportError:
                    pass
                else:
                    try:
                        soup = BeautifulSoup(self.watch_html, 'lxml')
                    except FeatureNotFound:
                        # bs4 is installed but the lxml parser is not
                        soup = None
                    # soup.title is None when the page has no <title> tag
                    if soup is not None and soup.title is not None:
                        title = soup.title.get_text().strip()

                if title is None:
                    # since this parsing is actually pretty simple, we may
                    # just parse it using index()
                    lowered_html = self.watch_html.lower()
                    i_start = lowered_html.index('<title>') + len('<title>')
                    i_end = lowered_html.index('</title>')
                    title = self.watch_html[i_start:i_end].strip()

                # remove the ' - youtube' part that is added to the browser
                # tab's title
                index = title.lower().rfind(' - youtube')
                title = title[:index] if index > 0 else title
                self.player_config_args['title'] = title

        self.vid_descr = extract.get_vid_descr(self.watch_html)
        # https://github.com/nficano/pytube/issues/165
        stream_maps = ['url_encoded_fmt_stream_map']
        if 'adaptive_fmts' in self.player_config_args:
            stream_maps.append('adaptive_fmts')

        # unscramble the progressive and adaptive stream manifests.
        for fmt in stream_maps:
            # When age restricted, vid_info *is* player_config_args, so it
            # must not be descrambled a second time here.
            if not self.age_restricted and fmt in self.vid_info:
                mixins.apply_descrambler(self.vid_info, fmt)
            mixins.apply_descrambler(self.player_config_args, fmt)

            try:
                mixins.apply_signature(self.player_config_args, fmt, self.js)
            except TypeError:
                # re-resolve and re-download the player javascript, then
                # retry the signature step once.
                self.js_url = extract.js_url(
                    self.embed_html,
                    self.age_restricted,
                )
                self.js = request.get(self.js_url)
                mixins.apply_signature(self.player_config_args, fmt, self.js)

            # build instances of :class:`Stream <Stream>`
            self.initialize_stream_objects(fmt)

        # load the player_response object (contains subtitle information)
        apply_mixin(self.player_config_args, 'player_response', json.loads)

        self.initialize_caption_objects()
        logger.info('init finished successfully')
示例#5
0
def _stream_entry_from_format(format_item):
    """Convert one ``player_response`` format entry into a stream-map dict.

    Entries that carry a plain ``url`` are copied as-is; entries that carry
    a ``cipher`` query string instead contribute its ``url`` and ``s``
    fields.

    :param dict format_item:
        A single item of ``streamingData.formats`` / ``adaptiveFormats``.
    :rtype: dict
    :raises KeyError:
        If the entry has neither ``url`` nor ``cipher``, or lacks one of
        ``mimeType``, ``quality`` or ``itag``.

    """
    entry = {}
    if u'url' in format_item:
        entry[u'url'] = format_item[u'url']
    else:
        cipher = parse_qs(format_item[u'cipher'])
        entry[u'url'] = cipher[u'url'][0]
        entry[u's'] = cipher[u's'][0]
    entry[u'type'] = format_item[u'mimeType']
    entry[u'quality'] = format_item[u'quality']
    entry[u'itag'] = format_item[u'itag']
    return entry


def apply_descrambler(stream_data, key):
    """Apply various in-place transforms to YouTube's media stream data.

    Creates a ``list`` of dictionaries by string splitting on commas, then
    taking each list item, parsing it as a query string, converting it to a
    ``dict`` and unquoting the value.

    When ``key`` is ``'url_encoded_fmt_stream_map'`` and that entry is
    missing or empty, the list is instead rebuilt from the JSON document
    stored under ``stream_data['player_response']`` (its
    ``streamingData.formats`` followed by ``streamingData.adaptiveFormats``;
    either list may be absent). Plain-``url`` and ``cipher`` entries may be
    mixed freely.

    :param dict stream_data:
        Dictionary containing query string encoded values.
    :param str key:
        Name of the key in dictionary.
    :rtype: None
    :raises KeyError:
        If ``key`` (or, for the fallback, ``player_response`` /
        ``streamingData`` / a required format field) is missing.

    **Example**:

    >>> d = {'foo': 'bar=1&var=test,em=5&t=url%20encoded'}
    >>> apply_descrambler(d, 'foo')
    >>> print(d)
    {'foo': [{'bar': '1', 'var': 'test'}, {'em': '5', 't': 'url encoded'}]}

    """
    if key == 'url_encoded_fmt_stream_map' and not stream_data.get(key):
        # decode the (potentially large) JSON document only once
        streaming_data = json.loads(
            stream_data['player_response'])['streamingData']
        formats = list(streaming_data.get('formats') or [])
        formats.extend(streaming_data.get('adaptiveFormats') or [])
        stream_data[key] = [
            _stream_entry_from_format(format_item)
            for format_item in formats
        ]
    else:
        stream_data[key] = [{k: unquote(v)
                             for k, v in parse_qsl(i)}
                            for i in stream_data[key].split(',')]
    logger.debug(
        'applying descrambler\n%s',
        pprint.pformat(stream_data[key], indent=2),
    )
示例#6
0
    def init(self):
        """Descramble the stream manifests and create the Stream objects.

        Every helper invoked here edits the dictionary it is handed in
        place, so the transformed data is never copied between steps.

        :rtype: None

        """
        logger.info('init started')

        self.vid_info = dict(parse_qsl(self.vid_info))
        if not self.age_restricted:
            player_config = extract.get_ytplayer_config(self.watch_html)
            self.player_config_args = player_config['args']
        else:
            # age restricted videos use the video info as the player config
            self.player_config_args = self.vid_info

        # https://github.com/nficano/pytube/issues/165
        manifest_keys = ['url_encoded_fmt_stream_map']
        if 'adaptive_fmts' in self.player_config_args:
            manifest_keys += ['adaptive_fmts']

        # unscramble the progressive and adaptive stream manifests.
        for manifest_key in manifest_keys:
            # When age restricted, vid_info and player_config_args are the
            # same dict, which the second call below already covers.
            descramble_vid_info = (
                not self.age_restricted and manifest_key in self.vid_info
            )
            if descramble_vid_info:
                mixins.apply_descrambler(self.vid_info, manifest_key)
            mixins.apply_descrambler(self.player_config_args, manifest_key)

            # apply the signature to the download url.
            mixins.apply_signature(
                self.player_config_args, manifest_key, self.js,
            )

            # build instances of :class:`Stream <Stream>`
            self.initialize_stream_objects(manifest_key)

        # load the player_response object (contains subtitle information)
        apply_mixin(self.player_config_args, 'player_response', json.loads)

        self.initialize_caption_objects()
        logger.info('init finished successfully')
示例#7
0
    def init(self):
        """Prepare stream and caption objects from the fetched video data.

        The dictionaries held on the instance are handed to helpers that
        rewrite them in place; nothing is copied along the way.

        :rtype: None

        """
        logger.info('init started')

        self.vid_info = {
            name: value for name, value in parse_qsl(self.vid_info)
        }
        # age restricted videos use the video info as the player config
        self.player_config_args = (
            self.vid_info
            if self.age_restricted
            else extract.get_ytplayer_config(self.watch_html)['args']
        )

        # https://github.com/nficano/pytube/issues/165
        has_adaptive = 'adaptive_fmts' in self.player_config_args
        stream_maps = ('url_encoded_fmt_stream_map',) + (
            ('adaptive_fmts',) if has_adaptive else ()
        )

        # unscramble the progressive and adaptive stream manifests.
        for stream_map in stream_maps:
            # skip the extra pass when vid_info doubles as the player
            # config (age restricted) or does not hold this manifest
            if not (self.age_restricted or stream_map not in self.vid_info):
                mixins.apply_descrambler(self.vid_info, stream_map)
            mixins.apply_descrambler(self.player_config_args, stream_map)

            # apply the signature to the download url.
            mixins.apply_signature(
                self.player_config_args, stream_map, self.js,
            )

            # build instances of :class:`Stream <Stream>`
            self.initialize_stream_objects(stream_map)

        # load the player_response object (contains subtitle information)
        apply_mixin(self.player_config_args, 'player_response', json.loads)

        self.initialize_caption_objects()
        logger.info('init finished successfully')
示例#8
0
    def init(self):
        """Descramble the stream data and build Stream instances.

        The helpers called here transform ``self.vid_info`` and the player
        config ``args`` dictionary in place.

        Only the progressive manifest is processed unconditionally; the
        adaptive manifest is processed only when the player config actually
        contains it, and ``self.vid_info`` is descrambled only for the
        manifests it holds. This avoids a ``KeyError`` for videos that ship
        without one of them
        (see https://github.com/nficano/pytube/issues/165).

        :rtype: None

        """
        logger.info('init started')

        self.vid_info = {k: v for k, v in parse_qsl(self.vid_info)}
        self.player_config = extract.get_ytplayer_config(self.watch_html)

        progressive_fmts = 'url_encoded_fmt_stream_map'
        adaptive_fmts = 'adaptive_fmts'
        config_args = self.player_config['args']

        # manifests to decode from the player config
        stream_maps = [progressive_fmts]
        if adaptive_fmts in config_args:
            stream_maps.append(adaptive_fmts)

        # unscramble the progressive and adaptive stream manifests.
        for fmt in (progressive_fmts, adaptive_fmts):
            if fmt in self.vid_info:
                mixins.apply_descrambler(self.vid_info, fmt)
        for fmt in stream_maps:
            mixins.apply_descrambler(config_args, fmt)

        # apply the signature to the download url.
        for fmt in stream_maps:
            mixins.apply_signature(config_args, fmt, self.js)

        # load the player_response object (contains subtitle information)
        apply_mixin(config_args, 'player_response', json.loads)

        # build instances of :class:`Stream <Stream>`
        for fmt in stream_maps:
            self.initialize_stream_objects(fmt)

        self.initialize_caption_objects()
        logger.info('init finished successfully')
示例#9
0
def _split_cipher(cipher):
    """Split a ``cipher`` query string into a dict of unquoted fields.

    Field names are matched exactly (so ``s`` is never confused with another
    field whose name merely ends in ``s``), the value is everything after
    the first ``=``, and the first occurrence of a repeated field wins.

    :param str cipher:
        ``&``-separated ``name=value`` pairs with percent-encoded values.
    :rtype: dict

    """
    import urllib.parse

    fields = {}
    for pair in cipher.split("&"):
        name, _, value = pair.partition("=")
        fields.setdefault(name, urllib.parse.unquote(value))
    return fields


def _format_to_stream(format_item):
    """Convert one ``player_response`` format entry into a stream-map dict.

    Entries that carry a plain ``url`` are copied as-is; entries that carry
    a ``cipher`` string instead contribute its ``url``, ``sp`` and ``s``
    fields.

    :param dict format_item:
        A single item of ``streamingData.formats`` / ``adaptiveFormats``.
    :rtype: dict
    :raises KeyError:
        If the entry has neither ``url`` nor ``cipher``, if the cipher lacks
        ``url``, ``sp`` or ``s``, or if ``mimeType``, ``quality`` or
        ``itag`` is missing.

    """
    if u"url" in format_item:
        stream = {u"url": format_item[u"url"]}
    else:
        cipher = _split_cipher(format_item[u"cipher"])
        stream = {
            u"url": cipher[u"url"],
            u"sp": cipher[u"sp"],
            u"s": cipher[u"s"],
        }
    stream[u"type"] = format_item[u"mimeType"]
    stream[u"quality"] = format_item[u"quality"]
    stream[u"itag"] = format_item[u"itag"]
    return stream


def apply_descrambler(stream_data, key):
    """Apply various in-place transforms to YouTube's media stream data.

    Creates a ``list`` of dictionaries by string splitting on commas, then
    taking each list item, parsing it as a query string, converting it to a
    ``dict`` and unquoting the value.

    When ``key`` is ``"url_encoded_fmt_stream_map"`` and that entry is
    missing or empty, the list is instead rebuilt from the JSON document
    stored under ``stream_data["player_response"]`` (its
    ``streamingData.formats`` followed by ``streamingData.adaptiveFormats``;
    either list may be absent). Plain-``url`` and ``cipher`` entries may be
    mixed freely.

    :param dict stream_data:
        Dictionary containing query string encoded values.
    :param str key:
        Name of the key in dictionary.
    :rtype: None
    :raises KeyError:
        If ``key`` (or, for the fallback, ``player_response`` /
        ``streamingData`` / a required format field) is missing.

    **Example**:

    >>> d = {'foo': 'bar=1&var=test,em=5&t=url%20encoded'}
    >>> apply_descrambler(d, 'foo')
    >>> print(d)
    {'foo': [{'bar': '1', 'var': 'test'}, {'em': '5', 't': 'url encoded'}]}

    """
    if key == "url_encoded_fmt_stream_map" and not stream_data.get(key):
        # decode the (potentially large) JSON document only once
        streaming_data = json.loads(
            stream_data["player_response"])["streamingData"]
        formats = list(streaming_data.get("formats") or [])
        formats.extend(streaming_data.get("adaptiveFormats") or [])
        stream_data[key] = [
            _format_to_stream(format_item) for format_item in formats
        ]
    else:
        stream_data[key] = [{k: unquote(v)
                             for k, v in parse_qsl(i)}
                            for i in stream_data[key].split(",")]
    logger.debug(
        "applying descrambler\n%s",
        pprint.pformat(stream_data[key], indent=2),
    )
示例#10
0
    :param dict dct:
        Dictionary containing query string encoded values.
    :param str key:
        Name of the key in dictionary.

    **Example**:

    >>> d = {'foo': 'bar=1&var=test,em=5&t=url%20encoded'}
    >>> apply_descrambler(d, 'foo')
    >>> print(d)
    {'foo': [{'bar': '1', 'var': 'test'}, {'em': '5', 't': 'url encoded'}]}

    """
     if key == 'url_encoded_fmt_stream_map' and not stream_data.get('url_encoded_fmt_stream_map'):
        formats = json.loads(stream_data['player_response'])['streamingData']['formats']
        formats.extend(json.loads(stream_data['player_response'])['streamingData']['adaptiveFormats'])
        stream_data[key] = [{u'url': format_item[u'url'],
                             u'type': format_item[u'mimeType'],
                             u'quality': format_item[u'quality'],
                             u'itag': format_item[u'itag']} for format_item in formats]
    else:
        stream_data[key] = [
            {k: unquote(v) for k, v in parse_qsl(i)}
            for i in stream_data[key].split(',')
        ]
    logger.debug(
        'applying descrambler\n%s',
        pprint.pformat(stream_data[key], indent=2),
    )