Exemplo n.º 1
0
 def test_parse_xml_encoding(self):
     tree = parse_xml("""<?xml version="1.0" encoding="UTF-8"?><test>ä</test>""")
     self.assertEqual(tree.xpath(".//text()"), ["ä"])
     tree = parse_xml("""<test>ä</test>""")
     self.assertEqual(tree.xpath(".//text()"), ["ä"])
     tree = parse_xml(b"""<?xml version="1.0" encoding="UTF-8"?><test>\xC3\xA4</test>""")
     self.assertEqual(tree.xpath(".//text()"), ["ä"])
     tree = parse_xml(b"""<test>\xC3\xA4</test>""")
     self.assertEqual(tree.xpath(".//text()"), ["ä"])
Exemplo n.º 2
0
    def test_parse_xml_ns_ignore(self):
        """Check that ignore_ns=True drops the default namespace declaration.

        The same element is parsed with a space, a tab and a newline as the
        separator between the tag name and its attributes; in every case the
        resulting tag and attributes must equal those of a plain element.
        """
        expected = Element("test", {"foo": "bar"})
        documents = (
            """<test foo="bar" xmlns="foo:bar"/>""",
            """<test	foo="bar"	xmlns="foo:bar"/>""",
            """<test\nfoo="bar"\nxmlns="foo:bar"/>""",
        )
        for document in documents:
            actual = parse_xml(document, ignore_ns=True)
            self.assertEqual(expected.tag, actual.tag)
            self.assertEqual(expected.attrib, actual.attrib)
Exemplo n.º 3
0
 def test_parse_xml_validate(self):
     expected = Element("test", {"foo": "bar"})
     actual = parse_xml(
         """<test foo="bar"/>""",
         schema=validate.Schema(xml_element(tag="test", attrib={"foo": str}))
     )
     self.assertEqual(expected.tag, actual.tag)
     self.assertEqual(expected.attrib, actual.attrib)
Exemplo n.º 4
0
 def test_parse_xml_entities(self):
     """Check that a bare "&" in an attribute parses with invalid_char_entities=True."""
     schema = validate.Schema(xml_element(tag="test", attrib={"foo": str}))
     expected = Element("test", {"foo": "bar &"})
     # The "&" below is deliberately not written as an entity.
     document = """<test foo="bar &"/>"""

     actual = parse_xml(document, schema=schema, invalid_char_entities=True)

     self.assertEqual(expected.tag, actual.tag)
     self.assertEqual(expected.attrib, actual.attrib)
Exemplo n.º 5
0
    def parse_manifest(cls, session, url_or_manifest: str,
                       **args) -> Dict[str, "DASHStream"]:
        """
        Parse a DASH manifest file and return its streams.

        Only the first period of the manifest is inspected. Every combination
        of a video and an audio representation becomes one stream; when there
        are several audio languages, only representations matching the chosen
        language (or having no language) are kept.

        :param streamlink.Streamlink session: Streamlink session instance
        :param url_or_manifest: URL of the manifest file or an XML manifest string
        :param args: Additional keyword arguments passed to :meth:`requests.Session.request`
        :return: Mapping of stream names to streams. Streams sharing a name are
                 ordered by descending bandwidth and the 2nd, 3rd, ... get the
                 suffixes ``_alt``, ``_alt2``, ...
        :raises PluginError: If an adaptation set of the first period is content protected
        """

        if url_or_manifest.startswith('<?xml'):
            # Inline manifest: there is no URL, so use a generic label in
            # error messages instead of referencing an undefined name.
            source = "MPD manifest"
            mpd = MPD(parse_xml(url_or_manifest, ignore_ns=True))
        else:
            res = session.http.get(url_or_manifest,
                                   **session.http.valid_request_args(**args))
            url = res.url
            source = url

            # The base URL is the manifest URL without its last path segment
            urlp = list(urlparse(url))
            urlp[2], _ = urlp[2].rsplit("/", 1)

            mpd = MPD(session.http.xml(res, ignore_ns=True),
                      base_url=urlunparse(urlp),
                      url=url)

        video, audio = [], []

        # Search for suitable video and audio representations
        for aset in mpd.periods[0].adaptationSets:
            if aset.contentProtection:
                raise PluginError("{} is protected by DRM".format(source))
            for rep in aset.representations:
                if rep.mimeType.startswith("video"):
                    video.append(rep)
                elif rep.mimeType.startswith("audio"):
                    audio.append(rep)

        # A single None placeholder keeps the product below non-empty, so
        # video-only and audio-only manifests still yield streams
        if not video:
            video = [None]

        if not audio:
            audio = [None]

        locale = session.localization
        locale_lang = locale.language
        lang = None
        available_languages = set()

        # if the locale is explicitly set, prefer that language over others
        for aud in audio:
            if aud and aud.lang:
                available_languages.add(aud.lang)
                try:
                    if locale.explicit and Language.get(aud.lang) == locale_lang:
                        lang = aud.lang
                except LookupError:
                    # language code could not be looked up: skip the comparison
                    continue

        if not lang:
            # filter by the first language that appears
            lang = audio[0] and audio[0].lang

        log.debug(
            "Available languages for DASH audio streams: {0} (using: {1})".
            format(", ".join(available_languages) or "NONE", lang or "n/a"))

        # if the language is given by the stream, filter out other languages that do not match
        if len(available_languages) > 1:
            audio = [a for a in audio if a.lang is None or a.lang == lang]

        ret = []
        for vid, aud in itertools.product(video, audio):
            stream = DASHStream(session, mpd, vid, aud, **args)
            stream_name = []

            if vid:
                # name by height ("720p") when known, else by bandwidth ("1500k")
                stream_name.append("{:0.0f}{}".format(
                    vid.height or vid.bandwidth_rounded,
                    "p" if vid.height else "k"))
            if len(audio) > 1:
                # several audio tracks: disambiguate by audio bandwidth
                stream_name.append("a{:0.0f}k".format(aud.bandwidth))
            ret.append(('+'.join(stream_name), stream))

        # rename duplicate streams
        dict_value_list = defaultdict(list)
        for k, v in ret:
            dict_value_list[k].append(v)

        def sortby_bandwidth(dash_stream: DASHStream) -> int:
            if dash_stream.video_representation:
                return dash_stream.video_representation.bandwidth
            if dash_stream.audio_representation:
                return dash_stream.audio_representation.bandwidth
            return 0  # pragma: no cover

        ret_new = {}
        for q, items in dict_value_list.items():
            try:
                items = sorted(items, key=sortby_bandwidth, reverse=True)
            except AttributeError:
                # best effort: keep the original order if sorting is not possible
                pass

            for n, item in enumerate(items):
                if n == 0:
                    ret_new[q] = item
                elif n == 1:
                    ret_new[f'{q}_alt'] = item
                else:
                    ret_new[f'{q}_alt{n}'] = item
        return ret_new
Exemplo n.º 6
0
 def xml(cls, res, *args, **kwargs):
     """Parse the text body of a response as XML.

     :param res: Response object; its ``text`` attribute is parsed
     :param args: Extra positional arguments forwarded to ``parse_xml``
     :param kwargs: Extra keyword arguments forwarded to ``parse_xml``
     :return: Whatever ``parse_xml`` returns for the response text
     """
     body = res.text
     return parse_xml(body, *args, **kwargs)
Exemplo n.º 7
0
 def test_parse_xml_ns(self):
     """Check that a prefixed namespace is kept in the parsed element's tag."""
     document = """<h:test foo="bar" xmlns:h="foo:bar"/>"""
     # Namespaced tags are expected in "{namespace}localname" form.
     expected = Element("{foo:bar}test", {"foo": "bar"})

     actual = parse_xml(document)

     self.assertEqual(expected.tag, actual.tag)
     self.assertEqual(expected.attrib, actual.attrib)