示例#1
0
    def test_xml_element(self):
        """Validate one element against tag, text and attribute sub-schemas in turn."""
        element = Element("tag", attrib={"key": "value"})
        element.text = "test"

        # Positional argument of xml_element is the tag schema.
        by_tag = validate(xml_element("tag"), element)
        assert by_tag.tag == "tag"

        by_text = validate(xml_element(text="test"), element)
        assert by_text.text == "test"

        by_attrib = validate(xml_element(attrib={"key": text}), element)
        assert by_attrib.attrib == {"key": "value"}
示例#2
0
    def test_xml_element(self):
        """Exercise ``xml_element`` with transforming sub-schemas and with mismatches.

        The success case upper-cases tag, text and attributes and checks that the
        children are carried over; the failure cases check the error message
        prefix reported for tag, text and attribute mismatches.
        """
        root = Element("tag")
        root.set("key", "value")
        root.text = "test"
        first_child = Element("childA")
        second_child = Element("childB")
        for child in (first_child, second_child):
            root.append(child)

        upper = transform(str.upper)
        schema = xml_element(tag=upper, text=upper, attrib={upper: upper})
        validated: Element = validate(schema, root)

        assert validated.tag == "TAG"
        assert validated.text == "TEST"
        assert validated.attrib == {"KEY": "VALUE"}
        assert list(validated.iterchildren()) == [first_child, second_child]

        # Each entry: keyword arguments for xml_element, expected message prefix.
        mismatches = (
            ({"tag": "invalid"}, "Unable to validate XML tag: "),
            ({"text": "invalid"}, "Unable to validate XML text: "),
            ({"attrib": {"key": "invalid"}}, "Unable to validate XML attributes: "),
        )
        for schema_kwargs, message_prefix in mismatches:
            with self.assertRaises(ValueError) as cm:
                validate(xml_element(**schema_kwargs), root)
            assert str(cm.exception).startswith(message_prefix)
示例#3
0
    def test_xml_element(self):
        """Check that matching tag, text and attribute schemas all validate."""
        node = Element("tag", attrib={"key": "value"})
        node.text = "test"

        tag_result = validate(xml_element("tag"), node)
        assert tag_result.tag == "tag"

        text_result = validate(xml_element(text="test"), node)
        assert text_result.text == "test"

        # `text` is the schema used for the attribute value here.
        attrib_result = validate(xml_element(attrib={"key": text}), node)
        assert attrib_result.attrib == {"key": "value"}
示例#4
0
 def test_parse_xml_validate(self):
     """Parse a one-element document through a schema and compare tag and attributes."""
     schema = validate.Schema(xml_element(tag="test", attrib={"foo": text}))
     parsed = parse_xml(u"""<test foo="bar"/>""", schema=schema)

     reference = ET.Element("test", {"foo": "bar"})
     self.assertEqual(reference.tag, parsed.tag)
     self.assertEqual(reference.attrib, parsed.attrib)
示例#5
0
 def test_parse_xml_entities(self):
     """Parse an attribute containing a bare ``&`` with ``invalid_char_entities`` enabled."""
     schema = validate.Schema(xml_element(tag="test", attrib={"foo": text}))
     parsed = parse_xml(
         u"""<test foo="bar &"/>""",
         schema=schema,
         invalid_char_entities=True,
     )

     reference = ET.Element("test", {"foo": "bar &"})
     self.assertEqual(reference.tag, parsed.tag)
     self.assertEqual(reference.attrib, parsed.attrib)
示例#6
0
 def test_parse_xml_entities(self):
     """Parse an attribute containing a bare ``&`` with ``invalid_char_entities`` enabled."""
     element_schema = xml_element(tag="test", attrib={"foo": text})
     result = parse_xml(
         """<test foo="bar &"/>""",
         schema=validate.Schema(element_schema),
         invalid_char_entities=True,
     )

     wanted = ET.Element("test", {"foo": "bar &"})
     self.assertEqual(wanted.tag, result.tag)
     self.assertEqual(wanted.attrib, result.attrib)
示例#7
0
 def test_failure_schema(self):
     """Validating a non-element value against ``xml_element()`` reports an iselement failure."""
     with pytest.raises(validate.ValidationError) as exc_info:
         validate.validate(validate.xml_element(), "not-an-element")

     # Kept verbatim: the helper receives this text exactly as written.
     expected_report = """
         ValidationError(Callable):
           iselement('not-an-element') is not true
     """
     assert_validationerror(exc_info.value, expected_report)
示例#8
0
class RTE(Plugin):
    """Plugin for the RTE player: on-demand shows and live channels.

    The page URL selects the mode: a ``show/...`` URL carries a ``video_id``
    and is resolved through the VOD JSON API, a ``live/...`` URL carries a
    ``channel_id`` and is resolved through the live listings API.
    """

    VOD_API_URL = 'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id={0}'
    LIVE_API_URL = 'http://feeds.rasset.ie/livelistings/playlist'

    # Exactly one of the named groups `video_id` / `channel_id` is set on a match.
    _url_re = re.compile(
        r'http://www\.rte\.ie/player/[a-z0-9]+/(?:show/[a-z-]+-[0-9]+/(?P<video_id>[0-9]+)|live/(?P<channel_id>[0-9]+))'
    )

    # VOD response: `shows` is reduced to its single entry, and that entry's
    # `media:group` is reduced to a two-item list [HLS URL, HDS URL].
    _vod_api_schema = validate.Schema({
        'current_date':
        validate.text,
        'shows':
        validate.Schema(
            list,
            validate.length(1),
            validate.get(0),
            validate.Schema({
                'valid_start':
                validate.text,
                'valid_end':
                validate.text,
                'media:group':
                validate.Schema(
                    list,
                    validate.length(1),
                    validate.get(0),
                    validate.Schema(
                        {
                            'hls_server': validate.url(),
                            'hls_url': validate.text,
                            'hds_server': validate.url(),
                            'hds_url': validate.text,
                            # API returns RTMP streams that don't seem to work, ignore them
                            # 'url': validate.any(
                            #     validate.url(scheme="rtmp"),
                            #     validate.url(scheme="rtmpe")
                            # )
                        },
                        validate.transform(lambda x: [
                            x['hls_server'] + x['hls_url'], x['hds_server'] +
                            x['hds_url']
                        ])),
                ),
            }),
        )
    })

    # Live (desktop) response: the `url` attribute of every MRSS content element.
    _live_api_schema = validate.Schema(
        validate.xml_findall('.//{http://search.yahoo.com/mrss/}content'), [
            validate.all(validate.xml_element(attrib={'url': validate.url()}),
                         validate.get('url'))
        ])
    # Live (iphone) response: the single entry's `fullUrl`, which is either a
    # URL or the literal string 'none'.
    _live_api_iphone_schema = validate.Schema(
        list, validate.length(1), validate.get(0),
        validate.Schema({'fullUrl': validate.any(validate.url(), 'none')},
                        validate.get('fullUrl')))

    @classmethod
    def can_handle_url(cls, url):
        """Return the regex match for *url* (truthy) or ``None`` if unsupported."""
        # Look the pattern up through `cls` so a subclass overriding `_url_re`
        # is honoured instead of always using RTE's own pattern.
        return cls._url_re.match(url)

    def _get_vod_stream_urls(self, video_id):
        """Return the manifest URLs for a VOD item, or ``[]`` if it is not currently valid."""
        res = http.get(self.VOD_API_URL.format(video_id))
        stream_data = http.json(res, schema=self._vod_api_schema)
        show = stream_data['shows']

        # Check whether video format is expired
        current_date = datetime.strptime(stream_data['current_date'],
                                         '%Y-%m-%dT%H:%M:%S.%f')
        valid_start = datetime.strptime(show['valid_start'],
                                        '%Y-%m-%dT%H:%M:%S')
        valid_end = datetime.strptime(show['valid_end'], '%Y-%m-%dT%H:%M:%S')
        if not valid_start <= current_date <= valid_end:
            self.logger.error(
                'Failed to access stream, may be due to expired content')
            return []

        return show['media:group']

    def _get_live_stream_urls(self, channel_id):
        """Return the manifest URLs for a live channel (desktop list plus iphone HLS)."""
        # Get live streams for desktop
        res = http.get(self.LIVE_API_URL, params={'channelid': channel_id})
        urls = http.xml(res, schema=self._live_api_schema)

        # Get HLS streams for Iphone
        res = http.get(self.LIVE_API_URL,
                       params={
                           'channelid': channel_id,
                           'platform': 'iphone'
                       })
        iphone_url = http.json(res, schema=self._live_api_iphone_schema)
        # The schema admits the literal 'none' when no iphone stream is offered.
        if iphone_url != 'none':
            urls.append(iphone_url)

        return urls

    def _get_streams(self):
        """Yield ``(name, stream)`` pairs for every HDS/HLS manifest found for ``self.url``."""
        match = self._url_re.match(self.url)
        if match is None:
            # URL does not belong to this plugin: nothing to yield.
            return

        video_id = match.group('video_id')
        if video_id is not None:
            # VOD
            manifest_urls = self._get_vod_stream_urls(video_id)
        else:
            # Live
            manifest_urls = self._get_live_stream_urls(
                match.group('channel_id'))

        for manifest_url in manifest_urls:
            # Both checks are independent on purpose: they are substring tests.
            if '.f4m' in manifest_url:
                for s in HDSStream.parse_manifest(self.session,
                                                  manifest_url).items():
                    yield s
            if '.m3u8' in manifest_url:
                for s in HLSStream.parse_variant_playlist(
                        self.session, manifest_url).items():
                    yield s
示例#9
0
                path=validate.endswith(".m3u8")
            ),
        }, None)
    },
    validate.optional("playerUri"): validate.text,
    validate.optional("viewerPlusSwfUrl"): validate.url(scheme="http"),
    validate.optional("lsPlayerSwfUrl"): validate.text,
    validate.optional("hdPlayerSwfUrl"): validate.text
})
_smil_schema = validate.Schema(validate.union({
    "http_base": validate.all(
        validate.xml_find("{http://www.w3.org/2001/SMIL20/Language}head/"
                          "{http://www.w3.org/2001/SMIL20/Language}meta"
                          "[@name='httpBase']"),
        validate.xml_element(attrib={
            "content": validate.text
        }),
        validate.get("content")
    ),
    "videos": validate.all(
        validate.xml_findall("{http://www.w3.org/2001/SMIL20/Language}body/"
                             "{http://www.w3.org/2001/SMIL20/Language}switch/"
                             "{http://www.w3.org/2001/SMIL20/Language}video"),
        [
            validate.all(
                validate.xml_element(attrib={
                    "src": validate.text,
                    "system-bitrate": validate.all(
                        validate.text,
                        validate.transform(int)
                    )
class DeutscheWelle(Plugin):
    """Plugin for dw.com pages: the live stream page and video/audio on demand.

    The page's hidden ``player_type`` input decides which path is taken:
    ``dwlivestream`` goes to the live handler, anything else to the VOD handler.
    """

    # Channel used for the live stream when neither the URL query nor the page
    # names one.
    default_channel = "1"
    url_re = re.compile(r"https?://(?:www\.)?dw\.com/")

    channel_re = re.compile(r'''<a.*?data-id="(\d+)".*?class="ici"''')
    # Group 1: channel id, group 2: stream URL.
    live_stream_div = re.compile(
        r'''
        <div\s+class="mediaItem"\s+data-channel-id="(\d+)".*?>.*?
        <input\s+type="hidden"\s+name="file_name"\s+value="(.*?)"\s*>.*?<div
    ''', re.DOTALL | re.VERBOSE)

    smil_api_url = "http://www.dw.com/smil/{}"
    html5_api_url = "http://www.dw.com/html5Resource/{}"
    vod_player_type_re = re.compile(
        r'<input type="hidden" name="player_type" value="(?P<stream_type>.+?)">'
    )
    stream_vod_data_re = re.compile(
        r'<input\s+type="hidden"\s+name="file_name"\s+value="(?P<stream_url>.+?)">.*?'
        r'<input\s+type="hidden"\s+name="media_id"\s+value="(?P<stream_id>\d+)">',
        re.DOTALL)

    # SMIL document -> {"base": <meta base attr>, "streams": [attrib dicts]},
    # with "system-bitrate" and the optional "width" converted to int.
    smil_schema = validate.Schema(
        validate.union({
            "base":
            validate.all(validate.xml_find(".//meta"),
                         validate.xml_element(attrib={"base": validate.text}),
                         validate.get("base")),
            "streams":
            validate.all(validate.xml_findall(".//switch/*"), [
                validate.all(
                    validate.getattr("attrib"), {
                        "src":
                        validate.text,
                        "system-bitrate":
                        validate.all(
                            validate.text,
                            validate.transform(int),
                        ),
                        validate.optional("width"):
                        validate.all(validate.text, validate.transform(int))
                    })
            ])
        }))

    @classmethod
    def can_handle_url(cls, url):
        """Return ``True`` when *url* is a dw.com URL."""
        return cls.url_re.match(url) is not None

    def _create_stream(self, url, quality=None):
        """Build the stream object(s) for *url*.

        Returns a single ``(quality, stream)`` tuple for ``rtmp://`` and plain
        HTTP URLs, but the ``items()`` of the parsed variant playlist for
        ``.m3u8`` URLs. Callers should pass the result through
        ``_as_stream_pairs`` to get a uniform iterable of pairs.
        """
        if url.startswith('rtmp://'):
            return (quality, RTMPStream(self.session, {'rtmp': url}))
        if url.endswith('.m3u8'):
            return HLSStream.parse_variant_playlist(self.session, url).items()

        return (quality, HTTPStream(self.session, url))

    @staticmethod
    def _as_stream_pairs(created):
        """Normalize a ``_create_stream`` result to an iterable of ``(name, stream)`` pairs."""
        # A bare tuple is one pair; anything else is already an iterable of pairs.
        if isinstance(created, tuple):
            return [created]
        return created

    def _get_live_streams(self, page):
        """Return the ``(name, stream)`` pairs for the selected live channel.

        The channel comes from the ``channel`` query parameter of ``self.url``,
        else from the page markup, else ``default_channel``. Returns ``None``
        when the page lists no stream for that channel.
        """
        # check if a different language has been selected
        qs = dict(parse_qsl(urlparse(self.url).query))
        channel = qs.get("channel")

        if not channel:
            m = self.channel_re.search(page.text)
            channel = m.group(1) if m else None

        self.logger.debug("Using sub-channel ID: {0}", channel)

        # extract the streams from the page, mapping between channel-id and stream url
        media_items = self.live_stream_div.finditer(page.text)
        stream_map = dict(mi.group(1, 2) for mi in media_items)

        # Apply the fallback before str(): str(None) is the truthy "None",
        # which would otherwise prevent default_channel from ever being used.
        stream_url = stream_map.get(str(channel or self.default_channel))
        if stream_url:
            return self._as_stream_pairs(self._create_stream(stream_url))

    def _get_vod_streams(self, stream_type, page):
        """Yield ``(name, stream)`` pairs for a video or audio on-demand page.

        Yields the stream embedded in the page first, then every additional
        variant listed by the SMIL API. Yields nothing when the page carries no
        stream data or *stream_type* is neither ``"video"`` nor ``"audio"``.
        """
        m = self.stream_vod_data_re.search(page.text)
        if m is None:
            return
        stream_url, stream_id = m.groups()

        if stream_type == "video":
            stream_api_id = "v-{}".format(stream_id)
            default_quality = "vod"
        elif stream_type == "audio":
            stream_api_id = "a-{}".format(stream_id)
            default_quality = "audio"
        else:
            return

        # Retrieve stream embedded in web page
        embedded = self._create_stream(stream_url, default_quality)
        for pair in self._as_stream_pairs(embedded):
            yield pair

        # Retrieve streams using API
        res = self.session.http.get(self.smil_api_url.format(stream_api_id))
        videos = self.session.http.xml(res, schema=self.smil_schema)

        for video in videos['streams']:
            url = videos["base"] + video["src"]
            # Skip the variant that is the embedded stream (possibly under its
            # "_dwdownload." name), it has been yielded already.
            if url == stream_url or url.replace("_dwdownload.",
                                                ".") == stream_url:
                continue

            if video["system-bitrate"] > 0:
                # If width is available, use it to select the best stream
                # amongst those with same bitrate
                quality = "{}k".format(
                    (video["system-bitrate"] + video.get("width", 0)) // 1000)
            else:
                quality = default_quality

            for pair in self._as_stream_pairs(
                    self._create_stream(url, quality)):
                yield pair

    def _get_streams(self):
        """Fetch ``self.url`` and dispatch to the live or VOD handler by player type."""
        res = self.session.http.get(self.url)
        m = self.vod_player_type_re.search(res.text)
        if m is None:
            return

        stream_type = m.group("stream_type")
        if stream_type == "dwlivestream":
            return self._get_live_streams(res)

        return self._get_vod_streams(stream_type, res)
示例#11
0
            validate.optional("play_url"): validate.url(scheme="http"),
            validate.optional("m3u8_url"): validate.url(
                scheme="http",
                path=validate.endswith(".m3u8")
            ),
        }, None)
    },
    validate.optional("playerUri"): validate.text
})
_smil_schema = validate.Schema(validate.union({
    "http_base": validate.all(
        validate.xml_find("{http://www.w3.org/2001/SMIL20/Language}head/"
                          "{http://www.w3.org/2001/SMIL20/Language}meta"
                          "[@name='httpBase']"),
        validate.xml_element(attrib={
            "content": validate.text
        }),
        validate.get("content")
    ),
    "videos": validate.all(
        validate.xml_findall("{http://www.w3.org/2001/SMIL20/Language}body/"
                             "{http://www.w3.org/2001/SMIL20/Language}switch/"
                             "{http://www.w3.org/2001/SMIL20/Language}video"),
        [
            validate.all(
                validate.xml_element(attrib={
                    "src": validate.text,
                    "system-bitrate": validate.all(
                        validate.text,
                        validate.transform(int)
                    )
示例#12
0
class TestXmlElementSchema:
    """Tests for ``validate.xml_element`` run against a small nested XML tree."""

    # Sub-schema that upper-cases whatever string it is given.
    upper = validate.transform(str.upper)

    @pytest.fixture
    def element(self):
        """Build ``<parent>`` holding ``childA`` and ``childB``; ``childB`` wraps ``childC``."""
        root = Element("parent", {"attrkey1": "attrval1", "attrkey2": "attrval2"})
        root.text = "parenttext"
        root.tail = "parenttail"

        first = Element("childA", {"a": "1"})
        first.text = "childAtext"
        first.tail = "childAtail"

        second = Element("childB", {"b": "2"})
        second.text = "childBtext"
        second.tail = "childBtail"
        second.append(Element("childC"))

        root.append(first)
        root.append(second)
        return root

    @pytest.mark.parametrize(
        "schema, expected",
        [
            pytest.param(
                validate.xml_element(),
                '<parent attrkey1="attrval1" attrkey2="attrval2">'
                'parenttext'
                '<childA a="1">childAtext</childA>'
                'childAtail'
                '<childB b="2">childBtext<childC/></childB>'
                'childBtail'
                '</parent>'
                'parenttail',
                id="empty",
            ),
            pytest.param(
                validate.xml_element(tag=upper, attrib={upper: upper}, text=upper, tail=upper),
                '<PARENT ATTRKEY1="ATTRVAL1" ATTRKEY2="ATTRVAL2">'
                'PARENTTEXT'
                '<childA a="1">childAtext</childA>'
                'childAtail'
                '<childB b="2">childBtext<childC/></childB>'
                'childBtail'
                '</PARENT>'
                'PARENTTAIL',
                id="subschemas",
            ),
        ],
    )
    def test_success(self, element, schema, expected):
        """The validated tree serializes as expected and shares no node with the input."""
        validated = validate.validate(schema, element)
        serialized = etree_tostring(validated).decode("utf-8")
        assert serialized == expected

        # Root, both children and the grandchild must all be new objects.
        assert validated is not element
        assert validated[0] is not element[0]
        assert validated[1] is not element[1]
        assert validated[1][0] is not element[1][0]

    @pytest.mark.parametrize(
        "schema, error",
        [
            pytest.param(
                validate.xml_element(tag="invalid"),
                """
                    ValidationError(XmlElementSchema):
                      Unable to validate XML tag
                      Context(equality):
                        'parent' does not equal 'invalid'
                """,
                id="tag",
            ),
            pytest.param(
                validate.xml_element(attrib={"invalid": "invalid"}),
                """
                    ValidationError(XmlElementSchema):
                      Unable to validate XML attributes
                      Context(dict):
                        Key 'invalid' not found in {'attrkey1': 'attrval1', 'attrkey2': 'attrval2'}
                """,
                id="attrib",
            ),
            pytest.param(
                validate.xml_element(text="invalid"),
                """
                    ValidationError(XmlElementSchema):
                      Unable to validate XML text
                      Context(equality):
                        'parenttext' does not equal 'invalid'
                """,
                id="text",
            ),
            pytest.param(
                validate.xml_element(tail="invalid"),
                """
                    ValidationError(XmlElementSchema):
                      Unable to validate XML tail
                      Context(equality):
                        'parenttail' does not equal 'invalid'
                """,
                id="tail",
            ),
        ],
    )
    def test_failure(self, element, schema, error):
        """A mismatching sub-schema raises a ValidationError with the expected report."""
        with pytest.raises(validate.ValidationError) as exc_info:
            validate.validate(schema, element)
        assert_validationerror(exc_info.value, error)

    def test_failure_schema(self):
        """A non-element input is rejected with an iselement failure report."""
        with pytest.raises(validate.ValidationError) as exc_info:
            validate.validate(validate.xml_element(), "not-an-element")

        expected_report = """
            ValidationError(Callable):
              iselement('not-an-element') is not true
        """
        assert_validationerror(exc_info.value, expected_report)