示例#1
0
 def test_parse_json(self):
     """Check parse_json on valid documents, with a schema, and on bad input."""
     valid_doc = '{"test": 1}'
     # The closing quote of the key is missing, so this is not valid JSON.
     broken_doc = '{"test: 1}'

     # Well-formed documents decode to the equivalent Python objects.
     self.assertEqual({}, parse_json("{}"))
     self.assertEqual({"test": 1}, parse_json(valid_doc))

     # A schema matching the decoded document leaves the result unchanged.
     self.assertEqual({"test": 1},
                      parse_json(valid_doc,
                                 schema=validate.Schema({"test": 1})))

     # Malformed input raises PluginError unless another type is requested.
     with self.assertRaises(PluginError):
         parse_json(broken_doc)
     with self.assertRaises(IOError):
         parse_json(broken_doc, exception=IOError)

     # A longer malformed payload is rejected in the same way.
     with self.assertRaises(PluginError):
         parse_json(broken_doc * 10)
示例#2
0
    def _get_show_streams(self,
                          stream_data,
                          show,
                          episode,
                          platform="desktop"):
        """
        Yield the VOD streams for the video referenced by ``stream_data``.

        :param stream_data: match object whose first group holds the JSON
                            that ``vod_id_schema`` reduces to the video id
        :param show: not used by this method
        :param episode: not used by this method
        :param platform: value sent as the ``platform`` parameter of the
                         VOD API request
        :return: generator of (quality, stream) pairs produced by the mapper
        """
        video_id = parse_json(stream_data.group(1), schema=self.vod_id_schema)
        res = http.get(self.vod_api,
                       params={
                           "platform": platform,
                           "id": video_id
                       })

        # Keep only the first entry for each manifest URL. The set makes the
        # duplicate check constant-time; the list preserves the API order.
        streams = []
        seen_urls = set()
        for stream in parse_xml(res.text, schema=self._vod_api_schema):
            if stream["url"] not in seen_urls:
                seen_urls.add(stream["url"])
                streams.append(stream)

        # Dispatch every entry on the file extension of its URL.
        mapper = StreamMapper(lambda fmt, strm: strm["url"].endswith(fmt))
        mapper.map(".m3u8", self._make_hls_hds_stream,
                   HLSStream.parse_variant_playlist)
        mapper.map(".f4m",
                   self._make_hls_hds_stream,
                   HDSStream.parse_manifest,
                   is_akamai=True)
        # Plain MP4 files are named after their bitrate.
        mapper.map(
            ".mp4", lambda s:
            (s["bitrate"] + "k", HTTPStream(self.session, s["url"])))

        for q, s in mapper(streams):
            yield q, s
示例#3
0
    def _get_streams(self):
        """
        Locate the HLS streams advertised by the page at ``self.url``.

        :return: generator of (name, stream) pairs
        """
        request_headers = {}
        page = http.get(self.url, headers=request_headers)
        # The Referer is only added once the page itself has been fetched.
        request_headers["Referer"] = self.url

        found = self._sources_re.findall(page.text)
        if not len(found):
            return

        for entry in parse_json(found[0], schema=self._sources_schema):
            self.logger.debug("Found stream of type: {}", entry[u'type'])
            if entry[u'type'] != u"application/vnd.apple.mpegurl":
                continue

            stream_url = update_scheme(self.url, entry[u"src"])
            try:
                # First attempt: treat the URL as a variant playlist.
                playlist = HLSStream.parse_variant_playlist(
                    self.session, stream_url, headers=request_headers)
                if not playlist:
                    # No variants found: fall back to a plain HLS stream.
                    yield 'live', HLSStream(self.session,
                                            stream_url,
                                            headers=request_headers)
                else:
                    for name, stream in playlist.items():
                        yield name, stream
            except IOError:
                self.logger.warning(
                    "Could not open the stream, perhaps the channel is offline"
                )
示例#4
0
    def _get_vod_stream(self):
        """
        Yield the HDS and HLS streams for the VOD page at ``self.url``.

        The ``.securevideo.json`` document next to the page supplies the
        ``clientid`` and ``mzid`` that the VOD API is queried with.

        :return: generator of (name, stream) items; nothing is yielded when
                 the JSON pattern is not found in the first response
        """
        base_url = self.url
        # Drop a single trailing slash before appending the JSON suffix.
        if base_url.endswith('/'):
            base_url = base_url[:-1]

        page = http.get('{0}.securevideo.json'.format(base_url))
        found = _json_re.search(page.text)
        if not found:
            return
        video = parse_json(found.group(1))

        api_res = http.get(API_VOD.format(video['clientid'], video['mzid']))
        api_data = http.json(api_res, schema=_stream_schema)

        for target in api_data['targetUrls']:
            if target['type'] == 'HDS':
                manifest = HDSStream.parse_manifest(self.session,
                                                    target['url'])
                for item in manifest.items():
                    yield item

            if target['type'] == 'HLS':
                playlist = HLSStream.parse_variant_playlist(
                    self.session, target['url'])
                for item in playlist.items():
                    yield item
示例#5
0
 def get_pdata(self, channel):
     """
     Fetch and validate the parameters needed for the POST request.

     :param channel: channel name substituted into ``pdata_url``
     :return: the response body parsed as JSON and validated by
              ``pdata_schema`` (described upstream as "gcp" and "ogn")
     """
     pdata_url = self.pdata_url.format(channel=channel)
     response = http.get(pdata_url)
     return parse_json(response.text, schema=self.pdata_schema)
示例#6
0
 def _get_stream_data(self, **params):
     """
     Request the stream data from the API and validate the response.

     :param params: values substituted into the ``britecove`` URL template;
                    ``policy_key`` is also placed in the Accept header
     :return: the JSON response validated by ``britecove_schema``
     """
     endpoint = self.britecove.format(**params)
     accept = "application/json;pk={policy_key}".format(**params)
     response = http.get(endpoint, headers={"Accept": accept})
     return parse_json(response.text, schema=self.britecove_schema)
示例#7
0
 def _get_streams(self):
     """
     Resolve the streams for a live ("/vivo") or VOD page.

     Live pages are resolved through the YouTube video id returned by the
     api.iamat.com metadata endpoint and handed to ``self.session.streams``.
     Any other page is treated as VOD: the ``data-kaltura`` attribute of the
     page supplies the entry id used to build the HLS manifest URL.

     :return: the resolved streams, or None when the page has no player data
              or any step fails (the failure is logged)
     """
     if "eltrecetv.com.ar/vivo" in self.url.lower():
         try:
             # NOTE(review): this replaces the headers of the shared ``http``
             # object wholesale rather than updating them.
             http.headers = {
                 'Referer': self.url,
                 'User-Agent': useragents.ANDROID
             }
             res = http.get(
                 'https://api.iamat.com/metadata/atcodes/eltrece')
             yt_id = parse_json(
                 res.text
             )["atcodes"][0]["context"]["ahora"]["vivo"]["youtubeVideo"]
             yt_url = "https://www.youtube.com/watch?v={0}".format(yt_id)
             return self.session.streams(yt_url)
         # Exception instead of BaseException, so that KeyboardInterrupt and
         # SystemExit are not swallowed and reported as "unavailable".
         except Exception:
             self.logger.info(
                 "Live content is temporarily unavailable. Please try again later."
             )
     else:
         try:
             http.headers = {
                 'Referer': self.url,
                 'User-Agent': useragents.CHROME
             }
             res = http.get(self.url)
             _player_re = re.compile(r'''data-kaltura="([^"]+)"''')
             match = _player_re.search(res.text)
             if not match:
                 return
             # The JSON sits inside an HTML attribute, so its double quotes
             # are presumably encoded as &quot; entities; decode them first.
             player_json = match.group(1).replace("&quot;", '"')
             entry_id = parse_json(player_json)["entryId"]
             hls_url = (
                 "https://vodgc.com/p/111/sp/11100/playManifest/entryId/{0}"
                 "/format/applehttp/protocol/https/a.m3u8").format(entry_id)
             return HLSStream.parse_variant_playlist(self.session, hls_url)
         except Exception:
             self.logger.error("The requested VOD content is unavailable.")
示例#8
0
    def _get_live_stream(self, stream_data, show, episode=None):
        """
        Yield the live or replay streams for ``show``.

        :param stream_data: match object whose first group holds the stream
                            info JSON (validated by ``live_schema``)
        :param show: key of the show inside the ``streams`` mapping
        :param episode: optional episode slug; when given, the archived
                        episode with that slug is played
        :return: generator of (quality, stream) pairs; nothing is yielded
                 (and an error is logged) when no stream ID can be found
        """
        # parse the stream info as json
        stream_info = parse_json(stream_data.group(1), schema=self.live_schema)
        show_info = stream_info[u"streams"][show]

        # get the stream ID
        stream_id = None
        if episode:
            self.logger.debug("Loading replay of episode: {0}/{1}", show,
                              episode)
            for epi in show_info[u"archiveEpisodes"]:
                if epi[u"slug"] == episode:
                    stream_id = epi[u"id"]
        elif show_info.get("isLive") or not show_info[u"archiveEpisodes"]:
            # Live, or nothing archived to fall back on: use the live stream.
            self.logger.debug("Loading LIVE streams for: {0}", show)
            stream_id = show_info[u"stream"]
        else:
            # Off-air. This branch is only reached when the archive is
            # non-empty, so the first archived episode is always available.
            epi = show_info[u"archiveEpisodes"][0]
            self.logger.debug("Loading replay of episode: {0}/{1}", show,
                              epi[u"slug"])
            stream_id = epi[u"id"]

        if not stream_id:
            # Pass ``show`` as a logger argument, like the other log calls.
            self.logger.error(
                "Couldn't find the stream ID for this stream: {0}", show)
            return

        api_url = self.API_URL.format(id=stream_id)
        res = http.get(api_url,
                       headers={"User-Agent": useragents.SAFARI_8})
        api_data = http.json(res, schema=self._api_schema)

        # Dispatch every asset URL on its file extension.
        mapper = StreamMapper(lambda fmt, surl: surl.endswith(fmt))
        mapper.map(".m3u8", HLSStream.parse_variant_playlist, self.session)
        mapper.map(".f4m", HDSStream.parse_manifest, self.session)

        stream_urls = [
            asset[u"url"]
            for asset in api_data[u'data'][u'stream'][u'assets']
        ]
        for q, s in mapper(stream_urls):
            yield q, s
示例#9
0
    def find_vpid(self, url, res=None):
        """
        Search a programme page for its Video Packet ID.

        :param url: URL to download when no usable ``res`` is supplied
        :param res: previously fetched HTTP response to search instead
        :type url: string
        :type res: requests.Response
        :return: the value extracted by ``mediator_schema``, or the falsy
                 search result when ``mediator_re`` does not match
        :rtype: string
        """
        self.logger.debug("Looking for vpid on {0}", url)
        # Only download the page when no truthy response was handed in.
        page = res if res else http.get(url)
        found = self.mediator_re.search(page.text)
        if not found:
            return found
        return parse_json(found.group(1), schema=self.mediator_schema)
示例#10
0
    def _get_live_stream(self, channel):
        """
        Yield the HLS streams of a live channel.

        The live API response is searched for the JSON object stored under
        the key ``vualto_<channel>``; its ``hls`` entry is the playlist URL.

        :param channel: channel name, without the ``vualto_`` prefix
        :return: generator of (name, stream) items; nothing is yielded when
                 the channel is not found or has no HLS URL
        """
        channel = 'vualto_{0}'.format(channel)
        # Escape the channel so it is matched literally inside the pattern.
        # \173 and \175 are octal escapes for the opening and closing brace,
        # spelled that way so str.format does not read them as fields.
        _live_json_re = re.compile(
            r'''"{0}":\s(\173[^\173\175]+\175)'''.format(re.escape(channel)))

        res = http.get(API_LIVE)
        match = _live_json_re.search(res.text)
        if not match:
            return
        data = parse_json(match.group(1))

        hls_url = data['hls']
        if not hls_url:
            return

        for s in HLSStream.parse_variant_playlist(self.session,
                                                  hls_url).items():
            yield s
示例#11
0
    def _extract_nonce(cls, http_result):
        """
        Pull the nonce out of the redirect chain of a session endpoint response.

        The nonce does not sign anything in the cryptographic sense; it is
        simply included in the later auth requests.

        :param http_result: HTTP response from the bbc session endpoint.
        :type http_result: requests.Response
        :return: nonce to include in url requests
        :rtype: string
        """
        # The last redirect's query string has a "goto" entry: the final URL.
        redirect_url = http_result.history[-1].request.url
        redirect_params = dict(parse_qsl(urlparse(redirect_url).query))

        # The final URL's "state" entry is a JSON document holding the nonce.
        goto_query = urlparse(redirect_params['goto']).query
        goto_params = dict(parse_qsl(goto_query))
        state = parse_json(goto_params['state'])

        return state['nonce']
示例#12
0
    def get_stream_url(self, data):
        """
        Request the HLS URL from the gate endpoint.

        :param data: form data for the POST request (dict with "gcp" and
                     "ogn")
        :return: the "stream" value of the response; when it is missing and
                 a "suffix" is present, the URL built by ``create_hls_url``
        :raises NoStreamsError: when the POST request raises any exception
        """
        try:
            response = http.post(self.gate_url,
                                 headers=self.headers,
                                 data=data)
        except Exception as err:
            # A 403 in the error text is reported as a geo restriction.
            if "403" in str(err):
                self.logger.error(
                    "This Video is Not Available in Your Country.")
            raise NoStreamsError(self.url)

        payload = parse_json(response.text)
        stream = payload.get("stream")
        suffix = payload.get("suffix")

        # Only derive the URL from the suffix when no stream URL was given.
        if stream is not None or not suffix:
            return stream
        return self.create_hls_url(suffix)
示例#13
0
 def _get_streams(self):
     """
     Resolve the HLS streams from the player data embedded in the page.

     The page carries base64-encoded JSON from which ``ovva_data_schema``
     extracts a URL; requesting that URL yields the stream URL.

     :return: the parsed variant playlist, or None when there is no player
              data or the stream URL cannot be resolved (both are logged)
     """
     page = http.get(self.url)
     found = self.data_re.search(page.text)
     upcoming = self.next_date_re.search(page.text)

     if not found:
         if upcoming:
             # The page announces a start time as a Unix timestamp.
             starts_at = datetime.fromtimestamp(int(upcoming.group(1)))
             self.logger.info(
                 "The broadcast will be available at {0}".format(
                     starts_at.strftime('%Y-%m-%d %H:%M:%S')))
         else:
             self.logger.error("Could not find player data.")
         return

     try:
         decoded = b64decode(found.group(1)).decode("utf8")
         ovva_url = parse_json(decoded, schema=self.ovva_data_schema)
         stream_url = http.get(ovva_url, schema=self.ovva_redirect_schema)
     except PluginError as err:
         self.logger.error("Could not find stream URL: {0}", err)
         return

     return HLSStream.parse_variant_playlist(self.session, stream_url)
示例#14
0
    def _get_streams(self):
        """
        Yield the HLS and HTTP streams listed in the page's player JSON.

        :return: generator of (name, stream) items: the HLS variants first,
                 then a single "http" stream when an HTTP source exists
        """
        page = http.get(self.url, headers={'User-Agent': useragents.CHROME})

        # Cut the embedded player JSON out of the page: it runs from the
        # start marker up to and including the first '}]}}', and one more
        # closing brace is appended to balance the object.
        text = page.text
        text = text[text.index('{"top":{"view":"PlayerContainer","model":{'):]
        text = text[:text.index('}]}}') + 4] + "}"

        hls_url = ""
        http_url = ""

        model = parse_json(text)["top"]["model"]
        sources = model["videos"][0]["sources"]
        self.logger.debug('Video ID found: {0}', model["id"])

        # When several sources share a type, the last one wins.
        for source in sources:
            source_type = source["type"]
            if "HLS" in source_type:
                hls_url = "http://telefe.com" + source["url"]
                self.logger.debug("HLS content available")
            if "HTTP" in source_type:
                http_url = "http://telefe.com" + source["url"]
                self.logger.debug("HTTP content available")

        http.headers = {
            'Referer': self.url,
            'User-Agent': useragents.CHROME,
            'X-Requested-With': 'ShockwaveFlash/25.0.0.148'
        }

        if hls_url:
            variants = HLSStream.parse_variant_playlist(self.session, hls_url)
            for item in variants.items():
                yield item

        if http_url:
            yield "http", HTTPStream(self.session, http_url)
示例#15
0
    def _get_prid(self, subtitles=False):
        """
        Find the asset id of the content on the page at ``self.url``.

        Three sources are tried in order: the matches of ``prid_re``, the
        "mid" field of the JSON found by ``react_re``, and the ``media_id``
        group of ``media_id_re``.

        :param subtitles: when true, a ``prid_re`` match whose first group
                          is non-empty replaces any id chosen so far
        :return: the asset id, or None when none of the sources provides one
        """
        res = http.get(self.url)
        bprid = None

        # Locate the asset id for the content on the page
        for alt, _, prid in self.prid_re.findall(res.text):
            if alt and subtitles:
                bprid = prid
            elif bprid is None:
                bprid = prid

        if bprid is None:
            m = self.react_re.search(res.text)
            if m:
                # The JSON is presumably embedded in HTML with its double
                # quotes encoded as &quot; entities; decode them first.
                data = parse_json(m.group("data").replace("&quot;", '"'))
                bprid = data.get("mid")

        if bprid is None:
            m = self.media_id_re.search(res.text)
            if m:
                bprid = m.group('media_id')

        return bprid
示例#16
0
    def _get_streams(self):
        """
        Resolve the HLS streams from the player iframe of the page.

        The iframe carries base64-encoded JSON from which
        ``ovva_data_schema`` extracts a URL; requesting that URL yields the
        stream URL.

        :return: the parsed variant playlist, or None when the iframe or the
                 player data is missing or the stream URL cannot be resolved
        """
        http.headers = {"User-Agent": useragents.ANDROID}
        page = http.get(self.url)
        iframe_url = self.find_iframe(page)
        if not iframe_url:
            return

        self.logger.debug("Found iframe: {0}", iframe_url)
        frame = http.get(iframe_url, headers={"Referer": self.url})
        found = self.data_re.search(frame.text)
        if not found:
            self.logger.error("Could not find player data.")
            return

        try:
            decoded = b64decode(found.group(1)).decode("utf8")
            ovva_url = parse_json(decoded, schema=self.ovva_data_schema)
            stream_url = http.get(ovva_url, schema=self.ovva_redirect_schema)
        except PluginError as err:
            self.logger.error("Could not find stream URL: {0}", err)
            return

        return HLSStream.parse_variant_playlist(self.session, stream_url)
示例#17
0
def js_to_json(data):
    """
    Quote the bare keys of a JavaScript object literal and parse the result.

    The text is split on commas and every piece is stripped. In each piece
    the first word directly followed by a colon (and not by ``:/``) is
    wrapped in double quotes; the pieces are then re-joined and parsed.

    :param data: JavaScript source text
    :return: the result of ``parse_json`` on the rewritten text
    """
    # NOTE(review): the leading (?!<") group is a negative lookahead for the
    # two characters <" and can never fail in front of \w+; it may have been
    # meant as a lookbehind -- confirm before changing the pattern.
    key_pattern = re.compile(r'(?!<")(\w+):(?!/)')

    pieces = []
    for piece in data.split(","):
        cleaned = piece.replace("\r", "").strip()
        # Only the first key-like word of each piece is quoted.
        pieces.append(key_pattern.sub(r'"\1":', cleaned, count=1))

    return parse_json(','.join(pieces))
示例#18
0
 def get_hls_url(self, media_id):
     """
     Look up the HLS URL for a media id through the API.

     :param media_id: sent as the ``media_id`` query parameter
     :return: the response validated by ``api_schema``, or None when parsing
              or validation raises PluginError
     """
     response = http.get(self.api_url, params={"media_id": media_id})
     try:
         hls_url = parse_json(response.text, schema=self.api_schema)
     except PluginError:
         return None
     return hls_url