def auth_url(self, url):
    parsed = urlparse(url)
    path, _ = parsed.path.rsplit("/", 1)
    token_res = http.get(self.token_url, params=dict(acl=path + "/*"))
    authparams = http.json(token_res, schema=self.token_schema)

    existing = dict(parse_qsl(parsed.query))
    existing.update(dict(parse_qsl(authparams)))

    return urlunparse(parsed._replace(query=urlencode(existing)))
def _get_streams(self):
    http.headers.update({
        "User-Agent": useragents.CHROME,
        "Referer": self.referer
    })
    fragment = dict(parse_qsl(urlparse(self.url).fragment))
    link = fragment.get("link")
    if not link:
        link = self._get_tv_link()

    if not link:
        self.logger.error("Missing link fragment: stream unavailable")
        return

    player_url = self._api_url.format(link)
    self.logger.debug("Requesting player API: {0} (referer={1})", player_url, self.referer)
    res = http.get(player_url,
                   params={"_": int(time.time() * 1000)},
                   headers={"X-Requested-With": "XMLHttpRequest"})

    try:
        data = http.json(res, schema=self.api_schema)
    except PluginError as e:
        # log the schema error at debug level instead of printing to stdout
        self.logger.debug("{0}", e)
        self.logger.error("Cannot play this stream type")
    else:
        if data["status"]:
            if data["file"].startswith("<"):
                self.logger.error("Cannot play embedded streams")
            else:
                return HLSStream.parse_variant_playlist(self.session, data["file"])
        else:
            self.logger.error(data["text"])
def filter_urlquery(url, keys=[], keys_status=False):
    """Removes unwanted query parameters from an URL.

    :param url: an URL
    :param keys: list of query parameter names
    :param keys_status: False = removes query parameters that are in keys
                        True = allows only query parameters that are in keys
    :return: URL with filtered query
    """
    parts = urlparse(url)
    query_dict = dict(parse_qsl(parts.query))
    new_query_dict = {}

    for key in keys:
        try:
            if keys_status is True:
                new_query_dict[key] = query_dict[key]
            else:
                del query_dict[key]
        except KeyError:
            continue

    new_parts = list(parts)
    if keys_status is True:
        new_parts[4] = unquote(urlencode(new_query_dict))
    else:
        new_parts[4] = unquote(urlencode(query_dict))
    url = urlunparse(new_parts)
    return url
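# Minimal usage sketch for filter_urlquery above; the URL and parameter values
# are made up for illustration. With keys_status=False the listed parameters are
# dropped, with keys_status=True only they are kept.
def _filter_urlquery_example():
    url = "http://example.com/manifest.f4m?hdnea=token&n=20"
    assert filter_urlquery(url, ["hdnea"]) == "http://example.com/manifest.f4m?n=20"
    assert filter_urlquery(url, ["hdnea"], True) == "http://example.com/manifest.f4m?hdnea=token"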
def from_url(cls, session, url):
    purl = urlparse(url)
    querys = dict(parse_qsl(purl.query))
    account_id, player_id, _ = purl.path.lstrip("/").split("/", 3)
    video_id = querys.get("videoId")

    bp = cls(session, account_id=account_id, player_id=player_id)
    return bp.get_streams(video_id)
def get_stream_url(self, event_id):
    url_m = self.url_re.match(self.url)
    site = url_m.group(1) or url_m.group(2)
    api_url = self.api_url.format(id=event_id, site=site.upper())
    self.logger.debug("Calling API: {0}", api_url)
    stream_url = http.get(api_url).text.strip("\"'")

    parsed = urlparse(stream_url)
    query = dict(parse_qsl(parsed.query))
    return urlunparse(parsed._replace(query="")), query
def parse_qsd(data, name="query string", exception=PluginError, schema=None, **params):
    """Parses a query string into a dict.

    Unlike parse_qs and parse_qsl, duplicate keys are not preserved in
    favor of a simpler return value.
    """
    value = dict(parse_qsl(data, **params))
    if schema:
        value = schema.validate(value, name=name, exception=exception)

    return value
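# Usage sketch for parse_qsd above; the query string is a made-up example.
# Because dict() is applied to the parse_qsl() result, a duplicated key keeps
# only its last value.
def _parse_qsd_example():
    qsd = parse_qsd("videoid=123&quality=720&quality=1080")
    assert qsd == {"videoid": "123", "quality": "1080"}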
def _extract_nonce(cls, http_result):
    """
    Given an HTTP response from the session endpoint, extract the nonce, so we can
    "sign" requests with it.

    We don't really sign the requests in the traditional sense of a nonce, we just
    include them in the auth requests.

    :param http_result: HTTP response from the bbc session endpoint.
    :type http_result: requests.Response
    :return: nonce to "sign" url requests with
    :rtype: string
    """
    # Extract the redirect URL from the last call
    last_redirect_url = urlparse(http_result.history[-1].request.url)
    last_redirect_query = dict(parse_qsl(last_redirect_url.query))
    # Extract the nonce from the query string in the redirect URL
    final_url = urlparse(last_redirect_query['goto'])
    goto_url = dict(parse_qsl(final_url.query))
    goto_url_query = parse_json(goto_url['state'])

    # Return the nonce we can use for future queries
    return goto_url_query['nonce']
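# Sketch of the inner extraction step used by _extract_nonce, on a made-up URL.
# The domain, path and state payload are assumptions for illustration only; it
# assumes the same parse_json helper used above.
def _extract_nonce_example():
    goto = "https://session.example.com/callback?state=%7B%22nonce%22%3A%20%22abc123%22%7D"
    state = dict(parse_qsl(urlparse(goto).query))["state"]  # URL-decoded JSON string
    assert parse_json(state)["nonce"] == "abc123"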
def _get_streams(self):
    params = dict(parse_qsl(urlparse(self.url).query))
    vod_id = params.get("vod")
    match = _url_re.match(self.url)
    channel = match.group("channel")

    if vod_id:
        self.logger.debug("Looking for VOD {0} from channel: {1}", vod_id, channel)
        return self._get_vod_stream(vod_id)
    else:
        self.logger.debug("Looking for channel: {0}", channel)
        return self._get_live_stream(channel)
def _get_streams(self):
    docid = self.url_re.match(self.url).group(1)
    self.logger.debug("Google Docs ID: {0}", docid)
    res = http.get(self.api_url, params=dict(docid=docid))
    data = dict(parse_qsl(res.text))

    if data["status"] == "ok":
        fmts = dict([s.split('/')[:2] for s in data["fmt_list"].split(",")])
        streams = [s.split('|') for s in data["fmt_stream_map"].split(",")]
        for qcode, url in streams:
            _, h = fmts[qcode].split("x")
            yield "{0}p".format(h), HTTPStream(self.session, url)
    else:
        self.logger.error("{0} (ID: {1})", data["reason"], docid)
def _britecove_params(self, url):
    res = http.get(url, headers={
        "User-Agent": useragents.FIREFOX,
        "Referer": self.url
    })
    acc = self.account_id_re.search(res.text)
    pk = self.policy_key_re.search(res.text)

    query = dict(parse_qsl(urlparse(url).query))

    return {
        "video_id": query.get("videoId"),
        "account_id": acc and acc.group(1),
        "policy_key": pk and pk.group(1),
    }
def _get_stream_info(self, url):
    match = _url_re.match(url)
    user = match.group("user")
    live_channel = match.group("liveChannel")

    if user:
        video_id = self._find_channel_video()
    elif live_channel:
        return self._find_canonical_stream_info()
    else:
        video_id = match.group("video_id") or match.group("video_id_2")
        if video_id == "live_stream":
            query_info = dict(parse_qsl(urlparse(url).query))
            if "channel" in query_info:
                video_id = self._get_channel_video(query_info["channel"])

    if not video_id:
        return

    # normal
    _params_1 = {"el": "detailpage"}
    # age restricted
    _params_2 = {"el": "embedded"}
    # embedded restricted
    _params_3 = {"eurl": "https://youtube.googleapis.com/v/{0}".format(video_id)}

    count = 0
    for _params in (_params_1, _params_2, _params_3):
        count += 1
        params = {"video_id": video_id}
        params.update(_params)

        res = http.get(API_VIDEO_INFO, params=params, headers=HLS_HEADERS)
        info_parsed = parse_query(res.text, name="config", schema=_config_schema)
        if info_parsed.get("status") == "fail":
            self.logger.debug("get_video_info - {0}: {1}".format(
                count, info_parsed.get("reason")))
            continue
        self.stream_title = info_parsed.get("title")
        self.logger.debug("get_video_info - {0}: Found data".format(count))
        break

    return info_parsed
def _create_adaptive_streams(self, info, streams, protected):
    adaptive_streams = {}
    best_audio_itag = None

    # Extract audio streams from the DASH format list
    for stream_info in info.get("adaptive_fmts", []):
        if stream_info.get("s"):
            protected = True
            continue

        stream_params = dict(parse_qsl(stream_info["url"]))
        if "itag" not in stream_params:
            continue
        itag = int(stream_params["itag"])
        # extract any high quality streams only available in adaptive formats
        adaptive_streams[itag] = stream_info["url"]

        stream_type, stream_format = stream_info["type"]
        if stream_type == "audio":
            stream = HTTPStream(self.session, stream_info["url"])
            name = "audio_{0}".format(stream_format)
            streams[name] = stream

            # find the best quality audio stream m4a, opus or vorbis
            if best_audio_itag is None or self.adp_audio[itag] > self.adp_audio[best_audio_itag]:
                best_audio_itag = itag

    if best_audio_itag and adaptive_streams and MuxedStream.is_usable(self.session):
        aurl = adaptive_streams[best_audio_itag]
        for itag, name in self.adp_video.items():
            if itag in adaptive_streams:
                vurl = adaptive_streams[itag]
                self.logger.debug("MuxedStream: v {video} a {audio} = {name}".format(
                    audio=best_audio_itag,
                    name=name,
                    video=itag,
                ))
                streams[name] = MuxedStream(self.session,
                                            HTTPStream(self.session, vurl),
                                            HTTPStream(self.session, aurl))

    return streams, protected
def get_video_id(self):
    parsed = urlparse(self.url)
    qinfo = dict(parse_qsl(parsed.query or parsed.fragment.lstrip("?")))

    site, video_id = None, None
    url_m = self.url_re.match(self.url)

    # look for the video id in the URL, otherwise find it in the page
    if "tvLiveId" in qinfo:
        video_id = qinfo["tvLiveId"]
        site = url_m.group(1)
    elif url_m.group(2):
        site, video_id = url_m.group(1), url_m.group(2)
    else:
        video_id_m = http.get(self.url, schema=self.video_id_schema)
        if video_id_m:
            site, video_id = video_id_m.groups()

    return site, video_id
def _get_streams(self):
    args = dict(parse_qsl(urlparse(self.url).query))
    if "k" in args:
        self.logger.debug("Loading channel: {k}", **args)
        res = http.get(self.url)
        stream_data_m = self.stream_data_re.search(res.text)
        if stream_data_m:
            script_vars = b64decode(stream_data_m.group(1)).decode("utf8")

            url_m = self.m3u8_re.search(script_vars)
            hls_url = url_m and url_m.group("url")
            if hls_url:
                for s in HLSStream.parse_variant_playlist(self.session, hls_url).items():
                    yield s

            f4m_m = self.f4mm_re.search(script_vars)
            f4m_url = f4m_m and f4m_m.group("url")
            if f4m_url:
                for n, s in HDSStream.parse_manifest(self.session, f4m_url).items():
                    yield n, s
def _get_streams(self):
    url_params = dict(parse_qsl(urlparse(self.url).query))
    video_id = url_params.get("videoid")

    if video_id:
        vali = '{0}l{1}m{2}'.format(self._random_t(4), self._random_t(4), self._random_t(5))
        data = {
            'userid': 1,
            'videoid': video_id,
            'area': '',
            'h5': 1,
            'vali': vali
        }
        self.logger.debug("Found Video ID: {0}".format(video_id))
        res = http.post(self.api_url, data=data)
        data = http.json(res, schema=self.api_schema)
        hls = self._make_stream(data["video_info"]["hlsvideosource"])
        video = self._make_stream(data["video_info"]["videosource"])
        if hls:
            yield "live", hls
        if video:
            yield "live", video
def get_event_id(cls, url):
    return dict(parse_qsl(urlparse(url).query.lower())).get("eventid")
def test_filter_urlquery(self):
    test_data = [
        {
            "url": "http://example.com/z/manifest.f4m?hdnea=st=123~exp=123~acl=/*~hmac=123&n=20&b=496,896,1296,1896",
            "keys": ["hdnea"],
            "keys_status": False,
            "result": "http://example.com/z/manifest.f4m?n=20&b=496,896,1296,1896"
        },
        {
            "url": "http://example.com/i/master.m3u8?hdnea=st=123~exp=123~acl=/*~hmac=123&n=10&__b__=240&b=240,120,64,496,896,1296,1896",
            "keys": ["hdnea", "invalid"],
            "keys_status": False,
            "result": "http://example.com/i/master.m3u8?__b__=240&b=240,120,64,496,896,1296,1896&n=10"
        },
        {
            "url": "http://example.com/i/master.m3u8?hdnea=st=123~exp=123~acl=/*~hmac=123&n=10&__b__=240&b=240,120,64,496,896,1296,1896",
            "keys": ["invalid"],
            "keys_status": False,
            "result": "http://example.com/i/master.m3u8?hdnea=st=123~exp=123~acl=/*~hmac=123&n=10&__b__=240&b=240,120,64,496,896,1296,1896"
        },
        {
            "url": "http://example.com/z/manifest.f4m?hdnea=st=123~exp=123~acl=/*~hmac=123&n=20&b=496,896,1296,1896",
            "keys": ["n", "b"],
            "keys_status": False,
            "result": "http://example.com/z/manifest.f4m?hdnea=st=123~exp=123~acl=/*~hmac=123"
        },
        {
            "url": "http://example.com/z/manifest.f4m?hdnea=st=123~exp=123~acl=/*~hmac=123&n=20&b=496,896,1296,1896",
            "keys": ["hdnea"],
            "keys_status": True,
            "result": "http://example.com/z/manifest.f4m?hdnea=st=123~exp=123~acl=/*~hmac=123"
        },
        {
            "url": "http://example.com/i/master.m3u8?hdnea=st=123~exp=123~acl=/*~hmac=123&n=10&__b__=240&b=240,120,64,496,896,1296,1896",
            "keys": ["hdnea", "invalid"],
            "keys_status": True,
            "result": "http://example.com/i/master.m3u8?hdnea=st=123~exp=123~acl=/*~hmac=123"
        },
        {
            "url": "http://example.com/i/master.m3u8?hdnea=st=123~exp=123~acl=/*~hmac=123&n=10&__b__=240&b=240,120,64,496,896,1296,1896",
            "keys": ["invalid"],
            "keys_status": True,
            "result": "http://example.com/i/master.m3u8"
        },
        {
            "url": "http://example.com/z/manifest.f4m?hdnea=st=123~exp=123~acl=/*~hmac=123&n=20&b=496,896,1296,1896",
            "keys": ["hdnea"],
            "keys_status": False,
            "new_dict": {"FOO": "BAR"},
            "result": "http://example.com/z/manifest.f4m?n=20&b=496,896,1296,1896&FOO=BAR"
        },
        {
            "url": "http://example.com/i/master.m3u8?hdnea=st=123~exp=123~acl=/*~hmac=123&n=10&__b__=240&b=240,120,64,496,896,1296,1896",
            "keys": ["invalid"],
            "keys_status": True,
            "new_dict": {"FOO": "BAR"},
            "result": "http://example.com/i/master.m3u8?FOO=BAR"
        },
        {
            "url": "http://example.com/i/master.m3u8?hdnea=st=123~exp=123~acl=/*~hmac=123&n=10&__b__=240&b=240,120,64,496,896,1296,1896",
            "keys": ["invalid"],
            "keys_status": True,
            "new_dict": {"FOO": "BAR", "FOO2": "BAR2"},
            "result": "http://example.com/i/master.m3u8?FOO=BAR&FOO2=BAR2"
        },
    ]

    for test_dict in test_data:
        # new_url == result_url can't be tested because of different sorting sometimes
        # we will only test the parameters as a dict
        self.assertDictEqual(
            dict(parse_qsl(urlparse(test_dict["result"]).query)),
            dict(parse_qsl(urlparse(filter_urlquery(
                test_dict["url"],
                test_dict["keys"],
                test_dict["keys_status"],
                test_dict.get("new_dict", {}))).query)))
def filter_urlquery(url, keys=[], keys_status=False, new_dict={}):
    """Manipulates the query parameters of an URL.

    Examples:

        All examples use this URL.
        url = "http://example.com/z/manifest.f4m?FOO=BAR&n=20&b=1896"

        1. allow only the specified parameters and remove all others

           filter_urlquery(url, ["FOO"], True)
           http://example.com/z/manifest.f4m?FOO=BAR

        2. same as 1. and add a custom parameter

           filter_urlquery(url, ["FOO"], True, {'2FOO2': '2BAR2'})
           http://example.com/z/manifest.f4m?FOO=BAR&2FOO2=2BAR2

        3. remove only the specified parameters

           filter_urlquery(url, ["FOO"], False)
           http://example.com/z/manifest.f4m?n=20&b=1896

        4. remove all parameters

           filter_urlquery(url, keys_status=True)
           http://example.com/z/manifest.f4m

        5. add new parameters

           filter_urlquery(url, new_dict={'QFOO': 'QBAR', 'AFOO': 'ABAR'})
           http://example.com/z/manifest.f4m?FOO=BAR&n=20&b=1896&QFOO=QBAR&AFOO=ABAR

    :param url: an URL
    :param keys: list of query parameter names
    :param keys_status: False = removes query parameters that are in keys
                        True = allows only query parameters that are in keys
    :param new_dict: dict of new custom query parameters
    :return: URL with filtered query
    """
    parts = urlparse(url)
    query_dict = dict(parse_qsl(parts.query))
    new_query_dict = {}

    for key in keys:
        try:
            if keys_status is True:
                new_query_dict[key] = query_dict[key]
            else:
                del query_dict[key]
        except KeyError:
            continue

    new_parts = list(parts)

    if keys_status is True:
        query_dict = new_query_dict

    query_dict.update(new_dict)
    new_parts[4] = unquote(urlencode(query_dict))
    url = urlunparse(new_parts)
    return url
def test_filter_urlquery(self):
    test_data = [
        {
            "url": "http://example.com/z/manifest.f4m?hdnea=st=123~exp=123~acl=/*~hmac=123&n=20&b=496,896,1296,1896",
            "keys": ["hdnea"],
            "keys_status": False,
            "result": "http://example.com/z/manifest.f4m?n=20&b=496,896,1296,1896"
        },
        {
            "url": "http://example.com/i/master.m3u8?hdnea=st=123~exp=123~acl=/*~hmac=123&n=10&__b__=240&b=240,120,64,496,896,1296,1896",
            "keys": ["hdnea", "invalid"],
            "keys_status": False,
            "result": "http://example.com/i/master.m3u8?__b__=240&b=240,120,64,496,896,1296,1896&n=10"
        },
        {
            "url": "http://example.com/i/master.m3u8?hdnea=st=123~exp=123~acl=/*~hmac=123&n=10&__b__=240&b=240,120,64,496,896,1296,1896",
            "keys": ["invalid"],
            "keys_status": False,
            "result": "http://example.com/i/master.m3u8?hdnea=st=123~exp=123~acl=/*~hmac=123&n=10&__b__=240&b=240,120,64,496,896,1296,1896"
        },
        {
            "url": "http://example.com/z/manifest.f4m?hdnea=st=123~exp=123~acl=/*~hmac=123&n=20&b=496,896,1296,1896",
            "keys": ["n", "b"],
            "keys_status": False,
            "result": "http://example.com/z/manifest.f4m?hdnea=st=123~exp=123~acl=/*~hmac=123"
        },
        {
            "url": "http://example.com/z/manifest.f4m?hdnea=st=123~exp=123~acl=/*~hmac=123&n=20&b=496,896,1296,1896",
            "keys": ["hdnea"],
            "keys_status": True,
            "result": "http://example.com/z/manifest.f4m?hdnea=st=123~exp=123~acl=/*~hmac=123"
        },
        {
            "url": "http://example.com/i/master.m3u8?hdnea=st=123~exp=123~acl=/*~hmac=123&n=10&__b__=240&b=240,120,64,496,896,1296,1896",
            "keys": ["hdnea", "invalid"],
            "keys_status": True,
            "result": "http://example.com/i/master.m3u8?hdnea=st=123~exp=123~acl=/*~hmac=123"
        },
        {
            "url": "http://example.com/i/master.m3u8?hdnea=st=123~exp=123~acl=/*~hmac=123&n=10&__b__=240&b=240,120,64,496,896,1296,1896",
            "keys": ["invalid"],
            "keys_status": True,
            "result": "http://example.com/i/master.m3u8"
        },
    ]

    for test_dict in test_data:
        self.assertDictEqual(
            dict(parse_qsl(urlparse(test_dict["result"]).query)),
            dict(parse_qsl(urlparse(filter_urlquery(
                test_dict["url"],
                test_dict["keys"],
                test_dict["keys_status"])).query)))
def _get_streams(self):
    info = self._get_stream_info(self.url)
    if not info:
        return

    formats = info.get("fmt_list")
    streams = {}
    protected = False
    for stream_info in info.get("url_encoded_fmt_stream_map", []):
        if stream_info.get("s"):
            protected = True
            continue

        stream = HTTPStream(self.session, stream_info["url"])
        name = formats.get(stream_info["itag"]) or stream_info["quality"]

        if stream_info.get("stereo3d"):
            name += "_3d"

        streams[name] = stream

    adaptive_streams = {}
    best_audio_itag = None

    # Extract audio streams from the DASH format list
    for stream_info in info.get("adaptive_fmts", []):
        if stream_info.get("s"):
            protected = True
            continue

        stream_params = dict(parse_qsl(stream_info["url"]))
        if "itag" not in stream_params:
            continue
        itag = int(stream_params["itag"])
        # extract any high quality streams only available in adaptive formats
        adaptive_streams[itag] = stream_info["url"]

        stream_type, stream_format = stream_info["type"]
        if stream_type == "audio":
            stream = HTTPStream(self.session, stream_info["url"])
            name = "audio_{0}".format(stream_format)
            streams[name] = stream

            # find the best quality audio stream m4a, opus or vorbis
            if best_audio_itag is None or self.adp_audio[itag] > self.adp_audio[best_audio_itag]:
                best_audio_itag = itag

    if best_audio_itag and adaptive_streams and MuxedStream.is_usable(self.session):
        aurl = adaptive_streams[best_audio_itag]
        for itag, name in self.adp_video.items():
            if itag in adaptive_streams:
                vurl = adaptive_streams[itag]
                streams[name] = MuxedStream(self.session,
                                            HTTPStream(self.session, vurl),
                                            HTTPStream(self.session, aurl))

    hls_playlist = info.get("hlsvp")
    if hls_playlist:
        parsed = urlparse(self.url)
        params = parse_query(parsed.query)
        time_offset = params.get("t")
        if time_offset:
            self.session.set_option("hls-start-offset", time_to_offset(time_offset))

        try:
            hls_streams = HLSStream.parse_variant_playlist(
                self.session, hls_playlist, headers=HLS_HEADERS, namekey="pixels")
            streams.update(hls_streams)
        except IOError as err:
            self.logger.warning("Failed to extract HLS streams: {0}", err)

    if not streams and protected:
        raise PluginError("This plugin does not support protected videos, "
                          "try youtube-dl instead")

    return streams
def _play_stream(HTTPBase, redirect=False):
    """Creates a livecli session and plays the stream."""
    session = Livecli()
    session.set_logprefix("[ID-{0}]".format(str(int(time()))[4:]))
    logger = session.logger.new_module("livecli-server")
    session.set_loglevel("info")

    logger.info("User-Agent: {0}".format(HTTPBase.headers.get("User-Agent", "???")))
    logger.info("Client: {0}".format(HTTPBase.client_address))
    logger.info("Address: {0}".format(HTTPBase.address_string()))

    # Load custom user plugins
    if os.path.isdir(PLUGINS_DIR):
        session.load_plugins(PLUGINS_DIR)

    old_data = parse_qsl(urlparse(HTTPBase.path).query)
    data = []
    for k, v in old_data:
        data += [(unquote_plus(k), unquote_plus(v))]

    data_other, session = command_session(session, data)

    url = data_other.get("url")
    if not url:
        HTTPBase._headers(404, "text/html")
        logger.error("No URL provided.")
        return

    quality = (data_other.get("q")
               or data_other.get("quality")
               or data_other.get("stream")
               or data_other.get("default-stream")
               or "best")

    try:
        cache = data_other.get("cache") or 4096
    except TypeError:
        cache = 4096

    loglevel = data_other.get("l") or data_other.get("loglevel") or "debug"
    session.set_loglevel(loglevel)

    try:
        if redirect is True:
            streams = session.streams(url, stream_types=["hls", "http"])
        else:
            streams = session.streams(url)
    except Exception as e:
        HTTPBase._headers(404, "text/html")
        logger.error("No Stream Found!")
        return

    if not streams:
        HTTPBase._headers(404, "text/html")
        return

    # XXX: only one quality will work currently
    try:
        stream = streams[quality]
    except KeyError:
        stream = streams["best"]
        quality = "best"

    if isinstance(stream, HTTPStream) is False and isinstance(stream, HDSStream) is False:
        # allow only http based streams: HDS HLS HTTP
        # RTMP is not supported
        HTTPBase._headers(404, "text/html")
        return

    if redirect is True:
        logger.info("301 - URL: {0}".format(stream.url))
        HTTPBase.send_response(301)
        HTTPBase.send_header("Location", stream.url)
        HTTPBase.end_headers()
        logger.info("301 - done")
        return

    hls_session_reload = data_other.get("hls-session-reload")
    if hls_session_reload:
        livecli_cache = Cache(filename="streamdata.json",
                              key_prefix="cache:{0}".format(stream.url))
        livecli_cache.set("cache_stream_name", quality, (int(hls_session_reload) + 60))
        livecli_cache.set("cache_url", url, (int(hls_session_reload) + 60))
        session.set_option("hls-session-reload", int(hls_session_reload))

    try:
        fd = stream.open()
    except StreamError as err:
        HTTPBase._headers(404, "text/html")
        logger.error("Could not open stream: {0}".format(err))
        return

    HTTPBase._headers(200, "video/unknown")
    try:
        logger.debug("Pre-buffering {0} bytes".format(cache))
        while True:
            buff = fd.read(cache)
            if not buff:
                logger.error("No Data!")
                break
            HTTPBase.wfile.write(buff)
        HTTPBase.wfile.close()
    except socket.error as e:
        if isinstance(e.args, tuple):
            if e.errno == errno.EPIPE:
                # remote peer disconnected
                logger.info("Detected remote disconnect")
                pass
            else:
                logger.error(str(e))
        else:
            logger.error(str(e))

    fd.close()
    logger.info("Stream ended")
    fd = None
def _get_streams(self):
    params = dict(parse_qsl(urlparse(self.url).query))
    live_id = params.get("lid")
    if live_id:
        return self._get_live_stream(live_id)
def can_handle_url(cls, url):
    if cls.url_re.match(url) is not None:
        args = dict(parse_qsl(urlparse(url).query))
        return args.get("y") == "tv"