def _get_streams(self): """ Get the config object from the page source and call the API to get the list of streams :return: """ # attempt a login self.login() res = http.get(self.url) # decode the config for the page matches = self.config_re.finditer(res.text) try: config = self.config_schema.validate( dict([m.group("key", "value") for m in matches])) except PluginError: return if config["selectedVideoHID"]: self.logger.debug("Found video hash ID: {0}", config["selectedVideoHID"]) api_url = urljoin( self.url, urljoin(config["videosURL"], config["selectedVideoHID"])) elif config["livestreamURL"]: self.logger.debug("Found live stream URL: {0}", config["livestreamURL"]) api_url = urljoin(self.url, config["livestreamURL"]) else: return ares = http.get(api_url) data = http.json(ares, schema=self.api_schema) viewing_urls = data["viewing_urls"] if "error" in viewing_urls: self.logger.error("Failed to load streams: {0}", viewing_urls["error"]) else: for url in viewing_urls["urls"]: try: label = "{0}p".format(url.get("res", url["label"])) except KeyError: label = "live" if url["type"] == "rtmp/mp4" and RTMPStream.is_usable( self.session): params = { "rtmp": url["src"], "pageUrl": self.url, "live": True, } yield label, RTMPStream(self.session, params) elif url["type"] == "application/x-mpegURL": for s in HLSStream.parse_variant_playlist( self.session, url["src"]).items(): yield s
def _get_hls_url(self, content_id):
    """Resolve the HLS manifest url for the given content id via the site API."""
    # make the api url relative to the current domain
    use_new_api = "cnnturk" in self.url or "teve2.com.tr" in self.url
    if use_new_api:
        self.logger.debug("Using new content API url")
    template = self.new_content_api if use_new_api else self.content_api
    api_url = urljoin(self.url, template.format(id=content_id))

    apires = http.get(api_url)
    stream_data = http.json(apires, schema=self.content_api_schema)
    link = stream_data["Media"]["Link"]
    service_url = link["ServiceUrl"] or link["DefaultServiceUrl"]
    return urljoin(service_url, link["SecurePath"])
def _get_streams(self):
    """Fetch the player data url and yield HLS/HDS/HTTP streams from it."""
    data_url = http.get(self.url, schema=self._player_url_schema)
    if not data_url:
        return
    res = http.get(urljoin(self.url, data_url))
    stream_info = http.xml(res, schema=self._livestream_schema)
    for entry in stream_info:
        stream_url = entry["url"]
        try:
            if ".m3u8" in stream_url:
                variants = HLSStream.parse_variant_playlist(
                    self.session, stream_url, name_key="bitrate")
                for item in variants.items():
                    yield item
            elif ".f4m" in stream_url:
                manifests = HDSStream.parse_manifest(
                    self.session, stream_url,
                    pvswf=self.swf_url, is_akamai=True)
                for item in manifests.items():
                    yield item
            elif ".mp4" in stream_url:
                quality = "{0}k".format(entry["bitrate"])
                yield quality, HTTPStream(self.session, stream_url)
        except IOError as err:
            self.logger.warning("Error parsing stream: {0}", err)
def uri(self, uri):
    """Resolve *uri* against base_uri when it is relative; pass it through otherwise."""
    if not uri:
        return uri
    if urlparse(uri).scheme:
        # already absolute
        return uri
    if self.base_uri:
        return urljoin(self.base_uri, uri)
    return uri
def _parse_smil(self, url, swf_url):
    """Parse the SMIL config at *url* and yield (bitrate, AkamaiHDStream) pairs."""
    response = http.get(url)
    config = http.xml(response, "SMIL config", schema=_smil_schema)
    base = config["http_base"]
    for video_src, rate in config["videos"]:
        stream_url = urljoin(base, video_src)
        yield rate, AkamaiHDStream(self.session, stream_url, swf=swf_url)
def _get_vod_stream(self, vod_id):
    """Yield (quality, HLSStream) pairs for the recording with id *vod_id*."""
    res = self._get_api_res("recordings", vod_id)
    for entry in http.json(res, schema=self._vod_schema):
        if entry["format"] != "hls":
            continue
        manifest_url = urljoin(entry["url"], "manifest.m3u8")
        quality = "{0}p".format(entry["height"])
        yield quality, HLSStream(self.session, manifest_url)
def repair_url(self, url, base_url, stream_base=""):
    """Repair a broken url scraped from page markup.

    :param url: possibly escaped / entity-encoded / relative url
    :param base_url: url used to supply a missing scheme and netloc
    :param stream_base: base used instead of base_url for path-only urls
    :return: a valid absolute url
    """
    # remove JS escaping backslashes (e.g. http:\/\/host\/path)
    new_url = url.replace("\\", "")
    # repair a scheme whose colon was HTML-entity-encoded ("http&#58;//...");
    # "http&#58;" is 9 characters and "https&#58;" is 10, hence the slice
    # offsets (checking plain "http://" here would corrupt valid urls)
    if new_url.startswith("http&#58;//"):
        new_url = "http:" + new_url[9:]
    elif new_url.startswith("https&#58;//"):
        new_url = "https:" + new_url[10:]
    # create a valid url from path-only urls and add a missing scheme to // urls
    # (== / != instead of "is"/"is not": string identity is not guaranteed)
    if stream_base and new_url[1:2] != "/":
        if new_url.startswith("/"):
            new_url = new_url[1:]
        new_url = urljoin(stream_base, new_url)
    else:
        new_url = urljoin(base_url, new_url)
    return new_url
def create_hls_url(self, suffix):
    """
    creates a valid hls_url

    :param suffix: url session params
    :return: hls_url
    """
    suffix_match = self._suffix_re.search(suffix)
    if suffix_match is None:
        return
    self.logger.debug("create hls_url from suffix")
    channel_name = self._url_re.match(self.url).group("channel")
    base_domain = self._channel_domains[channel_name]
    master = "master.m3u8?{suffix}".format(suffix=suffix)
    return urljoin(urljoin(base_domain, suffix_match.group("path")), master)
def _get_streams(self):
    """Locate the player javascript and yield live HLS streams found in it."""
    res = http.get(self.url, headers={"User-Agent": useragents.CHROME})
    key_match = self.js_re.search(res.text)
    if not key_match:
        return
    self.logger.debug("Found js key: {0}", key_match.group(1))
    js_res = http.get(urljoin(self.url, key_match.group(0)))
    for stream_url in self.player_re.findall(js_res.text):
        if "adblock" in stream_url:
            continue
        yield "live", HLSStream(self.session, stream_url)
def find_videopage(self):
    """Follow the current page to the actual video page url.

    :raises NoStreamsError: when no stream path is found on the page
    """
    self.logger.debug("Not a videopage")
    res = http.get(self.url)
    path_match = self._videopage_re.search(res.text)
    if path_match is None:
        self.logger.debug(
            "No stream path, stream might be offline or invalid url.")
        raise NoStreamsError(self.url)
    path = path_match.group("path")
    self.logger.debug("Found new path: {0}".format(path))
    return urljoin(self.url, path)
def _get_streams(self):
    """Find the m3u8 source on the page (following an embedded player iframe)."""
    res = http.get(self.url)
    # some pages have embedded players
    embed = self.iframe_re.search(res.text)
    if embed:
        embed_url = urljoin(self.url, embed.group("url"))
        res = http.get(embed_url)
    src_match = self.src_re.search(res.text)
    if not src_match:
        return
    stream_src = src_match.group("url")
    if stream_src and stream_src.endswith("m3u8"):
        return HLSStream.parse_variant_playlist(self.session, stream_src)
def _get_streams(self):
    """Locate the player javascript and yield live HLS streams found in it."""
    http.headers.update({"User-Agent": useragents.CHROME})
    res = http.get(self.url)
    self.logger.debug("search for js_re")
    key_match = self.js_re.search(res.text)
    if not key_match:
        return
    self.logger.debug("Found js key: {0}", key_match.group(1))
    http.headers.update({"Referer": self.url})
    js_res = http.get(urljoin(self.url, key_match.group(0)))
    self.logger.debug("search for player_re")
    for stream_url in self.player_re.findall(js_res.text):
        self.logger.debug("Found url: {0}".format(stream_url))
        if "adblock" in stream_url:
            continue
        yield "live", HLSStream(self.session, stream_url)
def _get_streams(self):
    """Resolve the media element url via the API and return HLS variants."""
    # Get the stream type from the url (tv/radio).
    stream_type = _url_re.match(self.url).group(1).upper()
    settings_cookie = {
        "NRK_PLAYER_SETTINGS_{0}".format(stream_type): COOKIE_PARAMS,
    }

    # Construct API URL for this program.
    baseurl = http.get(self.url, cookies=settings_cookie, schema=_schema)
    program_id = _id_re.search(self.url).group(1)
    json_url = urljoin(baseurl, "mediaelement/{0}".format(program_id))

    # Extract media URL.
    res = http.get(json_url, cookies=settings_cookie)
    media_element = http.json(res, schema=_mediaelement_schema)
    return HLSStream.parse_variant_playlist(
        self.session, media_element["mediaUrl"])
def _get_streams(self):
    """
    Fetch the player page, perform the encrypted "init" API handshake and
    yield the resulting HLS streams, optionally muxed with SRT subtitles.
    """
    page = http.get(self.url, schema=_schema)
    if not page:
        return
    # RSA public key served via the page's clientlibs; used to wrap the AES key
    pubkey_pem = get_public_key(self.cache, urljoin(self.url, page["clientlibs"]))
    if not pubkey_pem:
        raise PluginError("Unable to get public key")
    flashvars = page["flashvars"]
    params = {
        "cashPath": int(time.time() * 1000)  # cache-buster timestamp in ms
    }
    res = http.get(urljoin(self.url, flashvars["country"]), params=params)
    if not res:
        return
    language = http.xml(res, schema=_language_schema)
    # forward only the non-empty flashvars the init API expects
    api_params = {}
    for key in ("ss_id", "mv_id", "device_cd", "ss1_prm", "ss2_prm", "ss3_prm"):
        if flashvars.get(key, ""):
            api_params[key] = flashvars[key]
    # fresh random 256-bit AES session key
    aeskey = crypto_number.long_to_bytes(random.getrandbits(8 * 32), 32)
    params = {
        "s": flashvars["s"],
        "c": language,
        "e": self.url,
        "d": aes_encrypt(aeskey, json.dumps(api_params)),  # AES-encrypted payload
        "a": rsa_encrypt(pubkey_pem, aeskey)  # AES key wrapped with the RSA key
    }
    res = http.get(urljoin(self.url, flashvars["init"]), params=params)
    if not res:
        return
    rtn = http.json(res, schema=_init_schema)
    if not rtn:
        return
    init_data = parse_json(aes_decrypt(aeskey, rtn))
    # accept only https .../i/.../master.m3u8 playlist urls
    parsed = urlparse(init_data["play_url"])
    if parsed.scheme != "https" or not parsed.path.startswith("/i/") or not parsed.path.endswith("/master.m3u8"):
        return
    hlsstream_url = init_data["play_url"]
    streams = HLSStream.parse_variant_playlist(self.session, hlsstream_url)
    if "caption_url" in init_data:
        if self.get_option("mux_subtitles") and FFMPEGMuxer.is_usable(self.session):
            res = http.get(init_data["caption_url"])
            srt = http.xml(res, ignore_ns=True, schema=_xml_to_srt_schema)
            subfiles = []
            metadata = {}
            # NOTE: the loop variable shadows the srt list; safe because
            # enumerate() already holds its own iterator over the original list
            for i, lang, srt in ((i, s[0], s[1]) for i, s in enumerate(srt)):
                subfile = tempfile.TemporaryFile()
                subfile.write(srt.encode("utf8"))
                subfile.seek(0)
                subfiles.append(FileStream(self.session, fileobj=subfile))
                # ffmpeg metadata: tag subtitle stream i with its language
                metadata["s:s:{0}".format(i)] = ["language={0}".format(lang)]
            for n, s in streams.items():
                yield n, MuxedStream(self.session, s, *subfiles,
                                     maps=list(range(0, len(metadata) + 1)),
                                     metadata=metadata)
            return
        else:
            self.logger.info("Subtitles: \n{0}".format(init_data["caption_url"]))
    for s in streams.items():
        yield s
def host(self):
    """Return the API host for this stream, with the /1/ustream path appended."""
    if self._host:
        base = self._host
    else:
        base = self.API_URL.format(
            randint(0, 0xffffff), self.media_id,
            self.application, "lp-" + self._cluster)
    return urljoin(base, "/1/ustream")
def _make_url_list(self, old_list, base_url, url_type="", stream_base=""):
    """Creates a list of validate urls from a list of broken urls
       and removes every blacklisted url

    Args:
        old_list: List of broken urls
        base_url: url that will get used for scheme and netloc
        url_type: can be iframe or playlist
            - iframe is used for --resolve-whitelist-netloc
            - playlist is not used at the moment
        stream_base: basically same as base_url, but used for .f4m files.

    Returns:
        List of validate urls
    """
    blacklist_netloc_user = self.get_option("blacklist_netloc")
    blacklist_netloc = (
        "127.0.0.1",
        "about:blank",
        "abv.bg",
        "adfox.ru",
        "googletagmanager.com",
        "javascript:false",
    )
    whitelist_netloc_user = self.get_option("whitelist_netloc")
    blacklist_path = [
        ("expressen.se", "/_livetvpreview/"),
        ("facebook.com", "/plugins"),
        ("vesti.ru", "/native_widget.html"),
    ]
    # Add --resolve-blacklist-path to blacklist_path
    blacklist_path_user = self.get_option("blacklist_path")
    if blacklist_path_user is not None:
        blacklist_path = self.merge_path_list(blacklist_path, blacklist_path_user)
    whitelist_path = []
    whitelist_path_user = self.get_option("whitelist_path")
    if whitelist_path_user is not None:
        whitelist_path = self.merge_path_list(whitelist_path, whitelist_path_user)
    blacklist_endswith = (
        ".gif",
        ".jpg",
        ".png",
        ".svg",
        ".vtt",
        "/chat.html",
        "/chat",
    )
    new_list = []
    for url in old_list:
        # Don't add the same url as self.url to the list.
        if url == self.url:
            continue
        # Repair the scheme: remove JS escaping and decode an
        # HTML-entity-encoded colon ("http&#58;//...");
        # "http&#58;" is 9 characters, "https&#58;" is 10 - hence the slices
        new_url = url.replace("\\", "")
        if new_url.startswith("http&#58;//"):
            new_url = "http:" + new_url[9:]
        elif new_url.startswith("https&#58;//"):
            new_url = "https:" + new_url[10:]
        # Repair the domain: path-only urls go on stream_base, everything
        # else (absolute and protocol-relative urls) on base_url.
        # (== / != instead of "is"/"is not": string identity is not
        # guaranteed; [1:2] also avoids an IndexError on 1-char urls)
        if stream_base and new_url[1:2] != "/":
            if new_url.startswith("/"):
                new_url = new_url[1:]
            new_url = urljoin(stream_base, new_url)
        else:
            new_url = urljoin(base_url, new_url)
        # Parse the url and remove not wanted urls
        parse_new_url = urlparse(new_url)
        REMOVE = False
        # sorted after the way livecli will try to remove an url
        status_remove = [
            "WL-netloc",  # - Allow only whitelisted domains --resolve-whitelist-netloc
            "WL-path",    # - Allow only whitelisted paths from a domain --resolve-whitelist-path
            "BL-static",  # - Removes blacklisted domains
            "BL-netloc",  # - Removes blacklisted domains --resolve-blacklist-netloc
            "BL-path",    # - Removes blacklisted paths from a domain --resolve-blacklist-path
            "BL-ew",      # - Removes images and chatrooms
            "ADS",        # - Remove obviously ad urls
        ]
        count = 0
        for url_status in (
                (url_type == "iframe"
                 and whitelist_netloc_user is not None
                 and parse_new_url.netloc.endswith(tuple(whitelist_netloc_user)) is False),
                (url_type == "iframe"
                 and whitelist_path_user is not None
                 and self.compare_url_path(parse_new_url, whitelist_path) is False),
                (parse_new_url.netloc.endswith(blacklist_netloc)),
                (blacklist_netloc_user is not None
                 and parse_new_url.netloc.endswith(tuple(blacklist_netloc_user))),
                (self.compare_url_path(parse_new_url, blacklist_path) is True),
                (parse_new_url.path.endswith(blacklist_endswith)),
                (self._ads_path.match(parse_new_url.path))):
            count += 1
            if url_status:
                REMOVE = True
                break
        if REMOVE is True:
            self.logger.debug("{0} - Removed url: {1}".format(status_remove[count - 1], new_url))
            continue
        # Add url to the list
        new_list += [new_url]
    # Remove duplicates
    new_list = list(set(new_list))
    return new_list
def absolute_url(baseurl, url):
    """Return *url* resolved against *baseurl* unless it is already absolute."""
    if url.startswith("http"):
        return url
    return urljoin(baseurl, url)
def _make_url_list(self, old_list, base_url, stream_base=""):
    """Creates a list of validate urls from a list of broken urls
       and removes every blacklisted url

    Args:
        old_list: List of broken urls
        base_url: url that will get used for scheme and netloc
        stream_base: basically same as base_url, but used for .f4m files.

    Returns:
        List of validate urls
    """
    blacklist_netloc_user = self.get_option("blacklist_netloc")
    blacklist_netloc = (
        "about:blank",
        "adfox.ru",
        "googletagmanager.com",
        "javascript:false",
    )
    blacklist_path = [
        ("facebook.com", "/plugins"),
        ("vesti.ru", "/native_widget.html"),
    ]
    # Add --resolve-blacklist-path to blacklist_path
    blacklist_path_user = self.get_option("blacklist_path")
    if blacklist_path_user is not None:
        for _path_url in blacklist_path_user:
            if not _path_url.startswith(("http", "//")):
                _path_url = update_scheme("http://", _path_url)
            _parsed_path_url = urlparse(_path_url)
            if _parsed_path_url.netloc and _parsed_path_url.path:
                blacklist_path += [(_parsed_path_url.netloc, _parsed_path_url.path)]
    new_list = []
    for url in old_list:
        # Don't add the same url as self.url to the list.
        if url == self.url:
            continue
        # Repair the scheme: remove JS escaping and decode an
        # HTML-entity-encoded colon ("http&#58;//...");
        # "http&#58;" is 9 characters, "https&#58;" is 10 - hence the slices
        new_url = url.replace("\\", "")
        if new_url.startswith("http&#58;//"):
            new_url = "http:" + new_url[9:]
        elif new_url.startswith("https&#58;//"):
            new_url = "https:" + new_url[10:]
        # Repair the domain: path-only urls go on stream_base, everything
        # else (absolute and protocol-relative urls) on base_url.
        # (== / != instead of "is"/"is not": string identity is not
        # guaranteed; [1:2] also avoids an IndexError on 1-char urls)
        if stream_base and new_url[1:2] != "/":
            if new_url.startswith("/"):
                new_url = new_url[1:]
            new_url = urljoin(stream_base, new_url)
        else:
            new_url = urljoin(base_url, new_url)
        # Parse the url and remove not wanted urls
        parse_new_url = urlparse(new_url)
        REMOVE = False
        # Removes blacklisted domains
        if parse_new_url.netloc.endswith(blacklist_netloc):
            REMOVE = True
        # Removes blacklisted domains from --resolve-blacklist-netloc
        elif blacklist_netloc_user is not None and parse_new_url.netloc.endswith(
                tuple(blacklist_netloc_user)):
            REMOVE = True
        else:
            # Removes blacklisted paths from a domain
            for netloc, path in blacklist_path:
                if parse_new_url.netloc.endswith(
                        netloc) and parse_new_url.path.startswith(path):
                    REMOVE = True
                    # a match was found; was "continue", which kept scanning
                    break
        # Removes images and chatrooms
        if REMOVE is False and parse_new_url.path.endswith(
                (".jpg", ".png", ".svg", "/chat")):
            REMOVE = True
        # Remove obviously ad urls
        if REMOVE is False and self._ads_path.match(parse_new_url.path):
            REMOVE = True
        if REMOVE is True:
            self.logger.debug("Removed url: {0}".format(new_url))
            continue
        # Add url to the list
        new_list += [new_url]
    # Remove duplicates
    new_list = list(set(new_list))
    return new_list