def test_parse_html(self):
    """parse_html() must build a tree from lenient HTML and reject non-strings."""
    tree = validate(
        parse_html(),
        '<!DOCTYPE html><body>"perfectly"<a>valid<div>HTML',
    )
    assert tree.tag == "html"

    with self.assertRaises(ValueError) as cm:
        validate(parse_html(), None)
    assert str(cm.exception) == "Unable to parse HTML: can only parse strings (None)"
def niconico_web_login(self):
    """Log in to niconico using, in order of preference: an explicit user-session
    cookie, a previously cached session cookie, or email+password credentials."""
    user_session = self.get_option("user-session")
    email = self.get_option("email")
    password = self.get_option("password")
    if user_session is not None:
        log.info("Logging in via provided user session cookie")
        self.session.http.cookies.set(
            "user_session",
            user_session,
            path="/",
            domain="nicovideo.jp"
        )
        self.save_cookies()
    elif self.session.http.cookies.get("user_session"):
        log.info("Logging in via cached user session cookie")
    elif email is not None and password is not None:
        log.info("Logging in via provided email and password")
        root = self.session.http.post(
            self.LOGIN_URL,
            data={"mail_tel": email, "password": password},
            params=self.LOGIN_URL_PARAMS,
            schema=validate.Schema(validate.parse_html()))
        # Collect the pre-filled inputs so the follow-up POST round-trips
        # the server-provided form state.
        input_with_value = {}
        for elem in root.xpath(".//input"):
            if elem.attrib.get("value"):
                input_with_value[elem.attrib.get("name")] = elem.attrib.get("value")
            else:
                # An empty "oneTimePw" input means 2FA is required:
                # ask the user for the code sent via email.
                if elem.attrib.get("id") == "oneTimePw":
                    maxlength = int(elem.attrib.get("maxlength"))
                    try:
                        oneTimePw = self.input_ask("Enter the 6 digit number included in email")
                        if len(oneTimePw) > maxlength:
                            log.error("invalid user input")
                            return
                    except FatalPluginError:
                        return
                    input_with_value[elem.attrib.get("name")] = oneTimePw
                else:
                    log.debug(f"unknown input: {elem.attrib.get('name')}")
        # Submit the (possibly 2FA-completed) form back to the account host.
        root = self.session.http.post(
            urljoin("https://account.nicovideo.jp", root.xpath("string(.//form[@action]/@action)")),
            data=input_with_value,
            schema=validate.Schema(validate.parse_html()))
        log.debug(f"Cookies: {self.session.http.cookies.get_dict()}")
        # A successful login sets the "user_session" cookie.
        if self.session.http.cookies.get("user_session") is None:
            error = root.xpath("string(//div[@class='formError']/div/text())")
            log.warning(f"Login failed: {error or 'unknown reason'}")
        else:
            log.info("Logged in.")
            self.save_cookies()
def __init__(self, url: str):
    """Build the validation schemas used to resolve the LSM/LTV embed chain."""
    super().__init__(url)
    # Matches the JSON object passed to teliaPlayer(...) in the player script.
    self._json_data_re = re.compile(r'teliaPlayer\((\{.*?\})\);', re.DOTALL)
    # Main page -> URL of the ltv.lsm.lv embed iframe.
    self.main_page_schema = validate.Schema(
        validate.parse_html(),
        validate.xml_xpath_string(
            ".//iframe[contains(@src, 'ltv.lsm.lv/embed')][1]/@src"),
        validate.url())
    # Embed page -> inner player iframe URL, read from the JSON held in the
    # ':embed-data' attribute of the <live> element.
    self.embed_code_schema = validate.Schema(
        validate.parse_html(),
        validate.xml_xpath_string(".//live[1]/@*[name()=':embed-data']"),
        str,
        validate.parse_json(),
        {"source": {
            "embed_code": str
        }},
        validate.get(("source", "embed_code")),
        validate.parse_html(),
        validate.xml_xpath_string(".//iframe[@src][1]/@src"),
    )
    # Player page -> channel name from the teliaPlayer(...) call; the JS object
    # uses single quotes and trailing commas, so normalize it into valid JSON.
    self.player_apicall_schema = validate.Schema(
        validate.transform(self._json_data_re.search),
        validate.any(
            None,
            validate.all(
                validate.get(1),
                validate.transform(lambda s: s.replace("'", '"')),
                validate.transform(
                    lambda s: re.sub(r",\s*\}", "}", s, flags=re.DOTALL)),
                validate.parse_json(),
                {"channel": str},
                validate.get("channel"))))
    # API response -> list of HLS source URLs only.
    self.sources_schema = validate.Schema(
        validate.parse_json(),
        {
            "source": {
                "sources": validate.all(
                    [{
                        "type": str,
                        "src": validate.url()
                    }],
                    validate.filter(lambda src: src["type"] == "application/x-mpegURL"),
                    validate.map(lambda src: src.get("src"))),
            }
        },
        validate.get(("source", "sources")))
def _get_streams(self):
    """Resolve the dacast/universe content id from the page and query the
    playback API for an HLS playlist."""
    self.id, self.title = self.session.http.get(
        self.url,
        schema=validate.Schema(
            validate.parse_html(),
            validate.union((
                validate.xml_xpath_string(
                    ".//script[@class='dacast-video'][@id]/@id"),
                validate.xml_xpath_string(".//head/title[1]/text()"),
            ))))
    if not self.id:
        return
    # Dacast content ids look like "<a>_<b>_<c>"; anything else is "universe".
    if re.match(r"\w+_\w+_\w+", self.id):
        provider = "dacast"
    else:
        provider = "universe"
    data = self.session.http.get(
        f"https://playback.dacast.com/content/access?contentId={self.id}&provider={provider}",
        acceptable_status=(200, 400, 403, 404),
        schema=validate.Schema(
            validate.parse_json(),
            validate.any(
                {"error": str},
                {"hls": validate.url()},
            )))
    if data.get("error"):
        log.error(data["error"])
        return
    return HLSStream.parse_variant_playlist(self.session, data["hls"])
def _get_streams(self):
    """Read the Next.js __NEXT_DATA__ payload and return the live HLS streams."""
    try:
        hls = self.session.http.get(self.url, schema=validate.Schema(
            validate.parse_html(),
            validate.xml_xpath_string(".//script[@type='application/json'][@id='__NEXT_DATA__']/text()"),
            str,
            validate.parse_json(),
            {
                "props": {
                    "pageProps": {
                        "type": "live",
                        "url": validate.all(
                            str,
                            # The site sometimes emits a malformed scheme
                            # with four slashes.
                            validate.transform(lambda url: url.replace("https:////", "https://")),
                            validate.url(path=validate.endswith(".m3u8")),
                        )
                    }
                }
            },
            validate.get(("props", "pageProps", "url")),
        ))
    except PluginError:
        # A schema mismatch means this page has no live stream.
        return
    return HLSStream.parse_variant_playlist(self.session, hls)
def _get_streams(self):
    """Probe the page for the supported embed types, in priority order."""
    root = self.session.http.get(self.url, schema=validate.Schema(
        validate.parse_html()))
    # Native dvideo player
    video_id = root.xpath(
        "string(.//div[@data-provider='dvideo'][@data-id][1]/@data-id)")
    if video_id:
        return self._get_streams_api(str(video_id))
    # YouTube via the yt.js loader script
    yt_id = root.xpath(
        "string(.//script[contains(@src,'/yt.js')][@data-video]/@data-video)"
    )
    if yt_id:
        return self.session.streams(
            f"https://www.youtube.com/watch?v={yt_id}")
    # Plain YouTube iframe embed
    yt_iframe = root.xpath(
        "string(.//iframe[starts-with(@src,'https://www.youtube.com/')][1]/@src)"
    )
    if yt_iframe:
        return self.session.streams(str(yt_iframe))
    # Delfi iframe embed
    delfi = root.xpath(
        "string(.//iframe[@name='delfi-stream'][@src][1]/@src)")
    if delfi:
        return self._get_streams_delfi(str(delfi))
def follow_vk_redirect(self):
    """Resolve indirect VK URLs until the URL contains a video id.

    Tries the "z" query parameter used by feed links first, then falls back
    to the og:url meta tag. Raises NoStreamsError if no video id is found.
    """
    if self._has_video_id():
        return
    try:
        parsed_url = urlparse(self.url)
        # The "z" parameter embeds the real path, e.g. z=video-123_456/...
        true_path = next(
            unquote(v).split("/")[0]
            for k, v in parse_qsl(parsed_url.query)
            if k == "z" and len(v) > 0)
        self.url = f"{parsed_url.scheme}://{parsed_url.netloc}/{true_path}"
        if self._has_video_id():
            return
    except StopIteration:
        # No "z" parameter present; try the next strategy.
        pass
    try:
        self.url = self.session.http.get(
            self.url,
            schema=validate.Schema(
                validate.parse_html(),
                validate.xml_xpath_string(
                    ".//head/meta[@property='og:url'][@content]/@content"),
                str))
    except PluginError:
        pass
    if self._has_video_id():
        return
    raise NoStreamsError(self.url)
def _get_live_streams(self):
    """Return live streams: an embedded YouTube video if present, otherwise
    the station's HLS playlist resolved via its info API."""
    video_id = self.session.http.get(
        self.url,
        schema=validate.Schema(
            validate.parse_html(),
            validate.xml_xpath_string(
                ".//div[@data-google-src]/@data-video-id")))
    if video_id:
        return self.session.streams(
            f"https://www.youtube.com/watch?v={video_id}")
    # Two-step lookup: the API returns an info URL, which in turn returns
    # the primary HLS playlist URL.
    info_url = self.session.http.get(
        self.API_URL.format(subdomain=self.match.group("subdomain")),
        schema=validate.Schema(
            validate.parse_json(),
            {"url": validate.url()},
            validate.get("url"),
            validate.transform(
                lambda url: update_scheme("https://", url))))
    hls_url = self.session.http.get(info_url, schema=validate.Schema(
        validate.parse_json(),
        {
            "status": "ok",
            "protocol": "hls",
            "primary": validate.url()
        },
        validate.get("primary")))
    return HLSStream.parse_variant_playlist(self.session, hls_url)
def login_csrf(self):
    """Fetch the login page and return the value of the CSRF token input."""
    xpath = ".//input[@name='{0}'][1]/@value".format(self.CSRF_NAME)
    schema = validate.Schema(
        validate.parse_html(),
        validate.xml_xpath_string(xpath),
    )
    return self.session.http.get(self.login_url, schema=schema)
def _get_streams_delfi(self, src):
    """Resolve a Delfi embed iframe: find the embed JS URL in the page, read
    the "stream" JSON from its URL fragment, and yield HLS streams for each
    stream version.

    Fix: the validation schema deliberately yields None when the embed script
    or the src-regex does not match (validate.any(None, ...)), but the result
    was iterated unconditionally, raising TypeError instead of failing
    gracefully. Guard against a falsy result before iterating.
    """
    try:
        data = self.session.http.get(src, schema=validate.Schema(
            validate.parse_html(),
            validate.xml_xpath_string(".//script[contains(text(),'embedJs.setAttribute(')][1]/text()"),
            validate.any(None, validate.all(
                validate.text,
                validate.transform(re.compile(r"embedJs\.setAttribute\('src',\s*'(.+?)'").search),
                validate.any(None, validate.all(
                    validate.get(1),
                    # The stream data is JSON stored in the "stream" query
                    # parameter of the URL fragment.
                    validate.transform(lambda url: parse_qsd(urlparse(url).fragment)),
                    {"stream": validate.text},
                    validate.get("stream"),
                    validate.parse_json(),
                    {"versions": [{
                        "hls": validate.text
                    }]},
                    validate.get("versions")
                ))
            ))
        ))
    except PluginError:
        log.error("Failed to get streams from iframe")
        return
    if not data:
        # Embed script or src attribute not found on the page.
        log.error("Failed to get streams from iframe")
        return
    for stream in data:
        src = update_scheme("https://", stream["hls"], force=False)
        for s in HLSStream.parse_variant_playlist(self.session, src).items():
            yield s
def get_wss_api_url(self):
    """Read the websocket API URL (and optional frontend id) from the page's
    embedded-data props; returns None when the page has no live relive data."""
    try:
        data = self.session.http.get(
            self.url,
            schema=validate.Schema(
                validate.parse_html(),
                validate.xml_find(
                    ".//script[@id='embedded-data'][@data-props]"),
                validate.get("data-props"),
                validate.parse_json(),
                {
                    "site": {
                        "relive": {
                            "webSocketUrl": validate.url(scheme="wss")
                        },
                        validate.optional("frontendId"): int
                    }
                },
                validate.get("site"),
                validate.union_get(("relive", "webSocketUrl"), "frontendId")))
    except PluginError:
        return
    wss_api_url, frontend_id = data
    # The frontend id must be appended as a query parameter when present.
    if frontend_id is not None:
        wss_api_url = update_qsd(wss_api_url, {"frontend_id": frontend_id})
    return wss_api_url
def _get_streams(self):
    """Extract the title and HLS URL from the page's HLS bootstrap script.

    NOTE(review): if the script/regex/JSON chain yields None, the tuple
    unpacking below raises TypeError — presumably acceptable upstream, but
    worth confirming.
    """
    self.title, hls_url = self.session.http.get(
        self.url,
        schema=validate.Schema(
            validate.parse_html(),
            validate.xml_xpath_string(".//script[contains(text(), 'HLS')]/text()"),
            validate.any(None, validate.all(
                validate.transform(self._re_content.search),
                validate.any(None, validate.all(
                    validate.get(1),
                    validate.parse_json(),
                    # The payload is keyed by a dynamic id, hence the
                    # next(iter(...)) transform below.
                    {str: {"children": {"top": {"model": {"videos": [{
                        "title": str,
                        "sources": validate.all(
                            [{"url": str, "type": str}],
                            validate.filter(lambda p: p["type"].lower() == "hls"),
                            validate.get((0, "url")))
                    }]}}}}},
                    validate.transform(lambda k: next(iter(k.values()))),
                    validate.get(("children", "top", "model", "videos", 0)),
                    validate.union_get("title", "sources")
                ))
            ))
        )
    )
    return HLSStream.parse_variant_playlist(self.session, urljoin(self.url, hls_url))
def _get_streams(self): root = self.session.http.get(self.url, schema=validate.Schema( validate.parse_html())) # https://www.ntv.com.tr/canli-yayin/ntv?youtube=true yt_iframe = root.xpath( "string(.//iframe[contains(@src,'youtube.com')][1]/@src)") # https://www.startv.com.tr/canli-yayin dm_iframe = root.xpath( "string(.//iframe[contains(@src,'dailymotion.com')][1]/@src)") # https://www.kralmuzik.com.tr/tv/kral-tv # https://www.kralmuzik.com.tr/tv/kral-pop-tv yt_script = root.xpath( "string(.//script[contains(text(), 'youtube.init')][1]/text())") if yt_script: m = self._re_yt_script.search(yt_script) if m: yt_iframe = "https://www.youtube.com/watch?v={0}".format( m.group(1)) iframe = yt_iframe or dm_iframe if iframe: return self.session.streams(iframe) # http://eurostartv.com.tr/canli-izle dd_script = root.xpath( "string(.//script[contains(text(), '/live/hls/')][1]/text())") if dd_script: m = self._re_live_hls.search(dd_script) if m: return HLSStream.parse_variant_playlist( self.session, m.group(1))
def test_failure(self):
    """Non-string input must raise a ValidationError with a clear message."""
    with pytest.raises(validate.ValidationError) as cm:
        validate.validate(validate.parse_html(), None)
    assert_validationerror(cm.value, """
        ValidationError:
          Unable to parse HTML: can only parse strings (None)
    """)
def _schema_canonical(self, data):
    """Extract the video_id from the page's canonical link via the plugin matcher."""
    return validate.Schema(
        validate.parse_html(),
        validate.xml_xpath_string(".//link[@rel='canonical'][1]/@href"),
        validate.transform(self.matcher.match),
        validate.get("video_id"),
    ).validate(data)
def _get_streams(self):
    """Parse the page once, then try each known embed type in priority order,
    returning the first that yields streams."""
    page = self.session.http.get(
        self.url,
        schema=validate.Schema(validate.parse_html()),
    )
    return (
        self._streams_brightcove(page)
        or self._streams_dailymotion(page)
        or self._streams_brightcove_js(page)
        or self._streams_audio(page)
    )
def _get_streams(self):
    """Find the showroom room id on the page, check that the room is live,
    then fetch the "hls_all" streaming URL and return its variant playlists."""
    re_room_id = re.compile(
        r"share_url:\"https:[^?]+?\?room_id=(?P<room_id>\d+)\"")
    room_id = self.session.http.get(
        self.url,
        schema=validate.Schema(
            validate.parse_html(),
            validate.xml_xpath_string(
                ".//script[contains(text(),'share_url:\"https:')][1]/text()"
            ),
            validate.any(
                None,
                validate.all(validate.transform(re_room_id.search),
                             validate.any(None, validate.get("room_id"))))))
    if not room_id:
        return
    live_status, self.title = self.session.http.get(
        "https://www.showroom-live.com/api/live/live_info",
        params={"room_id": room_id},
        schema=validate.Schema(
            validate.parse_json(),
            {
                "live_status": int,
                "room_name": str,
            },
            validate.union_get(
                "live_status",
                "room_name",
            )))
    if live_status != self.LIVE_STATUS:
        log.info("This stream is currently offline")
        return
    url = self.session.http.get(
        "https://www.showroom-live.com/api/live/streaming_url",
        params={
            "room_id": room_id,
            "abr_available": 1,
        },
        schema=validate.Schema(
            validate.parse_json(),
            {
                "streaming_url_list": [{
                    "type": str,
                    "url": validate.url(),
                }]
            },
            validate.get("streaming_url_list"),
            validate.filter(lambda p: p["type"] == "hls_all"),
            validate.get((0, "url"))),
    )
    # Restricted streams answer with a non-HLS content type (e.g. an error page).
    res = self.session.http.get(url, acceptable_status=(200, 403, 404))
    if res.headers["Content-Type"] != "application/x-mpegURL":
        log.error("This stream is restricted")
        return
    return HLSStream.parse_variant_playlist(self.session, url)
def get_live(self, username):
    """Resolve the streaming server from player.js, query the live API for
    channel/multistream state, and return the channel's HLS streams."""
    netloc = self.session.http.get(self.url, schema=validate.Schema(
        validate.parse_html(),
        validate.xml_xpath_string(".//script[contains(@src,'/stream/player.js')][1]/@src"),
        validate.any(None, validate.transform(lambda src: urlparse(src).netloc))
    ))
    if not netloc:
        log.error("Could not find server netloc")
        return
    channel, multistreams = self.session.http.get(self.API_URL_LIVE.format(username=username), schema=validate.Schema(
        validate.parse_json(),
        {
            "channel": validate.any(None, {
                "stream_name": str,
                "title": str,
                "online": bool,
                "private": bool,
                "categories": [{"label": str}],
            }),
            "getMultiStreams": validate.any(None, {
                "multistream": bool,
                "streams": [{
                    "name": str,
                    "online": bool,
                }],
            }),
        },
        validate.union_get("channel", "getMultiStreams")
    ))
    if not channel or not multistreams:
        log.debug("Missing channel or streaming data")
        return
    log.trace(f"netloc={netloc!r}")
    log.trace(f"channel={channel!r}")
    log.trace(f"multistreams={multistreams!r}")
    if not channel["online"]:
        log.error("User is not online")
        return
    if channel["private"]:
        log.info("This is a private stream")
        return
    # Expose stream metadata on the plugin instance.
    self.author = username
    self.category = channel["categories"][0]["label"]
    self.title = channel["title"]
    hls_url = self.HLS_URL.format(
        netloc=netloc,
        file_name=channel["stream_name"]
    )
    return HLSStream.parse_variant_playlist(self.session, hls_url)
def _get_streams(self):
    """Extract the player JSON from the page, check geo rights via the IOL
    rights service, sign the HLS URL with wmsAuthSign, and return streams."""
    self.session.http.headers.update(
        {"Referer": "https://tviplayer.iol.pt/"})
    data = self.session.http.get(
        self.url,
        schema=validate.Schema(
            validate.parse_html(),
            validate.xml_xpath_string(
                ".//script[contains(text(),'.m3u8')]/text()"),
            validate.text,
            validate.transform(self._re_jsonData.search),
            validate.any(
                None,
                validate.all(
                    validate.get("json"),
                    validate.parse_json(),
                    {
                        "id": validate.text,
                        "liveType": validate.text,
                        "videoType": validate.text,
                        "videoUrl": validate.url(path=validate.endswith(".m3u8")),
                        validate.optional("channel"): validate.text,
                    }))))
    if not data:
        return
    log.debug("{0!r}".format(data))
    # "DIRETO"/"LIVE" marks a live broadcast; anything else is VOD.
    if data["liveType"].upper() == "DIRETO" and data["videoType"].upper() == "LIVE":
        geo_path = "live"
    else:
        geo_path = "vod"
    data_geo = self.session.http.get(
        "https://services.iol.pt/direitos/rights/{0}?id={1}".format(
            geo_path, data['id']),
        acceptable_status=(200, 403),
        schema=validate.Schema(
            validate.parse_json(),
            {
                "code": validate.text,
                "error": validate.any(None, validate.text),
                "detail": validate.text,
            }))
    log.debug("{0!r}".format(data_geo))
    if data_geo["detail"] != "ok":
        log.error("{0}".format(data_geo['detail']))
        return
    # The matrix endpoint returns the raw wmsAuthSign token as plain text.
    wmsAuthSign = self.session.http.get(
        "https://services.iol.pt/matrix?userId=",
        schema=validate.Schema(validate.text))
    hls_url = update_qsd(data["videoUrl"], {"wmsAuthSign": wmsAuthSign})
    return HLSStream.parse_variant_playlist(self.session, hls_url)
def _find_steamid(self, url):
    """Read the broadcast's steamid from the page's webui_config data attribute."""
    steamid_schema = validate.all(
        str,
        validate.parse_json(),
        {"steamid": str},
        validate.get("steamid"),
    )
    return self.session.http.get(
        url,
        schema=validate.Schema(
            validate.parse_html(),
            validate.xml_xpath_string(
                ".//div[@id='webui_config']/@data-broadcast"),
            validate.any(None, steamid_schema),
        ),
    )
def _get_streams(self):
    """Find the player's data URL on the page, fetch the MEDIAINFO JSON, and
    yield streams: an HLS variant playlist for the "auto" quality, otherwise
    one progressive HTTP stream per quality.

    Fix: log.debug() was called with a str.format()-style placeholder
    ("Player URL: '{0}'") plus a lazy argument; the logging module applies
    %-formatting, so the URL was never interpolated and logging reported an
    internal formatting error. Use a %s placeholder instead.
    """
    try:
        data_url = self.session.http.get(
            self.url,
            schema=validate.Schema(
                validate.parse_html(),
                validate.xml_find(".//*[@data-ctrl-player]"),
                validate.get("data-ctrl-player"),
                # The attribute holds a JS object literal with single quotes.
                validate.transform(lambda s: s.replace("'", "\"")),
                validate.parse_json(),
                {"url": validate.text},
                validate.get("url")))
    except PluginError:
        return
    data_url = urljoin(self._URL_DATA_BASE, data_url)
    log.debug("Player URL: '%s'", data_url)
    self.title, media = self.session.http.get(
        data_url,
        schema=validate.Schema(
            validate.parse_json(name="MEDIAINFO"),
            {
                "mc": {
                    validate.optional("_title"): validate.text,
                    "_mediaArray": [
                        validate.all(
                            {
                                "_mediaStreamArray": [
                                    validate.all(
                                        {
                                            "_quality": validate.any(
                                                validate.text, int),
                                            "_stream": [validate.url()],
                                        },
                                        validate.union_get(
                                            "_quality", ("_stream", 0)))
                                ]
                            },
                            validate.get("_mediaStreamArray"),
                            validate.transform(dict))
                    ]
                }
            },
            validate.get("mc"),
            validate.union_get("_title", ("_mediaArray", 0))))
    if media.get("auto"):
        # "auto" is an adaptive HLS master playlist.
        for s in HLSStream.parse_variant_playlist(
                self.session, media.get("auto")).items():
            yield s
    else:
        for quality, stream in media.items():
            yield self._QUALITY_MAP.get(quality, quality), HTTPStream(
                self.session, stream)
def _get_streams(self):
    """Find an HLS <source> element on the page and return its variant streams."""
    schema = validate.Schema(
        validate.parse_html(),
        validate.xml_xpath_string(
            ".//video/source[@src][@type='application/x-mpegURL'][1]/@src"
        ),
    )
    hls_url = self.session.http.get(self.url, schema=schema)
    if hls_url:
        return HLSStream.parse_variant_playlist(self.session, hls_url)
def _get_streams(self):
    """Dispatch to the live/VOD/audio handler based on the hidden player_type input."""
    root = self.session.http.get(
        self.url,
        schema=validate.Schema(validate.parse_html()),
    )
    player_type = root.xpath(
        "string(.//input[@type='hidden'][@name='player_type'][1]/@value)")
    handlers = {
        "dwlivestream": self._get_live_streams,
        "video": self._get_vod_streams,
        "audio": self._get_audio_streams,
    }
    handler = handlers.get(player_type)
    if handler is not None:
        return handler(root)
def get_hls_url(self):
    """Follow the embed chain (page -> embed iframe -> ovva-player config ->
    balancer) and return the final HLS playlist URL."""
    self.session.http.cookies.clear()
    url_parts = self.session.http.get(
        url=self.url,
        schema=validate.Schema(
            validate.parse_html(),
            validate.xml_xpath_string(".//iframe[contains(@src,'embed')]/@src")))
    if not url_parts:
        raise NoStreamsError("Missing url_parts")
    log.trace(f"url_parts={url_parts}")
    self.session.http.headers.update({"Referer": self.url})
    try:
        # The ovva-player config is a base64-encoded JSON blob in a script tag.
        url_ovva = self.session.http.get(
            url=urljoin(self.url, url_parts),
            schema=validate.Schema(
                validate.parse_html(),
                validate.xml_xpath_string(".//script[@type='text/javascript'][contains(text(),'ovva-player')]/text()"),
                str,
                validate.transform(self._re_data.search),
                validate.get(1),
                validate.transform(lambda x: b64decode(x).decode()),
                validate.parse_json(),
                {"balancer": validate.url()},
                validate.get("balancer")
            ))
    except (PluginError, TypeError) as err:
        # TypeError covers a failed regex search (None has no group access).
        log.error(f"ovva-player: {err}")
        return
    log.debug(f"url_ovva={url_ovva}")
    # The balancer responds with "302=<hls-url>".
    url_hls = self.session.http.get(
        url=url_ovva,
        schema=validate.Schema(
            validate.transform(lambda x: x.split("=")),
            ["302", validate.url(path=validate.endswith(".m3u8"))],
            validate.get(1)))
    return url_hls
def get_channels(self):
    """Return a mapping of channel data-id to data-code for every entry
    in the page's channel list."""
    pairs = self.session.http.get(
        self.url,
        schema=validate.Schema(
            validate.parse_html(),
            validate.xml_xpath(
                ".//*[contains(@class,'channel-list')]//a[@data-id][@data-code]"
            ),
            [
                validate.union_get("data-id", "data-code"),
            ],
        ))
    return dict(pairs)
def _get_streams(self):
    """Read the js-live-data JSON from the page, verify the room is live, then
    fetch the "hls_all" streaming URL and return Showroom HLS streams."""
    data = self.session.http.get(
        self.url,
        schema=validate.Schema(
            validate.parse_html(),
            validate.xml_xpath_string(
                ".//script[@id='js-live-data'][@data-json]/@data-json"),
            validate.any(
                None,
                validate.all(
                    validate.parse_json(),
                    {
                        "is_live": int,
                        "room_id": int,
                        validate.optional("room"): {
                            "content_region_permission": int,
                            "is_free": int
                        }
                    },
                ))))
    if not data:  # URL without livestream
        return
    log.debug(f"{data!r}")
    if data["is_live"] != 1:
        log.info("This stream is currently offline")
        return
    url = self.session.http.get(
        "https://www.showroom-live.com/api/live/streaming_url",
        params={
            "room_id": data["room_id"],
            "abr_available": 1
        },
        schema=validate.Schema(
            validate.parse_json(),
            {
                "streaming_url_list": [{
                    "type": str,
                    "url": validate.url(),
                }]
            },
            validate.get("streaming_url_list"),
            validate.filter(lambda p: p["type"] == "hls_all"),
            validate.get((0, "url"))),
    )
    # Restricted streams answer with a non-HLS content type (e.g. an error page).
    res = self.session.http.get(url, acceptable_status=(200, 403, 404))
    if res.headers["Content-Type"] != "application/x-mpegURL":
        log.error("This stream is restricted")
        return
    return ShowroomHLSStream.parse_variant_playlist(self.session, url)
def _parse_streams(self, res):
    """Yield streams found in a Facebook page response: the og:video:url meta
    tag, inline src URLs, or an escaped inline DASH manifest."""
    stream_url = validate.Schema(
        validate.parse_html(),
        validate.xml_xpath_string(
            ".//head/meta[@property='og:video:url'][@content][1]/@content")
    ).validate(res.text)
    if not stream_url:
        log.debug("No meta og:video:url")
    else:
        if ".mpd" in stream_url:
            for s in DASHStream.parse_manifest(self.session, stream_url).items():
                yield s
            return
        elif ".mp4" in stream_url:
            yield "vod", HTTPStream(self.session, stream_url)
            return
    # Fall back to scanning the raw page source for stream URLs.
    for match in self._src_re.finditer(res.text):
        stream_url = match.group("url")
        if "\\/" in stream_url:
            # if the URL is json encoded, decode it
            stream_url = parse_json("\"{}\"".format(stream_url))
        if ".mpd" in stream_url:
            for s in DASHStream.parse_manifest(self.session, stream_url).items():
                yield s
        elif ".mp4" in stream_url:
            yield match.group(1), HTTPStream(self.session, stream_url)
        else:
            log.debug("Non-dash/mp4 stream: {0}".format(stream_url))
    match = self._dash_manifest_re.search(res.text)
    if match:
        # facebook replaces "<" characters with the substring "\\x3C"
        manifest = match.group("manifest").replace("\\/", "/")
        # Python 2/3 compatible unescaping of the embedded manifest.
        if is_py3:
            manifest = bytes(unquote_plus(manifest), "utf-8").decode("unicode_escape")
        else:
            manifest = unquote_plus(manifest).decode("string_escape")
        # Ignore unsupported manifests until DASH SegmentBase support is implemented
        if "SegmentBase" in manifest:
            log.error("Skipped DASH manifest with SegmentBase streams")
        else:
            for s in DASHStream.parse_manifest(self.session, manifest).items():
                yield s
def _get_streams(self):
    """Check for region/subscription error banners, then locate the stream URL
    either in an inline script (m3u8/mpd) or in a <source> element."""
    root = self.session.http.get(self.url, schema=validate.Schema(
        validate.parse_html()))
    # Bail out early with a clear message when the page embeds a known error.
    for needle, errormsg in (
        (
            "This service is not available in your Country",
            "The content is not available in your region",
        ),
        (
            "Silahkan login Menggunakan akun MyIndihome dan berlangganan minipack",
            "The content is not available without a subscription",
        ),
    ):
        if validate.Schema(
                validate.xml_xpath(
                    """.//script[contains(text(), '"{0}"')]""".format(
                        needle))).validate(root):
            log.error(errormsg)
            return
    url = validate.Schema(
        validate.any(
            validate.all(
                validate.xml_xpath_string(""" .//script[contains(text(), 'laylist.m3u8') or contains(text(), 'manifest.mpd')][1]/text() """),
                validate.text,
                validate.transform(
                    re.compile(
                        r"""(?P<q>['"])(?P<url>https://.*?/(?:[Pp]laylist\.m3u8|manifest\.mpd).+?)(?P=q)"""
                    ).search),
                validate.any(
                    None,
                    validate.all(validate.get("url"), validate.url())),
            ),
            validate.all(
                validate.xml_xpath_string(
                    ".//video[@id='video-player']/source/@src"),
                validate.any(None, validate.url()),
            ),
        )).validate(root)
    if url and ".m3u8" in url:
        return HLSStream.parse_variant_playlist(self.session, url)
    elif url and ".mpd" in url:
        return DASHStream.parse_manifest(self.session, url)
def _get_vod_stream(self):
    """Find a VOD source on the page: og:video meta tag, embedded YouTube
    video id, or a YouTube iframe, in that order."""
    root = self.session.http.get(
        self.url,
        schema=validate.Schema(validate.parse_html()),
    )

    og_video = root.xpath("string(.//meta[@property='og:video'][1]/@content)")
    if og_video:
        return {"vod": HTTPStream(self.session, og_video)}

    yt_video_id = root.xpath("string(.//div[@data-google-src]/@data-video-id)")
    if yt_video_id:
        return self.session.streams(f"https://www.youtube.com/watch?v={yt_video_id}")

    yt_iframe_src = root.xpath("string(.//iframe[@id='pfpPlayer'][starts-with(@src,'https://www.youtube.com/')][1]/@src)")
    if yt_iframe_src:
        return self.session.streams(yt_iframe_src)
def _get_streams(self):
    """Extract the HLS <source> URL and the page title in one request, then
    open the variant playlist with the page as Referer."""
    schema = validate.Schema(
        validate.parse_html(),
        validate.union((
            validate.xml_xpath_string(
                ".//video/source[@src][@type='application/x-mpegURL'][1]/@src"
            ),
            validate.xml_xpath_string(".//head/title[1]/text()"),
        )),
    )
    hls_url, self.title = self.session.http.get(self.url, schema=schema)
    if not hls_url:
        return
    return HLSStream.parse_variant_playlist(
        self.session,
        hls_url,
        headers={"Referer": self.url},
    )