def _get_streams(self):
    """Look up the embedded content ID and return its HLS variant streams.

    Stores the content ID and the page title on ``self.id`` and
    ``self.title``. Returns ``None`` when the page has no content ID or
    when the access endpoint answers with an error message.
    """
    page_schema = validate.Schema(
        validate.parse_html(),
        validate.union((
            validate.xml_xpath_string(
                ".//script[@class='dacast-video'][@id]/@id"),
            validate.xml_xpath_string(".//head/title[1]/text()"),
        )),
    )
    self.id, self.title = self.session.http.get(self.url, schema=page_schema)
    if not self.id:
        return

    # The shape of the content ID decides which provider is queried.
    provider = "dacast" if re.match(r"\w+_\w+_\w+", self.id) else "universe"

    # Error statuses are accepted so that the JSON error body can be logged.
    access_schema = validate.Schema(
        validate.parse_json(),
        validate.any(
            {"error": str},
            {"hls": validate.url()},
        ),
    )
    data = self.session.http.get(
        f"https://playback.dacast.com/content/access?contentId={self.id}&provider={provider}",
        acceptable_status=(200, 400, 403, 404),
        schema=access_schema,
    )

    error = data.get("error")
    if error:
        log.error(error)
        return

    return HLSStream.parse_variant_playlist(self.session, data["hls"])
def __init__(self, url: str):
    """Initialise the plugin and pre-build the regex and schemas it uses.

    :param url: URL passed on to the parent constructor.
    """
    super().__init__(url)

    self._json_data_re = re.compile(r'teliaPlayer\((\{.*?\})\);', re.DOTALL)

    def _to_double_quotes(text):
        # The captured object literal uses single quotes; JSON needs double quotes.
        return text.replace("'", '"')

    def _drop_trailing_commas(text):
        # Remove a comma that directly precedes a closing brace.
        return re.sub(r",\s*\}", "}", text, flags=re.DOTALL)

    def _is_hls(src):
        return src["type"] == "application/x-mpegURL"

    # Main page -> URL of the embedding iframe.
    self.main_page_schema = validate.Schema(
        validate.parse_html(),
        validate.xml_xpath_string(
            ".//iframe[contains(@src, 'ltv.lsm.lv/embed')][1]/@src"),
        validate.url(),
    )

    # Embed page -> JSON attribute -> embed code (HTML) -> inner iframe URL.
    self.embed_code_schema = validate.Schema(
        validate.parse_html(),
        validate.xml_xpath_string(".//live[1]/@*[name()=':embed-data']"),
        str,
        validate.parse_json(),
        {"source": {"embed_code": str}},
        validate.get(("source", "embed_code")),
        validate.parse_html(),
        validate.xml_xpath_string(".//iframe[@src][1]/@src"),
    )

    # Player page text -> "channel" value, or None if the call is not found.
    self.player_apicall_schema = validate.Schema(
        validate.transform(self._json_data_re.search),
        validate.any(
            None,
            validate.all(
                validate.get(1),
                validate.transform(_to_double_quotes),
                validate.transform(_drop_trailing_commas),
                validate.parse_json(),
                {"channel": str},
                validate.get("channel"),
            ),
        ),
    )

    # Sources JSON -> "src" values of the entries typed as HLS.
    self.sources_schema = validate.Schema(
        validate.parse_json(),
        {
            "source": {
                "sources": validate.all(
                    [{"type": str, "src": validate.url()}],
                    validate.filter(_is_hls),
                    validate.map(lambda src: src.get("src")),
                ),
            },
        },
        validate.get(("source", "sources")),
    )
def _get_token_req_url(self):
    """Build the token request URL from the page's ``LIVE_URL`` script.

    Extracts the token request host and the token seed string from the
    script text, derives a token for the current hour (falling back to the
    previous hour) and returns the host URL with the token added as the
    ``rsk`` query parameter. Returns ``None`` when the seed string, the
    host or the token cannot be obtained.
    """
    script_xpath = ".//script[contains(text(), 'LIVE_URL')]/text()"

    def _first_group(pattern, group_schema):
        # Both values are extracted the same way: find the script, search
        # it with the pattern, validate capture group 1. Any missing step
        # yields None.
        schema = validate.Schema(
            validate.xml_xpath_string(script_xpath),
            validate.any(
                None,
                validate.all(
                    validate.transform(pattern.search),
                    validate.any(
                        None,
                        validate.all(
                            validate.get(1),
                            group_schema,
                        ),
                    ),
                ),
            ),
        )
        return validate.validate(schema, self.page)

    token_req_host_re = re.compile(r"""jQuery\.get\s*\(['"]([^'"]+)['"]""")
    token_req_host = _first_group(token_req_host_re, validate.url())
    log.debug("token_req_host={0}".format(token_req_host))

    token_req_str_re = re.compile(
        r"""Math\.floor\(Date\.now\(\)\s*/\s*3600000\),\s*['"]([^'"]+)['"]"""
    )
    token_req_str = _first_group(token_req_str_re, validate.text)
    log.debug("token_req_str={0}".format(token_req_str))

    if not token_req_str:
        return

    # Hours since the epoch, mirroring the Date.now() / 3600000 in the script.
    date = int(time.time() // 3600)
    token_req_token = self.transform_token(token_req_str, date)
    if not token_req_token:
        token_req_token = self.transform_token(token_req_str, date - 1)

    if token_req_host and token_req_token:
        return update_qsd(token_req_host, {"rsk": token_req_token})
def _get_streams(self):
    """Return HLS or DASH streams for the page, or ``None``.

    The page is first checked for known "not available" messages inside
    script tags; if one is found, an error is logged and nothing is
    returned. Otherwise a playlist/manifest URL is taken from a script tag
    or, failing that, from the ``video-player`` source element.
    """
    root = self.session.http.get(
        self.url, schema=validate.Schema(validate.parse_html()))

    blockers = (
        (
            "This service is not available in your Country",
            "The content is not available in your region",
        ),
        (
            "Silahkan login Menggunakan akun MyIndihome dan berlangganan minipack",
            "The content is not available without a subscription",
        ),
    )
    for needle, errormsg in blockers:
        needle_xpath = """.//script[contains(text(), '"{0}"')]""".format(needle)
        if validate.Schema(validate.xml_xpath(needle_xpath)).validate(root):
            log.error(errormsg)
            return

    url_re = re.compile(
        r"""(?P<q>['"])(?P<url>https://.*?/(?:[Pp]laylist\.m3u8|manifest\.mpd).+?)(?P=q)"""
    )
    # First choice: a quoted URL inside a script mentioning a playlist/manifest.
    from_script = validate.all(
        validate.xml_xpath_string(""" .//script[contains(text(), 'laylist.m3u8') or contains(text(), 'manifest.mpd')][1]/text() """),
        validate.text,
        validate.transform(url_re.search),
        validate.any(
            None,
            validate.all(validate.get("url"), validate.url()),
        ),
    )
    # Fallback: the source of the video-player element.
    from_video_tag = validate.all(
        validate.xml_xpath_string(".//video[@id='video-player']/source/@src"),
        validate.any(None, validate.url()),
    )
    url = validate.Schema(validate.any(from_script, from_video_tag)).validate(root)

    if not url:
        return
    if ".m3u8" in url:
        return HLSStream.parse_variant_playlist(self.session, url)
    if ".mpd" in url:
        return DASHStream.parse_manifest(self.session, url)
def _get_streams(self):
    """Return HLS variant streams for the live URL in ``__NEXT_DATA__``.

    Returns ``None`` when fetching or validating the page raises
    ``PluginError`` (for example when the page props are not of type
    ``"live"`` or the URL does not end in ``.m3u8``).
    """

    def _fix_scheme_slashes(url):
        # The page data may carry "https:////" instead of "https://".
        return url.replace("https:////", "https://")

    schema = validate.Schema(
        validate.parse_html(),
        validate.xml_xpath_string(".//script[@type='application/json'][@id='__NEXT_DATA__']/text()"),
        str,
        validate.parse_json(),
        {
            "props": {
                "pageProps": {
                    "type": "live",
                    "url": validate.all(
                        str,
                        validate.transform(_fix_scheme_slashes),
                        validate.url(path=validate.endswith(".m3u8")),
                    ),
                },
            },
        },
        validate.get(("props", "pageProps", "url")),
    )

    try:
        hls = self.session.http.get(self.url, schema=schema)
    except PluginError:
        return

    return HLSStream.parse_variant_playlist(self.session, hls)
def _get_streams(self):
    """Return HLS variant streams for the page's ``<video>`` HLS source.

    Also stores the page title on ``self.title``. Returns ``None`` when no
    HLS source is found. The playlist is requested with the page URL as
    the ``Referer`` header.
    """
    schema = validate.Schema(
        validate.parse_html(),
        validate.union((
            validate.xml_xpath_string(
                ".//video/source[@src][@type='application/x-mpegURL'][1]/@src"
            ),
            validate.xml_xpath_string(".//head/title[1]/text()"),
        )),
    )
    hls_url, self.title = self.session.http.get(self.url, schema=schema)

    if hls_url:
        return HLSStream.parse_variant_playlist(
            self.session,
            hls_url,
            headers={"Referer": self.url},
        )
    return
def _is_token_based_site(self):
    """Return whether ``self.page`` has a script containing ``jQuery.get``."""
    script_text = validate.validate(
        validate.Schema(
            validate.xml_xpath_string(".//script[contains(text(), 'jQuery.get')]/text()"),
        ),
        self.page,
    )
    is_token_based_site = script_text is not None
    log.debug(f"is_token_based_site={is_token_based_site}")
    return is_token_based_site
def _get_vod(self, root):
    """Return streams for the ``VideoObject`` described in the page's JSON-LD.

    :param root: parsed document that the XPath query is run against.
    :return: HLS variant streams when the content URL path ends in
        ``.m3u8``, otherwise a ``{"vod": HTTPStream}`` mapping; ``None``
        when validation raises ``PluginError``.
    """

    def _strip_newlines(jsonlike):
        return re.sub(r"[\r\n]+", "", jsonlike)

    def _is_video_object(obj):
        return obj["@type"] == "VideoObject"

    # The JSON-LD is either a "@graph" list holding the VideoObject or the
    # object itself.
    from_graph = validate.all(
        {"@graph": [dict]},
        validate.get("@graph"),
        validate.filter(_is_video_object),
        validate.get(0),
    )
    schema_vod = validate.Schema(
        validate.xml_xpath_string(
            ".//script[@type='application/ld+json'][contains(text(),'VideoObject')][1]/text()"
        ),
        str,
        validate.transform(_strip_newlines),
        validate.parse_json(),
        validate.any(from_graph, dict),
        {"contentUrl": validate.url()},
        validate.get("contentUrl"),
        validate.transform(
            lambda content_url: update_scheme("https://", content_url)),
    )

    try:
        vod = schema_vod.validate(root)
    except PluginError:
        return

    is_hls = urlparse(vod).path.endswith(".m3u8")
    if not is_hls:
        return {"vod": HTTPStream(self.session, vod)}
    return HLSStream.parse_variant_playlist(self.session, vod)
def login_csrf(self):
    """Fetch the login page and return the value of its CSRF input field.

    The field is the first ``<input>`` whose name equals ``self.CSRF_NAME``.
    """
    csrf_xpath = ".//input[@name='{0}'][1]/@value".format(self.CSRF_NAME)
    schema = validate.Schema(
        validate.parse_html(),
        validate.xml_xpath_string(csrf_xpath),
    )
    return self.session.http.get(self.login_url, schema=schema)
def get_script_str(self, root, search_string, custom_pattern=None, custom_schema=None):
    """Extract a value from a JavaScript ``<script>`` containing ``search_string``.

    :param root: parsed document that the XPath query is run against.
    :param search_string: text the script must contain; also used to build
        the default ``<search_string> = '<value>';`` pattern.
    :param custom_pattern: optional regex replacing the default pattern;
        capture group 1 is the extracted value.
    :param custom_schema: optional schema applied to the extracted value.
        A ``ValueError`` raised by it is ignored and the unvalidated value
        is kept.
    :return: the extracted (and possibly further validated) value, or
        ``None`` when nothing was found.
    """
    pattern = custom_pattern or fr"{search_string}\s*=\s*'([^']+)';"
    script_xpath = f".//script[@type='text/javascript'][contains(text(),'{search_string}')]/text()"

    _schema = validate.Schema(
        validate.xml_xpath_string(script_xpath),
        validate.any(
            None,
            validate.all(
                validate.transform(re.compile(pattern).search),
                validate.any(
                    None,
                    validate.all(validate.get(1), str),
                ),
            ),
        ),
    )
    found = validate.validate(_schema, root)
    if not found:
        log.debug(f"Failed to find {search_string}")

    if custom_schema:
        try:
            found = validate.validate(custom_schema, found)
        except ValueError:
            pass

    return found
def _get_streams(self):
    """Return HLS variant streams for the first video in the page's content JSON.

    The page is searched for a script containing ``HLS``; the JSON captured
    by ``self._re_content`` is validated and the title and first HLS source
    URL of the first video are extracted. ``self.title`` is set when the
    data is found.

    :return: the parsed variant playlist, or ``None`` when the script or
        the JSON is missing, or when no HLS source URL is available.
    """
    video_schema = {
        "title": str,
        "sources": validate.all(
            [{"url": str, "type": str}],
            validate.filter(lambda p: p["type"].lower() == "hls"),
            validate.get((0, "url")),
        ),
    }
    schema = validate.Schema(
        validate.parse_html(),
        validate.xml_xpath_string(".//script[contains(text(), 'HLS')]/text()"),
        validate.any(None, validate.all(
            validate.transform(self._re_content.search),
            validate.any(None, validate.all(
                validate.get(1),
                validate.parse_json(),
                {str: {"children": {"top": {"model": {"videos": [video_schema]}}}}},
                # The top-level key is arbitrary: take the first value.
                validate.transform(lambda k: next(iter(k.values()))),
                validate.get(("children", "top", "model", "videos", 0)),
                validate.union_get("title", "sources"),
            )),
        )),
    )

    result = self.session.http.get(self.url, schema=schema)
    # The schema deliberately lets None through when the script or the regex
    # match is missing; unpacking None would raise TypeError.
    if not result:
        return

    self.title, hls_url = result
    # Without a URL, urljoin() would return the page URL itself, which is
    # not a playlist.
    if not hls_url:
        return

    return HLSStream.parse_variant_playlist(self.session, urljoin(self.url, hls_url))
def follow_vk_redirect(self):
    """Rewrite ``self.url`` until it carries a video ID.

    Tries, in order: the URL as it is, the path taken from the ``z`` query
    parameter, and the page's ``og:url`` meta tag. Returns ``None`` as soon
    as ``self._has_video_id()`` is true.

    :raises NoStreamsError: when none of the attempts yields a video ID.
    """
    if self._has_video_id():
        return

    try:
        parts = urlparse(self.url)
        # First non-empty "z" parameter, keeping only the part before any "/".
        candidates = (
            unquote(value).split("/")[0]
            for key, value in parse_qsl(parts.query)
            if key == "z" and len(value) > 0
        )
        true_path = next(candidates)
        self.url = f"{parts.scheme}://{parts.netloc}/{true_path}"
        if self._has_video_id():
            return
    except StopIteration:
        pass

    og_url_schema = validate.Schema(
        validate.parse_html(),
        validate.xml_xpath_string(
            ".//head/meta[@property='og:url'][@content]/@content"),
        str,
    )
    try:
        self.url = self.session.http.get(self.url, schema=og_url_schema)
    except PluginError:
        pass

    if not self._has_video_id():
        raise NoStreamsError(self.url)
def _get_live_streams(self):
    """Return the live streams for the page.

    When the page embeds a video ID on a ``data-google-src`` element, the
    streams are resolved through the matching YouTube watch URL. Otherwise
    the API is queried for an info URL, which in turn provides the primary
    HLS playlist URL.
    """
    page_schema = validate.Schema(
        validate.parse_html(),
        validate.xml_xpath_string(".//div[@data-google-src]/@data-video-id"),
    )
    video_id = self.session.http.get(self.url, schema=page_schema)
    if video_id:
        return self.session.streams(
            f"https://www.youtube.com/watch?v={video_id}")

    api_url = self.API_URL.format(subdomain=self.match.group("subdomain"))
    info_schema = validate.Schema(
        validate.parse_json(),
        {"url": validate.url()},
        validate.get("url"),
        validate.transform(lambda url: update_scheme("https://", url)),
    )
    info_url = self.session.http.get(api_url, schema=info_schema)

    hls_schema = validate.Schema(
        validate.parse_json(),
        {
            "status": "ok",
            "protocol": "hls",
            "primary": validate.url(),
        },
        validate.get("primary"),
    )
    hls_url = self.session.http.get(info_url, schema=hls_schema)

    return HLSStream.parse_variant_playlist(self.session, hls_url)
def _get_streams_delfi(self, src):
    """Yield HLS streams for every version listed in the embedded player data.

    The page at ``src`` is searched for the script that sets the embed
    ``src`` attribute. The URL's fragment is parsed as a query string whose
    ``stream`` entry holds JSON with a ``versions`` list; each version
    contributes the streams of its ``hls`` variant playlist.

    :param src: URL of the iframe page to inspect.
    :return: generator of ``(name, stream)`` pairs; empty when the data
        cannot be obtained.
    """
    embed_src_re = re.compile(r"embedJs\.setAttribute\('src',\s*'(.+?)'")
    schema = validate.Schema(
        validate.parse_html(),
        validate.xml_xpath_string(".//script[contains(text(),'embedJs.setAttribute(')][1]/text()"),
        validate.any(None, validate.all(
            validate.text,
            validate.transform(embed_src_re.search),
            validate.any(None, validate.all(
                validate.get(1),
                validate.transform(lambda url: parse_qsd(urlparse(url).fragment)),
                {"stream": validate.text},
                validate.get("stream"),
                validate.parse_json(),
                {"versions": [{
                    "hls": validate.text
                }]},
                validate.get("versions")
            ))
        ))
    )

    try:
        data = self.session.http.get(src, schema=schema)
    except PluginError:
        log.error("Failed to get streams from iframe")
        return

    # The schema lets None through when the script or the regex match is
    # missing; iterating None would raise TypeError.
    if not data:
        return

    for stream in data:
        hls_url = update_scheme("https://", stream["hls"], force=False)
        yield from HLSStream.parse_variant_playlist(self.session, hls_url).items()
# Validating a value that is not an element with ``xml_xpath_string`` must
# raise ``ValidationError``; the raised error is compared against the expected
# message text via ``assert_validationerror``.
def test_failure_schema(self): with pytest.raises(validate.ValidationError) as cm: validate.validate(validate.xml_xpath_string("."), "not-an-element") assert_validationerror( cm.value, """ ValidationError(Callable): iselement('not-an-element') is not true """)
def _schema_canonical(self, data):
    """Return the ``video_id`` group matched on the page's canonical link.

    :param data: HTML text to parse.
    :return: the ``video_id`` group obtained by applying
        ``self.matcher.match`` to the first ``rel="canonical"`` link href.
    """
    return validate.Schema(
        validate.parse_html(),
        validate.xml_xpath_string(".//link[@rel='canonical'][1]/@href"),
        validate.transform(self.matcher.match),
        validate.get("video_id"),
    ).validate(data)
def _streams_audio(self, root):
    """Return an ``{"audio": HTTPStream}`` mapping for the page's audio URL.

    The URL is taken from an ``<audio>`` source containing ``.mp3`` or,
    failing that, from the ``data-media-url`` attribute of an audio-player
    ``<div>``.

    :param root: parsed document that the XPath queries are run against.
    :return: the stream mapping, or ``None`` when validation raises
        ``PluginError``.
    """
    from_audio_tag = validate.all(
        validate.xml_xpath_string(".//audio/source[contains(@src,'.mp3')][1]/@src"),
        str,
    )
    from_player_div = validate.all(
        validate.xml_xpath_string(".//div[contains(@class,'audio-player')][@data-media-url][1]/@data-media-url"),
        str,
    )
    schema_audio = validate.Schema(validate.any(from_audio_tag, from_player_div))

    try:
        audio_url = schema_audio.validate(root)
    except PluginError:
        return

    stream = HTTPStream(self.session, audio_url)
    return {"audio": stream}
def get_live(self, username):
    """Return the HLS variant streams of ``username``'s live channel.

    The streaming server's network location is read from the page's
    ``player.js`` script URL, and the channel state is fetched from the
    live API. Sets ``self.author``, ``self.category`` and ``self.title``.

    :param username: channel name used in the API URL and stored as author.
    :return: the parsed variant playlist, or ``None`` when the server or
        channel data is missing, or the channel is offline or private.
    """
    netloc_schema = validate.Schema(
        validate.parse_html(),
        validate.xml_xpath_string(".//script[contains(@src,'/stream/player.js')][1]/@src"),
        validate.any(None, validate.transform(lambda src: urlparse(src).netloc)),
    )
    netloc = self.session.http.get(self.url, schema=netloc_schema)
    if not netloc:
        log.error("Could not find server netloc")
        return

    channel_schema = {
        "stream_name": str,
        "title": str,
        "online": bool,
        "private": bool,
        "categories": [{"label": str}],
    }
    multistreams_schema = {
        "multistream": bool,
        "streams": [{
            "name": str,
            "online": bool,
        }],
    }
    api_schema = validate.Schema(
        validate.parse_json(),
        {
            "channel": validate.any(None, channel_schema),
            "getMultiStreams": validate.any(None, multistreams_schema),
        },
        validate.union_get("channel", "getMultiStreams"),
    )
    channel, multistreams = self.session.http.get(
        self.API_URL_LIVE.format(username=username),
        schema=api_schema,
    )
    if not (channel and multistreams):
        log.debug("Missing channel or streaming data")
        return

    log.trace(f"netloc={netloc!r}")
    log.trace(f"channel={channel!r}")
    log.trace(f"multistreams={multistreams!r}")

    if not channel["online"]:
        log.error("User is not online")
        return

    if channel["private"]:
        log.info("This is a private stream")
        return

    self.author = username
    self.category = channel["categories"][0]["label"]
    self.title = channel["title"]

    hls_url = self.HLS_URL.format(netloc=netloc, file_name=channel["stream_name"])
    return HLSStream.parse_variant_playlist(self.session, hls_url)
def _get_streams(self):
    """Return the HLS variant streams of the room shown on the page.

    The room ID is read from the ``share_url`` in a page script, the live
    state and room name come from the ``live_info`` API, and the playlist
    URL from the ``streaming_url`` API. Sets ``self.title`` to the room name.

    :return: the parsed variant playlist, or ``None`` when no room ID is
        found, the room is not live, or the playlist is not served as
        ``application/x-mpegURL``.
    """
    re_room_id = re.compile(
        r"share_url:\"https:[^?]+?\?room_id=(?P<room_id>\d+)\"")
    room_schema = validate.Schema(
        validate.parse_html(),
        validate.xml_xpath_string(
            ".//script[contains(text(),'share_url:\"https:')][1]/text()"
        ),
        validate.any(
            None,
            validate.all(
                validate.transform(re_room_id.search),
                validate.any(None, validate.get("room_id")),
            ),
        ),
    )
    room_id = self.session.http.get(self.url, schema=room_schema)
    if not room_id:
        return

    info_schema = validate.Schema(
        validate.parse_json(),
        {
            "live_status": int,
            "room_name": str,
        },
        validate.union_get(
            "live_status",
            "room_name",
        ),
    )
    live_status, self.title = self.session.http.get(
        "https://www.showroom-live.com/api/live/live_info",
        params={"room_id": room_id},
        schema=info_schema,
    )
    if live_status != self.LIVE_STATUS:
        log.info("This stream is currently offline")
        return

    url_schema = validate.Schema(
        validate.parse_json(),
        {
            "streaming_url_list": [{
                "type": str,
                "url": validate.url(),
            }],
        },
        validate.get("streaming_url_list"),
        validate.filter(lambda p: p["type"] == "hls_all"),
        validate.get((0, "url")),
    )
    url = self.session.http.get(
        "https://www.showroom-live.com/api/live/streaming_url",
        params={
            "room_id": room_id,
            "abr_available": 1,
        },
        schema=url_schema,
    )

    # Probe the playlist first: anything not served as an HLS playlist is
    # treated as a restricted stream.
    res = self.session.http.get(url, acceptable_status=(200, 403, 404))
    if res.headers["Content-Type"] != "application/x-mpegURL":
        log.error("This stream is restricted")
        return

    return HLSStream.parse_variant_playlist(self.session, url)
def _get_streams(self):
    """Return the HLS variant streams for the video described on the page.

    The player JSON is extracted from the script mentioning ``.m3u8``, the
    rights service is asked whether playback is allowed, and the playlist
    URL is signed with a ``wmsAuthSign`` token before being parsed.

    :return: the parsed variant playlist, or ``None`` when the player JSON
        is not found or the rights service does not answer ``ok``.
    """
    self.session.http.headers.update(
        {"Referer": "https://tviplayer.iol.pt/"})

    player_schema = validate.Schema(
        validate.parse_html(),
        validate.xml_xpath_string(
            ".//script[contains(text(),'.m3u8')]/text()"),
        validate.text,
        validate.transform(self._re_jsonData.search),
        validate.any(
            None,
            validate.all(
                validate.get("json"),
                validate.parse_json(),
                {
                    "id": validate.text,
                    "liveType": validate.text,
                    "videoType": validate.text,
                    "videoUrl": validate.url(path=validate.endswith(".m3u8")),
                    validate.optional("channel"): validate.text,
                },
            ),
        ),
    )
    data = self.session.http.get(self.url, schema=player_schema)
    if not data:
        return
    log.debug("{0!r}".format(data))

    # The rights endpoint differs between live broadcasts and everything else.
    is_live = (
        data["liveType"].upper() == "DIRETO"
        and data["videoType"].upper() == "LIVE"
    )
    geo_path = "live" if is_live else "vod"

    rights_schema = validate.Schema(
        validate.parse_json(),
        {
            "code": validate.text,
            "error": validate.any(None, validate.text),
            "detail": validate.text,
        },
    )
    data_geo = self.session.http.get(
        "https://services.iol.pt/direitos/rights/{0}?id={1}".format(
            geo_path, data['id']),
        acceptable_status=(200, 403),
        schema=rights_schema,
    )
    log.debug("{0!r}".format(data_geo))
    if data_geo["detail"] != "ok":
        log.error("{0}".format(data_geo['detail']))
        return

    wmsAuthSign = self.session.http.get(
        "https://services.iol.pt/matrix?userId=",
        schema=validate.Schema(validate.text),
    )
    hls_url = update_qsd(data["videoUrl"], {"wmsAuthSign": wmsAuthSign})
    return HLSStream.parse_variant_playlist(self.session, hls_url)
def _find_steamid(self, url):
    """Return the ``steamid`` from the page's ``data-broadcast`` JSON.

    :param url: page to fetch.
    :return: the ``steamid`` string, or ``None`` when the ``webui_config``
        element carries no ``data-broadcast`` attribute.
    """
    broadcast_json = validate.all(
        str,
        validate.parse_json(),
        {"steamid": str},
        validate.get("steamid"),
    )
    schema = validate.Schema(
        validate.parse_html(),
        validate.xml_xpath_string(".//div[@id='webui_config']/@data-broadcast"),
        validate.any(None, broadcast_json),
    )
    return self.session.http.get(url, schema=schema)
def _get_streams(self):
    """Return HLS variant streams for the page's ``<video>`` HLS source.

    Returns ``None`` when the page has no such source.
    """
    schema = validate.Schema(
        validate.parse_html(),
        validate.xml_xpath_string(
            ".//video/source[@src][@type='application/x-mpegURL'][1]/@src"
        ),
    )
    hls_url = self.session.http.get(self.url, schema=schema)

    if hls_url:
        return HLSStream.parse_variant_playlist(self.session, hls_url)
    return
def get_hls_url(self):
    """Resolve and return the HLS playlist URL for the page.

    Cookies are cleared, the embed iframe is located, the base64-encoded
    player data inside it is decoded to obtain the balancer URL, and the
    balancer response is split into a status marker and the playlist URL.

    :return: the ``.m3u8`` URL, or ``None`` when the player data cannot be
        read.
    :raises NoStreamsError: when the page has no embed iframe.
    """
    self.session.http.cookies.clear()

    iframe_schema = validate.Schema(
        validate.parse_html(),
        validate.xml_xpath_string(".//iframe[contains(@src,'embed')]/@src"),
    )
    url_parts = self.session.http.get(url=self.url, schema=iframe_schema)
    if not url_parts:
        raise NoStreamsError("Missing url_parts")
    log.trace(f"url_parts={url_parts}")

    self.session.http.headers.update({"Referer": self.url})

    player_schema = validate.Schema(
        validate.parse_html(),
        validate.xml_xpath_string(".//script[@type='text/javascript'][contains(text(),'ovva-player')]/text()"),
        str,
        validate.transform(self._re_data.search),
        validate.get(1),
        validate.transform(lambda x: b64decode(x).decode()),
        validate.parse_json(),
        {"balancer": validate.url()},
        validate.get("balancer"),
    )
    try:
        url_ovva = self.session.http.get(
            url=urljoin(self.url, url_parts),
            schema=player_schema,
        )
    except (PluginError, TypeError) as err:
        log.error(f"ovva-player: {err}")
        return
    log.debug(f"url_ovva={url_ovva}")

    # The balancer response is split on "=" and must validate as a "302"
    # marker followed by the playlist URL.
    balancer_schema = validate.Schema(
        validate.transform(lambda x: x.split("=")),
        ["302", validate.url(path=validate.endswith(".m3u8"))],
        validate.get(1),
    )
    return self.session.http.get(url=url_ovva, schema=balancer_schema)
def _streams_dailymotion(self, root):
    """Resolve streams for a Dailymotion video embedded in an iframe.

    :param root: parsed document that the XPath query is run against.
    :return: the streams resolved for the Dailymotion embed URL, or
        ``None`` when validation raises ``PluginError``.
    """

    def _last_path_segment(src):
        return src.split("/")[-1]

    schema_dailymotion = validate.Schema(
        validate.xml_xpath_string(".//iframe[contains(@src,'dailymotion.com/')][1]/@src"),
        str,
        validate.transform(_last_path_segment),
    )

    try:
        video_id = schema_dailymotion.validate(root)
    except PluginError:
        return

    log.debug(f"Found dailymotion video ID: {video_id}")
    embed_url = f"https://www.dailymotion.com/embed/video/{video_id}"
    return self.session.streams(embed_url)
def _get_streams(self):
    """Return the HLS variant streams of the room described on the page.

    The room state is read from the ``data-json`` attribute of the
    ``js-live-data`` script, and the playlist URL is requested from the
    ``streaming_url`` API.

    :return: the parsed variant playlist, or ``None`` when the page has no
        live data, the room is not live, or the playlist is not served as
        ``application/x-mpegURL``.
    """
    live_data_schema = validate.Schema(
        validate.parse_html(),
        validate.xml_xpath_string(
            ".//script[@id='js-live-data'][@data-json]/@data-json"),
        validate.any(
            None,
            validate.all(
                validate.parse_json(),
                {
                    "is_live": int,
                    "room_id": int,
                    validate.optional("room"): {
                        "content_region_permission": int,
                        "is_free": int,
                    },
                },
            ),
        ),
    )
    data = self.session.http.get(self.url, schema=live_data_schema)
    if not data:
        # URL without livestream
        return

    log.debug(f"{data!r}")
    if data["is_live"] != 1:
        log.info("This stream is currently offline")
        return

    url_schema = validate.Schema(
        validate.parse_json(),
        {
            "streaming_url_list": [{
                "type": str,
                "url": validate.url(),
            }],
        },
        validate.get("streaming_url_list"),
        validate.filter(lambda p: p["type"] == "hls_all"),
        validate.get((0, "url")),
    )
    url = self.session.http.get(
        "https://www.showroom-live.com/api/live/streaming_url",
        params={
            "room_id": data["room_id"],
            "abr_available": 1,
        },
        schema=url_schema,
    )

    # Probe the playlist first: anything not served as an HLS playlist is
    # treated as a restricted stream.
    res = self.session.http.get(url, acceptable_status=(200, 403, 404))
    if res.headers["Content-Type"] != "application/x-mpegURL":
        log.error("This stream is restricted")
        return

    return ShowroomHLSStream.parse_variant_playlist(self.session, url)
def _get_live_url(self):
    """Return the ``LIVE_URL`` value assigned in a script of ``self.page``.

    :return: the validated URL, or ``None`` when the script or the
        assignment is not found.
    """
    live_url_re = re.compile(r"""LIVE_URL\s*=\s*['"]([^'"]+)['"]""")

    captured_url = validate.all(
        validate.get(1),
        validate.url(),
    )
    searched_script = validate.all(
        validate.transform(live_url_re.search),
        validate.any(None, captured_url),
    )
    schema = validate.Schema(
        validate.xml_xpath_string(".//script[contains(text(), 'LIVE_URL')]/text()"),
        validate.any(None, searched_script),
    )

    live_url = validate.validate(schema, self.page)
    log.debug(f"live_url={live_url}")
    return live_url
def _parse_streams(self, res):
    """Yield ``(name, stream)`` pairs found in the response.

    Sources are tried in this order:

    1. the ``og:video:url`` meta tag -- when it points at an ``.mpd`` or
       ``.mp4`` resource, only those streams are yielded;
    2. every match of ``self._src_re`` in the response text;
    3. the inline DASH manifest matched by ``self._dash_manifest_re``.

    :param res: HTTP response whose ``text`` is inspected.
    """
    og_schema = validate.Schema(
        validate.parse_html(),
        validate.xml_xpath_string(
            ".//head/meta[@property='og:video:url'][@content][1]/@content"),
    )
    og_url = og_schema.validate(res.text)

    if not og_url:
        log.debug("No meta og:video:url")
    elif ".mpd" in og_url:
        for item in DASHStream.parse_manifest(self.session, og_url).items():
            yield item
        return
    elif ".mp4" in og_url:
        yield "vod", HTTPStream(self.session, og_url)
        return

    for match in self._src_re.finditer(res.text):
        stream_url = match.group("url")
        if "\\/" in stream_url:
            # if the URL is json encoded, decode it
            stream_url = parse_json("\"{}\"".format(stream_url))

        if ".mpd" in stream_url:
            for item in DASHStream.parse_manifest(self.session, stream_url).items():
                yield item
        elif ".mp4" in stream_url:
            yield match.group(1), HTTPStream(self.session, stream_url)
        else:
            log.debug("Non-dash/mp4 stream: {0}".format(stream_url))

    match = self._dash_manifest_re.search(res.text)
    if not match:
        return

    # facebook replaces "<" characters with the substring "\\x3C"
    manifest = match.group("manifest").replace("\\/", "/")
    unquoted = unquote_plus(manifest)
    if is_py3:
        manifest = bytes(unquoted, "utf-8").decode("unicode_escape")
    else:
        manifest = unquoted.decode("string_escape")

    # Ignore unsupported manifests until DASH SegmentBase support is implemented
    if "SegmentBase" in manifest:
        log.error("Skipped DASH manifest with SegmentBase streams")
        return

    for item in DASHStream.parse_manifest(self.session, manifest).items():
        yield item
def _get_streams(self):
    """Return a single ``live`` HLS stream for the page's ``.m3u8`` source.

    The playlist is requested once beforehand; a non-200 status or a body
    of at most 10 characters is treated as an offline stream.

    :return: ``{"live": HLSStream}``, or ``None`` when no source is found
        or the stream is offline.
    """
    schema = validate.Schema(
        validate.parse_html(),
        validate.xml_xpath_string(".//source[contains(@src,'.m3u8')]/@src"),
    )
    hls_url = self.session.http.get(self.url, schema=schema)
    if not hls_url:
        return

    res = self.session.http.get(hls_url, acceptable_status=(200, 403, 404))
    offline = res.status_code != 200 or len(res.text) <= 10
    if offline:
        log.error("This stream is currently offline")
        return

    return {"live": HLSStream(self.session, hls_url)}
def _get_media_app(self):
    """Return the media ID and application name for the matched URL.

    :return: ``(video_id, "recorded")`` when the URL match carries a video
        ID, otherwise ``(channel_id, "channel")``. A channel ID missing
        from the URL is read from the page's ``ustream:channel_id`` meta
        tag.
    """
    recorded_id = self.match.group("video_id")
    if recorded_id:
        return recorded_id, "recorded"

    channel_id = self.match.group("channel_id")
    if channel_id:
        return channel_id, "channel"

    meta_schema = validate.Schema(
        validate.parse_html(),
        validate.xml_xpath_string(
            ".//meta[@name='ustream:channel_id'][@content][1]/@content"
        ),
    )
    channel_id = self.session.http.get(
        self.url,
        headers={"User-Agent": useragents.CHROME},
        schema=meta_schema,
    )
    return channel_id, "channel"
def _get_hls(self, root):
    """Return HLS variant streams for the page's ``data-broadcast`` JSON.

    The JSON is either a list of files or an object with a ``files`` list;
    the URL of the first entry is used, with its scheme passed through
    ``update_scheme``.

    :param root: parsed document that the XPath query is run against.
    :return: the parsed variant playlist, or ``None`` when validation
        raises ``PluginError``.
    """
    files_from_object = validate.all({"files": list}, validate.get("files"))
    schema_live = validate.Schema(
        validate.xml_xpath_string(
            ".//*[contains(@data-broadcast,'m3u8')]/@data-broadcast"),
        str,
        validate.parse_json(),
        validate.any(files_from_object, list),
        [{
            "url": validate.url(path=validate.endswith(".m3u8")),
        }],
        validate.get((0, "url")),
        validate.transform(
            lambda content_url: update_scheme("https://", content_url)),
    )

    try:
        live = schema_live.validate(root)
    except PluginError:
        return

    return HLSStream.parse_variant_playlist(self.session, live)