def _get_streams(self):
    """Find the player script referenced by the page and yield its streams.

    The page at ``self.url`` is searched with ``self._re_player``; when it
    does not match, nothing is yielded. Otherwise the script is fetched, the
    JSON captured by ``self._re_json`` is parsed and its
    ``mediaResource.dflt`` entry is read: ``videoURL`` is parsed as an HLS
    variant playlist and ``audioURL`` is yielded as an ``"audio"`` HTTP
    stream. Both keys are optional.
    """
    player_schema = validate.Schema(
        validate.transform(self._re_player.search),
        validate.any(
            None,
            validate.Schema(
                validate.get(1),
                validate.transform(lambda url: update_scheme("https:", url)),
            ),
        ),
    )
    player_js = self.session.http.get(self.url, schema=player_schema)
    if not player_js:
        return

    log.debug(f"Found player js {player_js}")

    media_schema = validate.Schema(
        validate.transform(self._re_json.match),
        validate.get(1),
        validate.parse_json(),
        validate.get("mediaResource"),
        validate.get("dflt"),
        {
            validate.optional("audioURL"): validate.url(),
            validate.optional("videoURL"): validate.url(),
        },
    )
    data = self.session.http.get(player_js, schema=media_schema)

    video_url = data.get("videoURL")
    if video_url:
        variants = HLSStream.parse_variant_playlist(
            self.session,
            update_scheme("https:", video_url),
        )
        yield from variants.items()

    audio_url = data.get("audioURL")
    if audio_url:
        yield "audio", HTTPStream(
            self.session,
            update_scheme("https:", audio_url),
        )
def test_update_scheme():
    """Check the scheme ``update_scheme`` gives a target URL for a current URL."""
    # (current URL, target URL, expected result, failure message)
    cases = (
        ("https://other.com/bar", "//example.com/foo",
         "https://example.com/foo", "should become https"),
        ("http://other.com/bar", "//example.com/foo",
         "http://example.com/foo", "should become http"),
        ("https://other.com/bar", "http://example.com/foo",
         "http://example.com/foo", "should remain http"),
        ("https://other.com/bar", "example.com/foo",
         "https://example.com/foo", "should become https"),
    )
    for current, target, expected, message in cases:
        assert update_scheme(current, target) == expected, message
def test_update_scheme(self):
    """Check the scheme ``update_scheme`` gives a target URL for a current URL."""
    # (expected result, current URL, target URL)
    cases = (
        # becomes https
        ("https://example.com/foo", "https://other.com/bar", "//example.com/foo"),
        # becomes http
        ("http://example.com/foo", "http://other.com/bar", "//example.com/foo"),
        # remains unchanged
        ("http://example.com/foo", "https://other.com/bar", "http://example.com/foo"),
        # becomes https
        ("https://example.com/foo", "https://other.com/bar", "example.com/foo"),
    )
    for expected, current, target in cases:
        self.assertEqual(expected, update_scheme(current, target))
def _get_vod(self, root):
    """Extract the VOD URL from the page's JSON-LD ``VideoObject`` script.

    :param root: parsed document that the XPath query is run against
    :return: the parsed HLS variant playlist when the URL path ends with
        ``.m3u8``, otherwise a ``{"vod": HTTPStream}`` dict; ``None`` when
        schema validation raises :exc:`PluginError`
    """
    def _strip_newlines(jsonlike):
        return re.sub(r"[\r\n]+", "", jsonlike)

    def _is_video_object(obj):
        return obj["@type"] == "VideoObject"

    def _apply_scheme(content_url):
        return update_scheme("https://", content_url)

    # The data is either a single object or a "@graph" list of objects,
    # from which the first VideoObject is taken
    first_video_in_graph = validate.all(
        {"@graph": [dict]},
        validate.get("@graph"),
        validate.filter(_is_video_object),
        validate.get(0),
    )
    schema_vod = validate.Schema(
        validate.xml_xpath_string(
            ".//script[@type='application/ld+json'][contains(text(),'VideoObject')][1]/text()"
        ),
        str,
        validate.transform(_strip_newlines),
        validate.parse_json(),
        validate.any(first_video_in_graph, dict),
        {"contentUrl": validate.url()},
        validate.get("contentUrl"),
        validate.transform(_apply_scheme),
    )

    try:
        vod = schema_vod.validate(root)
    except PluginError:
        return

    if not urlparse(vod).path.endswith(".m3u8"):
        return {"vod": HTTPStream(self.session, vod)}
    return HLSStream.parse_variant_playlist(self.session, vod)
def _get_live_streams(self):
    """Resolve the live streams for the current page.

    When the page carries a ``data-video-id`` on a ``data-google-src``
    element, the lookup is handed to the session as a YouTube watch URL.
    Otherwise the API URL for the matched subdomain is queried for an info
    URL, whose ``primary`` entry is parsed as an HLS variant playlist.
    """
    video_id_schema = validate.Schema(
        validate.parse_html(),
        validate.xml_xpath_string(".//div[@data-google-src]/@data-video-id"),
    )
    video_id = self.session.http.get(self.url, schema=video_id_schema)
    if video_id:
        return self.session.streams(f"https://www.youtube.com/watch?v={video_id}")

    api_url = self.API_URL.format(subdomain=self.match.group("subdomain"))
    info_schema = validate.Schema(
        validate.parse_json(),
        {"url": validate.url()},
        validate.get("url"),
        validate.transform(lambda url: update_scheme("https://", url)),
    )
    info_url = self.session.http.get(api_url, schema=info_schema)

    hls_schema = validate.Schema(
        validate.parse_json(),
        {
            "status": "ok",
            "protocol": "hls",
            "primary": validate.url(),
        },
        validate.get("primary"),
    )
    hls_url = self.session.http.get(info_url, schema=hls_schema)

    return HLSStream.parse_variant_playlist(self.session, hls_url)
def _get_streams(self):
    """Query the API for the matched client/event and return its HLS streams.

    The first and second groups of the URL match are posted as ``clientID``
    and ``eventID``. ``None`` is returned when the validated API response is
    ``None``; otherwise the ``main`` streaming URI is parsed as an HLS
    variant playlist.
    """
    client_id = self.match.group(1)
    event_id = self.match.group(2)

    request_headers = {
        "Content-Type": "application/json",
        "wsc-api-key": self.WSC_API_KEY,
        "Authorization": "embedder",
    }
    payload = json.dumps({
        "clientID": client_id,
        "showEncoder": True,
        "showMediaAssets": True,
        "showStreams": True,
        "includePrivate": False,
        "advancedDetails": True,
        "VAST": True,
        "eventID": event_id,
    })

    res = self.session.http.post(self.API_URL, data=payload, headers=request_headers)
    api_response = self.session.http.json(res, schema=self.api_response_schema)
    if api_response is None:
        return

    main_uri = api_response["data"]["streamingURIs"]["main"]
    return HLSStream.parse_variant_playlist(
        self.session,
        update_scheme("https://", main_uri),
    )
def _get_streams(self):
    """Parse the matched URL as a DASH manifest.

    The ``url`` and ``params`` groups of the URL match supply the manifest
    URL and the keyword arguments passed on to the manifest parser.
    """
    groups = self.match.groupdict()
    manifest_url = update_scheme("https://", groups.get("url"), force=False)
    extra = parse_params(groups.get("params"))

    log.debug(f"URL={manifest_url}; params={extra}")

    return DASHStream.parse_manifest(self.session, manifest_url, **extra)
def _get_streams_api(self, video_id):
    """Yield the HLS streams the API lists for ``video_id``.

    The API endpoint is chosen by the ``tld`` group of the URL match, falling
    back to the ``"lt"`` entry of ``self._api``. Only versions whose type is
    ``application/x-mpegurl`` are kept. A :exc:`PluginError` raised while
    fetching or validating is logged and ends the generator.

    :param video_id: identifier sent to the API as the ``video_id`` parameter
    """
    log.debug("Found video ID: {0}".format(video_id))

    def _is_hls(item):
        return item["type"] == "application/x-mpegurl"

    versions_schema = validate.Schema(
        validate.parse_json(),
        {
            "success": True,
            "data": {
                "versions": {
                    validate.text: validate.all(
                        [{
                            "type": validate.text,
                            "src": validate.text,
                        }],
                        validate.filter(_is_hls),
                    ),
                },
            },
        },
        validate.get(("data", "versions")),
    )

    tld = self.match.group("tld")
    try:
        data = self.session.http.get(
            self._api.get(tld, "lt"),
            params=dict(video_id=video_id),
            schema=versions_schema,
        )
    except PluginError:
        log.error("Failed to get streams from API")
        return

    for stream in itertools.chain.from_iterable(data.values()):
        playlist_url = update_scheme("https://", stream["src"], force=False)
        yield from HLSStream.parse_variant_playlist(self.session, playlist_url).items()
class TV999(Plugin):
    """Plugin that follows a page's iframe to the HLS source found inside it."""

    iframe_re = re.compile(r'<iframe.*src="([^"]+)"')
    hls_re = re.compile(r'src="([^"]+)"\s+type="application/x-mpegURL"')

    # Yields the iframe's src as a validated URL, or None when nothing matches
    iframe_schema = validate.Schema(
        validate.transform(iframe_re.search),
        validate.any(
            None,
            validate.all(
                validate.get(1),
                validate.url(),
            ),
        ),
    )

    # Yields the HLS source URL after update_scheme, or None when nothing matches
    hls_schema = validate.Schema(
        validate.transform(hls_re.search),
        validate.any(
            None,
            validate.all(
                validate.get(1),
                validate.transform(lambda src: update_scheme('http:', src)),
                validate.url(),
            ),
        ),
    )

    def _get_streams(self):
        """Return a single ``live`` HLS stream, or ``None`` when not found."""
        iframe_url = self.session.http.get(self.url, schema=self.iframe_schema)
        if not iframe_url:
            log.error('Failed to find IFRAME URL')
            return

        hls_url = self.session.http.get(iframe_url, schema=self.hls_schema)
        if not hls_url:
            return

        return {'live': HLSStream(self.session, hls_url)}
def _get_streams(self):
    """
    Find the streams for web.tv

    The first match of ``self._sources_re`` in the page is parsed as JSON and
    every source of type ``application/vnd.apple.mpegurl`` is yielded, using
    the page URL as the Referer.

    :return: generator of ``(name, stream)`` pairs
    """
    request_headers = {}
    res = self.session.http.get(self.url, headers=request_headers)
    request_headers["Referer"] = self.url

    sources = self._sources_re.findall(res.text)
    if not sources:
        return

    for source in parse_json(sources[0], schema=self._sources_schema):
        log.debug(f"Found stream of type: {source['type']}")
        if source["type"] != "application/vnd.apple.mpegurl":
            continue

        url = update_scheme("https://", source["src"], force=False)
        try:
            # try to parse the stream as a variant playlist
            variant = HLSStream.parse_variant_playlist(
                self.session, url, headers=request_headers)
            if variant:
                yield from variant.items()
            else:
                # and if that fails, try it as a plain HLS stream
                yield 'live', HLSStream(self.session, url, headers=request_headers)
        except OSError:
            log.warning(
                "Could not open the stream, perhaps the channel is offline"
            )
def _get_live_streams(self, match):
    """
    Get the live stream in a particular language

    :param match: mapping with the ``subdomain`` and ``scheme`` of the URL
    :return: the parsed HLS variant playlist, or a single HTTP stream named
        ``stream`` when the ``parse_hls`` option evaluates to false
    """
    http = self.session.http

    live_url = self._live_api_url.format(match.get("subdomain"))
    live_res = http.json(http.get(live_url), schema=self._live_schema)

    current_scheme = "{0}:///".format(match.get("scheme"))
    api_url = update_scheme(current_scheme, live_res["url"])
    api_res = http.json(http.get(api_url), schema=self._stream_api_schema)

    try:
        parse_hls = bool(strtobool(self.get_option('parse_hls')))
    except AttributeError:
        # presumably raised when the option value is not a string (e.g. unset);
        # HLS parsing is the default in that case
        parse_hls = True

    if not parse_hls:
        return dict(stream=HTTPStream(self.session, api_res["primary"]))
    return HLSStream.parse_variant_playlist(self.session, api_res["primary"])
def _get_streams(self):
    """Return the matched URL as a single ``live`` HTTP stream.

    The ``url`` and ``params`` groups of the URL match supply the stream URL
    and the keyword arguments passed on to :class:`HTTPStream`.
    """
    groups = self.match.groupdict()
    stream_url = update_scheme("https://", groups.get("url"), force=False)
    extra = parse_params(groups.get("params"))

    log.debug("URL={0}; params={1}".format(stream_url, extra))

    stream = HTTPStream(self.session, stream_url, **extra)
    return {"live": stream}
def _get_streams_delfi(self, src):
    """Yield the HLS streams described by the embed script at ``src``.

    The page is searched for the script that calls ``embedJs.setAttribute``;
    the ``stream`` value in the fragment of the embedded URL is parsed as
    JSON and each entry of its ``versions`` list contributes one HLS variant
    playlist.

    The schema deliberately yields ``None`` when the script or the embed URL
    is missing; in that case nothing is yielded. A :exc:`PluginError` raised
    while fetching or validating is logged and ends the generator.

    :param src: URL of the iframe page to inspect
    """
    embed_src_re = re.compile(r"embedJs\.setAttribute\('src',\s*'(.+?)'")

    versions_from_embed_url = validate.all(
        validate.get(1),
        validate.transform(lambda url: parse_qsd(urlparse(url).fragment)),
        {"stream": validate.text},
        validate.get("stream"),
        validate.parse_json(),
        {"versions": [{
            "hls": validate.text,
        }]},
        validate.get("versions"),
    )
    schema = validate.Schema(
        validate.parse_html(),
        validate.xml_xpath_string(".//script[contains(text(),'embedJs.setAttribute(')][1]/text()"),
        validate.any(None, validate.all(
            validate.text,
            validate.transform(embed_src_re.search),
            validate.any(None, versions_from_embed_url),
        )),
    )

    try:
        data = self.session.http.get(src, schema=schema)
    except PluginError:
        log.error("Failed to get streams from iframe")
        return

    # The schema allows None (no script / no embed URL): iterate nothing then,
    # instead of raising TypeError on ``for ... in None``
    for stream in data or []:
        playlist_url = update_scheme("https://", stream["hls"], force=False)
        yield from HLSStream.parse_variant_playlist(self.session, playlist_url).items()
def _get_streams(self):
    """Return the streams for the page: a YouTube embed or a tokenised HLS URL.

    The first iframe whose ``src`` host ends with ``youtube.com`` is handed
    to the session. Otherwise ``self.mobile_url_re`` is searched in the page
    and the token is appended to the matched URL; the token comes from the
    match itself, from ``self.kral_token_url`` for "kralmuzik" pages, or from
    ``self.token_re``.

    :return: mapping of streams, or ``None`` when no mobile URL or no token
        could be found
    """
    res = self.session.http.get(self.url)

    # Look for Youtube embedded video first
    for iframe in itertags(res.text, "iframe"):
        src = iframe.attributes.get("src")
        # urlparse(None) yields bytes components, which makes
        # ``.endswith("youtube.com")`` raise TypeError: skip src-less iframes
        if not src:
            continue
        if urlparse(src).netloc.endswith("youtube.com"):
            log.debug("Handing off to YouTube plugin")
            return self.session.streams(src)

    # Next check for HLS URL with token
    mobile_url_m = self.mobile_url_re.search(res.text)
    mobile_url = mobile_url_m and update_scheme(
        "https://", mobile_url_m.group("url"), force=False)
    if not mobile_url:
        return

    log.debug("Found mobile stream: {0}".format(mobile_url_m.group(0)))

    token = mobile_url_m.group("token")
    if not token and "kralmuzik" in self.url:
        log.debug("Getting Kral Muzik HLS stream token from API")
        token = self.session.http.get(self.kral_token_url).text
    elif not token:
        # if no token is in the url, try to find it else where in the page
        log.debug("Searching for HLS stream token in URL")
        token_m = self.token_re.search(res.text)
        token = token_m and token_m.group("token")

    # Without a token the concatenation below would raise TypeError
    if not token:
        log.error("Could not find the HLS stream token")
        return

    return HLSStream.parse_variant_playlist(
        self.session,
        mobile_url + token,
        headers={"Referer": self.url},
    )
def _get_streams(self):
    """Yield one HTTP stream per entry in the config's ``files`` mapping.

    Each stream is named ``<n>p``, where ``n`` is the smaller of the entry's
    width and height.
    """
    config = self.session.http.get(self.url, schema=self.config_schema)
    for file_info in config["files"].values():
        stream_url = update_scheme("https://", file_info["url"])

        # pick the smaller of the two dimensions, for landscape v. portrait videos
        shorter_side = min(file_info["width"], file_info["height"])
        name = "{0}p".format(shorter_side)

        yield name, HTTPStream(self.session, stream_url)
def test_update_scheme(self):
    """Check the scheme ``update_scheme`` gives a target URL for a current URL."""
    https_page = "https://other.com/bar"
    http_page = "http://other.com/bar"

    # becomes https
    schemeless_on_https = update_scheme(https_page, "//example.com/foo")
    self.assertEqual("https://example.com/foo", schemeless_on_https)

    # becomes http
    schemeless_on_http = update_scheme(http_page, "//example.com/foo")
    self.assertEqual("http://example.com/foo", schemeless_on_http)

    # remains unchanged
    explicit_http = update_scheme(https_page, "http://example.com/foo")
    self.assertEqual("http://example.com/foo", explicit_http)

    # becomes https
    bare_host = update_scheme(https_page, "example.com/foo")
    self.assertEqual("https://example.com/foo", bare_host)
def merge_path_list(self, static, user):
    """Append the ``(netloc, path)`` pair of each user URL to ``static``.

    Entries that start with neither ``http`` nor ``//`` are passed through
    ``update_scheme`` first. Entries lacking a netloc or a path are skipped.

    :param static: list of ``(netloc, path)`` tuples; extended in place
    :param user: iterable of URL strings
    :return: the same ``static`` list
    """
    for entry in user:
        if not entry.startswith(('http', '//')):
            entry = update_scheme('http://', entry, force=False)

        parts = urlparse(entry)
        if not (parts.netloc and parts.path):
            continue
        static.append((parts.netloc, parts.path))

    return static
def _get_streams(self):
    """Authenticate if needed, then yield the HLS streams behind the iframe.

    Cached credentials are cleared first when the ``purge_credentials``
    option is set. Without a cached session, a username and password are
    required and a login is attempted; the generator ends when either is
    missing or the login fails.

    :raises PluginError: when the page has no iframe or the iframe page has
        no HLS URL
    """
    http = self.session.http
    http.headers.update({
        'Referer': 'http://www.abweb.com/BIS-TV-Online/bistvo-tele-universal.aspx'
    })

    login_username = self.get_option('username')
    login_password = self.get_option('password')

    if self.options.get('purge_credentials'):
        self.clear_cookies()
        self._authed = False
        log.info('All credentials were successfully removed.')

    if self._authed:
        log.info('Attempting to authenticate using cached cookies')
    elif not (login_username and login_password):
        log.error('A login for ABweb is required, use --abweb-username USERNAME --abweb-password PASSWORD')
        return
    elif not self._login(login_username, login_password):
        return

    log.debug('get iframe_url')
    res = http.get(self.url)

    # Only the first iframe of the page is considered
    first_iframe = next(iter(itertags(res.text, 'iframe')), None)
    if first_iframe is None:
        raise PluginError('No iframe_url found.')

    iframe_url = first_iframe.attributes.get('src')
    if iframe_url.startswith('/'):
        iframe_url = url_concat('https://www.abweb.com', iframe_url)
    else:
        iframe_url = update_scheme('https://', iframe_url)
    log.debug(f'iframe_url={iframe_url}')

    http.headers.update({'Referer': iframe_url})
    res = http.get(iframe_url)

    hls_match = self._hls_re.search(res.text)
    if not hls_match:
        raise PluginError('No hls_url found.')

    hls_url = update_scheme('https://', hls_match.group('url'))
    streams = HLSStream.parse_variant_playlist(self.session, hls_url)
    if not streams:
        # Not a variant playlist: offer the URL as a plain HLS stream
        yield 'live', HLSStream(self.session, hls_url)
    else:
        yield from streams.items()
def resolve_url(self, url: str, follow_redirect: bool = True) -> Plugin:
    """Attempts to find a plugin that can use this URL.

    The URL is first passed through ``update_scheme("https://", url,
    force=False)``. Among the loaded plugins, the one matching with the
    highest priority is instantiated with the URL. When no plugin matches
    and ``follow_redirect`` is set, the URL is requested and resolution is
    retried on the final URL if it differs.

    Raises :exc:`NoPluginError` on failure.

    :param url: a URL to match against loaded plugins
    :param follow_redirect: follow redirects
    """
    url = update_scheme("https://", url, force=False)

    matcher: Matcher
    candidate: Optional[Type[Plugin]] = None
    priority = NO_PRIORITY

    for name, plugin in self.plugins.items():
        if plugin.matchers:
            for matcher in plugin.matchers:
                if matcher.priority > priority and matcher.pattern.match(url) is not None:
                    candidate = plugin
                    priority = matcher.priority
        # TODO: remove deprecated plugin resolver
        elif (
            hasattr(plugin, "can_handle_url")
            and callable(plugin.can_handle_url)
            and plugin.can_handle_url(url)
        ):
            if hasattr(plugin, "priority") and callable(plugin.priority):
                legacy_priority = plugin.priority(url)
            else:
                legacy_priority = NORMAL_PRIORITY

            if legacy_priority > priority:
                log.info(
                    f"Resolved plugin {name} with deprecated can_handle_url API"
                )
                candidate = plugin
                priority = legacy_priority

    if candidate:
        return candidate(url)

    if not follow_redirect:
        raise NoPluginError

    # Attempt to handle a redirect URL
    try:
        response = self.http.head(url, allow_redirects=True, acceptable_status=[501])

        # Fall back to GET request if server doesn't handle HEAD.
        if response.status_code == 501:
            response = self.http.get(url, stream=True)

        if response.url != url:
            return self.resolve_url(response.url, follow_redirect=follow_redirect)
    except PluginError:
        pass

    raise NoPluginError
class Mico(Plugin):
    """Plugin for micous.com live pages, driven by the page's ``win._profile`` JSON."""

    # Metadata filled in by _get_streams once the profile data is known
    author = None
    category = None
    title = None

    url_re = re.compile(r'https?://(?:www\.)?micous\.com/live/\d+')
    json_data_re = re.compile(r'win._profile\s*=\s*({.*})')

    # Profile fields required from the parsed JSON
    _profile_schema = validate.all({
        'mico_id': int,
        'nickname': validate.text,
        'h5_url': validate.all(
            validate.transform(lambda url: update_scheme('http:', url)),
            validate.url(),
        ),
        'is_live': bool,
    })

    _json_data_schema = validate.Schema(
        validate.transform(json_data_re.search),
        validate.any(
            None,
            validate.all(
                validate.get(1),
                validate.transform(parse_json),
                validate.any(None, _profile_schema),
            ),
        ),
    )

    @classmethod
    def can_handle_url(cls, url):
        """Tell whether ``url`` matches ``url_re``."""
        return cls.url_re.match(url) is not None

    def get_author(self):
        """Return the stored author, or ``None`` when not set."""
        return self.author

    def get_category(self):
        """Return the stored category, or ``None`` when not set."""
        return self.category

    def get_title(self):
        """Return the stored title, or ``None`` when not set."""
        return self.title

    def _get_streams(self):
        """Return the HLS streams of a live profile, storing its metadata."""
        profile = self.session.http.get(self.url, schema=self._json_data_schema)
        if not profile:
            log.error('Failed to get JSON data')
            return

        if not profile['is_live']:
            log.info('This stream is no longer online')
            return

        self.author = profile['mico_id']
        self.category = 'Live'
        self.title = profile['nickname']

        return HLSStream.parse_variant_playlist(self.session, profile['h5_url'])
def _get_http_streams(self, info):
    """Yield an HTTP stream for every URL in ``info["_stream"]``.

    All streams share one name: the ``QUALITY_MAP`` entry for
    ``info["_quality"]``, or ``"vod"`` when the quality is not mapped.

    :param info: mapping with ``_quality`` and ``_stream`` (one URL or a list)
    """
    name = QUALITY_MAP.get(info["_quality"], "vod")

    raw = info["_stream"]
    stream_urls = raw if isinstance(raw, list) else [raw]

    for stream_url in stream_urls:
        yield name, HTTPStream(self.session, update_scheme("https://", stream_url))
def _get_streams(self):
    """Return the HLS streams for the matched URL.

    The ``url`` and ``params`` groups of the URL match supply the playlist
    URL and extra keyword arguments. When parsing the URL as a variant
    playlist yields nothing, it is returned as a single ``live`` HLS stream.
    """
    groups = self.match.groupdict()
    playlist_url = update_scheme("https://", groups.get("url"), force=False)
    extra = parse_params(groups.get("params"))

    log.debug("URL={0}; params={1}".format(playlist_url, extra))

    streams = HLSStream.parse_variant_playlist(self.session, playlist_url, **extra)
    if streams:
        return streams

    return {"live": HLSStream(self.session, playlist_url, **extra)}
def _get_streams(self):
    """Return the ``live`` HLS stream found inside the page's iframe.

    ``None`` is returned when the page has no iframe ``src`` or the iframe
    page has no ``source`` whose ``src`` contains ``m3u8``.
    """
    iframe_url = self._get_xpath_string(self.url, ".//iframe[@src]/@src")
    if not iframe_url:
        return

    hls_url = self._get_xpath_string(
        iframe_url,
        ".//source[contains(@src,'m3u8')]/@src",
    )
    if not hls_url:
        return

    stream = HLSStream(self.session, update_scheme("http://", hls_url))
    return {"live": stream}
def _get_streams(self):
    """Hand the embedded platform URL found in the page to ThePlatform plugin.

    :return: the streams of the ThePlatform plugin, or ``None`` when
        ``self.embed_url_re`` finds no URL in the page
    """
    res = self.session.http.get(self.url)

    embed_match = self.embed_url_re.search(res.text)
    if not embed_match:
        return

    platform_url = embed_match.group("url")
    if not platform_url:
        return

    url = update_scheme("https://", platform_url)

    # hand off to ThePlatform plugin
    platform = ThePlatform(url)
    platform.bind(self.session, "plugin.nbc")
    return platform.streams()
def _get_streams(self):
    """Yield the HLS streams listed by the player for the current page.

    The player manager JSON found in the page (``self._re_player_manager``)
    supplies the request parameters for ``self.PLAYER_URL``. The ``video``
    parameter is the URL-unquoted ``token`` when present, otherwise the
    ``streamId``. Both are optional in the schema, so when neither is
    present an error is logged and nothing is yielded (previously this
    raised ``KeyError``).

    From the player response, only playlist entries of type ``hls`` are
    used. On ``m4sport.hu`` pages whose path starts with ``/elo``, entries
    whose file URL contains ``vod`` are skipped.
    """
    manager_schema = validate.Schema(
        validate.transform(self._re_player_manager.search),
        validate.any(
            None,
            validate.all(
                validate.get("json"),
                validate.parse_json(),
                {
                    "contentId": validate.any(str, int),
                    validate.optional("streamId"): str,
                    validate.optional("idec"): str,
                    validate.optional("token"): str,
                },
            ),
        ),
    )
    params = self.session.http.get(self.url, schema=manager_schema)
    if not params:
        log.error("Could not find player manager data")
        return

    # The token is removed from the parameters and sent unquoted as "video";
    # the stream ID is only consumed when there is no token
    token = params.pop("token", None)
    if token is not None:
        video = unquote(token)
    else:
        video = params.pop("streamId", None)
    if video is None:
        log.error("Could not find a token or stream ID in the player manager data")
        return

    params.update({
        "video": video,
        "noflash": "yes",
        "embedded": "0",
    })

    url_parsed = urlparse(self.url)
    skip_vods = url_parsed.netloc.endswith("m4sport.hu") and url_parsed.path.startswith("/elo")

    self.session.http.headers.update({"Referer": self.url})

    playlist_schema = validate.Schema(
        validate.transform(self._re_player_json.search),
        validate.any(
            None,
            validate.all(
                validate.get("json"),
                validate.parse_json(),
                {"playlist": [{
                    "file": validate.url(),
                    "type": str,
                }]},
                validate.get("playlist"),
                validate.filter(lambda p: p["type"] == "hls"),
                validate.filter(lambda p: not skip_vods or "vod" not in p["file"]),
                validate.map(lambda p: update_scheme("https://", p["file"])),
            ),
        ),
    )
    playlists = self.session.http.get(
        self.PLAYER_URL,
        params=params,
        schema=playlist_schema,
    )

    # The schema yields None when the player JSON is not found
    for url in playlists or []:
        yield from HLSStream.parse_variant_playlist(self.session, url).items()
def _get_streams(self):
    """Return the tokenised HLS streams for the page or its live channel.

    The playlist URL is taken from ``self.page`` when ``self._live_url_re``
    matches. Otherwise the live channel is derived from the page's host (and
    path, for ``atv.pe``) and the playlist URL is searched in that channel's
    player page. The token from ``self._get_token`` is appended to the
    playlist URL as the ``iut`` query parameter.

    :return: mapping of streams, or ``None`` when no playlist URL was found
    :raises PluginError: when the player page reports a country block
    """
    page_match = self._live_url_re.search(self.page.text)
    playlist_url = (
        update_scheme("https://", page_match.group(1), force=False)
        if page_match else page_match
    )

    player_url = self.url
    parsed = urlparse(player_url)
    host = parsed.netloc

    live_channel = None
    channelnumber = 0
    if host.endswith("tvc.com.ec"):
        live_channel = "Canal5"
    elif host.endswith("rts.com.ec"):
        live_channel = "Guayaquil"
    elif host.endswith("atv.pe"):
        if parsed.path.endswith(("ATVMas", "ATVMas/")):
            live_channel = "ATVMas"
            channelnumber = 1
        else:
            live_channel = "ATV"

    token = self._get_token(channelnumber)
    log.debug("token {0}".format(token))

    if playlist_url:
        log.debug("Found playlist URL in the page")
    elif live_channel:
        log.debug("Live channel: {0}".format(live_channel))
        player_url = self._channel_urls[live_channel] + quote(token)
        page = self.session.http.get(player_url, raise_for_status=False)
        if "block access from your country." in page.text:
            raise PluginError("Content is geo-locked")
        player_match = self._playlist_re.search(page.text)
        playlist_url = player_match and update_scheme(
            "https://", player_match.group(1), force=False)
    else:
        log.error("Could not find the live channel")

    if not playlist_url:
        return

    stream_url = "{0}?{1}".format(playlist_url, urlencode({"iut": token}))
    return HLSStream.parse_variant_playlist(
        self.session,
        stream_url,
        headers={"referer": player_url},
    )
def _get_streams(self):
    """Return the HLS streams served through a ``cdn.bg`` iframe.

    When ``self.url`` is itself on a ``cdn.bg`` host it is used as the iframe
    URL, and a ``Referer`` must be present in the session's ``http-headers``
    option. Otherwise the page is searched for the iframe URL, first with
    ``self._re_frame`` and then among its ``iframe`` tags.

    :return: mapping of streams, or ``None`` when the Referer is missing, no
        iframe is found, or the stream URL contains ``geoblock``
    """
    if "cdn.bg" in urlparse(self.url).netloc:
        iframe_url = self.url
        http_headers = self.session.get_option("http-headers")
        _referer = http_headers.get("Referer") if http_headers else None
        if not _referer:
            log.error(
                "Missing Referer for iframe URL, use --http-header \"Referer=URL\" "
            )
            return
    else:
        _referer = self.url
        res = self.session.http.get(self.url)
        frame_match = self._re_frame.search(res.text)
        if frame_match:
            iframe_url = frame_match.group(1)
        else:
            # First iframe whose src mentions cdn.bg, if any
            sources = (tag.attributes.get("src") for tag in itertags(res.text, "iframe"))
            embed_src = next((src for src in sources if src and "cdn.bg" in src), None)
            if embed_src is None:
                return
            iframe_url = update_scheme("https://", html_unescape(embed_src), force=False)

    log.debug(f"Found iframe: {iframe_url}")

    res = self.session.http.get(iframe_url, headers={"Referer": _referer})
    stream_url = self.stream_schema.validate(res.text)
    if "geoblock" in stream_url:
        log.error("Geo-restricted content")
        return

    # The iframe URL supplies the scheme for the stream URL
    playlist_url = update_scheme(iframe_url, stream_url)
    return HLSStream.parse_variant_playlist(
        self.session,
        playlist_url,
        headers={"Referer": "https://i.cdn.bg/"},
    )
def _get_live_streams(self, match):
    """
    Get the live stream in a particular language

    :param match: mapping with the ``subdomain`` and ``scheme`` of the URL
    :return: the HLS variant playlist parsed from the API's ``primary`` URL
    """
    http = self.session.http

    live_url = self._live_api_url.format(match.get("subdomain"))
    live_res = http.json(http.get(live_url), schema=self._live_schema)

    current_scheme = "{0}:///".format(match.get("scheme"))
    api_url = update_scheme(current_scheme, live_res["url"])
    api_res = http.json(http.get(api_url), schema=self._stream_api_schema)

    primary_url = api_res["primary"]
    return HLSStream.parse_variant_playlist(self.session, primary_url)
def _get_streams(self):
    """Yield the HLS streams of the selected cam when it is live.

    The cam is chosen by the ``cam`` query parameter of ``self.url``, or is
    the first cam in the page's JSON data otherwise. Nothing is yielded
    unless the cam has ``live_type`` ``flashvideo``, ``liveon`` ``true`` and
    ``defaulttab`` ``live``. The cam's group, location and title are stored
    as ``author``, ``category`` and ``title``.
    """
    cam_fields = {
        "live_type": validate.text,
        "html5_streamingdomain": validate.text,
        "html5_streampath": validate.text,
        "group": validate.text,
        "location": validate.text,
        "title": validate.text,
        "liveon": validate.text,
        "defaulttab": validate.text,
    }
    schema = validate.Schema(
        validate.transform(self._re_json_base.search),
        validate.any(
            None,
            validate.all(
                validate.get("json"),
                validate.parse_json(),
                {"cam": {validate.text: cam_fields}},
                validate.get("cam"),
            ),
        ),
    )
    data = self.session.http.get(self.url, schema=schema)
    if not data:
        return

    cam_name = parse_qsd(self.url).get("cam") or next(iter(data), None)
    cam_data = data.get(cam_name)
    if not cam_data:
        return

    # exclude everything other than live video streams
    is_live_video = (
        cam_data["live_type"] == "flashvideo"
        and cam_data["liveon"] == "true"
        and cam_data["defaulttab"] == "live"
    )
    if not is_live_video:
        return

    log.debug("Found cam {0}".format(cam_name))

    hls_domain = cam_data["html5_streamingdomain"]
    hls_playpath = cam_data["html5_streampath"]

    self.author = cam_data["group"]
    self.category = cam_data["location"]
    self.title = cam_data["title"]

    if not hls_playpath:
        return

    hls_url = update_scheme("https://", "{0}{1}".format(hls_domain, hls_playpath))
    yield from HLSStream.parse_variant_playlist(self.session, hls_url).items()
def __init__(self, url):
    """Set up the plugin URL, the Referer header and the run counter.

    Every URL handled is recorded in ``GenericCache.cache_url_list``; the
    previously recorded URL (or this URL itself, on the first run) becomes
    the Referer for the session.

    :param url: URL the plugin was resolved for
    """
    super(Generic, self).__init__(url)
    self.url = update_scheme('http://', self.match.group('url'), force=False)
    self.html_text = ''

    # START - cache every used url and set a referer
    if not hasattr(GenericCache, 'cache_url_list'):
        GenericCache.cache_url_list = [self.url]
        self.referer = self.url
    else:
        GenericCache.cache_url_list.append(self.url)
        # set the last url as a referer
        self.referer = GenericCache.cache_url_list[-2]
    self.session.http.headers.update({'Referer': self.referer})
    # END

    # START - how often _get_streams already run
    self._run = len(GenericCache.cache_url_list)
def _get_hls(self, root):
    """Extract the live HLS URL from the page's ``data-broadcast`` attribute.

    The attribute holds JSON that is either a list of files or an object
    with a ``files`` list; the URL of the first file is used.

    :param root: parsed document that the XPath query is run against
    :return: the parsed HLS variant playlist, or ``None`` when schema
        validation raises :exc:`PluginError`
    """
    def _apply_scheme(content_url):
        return update_scheme("https://", content_url)

    files_from_object = validate.all({"files": list}, validate.get("files"))
    m3u8_file = {"url": validate.url(path=validate.endswith(".m3u8"))}

    schema_live = validate.Schema(
        validate.xml_xpath_string(
            ".//*[contains(@data-broadcast,'m3u8')]/@data-broadcast"
        ),
        str,
        validate.parse_json(),
        validate.any(files_from_object, list),
        [m3u8_file],
        validate.get((0, "url")),
        validate.transform(_apply_scheme),
    )

    try:
        live = schema_live.validate(root)
    except PluginError:
        return

    return HLSStream.parse_variant_playlist(self.session, live)