def _get_streams(self):
    """Build HDS streams from the player script referenced by the page.

    Fetches ``self.url``, looks for the player script with ``_player_js``,
    downloads it and decodes the JSON found between the first ``(`` and
    the last ``)`` of the script.

    :return: the items of ``HDSStream.parse_manifest`` for the default
             media resource, or ``None`` when the script or its JSON
             payload cannot be located
    """
    page = http.get(self.url)
    found = _player_js.search(page.text)
    if not found:
        self.logger.info(
            "Didn't find player js. Probably this page doesn't contain a video"
        )
        return

    player_js = found.group(0)
    self.logger.info("Found player js {0}", player_js)

    script = http.get(player_js).text

    # find() gives -1 on a miss, so a missing "(" leaves ``start`` at 0.
    start = script.find('(') + 1
    end = script.rfind(')')
    if start <= 0 or end <= 0:
        self.logger.info(
            "Couldn't extract json metadata from player.js: {0}", player_js)
        return

    metadata = json.loads(script[start:end])
    manifest_url = metadata['mediaResource']['dflt']['videoURL']
    return HDSStream.parse_manifest(self.session, manifest_url).items()
def _get_live_streams(self):
    """Collect the unique live streams listed on the live player page.

    :return: list of the distinct entries produced by validating every
             matched stream data blob with ``self._live_streams_schema``;
             an empty list when the live player URL is not found
    """
    # Channel id is taken from the plugin URL
    channel = self._url_re.match(self.url).group('channel')

    # Look up the live player URL
    player_page = http.get(self.PLAYER_URL)
    player_match = self._live_player_re.search(player_page.text)
    if player_match is None:
        return []
    live_player_url = update_scheme(self.url,
                                    player_match.group('live_player_url'))

    # Pull the per-channel stream data out of the live player page
    live_page = http.get(live_player_url)
    pattern = r'{0}(?:_MINI)?:({{.+?}}]}}]}})'.format(
        self.CHANNEL_MAP[channel])
    blobs = re.findall(pattern, live_page.text)

    unique = []
    for blob in blobs:
        for entry in self._live_streams_schema.validate(blob):
            if entry in unique:
                continue
            unique.append(entry)
    return unique
def _get_vod_stream(self):
    """Yield the HDS and HLS streams listed for the VOD at ``self.url``.

    The page's ``.securevideo.json`` document supplies the ``clientid`` and
    ``mzid`` used to query ``API_VOD``; every ``targetUrls`` entry of type
    ``HDS`` or ``HLS`` is expanded into ``(name, stream)`` pairs.
    """
    page_url = self.url
    # Drop a single trailing slash before appending the JSON suffix
    base_url = page_url[:-1] if page_url.endswith('/') else page_url

    page = http.get('{0}.securevideo.json'.format(base_url))
    found = _json_re.search(page.text)
    if not found:
        return
    info = parse_json(found.group(1))

    api_res = http.get(API_VOD.format(info['clientid'], info['mzid']))
    targets = http.json(api_res, schema=_stream_schema)['targetUrls']

    for target in targets:
        if target['type'] == 'HDS':
            hds_streams = HDSStream.parse_manifest(self.session,
                                                   target['url'])
            for item in hds_streams.items():
                yield item

        if target['type'] == 'HLS':
            hls_streams = HLSStream.parse_variant_playlist(self.session,
                                                           target['url'])
            for item in hls_streams.items():
                yield item
def _get_streams(self):
    """Yield HLS streams for the video embedded in the page at ``self.url``.

    The broadcaster id, video type and video id found in the page are used
    to query both the video API and the token API; the token is appended
    to each video URL. Nothing is yielded when no id is found in the page.
    """
    page = http.get(self.url)
    found = (self._video_id_re.search(page.text)
             or self._video_id_alt_re.search(page.text))
    if found is None:
        return

    ids = found.group('broadcaster_id', 'video_type', 'video_id')

    videos = http.get(self.DACAST_API_URL.format(*ids),
                      schema=self._api_schema)
    token = http.get(self.DACAST_TOKEN_URL.format(*ids),
                     schema=self._token_schema)

    seen = []
    for base_url in videos:
        video_url = base_url + token

        # Ignore duplicate video URLs
        if video_url in seen:
            continue
        seen.append(video_url)

        # Ignore HDS streams (broken)
        if '.m3u8' not in video_url:
            continue
        variants = HLSStream.parse_variant_playlist(self.session, video_url)
        for item in variants.items():
            yield item
def _get_streams(self):
    """Yield the HLS and MP4 streams referenced by the page at ``self.url``.

    Every distinct URL matched by ``self._files_re`` is inspected:

    * URLs containing ``.m3u8`` are expanded into HLS variant streams;
      those hosted on ``https://weltmediathek-vh.`` are first exchanged
      for the ``urlWithToken`` value returned by the token service.
    * URLs ending in ``.mp4`` are yielded as a single HTTP stream, named
      after the bitrate found in the URL, or ``"mp4"`` when none is found.
    """
    headers = {"User-Agent": useragents.FIREFOX, "Referer": self.url}
    res = http.get(self.url, headers=headers)

    # findall() always returns a list (never None), so test for emptiness.
    files = set(self._files_re.findall(res.text))
    if not files:
        return

    for url in files:
        if ".m3u8" in url:
            if url.startswith("https://weltmediathek-vh."):
                token_url = "https://www.welt.de/video/services/token/{0}".format(
                    quote(url, safe=""))
                res = http.get(token_url, headers=headers)
                url = http.json(res)["urlWithToken"]
            for s in HLSStream.parse_variant_playlist(
                    self.session, url, headers=headers).items():
                yield s
        elif url.endswith(".mp4"):
            # The bitrate pattern may not match at all; fall back to the
            # generic name instead of failing on a None match.
            m = self._mp4_bitrate_re.search(url)
            bitrate = m.group("bitrate") if m else None
            name = "{0}k".format(bitrate) if bitrate else "mp4"
            yield name, HTTPStream(self.session, url, headers=headers)
def _get_streams(self):
    """Yield the streams listed in the player data document.

    The data URL is extracted from ``self.url`` with
    ``self._player_url_schema``; each entry of the XML it points at is
    turned into HLS, HDS or plain HTTP streams depending on its URL.
    An ``IOError`` raised while handling an entry is logged as a warning
    and the remaining entries are still processed.
    """
    data_url = http.get(self.url, schema=self._player_url_schema)
    if not data_url:
        return

    info_res = http.get(urljoin(self.url, data_url))
    for entry in http.xml(info_res, schema=self._livestream_schema):
        url = entry["url"]
        try:
            if ".m3u8" in url:
                variants = HLSStream.parse_variant_playlist(
                    self.session, url, name_key="bitrate")
                for item in variants.items():
                    yield item
            elif ".f4m" in url:
                manifest = HDSStream.parse_manifest(
                    self.session, url, pvswf=self.swf_url, is_akamai=True)
                for item in manifest.items():
                    yield item
            elif ".mp4" in url:
                name = "{0}k".format(entry["bitrate"])
                yield name, HTTPStream(self.session, url)
        except IOError as err:
            self.logger.warning("Error parsing stream: {0}", err)
def _res_text(self, url):
    """Fetch a page, following redirects, and return its text.

    Args:
        url: URL with an embedded Video Player.

    Returns:
        Content of the response

    Raises:
        NoStreamsError: if the request fails with a 403 or 404 client
            error message.
    """
    try:
        res = http.get(url, allow_redirects=True)
    except Exception as e:
        reason = str(e)
        if "Received response with content-encoding: gzip" in reason:
            # Retry while only accepting "deflate" as content encoding
            retry_headers = {
                "User-Agent": useragents.FIREFOX,
                "Referer": self.referer,
                "Accept-Encoding": "deflate"
            }
            res = http.get(url, headers=retry_headers, allow_redirects=True)
        elif "403 Client Error" in reason:
            self.logger.error("Website Access Denied/Forbidden, you might be geo-blocked or other params are missing.")
            raise NoStreamsError(self.url)
        elif "404 Client Error" in reason:
            self.logger.error("Website was not found, the link is broken or dead.")
            raise NoStreamsError(self.url)
        else:
            raise e

    redirects = res.history
    if redirects:
        # Trace every hop and the final URL when redirects happened
        for hop in redirects:
            self.logger.debug("Redirect: {0} - {1}".format(hop.status_code, hop.url))
        self.logger.debug("URL: {0}".format(res.url))

    return res.text
def _get_streams(self):
    """Return the HLS streams for the playlist referenced by the page.

    The playlist URL found in the page is fetched; when the response is
    not itself an ``#EXTM3U`` playlist, its JSON ``redir`` value is used
    as the HLS URL instead.

    :return: the variant streams, a single ``{"live": ...}`` mapping when
             the playlist has no variants, or ``None`` when no playlist
             or redirect URL is available
    """
    headers = {"Referer": self.url, "User-Agent": useragents.FIREFOX}
    page = http.get(self.url, headers=headers)

    found = self._playlist_re.search(page.text)
    if not found:
        return

    playlist_res = http.get(found.group("url"), headers=headers)
    if playlist_res.text.startswith("#EXTM3U"):
        hls_url = found.group("url")
    else:
        hls_url = http.json(playlist_res).get("redir")

    if hls_url is None:
        return

    self.logger.debug("HLS URL: {0}".format(hls_url))
    variants = HLSStream.parse_variant_playlist(self.session, hls_url,
                                                headers=headers)
    if variants:
        return variants
    return {"live": HLSStream(self.session, hls_url, headers=headers)}
def _get_streams(self):
    """Return the HDS streams described by the page's playlist.

    The page at ``self.url`` points to a playlist, which in turn names
    the HDS manifest and the SWF passed as ``pvswf`` to
    ``HDSStream.parse_manifest``.

    :return: dict of streams; empty when the URL root, playlist, manifest
             or SWF cannot be found
    """
    # Discover root
    match = _url_re.search(self.url)
    if match is None:
        return {}
    root = match.group(1)

    # Download main URL
    res = http.get(self.url)

    # Find playlist; a page without one simply has no streams
    match = _playlist_re.search(res.text)
    if match is None:
        return {}
    playlist_url = root + match.group(1) + "d"

    # Download playlist
    res = http.get(playlist_url)

    # Find manifest
    match = _manifest_re.search(res.text)
    if match is None:
        return {}
    manifest_url = match.group(1)

    # Find SWF
    match = _swf_re.search(res.text)
    if match is None:
        return {}
    swf_url = match.group(1)

    streams = {}
    streams.update(
        HDSStream.parse_manifest(self.session, manifest_url, pvswf=swf_url))
    return streams
def _get_streams(self):
    """Yield the HLS streams for the channel named in ``self.url``.

    The page to scan depends on the domain: two domains use an embed URL
    built from the channel, the others use ``self.url`` itself. When the
    playlist has no variants, a single ``"live"`` stream is yielded.
    An ``IOError`` raised while reading the playlist is logged as an error.
    """
    url_parts = _url_re.match(self.url)
    channel = url_parts.group("channel")
    domain = url_parts.group("domain")

    headers = {"Referer": self.url, "User-Agent": useragents.FIREFOX}

    if domain == "canlitv.plus":
        page_url = EMBED_URL_1.format(channel)
    elif domain == "ecanlitvizle.net":
        page_url = EMBED_URL_2.format(channel)
    else:
        page_url = self.url
    res = http.get(page_url, headers=headers)

    found = _m3u8_re.search(res.text)
    if not found:
        return

    hls_url = found.group("url")
    if domain in ("canlitvlive.live", "canlitvlive.site"):
        hls_url = "http:" + hls_url

    self.logger.debug("Found URL: {0}".format(hls_url))

    try:
        got_variant = False
        variants = HLSStream.parse_variant_playlist(self.session, hls_url)
        for item in variants.items():
            got_variant = True
            yield item
        if not got_variant:
            yield "live", HLSStream(self.session, hls_url)
    except IOError as err:
        self.logger.error("Failed to extract streams: {0}", err)
def _get_streams(self):
    """Map the videos advertised for ``self.url`` to HLS and HDS streams.

    When the page contains a video id, the API is queried and videos are
    matched on their ``format`` (``hls`` / ``hds``). Otherwise the page is
    requested again with ``output=json`` and videos are matched on their
    ``playerType`` (``ios`` / ``flash``).

    :return: the result of applying the ``StreamMapper`` to the videos
    """
    page = http.get(self.url)
    id_match = _id_re.search(page.text)

    if id_match:
        # New-style page: ask the API about the embedded video id
        api_res = http.get(API_URL.format(id_match.group("id")))
        videos = http.json(api_res, schema=_video_schema)
        field, hls_key, hds_key = "format", "hls", "hds"
    else:
        # Old method: the page itself can return JSON
        json_res = http.get(self.url, params=dict(output="json"))
        videos = http.json(json_res, schema=_old_video_schema)
        field, hls_key, hds_key = "playerType", "ios", "flash"

    mapper = StreamMapper(cmp=lambda key, video: video[field] == key)
    mapper.map(hls_key, self._create_streams, "HLS",
               HLSStream.parse_variant_playlist)
    mapper.map(hds_key, self._create_streams, "HDS",
               HDSStream.parse_manifest)
    return mapper(videos)
def _login(self, username, password):
    '''Log in and store the resulting cookies in the session attributes.

    Every cookie held by ``http`` after the login request is saved with an
    expiry of one day.

    :param username: sent as the ``email`` form field
    :param password: sent as the ``pass`` form field
    :return: True when all expected cookies are present, False otherwise
    '''
    self.logger.debug('login ...')
    http.get(self.url)

    form = {
        'pass': password,
        'email': username,
        'done': 'livechat',
        'keep_login': 1
    }
    http.post(self.url_login, data=form, allow_redirects=True)

    attributes = self._session_attributes
    for cookie in http.cookies:
        attributes.set(cookie.name, cookie.value, expires=3600 * 24)

    required = ('fcu', 'fgcv', 'FCSID', 'login_status', 'glgd_val',
                'PHPSESSID', 'secure_check_fc2')
    if not all(attributes.get(name) for name in required):
        self.logger.error('Failed to login, check your username/password')
        return False

    self.logger.debug('New session data')
    self.set_expires_time_cache()
    return True
def _get_streams(self):
    """Return the streams for the channel or video named in ``self.url``.

    A URL without a media id is treated as a live channel: the live API is
    queried, a hosted channel replaces the hosting one, and nothing is
    returned while the channel is not live. A URL with a media id is
    treated as a video.

    :return: the result of ``_get_live_streams`` or ``_get_video_streams``
             for the player data, or ``None`` when there is nothing to play
    """
    url_match = _url_re.match(self.url)
    if not url_match:
        return

    channel, media_id = url_match.group("channel", "media_id")
    self.logger.debug("Matched URL: channel={0}, media_id={1}".format(
        channel, media_id))

    if media_id:
        media_type = "video"
    else:
        live_res = http.get(LIVE_API.format(channel))
        livestream = http.json(live_res, schema=_live_schema)

        hosted_raw = livestream.get("media_hosted_media")
        if hosted_raw:
            hosted = _live_schema.validate(hosted_raw)
            self.logger.info("{0} is hosting {1}",
                             livestream["media_user_name"],
                             hosted["media_user_name"])
            livestream = hosted

        if not livestream["media_is_live"]:
            return

        media_id = livestream["media_id"]
        media_type = "live"

    player_res = http.get(PLAYER_API.format(media_type, media_id))
    player = http.json(player_res, schema=_player_schema)

    if media_type == "live":
        return self._get_live_streams(player)
    return self._get_video_streams(player)
def _get_streams(self):
    """Return all streams listed for the main video of the page.

    The page supplies an API token and a content URL; the content document
    names the stream request URL, whose response is handed to
    ``self._extract_streams``.

    :return: dict merging every mapping produced by
             ``self._extract_streams``, or ``None`` when the page yields
             no API data
    """
    zdf_json = http.get(self.url, schema=_api_schema)
    if zdf_json is None:
        return

    headers = {
        "Api-Auth": "Bearer {0}".format(zdf_json['apiToken']),
        "Referer": self.url
    }

    content_res = http.get(zdf_json['content'], headers=headers)
    document = http.json(content_res, schema=_documents_schema)

    target = document["mainVideoContent"]["http://zdf.de/rels/target"]
    stream_request_url = API_URL + target["http://zdf.de/rels/streams/ptmd"]

    stream_res = http.get(stream_request_url, headers=headers)
    stream_info = http.json(stream_res, schema=_schema)

    merged = {}
    for formats in self._extract_streams(stream_info):
        merged.update(formats)
    return merged
def _get_streams(self):
    """Yield the HLS streams of the selected lesson part.

    Logs in with the ``email`` and ``password`` options, reads the lesson
    playlist from the page and yields the streams of every source of the
    part chosen by the ``part`` option (parts are counted from 1). An
    error is logged when that part is not found. Nothing is yielded when
    the login does not return a user.
    """
    user = self.login(self.options.get("email"),
                      self.options.get("password"))
    if not user:
        return

    self.logger.debug("Logged in to Schoolism as {0}", user)
    page = http.get(self.url, headers={"User-Agent": useragents.SAFARI_8})
    lesson_playlist = self.playlist_schema.validate(page.text)

    part = self.options.get("part")
    self.logger.info("Attempting to play lesson Part {0}", part)

    # make request to key-time api, to get key specific headers
    http.get(self.key_time_url, headers={"User-Agent": useragents.SAFARI_8})

    found = False
    for number, video in enumerate(lesson_playlist, 1):
        if not (video["sources"] and number == part):
            continue
        found = True
        for source in video["sources"]:
            variants = HLSStream.parse_variant_playlist(
                self.session,
                source["src"],
                headers={
                    "User-Agent": useragents.SAFARI_8,
                    "Referer": self.url
                })
            for item in variants.items():
                yield item

    if not found:
        self.logger.error("Could not find lesson Part {0}", part)
def _get_streams(self):
    """Yield every stream available for the video behind ``self.url``.

    The video id comes from the URL or, failing that, from the page body;
    nothing is yielded when neither provides one. The API answer is then
    turned into streams:

    * when ``ID_DM`` is set, the streams of the matching Dailymotion video;
    * for each distinct, non-empty URL in ``MEDIA.VIDEOS``: HDS streams
      (skipped when ``TYPE`` contains ``LIVE``), HLS streams, or a single
      HTTP stream named after the bitrate in the file name (falling back
      to the quality key).

    An ``IOError`` raised while loading one URL is logged and the
    remaining URLs are still processed.
    """
    # Get video ID and channel from URL
    match = self._url_re.match(self.url)
    video_id = match.group('video_id')
    if video_id is None:
        # Retrieve URL page and search for video ID
        res = http.get(self.url)
        match = self._video_id_re.search(res.text)
        if match is None:
            return
        video_id = match.group('video_id')

    res = http.get(self.API_URL.format(video_id))
    videos = http.json(res, schema=self._api_schema)
    parsed = set()
    headers = {'User-Agent': self._user_agent}

    # Some videos may be also available on Dailymotion (especially on CNews)
    if videos['ID_DM'] != '':
        for stream in self.session.streams('https://www.dailymotion.com/video/' + videos['ID_DM']).items():
            yield stream

    for quality, video_url in list(videos['MEDIA']['VIDEOS'].items()):
        # Ignore empty URLs
        if video_url == '':
            continue

        # Ignore duplicate video URLs
        if video_url in parsed:
            continue
        parsed.add(video_url)

        try:
            # HDS streams don't seem to work for live videos
            if '.f4m' in video_url and 'LIVE' not in videos['TYPE']:
                for stream in HDSStream.parse_manifest(self.session,
                                                       video_url,
                                                       params={'hdcore': self.HDCORE_VERSION},
                                                       headers=headers).items():
                    yield stream
            elif '.m3u8' in video_url:
                for stream in HLSStream.parse_variant_playlist(self.session,
                                                               video_url,
                                                               headers=headers).items():
                    yield stream
            elif '.mp4' in video_url:
                # Get bitrate from video filename
                match = self._mp4_bitrate_re.match(video_url)
                if match is not None:
                    bitrate = match.group('bitrate')
                else:
                    bitrate = quality
                yield bitrate, HTTPStream(self.session,
                                          video_url,
                                          params={'secret': self.SECRET},
                                          headers=headers)
        except IOError as err:
            if '403 Client Error' in str(err):
                self.logger.error('Failed to access stream, may be due to geo-restriction')
            else:
                # Report other failures too instead of dropping them silently
                self.logger.error('Failed to access stream: {0}'.format(err))
def _get_qq_streams(self, vid):
    """Yield an HTTP stream and then an HLS stream for ``vid``.

    ``QQ_STREAM_INFO_URL`` is queried with the second argument set to 1
    for the HTTP stream and to 2 for the HLS stream. Both streams are
    yielded under the name ``"live"``.

    :param vid: id inserted into ``QQ_STREAM_INFO_URL``
    """
    for variant, stream_type in ((1, HTTPStream), (2, HLSStream)):
        info_res = http.get(QQ_STREAM_INFO_URL % (vid, variant))
        stream_url = http.json(info_res, schema=_qq_schema)
        yield "live", stream_type(self.session, stream_url)
def _get_streams(self):
    """
    Get the config object from the page source and call the API to get
    the list of streams.

    The API URL is built from the selected video hash id when the config
    has one, otherwise from the live stream URL; nothing is yielded when
    the config is invalid or has neither. An ``error`` entry in the API
    answer is logged instead of producing streams.

    :return: generator of ``(name, stream)`` pairs
    """
    # attempt a login
    self.login()

    res = http.get(self.url)
    # decode the config for the page
    matches = self.config_re.finditer(res.text)
    try:
        config = self.config_schema.validate(
            dict(m.group("key", "value") for m in matches))
    except PluginError:
        return

    if config["selectedVideoHID"]:
        self.logger.debug("Found video hash ID: {0}",
                          config["selectedVideoHID"])
        api_url = urljoin(
            self.url,
            urljoin(config["videosURL"], config["selectedVideoHID"]))
    elif config["livestreamURL"]:
        self.logger.debug("Found live stream URL: {0}",
                          config["livestreamURL"])
        api_url = urljoin(self.url, config["livestreamURL"])
    else:
        return

    ares = http.get(api_url)
    data = http.json(ares, schema=self.api_schema)
    viewing_urls = data["viewing_urls"]

    if "error" in viewing_urls:
        self.logger.error("Failed to load streams: {0}",
                          viewing_urls["error"])
        return

    for url in viewing_urls["urls"]:
        # Prefer "res", then "label", then the generic name. The fallback
        # key is only read when needed, so an entry that has "res" but no
        # "label" keeps its resolution-based name.
        if "res" in url:
            label = "{0}p".format(url["res"])
        elif "label" in url:
            label = "{0}p".format(url["label"])
        else:
            label = "live"

        if url["type"] == "rtmp/mp4" and RTMPStream.is_usable(self.session):
            params = {
                "rtmp": url["src"],
                "pageUrl": self.url,
                "live": True,
            }
            yield label, RTMPStream(self.session, params)
        elif url["type"] == "application/x-mpegURL":
            for s in HLSStream.parse_variant_playlist(
                    self.session, url["src"]).items():
                yield s
def _get_stream_info(self, url):
    """Return the program data for the player embedded in ``url``.

    :param url: page expected to reference ``embed.swf`` with a numeric
                ``p`` parameter
    :return: the ``BEAT_PROGRAM`` response validated with ``_schema``, or
             ``None`` when the page has no such reference
    """
    page = http.get(url, headers=HEADERS)
    found = re.search(r"embed.swf\?p=(\d+)", page.text)
    if not found:
        return

    program_res = http.get(BEAT_PROGRAM.format(found.group(1)),
                           headers=HEADERS)
    return http.json(program_res, schema=_schema)
def _get_streams(self):
    """Yield the streams of the channel or VOD named in ``self.url``.

    A name ending in ``.flv`` is tried as a VOD first; when VOD streams
    are found they are the only ones yielded. Otherwise the channel API is
    queried: a 404 answer or an offline channel ends the search with an
    error log. The edge server marked as preferred is then used to create
    an HLS and/or flash stream for every matching entry in ``techs``.
    """
    url_channel_name = self._url_re.match(self.url).group(1)

    # Handle VODs first, since their "channel name" is different
    if url_channel_name.endswith(".flv"):
        self.logger.debug("Possible VOD stream...")
        vod_streams = self._get_vod_stream(http.get(self.url))
        if vod_streams:
            for item in vod_streams.items():
                yield item
            return
        self.logger.warning("Probably a VOD stream but no VOD found?")

    channel_res = http.get(
        self.CHANNEL_API_URL.format(channel=url_channel_name),
        raise_for_status=False)
    if channel_res.status_code == 404:
        self.logger.error(
            "The channel {0} does not exist".format(url_channel_name))
        return

    channel_info = json.loads(channel_res.text)
    if channel_info["online"] is not True:
        self.logger.error("The channel {0} is currently offline".format(
            url_channel_name))
        return

    token = "public"
    channel = channel_info["name"]

    # Extract preferred edge server and available techs from the undocumented channel API
    balancing_res = http.post(self.VIDEO_API_URL,
                              data={"loadbalancinginfo": channel})
    balancing = json.loads(balancing_res.text)
    pref = balancing["preferedEdge"]

    # First edge whose id is the preferred one; None when there is none
    server = next(
        (edge["ep"] for edge in balancing["edges"] if edge["id"] == pref),
        None)
    self.logger.debug(
        "Using load balancing server {0} : {1} for channel {2}", pref,
        server, channel)

    for tech in balancing["techs"]:
        label = tech["label"]
        if label == "HLS":
            hls_streams = self._create_hls_stream(server, channel, token)
            for item in hls_streams.items():
                yield item
        elif label == "RTMP Flash":
            yield "live", self._create_flash_stream(server, channel, token)
def _get_streams(self):
    """Yield a ``"live"`` HLS stream for each player URL in the page's script.

    The script reference is located with ``self.js_re``; every URL that
    ``self.player_re`` finds in the script is yielded unless it contains
    ``adblock``. Nothing is yielded when the page has no script reference.
    """
    page = http.get(self.url, headers={"User-Agent": useragents.CHROME})
    js_match = self.js_re.search(page.text)
    if not js_match:
        return

    self.logger.debug("Found js key: {0}", js_match.group(1))
    script = http.get(urljoin(self.url, js_match.group(0)))

    for player_url in self.player_re.findall(script.text):
        if "adblock" in player_url:
            continue
        yield "live", HLSStream(self.session, player_url)
def _get_vod_streams(self):
    """Return the VOD streams for the video id found in the page.

    :return: the VOD API response validated with ``self._vod_api_schema``,
             or an empty list when the page contains no video id
    """
    # Fetch the page and look for the video ID
    page = http.get(self.url)
    found = self._video_id_re.search(page.text)
    if found is None:
        return []

    api_res = http.get(self.VOD_API_URL.format(found.group('video_id')))
    return http.json(api_res, schema=self._vod_api_schema)
def _get_live_streams(self, subdomain):
    """
    Get the live stream in a particular language

    :param subdomain: value inserted into ``self._live_api_url``
    :return: the variant streams parsed from the ``primary`` URL of the
             stream API response
    """
    live_res = http.get(self._live_api_url.format(subdomain))
    live_info = http.json(live_res, schema=self._live_schema)

    # The live API answer only points at the actual stream API
    stream_res = http.get(live_info[u"url"])
    stream_info = http.json(stream_res, schema=self._stream_api_schema)

    primary_url = stream_info[u'primary']
    return HLSStream.parse_variant_playlist(self.session, primary_url)
def _get_streams(self):
    """Collect the HDS, RTMP and HLS streams named in the stream info XML.

    * ``hdsurl``: parsed as an HDS manifest when it has text.
    * ``vurls``: only when ``live`` is ``"1"``; one RTMP stream per child,
      named after its ``bitrate`` attribute or its position.
    * ``mobileurls``: each distinct ``http`` URL whose path ends in
      ``m3u8`` is parsed as an HLS variant playlist.

    :return: dict of streams, or ``None`` when the page has no info id
    """
    page = http.get(self.url)
    id_match = _meta_xmlurl_id_re.search(page.text)
    if not id_match:
        return

    info_res = http.get(STREAMS_INFO_URL.format(id_match.group(1)))
    info = http.xml(info_res)

    live_el = info.find("live")
    is_live = live_el is not None and live_el.text == "1"

    streams = {}

    hds_el = info.find("hdsurl")
    if hds_el is not None and hds_el.text is not None:
        streams.update(HDSStream.parse_manifest(self.session, hds_el.text))

    if is_live:
        vurls_el = info.find("vurls")
        if vurls_el is not None:
            for index, vurl_el in enumerate(vurls_el):
                bitrate = vurl_el.get("bitrate")
                if bitrate is None:
                    name = "rtmp{0}".format(index)
                else:
                    name = bitrate + "k"
                streams[name] = RTMPStream(self.session,
                                           {"rtmp": vurl_el.text})

    seen = set()
    mobile_el = info.find("mobileurls")
    if mobile_el is not None:
        for entry in mobile_el:
            text = entry.text
            # Skip empty entries and URLs that were already handled
            if not text or text in seen:
                continue
            seen.add(text)

            parts = urlparse(text)
            if parts[0] == "http" and parts[2].endswith("m3u8"):
                streams.update(
                    HLSStream.parse_variant_playlist(self.session, text))

    return streams
def _get_streams(self):
    """Return the HLS streams for the video source found in the page.

    When the page embeds a player in an iframe, the iframe page is
    searched instead of the original page.

    :return: the variant streams, or ``None`` when no source ending in
             ``m3u8`` is found
    """
    page = http.get(self.url)

    # some pages have embedded players
    iframe_match = self.iframe_re.search(page.text)
    if iframe_match:
        page = http.get(urljoin(self.url, iframe_match.group("url")))

    video = self.src_re.search(page.text)
    if not video:
        return

    stream_src = video.group("url")
    if stream_src and stream_src.endswith("m3u8"):
        return HLSStream.parse_variant_playlist(self.session, stream_src)
def _get_streams(self):
    """Yield the HLS streams for a live or movie page.

    The first URL group selects the mode, compared case-insensitively.
    In ``live`` mode every playlist is expanded, and the variants of a
    playlist flagged ``isSource`` are all named ``"source"``. In ``movie``
    mode the single playlist, when present, is expanded as is.
    """
    stream_type = _url_re.match(self.url).group(1).lower()

    if stream_type == "live":
        self.logger.debug("Searching the page for live stream URLs")
        playlists = http.get(self.url, schema=_live_schema)
        for playlist in playlists:
            variants = HLSStream.parse_variant_playlist(self.session,
                                                        playlist["url"])
            for quality, stream in variants.items():
                name = "source" if playlist["isSource"] else quality
                yield name, stream

    elif stream_type == "movie":
        self.logger.debug("Searching the page for VOD stream URLs")
        playlist = http.get(self.url, schema=_movie_schema)
        if not playlist:
            return
        variants = HLSStream.parse_variant_playlist(self.session, playlist)
        for item in variants.items():
            yield item
def _get_streams(self):
    """Return the HLS streams of the player embedded in an iframe.

    The iframe page is requested with ``self.url`` as referer and its text
    is validated with ``self.stream_schema`` to obtain the stream URL,
    which inherits the scheme of ``self.url`` when it has none.

    :return: the variant streams, or ``None`` when no iframe is found
    """
    # Merge the user agent into the shared headers rather than replacing
    # the whole mapping, so headers already set on ``http`` are kept.
    http.headers.update({"User-Agent": useragents.CHROME})

    res = http.get(self.url)
    iframe_url = self.find_iframe(res)
    if not iframe_url:
        return

    self.logger.debug("Found iframe: {0}", iframe_url)
    res = http.get(iframe_url, headers={"Referer": self.url})
    stream_url = update_scheme(self.url,
                               self.stream_schema.validate(res.text))
    return HLSStream.parse_variant_playlist(
        self.session,
        stream_url,
        headers={"User-Agent": useragents.CHROME})
def _get_playlist_url(self):
    """Return the playlist URL advertised by the stream player page.

    Both pages are matched with their newlines removed.

    :return: the first group of ``_file_re`` on the player page, or
             ``None`` when either the id or the file URL is not found
    """
    # get the id
    page = http.get(self.url)
    id_match = _id_re.match(page.text.replace("\n", ""))
    if not id_match:
        return

    # get the m3u8 file url
    player_page = http.get(_stream_player_url.format(id_match.group(1)))
    file_match = _file_re.match(player_page.text.replace("\n", ""))
    if not file_match:
        return
    return file_match.group(1)
def _get_streams(self):
    """Yield a ``"live"`` HLS stream for each player URL in the page's script.

    The user agent, and later the referer, are added to the shared
    ``http`` headers. URLs containing ``adblock`` are skipped, and nothing
    is yielded when the page has no script reference.
    """
    http.headers.update({"User-Agent": useragents.CHROME})
    page = http.get(self.url)

    self.logger.debug("search for js_re")
    js_match = self.js_re.search(page.text)
    if not js_match:
        return

    self.logger.debug("Found js key: {0}", js_match.group(1))
    js_url = js_match.group(0)

    http.headers.update({"Referer": self.url})
    script = http.get(urljoin(self.url, js_url))

    self.logger.debug("search for player_re")
    for player_url in self.player_re.findall(script.text):
        self.logger.debug("Found url: {0}".format(player_url))
        if "adblock" in player_url:
            continue
        yield "live", HLSStream(self.session, player_url)
def _get_streams(self):
    """Return the HLS streams of the first ``hls`` source of the player.

    The play page for the video id names the player script; the script
    text is validated with ``self.setup_schema`` and the sources of its
    first playlist entry are scanned.

    :return: the variant streams of the first source typed ``hls``
             (its file URL prefixed with ``https:``), or ``None`` when the
             script or such a source is missing
    """
    vid = self.url_re.match(self.url).group(1)
    self.logger.debug("Found video ID: {0}", vid)

    page = http.get(self.play_url.format(vid=vid))
    js_match = self.js_re.search(page.text)
    if not js_match:
        return

    js_url = js_match.group(1)
    self.logger.debug("Loading player JS: {0}", js_url)

    script = http.get(js_url)
    setup = self.setup_schema.validate(script.text)

    for source in setup["playlist"][0]["sources"]:
        if source["type"] != "hls":
            continue
        return HLSStream.parse_variant_playlist(self.session,
                                                "https:" + source["file"])