def _verify_video_password(self, webpage, url, video_id):
    """Submit the --video-password for a password-protected share.

    Returns the validated page content and its URL handle; raises
    ExtractorError when no password was given or validation failed.
    """
    password = self._downloader.params.get('videopassword')
    if password is None:
        raise ExtractorError(
            'This video is protected by a password, use the --video-password option',
            expected=True)
    token = self._search_regex(
        r'<input[^>]+?name="requesttoken" value="([^\"]+)"',
        webpage, 'requesttoken')
    form_data = urlencode_postdata({
        'requesttoken': token,
        'password': password,
    })
    validation_response, urlh = self._download_webpage_handle(
        url, video_id,
        note='Validating Password...',
        errnote='Wrong password?',
        data=form_data)
    # If the password <label> is still present, authentication failed.
    still_protected = self._search_regex(
        r'<label[^>]+?for="(password)"', validation_response,
        'password field', fatal=False, default=None)
    warning = self._search_regex(
        r'<div[^>]+?class="warning">([^<]*)</div>', validation_response,
        'warning', fatal=False,
        default="The password is wrong. Try again.")
    if still_protected is not None:
        raise ExtractorError(
            'Login failed, %s said: %r' % (self.IE_NAME, warning),
            expected=True)
    return validation_response, urlh
def _real_extract(self, url):
    """Resolve an Opencast playlist URL into a playlist of parsed episodes."""
    mobj = re.match(self._VALID_URL, url)
    host, video_id = mobj.group('host'), mobj.group('id')
    api_json = self._call_api(host, video_id, '', note='Downloading video JSON')
    search_results = api_json.get('search-results', {})
    if 'result' not in search_results:
        raise ExtractorError('Playlist was not found')
    results = search_results.get('result', {})
    # A single-episode playlist comes back as a bare dict, not a list.
    if isinstance(results, dict):
        results = [results]
    entries = [
        self._parse_mediapackage(episode.get('mediapackage', {}))
        for episode in results
    ]
    if not entries:
        raise ExtractorError('Playlist has no entries')
    # The series name of the first entry doubles as the playlist title.
    return self.playlist_result(
        entries, playlist_id=video_id,
        playlist_title=entries[0].get('series'))
def _real_extract(self, url):
    """Follow a Moodle kalvidres activity through its LTI launch to the
    underlying Kaltura video and delegate extraction to the Kaltura IE."""
    video_id = re.match(self._VALID_URL, url).group('id')
    # Locate the embedded player iframe on the activity view page.
    view_webpage = self._download_webpage(
        url, video_id, 'Downloading kalvidres video view webpage')
    iframe = re.search(
        r'<iframe[^>]+class="kaltura-player-iframe"[^>]+src=(["\'])(?P<url>[^"\']+)\1',
        view_webpage)
    if not iframe:
        raise ExtractorError('Unable to extract kalvidres launch url')
    launch_url = html.unescape(iframe.group('url'))
    # Collect the hidden LTI form fields and the form's target URL.
    launch_webpage = self._download_webpage(
        launch_url, video_id, 'Downloading kalvidres launch webpage')
    launch_inputs = self._form_hidden_inputs(self._LAUNCH_FORM, launch_webpage)
    launch_form_str = self._search_regex(
        r'(?P<form><form[^>]+?id=(["\'])%s\2[^>]*>)' % self._LAUNCH_FORM,
        launch_webpage, 'login form', group='form')
    action_url = extract_attributes(launch_form_str).get('action')
    # Submit the LTI launch; the app responds with a JavaScript redirect.
    submit_page, start_urlh = self._download_webpage_handle(
        action_url, video_id, 'Launch kalvidres app',
        data=urlencode_postdata(launch_inputs))
    redirect = re.search(
        r'window.location.href = \'(?P<url>[^\']+)\'', submit_page)
    if not redirect:
        raise ExtractorError('Unable to extract kalvidres redirect url')
    # Follow the redirect; the landing page embeds the Kaltura player URL.
    redirect_page, redirect_urlh = self._download_webpage_handle(
        html.unescape(redirect.group('url')), video_id,
        'Follow kalvidres redirect')
    kultura_url = KalturaIE._extract_url(redirect_page)
    if not kultura_url:
        raise ExtractorError('Unable to extract kaltura url')
    return {
        '_type': 'url',
        'url': kultura_url,
        'ie_key': 'Kaltura',
    }
def _extract_playlist(self, playlist, playlist_id=None, require_title=True):
    """Turn a Picta channel-playlist API response into a plain info dict.

    Each entry's ``slug_url`` is resolved with an extra API call because
    the playlist payload does not include it.

    Raises ExtractorError when the response carries no results.
    """
    # Use .get with a default so a malformed API response raises our
    # ExtractorError instead of a bare KeyError (consistent with the
    # user-playlist extractor).
    results = playlist.get("results", [])
    if not results:
        raise ExtractorError("Cannot find playlist!")
    title = results[0]["nombre"] if require_title else results[0].get("nombre")
    thumbnail = None
    # Fall back to an empty list so a missing "publicacion" key doesn't
    # crash the loop below with a TypeError.
    entries = try_get(playlist, lambda x: x["results"][0]["publicacion"]) or []
    # Playlist entries need their slug_url filled in from the publication API.
    for entry in entries:
        video_id = entry.get("id")
        json_url = API_BASE_URL + "publicacion/?format=json&id=%s" % video_id
        video = self._download_json(json_url, video_id, "Downloading video JSON")
        info = self._extract_video(video, video_id)
        entry["slug_url"] = info.get("slug_url")
    return {
        "id": try_get(playlist, lambda x: x["results"][0]["id"], compat_str)
        or playlist_id,
        "title": title,
        "thumbnail": thumbnail,
        "entries": entries,
    }
def _entries(self, playlist_id):
    """Yield url_result dicts for every video in a Picta user playlist.

    Raises login-required on HTTP 403 and ExtractorError when the
    playlist is empty or missing.
    """
    json_url = self.API_PLAYLIST_ENDPOINT + "?format=json&id=%s" % playlist_id
    headers = self._set_auth_basic()
    try:
        playlist = self._download_json(
            json_url, playlist_id, "Downloading playlist JSON",
            headers=headers)
    except ExtractorError as e:
        if isinstance(e.cause, compat_HTTPError) and e.cause.code in (403, ):
            # raise_login_required raises itself; no outer `raise` needed.
            self.raise_login_required(
                msg=
                "This playlist is only available for registered users. Check your username and password"
            )
        # Don't silently swallow other download errors (the original
        # fell through here, leaving an empty playlist dict).
        raise
    # Explicit check instead of `assert`, which is stripped under -O.
    if playlist.get("count", 0) < 1:
        raise ExtractorError("Playlist no exists!")
    info_playlist = self._extract_playlist(playlist, playlist_id)
    for video in info_playlist.get("entries") or []:
        video_id = video.get("id")
        video_url = (ROOT_BASE_URL + "medias/" + video.get("slug_url") + "?" +
                     "playlist=" + playlist_id)
        yield self.url_result(video_url, PictaIE.ie_key(), video_id)
def _verify_video_password(self, url, video_id, webpage):
    """Validate the --video-password against Zoom's recording endpoint.

    Raises ExtractorError when no password is supplied or when Zoom
    rejects it (non-zero errorCode).
    """
    password = self._downloader.params.get('videopassword')
    if password is None:
        raise ExtractorError(
            'This video is protected by a password, use the --video-password option',
            expected=True)
    meet_id = self._search_regex(
        r'<input[^>]+?id="meetId" value="([^\"]+)"', webpage, 'meetId')
    payload = urlencode_postdata({
        'id': meet_id,
        'passwd': password,
        'action': "viewdetailedpage",
        'recaptcha': "",
    })
    # The validation endpoint lives on the same zoom.us subdomain as `url`.
    validation_url = url.split("zoom.us")[0] + "zoom.us/rec/validate_meet_passwd"
    validation_response = self._download_json(
        validation_url, video_id,
        note='Validating Password...',
        errnote='Wrong password?',
        data=payload)
    if validation_response['errorCode'] != 0:
        raise ExtractorError(
            'Login failed, %s said: %r'
            % (self.IE_NAME, validation_response['errorMessage']),
            expected=True)
def _real_extract(self, url):
    """Extract a single Opencast episode from the search API."""
    mobj = re.match(self._VALID_URL, url)
    host, video_id = mobj.group('host'), mobj.group('id')
    api_json = self._call_api(host, video_id, '', note='Downloading video JSON')
    search_results = api_json.get('search-results', {})
    if 'result' not in search_results:
        raise ExtractorError('Video was not found')
    result = search_results.get('result', {})
    # A list here means several episodes matched a single-video query.
    if not isinstance(result, dict):
        raise ExtractorError(
            'More than one video was unexpectedly returned.')
    return self._parse_mediapackage(result.get('mediapackage', {}))
def _real_extract(self, url):
    """Test-suite stub extractor: id '0' errors out, id '2' gains an
    extra format, everything else gets just the default format."""
    video_id = self._match_id(url)
    if video_id == '0':
        raise ExtractorError('foo')
    formats = [{
        'format_id': 'default',
        'url': 'url:',
    }]
    if video_id == '2':
        formats.append({
            'format_id': 'extra',
            'url': TEST_URL,
        })
    return {
        'id': video_id,
        'title': 'Video %s' % video_id,
        'formats': formats,
    }
def _extract_video(video, video_id=None, require_title=True):
    """Map a Picta publication API response onto a flat info dict.

    Raises ExtractorError when the response carries no results.
    """
    # Use .get with a default so a malformed response raises our
    # ExtractorError instead of a bare KeyError.
    results = video.get("results", [])
    if not results:
        raise ExtractorError("Cannot find video!")
    title = results[0]["nombre"] if require_title else results[0].get("nombre")
    description = try_get(video, lambda x: x["results"][0]["descripcion"],
                          compat_str)
    slug_url = try_get(video, lambda x: x["results"][0]["slug_url"],
                       compat_str)
    uploader = try_get(video,
                       lambda x: x["results"][0]["usuario"]["username"],
                       compat_str)
    add_date = try_get(video, lambda x: x["results"][0]["fecha_creacion"])
    timestamp = int_or_none(unified_timestamp(add_date))
    thumbnail = try_get(video, lambda x: x["results"][0]["url_imagen"])
    manifest_url = try_get(video, lambda x: x["results"][0]["url_manifiesto"])
    category = try_get(
        video,
        lambda x: x["results"][0]["categoria"]["tipologia"]["nombre"],
        compat_str,
    )
    # try_get yields None when the key is missing OR the list is empty,
    # instead of a KeyError on a missing "lista_reproduccion_canal".
    playlist_channel = try_get(
        video, lambda x: x["results"][0]["lista_reproduccion_canal"][0])
    subtitle_url = try_get(video, lambda x: x["results"][0]["url_subtitulo"])
    return {
        "id": try_get(video, lambda x: x["results"][0]["id"], compat_str)
        or video_id,
        "title": title,
        "slug_url": slug_url,
        "description": description,
        "thumbnail": thumbnail,
        "uploader": uploader,
        "timestamp": timestamp,
        "category": [category] if category else None,
        "manifest_url": manifest_url,
        "playlist_channel": playlist_channel,
        "subtitle_url": subtitle_url,
    }
def _extract_playlist(self, playlist, playlist_id=None, require_title=True):
    """Turn a Picta user-playlist API response into a plain info dict.

    Raises ExtractorError when the response carries no results.
    """
    if not playlist.get("results", []):
        raise ExtractorError("Cannot find playlist!")
    first = playlist["results"][0]
    # With require_title a missing "nombre" must raise; otherwise None is fine.
    title = first["nombre"] if require_title else first.get("nombre")
    return {
        "id": try_get(playlist, lambda x: x["results"][0]["id"], compat_str)
        or playlist_id,
        "title": title,
        "thumbnail": try_get(playlist,
                             lambda x: x["results"][0].get("url_imagen")),
        "entries": try_get(playlist,
                           lambda x: x["results"][0]["publicaciones"]),
    }
def _real_extract(self, url):
    """Perform the Moodle LTI launch for an Opencast activity and hand
    back the URL the launched app lands on."""
    mobj = re.match(self._VALID_URL, url)
    video_id = mobj.group('id')
    launch_url = (mobj.group('scheme') + mobj.group('host')
                  + mobj.group('path') + '/mod/lti/launch.php?id=' + video_id)
    # Collect the hidden LTI form fields and the form's target URL.
    launch_webpage = self._download_webpage(
        launch_url, video_id, 'Downloading opencast lti launch webpage')
    launch_inputs = self._form_hidden_inputs(self._LAUNCH_FORM, launch_webpage)
    launch_form_str = self._search_regex(
        r'(?P<form><form[^>]+?id=(["\'])%s\2[^>]*>)' % self._LAUNCH_FORM,
        launch_webpage, 'login form', group='form')
    action_url = extract_attributes(launch_form_str).get('action')
    # Submit the launch form; a non-200 status means the launch failed.
    submit_page, start_urlh = self._download_webpage_handle(
        action_url, video_id, 'Launch opencast app',
        data=urlencode_postdata(launch_inputs))
    if start_urlh.status != 200:
        raise ExtractorError('Unable to launch opencast app', expected=True)
    return {
        '_type': 'url',
        'url': start_urlh.geturl(),
    }
def report_warning(self, message):
    """Turn any warning into a hard failure so tests can't pass while
    silently emitting warnings."""
    raise ExtractorError(message)
def _real_extract(self, url):
    """Extract a Helixmedia video embedded in Moodle via an LTI launch.

    Flow: build the LTI launch URL from the page URL, submit the hidden
    launch form, rewrite the resulting mediaserver URL, then parse the
    player's JS model for title/formats/thumbnail.
    """
    mobj = re.match(self._VALID_URL, url)
    scheme = mobj.group('scheme')
    host = mobj.group('host')
    path = mobj.group('path')
    video_id = mobj.group('id')
    launch_url = scheme + host + path + '/mod/helixmedia/launch.php?type=1&id=' + video_id
    # webpage = self._download_webpage(url, video_id)
    # Collect the hidden LTI form fields and the form's target URL.
    launch_webpage = self._download_webpage(
        launch_url, video_id, 'Downloading helixmedia launch webpage')
    launch_inputs = self._form_hidden_inputs(self._LAUNCH_FORM, launch_webpage)
    launch_form_str = self._search_regex(
        r'(?P<form><form[^>]+?id=(["\'])%s\2[^>]*>)' % self._LAUNCH_FORM,
        launch_webpage, 'login form', group='form')
    action_url = extract_attributes(launch_form_str).get('action')
    submit_page, start_urlh = self._download_webpage_handle(
        action_url, video_id, 'Launch helixmedia app',
        data=urlencode_postdata(launch_inputs))
    # A successful launch redirects to a URL carrying UploadSessionId.
    if 'UploadSessionId' not in start_urlh.geturl():
        raise ExtractorError('Unable to launch helixmedia video',
                             expected=True)
    # Rewrite the landed URL: append player sizing params to the query
    # (index 4) and the 'Split' suffix to the path (index 2).
    parsed_mediaserver_url = list(
        compat_urllib_parse_urlparse(start_urlh.geturl()))
    parsed_mediaserver_url[4] += '&mobile=N&fullWidth=940&fullHeight=906'
    parsed_mediaserver_url[2] += 'Split'
    mediaserver_url = compat_urllib_parse.urlunparse(parsed_mediaserver_url)
    video_webpage = self._download_webpage(mediaserver_url, video_id,
                                           'Downloading video details')
    # The page defines `var model = {...};` in loose JS — normalize to JSON.
    video_model = json.loads(
        js_to_json(
            self._search_regex(r'var model = ([^;]+);', video_webpage,
                               'video model')))
    video_title = video_model.get('VideoTitle', None)
    video_description = video_model.get('VideoDescription', '')
    video_id = str(video_model.get('VideoId', video_id))
    download_url = video_model.get('DownloadUrl', None)
    # The player JSON lives either under PlayScreenVm.VodPlayerModel or
    # directly under VodPlayerModel, depending on the page variant.
    video_json = json.loads(
        video_model.get('PlayScreenVm', {}).get('VodPlayerModel',
                                                {}).get('PlayerJson', '{}'))
    if video_json == {}:
        video_json = json.loads(
            video_model.get('VodPlayerModel', {}).get('PlayerJson', '{}'))
    if video_title is None:
        video_title = video_json.get('abouttext', 'Unknown title')
    # The thumbnail is derived from the first subtitle track's .vtt file.
    thumbnail_list = video_json.get('tracks', [])
    thumbnail = None
    if len(thumbnail_list) >= 1:
        thumbnail = thumbnail_list[0].get('file', None)
    if thumbnail is not None:
        thumbnail = thumbnail.replace('vtt', 'jpg')
    sources_list = video_json.get('sources', [])
    formats = []
    for track in sources_list:
        href = track['file']
        ext = determine_ext(href, None)
        if ext == 'mpd':
            # DASH
            formats.extend(
                self._extract_mpd_formats(href, video_id, mpd_id='dash',
                                          fatal=False))
        elif ext == 'm3u8':
            # HLS
            formats.extend(
                self._extract_m3u8_formats(href, video_id, m3u8_id='hls',
                                           entry_protocol='m3u8_native',
                                           fatal=False))
        elif ext == 'f4m':
            # HDS
            formats.extend(
                self._extract_f4m_formats(href, video_id, f4m_id='hds',
                                          fatal=False))
        elif ext == 'smil':
            formats.extend(
                self._extract_smil_formats(href, video_id, fatal=False))
        else:
            track_obj = {
                'url': href,
                'ext': ext,
            }
            formats.append(track_obj)
    # A direct download link becomes an extra format; its extension is
    # sniffed with a HEAD request.
    if download_url is not None:
        ext_req = HEADRequest(download_url)
        ext_handle = self._request_webpage(ext_req, video_id,
                                           note='Determining extension')
        ext = self.urlhandle_detect_ext(ext_handle)
        track_obj_direct = {
            'url': download_url,
            'ext': ext,
        }
        formats.append(track_obj_direct)
    self._sort_formats(formats)
    result_obj = {'formats': formats}
    if video_id is not None:
        result_obj.update({'id': video_id})
    if video_title is not None:
        result_obj.update({'title': video_title})
    # NOTE(review): the description is stored under 'creator', and this
    # check is always true since video_description defaults to '' —
    # confirm whether 'description' was intended.
    if video_description is not None:
        result_obj.update({'creator': video_description})
    if thumbnail is not None:
        result_obj.update({'thumbnail': thumbnail})
    return result_obj
def _real_extract(self, url):
    """Extract a Google Drive video.

    Tries the get_video_info endpoint first; when that fails with a
    reason and no title, falls back to scraping the legacy docs page.
    Additionally probes the uc?export=download endpoint for an original
    'source' format.
    """
    video_id = self._match_id(url)
    # First try the new method
    video_info = compat_parse_qs(
        self._download_webpage('https://drive.google.com/get_video_info',
                               video_id, query={'docid': video_id})
    )

    def get_value(key):
        # compat_parse_qs maps each key to a list; take the first value.
        return try_get(video_info, lambda x: x[key][0])

    reason = get_value('reason')
    title = get_value('title')
    use_old_webpage = False
    if not title and reason:
        # get_video_info refused — scrape the legacy docs page instead.
        use_old_webpage = True
        webpage = self._download_webpage(
            'http://docs.google.com/file/d/%s' % video_id, video_id)
        title = self._search_regex(
            r'"title"\s*,\s*"([^"]+)', webpage, 'title', default=None
        ) or self._og_search_title(webpage)
        duration = int_or_none(
            self._search_regex(r'"length_seconds"\s*,\s*"([^"]+)', webpage,
                               'length seconds', default=None)
        )
        formats = []
        fmt_stream_map = self._search_regex(
            r'"fmt_stream_map"\s*,\s*"([^"]+)', webpage, 'fmt stream map',
            default=''
        ).split(',')
        fmt_list = self._search_regex(r'"fmt_list"\s*,\s*"([^"]+)', webpage,
                                      'fmt_list', default='').split(',')
        hl = self._search_regex(r'"hl"\s*,\s*"([^"]+)', webpage, 'hl',
                                default=None)
        subtitles_id = None
        ttsurl = self._search_regex(r'"ttsurl"\s*,\s*"([^"]+)', webpage,
                                    'ttsurl', default=None)
        thumbnail_url = self._og_search_thumbnail(webpage, default=None)
    else:
        # get_video_info worked; read everything from the query fields.
        formats = []
        fmt_stream_map = (get_value('fmt_stream_map') or '').split(',')
        fmt_list = (get_value('fmt_list') or '').split(',')
        duration = int_or_none(get_value('length_seconds'))
        hl = get_value('hl')
        subtitles_id = None
        ttsurl = get_value('ttsurl')
        thumbnail_url = 'https://drive.google.com/thumbnail?id=' + video_id
    if ttsurl:
        # the video Id for subtitles will be the last value in the ttsurl
        # query string
        subtitles_id = ttsurl.encode('utf-8').decode(
            'unicode_escape').split('=')[-1]
    if fmt_stream_map and fmt_list:
        # fmt_list entries look like "<format_id>/<width>x<height>/...".
        resolutions = {}
        for fmt in fmt_list:
            mobj = re.search(
                r'^(?P<format_id>\d+)/(?P<width>\d+)[xX](?P<height>\d+)', fmt)
            if mobj:
                resolutions[mobj.group('format_id')] = (
                    int(mobj.group('width')), int(mobj.group('height')))
        # fmt_stream_map entries look like "<format_id>|<url>[|...]".
        for fmt_stream in fmt_stream_map:
            fmt_stream_split = fmt_stream.split('|')
            if len(fmt_stream_split) < 2:
                continue
            format_id, format_url = fmt_stream_split[:2]
            f = {
                'url': lowercase_escape(format_url),
                'format_id': format_id,
                'ext': self._FORMATS_EXT[format_id],
            }
            resolution = resolutions.get(format_id)
            if resolution:
                f.update(
                    {
                        'width': resolution[0],
                        'height': resolution[1],
                    }
                )
            formats.append(f)
    # Probe the direct-download endpoint for the original source file.
    source_url = update_url_query(
        'https://drive.google.com/uc',
        {
            'id': video_id,
            'export': 'download',
        },
    )

    def request_source_file(source_url, kind):
        # Non-fatal probe: returns None on failure.
        return self._request_webpage(
            source_url,
            video_id,
            note='Requesting %s file' % kind,
            errnote='Unable to request %s file' % kind,
            fatal=False,
        )

    urlh = request_source_file(source_url, 'source')
    if urlh:
        def add_source_format(urlh):
            formats.append(
                {
                    # Use redirect URLs as download URLs in order to calculate
                    # correct cookies in _calc_cookies.
                    # Using original URLs may result in redirect loop due to
                    # google.com's cookies mistakenly used for googleusercontent.com
                    # redirect URLs (see #23919).
                    'url': urlh.geturl(),
                    'ext': determine_ext(title, 'mp4').lower(),
                    'format_id': 'source',
                    'quality': 1,
                }
            )

        # A Content-Disposition header means the file is served directly;
        # otherwise a virus-scan confirmation page must be passed first.
        if urlh.headers.get('Content-Disposition'):
            add_source_format(urlh)
        else:
            confirmation_webpage = self._webpage_read_content(
                urlh,
                url,
                video_id,
                note='Downloading confirmation page',
                errnote='Unable to confirm download',
                fatal=False,
            )
            if confirmation_webpage:
                confirm = self._search_regex(
                    r'confirm=([^&"\']+)', confirmation_webpage,
                    'confirmation code', fatal=False
                )
                if confirm:
                    confirmed_source_url = update_url_query(
                        source_url,
                        {
                            'confirm': confirm,
                        },
                    )
                    urlh = request_source_file(confirmed_source_url,
                                               'confirmed source')
                    if urlh and urlh.headers.get('Content-Disposition'):
                        add_source_format(urlh)
    # Surface the service's refusal reason when no format was found.
    if not use_old_webpage and not formats and reason:
        raise ExtractorError(reason, expected=True)
    elif use_old_webpage and not formats:
        reason = self._search_regex(r'"reason"\s*,\s*"([^"]+)', webpage,
                                    'reason', default=None)
        if reason:
            raise ExtractorError(reason, expected=True)
    self._sort_formats(formats)
    return {
        'id': video_id,
        'title': title,
        'thumbnail': thumbnail_url,
        'duration': duration,
        'formats': formats,
        'subtitles': self.extract_subtitles(video_id, subtitles_id, hl),
        'automatic_captions': self.extract_automatic_captions(
            video_id, subtitles_id, hl),
    }
def _real_extract(self, url):
    """Extract a Picta video, optionally redirecting to a playlist IE.

    Dispatch order: a playlist id explicit in the URL wins (user
    playlist), then a channel playlist attached to the video; with
    --no-playlist only the single video is extracted.  self.playlist_id
    is used as a guard so a playlist is only entered once per run.
    """
    playlist_id = None
    video_id = self._match_id(url)
    json_url = API_BASE_URL + "publicacion/?format=json&slug_url_raw=%s" % video_id
    video = self._download_json(json_url, video_id, "Downloading video JSON")
    info = self._extract_video(video, video_id)
    # Remember the channel playlist of this video, but only when no
    # playlist is already being processed and the URL names none itself.
    if (info["playlist_channel"] and self.playlist_id is None
            and self._match_playlist_id(url) is None):
        playlist_id = info["playlist_channel"].get("id")
        self.playlist_id = playlist_id
    # Download Playlist (--yes-playlist) in first place
    if (self.playlist_id is None and self._match_playlist_id(url)
            and not self._downloader.params.get("noplaylist")):
        # Playlist id given in the URL → hand off to the user-playlist IE.
        playlist_id = compat_str(self._match_playlist_id(url))
        self.playlist_id = playlist_id
        self.to_screen(
            "Downloading playlist %s - add --no-playlist to just download video"
            % playlist_id)
        return self.url_result(
            ROOT_BASE_URL + "medias/" + video_id + "?" + "playlist=" +
            playlist_id,
            PictaUserPlaylistIE.ie_key(),
            playlist_id,
        )
    elif playlist_id and not self._downloader.params.get("noplaylist"):
        # Channel playlist discovered above → hand off to the channel IE.
        playlist_id = compat_str(playlist_id)
        self.to_screen(
            "Downloading playlist %s - add --no-playlist to just download video"
            % playlist_id)
        return self.url_result(
            ROOT_BASE_URL + "medias/" + video_id + "?" + "playlist=" +
            playlist_id,
            PictaChannelPlaylistIE.ie_key(),
            playlist_id,
        )
    elif self._downloader.params.get("noplaylist"):
        self.to_screen(
            "Downloading just video %s because of --no-playlist" % video_id)
    formats = []
    # M3U8|MPD manifest
    manifest_url = info.get("manifest_url")
    src_ext = determine_ext(manifest_url)
    if src_ext.startswith("m3u"):
        formats.extend(
            self._extract_m3u8_formats(manifest_url, video_id, "mp4",
                                       m3u8_id="hls"))
    elif src_ext == "mpd":
        formats.extend(
            self._extract_mpd_formats(manifest_url, video_id, mpd_id="dash"))
    if not formats:
        raise ExtractorError("Cannot find video formats")
    self._sort_formats(formats)
    info["formats"] = formats
    # subtitles
    video_subtitles = self.extract_subtitles(info)
    info["subtitles"] = video_subtitles
    return info