def sync_coord_workflow(request, job_id):
  ParametersFormSet = formset_factory(ParameterForm, extra=0)
  job = check_job_access_permission(request, job_id)
  check_job_edition_permission(job, request.user)

  hue_coord = get_history().get_coordinator_from_config(job.conf_dict)
  hue_wf = (hue_coord and hue_coord.workflow) or get_history().get_workflow_from_config(job.conf_dict)
  wf_application_path = job.conf_dict.get('wf_application_path') and Hdfs.urlsplit(job.conf_dict['wf_application_path'])[2] or ''
  coord_application_path = job.conf_dict.get('oozie.coord.application.path') and Hdfs.urlsplit(job.conf_dict['oozie.coord.application.path'])[2] or ''
  properties = hue_coord and hue_coord.properties and dict([(param['name'], param['value']) for param in hue_coord.properties]) or None

  if request.method == 'POST':
    params_form = ParametersFormSet(request.POST)

    if params_form.is_valid():
      mapping = dict([(param['name'], param['value']) for param in params_form.cleaned_data])

      # Update workflow params in coordinator
      hue_coord.clear_workflow_params()
      properties = dict([(param['name'], param['value']) for param in hue_coord.properties])

      # Deploy WF XML
      submission = Submission(user=request.user, job=hue_wf, fs=request.fs, jt=request.jt, properties=properties)
      submission.deploy(deployment_dir=wf_application_path)
      submission._create_file(wf_application_path, hue_wf.XML_FILE_NAME, hue_wf.to_xml(mapping=properties), do_as=True)

      # Deploy Coordinator XML
      job.conf_dict.update(mapping)
      submission = Submission(user=request.user, job=hue_coord, fs=request.fs, jt=request.jt, properties=job.conf_dict, oozie_id=job.id)
      submission._create_file(coord_application_path, hue_coord.XML_FILE_NAME, hue_coord.to_xml(mapping=job.conf_dict), do_as=True)

      # Server picks up deployed Coordinator XML changes after running 'update' action
      submission.update_coord()

      request.info(_('Successfully updated Workflow definition'))
      return redirect(reverse('oozie:list_oozie_coordinator', kwargs={'job_id': job_id}))
    else:
      request.error(_('Invalid submission form: %s' % params_form.errors))
  else:
    new_params = hue_wf and hue_wf.find_all_parameters() or []
    new_params = dict([(param['name'], param['value']) for param in new_params])

    # Set previous values
    if properties:
      new_params = dict([(key, properties[key]) if key in properties.keys() else (key, new_params[key]) for key, value in new_params.iteritems()])

    initial_params = ParameterForm.get_initial_params(new_params)
    params_form = ParametersFormSet(initial=initial_params)

  popup = render('editor2/submit_job_popup.mako', request, {
             'params_form': params_form,
             'name': _('Job'),
             'header': _('Sync Workflow definition?'),
             'action': reverse('oozie:sync_coord_workflow', kwargs={'job_id': job_id})
           }, force_template=True).content
  return JsonResponse(popup, safe=False)

def sync_coord_workflow(request, job_id):
  ParametersFormSet = formset_factory(ParameterForm, extra=0)
  job = check_job_access_permission(request, job_id)
  check_job_edition_permission(job, request.user)

  hue_coord = get_history().get_coordinator_from_config(job.conf_dict)
  hue_wf = (hue_coord and hue_coord.workflow) or get_history().get_workflow_from_config(job.conf_dict)
  wf_application_path = job.conf_dict.get('wf_application_path') and Hdfs.urlsplit(job.conf_dict['wf_application_path'])[2] or ''
  coord_application_path = job.conf_dict.get('oozie.coord.application.path') and Hdfs.urlsplit(job.conf_dict['oozie.coord.application.path'])[2] or ''
  properties = hue_coord and hue_coord.properties and dict([(param['name'], param['value']) for param in hue_coord.properties]) or None

  if request.method == 'POST':
    params_form = ParametersFormSet(request.POST)

    if params_form.is_valid():
      mapping = dict([(param['name'], param['value']) for param in params_form.cleaned_data])

      # Update workflow params in coordinator
      hue_coord.clear_workflow_params()
      properties = dict([(param['name'], param['value']) for param in hue_coord.properties])

      # Deploy WF XML
      submission = Submission(user=request.user, job=hue_wf, fs=request.fs, jt=request.jt, properties=properties)
      submission.deploy(deployment_dir=wf_application_path)
      submission._create_file(wf_application_path, hue_wf.XML_FILE_NAME, hue_wf.to_xml(mapping=properties), do_as=True)

      # Deploy Coordinator XML
      job.conf_dict.update(mapping)
      submission = Submission(user=request.user, job=hue_coord, fs=request.fs, jt=request.jt, properties=job.conf_dict, oozie_id=job.id)
      submission._create_file(coord_application_path, hue_coord.XML_FILE_NAME, hue_coord.to_xml(mapping=job.conf_dict), do_as=True)

      # Server picks up deployed Coordinator XML changes after running 'update' action
      submission.update_coord()

      request.info(_('Successfully updated Workflow definition'))
      return redirect(reverse('oozie:list_oozie_coordinator', kwargs={'job_id': job_id}))
    else:
      request.error(_('Invalid submission form: %s' % params_form.errors))
  else:
    new_params = hue_wf and hue_wf.find_all_parameters() or []
    new_params = dict([(param['name'], param['value']) for param in new_params])

    # Set previous values
    if properties:
      new_params = dict([(key, properties[key]) if key in properties.keys() else (key, new_params[key]) for key, value in new_params.iteritems()])

    initial_params = ParameterForm.get_initial_params(new_params)
    params_form = ParametersFormSet(initial=initial_params)

  popup = render('/scheduler/submit_job_popup.mako', request, {
             'params_form': params_form,
             'name': _('Job'),
             'header': _('Sync Workflow definition?'),
             'action': reverse('oozie:sync_coord_workflow', kwargs={'job_id': job_id})
           }, force_template=True).content
  return JsonResponse(popup, safe=False)

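# A minimal, standalone sketch of the path-pruning idiom used in the two views
# above: Hdfs.urlsplit returns a urlsplit-style 5-tuple, so index [2] is the bare
# HDFS path. The conf dict below is a hypothetical stand-in for job.conf_dict.
from hadoop.fs.hadoopfs import Hdfs

conf = {'wf_application_path': 'hdfs://nn:8020/user/hue/oozie/workspaces/hue-oozie-1'}
wf_application_path = conf.get('wf_application_path') and Hdfs.urlsplit(conf['wf_application_path'])[2] or ''
assert wf_application_path == '/user/hue/oozie/workspaces/hue-oozie-1'
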
def location_to_url(location, strict=True, is_embeddable=False):
  """
  If possible, returns a file browser URL to the location.
  Prunes HDFS URI to path.
  Location is a URI, if strict is True.

  Python doesn't seem to have a readily-available URI-comparison
  library, so this is quite hacky.
  """
  if location is None:
    return None

  split_path = Hdfs.urlsplit(location)
  if strict and not split_path[1] or not split_path[2]:
    # No netloc (not a full URI) when strict, or no path
    return None

  path = location
  if split_path[0] == 'hdfs':
    path = split_path[2]

  try:
    filebrowser_path = reverse("filebrowser.views.view", kwargs=dict(path=path))
  except Exception as e:
    LOG.warn('No table filesystem link: %s' % e)
    return None

  if is_embeddable and not filebrowser_path.startswith('/hue'):
    filebrowser_path = '/hue' + filebrowser_path

  return filebrowser_path

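# Illustrative behavior of location_to_url, assuming the filebrowser 'view' URL
# reverses to '/filebrowser/view=<path>' (the exact prefix depends on the URL conf):
#   location_to_url('hdfs://nn:8020/data/x')                     -> '/filebrowser/view=/data/x'
#   location_to_url('hdfs://nn:8020/data/x', is_embeddable=True) -> '/hue/filebrowser/view=/data/x'
#   location_to_url('/data/x')                                   -> None  (strict and no netloc)
#   location_to_url('/data/x', strict=False)                     -> '/filebrowser/view=/data/x'
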
def hdfs_link_js(url):
  link = 'javascript:void(0)'

  if url:
    path = Hdfs.urlsplit(url)[2]
    if path:
      link = ('/filebrowser/view=%s' if path.startswith(posixpath.sep) else '/filebrowser/home_relative_view=/%s') % path

  return link

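# Expected outputs, assuming the function above and the Hdfs.urlsplit semantics
# exercised in test_urlsplit further down (illustrative, not from the source):
assert hdfs_link_js('hdfs://nn:8020/user/joe') == '/filebrowser/view=/user/joe'
assert hdfs_link_js('rel/path') == '/filebrowser/home_relative_view=/rel/path'
assert hdfs_link_js(None) == 'javascript:void(0)'
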
def _get_service_url(hdfs_config):
  override = hdfs_config.WEBHDFS_URL.get()
  if override:
    return override

  fs_defaultfs = hdfs_config.FS_DEFAULTFS.get()
  netloc = Hdfs.urlsplit(fs_defaultfs)[1]
  host = netloc.split(':')[0]

  return "{0}://{1}:{2}/webhdfs/v1".format(get_webhdfs_ssl()["protocol"], host, get_webhdfs_ssl()["port"])

def _get_service_url(hdfs_config):
  override = hdfs_config.WEBHDFS_URL.get()
  if override:
    return override

  fs_defaultfs = hdfs_config.FS_DEFAULTFS.get()
  netloc = Hdfs.urlsplit(fs_defaultfs)[1]
  host = netloc.split(':')[0]
  port = hadoop.conf.DEFAULT_NN_HTTP_PORT

  return "http://%s:%s/webhdfs/v1" % (host, port)

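# Worked example for the variant above, assuming fs.defaultFS = 'hdfs://nn.example.com:8020'
# and hadoop.conf.DEFAULT_NN_HTTP_PORT = 50070 (the classic NameNode web port):
#   Hdfs.urlsplit('hdfs://nn.example.com:8020')[1] -> 'nn.example.com:8020'
#   host = 'nn.example.com'
#   returns 'http://nn.example.com:50070/webhdfs/v1'
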
def hdfs_link(url):
  if url:
    path = Hdfs.urlsplit(url)[2]
    if path:
      if path.startswith(posixpath.sep):
        return "/filebrowser/view=" + path
      else:
        return "/filebrowser/home_relative_view=/" + path
    else:
      return url
  else:
    return url

def hdfs_link(url):
  if url:
    path = Hdfs.urlsplit(url)[2]
    if path:
      if path.startswith(posixpath.sep):
        return "/filebrowser/view" + path
      else:
        return "/filebrowser/home_relative_view/" + path
    else:
      return url
  else:
    return url

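# The two hdfs_link variants differ only in the separator: the first emits
# '/filebrowser/view=<path>' (query-style), the second '/filebrowser/view<path>'
# (path-style). Illustrative outputs for the path-style variant above:
assert hdfs_link('hdfs://nn:8020/user/joe') == '/filebrowser/view/user/joe'
assert hdfs_link('') == ''
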
def location_to_url(location, strict=True):
  """
  If possible, returns a file browser URL to the location.
  Location is a URI, if strict is True.

  Python doesn't seem to have a readily-available URI-comparison
  library, so this is quite hacky.
  """
  if location is None:
    return None

  split_path = Hdfs.urlsplit(location)
  if strict and not split_path[1] or not split_path[2]:
    # No netloc (not a full URI) when strict, or no path
    return None

  return reverse("filebrowser.views.view", kwargs=dict(path=split_path[2]))

def test_urlsplit():
  """Test Hdfs urlsplit"""
  url = 'hdfs://nn.no.port/foo/bar'
  assert_equals(('hdfs', 'nn.no.port', '/foo/bar', '', ''), Hdfs.urlsplit(url))
  url = 'hdfs://nn:8020/foo/bar'
  assert_equals(('hdfs', 'nn:8020', '/foo/bar', '', ''), Hdfs.urlsplit(url))
  url = 'hdfs://nn:8020//foo//bar'
  assert_equals(('hdfs', 'nn:8020', '/foo/bar', '', ''), Hdfs.urlsplit(url))
  url = 'hdfs://nn:8020'
  assert_equals(('hdfs', 'nn:8020', '/', '', ''), Hdfs.urlsplit(url))
  url = '/foo/bar'
  assert_equals(('hdfs', '', '/foo/bar', '', ''), Hdfs.urlsplit(url))
  url = 'foo//bar'
  assert_equals(('hdfs', '', 'foo/bar', '', ''), Hdfs.urlsplit(url))

def test_urlsplit(self):
  """Test Hdfs urlsplit"""
  url = 'hdfs://nn.no.port/foo/bar'
  assert_equals(('hdfs', 'nn.no.port', '/foo/bar', '', ''), Hdfs.urlsplit(url))
  url = 'hdfs://nn:8020/foo/bar'
  assert_equals(('hdfs', 'nn:8020', '/foo/bar', '', ''), Hdfs.urlsplit(url))
  url = 'hdfs://nn:8020//foo//bar'
  assert_equals(('hdfs', 'nn:8020', '/foo/bar', '', ''), Hdfs.urlsplit(url))
  url = 'hdfs://nn:8020'
  assert_equals(('hdfs', 'nn:8020', '/', '', ''), Hdfs.urlsplit(url))
  url = '/foo/bar'
  assert_equals(('hdfs', '', '/foo/bar', '', ''), Hdfs.urlsplit(url))
  url = 'foo//bar'
  assert_equals(('hdfs', '', 'foo/bar', '', ''), Hdfs.urlsplit(url))

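# A minimal reference implementation satisfying the assertions above, using
# Python 3's urllib; hdfs_urlsplit is a hypothetical stand-in for the real
# Hdfs.urlsplit in hadoop.fs.hadoopfs, written only to document its semantics:
import re
from urllib.parse import urlsplit as _py_urlsplit

def hdfs_urlsplit(url, scheme='hdfs'):
  parts = _py_urlsplit(url, scheme=scheme)   # default a missing scheme to 'hdfs'
  path = re.sub(r'/{2,}', '/', parts.path)   # collapse duplicate slashes: '//foo//bar' -> '/foo/bar'
  if parts.netloc and not path:
    path = '/'                               # 'hdfs://nn:8020' -> path '/'
  return (parts.scheme, parts.netloc, path, parts.query, parts.fragment)

assert hdfs_urlsplit('hdfs://nn:8020//foo//bar') == ('hdfs', 'nn:8020', '/foo/bar', '', '')
assert hdfs_urlsplit('foo//bar') == ('hdfs', '', 'foo/bar', '', '')
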
def sync_coord_workflow(request, job_id):
  ParametersFormSet = formset_factory(ParameterForm, extra=0)
  job = check_job_access_permission(request, job_id)
  check_job_edition_permission(job, request.user)

  hue_coord = get_history().get_coordinator_from_config(job.conf_dict)
  hue_wf = (hue_coord and hue_coord.workflow) or get_history().get_workflow_from_config(job.conf_dict)
  wf_application_path = job.conf_dict.get('wf_application_path') and Hdfs.urlsplit(job.conf_dict['wf_application_path'])[2] or ''

  if request.method == 'POST':
    params_form = ParametersFormSet(request.POST)

    if params_form.is_valid():
      mapping = dict([(param['name'], param['value']) for param in params_form.cleaned_data])

      submission = Submission(user=request.user, job=hue_wf, fs=request.fs, jt=request.jt, properties=mapping)
      submission._sync_definition(wf_application_path, mapping)

      request.info(_('Successfully updated Workflow definition'))
      return redirect(reverse('oozie:list_oozie_coordinator', kwargs={'job_id': job_id}))
    else:
      request.error(_('Invalid submission form: %s' % params_form.errors))
  else:
    parameters = hue_wf and hue_wf.find_all_parameters() or []
    params_dict = dict([(param['name'], param['value']) for param in parameters])

    submission = Submission(user=request.user, job=hue_wf, fs=request.fs, jt=request.jt, properties=None)
    prev_properties = hue_wf and hue_wf.deployment_dir and \
        submission.get_external_parameters(request.fs.join(wf_application_path, hue_wf.XML_FILE_NAME)) or {}

    for key, value in params_dict.iteritems():
      params_dict[key] = prev_properties[key] if key in prev_properties.keys() else params_dict[key]

    initial_params = ParameterForm.get_initial_params(params_dict)
    params_form = ParametersFormSet(initial=initial_params)

  popup = render('editor2/submit_job_popup.mako', request, {
             'params_form': params_form,
             'name': _('Job'),
             'header': _('Sync Workflow definition?'),
             'action': reverse('oozie:sync_coord_workflow', kwargs={'job_id': job_id})
           }, force_template=True).content
  return JsonResponse(popup, safe=False)

def location_to_url(location, strict=True, is_embeddable=False):
  """
  If possible, returns a file browser URL to the location.
  Prunes HDFS URI to path.
  Location is a URI, if strict is True.

  Python doesn't seem to have a readily-available URI-comparison
  library, so this is quite hacky.
  """
  if location is None:
    return None

  split_path = Hdfs.urlsplit(location)
  if strict and not split_path[1] or not split_path[2]:
    # No netloc (not a full URI) when strict, or no path
    return None

  path = location
  if split_path[0] == 'hdfs':
    path = split_path[2]

  filebrowser_path = reverse("filebrowser.views.view", kwargs=dict(path=path))

  if is_embeddable and not filebrowser_path.startswith('/hue'):
    filebrowser_path = '/hue' + filebrowser_path

  return filebrowser_path

def config_validator(user):
  """
  config_validator() -> [ (config_variable, error_message) ]

  Called by core check_config() view.
  """
  from hadoop.cluster import get_all_hdfs
  from hadoop.fs.hadoopfs import Hdfs
  from liboozie.oozie_api import get_oozie

  res = []

  if OOZIE_URL.get():
    status = get_oozie_status(user)
    if 'NORMAL' not in status:
      res.append((status, _('The Oozie server is not available')))

    api = get_oozie(user)
    instrumentation = api.get_instrumentation()
    sharelib_url = [param['value'] for group in instrumentation['variables'] for param in group['data'] if param['name'] == 'sharelib.system.libpath']

    if sharelib_url:
      sharelib_url = Hdfs.urlsplit(sharelib_url[0])[2]

    if not sharelib_url:
      res.append((status, _('Oozie Share Lib path is not available')))

    class ConfigMock:
      def __init__(self, value):
        self.value = value

      def get(self):
        return self.value

      def get_fully_qualifying_key(self):
        return self.value

    for cluster in get_all_hdfs().values():
      res.extend(validate_path(ConfigMock(sharelib_url), is_dir=True, fs=cluster, message=_('Oozie Share Lib not installed in default location.')))

  return res

]:
      metrics = api.get_metrics()
      sharelib_url = 'gauges' in metrics and 'libs.sharelib.system.libpath' in metrics['gauges'] and [
        metrics['gauges']['libs.sharelib.system.libpath']['value']
      ] or []
    else:
      instrumentation = api.get_instrumentation()
      sharelib_url = [
        param['value']
        for group in instrumentation['variables']
        for param in group['data']
        if param['name'] == 'sharelib.system.libpath'
      ]

    if sharelib_url:
      sharelib_url = Hdfs.urlsplit(sharelib_url[0])[2]

    if not sharelib_url:
      res.append((status, _('Oozie Share Lib path is not available')))

    class ConfigMock:
      def __init__(self, value):
        self.value = value

      def get(self):
        return self.value

      def get_fully_qualifying_key(self):
        return self.value

    for cluster in get_all_hdfs().values():

def urlsplit(url):
  return Hdfs.urlsplit(url)

def config_validator(user):
    """
    config_validator() -> [ (config_variable, error_message) ]

    Called by core check_config() view.
    """
    from hadoop.cluster import get_all_hdfs
    from hadoop.fs.hadoopfs import Hdfs
    from liboozie.oozie_api import get_oozie

    res = []

    if OOZIE_URL.get():
        status = get_oozie_status(user)
        if "NORMAL" not in status:
            res.append((status, _("The Oozie server is not available")))

        api = get_oozie(user, api_version="v2")
        configuration = api.get_configuration()
        if "org.apache.oozie.service.MetricsInstrumentationService" in [
            c.strip() for c in configuration.get("oozie.services.ext", "").split(",")
        ]:
            metrics = api.get_metrics()
            sharelib_url = (
                "gauges" in metrics
                and "libs.sharelib.system.libpath" in metrics["gauges"]
                and [metrics["gauges"]["libs.sharelib.system.libpath"]["value"]]
                or []
            )
        else:
            instrumentation = api.get_instrumentation()
            sharelib_url = [
                param["value"]
                for group in instrumentation["variables"]
                for param in group["data"]
                if param["name"] == "sharelib.system.libpath"
            ]

        if sharelib_url:
            sharelib_url = Hdfs.urlsplit(sharelib_url[0])[2]

        if not sharelib_url:
            res.append((status, _("Oozie Share Lib path is not available")))

        class ConfigMock:
            def __init__(self, value):
                self.value = value

            def get(self):
                return self.value

            def get_fully_qualifying_key(self):
                return self.value

        for cluster in get_all_hdfs().values():
            res.extend(
                validate_path(
                    ConfigMock(sharelib_url),
                    is_dir=True,
                    fs=cluster,
                    message=_("Oozie Share Lib not installed in default location."),
                )
            )

    return res

def config_validator(user):
  """
  config_validator() -> [ (config_variable, error_message) ]

  Called by core check_config() view.
  """
  from desktop.lib.fsmanager import get_filesystem
  from hadoop.cluster import get_all_hdfs
  from hadoop.fs.hadoopfs import Hdfs
  from liboozie.oozie_api import get_oozie

  res = []

  try:
    from oozie.conf import REMOTE_SAMPLE_DIR
  except Exception as e:
    LOG.warn('Config check failed because Oozie app not installed: %s' % e)
    return res

  if OOZIE_URL.get():
    status = get_oozie_status(user)
    if 'NORMAL' not in status:
      res.append((status, _('The Oozie server is not available')))

    fs = get_filesystem()
    NICE_NAME = 'Oozie'

    if fs.do_as_superuser(fs.exists, REMOTE_SAMPLE_DIR.get()):
      stats = fs.do_as_superuser(fs.stats, REMOTE_SAMPLE_DIR.get())
      mode = oct(stats.mode)
      # if neither group nor others have write permission
      group_has_write = int(mode[-2]) & 2
      others_has_write = int(mode[-1]) & 2

      if not group_has_write and not others_has_write:
        res.append((NICE_NAME, "The permissions of workspace '%s' are too restrictive" % REMOTE_SAMPLE_DIR.get()))

    api = get_oozie(user, api_version="v2")
    configuration = api.get_configuration()
    if 'org.apache.oozie.service.MetricsInstrumentationService' in [c.strip() for c in configuration.get('oozie.services.ext', '').split(',')]:
      metrics = api.get_metrics()
      sharelib_url = 'gauges' in metrics and 'libs.sharelib.system.libpath' in metrics['gauges'] and [metrics['gauges']['libs.sharelib.system.libpath']['value']] or []
    else:
      instrumentation = api.get_instrumentation()
      sharelib_url = [param['value'] for group in instrumentation['variables'] for param in group['data'] if param['name'] == 'sharelib.system.libpath']

    if sharelib_url:
      sharelib_url = Hdfs.urlsplit(sharelib_url[0])[2]

    if not sharelib_url:
      res.append((status, _('Oozie Share Lib path is not available')))

    class ConfigMock(object):
      def __init__(self, value):
        self.value = value

      def get(self):
        return self.value

      def get_fully_qualifying_key(self):
        return self.value

    for cluster in list(get_all_hdfs().values()):
      res.extend(validate_path(ConfigMock(sharelib_url), is_dir=True, fs=cluster, message=_('Oozie Share Lib not installed in default location.')))

  return res

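# Shape of the payloads the share-lib lookup above expects (illustrative, inferred
# from the indexing in the code, not from Oozie documentation):
#   metrics branch:          {'gauges': {'libs.sharelib.system.libpath':
#                                        {'value': 'hdfs://nn:8020/user/oozie/share/lib'}}}
#   instrumentation branch:  {'variables': [{'data': [{'name': 'sharelib.system.libpath',
#                                                      'value': 'hdfs://nn:8020/user/oozie/share/lib'}]}]}
# Either way, Hdfs.urlsplit(value)[2] prunes the URI to '/user/oozie/share/lib'
# before validate_path() checks it on each HDFS cluster.
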
def sync_coord_workflow(request, job_id):
    ParametersFormSet = formset_factory(ParameterForm, extra=0)
    job = check_job_access_permission(request, job_id)
    check_job_edition_permission(job, request.user)

    hue_coord = get_history().get_coordinator_from_config(job.conf_dict)
    hue_wf = (hue_coord and hue_coord.workflow) or get_history().get_workflow_from_config(job.conf_dict)
    wf_application_path = (
        job.conf_dict.get("wf_application_path") and Hdfs.urlsplit(job.conf_dict["wf_application_path"])[2] or ""
    )
    coord_application_path = (
        job.conf_dict.get("oozie.coord.application.path")
        and Hdfs.urlsplit(job.conf_dict["oozie.coord.application.path"])[2]
        or ""
    )
    properties = (
        hue_coord
        and hue_coord.properties
        and dict([(param["name"], param["value"]) for param in hue_coord.properties])
        or None
    )

    if request.method == "POST":
        params_form = ParametersFormSet(request.POST)

        if params_form.is_valid():
            mapping = dict([(param["name"], param["value"]) for param in params_form.cleaned_data])

            # Update workflow params in coordinator
            hue_coord.clear_workflow_params()
            properties = dict([(param["name"], param["value"]) for param in hue_coord.properties])

            # Deploy WF XML
            submission = Submission(user=request.user, job=hue_wf, fs=request.fs, jt=request.jt, properties=properties)
            submission._create_file(
                wf_application_path, hue_wf.XML_FILE_NAME, hue_wf.to_xml(mapping=properties), do_as=True
            )

            # Deploy Coordinator XML
            job.conf_dict.update(mapping)
            submission = Submission(
                user=request.user,
                job=hue_coord,
                fs=request.fs,
                jt=request.jt,
                properties=job.conf_dict,
                oozie_id=job.id,
            )
            submission._create_file(
                coord_application_path, hue_coord.XML_FILE_NAME, hue_coord.to_xml(mapping=job.conf_dict), do_as=True
            )

            # Server picks up deployed Coordinator XML changes after running 'update' action
            submission.update_coord()

            request.info(_("Successfully updated Workflow definition"))
            return redirect(reverse("oozie:list_oozie_coordinator", kwargs={"job_id": job_id}))
        else:
            request.error(_("Invalid submission form: %s" % params_form.errors))
    else:
        new_params = hue_wf and hue_wf.find_all_parameters() or []
        new_params = dict([(param["name"], param["value"]) for param in new_params])

        # Set previous values
        if properties:
            new_params = dict(
                [
                    (key, properties[key]) if key in properties.keys() else (key, new_params[key])
                    for key, value in new_params.iteritems()
                ]
            )

        initial_params = ParameterForm.get_initial_params(new_params)
        params_form = ParametersFormSet(initial=initial_params)

    popup = render(
        "editor2/submit_job_popup.mako",
        request,
        {
            "params_form": params_form,
            "name": _("Job"),
            "header": _("Sync Workflow definition?"),
            "action": reverse("oozie:sync_coord_workflow", kwargs={"job_id": job_id}),
        },
        force_template=True,
    ).content
    return JsonResponse(popup, safe=False)