def sync_coord_workflow(request, job_id):
    """Re-deploy the workflow definition used by a coordinator job.

    On a POST with a valid parameter formset, the workflow XML and the
    coordinator XML are rewritten at their application paths, the coordinator
    is updated through the submission, and the caller is redirected to the
    coordinator page. In every other case (non-POST request, or an invalid
    formset) the parameter popup is rendered and returned as JSON.

    Args:
        request: The current request; ``user``, ``fs``, ``jt``, ``method``
            and ``POST`` are read from it.
        job_id: Id of the coordinator job to sync.

    Returns:
        A redirect response after a successful sync, otherwise a
        ``JsonResponse`` wrapping the rendered popup markup.
    """
    ParametersFormSet = formset_factory(ParameterForm, extra=0)
    job = check_job_access_permission(request, job_id)
    check_job_edition_permission(job, request.user)

    hue_coord = get_history().get_coordinator_from_config(job.conf_dict)
    hue_wf = (hue_coord and hue_coord.workflow) or get_history().get_workflow_from_config(job.conf_dict)

    # Only the path component (index 2 of the split URL) is kept; '' when unset.
    wf_path_conf = job.conf_dict.get('wf_application_path')
    wf_application_path = (Hdfs.urlsplit(wf_path_conf)[2] if wf_path_conf else '') or ''
    coord_path_conf = job.conf_dict.get('oozie.coord.application.path')
    coord_application_path = (Hdfs.urlsplit(coord_path_conf)[2] if coord_path_conf else '') or ''

    properties = None
    if hue_coord and hue_coord.properties:
        properties = {param['name']: param['value'] for param in hue_coord.properties} or None

    if request.method == 'POST':
        params_form = ParametersFormSet(request.POST)
        if params_form.is_valid():
            mapping = {param['name']: param['value'] for param in params_form.cleaned_data}

            # Update workflow params in coordinator
            # NOTE(review): hue_coord is treated as possibly falsy above; if it
            # is, this call raises AttributeError -- confirm whether that can
            # happen for a POST.
            hue_coord.clear_workflow_params()
            properties = {param['name']: param['value'] for param in hue_coord.properties}

            # Deploy WF XML
            submission = Submission(
                user=request.user, job=hue_wf, fs=request.fs, jt=request.jt, properties=properties
            )
            submission.deploy(deployment_dir=wf_application_path)
            submission._create_file(
                wf_application_path, hue_wf.XML_FILE_NAME, hue_wf.to_xml(mapping=properties), do_as=True
            )

            # Deploy Coordinator XML
            job.conf_dict.update(mapping)
            submission = Submission(
                user=request.user,
                job=hue_coord,
                fs=request.fs,
                jt=request.jt,
                properties=job.conf_dict,
                oozie_id=job.id,
            )
            submission._create_file(
                coord_application_path, hue_coord.XML_FILE_NAME, hue_coord.to_xml(mapping=job.conf_dict), do_as=True
            )
            # Server picks up deployed Coordinator XML changes after running 'update' action
            submission.update_coord()

            request.info(_('Successfully updated Workflow definition'))
            return redirect(reverse('oozie:list_oozie_coordinator', kwargs={'job_id': job_id}))

        # Translate the template first, then interpolate, so the msgid matches
        # the catalog entry.
        request.error(_('Invalid submission form: %s') % params_form.errors)
    else:
        workflow_params = (hue_wf.find_all_parameters() if hue_wf else None) or []
        new_params = {param['name']: param['value'] for param in workflow_params}

        # Set previous values
        if properties:
            # .items() instead of .iteritems(): the latter does not exist on Python 3.
            new_params = {
                key: properties[key] if key in properties else value
                for key, value in new_params.items()
            }
        initial_params = ParameterForm.get_initial_params(new_params)
        params_form = ParametersFormSet(initial=initial_params)

    popup = render(
        '/scheduler/submit_job_popup.mako',
        request,
        {
            'params_form': params_form,
            'name': _('Job'),
            'header': _('Sync Workflow definition?'),
            'action': reverse('oozie:sync_coord_workflow', kwargs={'job_id': job_id}),
        },
        force_template=True,
    ).content
    # Response content is a bytestring, which the JSON encoder cannot
    # serialise on Python 3; hand JsonResponse text instead.
    if isinstance(popup, bytes):
        popup = popup.decode('utf-8')
    return JsonResponse(popup, safe=False)
def sync_coord_workflow(request, job_id):
    """Sync a coordinator job with the current definition of its workflow.

    A valid POST rewrites the workflow XML and the coordinator XML at their
    application paths, triggers the coordinator update through the submission
    and redirects to the coordinator page. A non-POST request, or a POST whose
    formset does not validate, gets the parameter popup back as JSON.

    Args:
        request: The current request; ``user``, ``fs``, ``jt``, ``method``
            and ``POST`` are read from it.
        job_id: Id of the coordinator job to sync.

    Returns:
        A redirect response after a successful sync, otherwise a
        ``JsonResponse`` wrapping the rendered popup markup.
    """
    ParametersFormSet = formset_factory(ParameterForm, extra=0)
    job = check_job_access_permission(request, job_id)
    check_job_edition_permission(job, request.user)

    hue_coord = get_history().get_coordinator_from_config(job.conf_dict)
    hue_wf = (hue_coord and hue_coord.workflow) or get_history().get_workflow_from_config(job.conf_dict)

    # Keep only the path component (index 2 of the split URL); '' when unset.
    wf_path_conf = job.conf_dict.get('wf_application_path')
    wf_application_path = (Hdfs.urlsplit(wf_path_conf)[2] if wf_path_conf else '') or ''
    coord_path_conf = job.conf_dict.get('oozie.coord.application.path')
    coord_application_path = (Hdfs.urlsplit(coord_path_conf)[2] if coord_path_conf else '') or ''

    properties = None
    if hue_coord and hue_coord.properties:
        properties = {param['name']: param['value'] for param in hue_coord.properties} or None

    if request.method == 'POST':
        params_form = ParametersFormSet(request.POST)
        if params_form.is_valid():
            mapping = {param['name']: param['value'] for param in params_form.cleaned_data}

            # Update workflow params in coordinator
            # NOTE(review): hue_coord is treated as possibly falsy above; if it
            # is, this call raises AttributeError -- confirm whether that can
            # happen for a POST.
            hue_coord.clear_workflow_params()
            properties = {param['name']: param['value'] for param in hue_coord.properties}

            # Deploy WF XML
            submission = Submission(
                user=request.user, job=hue_wf, fs=request.fs, jt=request.jt, properties=properties
            )
            submission.deploy(deployment_dir=wf_application_path)
            submission._create_file(
                wf_application_path, hue_wf.XML_FILE_NAME, hue_wf.to_xml(mapping=properties), do_as=True
            )

            # Deploy Coordinator XML
            job.conf_dict.update(mapping)
            submission = Submission(
                user=request.user,
                job=hue_coord,
                fs=request.fs,
                jt=request.jt,
                properties=job.conf_dict,
                oozie_id=job.id,
            )
            submission._create_file(
                coord_application_path, hue_coord.XML_FILE_NAME, hue_coord.to_xml(mapping=job.conf_dict), do_as=True
            )
            # Server picks up deployed Coordinator XML changes after running 'update' action
            submission.update_coord()

            request.info(_('Successfully updated Workflow definition'))
            return redirect(reverse('oozie:list_oozie_coordinator', kwargs={'job_id': job_id}))

        # Look up the translation of the template, then interpolate; doing it
        # the other way round never matches a catalog entry.
        request.error(_('Invalid submission form: %s') % params_form.errors)
    else:
        workflow_params = (hue_wf.find_all_parameters() if hue_wf else None) or []
        new_params = {param['name']: param['value'] for param in workflow_params}

        # Set previous values
        if properties:
            # .items() works on both Python 2 and 3; .iteritems() is Python 2 only.
            new_params = {
                key: properties[key] if key in properties else value
                for key, value in new_params.items()
            }
        initial_params = ParameterForm.get_initial_params(new_params)
        params_form = ParametersFormSet(initial=initial_params)

    popup = render(
        'editor2/submit_job_popup.mako',
        request,
        {
            'params_form': params_form,
            'name': _('Job'),
            'header': _('Sync Workflow definition?'),
            'action': reverse('oozie:sync_coord_workflow', kwargs={'job_id': job_id}),
        },
        force_template=True,
    ).content
    # Response content is a bytestring; decode it so the JSON encoder accepts
    # it on Python 3.
    if isinstance(popup, bytes):
        popup = popup.decode('utf-8')
    return JsonResponse(popup, safe=False)
def test_copy_files():
    """Exercise Submission._copy_files() and _create_file() on the shared test cluster.

    Six jars are created in different places relative to the workspace, then
    the job is copied to both its own workspace and an external deployment
    directory. Depending on USE_LIBPATH_FOR_JARS the test checks either that
    the jars are listed in 'oozie.libpath', or that they end up in the 'lib'
    sub-directories and which of them get a new fileId on a second copy.
    """
    cluster = pseudo_hdfs4.shared_cluster()
    # Bound before the try so the cleanup in `finally` can always reference it,
    # even when the setup below fails early.
    prefix = '/tmp/test_copy_files'

    try:
        # Return value is not needed; presumably the call provisions the user
        # fetched on the next line -- TODO confirm.
        make_logged_in_client()
        user = User.objects.get(username='******')
        ensure_home_directory(cluster.fs, user)

        if cluster.fs.exists(prefix):
            cluster.fs.rmtree(prefix)

        # Jars in various locations
        deployment_dir = '%s/workspace' % prefix
        external_deployment_dir = '%s/deployment' % prefix
        jar_1 = '%s/udf1.jar' % prefix
        jar_2 = '%s/lib/udf2.jar' % prefix
        jar_3 = '%s/udf3.jar' % deployment_dir
        jar_4 = '%s/lib/udf4.jar' % deployment_dir  # Doesn't move
        jar_5 = 'udf5.jar'
        jar_6 = 'lib/udf6.jar'  # Doesn't move

        # jar_5 / jar_6 are workspace-relative, so they live under deployment_dir.
        source_paths = [
            jar_1,
            jar_2,
            jar_3,
            jar_4,
            deployment_dir + '/' + jar_5,
            deployment_dir + '/' + jar_6,
        ]

        cluster.fs.mkdir(prefix)
        for source_path in source_paths:
            cluster.fs.create(source_path)

        class MockJob(object):
            XML_FILE_NAME = 'workflow.xml'

            def __init__(self):
                self.deployment_dir = deployment_dir
                self.nodes = [
                    Node({'id': '1', 'type': 'mapreduce', 'properties': {'jar_path': jar_1}}),
                    Node({'id': '2', 'type': 'mapreduce', 'properties': {'jar_path': jar_2}}),
                    Node({'id': '3', 'type': 'java', 'properties': {'jar_path': jar_3}}),
                    Node({'id': '4', 'type': 'java', 'properties': {'jar_path': jar_4}}),
                    # Workspace relative paths
                    Node({'id': '5', 'type': 'java', 'properties': {'jar_path': jar_5}}),
                    Node({'id': '6', 'type': 'java', 'properties': {'jar_path': jar_6}}),
                ]

        submission = Submission(user, job=MockJob(), fs=cluster.fs, jt=cluster.jt)

        submission._copy_files(deployment_dir, "<xml>My XML</xml>", {'prop1': 'val1'})
        submission._copy_files(external_deployment_dir, "<xml>My XML</xml>", {'prop1': 'val1'})

        assert_true(cluster.fs.exists(deployment_dir + '/workflow.xml'), deployment_dir)
        assert_true(cluster.fs.exists(deployment_dir + '/job.properties'), deployment_dir)

        # All sources still there
        for source_path in source_paths:
            assert_true(cluster.fs.exists(source_path))

        # Lib
        lib_dir = deployment_dir + '/lib'
        external_lib_dir = external_deployment_dir + '/lib'
        jar_names = ['udf1.jar', 'udf2.jar', 'udf3.jar', 'udf4.jar', 'udf5.jar', 'udf6.jar']

        if USE_LIBPATH_FOR_JARS.get():
            libpath = submission.properties['oozie.libpath']
            for jar_path in (jar_1, jar_2, jar_3, jar_4):
                assert_true(jar_path in libpath)
            print(lib_dir + '/' + jar_5)
            for relative_jar in (jar_5, jar_6):
                assert_true((lib_dir + '/' + relative_jar) in libpath, libpath)
        else:
            list_dir_workspace = cluster.fs.listdir(lib_dir)
            list_dir_deployement = cluster.fs.listdir(external_lib_dir)

            # All destinations there
            for jar_name in jar_names:
                assert_true(cluster.fs.exists(lib_dir + '/' + jar_name), list_dir_workspace)
            for jar_name in jar_names:
                assert_true(cluster.fs.exists(external_lib_dir + '/' + jar_name), list_dir_deployement)

            stats_before = {jar_name: cluster.fs.stats(lib_dir + '/' + jar_name) for jar_name in jar_names}

            submission._copy_files('%s/workspace' % prefix, "<xml>My XML</xml>", {'prop1': 'val1'})

            # udf4 and udf6 were created directly under <workspace>/lib and are
            # expected to keep their fileId; the others are expected to change.
            expected_unchanged = ('udf4.jar', 'udf6.jar')
            for jar_name in jar_names:
                file_id_after = cluster.fs.stats(lib_dir + '/' + jar_name)['fileId']
                if jar_name in expected_unchanged:
                    assert_equal(stats_before[jar_name]['fileId'], file_id_after, jar_name)
                else:
                    assert_not_equal(stats_before[jar_name]['fileId'], file_id_after, jar_name)

        # Test _create_file()
        submission._create_file(lib_dir, 'test.txt', data='Test data')
        # The directory listing is only collected in the non-libpath branch, so
        # the path itself is used as the failure message here.
        assert_true(cluster.fs.exists(lib_dir + '/test.txt'), lib_dir)
    finally:
        try:
            cluster.fs.rmtree(prefix)
        except Exception:
            # Best-effort cleanup: log and carry on, but let KeyboardInterrupt
            # and SystemExit propagate.
            LOG.exception('failed to remove %s', prefix)
def test_copy_files():
    """Exercise Submission._copy_files() and _create_file() on the shared test cluster.

    Six jars are created in different places relative to the workspace, then
    the job is copied to both its own workspace and an external deployment
    directory. The test checks that every jar ends up in both 'lib'
    sub-directories, and which of them get a new fileId when the workspace is
    copied a second time.
    """
    cluster = pseudo_hdfs4.shared_cluster()
    # Bound before the try so the cleanup in `finally` can always reference it,
    # even when the setup below fails early.
    prefix = '/tmp/test_copy_files'

    try:
        # Return value is not needed; presumably the call provisions the user
        # fetched on the next line -- TODO confirm.
        make_logged_in_client()
        user = User.objects.get(username='******')

        if cluster.fs.exists(prefix):
            cluster.fs.rmtree(prefix)

        # Jars in various locations
        deployment_dir = '%s/workspace' % prefix
        external_deployment_dir = '%s/deployment' % prefix
        jar_1 = '%s/udf1.jar' % prefix
        jar_2 = '%s/lib/udf2.jar' % prefix
        jar_3 = '%s/udf3.jar' % deployment_dir
        jar_4 = '%s/lib/udf4.jar' % deployment_dir  # Doesn't move
        jar_5 = 'udf5.jar'
        jar_6 = 'lib/udf6.jar'  # Doesn't move

        # jar_5 / jar_6 are workspace-relative, so they live under deployment_dir.
        source_paths = [
            jar_1,
            jar_2,
            jar_3,
            jar_4,
            deployment_dir + '/' + jar_5,
            deployment_dir + '/' + jar_6,
        ]

        cluster.fs.mkdir(prefix)
        for source_path in source_paths:
            cluster.fs.create(source_path)

        class MockJob():
            XML_FILE_NAME = 'workflow.xml'

            def __init__(self):
                self.deployment_dir = deployment_dir
                self.nodes = [
                    Node({'id': '1', 'type': 'mapreduce', 'properties': {'jar_path': jar_1}}),
                    Node({'id': '2', 'type': 'mapreduce', 'properties': {'jar_path': jar_2}}),
                    Node({'id': '3', 'type': 'java', 'properties': {'jar_path': jar_3}}),
                    Node({'id': '4', 'type': 'java', 'properties': {'jar_path': jar_4}}),
                    # Workspace relative paths
                    Node({'id': '5', 'type': 'java', 'properties': {'jar_path': jar_5}}),
                    Node({'id': '6', 'type': 'java', 'properties': {'jar_path': jar_6}}),
                ]

        submission = Submission(user, job=MockJob(), fs=cluster.fs, jt=cluster.jt)

        submission._copy_files(deployment_dir, "<xml>My XML</xml>", {'prop1': 'val1'})
        submission._copy_files(external_deployment_dir, "<xml>My XML</xml>", {'prop1': 'val1'})

        assert_true(cluster.fs.exists(deployment_dir + '/workflow.xml'), deployment_dir)
        assert_true(cluster.fs.exists(deployment_dir + '/job.properties'), deployment_dir)

        # All sources still there
        for source_path in source_paths:
            assert_true(cluster.fs.exists(source_path))

        # Lib
        lib_dir = deployment_dir + '/lib'
        external_lib_dir = external_deployment_dir + '/lib'
        jar_names = ['udf1.jar', 'udf2.jar', 'udf3.jar', 'udf4.jar', 'udf5.jar', 'udf6.jar']

        list_dir_workspace = cluster.fs.listdir(lib_dir)
        list_dir_deployement = cluster.fs.listdir(external_lib_dir)

        # All destinations there
        for jar_name in jar_names:
            assert_true(cluster.fs.exists(lib_dir + '/' + jar_name), list_dir_workspace)
        for jar_name in jar_names:
            assert_true(cluster.fs.exists(external_lib_dir + '/' + jar_name), list_dir_deployement)

        stats_before = {jar_name: cluster.fs.stats(lib_dir + '/' + jar_name) for jar_name in jar_names}

        submission._copy_files('%s/workspace' % prefix, "<xml>My XML</xml>", {'prop1': 'val1'})

        # udf4 and udf6 were created directly under <workspace>/lib and are
        # expected to keep their fileId; the others are expected to change.
        expected_unchanged = ('udf4.jar', 'udf6.jar')
        for jar_name in jar_names:
            file_id_after = cluster.fs.stats(lib_dir + '/' + jar_name)['fileId']
            if jar_name in expected_unchanged:
                assert_equal(stats_before[jar_name]['fileId'], file_id_after, jar_name)
            else:
                assert_not_equal(stats_before[jar_name]['fileId'], file_id_after, jar_name)

        # Test _create_file()
        submission._create_file(lib_dir, 'test.txt', data='Test data')
        assert_true(cluster.fs.exists(lib_dir + '/test.txt'), list_dir_workspace)
    finally:
        try:
            cluster.fs.rmtree(prefix)
        except Exception:
            # Best-effort cleanup: log and carry on, but let KeyboardInterrupt
            # and SystemExit propagate.
            LOG.exception('failed to remove %s', prefix)
def sync_coord_workflow(request, job_id):
    """Push the current workflow definition to a coordinator job.

    A valid POST writes the workflow XML and the coordinator XML to their
    application paths, triggers the coordinator update through the submission
    and redirects to the coordinator page. Any other request (non-POST, or a
    formset that fails validation) receives the parameter popup as JSON.

    Args:
        request: The current request; ``user``, ``fs``, ``jt``, ``method``
            and ``POST`` are read from it.
        job_id: Id of the coordinator job to sync.

    Returns:
        A redirect response after a successful sync, otherwise a
        ``JsonResponse`` wrapping the rendered popup markup.
    """
    ParametersFormSet = formset_factory(ParameterForm, extra=0)
    job = check_job_access_permission(request, job_id)
    check_job_edition_permission(job, request.user)

    hue_coord = get_history().get_coordinator_from_config(job.conf_dict)
    hue_wf = (hue_coord and hue_coord.workflow) or get_history().get_workflow_from_config(job.conf_dict)

    # Keep only the path component (index 2 of the split URL); "" when unset.
    wf_path_conf = job.conf_dict.get("wf_application_path")
    wf_application_path = (Hdfs.urlsplit(wf_path_conf)[2] if wf_path_conf else "") or ""
    coord_path_conf = job.conf_dict.get("oozie.coord.application.path")
    coord_application_path = (Hdfs.urlsplit(coord_path_conf)[2] if coord_path_conf else "") or ""

    properties = None
    if hue_coord and hue_coord.properties:
        properties = {param["name"]: param["value"] for param in hue_coord.properties} or None

    if request.method == "POST":
        params_form = ParametersFormSet(request.POST)
        if params_form.is_valid():
            mapping = {param["name"]: param["value"] for param in params_form.cleaned_data}

            # Update workflow params in coordinator
            # NOTE(review): hue_coord is treated as possibly falsy above; if it
            # is, this call raises AttributeError -- confirm whether that can
            # happen for a POST.
            hue_coord.clear_workflow_params()
            properties = {param["name"]: param["value"] for param in hue_coord.properties}

            # Deploy WF XML
            submission = Submission(
                user=request.user, job=hue_wf, fs=request.fs, jt=request.jt, properties=properties
            )
            submission._create_file(
                wf_application_path, hue_wf.XML_FILE_NAME, hue_wf.to_xml(mapping=properties), do_as=True
            )

            # Deploy Coordinator XML
            job.conf_dict.update(mapping)
            submission = Submission(
                user=request.user,
                job=hue_coord,
                fs=request.fs,
                jt=request.jt,
                properties=job.conf_dict,
                oozie_id=job.id,
            )
            submission._create_file(
                coord_application_path, hue_coord.XML_FILE_NAME, hue_coord.to_xml(mapping=job.conf_dict), do_as=True
            )
            # Server picks up deployed Coordinator XML changes after running 'update' action
            submission.update_coord()

            request.info(_("Successfully updated Workflow definition"))
            return redirect(reverse("oozie:list_oozie_coordinator", kwargs={"job_id": job_id}))

        # Translate the template first, then interpolate, so the msgid matches
        # the catalog entry.
        request.error(_("Invalid submission form: %s") % params_form.errors)
    else:
        workflow_params = (hue_wf.find_all_parameters() if hue_wf else None) or []
        new_params = {param["name"]: param["value"] for param in workflow_params}

        # Set previous values
        if properties:
            # .items() instead of .iteritems(): the latter does not exist on Python 3.
            new_params = {
                key: properties[key] if key in properties else value
                for key, value in new_params.items()
            }
        initial_params = ParameterForm.get_initial_params(new_params)
        params_form = ParametersFormSet(initial=initial_params)

    popup = render(
        "editor2/submit_job_popup.mako",
        request,
        {
            "params_form": params_form,
            "name": _("Job"),
            "header": _("Sync Workflow definition?"),
            "action": reverse("oozie:sync_coord_workflow", kwargs={"job_id": job_id}),
        },
        force_template=True,
    ).content
    # Response content is a bytestring, which the JSON encoder cannot
    # serialise on Python 3; hand JsonResponse text instead.
    if isinstance(popup, bytes):
        popup = popup.decode("utf-8")
    return JsonResponse(popup, safe=False)