def _submit_bundle(request, bundle, properties):
  try:
    deployment_mapping = {}
    coords = dict([(c.uuid, c) for c in Document2.objects.filter(
        type='oozie-coordinator2', uuid__in=[b['coordinator'] for b in bundle.data['coordinators']])])

    for i, bundled in enumerate(bundle.data['coordinators']):
      coord = coords[bundled['coordinator']]
      workflow = Workflow(document=coord.dependencies.all()[0])
      wf_dir = Submission(request.user, workflow, request.fs, request.jt, properties).deploy()
      deployment_mapping['wf_%s_dir' % i] = request.fs.get_hdfs_path(wf_dir)

      coordinator = Coordinator(document=coord)
      coord_dir = Submission(request.user, coordinator, request.fs, request.jt, properties).deploy()
      deployment_mapping['coord_%s_dir' % i] = coord_dir
      deployment_mapping['coord_%s' % i] = coord

    properties.update(deployment_mapping)

    submission = Submission(request.user, bundle, request.fs, request.jt, properties=properties)
    job_id = submission.run()

    return job_id
  except RestException, ex:
    LOG.exception('Error submitting bundle')
    raise PopupException(_("Error submitting bundle %s") % (bundle,), detail=ex._headers.get('oozie-error-message', ex))

def submit_external_job(request, application_path):
  ParametersFormSet = formset_factory(ParameterForm, extra=0)

  if request.method == 'POST':
    params_form = ParametersFormSet(request.POST)

    if params_form.is_valid():
      mapping = dict([(param['name'], param['value']) for param in params_form.cleaned_data])
      mapping['dryrun'] = request.POST.get('dryrun_checkbox') == 'on'
      application_name = os.path.basename(application_path)
      application_class = Bundle if application_name == 'bundle.xml' else Coordinator if application_name == 'coordinator.xml' else get_workflow()
      mapping[application_class.get_application_path_key()] = os.path.dirname(application_path)

      try:
        submission = Submission(request.user, fs=request.fs, jt=request.jt, properties=mapping)
        job_id = submission.run(application_path)
      except RestException, ex:
        detail = ex._headers.get('oozie-error-message', ex)
        if 'Max retries exceeded with url' in str(detail):
          detail = '%s: %s' % (_('The Oozie server is not running'), detail)
        LOG.exception(smart_str(detail))
        raise PopupException(_("Error submitting job %s") % (application_path,), detail=detail)

      jsonify = request.POST.get('format') == 'json'
      if jsonify:
        return JsonResponse({'status': 0, 'job_id': job_id, 'type': 'external_workflow'}, safe=False)
      else:
        request.info(_('Oozie job submitted'))
        view = 'list_oozie_bundle' if application_name == 'bundle.xml' else 'list_oozie_coordinator' if application_name == 'coordinator.xml' else 'list_oozie_workflow'
        return redirect(reverse('oozie:%s' % view, kwargs={'job_id': job_id}))
    else:
      request.error(_('Invalid submission form: %s' % params_form.errors))

def _submit_bundle(request, bundle, properties):
  try:
    deployment_mapping = {}
    coords = dict([(c.uuid, c) for c in Document2.objects.filter(
        type='oozie-coordinator2', uuid__in=[b['coordinator'] for b in bundle.data['coordinators']])])

    for i, bundled in enumerate(bundle.data['coordinators']):
      coord = coords[bundled['coordinator']]
      workflow = Workflow(document=coord.dependencies.filter(type='oozie-workflow2')[0])
      wf_dir = Submission(request.user, workflow, request.fs, request.jt, properties).deploy()
      deployment_mapping['wf_%s_dir' % i] = request.fs.get_hdfs_path(wf_dir)

      coordinator = Coordinator(document=coord)
      coord_dir = Submission(request.user, coordinator, request.fs, request.jt, properties).deploy()
      deployment_mapping['coord_%s_dir' % i] = request.fs.get_hdfs_path(coord_dir)
      deployment_mapping['coord_%s' % i] = coord

      # Convert start/end dates of coordinator to server timezone
      for prop in bundled['properties']:
        if prop['name'] in ('end_date', 'start_date'):
          prop['value'] = convert_to_server_timezone(prop['value'], local_tz=coordinator.data['properties']['timezone'])

    properties.update(deployment_mapping)

    submission = Submission(request.user, bundle, request.fs, request.jt, properties=properties)
    job_id = submission.run()

    return job_id
  except RestException, ex:
    LOG.exception('Error submitting bundle')
    raise PopupException(_("Error submitting bundle %s") % (bundle,), detail=ex._headers.get('oozie-error-message', ex))

def submit_external_job(request, application_path):
  ParametersFormSet = formset_factory(ParameterForm, extra=0)

  if request.method == 'POST':
    params_form = ParametersFormSet(request.POST)

    if params_form.is_valid():
      mapping = dict([(param['name'], param['value']) for param in params_form.cleaned_data])
      mapping['dryrun'] = request.POST.get('dryrun_checkbox') == 'on'
      application_name = os.path.basename(application_path)
      application_class = Bundle if application_name == 'bundle.xml' else Coordinator if application_name == 'coordinator.xml' else get_workflow()
      mapping[application_class.get_application_path_key()] = application_path

      try:
        submission = Submission(request.user, fs=request.fs, jt=request.jt, properties=mapping)
        job_id = submission.run(application_path)
      except RestException, ex:
        detail = ex._headers.get('oozie-error-message', ex)
        if 'Max retries exceeded with url' in str(detail):
          detail = '%s: %s' % (_('The Oozie server is not running'), detail)
        LOG.exception(smart_str(detail))
        raise PopupException(_("Error submitting job %s") % (application_path,), detail=detail)

      request.info(_('Oozie job submitted'))
      view = 'list_oozie_bundle' if application_name == 'bundle.xml' else 'list_oozie_coordinator' if application_name == 'coordinator.xml' else 'list_oozie_workflow'
      return redirect(reverse('oozie:%s' % view, kwargs={'job_id': job_id}))
    else:
      request.error(_('Invalid submission form: %s' % params_form.errors))

def _submit_bundle(request, bundle, properties):
  try:
    deployment_mapping = {}
    coords = dict([(c.uuid, c) for c in Document2.objects.filter(
        type='oozie-coordinator2', uuid__in=[b['coordinator'] for b in bundle.data['coordinators']])])

    for i, bundled in enumerate(bundle.data['coordinators']):
      coord = coords[bundled['coordinator']]
      workflow = Workflow(document=coord.dependencies.filter(type='oozie-workflow2')[0])
      wf_dir = Submission(request.user, workflow, request.fs, request.jt, properties).deploy()
      deployment_mapping['wf_%s_dir' % i] = request.fs.get_hdfs_path(wf_dir)

      coordinator = Coordinator(document=coord)
      coord_dir = Submission(request.user, coordinator, request.fs, request.jt, properties).deploy()
      deployment_mapping['coord_%s_dir' % i] = request.fs.get_hdfs_path(coord_dir)
      deployment_mapping['coord_%s' % i] = coord

      # Convert start/end dates of coordinator to server timezone
      for prop in bundled['properties']:
        if prop['name'] in ('end_date', 'start_date'):
          prop['value'] = convert_to_server_timezone(prop['value'], local_tz=coordinator.data['properties']['timezone'])

    properties.update(deployment_mapping)

    submission = Submission(request.user, bundle, request.fs, request.jt, properties=properties)
    job_id = submission.run()

    return job_id
  except RestException, ex:
    LOG.exception('Error submitting bundle')
    raise PopupException(_("Error submitting bundle %s") % (bundle,), detail=ex._headers.get('oozie-error-message', ex), error_code=200)

def _submit_coordinator(request, coordinator, mapping):
  try:
    wf = coordinator.workflow
    wf_dir = Submission(
        request.user, wf, request.fs, request.jt, mapping,
        local_tz=coordinator.data['properties']['timezone']).deploy()

    properties = {'wf_application_path': request.fs.get_hdfs_path(wf_dir)}
    properties.update(mapping)

    submission = Submission(request.user, coordinator, request.fs, request.jt, properties=properties)
    job_id = submission.run()

    return job_id
  except RestException, ex:
    LOG.exception('Error submitting coordinator')
    raise PopupException(_("Error submitting coordinator %s") % (coordinator,), detail=ex._headers.get('oozie-error-message', ex))

def _submit_bundle(request, bundle, properties):
  try:
    deployment_mapping = {}
    coords = dict([(c.uuid, c) for c in Document2.objects.filter(
        type='oozie-coordinator2', uuid__in=[b['coordinator'] for b in bundle.data['coordinators']])])

    for i, bundled in enumerate(bundle.data['coordinators']):
      coord = coords[bundled['coordinator']]
      workflow = Workflow(document=coord.dependencies.all()[0])
      wf_dir = Submission(request.user, workflow, request.fs, request.jt, properties).deploy()
      deployment_mapping['wf_%s_dir' % i] = request.fs.get_hdfs_path(wf_dir)

      coordinator = Coordinator(document=coord)
      coord_dir = Submission(request.user, coordinator, request.fs, request.jt, properties).deploy()
      deployment_mapping['coord_%s_dir' % i] = coord_dir
      deployment_mapping['coord_%s' % i] = coord

    properties.update(deployment_mapping)

    submission = Submission(request.user, bundle, request.fs, request.jt, properties=properties)
    job_id = submission.run()

    return job_id
  except RestException, ex:
    raise PopupException(_("Error submitting bundle %s") % (bundle,), detail=ex._headers.get('oozie-error-message', ex))

def _rerun_workflow(request, oozie_id, run_args, mapping):
  try:
    submission = Submission(user=request.user, fs=request.fs, jt=request.jt, properties=mapping, oozie_id=oozie_id)
    job_id = submission.rerun(**run_args)
    return job_id
  except RestException, ex:
    msg = _("Error re-running workflow %s.") % (oozie_id,)
    LOG.exception(msg)
    raise PopupException(msg, detail=ex._headers.get('oozie-error-message', ex))

def _rerun_bundle(request, oozie_id, args, params, properties):
  try:
    submission = Submission(user=request.user, fs=request.fs, jt=request.jt, oozie_id=oozie_id, properties=properties)
    job_id = submission.rerun_bundle(params=params, **args)
    return job_id
  except RestException, ex:
    msg = _("Error re-running bundle %s.") % (oozie_id,)
    LOG.exception(msg)
    raise PopupException(msg, detail=ex._headers.get('oozie-error-message', ex))

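# Illustrative usage (added sketch, not part of the original source): how the rerun
# helpers above might be invoked from a view. The oozie_id value is made up;
# 'oozie.wf.rerun.failnodes' is a standard Oozie configuration key, while the empty
# run_args shape is an assumption inferred from Submission.rerun() taking keyword
# arguments.
def example_rerun_failed_nodes(request, oozie_id):
  # Ask Oozie to re-run only the actions that failed in the previous run.
  return _rerun_workflow(request, oozie_id, run_args={}, mapping={'oozie.wf.rerun.failnodes': 'true'})
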
def submit_external_job(request, application_path):
  ParametersFormSet = formset_factory(ParameterForm, extra=0)

  # Normalize scheme prefixes back to full URIs ('abfs:/...' -> 'abfs://...'); plain
  # HDFS paths get their leading slash restored instead.
  if application_path.startswith('abfs:/') and not application_path.startswith('abfs://'):
    application_path = application_path.replace("abfs:/", "abfs://")
  elif application_path.startswith('s3a:/') and not application_path.startswith('s3a://'):
    application_path = application_path.replace('s3a:/', 's3a://')
  else:
    application_path = "/" + application_path

  if application_path.startswith("abfs://"):
    application_path = abfspath(application_path)

  if request.method == 'POST':
    params_form = ParametersFormSet(request.POST)

    if params_form.is_valid():
      mapping = dict([(param['name'], param['value']) for param in params_form.cleaned_data])
      mapping['dryrun'] = request.POST.get('dryrun_checkbox') == 'on'
      application_name = os.path.basename(application_path)
      application_class = Bundle if application_name == 'bundle.xml' else Coordinator if application_name == 'coordinator.xml' else get_workflow()
      mapping[application_class.get_application_path_key()] = os.path.dirname(application_path)

      try:
        submission = Submission(request.user, fs=request.fs, jt=request.jt, properties=mapping)
        job_id = submission.run(application_path)
      except RestException as ex:
        detail = ex._headers.get('oozie-error-message', ex)
        if 'Max retries exceeded with url' in str(detail):
          detail = '%s: %s' % (_('The Oozie server is not running'), detail)
        LOG.exception(smart_str(detail))
        raise PopupException(_("Error submitting job %s") % (application_path,), detail=detail)

      jsonify = request.POST.get('format') == 'json'
      if jsonify:
        return JsonResponse({'status': 0, 'job_id': job_id, 'type': 'external_workflow'}, safe=False)
      else:
        request.info(_('Oozie job submitted'))
        view = 'list_oozie_bundle' if application_name == 'bundle.xml' else 'list_oozie_coordinator' if application_name == 'coordinator.xml' else 'list_oozie_workflow'
        return redirect(reverse('oozie:%s' % view, kwargs={'job_id': job_id}))
    else:
      request.error(_('Invalid submission form: %s' % params_form.errors))
  else:
    parameters = Submission(request.user, fs=request.fs, jt=request.jt).get_external_parameters(application_path)
    initial_params = ParameterForm.get_initial_params(parameters)
    params_form = ParametersFormSet(initial=initial_params)

    popup = render('editor/submit_job_popup.mako', request, {
      'params_form': params_form,
      'name': _('Job'),
      'action': reverse('oozie:submit_external_job', kwargs={'application_path': application_path}),
      'show_dryrun': os.path.basename(application_path) != 'bundle.xml',
      'return_json': request.GET.get('format') == 'json'
    }, force_template=True).content
    return JsonResponse(popup, safe=False)

def sync_coord_workflow(request, job_id):
  ParametersFormSet = formset_factory(ParameterForm, extra=0)
  job = check_job_access_permission(request, job_id)
  check_job_edition_permission(job, request.user)

  hue_coord = get_history().get_coordinator_from_config(job.conf_dict)
  hue_wf = (hue_coord and hue_coord.workflow) or get_history().get_workflow_from_config(job.conf_dict)
  wf_application_path = job.conf_dict.get('wf_application_path') and Hdfs.urlsplit(job.conf_dict['wf_application_path'])[2] or ''
  coord_application_path = job.conf_dict.get('oozie.coord.application.path') and Hdfs.urlsplit(job.conf_dict['oozie.coord.application.path'])[2] or ''
  properties = hue_coord and hue_coord.properties and dict([(param['name'], param['value']) for param in hue_coord.properties]) or None

  if request.method == 'POST':
    params_form = ParametersFormSet(request.POST)

    if params_form.is_valid():
      mapping = dict([(param['name'], param['value']) for param in params_form.cleaned_data])

      # Update workflow params in coordinator
      hue_coord.clear_workflow_params()
      properties = dict([(param['name'], param['value']) for param in hue_coord.properties])

      # Deploy WF XML
      submission = Submission(user=request.user, job=hue_wf, fs=request.fs, jt=request.jt, properties=properties)
      submission.deploy(deployment_dir=wf_application_path)
      submission._create_file(wf_application_path, hue_wf.XML_FILE_NAME, hue_wf.to_xml(mapping=properties), do_as=True)

      # Deploy Coordinator XML
      job.conf_dict.update(mapping)
      submission = Submission(user=request.user, job=hue_coord, fs=request.fs, jt=request.jt, properties=job.conf_dict, oozie_id=job.id)
      submission._create_file(coord_application_path, hue_coord.XML_FILE_NAME, hue_coord.to_xml(mapping=job.conf_dict), do_as=True)

      # Server picks up deployed Coordinator XML changes after running 'update' action
      submission.update_coord()

      request.info(_('Successfully updated Workflow definition'))
      return redirect(reverse('oozie:list_oozie_coordinator', kwargs={'job_id': job_id}))
    else:
      request.error(_('Invalid submission form: %s' % params_form.errors))
  else:
    new_params = hue_wf and hue_wf.find_all_parameters() or []
    new_params = dict([(param['name'], param['value']) for param in new_params])

    # Set previous values
    if properties:
      new_params = dict([(key, properties[key]) if key in properties.keys() else (key, new_params[key]) for key, value in new_params.iteritems()])

    initial_params = ParameterForm.get_initial_params(new_params)
    params_form = ParametersFormSet(initial=initial_params)

    popup = render('editor2/submit_job_popup.mako', request, {
      'params_form': params_form,
      'name': _('Job'),
      'header': _('Sync Workflow definition?'),
      'action': reverse('oozie:sync_coord_workflow', kwargs={'job_id': job_id})
    }, force_template=True).content
    return JsonResponse(popup, safe=False)

def test_generate_altus_action_start_cluster(self):

  class TestJob(object):
    XML_FILE_NAME = 'workflow.xml'

    def __init__(self):
      self.deployment_dir = '/tmp/test'
      self.nodes = [
        Node({
          'id': '1',
          'type': 'hive-document',
          'properties': {
            'jdbc_url': u"${wf:actionData('shell-31b5')['hiveserver']}",
            'password': u'test'
          }
        })
      ]

  user = User.objects.get(username='******')
  submission = Submission(user, job=TestJob(), fs=MockFs(logical_name='fsname'), jt=MockJt(logical_name='jtname'))

  command = submission._generate_altus_action_script(
    service='dataeng',
    command='listClusters',
    arguments={},
    auth_key_id='altus_auth_key_id',
    auth_key_secret='altus_auth_key_secret'
  )

  assert_true('''#!/usr/bin/env python

from navoptapi.api_lib import ApiLib

hostname = 'dataengapi.us-west-1.altus.cloudera.com'
auth_key_id = 'altus_auth_key_id'
auth_key_secret = \'\'\'altus_auth_key_secret\'\'\'

def _exec(service, command, parameters=None):
  if parameters is None:
    parameters = {}

  try:
    api = ApiLib(service, hostname, auth_key_id, auth_key_secret)
    resp = api.call_api(command, parameters)
    return resp.json()
  except Exception, e:
    print e
    raise e

print _exec('dataeng', 'listClusters', {})
''' in command, command)

def _submit_workflow(user, fs, jt, workflow, mapping):
  try:
    submission = Submission(user, workflow, fs, jt, mapping)
    job_id = submission.run()
    return job_id
  except RestException, ex:
    detail = ex._headers.get('oozie-error-message', ex)
    if 'Max retries exceeded with url' in str(detail):
      detail = '%s: %s' % (_('The Oozie server is not running'), detail)
    LOG.error(smart_str(detail))
    raise PopupException(_("Error submitting workflow %s") % (workflow,), detail=detail)

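# Illustrative usage (added sketch, not from the original source): the submit helpers in
# this file all follow the same pattern, so a caller only prepares a property mapping and
# hands over the request's user/fs/jt. The property names below are standard Oozie
# configuration keys; the view wiring itself is an assumption.
def example_submit_workflow(request, workflow):
  mapping = {
    'oozie.use.system.libpath': 'true',  # let Oozie resolve its sharelib jars
    'queueName': 'default',
  }
  return _submit_workflow(request.user, request.fs, request.jt, workflow, mapping)
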
def test_get_logical_properties(self):
  submission = Submission(self.user, fs=MockFs(logical_name='fsname'), jt=MockJt(logical_name='jtname'))

  assert_equal({}, submission.properties)
  submission._update_properties('curacao:8032', '/deployment_dir')

  assert_equal({
    'jobTracker': 'jtname',
    'nameNode': 'fsname'
  }, submission.properties)

def test_get_properties(self):
  submission = Submission(self.user, fs=MockFs())

  assert_equal({}, submission.properties)
  submission._update_properties('curacao:8032', '/deployment_dir')

  assert_equal({
    'jobTracker': 'curacao:8032',
    'nameNode': 'hdfs://curacao:8020'
  }, submission.properties)

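# Behavior note (added sketch): taken together, the two tests above pin down
# _update_properties(). With plain clusters it fills 'jobTracker'/'nameNode' from the
# job-tracker address and fs.fs_defaultfs; clusters configured with logical names (HA)
# get the logical names substituted instead. A minimal reproduction, assuming the same
# MockFs/MockJt test doubles used above:
#
#   s = Submission(user, fs=MockFs(logical_name='fsname'), jt=MockJt(logical_name='jtname'))
#   s._update_properties('host:8032', '/deployment_dir')
#   assert s.properties == {'jobTracker': 'jtname', 'nameNode': 'fsname'}
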
def sync_coord_workflow(request, job_id):
  ParametersFormSet = formset_factory(ParameterForm, extra=0)
  job = check_job_access_permission(request, job_id)
  check_job_edition_permission(job, request.user)

  hue_coord = get_history().get_coordinator_from_config(job.conf_dict)
  hue_wf = (hue_coord and hue_coord.workflow) or get_history().get_workflow_from_config(job.conf_dict)
  wf_application_path = job.conf_dict.get('wf_application_path') and Hdfs.urlsplit(job.conf_dict['wf_application_path'])[2] or ''
  coord_application_path = job.conf_dict.get('oozie.coord.application.path') and Hdfs.urlsplit(job.conf_dict['oozie.coord.application.path'])[2] or ''
  properties = hue_coord and hue_coord.properties and dict([(param['name'], param['value']) for param in hue_coord.properties]) or None

  if request.method == 'POST':
    params_form = ParametersFormSet(request.POST)

    if params_form.is_valid():
      mapping = dict([(param['name'], param['value']) for param in params_form.cleaned_data])

      # Update workflow params in coordinator
      hue_coord.clear_workflow_params()
      properties = dict([(param['name'], param['value']) for param in hue_coord.properties])

      # Deploy WF XML
      submission = Submission(user=request.user, job=hue_wf, fs=request.fs, jt=request.jt, properties=properties)
      submission._create_file(wf_application_path, hue_wf.XML_FILE_NAME, hue_wf.to_xml(mapping=properties), do_as=True)

      # Deploy Coordinator XML
      job.conf_dict.update(mapping)
      submission = Submission(user=request.user, job=hue_coord, fs=request.fs, jt=request.jt, properties=job.conf_dict, oozie_id=job.id)
      submission._create_file(coord_application_path, hue_coord.XML_FILE_NAME, hue_coord.to_xml(mapping=job.conf_dict), do_as=True)

      # Server picks up deployed Coordinator XML changes after running 'update' action
      submission.update_coord()

      request.info(_('Successfully updated Workflow definition'))
      return redirect(reverse('oozie:list_oozie_coordinator', kwargs={'job_id': job_id}))
    else:
      request.error(_('Invalid submission form: %s' % params_form.errors))
  else:
    new_params = hue_wf and hue_wf.find_all_parameters() or []
    new_params = dict([(param['name'], param['value']) for param in new_params])

    # Set previous values
    if properties:
      new_params = dict([(key, properties[key]) if key in properties.keys() else (key, new_params[key]) for key, value in new_params.iteritems()])

    initial_params = ParameterForm.get_initial_params(new_params)
    params_form = ParametersFormSet(initial=initial_params)

    popup = render('editor2/submit_job_popup.mako', request, {
      'params_form': params_form,
      'name': _('Job'),
      'header': _('Sync Workflow definition?'),
      'action': reverse('oozie:sync_coord_workflow', kwargs={'job_id': job_id})
    }, force_template=True).content
    return JsonResponse(popup, safe=False)

def test_update_credentials_from_hive_action_when_jdbc_url_is_variable(self):

  class TestJob(object):
    XML_FILE_NAME = 'workflow.xml'

    def __init__(self):
      self.deployment_dir = '/tmp/test'
      self.nodes = [
        Node({
          'id': '1',
          'type': 'hive-document',
          'properties': {
            'jdbc_url': u"${wf:actionData('shell-31b5')['hiveserver']}",
            'password': u'test'
          }
        })
      ]

  user = User.objects.get(username='******')
  submission = Submission(user, job=TestJob(), fs=MockFs(logical_name='fsname'), jt=MockJt(logical_name='jtname'))

  finish = (
    beeswax.conf.HIVE_SERVER_HOST.set_for_testing('hue-koh-chang'),
    beeswax.conf.HIVE_SERVER_PORT.set_for_testing(12345),
  )

  try:
    creds = Credentials(credentials=TestCredentials.CREDENTIALS.copy())
    hive_properties = {
      'thrift_uri': 'thrift://*****:*****@test-realm.com',
    }

    submission.properties['credentials'] = creds.get_properties(hive_properties)
    submission._update_credentials_from_hive_action(creds)

    assert_equal(
      submission.properties['credentials'][creds.hiveserver2_name]['properties'],
      [
        ('hive2.jdbc.url', u'jdbc:hive2://hue-koh-chang:12345/default'),
        ('hive2.server.principal', u'hive/[email protected]')
      ]
    )
  finally:
    for f in finish:
      f()

def _submit_workflow(user, fs, jt, workflow, mapping):
  try:
    submission = Submission(user, workflow, fs, jt, mapping)
    job_id = submission.run()
    workflow.document.add_to_history(submission.user, {'properties': submission.properties, 'oozie_id': submission.oozie_id})
    return job_id
  except RestException, ex:
    detail = ex._headers.get('oozie-error-message', ex)
    if 'Max retries exceeded with url' in str(detail):
      detail = '%s: %s' % (_('The Oozie server is not running'), detail)
    LOG.exception('Error submitting workflow: %s' % smart_str(detail))
    raise PopupException(_("Error submitting workflow %s: %s") % (workflow, detail))

def _submit_coordinator(request, coordinator, mapping):
  try:
    wf_doc = Document2.objects.get(uuid=coordinator.data['properties']['workflow'])
    wf_dir = Submission(request.user, Workflow(document=wf_doc), request.fs, request.jt, mapping).deploy()

    properties = {'wf_application_path': request.fs.get_hdfs_path(wf_dir)}
    properties.update(mapping)

    submission = Submission(request.user, coordinator, request.fs, request.jt, properties=properties)
    job_id = submission.run()

    return job_id
  except RestException, ex:
    raise PopupException(_("Error submitting coordinator %s") % (coordinator,), detail=ex._headers.get('oozie-error-message', ex))

def _submit_coordinator(request, coordinator, mapping):
  try:
    wf_doc = Document2.objects.get_by_uuid(user=request.user, uuid=coordinator.data['properties']['workflow'])
    wf_dir = Submission(
        request.user, Workflow(document=wf_doc), request.fs, request.jt, mapping,
        local_tz=coordinator.data['properties']['timezone']).deploy()

    properties = {'wf_application_path': request.fs.get_hdfs_path(wf_dir)}
    properties.update(mapping)

    submission = Submission(request.user, coordinator, request.fs, request.jt, properties=properties)
    job_id = submission.run()

    return job_id
  except RestException, ex:
    LOG.exception('Error submitting coordinator')
    raise PopupException(_("Error submitting coordinator %s") % (coordinator,), detail=ex._headers.get('oozie-error-message', ex))

def test_update_properties(self):
  finish = []
  finish.append(MR_CLUSTERS.set_for_testing({'default': {}}))
  finish.append(MR_CLUSTERS['default'].SUBMIT_TO.set_for_testing(True))
  finish.append(YARN_CLUSTERS.set_for_testing({'default': {}}))
  finish.append(YARN_CLUSTERS['default'].SUBMIT_TO.set_for_testing(True))

  try:
    properties = {
      'user.name': 'hue',
      'test.1': 'http://localhost/test?test1=test&test2=test',
      'nameNode': 'hdfs://curacao:8020',
      'jobTracker': 'jtaddress',
      'security_enabled': False
    }

    final_properties = properties.copy()
    submission = Submission(None, properties=properties, oozie_id='test', fs=MockFs())
    assert_equal(properties, submission.properties)
    submission._update_properties('jtaddress', 'deployment-directory')
    assert_equal(final_properties, submission.properties)

    cluster.clear_caches()
    fs = cluster.get_hdfs()
    jt = cluster.get_next_ha_mrcluster()[1]
    final_properties = properties.copy()
    final_properties.update({
      'jobTracker': 'jtaddress',
      'nameNode': fs.fs_defaultfs
    })
    submission = Submission(None, properties=properties, oozie_id='test', fs=fs, jt=jt)
    assert_equal(properties, submission.properties)
    submission._update_properties('jtaddress', 'deployment-directory')
    assert_equal(final_properties, submission.properties)

    finish.append(HDFS_CLUSTERS['default'].LOGICAL_NAME.set_for_testing('namenode'))
    finish.append(MR_CLUSTERS['default'].LOGICAL_NAME.set_for_testing('jobtracker'))
    cluster.clear_caches()
    fs = cluster.get_hdfs()
    jt = cluster.get_next_ha_mrcluster()[1]
    final_properties = properties.copy()
    final_properties.update({
      'jobTracker': 'jobtracker',
      'nameNode': 'namenode'
    })
    submission = Submission(None, properties=properties, oozie_id='test', fs=fs, jt=jt)
    assert_equal(properties, submission.properties)
    submission._update_properties('jtaddress', 'deployment-directory')
    assert_equal(final_properties, submission.properties)
  finally:
    cluster.clear_caches()
    for reset in finish:
      reset()

def test_get_external_parameters(self):
  xml = """
<workflow-app name="Pig" xmlns="uri:oozie:workflow:0.4">
    <start to="Pig"/>
    <action name="Pig">
        <pig>
            <job-tracker>${jobTracker}</job-tracker>
            <name-node>${nameNode}</name-node>
            <prepare>
                  <delete path="${output}"/>
            </prepare>
            <script>aggregate.pig</script>
            <argument>-param</argument>
            <argument>INPUT=${input}</argument>
            <argument>-param</argument>
            <argument>OUTPUT=${output}</argument>
            <configuration>
              <property>
                <name>mapred.input.format.class</name>
                <value>org.apache.hadoop.examples.SleepJob$SleepInputFormat</value>
              </property>
            </configuration>
        </pig>
        <ok to="end"/>
        <error to="kill"/>
    </action>
    <kill name="kill">
        <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
    </kill>
    <end name="end"/>
</workflow-app>
"""

  properties = """
#
# Licensed to the Hue
#

nameNode=hdfs://localhost:8020
jobTracker=localhost:8021
queueName=default
examplesRoot=examples
oozie.use.system.libpath=true
oozie.wf.application.path=${nameNode}/user/${user.name}/${examplesRoot}/apps/pig
"""

  parameters = Submission(self.user)._get_external_parameters(xml, properties)

  assert_equal({
    'oozie.use.system.libpath': 'true',
    'input': '',
    'jobTracker': 'localhost:8021',
    'oozie.wf.application.path': '${nameNode}/user/${user.name}/${examplesRoot}/apps/pig',
    'examplesRoot': 'examples',
    'output': '',
    'nameNode': 'hdfs://localhost:8020',
    'queueName': 'default'
  }, parameters)

def sync_coord_workflow(request, job_id):
  ParametersFormSet = formset_factory(ParameterForm, extra=0)
  job = check_job_access_permission(request, job_id)
  check_job_edition_permission(job, request.user)

  hue_coord = get_history().get_coordinator_from_config(job.conf_dict)
  hue_wf = (hue_coord and hue_coord.workflow) or get_history().get_workflow_from_config(job.conf_dict)
  wf_application_path = job.conf_dict.get('wf_application_path') and Hdfs.urlsplit(job.conf_dict['wf_application_path'])[2] or ''

  if request.method == 'POST':
    params_form = ParametersFormSet(request.POST)

    if params_form.is_valid():
      mapping = dict([(param['name'], param['value']) for param in params_form.cleaned_data])

      submission = Submission(user=request.user, job=hue_wf, fs=request.fs, jt=request.jt, properties=mapping)
      submission._sync_definition(wf_application_path, mapping)

      request.info(_('Successfully updated Workflow definition'))
      return redirect(reverse('oozie:list_oozie_coordinator', kwargs={'job_id': job_id}))
    else:
      request.error(_('Invalid submission form: %s' % params_form.errors))
  else:
    parameters = hue_wf and hue_wf.find_all_parameters() or []
    params_dict = dict([(param['name'], param['value']) for param in parameters])

    submission = Submission(user=request.user, job=hue_wf, fs=request.fs, jt=request.jt, properties=None)
    prev_properties = hue_wf and hue_wf.deployment_dir and \
        submission.get_external_parameters(request.fs.join(wf_application_path, hue_wf.XML_FILE_NAME)) or {}

    for key, value in params_dict.iteritems():
      params_dict[key] = prev_properties[key] if key in prev_properties.keys() else params_dict[key]

    initial_params = ParameterForm.get_initial_params(params_dict)
    params_form = ParametersFormSet(initial=initial_params)

    popup = render('editor2/submit_job_popup.mako', request, {
      'params_form': params_form,
      'name': _('Job'),
      'header': _('Sync Workflow definition?'),
      'action': reverse('oozie:sync_coord_workflow', kwargs={'job_id': job_id})
    }, force_template=True).content
    return JsonResponse(popup, safe=False)

def submit_external_job(request, application_path):
  ParametersFormSet = formset_factory(ParameterForm, extra=0)

  if request.method == "POST":
    params_form = ParametersFormSet(request.POST)

    if params_form.is_valid():
      mapping = dict([(param["name"], param["value"]) for param in params_form.cleaned_data])
      mapping["dryrun"] = request.POST.get("dryrun_checkbox") == "on"
      application_name = os.path.basename(application_path)
      application_class = (
        Bundle
        if application_name == "bundle.xml"
        else Coordinator
        if application_name == "coordinator.xml"
        else get_workflow()
      )
      mapping[application_class.get_application_path_key()] = application_path

      try:
        submission = Submission(request.user, fs=request.fs, jt=request.jt, properties=mapping)
        job_id = submission.run(application_path)
      except RestException, ex:
        detail = ex._headers.get("oozie-error-message", ex)
        if "Max retries exceeded with url" in str(detail):
          detail = "%s: %s" % (_("The Oozie server is not running"), detail)
        LOG.exception(smart_str(detail))
        raise PopupException(_("Error submitting job %s") % (application_path,), detail=detail)

      request.info(_("Oozie job submitted"))
      view = (
        "list_oozie_bundle"
        if application_name == "bundle.xml"
        else "list_oozie_coordinator"
        if application_name == "coordinator.xml"
        else "list_oozie_workflow"
      )
      return redirect(reverse("oozie:%s" % view, kwargs={"job_id": job_id}))
    else:
      request.error(_("Invalid submission form: %s" % params_form.errors))

def _schedule_oozie_job(self, workspace_path, collection_name, input_path):
  oozie = get_oozie(self.username)

  properties = {
    "dryrun": "False",
    "zkHost": zkensemble(),
    # these libs can be installed from here:
    # https://drive.google.com/a/cloudera.com/folderview?id=0B1gZoK8Ae1xXc0sxSkpENWJ3WUU&usp=sharing
    "oozie.libpath": CONFIG_INDEXER_LIBS_PATH.get(),
    "security_enabled": "False",
    "collectionName": collection_name,
    "filePath": input_path,
    "outputDir": "/user/%s/indexer" % self.username,
    "workspacePath": workspace_path,
    'oozie.wf.application.path': "${nameNode}%s" % workspace_path,
    'user.name': self.username
  }

  submission = Submission(self.username, fs=self.fs, properties=properties)
  job_id = submission.run(workspace_path)

  return job_id

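# Illustrative usage (added sketch, not from the original source): scheduling the indexer
# workflow deployed under a workspace. All paths and names below are made-up examples.
#
#   job_id = indexer._schedule_oozie_job(
#     workspace_path='/user/hue/oozie/workspaces/indexer',
#     collection_name='tweets_demo',
#     input_path='/user/hue/data/tweets.csv'
#   )
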
def _submit_coordinator(request, coordinator, mapping):
  try:
    wf = coordinator.workflow

    if IS_MULTICLUSTER_ONLY.get() and has_multi_cluster():
      mapping['auto-cluster'] = {
        u'additionalClusterResourceTags': [],
        u'automaticTerminationCondition': u'EMPTY_JOB_QUEUE',  # u'NONE'
        u'cdhVersion': u'CDH514',
        u'clouderaManagerPassword': u'guest',
        u'clouderaManagerUsername': u'guest',
        u'clusterName': u'analytics4',  # Add time variable
        u'computeWorkersConfiguration': {
          u'bidUSDPerHr': 0,
          u'groupSize': 0,
          u'useSpot': False
        },
        u'environmentName': u'crn:altus:environments:us-west-1:12a0079b-1591-4ca0-b721-a446bda74e67:environment:analytics/236ebdda-18bd-428a-9d2b-cd6973d42946',
        u'instanceBootstrapScript': u'',
        u'instanceType': u'm4.xlarge',
        u'jobSubmissionGroupName': u'',
        u'jobs': [
          {
            u'failureAction': u'INTERRUPT_JOB_QUEUE',
            u'name': u'a87e20d7-5c0d-49ee-ab37-625fa2803d51',
            u'sparkJob': {
              u'applicationArguments': ['5'],
              u'jars': [u's3a://datawarehouse-customer360/ETL/spark-examples.jar'],
              u'mainClass': u'org.apache.spark.examples.SparkPi'
            }
          },
          # {
          #   u'failureAction': u'INTERRUPT_JOB_QUEUE',
          #   u'name': u'a87e20d7-5c0d-49ee-ab37-625fa2803d51',
          #   u'sparkJob': {
          #     u'applicationArguments': ['10'],
          #     u'jars': [u's3a://datawarehouse-customer360/ETL/spark-examples.jar'],
          #     u'mainClass': u'org.apache.spark.examples.SparkPi'
          #   }
          # },
          # {
          #   u'failureAction': u'INTERRUPT_JOB_QUEUE',
          #   u'name': u'a87e20d7-5c0d-49ee-ab37-625fa2803d51',
          #   u'sparkJob': {
          #     u'applicationArguments': [u'filesystems3.conf'],
          #     u'jars': [u's3a://datawarehouse-customer360/ETL/envelope-0.6.0-SNAPSHOT-c6.jar'],
          #     u'mainClass': u'com.cloudera.labs.envelope.EnvelopeMain',
          #     u'sparkArguments': u'--archives=s3a://datawarehouse-customer360/ETL/filesystems3.conf'
          #   }
          # }
        ],
        u'namespaceName': u'crn:altus:sdx:us-west-1:12a0079b-1591-4ca0-b721-a446bda74e67:namespace:analytics/7ea35fe5-dbc9-4b17-92b1-97a1ab32e410',
        u'publicKey': DEFAULT_PUBLIC_KEY.get(),
        u'serviceType': u'SPARK',
        u'workersConfiguration': {},
        u'workersGroupSize': u'3'
      }

    wf_dir = Submission(
        request.user, wf, request.fs, request.jt, mapping,
        local_tz=coordinator.data['properties']['timezone']).deploy()

    properties = {'wf_application_path': request.fs.get_hdfs_path(wf_dir)}
    properties.update(mapping)

    submission = Submission(request.user, coordinator, request.fs, request.jt, properties=properties)
    job_id = submission.run()

    return job_id
  except RestException, ex:
    LOG.exception('Error submitting coordinator')
    raise PopupException(_("Error submitting coordinator %s") % (coordinator,), detail=ex._headers.get('oozie-error-message', ex), error_code=200)

def test_copy_files():
  cluster = pseudo_hdfs4.shared_cluster()

  try:
    c = make_logged_in_client()
    user = User.objects.get(username='******')

    prefix = '/tmp/test_copy_files'

    if cluster.fs.exists(prefix):
      cluster.fs.rmtree(prefix)

    # Jars in various locations
    deployment_dir = '%s/workspace' % prefix
    external_deployment_dir = '%s/deployment' % prefix
    jar_1 = '%s/udf1.jar' % prefix
    jar_2 = '%s/lib/udf2.jar' % prefix
    jar_3 = '%s/udf3.jar' % deployment_dir
    jar_4 = '%s/lib/udf4.jar' % deployment_dir  # Never move

    cluster.fs.mkdir(prefix)
    cluster.fs.create(jar_1)
    cluster.fs.create(jar_2)
    cluster.fs.create(jar_3)
    cluster.fs.create(jar_4)

    class MockNode():
      def __init__(self, jar_path):
        self.jar_path = jar_path

    class MockJob():
      XML_FILE_NAME = 'workflow.xml'

      def __init__(self):
        self.node_list = [
          MockNode(jar_1),
          MockNode(jar_2),
          MockNode(jar_3),
          MockNode(jar_4),
        ]

    submission = Submission(user, job=MockJob(), fs=cluster.fs, jt=cluster.jt)
    submission._copy_files(deployment_dir, "<xml>My XML</xml>", {'prop1': 'val1'})
    submission._copy_files(external_deployment_dir, "<xml>My XML</xml>", {'prop1': 'val1'})

    assert_true(cluster.fs.exists(deployment_dir + '/workflow.xml'), deployment_dir)
    assert_true(cluster.fs.exists(deployment_dir + '/job.properties'), deployment_dir)

    # All sources still there
    assert_true(cluster.fs.exists(jar_1))
    assert_true(cluster.fs.exists(jar_2))
    assert_true(cluster.fs.exists(jar_3))
    assert_true(cluster.fs.exists(jar_4))

    deployment_dir = deployment_dir + '/lib'
    external_deployment_dir = external_deployment_dir + '/lib'

    list_dir_workspace = cluster.fs.listdir(deployment_dir)
    list_dir_deployement = cluster.fs.listdir(external_deployment_dir)

    # All destinations there
    assert_true(cluster.fs.exists(deployment_dir + '/udf1.jar'), list_dir_workspace)
    assert_true(cluster.fs.exists(deployment_dir + '/udf2.jar'), list_dir_workspace)
    assert_true(cluster.fs.exists(deployment_dir + '/udf3.jar'), list_dir_workspace)
    assert_true(cluster.fs.exists(deployment_dir + '/udf4.jar'), list_dir_workspace)

    assert_true(cluster.fs.exists(external_deployment_dir + '/udf1.jar'), list_dir_deployement)
    assert_true(cluster.fs.exists(external_deployment_dir + '/udf2.jar'), list_dir_deployement)
    assert_true(cluster.fs.exists(external_deployment_dir + '/udf3.jar'), list_dir_deployement)
    assert_true(cluster.fs.exists(external_deployment_dir + '/udf4.jar'), list_dir_deployement)

    stats_udf1 = cluster.fs.stats(deployment_dir + '/udf1.jar')
    stats_udf2 = cluster.fs.stats(deployment_dir + '/udf2.jar')
    stats_udf3 = cluster.fs.stats(deployment_dir + '/udf3.jar')
    stats_udf4 = cluster.fs.stats(deployment_dir + '/udf4.jar')

    submission._copy_files('%s/workspace' % prefix, "<xml>My XML</xml>", {'prop1': 'val1'})

    assert_not_equal(stats_udf1['fileId'], cluster.fs.stats(deployment_dir + '/udf1.jar')['fileId'])
    assert_not_equal(stats_udf2['fileId'], cluster.fs.stats(deployment_dir + '/udf2.jar')['fileId'])
    assert_not_equal(stats_udf3['fileId'], cluster.fs.stats(deployment_dir + '/udf3.jar')['fileId'])
    assert_equal(stats_udf4['fileId'], cluster.fs.stats(deployment_dir + '/udf4.jar')['fileId'])
  finally:
    try:
      cluster.fs.rmtree(prefix)
    except:
      pass

def sync_coord_workflow(request, job_id):
  ParametersFormSet = formset_factory(ParameterForm, extra=0)
  job = check_job_access_permission(request, job_id)
  check_job_edition_permission(job, request.user)

  hue_coord = get_history().get_coordinator_from_config(job.conf_dict)
  hue_wf = (hue_coord and hue_coord.workflow) or get_history().get_workflow_from_config(job.conf_dict)
  wf_application_path = (
    job.conf_dict.get("wf_application_path") and Hdfs.urlsplit(job.conf_dict["wf_application_path"])[2] or ""
  )
  coord_application_path = (
    job.conf_dict.get("oozie.coord.application.path")
    and Hdfs.urlsplit(job.conf_dict["oozie.coord.application.path"])[2]
    or ""
  )
  properties = (
    hue_coord
    and hue_coord.properties
    and dict([(param["name"], param["value"]) for param in hue_coord.properties])
    or None
  )

  if request.method == "POST":
    params_form = ParametersFormSet(request.POST)

    if params_form.is_valid():
      mapping = dict([(param["name"], param["value"]) for param in params_form.cleaned_data])

      # Update workflow params in coordinator
      hue_coord.clear_workflow_params()
      properties = dict([(param["name"], param["value"]) for param in hue_coord.properties])

      # Deploy WF XML
      submission = Submission(user=request.user, job=hue_wf, fs=request.fs, jt=request.jt, properties=properties)
      submission._create_file(
        wf_application_path, hue_wf.XML_FILE_NAME, hue_wf.to_xml(mapping=properties), do_as=True
      )

      # Deploy Coordinator XML
      job.conf_dict.update(mapping)
      submission = Submission(
        user=request.user,
        job=hue_coord,
        fs=request.fs,
        jt=request.jt,
        properties=job.conf_dict,
        oozie_id=job.id,
      )
      submission._create_file(
        coord_application_path, hue_coord.XML_FILE_NAME, hue_coord.to_xml(mapping=job.conf_dict), do_as=True
      )

      # Server picks up deployed Coordinator XML changes after running 'update' action
      submission.update_coord()

      request.info(_("Successfully updated Workflow definition"))
      return redirect(reverse("oozie:list_oozie_coordinator", kwargs={"job_id": job_id}))
    else:
      request.error(_("Invalid submission form: %s" % params_form.errors))
  else:
    new_params = hue_wf and hue_wf.find_all_parameters() or []
    new_params = dict([(param["name"], param["value"]) for param in new_params])

    # Set previous values
    if properties:
      new_params = dict(
        [
          (key, properties[key]) if key in properties.keys() else (key, new_params[key])
          for key, value in new_params.iteritems()
        ]
      )

    initial_params = ParameterForm.get_initial_params(new_params)
    params_form = ParametersFormSet(initial=initial_params)

    popup = render(
      "editor2/submit_job_popup.mako",
      request,
      {
        "params_form": params_form,
        "name": _("Job"),
        "header": _("Sync Workflow definition?"),
        "action": reverse("oozie:sync_coord_workflow", kwargs={"job_id": job_id}),
      },
      force_template=True,
    ).content
    return JsonResponse(popup, safe=False)

def _submit_coordinator(request, coordinator, mapping):
  try:
    wf = coordinator.workflow

    if IS_MULTICLUSTER_ONLY.get() and has_multi_cluster():
      mapping['auto-cluster'] = {
        u'additionalClusterResourceTags': [],
        u'automaticTerminationCondition': u'EMPTY_JOB_QUEUE',  # u'NONE'
        u'cdhVersion': u'CDH514',
        u'clouderaManagerPassword': u'guest',
        u'clouderaManagerUsername': u'guest',
        u'clusterName': u'analytics4',  # Add time variable
        u'computeWorkersConfiguration': {
          u'bidUSDPerHr': 0,
          u'groupSize': 0,
          u'useSpot': False
        },
        u'environmentName': u'crn:altus:environments:us-west-1:12a0079b-1591-4ca0-b721-a446bda74e67:environment:analytics/236ebdda-18bd-428a-9d2b-cd6973d42946',
        u'instanceBootstrapScript': u'',
        u'instanceType': u'm4.xlarge',
        u'jobSubmissionGroupName': u'',
        u'jobs': [{
          u'failureAction': u'INTERRUPT_JOB_QUEUE',
          u'name': u'a87e20d7-5c0d-49ee-ab37-625fa2803d51',
          u'sparkJob': {
            u'applicationArguments': ['5'],
            u'jars': [u's3a://datawarehouse-customer360/ETL/spark-examples.jar'],
            u'mainClass': u'org.apache.spark.examples.SparkPi'
          }
        },
        # {
        #   u'failureAction': u'INTERRUPT_JOB_QUEUE',
        #   u'name': u'a87e20d7-5c0d-49ee-ab37-625fa2803d51',
        #   u'sparkJob': {
        #     u'applicationArguments': ['10'],
        #     u'jars': [u's3a://datawarehouse-customer360/ETL/spark-examples.jar'],
        #     u'mainClass': u'org.apache.spark.examples.SparkPi'
        #   }
        # },
        # {
        #   u'failureAction': u'INTERRUPT_JOB_QUEUE',
        #   u'name': u'a87e20d7-5c0d-49ee-ab37-625fa2803d51',
        #   u'sparkJob': {
        #     u'applicationArguments': [u'filesystems3.conf'],
        #     u'jars': [u's3a://datawarehouse-customer360/ETL/envelope-0.6.0-SNAPSHOT-c6.jar'],
        #     u'mainClass': u'com.cloudera.labs.envelope.EnvelopeMain',
        #     u'sparkArguments': u'--archives=s3a://datawarehouse-customer360/ETL/filesystems3.conf'
        #   }
        # }
        ],
        u'namespaceName': u'crn:altus:sdx:us-west-1:12a0079b-1591-4ca0-b721-a446bda74e67:namespace:analytics/7ea35fe5-dbc9-4b17-92b1-97a1ab32e410',
        u'publicKey': DEFAULT_PUBLIC_KEY.get(),
        u'serviceType': u'SPARK',
        u'workersConfiguration': {},
        u'workersGroupSize': u'3'
      }

    wf_dir = Submission(request.user, wf, request.fs, request.jt, mapping, local_tz=coordinator.data['properties']['timezone']).deploy()

    properties = {'wf_application_path': request.fs.get_hdfs_path(wf_dir)}
    properties.update(mapping)

    submission = Submission(request.user, coordinator, request.fs, request.jt, properties=properties)
    job_id = submission.run()

    return job_id
  except RestException, ex:
    LOG.exception('Error submitting coordinator')
    raise PopupException(_("Error submitting coordinator %s") % (coordinator,), detail=ex._headers.get('oozie-error-message', ex), error_code=200)

def test_copy_files():
  cluster = pseudo_hdfs4.shared_cluster()

  try:
    c = make_logged_in_client()
    user = User.objects.get(username='******')
    ensure_home_directory(cluster.fs, user)

    prefix = '/tmp/test_copy_files'

    if cluster.fs.exists(prefix):
      cluster.fs.rmtree(prefix)

    # Jars in various locations
    deployment_dir = '%s/workspace' % prefix
    external_deployment_dir = '%s/deployment' % prefix
    jar_1 = '%s/udf1.jar' % prefix
    jar_2 = '%s/lib/udf2.jar' % prefix
    jar_3 = '%s/udf3.jar' % deployment_dir
    jar_4 = '%s/lib/udf4.jar' % deployment_dir  # Doesn't move
    jar_5 = 'udf5.jar'
    jar_6 = 'lib/udf6.jar'  # Doesn't move

    cluster.fs.mkdir(prefix)
    cluster.fs.create(jar_1)
    cluster.fs.create(jar_2)
    cluster.fs.create(jar_3)
    cluster.fs.create(jar_4)
    cluster.fs.create(deployment_dir + '/' + jar_5)
    cluster.fs.create(deployment_dir + '/' + jar_6)

    class MockJob(object):
      XML_FILE_NAME = 'workflow.xml'

      def __init__(self):
        self.deployment_dir = deployment_dir
        self.nodes = [
          Node({'id': '1', 'type': 'mapreduce', 'properties': {'jar_path': jar_1}}),
          Node({'id': '2', 'type': 'mapreduce', 'properties': {'jar_path': jar_2}}),
          Node({'id': '3', 'type': 'java', 'properties': {'jar_path': jar_3}}),
          Node({'id': '4', 'type': 'java', 'properties': {'jar_path': jar_4}}),
          # Workspace relative paths
          Node({'id': '5', 'type': 'java', 'properties': {'jar_path': jar_5}}),
          Node({'id': '6', 'type': 'java', 'properties': {'jar_path': jar_6}})
        ]

    submission = Submission(user, job=MockJob(), fs=cluster.fs, jt=cluster.jt)
    submission._copy_files(deployment_dir, "<xml>My XML</xml>", {'prop1': 'val1'})
    submission._copy_files(external_deployment_dir, "<xml>My XML</xml>", {'prop1': 'val1'})

    assert_true(cluster.fs.exists(deployment_dir + '/workflow.xml'), deployment_dir)
    assert_true(cluster.fs.exists(deployment_dir + '/job.properties'), deployment_dir)

    # All sources still there
    assert_true(cluster.fs.exists(jar_1))
    assert_true(cluster.fs.exists(jar_2))
    assert_true(cluster.fs.exists(jar_3))
    assert_true(cluster.fs.exists(jar_4))
    assert_true(cluster.fs.exists(deployment_dir + '/' + jar_5))
    assert_true(cluster.fs.exists(deployment_dir + '/' + jar_6))

    # Lib
    deployment_dir = deployment_dir + '/lib'
    external_deployment_dir = external_deployment_dir + '/lib'

    if USE_LIBPATH_FOR_JARS.get():
      assert_true(jar_1 in submission.properties['oozie.libpath'])
      assert_true(jar_2 in submission.properties['oozie.libpath'])
      assert_true(jar_3 in submission.properties['oozie.libpath'])
      assert_true(jar_4 in submission.properties['oozie.libpath'])
      print(deployment_dir + '/' + jar_5)
      assert_true((deployment_dir + '/' + jar_5) in submission.properties['oozie.libpath'], submission.properties['oozie.libpath'])
      assert_true((deployment_dir + '/' + jar_6) in submission.properties['oozie.libpath'], submission.properties['oozie.libpath'])
    else:
      list_dir_workspace = cluster.fs.listdir(deployment_dir)
      list_dir_deployement = cluster.fs.listdir(external_deployment_dir)

      # All destinations there
      assert_true(cluster.fs.exists(deployment_dir + '/udf1.jar'), list_dir_workspace)
      assert_true(cluster.fs.exists(deployment_dir + '/udf2.jar'), list_dir_workspace)
      assert_true(cluster.fs.exists(deployment_dir + '/udf3.jar'), list_dir_workspace)
      assert_true(cluster.fs.exists(deployment_dir + '/udf4.jar'), list_dir_workspace)
      assert_true(cluster.fs.exists(deployment_dir + '/udf5.jar'), list_dir_workspace)
      assert_true(cluster.fs.exists(deployment_dir + '/udf6.jar'), list_dir_workspace)

      assert_true(cluster.fs.exists(external_deployment_dir + '/udf1.jar'), list_dir_deployement)
      assert_true(cluster.fs.exists(external_deployment_dir + '/udf2.jar'), list_dir_deployement)
      assert_true(cluster.fs.exists(external_deployment_dir + '/udf3.jar'), list_dir_deployement)
      assert_true(cluster.fs.exists(external_deployment_dir + '/udf4.jar'), list_dir_deployement)
      assert_true(cluster.fs.exists(external_deployment_dir + '/udf5.jar'), list_dir_deployement)
      assert_true(cluster.fs.exists(external_deployment_dir + '/udf6.jar'), list_dir_deployement)

      stats_udf1 = cluster.fs.stats(deployment_dir + '/udf1.jar')
      stats_udf2 = cluster.fs.stats(deployment_dir + '/udf2.jar')
      stats_udf3 = cluster.fs.stats(deployment_dir + '/udf3.jar')
      stats_udf4 = cluster.fs.stats(deployment_dir + '/udf4.jar')
      stats_udf5 = cluster.fs.stats(deployment_dir + '/udf5.jar')
      stats_udf6 = cluster.fs.stats(deployment_dir + '/udf6.jar')

      submission._copy_files('%s/workspace' % prefix, "<xml>My XML</xml>", {'prop1': 'val1'})

      assert_not_equal(stats_udf1['fileId'], cluster.fs.stats(deployment_dir + '/udf1.jar')['fileId'])
      assert_not_equal(stats_udf2['fileId'], cluster.fs.stats(deployment_dir + '/udf2.jar')['fileId'])
      assert_not_equal(stats_udf3['fileId'], cluster.fs.stats(deployment_dir + '/udf3.jar')['fileId'])
      assert_equal(stats_udf4['fileId'], cluster.fs.stats(deployment_dir + '/udf4.jar')['fileId'])
      assert_not_equal(stats_udf5['fileId'], cluster.fs.stats(deployment_dir + '/udf5.jar')['fileId'])
      assert_equal(stats_udf6['fileId'], cluster.fs.stats(deployment_dir + '/udf6.jar')['fileId'])

      # Test _create_file()
      submission._create_file(deployment_dir, 'test.txt', data='Test data')
      assert_true(cluster.fs.exists(deployment_dir + '/test.txt'), list_dir_workspace)
  finally:
    try:
      cluster.fs.rmtree(prefix)
    except:
      LOG.exception('failed to remove %s' % prefix)

        if 'Max retries exceeded with url' in str(detail):
          detail = '%s: %s' % (_('The Oozie server is not running'), detail)
        LOG.exception(smart_str(detail))
        raise PopupException(_("Error submitting job %s") % (application_path,), detail=detail)

      jsonify = request.POST.get('format') == 'json'
      if jsonify:
        return JsonResponse({'status': 0, 'job_id': job_id, 'type': 'external_workflow'}, safe=False)
      else:
        request.info(_('Oozie job submitted'))
        view = 'list_oozie_bundle' if application_name == 'bundle.xml' else 'list_oozie_coordinator' if application_name == 'coordinator.xml' else 'list_oozie_workflow'
        return redirect(reverse('oozie:%s' % view, kwargs={'job_id': job_id}))
    else:
      request.error(_('Invalid submission form: %s' % params_form.errors))
  else:
    parameters = Submission(request.user, fs=request.fs, jt=request.jt).get_external_parameters(application_path)
    initial_params = ParameterForm.get_initial_params(parameters)
    params_form = ParametersFormSet(initial=initial_params)

    popup = render('editor/submit_job_popup.mako', request, {
      'params_form': params_form,
      'name': _('Job'),
      'action': reverse('oozie:submit_external_job', kwargs={'application_path': application_path}),
      'show_dryrun': os.path.basename(application_path) != 'bundle.xml',
      'return_json': request.GET.get('format') == 'json'
    }, force_template=True).content
    return JsonResponse(popup, safe=False)


def massaged_workflow_actions_for_json(workflow_actions, oozie_coordinator, oozie_bundle):
  actions = []

def test_update_properties(self):
  finish = []
  finish.append(MR_CLUSTERS.set_for_testing({'default': {}}))
  finish.append(MR_CLUSTERS['default'].SUBMIT_TO.set_for_testing(True))
  finish.append(YARN_CLUSTERS.set_for_testing({'default': {}}))
  finish.append(YARN_CLUSTERS['default'].SUBMIT_TO.set_for_testing(True))

  try:
    properties = {
      'user.name': 'hue',
      'test.1': 'http://localhost/test?test1=test&test2=test',
      'nameNode': 'hdfs://curacao:8020',
      'jobTracker': 'jtaddress',
      'security_enabled': False
    }

    final_properties = properties.copy()
    submission = Submission(None, properties=properties, oozie_id='test', fs=MockFs())
    assert_equal(properties, submission.properties)
    submission._update_properties('jtaddress', 'deployment-directory')
    assert_equal(final_properties, submission.properties)

    clear_sys_caches()
    fs = cluster.get_hdfs()
    final_properties = properties.copy()
    final_properties.update({
      'jobTracker': 'jtaddress',
      'nameNode': fs.fs_defaultfs
    })
    submission = Submission(None, properties=properties, oozie_id='test', fs=fs, jt=None)
    assert_equal(properties, submission.properties)
    submission._update_properties('jtaddress', 'deployment-directory')
    assert_equal(final_properties, submission.properties)

    finish.append(HDFS_CLUSTERS['default'].LOGICAL_NAME.set_for_testing('namenode'))
    finish.append(MR_CLUSTERS['default'].LOGICAL_NAME.set_for_testing('jobtracker'))
    clear_sys_caches()
    fs = cluster.get_hdfs()
    final_properties = properties.copy()
    final_properties.update({
      'jobTracker': 'jobtracker',
      'nameNode': 'namenode'
    })
    submission = Submission(None, properties=properties, oozie_id='test', fs=fs, jt=None)
    assert_equal(properties, submission.properties)
    # Run the update so the logical-name expectations above are actually checked
    submission._update_properties('jtaddress', 'deployment-directory')
    assert_equal(final_properties, submission.properties)
  finally:
    clear_sys_caches()
    for reset in finish:
      reset()

def test_copy_files():
  cluster = pseudo_hdfs4.shared_cluster()

  try:
    c = make_logged_in_client()
    user = User.objects.get(username='******')

    prefix = '/tmp/test_copy_files'

    if cluster.fs.exists(prefix):
      cluster.fs.rmtree(prefix)

    # Jars in various locations
    deployment_dir = '%s/workspace' % prefix
    external_deployment_dir = '%s/deployment' % prefix
    jar_1 = '%s/udf1.jar' % prefix
    jar_2 = '%s/lib/udf2.jar' % prefix
    jar_3 = '%s/udf3.jar' % deployment_dir
    jar_4 = '%s/lib/udf4.jar' % deployment_dir  # Doesn't move
    jar_5 = 'udf5.jar'
    jar_6 = 'lib/udf6.jar'  # Doesn't move

    cluster.fs.mkdir(prefix)
    cluster.fs.create(jar_1)
    cluster.fs.create(jar_2)
    cluster.fs.create(jar_3)
    cluster.fs.create(jar_4)
    cluster.fs.create(deployment_dir + '/' + jar_5)
    cluster.fs.create(deployment_dir + '/' + jar_6)

    class MockJob():
      XML_FILE_NAME = 'workflow.xml'

      def __init__(self):
        self.deployment_dir = deployment_dir
        self.nodes = [
          Node({'id': '1', 'type': 'mapreduce', 'properties': {'jar_path': jar_1}}),
          Node({'id': '2', 'type': 'mapreduce', 'properties': {'jar_path': jar_2}}),
          Node({'id': '3', 'type': 'java', 'properties': {'jar_path': jar_3}}),
          Node({'id': '4', 'type': 'java', 'properties': {'jar_path': jar_4}}),
          # Workspace relative paths
          Node({'id': '5', 'type': 'java', 'properties': {'jar_path': jar_5}}),
          Node({'id': '6', 'type': 'java', 'properties': {'jar_path': jar_6}})
        ]

    submission = Submission(user, job=MockJob(), fs=cluster.fs, jt=cluster.jt)
    submission._copy_files(deployment_dir, "<xml>My XML</xml>", {'prop1': 'val1'})
    submission._copy_files(external_deployment_dir, "<xml>My XML</xml>", {'prop1': 'val1'})

    assert_true(cluster.fs.exists(deployment_dir + '/workflow.xml'), deployment_dir)
    assert_true(cluster.fs.exists(deployment_dir + '/job.properties'), deployment_dir)

    # All sources still there
    assert_true(cluster.fs.exists(jar_1))
    assert_true(cluster.fs.exists(jar_2))
    assert_true(cluster.fs.exists(jar_3))
    assert_true(cluster.fs.exists(jar_4))
    assert_true(cluster.fs.exists(deployment_dir + '/' + jar_5))
    assert_true(cluster.fs.exists(deployment_dir + '/' + jar_6))

    # Lib
    deployment_dir = deployment_dir + '/lib'
    external_deployment_dir = external_deployment_dir + '/lib'

    list_dir_workspace = cluster.fs.listdir(deployment_dir)
    list_dir_deployement = cluster.fs.listdir(external_deployment_dir)

    # All destinations there
    assert_true(cluster.fs.exists(deployment_dir + '/udf1.jar'), list_dir_workspace)
    assert_true(cluster.fs.exists(deployment_dir + '/udf2.jar'), list_dir_workspace)
    assert_true(cluster.fs.exists(deployment_dir + '/udf3.jar'), list_dir_workspace)
    assert_true(cluster.fs.exists(deployment_dir + '/udf4.jar'), list_dir_workspace)
    assert_true(cluster.fs.exists(deployment_dir + '/udf5.jar'), list_dir_workspace)
    assert_true(cluster.fs.exists(deployment_dir + '/udf6.jar'), list_dir_workspace)

    assert_true(cluster.fs.exists(external_deployment_dir + '/udf1.jar'), list_dir_deployement)
    assert_true(cluster.fs.exists(external_deployment_dir + '/udf2.jar'), list_dir_deployement)
    assert_true(cluster.fs.exists(external_deployment_dir + '/udf3.jar'), list_dir_deployement)
    assert_true(cluster.fs.exists(external_deployment_dir + '/udf4.jar'), list_dir_deployement)
    assert_true(cluster.fs.exists(external_deployment_dir + '/udf5.jar'), list_dir_deployement)
    assert_true(cluster.fs.exists(external_deployment_dir + '/udf6.jar'), list_dir_deployement)

    stats_udf1 = cluster.fs.stats(deployment_dir + '/udf1.jar')
    stats_udf2 = cluster.fs.stats(deployment_dir + '/udf2.jar')
    stats_udf3 = cluster.fs.stats(deployment_dir + '/udf3.jar')
    stats_udf4 = cluster.fs.stats(deployment_dir + '/udf4.jar')
    stats_udf5 = cluster.fs.stats(deployment_dir + '/udf5.jar')
    stats_udf6 = cluster.fs.stats(deployment_dir + '/udf6.jar')

    submission._copy_files('%s/workspace' % prefix, "<xml>My XML</xml>", {'prop1': 'val1'})

    assert_not_equal(stats_udf1['fileId'], cluster.fs.stats(deployment_dir + '/udf1.jar')['fileId'])
    assert_not_equal(stats_udf2['fileId'], cluster.fs.stats(deployment_dir + '/udf2.jar')['fileId'])
    assert_not_equal(stats_udf3['fileId'], cluster.fs.stats(deployment_dir + '/udf3.jar')['fileId'])
    assert_equal(stats_udf4['fileId'], cluster.fs.stats(deployment_dir + '/udf4.jar')['fileId'])
    assert_not_equal(stats_udf5['fileId'], cluster.fs.stats(deployment_dir + '/udf5.jar')['fileId'])
    assert_equal(stats_udf6['fileId'], cluster.fs.stats(deployment_dir + '/udf6.jar')['fileId'])

    # Test _create_file()
    submission._create_file(deployment_dir, 'test.txt', data='Test data')
    assert_true(cluster.fs.exists(deployment_dir + '/test.txt'), list_dir_workspace)
  finally:
    try:
      cluster.fs.rmtree(prefix)
    except:
      LOG.exception('failed to remove %s' % prefix)