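# End-to-end tests: these run the Pig app against the shared pseudo-HDFS/Oozie
# test cluster and exercise workflow creation, plus submission and stopping of
# a sample script through the Pig Oozie API.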
class TestWithHadoop(OozieBase):
    def setUp(self):
        super(TestWithHadoop, self).setUp()
        # FIXME (HUE-2562): The tests unfortunately require superuser at the
        # moment, but should be rewritten to not need it.
        self.c = make_logged_in_client(is_superuser=True)
        grant_access("test", "test", "pig")
        self.user = User.objects.get(username="******")

        self.c.post(reverse("pig:install_examples"))
        self.cluster = pseudo_hdfs4.shared_cluster()
        self.api = OozieApi(self.cluster.fs, self.cluster.jt, self.user)

    def test_create_workflow(self):
        xattrs = {
            "parameters": [
                {"name": "output", "value": self.cluster.fs_prefix + "/test_pig_script_workflow"},
                {"name": "-param", "value": "input=/data"},  # Alternative way for params
                {"name": "-optimizer_off", "value": "SplitFilter"},
                {"name": "-v", "value": ""},
            ],
            "resources": [{"type": "file", "value": "/tmp/file"}, {"type": "archive", "value": "/tmp/file.zip"}],
            "hadoopProperties": [
                {"name": "mapred.map.tasks.speculative.execution", "value": "false"},
                {"name": "mapred.job.queue", "value": "fast"},
            ],
        }

        pig_script = create_script(self.user, xattrs)

        output_path = self.cluster.fs_prefix + "/test_pig_script_2"
        params = json.dumps([{"name": "output", "value": output_path}])

        workflow = self.api._create_workflow(pig_script, params)
        pig_action = workflow.start.get_child("to").get_full_node()

        assert_equal(
            [
                {u"type": u"argument", u"value": u"-param"},
                {u"type": u"argument", u"value": u"output=%s" % output_path},
                {u"type": u"argument", u"value": u"-param"},
                {u"type": u"argument", u"value": u"input=/data"},
                {u"type": u"argument", u"value": u"-optimizer_off"},
                {u"type": u"argument", u"value": u"SplitFilter"},
                {u"type": u"argument", u"value": u"-v"},
            ],
            pig_action.get_params(),
        )
        assert_equal(
            [
                {u"name": u"mapred.map.tasks.speculative.execution", u"value": u"false"},
                {u"name": u"mapred.job.queue", u"value": u"fast"},
            ],
            pig_action.get_properties(),
        )
        assert_equal(["/tmp/file"], pig_action.get_files())
        assert_equal([{u"dummy": u"", u"name": u"/tmp/file.zip"}], pig_action.get_archives())

    def wait_until_completion(self, pig_script_id, timeout=300.0, step=5, expected_status="SUCCEEDED"):
        script = PigScript.objects.get(id=pig_script_id)
        job_id = script.dict["job_id"]

        response = self.c.get(reverse("pig:watch", args=[job_id]))
        response = json.loads(response.content)

        start = time.time()
        while response["workflow"]["status"] in ["PREP", "RUNNING"] and time.time() - start < timeout:
            time.sleep(step)
            response = self.c.get(reverse("pig:watch", args=[job_id]))
            response = json.loads(response.content)

        logs = OozieServerProvider.oozie.get_job_log(job_id)

        if response["workflow"]["status"] != expected_status:
            msg = "[%d] %s took more than %d to complete or %s: %s" % (
                time.time(),
                job_id,
                timeout,
                response["workflow"]["status"],
                logs,
            )
            self.api.stop(job_id)
            raise Exception(msg)

        return pig_script_id

    def test_submit(self):
        if is_live_cluster():
            raise SkipTest("HUE-2909: Skipping because test is not reentrant")

        script = PigScript.objects.get(id=SAMPLE_USER_ID)
        script_dict = script.dict

        post_data = {
            "id": script.id,
            "name": script_dict["name"],
            "script": script_dict["script"],
            "user": script.owner,
            "parameters": json.dumps(script_dict["parameters"]),
            "resources": json.dumps(script_dict["resources"]),
            "hadoopProperties": json.dumps(script_dict["hadoopProperties"]),
            "submissionVariables": json.dumps(
                [{"name": "output", "value": self.cluster.fs_prefix + "/test_pig_script_submit"}]
            ),
        }

        response = self.c.post(reverse("pig:run"), data=post_data, follow=True)
        job_id = json.loads(response.content)["id"]

        self.wait_until_completion(job_id)

    def test_stop(self):
        script = PigScript.objects.get(id=SAMPLE_USER_ID)
        script_dict = script.dict

        post_data = {
            "id": script.id,
            "name": script_dict["name"],
            "script": script_dict["script"],
            "user": script.owner,
            "parameters": json.dumps(script_dict["parameters"]),
            "resources": json.dumps(script_dict["resources"]),
            "hadoopProperties": json.dumps(script_dict["hadoopProperties"]),
            "submissionVariables": json.dumps(
                [{"name": "output", "value": self.cluster.fs_prefix + "/test_pig_script_stop"}]
            ),
        }

        submit_response = self.c.post(reverse("pig:run"), data=post_data, follow=True)
        script = PigScript.objects.get(id=json.loads(submit_response.content)["id"])
        assert_true(script.dict["job_id"], script.dict)

        self.c.post(reverse("pig:stop"), data={"id": script.id}, follow=True)

        self.wait_until_completion(json.loads(submit_response.content)["id"], expected_status="KILLED")
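
# OozieApi._make_links() rewrites HDFS paths and MapReduce job IDs that appear
# in raw Oozie/Pig log output into FileBrowser and JobBrowser anchor tags; the
# assertions below pin down the exact markup expected for each pattern.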
def test_make_log_links():
    # FileBrowser
    assert_equal(
        """<a href="/filebrowser/view/user/romain/tmp" target="_blank">hdfs://localhost:8020/user/romain/tmp</a> <dir>""",
        OozieApi._make_links("hdfs://localhost:8020/user/romain/tmp <dir>"),
    )
    assert_equal(
        """<a href="/filebrowser/view/user/romain/tmp" target="_blank">hdfs://localhost:8020/user/romain/tmp</a><dir>""",
        OozieApi._make_links("hdfs://localhost:8020/user/romain/tmp<dir>"),
    )
    assert_equal(
        """output: <a href="/filebrowser/view/user/romain/tmp" target="_blank">/user/romain/tmp</a> <dir>""",
        OozieApi._make_links("output: /user/romain/tmp <dir>"),
    )
    assert_equal(
        'Successfully read 3760 records (112648 bytes) from: "<a href="/filebrowser/view/user/hue/pig/examples/data/midsummer.txt" target="_blank">/user/hue/pig/examples/data/midsummer.txt</a>"',
        OozieApi._make_links(
            'Successfully read 3760 records (112648 bytes) from: "/user/hue/pig/examples/data/midsummer.txt"'
        ),
    )
    assert_equal(
        'data,upper_case MAP_ONLY <a href="/filebrowser/view/user/romain/out/fffff" target="_blank">hdfs://localhost:8020/user/romain/out/fffff</a>,',
        OozieApi._make_links("data,upper_case MAP_ONLY hdfs://localhost:8020/user/romain/out/fffff,"),
    )
    assert_equal(
        'MAP_ONLY <a href="/filebrowser/view/user/romain/out/fffff" target="_blank">hdfs://localhost:8020/user/romain/out/fffff</a>\n2013',
        OozieApi._make_links("MAP_ONLY hdfs://localhost:8020/user/romain/out/fffff\n2013"),
    )
    assert_equal(
        ' <a href="/filebrowser/view/jobs.tsv" target="_blank">/jobs.tsv</a> ',
        OozieApi._make_links(" /jobs.tsv "),
    )
    assert_equal(
        '<a href="/filebrowser/view/user/romain/job_pos_2012.tsv" target="_blank">hdfs://localhost:8020/user/romain/job_pos_2012.tsv</a>',
        OozieApi._make_links("hdfs://localhost:8020/user/romain/job_pos_2012.tsv"),
    )

    # JobBrowser
    assert_equal(
        """<a href="/jobbrowser/jobs/job_201306261521_0058" target="_blank">job_201306261521_0058</a>""",
        OozieApi._make_links("job_201306261521_0058"),
    )
    assert_equal(
        """Hadoop Job IDs executed by Pig: <a href="/jobbrowser/jobs/job_201306261521_0058" target="_blank">job_201306261521_0058</a>""",
        OozieApi._make_links("Hadoop Job IDs executed by Pig: job_201306261521_0058"),
    )
    assert_equal(
        """MapReduceLauncher - HadoopJobId: <a href="/jobbrowser/jobs/job_201306261521_0058" target="_blank">job_201306261521_0058</a>""",
        OozieApi._make_links("MapReduceLauncher - HadoopJobId: job_201306261521_0058"),
    )
    assert_equal(
        """- More information at: http://localhost:50030/jobdetails.jsp?jobid=<a href="/jobbrowser/jobs/job_201306261521_0058" target="_blank">job_201306261521_0058</a>""",
        OozieApi._make_links(
            "- More information at: http://localhost:50030/jobdetails.jsp?jobid=job_201306261521_0058"
        ),
    )
    assert_equal(
        """ Logging error messages to: job_201307091553_0028/attempt_201307091553_002""",
        OozieApi._make_links(" Logging error messages to: job_201307091553_0028/attempt_201307091553_002"),
    )
    assert_equal(""" pig-job_201307091553_0028.log""", OozieApi._make_links(" pig-job_201307091553_0028.log"))