def test_timeout(self):
    """Check the ``Timeout`` entry produced by ``GlueJob._job_definition()``.

    The expected timeout is asserted for a freshly built job, then again
    after ``allocated_capacity`` is raised to 10 and to 40 (the expected
    value shrinks each time). Finally, a job built with
    ``timeout_override_minutes=2880`` is expected to report exactly 2880
    even with ``allocated_capacity`` set to 40.
    """

    def build_job(**extra):
        # A new ``job_arguments`` dict is created on every call so the two
        # jobs built below never share a mutable argument mapping.
        return GlueJob(
            "example/glue_jobs/simple_etl_job/",
            bucket="alpha-everyone",
            job_role="alpha_user_isichei",
            job_arguments={"--test_arg": "this is a test"},
            **extra,
        )

    job = build_job()

    # Capacity is left at whatever the constructor set.
    self.assertEqual(job._job_definition()["Timeout"], 1363)

    # Raising the capacity is expected to lower the reported timeout.
    for capacity, expected_timeout in ((10, 272), (40, 68)):
        job.allocated_capacity = capacity
        self.assertEqual(job._job_definition()["Timeout"], expected_timeout)

    # With an explicit override the capacity no longer drives the value.
    job = build_job(timeout_override_minutes=2880)
    job.allocated_capacity = 40
    self.assertEqual(job._job_definition()["Timeout"], 2880)
except: raise Exception("You must provide a role name") bucket = 'alpha-data-linking' job = GlueJob( 'match/', bucket=bucket, job_role=ROLE, job_arguments={ "--test_arg": 'some_string', "--conf": 'spark.jars.packages=graphframes:graphframes:0.6.0-spark2.3-s_2.11', '--enable-spark-ui': 'true', '--spark-event-logs-path': 's3://alpha-data-linking/glue_test_delete/logsdelete', '--enable-continuous-cloudwatch-log': 'true' }) job.job_name = '1m_p_50_e_6' print(job._job_definition()) job.allocated_capacity = 2 try: job.run_job() job.wait_for_completion() finally: pass # job.cleanup()
def test_init(self):
    """Check the attributes a ``GlueJob`` exposes right after construction.

    Two jobs are built from the same example job directory:

    * ``g`` uses the default settings and passes ``job_arguments``; its
      resource lists are expected to include entries from
      ``shared_job_resources``.
    * ``g2`` passes ``include_shared_job_resources=False`` and no
      ``job_arguments``; its resource lists are expected to contain only
      entries from the job's own directory.
    """
    g = GlueJob(
        "example/glue_jobs/simple_etl_job/",
        bucket="alpha-everyone",
        job_role="alpha_user_isichei",
        job_arguments={"--test_arg": "this is a test"},
    )

    # Resource discovery: job-local files plus the shared ones.
    self.assertEqual(
        g.resources,
        [
            "example/glue_jobs/simple_etl_job/glue_resources/employees.json",
            "example/glue_jobs/shared_job_resources/glue_resources/teams.json",
        ],
    )
    self.assertEqual(
        g.py_resources,
        [
            "example/glue_jobs/shared_job_resources/glue_py_resources/"
            "my_dummy_utils.zip"
        ],
    )
    # Jars are compared as sets, so their order is not checked.
    self.assertEqual(
        set(g.jars),
        {
            "example/glue_jobs/simple_etl_job/glue_jars/j1.jar",
            "example/glue_jobs/simple_etl_job/glue_jars/j2.jar",
        },
    )

    # Values passed to, or derived by, the constructor.
    self.assertEqual(g.job_name, "simple_etl_job")
    self.assertEqual(g.bucket, "alpha-everyone")
    self.assertEqual(g.job_role, "alpha_user_isichei")
    self.assertEqual(
        g.github_zip_urls,
        [
            "https://github.com/moj-analytical-services/gluejobutils/archive/"
            "master.zip"
        ],
    )
    self.assertEqual(g.job_arguments["--test_arg"], "this is a test")

    # Settings that were not passed explicitly.
    self.assertEqual(g.github_py_resources, [])
    self.assertEqual(g.max_retries, 0)
    self.assertEqual(g.max_concurrent_runs, 1)
    self.assertEqual(g.allocated_capacity, 2)

    # assertIn reports both operands on failure, unlike assertTrue(x in y).
    jobdef = g._job_definition()
    self.assertIn("j2.jar", jobdef["DefaultArguments"]["--extra-jars"])

    g2 = GlueJob(
        "example/glue_jobs/simple_etl_job/",
        bucket="alpha-everyone",
        job_role="alpha_user_isichei",
        include_shared_job_resources=False,
    )
    self.assertEqual(
        g2.resources,
        ["example/glue_jobs/simple_etl_job/glue_resources/employees.json"],
    )
    self.assertEqual(g2.py_resources, [])
    # No job_arguments were passed for g2, yet this key is expected.
    self.assertIn("_GlueJobs_", g2.job_arguments["--metadata_base_path"])