示例#1
0
    def test_timeout(self):
        """Check the ``Timeout`` entry returned by ``_job_definition()``.

        The expected value is asserted for the job as constructed, then
        after setting ``allocated_capacity`` to 10 and to 40. A second job
        built with ``timeout_override_minutes`` must report that override
        even with ``allocated_capacity`` set to 40.
        """

        def build_job(**extra):
            # A fresh argument dict per call, so the two jobs share no state.
            return GlueJob(
                "example/glue_jobs/simple_etl_job/",
                bucket="alpha-everyone",
                job_role="alpha_user_isichei",
                job_arguments={"--test_arg": "this is a test"},
                **extra
            )

        job = build_job()

        # Timeout before allocated_capacity has been touched.
        self.assertEqual(job._job_definition()["Timeout"], 1363)

        # Each capacity is applied to the same job, in this order.
        for capacity, expected in ((10, 272), (40, 68)):
            job.allocated_capacity = capacity
            self.assertEqual(job._job_definition()["Timeout"], expected)

        overridden = build_job(timeout_override_minutes=2880)
        overridden.allocated_capacity = 40

        # The explicit override is expected instead of the 68 seen above.
        self.assertEqual(overridden._job_definition()["Timeout"], 2880)
示例#2
0
except:
    raise Exception("You must provide a role name")

# Bucket name passed to the GlueJob constructor below.
bucket = "alpha-data-linking"

# Build the job from the 'match/' directory with its argument overrides.
job = GlueJob(
    "match/",
    bucket=bucket,
    job_role=ROLE,
    job_arguments={
        "--test_arg": "some_string",
        "--conf": (
            "spark.jars.packages=graphframes:graphframes:0.6.0-spark2.3-s_2.11"
        ),
        "--enable-spark-ui": "true",
        "--spark-event-logs-path": (
            "s3://alpha-data-linking/glue_test_delete/logsdelete"
        ),
        "--enable-continuous-cloudwatch-log": "true",
    },
)

# Set the job name before printing, so the printed definition is produced
# after the name has been assigned.
job.job_name = "1m_p_50_e_6"
print(job._job_definition())

# Capacity is assigned only after the definition above has been printed.
job.allocated_capacity = 2

try:
    job.run_job()
    job.wait_for_completion()
finally:
    # Cleanup is currently disabled. Uncomment the call below to run it
    # whether the job run succeeds or raises.
    # job.cleanup()
    pass
示例#3
0
    def test_init(self):
        """Verify the attributes of a newly constructed ``GlueJob``.

        The first job is built with ``job_arguments`` and checked for its
        resource, py-resource and jar paths, its scalar attributes, and the
        ``--extra-jars`` entry of its job definition. The second job is
        built with ``include_shared_job_resources=False`` and checked for
        its resources, py-resources and ``--metadata_base_path`` argument.
        """
        job = GlueJob(
            "example/glue_jobs/simple_etl_job/",
            bucket="alpha-everyone",
            job_role="alpha_user_isichei",
            job_arguments={"--test_arg": "this is a test"},
        )

        self.assertEqual(
            job.resources,
            [
                "example/glue_jobs/simple_etl_job/glue_resources/employees.json",
                "example/glue_jobs/shared_job_resources/glue_resources/teams.json",
            ],
        )
        self.assertEqual(
            job.py_resources,
            [
                "example/glue_jobs/shared_job_resources/glue_py_resources/"
                "my_dummy_utils.zip"
            ],
        )

        # Jars are compared as sets, so their order does not matter.
        self.assertEqual(
            set(job.jars),
            {
                "example/glue_jobs/simple_etl_job/glue_jars/j1.jar",
                "example/glue_jobs/simple_etl_job/glue_jars/j2.jar",
            },
        )
        self.assertEqual(job.job_name, "simple_etl_job")
        self.assertEqual(job.bucket, "alpha-everyone")
        self.assertEqual(job.job_role, "alpha_user_isichei")
        self.assertEqual(
            job.github_zip_urls,
            [
                "https://github.com/moj-analytical-services/gluejobutils/archive/"
                "master.zip"
            ],
        )
        self.assertEqual(job.job_arguments["--test_arg"], "this is a test")
        self.assertEqual(job.github_py_resources, [])
        self.assertEqual(job.max_retries, 0)
        self.assertEqual(job.max_concurrent_runs, 1)
        self.assertEqual(job.allocated_capacity, 2)

        # assertIn reports both operands on failure, unlike assertTrue(a in b).
        job_definition = job._job_definition()
        self.assertIn(
            "j2.jar", job_definition["DefaultArguments"]["--extra-jars"])

        job_without_shared = GlueJob(
            "example/glue_jobs/simple_etl_job/",
            bucket="alpha-everyone",
            job_role="alpha_user_isichei",
            include_shared_job_resources=False,
        )
        self.assertEqual(
            job_without_shared.resources,
            ["example/glue_jobs/simple_etl_job/glue_resources/employees.json"],
        )
        self.assertEqual(job_without_shared.py_resources, [])

        # No job_arguments were passed for this job, yet the key is expected.
        self.assertIn(
            "_GlueJobs_",
            job_without_shared.job_arguments["--metadata_base_path"])