Пример #1
0
    def test_timeout(self):
        """Check the ``Timeout`` entry produced by ``_job_definition()``.

        Without an override the reported timeout goes down as
        ``allocated_capacity`` is raised (1363 -> 272 -> 68 in the cases
        below).  When ``timeout_override_minutes`` is passed to the
        constructor, that value is reported instead.
        """

        def build_job(**extra_kwargs):
            # Every call builds its own arguments dict so the jobs created
            # in this test never share a mutable object.
            return GlueJob(
                "example/glue_jobs/simple_etl_job/",
                bucket="alpha-everyone",
                job_role="alpha_user_isichei",
                job_arguments={"--test_arg": "this is a test"},
                **extra_kwargs
            )

        def reported_timeout(glue_job):
            return glue_job._job_definition()["Timeout"]

        job = build_job()

        # First check: allocated_capacity has not been touched yet.
        self.assertEqual(reported_timeout(job), 1363)

        # Then raise the capacity step by step on the same job.
        for capacity, expected_timeout in ((10, 272), (40, 68)):
            job.allocated_capacity = capacity
            self.assertEqual(reported_timeout(job), expected_timeout)

        # With an explicit override, capacity 40 no longer yields 68.
        job = build_job(timeout_override_minutes=2880)
        job.allocated_capacity = 40
        self.assertEqual(reported_timeout(job), 2880)
Пример #2
0
except:
    raise Exception("You must provide a role name")

# Bucket handed to the GlueJob constructor below.
bucket = "alpha-data-linking"

# Build the job from the local "match/" folder.  The arguments pull in the
# graphframes package and switch on the Spark UI, Spark event logs and
# continuous CloudWatch logging.
job = GlueJob(
    "match/",
    bucket=bucket,
    job_role=ROLE,
    job_arguments={
        "--test_arg": "some_string",
        "--conf": (
            "spark.jars.packages=graphframes:graphframes:0.6.0-spark2.3-s_2.11"
        ),
        "--enable-spark-ui": "true",
        "--spark-event-logs-path": (
            "s3://alpha-data-linking/glue_test_delete/logsdelete"
        ),
        "--enable-continuous-cloudwatch-log": "true",
    },
)

job.job_name = "1m_p_50_e_6"

# Show the definition as it stands before the capacity is changed.
print(job._job_definition())

job.allocated_capacity = 2

# Start the job and block until it finishes.
# NOTE(review): job.cleanup() is intentionally not called afterwards; it was
# already disabled in the original script -- confirm whether that is still
# wanted before re-enabling it (ideally inside a try/finally).
job.run_job()
job.wait_for_completion()