示例#1
0
    def execute(self, context):
        """
        Executes AWS Glue Job from Airflow

        When ``script_location`` is set and does not start with
        ``self.s3_protocol`` it is treated as a local file: the file is
        uploaded to ``self.s3_bucket`` under ``self.s3_artifcats_prefix`` and
        the Glue job is pointed at the uploaded object instead of the local
        path.

        :param context: Airflow task context (not used by this method).
        :return: the id of the current glue job.
        """
        script_location = self.script_location
        if script_location and not script_location.startswith(
                self.s3_protocol):
            s3_hook = S3Hook(aws_conn_id=self.aws_conn_id)
            script_key = (self.s3_artifcats_prefix +
                          os.path.basename(script_location))
            # S3Hook.load_file is (filename, key, bucket_name); pass by
            # keyword so the bucket and the object key cannot be swapped.
            s3_hook.load_file(filename=script_location,
                              key=script_key,
                              bucket_name=self.s3_bucket)
            # Glue reads the script from S3, so hand the hook the URI of the
            # object just uploaded rather than the local filesystem path.
            script_location = "s3://{}/{}".format(self.s3_bucket, script_key)
        glue_job = AwsGlueJobHook(
            job_name=self.job_name,
            desc=self.job_desc,
            concurrent_run_limit=self.concurrent_run_limit,
            script_location=script_location,
            retry_limit=self.retry_limit,
            num_of_dpus=self.num_of_dpus,
            aws_conn_id=self.aws_conn_id,
            region_name=self.region_name,
            s3_bucket=self.s3_bucket,
            iam_role_name=self.iam_role_name)
        self.log.info("Initializing AWS Glue Job: %s", self.job_name)
        glue_job_run = glue_job.initialize_job(self.script_args)
        # NOTE(review): job_completion presumably blocks until the run reaches
        # a final state -- confirm against AwsGlueJobHook.
        glue_job_run = glue_job.job_completion(self.job_name,
                                               glue_job_run['JobRunId'])
        self.log.info("AWS Glue Job: %s status: %s. Run Id: %s", self.job_name,
                      glue_job_run['JobRunState'], glue_job_run['JobRunId'])
        return glue_job_run['JobRunId']
示例#2
0
    def test_initialize_job(self, mock_get_conn, mock_get_or_create_glue_job,
                            mock_get_job_state):
        """
        Start a job run through ``initialize_job`` and check that the state
        reported for it is the value returned by the patched
        ``get_job_state``.

        NOTE(review): the ``mock_*`` arguments are presumably injected by
        ``mock.patch`` decorators that are not visible here -- confirm.
        """
        some_data_path = "s3://glue-datasets/examples/medicare/SampleData.csv"
        some_script_arguments = {"--s3_input_data_path": some_data_path}
        # Well-formed S3 URI (two slashes), consistent with some_data_path.
        some_script = "s3://glue-examples/glue-scripts/sample_aws_glue_job.py"
        some_s3_bucket = "my-includes"

        mock_get_or_create_glue_job.Name = mock.Mock(Name='aws_test_glue_job')
        mock_get_conn.return_value.start_job_run()

        # Expected value: whatever the patched get_job_state hands back.
        mock_job_run_state = mock_get_job_state.return_value
        glue_job_hook = AwsGlueJobHook(
            job_name='aws_test_glue_job',
            desc='This is test case job from Airflow',
            iam_role_name='my_test_role',
            script_location=some_script,
            s3_bucket=some_s3_bucket,
            region_name=self.some_aws_region,
        )
        glue_job_run = glue_job_hook.initialize_job(some_script_arguments)
        glue_job_run_state = glue_job_hook.get_job_state(
            glue_job_run['JobName'], glue_job_run['JobRunId'])
        self.assertEqual(glue_job_run_state,
                         mock_job_run_state,
                         msg='Mocks must be equal')
示例#3
0
 def poke(self, context):
     """
     Check the state of the Glue job run identified by ``self.job_name``
     and ``self.run_id``.

     :param context: Airflow task context (not used by this method).
     :return: True when the state is in ``self.success_states``; False when
         it is in neither ``self.success_states`` nor ``self.errored_states``.
     :raises AirflowException: when the state is in ``self.errored_states``.
     """
     glue_hook = AwsGlueJobHook(aws_conn_id=self.aws_conn_id)
     self.log.info("Poking for job run status :for Glue Job %s and ID %s", self.job_name, self.run_id)
     run_state = glue_hook.get_job_state(job_name=self.job_name, run_id=self.run_id)

     # Success is tested first, exactly as before, so a state listed in both
     # collections still counts as success.
     if run_state in self.success_states:
         self.log.info("Exiting Job %s Run State: %s", self.run_id, run_state)
         return True

     # Neither success nor error: report "not done yet".
     if run_state not in self.errored_states:
         return False

     raise AirflowException("Exiting Job " + self.run_id + " Run State: " + run_state)
示例#4
0
 def test_get_iam_execution_role(self):
     """
     Create the role ``my_test_role`` through the hook's IAM client, then
     check that ``get_iam_execution_role`` returns something other than None.

     NOTE(review): presumably runs against a mocked IAM backend set up by a
     decorator that is not visible here -- confirm.
     """
     role_name = 'my_test_role'
     glue_hook = AwsGlueJobHook(job_name='aws_test_glue_job',
                                s3_bucket='some_bucket',
                                iam_role_name=role_name)
     iam_client = glue_hook.get_client_type('iam')

     # Trust policy that lets the Glue service assume the role.
     trust_policy = {
         "Version": "2012-10-17",
         "Statement": {
             "Effect": "Allow",
             "Principal": {"Service": "glue.amazonaws.com"},
             "Action": "sts:AssumeRole"
         }
     }
     iam_client.create_role(
         Path="/",
         RoleName=role_name,
         AssumeRolePolicyDocument=json.dumps(trust_policy)
     )

     self.assertIsNotNone(glue_hook.get_iam_execution_role())
示例#5
0
    def test_get_or_create_glue_job(self, mock_get_conn,
                                    mock_get_iam_execution_role):
        """
        Check that ``get_or_create_glue_job`` returns the job name obtained
        from the patched connection's ``get_job()['Job']['Name']``.

        NOTE(review): the ``mock_*`` arguments are presumably injected by
        ``mock.patch`` decorators that are not visible here -- confirm.
        """
        mock_get_iam_execution_role.return_value = \
            mock.MagicMock(Role={'RoleName': 'my_test_role'})
        # Well-formed S3 URI (two slashes after the scheme).
        some_script = "s3://glue-examples/glue-scripts/sample_aws_glue_job.py"
        some_s3_bucket = "my-includes"

        # Expected value: the name the mocked Glue client reports for the job.
        mock_glue_job = mock_get_conn.return_value.get_job()['Job']['Name']
        glue_job = AwsGlueJobHook(job_name='aws_test_glue_job',
                                  desc='This is test case job from Airflow',
                                  script_location=some_script,
                                  iam_role_name='my_test_role',
                                  s3_bucket=some_s3_bucket,
                                  region_name=self.some_aws_region)\
            .get_or_create_glue_job()
        self.assertEqual(glue_job, mock_glue_job)