class TestAwsGlueJobOperator(unittest.TestCase):
    """Unit tests for ``AwsGlueJobOperator`` with the Glue and S3 hooks mocked."""

    @mock.patch('airflow.providers.amazon.aws.hooks.glue.AwsGlueJobHook')
    def setUp(self, glue_hook_mock):
        """Load the test configuration and build the operator under test.

        The patched hook is stored on the instance as ``self.glue_hook_mock``.
        """
        configuration.load_test_config()
        self.glue_hook_mock = glue_hook_mock

        # NOTE(review): only a single slash follows "s3:" in this location --
        # confirm whether that is intentional for this test.
        operator_kwargs = dict(
            task_id='test_glue_operator',
            job_name='my_test_job',
            script_location="s3:/glue-examples/glue-scripts/sample_aws_glue_job.py",
            aws_conn_id='aws_default',
            region_name='us-west-2',
            s3_bucket='some_bucket',
            iam_role_name='my_test_role',
        )
        self.glue = AwsGlueJobOperator(**operator_kwargs)

    # Decorators apply bottom-up, so the mocks arrive in the order:
    # load_file, get_conn, initialize_job, get_job_state.
    @mock.patch.object(AwsGlueJobHook, 'get_job_state')
    @mock.patch.object(AwsGlueJobHook, 'initialize_job')
    @mock.patch.object(AwsGlueJobHook, "get_conn")
    @mock.patch.object(S3Hook, "load_file")
    def test_execute_without_failure(
        self,
        mock_load_file,
        mock_get_conn,
        mock_initialize_job,
        mock_get_job_state,
    ):
        """Execute the operator against mocked hooks and check the job start.

        ``initialize_job`` is mocked to report a RUNNING job run and
        ``get_job_state`` to report SUCCEEDED; the test then verifies that
        ``initialize_job`` was called exactly once with an empty dict and
        that the operator kept its configured job name.
        """
        started_run = {'JobRunState': 'RUNNING', 'JobRunId': '11111'}
        mock_get_job_state.return_value = 'SUCCEEDED'
        mock_initialize_job.return_value = started_run

        self.glue.execute(None)

        mock_initialize_job.assert_called_once_with({})
        self.assertEqual(self.glue.job_name, 'my_test_job')
def setUp(self, glue_hook_mock):
    """Prepare the Glue operator fixture used by the tests.

    Loads the test configuration, keeps ``glue_hook_mock`` on the instance as
    ``self.glue_hook_mock`` and stores a freshly built ``AwsGlueJobOperator``
    on ``self.glue``.
    """
    configuration.load_test_config()
    self.glue_hook_mock = glue_hook_mock

    # NOTE(review): only a single slash follows "s3:" in this location --
    # confirm whether that is intentional.
    script_path = "s3:/glue-examples/glue-scripts/sample_aws_glue_job.py"

    self.glue = AwsGlueJobOperator(
        task_id='test_glue_operator',
        job_name='my_test_job',
        script_location=script_path,
        aws_conn_id='aws_default',
        region_name='us-west-2',
        s3_bucket='some_bucket',
        iam_role_name='my_test_role',
    )
# Example DAG: starts an AWS Glue job on a daily schedule.
from datetime import datetime, timedelta

from airflow import DAG
from airflow.providers.amazon.aws.operators.glue import AwsGlueJobOperator

# Defaults applied to every task in the DAG.
default_args = {
    "owner": "airflow-user",
    # Use a fixed start_date rather than datetime.today(): a value that moves
    # on every parse of this file means a schedule interval never closes, so
    # the scheduler cannot trigger the DAG reliably. catchup=False below keeps
    # the past date from causing a backfill. Adjust the date as needed.
    "start_date": datetime(2021, 1, 1),
    "depends_on_past": False,
    "email_on_failure": False,
    "email_on_retry": False,
    "email": "<your-email-address>",
    "retries": 1,
    "retry_delay": timedelta(minutes=5),
}

with DAG(
    dag_id="start-glue-job",
    schedule_interval="@daily",
    default_args=default_args,
    catchup=False,
) as dag:
    # Replace the placeholder job name and region with real values.
    glue_job = AwsGlueJobOperator(
        task_id='my_glue_job',
        job_name='<your-Glue-job-name>',
        num_of_dpus=5,
        region_name='<region>',
    )