def setUp(self, aws_hook_mock):
    """Build the AWSBatchOperator instance shared by the tests.

    :param aws_hook_mock: mock object kept on the test case as
        ``self.aws_hook_mock`` (presumably injected by a patch decorator
        that sits outside this view -- confirm).
    """
    # The test configuration is loaded before the operator is constructed.
    configuration.load_test_config()
    self.aws_hook_mock = aws_hook_mock

    # Constructor arguments gathered in one place, then expanded below.
    operator_kwargs = {
        'task_id': 'task',
        'job_name': '51455483-c62c-48ac-9b88-53a6a725baa3',
        'queue': 'queue',
        'job_definition': 'hello-world',
        'max_retries': 5,
        'overrides': {},
        'aws_conn_id': None,
        'region_name': 'eu-west-1',
    }
    self.batch = AWSBatchOperator(**operator_kwargs)
try: start = DummyOperator(task_id='start', dag=dag) end = DummyOperator(task_id='end', dag=dag) next = DummyOperator(task_id='next', dag=dag) generate_impressions = AWSBatchOperator( task_id='firebase-generate_impressions', job_name='firebase_generate_impressions', job_queue='batch-job-queue-b', job_definition= 'arn:aws:batch:eu-west-1:652586300051:job-definition/boutiqaat-etl-jobs-airflow:1', max_retries=4200, overrides={'command': ['make -C firebase generate_impressions']}, aws_conn_id=None, array_properties={}, parameters={}, status_retries=10, region_name='eu-west-1', dag=(dag)) daily_traffic = AWSBatchOperator( task_id='firebase-daily_traffic', job_name='firebase_daily_traffic', job_queue='batch-job-queue-b', job_definition= 'arn:aws:batch:eu-west-1:652586300051:job-definition/boutiqaat-etl-jobs-airflow:1', max_retries=4200, overrides={'command': ['make -C firebase daily_traffic']},
'retries': 1, 'retry_delay': timedelta(minutes=5), } dag = DAG(dag_id='REDSHIFT_DAILY', default_args=default_args, catchup=False, schedule_interval='30 3,6,8 * * *', dagrun_timeout=timedelta(minutes=120), tags=['REDSHIFT']) # t1, t2, t3 and t4 are examples of tasks created using operators try: t1 = AWSBatchOperator( task_id='redshift-daily-1', job_name='redshift_daily', job_queue='batch-job-queue-b', job_definition= 'arn:aws:batch:eu-west-1:652586300051:job-definition/boutiqaat-etl-jobs-airflow:1', max_retries=4200, overrides={'command': ['make -j4 -C redshift daily']}, aws_conn_id=None, array_properties={}, parameters={}, status_retries=10, region_name='eu-west-1', timeout={'attemptDurationSeconds': 7200}, dag=(dag)) except Exception as e: t1.log.info(e)
class TestAWSBatchOperator(unittest.TestCase):
    """Unit tests for AWSBatchOperator with the AWS hook class patched out."""

    @mock.patch('airflow.contrib.operators.awsbatch_operator.AwsHook')
    def setUp(self, aws_hook_mock):
        """Create the operator under test while AwsHook is replaced by a mock."""
        configuration.load_test_config()

        self.aws_hook_mock = aws_hook_mock
        self.batch = AWSBatchOperator(
            task_id='task',
            job_name='51455483-c62c-48ac-9b88-53a6a725baa3',
            queue='queue',
            job_definition='hello-world',
            max_retries=5,
            overrides={},
            aws_conn_id=None,
            region_name='eu-west-1')

    def test_init(self):
        """Constructor arguments are stored and the hook is built exactly once."""
        self.assertEqual(self.batch.job_name, '51455483-c62c-48ac-9b88-53a6a725baa3')
        self.assertEqual(self.batch.queue, 'queue')
        self.assertEqual(self.batch.job_definition, 'hello-world')
        self.assertEqual(self.batch.max_retries, 5)
        self.assertEqual(self.batch.overrides, {})
        self.assertEqual(self.batch.region_name, 'eu-west-1')
        self.assertIsNone(self.batch.aws_conn_id)
        self.assertEqual(self.batch.hook, self.aws_hook_mock.return_value)

        self.aws_hook_mock.assert_called_once_with(aws_conn_id=None)

    def test_template_fields_overrides(self):
        """Only 'overrides' is declared as a template field."""
        self.assertEqual(self.batch.template_fields, ('overrides',))

    # Decorators apply bottom-up, so the first mock argument is the patch for
    # _check_success_task and the second is the patch for _wait_for_task_ended.
    @mock.patch.object(AWSBatchOperator, '_wait_for_task_ended')
    @mock.patch.object(AWSBatchOperator, '_check_success_task')
    def test_execute_without_failures(self, check_mock, wait_mock):
        """execute() submits the job, waits, checks success and records jobId."""
        client_mock = self.aws_hook_mock.return_value.get_client_type.return_value
        client_mock.submit_job.return_value = RESPONSE_WITHOUT_FAILURES

        self.batch.execute(None)

        self.aws_hook_mock.return_value.get_client_type.assert_called_once_with(
            'batch', region_name='eu-west-1')
        client_mock.submit_job.assert_called_once_with(
            jobQueue='queue',
            jobName='51455483-c62c-48ac-9b88-53a6a725baa3',
            containerOverrides={},
            jobDefinition='hello-world'
        )

        wait_mock.assert_called_once_with()
        check_mock.assert_called_once_with()
        self.assertEqual(self.batch.jobId, '8ba9d676-4108-4474-9dca-8bbac1da9b19')

    def test_execute_with_failures(self):
        """An empty submit_job response makes execute() raise AirflowException."""
        client_mock = self.aws_hook_mock.return_value.get_client_type.return_value
        client_mock.submit_job.return_value = ""

        with self.assertRaises(AirflowException):
            self.batch.execute(None)

        self.aws_hook_mock.return_value.get_client_type.assert_called_once_with(
            'batch', region_name='eu-west-1')
        client_mock.submit_job.assert_called_once_with(
            jobQueue='queue',
            jobName='51455483-c62c-48ac-9b88-53a6a725baa3',
            containerOverrides={},
            jobDefinition='hello-world'
        )

    def test_wait_end_tasks(self):
        """_wait_for_task_ended uses the 'job_execution_complete' waiter."""
        client_mock = mock.Mock()
        self.batch.jobId = '8ba9d676-4108-4474-9dca-8bbac1da9b19'
        self.batch.client = client_mock

        self.batch._wait_for_task_ended()

        client_mock.get_waiter.assert_called_once_with('job_execution_complete')
        client_mock.get_waiter.return_value.wait.assert_called_once_with(
            jobs=['8ba9d676-4108-4474-9dca-8bbac1da9b19']
        )
        # assertEquals is a deprecated alias (removed in Python 3.12).
        self.assertEqual(
            sys.maxsize,
            client_mock.get_waiter.return_value.config.max_attempts)

    def test_check_success_tasks_raises(self):
        """An empty 'jobs' list is reported as 'No job found for ...'."""
        client_mock = mock.Mock()
        self.batch.jobId = '8ba9d676-4108-4474-9dca-8bbac1da9b19'
        self.batch.client = client_mock

        client_mock.describe_jobs.return_value = {
            'jobs': []
        }

        with self.assertRaises(Exception) as e:
            self.batch._check_success_task()

        # Ordering of str(dict) is not guaranteed, so match the prefix only.
        self.assertIn('No job found for ', str(e.exception))

    def test_check_success_tasks_raises_failed(self):
        """A FAILED job with one attempt raises the container-error message."""
        client_mock = mock.Mock()
        self.batch.jobId = '8ba9d676-4108-4474-9dca-8bbac1da9b19'
        self.batch.client = client_mock

        client_mock.describe_jobs.return_value = {
            'jobs': [{
                'status': 'FAILED',
                'attempts': [{
                    'exitCode': 1
                }]
            }]
        }

        with self.assertRaises(Exception) as e:
            self.batch._check_success_task()

        # Ordering of str(dict) is not guaranteed, so match the prefix only.
        self.assertIn('This containers encounter an error during execution ', str(e.exception))

    def test_check_success_tasks_raises_pending(self):
        """A RUNNABLE job is reported as still pending."""
        client_mock = mock.Mock()
        self.batch.jobId = '8ba9d676-4108-4474-9dca-8bbac1da9b19'
        self.batch.client = client_mock

        client_mock.describe_jobs.return_value = {
            'jobs': [{
                'status': 'RUNNABLE'
            }]
        }

        with self.assertRaises(Exception) as e:
            self.batch._check_success_task()

        # Ordering of str(dict) is not guaranteed, so match the prefix only.
        self.assertIn('This task is still pending ', str(e.exception))

    # NOTE: the method name keeps its historical spelling ("mutliple") so any
    # existing test selectors that reference it continue to match.
    def test_check_success_tasks_raises_mutliple(self):
        """A FAILED job with several attempts raises the container-error message."""
        client_mock = mock.Mock()
        self.batch.jobId = '8ba9d676-4108-4474-9dca-8bbac1da9b19'
        self.batch.client = client_mock

        client_mock.describe_jobs.return_value = {
            'jobs': [{
                'status': 'FAILED',
                'attempts': [{
                    'exitCode': 1
                }, {
                    'exitCode': 10
                }]
            }]
        }

        with self.assertRaises(Exception) as e:
            self.batch._check_success_task()

        # Ordering of str(dict) is not guaranteed, so match the prefix only.
        self.assertIn('This containers encounter an error during execution ', str(e.exception))

    def test_check_success_task_not_raises(self):
        """A SUCCEEDED job passes the check after a single describe_jobs call."""
        client_mock = mock.Mock()
        self.batch.jobId = '8ba9d676-4108-4474-9dca-8bbac1da9b19'
        self.batch.client = client_mock

        client_mock.describe_jobs.return_value = {
            'jobs': [{
                'status': 'SUCCEEDED'
            }]
        }

        self.batch._check_success_task()

        client_mock.describe_jobs.assert_called_once_with(
            jobs=['8ba9d676-4108-4474-9dca-8bbac1da9b19'])
try: start = DummyOperator(task_id='start', dag=dag) end = DummyOperator(task_id='end', dag=dag) next = DummyOperator(task_id='next', dag=dag) catalog_product_flat_1 = AWSBatchOperator( task_id='magento-catalog_product_flat_1', job_name='magento_catalog_product_flat_1', job_queue='batch-job-queue-b', job_definition= 'arn:aws:batch:eu-west-1:652586300051:job-definition/boutiqaat-etl-jobs-airflow:1', max_retries=4200, overrides={'command': ['make -C magento catalog_product_flat_1']}, aws_conn_id=None, array_properties={}, parameters={}, status_retries=10, region_name='eu-west-1', dag=(dag)) disabled_skus = AWSBatchOperator( task_id='magento-disabled_skus', job_name='magento_disabled_skus', job_queue='batch-job-queue-b', job_definition= 'arn:aws:batch:eu-west-1:652586300051:job-definition/boutiqaat-etl-jobs-airflow:1', max_retries=4200, overrides={'command': ['make -C magento disabled_skus']},
# t1, t2, t3 and t4 are examples of tasks created using operators try: start = DummyOperator(task_id='start', dag=dag) end = DummyOperator(task_id='end', dag=dag) next = DummyOperator(task_id='next', dag=dag) ofs_status_master = AWSBatchOperator( task_id='ofs-status-master-1', job_name='ofs_status_master', job_queue='batch-job-queue-b', job_definition= 'arn:aws:batch:eu-west-1:652586300051:job-definition/boutiqaat-etl-jobs-airflow:1', max_retries=4200, overrides={'command': ['make -C OFS status_master']}, aws_conn_id=None, array_properties={}, parameters={}, status_retries=10, region_name='eu-west-1', dag=(dag)) ofs_crm_orders = AWSBatchOperator( task_id='ofs-crm-orders-1', job_name='ofs_crm_orders', job_queue='batch-job-queue-b', job_definition= 'arn:aws:batch:eu-west-1:652586300051:job-definition/boutiqaat-etl-jobs-airflow:1', max_retries=4200, overrides={'command': ['make -C OFS crm_orders']},
# CELEBRITY_MASTER: cron '0 6 * * *' (minute 0 of hour 6, every day),
# catchup disabled, each DAG run limited to 15 minutes.
dag = DAG(
    dag_id='CELEBRITY_MASTER',
    default_args=default_args,
    catchup=False,
    schedule_interval='0 6 * * *',
    dagrun_timeout=timedelta(minutes=15),
    tags=['AOI'],
)

# Step 1: AWS Batch job whose container command is
# `make -C AOI celebrity_master`, submitted to 'batch-job-queue-b'.
celebrity_master = AWSBatchOperator(
    task_id='celebrity_master',
    job_name='celebrity_master',
    job_queue='batch-job-queue-b',
    job_definition='arn:aws:batch:eu-west-1:652586300051:job-definition/boutiqaat-etl-jobs-airflow:1',
    max_retries=4200,
    overrides={'command': ['make -C AOI celebrity_master']},
    aws_conn_id=None,
    array_properties={},
    parameters={},
    status_retries=10,
    region_name='eu-west-1',
    dag=dag,
)

# Step 2: run the SQL file queries/celebrity_master_load.sql through the
# 'redshift' Postgres connection.
celebrity_master_load = PostgresOperator(
    task_id='celebrity_master_load',
    sql='queries/celebrity_master_load.sql',
    postgres_conn_id='redshift',
    dag=dag,
)

# The load runs downstream of the Batch job.
celebrity_master >> celebrity_master_load
default_args=default_args, catchup=False, schedule_interval='20 2-20 * * *', dagrun_timeout=timedelta(minutes=60), tags=['AOI']) # t1, t2, t3 and t4 are examples of tasks created using operators try: t0 = AWSBatchOperator( task_id='aoi-consolidated-sku-stock-1', job_name='aoi_consolidated_sku_stock', job_queue='batch-job-queue-b', job_definition= 'arn:aws:batch:eu-west-1:652586300051:job-definition/boutiqaat-etl-jobs-airflow:1', max_retries=4200, overrides={'command': ['make -C AOI consolidated_sku_stock']}, aws_conn_id=None, array_properties={}, parameters={}, status_retries=10, region_name='eu-west-1', dag=(dag)) t1 = AWSBatchOperator( task_id='aoi-inventory-health-1', job_name='aoi_inventory_health', job_queue='batch-job-queue-b', job_definition= 'arn:aws:batch:eu-west-1:652586300051:job-definition/boutiqaat-etl-jobs-airflow:1', max_retries=4200, overrides={'command': ['make -C AOI inventory_health']},
provide_context=True, python_callable=branch_func, op_kwargs={ 'input_task': 'check_inbound_sales_line', 'cont_task': 'ofs-inbound-sales-line-incremental-1', 'stop_task': 'end' }, dag=dag) isl_inc = AWSBatchOperator( task_id='ofs-inbound-sales-line-incremental-1', job_name='ofs_inbound_sales_line_incremental', job_queue='batch-job-queue-b', job_definition= 'arn:aws:batch:eu-west-1:652586300051:job-definition/boutiqaat-etl-jobs-airflow:1', max_retries=4200, overrides={'command': ['make -C OFS inbound_sales_line_incremental']}, aws_conn_id=None, array_properties={}, parameters={}, status_retries=10, region_name='eu-west-1', dag=(dag)) check_isl >> branch_isl >> [isl_inc, end] isl_inc >> end check_ioa = PythonOperator( task_id='check_inbound_order_address', python_callable=compare, op_kwargs={ 'mysql_table_name': 'InboundOrderAddress',
start = DummyOperator( task_id='start', dag=dag) end = DummyOperator( task_id='end', dag=dag) bi_celebrity_master = AWSBatchOperator( task_id='aoi-bi-celebrity-master', job_name='aoi_bi_celebrity_master', job_queue='batch-job-queue-b', job_definition='arn:aws:batch:eu-west-1:652586300051:job-definition/boutiqaat-etl-jobs-airflow:1', max_retries=4200, overrides={'command': ['make -C AOI bi_celebrity_master']}, aws_conn_id=None, array_properties={}, parameters={}, status_retries=10, region_name='eu-west-1', dag=(dag)) order_items = AWSBatchOperator( task_id='aoi-order-items', job_name='aoi_order_items', job_queue='batch-job-queue-b', job_definition='arn:aws:batch:eu-west-1:652586300051:job-definition/boutiqaat-etl-jobs-airflow:1', max_retries=4200, overrides={'command': ['make -C AOI order_items']}, aws_conn_id=None,
class TestAWSBatchOperator(unittest.TestCase):
    """Tests for AWSBatchOperator: construction, submission, polling, kill."""

    MAX_RETRIES = 2
    STATUS_RETRIES = 3

    @mock.patch("airflow.contrib.operators.awsbatch_operator.AwsHook")
    def setUp(self, aws_hook_mock):
        """Instantiate the operator while AwsHook is replaced by a mock."""
        self.aws_hook_mock = aws_hook_mock
        operator_kwargs = dict(
            task_id="task",
            job_name=JOB_NAME,
            job_queue="queue",
            job_definition="hello-world",
            max_retries=self.MAX_RETRIES,
            status_retries=self.STATUS_RETRIES,
            parameters=None,
            overrides={},
            array_properties=None,
            aws_conn_id=None,
            region_name="eu-west-1",
        )
        self.batch = AWSBatchOperator(**operator_kwargs)

    # -- private helpers (not collected as tests) ---------------------------

    def _install_client(self):
        """Attach a fresh mock client and JOB_ID to the operator; return the client."""
        fake_client = mock.Mock()
        self.batch.jobId = JOB_ID
        self.batch.client = fake_client
        return fake_client

    def _assert_submitted(self, client):
        """Check the hook produced a batch client and submit_job got the expected kwargs."""
        self.aws_hook_mock.return_value.get_client_type.assert_called_once_with(
            "batch", region_name="eu-west-1")
        client.submit_job.assert_called_once_with(
            jobQueue="queue",
            jobName=JOB_NAME,
            containerOverrides={},
            jobDefinition="hello-world",
            arrayProperties={},
            parameters=None,
        )

    def _assert_check_raises(self, expected_fragment):
        """Run _check_success_task, require an exception, and match its message."""
        with self.assertRaises(Exception) as caught:
            self.batch._check_success_task()
        self.assertIn(expected_fragment, str(caught.exception))

    # -- construction --------------------------------------------------------

    def test_init(self):
        """Every constructor argument is reflected on the operator instance."""
        expected_attributes = (
            ("job_name", JOB_NAME),
            ("job_queue", "queue"),
            ("job_definition", "hello-world"),
            ("max_retries", self.MAX_RETRIES),
            ("status_retries", self.STATUS_RETRIES),
            ("parameters", None),
            ("overrides", {}),
            # array_properties was passed as None; the test expects {}.
            ("array_properties", {}),
            ("region_name", "eu-west-1"),
            ("aws_conn_id", None),
            ("hook", self.aws_hook_mock.return_value),
        )
        for attribute, wanted in expected_attributes:
            self.assertEqual(getattr(self.batch, attribute), wanted)

        self.aws_hook_mock.assert_called_once_with(aws_conn_id=None)

    def test_template_fields_overrides(self):
        """job_name, overrides and parameters are the declared template fields."""
        wanted_fields = ("job_name", "overrides", "parameters")
        self.assertEqual(self.batch.template_fields, wanted_fields)

    # -- execute() -----------------------------------------------------------

    # Patches apply bottom-up: the first mock argument stands in for
    # _check_success_task, the second for _wait_for_task_ended.
    @mock.patch.object(AWSBatchOperator, "_wait_for_task_ended")
    @mock.patch.object(AWSBatchOperator, "_check_success_task")
    def test_execute_without_failures(self, check_mock, wait_mock):
        """execute() submits, waits, checks and stores the returned job id."""
        batch_client = self.aws_hook_mock.return_value.get_client_type.return_value
        batch_client.submit_job.return_value = RESPONSE_WITHOUT_FAILURES

        self.batch.execute(None)

        self._assert_submitted(batch_client)
        wait_mock.assert_called_once_with()
        check_mock.assert_called_once_with()
        self.assertEqual(self.batch.jobId, JOB_ID)

    def test_execute_with_failures(self):
        """An empty submit_job response makes execute() raise AirflowException."""
        batch_client = self.aws_hook_mock.return_value.get_client_type.return_value
        batch_client.submit_job.return_value = ""

        with self.assertRaises(AirflowException):
            self.batch.execute(None)

        self._assert_submitted(batch_client)

    # -- waiting / polling ---------------------------------------------------

    def test_wait_end_tasks(self):
        """The 'job_execution_complete' waiter is used with max_attempts at sys.maxsize."""
        batch_client = self._install_client()

        self.batch._wait_for_task_ended()

        batch_client.get_waiter.assert_called_once_with(
            "job_execution_complete")
        waiter = batch_client.get_waiter.return_value
        waiter.wait.assert_called_once_with(jobs=[JOB_ID])
        self.assertEqual(sys.maxsize, waiter.config.max_attempts)

    @mock.patch("airflow.contrib.operators.awsbatch_operator.randint")
    def test_poll_job_status_success(self, mock_randint):
        """With a failing waiter, a SUCCEEDED job needs one describe_jobs call."""
        batch_client = self._install_client()
        mock_randint.return_value = 0  # don't pause in unit tests
        batch_client.get_waiter.return_value.wait.side_effect = ValueError()
        batch_client.describe_jobs.return_value = {
            "jobs": [{"jobId": JOB_ID, "status": "SUCCEEDED"}],
        }

        self.batch._wait_for_task_ended()

        batch_client.describe_jobs.assert_called_once_with(jobs=[JOB_ID])

    @mock.patch("airflow.contrib.operators.awsbatch_operator.randint")
    def test_poll_job_status_running(self, mock_randint):
        """With a failing waiter, a RUNNING job is described MAX_RETRIES times."""
        batch_client = self._install_client()
        mock_randint.return_value = 0  # don't pause in unit tests
        batch_client.get_waiter.return_value.wait.side_effect = ValueError()
        batch_client.describe_jobs.return_value = {
            "jobs": [{"jobId": JOB_ID, "status": "RUNNING"}],
        }

        self.batch._wait_for_task_ended()

        # NOTE: a disabled earlier expectation compared call_count with
        # STATUS_RETRIES; the active assertion below uses MAX_RETRIES.
        batch_client.describe_jobs.assert_called_with(jobs=[JOB_ID])
        self.assertEqual(batch_client.describe_jobs.call_count,
                         self.MAX_RETRIES)

    @mock.patch("airflow.contrib.operators.awsbatch_operator.randint")
    def test_poll_job_status_hit_api_throttle(self, mock_randint):
        """A throttling ClientError on every call fails after STATUS_RETRIES calls."""
        batch_client = self._install_client()
        mock_randint.return_value = 0  # don't pause in unit tests
        throttled = botocore.exceptions.ClientError(
            error_response={"Error": {"Code": "TooManyRequestsException"}},
            operation_name="get job description",
        )
        batch_client.describe_jobs.side_effect = throttled

        with self.assertRaises(Exception) as caught:
            self.batch._poll_for_task_ended()

        self.assertIn("Failed to get job description", str(caught.exception))
        batch_client.describe_jobs.assert_called_with(jobs=[JOB_ID])
        self.assertEqual(batch_client.describe_jobs.call_count,
                         self.STATUS_RETRIES)

    # -- _check_success_task -------------------------------------------------

    def test_check_success_tasks_raises(self):
        """An empty 'jobs' list is reported as a failed job description."""
        batch_client = self._install_client()
        batch_client.describe_jobs.return_value = {"jobs": []}

        self._assert_check_raises("Failed to get job description")

    def test_check_success_tasks_raises_failed(self):
        """A FAILED job with one attempt raises 'Job (<id>) failed with status'."""
        batch_client = self._install_client()
        batch_client.describe_jobs.return_value = {
            "jobs": [{
                "jobId": JOB_ID,
                "status": "FAILED",
                "statusReason": "This is an error reason",
                "attempts": [{"exitCode": 1}],
            }]
        }

        # Only the fixed prefix of the message is matched.
        self._assert_check_raises(
            "Job ({}) failed with status ".format(JOB_ID))

    def test_check_success_tasks_raises_pending(self):
        """A RUNNABLE job raises 'Job (<id>) is still pending'."""
        batch_client = self._install_client()
        batch_client.describe_jobs.return_value = {
            "jobs": [{"jobId": JOB_ID, "status": "RUNNABLE"}],
        }

        # Only the fixed prefix of the message is matched.
        self._assert_check_raises(
            "Job ({}) is still pending".format(JOB_ID))

    def test_check_success_tasks_raises_multiple(self):
        """A FAILED job with two attempts raises 'Job (<id>) failed with status'."""
        batch_client = self._install_client()
        batch_client.describe_jobs.return_value = {
            "jobs": [{
                "jobId": JOB_ID,
                "status": "FAILED",
                "statusReason": "This is an error reason",
                "attempts": [{"exitCode": 1}, {"exitCode": 10}],
            }]
        }

        # Only the fixed prefix of the message is matched.
        self._assert_check_raises(
            "Job ({}) failed with status ".format(JOB_ID))

    def test_check_success_task_not_raises(self):
        """A SUCCEEDED job passes after exactly one describe_jobs call."""
        batch_client = self._install_client()
        batch_client.describe_jobs.return_value = {
            "jobs": [{"jobId": JOB_ID, "status": "SUCCEEDED"}],
        }

        self.batch._check_success_task()

        batch_client.describe_jobs.assert_called_once_with(jobs=[JOB_ID])

    def test_check_success_task_raises_without_jobs(self):
        """An empty 'jobs' list is retried STATUS_RETRIES times before failing."""
        batch_client = self._install_client()
        batch_client.describe_jobs.return_value = {"jobs": []}

        with self.assertRaises(Exception) as caught:
            self.batch._check_success_task()

        batch_client.describe_jobs.assert_called_with(jobs=[JOB_ID])
        self.assertEqual(batch_client.describe_jobs.call_count,
                         self.STATUS_RETRIES)
        self.assertIn("Failed to get job description", str(caught.exception))

    # -- on_kill -------------------------------------------------------------

    def test_kill_job(self):
        """on_kill() terminates the job with the user-kill reason."""
        batch_client = self._install_client()
        batch_client.terminate_job.return_value = {}

        self.batch.on_kill()

        batch_client.terminate_job.assert_called_once_with(
            jobId=JOB_ID, reason="Task killed by the user")
) check_pinkman_result = EmrJobFlowSensor( task_id='check_pinkman_result', job_flow_id= "{{ task_instance.xcom_pull(task_ids='start_pinkman', key='return_value') }}", aws_conn_id='aws_default', dag=dag, ) run_walter_white = AWSBatchOperator( task_id='run_walter-white', job_name='walter-white', job_queue=os.getenv('COMPUTE_ENVIRONMENT_JOB_QUEUE'), job_definition=os.getenv('WALTER_WHITE_JOB_DEFINITION'), aws_conn_id='aws_default', region_name='eu-central-1', overrides={ 'environment': [ { 'name': 'MLFLOW_TRACKING_URI', 'value': os.getenv("MLFLOW_TRACKING_URI") }, ], }, parameters={}, dag=dag, ) start_pinkman >> check_pinkman_result >> run_walter_white
'AWS_SECRET_ACCESS_KEY': f"{{{{ ti.xcom_pull(task_ids='get_credentials_for_s3_task_live', key='SecretAccessKey') }}}}", 'TOKEN': f"{{{{ ti.xcom_pull(task_ids='get_credentials_for_s3_task_live', key='SessionToken') }}}}" }, dag=dag) command = { 'command': [ '/usr/local/bin/run-pan-bbc-churn-predictions.sh', ] } aws_job_submission = AWSBatchOperator( task_id='aws-batch-job-submission', job_name='airflow-job-submission-and-run-' + datetime.today().strftime('%Y-%m-%d'), job_definition='live-Airflow-Pan-BBC-Churn-Classification-final', job_queue='live-Airflow-Pan-BBC-Churn--JobQueue', overrides=command, aws_conn_id='aws_default', dag=dag) ## Copy shap values and propensity scores to historical storage last_monday = date.today() + relativedelta(weekday=TU(-2)) historical_storage = 'historical-storage' s3_target_location = f"{historical_storage}/week_start={last_monday}/" ''' export AWS_DEFAULT_REGION=eu-west-1 ENV=${ENV:-int} current_date=`python get_current_week_start.py`
task_id='nav-master_sync', python_callable=execute_from_file, op_kwargs={ 'connection_name': 'nav_master_db', 'query': 'exec [Boutiqaat_Live].[dbo].[USP_GetItemStock]' }, dag=dag, ) aoi_consolidated_sku_stock = AWSBatchOperator( task_id='aoi-consolidated-sku-stock-1', job_name='aoi_consolidated_sku_stock', job_queue='batch-job-queue-b', job_definition= 'arn:aws:batch:eu-west-1:652586300051:job-definition/boutiqaat-etl-jobs-airflow:1', max_retries=4200, overrides={'command': ['make -C AOI consolidated_sku_stock']}, aws_conn_id=None, array_properties={}, parameters={}, status_retries=10, region_name='eu-west-1', dag=(dag)) nav_sku_master = AWSBatchOperator( task_id='nav-sku-master-full-1', job_name='nav_sku_master_full', job_queue='batch-job-queue-b', job_definition= 'arn:aws:batch:eu-west-1:652586300051:job-definition/boutiqaat-etl-jobs-airflow:1', max_retries=4200, overrides={'command': ['make -C NAV nav_sku_master_full']},
# overrides={'command': ['make -C magento sku_master']}, # aws_conn_id=None, # array_properties={}, # parameters={}, # status_retries=10, # region_name='eu-west-1', # dag=(dag)) start = DummyOperator(task_id='start', dag=dag) magento_sku_master = AWSBatchOperator( task_id='redshift-magento-sku-master-1', job_name='redshift_magento_sku_master', job_queue='batch-job-queue-b', job_definition= 'arn:aws:batch:eu-west-1:652586300051:job-definition/boutiqaat-etl-jobs-airflow:1', max_retries=4200, overrides={'command': ['make -C redshift magento_sku_master']}, aws_conn_id=None, array_properties={}, parameters={}, status_retries=10, region_name='eu-west-1', dag=(dag)) sku_status_history = AWSBatchOperator( task_id='magento-sku-status-history-1', job_name='magento_sku_status_history', job_queue='batch-job-queue-b', job_definition= 'arn:aws:batch:eu-west-1:652586300051:job-definition/boutiqaat-etl-jobs-airflow:1', max_retries=4200, overrides={'command': ['make -C magento sku_status_history']},
'retry_delay': timedelta(minutes=5), } dag = DAG( dag_id='AOI_ORDERS_INCREMENTAL', default_args=default_args, catchup=False, schedule_interval='*/5 3-16 * * *', dagrun_timeout=timedelta(minutes=60), tags=['AOI']) # t1, t2, t3 and t4 are examples of tasks created using operators try: t2 = AWSBatchOperator( task_id='aoi-order_details_incremental-1', job_name='aoi_order_details_incremental', job_queue='batch-job-queue-b', job_definition='arn:aws:batch:eu-west-1:652586300051:job-definition/boutiqaat-etl-jobs-airflow:1', max_retries=4200, overrides={'command': ['make -C AOI order_details_lastmodified']}, aws_conn_id=None, array_properties={}, parameters={}, status_retries=10, region_name='eu-west-1', dag=(dag)) except Exception as e: t2.log.info(e)
# SECURITY(review): the name/value entry for BATCH_AWS_SECRET_ACCESS_KEY on
# the line below carries what appears to be an AWS secret access key as a
# plain string literal. A credential stored in source should be treated as
# exposed: rotate it and supply the value from a secrets mechanism instead.
#
# The remainder of the line defines the "cch_massachusetts" DAG (cron
# "30 1 * * *", catchup disabled) and task t1 ("submit_cch"), an
# AWSBatchOperator using connection "aws_mobi" in region us-east-1.
#
# NOTE(review): job_name concatenates jobNamePrefix with
# datetime.now().strftime('%Y%m%d%H%M'); that expression is evaluated when
# this module is executed, not when the task runs -- confirm that is intended.
}, { 'name': 'MAP_NAME', 'value': 'massachusetts-latest' }, { 'name': 'BATCH_AWS_SECRET_ACCESS_KEY', 'value': 'DI9SPRWYLxNvhQJdISydNmD2xczoUIKp0olHYON6' }, ], } dag = DAG("cch_massachusetts", default_args=default_args, catchup=False, schedule_interval="30 1 * * *",) t1 = AWSBatchOperator(task_id="submit_cch", job_name=(jobNamePrefix + datetime.now().strftime('%Y%m%d%H%M')), job_definition=jobDefinition, job_queue=jobQueue, overrides=containerOverrides, aws_conn_id="aws_mobi", region_name="us-east-1", dag=dag);
# catalog_product_flat_1 disabled_skus eav_attribute eav_attribute_option_swatch sales_order sales_order_item magento_customerbalance magento_customerbalance_history celebrity_am_log magento_product_catalog sku_brands sku_categories customer_demographic_info notify_out_of_stock order_gen_mapped try: start = DummyOperator(task_id='start', dag=dag) end = DummyOperator(task_id='end', dag=dag) pg_report_incremental = AWSBatchOperator( task_id='magento-pg_report_incremental', job_name='magento_pg_report_incremental', job_queue='batch-job-queue-b', job_definition= 'arn:aws:batch:eu-west-1:652586300051:job-definition/boutiqaat-etl-jobs-airflow:1', max_retries=4200, overrides={ 'command': ['make -C ML payment_gateway_report_incremental'] }, aws_conn_id=None, array_properties={}, parameters={}, status_retries=10, region_name='eu-west-1', dag=(dag)) start >> [pg_report_incremental] >> end """ order_gen_mapped = AWSBatchOperator( task_id='magento-order_gen_mapped_incremental', job_name='magento_order_gen_mapped_incremental', job_queue='batch-job-queue-b', job_definition='arn:aws:batch:eu-west-1:652586300051:job-definition/boutiqaat-etl-jobs-airflow:1', max_retries=4200,
dag = DAG(dag_id='REDSHIFT_TASKS', default_args=default_args, catchup=False, schedule_interval=None, dagrun_timeout=timedelta(minutes=60), tags=['REDSHIFT']) try: sku_live_status_report = AWSBatchOperator( task_id='redshift-sku_live_status_report', job_name='redshift_sku_live_status_report', job_queue='batch-job-queue-b', job_definition= 'arn:aws:batch:eu-west-1:652586300051:job-definition/boutiqaat-etl-jobs-airflow:1', max_retries=4200, overrides={'command': ['make -C redshift sku_live_status_report']}, aws_conn_id=None, array_properties={}, parameters={}, status_retries=10, region_name='eu-west-1', dag=(dag)) brand_performance = AWSBatchOperator( task_id='redshift-brand_performance', job_name='redshift_brand_performance', job_queue='batch-job-queue-b', job_definition= 'arn:aws:batch:eu-west-1:652586300051:job-definition/boutiqaat-etl-jobs-airflow:1', max_retries=4200, overrides={'command': ['make -C redshift brand_performance']},
class TestAWSBatchOperator(unittest.TestCase):
    """Unit tests for AWSBatchOperator with the AWS hook class patched out."""

    @mock.patch('airflow.contrib.operators.awsbatch_operator.AwsHook')
    def setUp(self, aws_hook_mock):
        """Create the operator under test while AwsHook is replaced by a mock."""
        configuration.load_test_config()

        self.aws_hook_mock = aws_hook_mock
        self.batch = AWSBatchOperator(
            task_id='task',
            job_name='51455483-c62c-48ac-9b88-53a6a725baa3',
            job_queue='queue',
            job_definition='hello-world',
            max_retries=5,
            overrides={},
            aws_conn_id=None,
            region_name='eu-west-1')

    def test_init(self):
        """Constructor arguments are stored and the hook is built exactly once."""
        self.assertEqual(self.batch.job_name,
                         '51455483-c62c-48ac-9b88-53a6a725baa3')
        self.assertEqual(self.batch.job_queue, 'queue')
        self.assertEqual(self.batch.job_definition, 'hello-world')
        self.assertEqual(self.batch.max_retries, 5)
        self.assertEqual(self.batch.overrides, {})
        self.assertEqual(self.batch.region_name, 'eu-west-1')
        self.assertIsNone(self.batch.aws_conn_id)
        self.assertEqual(self.batch.hook, self.aws_hook_mock.return_value)

        self.aws_hook_mock.assert_called_once_with(aws_conn_id=None)

    def test_template_fields_overrides(self):
        """'job_name' and 'overrides' are the declared template fields."""
        self.assertEqual(self.batch.template_fields, (
            'job_name',
            'overrides',
        ))

    # Decorators apply bottom-up, so the first mock argument is the patch for
    # _check_success_task and the second is the patch for _wait_for_task_ended.
    @mock.patch.object(AWSBatchOperator, '_wait_for_task_ended')
    @mock.patch.object(AWSBatchOperator, '_check_success_task')
    def test_execute_without_failures(self, check_mock, wait_mock):
        """execute() submits the job, waits, checks success and records jobId."""
        client_mock = self.aws_hook_mock.return_value.get_client_type.return_value
        client_mock.submit_job.return_value = RESPONSE_WITHOUT_FAILURES

        self.batch.execute(None)

        self.aws_hook_mock.return_value.get_client_type.assert_called_once_with(
            'batch', region_name='eu-west-1')
        client_mock.submit_job.assert_called_once_with(
            jobQueue='queue',
            jobName='51455483-c62c-48ac-9b88-53a6a725baa3',
            containerOverrides={},
            jobDefinition='hello-world')

        wait_mock.assert_called_once_with()
        check_mock.assert_called_once_with()
        self.assertEqual(self.batch.jobId,
                         '8ba9d676-4108-4474-9dca-8bbac1da9b19')

    def test_execute_with_failures(self):
        """An empty submit_job response makes execute() raise AirflowException."""
        client_mock = self.aws_hook_mock.return_value.get_client_type.return_value
        client_mock.submit_job.return_value = ""

        with self.assertRaises(AirflowException):
            self.batch.execute(None)

        self.aws_hook_mock.return_value.get_client_type.assert_called_once_with(
            'batch', region_name='eu-west-1')
        client_mock.submit_job.assert_called_once_with(
            jobQueue='queue',
            jobName='51455483-c62c-48ac-9b88-53a6a725baa3',
            containerOverrides={},
            jobDefinition='hello-world')

    def test_wait_end_tasks(self):
        """_wait_for_task_ended uses the 'job_execution_complete' waiter."""
        client_mock = mock.Mock()
        self.batch.jobId = '8ba9d676-4108-4474-9dca-8bbac1da9b19'
        self.batch.client = client_mock

        self.batch._wait_for_task_ended()

        client_mock.get_waiter.assert_called_once_with(
            'job_execution_complete')
        client_mock.get_waiter.return_value.wait.assert_called_once_with(
            jobs=['8ba9d676-4108-4474-9dca-8bbac1da9b19'])
        # assertEquals is a deprecated alias (removed in Python 3.12).
        self.assertEqual(
            sys.maxsize,
            client_mock.get_waiter.return_value.config.max_attempts)

    def test_check_success_tasks_raises(self):
        """An empty 'jobs' list is reported as 'No job found for ...'."""
        client_mock = mock.Mock()
        self.batch.jobId = '8ba9d676-4108-4474-9dca-8bbac1da9b19'
        self.batch.client = client_mock

        client_mock.describe_jobs.return_value = {'jobs': []}

        with self.assertRaises(Exception) as e:
            self.batch._check_success_task()

        # Ordering of str(dict) is not guaranteed, so match the prefix only.
        self.assertIn('No job found for ', str(e.exception))

    def test_check_success_tasks_raises_failed(self):
        """A FAILED job with one attempt raises 'Job failed with status ...'."""
        client_mock = mock.Mock()
        self.batch.jobId = '8ba9d676-4108-4474-9dca-8bbac1da9b19'
        self.batch.client = client_mock

        client_mock.describe_jobs.return_value = {
            'jobs': [{
                'status': 'FAILED',
                'statusReason': 'This is an error reason',
                'attempts': [{
                    'exitCode': 1
                }]
            }]
        }

        with self.assertRaises(Exception) as e:
            self.batch._check_success_task()

        # Ordering of str(dict) is not guaranteed, so match the prefix only.
        self.assertIn('Job failed with status ', str(e.exception))

    def test_check_success_tasks_raises_pending(self):
        """A RUNNABLE job is reported as still pending."""
        client_mock = mock.Mock()
        self.batch.jobId = '8ba9d676-4108-4474-9dca-8bbac1da9b19'
        self.batch.client = client_mock

        client_mock.describe_jobs.return_value = {
            'jobs': [{
                'status': 'RUNNABLE'
            }]
        }

        with self.assertRaises(Exception) as e:
            self.batch._check_success_task()

        # Ordering of str(dict) is not guaranteed, so match the prefix only.
        self.assertIn('This task is still pending ', str(e.exception))

    def test_check_success_tasks_raises_multiple(self):
        """A FAILED job with two attempts raises 'Job failed with status ...'."""
        client_mock = mock.Mock()
        self.batch.jobId = '8ba9d676-4108-4474-9dca-8bbac1da9b19'
        self.batch.client = client_mock

        client_mock.describe_jobs.return_value = {
            'jobs': [{
                'status': 'FAILED',
                'statusReason': 'This is an error reason',
                'attempts': [{
                    'exitCode': 1
                }, {
                    'exitCode': 10
                }]
            }]
        }

        with self.assertRaises(Exception) as e:
            self.batch._check_success_task()

        # Ordering of str(dict) is not guaranteed, so match the prefix only.
        self.assertIn('Job failed with status ', str(e.exception))

    def test_check_success_task_not_raises(self):
        """A SUCCEEDED job passes the check after a single describe_jobs call."""
        client_mock = mock.Mock()
        self.batch.jobId = '8ba9d676-4108-4474-9dca-8bbac1da9b19'
        self.batch.client = client_mock

        client_mock.describe_jobs.return_value = {
            'jobs': [{
                'status': 'SUCCEEDED'
            }]
        }

        self.batch._check_success_task()

        client_mock.describe_jobs.assert_called_once_with(
            jobs=['8ba9d676-4108-4474-9dca-8bbac1da9b19'])
# task_id='flex_maybe_spawn',
# provide_context=True,
# python_callable=flex_maybe_spawn,
# op_kwargs={
# 'crypt_auth_tok' : utils.encrypt_tok(airflow_conf.as_dict()
# ['connections']['APP_CLIENT_SECRET']).decode(),
# }
# )

# Airflow connection id handed to the Batch operator.
my_conn_id = 'aws_default'

# Container overrides are left empty. Keys noted by the author as usable
# here: vcpus, memory, command, instanceType, environment,
# resourceRequirements.
my_overrides = dict()

# Client-style settings; not referenced by any statement visible in this
# section.
my_useless = dict(
    region_name='us-east-1',
    signature_version='v4',
    retries=dict(max_attempts=10, mode='standard'),
)

# No job parameters are passed.
my_params = dict()

# The operator is created without a dag= argument.
batch_task = AWSBatchOperator(
    task_id='aws_batch_task',
    job_name='test-airflow-submission',
    job_definition='test-job-definition',
    job_queue='test-queue',
    overrides=my_overrides,
    parameters=my_params,
    aws_conn_id=my_conn_id,
)

# NOTE(review): presumably this is what associates batch_task with `dag`;
# confirm that the Airflow version in use supports `dag >> task`.
dag >> batch_task