def test_execute(self, gcs_mock_hook, s3_one_mock_hook, s3_two_mock_hook):
    """Test the execute function when the run is successful."""
    op = S3ToGCSOperator(
        task_id=TASK_ID,
        bucket=S3_BUCKET,
        prefix=S3_PREFIX,
        delimiter=S3_DELIMITER,
        dest_gcs_conn_id=GCS_CONN_ID,
        dest_gcs=GCS_PATH_PREFIX,
    )

    # Both patched S3 hooks report the same key listing.
    for s3_hook in (s3_one_mock_hook, s3_two_mock_hook):
        s3_hook.return_value.list_keys.return_value = MOCK_FILES

    uploaded = op.execute(None)

    # Every mocked key must have been uploaded to GCS, order irrelevant.
    expected_uploads = [
        mock.call('gcs-bucket', 'data/TEST1.csv', mock.ANY, gzip=False),
        mock.call('gcs-bucket', 'data/TEST3.csv', mock.ANY, gzip=False),
        mock.call('gcs-bucket', 'data/TEST2.csv', mock.ANY, gzip=False),
    ]
    gcs_mock_hook.return_value.upload.assert_has_calls(expected_uploads, any_order=True)

    # Each hook class must have been instantiated exactly once with its connection.
    for s3_hook in (s3_one_mock_hook, s3_two_mock_hook):
        s3_hook.assert_called_once_with(aws_conn_id=AWS_CONN_ID, verify=None)
    gcs_mock_hook.assert_called_once_with(
        google_cloud_storage_conn_id=GCS_CONN_ID, delegate_to=None
    )

    # we expect MOCK_FILES to be uploaded
    self.assertEqual(sorted(MOCK_FILES), sorted(uploaded))
def test_execute_with_gzip(self, gcs_mock_hook, s3_one_mock_hook, s3_two_mock_hook):
    """Test the execute function when the run is successful."""
    op = S3ToGCSOperator(
        task_id=TASK_ID,
        bucket=S3_BUCKET,
        prefix=S3_PREFIX,
        delimiter=S3_DELIMITER,
        dest_gcs_conn_id=GCS_CONN_ID,
        dest_gcs=GCS_PATH_PREFIX,
        gzip=True,
    )

    # Both patched S3 hooks report the same key listing.
    for s3_hook in (s3_one_mock_hook, s3_two_mock_hook):
        s3_hook.return_value.list_keys.return_value = MOCK_FILES

    op.execute(None)

    # GCS hook is built once with the configured connection and no impersonation.
    gcs_mock_hook.assert_called_once_with(
        google_cloud_storage_conn_id=GCS_CONN_ID,
        delegate_to=None,
        impersonation_chain=None,
    )

    # Every mocked key must have been uploaded gzip-compressed, order irrelevant.
    expected_uploads = [
        mock.call('gcs-bucket', 'data/TEST2.csv', mock.ANY, gzip=True),
        mock.call('gcs-bucket', 'data/TEST1.csv', mock.ANY, gzip=True),
        mock.call('gcs-bucket', 'data/TEST3.csv', mock.ANY, gzip=True),
    ]
    gcs_mock_hook.return_value.upload.assert_has_calls(expected_uploads, any_order=True)
def test_init(self):
    """Test S3ToGCSOperator instance is properly initialized."""
    operator = S3ToGCSOperator(
        task_id=TASK_ID,
        bucket=S3_BUCKET,
        prefix=S3_PREFIX,
        delimiter=S3_DELIMITER,
        gcp_conn_id=GCS_CONN_ID,
        dest_gcs=GCS_PATH_PREFIX,
    )
    # Each constructor argument must land on the attribute of the same name.
    expected_attrs = (
        ('task_id', TASK_ID),
        ('bucket', S3_BUCKET),
        ('prefix', S3_PREFIX),
        ('delimiter', S3_DELIMITER),
        ('gcp_conn_id', GCS_CONN_ID),
        ('dest_gcs', GCS_PATH_PREFIX),
    )
    for attr, expected in expected_attrs:
        self.assertEqual(getattr(operator, attr), expected)
def test_init(self):
    """Test S3ToGCSOperator instance is properly initialized."""
    op = S3ToGCSOperator(
        task_id=TASK_ID,
        bucket=S3_BUCKET,
        prefix=S3_PREFIX,
        delimiter=S3_DELIMITER,
        gcp_conn_id=GCS_CONN_ID,
        dest_gcs=GCS_PATH_PREFIX,
        google_impersonation_chain=IMPERSONATION_CHAIN,
    )
    # Every constructor argument must land on the attribute of the same name.
    actual = (
        op.task_id,
        op.bucket,
        op.prefix,
        op.delimiter,
        op.gcp_conn_id,
        op.dest_gcs,
        op.google_impersonation_chain,
    )
    assert actual == (
        TASK_ID,
        S3_BUCKET,
        S3_PREFIX,
        S3_DELIMITER,
        GCS_CONN_ID,
        GCS_PATH_PREFIX,
        IMPERSONATION_CHAIN,
    )
schedule_interval='@once',  # example DAG: run a single time, never again
start_date=datetime(2021, 1, 1),
catchup=False,  # no backfill of past schedule intervals
tags=['example'],
) as dag:
    # Create the source (S3) and destination (GCS) buckets up front.
    create_s3_bucket = S3CreateBucketOperator(
        task_id="create_s3_bucket", bucket_name=S3BUCKET_NAME, region_name='us-east-1'
    )
    create_gcs_bucket = GCSCreateBucketOperator(
        task_id="create_bucket",
        bucket_name=GCS_BUCKET,
        project_id=GCP_PROJECT_ID,
    )
    # [START howto_transfer_s3togcs_operator]
    transfer_to_gcs = S3ToGCSOperator(
        task_id='s3_to_gcs_task',
        bucket=S3BUCKET_NAME,
        prefix=PREFIX,
        dest_gcs=GCS_BUCKET_URL,
    )
    # [END howto_transfer_s3togcs_operator]
    # Tear down: remove both buckets once the transfer has finished.
    delete_s3_bucket = S3DeleteBucketOperator(
        task_id='delete_s3_bucket', bucket_name=S3BUCKET_NAME, force_delete=True
    )
    delete_gcs_bucket = GCSDeleteBucketOperator(
        task_id='delete_gcs_bucket', bucket_name=GCS_BUCKET
    )

    # Pipeline order: create buckets, seed the S3 fixture (upload_file is
    # presumably a @task defined earlier in this file — confirm), transfer,
    # then clean up both sides.
    (
        create_s3_bucket
        >> upload_file()
        >> create_gcs_bucket
        >> transfer_to_gcs
        >> delete_s3_bucket
        >> delete_gcs_bucket
    )