# Unit test for GCSListObjectsOperator. The mock_hook parameter implies GCSHook
# is patched at the operator module's import path; the decorator below is that
# assumed patch target.
@mock.patch("airflow.providers.google.cloud.operators.gcs.GCSHook")
def test_execute(self, mock_hook):
    mock_hook.return_value.list.return_value = MOCK_FILES
    operator = GCSListObjectsOperator(
        task_id=TASK_ID, bucket=TEST_BUCKET, prefix=PREFIX, delimiter=DELIMITER
    )
    files = operator.execute(None)
    mock_hook.return_value.list.assert_called_once_with(
        bucket_name=TEST_BUCKET, prefix=PREFIX, delimiter=DELIMITER
    )
    self.assertEqual(sorted(files), sorted(MOCK_FILES))
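# The test above assumes a unittest.TestCase class plus module-level constants.
# A minimal sketch of that scaffolding -- the names come from the snippet, but
# the values here are illustrative assumptions, not from the original module:
from unittest import mock

from airflow.providers.google.cloud.operators.gcs import GCSListObjectsOperator

TASK_ID = "test-gcs-list"                        # assumed value
TEST_BUCKET = "test-bucket"                      # assumed value
PREFIX = "prefix/"                               # assumed value
DELIMITER = ".csv"                               # assumed value
MOCK_FILES = ["prefix/a.csv", "prefix/b.csv"]    # assumed values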
BUCKET_FILE_LOCATION = PATH_TO_UPLOAD_FILE.rpartition("/")[-1]

with models.DAG(
    "example_gcs",
    default_args=default_args,
    schedule_interval=None,
    tags=['example'],
) as dag:
    create_bucket1 = GCSCreateBucketOperator(
        task_id="create_bucket1", bucket_name=BUCKET_1, project_id=PROJECT_ID
    )
    create_bucket2 = GCSCreateBucketOperator(
        task_id="create_bucket2", bucket_name=BUCKET_2, project_id=PROJECT_ID
    )
    list_buckets = GCSListObjectsOperator(
        task_id="list_buckets", bucket=BUCKET_1
    )
    list_buckets_result = BashOperator(
        task_id="list_buckets_result",
        bash_command="echo \"{{ task_instance.xcom_pull('list_buckets') }}\"",
    )
    upload_file = LocalFilesystemToGCSOperator(
        task_id="upload_file",
        src=PATH_TO_UPLOAD_FILE,
        dst=BUCKET_FILE_LOCATION,
        bucket=BUCKET_1,
    )
    transform_file = GCSFileTransformOperator(
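        # The source snippet is truncated at the call above. For reference,
        # GCSFileTransformOperator's key parameters are source_bucket,
        # source_object, and transform_script; a typical completion (argument
        # values assumed, not taken from the original) would look like:
        #     task_id="transform_file",
        #     source_bucket=BUCKET_1,
        #     source_object=BUCKET_FILE_LOCATION,
        #     transform_script=["python", "transform_script.py"],
        # )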
# Assumes existence of Airflow Variable set to name of GCP Project
PROJECT_ID = models.Variable.get("gcp_project")

with models.DAG(
    "example_gcs",
    start_date=days_ago(1),
    schedule_interval=None,
) as dag:
    generate_uuid = PythonOperator(
        task_id='generate_uuid', python_callable=lambda: str(uuid.uuid4())
    )
    create_bucket = GCSCreateBucketOperator(
        task_id="create_bucket",
        bucket_name="{{ task_instance.xcom_pull('generate_uuid') }}",
        project_id=PROJECT_ID,
    )
    list_objects = GCSListObjectsOperator(
        task_id="list_objects",
        bucket="{{ task_instance.xcom_pull('generate_uuid') }}",
    )
    list_buckets_result = BashOperator(
        task_id="list_buckets_result",
        bash_command="echo \"{{ task_instance.xcom_pull('list_objects') }}\"",
    )
    delete_bucket = GCSDeleteBucketOperator(
        task_id="delete_bucket",
        bucket_name="{{ task_instance.xcom_pull('generate_uuid') }}",
    )

    generate_uuid >> create_bucket >> list_objects >> list_buckets_result >> delete_bucket

if __name__ == "__main__":
    dag.clear(dag_run_state=State.NONE)
    dag.run()
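# Imports the example above presumably relies on -- the snippet omits its
# import block, so this is a sketch using the standard Airflow 2.x paths:
import uuid

from airflow import models
from airflow.operators.bash import BashOperator
from airflow.operators.python import PythonOperator
from airflow.providers.google.cloud.operators.gcs import (
    GCSCreateBucketOperator,
    GCSDeleteBucketOperator,
    GCSListObjectsOperator,
)
from airflow.utils.dates import days_ago
from airflow.utils.state import State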
def _check_if_log_exists(ti):  # assumed header; the snippet begins mid-function
    logs = ti.xcom_pull(task_ids='get_gcs_object_list')  # assumed XCom pull
    if logs:
        return 'branch_a'
    return 'branch_b'


with DAG(
    'tacc_branch_test',
    default_args=default_args,
    description='Test task: branch',
    schedule_interval='@once',
) as dag:
    get_gcs_object_list = GCSListObjectsOperator(
        task_id='get_gcs_object_list',
        bucket='your-bucket-name',
        prefix='your-path',
        delimiter='.log',
    )
    branching = BranchPythonOperator(
        task_id='branching',
        python_callable=_check_if_log_exists,
    )
    branch_a = DummyOperator(task_id='branch_a')
    branch_b = DummyOperator(task_id='branch_b')
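    # The excerpt ends before the dependency wiring; the conventional
    # completion for this branch layout (an assumption, not in the source):
    get_gcs_object_list >> branching >> [branch_a, branch_b]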
def _check_if_log_exists(ti):  # assumed header; the snippet begins mid-function
    # xcom_pull with a *list* of task_ids returns a list of values, one per
    # task -- hence the trailing [0] to unwrap the single listing result.
    logs = ti.xcom_pull(task_ids=['check_gcs_logs'], key="return_value")[0]
    if logs:
        # return ['create_dataproc', 'initial_ato_sn_calc_final', 'initial_ato_sn_calc_daily']
        return ['initial_ato_sn_calc_final', 'initial_ato_sn_calc_daily']
    return 'branch_b'


with DAG(
    'etl',
    default_args=default_args,
    description='etl',
    schedule_interval='36 17 * * *',
) as dag:
    check_gcs_logs = GCSListObjectsOperator(
        task_id='check_gcs_logs',
        bucket=bucket_config['upload_data'],
        prefix='logs/{{ ds_nodash }}',
        delimiter='.log',
        gcp_conn_id=gcp_config['conn_id'],
    )
    branching = BranchPythonOperator(
        task_id='branching',
        python_callable=_check_if_log_exists,
    )
    branch_b = DummyOperator(task_id='branch_b')
    # create_dataproc = DataprocCreateClusterOperator(
    #     task_id="create_dataproc",
    #     project_id=gcp_config["project_id"],
    #     cluster_name=dataproc_config["cluster_name"],
    #     region=gcp_config["region"],
    #     cluster_config=cluster_config,
    #     gcp_conn_id=gcp_config["conn_id"]
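    # Dependency wiring implied by the branch callable's return values; the
    # initial_ato_sn_calc_* tasks are defined outside this excerpt, so this
    # stays a commented sketch rather than runnable code:
    # check_gcs_logs >> branching >> [
    #     initial_ato_sn_calc_final, initial_ato_sn_calc_daily, branch_b
    # ]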