def test_execute_move_single_file(self, sftp_hook, gcs_hook):
    """Move a single file: it is retrieved, uploaded, then deleted.

    ``sftp_hook`` and ``gcs_hook`` are mock objects standing in for the hook
    classes; their ``return_value`` is the hook instance whose method calls
    are checked below.
    """
    task = SFTPToGCSOperator(
        task_id=TASK_ID,
        source_path=SOURCE_OBJECT_NO_WILDCARD,
        destination_bucket=TEST_BUCKET,
        destination_path=DESTINATION_PATH_FILE,
        move_object=True,
        gcp_conn_id=GCP_CONN_ID,
        sftp_conn_id=SFTP_CONN_ID,
        delegate_to=DELEGATE_TO,
    )
    task.execute(None)

    # Each hook must be constructed exactly once with the configured ids.
    gcs_hook.assert_called_once_with(
        gcp_conn_id=GCP_CONN_ID, delegate_to=DELEGATE_TO
    )
    sftp_hook.assert_called_once_with(SFTP_CONN_ID)

    # The source path is asserted as-is; the local target file name is not
    # pinned, hence mock.ANY.
    sftp_hook.return_value.retrieve_file.assert_called_once_with(
        SOURCE_OBJECT_NO_WILDCARD, mock.ANY
    )

    gcs_hook.return_value.upload.assert_called_once_with(
        bucket_name=TEST_BUCKET,
        object_name=DESTINATION_PATH_FILE,
        filename=mock.ANY,
        mime_type=DEFAULT_MIME_TYPE,
    )

    # move_object=True: the source file must be deleted.
    sftp_hook.return_value.delete_file.assert_called_once_with(
        SOURCE_OBJECT_NO_WILDCARD
    )
def test_execute_move_with_wildcard(self, sftp_hook, gcs_hook):
    """Move with a wildcard source: every listed file is deleted.

    The mocked ``get_tree_map`` reports two files (and two empty lists);
    after ``execute`` the test expects a ``delete_file`` call for each of
    those two paths, in that order.
    """
    listed_files = (
        "main_dir/test_object3.json",
        "main_dir/sub_dir/test_object3.json",
    )
    sftp_instance = sftp_hook.return_value
    sftp_instance.get_tree_map.return_value = [list(listed_files), [], []]
    gcs_hook.return_value.list.return_value = SOURCE_FILES_LIST[:2]

    operator_kwargs = {
        "task_id": TASK_ID,
        "source_path": SOURCE_OBJECT_WILDCARD_FILENAME,
        "destination_bucket": TEST_BUCKET,
        "destination_path": DESTINATION_PATH_DIR,
        "move_object": True,
        "gcp_conn_id": GCP_CONN_ID,
        "sftp_conn_id": SFTP_CONN_ID,
        "delegate_to": DELEGATE_TO,
    }
    SFTPToGCSOperator(**operator_kwargs).execute(None)

    expected_deletes = [mock.call(path) for path in listed_files]
    sftp_instance.delete_file.assert_has_calls(expected_deletes)
def test_execute_copy_with_wildcard(self, sftp_hook, gcs_hook):
    """Copy with a wildcard source: files are uploaded and none is deleted.

    The mocked ``get_tree_map`` reports two files. The test checks how the
    tree listing was requested, that each file was retrieved and uploaded
    under ``destination_dir/``, and that no source file was deleted.
    """
    sftp_hook.return_value.get_tree_map.return_value = [
        ["main_dir/test_object3.json", "main_dir/sub_dir/test_object3.json"],
        [],
        [],
    ]

    task = SFTPToGCSOperator(
        task_id=TASK_ID,
        source_path=SOURCE_OBJECT_WILDCARD_FILENAME,
        destination_bucket=TEST_BUCKET,
        destination_path=DESTINATION_PATH_DIR,
        # This is the *copy* scenario, so the operator must not be asked to
        # move (delete) the source objects.
        move_object=False,
        gcp_conn_id=GCP_CONN_ID,
        sftp_conn_id=SFTP_CONN_ID,
        delegate_to=DELEGATE_TO,
    )
    task.execute(None)

    # The wildcard splits the source path into a prefix and a delimiter.
    sftp_hook.return_value.get_tree_map.assert_called_with(
        "main_dir", prefix="main_dir/test_object", delimiter=".json"
    )

    # Local target file names are not pinned, hence mock.ANY.
    sftp_hook.return_value.retrieve_file.assert_has_calls(
        [
            mock.call("main_dir/test_object3.json", mock.ANY),
            mock.call("main_dir/sub_dir/test_object3.json", mock.ANY),
        ]
    )

    gcs_hook.return_value.upload.assert_has_calls(
        [
            mock.call(
                bucket_name=TEST_BUCKET,
                object_name="destination_dir/test_object3.json",
                mime_type=DEFAULT_MIME_TYPE,
                filename=mock.ANY,
            ),
            mock.call(
                bucket_name=TEST_BUCKET,
                object_name="destination_dir/sub_dir/test_object3.json",
                mime_type=DEFAULT_MIME_TYPE,
                filename=mock.ANY,
            ),
        ]
    )

    # A copy must leave the source files in place.
    sftp_hook.return_value.delete_file.assert_not_called()
def test_execute_more_than_one_wildcard_exception(self, sftp_hook, gcs_hook):
    """A source_path with multiple wildcards makes ``execute`` raise.

    Expects an ``AirflowException`` whose text contains the
    "Only one wildcard" message.
    """
    operator_kwargs = {
        "task_id": TASK_ID,
        "source_path": SOURCE_OBJECT_MULTIPLE_WILDCARDS,
        "destination_bucket": TEST_BUCKET,
        "destination_path": DESTINATION_PATH_FILE,
        "move_object": False,
        "gcp_conn_id": GCP_CONN_ID,
        "sftp_conn_id": SFTP_CONN_ID,
        "delegate_to": DELEGATE_TO,
    }
    task = SFTPToGCSOperator(**operator_kwargs)

    expected_message = "Only one wildcard '*' is allowed in source_path parameter"
    with self.assertRaises(AirflowException) as raised:
        task.execute(None)

    self.assertIn(expected_message, str(raised.exception))
# Path components; the tasks below join TMP_PATH, DIR and an object name to
# build their source_path.
TMP_PATH = "/tmp"
DIR = "tests_sftp_hook_dir"
SUBDIR = "subdir"  # not referenced in the part of the file shown here

# Object (file) names used as the last component of source_path.
OBJECT_SRC_1 = "parent-1.bin"
OBJECT_SRC_2 = "parent-2.bin"
OBJECT_SRC_3 = "parent-3.txt"  # not referenced in the part of the file shown here

# schedule_interval=None: the DAG is given no schedule.
with models.DAG(
    "example_sftp_to_gcs", default_args=default_args, schedule_interval=None
) as dag:
    # Copy one file into BUCKET_SRC. Neither destination_path nor move_object
    # is passed here, so the operator's defaults apply.
    # [START howto_operator_sftp_to_gcs_copy_single_file]
    copy_file_from_sftp_to_gcs = SFTPToGCSOperator(
        task_id="file-copy-sftp-to-gcs",
        source_path=os.path.join(TMP_PATH, DIR, OBJECT_SRC_1),
        destination_bucket=BUCKET_SRC,
    )
    # [END howto_operator_sftp_to_gcs_copy_single_file]

    # Move one file into BUCKET_SRC under an explicit destination object name
    # (move_object=True).
    # [START howto_operator_sftp_to_gcs_move_single_file_destination]
    move_file_from_sftp_to_gcs_destination = SFTPToGCSOperator(
        task_id="file-move-sftp-to-gcs-destination",
        source_path=os.path.join(TMP_PATH, DIR, OBJECT_SRC_2),
        destination_bucket=BUCKET_SRC,
        destination_path="destination_dir/destination_filename.bin",
        move_object=True,
    )
    # [END howto_operator_sftp_to_gcs_move_single_file_destination]

    # [START howto_operator_sftp_to_gcs_copy_directory]