def test_execute_copy_single_file(self, sftp_hook, gcs_hook):
    """Copy a single object (no wildcard) and verify nothing is deleted from GCS.

    ``sftp_hook`` and ``gcs_hook`` are used through the mock API; they are
    presumably the patched hook classes injected by decorators that are not
    visible in this part of the file.
    """
    operator = GCSToSFTPOperator(
        task_id=TASK_ID,
        source_bucket=TEST_BUCKET,
        source_object=SOURCE_OBJECT_NO_WILDCARD,
        destination_path=DESTINATION_SFTP,
        move_object=False,
        gcp_conn_id=GCP_CONN_ID,
        sftp_conn_id=SFTP_CONN_ID,
        delegate_to=DELEGATE_TO,
        impersonation_chain=IMPERSONATION_CHAIN,
    )
    operator.execute({})

    # Each hook must have been constructed exactly once with the operator's settings.
    gcs_hook.assert_called_once_with(
        gcp_conn_id=GCP_CONN_ID,
        delegate_to=DELEGATE_TO,
        impersonation_chain=IMPERSONATION_CHAIN,
    )
    sftp_hook.assert_called_once_with(SFTP_CONN_ID)

    gcs_client = gcs_hook.return_value

    # Only the keyword arguments of the last download call are inspected.
    _, download_kwargs = gcs_client.download.call_args
    assert download_kwargs["bucket_name"] == TEST_BUCKET
    assert download_kwargs["object_name"] == SOURCE_OBJECT_NO_WILDCARD

    # Only the first positional argument (the remote path) of store_file is inspected.
    store_args, _ = sftp_hook.return_value.store_file.call_args
    expected_remote_path = os.path.join(DESTINATION_SFTP, SOURCE_OBJECT_NO_WILDCARD)
    assert store_args[0] == expected_remote_path

    # move_object=False: the source object must not be deleted.
    gcs_client.delete.assert_not_called()
def test_execute_move_single_file(self, sftp_hook, gcs_hook):
    """Move a single object (no wildcard) and verify it is deleted from GCS afterwards.

    ``sftp_hook`` and ``gcs_hook`` are used through the mock API; they are
    presumably the patched hook classes injected by decorators that are not
    visible in this part of the file.

    Plain ``assert`` statements are used instead of ``self.assertEqual`` so the
    test does not require the enclosing class to derive from
    ``unittest.TestCase`` and matches the assertion style of the sibling tests.
    """
    task = GCSToSFTPOperator(
        task_id=TASK_ID,
        source_bucket=TEST_BUCKET,
        source_object=SOURCE_OBJECT_NO_WILDCARD,
        destination_path=DESTINATION_SFTP,
        move_object=True,
        gcp_conn_id=GCP_CONN_ID,
        sftp_conn_id=SFTP_CONN_ID,
        delegate_to=DELEGATE_TO,
    )
    task.execute(None)

    # Each hook must have been constructed exactly once with the operator's settings.
    gcs_hook.assert_called_once_with(gcp_conn_id=GCP_CONN_ID, delegate_to=DELEGATE_TO)
    sftp_hook.assert_called_once_with(SFTP_CONN_ID)

    # Only the keyword arguments of the last download call are inspected.
    _, download_kwargs = gcs_hook.return_value.download.call_args
    assert download_kwargs["bucket_name"] == TEST_BUCKET
    assert download_kwargs["object_name"] == SOURCE_OBJECT_NO_WILDCARD

    # Only the first positional argument (the remote path) of store_file is inspected.
    store_args, _ = sftp_hook.return_value.store_file.call_args
    assert store_args[0] == os.path.join(DESTINATION_SFTP, SOURCE_OBJECT_NO_WILDCARD)

    # move_object=True: the source object must be deleted exactly once.
    gcs_hook.return_value.delete.assert_called_once_with(TEST_BUCKET, SOURCE_OBJECT_NO_WILDCARD)
def test_execute_move_single_file(
    self, source_object, target_object, keep_directory_structure, sftp_hook_mock, gcs_hook_mock
):
    """Move one object and verify the download, the SFTP target path and the GCS delete.

    ``source_object``, ``target_object`` and ``keep_directory_structure`` are
    supplied by the caller (presumably a parametrize decorator outside this
    view). ``sftp_hook_mock`` and ``gcs_hook_mock`` are used through the mock
    API and are presumably the patched hook classes.
    """
    operator = GCSToSFTPOperator(
        task_id=TASK_ID,
        source_bucket=TEST_BUCKET,
        source_object=source_object,
        destination_path=DESTINATION_SFTP,
        keep_directory_structure=keep_directory_structure,
        move_object=True,
        gcp_conn_id=GCP_CONN_ID,
        sftp_conn_id=SFTP_CONN_ID,
        delegate_to=DELEGATE_TO,
        impersonation_chain=IMPERSONATION_CHAIN,
    )
    operator.execute(None)

    # Each hook must have been constructed exactly once with the operator's settings.
    gcs_hook_mock.assert_called_once_with(
        gcp_conn_id=GCP_CONN_ID,
        delegate_to=DELEGATE_TO,
        impersonation_chain=IMPERSONATION_CHAIN,
    )
    sftp_hook_mock.assert_called_once_with(SFTP_CONN_ID)

    gcs_client = gcs_hook_mock.return_value
    sftp_client = sftp_hook_mock.return_value

    # The local file name is not pinned by this test, hence mock.ANY.
    gcs_client.download.assert_called_with(
        bucket_name=TEST_BUCKET, object_name=source_object, filename=mock.ANY
    )

    expected_remote_path = os.path.join(DESTINATION_SFTP, target_object)
    sftp_client.store_file.assert_called_with(expected_remote_path, mock.ANY)

    # move_object=True: the source object must be deleted exactly once.
    gcs_client.delete.assert_called_once_with(TEST_BUCKET, source_object)
def test_execute_more_than_one_wildcard_exception(self, sftp_hook_mock, gcs_hook_mock):
    """Executing with two ``*`` wildcards in ``source_object`` must raise ``AirflowException``.

    The hook mock arguments are accepted but not inspected by this test.
    """
    two_wildcards_object = "csv/*/test_*.csv"
    task = GCSToSFTPOperator(
        task_id=TASK_ID,
        source_bucket=TEST_BUCKET,
        source_object=two_wildcards_object,
        destination_path=DESTINATION_SFTP,
        move_object=False,
        gcp_conn_id=GCP_CONN_ID,
        sftp_conn_id=SFTP_CONN_ID,
        delegate_to=DELEGATE_TO,
    )

    # Construction is expected to succeed; only execute() sits inside the raises block.
    with pytest.raises(AirflowException):
        task.execute(None)
def test_execute_more_than_one_wildcard_exception(self, sftp_hook, gcs_hook):
    """Executing with ``SOURCE_OBJECT_MULTIPLE_WILDCARDS`` must raise ``AirflowException``.

    ``gcs_hook`` is used through the mock API (presumably the patched hook
    class); ``sftp_hook`` is accepted but not inspected.
    """
    # Stub the bucket listing with the first two entries of the shared file list.
    listed_objects = SOURCE_FILES_LIST[:2]
    gcs_hook.return_value.list.return_value = listed_objects

    task = GCSToSFTPOperator(
        task_id=TASK_ID,
        source_bucket=TEST_BUCKET,
        source_object=SOURCE_OBJECT_MULTIPLE_WILDCARDS,
        destination_path=DESTINATION_SFTP,
        move_object=False,
        gcp_conn_id=GCP_CONN_ID,
        sftp_conn_id=SFTP_CONN_ID,
        delegate_to=DELEGATE_TO,
    )

    # Construction is expected to succeed; only execute() sits inside the raises block.
    with pytest.raises(AirflowException):
        task.execute(None)
def test_execute_move_with_wildcard(self, sftp_hook, gcs_hook):
    """Move objects matched by a wildcard and verify the listing call and both deletes.

    ``gcs_hook`` is used through the mock API (presumably the patched hook
    class); ``sftp_hook`` is accepted but not inspected.
    """
    gcs_client = gcs_hook.return_value
    # Stub the bucket listing with the first two entries of the shared file list.
    gcs_client.list.return_value = SOURCE_FILES_LIST[:2]

    task = GCSToSFTPOperator(
        task_id=TASK_ID,
        source_bucket=TEST_BUCKET,
        source_object=SOURCE_OBJECT_WILDCARD_FILENAME,
        destination_path=DESTINATION_SFTP,
        move_object=True,
        gcp_conn_id=GCP_CONN_ID,
        sftp_conn_id=SFTP_CONN_ID,
        delegate_to=DELEGATE_TO,
    )
    task.execute(None)

    gcs_client.list.assert_called_with(TEST_BUCKET, delimiter=".txt", prefix="test_object")

    # Unpacking into two names also requires that delete was called exactly twice.
    first_delete, second_delete = gcs_client.delete.call_args_list
    expected_positional_args = (
        (TEST_BUCKET, "test_object/file1.txt"),
        (TEST_BUCKET, "test_object/file2.txt"),
    )
    # Index 0 of a recorded call holds its positional arguments.
    for recorded_call, expected_args in zip((first_delete, second_delete), expected_positional_args):
        assert recorded_call[0] == expected_args
def test_execute_move_with_wildcard(
    self,
    source_object,
    prefix,
    delimiter,
    gcs_files_list,
    target_objects,
    keep_directory_structure,
    sftp_hook_mock,
    gcs_hook_mock,
):
    """Move wildcard-matched objects and verify listing, downloads, uploads and deletes.

    The non-mock arguments are supplied by the caller (presumably a parametrize
    decorator outside this view). ``sftp_hook_mock`` and ``gcs_hook_mock`` are
    used through the mock API and are presumably the patched hook classes.
    """
    gcs_client = gcs_hook_mock.return_value
    sftp_client = sftp_hook_mock.return_value

    # Stub the bucket listing with the objects for this case.
    gcs_client.list.return_value = gcs_files_list

    task = GCSToSFTPOperator(
        task_id=TASK_ID,
        source_bucket=TEST_BUCKET,
        source_object=source_object,
        destination_path=DESTINATION_SFTP,
        keep_directory_structure=keep_directory_structure,
        move_object=True,
        gcp_conn_id=GCP_CONN_ID,
        sftp_conn_id=SFTP_CONN_ID,
        delegate_to=DELEGATE_TO,
    )
    task.execute(None)

    gcs_client.list.assert_called_with(TEST_BUCKET, delimiter=delimiter, prefix=prefix)

    # One download per listed object; the local file name is not pinned (mock.ANY).
    expected_downloads = [
        mock.call(bucket_name=TEST_BUCKET, object_name=listed_name, filename=mock.ANY)
        for listed_name in gcs_files_list
    ]
    gcs_client.download.assert_has_calls(expected_downloads)

    # One upload per expected target, rooted at the SFTP destination path.
    expected_uploads = [
        mock.call(os.path.join(DESTINATION_SFTP, remote_name), mock.ANY) for remote_name in target_objects
    ]
    sftp_client.store_file.assert_has_calls(expected_uploads)

    # move_object=True: a delete call is expected for each listed object.
    expected_deletes = [mock.call(TEST_BUCKET, listed_name) for listed_name in gcs_files_list]
    gcs_client.delete.assert_has_calls(expected_deletes)
OBJECT_SRC_1 = "parent-1.bin" OBJECT_SRC_2 = "dir-1/parent-2.bin" OBJECT_SRC_3 = "dir-2/*" DESTINATION_PATH_1 = "/tmp/single-file/" DESTINATION_PATH_2 = "/tmp/dest-dir-1/" DESTINATION_PATH_3 = "/tmp/dest-dir-2/" with models.DAG("example_gcs_to_sftp", start_date=days_ago(1), schedule_interval=None, tags=['example']) as dag: # [START howto_operator_gcs_to_sftp_copy_single_file] copy_file_from_gcs_to_sftp = GCSToSFTPOperator( task_id="file-copy-gsc-to-sftp", sftp_conn_id=SFTP_CONN_ID, source_bucket=BUCKET_SRC, source_object=OBJECT_SRC_1, destination_path=DESTINATION_PATH_1, ) # [END howto_operator_gcs_to_sftp_copy_single_file] check_copy_file_from_gcs_to_sftp = SFTPSensor( task_id="check-file-copy-gsc-to-sftp", sftp_conn_id=SFTP_CONN_ID, timeout=60, path=os.path.join(DESTINATION_PATH_1, OBJECT_SRC_1), ) # [START howto_operator_gcs_to_sftp_move_single_file_destination] move_file_from_gcs_to_sftp = GCSToSFTPOperator( task_id="file-move-gsc-to-sftp",
BUCKET_SRC = os.environ.get("GCP_GCS_BUCKET_1_SRC", "test-gcs-sftp") OBJECT_SRC_1 = "parent-1.bin" OBJECT_SRC_2 = "parent-2.bin" OBJECT_SRC_3 = "subdir-1/*" DESTINATION_PATH_1 = "/tmp/single-file/" DESTINATION_PATH_2 = "/tmp/dirs/" with models.DAG("example_gcs_to_sftp", default_args=default_args, schedule_interval=None, tags=['example']) as dag: # [START howto_operator_gcs_to_sftp_copy_single_file] copy_file_from_gcs_to_sftp = GCSToSFTPOperator( task_id="file-copy-gsc-to-sftp", source_bucket=BUCKET_SRC, source_object=OBJECT_SRC_1, destination_path=DESTINATION_PATH_1, ) # [END howto_operator_gcs_to_sftp_copy_single_file] # [START howto_operator_gcs_to_sftp_move_single_file_destination] move_file_from_gcs_to_sftp = GCSToSFTPOperator( task_id="file-move-gsc-to-sftp", source_bucket=BUCKET_SRC, source_object=OBJECT_SRC_2, destination_path=DESTINATION_PATH_1, move_object=True, ) # [END howto_operator_gcs_to_sftp_move_single_file_destination] # [START howto_operator_gcs_to_sftp_copy_directory]