def test_sftp_protocol(sdc_builder, sdc_executor, sftp):
    """Test the remote upload destination with the protocol set to SFTP.

    A local file is first created with shell commands run through ``sdc_executor``. A Directory
    origin then hands that file, as a whole file, to the SFTP/FTP/FTPS destination stage, and the
    test verifies that the uploaded file holds the same data as the source file.

    The pipeline looks like:
        directory >> sftp_ftp_client
    """
    # Our destination SFTP/FTP/FTPS file name
    sftp_ftp_file_name = get_random_string(string.ascii_letters, 10)

    # Local temporary directory where we will create a source file to be uploaded to SFTP/FTP/FTPS server
    local_tmp_directory = os.path.join(tempfile.gettempdir(), get_random_string(string.ascii_letters, 10))
    local_file_name = f'sdc-{get_random_string(string.ascii_letters, 5)}'
    raw_data = 'Hello World!'

    sdc_executor.execute_shell(f'mkdir {local_tmp_directory}/')

    # Bound up front so the cleanup below can tell whether a connection was ever opened; otherwise a
    # failing assertion would surface as an unbound-name error raised from the finally clause.
    transport = client = None
    try:
        sdc_executor.execute_shell(f'echo {raw_data} >> {local_tmp_directory}/{local_file_name}')

        # Build source file pipeline logic
        builder = sdc_builder.get_pipeline_builder()

        directory = builder.add_stage('Directory', type='origin')
        directory.data_format = 'WHOLE_FILE'
        directory.file_name_pattern = 'sdc*'
        directory.files_directory = local_tmp_directory

        sftp_ftp_client = builder.add_stage(
            name='com_streamsets_pipeline_stage_destination_remote_RemoteUploadDTarget')
        sftp_ftp_client.file_name_expression = sftp_ftp_file_name

        directory >> sftp_ftp_client

        sftp_ftp_client.protocol = 'SFTP'

        sftp_ftp_client_pipeline = builder.build(
            'SFTP Destination Pipeline - Protocol').configure_for_environment(sftp)
        sdc_executor.add_pipeline(sftp_ftp_client_pipeline)

        # Start SFTP/FTP/FTPS upload (destination) file pipeline and assert pipeline has processed
        # expected number of files
        sdc_executor.start_pipeline(sftp_ftp_client_pipeline).wait_for_pipeline_output_records_count(1)
        sdc_executor.stop_pipeline(sftp_ftp_client_pipeline)

        history = sdc_executor.get_pipeline_history(sftp_ftp_client_pipeline)
        metrics = history.latest.metrics
        assert metrics.counter('pipeline.batchInputRecords.counter').count >= 1
        assert metrics.counter('pipeline.batchOutputRecords.counter').count >= 1
        assert metrics.counter('pipeline.batchErrorRecords.counter').count == 0

        # Read SFTP destination file and compare our source data to assert
        remote_file_path = os.path.join(sftp.path, sftp_ftp_file_name)
        assert sftp.get_string(remote_file_path).strip() == raw_data

        # Delete the test SFTP destination file we created
        transport, client = sftp.client
        client.remove(remote_file_path)
    finally:
        try:
            if client is not None:
                client.close()
            if transport is not None:
                transport.close()
        finally:
            # Always remove the local source directory, even if closing the connection failed
            sdc_executor.execute_shell(f'rm -R {local_tmp_directory}')
def test_sftp_destination(sdc_builder, sdc_executor, sftp):
    """Smoke test SFTP destination.

    We first create a local file using the Local FS destination stage and then use that file for
    the SFTP destination stage to see if it gets successfully uploaded. The local directory written
    by the first pipeline is removed at the end of the test, whether or not the test passed.

    The pipelines look like:
        dev_raw_data_source >> local_fs
        directory >> sftp_ftp_client
    """
    # Our destination SFTP file name
    sftp_file_name = get_random_string(string.ascii_letters, 10)

    # Local temporary directory where we will create a source file to be uploaded to SFTP server
    local_tmp_directory = os.path.join(tempfile.gettempdir(), get_random_string(string.ascii_letters, 10))

    # Build source file pipeline logic
    builder = sdc_builder.get_pipeline_builder()

    dev_raw_data_source = builder.add_stage('Dev Raw Data Source')
    dev_raw_data_source.data_format = 'TEXT'
    dev_raw_data_source.raw_data = 'Hello World!'
    dev_raw_data_source.stop_after_first_batch = True

    local_fs = builder.add_stage('Local FS', type='destination')
    local_fs.directory_template = local_tmp_directory
    local_fs.data_format = 'TEXT'

    dev_raw_data_source >> local_fs
    local_fs_pipeline = builder.build('Local FS Pipeline')

    # Build SFTP destination pipeline logic
    builder = sdc_builder.get_pipeline_builder()

    directory = builder.add_stage('Directory', type='origin')
    directory.data_format = 'WHOLE_FILE'
    directory.file_name_pattern = 'sdc*'
    directory.files_directory = local_tmp_directory

    sftp_ftp_client = builder.add_stage(name='com_streamsets_pipeline_stage_destination_remote_RemoteUploadDTarget')
    sftp_ftp_client.file_name_expression = sftp_file_name

    directory >> sftp_ftp_client
    sftp_ftp_client_pipeline = builder.build('SFTP Destination Pipeline').configure_for_environment(sftp)

    sdc_executor.add_pipeline(local_fs_pipeline, sftp_ftp_client_pipeline)

    try:
        # Start source file creation pipeline and assert file has been created with expected number of records
        sdc_executor.start_pipeline(local_fs_pipeline).wait_for_finished()
        history = sdc_executor.get_pipeline_history(local_fs_pipeline)
        assert history.latest.metrics.counter('pipeline.batchInputRecords.counter').count == 1
        assert history.latest.metrics.counter('pipeline.batchOutputRecords.counter').count == 1

        # Start SFTP upload (destination) file pipeline and assert pipeline has processed expected number of files
        sdc_executor.start_pipeline(sftp_ftp_client_pipeline).wait_for_pipeline_output_records_count(1)
        sdc_executor.stop_pipeline(sftp_ftp_client_pipeline)
        history = sdc_executor.get_pipeline_history(sftp_ftp_client_pipeline)
        assert history.latest.metrics.counter('pipeline.batchInputRecords.counter').count == 1
        assert history.latest.metrics.counter('pipeline.batchOutputRecords.counter').count == 1

        # Read SFTP destination file and compare our source data to assert
        remote_file_path = os.path.join(sftp.path, sftp_file_name)
        assert sftp.get_string(remote_file_path).strip() == dev_raw_data_source.raw_data

        # Delete the test SFTP destination file we created
        transport, client = sftp.client
        try:
            client.remove(remote_file_path)
        finally:
            client.close()
            transport.close()
    finally:
        # Remove the directory written by the Local FS pipeline; -f keeps the cleanup from failing
        # when the first pipeline never got as far as creating it.
        sdc_executor.execute_shell(f'rm -Rf {local_tmp_directory}')
def test_authentication(sdc_builder, sdc_executor, sftp, stage_attributes):
    """Test the SFTP/FTP/FTPS destination with the authentication taken from ``stage_attributes``.

    A local file is first created with shell commands run through ``sdc_executor``. A Directory
    origin then hands that file, as a whole file, to the SFTP/FTP/FTPS destination stage. The test
    waits for the pipeline to finish, checks the pipeline metrics and compares the content of the
    uploaded file with the source data.

    The pipeline looks like:
        directory >> sftp_ftp_client >= wiretap
        directory >= pipeline_finished_executor
    """
    # Our destination SFTP/FTP/FTPS file name
    sftp_ftp_file_name = get_random_string(string.ascii_letters, 10)

    # Local temporary directory where we will create a source file to be uploaded to SFTP/FTP/FTPS server
    local_tmp_directory = os.path.join(tempfile.gettempdir(), get_random_string(string.ascii_letters, 10))
    local_file_name = f'sdc-{get_random_string(string.ascii_letters, 5)}'
    raw_text_data = 'Hello World!'

    sdc_executor.execute_shell(f'mkdir {local_tmp_directory}/')

    # Bound up front so the cleanup below can tell whether a connection was ever opened; otherwise a
    # failing assertion would surface as an unbound-name error raised from the finally clause.
    transport = client = None
    try:
        sdc_executor.execute_shell(f'echo {raw_text_data} >> {local_tmp_directory}/{local_file_name}')

        # Build the upload pipeline
        builder = sdc_builder.get_pipeline_builder()

        directory = builder.add_stage('Directory', type='origin')
        directory.set_attributes(data_format='WHOLE_FILE',
                                 file_name_pattern='sdc*',
                                 files_directory=local_tmp_directory)

        pipeline_finished_executor = builder.add_stage('Pipeline Finisher Executor')

        sftp_ftp_client = builder.add_stage(name=REMOTE_DESTINATION_STAGE)
        sftp_ftp_client.set_attributes(file_name_expression=sftp_ftp_file_name)

        wiretap = builder.add_wiretap()

        directory >> sftp_ftp_client >= wiretap.destination
        directory >= pipeline_finished_executor

        sftp_ftp_client.authentication = stage_attributes['authentication']

        sftp_ftp_client_pipeline = builder.build(
            'SFTP Executor Pipeline - Authentication').configure_for_environment(sftp)
        sdc_executor.add_pipeline(sftp_ftp_client_pipeline)

        # Start SFTP/FTP/FTPS upload (destination) file pipeline and assert pipeline has processed
        # expected number of files
        sdc_executor.start_pipeline(sftp_ftp_client_pipeline).wait_for_finished()

        history = sdc_executor.get_pipeline_history(sftp_ftp_client_pipeline)
        metrics = history.latest.metrics
        assert metrics.counter('pipeline.batchInputRecords.counter').count >= 1
        # NOTE(review): the threshold of 5 output records is kept as found - confirm it matches what
        # the stages of this pipeline are expected to emit.
        assert metrics.counter('pipeline.batchOutputRecords.counter').count >= 5
        assert metrics.counter('pipeline.batchErrorRecords.counter').count == 0

        # Read SFTP destination file and compare our source data to assert
        remote_file_path = os.path.join(sftp.path, sftp_ftp_file_name)
        assert sftp.get_string(remote_file_path).strip() == raw_text_data

        # Delete the test SFTP destination file we created
        transport, client = sftp.client
        client.remove(remote_file_path)
    finally:
        try:
            if client is not None:
                client.close()
            if transport is not None:
                transport.close()
        finally:
            # Always remove the local source directory, even if closing the connection failed
            sdc_executor.execute_shell(f'rm -R {local_tmp_directory}')