Пример #1
0
def test_sftp_protocol(sdc_builder, sdc_executor, sftp):
    """Test the SFTP/FTP/FTPS destination with the protocol set to SFTP.

    A source file is first created through ``sdc_executor.execute_shell``. A Directory origin reads it
    as a whole file and the remote upload destination writes it to the SFTP server. The uploaded file
    is then read back and compared with the source data.

    The pipeline looks like:
        directory >> sftp_ftp_client
    """

    # Our destination SFTP/FTP/FTPS file name
    sftp_ftp_file_name = get_random_string(string.ascii_letters, 10)
    # Local temporary directory where we will create a source file to be uploaded to SFTP/FTP/FTPS server.
    # NOTE: os.path.join drops the leading '~' whenever tempfile.gettempdir() returns an absolute path.
    local_tmp_directory = os.path.join(
        '~', tempfile.gettempdir(), get_random_string(string.ascii_letters,
                                                      10))
    local_file_name = f'sdc-{get_random_string(string.ascii_letters, 5)}'
    raw_data = 'Hello World!'

    sdc_executor.execute_shell(f'mkdir {local_tmp_directory}/')
    sdc_executor.execute_shell(
        f'echo {raw_data} >> {local_tmp_directory}/{local_file_name}')

    # Build source file pipeline logic
    builder = sdc_builder.get_pipeline_builder()
    directory = builder.add_stage('Directory', type='origin')
    directory.data_format = 'WHOLE_FILE'
    directory.file_name_pattern = 'sdc*'
    directory.files_directory = local_tmp_directory

    sftp_ftp_client = builder.add_stage(
        name=
        'com_streamsets_pipeline_stage_destination_remote_RemoteUploadDTarget')
    sftp_ftp_client.file_name_expression = sftp_ftp_file_name

    directory >> sftp_ftp_client

    sftp_ftp_client.protocol = 'SFTP'

    sftp_ftp_client_pipeline = builder.build(
        'SFTP Destination Pipeline - Protocol').configure_for_environment(sftp)

    sdc_executor.add_pipeline(sftp_ftp_client_pipeline)

    # Start SFTP/FTP/FTPS upload (destination) file pipeline and assert pipeline has processed expected number of files
    sdc_executor.start_pipeline(
        sftp_ftp_client_pipeline).wait_for_pipeline_output_records_count(1)
    sdc_executor.stop_pipeline(sftp_ftp_client_pipeline)
    history = sdc_executor.get_pipeline_history(sftp_ftp_client_pipeline)

    try:
        assert history.latest.metrics.counter(
            'pipeline.batchInputRecords.counter').count >= 1
        assert history.latest.metrics.counter(
            'pipeline.batchOutputRecords.counter').count >= 1
        assert history.latest.metrics.counter(
            'pipeline.batchErrorRecords.counter').count == 0

        # Read SFTP destination file and compare our source data to assert
        assert sftp.get_string(os.path.join(
            sftp.path, sftp_ftp_file_name)).strip() == raw_data

        # Delete the uploaded test file from the SFTP server. The client gets its own try/finally so
        # that close() is only called once `client`/`transport` are bound; a failed assertion above
        # must surface as itself rather than as an UnboundLocalError raised during cleanup.
        transport, client = sftp.client
        try:
            client.remove(os.path.join(sftp.path, sftp_ftp_file_name))
        finally:
            client.close()
            transport.close()
    finally:
        # Always remove the local source directory, whether or not the checks above passed.
        sdc_executor.execute_shell(f'rm -R {local_tmp_directory}')
Пример #2
0
def test_sftp_destination(sdc_builder, sdc_executor, sftp):
    """Smoke test for the SFTP destination.

    Two pipelines are run in sequence. The first writes a text record to a local directory with the
    Local FS destination; the second reads that directory as whole files and uploads the result to
    the SFTP server. The uploaded file is then read back and compared with the original raw data.

    The pipelines look like:
        dev_raw_data_source >> local_fs
        directory >> sftp_ftp_client
    """
    # Name of the file on the SFTP server
    remote_file_name = get_random_string(string.ascii_letters, 10)
    # Local staging directory holding the file that will be uploaded
    staging_directory = os.path.join(tempfile.gettempdir(), get_random_string(string.ascii_letters, 10))

    # --- Pipeline 1: produce the local source file ---
    producer_builder = sdc_builder.get_pipeline_builder()

    raw_source = producer_builder.add_stage('Dev Raw Data Source')
    raw_source.data_format = 'TEXT'
    raw_source.raw_data = 'Hello World!'
    raw_source.stop_after_first_batch = True

    local_writer = producer_builder.add_stage('Local FS', type='destination')
    local_writer.directory_template = staging_directory
    local_writer.data_format = 'TEXT'

    raw_source >> local_writer
    producer_pipeline = producer_builder.build('Local FS Pipeline')

    # --- Pipeline 2: upload the local file to the SFTP server ---
    uploader_builder = sdc_builder.get_pipeline_builder()

    file_origin = uploader_builder.add_stage('Directory', type='origin')
    file_origin.data_format = 'WHOLE_FILE'
    file_origin.file_name_pattern = 'sdc*'
    file_origin.files_directory = staging_directory

    remote_target = uploader_builder.add_stage(
        name='com_streamsets_pipeline_stage_destination_remote_RemoteUploadDTarget')
    remote_target.file_name_expression = remote_file_name

    file_origin >> remote_target
    built_uploader = uploader_builder.build('SFTP Destination Pipeline')
    uploader_pipeline = built_uploader.configure_for_environment(sftp)

    sdc_executor.add_pipeline(producer_pipeline, uploader_pipeline)

    # Both pipelines are expected to see exactly one record in and one record out
    in_out_counters = ('pipeline.batchInputRecords.counter', 'pipeline.batchOutputRecords.counter')

    # Run the producer to completion and verify its record counts
    producer_run = sdc_executor.start_pipeline(producer_pipeline)
    producer_run.wait_for_finished()
    run_history = sdc_executor.get_pipeline_history(producer_pipeline)
    for counter_name in in_out_counters:
        assert run_history.latest.metrics.counter(counter_name).count == 1

    # Run the uploader until one record has been output, stop it, and verify its record counts
    uploader_run = sdc_executor.start_pipeline(uploader_pipeline)
    uploader_run.wait_for_pipeline_output_records_count(1)
    sdc_executor.stop_pipeline(uploader_pipeline)
    run_history = sdc_executor.get_pipeline_history(uploader_pipeline)
    for counter_name in in_out_counters:
        assert run_history.latest.metrics.counter(counter_name).count == 1

    # The uploaded file's content must match the data fed into the first pipeline
    uploaded_text = sftp.get_string(os.path.join(sftp.path, remote_file_name))
    assert uploaded_text.strip() == raw_source.raw_data

    # Remove the uploaded file from the SFTP server, always closing the connection afterwards
    transport, client = sftp.client
    try:
        client.remove(os.path.join(sftp.path, remote_file_name))
    finally:
        client.close()
        transport.close()
Пример #3
0
def test_authentication(sdc_builder, sdc_executor, sftp, stage_attributes):
    """Test the SFTP/FTP/FTPS destination with the authentication mode taken from ``stage_attributes``.

    A source file is first created through ``sdc_executor.execute_shell``. A Directory origin reads it
    as a whole file and the remote upload destination writes it to the server. The uploaded file is
    then read back and compared with the source data.

    ``stage_attributes['authentication']`` is assigned to the destination's ``authentication``
    attribute (presumably supplied by test parametrization defined outside this function).

    The pipeline looks like:
        directory >> sftp_ftp_client >= wiretap
        directory >= pipeline_finished_executor
    """
    # Our destination SFTP/FTP/FTPS file name
    sftp_ftp_file_name = get_random_string(string.ascii_letters, 10)
    # NOTE: os.path.join drops the leading '~' whenever tempfile.gettempdir() returns an absolute path.
    local_tmp_directory = os.path.join(
        '~', tempfile.gettempdir(), get_random_string(string.ascii_letters,
                                                      10))
    local_file_name = f'sdc-{get_random_string(string.ascii_letters, 5)}'
    raw_text_data = 'Hello World!'

    sdc_executor.execute_shell(f'mkdir {local_tmp_directory}/')
    sdc_executor.execute_shell(
        f'echo {raw_text_data} >> {local_tmp_directory}/{local_file_name}')

    # Build the upload pipeline
    builder = sdc_builder.get_pipeline_builder()
    directory = builder.add_stage('Directory', type='origin')
    directory.set_attributes(data_format='WHOLE_FILE',
                             file_name_pattern='sdc*',
                             files_directory=local_tmp_directory)

    pipeline_finished_executor = builder.add_stage(
        'Pipeline Finisher Executor')

    sftp_ftp_client = builder.add_stage(name=REMOTE_DESTINATION_STAGE)
    sftp_ftp_client.set_attributes(file_name_expression=sftp_ftp_file_name)

    wiretap = builder.add_wiretap()

    directory >> sftp_ftp_client >= wiretap.destination
    directory >= pipeline_finished_executor

    sftp_ftp_client.authentication = stage_attributes['authentication']

    sftp_ftp_client_pipeline = builder.build(
        'SFTP Executor Pipeline - Authentication').configure_for_environment(
            sftp)

    sdc_executor.add_pipeline(sftp_ftp_client_pipeline)

    # Start SFTP/FTP/FTPS upload (destination) file pipeline and assert pipeline has processed expected number of files
    sdc_executor.start_pipeline(sftp_ftp_client_pipeline).wait_for_finished()
    history = sdc_executor.get_pipeline_history(sftp_ftp_client_pipeline)

    try:
        assert history.latest.metrics.counter(
            'pipeline.batchInputRecords.counter').count >= 1
        # NOTE(review): presumably the threshold of 5 includes event records on top of the single
        # whole-file record - TODO confirm.
        assert history.latest.metrics.counter(
            'pipeline.batchOutputRecords.counter').count >= 5
        assert history.latest.metrics.counter(
            'pipeline.batchErrorRecords.counter').count == 0

        # Read the uploaded file back and compare it with our source data
        assert sftp.get_string(os.path.join(
            sftp.path, sftp_ftp_file_name)).strip() == raw_text_data

        # Delete the uploaded test file from the server. The client gets its own try/finally so that
        # close() is only called once `client`/`transport` are bound; a failed assertion above must
        # surface as itself rather than as an UnboundLocalError raised during cleanup.
        transport, client = sftp.client
        try:
            client.remove(os.path.join(sftp.path, sftp_ftp_file_name))
        finally:
            client.close()
            transport.close()
    finally:
        # Always remove the local source directory, whether or not the checks above passed.
        sdc_executor.execute_shell(f'rm -R {local_tmp_directory}')