Example No. 1
def test_run_validation_successful(patch_storage, patch_rabbit,
                                   patch_db_helper, tmp_path):
    # Given
    test_message = {"test_message": "blah"}
    mock_processor = setup_mock_processor({
        'header_1': [],
        'header_2': []
    }, test_message)
    bulk_processor = BulkProcessor(mock_processor)
    bulk_processor.working_dir = tmp_path
    mock_blob = Mock()
    mock_blob.name = 'mock_blob_name'
    patch_storage.Client.return_value.list_blobs.return_value = [mock_blob]

    patch_storage.Client.return_value.download_blob_to_file.side_effect = partial(
        mock_download_blob,
        mock_data=(b'header_1,header_2\n'
                   b'value1,value2\n'))

    # When
    bulk_processor.run()

    # Then
    mock_upload_to_bucket = patch_storage.Client.return_value.bucket.return_value.blob.return_value \
        .upload_from_filename
    mock_upload_to_bucket.assert_called_once_with(
        str(tmp_path.joinpath('PROCESSED_mock_blob_name')))
    patch_rabbit.return_value.__enter__.return_value.publish_message.assert_called_once_with(
        message=json.dumps(test_message),
        content_type='application/json',
        headers=None,
        exchange=mock_processor.exchange,
        routing_key=mock_processor.routing_key)
    patch_db_helper.connect_to_read_replica_pool.assert_called_once()
    assert_no_left_over_files(tmp_path)
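These tests rely on fixtures and helpers that are not shown on this page: patch_storage, patch_rabbit, patch_db_helper, setup_mock_processor, mock_download_blob and assert_no_left_over_files. A minimal sketch of what they might look like, assuming BulkProcessor lives in a bulk_processor module that imports the storage client, a RabbitContext and a db_helper (all patch targets and attribute names below are assumptions, not the real conftest):

import pytest
from unittest.mock import Mock, patch


@pytest.fixture
def patch_storage():
    # Assumed patch target: wherever BulkProcessor resolves google.cloud.storage
    with patch('bulk_processor.storage') as patched_storage:
        yield patched_storage


@pytest.fixture
def patch_rabbit():
    # Assumed name of the publishing context manager used by BulkProcessor
    with patch('bulk_processor.RabbitContext') as patched_rabbit:
        yield patched_rabbit


@pytest.fixture
def patch_db_helper():
    # Assumed name of the database helper module
    with patch('bulk_processor.db_helper') as patched_db_helper:
        yield patched_db_helper


def setup_mock_processor(schema, test_message):
    # A processor double exposing the attributes BulkProcessor reads
    mock_processor = Mock()
    mock_processor.schema = schema
    mock_processor.build_event_messages.return_value = [test_message]
    return mock_processor


def mock_download_blob(_blob, file_handler, mock_data=None):
    # Stand-in for Client.download_blob_to_file: write canned bytes to the
    # file object the code under test passes in
    file_handler.write(mock_data)


def assert_no_left_over_files(working_dir):
    # The processor should leave its working directory empty after a run
    assert not any(working_dir.iterdir())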
Example No. 2
def test_validation_row_too_short(patch_storage, patch_rabbit, patch_db_helper,
                                  tmp_path):
    # Given
    mock_processor = setup_mock_processor({'COL_1': [], 'COL_2': []}, None)
    bulk_processor = BulkProcessor(mock_processor)
    bulk_processor.working_dir = tmp_path
    mock_blob = Mock()
    mock_blob.name = 'mock_blob_name'
    patch_storage.Client.return_value.list_blobs.return_value = [mock_blob]

    # Mock data is missing the 2nd column from its row entirely
    patch_storage.Client.return_value.download_blob_to_file.side_effect = partial(
        mock_download_blob, mock_data=b'COL_1,COL_2\n'
        b'col_1_value')

    # When
    bulk_processor.run()

    # Then
    mock_upload_to_bucket = patch_storage.Client.return_value.bucket.return_value.blob.return_value \
        .upload_from_filename
    mock_upload_calls = mock_upload_to_bucket.call_args_list
    assert len(mock_upload_calls) == 2, 'Upload to bucket should be called twice'
    assert call(str(
        tmp_path.joinpath('ERROR_mock_blob_name'))) in mock_upload_calls
    assert call(str(
        tmp_path.joinpath('ERROR_DETAIL_mock_blob_name'))) in mock_upload_calls
    patch_rabbit.return_value.__enter__.return_value.publish_message.assert_not_called()
    patch_db_helper.connect_to_read_replica_pool.assert_called_once()

    assert_no_left_over_files(tmp_path)
Example No. 3
def bulk_non_compliance_processed(context):
    # Run against the real bucket if it is configured
    if Config.BULK_NON_COMPLIANCE_BUCKET_NAME:
        BulkProcessor(NonComplianceProcessor()).run()
        return

    # If we don't have a bucket, mock the storage bucket client interactions to work with only local files
    with mock_bulk_processor_bucket(context.non_compliance_bulk_file):
        BulkProcessor(NonComplianceProcessor()).run()
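mock_bulk_processor_bucket is not shown on this page either; one plausible shape for it is a context manager that patches the storage client so a local file stands in for the bucket (the patch target and Path-like bulk_file argument are assumptions):

from contextlib import contextmanager
from unittest.mock import Mock, patch


@contextmanager
def mock_bulk_processor_bucket(bulk_file):
    # Listing blobs yields the local file; "downloading" copies its bytes
    with patch('bulk_processor.storage') as patched_storage:
        mock_blob = Mock()
        mock_blob.name = bulk_file.name
        client = patched_storage.Client.return_value
        client.list_blobs.return_value = [mock_blob]
        client.download_blob_to_file.side_effect = (
            lambda _blob, file_handler: file_handler.write(bulk_file.read_bytes()))
        yield patched_storage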
Example No. 4
def process_uninvalidate_addresses_updates_file(context):
    # Run against the real bucket if it is configured
    if Config.BULK_UNINVALIDATED_ADDRESS_BUCKET_NAME:
        BulkProcessor(UnInvalidateAddressProcessor()).run()
        return

    # If we don't have a bucket, mock the storage bucket client interactions to work with only local files
    with mock_bulk_processor_bucket(context.bulk_uninvalidated_addresses_file):
        BulkProcessor(UnInvalidateAddressProcessor()).run()
Example No. 5
def process_bulk_address_updates_file(context):
    # Run against the real bucket if it is configured
    if Config.BULK_ADDRESS_UPDATE_BUCKET_NAME:
        BulkProcessor(AddressUpdateProcessor()).run()
        return

    # If we don't have a bucket, mock the storage bucket client interactions to work with only local files
    with mock_bulk_processor_bucket(context.bulk_address_updates_file):
        BulkProcessor(AddressUpdateProcessor()).run()
Example No. 6
def bulk_questionnaire_link_processed(context):
    # Run against the real bucket if it is configured
    if Config.BULK_QID_LINK_BUCKET_NAME:
        BulkProcessor(QidLinkProcessor()).run()
        return

    # If we don't have a bucket, mock the storage bucket client interactions to work with only local files
    with mock_bulk_processor_bucket(context.qid_link_bulk_file):
        BulkProcessor(QidLinkProcessor()).run()
Example No. 7
def process_bulk_refusal_file(context):
    # Run against the real bucket if it is configured
    if Config.BULK_REFUSAL_BUCKET_NAME:
        BulkProcessor(RefusalProcessor()).run()
        return

    # If we don't have a bucket, mock the storage bucket client interactions to work with only local files
    with mock_bulk_processor_bucket(context.bulk_refusals_file):
        BulkProcessor(RefusalProcessor()).run()
Example No. 8
def process_bulk_new_address_file(context):
    new_address_processor = NewAddressProcessor(
        action_plan_id=context.action_plan_id,
        collection_exercise_id=context.collection_exercise_id)

    # Run against the real bucket if it is configured
    if Config.BULK_NEW_ADDRESS_BUCKET_NAME:
        BulkProcessor(new_address_processor).run()
        return

    # If we don't have a bucket, mock the storage bucket client interactions to work with only local files
    with mock_bulk_processor_bucket(context.bulk_new_address_file):
        BulkProcessor(new_address_processor).run()
Example No. 9
def test_refusal_validation_headers_fails_empty(_patched_storage_client):
    result = BulkProcessor(
        RefusalProcessor()).find_header_validation_errors([])

    assert result.line_number == 1
    assert "refusal_type" in result.description
    assert "case_id" in result.description
Example No. 10
def main():
    logger_initial_config()
    logger = wrap_logger(logging.getLogger(__name__))
    logger.info('Started bulk processing qid linking',
                app_log_level=Config.LOG_LEVEL,
                environment=Config.ENVIRONMENT)
    BulkProcessor(QidLinkProcessor()).run()
Example No. 11
def main():
    logger_initial_config()
    logger = wrap_logger(logging.getLogger(__name__))
    logger.info('Started bulk processing non compliance',
                app_log_level=Config.LOG_LEVEL,
                environment=Config.ENVIRONMENT)
    BulkProcessor(NonComplianceProcessor()).run()
Example No. 12
def main():
    logger_initial_config()
    logger = wrap_logger(logging.getLogger(__name__))
    logger.info('Started bulk processing uninvalidate addresses',
                app_log_level=Config.LOG_LEVEL,
                environment=Config.ENVIRONMENT)
    BulkProcessor(UnInvalidateAddressProcessor()).run()
Example No. 13
def test_qid_link_validation_headers(_patched_storage_client):
    qid_link_headers = ["case_id", "qid"]

    result = BulkProcessor(
        QidLinkProcessor()).find_header_validation_errors(qid_link_headers)

    assert result is None
Example No. 14
def test_refusal_validation_headers(_patched_storage_client):
    refusal_headers = ["case_id", "refusal_type"]

    result = BulkProcessor(
        RefusalProcessor()).find_header_validation_errors(refusal_headers)

    assert result is None
Example No. 15
def test_qid_link_validation_headers_fails_empty(_patched_storage_client):
    result = BulkProcessor(
        QidLinkProcessor()).find_header_validation_errors([])

    assert result.line_number == 1
    assert "case_id" in result.description
    assert "qid" in result.description
Example No. 16
def test_invalid_address_validation_headers_fails_empty(
        _patched_storage_client):
    result = BulkProcessor(
        InvalidAddressProcessor()).find_header_validation_errors([])

    assert result.line_number == 1
    assert "reason" in result.description
    assert "case_id" in result.description
Example No. 17
def test_invalid_address_validation_headers(_patched_storage_client):
    invalid_address_headers = ["case_id", "reason"]

    result = BulkProcessor(
        InvalidAddressProcessor()).find_header_validation_errors(
            invalid_address_headers)

    assert result is None
Example No. 18
def test_uninvalidate_address_validation_headers(patch_storage):
    uninvalidated_address_headers = ["CASE_ID"]

    result = BulkProcessor(
        UnInvalidateAddressProcessor()).find_header_validation_errors(
            uninvalidated_address_headers)

    assert result is None
Example No. 19
def test_non_compliance_validation_headers_fails_empty(
        _patched_storage_client):
    result = BulkProcessor(
        NonComplianceProcessor()).find_header_validation_errors([])

    assert result.line_number == 1
    assert "CASE_ID" in result.description
    assert "NC_STATUS" in result.description
Example No. 20
def test_qid_link_validation_headers_fails_qid(_patched_storage_client):
    qid_link_headers = ["case_id", "notqid"]

    result = BulkProcessor(
        QidLinkProcessor()).find_header_validation_errors(qid_link_headers)

    assert result.line_number == 1
    assert "notqid" in result.description
    assert "qid" in result.description
Example No. 21
def test_non_compliance_validation_headers(_patched_storage_client):
    non_compliance_headers = [
        "CASE_ID", "NC_STATUS", "FIELDCOORDINATOR_ID", "FIELDOFFICER_ID"
    ]

    result = BulkProcessor(
        NonComplianceProcessor()).find_header_validation_errors(
            non_compliance_headers)

    assert result is None
Example No. 22
def test_uninvalidate_address_validation_headers_fails_case_id(patch_storage):
    uninvalidated_address_headers = ["not_a_case_id"]

    result = BulkProcessor(
        UnInvalidateAddressProcessor()).find_header_validation_errors(
            uninvalidated_address_headers)

    assert result.line_number == 1
    assert "not_a_case_id" in result.description
    assert "case_id" in result.description
Example No. 23
def test_rebuild_errored_csv_row_too_many_columns():
    # Given
    # If a row contains too many columns then the excess will be stored in a list in the None key
    row_in_expected_format = {'COL_1': 'value_1', None: ['extra_1', 'extra_2']}

    # When
    rebuilt_row = BulkProcessor.rebuild_errored_csv_row(row_in_expected_format)

    # Then
    assert rebuilt_row == 'value_1,extra_1,extra_2'
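The None key exercised here comes straight from csv.DictReader: with the default restkey=None, any excess values on a row are collected into a list under the None key. A quick demonstration:

import csv
import io

reader = csv.DictReader(io.StringIO('COL_1\nvalue_1,extra_1,extra_2\n'))
assert next(reader) == {'COL_1': 'value_1', None: ['extra_1', 'extra_2']}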
Example No. 24
def test_rebuild_errored_csv_row_too_few_columns():
    # Given
    # If a row contains too few columns then the missing values on the end will be stored as None
    row_in_expected_format = {'COL_1': 'value_1', 'COL_MISSING_2': None}

    # When
    rebuilt_row = BulkProcessor.rebuild_errored_csv_row(row_in_expected_format)

    # Then
    assert rebuilt_row == 'value_1'
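Likewise, when a row is short, csv.DictReader fills the missing trailing columns with its restval default of None, which is exactly the shape this test feeds in:

import csv
import io

reader = csv.DictReader(io.StringIO('COL_1,COL_MISSING_2\nvalue_1\n'))
assert next(reader) == {'COL_1': 'value_1', 'COL_MISSING_2': None}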
Example No. 25
def test_invalid_address_validation_headers_fails_case_id(
        _patched_storage_client):
    invalid_address_headers = ["not_a_case_id", "reason"]

    result = BulkProcessor(
        InvalidAddressProcessor()).find_header_validation_errors(
            invalid_address_headers)

    assert result.line_number == 1
    assert "not_a_case_id" in result.description
    assert "case_id" in result.description
Example No. 26
def test_process_file_encoding_failure(patch_storage, patch_rabbit, tmp_path):
    schema = {'header_1': [], 'header_2': []}
    header = ','.join(schema.keys())
    mock_processor = setup_mock_processor(schema, None)
    bulk_processor = BulkProcessor(mock_processor)
    bulk_processor.working_dir = tmp_path
    test_file = RESOURCE_PATH.joinpath('bulk_test_file_encoding_failure.csv')

    success_file, error_file, error_detail_file = bulk_processor.initialise_results_files(
        test_file.name)
    success_count, failure_count = bulk_processor.process_file(
        test_file, success_file, error_file, error_detail_file)

    assert failure_count == 1, 'Should have one failure when it tries to decode the file'
    assert not success_count, 'Should not successfully process any rows'

    assert success_file.read_text() == header + '\n'
    assert 'Invalid file encoding, requires utf-8' in error_detail_file.read_text()
    patch_rabbit.publish_message.assert_not_called()
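The bulk_test_file_encoding_failure.csv resource is not reproduced here; any file containing a byte sequence that cannot be decoded as UTF-8 would behave the same way. For example (an assumed, equivalent file, not necessarily the real one):

from pathlib import Path

# 0xe9 is 'é' in Latin-1; as a lone byte it is not valid UTF-8, so reading
# this file with the default utf-8 codec raises UnicodeDecodeError
Path('bulk_test_file_encoding_failure.csv').write_bytes(
    b'header_1,header_2\n'
    b'caf\xe9,value\n')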
Example No. 27
def test_run_success_failure_mix(patch_storage, patch_rabbit, patch_db_helper,
                                 tmp_path):
    # Given
    test_message = {"test_message": "blah"}

    mock_processor = setup_mock_processor({'header': [no_invalid_validator()]},
                                          test_message)
    bulk_processor = BulkProcessor(mock_processor)
    bulk_processor.working_dir = tmp_path
    mock_blob = Mock()
    mock_blob.name = 'mock_blob_name'
    patch_storage.Client.return_value.list_blobs.return_value = [mock_blob]

    patch_storage.Client.return_value.download_blob_to_file.side_effect = partial(
        mock_download_blob, mock_data=(b'header\n'
                                       b'value\n'
                                       b'invalid'))
    # When
    bulk_processor.run()

    # Then
    mock_upload_to_bucket = patch_storage.Client.return_value.bucket.return_value.blob.return_value. \
        upload_from_filename
    mock_upload_calls = mock_upload_to_bucket.call_args_list
    assert len(mock_upload_calls) == 3, 'Upload to bucket should be called three times'
    assert call(str(
        tmp_path.joinpath('PROCESSED_mock_blob_name'))) in mock_upload_calls
    assert call(str(
        tmp_path.joinpath('ERROR_mock_blob_name'))) in mock_upload_calls
    assert call(str(
        tmp_path.joinpath('ERROR_DETAIL_mock_blob_name'))) in mock_upload_calls

    patch_rabbit.return_value.__enter__.return_value.publish_message.assert_called_once_with(
        message=json.dumps(test_message),
        content_type='application/json',
        headers=None,
        exchange=mock_processor.exchange,
        routing_key=mock_processor.routing_key)
    patch_db_helper.connect_to_read_replica_pool.assert_called_once()
    assert_no_left_over_files(tmp_path)
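no_invalid_validator is another helper that is not shown. From the way this test uses it, it plausibly returns a column validator that rejects the literal cell value 'invalid'; the signature and failure mechanism below are assumptions:

def no_invalid_validator():
    # Hypothetical validator factory: fail any cell whose value is the
    # literal string 'invalid', accept everything else
    def validate(value, **_kwargs):
        if value == 'invalid':
            raise ValueError(f'Value "{value}" is invalid')

    return validate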
Example No. 28
def test_process_file_successful(patch_storage, patch_rabbit, tmp_path):
    schema = {'header_1': [], 'header_2': []}
    header = ','.join(schema.keys())
    mock_processor = setup_mock_processor(schema, None)
    mock_processor.build_event_messages.side_effect = lambda row: [row]
    bulk_processor = BulkProcessor(mock_processor)
    bulk_processor.working_dir = tmp_path
    bulk_processor.rabbit = patch_rabbit
    test_file = RESOURCE_PATH.joinpath('bulk_test_file_success.csv')

    success_file, error_file, error_detail_file = bulk_processor.initialise_results_files(
        test_file.name)
    success_count, failure_count = bulk_processor.process_file(
        test_file, success_file, error_file, error_detail_file)

    assert not failure_count, 'Should have no processing errors'
    assert success_count == 1, 'Should successfully process one row'

    assert success_file.read_text() == test_file.read_text()
    assert error_file.read_text() == header + '\n'
    assert error_detail_file.read_text() == HEADER_IS_VALID

    patch_rabbit.publish_message.assert_called_once_with(
        message=json.dumps({
            'header_1': 'foo',
            'header_2': 'bar'
        }),
        content_type='application/json',
        headers=None,
        exchange=mock_processor.exchange,
        routing_key=mock_processor.routing_key)
Example No. 29
def test_rebuild_errored_csv_row():
    # Given
    row_in_expected_format = {
        'COL_1': 'value_1',
        'COL_2': '',
        'COL_3': 'value_3'
    }

    # When
    rebuilt_row = BulkProcessor.rebuild_errored_csv_row(row_in_expected_format)

    # Then
    assert rebuilt_row == 'value_1,,value_3'
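Taken together, the three rebuild_errored_csv_row tests (Examples 23, 24 and 29) admit one straightforward implementation: emit the row's values in order, splice the None-key overflow back in, and drop the trailing Nones left by short rows. A sketch consistent with all three assertions (not necessarily the real code):

@staticmethod
def rebuild_errored_csv_row(csv_row: dict) -> str:
    rebuilt_values = []
    for column, value in csv_row.items():
        if column is None:
            # Excess columns collected by csv.DictReader under the None key
            rebuilt_values.extend(value)
        elif value is not None:
            # Missing trailing columns read back as None and are dropped
            rebuilt_values.append(value)
    return ','.join(rebuilt_values)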
Example No. 30
def test_non_compliance_validation_headers_fail(_patched_storage_client):
    non_compliance_headers = [
        "ID", "NC_STAT", "FIELDCOORDINATORID", "FIELDOFFICERID"
    ]

    result = BulkProcessor(
        NonComplianceProcessor()).find_header_validation_errors(
            non_compliance_headers)

    assert result.line_number == 1
    assert "ID" in result.description
    assert "CASE_ID" in result.description
    assert "NC_STAT" in result.description
    assert "FIELDCOORDINATORID" in result.description
    assert "FIELDOFFICERID" in result.description