Пример #1
0
def test_dictionary_lookup_standardiser_appends_columns_to_data():
    """Processing a two-column data set leaves the header row six columns wide."""
    standardiser = EthnicityDictionaryLookup(
        "tests/test_data/test_dictionary_lookup/test_lookup.csv")

    # given data
    data_set = EthnicityDataset(
        data=[["Ethnicity", "Ethnicity type"], ["a", "any ethnicity type"]])

    # when we add_columns
    standardiser.process_data_set(data_set)

    # then 4 columns are appended to the 2 original ones
    header_row = data_set.get_data()[0]
    # Use the len() builtin rather than invoking the __len__ dunder directly.
    assert len(header_row) == 6
Пример #2
0
def test_dictionary_lookup_standardiser_without_default_values_appends_blanks_when_not_found():
    """A row with no match gets four empty strings appended when no default values are passed."""
    lookup_path = "tests/test_data/test_dictionary_lookup/test_lookup.csv"
    lookup = EthnicityDictionaryLookup(lookup_path)

    # given: one data row whose ethnicity value is deliberately unusual
    header = ["Ethnicity", "Ethnicity type"]
    unmatched_row = ["strange", "missing"]
    dataset = EthnicityDataset(data=[header, unmatched_row])

    # when: the lookup processes the data set
    lookup.process_data_set(dataset)

    # then: the original two values are followed by one blank per added column
    expected_row = ["strange", "missing"] + [""] * 4
    assert dataset.get_data()[1] == expected_row
Пример #3
0
def test_dictionary_lookup_standardiser_appends_columns_using_defaults_for_unknown_ethnicity_type():
    """Rows whose ethnicity type is not in the lookup still receive a label in column 2."""
    lookup = EthnicityDictionaryLookup(
        "tests/test_data/test_dictionary_lookup/test_lookup.csv")

    # given: rows carrying an ethnicity type ("xxx") that is not in the lookup
    rows_in = [
        ["Ethnicity", "Ethnicity type"],
        [" a", "xxx"],
        ["b ", "xxx"],
    ]
    dataset = EthnicityDataset(data=rows_in)

    # when: the lookup processes the data set
    lookup.process_data_set(dataset)

    # then: the third column holds the header "Label" followed by the
    # looked-up labels (presumably the lookup's default entries -- confirm
    # against test_lookup.csv)
    rows_out = dataset.get_data()
    assert rows_out[0][2] == "Label"
    assert rows_out[1][2] == "A"
    assert rows_out[2][2] == "B"
Пример #4
0
def test_dictionary_lookup_standardiser_appends_columns_trimming_white_space_for_lookup():
    """Ethnicity values with surrounding whitespace still resolve to their labels."""
    lookup = EthnicityDictionaryLookup(
        "tests/test_data/test_dictionary_lookup/test_lookup.csv")

    # given: one value with leading whitespace and one with trailing whitespace
    leading_space_row = [" a", "phonetic"]
    trailing_space_row = ["b ", "phonetic"]
    dataset = EthnicityDataset(
        data=[["Ethnicity", "Ethnicity type"], leading_space_row,
              trailing_space_row])

    # when: the lookup processes the data set
    lookup.process_data_set(dataset)

    # then: column 2 of each row holds the expected label
    expected_labels = ("Label", "alpha", "bravo")
    rows_out = dataset.get_data()
    for row_index, label in enumerate(expected_labels):
        assert rows_out[row_index][2] == label
Пример #5
0
def test_dictionary_lookup_standardiser_appends_columns_using_case_insensitive_lookup():
    """An upper-case ethnicity value resolves to the same label as its lower-case form."""
    csv_path = "tests/test_data/test_dictionary_lookup/test_lookup.csv"
    lookup = EthnicityDictionaryLookup(csv_path)

    # given: the first data row is capitalised, the second is not
    table = [["Ethnicity", "Ethnicity type"]]
    table.append(["A", "phonetic"])
    table.append(["b", "phonetic"])
    dataset = EthnicityDataset(data=table)

    # when: the lookup processes the data set
    lookup.process_data_set(dataset)

    # then: the label column is populated for the header and both data rows
    processed = dataset.get_data()
    assert processed[0][2] == "Label"
    assert processed[1][2] == "alpha"
    assert processed[2][2] == "bravo"
Пример #6
0
def test_dictionary_lookup_standardiser_appends_columns_using_specific_ethnicity_type_in_lookup():
    """Rows with the "phonetic" ethnicity type receive the "alpha"/"bravo" labels."""
    lookup = EthnicityDictionaryLookup(
        "tests/test_data/test_dictionary_lookup/test_lookup.csv")

    # given: rows carrying an ethnicity type that is present in the lookup
    ethnicity_type = "phonetic"
    rows_in = [["Ethnicity", "Ethnicity type"]]
    rows_in += [[value, ethnicity_type] for value in ("a", "b")]
    dataset = EthnicityDataset(data=rows_in)

    # when: the lookup processes the data set
    lookup.process_data_set(dataset)

    # then: the label column holds the values expected for the "phonetic" type
    # (presumably taken from type-specific lookup entries -- confirm against
    # test_lookup.csv)
    label_column = 2
    assert dataset.get_data()[0][label_column] == "Label"
    assert dataset.get_data()[1][label_column] == "alpha"
    assert dataset.get_data()[2][label_column] == "bravo"
Пример #7
0
def test_dictionary_lookup_standardiser_with_wildcard_values_inserts_custom_defaults_when_not_found():
    """Custom defaults are appended to an unmatched row, with "*" replaced by its ethnicity value."""
    wildcard_defaults = ["*", "two", "Unknown - *", "four"]
    lookup = EthnicityDictionaryLookup(
        "tests/test_data/test_dictionary_lookup/test_lookup.csv",
        default_values=wildcard_defaults,
    )

    # given: one data row whose ethnicity value is deliberately unusual
    header = ["Ethnicity", "Ethnicity type"]
    unmatched_row = ["strange", "missing"]
    dataset = EthnicityDataset(data=[header, unmatched_row])

    # when: the lookup processes the data set
    lookup.process_data_set(dataset)

    # then: each default is appended, with every "*" swapped for "strange"
    expected_row = [
        "strange",
        "missing",
        "strange",
        "two",
        "Unknown - strange",
        "four",
    ]
    assert dataset.get_data()[1] == expected_row
Пример #8
0
def test_dictionary_lookup_standardiser_can_handle_empty_rows():
    """Processing a data set that contains an empty row must not raise IndexError."""
    standardiser = EthnicityDictionaryLookup(
        "tests/test_data/test_dictionary_lookup/test_lookup.csv")

    # given a dataset with a blank row
    data = [["Ethnicity", "Ethnicity type"], [" a", "xxx"], []]
    data_set = EthnicityDataset(data=data)

    # when we add_columns
    try:
        standardiser.process_data_set(data_set)
    except IndexError as err:
        # Raise explicitly instead of `assert False`: assert statements are
        # stripped under `python -O`, which would let this test pass silently,
        # and a bare assert gives no hint about what went wrong.
        raise AssertionError(
            "process_data_set raised IndexError on an empty row") from err
    def process_data(self, data):
        """Run ``process_data_set`` over ``data`` and return the resulting data.

        ``data`` is wrapped in an ``EthnicityDataset``, the wrapper is handed
        to ``self.process_data_set``, and whatever the wrapper's ``get_data()``
        returns afterwards is passed back to the caller.
        """
        wrapped = EthnicityDataset(data)
        self.process_data_set(data_set=wrapped)
        return wrapped.get_data()