예제 #1
0
def _compare_cities(
    project,
    validation_dataset,
    rdr_dataset,
    pii_dataset,
    hpo,
    concept_id,
    pii_field,
):
    """
    Compare city values from the hpo PII address table with the rdr match values.

    Every (person_id, city) pair read from the PII address table is checked
    against the value stored for the same person_id in the match validation
    table.  Both values are normalized before they are compared.

    :param project:  project to search for the datasets
    :param validation_dataset:  the auto generated match validation dataset
        created in this module.  queried to get the location value to identify
        a location field
    :param rdr_dataset:  contains datasets from the rdr group
    :param pii_dataset:  dataset created from submitted hpo sites.  presumably
        the dataset holding the pii tables -- confirm against the reader
    :param hpo: string identifier of hpo
    :param concept_id:  integer value of concept id for concept in the rdr_dataset
    :param pii_field:  string value of field name with data matching the
        concept_id.  used to extract the correct values from the pii tables

    :return: a dictionary keyed by person_id whose values are one of
        consts.MATCH, consts.MISMATCH or consts.MISSING.
    """
    # values already stored in the validation table, looked up by person_id
    rdr_lookup = readers.get_rdr_match_values(
        project, validation_dataset, consts.ID_MATCH_TABLE, concept_id
    )

    # (person_id, city) pairs as submitted by the hpo site
    pii_rows = readers.get_location_pii(
        project,
        rdr_dataset,
        pii_dataset,
        hpo,
        consts.PII_ADDRESS_TABLE,
        pii_field,
    )

    results = {}
    for pid, pii_value in pii_rows:
        rdr_value = rdr_lookup.get(pid)

        if rdr_value is not None and pii_value is not None:
            # only compare once both sides have been normalized the same way
            normalized_rdr = normalizer.normalize_city_name(rdr_value)
            normalized_pii = normalizer.normalize_city_name(pii_value)
            if normalized_rdr == normalized_pii:
                results[pid] = consts.MATCH
            else:
                results[pid] = consts.MISMATCH
        else:
            # a value absent on either side cannot be judged as match/mismatch
            results[pid] = consts.MISSING

    return results
예제 #2
0
def _compare_cities(project, validation_dataset, rdr_dataset, pii_dataset, hpo,
                    concept_id, pii_field, pii_tables):
    """
    Compare city values from the hpo PII address table with the rdr match values.

    The comparison only runs when the hpo's PII address table name is found in
    pii_tables.  Every (person_id, city) pair read from that table is checked
    against the value stored for the same person_id in the match validation
    table.  Both values are normalized before they are compared.

    :param project:  project to search for the datasets
    :param validation_dataset:  the auto generated match validation dataset
        created in this module.  queried to get the location value to identify
        a location field
    :param rdr_dataset:  contains datasets from the rdr group
    :param pii_dataset:  dataset created from submitted hpo sites.  presumably
        the dataset holding the pii tables -- confirm against the reader
    :param hpo: string identifier of hpo
    :param concept_id:  integer value of concept id for concept in the rdr_dataset
    :param pii_field:  string value of field name with data matching the
        concept_id.  used to extract the correct values from the pii tables
    :param pii_tables:  container checked for the name built from hpo and
        consts.PII_ADDRESS_TABLE.  presumably the table names that exist in
        pii_dataset -- confirm against the caller

    :return: a dictionary keyed by person_id whose values are one of
        consts.MATCH, consts.MISMATCH or consts.MISSING.

    :raises RuntimeError: if the hpo's PII address table name is not in
        pii_tables
    :raises oauth2client.client.HttpAccessTokenRefreshError,
        googleapiclient.errors.HttpError:  logged and re-raised when reading
        the PII values fails with one of these errors
    """
    table_name = hpo + consts.PII_ADDRESS_TABLE

    # fail before issuing any query when the site's table is not available
    if table_name not in pii_tables:
        raise RuntimeError('Table {} doesnt exist.'.format(table_name))

    # values already stored in the validation table, looked up by person_id
    rdr_lookup = readers.get_rdr_match_values(
        project, validation_dataset, consts.ID_MATCH_TABLE, concept_id
    )

    try:
        # (person_id, city) pairs as submitted by the hpo site
        pii_rows = readers.get_location_pii(
            project, rdr_dataset, pii_dataset, hpo,
            consts.PII_ADDRESS_TABLE, pii_field
        )
    except (oauth2client.client.HttpAccessTokenRefreshError,
            googleapiclient.errors.HttpError):
        # record which site/field failed, then let the caller handle the error
        LOGGER.exception(
            f"Unable to read PII for: {hpo}\tdata field:\t{pii_field}")
        raise

    results = {}
    for pid, pii_value in pii_rows:
        rdr_value = rdr_lookup.get(pid)

        if rdr_value is not None and pii_value is not None:
            # only compare once both sides have been normalized the same way
            normalized_rdr = normalizer.normalize_city_name(rdr_value)
            normalized_pii = normalizer.normalize_city_name(pii_value)
            if normalized_rdr == normalized_pii:
                results[pid] = consts.MATCH
            else:
                results[pid] = consts.MISMATCH
        else:
            # a value absent on either side cannot be judged as match/mismatch
            results[pid] = consts.MISSING

    return results
예제 #3
0
    def test_nomalize_city_with_unknown_abbreviation(self):
        # An abbreviation-looking token ('L8t.') that the test treats as unknown
        # is expected to be kept, lowercased and without its period.
        # pre conditions
        raw_city = 'L8t. Made Up Place'
        expected = 'l8t made up place'

        # test
        actual = normalizer.normalize_city_name(raw_city)

        # post conditions
        self.assertEqual(actual, expected)
예제 #4
0
    def test_normalize_city_name_mixed_case(self):
        # A city name written in alternating case is expected to come back
        # entirely in lower case.
        # pre conditions
        raw_city = 'bIrMiNgHaM'
        expected = 'birmingham'

        # test
        actual = normalizer.normalize_city_name(raw_city)

        # post conditions
        self.assertEqual(actual, expected)
예제 #5
0
    def test_normalize_city_with_punctuation_and_spaces(self):
        # 'St.' is expected to be expanded to 'saint', the apostrophe dropped,
        # and the words kept separated by single spaces.
        # pre conditions
        raw_city = 'St. Paul\'s Place'
        expected = 'saint pauls place'

        # test
        actual = normalizer.normalize_city_name(raw_city)

        # post conditions
        self.assertEqual(actual, expected)
예제 #6
0
    def test_normalize_non_string_city(self):
        # A float passed instead of a string is expected to come back as its
        # digits with the decimal point removed.
        # pre conditions
        raw_city = 88.321
        expected = '88321'

        # test
        actual = normalizer.normalize_city_name(raw_city)

        # post conditions
        self.assertEqual(actual, expected)
예제 #7
0
    def test_normalize_None_city(self):
        # None is expected to normalize to the empty string rather than raise.
        # pre conditions
        raw_city = None
        expected = ''

        # test
        actual = normalizer.normalize_city_name(raw_city)

        # post conditions
        self.assertEqual(actual, expected)