示例#1
0
    def test_get_pii_values(self, mock_query, mock_response):
        """
        Check that get_pii_values reshapes response rows into (id, value) pairs.

        mock_query and mock_response are handed to the test as arguments
        (presumably by patch decorators outside this view -- confirm).  The
        test verifies the returned tuples, that each mock is used exactly
        once, and that the query text is consts.PII_VALUES formatted with the
        arguments given to get_pii_values.
        """
        # pre conditions
        mock_query.return_value = {}
        mock_response.return_value = [
            {
                consts.PERSON_ID_FIELD: 1,
                12345: 'saLLy',
            },
            {
                consts.PERSON_ID_FIELD: 2,
                12345: 'Rudy'
            },
            {
                consts.PERSON_ID_FIELD: 3,
                12345: 'MaTiLdA'
            },
        ]

        # test
        actual = reader.get_pii_values('project-foo', 'pii-bar', 'zeta',
                                       '_sea', 12345)

        # postconditions
        # values are expected back unchanged, including their mixed casing
        expected = [(1, 'saLLy'), (2, 'Rudy'), (3, 'MaTiLdA')]
        self.assertEqual(actual, expected)
        self.assertEqual(mock_query.call_count, 1)
        self.assertEqual(mock_response.call_count, 1)
        # assert_called_with raises AssertionError on a mismatch and returns
        # None otherwise, so it is invoked directly instead of being wrapped
        # in an assertEqual(..., None) that could never fail by itself.
        mock_query.assert_called_with(
            consts.PII_VALUES.format(project='project-foo',
                                     dataset='pii-bar',
                                     hpo_site_str='zeta',
                                     table_suffix='_sea',
                                     field=12345))
示例#2
0
def _compare_phone_numbers(project, rdr_dataset, pii_dataset, hpo, concept_id,
                           pii_field, pii_tables):
    """
    Match a site's PII phone numbers against the RDR phone numbers.

    Both values are passed through normalizer.normalize_phone before they
    are compared.

    :param project:  project to search for the datasets
    :param rdr_dataset:  contains datasets from the rdr group
    :param pii_dataset:  dataset created from submitted hpo sites; the pii
        values are read from it
    :param hpo: string identifier of hpo
    :param concept_id:  integer value of concept id for concept in the rdr_dataset
    :param pii_field:  string value of field name with data matching the
        concept_id.  used to extract the correct values from the pii tables
    :param pii_tables:  collection of table names; the site's phone table
        name (hpo + consts.PII_PHONE_TABLE) must be a member

    :return: A dictionary keyed by person_id whose values are consts.MISSING,
        consts.MATCH, or consts.MISMATCH.
    :raises RuntimeError: if the site's phone table is not in pii_tables
    :raises oauth2client.client.HttpAccessTokenRefreshError,
        googleapiclient.errors.HttpError: logged and re-raised when the PII
        values cannot be read
    """
    results = {}
    phone_table = hpo + consts.PII_PHONE_TABLE

    # Guard clause: without the site's phone table there is nothing to compare.
    if phone_table not in pii_tables:
        raise RuntimeError('Table {} doesnt exist.'.format(phone_table))

    rdr_lookup = readers.get_rdr_match_values(project, rdr_dataset,
                                              consts.ID_MATCH_TABLE,
                                              concept_id)

    try:
        pii_rows = readers.get_pii_values(project, pii_dataset, hpo,
                                          consts.PII_PHONE_TABLE, pii_field)
    except (oauth2client.client.HttpAccessTokenRefreshError,
            googleapiclient.errors.HttpError):
        # Record which site/field failed, then let the caller handle it.
        LOGGER.exception("Unable to read PII for: %s\tdata field:\t%s", hpo,
                         pii_field)
        raise

    for person_id, pii_value in pii_rows:
        rdr_value = rdr_lookup.get(person_id)

        # A value absent on either side cannot be judged as match/mismatch.
        if rdr_value is None or pii_value is None:
            results[person_id] = consts.MISSING
            continue

        cleaned_rdr = normalizer.normalize_phone(rdr_value)
        cleaned_pii = normalizer.normalize_phone(pii_value)
        if cleaned_rdr == cleaned_pii:
            results[person_id] = consts.MATCH
        else:
            results[person_id] = consts.MISMATCH

    return results
示例#3
0
def _compare_name_fields(project, rdr_dataset, pii_dataset, hpo, concept_id,
                         pii_field, pii_tables):
    """
    Match a site's PII name values against the RDR name values.

    Each name read from the site's PII name table is compared with the RDR
    value for the same person_id after both have been passed through
    normalizer.normalize_name.

    :param project:  project to search for the datasets
    :param rdr_dataset:  contains datasets from the rdr group
    :param pii_dataset:  dataset created from submitted hpo sites; the pii
        values are read from it
    :param hpo: string identifier of hpo
    :param concept_id:  integer value of concept id for concept in the rdr_dataset
    :param pii_field:  string value of field name with data matching the
        concept_id.  used to extract the correct values from the pii tables
    :param pii_tables:  collection of table names; the site's name table
        name (hpo + consts.PII_NAME_TABLE) must be a member

    :return: a dictionary keyed by person_id whose values are consts.MISSING,
        consts.MATCH, or consts.MISMATCH.
    :raises RuntimeError: if the site's name table is not in pii_tables
    :raises oauth2client.client.HttpAccessTokenRefreshError,
        googleapiclient.errors.HttpError: logged and re-raised when the PII
        values cannot be read
    """
    results = {}
    name_table = hpo + consts.PII_NAME_TABLE

    # Guard clause: bail out early when the site never supplied a name table.
    if name_table not in pii_tables:
        raise RuntimeError('Table {} doesnt exist.'.format(name_table))

    rdr_lookup = readers.get_rdr_match_values(project, rdr_dataset,
                                              consts.ID_MATCH_TABLE,
                                              concept_id)

    try:
        pii_rows = readers.get_pii_values(project, pii_dataset, hpo,
                                          consts.PII_NAME_TABLE, pii_field)
    except (oauth2client.client.HttpAccessTokenRefreshError,
            googleapiclient.errors.HttpError):
        # Record which site/field failed, then let the caller handle it.
        LOGGER.exception(
            f"Unable to read PII for: {hpo}\tdata field:\t{pii_field}")
        raise

    for person_id, pii_value in pii_rows:
        rdr_value = rdr_lookup.get(person_id)

        # A value absent on either side cannot be judged as match/mismatch.
        if rdr_value is None or pii_value is None:
            results[person_id] = consts.MISSING
            continue

        cleaned_pii = normalizer.normalize_name(pii_value)
        cleaned_rdr = normalizer.normalize_name(rdr_value)
        if cleaned_rdr == cleaned_pii:
            results[person_id] = consts.MATCH
        else:
            results[person_id] = consts.MISMATCH

    return results
示例#4
0
def _compare_name_fields(project, rdr_dataset, pii_dataset, hpo, concept_id,
                         pii_field):
    """
    Match a site's PII name values against the RDR name values.

    Each name read from the site's PII name table is compared with the RDR
    value for the same person_id after both have been passed through
    normalizer.normalize_name.

    :param project:  project to search for the datasets
    :param rdr_dataset:  contains datasets from the rdr group
    :param pii_dataset:  dataset created from submitted hpo sites; the pii
        values are read from it
    :param hpo: string identifier of hpo
    :param concept_id:  integer value of concept id for concept in the rdr_dataset
    :param pii_field:  string value of field name with data matching the
        concept_id.  used to extract the correct values from the pii tables

    :return: a dictionary keyed by person_id whose values are consts.MISSING,
        consts.MATCH, or consts.MISMATCH.
    """
    results = {}

    rdr_lookup = readers.get_rdr_match_values(project, rdr_dataset,
                                              consts.ID_MATCH_TABLE,
                                              concept_id)
    pii_rows = readers.get_pii_values(project, pii_dataset, hpo,
                                      consts.PII_NAME_TABLE, pii_field)

    for person_id, pii_value in pii_rows:
        rdr_value = rdr_lookup.get(person_id)

        # A value absent on either side cannot be judged as match/mismatch.
        if rdr_value is None or pii_value is None:
            results[person_id] = consts.MISSING
            continue

        cleaned_pii = normalizer.normalize_name(pii_value)
        cleaned_rdr = normalizer.normalize_name(rdr_value)
        if cleaned_rdr == cleaned_pii:
            results[person_id] = consts.MATCH
        else:
            results[person_id] = consts.MISMATCH

    return results
示例#5
0
def _compare_email_addresses(project, rdr_dataset, pii_dataset, hpo,
                             concept_id, pii_field, pii_tables):
    """
    Match a site's PII email addresses against the RDR email addresses.

    Both values are passed through normalizer.normalize_email before they
    are compared.

    :param project:  project to search for the datasets
    :param rdr_dataset:  contains datasets from the rdr group
    :param pii_dataset:  dataset created from submitted hpo sites; the pii
        values are read from it
    :param hpo: string identifier of hpo
    :param concept_id:  integer value of concept id for concept in the rdr_dataset
    :param pii_field:  string value of field name with data matching the
        concept_id.  used to extract the correct values from the pii tables
    :param pii_tables:  collection of table names; the site's email table
        name (hpo + consts.PII_EMAIL_TABLE) must be a member

    :return: a dictionary keyed by person_id whose values are consts.MISSING,
        consts.MATCH, or consts.MISMATCH.
    :raises RuntimeError: if the site's email table is not in pii_tables
    :raises oauth2client.client.HttpAccessTokenRefreshError,
        googleapiclient.errors.HttpError: logged and re-raised when the PII
        values cannot be read
    """
    results = {}
    email_table = hpo + consts.PII_EMAIL_TABLE

    # Guard clause: bail out early when the site never supplied an email table.
    if email_table not in pii_tables:
        raise RuntimeError('Table {} doesnt exist.'.format(email_table))

    rdr_lookup = readers.get_rdr_match_values(project, rdr_dataset,
                                              consts.ID_MATCH_TABLE,
                                              concept_id)

    try:
        pii_rows = readers.get_pii_values(project, pii_dataset, hpo,
                                          consts.PII_EMAIL_TABLE, pii_field)
    except (oauth2client.client.HttpAccessTokenRefreshError,
            googleapiclient.errors.HttpError):
        # Record which site/field failed, then let the caller handle it.
        LOGGER.exception(
            f"Unable to read PII for: {hpo}\tdata field:\t{pii_field}")
        raise

    for person_id, pii_value in pii_rows:
        rdr_value = rdr_lookup.get(person_id)

        # The normalizer is only reached when both sides have a value.
        if rdr_value is None or pii_value is None:
            outcome = consts.MISSING
        elif (normalizer.normalize_email(rdr_value) ==
              normalizer.normalize_email(pii_value)):
            outcome = consts.MATCH
        else:
            outcome = consts.MISMATCH

        results[person_id] = outcome

    return results
示例#6
0
def _compare_email_addresses(project, rdr_dataset, pii_dataset, hpo,
                             concept_id, pii_field):
    """
    Match a site's PII email addresses against the RDR email addresses.

    Both values are passed through normalizer.normalize_email before they
    are compared.

    :param project:  project to search for the datasets
    :param rdr_dataset:  contains datasets from the rdr group
    :param pii_dataset:  dataset created from submitted hpo sites; the pii
        values are read from it
    :param hpo: string identifier of hpo
    :param concept_id:  integer value of concept id for concept in the rdr_dataset
    :param pii_field:  string value of field name with data matching the
        concept_id.  used to extract the correct values from the pii tables

    :return: a dictionary keyed by person_id whose values are consts.MISSING,
        consts.MATCH, or consts.MISMATCH.
    """
    rdr_lookup = readers.get_rdr_match_values(project, rdr_dataset,
                                              consts.ID_MATCH_TABLE,
                                              concept_id)
    pii_rows = readers.get_pii_values(project, pii_dataset, hpo,
                                      consts.PII_EMAIL_TABLE, pii_field)

    def _classify(person_id, pii_value):
        # Decide the match status for one (person_id, pii email) row.
        rdr_value = rdr_lookup.get(person_id)
        if rdr_value is None or pii_value is None:
            return consts.MISSING

        cleaned_rdr = normalizer.normalize_email(rdr_value)
        cleaned_pii = normalizer.normalize_email(pii_value)
        if cleaned_rdr == cleaned_pii:
            return consts.MATCH
        return consts.MISMATCH

    # A person_id appearing more than once keeps the status of its last row.
    return {
        person_id: _classify(person_id, pii_value)
        for person_id, pii_value in pii_rows
    }
示例#7
0
def _compare_phone_numbers(project, rdr_dataset, pii_dataset, hpo, concept_id,
                           pii_field):
    """
    Match a site's PII phone numbers against the RDR phone numbers.

    Both values are passed through normalizer.normalize_phone before they
    are compared.

    :param project:  project to search for the datasets
    :param rdr_dataset:  contains datasets from the rdr group
    :param pii_dataset:  dataset created from submitted hpo sites; the pii
        values are read from it
    :param hpo: string identifier of hpo
    :param concept_id:  integer value of concept id for concept in the rdr_dataset
    :param pii_field:  string value of field name with data matching the
        concept_id.  used to extract the correct values from the pii tables

    :return: A dictionary keyed by person_id whose values are consts.MISSING,
        consts.MATCH, or consts.MISMATCH.
    """
    results = {}

    rdr_lookup = readers.get_rdr_match_values(project, rdr_dataset,
                                              consts.ID_MATCH_TABLE,
                                              concept_id)
    pii_rows = readers.get_pii_values(project, pii_dataset, hpo,
                                      consts.PII_PHONE_TABLE, pii_field)

    for person_id, pii_value in pii_rows:
        rdr_value = rdr_lookup.get(person_id)

        # The normalizer is only reached when both sides have a value.
        if rdr_value is None or pii_value is None:
            outcome = consts.MISSING
        elif (normalizer.normalize_phone(rdr_value) ==
              normalizer.normalize_phone(pii_value)):
            outcome = consts.MATCH
        else:
            outcome = consts.MISMATCH

        results[person_id] = outcome

    return results