def test_get_ehr_person_values_bytes(self, mock_query, mock_response,
                                     mock_fields):
    """
    Check the reader result when the column value arrives as bytes.

    The response mock yields a single row for person 1 whose value is
    b'hello', and the fields mock declares the column as
    consts.STRING_TYPE.  The test expects {1: 'hello'} back, exactly one
    query call, exactly one response call, and the query built from
    consts.EHR_PERSON_VALUES.
    """
    # pre conditions
    project, dataset, table = 'project-foo', 'ehr-bar', 'table-doh'
    field_name = 'foo_field'
    raw_value = b'hello'

    mock_query.return_value = {}
    mock_response.return_value = [
        {consts.PERSON_ID_FIELD: 1, field_name: raw_value},
    ]
    mock_fields.return_value = [
        {'name': field_name, 'type': consts.STRING_TYPE},
    ]

    # test
    result = reader.get_ehr_person_values(project, dataset, table,
                                          field_name)

    # post-conditions
    self.assertEqual(result, {1: 'hello'})
    self.assertEqual(mock_query.call_count, 1)
    self.assertEqual(mock_response.call_count, 1)

    expected_query = consts.EHR_PERSON_VALUES.format(project=project,
                                                     dataset=dataset,
                                                     table=table,
                                                     field=field_name)
    # assert_called_with raises on a mismatch and returns None otherwise
    self.assertIsNone(mock_query.assert_called_with(expected_query))
def test_get_ehr_person_values(self, mock_query, mock_response):
    """
    Check that the reader maps each person id to its column value.

    Three response rows with mixed-case values are supplied; the test
    expects them back unchanged as a person_id -> value dictionary, with
    one query call, one response call, and the query built from
    consts.EHR_PERSON_VALUES.
    """
    # pre conditions
    project, dataset, table = 'project-foo', 'ehr-bar', 'table-doh'
    field_name = 'gender_concept_id'
    people = [(1, 'saLLy'), (2, 'Rudy'), (3, 'MaTiLdA')]

    mock_query.return_value = {}
    mock_response.return_value = [
        {consts.PERSON_ID_FIELD: person_id, field_name: value}
        for person_id, value in people
    ]

    # test
    result = reader.get_ehr_person_values(project, dataset, table,
                                          field_name)

    # post-conditions
    self.assertEqual(result, dict(people))
    self.assertEqual(mock_query.call_count, 1)
    self.assertEqual(mock_response.call_count, 1)

    expected_query = consts.EHR_PERSON_VALUES.format(project=project,
                                                     dataset=dataset,
                                                     table=table,
                                                     field=field_name)
    # assert_called_with raises on a mismatch and returns None otherwise
    self.assertIsNone(mock_query.assert_called_with(expected_query))
def _compare_birth_dates(project, validation_dataset, pii_dataset, site,
                         concept_id_pii, pii_tables):
    """
    Compare birth dates for people.

    Reads the RDR values and the EHR birth_datetime values, parses both
    strings with ``parse``, formats them with consts.DATE_FORMAT and compares
    the resulting strings.

    :param project: project to search for the datasets
    :param validation_dataset: the auto generated match validation dataset
        created in this module.  queried to get the RDR birth date value
    :param pii_dataset: dataset created from submitted hpo sites.  the pii
        tables
    :param site: string identifier of hpo
    :param concept_id_pii: integer value of concept id for concept in the
        rdr_dataset
    :param pii_tables: collection of table names; the site's person table
        (site + consts.EHR_PERSON_TABLE_SUFFIX) must be a member
    :return: dictionary of person_id -> consts.MATCH, consts.MISMATCH or
        consts.MISSING
    :raises RuntimeError: if the site's person table is not in pii_tables
    :raises oauth2client.client.HttpAccessTokenRefreshError: re-raised after
        logging when the EHR values cannot be read
    :raises googleapiclient.errors.HttpError: re-raised after logging when
        the EHR values cannot be read
    """
    match_values = {}
    table_name = site + consts.EHR_PERSON_TABLE_SUFFIX

    # nothing can be compared when the site never submitted the table
    if table_name not in pii_tables:
        raise RuntimeError('Table {} doesnt exist.'.format(table_name))

    pii_birthdates = readers.get_rdr_match_values(project, validation_dataset,
                                                  consts.ID_MATCH_TABLE,
                                                  concept_id_pii)

    try:
        ehr_birthdates = readers.get_ehr_person_values(
            project, pii_dataset, table_name, consts.BIRTH_DATETIME_FIELD)
    except (oauth2client.client.HttpAccessTokenRefreshError,
            googleapiclient.errors.HttpError):
        LOGGER.exception("Unable to read PII for: %s\tdata field:\t%s", site,
                         consts.BIRTH_DATETIME_FIELD)
        raise

    # compare birth_datetime from ppi info to ehr info and record results.
    # The loop variable already holds the EHR value, so it is not looked up
    # a second time.
    for person_id, ehr_birthdate in ehr_birthdates.items():
        rdr_birthdate = pii_birthdates.get(person_id)

        if rdr_birthdate is None or ehr_birthdate is None:
            match_values[person_id] = consts.MISSING
        elif isinstance(rdr_birthdate, str) and isinstance(
                ehr_birthdate, str):
            # normalise both values to the same Year/month/day string form
            rdr_string = parse(rdr_birthdate).strftime(consts.DATE_FORMAT)
            ehr_string = parse(ehr_birthdate).strftime(consts.DATE_FORMAT)
            match_values[person_id] = (consts.MATCH
                                       if rdr_string == ehr_string else
                                       consts.MISMATCH)
        else:
            # a non-string value on either side cannot be parsed as a date
            match_values[person_id] = consts.MISMATCH

    return match_values
def _compare_birth_dates(
        project,
        validation_dataset,
        pii_dataset,
        site,
        concept_id_pii
):
    """
    Compare birth dates for people.

    Reads the RDR values and the EHR birth_datetime values, parses both
    strings with ``parse``, formats them with consts.DATE and compares the
    resulting strings.

    :param project: project to search for the datasets
    :param validation_dataset: the auto generated match validation dataset
        created in this module.  queried to get the RDR birth date value
    :param pii_dataset: dataset created from submitted hpo sites.  the pii
        tables
    :param site: string identifier of hpo
    :param concept_id_pii: integer value of concept id for concept in the
        rdr_dataset
    :return: dictionary of person_id -> consts.MATCH, consts.MISMATCH or
        consts.MISSING
    """
    match_values = {}

    pii_birthdates = readers.get_rdr_match_values(
        project, validation_dataset, consts.ID_MATCH_TABLE, concept_id_pii
    )

    ehr_birthdates = readers.get_ehr_person_values(
        project,
        pii_dataset,
        site + consts.EHR_PERSON_TABLE_SUFFIX,
        consts.BIRTH_DATETIME_FIELD
    )

    # compare birth_datetime from ppi info to ehr info and record results.
    # items() is used because dict.iteritems() does not exist on Python 3;
    # the loop variable already holds the EHR value, so it is not looked up
    # a second time.
    for person_id, ehr_birthdate in ehr_birthdates.items():
        rdr_birthdate = pii_birthdates.get(person_id)

        if rdr_birthdate is None or ehr_birthdate is None:
            match_values[person_id] = consts.MISSING
        elif isinstance(rdr_birthdate, str) and isinstance(ehr_birthdate, str):
            # convert values to datetime objects
            rdr_date = parse(rdr_birthdate)
            ehr_date = parse(ehr_birthdate)
            # convert datetime objects to Year/month/day strings and compare
            rdr_string = rdr_date.strftime(consts.DATE)
            ehr_string = ehr_date.strftime(consts.DATE)

            match_str = consts.MATCH if rdr_string == ehr_string else consts.MISMATCH
            match_values[person_id] = match_str
        else:
            # a non-string value on either side cannot be parsed as a date
            match_values[person_id] = consts.MISMATCH

    return match_values
def _compare_genders(project, validation_dataset, pii_dataset, hpo,
                     concept_id_pii, pii_tables):
    """
    Compare genders for people.

    Reads the RDR gender values and the EHR gender field values, translates
    each EHR value through consts.SEX_CONCEPT_IDS, and compares the two
    lower-cased strings.

    :param project: project to search for the datasets
    :param validation_dataset: the auto generated match validation dataset
        created in this module.  queried to get the gender value
    :param pii_dataset: dataset created from submitted hpo sites.  the pii
        tables
    :param hpo: string identifier of hpo
    :param concept_id_pii: integer value of concept id for concept in the
        rdr_dataset
    :param pii_tables: collection of table names; the hpo's person table
        (hpo + consts.EHR_PERSON_TABLE_SUFFIX) must be a member
    :return: dictionary of person_id -> consts.MATCH, consts.MISMATCH or
        consts.MISSING
    :raises RuntimeError: if the hpo's person table is not in pii_tables
    """
    match_values = {}
    table_name = hpo + consts.EHR_PERSON_TABLE_SUFFIX

    # bail out early when the hpo never submitted the person table
    if table_name not in pii_tables:
        raise RuntimeError('Table {} doesnt exist.'.format(table_name))

    pii_genders = readers.get_rdr_match_values(project, validation_dataset,
                                               consts.ID_MATCH_TABLE,
                                               concept_id_pii)

    try:
        ehr_genders = readers.get_ehr_person_values(project, pii_dataset,
                                                    table_name,
                                                    consts.GENDER_FIELD)
    except (oauth2client.client.HttpAccessTokenRefreshError,
            googleapiclient.errors.HttpError):
        # log with traceback, then let the caller deal with the failure
        LOGGER.exception(
            f"Unable to read PII for: {hpo}\tdata field:\t{consts.GENDER_FIELD}"
        )
        raise

    # compare gender from ppi info to ehr info and record results.
    for person_id, ehr_concept in ehr_genders.items():
        # NOTE(review): both lookups default to '', so an absent person or an
        # unmapped concept compares as an empty string instead of reaching
        # the MISSING branch -- confirm this is intended.
        rdr_value = pii_genders.get(person_id, '')
        ehr_value = consts.SEX_CONCEPT_IDS.get(ehr_concept, '')

        if rdr_value is not None and ehr_value is not None:
            if rdr_value.lower() == ehr_value.lower():
                outcome = consts.MATCH
            else:
                outcome = consts.MISMATCH
        else:
            outcome = consts.MISSING

        match_values[person_id] = outcome

    return match_values
def _compare_genders(
        project,
        validation_dataset,
        pii_dataset,
        hpo,
        concept_id_pii
):
    """
    Compare genders for people.

    Reads the RDR gender values and the EHR gender field values, translates
    each EHR value through consts.SEX_CONCEPT_IDS, and compares the two
    lower-cased strings.

    :param project: project to search for the datasets
    :param validation_dataset: the auto generated match validation dataset
        created in this module.  queried to get the gender value
    :param pii_dataset: dataset created from submitted hpo sites.  the pii
        tables
    :param hpo: string identifier of hpo
    :param concept_id_pii: integer value of concept id for concept in the
        rdr_dataset
    :return: dictionary of person_id -> consts.MATCH, consts.MISMATCH or
        consts.MISSING
    """
    match_values = {}

    pii_genders = readers.get_rdr_match_values(
        project, validation_dataset, consts.ID_MATCH_TABLE, concept_id_pii
    )

    ehr_genders = readers.get_ehr_person_values(
        project,
        pii_dataset,
        hpo + consts.EHR_PERSON_TABLE_SUFFIX,
        consts.GENDER_FIELD
    )

    # compare gender from ppi info to ehr info and record results.
    # items() is used because dict.iteritems() does not exist on Python 3.
    for person_id, ehr_gender in ehr_genders.items():
        rdr_gender = pii_genders.get(person_id, '')
        ehr_gender = consts.SEX_CONCEPT_IDS.get(ehr_gender, '')

        # The None check must come before lower(): calling lower() on None
        # raises AttributeError and would make the MISSING branch
        # unreachable.
        if rdr_gender is None or ehr_gender is None:
            match_str = consts.MISSING
        else:
            rdr_gender = rdr_gender.lower()
            ehr_gender = ehr_gender.lower()
            match_str = consts.MATCH if rdr_gender == ehr_gender else consts.MISMATCH

        match_values[person_id] = match_str

    return match_values
def test_get_ehr_person_values_with_duplicate_keys(self, mock_query,
                                                   mock_response,
                                                   mock_fields):
    """
    Check the reader result when a person id appears in several rows.

    Person 2 is repeated with the same value and person 3 with a different
    one ('mattie'); the test expects the value from the first row of each
    person to be the one returned, with one query call, one response call,
    and the query built from consts.EHR_PERSON_VALUES.
    """
    # pre conditions
    project, dataset, table = 'project-foo', 'ehr-bar', 'table-doh'
    field_name = 'birth_datetime'
    response_rows = [
        (1, 'saLLy'),
        (2, 'Rudy'),
        (3, 'MaTiLdA'),
        (2, 'Rudy'),
        (3, 'mattie'),
    ]

    mock_query.return_value = {}
    mock_response.return_value = [
        {consts.PERSON_ID_FIELD: person_id, field_name: value}
        for person_id, value in response_rows
    ]
    mock_fields.return_value = [
        {'name': field_name, 'type': consts.DATE_TYPE},
    ]

    # test
    result = reader.get_ehr_person_values(project, dataset, table,
                                          field_name)

    # post-conditions
    self.assertEqual(result, {1: 'saLLy', 2: 'Rudy', 3: 'MaTiLdA'})
    self.assertEqual(mock_query.call_count, 1)
    self.assertEqual(mock_response.call_count, 1)

    expected_query = consts.EHR_PERSON_VALUES.format(project=project,
                                                     dataset=dataset,
                                                     table=table,
                                                     field=field_name)
    # assert_called_with raises on a mismatch and returns None otherwise
    self.assertIsNone(mock_query.assert_called_with(expected_query))
def test_get_ehr_person_values_birthdates(self, mock_query, mock_response,
                                          mock_fields):
    """
    Check the reader result for a column declared as a timestamp.

    The response rows carry float values and the fields mock declares the
    column as consts.TIMESTAMP_TYPE; the test expects each value back as a
    'YYYY-MM-DD' string, with one query call, one response call, and the
    query built from consts.EHR_PERSON_VALUES.
    """
    # pre conditions
    project, dataset, table = 'project-foo', 'ehr-bar', 'table-doh'
    field_name = 'birth_datetime'
    timestamps = [(1, 16520400.0), (2, -662670000.0), (3, 12459600.0)]

    mock_query.return_value = {}
    mock_response.return_value = [
        {consts.PERSON_ID_FIELD: person_id, field_name: stamp}
        for person_id, stamp in timestamps
    ]
    mock_fields.return_value = [
        {'name': field_name, 'type': consts.TIMESTAMP_TYPE},
    ]

    # test
    result = reader.get_ehr_person_values(project, dataset, table,
                                          field_name)

    # post-conditions
    self.assertEqual(result,
                     {1: '1970-07-11', 2: '1949-01-01', 3: '1970-05-25'})
    self.assertEqual(mock_query.call_count, 1)
    self.assertEqual(mock_response.call_count, 1)

    expected_query = consts.EHR_PERSON_VALUES.format(project=project,
                                                     dataset=dataset,
                                                     table=table,
                                                     field=field_name)
    # assert_called_with raises on a mismatch and returns None otherwise
    self.assertIsNone(mock_query.assert_called_with(expected_query))