def test_match_participants_same_participant(self):
    """Check the collaborator calls made by ``id_match.match_participants``.

    No additional mock behaviour is configured in this test, so it relies
    on whatever the fixture set up.  It asserts how often each patched
    collaborator is called (most counts scale with ``len(self.site_list)``)
    and pins the exact validation report calls: one for each of the first
    two sites, followed by one for the whole site list.
    """
    # pre conditions

    # test
    id_match.match_participants(self.project, self.rdr_dataset,
                                self.pii_dataset, self.dest_dataset)

    # post conditions
    # The mock assert_* helpers raise AssertionError themselves, so they are
    # called directly; assert_called_once_with also covers call_count == 1.
    self.mock_dest_dataset.assert_called_once_with(
        dataset_id=self.dest_dataset,
        description=consts.DESTINATION_DATASET_DESCRIPTION.format(
            version='',
            rdr_dataset=self.rdr_dataset,
            ehr_dataset=self.pii_dataset),
        overwrite_existing=True)
    self.mock_match_tables.assert_called_once_with(
        self.project, self.rdr_dataset, self.dest_dataset)
    self.mock_site_names.assert_called_once_with()

    num_sites = len(self.site_list)
    self.assertEqual(self.mock_pii_match_tables.call_count, num_sites)
    self.assertEqual(self.mock_ehr_person.call_count, num_sites * 2)
    self.assertEqual(self.mock_rdr_values.call_count, num_sites * 12)
    self.assertEqual(self.mock_pii_values.call_count, num_sites * 5)
    self.assertEqual(self.mock_table_append.call_count, num_sites * 12)
    self.assertEqual(self.mock_location_pii.call_count, num_sites * 5)
    self.assertEqual(self.mock_merge_fields.call_count, num_sites)
    self.assertEqual(self.mock_remove_sparse_records.call_count, num_sites)
    self.assertEqual(self.mock_change_nulls.call_count, num_sites)
    self.assertEqual(self.mock_hpo_bucket.call_count, num_sites)
    self.assertEqual(self.mock_drc_bucket.call_count, 1)
    self.assertEqual(self.mock_validation_report.call_count, num_sites + 1)

    site_filename = os.path.join(
        consts.REPORT_DIRECTORY.format(date=self.date_string),
        consts.REPORT_TITLE)
    drc_filename = os.path.join(self.dest_dataset, consts.REPORT_TITLE)
    # Comparing against mock_calls checks both the arguments and the order
    # of the report calls.
    expected_report_calls = [
        call(self.project, self.dest_dataset, [self.site_list[0]],
             self.bucket_ids[0], site_filename),
        call(self.project, self.dest_dataset, [self.site_list[1]],
             self.bucket_ids[1], site_filename),
        call(self.project, self.dest_dataset, self.site_list,
             self.internal_bucket_id, drc_filename),
    ]
    self.assertEqual(self.mock_validation_report.mock_calls,
                     expected_report_calls)
def test_match_participants_same_participant_simulate_merge_errors(self):
    """Check the collaborator calls when the merge-stage mocks raise.

    ``mock_merge_fields``, ``mock_remove_sparse_records`` and
    ``mock_change_nulls`` are each given an HTTP 500 error as their side
    effect.  The test asserts that each of them is still called once per
    site, and that no bucket lookup or validation report call is made.
    """
    # pre conditions
    # Errors are built with the shared test helper (bytes content) rather
    # than by calling HttpError directly with an int response and str
    # content, which does not match what HttpError expects.
    self.mock_merge_fields.side_effect = test_util.mock_google_http_error(
        status_code=500, content=b'baz', reason='bar')
    self.mock_remove_sparse_records.side_effect = (
        test_util.mock_google_http_error(
            status_code=500, content=b'', reason='r'))
    self.mock_change_nulls.side_effect = test_util.mock_google_http_error(
        status_code=500, content=b'baz', reason='bar')

    # test
    id_match.match_participants(self.project, self.rdr_dataset,
                                self.pii_dataset, self.dest_dataset)

    # post conditions
    # The mock assert_* helpers raise AssertionError themselves, so they are
    # called directly; assert_called_once_with also covers call_count == 1.
    self.mock_dest_dataset.assert_called_once_with(
        dataset_id=self.dest_dataset,
        description=consts.DESTINATION_DATASET_DESCRIPTION.format(
            version='',
            rdr_dataset=self.rdr_dataset,
            ehr_dataset=self.pii_dataset),
        overwrite_existing=True)
    self.mock_match_tables.assert_called_once_with(
        self.project, self.rdr_dataset, self.dest_dataset)
    self.mock_site_names.assert_called_once_with()

    num_sites = len(self.site_list)
    self.assertEqual(self.mock_pii_match_tables.call_count, num_sites)
    self.assertEqual(self.mock_ehr_person.call_count, num_sites * 2)
    self.assertEqual(self.mock_rdr_values.call_count, num_sites * 12)
    self.assertEqual(self.mock_pii_values.call_count, num_sites * 5)
    self.assertEqual(self.mock_table_append.call_count, num_sites * 12)
    self.assertEqual(self.mock_location_pii.call_count, num_sites * 5)
    self.assertEqual(self.mock_merge_fields.call_count, num_sites)
    self.assertEqual(self.mock_remove_sparse_records.call_count, num_sites)
    self.assertEqual(self.mock_change_nulls.call_count, num_sites)
    # With the merge-stage errors in place, no report-related call is
    # expected.
    self.assertEqual(self.mock_hpo_bucket.call_count, 0)
    self.assertEqual(self.mock_drc_bucket.call_count, 0)
    self.assertEqual(self.mock_validation_report.call_count, 0)
def test_match_participants_same_participant_simulate_pii_read_errors(
        self):
    """Check the collaborator calls when reading PII values raises.

    ``mock_pii_values`` is given an HTTP 500 error as its side effect.  The
    test asserts the resulting call counts for the other patched
    collaborators, and that no bucket lookup or validation report call is
    made.
    """
    # pre conditions
    self.mock_pii_values.side_effect = test_util.mock_google_http_error(
        status_code=500, content=b'baz', reason='bar')

    # test
    id_match.match_participants(self.project, self.rdr_dataset,
                                self.pii_dataset, self.dest_dataset)

    # post conditions
    # The mock assert_* helpers raise AssertionError themselves, so they are
    # called directly; assert_called_once_with also covers call_count == 1.
    self.mock_dest_dataset.assert_called_once_with(
        dataset_id=self.dest_dataset,
        description=consts.DESTINATION_DATASET_DESCRIPTION.format(
            version='',
            rdr_dataset=self.rdr_dataset,
            ehr_dataset=self.pii_dataset),
        overwrite_existing=True)
    self.mock_match_tables.assert_called_once_with(
        self.project, self.rdr_dataset, self.dest_dataset)
    self.mock_site_names.assert_called_once_with()

    num_sites = len(self.site_list)
    self.assertEqual(self.mock_pii_match_tables.call_count, num_sites)
    self.assertEqual(self.mock_ehr_person.call_count, (num_sites - 1) * 2)
    self.assertEqual(self.mock_rdr_values.call_count, (num_sites - 1) * 11)
    self.assertEqual(self.mock_pii_values.call_count, (num_sites - 1) * 4)
    self.assertEqual(self.mock_table_write.call_count, num_sites)
    self.assertEqual(self.mock_location_pii.call_count, (num_sites - 1) * 5)
    self.assertEqual(self.mock_hpo_bucket.call_count, 0)
    self.assertEqual(self.mock_drc_bucket.call_count, 0)
    self.assertEqual(self.mock_validation_report.call_count, 0)
def validate_pii():
    """Run ``matching.match_participants`` and return the success constant.

    The project id and the combined, EHR and validation-results dataset ids
    are looked up through ``bq_utils``.  If the second value returned by
    ``match_participants`` is greater than zero, an error is logged; the
    return value is ``consts.VALIDATION_SUCCESS`` either way.
    """
    app_id = bq_utils.app_identity.get_application_id()
    combined_id = bq_utils.get_combined_dataset_id()
    ehr_id = bq_utils.get_dataset_id()
    results_id = bq_utils.get_validation_results_dataset_id()

    logging.info('Calling match_participants')
    match_outcome = matching.match_participants(
        app_id, combined_id, ehr_id, results_id)
    # Only the second element of the two-item result is used here.
    _, error_count = match_outcome

    had_errors = error_count > 0
    if had_errors:
        logging.error("Errors encountered in validation process")

    return consts.VALIDATION_SUCCESS