Пример #1
0
    def test_match_participants_same_participant(self):
        """
        Check which patched helpers match_participants calls on a clean run.

        No mock is given a side effect in this test.  The assertions pin the
        call counts of the patched helpers, the arguments used for the
        destination dataset and match tables, and the validation report
        calls: one for each of the first two sites plus one covering the
        whole site list.
        """
        # pre conditions

        # test
        id_match.match_participants(self.project, self.rdr_dataset,
                                    self.pii_dataset, self.dest_dataset)

        # post conditions
        # Mock assert_* helpers raise AssertionError on a mismatch and return
        # None, so they are called directly rather than inside assertEqual.
        self.mock_dest_dataset.assert_called_once_with(
            dataset_id=self.dest_dataset,
            description=consts.DESTINATION_DATASET_DESCRIPTION.format(
                version='',
                rdr_dataset=self.rdr_dataset,
                ehr_dataset=self.pii_dataset),
            overwrite_existing=True)

        self.mock_match_tables.assert_called_once_with(self.project,
                                                       self.rdr_dataset,
                                                       self.dest_dataset)

        self.mock_site_names.assert_called_once_with()

        num_sites = len(self.site_list)
        self.assertEqual(self.mock_pii_match_tables.call_count, num_sites)

        # NOTE(review): the per-site multipliers below are taken as-is from
        # the existing expectations -- confirm against match_participants.
        self.assertEqual(self.mock_ehr_person.call_count, num_sites * 2)
        self.assertEqual(self.mock_rdr_values.call_count, num_sites * 12)
        self.assertEqual(self.mock_pii_values.call_count, num_sites * 5)
        self.assertEqual(self.mock_table_append.call_count, num_sites * 12)
        self.assertEqual(self.mock_location_pii.call_count, num_sites * 5)
        self.assertEqual(self.mock_merge_fields.call_count, num_sites)
        self.assertEqual(self.mock_remove_sparse_records.call_count, num_sites)
        self.assertEqual(self.mock_change_nulls.call_count, num_sites)
        self.assertEqual(self.mock_hpo_bucket.call_count, num_sites)
        self.assertEqual(self.mock_drc_bucket.call_count, 1)
        self.assertEqual(self.mock_validation_report.call_count, num_sites + 1)

        site_filename = os.path.join(
            consts.REPORT_DIRECTORY.format(date=self.date_string),
            consts.REPORT_TITLE)
        drc_filename = os.path.join(self.dest_dataset, consts.REPORT_TITLE)
        # Each listed site gets its own report in its own bucket; the final
        # call covers the full site list and targets the internal bucket.
        expected_report_calls = [
            call(self.project, self.dest_dataset, [self.site_list[0]],
                 self.bucket_ids[0], site_filename),
            call(self.project, self.dest_dataset, [self.site_list[1]],
                 self.bucket_ids[1], site_filename),
            call(self.project, self.dest_dataset, self.site_list,
                 self.internal_bucket_id, drc_filename)
        ]
        self.assertEqual(self.mock_validation_report.mock_calls,
                         expected_report_calls)
Пример #2
0
    def test_match_participants_same_participant_simulate_merge_errors(self):
        """
        Check the calls made when the merge-stage mocks raise HTTP errors.

        mock_merge_fields, mock_remove_sparse_records and mock_change_nulls
        are each configured to raise a status-500 HTTP error.  The test
        expects every one of them to still be called once per site, and
        expects no bucket lookups and no validation report calls.
        """
        # pre conditions
        # HttpError expects a response object and bytes content, so the
        # errors are built with the test_util helper instead of from a bare
        # int status and str content.
        self.mock_merge_fields.side_effect = test_util.mock_google_http_error(
            status_code=500, content=b'baz', reason='bar')
        self.mock_remove_sparse_records.side_effect = (
            test_util.mock_google_http_error(
                status_code=500, content=b'', reason='r'))
        self.mock_change_nulls.side_effect = test_util.mock_google_http_error(
            status_code=500, content=b'baz', reason='bar')

        # test
        id_match.match_participants(
            self.project,
            self.rdr_dataset,
            self.pii_dataset,
            self.dest_dataset
        )

        # post conditions
        # Mock assert_* helpers raise AssertionError on a mismatch and return
        # None, so they are called directly rather than inside assertEqual.
        self.mock_dest_dataset.assert_called_once_with(
            dataset_id=self.dest_dataset,
            description=consts.DESTINATION_DATASET_DESCRIPTION.format(
                version='', rdr_dataset=self.rdr_dataset, ehr_dataset=self.pii_dataset
            ),
            overwrite_existing=True
        )

        self.mock_match_tables.assert_called_once_with(
            self.project, self.rdr_dataset, self.dest_dataset
        )

        self.mock_site_names.assert_called_once_with()

        num_sites = len(self.site_list)
        self.assertEqual(self.mock_pii_match_tables.call_count, num_sites)

        # NOTE(review): the per-site multipliers below are taken as-is from
        # the existing expectations -- confirm against match_participants.
        self.assertEqual(self.mock_ehr_person.call_count, num_sites * 2)
        self.assertEqual(self.mock_rdr_values.call_count, num_sites * 12)
        self.assertEqual(self.mock_pii_values.call_count, num_sites * 5)
        self.assertEqual(self.mock_table_append.call_count, num_sites * 12)
        self.assertEqual(self.mock_location_pii.call_count, num_sites * 5)
        self.assertEqual(self.mock_merge_fields.call_count, num_sites)
        self.assertEqual(self.mock_remove_sparse_records.call_count, num_sites)
        self.assertEqual(self.mock_change_nulls.call_count, num_sites)
        # With the merge-stage mocks raising, no bucket lookup or report call
        # is expected.
        self.assertEqual(self.mock_hpo_bucket.call_count, 0)
        self.assertEqual(self.mock_drc_bucket.call_count, 0)
        self.assertEqual(self.mock_validation_report.call_count, 0)
Пример #3
0
    def test_match_participants_same_participant_simulate_pii_read_errors(
            self):
        """
        Check the calls made when reading PII values raises an HTTP error.

        mock_pii_values is configured to raise a status-500 HTTP error built
        by test_util.mock_google_http_error.  The test pins the resulting
        call counts of the patched helpers and expects no bucket lookups and
        no validation report calls.
        """
        # pre conditions
        self.mock_pii_values.side_effect = test_util.mock_google_http_error(
            status_code=500, content=b'baz', reason='bar')

        # test
        id_match.match_participants(self.project, self.rdr_dataset,
                                    self.pii_dataset, self.dest_dataset)

        # post conditions
        # Mock assert_* helpers raise AssertionError on a mismatch and return
        # None, so they are called directly rather than inside assertEqual.
        self.mock_dest_dataset.assert_called_once_with(
            dataset_id=self.dest_dataset,
            description=consts.DESTINATION_DATASET_DESCRIPTION.format(
                version='',
                rdr_dataset=self.rdr_dataset,
                ehr_dataset=self.pii_dataset),
            overwrite_existing=True)

        self.mock_match_tables.assert_called_once_with(self.project,
                                                       self.rdr_dataset,
                                                       self.dest_dataset)

        self.mock_site_names.assert_called_once_with()

        num_sites = len(self.site_list)
        self.assertEqual(self.mock_pii_match_tables.call_count, num_sites)

        # NOTE(review): the (num_sites - 1) factors and multipliers below are
        # taken as-is from the existing expectations -- confirm against
        # match_participants.
        self.assertEqual(self.mock_ehr_person.call_count, (num_sites - 1) * 2)
        self.assertEqual(self.mock_rdr_values.call_count, (num_sites - 1) * 11)
        self.assertEqual(self.mock_pii_values.call_count, (num_sites - 1) * 4)
        self.assertEqual(self.mock_table_write.call_count, num_sites)
        self.assertEqual(self.mock_location_pii.call_count,
                         (num_sites - 1) * 5)
        # With PII reads raising, no bucket lookup or report call is expected.
        self.assertEqual(self.mock_hpo_bucket.call_count, 0)
        self.assertEqual(self.mock_drc_bucket.call_count, 0)
        self.assertEqual(self.mock_validation_report.call_count, 0)
Пример #4
0
def validate_pii():
    """
    Run participant matching against the configured datasets.

    The application id and the combined, default and validation results
    dataset ids are looked up through bq_utils and handed to
    matching.match_participants.  An error is logged when the error count it
    reports is above zero.

    :return: consts.VALIDATION_SUCCESS, whatever the error count was
    """
    project_id = bq_utils.app_identity.get_application_id()
    combined_id = bq_utils.get_combined_dataset_id()
    ehr_id = bq_utils.get_dataset_id()
    results_id = bq_utils.get_validation_results_dataset_id()

    logging.info('Calling match_participants')
    # Only the second element of the returned pair (the error count) is used.
    _ignored, error_count = matching.match_participants(
        project_id, combined_id, ehr_id, results_id)

    validation_had_errors = error_count > 0
    if validation_had_errors:
        logging.error("Errors encountered in validation process")

    return consts.VALIDATION_SUCCESS