def test_integration_five_person_data_retraction(self, mock_hpo_bucket,
                                                     mock_bucket,
                                                     mock_extract_pids):
        mock_hpo_bucket.return_value = self.site_bucket
        mock_bucket.return_value = self.bucket
        mock_extract_pids.return_value = self.pids
        lines_to_remove = {}
        total_lines_prior = {}
        for file_path in test_util.FIVE_PERSONS_FILES:
            # generate results files
            file_name = file_path.split('/')[-1]
            table_name = file_name.split('.')[0]
            lines_to_remove[file_name] = 0
            total_lines_prior[file_name] = 0
            with open(file_path) as f:
                # skip header
                next(f)
                for line in f:
                    line = line.strip()
                    if line != '':
                        if (table_name in rd.PID_IN_COL1 and int(line.split(",")[0]) in self.pids) or \
                                (table_name in rd.PID_IN_COL2 and int(line.split(",")[1]) in self.pids):
                            lines_to_remove[file_name] += 1
                        total_lines_prior[file_name] += 1

            # write file to cloud for testing
            test_util.write_cloud_file(self.bucket,
                                       file_path,
                                       prefix=self.folder_prefix_1)
            test_util.write_cloud_file(self.bucket,
                                       file_path,
                                       prefix=self.folder_prefix_2)

        retract_result = rd.run_gcs_retraction(self.project_id,
                                               self.sandbox_dataset_id,
                                               self.pid_table_id,
                                               self.hpo_id,
                                               folder='all_folders',
                                               force_flag=True)

        total_lines_post = {}
        for file_path in test_util.FIVE_PERSONS_FILES:
            file_name = file_path.split('/')[-1]
            actual_result_contents = test_util.read_cloud_file(
                self.bucket, self.folder_prefix_1 + file_name)
            # convert to list and remove header and last list item since it is a newline
            total_lines_post[file_name] = len(
                actual_result_contents.split('\n')[1:-1])

        for key in total_lines_prior.keys():
            if key in lines_to_remove:
                self.assertEqual(
                    lines_to_remove[key],
                    total_lines_prior[key] - total_lines_post[key])
            else:
                self.assertEqual(total_lines_prior[key], total_lines_post[key])

        # metadata for each updated file is returned
        self.assertEqual(len(retract_result[self.folder_prefix_1]),
                         len(lines_to_remove.keys()))
Exemplo n.º 2
0
 def _test_html_report_five_person(self, mock_check_cron):
     # Not sure this test is still relevant (see hpo_report module and tests)
     # TODO refactor or remove this test
     folder_prefix = '2019-01-01/'
     for cdm_file in test_util.FIVE_PERSONS_FILES:
         test_util.write_cloud_file(self.hpo_bucket,
                                    cdm_file,
                                    prefix=folder_prefix)
     # achilles sometimes fails due to rate limits.
     # using both success and failure cases allow it to fail gracefully until there is a fix for achilles
     with open(test_util.FIVE_PERSON_RESULTS_FILE, 'r') as f:
         expected_result_achilles_success = self._remove_timestamp_tags_from_results(
             f.read())
     with open(test_util.FIVE_PERSON_RESULTS_ACHILLES_ERROR_FILE, 'r') as f:
         expected_result_achilles_failure = self._remove_timestamp_tags_from_results(
             f.read())
     expected_results = [
         expected_result_achilles_success, expected_result_achilles_failure
     ]
     main.app.testing = True
     with main.app.test_client() as c:
         c.get(test_util.VALIDATE_HPO_FILES_URL)
         actual_result = test_util.read_cloud_file(
             self.hpo_bucket, folder_prefix + common.RESULTS_HTML)
         actual_result_file = self._remove_timestamp_tags_from_results(
             StringIO(actual_result).getvalue())
         self.assertIn(actual_result_file, expected_results)
Exemplo n.º 3
0
    def test_html_report_five_person(self, mock_check_cron, mock_first_run,
                                     mock_rdr_date, mock_required_files_loaded):
        mock_required_files_loaded.return_value = False
        mock_first_run.return_value = False
        rdr_date = '2020-01-01'
        mock_rdr_date.return_value = rdr_date
        for cdm_file in test_util.FIVE_PERSONS_FILES:
            test_util.write_cloud_file(self.hpo_bucket,
                                       cdm_file,
                                       prefix=self.folder_prefix)
        # load person table in RDR
        bq_utils.load_table_from_csv(self.project_id, self.rdr_dataset_id,
                                     common.PERSON,
                                     test_util.FIVE_PERSONS_PERSON_CSV)

        # Load measurement_concept_sets
        required_labs.load_measurement_concept_sets_table(
            project_id=self.project_id, dataset_id=self.bigquery_dataset_id)
        # Load measurement_concept_sets_descendants
        required_labs.load_measurement_concept_sets_descendants_table(
            project_id=self.project_id, dataset_id=self.bigquery_dataset_id)

        main.app.testing = True
        with main.app.test_client() as c:
            c.get(test_util.VALIDATE_HPO_FILES_URL)
            actual_result = test_util.read_cloud_file(
                self.hpo_bucket, self.folder_prefix + common.RESULTS_HTML)

        # ensure emails are not sent
        bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
        folder_items = main.get_folder_items(bucket_items, self.folder_prefix)
        self.assertFalse(main.is_first_validation_run(folder_items))

        # parse html
        soup = bs(actual_result, parser="lxml", features="lxml")
        missing_pii_html_table = soup.find('table', id='missing_pii')
        table_headers = missing_pii_html_table.find_all('th')
        self.assertEqual('Missing Participant Record Type',
                         table_headers[0].get_text())
        self.assertEqual('Count', table_headers[1].get_text())

        table_rows = missing_pii_html_table.find_next('tbody').find_all('tr')
        missing_record_types = [
            table_row.find('td').text for table_row in table_rows
        ]
        self.assertIn(main_consts.EHR_NO_PII, missing_record_types)
        self.assertIn(main_consts.PII_NO_EHR, missing_record_types)
        self.assertIn(main_consts.EHR_NO_RDR.format(date=rdr_date),
                      missing_record_types)
        self.assertIn(main_consts.EHR_NO_PARTICIPANT_MATCH,
                      missing_record_types)

        required_lab_html_table = soup.find('table', id='required-lab')
        table_headers = required_lab_html_table.find_all('th')
        self.assertEqual(3, len(table_headers))
        self.assertEqual('Ancestor Concept ID', table_headers[0].get_text())
        self.assertEqual('Ancestor Concept Name', table_headers[1].get_text())
        self.assertEqual('Found', table_headers[2].get_text())

        table_rows = required_lab_html_table.find_next('tbody').find_all('tr')
        table_rows_last_column = [
            table_row.find_all('td')[-1] for table_row in table_rows
        ]
        submitted_labs = [
            row for row in table_rows_last_column
            if 'result-1' in row.attrs['class']
        ]
        missing_labs = [
            row for row in table_rows_last_column
            if 'result-0' in row.attrs['class']
        ]
        self.assertTrue(len(table_rows) > 0)
        self.assertTrue(len(submitted_labs) > 0)
        self.assertTrue(len(missing_labs) > 0)
Exemplo n.º 4
0
    def test_html_report_five_person(self, mock_check_cron, mock_first_run,
                                     mock_required_files_loaded,
                                     mock_has_all_required_files,
                                     mock_updated_datetime_object):
        mock_required_files_loaded.return_value = False
        mock_first_run.return_value = False
        mock_has_all_required_files.return_value = True
        mock_updated_datetime_object.return_value = datetime.datetime.today(
        ) - datetime.timedelta(minutes=7)

        for cdm_file in test_util.FIVE_PERSONS_FILES:
            blob_name = f'{self.folder_prefix}{os.path.basename(cdm_file)}'
            test_blob = self.storage_bucket.blob(blob_name)
            test_blob.upload_from_filename(cdm_file)

        # load person table in RDR
        bq_utils.load_table_from_csv(self.project_id, self.rdr_dataset_id,
                                     common.PERSON,
                                     test_util.FIVE_PERSONS_PERSON_CSV)

        # Load measurement_concept_sets
        required_labs.load_measurement_concept_sets_table(
            project_id=self.project_id, dataset_id=self.bigquery_dataset_id)
        # Load measurement_concept_sets_descendants
        required_labs.load_measurement_concept_sets_descendants_table(
            project_id=self.project_id, dataset_id=self.bigquery_dataset_id)

        main.app.testing = True
        with main.app.test_client() as c:
            c.get(test_util.VALIDATE_HPO_FILES_URL)
            actual_result = test_util.read_cloud_file(
                self.hpo_bucket, self.folder_prefix + common.RESULTS_HTML)

        # ensure emails are not sent
        bucket_items = gcs_utils.list_bucket(self.hpo_bucket)
        folder_items = main.get_folder_items(bucket_items, self.folder_prefix)
        self.assertFalse(main.is_first_validation_run(folder_items))

        # parse html
        soup = bs(actual_result, parser="lxml", features="lxml")
        missing_pii_html_table = soup.find('table', id='missing_pii')
        table_headers = missing_pii_html_table.find_all('th')
        self.assertEqual('Missing Participant Record Type',
                         table_headers[0].get_text())
        self.assertEqual('Count', table_headers[1].get_text())

        table_rows = missing_pii_html_table.find_next('tbody').find_all('tr')
        missing_record_types = [
            table_row.find('td').text for table_row in table_rows
        ]
        self.assertIn(main_consts.EHR_NO_PII, missing_record_types)
        self.assertIn(main_consts.PII_NO_EHR, missing_record_types)

        # the missing from RDR component is obsolete (see DC-1932)
        # this is to confirm it was removed successfully from the report
        rdr_date = '2020-01-01'
        self.assertNotIn(main_consts.EHR_NO_RDR.format(date=rdr_date),
                         missing_record_types)
        self.assertIn(main_consts.EHR_NO_PARTICIPANT_MATCH,
                      missing_record_types)

        required_lab_html_table = soup.find('table', id='required-lab')
        table_headers = required_lab_html_table.find_all('th')
        self.assertEqual(3, len(table_headers))
        self.assertEqual('Ancestor Concept ID', table_headers[0].get_text())
        self.assertEqual('Ancestor Concept Name', table_headers[1].get_text())
        self.assertEqual('Found', table_headers[2].get_text())

        table_rows = required_lab_html_table.find_next('tbody').find_all('tr')
        table_rows_last_column = [
            table_row.find_all('td')[-1] for table_row in table_rows
        ]
        submitted_labs = [
            row for row in table_rows_last_column
            if 'result-1' in row.attrs['class']
        ]
        missing_labs = [
            row for row in table_rows_last_column
            if 'result-0' in row.attrs['class']
        ]
        self.assertTrue(len(table_rows) > 0)
        self.assertTrue(len(submitted_labs) > 0)
        self.assertTrue(len(missing_labs) > 0)