def setUp(self):
    self.app_id = app_identity.get_application_id()
    self.dataset_id = bq_utils.get_dataset_id()
    self.bucket = gcs_utils.get_drc_bucket()
    test_util.empty_bucket(self.bucket)
    test_util.delete_all_tables(self.dataset_id)
    self.load_test_data(hpo_id=HPO_NYC)
def setUp(self):
    self.hpo_bucket = gcs_utils.get_hpo_bucket(FAKE_HPO_ID)
    self.dataset = bq_utils.get_dataset_id()
    self.project_id = app_identity.get_application_id()
    self.storage_client = StorageClient(self.project_id)
    self.storage_client.empty_bucket(self.hpo_bucket)
    test_util.delete_all_tables(self.dataset)
def setUp(self):
    self.project_id = bq_utils.app_identity.get_application_id()
    self.dataset_id = bq_utils.get_combined_dataset_id()
    self.sandbox_dataset_id = bq_utils.get_unioned_dataset_id()
    if not self.project_id or not self.dataset_id:
        # TODO: Fix handling of globals, push these assertions down if they are required.
        raise ValueError(
            f"missing configuration for project ('{self.project_id}') "
            f"and/or dataset ('{self.dataset_id}')")

    # TODO: Reconcile this with a consistent integration testing model. Ideally
    # each test should clean up after itself so that we don't need this
    # defensive check.
    test_util.delete_all_tables(self.dataset_id)

    # drop concept table
    drop_concept_table(self.dataset_id)

    create_tables = ['person', 'observation']
    table_fields = {
        'person': 'post_deid_person',
        'observation': 'observation',
        'concept': 'concept'
    }
    for tbl in ['concept']:
        if not bq_utils.table_exists(tbl, dataset_id=self.dataset_id):
            create_tables.append(tbl)
    for tbl in create_tables:
        bq_utils.create_standard_table(table_fields[tbl],
                                       tbl,
                                       dataset_id=self.dataset_id,
                                       force_all_nullable=True)
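# Illustration only (assumes the dataset starts empty): in that case the loop
# above appends 'concept', so create_tables ends up as
# ['person', 'observation', 'concept'], and each corresponding schema in
# table_fields is created with every column forced nullable.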
@classmethod
def setUpClass(cls):
    print(
        '\n**************************************************************')
    print(cls.__name__)
    print('**************************************************************')
    dataset_id = bq_utils.get_dataset_id()
    test_util.delete_all_tables(dataset_id)
    test_util.populate_achilles()
def setUp(self):
    self.project_id = app_identity.get_application_id()
    self.dataset_id = bq_utils.get_dataset_id()
    self.bucket: str = gcs_utils.get_drc_bucket()
    self.storage_client = StorageClient(self.project_id)
    self.storage_client.empty_bucket(self.bucket)
    test_util.delete_all_tables(self.dataset_id)
    self.load_test_data(hpo_id=HPO_NYC)
@classmethod
def setUpClass(cls):
    print(
        '\n**************************************************************')
    print(cls.__name__)
    print('**************************************************************')
    fake_bucket = gcs_utils.get_hpo_bucket(test_util.FAKE_HPO_ID)
    dataset_id = bq_utils.get_dataset_id()
    test_util.delete_all_tables(dataset_id)
    test_util.get_synpuf_results_files()
    test_util.populate_achilles(fake_bucket)
def setUp(self):
    self.hpo_bucket = gcs_utils.get_hpo_bucket(FAKE_HPO_ID)
    self.person_table_id = bq_utils.get_table_id(FAKE_HPO_ID, common.PERSON)
    self.dataset_id = bq_utils.get_dataset_id()
    test_util.delete_all_tables(self.dataset_id)
    self.project_id = app_identity.get_application_id()
    self.TEST_FIELDS = [
        {
            "type": "integer",
            "name": "integer_field",
            "mode": "required",
            "description": "An integer field"
        },
        # DC-586 Import RDR rules should support null fields
        {
            "type": "integer",
            "name": "nullable_integer_field",
            "mode": "nullable",
            "description": "A nullable integer field"
        },
        {
            "type": "string",
            "name": "string_field",
            "mode": "required",
            "description": "A string field"
        },
        {
            "type": "date",
            "name": "date_field",
            "mode": "required",
            "description": "A date field"
        },
        {
            "type": "timestamp",
            "name": "timestamp_field",
            "mode": "required",
            "description": "A timestamp field"
        },
        {
            "type": "boolean",
            "name": "boolean_field",
            "mode": "required",
            "description": "A boolean field"
        },
        {
            "type": "float",
            "name": "float_field",
            "mode": "required",
            "description": "A float field"
        }
    ]
    self.DT_FORMAT = '%Y-%m-%d %H:%M:%S'
    self.client = StorageClient(self.project_id)
    self.client.empty_bucket(self.hpo_bucket)
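# For illustration only: a hypothetical row conforming to TEST_FIELDS and
# DT_FORMAT above. The name SAMPLE_ROW and all values are made up for this
# sketch; they are not part of the test fixture.
SAMPLE_ROW = {
    'integer_field': 1,
    'nullable_integer_field': None,  # DC-586: nullable fields may be absent
    'string_field': 'example',
    'date_field': '2019-01-01',
    'timestamp_field': '2019-01-01 00:00:00',  # matches DT_FORMAT
    'boolean_field': True,
    'float_field': 1.0,
}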
@classmethod
def setUpClass(cls):
    print('**************************************************************')
    print(cls.__name__)
    print('**************************************************************')
    # TODO base class this
    ehr_dataset_id = bq_utils.get_dataset_id()
    rdr_dataset_id = bq_utils.get_rdr_dataset_id()
    test_util.delete_all_tables(ehr_dataset_id)
    test_util.delete_all_tables(rdr_dataset_id)
    cls.load_dataset_from_files(ehr_dataset_id,
                                test_util.NYC_FIVE_PERSONS_PATH, True)
    cls.load_dataset_from_files(rdr_dataset_id, test_util.RDR_PATH)
def tearDown(self):
    test_util.delete_all_tables(self.dataset_id)
    # Delete the concept table...
    drop_concept_table(self.dataset_id)
    # ...and recreate it from the vocabulary dataset.
    q = """CREATE OR REPLACE TABLE `{project}.{dataset}.concept` AS (
        SELECT * FROM `{project}.{vocab}.concept`)""".format(
        project=self.project_id,
        dataset=self.dataset_id,
        vocab=common.VOCABULARY_DATASET)
    bq_utils.query(q)
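# For reference, with hypothetical values project='my-project',
# dataset='combined', and common.VOCABULARY_DATASET == 'vocabulary', the
# query above renders as:
#
#   CREATE OR REPLACE TABLE `my-project.combined.concept` AS (
#       SELECT * FROM `my-project.vocabulary.concept`)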
def setUp(self):
    self.hpo_id = test_util.FAKE_HPO_ID
    self.hpo_bucket = gcs_utils.get_hpo_bucket(self.hpo_id)
    mock_get_hpo_name = mock.patch('validation.main.get_hpo_name')
    self.mock_get_hpo_name = mock_get_hpo_name.start()
    self.mock_get_hpo_name.return_value = 'Fake HPO'
    self.addCleanup(mock_get_hpo_name.stop)
    self.bigquery_dataset_id = bq_utils.get_dataset_id()
    self.folder_prefix = '2019-01-01/'
    self._empty_bucket()
    test_util.delete_all_tables(self.bigquery_dataset_id)
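# Note on the patching idiom above (also used in the setUp methods below):
# starting the patch in setUp and registering mock_get_hpo_name.stop with
# addCleanup keeps the mock active for the whole test while guaranteeing it
# is undone even if setUp itself raises partway through, which a
# tearDown-based stop would not ensure.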
@classmethod
def setUpClass(cls):
    print('**************************************************************')
    print(cls.__name__)
    print('**************************************************************')
    # TODO base class this
    logger.level = logging.INFO
    stream_handler = logging.StreamHandler(sys.stdout)
    logger.addHandler(stream_handler)
    ehr_dataset_id = bq_utils.get_dataset_id()
    rdr_dataset_id = bq_utils.get_rdr_dataset_id()
    test_util.delete_all_tables(ehr_dataset_id)
    test_util.delete_all_tables(rdr_dataset_id)
    cls.load_dataset_from_files(ehr_dataset_id,
                                test_util.NYC_FIVE_PERSONS_PATH, True)
    cls.load_dataset_from_files(rdr_dataset_id, test_util.RDR_PATH)
def setUp(self):
    self.hpo_bucket = gcs_utils.get_hpo_bucket(FAKE_HPO_ID)
    self.project_id = app_identity.get_application_id()
    self.dataset_id = bq_utils.get_dataset_id()
    self.rdr_dataset_id = bq_utils.get_rdr_dataset_id()
    self.folder_prefix = '2019-01-01/'
    test_util.delete_all_tables(self.dataset_id)
    test_util.empty_bucket(self.hpo_bucket)
    mock_get_hpo_name = mock.patch('validation.main.get_hpo_name')
    self.mock_get_hpo_name = mock_get_hpo_name.start()
    self.mock_get_hpo_name.return_value = 'Fake HPO'
    self.addCleanup(mock_get_hpo_name.stop)
    self._load_data()
def setUp(self):
    self.hpo_id = test_util.FAKE_HPO_ID
    self.hpo_bucket = gcs_utils.get_hpo_bucket(self.hpo_id)
    self.project_id = app_identity.get_application_id()
    self.rdr_dataset_id = bq_utils.get_rdr_dataset_id()
    mock_get_hpo_name = mock.patch('validation.main.get_hpo_name')
    self.mock_get_hpo_name = mock_get_hpo_name.start()
    self.mock_get_hpo_name.return_value = 'Fake HPO'
    self.addCleanup(mock_get_hpo_name.stop)
    self.bigquery_dataset_id = bq_utils.get_dataset_id()
    self.folder_prefix = '2019-01-01-v1/'
    self._empty_bucket()
    test_util.delete_all_tables(self.bigquery_dataset_id)
    self._create_drug_class_table(self.bigquery_dataset_id)
def test_execute_queries(self):
    project_id = bq_utils.app_identity.get_application_id()
    dataset_id = bq_utils.get_combined_dataset_id()
    sandbox_id = bq_utils.get_unioned_dataset_id()
    test_util.delete_all_tables(dataset_id)

    create_tables = (
        ['person'] + common.CLINICAL_DATA_TABLES +
        ['_mapping_' + t for t in common.MAPPED_CLINICAL_DATA_TABLES])
    # TODO(calbach): Make the setup/teardown of these concept tables hermetic.
    for tbl in ['concept', 'concept_ancestor']:
        if not bq_utils.table_exists(tbl, dataset_id=dataset_id):
            create_tables.append(tbl)
    for tbl in create_tables:
        bq_utils.create_standard_table(tbl,
                                       tbl,
                                       dataset_id=dataset_id,
                                       force_all_nullable=True)

    for tmpl in INSERT_FAKE_PARTICIPANTS_TMPLS:
        resp = bq_utils.query(
            tmpl.render(
                project_id=project_id,
                dataset_id=dataset_id,
                rdr_basics_concept_id=123,
                rdr_consent_concept_id=345,
                ehr_obs_concept_id=567,
                rdr_basics_module_concept_id=(
                    drop_participants_without_ppi_or_ehr.
                    BASICS_MODULE_CONCEPT_ID)))
        self.assertTrue(resp["jobComplete"])

    clean_cdr_engine.clean_dataset(
        project_id, dataset_id, sandbox_id,
        [(drop_participants_without_ppi_or_ehr.get_queries,)])

    def table_to_person_ids(t):
        rows = bq_utils.response2rows(
            bq_utils.query("SELECT person_id FROM `{}.{}.{}`".format(
                project_id, dataset_id, t)))
        return set([r["person_id"] for r in rows])

    # We expect participants 1, 5 to have been removed from all tables.
    self.assertEqual(set([2, 3, 4, 6]), table_to_person_ids("person"))
    self.assertEqual(set([2, 4, 6]), table_to_person_ids("observation"))
    self.assertEqual(set([3, 4]), table_to_person_ids("drug_exposure"))

    test_util.delete_all_tables(dataset_id)
def setUp(self):
    self.project_id = bq_utils.app_identity.get_application_id()
    self.hpo_ids = [NYC_HPO_ID, PITT_HPO_ID]
    self.input_dataset_id = bq_utils.get_dataset_id()
    self.output_dataset_id = bq_utils.get_unioned_dataset_id()
    # This is redundant; the buckets are also emptied in tearDown().
    self._empty_hpo_buckets()
    test_util.delete_all_tables(self.input_dataset_id)
    test_util.delete_all_tables(self.output_dataset_id)

    # TODO Generalize to work for all foreign key references
    # Collect all primary key fields in CDM tables
    mapped_fields = []
    for table in cdm.tables_to_map():
        field = table + '_id'
        mapped_fields.append(field)
    self.mapped_fields = mapped_fields
    self.implemented_foreign_keys = [
        eu_constants.VISIT_OCCURRENCE_ID, eu_constants.CARE_SITE_ID,
        eu_constants.LOCATION_ID
    ]
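# Illustration only (hypothetical return value): if cdm.tables_to_map()
# returned ['visit_occurrence', 'care_site'], the loop above would yield
# mapped_fields == ['visit_occurrence_id', 'care_site_id'].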
def tearDown(self):
    test_util.delete_all_tables(self.dataset_id)
def setUp(self):
    self.ehr_dataset_id = bq_utils.get_dataset_id()
    self.rdr_dataset_id = bq_utils.get_rdr_dataset_id()
    self.combined_dataset_id = bq_utils.get_combined_dataset_id()
    self.drc_bucket = gcs_utils.get_drc_bucket()
    test_util.delete_all_tables(self.combined_dataset_id)
@classmethod
def tearDownClass(cls):
    ehr_dataset_id = bq_utils.get_dataset_id()
    rdr_dataset_id = bq_utils.get_rdr_dataset_id()
    test_util.delete_all_tables(ehr_dataset_id)
    test_util.delete_all_tables(rdr_dataset_id)
def tearDown(self):
    self._empty_hpo_buckets()
    test_util.delete_all_tables(self.input_dataset_id)
    test_util.delete_all_tables(self.output_dataset_id)
def setUp(self):
    self.hpo_bucket = gcs_utils.get_hpo_bucket(test_util.FAKE_HPO_ID)
    test_util.empty_bucket(self.hpo_bucket)
    test_util.delete_all_tables(bq_utils.get_dataset_id())
def tearDown(self):
    self.storage_client.empty_bucket(self.hpo_bucket)
    bucket_nyc = gcs_utils.get_hpo_bucket('nyc')
    self.storage_client.empty_bucket(bucket_nyc)
    self.storage_client.empty_bucket(gcs_utils.get_drc_bucket())
    test_util.delete_all_tables(self.bigquery_dataset_id)
@classmethod
def tearDownClass(cls):
    dataset_id = bq_utils.get_dataset_id()
    test_util.delete_all_tables(dataset_id)
def tearDown(self):
    test_util.delete_all_tables(self.bq_dataset_id)
    test_util.delete_all_tables(self.bq_sandbox_dataset_id)
    self.client.delete_dataset(self.bq_sandbox_dataset_id)
def tearDown(self):
    test_util.delete_all_tables(self.dataset_id)
    self.client.delete_table(self.fq_concept_table)
def tearDown(self):
    self.storage_client.empty_bucket(self.bucket)
    test_util.delete_all_tables(self.dataset_id)
def tearDown(self):
    test_util.delete_all_tables(self.dataset_id)
    self.client.empty_bucket(self.hpo_bucket)
def tearDown(self):
    test_util.delete_all_tables(bq_utils.get_dataset_id())
    test_util.empty_bucket(self.hpo_bucket)
def tearDown(self):
    self._empty_bucket()
    bucket_nyc = gcs_utils.get_hpo_bucket('nyc')
    test_util.empty_bucket(bucket_nyc)
    test_util.empty_bucket(gcs_utils.get_drc_bucket())
    test_util.delete_all_tables(self.bigquery_dataset_id)
def tearDown(self):
    test_util.empty_bucket(self.bucket)
    test_util.delete_all_tables(self.dataset_id)