def load_dataset_from_files(dataset_id, path):
    app_id = bq_utils.app_identity.get_application_id()
    bucket = gcs_utils.get_hpo_bucket(test_util.FAKE_HPO_ID)
    test_util.empty_bucket(bucket)
    job_ids = []
    for table in common.CDM_TABLES:
        filename = table + '.csv'
        schema = os.path.join(resources.fields_path, table + '.json')
        f = os.path.join(path, filename)
        if os.path.exists(f):
            # Upload the fixture CSV for this table if one exists
            with open(f, 'r') as fp:
                gcs_utils.upload_object(bucket, filename, fp)
        else:
            # Upload an empty file so the load job still creates an empty table
            test_util.write_cloud_str(bucket, filename, '\n')
        gcs_path = 'gs://{bucket}/{filename}'.format(bucket=bucket,
                                                     filename=filename)
        load_results = bq_utils.load_csv(schema,
                                         gcs_path,
                                         app_id,
                                         dataset_id,
                                         table,
                                         allow_jagged_rows=True)
        load_job_id = load_results['jobReference']['jobId']
        job_ids.append(load_job_id)
    incomplete_jobs = bq_utils.wait_on_jobs(job_ids)
    if len(incomplete_jobs) > 0:
        message = "Job id(s) %s failed to complete" % incomplete_jobs
        raise RuntimeError(message)
    test_util.empty_bucket(bucket)
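# Example usage (a minimal sketch): staging CDM fixtures before a test runs.
# The ExampleLoadTest class name and the 'tests/test_data' path are
# illustrative assumptions, not names taken from this section.
class ExampleLoadTest(unittest.TestCase):

    def setUp(self):
        self.dataset_id = bq_utils.get_dataset_id()
        test_util.delete_all_tables(self.dataset_id)
        load_dataset_from_files(self.dataset_id, 'tests/test_data')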
def test_target_bucket_upload(self):
    bucket_nyc = gcs_utils.get_hpo_bucket('nyc')
    folder_prefix = 'test-folder-fake/'
    test_util.empty_bucket(bucket_nyc)
    main._upload_achilles_files(hpo_id=None,
                                folder_prefix=folder_prefix,
                                target_bucket=bucket_nyc)
    actual_bucket_files = set(
        [item['name'] for item in gcs_utils.list_bucket(bucket_nyc)])
    expected_bucket_files = set(
        [folder_prefix + item for item in common.ALL_ACHILLES_INDEX_FILES])
    self.assertSetEqual(expected_bucket_files, actual_bucket_files)
def setUp(self):
    super(AchillesTest, self).setUp()
    self.testbed = testbed.Testbed()
    self.testbed.activate()
    self.testbed.init_app_identity_stub()
    self.testbed.init_memcache_stub()
    self.testbed.init_urlfetch_stub()
    self.testbed.init_blobstore_stub()
    self.testbed.init_datastore_v3_stub()
    self.hpo_bucket = gcs_utils.get_hpo_bucket(test_util.FAKE_HPO_ID)
    test_util.empty_bucket(self.hpo_bucket)
def setUp(self):
    self.testbed = testbed.Testbed()
    self.testbed.activate()
    self.testbed.init_app_identity_stub()
    self.testbed.init_memcache_stub()
    self.testbed.init_urlfetch_stub()
    self.testbed.init_blobstore_stub()
    self.testbed.init_datastore_v3_stub()
    self.hpo_bucket = gcs_utils.get_hpo_bucket(test_util.FAKE_HPO_ID)
    test_util.empty_bucket(self.hpo_bucket)
    test_util.delete_all_tables(bq_utils.get_dataset_id())
def setUp(self):
    self.testbed = testbed.Testbed()
    self.testbed.activate()
    self.testbed.init_app_identity_stub()
    self.testbed.init_memcache_stub()
    self.testbed.init_urlfetch_stub()
    self.testbed.init_blobstore_stub()
    self.testbed.init_datastore_v3_stub()
    self.app_id = os.environ.get('APPLICATION_ID')
    self.dataset_id = os.environ.get('BIGQUERY_DATASET_ID')
    self.bucket = os.environ.get('DRC_BUCKET_NAME')
    test_util.empty_bucket(self.bucket)
    test_util.delete_all_tables(self.dataset_id)
    self.load_test_data(hpo_id=HPO_NYC)
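# load_test_data is called above but not shown in this section. A minimal
# sketch, assuming it stages per-HPO CSV fixtures into that HPO's bucket;
# the TEST_DATA_PATH constant and the flat folder layout are illustrative
# assumptions, and the real helper may differ.
def load_test_data(self, hpo_id):
    bucket = gcs_utils.get_hpo_bucket(hpo_id)
    test_util.empty_bucket(bucket)
    for table in resources.CDM_TABLES:
        filename = table + '.csv'
        f = os.path.join(TEST_DATA_PATH, filename)
        if os.path.exists(f):
            with open(f, 'r') as fp:
                gcs_utils.upload_object(bucket, filename, fp)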
def load_dataset_from_files(dataset_id, path, mappings=False):
    bucket = gcs_utils.get_hpo_bucket(test_util.FAKE_HPO_ID)
    test_util.empty_bucket(bucket)
    job_ids = []
    for table in resources.CDM_TABLES:
        job_ids.append(
            CombineEhrRdrTest._upload_file_to_bucket(bucket, dataset_id, path,
                                                     table))
        if mappings and table in DOMAIN_TABLES:
            mapping_table = '_mapping_{table}'.format(table=table)
            job_ids.append(
                CombineEhrRdrTest._upload_file_to_bucket(
                    bucket, dataset_id, path, mapping_table))
    incomplete_jobs = bq_utils.wait_on_jobs(job_ids)
    if len(incomplete_jobs) > 0:
        message = "Job id(s) %s failed to complete" % incomplete_jobs
        raise RuntimeError(message)
    test_util.empty_bucket(bucket)
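# _upload_file_to_bucket is referenced above but not shown in this section.
# A plausible sketch that simply mirrors the per-table upload-and-load logic
# of the first load_dataset_from_files helper; the actual implementation may
# differ.
@staticmethod
def _upload_file_to_bucket(bucket, dataset_id, path, table):
    app_id = bq_utils.app_identity.get_application_id()
    filename = table + '.csv'
    schema = os.path.join(resources.fields_path, table + '.json')
    f = os.path.join(path, filename)
    if os.path.exists(f):
        with open(f, 'r') as fp:
            gcs_utils.upload_object(bucket, filename, fp)
    else:
        # An empty object still lets the load job create an empty table
        test_util.write_cloud_str(bucket, filename, '\n')
    gcs_path = 'gs://{bucket}/{filename}'.format(bucket=bucket,
                                                 filename=filename)
    load_results = bq_utils.load_csv(schema,
                                     gcs_path,
                                     app_id,
                                     dataset_id,
                                     table,
                                     allow_jagged_rows=True)
    return load_results['jobReference']['jobId']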
def _empty_hpo_buckets(self):
    for hpo_id in self.hpo_ids:
        bucket = gcs_utils.get_hpo_bucket(hpo_id)
        test_util.empty_bucket(bucket)
def tearDown(self):
    self._empty_bucket()
    bucket_nyc = gcs_utils.get_hpo_bucket('nyc')
    test_util.empty_bucket(bucket_nyc)
    test_util.empty_bucket(gcs_utils.get_drc_bucket())
    self.testbed.deactivate()
def tearDown(self):
    test_util.empty_bucket(self.hpo_bucket)
    self.testbed.deactivate()
def tearDown(self):
    test_util.empty_bucket(self.bucket)
    test_util.delete_all_tables(self.dataset_id)
    self.testbed.deactivate()
def tearDown(self):
    test_util.delete_all_tables(bq_utils.get_dataset_id())
    test_util.empty_bucket(self.hpo_bucket)
    self.testbed.deactivate()
def tearDown(self):
    self._empty_bucket()
    bucket_nyc = gcs_utils.get_hpo_bucket('nyc')
    test_util.empty_bucket(bucket_nyc)