def load_dataset_from_files(dataset_id, path):
    """Load one CSV per CDM table from *path* into BigQuery *dataset_id*.

    For each CDM table, upload ``<table>.csv`` from *path* to the fake-HPO
    bucket (or a blank placeholder object when the local file is missing),
    start a BigQuery load job, then wait for all jobs and empty the bucket.

    :param dataset_id: target BigQuery dataset for the loaded tables
    :param path: local directory expected to contain ``<table>.csv`` files
    :raises RuntimeError: if any load job fails to complete
    """
    app_id = bq_utils.app_identity.get_application_id()
    bucket = gcs_utils.get_hpo_bucket(test_util.FAKE_HPO_ID)
    test_util.empty_bucket(bucket)
    job_ids = []
    for table in common.CDM_TABLES:
        filename = table + '.csv'
        schema = os.path.join(resources.fields_path, table + '.json')
        f = os.path.join(path, filename)
        # Reuse the already-joined path instead of re-joining it.
        if os.path.exists(f):
            with open(f, 'r') as fp:
                gcs_utils.upload_object(bucket, filename, fp)
        else:
            # No local file: upload a blank object so the load job still runs.
            test_util.write_cloud_str(bucket, filename, '\n')
        # BUG FIX: the format string lacked the {filename} placeholder
        # (str.format silently ignores extra kwargs), so every table was
        # loaded from the same bogus GCS path.
        gcs_path = 'gs://{bucket}/{filename}'.format(bucket=bucket,
                                                     filename=filename)
        load_results = bq_utils.load_csv(schema,
                                         gcs_path,
                                         app_id,
                                         dataset_id,
                                         table,
                                         allow_jagged_rows=True)
        job_ids.append(load_results['jobReference']['jobId'])
    incomplete_jobs = bq_utils.wait_on_jobs(job_ids)
    if incomplete_jobs:
        message = "Job id(s) %s failed to complete" % incomplete_jobs
        raise RuntimeError(message)
    test_util.empty_bucket(bucket)
# --- Example 2 ---
    def test_target_bucket_upload(self):
        """Achilles index files must land under the folder prefix of the
        explicitly supplied target bucket (no hpo_id given)."""
        folder_prefix = 'test-folder-fake/'
        bucket_nyc = gcs_utils.get_hpo_bucket('nyc')
        test_util.empty_bucket(bucket_nyc)

        main._upload_achilles_files(hpo_id=None,
                                    folder_prefix=folder_prefix,
                                    target_bucket=bucket_nyc)
        actual_bucket_files = {entry['name']
                               for entry in gcs_utils.list_bucket(bucket_nyc)}
        expected_bucket_files = {'test-folder-fake/' + name
                                 for name in common.ALL_ACHILLES_INDEX_FILES}
        self.assertSetEqual(expected_bucket_files, actual_bucket_files)
# --- Example 3 ---
 def setUp(self):
     """Activate a GAE testbed with the stubs these tests rely on and
     start from an empty fake-HPO bucket."""
     super(AchillesTest, self).setUp()
     tb = testbed.Testbed()
     tb.activate()
     # Enable every service stub the tests touch.
     for init_stub in (tb.init_app_identity_stub,
                       tb.init_memcache_stub,
                       tb.init_urlfetch_stub,
                       tb.init_blobstore_stub,
                       tb.init_datastore_v3_stub):
         init_stub()
     self.testbed = tb
     self.hpo_bucket = gcs_utils.get_hpo_bucket(test_util.FAKE_HPO_ID)
     test_util.empty_bucket(self.hpo_bucket)
# --- Example 4 ---
 def setUp(self):
     """Bring up the GAE testbed and reset the fake-HPO bucket and the
     BigQuery dataset so each test starts from a clean slate."""
     tb = testbed.Testbed()
     tb.activate()
     for init_stub in (tb.init_app_identity_stub,
                       tb.init_memcache_stub,
                       tb.init_urlfetch_stub,
                       tb.init_blobstore_stub,
                       tb.init_datastore_v3_stub):
         init_stub()
     self.testbed = tb
     self.hpo_bucket = gcs_utils.get_hpo_bucket(test_util.FAKE_HPO_ID)
     test_util.empty_bucket(self.hpo_bucket)
     test_util.delete_all_tables(bq_utils.get_dataset_id())
 def setUp(self):
     """Activate the GAE testbed, read run configuration from the
     environment, reset cloud state, and seed NYC test data."""
     tb = testbed.Testbed()
     tb.activate()
     for init_stub in (tb.init_app_identity_stub,
                       tb.init_memcache_stub,
                       tb.init_urlfetch_stub,
                       tb.init_blobstore_stub,
                       tb.init_datastore_v3_stub):
         init_stub()
     self.testbed = tb
     # Integration-run configuration supplied via environment variables.
     self.app_id = os.environ.get('APPLICATION_ID')
     self.dataset_id = os.environ.get('BIGQUERY_DATASET_ID')
     self.bucket = os.environ.get('DRC_BUCKET_NAME')
     test_util.empty_bucket(self.bucket)
     test_util.delete_all_tables(self.dataset_id)
     self.load_test_data(hpo_id=HPO_NYC)
# --- Example 6 ---
 def load_dataset_from_files(dataset_id, path, mappings=False):
     """Upload CDM CSVs (plus ``_mapping_*`` CSVs when *mappings* is true)
     from *path* and load each into BigQuery *dataset_id*.

     :raises RuntimeError: if any load job does not complete
     """
     bucket = gcs_utils.get_hpo_bucket(test_util.FAKE_HPO_ID)
     test_util.empty_bucket(bucket)
     upload = CombineEhrRdrTest._upload_file_to_bucket
     job_ids = []
     for table in resources.CDM_TABLES:
         job_ids.append(upload(bucket, dataset_id, path, table))
         if mappings and table in DOMAIN_TABLES:
             # Domain tables carry a companion mapping table.
             job_ids.append(
                 upload(bucket, dataset_id, path, '_mapping_' + table))
     incomplete_jobs = bq_utils.wait_on_jobs(job_ids)
     if incomplete_jobs:
         message = "Job id(s) %s failed to complete" % incomplete_jobs
         raise RuntimeError(message)
     test_util.empty_bucket(bucket)
# --- Example 7 ---
 def _empty_hpo_buckets(self):
     """Delete every object from each HPO bucket in ``self.hpo_ids``."""
     for hpo_id in self.hpo_ids:
         test_util.empty_bucket(gcs_utils.get_hpo_bucket(hpo_id))
# --- Example 8 ---
 def tearDown(self):
     """Empty every bucket the tests touched, then shut the testbed down."""
     self._empty_bucket()
     # NYC HPO bucket first, then the DRC bucket (same order as setup code).
     for bucket in (gcs_utils.get_hpo_bucket('nyc'),
                    gcs_utils.get_drc_bucket()):
         test_util.empty_bucket(bucket)
     self.testbed.deactivate()
# --- Example 9 ---
 def tearDown(self):
     """Remove leftover objects from the HPO bucket, then stop the testbed."""
     leftover_bucket = self.hpo_bucket
     test_util.empty_bucket(leftover_bucket)
     self.testbed.deactivate()
 def tearDown(self):
     """Reset the bucket and dataset used by the test, then stop the testbed."""
     bucket, dataset = self.bucket, self.dataset_id
     test_util.empty_bucket(bucket)
     test_util.delete_all_tables(dataset)
     self.testbed.deactivate()
# --- Example 11 ---
 def tearDown(self):
     """Drop every table in the configured dataset and empty the HPO
     bucket before deactivating the testbed."""
     dataset_id = bq_utils.get_dataset_id()
     test_util.delete_all_tables(dataset_id)
     test_util.empty_bucket(self.hpo_bucket)
     self.testbed.deactivate()
# --- Example 12 ---
 def tearDown(self):
     self._empty_bucket()
     bucket_nyc = gcs_utils.get_hpo_bucket('nyc')
     test_util.empty_bucket(bucket_nyc)