def test_data_is_aggregated(self):
    # There are 11 rows in the input file; 2 are for the same
    # practice/presentation and should be collapsed, and 1 is for
    # an UNKNOWN SURGERY (see issue #349)
    raw_data_path = (
        "frontend/tests/fixtures/commands/"
        "convert_hscic_prescribing/2016_01/"
        "EPD_201601.csv"
    )
    gcs_path = "hscic/prescribing_v2/2016_01/EPD_201601.csv"

    client = StorageClient()
    bucket = client.get_bucket()
    blob = bucket.blob(gcs_path)
    with open(raw_data_path, "rb") as f:
        blob.upload_from_file(f)

    call_command("convert_hscic_prescribing", filename=raw_data_path)

    # Test that the data was added to the prescribing table
    client = BQClient()
    sql = """SELECT *
        FROM {hscic}.prescribing_v2
        WHERE month = TIMESTAMP('2016-01-01')"""
    rows = list(results_to_dicts(client.query(sql)))
    self.assertEqual(len(rows), 9)

    for row in rows:
        if row["practice"] == "P92042" and row["bnf_code"] == "0202010B0AAABAB":
            self.assertEqual(row["quantity"], 1288)
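The test relies on a results_to_dicts helper to turn BigQuery query results into plain dicts. Its implementation isn't shown here; a minimal sketch, assuming the results iterate as google-cloud-bigquery Row objects (which support .items()), might look like:

def results_to_dicts(results):
    # Each google-cloud-bigquery Row supports dict-style access;
    # dict(row.items()) yields a plain {column_name: value} mapping.
    for row in results:
        yield dict(row.items())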
def test_existing_files_deleted(self):
    # Create a dataset fragment which should end up being deleted
    client = StorageClient()
    bucket = client.get_bucket()
    blob_name = (
        'hscic/views/vw__presentation_summary_by_ccg'
        '-000000009999.csv.gz'
    )
    blob = bucket.blob(blob_name)
    blob.upload_from_string("test", content_type="application/gzip")

    # Run import command
    call_command('create_views')

    # Check that the fragment is no longer there
    client = StorageClient()
    bucket = client.get_bucket()
    prefix, suffix = blob_name.split('-')
    for blob in bucket.list_blobs(prefix=prefix):
        self.assertNotIn(suffix, blob.path)
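The test asserts that create_views removes stale export fragments before writing new ones. The command itself isn't reproduced here; the deletion step being checked could be as simple as this sketch (the function name delete_existing_fragments is illustrative, not from the codebase):

def delete_existing_fragments(bucket, prefix):
    # Remove any previously-exported CSV fragments under this prefix so a
    # re-run cannot leave stale '-NNNNNNNNNNNN.csv.gz' parts behind.
    for blob in bucket.list_blobs(prefix=prefix):
        blob.delete()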
def update_bnf_table():
    """Update `bnf` table from cloud-stored CSV"""
    storage_client = StorageClient()
    bucket = storage_client.get_bucket()
    blobs = bucket.list_blobs(prefix="hscic/bnf_codes/")
    blobs = sorted(blobs, key=lambda blob: blob.name, reverse=True)
    blob = blobs[0]

    bq_client = BQClient("hscic")
    table = bq_client.get_table("bnf")
    table.insert_rows_from_storage(blob.name, skip_leading_rows=1)
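insert_rows_from_storage is a wrapper in this codebase rather than a google-cloud-bigquery method. Presumably it starts a load job from Cloud Storage; a minimal sketch using the standard BigQuery client, assuming a truncate-and-replace write (the bucket name and WRITE_TRUNCATE choice are assumptions):

from google.cloud import bigquery

def insert_rows_from_storage(table_id, blob_name, skip_leading_rows=0):
    # Load a CSV straight from Cloud Storage into a BigQuery table.
    client = bigquery.Client()
    job_config = bigquery.LoadJobConfig(
        source_format=bigquery.SourceFormat.CSV,
        skip_leading_rows=skip_leading_rows,
        write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
    )
    uri = "gs://my-bucket/" + blob_name  # bucket name is illustrative
    load_job = client.load_table_from_uri(uri, table_id, job_config=job_config)
    load_job.result()  # block until the load completes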
def upload_task_input_files(task):
    storage_client = StorageClient()
    bucket = storage_client.get_bucket()

    for path in task.input_paths():
        assert path[0] == '/'
        assert settings.PIPELINE_DATA_BASEDIR[-1] == '/'
        name = 'hscic' + path.replace(settings.PIPELINE_DATA_BASEDIR, '/')
        blob = bucket.blob(name)
        if blob.exists():
            print("Skipping %s, already uploaded" % name)
            continue
        print("Uploading %s to %s" % (path, name))
        # Open in binary mode: upload_from_file expects a bytes stream
        with open(path, 'rb') as f:
            blob.upload_from_file(f)
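The name computation maps a local pipeline path onto a bucket path by swapping the configured data directory for an 'hscic' prefix. For example, with an illustrative basedir (the values below are stand-ins, not real settings):

path = '/home/hscic/data/prescribing/2016_01/T201601PDPI.csv'
basedir = '/home/hscic/data/'  # stand-in for settings.PIPELINE_DATA_BASEDIR
name = 'hscic' + path.replace(basedir, '/')
assert name == 'hscic/prescribing/2016_01/T201601PDPI.csv'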
def test_data_is_aggregated(self):
    # there are 11 rows in the input file; 2 are for the same
    # practice/presentation and should be collapsed, and 1 is for
    # an UNKNOWN SURGERY (see issue #349)
    raw_data_path = (
        'frontend/tests/fixtures/commands/'
        'convert_hscic_prescribing/2016_01/'
        'Detailed_Prescribing_Information.csv'
    )
    converted_data_path = (
        'frontend/tests/fixtures/commands/'
        'convert_hscic_prescribing/2016_01/'
        'Detailed_Prescribing_Information_formatted.CSV'
    )
    gcs_path = 'hscic/prescribing/2016_01/Detailed_Prescribing_Information.csv'

    client = StorageClient()
    bucket = client.get_bucket()
    blob = bucket.blob(gcs_path)
    with open(raw_data_path, 'rb') as f:
        blob.upload_from_file(f)

    call_command('convert_hscic_prescribing', filename=raw_data_path)

    # Test that the data was added to the prescribing table
    client = BQClient()
    sql = '''SELECT *
        FROM {hscic}.prescribing
        WHERE month = TIMESTAMP('2016-01-01')'''
    rows = list(results_to_dicts(client.query(sql)))
    self.assertEqual(len(rows), 9)
    for row in rows:
        if row['practice'] == 'P92042' and row['bnf_code'] == '0202010B0AAABAB':
            self.assertEqual(row['quantity'], 1288)

    # Test that the downloaded data is correct
    with open(converted_data_path) as f:
        rows = list(csv.reader(f))
    self.assertEqual(len(rows), 9)
    for row in rows:
        if row[1] == 'P92042' and row[2] == '0202010B0AAABAB':
            self.assertEqual(row[6], '1288')