def handle(self, *args, **kwargs):
    """Create (or refresh) the BigQuery views in the "measures" dataset.

    Each name in the list below corresponds to a SQL file in the
    measure_sql/ directory; the view is created from that SQL, and if it
    already exists it is dropped and recreated so it reflects the current
    file contents.  Finally the (externally managed) cmpa_products table
    is created empty if missing.
    """
    base_path = os.path.join(
        settings.APPS_ROOT, 'frontend', 'management', 'commands', 'measure_sql'
    )
    client = Client("measures")

    def recreate_view(table_name, sql):
        # A view cannot be updated in place: a Conflict means it already
        # exists, so drop it and create it again from the current SQL.
        # (Previously this create/delete/create sequence was duplicated
        # inline for every table.)
        try:
            client.create_table_with_view(table_name, sql, False)
        except Conflict:
            client.delete_table(table_name)
            client.create_table_with_view(table_name, sql, False)

    for table_name in [
        'opioid_total_ome',
        'practice_data_all_low_priority',
        'pregabalin_total_mg',
        'vw__median_price_per_unit',
        'vw__ghost_generic_measure',
    ]:
        path = os.path.join(base_path, table_name + '.sql')
        with open(path, "r") as sql_file:
            recreate_view(table_name, sql_file.read())

    # cmpa_products is a table that has been created and managed by Rich.
    schema = build_schema(
        ('bnf_code', 'STRING'),
        ('bnf_name', 'STRING'),
        ('type', 'STRING'),
    )
    client.get_or_create_table('cmpa_products', schema)
def handle(self, *args, **kwargs):
    """Set up the BigQuery tables and views needed by the measures.

    Creates the storage-backed raw_prescribing table (once), then
    recreates each measure view, the normalised prescribing view, and
    finally ensures the cmpa_products table exists.
    """
    # The storage-backed table only needs creating once; Conflict means
    # it is already there, which is fine.
    try:
        Client("hscic").create_storage_backed_table(
            "raw_prescribing",
            RAW_PRESCRIBING_SCHEMA,
            "hscic/prescribing/20*Detailed_Prescribing_Information.csv",
        )
    except Conflict:
        pass

    measures_client = Client("measures")
    view_names = (
        "dmd_objs_with_form_route",
        "opioid_total_ome",
        "practice_data_all_low_priority",
        "pregabalin_total_mg",
        "vw__median_price_per_unit",
        "vw__ghost_generic_measure",
        "vw__herbal_list",
        # This references pregabalin_total_mg, so must come afterwards
        "gaba_total_ddd",
    )
    for view_name in view_names:
        self.recreate_table(measures_client, view_name)

    self.recreate_table(Client("hscic"), "raw_prescribing_normalised")

    # cmpa_products is a table that has been created and managed by Rich.
    cmpa_schema = build_schema(
        ("bnf_code", "STRING"), ("bnf_name", "STRING"), ("type", "STRING")
    )
    measures_client.get_or_create_table("cmpa_products", cmpa_schema)
def handle(self, *args, **kwargs):
    """(Re)create the views in the "measures" dataset from their SQL files.

    For each listed name, reads measure_sql/<name>.sql and creates a view
    from it, dropping and recreating the view if it already exists.  Also
    ensures the externally managed cmpa_products table exists.
    """
    sql_dir = os.path.join(
        settings.APPS_ROOT, "frontend", "management", "commands", "measure_sql"
    )
    client = Client("measures")

    view_names = [
        "dmd_objs_with_form_route",
        "opioid_total_ome",
        "practice_data_all_low_priority",
        "pregabalin_total_mg",
        "vw__median_price_per_unit",
        "vw__ghost_generic_measure",
        # This references pregabalin_total_mg, so must come afterwards
        "gaba_total_ddd",
    ]
    for view_name in view_names:
        sql_path = os.path.join(sql_dir, view_name + ".sql")
        with open(sql_path, "r") as f:
            view_sql = f.read()
        # A Conflict means the view already exists; drop it and recreate
        # it so that it reflects the current SQL.
        try:
            client.create_table_with_view(view_name, view_sql, False)
        except Conflict:
            client.delete_table(view_name)
            client.create_table_with_view(view_name, view_sql, False)

    # cmpa_products is a table that has been created and managed by Rich.
    client.get_or_create_table(
        "cmpa_products",
        build_schema(
            ("bnf_code", "STRING"), ("bnf_name", "STRING"), ("type", "STRING")
        ),
    )
from google.cloud.exceptions import Conflict from frontend.models import Chemical from frontend.models import Presentation from frontend.models import Product from frontend.models import Section from gcutils.bigquery import Client, TableExporter, build_schema logger = logging.getLogger(__name__) BNF_MAP_SCHEMA = build_schema( ('former_bnf_code', 'STRING'), ('current_bnf_code', 'STRING'), ) def create_code_mapping(filenames): """Given a list of filenames containing tab-delimited old->new BNF code changes: * find the matching entity in our local database (e.g. Section or Presentation, etc); mark the old version as * no-longer-current add a reference (by BNF code) to its * replacement """ Presentation.objects.filter(replaced_by__isnull=False).delete() for f in filenames:
from gcutils.bigquery import build_schema DMD_SCHEMA = build_schema( ('dmdid', 'STRING'), ('bnf_code', 'STRING'), ('vpid', 'STRING'), ('display_name', 'STRING'), ('ema', 'STRING'), ('pres_statcd', 'STRING'), ('avail_restrictcd', 'STRING'), ('product_type', 'STRING'), ('non_availcd', 'STRING'), ('concept_class', 'STRING'), ('nurse_f', 'STRING'), ('dent_f', 'STRING'), ('prod_order_no', 'STRING'), ('sched_1', 'STRING'), ('sched_2', 'STRING'), ('padm', 'STRING'), ('fp10_mda', 'STRING'), ('acbs', 'STRING'), ('assort_flav', 'STRING'), ('catcd', 'STRING'), ('tariff_category', 'STRING'), ('flag_imported', 'STRING'), ('flag_broken_bulk', 'STRING'), ('flag_non_bioequivalence', 'STRING'), ('flag_special_containers', 'BOOLEAN')) CCG_SCHEMA = build_schema( ('code', 'STRING'), ('name', 'STRING'), ('ons_code', 'STRING'), ('org_type', 'STRING'), ('open_date', 'TIMESTAMP'), ('close_date', 'TIMESTAMP'), ('address', 'STRING'), ('postcode', 'STRING'), ) PRESCRIBING_SCHEMA = build_schema( ('sha', 'STRING'), ('pct', 'STRING'), ('practice', 'STRING'), ('bnf_code', 'STRING'),
def run_end_to_end():
    """Run the full import pipeline twice (two months of fixture data)
    against a throwaway BigQuery/Storage environment, then check the
    expected numbers of measure rows and materialised-view rows.

    NOTE(review): relies on module-level fixtures (DATASETS, e2e_path,
    runner, the assert_* helpers) defined elsewhere in this file.
    """
    print('BQ_NONCE: {}'.format(settings.BQ_NONCE))
    call_command('migrate')
    path = os.path.join(settings.APPS_ROOT, 'frontend', 'management',
                        'commands', 'measure_definitions')
    # No MeasureGlobals or MeasureValues are generated for the ghost branded
    # generics measure, because both numerator and denominator are computed
    # from a view (vw__ghost_generic_measure) which has no data. Rather than
    # populate this view, it is simpler to pretend it doesn't exist.
    num_measures = len(os.listdir(path)) - 1
    # Start from a clean slate: local pipeline data, the import log, and
    # every blob in the test Storage bucket.
    shutil.rmtree(settings.PIPELINE_DATA_BASEDIR, ignore_errors=True)
    with open(settings.PIPELINE_IMPORT_LOG_PATH, 'w') as f:
        f.write('{}')
    for blob in StorageClient().bucket().list_blobs():
        blob.delete()
    for dataset_key in DATASETS:
        BQClient(dataset_key).create_dataset()
    # Create the empty hscic tables the pipeline will populate.
    client = BQClient('hscic')
    client.create_table('bnf', schemas.BNF_SCHEMA)
    client.create_table('ccgs', schemas.CCG_SCHEMA)
    client.create_table('ppu_savings', schemas.PPU_SAVING_SCHEMA)
    client.create_table(
        'practice_statistics', schemas.PRACTICE_STATISTICS_SCHEMA
    )
    client.create_table(
        'practice_statistics_all_years', schemas.PRACTICE_STATISTICS_SCHEMA
    )
    client.create_table('practices', schemas.PRACTICE_SCHEMA)
    client.create_table('prescribing', schemas.PRESCRIBING_SCHEMA)
    client.create_table('presentation', schemas.PRESENTATION_SCHEMA)
    client.create_table('tariff', schemas.TARIFF_SCHEMA)
    client.create_table('bdz_adq', schemas.BDZ_ADQ_SCHEMA)
    client = BQClient('measures')
    # This is enough of a schema to allow the practice_data_all_low_priority
    # table to be created, since it references these fields. Once populated by
    # import_measures, the tables in the measures dataset will have several
    # more fields. But we don't need to specify exactly what they are, as BQ
    # will work it out when the data is inserted with insert_rows_from_query.
    measures_schema = build_schema(
        ('month', 'DATE'),
        ('practice_id', 'STRING'),
        ('numerator', 'INTEGER'),
        ('denominator', 'INTEGER'),
    )
    path = os.path.join(settings.APPS_ROOT, 'frontend', 'management',
                        'commands', 'measure_definitions', '*.json')
    for path in glob.glob(path):
        measure_id = os.path.splitext(os.path.basename(path))[0]
        client.create_table('practice_data_' + measure_id, measures_schema)
        client.create_table('ccg_data_' + measure_id, measures_schema)
        client.create_table('global_data_' + measure_id, measures_schema)
    # Although there are no model instances, we call upload_model to create the
    # tables in BQ that might be required by certain measure views.
    client = BQClient('dmd')
    client.upload_model(TariffPrice)
    client.upload_model(VMPP)
    call_command('generate_presentation_replacements')
    # Replace the Postgres materialised views with plain tables so the
    # test can write to them directly.
    path = os.path.join(settings.APPS_ROOT, 'frontend', 'management',
                        'commands', 'replace_matviews.sql')
    with open(path) as f:
        with connection.cursor() as c:
            c.execute(f.read())
    # First month of fixture data.
    copy_tree(
        os.path.join(e2e_path, 'data-1'),
        os.path.join(e2e_path, 'data'),
    )
    runner.run_all(2017, 9, under_test=True)
    # We expect one MeasureGlobal per measure per month.
    assert_count_equal(num_measures, MeasureGlobal)
    # We expect one MeasureValue for each organisation per measure per month
    # (There are 4 practices, 2 CCGs, 2 STPs, and 2 regional teams).
    assert_count_equal(10 * num_measures, MeasureValue)
    # We expect one statistic per CCG per month
    assert_raw_count_equal(2, 'vw__ccgstatistics')
    # We expect one chemical summary per CCG per month
    assert_raw_count_equal(2, 'vw__chemical_summary_by_ccg',
                           "chemical_id = '1001030C0'")
    # We expect one chemical summary per practice per month
    assert_raw_count_equal(4, 'vw__chemical_summary_by_practice',
                           "chemical_id = '1001030C0'")
    # We expect one summary per practice per month
    assert_raw_count_equal(4, 'vw__practice_summary')
    # We expect one presentation summary per month
    assert_raw_count_equal(1, 'vw__presentation_summary',
                           "presentation_code = '1001030C0AAAAAA'")
    # We expect one presentation summary per CCG per month
    assert_raw_count_equal(2, 'vw__presentation_summary_by_ccg',
                           "presentation_code = '1001030C0AAAAAA'")
    # Second month of fixture data: every per-month count doubles.
    copy_tree(
        os.path.join(e2e_path, 'data-2'),
        os.path.join(e2e_path, 'data'),
    )
    runner.run_all(2017, 10, under_test=True)
    # We expect one MeasureGlobal per measure per month
    assert_count_equal(2 * num_measures, MeasureGlobal)
    # We expect one MeasureValue for each organisation per measure per month
    assert_count_equal(20 * num_measures, MeasureValue)
    # We expect one statistic per CCG per month
    assert_raw_count_equal(4, 'vw__ccgstatistics')
    # We expect one chemical summary per CCG per month
    assert_raw_count_equal(4, 'vw__chemical_summary_by_ccg',
                           "chemical_id = '1001030C0'")
    # We expect one chemical summary per practice per month
    assert_raw_count_equal(8, 'vw__chemical_summary_by_practice',
                           "chemical_id = '1001030C0'")
    # We expect one summary per practice per month
    assert_raw_count_equal(8, 'vw__practice_summary')
    # We expect one presentation summary per month
    assert_raw_count_equal(2, 'vw__presentation_summary',
                           "presentation_code = '1001030C0AAAAAA'")
    # We expect one presentation summary per CCG per month
    assert_raw_count_equal(4, 'vw__presentation_summary_by_ccg',
                           "presentation_code = '1001030C0AAAAAA'")
def run_end_to_end():
    """Run the full import pipeline twice (two months of fixture data)
    against a throwaway BigQuery/Storage environment, and check that the
    expected numbers of MeasureGlobal/MeasureValue rows are produced.

    NOTE(review): relies on module-level fixtures (DATASETS, e2e_path,
    runner, the assert_* helpers) defined elsewhere in this file.
    """
    print("BQ_NONCE: {}".format(settings.BQ_NONCE))
    call_command("migrate")
    # No MeasureGlobals or MeasureValues are generated for the ghost branded
    # generics measure, because both numerator and denominator are computed
    # from a view (vw__ghost_generic_measure) which has no data. Rather than
    # populate this view, it is simpler to pretend it doesn't exist.
    num_measures = (len(
        glob.glob(os.path.join(settings.MEASURE_DEFINITIONS_PATH, "*.json"))) - 1)
    # Start from a clean slate: local pipeline data, the import log, and
    # every blob in the test Storage bucket.
    shutil.rmtree(settings.PIPELINE_DATA_BASEDIR, ignore_errors=True)
    with open(settings.PIPELINE_IMPORT_LOG_PATH, "w") as f:
        f.write("{}")
    for blob in StorageClient().bucket().list_blobs():
        blob.delete()
    for dataset_key in DATASETS:
        BQClient(dataset_key).create_dataset()
    # Create the empty hscic tables the pipeline will populate.
    client = BQClient("hscic")
    client.create_table("bnf", schemas.BNF_SCHEMA)
    client.create_table("ccgs", schemas.CCG_SCHEMA)
    client.create_table("stps", schemas.STP_SCHEMA)
    client.create_table("regional_teams", schemas.REGIONAL_TEAM_SCHEMA)
    client.create_table("ppu_savings", schemas.PPU_SAVING_SCHEMA)
    client.create_table("practice_statistics", schemas.PRACTICE_STATISTICS_SCHEMA)
    client.create_table("practice_statistics_all_years", schemas.PRACTICE_STATISTICS_SCHEMA)
    client.create_table("practices", schemas.PRACTICE_SCHEMA)
    client.create_table("prescribing", schemas.PRESCRIBING_SCHEMA)
    client.create_table("presentation", schemas.PRESENTATION_SCHEMA)
    client.create_table("tariff", schemas.TARIFF_SCHEMA)
    client.create_table("bdz_adq", schemas.BDZ_ADQ_SCHEMA)
    client = BQClient("measures")
    # This is enough of a schema to allow the practice_data_all_low_priority
    # table to be created, since it references these fields. Once populated by
    # import_measures, the tables in the measures dataset will have several
    # more fields. But we don't need to specify exactly what they are, as BQ
    # will work it out when the data is inserted with insert_rows_from_query.
    measures_schema = build_schema(
        ("month", "DATE"),
        ("practice_id", "STRING"),
        ("numerator", "INTEGER"),
        ("denominator", "INTEGER"),
    )
    for path in glob.glob(
            os.path.join(settings.MEASURE_DEFINITIONS_PATH, "*.json")):
        measure_id = os.path.splitext(os.path.basename(path))[0]
        client.create_table("practice_data_" + measure_id, measures_schema)
        client.create_table("ccg_data_" + measure_id, measures_schema)
        client.create_table("global_data_" + measure_id, measures_schema)
    # Although there are no model instances, we call upload_model to create the
    # dm+d tables in BQ that are required by certain measure views.
    client = BQClient("dmd")
    for model in apps.get_app_config("dmd2").get_models():
        client.upload_model(model)
    call_command("generate_presentation_replacements")
    # First month of fixture data.
    copy_tree(os.path.join(e2e_path, "data-1"), os.path.join(e2e_path, "data"))
    runner.run_all(2017, 9, under_test=True)
    # We expect one MeasureGlobal per measure per month.
    assert_count_equal(num_measures, MeasureGlobal)
    # We expect one MeasureValue for each organisation per measure per month
    # (There are 4 practices, 2 CCGs, 2 STPs, and 2 regional teams).
    assert_count_equal(10 * num_measures, MeasureValue)
    # Second month of fixture data: per-month counts double.
    copy_tree(os.path.join(e2e_path, "data-2"), os.path.join(e2e_path, "data"))
    runner.run_all(2017, 10, under_test=True)
    # We expect one MeasureGlobal per measure per month
    assert_count_equal(2 * num_measures, MeasureGlobal)
    # We expect one MeasureValue for each organisation per measure per month
    assert_count_equal(20 * num_measures, MeasureValue)
def test_the_lot(self):
    """Exercise the BigQuery wrapper API end to end against a real test
    dataset: table creation, the insert_rows_from_* methods, querying,
    export to/from Storage, storage-backed tables, views, insertion from
    Postgres, and row deletion.

    Fixed for Python 3: ``reader.next()`` no longer exists (use the
    ``next()`` builtin) and ``map()`` now returns an iterator, so the
    expected rows must be materialised with ``list()`` before comparing.
    """
    client = Client('test')

    schema = build_schema(
        ('a', 'INTEGER'),
        ('b', 'STRING'),
    )
    headers = ['a', 'b']
    rows = [
        (1, 'apple'),
        (2, 'banana'),
        (3, 'coconut'),
    ]

    t1 = client.get_or_create_table('t1', schema)
    t1_qname = t1.qualified_name

    # Test Table.insert_rows_from_csv
    t1.insert_rows_from_csv('gcutils/tests/test_table.csv')
    self.assertEqual(sorted(t1.get_rows()), rows)

    # Test Table.insert_rows_from_query
    t2 = client.get_table('t2')
    sql = 'SELECT * FROM {} WHERE a > 1'.format(t1_qname)
    t2.insert_rows_from_query(sql)
    self.assertEqual(sorted(t2.get_rows()), rows[1:])

    # Test Client.query
    sql = 'SELECT * FROM {} WHERE a > 2'.format(t1_qname)
    results = client.query(sql)
    self.assertEqual(sorted(results.rows), rows[2:])

    # Test Client.query_into_dataframe
    sql = 'SELECT * FROM {} WHERE a > 2'.format(t1_qname)
    df = client.query_into_dataframe(sql)
    self.assertEqual(df.values.tolist(), [list(rows[2])])

    # Test TableExporter.export_to_storage and
    # TableExporter.download_from_storage_and_unzip
    t1_exporter = TableExporter(t1, self.storage_prefix + 'test_table-')
    t1_exporter.export_to_storage()

    with tempfile.NamedTemporaryFile(mode='r+') as f:
        t1_exporter.download_from_storage_and_unzip(f)
        f.seek(0)
        reader = csv.reader(f)
        # Py3 fix: csv readers have no .next() method; use next() builtin.
        data = [next(reader)] + sorted(reader)

    # Py3 fix: map() is lazy, so materialise each expected row as a list
    # (csv rows come back as lists of strings).
    self.assertEqual(data, [list(map(str, row)) for row in [headers] + rows])

    # Test Table.insert_rows_from_storage
    storage_path = self.storage_prefix + 'test_table.csv'
    self.upload_to_storage('gcutils/tests/test_table.csv', storage_path)
    t2.insert_rows_from_storage(storage_path)
    self.assertEqual(sorted(t2.get_rows()), rows)

    # Test Client.create_storage_backed_table
    storage_path = self.storage_prefix + 'test_table_headers.csv'
    self.upload_to_storage(
        'gcutils/tests/test_table_headers.csv', storage_path
    )
    schema = [
        {'name': 'a', 'type': 'integer'},
        {'name': 'b', 'type': 'string'},
    ]
    t3 = client.create_storage_backed_table(
        't3', schema, storage_path
    )
    results = client.query('SELECT * FROM {}'.format(t3.qualified_name))
    self.assertEqual(sorted(results.rows), rows)

    # A storage-backed table reflects subsequent changes to the blob.
    self.upload_to_storage(
        'gcutils/tests/test_table_headers_2.csv', storage_path
    )
    results = client.query('SELECT * FROM {}'.format(t3.qualified_name))
    self.assertEqual(sorted(results.rows), rows + [(4, u'damson')])

    # Test Client.create_table_with_view
    sql = 'SELECT * FROM {{project}}.{} WHERE a > 1'.format(t1_qname)
    t4 = client.create_table_with_view('t4', sql, False)
    results = client.query('SELECT * FROM {}'.format(t4.qualified_name))
    self.assertEqual(sorted(results.rows), rows[1:])

    # Test Client.insert_rows_from_pg
    PCT.objects.create(code='ABC', name='CCG 1')
    PCT.objects.create(code='XYZ', name='CCG 2')

    def transformer(row):
        # Map the code's first character to its ordinal so the result
        # fits t1's INTEGER column.
        return [ord(row[0][0]), row[1]]

    t1.insert_rows_from_pg(PCT, ['code', 'name'], transformer)
    self.assertEqual(sorted(t1.get_rows()), [(65, 'CCG 1'), (88, 'CCG 2')])

    # Test Table.delete_all_rows
    t1.delete_all_rows()
    self.assertEqual(list(t1.get_rows()), [])
from gcutils.bigquery import build_schema DMD_SCHEMA = build_schema( ("dmdid", "STRING"), ("bnf_code", "STRING"), ("vpid", "STRING"), ("display_name", "STRING"), ("ema", "STRING"), ("pres_statcd", "STRING"), ("avail_restrictcd", "STRING"), ("product_type", "STRING"), ("non_availcd", "STRING"), ("concept_class", "STRING"), ("nurse_f", "STRING"), ("dent_f", "STRING"), ("prod_order_no", "STRING"), ("sched_1", "STRING"), ("sched_2", "STRING"), ("padm", "STRING"), ("fp10_mda", "STRING"), ("acbs", "STRING"), ("assort_flav", "STRING"), ("catcd", "STRING"), ("tariff_category", "STRING"), ("flag_imported", "STRING"), ("flag_broken_bulk", "STRING"), ("flag_non_bioequivalence", "STRING"), ("flag_special_containers", "BOOLEAN"), ) CCG_SCHEMA = build_schema(
def test_the_lot(self):
    """Exercise the BigQuery wrapper API end to end against real test
    datasets: table creation, the insert_rows_from_* methods, querying,
    export to/from Storage, storage-backed tables, views, copying/moving
    between datasets, insertion from Postgres, and row deletion.

    The steps are strictly order-dependent: t1/t2 are reused and mutated
    throughout, so assertions rely on the state left by earlier steps.
    """
    client = Client("test")
    archive_client = Client("archive")

    # t1 is created with (STRING, INTEGER) but the CSV is loaded with the
    # (INTEGER, STRING) schema — presumably to exercise schema override
    # on load; confirm against insert_rows_from_csv's implementation.
    orig_schema = build_schema(("a", "STRING"), ("b", "INTEGER"))
    schema = build_schema(("a", "INTEGER"), ("b", "STRING"))
    headers = ["a", "b"]
    rows = [(1, "apple"), (2, "banana"), (3, "coconut")]

    t1 = client.get_or_create_table("t1", orig_schema)
    t1_qname = t1.qualified_name

    # Test Table.insert_rows_from_csv
    t1.insert_rows_from_csv("gcutils/tests/test_table.csv", schema)
    self.assertEqual(sorted(t1.get_rows()), rows)

    # Test Table.insert_rows_from_query
    t2 = client.get_table("t2")
    sql = "SELECT * FROM {} WHERE a > 1".format(t1_qname)
    t2.insert_rows_from_query(sql)
    self.assertEqual(sorted(t2.get_rows()), rows[1:])

    # Test Client.query
    sql = "SELECT * FROM {} WHERE a > 2".format(t1_qname)
    results = client.query(sql)
    self.assertEqual(sorted(results.rows), rows[2:])

    # Test Client.query_into_dataframe
    sql = "SELECT * FROM {} WHERE a > 2".format(t1_qname)
    df = client.query_into_dataframe(sql)
    self.assertEqual(df.values.tolist(), [list(rows[2])])

    # Test TableExporter.export_to_storage and
    # TableExporter.download_from_storage_and_unzip
    t1_exporter = TableExporter(t1, self.storage_prefix + "test_table-")
    t1_exporter.export_to_storage()

    with tempfile.NamedTemporaryFile(mode="r+") as f:
        t1_exporter.download_from_storage_and_unzip(f)
        f.seek(0)
        reader = csv.reader(f)
        data = [next(reader)] + sorted(reader)

    # csv gives back strings, so stringify the expected rows.
    self.assertEqual(data, [list(map(str, row)) for row in [headers] + rows])

    # Test Table.insert_rows_from_storage
    storage_path = self.storage_prefix + "test_table.csv"
    self.upload_to_storage("gcutils/tests/test_table.csv", storage_path)
    t2.insert_rows_from_storage(storage_path)
    self.assertEqual(sorted(t2.get_rows()), rows)

    # Test Client.create_storage_backed_table
    storage_path = self.storage_prefix + "test_table_headers.csv"
    self.upload_to_storage("gcutils/tests/test_table_headers.csv", storage_path)
    schema = build_schema(("a", "INTEGER"), ("b", "STRING"))
    t3 = client.create_storage_backed_table("t3", schema, storage_path)
    results = client.query("SELECT * FROM {}".format(t3.qualified_name))
    self.assertEqual(sorted(results.rows), rows)

    # A storage-backed table reflects subsequent changes to the blob.
    self.upload_to_storage("gcutils/tests/test_table_headers_2.csv", storage_path)
    results = client.query("SELECT * FROM {}".format(t3.qualified_name))
    self.assertEqual(sorted(results.rows), rows + [(4, "damson")])

    # Test Client.create_table_with_view
    sql = "SELECT * FROM {{project}}.{} WHERE a > 1".format(t1_qname)
    t4 = client.create_table_with_view("t4", sql, False)
    results = client.query("SELECT * FROM {}".format(t4.qualified_name))
    self.assertEqual(sorted(results.rows), rows[1:])

    # Test Table.copy_to_new_dataset (source table keeps its rows)
    t1.copy_to_new_dataset("archive")
    t1_archived = archive_client.get_table("t1")
    self.assertEqual(sorted(t1_archived.get_rows()), rows)
    self.assertEqual(sorted(t1.get_rows()), rows)

    # Test Table.move_to_new_dataset (source table is gone afterwards)
    t2.move_to_new_dataset("archive")
    t2_archived = archive_client.get_table("t2")
    self.assertEqual(sorted(t2_archived.get_rows()), rows)
    with self.assertRaises(NotFound):
        list(t2.get_rows())

    # Test Client.insert_rows_from_pg
    PCT.objects.create(code="ABC", name="CCG 1")
    PCT.objects.create(code="XYZ", name="CCG 2")

    def transformer(row):
        # Map the code's first character to its ordinal so the result
        # fits the INTEGER column.
        return [ord(row[0][0]), row[1]]

    t1.insert_rows_from_pg(
        PCT,
        build_schema(("code", "INTEGER"), ("name", "STRING")),
        transformer=transformer,
    )
    self.assertEqual(sorted(t1.get_rows()), [(65, "CCG 1"), (88, "CCG 2")])

    # Test Table.delete_all_rows
    t1.delete_all_rows()
    self.assertEqual(list(t1.get_rows()), [])
def run_end_to_end():
    """Run the full import pipeline twice (two months of fixture data)
    against a throwaway BigQuery/Storage environment, then check the
    expected numbers of measure and materialised-view rows.

    NOTE(review): relies on module-level fixtures (DATASETS, e2e_path,
    runner, the assert_* helpers) defined elsewhere in this file.
    """
    print('BQ_NONCE: {}'.format(settings.BQ_NONCE))
    # Hard-coded count of measure definitions; the first assert below
    # exists to catch this going stale.
    num_measures = 57
    # Start from a clean slate: local pipeline data, the import log, and
    # every blob in the test Storage bucket.
    shutil.rmtree(settings.PIPELINE_DATA_BASEDIR, ignore_errors=True)
    with open(settings.PIPELINE_IMPORT_LOG_PATH, 'w') as f:
        f.write('{}')
    for blob in StorageClient().bucket().list_blobs():
        blob.delete()
    for dataset_key in DATASETS:
        BQClient(dataset_key).create_dataset()
    # Create the empty hscic tables the pipeline will populate.
    client = BQClient('hscic')
    client.create_table('bnf', schemas.BNF_SCHEMA)
    client.create_table('ccgs', schemas.CCG_SCHEMA)
    client.create_table('ppu_savings', schemas.PPU_SAVING_SCHEMA)
    client.create_table('practice_statistics',
                        schemas.PRACTICE_STATISTICS_SCHEMA)
    client.create_table('practice_statistics_all_years',
                        schemas.PRACTICE_STATISTICS_SCHEMA)
    client.create_table('practices', schemas.PRACTICE_SCHEMA)
    client.create_table('prescribing', schemas.PRESCRIBING_SCHEMA)
    client.create_table('presentation', schemas.PRESENTATION_SCHEMA)
    client.create_table('tariff', schemas.TARIFF_SCHEMA)
    client = BQClient('measures')
    # This is enough of a schema to allow the practice_data_all_low_priority
    # table to be created, since it references these fields. Once populated by
    # import_measures, the tables in the measures dataset will have several
    # more fields. But we don't need to specify exactly what they are, as BQ
    # will work it out when the data is inserted with insert_rows_from_query.
    measures_schema = build_schema(
        ('month', 'DATE'),
        ('practice_id', 'STRING'),
        ('numerator', 'INTEGER'),
        ('denominator', 'INTEGER'),
    )
    path = os.path.join(settings.SITE_ROOT, 'frontend', 'management',
                        'commands', 'measure_definitions', '*.json')
    for path in glob.glob(path):
        measure_id = os.path.splitext(os.path.basename(path))[0]
        client.create_table('practice_data_' + measure_id, measures_schema)
        client.create_table('ccg_data_' + measure_id, measures_schema)
        client.create_table('global_data_' + measure_id, measures_schema)
    call_command('generate_presentation_replacements')
    # Replace the Postgres materialised views with plain tables so the
    # test can write to them directly.
    path = os.path.join(settings.SITE_ROOT, 'frontend', 'management',
                        'commands', 'replace_matviews.sql')
    with open(path) as f:
        with connection.cursor() as c:
            c.execute(f.read())
    # First month of fixture data.
    copy_tree(
        os.path.join(e2e_path, 'data-1'),
        os.path.join(e2e_path, 'data'),
    )
    runner.run_all(2017, 9, under_test=True)
    # We expect one MeasureGlobal per measure per month. If this assert fails,
    # check that num_measures is still correct.
    assert_count_equal(num_measures, MeasureGlobal)
    # We expect one MeasureValue for each CCG or Practice per measure per month
    assert_count_equal(6 * num_measures, MeasureValue)
    # We expect one statistic per CCG per month
    assert_raw_count_equal(2, 'vw__ccgstatistics')
    # We expect one chemical summary per CCG per month
    assert_raw_count_equal(2, 'vw__chemical_summary_by_ccg',
                           "chemical_id = '1001030C0'")
    # We expect one chemical summary per practice per month
    assert_raw_count_equal(4, 'vw__chemical_summary_by_practice',
                           "chemical_id = '1001030C0'")
    # We expect one summary per practice per month
    assert_raw_count_equal(4, 'vw__practice_summary')
    # We expect one presentation summary per month
    assert_raw_count_equal(1, 'vw__presentation_summary',
                           "presentation_code = '1001030C0AAAAAA'")
    # We expect one presentation summary per CCG per month
    assert_raw_count_equal(2, 'vw__presentation_summary_by_ccg',
                           "presentation_code = '1001030C0AAAAAA'")
    # Second month of fixture data: every per-month count doubles.
    copy_tree(
        os.path.join(e2e_path, 'data-2'),
        os.path.join(e2e_path, 'data'),
    )
    runner.run_all(2017, 10, under_test=True)
    # We expect one MeasureGlobal per measure per month
    assert_count_equal(2 * num_measures, MeasureGlobal)
    # We expect one MeasureValue for each CCG or Practice per measure per month
    assert_count_equal(12 * num_measures, MeasureValue)
    # We expect one statistic per CCG per month
    assert_raw_count_equal(4, 'vw__ccgstatistics')
    # We expect one chemical summary per CCG per month
    assert_raw_count_equal(4, 'vw__chemical_summary_by_ccg',
                           "chemical_id = '1001030C0'")
    # We expect one chemical summary per practice per month
    assert_raw_count_equal(8, 'vw__chemical_summary_by_practice',
                           "chemical_id = '1001030C0'")
    # We expect one summary per practice per month
    assert_raw_count_equal(8, 'vw__practice_summary')
    # We expect one presentation summary per month
    assert_raw_count_equal(2, 'vw__presentation_summary',
                           "presentation_code = '1001030C0AAAAAA'")
    # We expect one presentation summary per CCG per month
    assert_raw_count_equal(4, 'vw__presentation_summary_by_ccg',
                           "presentation_code = '1001030C0AAAAAA'")
import csv import tempfile import requests from bs4 import BeautifulSoup from django.core.management import BaseCommand from gcutils.bigquery import Client, build_schema from google.cloud.exceptions import Conflict SCHEMA = build_schema( ("year_month", "DATE"), ("ods_code", "STRING"), ("vmp_snomed_code", "STRING"), ("vmp_product_name", "STRING"), ("unit_of_measure_identifier", "STRING"), ("unit_of_measure_name", "STRING"), ("total_quanity_in_vmp_unit", "FLOAT"), ) class Command(BaseCommand): help = "Import any SCMD datasets missing from our BigQuery dataset" def ensure_dataset_exists(self, client): try: client.create_dataset() except Conflict: pass def handle(self, *args, **kwargs): s = requests.Session()