def handle(self, *args, **kwargs):
    """Create (or refresh) the BigQuery views in the "measures" dataset.

    Each name in the list below corresponds to a SQL file in the
    measure_sql/ directory; the view is created from that SQL, and if it
    already exists it is dropped and recreated so it reflects the current
    file contents.  Finally the (externally managed) cmpa_products table
    is created empty if missing.
    """
    base_path = os.path.join(
        settings.APPS_ROOT, 'frontend', 'management', 'commands', 'measure_sql'
    )
    client = Client("measures")

    def recreate_view(table_name, sql):
        # A view cannot be updated in place: a Conflict means it already
        # exists, so drop it and create it again from the current SQL.
        # (Previously this create/delete/create sequence was duplicated
        # inline for every table.)
        try:
            client.create_table_with_view(table_name, sql, False)
        except Conflict:
            client.delete_table(table_name)
            client.create_table_with_view(table_name, sql, False)

    for table_name in [
        'opioid_total_ome',
        'practice_data_all_low_priority',
        'pregabalin_total_mg',
        'vw__median_price_per_unit',
        'vw__ghost_generic_measure',
    ]:
        path = os.path.join(base_path, table_name + '.sql')
        with open(path, "r") as sql_file:
            recreate_view(table_name, sql_file.read())

    # cmpa_products is a table that has been created and managed by Rich.
    schema = build_schema(
        ('bnf_code', 'STRING'),
        ('bnf_name', 'STRING'),
        ('type', 'STRING'),
    )
    client.get_or_create_table('cmpa_products', schema)
def handle(self, *args, **kwargs):
    """Set up the BigQuery tables and views needed by the measures.

    Creates the storage-backed raw_prescribing table (once), then
    recreates each measure view, the normalised prescribing view, and
    finally ensures the cmpa_products table exists.
    """
    # The storage-backed table only needs creating once; Conflict means
    # it is already there, which is fine.
    try:
        Client("hscic").create_storage_backed_table(
            "raw_prescribing",
            RAW_PRESCRIBING_SCHEMA,
            "hscic/prescribing/20*Detailed_Prescribing_Information.csv",
        )
    except Conflict:
        pass

    measures_client = Client("measures")
    view_names = (
        "dmd_objs_with_form_route",
        "opioid_total_ome",
        "practice_data_all_low_priority",
        "pregabalin_total_mg",
        "vw__median_price_per_unit",
        "vw__ghost_generic_measure",
        "vw__herbal_list",
        # This references pregabalin_total_mg, so must come afterwards
        "gaba_total_ddd",
    )
    for view_name in view_names:
        self.recreate_table(measures_client, view_name)

    self.recreate_table(Client("hscic"), "raw_prescribing_normalised")

    # cmpa_products is a table that has been created and managed by Rich.
    cmpa_schema = build_schema(
        ("bnf_code", "STRING"), ("bnf_name", "STRING"), ("type", "STRING")
    )
    measures_client.get_or_create_table("cmpa_products", cmpa_schema)
def handle(self, *args, **kwargs):
    """(Re)create the views in the "measures" dataset from their SQL files.

    For each listed name, reads measure_sql/<name>.sql and creates a view
    from it, dropping and recreating the view if it already exists.  Also
    ensures the externally managed cmpa_products table exists.
    """
    sql_dir = os.path.join(
        settings.APPS_ROOT, "frontend", "management", "commands", "measure_sql"
    )
    client = Client("measures")

    view_names = [
        "dmd_objs_with_form_route",
        "opioid_total_ome",
        "practice_data_all_low_priority",
        "pregabalin_total_mg",
        "vw__median_price_per_unit",
        "vw__ghost_generic_measure",
        # This references pregabalin_total_mg, so must come afterwards
        "gaba_total_ddd",
    ]
    for view_name in view_names:
        sql_path = os.path.join(sql_dir, view_name + ".sql")
        with open(sql_path, "r") as f:
            view_sql = f.read()
        # A Conflict means the view already exists; drop it and recreate
        # it so that it reflects the current SQL.
        try:
            client.create_table_with_view(view_name, view_sql, False)
        except Conflict:
            client.delete_table(view_name)
            client.create_table_with_view(view_name, view_sql, False)

    # cmpa_products is a table that has been created and managed by Rich.
    client.get_or_create_table(
        "cmpa_products",
        build_schema(
            ("bnf_code", "STRING"), ("bnf_name", "STRING"), ("type", "STRING")
        ),
    )
from google.cloud.exceptions import Conflict from frontend.models import Chemical from frontend.models import Presentation from frontend.models import Product from frontend.models import Section from gcutils.bigquery import Client, TableExporter, build_schema logger = logging.getLogger(__name__) BNF_MAP_SCHEMA = build_schema( ('former_bnf_code', 'STRING'), ('current_bnf_code', 'STRING'), ) def create_code_mapping(filenames): """Given a list of filenames containing tab-delimited old->new BNF code changes: * find the matching entity in our local database (e.g. Section or Presentation, etc); mark the old version as * no-longer-current add a reference (by BNF code) to its * replacement """ Presentation.objects.filter(replaced_by__isnull=False).delete() for f in filenames:
from gcutils.bigquery import build_schema DMD_SCHEMA = build_schema( ('dmdid', 'STRING'), ('bnf_code', 'STRING'), ('vpid', 'STRING'), ('display_name', 'STRING'), ('ema', 'STRING'), ('pres_statcd', 'STRING'), ('avail_restrictcd', 'STRING'), ('product_type', 'STRING'), ('non_availcd', 'STRING'), ('concept_class', 'STRING'), ('nurse_f', 'STRING'), ('dent_f', 'STRING'), ('prod_order_no', 'STRING'), ('sched_1', 'STRING'), ('sched_2', 'STRING'), ('padm', 'STRING'), ('fp10_mda', 'STRING'), ('acbs', 'STRING'), ('assort_flav', 'STRING'), ('catcd', 'STRING'), ('tariff_category', 'STRING'), ('flag_imported', 'STRING'), ('flag_broken_bulk', 'STRING'), ('flag_non_bioequivalence', 'STRING'), ('flag_special_containers', 'BOOLEAN')) CCG_SCHEMA = build_schema( ('code', 'STRING'), ('name', 'STRING'), ('ons_code', 'STRING'), ('org_type', 'STRING'), ('open_date', 'TIMESTAMP'), ('close_date', 'TIMESTAMP'), ('address', 'STRING'), ('postcode', 'STRING'), ) PRESCRIBING_SCHEMA = build_schema( ('sha', 'STRING'), ('pct', 'STRING'), ('practice', 'STRING'), ('bnf_code', 'STRING'),
def run_end_to_end():
    """Run the full import pipeline twice (two months of fixture data)
    against a throwaway BigQuery/Storage environment, then check the
    expected numbers of measure rows and materialised-view rows.

    NOTE(review): relies on module-level fixtures (DATASETS, e2e_path,
    runner, the assert_* helpers) defined elsewhere in this file.
    """
    print('BQ_NONCE: {}'.format(settings.BQ_NONCE))
    call_command('migrate')
    path = os.path.join(settings.APPS_ROOT, 'frontend', 'management',
                        'commands', 'measure_definitions')
    # No MeasureGlobals or MeasureValues are generated for the ghost branded
    # generics measure, because both numerator and denominator are computed
    # from a view (vw__ghost_generic_measure) which has no data. Rather than
    # populate this view, it is simpler to pretend it doesn't exist.
    num_measures = len(os.listdir(path)) - 1
    # Start from a clean slate: local pipeline data, the import log, and
    # every blob in the test Storage bucket.
    shutil.rmtree(settings.PIPELINE_DATA_BASEDIR, ignore_errors=True)
    with open(settings.PIPELINE_IMPORT_LOG_PATH, 'w') as f:
        f.write('{}')
    for blob in StorageClient().bucket().list_blobs():
        blob.delete()
    for dataset_key in DATASETS:
        BQClient(dataset_key).create_dataset()
    # Create the empty hscic tables the pipeline will populate.
    client = BQClient('hscic')
    client.create_table('bnf', schemas.BNF_SCHEMA)
    client.create_table('ccgs', schemas.CCG_SCHEMA)
    client.create_table('ppu_savings', schemas.PPU_SAVING_SCHEMA)
    client.create_table(
        'practice_statistics', schemas.PRACTICE_STATISTICS_SCHEMA
    )
    client.create_table(
        'practice_statistics_all_years', schemas.PRACTICE_STATISTICS_SCHEMA
    )
    client.create_table('practices', schemas.PRACTICE_SCHEMA)
    client.create_table('prescribing', schemas.PRESCRIBING_SCHEMA)
    client.create_table('presentation', schemas.PRESENTATION_SCHEMA)
    client.create_table('tariff', schemas.TARIFF_SCHEMA)
    client.create_table('bdz_adq', schemas.BDZ_ADQ_SCHEMA)
    client = BQClient('measures')
    # This is enough of a schema to allow the practice_data_all_low_priority
    # table to be created, since it references these fields. Once populated by
    # import_measures, the tables in the measures dataset will have several
    # more fields. But we don't need to specify exactly what they are, as BQ
    # will work it out when the data is inserted with insert_rows_from_query.
    measures_schema = build_schema(
        ('month', 'DATE'),
        ('practice_id', 'STRING'),
        ('numerator', 'INTEGER'),
        ('denominator', 'INTEGER'),
    )
    path = os.path.join(settings.APPS_ROOT, 'frontend', 'management',
                        'commands', 'measure_definitions', '*.json')
    for path in glob.glob(path):
        measure_id = os.path.splitext(os.path.basename(path))[0]
        client.create_table('practice_data_' + measure_id, measures_schema)
        client.create_table('ccg_data_' + measure_id, measures_schema)
        client.create_table('global_data_' + measure_id, measures_schema)
    # Although there are no model instances, we call upload_model to create the
    # tables in BQ that might be required by certain measure views.
    client = BQClient('dmd')
    client.upload_model(TariffPrice)
    client.upload_model(VMPP)
    call_command('generate_presentation_replacements')
    # Replace the Postgres materialised views with plain tables so the
    # test can write to them directly.
    path = os.path.join(settings.APPS_ROOT, 'frontend', 'management',
                        'commands', 'replace_matviews.sql')
    with open(path) as f:
        with connection.cursor() as c:
            c.execute(f.read())
    # First month of fixture data.
    copy_tree(
        os.path.join(e2e_path, 'data-1'),
        os.path.join(e2e_path, 'data'),
    )
    runner.run_all(2017, 9, under_test=True)
    # We expect one MeasureGlobal per measure per month.
    assert_count_equal(num_measures, MeasureGlobal)
    # We expect one MeasureValue for each organisation per measure per month
    # (There are 4 practices, 2 CCGs, 2 STPs, and 2 regional teams).
    assert_count_equal(10 * num_measures, MeasureValue)
    # We expect one statistic per CCG per month
    assert_raw_count_equal(2, 'vw__ccgstatistics')
    # We expect one chemical summary per CCG per month
    assert_raw_count_equal(2, 'vw__chemical_summary_by_ccg',
                           "chemical_id = '1001030C0'")
    # We expect one chemical summary per practice per month
    assert_raw_count_equal(4, 'vw__chemical_summary_by_practice',
                           "chemical_id = '1001030C0'")
    # We expect one summary per practice per month
    assert_raw_count_equal(4, 'vw__practice_summary')
    # We expect one presentation summary per month
    assert_raw_count_equal(1, 'vw__presentation_summary',
                           "presentation_code = '1001030C0AAAAAA'")
    # We expect one presentation summary per CCG per month
    assert_raw_count_equal(2, 'vw__presentation_summary_by_ccg',
                           "presentation_code = '1001030C0AAAAAA'")
    # Second month of fixture data: every per-month count doubles.
    copy_tree(
        os.path.join(e2e_path, 'data-2'),
        os.path.join(e2e_path, 'data'),
    )
    runner.run_all(2017, 10, under_test=True)
    # We expect one MeasureGlobal per measure per month
    assert_count_equal(2 * num_measures, MeasureGlobal)
    # We expect one MeasureValue for each organisation per measure per month
    assert_count_equal(20 * num_measures, MeasureValue)
    # We expect one statistic per CCG per month
    assert_raw_count_equal(4, 'vw__ccgstatistics')
    # We expect one chemical summary per CCG per month
    assert_raw_count_equal(4, 'vw__chemical_summary_by_ccg',
                           "chemical_id = '1001030C0'")
    # We expect one chemical summary per practice per month
    assert_raw_count_equal(8, 'vw__chemical_summary_by_practice',
                           "chemical_id = '1001030C0'")
    # We expect one summary per practice per month
    assert_raw_count_equal(8, 'vw__practice_summary')
    # We expect one presentation summary per month
    assert_raw_count_equal(2, 'vw__presentation_summary',
                           "presentation_code = '1001030C0AAAAAA'")
    # We expect one presentation summary per CCG per month
    assert_raw_count_equal(4, 'vw__presentation_summary_by_ccg',
                           "presentation_code = '1001030C0AAAAAA'")
def run_end_to_end():
    """Run the full import pipeline twice (two months of fixture data)
    against a throwaway BigQuery/Storage environment, and check that the
    expected numbers of MeasureGlobal/MeasureValue rows are produced.

    NOTE(review): relies on module-level fixtures (DATASETS, e2e_path,
    runner, the assert_* helpers) defined elsewhere in this file.
    """
    print("BQ_NONCE: {}".format(settings.BQ_NONCE))
    call_command("migrate")
    # No MeasureGlobals or MeasureValues are generated for the ghost branded
    # generics measure, because both numerator and denominator are computed
    # from a view (vw__ghost_generic_measure) which has no data. Rather than
    # populate this view, it is simpler to pretend it doesn't exist.
    num_measures = (len(
        glob.glob(os.path.join(settings.MEASURE_DEFINITIONS_PATH, "*.json"))) - 1)
    # Start from a clean slate: local pipeline data, the import log, and
    # every blob in the test Storage bucket.
    shutil.rmtree(settings.PIPELINE_DATA_BASEDIR, ignore_errors=True)
    with open(settings.PIPELINE_IMPORT_LOG_PATH, "w") as f:
        f.write("{}")
    for blob in StorageClient().bucket().list_blobs():
        blob.delete()
    for dataset_key in DATASETS:
        BQClient(dataset_key).create_dataset()
    # Create the empty hscic tables the pipeline will populate.
    client = BQClient("hscic")
    client.create_table("bnf", schemas.BNF_SCHEMA)
    client.create_table("ccgs", schemas.CCG_SCHEMA)
    client.create_table("stps", schemas.STP_SCHEMA)
    client.create_table("regional_teams", schemas.REGIONAL_TEAM_SCHEMA)
    client.create_table("ppu_savings", schemas.PPU_SAVING_SCHEMA)
    client.create_table("practice_statistics", schemas.PRACTICE_STATISTICS_SCHEMA)
    client.create_table("practice_statistics_all_years", schemas.PRACTICE_STATISTICS_SCHEMA)
    client.create_table("practices", schemas.PRACTICE_SCHEMA)
    client.create_table("prescribing", schemas.PRESCRIBING_SCHEMA)
    client.create_table("presentation", schemas.PRESENTATION_SCHEMA)
    client.create_table("tariff", schemas.TARIFF_SCHEMA)
    client.create_table("bdz_adq", schemas.BDZ_ADQ_SCHEMA)
    client = BQClient("measures")
    # This is enough of a schema to allow the practice_data_all_low_priority
    # table to be created, since it references these fields. Once populated by
    # import_measures, the tables in the measures dataset will have several
    # more fields. But we don't need to specify exactly what they are, as BQ
    # will work it out when the data is inserted with insert_rows_from_query.
    measures_schema = build_schema(
        ("month", "DATE"),
        ("practice_id", "STRING"),
        ("numerator", "INTEGER"),
        ("denominator", "INTEGER"),
    )
    for path in glob.glob(
            os.path.join(settings.MEASURE_DEFINITIONS_PATH, "*.json")):
        measure_id = os.path.splitext(os.path.basename(path))[0]
        client.create_table("practice_data_" + measure_id, measures_schema)
        client.create_table("ccg_data_" + measure_id, measures_schema)
        client.create_table("global_data_" + measure_id, measures_schema)
    # Although there are no model instances, we call upload_model to create the
    # dm+d tables in BQ that are required by certain measure views.
    client = BQClient("dmd")
    for model in apps.get_app_config("dmd2").get_models():
        client.upload_model(model)
    call_command("generate_presentation_replacements")
    # First month of fixture data.
    copy_tree(os.path.join(e2e_path, "data-1"), os.path.join(e2e_path, "data"))
    runner.run_all(2017, 9, under_test=True)
    # We expect one MeasureGlobal per measure per month.
    assert_count_equal(num_measures, MeasureGlobal)
    # We expect one MeasureValue for each organisation per measure per month
    # (There are 4 practices, 2 CCGs, 2 STPs, and 2 regional teams).
    assert_count_equal(10 * num_measures, MeasureValue)
    # Second month of fixture data: per-month counts double.
    copy_tree(os.path.join(e2e_path, "data-2"), os.path.join(e2e_path, "data"))
    runner.run_all(2017, 10, under_test=True)
    # We expect one MeasureGlobal per measure per month
    assert_count_equal(2 * num_measures, MeasureGlobal)
    # We expect one MeasureValue for each organisation per measure per month
    assert_count_equal(20 * num_measures, MeasureValue)
def test_the_lot(self):
    """Exercise the BigQuery wrapper API end to end against a real test
    dataset: table creation, the insert_rows_from_* methods, querying,
    export to/from Storage, storage-backed tables, views, insertion from
    Postgres, and row deletion.

    Fixed for Python 3: ``reader.next()`` no longer exists (use the
    ``next()`` builtin) and ``map()`` now returns an iterator, so the
    expected rows must be materialised with ``list()`` before comparing.
    """
    client = Client('test')

    schema = build_schema(
        ('a', 'INTEGER'),
        ('b', 'STRING'),
    )
    headers = ['a', 'b']
    rows = [
        (1, 'apple'),
        (2, 'banana'),
        (3, 'coconut'),
    ]

    t1 = client.get_or_create_table('t1', schema)
    t1_qname = t1.qualified_name

    # Test Table.insert_rows_from_csv
    t1.insert_rows_from_csv('gcutils/tests/test_table.csv')
    self.assertEqual(sorted(t1.get_rows()), rows)

    # Test Table.insert_rows_from_query
    t2 = client.get_table('t2')
    sql = 'SELECT * FROM {} WHERE a > 1'.format(t1_qname)
    t2.insert_rows_from_query(sql)
    self.assertEqual(sorted(t2.get_rows()), rows[1:])

    # Test Client.query
    sql = 'SELECT * FROM {} WHERE a > 2'.format(t1_qname)
    results = client.query(sql)
    self.assertEqual(sorted(results.rows), rows[2:])

    # Test Client.query_into_dataframe
    sql = 'SELECT * FROM {} WHERE a > 2'.format(t1_qname)
    df = client.query_into_dataframe(sql)
    self.assertEqual(df.values.tolist(), [list(rows[2])])

    # Test TableExporter.export_to_storage and
    # TableExporter.download_from_storage_and_unzip
    t1_exporter = TableExporter(t1, self.storage_prefix + 'test_table-')
    t1_exporter.export_to_storage()

    with tempfile.NamedTemporaryFile(mode='r+') as f:
        t1_exporter.download_from_storage_and_unzip(f)
        f.seek(0)
        reader = csv.reader(f)
        # Py3 fix: csv readers have no .next() method; use next() builtin.
        data = [next(reader)] + sorted(reader)

    # Py3 fix: map() is lazy, so materialise each expected row as a list
    # (csv rows come back as lists of strings).
    self.assertEqual(data, [list(map(str, row)) for row in [headers] + rows])

    # Test Table.insert_rows_from_storage
    storage_path = self.storage_prefix + 'test_table.csv'
    self.upload_to_storage('gcutils/tests/test_table.csv', storage_path)
    t2.insert_rows_from_storage(storage_path)
    self.assertEqual(sorted(t2.get_rows()), rows)

    # Test Client.create_storage_backed_table
    storage_path = self.storage_prefix + 'test_table_headers.csv'
    self.upload_to_storage(
        'gcutils/tests/test_table_headers.csv', storage_path
    )
    schema = [
        {'name': 'a', 'type': 'integer'},
        {'name': 'b', 'type': 'string'},
    ]
    t3 = client.create_storage_backed_table(
        't3', schema, storage_path
    )
    results = client.query('SELECT * FROM {}'.format(t3.qualified_name))
    self.assertEqual(sorted(results.rows), rows)

    # A storage-backed table reflects subsequent changes to the blob.
    self.upload_to_storage(
        'gcutils/tests/test_table_headers_2.csv', storage_path
    )
    results = client.query('SELECT * FROM {}'.format(t3.qualified_name))
    self.assertEqual(sorted(results.rows), rows + [(4, u'damson')])

    # Test Client.create_table_with_view
    sql = 'SELECT * FROM {{project}}.{} WHERE a > 1'.format(t1_qname)
    t4 = client.create_table_with_view('t4', sql, False)
    results = client.query('SELECT * FROM {}'.format(t4.qualified_name))
    self.assertEqual(sorted(results.rows), rows[1:])

    # Test Client.insert_rows_from_pg
    PCT.objects.create(code='ABC', name='CCG 1')
    PCT.objects.create(code='XYZ', name='CCG 2')

    def transformer(row):
        # Map the code's first character to its ordinal so the result
        # fits t1's INTEGER column.
        return [ord(row[0][0]), row[1]]

    t1.insert_rows_from_pg(PCT, ['code', 'name'], transformer)
    self.assertEqual(sorted(t1.get_rows()), [(65, 'CCG 1'), (88, 'CCG 2')])

    # Test Table.delete_all_rows
    t1.delete_all_rows()
    self.assertEqual(list(t1.get_rows()), [])
from gcutils.bigquery import build_schema DMD_SCHEMA = build_schema( ("dmdid", "STRING"), ("bnf_code", "STRING"), ("vpid", "STRING"), ("display_name", "STRING"), ("ema", "STRING"), ("pres_statcd", "STRING"), ("avail_restrictcd", "STRING"), ("product_type", "STRING"), ("non_availcd", "STRING"), ("concept_class", "STRING"), ("nurse_f", "STRING"), ("dent_f", "STRING"), ("prod_order_no", "STRING"), ("sched_1", "STRING"), ("sched_2", "STRING"), ("padm", "STRING"), ("fp10_mda", "STRING"), ("acbs", "STRING"), ("assort_flav", "STRING"), ("catcd", "STRING"), ("tariff_category", "STRING"), ("flag_imported", "STRING"), ("flag_broken_bulk", "STRING"), ("flag_non_bioequivalence", "STRING"), ("flag_special_containers", "BOOLEAN"), ) CCG_SCHEMA = build_schema(
def test_the_lot(self):
    """Exercise the BigQuery wrapper API end to end against real test
    datasets: table creation, the insert_rows_from_* methods, querying,
    export to/from Storage, storage-backed tables, views, copying/moving
    between datasets, insertion from Postgres, and row deletion.

    The steps are strictly order-dependent: t1/t2 are reused and mutated
    throughout, so assertions rely on the state left by earlier steps.
    """
    client = Client("test")
    archive_client = Client("archive")

    # t1 is created with (STRING, INTEGER) but the CSV is loaded with the
    # (INTEGER, STRING) schema — presumably to exercise schema override
    # on load; confirm against insert_rows_from_csv's implementation.
    orig_schema = build_schema(("a", "STRING"), ("b", "INTEGER"))
    schema = build_schema(("a", "INTEGER"), ("b", "STRING"))
    headers = ["a", "b"]
    rows = [(1, "apple"), (2, "banana"), (3, "coconut")]

    t1 = client.get_or_create_table("t1", orig_schema)
    t1_qname = t1.qualified_name

    # Test Table.insert_rows_from_csv
    t1.insert_rows_from_csv("gcutils/tests/test_table.csv", schema)
    self.assertEqual(sorted(t1.get_rows()), rows)

    # Test Table.insert_rows_from_query
    t2 = client.get_table("t2")
    sql = "SELECT * FROM {} WHERE a > 1".format(t1_qname)
    t2.insert_rows_from_query(sql)
    self.assertEqual(sorted(t2.get_rows()), rows[1:])

    # Test Client.query
    sql = "SELECT * FROM {} WHERE a > 2".format(t1_qname)
    results = client.query(sql)
    self.assertEqual(sorted(results.rows), rows[2:])

    # Test Client.query_into_dataframe
    sql = "SELECT * FROM {} WHERE a > 2".format(t1_qname)
    df = client.query_into_dataframe(sql)
    self.assertEqual(df.values.tolist(), [list(rows[2])])

    # Test TableExporter.export_to_storage and
    # TableExporter.download_from_storage_and_unzip
    t1_exporter = TableExporter(t1, self.storage_prefix + "test_table-")
    t1_exporter.export_to_storage()

    with tempfile.NamedTemporaryFile(mode="r+") as f:
        t1_exporter.download_from_storage_and_unzip(f)
        f.seek(0)
        reader = csv.reader(f)
        data = [next(reader)] + sorted(reader)

    # csv gives back strings, so stringify the expected rows.
    self.assertEqual(data, [list(map(str, row)) for row in [headers] + rows])

    # Test Table.insert_rows_from_storage
    storage_path = self.storage_prefix + "test_table.csv"
    self.upload_to_storage("gcutils/tests/test_table.csv", storage_path)
    t2.insert_rows_from_storage(storage_path)
    self.assertEqual(sorted(t2.get_rows()), rows)

    # Test Client.create_storage_backed_table
    storage_path = self.storage_prefix + "test_table_headers.csv"
    self.upload_to_storage("gcutils/tests/test_table_headers.csv", storage_path)
    schema = build_schema(("a", "INTEGER"), ("b", "STRING"))
    t3 = client.create_storage_backed_table("t3", schema, storage_path)
    results = client.query("SELECT * FROM {}".format(t3.qualified_name))
    self.assertEqual(sorted(results.rows), rows)

    # A storage-backed table reflects subsequent changes to the blob.
    self.upload_to_storage("gcutils/tests/test_table_headers_2.csv", storage_path)
    results = client.query("SELECT * FROM {}".format(t3.qualified_name))
    self.assertEqual(sorted(results.rows), rows + [(4, "damson")])

    # Test Client.create_table_with_view
    sql = "SELECT * FROM {{project}}.{} WHERE a > 1".format(t1_qname)
    t4 = client.create_table_with_view("t4", sql, False)
    results = client.query("SELECT * FROM {}".format(t4.qualified_name))
    self.assertEqual(sorted(results.rows), rows[1:])

    # Test Table.copy_to_new_dataset (source table keeps its rows)
    t1.copy_to_new_dataset("archive")
    t1_archived = archive_client.get_table("t1")
    self.assertEqual(sorted(t1_archived.get_rows()), rows)
    self.assertEqual(sorted(t1.get_rows()), rows)

    # Test Table.move_to_new_dataset (source table is gone afterwards)
    t2.move_to_new_dataset("archive")
    t2_archived = archive_client.get_table("t2")
    self.assertEqual(sorted(t2_archived.get_rows()), rows)
    with self.assertRaises(NotFound):
        list(t2.get_rows())

    # Test Client.insert_rows_from_pg
    PCT.objects.create(code="ABC", name="CCG 1")
    PCT.objects.create(code="XYZ", name="CCG 2")

    def transformer(row):
        # Map the code's first character to its ordinal so the result
        # fits the INTEGER column.
        return [ord(row[0][0]), row[1]]

    t1.insert_rows_from_pg(
        PCT,
        build_schema(("code", "INTEGER"), ("name", "STRING")),
        transformer=transformer,
    )
    self.assertEqual(sorted(t1.get_rows()), [(65, "CCG 1"), (88, "CCG 2")])

    # Test Table.delete_all_rows
    t1.delete_all_rows()
    self.assertEqual(list(t1.get_rows()), [])
def run_end_to_end():
    """Run the full import pipeline twice (two months of fixture data)
    against a throwaway BigQuery/Storage environment, then check the
    expected numbers of measure and materialised-view rows.

    NOTE(review): relies on module-level fixtures (DATASETS, e2e_path,
    runner, the assert_* helpers) defined elsewhere in this file.
    """
    print('BQ_NONCE: {}'.format(settings.BQ_NONCE))
    # Hard-coded count of measure definitions; the first assert below
    # exists to catch this going stale.
    num_measures = 57
    # Start from a clean slate: local pipeline data, the import log, and
    # every blob in the test Storage bucket.
    shutil.rmtree(settings.PIPELINE_DATA_BASEDIR, ignore_errors=True)
    with open(settings.PIPELINE_IMPORT_LOG_PATH, 'w') as f:
        f.write('{}')
    for blob in StorageClient().bucket().list_blobs():
        blob.delete()
    for dataset_key in DATASETS:
        BQClient(dataset_key).create_dataset()
    # Create the empty hscic tables the pipeline will populate.
    client = BQClient('hscic')
    client.create_table('bnf', schemas.BNF_SCHEMA)
    client.create_table('ccgs', schemas.CCG_SCHEMA)
    client.create_table('ppu_savings', schemas.PPU_SAVING_SCHEMA)
    client.create_table('practice_statistics',
                        schemas.PRACTICE_STATISTICS_SCHEMA)
    client.create_table('practice_statistics_all_years',
                        schemas.PRACTICE_STATISTICS_SCHEMA)
    client.create_table('practices', schemas.PRACTICE_SCHEMA)
    client.create_table('prescribing', schemas.PRESCRIBING_SCHEMA)
    client.create_table('presentation', schemas.PRESENTATION_SCHEMA)
    client.create_table('tariff', schemas.TARIFF_SCHEMA)
    client = BQClient('measures')
    # This is enough of a schema to allow the practice_data_all_low_priority
    # table to be created, since it references these fields. Once populated by
    # import_measures, the tables in the measures dataset will have several
    # more fields. But we don't need to specify exactly what they are, as BQ
    # will work it out when the data is inserted with insert_rows_from_query.
    measures_schema = build_schema(
        ('month', 'DATE'),
        ('practice_id', 'STRING'),
        ('numerator', 'INTEGER'),
        ('denominator', 'INTEGER'),
    )
    path = os.path.join(settings.SITE_ROOT, 'frontend', 'management',
                        'commands', 'measure_definitions', '*.json')
    for path in glob.glob(path):
        measure_id = os.path.splitext(os.path.basename(path))[0]
        client.create_table('practice_data_' + measure_id, measures_schema)
        client.create_table('ccg_data_' + measure_id, measures_schema)
        client.create_table('global_data_' + measure_id, measures_schema)
    call_command('generate_presentation_replacements')
    # Replace the Postgres materialised views with plain tables so the
    # test can write to them directly.
    path = os.path.join(settings.SITE_ROOT, 'frontend', 'management',
                        'commands', 'replace_matviews.sql')
    with open(path) as f:
        with connection.cursor() as c:
            c.execute(f.read())
    # First month of fixture data.
    copy_tree(
        os.path.join(e2e_path, 'data-1'),
        os.path.join(e2e_path, 'data'),
    )
    runner.run_all(2017, 9, under_test=True)
    # We expect one MeasureGlobal per measure per month. If this assert fails,
    # check that num_measures is still correct.
    assert_count_equal(num_measures, MeasureGlobal)
    # We expect one MeasureValue for each CCG or Practice per measure per month
    assert_count_equal(6 * num_measures, MeasureValue)
    # We expect one statistic per CCG per month
    assert_raw_count_equal(2, 'vw__ccgstatistics')
    # We expect one chemical summary per CCG per month
    assert_raw_count_equal(2, 'vw__chemical_summary_by_ccg',
                           "chemical_id = '1001030C0'")
    # We expect one chemical summary per practice per month
    assert_raw_count_equal(4, 'vw__chemical_summary_by_practice',
                           "chemical_id = '1001030C0'")
    # We expect one summary per practice per month
    assert_raw_count_equal(4, 'vw__practice_summary')
    # We expect one presentation summary per month
    assert_raw_count_equal(1, 'vw__presentation_summary',
                           "presentation_code = '1001030C0AAAAAA'")
    # We expect one presentation summary per CCG per month
    assert_raw_count_equal(2, 'vw__presentation_summary_by_ccg',
                           "presentation_code = '1001030C0AAAAAA'")
    # Second month of fixture data: every per-month count doubles.
    copy_tree(
        os.path.join(e2e_path, 'data-2'),
        os.path.join(e2e_path, 'data'),
    )
    runner.run_all(2017, 10, under_test=True)
    # We expect one MeasureGlobal per measure per month
    assert_count_equal(2 * num_measures, MeasureGlobal)
    # We expect one MeasureValue for each CCG or Practice per measure per month
    assert_count_equal(12 * num_measures, MeasureValue)
    # We expect one statistic per CCG per month
    assert_raw_count_equal(4, 'vw__ccgstatistics')
    # We expect one chemical summary per CCG per month
    assert_raw_count_equal(4, 'vw__chemical_summary_by_ccg',
                           "chemical_id = '1001030C0'")
    # We expect one chemical summary per practice per month
    assert_raw_count_equal(8, 'vw__chemical_summary_by_practice',
                           "chemical_id = '1001030C0'")
    # We expect one summary per practice per month
    assert_raw_count_equal(8, 'vw__practice_summary')
    # We expect one presentation summary per month
    assert_raw_count_equal(2, 'vw__presentation_summary',
                           "presentation_code = '1001030C0AAAAAA'")
    # We expect one presentation summary per CCG per month
    assert_raw_count_equal(4, 'vw__presentation_summary_by_ccg',
                           "presentation_code = '1001030C0AAAAAA'")
import csv import tempfile import requests from bs4 import BeautifulSoup from django.core.management import BaseCommand from gcutils.bigquery import Client, build_schema from google.cloud.exceptions import Conflict SCHEMA = build_schema( ("year_month", "DATE"), ("ods_code", "STRING"), ("vmp_snomed_code", "STRING"), ("vmp_product_name", "STRING"), ("unit_of_measure_identifier", "STRING"), ("unit_of_measure_name", "STRING"), ("total_quanity_in_vmp_unit", "FLOAT"), ) class Command(BaseCommand): help = "Import any SCMD datasets missing from our BigQuery dataset" def ensure_dataset_exists(self, client): try: client.create_dataset() except Conflict: pass def handle(self, *args, **kwargs): s = requests.Session()