def __init__(self):
    """Initialize BigQuery/DB connection handles and empty local caches."""
    # BigQuery connection + client wrapper
    self.bqConn = bigQueryUtil.connection()
    self.bqClient = bigQueryUtil.BigQueryClient()
    # Default relational DB connection source
    self.connFactory = DBUtil.ConnectionFactory()
    # Local caches (presumably mirroring DB table contents to avoid
    # redundant lookups — TODO confirm against sibling loader classes)
    self.categoryBySourceDescr = {}
    self.clinicalItemByCategoryIdExtId = {}
def __init__(self):
    """Initialize connection handles and per-instance lookup caches."""
    self.bqConn = bigQueryUtil.connection()
    self.bqClient = bigQueryUtil.BigQueryClient()
    # Default connection source, but allow specification of an
    # alternative DB connection source
    self.connFactory = DBUtil.ConnectionFactory()
    # Local cache to track the clinical item category table contents
    self.categoryBySourceDescr = {}
    # Local cache to track clinical item table contents
    self.clinicalItemByCompositeKey = {}
def upload_test_data_csv_to_bigquery(self):
    """Load the test-data CSV into the test source table.

    Reuses the schema of the existing starr_datalake2018.demographic
    table so the uploaded data matches the production layout.
    """
    client_wrapper = bigQueryUtil.BigQueryClient()
    # Fetch the schema from the live demographic table
    demographic_ref = client_wrapper.client.dataset(
        'starr_datalake2018', 'mining-clinical-decisions'
    ).table('demographic')
    source_schema = client_wrapper.client.get_table(demographic_ref).schema
    # TEST_SOURCE_TABLE is a dotted "dataset.table" identifier; split once
    name_parts = TEST_SOURCE_TABLE.split('.')
    client_wrapper.load_csv_to_table(
        name_parts[0],        # dataset id
        name_parts[1],        # table id
        self.test_data_csv,
        False,                # do not auto-detect schema; use the one above
        source_schema,
        1,                    # skip the header row
    )
def setUp(self):
    """Prepare state for test cases.

    Writes a dummy CSV fixture (header plus rows pairing 0..25 with
    'a'..'z') and opens the BigQuery connection/client used by the tests.
    """
    self.tmp_dummy_csv_path = TMP_DIR + '/unittest_bq_dummy.csv'
    lines = [['num', 'char']] + [[n, chr(ord('a') + n)] for n in range(26)]
    self.dummy_table = lines
    # BUG FIX: csv.writer requires a text-mode file opened with
    # newline='' in Python 3; the original 'wb' mode raises
    # "TypeError: a bytes-like object is required" on the first write.
    with open(self.tmp_dummy_csv_path, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=',')
        writer.writerows(lines)
    self.tmp_csv_path = TMP_DIR + '/unittest_bq.csv'
    self.bqConn = bigQueryUtil.connection()
    self.bqClient = bigQueryUtil.BigQueryClient()
def load_alert_table(csv_path):
    """Upload an alert CSV extract into the alert_2019.alert BigQuery table.

    Args:
        csv_path: Path to the CSV file to load (no header rows skipped).

    Raises:
        EnvironmentError: If GOOGLE_APPLICATION_CREDENTIALS is not set.
    """
    # FIX: validate credentials BEFORE constructing the client they guard,
    # and raise explicitly rather than assert — asserts are stripped under
    # `python -O`, which would silently skip this check.
    if 'GOOGLE_APPLICATION_CREDENTIALS' not in os.environ:
        raise EnvironmentError('GOOGLE_APPLICATION_CREDENTIALS is not set.')
    bq_client = bigQueryUtil.BigQueryClient()
    # Explicit schema for the alert table (auto-detection disabled below)
    schema = [
        bigquery.SchemaField('anon_id', 'STRING', 'REQUIRED', None, ()),
        bigquery.SchemaField('alt_id_jittered', 'INT64', 'REQUIRED', None, ()),
        bigquery.SchemaField('pat_csn_jittered', 'INT64', 'NULLABLE', None, ()),
        bigquery.SchemaField('update_date_jittered', 'TIMESTAMP', 'REQUIRED', None, ()),
        bigquery.SchemaField('alert_desc', 'STRING', 'NULLABLE', None, ()),
        bigquery.SchemaField('general_alt_type_c', 'INT64', 'NULLABLE', None, ()),
        bigquery.SchemaField('general_alert_name', 'STRING', 'NULLABLE', None, ()),
        bigquery.SchemaField('general_alert_title', 'STRING', 'NULLABLE', None, ()),
        bigquery.SchemaField('general_alert_abbr', 'STRING', 'NULLABLE', None, ()),
        bigquery.SchemaField('med_alert_type_c', 'INT64', 'NULLABLE', None, ()),
        bigquery.SchemaField('med_alert_name', 'STRING', 'NULLABLE', None, ()),
        bigquery.SchemaField('med_alert_title', 'STRING', 'NULLABLE', None, ()),
        bigquery.SchemaField('med_alert_abbr', 'STRING', 'NULLABLE', None, ()),
        bigquery.SchemaField('immun_id', 'INT64', 'NULLABLE', None, ()),
        bigquery.SchemaField('immun_name', 'STRING', 'NULLABLE', None, ()),
        bigquery.SchemaField('immun_abbreviation', 'STRING', 'NULLABLE', None, ()),
        bigquery.SchemaField('immun_type', 'STRING', 'NULLABLE', None, ()),
        bigquery.SchemaField('bpa_locator_id', 'INT64', 'NULLABLE', None, ()),
        bigquery.SchemaField('local_update_date_jittered', 'DATETIME', 'REQUIRED', None, ()),
    ]
    bq_client.load_csv_to_table('alert_2019', 'alert', csv_path,
                                auto_detect_schema=False,
                                schema=schema, skip_rows=0)
def __init__(self):
    """Initialize BigQuery/DB connections, STARR utilities, and local caches."""
    self.bqConn = bigQueryUtil.connection()
    self.bqClient = bigQueryUtil.BigQueryClient()
    # Default connection source, but allow specification of an
    # alternative DB connection source
    self.connFactory = DBUtil.ConnectionFactory()
    self.starrUtil = STARRUtil.StarrCommonUtils(self.bqClient)
    # Local caches tracking table contents / already-processed items
    self.categoryBySourceDescr = {}                    # clinical item category table
    self.clinicalItemByCategoryIdExtId = {}            # clinical item table
    self.itemCollectionByKeyStr = {}                   # item collections
    self.itemCollectionItemByCollectionIdItemId = {}   # item collection items
    self.patient_items = {}                            # processed patient items
    self.patient_item_collection_links = set()         # processed patient item collection links
()), bigquery.SchemaField('RPT_FINAL_PROV_MAP_ID', 'STRING', 'NULLABLE', None, ()), bigquery.SchemaField('BILLING_PROV_MAP_ID', 'STRING', 'NULLABLE', None, ()), bigquery.SchemaField('REFERRING_PROV_MAP_ID', 'STRING', 'NULLABLE', None, ()), bigquery.SchemaField('PROC_PERF_PROV_MAP_ID', 'STRING', 'NULLABLE', None, ()), bigquery.SchemaField('LAB_STATUS_C', 'STRING', 'NULLABLE', None, ()), bigquery.SchemaField('LAB_STATUS', 'STRING', 'NULLABLE', None, ()), bigquery.SchemaField('ORDER_STATUS_C', 'STRING', 'NULLABLE', None, ()), bigquery.SchemaField('ORDER_STATUS', 'STRING', 'NULLABLE', None, ()), bigquery.SchemaField('DATA_SOURCE', 'STRING', 'NULLABLE', None, ()) ] if __name__ == '__main__': logging.basicConfig() upload = input('Upload? ("y"/"n"): ') bq_client = bigQueryUtil.BigQueryClient() if upload == 'Y' or upload == 'y': bq_client.reconnect_client() bq_client.load_csv_to_table(DATASET_NAME, TABLE_NAME, csv_path, auto_detect_schema=False, schema=FINAL_TABLE_SCHEMA, skip_rows=1) print('Done')