def rate_table():
    """Write an example ``rate`` table to ``example.db`` and verify it.

    The table is written with ``dismod_at.create_table`` and read back with
    ``dismod_at.get_row_list`` (primary key column included).  The function
    asserts that what was read equals what was written, closes the
    connection and prints ``rate_table: OK``.
    """
    import dismod_at
    import copy
    #
    # connection to the example database
    file_name = 'example.db'
    new = True
    connection = dismod_at.create_connection(file_name, new)
    cursor = connection.cursor()
    #
    # (column name, column type) pairs in table order
    tbl_name = 'rate'
    columns = [
        ('rate_name', 'text'),
        ('parent_smooth_id', 'integer'),
        ('child_smooth_id', 'integer'),
        ('child_nslist_id', 'integer'),
    ]
    col_name = [name for name, _ in columns]
    col_type = [kind for _, kind in columns]
    row_list = [
        ['pini', 0, 1, None],
        ['iota', 2, 3, None],
        ['rho', 2, 3, None],
        ['chi', 2, 3, None],
        ['omega', 2, 3, 0],
    ]
    dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    #
    # expected rows: each written row preceded by its zero-based primary key
    check_name = [tbl_name + '_id'] + col_name
    check_list = [[row_id] + row for row_id, row in enumerate(row_list)]
    #
    stored_rows = dismod_at.get_row_list(connection, tbl_name, check_name)
    assert stored_rows == check_list
    #
    connection.close()
    print('rate_table: OK')
def density_table():
    """Write an example ``density`` table to ``example.db`` and verify it.

    The rows are read back with ``dismod_at.get_row_list`` (primary key
    column included) and compared with the rows that were written.  On
    success the connection is closed and ``density_table: OK`` is printed.
    """
    import dismod_at
    import copy
    #
    file_name = 'example.db'
    new = True
    connection = dismod_at.create_connection(file_name, new)
    cursor = connection.cursor()
    #
    # one single-column row per density name
    tbl_name = 'density'
    col_name = ['density_name']
    col_type = ['text']
    density_names = (
        'uniform',
        'gaussian',
        'laplace',
        'log_gaussian',
        'log_laplace',
    )
    row_list = [[name] for name in density_names]
    dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    #
    # expected rows: primary key followed by the written values
    check_name = [tbl_name + '_id'] + col_name
    check_list = [[row_id] + row for row_id, row in enumerate(row_list)]
    #
    stored_rows = dismod_at.get_row_list(connection, tbl_name, check_name)
    assert stored_rows == check_list
    #
    connection.close()
    print('density_table: OK')
def age_table():
    """Write an example ``age`` table to ``example.db`` and verify it.

    Six ages (0.0, 20.0, ..., 100.0) are written, read back together with
    the primary key column, and compared.  On success the connection is
    closed and ``age_table: OK`` is printed.
    """
    import dismod_at
    import copy
    #
    file_name = 'example.db'
    new = True
    connection = dismod_at.create_connection(file_name, new)
    cursor = connection.cursor()
    #
    # ages 0.0, 20.0, 40.0, 60.0, 80.0, 100.0 (one per row)
    tbl_name = 'age'
    col_name = ['age']
    col_type = ['real']
    row_list = [[float(age)] for age in range(0, 101, 20)]
    dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    #
    # expected rows: primary key followed by the written value
    check_name = [tbl_name + '_id'] + col_name
    check_list = [[row_id] + row for row_id, row in enumerate(row_list)]
    #
    stored_rows = dismod_at.get_row_list(connection, tbl_name, check_name)
    assert stored_rows == check_list
    #
    connection.close()
    print('age_table: OK')
def get_table_dict():
    """Example use of ``dismod_at.get_table_dict``.

    A two-row ``covariate`` table is written to ``example.db``.  The result
    of ``get_table_dict`` is then checked: it must have one entry per row,
    each entry must have exactly two keys, and the values under
    ``covariate_name`` and ``reference`` must equal what was written.
    """
    import dismod_at
    import copy
    #
    file_name = 'example.db'
    new = True
    connection = dismod_at.create_connection(file_name, new)
    cursor = connection.cursor()
    #
    # write the covariate table
    tbl_name = 'covariate'
    col_name = ['covariate_name', 'reference']
    col_type = ['text', 'real']
    row_list = [['sex', 0.0], ['income', 2000.0]]
    dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    #
    # read it back as a list of dictionaries and compare entry by entry
    table_dict = dismod_at.get_table_dict(connection, tbl_name)
    assert len(table_dict) == len(row_list)
    for row_id, (name, reference) in enumerate(row_list):
        entry = table_dict[row_id]
        assert len(entry) == 2
        assert entry['covariate_name'] == name
        assert entry['reference'] == reference
    #
    connection.close()
    print('get_table_dict: OK')
def get_name_type():
    """Example use of ``dismod_at.get_name_type``.

    A one-row ``temp`` table with an integer, a real and a text column is
    written to ``example.db``.  ``get_name_type`` must then report those
    columns preceded by ``temp_id`` with type ``integer primary key``.
    """
    import dismod_at
    #
    file_name = 'example.db'
    new = True
    connection = dismod_at.create_connection(file_name, new)
    cursor = connection.cursor()
    #
    # write the temp table
    tbl_name = 'temp'
    written_name = ['int_name', 'real_name', 'text_name']
    written_type = ['integer', 'real', 'text']
    row_list = [[1, 2.0, 'three']]
    dismod_at.create_table(
        connection, tbl_name, written_name, written_type, row_list
    )
    #
    # names and types reported for the table, primary key included
    (col_name, col_type) = dismod_at.get_name_type(connection, tbl_name)
    #
    assert col_name == ['temp_id'] + written_name
    assert col_type == ['integer primary key'] + written_type
    #
    connection.close()
    print('get_name_type: OK')
def covariate_table():
    """Write an example ``covariate`` table to ``example.db`` and verify it.

    Two covariates are written (the second has a null ``max_difference``),
    read back with the primary key column, and compared.  On success the
    connection is closed and ``covariate_table: OK`` is printed.
    """
    import dismod_at
    import copy
    #
    file_name = 'example.db'
    new = True
    connection = dismod_at.create_connection(file_name, new)
    cursor = connection.cursor()
    #
    # (column name, column type) pairs in table order
    tbl_name = 'covariate'
    columns = [
        ('covariate_name', 'text'),
        ('reference', 'real'),
        ('max_difference', 'real'),
    ]
    col_name = [name for name, _ in columns]
    col_type = [kind for _, kind in columns]
    row_list = [
        ['sex', 0.0, 0.6],
        ['income', 2000.0, None],
    ]
    dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    #
    # expected rows: primary key followed by the written values
    check_name = [tbl_name + '_id'] + col_name
    check_list = [[row_id] + row for row_id, row in enumerate(row_list)]
    #
    stored_rows = dismod_at.get_row_list(connection, tbl_name, check_name)
    assert stored_rows == check_list
    #
    connection.close()
    print('covariate_table: OK')
def nslist_table():
    """Write an example ``nslist`` table to ``example.db`` and verify it.

    Two list names are written, read back together with the primary key
    column, and compared.  On success the connection is closed and
    ``nslist_table: OK`` is printed.
    """
    import dismod_at
    #
    file_name = 'example.db'
    new = True
    connection = dismod_at.create_connection(file_name, new)
    #
    # create the nslist table
    tbl_name = 'nslist'
    col_name = ['nslist_name']
    # The column holds names, so its declared type is 'text'
    # (it was previously misspelled as 'test').
    col_type = ['text']
    row_list = [['first_list'], ['second_list']]
    dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    #
    # expected rows: primary key followed by the written value
    check_name = [tbl_name + '_id'] + col_name
    check_list = [[row_id] + row for row_id, row in enumerate(row_list)]
    #
    stored_rows = dismod_at.get_row_list(connection, tbl_name, check_name)
    assert stored_rows == check_list
    #
    connection.close()
    print('nslist_table: OK')
def get_row_list():
    """Example use of ``dismod_at.get_row_list``.

    A two-row ``covariate`` table is written to ``example.db`` and then read
    back with the columns requested in reverse order.  The function checks
    the number of rows and columns, the Python type of each value, and the
    values themselves.
    """
    import dismod_at
    import copy
    #
    file_name = 'example.db'
    new = True
    connection = dismod_at.create_connection(file_name, new)
    cursor = connection.cursor()
    #
    # write the covariate table
    tbl_name = 'covariate'
    written = [['sex', 0.0], ['income', 2000.0]]
    dismod_at.create_table(
        connection,
        tbl_name,
        ['covariate_name', 'reference'],
        ['text', 'real'],
        written,
    )
    n_row = len(written)
    #
    # request the columns in the reverse of the order they were written
    col_name = ['reference', 'covariate_name']
    n_col = len(col_name)
    row_list = dismod_at.get_row_list(connection, tbl_name, col_name)
    #
    # shape and value types
    assert len(row_list) == n_row
    for row in row_list[:n_row]:
        assert len(row) == n_col
        assert isinstance(row[0], float)
        assert isinstance(row[1], str)
    #
    # values, in the requested (reversed) column order
    expected = [(0.0, 'sex'), (2000.0, 'income')]
    for row_id, (reference, name) in enumerate(expected):
        assert row_list[row_id][0] == reference
        assert row_list[row_id][1] == name
    #
    connection.close()
    print('get_row_list: OK')
def replace_table(connection, tbl_name, table_dict):
    """Replace the contents of table ``tbl_name`` with the rows in ``table_dict``.

    connection : database connection; must provide ``cursor()`` and
        ``commit()``.
    tbl_name   : name of the table to replace.
    table_dict : iterable of mappings from column name to value, one per
        row of the new table.  Every non-primary-key column of the
        existing table is looked up in each mapping.

    The column names and types are taken from the existing table, which is
    then dropped and re-created with the new rows.  The change is committed
    before returning.
    """
    import dismod_at
    #
    # column names and types of the existing table
    (col_name, col_type) = dismod_at.get_name_type(connection, tbl_name)
    #
    # The primary key comes first and is removed here because create_table
    # adds it automatically.
    assert col_name[0] == tbl_name + '_id'
    assert col_type[0] == 'integer primary key'
    col_name.pop(0)
    col_type.pop(0)
    #
    # remove the old table
    connection.cursor().execute('DROP TABLE ' + tbl_name)
    #
    # new rows, with values in the same order as col_name
    row_list = [[row[col] for col in col_name] for row in table_dict]
    #
    # write the new table
    dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    connection.commit()
def integrand_table():
    """Write an example ``integrand`` table to ``example.db`` and verify it.

    Four integrands, all with ``eta`` equal to 1e-6, are written, read back
    together with the primary key column, and compared.  On success the
    connection is closed and ``integrand_table: OK`` is printed.
    """
    import dismod_at
    import copy
    #
    file_name = 'example.db'
    new = True
    connection = dismod_at.create_connection(file_name, new)
    cursor = connection.cursor()
    #
    # every integrand in this example uses the same eta
    tbl_name = 'integrand'
    col_name = ['integrand_name', 'eta']
    col_type = ['text', 'real']
    eta = 1e-6
    integrand_names = ('Tincidence', 'remission', 'mtall', 'mulcov_1')
    row_list = [[name, eta] for name in integrand_names]
    dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    #
    # expected rows: primary key followed by the written values
    check_name = [tbl_name + '_id'] + col_name
    check_list = [[row_id] + row for row_id, row in enumerate(row_list)]
    #
    stored_rows = dismod_at.get_row_list(connection, tbl_name, check_name)
    assert stored_rows == check_list
    #
    connection.close()
    print('integrand_table: OK')
def node_table():
    """Write an example ``node`` table to ``example.db`` and verify it.

    Four nodes are written; the ``parent`` column holds the row index of
    another node, or null for the first node.  The rows are read back with
    the primary key column and compared.  On success the connection is
    closed and ``node_table: OK`` is printed.
    """
    import dismod_at
    #
    file_name = 'example.db'
    new = True
    connection = dismod_at.create_connection(file_name, new)
    cursor = connection.cursor()
    #
    # [node_name, parent]
    tbl_name = 'node'
    col_name = ['node_name', 'parent']
    col_type = ['text', 'integer']
    row_list = [
        ['world', None],
        ['north_america', 0],
        ['united_states', 1],
        ['canada', 1],
    ]
    dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    #
    # expected rows: primary key followed by the written values
    check_name = [tbl_name + '_id'] + col_name
    check_list = [[row_id] + row for row_id, row in enumerate(row_list)]
    #
    stored_rows = dismod_at.get_row_list(connection, tbl_name, check_name)
    assert stored_rows == check_list
    #
    connection.close()
    print('node_table: OK')
def prior_table():
    """Write an example ``prior`` table to ``example.db`` and verify it.

    Two priors (``none`` and ``rate``), both referring to density id 0, are
    written, read back together with the primary key column, and compared.
    On success the connection is closed and ``prior_table: OK`` is printed.
    """
    import dismod_at
    import copy
    import collections
    #
    file_name = 'example.db'
    new = True
    connection = dismod_at.create_connection(file_name, new)
    cursor = connection.cursor()
    #
    # column name -> column type, in table order
    col_name2type = collections.OrderedDict()
    col_name2type['prior_name'] = 'text'
    col_name2type['density_id'] = 'integer'
    col_name2type['lower'] = 'real'
    col_name2type['upper'] = 'real'
    col_name2type['mean'] = 'real'
    col_name2type['std'] = 'real'
    col_name2type['eta'] = 'real'
    col_name = list(col_name2type.keys())
    col_type = list(col_name2type.values())
    #
    # each prior is described by name -> value, then flattened in col_name order
    uniform_density_id = 0
    priors = [
        {
            'prior_name': 'none',
            'density_id': uniform_density_id,
            'lower': None,
            'upper': None,
            'mean': 0,
            'std': None,
            'eta': None,
        },
        {
            'prior_name': 'rate',
            'density_id': uniform_density_id,
            'lower': 0.0,
            'upper': 1.0,
            'mean': 0.1,
            'std': None,
            'eta': None,
        },
    ]
    row_list = [[prior[name] for name in col_name] for prior in priors]
    tbl_name = 'prior'
    dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    #
    # expected rows: primary key followed by the written values
    check_name = [tbl_name + '_id'] + col_name
    check_list = [[row_id] + row for row_id, row in enumerate(row_list)]
    #
    stored_rows = dismod_at.get_row_list(connection, tbl_name, check_name)
    assert stored_rows == check_list
    #
    connection.close()
    print('prior_table: OK')
def weight_grid_table():
    """Write example ``weight`` and ``weight_grid`` tables and verify them.

    Three weightings (``constant``, ``age_linear``, ``bilinear``) and their
    grid points are written to ``example.db``.  The grid rows belonging to
    the ``bilinear`` weighting are then selected with a join on
    ``weight_id`` and compared, value by value, with what was written.
    On success the connection is closed and ``weight_grid_table: OK`` is
    printed.
    """
    import dismod_at
    #
    file_name = 'example.db'
    new = True
    connection = dismod_at.create_connection(file_name, new)
    #
    # weight table; the time count column is named n_time, parallel to n_age
    # (it was previously misspelled as 'ntime')
    col_name = ['weight_name', 'n_age', 'n_time']
    col_type = ['text', 'integer', 'integer']
    row_list = [
        ['constant', 1, 1],
        ['age_linear', 2, 1],
        ['bilinear', 2, 2],
    ]
    tbl_name = 'weight'
    dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    #
    # weight_grid table: [weight_id, age_id, time_id, weight]
    col_name = ['weight_id', 'age_id', 'time_id', 'weight']
    col_type = ['integer', 'integer', 'integer', 'real']
    row_list = [
        # constant
        [0, 1, 1, 1.0],
        # age_linear
        [1, 0, 1, 0.5],
        [1, 2, 1, 1.5],
        # bilinear
        [2, 0, 0, 0.5],
        [2, 2, 0, 1.0],
        [2, 0, 2, 1.0],
        [2, 2, 2, 1.5],
    ]
    tbl_name = 'weight_grid'
    dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    #
    # Select the bilinear rows.  The name is a single-quoted SQL string
    # literal; a double-quoted token is an identifier in standard SQL and is
    # only accepted as a string by SQLite builds that keep that quirk enabled.
    columns = 'weight_grid_id,' + ','.join(col_name)
    cmd = 'SELECT ' + columns + ' FROM weight_grid'
    cmd += ' INNER JOIN weight USING (weight_id)'
    cmd += " WHERE weight_name = 'bilinear'"
    #
    # the bilinear rows start at index 3 of row_list, which is also their
    # weight_grid_id
    count = 3
    cursor = connection.cursor()
    for row in cursor.execute(cmd):
        check = [count] + row_list[count]
        assert len(row) == len(check)
        for found, expected in zip(row, check):
            assert found == expected
        count += 1
    # every row from index 3 to the end must have been returned
    assert count == len(row_list)
    #
    connection.close()
    print('weight_grid_table: OK')
def avgint_table():
    """Write an example ``avgint`` table to ``example.db`` and verify it.

    A single row is written, read back together with the primary key
    column, and compared.  On success the connection is closed and
    ``avgint_table: OK`` is printed.
    """
    import dismod_at
    import copy
    import collections
    #
    file_name = 'example.db'
    new = True
    connection = dismod_at.create_connection(file_name, new)
    #
    # (column name, column type, value in the single example row)
    columns = [
        # required columns
        ('integrand_id', 'integer', 1),
        ('density_id', 'integer', 0),
        ('node_id', 'integer', 3),
        ('weight_id', 'integer', 4),
        ('age_lower', 'real', 10.0),
        ('age_upper', 'real', 90.0),
        ('time_lower', 'real', 2000.),
        ('time_upper', 'real', 2005.),
        # covariates
        ('x_sex', 'real', 0.5),
        ('x_income', 'real', 1000.),
    ]
    col_name = [name for name, _, _ in columns]
    col_type = [kind for _, kind, _ in columns]
    row_list = [[value for _, _, value in columns]]
    #
    # create the avgint table
    tbl_name = 'avgint'
    dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    #
    # expected rows: primary key followed by the written values
    check_name = [tbl_name + '_id'] + col_name
    check_list = [[row_id] + row for row_id, row in enumerate(row_list)]
    #
    stored_rows = dismod_at.get_row_list(connection, tbl_name, check_name)
    assert stored_rows == check_list
    #
    connection.close()
    print('avgint_table: OK')
def mulcov_table():
    """Write an example ``mulcov`` table to ``example.db`` and verify it.

    Two covariate multipliers are written: a ``meas_value`` one with a null
    ``rate_id`` and a ``rate_value`` one with a null ``integrand_id``.  The
    rows are read back with the primary key column and compared.  On
    success the connection is closed and ``mulcov_table: OK`` is printed.
    """
    import dismod_at
    import copy
    import collections
    #
    file_name = 'example.db'
    new = True
    connection = dismod_at.create_connection(file_name, new)
    cursor = connection.cursor()
    #
    # column name -> column type, in table order
    col_name2type = collections.OrderedDict()
    col_name2type['mulcov_type'] = 'text'
    col_name2type['rate_id'] = 'integer'
    col_name2type['integrand_id'] = 'integer'
    col_name2type['covariate_id'] = 'integer'
    col_name2type['smooth_id'] = 'integer'
    col_name = list(col_name2type.keys())
    col_type = list(col_name2type.values())
    #
    # rate_id is null for the measurement covariate and integrand_id is null
    # for the rate covariate
    multipliers = [
        {
            'mulcov_type': 'meas_value',
            'rate_id': None,
            'integrand_id': 2,
            'covariate_id': 1,
            'smooth_id': 2,
        },
        {
            'mulcov_type': 'rate_value',
            'rate_id': 1,
            'integrand_id': None,
            'covariate_id': 2,
            'smooth_id': 2,
        },
    ]
    row_list = [[entry[name] for name in col_name] for entry in multipliers]
    tbl_name = 'mulcov'
    dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    #
    # expected rows: primary key followed by the written values
    check_name = [tbl_name + '_id'] + col_name
    check_list = [[row_id] + row for row_id, row in enumerate(row_list)]
    #
    stored_rows = dismod_at.get_row_list(connection, tbl_name, check_name)
    assert stored_rows == check_list
    #
    connection.close()
    print('mulcov_table: OK')
def nslist_pair_table():
    """Write an example ``nslist_pair`` table to ``example.db`` and verify it.

    Two lists are written, each giving a different smoothing id to nodes
    0, 1 and 2.  The rows are read back with the primary key column and
    compared.  On success the connection is closed and
    ``nslist_pair_table: OK`` is printed.
    """
    import dismod_at
    import copy
    import collections
    #
    file_name = 'example.db'
    new = True
    connection = dismod_at.create_connection(file_name, new)
    cursor = connection.cursor()
    #
    # column name -> column type, in table order
    col_name2type = collections.OrderedDict()
    for name in ('nslist_id', 'node_id', 'smooth_id'):
        col_name2type[name] = 'integer'
    col_name = list(col_name2type.keys())
    col_type = list(col_name2type.values())
    #
    # [nslist_id, node_id, smooth_id]
    first_list = [[0, 0, 0], [0, 1, 1], [0, 2, 2]]
    second_list = [[1, 0, 3], [1, 1, 4], [1, 2, 5]]
    row_list = first_list + second_list
    tbl_name = 'nslist_pair'
    dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    #
    # expected rows: primary key followed by the written values
    check_name = [tbl_name + '_id'] + col_name
    check_list = [[row_id] + row for row_id, row in enumerate(row_list)]
    #
    stored_rows = dismod_at.get_row_list(connection, tbl_name, check_name)
    assert stored_rows == check_list
    #
    connection.close()
    print('nslist_pair_table: OK')
def create_table():
    """Example use of ``dismod_at.create_table`` with non-ASCII text.

    Three one-character strings (code points 0xA1, 0xA2 and 0xA3) are
    written to a ``temp`` table in ``example.db``.  The table is then
    selected directly and the primary keys and text values are checked.
    """
    import dismod_at
    #
    file_name = 'example.db'
    new = True
    connection = dismod_at.create_connection(file_name, new)
    cursor = connection.cursor()
    #
    # three consecutive Latin-1 supplement characters
    inverted_exclamation = chr(0xA1)  # 00a1
    cent_sign = chr(0xA2)             # 00a2
    pound_sign = chr(0xA3)            # 00a3
    symbols = [inverted_exclamation, cent_sign, pound_sign]
    #
    # create table with one symbol per row
    tbl_name = 'temp'
    col_name = ['temp_name']
    col_type = ['text']
    dismod_at.create_table(
        connection, tbl_name, col_name, col_type, [[symbol] for symbol in symbols]
    )
    #
    # read every (temp_id, temp_name) pair back
    stored = list(cursor.execute('SELECT temp_id, temp_name FROM temp'))
    #
    # primary keys count up from zero
    for row_id, row in enumerate(stored):
        assert row[0] == row_id
    #
    # text values come back unchanged
    for row_id, symbol in enumerate(symbols):
        assert stored[row_id][1] == symbol
    #
    connection.close()
    print('create_table: OK')
def create_truth_var_table():
    """Create the ``truth_var`` table with one value per row of the ``var`` table.

    Relies on names defined outside this function: ``dismod_at``,
    ``file_name``, ``mulcov_dict`` and ``true_rate``.

    For a ``var`` row whose ``var_type`` is ``rate`` the value is
    ``true_rate(node, rate, age, time)``; for a ``var_type`` starting with
    ``mulcov_`` it is ``mulcov_dict[covariate_name]``.  Any other
    ``var_type`` triggers an assertion.
    """
    new = False
    connection = dismod_at.create_connection(file_name, new)
    #
    # tables needed to interpret the ids in a var table row
    var_rows = dismod_at.get_table_dict(connection, 'var')
    rate_rows = dismod_at.get_table_dict(connection, 'rate')
    covariate_rows = dismod_at.get_table_dict(connection, 'covariate')
    integrand_rows = dismod_at.get_table_dict(connection, 'integrand')
    node_rows = dismod_at.get_table_dict(connection, 'node')
    time_rows = dismod_at.get_table_dict(connection, 'time')
    age_rows = dismod_at.get_table_dict(connection, 'age')
    #
    # one truth value per var table row, in var_id order
    row_list = []
    for row in var_rows:
        value = None
        var_type = row['var_type']
        # age and time are looked up for every row, whatever its type
        age = age_rows[row['age_id']]['age']
        time = time_rows[row['time_id']]['time']
        if var_type == 'rate':
            node = node_rows[row['node_id']]['node_name']
            rate = rate_rows[row['rate_id']]['rate_name']
            value = true_rate(node, rate, age, time)
        elif var_type.startswith('mulcov_'):
            covariate = covariate_rows[row['covariate_id']]['covariate_name']
            value = mulcov_dict[covariate]
        else:
            assert False
        row_list.append([value])
    #
    # write the truth_var table
    dismod_at.create_table(
        connection, 'truth_var', ['truth_var_value'], ['real'], row_list
    )
    connection.close()
    return
def replace_table():
    """Example use of ``dismod_at.replace_table``.

    A one-row ``my`` table is written to ``example.db`` and read back with
    ``get_table_dict``.  A second row is appended to that list, the table
    is replaced with it, and the table is read again and compared with the
    extended list.  On success the connection is closed and
    ``replace_table: OK`` is printed.
    """
    import dismod_at
    #
    file_name = 'example.db'
    new = True
    connection = dismod_at.create_connection(file_name, new)
    #
    # create my table
    col_name = ['int_name', 'real_name', 'text_name']
    col_type = ['integer', 'real', 'text']
    row_list = [[1, 2.0, 'three']]
    tbl_name = 'my'
    dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    #
    # original table and its values
    my_table = dismod_at.get_table_dict(connection, tbl_name)
    assert len(my_table) == 1
    row = my_table[0]
    assert row['int_name'] == 1
    assert row['real_name'] == 2.0
    assert row['text_name'] == 'three'
    #
    # add a row and replace the table with the extended list
    my_table.append({'int_name': 2, 'real_name': 3.0, 'text_name': 'four'})
    dismod_at.replace_table(connection, tbl_name, my_table)
    #
    # check the new table
    new_table = dismod_at.get_table_dict(connection, tbl_name)
    assert new_table == my_table
    #
    connection.close()
    # report this example's own name (the message used to say get_name_type)
    print('replace_table: OK')
node_name = subgroup_table[subgroup_id]['subgroup_name'] if node_name == 'n1': age = age_table[age_id]['age'] truth_var_value = iota_n1(age) else: truth_var_value = random_effect[node_name] row_list.append([truth_var_value]) var_id2node_name.append(node_name) # fit_var_value = fit_var_table[var_id]['fit_var_value'] relerr = 1.0 - fit_var_value / truth_var_value if abs(relerr) > 3.0 * meas_cv: print(node_name, truth_var_value, fit_var_value, relerr) assert False # dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list) connection.close() # --------------------------------------------------------------------------- # Sample Posterior and Check Coverage # --------------------------------------------------------------------------- # # sample from the posterior distribution n_str = str(number_sample) dismod_at.system_command_prc([program, file_name, 'simulate', n_str]) dismod_at.system_command_prc( [program, file_name, 'sample', 'simulate', 'both', n_str]) # # compute sample standard deviation and check for coverate connection = dismod_at.create_connection(file_name, new) sample_table = dismod_at.get_table_dict(connection, 'sample') sample_array = numpy.zeros((number_sample, n_var), dtype=numpy.double)
def modify_command(database, arg_list):
    """Modify one column of a table using Python expressions.

    database : file name passed to ``dismod_at.create_connection``.
    arg_list : list of strings with at least four entries:
        arg_list[0]   name of the table to modify,
        arg_list[1]   name of the column to modify,
        arg_list[2]   row_expression; rows for which it is true are modified,
        arg_list[3]   value_expression; the new value for the column,
        arg_list[4:]  Python statements executed, in order, before any
                      expression is evaluated (e.g. ``import math``).

    In both expressions a column name of the table stands for the value of
    that column in the current row; the primary key column stands for the
    zero-based row index.  The table is dropped and re-created with the
    modified rows.

    Raises AssertionError if fewer than four arguments are given or if the
    column is not in the table.

    SECURITY NOTE: the statements and expressions are run with ``exec`` and
    ``eval``, so ``arg_list`` must come from a trusted source.
    """
    import re
    import os
    import sys
    import copy
    import dismod_at
    #
    # -------------------------------------------------------------------------
    # arguments
    if len(arg_list) < 4:
        raise AssertionError('modify_command: expected at least 4 arguments')
    table_name, column_name, row_expression, value_expression = arg_list[:4]
    #
    # The statements and the expressions share one explicit namespace.
    # Without it, names created by exec() inside a function are not visible
    # to a later eval() on Python 3.13 and later.  The namespace starts with
    # the modules this function imports.
    namespace = {
        're': re,
        'os': os,
        'sys': sys,
        'copy': copy,
        'dismod_at': dismod_at,
    }
    for statement in arg_list[4:]:
        exec(statement, globals(), namespace)
    # -------------------------------------------------------------------------
    # replaces variable by _v_['variable']
    def replace_variable(expression, variable):
        # A match must not touch a letter, digit, underscore or quote on
        # either side.  Quotes protect names already rewritten as
        # _v_['name'].  Lookarounds consume no text, so a name that is the
        # whole expression, or names separated by one character, all match.
        boundary = "a-zA-Z0-9_'\""
        pattern = (
            '(?<![' + boundary + '])'
            + re.escape(variable)
            + '(?![' + boundary + '])'
        )
        replacement = "_v_['" + variable + "']"
        # a function replacement is inserted literally (no escape processing)
        return re.sub(pattern, lambda match: replacement, expression)
    # -------------------------------------------------------------------------
    # get the original value for the table
    new = False
    connection = dismod_at.create_connection(database, new)
    try:
        (col_name, col_type) = dismod_at.get_name_type(connection, table_name)
        table_dict = dismod_at.get_table_dict(connection, table_name)
        if column_name not in col_name:
            msg = column_name + ' is not a column in table ' + table_name + '\n'
            msg += 'of database ' + database
            raise AssertionError(msg)
        # ---------------------------------------------------------------------
        # map variable -> _v_['variable']
        primary_key = table_name + '_id'
        primary_index = None
        for index, col in enumerate(col_name):
            row_expression = replace_variable(row_expression, col)
            value_expression = replace_variable(value_expression, col)
            if col == primary_key:
                primary_index = index
        # ---------------------------------------------------------------------
        # modify the values in the table
        for count, row in enumerate(table_dict):
            # make the primary key available to the expressions
            row[primary_key] = count
            namespace['_v_'] = row
            if eval(row_expression, globals(), namespace):
                row[column_name] = eval(value_expression, globals(), namespace)
        # ---------------------------------------------------------------------
        # rows without the primary key, which create_table supplies itself
        del col_name[primary_index]
        del col_type[primary_index]
        row_list = [[row[col] for col in col_name] for row in table_dict]
        # ---------------------------------------------------------------------
        # delete the old version of the table
        cmd = 'DROP TABLE ' + table_name
        cursor = connection.cursor()
        cursor.execute(cmd)
        # ---------------------------------------------------------------------
        # create the new version
        dismod_at.create_table(
            connection, table_name, col_name, col_type, row_list
        )
    finally:
        # close the connection on success and on error
        connection.close()
def smooth_grid_table():
    """Write example ``smooth`` and ``smooth_grid`` tables and verify them.

    Four smoothings are written to ``example.db``, followed by a 3 x 2
    (age, time) grid for the ``bilinear`` smoothing.  In that grid the
    ``dage_prior_id`` is null at the last age and the ``dtime_prior_id`` is
    null at the last time.  The grid rows are then selected with a join on
    ``smooth_id`` and every column is checked.  On success the connection
    is closed and ``smooth_grid_table: OK`` is printed.
    """
    import dismod_at
    import copy
    import collections
    #
    file_name = 'example.db'
    new = True
    connection = dismod_at.create_connection(file_name, new)
    #
    # create smooth table
    col_name2type = collections.OrderedDict([
        ('smooth_name', 'text'),
        ('n_age', 'integer'),
        ('n_time', 'integer'),
        ('mulstd_value_prior_id', 'integer'),
        ('mulstd_dage_prior_id', 'integer'),
        ('mulstd_dtime_prior_id', 'integer'),
    ])
    col_name = list(col_name2type.keys())
    col_type = list(col_name2type.values())
    row_list = [
        ['constant', 1, 1, 1, 1, 1],
        ['age_only', 3, 1, 1, 1, 1],
        ['time_only', 1, 2, 1, 1, 1],
        ['bilinear', 3, 2, 1, 1, 1],
    ]
    tbl_name = 'smooth'
    dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    #
    # smooth_grid table column names
    col_name2type = collections.OrderedDict([
        ('smooth_id', 'integer'),
        ('age_id', 'integer'),
        ('time_id', 'integer'),
        ('value_prior_id', 'integer'),
        ('dage_prior_id', 'integer'),
        ('dtime_prior_id', 'integer'),
        ('const_value', 'real'),
    ])
    col_name = list(col_name2type.keys())
    col_type = list(col_name2type.values())
    #
    # smooth_grid table values
    default = [
        3,     # smooth_id (smooth_id == 3 is bilinear)
        None,  # age_id (set for each grid point below)
        None,  # time_id (set for each grid point below)
        1,     # value_prior_id
        2,     # dage_prior_id
        3,     # dtime_prior_id
        None,  # const_value
    ]
    row_list = []
    age_time_list = []
    for age_id in [0, 1, 2]:        # n_age is 3
        for time_id in [0, 1]:      # n_time is 2
            row = copy.copy(default)
            row[1] = age_id
            row[2] = time_id
            if age_id == 2:
                row[4] = None       # dage_prior_id null at the last age
            if time_id == 1:
                row[5] = None       # dtime_prior_id null at the last time
            row_list.append(row)
            age_time_list.append((age_id, time_id))
    #
    # write the table
    tbl_name = 'smooth_grid'
    dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    #
    # Select the bilinear rows.  The name is a single-quoted SQL string
    # literal; a double-quoted token is an identifier in standard SQL and is
    # only accepted as a string by SQLite builds that keep that quirk enabled.
    columns = 'smooth_grid_id,' + ','.join(col_name)
    cmd = 'SELECT ' + columns + ' FROM smooth_grid'
    cmd += ' JOIN smooth USING(smooth_id) '
    cmd += " WHERE smooth_name = 'bilinear'"
    #
    # check values in the table
    count = 0
    cursor = connection.cursor()
    for row in cursor.execute(cmd):
        assert len(row) == 8
        assert row[0] == count                      # smooth_grid_id
        assert row[1] == 3                          # smooth_id
        assert row[2] == age_time_list[count][0]    # age_id
        assert row[3] == age_time_list[count][1]    # time_id
        assert row[4] == 1                          # value_prior_id
        if row[2] == 2:
            assert row[5] is None                   # dage_prior_id
        else:
            assert row[5] == 2
        if row[3] == 1:
            assert row[6] is None                   # dtime_prior_id
        else:
            assert row[6] == 3
        assert row[7] is None                       # const_value
        count += 1
    # every grid row must have been returned
    assert count == len(row_list)
    #
    connection.close()
    print('smooth_grid_table: OK')
def data_table():
    """Write an example ``data`` table to ``example.db`` and verify it.

    A single row is written, read back together with the primary key
    column, and compared.  On success the connection is closed and
    ``data_table: OK`` is printed.
    """
    import dismod_at
    import copy
    import collections
    #
    file_name = 'example.db'
    new = True
    connection = dismod_at.create_connection(file_name, new)
    #
    # (column name, column type, value in the single example row)
    columns = [
        # required columns
        ('data_name', 'text', 'one'),
        ('integrand_id', 'integer', 1),
        ('density_id', 'integer', 0),
        ('node_id', 'integer', 3),
        ('weight_id', 'integer', 4),
        ('hold_out', 'integer', 0),
        ('meas_value', 'real', 1e-4),
        ('meas_std', 'real', 1e-5),
        ('age_lower', 'real', 10.0),
        ('age_upper', 'real', 90.0),
        ('time_lower', 'real', 2000.),
        ('time_upper', 'real', 2005.),
        # covariates
        ('x_sex', 'real', 0.5),
        ('x_income', 'real', 1000.),
        # comments
        ('c_data_source', 'text', 'www.healthdata.org'),
    ]
    col_name = [name for name, _, _ in columns]
    col_type = [kind for _, kind, _ in columns]
    row_list = [[value for _, _, value in columns]]
    #
    # create the data table
    tbl_name = 'data'
    dismod_at.create_table(connection, tbl_name, col_name, col_type, row_list)
    #
    # expected rows: primary key followed by the written values
    check_name = [tbl_name + '_id'] + col_name
    check_list = [[row_id] + row for row_id, row in enumerate(row_list)]
    #
    stored_rows = dismod_at.get_row_list(connection, tbl_name, check_name)
    assert stored_rows == check_list
    #
    connection.close()
    print('data_table: OK')
def create_database(
    file_name,
    age_list,
    time_list,
    integrand_table,
    node_table,
    subgroup_table,
    weight_table,
    covariate_table,
    avgint_table,
    data_table,
    prior_table,
    smooth_table,
    nslist_table,
    rate_table,
    mulcov_table,
    option_table,
):
    """Create a database holding the dismod_at input tables.

    Names used in the arguments (integrand, node, prior, smoothing, ...) are
    translated to the zero-based row index of the corresponding table using
    plain dictionary lookups, so an unknown name raises ``KeyError``.
    The argument tables are read only; defaults for missing keys are filled
    in on private copies.

    Args:
        file_name: passed to ``dismod_at.create_connection`` with
            ``new = True``.
        age_list: age values; one row of the ``age`` table per entry.
        time_list: time values; one row of the ``time`` table per entry.
        integrand_table: list of dicts with key ``name`` and optional key
            ``minimum_meas_cv`` (default ``0.0``).
        node_table: list of dicts with keys ``name`` and ``parent``; an
            empty-string parent is stored as null.
        subgroup_table: non-empty list of dicts with keys ``subgroup`` and
            ``group``.  A new group id is started each time the group name
            differs from the previous row.
        weight_table: list of dicts with keys ``name``, ``age_id``,
            ``time_id`` (indices into ``age_list`` / ``time_list``) and
            ``fun``, called as ``fun(age, time)`` to get the weight.
        covariate_table: list of dicts with keys ``name``, ``reference`` and
            optional key ``max_difference`` (default null).
        avgint_table: list of dicts with keys ``integrand``, ``node``,
            ``weight``, ``age_lower``, ``age_upper``, ``time_lower``,
            ``time_upper``, one key per covariate name, one key per
            ``avgint_extra_columns`` entry and optional key ``subgroup``
            (default: first subgroup, with a printed warning).
        data_table: like ``avgint_table`` plus keys ``density``,
            ``hold_out``, ``meas_value`` and optional keys ``meas_std``,
            ``eta``, ``nu`` (default null); extra columns come from
            ``data_extra_columns``.
        prior_table: list of dicts with keys ``name``, ``density``, ``mean``
            and optional keys ``lower``, ``upper``, ``std``, ``eta``, ``nu``
            (default null).
        smooth_table: list of dicts with keys ``name``, ``age_id``,
            ``time_id``, ``fun`` and optional keys
            ``mulstd_value_prior_name``, ``mulstd_dage_prior_name``,
            ``mulstd_dtime_prior_name``.  ``fun(age, time)`` returns
            ``(v, da, dt)``: ``v`` is a float (stored as ``const_value``),
            a prior name or ``None``; ``da`` and ``dt`` are prior names or
            ``None``.
        nslist_table: dict mapping an nslist name to a list of
            ``(node name, smoothing name)`` pairs.
        rate_table: list of dicts with key ``name`` and optional keys
            ``parent_smooth``, ``child_smooth``, ``child_nslist``.  Rates
            that are not listed get a row with all three entries null.
        mulcov_table: list of dicts with keys ``covariate``, ``type``,
            ``effected``, ``smooth`` and optional keys ``group`` (default:
            group 0, with a printed warning) and ``subsmooth``.
        option_table: list of dicts with keys ``name`` and ``value``.  The
            options ``avgint_extra_columns`` and ``data_extra_columns`` are
            whitespace-separated lists of extra column names.

    Raises:
        AssertionError: if an extra column value in the first avgint / data
            row is not a ``str``, ``int`` or ``float``.
        KeyError: if a required key or a referenced name is missing.
    """
    import dismod_at

    def extra_column_types(option_name, extra_name, first_row):
        """Return the SQL type of each extra column, judged from first_row."""
        extra_type = []
        for key in extra_name:
            value = first_row[key]
            if isinstance(value, str):
                extra_type.append('text')
            elif isinstance(value, int):
                extra_type.append('integer')
            elif isinstance(value, float):
                extra_type.append('real')
            else:
                msg = 'create_database: ' + option_name + ': type error:'
                msg += '\nThe type for column ' + key
                msg += ' is not str, int, or float'
                # raised explicitly so the check survives ``python -O``
                raise AssertionError(msg)
        return extra_type

    def print_missing_key_warning(key, table, default):
        """Print the warning used when an optional key gets its default."""
        msg = 'create_database Warning: '
        msg += key + ' key missing in ' + table + ' table,\n'
        msg += 'using default value; i.e., first ' + default + ' '
        msg += '(you should fix this).'
        print(msg)

    # ----------------------------------------------------------------------
    # avgint_extra_columns, data_extra_columns
    avgint_extra_columns = []
    data_extra_columns = []
    for row in option_table:
        if row['name'] == 'avgint_extra_columns':
            avgint_extra_columns = row['value'].split()
        if row['name'] == 'data_extra_columns':
            data_extra_columns = row['value'].split()
    # ----------------------------------------------------------------------
    # create database
    new = True
    connection = dismod_at.create_connection(file_name, new)
    try:
        # ------------------------------------------------------------------
        # create age table
        row_list = [[age] for age in age_list]
        dismod_at.create_table(
            connection, 'age', ['age'], ['real'], row_list
        )
        # ------------------------------------------------------------------
        # create time table
        row_list = [[time_value] for time_value in time_list]
        dismod_at.create_table(
            connection, 'time', ['time'], ['real'], row_list
        )
        # ------------------------------------------------------------------
        # create integrand table
        col_name = ['integrand_name', 'minimum_meas_cv']
        col_type = ['text', 'real']
        row_list = []
        for integrand in integrand_table:
            minimum_meas_cv = 0.0
            if 'minimum_meas_cv' in integrand:
                minimum_meas_cv = integrand['minimum_meas_cv']
            row_list.append([integrand['name'], minimum_meas_cv])
        dismod_at.create_table(
            connection, 'integrand', col_name, col_type, row_list
        )
        #
        global_integrand_name2id = {
            row[0]: i for i, row in enumerate(row_list)
        }
        # ------------------------------------------------------------------
        # create density table
        col_name = ['density_name']
        col_type = ['text']
        row_list = [
            ['uniform'],
            ['gaussian'],
            ['laplace'],
            ['students'],
            ['log_gaussian'],
            ['log_laplace'],
            ['log_students'],
            ['cen_gaussian'],
            ['cen_laplace'],
            ['cen_log_gaussian'],
            ['cen_log_laplace'],
        ]
        dismod_at.create_table(
            connection, 'density', col_name, col_type, row_list
        )
        #
        global_density_name2id = {
            row[0]: i for i, row in enumerate(row_list)
        }
        # ------------------------------------------------------------------
        # create covariate table
        col_name = ['covariate_name', 'reference', 'max_difference']
        col_type = ['text', 'real', 'real']
        row_list = []
        for covariate in covariate_table:
            max_difference = None
            if 'max_difference' in covariate:
                max_difference = covariate['max_difference']
            row_list.append([
                covariate['name'],
                covariate['reference'],
                max_difference,
            ])
        dismod_at.create_table(
            connection, 'covariate', col_name, col_type, row_list
        )
        #
        global_covariate_name2id = {
            covariate['name']: i
            for i, covariate in enumerate(covariate_table)
        }
        # ------------------------------------------------------------------
        # create node table
        #
        # the name -> id map is completed first because a parent may be
        # listed after its children
        global_node_name2id = {
            node['name']: i for i, node in enumerate(node_table)
        }
        #
        col_name = ['node_name', 'parent']
        col_type = ['text', 'integer']
        row_list = []
        for node in node_table:
            name = node['name']
            parent = node['parent']
            if parent == '':
                parent = None
            else:
                parent = global_node_name2id[parent]
            row_list.append([name, parent])
        dismod_at.create_table(
            connection, 'node', col_name, col_type, row_list
        )
        # ------------------------------------------------------------------
        # create subgroup table
        col_name = ['subgroup_name', 'group_id', 'group_name']
        col_type = ['text', 'integer', 'text']
        global_subgroup_name2id = {}
        global_group_name2id = {}
        row_list = []
        group_id = 0
        group_name = subgroup_table[0]['group']
        global_group_name2id[group_name] = group_id
        for i, subgroup in enumerate(subgroup_table):
            # a change of group name starts the next group
            if subgroup['group'] != group_name:
                group_id = group_id + 1
                group_name = subgroup['group']
                global_group_name2id[group_name] = group_id
            subgroup_name = subgroup['subgroup']
            global_subgroup_name2id[subgroup_name] = i
            row_list.append([subgroup_name, group_id, group_name])
        dismod_at.create_table(
            connection, 'subgroup', col_name, col_type, row_list
        )
        # ------------------------------------------------------------------
        # create prior table
        col_name = [
            'prior_name',
            'lower',
            'upper',
            'mean',
            'std',
            'density_id',
            'eta',
            'nu',
        ]
        col_type = [
            'text', 'real', 'real', 'real', 'real', 'integer', 'real', 'real'
        ]
        row_list = []
        for entry in prior_table:
            # work on a copy so the caller's dictionary is left untouched
            prior = dict(entry)
            density_id = global_density_name2id[prior['density']]
            #
            # columns that have null for default value
            for key in ['lower', 'upper', 'std', 'eta', 'nu']:
                if key not in prior:
                    prior[key] = None
            #
            row_list.append([
                prior['name'],
                prior['lower'],
                prior['upper'],
                prior['mean'],
                prior['std'],
                density_id,
                prior['eta'],
                prior['nu'],
            ])
        dismod_at.create_table(
            connection, 'prior', col_name, col_type, row_list
        )
        #
        global_prior_name2id = {
            row[0]: i for i, row in enumerate(row_list)
        }
        # ------------------------------------------------------------------
        # create weight table
        col_name = ['weight_name', 'n_age', 'n_time']
        col_type = ['text', 'integer', 'integer']
        row_list = [
            [weight['name'], len(weight['age_id']), len(weight['time_id'])]
            for weight in weight_table
        ]
        dismod_at.create_table(
            connection, 'weight', col_name, col_type, row_list
        )
        #
        global_weight_name2id = {
            weight['name']: i for i, weight in enumerate(weight_table)
        }
        # null is used for constant weighting
        global_weight_name2id[''] = None
        # ------------------------------------------------------------------
        # create weight_grid table
        col_name = ['weight_id', 'age_id', 'time_id', 'weight']
        col_type = ['integer', 'integer', 'integer', 'real']
        row_list = []
        for i, weight in enumerate(weight_table):
            age_id = weight['age_id']
            time_id = weight['time_id']
            fun = weight['fun']
            for j in age_id:
                for k in time_id:
                    w = fun(age_list[j], time_list[k])
                    row_list.append([i, j, k, w])
        dismod_at.create_table(
            connection, 'weight_grid', col_name, col_type, row_list
        )
        # ------------------------------------------------------------------
        # create smooth table
        col_name = [
            'smooth_name',
            'n_age',
            'n_time',
            'mulstd_value_prior_id',
            'mulstd_dage_prior_id',
            'mulstd_dtime_prior_id',
        ]
        col_type = [
            'text', 'integer', 'integer', 'integer', 'integer', 'integer'
        ]
        row_list = []
        for smooth in smooth_table:
            name = smooth['name']
            n_age = len(smooth['age_id'])
            n_time = len(smooth['time_id'])
            #
            prior_id = {}
            for key in ['value', 'dage', 'dtime']:
                prior_id[key] = None
                mulstd_key = 'mulstd_' + key + '_prior_name'
                if mulstd_key in smooth:
                    prior_name = smooth[mulstd_key]
                    if prior_name is not None:
                        prior_id[key] = global_prior_name2id[prior_name]
            #
            row_list.append([
                name,
                n_age,
                n_time,
                prior_id['value'],
                prior_id['dage'],
                prior_id['dtime'],
            ])
        dismod_at.create_table(
            connection, 'smooth', col_name, col_type, row_list
        )
        #
        global_smooth_name2id = {
            smooth['name']: i for i, smooth in enumerate(smooth_table)
        }
        # ------------------------------------------------------------------
        # create smooth_grid table
        col_name = [
            'smooth_id',
            'age_id',
            'time_id',
            'value_prior_id',
            'dage_prior_id',
            'dtime_prior_id',
            'const_value',
        ]
        col_type = [
            'integer',  # smooth_id
            'integer',  # age_id
            'integer',  # time_id
            'integer',  # value_prior_id
            'integer',  # dage_prior_id
            'integer',  # dtime_prior_id
            'real',     # const_value
        ]
        row_list = []
        for i, smooth in enumerate(smooth_table):
            age_id = smooth['age_id']
            time_id = smooth['time_id']
            fun = smooth['fun']
            #
            # Grid index with the largest age / time for this smoothing.
            # The search is restricted to the smoothing's own grid points;
            # seeding it with index 0 would pick a point outside the grid
            # whenever age_list[0] (time_list[0]) is the overall maximum
            # and 0 is not in age_id (time_id).
            max_j = max(age_id, key=lambda j: age_list[j], default=None)
            max_k = max(time_id, key=lambda k: time_list[k], default=None)
            for j in age_id:
                for k in time_id:
                    (v, da, dt) = fun(age_list[j], time_list[k])
                    #
                    # no forward age difference at the largest age
                    if j == max_j:
                        da = None
                    elif da is not None:
                        da = global_prior_name2id[da]
                    #
                    # no forward time difference at the largest time
                    if k == max_k:
                        dt = None
                    elif dt is not None:
                        dt = global_prior_name2id[dt]
                    #
                    # a float value is a constant, anything else a prior name
                    const_value = None
                    if isinstance(v, float):
                        const_value = v
                        v = None
                    elif v is not None:
                        v = global_prior_name2id[v]
                    row_list.append([i, j, k, v, da, dt, const_value])
        dismod_at.create_table(
            connection, 'smooth_grid', col_name, col_type, row_list
        )
        # ------------------------------------------------------------------
        # create nslist table
        row_list = [[nslist_name] for nslist_name in nslist_table]
        dismod_at.create_table(
            connection, 'nslist', ['nslist_name'], ['text'], row_list
        )
        #
        global_nslist_name2id = {
            row[0]: i for i, row in enumerate(row_list)
        }
        # ------------------------------------------------------------------
        # create nslist_pair table
        col_name = ['nslist_id', 'node_id', 'smooth_id']
        col_type = ['integer', 'integer', 'integer']
        row_list = []
        for key in nslist_table:
            pair_list = nslist_table[key]
            nslist_id = global_nslist_name2id[key]
            for pair in pair_list:
                node_id = global_node_name2id[pair[0]]
                smooth_id = global_smooth_name2id[pair[1]]
                row_list.append([nslist_id, node_id, smooth_id])
        dismod_at.create_table(
            connection, 'nslist_pair', col_name, col_type, row_list
        )
        # ------------------------------------------------------------------
        # create rate table
        col_name = [
            'rate_name',
            'parent_smooth_id',
            'child_smooth_id',
            'child_nslist_id',
        ]
        col_type = ['text', 'integer', 'integer', 'integer']
        row_list = []
        for rate_name in ['pini', 'iota', 'rho', 'chi', 'omega']:
            # rates missing from rate_table get an all-null row
            row = [rate_name, None, None, None]
            for rate in rate_table:
                if rate['name'] != rate_name:
                    continue
                row = [rate_name]
                for key in ['parent_smooth', 'child_smooth', 'child_nslist']:
                    entry = None
                    if key in rate:
                        entry = rate[key]
                    if entry is not None:
                        if key == 'child_nslist':
                            entry = global_nslist_name2id[entry]
                        else:
                            entry = global_smooth_name2id[entry]
                    row.append(entry)
            row_list.append(row)
        dismod_at.create_table(
            connection, 'rate', col_name, col_type, row_list
        )
        global_rate_name2id = {
            row[0]: i for i, row in enumerate(row_list)
        }
        # ------------------------------------------------------------------
        # create mulcov table
        col_name = [
            'mulcov_type',
            'rate_id',
            'integrand_id',
            'covariate_id',
            'group_id',
            'group_smooth_id',
            'subgroup_smooth_id',
        ]
        col_type = [
            'text',     # mulcov_type
            'integer',  # rate_id
            'integer',  # integrand_id
            'integer',  # covariate_id
            'integer',  # group_id
            'integer',  # group_smooth_id
            'integer',  # subgroup_smooth_id
        ]
        row_list = []
        warning_printed = False
        for mulcov in mulcov_table:
            mulcov_type = mulcov['type']
            effected = mulcov['effected']
            covariate_id = global_covariate_name2id[mulcov['covariate']]
            #
            # rate_id and integrand_id
            if mulcov_type == 'rate_value':
                rate_id = global_rate_name2id[effected]
                integrand_id = None
            else:
                integrand_id = global_integrand_name2id[effected]
                rate_id = None
            #
            # group_id
            if 'group' in mulcov:
                group_id = global_group_name2id[mulcov['group']]
            else:
                group_id = 0
                if not warning_printed:
                    print_missing_key_warning('group', 'mulcov', 'group')
                    warning_printed = True
            #
            # group_smooth_id
            if mulcov['smooth'] is None:
                group_smooth_id = None
            else:
                group_smooth_id = global_smooth_name2id[mulcov['smooth']]
            #
            # subgroup_smooth_id
            if 'subsmooth' not in mulcov:
                subgroup_smooth_id = None
            elif mulcov['subsmooth'] is None:
                subgroup_smooth_id = None
            else:
                subgroup_smooth_id = global_smooth_name2id[
                    mulcov['subsmooth']
                ]
            #
            row_list.append([
                mulcov_type,
                rate_id,
                integrand_id,
                covariate_id,
                group_id,
                group_smooth_id,
                subgroup_smooth_id,
            ])
        dismod_at.create_table(
            connection, 'mulcov', col_name, col_type, row_list
        )
        # ------------------------------------------------------------------
        # avgint table
        #
        # extra_name, extra_type
        extra_name = []
        extra_type = []
        if len(avgint_table) > 0:
            extra_name = avgint_extra_columns
            extra_type = extra_column_types(
                'avgint_extra_columns', extra_name, avgint_table[0]
            )
        #
        # col_name
        col_name = extra_name + [
            'integrand_id',
            'node_id',
            'subgroup_id',
            'weight_id',
            'age_lower',
            'age_upper',
            'time_lower',
            'time_upper',
        ]
        for j in range(len(covariate_table)):
            col_name.append('x_%s' % j)
        #
        # col_type
        col_type = extra_type + [
            'integer',  # integrand_id
            'integer',  # node_id
            'integer',  # subgroup_id
            'integer',  # weight_id
            'real',     # age_lower
            'real',     # age_upper
            'real',     # time_lower
            'real',     # time_upper
        ]
        for j in range(len(covariate_table)):
            col_type.append('real')
        #
        # row_list
        row_list = []
        warning_printed = False
        for entry in avgint_table:
            # work on a copy so the caller's dictionary is left untouched
            avgint = dict(entry)
            #
            # subgroup column has a default value
            if 'subgroup' not in avgint:
                avgint['subgroup'] = subgroup_table[0]['subgroup']
                if not warning_printed:
                    print_missing_key_warning('subgroup', 'avgint', 'subgroup')
                    warning_printed = True
            #
            # extra columns first
            row = [avgint[name] for name in extra_name]
            #
            integrand_id = global_integrand_name2id[avgint['integrand']]
            node_id = global_node_name2id[avgint['node']]
            subgroup_id = global_subgroup_name2id[avgint['subgroup']]
            weight_id = global_weight_name2id[avgint['weight']]
            row = row + [
                integrand_id,
                node_id,
                subgroup_id,
                weight_id,
                avgint['age_lower'],
                avgint['age_upper'],
                avgint['time_lower'],
                avgint['time_upper'],
            ]
            for covariate in covariate_table:
                row.append(avgint[covariate['name']])
            row_list.append(row)
        dismod_at.create_table(
            connection, 'avgint', col_name, col_type, row_list
        )
        # ------------------------------------------------------------------
        # create data table
        #
        # extra_name, extra_type
        extra_name = []
        extra_type = []
        if len(data_table) > 0:
            extra_name = data_extra_columns
            extra_type = extra_column_types(
                'data_extra_columns', extra_name, data_table[0]
            )
        #
        # col_name
        col_name = extra_name + [
            'integrand_id',
            'node_id',
            'subgroup_id',
            'weight_id',
            'age_lower',
            'age_upper',
            'time_lower',
            'time_upper',
            'hold_out',
            'density_id',
            'meas_value',
            'meas_std',
            'eta',
            'nu',
        ]
        for j in range(len(covariate_table)):
            col_name.append('x_%s' % j)
        #
        # col_type
        col_type = extra_type + [
            'integer',  # integrand_id
            'integer',  # node_id
            'integer',  # subgroup_id
            'integer',  # weight_id
            'real',     # age_lower
            'real',     # age_upper
            'real',     # time_lower
            'real',     # time_upper
            'integer',  # hold_out
            'integer',  # density_id
            'real',     # meas_value
            'real',     # meas_std
            'real',     # eta
            'real',     # nu
        ]
        for j in range(len(covariate_table)):
            col_type.append('real')
        #
        # row_list
        row_list = []
        warning_printed = False
        for entry in data_table:
            # work on a copy so the caller's dictionary is left untouched
            data = dict(entry)
            #
            # extra columns first
            row = [data[name] for name in extra_name]
            #
            # columns that have null for default value
            for key in ['meas_std', 'eta', 'nu']:
                if key not in data:
                    data[key] = None
            #
            # subgroup column has a default value
            if 'subgroup' not in data:
                data['subgroup'] = subgroup_table[0]['subgroup']
                if not warning_printed:
                    print_missing_key_warning('subgroup', 'data', 'subgroup')
                    warning_printed = True
            #
            integrand_id = global_integrand_name2id[data['integrand']]
            density_id = global_density_name2id[data['density']]
            node_id = global_node_name2id[data['node']]
            subgroup_id = global_subgroup_name2id[data['subgroup']]
            weight_id = global_weight_name2id[data['weight']]
            hold_out = int(data['hold_out'])
            row = row + [
                integrand_id,
                node_id,
                subgroup_id,
                weight_id,
                data['age_lower'],
                data['age_upper'],
                data['time_lower'],
                data['time_upper'],
                hold_out,
                density_id,
                data['meas_value'],
                data['meas_std'],
                data['eta'],
                data['nu'],
            ]
            for covariate in covariate_table:
                row.append(data[covariate['name']])
            row_list.append(row)
        dismod_at.create_table(
            connection, 'data', col_name, col_type, row_list
        )
        # ------------------------------------------------------------------
        # create option table
        col_name = ['option_name', 'option_value']
        col_type = ['text unique', 'text']
        row_list = [[row['name'], row['value']] for row in option_table]
        dismod_at.create_table(
            connection, 'option', col_name, col_type, row_list
        )
        # ------------------------------------------------------------------
    finally:
        # close the connection, also when a table could not be created
        connection.close()
def create_GBD_integrand(self, integrands: List[str], time_list: List[int],
                         sex_ids: List[int],
                         location_name_to_id: Dict[str, int],
                         covariates: Dict[str, pd.DataFrame] = None) -> None:
    """Build a ``*_gbd.db`` copy of the database and run ``predict fit_var``.

    The database at ``self.path_to_db`` is copied to the same path with its
    last three characters replaced by ``_gbd.db``.  In the copy the
    ``integrand`` and ``avgint`` tables are dropped and recreated, then
    ``system_command([program, path, 'predict', 'fit_var'])`` is run on it.

    One avgint row is written for every combination of integrand, entry of
    the module-level ``age_group_ids``, entry of ``time_list``, node in
    ``self.node_id_to_loc`` whose name is a key of ``location_name_to_id``,
    and entry of ``sex_ids``, provided that the age range of the age group
    lies inside ``[self.age_min, self.age_max]`` and the time lies inside
    ``[self.time_min, self.time_max]``.  ``weight_id`` is always null.

    Args:
        integrands: integrand names; the position in this list is used as
            ``integrand_id``, and the name is looked up in the module-level
            ``integrand_to_measure_id`` for the ``measure_id`` column.
        time_list: times; each is used as ``time_lower``, ``time_upper``
            and ``year_id``.
        sex_ids: values for the ``sex_id`` column.
        location_name_to_id: maps a node name to the value stored in the
            ``location_id`` column.
        covariates: optional mapping from covariate name to a data frame
            with columns ``age_group_id``, ``location_id``, ``year_id``,
            ``sex_id`` and ``mean_value``.  When given, a row is written
            only if every covariate has a matching value; otherwise all
            covariate columns are null.
    """
    path = self.path_to_db[:-3] + '_gbd.db'
    print(path)
    shutil.copyfile(self.path_to_db, path)
    connection = sqlite3.connect(path)
    try:
        crsr = connection.cursor()
        crsr.execute('select count(covariate_id) from covariate')
        n_covs = crsr.fetchall()[0][0]

        # replace the integrand table by one row per requested integrand
        crsr.execute("drop table integrand")
        row_list = [[0.0, name] for name in integrands]
        dismod_at.create_table(connection, 'integrand',
                               ['minimum_meas_cv', 'integrand_name'],
                               ['real', 'text'], row_list)

        # replace the avgint table
        crsr.execute("drop table avgint")
        # presumably maps a covariate name to its column name 'x_<k>';
        # only the integer after the underscore is used below
        cov_name_to_id = self.get_covarates_names()
        row_list = []
        for integrand_id, integrand_name in enumerate(integrands):
            for age_id in age_group_ids:
                for time in time_list:
                    for node_id, node_name in self.node_id_to_loc.items():
                        if node_name not in location_name_to_id:
                            continue
                        age_lower = age_id_to_range[age_id][0]
                        age_upper = age_id_to_range[age_id][1]
                        in_range = (
                            age_lower >= self.age_min
                            and age_upper <= self.age_max
                            and self.time_min <= time <= self.time_max
                        )
                        if not in_range:
                            continue
                        location_id = location_name_to_id[node_name]
                        # dismod_at columns followed by the GBD id columns
                        row = [integrand_id, node_id, None,
                               age_lower, age_upper, time, time,
                               location_id, age_id, time,
                               integrand_to_measure_id[integrand_name]]
                        for sex_id in sex_ids:
                            covs = [None] * n_covs
                            include = True
                            if covariates is not None:
                                for name, df in covariates.items():
                                    i = int(cov_name_to_id[name].split("_")[1])
                                    # NOTE(review): 22 and 3 look like the
                                    # "all ages" / "both sexes" ids - confirm
                                    v = df[
                                        (df['age_group_id'].isin([age_id, 22]))
                                        & (df['location_id'] == location_id)
                                        & (df['year_id'] == time)
                                        & (df['sex_id'].isin([3, sex_id]))
                                    ]['mean_value'].values
                                    if v.shape[0] > 0:
                                        covs[i] = v[0]
                                    else:
                                        # this age_group_id is not used in
                                        # this covariate
                                        include = False
                                        break
                            if include:
                                row_list.append(row + [sex_id] + covs)
        dismod_at.create_table(
            connection, 'avgint',
            ['integrand_id', 'node_id', 'weight_id', 'age_lower', 'age_upper',
             'time_lower', 'time_upper']
            + ['location_id', 'age_group_id', 'year_id', 'measure_id',
               'sex_id']
            + ['x_' + str(i) for i in range(n_covs)],
            ['integer', 'integer', 'integer', 'real', 'real', 'real', 'real']
            + ['integer'] * 5
            + ['real'] * n_covs,
            row_list)
    finally:
        # release the sqlite handle even if a query or lookup fails
        connection.close()
    system_command([program, path, 'predict', 'fit_var'])