def fit_both(self, tol: float = 1e-4, fit_fixed: bool = True, db2csv: bool = True,
             max_iter: int = 100, fit_gaussian: bool = False, zero_sum: bool = False,
             print_level: int = 5):
    """Run a dismod_at ``fit both`` followed by ``predict fit_var``.

    When ``fit_fixed`` is True, a fixed-effects fit is run first and its
    solution seeds ``start_var`` for the joint fixed+random fit; otherwise
    the solver options are written and the database is re-initialized.
    When ``fit_gaussian`` is True, the measurement density is switched to
    Gaussian for the warm-up phase and restored before the real fit.

    Args:
        tol: optimizer convergence tolerance.
        fit_fixed: warm-start from a fixed-effects fit first.
        db2csv: dump the database tables to CSV files afterwards.
        max_iter: maximum optimizer iterations.
        fit_gaussian: use a Gaussian measurement density for the warm start.
        zero_sum: add a zero-sum constraint on the random effects.
        print_level: optimizer print level.
    """
    if fit_gaussian:
        self.db.set_meas_density('gaussian')
    if fit_fixed:
        # Warm start: solve the fixed-effects problem, then start the
        # joint fit from that solution.
        self.fit_fixed(tol=tol, db2csv=False, max_iter=max_iter,
                       zero_sum=zero_sum, print_level=print_level)
        system_command([program, self.db_path, 'set', 'start_var', 'fit_var'])
    else:
        self.db.set_tol(tol)
        self.db.set_max_iteration(max_iter)
        self.db.set_print_level(print_level)
        if zero_sum:
            self.db.set_zero_sum_constraint()
        self.initialize(db2csv=False)
    if fit_gaussian:
        # Restore the original measurement density before the real fit.
        # NOTE(review): initialize() is called again here — confirm it does
        # not discard the start_var values set above.
        self.db.reset_meas_density()
        self.initialize(db2csv=False)
    system_command([program, self.db_path, 'fit', 'both'])
    system_command([program, self.db_path, 'predict', 'fit_var'])
    if db2csv:
        dismod_at.db2csv_command(self.db_path)
def simulate(self, node, file_name, n_sim=10):
    """Simulate ``n_sim`` data sets, sample the posterior, and summarize.

    Runs the dismod_at simulate/sample/predict pipeline on ``file_name``,
    then reads ``predict.csv`` and splits its rows into Sincidence rate
    predictions (keyed by node) and covariate-multiplier predictions
    (keyed by integrand).  The predict file is renamed per ``node``.

    Returns:
        ``(rate_mean_std, alpha_mean_std)`` — dicts mapping key to a
        ``(mean, std)`` pair over the sampled ``avgint`` values.
    """
    n_str = str(n_sim)
    # Seed truth/start/scale variables from the previous fit.
    for var_name in ('truth_var', 'start_var', 'scale_var'):
        system_command([program, file_name, 'set', var_name, 'fit_var'])
    system_command([program, file_name, 'simulate', n_str])
    dismod_at.db2csv_command(file_name)
    system_command([program, file_name, 'sample', 'simulate', n_str])
    system_command([program, file_name, 'predict', 'sample'])
    dismod_at.db2csv_command(file_name)

    predict = pd.read_csv(self.file_path + 'predict.csv')
    rate_samples = collections.defaultdict(list)
    alpha_samples = collections.defaultdict(list)
    for _, row in predict.iterrows():
        if row['integrand'] == 'Sincidence':
            rate_samples[row['node']].append(row['avgint'])
        else:
            alpha_samples[row['integrand']].append(row['avgint'])
    # Sanity checks: one rate series per child node, one per covariate.
    assert len(rate_samples) == len(self.node_parent_children[node])
    assert len(alpha_samples) == self.n_cov

    rate_mean_std = {key: (np.mean(vals), np.std(vals))
                     for key, vals in rate_samples.items()}
    alpha_mean_std = {key: (np.mean(vals), np.std(vals))
                      for key, vals in alpha_samples.items()}
    os.rename(self.file_path + 'predict.csv',
              self.file_path + 'predict_' + node + '.csv')
    return rate_mean_std, alpha_mean_std
def fit_fixed(self, db2csv=True):
    """Run ``dismod_at fit fixed`` on this database; exit on failure.

    Args:
        db2csv: also dump the database tables to CSV files.
    """
    cmd = [program, self.path, 'fit', 'fixed']
    print(' '.join(cmd))
    if subprocess.call(cmd) != 0:
        sys.exit('The dismod_at fit fixed command failed')
    if db2csv:
        dismod_at.db2csv_command(self.path)
def fit_fixed(self, tol: float = 1e-4, db2csv: bool = True, max_iter: int = 100,
              zero_sum: bool = False, print_level: int = 5):
    """Configure the solver, re-initialize, run ``fit fixed`` and predict.

    Args:
        tol: optimizer convergence tolerance.
        db2csv: dump the database tables to CSV files afterwards.
        max_iter: maximum optimizer iterations.
        zero_sum: add a zero-sum constraint on the random effects.
        print_level: optimizer print level.
    """
    self.db.set_tol(tol)
    self.db.set_max_iteration(max_iter)
    self.db.set_print_level(print_level)
    if zero_sum:
        self.db.set_zero_sum_constraint()
    self.initialize(db2csv=False)
    system_command([program, self.db_path, 'fit', 'fixed'])
    system_command([program, self.db_path, 'predict', 'fit_var'])
    if db2csv:
        dismod_at.db2csv_command(self.db_path)
def fit(self, file_name, depth=1, fit_fixed=False, fit_both=True, verbose=True,
        write_to_csv=True):
    """Initialize the database and run the requested dismod_at fits.

    A fixed-effects fit runs when ``depth`` is zero or ``fit_fixed`` is
    set; its solution then seeds the start and scale variables.  A joint
    ``fit both`` runs when ``depth`` is positive and ``fit_both`` is set.

    Args:
        file_name: path of the dismod_at sqlite database.
        depth: node-tree depth; zero means fixed effects only.
        fit_fixed: force a fixed-effects fit first.
        fit_both: run the joint fixed+random fit.
        verbose: forwarded to ``system_command``.
        write_to_csv: dump the database tables to CSV files afterwards.
    """
    self.db.initialize(file_name)
    system_command([program, file_name, 'init'], verbose)
    if depth == 0 or fit_fixed:
        system_command([program, file_name, 'fit', 'fixed'], verbose)
        system_command([program, file_name, 'set', 'start_var', 'fit_var'], verbose)
        system_command([program, file_name, 'set', 'scale_var', 'fit_var'], verbose)
    if depth > 0 and fit_both:
        system_command([program, file_name, 'fit', 'both'], verbose)
    if write_to_csv:
        dismod_at.db2csv_command(file_name)
def initialize(self, file_name, to_csv=False):
    """Create the dismod_at database; optionally run ``init`` and db2csv.

    Args:
        file_name: path of the sqlite database to create.
        to_csv: when True, run ``dismod_at init`` (exiting on failure)
            and dump the tables to CSV files.
    """
    dismod_at.create_database(
        file_name, self.age_list, self.time_list, self.integrand_table,
        self.node_table, self.weight_table, self.covariate_table,
        self.avgint_table, self.data_table, self.prior_table,
        self.smooth_table, list(), self.rate_table, self.mulcov_table,
        self.option_table)
    if to_csv:
        status = subprocess.call([program, file_name, 'init'])
        if status != 0:
            sys.exit('command failed: flag = ' + str(status))
        dismod_at.db2csv_command(file_name)
def __init__(self, path_to_folder, db_file_name):
    """Load a fitted dismod_at database and its companion ``data.csv``.

    Args:
        path_to_folder: folder containing the database and ``data.csv``.
        db_file_name: file name of the dismod_at sqlite database.
    """
    # BUG FIX: os.path.join was called with a single pre-concatenated
    # argument (path_to_folder + db_file_name), which defeats its purpose
    # and silently produced a broken path when the folder lacked a
    # trailing separator; pass the parts separately instead.
    self.path_to_db = os.path.join(path_to_folder, db_file_name)
    dismod_at.db2csv_command(self.path_to_db)
    self.db_output = DismodOutput(self.path_to_db)
    self.path_to_data = os.path.join(path_to_folder, 'data.csv')
    self.age_list, self.time_list = self.db_output.get_age_time_lists()
    self.integrand_values = self.db_output.get_integrand_values()
    self.data_values = pd.read_csv(self.path_to_data)
    self.cov_name_to_id = self.db_output.get_covarates_names()
    # Map dismod_at rate names to their direct-measurement integrands.
    self.rate_to_integrand = {
        'iota': 'Sincidence',
        'rho': 'remission',
        'chi': 'mtexcess',
        'omega': 'mtother',
    }
def init_database(self, db2csv: bool = True):
    """Write all model tables to the database and run ``dismod_at init``.

    Args:
        db2csv: also dump the database tables to CSV files.
    """
    dismod_at.create_database(
        self.path, self.age_list, self.time_list, self.integrand_table,
        self.node_table, self.weight_table, self.covariate_table,
        self.avgint_table, self.data_table, self.prior_table,
        self.smooth_table, list(), self.rate_table, self.mulcov_table,
        self.option_table)
    # NOTE(review): create_database should already have created the file;
    # this guard only covers the case where it did not.
    if not os.path.exists(self.path):
        os.mknod(self.path)
    command = [program, self.path, 'init']
    print(' '.join(command))
    flag = subprocess.call(command)
    if flag != 0:
        sys.exit('The dismod_at init command failed')
    # Idiom fix: test truthiness rather than identity with True.
    if db2csv:
        dismod_at.db2csv_command(self.path)
cmd = 'insert into log' cmd += ' (log_id,message_type,table_name,row_id,unix_time,message) values(' cmd += str(log_id) + ',' # log_id cmd += '"command",' # message_type cmd += 'null,' # table_name cmd += 'null,' # row_id cmd += str(begin_time) + ',' # unix_time cmd += '"' + message + '")' # message dismod_at.sql_command(connection, cmd) connection.close() # --------------------------------------------------------------------------- # execute command if command_arg == 'db2csv': if len(sys.argv) != 3: sys.exit(usage) dismod_at.db2csv_command(database_file_arg) elif command_arg == 'perturb': if len(sys.argv) != 5: sys.exit(usage) tbl_name = arg_list[0] sigma = arg_list[1] dismod_at.perturb_command(database_file_arg, tbl_name, sigma) elif command_arg == 'plot_rate_fit': if len(sys.argv) != 6: sys.exit(usage) pdf_file = arg_list[0] plot_title = arg_list[1] rate_set = set(arg_list[2].split()) dismod_at.plot_rate_fit(database_file_arg, pdf_file, plot_title, rate_set) elif command_arg == 'plot_data_fit': if len(sys.argv) != 6:
def initDatabase(self, max_iter=500):
    """Build every dismod_at input table from ``self.data`` and settings,
    create the database at ``self.path``, then run ``init`` and db2csv.

    Args:
        max_iter: maximum number of fixed-effects optimizer iterations.
    """
    # ------------------------------------------------------------------
    # Default age grid: ~5-year spacing spanning the observed ages.
    if len(self.age_list) == 0:
        max_age = -float('inf')
        min_age = float('inf')
        for i in range(self.n):
            max_age = max(max_age, self.data.loc[i, 'age_end'])
            min_age = min(min_age, self.data.loc[i, 'age_start'])
        age_list = [int(round(x)) for x in
                    np.linspace(min_age, max_age,
                                round((max_age - min_age) / 5) + 1)]
        self.age_list = sorted(set(age_list))
    # Default time grid: ~3-year spacing spanning the observed years.
    if len(self.time_list) == 0:
        max_time = -float('inf')
        min_time = float('inf')
        for i in range(self.n):
            max_time = max(max_time, self.data.loc[i, 'year_end'])
            min_time = min(min_time, self.data.loc[i, 'year_start'])
        time_list = [int(round(x)) for x in
                     np.linspace(min_time, max_time,
                                 round((max_time - min_time) / 3 + 1))]
        self.time_list = sorted(set(time_list))

    avgint_table = list()
    nslist_table = dict()

    # Integrand / node / weight tables.
    integrand_table = [{'name': intg} for intg in self.integrand]
    node_table = [{'name': 'world', 'parent': ''}]
    weight_table = [{
        'name': 'constant',
        'age_id': range(len(self.age_list)),
        'time_id': range(len(self.time_list)),
        'fun': lambda a, t: 1.0,
    }]

    # One parent smoothing per rate.
    rate_table = [{'name': rate, 'parent_smooth': 'smooth_rate_' + rate}
                  for rate in self.rates]

    # Covariates and their multipliers.
    covariate_table = [{'name': cov['name'], 'reference': 0.0}
                       for cov in self.covariates]
    mulcov_table = [{
        'covariate': cov['name'],
        'type': cov['type'],
        'effected': cov['effected'],
        'smooth': 'smooth_mulcov_' + cov['name'],
    } for cov in self.covariates]

    # Smoothing grids; the default-argument trick (r=rate, name=name)
    # binds the loop variable at definition time.
    smooth_table = list()
    for rate in self.rates:
        smooth_table.append({
            'name': 'smooth_rate_' + rate,
            'age_id': range(len(self.age_list)),
            'time_id': range(len(self.time_list)),
            'fun': lambda a, t, r=rate: ('value_prior_' + r,
                                         'dage_prior_' + r,
                                         'dtime_prior_' + r),
        })
    for cov in self.covariates:
        name = cov['name']
        smooth_table.append({
            'name': 'smooth_mulcov_' + cov['name'],
            'age_id': range(len(self.age_list)),
            'time_id': range(len(self.time_list)),
            'fun': lambda a, t, name=name: ('value_prior_' + name,
                                            'dage_prior_' + name,
                                            'dtime_prior_' + name),
        })

    # Priors: a value/dage/dtime triple per rate and per covariate.
    prior_table = []
    for i in range(len(self.rates)):
        prior_table.append({'name': 'value_prior_' + self.rates[i]})
        prior_table[-1].update(self.rate_priors[i][0])
        prior_table.append({'name': 'dage_prior_' + self.rates[i]})
        prior_table[-1].update(self.rate_priors[i][1])
        prior_table.append({'name': 'dtime_prior_' + self.rates[i]})
        prior_table[-1].update(self.rate_priors[i][2])
    for i in range(len(self.covariates)):
        prior_table.append(
            {'name': 'value_prior_' + self.covariates[i]['name']})
        prior_table[-1].update(self.cov_priors[i][0])
        prior_table.append(
            {'name': 'dage_prior_' + self.covariates[i]['name']})
        prior_table[-1].update(self.cov_priors[i][1])
        prior_table.append(
            {'name': 'dtime_prior_' + self.covariates[i]['name']})
        prior_table[-1].update(self.cov_priors[i][2])

    # Data table: one row per measurement whose integrand is modeled.
    data_table = list()
    row = {
        'node': 'world',
        'weight': 'constant',
        'hold_out': False,
    }
    # NOTE(review): this dumps the whole per-integrand density mapping
    # into the row template; the per-integrand copy inside the loop looks
    # like the real intent — confirm whether this update is still needed.
    row.update(self.meas_noise_density)
    for data_id in range(self.n):
        if self.data.loc[data_id, 'measure'] in self.integrand:
            row['integrand'] = self.data.loc[data_id, 'measure']
            for k, v in self.meas_noise_density[row['integrand']].items():
                row[k] = v
            row['meas_value'] = self.data.loc[data_id, 'meas_value']
            row['meas_std'] = self.data.loc[data_id, 'meas_std']
            row['age_lower'] = self.data.loc[data_id, 'age_start']
            row['age_upper'] = self.data.loc[data_id, 'age_end']
            row['time_lower'] = self.data.loc[data_id, 'year_start']
            row['time_upper'] = self.data.loc[data_id, 'year_end']
            for cov in self.covariates:
                row[cov['name']] = self.data.loc[data_id, cov['name']]
            data_table.append(copy.copy(row))

    # Solver options; user-supplied options override the defaults below.
    option_table = [
        {'name': 'parent_node_name', 'value': 'world'},
        {'name': 'ode_step_size', 'value': '10.0'},
        {'name': 'quasi_fixed', 'value': 'false'},
        # Consistency fix: option values are strings everywhere else.
        {'name': 'max_num_iter_fixed', 'value': str(max_iter)},
        {'name': 'print_level_fixed', 'value': '5'},
        {'name': 'tolerance_fixed', 'value': '1e-8'},
    ]
    if self.integrand == ['Sincidence']:
        option_table.append(
            {'name': 'rate_case', 'value': 'iota_pos_rho_zero'})
    # BUG FIX: self.integrand is a list (see the branch above and the
    # iteration building integrand_table), so the old comparison with the
    # bare string 'remission' could never be true and remission-only
    # models silently fell through to the default rate_case.
    elif self.integrand == ['remission']:
        option_table.append(
            {'name': 'rate_case', 'value': 'iota_zero_rho_pos'})
    else:
        option_table.append(
            {'name': 'rate_case', 'value': 'iota_pos_rho_pos'})

    option_name_id = {}
    for i in range(len(option_table)):
        option_name_id[option_table[i]['name']] = i
    for option in self.options:
        if option['name'] in option_name_id:
            option_table[option_name_id[option['name']]]['value'] = \
                option['value']
        else:
            option_table.append(option)

    dismod_at.create_database(
        self.path, self.age_list, self.time_list, integrand_table,
        node_table, weight_table, covariate_table, avgint_table,
        data_table, prior_table, smooth_table, nslist_table, rate_table,
        mulcov_table, option_table)
    command = [program, self.path, 'init']
    print(' '.join(command))
    flag = subprocess.call(command)
    if flag != 0:
        sys.exit('The dismod_at init command failed')
    dismod_at.db2csv_command(self.path)
# Fit both fixed and random effects, then draw asymptotic samples.
dismod_at.system_command_prc([program, file_name, 'fit', 'both'])
dismod_at.system_command_prc(
    [program, file_name, 'sample', 'asymptotic', 'both', str(number_sample)])
# -----------------------------------------------------------------------
# Read the tables needed for the checks below.
new = False
connection = dismod_at.create_connection(file_name, new)
var_table = dismod_at.get_table_dict(connection, 'var')
node_table = dismod_at.get_table_dict(connection, 'node')
rate_table = dismod_at.get_table_dict(connection, 'rate')
fit_var_table = dismod_at.get_table_dict(connection, 'fit_var')
sample_table = dismod_at.get_table_dict(connection, 'sample')
hes_random_table = dismod_at.get_table_dict(connection, 'hes_random')
connection.close()
dismod_at.db2csv_command(file_name)
# -----------------------------------------------------------------------
# var_id2name: map each model variable to its node; every variable is an
# iota rate, and the 'world' variable's fitted value is kept as theta.
var_id2node_name = dict()
assert len(var_table) == 3
for var_id, row in enumerate(var_table):
    assert var_id < 3
    assert row['var_type'] == 'rate'
    assert rate_table[row['rate_id']]['rate_name'] == 'iota'
    node_name = node_table[row['node_id']]['node_name']
    var_id2node_name[var_id] = node_name
    if node_name == 'world':
        theta = fit_var_table[var_id]['fit_var_value']
# -----------------------------------------------------------------------
# check the Hessian of the random effects objective
# ---------------------------------------------------------------------------
# Check that every fitted rate is within 10% of the truth it was simulated
# from; report the worst relative error on failure.
max_abs_err = 0.0
for var_id in range(len(var_table)):
    var_row = var_table[var_id]
    fit_row = fit_var_table[var_id]
    var_type = var_row['var_type']
    rate_id = var_row['rate_id']
    rate_name = rate_table[rate_id]['rate_name']
    fit_var_value = fit_row['fit_var_value']
    relative_err = fit_var_value / rate_true[rate_name] - 1.0
    max_abs_err = max(max_abs_err, abs(relative_err))
if max_abs_err > 0.1:
    # BUG FIX: the sys module has no 'msg' attribute; the old call would
    # have raised AttributeError instead of reporting the failure.
    sys.exit('csv2db.py: max_abs_err = ' + str(max_abs_err))
#
# check error in mtall approximation
max_abs_res = 0.0
for data_id in range(len(data_table)):
    assert data_id == data_subset_table[data_id]['data_id']
    integrand_id = data_table[data_id]['integrand_id']
    integrand_name = integrand_table[integrand_id]['integrand_name']
    if integrand_name == 'mtall':
        weighted_residual = fit_data_subset[data_id]['weighted_residual']
        max_abs_res = max(max_abs_res, abs(weighted_residual))
if max_abs_res > 0.1:
    # BUG FIX: same sys.msg -> sys.exit correction as above.
    sys.exit('csv2db.py: max_abs_res = ' + str(max_abs_res))
# ---------------------------------------------------------------------------
# csv representation of database
dismod_at.db2csv_command(database)
# ---------------------------------------------------------------------------
print('csv2db.py: OK')
# END PYTHON