def main():
    """Run the BART MCMC sampler end-to-end.

    Loads the dataset, runs `settings.n_iterations` sweeps of MCMC over the
    `settings.m_bart` trees, tracks per-tree and per-iteration statistics,
    and (optionally) accumulates/saves running-average predictions.

    Relies on module-level helpers (process_command_line, load_data, BART, ...)
    and module-level imports (np, random, time, pickle, pp).
    """
    settings = process_command_line()
    print('Current settings:')
    pp.pprint(vars(settings))

    # Reset random seeds so runs with the same init_id are reproducible.
    np.random.seed(settings.init_id * 1000)
    random.seed(settings.init_id * 1000)

    # Load data.
    print('Loading data ...')
    data = load_data(settings)
    print('Loading data ... completed')
    if settings.center_y:
        print('center_y = True; centering the y variables at mean(data[y_train])')
        center_labels(data, settings)
    backup_target(data, settings)

    # Pre-compute & initialize.
    # BUG FIX: time.clock() was deprecated in 3.3 and removed in Python 3.8;
    # perf_counter() is the documented replacement for interval timing.
    time_start = time.perf_counter()
    param, cache, cache_tmp = precompute(data, settings)
    bart = BART(data, param, settings, cache, cache_tmp)
    time_initialization = time.perf_counter() - time_start

    # Initialize storage for results.
    mcmc_stats = np.zeros((settings.m_bart, settings.n_iterations, 10))
    mcmc_stats_bart = np.zeros((settings.n_iterations, 10))
    mcmc_stats_bart_desc = ['loglik', 'logprior', 'logprob',
            'mean depth', 'mean num_leaves', 'mean num_nonleaves', 'mean change',
            'mse_train', 'lambda_bart', 'time_itr']
    mcmc_counts = None
    mcmc_tree_predictions = init_performance_storage(data, settings)
    n_burn_in = 0
    # NOTE: predictions are stored without discarding burn-in
    assert n_burn_in == 0
    time_init = time.perf_counter()
    time_init_run_avg = time.perf_counter()
    itr_run_avg = 0
    change = True
    # BUG FIX: random.shuffle needs a mutable sequence; in Python 3, range()
    # is an immutable lazy object, so materialize it as a list.
    tree_order = list(range(settings.m_bart))
    print('initial settings:')
    print('lambda_bart value = %.3f' % param.lambda_bart)
    loglik_train, mse_train = bart.compute_train_loglik(data, settings, param)
    print('mse train = %.3f, loglik_train = %.3f' % (mse_train, loglik_train))

    for itr in range(settings.n_iterations):
        time_init_current = time.perf_counter()
        if settings.verbose >= 1:
            print('\n%s BART iteration = %7d %s' % ('*'*30, itr, '*'*30))
        logprior = 0.
        if settings.sample_y == 1 and settings.mcmc_type != 'prior':
            # Successive conditional simulator
            bart.sample_labels(data, settings, param)
        # sampling lambda_bart
        bart.sample_lambda_bart(param, data, settings)
        time_sample_lambda = time.perf_counter() - time_init_current
        logprior += bart.lambda_logprior
        random.shuffle(tree_order)
        for i_t in tree_order:
            if settings.debug == 1:
                print('\ntree_id = %3d' % i_t)
            time_init_current_tree = time.perf_counter()
            # update data['y_train'] (residual w.r.t. the other trees)
            bart.update_residual(i_t, data)
            update_cache_tmp(cache_tmp, data, param, settings)
            # MCMC for i_t'th tree
            bart.trees[i_t].update_loglik_node_all(data, param, cache, settings)
            (bart.trees[i_t], change) = run_mcmc_single_tree(bart.trees[i_t], settings, data, param,
                    cache, change, mcmc_counts, cache_tmp, bart.pmcmc_objects[i_t])
            # update parameters
            sample_param(bart.trees[i_t], settings, param)
            logprior += bart.trees[i_t].pred_val_logprior
            # update pred_val
            bart.update_pred_val(i_t, data, param, settings)
            # update stats; 'change' indicates whether MCMC move was accepted
            bart.trees[i_t].update_depth()
            mcmc_stats[i_t, itr, [3, 6, 7, 8, 9]] = np.array([bart.trees[i_t].depth,
                    len(bart.trees[i_t].leaf_nodes), len(bart.trees[i_t].non_leaf_nodes),
                    change, time.perf_counter() - time_init_current_tree])
            # NOTE: this logprior computation does not affect timing
            if settings.mcmc_type == 'cgm' or settings.mcmc_type == 'growprune':
                mcmc_stats[i_t, itr, 1] = bart.trees[i_t].compute_logprior()
            else:
                # NOTE: compute_logprior could be incorrect for PG
                # (prior over feature_ids is 1/D rather than 1/numValidDimensions)
                mcmc_stats[i_t, itr, 1] = -np.inf
        if settings.sample_y == 1 and settings.mcmc_type == 'prior':
            # Marginal conditional simulator
            bart.sample_labels(data, settings, param)
        if settings.mcmc_type == 'cgm' or settings.mcmc_type == 'growprune':
            logprior += float(np.sum(mcmc_stats[:, itr, 1]))
        else:
            logprior = -np.inf
        loglik_train, mse_train = bart.compute_train_loglik(data, settings, param)
        logprob_bart = logprior + loglik_train
        # mcmc_stats_bart_desc = 0: loglik, 1: logprior, 2: logprob,
        # 3: mean depth, 4: mean num_leaves, 5: mean num_nonleaves, 6: mean change,
        # 7: mse_train, 8: lambda_bart, 9: time_itr
        mcmc_stats_bart[itr, :3] = [loglik_train, logprior, logprob_bart]
        mcmc_stats_bart[itr, 3:7] = np.mean(mcmc_stats[:, itr, [3, 6, 7, 8]], 0)    # depth, #leaf, #nonleaf, change
        mcmc_stats_bart[itr, -3:-1] = [mse_train, param.lambda_bart]
        mcmc_stats_bart[itr, -1] = np.sum(mcmc_stats[:, itr, -1]) + time_sample_lambda  # total time per iteration
        if itr == 0:
            mcmc_stats_bart[itr, -1] += time_initialization
        if settings.verbose >= 2:
            print('fraction of trees in which MCMC move was accepted = %.3f' % mcmc_stats_bart[itr, 6])
        if (settings.save == 1):
            for tree in bart.trees:
                tree.gen_rules_tree()
            pred_tmp = {'train': bart.predict_train(data, param, settings),
                    'test': bart.predict(data['x_test'], data['y_test_orig'], param, settings)}
            for k_data in settings.perf_dataset_keys:
                for k_store in settings.perf_store_keys:
                    mcmc_tree_predictions[k_data]['accum'][k_store] += pred_tmp[k_data][k_store]
            if itr == 0 and settings.verbose >= 1:
                print('Cumulative: itr, itr_run_avg, [mse train, logprob_train, mse test, '
                        'logprob_test, time_mcmc, time_mcmc_prediction], time_mcmc_cumulative')
                print('itr, [mse train, logprob_train, mse test, '
                        'logprob_test, time_mcmc, time_mcmc+time_prediction]')
            if settings.store_every_iteration == 1:
                store_every_iteration(mcmc_tree_predictions, data, settings, param, itr,
                        pred_tmp, mcmc_stats_bart[itr, -1], time_init_current)
            # Every n_run_avg iterations: turn the accumulated predictions into a
            # running average and record metrics/timings for this window.
            if (itr > 0) and (itr % settings.n_run_avg == (settings.n_run_avg - 1)):
                metrics = {}
                for k_data in settings.perf_dataset_keys:
                    k_data_tmp, k_data_n = get_k_data_names(settings, k_data)
                    for k_store in settings.perf_store_keys:
                        mcmc_tree_predictions[k_data][k_store][itr_run_avg] = \
                                mcmc_tree_predictions[k_data]['accum'][k_store] / (itr + 1)
                    metrics[k_data] = compute_metrics_regression(data[k_data_tmp],
                            mcmc_tree_predictions[k_data]['pred_mean'][itr_run_avg],
                            mcmc_tree_predictions[k_data]['pred_prob'][itr_run_avg])
                itr_range = range(itr_run_avg * settings.n_run_avg, (itr_run_avg + 1) * settings.n_run_avg)
                if settings.debug == 1:
                    print('itr_range = %s' % itr_range)
                time_mcmc_train = np.sum(mcmc_stats_bart[itr_range, -1])
                mcmc_tree_predictions['run_avg_stats'][:, itr_run_avg] = \
                        [metrics['train']['mse'], metrics['train']['log_prob'],
                         metrics['test']['mse'], metrics['test']['log_prob'],
                         time_mcmc_train, time.perf_counter() - time_init_run_avg]
                if settings.verbose >= 1:
                    # BUG FIX: the original called .format() on a %-style template,
                    # which printed the raw template and dropped the values.
                    print('Cumulative: %7d, %7d, %s, %.2f' %
                            (itr, itr_run_avg, mcmc_tree_predictions['run_avg_stats'][:, itr_run_avg].T,
                             np.sum(mcmc_tree_predictions['run_avg_stats'][-2, :itr_run_avg + 1])))
                itr_run_avg += 1
                time_init_run_avg = time.perf_counter()

    # Print results.
    print('\nTotal time (seconds) = %f' % (time.perf_counter() - time_init))
    if settings.verbose >= 2:
        print('mcmc_stats_bart[:, 3:] (not cumulative) = ')
        print('mean depth, mean num_leaves, mean num_nonleaves, ' +
                'mean change, mse_train, lambda_bart, time_itr')
        print(mcmc_stats_bart[:, 3:])
    if settings.verbose >= 1:
        print('mean of mcmc_stats_bart (discarding first 50% of the chain)')
        # BUG FIX: use floor division; '/' yields a float in Python 3 and
        # floats are not valid numpy indices.
        itr_start = mcmc_stats_bart.shape[0] // 2
        for k, s in enumerate(mcmc_stats_bart_desc):
            # BUG FIX: %-style placeholders require the % operator, not .format().
            print('%20s\t%.2f' % (s, np.mean(mcmc_stats_bart[itr_start:, k])))
    if settings.save == 1:
        print('predictions averaged across all previous additive trees:')
        print('mse train, mean log_prob_train, mse test, mean log_prob_test')
        print(mcmc_tree_predictions['run_avg_stats'][:4, :].T)

    # Write results to disk.
    if settings.save == 1:
        filename = get_filename_bart(settings)
        print('filename = ' + filename)
        results = {}
        results['mcmc_stats_bart'] = mcmc_stats_bart
        results['mcmc_stats_bart_desc'] = mcmc_stats_bart_desc
        if settings.store_all_stats:
            results['mcmc_stats'] = mcmc_stats
        results['settings'] = settings
        if settings.dataset[:8] == 'friedman' or settings.dataset[:3] == 'toy':
            results['data'] = data
        pickle.dump(results, open(filename, "wb"), protocol=pickle.HIGHEST_PROTOCOL)
        filename2 = filename[:-1] + 'tree_predictions.p'
        print('predictions stored in file: %s' % filename2)
        pickle.dump(mcmc_tree_predictions, open(filename2, "wb"), protocol=pickle.HIGHEST_PROTOCOL)
def main():
    """Run the BART MCMC sampler end-to-end (Python 2 variant).

    Loads the dataset, runs settings.n_iterations sweeps of MCMC over the
    settings.m_bart trees, records per-tree and per-iteration statistics,
    and (if settings.save) accumulates running-average predictions and
    pickles results to disk. Relies on module-level helpers
    (process_command_line, load_data, BART, ...) and imports (np, random,
    time, pickle, pp).
    """
    settings = process_command_line()
    print 'Current settings:'
    pp.pprint(vars(settings))
    # Resetting random seed
    # NOTE(review): seeds are derived from init_id so distinct init_ids give
    # distinct, reproducible chains.
    np.random.seed(settings.init_id * 1000)
    random.seed(settings.init_id * 1000)
    # load data
    print 'Loading data ...'
    data = load_data(settings)
    print 'Loading data ... completed'
    if settings.center_y:
        print 'center_y = True; centering the y variables at mean(data[y_train])'
        center_labels(data, settings)
    backup_target(data, settings)
    #pre-compute & initialize
    time_start = time.clock()
    param, cache, cache_tmp = precompute(data, settings)
    bart = BART(data, param, settings, cache, cache_tmp)
    time_initialization = time.clock() - time_start
    # initialize stuff for results
    # mcmc_stats: per-tree stats; mcmc_stats_bart: ensemble-level stats per
    # iteration, columns described by mcmc_stats_bart_desc below.
    mcmc_stats = np.zeros((settings.m_bart, settings.n_iterations, 10))
    mcmc_stats_bart = np.zeros((settings.n_iterations, 10))
    mcmc_stats_bart_desc = ['loglik', 'logprior', 'logprob', \
            'mean depth', 'mean num_leaves', 'mean num_nonleaves', 'mean change', \
            'mse_train', 'lambda_bart', 'time_itr']
    mcmc_counts = None
    mcmc_tree_predictions = init_performance_storage(data, settings)
    n_burn_in = 0
    # NOTE: predictions are stored without discarding burn-in
    assert n_burn_in == 0
    time_init = time.clock()
    time_init_run_avg = time.clock()
    itr_run_avg = 0
    change = True
    tree_order = range(settings.m_bart)
    print 'initial settings:'
    print 'lambda_bart value = %.3f' % param.lambda_bart
    loglik_train, mse_train = bart.compute_train_loglik(data, settings, param)
    print 'mse train = %.3f, loglik_train = %.3f' % (mse_train, loglik_train)
    for itr in range(settings.n_iterations):
        time_init_current = time.clock()
        if settings.verbose >= 1:
            print '\n%s BART iteration = %7d %s' % ('*'*30, itr, '*'*30)
        logprior = 0.
        if settings.sample_y == 1 and settings.mcmc_type != 'prior':
            # Successive conditional simulator
            bart.sample_labels(data, settings, param)
        # sampling lambda_bart
        bart.sample_lambda_bart(param, data, settings)
        time_sample_lambda = time.clock() - time_init_current
        logprior += bart.lambda_logprior
        # Trees are updated in a fresh random order each sweep.
        random.shuffle(tree_order)
        for i_t in tree_order:
            if settings.debug == 1:
                print '\ntree_id = %3d' % i_t
            time_init_current_tree = time.clock()
            # update data['y_train']
            bart.update_residual(i_t, data)
            update_cache_tmp(cache_tmp, data, param, settings)
            # MCMC for i_t'th tree
            bart.trees[i_t].update_loglik_node_all(data, param, cache, settings)
            (bart.trees[i_t], change) = run_mcmc_single_tree(bart.trees[i_t], settings, data, param, \
                    cache, change, mcmc_counts, cache_tmp, bart.pmcmc_objects[i_t])
            # update parameters
            sample_param(bart.trees[i_t], settings, param)
            logprior += bart.trees[i_t].pred_val_logprior
            # update pred_val
            bart.update_pred_val(i_t, data, param, settings)
            # update stats
            # 'change' indicates whether MCMC move was accepted
            bart.trees[i_t].update_depth()
            mcmc_stats[i_t, itr, [3,6,7,8,9]] = np.array([bart.trees[i_t].depth, \
                    len(bart.trees[i_t].leaf_nodes), len(bart.trees[i_t].non_leaf_nodes), \
                    change, time.clock() - time_init_current_tree])
            # NOTE: this logprior computation does not affect timing
            if settings.mcmc_type == 'cgm' or settings.mcmc_type == 'growprune':
                mcmc_stats[i_t, itr, 1] = bart.trees[i_t].compute_logprior()
            else:
                mcmc_stats[i_t, itr, 1] = -np.inf
                #NOTE: compute_logprior could be incorrect for PG
                # (prior over feature_ids is 1/D rather than 1/numValidDimensions)
        if settings.sample_y == 1 and settings.mcmc_type == 'prior':
            # Marginal conditional simulator
            bart.sample_labels(data, settings, param)
        if settings.mcmc_type == 'cgm' or settings.mcmc_type == 'growprune':
            logprior += float(np.sum(mcmc_stats[:, itr, 1]))
        else:
            logprior = -np.inf
        loglik_train, mse_train = bart.compute_train_loglik(data, settings, param)
        logprob_bart = logprior + loglik_train
        # mcmc_stats_bart_desc = 0: loglik, 1: logprior, 2: logprob,
        # 3: mean depth, 4: mean num_leaves, 5: mean num_nonleaves, 6: mean change,
        # 7: mse_train, 8: lambda_bart, 9: time_itr
        mcmc_stats_bart[itr, :3] = [loglik_train, logprior, logprob_bart]
        mcmc_stats_bart[itr, 3:7] = np.mean(mcmc_stats[:, itr, [3,6,7,8]], 0)   # depth, #leaf, #nonleaf, change
        mcmc_stats_bart[itr, -3:-1] = [mse_train, param.lambda_bart]
        mcmc_stats_bart[itr, -1] = np.sum(mcmc_stats[:, itr, -1]) + time_sample_lambda  # total time per iteration
        if itr == 0:
            # First iteration is charged with the one-off initialization cost.
            mcmc_stats_bart[itr, -1] += time_initialization
        if settings.verbose >=2 :
            print 'fraction of trees in which MCMC move was accepted = %.3f' % mcmc_stats_bart[itr, 6]
        if (settings.save == 1):
            for tree in bart.trees:
                tree.gen_rules_tree()
            pred_tmp = {'train': bart.predict_train(data, param, settings), \
                    'test': bart.predict(data['x_test'], data['y_test_orig'], param, settings)}
            # Accumulate predictions across iterations; averaged below every
            # n_run_avg iterations.
            for k_data in settings.perf_dataset_keys:
                for k_store in settings.perf_store_keys:
                    mcmc_tree_predictions[k_data]['accum'][k_store] += pred_tmp[k_data][k_store]
            if itr == 0 and settings.verbose >= 1:
                print 'Cumulative: itr, itr_run_avg, [mse train, logprob_train, mse test, ' \
                        'logprob_test, time_mcmc, time_mcmc_prediction], time_mcmc_cumulative'
                print 'itr, [mse train, logprob_train, mse test, ' \
                        'logprob_test, time_mcmc, time_mcmc+time_prediction]'
            if settings.store_every_iteration == 1:
                store_every_iteration(mcmc_tree_predictions, data, settings, param, itr, \
                        pred_tmp, mcmc_stats_bart[itr, -1], time_init_current)
            # Every n_run_avg iterations: convert accumulated predictions to a
            # running average and record window metrics/timings.
            if (itr > 0) and (itr % settings.n_run_avg == (settings.n_run_avg - 1)):
                metrics = {}
                for k_data in settings.perf_dataset_keys:
                    k_data_tmp, k_data_n = get_k_data_names(settings, k_data)
                    for k_store in settings.perf_store_keys:
                        mcmc_tree_predictions[k_data][k_store][itr_run_avg] = \
                                mcmc_tree_predictions[k_data]['accum'][k_store] / (itr + 1)
                    metrics[k_data] = compute_metrics_regression(data[k_data_tmp], \
                            mcmc_tree_predictions[k_data]['pred_mean'][itr_run_avg], \
                            mcmc_tree_predictions[k_data]['pred_prob'][itr_run_avg])
                itr_range = range(itr_run_avg * settings.n_run_avg, (itr_run_avg + 1) * settings.n_run_avg)
                if settings.debug == 1:
                    print 'itr_range = %s' % itr_range
                time_mcmc_train = np.sum(mcmc_stats_bart[itr_range, -1])
                mcmc_tree_predictions['run_avg_stats'][:, itr_run_avg] = \
                        [ metrics['train']['mse'], metrics['train']['log_prob'], \
                        metrics['test']['mse'], metrics['test']['log_prob'], \
                        time_mcmc_train, time.clock() - time_init_run_avg ]
                if settings.verbose >= 1:
                    print 'Cumulative: %7d, %7d, %s, %.2f' % \
                            (itr, itr_run_avg, mcmc_tree_predictions['run_avg_stats'][:, itr_run_avg].T, \
                            np.sum(mcmc_tree_predictions['run_avg_stats'][-2, :itr_run_avg+1]))
                itr_run_avg += 1
                time_init_run_avg = time.clock()
    # print results
    print '\nTotal time (seconds) = %f' % (time.clock() - time_init)
    if settings.verbose >=2:
        print 'mcmc_stats_bart[:, 3:] (not cumulative) = '
        print 'mean depth, mean num_leaves, mean num_nonleaves, ' + \
                'mean change, mse_train, lambda_bart, time_itr'
        print mcmc_stats_bart[:, 3:]
    if settings.verbose >=1:
        print 'mean of mcmc_stats_bart (discarding first 50% of the chain)'
        itr_start = mcmc_stats_bart.shape[0] / 2
        for k, s in enumerate(mcmc_stats_bart_desc):
            print '%20s\t%.2f' % (s, np.mean(mcmc_stats_bart[itr_start:, k]))
    if settings.save == 1:
        print 'predictions averaged across all previous additive trees:'
        print 'mse train, mean log_prob_train, mse test, mean log_prob_test'
        print mcmc_tree_predictions['run_avg_stats'][:4,:].T
    # Write results to disk
    if settings.save == 1:
        filename = get_filename_bart(settings)
        print 'filename = ' + filename
        results = {}
        results['mcmc_stats_bart'] = mcmc_stats_bart
        results['mcmc_stats_bart_desc'] = mcmc_stats_bart_desc
        if settings.store_all_stats:
            results['mcmc_stats'] = mcmc_stats
        results['settings'] = settings
        # Synthetic datasets are small enough to stash alongside the results.
        if settings.dataset[:8] == 'friedman' or settings.dataset[:3] == 'toy':
            results['data'] = data
        pickle.dump(results, open(filename, "wb"), protocol=pickle.HIGHEST_PROTOCOL)
        filename2 = filename[:-1] + 'tree_predictions.p'
        print 'predictions stored in file: %s' % filename2
        pickle.dump(mcmc_tree_predictions, open(filename2, "wb"), protocol=pickle.HIGHEST_PROTOCOL)
def main():
    """Run the BART MCMC sampler end-to-end.

    Loads the dataset, runs `settings.n_iterations` sweeps of MCMC over the
    `settings.m_bart` trees, tracks per-tree and per-iteration statistics,
    and (optionally) accumulates/saves running-average predictions.

    Relies on module-level helpers (process_command_line, load_data, BART, ...)
    and module-level imports (np, random, time, pickle, pp).
    """
    settings = process_command_line()
    print('Current Settings:')
    # BUG FIX: 'ppt' is undefined; the pretty-printer is imported as 'pp'.
    pp.pprint(vars(settings))

    # Reset random seeds so runs with the same init_id are reproducible.
    np.random.seed(settings.init_id * 1000)
    random.seed(settings.init_id * 1000)

    print("Loading data....")
    data = load_data(settings)
    print("Dating loading completed")
    if settings.center_y:
        print('center_y = True; center the y variables at mean(data[y_train])')
        center_labels(data, settings)
    backup_target(data, settings)

    # Pre-compute & initialize.
    # BUG FIX: time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for interval timing.
    time_start = time.perf_counter()
    param, cache, cache_tmp = precompute(data, settings)
    bart = BART(data, param, settings, cache, cache_tmp)
    time_initialization = time.perf_counter() - time_start

    # Storage for per-tree and ensemble-level statistics.
    mcmc_stats = np.zeros((settings.m_bart, settings.n_iterations, 10))
    mcmc_stats_bart = np.zeros((settings.n_iterations, 10))
    mcmc_stats_bart_desc = ['loglik', 'logprior', 'logprob', 'mean_depth',
            'mean num_leaves', 'mean num_nonleaves', 'mean change',
            'mse_train', 'lambda_bart', 'time_itr']
    mcmc_counts = None
    mcmc_tree_predictions = init_performance_storage(data, settings)
    burn_in_number = 0
    # NOTE: predictions are stored without discarding burn-in.
    assert burn_in_number == 0
    init_time = time.perf_counter()
    init_time_run_average = time.perf_counter()
    iteration_run_average = 0
    change = True
    # BUG FIX: random.shuffle needs a mutable sequence; Python 3's range()
    # is immutable, so materialize it as a list.
    tree_order = list(range(settings.m_bart))
    print('Initial settings')
    print('lambda_bart value = %.3f' % param.lambda_bart)
    # BUG FIX: method-name drift — the BART class exposes compute_train_loglik
    # (see the sibling variants of this driver), not compute_train_loglikelihood.
    loglikelihood_training, mse_training = bart.compute_train_loglik(data, settings, param)
    print('mse train =%.3f, loglik_train= %.3f' % (mse_training, loglikelihood_training))

    for iterator in range(settings.n_iterations):
        init_current_time = time.perf_counter()
        if settings.verbose >= 1:
            print('\n%s BART ITERATION = %7d %s' % ('*'*30, iterator, '*'*30))
        logarithmic_prior = 0.
        if settings.sample_y == 1 and settings.mcmc_type != 'prior':
            # Successive conditional simulator.
            bart.sample_labels(data, settings, param)
        # Sample lambda_bart (noise precision).
        bart.sample_lambda_bart(param, data, settings)
        time_sample_lambda = time.perf_counter() - init_current_time
        logarithmic_prior += bart.lambda_logprior
        random.shuffle(tree_order)
        for ele in tree_order:
            if settings.debug == 1:
                print('\ntree_id = %3d' % ele)
            init_current_tree_time = time.perf_counter()
            # Set data['y_train'] to the residual w.r.t. the other trees.
            bart.update_residual(ele, data)
            update_cache_tmp(cache_tmp, data, param, settings)
            # Run MCMC for the ele'th tree.
            bart.trees[ele].update_loglik_node_all(data, param, cache, settings)
            (bart.trees[ele], change) = run_mcmc_single_tree(bart.trees[ele], settings, data, param,
                    cache, change, mcmc_counts, cache_tmp, bart.pmcmc_objects[ele])
            # Update leaf parameters.
            sample_param(bart.trees[ele], settings, param)
            logarithmic_prior += bart.trees[ele].pred_val_logprior
            # BUG FIX: method-name drift — the sibling variants call
            # bart.update_pred_val, not update_predicted_value.
            bart.update_pred_val(ele, data, param, settings)
            # 'change' indicates whether the MCMC move was accepted.
            bart.trees[ele].update_depth()
            mcmc_stats[ele, iterator, [3, 6, 7, 8, 9]] = np.array([bart.trees[ele].depth,
                    len(bart.trees[ele].leaf_nodes), len(bart.trees[ele].non_leaf_nodes),
                    change, time.perf_counter() - init_current_tree_time])
            # BUG FIX: the mcmc_type token is 'growprune' (as tested a few
            # lines below), not 'grow_prune'.
            if settings.mcmc_type == 'cgm' or settings.mcmc_type == 'growprune':
                mcmc_stats[ele, iterator, 1] = bart.trees[ele].compute_logprior()
            else:
                mcmc_stats[ele, iterator, 1] = -np.inf
        if settings.sample_y == 1 and settings.mcmc_type == 'prior':
            # Marginal conditional simulator.
            bart.sample_labels(data, settings, param)
        if settings.mcmc_type == 'cgm' or settings.mcmc_type == 'growprune':
            logarithmic_prior += float(np.sum(mcmc_stats[:, iterator, 1]))
        else:
            logarithmic_prior = -np.inf
        loglikelihood_training, mse_training = bart.compute_train_loglik(data, settings, param)
        bart_log_probability = logarithmic_prior + loglikelihood_training
        # Columns: 0 loglik, 1 logprior, 2 logprob, 3:7 mean depth/#leaf/#nonleaf/change,
        # 7 mse_train, 8 lambda_bart, 9 time_itr.
        mcmc_stats_bart[iterator, :3] = [loglikelihood_training, logarithmic_prior, bart_log_probability]
        mcmc_stats_bart[iterator, 3:7] = np.mean(mcmc_stats[:, iterator, [3, 6, 7, 8]], 0)
        mcmc_stats_bart[iterator, -3:-1] = [mse_training, param.lambda_bart]
        mcmc_stats_bart[iterator, -1] = np.sum(mcmc_stats[:, iterator, -1]) + time_sample_lambda
        if iterator == 0:
            # Charge the one-off initialization cost to the first iteration.
            mcmc_stats_bart[iterator, -1] += time_initialization
        if (settings.verbose >= 2):
            print('Fraction of trees where MCMC moves were accepted = %.3f' % mcmc_stats_bart[iterator, 6])
        if (settings.save == 1):
            for tree_ele in bart.trees:
                tree_ele.gen_rules_tree()
            # BUG FIX: method-name drift — the sibling variants call
            # bart.predict_train, not predict_training.
            pred_tmp = {'train': bart.predict_train(data, param, settings),
                    'test': bart.predict(data['x_test'], data['y_test_orig'], param, settings)}
            for data_of_keys in settings.perf_dataset_keys:
                for stored_keys in settings.perf_store_keys:
                    # BUG FIX: 'mcmc_predict_predictions' was an undefined name;
                    # the accumulator is mcmc_tree_predictions.
                    mcmc_tree_predictions[data_of_keys]['accum'][stored_keys] += pred_tmp[data_of_keys][stored_keys]
            if iterator == 0 and settings.verbose >= 1:
                print('Cumulative: itr, itr_run_avg, [mse train, logprob_train, mse test, '
                        'logprob_test, time_mcmc, time_mcmc_prediction], time_mcmc_cumulative')
                print('itr, [mse train, logprob_train, mse test, '
                        'logprob_test, time_mcmc, time_mcmc+time_prediction]')
            if settings.store_every_iteration == 1:
                store_every_iteration(mcmc_tree_predictions, data, settings, param, iterator,
                        pred_tmp, mcmc_stats_bart[iterator, -1], init_current_time)
            if iterator > 0 and iterator % settings.n_run_avg == settings.n_run_avg - 1:
                metrics = {}
                for data_of_keys in settings.perf_dataset_keys:
                    k_temp, k_data_n = get_k_data_names(settings, data_of_keys)
                    for stored_keys in settings.perf_store_keys:
                        # BUG FIX: the accumulated sum must be divided by the number
                        # of iterations to obtain the running average.
                        mcmc_tree_predictions[data_of_keys][stored_keys][iteration_run_average] = \
                                mcmc_tree_predictions[data_of_keys]['accum'][stored_keys] / (iterator + 1)
                    # BUG FIX: the third argument is the predictive probability,
                    # not pred_mean passed twice.
                    metrics[data_of_keys] = compute_metrics_regression(data[k_temp],
                            mcmc_tree_predictions[data_of_keys]['pred_mean'][iteration_run_average],
                            mcmc_tree_predictions[data_of_keys]['pred_prob'][iteration_run_average])
                iterator_range = range(iteration_run_average * settings.n_run_avg,
                        (iteration_run_average + 1) * settings.n_run_avg)
                if settings.debug == 1:
                    # BUG FIX: 'iteration_range' was an undefined name (NameError).
                    print('Iteration range = %s' % iterator_range)
                mcmc_train_timing = np.sum(mcmc_stats_bart[iterator_range, -1])
                # BUG FIX: the storage key is 'run_avg_stats' (read as such two
                # lines below and by init_performance_storage), not 'run_avg_tests'.
                mcmc_tree_predictions['run_avg_stats'][:, iteration_run_average] = \
                        [metrics['train']['mse'], metrics['train']['log_prob'],
                         metrics['test']['mse'], metrics['test']['log_prob'],
                         mcmc_train_timing, time.perf_counter() - init_time_run_average]
                if settings.verbose >= 1:
                    print('Cumulative: %7d, %7d, %s, %.2f' %
                            (iterator, iteration_run_average,
                             mcmc_tree_predictions['run_avg_stats'][:, iteration_run_average].T,
                             np.sum(mcmc_tree_predictions['run_avg_stats'][-2, :iteration_run_average + 1])))
                iteration_run_average += 1
                init_time_run_average = time.perf_counter()

    # Print results.
    print('\nTotal time in seconds =%f' % (time.perf_counter() - init_time))
    if settings.verbose >= 2:
        print('mcmc_stats_bart[:,3:] (non cummulative) =')
        print('mean_depth,mean num_leaves ,mean num_nonleaves,mean change,mse_training,lambda_bart,time_iterations')
        print(mcmc_stats_bart[:, 3:])
    if settings.verbose >= 1:
        print('mean of mcmc_stats_bart discarding first 50% of the chain')
        # BUG FIX: floor division — '/' yields a float in Python 3 and floats
        # are not valid numpy indices.
        iteration_start = mcmc_stats_bart.shape[0] // 2
        for k_ele, s_ele in enumerate(mcmc_stats_bart_desc):
            print('%20s\t%.2f' % (s_ele, np.mean(mcmc_stats_bart[iteration_start:, k_ele])))
    if settings.save == 1:
        print('Averaged predictions across all previous additive trees:')
        print('mse training,mean log_prob_train, mse test,mean log_prob_test')
        # BUG FIX: read from 'run_avg_stats' (see fix above).
        print(mcmc_tree_predictions['run_avg_stats'][:4, :].T)

    # Write results to disk.
    if settings.save == 1:
        filename_to_use = get_filename_bart(settings)
        print('filename = ' + filename_to_use)
        prediction_results = {}
        prediction_results['mcmc_stats_bart'] = mcmc_stats_bart
        prediction_results['mcmc_stats_bart_desc'] = mcmc_stats_bart_desc
        if settings.store_all_stats:
            prediction_results['mcmc_stats'] = mcmc_stats
        prediction_results['settings'] = settings
        if settings.dataset[:8] == 'friedman' or settings.dataset[:3] == 'toy':
            # BUG FIX: 'results' was an undefined name here; the results dict
            # in this variant is prediction_results.
            prediction_results['data'] = data
        pickle.dump(prediction_results, open(filename_to_use, "wb"), protocol=pickle.HIGHEST_PROTOCOL)
        second_filename_to_use = filename_to_use[:-1] + 'tree_predictions.p'
        print('predictions stored in file: %s' % second_filename_to_use)
        pickle.dump(mcmc_tree_predictions, open(second_filename_to_use, 'wb'), protocol=pickle.HIGHEST_PROTOCOL)