def main_rcn_structure():
    dataset_dir = sys.argv[4]
    data_name = sys.argv[6]
    min_depth = int(sys.argv[8])
    max_depth = int(sys.argv[10])

    train_filename = dataset_dir + data_name + '.ts.data'
    train_dataset = np.loadtxt(train_filename, dtype=int, delimiter=',')
    n_variables = train_dataset.shape[1]
    # Keep at least 2 nodes in each leaf to build the Chow-Liu tree.
    max_depth = min(n_variables - 2, max_depth)

    print("-----Learning the structure Randomly----")
    tree = []
    output_cnet = '../rcn_output/'
    for i in range(min_depth, max_depth + 1):
        # Each deeper network extends the tree learned at the previous depth.
        cnet = CNET_deep(tree, depth=i)
        cnet.learnStructure(n_variables)
        tree = copy.deepcopy(cnet.tree)

        main_dict = {}
        utilM.save_cutset(main_dict, cnet.tree, np.arange(n_variables),
                          ccpt_flag=True)
        np.savez_compressed(output_cnet + data_name + '_structure_' + str(i),
                            module=main_dict)
def main_rcn_structure(parms_dict):
    print('------------------------------------------------------------------')
    print('Learning the structure of Deep Random Cutset Network')
    print('------------------------------------------------------------------')
    dataset_dir = parms_dict['dir']
    data_name = parms_dict['dn']
    min_depth = int(parms_dict['min_depth'])
    max_depth = int(parms_dict['max_depth'])
    output_dir = parms_dict['output_dir']

    train_filename = dataset_dir + data_name + '.ts.data'
    train_dataset = np.loadtxt(train_filename, dtype=int, delimiter=',')
    n_variables = train_dataset.shape[1]
    # Keep at least 2 nodes in each leaf to build the Chow-Liu tree.
    max_depth = min(n_variables - 2, max_depth)

    tree = []
    for i in range(min_depth, max_depth + 1):
        # Each deeper network extends the tree learned at the previous depth.
        cnet = CNET_deep(tree, depth=i)
        cnet.learnStructure(n_variables)
        tree = copy.deepcopy(cnet.tree)

        main_dict = {}
        utilM.save_cutset(main_dict, cnet.tree, np.arange(n_variables),
                          ccpt_flag=True)
        np.savez_compressed(output_dir + data_name + '_structure_' + str(i),
                            module=main_dict)
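# Example call (a minimal sketch: the keys match the parms_dict reads above,
# but the paths, dataset name 'nltcs', and depth values are hypothetical):
#
#     main_rcn_structure({'dir': '../dataset/', 'dn': 'nltcs',
#                         'min_depth': '1', 'max_depth': '10',
#                         'output_dir': '../rcn_output/'})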
def structure_redefine(self, load_info):
    # Rebuild this mixture from a reloaded model: copy the mixture weights,
    # then convert each component cutset network into its DFS representation
    # and cache the internal/leaf node lists.
    self.weights = load_info.mixture_weight
    self.n_components = load_info.n_components
    for cn in load_info.cnet_list:
        main_dict = {}
        utilM.save_cutset(main_dict, cn.tree, np.arange(self.n_variable),
                          ccpt_flag=True)
        cnet_component = CNET_dfs(main_dict, self.n_variable)
        self.cnet_dict_list.append(cnet_component)
        internal_list, leaf_list = cnet_component.get_node_list()
        self.internal_list.append(internal_list)
        self.leaf_list.append(leaf_list)
def main_cutset_opt():
    dataset_dir = sys.argv[2]
    data_name = sys.argv[4]
    max_depth = int(sys.argv[6])

    train_filename = dataset_dir + data_name + '.ts.data'
    test_filename = dataset_dir + data_name + '.test.data'
    valid_filename = dataset_dir + data_name + '.valid.data'
    train_dataset = np.loadtxt(train_filename, dtype=int, delimiter=',')
    valid_dataset = np.loadtxt(valid_filename, dtype=int, delimiter=',')
    test_dataset = np.loadtxt(test_filename, dtype=int, delimiter=',')

    print("Learning Cutset Networks only using data.....")
    train_ll = np.zeros(max_depth)
    valid_ll = np.zeros(max_depth)
    test_ll = np.zeros(max_depth)
    best_valid = -np.inf
    best_module = None
    for i in range(1, max_depth + 1):
        cnet = CNET(depth=i)
        cnet.learnStructure(train_dataset)
        # Average log-likelihood per example on each split.
        train_ll[i - 1] = np.sum(cnet.getWeights(train_dataset)) / train_dataset.shape[0]
        valid_ll[i - 1] = np.sum(cnet.getWeights(valid_dataset)) / valid_dataset.shape[0]
        test_ll[i - 1] = np.sum(cnet.getWeights(test_dataset)) / test_dataset.shape[0]
        # Keep the model with the best validation score.
        if best_valid < valid_ll[i - 1]:
            best_valid = valid_ll[i - 1]
            best_module = copy.deepcopy(cnet)

    print('Train set cnet LL scores')
    for l in range(max_depth):
        print(train_ll[l], l + 1)
    print()
    print('Valid set cnet LL scores')
    for l in range(max_depth):
        print(valid_ll[l], l + 1)
    print()
    print('Test set cnet LL scores')
    for l in range(max_depth):
        print(test_ll[l], l + 1)

    best_ind = np.argmax(valid_ll)
    print()
    # Depths are 1-based, so the best layer is best_ind + 1.
    print('Best validation LL score achieved in layer: ', best_ind + 1)
    print('train: ', np.sum(best_module.getWeights(train_dataset)) / train_dataset.shape[0])
    print('valid: ', np.sum(best_module.getWeights(valid_dataset)) / valid_dataset.shape[0])
    print('test : ', np.sum(best_module.getWeights(test_dataset)) / test_dataset.shape[0])

    main_dict = {}
    utilM.save_cutset(main_dict, best_module.tree,
                      np.arange(train_dataset.shape[1]), ccpt_flag=True)
    np.savez_compressed('../cn_output/' + data_name, module=main_dict)
def main_cnxd():
    dataset_dir = sys.argv[2]
    data_name = sys.argv[4]
    lamda = float(sys.argv[6])   # selected using the validation dataset
    beta_function = sys.argv[8]  # 'linear', 'square', or 'root' (square root)
    min_depth = int(sys.argv[10])
    max_depth = int(sys.argv[12])
    tum_module = sys.argv[14]

    print('------------------------------------------------------------------')
    print('Learning CNxD using Data and TUM')
    print('------------------------------------------------------------------')

    train_filename = dataset_dir + data_name + '.ts.data'
    test_filename = dataset_dir + data_name + '.test.data'
    valid_filename = dataset_dir + data_name + '.valid.data'
    train_dataset = np.loadtxt(train_filename, dtype=int, delimiter=',')
    valid_dataset = np.loadtxt(valid_filename, dtype=int, delimiter=',')
    test_dataset = np.loadtxt(test_filename, dtype=int, delimiter=',')
    n_variables = train_dataset.shape[1]

    # Load the trained mixture of Chow-Liu trees (the TUM).
    print('Start reloading MT...')
    mt_dir = '../mt_output/'
    reload_mix_clt = load_mt(mt_dir, tum_module)
    for t in reload_mix_clt.clt_list:
        t.nvariables = n_variables
        # Learn the junction tree for each Chow-Liu tree.
        jt = JT.JunctionTree()
        jt.learn_structure(t.topo_order, t.parents, t.cond_cpt)
        reload_mix_clt.jt_list.append(jt)

    print("Learning Cutset Networks by inference.....")
    print("Current Lamda: ", lamda)
    print("Current Function: ", beta_function)

    tree = []
    module_dir = '../cnxd_output/' + data_name + '/'
    train_ll_score = np.zeros(max_depth)
    valid_ll_score = np.zeros(max_depth)
    test_ll_score = np.zeros(max_depth)
    learning_time = np.zeros(max_depth)
    # Entries for depths below min_depth stay 0.
    for i in range(min_depth, max_depth + 1):
        start = time.time()
        cnet = CNXD(tree, depth=i)
        cnet.learnStructure(reload_mix_clt, train_dataset, lamda, beta_function)
        learning_time[i - 1] = time.time() - start
        tree = copy.deepcopy(cnet.tree)

        # Average log-likelihood per example on each split.
        train_ll_score[i - 1] = cnet.computeLL(train_dataset) / train_dataset.shape[0]
        valid_ll_score[i - 1] = cnet.computeLL(valid_dataset) / valid_dataset.shape[0]
        test_ll_score[i - 1] = cnet.computeLL(test_dataset) / test_dataset.shape[0]

        main_dict = {}
        utilM.save_cutset(main_dict, cnet.tree, np.arange(n_variables),
                          ccpt_flag=True)
        np.savez_compressed(module_dir + data_name + '_' + str(lamda) + '_'
                            + beta_function + '_' + str(i), module=main_dict)

    print('CNxD train set LL scores')
    for l in range(max_depth):
        print(train_ll_score[l], l + 1)
    print()
    print('CNxD valid set LL scores')
    for l in range(max_depth):
        print(valid_ll_score[l], l + 1)
    print()
    print('CNxD test set LL scores')
    for l in range(max_depth):
        print(test_ll_score[l], l + 1)
    print()
    print('CNxD learning times: ')
    for l in range(max_depth):
        print(np.sum(learning_time[0:l + 1]), l + 1)
    print()
def main_cutset_opt(parms_dict):
    print("----------------------------------------------------")
    print("Learning Cutset Networks on original data ")
    print("----------------------------------------------------")
    dataset_dir = parms_dict['dir']
    data_name = parms_dict['dn']
    max_depth = int(parms_dict['max_depth'])
    out_dir = parms_dict['output_dir']

    train_filename = dataset_dir + data_name + '.ts.data'
    test_filename = dataset_dir + data_name + '.test.data'
    valid_filename = dataset_dir + data_name + '.valid.data'
    train_dataset = np.loadtxt(train_filename, dtype=int, delimiter=',')
    valid_dataset = np.loadtxt(valid_filename, dtype=int, delimiter=',')
    test_dataset = np.loadtxt(test_filename, dtype=int, delimiter=',')

    train_ll = np.zeros(max_depth)
    valid_ll = np.zeros(max_depth)
    test_ll = np.zeros(max_depth)
    best_valid = -np.inf
    best_module = None
    for i in range(1, max_depth + 1):
        cnet = CNET(depth=i)
        cnet.learnStructure(train_dataset)
        # Average log-likelihood per example on each split.
        train_ll[i - 1] = np.sum(cnet.getWeights(train_dataset)) / train_dataset.shape[0]
        valid_ll[i - 1] = np.sum(cnet.getWeights(valid_dataset)) / valid_dataset.shape[0]
        test_ll[i - 1] = np.sum(cnet.getWeights(test_dataset)) / test_dataset.shape[0]
        # Keep the model with the best validation score.
        if best_valid < valid_ll[i - 1]:
            best_valid = valid_ll[i - 1]
            best_module = copy.deepcopy(cnet)

    print('Train set cnet LL scores')
    for l in range(max_depth):
        print(train_ll[l], l + 1)
    print()
    print('Valid set cnet LL scores')
    for l in range(max_depth):
        print(valid_ll[l], l + 1)
    print()
    print('Test set cnet LL scores')
    for l in range(max_depth):
        print(test_ll[l], l + 1)

    best_ind = np.argmax(valid_ll)
    print()
    # Depths are 1-based, so the best layer is best_ind + 1.
    print('Best validation LL score achieved in layer: ', best_ind + 1)
    print('Train set LL score: ', np.sum(best_module.getWeights(train_dataset)) / train_dataset.shape[0])
    print('Valid set LL score: ', np.sum(best_module.getWeights(valid_dataset)) / valid_dataset.shape[0])
    print('Test set LL score : ', np.sum(best_module.getWeights(test_dataset)) / test_dataset.shape[0])

    main_dict = {}
    utilM.save_cutset(main_dict, best_module.tree,
                      np.arange(train_dataset.shape[1]), ccpt_flag=True)
    np.savez_compressed(out_dir + data_name, module=main_dict)
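# Example call (a minimal sketch: key names follow the reads above; the paths
# and dataset name are hypothetical):
#
#     main_cutset_opt({'dir': '../dataset/', 'dn': 'nltcs',
#                      'max_depth': '10', 'output_dir': '../cn_output/'})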
def main_cnxd(parms_dict):
    dataset_dir = parms_dict['dir']
    data_name = parms_dict['dn']
    lamda = float(parms_dict['a'])
    beta_function = parms_dict['f']  # 'linear', 'square', or 'root' (square root)
    min_depth = int(parms_dict['min_depth'])
    max_depth = int(parms_dict['max_depth'])
    mt_dir = parms_dict['input_dir']
    tum_module = parms_dict['input_module']
    module_dir = parms_dict['output_dir']

    print('------------------------------------------------------------------')
    print('Learning CNxD using Data and MAP Intractable Model')
    print('------------------------------------------------------------------')

    train_filename = dataset_dir + data_name + '.ts.data'
    test_filename = dataset_dir + data_name + '.test.data'
    valid_filename = dataset_dir + data_name + '.valid.data'
    train_dataset = np.loadtxt(train_filename, dtype=int, delimiter=',')
    valid_dataset = np.loadtxt(valid_filename, dtype=int, delimiter=',')
    test_dataset = np.loadtxt(test_filename, dtype=int, delimiter=',')
    n_variables = train_dataset.shape[1]

    # Load the trained mixture of Chow-Liu trees.
    print('Start reloading MT...')
    reload_mix_clt = load_mt(mt_dir, tum_module)
    for t in reload_mix_clt.clt_list:
        t.nvariables = n_variables
        # Learn the junction tree for each Chow-Liu tree.
        jt = JT.JunctionTree()
        jt.learn_structure(t.topo_order, t.parents, t.cond_cpt)
        reload_mix_clt.jt_list.append(jt)

    print("Current Alpha: ", lamda)
    print("Current Function: ", beta_function)

    tree = []
    train_ll_score = np.zeros(max_depth)
    valid_ll_score = np.zeros(max_depth)
    test_ll_score = np.zeros(max_depth)
    learning_time = np.zeros(max_depth)
    # Entries for depths below min_depth stay 0.
    for i in range(min_depth, max_depth + 1):
        start = time.time()
        cnet = CNXD(tree, depth=i)
        cnet.learnStructure(reload_mix_clt, train_dataset, lamda, beta_function)
        learning_time[i - 1] = time.time() - start
        tree = copy.deepcopy(cnet.tree)

        # Average log-likelihood per example on each split.
        train_ll_score[i - 1] = cnet.computeLL(train_dataset) / train_dataset.shape[0]
        valid_ll_score[i - 1] = cnet.computeLL(valid_dataset) / valid_dataset.shape[0]
        test_ll_score[i - 1] = cnet.computeLL(test_dataset) / test_dataset.shape[0]

        main_dict = {}
        utilM.save_cutset(main_dict, cnet.tree, np.arange(n_variables),
                          ccpt_flag=True)
        np.savez_compressed(module_dir + data_name + '_' + str(lamda) + '_'
                            + beta_function + '_' + str(i), module=main_dict)

    print('CNxD train set LL scores')
    for l in range(max_depth):
        print(train_ll_score[l], l + 1)
    print()
    print('CNxD valid set LL scores')
    for l in range(max_depth):
        print(valid_ll_score[l], l + 1)
    print()
    print('CNxD test set LL scores')
    for l in range(max_depth):
        print(test_ll_score[l], l + 1)
    print()
    print('CNxD learning times: ')
    for l in range(max_depth):
        print(np.sum(learning_time[0:l + 1]), l + 1)
    print()
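# Example call (a minimal sketch: key names follow the reads above; all values
# are hypothetical -- 'a' is the lamda/alpha parameter and 'f' is the beta
# function, 'linear', 'square', or 'root'):
#
#     main_cnxd({'dir': '../dataset/', 'dn': 'nltcs', 'a': '0.5',
#                'f': 'linear', 'min_depth': '1', 'max_depth': '10',
#                'input_dir': '../mt_output/', 'input_module': 'nltcs_mt',
#                'output_dir': '../cnxd_output/nltcs/'})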
def main_bag_cnet(parms_dict):
    print("----------------------------------------------------")
    print("Learning Bags of Cutset Networks ")
    print("----------------------------------------------------")
    dataset_dir = parms_dict['dir']
    data_name = parms_dict['dn']
    n_components = int(parms_dict['ncomp'])
    max_depth = int(parms_dict['max_depth'])
    sel_option = int(parms_dict['sp'])
    depth_option = int(parms_dict['dp'])
    output_dir = parms_dict['output_dir']

    train_name = dataset_dir + data_name + '.ts.data'
    valid_name = dataset_dir + data_name + '.valid.data'
    test_name = dataset_dir + data_name + '.test.data'
    data_train = np.loadtxt(train_name, delimiter=',', dtype=np.uint32)
    data_valid = np.loadtxt(valid_name, delimiter=',', dtype=np.uint32)
    data_test = np.loadtxt(test_name, delimiter=',', dtype=np.uint32)

    bag_cnet = BAG_CNET()
    bag_cnet.learnStructure(data_train, n_components, max_depth,
                            node_sel_option=sel_option,
                            depth_sel_option=depth_option)

    # Average log-likelihood per example on each split.
    train_ll = bag_cnet.computeLL(data_train) / data_train.shape[0]
    valid_ll = bag_cnet.computeLL(data_valid) / data_valid.shape[0]
    test_ll = bag_cnet.computeLL(data_test) / data_test.shape[0]

    print('Train set LL scores')
    print(train_ll)
    print('Valid set LL scores')
    print(valid_ll)
    print('Test set LL scores')
    print(test_ll)

    # Save each component network and the mixture weights.
    for i in range(n_components):
        main_dict = {}
        utilM.save_cutset(main_dict, bag_cnet.cnet_list[i].tree,
                          np.arange(data_train.shape[1]), ccpt_flag=True)
        np.savez_compressed(output_dir + data_name + '_' + str(i),
                            module=main_dict)
    np.savetxt(output_dir + data_name + '_component_weights.txt',
               bag_cnet.mixture_weight, delimiter=',')
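# Example call (a minimal sketch: key names follow the reads above; values are
# hypothetical -- 'sp' and 'dp' are the integer node- and depth-selection
# options forwarded to BAG_CNET.learnStructure):
#
#     main_bag_cnet({'dir': '../dataset/', 'dn': 'nltcs', 'ncomp': '10',
#                    'max_depth': '10', 'sp': '0', 'dp': '0',
#                    'output_dir': '../bcnet_output/'})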
def main_bag_cnet():
    dataset_dir = sys.argv[2]
    data_name = sys.argv[4]
    n_components = int(sys.argv[6])
    max_depth = int(sys.argv[8])
    sel_option = int(sys.argv[10])
    depth_option = int(sys.argv[12])

    train_name = dataset_dir + data_name + '.ts.data'
    valid_name = dataset_dir + data_name + '.valid.data'
    test_name = dataset_dir + data_name + '.test.data'
    data_train = np.loadtxt(train_name, delimiter=',', dtype=np.uint32)
    data_valid = np.loadtxt(valid_name, delimiter=',', dtype=np.uint32)
    data_test = np.loadtxt(test_name, delimiter=',', dtype=np.uint32)

    print("Learning Bags of Cutset Network on original data ......")
    bag_cnet = BAG_CNET()
    bag_cnet.learnStructure(data_train, n_components, max_depth,
                            node_sel_option=sel_option,
                            depth_sel_option=depth_option)

    # Average log-likelihood per example on each split.
    train_ll = bag_cnet.computeLL(data_train) / data_train.shape[0]
    valid_ll = bag_cnet.computeLL(data_valid) / data_valid.shape[0]
    test_ll = bag_cnet.computeLL(data_test) / data_test.shape[0]

    print('Train set LL scores')
    print(train_ll)
    print('Valid set LL scores')
    print(valid_ll)
    print('Test set LL scores')
    print(test_ll)

    # Save each component network and the mixture weights.
    output_dir = '../bcnet_output/'
    for i in range(n_components):
        main_dict = {}
        utilM.save_cutset(main_dict, bag_cnet.cnet_list[i].tree,
                          np.arange(data_train.shape[1]), ccpt_flag=True)
        np.savez_compressed(output_dir + data_name + '_' + str(i),
                            module=main_dict)
    np.savetxt(output_dir + data_name + '_component_weights.txt',
               bag_cnet.mixture_weight, delimiter=',')