def test_exact_against_bp(self):
    """Print how closely three BP variants track the exact algorithm.

    Builds 100 ``"random_tree"`` binary MRFs with 8 nodes each, runs the
    ``exact``, ``bp``, ``bp_nonsparse`` and ``tree_bp`` algorithms in
    ``"marginal"`` mode on the same list of graphs, and prints the Pearson
    correlation between the exact values and each BP variant.

    Nothing is asserted; the method only reports the correlations.
    """
    n_trials = 100

    def second_entries(results):
        # Flatten per-graph / per-node results, keeping element [1] of
        # every node result.
        return [node_res[1] for graph_res in results for node_res in graph_res]

    # Algorithms are instantiated in the same order as before.
    bp = get_algorithm("bp")("marginal")
    tree_bp = get_algorithm("tree_bp")("marginal")
    bp_n = get_algorithm("bp_nonsparse")("marginal")
    exact = get_algorithm("exact")("marginal")

    graphs = [
        construct_binary_mrf("random_tree", n_nodes=8, shuffle_nodes=True)
        for _ in range(n_trials)
    ]

    # Run order: exact, bp, bp_nonsparse, tree_bp.
    exact_res = exact.run(graphs)
    bp_res = bp.run(graphs)
    bp_n_res = bp_n.run(graphs)
    tree_bp_res = tree_bp.run(graphs)

    exact_vals = second_entries(exact_res)
    bp_vals = second_entries(bp_res)
    bp_n_vals = second_entries(bp_n_res)
    tree_bp_vals = second_entries(tree_bp_res)

    # All correlations are computed before anything is printed.
    corr_bp = pearsonr(exact_vals, bp_vals)
    corr_bpn = pearsonr(exact_vals, bp_n_vals)
    corr_treebp = pearsonr(exact_vals, tree_bp_vals)

    print("Correlation between exact and BP:", corr_bp[0])
    print("Correlation between exact and BP nonsparse:", corr_bpn[0])
    print("Correlation between exact and tree BP:", corr_treebp[0])
def _test_exact_against_mcmc(self):
    """Print the exact-vs-MCMC correlation for several sizes and sample counts.

    For each graph size, 100 ``"fc"`` binary MRFs are generated and solved
    with the ``exact`` algorithm once. MCMC is then run on the same graphs
    for every sample count, and the Pearson correlation between the
    flattened MCMC and exact values is printed as
    ``size,n_samples: correlation ...``.
    """
    sizes = [5, 10, 15]
    n_samples = [500, 1000, 2000, 5000, 10000]
    n_trials = 100

    mcmc = get_algorithm("mcmc")("marginal")
    exact = get_algorithm("exact")("marginal")

    def get_exp_data(n_trials, n_nodes):
        # One batch of n_trials graphs with n_nodes nodes each.
        return [
            construct_binary_mrf("fc", n_nodes=n_nodes, shuffle_nodes=True)
            for _ in range(n_trials)
        ]

    def second_entries(results):
        # Element [1] of every node result, over all graphs.
        return [node_res[1] for graph_res in results for node_res in graph_res]

    for size in sizes:
        graphs = get_exp_data(n_trials, size)
        exact_res = exact.run(graphs)

        for n_samp in n_samples:
            mcmc_res = mcmc.run(graphs, n_samp)

            mcmc_vals = second_entries(mcmc_res)
            exact_vals = second_entries(exact_res)

            corr_mcmc = pearsonr(mcmc_vals, exact_vals)
            print("{},{}: correlation between exact and MCMC: {}".format(
                size, n_samp, corr_mcmc[0]))
def run_sg_with_method(self, graph, algorithm, verbose):
    """Run ``LabelSG`` with one partition algorithm and print its error.

    The reference result comes from running the MCMC marginal algorithm
    directly on ``graph``; the same MCMC object is also handed to
    ``LabelSG`` as its ``inf_algo``.

    Args:
        graph: the graph to label (wrapped in a one-element list for both
            runs).
        algorithm: forwarded to ``LabelSG(algorithm=...)`` and echoed in
            the printed report.
        verbose: forwarded to ``LabelSG.run``.

    Prints the error and the wall-clock time of the ``LabelSG`` run.
    """
    mcmc = get_algorithm("mcmc")("marginal")
    labelSG = LabelSG(algorithm=algorithm, inf_algo=mcmc)
    true_res = mcmc.run([graph])

    t0 = time()
    res = labelSG.run([graph], verbose=verbose)
    # Stop the clock before the error computation so only the run is timed.
    elapsed = time() - t0

    # Root of the summed squared differences. The previous expression,
    # sqrt(sum(diff) ** 2), reduced to |sum(diff)|, which lets errors of
    # opposite sign cancel out.
    diff = np.asarray(res) - np.asarray(true_res)
    mse_err = np.sqrt(np.sum(diff ** 2))

    print(
        f"Partition {algorithm} MSE error: \t{mse_err} in \t{elapsed} seconds"
    )
def _test_gnn(self):
    """Run the pretrained GNN inference on ``self.graph`` if a model exists.

    Looks for ``pretrained/gnn_model.pt`` relative to the working
    directory. If the file is missing, prints a notice and does nothing
    else; otherwise builds the ``gnn_inference`` algorithm in
    ``"marginal"`` mode and runs it on the graph. The output is not
    checked.
    """
    # GGNN parameters
    graph = self.graph
    n_nodes = graph.W.shape[0]
    n_hidden_states = 5
    message_dim_P = 5
    hidden_unit_message_dim = 64
    hidden_unit_readout_dim = 64
    T = 10
    # The next two values are not consumed by the inference call below.
    learning_rate = 1e-2
    epochs = 10

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    gnn_constructor = get_algorithm("gnn_inference")

    model_path = 'pretrained/gnn_model.pt'
    if not os.path.isfile(model_path):
        print('pretrained model needed')
        return

    gnn_inference = gnn_constructor(
        'marginal', n_nodes, n_hidden_states, message_dim_P,
        hidden_unit_message_dim, hidden_unit_readout_dim, T, model_path)
    out = gnn_inference.run(graph, device)
def get_map(self, algo_obj=None, algo=None):
    """Run inference on this object in ``"map"`` mode and return the result.

    Args:
        algo_obj: object exposing ``run(graph, mode=...)``. When given, it
            is used directly and ``algo`` is ignored.
        algo: name passed to ``get_algorithm`` when ``algo_obj`` is not
            supplied; falls back to ``self.algo_map`` when ``None``.

    Returns:
        Whatever the chosen algorithm's ``run`` returns.
    """
    if algo_obj is not None:
        return algo_obj.run(self, mode="map")

    algo_name = self.algo_map if algo is None else algo
    return get_algorithm(algo_name).run(self, mode="map")
def _test_mcmc_runtimes(self):
    """Print MCMC wall-clock time per graph for several sizes and sample counts.

    For every graph size a batch of ``n_trials`` ``"fc"`` binary MRFs is
    generated; MCMC marginal inference is then timed on that batch for
    each sample count and the average time per graph is printed as
    ``size,n_samples: t seconds per graph``.
    """
    sizes = [5, 15, 50, 500, 1000]
    n_samples = [500, 1000, 2000]
    n_trials = 10
    mcmc = get_algorithm("mcmc")("marginal")

    def get_exp_data(n_trials, n_nodes):
        # One batch of n_trials graphs with n_nodes nodes each.
        return [
            construct_binary_mrf("fc", n_nodes=n_nodes, shuffle_nodes=True)
            for _ in range(n_trials)
        ]

    for size in sizes:
        graphs = get_exp_data(n_trials, size)
        for n_samp in n_samples:
            t0 = time()
            mcmc.run(graphs, n_samp)
            elapsed = time() - t0
            # Divide by the real batch size rather than a literal 10, so
            # the figure stays correct if n_trials is changed.
            print("{},{}: {} seconds per graph".format(
                size, n_samp, elapsed / len(graphs)))
def _test_exact_probs(self):
    """Check ``exact.compute_probs`` against a brute-force 3-node table.

    For each of the 8 states in {-1, +1}^3 the score
    ``s.W.s + b.s`` is computed from the graph's ``W`` and ``b``,
    exponentiated and normalised so the table sums to one. The result must
    match ``compute_probs(W, b, n_nodes)`` up to ``np.allclose`` tolerance.
    """
    graph = construct_binary_mrf("fc", 3)

    # Brute-force table indexed by (i, j, k) in {0, 1}^3.
    scores = np.zeros((2, 2, 2))
    for idx in np.ndindex(2, 2, 2):
        state = 2 * np.array(idx) - 1  # map {0, 1} -> {-1, +1}
        scores[idx] = state.dot(graph.W.dot(state)) + graph.b.dot(state)

    probs = np.exp(scores)
    probs /= probs.sum()

    exact = get_algorithm("exact")("marginal")
    exact_probs = exact.compute_probs(graph.W, graph.b, graph.n_nodes)
    assert np.allclose(probs, exact_probs)
def run_lbp_on_graph(self, graph):
    """Run ``LabelProp`` on ``graph`` with subgraph sizes 1, 5 and 10.

    For each size, ``LabelProp([size], exact)`` is run on the graph and its
    result is compared with the ``exact`` marginal algorithm run directly
    on the same graph. The error for each size is printed.

    Args:
        graph: the graph to label (wrapped in a one-element list for every
            run).
    """
    exact = get_algorithm("exact")("marginal")

    for sg_size in (1, 5, 10):
        print(f"With subgraph of size {sg_size}")
        lbp = LabelProp([sg_size], exact)
        res = lbp.run([graph])
        # Recomputed after every LabelProp run, as in the original code.
        true_res = exact.run([graph])

        # Root of the summed squared differences. The previous expression,
        # sqrt(sum(diff) ** 2), reduced to |sum(diff)|, which lets errors
        # of opposite sign cancel out.
        diff = np.asarray(res) - np.asarray(true_res)
        mse_err = np.sqrt(np.sum(diff ** 2))
        print(f"MSE error: {mse_err}")
def run_experiment(train_set_name, test_set_name, inference_mode="marginal",
                   base_data_dir=DFLT_DATA_DIR, model_base_dir=DFLT_MODEL_DIR):
    """Compare GNN, BP and MCMC inference on one test set.

    Original note: tests for in-sample (same structure, same size,
    marginals).

    The GNN inference object is built with the model path
    ``model_base_dir/train_set_name``; GNN, BP and MCMC are then run on the
    test set and the wall-clock time per test graph is recorded for each.

    * ``inference_mode == "marginal"``: element [1] of every node result is
      collected for the ground truth and the three algorithms, then saved
      and plotted under ``./experiments/``.
    * any other mode: results are treated as MAP labels and the accuracy of
      each algorithm against ``g.map`` is printed.

    Args:
        train_set_name: dataset name used for the training data and as the
            model file name.
        test_set_name: dataset name of the test data.
        inference_mode: mode passed to the dataset loader and to every
            algorithm constructor.
        base_data_dir: directory holding the ``train`` and ``test`` folders.
        model_base_dir: directory holding saved models.
    """
    train_path = os.path.join(base_data_dir, "train")
    test_path = os.path.join(base_data_dir, "test")
    model_load_path = os.path.join(model_base_dir, train_set_name)

    # The training set is loaded but not used further in this function.
    train_data = get_dataset_by_name(train_set_name, train_path)
    test_data = get_dataset_by_name(test_set_name, test_path,
                                    mode=inference_mode)

    # GNN hyperparameters handed to the constructor together with the
    # model path.
    n_hidden_states = 5
    message_dim_P = 5
    hidden_unit_message_dim = 64
    hidden_unit_readout_dim = 64
    T = 10

    gnn_inference = get_algorithm("gnn_inference")(
        inference_mode, n_hidden_states, message_dim_P,
        hidden_unit_message_dim, hidden_unit_readout_dim, T,
        model_load_path, USE_SPARSE_GNN)

    times = {}

    def seconds_per_graph(started):
        # Elapsed wall-clock time averaged over the test graphs.
        return (time() - started) / len(test_data)

    # --- run inference on the test set ---
    started = time()
    gnn_res = gnn_inference.run(test_data, DEVICE)
    times["gnn"] = seconds_per_graph(started)

    # For BP and MCMC the constructor call is inside the timed region.
    started = time()
    bp = get_algorithm("bp")(inference_mode)
    bp_res = bp.run(test_data, use_log=True, verbose=False)
    times["bp"] = seconds_per_graph(started)

    started = time()
    mcmc = get_algorithm("mcmc")(inference_mode)
    mcmc_res = mcmc.run(test_data)
    times["mcmc"] = seconds_per_graph(started)

    # All loaded graphs have ground truth set.
    if inference_mode == "marginal":
        def second_entries(per_graph):
            # Element [1] of every node entry, over all graphs.
            return [m[1] for graph_res in per_graph for m in graph_res]

        true_labels = second_entries(g.marginal for g in test_data)
        gnn_labels = second_entries(gnn_res)
        bp_labels = second_entries(bp_res)
        mcmc_labels = second_entries(mcmc_res)

        # save these results
        save_marginal_results(
            true_labels, gnn_labels, bp_labels, mcmc_labels,
            filename="./experiments/saved_exp_res/res_{}_{}".format(
                train_set_name, test_set_name))

        # plot them
        plot_marginal_results_individual(
            true_labels, gnn_labels, bp_labels, mcmc_labels,
            filename="./experiments/res_{}_{}".format(
                train_set_name, test_set_name))

    # MAP: only numeric
    else:
        def flat_array(per_graph):
            # Concatenate the per-graph label sequences into one array.
            return np.array(
                [label for graph_res in per_graph for label in graph_res])

        true_labels = flat_array(g.map for g in test_data)

        # The GNN returns a pair per node; it is mapped to -1 when entry 0
        # is strictly larger than entry 1, and to +1 otherwise.
        gnn_labels = np.array([
            -1 if m[0] > m[1] else +1
            for graph_res in gnn_res
            for m in graph_res
        ])
        gnn_accuracy = np.mean(true_labels == gnn_labels)

        bp_labels = flat_array(bp_res)
        bp_accuracy = np.mean(true_labels == bp_labels)

        mcmc_labels = flat_array(mcmc_res)
        mcmc_accuracy = np.mean(true_labels == mcmc_labels)

        print("Accuracies: GNN {}, BP {}, MCMC {}".format(
            gnn_accuracy, bp_accuracy, mcmc_accuracy))

    # NOTE(review): the original indentation was lost; this print is
    # assumed to sit at function level (runs for both modes) - confirm.
    print("Runtimes", times)
# construct graphical models # either new-data-generation or data labeling scenario if args.unlab_graphs_path == 'none' or args.algo == 'none': # create new graphs graphs = [] for _ in range(args.num): # sample n_nodes from range n_nodes = np.random.choice(size_range) graphs.append(construct_binary_mrf(args.graph_struct, n_nodes)) else: # both are non-None: need to load data and label it path = os.path.join(args.base_data_dir, args.unlab_graphs_path) graphs = load_graphs(path + '.npy') # label them using a chosen algorithm if args.algo in ['exact', 'bp', 'mcmc']: algo_obj = get_algorithm(args.algo)(args.mode) list_of_res = algo_obj.run(graphs, verbose=args.verbose) # Propagate-from-subgraph algorithm (pt 2.2): elif args.algo.startswith('label_prop'): # e.g. label_prop_exact_10_5 inf_algo_name, sg_sizes = args.algo.split('_')[2], args.algo.split( '_')[3:] sg_sizes = list(map(int, sg_sizes)) inf_algo = get_algorithm(inf_algo_name)(args.mode) label_prop = LabelProp(sg_sizes, inf_algo, max_iter=30) list_of_res = label_prop.run(graphs, verbose=args.verbose) # Subgraph labeling algorithm (pt 2.1): elif args.algo == 'label_tree': lbt = LabelTree(args.mode)
def _test_mcmc(self):
    """Run MCMC marginal inference on ``self.graph2`` and print the result."""
    sampler = get_algorithm("mcmc")("marginal")
    # Inference runs first; both lines are printed afterwards.
    marginals = sampler.run([self.graph2])
    print("mcmc")
    print(marginals)
def _test_bp_nonsparse(self):
    """Run non-sparse BP (log domain) on ``self.graph`` and print the result.

    Known issue recorded by the original author: BP fails on n=2 and on
    the n=3 star graph, while the fully connected n=3 graph is fine.
    """
    solver = get_algorithm("bp_nonsparse")("marginal")
    marginals = solver.run([self.graph], use_log=True)
    print("bp nonsparse")
    print(marginals)
def _test_tree_bp(self):
    """Run tree BP marginal inference on ``self.graph`` and print the result."""
    solver = get_algorithm("tree_bp")("marginal")
    marginals = solver.run([self.graph])
    print("tree_bp")
    print(marginals)
def _test_exact(self):
    """Print the exact marginal inference result for ``self.graph``."""
    # check probs computation
    solver = get_algorithm("exact")("marginal")
    # The label is printed before inference runs, as in the original.
    print("exact")
    marginals = solver.run([self.graph])
    print(marginals)
# dataset = [g for g in dataset if g.marginal is not None] # elif args.mode == "map": # dataset = [g for g in dataset if g.map is not None] # GGNN parmeters n_hidden_states = 5 message_dim_P = 5 hidden_unit_message_dim = 64 hidden_unit_readout_dim = 64 T = 10 learning_rate = 1e-2 # number of epochs epochs = args.epochs gnn_constructor = get_algorithm("gnn_inference") gnn_inference = gnn_constructor(args.mode, n_hidden_states, message_dim_P,hidden_unit_message_dim, hidden_unit_readout_dim, T, sparse=USE_SPARSE_GNN) if args.use_pretrained != 'none': model_path_pre = os.path.join(args.model_dir, args.use_pretrained) gnn_inference.load_model(model_path_pre) print(f"Model loaded from {model_path_pre}") optimizer = Adam(gnn_inference.model.parameters(), lr=learning_rate) if args.mode == "marginal": # criterion = KLDivLossComputer() criterion = CrossEntropyComputer() else: criterion = CrossEntropyMAPComputer()
# Tail of the constructor; its beginning lies outside this view and these
# statements are unchanged.
self.default_algo = {"marginal": "bp", "map": "bp"}
# params = {"W": W, "b": b}
super(BinaryMRF, self).__init__(
    n_nodes=self.n_nodes, default_algo=self.default_algo)


def get_subgraph_on_nodes(self, node_list):
    """Return the ``BinaryMRF`` induced on ``node_list``.

    ``node_list`` does not need to be ordered; both the weight matrix and
    the bias vector of the result follow the order given in ``node_list``.

    Args:
        node_list: indices of the nodes to keep.

    Returns:
        A new ``BinaryMRF`` built from the sub-matrix of ``self.W`` and the
        matching entries of ``self.b``.
    """
    nodes = list(node_list)
    nx_graph = nx.from_numpy_matrix(self.W)
    sg = nx_graph.subgraph(nodes)
    # Pin the row/column order to node_list. Without nodelist the matrix
    # follows the subgraph's own node iteration order, which does not have
    # to match the order used for b_sg below.
    W_sg = np.array(nx.to_numpy_matrix(sg, nodelist=nodes))
    b_sg = self.b[nodes]  # in the same order
    return BinaryMRF(W_sg, b_sg)


def get_max_abs_spanning_tree(self):
    """Return the ``BinaryMRF`` restricted to a maximum-|W| spanning tree.

    A spanning tree maximising the sum of absolute edge weights is
    selected; every edge outside the tree is zeroed in ``W`` while the
    kept edges retain their original signed weights. ``b`` is passed
    through unchanged.
    """
    n_nodes = self.W.shape[0]
    nx_graph = nx.from_numpy_matrix(np.abs(self.W))
    # The method promises the *max* abs spanning tree; the previous call to
    # minimum_spanning_tree kept the weakest couplings instead.
    tree = nx.maximum_spanning_tree(nx_graph)
    # An explicit nodelist keeps the mask aligned with self.W regardless of
    # the node order inside the returned tree.
    W_abs_tree = np.array(
        nx.to_numpy_matrix(tree, nodelist=list(range(n_nodes))))
    W_mask = np.where(W_abs_tree > 0, 1, 0)
    # zero out unused edges:
    W_tree = W_mask * self.W
    b_tree = self.b
    return BinaryMRF(W_tree, b_tree)


if __name__ == "__main__":
    print(get_algorithm("bp"))