def main(args):
    # load configuration
    config = load_config(os.path.join(args.restore, 'config.json'))
    # create autoencoder
    ae = get_network(config['hiddens'], logger=g_logger)
    # build graph
    sess, saver, _ = build_graph(ae, input_shape=[None, 784])
    restore(sess, saver, args.restore)
    test_result = os.path.join(args.result, 'test')
    # make the result directory if it does not exist
    if not os.path.exists(test_result):
        os.makedirs(test_result)
    # use mnist for test
    mnist = tf.contrib.learn.datasets.load_dataset('mnist')
    row_col_size = 10
    cnt = 0
    for x, y in next_mnist_data(mnist, 'test', batch_size=row_col_size**2):
        x_ = sess.run(ae.x_, feed_dict={ae.x: x})
        save_mnist_images(x, test_result, cnt, suffix='original',
                          row_col_size=row_col_size)
        save_mnist_images(x_, test_result, cnt, suffix='reconstruct',
                          row_col_size=row_col_size)
        cnt += 1
def main(args):
    import logging
    log.setLevel(logging.DEBUG)
    log.info("start")
    num_devices = len(F.cuda_places())
    model = DeepwalkModel(args.num_nodes, args.hidden_size, args.neg_num,
                          False, False, 1.)
    pyreader = model.pyreader
    loss = model.forward()
    train_steps = int(args.num_nodes * args.epoch / args.batch_size /
                      num_devices)
    optimization(args.lr * num_devices, loss, train_steps, args.optimizer)

    place = F.CUDAPlace(0)
    exe = F.Executor(place)
    exe.run(F.default_startup_program())

    graph = build_graph(args.num_nodes, args.edge_path)
    gen_func = build_gen_func(args, graph)

    pyreader.decorate_tensor_provider(gen_func)
    pyreader.start()

    train_prog = F.default_main_program()

    if args.warm_start_from_dir is not None:
        F.io.load_params(exe, args.warm_start_from_dir, train_prog)

    train_exe = get_parallel_exe(train_prog, loss)
    train(train_exe, exe, train_prog, loss, pyreader, args, train_steps)
def solve_it_nontrivial(node_count, edge_count, edges):
    """Graph coloring solution based on DFS of the adjacency matrix.

    Parameters
    ----------
    node_count -- number of nodes
    edge_count -- number of edges
    edges -- list of (e_i1, e_i2) tuples representing edges

    Returns
    -------
    optimal -- whether this is a proven optimal solution
    colors -- list assigning a color index to each node
    """
    # Create the adjacency matrix representing the graph
    optimal = 1
    graph = build_graph(node_count, edge_count, edges)

    # Get the cardinalities sorted with corresponding row indices
    cardinalities = np.sum(graph, axis=1)
    cardinalities = [(i, int(cardinalities[i]))
                     for i in range(len(cardinalities))]
    cardinalities.sort(key=lambda x: -x[1])

    # Create the colors array
    colors = [0] * node_count
    print("Nodes: ", node_count)
    colors = recolor_iterative_greedy(node_count, colors, graph)
    return (optimal, colors)
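The recolor_iterative_greedy helper is not shown here; for reference, a minimal sketch (illustrative names, not from the source) of the standard greedy coloring such a routine typically refines:

import numpy as np

def greedy_color(graph):
    """Assign each node the smallest color unused by its already-colored neighbors."""
    n = graph.shape[0]
    colors = [-1] * n
    for v in range(n):
        used = {colors[u] for u in range(n) if graph[v, u] and colors[u] >= 0}
        c = 0
        while c in used:
            c += 1
        colors[v] = c
    return colors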
def compute_success_rate():
    files = [
        os.path.join('.', 'Maps', f) for f in os.listdir('Maps')
        if f[-3:] == 'mat' and f[-12:-4] != 'solution'
    ]
    print('Files Found: {}'.format(files))
    success_count = {}
    for window_ratio in [1, 2, 3, 4, 5]:
        success_count[window_ratio] = 10
        for f in files:
            cost_matrix = load_cost_matrix(f)
            num_nodes = cost_matrix.shape[0]
            score_vector = np.ones(num_nodes)
            task_windows = setup_task_windows(score_vector, window_ratio)
            max_cost = get_starting_cost(cost_matrix, task_windows)
            try:
                plan, visit_times, profit, cost, solver_stats = get_solution(
                    score_vector, cost_matrix, task_windows, max_cost)
            except ValueError:
                print('Failed with cost {}'.format(max_cost))
                success_count[window_ratio] -= 1
                continue
            wait_times = compute_wait_times(plan, cost_matrix, visit_times)
            visit_order_waits, visit_times_waits = get_arrive_depart_pairs(
                plan, visit_times, wait_times, cost)
            save_solution(f, visit_order_waits, visit_times_waits,
                          solver_type='tw')
            g, tour, verified_cost = build_graph(plan, score_vector,
                                                 cost_matrix)
            print(f)
            msg = 'The maximum profit tour found is \n'
            for idx, k in enumerate(tour):
                msg += str(k)
                if idx < len(tour) - 1:
                    msg += ' -> '
                else:
                    msg += ' -> 0'
            print(msg)
            msg = 'Profit: {:.2f}, cost: {:.2f}, verification cost: {:.2f}'
            print(msg.format(profit, cost, verified_cost))
            print(solver_stats.solve_time)
            print(solver_stats.setup_time)
            msg = 'Time taken: {} seconds'
            time = solver_stats.solve_time + solver_stats.setup_time
            print(msg.format(time))
        success_count[window_ratio] /= 10.0
    print('Success probabilities across constraint ratios')
    print(success_count)
    return success_count
def parse_sentence(sentence, tokenizer, encoder, use_cuda=True):
    '''Implement the match part of MAMA
    '''
    tokenizer_name = str(tokenizer.__str__)
    inputs, tokenid2word_mapping, token2id, noun_chunks = create_mapping(
        sentence, return_pt=True, tokenizer=tokenizer)

    with torch.no_grad():
        if use_cuda:
            for key in inputs.keys():
                inputs[key] = inputs[key].cuda()
        outputs = encoder(**inputs, output_attentions=True)
    trim = True
    if 'GPT2' in tokenizer_name:
        trim = False

    '''
    Use average of last layer attention : page 6, section 3.1.2
    '''
    attention = process_matrix(outputs[2], avg_head=True, trim=trim,
                               use_cuda=use_cuda)

    merged_attention = compress_attention(attention, tokenid2word_mapping)
    attn_graph = build_graph(merged_attention)

    tail_head_pairs = []
    for head in noun_chunks:
        for tail in noun_chunks:
            if head != tail:
                tail_head_pairs.append((token2id[head], token2id[tail]))

    black_list_relation = set([token2id[n] for n in noun_chunks])

    all_relation_pairs = []
    id2token = {value: key for key, value in token2id.items()}

    with Pool(10) as pool:
        params = [(
            pair[0],
            pair[1],
            attn_graph,
            max(tokenid2word_mapping),
            black_list_relation,
        ) for pair in tail_head_pairs]
        for output in pool.imap_unordered(bfs, params):
            if len(output):
                all_relation_pairs += [(o, id2token) for o in output]

    triplet_text = []
    with Pool(10) as pool:
        for triplet in pool.imap_unordered(filter_relation_sets,
                                           all_relation_pairs):
            if len(triplet) > 0:
                triplet_text.append(triplet)
    return triplet_text
def compute_solve_times(files):
    problem_sizes = [3, 4, 5, 6, 7, 8, 9, 10]
    small_map_solve_times = {p: [] for p in problem_sizes}
    large_map_solve_times = {p: [] for p in problem_sizes}
    for n in problem_sizes:
        for f in files:
            cost_matrix = load_cost_matrix(f)
            # cost_matrix = cost_matrix[0:n, 0:n]
            # print('----- Edge Costs -----')
            # print(cost_matrix)
            num_nodes = cost_matrix.shape[0]
            score_vector = np.ones(num_nodes)
            task_windows = setup_task_windows(score_vector)
            max_cost = get_starting_cost(cost_matrix, task_windows)
            try:
                plan, visit_times, profit, cost, solver_stats = get_solution(
                    score_vector, cost_matrix, task_windows, max_cost)
            except ValueError:
                print('Failed with cost {}'.format(max_cost))
                continue
            wait_times = compute_wait_times(plan, cost_matrix, visit_times)
            visit_order_waits, visit_times_waits = get_arrive_depart_pairs(
                plan, visit_times, wait_times, cost)
            save_solution(f, visit_order_waits, visit_times_waits,
                          solver_type='tw')
            g, tour, verified_cost = build_graph(plan, score_vector,
                                                 cost_matrix)
            print(f)
            msg = 'The maximum profit tour found is \n'
            for idx, k in enumerate(tour):
                msg += str(k)
                if idx < len(tour) - 1:
                    msg += ' -> '
                else:
                    msg += ' -> 0'
            print(msg)
            msg = 'Profit: {:.2f}, cost: {:.2f}, verification cost: {:.2f}'
            print(msg.format(profit, cost, verified_cost))
            msg = 'Time taken: {} seconds'
            time = solver_stats.solve_time + solver_stats.setup_time
            print(msg.format(time))
            # display_results(g, plan, task_windows, visit_times, wait_times, cost)
            if f[-12:-7] == '20x20':
                small_map_solve_times[n].append(time)
            elif f[-12:-7] == '50x50':
                large_map_solve_times[n].append(time)
    return (small_map_solve_times, large_map_solve_times)
def test(args):
    graph = build_graph(args.num_nodes, args.edge_path)
    gen_func = build_gen_func(args, graph)

    start = time.time()
    num = 10
    for idx, _ in enumerate(gen_func()):
        if idx % num == num - 1:
            log.info("%s" % (1.0 * (time.time() - start) / num))
            start = time.time()
def get_distances(path, source):
    assert os.path.exists(path)
    g = build_graph(path)
    distances = nx.shortest_path_length(g, source=source, weight="weight")
    # each node is a (type, x, y, label) tuple; key the result by label
    distances_filtered = dict()
    for key in distances.keys():
        _, _, _, position = key
        distances_filtered[position] = distances[key]
    return distances_filtered
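For context, a small self-contained example (not from this repo) of the nx.shortest_path_length call used above: with source= and weight= it returns a dict mapping each reachable node to its weighted distance from the source.

import networkx as nx

g = nx.Graph()
g.add_edge("a", "b", weight=2.0)
g.add_edge("b", "c", weight=3.0)
# Single-source weighted distances come back as {node: distance}
print(nx.shortest_path_length(g, source="a", weight="weight"))
# {'a': 0, 'b': 2.0, 'c': 5.0}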
def walk(args):
    graph = build_graph(args.num_nodes, args.edge_path)
    num_sample_workers = args.num_sample_workers

    if args.train_files is None or args.train_files == "None":
        log.info("Walking from graph...")
        train_files = [None for _ in range(num_sample_workers)]
    else:
        log.info("Walking from train_data...")
        files = get_file_list(args.train_files)
        train_files = [[] for i in range(num_sample_workers)]
        for idx, f in enumerate(files):
            train_files[idx % num_sample_workers].append(f)

    def walk_to_file(walk_gen, filename, max_num):
        with open(filename, "w") as outf:
            num = 0
            for walks in walk_gen:
                for walk in walks:
                    outf.write("%s\n" % "\t".join([str(i) for i in walk]))
                    num += 1
                    if num % 1000 == 0:
                        log.info("Total: %s, %s walkpath is saved. " %
                                 (max_num, num))
                    if num == max_num:
                        return

    m_args = [(DeepwalkReader(
        graph,
        batch_size=args.batch_size,
        walk_len=args.walk_len,
        win_size=args.win_size,
        neg_num=args.neg_num,
        neg_sample_type=args.neg_sample_type,
        walkpath_files=None,
        train_files=train_files[i]).walk_generator(),
               "%s/%s" % (args.walkpath_files, i),
               args.epoch * args.num_nodes // args.num_sample_workers)
              for i in range(num_sample_workers)]
    ps = []
    for i in range(num_sample_workers):
        p = Process(target=walk_to_file, args=m_args[i])
        p.start()
        ps.append(p)
    for i in range(num_sample_workers):
        ps[i].join()
def hungary(self, dispatch_observ):
    if len(dispatch_observ) == 0:
        return []
    driver_id_orig2new, order_id_orig2new, driver_id_new2orig, order_id_new2orig = rehash(
        dispatch_observ)
    costs, row_is_driver = build_graph(dispatch_observ, driver_id_orig2new,
                                       order_id_orig2new)
    n = len(costs)
    m = len(costs[0])
    # the C routine fills this int32 buffer with the matching in place
    lmate = -np.ones(n, dtype=np.int32)
    lmate = lmate.ctypes.data_as(ctypes.c_void_p)
    dataptr = costs.ctypes.data_as(ctypes.c_void_p)
    self.hung.MaxProfMatching(dataptr, n, m, lmate)
    # reinterpret the filled buffer as a numpy array (no copy)
    array_pointer = ctypes.cast(lmate, ctypes.POINTER(ctypes.c_int * n))
    np_arr = np.frombuffer(array_pointer.contents, dtype=np.int32, count=n)
    lmate = np_arr.reshape((n, ))
    lmate = list(lmate)
    dispatch_action = get_pairs(lmate, row_is_driver, driver_id_new2orig,
                                order_id_new2orig)
    return dispatch_action
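A minimal sketch (hypothetical, with no native library involved) of the zero-copy ctypes round-trip hungary() relies on: a raw pointer to an int32 buffer is handed out, then np.frombuffer reinterprets the same memory as an array.

import ctypes
import numpy as np

n = 4
buf = -np.ones(n, dtype=np.int32)           # keep a reference so the buffer stays alive
ptr = buf.ctypes.data_as(ctypes.c_void_p)   # raw pointer a C routine would fill in place
arr_t = ctypes.cast(ptr, ctypes.POINTER(ctypes.c_int * n))
view = np.frombuffer(arr_t.contents, dtype=np.int32, count=n)
assert (view == buf).all()                  # same memory, no copy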
def __init__(self, params):
    self.p = params
    self.prj_path = Path(__file__).parent.resolve()
    self.time_stamp = time.strftime('%Y_%m_%d') + '_' + time.strftime(
        '%H:%M:%S')
    self.data = load_data(self.p.dataset)
    (self.num_nodes, self.train_data, self.valid_data, self.test_data,
     self.num_rels) = (self.data.num_nodes, self.data.train, self.data.valid,
                       self.data.test, self.data.num_rels)
    if torch.cuda.is_available() and params.gpu >= 0:
        self.device = torch.device(f'cuda:{params.gpu}')
    else:
        self.device = torch.device('cpu')
    self.val_test_data = preprocess({
        'train': self.train_data,
        'valid': self.valid_data,
        'test': self.test_data
    })
    self.data_iter = self.get_data_iter()
    # self.rel: relations in train set
    self.graph, self.rel, node_norm = build_graph(num_nodes=self.num_nodes,
                                                  num_rels=self.num_rels,
                                                  edges=self.train_data)
    self.rel = torch.from_numpy(self.rel).to(self.device)
    # used to sample sub-graph
    self.in_deg = self.graph.in_degrees(range(
        self.graph.number_of_nodes())).float().view(-1, 1)
    self.test_node_id = torch.arange(
        0, self.num_nodes, dtype=torch.long).view(-1, 1).to(self.device)
    self.test_edge_norm = node_norm_2_edge_norm(
        self.graph, torch.from_numpy(node_norm).view(-1, 1)).to(self.device)
    self.adj_list = get_adj(self.num_nodes, self.train_data)
    self.model = self.get_model()
    self.optimizer = torch.optim.Adam(self.model.parameters(), lr=params.lr)
def norikae():
    if request.method == "POST":
        start = request.form.get('start')
        dest = request.form.get('dest')
        choice = request.form.get('options')
        date = request.form.get('date')
        input_time = date.split()[1].split(':')
        # print(date)
        # print(date.split()[1].split(':'))

        # CASE 1: start and dest are the same station
        if start == dest:
            return "It's same whyyyy"
        else:
            if choice == CHOICES[0]:
                # non-weighted graph
                connect_list = utils.create_pairs(network)
                graph = utils.build_graph(connect_list, outtages)
                path, dist = bfs.print_bfs(graph, start, dest)
            elif choice == CHOICES[1]:
                pass
            elif choice == CHOICES[2]:
                graph = utils.build_weighted_graph(timeSchedule)
                dijkstra.print_dfs(graph, start, dest)
                schedules = utils.process_timeJson(timeSchedule)
                path, dist = None, None
            # return redirect('/')
            return render_template('result.html', path=path, dist=dist,
                                   time=None)
    else:
        rand_num = [random.randint(1, NUM_STATIONS + 1) for _ in range(2)]
        return render_template('norikae.html', network=network,
                               selected_stations=rand_num)
# -*- coding: utf-8 -*-
import utils
import graph
import string

g = utils.build_graph()
utils.clear()
ans = "s"
print("Welcome to Network of Thrones")
while ans == "s" or ans == "S":
    print("""
    Make your choice:
    1) Distance between 2 characters.
    2) Path between 2 characters.
    3) Find articulation points.
    4) Find bridges.
    0) Exit""")
    choice = int(input(">>> "))
    utils.clear()
    if choice == 0:
        break
    if choice == 3:
        points = graph.articulation_point(g)
        print("The articulation points are:")
        print("\n".join(points))
    elif choice == 4:
def get_time_data():
    np.random.seed(1)
    print(c.installed_solvers())
    files = [
        os.path.join('.', 'Maps', f) for f in os.listdir('Maps')
        if f[-3:] == 'mat' and f[-12:-4] != 'solution'
    ]
    maps20x20 = [f for f in files if '20x20' in f]
    maps50x50 = [f for f in files if '50x50' in f]
    big_maps = [f for f in files if '100_POI' in f]
    print(big_maps)
    # print('20x20 maps:')
    # print(maps20x20)
    # print('50x50 maps:')
    # print(maps50x50)
    runtimes = {}
    # scores = {}
    # for n in [4, 6, 8, 10, 12, 14, 16]:
    # for n in [4, 6, 8, 10, 12, 14]:
    # for n in [14]:
    for n in [4, 6, 8, 10, 12, 14]:
        runtimes[n] = []
        for f in big_maps:
            # print(f)
            cost_matrix = load_cost_matrix(f)
            # high diagonal costs cause numerical errors
            # use constraints to prevent travel to self
            # diagonal cost must be > 0 for this to work
            # but should be low
            np.fill_diagonal(cost_matrix, 1)
            cost_matrix = cost_matrix[0:n, 0:n]
            num_nodes = cost_matrix.shape[0]
            score_vector = np.ones(num_nodes)
            task_windows = setup_task_windows(score_vector)
            budget = 350  # works
            plan, reward, cost, solve_time = get_solution(
                score_vector, cost_matrix, budget)
            runtimes[n].append(solve_time)
            print('----- Plan -----')
            print(plan)
            print('----- Edge Costs -----')
            print(cost_matrix)
            print('----- Scores -----')
            print(score_vector)
            g, tour, verified_cost = build_graph(plan, score_vector,
                                                 cost_matrix)
            msg = 'The maximum reward tour found is \n'
            for idx, k in enumerate(tour):
                msg += str(k)
                if idx < len(tour) - 1:
                    msg += ' -> '
                else:
                    msg += ' -> 0'
            print(msg)
            a_times = get_arrival_times(plan, cost_matrix)
            print(a_times)
            print(get_plan_score(task_windows, plan, a_times))
            msg = 'Profit: {:.2f}, cost: {:.2f}, verification cost: {:.2f}'
            print(msg.format(reward, cost, verified_cost))
            print('Time taken: {:.2f} seconds'.format(solve_time))
            # display_results(g, tour, cost_matrix)
    # print(runtimes)
    with open('results_op.txt', 'w') as f:
        f.write(str(runtimes))
def train(args):
    import logging
    log.setLevel(logging.DEBUG)
    log.info("start")
    worker_num = int(os.getenv("PADDLE_TRAINERS_NUM", "0"))
    num_devices = int(os.getenv("CPU_NUM", 10))
    model = DeepwalkModel(args.num_nodes, args.hidden_size, args.neg_num,
                          args.is_sparse, args.is_distributed, 1.)
    pyreader = model.pyreader
    loss = model.forward()

    # init fleet
    init_role()

    train_steps = math.ceil(1. * args.num_nodes * args.epoch /
                            args.batch_size / num_devices / worker_num)
    log.info("Train step: %s" % train_steps)

    if args.optimizer == "sgd":
        args.lr *= args.batch_size * args.walk_len * args.win_size

    optimization(args.lr, loss, train_steps, args.optimizer)

    # init and run server or worker
    if fleet.is_server():
        fleet.init_server(args.warm_start_from_dir)
        fleet.run_server()

    if fleet.is_worker():
        log.info("start init worker done")
        fleet.init_worker()
        # only the worker loads the samples
        log.info("init worker done")

        exe = F.Executor(F.CPUPlace())
        exe.run(fleet.startup_program)
        log.info("Startup done")

        if args.dataset is not None:
            if args.dataset == "BlogCatalog":
                graph = data_loader.BlogCatalogDataset().graph
            elif args.dataset == "ArXiv":
                graph = data_loader.ArXivDataset().graph
            else:
                raise ValueError(args.dataset + " dataset doesn't exist")
            log.info("Load built-in %s dataset done." % args.dataset)
        elif args.walkpath_files is None or args.walkpath_files == "None":
            graph = build_graph(args.num_nodes, args.edge_path)
            log.info("Load graph from '%s' done." % args.edge_path)
        else:
            graph = build_fake_graph(args.num_nodes)
            log.info("Load fake graph done.")

        # bind gen
        gen_func = build_gen_func(args, graph)

        pyreader.decorate_tensor_provider(gen_func)
        pyreader.start()

        compiled_prog = build_complied_prog(fleet.main_program, loss)
        train_prog(exe, compiled_prog, loss, pyreader, args, train_steps)
    action='store_true',
    help=
    'Bootstrap relational classifier training with local classifier predictions.'
)
parser.add_argument(
    '-validation',
    default=False,
    action='store_true',
    help='Whether to test on the validation set (True) or the test set (False).')
args = parser.parse_args()

np.random.seed(args.seed)

# load data
adj, features, labels, idx_train, idx_val, idx_test = load_data(args.dataset)
graph, domain_labels = build_graph(adj, features, labels)

# train / test splits
train = idx_train
if args.validation:
    test = idx_val
else:
    test = idx_test
eval_idx = np.setdiff1d(range(adj.shape[0]), idx_train)

# run training
ica_accuracies = list()
for run in range(args.num_trials):
    t_begin = time.time()
        plan, visit_times, wait_times, cost)
    save_solution(f, visit_order_waits, visit_times_waits, solver_type='tw')
    # print('----- Visited -----')
    # print(np.sum(plan, axis=1))
    # print('----- Plan -----')
    # print(np.around(plan, 2).astype('int32'))
    # print('----- Edge Costs -----')
    # print(cost_matrix)
    # print('----- Wait Times -----')
    # print(np.around(wait_times, 2))
    g, tour, verified_cost = build_graph(plan, score_vector, cost_matrix)
    print(f)
    msg = 'The maximum profit tour found is \n'
    for idx, k in enumerate(tour):
        msg += str(k)
        if idx < len(tour) - 1:
            msg += ' -> '
        else:
            msg += ' -> 0'
    print(msg)
    score = get_plan_score(task_windows, plan, visit_times,
                           task_windows[:, 2])
    rewards[budget].append(score)
    times[budget].append(solve_time)
def main(args):
    hiddens = args.hiddens
    if args.restore:
        config = load_config(os.path.join(args.restore, 'config.json'))
        hiddens = config['hiddens']
    # create autoencoder
    ae = get_network(hiddens, logger=g_logger)
    # build graph
    sess, saver, init_op = build_graph(ae, [None, 784])
    if args.restore:
        restore(sess, saver, args.restore)
    else:
        g_logger.info('Initialize the model')
        sess.run(init_op)
    train_result = os.path.join(args.result, 'train')
    # make the result directory if it does not exist
    if not os.path.exists(train_result):
        os.makedirs(train_result)
    # save configuration
    save_dict = args.__dict__
    save_dict['hiddens'] = hiddens
    save_config(save_dict, os.path.join(args.result, 'config.json'))
    # use mnist for training
    mnist = tf.contrib.learn.datasets.load_dataset('mnist')
    figure = plt.figure(figsize=(8, 8))
    scatter_data = {}
    last_epoch = 0
    try:
        # learn for the requested number of epochs
        nodes = [ae.train, ae.loss, ae.z, ae.x_]
        for i in range(1, args.epoch + 1):
            losses = 0
            cnt = 0
            # get data with batch size
            for x, y in next_mnist_data(mnist, 'train'):
                _, loss, z, x_ = sess.run(nodes, feed_dict={ae.x: x})
                # make scatter data with latent variables (z)
                make_scatter_data(scatter_data, z, y)
                losses += loss
                cnt += 1
            last_epoch = i
            g_logger.info('epoch: {}, loss: {}'.format(i, losses / cnt))
            scatter(scatter_data, train_result, i)
            figure.clear()
            scatter_data.clear()
        # save checkpoint
        saver.save(sess, args.result + '/checkpoint', global_step=args.epoch)
    except KeyboardInterrupt:
        saver.save(sess, args.result + '/checkpoint', global_step=last_epoch)
def __init__(self, args, num_classes=1000):
    super(CNN, self).__init__()
    self.conv1 = depthwise_separable_conv_3x3(3, args.channels // 2, 2)
    self.bn1 = nn.BatchNorm2d(args.channels // 2)
    if args.net_type == 'small':
        self.conv2 = Triplet_unit(args.channels // 2, args.channels, 2)
        if args.resume:
            graph = load_graph(os.path.join(args.model_dir, 'conv3.yaml'))
        else:
            graph = build_graph(args.nodes, args)
            save_graph(graph, os.path.join(args.model_dir, 'conv3.yaml'))
        self.conv3 = StageBlock(graph, args.channels, args.channels)
        if args.resume:
            graph = load_graph(os.path.join(args.model_dir, 'conv4.yaml'))
        else:
            graph = build_graph(args.nodes, args)
            save_graph(graph, os.path.join(args.model_dir, 'conv4.yaml'))
        self.conv4 = StageBlock(graph, args.channels, args.channels * 2)
        if args.resume:
            graph = load_graph(os.path.join(args.model_dir, 'conv5.yaml'))
        else:
            graph = build_graph(args.nodes, args)
            save_graph(graph, os.path.join(args.model_dir, 'conv5.yaml'))
        self.conv5 = StageBlock(graph, args.channels * 2, args.channels * 4)
        self.relu = nn.ReLU()
        self.conv = nn.Conv2d(args.channels * 4, 1280, kernel_size=1)
        self.bn2 = nn.BatchNorm2d(1280)
    elif args.net_type == 'regular':
        if args.resume:
            graph = load_graph(os.path.join(args.model_dir, 'conv2.yaml'))
        else:
            graph = build_graph(args.nodes // 2, args)
            save_graph(graph, os.path.join(args.model_dir, 'conv2.yaml'))
        self.conv2 = StageBlock(graph, args.channels // 2, args.channels)
        if args.resume:
            graph = load_graph(os.path.join(args.model_dir, 'conv3.yaml'))
        else:
            graph = build_graph(args.nodes, args)
            save_graph(graph, os.path.join(args.model_dir, 'conv3.yaml'))
        self.conv3 = StageBlock(graph, args.channels, args.channels * 2)
        if args.resume:
            graph = load_graph(os.path.join(args.model_dir, 'conv4.yaml'))
        else:
            graph = build_graph(args.nodes, args)
            save_graph(graph, os.path.join(args.model_dir, 'conv4.yaml'))
        self.conv4 = StageBlock(graph, args.channels * 2, args.channels * 4)
        if args.resume:
            graph = load_graph(os.path.join(args.model_dir, 'conv5.yaml'))
        else:
            graph = build_graph(args.nodes, args)
            save_graph(graph, os.path.join(args.model_dir, 'conv5.yaml'))
        self.conv5 = StageBlock(graph, args.channels * 4, args.channels * 8)
        self.relu = nn.ReLU()
        self.conv = nn.Conv2d(args.channels * 8, 1280, kernel_size=1)
        self.bn2 = nn.BatchNorm2d(1280)
    self.avgpool = nn.AvgPool2d(7, stride=1)
    self.fc = nn.Linear(1280, num_classes)

    # He initialization for conv layers
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            m.weight.data.normal_(0, math.sqrt(2. / n))
import pickle
from utils import build_graph, Data, split_validation
import networkx as nx
import matplotlib.pyplot as plt

if __name__ == '__main__':
    test_data = pickle.load(open('../datasets/yoho1_64/train_300.txt', 'rb'))
    print(len(test_data[0]))
    print(len(test_data[1]))
    # test_data = Data(test_data, sub_graph=True, method='ggnn', shuffle=False)
    G = build_graph(test_data[0])
    nx.draw(G, with_labels=True)
    plt.show()

    # G = nx.Graph()                          # create an empty undirected graph G
    # G.add_node('a')                         # add a single node
    # G.add_nodes_from(['b', 'c', 'd', 'e'])  # add a set of nodes
    # G.add_cycle(['f', 'g', 'h', 'j'])       # add a cycle
    # H = nx.path_graph(10)                   # path graph on 10 nodes, hence 9 edges
    # G.add_nodes_from(H)                     # add the nodes of H to G as a subgraph
    # G.add_node(H)                           # add the graph itself as a node
    #
    # nx.draw(G, with_labels=True)
    # plt.show()
labels_df = utils.build_dataframe(vertices_train, "label",
                                  preserve_int_col_name=True)

# BUILDING NUMPY MATRICES
X_train = X_train_df.values
X_test = X_test_df.values
X = np.concatenate((X_train, X_test), axis=0)
labels = labels_df.values

# MASK FOR TRAIN/TEST
train_idx = range(X_train.shape[0])
test_idx = range(X_train.shape[0], X.shape[0])

# BUILDING THE GRAPH
G = utils.build_graph()  # ALSO REMOVES SELF-LOOPS
G = G.to_directed()

# BUILDING ADJ MATRIX FOR GCN
A = utils.adjacency_matrix_GCN(G, theta=1)

# DEFINING OUR PARAMETERS
n_features = X.shape[1]
n_classes = labels.shape[1]
n_hidden = 32  # NUMBER OF HIDDEN UNITS IN OUR NET

# CREATING AND TRAINING OUR MODEL
gcn_model = GCN(n_features, n_hidden, n_classes, dropout=0.5)
embedding_gcn = train_model(gcn_model, X,
def Train(directory, epochs, aggregator, embedding_size, layers, dropout,
          slope, lr, wd, random_seed, ctx):
    dgl.load_backend('mxnet')
    random.seed(random_seed)
    np.random.seed(random_seed)
    mx.random.seed(random_seed)

    g, disease_ids_invmap, mirna_ids_invmap = build_graph(
        directory, random_seed=random_seed, ctx=ctx)
    samples = sample(directory, random_seed=random_seed)
    ID, IM = load_data(directory)

    print('## vertices:', g.number_of_nodes())
    print('## edges:', g.number_of_edges())
    print('## disease nodes:', nd.sum(g.ndata['type'] == 1).asnumpy())
    print('## mirna nodes:', nd.sum(g.ndata['type'] == 0).asnumpy())

    samples_df = pd.DataFrame(samples, columns=['miRNA', 'disease', 'label'])
    sample_disease_vertices = [
        disease_ids_invmap[id_] for id_ in samples[:, 1]
    ]
    sample_mirna_vertices = [
        mirna_ids_invmap[id_] + ID.shape[0] for id_ in samples[:, 0]
    ]

    kf = KFold(n_splits=5, shuffle=True, random_state=random_seed)
    train_index = []
    test_index = []
    for train_idx, test_idx in kf.split(samples[:, 2]):
        train_index.append(train_idx)
        test_index.append(test_idx)

    auc_result = []
    acc_result = []
    pre_result = []
    recall_result = []
    f1_result = []

    fprs = []
    tprs = []

    for i in range(len(train_index)):
        print(
            '------------------------------------------------------------------------------------------------------'
        )
        print('Training for Fold ', i + 1)

        samples_df['train'] = 0
        samples_df['test'] = 0
        samples_df['train'].iloc[train_index[i]] = 1
        samples_df['test'].iloc[test_index[i]] = 1

        train_tensor = nd.from_numpy(
            samples_df['train'].values.astype('int32')).copyto(ctx)
        test_tensor = nd.from_numpy(
            samples_df['test'].values.astype('int32')).copyto(ctx)

        edge_data = {'train': train_tensor, 'test': test_tensor}

        g.edges[sample_disease_vertices,
                sample_mirna_vertices].data.update(edge_data)
        g.edges[sample_mirna_vertices,
                sample_disease_vertices].data.update(edge_data)

        train_eid = g.filter_edges(
            lambda edges: edges.data['train']).astype('int64')
        g_train = g.edge_subgraph(train_eid, preserve_nodes=True)
        g_train.copy_from_parent()

        # get the training set
        rating_train = g_train.edata['rating']
        src_train, dst_train = g_train.all_edges()

        # get the testing edge set
        test_eid = g.filter_edges(
            lambda edges: edges.data['test']).astype('int64')
        src_test, dst_test = g.find_edges(test_eid)
        rating_test = g.edges[test_eid].data['rating']

        src_train = src_train.copyto(ctx)
        src_test = src_test.copyto(ctx)
        dst_train = dst_train.copyto(ctx)
        dst_test = dst_test.copyto(ctx)

        print('## Training edges:', len(train_eid))
        print('## Testing edges:', len(test_eid))

        # Train the model
        model = GNNMDA(
            GraphEncoder(embedding_size=embedding_size,
                         n_layers=layers,
                         G=g_train,
                         aggregator=aggregator,
                         dropout=dropout,
                         slope=slope,
                         ctx=ctx),
            BilinearDecoder(feature_size=embedding_size))

        model.collect_params().initialize(
            init=mx.init.Xavier(magnitude=math.sqrt(2.0)), ctx=ctx)
        cross_entropy = gloss.SigmoidBinaryCrossEntropyLoss(from_sigmoid=True)
        trainer = gluon.Trainer(model.collect_params(), 'adam', {
            'learning_rate': lr,
            'wd': wd
        })

        for epoch in range(epochs):
            start = time.time()
            for _ in range(10):
                with mx.autograd.record():
                    score_train = model(g_train, src_train, dst_train)
                    loss_train = cross_entropy(score_train,
                                               rating_train).mean()
                    loss_train.backward()
                trainer.step(1)

            h_val = model.encoder(g)
            score_val = model.decoder(h_val[src_test], h_val[dst_test])
            loss_val = cross_entropy(score_val, rating_test).mean()

            train_auc = metrics.roc_auc_score(
                np.squeeze(rating_train.asnumpy()),
                np.squeeze(score_train.asnumpy()))
            val_auc = metrics.roc_auc_score(
                np.squeeze(rating_test.asnumpy()),
                np.squeeze(score_val.asnumpy()))

            results_val = [
                0 if j < 0.5 else 1 for j in np.squeeze(score_val.asnumpy())
            ]
            accuracy_val = metrics.accuracy_score(rating_test.asnumpy(),
                                                  results_val)
            precision_val = metrics.precision_score(rating_test.asnumpy(),
                                                    results_val)
            recall_val = metrics.recall_score(rating_test.asnumpy(),
                                              results_val)
            f1_val = metrics.f1_score(rating_test.asnumpy(), results_val)

            end = time.time()

            print('Epoch:', epoch + 1,
                  'Train Loss: %.4f' % loss_train.asscalar(),
                  'Val Loss: %.4f' % loss_val.asscalar(),
                  'Acc: %.4f' % accuracy_val,
                  'Pre: %.4f' % precision_val,
                  'Recall: %.4f' % recall_val,
                  'F1: %.4f' % f1_val,
                  'Train AUC: %.4f' % train_auc,
                  'Val AUC: %.4f' % val_auc,
                  'Time: %.2f' % (end - start))

        h_test = model.encoder(g)
        score_test = model.decoder(h_test[src_test], h_test[dst_test])
        # loss_test = cross_entropy(score_test, rating_test).mean()

        fpr, tpr, thresholds = metrics.roc_curve(
            np.squeeze(rating_test.asnumpy()),
            np.squeeze(score_test.asnumpy()))
        test_auc = metrics.auc(fpr, tpr)

        results_test = [
            0 if j < 0.5 else 1 for j in np.squeeze(score_test.asnumpy())
        ]
        accuracy_test = metrics.accuracy_score(rating_test.asnumpy(),
                                               results_test)
        precision_test = metrics.precision_score(rating_test.asnumpy(),
                                                 results_test)
        recall_test = metrics.recall_score(rating_test.asnumpy(),
                                           results_test)
        f1_test = metrics.f1_score(rating_test.asnumpy(), results_test)

        print('Fold:', i + 1,
              'Test Acc: %.4f' % accuracy_test,
              'Test Pre: %.4f' % precision_test,
              'Test Recall: %.4f' % recall_test,
              'Test F1: %.4f' % f1_test,
              'Test AUC: %.4f' % test_auc)

        auc_result.append(test_auc)
        acc_result.append(accuracy_test)
        pre_result.append(precision_test)
        recall_result.append(recall_test)
        f1_result.append(f1_test)

        fprs.append(fpr)
        tprs.append(tpr)

    print('## Training Finished !')
    print(
        '----------------------------------------------------------------------------------------------------------'
    )

    return auc_result, acc_result, pre_result, recall_result, f1_result, fprs, tprs
# NOTE: this snippet is Python 2 (print >> file syntax, list-returning map())
# and assumes a module-level `startTime = datetime.now()`.
def get_features(dataset, k, n_exs, n_test, b, mode, N_SAMPLES,
                 Neigh_SAMPLE=0, Neigh_PROB=0):
    if not os.path.exists("./logs"):
        os.makedirs("./logs")
    log = open("./logs/log-" + dataset + "-features.out", "w", 0)

    opt = "train"
    x_file = "./" + dataset + "/" + opt + "_x.txt"
    graph_file = "./" + dataset + "/graph_1.txt"
    node_type_file = "./" + dataset + "/node_type.txt"

    # build graph
    G = build_graph(graph_file, node_type_file)
    if dataset == "DBLP":
        to_remove = []
        for n in G.nodes():
            if nx.get_node_attributes(G, 'type')[n] == 4:
                to_remove.append(n)
        G.remove_nodes_from(to_remove)
    H = build_bliss_graph(G)

    # load x
    X = np.loadtxt(x_file)

    # gather all patterns in data
    patterns = {}
    patterns[0] = {}
    ex_index = 0
    for x in X:
        # print(ex_index)
        x = map(int, x)
        neighbors = get_neighbors(G, x, Neigh_SAMPLE, Neigh_PROB)
        if len(neighbors) > N_SAMPLES and N_SAMPLES > 0:
            n_hash = []
            xl = list(x)
            for n in neighbors:
                xl.append(n)
                xl = map(int, xl)
                n_hash.append(graph_hash(bliss_subgraph(H, xl)))
                xl.pop()
            prob = 1 / float(len(set(n_hash)))
            counter = collections.Counter(n_hash)
            for h in range(0, len(n_hash)):
                n_hash[h] = prob * (1 / float(counter[n_hash[h]]))
            neighbors = np.random.choice(neighbors, N_SAMPLES,
                                         replace=False, p=n_hash)
        for n in neighbors:
            xl = list(x)
            xl.append(n)
            xl = map(int, xl)
            # print xl
            gh = graph_hash(bliss_subgraph(H, xl))
            conn = 1
            if gh not in patterns[0]:
                patterns[0][gh] = []
                patterns[0][gh].append(ex_index)
            else:
                patterns[0][gh].append(ex_index)
            if gh not in patterns:
                g = nx.Graph(G.subgraph(xl))
                if nx.is_connected(g):
                    # print g.nodes(data=True)
                    # print g.edges()
                    patterns[gh] = {}
                else:
                    conn = 0
            if conn == 1:
                # print >>log, datetime.now() - startTime
                second_neighbors = get_neighbors(G, xl, Neigh_SAMPLE,
                                                 Neigh_PROB)
                # second_neighbors = G[n]
                # second_neighbors = list((set(neighbors) | set(second_neighbors)) - set(xl))
                for sn in second_neighbors:
                    to_del = list(xl)
                    for d in to_del:
                        input_pattern = list(xl)
                        input_pattern.remove(d)
                        input_pattern.append(int(sn))
                        ghi = graph_hash(bliss_subgraph(H, input_pattern))
                        if ghi not in patterns[gh] and ghi not in patterns:
                            # if nx.is_connected(nx.Graph(G.subgraph(input_pattern))):
                            patterns[gh][ghi] = []
                            patterns[gh][ghi].append(ex_index)
                        elif ghi not in patterns[gh] and ghi in patterns:
                            patterns[gh][ghi] = []
                            patterns[gh][ghi].append(ex_index)
                        else:
                            patterns[gh][ghi].append(ex_index)
                # print >>log, datetime.now() - startTime
            xl.pop()
        # print datetime.now() - startTime
        ex_index = ex_index + 1
        if ex_index == n_exs:
            break

    print >>log, datetime.now() - startTime
    print >>log, ex_index / float(n_exs)

    opt = "test"
    x_file = "./" + dataset + "/" + opt + "_x.txt"
    graph_file = "./" + dataset + "/graph_2.txt"
    node_type_file = "./" + dataset + "/node_type.txt"

    # build graph
    G = build_graph(graph_file, node_type_file)
    if dataset == "DBLP":
        to_remove = []
        for n in G.nodes():
            if nx.get_node_attributes(G, 'type')[n] == 4:
                to_remove.append(n)
        G.remove_nodes_from(to_remove)
    H = build_bliss_graph(G)

    # load x
    X = np.loadtxt(x_file)
    # test_IDS = range(0, len(X[:, ]))
    test_IDS = False

    print >>log, "CONSTRUCTING TEST FEATURES"
    ex_index = 0
    for x in X:
        # print(ex_index + n_exs)
        x = x.tolist()
        x = map(int, x)
        neighbors = get_neighbors(G, x, Neigh_SAMPLE, Neigh_PROB)
        if len(neighbors) > N_SAMPLES and N_SAMPLES > 0:
            n_hash = []
            xl = list(x)
            for n in neighbors:
                xl.append(n)
                xl = map(int, xl)
                n_hash.append(graph_hash(bliss_subgraph(H, xl)))
                xl.pop()
            prob = 1 / float(len(set(n_hash)))
            counter = collections.Counter(n_hash)
            for h in range(0, len(n_hash)):
                n_hash[h] = prob * (1 / float(counter[n_hash[h]]))
            neighbors = np.random.choice(neighbors, N_SAMPLES,
                                         replace=False, p=n_hash)
        for n in neighbors:
            xl = list(x)
            xl.append(n)
            xl = map(int, xl)
            gh = graph_hash(bliss_subgraph(H, xl))
            if gh in patterns[0]:
                patterns[0][gh].append(ex_index + n_exs)
            if gh in patterns:
                # print >>log, datetime.now() - startTime
                second_neighbors = get_neighbors(G, xl, Neigh_SAMPLE,
                                                 Neigh_PROB)
                # second_neighbors = G[n]
                # second_neighbors = list((set(neighbors) | set(second_neighbors)) - set(xl))
                for sn in second_neighbors:
                    to_del = list(xl)
                    for d in to_del:
                        input_pattern = list(xl)
                        # print input_pattern
                        input_pattern.remove(d)
                        input_pattern.append(int(sn))
                        ghi = graph_hash(bliss_subgraph(H, input_pattern))
                        if ghi in patterns[gh]:
                            patterns[gh][ghi].append(ex_index + n_exs)
                # print >>log, datetime.now() - startTime
            xl.pop()
        print >>log, datetime.now() - startTime
        ex_index = ex_index + 1
        if ex_index == n_test:
            break

    print >>log, datetime.now() - startTime

    num_dim = 0
    print >>log, len(patterns)
    PL = []
    for p in patterns:
        num_dim = num_dim + len(patterns[p])
        PL.append(len(patterns[p]))
    print >>log, PL
    print >>log, num_dim

    new_X = np.zeros((n_exs + n_test, num_dim))
    norm = np.zeros((n_exs + n_test, len(PL)))
    j = 0
    k = 0
    for p in patterns:
        for pp in patterns[p]:
            for i in patterns[p][pp]:
                new_X[i, j] = new_X[i, j] + 1
                norm[i, k] = norm[i, k] + 1
            j = j + 1
        k = k + 1

    s = 0
    f = 0
    pi = 0
    for p in PL:
        f = s + p
        if mode == 'avg':
            new_X[:, s:f] = new_X[:, s:f] / (
                np.transpose(np.tile(norm[:, pi], (p, 1))))
        s = f
        pi = pi + 1
    new_X = np.nan_to_num(new_X)

    np.savetxt(dataset + 'train_x.txt', new_X[0:n_exs, :], '%5.0f')
    np.savetxt(dataset + 'test_x.txt', new_X[n_exs:(n_exs + n_test), :],
               '%5.0f')
    np.savetxt(dataset + 'PL.txt', np.asarray(PL), '%5.0f')
    np.savetxt(dataset + 'test_IDS.txt', np.atleast_1d(test_IDS), '%5.0f')
    return (new_X[0:n_exs, :].astype(float),
            new_X[n_exs:(n_exs + n_test), :].astype(float), PL, test_IDS)
def get_time_data():
    np.random.seed(1)
    files = [
        os.path.join('.', 'Maps', f) for f in os.listdir('Maps')
        if f[-3:] == 'mat' and f[-12:-4] != 'solution'
    ]
    maps20x20 = [f for f in files if '20x20' in f]
    maps50x50 = [f for f in files if '50x50' in f]
    big_maps = [f for f in files if '100_POI' in f]
    runtimes = {}
    for n in [4, 6, 8, 10, 12, 14]:
        print(n)
        runtimes[n] = []
        for f in big_maps:
            cost_matrix = load_cost_matrix(f)
            # high diagonal costs cause numerical errors
            # use constraints to prevent travel to self
            # diagonal cost must be > 0 for this to work
            # but should be low
            np.fill_diagonal(cost_matrix, 1)
            cost_matrix = cost_matrix[0:n, 0:n]
            # print('----- Edge Costs -----')
            # print(cost_matrix)
            num_nodes = cost_matrix.shape[0]
            score_vector = np.ones(num_nodes)
            task_windows = setup_task_windows(score_vector)
            # print('----- Task Windows -----')
            # print(np.around(task_windows, 2).astype('float'))
            max_cost = get_starting_cost(cost_matrix, task_windows)
            plan, visit_times, profit, cost, solve_time = get_solution(
                score_vector, cost_matrix, task_windows, max_cost)
            runtimes[n].append(solve_time)
            wait_times = compute_wait_times(plan, cost_matrix, visit_times)
            visit_order_waits, visit_times_waits = get_arrive_depart_pairs(
                plan, visit_times, wait_times, cost)
            save_solution(f, visit_order_waits, visit_times_waits,
                          solver_type='tw')
            print('----- Visited -----')
            print(np.sum(plan, axis=1))
            print('----- Plan -----')
            print(np.around(plan, 2).astype('int32'))
            # print('----- Edge Costs -----')
            # print(cost_matrix)
            print('----- Wait Times -----')
            print(np.around(wait_times, 2))
            g, tour, verified_cost = build_graph(plan, score_vector,
                                                 cost_matrix)
            print(f)
            msg = 'The maximum profit tour found is \n'
            for idx, k in enumerate(tour):
                msg += str(k)
                if idx < len(tour) - 1:
                    msg += ' -> '
                else:
                    msg += ' -> 0'
            print(msg)
            msg = 'Profit: {:.2f}, cost: {:.2f}, verification cost: {:.2f}'
            print(msg.format(profit, cost, verified_cost))
            msg = 'Time taken: {} seconds'
            print(msg.format(solve_time))
            # display_results(g, plan, task_windows, visit_times, wait_times, cost)
    with open('results_tw.txt', 'w') as f:
        f.write(str(runtimes))
def clone_fn(tf_batch_queue):
    return build_graph(tf_batch_queue, network_fn)
def get_object_places(table_path, graph_path, visited_places):
    assert os.path.exists(table_path)
    assert os.path.exists(graph_path)
    df = pd.read_csv(table_path, index_col=0)

    similar = None  # set by the similarity check below when enabled
    while True:
        # speech-to-text part: obtain audio from the microphone
        r = sr.Recognizer()
        with sr.Microphone() as source:
            print("Which object are you looking for?\n")
            # audio = r.listen(source)
        # recognize speech using Google Cloud Speech
        try:
            object_to_search = "wallet"
            # object_to_search = r.recognize_google_cloud(audio, credentials_json=GOOGLE_CLOUD_SPEECH_CREDENTIALS).strip()
            # similar = check_similar_objects(df, object_to_search)
            # print(similar)
            if len(df.loc[df["object"] == object_to_search]) > 0:
                break
            elif similar is not None:
                answer = input(object_to_search +
                               " not found, but it matches " + similar +
                               "; use its distribution instead? [Y/n]\n")
                if answer == "Y":
                    object_to_search = similar
                    break
        except sr.UnknownValueError:
            print("Google Cloud Speech could not understand audio")
        except sr.RequestError as e:
            print("Could not request results from Google Cloud Speech "
                  "service; {0}".format(e))

    df = df.loc[df["object"] == object_to_search].drop('object', 1)
    places = list(df.keys())

    # drop places already visited
    for visited_place in visited_places:
        df = df.drop(visited_place, 1)
        places.remove(visited_place)

    row = df
    print(row)
    for x in list(zip(row.keys(), row.values[0])):
        print(str(x[0]) + " " + str(x[1]))

    knowledge = row.values[0]
    number_of_places = len(knowledge)

    distances_dict = get_distances(graph_path,
                                   ("pose", 1.7219, 11.1261, "storage"))
    distances = [distances_dict[key] for key in places]
    max_distance = max(distances)
    inverted_distances = list(
        map(lambda x: abs(x - max_distance + 1) / 5, distances))
    prior_knowledge = np.array(inverted_distances)

    with pm.Model() as model:
        # Parameters of the Multinomial are from a Dirichlet
        parameters = pm.Dirichlet('parameters',
                                  a=prior_knowledge,
                                  shape=number_of_places)
        # Observed data is from a Multinomial distribution
        observed_data = pm.Multinomial('observed_data',
                                       n=sum(knowledge),
                                       p=parameters,
                                       shape=number_of_places,
                                       observed=knowledge)

    with model:
        # Sample from the posterior
        trace = pm.sample(draws=1000,
                          chains=2,
                          tune=500,
                          discard_tuned_samples=True)

    trace_df = pd.DataFrame(trace['parameters'], columns=places)
    # For probabilities use samples after burn-in
    pvals = trace_df.iloc[:, :number_of_places].mean(axis=0)
    tag_and_dist = sorted(zip(places, pvals),
                          key=lambda x: x[1],
                          reverse=True)
    display_probs(dict(tag_and_dist))

    top_4_places = [x[0] for x in tag_and_dist[:4]]
    g = build_graph(graph_path)
    topn_nodes = []
    for label in top_4_places:
        for node in g.nodes():
            _, _, _, node_label = node
            if node_label == label:
                topn_nodes += [node]
                break

    # add the current position to the top-4 nodes
    topn_nodes += [("pose", 7.3533, 0.5381, "corridor-1")]
    subgraph = nx.Graph()
    edges = list(itertools.combinations(g.subgraph(topn_nodes), 2))
    all_distances = dict(nx.all_pairs_shortest_path_length(g))
    edges_with_weight = [(topn_nodes.index(x[0]), topn_nodes.index(x[1]),
                          all_distances[x[0]][x[1]]) for x in edges]
    print(edges_with_weight)

    fitness_dists = mlrose.TravellingSales(distances=edges_with_weight)
    problem_fit = mlrose.TSPOpt(length=len(topn_nodes),
                                fitness_fn=fitness_dists,
                                maximize=False)
    best_state, best_fitness = mlrose.genetic_alg(problem_fit, random_state=2)
    path = [topn_nodes[x][3] for x in best_state]
    path = rotate(path, path.index('corridor-1'))
    print(path)
def __init__(self, net):
    self.graph = build_graph(net)
    super().__init__()
    for n, (v, _) in self.graph.items():
        setattr(self, n, v)
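Assuming the surrounding class subclasses torch.nn.Module (the base class is not shown in the snippet), this setattr loop registers each graph node's layer as a submodule, so its parameters show up in .parameters(); a minimal illustration with hypothetical names:

import torch.nn as nn

class TinyNet(nn.Module):
    def __init__(self, layers):
        super().__init__()
        for name, layer in layers.items():
            setattr(self, name, layer)  # nn.Module.__setattr__ registers each layer

net = TinyNet({'conv': nn.Conv2d(3, 8, 3), 'fc': nn.Linear(8, 2)})
print([name for name, _ in net.named_children()])  # ['conv', 'fc']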