def main():
    test_networks = ["karate_club", "facebook"]
    for net in test_networks:
        net_file = data_utils.get_data_path(net)
        g = graph_utils.load_basic_network(net_file)

        # Load cached SIR scores if present; otherwise run the simulation.
        sir_file = net_file.split('.')[0] + '-sir.txt'
        sir = {}
        if os.path.exists(sir_file):
            with open(sir_file, 'r') as f:
                for line in f:
                    data = line.split()
                    node_id = int(data[0])
                    score = float(data[1])
                    sir[node_id] = score
        else:
            print("SIR simulation start.")
            sir = sir_ranking(g, gamma=1.0, num_epoch=100)
            print("SIR simulation end.")

        # Rank nodes with each centrality and compare against the SIR ground
        # truth via Kendall's tau.
        centralities = [
            nx.degree_centrality, nx.closeness_centrality,
            nx.eigenvector_centrality, nx.pagerank, Local_gravity_model
        ]
        for c in centralities:
            if c.__name__ == 'pagerank':
                res = c(g, alpha=0.95)
            elif c.__name__ == 'Local_gravity_model':
                res = c(g, depth=2)
            else:
                res = c(g)
            tau, p = kendallTau(res, sir)
            print("%s\t%s\t%f" % (net, c.__name__, tau))
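# kendallTau is not defined in this snippet. A minimal sketch of what such a
# helper could look like, assuming SciPy is available and both arguments are
# dicts mapping node id -> score (the name kendall_tau_sketch is illustrative,
# not the project's actual function):
from scipy.stats import kendalltau

def kendall_tau_sketch(scores_a, scores_b):
    """Kendall's tau (and p-value) between two score dicts on their common nodes."""
    common = sorted(set(scores_a) & set(scores_b))
    xs = [scores_a[n] for n in common]
    ys = [scores_b[n] for n in common]
    tau, p_value = kendalltau(xs, ys)
    return tau, p_value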
def main():
    for dataset_name in data_utils.data_file:
        net_file = data_utils.get_data_path(dataset_name)
        nx_adj = graph_utils.load_networkx_format(net_file)
        g = nx.from_scipy_sparse_matrix(nx_adj)
        train, test, val, train_neg, test_neg, val_neg = graph_utils.train_test_split(
            nx_adj, pos_neg_ratio=0.5)
        logger.info("[%s] train 1#%d 0#%d" % (dataset_name, len(train), len(train_neg)))
        logger.info("[%s] test 1#%d 0#%d" % (dataset_name, len(test), len(test_neg)))
        logger.info("[%s] valid 1#%d 0#%d" % (dataset_name, len(val), len(val_neg)))

        # Build the training graph and keep only those test edges whose
        # endpoints appear in it, so every baseline can score them.
        g_train = nx.from_edgelist(train)
        train_nodes = g_train.nodes
        candidate_edges = []
        for u, v in test:
            if u in train_nodes and v in train_nodes:
                candidate_edges.append((u, v))
        for u, v in test_neg:
            if u in train_nodes and v in train_nodes:
                candidate_edges.append((u, v))

        # Run the classic link prediction baselines.
        lp_baselines = {
            "Adamic-Adar": nx.adamic_adar_index,
            "Resource Allocation": nx.resource_allocation_index,
            "Jaccard": nx.jaccard_coefficient,
            "Preferential Attachment": nx.preferential_attachment
        }
        print("#============================")
        print("Method\tAUC\tAP")
        for baseline in lp_baselines:
            # Reset the score matrix so one baseline's scores do not leak into the next.
            score_matrix = np.zeros(nx_adj.shape)
            for u, v, p in lp_baselines[baseline](g_train, candidate_edges):
                score_matrix[u][v] = p
                score_matrix[v][u] = p  # keep the matrix symmetric
            # Calculate ROC AUC and Average Precision on the test split.
            roc, ap = get_roc_score(test, test_neg, score_matrix)
            print("%s\t%.6f\t%.6f" % (baseline, roc, ap))
            logger.info("[%s]\t%s\t%.6f\t%.6f" % (dataset_name, baseline, roc, ap))
        print("#============================")
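# get_roc_score is defined elsewhere in the project. A rough sketch of the
# expected behaviour, assuming scikit-learn is available and that score_matrix
# holds a similarity score for every candidate node pair (function name and
# signature here are illustrative only):
from sklearn.metrics import roc_auc_score, average_precision_score

def get_roc_score_sketch(pos_edges, neg_edges, score_matrix):
    """ROC AUC and Average Precision for positive vs. negative test edges."""
    preds = [score_matrix[u][v] for u, v in pos_edges] + \
            [score_matrix[u][v] for u, v in neg_edges]
    labels = [1] * len(pos_edges) + [0] * len(neg_edges)
    return roc_auc_score(labels, preds), average_precision_score(labels, preds)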
def explore_lyb():
    net_file = data_utils.get_data_path("lyb")
    g = graph_utils.load_basic_network(net_file)

    # Learn node2vec embeddings for the "lyb" network.
    n2v_emb = node2vec.node2vec_emb(g, p=0.5, q=2, out_dim=32, num_walks=20)
    n2v_emb.learn_embedding()
    # out_file = net_file.split('.')[0] + '-n2v_emb.txt'
    # n2v_emb.output_embedding(out_file)

    # Query the embedding space: nodes most similar to node '23'.
    node_labels = graph_utils.load_node_labels(data_utils.get_node_path("lyb"))
    sus_best = n2v_emb.model.most_similar(positive=['23'])
    for item in sus_best:
        print(node_labels[int(item[0])], item[1])

    # Community detection via greedy modularity maximization.
    com = list(greedy_modularity_communities(g))
    node_community = {}
    for i, c in enumerate(com):
        for node in c:
            node_community[node_labels[node]] = i
    print(node_community)
network_stats(g, 'BA-Network')

# WS-Network (Watts-Strogatz small-world graph)
k = 20
p = 0.01
g = nx.watts_strogatz_graph(n, k, p)
g = g.to_undirected()
draw_degree_dist(g, k, title='WS-Network Distribution')
network_stats(g, 'WS-Network')

# Powerlaw-cluster network
m = 10
p = 1
g = nx.powerlaw_cluster_graph(n, m, p)
g = g.to_undirected()
draw_degree_dist(g, m, title='PC-Network Distribution')
network_stats(g, 'PC-Network')

# Facebook network
import os, sys
project_root_path = os.path.dirname(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
sys.path.append(project_root_path)
import common.graph_utils as graph_utils
import common.data_utils as data_utils

net_file = data_utils.get_data_path("facebook")
g = graph_utils.load_basic_network(net_file)
g = g.to_undirected()
draw_degree_dist(g, m, title='Facebook-Network Distribution')
network_stats(g, 'Facebook-Network')
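# draw_degree_dist is assumed to be defined earlier in this script. A sketch of
# such a helper, plotting the empirical degree distribution on log-log axes
# (here the second argument is only echoed in the title; this is an
# illustration, not the original implementation):
from collections import Counter
import matplotlib.pyplot as plt

def draw_degree_dist_sketch(graph, ref_degree, title='Degree Distribution'):
    degree_counts = Counter(d for _, d in graph.degree())
    ks = sorted(degree_counts)
    freqs = [degree_counts[k] / graph.number_of_nodes() for k in ks]
    plt.figure()
    plt.loglog(ks, freqs, 'o')
    plt.xlabel('degree k')
    plt.ylabel('P(k)')
    plt.title('%s (reference degree %d)' % (title, ref_degree))
    plt.show()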
        print('%d ego graphs saved to adjacency_matrix.npy.' % (len(self.ego_graphs)))
        np.save(os.path.join(file_dir, "influence_feature.npy"), self.influence_features)
        print('influence_feature.npy saved.')
        np.save(os.path.join(file_dir, "label.npy"), self.graph_labels)
        print('label.npy saved.')
        np.save(os.path.join(file_dir, "vertex_id.npy"), self.ego_virtices)
        print('vertex_id.npy saved.')
        np.save(os.path.join(file_dir, "vertex_feature.npy"), self.graph_node_features)
        print('vertex_feature.npy saved.')
        np.save(os.path.join(file_dir, "embedding.npy"), self.embedding)
        print('embedding.npy saved.')

    def load(self, file_dir):
        self.ego_graphs = np.load(os.path.join(file_dir, "adjacency_matrix.npy"))
        self.influence_features = np.load(
            os.path.join(file_dir, "influence_feature.npy")).astype(np.float32)
        self.graph_labels = np.load(os.path.join(file_dir, "label.npy"))
        self.ego_virtices = np.load(os.path.join(file_dir, "vertex_id.npy"))
        self.graph_node_features = torch.FloatTensor(
            np.load(os.path.join(file_dir, "vertex_feature.npy")))
        self.embedding = torch.FloatTensor(np.load(os.path.join(file_dir, "embedding.npy")))
        print("%s dataset loaded." % (file_dir))


if __name__ == '__main__':
    network_name = 'facebook'
    net_file = data_utils.get_data_path(network_name)
    g = graph_utils.load_basic_network(net_file)
    dataset = deepinf_dataset(g, sir_file=net_file.split('.')[0] + '-sir.txt')
    target_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), network_name + "_preprocess")
    dataset.make()
    dataset.save(target_path)
    # dataset.load(target_path)
def main(): net_file = data_utils.get_data_path("twitter") adj = load_networkx_format(net_file)
def main():
    # Training settings
    # Note: hyper-parameters need to be tuned to reproduce the results reported in the paper.
    parser = argparse.ArgumentParser(
        description='PyTorch graph convolutional neural net for whole-graph classification')
    parser.add_argument('--dataset', type=str, default="IMDBBINARY",
                        help='name of dataset (default: IMDBBINARY)')
    parser.add_argument('--device', type=int, default=0,
                        help='which gpu to use if any (default: 0)')
    parser.add_argument('--batch_size', type=int, default=32,
                        help='input batch size for training (default: 32)')
    parser.add_argument('--iters_per_epoch', type=int, default=50,
                        help='number of iterations per epoch (default: 50)')
    parser.add_argument('--epochs', type=int, default=50,
                        help='number of epochs to train (default: 50)')
    parser.add_argument('--lr', type=float, default=0.001,
                        help='learning rate (default: 0.001)')
    parser.add_argument('--seed', type=int, default=0,
                        help='random seed for splitting the dataset into 10 folds (default: 0)')
    parser.add_argument('--fold_idx', type=int, default=0,
                        help='index of the fold in 10-fold validation; should be less than 10')
    parser.add_argument('--num_layers', type=int, default=5,
                        help='number of layers INCLUDING the input one (default: 5)')
    parser.add_argument('--num_mlp_layers', type=int, default=2,
                        help='number of MLP layers EXCLUDING the input one (default: 2); 1 means a linear model')
    parser.add_argument('--hidden_dim', type=int, default=128,
                        help='number of hidden units (default: 128)')
    parser.add_argument('--final_dropout', type=float, default=0.5,
                        help='final layer dropout (default: 0.5)')
    parser.add_argument('--graph_pooling_type', type=str, default="sum",
                        choices=["sum", "average"],
                        help='pooling over nodes in a graph: sum or average')
    parser.add_argument('--neighbor_pooling_type', type=str, default="sum",
                        choices=["sum", "average", "max"],
                        help='pooling over neighboring nodes: sum, average or max')
    parser.add_argument('--learn_eps', action="store_true",
                        help='whether to learn the epsilon weighting for the center nodes; does not affect training accuracy')
    parser.add_argument('--degree_as_tag', action="store_true",
                        help='use node degrees as input node features (heuristic for unlabeled graphs)')
    parser.add_argument('--filename', type=str, default="",
                        help='output file')
    args = parser.parse_args()

    # Set up seeds and GPU device.
    torch.manual_seed(0)
    np.random.seed(0)
    device = torch.device("cuda:" + str(args.device)) if torch.cuda.is_available() \
        else torch.device("cpu")
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(0)

    graphs, num_classes = load_graphs(data_utils.get_data_path(args.dataset),
                                      args.degree_as_tag)

    # 10-fold cross validation: conduct an experiment on the fold specified by args.fold_idx.
    train_graphs, test_graphs = separate_graph_data(graphs, args.seed, args.fold_idx)

    model = GraphCNN(args.num_layers, args.num_mlp_layers,
                     train_graphs[0].node_features.shape[1], args.hidden_dim,
                     num_classes, args.final_dropout, args.learn_eps,
                     args.graph_pooling_type, args.neighbor_pooling_type,
                     device).to(device)

    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)

    for epoch in range(1, args.epochs + 1):
        print("Epoch:", epoch)
        avg_loss = train(args, model, device, train_graphs, optimizer, epoch)
        acc_train, acc_test = test(args, model, device, train_graphs, test_graphs, epoch)
        # Step the LR scheduler after this epoch's optimizer updates
        # (calling it before training is deprecated in recent PyTorch).
        scheduler.step()

        if args.filename != "":
            # Overwrites the file each epoch, keeping only the latest results.
            with open(args.filename, 'w') as f:
                f.write("%f %f %f" % (avg_loss, acc_train, acc_test))
                f.write("\n")
        print()
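# separate_graph_data is defined elsewhere; given the 10-fold comment above, it
# most likely performs a stratified split and returns the fold selected by
# fold_idx. A sketch of that idea with scikit-learn (illustrative only; it
# assumes each graph object exposes a .label attribute):
from sklearn.model_selection import StratifiedKFold
import numpy as np

def stratified_fold_sketch(graphs, seed, fold_idx, n_splits=10):
    labels = [g.label for g in graphs]
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=seed)
    folds = list(skf.split(np.zeros(len(labels)), labels))
    train_idx, test_idx = folds[fold_idx]
    return [graphs[i] for i in train_idx], [graphs[i] for i in test_idx]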
        # Breadth-first search up to `depth` hops, recording each node's distance.
        cur_level = list(neighbors(node))
        visited = set(cur_level)
        while depth_now <= depth and len(cur_level) > 0:
            next_level = set()
            for target in cur_level:
                if target not in candidates:
                    candidates[target] = depth_now
                for child in neighbors(target):
                    if child not in visited:
                        visited.add(child)
                        next_level.add(child)
            cur_level = next_level
            depth_now += 1

        # Local gravity: sum of k_i * k_j / d(i, j)^2 over nodes j within `depth` hops of i.
        gravity = 0
        for target in candidates:
            distance = candidates[target]
            if target != node and distance <= depth:
                partial_gravity = degrees[node] * degrees[target] / (distance ** 2)
                gravity += partial_gravity
        lgm_results[node] = gravity

    return lgm_results


if __name__ == '__main__':
    # g = nx.karate_club_graph()
    net_file = data_utils.get_data_path("lyb")
    g = graph_utils.load_basic_network(net_file)
    lgm = Local_gravity_model(g)
    print("Local Gravity Model:")
    print(sorted(lgm.items(), key=lambda v: v[1], reverse=True))
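# As a sanity check, the BFS-based implementation above should agree with a
# brute-force version built on networkx shortest paths (a sketch; only
# practical for graphs small enough for repeated single-source BFS):
def local_gravity_bruteforce(graph, depth=2):
    degrees = dict(graph.degree())
    scores = {}
    for node in graph.nodes():
        lengths = nx.single_source_shortest_path_length(graph, node, cutoff=depth)
        scores[node] = sum(degrees[node] * degrees[other] / (dist ** 2)
                           for other, dist in lengths.items() if other != node)
    return scores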
def main(): net_file = data_utils.get_data_path("lyb") g = graph_utils.load_basic_network(net_file) n2v_emb = node2vec.node2vec_emb(g, p=0.5, q=2, out_dim=64, num_walks=20) n2v_emb.learn_embedding()
        avg_node_inf = node_influence / num_epoch if num_epoch > 0 else 0.0
        sir_score[node] = avg_node_inf
    return sir_score


def opt_beta(g):
    # Roughly approximate the epidemic threshold beta_c ~ <k> / (<k^2> - <k>),
    # using the size of each node's 2-hop BFS neighborhood as a proxy for <k^2>.
    k1s = 0
    k2s = 0
    for node in g.nodes():
        k1s += len(list(g.neighbors(node)))
        k2s += len(list(nx.bfs_tree(g, source=node, depth_limit=2).edges()))
    if k2s > k1s:
        beta = k1s / (k2s - k1s)
    else:
        beta = 0.1
    return beta


if __name__ == '__main__':
    # g = nx.karate_club_graph()
    # sir_score = sir_ranking(g, num_epoch=1000)
    # print(sorted(sir_score.items(), key=lambda v: v[1], reverse=True))
    net_file = data_utils.get_data_path("BlogCatalog")
    g = graph_utils.load_basic_network(net_file)
    st = time.time()
    sir = sir_ranking(g, beta=opt_beta(g), gamma=1.0, num_epoch=100)

    # Write the averaged SIR influence score of every node to <network>-sir.txt.
    out_file = net_file.split('.')[0] + '-sir.txt'
    with open(out_file, 'w') as f:
        for i, v in sir.items():
            f.write(str(i) + '\t' + str(round(v, 6)) + '\n')
    print('time used:', time.time() - st)
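# The body of sir_ranking is only partially shown above; it averages each
# node's spreading influence over num_epoch independent runs. For reference,
# one discrete-time SIR run seeded at a single node could look roughly like
# this (a sketch under standard SIR dynamics, not the project's exact code):
import random

def sir_epoch_sketch(graph, seed, beta, gamma):
    infected = {seed}
    recovered = set()
    while infected:
        new_infected = set()
        for u in infected:
            # Each infected node infects each susceptible neighbor with probability beta.
            for v in graph.neighbors(u):
                if v not in infected and v not in recovered and random.random() < beta:
                    new_infected.add(v)
        # Currently infected nodes recover with probability gamma.
        newly_recovered = {u for u in infected if random.random() < gamma}
        recovered |= newly_recovered
        infected = (infected - newly_recovered) | new_infected
    # The seed's influence in this run is the final outbreak size.
    return len(recovered)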