def q3():
    """Print mean label-recovery accuracies on LFR graphs for mu = 0.1 ... 0.9.

    For every mixing value an LFR benchmark graph is generated, its ground
    truth communities are turned into an integer label vector, and
    ``drop_label`` is called ten times on a fresh copy of the graph.  The
    two values returned by ``drop_label`` are averaged per mixing value and
    finally printed with four decimals: first the "harmonic" series, then a
    blank line, then the "local/global" series.
    """
    harmonic, loc_glob = [], []

    for mu in np.arange(0.1, 1, 0.1):
        graph = LFR_benchmark_graph(1000, 3, 1.5, mu=mu, min_community=20,
                                    average_degree=5, seed=3)

        # One frozenset per distinct ground-truth community.
        communities = {frozenset(graph.nodes[v]['community']) for v in graph}

        # res[node] = index of the community the node belongs to.
        res = np.zeros(len(graph.nodes), dtype=int)
        for label, members in enumerate(communities):
            for node in members:
                res[node] = label

        harmonic_accs, local_global_accs = [], []
        for _ in range(10):
            h_acc, lg_acc = drop_label(graph.copy(), res, 0.8)
            harmonic_accs.append(h_acc)
            local_global_accs.append(lg_acc)

        harmonic.append(np.mean(harmonic_accs))
        loc_glob.append(np.mean(local_global_accs))

    for value in harmonic:
        print("{:.4f}".format(value))
    print()
    for value in loc_glob:
        print("{:.4f}".format(value))
def load_synthetic(mu, n=1000, tau1=3, tau2=1.5, edge_drop_percent=0.2):
    """Build an LFR graph and hide the labels of a random subset of nodes.

    Every node gets a ``'value'`` attribute holding a dense integer
    community id (ids are assigned in order of first appearance while
    iterating over the nodes).  Self loops are removed, the graph is
    converted to a plain ``nx.Graph`` and a random sample of nodes is
    selected; every node *outside* that sample additionally receives its
    community id under the ``label_name`` attribute.

    Parameters
    ----------
    mu, n, tau1, tau2
        Forwarded to ``LFR_benchmark_graph``.
    edge_drop_percent : float
        Fraction of nodes whose label is withheld (despite its name, the
        value is applied to nodes).

    Returns
    -------
    (G, RandomSample)
        The labelled graph and the list of nodes whose label was withheld.
    """
    G = LFR_benchmark_graph(n, tau1, tau2, mu, average_degree=5,
                            min_community=30, seed=10)

    # Map one representative member of each community to a dense integer
    # id.  Members of the same community report the same representative, so
    # this yields the same numbering as collecting the representatives and
    # indexing into their list of unique values.
    community_ids = {}
    for node in G.nodes:
        representative = list(G.nodes[node]['community'])[0]
        G.nodes[node]['value'] = community_ids.setdefault(
            representative, len(community_ids))

    # remove self edges (Graph.selfloop_edges() no longer exists in current
    # networkx; the module-level function is the supported spelling)
    G.remove_edges_from(list(nx.selfloop_edges(G)))

    # convert all graph to undirected
    G = nx.Graph(G)

    ListOfNodes = list(G.nodes())
    # The fraction comes from the function's own parameter; the name
    # ``node_drop_percent`` is not defined inside this function.
    sample = int(len(ListOfNodes) * edge_drop_percent)
    RandomSample = random.sample(ListOfNodes, sample)

    hidden = set(RandomSample)  # O(1) membership tests
    for node in G.nodes():
        if node not in hidden:
            # NOTE(review): ``label_name`` is not defined in this function;
            # presumably a module-level constant - confirm.
            G.nodes[node][label_name] = G.nodes[node]['value']
    return (G, RandomSample)
def test_generator():
    """Check that a 250-node LFR graph has 250 nodes and that the
    ``'community'`` node attributes form a partition of the node set."""
    graph = LFR_benchmark_graph(n=250, tau1=3, tau2=1.5, mu=0.1,
                                average_degree=5, min_community=20, seed=10)
    assert_equal(len(graph), 250)

    # Collect each distinct community once.
    found = set()
    for v in graph:
        found.add(frozenset(graph.nodes[v]['community']))
    assert_true(is_partition(graph.nodes(), found))
def test_generator():
    """Check that a 250-node LFR graph has 250 nodes and that the
    ``'community'`` node attributes form a partition of the node set."""
    n = 250
    tau1 = 3
    tau2 = 1.5
    mu = 0.1
    G = LFR_benchmark_graph(n, tau1, tau2, mu, average_degree=5,
                            min_community=20, seed=10)
    assert_equal(len(G), 250)
    # ``G.node`` was removed from networkx; ``G.nodes`` is the supported
    # accessor for node attribute dictionaries.
    C = {frozenset(G.nodes[v]['community']) for v in G}
    assert_true(is_partition(G.nodes(), C))
def load_synthetic(n=250, tau1=3, tau2=1.5, mu=0.1):
    """Build an LFR graph and return it as igraph and networkx objects.

    Every node gets a ``'value'`` attribute holding a dense integer
    community id (assigned in order of first appearance while iterating
    over the nodes).  Self loops are removed before conversion.

    Returns
    -------
    (G, nG, true_labels)
        ``G`` is an undirected igraph graph built from the adjacency
        matrix, ``nG`` is the corresponding ``nx.Graph`` and
        ``true_labels`` is the list of integer community ids in node order.
    """
    G = LFR_benchmark_graph(n, tau1, tau2, mu, average_degree=5,
                            min_community=20, seed=10)

    # Map one representative member of each community to a dense integer id.
    community_ids = {}
    for node in G.nodes:
        representative = list(G.nodes[node]['community'])[0]
        G.nodes[node]['value'] = community_ids.setdefault(
            representative, len(community_ids))
    true_labels = [int(G.nodes[node]['value']) for node in G.nodes]

    # remove self edges (Graph.selfloop_edges() no longer exists in current
    # networkx; the module-level function is the supported spelling)
    G.remove_edges_from(list(nx.selfloop_edges(G)))

    # convert all graph to undirected
    G = nx.Graph(G)
    nG = nx.Graph(G)

    # first convert the networkx graph to igraph; to_numpy_array replaces
    # the removed to_numpy_matrix and produces the same boolean rows here
    G = ig.Graph.Adjacency((nx.to_numpy_array(G) > 0).tolist())
    G.to_undirected()
    return (G, nG, true_labels)
def generate_lfr_benchmark(self):
    """Generate an LFR benchmark graph from the settings stored on ``self``.

    The node count, both exponents, the mixing value and the degree and
    community size bounds are read from the instance attributes; the
    resulting graph is given a descriptive ``name`` and returned.
    """
    node_count = self._nodes
    settings = dict(
        tau1=self._gamma,
        tau2=self._beta,
        mu=self._mu,
        min_degree=self._min_degree,
        max_degree=self._max_degree,
        min_community=self._min_community,
        max_community=self._max_community,
    )
    benchmark = LFR_benchmark_graph(node_count, **settings)
    benchmark.name = "LFR Benchmark with {0} nodes".format(self._nodes)
    return benchmark
def generating_main(G_edge, path_net='synthetic1.txt', path_comm='synthetic1_comm.dat', path_pic='synthetic1.png', muid=0.1):
    """Generate a 200-node LFR network whose edge count is not yet in ``G_edge``.

    Starting from ``muid``, the mixing value is raised by 0.003 until the
    generated graph has an edge count that does not occur in ``G_edge``.
    That count is appended to ``G_edge`` (the list is modified in place),
    some diagnostics are printed, and the picture, edge list and community
    assignment are written via ``draw_Network``, ``write_network`` and
    ``write_community``.
    """
    mixing = muid
    while True:
        # generating synthetic networks
        G = LFR_benchmark_graph(200, 2, 1.5, mixing, average_degree=6,
                                max_degree=20, min_community=20, seed=10)
        communities = {frozenset(G.nodes[v]['community']) for v in G}
        edge_count = len(G.edges())
        if edge_count not in G_edge:
            break
        # Same edge count as an earlier network: retry with a larger mu.
        mixing = mixing + 0.003

    G_edge.append(edge_count)
    print(G_edge)
    print(len(communities))
    print(nx.info(G))

    # Communities as sorted node lists, themselves in sorted order.
    partition_original = sorted(sorted(members) for members in communities)
    nodegroup = add_Group(partition_original)
    draw_Network(G, path_pic, nodegroup)
    write_network(path_net, sorted(G.edges()))
    write_community(path_comm, nodegroup)
def createNetwork(agents):
    """
    Creates a network using the LFR Benchmark Algorithm (to be improved?)

    One node per agent is created (keyed by ``agent.ID`` with the agent
    stored under ``data``); the edges of an LFR benchmark graph with
    ``len(agents)`` nodes are then copied in and self loops are removed.
    """
    network = nx.Graph()
    network.add_nodes_from((agent.ID, {"data": agent}) for agent in agents)

    # gamma = 3, beta = 2, mu = 0.25
    benchmark = LFR_benchmark_graph(len(agents), 3, 2, 0.25,
                                    max_degree=40, average_degree=15,
                                    max_iters=1000, seed=42)
    # Edges are copied using the benchmark graph's own node ids.
    network.add_edges_from(benchmark.edges)
    network.remove_edges_from(nx.selfloop_edges(network))
    return network
def benchmark_girvan_newman():
    """Return an LFR graph with 128 nodes, fixed degree 16 and fixed
    community size 32 (mu = 0.04, seed 10)."""
    degree = 16
    community_size = 32
    return LFR_benchmark_graph(
        n=128,
        tau1=3,
        tau2=1.5,
        mu=0.04,
        min_degree=degree,
        max_degree=degree,
        min_community=community_size,
        max_community=community_size,
        seed=10,
    )
def LFR(n, tau1, tau2, mu, min_com_size, force = False):
    """Return an LFR benchmark graph, cached on disk as a pickle file.

    Parameters
    ----------
    n, tau1, tau2, mu
        Forwarded to ``LFR_benchmark_graph``; they also form the cache
        file name.
    min_com_size
        Passed as ``min_community``.
    force : bool
        When true the graph is regenerated even if a cache file exists.

    Returns
    -------
    The cached or freshly generated graph.
    """
    # Local import: the gpickle helpers are gone from current networkx and
    # were thin wrappers around pickle, so existing cache files still load.
    import pickle

    # NOTE(review): min_com_size is not part of the file name, so a cached
    # graph is returned even when it was built with another value.
    path = "../data/LFR/LFR_%d_%.2f_%.2f_%.2f.gpickle" %(n, tau1, tau2, mu)

    # enforce regeneration if force==True
    if not force and os.path.isfile(path):
        # Unpickling can execute arbitrary code; only trusted cache files
        # should live under this path.
        with open(path, "rb") as fh:
            return pickle.load(fh)

    G = LFR_benchmark_graph(n, tau1, tau2, mu, average_degree = 8,
                            min_community = min_com_size, seed=0)
    print("write gpickle file", path)
    with open(path, "wb") as fh:
        pickle.dump(G, fh, pickle.HIGHEST_PROTOCOL)
    return G
def get_lfr_network_data(n, tau1, tau2, mu):
    """Generate an LFR graph and a per-node community index array.

    Returns the graph and a float array of length ``n`` in which entry
    ``v`` is the index of the community that node ``v`` belongs to.
    """
    graph = LFR_benchmark_graph(n, tau1, tau2, mu, average_degree=5,
                                min_community=30, seed=10)

    distinct = {frozenset(graph.nodes[v]['community']) for v in graph}
    node_comm = np.empty(n)
    label = 0
    for members in list(distinct):
        for node in members:
            node_comm[node] = label
        label += 1
    return graph, node_comm
def lfr_model(self):
    """Build an LFR benchmark graph from the ``lfr_*`` entries of ``self._model``.

    ``lfr_max_deg`` and ``lfr_max_community`` fall back to the node count
    when absent.  The degree constraint is taken from ``lfr_average_degree``
    if present, otherwise from ``lfr_min_degree``; when neither entry
    exists nothing is generated and ``None`` is returned.
    """
    cfg = self._model
    n = cfg['lfr_N']
    shared = dict(
        n=n,
        tau1=cfg['lfr_tau1'],  # power law exponent for node degree distribution
        tau2=cfg['lfr_tau2'],  # power law exponent for community size distribution
        mu=cfg['lfr_mu'],      # fraction of edges between communities
    )
    max_deg = cfg['lfr_max_deg'] if 'lfr_max_deg' in cfg else n
    min_comm = cfg['lfr_min_community']
    max_comm = cfg['lfr_max_community'] if 'lfr_max_community' in cfg else n

    if 'lfr_average_degree' in cfg:
        degree_kw = {'average_degree': cfg['lfr_average_degree']}
    elif 'lfr_min_degree' in cfg:
        degree_kw = {'min_degree': cfg['lfr_min_degree']}
    else:
        return None

    return LFR_benchmark_graph(max_degree=max_deg, min_community=min_comm,
                               max_community=max_comm, **shared, **degree_kw)
def get_lfr_network_data(n, tau1, tau2, mu):
    """Generate an LFR graph and split off 20% of its edges as test links.

    The edge indices are shuffled with ``np.random.shuffle``; the first
    20% become the positive test links.  An equal number of negative links
    is drawn with ``sample_neg`` while the graph is still complete, and
    only afterwards are the positive test links removed from the graph.

    Returns ``(graph, pos_test_links, neg_test_links)``.
    """
    graph = LFR_benchmark_graph(n, tau1, tau2, mu, average_degree=5,
                                min_community=30, seed=10)

    edge_list = list(graph.edges)
    order = np.arange(len(edge_list))
    np.random.shuffle(order)

    held_out = order[:int(0.2 * len(graph.edges))]
    pos_test_links = [edge_list[k] for k in held_out]

    # Negative sampling must see the full graph, so it happens before the
    # held-out edges are deleted.
    neg_test_links = sample_neg(graph, num_neg_links=len(held_out))
    graph.remove_edges_from(pos_test_links)
    return graph, pos_test_links, neg_test_links
def test_invalid_tau2():
    """Call the generator with an invalid ``tau2`` and otherwise valid input.

    Both exponents must be strictly greater than one.  ``tau1`` is kept
    valid so that ``tau2`` is the only offending argument; the call is
    expected to raise (presumably asserted by the surrounding test
    harness, which is not visible here).
    """
    n = 100
    tau1 = 2
    tau2 = 1  # not strictly greater than one
    mu = 0.1
    LFR_benchmark_graph(n, tau1, tau2, mu, min_degree=2)
tau2 = 1.5 mu_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] #mu_list = [0.5] average_degree = 5 minimum_community=20 drop_factor = 0.2 nmi_list = [] ars_list = [] for mu in mu_list: print('\nNode Prediction when mu is', mu, r'and 20% of nodes are dropped') G = LFR_benchmark_graph(n, tau1, tau2, mu, average_degree, min_community=minimum_community) #print('First Node:',G.nodes[0]) communities = {frozenset(G.nodes[v]['community']) for v in G} #print('Communities:\n', communities) #print('Number of Ground Turth Communities:', len(communities)) labels_truth = get_labels_from_community(communities, n) #print('Ground Truth Labels:\n', labels_truth) G = apply_community_value_to_graph(G, labels_truth) #print('First Node Modified:',G.nodes[0]) for node in random.sample(G.nodes(), int(n*drop_factor)): del G.node[node]['value'] nmi, ars = calculate_node_classification_accuracy(G, 'value', labels_truth, method) nmi_list.append(nmi)
def test_mu_too_large():
    """Call the generator with ``mu = 1.1``, i.e. above the [0, 1] interval.

    The call is expected to raise (presumably asserted by the surrounding
    test harness, which is not visible here).
    """
    params = dict(n=100, tau1=2, tau2=2, mu=1.1)
    LFR_benchmark_graph(min_degree=2, **params)
import networkx as nx n = 1000 tau1 = 2.2 tau2 = 2.3 mu = 0.35 generated_sample_count = 0 num_of_samples = 5 while generated_sample_count < num_of_samples: try: #G = LFR_benchmark_graph(n, tau1, tau2, mu, average_degree=5, min_community=20) G = LFR_benchmark_graph(n, tau1, tau2, mu, min_degree=1, min_community=20) communities = {frozenset(G.nodes[v]['community']) for v in G} generated_sample_count += 1 print(communities) except nx.exception.ExceededMaxIterations: #generated_sample_count -= 1 print("exception") print(generated_sample_count)
from scipy import cluster from scipy.cluster.hierarchy import fcluster from scipy.spatial.distance import pdist global _RANK_GRAPH print("Generating communities..") n = 500 tau1 = 4 tau2 = 1.5 mu = 0.1 # _RANK_GRAPH = nx.windmill_graph(20, 5) _RANK_GRAPH = LFR_benchmark_graph(n, tau1, tau2, mu, average_degree=5, min_community=30, seed=10) print(nx.info(_RANK_GRAPH)) A = _RANK_GRAPH.copy() _RANK_GRAPH = nx.to_scipy_sparse_matrix(_RANK_GRAPH) _RANK_GRAPH = stochastic_normalization(_RANK_GRAPH) ## normalize n = _RANK_GRAPH.shape[1] with mp.Pool(processes=mp.cpu_count()) as p: results = p.map(page_rank_kernel, range(n)) vectors = np.zeros((n, n)) for pr_vector in results: if pr_vector != None: vectors[pr_vector[0], :] = pr_vector[1]
labels_truth = get_ground_truth_labels_from_graph(G) number_of_nodes = nx.number_of_nodes(G) #print('Ground Truth Labels:\n', labels_truth) print('\nNumber of Nodes in Graph:', number_of_nodes) calculate_community(G, number_of_nodes, labels_truth) # Synthetic Data Analysis elif option == 'c': print('Synthetic Data - No File') # Settings provided by the prof n = 1000 tau1 = 3 tau2 = 1.5 mu = 0.5 average_degree = 5 minimum_community=20 G = LFR_benchmark_graph(n, tau1, tau2, mu, average_degree, min_community=minimum_community) #print(G.nodes[0]) communities = {frozenset(G.nodes[v]['community']) for v in G} #print('Communities:\n', communities) print('Number of Ground Turth Communities:', len(communities)) labels_truth = get_labels_from_community(communities, n) #print('Ground Truth Labels:\n', labels_truth) calculate_community(G, n, labels_truth) else: print('Wrong Option Selected')
nx.draw_spring(nx.barabasi_albert_graph(100, 4)) #%% from networkx.algorithms.community import LFR_benchmark_graph import numpy as np import networkx as nx import matplotlib.pyplot as plt from matplotlib.colors import rgb2hex n = 250 tau1 = 3 tau2 = 1.5 mu = 0.05 G = LFR_benchmark_graph(n, tau1, tau2, mu, average_degree=5, min_community=20, seed=10) communities = {frozenset(G.nodes[v]['community']) for v in G} communities = list(communities) comm1 = dict.fromkeys(communities[0], 1) comm2 = dict.fromkeys(communities[1], 2) comm3 = dict.fromkeys(communities[2], 3) comm3 comm1.update(comm2) comm1.update(comm3) tmp = sorted(comm1.items(), key=lambda e: e[0]) labels = [item[1] for item in tmp] k = max(labels)
def LFR(n, tau1, tau2, mu):
    """Generate an LFR graph and export it as adjacency, edge and community files.

    Parameters
    ----------
    n : int
        Number of nodes.
    tau1 : float
        Power law exponent for the degree distribution; must be strictly
        greater than one.
    tau2 : float
        Power law exponent for the community size distribution; must be
        strictly greater than one.
    mu : float
        Mixing value in [0, 1]; a larger mu gives less clearly separated
        communities.

    The remaining generator arguments are fixed here: ``average_degree=10``,
    ``max_degree=50``, ``min_community=10``, ``max_community=50``; no seed
    is passed.

    Side effects
    ------------
    Changes the working directory to ``experiments/datasets/lfr`` and writes
    four files there: the adjacency list, a space separated edge list, one
    line of space separated node ids per community, and a ``;`` separated
    CSV edge list with a constant weight of 1.

    Returns
    -------
    str
        File name of the CSV edge list (relative to the new working
        directory).
    """
    # NOTE(review): the path is relative, so a second call from the same
    # process starts from the directory entered by the first call.
    os.chdir('experiments/datasets/lfr')

    G = LFR_benchmark_graph(n, tau1, tau2, mu, average_degree=10,
                            max_degree=50, min_community=10, max_community=50)

    # remove self loops (Graph.selfloop_edges() no longer exists in current
    # networkx; the module-level function is the supported spelling)
    G.remove_edges_from(list(nx.selfloop_edges(G)))

    numberOfEdges = G.number_of_edges()
    print("Number of edges of graph G: ", numberOfEdges)
    print("------------------------------")

    communities = {frozenset(G.nodes[v]['community']) for v in G}

    adjacency_list_filename = 'lfrAdjlistN' + str(n) + 'MU' + str(mu) + '*.txt'
    edge_list_filename = 'lfrEdgelistN' + str(n) + 'MU' + str(mu) + '*.txt'
    community_list_filename = 'lfrCommN' + str(n) + 'MU' + str(mu) + '*.txt'
    csv_filename = 'lfrEdgelistN' + str(n) + 'MU' + str(mu) + '*.csv'

    # Writing by file name lets networkx open AND close the file, so the
    # content is completely on disk before it is read back below.
    nx.write_adjlist(G, adjacency_list_filename)

    # Expand every "source target target ..." adjacency line into edges.
    edge_pairs = []
    with open(adjacency_list_filename, 'r') as f:
        for line in f:
            if line.startswith("#"):  # skip first comment lines
                continue
            source, *targets = line.rstrip('\n').split(' ')
            edge_pairs.extend((source, target) for target in targets)

    with open(edge_list_filename, 'w') as f:
        f.write('%s\n' % '\n'.join("%s %s" % pair for pair in edge_pairs))

    # One community per line, node ids separated by single spaces (the text
    # that remains when brackets and commas are stripped from str(list)).
    with open(community_list_filename, 'w') as f:
        for item in communities:
            f.write("%s\n" % ' '.join(str(node) for node in item))

    # CSV edge list with weight 1 appended to all edges; newline='' is what
    # the csv module asks for when it is handed a text file.
    with open(csv_filename, 'w', newline='') as out_file:
        writer = csv.writer(out_file, delimiter=';')
        for source, target in edge_pairs:
            writer.writerow([source, target, 1])

    return csv_filename
def test_neither_degrees_none():
    """Call the generator with BOTH ``min_degree`` and ``average_degree`` set.

    Exactly one of the two must be specified.  All other arguments are kept
    valid (in particular ``mu`` lies in [0, 1]) so that the degree
    arguments are the only possible cause of the expected error; the error
    is presumably asserted by the surrounding test harness, which is not
    visible here.
    """
    n = 100
    tau1 = 2
    tau2 = 2
    mu = 0.1
    LFR_benchmark_graph(n, tau1, tau2, mu, min_degree=2, average_degree=5)
def test_both_degrees_none():
    """Call the generator with NEITHER ``min_degree`` nor ``average_degree``.

    Exactly one of the two must be specified.  All other arguments are kept
    valid (in particular ``mu`` lies in [0, 1]) so that the missing degree
    argument is the only possible cause of the expected error; the error is
    presumably asserted by the surrounding test harness, which is not
    visible here.
    """
    n = 100
    tau1 = 2
    tau2 = 2
    mu = 0.1
    LFR_benchmark_graph(n, tau1, tau2, mu)
draw_original_community(sorted_nodes, labels) return labels if __name__ == '__main__': args = get_args() if args.data in q1_data: if args.data == 'karate': graph = nx.karate_club_graph() else: graph = nx.read_gml("real-classic/{}.gml".format(args.data), label='id').to_undirected() pdb.set_trace() elif args.data == 'LFR': graph = LFR_benchmark_graph(200, 2.5, 1.5, 0.1, min_community=10, min_degree=5, seed=10) else: graph, labels = load_data_gcn(args.data) pos = nx.spring_layout(graph) labels = load_labels(graph) lou_partition = louvain_partition(graph) gre_partition = greedy_partition(graph)
graph, labels, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data_gcn( args.data) compute_avg_auc(graph) if __name__ == '__main__': args = get_args() if args.data in q1_data: if args.data == 'karate': graph = nx.karate_club_graph() else: graph = nx.read_gml("real-classic/{}.gml".format(args.data), label='id').to_undirected() if args.data != 'strike': graph = convert_multigraph_to_single_graph(graph) graph = compute_avg_auc(graph) # pdb.set_trace() elif args.data == 'LFR': for mu in np.arange(0.1, 1, 0.1): graph = LFR_benchmark_graph(1000, 3, 1.5, mu=mu, min_community=20, average_degree=5, seed=3) compute_avg_auc(graph) else: q2()
def main(user_num, topology, centrality, isreal):
    """Load or generate a network and draw a heat map of one centrality measure.

    ``isreal`` is the string ``"True"`` (read a data set from
    ``../dataset`` relative to this file, after changing the working
    directory to the file's directory) or ``"False"`` (generate a synthetic
    network with ``user_num`` nodes).  ``topology`` selects the data set or
    generator; an unknown name raises ``NameError``.  ``centrality`` is one
    of ``DC``, ``CC``, ``BC``, ``EC``, ``KATZ`` or ``PR``; any other value
    draws nothing.
    """
    network_object = []

    if isreal == "True":
        # Loaders are wrapped in lambdas so nothing is read before the
        # topology name has been validated.
        real_loaders = {
            "karate": lambda: nx.read_edgelist("../dataset/karate.edgelist"),
            "lesmis": lambda: nx.readwrite.gml.read_gml("../dataset/lesmis.gml"),
            "facebook": lambda: nx.read_edgelist(
                "../dataset/facebook_combined.txt", nodetype=int),
            # NOTE(review): "twitter" reads the facebook file - confirm
            # whether a separate twitter data set was intended.
            "twitter": lambda: nx.read_edgelist(
                "../dataset/facebook_combined.txt", nodetype=int),
        }
        if topology not in real_loaders:
            raise NameError(
                "{} cannot be used because specified topology is artificial network"
                .format(topology))
        os.chdir(os.path.dirname(os.path.abspath(__file__)))
        network_object.append(real_loaders[topology]())
    elif isreal == "False":
        generators = {
            "scale_free": lambda: nx.barabasi_albert_graph(
                n=user_num, m=40, seed=0),
            "small_world": lambda: nx.newman_watts_strogatz_graph(
                n=user_num, k=40, p=0.20, seed=0),
            "random_network": lambda: nx.fast_gnp_random_graph(
                n=user_num, p=0.20, seed=0),
            "lfr_benchmark": lambda: LFR_benchmark_graph(
                n=user_num, tau1=3, tau2=1.5, mu=0.1, average_degree=40,
                min_community=50, seed=0),
        }
        if topology not in generators:
            raise NameError(
                "{} cannot be used because specified topology is real-world network"
                .format(topology))
        network_object.append(generators[topology]())

    pos = nx.spring_layout(network_object[0])

    measures = {
        "DC": nx.degree_centrality,
        "CC": nx.closeness_centrality,
        "BC": nx.betweenness_centrality,
        "EC": nx.eigenvector_centrality,
        "KATZ": nx.katz_centrality,
        "PR": nx.pagerank,
    }
    measure = measures.get(centrality)
    if measure is not None:
        draw_centrality_headmap(G=network_object[0],
                                pos=pos,
                                topology=topology,
                                centrality=measure(network_object[0]),
                                centrality_name=centrality)
def main(user_num, topology, isreal):
    """Load or generate a network, print basic statistics and plot its degrees.

    ``isreal`` is the string ``"True"`` (read a data set from
    ``../dataset`` relative to this file, after changing the working
    directory to the file's directory) or ``"False"`` (generate a synthetic
    network with ``user_num`` nodes).  ``topology`` selects the data set or
    generator; an unknown name raises ``NameError``.  The graph summary,
    average shortest path length and average clustering are printed and
    the degree histogram is plotted; for ``scale_free`` a second, log-log
    panel is added.
    """
    network_object = []

    if isreal == "True":
        # Loaders are wrapped in lambdas so nothing is read before the
        # topology name has been validated.
        real_loaders = {
            "karate": lambda: nx.read_edgelist("../dataset/karate.edgelist"),
            "lesmis": lambda: nx.readwrite.gml.read_gml("../dataset/lesmis.gml"),
            "facebook": lambda: nx.read_edgelist(
                "../dataset/facebook_combined.txt", nodetype=int),
            # NOTE(review): "twitter" reads the facebook file - confirm
            # whether a separate twitter data set was intended.
            "twitter": lambda: nx.read_edgelist(
                "../dataset/facebook_combined.txt", nodetype=int),
        }
        if topology not in real_loaders:
            raise NameError(
                "{} cannot be used because specified topology is artificial network"
                .format(topology))
        os.chdir(os.path.dirname(os.path.abspath(__file__)))
        network_object.append(real_loaders[topology]())
    elif isreal == "False":
        generators = {
            "scale_free": lambda: nx.barabasi_albert_graph(
                n=user_num, m=12, seed=0),
            "small_world": lambda: nx.newman_watts_strogatz_graph(
                n=user_num, k=12, p=0.20, seed=0),
            "random_network": lambda: nx.fast_gnp_random_graph(
                n=user_num, p=0.20, seed=0),
            "lfr_benchmark": lambda: LFR_benchmark_graph(
                n=user_num, tau1=3, tau2=1.5, mu=0.1, average_degree=12,
                min_community=20, seed=0),
        }
        if topology not in generators:
            raise NameError(
                "{} cannot be used because specified topology is real-world network"
                .format(topology))
        network_object.append(generators[topology]())

    print("{} infomation {}".format(topology, nx.info(network_object[0])))
    print("shortest path length {}".format(
        nx.average_shortest_path_length(network_object[0])))
    print("clustering coefficient {}".format(
        nx.average_clustering(network_object[0])))

    # Upper panel: degree histogram on linear axes.
    plt.subplot(211)
    plt.title("{} degree distribution".format(topology))
    plt.xlabel("degree")
    plt.ylabel("the number of nodes ")
    plt.plot(nx.degree_histogram(network_object[0]))

    # Lower panel (scale-free only): the same histogram on log-log axes.
    if topology == "scale_free":
        plt.subplot(212)
        plt.xscale("log")
        plt.yscale("log")
        plt.grid("both")
        plt.xlabel("log-scale degree")
        plt.ylabel("log-scale the number of nodes ")
        plt.plot(nx.degree_histogram(network_object[0]))

    plt.show()
def test_mu_too_small():
    """Call the generator with ``mu = -1``, i.e. below the [0, 1] interval.

    The call is expected to raise (presumably asserted by the surrounding
    test harness, which is not visible here).
    """
    params = dict(n=100, tau1=2, tau2=2, mu=-1)
    LFR_benchmark_graph(min_degree=2, **params)
sweeping_results = defaultdict(list) # ntask = 51 # SLURM_PROCID starts from 0 to 50. SLURM_PROCID = int(argv[1]) rho = rhos[SLURM_PROCID] num_seeds = int(rho * num_nodes) for u in us: for i in range(num_runs): G = LFR_benchmark_graph(num_nodes, tau1, tau2, u, average_degree=avg_degree, max_degree=max_degree, seed=10) seeds = random.sample(G.nodes(), num_seeds) activation_time, step = simulation_ltm(G, thsh, seeds, frac=f) activation_per_step = [0] * step for node in activation_time: atime = activation_time[node] activation_per_step[atime] += 1 sweeping_results[u].append( (len(activation_time), step, activation_per_step)) Data_Root = '/auto/rcf-proj/ef/hp_273/Diffusion/LFR/per_step_data/' with open(
from networkx.algorithms.community import LFR_benchmark_graph
import pickle

# Generate one 1000-node LFR graph with mu = 0.8 and store it as a pickle.
num_vertices = 1000
LFR_graph = LFR_benchmark_graph(num_vertices, 2, 1.5, 0.8, average_degree = 20, min_degree = None, max_degree = 100, min_community = 10, max_community = None, tol = 1e-07, max_iters = 500, seed = None)

filename = "graph_mu_8"
# The context manager closes the file even if pickling fails.
with open(filename, 'wb') as outfile:
    pickle.dump(LFR_graph, outfile)