def loadNwU(dsName, path, cd, wccOnly, revEdges, undir):
    print(" Opening " + dsName + " and loading graph... ")
    t1 = time.clock()
    fh = open(path + dsName, "rb")
    if undir:
        if cd:
            prodNet = nx.read_edgelist(fh, delimiter=",")
        else:
            prodNet = nx.read_edgelist(fh)
        # prodNet = prodNet.to_directed()
    else:
        if cd:
            prodNet = nx.read_edgelist(fh, delimiter=",", create_using=nx.DiGraph())
        else:
            prodNet = nx.read_edgelist(fh, create_using=nx.DiGraph())
    fh.close()
    if wccOnly:
        prodNet = nx.algorithms.weakly_connected.weakly_connected_component_subgraphs(prodNet)[0]
    prodNet.remove_edges_from(prodNet.selfloop_edges())
    if revEdges:
        prodNet.reverse(False)
    numNodes = str(len(prodNet))
    numEdges = str(prodNet.size())
    t2 = time.clock()
    print(" -> graph loaded: " + numNodes + " nodes, " + numEdges + " edges (" + str(t2 - t1) + " sec).")
    return prodNet
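# Hedged usage sketch for loadNwU above: "products.csv" and "data/" are
# hypothetical placeholders for an actual comma-delimited edge-list file.
# This would load a directed graph, keep only the largest weakly connected
# component, and leave edge directions untouched.
net = loadNwU("products.csv", "data/", cd=True, wccOnly=True,
              revEdges=False, undir=False)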
def gen_random_graphs(seed, db):
    print "generating random graph with seed " + str(seed)
    directory = db.get_rnd_graphs_path()
    if not path.exists(directory):
        makedirs(directory)
    filename = db.get_rnd_graph_full_name(str(seed), str(db.get_final_time()))
    if path.exists(filename):
        print "random graph with seed " + str(seed) + " already exists! Skipping..."
        return
    pathD = db.get_graphs_path()
    filename = pathD + db.get_windowed_graph_name(0)
    G = nx.read_edgelist(filename, nodetype=int, data=(('weight', float),))
    GR = get_random_graph_from(G, seed)
    save_random_graph(GR, 1, db)
    for i in range(2, db.get_final_time() + 1):
        filename = pathD + db.get_windowed_graph_name(str(i))
        if not path.exists(filename):
            f = open(filename, 'w')
            f.close()
        G = nx.read_edgelist(filename, nodetype=int, data=(('weight', float),))
        GRnew = get_random_graph_from(G, seed)
        GR.graph['nmerges'] = i - 2
        GR = merge_temporal_graphs(GR, GRnew)
        GR = compute_edge_features(GR)
        save_random_graph(GR, i, db)
        print("G_RND[" + str(i) + "] has " + str(GR.number_of_edges()) + " edges")
def k_obfuscation_measure(before_file, after_file, n_nodes, k_arr, data=True):
    print "n_nodes =", n_nodes
    # before_file
    bG = nx.read_edgelist(before_file, '#', '\t', None, nodetype=int)
    print "read bG - DONE"
    # if bG.number_of_nodes() < n_nodes:
    #     bG.add_nodes_from(range(n_nodes))   # only for er_100k

    # Case 1 - aG = bG
    if after_file == before_file:   # after_file is before_file
        for e in bG.edges_iter():
            bG[e[0]][e[1]]['p'] = 1.0
        return compute_eps_multi(bG, bG, k_arr)

    # Case 2 - aG is a sample
    # after_file
    if data:
        aG = nx.read_edgelist(after_file, '#', '\t', None, nodetype=int, data=True)
    else:
        aG = nx.read_edgelist(after_file, '#', '\t', None, nodetype=int, data=False)
    # if aG.number_of_nodes() < n_nodes:
    #     aG.add_nodes_from(range(n_nodes))   # only for the cases of KeyError !
    for e in aG.edges_iter():
        aG[e[0]][e[1]]['p'] = 1.0
    print "read aG - DONE"
    return compute_eps_multi(bG, aG, k_arr)
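# Hedged usage sketch for k_obfuscation_measure: file names, node count, and
# k values are hypothetical. Passing the same file twice exercises Case 1
# (every edge certain, p = 1.0); a perturbed sample exercises Case 2.
eps_self = k_obfuscation_measure("er_1k.gr", "er_1k.gr", 1000, [30, 50, 100])
eps_samp = k_obfuscation_measure("er_1k.gr", "er_1k_sample.gr", 1000, [30, 50, 100])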
def gen_random_graphs(seed):
    # create windowed random graphs for each real graph
    # obtain aggregated graph
    # calculate features of random graph
    print "GENERATING RANDOM GRAPHS"
    day = 1
    final_day = which_day(_maxtime) + 1
    filename = str(results_folder) + "Graphs_Data/windowed_graph_" + str(day) + ".txt"
    print filename
    G = nx.read_edgelist(filename, nodetype=int, data=(('top', float),))
    # print G
    GR = get_random_graph_from(G, seed)
    for i in range(2, final_day):
        day = i
        filename = str(results_folder) + "Graphs_Data/windowed_graph_" + str(day) + ".txt"
        G = nx.read_edgelist(filename, nodetype=int, data=(('top', float),))
        GRnew = get_random_graph_from(G, seed)
        GR.graph['nmerges'] = i - 2
        GR = merge_temporal_graphs(GR, GRnew)
        GR = compute_edge_features(GR)
        save_random_graph(GR, i, seed)
def incorrectness_uncertain_from_file(before_file, after_file, sample_file, n_samples, bins):
    # compute sig_list_b, bucket_list_b ONCE !
    start = time.clock()
    bG = nx.read_edgelist(before_file, '#', '\t', None, nodetype=int)
    # G = nx.read_edgelist(after_file, '#', '\t', None, nodetype=int, data=True)
    print "read bG: DONE, elapsed :", time.clock() - start

    h2_list = equivalence_class_H2_open(bG, None)
    cand_size, bin_size, sig_list_b, bucket_list_b = bucket_H2(h2_list, bins)
    # print "len B:", len(sig_list_b), len(bucket_list_b)

    # H1 score, H2 score
    start = time.clock()
    score_H1 = 0.0
    score_H2 = 0.0
    count = 0
    for i in range(n_samples):
        file_name = sample_file + str(i)
        aG = nx.read_edgelist(file_name, '#', '\t', create_using=nx.MultiGraph(),
                              nodetype=int, data=False)  # IMPORTANT: MultiGraph
        # H1
        sum_re_prob, re_prob_dict = incorrectness_H1(bG, aG, bins)
        score_H1 += sum_re_prob
        # H2
        sum_re_prob, re_prob_dict = incorrectness_H2_open(aG, sig_list_b, bucket_list_b, bins)
        score_H2 += sum_re_prob
        print "count =", count
        count += 1
    #
    score_H1 = score_H1 / n_samples
    score_H2 = score_H2 / n_samples
    print "compute score_H1, score_H2: DONE, elapsed :", time.clock() - start
    #
    return score_H1, score_H2
def main():
    """
    Pre-processing: load data, compute centrality measures, write files with node data
    """
    print(nx.__version__)

    # Load network data, create storage dict, and extract main component
    depends = nx.read_edgelist("data/depends.csv", delimiter=",", create_using=nx.DiGraph(),
                               nodetype=str, data=(("weight", time_from_today),))
    depends.name = "depends"
    suggests = nx.read_edgelist("data/suggests.csv", delimiter=",", create_using=nx.DiGraph(),
                                nodetype=str, data=(("weight", time_from_today),))
    suggests.name = "suggests"
    imports = nx.read_edgelist("data/imports.csv", delimiter=",", create_using=nx.DiGraph(),
                               nodetype=str, data=(("weight", time_from_today),))
    imports.name = "imports"
    nets_dict = {"depends": depends, "suggests": suggests, "imports": imports}
    for k in nets_dict.keys():
        main_component = nx.connected_component_subgraphs(nets_dict[k].to_undirected())[0].nodes()
        nets_dict[k] = nx.subgraph(nets_dict[k], main_component)

    # Run multiple measures on graphs and normalize weights
    measure_list = [nx.in_degree_centrality, nx.betweenness_centrality, nx.pagerank]
    for g in nets_dict.values():
        multiple_measures(g, measure_list)
        normalize_weights(g)

    # Output networks in GraphML format (to store node attributes)
    for i in nets_dict.items():
        # print(i[1].edges(data=True))
        nx.write_graphml(i[1], "data/" + i[0] + "_data.graphml")
    print("")
    print("All files written with data")

    """Visualization:
def main():
    parser = createParser()
    options = parser.parse_args()
    gtGraphNames = glob.glob("{0}/*.sim.cut".format(options.gtruth))
    gtGraphs = {fn.split("/")[-1][:-8]: nx.read_edgelist(fn) for fn in gtGraphNames}
    print(gtGraphs)
    print(gtGraphNames)
    oGraphNames = ["{0}/{1}.out.ppi".format(options.other, k) for k in gtGraphs.keys()]
    oGraphs = {fn.split("/")[-1][:-8]: nx.read_weighted_edgelist(fn) for fn in oGraphNames}
    inputGraphNames = glob.glob("{0}/bZIP*.cut".format(options.other))
    print(inputGraphNames)
    inputGraph = nx.read_edgelist(inputGraphNames[0])
    print(oGraphNames)
    cutoff = 0.99
    paranaGraph = graphWithCutoff(options.parana, 0.0)
    c = findSuggestedCutoff(paranaGraph, inputGraph, cutoff)
    evaluation.printStats(filteredGraph(paranaGraph, inputGraph.nodes(), cutoff=c), inputGraph)
    print >>sys.stderr, "Parana 2.0 : {0}".format(getCurve(paranaGraph, inputGraph))
    for gtName, gtGraph in gtGraphs.iteritems():
        print(gtName)
        c = findSuggestedCutoff(paranaGraph, gtGraph, cutoff)
        print("Parana cutoff = {0}".format(c))
        print("==================")
        evaluation.printStats(filteredGraph(oGraphs[gtName], gtGraph.nodes()), gtGraph)
        print >>sys.stderr, "Pinney et. al : {0}".format(getCurve(oGraphs[gtName], gtGraph))
        evaluation.printStats(filteredGraph(paranaGraph, gtGraph.nodes(), cutoff=c), gtGraph)
        print >>sys.stderr, "Parana 2.0 : {0}".format(getCurve(paranaGraph, gtGraph))
        print("\n")
    sys.exit(0)
def graph_properties(filename, directed=False):
    # Read in rec as undirected graph
    if directed:
        G = nx.read_edgelist(filename, nodetype=int, create_using=nx.DiGraph())
    else:
        G = nx.read_edgelist(filename, nodetype=int, create_using=nx.Graph())
    props = {}
    # Calculate number of edges
    props['num_edges'] = G.number_of_edges()
    # Calculate number of nodes
    props['num_nodes'] = len(G)
    # Calculate largest connected component
    largest_component = nx.connected_component_subgraphs(G)[0]
    props['size_largestcc'] = len(largest_component)
    props['proportion_in_largestcc'] = float(len(largest_component)) / len(G)
    # Calculate clustering coefficient
    props['average_clustering'] = nx.average_clustering(G)
    # Calculate diameter of largest connected component
    # props['diameter'] = nx.diameter(largest_component)
    return props
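# Hedged usage sketch for graph_properties: "example.edgelist" is a
# hypothetical file of integer node pairs. The returned dict maps property
# names to their computed values.
props = graph_properties("example.edgelist", directed=False)
for name, value in sorted(props.items()):
    print("%s: %s" % (name, value))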
def calGraph(infile, mode=1):
    # init parameters
    inputpath = 'edge_list/'
    outputpath = 'network_output/'
    n = mode
    Data_G = inputpath + infile + '_' + str(n) + '.edgelist'
    # init graphs (directed and undirected views of the same edge list)
    G = nx.read_edgelist(Data_G, create_using=nx.DiGraph())
    GU = nx.read_edgelist(Data_G)
    # basic info
    print nx.info(G), '\n', nx.info(GU)
    average_degree = float(sum(nx.degree(G).values())) / len(G.nodes())
    print 'average degree :', average_degree
    degree_histogram = nx.degree_histogram(G)
    print 'degree histogram max :', degree_histogram[1]
    density = nx.density(G)
    print 'density :', density
    # Approximation
    # Centrality
    degree_centrality = nx.degree_centrality(G)
    print 'degree centrality top 2 :', sorted_dict(degree_centrality)[:2]
    out_degree_centrality = nx.out_degree_centrality(G)
    print 'out degree centrality top 2 :', sorted_dict(out_degree_centrality)[:2]
def load(self, fname):
    fext = str(fname).split(".")[1]
    self.fname = str(fname).split(".")[0]
    if not self.directed_graph:
        self.G = nx.read_edgelist(path=fname)
    else:
        self.G = nx.read_edgelist(path=fname, create_using=nx.DiGraph())
def test_edgelist_integers(self):
    G = nx.convert_node_labels_to_integers(self.G)
    (fd, fname) = tempfile.mkstemp()
    nx.write_edgelist(G, fname)
    H = nx.read_edgelist(fname, nodetype=int)
    H2 = nx.read_edgelist(fname, nodetype=int)
    G.remove_node(5)  # isolated nodes are not written in edgelist
    assert_equal(sorted(H.nodes()), sorted(G.nodes()))
    assert_equal(sorted(H.edges()), sorted(G.edges()))
    os.close(fd)
    os.unlink(fname)
def test_edgelist_multidigraph(self):
    G = self.XDG
    (fd, fname) = tempfile.mkstemp()
    nx.write_edgelist(G, fname)
    H = nx.read_edgelist(fname, nodetype=int, create_using=nx.MultiDiGraph())
    H2 = nx.read_edgelist(fname, nodetype=int, create_using=nx.MultiDiGraph())
    assert_not_equal(H, H2)  # they should be different graphs
    assert_nodes_equal(list(H), list(G))
    assert_edges_equal(list(H.edges()), list(G.edges()))
    os.close(fd)
    os.unlink(fname)
def calGraph(infile, mode=1):
    # init parameters
    inputpath = 'edge_list/'
    n = mode
    Data_G = inputpath + infile + '_' + str(n) + '.edgelist'
    # init graphs
    G = nx.read_edgelist(Data_G, create_using=nx.DiGraph())
    GU = nx.read_edgelist(Data_G)
    average_clustering = nx.average_clustering(GU)
    transitivity = nx.transitivity(G)
    return [average_clustering, transitivity]
def test_edgelist_graph(self):
    G = self.G
    (fd, fname) = tempfile.mkstemp()
    nx.write_edgelist(G, fname)
    H = nx.read_edgelist(fname)
    H2 = nx.read_edgelist(fname)
    assert_not_equal(H, H2)  # they should be different graphs
    G.remove_node('g')  # isolated nodes are not written in edgelist
    assert_equal(sorted(H.nodes()), sorted(G.nodes()))
    assert_equal(sorted(H.edges()), sorted(G.edges()))
    os.close(fd)
    os.unlink(fname)
def test_edgelist_digraph(self):
    G = self.DG
    (fd, fname) = tempfile.mkstemp()
    nx.write_edgelist(G, fname)
    H = nx.read_edgelist(fname, create_using=nx.DiGraph())
    G.remove_node('g')  # isolated nodes are not written in edgelist
    H2 = nx.read_edgelist(fname, create_using=nx.DiGraph())
    assert_not_equal(H, H2)  # they should be different graphs
    assert_nodes_equal(list(H), list(G))
    assert_edges_equal(list(H.edges()), list(G.edges()))
    os.close(fd)
    os.unlink(fname)
def comorbid_count_compare(net_dir, icd_gene_clinical, cancer_info, alterations, weighted=False):
    # e.g. net_dir = 'humannet.9'
    graph = networkx.read_edgelist(net_dir + '/network', nodetype=str)
    ct = neighbor_count_comorbid(graph, alterations['peak_mut'], icd_gene_clinical,
                                 cancer_info, comorbid_only=True, weighted=weighted)
    import os
    randdir = net_dir + '/rand/'
    randnets = os.listdir(randdir)
    x = scipy.zeros([len(randnets)])
    for i, f in enumerate(randnets):
        net = networkx.read_edgelist(randdir + f, nodetype=str, data=weighted)
        x[i] = neighbor_count_comorbid(net, alterations['peak_mut'], icd_gene_clinical,
                                       cancer_info, comorbid_only=True, weighted=weighted)
    print 'comorbid_edges= ' + str(ct) + "\tngreater=" + str(sum(x >= ct)) + \
        '\tp=' + str(sum(x >= ct) / float(len(randnets)))
    return ct, x
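# The function above is an empirical permutation test: the p-value is the
# fraction of randomized networks whose comorbid-neighbor count is at least
# the observed count. A minimal self-contained sketch of that computation,
# with hypothetical numbers (not data from the original study):
import numpy as np
observed = 42.0                                    # hypothetical observed count
null_counts = np.array([30.0, 45.0, 41.0, 44.0])   # hypothetical random-network counts
p_value = (null_counts >= observed).sum() / float(len(null_counts))  # 0.5 here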
def test_read_edgelist_3(self):
    s = b"""\
# comment line
1 2 {'weight':2.0}
# comment line
2 3 {'weight':3.0}
"""
    bytesIO = io.BytesIO(s)
    G = nx.read_edgelist(bytesIO, nodetype=int, data=False)
    assert_equal_edges(G.edges(), [(1, 2), (2, 3)])

    bytesIO = io.BytesIO(s)
    G = nx.read_edgelist(bytesIO, nodetype=int, data=True)
    assert_equal_edges(G.edges(data=True),
                       [(1, 2, {'weight': 2.0}), (2, 3, {'weight': 3.0})])
def read_graph():
    '''
    Reads the input network in networkx.
    '''
    if args.weighted:
        G = nx.read_edgelist(args.input, nodetype=int, data=(('weight', float),),
                             create_using=nx.DiGraph())
    else:
        G = nx.read_edgelist(args.input, nodetype=int, create_using=nx.DiGraph())
        for edge in G.edges():
            G[edge[0]][edge[1]]['weight'] = 1

    if not args.directed:
        G = G.to_undirected()

    return G
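# Hedged usage note: read_graph() relies on a module-level `args` object
# (fields: input, weighted, directed) normally populated by argparse elsewhere
# in the script. A minimal stand-in for interactive use, with a hypothetical
# file name:
# import argparse
# args = argparse.Namespace(input="karate.edgelist", weighted=False, directed=False)
# G = read_graph()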
def write_communities(graph, name_to_size):
    # drop the '.ncol' extension from the file name
    # (str.strip('.ncol') strips characters, not the suffix)
    shortname = str(graph.split('/')[-1])
    if shortname.endswith('.ncol'):
        shortname = shortname[:-len('.ncol')]
    nxgraph = networkx.read_edgelist(graph)
    partition = community.best_partition(nxgraph)
    count = 0
    if shortname in name_to_size.keys():
        for com in set(partition.values()):
            count = count + 1
            list_nodes = [node for node in partition.keys() if partition[node] == com]
            size_com = len(list_nodes)
            if size_com > name_to_size[shortname]:
                community_subgraph = nxgraph.subgraph(list_nodes)
                with open("/net/data/graph-models/louvain-clusters/communities/" +
                          shortname + "_" + str(count), 'a') as fout1:
                    networkx.write_edgelist(community_subgraph, fout1)
def read_graph(self, nx_g):
    if self.is_weighted:
        self.G = nx.read_edgelist(nx_g, data=(('weight', float),),
                                  create_using=nx.DiGraph(), edgetype=str)
    else:
        self.G = nx.read_edgelist(nx_g, create_using=nx.DiGraph(), edgetype=str)
        for edge in self.G.edges():
            self.G[edge[0]][edge[1]]['weight'] = 1

    if not self.is_directed:
        self.G = self.G.to_undirected()
def main():
    """
    Program Driver.

    Parses command line arguments to determine where to store output pickle
    files and what networks to attack, reads in networks from the given
    source, runs all necessary attacks, and pickles the output for later use.
    """
    aparse = argparse.ArgumentParser(usage="Attack a collection of networks")
    aparse.add_argument('--network_file', '-f', action='store',
                        default='networks.yaml',
                        help="Path to network config (default: ./networks.yaml)",
                        dest='config_path')
    aparse.add_argument('--picklejar', '-p', action='store', default='.',
                        help='output for pickle files (default: current directory)')
    aparse.add_argument('--update', '-u', action='store_true',
                        help='Only run network processes for networks which have '
                             'not already been analyzed.')
    args = aparse.parse_args()
    cfg = open(args.config_path, 'r')
    for net_attrs in yaml.safe_load_all(cfg):
        picklename = net_attrs["name"] + ".pickle"
        if args.update and picklename in os.listdir(args.picklejar):
            continue
        print "Analyzing network %s..." % net_attrs['name']
        fname = net_attrs['filename']
        data = [(key, eval(value)) for key, value in net_attrs['data'].items()]
        if net_attrs["directed"]:
            network = networkx.read_edgelist(fname, create_using=networkx.DiGraph(),
                                             nodetype=str, data=data).to_undirected()
        else:
            network = networkx.read_edgelist(fname, create_using=networkx.Graph(),
                                             nodetype=str, data=data)
        print "Network file loaded"
        pckl = os.path.normpath(args.picklejar + "/" + picklename)
        ac.compare_to_random_networks(network, FRACS, pckl)
    print "Done!"
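# Hedged sketch of a single document in networks.yaml, inferred from how the
# loop above reads it (all values are hypothetical): `data` maps edge-attribute
# names to strings that eval() to Python types.
#
# ---
# name: karate
# filename: data/karate.edgelist
# directed: false
# data:
#   weight: "float"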
def main():
    msg = "help: sigcomm graph1 graph2 cap dist edges wasted thld " \
          "hops tries ttl prob"
    if len(sys.argv) < 12:
        print msg
        return -1
    g = nx.read_edgelist(sys.argv[1], create_using=nx.Graph())
    dg = nx.DiGraph()
    random.seed(-1)
    cap = int(sys.argv[3])
    dist = int(sys.argv[4])
    edges = int(sys.argv[5])
    wasted = int(sys.argv[6])
    threshold = int(sys.argv[7])
    hops = int(sys.argv[8])
    tries = int(sys.argv[9])
    ttl = int(sys.argv[10])
    prob = float(sys.argv[11])
    cap_edges(g, cap)
    get_followers_dist(g, dg, dist)
    sum_edges(g, edges)
    wasted_packets(g, dg, wasted)
    add_pseudo_edges(g, dg, threshold)
    find_paths(g, dg, hops, tries, ttl, prob)
    print >> sys.stderr, "g nodes", len(g)
    print >> sys.stderr, "g edges", g.size()
    print >> sys.stderr, "dg nodes", len(dg)
    print >> sys.stderr, "dg edges", dg.size()
def show(filename, title):
    if not os.path.isfile(filename + '.png'):
        FLAG = 0
        x = []
        y = []
        reader = csv.reader(open(filename + '.vna', 'rb'), delimiter='\t')  # dialect='excel-tab'
        for row in reader:
            if FLAG == 1:
                x.append(row[0])
                y.append(row[1])
            if row[0] == 'v1':
                FLAG = 1
        with open(filename + '.csv', 'wb') as csvfile:
            writer = csv.writer(csvfile, delimiter=',')
            for i in range(len(x)):
                writer.writerow([x[i]] + [y[i]])
        G = nx.read_edgelist(filename + '.csv', delimiter=",",
                             create_using=nx.Graph(), nodetype=str)
        plot6.Save(G, filename)
    else:
        img = mpimg.imread(filename + '.png')
        plt.imshow(img, interpolation='nearest')
        plt.axis('off')
        plt.suptitle(title, y=0.95)
        plt.show()
def get_followers_dist(g, dg, follow):
    if follow == -1:
        return -1
    if len(dg) == 0:
        dg = nx.read_edgelist(sys.argv[2], create_using=dg)
    no_of_paths = 0
    for u in dg.nodes():
        if not g.has_node(u):
            print "no_source"
            continue
        for v in dg.successors(u):
            if u == v:
                continue
            if g.has_node(v):
                try:
                    print nx.shortest_path_length(g, source=u, target=v)
                    no_of_paths += 1
                except nx.exception.NetworkXNoPath:
                    # NetworkXNoPath is raised when no path exists between u and v
                    print "no_path"
            else:
                print "no_target"
    print >> sys.stderr, "no of paths", no_of_paths
    return no_of_paths
def get_graph(path):
    fh = open(path, 'rb')
    G = nx.read_edgelist(fh)
    fh.close()
    # remove possible self loops
    G.remove_edges_from(G.selfloop_edges())
    return G
def add_pseudo_edges(g, dg, threshold):
    """ flawed logic, needs to be fixed """
    if threshold == -1:
        return -1
    if len(dg) == 0:
        dg = nx.read_edgelist(sys.argv[2], create_using=dg)
    new_edges = []
    for n in dg.nodes():
        if not g.has_node(n):
            continue
        fw_count = {}
        n_dists = nx.single_source_shortest_path_length(g, n, 4)
        followings = set(dg.successors(n))
        for node, dist in n_dists.iteritems():
            if dist > 2:
                continue
            for f in dg.successors(node):
                if f not in followings:
                    if f in fw_count:
                        fw_count[f] = fw_count[f] + 1
                    else:
                        fw_count[f] = 1
        for k, v in fw_count.iteritems():
            if v >= threshold and k in n_dists and n_dists[k] <= 4:
                new_edges.append((n, k))
    for e in new_edges:
        dg.add_edge(*e)
    print >> sys.stderr, "new edges", len(new_edges)
    return 0
def load_data(from_cache):
    if from_cache:
        input_filename = constants.CHARTS_FOLDER_NAME + 'by_nodes'
        reader = open(input_filename, 'r')
        by_nodes = eval(reader.read())
        reader.close()
        input_filename = constants.CHARTS_FOLDER_NAME + 'by_times'
        reader = open(input_filename, 'r')
        by_times = eval(reader.read())
        reader.close()
    else:
        by_nodes = {}
        by_times = {}
        utils.ensure_folder(constants.CHARTS_FOLDER_NAME)
        filenames = os.listdir(constants.GRAPHS_FOLDER_NAME)
        filenames.sort()
        print len(filenames)
        time = 0
        for filename in filenames:
            print 'Processing: ' + filename
            input_filename = constants.GRAPHS_FOLDER_NAME + filename
            # g = nx.read_gpickle(input_filename)
            g = nx.read_edgelist(input_filename, create_using=nx.DiGraph())
            # for wifi data
            # g = nx.read_edgelist(input_filename, '#', ',')
            for v in g.nodes():
                node_state = calc_node_state(g, v)
                if v not in by_nodes:
                    by_nodes[v] = {}
                by_nodes[v][time] = node_state
                if time not in by_times:
                    by_times[time] = {}
                by_times[time][v] = node_state
            time = time + 1
        output_filename = constants.CHARTS_FOLDER_NAME + 'by_nodes'
        writer = open(output_filename, 'w')
        writer.write(str(by_nodes))
        writer.close()
        output_filename = constants.CHARTS_FOLDER_NAME + 'by_times'
        writer = open(output_filename, 'w')
        writer.write(str(by_times))
        writer.close()
    print len(by_times)
    print len(by_nodes)
    return by_nodes, by_times
def _read_cell_graph(self, filename, format):
    """Load the cell-to-cell connectivity graph from a file.

    Returns None if any error happens.
    """
    cell_graph = None
    if filename:
        try:
            start = datetime.now()
            if format == "gml":
                cell_graph = nx.read_gml(filename)
            elif format == "pickle":
                cell_graph = nx.read_gpickle(filename)
            elif format == "edgelist":
                cell_graph = nx.read_edgelist(filename)
            elif format == "yaml":
                cell_graph = nx.read_yaml(filename)
            elif format == "graphml":
                cell_graph = nx.read_graphml(filename)
            else:
                print "Unrecognized format:", format
            end = datetime.now()
            delta = end - start
            config.BENCHMARK_LOGGER.info(
                "Read cell_graph from file %s of format %s in %g s"
                % (filename, format, delta.seconds + 1e-6 * delta.microseconds))
        except Exception, e:
            print e
def read_general(datadir, tolerance, minrepeats):
    """
    Function to read datasets from files in *datadir*.

    Each file represents a graph for a particular timestamp. The name of the
    files is expected to be <timestamp>.ncol, and each line in the file
    represents one edge in the graph, e.g. the line '1 2 5' indicates there is
    an edge between nodes '1' and '2' with weight '5'.

    Parameters
    ----------
    datadir: string
        path to the directory containing the dataset.
    tolerance: float, optional
        For a label to be considered a dominant label, it must be within this
        much of the maximum value found for the quality function. The smaller
        it is, the fewer dominant labels there will be.
    minrepeats: integer
        The number of variations to try before returning the best partition.

    Returns
    -------
    t: list
        an array of timestamps, each representing a snapshot of the communities.
    g1: networkx.Graph
        the last graph to be read from file.
    initial_label_dictionary: dictionary {node: community}
        A dictionary mapping nodes to community labels if it is the first
        snapshot, otherwise *None*.
    """
    raw_file_list = os.listdir(datadir)
    timestamps = sorted([int(f.rstrip(".ncol")) for f in raw_file_list if f.endswith(".ncol")])
    initial_label_dict_filename = os.path.join(datadir, 'initial_label_dict.txt')
    beginning = True
    for t in timestamps:
        f = str(t) + ".ncol"
        fpath = os.path.join(datadir, f)
        # if a file is empty, move on to the next timestamp
        if os.path.getsize(fpath) == 0:
            continue
        g1 = nx.read_edgelist(fpath, nodetype=int, data=(('weight', float),))
        if beginning is True:
            # when called for the first time just return initial_label_dict
            if not os.path.exists(initial_label_dict_filename):
                initial_label_dict = maxQ(g1, tolerance=tolerance, minrepeats=minrepeats)
                with open(initial_label_dict_filename, 'w') as lf:
                    lf.write(repr(initial_label_dict))
            with open(initial_label_dict_filename, 'r') as lf:
                initial_label_dict = eval(lf.read())
            yield (t, g1, initial_label_dict)
            beginning = False
        else:
            yield (t, g1, None)
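# Hedged usage sketch for read_general: "snapshots/" and the parameter values
# are hypothetical. Each yielded tuple is (timestamp, snapshot graph, initial
# label dict or None).
for t, g, init_labels in read_general("snapshots/", tolerance=0.01, minrepeats=5):
    print("t=%d: %d nodes, %d edges" % (t, g.number_of_nodes(), g.number_of_edges()))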
def _read_celltype_graph(self, celltypes_file, format="gml"):
    """
    Read celltype-celltype connectivity graph from file.

    celltypes_file -- the path of the file containing the graph.

    format -- format of the file. allowed values: gml, graphml, edgelist,
    pickle, yaml.
    """
    start = datetime.now()
    celltype_graph = None
    try:
        if format == "gml":
            celltype_graph = nx.read_gml(celltypes_file)
        elif format == "edgelist":
            celltype_graph = nx.read_edgelist(celltypes_file)
        elif format == "graphml":
            celltype_graph = nx.read_graphml(celltypes_file)
        elif format == "pickle":
            celltype_graph = nx.read_gpickle(celltypes_file)
        elif format == "yaml":
            celltype_graph = nx.read_yaml(celltypes_file)
        else:
            print "Unrecognized format %s" % (format)
    except Exception, e:
        print e
- https://github.com/networkx/networkx/blob/master/examples/drawing/sampson_data.zip
"""
import zipfile
from io import BytesIO as StringIO

import matplotlib.pyplot as plt
import networkx as nx

with zipfile.ZipFile("sampson_data.zip") as zf:
    e1 = StringIO(zf.read("samplike1.txt"))
    e2 = StringIO(zf.read("samplike2.txt"))
    e3 = StringIO(zf.read("samplike3.txt"))

G1 = nx.read_edgelist(e1, delimiter="\t")
G2 = nx.read_edgelist(e2, delimiter="\t")
G3 = nx.read_edgelist(e3, delimiter="\t")
pos = nx.spring_layout(G3, iterations=100)
plt.clf()

plt.subplot(221)
plt.title("samplike1")
nx.draw(G1, pos, node_size=50, with_labels=False)
plt.subplot(222)
plt.title("samplike2")
nx.draw(G2, pos, node_size=50, with_labels=False)
plt.subplot(223)
plt.title("samplike3")
nx.draw(G3, pos, node_size=50, with_labels=False)
plt.subplot(224)
def sample_graph(self, hparams, placeholders, adj, features, weights, weight_bins,
                 s_num, node, hde, num=10, outdir=None):
    '''
    Args:
        num - int, number of edges to be sampled (default 10)
        outdir - string, output dir
    '''
    list_edges = []
    for i in range(self.n):
        for j in range(i + 1, self.n):
            list_edges.append((i, j, 1))
            list_edges.append((i, j, 2))
            list_edges.append((i, j, 3))
    # list_edges.append((-1, -1, 0))

    list_weight = [1, 2, 3]
    hparams.sample = True
    eps = np.random.randn(self.n, self.z_dim, 1)
    with open(hparams.z_dir + 'test_prior_' + str(s_num) + '.txt', 'a') as f:
        for z_i in eps:
            f.write('[' + ','.join([str(el[0]) for el in z_i]) + ']\n')

    feed_dict = construct_feed_dict(hparams.learning_rate, hparams.dropout_rate,
                                    self.k, self.n, self.d, hparams.decay_rate,
                                    placeholders)
    feed_dict.update({self.adj: adj[0]})
    feed_dict.update({self.features: features[0]})
    feed_dict.update({self.weight_bin: weight_bins[0]})
    feed_dict.update({self.weight: weights[0]})
    feed_dict.update({self.input_data: np.zeros([self.k, self.n, self.d])})
    feed_dict.update({self.eps: eps})

    prob, ll, z_encoded, kl, sample_mu, sample_sigma, loss, w_edge, labels = self.sess.run(
        [self.prob, self.ll, self.z_encoded, self.kl, self.enc_mu, self.enc_sigma,
         self.cost, self.w_edge, self.label],
        feed_dict=feed_dict)

    prob = np.reshape(prob, (self.n, self.n))
    w_edge = np.reshape(w_edge, (self.n, self.n, self.bin_dim))
    indicator = np.ones([self.n, 3])
    p, list_edges, w_new = normalise(prob, w_edge, self.n, self.bin_dim, [],
                                     list_edges, indicator)

    if not hparams.mask_weight:
        trial = 0
        while trial < 5000:
            candidate_edges = [list_edges[i] for i in
                               np.random.choice(range(len(list_edges)),
                                                [hparams.edges], p=p, replace=False)]
            with open(hparams.sample_file + 'test.txt', 'w') as f:
                for (u, v, w) in candidate_edges:
                    if u >= 0 and v >= 0:
                        f.write(str(u) + ' ' + str(v) + ' {\'weight\':' + str(w) + '}\n')
            f = open(hparams.sample_file + 'test.txt')
            G = nx.read_edgelist(f, nodetype=int)
            if nx.is_connected(G):
                for (u, v, w) in candidate_edges:
                    if u >= 0 and v >= 0:
                        with open(hparams.sample_file + "approach_2_" + str(trial) +
                                  "_" + str(s_num) + '.txt', 'a') as f:
                            f.write(str(u) + ' ' + str(v) + ' {\'weight\':' + str(w) + '}\n')
            trial += 1
    else:
        trial = 0
        while trial < 5000:
            candidate_edges = self.get_masked_candidate(list_edges, prob, w_edge,
                                                        hparams.edges, hde)
            # print("Debug candidate", candidate_edges)
            if len(candidate_edges) > 0:
                with open(hparams.sample_file + 'test.txt', 'w') as f:
                    for uvw in candidate_edges.split():
                        [u, v, w] = uvw.split("-")
                        u = int(u)
                        v = int(v)
                        w = int(w)
                        if u >= 0 and v >= 0:
                            f.write(str(u) + ' ' + str(v) + ' {\'weight\':' + str(w) + '}\n')
                f = open(hparams.sample_file + 'test.txt')
                # try:
                G = nx.read_edgelist(f, nodetype=int)
                # except:
                #     continue
                if nx.is_connected(G):
                    for uvw in candidate_edges.split():
                        [u, v, w] = uvw.split("-")
                        u = int(u)
                        v = int(v)
                        w = int(w)
                        if u >= 0 and v >= 0:
                            with open(hparams.sample_file + "approach_2_" + str(trial) +
                                      "_" + str(s_num) + '.txt', 'a') as f:
                                f.write(str(u) + ' ' + str(v) + ' {\'weight\':' + str(w) + '}\n')
            trial += 1
import networkx as nx
import sys

g = nx.read_edgelist("/fast-data/patentmark/triplets.tsv")
def create_graph_from_file(filename):
    print("----------------build graph--------------------")
    with open(filename, "rb") as f:
        g = nx.read_edgelist(f)
    return g
# Output
OUTPUT_EVENT = sys.argv.pop()
OUTPUT = sys.argv.pop()
if len(sys.argv) == 12:
    isolatable_node_type = sys.argv.pop()

# Load data
logging.debug("Loading data")
filename, file_extension = os.path.splitext(edgelist)
print(filename, file_extension)
if file_extension == ".gexf":
    # when a node has attributes
    G = nx.read_gexf(edgelist, node_type=int)
elif file_extension == ".edgelist":
    # when a node does not have attributes
    G = nx.read_edgelist(edgelist, nodetype=int)
else:
    raise ValueError("The input graph should be saved in .edgelist or .gexf format")
logs = pd.read_csv(sim_log_data)

#
# Preprocess
#
logging.debug("Construct the transmission tree from the log")
logs["id"] = "id"
tree_list = utils.construct_transmission_tree(logs)

logging.debug("Set onset time")
for tid, tree in enumerate(tree_list):
    tree_list[tid] = utils.set_onset_time(tree, time_lag_for_isolation)
for c, idx in color_idx.items():
    plt.scatter(node_pos[idx, 0], node_pos[idx, 1], label=c)  # c=node_colors
plt.legend()
plt.show()


if __name__ == "__main__":
    # G = nx.read_edgelist('test.edge_list.txt', create_using=nx.DiGraph(),
    #                      nodetype=None, data=[('weight', int)])
    G = nx.read_edgelist(
        '../data/ETH/Phishing node classification/TransEdgelist.txt',
        create_using=nx.MultiDiGraph(), delimiter=',', nodetype=None,
        data=[('amount', float), ('weight', int)])

    model = Struc2Vec(G, 10, 80, workers=4, verbose=40)
    model.train()
    embeddings = model.get_embeddings()

    evaluate_embeddings(embeddings)
    plot_embeddings(embeddings)
def __init__(self, input_filename):
    with open(input_filename, 'r') as infile:
        self.G = nx.read_edgelist(infile, nodetype=int, data=(('weight', float),))
    print("successfully loaded graph from file: " + input_filename)
import networkx as nx

G = nx.read_edgelist("../dataset/Email-Enron.txt")
communities = sorted()
start_id = int(sys.argv[4])
if len(sys.argv) == 6:
    p = float(sys.argv[3])
    n_samples = int(sys.argv[4])
    start_id = int(sys.argv[5])

print "file_name =", file_name
print "n_samples =", n_samples
print "start_id =", start_id
print "t =", t
print "alpha =", alpha
if len(sys.argv) == 6:
    print "p =", p

# implicitly remove duplicate edges (i.e. no multiple edges), use type 'int' instead of string
G = nx.read_edgelist("../data/" + file_name + ".gr", '#', '\t', None, nodetype=int)
# G = nx.read_edgelist(file_name, '#', ' ', None, nodetype=int)

print "#nodes :", G.number_of_nodes()
print "#edges :", len(G.edges())
print "#self-loops :", G.number_of_selfloops()
print "#components :", len(nx.connected_components(G))
n_nodes = G.number_of_nodes()

deg_list = nx.degree(G)  # dict[node] = deg
min_deg = min(deg_list.itervalues())
max_deg = max(deg_list.itervalues())
print "min-deg =", min_deg
print "max-deg =", max_deg

# TEST
random_walk_transform()
for edge_index in a_edge_index:
    arr_tmp.append([fund_index,
                    len(list_funds) + edge_index,
                    weight_matrix_total[fund_index, edge_index]])

arr_tmp = np.array(arr_tmp)
pd_tmp = pd.DataFrame(arr_tmp)
pd_tmp[0] = pd_tmp[0].astype(int)
pd_tmp[1] = pd_tmp[1].astype(int)
output_name = 'fund'
path = data_dir + 'graph/{}.csv'.format(output_name)
pd_tmp.to_csv(path, index=False, sep=' ')

nx_G = nx.read_edgelist(path, nodetype=int, data=(('weight', float),),
                        create_using=nx.DiGraph())
nx_G = nx_G.to_undirected()
G = graph.Graph(nx_G, False, 1, 1)
G.preprocess_transition_probs()
walks = G.simulate_walks(200, 200)
walks = [list(map(str, walk)) for walk in walks]

from gensim.models import Word2Vec
model = Word2Vec(walks, size=32, window=6, min_count=0, sg=1,
    return r1, r2


Iteration = 10000  # number of iterations
P = [1, 0.95, 0.9, 0.85, 0.8, 0.75, 0.7, 0.65, 0.6, 0.55, 0.5,
     0.45, 0.4, 0.35, 0.3, 0.25, 0.2, 0.15, 0.1, 0.05, 0]
assortivity = np.zeros((21, 1))
average_path = np.zeros((21, 1))
count = 0
for i in P:
    print(i)
    for j in range(10):
        G = nx.read_edgelist('datasets/network_average_path.txt', nodetype=int)
        r1, r2 = calculate_ass(G, i, Iteration)
        print(r1, r2)
        assortivity[count] = assortivity[count] + r1
        average_path[count] = average_path[count] + r2
    assortivity[count] = assortivity[count] / 10
    average_path[count] = average_path[count] / 10
    count = count + 1

fid = open('results/average_path_ass.txt', 'w')
for i in range(len(P)):
    fid.write(str(assortivity[i]) + ' ' + str(average_path[i]) + '\n')
fid.close()

plt.figure(figsize=(16, 16))
plt.style.use('ggplot')
plt.semilogx(assortivity, average_path, 'o-', label='$r_1$')
color_idx = {}
for i in range(len(X)):
    color_idx.setdefault(Y[i][0], [])
    color_idx[Y[i][0]].append(i)

for c, idx in color_idx.items():
    plt.scatter(node_pos[idx, 0], node_pos[idx, 1], label=c)  # c=node_colors
plt.legend()
plt.show()


if __name__ == "__main__":
    G = nx.read_edgelist('../data/flight/usa-airports.edgelist',
                         create_using=nx.DiGraph(), nodetype=None,
                         data=[('weight', int)])

    model = Node2Vec(G, 10, 80, workers=1, p=0.25, q=2)
    model.train()
    embeddings = model.get_embeddings()

    evaluate_embeddings(embeddings)
    plot_embeddings(embeddings)
        # 3rd: cluster cores
        for candidate in candidates:
            self.cluster_core(candidate)
        print '3. after cluster core, disjoint set - parent dict:', dict(
            zip(range(self.n), self.disjoint_set.parent))

        # 4th: cluster non-core
        self.cluster_non_core()
        print '4. after cluster non-core mark cluster id, cluster(represented by root vertex), min ele id:', dict(
            filter(lambda pair: pair[1] != self.n, zip(range(self.n), self.cluster_dict)))

        # finally, output result
        print '\nfinal result in format:', ' '.join(
            ['core/non-core', 'vertex id', 'cluster id(min core vertex id in this cluster)'])
        self.result_lines.append('c/n vertex_id cluster_id')
        print 'c/n vertex_id cluster_id'
        self.output_result()


if __name__ == '__main__':
    graph = nx.read_edgelist('demo_input_graph.txt', nodetype=int)
    offset_lst, dst_v_lst, deg_lst = to_csr_graph(graph)
    print 'csr representation:\noffset_lst=', offset_lst, '\ndst_v_lst=', dst_v_lst, '\ndeg_lst=', deg_lst, '\n'
    pscan_algo = PScan(offset_lst, dst_v_lst, deg_lst, eps=0.6, min_pts=3)
    pscan_algo.run_algorithm()
'''
Ganesh Prasad - 2018csm1008
Rakesh Meena  - 2018csm1017
Jeevan Kumar  - 2018csm1012

Problem: Given a network of friendships, find the nodes (persons) with the
most and the least friendly neighbourhoods.

Solution: For a node, take the adjacent nodes that are also impressed by it
(a list, say j). For every node in j we get its total number of mutually
impressed friends; dividing the sum by len(j) gives an average that serves as
the factor deciding how friendly a node's neighbourhood is.
'''
import networkx as nx
import matplotlib.pyplot as plt

G = nx.read_edgelist(r"pagerank.txt", create_using=nx.DiGraph(), nodetype=int)
nx.draw(G, with_labels=True)
plt.show()


# Find total number of mutually impressed friends of a node
def num_of_mutually_impressed_nodes(G, node):
    list_successors = G.successors(node)
    total_num = 0
    for successor_node in list_successors:
        if G.has_edge(successor_node, node):
            total_num = total_num + 1
    return total_num


# Returns a list of mutually impressed friends for a node
def mutually_impressed_nodes(G, node):
    list_successors = G.successors(node)
import networkx as nx
import numpy

G = nx.read_edgelist("facebook_combined.txt")
n = list(G.nodes())
spll = []
for u in n:
    for v in n:
        if u != v:
            l = nx.shortest_path_length(G, u, v)
            print("Shortest path between ", u, " and ", v, " is of length ", l)
            spll.append(l)

min_spl = min(spll)
max_spl = max(spll)
avg_spl = numpy.average(spll)
print("Minimum shortest path length : ", min_spl)
print("Maximum shortest path length : ", max_spl)
print("Average shortest path length : ", avg_spl)
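# Hedged cross-check (not part of the original script): on a connected graph,
# NetworkX computes the same average directly, avoiding the O(n^2) loop of
# per-pair shortest_path_length calls above.
print("Cross-check average : ", nx.average_shortest_path_length(G))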
import networkx as nx
from itertools import chain

# this script removes all pages with no incoming links or no outgoing links
# noise reduction process
G = nx.read_edgelist('../datasets/dbpedia_resources_wiki.edgelist',
                     nodetype=int, create_using=nx.DiGraph())
print 'read graph'

remove_nodes_out = (node for node, degree in G.out_degree().iteritems()
                    if degree == 0)  # nodes with out_degree = 0
remove_nodes_in = (node for node, degree in G.in_degree().iteritems()
                   if degree == 0)  # nodes with in_degree = 0
remove_nodes = chain(remove_nodes_out, remove_nodes_in)
G.remove_nodes_from(remove_nodes)
print 'graph has %d nodes and %d edges' % (len(G.nodes()), len(G.edges()))

print "writing graph"
nx.write_edgelist(G, '../graph/dbpedia_resources_wiki_reduced_1_out_1_in.edgelist',
                  data=False)
import collections

import matplotlib.pyplot as plt  # needed for the plotting below
import networkx as nx            # needed for read_edgelist / degree
import numpy as np
import pandas as pd
from networkx import bipartite


###################
# Makes degree histogram
def grapher(G):
    degree_sequence = sorted([d for n, d in G.degree()], reverse=True)  # degree sequence
    # print "Degree sequence", degree_sequence
    degreeCount = collections.Counter(degree_sequence)
    deg, cnt = zip(*degreeCount.items())

    fig, ax = plt.subplots()
    plt.bar(deg, cnt, width=0.80, color='b')
    plt.title("Degree Histogram")
    plt.ylabel("Count")
    plt.xlabel("Degree")
    # log-log option
    # ax.set_xscale("log")
    # ax.set_yscale("log")
    ax.set_xticks([d + 0.1 for d in deg])
    ax.set_xticklabels(deg)
    plt.show()


p = nx.read_edgelist('DCh-Miner_miner-disease-chemical.tsv')
argvs = sys.argv
argc = len(argvs)
if argc < 2:
    print('Please give frovedis_server calling command as the first argument \n'
          '(e.g. "mpirun -np 2 -x /opt/nec/nosupport/frovedis/ve/bin/frovedis_server")')
    quit()

# Frovedis
try:
    FrovedisServer.initialize(argvs[1])
    frov_graph = fnx.read_edgelist(DATASET, nodetype=np.int32, delimiter=' ')
    fres = set(fnx.bfs_edges(frov_graph, src, depth_limit=depth))
    FrovedisServer.shut_down()
except Exception as e:
    print("status=Exception: " + str(e))
    sys.exit(1)

# NetworkX
try:
    nx_graph = nx.read_edgelist(DATASET, nodetype=np.int32, delimiter=' ')
    nres = set(nx.bfs_edges(nx_graph, src, depth_limit=depth))
except Exception as e:
    print("status=Exception: " + str(e))
    sys.exit(1)

print(fres)
print(nres)
if len(fres - nres) == 0:
    print("status=Passed")
else:
    print("status=Failed")
    [1, 0, 0, 0, 1, 0, 0, 0, 1, 0],
    [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 1, 0, 0, 0, 1],
    [0, 0, 0, 0, 0, 0, 0, 0, 1, 0]
])
G3 = nx.Graph(G_mat)
print('list(G3.edges())\n', list(G3.edges()))
# quick visualization
# plt.figure()
# nx.draw_networkx(G3)
# plt.show()

# edge list
G4 = nx.read_edgelist('./data/G_edgelist.txt', data=[('weight', int)])
print('list(G4.edges(data=True))\n', list(G4.edges(data=True)))
# quick visualization
# plt.figure()
# nx.draw_networkx(G4)
# plt.show()

# 2.4 DataFrame
G_df = pd.read_csv('./data/G_edgelist.txt', delim_whitespace=True,
                   header=None, names=['n1', 'n2', 'weight'])
print('G_df\n', G_df)
G5 = nx.from_pandas_dataframe(G_df, 'n1', 'n2', edge_attr='weight')
print('list(G5.edges(data=True))\n', list(G5.edges(data=True)))
# quick visualization
def label_prop():
    G = nx.read_edgelist("facebook_combined.txt", create_using=nx.Graph(), nodetype=int)
    print nx.info(G)
    for i in G.nodes():
        G.node[i]['label'] = i
        G.node[i]['ID'] = i
        G.node[i]['l_1'] = 0
        G.node[i]['l_2'] = 0
        G.node[i]['l_next'] = 0
    '''
    for n, nbrs in G.adjacency_iter():
        for nbr, edict in nbrs.items():
            if nbr == 200:
                print n, nbrs, G.node[nbr]['label']
    '''
    mainStop = False
    i = 0
    while i < 100:
        if i == 99:
            set_communities = set()
            for n in G.nodes():
                set_communities.add(G.node[n]['label'])
            print "the number of communities after 100 iterations==", len(set_communities)
        i += 1
        mainStop = False
        l1_stop = True
        l2_stop = True
        for n in G.nodes():
            if not (G.node[n]['label'] == G.node[n]['l_1']):
                l1_stop = False
        for n in G.nodes():
            if not (G.node[n]['label'] == G.node[n]['l_2']):
                l2_stop = False
        # print l1_stop, l2_stop
        if not (l1_stop or l2_stop):
            # print "in not loop"
            for n, nbrs in G.adjacency_iter():
                dict = {}  # NOTE: shadows the builtin name
                dict.clear()
                # count label occurrences among neighbours
                for nbr, d in nbrs.items():
                    temp = G.node[nbr]['label']
                    if not dict.has_key(temp):
                        dict[temp] = 1
                    else:
                        dict[temp] += 1
                max_key = max(dict, key=dict.get)
                G.node[n]['l_next'] = max_key
                G.node[n]['l_2'] = G.node[n]['l_1']
                G.node[n]['l_1'] = G.node[n]['label']
                G.node[n]['label'] = max_key
            '''
            for n in G.nodes():
                G.node[n]['l_2'] = G.node[n]['l_1']
                G.node[n]['l_1'] = G.node[n]['label']
                G.node[n]['label'] = G.node[n]['l_next']
            '''
        else:
            print "The Community converges"
            mainStop = True
            print i
            return i
# -*- coding: utf-8 -*-
import random
import networkx as nx
import matplotlib.pyplot as plt
from operator import itemgetter

G2 = nx.read_edgelist('Facebook_Dataset.txt', create_using=nx.Graph(), nodetype=int)


# return the friends of a user
def friends(graph, user):
    return set(graph.neighbors(user))


# returns a list of friends of friends of a user
def friends_of_friends(graph, user):
    x = []
    for each in graph.neighbors(user):
        for item in graph.neighbors(each):
            x.append(item)
    return set(x)


# returns a list of common friends
def common_friends(graph, user1, user2):
    x1 = friends(graph, user1)
    x2 = friends(graph, user2)
    return set(x1 & x2)
def __init__(self):
    self.graph = nx.read_edgelist('data/1_edge_list/kaggle_numbers_bidi.edgelist',
                                  create_using=nx.DiGraph)
    print(len(self.graph.nodes))
    print(len(self.graph.edges))
def nodeID_mapping(input_file_name, output_file_name=" ", reverse=False):
    if input_file_name.endswith(".edges") or input_file_name.endswith(".txt"):
        f = open(input_file_name, "r")
        g = nx.read_edgelist(f, create_using=nx.DiGraph(), nodetype=str, data=False)
        # print g.edges()[:10]
        f.close()
    elif input_file_name.endswith(".gpickle"):
        g = nx.read_gpickle(input_file_name)

    if output_file_name == " " or output_file_name is None:
        output_file_name = os.path.abspath(input_file_name).split(".")[0] + "_index0.edges"
    print("write graph edges list to: %s" % output_file_name)
    print("Original graph: # nodes: %d, # edges: %d"
          % (g.number_of_nodes(), g.number_of_edges()))

    id_mapping = {}
    i2s_mapping = {}
    index = 0
    for (u, v) in g.edges():
        if u not in id_mapping:
            id_mapping[u] = index
            i2s_mapping[index] = u
            index += 1
        if v not in id_mapping:
            id_mapping[v] = index
            i2s_mapping[index] = v
            index += 1
    new_edges = [(id_mapping[u], id_mapping[v]) for (u, v) in g.edges()]
    new_g = nx.DiGraph()
    new_g.add_edges_from(new_edges)
    if reverse:
        print("edge reversed...")
        new_g.reverse(copy=False)
    print("New graph: # nodes: %d, # edges: %d"
          % (new_g.number_of_nodes(), new_g.number_of_edges()))
    nodes = list(new_g.nodes())
    print("New graph: min(node id): %d, max(node id): %d" % (min(nodes), max(nodes)))
    print("is Directed Acyclic Graph: %s " % nx.is_directed_acyclic_graph(new_g))
    nx.write_edgelist(new_g, output_file_name, data=False)

    print("# instances in mapping: %d (%d)" % (len(id_mapping), len(i2s_mapping)))
    mapping = {"s2i": id_mapping, "i2s": i2s_mapping}
    mapping_file = os.path.abspath(input_file_name).split(".")[0] + "_id_mapping.pkl"
    print("id mapping file is saved: %s" % mapping_file)
    print("mapped graph file is saved at: %s" % output_file_name)
    with open(mapping_file, "wb") as f:
        pickle.dump(mapping, f)
    return output_file_name, mapping_file
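# Hedged usage sketch for nodeID_mapping: "graph.edges" is a hypothetical
# input file. The helper writes a 0-indexed edge list and a pickled
# {"s2i", "i2s"} mapping next to the input, and returns both paths.
edges_file, mapping_file = nodeID_mapping("graph.edges", reverse=False)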
import networkx as nx
import sys, os, datetime

sys.path.insert(1, os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
from utils import io_utils

# Author: Katya Gurgel
# Description: a script cleaning up unweighted SNAP graphs encoded by pairs of
# nodes on each line, each representing individual edges. The output is printed.
# Usage: python ./snap_cleanup.py INPUT_FILE > OUTPUT_DIR/OUTPUT_FILE

G = nx.read_edgelist(sys.argv[1])
print('# {} {} {}'.format(datetime.datetime.now(),
                          os.popen('git rev-parse HEAD').read().strip(),
                          sys.argv[1]))
io_utils.print_uw_graph(G)
import networkx as nx
import math
import matplotlib.pyplot as plt


def avg_degree(A):
    degree = A.degree()
    Average_degree = sum(degree.values()) / float(len(A))
    return Average_degree


S = nx.Graph()
G = nx.read_edgelist("edges.txt", delimiter=",")
k = avg_degree(G)
print("The average degree of original graph is \t", k)

# probability of edge creation
p = k / (G.number_of_nodes() - 1)
S.add_edges_from(nx.fast_gnp_random_graph(G.number_of_nodes(), p,
                                          seed=None, directed=False).edges())
c = avg_degree(S)
avg_clusco = nx.average_clustering(S)
print("Average local Clustering\t" + str(avg_clusco))

# average path length (Erdos-Renyi approximation: ln(N) / ln(<k>))
apl = math.log(S.number_of_nodes()) / float(math.log(c))
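# Worked check (hedged, with hypothetical numbers) of the Erdos-Renyi
# approximation used above, apl ~ ln(N) / ln(<k>): for N = 1000 nodes and
# average degree <k> = 10, ln(1000)/ln(10) = 6.908/2.303 ~ 3.0.
print(math.log(1000) / math.log(10))  # -> ~3.0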
import gzip

import numpy as np
import networkx as nx
import webget

webget.download("https://snap.stanford.edu/data/twitter_combined.txt.gz")
with gzip.open('twitter_combined.txt.gz') as f:
    g = nx.read_edgelist(f)
if __name__ == "__main__":
    # G = nx.read_edgelist('../data/wiki/Wiki_edgelist.txt',
    #                      create_using=nx.DiGraph(), nodetype=None, data=[('weight', int)])
    #
    # model = Node2Vec(G, walk_length=10, num_walks=80,
    #                  p=0.25, q=4, workers=1, use_rejection_sampling=0)
    # model.train(embed_size=64, window_size=5, iter=3)
    # embeddings = model.get_embeddings()
    # print(embeddings)
    #
    # evaluate_embeddings(embeddings)
    # plot_embeddings(embeddings)

    G = nx.read_edgelist('../data/text.txt', create_using=nx.DiGraph(),
                         nodetype=None, data=[('weight', int)])
    model = Node2Vec(G, walk_length=10, num_walks=80, p=0.25, q=4,
                     workers=1, use_rejection_sampling=0)
    model.train(embed_size=4, window_size=5, iter=3)
    embeddings = model.get_embeddings()
    print(embeddings)
    # print('dict_2:', dict_2)
    return dict_1


dict_1 = create_dict(hosts_list)

# pos = open('pickles/dict_2.pkl', 'wb')
# pickle.dump(dict_2, pos)
# pos.close()
pos = open('pickles/dict_2.pkl', 'rb')
dict_2 = pickle.load(pos)
pos.close()

# load dataset to a directed graph
G = nx.read_edgelist('dataset/edgelist.txt', delimiter='\t',
                     create_using=nx.DiGraph())
nodes_list = nx.nodes(G)
print('**Nodes list**', nodes_list)
# print(len(nodes_list))


# *************************************************************************
# compute in_degree and out_degree
def in_out_degree(G, dim):
    num_rows = 0
    out_degree_matrix = np.zeros((dim, 1))
    in_degree_matrix = np.zeros((dim, 1))
    for k in hosts_list:
        if k in nodes_list:
def main(args, jupyter=False):
    ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
    seed = args.seed
    edgelist = os.path.join(
        ROOT_DIR, '../data/{}/train/{}_{}_seed_{}.txt'.format(
            args.sampling_method, args.graph, args.percent_test, seed))
    if not args.bipartite:
        G = nx.read_edgelist(edgelist)
    else:
        bip_edges = list()
        A = set()
        B = set()
        for line in open(edgelist, 'r'):
            a, b = line.strip().split()
            A.add(a)
            B.add(b)
            bip_edges.append((a, b))
        G = nx.Graph()
        G.add_nodes_from(A, bipartite=0)
        G.add_nodes_from(B, bipartite=1)
        G.add_edges_from(bip_edges)
    test_path = os.path.join(
        ROOT_DIR, '../data/{}/test/{}_{}_seed_{}.txt'.format(
            args.sampling_method, args.graph, args.percent_test, seed))
    output_dir = os.path.join(ROOT_DIR, '../output/{}/'.format(args.sampling_method))
    emb_path = os.path.join(
        output_dir, '{}_{}_{}_seed_{}.emb'.format(
            args.embedding_method, args.graph, args.percent_test, seed))
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    if not args.output_override:
        output_path = os.path.join(
            output_dir, '{}_{}_{}_{}_{}_{}_{}_k_{}.txt'.format(
                args.method, args.graph, args.embedding_method, args.percent_test,
                args.exact_search_tolerance, args.bailout_tol, seed, args.k))
    else:
        output_path = args.output_override

    if args.embedding_method == 'netmf1':
        embeddings = NetMF(args.embedding_method, edgelist, test_path, emb_path, G,
                           normalize=True, window_size=1)
    elif args.embedding_method == 'netmf2':
        embeddings = NetMF(args.embedding_method, edgelist, test_path, emb_path, G,
                           normalize=True, window_size=2)
    elif args.embedding_method == 'bine':
        embeddings = BiNE(args.embedding_method, edgelist, test_path, emb_path, G,
                          normalize=True)
    elif args.embedding_method == 'aa':
        embeddings = AA(args.embedding_method, edgelist, test_path, emb_path, G,
                        normalize=True)
    if args.force_emb or not os.path.exists(emb_path):
        if os.path.exists(emb_path.replace('.emb', '_nodeX.npy')):
            os.remove(emb_path.replace('.emb', '_nodeX.npy'))
        embeddings.run(G)

    if args.method in {'lapm'}:
        sel = LaPMSelector(args.method, G, args.k, embeddings, output_path,
                           seed=seed, bipartite=args.bipartite)
        load_embeddings = True
    elif args.method in {'cn'}:
        sel = CNSelector(args.method, G, args.k, embeddings, output_path,
                         seed=seed, bipartite=args.bipartite)
        load_embeddings = False
    elif args.method in {'js'}:
        sel = JSSelector(args.method, G, args.k, embeddings, output_path,
                         seed=seed, bipartite=args.bipartite)
        load_embeddings = False
    elif args.method in {'aa'}:
        sel = AASelector(args.method, G, args.k, embeddings, output_path,
                         seed=seed, bipartite=args.bipartite)
        load_embeddings = False
    elif args.method in {'nmf+bag'}:
        sel = BaggingEnsemble(args.method, G, args.k, embeddings, output_path,
                              seed=seed, bipartite=args.bipartite)
        load_embeddings = False
    elif args.method == 'LinkWaldo':
        num_groupings = 0
        if args.DG:
            num_groupings += 1
        if args.SG:
            num_groupings += 1
        if args.CG:
            num_groupings += 1
        if num_groupings > 1:
            if args.bailout_tol > 0.0:
                sel = MGBailoutSelector(args.method, G, args.k, embeddings, output_path,
                                        DG=args.DG, SG=args.SG, CG=args.CG,
                                        exact_search_tolerance=args.exact_search_tolerance,
                                        seed=seed, bipartite=args.bipartite)
            else:
                sel = MGSelector(args.method, G, args.k, embeddings, output_path,
                                 DG=args.DG, SG=args.SG, CG=args.CG,
                                 exact_search_tolerance=args.exact_search_tolerance,
                                 seed=seed, bipartite=args.bipartite)
        else:
            if args.DG and args.bailout_tol > 0.0:
                sel = DGBailoutSelector(args.method, G, args.k, embeddings, output_path,
                                        DG=args.DG, SG=args.SG, CG=args.CG,
                                        exact_search_tolerance=args.exact_search_tolerance,
                                        seed=seed, bipartite=args.bipartite)
            elif args.DG:
                sel = DGSelector(args.method, G, args.k, embeddings, output_path,
                                 DG=args.DG, SG=args.SG, CG=args.CG,
                                 exact_search_tolerance=args.exact_search_tolerance,
                                 seed=seed, bipartite=args.bipartite)
            elif args.SG:
                sel = SGSelector(args.method, G, args.k, embeddings, output_path,
                                 DG=args.DG, SG=args.SG, CG=args.CG,
                                 exact_search_tolerance=args.exact_search_tolerance,
                                 seed=seed, bipartite=args.bipartite)
            elif args.CG:
                sel = CGSelector(args.method, G, args.k, embeddings, output_path,
                                 DG=args.DG, SG=args.SG, CG=args.CG,
                                 exact_search_tolerance=args.exact_search_tolerance,
                                 seed=seed, bipartite=args.bipartite)
        load_embeddings = True

    sel.num_groups = args.num_groups
    sel.num_groups_alt = args.num_groups_alt
    sel.bailout_tol = args.bailout_tol
    sel.bag_epsilon = args.bag_epsilon
    sel.skip_output = args.skip_output
    embeddings.load_data(load_embeddings=load_embeddings)
    if jupyter:
        return sel
    _time = sel.select()
    sel.write_res(_time)
    if not args.skip_output:
        sel.write()
# Do the same process to compute a training subset from within the test graph
edge_splitter_train = EdgeSplitter(graph_test, graph)
graph_train, examples, labels = edge_splitter_train.train_test_split(
    p=0.1, method="global"
)
(
    examples_train,
    examples_model_selection,
    labels_train,
    labels_model_selection,
) = train_test_split(examples, labels, train_size=0.75, test_size=0.25)

print(graph_train.info())'''

G_test_nx = nx.read_edgelist('val_positive.txt')
nodes = G_test_nx.nodes()
feature_vector = [1, 1, 1, 1, 1]
node_data = pd.DataFrame([feature_vector for i in range(len(nodes))],
                         index=[node for node in nodes])
graph_test = sg.StellarGraph.from_networkx(G_test_nx, node_features=node_data)

G_test_neg = nx.read_edgelist('val_negative.txt')
edges_test_neg = G_test_neg.edges()

G_train_nx = nx.read_edgelist('training.txt')
nodes_train = G_train_nx.nodes()
feature_vector = [1, 1, 1, 1, 1]
node_data = pd.DataFrame([feature_vector for i in range(len(nodes_train))],