def generate_graph(n, beta, mean_degree):
    """Generate a test graph and write its artefacts to disk.

    An empty ``n``-node graph is filled in by ``utils.randPairings`` using the
    array returned by ``utils.degreeDistribution``.  The adjacency list goes
    to ``generated/``, a degree histogram is drawn (and the graph itself when
    ``n < 1000``), the current matplotlib figure is saved as a PNG, and
    finally ``utils.generateFeed(n)`` is called.
    """
    graph = nx.empty_graph(n)
    utils.randPairings(graph, utils.degreeDistribution(beta, n, mean_degree))

    # Both output files share the same "<n>-<beta>-<mean_degree>-" stem.
    name_parts = (str(n), str(beta), str(mean_degree))

    # output of the RGG
    if not os.path.exists('generated'):
        os.mkdir('generated')
    nx.write_adjlist(graph, "generated/adj-%s-%s-%s-.txt" % name_parts)

    # plotting
    utils.drawDegreeHistogram(graph)
    if n < 1000:
        utils.drawGraph(graph)
    plt.savefig("generated/graph-%s-%s-%s-.png" % name_parts)

    if not os.path.exists('feed'):
        os.mkdir('feed')
    utils.generateFeed(n)
def gnp_survey_benchmark(path, n_array, p_array):
    """Time ``gomory_hu_tree`` on G(n, p) random graphs and log the results as CSV.

    One CSV row is written to ``path`` per ``n``: the node count followed by
    the wall-clock seconds ``gomory_hu_tree`` took for each ``p``.  Every
    input graph and the tree computed from it are also written to the working
    directory as adjacency lists (``Graph(n,p).csv`` / ``Gomory_Hu(n,p).csv``).

    Args:
        path: Destination of the CSV timing table (overwritten).
        n_array: Iterable of node counts.
        p_array: Iterable of edge probabilities.
    """
    # Materialise once: the probabilities are needed for the header and again
    # for every n.
    p_values = list(p_array)
    with open(path, 'w', newline='\n') as f:
        w = csv.writer(f)
        # Header follows the probabilities actually benchmarked; for the usual
        # [0.2, 0.4, 0.5, 0.6, 0.8] it is identical to the old fixed header.
        w.writerow(['n'] + ['p = ' + str(p) for p in p_values])
        for n in n_array:
            row = [str(n)]  # was named `list`, shadowing the builtin
            for p in p_values:
                print("Starting trial n = " + str(n) + ", p = " + str(p) + ".")
                G = nx.gnp_random_graph(n, p)
                # G.edges() works on networkx 1.x and 2.x+; edges_iter() was
                # removed in 2.0.
                for (i, j) in G.edges():
                    G[i][j]['weight'] = 1
                print("Calling GH algorithm")
                ############ TIMING ############
                start = time.time()
                gh = gomory_hu_tree(G)
                end = time.time()
                t = end - start
                ########## END TIMING ##########
                row.append(t)
                print("Completed trial n = " + str(n) + ", p = " + str(p) + ".")
                print("Time = " + str(t))
                nx.write_adjlist(G, 'Graph(' + str(n) + "," + str(p) + ").csv")
                nx.write_adjlist(gh, 'Gomory_Hu(' + str(n) + "," + str(p) + ").csv")
                print()
            w.writerow(row)
def construct_RankTh(fCorr, ffMRI):
    """Generate rank-based thresholding networks for a fixed list of d values.

    Networks are written as ``Network_d<d>.adjlist`` into
    ``<parent of fCorr's directory>/Adjlist/Network_RankTh``.

    Args:
        fCorr: Path of the correlation matrix file; the output directory is
            derived from it.
        ffMRI: Passed through to ``NetUtil.load_corrmat_sparse``.
    """
    # some parameters
    Target_d = [3, 4, 5, 6, 8, 10, 15, 20, 30]

    # Output directory is relative to fCorr directory
    CorrDir, _ = os.path.split(fCorr)
    BaseDir, _ = os.path.split(CorrDir)
    OutBase = os.path.join(BaseDir, 'Adjlist')
    if not os.path.exists(OutBase):
        os.makedirs(OutBase)
    OutDir = os.path.join(OutBase, 'Network_RankTh')
    if not os.path.exists(OutDir):
        os.makedirs(OutDir)

    # loading the correlation matrix
    R, NodeInd = NetUtil.load_corrmat_sparse(fCorr, ffMRI)

    # loop for generating rank-th networks
    prev_d = None
    for d in Target_d:
        # Parenthesised single-argument print behaves the same on Python 2 and 3.
        print("Generating a network with threshold d=" + str(d))
        # generating the network
        if prev_d is None:
            G, trR = NetUtil.net_builder_RankTh(R, NodeInd, d)
            R = []  # releasing the memory
        else:
            # just generate the difference between the previous threshold,
            # then combine the resulting graphs
            deltaG, trR = NetUtil.net_builder_RankTh(trR, NodeInd, d - prev_d)
            G = nx.compose(G, deltaG)
        prev_d = d

        # saving the network
        fNet = os.path.join(OutDir, "Network_d" + str(d) + ".adjlist")
        nx.write_adjlist(G, fNet)
def construct_HardTh(fCorr, ffMRI):
    """Generate hard-thresholding networks for a fixed list of K values.

    Networks are written as ``Network_K<K>.adjlist`` into
    ``<parent of fCorr's directory>/Adjlist/Network_HardTh``.

    Args:
        fCorr: Path of the correlation matrix file; the output directory is
            derived from it.
        ffMRI: Passed through to ``NetUtil.load_corrmat_sparse``.
    """
    # some parameters
    Target_K = [10, 20, 30, 40, 50]

    # Output directory is relative to fCorr directory
    CorrDir, _ = os.path.split(fCorr)
    BaseDir, _ = os.path.split(CorrDir)
    OutBase = os.path.join(BaseDir, 'Adjlist')
    if not os.path.exists(OutBase):
        os.makedirs(OutBase)
    OutDir = os.path.join(OutBase, 'Network_HardTh')
    if not os.path.exists(OutDir):
        os.makedirs(OutDir)

    # loading the correlation matrix
    R, NodeInd = NetUtil.load_corrmat_sparse(fCorr, ffMRI)

    # loop for generating hard-th networks
    for K in Target_K:
        # Parenthesised single-argument print behaves the same on Python 2 and 3.
        print("Generating a network with threshold <k>=" + str(K))
        # generating the network (the second return value is not used here)
        G, _ = NetUtil.net_builder_HardTh(R, NodeInd, K)

        # saving the network
        fNet = os.path.join(OutDir, "Network_K" + str(K) + ".adjlist")
        nx.write_adjlist(G, fNet)
def build_graph_for_all_hosts(in_path_to_outbreaks_root, in_output_path, k, all_files, all_hosts):
    # Build (or reload from disk) the De Bruijn graph for all hosts and return
    # its networkx graph.  Intermediate results are cached under
    # in_output_path + str(k): the k-mer list (.npy), the per-file k-mer
    # dictionary (.pkl via save_obj/load_obj) and the graph (.adjlist).
    # NOTE(review): in_path_to_outbreaks_root is not used in this function.
    # NOTE(review): the nesting below was reconstructed from a source whose
    # indentation was lost -- confirm against the original file.
    all_kmers_str = []
    preprocessing_dir = in_output_path + str(k) + '/preprocessing/'
    if os.path.isfile(in_output_path + str(k) + '/preprocessing/all_kmers_k' + str(k) + '.npy'):
        # Cached k-mer list: load it instead of recomputing.
        print('K-mers alredy available at ' + in_output_path + str(k) + '/preprocessing/all_kmers_k' + str(k) + '.npy')
        all_kmers_np = np.load(in_output_path + str(k) + '/preprocessing/all_kmers_k' + str(k) + '.npy')
        all_kmers_str = all_kmers_np.tolist()
    else:
        if not os.path.exists(preprocessing_dir):
            os.makedirs(preprocessing_dir)
        print("Getting kmers for all files...")
        kmers_start_time = current_milli_time()
        kmers_files = {}
        if os.path.isfile(in_output_path + str(k) + '/all_files_kmers_dictionary.pkl'):
            print("Kmers for all files are already available. Reading file...")
            kmers_files = load_obj(in_output_path + str(k) + '/all_files_kmers_dictionary')
            print("Done.")
        else:
            __kmers = get_kmers_for_all_files(all_files, all_hosts, k)
            kmers_files = __kmers[0]
            print("Done.")
            print("Saving kmers for all files to disk...")
            save_obj(kmers_files, in_output_path + str(k) + '/all_files_kmers_dictionary')
            print("Done.")
        # NOTE(review): __kmers is only bound when the pickle was NOT found
        # above, so the cached-pickle path looks like it raises NameError
        # here -- confirm and derive all_kmers from kmers_files if so.
        all_kmers = __kmers[1]
        # NOTE(review): the message says "seconds", but the helper is named
        # current_milli_time -- presumably milliseconds; verify the unit.
        print("Getting kmers for all files... Done. Took %d seconds." % (current_milli_time() - kmers_start_time))
        print("Total number of kmers for k = %d is %d."
              %(k, len(all_kmers)))
        all_kmers_str = [str(kmer) for kmer in all_kmers]
        print("Done getting kmers for all files...")
        np.save(preprocessing_dir + '/all_kmers_k' + str(k) + '.npy', np.array(all_kmers_str))
    distances = []
    dbg_node_labels = []
    if os.path.isfile(preprocessing_dir + '/all_distances_k' + str(k) + '.npy'):
        print(preprocessing_dir + '/all_distances_k' + str(k) + '.npy')
        distances = np.load(preprocessing_dir + '/all_distances_k' + str(k) + '.npy')
        print('Nodes alredy available at ' + preprocessing_dir + '/all_distances_k' + str(k) + '.npy')
        # NOTE(review): the node labels are loaded from the same
        # all_distances file as `distances` -- possibly a copy/paste slip.
        dbg_node_labels_np = np.load(preprocessing_dir + '/all_distances_k' + str(k) + '.npy')
        dbg_node_labels = dbg_node_labels_np.tolist()
    else:
        # A bare lambda is used as an attribute holder so that `dbg.G` works
        # whether the graph is read from disk or freshly built.
        dbg = lambda: None
        if os.path.isfile(in_output_path + str(k) + '/graph_' + str(k) + '.adjlist'):
            print('De Bruijn graph already available. Reading from disk...')
            G = nx.read_adjlist(in_output_path + str(k) + '/graph_' + str(k) + '.adjlist')
            dbg.G = G
            print('Done.')
        else:
            print("De Bruijn graph not available. Building...")
            graph_start_time = current_milli_time()
            dbg = DeBruijnGraph(all_kmers_str, k + 1)
            nx.write_adjlist(dbg.G,in_output_path + str(k) + '/graph_' + str(k) + '.adjlist')
            print("Building De Bruijn graph... Done. Took %d seconds." % (current_milli_time() - graph_start_time))
    # NOTE(review): `dbg` is only bound in the else-branch above; when the
    # distances file exists this return looks like it raises NameError.
    return(dbg.G)
def preprocess_metapath(adjM, type_mask, expected_metapaths, dataoutput, i):
    """Write the metapath graphs and index arrays for start node type ``i``.

    Everything is written into ``<dataoutput>/<i>/`` (created if missing):
    one ``<metapath>.adjlist`` per graph returned by
    ``utils.preprocess.get_networkx_graph`` and one ``<metapath>_idx.npy``
    per array returned by ``utils.preprocess.get_edge_metapath_idx_array``,
    where ``<metapath>`` is the metapath's items joined with ``-``.

    Args:
        adjM: Adjacency matrix passed to ``get_metapath_neighbor_pairs``.
        type_mask: Node-type mask passed to the ``utils.preprocess`` helpers.
        expected_metapaths: Indexable by ``i``; ``expected_metapaths[i]`` is
            the list of metapaths to process.
        dataoutput: Output root directory, with or without a trailing slash.
        i: Start node type index.
    """
    # Build the directory once and reuse it for every file.  The adjlist path
    # used to be `dataoutput + "{}/"...`, which only matched the directory
    # created here when `dataoutput` ended with a path separator.
    out_dir = os.path.join(dataoutput, '{}'.format(i))
    pathlib.Path(out_dir).mkdir(parents=True, exist_ok=True)

    metapaths = expected_metapaths[i]
    specific_metapath_dict_list = utils.preprocess.get_metapath_neighbor_pairs(
        adj_matrix=adjM, type_mask=type_mask, expected_metapath_list=metapaths)

    g_list = utils.preprocess.get_networkx_graph(
        specific_metapath_dict_list=specific_metapath_dict_list,
        type_mask=type_mask, start_node_type_index=i)
    for g, metapath in zip(g_list, metapaths):
        nx.write_adjlist(
            g, os.path.join(out_dir, "-".join(map(str, metapath)) + ".adjlist"))

    specific_metapath_array_list = utils.preprocess.get_edge_metapath_idx_array(
        specific_metapath_dict_list=specific_metapath_dict_list)
    for metapath, specific_metapath_array in zip(metapaths, specific_metapath_array_list):
        np.save(os.path.join(out_dir, "-".join(map(str, metapath)) + "_idx.npy"),
                specific_metapath_array)
def generate_random_graph():
    """Generate random G(n, p) graphs and save each as JSON and as an adjlist.

    The outer loop repeats the generation 99 times; the inner loop sweeps nine
    probabilities ``step / (nodes * 10)`` for ``step`` in 1..9.  ``nodes`` is
    read from module scope.  Files are written to ``<cwd>/../res`` and named
    ``random_<nodes>_<prob>_<bridges>``, so a later graph with the same
    parameters and bridge count overwrites an earlier one.

    :return: None; saves generated graphs to json and as networkx adjlist
    """
    out_dir = os.path.join(os.getcwd(), '..', 'res')
    for _ in range(1, 100):
        for step in range(1, 10):
            prob = float(step / float(nodes * 10))  # den=nodes*10
            G = nx.fast_gnp_random_graph(nodes, prob).to_undirected()
            res = {}
            try:
                res = nx.to_dict_of_lists(G)
            except TypeError:  # Python 3.x
                # Was `sys.stdout("Error")`: the stream object is not
                # callable, so the handler itself raised TypeError.
                sys.stdout.write("Error")
            size = len(list(nx.bridges(G)))  # number of bridges
            file_name_adj = "random_{}_{}_{}.adj_list".format(nodes, prob, size)
            file_name_json = "random_{}_{}_{}.json".format(nodes, prob, size)
            file_path_adj = os.path.join(out_dir, file_name_adj)
            file_path_json = os.path.join(out_dir, file_name_json)
            # writing to .json
            with open(file_path_json, "w") as f:
                json.dump(res, f, indent=4)
            # writing to .adjlist; the context manager closes the handle that
            # was previously leaked on every iteration
            with open(file_path_adj, 'wb+') as fh:
                nx.write_adjlist(G, fh)
def construct_de_bruijn_velvet(kmers, draw, outfile):
    """Build a directed overlap graph over the k-mers in ``kmers``.

    Every k-mer becomes a node carrying its mapped value as the attribute
    ``num``.  An edge ``a -> b`` is added whenever ``a[1:] == b[:-1]``.

    Args:
        kmers: Mapping from k-mer to the value stored as node attribute ``num``.
        draw: The graph is drawn with a spring layout only when this is the
            string ``"True"``.
        outfile: Path for an adjacency-list dump; ``""`` disables the dump.

    Returns:
        The constructed ``nx.DiGraph``.
    """
    # Index k-mers by their (k-1)-prefix so each overlap lookup is a single
    # dict access.  The previous version scanned a numpy array per k-mer and
    # indexed `kmers.keys()`, which is not subscriptable on Python 3.
    by_prefix = {}
    for kmer in kmers:
        by_prefix.setdefault(kmer[:-1], []).append(kmer)

    # find overlaps: kmer -> every k-mer whose prefix equals kmer's suffix
    edge_list = [(kmer, successor)
                 for kmer in kmers
                 for successor in by_prefix.get(kmer[1:], ())]

    # make graph: k-mers as nodes and overlaps as edges
    G = nx.DiGraph()
    for kmer, num in kmers.items():
        G.add_node(kmer, num=num)
    G.add_edges_from(edge_list)

    # draw the graph if desired
    if draw == "True":
        nx.draw_spring(G)
        plt.show()

    # output adjacency list format of the graph if desired
    if outfile != "":
        nx.write_adjlist(G, outfile)
    return G
def construct_HardThE(fCorr, ffMRI):
    """Generate hard-thresholding networks matched in edge count to rank-th networks.

    For every d in a fixed list, the rank-thresholded network
    ``Adjlist/Network_RankTh/Network_d<d>.adjlist`` is read to obtain its edge
    count E, a network is built with ``NetUtil.net_builder_HardThE(R, NodeInd, E)``
    and written to ``Adjlist/Network_HardThE/Network_EQd<d>.adjlist``.  The
    rank-th files must therefore exist already.

    Args:
        fCorr: Path of the correlation matrix file; the output directory is
            derived from it.
        ffMRI: Passed through to ``NetUtil.load_corrmat_sparse``.
    """
    # some parameters
    Target_d = [3, 4, 5, 6, 8, 10, 15, 20, 30]

    # Output directory is relative to fCorr directory
    CorrDir, _ = os.path.split(fCorr)
    BaseDir, _ = os.path.split(CorrDir)
    OutBase = os.path.join(BaseDir, 'Adjlist')
    if not os.path.exists(OutBase):
        os.makedirs(OutBase)
    OutDir = os.path.join(OutBase, 'Network_HardThE')
    if not os.path.exists(OutDir):
        os.makedirs(OutDir)

    # directory where rank-th networks are
    RankDir = os.path.join(OutBase, 'Network_RankTh')

    # loading the correlation matrix
    R, NodeInd = NetUtil.load_corrmat_sparse(fCorr, ffMRI)

    # loop for generating hard-th networks
    for d in Target_d:
        # Parenthesised single-argument print behaves the same on Python 2 and 3.
        print("Generating an equivalent hard thresholded network with d=" + str(d))
        # loading the rank thresholded network to determine the number of edges
        fdNet = os.path.join(RankDir, "Network_d" + str(d) + ".adjlist")
        E = nx.read_adjlist(fdNet).number_of_edges()

        # generating the network (the second return value is not used here)
        G, _ = NetUtil.net_builder_HardThE(R, NodeInd, E)

        # saving the network
        fNet = os.path.join(OutDir, "Network_EQd" + str(d) + ".adjlist")
        nx.write_adjlist(G, fNet)
def write_graph(graph, filename, file_type=None):
    """Write ``graph`` to ``filename`` in the format named by ``file_type``.

    When ``file_type`` is falsy it is looked up with ``get_graph_type(filename)``.

    Raises:
        RuntimeError: if no type can be determined or the type is unknown.
    """
    kind = file_type if file_type else get_graph_type(filename)
    if not kind:
        raise RuntimeError("Unable to determine graph file type.")

    # Writers are stored by name and resolved lazily, so a writer missing from
    # the installed networkx only matters when that format is requested.
    simple_writers = (
        (("adjlist",), "write_adjlist"),
        (("edgelist",), "write_edgelist"),
        (("gexf",), "write_gexf"),
        (("gml",), "write_gml"),
        (("gpickle",), "write_gpickle"),
        (("graphml",), "write_graphml"),
        (("yaml",), "write_yaml"),
        (("pajek", "net"), "write_pajek"),
    )
    for kinds, writer_name in simple_writers:
        if kind in kinds:
            getattr(networkx, writer_name)(graph, filename)
            return

    if kind == "adjmat":
        # Dense adjacency matrix, comma separated, one row per line.
        matrix = networkx.to_numpy_matrix(graph)
        numpy.savetxt(filename, matrix, delimiter=",", newline="\n", fmt="%g")
        return

    raise RuntimeError("Unrecognized output graph file type.")
def main():
    """Read photos from stdin, group them into slides and write a slide graph.

    Input: a first line with the photo count N, then N lines of the form
    ``<H|V> <int> <tag> <tag> ...``.  Each ``H`` photo becomes one slide;
    ``V`` photos are paired in input order, the slide's tag set being the
    union of the pair.  Two slides are connected when ``score`` of their tag
    sets is positive, with that score as edge weight.  The graph is written to
    ``G.adjlist`` and ``G.edgelist``.
    """
    N = int(input())
    photoList = []
    for i in range(N):
        photo = list(input().split())
        # [orientation, declared int (presumably the tag count), tag set, photo index]
        photoList.append([photo[0], int(photo[1]), set(photo[2:]), i])

    photoH = [p for p in photoList if p[0] == 'H']
    photoV = [p for p in photoList if p[0] == 'V']

    # A slide is [tag_set, photo_index] or [tag_set, [photo_index, photo_index]].
    slides = [p[2:] for p in photoH]
    # zip() of the even/odd halves pairs consecutive vertical photos and drops
    # a trailing unpaired one instead of raising IndexError.
    for first, second in zip(photoV[::2], photoV[1::2]):
        slides.append([first[2] | second[2], [first[3], second[3]]])
    # print(slides)

    G = nx.Graph()
    start = timer()
    # Iterate over slides, not photos: after pairing there are fewer slides
    # than photos, so range(N) ran past the end of `slides`.
    n_slides = len(slides)
    for i in range(n_slides):
        for j in range(i + 1, n_slides):
            s = score(slides[i][0], slides[j][0])
            if s > 0:
                G.add_edge(i, j, weight=s)
    end = timer()
    print(f"time spent {end-start:0.4f}")

    nx.write_adjlist(G, "G.adjlist")
    nx.write_edgelist(G, "G.edgelist")
    sol = []  # not used in the visible code; kept as a placeholder
def load_data(self):
    """Load the StringDB graph for ``self.graph_type`` into ``self.nx_graph``.

    A cached adjacency list under ``<datastore>/graphs`` is read when present.
    Otherwise the graph is built from the protein-links file: rows whose
    ``self.name_to_edge[self.graph_type]`` column is non-zero are kept,
    protein ids are mapped through ``ensp_to_hugo_map`` (pairs with an
    unmapped id are dropped), and the result is cached.  When
    ``self.randomize`` is set the nodes are relabelled via ``randmap``.
    """
    print("Building StringDB Graph. It can take a while the first time...")
    self.proteinlinks = self.datastore + "/graphs/9606.protein.links.detailed.v11.0.txt"
    savefile = self.datastore + "/graphs/stringdb_graph_" + self.graph_type + "_edges.adjlist"

    if os.path.isfile(savefile):
        self.nx_graph = nx.read_adjlist(savefile)
    else:
        print(" ensp_to_hugo_map")
        ensmap = ensp_to_hugo_map(self.datastore)
        print(" reading self.proteinlinks")
        edges = pd.read_csv(self.proteinlinks, sep=' ')
        selected_edges = edges[self.name_to_edge[self.graph_type]] != 0
        raw_pairs = edges[selected_edges][["protein1", "protein2"]].values.tolist()
        # Drop the first five characters of each id (presumably the "9606."
        # prefix) once per id, and test membership on the mapping itself
        # rather than on `.keys()`, which is a linear scan on Python 2.
        stripped = ((p1[5:], p2[5:]) for p1, p2 in raw_pairs)
        edgelist = [[ensmap[a], ensmap[b]]
                    for a, b in stripped
                    if a in ensmap and b in ensmap]
        print(" creating OrderedGraph")
        self.nx_graph = nx.OrderedGraph(edgelist)
        print(" writing graph")
        nx.write_adjlist(self.nx_graph, savefile)

    # Randomize
    if self.randomize:
        self.nx_graph = nx.relabel.relabel_nodes(
            self.nx_graph, randmap(self.nx_graph.nodes))
    print("Graph built !")
def writeList(self, file_name):
    """
    Export the adjacency list of ``self.graph`` to a file.

    :param file_name: path of the file the adjacency list is written to
    :return: None
    """
    graph = self.graph
    nx.write_adjlist(graph, file_name)
def createMergedGraph(groupSampleDict, processedDataDir, rawModelDir):
    """Merge the per-sample digraphs of every group and write the result.

    For each group, the adjacency lists
    ``../<rawModelDir>/<sample>/<sample>AdjList.txt`` of its samples are read
    as digraphs and composed into one graph, which is written to
    ``../<processedDataDir>/<group>/`` as ``<group>AdjList.txt`` and
    ``<group>Graph.xml`` (GraphML).  The group directory is created if missing.

    Args:
        groupSampleDict: Mapping from group name to an iterable of sample names.
        processedDataDir: Output directory name, relative to ``..``.
        rawModelDir: Input directory name, relative to ``..``.
    """
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print('Merging genomes from specified taxonomic groups (lineage/clade/group)')

    # Loop over the keys of the dictionary, one for each group
    for group in groupSampleDict:
        # Start from an empty graph and fold every sample's graph into it
        mergedGraph = nx.DiGraph()
        for sample in groupSampleDict[group]:
            # Read in adjacency list and convert to digraph object
            myDiGraph = nx.read_adjlist(
                '../' + rawModelDir + '/' + sample + '/' + sample + 'AdjList.txt',
                create_using=nx.DiGraph())
            # Append to the previous graph
            mergedGraph = nx.compose(mergedGraph, myDiGraph)

        # Check that the proper output directory exists. If not, create it.
        groupDir = '../' + processedDataDir + '/' + group
        if not os.path.exists(groupDir):
            os.makedirs(groupDir)

        nx.write_adjlist(mergedGraph, groupDir + '/' + group + 'AdjList.txt')
        nx.write_graphml(mergedGraph, groupDir + '/' + group + 'Graph.xml')
    return
def load_graph_data(dataset_str):
    """Load the cosponsorship pairs and return the member ids and adjacency matrix.

    Reads ``number_cosponsor1.json`` (a sequence of items whose first two
    entries are the source and target member), builds a ``MultiDiGraph`` with
    one weight-1 edge per pair, and writes it to ``graph.adjlist``.

    Args:
        dataset_str: Not used; the input file name is fixed.

    Returns:
        ``(member_idx, weighted_adj)``: the sorted array of distinct members
        and the matrix returned by ``nx.to_numpy_matrix`` for the graph.
    """
    import json

    with open("number_cosponsor1.json", "r") as fp:
        member_cosponsor = json.load(fp)

    # Collect distinct members with a set instead of repeated list scans;
    # np.sort makes the collection order irrelevant.
    members = set()
    for item in member_cosponsor:
        members.add(item[0])
        members.add(item[1])
    member_idx = np.sort(list(members))

    # source -> list of targets, in first-seen source order
    graph = {}
    for item in member_cosponsor:
        graph.setdefault(item[0], []).append(item[1])

    DG = nx.MultiDiGraph()
    for source, targets in graph.items():
        for target in targets:
            DG.add_edge(source, target, weight=1)
    weighted_adj = nx.to_numpy_matrix(DG)

    # Context manager closes the output handle, which used to be leaked.
    with open("graph.adjlist", 'wb') as fh:
        nx.write_adjlist(DG, fh)
    return member_idx, weighted_adj
def RGG(n, beta, mean_degree):
    # Generate a random graph on n nodes from the array returned by
    # utils.powerLawArray, write its adjacency list under generated/, and save
    # a log-log degree-rank plot (with an inset drawing for n < 1000) as PNG.
    # NOTE(review): the nesting of the `if n < 1000` block was reconstructed
    # from a source whose indentation was lost -- confirm.
    # NOTE(review): the generated/ directory must already exist; it is not
    # created here.
    G = nx.empty_graph(n)
    powerLawArray = utils.powerLawArray(n, beta, mean_degree)
    powerLawDegreeArray = np.array(powerLawArray, dtype = np.longlong)
    sumOfDegrees = powerLawDegreeArray.sum()
    # Cumulative sums shifted right by one (leading 0, entry at index n
    # removed): delimiterArray[i] is the sum of the first i entries.
    delimiterArray = np.cumsum(powerLawDegreeArray)
    delimiterArray = np.insert(delimiterArray, 0, 0)
    delimiterArray = np.delete(delimiterArray, n)
    someCounter = 0
    # Draw sumOfDegrees/2 random endpoint pairs; each endpoint is a uniform
    # integer in [0, sumOfDegrees) mapped to an index by searchsorted.
    # NOTE(review): searchsorted uses side='left' by default -- confirm the
    # bucket boundaries (and the maximum returned index) are as intended.
    # NOTE(review): `/` is integer division on Python 2 but true division on
    # Python 3, which changes the iteration count for odd sums.
    while someCounter < sumOfDegrees/2:
        G.add_edge(np.searchsorted(delimiterArray, rnd.randrange(sumOfDegrees)), np.searchsorted(delimiterArray, rnd.randrange(sumOfDegrees)))
        someCounter += 1
    txtname = "generated/adj-%s-%s-%s-.txt" % (str(n), str(beta), str(mean_degree))
    nx.write_adjlist(G, txtname)
    # NOTE(review): nx.degree(G).values() and indexing the result of
    # connected_component_subgraphs look like the networkx 1.x API -- confirm
    # the pinned networkx version.
    degreeSequence=sorted(nx.degree(G).values(),reverse=True)
    dmax=max(degreeSequence)  # not used below
    plt.clf()
    plt.cla()
    plt.loglog(degreeSequence,'b-',marker='o')
    plt.title("Degree rank plot")
    plt.ylabel("degree")
    plt.xlabel("rank")
    if n < 1000:
        # Inset axes showing the first connected component.
        plt.axes([0.45,0.45,0.45,0.45])
        plt.cla()
        Gcc=nx.connected_component_subgraphs(G)[0]
        pos=nx.spring_layout(Gcc)
        plt.axis('off')
        nx.draw_networkx_nodes(Gcc,pos,node_size=20)
        nx.draw_networkx_edges(Gcc,pos,alpha=0.4)
    pngname = "generated/graph-%s-%s-%s-.png" % (str(n), str(beta), str(mean_degree))
    plt.savefig(pngname)
def load_graph(cutoff=0):
    """Load the graph for ``cutoff`` from its cache file, or build it from ``ncd.csv``.

    The cache file is ``graph_<cutoff with two decimals>.ajl``.  When it does
    not exist, the graph is built from ``load_diagram("ncd.csv", cutoff=...)``:
    every index that appears in an edge becomes a node labelled with its name,
    and each edge carries its value as ``weight``.  The adjacency list is then
    cached unless ``cutoff == 1``.

    Args:
        cutoff: Threshold forwarded to ``load_diagram``.

    Returns:
        ``(G, names)``.  On the cache path ``names`` is whatever
        ``load_names("ncd.csv")`` returns; on the build path it is the list of
        names of the nodes that were added, in index order.
    """
    ajl_name = "graph_%0.2f.ajl" % cutoff
    if path.Path(ajl_name).is_file():
        # NOTE(review): an adjlist stores neither the `label` nor the `weight`
        # attributes, and read_adjlist is called without nodetype -- a graph
        # loaded here may differ from a freshly built one; confirm.
        G = nx.read_adjlist(ajl_name)
        names = load_names("ncd.csv")
    else:
        coords, names = load_diagram("ncd.csv", cutoff=cutoff)
        n_edges = len(coords[0])

        # Mark every index that is an endpoint of at least one edge.
        name_used = [False] * len(names)
        for x in range(n_edges):
            name_used[coords[1][0][x]] = True
            name_used[coords[1][1][x]] = True
        print("%d nodes" % sum(name_used))

        G = nx.Graph()
        _names = []
        for x, name in enumerate(names):
            if name_used[x]:
                G.add_node(x, label=name)
                # Previously nothing was ever appended, so this branch always
                # returned an empty name list.
                _names.append(name)
        names = _names

        for x in range(n_edges):
            G.add_edge(coords[1][0][x], coords[1][1][x], weight=coords[0][x])
        if cutoff != 1:
            nx.write_adjlist(G, ajl_name)
    return G, names
def createMergedGraph(groupSampleDict, processedDataDir, rawModelDir):
    """Merge the per-sample digraphs of every group and write the result.

    For each group, the adjacency lists
    ``<rawModelDir>/<sample>/<sample>AdjList.txt`` of its samples are read as
    digraphs and composed into one graph, which is written to
    ``<processedDataDir>/<group>/`` as ``<group>AdjList.txt`` and
    ``<group>Graph.xml`` (GraphML).  The group directory is created if missing.

    Args:
        groupSampleDict: Mapping from group name to an iterable of sample names.
        processedDataDir: Directory that receives one sub-directory per group.
        rawModelDir: Directory holding one sub-directory per sample.
    """
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print('Merging genomes from specified taxonomic group')

    # Loop over the keys of the dictionary, one for each group
    for group in groupSampleDict:
        # Start from an empty graph and fold every sample's graph into it
        mergedGraph = nx.DiGraph()
        for sample in groupSampleDict[group]:
            # Read in adjacency list and convert to digraph object
            myDiGraph = nx.read_adjlist(
                rawModelDir+'/'+sample+'/'+sample+'AdjList.txt',
                create_using=nx.DiGraph())
            # Append to the previous graph
            mergedGraph = nx.compose(mergedGraph, myDiGraph)

        # Check that the proper output directory exists. If not, create it.
        groupDir = processedDataDir+'/'+group
        if not os.path.exists(groupDir):
            os.makedirs(groupDir)

        nx.write_adjlist(mergedGraph, groupDir+'/'+group+'AdjList.txt')
        nx.write_graphml(mergedGraph, groupDir+'/'+group+'Graph.xml')
    return
def construct_de_bruijn_velvet(kmers, draw, outfile):
    """Build a directed overlap graph over the k-mers in ``kmers``.

    Every k-mer becomes a node carrying its mapped value as the attribute
    ``num``.  An edge ``a -> b`` is added whenever ``a[1:] == b[:-1]``.

    Args:
        kmers: Mapping from k-mer to the value stored as node attribute ``num``.
        draw: The graph is drawn with a spring layout only when this is the
            string ``"True"``.
        outfile: Path for an adjacency-list dump; ``""`` disables the dump.

    Returns:
        The constructed ``nx.DiGraph``.
    """
    # Group k-mers by their (k-1)-prefix so each overlap lookup is one dict
    # access.  The previous version scanned a numpy array per k-mer and
    # indexed `kmers.keys()`, which is not subscriptable on Python 3.
    prefix_index = {}
    for kmer in kmers:
        prefix_index.setdefault(kmer[:-1], []).append(kmer)

    # find overlaps: kmer -> every k-mer whose prefix equals kmer's suffix
    edge_list = []
    for kmer in kmers:
        for successor in prefix_index.get(kmer[1:], ()):
            edge_list.append((kmer, successor))

    # make graph: k-mers as nodes and overlaps as edges
    G = nx.DiGraph()
    for kmer, num in kmers.items():
        G.add_node(kmer, num=num)
    G.add_edges_from(edge_list)

    # draw the graph if desired
    if draw == "True":
        nx.draw_spring(G)
        plt.show()

    # output adjacency list format of the graph if desired
    if outfile != "":
        nx.write_adjlist(G, outfile)
    return G
def save_stuff(Graph,a):
    """Save ``Graph`` (and some module-level tables) in the formats selected by ``a``.

    Args:
        Graph: The graph to save.
        a: Container of integer selectors; each one present enables a step:
            0 - GraphML via ``ox.save_graphml`` to ``network.graphml``
            1 - comma-delimited adjacency list to ``test.adjlist``
            2 - adjacency matrix as compressed ``.npz`` (``file1``)
            3 - pickle the module-level ``fire``, ``healthcare`` and
                ``shops`` objects

    Returns:
        None.
    """
    if (0 in a):
        ox.save_graphml(Graph,'network.graphml')
    if (1 in a):
        # Context manager closes the handle even if writing fails.
        with open("test.adjlist",'wb') as fh:
            nx.write_adjlist(Graph, fh, delimiter=',')
    if (2 in a):
        A = nx.adjacency_matrix(Graph)
        sp.sparse.save_npz('file1', A, compressed=True)
    if (3 in a):
        fire.to_pickle('fire.pkl')
        healthcare.to_pickle('healthcare.pkl')
        shops.to_pickle('shops.pkl')
    return
def createNWs():
    """Create ``numofNWs`` connected networks of each type in ``NW_type``.

    Each network is built by the generator matching its type tag, stitched
    into a single connected component if needed, and written to
    ``NWs/<type><k>N=<N>.dat`` as an adjacency list.  The process exits once
    all networks are written.  ``NW_type``, ``numofNWs`` and ``N`` are read
    from module scope.
    """
    for NW in NW_type:
        for k in range(numofNWs):
            if NW == 'SF':
                G = Scale_free(N, 2.3, 0)
            elif NW == 'BA':
                # Barabasi-Albert preferential attachment network
                G = BA(N, 5, 0)
            elif NW == 'ER':
                G = ER(N, 10, 0)  # 3rd arg = with seed (1) or not (0)
            elif NW == 'WS001':
                G = SW(N, 10, 0.01, 0)
            elif NW == 'WS01':
                G = SW(N, 10, 0.1, 0)
            elif NW == 'Comm':
                G = Comm(N, 0)
            # elif NW == 'Asso':

            # Pick one random node per component (largest component first)
            # and chain the picks together until the graph is connected.
            while not nx.is_connected(G):
                components = sorted(nx.connected_components(G), key=len, reverse=True)
                picked = []
                for component in components:
                    members = list(component)
                    picked.append(members[np.random.randint(len(members))])
                for left, right in zip(picked, picked[1:]):
                    G.add_edge(left, right)

            fname = NW + str(k) + 'N=' + str(N) + '.dat'
            nx.write_adjlist(G, os.path.join('NWs', fname))
    sys.exit()
def write_adj_matrix(zipfile, network):
    """Add the adjacency list of ``network`` to an open zip archive as ``network.txt``.

    The list is first written to a temporary ``network.txt`` in the current
    working directory, which is removed afterwards.

    Args:
        zipfile: An object with a ``write(filename)`` method (an open archive).
        network: The graph to export.
    """
    tmp_name = 'network.txt'
    try:
        nx.write_adjlist(network, tmp_name)
        zipfile.write(tmp_name)
    finally:
        # Remove the scratch file even when writing or archiving fails.
        if os.path.exists(tmp_name):
            os.remove(tmp_name)

####################- End of Network Data -####################
def test_adjlist_delimiter(self):
    """Round-trip a 3-node path graph through an in-memory adjlist delimited by ':'."""
    separator = ':'
    original = nx.path_graph(3)
    buffer = io.BytesIO()
    nx.write_adjlist(original, buffer, delimiter=separator)
    buffer.seek(0)
    restored = nx.read_adjlist(buffer, nodetype=int, delimiter=separator)
    restored_nodes = sorted(restored.nodes())
    restored_edges = sorted(restored.edges())
    assert_equal(restored_nodes, sorted(original.nodes()))
    assert_equal(restored_edges, sorted(original.edges()))
def save_pop(self,mypath):
    """Save the current graph population as adjacency-list text files.

    Graph ``i`` of ``self.graphs`` is written to ``./<mypath>/g<i>.txt``.
    ``mypath`` is created if it does not exist.

    Args:
        mypath: Output directory.
    """
    if not os.path.isdir(mypath):
        os.makedirs(mypath)
    # enumerate() replaces the Python-2-only xrange(len(...)) indexing.
    for i, graph in enumerate(self.graphs):
        s = './'+mypath+'/g'+str(i)+'.txt'
        # Context manager closes the file even if writing fails.
        with open(s,'wb') as fh:
            nx.write_adjlist(graph,fh)
def test_adjlist_delimiter(self):
    """A ':'-delimited adjlist written to a byte buffer reads back unchanged."""
    delim = ":"
    source_graph = nx.path_graph(3)
    stream = io.BytesIO()
    nx.write_adjlist(source_graph, stream, delimiter=delim)
    stream.seek(0)
    loaded = nx.read_adjlist(stream, nodetype=int, delimiter=delim)
    assert_nodes_equal(list(loaded), list(source_graph))
    assert_edges_equal(list(loaded.edges()), list(source_graph.edges()))
def test_adjlist_delimiter(self):
    """Write and re-read a path graph using ':' as the adjlist delimiter."""
    expected = nx.path_graph(3)
    options = {"delimiter": ':'}
    handle = io.BytesIO()
    nx.write_adjlist(expected, handle, **options)
    handle.seek(0)
    actual = nx.read_adjlist(handle, nodetype=int, **options)
    assert_nodes_equal(list(actual), list(expected))
    assert_edges_equal(list(actual.edges()), list(expected.edges()))
def save_graph(self, path):
    """Write ``self.G`` to ``path`` as an adjacency list unless ``path`` already exists.

    Missing parent directories are created.

    Args:
        path: Destination file.  An existing path is left untouched.
    """
    if os.path.exists(path):
        return
    dir_path = os.path.dirname(path)
    # dirname() is '' for a bare filename; os.makedirs('') would raise, so
    # only create a directory when there is one to create.
    if dir_path and not os.path.exists(dir_path):
        os.makedirs(dir_path)
    nx.write_adjlist(self.G, path)
def rebuttal_data():
    # For each of the 'flickr' and 'lastfm' datasets: build a graph from the
    # <dataname>.nodes / <dataname>.edges files under a hard-coded directory,
    # pickle the unfiltered graph, drop nodes of degree below degree_thrd,
    # relabel the nodes to integers, add a self-loop to every node, and write
    # the result as gpickle, adjlist and edgelist into the working directory.
    # NOTE(review): the nesting was reconstructed from a source whose
    # indentation was lost -- confirm.
    for dataname in ['flickr','lastfm']:
        count = 0
        degree_thrd = 3
        index = []
        G = nx.Graph()
        with open('/home/local/ASUAD/wzhan139/Dropbox (ASU)/Project_Code/gae/'+dataname+'/'+dataname+'.nodes') as f:
            for line in f:
                # NOTE(review): the bare except stops reading at the first
                # line that fails for any reason (e.g. fewer than two
                # fields), silently truncating the input.
                try:
                    word = line.split()
                    if(count%1000==0):
                        print("Processed node " + word[0])
                    index.append(word[0])
                    # The second field is kept as the node attribute old_label.
                    G.add_node(word[0], old_label=word[1])
                    count += 1
                except:
                    break
        print("Reading node Done!")
        count = 0
        with open('/home/local/ASUAD/wzhan139/Dropbox (ASU)/Project_Code/gae/'+dataname+'/'+dataname+'.edges') as f:
            for line in f:
                # Same stop-at-first-failure behaviour as for the nodes file.
                try:
                    link = line.split()
                    if (count % 100000==0):
                        print("Processed edge " + link[0])
                    # print("Constructing graph in node " + str(count))
                    G.add_edge(link[0], link[1])
                    count += 1
                except:
                    break
        print("Reading edges Done!")
        # G2 = nx.convert_node_labels_to_integers(G, label_attribute='old_label')
        # The adjacency matrix is built only to read off the node count.
        num_node = nx.adjacency_matrix(G).shape[0]
        sparsity = G.number_of_edges() / num_node ** 2
        print("no thredshold graph sparsity is " + str(sparsity))
        print(nx.info(G))
        nx.write_gpickle(G, "large_"+dataname+".nothred.gpickle")
        # Collect first, remove afterwards, so the graph is not mutated while
        # its node view is being iterated.
        remove_node = []
        for n, d in G.nodes(data=True):
            if G.degree(n) < degree_thrd:
                remove_node.append(n)
        G.remove_nodes_from(np.asarray(remove_node))
        G = nx.convert_node_labels_to_integers(G)
        num_node = nx.adjacency_matrix(G).shape[0]
        # G3 comes from a matrix with ones on the main diagonal only, i.e. one
        # self-loop per node; composing it with G adds those loops.
        G3 = nx.from_scipy_sparse_matrix(sp.dia_matrix((np.ones(num_node), 0), shape=nx.adjacency_matrix(G).shape))
        G4 = nx.compose(G, G3)
        nx.write_gpickle(G4, "large_"+dataname+".gpickle")
        nx.write_adjlist(G4, "large_"+dataname+"_adj")
        nx.write_edgelist(G4, "large_"+dataname+"_edgelist")
        sparsity = G4.number_of_edges() / num_node ** 2
        print("sparsity is " + str(sparsity))
        print(nx.info(G4))
def issues_network(out, repo, github):
    """Build the issue interaction network of ``repo`` and write it to ``out`` as an adjacency list."""
    nx.write_adjlist(
        networkutil.create_interaction_network(
            datautil.get_issues_interaction(repo, github),
            repo_name=repo),
        out)
def allFiles(groupName):
    """Build a graph from an edge-list file and run the partition pipeline on it.

    The graph is written to ``adj_list.txt`` as an adjacency list, then
    ``getPartitions`` and ``edgesInCommunitiesGraph`` are called.

    Args:
        groupName: Path of the edge-list file (node ids are parsed as int).
    """
    # Read all edge-list lines; the context manager closes the file even if
    # reading fails.
    with open('%s' % groupName, 'r') as fdRead:
        lines = fdRead.readlines()

    G = nx.parse_edgelist(lines, nodetype=int)  # build graph from the edge list
    nx.write_adjlist(G, 'adj_list.txt')  # write graph as adjacency list to file
    getPartitions(G)  # get partitions graph
    # add edges to graph of partitions
    edgesInCommunitiesGraph('adj_list.txt', 'partitions.txt')
def large_social_networks_twitter():
    # Build an undirected graph of Twitter users from the followees/followers
    # BSON dumps at hard-coded paths, pickle the unfiltered graph, drop nodes
    # of degree below degree_thrd, relabel the nodes to integers, add a
    # self-loop to every node, and write the result as gpickle, adjlist and
    # edgelist into the working directory.
    # NOTE(review): the nesting was reconstructed from a source whose
    # indentation was lost -- confirm.
    # count = 0
    degree_thrd = 3
    index = []
    G = nx.Graph()
    # Pass 1: every user_name in the followees dump becomes a node.
    with open('/mnt/wzhan139/cross media data/Twitter/twitter_followees.bson', "rb") as f:
        data = bson.decode_file_iter(f, bson.CodecOptions(unicode_decode_error_handler="ignore"))
        count = 0
        for c, d in enumerate(data):
            print("Reading node "+ str(c))
            index.append(d['user_name'])
            G.add_node(d['user_name'])
            count += 1
    # Pass 2: connect each user to those followees that are already nodes.
    with open('/mnt/wzhan139/cross media data/Twitter/twitter_followees.bson', "rb") as f:
        data = bson.decode_file_iter(f, bson.CodecOptions(unicode_decode_error_handler="ignore"))
        for c, d in enumerate(data):
            print("Constructing graph in node " + str(c))
            for j in range(len(d['followees'])):
                if G.has_node(d['followees'][j]['screen_name']):
                    G.add_edge(d['user_name'], d['followees'][j]['screen_name'])
    # Pass 3: the same for followers.  add_edge creates d['user_name'] as a
    # node if it was not added in pass 1.
    with open('/mnt/wzhan139/cross media data/Twitter/twitter_followers.bson', "rb") as f:
        data = bson.decode_file_iter(f, bson.CodecOptions(unicode_decode_error_handler="ignore"))
        for c, d in enumerate(data):
            print("Constructing graph in node " + str(c))
            for i in range(len(d['followers'])):
                if G.has_node(d['followers'][i]['screen_name']):
                    G.add_edge(d['user_name'], d['followers'][i]['screen_name'])
    # Integer labels; the original user name is kept as attribute old_label.
    G2 = nx.convert_node_labels_to_integers(G,label_attribute='old_label')
    # The adjacency matrix is built only to read off the node count.
    num_node = nx.adjacency_matrix(G2).shape[0]
    sparsity = G2.number_of_edges() / num_node ** 2
    print("no thredshold graph sparsity is " + str(sparsity))
    print(nx.info(G2))
    nx.write_gpickle(G2, "twitter.nothred.gpickle")
    # Collect first, remove afterwards, so the graph is not mutated while its
    # node view is being iterated.
    remove_node=[]
    for n, d in G2.nodes(data=True):
        if G2.degree(n)<degree_thrd:
            remove_node.append(n)
    G2.remove_nodes_from(np.asarray(remove_node))
    G2 = nx.convert_node_labels_to_integers(G2)
    num_node=nx.adjacency_matrix(G2).shape[0]
    # G3 comes from a matrix with ones on the main diagonal only, i.e. one
    # self-loop per node; composing it with G2 adds those loops.
    G3 = nx.from_scipy_sparse_matrix(sp.dia_matrix((np.ones(num_node), 0), shape=nx.adjacency_matrix(G2).shape))
    G4=nx.compose(G2,G3)
    nx.write_gpickle(G4, "twitter.gpickle")
    nx.write_adjlist(G4, "twitter_adj")
    nx.write_edgelist(G4, "twitter_edgelist")
    sparsity = G4.number_of_edges()/num_node**2
    print("sparsity is "+ str(sparsity))
    print(nx.info(G4))
def test_adjlist_integers(self):
    """An integer-labelled copy of ``self.G`` survives an adjlist round trip with ``nodetype=int``."""
    (fd, fname) = tempfile.mkstemp()
    try:
        G = nx.convert_node_labels_to_integers(self.G)
        nx.write_adjlist(G, fname)
        H = nx.read_adjlist(fname, nodetype=int)
        H2 = nx.read_adjlist(fname, nodetype=int)
        assert_nodes_equal(list(H), list(G))
        assert_edges_equal(list(H.edges()), list(G.edges()))
    finally:
        # Release the descriptor and the temp file even when an assertion fails.
        os.close(fd)
        os.unlink(fname)
def convert_graph_to_text(graph, filename):
    """
    Write the adjacency list of a graph object to a file.
    This is the minimum data required to reconstruct the graph.

    :param graph: the graph object whose adjacency list is written.
    :param filename: the name of the file to write to.
    :return: None
    """
    destination = filename
    networkx.write_adjlist(graph, destination)
def large_social_networks_flickr():
    # Build an undirected graph of Flickr users from a JSON-lines friends dump
    # at a hard-coded path, pickle the unfiltered graph, drop nodes of degree
    # below degree_thrd, relabel the nodes to integers, add a self-loop to
    # every node, and write the result as gpickle, adjlist and edgelist into
    # the working directory.
    # NOTE(review): the nesting was reconstructed from a source whose
    # indentation was lost -- confirm.
    count = 0
    degree_thrd = 3
    index = []
    G = nx.Graph()
    # Pass 1: every user_name becomes a node.
    # NOTE(review): the 'U' mode flag is deprecated in Python 3 and may be
    # rejected by recent interpreters -- confirm the target Python version.
    with codecs.open('/mnt/wzhan139/cross media data/Flickr/flickr_friends.json','rU','utf-8') as f:
        for line in f:
            # NOTE(review): the bare except stops reading at the first line
            # that fails for any reason, silently truncating the input.
            try:
                data=json.loads(line)
                print("Reading node "+str(count))
                index.append(data['user_name'])
                G.add_node(data['user_name'])
                count+=1
            except:
                break
    count = 0
    # Pass 2: connect each user to those 'following' entries that are nodes.
    with codecs.open('/mnt/wzhan139/cross media data/Flickr/flickr_friends.json','rU','utf-8') as f:
        for line in f:
            try:
                data=json.loads(line)
                # count is never incremented in this loop, so the message
                # always reports node 0.
                print("Constructing graph in node "+str(count))
                for j in range(len(data['following'])):
                    if G.has_node(data['following'][j]['username']):
                        G.add_edge(data['user_name'], data['following'][j]['username'])
            except:
                break
    # Integer labels; the original user name is kept as attribute old_label.
    G2 = nx.convert_node_labels_to_integers(G,label_attribute='old_label')
    # The adjacency matrix is built only to read off the node count.
    num_node = nx.adjacency_matrix(G2).shape[0]
    sparsity = G2.number_of_edges() / num_node ** 2
    print("no thredshold graph sparsity is " + str(sparsity))
    print(nx.info(G2))
    nx.write_gpickle(G2, "flickr.nothred.gpickle")
    # Collect first, remove afterwards, so the graph is not mutated while its
    # node view is being iterated.
    remove_node=[]
    for n, d in G2.nodes(data=True):
        if G2.degree(n)<degree_thrd:
            remove_node.append(n)
    G2.remove_nodes_from(np.asarray(remove_node))
    G2 = nx.convert_node_labels_to_integers(G2)
    num_node=nx.adjacency_matrix(G2).shape[0]
    # G3 comes from a matrix with ones on the main diagonal only, i.e. one
    # self-loop per node; composing it with G2 adds those loops.
    G3 = nx.from_scipy_sparse_matrix(sp.dia_matrix((np.ones(num_node), 0), shape=nx.adjacency_matrix(G2).shape))
    G4=nx.compose(G2,G3)
    nx.write_gpickle(G4, "flickr.gpickle")
    nx.write_adjlist(G4, "flickr_adj")
    nx.write_edgelist(G4, "flickr_edgelist")
    sparsity = G4.number_of_edges()/num_node**2
    print("sparsity is "+ str(sparsity))
    print(nx.info(G4))
def write_file(G, file_name, file_type="gexf"):
    """Write graph ``G`` to ``file_name`` in the requested format.

    Args:
        G: The graph to write.
        file_name: Target name; passed through ``process_directory`` first.
        file_type: One of ``"gexf"`` (default), ``"adj_list"``/``"al"``,
            ``"edge_list"``/``"el"`` or ``"GML"``/``"gml"``.  Any other value
            writes nothing.
    """
    file_name = process_directory(file_name)
    if file_type == "gexf":
        nx.write_gexf(G, file_name)
    elif file_type in ("adj_list", "al"):
        nx.write_adjlist(G, file_name)
    elif file_type in ("edge_list", "el"):
        # Was `nx.writeedgelist`, which does not exist and raised
        # AttributeError for every edge-list request.
        nx.write_edgelist(G, file_name)
    elif file_type in ("GML", "gml"):
        nx.write_gml(G, file_name)
def test_adjlist_multigraph(self):
    """``self.XG`` survives an adjlist round trip when read back as a MultiGraph."""
    G = self.XG
    (fd, fname) = tempfile.mkstemp()
    try:
        nx.write_adjlist(G, fname)
        H = nx.read_adjlist(fname, nodetype=int, create_using=nx.MultiGraph())
        H2 = nx.read_adjlist(fname, nodetype=int, create_using=nx.MultiGraph())
        assert_not_equal(H, H2)  # they should be different graphs
        assert_nodes_equal(list(H), list(G))
        assert_edges_equal(list(H.edges()), list(G.edges()))
    finally:
        # Release the descriptor and the temp file even when an assertion fails.
        os.close(fd)
        os.unlink(fname)
def create_barabasi_in_file(n, m, filename):
    """
    Generate a random network with the Barabasi-Albert model and store it.

    Args:
        n (int): Number of nodes
        m (int): Number of edges to attach from a new node to existing nodes
        filename (str): The file will be stored under that name in the
            ``Networks`` folder. You should use txt format.
    """
    target = "Networks/" + filename
    network = networkx.barabasi_albert_graph(n, m)
    networkx.write_adjlist(network, target)
def RGG(n, beta, mean_degree):
    """Generate a random graph and write its adjacency list and plot under ``generated/``.

    An empty ``n``-node graph is filled in by ``utils.randPairings`` using the
    array returned by ``utils.degreeDistribution``.  A degree histogram is
    drawn (and the graph itself when ``n < 1000``) and the current matplotlib
    figure is saved as a PNG.
    """
    import os

    G = nx.empty_graph(n)
    degreeArray = utils.degreeDistribution(beta, n, mean_degree)
    utils.randPairings(G, degreeArray)

    # Create the output directory like generate_graph does; without it both
    # writes below fail on a fresh checkout.
    if not os.path.exists('generated'):
        os.mkdir('generated')

    txtName = "generated/adj-%s-%s-%s-.txt" % (str(n), str(beta), str(mean_degree))
    nx.write_adjlist(G, txtName)

    utils.drawDegreeHistogram(G)
    if n < 1000:
        utils.drawGraph(G)
    pngname = "generated/graph-%s-%s-%s-.png" % (str(n), str(beta), str(mean_degree))
    plt.savefig(pngname)
def main():
    """Build the graph from ``Badges.xml`` and save it in Pajek and adjlist form."""
    G = create_graph(user_badge_extract('Badges.xml'))
    # draw_graph(G)

    # Each writer is paired with its output file: Pajek first, then adjlist.
    outputs = (
        (nx.write_pajek, "graph.net"),
        (nx.write_adjlist, "graph.adjlist"),
    )
    for writer, target in outputs:
        writer(G, target)
def save_graph(self, graph):
    """Save the IP graph and its node labels to disk.

    The graph goes to ``<fname_prefix>ip_graph.txt`` in networkx adjacency
    list format, the node labels to ``<fname_prefix>node_labels.txt``, one
    per line.
    """
    prefix = self.fname_prefix
    nx.write_adjlist(graph, prefix + "ip_graph.txt")
    # nx.write_gexf(graph, prefix + "graph.gexf")
    np.savetxt(prefix + "node_labels.txt", list(graph.nodes()), fmt='%s')
def test_adjlist_graph(self):
    """``self.G`` survives a write/read round trip through an adjlist file."""
    G = self.G
    (fd, fname) = tempfile.mkstemp()
    try:
        nx.write_adjlist(G, fname)
        H = nx.read_adjlist(fname)
        H2 = nx.read_adjlist(fname)
        assert_not_equal(H, H2)  # they should be different graphs
        assert_nodes_equal(list(H), list(G))
        assert_edges_equal(list(H.edges()), list(G.edges()))
    finally:
        # Release the descriptor and the temp file even when an assertion fails.
        os.close(fd)
        os.unlink(fname)
def test_adjlist_digraph(self):
    """``self.DG`` survives an adjlist round trip when read back as a DiGraph."""
    G = self.DG
    (fd, fname) = tempfile.mkstemp()
    try:
        nx.write_adjlist(G, fname)
        H = nx.read_adjlist(fname, create_using=nx.DiGraph())
        H2 = nx.read_adjlist(fname, create_using=nx.DiGraph())
        assert H is not H2  # they should be different graphs
        assert_nodes_equal(list(H), list(G))
        assert_edges_equal(list(H.edges()), list(G.edges()))
    finally:
        # Release the descriptor and the temp file even when an assertion fails.
        os.close(fd)
        os.unlink(fname)
def test_adjlist_digraph(self):
    """``self.DG`` survives an adjlist round trip when read back as a DiGraph."""
    G = self.DG
    (fd, fname) = tempfile.mkstemp()
    try:
        nx.write_adjlist(G, fname)
        H = nx.read_adjlist(fname, create_using=nx.DiGraph())
        H2 = nx.read_adjlist(fname, create_using=nx.DiGraph())
        assert_not_equal(H, H2)  # they should be different graphs
        assert_nodes_equal(list(H), list(G))
        assert_edges_equal(list(H.edges()), list(G.edges()))
    finally:
        # Release the descriptor and the temp file even when an assertion fails.
        os.close(fd)
        os.unlink(fname)
def test_adjlist_graph(self):
    """Round-trip ``self.G`` through an adjacency-list file.

    The graph is written to a temporary file and read back twice; the two
    results must compare as not equal (different graphs) while their sorted
    nodes and sorted edges match those of ``self.G``.
    """
    G = self.G
    (fd, fname) = tempfile.mkstemp()
    # The file is only accessed by name below, so release the descriptor
    # right away instead of holding it open for the whole test.
    os.close(fd)
    try:
        nx.write_adjlist(G, fname)
        H = nx.read_adjlist(fname)
        H2 = nx.read_adjlist(fname)
        assert_not_equal(H, H2)  # they should be different graphs
        assert_equal(sorted(H.nodes()), sorted(G.nodes()))
        assert_equal(sorted(H.edges()), sorted(G.edges()))
    finally:
        # Remove the temporary file even when an assertion above fails.
        os.unlink(fname)
def commit_networks(in_pattern, out_dir, languages):
    """
    Builds commit networks.

    For every language, the repository names are read (one per line) from
    ``in_pattern.format(lang)``. For each repository the commits are
    retrieved with ``get_commits_from_repo``, pickled to the raw output
    file, turned into an interaction network and written to the graph
    output file as an adjacency list.

    param:
    in_pattern: The path pattern of the file listing the repos; ``{0}`` is
                replaced by the language.
    out_dir: The pattern of the output dir for both raw data and graph
             data; ``{0}`` is replaced by the language.
    languages: List of languages.

    Here is the example of each param:
    in_pattern = "../data/most_watched/{0}.txt"
    out_dir = "../data/network/commit/{0}"
    languages = ['python', 'java']

    It should be noted that out_dir is expected to already contain the
    sub-dirs graph/ and raw/; they are not created here.
    """
    graph_out = out_dir + "/graph/{1}.txt"
    raw_out = out_dir + "/raw/{1}.pickle"
    github = Github(requests_per_second=1)

    for lang in languages:
        with open(in_pattern.format(lang), "r") as repo_list:
            for line in repo_list:
                repo = line.strip()
                if not repo:
                    # Blank lines (e.g. a trailing newline) name no repo.
                    continue
                # '/' cannot be part of a file name, so flatten "owner/name".
                f_name = repo.replace('/', '_')
                raw_path = raw_out.format(lang, f_name)
                graph_path = graph_out.format(lang, f_name)

                print("Starting {0} at {1}".format(repo, raw_path))
                commits = get_commits_from_repo(repo, github)
                # Pickle output is a byte stream: write it in binary mode so
                # no newline translation or text encoding is applied.
                with open(raw_path, "wb") as pickle_f:
                    pickle.dump(commits, pickle_f)

                print("Starting {0} at {1}".format(repo, graph_path))
                commit_interactions = commit_interactions_from_repo(commits)
                g = networkutil.create_interaction_network(
                    commit_interactions, repo_name=repo)
                nx.write_adjlist(g, graph_path)
def files_to_R2(G):
    """Export graph ``G`` to three files under ``file_output/``.

    * ``20120711adjlistFULL.csv``: adjacency list written by networkx.
    * ``20120710nodes2FULL.csv``: one ``node,x, y`` line per node, with the
      coordinates taken from ``G.position[node]``.
    * ``20120626edgesFULL.csv``: one ``u,v, capacity`` line per edge, with
      the capacity taken from the edge's ``'capacity'`` attribute.

    The node and edge files are opened in append mode, so repeated calls
    add lines to whatever the files already contain.
    """
    # adjacency list
    nx.write_adjlist(G, "file_output/20120711adjlistFULL.csv")

    # Values are written with plain str(): `str.format(text, '10.2f')` uses
    # the text as the *template* and therefore never applied a numeric
    # format (and would fail on text containing braces).

    # node list
    with open("file_output/20120710nodes2FULL.csv", "a") as node_file:
        for node in G:
            position = G.position[node]
            node_file.write(str(node) + ',' + str(position[0]) + ', '
                            + str(position[1]) + '\n')

    # capacity of each edge
    with open("file_output/20120626edgesFULL.csv", "a") as edge_file:
        for edge in G.edges(data=True):
            edge_file.write(str(edge[0]) + ',' + str(edge[1]) + ', '
                            + str(edge[2]['capacity']) + '\n')
def work(self):
    """Build the synonym graph and store it as an adjacency list.

    The key -> synonyms mapping is loaded (JSON) from
    'data/connected_synonyms.txt'. Every key becomes a node and every
    (key, synonym) pair becomes an edge; the resulting graph is written to
    'data/connected_synonyms_graph.adjlist' with '=-=' as the delimiter.
    """
    with open('data/connected_synonyms.txt', 'r') as source:
        synonyms_by_key = ujson.loads(source.read())

    graph = nx.Graph()
    # First pass: all keys become nodes before any edge is added.
    for key in synonyms_by_key:
        graph.add_node(key)
    # Second pass: connect each key to its synonyms.
    for key in synonyms_by_key:
        synonyms = synonyms_by_key[key]
        for synonym in synonyms:
            graph.add_edge(key, synonym)

    # data = json_graph.node_link_data(graph)
    with open('data/connected_synonyms_graph.adjlist', 'wb') as target:
        # target.write(ujson.dumps(data))
        nx.write_adjlist(graph, target, delimiter='=-=')
def test_adjlist_multidigraph(self):
    """Round-trip ``self.XDG`` through an adjacency-list file.

    The graph is written to a temporary file and read back twice into fresh
    ``nx.MultiDiGraph`` instances with integer node labels; the two results
    must compare as not equal (different graphs) while their sorted nodes
    and sorted edges match those of ``self.XDG``.
    """
    G = self.XDG
    (fd, fname) = tempfile.mkstemp()
    # The file is only accessed by name below, so release the descriptor
    # right away instead of holding it open for the whole test.
    os.close(fd)
    try:
        nx.write_adjlist(G, fname)
        H = nx.read_adjlist(fname, nodetype=int,
                            create_using=nx.MultiDiGraph())
        H2 = nx.read_adjlist(fname, nodetype=int,
                             create_using=nx.MultiDiGraph())
        assert_not_equal(H, H2)  # they should be different graphs
        assert_equal(sorted(H.nodes()), sorted(G.nodes()))
        assert_equal(sorted(H.edges()), sorted(G.edges()))
    finally:
        # Remove the temporary file even when an assertion above fails.
        os.unlink(fname)
def trackAABB(opts, argv):
    """AABB Tracking.

    Builds a graph whose nodes are named "<time>.<label>" and whose edges
    join boxes of consecutive time steps that overlap by more than
    ``opts.overlap``, then writes it as an adjacency list.

    argv[0]: glob pattern of the AABB files (rows of six values; the first
             three and last three are passed to ``AABB``).
    argv[1]: glob pattern of the label files (integer labels).
    argv[2]: output path of the adjacency list.
    opts.maxT: processing stops after this time index.
    opts.subgraph: when not empty, a ``%``-pattern used to write every
                   connected component to its own adjacency-list file.
    """
    # In case there's no label file, they can be produced like this:
    #   flist = glob.glob(argv[0])
    #   for f in flist:
    #       p = readVTP(f)
    #       k = vtk.mutable(0)
    #       a = p.GetCellData().GetArray("VortexCluster", k)
    #       x = np.zeros(a.GetNumberOfTuples())
    #       for i in range(0, a.GetNumberOfTuples()):
    #           v = a.GetValue(i)
    #           x[i] = v
    #       np.savetxt(f.replace("AABB.vtu", "Cluster_labels.txt"), x, fmt="%d")

    graph = nx.Graph()

    # Pass 1: load the labels and create one node per label value
    # 0..max(label) of every time step.
    labels = []
    for step, label_file in enumerate(sorted(glob.glob(argv[1]))):
        step_labels = np.genfromtxt(label_file, dtype=int)
        for label in range(0, step_labels.max() + 1):
            graph.add_node("%d.%d" % (step, label), time=step)
        labels.append(step_labels)
        if step >= opts.maxT:
            break

    # Pass 2: load the boxes and link overlapping boxes of consecutive steps.
    previous_boxes = []
    for step, box_file in enumerate(sorted(glob.glob(argv[0]))):
        print("Processing time #%d" % (step))
        rows = np.genfromtxt(box_file)
        boxes = []
        for row in rows:
            boxes.append(AABB(row[0:3], row[3:6]))
        if step > 0:
            for i, earlier in enumerate(previous_boxes):
                source = "%d.%d" % (step - 1, labels[step - 1][i])
                for j, later in enumerate(boxes):
                    target = "%d.%d" % (step, labels[step][j])
                    if earlier.Intersect(later) > opts.overlap:
                        graph.add_edge(source, target)
        previous_boxes = boxes
        if step >= opts.maxT:
            break

    nx.write_adjlist(graph, argv[2])
    if opts.subgraph != "":
        components = nx.connected_component_subgraphs(graph)
        for index, component in enumerate(components):
            nx.write_adjlist(component, opts.subgraph % (index))
def save_graph(self,name):
    """Save ``self.g2`` and the entries of ``self.llw`` under ``./out``.

    The ``out`` directory is created inside the current working directory
    when it does not exist. Three files are written there:
        * ``<name>.adjlist``: adjacency list of ``self.g2``.
        * ``<name>.dot``: ``self.g2`` in dot format.
        * ``szeged_sum_index<name>.txt``: every entry of ``self.llw``,
          written back to back exactly as stored.
    """
    import os

    out_dir = os.path.join(os.getcwd(), "out")
    if not os.path.isdir(out_dir):
        os.mkdir(out_dir)

    nx.write_adjlist(self.g2, os.path.join(out_dir, name + '.adjlist'))
    nx.write_dot(self.g2, os.path.join(out_dir, name + '.dot'))

    index_path = os.path.join(out_dir, "szeged_sum_index" + name + ".txt")
    # 'with' closes the file even if one of the writes raises.
    with open(index_path, "w") as index_file:
        for entry in self.llw:
            index_file.write(entry)
def save_param(self, param, name, type):
    """Persist ``param`` in the project directory using the writer for ``type``.

    The target path is ``<project_path>/<name>.<suffix>`` where the suffix
    is looked up in ``self.SUFFIXES[type]``.

    param: the object to store.
    name: base name of the file (without suffix).
    type: one of the ``ParamTypes`` members handled below.

    Raises ``Exception`` when ``type`` is none of the handled members. The
    suffix lookup happens first, so a ``type`` missing from ``SUFFIXES``
    fails there instead.
    """
    path = os.path.join(self.project_path,
                        '%s.%s' % (name, self.SUFFIXES[type]))
    if type == ParamTypes.JSON:
        # Use a context manager so the file is flushed and closed
        # deterministically instead of whenever the handle is collected.
        with open(path, 'w') as json_file:
            json.dump(param, json_file, indent=4)
    elif type == ParamTypes.ARRAY:
        np.save(path, param)
    elif type == ParamTypes.SPARSE:
        save_sparse(path, param)
    elif type == ParamTypes.SPARSE_LIST:
        save_sparse_list(path, param)
    elif type == ParamTypes.GRAPH:
        write_adjlist(param, path)
    else:
        raise Exception('invalid type "%s"' % type)
def RGG(n, beta, mean_degree):
    """Generate a random graph from a degree array and save its adjacency list.

    The degree array comes from ``utils.powerLawArray(n, beta, mean_degree)``.
    Every node ``i`` owns ``degree[i]`` consecutive "stubs" in the range
    ``[0, sum(degrees))``. Each edge is created by drawing two stubs
    uniformly at random and connecting their owners, so a node is picked
    with probability proportional to its degree. Self-loops and repeated
    edges are possible and are left to the graph type to handle.

    The result is written to ``test/adj-<n>-<beta>-<mean_degree>-.txt``.
    """
    G = nx.empty_graph(n)
    powerLawArray = utils.powerLawArray(n, beta, mean_degree)
    powerLawDegreeArray = np.array(powerLawArray, dtype=np.longlong)
    sumOfDegrees = powerLawDegreeArray.sum()

    # delimiterArray[i] is the first stub owned by node i:
    # [0, d0, d0 + d1, ..., sum - d_last].
    delimiterArray = np.cumsum(powerLawDegreeArray)
    delimiterArray = np.insert(delimiterArray, 0, 0)
    delimiterArray = np.delete(delimiterArray, n)

    def random_node():
        # The owner of a stub is the node i with
        # delimiterArray[i] <= stub < delimiterArray[i + 1].
        # side='right' minus one yields exactly that index; the default
        # side='left' is shifted by one (node i would get the weight of
        # node i - 1, and index n, which is not a node, could be returned).
        stub = rnd.randrange(sumOfDegrees)
        return np.searchsorted(delimiterArray, stub, side='right') - 1

    someCounter = 0
    while someCounter < sumOfDegrees/2:
        G.add_edge(random_node(), random_node())
        someCounter += 1

    txtname = "test/adj-%s-%s-%s-.txt" % (str(n), str(beta), str(mean_degree))
    nx.write_adjlist(G, txtname)
def save(self, data):
    '''Save weighted star graph to file as adjacency list and node data.

    A save-file dialog asks for the target; when no file name comes back
    nothing is written. Otherwise the graph built by ``make_graph(data)``
    is written as an adjacency list, followed by a ``DATA`` line and the
    string form of the graph's nodes with their data.
    '''
    options = {
        'defaultextension': '.txt',
        'filetypes': [('all files', '.*'), ('text files', '.txt')],
    }
    filename = tkFileDialog.asksaveasfilename(**options)
    if not filename:
        # No file name was returned by the dialog: nothing to do.
        return

    startree = make_graph(data)
    # 'with' closes the file even if one of the writes raises.
    with open(filename, 'wb') as adjfile:
        nx.write_adjlist(startree, adjfile)
        adjfile.write('\nDATA\n')
        adjfile.write(str(startree.nodes(data=True)))
    print('Graph data saved to %s' % filename)
def findPartitionL(G, min_size=250):
    """Write every large community of ``G`` to its own adjacency-list file.

    The communities come from ``community.best_partition(G)``. Communities
    are numbered from 1 in the iteration order of the set of community ids;
    each one with more than ``min_size`` nodes is saved as
    ``BASE_PATH + 'Partition<number>.txt'`` (with '#' passed as the third
    argument of ``nx.write_adjlist``).

    G: the graph to partition.
    min_size: a community is written only when it has more nodes than this
              (default 250).

    Returns the list of file names that were written.
    """
    partition = community.best_partition(G)
    communities = set(partition.values())
    size = float(len(communities))
    # Same output as printing the two values separated by a space.
    print("%s %s" % (size, len(partition.values())))

    # Group the nodes by community in one pass instead of rescanning the
    # whole partition once per community.
    members = {}
    for node, com in partition.items():
        members.setdefault(com, []).append(node)

    fileList = []
    for count, com in enumerate(communities, 1):
        list_nodes = members[com]
        if len(list_nodes) > min_size:
            subgraph = nx.subgraph(G, list_nodes)
            filename = BASE_PATH + 'Partition' + str(count) + '.txt'
            nx.write_adjlist(subgraph, filename, '#')
            fileList.append(filename)
    return fileList
def de_bruijn(genomefile, k, outfile, draw):
    """Build a k-mer overlap graph from a FASTA file and save it.

    All non-empty lines that do not start with '>' are concatenated into one
    sequence. Every length-``k`` window of the sequence becomes a node; an
    edge ``a -> b`` is added whenever the last ``k - 1`` characters of ``a``
    equal the first ``k - 1`` characters of ``b``.

    genomefile: path of the FASTA file to read.
    k: k-mer length.
    outfile: path the adjacency list is written to.
    draw: the graph is drawn and shown only when this is the string "True".
    """
    # read fasta sequence; 'with' closes the file even if reading fails
    with open(genomefile, 'r') as f:
        lines = f.read().split('\n')
    # join() avoids rebuilding the string once per input line
    seq = ''.join(line for line in lines if line and line[0] != ">")
    L = len(seq)

    # split into kmers, one per window position (duplicates are kept)
    seq_kmers = [seq[base:base + k] for base in range(L - k + 1)]

    # Index the kmers by their (k-1)-prefix, in sequence order, so every
    # overlap lookup is a dictionary access instead of a scan over all
    # kmers.
    kmers_by_prefix = {}
    for kmer in seq_kmers:
        kmers_by_prefix.setdefault(kmer[:-1], []).append(kmer)

    # find overlaps: suffix of one kmer == prefix of another
    edge_list = []
    for kmer in seq_kmers:
        for successor in kmers_by_prefix.get(kmer[1:], ()):
            edge_list.append((kmer, successor))

    # make graph: kmers as nodes and overlaps as edges
    G = nx.DiGraph()
    G.add_nodes_from(seq_kmers)
    G.add_edges_from(edge_list)

    # draw the graph if desired
    if draw == "True":
        nx.draw_spring(G)
        plt.show()

    # output adjacency list format of the graph
    nx.write_adjlist(G, outfile)
def main(args):
    """Read a network, pass its adjacency matrix through ``renet`` and save it.

    args['--in']: path of the input adjacency list ('#' starts a comment).
    args['--out']: path the resulting adjacency list is written to.

    The size of the network is printed before and after the transformation.
    The nodes of the new graph are paired, in iteration order, with the
    nodes of the original graph and relabelled in place accordingly.
    """
    import networkx as nx

    source_path = args['--in']
    target_path = args['--out']

    original = nx.read_adjlist(source_path, comments='#')
    print("Original network size = {0}".format(original.size()))

    adjacency = np.array(nx.adjacency_matrix(original))
    rewired = nx.from_numpy_matrix(renet(adjacency))
    print("New network size = {0}".format(rewired.size()))

    # Give the new graph the original node names back.
    new_to_old = dict(zip(rewired.nodes(), original.nodes()))
    nx.relabel_nodes(rewired, new_to_old, copy=False)
    nx.write_adjlist(rewired, target_path)
def main():
    """Filter a weighted edge list and write it as an adjacency list.

    The tab-separated weighted edge list ``options.input`` is read and the
    minimum weight, maximum weight and the fixed cutoff (30.6) are printed.

    * With ``options.binary`` set, ``filteredGraph(G, cutoff)`` is written
      to ``options.output`` as a plain adjacency list.
    * Otherwise every edge weight ``x`` is mapped through the logistic
      function ``1 / (1 + exp(lp * (cutoff - x)))`` with
      ``lp = float(options.logistic)``, and the re-weighted graph is handed
      to ``writeAdjList`` together with its edges grouped by first
      endpoint. The standard deviation of the filtered graph's weights
      around the cutoff is printed on the way.
    """
    options = createParser().parse_args()
    G = nx.read_weighted_edgelist(options.input, delimiter='\t', comments='#')

    weights = [d['weight'] for u, v, d in G.edges_iter(data=True)]
    minW, maxW = min(weights), max(weights)
    cutoffW = 30.6  # (30.6 - minW) / (maxW - minW)
    print(minW, maxW, cutoffW)

    if options.binary:
        nx.write_adjlist(filteredGraph(G, cutoffW), options.output)
        return

    import numpy
    lp = float(options.logistic)
    filtered = filteredGraph(G, cutoffW)

    # Mirror the deviations around the cutoff before taking the std.
    below = [cutoffW - d['weight'] for u, v, d in filtered.edges_iter(data=True)]
    above = [d['weight'] - cutoffW for u, v, d in filtered.edges_iter(data=True)]
    stddev = np.std(below + above)
    weight_dist = scipy.stats.norm(loc=cutoffW, scale=stddev)
    print(stddev)

    # def prob(x):
    #     return min(1.0, 2.0*n.cdf(x))#(0.5 + ((x-cutoffW) / (maxW-cutoffW))) if (x >= cutoffW) else (0.5*(x-minW) / (cutoffW-minW))
    def prob(x):
        return 1.0 / (1.0 + np.exp(lp * (-x + cutoffW)))

    H = nx.Graph()
    H.add_nodes_from(G.nodes())
    for u, v, d in G.edges_iter(data=True):
        H.add_edge(u, v, weight=prob(d['weight']))

    # groupby needs its input sorted by the grouping key (the first endpoint).
    triples = sorted([(u, v, d['weight']) for u, v, d in H.edges_iter(data=True)])
    adjDict = {}
    for first, group in itertools.groupby(triples, lambda x: x[0]):
        adjDict[first] = list(group)
    writeAdjList(H, adjDict, lp, options.output)