def load_train_test_graphs(dataset, recache_input):
    raw_mat_path = 'data/{}.npz'.format(dataset)
    train_graph_path = 'data/{}/train_graph.pkl'.format(dataset)
    test_graph_path = 'data/{}/test_graph.pkl'.format(dataset)

    if recache_input:
        print('loading sparse matrix from {}'.format(raw_mat_path))
        m = load_sparse_csr(raw_mat_path)

        print('splitting train and test...')
        train_m, test_m = split_train_test(m, weights=[0.9, 0.1])

        print('converting to nx.DiGraph')
        train_g = nx.from_scipy_sparse_matrix(train_m, create_using=nx.DiGraph(),
                                              edge_attribute='sign')
        test_g = nx.from_scipy_sparse_matrix(test_m, create_using=nx.DiGraph(),
                                             edge_attribute='sign')

        print('saving train and test graphs...')
        nx.write_gpickle(train_g, train_graph_path)
        nx.write_gpickle(test_g, test_graph_path)
    else:
        print('loading train and test graphs...')
        train_g = nx.read_gpickle(train_graph_path)
        test_g = nx.read_gpickle(test_graph_path)
    return train_g, test_g
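# A minimal usage sketch for load_train_test_graphs() above; the dataset name
# 'epinions' is a hypothetical placeholder, and the call assumes the helpers
# load_sparse_csr() and split_train_test() plus the data/ layout exist as used
# in the function body.
def example_load_train_test():
    # first run builds and caches the train/test pickles
    train_g, test_g = load_train_test_graphs('epinions', recache_input=True)
    print(train_g.number_of_edges(), test_g.number_of_edges())
    # later runs can reuse the cached pickles
    train_g, test_g = load_train_test_graphs('epinions', recache_input=False)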
def __init__(self, post_fcn=None):
    self.parse_args()
    options = self.options
    input = options.input

    # Input graph
    input_path = os.path.join(input, input + '.gpk')
    self.g = nx.read_gpickle(input_path)
    if not self.g:
        raise Exception("null input file for input path %s" % input_path)

    # Output (reduced) graph.
    # Nodes: (lat, long) tuples w/ list of associated users & location strings
    # Edges: weight: number of links in this direction
    self.r = nx.DiGraph()

    conn = Connection()
    self.input_db = conn[self.options.input_db]
    self.input_coll = self.input_db[self.options.input_coll]

    print "now processing"
    self.reduce()
    print geo_stats(self.r)

    if options.write:
        geo_path = os.path.join(input, input + '.grg')
        nx.write_gpickle(self.r, geo_path)
def mainmain():
    redirects = process_redirects(sys.argv[1])
    print "redirects", len(redirects)
    sys.stderr.write(sys.argv[1] + " processed\n")

    links = process_links(sys.argv[2], redirects)
    links_t = len(links)
    print "links", links_t
    sys.stderr.write(sys.argv[2] + " processed\n")

    G = networkx.Graph()
    articles_processed = 0
    for article in links:
        articles_processed = articles_processed + 1
        if (articles_processed % 100000) == 0:
            sys.stdout.write("links processed=" + str(articles_processed) + "/" + str(links_t) +
                             " (%" + str(articles_processed * 100 / links_t) + ")\n")
        if len(links[article]) < 6:
            continue
        G.add_node(article)
        for l in links[article]:
            if (l in links) and (article in links[l]):
                # back link is also present
                G.add_node(l)
                G.add_edge(article, l)

    networkx.write_gpickle(G, sys.argv[3])
def createBridge(numOfNodes, edgeProb, bridgeNodes):
    '''
    numOfNodes: Number of nodes in the clustered part of the Bridge Graph
    edgeProb: Probability of existence of an edge between any two vertices.
    bridgeNodes: Number of nodes in the bridge

    This function creates a Bridge Graph with 2 main clusters connected by a bridge.
    '''
    print "Generating and Saving Bridge Network..."
    # Create an ER graph with number of vertices equal to twice the number of vertices
    # in the clusters plus the number of bridge nodes.
    G1 = nx.erdos_renyi_graph(2*numOfNodes + bridgeNodes, edgeProb)

    # Create an empty graph so that it can be filled with the required components from G1
    G = nx.Graph()

    # Generate an induced subgraph of the nodes, ranging from 0 to numOfNodes, from G1 and add it to G
    G.add_edges_from(G1.subgraph(range(numOfNodes)).edges())
    # Generate an induced subgraph of the nodes, ranging from (numOfNodes + bridgeNodes)
    # to (2*numOfNodes + bridgeNodes)
    G.add_edges_from(G1.subgraph(range(numOfNodes + bridgeNodes, 2*numOfNodes + bridgeNodes)).edges())

    A = random.randrange(numOfNodes)  # Choose a random vertex from the first component
    B = random.randrange(numOfNodes + bridgeNodes, 2*numOfNodes + bridgeNodes)  # Choose a random vertex from the second component

    prev = A
    # creating a connection from A to B via the bridge nodes
    for i in range(numOfNodes, numOfNodes + bridgeNodes):
        G.add_edge(prev, i)
        prev = i
    G.add_edge(i, B)

    StrMap = {}
    for node in G.nodes():
        StrMap[node] = str(node)
    G = nx.convert.relabel_nodes(G, StrMap)

    filename = "BG_" + str(numOfNodes) + "_" + str(edgeProb) + "_" + str(bridgeNodes) + ".gpickle"
    nx.write_gpickle(G, filename)  # generate a gpickle file of the graph.
    print "Successfully written into " + filename
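# Hedged example of calling createBridge() above and reading the written pickle back;
# the parameter values are illustrative assumptions, and the filename is derived the
# same way the function derives it.
def example_create_bridge():
    createBridge(numOfNodes=50, edgeProb=0.3, bridgeNodes=5)
    G = nx.read_gpickle("BG_50_0.3_5.gpickle")
    print nx.info(G)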
def activity_in_cells(cells, frames):
    allActivity = []
    # This dictionary keeps the location of cells and average activity in each
    # cell.
    global g_
    g_.graph["shape"] = cells.shape
    goodCells = {}
    for cellColor in range(1, int(cells.max())):
        print("+ Computing for cell color %d" % cellColor)
        xs, ys = np.where(cells == cellColor)
        pixals = list(zip(xs, ys))  # These pixals belong to this cell.
        if len(pixals) < 1:
            continue
        cellActivity = []
        g_.add_node(cellColor)
        g_.node[cellColor]["pixals"] = pixals
        for x, y in pixals:
            cellActivity.append(frames[y, x, :])
        cellVec = np.mean(cellActivity, axis=0)
        g_.node[cellColor]["activity"] = cellVec
        # Attach this activity to graph as well after normalization.
        allActivity.append(cellVec / cellVec.max())

    # Now compute correlation between nodes and add edges
    for n1, n2 in itertools.combinations(g_.nodes(), 2):
        v1, v2 = g_.node[n1]["activity"], g_.node[n2]["activity"]
        g_.add_edge(n1, n2,
                    weight=sync_index(v1, v2, "dilawar"),
                    weight_sigma=sync_index_clip(v1, v2))

    cellGraph = "cells_as_graph.gpickle"
    nx.write_gpickle(g_, cellGraph)
    print("[INFO] Wrote cell graph to pickle file %s" % cellGraph)
    print("\t nodes %d" % g_.number_of_nodes())
    print("\t edges %d" % g_.number_of_edges())
    activity = np.vstack(allActivity)
    return activity
def addNodeDe_EdgeDist():
    """
    Add node degree and edge distance on the filtered Graph
    :return: Graph
    """
    schema = 'total_v3_csvneo4j'
    Graph_type = 'undirected'
    alpha_thred = 0.65
    nodeDegree_thred = 1.0
    DisTypes = ['G', 'SP', 'R']

    G = nx.read_gpickle('../filteredG_{}_alpha{}_nodeD{}_{}.gpickle'.format(
        Graph_type, alpha_thred, nodeDegree_thred, schema))
    print 'after read'
    print 'edges: ', len(G.edges())
    print 'nodes: ', len(G.nodes())

    G = main.addNode_degree(G)
    print 'finish adding node degree'
    G = main.addEdge_distance(G, DisTypes)
    print 'finish adding edge distance'

    nx.write_gpickle(G, '../addNodeEdgeDegree_{}_{}_alpha{}_nodeD{}_{}.gpickle'.format(
        '+'.join(DisTypes), Graph_type, alpha_thred, nodeDegree_thred, schema))
    print 'finishing write gpickle'
    print 'edges: ', len(G.edges())
    print 'nodes: ', len(G.nodes())
    return
def graph_preprocessing_with_counts(G_input=None, save_file=None):
    if not G_input:
        graph_file = os.path.join(work_dir, "adj_graph.p")
        G = nx.read_gpickle(graph_file)
    else:
        G = G_input.copy()

    print "Raw graph size:", G.size()
    print "Raw graph nodes", G.number_of_nodes()

    profile2prob = {l.split()[0]: float(l.split()[1])
                    for l in open(os.path.join(work_dir, 'profile_weight.txt'))}

    for edge in G.edges(data=True):
        nodes = edge[:2]
        _weight = edge[2]['weight']
        _count = edge[2]['count']

        if _count < 3:
            G.remove_edge(*nodes)

    print "Pre-processed graph size", G.size()
    print "Pre-processed graph nodes", G.number_of_nodes()

    G.remove_nodes_from(nx.isolates(G))

    print "Pre-processed graph size", G.size()
    print "Pre-processed graph nodes", G.number_of_nodes()

    if save_file:
        print "Saving to", save_file
        nx.write_gpickle(G, save_file)

    return G
def save_pickle_in_cfile(self, local_fname, networkref):
    """ Creates a pickled version of the graph and stores it in the cfile

    Parameters
    ----------
    local_fname: string
        The filename used in the Pickle folder to store
    networkref: NetworkX Graph instance
        The NetworkX graph to pickle
    """
    logger.info('Write a generated graph pickle to the connectome file.')
    picklefilepath = os.path.join(tempfile.gettempdir(), local_fname)
    from networkx import write_gpickle
    # add nodekeys, edgekeys, graphid to helpernode 'n0' before storage
    helperdict = {'nodekeys': networkref.nodekeys.copy(),
                  'edgekeys': networkref.edgekeys.copy(),
                  'graphid': networkref.networkid}
    networkref.graph.add_node('n0')
    networkref.graph.node['n0'] = helperdict
    write_gpickle(networkref.graph, picklefilepath)
    networkref.graph.remove_node('n0')

    from zipfile import ZipFile, ZIP_DEFLATED
    tmpzipfile = ZipFile(self.data.fullpathtofile, 'a', ZIP_DEFLATED)
    # store it in the zip file
    tmpzipfile.write(picklefilepath, 'Pickle/' + local_fname)
    tmpzipfile.close()

    # remove pickle file from system
    logger.debug('Unlink: %s' % picklefilepath)
    os.unlink(picklefilepath)
def better_display(G, slim, pic, fit=None, pngpath=None, bw=None):
    import engine.Genetic as Genetic
    _fit = "C" if fit == "C" else "L"
    R = Genetic.genetic(G, _fit, pngpath, bw)
    for node in G:
        G.node[node]["pos"] = R[node]
    nx.write_gpickle(G, pic)
def create_nodes(paths, args):
    """ creates nodes

    Parameters
    ----------
    paths.node_file : file
    args.fasta_file : file
    """
    # read in fasta to dictionary.
    seqs = io.load_fasta(args.contig_file)

    # create graph.
    G = nx.MultiGraph()

    # add nodes to graph.
    for name, seq in seqs.items():
        # skip split names.
        tmp = name.split(" ")
        name = tmp[0]

        # add node.
        G.add_node(name, {'seq': seq, 'width': len(seq), 'cov': 0})

    # write to disk.
    nx.write_gpickle(G, paths.node_file)
def G_init(slim_dict, slim):
    def check_field(pos, G, rate):
        for node in G:
            node_pos = G.node[node]["pos"]
            if math.sqrt(np.sum((pos-node_pos)**2)) < rate:
                return True
        return False

    if len(slim_dict) < 25:
        rate = 0.42
    elif len(slim_dict) < 42:
        rate = 0.3
    else:
        rate = 0.25

    G = nx.Graph()
    for node in slim_dict:
        pos = np.array((0.0, 0.0))
        while check_field(pos, G, rate):
            pos = np.array((2.8*random.random()-1.4, 2.8*random.random()-1.4))
        G.add_node(node, size=len(GOID2group[node]), color=0, pos=pos)

    for node in G:
        G.node[node]["sum"] = 0
        G.node[node]["1"] = 0
        G.node[node]["2"] = 0

    for GOID_1 in slim_dict:
        for GOID_2 in GO2interact[GOID_1]:
            if GOID_2 != GOID_1 and GOID_2 in slim_dict:
                G.add_edge(GOID_1, GOID_2, weight=GO2interact[GOID_1][GOID_2],
                           percent=0.5, percent_1=0.5, percent_2=0.5)

    png = path+"/results/slim_"+slim+"/slim_"+slim+".png"
    G = nx.relabel_nodes(G, slim_dict)
    nx.write_gpickle(G, path+"/results/G_slim_"+slim)
    draw_G(G, png)
def fit_forestFire_mod(graphSize, graphID, dkPath, original2k, resultPath):
    """
    Runs synthetic graph tests for various 'p' values (burn rate).
    """
    outfile = open(resultPath + graphID + '_ff_dkDistances.txt', 'w')

    p = 0.01
    while p < 1.0:
        print 'Running modified Forest Fire with parameters: n = ', graphSize, ' p = ', p
        newFile = graphID + '_ff_' + str(p)

        # Create synthetic graph
        syntheticGraph = sm.forestFire_mod(graphSize, p)

        # Write pickle, edge list, and 2k distro to file
        print 'Writing pickle and calculating dK-2...\n'
        nx.write_gpickle(syntheticGraph, resultPath + newFile + '.pickle')
        getdk2(syntheticGraph, newFile, dkPath, resultPath)

        # Find distance between the dK-2 distributions
        dkDistance = tk.get_2k_distance(original2k, resultPath + newFile + '_target.2k')
        outfile.write(str(dkDistance) + '\tp = ' + str(p) + '\n')
        outfile.flush()

        p += 0.01

    outfile.close()
def buildGraph(db):
    DATA_DIR = os.environ['OPENSHIFT_DATA_DIR']
    file = os.path.join(DATA_DIR, "UserTagsGraph.gpickle")
    if not os.path.isfile(file):
        user_network = nx.Graph()
        users = DAL.UserTags.getAll(db)
        user_tags = {}
        all_tags = {}
        for user in users:
            tags = user.tags_to_list()
            user_tags[user.id] = tags
            for tag in tags:
                all_tags.setdefault(tag, set()).add(user.id)
        for tag in all_tags:
            user_network.add_edges_from(
                [perm for perm in itertools.permutations(all_tags[tag], 2)])
        # save graph to file
        nx.write_gpickle(user_network, file)
    else:
        user_network = nx.read_gpickle(file)
    return user_network
def computemaxweight(graph, path, protlist, path_lenght, alone):
    elements = []
    nodes = []
    ess = []
    print "------Starting Graph------"
    print nx.info(graph)
    for i in path:
        max = 0
        for j in path[i]:
            count = 0
            for k in range(0, len(j)-1, 1):
                count = count + float(graph.edge[j[k]][j[k+1]]["weight"])
            if count > max:
                max = count
                elements = j
        ess.extend(elements[1:len(elements)-1])
    ess = list(set(ess))
    H = graph.subgraph(ess + protlist)
    #H.add_nodes_from(protlist)
    graphred = check(H, path_lenght, ess, protlist, path)
    nx.write_gpickle(graphred, "weightmaxfilter.gpickle")
    f1 = open("weightproteins.txt", "w")
    for i in graphred.nodes():
        if i in alone:
            pass
        else:
            f1.write(i + "\n")
def main(**kwargs):
    cells = kwargs["cells"]
    frames = kwargs["frames"]
    if isinstance(cells, str):
        cells = np.load(cells)
    if isinstance(frames, str):
        frames = np.load(frames)

    logger.info("Creating correlation graph")
    N = int(cells.max())
    for i in range(1, N):
        logger.info("\tDone %d out of %d" % (i, N))
        indices = list(zip(*np.where(cells == i)))
        if len(indices) < 2:
            continue
        pixals = []
        for y, x in indices:
            pixals.append(frames[x, y, :])
        pixals = np.mean(pixals, axis=0)
        g_.add_node(i, timeseries=pixals, indices=indices)

    g_.graph["shape"] = frames[:, :, 0].shape
    create_correlate_graph(g_)

    outfile = kwargs.get("output", False) or "correlation_graph.pickle"
    logger.info("Writing pickle of graph to %s" % outfile)
    nx.write_gpickle(g_, outfile)
    logger.info("Graph pickle is saved to %s" % outfile)
def handle_by_file(out_dir, tweet_file, country, net_func=comprehend_network):
    try:
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

        if net_func == comprehend_network:
            net_type = "comprehend"
        elif net_func == user2user_network:
            net_type = "user2user"
        elif net_func == content_based_network:
            net_type = "content"
        elif net_func == entity_network:
            net_type = "entity"
        elif net_func == entity_corr_network:
            net_type = "entity_corr"
        else:
            net_type = "normal"

        net = net_func(tweet_file)
        net.graph["country"] = country
        g_date = re.search(r'\d{4}-\d{2}-\d{2}', tweet_file).group()
        net.graph["date"] = g_date

        out_file = os.path.join(out_dir, "graph_%s_%s" % (net_type, tweet_file.split(os.sep)[-1]))
        nx.write_gpickle(net, out_file + ".gpickle")
        nx.write_graphml(net, out_file + ".graphml")
    except Exception, e:
        print "Error Encountered: %s, \n %s" % (tweet_file, sys.exc_info()[0]), e
def reduceGraph(read_g, write_g, minEdgeWeight, minNodeDegree, Lp, Sp):
    """
    Simplify the undirected graph and then update the 3 undirected weight properties.
    :param read_g: is the graph pickle to read
    :param write_g: is the updated graph pickle to write
    :param minEdgeWeight: the original weight of each edge should be >= minEdgeWeight
    :param minNodeDegree: the degree of each node should be >= minNodeDegree. the degree here is
                          G.degree(node), NOT G.degree(node, weight='weight')
    :return: None
    """
    G = nx.read_gpickle(read_g)
    print 'number of original nodes: ', nx.number_of_nodes(G)
    print 'number of original edges: ', nx.number_of_edges(G)

    for (u, v, w) in G.edges(data='weight'):
        if w < minEdgeWeight:
            G.remove_edge(u, v)

    for n in G.nodes():
        if G.degree(n) < minNodeDegree:
            G.remove_node(n)

    print 'number of new nodes: ', nx.number_of_nodes(G)
    print 'number of new edges: ', nx.number_of_edges(G)

    for (a, b, w) in G.edges_iter(data='weight'):
        unweight_allocation(G, a, b, w, Lp, Sp)

    print 'update weight ok'
    nx.write_gpickle(G, write_g)
    return
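# Hedged example of calling reduceGraph() above; the file names and threshold values
# are assumptions for illustration, and Lp/Sp are passed straight through to
# unweight_allocation(), whose semantics are defined elsewhere in the original project.
def example_reduce_graph():
    reduceGraph(read_g='raw_graph.gpickle', write_g='reduced_graph.gpickle',
                minEdgeWeight=2, minNodeDegree=3, Lp=0.5, Sp=0.5)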
def load_data():
    start = time.time()
    try:
        print("Loading data from /data pickles and hdf5 adj matrices")
        f = h5py.File('data/cosponsorship_data.hdf5', 'r')
        for chamber in ['house', 'senate']:
            for congress in SUPPORTED_CONGRESSES:
                adj_matrix_lookup[(chamber, congress)] = np.asarray(f[chamber + str(congress)])

                igraph_graph = igraph.load("data/" + chamber + str(congress) + "_igraph.pickle",
                                           format="pickle")
                igraph_graph_lookup[(chamber, congress, False)] = igraph_graph

                nx_graph = nx.read_gpickle("data/" + chamber + str(congress) + "_nx.pickle")
                nx_graph_lookup[(chamber, congress, False)] = nx_graph
    except IOError as e:
        print("Loading data from cosponsorship files")
        f = h5py.File("data/cosponsorship_data.hdf5", "w")
        for chamber in ['house', 'senate']:
            for congress in SUPPORTED_CONGRESSES:
                print("Starting %s %s" % (str(congress), chamber))
                adj_matrix = load_adjacency_matrices(congress, chamber)

                data = f.create_dataset(chamber + str(congress), adj_matrix.shape, dtype='f')
                data[0: len(data)] = adj_matrix

                # igraph
                get_cosponsorship_graph(congress, chamber, False).save(
                    "data/" + chamber + str(congress) + "_igraph.pickle", "pickle")
                # networkx
                nx.write_gpickle(get_cosponsorship_graph_nx(congress, chamber, False),
                                 "data/" + chamber + str(congress) + "_nx.pickle")

                print("Done with %s %s" % (str(congress), chamber))
    print("Data loaded in %d seconds" % (time.time() - start))
def save_graph(self, graphname, fmt='edgelist'):
    """
    Saves the graph to disk

    **Positional Arguments:**

        graphname:
            - Filename for the graph

    **Optional Arguments:**

        fmt:
            - Output graph format
    """
    self.g.graph['ecount'] = nx.number_of_edges(self.g)
    g = nx.convert_node_labels_to_integers(self.g, first_label=1)
    if fmt == 'edgelist':
        nx.write_weighted_edgelist(g, graphname, encoding='utf-8')
    elif fmt == 'gpickle':
        nx.write_gpickle(g, graphname)
    elif fmt == 'graphml':
        nx.write_graphml(g, graphname)
    else:
        raise ValueError('edgelist, gpickle, and graphml currently supported')
def save_celltype_graph(self, filename="celltype_conn.gml", format="gml"):
    """
    Save the celltype-to-celltype connectivity information in a file.

    filename -- path of the file to be saved.

    format -- format to save in. Using GML as GraphML support is not complete in NetworkX.
    """
    start = datetime.now()
    if format == "gml":
        nx.write_gml(self.__celltype_graph, filename)
    elif format == "yaml":
        nx.write_yaml(self.__celltype_graph, filename)
    elif format == "graphml":
        nx.write_graphml(self.__celltype_graph, filename)
    elif format == "edgelist":
        nx.write_edgelist(self.__celltype_graph, filename)
    elif format == "pickle":
        nx.write_gpickle(self.__celltype_graph, filename)
    else:
        raise Exception("Supported formats: gml, yaml, graphml, edgelist, pickle. Received: %s" % (format))
    end = datetime.now()
    delta = end - start
    config.BENCHMARK_LOGGER.info(
        "Saved celltype_graph in file %s of format %s in %g s"
        % (filename, format, delta.seconds + delta.microseconds * 1e-6))
    print "Saved celltype connectivity graph in", filename
def create_graph_df(vtask_paths, graphs_dir_out):
    """
    Creates a frame that maps sourcefiles to networkx digraphs in terms of DOT files
    :param vtask_paths: list of verification task source paths
    :param graphs_dir_out: destination directory for the graph pickles
    :return:
    """
    if not isdir(graphs_dir_out):
        raise ValueError('Invalid destination directory.')
    data = []
    graphgen_times = []

    print('Writing graph representations of verification tasks to {}'.format(graphs_dir_out),
          flush=True)

    common_prefix = commonprefix(vtask_paths)
    for vtask in tqdm(vtask_paths):
        short_prefix = dirname(common_prefix)
        path = join(graphs_dir_out, vtask[len(short_prefix):][1:])

        if not os.path.exists(dirname(path)):
            os.makedirs(dirname(path))

        ret_path = path + '.pickle'

        # DEBUG
        if isfile(ret_path):
            data.append(ret_path)
            continue

        start_time = time.time()

        graph_path, node_labels_path, edge_types_path, edge_truth_path, node_depths_path \
            = _run_cpachecker(abspath(vtask))
        nx_digraph = nx.read_graphml(graph_path)

        node_labels = _read_node_labeling(node_labels_path)
        nx.set_node_attributes(nx_digraph, 'label', node_labels)

        edge_types = _read_edge_labeling(edge_types_path)
        parsed_edge_types = _parse_edge(edge_types)
        nx.set_edge_attributes(nx_digraph, 'type', parsed_edge_types)

        edge_truth = _read_edge_labeling(edge_truth_path)
        parsed_edge_truth = _parse_edge(edge_truth)
        nx.set_edge_attributes(nx_digraph, 'truth', parsed_edge_truth)

        node_depths = _read_node_labeling(node_depths_path)
        parsed_node_depths = _parse_node_depth(node_depths)
        nx.set_node_attributes(nx_digraph, 'depth', parsed_node_depths)

        assert not isfile(ret_path)
        assert node_labels and parsed_edge_types and parsed_edge_truth and parsed_node_depths
        nx.write_gpickle(nx_digraph, ret_path)
        data.append(ret_path)

        gg_time = time.time() - start_time
        graphgen_times.append(gg_time)

    return pd.DataFrame({'graph_representation': data}, index=vtask_paths), graphgen_times
def main():
    if len(sys.argv) != 2 or sys.argv[1] not in ["forward", "backward"]:
        print("usage: ./gen_graph.py [forward/backward]", file=sys.stderr)
        sys.exit(1)
    direction = sys.argv[1]

    if direction == "forward":
        f = roundf
    else:
        f = inv_roundf

    n = 65536
    g = nx.DiGraph()
    for x in range(n):
        for ns, w in f(convert_int(x)):
            y = convert_states(ns)
            g.add_edge(x, y, weight=w)
        print(x)

    nx.write_gpickle(g, "{}.gpickle".format(direction))
    print("Generated {}.gpickle.".format(direction))

    nx.reverse(g, copy=False)
    nx.write_gpickle(g, "rev_{}.gpickle".format(direction))
    print("Generated rev_{}.gpickle.".format(direction))
def store_graph(graph, name=None):
    filename = datetime.datetime.now().strftime('%Y-%m-%d_%H:%M:%S') + "_Network.gpickle"
    if name is not None:
        filename = name + ".gpickle"
    nx.write_gpickle(graph, filename)
    print("Finished storing the graph" + " " + filename)
    return filename
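# Small sketch of store_graph() above: with no name it derives a timestamped filename,
# with a name it writes "<name>.gpickle"; the example graph is illustrative.
def example_store_graph():
    g = nx.path_graph(5)
    default_file = store_graph(g)             # e.g. 2020-01-01_12:00:00_Network.gpickle
    named_file = store_graph(g, name="demo")  # demo.gpickle
    return default_file, named_file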
def createScaleFreeNetwork(numOfNodes, degree):
    '''
    numOfNodes: The number of nodes that the scale free network should have
    degree: The number of edges each new node attaches with (Barabasi-Albert parameter)

    This function creates a Scale Free Network containing 'numOfNodes' nodes, each new node
    attaching to 'degree' existing nodes. It generates the required graph and saves it in a file.
    It runs the Reinforcement Algorithm to create a weightMatrix and an ordering of the vertices
    based on their importance by Flagging.
    '''
    global reinforce_time
    # Create a Scale Free Network of the given number of nodes and degree
    G = nx.barabasi_albert_graph(numOfNodes, degree)
    StrMap = {}
    for node in G.nodes():
        StrMap[node] = str(node)
    G = nx.convert.relabel_nodes(G, StrMap)

    print "Undergoing Machine Learning..."
    start = time.time()
    H = reinforce(G)  # Run the reinforcement algorithm to produce the learnt graph.
    finish = time.time()
    reinforce_time = finish - start
    print "Machine Learning Completed..."

    filename = "SFN_" + str(numOfNodes) + "_" + str(degree) + '.gpickle'
    nx.write_gpickle(H, filename)  # generate a gpickle file of the learnt graph.
    print "Learnt graph Successfully written into " + filename
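# Hedged example of calling createScaleFreeNetwork() above; the parameter values are
# illustrative, and reinforce() is assumed to be defined as in the original project.
def example_create_scale_free():
    createScaleFreeNetwork(numOfNodes=100, degree=3)
    H = nx.read_gpickle("SFN_100_3.gpickle")
    print nx.info(H)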
def generate_weak_links_map(self):
    weak_nodes = self.detect_weak_nodes(-5, 25)
    active_weak_nodes = [node[0] for node in weak_nodes
                         if max([l[1] for l in node[1]]) > 10]
    ap_nodes = [node for node in self.g.nodes() if self.g.in_degree(node) > 0]
    edges = self.g.edges(active_weak_nodes)

    snr_g = nx.DiGraph()
    snr_g.add_nodes_from(active_weak_nodes + ap_nodes)
    snr_g.add_edges_from(edges)
    for node in active_weak_nodes:
        snr_g.node[node]['type'] = 'sta'
    for node in ap_nodes:
        snr_g.node[node]['type'] = 'ap'

    nx.write_gpickle(snr_g, 'graph_pickle_connectivity_%d.pkl' % time.time())
    #nx.draw(snr_g, with_labels=False)
    #pylab.savefig("connectivity-graph-%d.png" % (int(time.time())))

    d = json_graph.node_link_data(snr_g)  # node-link format to serialize
    # write json
    json.dump(d, open('force/force.json', 'w'))
    print ap_nodes
def correlate_node_by_sync(cells):
    global template_, avg_
    for m, n in itertools.combinations(cells.nodes(), 2):
        vec1, vec2 = cells.node[m]['timeseries'], cells.node[n]['timeseries']
        corr = sync_index(vec1, vec2)
        rcorr = sync_index(vec2, vec1)
        if corr > 0.6:
            cells.add_edge(m, n, weight=corr)
            cells.add_edge(n, m, weight=rcorr)

    outfile = 'final.png'
    plt.figure(figsize=(12, 8))
    plt.subplot(2, 2, 1)
    plt.imshow(avg_, interpolation='none', aspect='auto')
    plt.title('All frames averaged')
    plt.colorbar()  # orientation = 'horizontal'

    syncImg = np.zeros(shape=template_.shape)
    syncDict = defaultdict(list)

    nx.write_gpickle(cells, 'cells.gpickle')
    logger.info('Logging out after writing to graph.')
    return

    # NOTE: the code below is unreachable because of the early return above;
    # it is kept as in the original source.
    try:
        nx.drawing.nx_agraph.write_dot(cells, 'all_cell.dot')
    except Exception as e:
        logger.warn('Failed to write dot file %s' % e)

    for i, c in enumerate(nx.attracting_components(cells)):
        if len(c) < 2:
            continue
        logger.info('Found attracting component of length %d' % len(c))
        for p in c:
            cv2.circle(syncImg, (p[1], p[0]), 2, (i+1), 2)
            # syncDict[str(c)].append(cells.node[p]['timeseries'])

    plt.subplot(2, 2, 2)
    plt.imshow(timeseries_, interpolation='none', aspect='auto', cmap='seismic')
    plt.colorbar()  # orientation = 'horizontal'
    plt.title('Activity of each pixal')

    plt.subplot(2, 2, 3)
    plt.imshow(syncImg, interpolation='none', aspect='auto')
    plt.colorbar()  # orientation = 'horizontal'

    # Here we draw the synchronization.
    plt.subplot(2, 2, 4)
    # clusters = []
    # for c in syncDict:
    #     clusters += syncDict[c]
    #     # Append two empty lines to separate the clusters.
    #     clusters += [np.zeros(timeseries_.shape[1])]
    # try:
    #     plt.imshow(np.vstack(clusters), interpolation='none', aspect='auto')
    #     plt.colorbar()  # orientation = 'horizontal'
    # except Exception as e:
    #     print("Couldn't plot clusters %s" % e)

    plt.tight_layout()
    plt.savefig(outfile)
    logger.info('Saved to file %s' % outfile)
def _write_networks_to_file():
    g1 = AnnotatedGraph()
    g1.load_HPRDNPInteractome()
    nx.write_gpickle(g1, LINKROOT + "/datasets/HPRDNPInteractome.gPickle")

    g2 = AnnotatedGraph()
    g2.load_HPRDOnlyInteractome()
    nx.write_gpickle(g2, LINKROOT + "/datasets/HPRDInteractome.gPickle")
def persist(self):
    #print "Persisted"
    prefixes = ["", ".reserve.01", ".reserve.02", ".reserve.03", ".reserve.04", ".reserve.05"]
    # rotate existing backups before writing the new pickle
    for i in range(len(prefixes)-2, -1, -1):
        if os.path.exists(self.graph_file + prefixes[i]):
            os.rename(self.graph_file + prefixes[i], self.graph_file + prefixes[i+1])
    nx.write_gpickle(self.nxgraph, self.graph_file + prefixes[0])
def save_data(self):
    nx.write_gpickle(self.graph, "popitgraph.pickle")

    f = open("node_color.pickle", "wb")
    pickle.dump(self.colors, f)
    f.close()

    f = open("node_label.pickle", "wb")
    pickle.dump(self.labels, f)
    f.close()
def fragment_graph(self, path, x=3, y=3, mode='normal'):
    """ Generate fragments of the graph and save them individually in path.

    mode == 'pixels': x, y are dimensions of fragments
    otherwise: x, y are number of fragments in resp. axis
    """
    print "Fragmenting."
    G = nx.connected_component_subgraphs(self.graph)[0]

    # bounding box
    xs = [d['x'] for n, d in G.nodes_iter(data=True)]
    ys = [d['y'] for n, d in G.nodes_iter(data=True)]

    x_min = min(xs)
    x_max = max(xs)
    y_min = min(ys)
    y_max = max(ys)

    # equal sized tiles. otherwise x, y mean number of tiles in
    # respective axis
    if mode == 'pixels':
        x = float(x)
        y = float(y)
        x_fragments = int((x_max - x_min)/x)
        y_fragments = int((y_max - y_min)/y)
        print "Tiling into {}x{} fragments of size {}x{}.".format(
            x_fragments, y_fragments, x, y)
    else:
        # per the docstring, x and y are already the number of fragments per axis
        x_fragments = int(x)
        y_fragments = int(y)

    # fragment into pieces
    fragments = []
    for i in xrange(x_fragments):
        for j in xrange(y_fragments):
            x0 = x_min + i/float(x_fragments)*(x_max - x_min)
            x1 = x0 + 1./float(x_fragments)*(x_max - x_min)
            y0 = y_min + j/float(y_fragments)*(y_max - y_min)
            y1 = y0 + 1./float(y_fragments)*(y_max - y_min)

            nodes = [n for n, d in G.nodes_iter(data=True)
                     if d['x'] >= x0 and d['x'] <= x1
                     and d['y'] >= y0 and d['y'] <= y1]
            fragments.append(G.subgraph(nodes))

    # save fragments as individual graphs
    if not os.path.exists(path):
        os.makedirs(path)

    print "Saving fragments."
    name, ext = os.path.splitext(os.path.basename(self.fname))
    for i, fragment in enumerate(fragments):
        nx.write_gpickle(fragment,
                         os.path.join(path, name + '_fragment_{}.gpickle'.format(i)))
def write_graph(graph, path):
    """Given a graph object and a path, save graph to path as gpickle"""
    nx.write_gpickle(graph, path)
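# Round-trip sketch for write_graph() above: anything written with nx.write_gpickle
# can be restored with nx.read_gpickle; the path is an illustrative assumption.
def example_write_graph_roundtrip():
    g = nx.karate_club_graph()
    write_graph(g, "karate.gpickle")
    g2 = nx.read_gpickle("karate.gpickle")
    assert g.number_of_edges() == g2.number_of_edges()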
def main():
    # Read the retweet pickle files
    # Build a networkx graph from the loaded data
    with open(
            '../TweetOldSerialization/pickle/BiotestamentoGraph/Gennaio/retweetListBlue.pkl',
            'rb') as input:
        retweetListBlue = pickle.load(input)
    with open(
            '../TweetOldSerialization/pickle/BiotestamentoGraph/Gennaio/retweetListRed.pkl',
            'rb') as input:
        retweetListRed = pickle.load(input)
    with open(
            '../TweetOldSerialization/pickle/BiotestamentoGraph/Gennaio/retweetListYellow.pkl',
            'rb') as input:
        retweetListYellow = pickle.load(input)
    with open(
            '../TweetOldSerialization/pickle/BiotestamentoGraph/Gennaio/probRetBlue.pkl',
            'rb') as input:
        probRetBlue = pickle.load(input)
    with open(
            '../TweetOldSerialization/pickle/BiotestamentoGraph/Gennaio/probRetRed.pkl',
            'rb') as input:
        probRetRed = pickle.load(input)

    List = []
    for i in retweetListBlue:
        List.append(i)
    for i in retweetListRed:
        List.append(i)

    DizPesi = {}
    for i in probRetBlue:
        if not DizPesi.has_key(i):
            DizPesi[i] = probRetBlue[i]
        else:
            continue
    for i in probRetRed:
        if not DizPesi.has_key(i):
            DizPesi[i] = probRetRed[i]
        else:
            continue

    nodi_Blue = NodeDict(retweetListBlue)
    nodi_Red = NodeDict(retweetListRed)

    G = createGraph(List, DizPesi)
    size_node_degree = []
    print "Numero NODI", len(G.nodes)

    UpdateNode(retweetListYellow, nodi_Blue)
    UpdateNode(retweetListYellow, nodi_Red)
    #print(test)

    posizioneBlue = PosNode(G.nodes(), nodi_Blue)
    posizioneRed = PosNode(G.nodes(), nodi_Red)
    dizPosizioneBlue = PosNodeDizionario(G.nodes, nodi_Blue)
    dizPosizioneRed = PosNodeDizionario(G.nodes, nodi_Red)
    #dizPosizioneYellow = PosNodeDizionario(G.nodes, nodi_Yellow)

    # List of Polarization of Elite and Listener
    firstPolar = setFirstPolarization(G, dizPosizioneBlue, dizPosizioneRed)
    #print "Polarization step 0 ", firstPolar

    dictFirstPol = {}
    x = 0
    for i in G.nodes():
        if not dictFirstPol.has_key(i):
            dictFirstPol[i] = firstPolar[x]
            x = x + 1

    list = []
    for i in G.nodes():
        list.append(i)

    # adjacency matrix built from the list of nodes
    mat_attr = nx.attr_matrix(G, rc_order=list)
    at_array = np.array(mat_attr)

    newPol = opinionPolarization(G, at_array, firstPolar, list)
    dictPol = {}
    x = 0
    for i in G.nodes():
        if not dictPol.has_key(i):
            dictPol[i] = newPol[x]
            x = x + 1

    print(len(G.nodes))
    #size = float(len(set(partition.values())))

    # change the node colours according to their degree
    #Polar = Polarization(p_array, posizioneRed, posizioneBlue, len(G.nodes), matriceProbRetweet)  # works with the partition
    node_color = colorNode(G, nodi_Blue, nodi_Red)
    #node_colorPol = colorNodePol(len(G.nodes()), newPol)

    testdict = opinionPolarizationDict(G, at_array, firstPolar, list)
    print("testdict ", testdict)
    list_lastPol = testdict.get(len(testdict) - 1)
    #print(list_lastPol)
    #print(set(testdict[1]))
    node_colorPol = colorNodePol(len(G.nodes()), list_lastPol)

    test = {}
    x = 0
    for i in G.nodes():
        if not testdict.has_key(i):
            test[i] = testdict.get(len(testdict) - 1)[x]
            x = x + 1

    for i in range(0, len(testdict)):
        if i + 1 == (len(testdict) - 1):
            break
        print("i", i, "j", i + 1, " simili=", set(testdict[i]) == set(testdict[i + 1]))

    #labels = labelPolarization(Polar, G, nodi_Blue, nodi_Red)
    pos = nx.spring_layout(G)

    # For the partition
    # list_nodes = []
    # for com in set(partition.values()):
    #     count = count + 1.
    #     x = 0
    #     for nodes in partition.keys():
    #         # print "nodes", nodes
    #         if partition[nodes] == com:
    #             list_nodes.append(nodes)

    # with the partition
    #nx.draw_networkx_nodes(G, pos, list_nodes, with_labels=False, node_color=node_color)

    nx.write_gpickle(G,
                     '../Test/Biotestamento/Gennaio/grafoBiotestVen.pickle',
                     protocol=pickle.HIGHEST_PROTOCOL)

    with open(
            '../Test/Biotestamento/Gennaio/dizionarioPolarizzazioneVenezuela.pickle',
            "wb") as output:
        pickle.dump(test, output, pickle.HIGHEST_PROTOCOL)
    with open(
            '../Test/Biotestamento/Gennaio/listaColoriPolarizzazioneVenezuela.pickle',
            "wb") as output:
        pickle.dump(node_colorPol, output, pickle.HIGHEST_PROTOCOL)

    nx.draw_networkx_nodes(G, pos, G.nodes(), with_labels=True, node_color=node_colorPol)
    nx.draw_networkx_edges(G, pos, alpha=0.5, edge_color='b')
    nx.draw_networkx_labels(G, pos, test, font_size=8)
    plt.savefig("../Test/Biotestamento/Gennaio/PolarizzazioneVene.png", format="PNG")
    plt.show()
def write_graph(self, outfile, manifest):
    """Write the graph to a gpickle file. Before doing so, serialize and
    include all nodes in their corresponding graph entries.
    """
    out_graph = _updated_graph(self.graph, manifest)
    nx.write_gpickle(out_graph, outfile)
def read_graph(bestedges, maxerr=100, directed=False):
    logging.debug("Max error = {0}%".format(maxerr))
    tag = "dir." if directed else ""
    bestgraph = bestedges.split(".")[0] + ".err{0}.{1}graph".format(maxerr, tag)
    if need_update(bestedges, bestgraph):
        G = {} if directed else nx.Graph()
        fp = open(bestedges)
        best_store = {}
        for row in fp:
            if row[0] == '#':
                continue
            id1, lib_id, best5, o5, best3, o3, j1, j2 = row.split()
            id1, best5, best3 = int(id1), int(best5), int(best3)
            j1, j2 = float(j1), float(j2)
            if j1 <= maxerr or j2 <= maxerr:
                if not directed:
                    G.add_node(id1)
                id1p5, id1p3 = "{0}-5'".format(id1), "{0}-3'".format(id1)
                best5o5 = "{0}-{1}".format(best5, o5)
                best3o3 = "{0}-{1}".format(best3, o3)
                best_store[id1p5] = best5o5
                best_store[id1p3] = best3o3
                if best5 and j1 <= maxerr:
                    if directed:
                        G[id1p5] = best5o5
                    else:
                        G.add_edge(best5, id1, weight=10)
                if best3 and j2 <= maxerr:
                    if directed:
                        G[id1p3] = best3o3
                    else:
                        G.add_edge(id1, best3, weight=10)

        # Annotate edge weight for mutual best link; note that the edge weight (11)
        # is set close to 10, to minimize impact to layout (Yifan Hu's multilevel)
        nmutuals = 0
        for k, v in best_store.items():
            if best_store.get(v) == k and k < v:
                k, v = int(k.split("-")[0]), int(v.split("-")[0])
                G[k][v]["weight"] = 11
                nmutuals += 1
        logging.debug("Mutual best edges: {0}".format(nmutuals))

        if directed:
            fw = open(bestgraph, "w")
            dump(G, fw)
            fw.close()
        else:
            nx.write_gpickle(G, bestgraph)
        logging.debug("Graph pickled to `{0}`".format(bestgraph))

        # Compute node degree histogram and save in (degree, counts) tab file
        degrees = G.degree()
        degree_counter = Counter(degrees.values())
        degreesfile = "degrees.txt"
        fw = open(degreesfile, "w")
        for degree, count in sorted(degree_counter.items()):
            print("{0}\t{1}".format(degree, count), file=fw)
        fw.close()
        logging.debug("Node degree distribution saved to `{0}`".format(degreesfile))

        # Save high degree (top 0.1%) nodes in (node, degree) tab file
        percentile = sorted(degrees.values(), reverse=True)[len(degrees) // 1000]
        logging.debug("Top 0.1% has degree of at least {0}".format(percentile))
        hubs = [(k, v) for k, v in degrees.items() if v >= percentile]
        hubs.sort(key=lambda x: x[1], reverse=True)  # degrees descending
        hubsfile = "hubs.txt"
        fw = open(hubsfile, "w")
        for node, degree in hubs:
            print("{0}\t{1}".format(node, degree), file=fw)
        fw.close()
        logging.debug("Hubs saved to `{0}`".format(hubsfile))

    logging.debug("Read graph from `{0}`".format(bestgraph))
    if directed:
        G = load(open(bestgraph))
    else:
        G = nx.read_gpickle(bestgraph)
    graph_stats(G)
    return G
def main():
    if not torch.cuda.is_available():
        logger.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logger.info('gpu device = %d' % args.gpu)
    logger.info("args = %s", args)

    # load the correct ops dictionary
    op_dict_to_load = "operations.%s" % args.ops
    logger.info('loading op dict: ' + str(op_dict_to_load))
    op_dict = eval(op_dict_to_load)

    # load the correct primitives list
    primitives_to_load = "genotypes.%s" % args.primitives
    logger.info('loading primitives:' + primitives_to_load)
    primitives = eval(primitives_to_load)
    logger.info('primitives: ' + str(primitives))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    if args.multi_channel:
        final_path = None
        if args.final_path is not None:
            final_path = np.load(args.final_path)

        genotype = None
        if args.load_genotype is not None:
            genotype = getattr(genotypes, args.load_genotype)
        cnn_model = model_search.MultiChannelNetwork(
            args.init_channels, CIFAR_CLASSES, layers=args.layers_of_cells,
            criterion=criterion, steps=args.layers_in_cells,
            primitives=primitives, op_dict=op_dict,
            weighting_algorithm=args.weighting_algorithm, genotype=genotype)
        #save_graph(cnn_model.G, os.path.join(args.save, 'network_graph.pdf'))
        if args.load_genotype is not None:
            # TODO(ahundt) support other batch shapes
            data_shape = [1, 3, 32, 32]
            batch = torch.zeros(data_shape)
            cnn_model(batch)
            logger.info("loaded genotype_raw_weights = " + str(cnn_model.genotype('raw_weights')))
            logger.info("loaded genotype_longest_path = " + str(cnn_model.genotype('longest_path')))
            logger.info("loaded genotype greedy_path = " +
                        str(gen_greedy_path(cnn_model.G, strategy="top_down")))
            logger.info("loaded genotype greedy_path_bottom_up = " +
                        str(gen_greedy_path(cnn_model.G, strategy="bottom_up")))
            # TODO(ahundt) support other layouts
    else:
        cnn_model = model_search.Network(
            args.init_channels, CIFAR_CLASSES, layers=args.layers_of_cells,
            criterion=criterion, steps=args.layers_in_cells,
            primitives=primitives, op_dict=op_dict,
            weights_are_parameters=args.no_architect, C_mid=args.mid_channels,
            weighting_algorithm=args.weighting_algorithm)

    cnn_model = cnn_model.cuda()
    logger.info("param size = %fMB", utils.count_parameters_in_MB(cnn_model))
    if args.load:
        logger.info('loading weights from: ' + args.load)
        utils.load(cnn_model, args.load)

    optimizer = torch.optim.SGD(cnn_model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # Get preprocessing functions (i.e. transforms) to apply on data
    train_transform, valid_transform = utils.get_data_transforms(args)

    # Get the training queue, select training and validation from training set
    train_queue, valid_queue = dataset.get_training_queues(
        args.dataset, train_transform, valid_transform, args.data,
        args.batch_size, args.train_portion, search_architecture=True)

    lr_schedule = cosine_power_annealing(
        epochs=args.epochs, max_lr=args.learning_rate,
        min_lr=args.learning_rate_min, warmup_epochs=args.warmup_epochs,
        exponent_order=args.lr_power_annealing_exponent_order)
    epochs = np.arange(args.epochs) + args.start_epoch
    # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    #     optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    if args.no_architect:
        architect = None
    else:
        architect = Architect(cnn_model, args)

    epoch_stats = []
    stats_csv = args.epoch_stats_file
    stats_csv = stats_csv.replace('.json', '.csv')
    with tqdm(epochs, dynamic_ncols=True) as prog_epoch:
        best_valid_acc = 0.0
        best_epoch = 0
        # state_dict = {}
        # og_state_keys = set()
        # updated_state_keys = set()
        # saving state_dict for debugging weights by comparison
        # for key in cnn_model.state_dict():
        #     state_dict[key] = cnn_model.state_dict()[key].clone()
        #     # logger.info('layer = {}'.format(key))
        # logger.info('Total keys in state_dict = {}'.format(len(cnn_model.state_dict().keys())))
        # og_state_keys.update(cnn_model.state_dict().keys())
        best_stats = {}
        weights_file = os.path.join(args.save, 'weights.pt')
        for epoch, learning_rate in zip(prog_epoch, lr_schedule):
            # scheduler.step()
            # lr = scheduler.get_lr()[0]
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate

            genotype = None
            if args.final_path is None:
                genotype = cnn_model.genotype()
                logger.info('genotype = %s', genotype)

            if not args.multi_channel:
                # the genotype is the alphas in the multi-channel case
                # print the alphas in other cases
                logger.info('alphas_normal = %s', cnn_model.arch_weights(0))
                logger.info('alphas_reduce = %s', cnn_model.arch_weights(1))

            # training
            train_acc, train_obj = train(train_queue, valid_queue, cnn_model,
                                         architect, criterion, optimizer, learning_rate)

            if args.multi_channel and args.final_path is None:
                # TODO(ahundt) remove final path and switch back to genotype, and save out raw weights plus optimal path
                optimal_path = nx.algorithms.dag.dag_longest_path(cnn_model.G)
                optimal_path_filename = os.path.join(args.save, 'longest_path_layer_sequence.npy')
                logger.info('Saving model layer sequence object: ' + str(optimal_path_filename))
                np.save(optimal_path_filename, optimal_path)
                graph_filename = os.path.join(args.save, 'network_graph_' + str(epoch) + '.graph')
                logger.info('Saving updated weight graph: ' + str(graph_filename))
                nx.write_gpickle(cnn_model.G, graph_filename)
                logger.info('optimal_path : %s', optimal_path)

            # validation
            valid_acc, valid_obj = infer(valid_queue, cnn_model, criterion)
            if valid_acc > best_valid_acc:
                # new best epoch, save weights
                utils.save(cnn_model, weights_file)

                if args.multi_channel:
                    graph_filename = os.path.join(
                        args.save, 'network_graph_best_valid' + str(epoch) + '.graph')
                    logger.info('Saving updated weight graph: ' + str(graph_filename))

                best_epoch = epoch
                best_valid_acc = valid_acc
            prog_epoch.set_description(
                'Overview ***** best_epoch: {0} best_valid_acc: {1:.2f} ***** Progress'
                .format(best_epoch, best_valid_acc))
            logger.info(
                'epoch, %d, train_acc, %f, valid_acc, %f, train_loss, %f, valid_loss, %f, '
                'lr, %e, best_epoch, %d, best_valid_acc, %f',
                epoch, train_acc, valid_acc, train_obj, valid_obj,
                learning_rate, best_epoch, best_valid_acc)
            stats = {
                'epoch': epoch,
                'train_acc': train_acc,
                'valid_acc': valid_acc,
                'train_loss': train_obj,
                'valid_loss': valid_obj,
                'lr': learning_rate,
                'best_epoch': best_epoch,
                'best_valid_acc': best_valid_acc,
                'genotype': str(genotype),
                'arch_weights': str(cnn_model.arch_weights)
            }
            epoch_stats += [copy.deepcopy(stats)]
            with open(args.epoch_stats_file, 'w') as f:
                json.dump(epoch_stats, f, cls=utils.NumpyEncoder)
            utils.list_of_dicts_to_csv(stats_csv, epoch_stats)

    # print the final model
    if args.final_path is None:
        genotype = cnn_model.genotype()
        logger.info('genotype = %s', genotype)
    logger.info('Search for Model Complete! Save dir: ' + str(args.save))
def main(): """ Contains majority of expermiment. Runs a markov chain on the state dual graph, determining how the distribution is affected to changes in the state dual graph. Raises: RuntimeError if PROPOSAL_TYPE of config file is neither 'sierpinski' nor 'convex' """ output_directory = createDirectory(config) epsilon = config["epsilon"] k = config["NUM_DISTRICTS"] updaters = { 'population': Tally('population'), 'cut_edges': cut_edges, } graph, dual = preprocessing(config["INPUT_GRAPH_FILENAME"], output_directory) ideal_population = sum(graph.nodes[x]["population"] for x in graph.nodes()) / k faces = graph.graph["faces"] faces = list(faces) square_faces = [face for face in faces if len(face) == 4] totpop = 0 for node in graph.nodes(): totpop += int(graph.nodes[node]['population']) #length of chain steps = config["CHAIN_STEPS"] #length of each gerrychain step gerrychain_steps = config["GERRYCHAIN_STEPS"] #faces that are currently modified. Code maintains list of modified faces, and at each step selects a face. if face is already in list, #the face is un-modified, and if it is not, the face is modified by the specified proposal type. special_faces = set( [face for face in square_faces if np.random.uniform(0, 1) < .5]) chain_output = defaultdict(list) #start with small score to move in right direction print("Choosing", math.floor(len(faces) * config['PERCENT_FACES']), "faces of the dual graph at each step") max_score = -math.inf #this is the main markov chain for i in tqdm.tqdm(range(1, steps + 1), ncols=100, desc="Chain Progress"): special_faces_proposal = copy.deepcopy(special_faces) proposal_graph = copy.deepcopy(graph) if (config["PROPOSAL_TYPE"] == "sierpinski"): for i in range(math.floor(len(faces) * config['PERCENT_FACES'])): face = random.choice(faces) ##Makes the Markov chain lazy -- this just makes the chain aperiodic. if random.random() > .5: if not (face in special_faces_proposal): special_faces_proposal.append(face) else: special_faces_proposal.remove(face) face_sierpinski_mesh(proposal_graph, special_faces_proposal) elif (config["PROPOSAL_TYPE"] == "add_edge"): for j in range( math.floor(len(square_faces) * config['PERCENT_FACES'])): face = random.choice(square_faces) ##Makes the Markov chain lazy -- this just makes the chain aperiodic. 
if random.random() > .5: if not (face in special_faces_proposal): special_faces_proposal.add(face) else: special_faces_proposal.remove(face) add_edge_proposal(proposal_graph, special_faces_proposal) else: raise RuntimeError( 'PROPOSAL TYPE must be "sierpinski" or "convex"') initial_partition = Partition(proposal_graph, assignment=config['ASSIGN_COL'], updaters=updaters) # Sets up Markov chain popbound = within_percent_of_ideal_population(initial_partition, epsilon) tree_proposal = partial(recom, pop_col=config['POP_COL'], pop_target=ideal_population, epsilon=epsilon, node_repeats=1) #make new function -- this computes the energy of the current map exp_chain = MarkovChain(tree_proposal, Validator([popbound]), accept=accept.always_accept, initial_state=initial_partition, total_steps=gerrychain_steps) seats_won_for_republicans = [] seats_won_for_democrats = [] for part in exp_chain: rep_seats_won = 0 dem_seats_won = 0 for j in range(k): rep_votes = 0 dem_votes = 0 for n in graph.nodes(): if part.assignment[n] == j: rep_votes += graph.nodes[n]["EL16G_PR_R"] dem_votes += graph.nodes[n]["EL16G_PR_D"] total_seats_dem = int(dem_votes > rep_votes) total_seats_rep = int(rep_votes > dem_votes) rep_seats_won += total_seats_rep dem_seats_won += total_seats_dem seats_won_for_republicans.append(rep_seats_won) seats_won_for_democrats.append(dem_seats_won) seat_score = statistics.mean(seats_won_for_republicans) #implement modified mattingly simulated annealing scheme, from evaluating partisan gerrymandering in wisconsin if i <= math.floor(steps * .67): beta = i / math.floor(steps * .67) else: beta = (i / math.floor(steps * .67)) * 100 temperature = 1 / (beta) weight_seats = 1 weight_flips = -.2 config['PERCENT_FACES'] = config['PERCENT_FACES'] flip_score = len( special_faces) # This is the number of edges being swapped score = weight_seats * seat_score + weight_flips * flip_score ##This is the acceptance step of the Metropolis-Hasting's algorithm. Specifically, rand < min(1, P(x')/P(x)), where P is the energy and x' is proposed state #if the acceptance criteria is met or if it is the first step of the chain def update_outputs(): chain_output['dem_seat_data'].append(seats_won_for_democrats) chain_output['rep_seat_data'].append(seats_won_for_republicans) chain_output['score'].append(score) chain_output['seat_score'].append(seat_score) chain_output['flip_score'].append(flip_score) def propagate_outputs(): for key in chain_output.keys(): chain_output[key].append(chain_output[key][-1]) if i == 1: update_outputs() special_faces = copy.deepcopy(special_faces_proposal) #this is the simplified form of the acceptance criteria, for intuitive purposes #exp((1/temperature) ( proposal_score - previous_score)) elif np.random.uniform(0, 1) < (math.exp(score) / math.exp( chain_output['score'][-1]))**(1 / temperature): update_outputs() special_faces = copy.deepcopy(special_faces_proposal) else: propagate_outputs() #if score is highest seen, save map. 
if score > max_score: #todo: all graph coloring for graph changes that produced this score nx.write_gpickle(proposal_graph, output_directory + '/' + "max_score", pickle.HIGHEST_PROTOCOL) f = open(output_directory + "/max_score_data.txt", "w+") f.write("maximum score: " + str(score) + "\n" + "edges changed: " + str(len(special_faces)) + "\n" + "Seat Score: " + str(seat_score)) save_obj(special_faces, output_directory + '/', "special_faces") max_score = score plt.plot(range(len(chain_output['score'])), chain_output['score']) plt.xlabel("Meta-Chain Step") plt.ylabel("Score") plot_name = output_directory + '/' + 'score' + '.png' plt.savefig(plot_name) ## Todo: Add scatter plot of the seat_score and flip_score here. save_obj(chain_output, output_directory, "chain_output")
tres = grp.create_dataset("t", (1, ), maxshape=(None, ), dtype=float)
# km0=4*2**(1/4)
# u0[:,0]=np.exp(-np.linalg.norm(k-k[int(k.shape[0]/2)],axis=0)**2/4**2)*np.exp(1j*np.pi*np.random.random(N))
# u0[:,1]=np.exp(-np.linalg.norm(k-k[int(k.shape[0]/2)],axis=0)**2/4**2)*np.exp(1j*np.pi*np.random.random(N))
# u0=np.sqrt(6*np.sqrt(2/np.pi)*km0**(-5)*np.abs(k)**4*np.exp(-2*(np.abs(k)/km0)**2))*np.exp(1j*np.pi*np.random.random(Nh))
if (save_network):
    gr = nx.Graph()
    strs = [np.str(l) for l in trs]
    gr.add_nodes_from(kn, bipartite=0)
    gr.add_nodes_from(strs, bipartite=1)
    for l in range(len(trs)):
        gr.add_edges_from([(kn[trs[l][0]], strs[l]),
                           (kn[trs[l][1]], strs[l]),
                           (kn[trs[l][2]], strs[l])])
    nx.write_gpickle(gr, 'nwfile.pkl')

r = spi.RK45(func, t0, u0.ravel().view(dtype=float), t1, max_step=dt)
epst = 1e-12
ct = time.time()
if (random_forcing == True):
    force_update()
#dtff,dtf,dts,dtss=np.sort((dt,dtr,dtrw,dtout))
toldr = -1.0e12
toldrw = -1.0e12
toldout = -1.0e12
while (r.status == 'running'):
    told = r.t
    if (r.t >= toldout + dtout - epst and r.status == 'running'):
        toldout = r.t
def cd_cluster_evolution_graph(
    config,
    source_folder,
    snaphot_mapping_folder,
    subseqitem_mapping_folder,
    target_folder,
    regulations,
):
    config_clustering_files, snapshots = get_config_clustering_files(config, source_folder)

    first = True
    B = nx.DiGraph()
    prev_community_id_for_rolled_down = None
    prev_preprocessed_mappings = None
    prev_snapshot = None

    for config_clustering_file, snapshot in zip(config_clustering_files, snapshots):
        # Add nodes to graph
        clustering = readwrite.read_community_json(
            os.path.join(source_folder, config_clustering_file))
        with open(
                os.path.join(
                    subseqitem_mapping_folder,
                    f'{snapshot}_{config["pp_merge"]}.pickle',
                ),
                "rb",
        ) as f:
            preprocessed_mappings = pickle.load(f)

        counters_dict = get_cluster_law_names_counting_seqitems(
            preprocessed_mappings, clustering.communities)
        most_common_dict = {
            k: ",".join([f"{elem_k},{count}" for elem_k, count in v.most_common()])
            for k, v in counters_dict.items()
        }
        chars_n_dict = get_community_sizes(
            clustering.communities,
            preprocessed_mappings["chars_n"],
        )
        tokens_n_dict = get_community_sizes(clustering.communities,
                                            preprocessed_mappings["tokens_n"])

        for community_key, community_nodes in enumerate(clustering.communities):
            community_nodes_sorted = sorted(
                community_nodes,
                key=lambda n: preprocessed_mappings["tokens_n"].get(n, 0),
                reverse=True,
            )
            for n in community_nodes_sorted:
                assert "," not in n
            B.add_node(
                f"{snapshot}_{community_key}",
                bipartite=snapshot,
                chars_n=chars_n_dict[community_key],
                tokens_n=tokens_n_dict[community_key],
                law_names=most_common_dict[community_key],
                nodes_contained=",".join(community_nodes_sorted),
            )

        communities_rolled_down = [[
            n for rolled_up_node in community_nodes
            for n in preprocessed_mappings["items_mapping"][rolled_up_node]
        ] for community_nodes in clustering.communities]

        community_id_for_rolled_down = {
            n: community_id
            for community_id, nodes in enumerate(communities_rolled_down)
            for n in nodes
        }

        if not first:
            with open(
                    os.path.join(snaphot_mapping_folder,
                                 f"{prev_snapshot}_{snapshot}.json")) as f:
                mapping = json.load(f)

            # draw edges
            edges_tokens_n = defaultdict(int)
            edges_chars_n = defaultdict(int)
            for prev_leaf_and_text_idx, leaf_and_text_idx in mapping.items():
                prev_leaf, prev_text_idx = prev_leaf_and_text_idx.rsplit("_", 1)
                leaf, text_idx = leaf_and_text_idx.rsplit("_", 1)
                text_idx = int(text_idx)
                try:
                    prev_community_id = prev_community_id_for_rolled_down[prev_leaf]
                except KeyError as err:
                    report_mapping_error(err, prev_preprocessed_mappings["tokens_n"])
                    continue
                try:
                    community_id = community_id_for_rolled_down[leaf]
                except KeyError as err:
                    report_mapping_error(err, preprocessed_mappings["tokens_n"])
                    continue

                prev_community_name = f"{prev_snapshot}_{prev_community_id}"
                community_name = f"{snapshot}_{community_id}"
                edge = (prev_community_name, community_name)

                if leaf in preprocessed_mappings["texts_tokens_n"]:
                    texts_tokens_n = preprocessed_mappings["texts_tokens_n"][leaf]
                    texts_chars_n = preprocessed_mappings["texts_chars_n"][leaf]
                    tokens_n = texts_tokens_n[text_idx]
                    chars_n = texts_chars_n[text_idx]
                else:
                    assert text_idx == 0
                    tokens_n = preprocessed_mappings["tokens_n"][leaf]
                    chars_n = preprocessed_mappings["chars_n"][leaf]

                # Use the tokens_n and chars_n values of the later year
                edges_tokens_n[edge] += tokens_n
                edges_chars_n[edge] += chars_n

            B.add_edges_from(edges_tokens_n.keys())
            nx.set_edge_attributes(B, edges_tokens_n, "tokens_n")
            nx.set_edge_attributes(B, edges_chars_n, "chars_n")

        first = False
        prev_snapshot = snapshot
        prev_community_id_for_rolled_down = community_id_for_rolled_down
        prev_preprocessed_mappings = preprocessed_mappings

    nx.write_gpickle(
        B,
        f"{target_folder}/"
        f'{filename_for_pp_config(snapshot="all", **config, file_ext=".gpickle.gz")}',
    )

    # Write families
    families = cluster_families(B, threshold=0.15)
    path = (
        f"{target_folder}/"
        f'{filename_for_pp_config(snapshot="all", **config, file_ext=".families.json")}'
    )
    with open(path, "w") as f:
        json.dump(families, f)
def save_graph(self, path):
    nx.write_gpickle(self.g, path)
parser_init = argparse.ArgumentParser()
parser_init.add_argument("--input_graph", help="Graph in gpickle format.")
parser_init.add_argument("--percentile", help="Degree percentile.")
parser_init.add_argument("--step_size", help="Neighbourhood size.")
parser_init.add_argument(
    "--heuristic",
    help="possible options: degree, pagerank_numpy, pagerank_scipy, katz, "
         "eigenvector_centrality_numpy, flow_betweenness, communicability, pagerank_scipy")
parser_init.add_argument("--ontology_id", help="dataset.")
parser_init.add_argument("--make_samples", help="dataset.")
parser_init.add_argument("--output_graph", help="dataset.")
parsed = parser_init.parse_args()

G = nx.read_gpickle(parsed.input_graph)

if parsed.output_graph:
    # result_graph and job_id are assumed to be defined elsewhere in the original script.
    nx.write_gpickle(result_graph, "graph_datasets/" + job_id + ".gpickle")

outgraph2 = g2o(G, parsed.percentile, parsed.step_size, parsed.heuristic)

if parsed.ontology_id:
    rdfpart = rm.rdfconverter(outgraph2, "query")  ## query is the folder with lists
    if parsed.make_samples:
        rdfpart.return_target_n3("samples/" + parsed.ontology_id)  ## target folder
    otype = parsed.ontology_id.split(".")[1]
    rdfpart.return_background_knowledge("BK/autogen" + parsed.ontology_id, otype)
def log_graph(
    graph,
    outdir,
    filename,
    identify_self=False,
    nodecolor="tag",
    fig_size=(4, 3),
    dpi=300,
    label_node_feat=True,
    edge_vmax=None,
    args=None,
    eps=1e-6,
):
    """
    Args:
        nodecolor: the color of node, can be determined by 'label', or 'feat'. For feat, it
            needs to be one-hot.
    """
    if len(graph.edges) == 0:
        return

    import matplotlib.pyplot as plt
    plt.switch_backend("agg")
    cmap = plt.get_cmap("tab20")
    plt.switch_backend("agg")
    fig = plt.figure(figsize=fig_size, dpi=dpi)

    node_colors = []
    # edge_colors = [min(max(w, 0.0), 1.0) for (u, v, w) in graph.edges.data('weight', default=1)]
    edge_colors = [w for (u, v, w) in graph.edges.data("weight", default=1)]

    # maximum value for node color
    vmax = 19
    # for i in graph.nodes():
    #     if nodecolor == "feat" and "feat" in graph.nodes[i]:
    #         num_classes = graph.nodes[i]["feat"].size()[0]
    #         if num_classes >= 10:
    #             cmap = plt.get_cmap("tab20")
    #             vmax = 19
    #         elif num_classes >= 8:
    #             cmap = plt.get_cmap("tab10")
    #             vmax = 9
    #         break

    feat_labels = {}
    for i in graph.nodes():
        if identify_self and "self" in graph.nodes[i]:
            node_colors.append(0)
        elif nodecolor == "tag" and "tag" in graph.nodes[i]:
            node_colors.append(graph.nodes[i]["tag"])
            feat_labels[i] = graph.nodes[i]["tag"]
        elif nodecolor == "feat" and "feat" in graph.nodes[i]:
            # print(graph.nodes[i]['feat'])
            feat = graph.nodes[i]["feat"].detach().numpy()
            # idx with pos val in 1D array
            feat_class = 0
            for j in range(len(feat)):
                if feat[j] == 1:
                    feat_class = j
                    break
            node_colors.append(feat_class)
            feat_labels[i] = feat_class
        else:
            node_colors.append(1)

    if not label_node_feat:
        feat_labels = None

    plt.switch_backend("agg")
    fig = plt.figure(figsize=fig_size, dpi=dpi)

    if graph.number_of_nodes() == 0:
        raise Exception("empty graph")
    if graph.number_of_edges() == 0:
        raise Exception("empty edge")

    # remove_nodes = []
    if len(graph.nodes) > 20:
        pos_layout = nx.kamada_kawai_layout(graph, weight=None)
        # pos_layout = nx.spring_layout(graph, weight=None)
    else:
        pos_layout = nx.kamada_kawai_layout(graph, weight=None)

    weights = [d for (u, v, d) in graph.edges(data="weight", default=1)]
    if edge_vmax is None:
        edge_vmax = statistics.median_high(
            [d for (u, v, d) in graph.edges(data="weight", default=1)])
    min_color = min([d for (u, v, d) in graph.edges(data="weight", default=1)])
    # color range: gray to black
    edge_vmin = 2 * min_color - edge_vmax
    print(edge_vmin)
    print(edge_vmax)
    print(edge_colors)

    nx.draw(
        graph,
        pos=pos_layout,
        with_labels=False,
        font_size=4,
        labels=feat_labels,
        node_color=node_colors,
        vmin=0,
        vmax=vmax,
        cmap=cmap,
        edge_color=edge_colors,
        edge_cmap=plt.get_cmap("Greys"),
        edge_vmin=edge_vmin - eps,
        edge_vmax=edge_vmax,
        width=1.3,
        node_size=100,
        alpha=0.9,
    )
    fig.axes[0].xaxis.set_visible(False)
    fig.canvas.draw()

    save_path = os.path.join(outdir, filename)
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    nx.write_gpickle(graph, os.path.splitext(save_path)[0] + '.gpickle')
    plt.savefig(save_path, format="pdf")
def main():
    parser = argparse.ArgumentParser(
        description='maps a given document-author-contribution file to a weighted bipartite network of document and author nodes'
    )
    parser.add_argument('--contribs',
                        type=argparse.FileType('r'),
                        help='path to input contribution MatrixMarket file (.mm/.mm.bz2)',
                        required=True)
    parser.add_argument('--bipart-graph',
                        type=argparse.FileType('w'),
                        help='path to output graph (.graph/.graph.bz2) file',
                        required=True)
    parser.add_argument('--top-n-contribs',
                        type=int,
                        help='keep at most N highest contribs per author',
                        required=True)
    args = parser.parse_args()
    input_contribs_path = args.contribs.name
    output_bipart_graph_path = args.bipart_graph.name
    top_n_contribs = args.top_n_contribs
    logger.info('running with:\n{}'.format(
        pformat({
            'input_contribs_path': input_contribs_path,
            'output_bipart_graph_path': output_bipart_graph_path,
            'top_n_contribs': top_n_contribs
        })))

    # load the stored contributions
    contribs = MmCorpus(input_contribs_path)
    num_docs = contribs.num_docs
    num_authors = contribs.num_terms
    logger.info('processing contributions of {} documents, {} authors'.format(
        num_docs, num_authors))

    # build the bipartite affiliation network: documents & authors are the nodes,
    # document-author contributions become the corresponding weighted edges
    bipart_graph = nx.Graph()
    doc_nodes = tuple('d' + str(n) for n in range(0, num_docs))
    bipart_graph.add_nodes_from(doc_nodes, bipartite=0)
    auth_nodes = tuple('a' + str(n) for n in range(0, num_authors))
    bipart_graph.add_nodes_from(auth_nodes, bipartite=1)
    bipart_graph.add_weighted_edges_from(get_edges_from_contribs(contribs), weight='weight')
    log_nwx(bipart_graph)
    logger.info('bipartite? {}'.format(bipartite.is_bipartite(bipart_graph)))
    simplify_graph_nwx(bipart_graph)
    logger.info('actual numbers after simplifying: {} docs, {} authors, {} edges'.format(
        *get_bipartite_node_counts(bipart_graph), len(bipart_graph.edges)))

    # report the author node with the highest degree
    max_degree_author = max(bipart_graph.degree(auth_nodes), key=lambda node_deg: node_deg[1])
    logger.info('author {} having max degree of {}'.format(*max_degree_author))

    # refresh the node variables
    doc_nodes, auth_nodes = get_bipartite_nodes(bipart_graph)

    # prune each author's incident edges down to the top-N edges with the largest weights
    logger.info('pruning to top {} edges per author'.format(top_n_contribs))
    for auth_node in auth_nodes:
        logger.debug('author {}'.format(auth_node))
        auth_edges = bipart_graph[auth_node]
        auth_edges = tuple((neighbor, weight['weight']) for neighbor, weight in auth_edges.items())
        logger.debug('incident edges \n{}'.format(pformat(auth_edges)))
        num_remove = len(auth_edges) - top_n_contribs
        author_min_edges = nsmallest(num_remove, auth_edges, key=lambda edge: edge[1])
        logger.debug('removing edges \n{}'.format(pformat(author_min_edges)))
        bipart_graph.remove_edges_from(
            (auth_node, neighbor) for neighbor, weight in author_min_edges)

    # keep_max_edges = 10000
    # logger.info('pruning to {} highest edges'.format(keep_max_edges))
    # num_edges_to_remove = len(bipart_graph.edges) - keep_max_edges
    # min_edges = nsmallest(num_edges_to_remove, bipart_graph.edges(data='weight'), key=lambda edge: edge[2])
    # bipart_graph.remove_edges_from(min_edges)
    # log_nwx(bipart_graph)

    # report the author node with the highest degree
    max_degree_author = max(bipart_graph.degree(auth_nodes), key=lambda node_deg: node_deg[1])
    logger.info('author {} having max degree of {}'.format(*max_degree_author))

    # remove isolated nodes
    simplify_graph_nwx(bipart_graph)
    log_nwx(bipart_graph)
    logger.info('new number of documents {}, authors {}'.format(
        *get_bipartite_node_counts(bipart_graph)))

    # save the affiliation network
    logger.info('writing graph to {}'.format(output_bipart_graph_path))
    nx.write_gpickle(bipart_graph, output_bipart_graph_path)
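The helper get_edges_from_contribs used above is not shown. A plausible sketch, assuming each MmCorpus document is a list of (author_id, contribution_value) pairs and reusing the 'd<i>'/'a<j>' node-label convention from the code above; this is an assumed implementation, not the original.

# Hedged sketch of the missing helper: yields (doc_node, author_node, weight)
# triples compatible with nx.Graph.add_weighted_edges_from.
def get_edges_from_contribs(contribs):
    for doc_id, doc in enumerate(contribs):
        for author_id, value in doc:
            if value > 0:
                yield 'd' + str(doc_id), 'a' + str(int(author_id)), float(value)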
mng = plt.get_current_fig_manager()
mng.resize(*mng.window.maxsize())
plt.show()

if len(pk.selected) > 0:
    selected(pk.data)
else:
    selected(pk.all)

if DG.number_of_nodes() > 0:
    fn = next(n for n in DG.nodes() if n.name == 0)
    ln = next(n for n in DG.nodes() if n.name == len(DG.nodes()) - 1)
    DG.add_weighted_edges_from([(ln, fn, ln.dist(fn))])
    nx.write_gpickle(DG, "data/mission.pkl")
else:
    DG = nx.read_gpickle("data/mission.pkl")

for idx, node in enumerate(sorted(DG.nodes(), key=lambda n: n.name)):
    nx.draw(DG, dict((n, n.pos) for n in DG.nodes()),
            node_color=["g" if n.name == idx else "y" for n in DG.nodes()])
    plt.show(block=False)
    node.speed_limit = int(input("Insert speed limit: "))
    plt.close()

# nx.draw(DG, dict((n, n.pos) for n in DG.nodes()), node_color=["g" if False else "y" for n in DG.nodes()])
# plt.show(block=True)
nx.write_gpickle(DG, "data/mission.pkl")
def save_graph(self, graph_path): nx.write_gpickle(self.graph, graph_path)
    seed_species=exp_data.species.sig.id_list,  # genes seed species
    all_measured_list=exp_data.species.id_list,  # all data measured
    use_biogrid=True,  # expand with biogrid
    use_hmdb=True,  # expand with hmdb
    use_reactome=True,  # expand with reactome
    use_signor=True,  # expand with signor
    trim_source_sink=True,  # remove all source and sink nodes not measured
    save_name='Data/cisplatin_based_network_new'
)

# Load the network; it is also returned by the call above, but for future use
# we load it back in from disk.
network = nx.read_gpickle('Data/cisplatin_based_network.p')
utils.add_data_to_graph(network, exp_data)

print("Saving network")

# write to GML for cytoscape or other program
nx.write_gml(
    network,
    os.path.join(os.path.dirname(__file__), 'Data',
                 'cisplatin_network_w_attributes.gml')
)

# write to gpickle for fast loading in python
nx.write_gpickle(
    network,
    os.path.join(os.path.dirname(__file__), 'Data',
                 'cisplatin_based_network.p'),
)
def community():
    try:
        n = getInteger('participants')  # initial participants
        m = getInteger('proposals')  # initial proposals
        initial_sentiment = getFloat('initial_sentiment')
    except Exception as err:
        return str(err), 422

    plot_name = str(n) + str(m)

    # initializer
    network, initial_supply, total_requested = initialize_network(n, m)

    proposals = get_nodes_by_type(network, 'proposal')
    participants = get_nodes_by_type(network, 'participant')
    supporters = get_edges_by_type(network, 'support')
    influencers = get_edges_by_type(network, 'influence')
    competitors = get_edges_by_type(network, 'conflict')

    nx.draw_kamada_kawai(network, nodelist=participants, edgelist=influencers)
    plt.title('Participants Social Network')
    plt.savefig('static/plot3-' + plot_name + '.png')
    plt.clf()

    nx.draw_kamada_kawai(network, nodelist=proposals, edgelist=competitors, node_color='b')
    plt.title('Proposals Conflict Network')
    plt.savefig('static/plot4-' + plot_name + '.png')
    plt.clf()

    plt.hist([network.nodes[i]['holdings'] for i in participants])
    plt.title('Histogram of Participants Token Holdings')
    plt.savefig('static/plot5-' + plot_name + '.png')
    plt.clf()

    plt.hist([network.nodes[i]['funds_requested'] for i in proposals])
    plt.title('Histogram of Proposals Funds Requested')
    plt.savefig('static/plot6-' + plot_name + '.png')
    plt.clf()

    affinities = np.empty((n, m))
    for i_ind in range(n):
        for j_ind in range(m):
            i = participants[i_ind]
            j = proposals[j_ind]
            affinities[i_ind][j_ind] = network.edges[(i, j)]['affinity']

    dims = (20, 5)
    fig, ax = plt.subplots(figsize=dims)
    sns.heatmap(affinities.T,
                xticklabels=participants,
                yticklabels=proposals,
                square=True,
                cbar=True,
                ax=ax)
    plt.title('affinities between participants and proposals')
    plt.ylabel('proposal_id')
    plt.xlabel('participant_id')
    plt.savefig('static/plot7-' + plot_name + '.png')
    plt.clf()

    nx.write_gpickle(network, 'static/network.gpickle')

    return jsonify({
        # inputs
        'participants': n,
        'proposals': m,
        'initial_sentiment': initial_sentiment,
        # outputs
        'initial_supply': initial_supply,
        'results': [
            'plot3-' + plot_name + '.png',
            'plot4-' + plot_name + '.png',
            'plot5-' + plot_name + '.png',
            'plot6-' + plot_name + '.png',
            'plot7-' + plot_name + '.png',
        ],
        'network': jsonifyNetwork(network)
    })
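The selectors get_nodes_by_type and get_edges_by_type are not defined in this snippet; the sketches below assume the network stores a 'type' attribute on nodes and edges, which is an assumption rather than something shown above.

# Hedged sketches of the selectors used in community().
def get_nodes_by_type(network, node_type):
    return [n for n, data in network.nodes(data=True) if data.get('type') == node_type]

def get_edges_by_type(network, edge_type):
    return [(u, v) for u, v, data in network.edges(data=True) if data.get('type') == edge_type]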
def graph_path(graph, tmpdir): gpath = tmpdir / "graph.pkl" nx.write_gpickle(graph, str(gpath)) yield gpath
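A test that consumes the graph_path fixture above could look like the following sketch; it assumes a companion graph fixture (any networkx graph) is defined in the same conftest and only checks that the pickled file round-trips.

# Hedged example test for the fixture above.
import networkx as nx

def test_graph_roundtrip(graph, graph_path):
    loaded = nx.read_gpickle(str(graph_path))
    assert set(loaded.nodes) == set(graph.nodes)
    assert set(loaded.edges) == set(graph.edges)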
return G def transformWord(graph, start, goal): paths=collections.deque([ [start] ]) extended=set() while len(paths)!=0: currentPath=paths.popleft() currentWord=currentPath[-1] if currentWord==goal: return currentPath elif currentWord in extended: continue extended.add(currentWord) transforms=graph[currentWord] for word in transforms: if word not in currentPath: #avoid loops paths.append(currentPath[:]+[word]) #no transformation return [] print("First step") dictionary = words2.dictionary graph = constructGraph(dictionary) print("second step") nx.write_gpickle(graph,"test2.gpickle") print("third step") print(transformWord(graph , 'time' , 'space'))
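constructGraph is called above but not shown. One common way to build the word-transformation graph it implies is the wildcard-bucket construction sketched below, where two words of equal length are adjacent when they differ in exactly one letter; this is an assumed implementation, not the original.

# Hedged sketch of a word-ladder graph builder: words are nodes, and an edge
# connects two words that differ in exactly one position.
from collections import defaultdict

import networkx as nx

def constructGraph(dictionary):
    buckets = defaultdict(list)
    for word in dictionary:
        for i in range(len(word)):
            # e.g. "time" -> "_ime", "t_me", "ti_e", "tim_"
            buckets[word[:i] + "_" + word[i + 1:]].append(word)
    graph = nx.Graph()
    graph.add_nodes_from(dictionary)
    for bucket_words in buckets.values():
        for i in range(len(bucket_words)):
            for j in range(i + 1, len(bucket_words)):
                graph.add_edge(bucket_words[i], bucket_words[j])
    return graph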
    names=["id", "module"])
modules = list(cluster_df.groupby('module'))
num_modules = len(modules) - 1
G.graph['modules'] = [[] for i in range(num_modules)]
G.graph['edges'] = [[] for i in range(num_modules)]
G.graph['size'] = [0] * num_modules

for i in range(0, num_modules):
    module_num = int(modules[i][0])
    ids = modules[i][1]['id']
    for n in ids:
        if n in G:
            G.node[n]['module'] = module_num
            G.graph['modules'][module_num].append(n)
    G.graph['size'][module_num] = len(G.graph['modules'][module_num])

for i in range(0, num_modules):
    mG.add_node(i, size=len(G.graph['modules'][i]), genes=G.graph['modules'][i])

for i in range(0, num_modules):
    for j in range(i + 1, num_modules):
        cut_size = nx.algorithms.cuts.cut_size(G, G.graph['modules'][i], G.graph['modules'][j])
        if cut_size > 0:
            cut_size = cut_size / ((G.graph['size'][i] + G.graph['size'][j]) / 2)
            mG.add_edge(i, j, weight=cut_size)

nx.write_gpickle(mG, parsed.opickle)
def plot_save(self, G): utils.simple_plot(G, kys=['ord'], save='./data/img/{}.png'.format(G.name)) nx.write_gpickle(G, './data/pkl/{}.pickle'.format(G.name))
def construct_graph(cpnet_csv_path, cpnet_vocab_path, output_path, prob = 0, prune=False): print('generating ConceptNet graph file...') nltk.download('stopwords', quiet=True) nltk_stopwords = nltk.corpus.stopwords.words('english') nltk_stopwords += ["like", "gone", "did", "going", "would", "could", "get", "in", "up", "may", "wanter"] # issue: mismatch with the stop words in grouding.py blacklist = set(["uk", "us", "take", "make", "object", "person", "people"]) # issue: mismatch with the blacklist in grouding.py concept2id = {} id2concept = {} with open(cpnet_vocab_path, "r", encoding="utf8") as fin: id2concept = [w.strip() for w in fin] concept2id = {w: i for i, w in enumerate(id2concept)} id2relation = merged_relations relation2id = {r: i for i, r in enumerate(id2relation)} # del_cpts = random.sample(range(780000), prob) # del_cpts_dict = np.zeros((800000,)) # del_cpts_dict[del_cpts] = 1 graph = nx.MultiDiGraph() nrow = sum(1 for _ in open(cpnet_csv_path, 'r', encoding='utf-8')) with open(cpnet_csv_path, "r", encoding="utf8") as fin: def not_save(cpt): # if cpt in blacklist or del_cpts_dict[concept2id[cpt]] == 1: if cpt in blacklist: return True '''originally phrases like "branch out" would not be kept in the graph''' # for t in cpt.split("_"): # if t in nltk_stopwords: # return True return False attrs = set() i = 0 for line in tqdm(fin, total=nrow): ls = line.strip().split('\t') rel = relation2id[ls[0]] subj = concept2id[ls[1]] obj = concept2id[ls[2]] weight = float(ls[3]) if prune and (not_save(ls[1]) or not_save(ls[2]) or id2relation[rel] == "hascontext"): continue # if id2relation[rel] == "relatedto" or id2relation[rel] == "antonym": # weight -= 0.3 # continue if subj == obj: # delete loops continue # weight = 1 + float(math.exp(1 - weight)) # issue: ??? # if prune and i<num_changes: # p = random.random() # if p<0.5: # rel = random.choice(list(range(len(relation2id)))) # i = i+1 if (subj, obj, rel) not in attrs: # p = random.random() # if p<prob: # i = i+1 # continue # rel = random.choice(list(range(len(relation2id)))) graph.add_edge(subj, obj, rel=rel, weight=weight) attrs.add((subj, obj, rel)) graph.add_edge(obj, subj, rel=(rel + len(relation2id)), weight=weight) attrs.add((obj, subj, (rel + len(relation2id)))) print(i, " perturbations done") nx.write_gpickle(graph, output_path) print(f"graph file saved to {output_path}") print()
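Once construct_graph has run, the saved MultiDiGraph can be inspected directly; the sketch below assumes a placeholder output path and simply tallies edges per relation id using the 'rel' attribute written above.

# Hedged read-back sketch for the saved ConceptNet graph.
from collections import Counter

import networkx as nx

graph = nx.read_gpickle('conceptnet.graph')  # placeholder for output_path
relation_counts = Counter(rel for _, _, rel in graph.edges(data='rel'))
print('nodes:', graph.number_of_nodes(), 'edges:', graph.number_of_edges())
for rel_id, count in relation_counts.most_common(5):
    print('relation id', rel_id, '->', count, 'edges')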
def kfold_validation(self, k=10): available_ram = psutil.virtual_memory()[1] available_ram = int(int(available_ram) * .9 * 1e-9) if available_ram > 5: jvm.start(max_heap_size='5g') else: jvm.start(max_heap_size=str(available_ram)+'g') jvm.start() ### print('\nCaricando '+self.input_file+' con opts -f'+str(self.features_number)+' -c'+self.classifier_name+'\n') # load .arff file dataset = arff.load(open(self.input_file, 'r')) #data = np.array(dataset['data'], dtype=object) data = np.array(dataset['data']) self.features_names = [x[0] for x in dataset['attributes']] self.attributes_number = data.shape[1] self.dataset_features_number = self.attributes_number - self.levels_number # Factorization of Nominal features_index features_encoder = OrdinalEncoder() nominal_features_index = [i for i in range(len(dataset['attributes'][:-self.levels_number])) if dataset['attributes'][i][1] != u'NUMERIC'] if len(nominal_features_index) > 0: data[:, nominal_features_index] = features_encoder.fit_transform(data[:, nominal_features_index]) self.labels_encoders = [] for i in range(self.levels_number): self.labels_encoders.append(LabelEncoder()) self.labels_encoders[-1].fit(data[:, self.dataset_features_number + i]) classifiers_per_fold = [] oracles_per_fold = [] predictions_per_fold = [] probabilities_per_fold = [] predictions_per_fold_all = [] print('\n***\nStart testing with '+str(k)+'Fold cross-validation -f'+str(self.features_number)+' -c'+self.classifier_name+'\n***\n') skf = StratifiedKFold(n_splits=k, shuffle=True) fold_cnt = 1 #for train_index, test_index in skf.split(data, np.array(data[:,self.attributes_number-1], dtype=int)): for train_index, test_index in skf.split(data, data[:,self.attributes_number-1]): print(fold_cnt) fold_cnt += 1 self.classifiers = [] self.training_set = data[train_index, :self.dataset_features_number] self.testing_set = data[test_index, :self.dataset_features_number] self.ground_truth = data[train_index, self.dataset_features_number:] self.oracle = data[test_index, self.dataset_features_number:] self.prediction = np.ndarray(shape=[len(test_index),self.levels_number],dtype='<U24') # Hard Output self.probability = np.ndarray(shape=[len(test_index),self.levels_number],dtype=object) # Soft Output self.prediction_all = np.ndarray(shape=[len(test_index),self.levels_number],dtype='<U24') root = Tree() root.train_index = [i for i in range(self.training_set.shape[0])] root.test_index = [i for i in range(self.testing_set.shape[0])] root.test_index_all = root.test_index root.children_tags = list(set(self.ground_truth[root.train_index, root.level])) root.children_number = len(root.children_tags) root.encoder = LabelEncoder() root.encoder.fit(self.ground_truth[root.train_index, root.level]) if self.has_config and root.tag + '_' + str(root.level + 1) in self.config: if 'f' in self.config[root.tag + '_' + str(root.level + 1)]: root.features_number = self.config[root.tag + '_' + str(root.level + 1)]['f'] elif 'p' in self.config[root.tag + '_' + str(root.level + 1)]: root.packets_number = self.config[root.tag + '_' + str(root.level + 1)]['p'] root.classifier_name = self.config[root.tag + '_' + str(root.level + 1)]['c'] print('\nconfig','tag',root.tag,'level',root.level,'f',root.features_number,'c',root.classifier_name,'train_test_len',len(root.train_index),len(root.test_index)) else: root.features_number = self.features_number root.packets_number = self.packets_number root.classifier_name = self.classifier_name 
print('\nconfig','tag',root.tag,'level',root.level,'f',root.features_number,'c',root.classifier_name,'train_test_len',len(root.train_index),len(root.test_index)) self.classifiers.append(root) if root.children_number > 1: classifier_to_call = getattr(self, supported_classifiers[root.classifier_name]) classifier_to_call(node=root) else: self.unary_class_results_inferring(root) # Creating hierarchy recursively if root.level < self.levels_number-1 and root.children_number > 0: self.recursive(root) classifiers_per_fold.append(self.classifiers) oracles_per_fold.append(self.oracle) predictions_per_fold.append(self.prediction) probabilities_per_fold.append(self.probability) predictions_per_fold_all.append(self.prediction_all) folder_discr = self.classifier_name if self.has_config: folder_discr = self.config_name material_folder = './data_'+folder_discr+'/material/' if not os.path.exists('./data_'+folder_discr): os.makedirs('./data_'+folder_discr) os.makedirs(material_folder) elif not os.path.exists(material_folder): os.makedirs(material_folder) type_discr = 'flow' feat_discr = '_f_' + str(self.features_number) work_discr = '_w_' + str(self.workers_number) if not self.has_config and self.packets_number != 0: type_discr = 'early' feat_discr = '_p_' + str(self.packets_number) elif self.has_config: if 'p' in self.config: type_discr = 'early' feat_discr = '_c_' + self.config_name if self.has_config and self.classifier_name: if self.features_number != 0: feat_discr = '_f_' + str(self.features_number) + feat_discr + '_' + self.classifier_name if self.packets_number != 0: feat_discr = '_p_' + str(self.packets_number) + feat_discr + '_' + self.classifier_name material_features_folder = './data_'+folder_discr+'/material/features/' material_train_durations_folder = './data_'+folder_discr+'/material/train_durations/' if not os.path.exists(material_folder): os.makedirs(material_folder) os.makedirs(material_features_folder) os.makedirs(material_train_durations_folder) if not os.path.exists(material_features_folder): os.makedirs(material_features_folder) if not os.path.exists(material_train_durations_folder): os.makedirs(material_train_durations_folder) for i in range(self.levels_number): file = open(material_folder + 'multi_' + type_discr + '_level_' + str(i+1) + work_discr + feat_discr + '.dat', 'w+') file.close() for j in range(k): file = open(material_folder + 'multi_' + type_discr + '_level_' + str(i+1) + work_discr + feat_discr + '.dat', 'a') file.write('@fold\n') for o, p in zip(oracles_per_fold[j][:,i], predictions_per_fold[j][:,i]): file.write(str(o)+' '+str(p)+'\n') file.close() # Inferring NW metrics per classifier for classifier in classifiers_per_fold[0]: file = open(material_folder + 'multi_' + type_discr + '_level_' + str(classifier.level+1) + work_discr + feat_discr + '_tag_' + str(classifier.tag) + '.dat', 'w+') file.close() file = open(material_folder + 'multi_' + type_discr + '_level_' + str(classifier.level+1) + work_discr + feat_discr + '_tag_' + str(classifier.tag) + '_all.dat', 'w+') file.close() file = open(material_features_folder + 'multi_' + type_discr + '_level_' + str(classifier.level+1) + work_discr + feat_discr + '_tag_' + str(classifier.tag) + '_features.dat', 'w+') file.close() file = open(material_train_durations_folder + 'multi_' + type_discr + '_level_' + str(classifier.level+1) + work_discr + feat_discr + '_tag_' + str(classifier.tag) + '_test_durations.dat', 'w+') file.close() file = open(material_train_durations_folder + 'multi_' + type_discr + work_discr + feat_discr + 
'_test_durations.dat', 'w+') file.close() for fold_n, classifiers in enumerate(classifiers_per_fold): for classifier in classifiers: file = open(material_folder + 'multi_' + type_discr + '_level_' + str(classifier.level+1) + work_discr + feat_discr + '_tag_' + str(classifier.tag) + '.dat', 'a') if classifier.level > 0: index = [] for pred_n, prediction in enumerate(predictions_per_fold[fold_n][classifier.test_index, classifier.level-1]): if prediction == oracles_per_fold[fold_n][classifier.test_index[pred_n], classifier.level-1]: index.append(classifier.test_index[pred_n]) prediction_nw = predictions_per_fold[fold_n][index, classifier.level] oracle_nw = oracles_per_fold[fold_n][index, classifier.level] else: prediction_nw = predictions_per_fold[fold_n][classifier.test_index, classifier.level] oracle_nw = oracles_per_fold[fold_n][classifier.test_index, classifier.level] file.write('@fold\n') for o, p in zip(oracle_nw, prediction_nw): file.write(str(o)+' '+str(p)+'\n') file.close() file = open(material_folder + 'multi_' + type_discr + '_level_' + str(classifier.level+1) + work_discr + feat_discr + '_tag_' + str(classifier.tag) + '_all.dat', 'a') prediction_all = predictions_per_fold_all[fold_n][classifier.test_index_all, classifier.level] oracle_all = oracles_per_fold[fold_n][classifier.test_index_all, classifier.level] file.write('@fold\n') for o, p in zip(oracle_all, prediction_all): file.write(str(o)+' '+str(p)+'\n') file.close() file = open(material_features_folder + 'multi_' + type_discr + '_level_' + str(classifier.level+1) + work_discr + feat_discr + '_tag_' + str(classifier.tag) + '_features.dat', 'a') file.write('@fold\n') file.write(self.features_names[classifier.features_index[0]]) for feature_index in classifier.features_index[1:]: file.write(','+self.features_names[feature_index]) file.write('\n') file.close() file = open(material_train_durations_folder + 'multi_' + type_discr + '_level_' + str(classifier.level+1) + work_discr + feat_discr + '_tag_' + str(classifier.tag) + '_test_durations.dat', 'a') file.write('%.6f\n' % (classifier.test_duration)) file.close() # Retrieve train_durations for each classifier test_durations_per_fold = [] for classifiers in classifiers_per_fold: test_durations_per_fold.append([]) for classifier in classifiers: test_durations_per_fold[-1].append(classifier.test_duration) file = open(material_train_durations_folder + 'multi_' + type_discr + work_discr + feat_discr + '_test_durations.dat', 'w+') mean_parallel_test_duration = np.mean(np.max(test_durations_per_fold, axis=1)) std_parallel_test_duration = np.std(np.max(test_durations_per_fold, axis=1)) mean_sequential_test_duration = np.mean(np.sum(test_durations_per_fold, axis=1)) std_sequential_test_duration = np.std(np.sum(test_durations_per_fold, axis=1)) file.write('mean_par,std_par,mean_seq,std_seq\n') file.write('%.6f,%.6f,%.6f,%.6f\n' % (mean_parallel_test_duration,std_parallel_test_duration,mean_sequential_test_duration,std_sequential_test_duration)) file.close() graph_folder = './data_'+folder_discr+'/graph/' if not os.path.exists('./data_'+folder_discr): os.makedirs('./data_'+folder_discr) os.makedirs(graph_folder) elif not os.path.exists(graph_folder): os.makedirs(graph_folder) # Graph plot G = nx.DiGraph() for info in classifiers_per_fold[0]: G.add_node(str(info.level)+' '+info.tag, level=info.level, tag=info.tag, children_tags=info.children_tags) for node_parent, data_parent in G.nodes.items(): for node_child, data_child in G.nodes.items(): if data_child['level']-data_parent['level'] == 1 
and any(data_child['tag'] in s for s in data_parent['children_tags']): G.add_edge(node_parent, node_child) nx.write_gpickle(G, graph_folder+'multi_' + type_discr + feat_discr +'_graph.gml') print('\n***\nStart testing with incremental gamma threshold\n***\n') thresholds_number = 9 oracle_gamma = np.ndarray(shape=[self.levels_number, thresholds_number, k], dtype=object) prediction_gamma = np.ndarray(shape=[self.levels_number, thresholds_number, k], dtype=object) classified_ratio = np.ndarray(shape=[self.levels_number, thresholds_number, k], dtype=float) for i in range(thresholds_number): gamma = float(i+1)/10.0 for j in range(k): indexes = [] for l in range(self.levels_number): for index, p in enumerate(probabilities_per_fold[j][:, l]): if max(p) < gamma: indexes.append(index) new_oracle = np.delete(oracles_per_fold[j][:, l], [indexes]) new_prediction = np.delete(predictions_per_fold[j][:, l], [indexes]) oracle_gamma[l, i, j] = new_oracle prediction_gamma[l, i, j] = new_prediction classified_ratio[l, i, j] = float(len(new_prediction))/float(len(predictions_per_fold[j][:, l])) for i in range(thresholds_number): for l in range(self.levels_number): file = open(material_folder + 'multi_' + type_discr + '_level_' + str(l) + work_discr + feat_discr + '_gamma_'+str(float(i+1)/10.0)+'.dat', 'w+') for j in range(k): file.write('@fold_cr\n') file.write(str(classified_ratio[l, i, j])+'\n') for o, p in zip(oracle_gamma[l, i, j], prediction_gamma[l, i, j]): file.write(str(o)+' '+str(p)+'\n') file.close() ### jvm.stop()
def cmat( track_file, roi_file, resolution_network_file, matrix_name, matrix_mat_name, endpoint_name, intersections=False, ): """ Create the connection matrix for each resolution using fibers and ROIs. """ import scipy.io as sio stats = {} iflogger.info("Running cmat function") # Identify the endpoints of each fiber en_fname = op.abspath(endpoint_name + "_endpoints.npy") en_fnamemm = op.abspath(endpoint_name + "_endpointsmm.npy") iflogger.info("Reading Trackvis file %s", track_file) fib, hdr = nb.trackvis.read(track_file, False) stats["orig_n_fib"] = len(fib) roi = nb.load(roi_file) # Preserve on-disk type unless scaled roiData = np.asanyarray(roi.dataobj) roiVoxelSize = roi.header.get_zooms() (endpoints, endpointsmm) = create_endpoints_array(fib, roiVoxelSize) # Output endpoint arrays iflogger.info("Saving endpoint array: %s", en_fname) np.save(en_fname, endpoints) iflogger.info("Saving endpoint array in mm: %s", en_fnamemm) np.save(en_fnamemm, endpointsmm) n = len(fib) iflogger.info("Number of fibers: %i", n) # Create empty fiber label array fiberlabels = np.zeros((n, 2)) final_fiberlabels = [] final_fibers_idx = [] # Add node information from specified parcellation scheme path, name, ext = split_filename(resolution_network_file) if ext == ".pck": gp = nx.read_gpickle(resolution_network_file) elif ext == ".graphml": gp = nx.read_graphml(resolution_network_file) else: raise TypeError("Unable to read file:", resolution_network_file) nROIs = len(gp.nodes()) # add node information from parcellation if "dn_position" in gp.nodes[list(gp.nodes())[0]]: G = gp.copy() else: G = nx.Graph() for u, d in gp.nodes(data=True): G.add_node(int(u), **d) # compute a position for the node based on the mean position of the # ROI in voxel coordinates (segmentation volume ) xyz = tuple( np.mean( np.where(np.flipud(roiData) == int(d["dn_correspondence_id"])), axis=1, ) ) G.nodes[int(u)]["dn_position"] = tuple([xyz[0], xyz[2], -xyz[1]]) if intersections: iflogger.info("Filtering tractography from intersections") intersection_matrix, final_fiber_ids = create_allpoints_cmat( fib, roiData, roiVoxelSize, nROIs ) finalfibers_fname = op.abspath( endpoint_name + "_intersections_streamline_final.trk" ) stats["intersections_n_fib"] = save_fibers( hdr, fib, finalfibers_fname, final_fiber_ids ) intersection_matrix = np.matrix(intersection_matrix) I = G.copy() H = nx.from_numpy_matrix(np.matrix(intersection_matrix)) H = nx.relabel_nodes(H, lambda x: x + 1) # relabel nodes so they start at 1 I.add_weighted_edges_from( ((u, v, d["weight"]) for u, v, d in H.edges(data=True)) ) dis = 0 for i in range(endpoints.shape[0]): # ROI start => ROI end try: startROI = int( roiData[endpoints[i, 0, 0], endpoints[i, 0, 1], endpoints[i, 0, 2]] ) endROI = int( roiData[endpoints[i, 1, 0], endpoints[i, 1, 1], endpoints[i, 1, 2]] ) except IndexError: iflogger.error( "AN INDEXERROR EXCEPTION OCCURED FOR FIBER %s. 
" "PLEASE CHECK ENDPOINT GENERATION", i, ) break # Filter if startROI == 0 or endROI == 0: dis += 1 fiberlabels[i, 0] = -1 continue if startROI > nROIs or endROI > nROIs: iflogger.error( "Start or endpoint of fiber terminate in a voxel which is labeled higher" ) iflogger.error("than is expected by the parcellation node information.") iflogger.error("Start ROI: %i, End ROI: %i", startROI, endROI) iflogger.error("This needs bugfixing!") continue # Update fiber label # switch the rois in order to enforce startROI < endROI if endROI < startROI: tmp = startROI startROI = endROI endROI = tmp fiberlabels[i, 0] = startROI fiberlabels[i, 1] = endROI final_fiberlabels.append([startROI, endROI]) final_fibers_idx.append(i) # Add edge to graph if G.has_edge(startROI, endROI) and "fiblist" in G.edge[startROI][endROI]: G.edge[startROI][endROI]["fiblist"].append(i) else: G.add_edge(startROI, endROI, fiblist=[i]) # create a final fiber length array finalfiberlength = [] if intersections: final_fibers_indices = final_fiber_ids else: final_fibers_indices = final_fibers_idx for idx in final_fibers_indices: # compute length of fiber finalfiberlength.append(length(fib[idx][0])) # convert to array final_fiberlength_array = np.array(finalfiberlength) # make final fiber labels as array final_fiberlabels_array = np.array(final_fiberlabels, dtype=int) iflogger.info( "Found %i (%f percent out of %i fibers) fibers that start or " "terminate in a voxel which is not labeled. (orphans)", dis, dis * 100.0 / n, n, ) iflogger.info("Valid fibers: %i (%f%%)", n - dis, 100 - dis * 100.0 / n) numfib = nx.Graph() numfib.add_nodes_from(G) fibmean = numfib.copy() fibmedian = numfib.copy() fibdev = numfib.copy() for u, v, d in G.edges(data=True): G.remove_edge(u, v) di = {} if "fiblist" in d: di["number_of_fibers"] = len(d["fiblist"]) idx = np.where( (final_fiberlabels_array[:, 0] == int(u)) & (final_fiberlabels_array[:, 1] == int(v)) )[0] di["fiber_length_mean"] = float(np.mean(final_fiberlength_array[idx])) di["fiber_length_median"] = float(np.median(final_fiberlength_array[idx])) di["fiber_length_std"] = float(np.std(final_fiberlength_array[idx])) else: di["number_of_fibers"] = 0 di["fiber_length_mean"] = 0 di["fiber_length_median"] = 0 di["fiber_length_std"] = 0 if not u == v: # Fix for self loop problem G.add_edge(u, v, **di) if "fiblist" in d: numfib.add_edge(u, v, weight=di["number_of_fibers"]) fibmean.add_edge(u, v, weight=di["fiber_length_mean"]) fibmedian.add_edge(u, v, weight=di["fiber_length_median"]) fibdev.add_edge(u, v, weight=di["fiber_length_std"]) iflogger.info("Writing network as %s", matrix_name) nx.write_gpickle(G, op.abspath(matrix_name)) numfib_mlab = nx.to_numpy_matrix(numfib, dtype=int) numfib_dict = {"number_of_fibers": numfib_mlab} fibmean_mlab = nx.to_numpy_matrix(fibmean, dtype=np.float64) fibmean_dict = {"mean_fiber_length": fibmean_mlab} fibmedian_mlab = nx.to_numpy_matrix(fibmedian, dtype=np.float64) fibmedian_dict = {"median_fiber_length": fibmedian_mlab} fibdev_mlab = nx.to_numpy_matrix(fibdev, dtype=np.float64) fibdev_dict = {"fiber_length_std": fibdev_mlab} if intersections: path, name, ext = split_filename(matrix_name) intersection_matrix_name = op.abspath(name + "_intersections") + ext iflogger.info("Writing intersection network as %s", intersection_matrix_name) nx.write_gpickle(I, intersection_matrix_name) path, name, ext = split_filename(matrix_mat_name) if not ext == ".mat": ext = ".mat" matrix_mat_name = matrix_mat_name + ext iflogger.info("Writing matlab matrix as %s", matrix_mat_name) 
sio.savemat(matrix_mat_name, numfib_dict) if intersections: intersect_dict = {"intersections": intersection_matrix} intersection_matrix_mat_name = op.abspath(name + "_intersections") + ext iflogger.info("Writing intersection matrix as %s", intersection_matrix_mat_name) sio.savemat(intersection_matrix_mat_name, intersect_dict) mean_fiber_length_matrix_name = op.abspath(name + "_mean_fiber_length") + ext iflogger.info( "Writing matlab mean fiber length matrix as %s", mean_fiber_length_matrix_name ) sio.savemat(mean_fiber_length_matrix_name, fibmean_dict) median_fiber_length_matrix_name = op.abspath(name + "_median_fiber_length") + ext iflogger.info( "Writing matlab median fiber length matrix as %s", median_fiber_length_matrix_name, ) sio.savemat(median_fiber_length_matrix_name, fibmedian_dict) fiber_length_std_matrix_name = op.abspath(name + "_fiber_length_std") + ext iflogger.info( "Writing matlab fiber length deviation matrix as %s", fiber_length_std_matrix_name, ) sio.savemat(fiber_length_std_matrix_name, fibdev_dict) fiberlengths_fname = op.abspath(endpoint_name + "_final_fiberslength.npy") iflogger.info("Storing final fiber length array as %s", fiberlengths_fname) np.save(fiberlengths_fname, final_fiberlength_array) fiberlabels_fname = op.abspath(endpoint_name + "_filtered_fiberslabel.npy") iflogger.info("Storing all fiber labels (with orphans) as %s", fiberlabels_fname) np.save(fiberlabels_fname, np.array(fiberlabels, dtype=np.int32)) fiberlabels_noorphans_fname = op.abspath(endpoint_name + "_final_fiberslabels.npy") iflogger.info( "Storing final fiber labels (no orphans) as %s", fiberlabels_noorphans_fname ) np.save(fiberlabels_noorphans_fname, final_fiberlabels_array) iflogger.info("Filtering tractography - keeping only no orphan fibers") finalfibers_fname = op.abspath(endpoint_name + "_streamline_final.trk") stats["endpoint_n_fib"] = save_fibers(hdr, fib, finalfibers_fname, final_fibers_idx) stats["endpoints_percent"] = ( float(stats["endpoint_n_fib"]) / float(stats["orig_n_fib"]) * 100 ) stats["intersections_percent"] = ( float(stats["intersections_n_fib"]) / float(stats["orig_n_fib"]) * 100 ) out_stats_file = op.abspath(endpoint_name + "_statistics.mat") iflogger.info("Saving matrix creation statistics as %s", out_stats_file) sio.savemat(out_stats_file, stats)
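A quick way to sanity-check the cmat outputs is to load the gpickled connectome and the fiber-count .mat back in; the paths below are placeholders for the matrix_name and matrix_mat_name arguments, while the attribute and matrix keys are the ones written by cmat itself.

# Hedged read-back sketch for the cmat outputs.
import networkx as nx
import scipy.io as sio

G = nx.read_gpickle('connectome.gpickle')  # placeholder for matrix_name
mats = sio.loadmat('connectome.mat')       # placeholder for matrix_mat_name

print('ROIs:', G.number_of_nodes(), 'connections:', G.number_of_edges())
if G.number_of_edges() > 0:
    u, v, d = next(iter(G.edges(data=True)))
    print(u, v, 'fibers:', d['number_of_fibers'], 'mean length:', d['fiber_length_mean'])
print('fiber-count matrix shape:', mats['number_of_fibers'].shape)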
def add_travel_time_dir(graph_dir, mask_dir, conv_dict, graph_dir_out, min_z=128, dx=4, dy=4, percentile=90, use_totband=True, use_weighted_mean=True, variable_edge_speed=False, mask_prefix='', save_shapefiles=True, verbose=False): '''Update graph properties to include travel time for entire directory''' pickle_protocol = 4 # 4 is most recent, python 2.7 can't read 4 logger.info("Updating graph properties to include travel time") logger.info(" Writing to: " + str(graph_dir_out)) os.makedirs(graph_dir_out, exist_ok=True) image_names = sorted( [z for z in os.listdir(mask_dir) if z.endswith('.tif')]) for i, image_name in enumerate(image_names): im_root = image_name.split('.')[0] if len(mask_prefix) > 0: im_root = im_root.split(mask_prefix)[-1] out_file = os.path.join(graph_dir_out, im_root + '.gpickle') if (i % 1) == 0: logger.info("\n" + str(i + 1) + " / " + str(len(image_names)) + " " + image_name + " " + im_root) mask_path = os.path.join(mask_dir, image_name) graph_path = os.path.join(graph_dir, im_root + '.gpickle') if not os.path.exists(graph_path): logger.info(" ", i, "DNE, skipping: " + str(graph_path)) return # continue mask = skimage.io.imread(mask_path) G_raw = nx.read_gpickle(graph_path) # see if it's empty if len(G_raw.nodes()) == 0: nx.write_gpickle(G_raw, out_file, protocol=pickle_protocol) continue G = infer_travel_time(G_raw, mask, conv_dict, min_z=min_z, dx=dx, dy=dy, percentile=percentile, use_totband=use_totband, use_weighted_mean=use_weighted_mean, variable_edge_speed=variable_edge_speed, verbose=verbose) G = G.to_undirected() nx.write_gpickle(G, out_file, protocol=pickle_protocol) return
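The graphs written by add_travel_time_dir can be consumed like any other gpickle; since the attribute names added by infer_travel_time are not shown here, the sketch below just loads one output graph and lists the edge-attribute keys it actually carries.

# Hedged read-back sketch: inspect one graph written by add_travel_time_dir.
import os

import networkx as nx

graph_dir_out = 'graphs_with_travel_time'  # placeholder for the output directory
name = sorted(os.listdir(graph_dir_out))[0]  # assumes at least one .gpickle was written
G = nx.read_gpickle(os.path.join(graph_dir_out, name))

if G.number_of_edges() > 0:
    _, _, data = next(iter(G.edges(data=True)))
    print(name, 'edge attribute keys:', sorted(data.keys()))
else:
    print(name, 'is empty')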
def main(): import os p = 0.7 delta = 1 parser = argparse.ArgumentParser() parser.add_argument('-t', '--type', choices=all_graph_types, help='graph type') parser.add_argument('-s', '--size', type=int, default=0, help="size of graph") parser.add_argument('-e', '--size_exponent', type=int, default=1, help="exponent of the size") parser.add_argument('-b', '--exponent_base', type=int, default=10, help="base of the size exponent") parser.add_argument('-n', '--n_rounds', type=int, default=100, help="number of simulated cascades") args = parser.parse_args() gtype = args.type if args.size: size = args.size output_dir = 'data/{}/{}'.format(gtype, size) else: size = args.exponent_base ** args.size_exponent output_dir = 'data/{}/{}-{}'.format(gtype, args.exponent_base, args.size_exponent) if gtype == KRONECKER_HIER: g = gen_kronecker(P=P_hier, k=args.size_exponent, n_edges=2**args.size_exponent * 3) elif gtype == KRONECKER_PERI: g = gen_kronecker(P=P_peri, k=args.size_exponent, n_edges=2**args.size_exponent * 3) elif gtype == KRONECKER_RAND: g = gen_kronecker(P=P_rand, k=args.size_exponent, n_edges=2**args.size_exponent * 3) elif gtype == PL_TREE: p = 0.88 g = random_powerlaw_tree(size, tries=999999) elif gtype == B_TREE: g = nx.balanced_tree(args.exponent_base, args.size_exponent-1) elif gtype == ER: g = extract_larges_CC(nx.fast_gnp_random_graph(size, 0.1)) elif gtype == BARABASI: g = extract_larges_CC(nx.barabasi_albert_graph(size, 5)) elif gtype == GRID: g = grid_2d(int(np.sqrt(size))) elif gtype == CLIQUE: g = nx.complete_graph(size) elif gtype == LINE: g = nx.path_graph(size) else: raise ValueError('unsupported graph type {}'.format(gtype)) g.remove_edges_from(g.selfloop_edges()) print('|V|={}, |E|={}'.format(g.number_of_nodes(), g.number_of_edges())) if gtype == GRID: mapping = {(i, j): int(np.sqrt(size)) * i + j for i, j in g.nodes_iter()} g = nx.relabel_nodes(g, mapping) else: g = nx.convert_node_labels_to_integers(g) if not os.path.exists(output_dir): os.makedirs(output_dir) print('graph type: {}'.format(gtype)) # g = add_p_and_delta(g, p, delta) output_path = '{}/graph.graphml'.format(output_dir, gtype) print('saving to {}'.format(output_path)) nx.write_graphml(g, output_path) nx.write_gpickle(g, '{}/graph.gpkl'.format(output_dir, gtype)) if False: pkl.dump(time_probas, open('{}/{}.pkl'.format(output_dir, INF_TIME_PROBA_FILE), 'wb')) pkl.dump(node2id, open('{}/{}.pkl'.format(output_dir, NODE2ID_FILE), 'wb')) pkl.dump(id2node, open('{}/{}.pkl'.format(output_dir, ID2NODE_FILE), 'wb'))
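The generator above saves the same graph both as GraphML and as a gpickle; a small sketch of reading the pair back, with the usual caveat that GraphML returns node ids as strings while the gpickle preserves the integer labels (output_dir is a placeholder for the directory printed by the script).

# Hedged read-back sketch comparing the two serializations written above.
import networkx as nx

output_dir = 'data/grid/10-2'  # placeholder, e.g. data/<gtype>/<base>-<exponent>
g_xml = nx.read_graphml('{}/graph.graphml'.format(output_dir))  # node ids come back as str
g_pkl = nx.read_gpickle('{}/graph.gpkl'.format(output_dir))     # node ids stay int

assert g_xml.number_of_nodes() == g_pkl.number_of_nodes()
assert g_xml.number_of_edges() == g_pkl.number_of_edges()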
syns = wn.synsets(term) for syn_obj in syns: # extracts the text value from the syn object syn = syn_obj.name().split('.')[0] # We have not seen this syn yet if syn not in syn_dict.token2id: # add syn term to dictionary syn_dict.add_documents([[syn]]) # add syn node to graph pickled_graph.add_node(node_count, type='SYN', term_id=syn_dict.token2id[syn], freq_per_doc=-1, vector_ind=-1) syn_to_node_map[syn_dict.token2id[syn]] = node_count # Keep track of values node_count += 1 if syn not in syns_per_term[term]: syns_per_term[term].add(syn) pickled_graph.add_edge(term_ind, syn_to_node_map[syn_dict.token2id[syn]], attr_dict={'weight': 0.5}) nx.write_gpickle(pickled_graph, "final_network.gpickle")
dp["et"] = dp["et_new"] dp["rel_err"] = dp["new_rel_err"] again = True base_graph = g_c break else: print("k updated to {}".format(k)) k = 1 + (k - 1) * .75 if k < 1.0001: break # In[ ]: nx.write_gpickle(base_graph,"data/taxi_graphs/base_graph_hour_{}.pkl".format(HOUR)) # In[ ]: for e, info in base_graph.edges.items(): print(info) print("speed is {}".format(info['dist'] / (info['weight'] / 3600.))) break # In[ ]: #speeds = pd.Series([info["dist"] / (info['weight'] / 3600.) for info in final_graph.edges.values()])
def exportaspickle(ps,name): nx.write_gpickle(ps,name)
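A round-trip check for the thin wrapper above, assuming exportaspickle is in scope and ps can be any picklable networkx graph.

# Hedged round-trip sketch for exportaspickle.
import networkx as nx

ps = nx.path_graph(5)
exportaspickle(ps, 'path_graph.gpickle')
restored = nx.read_gpickle('path_graph.gpickle')
assert list(restored.edges()) == list(ps.edges())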
    json_data = json.loads(data)
    screen_names_to_user_ids.append((sn, json_data['id']))

g = nx.Graph()

ids_of_interest = [str(user_id) for (screen_name, user_id) in screen_names_to_user_ids]
for (screen_name, user_id) in screen_names_to_user_ids:
    print >> sys.stderr, 'Processing', screen_name
    try:
        friend_ids = list(r.smembers(getRedisIdByScreenName(screen_name, 'friend_ids')))
        for friend_id in friend_ids:
            if friend_id in ids_of_interest:
                print >> sys.stderr, "Adding edge: %s, %s" % (str(user_id), str(friend_id))
                g.add_edge(user_id, friend_id)
    except Exception, e:
        print >> sys.stderr, 'No friend information available. Skipping', screen_name

# store graph to disk by pickling it...

if not os.path.isdir('out'):
    os.mkdir('out')

filename = os.path.join('out', DB + '.gpickle')
nx.write_gpickle(g, filename)

print >> sys.stderr, 'Pickle file stored in: %s' % filename

# you can read it back out like this...
# g = nx.read_gpickle(os.path.join('out', DB + '.gpickle'))
def write_p_net(self, w_path): nx.write_gpickle(self.p_graph, w_path)