import graph_tool.all as gt

def load_graph(fn):
    """Load a graph_tool.Graph from a weighted edge list."""
    return gt.load_graph_from_csv(
        fn,
        directed=True,
        eprop_types=('string', 'string', 'double'),
        eprop_names=('fromId', 'toId', 'weight'),
        string_vals=True,
        hashed=True,
        skip_first=True,
        ecols=(2, 3),
    )
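# A minimal usage sketch. "edges.csv" is a hypothetical placeholder: with
# ecols=(2, 3) the third and fourth columns are the edge endpoints, and the
# remaining columns are loaded as the fromId/toId/weight edge properties.
g = load_graph("edges.csv")
print(g.num_vertices(), g.num_edges())
print(g.ep["weight"].a[:5])  # a 'double' property is exposed as a numpy array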
import graph_tool.all as gt

# With hashed=True the first two CSV columns become vertex names (exposed as
# g.vp["name"]); only the remaining column is loaded as an edge property.
g = gt.load_graph_from_csv(
    "csv/graphdata_test.csv",
    hashed=True,
    eprop_types=["double"],  # weight column; adjust if it is stored as text
    eprop_names=["weight"])

name = g.vp["name"]      # hashed source/target handles
weight = g.ep["weight"]

gt.graph_draw(g,
              vertex_font_size=12,
              vertex_text=name,  # pass the property map itself, not a string
              vertex_shape="double_circle",
              vertex_fill_color="#729fcf",
              vertex_pen_width=3,
              edge_pen_width=1,
              output="graph-draw.png",
              output_size=(4000, 4000))
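# Sanity-check sketch: Graph.list_properties() prints every internal property
# map the loader created, an easy way to confirm the names used above.
g.list_properties()
# Prints something like:
#   name    (vertex)  (type: string)   <- hashed ids from the first two columns
#   weight  (edge)    (type: double)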
edgelist_df = pd.read_csv(processed_edge_list)
edgelist_df.head()

# In this "generic gene map", 1 denotes a generic gene and 0 denotes all other
# genes. A gene is considered generic if it had a high percentile from SOPHIE
# and the manually curated set, based on the correlation plot seen
# [here](../pseudomonas_analysis/2_identify_generic_genes_pathways.ipynb).

# In[4]:

annot_df = pd.read_csv(generic_gene_map, sep='\t', index_col=0)
annot_df.head()

# In[5]:

G = gt.load_graph_from_csv(processed_edge_list,
                           skip_first=True,
                           directed=False,
                           hashed=True,
                           eprop_names=['weight'],
                           eprop_types=['float'])

# In[6]:

# Add a boolean vertex property marking generic genes.
vprop_generic = G.new_vertex_property('bool')
for v in G.vertices():
    v_name = G.vp['name'][v]
    vprop_generic[v] = annot_df.loc[v_name, 'label']
G.vertex_properties['is_generic'] = vprop_generic

# In[7]:
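# A GraphView can restrict the analysis to generic genes without copying the
# graph; a minimal sketch using the boolean property created above.
G_generic = gt.GraphView(G, vfilt=G.vp['is_generic'])
print(G_generic.num_vertices(), 'generic genes,',
      G_generic.num_edges(), 'edges among them')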
            n_clusters=G.graph['number_communities'])
    except Exception:
        D = [100]
    zsvd.append(np.mean(D))

    Y = fct.mds_shortest_paths(G, dimension)
    D = fct.comp_clusters_communities(
        Y, G.graph['labels_communities'], algo=False,
        n_clusters=G.graph['number_communities'])
    zmds.append(np.mean(D))

    g = gt.load_graph_from_csv(G.graph['edgelist'],
                               directed=isDirected,
                               csv_options={"delimiter": " ",
                                            "quotechar": '"'})
    block = gt.minimize_nested_blockmodel_dl(
        g,
        B_min=G.graph['number_communities'],
        B_max=G.graph['number_communities'])
    num_block = block.levels[0].get_B()
    block = block.levels[0].get_blocks()
    # Copy the SBM block assignment into a plain list, one entry per node.
    partition = [block[i] for i in range(G.number_of_nodes())]
    zsbm.append(ami(partition, G.graph['labels_communities']))

    igraph = ig.Graph.Read_Edgelist(G.graph['edgelist'])
    part = igraph.community_infomap()
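# The AMI comparison step above, as a self-contained sketch (assuming `ami`
# is scikit-learn's adjusted mutual information, as the usage suggests):
from sklearn.metrics import adjusted_mutual_info_score as ami

labels_true = [0, 0, 1, 1, 2, 2]  # toy ground-truth communities
labels_pred = [1, 1, 0, 0, 2, 2]  # the same partition under a label permutation
print(ami(labels_true, labels_pred))  # 1.0: AMI ignores label permutations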
from itertools import cycle, islice
import os

from graph_tool.all import load_graph_from_csv, find_vertex


def roundrobin(*iterables):
    """Standard itertools round-robin recipe (ported to Python 3)."""
    pending = len(iterables)
    nexts = cycle(iter(it).__next__ for it in iterables)
    while pending:
        try:
            for nxt in nexts:
                yield nxt()
        except StopIteration:
            pending -= 1
            nexts = cycle(islice(nexts, pending))


conceptnet_path = os.path.expanduser(
    '~/project/KB_dump/conceptnet/conceptnet-en.csv')
g = load_graph_from_csv(conceptnet_path,
                        directed=False,
                        eprop_types=['string', 'string'],
                        string_vals=True)
prefix = '/c/en/'
entities = [
    ['capoeira', 'hand', 'cartwheel', 'shirt', 'handstand'],
    ['sunscreen', 'skateboarding', 'soccer', 'tan', 'rubbing'],
    ['cream', 'mascara', 'writing', 'lifting', 'dictaphone'],
]
blackListVertex = set(
    find_vertex(g, prop=g.properties[('v', 'name')], match=prefix + b)[0]
    for b in ['object', 'thing'])
blackListEdge = set(['/r/DerivedFrom', '/r/RelatedTo'])
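# Hypothetical downstream use of the blacklists (not from the source): skip
# blacklisted endpoints and relations while walking a vertex's edges. `rel` is
# assumed to be whichever string edge property holds the ConceptNet relation.
def neighbors_filtered(g, v, rel, blocked_vertices, blocked_relations):
    for e in v.out_edges():
        if rel[e] in blocked_relations:
            continue  # e.g. '/r/DerivedFrom', '/r/RelatedTo'
        w = e.target()
        if w in blocked_vertices:
            continue  # e.g. the '/c/en/object' and '/c/en/thing' hubs
        yield w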
def plot_log_log_dist(g, fname):
    data_xs, data_ys = deg_frequency(g.get_total_degrees(g.get_vertices()))
    ys = np.divide(data_ys, np.sum(data_ys))
    plt.clf()
    plt.scatter(data_xs, ys, alpha=0.5, color='b', label='Dataset')
    plt.legend(loc='lower left')
    plt.xscale('log')
    plt.yscale('log')
    plt.xlim(0.5, 1500)
    plt.ylim(0.0001, 0.5)
    plt.xlabel("degree")
    plt.ylabel("fraction of nodes")
    plt.savefig(fname)


G = gt.load_graph_from_csv(FILENAME, csv_options={"delimiter": "\t"})
plot_log_log_dist(G, "dist.png")

# state1 = gt.minimize_blockmodel_dl(G, verbose=True)
N = len(G.get_vertices())
print(len(G.get_edges()))
knock_count = int(KNOCKOUT * N)

# Random knockout (disabled):
# to_remove = np.random.randint(0, N, knock_count)
# G.remove_vertex(to_remove)

# Targeted knockout of the highest-degree nodes (disabled):
# top_degree_nodes = [[idx[0], elem] for idx, elem in
#                     np.ndenumerate(G.get_total_degrees(G.get_vertices()))]
# top_degree_nodes.sort(key=lambda x: x[1], reverse=True)
# top_degree_nodes = top_degree_nodes[0:knock_count]
# top_degree_nodes = [i[1] for i in top_degree_nodes]
# G.remove_vertex(top_degree_nodes)
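# deg_frequency() is not defined in this snippet; a stand-in consistent with
# how it is called (degree array in, unique degrees and their counts out):
import numpy as np

def deg_frequency(degrees):
    xs, ys = np.unique(degrees, return_counts=True)
    return xs, ys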
                        squeeze=True)
for i, node in enumerate(essentials.values):
    node_ = ''.join(node.split('-'))  # strip '-' from the protein names
    # strip stray spaces and uppercase the protein names
    essentials[i] = node_.replace(' ', '').upper()

############################################################################################################
#### Graph creation
############################################################################################################
if args.format == 'csv':  # an edge-list CSV needs its own loading path
    graph = gp.load_graph_from_csv(args.data,
                                   string_vals=True,
                                   directed=args.is_directed,
                                   csv_options={"delimiter": "\t",
                                                "quotechar": "#"})
else:  # any other format graph_tool can read directly
    graph = gp.load_graph(args.data, fmt=args.format)

############################################################################################################
#### Degree distribution (all nodes)
############################################################################################################
v_degrees = np.array([v.out_degree() for v in graph.vertices()])  # degree per node id
degrees, hist = np.unique(v_degrees, return_counts=True)
# degrees: the distinct degrees present in the network
# hist: hist[k] is the number of nodes with degree degrees[k]
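# A tiny worked example of the np.unique() histogram trick used above:
import numpy as np

toy_degrees = np.array([1, 3, 1, 2, 3, 3])
degrees, hist = np.unique(toy_degrees, return_counts=True)
print(degrees)  # [1 2 3]  distinct degrees present
print(hist)     # [2 1 3]  number of nodes with each degree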