def network_layout(gmt_fn, outfn=None, min_genes=5, jaccard_cutoff=0.2):
    """Build a thresholded term-similarity graph and optionally write it as GML.

    The GMT file is loaded with ``read_gmt``, terms with fewer than
    ``min_genes`` genes are dropped, and ``jaccard_matrix`` of the remaining
    terms is used as a weighted adjacency matrix after every entry that is
    not greater than ``jaccard_cutoff`` has been zeroed.

    NOTE(review): another ``network_layout`` definition appears later in this
    file and shadows this one at import time.

    Args:
        gmt_fn: path handed to ``read_gmt``.
        outfn: when not None, the graph is written there with
            ``nx.write_gml`` (intended for doing the layout in Gephi).
        min_genes: minimum number of genes a term needs to be kept.
        jaccard_cutoff: matrix entries must exceed this value to stay
            non-zero.

    Returns:
        The networkx graph. Node ``i`` carries ``size`` (its degree) and
        ``id`` (the term key at position ``i`` of the filtered dict).
    """
    d_gmt = {
        term: genes
        for term, genes in read_gmt(gmt_fn).items()
        if len(genes) >= min_genes
    }
    print('number of terms: %d' % len(d_gmt))

    # Materialise the keys so they can be indexed by matrix row below.
    # Presumably jaccard_matrix orders rows by dict iteration order -- verify.
    umls_ids_kept = list(d_gmt)

    adj_matrix = jaccard_matrix(d_gmt)
    # Assumes jaccard_matrix returns an ndarray so `*` is element-wise
    # (TODO confirm; with np.matrix it would be a matrix product).
    mask = adj_matrix > jaccard_cutoff
    adj_matrix = adj_matrix * mask.astype(int)

    # NOTE(review): nx.from_numpy_matrix and G.node are legacy networkx APIs;
    # confirm the installed networkx version still provides them.
    G = nx.from_numpy_matrix(adj_matrix)
    print('G:  %d %d' % (G.number_of_edges(), G.number_of_nodes()))

    for i in range(adj_matrix.shape[0]):
        G.node[i]['size'] = G.degree(i)
        G.node[i]['id'] = umls_ids_kept[i]

    if outfn is not None:
        nx.write_gml(G, outfn)
    return G
def make_directed_json_graph_soc(gmt_fn, d_id_name, d_id_category,
                                 d_category_color, outfn=None,
                                 min_genes=5, jaccard_cutoff=0.2):
    """Build a root -> category -> term tree and dump it as JSON.

    Terms are read with ``read_gmt`` and filtered to those with at least
    ``min_genes`` genes. An undirected graph built from the thresholded
    ``jaccard_matrix`` is used only to obtain each term's degree, which
    becomes the node ``size``. The directed tree groups every term under the
    category given by ``d_id_category`` (the original comment describes this
    as the SOC - PT hierarchy).

    NOTE(review): another ``make_directed_json_graph_soc`` definition appears
    later in this file and shadows this one at import time.

    Args:
        gmt_fn: path handed to ``read_gmt``.
        d_id_name: mapping term id -> display label.
        d_id_category: mapping term id -> category.
        d_category_color: mapping category -> color.
        outfn: when not None, the tree data is written there as JSON. The
            previous code called ``open(outfn)`` unconditionally, so the
            default ``None`` raised.
        min_genes: minimum number of genes a term needs to be kept.
        jaccard_cutoff: matrix entries must exceed this value to count as an
            edge.

    Returns:
        The structure produced by ``json_graph.tree_data`` (previously the
        function returned None).

    Raises:
        KeyError: if a kept term is missing from ``d_id_name`` or
            ``d_id_category``, or its category from ``d_category_color``.
    """
    d_gmt = {
        term: genes
        for term, genes in read_gmt(gmt_fn).items()
        if len(genes) >= min_genes
    }
    print('number of terms: %d' % len(d_gmt))

    # Presumably jaccard_matrix orders rows by dict iteration order -- verify.
    umls_ids_kept = list(d_gmt)

    adj_matrix = jaccard_matrix(d_gmt)
    # Assumes an ndarray so `*` is element-wise -- TODO confirm.
    mask = adj_matrix > jaccard_cutoff
    adj_matrix = adj_matrix * mask.astype(int)

    # Undirected graph, used only to look up each term's degree.
    # NOTE(review): nx.from_numpy_matrix and G.node are legacy networkx APIs;
    # confirm the installed networkx version still provides them.
    Gu = nx.from_numpy_matrix(adj_matrix)

    G = nx.DiGraph()
    for i, umls_id in enumerate(umls_ids_kept):
        name = d_id_name[umls_id]
        category = d_id_category[umls_id]
        color = d_category_color[category]

        G.add_edge('root', category)
        G.add_edge(category, umls_id)

        G.node[umls_id]['size'] = Gu.degree(i)
        G.node[umls_id]['label'] = name
        G.node[umls_id]['color'] = color

    print('%d %d' % (G.number_of_nodes(), G.number_of_edges()))
    graph_data = json_graph.tree_data(G, root='root')

    if outfn is not None:
        # Text mode plus a context manager: json.dump writes str, and the
        # handle is closed even if serialisation fails.
        with open(outfn, 'w') as fh:
            json.dump(graph_data, fh)
    return graph_data
def network_layout(gmt_fn, outfn=None, min_genes=5, jaccard_cutoff=0.2):
    """Create the Jaccard-similarity graph of GMT terms for Gephi layout.

    Steps: read the GMT file via ``read_gmt``; keep terms having at least
    ``min_genes`` genes; compute ``jaccard_matrix`` over the kept terms; zero
    out entries that do not exceed ``jaccard_cutoff``; turn the result into a
    networkx graph; annotate nodes; optionally write GML.

    NOTE(review): an earlier ``network_layout`` definition exists in this
    file; being later, this definition is the one that takes effect.

    Args:
        gmt_fn: path handed to ``read_gmt``.
        outfn: optional output path for ``nx.write_gml``; nothing is written
            when it is None.
        min_genes: minimum gene-set size for a term to be kept.
        jaccard_cutoff: similarity strictly above which an edge is kept.

    Returns:
        The annotated networkx graph, whose nodes are the integer row
        indices of the matrix with attributes ``size`` (node degree) and
        ``id`` (term key for that row).
    """
    d_gmt = {
        term: genes
        for term, genes in read_gmt(gmt_fn).items()
        if len(genes) >= min_genes
    }
    print('number of terms: %d' % len(d_gmt))

    # A real list is required because keys are indexed by row number below.
    # Presumably jaccard_matrix orders rows by dict iteration order -- verify.
    umls_ids_kept = list(d_gmt)

    adj_matrix = jaccard_matrix(d_gmt)
    # Assumes an ndarray so `*` multiplies element-wise -- TODO confirm.
    keep = adj_matrix > jaccard_cutoff
    adj_matrix = adj_matrix * keep.astype(int)

    # NOTE(review): nx.from_numpy_matrix and G.node are legacy networkx APIs;
    # confirm the installed networkx version still provides them.
    G = nx.from_numpy_matrix(adj_matrix)
    print('G:  %d %d' % (G.number_of_edges(), G.number_of_nodes()))

    for i in range(adj_matrix.shape[0]):
        G.node[i]['size'] = G.degree(i)
        G.node[i]['id'] = umls_ids_kept[i]

    if outfn is not None:
        nx.write_gml(G, outfn)
    return G
def make_directed_json_graph(gmt_fn, d_id_name, d_id_category,
                             d_category_color, outfn=None,
                             min_genes=5, jaccard_cutoff=0.2, n_clusters=10):
    """Cluster terms and dump a root -> cluster -> term tree as JSON.

    Terms are read with ``read_gmt`` and filtered to those with at least
    ``min_genes`` genes. ``AgglomerativeClustering`` is fit on the
    un-thresholded ``jaccard_matrix``; the matrix is then thresholded and
    turned into an undirected graph whose node degrees become the ``size``
    attribute. The original comment says the JSON feeds a pack
    visualization.

    NOTE(review): another ``make_directed_json_graph`` definition appears
    later in this file and shadows this one at import time.

    Args:
        gmt_fn: path handed to ``read_gmt``.
        d_id_name: mapping term id -> display label.
        d_id_category: mapping term id -> category.
        d_category_color: mapping category -> color.
        outfn: when not None, the tree data is written there as JSON. The
            previous code called ``open(outfn)`` unconditionally, so the
            default ``None`` raised.
        min_genes: minimum number of genes a term needs to be kept.
        jaccard_cutoff: matrix entries must exceed this value to count as an
            edge.
        n_clusters: number of clusters requested from
            ``AgglomerativeClustering``.

    Returns:
        The structure produced by ``json_graph.tree_data`` (previously the
        function returned None).

    Raises:
        KeyError: if a kept term is missing from ``d_id_name`` or
            ``d_id_category``, or its category from ``d_category_color``.
    """
    d_gmt = {
        term: genes
        for term, genes in read_gmt(gmt_fn).items()
        if len(genes) >= min_genes
    }
    print('number of terms: %d' % len(d_gmt))

    # Presumably jaccard_matrix orders rows by dict iteration order -- verify.
    umls_ids_kept = list(d_gmt)

    adj_matrix = jaccard_matrix(d_gmt)

    # Clustering sees the raw similarities, before thresholding.
    hc = AgglomerativeClustering(n_clusters=n_clusters)
    hc.fit(adj_matrix)

    # Assumes an ndarray so `*` is element-wise -- TODO confirm.
    mask = adj_matrix > jaccard_cutoff
    adj_matrix = adj_matrix * mask.astype(int)

    # Undirected graph, used only to look up each term's degree.
    # NOTE(review): nx.from_numpy_matrix and G.node are legacy networkx APIs;
    # confirm the installed networkx version still provides them.
    Gu = nx.from_numpy_matrix(adj_matrix)

    G = nx.DiGraph()
    print('%s %s' % (adj_matrix.shape, len(umls_ids_kept)))
    for i in range(adj_matrix.shape[0]):
        # Plain int so the node id is JSON-serialisable regardless of
        # whether the numpy integer type subclasses int.
        cluster_label = int(hc.labels_[i])
        umls_id = umls_ids_kept[i]
        name = d_id_name[umls_id]

        G.add_edge('root', cluster_label)
        G.add_edge(cluster_label, umls_id)

        G.node[umls_id]['size'] = Gu.degree(i)
        G.node[umls_id]['label'] = name

        category = d_id_category[umls_id]
        G.node[umls_id]['color'] = d_category_color[category]

    print('%d %d' % (G.number_of_nodes(), G.number_of_edges()))
    graph_data = json_graph.tree_data(G, root='root')

    if outfn is not None:
        # Text mode plus a context manager: json.dump writes str, and the
        # handle is closed even if serialisation fails.
        with open(outfn, 'w') as fh:
            json.dump(graph_data, fh)
    return graph_data
def make_directed_json_graph(gmt_fn, d_id_name, d_id_category,
                             d_category_color, outfn=None,
                             min_genes=5, jaccard_cutoff=0.2, n_clusters=10):
    """Run hierarchical clustering on terms and export a pack-layout tree.

    Pipeline: ``read_gmt`` -> drop terms with fewer than ``min_genes`` genes
    -> ``jaccard_matrix`` -> ``AgglomerativeClustering`` fit on the raw
    matrix -> threshold the matrix at ``jaccard_cutoff`` and build an
    undirected graph for node degrees -> build a directed
    root -> cluster -> term tree -> serialise with ``json_graph.tree_data``.

    NOTE(review): an earlier ``make_directed_json_graph`` definition exists
    in this file; being later, this definition is the one that takes effect.

    Args:
        gmt_fn: path handed to ``read_gmt``.
        d_id_name: mapping term id -> display label.
        d_id_category: mapping term id -> category.
        d_category_color: mapping category -> color.
        outfn: optional JSON output path; nothing is written when it is None
            (the previous code raised on the default because it always
            called ``open(outfn)``).
        min_genes: minimum gene-set size for a term to be kept.
        jaccard_cutoff: similarity strictly above which an edge is kept.
        n_clusters: number of clusters requested from
            ``AgglomerativeClustering``.

    Returns:
        The tree structure from ``json_graph.tree_data`` (previously None).

    Raises:
        KeyError: if a kept term is missing from ``d_id_name`` or
            ``d_id_category``, or its category from ``d_category_color``.
    """
    d_gmt = {
        term: genes
        for term, genes in read_gmt(gmt_fn).items()
        if len(genes) >= min_genes
    }
    print('number of terms: %d' % len(d_gmt))

    # A real list is required because keys are indexed by row number below.
    # Presumably jaccard_matrix orders rows by dict iteration order -- verify.
    umls_ids_kept = list(d_gmt)

    adj_matrix = jaccard_matrix(d_gmt)

    # Fit before thresholding so clustering uses the full similarity values.
    hc = AgglomerativeClustering(n_clusters=n_clusters)
    hc.fit(adj_matrix)

    # Assumes an ndarray so `*` multiplies element-wise -- TODO confirm.
    keep = adj_matrix > jaccard_cutoff
    adj_matrix = adj_matrix * keep.astype(int)

    # Undirected graph, only consulted for per-term degree.
    # NOTE(review): nx.from_numpy_matrix and G.node are legacy networkx APIs;
    # confirm the installed networkx version still provides them.
    Gu = nx.from_numpy_matrix(adj_matrix)

    G = nx.DiGraph()
    print('%s %s' % (adj_matrix.shape, len(umls_ids_kept)))
    for i in range(adj_matrix.shape[0]):
        # Convert the numpy label to a builtin int so json.dump accepts it
        # as a node id on every Python version.
        cluster_label = int(hc.labels_[i])
        umls_id = umls_ids_kept[i]
        name = d_id_name[umls_id]

        G.add_edge('root', cluster_label)
        G.add_edge(cluster_label, umls_id)

        G.node[umls_id]['size'] = Gu.degree(i)
        G.node[umls_id]['label'] = name

        category = d_id_category[umls_id]
        G.node[umls_id]['color'] = d_category_color[category]

    print('%d %d' % (G.number_of_nodes(), G.number_of_edges()))
    graph_data = json_graph.tree_data(G, root='root')

    if outfn is not None:
        # `with` closes the handle deterministically; text mode matches the
        # str output of json.dump.
        with open(outfn, 'w') as fh:
            json.dump(graph_data, fh)
    return graph_data
def make_directed_json_graph_soc(gmt_fn, d_id_name, d_id_category,
                                 d_category_color, outfn=None,
                                 min_genes=5, jaccard_cutoff=0.2):
    """Export a category-grouped term tree (root -> category -> term) as JSON.

    Pipeline: ``read_gmt`` -> drop terms with fewer than ``min_genes`` genes
    -> ``jaccard_matrix`` thresholded at ``jaccard_cutoff`` -> undirected
    graph for node degrees -> directed tree keyed by ``d_id_category`` (the
    original comment describes this as the SOC - PT hierarchy) -> serialise
    with ``json_graph.tree_data``.

    NOTE(review): an earlier ``make_directed_json_graph_soc`` definition
    exists in this file; being later, this definition is the one that takes
    effect.

    Args:
        gmt_fn: path handed to ``read_gmt``.
        d_id_name: mapping term id -> display label.
        d_id_category: mapping term id -> category.
        d_category_color: mapping category -> color.
        outfn: optional JSON output path; nothing is written when it is None
            (the previous code raised on the default because it always
            called ``open(outfn)``).
        min_genes: minimum gene-set size for a term to be kept.
        jaccard_cutoff: similarity strictly above which an edge is kept.

    Returns:
        The tree structure from ``json_graph.tree_data`` (previously None).

    Raises:
        KeyError: if a kept term is missing from ``d_id_name`` or
            ``d_id_category``, or its category from ``d_category_color``.
    """
    d_gmt = {
        term: genes
        for term, genes in read_gmt(gmt_fn).items()
        if len(genes) >= min_genes
    }
    print('number of terms: %d' % len(d_gmt))

    # Presumably jaccard_matrix orders rows by dict iteration order -- verify.
    umls_ids_kept = list(d_gmt)

    adj_matrix = jaccard_matrix(d_gmt)
    # Assumes an ndarray so `*` multiplies element-wise -- TODO confirm.
    keep = adj_matrix > jaccard_cutoff
    adj_matrix = adj_matrix * keep.astype(int)

    # Undirected graph, only consulted for per-term degree.
    # NOTE(review): nx.from_numpy_matrix and G.node are legacy networkx APIs;
    # confirm the installed networkx version still provides them.
    Gu = nx.from_numpy_matrix(adj_matrix)

    G = nx.DiGraph()
    for i, umls_id in enumerate(umls_ids_kept):
        name = d_id_name[umls_id]
        category = d_id_category[umls_id]
        color = d_category_color[category]

        G.add_edge('root', category)
        G.add_edge(category, umls_id)

        G.node[umls_id]['size'] = Gu.degree(i)
        G.node[umls_id]['label'] = name
        G.node[umls_id]['color'] = color

    print('%d %d' % (G.number_of_nodes(), G.number_of_edges()))
    graph_data = json_graph.tree_data(G, root='root')

    if outfn is not None:
        # `with` closes the handle deterministically; text mode matches the
        # str output of json.dump.
        with open(outfn, 'w') as fh:
            json.dump(graph_data, fh)
    return graph_data