def calculate_lcc_significance(network, nodes, nodes_random=None, bins=None, n_random=1000, min_bin_size=100, seed=452456):
    """Score the largest connected component (LCC) of `nodes` against random node sets.

    Args:
        network: graph object exposing ``nodes()`` and ``subgraph()``.
        nodes: the node set of interest.
        nodes_random: optional pre-computed collection of random node sets.
            When None, `n_random` sets of ``len(nodes)`` nodes are drawn
            uniformly from the network.
        bins: unused here; kept for interface compatibility (degree-matched
            sampling is disabled because it is problematic for small bins).
        n_random: number of random sets to draw when `nodes_random` is None.
        min_bin_size: unused here; kept for interface compatibility.
        seed: seed of the private random generator used for sampling, so
            repeated calls produce the same random sets.

    Returns:
        Tuple ``(d, z, (m, s))``: the observed LCC size, its z-score against
        the random sets (0.0 when their standard deviation is zero), and the
        mean and standard deviation of the random LCC sizes.
    """
    def lcc_size(node_set):
        # The first component returned is taken as the LCC (presumably the
        # helper orders components largest-first -- confirm in
        # network_utilities). No component at all means an LCC of size 0.
        components = network_utilities.get_connected_components(
            network.subgraph(node_set), False)
        return len(components[0]) if components else 0

    if nodes_random is None:
        # Local import and a private generator: the seed is honoured without
        # disturbing the state of the global random module.
        import random
        rng = random.Random(seed)
        network_nodes = list(network.nodes())
        nodes_random = []
        for _ in range(n_random):
            rng.shuffle(network_nodes)
            nodes_random.append(network_nodes[:len(nodes)])

    d = lcc_size(nodes)
    values = numpy.empty(len(nodes_random))
    for i, random_nodes in enumerate(nodes_random):
        values[i] = lcc_size(random_nodes)
    m, s = numpy.mean(values), numpy.std(values)
    if s == 0:
        z = 0.0
    else:
        z = (d - m) / s
    return d, z, (m, s)
def calculate_lcc_significance(network, nodes, nodes_random=None, bins=None, n_random=1000, min_bin_size=100, seed=452456):
    """Compute the LCC size of `nodes` and its z-score versus random node sets.

    The global ``random`` module is seeded with `seed` on every call. When
    `nodes_random` is None, `n_random` reference sets are produced by
    repeatedly shuffling the full node list and taking its first
    ``len(nodes)`` entries. `bins` and `min_bin_size` are not used (degree
    matching is problematic for small bin sizes).

    Returns:
        ``(d, z, (m, s), values)`` where `d` is the observed LCC size, `z`
        its z-score (0.0 if the random sizes have zero spread), `m`/`s` the
        mean and standard deviation of the random LCC sizes, and `values`
        the array of those sizes.
    """
    random.seed(seed)
    if nodes_random is None:
        candidate_pool = list(network.nodes())
        nodes_random = []
        for _ in xrange(n_random):
            # The pool is shuffled in place, so each draw starts from the
            # previous permutation.
            random.shuffle(candidate_pool)
            nodes_random.append(candidate_pool[:len(nodes)])

    observed_components = network_utilities.get_connected_components(
        network.subgraph(nodes), False)
    d = len(observed_components[0])

    values = numpy.empty(len(nodes_random))
    for index, sampled_nodes in enumerate(nodes_random):
        sampled_graph = network.subgraph(sampled_nodes)
        largest = network_utilities.get_connected_components(sampled_graph, False)[0]
        values[index] = len(largest)

    m = numpy.mean(values)
    s = numpy.std(values)
    z = 0.0 if s == 0 else (d - m) / s
    return d, z, (m, s), values
def get_diamond_genes(network_file, seeds, file_name, only_lcc=True):
    """Run DIAMOnD from `seeds` (cached in `file_name`) and record seed coverage.

    Args:
        network_file: network file handed to ``get_network``.
        seeds: seed nodes; only those present in the network are used.
        file_name: DIAMOnD output file. It is produced only if it does not
            exist yet, and is then parsed as a header line followed by
            whitespace-separated ``rank gene`` rows.
        only_lcc: forwarded to ``get_network``.

    Side effects:
        Writes ``file_name + ".coverage"`` if it does not exist. Line ``k``
        (k = 0..len(genes)) holds ``k`` and the fraction of seeds lying in
        the largest connected component of the subgraph induced by the seeds
        plus the top ``k`` ranked genes.

    Returns:
        Tuple ``(genes, nodes)``: the ranked gene ids read from `file_name`
        and the set of all network nodes.
    """
    network = get_network(network_file, only_lcc=only_lcc)
    nodes = set(network.nodes())
    seeds = set(seeds) & nodes
    n_iteration = 500
    if not os.path.exists(file_name):
        diamond.DIAMOnD(network, seeds, n_iteration, alpha=1, outfile=file_name)

    genes = []
    with open(file_name) as f:
        f.readline()  # skip the header line
        for line in f:
            if not line.strip():
                continue
            _rank, geneid = line.strip("\n").split()
            genes.append(geneid)

    coverage_file = file_name + ".coverage"
    if not os.path.exists(coverage_file):
        n = float(len(seeds))
        seed_list = list(seeds)
        with open(coverage_file, 'w') as f_out:
            # rank 0 is the seed-only subgraph; rank k adds the top k genes.
            for rank in range(len(genes) + 1):
                induced = network.subgraph(genes[:rank] + seed_list)
                # The LCC must come from the induced subgraph, not from the
                # whole network, and it is its node set that is intersected
                # with the seeds.
                components = network_utilities.get_connected_components(induced, False)
                if components and n:
                    largest = max(components, key=len)
                    coverage = len(set(largest) & seeds) / n
                else:
                    # No seed in the network: nothing to cover.
                    coverage = 0.0
                f_out.write("%s %f\n" % (rank, coverage))
    return genes, nodes
def get_modules_of_graph(sub_graph, module_detection_type, output_file, inflation=1.7):
    """Split `sub_graph` into modules with the requested detection method.

    Args:
        sub_graph: graph exposing ``nodes()`` and ``edges(data=True)``.
        module_detection_type: ``"connected"`` (connected components, as
            returned by ``network_utilities``) or ``"mcl"`` (external ``mcl``
            program).
        output_file: path of the MCL result file. The MCL input is written
            to ``output_file + ".mcl"`` and MCL's stderr is appended to
            ``output_file + ".err"``. Only used for ``"mcl"``.
        inflation: MCL inflation parameter (``-I``).

    Returns:
        The modules, either from ``network_utilities`` or as parsed by
        ``get_modules_from_file``.

    Raises:
        ValueError: if `module_detection_type` is not recognized.
        OSError: if the ``mcl`` executable cannot be started.
    """
    if module_detection_type == "connected":
        import network_utilities
        modules = network_utilities.get_connected_components(
            sub_graph, return_as_graph_list=True)
    elif module_detection_type == "mcl":
        import subprocess
        mcl_input = output_file + ".mcl"
        nodes_with_edges = set()
        with open(mcl_input, 'w') as f:
            for node1, node2, data in sub_graph.edges(data=True):
                nodes_with_edges.add(node1)
                nodes_with_edges.add(node2)
                # Edges without a 'w' attribute get "-" as the weight column.
                weight = str(data['w']) if 'w' in data else "-"
                f.write("%s\t%s\t%s\n" % (node1, node2, weight))
            # Nodes without any edge are listed on their own line.
            for node in sub_graph.nodes():
                if node not in nodes_with_edges:
                    f.write("%s\n" % node)
        # Optimum inflation parameter was 1.7-1.8 in a recent comparison paper
        # An argument list (no shell) keeps paths with spaces or shell
        # metacharacters intact; stderr is appended like the former "2>>".
        with open(output_file + ".err", 'a') as err:
            # NOTE: the exit status is not checked, as before.
            subprocess.call(
                ["mcl", mcl_input, "--abc", "-I", "%f" % inflation, "-o", output_file],
                stderr=err)
        modules = get_modules_from_file(output_file)
    else:
        raise ValueError("Unrecognized module detection type")
    return modules
def get_modules_of_graph(sub_graph, module_detection_type, output_file, inflation=1.7):
    """Detect modules of `sub_graph` via connected components or MCL clustering.

    Args:
        sub_graph: graph exposing ``nodes()`` and ``edges(data=True)``.
        module_detection_type: ``"connected"`` to use
            ``network_utilities.get_connected_components`` or ``"mcl"`` to
            run the external ``mcl`` program.
        output_file: where MCL writes its result (``"mcl"`` only). The edge
            list given to MCL goes to ``output_file + ".mcl"`` and MCL's
            stderr is appended to ``output_file + ".err"``.
        inflation: value passed to MCL's ``-I`` option.

    Returns:
        The detected modules (component graphs for ``"connected"``, the
        result of ``get_modules_from_file`` for ``"mcl"``).

    Raises:
        ValueError: for any other `module_detection_type`.
        OSError: if the ``mcl`` executable cannot be launched.
    """
    if module_detection_type == "connected":
        import network_utilities
        return network_utilities.get_connected_components(sub_graph, return_as_graph_list=True)

    if module_detection_type != "mcl":
        raise ValueError("Unrecognized module detection type")

    import subprocess
    abc_file = output_file + ".mcl"
    connected_nodes = set()
    with open(abc_file, 'w') as abc:
        for node1, node2, data in sub_graph.edges(data=True):
            connected_nodes.update((node1, node2))
            if 'w' in data:
                label = str(data['w'])
            else:
                label = "-"  # placeholder when the edge carries no 'w' value
            abc.write("%s\t%s\t%s\n" % (node1, node2, label))
        # Isolated nodes never show up in an edge, so list them separately.
        for node in sub_graph.nodes():
            if node not in connected_nodes:
                abc.write("%s\n" % node)

    # Optimum inflation parameter was 1.7-1.8 in a recent comparison paper
    command = ["mcl", abc_file, "--abc", "-I", "%f" % inflation, "-o", output_file]
    # Run without a shell so file names are never re-parsed; stderr is
    # appended to the .err file, matching the previous "2>>" redirection.
    with open(output_file + ".err", 'a') as err_log:
        # NOTE: MCL's exit status is ignored, as it was before.
        subprocess.call(command, stderr=err_log)
    return get_modules_from_file(output_file)
def get_network(network_file, only_lcc):
    """Load a network from a SIF file, optionally reduced to one component.

    The file is read without edge data and with unconnected nodes included.
    When `only_lcc` is true, the result is the subgraph on the first
    component returned by ``network_utilities.get_connected_components``
    (presumably the largest one -- confirm in network_utilities).
    """
    graph = network_utilities.create_network_from_sif_file(
        network_file,
        use_edge_data=False,
        delim=None,
        include_unconnected=True,
    )
    if not only_lcc:
        return graph
    component_list = network_utilities.get_connected_components(graph, False)
    return network_utilities.get_subgraph(graph, component_list[0])
def calculate_lcc_significance(network, nodes, nodes_random=None, bins=None, n_random=1000, min_bin_size=100, seed=452456):
    """Compute the LCC size of `nodes` and its z-score versus random node sets.

    When `nodes_random` is None the reference sets come from
    ``get_random_nodes``; if `bins` is also None, the degree bins are first
    built with ``network_utilities.get_degree_binning(network, min_bin_size)``.

    Returns:
        ``(d, z, (m, s))``: the observed LCC size, its z-score (0.0 when the
        random sizes have zero standard deviation), and the mean and standard
        deviation of the random LCC sizes.
    """
    if nodes_random is None:
        if bins is None:
            bins = network_utilities.get_degree_binning(network, min_bin_size)
        nodes_random = get_random_nodes(
            nodes, network, bins=bins, n_random=n_random,
            min_bin_size=min_bin_size, seed=seed)

    observed_graph = network.subgraph(nodes)
    d = len(network_utilities.get_connected_components(observed_graph, False)[0])

    values = numpy.empty(len(nodes_random))
    for position, sampled_nodes in enumerate(nodes_random):
        sampled_graph = network.subgraph(sampled_nodes)
        sampled_lcc = network_utilities.get_connected_components(sampled_graph, False)[0]
        values[position] = len(sampled_lcc)

    m = numpy.mean(values)
    s = numpy.std(values)
    z = 0.0 if s == 0 else (d - m) / s
    return d, z, (m, s)
def get_network(network_file, only_lcc):
    """Load a network from a SIF file and optionally keep one component only.

    Args:
        network_file: path of the SIF file; read without edge data and with
            unconnected nodes included.
        only_lcc: when true, the network is replaced by the subgraph on the
            first component returned by
            ``network_utilities.get_connected_components`` (presumably the
            largest one -- confirm in network_utilities).

    Side effects:
        When the network is reduced and ``network_file + ".lcc"`` does not
        exist yet, the reduced network's edges are written there, one
        ``"<u> 1 <v>"`` line per edge.

    Returns:
        The (possibly reduced) network.
    """
    network = network_utilities.create_network_from_sif_file(
        network_file, use_edge_data=False, delim=None, include_unconnected=True)
    if only_lcc:
        components = network_utilities.get_connected_components(network, False)
        network = network_utilities.get_subgraph(network, components[0])
        network_lcc_file = network_file + ".lcc"
        if not os.path.exists(network_lcc_file):
            # "with" closes the cache file even if a write fails part-way.
            with open(network_lcc_file, 'w') as f:
                for u, v in network.edges():
                    f.write("%s 1 %s\n" % (u, v))
    return network
def get_network(network_file, only_lcc):
    """Load a network from a SIF file and optionally keep one component only.

    Args:
        network_file: path of the SIF file; read without edge data and with
            unconnected nodes included. A name ending in ``".lcc"`` is
            treated as already reduced and is never shrunk again.
        only_lcc: when true (and the file is not a ``".lcc"`` file), the
            network is replaced by the subgraph on the first component
            returned by ``network_utilities.get_connected_components``
            (presumably the largest one -- confirm in network_utilities).

    Side effects:
        Prints the node and edge counts before and after shrinking. When the
        network is reduced and ``network_file + ".lcc"`` does not exist yet,
        the reduced network's edges are written there, one ``"<u> 1 <v>"``
        line per edge.

    Returns:
        The (possibly reduced) network.
    """
    network = network_utilities.create_network_from_sif_file(
        network_file, use_edge_data=False, delim=None, include_unconnected=True)
    if only_lcc and not network_file.endswith(".lcc"):
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print("Shrinking network to its LCC %s %s" % (
            len(network.nodes()), len(network.edges())))
        components = network_utilities.get_connected_components(network, False)
        network = network_utilities.get_subgraph(network, components[0])
        print("Final shape: %s %s" % (len(network.nodes()), len(network.edges())))
        network_lcc_file = network_file + ".lcc"
        if not os.path.exists(network_lcc_file):
            # "with" closes the cache file even if a write fails part-way.
            with open(network_lcc_file, 'w') as f:
                for u, v in network.edges():
                    f.write("%s 1 %s\n" % (u, v))
    return network
def get_diseasome_genes(diseasome_file, nodes=None, network=None):
    """Read disease-gene associations from a tab-separated diseasome file.

    Each non-blank line is ``category<TAB>disease<TAB>gene1<TAB>gene2...``;
    double quotes around the disease name are stripped.

    Args:
        diseasome_file: path of the file to read.
        nodes: if not None, keep only genes contained in this collection and
            skip diseases left without any gene.
        network: if not None, keep only the genes in the first connected
            component of the subgraph they induce (presumably the largest
            one -- confirm in network_utilities); diseases whose genes
            induce no component are skipped.

    Returns:
        Tuple ``(disease_to_genes, disease_to_category)`` of dictionaries
        keyed by disease name.
    """
    disease_to_genes = {}
    disease_to_category = {}
    # Convert once so any iterable of nodes works with the set intersection.
    allowed = None if nodes is None else set(nodes)
    with open(diseasome_file) as f:
        for line in f:
            line = line.rstrip("\r\n")
            if not line:
                continue  # tolerate blank lines, e.g. at the end of the file
            words = line.split("\t")
            category = words[0]
            disease = words[1].strip('"')
            genes = set(words[2:])
            if allowed is not None:
                genes &= allowed
                if not genes:
                    continue
            if network is not None:
                network_sub = network.subgraph(genes)
                components = network_utilities.get_connected_components(network_sub, False)
                if not components:
                    continue  # none of the genes is in the network
                genes = components[0]
            disease_to_genes[disease] = genes
            disease_to_category[disease] = category
    return disease_to_genes, disease_to_category