示例#1
0
def calculate_lcc_significance(network,
                               nodes,
                               nodes_random=None,
                               bins=None,
                               n_random=1000,
                               min_bin_size=100,
                               seed=452456):
    """
    Compare the component size of `nodes` against randomly drawn node sets.

    The observed value d is the size of the first component returned by
    network_utilities.get_connected_components for the subgraph induced by
    `nodes`; the same quantity is computed for every set in `nodes_random`.
    NOTE(review): the first returned component is presumably the largest
    one (LCC) -- confirm against network_utilities.

    network: graph object providing nodes() and subgraph()
    nodes: sized collection of nodes whose component size is evaluated
    nodes_random: optional sized collection of random node sets; when None,
        n_random sets of len(nodes) nodes are drawn by shuffling all nodes
        of the network
    bins, min_bin_size: unused here, kept for interface compatibility
        (degree matching is problematic for small bin sizes)
    n_random: number of random sets drawn when nodes_random is None
    seed: seed of the private random generator used for drawing the sets,
        so repeated calls with the same seed give the same baseline

    Returns (d, z, (m, s)) where m and s are the mean and standard deviation
    of the random component sizes and z = (d - m) / s (0.0 when s is 0).
    """
    # Function-scope import: only a bare ``shuffle`` name is known to be
    # available at module level.
    import random

    if nodes_random is None:
        # A dedicated generator makes the baseline reproducible for a given
        # seed without touching the state of the global generator.
        rng = random.Random(seed)
        network_nodes = list(network.nodes())
        n_nodes = len(nodes)
        nodes_random = []
        for _ in range(n_random):
            rng.shuffle(network_nodes)
            nodes_random.append(network_nodes[:n_nodes])

    observed_component = network_utilities.get_connected_components(
        network.subgraph(nodes), False)[0]
    d = len(observed_component)

    values = numpy.empty(len(nodes_random))
    for i, random_nodes in enumerate(nodes_random):
        random_component = network_utilities.get_connected_components(
            network.subgraph(random_nodes), False)[0]
        values[i] = len(random_component)

    m, s = numpy.mean(values), numpy.std(values)
    # A zero spread would make the z-score undefined; report 0.0 instead.
    z = 0.0 if s == 0 else (d - m) / s
    return d, z, (m, s)
示例#2
0
def calculate_lcc_significance(network, nodes, nodes_random=None, bins=None, n_random=1000, min_bin_size=100, seed=452456):
    """
    Compare the component size of `nodes` against randomly drawn node sets.

    The observed value d is the size of the first component returned by
    network_utilities.get_connected_components for the subgraph induced by
    `nodes`; the same quantity is computed for every set in `nodes_random`.
    NOTE(review): the first returned component is presumably the largest
    one (LCC) -- confirm against network_utilities.

    network: graph object providing nodes() and subgraph()
    nodes: sized collection of nodes whose component size is evaluated
    nodes_random: optional sized collection of random node sets; when None,
        n_random sets of len(nodes) nodes are drawn by shuffling all nodes
        of the network
    bins, min_bin_size: unused here, kept for interface compatibility
        (degree matching is problematic for small bin sizes)
    n_random: number of random sets drawn when nodes_random is None
    seed: value passed to random.seed() on every call

    Returns (d, z, (m, s), values) where values holds the component size of
    each random set, m and s are their mean and standard deviation and
    z = (d - m) / s (0.0 when s is 0).
    """
    # The global generator is seeded on every call, even when nodes_random
    # is supplied by the caller.
    random.seed(seed)
    if nodes_random is None:
        network_nodes = list(network.nodes())
        n_nodes = len(nodes)
        nodes_random = []
        for _ in range(n_random):
            random.shuffle(network_nodes)
            nodes_random.append(network_nodes[:n_nodes])

    observed_components = network_utilities.get_connected_components(network.subgraph(nodes), False)
    d = len(observed_components[0])

    values = numpy.empty(len(nodes_random))
    for i, random_nodes in enumerate(nodes_random):
        random_component = network_utilities.get_connected_components(network.subgraph(random_nodes), False)[0]
        values[i] = len(random_component)

    m, s = numpy.mean(values), numpy.std(values)
    # A zero spread would make the z-score undefined; report 0.0 instead.
    z = 0.0 if s == 0 else (d - m) / s
    return d, z, (m, s), values
示例#3
0
def get_diamond_genes(network_file, seeds, file_name, only_lcc=True):
    """
    Return the ranked gene list stored in `file_name` and the network nodes.

    The network is loaded with get_network(). Seeds that are not nodes of
    the network are discarded. When `file_name` does not exist it is created
    by diamond.DIAMOnD (500 iterations, alpha = 1). The file is then read:
    its first line is skipped and every other line must consist of two
    whitespace separated fields, the second one being the gene id.

    When `file_name`.coverage does not exist it is written with one
    "<rank> <coverage>" line for rank 0 (seeds only) and for every ranked
    gene, where coverage is the fraction of seeds inside the largest
    connected component of the subgraph induced by the seeds plus the genes
    up to that rank.

    Returns (genes, nodes): the ranked gene ids and the set of network nodes.
    """
    network = get_network(network_file, only_lcc=only_lcc)
    nodes = set(network.nodes())
    seeds = set(seeds) & nodes
    n_iteration = 500
    if not os.path.exists(file_name):
        diamond.DIAMOnD(network, seeds, n_iteration, alpha=1, outfile=file_name)

    genes = []
    with open(file_name) as f:
        f.readline()  # first line is skipped (presumably a header)
        for line in f:
            _rank, geneid = line.strip("\n").split()
            genes.append(geneid)

    coverage_file = file_name + ".coverage"
    if not os.path.exists(coverage_file):
        n = float(len(seeds))

        def seed_coverage(selected_nodes):
            # Fraction of seeds inside the largest component of the induced
            # subgraph. The previous code intersected the subgraph object
            # itself with the seed set and searched components of the whole
            # network instead.
            # NOTE(review): intent inferred from the formerly commented-out
            # networkx.connected_components(component) call -- confirm.
            if not seeds:
                return 0.0  # no seeds: nothing to cover, avoid dividing by 0
            components = network_utilities.get_connected_components(
                network.subgraph(selected_nodes), False)
            largest = max(components, key=len)
            return len(set(largest) & seeds) / n

        with open(coverage_file, 'w') as f_out:
            # Grown incrementally instead of rebuilding genes[:rank] + seeds
            # for every rank.
            selected = list(seeds)
            f_out.write("%s %f\n" % ("0", seed_coverage(selected)))
            for i, gene in enumerate(genes):
                rank = i + 1
                selected.append(gene)
                f_out.write("%s %f\n" % (rank, seed_coverage(selected)))
    return genes, nodes
示例#4
0
def get_modules_of_graph(sub_graph,
                         module_detection_type,
                         output_file,
                         inflation=1.7):
    """
    Split `sub_graph` into modules.

    module_detection_type:
        "connected" -- modules are the connected components returned by
            network_utilities.get_connected_components (as a graph list)
        "mcl" -- the graph is written to `output_file`.mcl, the external
            "mcl" program is run on it and the modules are read from
            `output_file` with get_modules_from_file()
    output_file: base path of the files used by the "mcl" method
        (`output_file`.mcl input, `output_file` result, `output_file`.err
        appended stderr); not used by the "connected" method
    inflation: value passed to mcl with -I

    Raises ValueError for any other module_detection_type.
    """
    if module_detection_type == "connected":
        import network_utilities
        modules = network_utilities.get_connected_components(
            sub_graph, return_as_graph_list=True)
    elif module_detection_type == "mcl":
        from os import system
        mcl_input = output_file + ".mcl"
        # The context manager closes the file even if a write fails.
        with open(mcl_input, 'w') as f:
            nodes_with_edges = set()
            for node1, node2, data in sub_graph.edges(data=True):
                nodes_with_edges.add(node1)
                nodes_with_edges.add(node2)
                # Edges without a 'w' attribute get "-" as third column.
                weight = str(data['w']) if 'w' in data else "-"
                f.write("%s\t%s\t%s\n" % (node1, node2, weight))
            # Nodes without any edge are listed alone on a line.
            for node in sub_graph.nodes():
                if node not in nodes_with_edges:
                    f.write("%s\n" % node)
        # Optimum inflation parameter was 1.7-1.8 in a recent comparison paper
        # NOTE(review): the command line is handed to the shell unquoted, so
        # paths containing spaces or shell metacharacters will break it.
        system("mcl %s --abc -I %f -o %s 2>> %s" %
               (mcl_input, inflation, output_file, output_file + ".err"))
        modules = get_modules_from_file(output_file)
    else:
        raise ValueError("Unrecognized module detection type")
    return modules
示例#5
0
def get_modules_of_graph(sub_graph, module_detection_type, output_file, inflation=1.7):
    """
    Split `sub_graph` into modules.

    module_detection_type:
        "connected" -- modules are the connected components returned by
            network_utilities.get_connected_components (as a graph list)
        "mcl" -- the graph is written to `output_file`.mcl, the external
            "mcl" program is run on it and the modules are read from
            `output_file` with get_modules_from_file()
    output_file: base path of the files used by the "mcl" method
        (`output_file`.mcl input, `output_file` result, `output_file`.err
        appended stderr); not used by the "connected" method
    inflation: value passed to mcl with -I

    Raises ValueError for any other module_detection_type.
    """
    if module_detection_type == "connected":
        import network_utilities
        return network_utilities.get_connected_components(sub_graph, return_as_graph_list=True)

    if module_detection_type != "mcl":
        raise ValueError("Unrecognized module detection type")

    from os import system
    # The context manager closes the input file even if a write fails.
    with open(output_file + ".mcl", 'w') as f:
        seen = set()
        for node1, node2, data in sub_graph.edges(data=True):
            seen.add(node1)
            seen.add(node2)
            # Edges without a 'w' attribute get "-" as third column.
            if 'w' in data:
                weight = str(data['w'])
            else:
                weight = "-"
            f.write("%s\t%s\t%s\n" % (node1, node2, weight))
        # Nodes without any edge are listed alone on a line.
        for node in sub_graph.nodes():
            if node not in seen:
                f.write("%s\n" % node)
    # Optimum inflation parameter was 1.7-1.8 in a recent comparison paper
    # NOTE(review): the command line is handed to the shell unquoted, so
    # paths containing spaces or shell metacharacters will break it.
    system("mcl %s --abc -I %f -o %s 2>> %s" % (output_file + ".mcl", inflation, output_file, output_file + ".err"))
    return get_modules_from_file(output_file)
示例#6
0
def get_network(network_file, only_lcc):
    """
    Load the network stored in the SIF file `network_file`.

    The file is parsed by network_utilities.create_network_from_sif_file
    without edge data, with the default delimiter and including unconnected
    nodes.

    only_lcc: when true, the network is reduced to the subgraph of the first
        component returned by network_utilities.get_connected_components.
        NOTE(review): that component is presumably the largest one (LCC) --
        confirm against network_utilities.

    Returns the (possibly reduced) network.
    """
    network = network_utilities.create_network_from_sif_file(
        network_file, use_edge_data=False, delim=None, include_unconnected=True)
    if not only_lcc:
        return network
    components = network_utilities.get_connected_components(network, False)
    return network_utilities.get_subgraph(network, components[0])
示例#7
0
def calculate_lcc_significance(network, nodes, nodes_random=None, bins=None, n_random=1000, min_bin_size=100, seed=452456):
    """
    Compare the component size of `nodes` against random node sets.

    The observed value d is the size of the first component returned by
    network_utilities.get_connected_components for the subgraph induced by
    `nodes`; the same quantity is computed for every set in `nodes_random`.
    NOTE(review): the first returned component is presumably the largest
    one (LCC) -- confirm against network_utilities.

    network: graph object providing subgraph()
    nodes: nodes whose component size is evaluated
    nodes_random: optional sized collection of random node sets; when None
        the sets are obtained from get_random_nodes()
    bins: degree bins handed to get_random_nodes(); when both bins and
        nodes_random are None they are computed with
        network_utilities.get_degree_binning(network, min_bin_size)
    n_random, min_bin_size, seed: forwarded to get_random_nodes()

    Returns (d, z, (m, s)) where m and s are the mean and standard deviation
    of the random component sizes and z = (d - m) / s (0.0 when s is 0).
    """
    if nodes_random is None:
        if bins is None:
            bins = network_utilities.get_degree_binning(network, min_bin_size)
        nodes_random = get_random_nodes(nodes, network, bins=bins, n_random=n_random, min_bin_size=min_bin_size, seed=seed)

    observed_component = network_utilities.get_connected_components(network.subgraph(nodes), False)[0]
    d = len(observed_component)

    values = numpy.empty(len(nodes_random))
    for i, random_nodes in enumerate(nodes_random):
        random_component = network_utilities.get_connected_components(network.subgraph(random_nodes), False)[0]
        values[i] = len(random_component)

    m, s = numpy.mean(values), numpy.std(values)
    # A zero spread would make the z-score undefined; report 0.0 instead.
    z = 0.0 if s == 0 else (d - m) / s
    return d, z, (m, s)
示例#8
0
def get_network(network_file, only_lcc):
    """
    Load the network stored in the SIF file `network_file`.

    The file is parsed by network_utilities.create_network_from_sif_file
    without edge data, with the default delimiter and including unconnected
    nodes.

    only_lcc: when true, the network is reduced to the subgraph of the first
        component returned by network_utilities.get_connected_components
        and, unless `network_file`.lcc already exists, the edges of the
        reduced network are written to that file as "<u> 1 <v>" lines.
        NOTE(review): that component is presumably the largest one (LCC) --
        confirm against network_utilities.

    Returns the (possibly reduced) network.
    """
    network = network_utilities.create_network_from_sif_file(
        network_file, use_edge_data=False, delim=None, include_unconnected=True)
    if not only_lcc:
        return network

    components = network_utilities.get_connected_components(network, False)
    network = network_utilities.get_subgraph(network, components[0])

    network_lcc_file = network_file + ".lcc"
    if not os.path.exists(network_lcc_file):
        # The context manager closes the file even if a write fails.
        with open(network_lcc_file, 'w') as f:
            for u, v in network.edges():
                f.write("%s 1 %s\n" % (u, v))
    return network
示例#9
0
def get_network(network_file, only_lcc):
    """
    Load the network stored in the SIF file `network_file`.

    The file is parsed by network_utilities.create_network_from_sif_file
    without edge data, with the default delimiter and including unconnected
    nodes.

    only_lcc: when true and `network_file` does not already end with
        ".lcc", the network is reduced to the subgraph of the first
        component returned by network_utilities.get_connected_components;
        node and edge counts are printed before and after. Unless
        `network_file`.lcc already exists, the edges of the reduced network
        are written to that file as "<u> 1 <v>" lines.
        NOTE(review): that component is presumably the largest one (LCC) --
        confirm against network_utilities.

    Returns the (possibly reduced) network.
    """
    network = network_utilities.create_network_from_sif_file(
        network_file, use_edge_data=False, delim=None, include_unconnected=True)
    # A file named *.lcc is taken to be reduced already, so it is returned
    # as loaded.
    if not only_lcc or network_file.endswith(".lcc"):
        return network

    # Single pre-formatted argument: prints the same text whether print is
    # a statement (Python 2) or a function (Python 3).
    print("Shrinking network to its LCC %s %s" % (len(network.nodes()), len(network.edges())))
    components = network_utilities.get_connected_components(network, False)
    network = network_utilities.get_subgraph(network, components[0])
    print("Final shape: %s %s" % (len(network.nodes()), len(network.edges())))

    network_lcc_file = network_file + ".lcc"
    if not os.path.exists(network_lcc_file):
        # The context manager closes the file even if a write fails.
        with open(network_lcc_file, 'w') as f:
            for u, v in network.edges():
                f.write("%s 1 %s\n" % (u, v))
    return network
示例#10
0
def get_diseasome_genes(diseasome_file, nodes=None, network=None):
    """
    Parse a tab separated diseasome file.

    Every non-empty line holds the category in the first column, the disease
    name in the second one (surrounding double quotes are removed) and the
    gene ids in the remaining columns.

    If nodes is not None, keep only genes contained in nodes (any iterable);
    diseases left without genes are skipped.
    If network is not None, keep only the first component returned by
    network_utilities.get_connected_components for the subgraph induced by
    the genes; diseases for which no component is returned are skipped.
    NOTE(review): that component is presumably the LCC -- confirm against
    network_utilities.

    Returns (disease_to_genes, disease_to_category).
    """
    disease_to_genes = {}
    disease_to_category = {}
    # The context manager closes the input file, which was left open before.
    with open(diseasome_file) as f:
        for line in f:
            line = line.strip("\n")
            if not line:
                continue  # tolerate blank lines, e.g. at the end of the file
            words = line.split("\t")
            category = words[0]
            disease = words[1].strip('"')
            genes = set(words[2:])
            if nodes is not None:
                # intersection_update accepts any iterable, not only sets
                genes.intersection_update(nodes)
                if not genes:
                    continue
            if network is not None:
                components = network_utilities.get_connected_components(
                    network.subgraph(genes), False)
                if not components:
                    continue  # none of the genes is part of the network
                genes = components[0]
            disease_to_genes[disease] = genes
            disease_to_category[disease] = category
    return disease_to_genes, disease_to_category