Пример #1
0
def calculate_separation_proximity(network, nodes_from, nodes_to, nodes_from_random=None, nodes_to_random=None, bins=None, n_random=1000, min_bin_size=100, seed=452456, lengths=None):
    """
    Calculate separation-based proximity from nodes_from to nodes_to.

    The observed value d = get_separation(network, nodes_from, nodes_to, lengths)
    is compared against the same measure computed on paired random node sets.

    network: graph object exposing nodes()
    nodes_from, nodes_to: the two node groups to compare
    nodes_from_random, nodes_to_random: optional precomputed random node
        sets (one collection of nodes per randomization); generated with
        get_random_nodes when not given
    bins: optional degree binning; generated when it is needed and not given
    n_random, min_bin_size, seed: forwarded to get_random_nodes
    lengths: precalculated shortest path length dictionary

    Returns None when either group has no node in the network, otherwise
    (d, z, (m, s)) where m and s are the mean and standard deviation of the
    random values and z = (d - m) / s (0.0 when s is 0).
    """
    nodes_network = set(network.nodes())
    if not (set(nodes_from) & nodes_network) or not (set(nodes_to) & nodes_network):
        return None  # At least one of the node groups is not in the network
    # NOTE(review): only a non-empty overlap is required here; nodes outside
    # the network are still handed to get_separation -- confirm it copes.
    d = get_separation(network, nodes_from, nodes_to, lengths)
    if bins is None and (nodes_from_random is None or nodes_to_random is None):
        # If lengths is given, binning will only use those nodes
        bins = network_utilities.get_degree_binning(network, min_bin_size, lengths)
    if nodes_from_random is None:
        nodes_from_random = get_random_nodes(nodes_from, network, bins=bins, n_random=n_random, min_bin_size=min_bin_size, seed=seed)
    if nodes_to_random is None:
        nodes_to_random = get_random_nodes(nodes_to, network, bins=bins, n_random=n_random, min_bin_size=min_bin_size, seed=seed)
    # zip() stops at the shorter collection, so build the array from the pairs
    # themselves; a pre-sized numpy.empty buffer would keep uninitialised
    # entries whenever the two random collections differ in length.
    values = numpy.array(
        [get_separation(network, random_from, random_to, lengths)
         for random_from, random_to in zip(nodes_from_random, nodes_to_random)],
        dtype=float)
    m, s = numpy.mean(values), numpy.std(values)
    if s == 0:
        z = 0.0
    else:
        z = (d - m) / s
    return d, z, (m, s)
Пример #2
0
def calculate_proximity_multiple(network, from_file=None, to_file=None, n_random=1000, min_bin_size=100, seed=452456, lengths=None, out_file="output.txt"):
    """
    Run proximity on each entry of the from and to files in a pairwise manner.

    Both files are read with get_diseasome_genes (restricted to the nodes of
    the network). For every (source, target) pair calculate_proximity is
    called with a degree binning computed once up front.

    Output is saved in out_file (e.g., output.txt) as tab separated columns
    source, target, n.source, n.target, d, z; rows of one source are sorted
    by ascending z. Pairs for which calculate_proximity returns None are
    skipped instead of aborting the whole run.
    """
    nodes = set(network.nodes())
    drug_to_targets, _ = get_diseasome_genes(from_file, nodes=nodes)
    disease_to_genes, _ = get_diseasome_genes(to_file, nodes=nodes)
    print("%d %d" % (len(drug_to_targets), len(disease_to_genes)))
    # Degree binning is identical for every pair, so compute it only once
    bins = network_utilities.get_degree_binning(network, min_bin_size)
    # The context manager closes the file even if a calculation raises
    with open(out_file, 'w') as f:
        f.write("source\ttarget\tn.source\tn.target\td\tz\n")
        for drug, nodes_from in drug_to_targets.items():
            values = []
            for disease, nodes_to in disease_to_genes.items():
                print("%s %s" % (drug, disease))
                result = calculate_proximity(network, nodes_from, nodes_to, nodes_from_random=None, nodes_to_random=None, bins=bins, n_random=n_random, min_bin_size=min_bin_size, seed=seed, lengths=lengths)
                if result is None:
                    # calculate_proximity could not evaluate this pair
                    continue
                d, z, _ = result
                values.append((drug, disease, z, len(nodes_from), len(nodes_to), d))
            values.sort(key=lambda x: x[2])
            for drug, disease, z, k, l, d in values:
                f.write("%s\t%s\t%d\t%d\t%f\t%f\n" % (drug, disease, k, l, d, z))
    return
Пример #3
0
def calculate_proximity(network, nodes_from, nodes_to, nodes_from_random=None, nodes_to_random=None, bins=None, n_random=1000, min_bin_size=100, seed=452456, lengths=None):
    """
    Calculate proximity from nodes_from to nodes_to.

    Both groups are first restricted to nodes present in the network. The
    observed value d = calculate_closest_distance(network, nodes_from,
    nodes_to, lengths) is compared against the same measure computed on
    paired random node sets.

    network: graph object exposing nodes()
    nodes_from_random, nodes_to_random: optional precomputed random node
        sets (one collection of nodes per randomization); generated with
        get_random_nodes when not given
    bins: optional degree binning; generated when it is needed and not given
    n_random, min_bin_size, seed: forwarded to get_random_nodes
    lengths: precalculated shortest path length dictionary

    Returns None when either group has no node in the network, otherwise
    (d, z, (m, s)) where m and s are the mean and standard deviation of the
    random values and z = (d - m) / s (0.0 when s is 0).
    """
    nodes_network = set(network.nodes())
    nodes_from = set(nodes_from) & nodes_network
    nodes_to = set(nodes_to) & nodes_network
    if not nodes_from or not nodes_to:
        return None  # At least one of the node groups is not in the network
    d = calculate_closest_distance(network, nodes_from, nodes_to, lengths)
    if bins is None and (nodes_from_random is None or nodes_to_random is None):
        # If lengths is given, binning will only use those nodes
        bins = network_utilities.get_degree_binning(network, min_bin_size, lengths)
    if nodes_from_random is None:
        nodes_from_random = get_random_nodes(nodes_from, network, bins=bins, n_random=n_random, min_bin_size=min_bin_size, seed=seed)
    if nodes_to_random is None:
        nodes_to_random = get_random_nodes(nodes_to, network, bins=bins, n_random=n_random, min_bin_size=min_bin_size, seed=seed)
    # zip() stops at the shorter collection, so build the array from the pairs
    # themselves; a pre-sized numpy.empty buffer would keep uninitialised
    # entries whenever the two random collections differ in length.
    values = numpy.array(
        [calculate_closest_distance(network, random_from, random_to, lengths)
         for random_from, random_to in zip(nodes_from_random, nodes_to_random)],
        dtype=float)
    m, s = numpy.mean(values), numpy.std(values)
    if s == 0:
        z = 0.0
    else:
        z = (d - m) / s
    return d, z, (m, s)
def calculate_proximity_multiple(parameter_file_prefix, i_start, i_end):
    """
    Run proximity for the parameter files "<prefix><i>.txt", i_start <= i < i_end.

    The network and its degree binning are built once, from the parameter
    file at index i_start, and reused for every index.
    NOTE(review): this assumes all parameter files share the same network
    file and min_bin_size -- confirm against how the files are generated.

    Processing stops at the first index whose parameter file is missing.
    Indices whose output file already exists are skipped. For the others the
    line "z d m s" is written to the out_file named in the parameter file;
    nothing is written when calculate_proximity returns None.
    """
    network_file, nodes_from, nodes_to, out_file, min_bin_size, n_random, n_seed = get_parameters_from_file(
        parameter_file_prefix + "%s.txt" % i_start)
    network = network_utilities.create_network_from_sif_file(
        network_file,
        use_edge_data=False,
        delim=None,
        include_unconnected=True)
    bins = network_utilities.get_degree_binning(network,
                                                min_bin_size,
                                                lengths=None)
    for i in range(i_start, i_end):
        parameter_file = parameter_file_prefix + "%s.txt" % i
        if not os.path.exists(parameter_file):
            print("File does not exists for index (aborting): %s" % (i,))
            break
        network_file, nodes_from, nodes_to, out_file, min_bin_size, n_random, n_seed = get_parameters_from_file(
            parameter_file)
        if os.path.exists(out_file):
            print("Skipping existing file for index: %s" % (i,))
            continue
        print("%s %s %s %s %s %s %s" % (network_file, nodes_from, nodes_to,
                                        n_random, min_bin_size, n_seed,
                                        out_file))
        values = wrappers.calculate_proximity(network,
                                              nodes_from=nodes_from,
                                              nodes_to=nodes_to,
                                              bins=bins,
                                              n_random=n_random,
                                              min_bin_size=min_bin_size,
                                              seed=n_seed)
        if values is not None:  # None: node groups not in network
            d, z, (m, s) = values
            # Close the handle deterministically so the result is flushed
            # before the next index is processed.
            with open(out_file, 'w') as f:
                f.write("%f %f %f %f\n" % (z, d, m, s))
    return
Пример #5
0
def calculate_proximity(network, nodes_from, nodes_to, nodes_from_random=None, nodes_to_random=None, bins=None, n_random=1000, min_bin_size=100, seed=452456):
    """
    Calculate proximity from nodes_from to nodes_to.

    Both groups are first restricted to nodes present in the network, so
    that nodes missing from the network are never passed on to the distance
    calculation or to the random node selection. The observed value
    d = calculate_closest_distance(network, nodes_from, nodes_to) is compared
    against the same measure computed on paired random node sets.

    network: graph object exposing nodes()
    nodes_from_random, nodes_to_random: optional precomputed random node
        sets (one collection of nodes per randomization); generated with
        get_random_nodes when not given
    bins: optional degree binning; generated when it is needed and not given
    n_random, min_bin_size, seed: forwarded to get_random_nodes

    Returns None when either group has no node in the network, otherwise
    (d, z, (m, s)) where m and s are the mean and standard deviation of the
    random values and z = (d - m) / s (0.0 when s is 0).
    """
    nodes_network = set(network.nodes())
    nodes_from = set(nodes_from) & nodes_network
    nodes_to = set(nodes_to) & nodes_network
    if not nodes_from or not nodes_to:
        return None  # At least one of the node groups is not in the network
    d = calculate_closest_distance(network, nodes_from, nodes_to)
    if bins is None and (nodes_from_random is None or nodes_to_random is None):
        bins = network_utilities.get_degree_binning(network, min_bin_size)
    if nodes_from_random is None:
        nodes_from_random = get_random_nodes(nodes_from, network, bins=bins, n_random=n_random, min_bin_size=min_bin_size, seed=seed)
    if nodes_to_random is None:
        nodes_to_random = get_random_nodes(nodes_to, network, bins=bins, n_random=n_random, min_bin_size=min_bin_size, seed=seed)
    # zip() stops at the shorter collection, so build the array from the pairs
    # themselves; a pre-sized numpy.empty buffer would keep uninitialised
    # entries whenever the two random collections differ in length.
    values = numpy.array(
        [calculate_closest_distance(network, random_from, random_to)
         for random_from, random_to in zip(nodes_from_random, nodes_to_random)],
        dtype=float)
    m, s = numpy.mean(values), numpy.std(values)
    if s == 0:
        z = 0.0
    else:
        z = (d - m) / s
    return d, z, (m, s)
Пример #6
0
def get_random_nodes(nodes,
                     network,
                     bins=None,
                     n_random=1000,
                     min_bin_size=100,
                     degree_aware=True,
                     seed=None):
    """
    Return the random node selection produced by
    network_utilities.pick_random_nodes_matching_selected for `nodes`.

    bins: degree binning of the network; computed with
        network_utilities.get_degree_binning(network, min_bin_size) when None
    n_random, degree_aware, seed: forwarded unchanged to the picker
    """
    degree_bins = (network_utilities.get_degree_binning(network, min_bin_size)
                   if bins is None else bins)
    return network_utilities.pick_random_nodes_matching_selected(
        network, degree_bins, nodes, n_random, degree_aware, seed=seed)
Пример #7
0
def calculate_proximity_multiple(parameter_file_prefix, i_start, i_end):
    """
    Run proximity for the parameter files "<prefix><i>.txt", i_start <= i < i_end.

    The network and its degree binning are created once from the parameter
    file at index i_start and shared by all indices.
    NOTE(review): presumably every parameter file refers to the same network
    file and min_bin_size -- verify with the code that writes these files.

    The loop aborts at the first missing parameter file and skips indices
    whose output file is already present. Otherwise "z d m s" is written as
    a single line to the out_file given in the parameter file, unless
    calculate_proximity returned None.
    """
    first_file = parameter_file_prefix + "%s.txt" % i_start
    network_file, nodes_from, nodes_to, out_file, min_bin_size, n_random, n_seed = get_parameters_from_file(first_file)
    network = network_utilities.create_network_from_sif_file(network_file, use_edge_data=False, delim=None, include_unconnected=True)
    bins = network_utilities.get_degree_binning(network, min_bin_size, lengths=None)
    for i in range(i_start, i_end):
        current_file = parameter_file_prefix + "%s.txt" % i
        if not os.path.exists(current_file):
            print("File does not exists for index (aborting): %s" % (i,))
            break
        network_file, nodes_from, nodes_to, out_file, min_bin_size, n_random, n_seed = get_parameters_from_file(current_file)
        if os.path.exists(out_file):
            print("Skipping existing file for index: %s" % (i,))
            continue
        print("%s %s %s %s %s %s %s" % (network_file, nodes_from, nodes_to, n_random, min_bin_size, n_seed, out_file))
        values = wrappers.calculate_proximity(network, nodes_from=nodes_from, nodes_to=nodes_to, bins=bins, n_random=n_random, min_bin_size=min_bin_size, seed=n_seed)
        if values is None:
            # Node groups not in network: nothing to report for this index
            continue
        d, z, (m, s) = values
        # Use a context manager so the file is closed (and flushed) right away
        # instead of whenever the anonymous handle happens to be collected.
        with open(out_file, 'w') as f:
            f.write("%f %f %f %f\n" % (z, d, m, s))
    return
Пример #8
0
def calculate_lcc_significance(network, nodes, nodes_random=None, bins=None, n_random=1000, min_bin_size=100, seed=452456):
    """
    Compare the component size of `nodes` with that of random node sets.

    For a node group, the measured value is the size of the first component
    returned by network_utilities.get_connected_components on the subgraph
    induced by that group (presumably the largest one -- confirm the
    ordering guaranteed by get_connected_components).

    nodes_random: optional precomputed random node sets; generated with
        get_random_nodes (using `bins`, computed if missing) when None

    Returns (d, z, (m, s)): d is the value for `nodes`, m and s are the mean
    and standard deviation over the random sets, z = (d - m) / s, or 0.0
    when s is 0.
    """
    def _first_component_size(node_group):
        # Size of the first reported component of the induced subgraph
        induced = network.subgraph(node_group)
        return len(network_utilities.get_connected_components(induced, False)[0])

    if nodes_random is None:
        if bins is None:
            bins = network_utilities.get_degree_binning(network, min_bin_size)
        nodes_random = get_random_nodes(nodes, network, bins=bins, n_random=n_random, min_bin_size=min_bin_size, seed=seed)
    d = _first_component_size(nodes)
    values = numpy.empty(len(nodes_random))
    for idx, random_group in enumerate(nodes_random):
        values[idx] = _first_component_size(random_group)
    m = numpy.mean(values)
    s = numpy.std(values)
    z = (d - m) / s if s != 0 else 0.0
    return d, z, (m, s)
Пример #9
0
def get_random_nodes(nodes, network, bins=None, n_random=1000, min_bin_size=100, degree_aware=True, seed=None):
    """
    Delegate random node selection for `nodes` to
    network_utilities.pick_random_nodes_matching_selected.

    When no degree binning is supplied, one is obtained from
    network_utilities.get_degree_binning(network, min_bin_size).
    n_random, degree_aware and seed are passed through as given.
    """
    if bins is not None:
        binning = bins
    else:
        # Get degree bins of the network
        binning = network_utilities.get_degree_binning(network, min_bin_size)
    pick = network_utilities.pick_random_nodes_matching_selected
    return pick(network, binning, nodes, n_random, degree_aware, seed=seed)