def calculate_separation_proximity(network, nodes_from, nodes_to, nodes_from_random=None, nodes_to_random=None, bins=None, n_random=1000, min_bin_size=100, seed=452456, lengths=None):
    """
    Calculate proximity from nodes_from to nodes_to using get_separation
    If degree binning or random nodes are not given, they are generated

    network: graph object exposing nodes()
    nodes_from, nodes_to: node collections whose separation is measured
    nodes_from_random, nodes_to_random: precomputed random node sets, one
        entry per randomization; generated with get_random_nodes when None
    bins: precomputed degree binning; generated only when it is None and at
        least one of the random node lists has to be generated
    n_random, min_bin_size, seed: forwarded to get_random_nodes
    lengths: precalculated shortest path length dictionary

    Returns None when nodes_from or nodes_to has no member in the network.
    Otherwise returns (d, z, (m, s)) where d is the observed separation,
    m and s are the mean and standard deviation (numpy.std) of the
    separations of the random pairs, and z is (d - m) / s (0.0 when s is 0).
    """
    nodes_network = set(network.nodes())
    if not (set(nodes_from) & nodes_network) or not (set(nodes_to) & nodes_network):
        return None  # At least one of the node groups is not in the network
    d = get_separation(network, nodes_from, nodes_to, lengths)
    if bins is None and (nodes_from_random is None or nodes_to_random is None):
        # if lengths is given, it will only use those nodes
        bins = network_utilities.get_degree_binning(network, min_bin_size, lengths)
    if nodes_from_random is None:
        nodes_from_random = get_random_nodes(nodes_from, network, bins=bins, n_random=n_random, min_bin_size=min_bin_size, seed=seed)
    if nodes_to_random is None:
        nodes_to_random = get_random_nodes(nodes_to, network, bins=bins, n_random=n_random, min_bin_size=min_bin_size, seed=seed)
    # Build the background distribution from the pairs that are actually
    # evaluated. zip() stops at the shorter list, so sizing a preallocated
    # buffer from only one of the lists could leave uninitialised entries
    # that would distort the mean and standard deviation.
    values = numpy.array(
        [get_separation(network, random_from, random_to, lengths)
         for random_from, random_to in zip(nodes_from_random, nodes_to_random)],
        dtype=float)
    m, s = numpy.mean(values), numpy.std(values)
    if s == 0:
        z = 0.0
    else:
        z = (d - m) / s
    return d, z, (m, s)
def calculate_proximity_multiple(network, from_file=None, to_file=None, n_random=1000, min_bin_size=100, seed=452456, lengths=None, out_file="output.txt"):
    """
    Run proximity on each entry of the from and to files in a pairwise manner
    output is saved in out_file (e.g., output.txt)

    network: graph object exposing nodes()
    from_file, to_file: files parsed with get_diseasome_genes; the first
        element of its result is used as a mapping from a name to a
        collection of nodes (presumably restricted to the network nodes
        passed as `nodes` -- confirm in get_diseasome_genes)
    n_random, min_bin_size, seed, lengths: forwarded to calculate_proximity
    out_file: tab separated output with the columns
        source, target, n.source, n.target, d, z

    For every source the targets are written in ascending order of z.
    Pairs for which calculate_proximity returns None are skipped.
    """
    nodes = set(network.nodes())
    # Only the name -> nodes mappings are needed; the category mappings
    # returned alongside them are ignored.
    drug_to_targets, _ = get_diseasome_genes(from_file, nodes=nodes)
    disease_to_genes, _ = get_diseasome_genes(to_file, nodes=nodes)
    # Calculate proximity values
    print("%d %d" % (len(drug_to_targets), len(disease_to_genes)))
    # Get degree binning once and share it across all pairs
    bins = network_utilities.get_degree_binning(network, min_bin_size)
    # The context manager closes the file even if a calculation fails midway
    with open(out_file, 'w') as f:
        f.write("source\ttarget\tn.source\tn.target\td\tz\n")
        for drug, nodes_from in drug_to_targets.items():
            n_source = len(nodes_from)
            values = []
            for disease, nodes_to in disease_to_genes.items():
                print("%s %s" % (drug, disease))
                result = calculate_proximity(network, nodes_from, nodes_to, nodes_from_random=None, nodes_to_random=None, bins=bins, n_random=n_random, min_bin_size=min_bin_size, seed=seed, lengths=lengths)
                if result is None:
                    # Nothing to report for this pair; unpacking None would
                    # otherwise abort the whole run with a TypeError.
                    continue
                d, z, _ = result
                values.append((disease, z, len(nodes_to), d))
            values.sort(key=lambda x: x[1])
            for disease, z, n_target, d in values:
                f.write("%s\t%s\t%d\t%d\t%f\t%f\n" % (drug, disease, n_source, n_target, d, z))
    return
def calculate_proximity(network, nodes_from, nodes_to, nodes_from_random=None, nodes_to_random=None, bins=None, n_random=1000, min_bin_size=100, seed=452456, lengths=None):
    """
    Calculate proximity from nodes_from to nodes_to
    If degree binning or random nodes are not given, they are generated

    network: graph object exposing nodes()
    nodes_from, nodes_to: node collections; both are restricted to the nodes
        present in the network before any calculation
    nodes_from_random, nodes_to_random: precomputed random node sets, one
        entry per randomization; generated with get_random_nodes when None
    bins: precomputed degree binning; generated only when it is None and at
        least one of the random node lists has to be generated
    n_random, min_bin_size, seed: forwarded to get_random_nodes
    lengths: precalculated shortest path length dictionary

    Returns None when nodes_from or nodes_to has no member in the network.
    Otherwise returns (d, z, (m, s)) where d is the value of
    calculate_closest_distance for the given nodes, m and s are the mean and
    standard deviation (numpy.std) of the same measure over the random
    pairs, and z is (d - m) / s (0.0 when s is 0).
    """
    nodes_network = set(network.nodes())
    nodes_from = set(nodes_from) & nodes_network
    nodes_to = set(nodes_to) & nodes_network
    if not nodes_from or not nodes_to:
        return None  # At least one of the node groups is not in the network
    d = calculate_closest_distance(network, nodes_from, nodes_to, lengths)
    if bins is None and (nodes_from_random is None or nodes_to_random is None):
        # if lengths is given, it will only use those nodes
        bins = network_utilities.get_degree_binning(network, min_bin_size, lengths)
    if nodes_from_random is None:
        nodes_from_random = get_random_nodes(nodes_from, network, bins=bins, n_random=n_random, min_bin_size=min_bin_size, seed=seed)
    if nodes_to_random is None:
        nodes_to_random = get_random_nodes(nodes_to, network, bins=bins, n_random=n_random, min_bin_size=min_bin_size, seed=seed)
    # Build the background distribution from the pairs that are actually
    # evaluated. zip() stops at the shorter list, so sizing a preallocated
    # buffer from only one of the lists could leave uninitialised entries
    # that would distort the mean and standard deviation.
    values = numpy.array(
        [calculate_closest_distance(network, random_from, random_to, lengths)
         for random_from, random_to in zip(nodes_from_random, nodes_to_random)],
        dtype=float)
    m, s = numpy.mean(values), numpy.std(values)
    if s == 0:
        z = 0.0
    else:
        z = (d - m) / s
    return d, z, (m, s)
def calculate_proximity_multiple(parameter_file_prefix, i_start, i_end):
    """
    Run proximity for the parameter files <parameter_file_prefix><i>.txt
    with i going from i_start (inclusive) to i_end (exclusive)

    Each parameter file is read with get_parameters_from_file and provides
    network_file, nodes_from, nodes_to, out_file, min_bin_size, n_random
    and n_seed. The result of each run is written to its out_file as a
    single line "z d m s".

    The network and the degree binning are created once, from the
    parameter file of i_start, and reused for all the indices.
    Processing stops at the first index whose parameter file is missing.
    Indices whose out_file already exists are skipped, and nothing is
    written when calculate_proximity returns None.
    """
    network_file, nodes_from, nodes_to, out_file, min_bin_size, n_random, n_seed = get_parameters_from_file(
        parameter_file_prefix + "%s.txt" % i_start)
    network = network_utilities.create_network_from_sif_file(
        network_file, use_edge_data=False, delim=None, include_unconnected=True)
    bins = network_utilities.get_degree_binning(network, min_bin_size, lengths=None)
    for i in range(i_start, i_end):
        parameter_file = parameter_file_prefix + "%s.txt" % i
        if not os.path.exists(parameter_file):
            message = "File does not exists for index (aborting):"
            print("%s %s" % (message, i))
            break
        network_file, nodes_from, nodes_to, out_file, min_bin_size, n_random, n_seed = get_parameters_from_file(
            parameter_file)
        if os.path.exists(out_file):
            message = "Skipping existing file for index:"
            print("%s %s" % (message, i))
            continue
        print("%s %s %s %s %s %s %s" % (
            network_file, nodes_from, nodes_to, n_random, min_bin_size, n_seed, out_file))
        values = wrappers.calculate_proximity(
            network, nodes_from=nodes_from, nodes_to=nodes_to, bins=bins,
            n_random=n_random, min_bin_size=min_bin_size, seed=n_seed)
        if values is not None:  # None means not in network
            d, z, (m, s) = values
            # Close the file deterministically instead of relying on the
            # garbage collector to flush and release the handle.
            with open(out_file, 'w') as f:
                f.write("%f %f %f %f\n" % (z, d, m, s))
    return
def calculate_proximity(network, nodes_from, nodes_to, nodes_from_random=None, nodes_to_random=None, bins=None, n_random=1000, min_bin_size=100, seed=452456):
    """
    Calculate proximity from nodes_from to nodes_to
    If degree binning or random nodes are not given, they are generated

    network: graph object exposing nodes()
    nodes_from, nodes_to: node collections whose distance is measured
    nodes_from_random, nodes_to_random: precomputed random node sets, one
        entry per randomization; generated with get_random_nodes when None
    bins: precomputed degree binning; generated only when it is None and at
        least one of the random node lists has to be generated
    n_random, min_bin_size, seed: forwarded to get_random_nodes

    Returns None when nodes_from or nodes_to has no member in the network.
    Otherwise returns (d, z, (m, s)) where d is the value of
    calculate_closest_distance for the given nodes, m and s are the mean and
    standard deviation (numpy.std) of the same measure over the random
    pairs, and z is (d - m) / s (0.0 when s is 0).
    """
    nodes_network = set(network.nodes())
    if not (set(nodes_from) & nodes_network) or not (set(nodes_to) & nodes_network):
        return None  # At least one of the node groups is not in the network
    d = calculate_closest_distance(network, nodes_from, nodes_to)
    if bins is None and (nodes_from_random is None or nodes_to_random is None):
        bins = network_utilities.get_degree_binning(network, min_bin_size)
    if nodes_from_random is None:
        nodes_from_random = get_random_nodes(nodes_from, network, bins=bins, n_random=n_random, min_bin_size=min_bin_size, seed=seed)
    if nodes_to_random is None:
        nodes_to_random = get_random_nodes(nodes_to, network, bins=bins, n_random=n_random, min_bin_size=min_bin_size, seed=seed)
    # Build the background distribution from the pairs that are actually
    # evaluated. zip() stops at the shorter list, so sizing a preallocated
    # buffer from only one of the lists could leave uninitialised entries
    # that would distort the mean and standard deviation.
    values = numpy.array(
        [calculate_closest_distance(network, random_from, random_to)
         for random_from, random_to in zip(nodes_from_random, nodes_to_random)],
        dtype=float)
    m, s = numpy.mean(values), numpy.std(values)
    if s == 0:
        z = 0.0
    else:
        z = (d - m) / s
    return d, z, (m, s)
def get_random_nodes(nodes, network, bins=None, n_random=1000, min_bin_size=100, degree_aware=True, seed=None):
    """
    Pick random nodes matching the given nodes

    Thin wrapper around network_utilities.pick_random_nodes_matching_selected.
    When bins is None, the degree binning of the network is calculated first
    with network_utilities.get_degree_binning(network, min_bin_size).

    n_random, degree_aware and seed are passed through unchanged (presumably
    the number of random node sets, whether the degrees of `nodes` are
    matched, and the random seed -- confirm in network_utilities).
    """
    degree_bins = bins
    if degree_bins is None:
        # Get degree bins of the network
        degree_bins = network_utilities.get_degree_binning(network, min_bin_size)
    return network_utilities.pick_random_nodes_matching_selected(
        network, degree_bins, nodes, n_random, degree_aware, seed=seed)
def calculate_proximity_multiple(parameter_file_prefix, i_start, i_end):
    """
    Calculate proximity for a batch of parameter files

    The parameter file of index i is parameter_file_prefix + "<i>.txt" and
    is parsed with get_parameters_from_file. Indices from i_start up to
    (but not including) i_end are processed in order.

    - The network and its degree binning are built only once, using the
      parameter file of i_start.
    - The loop is aborted at the first index without a parameter file.
    - An index is skipped if its output file already exists.
    - If calculate_proximity returns None no output file is created,
      otherwise the line "z d m s" is written to the output file.
    """
    parameters = get_parameters_from_file(parameter_file_prefix + "%s.txt" % i_start)
    network_file, nodes_from, nodes_to, out_file, min_bin_size, n_random, n_seed = parameters
    network = network_utilities.create_network_from_sif_file(network_file, use_edge_data=False, delim=None, include_unconnected=True)
    bins = network_utilities.get_degree_binning(network, min_bin_size, lengths=None)
    for i in range(i_start, i_end):
        file_name = parameter_file_prefix + "%s.txt" % i
        if not os.path.exists(file_name):
            print("%s %s" % ("File does not exists for index (aborting):", i))
            break
        parameters = get_parameters_from_file(file_name)
        network_file, nodes_from, nodes_to, out_file, min_bin_size, n_random, n_seed = parameters
        if os.path.exists(out_file):
            print("%s %s" % ("Skipping existing file for index:", i))
            continue
        print("%s %s %s %s %s %s %s" % (network_file, nodes_from, nodes_to, n_random, min_bin_size, n_seed, out_file))
        values = wrappers.calculate_proximity(network, nodes_from=nodes_from, nodes_to=nodes_to, bins=bins, n_random=n_random, min_bin_size=min_bin_size, seed=n_seed)
        if values is None:  # not in network
            continue
        d, z, (m, s) = values
        # Use a context manager so the result is flushed and the handle is
        # released right away instead of whenever the object is collected.
        with open(out_file, 'w') as out:
            out.write("%f %f %f %f\n" % (z, d, m, s))
    return
def calculate_lcc_significance(network, nodes, nodes_random=None, bins=None, n_random=1000, min_bin_size=100, seed=452456):
    """
    Compare the component size of the subgraph induced by nodes against
    the component sizes obtained for random node sets

    The size used is the length of the first component returned by
    network_utilities.get_connected_components(subgraph, False)
    (presumably the largest connected component -- confirm the ordering
    in network_utilities).

    nodes_random: precomputed random node sets; generated with
        get_random_nodes when None
    bins: precomputed degree binning; generated only when both bins and
        nodes_random are None
    n_random, min_bin_size, seed: forwarded to get_random_nodes

    Returns (d, z, (m, s)) where d is the size for nodes, m and s are the
    mean and standard deviation (numpy.std) of the sizes for the random
    node sets, and z is (d - m) / s (0.0 when s is 0).
    """
    def first_component_size(node_set):
        # Size of the first connected component of the induced subgraph
        subgraph = network.subgraph(node_set)
        components = network_utilities.get_connected_components(subgraph, False)
        return len(components[0])

    if nodes_random is None:
        if bins is None:
            bins = network_utilities.get_degree_binning(network, min_bin_size)
        nodes_random = get_random_nodes(nodes, network, bins=bins, n_random=n_random, min_bin_size=min_bin_size, seed=seed)
    d = first_component_size(nodes)
    values = numpy.empty(len(nodes_random))
    for index, random_node_set in enumerate(nodes_random):
        values[index] = first_component_size(random_node_set)
    m = numpy.mean(values)
    s = numpy.std(values)
    z = 0.0 if s == 0 else (d - m) / s
    return d, z, (m, s)
def get_random_nodes(nodes, network, bins=None, n_random=1000, min_bin_size=100, degree_aware=True, seed=None):
    """
    Return the result of network_utilities.pick_random_nodes_matching_selected
    for the given nodes

    bins: degree binning to use; when None it is obtained from
        network_utilities.get_degree_binning(network, min_bin_size)
    n_random, degree_aware, seed: handed over as they are to
        pick_random_nodes_matching_selected (their exact meaning is defined
        there -- verify against network_utilities)
    """
    # Get degree bins of the network only when the caller did not supply them
    bins = network_utilities.get_degree_binning(network, min_bin_size) if bins is None else bins
    picked = network_utilities.pick_random_nodes_matching_selected(
        network, bins, nodes, n_random, degree_aware, seed=seed)
    return picked