def main():
    """Run the GUILD tutorial: prepare inputs, build cross validation files, run NetScore.

    All paths are fixed relative to the tutorial data directory. The function
    takes no arguments and returns None; its effects are the files produced by
    the guild_utilities calls below.
    """
    # Locations of the tutorial inputs, the output folder and the GUILD binary
    data_dir = "../../DATA/guild_tutorial/"
    scoring_folder = data_dir + "test/"
    seed_file = data_dir + "seeds.txt"
    network_file = data_dir + "interactions.sif"
    executable_path = "../guild/scoreN"

    # Create input files for scoring
    guild_utilities.prepare_scoring(
        network_file,
        seed_file,
        scoring_folder,
        non_seed_score=0.01,
        seed_score=1.0,
        edge_score=1.0,
        n_sample=100,
        delim=" ",
    )

    # Collect nodes, edges and their (uniform) scores for cross validation
    graph = network_utilities.create_network_from_sif_file(
        network_file, use_edge_data=True)
    seed_nodes = guild_utilities.get_nodes(seed_file)
    nodes = graph.nodes()
    edges = graph.edges()
    seed_to_score = {}
    for seed in seed_nodes:
        seed_to_score[seed] = 1
    edge_to_score = {}
    for u, v in edges:
        edge_to_score[(u, v)] = 1

    # Generate cross validation files
    node_scores_file = scoring_folder + "node_scores.sif"
    edge_scores_file = scoring_folder + "edge_scores_netshort.sif"
    guild_utilities.generate_cross_validation_node_score_files(
        nodes, seed_to_score, node_scores_file,
        xval=3, default_score=0.01, replicable=123)
    guild_utilities.generate_cross_validation_edge_score_as_node_score_files(
        edges, seed_to_score, edge_to_score, edge_scores_file,
        xval=3, default_score=0.01, replicable=123)

    # Run NetScore on these cross validation files
    netscore_parameters = {"n_iteration": 2, "n_repetition": 3}
    guild_utilities.run_scoring(
        scoring_folder,
        executable_path,
        scoring_type="netscore",
        parameters=netscore_parameters,
        qname=None,
        calculate_pvalue=True,
        xval=3,
    )
def score_mcl(node_scores_file, network_file, output_scores_file, module_file, default_non_seed_score):
    """Score nodes by the share of their module made up of seed neighbors.

    For each module read from module_file, every member node gets
    (number of its graph neighbors that are in the same module and in the
    seed set, the node itself excluded) / (module size). If a node occurs in
    several modules, the score from the last module processed is kept.

    One tab-separated "node score" line is written to output_scores_file for
    every node returned by get_seeds_from_node_scores_file; nodes that belong
    to no module are written with the literal score "0.0".

    Args:
        node_scores_file: file passed to get_seeds_from_node_scores_file.
        network_file: SIF file the graph is built from.
        output_scores_file: path of the score file to (over)write.
        module_file: file passed to get_modules_from_file.
        default_non_seed_score: forwarded to get_seeds_from_node_scores_file.
    """
    g = network_utilities.create_network_from_sif_file(network_file, use_edge_data=True)
    # Alternative if edge weight based clustering is desired:
    # modules = get_modules_of_graph(g, "mcl", inflation=2)
    seeds, nodes = get_seeds_from_node_scores_file(node_scores_file, default_non_seed_score)
    modules = get_modules_from_file(module_file)

    node_to_score = {}
    for module in modules:
        members = set(module)
        module_size = len(members)
        for node in members:
            # Neighbors of the node that sit in the same module, minus the
            # node itself (a self loop would otherwise count).
            in_module_neighbors = set(g.neighbors(node)) & members
            in_module_neighbors.discard(node)
            node_to_score[node] = float(len(in_module_neighbors & seeds)) / module_size

    # Open the output only after all scores are computed, and let the context
    # manager close it even if a write fails.
    with open(output_scores_file, 'w') as f:
        for node in nodes:
            if node in node_to_score:
                f.write("%s\t%f\n" % (node, node_to_score[node]))
            else:
                f.write("%s\t0.0\n" % node)
    return
def calculate_proximity_multiple(parameter_file_prefix, i_start, i_end):
    """Run the proximity calculation for a range of numbered parameter files.

    Parameter files are named "<parameter_file_prefix><i>.txt" for i in
    [i_start, i_end). The network and the degree bins are built once, from the
    parameter file with index i_start, and reused for every index.
    NOTE(review): this presumably assumes all parameter files refer to the
    same network and min_bin_size -- confirm with the callers.

    Processing stops at the first index whose parameter file is missing.
    Indices whose output file already exists are skipped. For each remaining
    index the result is written to its out_file as "z d m s" on one line,
    unless wrappers.calculate_proximity returns None.

    Args:
        parameter_file_prefix: path prefix of the numbered parameter files.
        i_start: first index (inclusive).
        i_end: last index (exclusive).
    """
    (network_file, nodes_from, nodes_to, out_file,
     min_bin_size, n_random, n_seed) = get_parameters_from_file(
        parameter_file_prefix + "%s.txt" % i_start)
    network = network_utilities.create_network_from_sif_file(
        network_file, use_edge_data=False, delim=None, include_unconnected=True)
    bins = network_utilities.get_degree_binning(network, min_bin_size, lengths=None)
    for i in xrange(i_start, i_end):
        parameter_file = parameter_file_prefix + "%s.txt" % i
        if not os.path.exists(parameter_file):
            print("File does not exists for index (aborting): %s" % i)
            break
        (network_file, nodes_from, nodes_to, out_file,
         min_bin_size, n_random, n_seed) = get_parameters_from_file(parameter_file)
        if os.path.exists(out_file):
            print("Skipping existing file for index: %s" % i)
            continue
        print("%s %s %s %s %s %s %s" % (
            network_file, nodes_from, nodes_to, n_random, min_bin_size, n_seed, out_file))
        values = wrappers.calculate_proximity(
            network, nodes_from=nodes_from, nodes_to=nodes_to, bins=bins,
            n_random=n_random, min_bin_size=min_bin_size, seed=n_seed)
        if values is None:
            # Per the original comment: the nodes are not in the network.
            continue
        d, z, (m, s) = values
        # Context manager so the result is flushed and the handle closed
        # deterministically before the next iteration.
        with open(out_file, 'w') as f:
            f.write("%f %f %f %f\n" % (z, d, m, s))
    return
def get_network(network_file, only_lcc):
    """Load the network stored in network_file.

    The network is read without edge data and with unconnected nodes kept.
    When only_lcc is true, the subgraph induced by the first component
    returned by network_utilities.get_connected_components is returned
    instead of the full network.
    """
    full_network = network_utilities.create_network_from_sif_file(
        network_file,
        use_edge_data=False,
        delim=None,
        include_unconnected=True,
    )
    if not only_lcc:
        return full_network
    components = network_utilities.get_connected_components(full_network, False)
    first_component = components[0]
    return network_utilities.get_subgraph(full_network, first_component)
def get_network(network_file, only_lcc):
    """Load the network in network_file, optionally reduced to one component.

    The network is read without edge data and with unconnected nodes kept.
    When only_lcc is true, the network is replaced by the subgraph induced by
    the first component returned by network_utilities.get_connected_components
    and, if "<network_file>.lcc" does not exist yet, the edges of that
    subgraph are written there as "u 1 v" lines.

    Args:
        network_file: path of the SIF file to read.
        only_lcc: whether to reduce the network to its first component.

    Returns:
        The loaded (and possibly reduced) network.
    """
    network = network_utilities.create_network_from_sif_file(
        network_file, use_edge_data=False, delim=None, include_unconnected=True)
    if only_lcc:
        components = network_utilities.get_connected_components(network, False)
        network = network_utilities.get_subgraph(network, components[0])
        # NOTE(review): the ".lcc" file is written only when the network was
        # actually reduced -- confirm this is the intended placement.
        network_lcc_file = network_file + ".lcc"
        if not os.path.exists(network_lcc_file):
            # Context manager closes the file even if a write fails.
            with open(network_lcc_file, 'w') as f:
                for u, v in network.edges():
                    f.write("%s 1 %s\n" % (u, v))
    return network
def get_network(network_file, only_lcc):
    """Load the network in network_file, optionally reduced to one component.

    The network is read without edge data and with unconnected nodes kept.
    When only_lcc is true and network_file does not already end in ".lcc",
    the network is replaced by the subgraph induced by the first component
    returned by network_utilities.get_connected_components; the node and edge
    counts are printed before and after. If "<network_file>.lcc" does not
    exist yet, the edges of the reduced network are written there as
    "u 1 v" lines.

    Args:
        network_file: path of the SIF file to read.
        only_lcc: whether to reduce the network to its first component.

    Returns:
        The loaded (and possibly reduced) network.
    """
    network = network_utilities.create_network_from_sif_file(
        network_file, use_edge_data=False, delim=None, include_unconnected=True)
    # A file already named "*.lcc" is taken as is and not reduced again.
    if only_lcc and not network_file.endswith(".lcc"):
        print("Shrinking network to its LCC %s %s" % (
            len(network.nodes()), len(network.edges())))
        components = network_utilities.get_connected_components(network, False)
        network = network_utilities.get_subgraph(network, components[0])
        print("Final shape: %s %s" % (len(network.nodes()), len(network.edges())))
        # NOTE(review): the ".lcc" file is written only when the network was
        # actually reduced -- confirm this is the intended placement.
        network_lcc_file = network_file + ".lcc"
        if not os.path.exists(network_lcc_file):
            # Context manager closes the file even if a write fails.
            with open(network_lcc_file, 'w') as f:
                for u, v in network.edges():
                    f.write("%s 1 %s\n" % (u, v))
    return network
def main():
    """
    Get nodes that are top scoring w.r.t. GUILD scores.

    Assumes that GUILD scores have been calculated already
    (i.e. python hello_world.py).

    Nodes with p-value <= 0.05 are selected, the subnetwork they induce in
    the interaction network is extracted, and each of its edges is written to
    subnetwork.sif as "u<TAB>score<TAB>v", where score is the mean of the two
    endpoint values returned by stat_utilities.convert_p_values_to_z_scores.
    """
    # Set the network and score files
    data_dir = "../../DATA/guild_tutorial/"
    network_file = data_dir + "interactions.sif"
    scoring_folder = data_dir + "test/"
    pvalue_file = scoring_folder + "output_scores.sif.netcombo.pval"
    subnetwork_file = scoring_folder + "subnetwork.sif"

    # Get GUILD scores
    node_to_vals = guild_utilities.get_values_from_pvalue_file(pvalue_file)

    # Get top scoring, i.e. nodes that have p-value <= 0.05
    top_nodes = set()
    for node, (_, pval) in node_to_vals.iteritems():
        if pval <= 0.05:
            top_nodes.add(node)

    # Load interaction network
    g = network_utilities.create_network_from_sif_file(network_file, use_edge_data=True)

    # Get subnetwork induced by top scoring nodes
    g_sub = network_utilities.get_subgraph(g, top_nodes)

    # Output subnetwork along with the inverted p-value scores (z-scores)
    # calculated for edges. The context manager closes the file even if a
    # conversion or write fails part-way through.
    with open(subnetwork_file, 'w') as f:
        for u, v in g_sub.edges():
            zscore_u = stat_utilities.convert_p_values_to_z_scores(
                [node_to_vals[u][1]])[0]
            zscore_v = stat_utilities.convert_p_values_to_z_scores(
                [node_to_vals[v][1]])[0]
            score = (zscore_u + zscore_v) / 2
            f.write("%s\t%f\t%s\n" % (u, score, v))
    return
def calculate_proximity_multiple(parameter_file_prefix, i_start, i_end):
    """Run the proximity calculation for a range of numbered parameter files.

    Parameter files are named "<parameter_file_prefix><i>.txt" for i in
    [i_start, i_end). The network and the degree bins are built once, from the
    parameter file with index i_start, and reused for every index.
    NOTE(review): this presumably assumes all parameter files refer to the
    same network and min_bin_size -- confirm with the callers.

    Processing stops at the first index whose parameter file is missing.
    Indices whose output file already exists are skipped. For each remaining
    index the result is written to its out_file as "z d m s" on one line,
    unless wrappers.calculate_proximity returns None.

    Args:
        parameter_file_prefix: path prefix of the numbered parameter files.
        i_start: first index (inclusive).
        i_end: last index (exclusive).
    """
    (network_file, nodes_from, nodes_to, out_file,
     min_bin_size, n_random, n_seed) = get_parameters_from_file(
        parameter_file_prefix + "%s.txt" % i_start)
    network = network_utilities.create_network_from_sif_file(
        network_file, use_edge_data=False, delim=None, include_unconnected=True)
    bins = network_utilities.get_degree_binning(network, min_bin_size, lengths=None)
    for i in xrange(i_start, i_end):
        parameter_file = parameter_file_prefix + "%s.txt" % i
        if not os.path.exists(parameter_file):
            print("File does not exists for index (aborting): %s" % i)
            break
        (network_file, nodes_from, nodes_to, out_file,
         min_bin_size, n_random, n_seed) = get_parameters_from_file(parameter_file)
        if os.path.exists(out_file):
            print("Skipping existing file for index: %s" % i)
            continue
        print("%s %s %s %s %s %s %s" % (
            network_file, nodes_from, nodes_to, n_random, min_bin_size, n_seed, out_file))
        values = wrappers.calculate_proximity(
            network, nodes_from=nodes_from, nodes_to=nodes_to, bins=bins,
            n_random=n_random, min_bin_size=min_bin_size, seed=n_seed)
        if values is not None:  # None: not in network (per the original comment)
            d, z, (m, s) = values
            # Context manager so the result is flushed and the handle closed
            # deterministically before the next iteration.
            with open(out_file, 'w') as f:
                f.write("%f %f %f %f\n" % (z, d, m, s))
    return
def main():
    """Command line entry point for the proximity calculation.

    Three modes, checked in this order:

    * -p/--parameter_file_prefix with -i and -j: delegate to
      calculate_proximity_multiple for the numbered parameter files.
    * -f/--parameter_file: read all settings from that parameter file.
    * otherwise: take the settings from -e, -s, -t, -o (required in this
      mode) and -n, -m, -x; -s and -t are comma separated node lists.

    In the last two modes the network is loaded, wrappers.calculate_proximity
    is called once and, unless it returns None, the result is written to
    out_file as "z d m s" on one line.

    Missing mode-specific arguments are reported through parser.error, which
    prints a usage message and exits with status 2.
    """
    parser = argparse.ArgumentParser()
    # The first four are only mandatory in single-run mode, so they cannot be
    # declared required=True here; they are validated below instead.
    parser.add_argument('-e', '--network_file')
    parser.add_argument('-s', '--nodes_from')
    parser.add_argument('-t', '--nodes_to')
    parser.add_argument('-o', '--out_file')
    parser.add_argument('-n', '--n_random', type=int, default=1000)
    parser.add_argument('-m', '--min_bin_size', type=int, default=100)
    parser.add_argument('-x', '--n_seed', type=int, default=452456)
    parser.add_argument('-f', '--parameter_file', type=str, default=None)
    parser.add_argument('-p', '--parameter_file_prefix', type=str, default=None)
    parser.add_argument('-i', '--parameter_file_start_index', type=int, default=None)
    parser.add_argument('-j', '--parameter_file_end_index', type=int, default=None)
    args = parser.parse_args()

    # Run more than once for given input files
    if args.parameter_file_prefix is not None:
        i_start = args.parameter_file_start_index
        i_end = args.parameter_file_end_index
        if i_start is None or i_end is None:
            parser.error("--parameter_file_prefix requires "
                         "--parameter_file_start_index and --parameter_file_end_index")
        calculate_proximity_multiple(args.parameter_file_prefix, i_start, i_end)
        return

    if args.parameter_file is not None:
        # Run from input parameter file. The file named on the command line
        # is read directly; there is no prefix or index in this mode.
        (network_file, nodes_from, nodes_to, out_file,
         min_bin_size, n_random, n_seed) = get_parameters_from_file(args.parameter_file)
    else:
        # Run once with provided arguments
        missing = [flag for flag, value in (
            ("--network_file", args.network_file),
            ("--nodes_from", args.nodes_from),
            ("--nodes_to", args.nodes_to),
            ("--out_file", args.out_file),
        ) if value is None]
        if missing:
            parser.error("missing required argument(s): %s" % ", ".join(missing))
        nodes_from = args.nodes_from.split(",")
        nodes_to = args.nodes_to.split(",")
        network_file = args.network_file
        n_random = args.n_random
        min_bin_size = args.min_bin_size
        n_seed = args.n_seed
        out_file = args.out_file

    network = network_utilities.create_network_from_sif_file(
        network_file, use_edge_data=False, delim=None, include_unconnected=True)
    print("%s %s %s %s %s %s %s" % (
        network_file, nodes_from, nodes_to, n_random, min_bin_size, n_seed, out_file))
    values = wrappers.calculate_proximity(
        network, nodes_from=nodes_from, nodes_to=nodes_to,
        n_random=n_random, min_bin_size=min_bin_size, seed=n_seed)
    if values is not None:  # None: not in network (per the original comment)
        d, z, (m, s) = values
        # Context manager guarantees the result is flushed and closed.
        with open(out_file, 'w') as f:
            f.write("%f %f %f %f\n" % (z, d, m, s))
    return
def score_mcl(node_scores_file, network_file, output_scores_file, module_file, default_non_seed_score):
    """Score nodes by the share of their module made up of seed neighbors.

    For each module read from module_file, every member node gets
    (number of its graph neighbors that are in the same module and in the
    seed set, the node itself excluded) / (module size). If a node occurs in
    several modules, the score from the last module processed is kept.

    One tab-separated "node score" line is written to output_scores_file for
    every node returned by get_seeds_from_node_scores_file; nodes that belong
    to no module are written with the literal score "0.0".

    Args:
        node_scores_file: file passed to get_seeds_from_node_scores_file.
        network_file: SIF file the graph is built from.
        output_scores_file: path of the score file to (over)write.
        module_file: file passed to get_modules_from_file.
        default_non_seed_score: forwarded to get_seeds_from_node_scores_file.
    """
    g = network_utilities.create_network_from_sif_file(network_file, use_edge_data=True)
    # Alternative if edge weight based clustering is desired:
    # modules = get_modules_of_graph(g, "mcl", inflation=2)
    seeds, nodes = get_seeds_from_node_scores_file(node_scores_file, default_non_seed_score)
    modules = get_modules_from_file(module_file)

    node_to_score = {}
    for module in modules:
        members = set(module)
        module_size = len(members)
        for node in members:
            # Neighbors of the node that sit in the same module, minus the
            # node itself (a self loop would otherwise count).
            in_module_neighbors = set(g.neighbors(node)) & members
            in_module_neighbors.discard(node)
            node_to_score[node] = float(len(in_module_neighbors & seeds)) / module_size

    # Open the output only after all scores are computed, and let the context
    # manager close it even if a write fails.
    with open(output_scores_file, 'w') as f:
        for node in nodes:
            if node in node_to_score:
                f.write("%s\t%f\n" % (node, node_to_score[node]))
            else:
                f.write("%s\t0.0\n" % node)
    return
def create_network_from_sif_file(network_file, **kwargs):
    """Create a network from network_file via network_utilities.

    Thin pass-through: network_file and every keyword argument are forwarded
    unchanged to network_utilities.create_network_from_sif_file, and its
    result is returned as is.
    """
    build_network = network_utilities.create_network_from_sif_file
    return build_network(network_file, **kwargs)
def create_network_from_sif_file(network_file, **kwargs):
    """Create a network from network_file via network_utilities.

    Thin pass-through: network_file and every keyword argument are forwarded
    unchanged to network_utilities.create_network_from_sif_file, and its
    result is returned as is.
    """
    network = network_utilities.create_network_from_sif_file(
        network_file,
        **kwargs
    )
    return network