Exemplo n.º 1
0
def create_edge_scores_file(network_file, edge_scores_file, edge_to_score = None, default_score=1):
    """
    Write one "<u> <score> <v>" line per edge of the network in network_file.

    network_file -- SIF file loaded via network_utilities.create_network_from_sif_file
    edge_scores_file -- path of the output file (overwritten)
    edge_to_score -- optional mapping from (u, v) node pairs to a numeric score;
                     an edge is looked up as (u, v) first and then as (v, u).
                     None means "no scores known".
    default_score -- score written for edges found under neither orientation
    """
    if edge_to_score is None:
        # The declared default must be usable: without a mapping every edge
        # simply receives default_score.
        edge_to_score = {}
    g = network_utilities.create_network_from_sif_file(network_file)
    # The context manager guarantees the output file is closed even if a
    # score cannot be formatted as a float.
    with open(edge_scores_file, 'w') as f:
        for u, v in g.edges_iter():
            if (u, v) in edge_to_score:
                score = edge_to_score[(u, v)]
            elif (v, u) in edge_to_score:
                score = edge_to_score[(v, u)]
            else:
                score = default_score
            f.write("%s %f %s\n" % (u, score, v))
    return
Exemplo n.º 2
0
def get_node_association_score_mapping(network_file, network_file_identifier_type, node_description_file, association_scores_file, association_scores_file_identifier_type, log_file = None, default_seed_score=1.0):
    """
    Map genes and their association scores onto the nodes of the network.

    The network is read from network_file and the scored genes from
    association_scores_file; node_description_file provides the translation
    between the two identifier types. A node becomes a seed when at least one
    of the genes it maps to has a score; its seed score is the mean of the
    scores of those genes. If the score file carries no score values, every
    listed gene gets default_seed_score.

    When log_file is given, diagnostics (nodes mapped to several scored genes,
    non-positive seed scores, coverage summary) are appended to it.

    Returns a dictionary mapping each seed node to its score.
    """
    g = network_utilities.create_network_from_sif_file(network_file)
    nodes = g.nodes()
    setNode, setDummy, dictNode, dictDummy = network_utilities.get_nodes_and_edges_from_sif_file(file_name = association_scores_file, store_edge_type = False)
    if dictNode is None:
        dictNode = dict((gene, default_seed_score) for gene in setNode)
    node_to_genes, gene_to_nodes = biana_output_converter.get_attribute_to_attribute_mapping(node_description_file, network_file_identifier_type, association_scores_file_identifier_type, keys_to_include=set(nodes))
    covered_genes = set()
    seeds = set()
    seed_to_score = {}
    if log_file is not None:
        log_fd = open(log_file, "a")
    else:
        log_fd = None
    try:
        for v in nodes:
            gene_with_score = setNode & node_to_genes[v]
            if not gene_with_score:
                continue
            covered_genes |= gene_with_score
            seeds.add(v)
            if len(gene_with_score) > 1 and log_fd is not None:
                log_fd.write("More than one gene: %s for %s\n" % (gene_with_score, v))
            # Average only over the genes of this node. Averaging over
            # covered_genes would mix in the scores of every gene seen for
            # previously visited nodes.
            score = sum(float(dictNode[gene]) for gene in gene_with_score) / len(gene_with_score)
            if score <= 0 and log_fd is not None:
                log_fd.write("non-positive seed score %s %s genes: %s\n" % (v, score, node_to_genes[v]))
            seed_to_score[v] = score
        if log_fd is not None:
            log_fd.write("Covered genes (seed genes): %s among %s\n" % (len(covered_genes), len(setNode)))
            log_fd.write("Covered gene products (seed nodes): %s among %s\n" % (len(seeds), g.number_of_nodes()))
    finally:
        # Close the log even when a lookup above raises.
        if log_fd is not None:
            log_fd.close()
    return seed_to_score
Exemplo n.º 3
0
def create_network_from_weight_and_score_files(edge_file_weights,
                                               edge_file_scores):
    """
    Load the weighted network and divide its edge weights by the edge scores.

    The last three characters of each argument are replaced by "sif" to get
    the file that is actually read. For every edge (u, v) with score s in the
    score file, the weight currently stored on (u, v) is divided by
    (s * 100 + 1) and written back to the graph. Returns the graph.
    """
    weights_sif = edge_file_weights[:-3] + "sif"
    g = network_utilities.create_network_from_sif_file(
        network_file=weights_sif, weighted=True)
    scores_sif = edge_file_scores[:-3] + "sif"
    setNode, setEdge, dictNode, dictEdge = network_utilities.get_nodes_and_edges_from_sif_file(
        file_name=scores_sif, store_edge_type=True)
    for (u, v), raw_score in dictEdge.iteritems():
        # The +1 keeps the divisor non-zero for a score of 0.
        divisor = float(raw_score) * 100 + 1
        weight = g.get_edge(u, v)
        g.add_edge(u, v, weight / divisor)
    return g
Exemplo n.º 4
0
def create_degree_filtered_network_file(network_file, network_file_filtered, max_degree, largest_connected_component=True):
    """
    Write a degree-filtered copy of the network in network_file.

    The network is loaded without edge data and passed to
    network_utilities.filter_network with max_degree as the degree threshold
    and the given largest_connected_component flag. Nodes whose degree is 0
    after filtering are then removed, and the result is written in SIF format
    to network_file_filtered.
    """
    graph = network_utilities.create_network_from_sif_file(network_file, use_edge_data = False)
    graph = network_utilities.filter_network(g = graph, degree_threshold = max_degree, largest_connected_component = largest_connected_component)
    # Degrees are taken once, before any node is removed.
    node_to_degree = graph.degree(with_labels=True)
    isolated = [node for node in graph.nodes() if node_to_degree[node] == 0]
    for node in isolated:
        graph.delete_node(node)
    network_utilities.output_network_in_sif(graph, network_file_filtered)
    return
Exemplo n.º 5
0
def old_create_edge_scores_as_node_scores_file(edges, node_to_score, edge_scores_file, ignored_nodes = None, default_score = 0):
    """
    Create an edge score file from node association scores, intended for
    comparing netshort with other algorithms without using any other edge
    reliability/relevance score.

    edges -- iterable of edges; the first two items of each edge are its nodes
    node_to_score -- mapping from node to a numeric (or numeric string) score
    edge_scores_file -- path of the output file (overwritten)
    ignored_nodes -- optional container of nodes whose score is replaced by
                     default_score
    default_score -- score used for ignored nodes

    Each output line is "<u> <score> <v>", where score is the mean of the two
    node scores. Raises KeyError for a non-ignored node missing from
    node_to_score.
    """
    def node_score(node):
        # Convert to float so integer inputs are not truncated by the
        # averaging division and string scores read from a file still work.
        if ignored_nodes is not None and node in ignored_nodes:
            return float(default_score)
        return float(node_to_score[node])

    with open(edge_scores_file, 'w') as f:
        for edge in edges:
            # Tolerate edges that carry a trailing data item, e.g. (u, v, data).
            u, v = edge[0], edge[1]
            f.write("%s %f %s\n" % (u, (node_score(u) + node_score(v)) / 2, v))
    return
Exemplo n.º 6
0
def create_edge_reliability_filtered_network_file(network_file, network_file_prefix, out_file):
    """
    Write the edges of network_file that have enough supporting evidence.

    Evidence is read from the edge attribute files
    network_file_prefix + "_source.eda" and network_file_prefix + "_pubmed.eda".
    An edge is kept when the number of source values plus the number of
    pubmed values recorded for it is greater than 2. An edge missing from an
    attribute file counts as 0 for that attribute.

    Each kept edge is written to out_file as "<u>\\t<edge data>\\t<v>".

    Only source and pubmed counts take part in the filter. The method-count
    based linear score that used to be accumulated here was never used for
    the output, so it is no longer computed.
    """
    def count_values(edge_to_values, u, v):
        # An attribute file may list the edge in either orientation.
        if (u, v) in edge_to_values:
            return len(edge_to_values[(u, v)])
        if (v, u) in edge_to_values:
            return len(edge_to_values[(v, u)])
        return 0

    edge_to_sources = network_utilities.get_edge_values_from_sif_attribute_file(file_name = network_file_prefix + "_source.eda", store_edge_type = False)
    edge_to_pubmeds = network_utilities.get_edge_values_from_sif_attribute_file(file_name = network_file_prefix + "_pubmed.eda", store_edge_type = False)
    g = network_utilities.create_network_from_sif_file(network_file, use_edge_data = True)
    with open(out_file, 'w') as f:
        for u, v in g.edges_iter():
            # Counts are recomputed for every edge so that an edge without
            # evidence can neither reuse the previous edge's counts nor hit
            # an unbound variable.
            score_source = count_values(edge_to_sources, u, v)
            score_pubmed = count_values(edge_to_pubmeds, u, v)
            if score_source + score_pubmed > 2:
                f.write("%s\t%s\t%s\n" % (u, g.get_edge(u, v), v))
    return
Exemplo n.º 7
0
def create_network_from_edge_file(edge_file_weights):
    """
    Load the weighted network that belongs to edge_file_weights.

    The last three characters of edge_file_weights are replaced by "sif" to
    obtain the file that is read. Returns the loaded graph.
    """
    sif_path = edge_file_weights[:-3] + "sif"
    return network_utilities.create_network_from_sif_file(
        network_file=sif_path, weighted=True)
Exemplo n.º 8
0
def get_edges_in_network(network_file, data=False):
    """
    Return the edges of the network stored in network_file.

    The data flag is forwarded unchanged to the graph's edges() method.
    """
    graph = network_utilities.create_network_from_sif_file(network_file)
    edges = graph.edges(data=data)
    return edges
Exemplo n.º 9
0
def create_ARFF_network_metrics_file(network_file, node_to_score, seeds, arff_file_name):
    """
    Load network_file without edge data and hand the graph, together with
    node_to_score, seeds and arff_file_name, to
    network_utilities.create_ARFF_network_metrics_file.
    """
    graph = network_utilities.create_network_from_sif_file(network_file, use_edge_data = False)
    network_utilities.create_ARFF_network_metrics_file(graph, node_to_score, seeds, arff_file_name)
Exemplo n.º 10
0
def create_R_analyze_network_script(network_file, seeds=None, out_path="./", title = ""):
    """
    Load network_file without edge data and call
    network_utilities.create_R_analyze_network_script twice on the graph:
    first with its default settings, then with scale_by_log=True.
    """
    graph = network_utilities.create_network_from_sif_file(network_file, use_edge_data = False)
    # First pass uses the helper's defaults, second pass asks for log scaling.
    for extra_options in ({}, {"scale_by_log": True}):
        network_utilities.create_R_analyze_network_script(graph, seeds, out_path, title, **extra_options)
Exemplo n.º 11
0
def analyze_network(network_file, out_file = None, seeds = None):
    """
    Load network_file without edge data and pass the graph, out_file and
    seeds on to network_utilities.analyze_network.
    """
    graph = network_utilities.create_network_from_sif_file(network_file, use_edge_data = False)
    network_utilities.analyze_network(graph, out_file = out_file, seeds = seeds)
Exemplo n.º 12
0
def get_nodes_in_network(network_file):
    """
    Return the nodes of the network stored in network_file.
    """
    graph = network_utilities.create_network_from_sif_file(network_file)
    node_list = graph.nodes()
    return node_list
Exemplo n.º 13
0
def get_network_as_graph(network_file, use_edge_data):
    """
    Load the SIF file network_file and return it as a graph; use_edge_data is
    forwarded unchanged to the loader.
    """
    load_sif = network_utilities.create_network_from_sif_file
    return load_sif(network_file_in_sif = network_file, use_edge_data = use_edge_data)
Exemplo n.º 14
0
def sample_network_preserving_topology(network_sif_file, n_sample, output_prefix):
    """
    Write n_sample randomized versions of the network in network_sif_file.

    The network is loaded with edge data. Each sample is produced by
    network_utilities.randomize_graph with randomization type
    "preserve_topology_and_node_degree" and written in SIF format to
    output_prefix followed by the sample number (1 .. n_sample).
    """
    original = network_utilities.create_network_from_sif_file(network_file_in_sif = network_sif_file, use_edge_data = True)
    for sample_no in xrange(1, n_sample + 1):
        randomized = network_utilities.randomize_graph(graph=original, randomization_type="preserve_topology_and_node_degree")
        out_name = output_prefix + str(sample_no)
        network_utilities.output_network_in_sif(randomized, out_name)
    return