def calculate_sab(G, nodes_from, nodes_to):
    """Return the separation s_AB between two node sets on graph ``G``.

    The value is ``d_AB - (d_A + d_B) / 2``, where ``d_A`` and ``d_B`` are
    the within-set distances reported by
    ``separation.calc_single_set_distance`` and ``d_AB`` is the between-set
    distance reported by ``separation.calc_set_pair_distances``.

    Args:
        G: graph object handed unchanged to the ``separation`` helpers.
        nodes_from: iterable of nodes forming the first set.
        nodes_to: iterable of nodes forming the second set.

    Returns:
        The separation value computed from the three distances.
    """
    # Each helper call receives its own freshly built set, as before.
    within_from = separation.calc_single_set_distance(G, set(nodes_from))
    within_to = separation.calc_single_set_distance(G, set(nodes_to))

    between = separation.calc_set_pair_distances(
        G, set(nodes_from), set(nodes_to)
    )

    # Between-set distance minus the average of the two within-set distances.
    mean_within = (within_from + within_to) / 2.
    return between - mean_within
    def analyze_proteins(protein_a, protein_b):
        """Return ``(d_AB, s_AB)`` for the gene sets of two proteins.

        A result already stored in ``cache`` under ``(protein_a, protein_b)``
        is returned as-is. This function only reads ``cache``; it does not
        store newly computed results there.

        Args:
            protein_a: key into ``genes`` for the first protein.
            protein_b: key into ``genes`` for the second protein.

        Returns:
            The cached entry if present, otherwise the tuple
            ``(d_AB, s_AB)`` with ``s_AB = d_AB - (d_A + d_B) / 2``.
        """
        pair = (protein_a, protein_b)
        if pair in cache:
            return cache[pair]

        # Keep only the genes of each protein that are present in the network.
        members_a, members_b = (
            set(genes[protein]) & all_genes_in_network for protein in pair
        )

        # Within-set distances first, then the between-set distance.
        within_a = calc_single_set_distance(G, members_a)
        within_b = calc_single_set_distance(G, members_b)
        between = calc_set_pair_distances(G, members_a, members_b)

        separation_score = between - (within_a + within_b) / 2.
        return between, separation_score
# Example #3
# 0
    def analyze_proteins(protein_a, protein_b):
        """Compute the pair distance and separation for two proteins.

        If ``cache`` already holds an entry for ``(protein_a, protein_b)``
        that entry is returned unchanged; nothing is written to ``cache``
        here.

        Args:
            protein_a: key into ``genes`` for the first protein.
            protein_b: key into ``genes`` for the second protein.

        Returns:
            The cached entry when available, else ``(d_AB, s_AB)`` where
            ``s_AB = d_AB - (d_A + d_B) / 2``.
        """
        cache_key = protein_a, protein_b
        if cache_key in cache:
            return cache[cache_key]

        def genes_in_network(protein):
            # Genes listed for the protein that also occur in the network.
            return set(genes[protein]) & all_genes_in_network

        net_a = genes_in_network(protein_a)
        net_b = genes_in_network(protein_b)

        dist_a = calc_single_set_distance(G, net_a)
        dist_b = calc_single_set_distance(G, net_b)
        dist_ab = calc_set_pair_distances(G, net_a, net_b)

        # Separation: between-set distance minus the mean within-set distance.
        return dist_ab, dist_ab - (dist_a + dist_b) / 2.
    def analyze_proteins(protein_a, protein_b):
        """Compute d_AB, s_AB and the closest-partner list for two proteins.

        For every gene of protein A the closest gene of protein B is looked
        up in the precomputed path lengths, and vice versa. ``d_AB`` is the
        mean of those closest distances and
        ``s_AB = d_AB - (d_A + d_B) / 2``.

        Args:
            protein_a: key into ``genes`` for the first protein.
            protein_b: key into ``genes`` for the second protein.

        Returns:
            A tuple ``(d_AB, s_AB, closest)`` where ``closest`` is the list
            of ``(source_gene, target_gene, distance)`` tuples sorted by
            distance. If no closest pair could be determined, ``d_AB`` is
            the mean of an empty list (NaN under NumPy).
        """
        genes_A = set(genes[protein_a]) & all_genes_in_network
        genes_B = set(genes[protein_b]) & all_genes_in_network

        all_path_lengths = get_pathlengths_for_two_sets(G, genes_A, genes_B)

        # Perform calculations
        d_A = calc_single_set_distance(G, genes_A)
        d_B = calc_single_set_distance(G, genes_B)

        def _closest_partner(source, targets):
            # Return the (source, target, distance) tuple with the smallest
            # distance, or None when no target has a usable distance.
            candidates = []
            for target in targets:
                if source == target:
                    candidates.append((source, target, 0))
                    continue
                # The lookup is keyed [smaller gene][larger gene].
                low, high = min(source, target), max(source, target)
                try:
                    candidates.append(
                        (source, target, all_path_lengths[low][high]))
                except KeyError:
                    # Pairs without an entry are skipped on purpose
                    # (presumably no path exists -- confirm against
                    # get_pathlengths_for_two_sets).
                    pass
            if not candidates:
                return None
            return min(candidates, key=lambda x: x[2])

        all_distances = []
        # Closest B gene for every A gene, then closest A gene for every
        # B gene.
        for sources, targets in ((genes_A, genes_B), (genes_B, genes_A)):
            for source in sources:
                best = _closest_partner(source, targets)
                if best is not None:
                    all_distances.append(best)

        # Build a real list: on Python 3 ``map`` returns an iterator, which
        # np.mean cannot average.
        d_AB = np.mean([entry[2] for entry in all_distances])
        s_AB = d_AB - (d_A + d_B) / 2.

        return d_AB, s_AB, sorted(all_distances, key=lambda x: x[2])
def compare_SD(disease_nodes, Gint, num_reps=10):
    """Compare the disease-set distance on ``Gint`` with randomized graphs.

    The measure returned by ``separation.calc_single_set_distance`` is
    computed once on ``Gint`` and then on ``num_reps`` random graphs that
    are generated from the degree sequence of ``Gint`` with the
    configuration model and relabelled with the node names of ``Gint``.

    Args:
        disease_nodes: nodes of interest; the same nodes are used as seeds
            on every randomized graph.
        Gint: networkx graph providing the degree sequence and node labels.
        num_reps: number of randomized graphs to generate.

    Returns:
        A tuple ``(SD_disease, SD_rand)``: the measure on ``Gint`` and the
        list of values accumulated over all randomized graphs.
    """
    SD_disease = separation.calc_single_set_distance(Gint, disease_nodes)

    # dict() accepts both the plain dict returned by networkx 1.x and the
    # DegreeView returned by networkx 2.x, so this works on either version.
    degree_by_node = dict(Gint.degree())
    node_labels = list(degree_by_node)
    degree_sequence = [degree_by_node[node] for node in node_labels]

    # get random distribution
    SD_rand = []
    for i in range(num_reps):
        print('calculating random set ' + str(i) + ' out of ' + str(num_reps))

        G_temp = nx.configuration_model(degree_sequence)
        # Collapse the multigraph into a simple undirected graph
        # (parallel edges merge; self-loops are kept).
        G_rand = nx.Graph()
        G_rand.add_edges_from(G_temp.edges())

        # Node i of the configuration model carries degree_sequence[i], so
        # it takes the label of the matching node in Gint. Keying the map by
        # the indices actually present keeps labels aligned even when
        # zero-degree nodes are missing from G_rand.
        relabel = {index: node_labels[index] for index in G_rand.nodes()}
        G_rand = nx.relabel_nodes(G_rand, relabel)

        # Seeds are the disease nodes themselves, not a random sample.
        rand_seeds = disease_nodes

        # get random shortest distances
        # NOTE(review): extend() needs an iterable return value; if the
        # helper returns a scalar this should be append() -- confirm.
        SD_rand.extend(separation.calc_single_set_distance(G_rand, rand_seeds))

    return SD_disease, SD_rand
    # --------------------------------------------------------
    #
    # CALCULATE NETWORK QUANTITIES
    #
    # --------------------------------------------------------

    # get lcc size S
    lcc = get_lcc_size(G, gene_set)

    print("\n> lcc size = %s" % (get_lcc_size(G, gene_set)))
    edge_result = get_edges_size(G, gene_set)
    mean = edge_result / len(gene_set)
    print("> edges = %s " % edge_result)
    print("> mean edges = %s" % mean)
    # get mean shortest distance
    d_s = tools.calc_single_set_distance(G, gene_set)
    print("> mean shortest distance = %s" % (d_s))

    results_message = """
> gene set from \"%s\": %s genes
> lcc size   S = %s
> diameter d_s = %s
""" % (gene_file, len(gene_set), lcc, d_s)

    # --------------------------------------------------------
    #
    # CALCULATE RANDOM COMPARISON
    #
    # --------------------------------------------------------

    results_message = results_message + get_random_comparison(G, gene_set, sims)
            len(gene_set_full - all_genes_in_network))
        print "> remaining number of genes: %s" %(len(gene_set))


    # --------------------------------------------------------
    #
    # CALCULATE NETWORK QUANTITIES
    #
    # --------------------------------------------------------

    # get lcc size S
    lcc = get_lcc_size(G,gene_set)
    print "\n> lcc size = %s" %(lcc)

    # get mean shortest distance
    d_s = tools.calc_single_set_distance(G,gene_set)
    print "> mean shortest distance = %s" %(d_s)

    results_message = """
> gene set from \"%s\": %s genes
> lcc size   S = %s
> diameter d_s = %s
"""%(gene_file,len(gene_set),lcc,d_s)

    # --------------------------------------------------------
    #
    # CALCULATE RANDOM COMPARISON
    #
    # --------------------------------------------------------

    results_message += get_random_comparison(G,gene_set,sims)