Exemplo n.º 1
0
            )

    ####################################
    ##### Network Evaluation Setup #####
    ####################################

    # Limit core usage (if defined)
    import mkl
    mkl.set_num_threads(args.cores)

    # Load Network
    network = dit.load_network_file(args.network_path, verbose=args.verbose)
    network_size = len(network.nodes())

    # Load Gene sets
    genesets_raw = dit.load_node_sets(args.node_sets_file,
                                      verbose=args.verbose)

    # Calculate network kernel (also determine propagation constant if not set)
    kernel = nef.construct_prop_kernel(network, alpha=args.alpha, verbose=True)

    # Change background gene list if needed
    if args.background == 'genesets':
        background_node_set = set()
        for geneset in genesets:
            background_node_set = background_node_set.union(genesets[geneset])
        background_nodes = list(
            background_node_set.intersection(set(kernel.index)))
    else:
        background_nodes = list(kernel.index)

    # Filter gene sets
Exemplo n.º 2
0
def main(args):
    """Evaluate one network against a collection of gene sets and save results.

    The network is scored by computing AUPRC values for every gene set on the
    real network and on 10 shuffled (null) networks, then deriving a
    performance score and a performance gain from the two.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``infile`` (network file), ``diseasefile`` (gene set file),
            ``outname`` (prefix for all output files), ``cores``,
            ``verbose`` and ``size`` (``'s'`` selects the small-network
            AUPRC wrapper, anything else the large-network one).

    Returns:
        None. Three files are written:
        ``<outname>_performance_score.csv``,
        ``<outname>_performance_gain.csv`` (both tab-separated) and
        ``<outname>_summary``.
    """
    input_network_file = args.infile  # Input gene interaction set
    gene_set_file = args.diseasefile
    outname = args.outname
    n_cores = args.cores
    is_verbose = args.verbose

    # Pick the AUPRC wrapper once instead of branching at every call site.
    if args.size == 's':
        auprc_wrapper = nef.small_network_AUPRC_wrapper
    else:
        auprc_wrapper = nef.large_network_AUPRC_wrapper

    # Load network (We choose a smaller network here for the example's sake)
    network = dit.load_network_file(input_network_file, verbose=is_verbose)

    # Load gene sets for analysis
    genesets = dit.load_node_sets(gene_set_file)

    # Calculate geneset sub-sample rate
    genesets_p = nef.calculate_p(network, genesets)

    # Determine optimal alpha for network (can also be done automatically by next step)
    alpha = prop.calculate_alpha(network)

    def run_auprc(prop_kernel):
        # Same settings for the real network and for every null network so
        # that their AUPRC values are directly comparable.
        # Might want to tweak values here to speed up calculation
        return auprc_wrapper(prop_kernel,
                             genesets,
                             genesets_p,
                             n=30,
                             cores=n_cores,
                             verbose=is_verbose)

    # Calculate network kernel for propagation
    kernel = nef.construct_prop_kernel(network,
                                       alpha=alpha,
                                       verbose=is_verbose)

    # Calculate the AUPRC values for each gene set
    AUPRC_values = run_auprc(kernel)

    # Construct null networks and calculate the AUPRC of the gene sets of the null networks
    null_AUPRCs = []
    for i in range(10):
        shuffNet = nef.shuffle_network(network,
                                       max_tries_n=10,
                                       verbose=is_verbose)
        shuffNet_kernel = nef.construct_prop_kernel(shuffNet,
                                                    alpha=alpha,
                                                    verbose=is_verbose)
        null_AUPRCs.append(run_auprc(shuffNet_kernel))
        # Single pre-formatted string: prints the same text under both the
        # Python 2 print statement and the Python 3 print function.
        print('shuffNet %d AUPRCs calculated' % (i + 1))

    # Construct table of null AUPRCs
    null_AUPRCs_table = pd.concat(null_AUPRCs, axis=1)
    null_AUPRCs_table.columns = [
        'shuffNet' + repr(i + 1) for i in range(len(null_AUPRCs))
    ]

    # Calculate performance metric of gene sets; This is the Z-score
    network_performance = nef.calculate_network_performance_score(
        AUPRC_values, null_AUPRCs_table, verbose=is_verbose)
    network_performance.name = 'Test Network'
    network_performance.to_csv(outname + '_performance_score.csv', sep='\t')

    # Calculate network performance gain over median null AUPRC;
    network_perf_gain = nef.calculate_network_performance_gain(
        AUPRC_values, null_AUPRCs_table, verbose=is_verbose)
    network_perf_gain.name = 'Test Network'
    network_perf_gain.to_csv(outname + '_performance_gain.csv', sep='\t')

    # NOTE: ranking this network against a previously evaluated set of
    # networks is not performed here; only the four summary statistics below
    # are reported.

    # Construct network summary table
    n_nodes = int(len(network.nodes()))
    n_edges = int(len(network.edges()))
    network_summary = {}
    network_summary['Nodes'] = n_nodes
    network_summary['Edges'] = n_edges
    # dict.values() is a view on Python 3, which np.mean cannot reduce, so
    # materialise it as a list first. float() keeps repr() in the summary
    # file a plain number regardless of the installed NumPy version.
    network_summary['Avg Node Degree'] = float(
        np.mean(list(dict(network.degree()).values())))
    network_summary['Edge Density'] = 2 * n_edges / float(
        n_nodes * (n_nodes - 1))
    with open(outname + '_summary', 'w') as f:
        for item in ['Nodes', 'Edges', 'Avg Node Degree', 'Edge Density']:
            f.write(item + ':\t' + repr(network_summary[item]) + '\n')
Exemplo n.º 3
0
def AUPRC_Analysis_single(network_file,
                          genesets_file,
                          shuffle=False,
                          kernel_file=None,
                          prop_constant=None,
                          subsample_iter=30,
                          cores=1,
                          geneset_background=False,
                          save_path=None,
                          verbose=True):
    """Run the AUPRC analysis for a single network and return the AUPRC table.

    Args:
        network_file: Path passed to ``dit.load_network_file``.
        genesets_file: Path passed to ``dit.load_node_sets``.
        shuffle: If true, the loaded network is replaced by the result of
            ``shuffle_network`` before any further processing.
        kernel_file: Optional path to a precomputed propagation kernel. Files
            ending in ``.hdf`` are read with ``pd.read_hdf``, anything else
            with ``pd.read_csv``. When ``None`` the kernel is computed.
        prop_constant: Propagation constant used when the kernel is computed;
            when ``None`` it is obtained from ``prop.calculate_alpha``.
        subsample_iter: Passed as ``n`` to the AUPRC wrapper.
        cores: Passed as ``cores`` to the AUPRC wrapper for networks with
            fewer than 15000 nodes; larger networks always use ``cores=1``.
        geneset_background: If true, the background is the union of all gene
            sets restricted to network nodes; otherwise all network nodes.
        save_path: If not ``None``, the AUPRC table is written there as CSV.
        verbose: If true, progress messages are printed.

    Returns:
        The table returned by the selected AUPRC wrapper.
    """
    starttime = time.time()
    # Load network
    network = dit.load_network_file(network_file, verbose=verbose)
    # Shuffle network?
    if shuffle:
        network = shuffle_network(network, verbose=verbose)
    # Get network size
    net_nodes = network.nodes()
    net_size = len(net_nodes)
    if verbose:
        print('Network size:', net_size, 'Nodes')
    # Calculate or load network propagation kernel
    if kernel_file is None:
        # Determine propagation constant
        if prop_constant is None:
            alpha = prop.calculate_alpha(network)
        else:
            alpha = prop_constant
        # Calculate network propagation kernel
        net_kernel = construct_prop_kernel(network,
                                           alpha=alpha,
                                           verbose=verbose)
    elif kernel_file.endswith('.hdf'):
        # Load network propagation kernel
        net_kernel = pd.read_hdf(kernel_file)
    else:
        net_kernel = pd.read_csv(kernel_file)
    # Load node sets to recover
    genesets = dit.load_node_sets(genesets_file, verbose=verbose)
    # Calculate sub-sample rate for each node set given network
    genesets_p = calculate_p(network, genesets)
    # Set background of genes to recover as all network nodes or union of all gene sets' genes
    if geneset_background:
        background_gene_set = set().union(
            *(genesets[geneset] for geneset in genesets))
        background_genes = list(
            background_gene_set.intersection(set(net_nodes)))
    else:
        background_genes = list(net_nodes)
    # Choose the wrapper and core count by network size:
    #   < 10000 nodes          -> small-network wrapper, requested cores
    #   10000 to 14999 nodes   -> large-network wrapper, requested cores
    #   >= 15000 nodes         -> large-network wrapper, forced to one core
    if net_size < 10000:
        auprc_wrapper, wrapper_cores = small_network_AUPRC_wrapper, cores
    elif net_size < 15000:
        auprc_wrapper, wrapper_cores = large_network_AUPRC_wrapper, cores
    else:
        auprc_wrapper, wrapper_cores = large_network_AUPRC_wrapper, 1
    AUPRC_table = auprc_wrapper(net_kernel,
                                genesets,
                                genesets_p,
                                n=subsample_iter,
                                cores=wrapper_cores,
                                bg=background_genes,
                                verbose=verbose)
    if verbose:
        print('AUPRC values calculated', time.time() - starttime, 'seconds')
    # Save table; only report a save when one actually happened.
    if save_path is not None:
        AUPRC_table.to_csv(save_path)
        if verbose:
            print('AUPRC table saved:', save_path)
    return AUPRC_table
Exemplo n.º 4
0
from network_evaluation_tools import network_propagation as prop
import pandas as pd
import numpy as np

import pickle
import os
#%%

# Notebook-style walkthrough: each `#%%` marker below starts a new cell.
# NOTE(review): `dit` and `nef` are not imported in the visible part of this
# script - presumably they are imported above this point; confirm.

# Load network (We choose a smaller network here for the example's sake)
network = dit.load_network_file('../Data/string_edge_list_common_names.tsv', verbose=True, delimiter='\t')
# Report how many nodes the loaded network contains.
print(len(network.nodes))

#%%

# Load gene sets for analysis
genesets = dit.load_node_sets('../Data/DisGeNET_genesets.txt')

#%%

# Calculate geneset sub-sample rate
genesets_p = nef.calculate_p(network, genesets)

#%%

# Determine optimal alpha for network (can also be done automatically by next step)
alpha = prop.calculate_alpha(network)
print(alpha)

#%%
# NOTE(review): networkx is imported here but `nx` is not used in the visible
# part of the script; the node count printed below repeats the one above.
import networkx as nx
print(len(network.nodes))
Exemplo n.º 5
0
# Command-line usage as read below: argv[1] = network file, argv[2] = gene
# set file, argv[3] = output directory.
output_dir = sys.argv[3]
# Make sure the directory path ends with a slash.
# NOTE(review): indexing [-1] raises IndexError if an empty string is passed.
if output_dir[-1] != '/':
    output_dir += '/'

# Create the output dir; an already existing directory is reported, not fatal.
# os.mkdir only creates the last path component, so parent directories must
# already exist.
try:
    os.mkdir(output_dir)
except FileExistsError:
    print('Output dir already exists: ' + output_dir)

# Load network (We choose a smaller network here for the example's sake)
network = dit.load_network_file(sys.argv[1], verbose=True, delimiter='\t')
# Report how many nodes the loaded network contains.
print(len(network.nodes))

# Load gene sets for analysis
genesets = dit.load_node_sets(sys.argv[2])

# Calculate geneset sub-sample rate
genesets_p = nef.calculate_p(network, genesets)

# Determine optimal alpha for network (can also be done automatically by next step)
alpha = prop.calculate_alpha(network)
print(alpha)

# Calculate network kernel for propagation
kernel = nef.construct_prop_kernel(network, alpha=alpha, verbose=True)

# Diagnostic output: show the kernel's index and the loaded gene sets.
print(kernel.index)
print(genesets)

# Calculate the AUPRC values for each gene set