# Example 1
def calculate_correlations(table: Table, method: str, p_adjustment_method: str = 'fdr_bh',
                           n_procs: int = 1, sparcc_p: bool = False, bootstraps: int = 1000) -> pd.DataFrame:
    """Correlate all pairs of features in a biom table.

    Parameters
    ----------
    table : Table
        biom table of features (observations) by samples to correlate
    method : str
        name of the correlation metric: 'spearman', 'pearson', 'kendall' or 'sparcc'
    p_adjustment_method : str
        multiple-testing correction method passed through to the correlation helpers
    n_procs : int
        number of processors to use
    sparcc_p : bool
        if True, also compute bootstrap p-values for sparcc correlations
    bootstraps : int
        number of bootstraps used when sparcc p-values are requested

    Returns
    -------
    pd.DataFrame
        pairwise correlations (and p-values when computed)

    Raises
    ------
    ValueError
        if `method` is not one of the accepted correlation metrics
    """
    print("Correlating with %s" % method)
    # Map the user-facing name to its implementation. The mapping is defined
    # locally (mirroring within_correls) so this function does not depend on a
    # module-level dict that may not exist; unknown names now hit the ValueError
    # below instead of leaking a KeyError.
    correl_methods = {'spearman': spearmanr, 'pearson': pearsonr, 'kendall': kendalltau, 'sparcc': 'sparcc'}
    if method.lower() not in correl_methods:
        raise ValueError('Provided correlation metric is not an accepted method.')
    method = correl_methods[method.lower()]
    if method in [spearmanr, pearsonr, kendalltau]:
        correls = ca.calculate_correlations(table, method, p_adjust_method=p_adjustment_method, nprocs=n_procs)
    else:  # method == 'sparcc'
        if sparcc_p:
            correls = ca.fastspar_correlation(table, verbose=True, nprocs=n_procs, calc_pvalues=True,
                                              bootstraps=bootstraps, p_adjust_method=p_adjustment_method)
        else:
            correls = ca.fastspar_correlation(table, verbose=True, nprocs=n_procs)
    return correls
# Example 2
def within_correls(input_loc, output_loc, correl_method='sparcc', sparcc_filter=False, min_sample=None, procs=1,
                   sparcc_p=1000, p_adjust='fdr_bh', verbose=False):
    """Run the SCNIC "within" workflow: load a biom table, optionally filter it,
    correlate all feature pairs and write the correlations plus a correlation
    network (GML) to `output_loc`.

    Parameters
    ----------
    input_loc : str
        path to the input biom table
    output_loc : str
        output directory (created if it does not exist)
    correl_method : str
        'spearman', 'pearson', 'kendall' or 'sparcc'
    sparcc_filter : bool
        if True, apply the filter from the sparcc paper before correlating
    min_sample : int or None
        if given (and sparcc_filter is False), keep features present in at
        least this many samples
    procs : int
        number of processors to use
    sparcc_p : int or None
        number of bootstraps for sparcc p-values; None skips p-value calculation
    p_adjust : str
        multiple-testing correction method
    verbose : bool
        print progress messages

    Raises
    ------
    ValueError
        if `correl_method` maps to nothing recognized (should be unreachable)
    """
    logger = general.Logger(path.join(output_loc, "SCNIC_within_log.txt"))
    logger["SCNIC analysis type"] = "within"

    # correlation and p-value adjustment methods; keep the readable name so it
    # (not the function object's repr) is what gets printed and logged
    correl_methods = {'spearman': spearmanr, 'pearson': pearsonr, 'kendall': kendalltau, 'sparcc': 'sparcc'}
    method_name = correl_method.lower()
    correl_method = correl_methods[method_name]

    # get features to be correlated
    table = load_table(input_loc)
    logger["input table"] = input_loc
    if verbose:
        print("Table loaded: " + str(table.shape[0]) + " observations")
        print("")
    logger["number of samples in input table"] = table.shape[1]
    logger["number of observations in input table"] = table.shape[0]

    # make new output directory
    if output_loc is not None:
        if not path.isdir(output_loc):
            os.makedirs(output_loc)
    logger["output directory"] = path.abspath(output_loc)

    # filter
    if sparcc_filter is True:
        table_filt = general.sparcc_paper_filter(table)
        if verbose:
            print("Table filtered: %s observations" % str(table_filt.shape[0]))
            print("")
        logger["sparcc paper filter"] = True
        logger["number of observations present after filter"] = table_filt.shape[0]
    elif min_sample is not None:
        table_filt = general.filter_table(table, min_sample)
        if verbose:
            print("Table filtered: %s observations" % str(table_filt.shape[0]))
            print("")
        logger["min samples present"] = min_sample
        logger["number of observations present after filter"] = table_filt.shape[0]
    else:
        table_filt = table

    logger["number of processors used"] = procs

    # correlate features
    if correl_method in [spearmanr, pearsonr, kendalltau]:
        if verbose:
            # print the user-facing name, not the scipy function object
            print("Correlating with %s" % method_name)
        correls = ca.calculate_correlations(table_filt, correl_method, nprocs=procs, p_adjust_method=p_adjust)
    elif correl_method == 'sparcc':
        if sparcc_p is None:
            correls = ca.fastspar_correlation(table_filt, verbose=verbose, nprocs=procs)
        else:
            correls = ca.fastspar_correlation(table_filt, calc_pvalues=True, bootstraps=sparcc_p,
                                              verbose=verbose, nprocs=procs, p_adjust_method=p_adjust)
    else:
        raise ValueError("How did this even happen?")
    # log the readable method name (the parameter was rebound to the
    # implementation above, whose repr is not useful in a log)
    logger["distance metric used"] = method_name
    if verbose:
        print("Features Correlated")
        print("")

    correls.to_csv(path.join(output_loc, 'correls.txt'), sep='\t', index_label=('feature1', 'feature2'))
    if verbose:
        print("Correls.txt written")

    # make correlation network
    metadata = general.get_metadata_from_table(table_filt)
    net = general.correls_to_net(correls, metadata=metadata)
    nx.write_gml(net, path.join(output_loc, 'correlation_network.gml'))
    if verbose:
        print("Network made")
        print("")

    logger.output_log()
def test_calculate_correlations(biom_table1):
    # Correlate the fixture table with pearson and keep only strongly
    # positively correlated pairs (r > .8)
    correls = calculate_correlations(biom_table1, corr_method=pearsonr)
    strong_pairs = correls.loc[correls.r > .8]
    expected_pairs = {('Observ_0', 'Observ_1'), ('Observ_0', 'Observ_2'),
                      ('Observ_1', 'Observ_2'), ('Observ_3', 'Observ_4')}
    # exactly the expected pairs, no extras and none missing
    assert expected_pairs == set(strong_pairs.index)
# Example 4
def within_correls(args):
    """Run the SCNIC "within" workflow from parsed command-line arguments:
    load a biom table, optionally filter it, correlate all feature pairs and
    write correls.txt plus a correlation network (GML) into the output
    directory (the process chdirs into `args.output`).

    Expected attributes on `args`: input, output, correl_method,
    sparcc_filter, min_sample, procs, sparcc_p, verbose.

    Raises
    ------
    NotImplementedError
        if sparcc p-values are requested (not yet reimplemented with fastspar)
    ValueError
        if the correlation method maps to nothing recognized (should be unreachable)
    """
    logger = general.Logger("SCNIC_within_log.txt")
    logger["SCNIC analysis type"] = "within"

    # correlation and p-value adjustment methods
    correl_methods = {'spearman': spearmanr, 'pearson': pearsonr, 'kendall': kendalltau, 'sparcc': 'sparcc'}
    correl_method = correl_methods[args.correl_method.lower()]

    # get features to be correlated
    table = load_table(args.input)
    logger["input table"] = args.input
    if args.verbose:
        print("Table loaded: " + str(table.shape[0]) + " observations")
        print("")
    logger["number of samples in input table"] = table.shape[1]
    logger["number of observations in input table"] = table.shape[0]

    # make new output directory and change to it
    if args.output is not None:
        if not os.path.isdir(args.output):
            os.makedirs(args.output)
        os.chdir(args.output)
    logger["output directory"] = os.getcwd()

    # filter
    if args.sparcc_filter is True:
        table_filt = general.sparcc_paper_filter(table)
        if args.verbose:
            print("Table filtered: %s observations" % str(table_filt.shape[0]))
            print("")
        logger["sparcc paper filter"] = True
        logger["number of observations present after filter"] = table_filt.shape[0]
    elif args.min_sample is not None:
        table_filt = general.filter_table(table, args.min_sample)
        if args.verbose:
            print("Table filtered: %s observations" % str(table_filt.shape[0]))
            print("")
        logger["min samples present"] = args.min_sample
        logger["number of observations present after filter"] = table_filt.shape[0]
    else:
        table_filt = table

    logger["number of processors used"] = args.procs

    # correlate features
    if correl_method in [spearmanr, pearsonr, kendalltau]:
        if args.verbose:
            print("Correlating with %s" % args.correl_method)
        correls = ca.calculate_correlations(table_filt, correl_method)
    elif correl_method == 'sparcc':
        # fail fast BEFORE running the expensive sparcc correlation, not after
        if args.sparcc_p is not None:
            raise NotImplementedError()  # TODO: reimplement with fastspar
        correls = ca.fastspar_correlation(table_filt, verbose=args.verbose)
    else:
        raise ValueError("How did this even happen?")
    logger["distance metric used"] = args.correl_method
    if args.verbose:
        print("Features Correlated")
        print("")

    # adjust p-values when the chosen method produced them
    if 'p' in correls.columns:
        correls['p_adj'] = general.p_adjust(correls['p'])
    correls.to_csv('correls.txt', sep='\t', index_label=('feature1', 'feature2'))
    if args.verbose:
        print("Correls.txt written")

    # make correlation network
    metadata = general.get_metadata_from_table(table_filt)
    net = general.correls_to_net(correls, metadata=metadata)
    nx.write_gml(net, 'correlation_network.gml')
    if args.verbose:
        print("Network made")
        print("")

    logger.output_log()