Пример #1
0
def calculate_correlations(table: Table, corr_method: str='spearman',
                           p_adjustment_method: str='fdr_bh') -> pd.DataFrame:
    """Correlate every pair of observations in ``table``.

    Parameters
    ----------
    table
        Object exposing ``iter_pairwise(axis='observation')``. Each yielded
        item is a pair of 3-item tuples; the first item of each tuple is
        handed to the correlation function, the second is used as the
        observation id, and the third is ignored.
    corr_method
        Key into the module-level ``correl_methods`` mapping that selects the
        correlation function. The function must return an ``(r, p)`` pair.
    p_adjustment_method
        Forwarded to ``p_adjust`` as ``method``. Pass ``None`` to skip the
        adjustment, in which case no ``p_adjusted`` column is added.

    Returns
    -------
    pd.DataFrame
        One row per observation pair, indexed by a ``(id_i, id_j)``
        MultiIndex, with columns ``r`` and ``p`` (plus ``p_adjusted`` when an
        adjustment method is given), sorted by ascending ``p``.

    Raises
    ------
    KeyError
        If ``corr_method`` is not a key of ``correl_methods``.
    """
    # TODO: multiprocess this
    try:
        corr_method_fun = correl_methods[corr_method]
    except KeyError:
        raise KeyError(
            "Unknown corr_method {!r}; available methods: {}".format(
                corr_method, list(correl_methods))) from None

    # Collect rows in plain lists and build the frame once: inserting one
    # DataFrame column per pair and transposing afterwards grows the frame
    # incrementally, which is slow when there are many pairs.
    pair_ids = []
    stats = []
    for (val_i, id_i, _), (val_j, id_j, _) in table.iter_pairwise(axis='observation'):
        r, p = corr_method_fun(val_i, val_j)
        pair_ids.append((id_i, id_j))
        stats.append((r, p))

    correls = pd.DataFrame(stats,
                           index=pd.MultiIndex.from_tuples(pair_ids),
                           columns=['r', 'p'])
    if p_adjustment_method is not None:
        correls['p_adjusted'] = p_adjust(correls.p, method=p_adjustment_method)
    return correls.sort_values('p')
Пример #2
0
def calculate_correlations(
        table: Table,
        corr_method=spearmanr,
        p_adjustment_method: str = 'fdr_bh') -> pd.DataFrame:
    """Compute a correlation statistic for each pair of observations.

    ``table.iter_pairwise(axis='observation')`` is walked once. For every
    yielded pair of 3-item tuples, ``corr_method`` is called with the first
    item of each tuple and must return an ``(r, p)`` pair; the second items
    form the row label and the third items are ignored.

    Parameters
    ----------
    table
        Object providing ``iter_pairwise(axis='observation')``.
    corr_method
        Callable taking two value vectors and returning ``(r, p)``.
    p_adjustment_method
        Passed to ``p_adjust`` as ``method``; ``None`` disables the
        adjustment and the ``p_adjusted`` column is not created.

    Returns
    -------
    pd.DataFrame
        Rows in iteration order, indexed by a ``(id_i, id_j)`` MultiIndex,
        with columns ``r``, ``p`` and, optionally, ``p_adjusted``.
    """
    # TODO: multiprocess this
    rows = []
    for first, second in table.iter_pairwise(axis='observation'):
        val_i, id_i, _ = first
        val_j, id_j, _ = second
        r, p = corr_method(val_i, val_j)
        rows.append(((id_i, id_j), (r, p)))

    pair_ids = [pair for pair, _ in rows]
    stats = [stat for _, stat in rows]

    correls = pd.DataFrame(stats, index=pair_ids, columns=['r', 'p'])
    # Make sure the tuple labels end up as a real two-level MultiIndex.
    correls.index = pd.MultiIndex.from_tuples(correls.index)

    if p_adjustment_method is None:
        return correls
    correls['p_adjusted'] = p_adjust(correls['p'], method=p_adjustment_method)
    return correls
Пример #3
0
def calculate_correlations(table: Table, corr_method=spearmanr, p_adjust_method: str = 'fdr_bh', nprocs=1) -> \
        pd.DataFrame:
    """Correlate every pair of observations in ``table`` using a process pool.

    Parameters
    ----------
    table
        Object exposing ``iter_pairwise(axis='observation')``; its output is
        passed through ``pairwise_iter_wo_metadata`` before being mapped.
    corr_method
        Correlation callable, bound as the ``corr_method`` keyword of
        ``calculate_correlation``.
    p_adjust_method
        Forwarded to ``p_adjust`` as ``method``. ``None`` skips the
        adjustment and no ``p_adjusted`` column is added.
    nprocs
        Number of worker processes. Values above
        ``multiprocessing.cpu_count()`` trigger a warning and are clamped to
        the CPU count.

    Returns
    -------
    pd.DataFrame
        One row per mapped item, with columns ``r`` and ``p`` (plus
        ``p_adjusted`` when requested). The index is a MultiIndex whose
        labels are the id pairs with each pair sorted, so the same two ids
        always produce the same label regardless of iteration order.

    Notes
    -----
    Each result of ``calculate_correlation`` is indexed as
    ``result[0]`` (id pair) and ``result[1]`` (``(r, p)`` values) -- this is
    what the code below relies on; confirm against that helper.
    """
    cpu_count = multiprocessing.cpu_count()
    if nprocs > cpu_count:
        warnings.warn(
            "nprocs greater than CPU count, using all avaliable CPUs")
        nprocs = cpu_count

    cor = partial(calculate_correlation, corr_method=corr_method)
    # The context manager releases the worker processes even when map()
    # raises; map() itself blocks until every result is available.
    with multiprocessing.Pool(nprocs) as pool:
        results = pool.map(
            cor,
            pairwise_iter_wo_metadata(table.iter_pairwise(axis='observation')))

    # Sort the ids inside each pair and keep them as tuples, which is the
    # element type MultiIndex.from_tuples is documented to take.
    index = pd.MultiIndex.from_tuples(
        [tuple(sorted(result[0])) for result in results])
    data = [result[1] for result in results]
    correls = pd.DataFrame(data, index=index, columns=['r', 'p'])

    if p_adjust_method is not None:
        correls['p_adjusted'] = p_adjust(correls.p, method=p_adjust_method)
    return correls