def calculate_correlations(table: Table, corr_method: str = 'spearman',
                           p_adjustment_method: str = 'fdr_bh') -> pd.DataFrame:
    """Correlate every pair of observations in ``table``.

    Parameters
    ----------
    table
        Object whose ``iter_pairwise(axis='observation')`` yields pairs of
        3-tuples ``(values, id, <ignored>)``.
    corr_method
        Key into the module-level ``correl_methods`` mapping; the selected
        callable is invoked as ``f(values_i, values_j)`` and must return
        ``(r, p)``.
    p_adjustment_method
        Forwarded to ``p_adjust`` as ``method``. Pass ``None`` to skip the
        adjustment (no ``p_adjusted`` column is added in that case).

    Returns
    -------
    pd.DataFrame
        One row per pair, indexed by a two-level ``MultiIndex`` of
        ``(id_i, id_j)``, with columns ``r``, ``p`` and optionally
        ``p_adjusted``, sorted by ascending ``p``. When the table yields no
        pairs an empty float frame with the same columns is returned.

    Raises
    ------
    KeyError
        If ``corr_method`` is not a key of ``correl_methods``.
    """
    # TODO: multiprocess this
    corr_method_fun = correl_methods[corr_method]

    # Collect rows in plain lists and build the frame once: inserting one
    # DataFrame column per pair is quadratic and fragments the frame.
    pairs = []
    stats = []
    for (val_i, id_i, _), (val_j, id_j, _) in table.iter_pairwise(axis='observation'):
        r, p = corr_method_fun(val_i, val_j)
        pairs.append((id_i, id_j))
        stats.append((r, p))

    if not pairs:
        # MultiIndex.from_tuples cannot infer the number of levels from an
        # empty sequence, so the empty result is built explicitly.
        columns = ['r', 'p']
        if p_adjustment_method is not None:
            columns.append('p_adjusted')
        return pd.DataFrame(index=pd.MultiIndex.from_arrays([[], []]),
                            columns=columns, dtype=float)

    correls = pd.DataFrame(stats, index=pd.MultiIndex.from_tuples(pairs),
                           columns=['r', 'p'])
    if p_adjustment_method is not None:
        correls['p_adjusted'] = p_adjust(correls.p, method=p_adjustment_method)
    return correls.sort_values('p')
def calculate_correlations(table: Table, corr_method=spearmanr,
                           p_adjustment_method: str = 'fdr_bh') -> pd.DataFrame:
    """Correlate every pair of observations in ``table``.

    Parameters
    ----------
    table
        Object whose ``iter_pairwise(axis='observation')`` yields pairs of
        3-tuples ``(values, id, <ignored>)``.
    corr_method
        Callable invoked as ``corr_method(values_i, values_j)``; it must
        return a ``(r, p)`` pair.
    p_adjustment_method
        Forwarded to ``p_adjust`` as ``method``. Pass ``None`` to skip the
        adjustment (no ``p_adjusted`` column is added in that case).

    Returns
    -------
    pd.DataFrame
        One row per pair, in iteration order, indexed by a two-level
        ``MultiIndex`` of ``(id_i, id_j)``, with columns ``r``, ``p`` and
        optionally ``p_adjusted``. When the table yields no pairs an empty
        float frame with the same columns is returned.
    """
    # TODO: multiprocess this
    pairs = []
    stats = []
    for (val_i, id_i, _), (val_j, id_j, _) in table.iter_pairwise(axis='observation'):
        r, p = corr_method(val_i, val_j)
        pairs.append((id_i, id_j))
        stats.append((r, p))

    if not pairs:
        # MultiIndex.from_tuples cannot infer the number of levels from an
        # empty sequence, so the empty result is built explicitly instead of
        # failing with an opaque TypeError.
        columns = ['r', 'p']
        if p_adjustment_method is not None:
            columns.append('p_adjusted')
        return pd.DataFrame(index=pd.MultiIndex.from_arrays([[], []]),
                            columns=columns, dtype=float)

    # Build the MultiIndex straight from the collected id pairs.
    correls = pd.DataFrame(stats, index=pd.MultiIndex.from_tuples(pairs),
                           columns=['r', 'p'])
    if p_adjustment_method is not None:
        correls['p_adjusted'] = p_adjust(correls.p, method=p_adjustment_method)
    return correls
def calculate_correlations(table: Table, corr_method=spearmanr, p_adjust_method: str = 'fdr_bh',
                           nprocs=1) -> pd.DataFrame:
    """Correlate every pair of observations in ``table`` using a process pool.

    Parameters
    ----------
    table
        Object providing ``iter_pairwise(axis='observation')``; its output is
        passed through ``pairwise_iter_wo_metadata`` before being mapped.
    corr_method
        Bound as the ``corr_method`` keyword of ``calculate_correlation``,
        which is run once per pair in the worker processes.
    p_adjust_method
        Forwarded to ``p_adjust`` as ``method``. Pass ``None`` to skip the
        adjustment (no ``p_adjusted`` column is added in that case).
    nprocs
        Number of worker processes. Values above the CPU count trigger a
        warning and are clamped to the CPU count.

    Returns
    -------
    pd.DataFrame
        One row per pair, indexed by a ``MultiIndex`` whose entries are the
        per-pair ids sorted within each pair, with columns ``r``, ``p`` and
        optionally ``p_adjusted``. When there are no pairs an empty float
        frame with the same columns is returned.
    """
    cpu_total = multiprocessing.cpu_count()
    if nprocs > cpu_total:
        warnings.warn(
            "nprocs greater than CPU count, using all avaliable CPUs")
        nprocs = cpu_total

    cor = partial(calculate_correlation, corr_method=corr_method)
    # Build the work iterator before starting workers so that a failure here
    # cannot leave a pool behind.
    pairs = pairwise_iter_wo_metadata(table.iter_pairwise(axis='observation'))

    # The context manager terminates the pool if map() raises; on success the
    # pool is still shut down gracefully with close()/join() first.
    with multiprocessing.Pool(nprocs) as pool:
        results = pool.map(cor, pairs)
        pool.close()
        pool.join()

    if not results:
        # MultiIndex.from_tuples cannot infer the number of levels from an
        # empty sequence, so the empty result is built explicitly.
        columns = ['r', 'p']
        if p_adjust_method is not None:
            columns.append('p_adjusted')
        return pd.DataFrame(index=pd.MultiIndex.from_arrays([[], []]),
                            columns=columns, dtype=float)

    # Each result is (id pair, statistics); sorting the ids within every pair
    # makes the index independent of the order the pair was produced in.
    # Assumes each id pair is a tuple -- TODO confirm against
    # calculate_correlation.
    sorted_pairs = [tuple(sorted(res[0])) for res in results]
    data = [res[1] for res in results]
    correls = pd.DataFrame(data, index=pd.MultiIndex.from_tuples(sorted_pairs),
                           columns=['r', 'p'])
    if p_adjust_method is not None:
        correls['p_adjusted'] = p_adjust(correls.p, method=p_adjust_method)
    return correls