Exemplo n.º 1
0
def test_keep_stable_genes(donor_expression, thr, per, rank, stab):
    """Exercise ``correct.keep_stable_genes`` for one parameter combination.

    Parameters
    ----------
    donor_expression
        Per-donor expression data; it is passed straight to
        ``keep_stable_genes`` and indexed with ``[0]`` below.
    thr
        Value forwarded as ``threshold``.
    per
        Value forwarded as ``percentile``.
    rank
        Value forwarded as ``rank``.
    stab
        Value forwarded as ``return_stability``; the stability checks only
        run when it is truthy and ``thr == 0``.
    """
    filtered = correct.keep_stable_genes(
        donor_expression, threshold=thr, percentile=per, rank=rank
    )

    # Every returned element must be a DataFrame ...
    for frame in filtered:
        assert isinstance(frame, pd.DataFrame)
    # ... and every pair of them must have the same shape.
    for left, right in itertools.combinations(filtered, 2):
        assert left.shape == right.shape

    # The `return_stability` checks are only made for a zero threshold.
    if thr != 0 or not stab:
        return

    # With `return_stability` the call yields (expression, stability).
    expression, stability = correct.keep_stable_genes(
        donor_expression, threshold=thr, return_stability=stab
    )
    first_columns = expression[0].columns
    assert len(stability) == len(first_columns)
    assert np.all(first_columns == donor_expression[0].columns)
Exemplo n.º 2
0
def test_keep_stable_genes(donor_expression):
    """Exercise ``correct.keep_stable_genes`` over a grid of settings.

    The grid is every combination of ``threshold`` in
    ``np.arange(0, 1, 0.1)`` with ``percentile`` and ``rank`` each set to
    True and False. A final call checks the ``return_stability`` output.

    Parameters
    ----------
    donor_expression
        Per-donor expression data; it is passed straight to
        ``keep_stable_genes`` and indexed with ``[0]`` below.
    """
    thresholds = np.arange(0, 1, 0.1)
    flags = [True, False]

    for thr, per, rank in itertools.product(thresholds, flags, flags):
        filtered = correct.keep_stable_genes(
            donor_expression, threshold=thr, percentile=per, rank=rank
        )
        # Every returned element must be a DataFrame ...
        for frame in filtered:
            assert isinstance(frame, pd.DataFrame)
        # ... and every pair of them must have the same shape.
        for left, right in itertools.combinations(filtered, 2):
            assert left.shape == right.shape

    # With `return_stability` the call yields (expression, stability).
    expression, stability = correct.keep_stable_genes(
        donor_expression, threshold=0, return_stability=True
    )
    first_columns = expression[0].columns
    assert len(stability) == len(first_columns)
    assert np.all(first_columns == donor_expression[0].columns)
Exemplo n.º 3
0
def test_keep_stable_genes(donor_expression):
    """Exercise ``correct.keep_stable_genes`` over a grid of settings.

    The grid is every combination of ``threshold`` in
    ``np.arange(0, 1, 0.1)`` with ``percentile`` and ``rank`` each set to
    True and False.

    Parameters
    ----------
    donor_expression
        Per-donor expression data passed straight to ``keep_stable_genes``.
    """
    thresholds = np.arange(0, 1, 0.1)
    flags = [True, False]

    for thr, per, rank in itertools.product(thresholds, flags, flags):
        filtered = correct.keep_stable_genes(
            donor_expression, threshold=thr, percentile=per, rank=rank
        )
        # Every returned element must be a DataFrame ...
        for frame in filtered:
            assert isinstance(frame, pd.DataFrame)
        # ... and every pair of them must have the same shape.
        for left, right in itertools.combinations(filtered, 2):
            assert left.shape == right.shape
Exemplo n.º 4
0
def select_genes(dict_genes, sphere_lh, sphere_rh, n_perm=100, df_gandal=None):
    """Gene enrichment analysis.

    Genes are kept when their p-value from ``_select_genes_one`` is below
    0.05 for both the 'SD' and the 'DD' measure and they also survive
    abagen's ``keep_stable_genes`` filter.

    Parameters
    ----------
    dict_genes: dict of pd.DataFrame
        Dictionary with pd.DataFrame of genes for each donor in the AHBA
        dataset.
    sphere_lh: BSPolyData
        Sphere for left hemisphere.
    sphere_rh: BSPolyData
        Sphere for right hemisphere.
    n_perm: int, default=100
        Forwarded unchanged to ``_select_genes_one`` as ``n_perm``.
    df_gandal: pd.DataFrame, default=None
        Dataframe with logFC for ASD, SCZ and BD. It must provide the gene
        name in a 'gene_name' (or 'gene_symbol') column, the columns
        'ASD.log2FC', 'SCZ.log2FC' and 'BD.log2FC', and
        'percentage_gene_gc_content'. The dataframe is not modified.

    Returns
    -------
    selected_genes: np.ndarray
        Genes with significant spatial overlap with idiosyncrasy measures
        (the output of ``np.intersect1d``, hence sorted and unique).
    df_res: pd.DataFrame
        Only returned when `df_gandal` is provided. Rows are
        'ASD', 'SCZ', 'BD' and columns are 'SD', 'DD'; each cell is the
        string 't-value (p-value)' of the log2FC coefficient in a robust
        linear model of the idiosyncrasy t-values.
    """

    from abagen.correct import keep_stable_genes

    dict_genes = get_shared_genes(dict_genes)
    donor_frames = list(dict_genes.values())
    genes = donor_frames[0].columns

    tval_sd, pval_sd = _select_genes_one(dict_genes,
                                         'SD',
                                         sphere_lh,
                                         sphere_rh,
                                         n_perm=n_perm)

    tval_dd, pval_dd = _select_genes_one(dict_genes,
                                         'DD',
                                         sphere_lh,
                                         sphere_rh,
                                         n_perm=n_perm)

    # A gene must be significant for both measures.
    mask_selected = np.logical_and(pval_sd < 0.05, pval_dd < 0.05)
    selected_genes = genes[mask_selected]

    # Only keep stable genes
    stable_genes = keep_stable_genes(donor_frames,
                                     threshold=.5,
                                     percentile=False,
                                     rank=True)[0]

    selected_genes = np.intersect1d(stable_genes.columns, selected_genes)

    if df_gandal is None:
        return selected_genes

    df = pd.DataFrame(np.c_[tval_sd, tval_dd],
                      index=genes,
                      columns=['SD', 'DD'])

    # Rename on a new object so the caller's dataframe is left untouched.
    gandal = df_gandal.rename(columns={'gene_name': 'gene_symbol'})
    # NOTE(review): merging on 'gene_symbol' assumes the gene index of the
    # expression frames is named 'gene_symbol' -- confirm against the data.
    df = df.merge(gandal.set_index('gene_symbol'), on='gene_symbol')

    df_res = pd.DataFrame('', columns=['SD', 'DD'], index=['ASD', 'SCZ', 'BD'])
    for idio in ['SD', 'DD']:
        y = df[[idio]]
        for k in ['ASD', 'SCZ', 'BD']:
            fc = f'{k}.log2FC'
            # `assign` builds a fresh design matrix (log2FC, GC content,
            # intercept) instead of writing into a selection of `df`.
            x = df[[fc, 'percentage_gene_gc_content']].assign(Constant=1)
            rlm_results = sm.RLM(y, x).fit()

            # The log2FC term is the first regressor; take it by position
            # rather than with an integer key on a label-indexed result.
            tv = np.asarray(rlm_results.tvalues)[0]
            pv = np.asarray(rlm_results.pvalues)[0]
            df_res.loc[k, idio] = f'{tv:.3f} ({pv:.3f})'

    return selected_genes, df_res