Exemplo n.º 1
0
def residualize(Y, data, formula_res, formula_full=None):
    """One-call convenience wrapper around ``Residualizer``.

    Builds a ``Residualizer`` from ``data`` and the two formulas, asks it
    for its design matrix, and returns the result of ``fit_transform``
    applied to ``Y`` with that design matrix. See ``Residualizer`` for the
    meaning of each argument.

    Parameters
    ----------
    Y :
        Passed as the first argument of ``Residualizer.fit_transform``.
    data :
        Forwarded as ``Residualizer(data=...)``.
    formula_res :
        Forwarded as ``Residualizer(formula_res=...)``.
    formula_full : optional
        Forwarded as ``Residualizer(formula_full=...)``; defaults to None.

    Returns
    -------
    Whatever ``Residualizer.fit_transform`` returns.
    """
    residualizer = Residualizer(
        data=data, formula_res=formula_res, formula_full=formula_full
    )
    design_mat = residualizer.get_design_mat()
    return residualizer.fit_transform(Y, design_mat)
Exemplo n.º 2
0
    1   BIOBD    mannheim     79  0.52     41.06   0.56
    2   BIOBD     creteil     73  0.47     35.27   0.49
    3   BIOBD       udine    126  0.29     38.72   0.43
    4   BIOBD      galway     69  0.41     41.33   0.49
    5   BIOBD  pittsburgh    114  0.68     33.68   0.73
    6   BIOBD    grenoble     32  0.72     43.22   0.53
    7   BIOBD      geneve     52  0.46     31.25   0.46
    8   BSNIP      Boston     54  0.52     34.69   0.61
    9   BSNIP      Dallas     67  0.36     40.24   0.63
    10  BSNIP    Hartford     79  0.34     35.44   0.58
    11  BSNIP   Baltimore     88  0.35     41.70   0.64
    12  BSNIP     Detroit     26  0.19     32.88   0.54
    """

    formula_res, formula_full = "site + age + sex", "site + age + sex + " + target_num
    residualizer = Residualizer(data=pop_w[msk], formula_res=formula_res, formula_full=formula_full)
    Z = residualizer.get_design_mat()

    assert Xim.shape[0] == Z.shape[0] == y.shape[0]

    # -----------------------------------------------------------------------------
    # CV: 5CV(BIOBD) + LSO(biobd+BSNIP)

    pop_ = pop_w[msk]
    pop_ = pop_.reset_index(drop=True)

    # ~~~~~~~
    # CV LSO

    cv_lso_dict = {s:[np.where(pop_.site != s)[0], np.where(pop_.site == s)[0]] for s in pop_.site.unique()}
Exemplo n.º 3
0
    import numpy as np
    import pandas as pd
    import scipy.stats as stats
    from nitk.stats import Residualizer
    import seaborn as sns
    np.random.seed(1)

    # Dataset with site effect on age
    site = np.array([-1] * 50 + [1] * 50)
    age = np.random.uniform(10, 40, size=100) + 5 * site
    y = -0.1 * age + site + np.random.normal(size=100)
    data = pd.DataFrame(dict(y=y, age=age, site=site.astype(object)))

    # Simple residualization on site
    res_spl = Residualizer(data, formula_res="site")
    yres = res_spl.fit_transform(y[:, None], res_spl.get_design_mat())

    # Site residualization adjusted for age
    res_adj = Residualizer(data, formula_res="site", formula_full="age + site")
    yadj = res_adj.fit_transform(y[:, None], res_adj.get_design_mat())

    # Site residualization adjusted for age provides higher correlation, and
    # lower stderr than simple residualization
    lm_res = stats.linregress(age, yres.ravel())
    lm_adj = stats.linregress(age, yadj.ravel())

    np.allclose((lm_res.slope, lm_res.rvalue, lm_res.stderr),
                (-0.079187578, -0.623733003, 0.0100242219))

    np.allclose((lm_adj.slope, lm_adj.rvalue, lm_adj.stderr),
Exemplo n.º 4
0
    import numpy as np
    import pandas as pd
    import scipy.stats as stats
    from nitk.stats import Residualizer
    import seaborn as sns
    np.random.seed(1)

    # Dataset with site effect on age
    site = np.array([-1] * 50 + [1] * 50)
    age = np.random.uniform(10, 40, size=100) + 5 * site
    y = -0.1 * age + site + np.random.normal(size=100)
    data = pd.DataFrame(dict(y=y, age=age, site=site.astype(object)))

    # Simple residualization on site
    res_spl = Residualizer(data, formula_res="site")
    yres = res_spl.fit_transform(y[:, None], res_spl.get_design_mat())

    # Site residualization adjusted for age
    res_adj = Residualizer(data, formula_res="site", formula_full="age + site")
    yadj = res_adj.fit_transform(y[:, None], res_adj.get_design_mat())

    # Site residualization adjusted for age provides higher correlation, and
    # lower stderr than simple residualization
    lm_res = stats.linregress(age, yres.ravel())
    lm_adj = stats.linregress(age, yadj.ravel())

    np.allclose((lm_res.slope, lm_res.rvalue, lm_res.stderr),
                (-0.079187578, -0.623733003, 0.0100242219))

    np.allclose((lm_adj.slope, lm_adj.rvalue, lm_adj.stderr),