Exemplo n.º 1
0
def get_series():
    """Build a small synthetic two-gene series for region 'THERE'.

    Ten ages are spread over [0, 100] with a little Gaussian jitter and then
    sorted. Two Sigslope curves are evaluated at those ages and perturbed by
    bivariate normal noise (zero mean, variance 100 per gene, correlation
    -0.95 between the two genes). The generator is seeded from
    ``cfg.random_seed``.

    Returns:
        A SeveralGenesOneRegion holding genes 'A' and 'B'.
    """
    n_points = 10
    rng = np.random.RandomState(cfg.random_seed)

    # First random draw: jitter on an evenly spaced grid, kept ascending.
    jitter = rng.normal(0, 0.1, size=n_points)
    ages = np.linspace(0, 100, n_points) + jitter
    ages.sort()

    # Noise-free expression curves, one parameter tuple per gene.
    shape = Sigslope()
    thetas = [(-1, 40, 50, 0.2), (1, 30, 25, 0.5)]
    curve_a, curve_b = [shape.f(theta, ages) for theta in thetas]

    # Second random draw: correlated noise shared by the two genes.
    correlation = -0.95
    covariance = 100 * np.array([[1, correlation], [correlation, 1]])
    noise = rng.multivariate_normal([0, 0], covariance, n_points)
    expression = np.c_[curve_a, curve_b] + noise

    return SeveralGenesOneRegion(
        expression=expression,
        ages=ages,
        gene_names=['A', 'B'],
        region_name='THERE',
        original_inds=np.arange(n_points),
        age_scaler=None,
    )
Exemplo n.º 2
0
def analyze_paired_scores_with_and_without_priors(n_best=10):
    """Compare LOO scores of Sigslope fits with and without priors.

    Loads previously computed fits (new computation is not allowed) for two
    fitters: a plain Sigslope fitter, and one built with ``priors_name`` and
    a 'normal' second argument. Prints a one-sided Wilcoxon signed-rank
    p-value for the paired LOO scores, then lists the gene/region pairs whose
    score improved the most when priors were used.

    Args:
        n_best: how many of the top-improvement gene/region pairs to print.
    """
    nFitter = Fitter(Sigslope())
    yFitter = Fitter(Sigslope(priors_name), 'normal')

    nFits = get_all_fits(data, nFitter, allow_new_computation=False)
    yFits = get_all_fits(data, yFitter, allow_new_computation=False)

    score_pairs = [(f1.LOO_score, f2.LOO_score)
                   for f1, f2 in iterate_fits(nFits, yFits)]
    nScores, yScores = zip(*score_pairs)

    _, pval = scipy.stats.wilcoxon(nScores, yScores)
    # Halving the returned p-value gives a one-sided value.
    # NOTE(review): this presumes the observed difference is in the expected
    # direction -- confirm.
    pval = pval / 2
    print('*** wilcoxon signed rank p-value (one sided) = {:.3g}'.format(pval))

    # Find examples of best improvements; the score delta is the first tuple
    # element so a descending sort puts the largest gains first.
    diffs = [(f2.LOO_score - f1.LOO_score, f1.LOO_score, f2.LOO_score, g, r)
             for dsname, g, r, f1, f2 in iterate_fits(
                 nFits, yFits, R2_threshold=-1, return_keys=True)]
    diffs.sort(reverse=True)
    print('Gene/Regions for which priors produce best R2 improvement:')
    # Honour n_best (previously the slice was hard-coded to 10).
    for i, (delta, R2_without, R2_with, g, r) in enumerate(diffs[:n_best]):
        print('{i}) {g}@{r}, delta-R2={delta:.3g}. R2_without={R2_without:.3g}, R2_with={R2_with:.3g}'.format(
            i=i, g=g, r=r, delta=delta,
            R2_without=R2_without, R2_with=R2_with))
Exemplo n.º 3
0
def plot_theta_diff_scatter(show_title=False):
    """Scatter the LOO-score change from using priors against the baseline.

    For every pair of fits (without priors / with priors) where both LOO
    scores are available, plots the no-priors score on the x axis and the
    difference (with priors minus without) on the y axis, with a dashed
    horizontal line at zero.

    Args:
        show_title: when true, add a title to the axes.

    Returns:
        The matplotlib figure.
    """
    yFitter = Fitter(Sigslope(priors_name), 'normal')
    nFitter = Fitter(Sigslope())
    yFits = get_all_fits(data, yFitter)
    nFits = get_all_fits(data, nFitter)
    pairs = [(nFit.LOO_score, yFit.LOO_score)
             for nFit, yFit in iterate_fits(nFits, yFits)]
    # Skip pairs where either fit has no LOO score.
    diff_pairs = [(base, with_prior - base) for base, with_prior in pairs
                  if base is not None and with_prior is not None]
    baseline, improvement = zip(*diff_pairs)

    fig = plt.figure()
    ax = fig.add_axes([0.15, 0.12, 0.8, 0.8])
    ax.scatter(baseline, improvement, alpha=0.5)
    # Draw the zero line across the current x range, then restore the limits
    # so the extra line does not widen the axes.
    xlims = ax.get_xlim()
    ax.plot(xlims, [0, 0], 'k--')
    ax.set_xlim(*xlims)
    if show_title:
        # Axes.title is a Text attribute, not a method; set_title is the
        # call that actually sets the title.
        ax.set_title(r'Improvement from prior on $\theta$ vs. baseline $R^2$',
                     fontsize=fontsize)
    ax.set_xlabel(r'$R^2$(no priors)', fontsize=fontsize)
    ax.set_ylabel(r'$R^2$($\theta$) - $R^2$(no priors)', fontsize=fontsize)
    ax.tick_params(axis='both', labelsize=fontsize)
    return fig
Exemplo n.º 4
0
def get_series():
    """Return a reproducible synthetic SeveralGenesOneRegion for two genes.

    The ages are ten jittered, sorted points on [0, 100]. Expression for
    genes 'A' and 'B' is a Sigslope curve per gene plus noise drawn from a
    zero-mean bivariate normal whose covariance is
    ``100 * [[1, -0.95], [-0.95, 1]]``. Randomness comes from a RandomState
    seeded with ``cfg.random_seed``.
    """
    noise_variance = 100
    noise_correlation = -0.95
    count = 10
    generator = np.random.RandomState(cfg.random_seed)

    # Jittered age grid, sorted in place so ages stay ascending.
    grid = np.linspace(0, 100, count)
    sample_ages = grid + generator.normal(0, 0.1, size=count)
    sample_ages.sort()

    # Clean curves for each gene, evaluated before the noise is drawn.
    sigslope = Sigslope()
    gene_a = sigslope.f((-1, 40, 50, 0.2), sample_ages)
    gene_b = sigslope.f((1, 30, 25, 0.5), sample_ages)

    covariance = noise_variance * np.array([
        [1, noise_correlation],
        [noise_correlation, 1],
    ])
    perturbation = generator.multivariate_normal([0, 0], covariance, count)

    return SeveralGenesOneRegion(
        expression=np.c_[gene_a, gene_b] + perturbation,
        ages=sample_ages,
        gene_names=['A', 'B'],
        region_name='THERE',
        original_inds=np.arange(count),
        age_scaler=None,
    )
Exemplo n.º 5
0
def analyze_variant(theta, sigma):
    """Summarize cached LOO scores for one combination of priors.

    Args:
        theta: if truthy, the Sigslope shape is built with ``priors_name``;
            otherwise with None.
        sigma: if truthy, the fitter gets 'normal' as its second argument;
            otherwise None.

    Returns:
        A Bunch with the two flags, the list of non-None LOO scores, and the
        pair of values returned by ``bootstrap(LOO_scores, np.mean)`` stored
        as ``mu`` and ``sem``.
    """
    if theta:
        theta_priors = priors_name
    else:
        theta_priors = None
    if sigma:
        sigma_prior = 'normal'
    else:
        sigma_prior = None

    fitter = Fitter(Sigslope(theta_priors), sigma_prior)
    # Only previously computed fits are used; nothing new is fitted here.
    fits = get_all_fits(data, fitter, allow_new_computation=False)

    LOO_scores = []
    for fit in iterate_fits(fits):
        if fit.LOO_score is not None:
            LOO_scores.append(fit.LOO_score)

    mu, sem = bootstrap(LOO_scores, np.mean)
    return Bunch(theta=theta, sigma=sigma, LOO_scores=LOO_scores, mu=mu, sem=sem)
Exemplo n.º 6
0
def get_fitter():
    """Return a Fitter for a Sigslope shape using the 'sigslope80' priors
    and sigma_prior='normal'."""
    return Fitter(Sigslope(priors='sigslope80'), sigma_prior='normal')
Exemplo n.º 7
0
        ax.plot(xpos, h, linewidth=3, label=shape.cache_name())
    ax.set_xlim(*zoom)
    ax.set_ylim(0, zoom_max * 1.1)
    ax.legend(loc='best', fontsize=fontsize, frameon=False)
    ax.set_xlabel('test $R^2$ score', fontsize=fontsize)
    ax.set_ylabel("probability density", fontsize=fontsize)
    ax.tick_params(axis='both', labelsize=fontsize)
    return fig


# Script setup: set verbosity, then load the 'both' dataset and scale its
# ages with a LogScaler.
cfg.verbosity = 1
age_scaler = LogScaler()
data = GeneData.load('both').scale_ages(age_scaler)

# One fitter per shape, each constructed with sigma_prior='normal'.
sigmoid = Sigmoid(priors='sigmoid_wide')
sigslope = Sigslope(priors='sigslope80')
shapes = [sigmoid, sigslope]
fitters = [Fitter(shape, sigma_prior='normal') for shape in shapes]

#####################################################
# Example fits
#####################################################
# Each entry is (gene, region, yrange); yrange is passed on to
# plot_one_series.
GRs = [
    ('ABHD4', 'STC', (5, 8)),
]
for g, r, yrange in GRs:
    for fitter in fitters:
        print 'Doing {}@{}...'.format(g, r)
        series = data.get_one_series(g, r)
        # fit() returns four values; only the first (theta) is used here.
        theta, _, _, _ = fitter.fit(series.ages, series.single_expression)
        # NOTE(review): fig is not used in the visible lines -- the snippet
        # may be truncated before the figure is saved; confirm.
        fig = plot_one_series(series, fitter.shape, theta, yrange)
Exemplo n.º 8
0
    tuples = dct_tuples.values()
    pairs = [(x[0], x[1]) for x in tuples]
    fig = plot_comparison_scatter(pairs, pathway)
    filename = join('RP', 'correlation-diff-scatter-{}.png'.format(pathway))
    save_figure(fig, filename, b_close=True, under_results=True)

    fig = plot_comparison_bar(tuples)
    filename = join('RP', 'correlation-diff-bar-{}.png'.format(pathway))
    save_figure(fig, filename, b_close=True, under_results=True)
    fig = plot_comparison_bar(tuples, several_levels=True)
    filename = join(
        'RP', 'correlation-diff-bar-several-levels-{}.png'.format(pathway))
    save_figure(fig, filename, b_close=True, under_results=True)


# Script setup: silence warnings, set verbosity, and build a single Sigslope
# fitter ('sigslope80' priors, sigma_prior='normal') shared by all pathways.
disable_all_warnings()
cfg.verbosity = 1
age_scaler = LogScaler()
shape = Sigslope('sigslope80')
fitter = Fitter(shape, sigma_prior='normal')

# For each pathway: reload the 'both' dataset restricted to that pathway,
# scale its ages, fetch the fits, and run the pathway analysis.
pathways = ['cannabinoids', 'serotonin']
for pathway in pathways:
    data = GeneData.load('both').restrict_pathway(pathway).scale_ages(
        age_scaler)
    # allow_new_computation=False: new fits are not to be computed here.
    fits = get_all_fits(data,
                        fitter,
                        n_correlation_iterations=4,
                        allow_new_computation=False)
    analyze_pathway(pathway, data, fitter, fits)