Exemplo n.º 1
0
def merge_data_csvs(id):
    """Merge the posterior data CSVs of a job into a single table.

    Reads every ``posterior/data-*.csv`` file under the job working
    directory (``dismod3.settings.JOB_WORKING_DIR % id``) in sorted filename
    order.  Rows are keyed by each file's ``index`` column; when a key shows
    up in more than one file, the rows from the later file replace the
    earlier ones.

    The merged table gains these columns:

    * ``residual`` -- ``value - mu_pred``
    * ``scaled_residual`` -- ``residual`` divided by
      ``sqrt(value * (1 - value) / effective_sample_size)``
    * ``abs_scaled_residual`` -- absolute value of ``scaled_residual``
    * ``logp`` -- ``mc.negative_binomial_like`` evaluated per row; rows whose
      ``data_type`` is ``'rr'`` carry their ``scaled_residual`` instead

    :param id: job id, interpolated into ``JOB_WORKING_DIR``
    :returns: the merged ``pandas.DataFrame``, sorted by ascending ``logp``
    """
    df = pandas.DataFrame()

    # named job_dir (not ``dir``) so the builtin is not shadowed
    job_dir = dismod3.settings.JOB_WORKING_DIR % id
    for f in sorted(glob.glob('%s/posterior/data-*.csv' % job_dir)):
        df2 = pandas.read_csv(f, index_col=None)
        df2.index = df2['index']
        # Rows already merged under the same index are superseded by df2.
        # pandas.concat is used because DataFrame.append was removed in
        # pandas 2.0; concat is available in old releases as well.
        overlap = list(set(df.index) & set(df2.index))
        df = pandas.concat([df.drop(overlap), df2])

    df['residual'] = df['value'] - df['mu_pred']
    df['scaled_residual'] = df['residual'] / pl.sqrt(
        df['value'] * (1 - df['value']) / df['effective_sample_size'])
    df['abs_scaled_residual'] = pl.absolute(df['scaled_residual'])

    d = .005  # TODO: save delta in these files, use negative binomial to calc logp
    df['logp'] = [
        mc.negative_binomial_like(x * n, (p + 1e-3) * n,
                                  d * (p + 1e-3) * n) for x, p, n in
        zip(df['value'], df['mu_pred'], df['effective_sample_size'])
    ]

    # For 'rr' rows, report the scaled residual in place of the likelihood.
    # Done with Series.where on the whole column: the previous chained
    # assignment (df['logp'][mask] = ...) may write to a temporary copy.
    is_rr = df['data_type'] == 'rr'
    df['logp'] = df['logp'].where(~is_rr, df['scaled_residual'])

    # DataFrame.sort was removed in pandas 0.20; fall back to it only on
    # releases that predate sort_values.
    if hasattr(df, 'sort_values'):
        df = df.sort_values('logp')
    else:
        df = df.sort('logp')

    return df
Exemplo n.º 2
0
def data_likelihood(value=data.deaths, mu=predicted, alpha=omega):
    """Return the log-likelihood of ``value`` given the predicted ``mu``.

    When ``alpha`` is at least ``10**10`` the Poisson likelihood
    (``mc.poisson_like``) is used.  Otherwise the negative binomial
    likelihood (``mc.negative_binomial_like``) is evaluated with ``alpha``
    as its third argument, after adding ``10.**-10`` to ``mu`` whenever
    ``mu.min()`` is not strictly positive.
    """
    use_poisson = alpha >= 10**10
    if use_poisson:
        return mc.poisson_like(value, mu)

    # nudge mu off zero before handing it to the negative binomial
    shifted_mu = mu + 10.**-10 if mu.min() <= 0. else mu
    return mc.negative_binomial_like(value, shifted_mu, alpha)
Exemplo n.º 3
0
 def data_likelihood(value=np.round(self.training_data.cf * self.training_data.sample_size), mu=param_pred, alpha=alpha):
     """Return the log-likelihood of ``value`` given the predicted ``mu``.

     ``alpha >= 10**10`` selects ``mc.poisson_like``; any other ``alpha``
     selects ``mc.negative_binomial_like`` with ``alpha`` as its third
     argument.  In the latter case ``10**-10`` is added to ``mu`` when
     ``mu.min()`` is zero or negative.
     """
     if alpha >= 10**10:
         loglike = mc.poisson_like(value, mu)
     else:
         has_nonpositive = mu.min() <= 0.
         safe_mu = mu + 10**-10 if has_nonpositive else mu
         loglike = mc.negative_binomial_like(value, safe_mu, alpha)
     return loglike
Exemplo n.º 4
0
def merge_data_csvs(id):
    """Combine all ``posterior/data-*.csv`` files of a job into one DataFrame.

    The files are looked up under ``dismod3.settings.JOB_WORKING_DIR % id``
    and processed in sorted filename order.  Each file is indexed by its
    ``index`` column, and a later file overrides rows that an earlier file
    stored under the same index value.

    Columns added to the merged table:

    * ``residual`` -- ``value - mu_pred``
    * ``scaled_residual`` -- ``residual`` divided by
      ``sqrt(value * (1 - value) / effective_sample_size)``
    * ``abs_scaled_residual`` -- absolute value of ``scaled_residual``
    * ``logp`` -- per-row ``mc.negative_binomial_like``; for rows with
      ``data_type == 'rr'`` the ``scaled_residual`` is stored instead

    :param id: job id, interpolated into ``JOB_WORKING_DIR``
    :returns: merged ``pandas.DataFrame`` sorted by ascending ``logp``
    """
    df = pandas.DataFrame()

    # avoid the name ``dir`` so the builtin stays usable
    working_dir = dismod3.settings.JOB_WORKING_DIR % id
    for f in sorted(glob.glob('%s/posterior/data-*.csv' % working_dir)):
        df2 = pandas.read_csv(f, index_col=None)
        df2.index = df2['index']
        # Drop rows that df2 supersedes, then stack df2 underneath.
        # DataFrame.append no longer exists in pandas >= 2.0, so use
        # pandas.concat, which old releases provide too.
        superseded = list(set(df.index) & set(df2.index))
        df = pandas.concat([df.drop(superseded), df2])

    df['residual'] = df['value'] - df['mu_pred']
    df['scaled_residual'] = df['residual'] / pl.sqrt(df['value'] * (1 - df['value']) / df['effective_sample_size'])
    df['abs_scaled_residual'] = pl.absolute(df['scaled_residual'])

    d = .005  # TODO: save delta in these files, use negative binomial to calc logp
    df['logp'] = [mc.negative_binomial_like(x*n, (p+1e-3)*n, d*(p+1e-3)*n)
                  for x, p, n in zip(df['value'], df['mu_pred'], df['effective_sample_size'])]

    # 'rr' rows get the scaled residual instead of the likelihood.  Assign
    # the whole column at once: chained indexing (df['logp'][mask] = ...)
    # can end up modifying a temporary copy instead of df.
    rr_rows = df['data_type'] == 'rr'
    df['logp'] = df['logp'].where(~rr_rows, df['scaled_residual'])

    # DataFrame.sort is gone since pandas 0.20; keep it only as a fallback
    # for releases without sort_values.
    if hasattr(df, 'sort_values'):
        df = df.sort_values('logp')
    else:
        df = df.sort('logp')

    return df
Exemplo n.º 5
0
 def obs(value=value, S=data_sample, N=N, mu_i=rates, Xz=Xz, zeta=zeta,
         delta=delta):
     """Negative binomial log-likelihood of the entries selected by ``S``.

     Calls ``mc.negative_binomial_like`` with ``value[S]`` as the data,
     ``N[S] * mu_i`` as the second argument and ``delta * exp(Xz * zeta)``
     as the third.
     """
     # An earlier, disabled variant used a normal likelihood on the
     # difference of log(value + zeta_i) and log(mu_i * N + zeta_i).
     selected_values = value[S]
     second_arg = N[S] * mu_i
     third_arg = delta * pl.exp(Xz * zeta)
     return mc.negative_binomial_like(selected_values, second_arg, third_arg)
Exemplo n.º 6
0
        def obs_lb(value=value, N=N,
                   Xa=Xa, Xb=Xb,
                   alpha=alpha, beta=beta, gamma=gamma,
                   bounds_func=vars['bounds_func'],
                   delta=delta,
                   age_indices=ai,
                   age_weights=aw):
            """Negative binomial log-likelihood over entries where the
            predicted rate parameter falls below the observed value.

            A per-study rate ``mu_i`` is built from the covariate shifts
            ``exp(Xa . alpha + Xb . beta)`` and the age pattern
            ``exp(gamma)``, passed through ``bounds_func`` and averaged with
            ``age_weights``.  Only the positions where ``mu_i * N < value``
            are handed to ``mc.negative_binomial_like``.
            """
            # calculate study-specific rate function
            linear_shift = np.dot(Xa, alpha) + np.dot(Xb, np.atleast_1d(beta))
            shifts = np.exp(linear_shift)
            exp_gamma = np.exp(gamma)

            # TODO: try vectorizing this loop to increase speed
            mu_i = []
            for s_i, ages, weights in zip(shifts, age_indices, age_weights):
                bounded_rate = bounds_func(s_i * exp_gamma[ages], ages)
                mu_i.append(np.dot(weights, bounded_rate))

            rate_param = mu_i*N
            violated_bounds = np.nonzero(rate_param < value)
            return mc.negative_binomial_like(value[violated_bounds],
                                             rate_param[violated_bounds],
                                             delta)
Exemplo n.º 7
0
        def obs_lb(value=value, N=N,
                   Xa=Xa, Xb=Xb,
                   alpha=alpha, beta=beta, gamma=gamma,
                   bounds_func=vars['bounds_func'],
                   delta=delta,
                   age_indices=ai,
                   age_weights=aw):
            """Log-likelihood restricted to entries where ``mu_i * N < value``.

            ``mu_i`` is the study-specific rate: for each study the shift
            ``exp(Xa . alpha + Xb . beta)`` scales ``exp(gamma)`` at that
            study's age indices, the result goes through ``bounds_func`` and
            is combined with the study's age weights by a dot product.  The
            selected entries are scored with ``mc.negative_binomial_like``
            using ``delta`` as the third argument.
            """
            # calculate study-specific rate function
            covariate_effect = pl.dot(Xa, alpha) + pl.dot(Xb, pl.atleast_1d(beta))
            shifts = pl.exp(covariate_effect)
            exp_gamma = pl.exp(gamma)

            # TODO: try vectorizing this loop to increase speed
            mu_i = []
            for s_i, ages, weights in zip(shifts, age_indices, age_weights):
                rate_at_ages = bounds_func(s_i * exp_gamma[ages], ages)
                mu_i.append(pl.dot(weights, rate_at_ages))

            rate_param = mu_i*N
            violated_bounds = pl.nonzero(rate_param < value)
            selected_values = value[violated_bounds]
            selected_rates = rate_param[violated_bounds]
            return mc.negative_binomial_like(selected_values, selected_rates, delta)
Exemplo n.º 8
0
def obs(pi=pi, delta=delta):
    """Negative binomial log-likelihood of ``r * n`` given ``pi * n``.

    ``r`` and ``n`` are read from the enclosing scope; ``delta`` is passed
    through as the third argument of ``mc.negative_binomial_like``.
    """
    scaled_r = r * n
    scaled_pi = pi * n
    return mc.negative_binomial_like(scaled_r, scaled_pi, delta)
Exemplo n.º 9
0
 def obs(value=value, N=N, mu_i=rates, delta=delta, Z=Z, eta=0.):
     """Negative binomial log-likelihood of ``value`` given ``N * mu_i``.

     The third argument passed to ``mc.negative_binomial_like`` is
     ``delta + eta * Z``; with the default ``eta=0.`` the ``Z`` term is
     multiplied by zero.
     """
     second_arg = N*mu_i
     third_arg = delta + eta*Z
     return mc.negative_binomial_like(value, second_arg, third_arg)
Exemplo n.º 10
0
 def p_obs(value=p, pi=pi, delta=delta, n=n):
     """Negative binomial log-likelihood with the data floored at ``pi * n``.

     The data argument is the elementwise maximum of ``value * n`` and
     ``pi * n``; the second argument is ``pi * n + 1.0e-9`` and the third
     is ``delta``.
     """
     floored_data = pl.maximum(value * n, pi * n)
     second_arg = pi * n + 1.0e-9
     return mc.negative_binomial_like(floored_data, second_arg, delta)
Exemplo n.º 11
0
 def AR_dev(AR=AR, mu=exp_rate, r=r):
     """Return the per-element negative binomial log-likelihoods as an array.

     Element ``i`` of the result is
     ``pm.negative_binomial_like(AR[i], mu[i], r[i])``; the result has one
     entry per element of ``AR``.
     """
     # range (not xrange) keeps this runnable on both Python 2 and Python 3
     return np.array([pm.negative_binomial_like(AR[i], mu[i], r[i]) for i in range(len(AR))])
Exemplo n.º 12
0
 def p_obs(value=p, pi=pi, delta=delta, n=n):
     """Log-likelihood of ``max(value * n, pi * n)`` under a negative binomial.

     ``mc.negative_binomial_like`` receives the elementwise maximum of
     ``value * n`` and ``pi * n`` as data, ``pi * n + 1.e-9`` as its second
     argument and ``delta`` as its third.
     """
     data_arg = pl.maximum(value * n, pi * n)
     second_arg = pi * n + 1.e-9
     loglike = mc.negative_binomial_like(data_arg, second_arg, delta)
     return loglike
Exemplo n.º 13
0
 def p_obs(value=p, pi=pi, delta=delta, n=n):
     """Negative binomial log-likelihood over the entries not flagged by
     ``i_zero``.

     ``i_zero`` is read from the enclosing scope; every argument is indexed
     with ``~i_zero`` before use.  The second argument of
     ``mc.negative_binomial_like`` is offset by ``1.0e-9``.
     """
     data_arg = value[~i_zero] * n[~i_zero]
     second_arg = pi[~i_zero] * n[~i_zero] + 1.0e-9
     third_arg = delta[~i_zero]
     return mc.negative_binomial_like(data_arg, second_arg, third_arg)
Exemplo n.º 14
0
 def p_obs(value=p, pi=pi, delta=delta, n=n):
     """Log-likelihood restricted to the entries selected by ``~i_zero``.

     ``value``, ``pi``, ``n`` and ``delta`` are each indexed with
     ``~i_zero`` (``i_zero`` comes from the enclosing scope); the result of
     ``mc.negative_binomial_like(value * n, pi * n + 1.e-9, delta)`` on
     those entries is returned.
     """
     selected_data = value[~i_zero] * n[~i_zero]
     selected_mean = pi[~i_zero] * n[~i_zero] + 1.e-9
     selected_delta = delta[~i_zero]
     loglike = mc.negative_binomial_like(selected_data, selected_mean,
                                         selected_delta)
     return loglike
Exemplo n.º 15
0
 def AR_dev(AR=AR, mu=exp_rate, r=r):
     """Return an array of elementwise negative binomial log-likelihoods.

     Entry ``i`` is ``pm.negative_binomial_like(AR[i], mu[i], r[i])``, for
     every index ``i`` of ``AR``.
     """
     # xrange only exists on Python 2; range works on both major versions
     return np.array([
         pm.negative_binomial_like(AR[i], mu[i], r[i])
         for i in range(len(AR))
     ])
Exemplo n.º 16
0
        if 'data' in dm.vars[t] and 'p_pred' in dm.vars[t]:
            stats = dm.vars[t]['p_pred'].stats(batches=5)
            dm.vars[t]['data']['mu_pred'] = stats['mean']
            dm.vars[t]['data']['sigma_pred'] = stats['standard deviation']

            stats = dm.vars[t]['pi'].stats(batches=5)
            dm.vars[t]['data']['mc_error'] = stats['mc error']

            dm.vars[t]['data']['residual'] = dm.vars[t]['data'][
                'value'] - dm.vars[t]['data']['mu_pred']
            dm.vars[t]['data']['abs_residual'] = pl.absolute(
                dm.vars[t]['data']['residual'])
            if 'delta' in dm.vars[t]:
                if len(pl.atleast_1d(dm.vars[t]['delta'].value)) == 1:
                    d = pl.atleast_1d(dm.vars[t]['delta'].stats()['mean'])
                    dm.vars[t]['data']['logp'] = [mc.negative_binomial_like(n*p_obs, n*p_pred, n*p_pred*d) for n, p_obs, p_pred  \
                                                  in zip(dm.vars[t]['data']['effective_sample_size'], dm.vars[t]['data']['value'], dm.vars[t]['data']['mu_pred'])]
                else:
                    dm.vars[t]['data']['logp'] = [mc.negative_binomial_like(n*p_obs, n*p_pred, n*p_pred*d) for n, p_obs, p_pred, d \
                                                      in zip(dm.vars[t]['data']['effective_sample_size'], dm.vars[t]['data']['value'], dm.vars[t]['data']['mu_pred'], pl.atleast_1d(dm.vars[t]['delta'].stats()['mean']))]
            try:
                dm.vars[t]['data'].to_csv(
                    dir + '/posterior/data-%s-%s+%s+%s.csv' %
                    (t, predict_area, predict_sex, predict_year))
            except IOError, e:
                print 'WARNING: could not save file'
                print e
        if 'U' in dm.vars[t]:
            re = dm.vars[t]['U'].T
            columns = list(re.columns)
            mu = []
Exemplo n.º 17
0
 def obs(pi=pi, delta=delta):
     """Return ``mc.negative_binomial_like(r*n, pi*n, delta)``.

     ``r`` and ``n`` are taken from the enclosing scope.
     """
     data_arg = r*n
     second_arg = pi*n
     loglike = mc.negative_binomial_like(data_arg, second_arg, delta)
     return loglike
Exemplo n.º 18
0
            continue
        print 'saving tables for', t
        if 'data' in dm.vars[t] and 'p_pred' in dm.vars[t]:
            stats = dm.vars[t]['p_pred'].stats(batches=5)
            dm.vars[t]['data']['mu_pred'] = stats['mean']
            dm.vars[t]['data']['sigma_pred'] = stats['standard deviation']

            stats = dm.vars[t]['pi'].stats(batches=5)
            dm.vars[t]['data']['mc_error'] = stats['mc error']

            dm.vars[t]['data']['residual'] = dm.vars[t]['data']['value'] - dm.vars[t]['data']['mu_pred']
            dm.vars[t]['data']['abs_residual'] = pl.absolute(dm.vars[t]['data']['residual'])
            if 'delta' in dm.vars[t]:
                if len(pl.atleast_1d(dm.vars[t]['delta'].value)) == 1:
                    d = pl.atleast_1d(dm.vars[t]['delta'].stats()['mean'])
                    dm.vars[t]['data']['logp'] = [mc.negative_binomial_like(n*p_obs, n*p_pred, n*p_pred*d) for n, p_obs, p_pred  \
                                                  in zip(dm.vars[t]['data']['effective_sample_size'], dm.vars[t]['data']['value'], dm.vars[t]['data']['mu_pred'])]
                else:
                    dm.vars[t]['data']['logp'] = [mc.negative_binomial_like(n*p_obs, n*p_pred, n*p_pred*d) for n, p_obs, p_pred, d \
                                                      in zip(dm.vars[t]['data']['effective_sample_size'], dm.vars[t]['data']['value'], dm.vars[t]['data']['mu_pred'], pl.atleast_1d(dm.vars[t]['delta'].stats()['mean']))]
            try:
                dm.vars[t]['data'].to_csv(dir + '/posterior/data-%s-%s+%s+%s.csv'%(t, predict_area, predict_sex, predict_year))
            except IOError, e:
                print 'WARNING: could not save file'
                print e
        if 'U' in dm.vars[t]:
            re = dm.vars[t]['U'].T
            columns = list(re.columns)
            mu = []
            sigma = []
            for n in dm.vars[t]['alpha']: