def calc_lik_with_clonal(combo, si, di, phi_i, pi, ni):
    # calculate with the given phi
    # (lls currently uses a precision fudge factor to get around
    # zero-probability errors when pv = 1)
    pvs = np.array([get_pv(phi_i, c, pi, ni) for c in combo])
    lls = np.array([pm.binomial_like(si, di, pvs[i])
                    for i, c in enumerate(combo)]) - 0.00000001

    # calculate with clonal phi (phi = 1)
    pvs_cl = np.array([get_pv(np.array(1), c, pi, ni) for c in combo])
    lls_cl = np.array([pm.binomial_like(si, di, pvs_cl[i])
                       for i, c in enumerate(combo)]) - 0.00000001

    return np.array([[pvs, lls], [pvs_cl, lls_cl]])
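# pm.binomial_like(x, n, p) is PyMC 2's binomial log-likelihood (the full log
# pmf, including the binomial coefficient). A quick way to sanity-check values
# like lls above is against scipy.stats.binom.logpmf, which computes the same
# quantity -- a minimal sketch; si, di and pv here are made-up counts, not
# values from the code above:
import pymc as pm
from scipy.stats import binom

si, di, pv = 7, 20, 0.3               # illustrative support, depth, success probability
print(pm.binomial_like(si, di, pv))   # PyMC 2 log-likelihood
print(binom.logpmf(si, di, pv))       # same value from scipy, up to float rounding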
def test_withzeros(self):
    "Makes sure no NaNs happen when some probabilities are zero."
    q = np.zeros(npix)
    q[0] = .99

    # Log-distribution of number of pixels positive.
    lpf = anopheles.utils.ubl(q)
    lpp = unequal_binomial_lp(q)
    pp = np.exp(lpp)
    pf = np.exp(lpf)
    assert_equal(pp, pf)

    lpb = anopheles.utils.bin_ubl(3, 8, .1, q)
    assert(not np.isnan(lpb))
    assert(not np.any(np.isnan(pf)))

    # Binomial mixture from Fortran.
    pbf = np.exp([anopheles.utils.bin_ubl(x, n_obs, prob_detect, q)
                  for x in xrange(n_obs + 1)])

    # Do binomial mixture by hand (using n_obs so it matches pbf).
    pbp = np.zeros(n_obs + 1)
    for i in xrange(npix + 1):
        pbp += np.exp([pm.binomial_like(x, n_obs, prob_detect * float(i) / npix) + lpf[i]
                       for x in xrange(n_obs + 1)])
    assert_almost_equal(pbf, pbp)
def obs(pi=pi, phi=phi):
    # nonzero rows: not an excess zero (probability 1 - phi), times the binomial term
    logp = pl.log(1 - phi) * num_nonzeros + mc.binomial_like(
        r[nonzeros] * n[nonzeros], n[nonzeros], pi[nonzeros])
    # zero rows: each is either an excess zero (phi) or a binomial zero
    # ((1 - pi_i)^n_i); this term is summed exactly once over the zero rows
    logp += pl.log(phi + (1 - phi) * pl.exp(pl.log(1 - pi[~nonzeros]) * n[~nonzeros])).sum()
    return logp
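# For reference, the zero-inflated binomial log-likelihood that obs computes
# can be written out directly with scipy -- a minimal self-contained sketch
# (the toy arrays are illustrative, not data from the model above):
import numpy as np
from scipy.stats import binom

def zib_logp(r, n, pi, phi):
    """Zero-inflated binomial: each row is an 'excess zero' with probability
    phi, otherwise Binomial(n_i, pi_i); r are observed rates, n sample sizes."""
    k = np.round(r * n).astype(int)
    nz = k > 0
    logp = np.log(1 - phi) * nz.sum() + binom.logpmf(k[nz], n[nz], pi[nz]).sum()
    logp += np.log(phi + (1 - phi) * (1 - pi[~nz]) ** n[~nz]).sum()
    return logp

print(zib_logp(np.array([.1, 0., .05]), np.array([50, 40, 60]),
               np.array([.08, .05, .06]), .1))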
def N_pos_now(value=pm.utils.round_array(pos[this_slice]),
              n=pm.utils.round_array(pos[this_slice] + neg[this_slice]),
              eps_p_f=eps_p_f_now,
              a1=a1,
              a2=a2):
    return pm.binomial_like(value, n=n, p=pm.flib.stukel_invlogit(eps_p_f, a1, a2))
def test_binomial_case(self):
    """Checks for correspondence with the binomial distribution in the
    case of equal presence probabilities."""
    q = np.ones(5) * .2
    lpf, lpp, pbf, pbp = standard_things(q)
    # With equal probabilities, the log-p of the number of pixels positive can
    # be computed directly (assumes the module-level npix equals len(q)).
    lpo = np.array([pm.binomial_like(x, npix, q[0]) for x in range(npix + 1)])
    assert_almost_equal(lpf, lpo)
    assert_almost_equal(lpp, lpo)
def standard_things(q, prob_detect=prob_detect, n_obs=n_obs):
    npix = len(q)

    # Log-distribution of number of pixels positive.
    lpf = anopheles.utils.ubl(q)
    lpp = unequal_binomial_lp(q)
    assert_equal(lpf, lpp)

    # Binomial mixture from Fortran.
    pbf = np.exp([anopheles.utils.bin_ubl(x, n_obs, prob_detect, q)
                  for x in xrange(n_obs + 1)])

    # Do binomial mixture by hand.
    pbp = np.zeros(n_obs + 1)
    for i in xrange(npix + 1):
        pbp += np.exp([pm.binomial_like(x, n_obs, prob_detect * float(i) / npix) + lpf[i]
                       for x in xrange(n_obs + 1)])
    assert_almost_equal(pbf, pbp)

    return lpf, lpp, pbf, pbp
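# The identity standard_things checks is a finite mixture: given I positive
# pixels, the count of positive observations is Binomial(n_obs, prob_detect * I/npix),
# so P(X = x) = sum_i P(X = x | I = i) * P(I = i). A self-contained scipy sketch
# of the same computation, using an ordinary binomial for the pixel count so it
# runs without the anopheles Fortran helpers (all constants are illustrative):
import numpy as np
from scipy.stats import binom

npix, n_obs, prob_detect, q0 = 10, 8, .1, .2

# log-distribution of the number of positive pixels (equal-probability case)
lpf = binom.logpmf(np.arange(npix + 1), npix, q0)

# binomial mixture by hand, as in the test
pbp = np.zeros(n_obs + 1)
for i in range(npix + 1):
    pbp += np.exp(binom.logpmf(np.arange(n_obs + 1), n_obs,
                               prob_detect * float(i) / npix) + lpf[i])

print(pbp.sum())  # mixture probabilities sum to 1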
def like(self, fdia=256, D=14.0**2 * pi, r=0.078, x0=29.7, y0=91.7, oo=-9.0,
         pe=0.414, dinvasion=296.4, Kbaldio=0.0029, Dbaldio=14.0**2 * pi,
         per=0.0, mxr=1.0, vparams=False, hilosgpu=32, v=0.0024, vermapa=False):
    # print fdia, D, r, x0, y0, oo, pe, dinvasion, Kbaldio, Dbaldio, per, mxr, hilosgpu, v
    self.simula(fdia, D, r, x0, y0, oo, pe, dinvasion, Kbaldio, Dbaldio, per, mxr,
                hilosgpu=hilosgpu, v=v, vermapa=vermapa)
    # probabilities are clipped away from 0 and 1 so the log-likelihood stays finite
    l0 = binomial_like(self.mosquitos[:, 4], 1., clip(self.esperados, 1e-6, 1 - 1e-6))
    if isnan(l0) or isinf(l0):
        l0 = -1e9
    self.likelihood = l0
    return l0
def calc_lik(combo, si, di, phi_i, pi, ni):
    # (lls uses the same precision fudge factor as calc_lik_with_clonal)
    pvs = np.array([get_pv(phi_i, c, pi, ni) for c in combo])
    lls = np.array([pm.binomial_like(si, di, pvs[i])
                    for i, c in enumerate(combo)]) - 0.00000001
    return np.array([pvs, lls])
def data_vivax(value=vivax_pos[where_vivax], splrep=None, p=p_vivax,
               n=np.sum(cur_obs, axis=1)):
    return pm.binomial_like(x=value, n=n, p=p)
def obs(pi=pi):
    return mc.binomial_like(r * n, n, pi)
pop_C_prev = pop_C_k.stats()["quantiles"][50] / float(pop_C_N)
pop_C_prev_per_1000 = "%.0f" % (pop_C_prev * 1000)
print pop_C_prev_per_1000

pop_C_ui = pop_C_k.stats()["95% HPD interval"] / float(pop_C_N)
pop_C_ui_per_1000 = "[%.0f, %.0f]" % tuple(pop_C_ui * 1000)
print pop_C_ui_per_1000

### @export 'binomial-model-ppc'
r = pl.array(schiz["r"])
n = pl.array(schiz["n"], dtype=int)
k = r * n

pi = mc.Uninformative("pi", value=0.5)

@mc.potential
def obs(pi=pi):
    return mc.binomial_like(k, n, pi)

@mc.deterministic
def pred(pi=pi):
    return mc.rbinomial(n, pi)

mc.MCMC([pi, obs, pred]).sample(20000, 10000, 10, verbose=False, progress_bar=False)

pl.figure(**book_graphics.quarter_page_params)
def survey_likelihood(sp_sub, survey_plan, data, i, a1, a2):
    data_ = np.ones_like(sp_sub) * data[i]
    return pm.binomial_like(data_, survey_plan.n[i], pm.stukel_invlogit(sp_sub, a1, a2))
def p_obs(value=p, pi=pi, n=n):
    # small jitter keeps p strictly positive, avoiding -inf log-likelihoods when pi == 0
    return mc.binomial_like(value * n, n, pi + 1.0e-9)
def d_now(value=vivax_pos[i], splrep=splreps[i_vivax], p=p, n=np.sum(cur_obs)):
    return pm.binomial_like(x=value, n=n, p=p)
r = k / n

iter = 20000
burn = 10000
thin = 10
results = {}
xmax = .07

### @export 'distribution-comparison'
pl.figure(**book_graphics.quarter_page_params)
ax = pl.axes([.1, .3, .85, .65])

x = pl.arange(0, n_small * pi_true * 4, .1)

# plot binomial distribution
y1 = [pl.exp(mc.binomial_like(x_i, n_small, pi_true)) for x_i in x]
pl.step(x, y1, 'k', linewidth=1, linestyle='steps:', alpha=.8, label='Binomial')

# plot poisson distribution
y2 = [pl.exp(mc.poisson_like(x_i, n_small * pi_true)) for x_i in x]
pl.plot(x, y2, 'k', linewidth=1, linestyle='steps--', alpha=.8, label='Poisson')

pl.legend(loc='upper right', fancybox=True, shadow=True)
pl.yticks([0, .05])
pl.xticks([25, 50, 75], ['', '', ''])
pl.axis([-.1, n_small * pi_true * 4, -.02, 1.1 * max(y1)])
pl.xlabel('Count')
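# The plot contrasts the binomial and Poisson pmfs at the same mean; for large
# n and small pi they nearly coincide, since Binomial(n, pi) converges to
# Poisson(n * pi). A quick scipy check of that convergence -- n_small and
# pi_true here are illustrative stand-ins for the script's values:
import numpy as np
from scipy.stats import binom, poisson

n_small, pi_true = 1000, .005
ks = np.arange(int(n_small * pi_true * 4))
gap = np.abs(binom.pmf(ks, n_small, pi_true) - poisson.pmf(ks, n_small * pi_true))
print(gap.max())  # tiny: the two pmfs are nearly indistinguishable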
def survey_likelihood(x, survey_plan, data, i):
    data_ = np.ones_like(x) * data[i]
    return pm.binomial_like(data_, survey_plan.n[i], pm.invlogit(x))
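# Both survey_likelihood variants push a latent field value through an inverse
# link before the binomial term: pm.invlogit is the logistic 1/(1 + exp(-x)),
# and pm.stukel_invlogit is Stukel's generalization, whose shape parameters
# a1, a2 reduce to the plain logistic at a1 = a2 = 0. A minimal check of the
# logistic (input values are illustrative):
import numpy as np
import pymc as pm

x = np.array([-2., 0., 2.])
print(pm.invlogit(x))             # PyMC 2 inverse-logit
print(1. / (1. + np.exp(-x)))     # same curve, written out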
def p_obs(value=p, pi=pi_latent, n=n):
    pi_flat = pl.array(pi)
    return mc.binomial_like((value * n)[i_nonzero], n[i_nonzero], pi_flat[i_nonzero])
def obs(value=r, n=n, logit_p=logit_p):
    return mc.binomial_like(value, n, mc.invlogit(logit_p))
def post_process_clusters(mcmc, sv_df, snv_df, clus_out_dir, sup, dep, norm, cn_states,
                          sparams, cparams, output_params, map_):
    merge_clusts = cparams['merge_clusts']
    subclone_diff = cparams['subclone_diff']
    phi_limit = cparams['phi_limit']
    cnv_pval = cparams['clonal_cnv_pval']
    hpd_alpha = cparams['hpd_alpha']
    adjust_phis = cparams['adjust_phis']
    clus_penalty = output_params['cluster_penalty']
    smc_het = output_params['smc_het']
    plot = output_params['plot']

    try:
        sv_df = sv_df[sv_df.classification.values != 'SIMU_SV']
    except AttributeError:
        pass

    npoints = len(snv_df) + len(sv_df)
    sup, dep, norm, cn_states = sup[:npoints], dep[:npoints], norm[:npoints], cn_states[:npoints]

    z_trace = mcmc.trace('z')[:]

    # assign points to highest probability cluster
    clus_counts = [np.bincount(z_trace[:, i]) for i in range(npoints)]
    clus_max_prob = [index_max(c) for c in clus_counts]
    clus_mp_counts = np.bincount(clus_max_prob)
    clus_idx = np.nonzero(clus_mp_counts)[0]
    clus_mp_counts = clus_mp_counts[clus_idx]

    # cluster distribution
    clus_info = pd.DataFrame(clus_idx, columns=['clus_id'])
    clus_info['size'] = clus_mp_counts

    if len(clus_info) < 1:
        print("Warning! Could not converge on any major SV clusters. Skipping.\n")
        return None

    center_trace = mcmc.trace("phi_k")[:]

    phis = np.array([mean_confidence_interval(center_trace[:, cid], hpd_alpha)
                     for cid in clus_idx])
    original_phis = phis.copy()
    adjusted_phis = get_adjusted_phis(clus_info, center_trace, cparams)

    hpd_lo = '_'.join([str(int(100 - (100 * hpd_alpha))), 'HPD', 'lo'])
    hpd_hi = '_'.join([str(int(100 - (100 * hpd_alpha))), 'HPD', 'hi'])

    phis = adjusted_phis if adjust_phis else phis
    clus_info['phi'] = phis[:, 0]
    clus_info[hpd_lo] = phis[:, 1]
    clus_info[hpd_hi] = phis[:, 2]

    if adjust_phis:
        clus_info['phi_unadjusted'] = original_phis[:, 0]
        clus_info['%s_unadjusted' % hpd_lo] = original_phis[:, 1]
        clus_info['%s_unadjusted' % hpd_hi] = original_phis[:, 2]
    else:
        clus_info['phi_adjusted'] = adjusted_phis[:, 0]
        clus_info['%s_adjusted' % hpd_lo] = adjusted_phis[:, 1]
        clus_info['%s_adjusted' % hpd_hi] = adjusted_phis[:, 2]

    clus_ids = clus_info.clus_id.values
    clus_members = np.array([np.where(np.array(clus_max_prob) == i)[0] for i in clus_ids])

    col_names = map(lambda x: 'cluster' + str(x), clus_ids)
    df_probs = pd.DataFrame(clus_counts, dtype=float)[clus_ids].fillna(0)
    df_probs = df_probs.apply(lambda x: x / sum(x), axis=1)
    df_probs.columns = col_names

    # cluster certainty
    clus_max_df = pd.DataFrame(clus_max_prob, columns=['most_likely_assignment'])
    phi_cols = ["average_ccf", hpd_lo, hpd_hi]
    phi_matrix = pd.DataFrame(phis[:], index=clus_ids, columns=phi_cols).loc[clus_max_prob]
    phi_matrix.index = range(len(phi_matrix))
    ccert = clus_max_df.join(phi_matrix)
    clus_info.index = range(len(clus_info))

    print('\n\n')
    print(clus_info[['clus_id', 'size', 'phi']])
    print('Compiling and writing output...')

    # snvs-only runs dump traces under a separate subdirectory
    dump_out_dir = clus_out_dir
    if len(snv_df) > 0 and len(sv_df) == 0:
        dump_out_dir = '%s/snvs' % clus_out_dir
    trace_out = '%s/' % dump_out_dir

    write_output.dump_trace(center_trace, trace_out + 'phi_trace.txt')
    write_output.dump_trace(z_trace, trace_out + 'z_trace.txt')
    try:
        alpha_trace = mcmc.trace('alpha')[:]
        write_output.dump_trace(alpha_trace, trace_out + 'alpha_trace.txt')
    except KeyError:
        pass

    # cluster plotting
    if plot:
        plot_clusters(mcmc.trace, clus_idx, clus_max_prob, sup, dep, clus_out_dir, cparams)

    # merge clusters
    if len(clus_info) > 1 and merge_clusts:
        clus_merged = pd.DataFrame(columns=clus_info.columns, index=clus_info.index)
        clus_merged, clus_members, merged_ids = \
            merge_clusters(clus_out_dir, clus_info, clus_merged, clus_members, [],
                           sup, dep, norm, cn_states, sparams, cparams)
        if len(clus_merged) != len(clus_info):
            clus_info = clus_merged
            df_probs, ccert = merge_results(clus_merged, merged_ids, df_probs, ccert)

    snv_probs = pd.DataFrame()
    snv_ccert = pd.DataFrame()
    snv_members = np.empty(0)

    z_phi = get_per_variant_phi(z_trace, center_trace)

    # compile run fit statistics
    run_fit = pd.DataFrame()
    if map_ is not None:
        nclus = len(clus_info)
        # bic = -2 * map_.lnL + (1 + npoints + nclus * 2) + (nclus * clus_penalty) * np.log(npoints)
        phis = ccert.average_ccf.values
        cns, pvs = cluster.get_most_likely_cn_states(cn_states, sup, dep, phis,
                                                     sparams['pi'], cnv_pval, norm)
        lls = []
        for si, di, pvi in zip(sup, dep, pvs):
            lls.append(pm.binomial_like(si, di, pvi))
        svc_ic = -2 * np.sum(lls) + (npoints + nclus * clus_penalty) * np.log(npoints)
        run_fit = pd.DataFrame([['svc_IC', svc_ic], ['BIC', map_.BIC], ['AIC', map_.AIC],
                                ['AICc', map_.AICc], ['lnL', map_.lnL], ['logp', map_.logp],
                                ['logp_at_max', map_.logp_at_max], ['param_len', map_.len],
                                ['data_len', map_.data_len]])

    if len(snv_df) > 0:
        snv_pos = ['chrom', 'pos']
        snv_probs = df_probs.loc[:len(snv_df) - 1]
        snv_probs = snv_df[snv_pos].join(snv_probs)

        snv_ccert = ccert.loc[:len(snv_df) - 1]
        snv_ccert = snv_df[snv_pos].join(snv_ccert)

        snv_max_probs = np.array(clus_max_prob)[:len(snv_df)]
        snv_members = np.array([np.where(snv_max_probs == i)[0] for i in clus_ids])

        snv_sup = sup[:len(snv_df)]
        snv_dep = dep[:len(snv_df)]
        snv_norm = norm[:len(snv_df)]
        snv_cn_states = cn_states[:len(snv_df)]
        snv_z_phi = z_phi[:len(snv_df)]

        write_output.write_out_files(snv_df, clus_info.copy(), snv_members, snv_probs,
                                     snv_ccert, clus_out_dir, sparams['sample'],
                                     sparams['pi'], snv_sup, snv_dep, snv_norm,
                                     snv_cn_states, run_fit, smc_het, cnv_pval,
                                     snv_z_phi, are_snvs=True)

    sv_probs = pd.DataFrame()
    sv_ccert = pd.DataFrame()
    sv_members = np.empty(0)
    if len(sv_df) > 0:
        lb = len(snv_df) if len(snv_df) > 0 else 0
        sv_pos = ['chr1', 'pos1', 'dir1', 'chr2', 'pos2', 'dir2']
        sv_probs = df_probs.loc[lb:lb + len(sv_df) - 1]
        sv_probs.index = sv_df.index
        sv_probs = sv_df[sv_pos].join(sv_probs)

        sv_ccert = ccert.loc[lb:lb + len(sv_df) - 1]
        sv_ccert.index = sv_df.index
        sv_ccert = sv_df[sv_pos].join(sv_ccert)

        # SV assignments follow the SNV block, so slice from lb
        sv_max_probs = np.array(clus_max_prob)[lb:lb + len(sv_df)]
        sv_members = np.array([np.where(sv_max_probs == i)[0] for i in clus_ids])

        sv_sup = sup[lb:lb + len(sv_df)]
        sv_dep = dep[lb:lb + len(sv_df)]
        sv_norm = norm[lb:lb + len(sv_df)]
        sv_cn_states = cn_states[lb:lb + len(sv_df)]
        sv_z_phi = z_phi[lb:lb + len(sv_df)]

        write_output.write_out_files(sv_df, clus_info.copy(), sv_members, sv_probs,
                                     sv_ccert, clus_out_dir, sparams['sample'],
                                     sparams['pi'], sv_sup, sv_dep, sv_norm,
                                     sv_cn_states, run_fit, smc_het, cnv_pval, sv_z_phi)
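# The svc_IC statistic above is a BIC-style score: twice the negative binomial
# log-likelihood of the supporting reads given depth and the fitted success
# probabilities, plus a complexity penalty that grows with the cluster count.
# A self-contained sketch of the same arithmetic with scipy -- the toy counts
# and penalty below are illustrative, not values from a real run:
import numpy as np
from scipy.stats import binom

sup = np.array([12, 30, 5])        # supporting reads per variant
dep = np.array([40, 100, 25])      # read depth per variant
pvs = np.array([.31, .29, .22])    # fitted per-variant success probabilities

lls = binom.logpmf(sup, dep, pvs)
npoints, nclus, clus_penalty = len(sup), 2, 3
svc_ic = -2 * lls.sum() + (npoints + nclus * clus_penalty) * np.log(npoints)
print(svc_ic)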
def obs(pi=pi):
    return mc.binomial_like(k, n, pi)