def update_V_random_grid_pairwise(model, num_points=10):
    """Metropolis-Hastings update of two randomly chosen rows of V.

    Draws a random direction and step size, lays out a grid of candidate
    points along that ray in the joint 2*D space of the two rows, samples
    one candidate proportional to sqrt(grid) * p(candidate), and accepts or
    rejects it against the reverse-grid probabilities. On acceptance the
    proposal is written back to ``model.params``; otherwise the old rows are
    restored. No-op when the model has fewer than two features.
    """
    if model.params.K < 2:
        return

    ka, kb = np.random.choice(model.params.K, 2, replace=False)

    params = model.params.copy()

    curr = params.V[[ka, kb]].flatten()

    D = params.D
    num_dims = 2 * D

    # Isotropic random direction, normalised onto the unit sphere.
    direction = scipy.stats.multivariate_normal.rvs(np.zeros(num_dims), np.eye(num_dims))
    direction /= np.linalg.norm(direction)

    # Random scale for the grid spacing. NOTE(review): gamma.rvs(1, 1) passes
    # loc=1 positionally (support [1, inf)); if Gamma(shape=1, scale=1) was
    # intended, scale= must be given by keyword — confirm against the source
    # this sampler was ported from.
    step = scipy.stats.gamma.rvs(1, 1)

    grid = np.arange(1, num_points + 1)

    # Forward candidates: grid points along the ray from the current state.
    forward = curr[np.newaxis, :] + grid[:, np.newaxis] * step * direction[np.newaxis, :]

    log_p_fwd = np.zeros(num_points)
    for t in range(num_points):
        params.V[[ka, kb]] = forward[t].reshape((2, D))
        log_p_fwd[t] = model.joint_dist.log_p(model.data, params)

    # Abort the move if every candidate is impossible or any evaluation failed;
    # model.params is untouched because we mutated only the copy.
    if np.all(np.isneginf(log_p_fwd)) or np.any(np.isnan(log_p_fwd)):
        return

    try:
        choice = discrete_rvs(np.exp(0.5 * np.log(grid) + log_normalize(log_p_fwd)))
    except ValueError:
        return

    proposal = forward[choice]

    # Reverse candidates: the same ray traced backwards from the proposal.
    backward = proposal[np.newaxis, :] - grid[:, np.newaxis] * step * direction[np.newaxis, :]

    log_p_rev = np.zeros(num_points)
    for t in range(num_points):
        params.V[[ka, kb]] = backward[t].reshape((2, D))
        log_p_rev[t] = model.joint_dist.log_p(model.data, params)

    accepted = do_metropolis_hastings_accept_reject(
        log_sum_exp(log_p_fwd), log_sum_exp(log_p_rev), 0, 0)

    if accepted:
        params.V[[ka, kb]] = proposal.reshape((2, D))
    else:
        params.V[[ka, kb]] = curr.reshape((2, D))

    model.params = params
def update_V_random_grid(model, num_points=10):
    """Metropolis-Hastings update of the whole V matrix along a random grid ray.

    Same scheme as ``update_V_random_grid_pairwise`` but applied to the
    flattened K * D matrix: sample a unit direction and a step size, evaluate
    ``num_points`` grid candidates, pick one proportional to
    sqrt(grid) * p(candidate), and accept/reject against the reverse grid.
    On acceptance the proposal is written back to ``model.params``; otherwise
    V is restored. No-op when the model has fewer than two features.
    """
    if model.params.K < 2:
        return

    params = model.params.copy()

    old = params.V.flatten()

    K, D = params.V.shape

    dim = K * D

    # Isotropic random direction, normalised onto the unit sphere.
    e = scipy.stats.multivariate_normal.rvs(np.zeros(dim), np.eye(dim))
    e /= np.linalg.norm(e)

    # Random scale for the grid spacing. NOTE(review): gamma.rvs(1, 1) passes
    # loc=1 positionally (support [1, inf)); confirm whether scale=1 was meant.
    r = scipy.stats.gamma.rvs(1, 1)

    grid = np.arange(1, num_points + 1)

    ys = old[np.newaxis, :] + grid[:, np.newaxis] * r * e[np.newaxis, :]

    log_p_new = np.zeros(num_points)
    for i in range(num_points):
        params.V = ys[i].reshape((K, D))
        log_p_new[i] = model.joint_dist.log_p(model.data, params)

    # Guard against a fully degenerate grid (all -inf) or failed likelihood
    # evaluations (nan), matching update_V_random_grid_pairwise. Previously a
    # nan weight or a ValueError raised by discrete_rvs would crash the
    # sampler here. model.params is untouched since only the copy was mutated.
    if np.all(np.isneginf(log_p_new)) or np.any(np.isnan(log_p_new)):
        return

    try:
        idx = discrete_rvs(np.exp(0.5 * np.log(grid) + log_normalize(log_p_new)))
    except ValueError:
        return

    new = ys[idx]

    # Reverse grid traced backwards from the proposal, for the MH correction.
    xs = new[np.newaxis, :] - grid[:, np.newaxis] * r * e[np.newaxis, :]

    log_p_old = np.zeros(num_points)
    for i in range(num_points):
        params.V = xs[i].reshape((K, D))
        log_p_old[i] = model.joint_dist.log_p(model.data, params)

    if do_metropolis_hastings_accept_reject(log_sum_exp(log_p_new), log_sum_exp(log_p_old), 0, 0):
        params.V = new.reshape((K, D))
    else:
        params.V = old.reshape((K, D))

    model.params = params
def get_exact_posterior(model):
    """Compute the exact posterior over Z by exhaustive enumeration.

    Iterates every binary feature matrix of shape (N, K), scores each via
    ``model.log_p`` (mutating ``model.params.Z`` in place), and returns a
    dict mapping the flattened matrix (as a tuple) to its normalized
    posterior probability. Only feasible for very small N and K.
    """
    keys = []
    log_probs = []

    for Z in get_all_binary_matrices(model.params.K, model.data.shape[0]):
        keys.append(tuple(Z.flatten()))

        model.params.Z = Z

        log_probs.append(model.log_p)

    probs = np.exp(log_normalize(np.array(log_probs)))

    return dict(zip(keys, probs))
def do_row_gibbs_update(cols, data, dist, feat_probs, params, row_idx, Zs):
    """Gibbs-sample row ``row_idx`` of Z from the candidate rows in ``Zs``.

    Each candidate's unnormalized log probability is its Bernoulli prior
    (from ``feat_probs``, restricted to ``cols``) plus the data likelihood of
    the row. A candidate is drawn via the Gumbel trick and written into
    ``params.Z``; the (mutated) params object is returned.
    """
    log_on = np.log(feat_probs[cols])
    log_off = np.log(1 - feat_probs[cols])

    log_p = np.zeros(len(Zs))

    for i, z in enumerate(Zs):
        params.Z[row_idx] = z

        prior = np.sum(z[cols] * log_on) + np.sum((1 - z[cols]) * log_off)

        log_p[i] = prior + dist.log_p_row(data, params, row_idx)

    log_p = log_normalize(log_p)

    choice = discrete_rvs_gumbel_trick(log_p)

    params.Z[row_idx] = Zs[choice]

    return params
def get_sample_data_point(a, b, cn_major, cn_minor, cn_normal=2, error_rate=1e-3, tumour_content=1.0):
    """Build a SampleDataPoint for a mutation with the given read counts and copy number.

    Enumerates the mutational genotype states compatible with the copy-number
    call: one state per possible variant multiplicity when the mutation
    precedes the CN change, plus (if distinct) the single-copy state for a
    mutation arising after the CN change. All states get equal prior weight.

    Args:
        a: Variant read count.
        b: Reference read count.
        cn_major, cn_minor: Major/minor copy number of the tumour segment.
        cn_normal: Copy number of the normal population (default diploid).
        error_rate: Sequencing error rate used as the baseline VAF.
        tumour_content: Fraction of tumour cells in the sample.

    Returns:
        A SampleDataPoint with integer counts, genotype copy numbers ``cn``,
        expected VAFs ``mu`` and normalized log genotype priors ``log_pi``.
    """
    cn_total = cn_major + cn_minor

    cn = []
    mu = []
    log_pi = []

    # Consider all possible mutational genotypes consistent with mutation before CN change
    for x in range(1, cn_major + 1):
        cn.append((cn_normal, cn_normal, cn_total))
        mu.append((error_rate, error_rate, min(1 - error_rate, x / cn_total)))
        log_pi.append(0)

    # Consider mutational genotype of mutation after CN change if not already added
    mutation_after_cn = (cn_normal, cn_total, cn_total)
    if mutation_after_cn not in cn:
        cn.append(mutation_after_cn)
        mu.append((error_rate, error_rate, min(1 - error_rate, 1 / cn_total)))
        log_pi.append(0)

    # np.int / np.float were removed in NumPy 1.24; use the builtin-backed
    # dtypes they aliased so this keeps working on current NumPy.
    cn = np.array(cn, dtype=int)
    mu = np.array(mu, dtype=float)

    log_pi = log_normalize(np.array(log_pi, dtype=np.float64))

    return SampleDataPoint(int(a), int(b), cn, mu, log_pi, tumour_content)
def _propose_split(self, anchors, features, model, V, Z, Z_target=None):
    """Propose splitting feature ``features[0]`` into two new features.

    Builds (V_new, Z_new) with the split feature removed and two fresh
    columns appended (weights split by a random convex combination), seeds
    the two anchor rows, then sequentially allocates the remaining active
    rows to one or both new features with probabilities proportional to
    count-weighted row likelihoods. Returns ``(V_new, Z_new, log_q)`` where
    ``log_q`` is the log proposal probability of the sampled allocation.
    If ``Z_target`` is given, the allocation is forced to match it (used to
    compute the reverse-proposal probability) instead of being sampled.

    NOTE(review): assumes both anchors i and j are active in the split
    feature (``Z[:, k_m] == 1``) — ``active_set.remove`` raises otherwise;
    also ``np.squeeze`` would fail if fewer than two rows were active.
    Confirm the caller guarantees this.
    """
    k_m = features[0]

    i, j = anchors

    _, D = V.shape
    N, K = Z.shape

    # New parameter arrays with one extra feature (the split feature is
    # dropped and two new columns occupy the last two slots).
    V_new = np.zeros((K + 1, D), dtype=V.dtype)
    Z_new = np.zeros((N, K + 1), dtype=Z.dtype)

    # Copy over every feature that is not being removed by the split.
    idx = 0
    for k in range(K):
        if k in features:
            continue
        V_new[idx] = V[k]
        Z_new[:, idx] = Z[:, k]
        idx += 1

    # Split the old feature's weights by a random elementwise convex
    # combination between the two new features.
    weight = np.random.random(D)
    V_new[-1] = weight * V[k_m]
    V_new[-2] = (1 - weight) * V[k_m]

    # Each anchor is deterministically assigned to its own new feature.
    Z_new[i, -1] = 1
    Z_new[j, -2] = 1

    # Remaining rows active in the split feature, visited in random order.
    active_set = list(np.squeeze(np.where(Z[:, k_m] == 1)))
    active_set.remove(i)
    active_set.remove(j)
    np.random.shuffle(active_set)

    log_q = 0

    log_p = np.zeros(3)

    # params.Z aliases Z_new from here on: updates to params.Z below are
    # visible in Z_new (asserted at the end).
    params = model.params.copy()
    params.V = V_new
    params.Z = Z_new

    # Number of rows allocated so far (the two anchors count).
    N_prev = 2

    for idx in active_set:  # + [i, j]:
        if idx not in [i, j]:
            N_prev += 1

        # Current membership counts of the two new features.
        m_a = np.sum(Z_new[:, -1])
        m_b = np.sum(Z_new[:, -2])

        # State 0: row joins feature a only.
        params.Z[idx, -1] = 1
        params.Z[idx, -2] = 0
        log_p[0] = np.log(m_a) + np.log(N_prev - m_b) + model.data_dist.log_p_row(
            model.data, params, idx)

        # State 1: row joins feature b only.
        params.Z[idx, -1] = 0
        params.Z[idx, -2] = 1
        log_p[1] = np.log(N_prev - m_a) + np.log(m_b) + model.data_dist.log_p_row(
            model.data, params, idx)

        # State 2: row joins both features.
        params.Z[idx, -1] = 1
        params.Z[idx, -2] = 1
        log_p[2] = np.log(m_a) + np.log(m_b) + model.data_dist.log_p_row(
            model.data, params, idx)

        log_p = log_normalize(log_p)

        if Z_target is None:
            # Sample an allocation state for this row.
            state = discrete_rvs(np.exp(log_p))
        else:
            # Forced path: read the state off the target configuration so the
            # same code computes the reverse-proposal probability.
            if np.all(Z_target[idx] == np.array([1, 0])):
                state = 0
            elif np.all(Z_target[idx] == np.array([0, 1])):
                state = 1
            elif np.all(Z_target[idx] == np.array([1, 1])):
                state = 2
            else:
                raise Exception('Invalid')

        # Commit the chosen allocation (also fixes the trial values written
        # into params.Z above, since params.Z is Z_new).
        if state == 0:
            Z_new[idx, -1] = 1
            Z_new[idx, -2] = 0
        elif state == 1:
            Z_new[idx, -1] = 0
            Z_new[idx, -2] = 1
        elif state == 2:
            Z_new[idx, -1] = 1
            Z_new[idx, -2] = 1
        else:
            raise Exception('Invalid state')

        # Accumulate the log proposal probability of this choice.
        log_q += log_p[state]

    assert Z_new is params.Z

    return V_new, Z_new, log_q