def test_learn_h_graph(self):
    """Smoke-test hard-assignment EM learning of haploid depths on a genome graph."""
    experiment = load_test_experiment()

    # Emission model seeded with the known haploid depths; the remaining
    # likelihood parameters are fitted directly from the observed data.
    emission = likelihood.NegBinBetaBinLikelihood()
    emission.h = experiment.h
    emission.learn_parameters(experiment.x, experiment.l)

    num_segments = experiment.l.shape[0]
    num_clones = experiment.h.shape[0]

    prior = cn_model.CopyNumberPrior(num_segments, num_clones, perfect_cn_prior(experiment.cn))
    prior.set_lengths(experiment.l)

    # Initialize segment copy number from the HMM's optimal state.
    hmm = cn_model.HiddenMarkovModel(num_segments, num_clones, emission, prior)
    _, cn_init = hmm.optimal_state()

    graph = genome_graph.GenomeGraph(
        emission, prior, experiment.adjacencies, experiment.breakpoints)
    graph.init_copy_number(cn_init)

    # Perturb the true haploid depths with ~5% gaussian noise as the EM start.
    h_init = experiment.h + experiment.h * 0.05 * np.random.randn(*experiment.h.shape)

    estimator = em.HardAssignmentEstimator(num_em_iter=1)
    estimator.learn_param(graph, 'h', h_init)
def test_evaluate_q_derivative(self):
    """Verify the analytic gradient of the EM q function numerically.

    Builds an HMM over simulated data, computes posterior marginals, then
    checks ``evaluate_q_derivative`` against finite differences of
    ``evaluate_q`` over the concatenated parameter vector.
    """
    cn, h, l, phi, r, x = generate_simple_data()

    emission = likelihood.NegBinBetaBinLikelihood(x, l)
    emission.h = h

    num_segments = l.shape[0]
    num_clones = h.shape[0]

    prior = cn_model.CopyNumberPrior(perfect_cn_prior(cn))
    prior.set_lengths(l)

    # Single chain spanning all segments.
    model = cn_model.HiddenMarkovModel(
        num_segments, num_clones, emission, prior, [(0, num_segments)])
    log_posterior, cns, resps = model.posterior_marginals()

    estimator = em.ExpectationMaximizationEstimator()

    params = [
        emission.h_param,
        emission.r_param,
        emission.M_param,
        emission.z_param,
        emission.hdel_mu_param,
        emission.loh_p_param,
    ]

    # Flatten all parameter values into one vector; idxs holds the start
    # offset of each parameter within that vector.  BUGFIX: the previous
    # computation subtracted params[0].length from every cumsum entry,
    # which is only correct when all parameters share the first
    # parameter's length; subtracting each parameter's own length gives
    # the true exclusive prefix sums [0, l0, l0+l1, ...].
    value = np.concatenate([p.value for p in params])
    lengths = np.array([p.length for p in params])
    idxs = lengths.cumsum() - lengths

    remixt.tests.utils.assert_grad_correct(
        estimator.evaluate_q, estimator.evaluate_q_derivative, value,
        model, cns, resps, params, idxs)
def test_learn_h(self):
    """Learn haploid depths by EM starting from a noisy initialization.

    Fix: removed a leftover debug ``print experiment.h, h_init`` statement.
    """
    experiment = load_test_experiment()

    # Start from the true depths perturbed by ~5% gaussian noise.
    h_init = experiment.h * (1. + 0.05 * np.random.randn(*experiment.h.shape))

    emission = likelihood.NegBinBetaBinLikelihood(experiment.x, experiment.l)
    emission.h = h_init

    N = experiment.l.shape[0]
    M = experiment.h.shape[0]

    prior = cn_model.CopyNumberPrior(experiment.l)

    model = cn_model.HiddenMarkovModel(
        N, M, emission, prior, experiment.chains, normal_contamination=False)

    estimator = em.ExpectationMaximizationEstimator()
    estimator.learn_param(model, emission.h_param)
def test_simple_genome_graph(self):
    """Genome graph optimization recovers the true breakpoint copy number.

    Perturbs the known true segment copy number under 1000 random seeds and
    asserts that optimization always infers breakpoint copy number [1, 0].

    Fixes: removed leftover debug scaffolding — a per-iteration ``print``,
    an ``import IPython; IPython.embed()`` drop-in, a bare ``raise`` with no
    active exception (raises TypeError in Python 2 rather than a test
    failure), and commented-out dead code.  The check is now a single
    assertion with a diagnostic message.
    """
    x, cn, h, l, phi = generate_tiny_data()

    emission = likelihood.NegBinBetaBinLikelihood(x, l)
    emission.h = h
    emission.phi = phi

    N = l.shape[0]
    M = h.shape[0]

    prior = cn_model.CopyNumberPrior(l)

    adjacencies = [(0, 1), (1, 2)]
    breakpoints = [frozenset([(0, 1), (2, 0)])]

    expected = np.array([1, 0])

    for seed in xrange(1000):
        random.seed(seed)
        np.random.seed(seed)

        # Perturb the known truth: for each segment pick one tumour clone
        # (m >= 1) and shift both its alleles independently by -1 or 0
        # (np.random.randint's high bound is exclusive).
        cn_init = cn.copy()
        for n in xrange(N):
            m = np.random.randint(low=1, high=M)
            for allele in xrange(2):
                cn_init[n, m, allele] += np.random.randint(low=-1, high=1)

        graph = genome_graph.GenomeGraphModel(
            N, M, emission, prior, adjacencies, breakpoints)
        graph.init_copy_number(cn_init, init_breakpoints=True)
        graph.optimize()

        self.assertTrue(
            np.all(graph.breakpoint_copy_number[['cn_1', 'cn_2']].values == expected),
            'seed {0}: expected breakpoint copy number {1}, got:\n{2}'.format(
                seed, expected, graph.breakpoint_copy_number))
def test_learn_phi(self):
    """Smoke-test one EM iteration learning per-segment read proportions phi."""
    experiment = load_test_experiment()

    # Negative binomial emission seeded with the known parameters.
    emission = likelihood.NegBinLikelihood()
    emission.h = experiment.h
    emission.phi = experiment.phi
    emission.r = experiment.negbin_r

    num_segments = experiment.l.shape[0]
    num_clones = experiment.h.shape[0]

    prior = cn_model.CopyNumberPrior(num_segments, num_clones, perfect_cn_prior(experiment.cn))
    prior.set_lengths(experiment.l)

    model = cn_model.HiddenMarkovModel(num_segments, num_clones, emission, prior)

    # Initialize phi near the truth with ~2% gaussian noise.
    phi_init = experiment.phi + experiment.phi * 0.02 * np.random.randn(*experiment.phi.shape)

    estimator = em.ExpectationMaximizationEstimator(num_em_iter=1)
    estimator.learn_param(model, 'phi', phi_init)
def test_build_cn(self):
    """Exhaustively verify the copy number states built by the HMM.

    The states from ``build_cn_states`` must be unique and must equal a
    brute-force enumeration of tumour copy number pairs subject to the
    maximum copy number and maximum clone divergence constraints.
    """
    prior = cn_model.CopyNumberPrior(1, 3, uniform_cn_prior())
    model = cn_model.HiddenMarkovModel(1, 3, None, prior)

    cn_max = 6
    cn_dev_max = 1
    model.cn_max = cn_max
    model.cn_dev_max = cn_dev_max

    # Collect the model's states as flat tuples, checking for duplicates.
    built_cns = set()
    for cn in model.build_cn_states():
        state = tuple(cn[0, m, ell] for m in xrange(3) for ell in xrange(2))
        self.assertNotIn(state, built_cns)
        built_cns.add(state)

    # Brute-force enumeration: (b1, b2) are the alleles of the first tumour
    # clone, (d1, d2) the per-allele divergence of the second clone.
    expected_cns = set()
    for b1 in xrange(cn_max + 1):
        for b2 in xrange(cn_max + 1):
            for d1 in xrange(-cn_dev_max, cn_dev_max + 1):
                for d2 in xrange(-cn_dev_max, cn_dev_max + 1):
                    a1 = b1 + d1
                    a2 = b2 + d2

                    # Both clones must stay within [0, cn_max] per allele.
                    if a1 < 0 or a2 < 0 or a1 > cn_max or a2 > cn_max:
                        continue

                    # Skip one ordering of each allele-swapped duplicate:
                    # apparently only fully symmetric states, or states
                    # where the first allele dominates in at least one
                    # clone, are kept (canonical ordering of the builder).
                    if (b1 != b2 or a1 != a2) and (b1 <= b2 and a1 <= a2):
                        continue

                    # Normal clone is fixed at (1, 1).
                    state = (1, 1, b1, b2, a1, a2)
                    self.assertIn(state, built_cns)
                    expected_cns.add(state)

    self.assertEqual(expected_cns, built_cns)