def test_makeContinuousPsubDefn():
    """GeneralBen populated from a General fit should match it edge by edge.

    For every tip edge the rate matrix scaled by the edge length must agree
    between the two likelihood functions, the expected numbers of
    substitutions must agree, and the GeneralBen length must equal its own
    expected number of substitutions.
    """
    general_lf = inflate_likelihood_function(_General)
    ben_model = GeneralBen(DNA.Alphabet, recode_gaps=True, model_gaps=False,
                           optimise_motif_probs=True, name='GeneralBen')
    ben_lf = ben_model.makeLikelihoodFunction(general_lf.tree)
    populate_parameters(ben_lf, general_lf,
                        is_independent=True, is_constant=False)

    ben_ens = get_expected_no_subs(ben_lf)
    ben_lengths = ben_lf.getParamValueDict(['edge'])['length']
    general_ens = get_expected_no_subs(general_lf)

    for tip in general_lf.tree.getTipNames():
        ben_scaled = (np.array(ben_lf.getRateMatrixForEdge(tip))
                      * ben_lf.getParamValue('length', tip))
        general_scaled = (np.array(general_lf.getRateMatrixForEdge(tip))
                          * general_lf.getParamValue('length', tip))
        assert_array_almost_equal(ben_scaled, general_scaled)
        assert_almost_equal(ben_ens[tip], general_ens[tip])
        assert_almost_equal(ben_lengths[tip], ben_ens[tip])
def test_hetero_fit():
    """hetero_fit should recover the GTR plus Gamma expected substitutions.

    An alignment obtained for the 'GTRplusGamma' fixture is refitted and the
    expected number of substitutions of the last returned likelihood
    function is compared, to two decimals, with the fixture's own values.
    """
    reference_lf = nest.inflate_likelihood_function(_GTRplusGamma)
    expected = nest.get_expected_no_subs(reference_lf)

    alignment = get_aln('GTRplusGamma', 100000)
    fitted = nest.hetero_fit(alignment, reference_lf.tree,
                             param_limit=20, return_lfs=True)
    observed = nest.get_expected_no_subs(fitted[-1])

    for taxon in expected:
        assert_almost_equal(observed[taxon], expected[taxon], decimal=2)
def test_hetero_clock_fit():
    """hetero_clock_fit should fit a clock-constrained GTR plus Gamma model
    nested in a GTR plus Gamma model.

    The equal-length fit must have a lower log likelihood than the other
    fit, and both must reproduce the fixture's expected numbers of
    substitutions to two decimals.
    """
    reference_lf = nest.inflate_likelihood_function(_GTRplusGammaClockTest)
    expected = nest.get_expected_no_subs(reference_lf)

    alignment = get_aln('GTRplusGammaClockTest', 100000)
    equal_length_lf, full_lf = nest.hetero_clock_fit(
        alignment, reference_lf.tree, outgroup='Opossum',
        param_limit=20, return_lfs=True)

    assert_less(equal_length_lf.getLogLikelihood(),
                full_lf.getLogLikelihood())

    observed_full = nest.get_expected_no_subs(full_lf)
    observed_equal = nest.get_expected_no_subs(equal_length_lf)
    for taxon in expected:
        assert_almost_equal(observed_full[taxon], expected[taxon], decimal=2)
        assert_almost_equal(observed_equal[taxon], expected[taxon],
                            decimal=2)
def test_seq_fit():
    """seq_fit should fit nested GTR and General models.

    For each model name the matching module-level fixture (``_GTR`` or
    ``_General``) is inflated, an alignment is obtained for it and refitted
    with ``nest.seq_fit``; the last returned likelihood function must
    reproduce the fixture's expected numbers of substitutions to two
    decimals.  For 'General' the first returned fit must also have a lower
    log likelihood than the second.
    """
    for model in 'GTR', 'General':
        # Look the fixture up by name explicitly rather than through eval().
        fixture = globals()['_' + model]
        pre_lf = nest.inflate_likelihood_function(fixture)
        prefit = nest.get_expected_no_subs(pre_lf)

        aln = get_aln(model, 100000)
        lfs = nest.seq_fit(aln, pre_lf.tree, param_limit=20,
                           return_lfs=model)
        if model == 'General':
            assert_less(lfs[0].getLogLikelihood(),
                        lfs[1].getLogLikelihood())

        postfit = nest.get_expected_no_subs(lfs[-1])
        for taxon in prefit:
            assert_almost_equal(postfit[taxon], prefit[taxon], decimal=2)
def test_deflate_likelihood_function():
    """deflate_likelihood_function should report an 'EN' entry equal to
    get_expected_no_subs for the same likelihood function."""
    general_lf = nest.inflate_likelihood_function(_General)
    general_lf.setAlignment(get_aln('General', _General['aln_length']))

    deflated = nest.deflate_likelihood_function(general_lf)
    assert_equal(deflated['EN'], nest.get_expected_no_subs(general_lf))
def param_bootstrap(stats, num_reps=None, model_pos=None, fitter=None, **kw):
    """Accumulate parametric bootstrap samples for one fitted model.

    The likelihood function is rebuilt from ``f_stats[model_pos]``,
    alignments are simulated from it and refitted with ``fitter``, and the
    log likelihood, G statistic and (when ``'Q'`` is in the fitted
    function's ``defn_for``) expected numbers of substitutions of each refit
    are appended to the sample lists held in ``g_stats[model_pos]``.

    Args:
        stats: ``(gene, f_stats, g_stats)``.  ``f_stats`` is indexed by
            ``model_pos``; the selected row must provide ``'name'``,
            ``'tip_names'``, ``'aln_length'``, ``'gs'`` and ``'ll'``.
            ``g_stats`` maps ``model_pos`` to a row of accumulated samples
            and is created on first use.
        num_reps: number of samples wanted.  Up to ``10 * num_reps``
            simulate-and-fit attempts are made to collect them.
        model_pos: index of the model in ``f_stats`` and in the sequence
            returned by ``fitter``.
        fitter: callable invoked as
            ``fitter(aln, tree, return_lfs=<model name>, **kw)``.
        **kw: forwarded unchanged to ``fitter``.

    Returns:
        ``(gene, f_stats, g_stats)`` with ``f_stats[model_pos]`` gaining
        ``'gs_p'`` and ``'ll_p'`` (each a ``(count below observed,
        number of samples + 1)`` pair) and the ``g_stats`` row gaining the
        new samples and the saved RNG ``'state'``.  ``None`` when
        ``model_pos`` is out of range for ``f_stats``.
    """
    gene, f_stats, g_stats = stats
    try:
        f_row = f_stats[model_pos]
    except IndexError:
        # model_pos is an index, so it must be stringified for the message;
        # f_stats may also be empty, in which case there are no tip names.
        tips = '/'.join(f_stats[0]['tip_names']) if f_stats else ''
        logging.error(' Skipping ' + tips + ' in ' + gene + ': position ' +
                      str(model_pos) + ' invalid')
        return None

    model = f_row['name']
    if model_pos in g_stats:
        g_row = g_stats[model_pos]
    else:
        g_row = {
            'name': model,
            'tip_names': f_row['tip_names'],
            'gs_samples': [],
            'll_samples': [],
            'en_samples': [],
        }
        g_stats[model_pos] = g_row
    gs_samples = g_row['gs_samples']
    ll_samples = g_row['ll_samples']
    en_samples = g_row['en_samples']

    if 'state' in g_row:
        # Resume the random stream saved by a previous call (see the
        # repr(random.getstate()) below).
        # NOTE(review): eval() of stored text is unsafe if g_stats can come
        # from an untrusted source -- confirm where it is persisted.
        random.setstate(eval(g_row['state']))

    lf = nest.inflate_likelihood_function(f_row)
    aln_length = f_row['aln_length']
    label = model + ' and ' + '/'.join(f_row['tip_names']) + ' in ' + gene

    start = time.time()
    # Allow up to ten attempts per requested repetition.
    for _ in range(10 * num_reps):
        if len(gs_samples) >= num_reps:
            break
        try:
            aln = lf.simulateAlignment(aln_length, random_series=random)
            lfs = fitter(aln, lf.tree, return_lfs=model, **kw)
            fitted_lf = lfs[model_pos]
            ll_sample = fitted_lf.getLogLikelihood()
            gs_sample = fitted_lf.getGStatistic()
            has_q = 'Q' in fitted_lf.defn_for
            en_sample = nest.get_expected_no_subs(fitted_lf) if has_q else None
        except Exception:
            # Best effort: a failed repetition is logged and retried.
            logging.warning(' Missed a G stat for ' + label + ':\n' +
                            format_exc())
            continue
        # Record only after every statistic was computed, so the sample
        # lists cannot get out of step when a repetition fails part-way.
        ll_samples.append(ll_sample)
        gs_samples.append(gs_sample)
        if has_q:
            en_samples.append(en_sample)

    # Checked after the loop so that filling the quota on the very last
    # permitted attempt is not reported as a failure.
    if len(gs_samples) < num_reps:
        logging.error(
            ' Failed to compile sufficient bootstrap repetitions for ' +
            label)

    g_row['state'] = repr(random.getstate())
    f_row['gs_p'] = (sum(1 for gs in gs_samples if gs < f_row['gs']),
                     len(gs_samples) + 1)
    f_row['ll_p'] = (sum(1 for ll in ll_samples if ll < f_row['ll']),
                     len(ll_samples) + 1)
    logging.info(' Done ' + label + ' in ' + str(time.time() - start) +
                 ' secs')
    return gene, f_stats, g_stats
def test_clock_fit():
    """clock_fit should fit nested GTR, General, and GeneralBen models, some
    with equal branch lengths.

    For the 'GTRClockTest' fixture the first two returned fits are checked;
    for 'GeneralBen' the fits from index 2 onward are checked.  In both
    cases the equal-length fit must have a lower log likelihood than its
    partner and both must reproduce the fixture's expected numbers of
    substitutions to two decimals.
    """
    for modelname in ('GTRClockTest', 'GeneralBen'):
        # Look the fixture up by name explicitly rather than through eval().
        fixture = globals()['_' + modelname]
        is_gtr = modelname.startswith('GTR')

        pre_lf = nest.inflate_likelihood_function(fixture)
        prefit = nest.get_expected_no_subs(pre_lf)
        aln = get_aln(modelname, 100000)
        lfs = nest.clock_fit(aln, pre_lf.tree, outgroup='Opossum',
                             param_limit=20,
                             return_lfs='GTR' if is_gtr else 'General')

        lf_equal_length, lf = lfs[:2] if is_gtr else lfs[2:]
        assert_less(lf_equal_length.getLogLikelihood(),
                    lf.getLogLikelihood())
        if modelname == 'GeneralBen':
            assert_less(lfs[0].getLogLikelihood(),
                        lf_equal_length.getLogLikelihood())

        postfit = nest.get_expected_no_subs(lf)
        postfit_equal_length = nest.get_expected_no_subs(lf_equal_length)
        for taxon in prefit:
            assert_almost_equal(postfit[taxon], prefit[taxon], decimal=2)
            assert_almost_equal(postfit_equal_length[taxon], prefit[taxon],
                                decimal=2)
def test_constrain_lengths():
    """Optimised GeneralBen lengths should equal expected substitutions.

    Every parameter whose name contains '/' is freed per edge, 'length' is
    set non-independent except on the 'Opossum' edge, and the function is
    optimised locally.  The Mouse and Human lengths must then agree, and
    each tip's length must equal its expected number of substitutions.
    """
    general_lf = inflate_likelihood_function(_General)
    alignment = get_aln('General', _General['aln_length'])
    ben_model = GeneralBen(DNA.Alphabet, recode_gaps=True, model_gaps=False,
                           optimise_motif_probs=True)
    ben_lf = ben_model.makeLikelihoodFunction(general_lf.tree)

    for name in ben_lf.getParamNames():
        if '/' not in name:
            continue
        ben_lf.setParamRule(name, is_independent=True, is_constant=False)

    # presumably ties 'length' across edges, leaving Opossum free -- the
    # Mouse/Human assertion below relies on that.
    ben_lf.setParamRule('length', is_independent=False)
    ben_lf.setParamRule('length', edge='Opossum', is_independent=True)

    ben_lf.setAlignment(alignment)
    ben_lf.optimise(local=True, show_progress=False)

    expected_subs = get_expected_no_subs(ben_lf)
    lengths = ben_lf.getParamValueDict(['edge'])['length']

    assert_almost_equal(lengths['Mouse'], lengths['Human'])
    for tip in ben_lf.tree.getTipNames():
        assert_almost_equal(lengths[tip], expected_subs[tip])
def test_get_expected_no_subs():
    """get_expected_no_subs should return ENS keyed by edge; for the
    GeneralStationary fixture each tip's value equals its 'length'."""
    stationary_lf = nest.inflate_likelihood_function(_GeneralStationary)
    expected_subs = nest.get_expected_no_subs(stationary_lf)

    for tip in stationary_lf.tree.getTipNames():
        tip_length = stationary_lf.getParamValue('length', tip)
        assert_almost_equal(expected_subs[tip], tip_length)