def test_makeContinuousPsubDefn():
    """After populate_parameters, the GeneralBen and General likelihood
    functions should agree on length-scaled rate matrices and expected
    numbers of substitutions at every tip, and each GeneralBen length should
    equal its expected number of substitutions"""
    lf_gen = inflate_likelihood_function(_General)

    ben_model = GeneralBen(DNA.Alphabet, recode_gaps=True, model_gaps=False,
                           optimise_motif_probs=True, name='GeneralBen')
    lf_ben = ben_model.makeLikelihoodFunction(lf_gen.tree)

    populate_parameters(lf_ben, lf_gen, is_independent=True,
                        is_constant=False)

    ben_ens = get_expected_no_subs(lf_ben)
    ben_lens = lf_ben.getParamValueDict(['edge'])['length']
    gen_ens = get_expected_no_subs(lf_gen)
    for tip in lf_gen.tree.getTipNames():
        # Compare rate matrices scaled by the edge's length parameter
        scaled_ben = (np.array(lf_ben.getRateMatrixForEdge(tip)) *
                      lf_ben.getParamValue('length', tip))
        scaled_gen = (np.array(lf_gen.getRateMatrixForEdge(tip)) *
                      lf_gen.getParamValue('length', tip))
        assert_array_almost_equal(scaled_ben, scaled_gen)
        assert_almost_equal(ben_ens[tip], gen_ens[tip])
        assert_almost_equal(ben_lens[tip], ben_ens[tip])
Exemplo n.º 2
0
def test_hetero_fit():
    """the last likelihood function returned by hetero_fit for the
    GTRplusGamma alignment should match the source model's expected numbers
    of substitutions to 2 decimal places"""
    source_lf = nest.inflate_likelihood_function(_GTRplusGamma)
    expected = nest.get_expected_no_subs(source_lf)
    alignment = get_aln('GTRplusGamma', 100000)
    fitted_lfs = nest.hetero_fit(alignment, source_lf.tree, param_limit=20,
                                 return_lfs=True)
    observed = nest.get_expected_no_subs(fitted_lfs[-1])
    for taxon in expected:
        assert_almost_equal(observed[taxon], expected[taxon], decimal=2)
Exemplo n.º 3
0
def test_hetero_clock_fit():
    """hetero_clock_fit should return an equal-length fit with lower
    log-likelihood than the unconstrained fit, and both should match the
    source model's expected numbers of substitutions to 2 decimal places"""
    source_lf = nest.inflate_likelihood_function(_GTRplusGammaClockTest)
    expected = nest.get_expected_no_subs(source_lf)
    alignment = get_aln('GTRplusGammaClockTest', 100000)
    clock_lf, free_lf = nest.hetero_clock_fit(
        alignment, source_lf.tree, outgroup='Opossum', param_limit=20,
        return_lfs=True)
    assert_less(clock_lf.getLogLikelihood(), free_lf.getLogLikelihood())
    free_ens = nest.get_expected_no_subs(free_lf)
    clock_ens = nest.get_expected_no_subs(clock_lf)
    for taxon in expected:
        assert_almost_equal(free_ens[taxon], expected[taxon], decimal=2)
        assert_almost_equal(clock_ens[taxon], expected[taxon], decimal=2)
Exemplo n.º 4
0
def test_seq_fit():
    """seq_fit should reproduce the expected numbers of substitutions of the
    GTR and General source models, with nested fits ordered by likelihood
    for General"""
    for model_name in ('GTR', 'General'):
        # Fixture dicts are looked up by name, e.g. _GTR and _General
        source_lf = nest.inflate_likelihood_function(eval('_' + model_name))
        expected = nest.get_expected_no_subs(source_lf)

        alignment = get_aln(model_name, 100000)
        fitted_lfs = nest.seq_fit(alignment, source_lf.tree, param_limit=20,
                                  return_lfs=model_name)
        if model_name == 'General':
            assert_less(fitted_lfs[0].getLogLikelihood(),
                        fitted_lfs[1].getLogLikelihood())
        observed = nest.get_expected_no_subs(fitted_lfs[-1])
        for taxon in expected:
            assert_almost_equal(observed[taxon], expected[taxon], decimal=2)
Exemplo n.º 5
0
def test_deflate_likelihood_function():
    """the 'EN' entry from deflate_likelihood_function should equal
    get_expected_no_subs for the same likelihood function"""
    lf = nest.inflate_likelihood_function(_General)
    lf.setAlignment(get_aln('General', _General['aln_length']))
    deflated = nest.deflate_likelihood_function(lf)
    assert_equal(deflated['EN'], nest.get_expected_no_subs(lf))
Exemplo n.º 6
0
def param_bootstrap(stats, num_reps=None, model_pos=None, fitter=None, **kw):
    """Parametric bootstrap of the G statistic and log-likelihood of one model

    Alignments are simulated from the likelihood function described by
    f_stats[model_pos], refitted with fitter, and the resulting statistics
    are accumulated in g_stats[model_pos] so that a later call can resume
    from the samples (and random number generator state) stored there.

    Arguments:
        stats: a (gene, f_stats, g_stats) triple. f_stats is indexed by
            model_pos and each row supplies 'name', 'tip_names',
            'aln_length', 'gs' and 'll'. g_stats is a mapping keyed by
            model_pos; a missing entry is created here.
        num_reps: number of bootstrap samples wanted; at most 10 * num_reps
            simulate-and-fit attempts are made.
        model_pos: index of the model in f_stats and in the sequence
            returned by fitter.
        fitter: called as fitter(aln, tree, return_lfs=model_name, **kw).
        **kw: passed through to fitter.

    Returns (gene, f_stats, g_stats), with 'gs_p' and 'll_p' set on the
    f_stats row as (number of samples below the observed value, number of
    samples + 1) and the generator state saved in g_stats[model_pos]['state'].
    Returns None, after logging an error, when model_pos is not a valid index
    into f_stats.
    """
    gene, f_stats, g_stats = stats
    try:
        f_row = f_stats[model_pos]
    except IndexError:
        # f_stats may itself be empty, so don't assume a first row exists;
        # model_pos is an index, so it is formatted rather than concatenated
        if len(f_stats):
            skipped = '/'.join(f_stats[0]['tip_names'])
        else:
            skipped = '<no models>'
        logging.error(' Skipping %s in %s: position %s invalid',
                      skipped, gene, model_pos)
        return
    model = f_row['name']
    tips = '/'.join(f_row['tip_names'])

    if model_pos in g_stats:
        g_row = g_stats[model_pos]
    else:
        g_row = {
            'name': model,
            'tip_names': f_row['tip_names'],
            'gs_samples': [],
            'll_samples': [],
            'en_samples': []
        }
        g_stats[model_pos] = g_row
    gs_samples = g_row['gs_samples']
    ll_samples = g_row['ll_samples']
    en_samples = g_row['en_samples']
    if 'state' in g_row:
        # NOTE(review): eval() of the stored state is only safe while g_stats
        # comes from a trusted source (it is written below via repr()).
        random.setstate(eval(g_row['state']))

    lf = nest.inflate_likelihood_function(f_row)
    aln_length = f_row['aln_length']
    start = time.time()
    # Raises TypeError for a missing num_reps, as the original range() did
    max_attempts = 10 * num_reps
    attempts = 0
    while len(gs_samples) < num_reps and attempts < max_attempts:
        attempts += 1
        try:
            aln = lf.simulateAlignment(aln_length, random_series=random)
            lfs = fitter(aln, lf.tree, return_lfs=model, **kw)
            fitted_lf = lfs[model_pos]
            # Compute everything before appending so that a failure part way
            # through cannot leave the sample lists out of step
            ll = fitted_lf.getLogLikelihood()
            gs = fitted_lf.getGStatistic()
            has_en = 'Q' in fitted_lf.defn_for
            if has_en:
                en = nest.get_expected_no_subs(fitted_lf)
        except Exception:
            # Best effort: log the failed replicate and try again, but let
            # KeyboardInterrupt/SystemExit propagate
            logging.warning(' Missed a G stat for %s and %s in %s:\n%s',
                            model, tips, gene, format_exc())
        else:
            ll_samples.append(ll)
            gs_samples.append(gs)
            if has_en:
                en_samples.append(en)
    if len(gs_samples) < num_reps:
        logging.error(
            ' Failed to compile sufficient bootstrap repetitions for '
            '%s and %s in %s', model, tips, gene)
    g_row['state'] = repr(random.getstate())
    f_row['gs_p'] = (sum(1 for g in gs_samples
                         if g < f_row['gs']), len(gs_samples) + 1)
    f_row['ll_p'] = (sum(1 for l in ll_samples
                         if l < f_row['ll']), len(ll_samples) + 1)
    logging.info(' Done %s and %s in %s in %s secs',
                 model, tips, gene, time.time() - start)

    return gene, f_stats, g_stats
Exemplo n.º 7
0
def test_clock_fit():
    """clock_fit should fit the GTRClockTest and GeneralBen fixtures, with
    the equal branch length fit less likely than the unconstrained one and
    both matching the source model's expected numbers of substitutions"""
    for modelname in ('GTRClockTest', 'GeneralBen'):
        is_gtr = modelname.startswith('GTR')
        # Fixture dicts are looked up by name, e.g. _GTRClockTest
        source_lf = nest.inflate_likelihood_function(eval('_' + modelname))
        expected = nest.get_expected_no_subs(source_lf)
        alignment = get_aln(modelname, 100000)
        lfs = nest.clock_fit(alignment, source_lf.tree, outgroup='Opossum',
                             param_limit=20,
                             return_lfs='GTR' if is_gtr else 'General')
        if is_gtr:
            lf_equal_length, lf = lfs[:2]
        else:
            lf_equal_length, lf = lfs[2:]
        assert_less(lf_equal_length.getLogLikelihood(), lf.getLogLikelihood())
        if modelname == 'GeneralBen':
            assert_less(lfs[0].getLogLikelihood(),
                        lf_equal_length.getLogLikelihood())
        free_ens = nest.get_expected_no_subs(lf)
        clock_ens = nest.get_expected_no_subs(lf_equal_length)
        for taxon in expected:
            assert_almost_equal(free_ens[taxon], expected[taxon], decimal=2)
            assert_almost_equal(clock_ens[taxon], expected[taxon], decimal=2)
def test_constrain_lengths():
    """with all lengths except Opossum's tied together, an optimised
    GeneralBen fit should give Mouse and Human equal lengths and tip lengths
    equal to their expected numbers of substitutions"""
    lf_gen = inflate_likelihood_function(_General)
    alignment = get_aln('General', _General['aln_length'])

    ben_model = GeneralBen(DNA.Alphabet, recode_gaps=True, model_gaps=False,
                           optimise_motif_probs=True)
    lf_ben = ben_model.makeLikelihoodFunction(lf_gen.tree)
    for param in lf_ben.getParamNames():
        if '/' not in param:
            continue
        lf_ben.setParamRule(param, is_independent=True, is_constant=False)
    # Tie every length together, then release Opossum's on its own
    lf_ben.setParamRule('length', is_independent=False)
    lf_ben.setParamRule('length', edge='Opossum', is_independent=True)
    lf_ben.setAlignment(alignment)
    lf_ben.optimise(local=True, show_progress=False)

    expected_subs = get_expected_no_subs(lf_ben)
    lengths = lf_ben.getParamValueDict(['edge'])['length']
    assert_almost_equal(lengths['Mouse'], lengths['Human'])
    for tip in lf_ben.tree.getTipNames():
        assert_almost_equal(lengths[tip], expected_subs[tip])
Exemplo n.º 9
0
def test_get_expected_no_subs():
    """get_expected_no_subs should give, for each tip of the
    GeneralStationary fixture, a value equal to that tip's length parameter"""
    stationary_lf = nest.inflate_likelihood_function(_GeneralStationary)
    ens_by_edge = nest.get_expected_no_subs(stationary_lf)
    for tip in stationary_lf.tree.getTipNames():
        observed = ens_by_edge[tip]
        assert_almost_equal(observed,
                            stationary_lf.getParamValue('length', tip))