Пример #1
0
 def test_get_raw_estimates(self):
     """getAllParamValues returns the raw per-pair parameter estimates"""
     calc = EstimateDistances(self.al, HKY85(), est_params=['kappa'])
     calc.run()
     # one row per sequence pair: (pair, expected kappa, expected length)
     canned = [
         (('a', 'b'), 1.0000226766004808e-06, 0.18232155856115662),
         (('a', 'c'), 1.0010380037049357e-06, 0.087070406623635604),
         (('a', 'e'), 2.3965871843412687, 0.4389176272584539),
         (('b', 'e'), 2.3965871854366592, 0.43891762729173389),
         (('c', 'b'), 1.0010380037049357e-06, 0.087070406623635604),
         (('c', 'e'), 0.57046787478038707, 0.43260232210282784),
     ]
     observed = calc.getAllParamValues()
     for pair, kappa, length in canned:
         self.assertAlmostEqual(observed[pair]['kappa'], kappa)
         self.assertAlmostEqual(observed[pair]['length'], length)
Пример #2
0
    def test_EstimateDistances_fromUnaligned(self):
        """Exercising distance estimation from unaligned sequences"""
        # the same distances are expected with and without rigorous alignment
        expected = {
            ('b', 'e'): 0.440840,
            ('c', 'e'): 0.440840,
            ('a', 'c'): 0.088337,
            ('a', 'b'): 0.188486,
            ('a', 'e'): 0.440840,
            ('b', 'c'): 0.0883373,
        }
        for rigorous in (True, False):
            calc = EstimateDistances(self.collection,
                                     JC69(),
                                     do_pair_align=True,
                                     rigorous_align=rigorous)
            calc.run()
            # hand the assertion its own copy, as each run originally
            # compared against a freshly built dict
            canned = dict(expected)
            observed = calc.getPairwiseDistances()
            self.assertDistsAlmostEqual(canned, observed)
Пример #3
0
 def test_EstimateDistances_other_model_params(self):
     """EstimateDistances exposes the other estimated model params"""
     calc = EstimateDistances(self.al, HKY85(), est_params=['kappa'])
     calc.run()
     # getParamValues yields a Number object (Mean, Median, etc.)
     kappa_summary = calc.getParamValues('kappa')
     self.assertAlmostEqual(kappa_summary.Mean, 0.8939, 4)
     # getPairwiseParam yields a dict of pairwise instances; it is called by
     # getParamValues, so its values are already covered by the check above
     # and the call is only exercised here
     kappa_by_pair = calc.getPairwiseParam('kappa')
Пример #4
0
 def test_EstimateDistances_fromThreeway(self):
     """testing (well, exercising at least), EstimateDistances fromThreeway"""
     calc = EstimateDistances(self.al, JC69(), threeway=True)
     calc.run()
     # canned mean distances for every sequence pair
     expected = {
         ('b', 'e'): 0.495312,
         ('c', 'e'): 0.479380,
         ('a', 'c'): 0.089934,
         ('a', 'b'): 0.190021,
         ('a', 'e'): 0.495305,
         ('b', 'c'): 0.0899339,
     }
     observed = calc.getPairwiseDistances(summary_function="mean")
     self.assertDistsAlmostEqual(expected, observed)
Пример #5
0
 def test_EstimateDistances_fromThreeway(self):
     """testing (well, exercising at least), EstimateDistances fromThreeway"""
     threeway_calc = EstimateDistances(self.al, JC69(), threeway=True)
     threeway_calc.run()
     # expected pairwise distances when summarising with the mean
     canned = {
         ('b', 'e'): 0.495312,
         ('c', 'e'): 0.479380,
         ('a', 'c'): 0.089934,
         ('a', 'b'): 0.190021,
         ('a', 'e'): 0.495305,
         ('b', 'c'): 0.0899339,
     }
     self.assertDistsAlmostEqual(
         canned,
         threeway_calc.getPairwiseDistances(summary_function="mean"))
Пример #6
0
 def test_EstimateDistancesWithMotifProbs(self):
     """EstimateDistances using caller-supplied motif probabilities"""
     base_freqs = {'A': 0.1, 'C': 0.2, 'G': 0.2, 'T': 0.5}
     calc = EstimateDistances(self.al, HKY85(), motif_probs=base_freqs)
     calc.run()
     # canned pairwise distances for the supplied motif probabilities
     expected = {
         ('a', 'c'): 0.07537,
         ('b', 'c'): 0.07537,
         ('a', 'e'): 0.39921,
         ('a', 'b'): 0.15096,
         ('b', 'e'): 0.39921,
         ('c', 'e'): 0.37243,
     }
     observed = calc.getPairwiseDistances()
     self.assertDistsAlmostEqual(expected, observed)
Пример #7
0
 def test_EstimateDistancesWithMotifProbs(self):
     """EstimateDistances using caller-supplied motif probabilities"""
     supplied_probs = {'A': 0.1, 'C': 0.2, 'G': 0.2, 'T': 0.5}
     dist_calc = EstimateDistances(self.al, HKY85(),
                                   motif_probs=supplied_probs)
     dist_calc.run()
     # expected distance for each sequence pair
     canned = {
         ('a', 'c'): 0.07537,
         ('b', 'c'): 0.07537,
         ('a', 'e'): 0.39921,
         ('a', 'b'): 0.15096,
         ('b', 'e'): 0.39921,
         ('c', 'e'): 0.37243,
     }
     self.assertDistsAlmostEqual(canned, dist_calc.getPairwiseDistances())
Пример #8
0
    def test_EstimateDistances_modify_lf(self):
        """a modify_lf callback can alter the likelihood function"""
        def hold_kappa_constant(lf):
            # make kappa a constant, then refit locally
            lf.setParamRule('kappa', is_constant=True)
            lf.optimise(local=True)
            return lf

        # HKY85 with kappa held constant ...
        constrained = EstimateDistances(self.al, HKY85(),
                                        modify_lf=hold_kappa_constant)
        constrained.run()
        observed = constrained.getPairwiseDistances()

        # ... is expected to give the same distances as F81
        reference = EstimateDistances(self.al, F81())
        reference.run()
        expected = reference.getPairwiseDistances()

        self.assertDistsAlmostEqual(expected, observed)
Пример #9
0
 def test_EstimateDistances_fromUnaligned(self):
     """Exercising distance estimation from unaligned sequences"""
     # both alignment modes are checked against the same canned distances
     expected = {
         ('b', 'e'): 0.440840,
         ('c', 'e'): 0.440840,
         ('a', 'c'): 0.088337,
         ('a', 'b'): 0.188486,
         ('a', 'e'): 0.440840,
         ('b', 'c'): 0.0883373,
     }

     # rigorous pairwise alignment
     rigorous = EstimateDistances(self.collection, JC69(),
                                  do_pair_align=True, rigorous_align=True)
     rigorous.run()
     self.assertDistsAlmostEqual(dict(expected),
                                 rigorous.getPairwiseDistances())

     # non-rigorous pairwise alignment
     quick = EstimateDistances(self.collection, JC69(),
                               do_pair_align=True, rigorous_align=False)
     quick.run()
     self.assertDistsAlmostEqual(dict(expected),
                                 quick.getPairwiseDistances())
Пример #10
0
 def test_get_raw_estimates(self):
     """getAllParamValues returns the raw per-pair parameter estimates"""
     calc = EstimateDistances(self.al, HKY85(), est_params=['kappa'])
     calc.run()
     # expected raw estimates, keyed by sequence pair
     expected = {
         ('a', 'b'): {'kappa': 1.0000226766004808e-06,
                      'length': 0.18232155856115662},
         ('a', 'c'): {'kappa': 1.0010380037049357e-06,
                      'length': 0.087070406623635604},
         ('a', 'e'): {'kappa': 2.3965871843412687,
                      'length': 0.4389176272584539},
         ('b', 'e'): {'kappa': 2.3965871854366592,
                      'length': 0.43891762729173389},
         ('c', 'b'): {'kappa': 1.0010380037049357e-06,
                      'length': 0.087070406623635604},
         ('c', 'e'): {'kappa': 0.57046787478038707,
                      'length': 0.43260232210282784},
     }
     observed = calc.getAllParamValues()
     for pair, estimates in expected.items():
         for name, value in estimates.items():
             self.assertAlmostEqual(observed[pair][name], value)
Пример #11
0
 def test_EstimateDistances_modify_lf(self):
     """a modify_lf callback can alter the likelihood function"""
     def fix_kappa_and_refit(lf):
         # make kappa a constant, then run a local optimisation
         lf.setParamRule('kappa', is_constant=True)
         lf.optimise(local=True)
         return lf

     # distances from HKY85 with the callback applied
     hky_calc = EstimateDistances(self.al, HKY85(),
                                  modify_lf=fix_kappa_and_refit)
     hky_calc.run()
     observed = hky_calc.getPairwiseDistances()

     # distances from F81, used as the expected values
     f81_calc = EstimateDistances(self.al, F81())
     f81_calc.run()
     self.assertDistsAlmostEqual(f81_calc.getPairwiseDistances(), observed)
Пример #12
0
 def test_EstimateDistances_other_model_params(self):
     """EstimateDistances exposes the other estimated model params"""
     dist_calc = EstimateDistances(self.al, HKY85(), est_params=['kappa'])
     dist_calc.run()
     # a Number object carrying Mean, Median etc.
     stats = dist_calc.getParamValues('kappa')
     self.assertAlmostEqual(stats.Mean, 0.8939, 4)
     # a dict of pairwise instances; getParamValues calls this method, so
     # its values are already verified above and it is only exercised here
     pairwise = dist_calc.getPairwiseParam('kappa')
Пример #13
0
 def test_EstimateDistances(self):
     """testing (well, exercising at least), EstimateDistances"""
     calc = EstimateDistances(self.al, JC69())
     calc.run()
     # canned JC69 distances for every sequence pair
     expected = {
         ('b', 'e'): 0.440840,
         ('c', 'e'): 0.440840,
         ('a', 'c'): 0.088337,
         ('a', 'b'): 0.188486,
         ('a', 'e'): 0.440840,
         ('b', 'c'): 0.0883373,
     }
     observed = calc.getPairwiseDistances()
     self.assertDistsAlmostEqual(expected, observed)

     # exercise writing to file, then clean up on a best-effort basis
     scratch = 'junk.txt'
     calc.writeToFile(scratch)
     try:
         os.remove(scratch)
     except OSError:
         # probably running in parallel
         pass
Пример #14
0
 def test_EstimateDistances(self):
     """testing (well, exercising at least), EstimateDistances"""
     dist_calc = EstimateDistances(self.al, JC69())
     dist_calc.run()
     # expected pairwise distances under JC69
     canned = {
         ('b', 'e'): 0.440840,
         ('c', 'e'): 0.440840,
         ('a', 'c'): 0.088337,
         ('a', 'b'): 0.188486,
         ('a', 'e'): 0.440840,
         ('b', 'c'): 0.0883373,
     }
     self.assertDistsAlmostEqual(canned, dist_calc.getPairwiseDistances())

     # exercise writing to file; removing the output is best-effort
     out_path = 'junk.txt'
     dist_calc.writeToFile(out_path)
     try:
         os.remove(out_path)
     except OSError:
         pass # probably parallel
Пример #15
0
def TreeAlign(model, seqs, tree=None, indel_rate=0.01, indel_length=0.01,
    ui = None, ests_from_pairwise=True, param_vals=None):
    """Returns a multiple alignment and tree.
    
    Uses the provided substitution model and a tree for determining the
    progressive order. If a tree is not provided a Neighbour Joining tree is
    constructed from pairwise distances estimated from pairwise aligning the
    sequences. If running in parallel, only the distance estimation is
    parallelised and only the master CPU returns the alignment and tree, other
    CPU's return None, None.
    NOTE(review): the body visible here always returns (align, tree); the
    None, None case is presumably handled elsewhere -- confirm.
    
    Arguments:
        - model: a substitution model
        - seqs: a sequence collection, or a dict keyed by sequence name
        - tree: optional tree whose tip names must match the sequence names;
          when given, no parameters are estimated from pairwise alignments
        - indel_rate, indel_length: parameters for the progressive pair-HMM
        - ui: optional object with a display(message) method used to report
          progress; when None no progress message is emitted
        - ests_from_pairwise: if no tree provided and True, the median value
          of the substitution model parameters are used
        - param_vals: named key, value pairs for model parameters. These
          override ests_from_pairwise.
    
    Returns the (alignment, tree) pair; the alignment's Info["AlignParams"]
    records the parameter values used, including indel_rate and indel_length.
    """
    _exclude_params = ['mprobs', 'rate', 'bin_switch']
    # always work on a private copy so the caller's mapping is never mutated
    param_vals = dict(param_vals) if param_vals else {}
    if isinstance(seqs, dict):
        seq_names = list(seqs)
    else:
        seq_names = seqs.getSeqNames()
    
    two_seqs = len(seq_names) == 2
    
    if tree:
        # compare sorted copies rather than sorting the name lists in place
        tip_names = sorted(tree.getTipNames())
        sorted_seq_names = sorted(seq_names)
        assert tip_names == sorted_seq_names, \
            "names don't match between seqs and tree: tree=%s; seqs=%s" % \
            (tip_names, sorted_seq_names)
        ests_from_pairwise = False
    elif two_seqs:
        # reuse seq_names so that dict input (which has no getSeqNames) works
        tree = LoadTree(tip_names=seq_names)
        ests_from_pairwise = False
    else:
        if ests_from_pairwise:
            est_params = [param for param in model.getParamList()
                          if param not in _exclude_params]
        else:
            est_params = None
        
        # no tree supplied: build an NJ tree from pairwise distances
        dcalc = EstimateDistances(seqs, model, do_pair_align=True,
                                    est_params=est_params)
        dcalc.run()
        dists = dcalc.getPairwiseDistances()
        tree = NJ.nj(dists)
    
    LF = model.makeLikelihoodFunction(tree.bifurcating(name_unnamed=True), aligned=False)
    # ests_from_pairwise is only still True in the NJ branch above, so
    # est_params and dcalc are guaranteed to be defined here
    if ests_from_pairwise and not param_vals:
        # we use the Median to avoid the influence of outlier pairs
        param_vals = {}
        for param in est_params:
            numbers = dcalc.getParamValues(param)
            print("Param Estimate Summary Stats: %s" % param)
            print(numbers.summarize())
            param_vals[param] = numbers.Median
    
    # ui defaults to None, so only report progress when a reporter was given
    if ui is not None:
        ui.display("Doing %s alignment" %
                   ("pairwise" if two_seqs else "progressive"))
    with LF.updatesPostponed():
        for param, val in param_vals.items():
            LF.setParamRule(param, value=val, is_constant=True)
        LF.setParamRule('indel_rate', value=indel_rate, is_constant=True)
        LF.setParamRule('indel_length', value=indel_length, is_constant=True)
        LF.setSequences(seqs)
    edge = LF.getLogLikelihood().edge
    align = edge.getViterbiPath().getAlignment()
    info = Info()
    info["AlignParams"] = param_vals
    info["AlignParams"].update(dict(indel_length=indel_length, indel_rate=indel_rate))
    align.Info = info
    return align, tree