def _gen_sym_preds():
    """Build strand-symmetric predicates for five directed base changes.

    Each returned predicate is the union of a forward-only MotifChange and
    the forward-only MotifChange between the complements of the same two
    bases, in the order AG, AT, CG, CT, GT.
    """
    complement = {"A": "T", "T": "A", "G": "C", "C": "G"}
    return [
        MotifChange(src, dst, forward_only=True)
        | MotifChange(complement[src], complement[dst], forward_only=True)
        # each two-character string unpacks into (from base, to base)
        for src, dst in ("AG", "AT", "CG", "CT", "GT")
    ]
def test_nr_trinuc(self):
    """Exercise TimeReversibleTrinucleotide with 1- and 3-letter predicates"""
    changes = [("A", "C"), ("G", "A"), ("CGA", "TGA")]
    preds = [MotifChange(*change) for change in changes]
    model = substitution_model.TimeReversibleTrinucleotide(predicates=preds)
    # parameter names are expected in the order the predicates were given
    self.assertEqual(model.get_param_list(), ["A/C", "G/A", "CGA/TGA"])
    self.assertEqual(len(model.get_motifs()), 64)
def test_parse(self):
    """parse() of a predicate's string form reproduces that string"""

    def round_trip(predicate):
        # str -> parse -> str
        return str(parse(str(predicate)))

    # single undirected change
    self.assertEqual(round_trip(MotifChange("A", "G")), "A/G")

    # union of two undirected changes
    transitions = MotifChange("A", "G") | MotifChange("C", "T")
    self.assertEqual(round_trip(transitions), "(A/G | C/T)")

    # union of two forward-only changes
    forward_ag = MotifChange("A", "G", forward_only=True)
    forward_tc = MotifChange("T", "C", forward_only=True)
    self.assertEqual(round_trip(forward_ag | forward_tc), "(A>G | T>C)")
def test_getting_node_mprobs(self):
    """get_motif_probs_by_node matches hand-computed vectors for each node"""
    tree = make_tree(treestring="(a:.2,b:.2,(c:.1,d:.1):.1)")
    aln = make_aligned_seqs(data={name: "TGTG" for name in "abcd"})

    motifs = ["T", "C", "A", "G"]
    # (alias, index of the source motif, index of the target motif)
    directed = [("aX", 0, 3), ("bX", 3, 0), ("edX", 1, 2), ("cX", 2, 1)]
    predicates = [
        MotifChange(motifs[src], motifs[dst], forward_only=True).aliased(alias)
        for alias, src, dst in directed
    ]
    sm = NonReversibleNucleotide(predicates=predicates, equal_motif_probs=True)
    lf = sm.make_likelihood_function(tree)

    # (parameter name, edge, value), applied in this order
    rules = [
        ("aX", "a", 8.0),
        ("bX", "b", 8.0),
        ("edX", "edge.0", 2.0),
        ("cX", "c", 0.5),
        ("edX", "d", 4.0),
    ]
    for par_name, edge_name, rate in rules:
        lf.set_param_rule(par_name, edge=edge_name, value=rate)
    lf.set_alignment(aln)

    # we construct the hand calc variants
    root_probs = ones(4, float) * 0.25
    expected = {}
    expected["a"] = dot(root_probs, make_p(0.2, (0, 3), 8))
    expected["b"] = dot(root_probs, make_p(0.2, (3, 0), 8))
    expected["edge.0"] = dot(root_probs, make_p(0.1, (1, 2), 2))
    # c and d start from the vector computed for edge.0
    expected["c"] = dot(expected["edge.0"], make_p(0.1, (2, 1), 0.5))
    expected["d"] = dot(expected["edge.0"], make_p(0.1, (1, 2), 4))

    prob_vectors = lf.get_motif_probs_by_node()
    for node in ("a", "b", "c", "d", "edge.0"):
        self.assertFloatEqual(prob_vectors[node].array, expected[node])
def __init__(self, alphabet, **kw):
    """Set up one forward-only MotifChange predicate per selected mask entry.

    Parameters
    ----------
    alphabet
        passed to ``Stationary.__init__``; the alphabet actually used is
        re-read afterwards via ``self.get_alphabet()``
    **kw
        forwarded unchanged to ``Stationary.__init__``

    Sets ``param_pick``, ``last_in_column``, ``predicate_masks``,
    ``parameter_order``, ``predicate_indices`` and ``symmetric`` on the
    instance, then calls ``self.check_params_exist()``.
    """
    Stationary.__init__(self, alphabet, **kw)

    alphabet = self.get_alphabet()  # as may be altered by recode_gaps etc.
    mask = self._instantaneous_mask
    N = len(alphabet)
    # param_pick[i, j] holds the 1-based position of the predicate created
    # for the (i, j) exchange; entries left at 0 have no predicate
    param_pick = numpy.zeros([N, N], int)
    predicates = []
    last_in_column = []
    for d, (row, col) in enumerate(zip(mask, mask.T)):
        # nonzero positions of row d / column d from the diagonal onwards
        row = list(numpy.flatnonzero(row[d:]) + d)
        col = list(numpy.flatnonzero(col[d:]) + d)
        if col:
            # the highest-index entry of the column gets no predicate; it is
            # recorded separately as (row index, column index)
            last_in_column.append((col.pop(), d))
        else:
            assert not row

        inst = [(d, j) for j in row] + [(i, d) for i in col]

        for (i, j) in inst:
            (x, y) = [alphabet[k] for k in [i, j]]
            predicates.append(MotifChange(x, y, forward_only=True))
            param_pick[i, j] = len(predicates)

    self.param_pick = param_pick
    self.last_in_column = last_in_column

    predicate_masks, predicate_order = self._adapt_predicates(predicates)
    self.predicate_masks = predicate_masks
    self.parameter_order = []
    self.predicate_indices = []

    for pred in predicate_order:
        pred_mask = predicate_masks[pred]
        indices = numpy.nonzero(pred_mask)
        # numpy.alltrue was removed in NumPy 2.0; numpy.all is the
        # equivalent that exists in every NumPy release
        assert numpy.all(pred_mask[indices] == 1)
        self.parameter_order.append(pred)
        self.predicate_indices.append(indices)

    self.symmetric = False
    self.check_params_exist()
def a_c(x, y):
    """Return whether (x, y) is the A/C exchange in either direction."""
    return (x == "A" and y == "C") or (x == "C" and y == "A")


__author__ = "Peter Maxwell and Gavin Huttley"
__copyright__ = "Copyright 2007-2021, The Cogent Project"
__credits__ = ["Peter Maxwell", "Gavin Huttley"]
__license__ = "BSD-3"
__version__ = "2021.04.20a"
__maintainer__ = "Gavin Huttley"
__email__ = "*****@*****.**"
__status__ = "Production"

# NOTE(review): this rebinds the module-level name ``a_c``; from here on it
# refers to this MotifChange predicate, not the function defined above.
a_c = MotifChange("A", "C")
# union of the A/G and T/C exchanges
trans = MotifChange("A", "G") | MotifChange("T", "C")

# tree built from the two tip names "a" and "b"
TREE = make_tree(tip_names="ab")


class ScaleRuleTests(unittest.TestCase):
    def _makeModel(self, predicates, scale_rules=None):
        """Build a TimeReversibleNucleotide model for the given predicates.

        The model is created with equal motif probabilities and without gap
        modelling. ``scale_rules`` is passed as ``scales``; a falsy value
        (including the default ``None``) is replaced by an empty list.
        """
        scale_rules = scale_rules or []
        return substitution_model.TimeReversibleNucleotide(
            equal_motif_probs=True,
            model_gaps=False,
            predicates=predicates,
            scales=scale_rules,
        )
# NOTE(review): the next four statements are the tail of a definition whose
# header lies outside this view; they are reproduced unchanged. They compare
# the matrix from FastExponentiator(Q) at 0.5 with calc_psub_matrix at 0.5.
Q = self.calcQ(pi, pi, *params)
P1 = FastExponentiator(Q)(0.5)
P2 = self.calc_psub_matrix(pi, 0.5, *params)
assert_allclose(P1, P2)


def _solved_nucleotide(name, predicates, rate_matrix_required=True, **kw):
    """Construct a named nucleotide model with gap modelling switched off.

    PredefinedNucleotide is used only when ``_solved_models`` is not None and
    the caller passes ``rate_matrix_required=False``; otherwise
    TimeReversibleNucleotide is used. ``model_gaps`` is always forced to
    False, overriding any value supplied in ``kw``.
    """
    if _solved_models is not None and not rate_matrix_required:
        klass = PredefinedNucleotide
    else:
        klass = TimeReversibleNucleotide
    kw["model_gaps"] = False
    return klass(name=name, predicates=predicates, **kw)


# T/C and A/G exchange predicates, each given its own parameter name
kappa_y = MotifChange("T", "C").aliased("kappa_y")
kappa_r = MotifChange("A", "G").aliased("kappa_r")
# union of the two predicates above under the single name "kappa"
kappa = (kappa_y | kappa_r).aliased("kappa")


def TN93(**kw):
    """Tamura and Nei 1993 model

    Uses the separate ``kappa_y`` and ``kappa_r`` predicates and always sets
    ``recode_gaps=True``.
    """
    kw["recode_gaps"] = True
    return _solved_nucleotide("TN93", [kappa_y, kappa_r], **kw)


def HKY85(**kw):
    """Hasegawa, Kishino and Yano 1985 model

    Uses the single combined ``kappa`` predicate and always sets
    ``recode_gaps=True``.
    """
    kw["recode_gaps"] = True
    return _solved_nucleotide("HKY85", [kappa], **kw)
def _makeMotifChange(self, *args, **kw):
    """Create a MotifChange from the given arguments and return the result
    of interpreting it against ``self.model``."""
    return MotifChange(*args, **kw).interpret(self.model)