示例#1
0
 def test_progress_with_guide_tree(self):
     """progressive_align accepts a guide tree as a newick string or a tree object"""
     parsed_tree = make_tree(treestring=self.treestring)
     # the string form is tried first, then the parsed tree; both must
     # give an alignment of the same length
     for guide in (self.treestring, parsed_tree):
         app = align_app.progressive_align(model="nucleotide", guide_tree=guide)
         result = app(self.seqs)
         self.assertEqual(len(result), 42)

     # tip name written with an underscore in the tree, while the matching
     # sequence is named with a space
     underscored = (
         "(Bandicoot:0.4,FlyingFox:0.05,(Rhesus_macaque:0.06,Human:0.0):0.04);"
     )
     app = align_app.progressive_align(model="nucleotide", guide_tree=underscored)
     renamed = self.seqs.to_dict()
     renamed["Rhesus macaque"] = renamed.pop("Rhesus")
     result = app(make_unaligned_seqs(renamed))
     self.assertEqual(len(result), 42)

     # a guide tree without branch lengths is rejected at construction
     no_lengths = "(Bandicoot,FlyingFox,(Rhesus_macaque,Human));"
     with self.assertRaises(ValueError):
         _ = align_app.progressive_align(model="nucleotide", guide_tree=no_lengths)
示例#2
0
 def test_progressive_align_codon(self):
     """codon models, named explicitly or generically, produce the expected length"""
     for model_name in ("GY94", "codon"):
         app = align_app.progressive_align(model=model_name)
         result = app(self.seqs)
         self.assertEqual(len(result), 42)
示例#3
0
 def test_progressive_align_protein(self):
     """protein models align the translated sequences with a supplied guide tree"""
     translated = self.seqs.get_translation()
     for model_name in ("WG01", "protein"):
         app = align_app.progressive_align(
             model=model_name, guide_tree=self.treestring
         )
         result = app(translated)
         self.assertEqual(len(result), 14)
示例#4
0
 def test_with_genetic_code(self):
     """handles genetic code argument

     Checks the codon motifs of the constructed model with and without an
     explicit ``gc`` argument, and that combining ``gc`` with a nucleotide
     model raises TypeError.
     """
     aligner = align_app.progressive_align(model="GY94", gc="2")
     # the 'TGA' codon is a sense codon in vertebrate mitochondrial
     self.assertIn("TGA", aligner._model.get_motifs())
     aligner = align_app.progressive_align(model="codon")
     # but a stop codon in the standard nuclear
     self.assertNotIn("TGA", aligner._model.get_motifs())
     # a genetic code makes no sense for a nucleotide model
     with self.assertRaises(TypeError):
         align_app.progressive_align(model="nucleotide", gc="2")
示例#5
0
 def test_progress_with_guide_tree(self):
     """progressive_align accepts a guide tree as a newick string or a tree object"""
     parsed_tree = make_tree(treestring=self.treestring)
     # string form first, then the parsed tree object
     for guide in (self.treestring, parsed_tree):
         app = align_app.progressive_align(model="nucleotide", guide_tree=guide)
         result = app(self.seqs)
         self.assertEqual(len(result), 42)
示例#6
0
    def test_progressive_align_protein_moltype(self):
        """protein data aligns without a guide tree being supplied"""
        from cogent3 import load_aligned_seqs

        wanted = ["Rat", "Cow", "Human", "Mouse", "Whale"]
        # load an existing alignment, strip its gaps and keep a subset of names
        loaded = load_aligned_seqs("data/nexus_aa.nxs", moltype="protein")
        unaligned = loaded.degap()
        subset = unaligned.take_seqs(wanted)
        for model_name in ("WG01", "protein"):
            app = align_app.progressive_align(model=model_name)
            result = app(subset)
            # the app must deliver a real result, not a failure record
            self.assertNotIsInstance(result, NotCompleted)
示例#7
0
def _get_all_composables(tmp_dir_name):
    """Return a list holding one instance of each app exercised by the tests.

    ``tmp_dir_name`` is passed to the writer apps as their first argument.
    """
    first_model = evo.model("HKY85")
    second_model = evo.model("GN")
    hypothesis = evo.hypothesis(first_model, second_model)
    num_reps = 100

    # construction order is kept as-is; entries are fresh instances except
    # where a local defined above is reused
    return [
        align.align_to_ref(),
        align.progressive_align(model="GY94"),
        evo.ancestral_states(),
        evo.bootstrap(hyp=hypothesis, num_reps=num_reps),
        evo.hypothesis(first_model, second_model),
        evo.model("GN"),
        evo.tabulate_stats(),
        sample.fixed_length(100),
        sample.min_length(100),
        io.write_db(tmp_dir_name, create=True),
        io.write_json(tmp_dir_name, create=True),
        io.write_seqs(tmp_dir_name, create=True),
        sample.omit_bad_seqs(),
        sample.omit_degenerates(),
        sample.omit_duplicated(),
        sample.take_codon_positions(1),
        sample.take_named_seqs(),
        sample.trim_stop_codons(gc=1),
        translate.select_translatable(),
        tree.quick_tree(),
        tree.scale_branches(),
        tree.uniformize_tree(),
    ]
示例#8
0
    def test_pickle_progressive_align(self):
        """output of a codon progressive_align survives a pickle round trip"""
        import pickle

        app = align_app.progressive_align(model="codon")
        result = app(self.seqs)
        # NOTE(review): it is the app's output that gets pickled here, not
        # the app instance itself -- confirm that is the intent
        restored = pickle.loads(pickle.dumps(result))
        self.assertTrue(restored)
示例#9
0
    def test_progressive_align_nuc(self):
        """nucleotide model with a matching distance gives the expected alignment"""
        expected = {
            "Rhesus": "GCCAGCTCATTACAGCATGAGAACAG---TTTGTTACTCACT",
            "Human": "GCCAGCTCATTACAGCATGAGAACAGCAGTTTATTACTCACT",
            "Bandicoot": "NACTCATTAATGCTTGAAACCAGCAG---TTTATTGTCCAAC",
            "FlyingFox": "GCCAGCTCTTTACAGCATGAGAACAG---TTTATTATACACT",
        }
        app = align_app.progressive_align(model="TN93", distance="TN93")
        result = app(self.seqs)
        self.assertEqual(result.to_dict(), expected)

        # NOTE(review): meant to cover the default settings? The arguments
        # repeat the explicit TN93 configuration used above -- confirm
        app = align_app.progressive_align(model="TN93", distance="TN93")
        result = app(self.seqs)
        self.assertEqual(len(result), 42)
        # the alignment carries the same moltype as the app that made it
        self.assertEqual(result.moltype, app._moltype)
示例#10
0
 def test_progressive_fails(self):
     """should return NotCompletedResult along with message"""
     # Bandicoot has an in-frame stop codon
     seqs = make_unaligned_seqs(
         data={"Human": "GCCTCA", "Rhesus": "GCCAGCTCA", "Bandicoot": "TGATCATTA"},
         moltype="dna",
     )
     aligner = align_app.progressive_align(model="codon")
     got = aligner(seqs)
     # assertTrue(type(got), NotCompleted) treated NotCompleted as the
     # failure message and could never fail; check the type for real
     self.assertIsInstance(got, NotCompleted)
示例#11
0
def _get_all_composable_apps():
    """Return a list holding one instance of each composable app under test.

    The sequence writer is pointed at the current working directory.
    """
    return [
        align.align_to_ref(),
        align.progressive_align(model="GY94"),
        sample.fixed_length(100),
        sample.min_length(100),
        io.write_seqs(os.getcwd()),
        sample.omit_bad_seqs(),
        sample.omit_degenerates(),
        sample.take_codon_positions(1),
        sample.take_named_seqs(),
        sample.trim_stop_codons(gc=1),
    ]
示例#12
0
    def test_composite_pickleable(self):
        """individual apps and their composition can be pickled"""
        from pickle import dumps
        from cogent3.app import io, sample, evo, tree, translate, align

        def pickled(app):
            # pickle right after construction, then hand the app back
            dumps(app)
            return app

        read = pickled(io.load_aligned(moltype="dna"))
        trans = pickled(translate.select_translatable())
        aln = pickled(align.progressive_align("nucleotide"))
        just_nucs = pickled(sample.omit_degenerates(moltype="dna"))
        limit = pickled(sample.fixed_length(1000, random=True))
        mod = pickled(evo.model("HKY85"))
        # quick_tree is pickled on its own; it is not part of the composite
        pickled(tree.quick_tree())
        dumps(read + trans + aln + just_nucs + limit + mod)
示例#13
0
    def test_est_dist_pair_slow(self):
        """tests the distance between seq pairs in aln

        Alignments are built with align_to_ref under several choices of
        reference sequence, then pairwise distances are estimated with the
        "GTR" and "TN93" slow calculators and checked for symmetry and/or
        non-negativity.
        """
        slow_calcs = ("GTR", "TN93")

        def slow_dists(aln, calc):
            # distances from the named slow calculator, as a dict keyed by
            # (name, name) tuples
            return dist_app.fast_slow_dist(slow_calc=calc)(aln).to_dict()

        # seqs3, default reference: symmetric and non-negative
        aln3 = align.align_to_ref()(self.seqs3)
        for calc in slow_calcs:
            got = slow_dists(aln3, calc)
            assert_allclose(got[("Human", "Mouse")], got[("Mouse", "Human")])
            self.assertGreaterEqual(got[("Mouse", "Human")], 0)

        # seqs3, Human as reference
        aln3 = align.align_to_ref(ref_seq="Human")(self.seqs3)
        got = slow_dists(aln3, "GTR")
        assert_allclose(got[("Human", "Mouse")], got[("Mouse", "Human")])
        got = slow_dists(aln3, "TN93")
        assert_allclose(got[("Human", "Mouse")], got[("Mouse", "Human")])
        self.assertGreaterEqual(got[("Mouse", "Human")], 0)

        # seqs3, Mouse as reference
        aln3 = align.align_to_ref(ref_seq="Mouse")(self.seqs3)
        for calc in slow_calcs:
            got = slow_dists(aln3, calc)
            self.assertGreaterEqual(got[("Mouse", "Human")], 0)

        # seqs4 under the default reference, then each named reference
        for ref_kwargs in ({}, {"ref_seq": "Human"}, {"ref_seq": "Opossum"}):
            aln4 = align.align_to_ref(**ref_kwargs)(self.seqs4)
            for calc in slow_calcs:
                got = slow_dists(aln4, calc)
                self.assertGreaterEqual(got[("Human", "Opossum")], 0)

        # the result is discarded: this only checks the call does not raise
        treestring = "(Human:0.2,Bandicoot:0.2)"
        aligner = align.progressive_align(model="WG01", guide_tree=treestring)
        _ = aligner(self.seqs5)
示例#14
0
    def test_est_dist_pair_slow(self):
        """slow pairwise distances on reference-based alignments are sane"""
        calc_names = ("GTR", "TN93")
        human_mouse = ("Human", "Mouse")
        mouse_human = ("Mouse", "Human")
        human_opossum = ("Human", "Opossum")

        # seqs3, default reference: symmetric and non-negative
        aligned = align.align_to_ref()(self.seqs3)
        for name in calc_names:
            dists = dist_app.fast_slow_dist(slow_calc=name)(aligned).to_dict()
            assert_allclose(dists[human_mouse], dists[mouse_human])
            self.assertTrue(dists[mouse_human] >= 0)

        # seqs3, Human as reference
        aligned = align.align_to_ref(ref_seq="Human")(self.seqs3)
        dists = dist_app.fast_slow_dist(slow_calc="GTR")(aligned).to_dict()
        assert_allclose(dists[human_mouse], dists[mouse_human])
        dists = dist_app.fast_slow_dist(slow_calc="TN93")(aligned).to_dict()
        assert_allclose(dists[human_mouse], dists[mouse_human])
        self.assertTrue(dists[mouse_human] >= 0)

        # seqs3, Mouse as reference
        aligned = align.align_to_ref(ref_seq="Mouse")(self.seqs3)
        for name in calc_names:
            dists = dist_app.fast_slow_dist(slow_calc=name)(aligned).to_dict()
            self.assertTrue(dists[mouse_human] >= 0)

        # seqs4 under the default reference, then each named reference
        for ref_kwargs in ({}, {"ref_seq": "Human"}, {"ref_seq": "Opossum"}):
            aligned = align.align_to_ref(**ref_kwargs)(self.seqs4)
            for name in calc_names:
                dists = dist_app.fast_slow_dist(slow_calc=name)(aligned).to_dict()
                self.assertTrue(dists[human_opossum] >= 0)

        # now as a process
        hamming = dist_app.fast_slow_dist(fast_calc="hamming", moltype="dna")
        proc = align.align_to_ref() + hamming
        got = proc(self.seqs1)
        self.assertEqual(got[("Human", "Rhesus")], 1)

        # the result is discarded: this only checks the call does not raise
        treestring = "(Human:0.2,Bandicoot:0.2)"
        aligner = align.progressive_align(model="WG01", guide_tree=treestring)
        _ = aligner(self.seqs5)