def _get_all_composables(tmp_dir_name):
    """Construct one instance of each composable app used by the tests.

    Parameters
    ----------
    tmp_dir_name
        location passed to the ``io.write_db``, ``io.write_json`` and
        ``io.write_seqs`` writers, each of which is built with
        ``create=True``

    Returns
    -------
    list of app instances drawn from the align, evo, sample, io,
    translate and tree modules
    """
    first_model = evo.model("HKY85")
    second_model = evo.model("GN")
    # hypothesis handed to the bootstrap app below
    hypothesis = evo.hypothesis(first_model, second_model)
    replicates = 100

    return [
        align.align_to_ref(),
        align.progressive_align(model="GY94"),
        evo.ancestral_states(),
        evo.bootstrap(hyp=hypothesis, num_reps=replicates),
        evo.hypothesis(first_model, second_model),
        evo.model("GN"),
        evo.tabulate_stats(),
        sample.fixed_length(100),
        sample.min_length(100),
        io.write_db(tmp_dir_name, create=True),
        io.write_json(tmp_dir_name, create=True),
        io.write_seqs(tmp_dir_name, create=True),
        sample.omit_bad_seqs(),
        sample.omit_degenerates(),
        sample.omit_duplicated(),
        sample.take_codon_positions(1),
        sample.take_named_seqs(),
        sample.trim_stop_codons(gc=1),
        translate.select_translatable(),
        tree.quick_tree(),
        tree.scale_branches(),
        tree.uniformize_tree(),
    ]
def test_write_db(self):
    """writing with overwrite in MPI should reset db

    Runs a load -> align -> write_db process over the fasta members of the
    "data" store (excluding brca1.fasta) with ``parallel=True`` and
    ``use_mpi=True``, then reopens "delme.tinydb" and checks that its
    members match those of the process data store.
    """
    dstore = io_app.get_data_store("data", suffix="fasta")
    # skip any member whose path contains the component "brca1.fasta"
    members = dstore.filtered(
        callback=lambda x: "brca1.fasta" not in x.split("/")
    )
    reader = io_app.load_unaligned()
    aligner = align_app.align_to_ref()
    writer = write_db("delme.tinydb", create=True, if_exists="overwrite")
    process = reader + aligner + writer

    # the return value is not needed; the check below reads the data store
    process.apply_to(
        members,
        logger=False,
        show_progress=False,
        parallel=True,
        par_kw=dict(use_mpi=True),
    )

    expect = [str(m) for m in process.data_store]
    process.data_store.close()

    # now get read only and check what's in there
    # NOTE(review): this read-only store is never closed and delme.tinydb is
    # left on disk -- confirm cleanup happens elsewhere (e.g. tearDown)
    result = io_app.get_data_store("delme.tinydb")
    got = [str(m) for m in result]
    assert got == expect
def test_align_to_ref_generic_moltype(self):
    """align_to_ref records the moltype and a generic scoring dict

    For each moltype label, the app's ``_moltype.label`` must equal the
    label and ``_kwargs["S"]`` must equal the generic scoring dict built
    with a value of 10 for that moltype.
    """
    labels = ("text", "rna", "protein", "protein_with_stop", "bytes", "ab")
    for label in labels:
        app = align_app.align_to_ref(moltype=label)
        self.assertEqual(app._moltype.label, label)

        expected_scores = make_generic_scoring_dict(10, get_moltype(label))
        self.assertEqual(app._kwargs["S"], expected_scores)
def test_align_to_ref(self):
    """aligning self.seqs to the Human reference gives the expected gaps"""
    expected = {
        "Bandicoot": "---NACTCATTAATGCTTGAAACCAGCAGTTTATTGTCCAAC",
        "FlyingFox": "GCCAGCTCTTTACAGCATGAGAACAG---TTTATTATACACT",
        "Human": "GCCAGCTCATTACAGCATGAGAACAGCAGTTTATTACTCACT",
        "Rhesus": "GCCAGCTCATTACAGCATGAGAAC---AGTTTGTTACTCACT",
    }
    to_human = align_app.align_to_ref(ref_seq="Human")
    aligned = to_human(self.seqs)
    self.assertEqual(aligned.to_dict(), expected)
def _get_all_composable_apps():
    """Return one instance of each composable app exercised by the tests.

    The sequence writer is constructed with the current working directory
    as its target.
    """
    # Built in chunks, but in the same construction order as a single literal.
    apps = [
        align.align_to_ref(),
        align.progressive_align(model="GY94"),
    ]
    apps += [sample.fixed_length(100), sample.min_length(100)]
    apps.append(io.write_seqs(os.getcwd()))
    apps.extend(
        [
            sample.omit_bad_seqs(),
            sample.omit_degenerates(),
            sample.take_codon_positions(1),
            sample.take_named_seqs(),
            sample.trim_stop_codons(gc=1),
        ]
    )
    return apps
def test_aln_to_ref_known(self):
    """degapping a known alignment and re-aligning to Ref reproduces it"""
    gapped = {
        "Ref": "CAG---GAGAACAGAAACCCAT--TACTCACT",
        "Qu1": "CAG---GAGAACAG---CCCGTGTTACTCACT",
        "Qu2": "CAGCATGAGAACAGAAACCCGT--TA---ACT",
        "Qu3": "CAGCATGAGAACAGAAACCCGT----CTCACT",
        "Qu4": "CAGCATGAGAACAGAAACCCGTGTTACTCACT",
        "Qu5": "CAG---GAGAACAG---CCCAT--TACTCACT",
        "Qu6": "CAG---GA-AACAG---CCCAT--TACTCACT",
        "Qu7": "CAG---GA--ACAGA--CCCGT--TA---ACT",
    }
    known = make_aligned_seqs(gapped, moltype="dna")
    # capture the expectation before the alignment is degapped and re-aligned
    expected = known.to_dict()

    to_ref = align_app.align_to_ref(ref_seq="Ref")
    realigned = to_ref(known.degap())
    self.assertEqual(realigned.to_dict(), expected)
def test_align_to_ref_result_has_moltype(self):
    """result of align_to_ref(moltype="dna") reports the dna moltype"""
    dna_aligner = align_app.align_to_ref(moltype="dna")
    aligned = dna_aligner(self.seqs)
    self.assertEqual(aligned.moltype.label, "dna")
def test_est_dist_pair_slow(self):
    """tests the distance between seq pairs in aln

    Aligns self.seqs3 and self.seqs4 with align_to_ref (default and named
    reference sequences), estimates distances with the "GTR" and "TN93"
    slow calculators, and checks symmetry and/or non-negativity of the
    Human/Mouse and Human/Opossum distances. Finishes by running a
    progressive alignment of self.seqs5 with a guide tree.
    """

    def slow_distances(aln, slow_calc):
        # fresh distance app per calculator; result is a dict keyed by
        # (name, name) tuples
        calc = dist_app.fast_slow_dist(slow_calc=slow_calc)
        return calc(aln).to_dict()

    human_mouse = ("Human", "Mouse")
    mouse_human = ("Mouse", "Human")
    human_opossum = ("Human", "Opossum")
    slow_calcs = ("GTR", "TN93")

    # seqs3, default reference: symmetric and non-negative
    aln = align.align_to_ref()(self.seqs3)
    for slow_calc in slow_calcs:
        got = slow_distances(aln, slow_calc)
        assert_allclose(got[human_mouse], got[mouse_human])
        self.assertGreaterEqual(got[mouse_human], 0)

    # seqs3, Human reference
    # NOTE(review): the GTR case only checks symmetry; kept as written
    aln = align.align_to_ref(ref_seq="Human")(self.seqs3)
    got = slow_distances(aln, "GTR")
    assert_allclose(got[human_mouse], got[mouse_human])
    got = slow_distances(aln, "TN93")
    assert_allclose(got[human_mouse], got[mouse_human])
    self.assertGreaterEqual(got[mouse_human], 0)

    # seqs3, Mouse reference: non-negative only
    aln = align.align_to_ref(ref_seq="Mouse")(self.seqs3)
    for slow_calc in slow_calcs:
        got = slow_distances(aln, slow_calc)
        self.assertGreaterEqual(got[mouse_human], 0)

    # seqs4 with the default, Human and Opossum references: non-negative
    for ref_kwargs in ({}, {"ref_seq": "Human"}, {"ref_seq": "Opossum"}):
        aln = align.align_to_ref(**ref_kwargs)(self.seqs4)
        for slow_calc in slow_calcs:
            got = slow_distances(aln, slow_calc)
            self.assertGreaterEqual(got[human_opossum], 0)

    # progressive alignment with a guide tree only needs to run
    treestring = "(Human:0.2,Bandicoot:0.2)"
    aligner = align.progressive_align(model="WG01", guide_tree=treestring)
    _ = aligner(self.seqs5)
def test_est_dist_pair_slow(self):
    """tests the distance between seq pairs in aln

    Aligns self.seqs3 and self.seqs4 with align_to_ref (default and named
    reference sequences), estimates distances with the "GTR" and "TN93"
    slow calculators, and checks symmetry and/or non-negativity of the
    Human/Mouse and Human/Opossum distances. Then runs align_to_ref
    composed with a hamming distance app on self.seqs1, and finally a
    progressive alignment of self.seqs5 with a guide tree.
    """

    def slow_distances(aln, slow_calc):
        # fresh distance app per calculator; result is a dict keyed by
        # (name, name) tuples
        calc = dist_app.fast_slow_dist(slow_calc=slow_calc)
        return calc(aln).to_dict()

    human_mouse = ("Human", "Mouse")
    mouse_human = ("Mouse", "Human")
    human_opossum = ("Human", "Opossum")
    slow_calcs = ("GTR", "TN93")

    # seqs3, default reference: symmetric and non-negative
    aln = align.align_to_ref()(self.seqs3)
    for slow_calc in slow_calcs:
        got = slow_distances(aln, slow_calc)
        assert_allclose(got[human_mouse], got[mouse_human])
        self.assertGreaterEqual(got[mouse_human], 0)

    # seqs3, Human reference
    # NOTE(review): the GTR case only checks symmetry; kept as written
    aln = align.align_to_ref(ref_seq="Human")(self.seqs3)
    got = slow_distances(aln, "GTR")
    assert_allclose(got[human_mouse], got[mouse_human])
    got = slow_distances(aln, "TN93")
    assert_allclose(got[human_mouse], got[mouse_human])
    self.assertGreaterEqual(got[mouse_human], 0)

    # seqs3, Mouse reference: non-negative only
    aln = align.align_to_ref(ref_seq="Mouse")(self.seqs3)
    for slow_calc in slow_calcs:
        got = slow_distances(aln, slow_calc)
        self.assertGreaterEqual(got[mouse_human], 0)

    # seqs4 with the default, Human and Opossum references: non-negative
    for ref_kwargs in ({}, {"ref_seq": "Human"}, {"ref_seq": "Opossum"}):
        aln = align.align_to_ref(**ref_kwargs)(self.seqs4)
        for slow_calc in slow_calcs:
            got = slow_distances(aln, slow_calc)
            self.assertGreaterEqual(got[human_opossum], 0)

    # now as a process
    proc = align.align_to_ref() + dist_app.fast_slow_dist(
        fast_calc="hamming", moltype="dna"
    )
    got = proc(self.seqs1)
    self.assertEqual(got[("Human", "Rhesus")], 1)

    # progressive alignment with a guide tree only needs to run
    treestring = "(Human:0.2,Bandicoot:0.2)"
    aligner = align.progressive_align(model="WG01", guide_tree=treestring)
    _ = aligner(self.seqs5)