def test_fixedlength(self):
    """correctly returns data with specified length"""
    aln = make_aligned_seqs(data=[("a", "GCAAGCGTTTAT"), ("b", "GCTTTTGTCAAT")])

    # plain truncation to the requested length
    fl = sample.fixed_length(4)
    got = fl(aln)
    self.assertEqual(len(got), 4)

    # specifying a moltype is reflected in the returned alignment
    fl = sample.fixed_length(9, moltype="dna")
    got = fl(aln)
    self.assertEqual(len(got), 9)
    self.assertEqual(list(got.moltype), list(DNA))

    alns = [
        make_aligned_seqs(
            data=[("a", "GCAAGCGTTTAT"), ("b", "GCTTTTGTCAAT")], moltype=DNA
        ),
        make_aligned_seqs(data=[("a", "GGAAGCGT"), ("b", "GCTTT-GT")], moltype=DNA),
    ]
    # falsy results are dropped by the filter; the first alignment yields
    # its leading 9 columns
    fl = sample.fixed_length(9)
    got = [a for a in map(fl, alns) if a]
    self.assertEqual(len(got[0]), 9)
    expected = {"a": "GCAAGCGTT", "b": "GCTTTTGTC"}
    self.assertEqual(got[0].to_dict(), expected)

    # no alignment is long enough, so every result is falsy
    fl = sample.fixed_length(600)
    got = [a for a in map(fl, alns) if a]
    expected = []
    self.assertEqual(got, expected)
    # returns NotCompleted if nothing satisfies
    got = fl(alns[0])
    self.assertIsInstance(got, sample.NotCompleted)

    fl = sample.fixed_length(9, random=True)
    got = fl(aln)
    self.assertEqual(len(got), 9)
    # check the *result* (not the input) still carries both sequences
    self.assertEqual(set(got.names), set("ab"))

    # these will be just a subset as sampling one triplet
    fl = sample.fixed_length(3, random=True, motif_length=3)
    d = make_aligned_seqs(data=[("a", "GCAAGCGTGTAT"), ("b", "GCTACTGTCAAT")])
    expect = d.to_dict()
    got = fl(d)
    self.assertEqual(len(got), 3)
    for name, seq in got.to_dict().items():
        self.assertIn(seq, expect[name])

    # an explicit integer start gives the slice [start:start + length]
    fl = sample.fixed_length(9, start=2)
    got = fl(aln)
    self.assertEqual(len(got), 9)
    self.assertEqual(got.to_dict(), aln[2:11].to_dict())

    # a random start still yields a contiguous substring of each sequence
    fl = sample.fixed_length(4, start="random")
    expect = aln.to_dict()
    got = fl(aln)
    self.assertEqual(len(got), 4)
    for name, seq in got.to_dict().items():
        self.assertIn(seq, expect[name])
def _get_all_composables(tmp_dir_name):
    """Return a list of newly constructed app instances.

    The list holds instances built from the ``align``, ``evo``, ``sample``,
    ``io``, ``translate`` and ``tree`` modules.

    Parameters
    ----------
    tmp_dir_name
        passed as the first argument to the ``io`` writer apps, each of
        which is constructed with ``create=True``
    """
    hky85 = evo.model("HKY85")
    general_nuc = evo.model("GN")
    hypothesis = evo.hypothesis(hky85, general_nuc)
    num_reps = 100

    # every entry is a separate instance; construction order is the
    # order of appearance in the returned list
    apps = [
        align.align_to_ref(),
        align.progressive_align(model="GY94"),
    ]
    apps += [
        evo.ancestral_states(),
        evo.bootstrap(hyp=hypothesis, num_reps=num_reps),
        evo.hypothesis(hky85, general_nuc),
        evo.model("GN"),
        evo.tabulate_stats(),
    ]
    apps += [
        sample.fixed_length(100),
        sample.min_length(100),
    ]
    apps += [
        io.write_db(tmp_dir_name, create=True),
        io.write_json(tmp_dir_name, create=True),
        io.write_seqs(tmp_dir_name, create=True),
    ]
    apps += [
        sample.omit_bad_seqs(),
        sample.omit_degenerates(),
        sample.omit_duplicated(),
        sample.take_codon_positions(1),
        sample.take_named_seqs(),
        sample.trim_stop_codons(gc=1),
    ]
    apps += [
        translate.select_translatable(),
        tree.quick_tree(),
        tree.scale_branches(),
        tree.uniformize_tree(),
    ]
    return apps
def _get_all_composable_apps():
    """Return a list of newly constructed ``align``, ``sample`` and ``io`` apps.

    The sequence writer is pointed at the current working directory.
    """
    apps = [
        align.align_to_ref(),
        align.progressive_align(model="GY94"),
        sample.fixed_length(100),
        sample.min_length(100),
    ]
    # writer targets whatever directory the process is currently in
    apps.append(io.write_seqs(os.getcwd()))
    apps += [
        sample.omit_bad_seqs(),
        sample.omit_degenerates(),
        sample.take_codon_positions(1),
        sample.take_named_seqs(),
        sample.trim_stop_codons(gc=1),
    ]
    return apps
def test_composite_pickleable(self):
    """composable functions should be pickleable"""
    import pickle

    from cogent3.app import io, sample, evo, tree, translate, align

    loader = io.load_aligned(moltype="dna")
    translatable = translate.select_translatable()
    aligner = align.progressive_align("nucleotide")
    nucs_only = sample.omit_degenerates(moltype="dna")
    truncate = sample.fixed_length(1000, random=True)
    model = evo.model("HKY85")
    quick = tree.quick_tree()

    # each app on its own must serialise without raising
    standalone = (loader, translatable, aligner, nucs_only, truncate, model, quick)
    for app in standalone:
        pickle.dumps(app)

    # and so must the composed process (the tree app is not part of it)
    process = loader + translatable + aligner + nucs_only + truncate + model
    pickle.dumps(process)