class AlignmentVariationFilters(unittest.TestCase):
    """Check the variation-based filters of ``AlignmentList``.

    Each test loads ``variable_data`` into a fresh ``AlignmentList`` and
    asserts how many alignments are left after calling
    ``filter_segregating_sites`` or ``filter_informative_sites``.
    """

    def setUp(self):
        """Ensure ``temp_dir`` exists and start from an empty AlignmentList."""
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)

        self.aln_obj = AlignmentList([], sql_db=sql_db)

    def tearDown(self):
        """Clear the alignments, close ``con`` and delete ``temp_dir``.

        The steps are chained with try/finally so that a failure in one of
        them cannot leave the open handle or the directory behind for the
        tests that run afterwards. The first exception still propagates.
        """
        try:
            self.aln_obj.clear_alignments()
        finally:
            try:
                self.aln_obj.con.close()
            finally:
                shutil.rmtree(temp_dir)

    def test_variation_filter_min(self):
        """Segregating-site filter called with (None, None) leaves 3."""
        self.aln_obj.add_alignment_files(variable_data)
        self.aln_obj.filter_segregating_sites(None, None)
        self.assertEqual(len(self.aln_obj.alignments), 3)

    def test_variation_var_sites(self):
        """Segregating-site filter called with (1, 2) leaves 0."""
        self.aln_obj.add_alignment_files(variable_data)
        self.aln_obj.filter_segregating_sites(1, 2)
        self.assertEqual(len(self.aln_obj.alignments), 0)

    def test_variation_var_sites2(self):
        """Segregating-site filter called with (1, 3) leaves 1."""
        self.aln_obj.add_alignment_files(variable_data)
        self.aln_obj.filter_segregating_sites(1, 3)
        self.assertEqual(len(self.aln_obj.alignments), 1)

    def test_variation_inf_min(self):
        """Informative-site filter called with (None, None) leaves 3."""
        self.aln_obj.add_alignment_files(variable_data)
        self.aln_obj.filter_informative_sites(None, None)
        self.assertEqual(len(self.aln_obj.alignments), 3)

    def test_variation_inf_sites(self):
        """Informative-site filter called with (1, 4) leaves 1."""
        self.aln_obj.add_alignment_files(variable_data)
        self.aln_obj.filter_informative_sites(1, 4)
        self.assertEqual(len(self.aln_obj.alignments), 1)

    def test_variation_inf_sites2(self):
        """Informative-site filter called with (1, 1) leaves 1."""
        self.aln_obj.add_alignment_files(variable_data)
        self.aln_obj.filter_informative_sites(1, 1)
        # A leftover debugging print of the alignments dict was removed
        # here; it only added noise to the test runner output.
        self.assertEqual(len(self.aln_obj.alignments), 1)
class AlignmentCodonFilters(unittest.TestCase):
    """Check ``AlignmentList.filter_codon_positions`` on ``codon_filter``.

    Each test passes a three-element boolean mask and compares the
    sequences yielded by ``iter_alignments`` with the expected strings.
    """

    # NOTE(review): a second class named AlignmentCodonFilters appears later
    # in this file. If both stay in the same module the later definition
    # replaces this one at import time -- confirm which copy is intended.

    def setUp(self):
        """Ensure ``temp_dir`` exists and start from an empty AlignmentList."""
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)

        self.aln_obj = AlignmentList([], sql_db=sql_db)

    def tearDown(self):
        """Clear the alignments, close ``con`` and delete ``temp_dir``.

        Chained with try/finally so a failure in an early step does not
        skip the remaining cleanup; the first exception still propagates.
        """
        try:
            self.aln_obj.clear_alignments()
        finally:
            try:
                self.aln_obj.con.close()
            finally:
                shutil.rmtree(temp_dir)

    def _filtered_sequences(self, positions, table=None):
        """Load ``codon_filter``, apply *positions* and return the sequences.

        :param positions: list of three booleans handed to
            ``filter_codon_positions``.
        :param table: when given, it is passed as ``table_out`` to the
            filter and as the argument of ``iter_alignments``; when None
            both methods are called without a table argument.
        :returns: list with the second field of every item yielded by
            ``iter_alignments``.
        """
        self.aln_obj.add_alignment_files(codon_filter)

        if table is None:
            self.aln_obj.filter_codon_positions(positions)
            records = self.aln_obj.iter_alignments()
        else:
            self.aln_obj.filter_codon_positions(positions, table_out=table)
            records = self.aln_obj.iter_alignments(table)

        return [seq for _, seq, _ in records]

    def test_codon_filter_pos1(self):
        """Mask [True, False, False] yields ten sequences of 16 'a'."""
        seqs = self._filtered_sequences([True, False, False], "master_out")
        self.assertEqual(seqs, ["a" * 16] * 10)

    def test_codon_filter_pos2(self):
        """Mask [False, True, False] yields ten sequences of 16 't'."""
        seqs = self._filtered_sequences([False, True, False], "master_out")
        self.assertEqual(seqs, ["t" * 16] * 10)

    def test_codon_filter_pos3(self):
        """Mask [False, False, True] yields ten sequences of 16 'g'."""
        seqs = self._filtered_sequences([False, False, True], "master_out")
        self.assertEqual(seqs, ["g" * 16] * 10)

    def test_codon_filter_pos12(self):
        """Mask [True, True, False] yields ten sequences of 16 'at'."""
        seqs = self._filtered_sequences([True, True, False], "master_out")
        self.assertEqual(seqs, ["at" * 16] * 10)

    def test_codon_filter_pos13(self):
        """Mask [True, False, True] yields ten sequences of 16 'ag'."""
        seqs = self._filtered_sequences([True, False, True], "master_out")
        self.assertEqual(seqs, ["ag" * 16] * 10)

    def test_codon_filter_all(self):
        """Mask [True, True, True] without a table yields 16 'atg' each."""
        seqs = self._filtered_sequences([True, True, True])
        self.assertEqual(seqs, ["atg" * 16] * 10)
class AlignmentMissingFiltersTest(unittest.TestCase):
    """Check ``AlignmentList.filter_missing_data`` on the missing-data files.

    The tests call the filter with different pairs of numeric arguments and
    assert the resulting ``locus_length`` of each alignment.
    """

    # NOTE(review): a second class named AlignmentMissingFiltersTest appears
    # later in this file. If both stay in the same module the later
    # definition replaces this one -- confirm which copy is intended.

    # Input paths used by the tests. They are stored as tuples and copied
    # into a new list on every use, so each test hands its own list object
    # to add_alignment_files exactly as the inline literals did.
    _TWO_FILES = (
        "trifusion/tests/data/missing_data.phy",
        "trifusion/tests/data/missing_data2.phy",
    )
    _SINGLE_FILE = ("trifusion/tests/data/missing_data3.phy",)

    def setUp(self):
        """Ensure ``temp_dir`` exists and start from an empty AlignmentList."""
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)

        self.aln_obj = AlignmentList([], sql_db=sql_db)

    def tearDown(self):
        """Clear the alignments, close ``con`` and delete ``temp_dir``.

        Chained with try/finally so a failure in an early step does not
        skip the remaining cleanup; the first exception still propagates.
        """
        try:
            self.aln_obj.clear_alignments()
        finally:
            try:
                self.aln_obj.con.close()
            finally:
                shutil.rmtree(temp_dir)

    def _locus_lengths(self):
        """Return ``locus_length`` of every alignment, in iteration order."""
        return [aln.locus_length for aln in self.aln_obj]

    def _last_locus_length(self):
        """Return ``locus_length`` of the last alignment, or None if empty."""
        last = None
        for aln in self.aln_obj:
            last = aln.locus_length
        return last

    def test_filter_default(self):
        """filter_missing_data(25, 50) gives locus lengths [42, 43]."""
        self.aln_obj.add_alignment_files(list(self._TWO_FILES))
        self.aln_obj.filter_missing_data(25, 50)
        self.assertEqual(self._locus_lengths(), [42, 43])

    def test_filter_and_concat(self):
        """Filtering into 'master_out' then concatenating gives size 85."""
        self.aln_obj.add_alignment_files(list(self._TWO_FILES))
        self.aln_obj.filter_missing_data(25, 50, table_out="master_out")
        self.aln_obj.concatenate(table_in="master_out")
        self.assertEqual(self.aln_obj.size, 85)

    def test_no_filters(self):
        """filter_missing_data(100, 100) gives locus lengths [50, 50]."""
        self.aln_obj.add_alignment_files(list(self._TWO_FILES))
        self.aln_obj.filter_missing_data(100, 100)
        self.assertEqual(self._locus_lengths(), [50, 50])

    def test_no_missing(self):
        """filter_missing_data(0, 0) gives locus lengths [0, 19]."""
        self.aln_obj.add_alignment_files(list(self._TWO_FILES))
        self.aln_obj.filter_missing_data(0, 0)
        self.assertEqual(self._locus_lengths(), [0, 19])

    def test_no_data_aln_default_filters(self):
        """missing_data3.phy filtered with (25, 50) ends with length 0."""
        self.aln_obj.add_alignment_files(list(self._SINGLE_FILE))
        self.aln_obj.filter_missing_data(25, 50)
        self.assertEqual(self._last_locus_length(), 0)

    def test_no_data_aln_no_filters(self):
        """missing_data3.phy filtered with (100, 100) ends with length 50."""
        self.aln_obj.add_alignment_files(list(self._SINGLE_FILE))
        self.aln_obj.filter_missing_data(100, 100)
        self.assertEqual(self._last_locus_length(), 50)
class LoadAlignmentsTest(unittest.TestCase):
    """Check loading of alignment files into AlignmentList / Alignment.

    Several tests only construct an object and therefore pass as long as
    the constructor does not raise; the others assert attributes of the
    loaded object.
    """

    # NOTE(review): a second class named LoadAlignmentsTest appears later in
    # this file. If both stay in the same module the later definition
    # replaces this one -- confirm which copy is intended.

    # Taxon name looked up by the non-ASCII tests (escaped byte sequence
    # kept exactly as in the original literal).
    _NON_ASCII_TAXON = '\xc3\xa9!"#$%&/=?\'~\xc2\xba\xc2\xaa^"><'

    def setUp(self):
        """Ensure ``temp_dir`` exists and start from an empty AlignmentList."""
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)

        self.aln_obj = AlignmentList([], sql_db=sql_db)
        # Most tests rebind self.aln_obj to a new AlignmentList. Keep a
        # reference to this first object so tearDown can close its ``con``
        # explicitly instead of leaving that to garbage collection.
        self._setup_aln = self.aln_obj

    def tearDown(self):
        """Clear the alignments, close every ``con`` and delete ``temp_dir``.

        Chained with try/finally so a failure in an early step does not
        skip the remaining cleanup; the first exception still propagates.
        """
        try:
            self.aln_obj.clear_alignments()
        finally:
            try:
                self.aln_obj.con.close()
            finally:
                try:
                    # Only needed when a test replaced the setUp object.
                    if self._setup_aln is not self.aln_obj:
                        self._setup_aln.con.close()
                finally:
                    shutil.rmtree(temp_dir)

    def _load(self, files):
        """Rebind ``self.aln_obj`` to an AlignmentList built from *files*."""
        self.aln_obj = AlignmentList(files, sql_db=sql_db)
        return self.aln_obj

    def _first_alignment(self):
        """Return the first value of ``self.aln_obj.alignments``.

        ``list()`` is used so the lookup works whether ``values()`` returns
        a list or a view object.
        """
        return list(self.aln_obj.alignments.values())[0]

    def _count_non_ascii(self, names):
        """Count how many items of *names* equal the non-ASCII taxon."""
        return len([x for x in names if x == self._NON_ASCII_TAXON])

    def test_dna_load(self):
        """Loading ``dna_data_fas`` reports sequence_code ['DNA']."""
        self._load(dna_data_fas)
        self.assertEqual(["DNA"], self.aln_obj.sequence_code)

    def test_protein_load(self):
        """Loading ``protein_no_missing`` reports ['Protein']."""
        self._load(protein_no_missing)
        self.assertEqual(["Protein"], self.aln_obj.sequence_code)

    def test_mixed_type_load(self):
        """Loading ``mixed_seq_type`` reports both DNA and Protein."""
        self._load(mixed_seq_type)
        self.assertEqual(["DNA", "Protein"],
                         sorted(self.aln_obj.sequence_code))

    def test_class_instance(self):
        """``alignments`` of an empty AlignmentList is an OrderedDict."""
        self._load([])
        self.assertIsInstance(self.aln_obj.alignments, OrderedDict)

    def test_load_fas(self):
        """Constructing an AlignmentList from ``dna_data_fas`` succeeds."""
        self._load(dna_data_fas)

    def test_load_single_fas(self):
        """Constructing an Alignment from the first fas file succeeds."""
        Alignment(dna_data_fas[0], sql_cursor=self.aln_obj.cur)

    def test_load_phy(self):
        """Constructing an AlignmentList from ``dna_data_phy`` succeeds."""
        self._load(dna_data_phy)

    def test_load_single_phy(self):
        """Constructing an Alignment from the first phy file succeeds."""
        Alignment(dna_data_phy[0], sql_cursor=self.aln_obj.cur)

    def test_load_single_interleave_phy(self):
        """Constructing an Alignment from ``phylip_interleave[0]``."""
        Alignment(phylip_interleave[0], sql_cursor=self.aln_obj.cur)

    def test_load_nex(self):
        """Constructing an AlignmentList from ``dna_data_nex`` succeeds."""
        self._load(dna_data_nex)

    def test_load_single_nex(self):
        """Constructing an Alignment from the first nex file succeeds."""
        Alignment(dna_data_nex[0], sql_cursor=self.aln_obj.cur)

    def test_load_interleave_nex(self):
        """Constructing an Alignment from the interleaved nexus file."""
        Alignment(concatenated_interleave_nexus[0],
                  sql_cursor=self.aln_obj.cur)

    def test_load_stc(self):
        """Constructing an AlignmentList from ``dna_data_stc`` succeeds."""
        self._load(dna_data_stc)

    def test_load_single_stc(self):
        """Constructing an Alignment from the first stc file with db_idx."""
        Alignment(dna_data_stc[0], sql_cursor=self.aln_obj.cur,
                  db_idx=self.aln_obj._idx + 1)

    def test_load_loci(self):
        """Constructing an AlignmentList from ``dna_data_loci`` succeeds."""
        self._load(dna_data_loci)

    def test_load_single_loci(self):
        """Constructing an Alignment from the first loci file with db_idx."""
        Alignment(dna_data_loci[0], sql_cursor=self.aln_obj.cur,
                  db_idx=self.aln_obj._idx + 1)

    def test_load_nexus_par(self):
        """Loading ``concatenated_medium_nexus`` fills the partitions."""
        self._load(concatenated_medium_nexus)
        self.assertTrue(self.aln_obj.partitions.partitions)

    def test_load_wrong_type(self):
        """Loading ``bad_file`` populates ``bad_alignments``."""
        self._load(bad_file)
        self.assertTrue(self.aln_obj.bad_alignments)

    def test_duplicate_files(self):
        """Loading the loci files twice populates duplicate_alignments."""
        self._load(dna_data_loci + dna_data_loci)
        self.assertTrue(self.aln_obj.duplicate_alignments)

    def test_unequal_length(self):
        """Loading ``unequal_file`` populates ``non_alignments``."""
        self._load(unequal_file)
        self.assertTrue(self.aln_obj.non_alignments)

    def test_bad_file_removal_from_db(self):
        """No 'Seq1' taxon is iterated after loading ``unequal_file``."""
        self._load(unequal_file)
        self.aln_obj.add_alignment_files(dna_data_fas)

        hits = 0
        for tx, _, _ in self.aln_obj.iter_alignments():
            if tx == "Seq1":
                hits += 1

        self.assertEqual(hits, 0)

    def test_load_no_data(self):
        """Constructing an AlignmentList from ``no_data`` succeeds."""
        self._load(no_data)

    def test_alternative_missing(self):
        """``alternative_missing`` gives '?' as sequence_code[1]."""
        self._load(alternative_missing)
        self.assertEqual(self._first_alignment().sequence_code[1], "?")

    def test_dna_missing_default(self):
        """``single_dna`` gives 'n' as sequence_code[1]."""
        self._load(single_dna)
        self.assertEqual(self._first_alignment().sequence_code[1], "n")

    def test_protein_missing_default(self):
        """``protein_no_missing`` gives 'x' as sequence_code[1]."""
        self._load(protein_no_missing)
        self.assertEqual(self._first_alignment().sequence_code[1], "x")

    def test_dna_missing_eval(self):
        """``concatenated_medium_nexus`` gives 'n' as sequence_code[1]."""
        self._load(concatenated_medium_nexus)
        self.assertEqual(self._first_alignment().sequence_code[1], "n")

    def test_protein_missing_eval(self):
        """``protein_normal_missing`` gives 'x' as sequence_code[1]."""
        self._load(protein_normal_missing)
        self.assertEqual(self._first_alignment().sequence_code[1], "x")

    def test_non_ascii_taxon_names(self):
        """The non-ASCII taxon appears once in ``taxa_names``."""
        self._load(non_ascii)
        self.assertEqual(self._count_non_ascii(self.aln_obj.taxa_names), 1)

    def test_non_ascii_iteration(self):
        """The non-ASCII taxon is yielded once by ``iter_alignments``."""
        self._load(non_ascii)
        taxa = [tx for tx, _, _ in self.aln_obj.iter_alignments()]
        self.assertEqual(self._count_non_ascii(taxa), 1)

    def test_non_ascii_get_taxaidx(self):
        """The non-ASCII taxon appears once in the alignment taxa_idx."""
        self._load(non_ascii)
        aln = self._first_alignment()
        self.assertEqual(self._count_non_ascii(aln.taxa_idx), 1)

    def test_non_ascii_iter_columns(self):
        """The non-ASCII taxon appears once in the first column's taxa."""
        self._load(non_ascii)
        tx_list, _, _ = next(self.aln_obj.iter_columns(include_taxa=True))
        self.assertEqual(self._count_non_ascii(tx_list), 1)
class AlignmentTaxaFilters(unittest.TestCase):
    """Check the taxa-based filters of ``AlignmentList``.

    Each test loads ``dna_data_fas`` and asserts how many alignments are
    left after calling ``filter_min_taxa`` or ``filter_by_taxa``.
    """

    def setUp(self):
        """Ensure ``temp_dir`` exists and start from an empty AlignmentList."""
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)

        self.aln_obj = AlignmentList([], sql_db=sql_db)

    def tearDown(self):
        """Clear the alignments, close ``con`` and delete ``temp_dir``.

        Chained with try/finally so a failure in an early step does not
        skip the remaining cleanup; the first exception still propagates.
        """
        try:
            self.aln_obj.clear_alignments()
        finally:
            try:
                self.aln_obj.con.close()
            finally:
                shutil.rmtree(temp_dir)

    def test_filter_min_taxa(self):
        """filter_min_taxa(50) leaves 5 alignments."""
        self.aln_obj.add_alignment_files(dna_data_fas)
        self.aln_obj.filter_min_taxa(50)
        self.assertEqual(len(self.aln_obj.alignments), 5)

    def test_filter_min_taxa_max(self):
        """filter_min_taxa(100) leaves 1 alignment."""
        self.aln_obj.add_alignment_files(dna_data_fas)
        self.aln_obj.filter_min_taxa(100)
        self.assertEqual(len(self.aln_obj.alignments), 1)

    def test_filter_min_taxa_min(self):
        """filter_min_taxa(0) leaves 7 alignments."""
        self.aln_obj.add_alignment_files(dna_data_fas)
        self.aln_obj.filter_min_taxa(0)
        self.assertEqual(len(self.aln_obj.alignments), 7)

    def test_filter_by_taxa_include(self):
        """'Contain' mode with spa..spd leaves 2 alignments."""
        self.aln_obj.add_alignment_files(dna_data_fas)
        self.aln_obj.filter_by_taxa(["spa", "spb", "spc", "spd"], "Contain")
        self.assertEqual(len(self.aln_obj.alignments), 2)

    def test_filter_by_taxa_exclude(self):
        """'Exclude' mode with spa..spd leaves 5 alignments."""
        self.aln_obj.add_alignment_files(dna_data_fas)
        self.aln_obj.filter_by_taxa(["spa", "spb", "spc", "spd"], "Exclude")
        self.assertEqual(len(self.aln_obj.alignments), 5)

    def test_filter_by_taxa_all(self):
        """'Contain' mode with the name 'no_taxa' leaves 0 alignments."""
        self.aln_obj.add_alignment_files(dna_data_fas)
        self.aln_obj.filter_by_taxa(["no_taxa"], "Contain")
        self.assertEqual(len(self.aln_obj.alignments), 0)

    def test_filter_by_taxa_from_file(self):
        """'Contain' mode given the filter_taxa.txt path leaves 2."""
        self.aln_obj.add_alignment_files(dna_data_fas)
        self.aln_obj.filter_by_taxa("trifusion/tests/data/filter_taxa.txt",
                                    "Contain")
        self.assertEqual(len(self.aln_obj.alignments), 2)
class AlignmentMissingFiltersTest(unittest.TestCase):
    """Check ``AlignmentList.filter_missing_data`` on the missing-data files.

    The tests call the filter with different pairs of numeric arguments and
    assert the resulting ``locus_length`` of each alignment.
    """

    # NOTE(review): a class with this same name also appears earlier in this
    # file. If both stay in the same module this definition replaces the
    # earlier one -- confirm which copy is intended.

    # Input paths used by the tests. They are stored as tuples and copied
    # into a new list on every use, so each test hands its own list object
    # to add_alignment_files exactly as the inline literals did.
    _PAIRED_INPUT = (
        "trifusion/tests/data/missing_data.phy",
        "trifusion/tests/data/missing_data2.phy",
    )
    _LONE_INPUT = ("trifusion/tests/data/missing_data3.phy",)

    def setUp(self):
        """Ensure ``temp_dir`` exists and start from an empty AlignmentList."""
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)

        self.aln_obj = AlignmentList([], sql_db=sql_db)

    def tearDown(self):
        """Clear the alignments, close ``con`` and delete ``temp_dir``.

        Chained with try/finally so a failure in an early step does not
        skip the remaining cleanup; the first exception still propagates.
        """
        try:
            self.aln_obj.clear_alignments()
        finally:
            try:
                self.aln_obj.con.close()
            finally:
                shutil.rmtree(temp_dir)

    def _filter(self, paths, *args, **kwargs):
        """Load a copy of *paths* and forward the rest to the filter."""
        self.aln_obj.add_alignment_files(list(paths))
        self.aln_obj.filter_missing_data(*args, **kwargs)

    def _all_lengths(self):
        """Return ``locus_length`` of every alignment, in iteration order."""
        return [aln.locus_length for aln in self.aln_obj]

    def _final_length(self):
        """Return ``locus_length`` of the last alignment, or None if empty."""
        length = None
        for aln in self.aln_obj:
            length = aln.locus_length
        return length

    def test_filter_default(self):
        """filter_missing_data(25, 50) gives locus lengths [42, 43]."""
        self._filter(self._PAIRED_INPUT, 25, 50)
        self.assertEqual(self._all_lengths(), [42, 43])

    def test_filter_and_concat(self):
        """Filtering into 'master_out' then concatenating gives size 85."""
        self._filter(self._PAIRED_INPUT, 25, 50, table_out="master_out")
        self.aln_obj.concatenate(table_in="master_out")
        self.assertEqual(self.aln_obj.size, 85)

    def test_no_filters(self):
        """filter_missing_data(100, 100) gives locus lengths [50, 50]."""
        self._filter(self._PAIRED_INPUT, 100, 100)
        self.assertEqual(self._all_lengths(), [50, 50])

    def test_no_missing(self):
        """filter_missing_data(0, 0) gives locus lengths [0, 19]."""
        self._filter(self._PAIRED_INPUT, 0, 0)
        self.assertEqual(self._all_lengths(), [0, 19])

    def test_no_data_aln_default_filters(self):
        """missing_data3.phy filtered with (25, 50) ends with length 0."""
        self._filter(self._LONE_INPUT, 25, 50)
        self.assertEqual(self._final_length(), 0)

    def test_no_data_aln_no_filters(self):
        """missing_data3.phy filtered with (100, 100) ends with length 50."""
        self._filter(self._LONE_INPUT, 100, 100)
        self.assertEqual(self._final_length(), 50)
class AlignmentCodonFilters(unittest.TestCase):
    """Check ``AlignmentList.filter_codon_positions`` on ``codon_filter``.

    Each test passes a three-element boolean mask and compares the
    sequences yielded by ``iter_alignments`` with the expected strings.
    """

    # NOTE(review): a class with this same name also appears earlier in this
    # file. If both stay in the same module this definition replaces the
    # earlier one -- confirm which copy is intended.

    def setUp(self):
        """Ensure ``temp_dir`` exists and start from an empty AlignmentList."""
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)

        self.aln_obj = AlignmentList([], sql_db=sql_db)

    def tearDown(self):
        """Clear the alignments, close ``con`` and delete ``temp_dir``.

        Chained with try/finally so a failure in an early step does not
        skip the remaining cleanup; the first exception still propagates.
        """
        try:
            self.aln_obj.clear_alignments()
        finally:
            try:
                self.aln_obj.con.close()
            finally:
                shutil.rmtree(temp_dir)

    def _assert_filtered(self, mask, expected, table=None):
        """Load ``codon_filter``, apply *mask* and compare the sequences.

        :param mask: list of three booleans for ``filter_codon_positions``.
        :param expected: list of sequence strings the iteration must yield.
        :param table: when given, it is passed as ``table_out`` to the
            filter and as the argument of ``iter_alignments``; when None
            both methods are called without a table argument.
        """
        self.aln_obj.add_alignment_files(codon_filter)

        if table is None:
            self.aln_obj.filter_codon_positions(mask)
            rows = self.aln_obj.iter_alignments()
        else:
            self.aln_obj.filter_codon_positions(mask, table_out=table)
            rows = self.aln_obj.iter_alignments(table)

        self.assertEqual([seq for _, seq, _ in rows], expected)

    def test_codon_filter_pos1(self):
        """Mask [True, False, False] yields ten sequences of 16 'a'."""
        self._assert_filtered([True, False, False], ["a" * 16] * 10,
                              table="master_out")

    def test_codon_filter_pos2(self):
        """Mask [False, True, False] yields ten sequences of 16 't'."""
        self._assert_filtered([False, True, False], ["t" * 16] * 10,
                              table="master_out")

    def test_codon_filter_pos3(self):
        """Mask [False, False, True] yields ten sequences of 16 'g'."""
        self._assert_filtered([False, False, True], ["g" * 16] * 10,
                              table="master_out")

    def test_codon_filter_pos12(self):
        """Mask [True, True, False] yields ten sequences of 16 'at'."""
        self._assert_filtered([True, True, False], ["at" * 16] * 10,
                              table="master_out")

    def test_codon_filter_pos13(self):
        """Mask [True, False, True] yields ten sequences of 16 'ag'."""
        self._assert_filtered([True, False, True], ["ag" * 16] * 10,
                              table="master_out")

    def test_codon_filter_all(self):
        """Mask [True, True, True] without a table yields 16 'atg' each."""
        self._assert_filtered([True, True, True], ["atg" * 16] * 10)
class LoadAlignmentsTest(unittest.TestCase):
    """Check loading of alignment files into AlignmentList / Alignment.

    Several tests only construct an object and therefore pass as long as
    the constructor does not raise; the others assert attributes of the
    loaded object.
    """

    # NOTE(review): a class with this same name also appears earlier in this
    # file. If both stay in the same module this definition replaces the
    # earlier one -- confirm which copy is intended.

    # Taxon name looked up by the non-ASCII tests (escaped byte sequence
    # kept exactly as in the original literal).
    _SPECIAL_TAXON = '\xc3\xa9!"#$%&/=?\'~\xc2\xba\xc2\xaa^"><'

    def setUp(self):
        """Ensure ``temp_dir`` exists and start from an empty AlignmentList."""
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)

        self.aln_obj = AlignmentList([], sql_db=sql_db)
        # Most tests rebind self.aln_obj to a new AlignmentList. Keep a
        # reference to this first object so tearDown can close its ``con``
        # explicitly instead of leaving that to garbage collection.
        self._initial_aln = self.aln_obj

    def tearDown(self):
        """Clear the alignments, close every ``con`` and delete ``temp_dir``.

        Chained with try/finally so a failure in an early step does not
        skip the remaining cleanup; the first exception still propagates.
        """
        try:
            self.aln_obj.clear_alignments()
        finally:
            try:
                self.aln_obj.con.close()
            finally:
                try:
                    # Only needed when a test replaced the setUp object.
                    if self._initial_aln is not self.aln_obj:
                        self._initial_aln.con.close()
                finally:
                    shutil.rmtree(temp_dir)

    def _reload(self, files):
        """Rebind ``self.aln_obj`` to an AlignmentList built from *files*."""
        self.aln_obj = AlignmentList(files, sql_db=sql_db)

    def _missing_symbol(self):
        """Return ``sequence_code[1]`` of the first loaded alignment.

        ``list()`` is used so the lookup works whether ``values()`` returns
        a list or a view object.
        """
        first = list(self.aln_obj.alignments.values())[0]
        return first.sequence_code[1]

    def _special_hits(self, names):
        """Return the items of *names* equal to the non-ASCII taxon."""
        return [x for x in names if x == self._SPECIAL_TAXON]

    def test_dna_load(self):
        """Loading ``dna_data_fas`` reports sequence_code ['DNA']."""
        self._reload(dna_data_fas)
        self.assertEqual(["DNA"], self.aln_obj.sequence_code)

    def test_protein_load(self):
        """Loading ``protein_no_missing`` reports ['Protein']."""
        self._reload(protein_no_missing)
        self.assertEqual(["Protein"], self.aln_obj.sequence_code)

    def test_mixed_type_load(self):
        """Loading ``mixed_seq_type`` reports both DNA and Protein."""
        self._reload(mixed_seq_type)
        self.assertEqual(["DNA", "Protein"],
                         sorted(self.aln_obj.sequence_code))

    def test_class_instance(self):
        """``alignments`` of an empty AlignmentList is an OrderedDict."""
        self._reload([])
        self.assertIsInstance(self.aln_obj.alignments, OrderedDict)

    def test_load_fas(self):
        """Constructing an AlignmentList from ``dna_data_fas`` succeeds."""
        self._reload(dna_data_fas)

    def test_load_single_fas(self):
        """Constructing an Alignment from the first fas file succeeds."""
        Alignment(dna_data_fas[0], sql_cursor=self.aln_obj.cur)

    def test_load_phy(self):
        """Constructing an AlignmentList from ``dna_data_phy`` succeeds."""
        self._reload(dna_data_phy)

    def test_load_single_phy(self):
        """Constructing an Alignment from the first phy file succeeds."""
        Alignment(dna_data_phy[0], sql_cursor=self.aln_obj.cur)

    def test_load_single_interleave_phy(self):
        """Constructing an Alignment from ``phylip_interleave[0]``."""
        Alignment(phylip_interleave[0], sql_cursor=self.aln_obj.cur)

    def test_load_nex(self):
        """Constructing an AlignmentList from ``dna_data_nex`` succeeds."""
        self._reload(dna_data_nex)

    def test_load_single_nex(self):
        """Constructing an Alignment from the first nex file succeeds."""
        Alignment(dna_data_nex[0], sql_cursor=self.aln_obj.cur)

    def test_load_interleave_nex(self):
        """Constructing an Alignment from the interleaved nexus file."""
        Alignment(concatenated_interleave_nexus[0],
                  sql_cursor=self.aln_obj.cur)

    def test_load_stc(self):
        """Constructing an AlignmentList from ``dna_data_stc`` succeeds."""
        self._reload(dna_data_stc)

    def test_load_single_stc(self):
        """Constructing an Alignment from the first stc file with db_idx."""
        Alignment(dna_data_stc[0], sql_cursor=self.aln_obj.cur,
                  db_idx=self.aln_obj._idx + 1)

    def test_load_loci(self):
        """Constructing an AlignmentList from ``dna_data_loci`` succeeds."""
        self._reload(dna_data_loci)

    def test_load_single_loci(self):
        """Constructing an Alignment from the first loci file with db_idx."""
        Alignment(dna_data_loci[0], sql_cursor=self.aln_obj.cur,
                  db_idx=self.aln_obj._idx + 1)

    def test_load_nexus_par(self):
        """Loading ``concatenated_medium_nexus`` fills the partitions."""
        self._reload(concatenated_medium_nexus)
        self.assertTrue(self.aln_obj.partitions.partitions)

    def test_load_wrong_type(self):
        """Loading ``bad_file`` populates ``bad_alignments``."""
        self._reload(bad_file)
        self.assertTrue(self.aln_obj.bad_alignments)

    def test_duplicate_files(self):
        """Loading the loci files twice populates duplicate_alignments."""
        self._reload(dna_data_loci + dna_data_loci)
        self.assertTrue(self.aln_obj.duplicate_alignments)

    def test_unequal_length(self):
        """Loading ``unequal_file`` populates ``non_alignments``."""
        self._reload(unequal_file)
        self.assertTrue(self.aln_obj.non_alignments)

    def test_bad_file_removal_from_db(self):
        """No 'Seq1' taxon is iterated after loading ``unequal_file``."""
        self._reload(unequal_file)
        self.aln_obj.add_alignment_files(dna_data_fas)

        seq1_rows = [tx for tx, _, _ in self.aln_obj.iter_alignments()
                     if tx == "Seq1"]

        self.assertEqual(len(seq1_rows), 0)

    def test_load_no_data(self):
        """Constructing an AlignmentList from ``no_data`` succeeds."""
        self._reload(no_data)

    def test_alternative_missing(self):
        """``alternative_missing`` gives '?' as sequence_code[1]."""
        self._reload(alternative_missing)
        self.assertEqual(self._missing_symbol(), "?")

    def test_dna_missing_default(self):
        """``single_dna`` gives 'n' as sequence_code[1]."""
        self._reload(single_dna)
        self.assertEqual(self._missing_symbol(), "n")

    def test_protein_missing_default(self):
        """``protein_no_missing`` gives 'x' as sequence_code[1]."""
        self._reload(protein_no_missing)
        self.assertEqual(self._missing_symbol(), "x")

    def test_dna_missing_eval(self):
        """``concatenated_medium_nexus`` gives 'n' as sequence_code[1]."""
        self._reload(concatenated_medium_nexus)
        self.assertEqual(self._missing_symbol(), "n")

    def test_protein_missing_eval(self):
        """``protein_normal_missing`` gives 'x' as sequence_code[1]."""
        self._reload(protein_normal_missing)
        self.assertEqual(self._missing_symbol(), "x")

    def test_non_ascii_taxon_names(self):
        """The non-ASCII taxon appears once in ``taxa_names``."""
        self._reload(non_ascii)
        hits = self._special_hits(self.aln_obj.taxa_names)
        self.assertEqual(len(hits), 1)

    def test_non_ascii_iteration(self):
        """The non-ASCII taxon is yielded once by ``iter_alignments``."""
        self._reload(non_ascii)
        hits = self._special_hits(
            tx for tx, _, _ in self.aln_obj.iter_alignments())
        self.assertEqual(len(hits), 1)

    def test_non_ascii_get_taxaidx(self):
        """The non-ASCII taxon appears once in the alignment taxa_idx."""
        self._reload(non_ascii)
        aln = list(self.aln_obj.alignments.values())[0]
        hits = self._special_hits(aln.taxa_idx)
        self.assertEqual(len(hits), 1)

    def test_non_ascii_iter_columns(self):
        """The non-ASCII taxon appears once in the first column's taxa."""
        self._reload(non_ascii)
        tx_list, _, _ = next(self.aln_obj.iter_columns(include_taxa=True))
        hits = self._special_hits(tx_list)
        self.assertEqual(len(hits), 1)