Пример #1
0
class AlignmentCodonFilters(unittest.TestCase):
    """Check ``AlignmentList.filter_codon_positions`` with several masks.

    Every test loads the ``codon_filter`` files, applies a three-element
    boolean mask and compares the sequences that come back from
    ``iter_alignments`` with the expected strings.

    NOTE(review): the expected values imply that the input holds ten
    sequences built from sixteen ``atg`` codons -- confirm against the
    data files.
    """

    def setUp(self):
        """Create ``temp_dir`` if needed and build an empty AlignmentList."""
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)

        self.aln_obj = AlignmentList([], sql_db=sql_db)

    def tearDown(self):
        """Clear the alignments, close the connection and drop ``temp_dir``.

        The steps are chained with ``try``/``finally`` so that a failure in
        one of them does not leave the connection open or the temporary
        directory behind for the next test.
        """
        try:
            self.aln_obj.clear_alignments()
        finally:
            try:
                self.aln_obj.con.close()
            finally:
                shutil.rmtree(temp_dir)

    def _filtered_sequences(self, mask, table=None):
        """Load ``codon_filter``, apply ``mask`` and return the sequences.

        :param mask: list of three booleans passed to
            ``filter_codon_positions``.
        :param table: when given, it is passed as ``table_out`` to the
            filter and as the argument of ``iter_alignments``; when
            omitted, both methods are called without a table argument.
        :returns: list with the second item of every record yielded by
            ``iter_alignments``.
        """
        self.aln_obj.add_alignment_files(codon_filter)

        if table is None:
            self.aln_obj.filter_codon_positions(mask)
            records = self.aln_obj.iter_alignments()
        else:
            self.aln_obj.filter_codon_positions(mask, table_out=table)
            records = self.aln_obj.iter_alignments(table)

        return [seq for _, seq, _ in records]

    def test_codon_filter_pos1(self):
        """Mask [True, False, False] must give ten sequences of 16 'a'."""
        seqs = self._filtered_sequences([True, False, False], "master_out")

        self.assertEqual(seqs, ["a" * 16] * 10)

    def test_codon_filter_pos2(self):
        """Mask [False, True, False] must give ten sequences of 16 't'."""
        seqs = self._filtered_sequences([False, True, False], "master_out")

        self.assertEqual(seqs, ["t" * 16] * 10)

    def test_codon_filter_pos3(self):
        """Mask [False, False, True] must give ten sequences of 16 'g'."""
        seqs = self._filtered_sequences([False, False, True], "master_out")

        self.assertEqual(seqs, ["g" * 16] * 10)

    def test_codon_filter_pos12(self):
        """Mask [True, True, False] must give ten sequences of 16 'at'."""
        seqs = self._filtered_sequences([True, True, False], "master_out")

        self.assertEqual(seqs, ["at" * 16] * 10)

    def test_codon_filter_pos13(self):
        """Mask [True, False, True] must give ten sequences of 16 'ag'."""
        seqs = self._filtered_sequences([True, False, True], "master_out")

        self.assertEqual(seqs, ["ag" * 16] * 10)

    def test_codon_filter_all(self):
        """Mask [True, True, True] with no output table keeps 16 'atg'."""
        seqs = self._filtered_sequences([True, True, True])

        self.assertEqual(seqs, ["atg" * 16] * 10)
Пример #2
0
class AlignmentCodonFilters(unittest.TestCase):
    """Check ``AlignmentList.filter_codon_positions`` with several masks.

    Every test loads the ``codon_filter`` files, applies a three-element
    boolean mask and compares the sequences that come back from
    ``iter_alignments`` with the expected strings.

    NOTE(review): the expected values imply that the input holds ten
    sequences built from sixteen ``atg`` codons -- confirm against the
    data files.
    """

    def setUp(self):
        """Create ``temp_dir`` if needed and build an empty AlignmentList."""
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)

        self.aln_obj = AlignmentList([], sql_db=sql_db)

    def tearDown(self):
        """Clear the alignments, close the connection and drop ``temp_dir``.

        The steps are chained with ``try``/``finally`` so that a failure in
        one of them does not leave the connection open or the temporary
        directory behind for the next test.
        """
        try:
            self.aln_obj.clear_alignments()
        finally:
            try:
                self.aln_obj.con.close()
            finally:
                shutil.rmtree(temp_dir)

    def _filtered_sequences(self, mask, table=None):
        """Load ``codon_filter``, apply ``mask`` and return the sequences.

        :param mask: list of three booleans passed to
            ``filter_codon_positions``.
        :param table: when given, it is passed as ``table_out`` to the
            filter and as the argument of ``iter_alignments``; when
            omitted, both methods are called without a table argument.
        :returns: list with the second item of every record yielded by
            ``iter_alignments``.
        """
        self.aln_obj.add_alignment_files(codon_filter)

        if table is None:
            self.aln_obj.filter_codon_positions(mask)
            records = self.aln_obj.iter_alignments()
        else:
            self.aln_obj.filter_codon_positions(mask, table_out=table)
            records = self.aln_obj.iter_alignments(table)

        return [seq for _, seq, _ in records]

    def test_codon_filter_pos1(self):
        """Mask [True, False, False] must give ten sequences of 16 'a'."""
        seqs = self._filtered_sequences([True, False, False], "master_out")

        self.assertEqual(seqs, ["a" * 16] * 10)

    def test_codon_filter_pos2(self):
        """Mask [False, True, False] must give ten sequences of 16 't'."""
        seqs = self._filtered_sequences([False, True, False], "master_out")

        self.assertEqual(seqs, ["t" * 16] * 10)

    def test_codon_filter_pos3(self):
        """Mask [False, False, True] must give ten sequences of 16 'g'."""
        seqs = self._filtered_sequences([False, False, True], "master_out")

        self.assertEqual(seqs, ["g" * 16] * 10)

    def test_codon_filter_pos12(self):
        """Mask [True, True, False] must give ten sequences of 16 'at'."""
        seqs = self._filtered_sequences([True, True, False], "master_out")

        self.assertEqual(seqs, ["at" * 16] * 10)

    def test_codon_filter_pos13(self):
        """Mask [True, False, True] must give ten sequences of 16 'ag'."""
        seqs = self._filtered_sequences([True, False, True], "master_out")

        self.assertEqual(seqs, ["ag" * 16] * 10)

    def test_codon_filter_all(self):
        """Mask [True, True, True] with no output table keeps 16 'atg'."""
        seqs = self._filtered_sequences([True, True, True])

        self.assertEqual(seqs, ["atg" * 16] * 10)
class LoadAlignmentsTest(unittest.TestCase):
    """Exercise loading of input files into AlignmentList and Alignment.

    ``setUp`` provides an empty ``AlignmentList`` in ``self.aln_obj``. Many
    tests replace it with a list built from specific input files; tests
    without an assertion pass as long as construction does not raise.

    NOTE(review): when a test rebinds ``self.aln_obj``, ``tearDown`` only
    cleans up the replacement -- confirm that the instance created in
    ``setUp`` releases its connection on its own.
    """

    # Taxon name looked up in the ``non_ascii`` input, kept in the same
    # escaped form in which the tests originally spelled it.
    _NON_ASCII_TAXON = '\xc3\xa9!"#$%&/=?\'~\xc2\xba\xc2\xaa^"><'

    def setUp(self):
        """Create ``temp_dir`` if needed and build an empty AlignmentList."""
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)

        self.aln_obj = AlignmentList([], sql_db=sql_db)

    def tearDown(self):
        """Clear the alignments, close the connection and drop ``temp_dir``.

        The steps are chained with ``try``/``finally`` so that a failure in
        one of them does not leave the connection open or the temporary
        directory behind for the next test.
        """
        try:
            self.aln_obj.clear_alignments()
        finally:
            try:
                self.aln_obj.con.close()
            finally:
                shutil.rmtree(temp_dir)

    def _first_alignment(self):
        """Return the first value stored in ``self.aln_obj.alignments``."""
        # list() keeps the indexing valid whether values() returns a list
        # or a view object.
        return list(self.aln_obj.alignments.values())[0]

    @staticmethod
    def _count_equal(names, target):
        """Return how many items of ``names`` compare equal to ``target``."""
        return sum(1 for name in names if name == target)

    def test_dna_load(self):
        """``dna_data_fas`` must report ``["DNA"]`` as sequence code."""
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)

        self.assertEqual(["DNA"], self.aln_obj.sequence_code)

    def test_protein_load(self):
        """``protein_no_missing`` must report ``["Protein"]``."""
        self.aln_obj = AlignmentList(protein_no_missing, sql_db=sql_db)

        self.assertEqual(["Protein"], self.aln_obj.sequence_code)

    def test_mixed_type_load(self):
        """``mixed_seq_type`` must report both DNA and Protein."""
        self.aln_obj = AlignmentList(mixed_seq_type, sql_db=sql_db)

        self.assertEqual(["DNA", "Protein"],
                         sorted(self.aln_obj.sequence_code))

    def test_class_instance(self):
        """The ``alignments`` attribute must be an ``OrderedDict``."""
        self.aln_obj = AlignmentList([], sql_db=sql_db)
        self.assertIsInstance(self.aln_obj.alignments, OrderedDict)

    def test_load_fas(self):
        """Building an AlignmentList from ``dna_data_fas`` must not raise."""
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)

    def test_load_single_fas(self):
        """Building an Alignment from one ``dna_data_fas`` file works."""
        Alignment(dna_data_fas[0], sql_cursor=self.aln_obj.cur)

    def test_load_phy(self):
        """Building an AlignmentList from ``dna_data_phy`` must not raise."""
        self.aln_obj = AlignmentList(dna_data_phy, sql_db=sql_db)

    def test_load_single_phy(self):
        """Building an Alignment from one ``dna_data_phy`` file works."""
        Alignment(dna_data_phy[0], sql_cursor=self.aln_obj.cur)

    def test_load_single_interleave_phy(self):
        """Building an Alignment from ``phylip_interleave[0]`` works."""
        Alignment(phylip_interleave[0], sql_cursor=self.aln_obj.cur)

    def test_load_nex(self):
        """Building an AlignmentList from ``dna_data_nex`` must not raise."""
        self.aln_obj = AlignmentList(dna_data_nex, sql_db=sql_db)

    def test_load_single_nex(self):
        """Building an Alignment from one ``dna_data_nex`` file works."""
        Alignment(dna_data_nex[0], sql_cursor=self.aln_obj.cur)

    def test_load_interleave_nex(self):
        """Building an Alignment from the interleaved nexus file works."""
        Alignment(concatenated_interleave_nexus[0],
                  sql_cursor=self.aln_obj.cur)

    def test_load_stc(self):
        """Building an AlignmentList from ``dna_data_stc`` must not raise."""
        self.aln_obj = AlignmentList(dna_data_stc, sql_db=sql_db)

    def test_load_single_stc(self):
        """Building an Alignment from one ``dna_data_stc`` file works."""
        Alignment(dna_data_stc[0], sql_cursor=self.aln_obj.cur,
                  db_idx=self.aln_obj._idx + 1)

    def test_load_loci(self):
        """Building an AlignmentList from ``dna_data_loci`` must not raise."""
        self.aln_obj = AlignmentList(dna_data_loci, sql_db=sql_db)

    def test_load_single_loci(self):
        """Building an Alignment from one ``dna_data_loci`` file works."""
        Alignment(dna_data_loci[0], sql_cursor=self.aln_obj.cur,
                  db_idx=self.aln_obj._idx + 1)

    def test_load_nexus_par(self):
        """``concatenated_medium_nexus`` must yield non-empty partitions."""
        self.aln_obj = AlignmentList(concatenated_medium_nexus, sql_db=sql_db)
        self.assertTrue(self.aln_obj.partitions.partitions)

    def test_load_wrong_type(self):
        """``bad_file`` must end up in ``bad_alignments``."""
        self.aln_obj = AlignmentList(bad_file, sql_db=sql_db)
        self.assertTrue(self.aln_obj.bad_alignments)

    def test_duplicate_files(self):
        """Passing the same files twice must fill ``duplicate_alignments``."""
        self.aln_obj = AlignmentList(dna_data_loci + dna_data_loci,
                                     sql_db=sql_db)
        self.assertTrue(self.aln_obj.duplicate_alignments)

    def test_unequal_length(self):
        """``unequal_file`` must end up in ``non_alignments``."""
        self.aln_obj = AlignmentList(unequal_file, sql_db=sql_db)
        self.assertTrue(self.aln_obj.non_alignments)

    def test_bad_file_removal_from_db(self):
        """No ``Seq1`` record may be iterated after loading a bad file.

        NOTE(review): presumably ``Seq1`` only occurs in ``unequal_file``
        -- confirm against the data files.
        """
        self.aln_obj = AlignmentList(unequal_file, sql_db=sql_db)
        self.aln_obj.add_alignment_files(dna_data_fas)

        taxa = (tx for tx, _, _ in self.aln_obj.iter_alignments())

        self.assertEqual(self._count_equal(taxa, "Seq1"), 0)

    def test_load_no_data(self):
        """Building an AlignmentList from ``no_data`` must not raise."""
        self.aln_obj = AlignmentList(no_data, sql_db=sql_db)

    def test_alternative_missing(self):
        """``sequence_code[1]`` must be ``?`` for ``alternative_missing``."""
        self.aln_obj = AlignmentList(alternative_missing, sql_db=sql_db)

        self.assertEqual(self._first_alignment().sequence_code[1], "?")

    def test_dna_missing_default(self):
        """``sequence_code[1]`` must be ``n`` for ``single_dna``."""
        self.aln_obj = AlignmentList(single_dna, sql_db=sql_db)

        self.assertEqual(self._first_alignment().sequence_code[1], "n")

    def test_protein_missing_default(self):
        """``sequence_code[1]`` must be ``x`` for ``protein_no_missing``."""
        self.aln_obj = AlignmentList(protein_no_missing, sql_db=sql_db)

        self.assertEqual(self._first_alignment().sequence_code[1], "x")

    def test_dna_missing_eval(self):
        """``sequence_code[1]`` must be ``n`` for the medium nexus file."""
        self.aln_obj = AlignmentList(concatenated_medium_nexus, sql_db=sql_db)

        self.assertEqual(self._first_alignment().sequence_code[1], "n")

    def test_protein_missing_eval(self):
        """``sequence_code[1]`` must be ``x`` for ``protein_normal_missing``."""
        self.aln_obj = AlignmentList(protein_normal_missing, sql_db=sql_db)

        self.assertEqual(self._first_alignment().sequence_code[1], "x")

    def test_non_ascii_taxon_names(self):
        """``taxa_names`` must contain the non-ASCII taxon exactly once."""
        self.aln_obj = AlignmentList(non_ascii, sql_db=sql_db)

        hits = self._count_equal(self.aln_obj.taxa_names,
                                 self._NON_ASCII_TAXON)
        self.assertEqual(hits, 1)

    def test_non_ascii_iteration(self):
        """``iter_alignments`` must yield the non-ASCII taxon exactly once."""
        self.aln_obj = AlignmentList(non_ascii, sql_db=sql_db)

        taxa = (tx for tx, _, _ in self.aln_obj.iter_alignments())

        self.assertEqual(self._count_equal(taxa, self._NON_ASCII_TAXON), 1)

    def test_non_ascii_get_taxaidx(self):
        """``taxa_idx`` of the first alignment must hold the taxon once."""
        self.aln_obj = AlignmentList(non_ascii, sql_db=sql_db)

        hits = self._count_equal(self._first_alignment().taxa_idx,
                                 self._NON_ASCII_TAXON)

        self.assertEqual(hits, 1)

    def test_non_ascii_iter_columns(self):
        """The first ``iter_columns`` taxa list must hold the taxon once."""
        self.aln_obj = AlignmentList(non_ascii, sql_db=sql_db)

        tx_list, _, _ = next(self.aln_obj.iter_columns(include_taxa=True))

        self.assertEqual(
            self._count_equal(tx_list, self._NON_ASCII_TAXON), 1)
Пример #4
0
class LoadAlignmentsTest(unittest.TestCase):
    """Exercise loading of input files into AlignmentList and Alignment.

    ``setUp`` provides an empty ``AlignmentList`` in ``self.aln_obj``. Many
    tests replace it with a list built from specific input files; tests
    without an assertion pass as long as construction does not raise.

    NOTE(review): when a test rebinds ``self.aln_obj``, ``tearDown`` only
    cleans up the replacement -- confirm that the instance created in
    ``setUp`` releases its connection on its own.
    """

    # Taxon name looked up in the ``non_ascii`` input, kept in the same
    # escaped form in which the tests originally spelled it.
    _NON_ASCII_TAXON = '\xc3\xa9!"#$%&/=?\'~\xc2\xba\xc2\xaa^"><'

    def setUp(self):
        """Create ``temp_dir`` if needed and build an empty AlignmentList."""
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)

        self.aln_obj = AlignmentList([], sql_db=sql_db)

    def tearDown(self):
        """Clear the alignments, close the connection and drop ``temp_dir``.

        The steps are chained with ``try``/``finally`` so that a failure in
        one of them does not leave the connection open or the temporary
        directory behind for the next test.
        """
        try:
            self.aln_obj.clear_alignments()
        finally:
            try:
                self.aln_obj.con.close()
            finally:
                shutil.rmtree(temp_dir)

    def _first_alignment(self):
        """Return the first value stored in ``self.aln_obj.alignments``."""
        # list() keeps the indexing valid whether values() returns a list
        # or a view object.
        return list(self.aln_obj.alignments.values())[0]

    @staticmethod
    def _count_equal(names, target):
        """Return how many items of ``names`` compare equal to ``target``."""
        return sum(1 for name in names if name == target)

    def test_dna_load(self):
        """``dna_data_fas`` must report ``["DNA"]`` as sequence code."""
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)

        self.assertEqual(["DNA"], self.aln_obj.sequence_code)

    def test_protein_load(self):
        """``protein_no_missing`` must report ``["Protein"]``."""
        self.aln_obj = AlignmentList(protein_no_missing, sql_db=sql_db)

        self.assertEqual(["Protein"], self.aln_obj.sequence_code)

    def test_mixed_type_load(self):
        """``mixed_seq_type`` must report both DNA and Protein."""
        self.aln_obj = AlignmentList(mixed_seq_type, sql_db=sql_db)

        self.assertEqual(["DNA", "Protein"],
                         sorted(self.aln_obj.sequence_code))

    def test_class_instance(self):
        """The ``alignments`` attribute must be an ``OrderedDict``."""
        self.aln_obj = AlignmentList([], sql_db=sql_db)
        self.assertIsInstance(self.aln_obj.alignments, OrderedDict)

    def test_load_fas(self):
        """Building an AlignmentList from ``dna_data_fas`` must not raise."""
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)

    def test_load_single_fas(self):
        """Building an Alignment from one ``dna_data_fas`` file works."""
        Alignment(dna_data_fas[0], sql_cursor=self.aln_obj.cur)

    def test_load_phy(self):
        """Building an AlignmentList from ``dna_data_phy`` must not raise."""
        self.aln_obj = AlignmentList(dna_data_phy, sql_db=sql_db)

    def test_load_single_phy(self):
        """Building an Alignment from one ``dna_data_phy`` file works."""
        Alignment(dna_data_phy[0], sql_cursor=self.aln_obj.cur)

    def test_load_single_interleave_phy(self):
        """Building an Alignment from ``phylip_interleave[0]`` works."""
        Alignment(phylip_interleave[0], sql_cursor=self.aln_obj.cur)

    def test_load_nex(self):
        """Building an AlignmentList from ``dna_data_nex`` must not raise."""
        self.aln_obj = AlignmentList(dna_data_nex, sql_db=sql_db)

    def test_load_single_nex(self):
        """Building an Alignment from one ``dna_data_nex`` file works."""
        Alignment(dna_data_nex[0], sql_cursor=self.aln_obj.cur)

    def test_load_interleave_nex(self):
        """Building an Alignment from the interleaved nexus file works."""
        Alignment(concatenated_interleave_nexus[0],
                  sql_cursor=self.aln_obj.cur)

    def test_load_stc(self):
        """Building an AlignmentList from ``dna_data_stc`` must not raise."""
        self.aln_obj = AlignmentList(dna_data_stc, sql_db=sql_db)

    def test_load_single_stc(self):
        """Building an Alignment from one ``dna_data_stc`` file works."""
        Alignment(dna_data_stc[0], sql_cursor=self.aln_obj.cur,
                  db_idx=self.aln_obj._idx + 1)

    def test_load_loci(self):
        """Building an AlignmentList from ``dna_data_loci`` must not raise."""
        self.aln_obj = AlignmentList(dna_data_loci, sql_db=sql_db)

    def test_load_single_loci(self):
        """Building an Alignment from one ``dna_data_loci`` file works."""
        Alignment(dna_data_loci[0], sql_cursor=self.aln_obj.cur,
                  db_idx=self.aln_obj._idx + 1)

    def test_load_nexus_par(self):
        """``concatenated_medium_nexus`` must yield non-empty partitions."""
        self.aln_obj = AlignmentList(concatenated_medium_nexus, sql_db=sql_db)
        self.assertTrue(self.aln_obj.partitions.partitions)

    def test_load_wrong_type(self):
        """``bad_file`` must end up in ``bad_alignments``."""
        self.aln_obj = AlignmentList(bad_file, sql_db=sql_db)
        self.assertTrue(self.aln_obj.bad_alignments)

    def test_duplicate_files(self):
        """Passing the same files twice must fill ``duplicate_alignments``."""
        self.aln_obj = AlignmentList(dna_data_loci + dna_data_loci,
                                     sql_db=sql_db)
        self.assertTrue(self.aln_obj.duplicate_alignments)

    def test_unequal_length(self):
        """``unequal_file`` must end up in ``non_alignments``."""
        self.aln_obj = AlignmentList(unequal_file, sql_db=sql_db)
        self.assertTrue(self.aln_obj.non_alignments)

    def test_bad_file_removal_from_db(self):
        """No ``Seq1`` record may be iterated after loading a bad file.

        NOTE(review): presumably ``Seq1`` only occurs in ``unequal_file``
        -- confirm against the data files.
        """
        self.aln_obj = AlignmentList(unequal_file, sql_db=sql_db)
        self.aln_obj.add_alignment_files(dna_data_fas)

        taxa = (tx for tx, _, _ in self.aln_obj.iter_alignments())

        self.assertEqual(self._count_equal(taxa, "Seq1"), 0)

    def test_load_no_data(self):
        """Building an AlignmentList from ``no_data`` must not raise."""
        self.aln_obj = AlignmentList(no_data, sql_db=sql_db)

    def test_alternative_missing(self):
        """``sequence_code[1]`` must be ``?`` for ``alternative_missing``."""
        self.aln_obj = AlignmentList(alternative_missing, sql_db=sql_db)

        self.assertEqual(self._first_alignment().sequence_code[1], "?")

    def test_dna_missing_default(self):
        """``sequence_code[1]`` must be ``n`` for ``single_dna``."""
        self.aln_obj = AlignmentList(single_dna, sql_db=sql_db)

        self.assertEqual(self._first_alignment().sequence_code[1], "n")

    def test_protein_missing_default(self):
        """``sequence_code[1]`` must be ``x`` for ``protein_no_missing``."""
        self.aln_obj = AlignmentList(protein_no_missing, sql_db=sql_db)

        self.assertEqual(self._first_alignment().sequence_code[1], "x")

    def test_dna_missing_eval(self):
        """``sequence_code[1]`` must be ``n`` for the medium nexus file."""
        self.aln_obj = AlignmentList(concatenated_medium_nexus, sql_db=sql_db)

        self.assertEqual(self._first_alignment().sequence_code[1], "n")

    def test_protein_missing_eval(self):
        """``sequence_code[1]`` must be ``x`` for ``protein_normal_missing``."""
        self.aln_obj = AlignmentList(protein_normal_missing, sql_db=sql_db)

        self.assertEqual(self._first_alignment().sequence_code[1], "x")

    def test_non_ascii_taxon_names(self):
        """``taxa_names`` must contain the non-ASCII taxon exactly once."""
        self.aln_obj = AlignmentList(non_ascii, sql_db=sql_db)

        hits = self._count_equal(self.aln_obj.taxa_names,
                                 self._NON_ASCII_TAXON)
        self.assertEqual(hits, 1)

    def test_non_ascii_iteration(self):
        """``iter_alignments`` must yield the non-ASCII taxon exactly once."""
        self.aln_obj = AlignmentList(non_ascii, sql_db=sql_db)

        taxa = (tx for tx, _, _ in self.aln_obj.iter_alignments())

        self.assertEqual(self._count_equal(taxa, self._NON_ASCII_TAXON), 1)

    def test_non_ascii_get_taxaidx(self):
        """``taxa_idx`` of the first alignment must hold the taxon once."""
        self.aln_obj = AlignmentList(non_ascii, sql_db=sql_db)

        hits = self._count_equal(self._first_alignment().taxa_idx,
                                 self._NON_ASCII_TAXON)

        self.assertEqual(hits, 1)

    def test_non_ascii_iter_columns(self):
        """The first ``iter_columns`` taxa list must hold the taxon once."""
        self.aln_obj = AlignmentList(non_ascii, sql_db=sql_db)

        tx_list, _, _ = next(self.aln_obj.iter_columns(include_taxa=True))

        self.assertEqual(
            self._count_equal(tx_list, self._NON_ASCII_TAXON), 1)