Пример #1
0
    def test_model_detection(self):
        """Compare the models read from ``models_nexus_data`` with the
        expected per-partition mapping."""

        # Clear the current alignments, then rebuild on the same connection
        self.aln_obj.clear_alignments()
        self.aln_obj = AlignmentList(models_nexus_data,
                                     db_con=self.aln_obj.con,
                                     db_cur=self.aln_obj.cur,
                                     sql_db=sql_db)

        partition_names = ('Teste1.fas', 'Teste2.fas', 'Teste3.fas',
                           'Teste4.fas', 'Teste5.fas', 'Teste6.fas',
                           'Teste7.fas')

        # Every partition is expected to carry the same model entry
        expected = OrderedDict()
        for name in partition_names:
            expected[name] = [
                [['nst=6', 'statefreqpr=dirichlet(1,1,1,1)']],
                [None],
                [],
            ]

        self.assertEqual(self.aln_obj.partitions.models, expected)
Пример #2
0
    def test_protein_missing_eval(self):
        """``sequence_code[1]`` of the first loaded alignment must be "x"."""

        self.aln_obj = AlignmentList(protein_normal_missing, sql_db=sql_db)

        first_aln = self.aln_obj.alignments.values()[0]
        observed = first_aln.sequence_code[1]

        self.assertEqual(observed, "x")
Пример #3
0
    def test_dna_missing_eval(self):
        """``sequence_code[1]`` of the first loaded alignment must be "n"."""

        self.aln_obj = AlignmentList(concatenated_medium_nexus, sql_db=sql_db)

        first_aln = self.aln_obj.alignments.values()[0]
        observed = first_aln.sequence_code[1]

        self.assertEqual(observed, "n")
Пример #4
0
    def test_dna_missing_default(self):
        """``sequence_code[1]`` is "n" for the ``single_dna`` input."""

        self.aln_obj = AlignmentList(single_dna, sql_db=sql_db)

        first_aln = self.aln_obj.alignments.values()[0]
        observed = first_aln.sequence_code[1]

        self.assertEqual(observed, "n")
Пример #5
0
    def test_alternative_missing(self):
        """``sequence_code[1]`` is "?" for the ``alternative_missing``
        input."""

        self.aln_obj = AlignmentList(alternative_missing, sql_db=sql_db)

        first_aln = self.aln_obj.alignments.values()[0]
        observed = first_aln.sequence_code[1]

        self.assertEqual(observed, "?")
Пример #6
0
    def setUp(self):
        """Create ``temp_dir`` when absent, load ``dna_data_fas`` and reset
        the partitions of the new object."""

        temp_dir_missing = not os.path.exists(temp_dir)
        if temp_dir_missing:
            os.makedirs(temp_dir)

        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)

        partitions = self.aln_obj.partitions
        partitions.reset(cur=self.aln_obj.cur)
Пример #7
0
    def test_single_partition(self):
        """An AlignmentList built from one file reports a single
        partition."""

        # Only the first file of the data set, on the existing connection
        single_file = [dna_data_fas[0]]
        self.aln_obj = AlignmentList(single_file,
                                     db_con=self.aln_obj.con,
                                     db_cur=self.aln_obj.cur,
                                     sql_db=sql_db)

        is_single = self.aln_obj.partitions.is_single()
        self.assertTrue(is_single)
Пример #8
0
    def test_bad_wrong_size_nexus(self):
        """Check name, locus length and taxa count of the alignment loaded
        from ``bad_wrong_size``."""

        self.aln_obj = AlignmentList(bad_wrong_size, sql_db=sql_db)

        first_aln = self.aln_obj.alignments.values()[0]
        observed = [
            first_aln.name,
            first_aln.locus_length,
            len(first_aln.taxa_idx),
        ]
        expected = ["bad_wrong_size.nex", 898, 12]

        self.assertEqual(observed, expected)
Пример #9
0
    def test_no_end_colon_interleave(self):
        """Check name, locus length and taxa count of the alignment loaded
        from ``bad_no_end_interleave``."""

        self.aln_obj = AlignmentList(bad_no_end_interleave, sql_db=sql_db)

        first_aln = self.aln_obj.alignments.values()[0]
        observed = [
            first_aln.name,
            first_aln.locus_length,
            len(first_aln.taxa_idx),
        ]
        expected = ["bad_no_end_interleave.nex", 898, 12]

        self.assertEqual(observed, expected)
Пример #10
0
    def test_non_ascii_taxon_names(self):
        """The non-ASCII taxon name appears exactly once in
        ``taxa_names``."""

        self.aln_obj = AlignmentList(non_ascii, sql_db=sql_db)

        matches = []
        for taxon in self.aln_obj.taxa_names:
            if taxon == '\xc3\xa9!"#$%&/=?\'~\xc2\xba\xc2\xaa^"><':
                matches.append(taxon)

        self.assertEqual(len(matches), 1)
Пример #11
0
    def test_non_ascii_iter_columns(self):
        """The non-ASCII taxon name appears exactly once in the taxa list
        of the first item yielded by ``iter_columns``."""

        self.aln_obj = AlignmentList(non_ascii, sql_db=sql_db)

        # Only the first yielded item is inspected
        column_iter = self.aln_obj.iter_columns(include_taxa=True)
        taxa, _, _ = next(column_iter)

        matches = []
        for taxon in taxa:
            if taxon == '\xc3\xa9!"#$%&/=?\'~\xc2\xba\xc2\xaa^"><':
                matches.append(taxon)

        self.assertEqual(len(matches), 1)
Пример #12
0
    def test_non_ascii_get_taxaidx(self):
        """The non-ASCII taxon name appears exactly once when iterating
        ``taxa_idx`` of the first alignment."""

        self.aln_obj = AlignmentList(non_ascii, sql_db=sql_db)

        first_aln = self.aln_obj.alignments.values()[0]

        matches = []
        for taxon in first_aln.taxa_idx:
            if taxon == '\xc3\xa9!"#$%&/=?\'~\xc2\xba\xc2\xaa^"><':
                matches.append(taxon)

        self.assertEqual(len(matches), 1)
Пример #13
0
    def test_non_ascii_iteration(self):
        """``iter_alignments`` yields the non-ASCII taxon name exactly
        once."""

        self.aln_obj = AlignmentList(non_ascii, sql_db=sql_db)

        matches = [
            taxon for taxon, _, _ in self.aln_obj.iter_alignments()
            if taxon == '\xc3\xa9!"#$%&/=?\'~\xc2\xba\xc2\xaa^"><'
        ]

        self.assertEqual(len(matches), 1)
Пример #14
0
    def test_bad_file_removal_from_db(self):
        """After loading ``unequal_file`` and adding ``dna_data_fas``, no
        item yielded by ``iter_alignments`` has the taxon "Seq1"."""

        self.aln_obj = AlignmentList(unequal_file, sql_db=sql_db)
        self.aln_obj.add_alignment_files(dna_data_fas)

        seq1_hits = sum(
            1 for taxon, _, _ in self.aln_obj.iter_alignments()
            if taxon == "Seq1")

        self.assertEqual(seq1_hits, 0)
    def test_non_ascii_taxon_names(self):
        """The non-ASCII taxon name appears exactly once in
        ``taxa_names``."""

        self.aln_obj = AlignmentList(non_ascii, sql_db=sql_db)

        found = []
        for name in self.aln_obj.taxa_names:
            if name == '\xc3\xa9!"#$%&/=?\'~\xc2\xba\xc2\xaa^"><':
                found.append(name)

        self.assertEqual(len(found), 1)
    def test_dna_missing_default(self):
        """``sequence_code[1]`` is "n" for the ``single_dna`` input."""

        self.aln_obj = AlignmentList(single_dna, sql_db=sql_db)

        alignment = self.aln_obj.alignments.values()[0]
        second_code = alignment.sequence_code[1]

        self.assertEqual(second_code, "n")
    def test_dna_missing_eval(self):
        """``sequence_code[1]`` is "n" for the ``concatenated_medium_nexus``
        input."""

        self.aln_obj = AlignmentList(concatenated_medium_nexus, sql_db=sql_db)

        alignment = self.aln_obj.alignments.values()[0]
        second_code = alignment.sequence_code[1]

        self.assertEqual(second_code, "n")
    def test_alternative_missing(self):
        """``sequence_code[1]`` is "?" for the ``alternative_missing``
        input."""

        self.aln_obj = AlignmentList(alternative_missing, sql_db=sql_db)

        alignment = self.aln_obj.alignments.values()[0]
        second_code = alignment.sequence_code[1]

        self.assertEqual(second_code, "?")
Пример #19
0
    def setUp(self):
        """Make sure ``temp_dir`` exists, then load ``dna_data_fas`` and
        reset its partitions."""

        need_dir = not os.path.exists(temp_dir)
        if need_dir:
            os.makedirs(temp_dir)

        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)

        parts = self.aln_obj.partitions
        parts.reset(cur=self.aln_obj.cur)
    def test_protein_missing_eval(self):
        """``sequence_code[1]`` is "x" for the ``protein_normal_missing``
        input."""

        self.aln_obj = AlignmentList(protein_normal_missing, sql_db=sql_db)

        alignment = self.aln_obj.alignments.values()[0]
        second_code = alignment.sequence_code[1]

        self.assertEqual(second_code, "x")
Пример #21
0
    def test_single_partition(self):
        """``partitions.is_single()`` is true when only one file is
        loaded."""

        one_file = [dna_data_fas[0]]
        self.aln_obj = AlignmentList(one_file,
                                     db_con=self.aln_obj.con,
                                     db_cur=self.aln_obj.cur,
                                     sql_db=sql_db)

        single = self.aln_obj.partitions.is_single()
        self.assertTrue(single)
Пример #22
0
    def test_model_detection_codons(self):
        """Compare the models read from ``models_codon_nexus_data`` with
        the expected per-partition mapping (three entries each)."""

        # Clear the current alignments, then rebuild on the same connection
        self.aln_obj.clear_alignments()
        self.aln_obj = AlignmentList(models_codon_nexus_data,
                                     db_cur=self.aln_obj.cur,
                                     db_con=self.aln_obj.con,
                                     sql_db=sql_db)

        partition_names = ('Teste1.fas_1', 'Teste2.fas_86', 'Teste3.fas_171',
                           'Teste4.fas_256', 'Teste5.fas_341',
                           'Teste6.fas_426', 'Teste7.fas_511')

        # Every partition is expected to carry the same three model entries
        expected = OrderedDict()
        for name in partition_names:
            expected[name] = [
                [['nst=2', 'statefreqpr=dirichlet(1,1,1,1)'],
                 ['nst=6', 'statefreqpr=fixed(equal)'],
                 ['nst=6', 'statefreqpr=dirichlet(1,1,1,1)']],
                [None, None, None],
                [],
            ]

        self.assertEqual(self.aln_obj.partitions.models, expected)
Пример #23
0
    def test_clear_alns(self):
        """A cleared AlignmentList compares equal (via ``compare_inst``) to
        one created from an empty file list."""

        self.aln_obj.clear_alignments()
        empty_aln = AlignmentList([], sql_db=sql_db)

        # NOTE(review): presumably the attribute names that compare_inst
        # leaves out of the comparison -- confirm against its definition.
        attr_names = [
            "log_progression", "locus_length", "partitions", "cur", "con"
        ]
        same = compare_inst(self.aln_obj, empty_aln, attr_names)

        self.assertTrue(same)
Пример #24
0
    def test_import_new_partscheme(self):
        """Partition names read from ``concatenated_small_par[0]`` replace
        the reset partition scheme."""

        # Release the current object before loading the new data set
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()

        self.aln_obj = AlignmentList(concatenated_medium_nexus, sql_db=sql_db)

        partitions = self.aln_obj.partitions
        partitions.reset()
        partitions.read_from_file(concatenated_small_par[0],
                                  no_aln_check=True)

        observed = self.aln_obj.partitions.get_partition_names()
        expected = [
            "BaseConc1.fas", "BaseConc2.fas", "BaseConc3.fas", "BaseConc4.fas",
            "BaseConc5.fas", "BaseConc6.fas", "BaseConc7.fas"
        ]

        self.assertEqual(observed, expected)
    def test_non_ascii_iter_columns(self):
        """The non-ASCII taxon name appears exactly once in the taxa list
        of the first item yielded by ``iter_columns``."""

        self.aln_obj = AlignmentList(non_ascii, sql_db=sql_db)

        columns = self.aln_obj.iter_columns(include_taxa=True)
        first_taxa, _, _ = next(columns)

        found = []
        for name in first_taxa:
            if name == '\xc3\xa9!"#$%&/=?\'~\xc2\xba\xc2\xaa^"><':
                found.append(name)

        self.assertEqual(len(found), 1)
    def test_non_ascii_get_taxaidx(self):
        """The non-ASCII taxon name appears exactly once when iterating
        ``taxa_idx`` of the first alignment."""

        self.aln_obj = AlignmentList(non_ascii, sql_db=sql_db)

        alignment = self.aln_obj.alignments.values()[0]

        found = []
        for name in alignment.taxa_idx:
            if name == '\xc3\xa9!"#$%&/=?\'~\xc2\xba\xc2\xaa^"><':
                found.append(name)

        self.assertEqual(len(found), 1)
Пример #27
0
    def test_remove_partition_from_file_original(self):
        """After removing the BaseConc3 partition, the remaining partition
        ranges must still be contiguous starting at 0."""

        # Release the current object before reloading the data set
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()

        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)

        self.aln_obj.partitions.remove_partition(
            file_name="trifusion/tests/data/BaseConc3.fas")

        # Each range has to start where the previous one ended, plus one
        contiguous = True
        next_start = 0
        for part in self.aln_obj.partitions.partitions.values():
            part_range = part[0][0]
            if part_range[0] != next_start:
                contiguous = False
                continue
            next_start = part_range[1] + 1

        self.expect_equal(contiguous, True)
    def test_bad_file_removal_from_db(self):
        """After loading ``unequal_file`` and adding ``dna_data_fas``, no
        item yielded by ``iter_alignments`` has the taxon "Seq1"."""

        self.aln_obj = AlignmentList(unequal_file, sql_db=sql_db)
        self.aln_obj.add_alignment_files(dna_data_fas)

        hits = [
            taxon for taxon, _, _ in self.aln_obj.iter_alignments()
            if taxon == "Seq1"
        ]

        self.assertEqual(len(hits), 0)
    def test_bad_wrong_size_nexus(self):
        """Check name, locus length and taxa count of the alignment loaded
        from ``bad_wrong_size``."""

        self.aln_obj = AlignmentList(bad_wrong_size, sql_db=sql_db)

        alignment = self.aln_obj.alignments.values()[0]
        name = alignment.name
        length = alignment.locus_length
        taxa_count = len(alignment.taxa_idx)

        self.assertEqual([name, length, taxa_count],
                         ["bad_wrong_size.nex", 898, 12])
    def test_no_end_colon_interleave(self):
        """Check name, locus length and taxa count of the alignment loaded
        from ``bad_no_end_interleave``."""

        self.aln_obj = AlignmentList(bad_no_end_interleave, sql_db=sql_db)

        alignment = self.aln_obj.alignments.values()[0]
        name = alignment.name
        length = alignment.locus_length
        taxa_count = len(alignment.taxa_idx)

        self.assertEqual([name, length, taxa_count],
                         ["bad_no_end_interleave.nex", 898, 12])
    def test_non_ascii_iteration(self):
        """``iter_alignments`` yields the non-ASCII taxon name exactly
        once."""

        self.aln_obj = AlignmentList(non_ascii, sql_db=sql_db)

        found = [
            name for name, _, _ in self.aln_obj.iter_alignments()
            if name == '\xc3\xa9!"#$%&/=?\'~\xc2\xba\xc2\xaa^"><'
        ]

        self.assertEqual(len(found), 1)
Пример #32
0
    def test_concat_custom_fileset_from_phy_partfile(self):
        """Concatenating two active files leaves exactly those two names as
        keys of the partitions, partitions_alignments and models maps."""

        # Release the current object before reloading the data set
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0])

        active_files = [
            join(data_path, "BaseConc1.fas"),
            join(data_path, "BaseConc2.fas"),
        ]
        self.aln_obj.update_active_alignments(active_files)

        self.aln_obj.concatenate()

        partitions_keys = sorted(self.aln_obj.partitions.partitions.keys())
        alignments_keys = sorted(
            self.aln_obj.partitions.partitions_alignments.keys())
        models_keys = sorted(self.aln_obj.partitions.models.keys())
        key_data = [partitions_keys, alignments_keys, models_keys]

        self.expect_equal(key_data, [["BaseConc1.fas", "BaseConc2.fas"]] * 3)
Пример #33
0
    def test_concat_custom_fileset_from_phy_partfile(self):
        """Concatenating two active files leaves exactly those two names as
        keys of the partitions, partitions_alignments and models maps."""

        # Release the current object before reloading the data set
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0])

        first_file = join(data_path, "BaseConc1.fas")
        second_file = join(data_path, "BaseConc2.fas")
        self.aln_obj.update_active_alignments([first_file, second_file])

        self.aln_obj.concatenate()

        key_data = []
        key_data.append(sorted(self.aln_obj.partitions.partitions.keys()))
        key_data.append(
            sorted(self.aln_obj.partitions.partitions_alignments.keys()))
        key_data.append(sorted(self.aln_obj.partitions.models.keys()))

        self.expect_equal(key_data, [["BaseConc1.fas", "BaseConc2.fas"]] * 3)
Пример #34
0
    def test_import_new_partscheme(self):
        """Partition names read from ``concatenated_small_par[0]`` replace
        the reset partition scheme."""

        # Release the current object before loading the new data set
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()

        self.aln_obj = AlignmentList(concatenated_medium_nexus,
                                     sql_db=sql_db)

        self.aln_obj.partitions.reset()

        partition_file = concatenated_small_par[0]
        self.aln_obj.partitions.read_from_file(partition_file,
                                               no_aln_check=True)

        names = self.aln_obj.partitions.get_partition_names()

        expected_names = [
            "BaseConc1.fas",
            "BaseConc2.fas",
            "BaseConc3.fas",
            "BaseConc4.fas",
            "BaseConc5.fas",
            "BaseConc6.fas",
            "BaseConc7.fas",
        ]
        self.assertEqual(names, expected_names)
Пример #35
0
    def test_remove_partition_from_file_original(self):
        """After removing the BaseConc3 partition, the remaining partition
        ranges must still be contiguous starting at 0."""

        # Release the current object before reloading the data set
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()

        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)

        self.aln_obj.partitions.remove_partition(
            file_name="trifusion/tests/data/BaseConc3.fas")

        # Walk the ranges; each must begin right after the previous end
        is_continuous = True
        expected_start = 0
        for entry in self.aln_obj.partitions.partitions.values():
            bounds = entry[0][0]
            if bounds[0] == expected_start:
                expected_start = bounds[1] + 1
                continue
            is_continuous = False

        self.expect_equal(is_continuous, True)
Пример #36
0
def load_proc(aln_list, file_list, nm, queue):
    """Load alignment files and put the resulting object on ``queue``.

    When ``aln_list`` is truthy, ``file_list`` is added to it through
    ``add_alignment_files``; otherwise a new ``AlignmentList`` is created
    from ``file_list``. In both cases ``nm`` is passed along as the
    ``shared_namespace`` keyword argument.

    :param aln_list: existing alignment list object, or a falsy value
    :param file_list: files to load
    :param nm: namespace object; ``nm.exception`` is set to
        "multiple_type" when ``MultipleSequenceTypes`` is raised
    :param queue: object whose ``put`` method receives the loaded object
    :returns: None. ``IOError`` and ``KillByUser`` end the function
        silently; any other exception is logged and printed.
    """
    try:
        if not aln_list:
            loaded = AlignmentList(file_list, shared_namespace=nm)
        else:
            aln_list.add_alignment_files(file_list, shared_namespace=nm)
            loaded = aln_list

        queue.put(loaded)

    except MultipleSequenceTypes:
        nm.exception = "multiple_type"

    except (IOError, KillByUser):
        # Nothing is put on the queue and nothing is reported
        return

    except Exception as e:
        logging.exception("Unexpected error when loading input data")
        print(e)
Пример #37
0
    def test_model_detection_codons(self):
        """Compare the models read from ``models_codon_nexus_data`` with
        the expected per-partition mapping (three entries each)."""

        # Clear the current alignments, then rebuild on the same connection
        self.aln_obj.clear_alignments()
        self.aln_obj = AlignmentList(models_codon_nexus_data,
                                     db_cur=self.aln_obj.cur,
                                     db_con=self.aln_obj.con,
                                     sql_db=sql_db)

        names = ['Teste1.fas_1', 'Teste2.fas_86', 'Teste3.fas_171',
                 'Teste4.fas_256', 'Teste5.fas_341', 'Teste6.fas_426',
                 'Teste7.fas_511']

        # One (name, value) pair per partition, all with the same value
        expected = OrderedDict(
            (name, [
                [['nst=2', 'statefreqpr=dirichlet(1,1,1,1)'],
                 ['nst=6', 'statefreqpr=fixed(equal)'],
                 ['nst=6', 'statefreqpr=dirichlet(1,1,1,1)']],
                [None, None, None],
                [],
            ])
            for name in names)

        self.assertEqual(self.aln_obj.partitions.models, expected)
Пример #38
0
    def test_model_detection(self):
        """Compare the models read from ``models_nexus_data`` with the
        expected per-partition mapping."""

        # Clear the current alignments, then rebuild on the same connection
        self.aln_obj.clear_alignments()
        self.aln_obj = AlignmentList(models_nexus_data,
                                     db_con=self.aln_obj.con,
                                     db_cur=self.aln_obj.cur,
                                     sql_db=sql_db)

        names = ['Teste1.fas', 'Teste2.fas', 'Teste3.fas', 'Teste4.fas',
                 'Teste5.fas', 'Teste6.fas', 'Teste7.fas']

        # One (name, value) pair per partition, all with the same value
        expected = OrderedDict(
            (name, [
                [['nst=6', 'statefreqpr=dirichlet(1,1,1,1)']],
                [None],
                [],
            ])
            for name in names)

        self.assertEqual(self.aln_obj.partitions.models, expected)
Пример #39
0
    def setUp(self):
        """Create ``temp_dir`` when absent and load ``dna_data_fas``."""

        temp_dir_missing = not os.path.exists(temp_dir)
        if temp_dir_missing:
            os.makedirs(temp_dir)

        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)
Пример #40
0
class SeconaryOpsTest(unittest.TestCase):
    """Exercise the statistics-style methods of ``AlignmentList``.

    Each test starts from an ``AlignmentList`` built from ``dna_data_fas``
    (created in ``setUp``) which is cleared and closed again in
    ``tearDown``.
    """

    def setUp(self):
        """Create ``temp_dir`` when absent and load ``dna_data_fas``."""

        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)

        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)

    def tearDown(self):
        """Clear the alignments, close the connection, remove ``temp_dir``."""

        try:
            self.aln_obj.clear_alignments()
            self.aln_obj.con.close()
        finally:
            # Remove the directory even when the cleanup above fails, so
            # that later tests do not inherit stale files.
            shutil.rmtree(temp_dir)

    def _assert_single_alignment(self, method_name):
        """Activate only the first file and check that ``method_name``
        returns the "single_alignment" exception dictionary."""

        self.aln_obj.update_active_alignments([dna_data_fas[0]])

        result = getattr(self.aln_obj, method_name)()

        self.assertEqual(result, {"exception": "single_alignment"})

    def test_summary_stats_all(self):
        """Summary statistics for the full data set match the reference."""

        sum_table, table_data = self.aln_obj.get_summary_stats()

        self.assertEqual(
            [sum_table, table_data],
            [{
                'missing': '5 (0.04%)',
                'taxa': 24,
                'genes': 7,
                'informative': '0 (0.0%)',
                'gaps': '0 (0.0%)',
                'avg_gaps': 0.0,
                'avg_missing': 1.0,
                'variable': '7 (1.18%)',
                'seq_len': 595,
                'avg_var': 1.0,
                'avg_inf': 0.0
            },
             [[
                 'Genes', 'Taxa', 'Alignment length', 'Gaps', 'Gaps per gene',
                 'Missing data', 'Missing data per gene', 'Variable sites',
                 'Variable sites per gene', 'Informative sites',
                 'Informative sites per gene'
             ],
              [
                  7, 24, 595, '0 (0.0%)', 0.0, '5 (0.04%)', 1.0, '7 (1.18%)',
                  1.0, '0 (0.0%)', 0.0
              ]]])

    def test_summary_stats_one_active(self):
        """Summary statistics restricted to BaseConc1.fas match the
        reference."""

        sum_table, table_data = self.aln_obj.get_summary_stats(
            [join(data_path, "BaseConc1.fas")])

        self.assertEqual(
            [sum_table, table_data],
            [{
                'missing': '1 (0.05%)',
                'taxa': 24,
                'genes': 1,
                'informative': '0 (0.0%)',
                'gaps': '0 (0.0%)',
                'avg_gaps': 0.0,
                'avg_missing': 1.0,
                'variable': '1 (1.18%)',
                'seq_len': 85,
                'avg_var': 1.0,
                'avg_inf': 0.0
            },
             [[
                 'Genes', 'Taxa', 'Alignment length', 'Gaps', 'Gaps per gene',
                 'Missing data', 'Missing data per gene', 'Variable sites',
                 'Variable sites per gene', 'Informative sites',
                 'Informative sites per gene'
             ],
              [
                  1, 24, 85, '0 (0.0%)', 0.0, '1 (0.05%)', 1.0, '1 (1.18%)',
                  1.0, '0 (0.0%)', 0.0
              ]]])

    # ------------------------------------------------------------------
    # With a single active alignment each of these methods is expected to
    # return {"exception": "single_alignment"}.
    # ------------------------------------------------------------------

    def test_single_aln_outlier_mdata(self):

        self._assert_single_alignment("outlier_missing_data")

    def test_single_aln_outlier_mdata_sp(self):

        self._assert_single_alignment("outlier_missing_data_sp")

    def test_single_aln_outlier_seg(self):

        self._assert_single_alignment("outlier_segregating")

    def test_single_aln_outlier_seg_sp(self):

        self._assert_single_alignment("outlier_segregating_sp")

    def test_single_aln_outlier_seqsize(self):

        self._assert_single_alignment("outlier_sequence_size")

    def test_single_aln_outlier_seqsize_sp(self):

        self._assert_single_alignment("outlier_sequence_size_sp")

    def test_single_aln_average_seqsize_per_species(self):

        self._assert_single_alignment("average_seqsize_per_species")

    def test_single_aln_average_seqsize(self):

        self._assert_single_alignment("average_seqsize")

    def test_single_aln_sequence_similarity(self):

        self._assert_single_alignment("sequence_similarity")

    def test_single_aln_sequence_segregation(self):

        self._assert_single_alignment("sequence_segregation")

    def test_single_aln_length_polymorphism_correlation(self):

        self._assert_single_alignment("length_polymorphism_correlation")

    def test_single_aln_taxa_distribution(self):

        self._assert_single_alignment("taxa_distribution")

    def test_single_aln_cumulative_missing_genes(self):

        self._assert_single_alignment("cumulative_missing_genes")

    def test_single_aln_gene_occupancy(self):

        self._assert_single_alignment("gene_occupancy")

    def test_single_aln_missing_data_distribution(self):

        self._assert_single_alignment("missing_data_distribution")

    def test_single_aln_missing_genes_average(self):

        self._assert_single_alignment("missing_genes_average")

    def test_no_data(self):
        """``gene_occupancy`` returns the "empty_data" exception dictionary
        for an AlignmentList created from an empty file list."""

        # Release the object created in setUp before replacing it;
        # tearDown only cleans up whatever self.aln_obj points to at the
        # end, so the original connection would otherwise stay open.
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()

        self.aln_obj = AlignmentList([], sql_db=sql_db)

        self.assertEqual(self.aln_obj.gene_occupancy(),
                         {'exception': "empty_data"})

    # ------------------------------------------------------------------
    # Smoke tests on the full data set: each method only has to return a
    # truthy value.
    # ------------------------------------------------------------------

    def test_gene_occupancy(self):

        self.assertTrue(self.aln_obj.gene_occupancy())

    def test_missing_data_distribution(self):

        self.assertTrue(self.aln_obj.missing_data_distribution())

    def test_missing_data_per_species(self):

        self.assertTrue(self.aln_obj.missing_data_per_species())

    def test_missing_genes_per_species(self):

        self.assertTrue(self.aln_obj.missing_genes_per_species())

    def test_missing_genes_average(self):

        self.assertTrue(self.aln_obj.missing_genes_average())

    def test_average_seqsize_per_species(self):

        self.assertTrue(self.aln_obj.average_seqsize_per_species())

    def test_average_seqsize(self):

        self.assertTrue(self.aln_obj.average_seqsize())

    def test_characters_proportion(self):

        self.assertTrue(self.aln_obj.characters_proportion())

    def test_characters_proportion_per_species(self):

        self.assertTrue(self.aln_obj.characters_proportion_per_species())

    def test_characters_proportion_gene(self):

        self.assertTrue(
            self.aln_obj.characters_proportion_gene(
                join(data_path, "BaseConc1.fas"), 10))

    def test_sequence_similarity(self):

        self.assertTrue(self.aln_obj.sequence_similarity())

    def test_sequence_similarity_per_species(self):

        self.assertTrue(self.aln_obj.sequence_similarity_per_species())

    def test_sequence_similarity_gene(self):

        self.assertTrue(
            self.aln_obj.sequence_similarity_gene(
                join(data_path, "BaseConc1.fas"), 10))

    def test_sequence_conservation(self):

        self.assertTrue(
            self.aln_obj.sequence_conservation_gnp(
                join(data_path, "BaseConc1.fas"), 10))

    def test_sequence_segregation(self):

        self.assertTrue(self.aln_obj.sequence_segregation())

    def test_sequence_segregation_per_species(self):

        self.assertTrue(self.aln_obj.sequence_segregation_per_species())

    def test_sequence_segregation_gene(self):

        self.assertTrue(
            self.aln_obj.sequence_segregation_gene(
                join(data_path, "BaseConc1.fas"), 10))

    def test_length_polymorphism_correlation(self):

        self.assertTrue(self.aln_obj.length_polymorphism_correlation())

    def test_allele_frequency_spectrum(self):

        self.assertTrue(self.aln_obj.allele_frequency_spectrum())

    def test_allele_frequency_spectrum_gene(self):

        self.assertTrue(
            self.aln_obj.allele_frequency_spectrum_gene(
                join(data_path, "BaseConc1.fas"), None))

    def test_taxa_distribution(self):

        self.assertTrue(self.aln_obj.taxa_distribution())

    def test_cumulative_missing_genes(self):

        self.assertTrue(self.aln_obj.cumulative_missing_genes())

    def test_outlier_missing_data(self):

        self.assertTrue(self.aln_obj.outlier_missing_data())

    def test_outlier_missing_data_sp(self):

        self.assertTrue(self.aln_obj.outlier_missing_data_sp())

    def test_outlier_segregating(self):

        self.assertTrue(self.aln_obj.outlier_segregating())

    def test_outlier_segregating_sp(self):

        self.assertTrue(self.aln_obj.outlier_segregating_sp())

    def test_outlier_sequence_size(self):

        self.assertTrue(self.aln_obj.outlier_sequence_size())

    def test_outlier_sequence_size_sp(self):

        self.assertTrue(self.aln_obj.outlier_sequence_size_sp())
Пример #41
0
    def test_no_data(self):
        """``gene_occupancy`` returns the "empty_data" exception dictionary
        for an AlignmentList created from an empty file list."""

        self.aln_obj = AlignmentList([], sql_db=sql_db)

        result = self.aln_obj.gene_occupancy()
        expected = {'exception': "empty_data"}

        self.assertEqual(result, expected)
    def test_bad_no_format_line_nexus(self):
        """Loading ``bad_no_format_line`` records those same files in
        ``bad_alignments``."""

        self.aln_obj = AlignmentList(bad_no_format_line, sql_db=sql_db)

        rejected = self.aln_obj.bad_alignments

        self.assertEqual(rejected, bad_no_format_line)
    def test_bad_wrong_dimensions_nexus(self):
        """Loading ``bad_wrong_dimensions`` records those same files in
        ``bad_alignments``."""

        self.aln_obj = AlignmentList(bad_wrong_dimensions, sql_db=sql_db)

        rejected = self.aln_obj.bad_alignments

        self.assertEqual(rejected, bad_wrong_dimensions)
    def test_protein_load(self):
        """``sequence_code`` equals ``["Protein"]`` after loading
        ``protein_no_missing``."""

        self.aln_obj = AlignmentList(protein_no_missing, sql_db=sql_db)

        observed = self.aln_obj.sequence_code

        self.assertEqual(["Protein"], observed)
Пример #45
0
    def test_single_partition(self):
        """``partitions.is_single()`` is true when only one file is
        loaded."""

        # Clear the current alignments before building the new object
        self.aln_obj.clear_alignments()

        one_file = [dna_data_fas[0]]
        self.aln_obj = AlignmentList(one_file, sql_db=sql_db)

        single = self.aln_obj.partitions.is_single()
        self.assertTrue(single)
Пример #46
0
    def setUp(self):
        """Load ``dna_data_fas`` and reset the partitions of the new
        object."""

        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)

        partitions = self.aln_obj.partitions
        partitions.reset()
    def test_load_fas(self):
        """Building an AlignmentList from ``dna_data_fas`` must not
        raise."""

        # No assertion: the test passes as long as construction succeeds
        self.aln_obj = AlignmentList(sql_db=sql_db, *[dna_data_fas])
Пример #48
0
    def setUp(self):
        """Make sure ``temp_dir`` exists, then load ``dna_data_fas``."""

        need_dir = not os.path.exists(temp_dir)
        if need_dir:
            os.makedirs(temp_dir)

        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)
Пример #49
0
class SeconaryOpsTest(unittest.TestCase):
    """Tests for the summary, distribution and outlier analyses of
    AlignmentList.

    Every test starts from an AlignmentList built from ``dna_data_fas``
    (see setUp) and the object is cleared and its connection closed in
    tearDown.
    """

    # Header row shared by the table returned from get_summary_stats()
    _SUMMARY_HEADER = ['Genes', 'Taxa', 'Alignment length', 'Gaps',
                       'Gaps per gene', 'Missing data',
                       'Missing data per gene', 'Variable sites',
                       'Variable sites per gene', 'Informative sites',
                       'Informative sites per gene']

    def setUp(self):
        """Make sure temp_dir exists, then load dna_data_fas."""
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)

        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)

    def tearDown(self):
        """Clear the alignments, close the connection, remove temp_dir."""
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        shutil.rmtree(temp_dir)

    def _assert_single_alignment(self, analysis):
        """Restrict the active set to the first alignment and check that
        calling ``analysis`` returns the "single_alignment" exception dict.

        :param analysis: bound AlignmentList method, called without
            arguments after the active set has been updated.
        """
        self.aln_obj.update_active_alignments([dna_data_fas[0]])

        self.assertEqual(analysis(), {"exception": "single_alignment"})

    # ---- Summary statistics ------------------------------------------------

    def test_summary_stats_all(self):
        """Summary statistics computed over the whole data set."""
        sum_table, table_data = self.aln_obj.get_summary_stats()

        expected_summary = {
            'missing': '5 (0.04%)', 'taxa': 24, 'genes': 7,
            'informative': '0 (0.0%)', 'gaps': '0 (0.0%)',
            'avg_gaps': 0.0, 'avg_missing': 1.0, 'variable': '7 (1.18%)',
            'seq_len': 595, 'avg_var': 1.0, 'avg_inf': 0.0}
        expected_table = [
            self._SUMMARY_HEADER,
            [7, 24, 595, '0 (0.0%)', 0.0, '5 (0.04%)', 1.0,
             '7 (1.18%)', 1.0, '0 (0.0%)', 0.0]]

        self.assertEqual([sum_table, table_data],
                         [expected_summary, expected_table])

    def test_summary_stats_one_active(self):
        """Summary statistics restricted to the BaseConc1.fas alignment."""
        sum_table, table_data = self.aln_obj.get_summary_stats([
            join(data_path, "BaseConc1.fas")])

        expected_summary = {
            'missing': '1 (0.05%)', 'taxa': 24, 'genes': 1,
            'informative': '0 (0.0%)', 'gaps': '0 (0.0%)',
            'avg_gaps': 0.0, 'avg_missing': 1.0, 'variable': '1 (1.18%)',
            'seq_len': 85, 'avg_var': 1.0, 'avg_inf': 0.0}
        expected_table = [
            self._SUMMARY_HEADER,
            [1, 24, 85, '0 (0.0%)', 0.0, '1 (0.05%)', 1.0,
             '1 (1.18%)', 1.0, '0 (0.0%)', 0.0]]

        self.assertEqual([sum_table, table_data],
                         [expected_summary, expected_table])

    # ---- Analyses that must refuse a single active alignment ---------------

    def test_single_aln_outlier_mdata(self):

        self._assert_single_alignment(self.aln_obj.outlier_missing_data)

    def test_single_aln_outlier_mdata_sp(self):

        self._assert_single_alignment(self.aln_obj.outlier_missing_data_sp)

    def test_single_aln_outlier_seg(self):

        self._assert_single_alignment(self.aln_obj.outlier_segregating)

    def test_single_aln_outlier_seg_sp(self):

        self._assert_single_alignment(self.aln_obj.outlier_segregating_sp)

    def test_single_aln_outlier_seqsize(self):

        self._assert_single_alignment(self.aln_obj.outlier_sequence_size)

    def test_single_aln_outlier_seqsize_sp(self):

        self._assert_single_alignment(self.aln_obj.outlier_sequence_size_sp)

    def test_single_aln_average_seqsize_per_species(self):

        self._assert_single_alignment(
            self.aln_obj.average_seqsize_per_species)

    def test_single_aln_average_seqsize(self):

        self._assert_single_alignment(self.aln_obj.average_seqsize)

    def test_single_aln_sequence_similarity(self):

        self._assert_single_alignment(self.aln_obj.sequence_similarity)

    def test_single_aln_sequence_segregation(self):

        self._assert_single_alignment(self.aln_obj.sequence_segregation)

    def test_single_aln_length_polymorphism_correlation(self):

        self._assert_single_alignment(
            self.aln_obj.length_polymorphism_correlation)

    def test_single_aln_taxa_distribution(self):

        self._assert_single_alignment(self.aln_obj.taxa_distribution)

    def test_single_aln_cumulative_missing_genes(self):

        self._assert_single_alignment(self.aln_obj.cumulative_missing_genes)

    def test_single_aln_gene_occupancy(self):

        self._assert_single_alignment(self.aln_obj.gene_occupancy)

    def test_single_aln_missing_data_distribution(self):

        self._assert_single_alignment(self.aln_obj.missing_data_distribution)

    def test_single_aln_missing_genes_average(self):

        self._assert_single_alignment(self.aln_obj.missing_genes_average)

    # ---- Empty data set ----------------------------------------------------

    def test_no_data(self):
        """gene_occupancy() on an empty AlignmentList reports "empty_data"."""
        # Release the object created in setUp before replacing it;
        # tearDown only closes the connection of the object bound to
        # self.aln_obj at that point.
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()

        self.aln_obj = AlignmentList([], sql_db=sql_db)

        self.assertEqual(self.aln_obj.gene_occupancy(),
                         {'exception': "empty_data"})

    # ---- Analyses on the full data set: a truthy result is expected --------

    def test_gene_occupancy(self):

        self.assertTrue(self.aln_obj.gene_occupancy())

    def test_missing_data_distribution(self):

        self.assertTrue(self.aln_obj.missing_data_distribution())

    def test_missing_data_per_species(self):

        self.assertTrue(self.aln_obj.missing_data_per_species())

    def test_missing_genes_per_species(self):

        self.assertTrue(self.aln_obj.missing_genes_per_species())

    def test_missing_genes_average(self):

        self.assertTrue(self.aln_obj.missing_genes_average())

    def test_average_seqsize_per_species(self):

        self.assertTrue(self.aln_obj.average_seqsize_per_species())

    def test_average_seqsize(self):

        self.assertTrue(self.aln_obj.average_seqsize())

    def test_characters_proportion(self):

        self.assertTrue(self.aln_obj.characters_proportion())

    def test_characters_proportion_per_species(self):

        self.assertTrue(self.aln_obj.characters_proportion_per_species())

    def test_characters_proportion_gene(self):

        self.assertTrue(self.aln_obj.characters_proportion_gene(
            join(data_path, "BaseConc1.fas"), 10
        ))

    def test_sequence_similarity(self):

        self.assertTrue(self.aln_obj.sequence_similarity())

    def test_sequence_similarity_per_species(self):

        self.assertTrue(self.aln_obj.sequence_similarity_per_species())

    def test_sequence_similarity_gene(self):

        self.assertTrue(self.aln_obj.sequence_similarity_gene(
            join(data_path, "BaseConc1.fas"), 10))

    def test_sequence_conservation(self):

        self.assertTrue(self.aln_obj.sequence_conservation_gnp(
            join(data_path, "BaseConc1.fas"), 10
        ))

    def test_sequence_segregation(self):

        self.assertTrue(self.aln_obj.sequence_segregation())

    def test_sequence_segregation_per_species(self):

        self.assertTrue(self.aln_obj.sequence_segregation_per_species())

    def test_sequence_segregation_gene(self):

        self.assertTrue(self.aln_obj.sequence_segregation_gene(
            join(data_path, "BaseConc1.fas"), 10))

    def test_length_polymorphism_correlation(self):

        self.assertTrue(self.aln_obj.length_polymorphism_correlation())

    def test_allele_frequency_spectrum(self):

        self.assertTrue(self.aln_obj.allele_frequency_spectrum())

    def test_allele_frequency_spectrum_gene(self):

        self.assertTrue(self.aln_obj.allele_frequency_spectrum_gene(
            join(data_path, "BaseConc1.fas"), None))

    def test_taxa_distribution(self):

        self.assertTrue(self.aln_obj.taxa_distribution())

    def test_cumulative_missing_genes(self):

        self.assertTrue(self.aln_obj.cumulative_missing_genes())

    def test_outlier_missing_data(self):

        self.assertTrue(self.aln_obj.outlier_missing_data())

    def test_outlier_missing_data_sp(self):

        self.assertTrue(self.aln_obj.outlier_missing_data_sp())

    def test_outlier_segregating(self):

        self.assertTrue(self.aln_obj.outlier_segregating())

    def test_outlier_segregating_sp(self):

        self.assertTrue(self.aln_obj.outlier_segregating_sp())

    def test_outlier_sequence_size(self):

        self.assertTrue(self.aln_obj.outlier_sequence_size())

    def test_outlier_sequence_size_sp(self):

        self.assertTrue(self.aln_obj.outlier_sequence_size_sp())
Пример #50
0
    def test_no_data(self):
        """gene_occupancy() on an empty AlignmentList reports "empty_data"."""
        self.aln_obj = AlignmentList([], sql_db=sql_db)

        occupancy = self.aln_obj.gene_occupancy()
        self.assertEqual(occupancy, {'exception': "empty_data"})
    def test_load_phy(self):
        """Smoke test: building an AlignmentList from dna_data_phy."""
        # No assertion: the test only fails if construction raises
        loaded = AlignmentList(dna_data_phy, sql_db=sql_db)
        self.aln_obj = loaded
    def test_class_instance(self):
        """The alignments attribute of an empty list is an OrderedDict."""
        self.aln_obj = AlignmentList([], sql_db=sql_db)
        storage = self.aln_obj.alignments
        self.assertIsInstance(storage, OrderedDict)
    def test_mixed_type_load(self):
        """Loading mixed_seq_type reports both "DNA" and "Protein"."""
        self.aln_obj = AlignmentList(mixed_seq_type, sql_db=sql_db)

        # Sorted so the check does not depend on detection order
        detected = sorted(self.aln_obj.sequence_code)
        self.assertEqual(["DNA", "Protein"], detected)
Пример #54
0
class AlignmentTaxaFilters(unittest.TestCase):
    """Taxa-based filters applied to the alignments of dna_data_fas.

    Each test adds the files to an initially empty AlignmentList, runs
    one filter and checks how many alignments are left.
    """

    def setUp(self):
        """Make sure temp_dir exists and start from an empty list."""
        temp_dir_missing = not os.path.exists(temp_dir)
        if temp_dir_missing:
            os.makedirs(temp_dir)

        self.aln_obj = AlignmentList([], sql_db=sql_db)

    def tearDown(self):
        """Clear the alignments, close the connection, remove temp_dir."""
        aln_list = self.aln_obj
        aln_list.clear_alignments()
        aln_list.con.close()
        shutil.rmtree(temp_dir)

    def test_filter_min_taxa(self):
        """filter_min_taxa(50) leaves 5 alignments."""
        aln_list = self.aln_obj
        aln_list.add_alignment_files(dna_data_fas)
        aln_list.filter_min_taxa(50)

        remaining = len(aln_list.alignments)
        self.assertEqual(remaining, 5)

    def test_filter_min_taxa_max(self):
        """filter_min_taxa(100) leaves 1 alignment."""
        aln_list = self.aln_obj
        aln_list.add_alignment_files(dna_data_fas)
        aln_list.filter_min_taxa(100)

        remaining = len(aln_list.alignments)
        self.assertEqual(remaining, 1)

    def test_filter_min_taxa_min(self):
        """filter_min_taxa(0) keeps all 7 alignments."""
        aln_list = self.aln_obj
        aln_list.add_alignment_files(dna_data_fas)
        aln_list.filter_min_taxa(0)

        remaining = len(aln_list.alignments)
        self.assertEqual(remaining, 7)

    def test_filter_by_taxa_include(self):
        """The "Contain" filter on spa-spd leaves 2 alignments."""
        taxa = ["spa", "spb", "spc", "spd"]
        aln_list = self.aln_obj
        aln_list.add_alignment_files(dna_data_fas)
        aln_list.filter_by_taxa(taxa, "Contain")

        remaining = len(aln_list.alignments)
        self.assertEqual(remaining, 2)

    def test_filter_by_taxa_exclude(self):
        """The "Exclude" filter on spa-spd leaves 5 alignments."""
        taxa = ["spa", "spb", "spc", "spd"]
        aln_list = self.aln_obj
        aln_list.add_alignment_files(dna_data_fas)
        aln_list.filter_by_taxa(taxa, "Exclude")

        remaining = len(aln_list.alignments)
        self.assertEqual(remaining, 5)

    def test_filter_by_taxa_all(self):
        """The "Contain" filter on an unknown taxon removes everything."""
        aln_list = self.aln_obj
        aln_list.add_alignment_files(dna_data_fas)
        aln_list.filter_by_taxa(["no_taxa"], "Contain")

        remaining = len(aln_list.alignments)
        self.assertEqual(remaining, 0)

    def test_filter_by_taxa_from_file(self):
        """The "Contain" filter given a file path leaves 2 alignments."""
        taxa_file = "trifusion/tests/data/filter_taxa.txt"
        aln_list = self.aln_obj
        aln_list.add_alignment_files(dna_data_fas)
        aln_list.filter_by_taxa(taxa_file, "Contain")

        remaining = len(aln_list.alignments)
        self.assertEqual(remaining, 2)
    def test_dna_load(self):
        """Loading dna_data_fas sets sequence_code to ["DNA"]."""
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)

        detected = self.aln_obj.sequence_code
        self.assertEqual(["DNA"], detected)
Пример #56
0
class AlignmentCodonFilters(unittest.TestCase):
    """Codon-position filtering on the codon_filter data set.

    Each test passes a three-element boolean mask to
    filter_codon_positions() and compares the sequences yielded by
    iter_alignments() with the expected strings.
    """

    def setUp(self):
        """Make sure temp_dir exists and start from an empty list."""
        temp_dir_missing = not os.path.exists(temp_dir)
        if temp_dir_missing:
            os.makedirs(temp_dir)

        self.aln_obj = AlignmentList([], sql_db=sql_db)

    def tearDown(self):
        """Clear the alignments, close the connection, remove temp_dir."""
        aln_list = self.aln_obj
        aln_list.clear_alignments()
        aln_list.con.close()
        shutil.rmtree(temp_dir)

    def test_codon_filter_pos1(self):
        """Mask [True, False, False] yields ten sequences of 16 "a"."""
        self.aln_obj.add_alignment_files(codon_filter)

        mask = [True, False, False]
        self.aln_obj.filter_codon_positions(mask, table_out="master_out")

        sequences = [
            seq for _, seq, _ in self.aln_obj.iter_alignments("master_out")]

        self.assertEqual(sequences, ["a" * 16] * 10)

    def test_codon_filter_pos2(self):
        """Mask [False, True, False] yields ten sequences of 16 "t"."""
        self.aln_obj.add_alignment_files(codon_filter)

        mask = [False, True, False]
        self.aln_obj.filter_codon_positions(mask, table_out="master_out")

        sequences = [
            seq for _, seq, _ in self.aln_obj.iter_alignments("master_out")]

        self.assertEqual(sequences, ["t" * 16] * 10)

    def test_codon_filter_pos3(self):
        """Mask [False, False, True] yields ten sequences of 16 "g"."""
        self.aln_obj.add_alignment_files(codon_filter)

        mask = [False, False, True]
        self.aln_obj.filter_codon_positions(mask, table_out="master_out")

        sequences = [
            seq for _, seq, _ in self.aln_obj.iter_alignments("master_out")]

        self.assertEqual(sequences, ["g" * 16] * 10)

    def test_codon_filter_pos12(self):
        """Mask [True, True, False] yields ten sequences of 16 "at"."""
        self.aln_obj.add_alignment_files(codon_filter)

        mask = [True, True, False]
        self.aln_obj.filter_codon_positions(mask, table_out="master_out")

        sequences = [
            seq for _, seq, _ in self.aln_obj.iter_alignments("master_out")]

        self.assertEqual(sequences, ["at" * 16] * 10)

    def test_codon_filter_pos13(self):
        """Mask [True, False, True] yields ten sequences of 16 "ag"."""
        self.aln_obj.add_alignment_files(codon_filter)

        mask = [True, False, True]
        self.aln_obj.filter_codon_positions(mask, table_out="master_out")

        sequences = [
            seq for _, seq, _ in self.aln_obj.iter_alignments("master_out")]

        self.assertEqual(sequences, ["ag" * 16] * 10)

    def test_codon_filter_all(self):
        """Mask [True, True, True] yields ten sequences of 16 "atg"."""
        self.aln_obj.add_alignment_files(codon_filter)

        # No table_out here, and iter_alignments() is called without
        # arguments as well.
        self.aln_obj.filter_codon_positions([True, True, True])

        sequences = [seq for _, seq, _ in self.aln_obj.iter_alignments()]

        self.assertEqual(sequences, ["atg" * 16] * 10)
Пример #57
0
class PartitonsTest(ExpectingTestCase):
    def setUp(self):
        """Make sure temp_dir exists, load dna_data_fas and reset the
        partitions, passing the list's cursor to reset()."""
        temp_dir_missing = not os.path.exists(temp_dir)
        if temp_dir_missing:
            os.makedirs(temp_dir)

        loaded = AlignmentList(dna_data_fas, sql_db=sql_db)
        self.aln_obj = loaded
        loaded.partitions.reset(cur=loaded.cur)

    def tearDown(self):
        """Clear the alignments, close the connection, remove temp_dir."""
        aln_list = self.aln_obj
        aln_list.clear_alignments()
        aln_list.con.close()
        shutil.rmtree(temp_dir)

    def test_read_from_nexus(self):
        """Reading concatenated_small_parNex[0] defines 7 partitions."""
        partitions = self.aln_obj.partitions
        partitions.read_from_file(concatenated_small_parNex[0],
                                  no_aln_check=True)

        self.assertEqual(len(partitions.partitions), 7)

    def test_read_from_phylip(self):
        """Reading concatenated_small_par[0] defines 7 partitions."""
        partitions = self.aln_obj.partitions
        partitions.read_from_file(concatenated_small_par[0],
                                  no_aln_check=True)

        self.assertEqual(len(partitions.partitions), 7)

    def test_bad_partitions_phy(self):
        """read_from_file() on partition_bad_phy[0] must return (not
        raise) an InvalidPartitionFile instance."""
        e = self.aln_obj.partitions.read_from_file(partition_bad_phy[0],
                                                   no_aln_check=True)

        # assertIsInstance reports the actual object on failure, unlike
        # assertTrue(isinstance(...)) which only says "False is not true"
        self.assertIsInstance(e, InvalidPartitionFile)

    def test_unsorted_part_phylip(self):
        """After reading partition_unsorted_phy[0] the partition names come
        out as BaseConc1..7 in order and the counter is 595."""
        self.aln_obj.partitions.read_from_file(partition_unsorted_phy[0],
                                               no_aln_check=True)

        # list() keeps the comparison against a list literal valid even
        # where keys() returns a view object instead of a list
        data = [
            list(self.aln_obj.partitions.partitions.keys()),
            self.aln_obj.partitions.counter
        ]

        self.assertEqual(data, [[
            "BaseConc1.fas", "BaseConc2.fas", "BaseConc3.fas", "BaseConc4.fas",
            "BaseConc5.fas", "BaseConc6.fas", "BaseConc7.fas"
        ], 595])

    def test_phylip_dot_notation(self):
        """After reading partition_dot_not[0] the partition names are
        BaseConc1..7 in order and the counter is 595."""
        self.aln_obj.partitions.read_from_file(partition_dot_not[0],
                                               no_aln_check=True)

        # list() keeps the comparison against a list literal valid even
        # where keys() returns a view object instead of a list
        data = [
            list(self.aln_obj.partitions.partitions.keys()),
            self.aln_obj.partitions.counter
        ]

        self.assertEqual(data, [[
            "BaseConc1.fas", "BaseConc2.fas", "BaseConc3.fas", "BaseConc4.fas",
            "BaseConc5.fas", "BaseConc6.fas", "BaseConc7.fas"
        ], 595])

    def test_nexus_dot_notation(self):
        """After reading dot_notation_nex[0] the partition names are
        BaseConc1..7 in order and the counter is 595."""
        self.aln_obj.partitions.read_from_file(dot_notation_nex[0],
                                               no_aln_check=True)

        # list() keeps the comparison against a list literal valid even
        # where keys() returns a view object instead of a list
        data = [
            list(self.aln_obj.partitions.partitions.keys()),
            self.aln_obj.partitions.counter
        ]

        self.assertEqual(data, [[
            "BaseConc1.fas", "BaseConc2.fas", "BaseConc3.fas", "BaseConc4.fas",
            "BaseConc5.fas", "BaseConc6.fas", "BaseConc7.fas"
        ], 595])

    def test_bad_dot_notation(self):
        """read_from_file() on bad_dot_notation_nex[0] must return (not
        raise) an InvalidPartitionFile instance."""
        e = self.aln_obj.partitions.read_from_file(bad_dot_notation_nex[0],
                                                   no_aln_check=True)

        # assertIsInstance reports the actual object on failure, unlike
        # assertTrue(isinstance(...)) which only says "False is not true"
        self.assertIsInstance(e, InvalidPartitionFile)

    def test_import_new_partscheme(self):
        """A partition scheme read from file after a reset replaces the
        one obtained from concatenated_medium_nexus."""
        # Drop the list created in setUp before building a fresh one
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()

        self.aln_obj = AlignmentList(concatenated_medium_nexus, sql_db=sql_db)

        partitions = self.aln_obj.partitions
        partitions.reset()
        partitions.read_from_file(concatenated_small_par[0],
                                  no_aln_check=True)

        expected_names = [
            "BaseConc1.fas", "BaseConc2.fas", "BaseConc3.fas", "BaseConc4.fas",
            "BaseConc5.fas", "BaseConc6.fas", "BaseConc7.fas"
        ]
        res = partitions.get_partition_names()

        self.assertEqual(res, expected_names)

    def test_add_duplicate_name(self):
        """Adding a partition under an already used name must raise
        PartitionException."""
        self.aln_obj.partitions.read_from_file(concatenated_small_par[0],
                                               no_aln_check=True)

        # The callable and its arguments are passed separately so that
        # assertRaises performs the call itself. Calling add_partition
        # inline would evaluate it before assertRaises runs, and the
        # assertion would check nothing.
        # NOTE(review): the previous argument was "BaseCond1.fas", which is
        # not one of the names loaded from this partition file; assumed to
        # be a typo for "BaseConc1.fas". Confirm that add_partition raises
        # (rather than auto-renames) on a duplicate name.
        self.assertRaises(
            PartitionException,
            self.aln_obj.partitions.add_partition,
            "BaseConc1.fas", length=100)

    def test_get_partition_names(self):
        """get_partition_names() lists BaseConc1..7 after reading
        concatenated_small_par[0]."""
        partitions = self.aln_obj.partitions
        partitions.read_from_file(concatenated_small_par[0],
                                  no_aln_check=True)

        expected_names = [
            "BaseConc1.fas", "BaseConc2.fas", "BaseConc3.fas", "BaseConc4.fas",
            "BaseConc5.fas", "BaseConc6.fas", "BaseConc7.fas"
        ]
        res = partitions.get_partition_names()

        self.assertEqual(res, expected_names)

    def test_get_partition_names_withCodon(self):
        """With concatenated_smallCodon_parNex[0] the first partition is
        reported as three "_1_N" names."""
        partitions = self.aln_obj.partitions
        partitions.read_from_file(
            concatenated_smallCodon_parNex[0], no_aln_check=True)

        expected_names = [
            "BaseConc1.fas_1_1", "BaseConc1.fas_1_2", "BaseConc1.fas_1_3",
            "BaseConc2.fas", "BaseConc3.fas", "BaseConc4.fas", "BaseConc5.fas",
            "BaseConc6.fas", "BaseConc7.fas"
        ]
        res = partitions.get_partition_names()

        self.assertEqual(res, expected_names)

    def test_single_partition(self):
        """An AlignmentList built from one file has a single partition."""
        # Reuse the connection and cursor of the list created in setUp
        only_file = [dna_data_fas[0]]
        con = self.aln_obj.con
        cur = self.aln_obj.cur
        self.aln_obj = AlignmentList(only_file, db_con=con, db_cur=cur,
                                     sql_db=sql_db)

        self.assertTrue(self.aln_obj.partitions.is_single())

    def test_multiple_partitions(self):
        """The list built in setUp must not report a single partition."""
        single = self.aln_obj.partitions.is_single()
        self.assertFalse(single)

    def test_remove_partition_from_file_original(self):
        """Removing a partition by file name from a freshly loaded list
        leaves the remaining ranges continuous."""
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()

        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)

        partitions = self.aln_obj.partitions
        partitions.remove_partition(
            file_name="trifusion/tests/data/BaseConc3.fas")

        # Every range has to start right after the end of the previous one
        contiguous = True
        next_start = 0
        for entry in partitions.partitions.values():
            span = entry[0][0]
            if span[0] != next_start:
                contiguous = False
                continue
            next_start = span[1] + 1

        self.expect_equal(contiguous, True)

    def test_remove_partition_from_name(self):
        """Removing "BaseConc3.fas" by name drops it from the three
        partition mappings and keeps the ranges continuous."""
        partitions = self.aln_obj.partitions
        partitions.read_from_file(concatenated_small_parNex[0],
                                  no_aln_check=True)
        partitions.remove_partition("BaseConc3.fas")

        # Keys of partitions, partitions_alignments and models must agree
        key_data = [
            list(mapping.keys())
            for mapping in (partitions.partitions,
                            partitions.partitions_alignments,
                            partitions.models)
        ]
        remaining_names = [
            "BaseConc1.fas", "BaseConc2.fas", "BaseConc4.fas", "BaseConc5.fas",
            "BaseConc6.fas", "BaseConc7.fas"
        ]

        self.expect_equal(key_data, [remaining_names] * 3)

        # Every range has to start right after the end of the previous one
        contiguous = True
        next_start = 0
        for entry in partitions.partitions.values():
            span = entry[0][0]
            if span[0] != next_start:
                contiguous = False
                continue
            next_start = span[1] + 1

        self.expect_equal(contiguous, True)

    def test_remove_partition_from_file(self):
        """Removing by file_name="BaseConc3.fas" drops it from the three
        partition mappings and keeps the ranges continuous."""
        partitions = self.aln_obj.partitions
        partitions.read_from_file(concatenated_small_parNex[0],
                                  no_aln_check=True)
        partitions.remove_partition(file_name="BaseConc3.fas")

        # Keys of partitions, partitions_alignments and models must agree
        key_data = [
            list(mapping.keys())
            for mapping in (partitions.partitions,
                            partitions.partitions_alignments,
                            partitions.models)
        ]
        remaining_names = [
            "BaseConc1.fas", "BaseConc2.fas", "BaseConc4.fas", "BaseConc5.fas",
            "BaseConc6.fas", "BaseConc7.fas"
        ]

        self.expect_equal(key_data, [remaining_names] * 3)

        # Every range has to start right after the end of the previous one
        contiguous = True
        next_start = 0
        for entry in partitions.partitions.values():
            span = entry[0][0]
            if span[0] != next_start:
                contiguous = False
                continue
            next_start = span[1] + 1

        self.expect_equal(contiguous, True)

    def test_change_name(self):
        """Renaming "BaseConc1.fas" to "OtherName" updates the keys of the
        three partition mappings and leaves the ranges continuous."""
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)

        self.aln_obj.partitions.change_name("BaseConc1.fas", "OtherName")

        key_data = [
            list(self.aln_obj.partitions.partitions.keys()),
            list(self.aln_obj.partitions.partitions_alignments.keys()),
            list(self.aln_obj.partitions.models.keys())
        ]

        # The renamed partition is expected last in key order
        self.expect_equal(key_data, [[
            "BaseConc2.fas", "BaseConc3.fas", "BaseConc4.fas", "BaseConc5.fas",
            "BaseConc6.fas", "BaseConc7.fas", "OtherName"
        ]] * 3)

        # Check if partition ranges are continuous
        cont = True
        prev = 0

        # Re-sort by the first element of each value so the walk below
        # does not depend on key order. items() replaces the
        # Python-2-only iteritems(); sorted() accepts either.
        self.aln_obj.partitions.partitions = OrderedDict(
            sorted(self.aln_obj.partitions.partitions.items(),
                   key=lambda x: x[1][0]))

        for r in self.aln_obj.partitions.partitions.values():
            if r[0][0][0] == prev:
                prev = r[0][0][1] + 1
            else:
                cont = False

        self.expect_equal(cont, True)

    def test_merge_partitions(self):
        """Merging all seven partitions leaves only "New_part" as key of
        the three partition mappings."""
        partitions = self.aln_obj.partitions
        partitions.read_from_file(concatenated_small_parNex[0],
                                  no_aln_check=True)

        all_names = [
            "BaseConc1.fas", "BaseConc2.fas", "BaseConc3.fas", "BaseConc4.fas",
            "BaseConc5.fas", "BaseConc6.fas", "BaseConc7.fas"
        ]
        partitions.merge_partitions(all_names, "New_part")

        key_data = [
            list(mapping.keys())
            for mapping in (partitions.partitions,
                            partitions.partitions_alignments,
                            partitions.models)
        ]

        self.assertEqual(key_data, [["New_part"]] * 3)

    def test_split_partition(self):
        """Splitting "BaseConc1.fas" into "part1"/"part2" updates the keys
        of the three partition mappings and keeps the ranges continuous."""
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0],
                                               no_aln_check=True)

        self.aln_obj.partitions.split_partition("BaseConc1.fas",
                                                [[0, 50], [51, 84]],
                                                ["part1", "part2"])

        key_data = [
            list(self.aln_obj.partitions.partitions.keys()),
            list(self.aln_obj.partitions.partitions_alignments.keys()),
            list(self.aln_obj.partitions.models.keys())
        ]

        self.expect_equal(key_data, [[
            "part1", "part2", "BaseConc2.fas", "BaseConc3.fas",
            "BaseConc4.fas", "BaseConc5.fas", "BaseConc6.fas", "BaseConc7.fas"
        ]] * 3)

        # Check if partition ranges are continuous
        cont = True
        prev = 0

        # Re-sort by the first element of each value so the walk below
        # does not depend on key order. items() replaces the
        # Python-2-only iteritems(); sorted() accepts either.
        self.aln_obj.partitions.partitions = OrderedDict(
            sorted(self.aln_obj.partitions.partitions.items(),
                   key=lambda x: x[1][0]))

        for r in self.aln_obj.partitions.partitions.values():
            if r[0][0][0] == prev:
                prev = r[0][0][1] + 1
            else:
                cont = False

        self.expect_equal(cont, True)

    def test_merge_and_split(self):
        """Merging three partitions and splitting the result with default
        arguments restores the original seven names."""
        partitions = self.aln_obj.partitions
        partitions.read_from_file(concatenated_small_parNex[0],
                                  no_aln_check=True)

        merged = ["BaseConc1.fas", "BaseConc2.fas", "BaseConc3.fas"]
        partitions.merge_partitions(merged, "new_part")
        partitions.split_partition("new_part")

        # Sorted, so only the set of names is checked, not their order
        key_data = [
            sorted(mapping.keys())
            for mapping in (partitions.partitions,
                            partitions.partitions_alignments,
                            partitions.models)
        ]
        all_names = [
            "BaseConc1.fas", "BaseConc2.fas", "BaseConc3.fas", "BaseConc4.fas",
            "BaseConc5.fas", "BaseConc6.fas", "BaseConc7.fas"
        ]

        self.expect_equal(key_data, [all_names] * 3)

    def test_merge_and_custom_split1(self):
        """Splitting the merged partition at (0, 50)/(51, 254) assigns the
        expected alignment files to "one" and "two"."""
        partitions = self.aln_obj.partitions
        partitions.read_from_file(concatenated_small_parNex[0],
                                  no_aln_check=True)

        merged = ["BaseConc1.fas", "BaseConc2.fas", "BaseConc3.fas"]
        partitions.merge_partitions(merged, "new_part")

        ranges = [(0, 50), (51, 254)]
        partitions.split_partition("new_part", ranges, ["one", "two"])

        files_by_part = partitions.partitions_alignments
        key_data = [files_by_part["one"], files_by_part["two"]]

        self.assertEqual(key_data,
                         [['BaseConc1.fas'],
                          ['BaseConc1.fas', 'BaseConc3.fas', 'BaseConc2.fas']])

    def test_merge_and_custom_split2(self):
        """Splitting the merged partition at (0, 84)/(85, 254) assigns the
        expected alignment files to "one" and "two"."""
        partitions = self.aln_obj.partitions
        partitions.read_from_file(concatenated_small_parNex[0],
                                  no_aln_check=True)

        merged = ["BaseConc1.fas", "BaseConc2.fas", "BaseConc3.fas"]
        partitions.merge_partitions(merged, "new_part")

        ranges = [(0, 84), (85, 254)]
        partitions.split_partition("new_part", ranges, ["one", "two"])

        files_by_part = partitions.partitions_alignments
        key_data = [files_by_part["one"], files_by_part["two"]]

        self.assertEqual(
            key_data, [['BaseConc1.fas'], ['BaseConc3.fas', 'BaseConc2.fas']])

    def test_concat_custom_fileset_from_phy_partfile(self):
        """Concatenating two active alignments after reading
        concatenated_small_par[0] leaves only their two partitions."""
        # Start over from a freshly loaded list
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)
        self.aln_obj.partitions.read_from_file(concatenated_small_par[0])

        active_files = [
            join(data_path, "BaseConc1.fas"),
            join(data_path, "BaseConc2.fas")
        ]
        self.aln_obj.update_active_alignments(active_files)

        self.aln_obj.concatenate()

        # Fetch the partitions object only after concatenate() has run
        partitions = self.aln_obj.partitions
        key_data = [
            sorted(mapping.keys())
            for mapping in (partitions.partitions,
                            partitions.partitions_alignments,
                            partitions.models)
        ]

        self.expect_equal(key_data, [["BaseConc1.fas", "BaseConc2.fas"]] * 3)

    def test_concat_custom_fileset_from_nex_partfile(self):
        """Concatenating two active alignments after reading
        concatenated_small_parNex[0] leaves only their two partitions.

        This method used to be a second definition of
        test_concat_custom_fileset_from_phy_partfile. A later definition
        with the same name replaces the earlier one in the class body, so
        the earlier test was never collected. This one reads the
        ``parNex`` partition file, hence the new name.
        """
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0])

        self.aln_obj.update_active_alignments([
            join(data_path, "BaseConc1.fas"),
            join(data_path, "BaseConc2.fas")
        ])

        self.aln_obj.concatenate()

        # Sorted, so only the set of names is checked, not their order
        key_data = [
            sorted(self.aln_obj.partitions.partitions.keys()),
            sorted(self.aln_obj.partitions.partitions_alignments.keys()),
            sorted(self.aln_obj.partitions.models.keys())
        ]

        self.expect_equal(key_data, [["BaseConc1.fas", "BaseConc2.fas"]] * 3)

    def test_merge_with_custom_fileset(self):
        """After merging three partitions, concatenating BaseConc1 and
        BaseConc5 leaves exactly those two partition names."""
        # Start over from a freshly loaded list
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0])

        merged = ["BaseConc1.fas", "BaseConc2.fas", "BaseConc3.fas"]
        self.aln_obj.partitions.merge_partitions(merged, "new_part")

        active_files = [
            join(data_path, "BaseConc1.fas"),
            join(data_path, "BaseConc5.fas")
        ]
        self.aln_obj.update_active_alignments(active_files)

        self.aln_obj.concatenate()

        # Fetch the partitions object only after concatenate() has run
        partitions = self.aln_obj.partitions
        key_data = [
            sorted(mapping.keys())
            for mapping in (partitions.partitions,
                            partitions.partitions_alignments,
                            partitions.models)
        ]

        self.expect_equal(key_data, [["BaseConc1.fas", "BaseConc5.fas"]] * 3)

    def test_model_detection(self):
        """Loading models_nexus_data gives each of Teste1..7 the same
        model entry in partitions.models."""
        self.aln_obj.clear_alignments()

        # Reuse the connection and cursor of the list created in setUp
        con = self.aln_obj.con
        cur = self.aln_obj.cur
        self.aln_obj = AlignmentList(models_nexus_data, db_con=con,
                                     db_cur=cur, sql_db=sql_db)

        partition_names = ('Teste1.fas', 'Teste2.fas', 'Teste3.fas',
                           'Teste4.fas', 'Teste5.fas', 'Teste6.fas',
                           'Teste7.fas')
        # One fresh, identical model entry per partition, in name order
        expected = OrderedDict(
            (name, [[['nst=6', 'statefreqpr=dirichlet(1,1,1,1)']],
                    [None], []])
            for name in partition_names)

        self.assertEqual(self.aln_obj.partitions.models, expected)

    def test_model_detection_codons(self):
        """Check the models found when loading ``models_codon_nexus_data``.

        The alignment list is rebuilt on the existing cursor and
        connection. Each of the seven expected partitions maps to the same
        three parameter lists, three ``None`` values and an empty list.
        """
        self.aln_obj.clear_alignments()
        cursor = self.aln_obj.cur
        connection = self.aln_obj.con
        self.aln_obj = AlignmentList(models_codon_nexus_data,
                                     db_cur=cursor,
                                     db_con=connection,
                                     sql_db=sql_db)

        partition_names = (
            'Teste1.fas_1',
            'Teste2.fas_86',
            'Teste3.fas_171',
            'Teste4.fas_256',
            'Teste5.fas_341',
            'Teste6.fas_426',
            'Teste7.fas_511',
        )
        codon_params = (
            ['nst=2', 'statefreqpr=dirichlet(1,1,1,1)'],
            ['nst=6', 'statefreqpr=fixed(equal)'],
            ['nst=6', 'statefreqpr=dirichlet(1,1,1,1)'],
        )

        # Build a fresh, equal-valued entry for every partition name.
        expected_models = OrderedDict(
            (name, [[list(params) for params in codon_params],
                    [None, None, None],
                    []])
            for name in partition_names)

        self.assertEqual(self.aln_obj.partitions.models, expected_models)

    def test_set_model(self):
        """Set ``GTR`` on ``BaseConc1.fas`` only and check ``models``.

        After reading the small nexus partition file, only the first
        partition's entry should hold ``['GTR']``; the other six keep
        ``[None]``.
        """
        partition_file = concatenated_small_parNex[0]
        self.aln_obj.partitions.read_from_file(partition_file,
                                               no_aln_check=True)

        self.aln_obj.partitions.set_model("BaseConc1.fas", ["GTR"])

        expected_models = OrderedDict(
            ("BaseConc{}.fas".format(idx), [[[]], [None], []])
            for idx in range(1, 8))
        # Re-assigning an existing key keeps its position in the mapping.
        expected_models['BaseConc1.fas'] = [[[]], ['GTR'], []]

        self.assertEqual(self.aln_obj.partitions.models, expected_models)

    def test_set_model_all(self):
        """Set ``GTR`` with ``apply_all=True`` and check every entry.

        All seven partitions read from the small nexus partition file are
        expected to end up with ``['GTR']`` in their ``models`` entry.
        """
        partition_file = concatenated_small_parNex[0]
        self.aln_obj.partitions.read_from_file(partition_file,
                                               no_aln_check=True)

        self.aln_obj.partitions.set_model("BaseConc1.fas", ["GTR"],
                                          apply_all=True)

        expected_models = OrderedDict()
        for idx in range(1, 8):
            expected_models["BaseConc{}.fas".format(idx)] = [
                [[]], ['GTR'], []]

        self.assertEqual(self.aln_obj.partitions.models, expected_models)

    def test_set_model_codon(self):
        """Set two models with links on all partitions and check ``models``.

        ``set_model`` is called with ``["GTR", "SYM"]``,
        ``links=["12", "3"]`` and ``apply_all=True``; every one of the
        seven entries must then hold both the models and the links.
        """
        partition_file = concatenated_small_parNex[0]
        self.aln_obj.partitions.read_from_file(partition_file,
                                               no_aln_check=True)

        self.aln_obj.partitions.set_model("BaseConc1.fas", ["GTR", "SYM"],
                                          links=["12", "3"],
                                          apply_all=True)

        expected_models = OrderedDict(
            ("BaseConc{}.fas".format(idx),
             [[[]], ['GTR', 'SYM'], ['12', '3']])
            for idx in range(1, 8))

        self.assertEqual(self.aln_obj.partitions.models, expected_models)
Пример #58
0
class AlignmentMissingFiltersTest(unittest.TestCase):
    """Exercise ``AlignmentList.filter_missing_data`` on phylip fixtures."""

    # Fixture paths are kept as tuples so that every test hands a fresh
    # list to ``add_alignment_files``.
    _PAIR = (
        "trifusion/tests/data/missing_data.phy",
        "trifusion/tests/data/missing_data2.phy",
    )
    _SINGLE = ("trifusion/tests/data/missing_data3.phy",)

    def setUp(self):
        """Ensure the temp dir exists and start from an empty list."""
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)

        self.aln_obj = AlignmentList([], sql_db=sql_db)

    def tearDown(self):
        """Clear the alignments, close the connection, drop the temp dir."""
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        shutil.rmtree(temp_dir)

    def _filtered_lengths(self, files, *filter_args):
        """Load ``files``, filter, and return each alignment's locus length.

        ``filter_args`` are forwarded positionally to
        ``filter_missing_data``.
        """
        self.aln_obj.add_alignment_files(list(files))
        self.aln_obj.filter_missing_data(*filter_args)
        return [aln.locus_length for aln in self.aln_obj]

    def test_filter_default(self):
        """Filtering with (25, 50) leaves locus lengths 42 and 43."""
        lengths = self._filtered_lengths(self._PAIR, 25, 50)

        self.assertEqual(lengths, [42, 43])

    def test_filter_and_concat(self):
        """Filter into ``master_out`` and concatenate from that table."""
        self.aln_obj.add_alignment_files(list(self._PAIR))

        self.aln_obj.filter_missing_data(25, 50, table_out="master_out")
        self.aln_obj.concatenate(table_in="master_out")

        self.assertEqual(self.aln_obj.size, 85)

    def test_no_filters(self):
        """Filtering with (100, 100) keeps both loci at length 50."""
        lengths = self._filtered_lengths(self._PAIR, 100, 100)

        self.assertEqual(lengths, [50, 50])

    def test_no_missing(self):
        """Filtering with (0, 0) leaves locus lengths 0 and 19."""
        lengths = self._filtered_lengths(self._PAIR, 0, 0)

        self.assertEqual(lengths, [0, 19])

    def test_no_data_aln_default_filters(self):
        """Filtering ``missing_data3.phy`` with (25, 50) gives length 0."""
        lengths = self._filtered_lengths(self._SINGLE, 25, 50)

        # Only the last alignment is checked; None if there were none.
        last_length = lengths[-1] if lengths else None

        self.assertEqual(last_length, 0)

    def test_no_data_aln_no_filters(self):
        """Filtering ``missing_data3.phy`` with (100, 100) gives length 50."""
        lengths = self._filtered_lengths(self._SINGLE, 100, 100)

        # Only the last alignment is checked; None if there were none.
        last_length = lengths[-1] if lengths else None

        self.assertEqual(last_length, 50)
Пример #59
0
class PartitonsTest(ExpectingTestCase):
    """Tests for the ``partitions`` object held by an ``AlignmentList``.

    ``setUp`` builds the list from ``dna_data_fas`` and resets its
    partitions; individual tests then read partition files or rebuild the
    list as needed.
    """

    # The seven partition names that most tests below expect.
    _BASE_NAMES = tuple("BaseConc{}.fas".format(i) for i in range(1, 8))

    def setUp(self):
        """Ensure the temp dir exists and build a list with reset partitions."""
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)

        self.aln_obj = AlignmentList(dna_data_fas, sql_db=sql_db)
        self.aln_obj.partitions.reset(cur=self.aln_obj.cur)

    def tearDown(self):
        """Clear the alignments, close the connection, drop the temp dir."""
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        shutil.rmtree(temp_dir)

    # ------------------------------------------------------------------
    # Private helpers shared by the tests
    # ------------------------------------------------------------------

    def _read_partitions(self, source):
        """Read ``source[0]`` with ``no_aln_check=True``; return the result."""
        return self.aln_obj.partitions.read_from_file(source[0],
                                                      no_aln_check=True)

    def _rebuild(self, files):
        """Clear and close the current list, then build a new one.

        The new ``AlignmentList`` is created from ``files`` without being
        handed the old connection or cursor.
        """
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        self.aln_obj = AlignmentList(files, sql_db=sql_db)

    def _key_data(self, sort_keys=False):
        """Return key lists of partitions, partitions_alignments and models.

        When ``sort_keys`` is true each key list is sorted; otherwise the
        mapping's own iteration order is kept.
        """
        parts = self.aln_obj.partitions
        convert = sorted if sort_keys else list
        return [convert(mapping.keys())
                for mapping in (parts.partitions,
                                parts.partitions_alignments,
                                parts.models)]

    def _sort_partitions_by_range(self):
        """Re-order ``partitions.partitions`` by each value's first item."""
        self.aln_obj.partitions.partitions = OrderedDict(sorted(
            self.aln_obj.partitions.partitions.iteritems(),
            key=lambda x: x[1][0]
        ))

    def _ranges_are_continuous(self):
        """Return True if each partition starts right after the previous.

        Walks the values of ``partitions.partitions`` in iteration order,
        starting from position 0, comparing ``r[0][0][0]`` with the
        previous value's ``r[0][0][1] + 1``.
        """
        expected_start = 0
        for r in self.aln_obj.partitions.partitions.values():
            if r[0][0][0] != expected_start:
                return False
            expected_start = r[0][0][1] + 1
        return True

    def _assert_base_names_and_counter(self):
        """Assert the seven base names (in order) and a counter of 595."""
        parts = self.aln_obj.partitions
        self.assertEqual([list(parts.partitions.keys()), parts.counter],
                         [list(self._BASE_NAMES), 595])

    def _check_concat_custom_fileset(self, partition_file):
        """Concatenate two active files after reading ``partition_file``.

        The list is rebuilt from ``dna_data_fas``, the partition file is
        read (with alignment checking left at its default), and the active
        set is narrowed to ``BaseConc1.fas`` and ``BaseConc2.fas``. After
        ``concatenate()`` all three key lists must be those two names.
        """
        self._rebuild(dna_data_fas)
        self.aln_obj.partitions.read_from_file(partition_file)

        self.aln_obj.update_active_alignments(
            [join(data_path, "BaseConc1.fas"),
             join(data_path, "BaseConc2.fas")])

        self.aln_obj.concatenate()

        self.expect_equal(self._key_data(sort_keys=True),
                          [["BaseConc1.fas", "BaseConc2.fas"]] * 3)

    # ------------------------------------------------------------------
    # Reading partition files
    # ------------------------------------------------------------------

    def test_read_from_nexus(self):
        """Reading the small nexus partition file yields 7 partitions."""
        self._read_partitions(concatenated_small_parNex)

        self.assertEqual(len(self.aln_obj.partitions.partitions), 7)

    def test_read_from_phylip(self):
        """Reading ``concatenated_small_par`` yields 7 partitions."""
        self._read_partitions(concatenated_small_par)

        self.assertEqual(len(self.aln_obj.partitions.partitions), 7)

    def test_bad_partitions_phy(self):
        """``read_from_file`` returns an ``InvalidPartitionFile`` object."""
        e = self._read_partitions(partition_bad_phy)

        self.assertIsInstance(e, InvalidPartitionFile)

    def test_unsorted_part_phylip(self):
        """``partition_unsorted_phy`` gives ordered names and counter 595."""
        self._read_partitions(partition_unsorted_phy)

        self._assert_base_names_and_counter()

    def test_phylip_dot_notation(self):
        """``partition_dot_not`` gives ordered names and counter 595."""
        self._read_partitions(partition_dot_not)

        self._assert_base_names_and_counter()

    def test_nexus_dot_notation(self):
        """``dot_notation_nex`` gives ordered names and counter 595."""
        self._read_partitions(dot_notation_nex)

        self._assert_base_names_and_counter()

    def test_import_new_partscheme(self):
        """Reset partitions of a rebuilt list, then import a new scheme."""
        self._rebuild(concatenated_medium_nexus)

        self.aln_obj.partitions.reset()

        self._read_partitions(concatenated_small_par)

        res = self.aln_obj.partitions.get_partition_names()

        self.assertEqual(res, list(self._BASE_NAMES))

    def test_add_duplicate_name(self):
        """Add a partition after reading ``concatenated_small_par``."""
        self._read_partitions(concatenated_small_par)

        # NOTE(review): this assertion looks vacuous. ``add_partition`` is
        # called eagerly and its *return value* is handed to assertRaises
        # instead of the callable, so nothing is checked for raising. The
        # name "BaseCond1.fas" is also not one of the names the sibling
        # tests expect from this file ("BaseConc1.fas"), so it is
        # presumably a typo. Left unchanged until the intended behaviour of
        # ``add_partition`` on duplicate names is confirmed.
        self.assertRaises(PartitionException,
                          self.aln_obj.partitions.add_partition(
                              "BaseCond1.fas", length=100))

    def test_get_partition_names(self):
        """``get_partition_names`` returns the seven base names in order."""
        self._read_partitions(concatenated_small_par)

        res = self.aln_obj.partitions.get_partition_names()

        self.assertEqual(res, list(self._BASE_NAMES))

    def test_get_partition_names_withCodon(self):
        """Names from the codon nexus file include three ``_1_N`` entries."""
        self._read_partitions(concatenated_smallCodon_parNex)

        res = self.aln_obj.partitions.get_partition_names()

        self.assertEqual(res, ["BaseConc1.fas_1_1", "BaseConc1.fas_1_2",
                               "BaseConc1.fas_1_3", "BaseConc2.fas",
                               "BaseConc3.fas", "BaseConc4.fas",
                               "BaseConc5.fas", "BaseConc6.fas",
                               "BaseConc7.fas"])

    def test_single_partition(self):
        """A list built from one file reports ``is_single()`` as true."""
        self.aln_obj = AlignmentList([dna_data_fas[0]],
                                     db_con=self.aln_obj.con,
                                     db_cur=self.aln_obj.cur,
                                     sql_db=sql_db)

        self.assertTrue(self.aln_obj.partitions.is_single())

    def test_multiple_partitions(self):
        """The ``setUp`` list reports ``is_single()`` as false."""
        self.assertFalse(self.aln_obj.partitions.is_single())

    # ------------------------------------------------------------------
    # Removing, renaming, merging and splitting
    # ------------------------------------------------------------------

    def test_remove_partition_from_file_original(self):
        """Remove by full file path on a rebuilt list; ranges stay continuous."""
        self._rebuild(dna_data_fas)

        self.aln_obj.partitions.remove_partition(
            file_name="trifusion/tests/data/BaseConc3.fas")

        # Check if remaining partition ranges are continuous
        self.expect_equal(self._ranges_are_continuous(), True)

    def test_remove_partition_from_name(self):
        """Remove ``BaseConc3.fas`` by partition name."""
        self._read_partitions(concatenated_small_parNex)
        self.aln_obj.partitions.remove_partition("BaseConc3.fas")

        # Check keys from _partitions, partitions_alignment and models
        remaining = [n for n in self._BASE_NAMES if n != "BaseConc3.fas"]
        self.expect_equal(self._key_data(), [remaining] * 3)

        # Check if remaining partition ranges are continuous
        self.expect_equal(self._ranges_are_continuous(), True)

    def test_remove_partition_from_file(self):
        """Remove ``BaseConc3.fas`` via the ``file_name`` keyword."""
        self._read_partitions(concatenated_small_parNex)
        self.aln_obj.partitions.remove_partition(file_name="BaseConc3.fas")

        # Check keys from _partitions, partitions_alignment and models
        remaining = [n for n in self._BASE_NAMES if n != "BaseConc3.fas"]
        self.expect_equal(self._key_data(), [remaining] * 3)

        # Check if remaining partition ranges are continuous
        self.expect_equal(self._ranges_are_continuous(), True)

    def test_change_name(self):
        """Rename ``BaseConc1.fas``; the new name is listed last."""
        self._read_partitions(concatenated_small_parNex)

        self.aln_obj.partitions.change_name("BaseConc1.fas", "OtherName")

        expected = list(self._BASE_NAMES[1:]) + ["OtherName"]
        self.expect_equal(self._key_data(), [expected] * 3)

        # Ranges are only continuous once the renamed partition is put
        # back in range order, so sort before checking.
        self._sort_partitions_by_range()

        self.expect_equal(self._ranges_are_continuous(), True)

    def test_merge_partitions(self):
        """Merging all seven partitions leaves only ``New_part``."""
        self._read_partitions(concatenated_small_parNex)

        self.aln_obj.partitions.merge_partitions(
            list(self._BASE_NAMES), "New_part")

        self.assertEqual(self._key_data(), [["New_part"]] * 3)

    def test_split_partition(self):
        """Split ``BaseConc1.fas`` into ``part1`` and ``part2``."""
        self._read_partitions(concatenated_small_parNex)

        self.aln_obj.partitions.split_partition("BaseConc1.fas",
                                                [[0, 50], [51, 84]],
                                                ["part1", "part2"])

        expected = ["part1", "part2"] + list(self._BASE_NAMES[1:])
        self.expect_equal(self._key_data(), [expected] * 3)

        # Check if remaining partition ranges are continuous
        self._sort_partitions_by_range()

        self.expect_equal(self._ranges_are_continuous(), True)

    def test_merge_and_split(self):
        """Merging three partitions and splitting restores all seven names."""
        self._read_partitions(concatenated_small_parNex)

        self.aln_obj.partitions.merge_partitions(
            ["BaseConc1.fas", "BaseConc2.fas", "BaseConc3.fas"], "new_part")

        self.aln_obj.partitions.split_partition("new_part")

        self.expect_equal(self._key_data(sort_keys=True),
                          [list(self._BASE_NAMES)] * 3)

    def test_merge_and_custom_split1(self):
        """Split a merged partition at (0, 50) / (51, 254)."""
        self._read_partitions(concatenated_small_parNex)

        self.aln_obj.partitions.merge_partitions(
            ["BaseConc1.fas", "BaseConc2.fas", "BaseConc3.fas"], "new_part")

        self.aln_obj.partitions.split_partition("new_part",
                                                [(0, 50), (51, 254)],
                                                ["one", "two"])

        key_data = [self.aln_obj.partitions.partitions_alignments["one"],
                    self.aln_obj.partitions.partitions_alignments["two"]]

        self.assertEqual(key_data,
                         [['BaseConc1.fas'],
                          ['BaseConc1.fas', 'BaseConc3.fas', 'BaseConc2.fas']])

    def test_merge_and_custom_split2(self):
        """Split a merged partition at (0, 84) / (85, 254)."""
        self._read_partitions(concatenated_small_parNex)

        self.aln_obj.partitions.merge_partitions(
            ["BaseConc1.fas", "BaseConc2.fas", "BaseConc3.fas"], "new_part")

        self.aln_obj.partitions.split_partition("new_part",
                                                [(0, 84), (85, 254)],
                                                ["one", "two"])

        key_data = [self.aln_obj.partitions.partitions_alignments["one"],
                    self.aln_obj.partitions.partitions_alignments["two"]]

        self.assertEqual(key_data,
                         [['BaseConc1.fas'],
                          ['BaseConc3.fas', 'BaseConc2.fas']])

    # ------------------------------------------------------------------
    # Concatenation with a custom active file set
    # ------------------------------------------------------------------

    def test_concat_custom_fileset_from_phy_partfile(self):
        """Concatenate two files after reading ``concatenated_small_par``."""
        self._check_concat_custom_fileset(concatenated_small_par[0])

    def test_concat_custom_fileset_from_nex_partfile(self):
        """Concatenate two files after reading the nexus partition file.

        This method used to share its name with the test above, which
        rebound the name and kept that test from ever running.
        """
        self._check_concat_custom_fileset(concatenated_small_parNex[0])

    def test_merge_with_custom_fileset(self):
        """Merge three partitions, then concatenate two active files."""
        self._rebuild(dna_data_fas)
        self.aln_obj.partitions.read_from_file(concatenated_small_parNex[0])

        self.aln_obj.partitions.merge_partitions(
            ["BaseConc1.fas", "BaseConc2.fas", "BaseConc3.fas"], "new_part")

        self.aln_obj.update_active_alignments(
            [join(data_path, "BaseConc1.fas"),
             join(data_path, "BaseConc5.fas")])

        self.aln_obj.concatenate()

        self.expect_equal(self._key_data(sort_keys=True),
                          [["BaseConc1.fas", "BaseConc5.fas"]] * 3)

    # ------------------------------------------------------------------
    # Substitution models
    # ------------------------------------------------------------------

    def test_model_detection(self):
        """Models found in ``models_nexus_data`` match the expected mapping."""
        self.aln_obj.clear_alignments()
        self.aln_obj = AlignmentList(models_nexus_data,
                                     db_con=self.aln_obj.con,
                                     db_cur=self.aln_obj.cur,
                                     sql_db=sql_db)

        # Every partition is expected to carry an identical model entry.
        expected = OrderedDict(
            ("Teste{}.fas".format(i),
             [[['nst=6', 'statefreqpr=dirichlet(1,1,1,1)']], [None], []])
            for i in range(1, 8))

        self.assertEqual(self.aln_obj.partitions.models, expected)

    def test_model_detection_codons(self):
        """Models found in ``models_codon_nexus_data`` match expectations."""
        self.aln_obj.clear_alignments()
        self.aln_obj = AlignmentList(models_codon_nexus_data,
                                     db_cur=self.aln_obj.cur,
                                     db_con=self.aln_obj.con,
                                     sql_db=sql_db)

        names = ('Teste1.fas_1', 'Teste2.fas_86', 'Teste3.fas_171',
                 'Teste4.fas_256', 'Teste5.fas_341', 'Teste6.fas_426',
                 'Teste7.fas_511')

        # Each partition maps to the same three parameter lists.
        expected = OrderedDict(
            (name,
             [[['nst=2', 'statefreqpr=dirichlet(1,1,1,1)'],
               ['nst=6', 'statefreqpr=fixed(equal)'],
               ['nst=6', 'statefreqpr=dirichlet(1,1,1,1)']],
              [None, None, None], []])
            for name in names)

        self.assertEqual(self.aln_obj.partitions.models, expected)

    def test_set_model(self):
        """Setting ``GTR`` on one partition leaves the others at ``None``."""
        self._read_partitions(concatenated_small_parNex)

        self.aln_obj.partitions.set_model("BaseConc1.fas", ["GTR"])

        expected = OrderedDict(
            (name, [[[]], [None], []]) for name in self._BASE_NAMES)
        # Re-assigning an existing key keeps its position in the mapping.
        expected['BaseConc1.fas'] = [[[]], ['GTR'], []]

        self.assertEqual(self.aln_obj.partitions.models, expected)

    def test_set_model_all(self):
        """``apply_all=True`` sets ``GTR`` on every partition."""
        self._read_partitions(concatenated_small_parNex)

        self.aln_obj.partitions.set_model("BaseConc1.fas", ["GTR"],
                                          apply_all=True)

        expected = OrderedDict(
            (name, [[[]], ['GTR'], []]) for name in self._BASE_NAMES)

        self.assertEqual(self.aln_obj.partitions.models, expected)

    def test_set_model_codon(self):
        """Two models plus links are applied to every partition."""
        self._read_partitions(concatenated_small_parNex)

        self.aln_obj.partitions.set_model("BaseConc1.fas", ["GTR", "SYM"],
                                          links=["12", "3"],
                                          apply_all=True)

        expected = OrderedDict(
            (name, [[[]], ['GTR', 'SYM'], ['12', '3']])
            for name in self._BASE_NAMES)

        self.assertEqual(self.aln_obj.partitions.models, expected)
Пример #60
0
class AlignmentVariationFilters(unittest.TestCase):
    """Tests for the segregating-site and informative-site filters.

    Each test loads ``variable_data`` into an initially empty
    ``AlignmentList``, applies one filter, and checks how many alignments
    are left.
    """

    def setUp(self):
        """Ensure the temp dir exists and start from an empty list."""
        if not os.path.exists(temp_dir):
            os.makedirs(temp_dir)

        self.aln_obj = AlignmentList([], sql_db=sql_db)

    def tearDown(self):
        """Clear the alignments, close the connection, drop the temp dir."""
        self.aln_obj.clear_alignments()
        self.aln_obj.con.close()
        shutil.rmtree(temp_dir)

    def test_variation_filter_min(self):
        """``filter_segregating_sites(None, None)`` keeps all 3 alignments."""
        self.aln_obj.add_alignment_files(variable_data)

        self.aln_obj.filter_segregating_sites(None, None)

        self.assertEqual(len(self.aln_obj.alignments), 3)

    def test_variation_var_sites(self):
        """``filter_segregating_sites(1, 2)`` leaves no alignments."""
        self.aln_obj.add_alignment_files(variable_data)

        self.aln_obj.filter_segregating_sites(1, 2)

        self.assertEqual(len(self.aln_obj.alignments), 0)

    def test_variation_var_sites2(self):
        """``filter_segregating_sites(1, 3)`` leaves one alignment."""
        self.aln_obj.add_alignment_files(variable_data)

        self.aln_obj.filter_segregating_sites(1, 3)

        self.assertEqual(len(self.aln_obj.alignments), 1)

    def test_variation_inf_min(self):
        """``filter_informative_sites(None, None)`` keeps all 3 alignments."""
        self.aln_obj.add_alignment_files(variable_data)

        self.aln_obj.filter_informative_sites(None, None)

        self.assertEqual(len(self.aln_obj.alignments), 3)

    def test_variation_inf_sites(self):
        """``filter_informative_sites(1, 4)`` leaves one alignment."""
        self.aln_obj.add_alignment_files(variable_data)

        self.aln_obj.filter_informative_sites(1, 4)

        self.assertEqual(len(self.aln_obj.alignments), 1)

    def test_variation_inf_sites2(self):
        """``filter_informative_sites(1, 1)`` leaves one alignment."""
        self.aln_obj.add_alignment_files(variable_data)

        self.aln_obj.filter_informative_sites(1, 1)

        # A leftover debugging print of the alignments was removed here;
        # assertEqual already reports the compared values on failure.
        self.assertEqual(len(self.aln_obj.alignments), 1)