Пример #1
0
    def test_validate_lengths(self):
        """_validate_lengths accepts equal lengths and rejects a mismatch
        """
        # the fixture alignments, including the empty one, are expected to
        # pass the length check
        for aln in (self.a1, self.a2, self.empty):
            self.assertTrue(aln._validate_lengths())

        # an alignment holding a single sequence passes
        single = Alignment([DNASequence('TTT', id="d1")])
        self.assertTrue(single._validate_lengths())

        # sequences of three and two characters do not pass
        ragged = Alignment([DNASequence('TTT', id="d1"),
                            DNASequence('TT', id="d2")])
        self.assertFalse(ragged._validate_lengths())
Пример #2
0
    def test_subalignment(self):
        """subalignment filters by sequence and/or position, with inversion
        """
        aln = self.a1

        # sequences selected by id
        self.assertEqual(aln.subalignment(seqs_to_keep=['d1', 'd3']),
                         Alignment([self.d1, self.d3]))

        # sequences selected by index
        self.assertEqual(aln.subalignment(seqs_to_keep=[0, 2]),
                         Alignment([self.d1, self.d3]))

        # inverted selection by id leaves only d2
        self.assertEqual(
            aln.subalignment(seqs_to_keep=['d1', 'd3'],
                             invert_seqs_to_keep=True),
            Alignment([self.d2]))

        # inverted selection by index leaves only d2
        self.assertEqual(
            aln.subalignment(seqs_to_keep=[0, 2],
                             invert_seqs_to_keep=True),
            Alignment([self.d2]))

        # only positions 0, 2 and 3 are retained
        three_columns = [DNASequence('.AC', id="d1"),
                         DNASequence('TAC', id="d2"),
                         DNASequence('.AC', id="d3")]
        self.assertEqual(aln.subalignment(positions_to_keep=[0, 2, 3]),
                         Alignment(three_columns))

        # inverted: every position except 0, 2 and 3 is retained
        remaining_columns = [DNASequence('.C-GTTGG..', id="d1"),
                             DNASequence('TCGGT-GGCC', id="d2"),
                             DNASequence('-C-GTTGC--', id="d3")]
        self.assertEqual(
            aln.subalignment(positions_to_keep=[0, 2, 3],
                             invert_positions_to_keep=True),
            Alignment(remaining_columns))

        # sequence and position filters combined
        self.assertEqual(
            aln.subalignment(seqs_to_keep=[0, 2],
                             positions_to_keep=[0, 2, 3]),
            Alignment([DNASequence('.AC', id="d1"),
                       DNASequence('.AC', id="d3")]))

        # both filters combined and both inverted
        self.assertEqual(
            aln.subalignment(seqs_to_keep=[0, 2],
                             positions_to_keep=[0, 2, 3],
                             invert_seqs_to_keep=True,
                             invert_positions_to_keep=True),
            Alignment([DNASequence('TCGGT-GGCC', id="d2")]))
Пример #3
0
 def setUp(self):
     """Build the strings, sequence lists and alignment used by the tests."""
     self.strings = ['AAAA', 'CCCC', 'gggg', 'uuuu']

     # FASTA text with numeric labels, with named labels, and with named
     # labels where each sequence is split over two-character lines
     self.fasta_no_label = '>0\nAAAA\n>1\nCCCC\n>2\ngggg\n>3\nuuuu'
     self.fasta_with_label = (
         '>1st\nAAAA\n>2nd\nCCCC\n>3rd\nGGGG\n>4th\nUUUU')
     self.fasta_with_label_lw2 = (
         '>1st\nAA\nAA\n>2nd\nCC\nCC\n>3rd\nGG\nGG\n>4th\nUU\nUU')

     # label -> sequence mapping
     self.alignment_dict = {'1st': 'AAAA', '2nd': 'CCCC',
                            '3rd': 'GGGG', '4th': 'UUUU'}

     # the same two (sequence, label) pairs wrapped in two sequence types
     labelled = [('ACTCGAGATC', 'seq1'), ('GGCCT', 'seq2')]
     self.sequence_objects_a = [
         DNASequence(residues, label) for residues, label in labelled]
     self.sequence_objects_b = [
         BiologicalSequence(residues, label) for residues, label in labelled]

     # two-sequence alignment containing '-' and '.' characters
     self.alignment = Alignment([
         DNASequence("ACC--G-GGTA..", id="seq1"),
         DNASequence("TCC--G-GGCA..", id="seqs2"),
     ])
Пример #4
0
    def test_majority_consensus(self):
        """majority_consensus functions as expected
        """
        d1 = DNASequence('TTT', id="d1")
        d2 = DNASequence('TT-', id="d2")
        d3 = DNASequence('TC-', id="d3")
        a1 = Alignment([d1, d2, d3])
        self.assertEqual(a1.majority_consensus(), DNASequence('TT-'))

        # one 'T' and one 'A' at the only position: either result is accepted
        d1 = DNASequence('T', id="d1")
        d2 = DNASequence('A', id="d2")
        a1 = Alignment([d1, d2])
        # assertIn reports the offending value and the candidates on failure,
        # where assertTrue(x in [...]) would only say "False is not true"
        self.assertIn(a1.majority_consensus(),
                      [DNASequence('T'), DNASequence('A')])

        # the consensus of the empty alignment compares equal to ''
        self.assertEqual(self.empty.majority_consensus(), '')
Пример #5
0
    def test_is_valid(self):
        """is_valid accepts the fixtures and rejects malformed alignments
        """
        for aln in (self.a1, self.a2, self.empty):
            self.assertTrue(aln.is_valid())

        # invalid because of length mismatch (13 vs. 12 characters)
        ragged = Alignment([DNASequence('..ACC-GTTGG..', id="d1"),
                            DNASequence('TTACCGGT-GGC', id="d2")])
        self.assertFalse(ragged.is_valid())

        # invalid because of invalid characters (the 'X' in d1)
        bad_chars = Alignment([DNASequence('..ACC-GTXGG..', id="d1"),
                               DNASequence('TTACCGGT-GGCC', id="d2")])
        self.assertFalse(bad_chars.is_valid())
Пример #6
0
    def __call__(self,
                 seq_path,
                 result_path=None,
                 log_path=None,
                 failure_path=None):
        """Align the sequences in seq_path against the template alignment.

        seq_path: path to the FASTA file of candidate sequences
        result_path: if not None, the aligned sequences are written to this
         path as FASTA and None is returned
        log_path: passed to NastLogger
        failure_path: if not None, the sequences reported as failed by
         pynast_seqs are written to this path as FASTA

        Returns the Alignment of aligned sequences when result_path is None,
        otherwise None.

        Reads 'template_filepath', 'pairwise_alignment_method', 'min_pct'
        and 'min_len' from self.Params.
        """
        # The candidate file has to stay open until pynast_seqs has returned:
        # parse_fasta presumably yields records lazily from the handle
        # (TODO confirm), so closing earlier could truncate the input.
        # NOTE(review): the 'U' mode is kept as-is to preserve the newline
        # handling of the interpreter this file targets; it is not accepted
        # by Python >= 3.11.
        with open(seq_path, 'U') as seq_file:
            # load candidate sequences
            candidate_sequences = parse_fasta(seq_file)

            # load template sequences
            template_records = []
            with open(self.Params['template_filepath']) as template_file:
                for seq_id, seq in parse_fasta(template_file):
                    # replace '.' characters with '-' characters
                    template_records.append(
                        (seq_id, seq.replace('.', '-').upper()))
            template_alignment = Alignment.from_fasta_records(
                template_records, DNASequence, validate=True)

            # initialize_logger
            logger = NastLogger(log_path)

            # get function for pairwise alignment method
            pairwise_alignment_f = pairwise_alignment_methods[
                self.Params['pairwise_alignment_method']]

            pynast_aligned, pynast_failed = pynast_seqs(
                candidate_sequences,
                template_alignment,
                min_pct=self.Params['min_pct'],
                min_len=self.Params['min_len'],
                align_unaligned_seqs_f=pairwise_alignment_f,
                logger=logger,
                temp_dir=get_qiime_temp_dir())

        logger.record(str(self))

        # re-wrap the returned sequence objects (which expose .Name) as
        # DNASequence objects so they can go into the collection types below
        pynast_failed = SequenceCollection(
            [DNASequence(str(seq), id=seq.Name) for seq in pynast_failed])
        pynast_aligned = Alignment(
            [DNASequence(str(seq), id=seq.Name) for seq in pynast_aligned])

        if failure_path is not None:
            with open(failure_path, 'w') as fail_file:
                fail_file.write(pynast_failed.to_fasta())

        if result_path is None:
            return pynast_aligned

        with open(result_path, 'w') as result_file:
            result_file.write(pynast_aligned.to_fasta())
        return None
Пример #7
0
    def setUp(self):
        """Create the sequences, (id, sequence) tuples and alignments
        shared by the tests
        """
        # (id, sequence) tuples for the DNA and RNA fixtures
        self.seqs1_t = [('d1', '..ACC-GTTGG..'), ('d2', 'TTACCGGT-GGCC'),
                        ('d3', '.-ACC-GTTGC--')]
        self.seqs2_t = [('r1', 'UUAU-'), ('r2', 'ACGUU')]

        # sequence objects built from those tuples
        self.seqs1 = [DNASequence(residues, id=seq_id)
                      for seq_id, residues in self.seqs1_t]
        self.seqs2 = [RNASequence(residues, id=seq_id)
                      for seq_id, residues in self.seqs2_t]

        # individual handles on each sequence
        self.d1, self.d2, self.d3 = self.seqs1
        self.r1, self.r2 = self.seqs2

        self.a1 = Alignment(self.seqs1)
        self.a2 = Alignment(self.seqs2)
        self.empty = Alignment([])
Пример #8
0
    def test_omit_gap_positions(self):
        """omit_gap_positions returns the expected alignment per threshold
        """
        # at these thresholds a2 is expected back unchanged
        for threshold in (1.0, 0.51):
            self.assertEqual(self.a2.omit_gap_positions(threshold), self.a2)

        # at these thresholds the expected sequences are four characters long
        trimmed = Alignment([RNASequence('UUAU', id="r1"),
                             RNASequence('ACGU', id="r2")])
        for threshold in (0.49, 0.0):
            self.assertEqual(self.a2.omit_gap_positions(threshold), trimmed)

        # the empty alignment is expected back whatever the threshold
        for threshold in (0.0, 0.49, 1.0):
            self.assertEqual(self.empty.omit_gap_positions(threshold),
                             self.empty)
Пример #9
0
    def test_call_pynast_test1_alt_min_pct(self):
        """PyNastAligner: returns no result when min_pct too high
        """
        # min_pct of 100.0 is the setting under test
        params = {
            'template_filepath': self.pynast_test1_template_fp,
            'min_len': 15,
            'min_pct': 100.0,
        }
        aligner = PyNastAligner(params)

        # no result_path is given, so the aligner returns its alignment,
        # which is expected to be empty
        self.assertEqual(aligner(self.pynast_test1_input_fp), Alignment([]))
Пример #10
0
    def test_fasta_from_alignment_from_alignment(self):
        """should return correct fasta string for alignment object"""
        # assertEqual replaces the deprecated assertEquals alias, which
        # newer unittest versions no longer provide

        # alignment with a few sequences
        obs = fasta_from_alignment(self.alignment)
        self.assertEqual('>seq1\nACC--G-GGTA..\n>seqs2\nTCC--G-GGCA..', obs)

        # empty alignment
        obs = fasta_from_alignment(Alignment([]))
        self.assertEqual('', obs)

        # same alignment with sorting switched off
        obs = fasta_from_alignment(self.alignment, sort=False)
        self.assertEqual('>seq1\nACC--G-GGTA..\n>seqs2\nTCC--G-GGCA..', obs)
Пример #11
0
    def test_omit_gap_sequences(self):
        """omit_gap_sequences returns the expected alignment per threshold
        """
        # at these thresholds a2 is expected back unchanged
        for threshold in (1.0, 0.20):
            self.assertEqual(self.a2.omit_gap_sequences(threshold), self.a2)

        # just below 0.20 only r2 is expected to remain
        only_r2 = Alignment([self.r2])
        self.assertEqual(self.a2.omit_gap_sequences(0.19), only_r2)

        # the empty alignment is expected back whatever the threshold
        for threshold in (0.0, 0.2, 1.0):
            self.assertEqual(self.empty.omit_gap_sequences(threshold),
                             self.empty)
Пример #12
0
    def test_to_phylip_map_labels(self):
        """to_phylip functions as expected with label mapping
        """
        aln = Alignment([
            DNASequence('..ACC-GTTGG..', id="d1"),
            DNASequence('TTACCGGT-GGCC', id="d2"),
            DNASequence('.-ACC-GTTGC--', id="d3"),
        ])

        result = a_result = aln.to_phylip(map_labels=True, label_prefix="s")
        phylip_str, id_map = a_result

        # generated "s"-prefixed labels map back to the original ids
        self.assertEqual(id_map, {'s1': 'd1', 's2': 'd2', 's3': 'd3'})

        # header line "3 13", then one "label sequence" line per sequence
        expected_lines = [
            "3 13",
            "s1 ..ACC-GTTGG..",
            "s2 TTACCGGT-GGCC",
            "s3 .-ACC-GTTGC--",
        ]
        self.assertEqual(phylip_str, "\n".join(expected_lines))
Пример #13
0
    def test_to_phylip(self):
        """to_phylip functions as expected without label mapping
        """
        aln = Alignment([
            DNASequence('..ACC-GTTGG..', id="d1"),
            DNASequence('TTACCGGT-GGCC', id="d2"),
            DNASequence('.-ACC-GTTGC--', id="d3"),
        ])

        phylip_str, id_map = aln.to_phylip(map_labels=False)

        # with mapping off, every id maps to itself
        self.assertEqual(id_map, {'d1': 'd1', 'd2': 'd2', 'd3': 'd3'})

        # header line "3 13", then one "id sequence" line per sequence
        expected_lines = [
            "3 13",
            "d1 ..ACC-GTTGG..",
            "d2 TTACCGGT-GGCC",
            "d3 .-ACC-GTTGC--",
        ]
        self.assertEqual(phylip_str, "\n".join(expected_lines))
Пример #14
0
    def test_ne(self):
        """!= is False against itself and True against differing collections
        """
        class FakeSequenceCollection(SequenceCollection):
            pass

        # the operator is exercised directly, hence assertTrue/assertFalse
        self.assertFalse(self.s1 != self.s1)
        self.assertTrue(self.s1 != self.s2)

        # SequenceCollections with different number of sequences are not equal
        fewer_seqs = SequenceCollection([self.d1])
        self.assertTrue(self.s1 != fewer_seqs)

        # SequenceCollections of different types are not equal
        subclassed = FakeSequenceCollection([self.d1, self.d2])
        self.assertTrue(self.s1 != subclassed)
        as_alignment = Alignment([self.d1, self.d2])
        self.assertTrue(self.s1 != as_alignment)

        # SequenceCollections with different sequences are not equal
        other_seqs = SequenceCollection([self.d1, self.r1])
        self.assertTrue(self.s1 != other_seqs)
Пример #15
0
    def test_init_validate(self):
        """initialization with validate=True accepts valid input and raises
        SequenceCollectionError otherwise
        """
        # the fixture sequences must construct without raising
        Alignment(self.seqs1, validate=True)

        # each case appends one offending sequence to the fixture sequences:
        #  i1 - contains an invalid DNA character ('X')
        #  i2 - is one character shorter than i1
        offending = (('.-ACC-GTXGC--', "i1"),
                     ('.-ACC-GTGC--', "i2"))
        for residues, seq_id in offending:
            invalid_seqs = [self.d1, self.d2, self.d3,
                            DNASequence(residues, id=seq_id)]
            self.assertRaises(SequenceCollectionError,
                              Alignment,
                              invalid_seqs,
                              validate=True)