def test_validate_lengths(self): """ """ self.assertTrue(self.a1._validate_lengths()) self.assertTrue(self.a2._validate_lengths()) self.assertTrue(self.empty._validate_lengths()) self.assertTrue( Alignment([DNASequence('TTT', id="d1")])._validate_lengths()) self.assertFalse( Alignment( [DNASequence('TTT', id="d1"), DNASequence('TT', id="d2")])._validate_lengths())
def test_subalignment(self): """subalignment functions as expected """ # keep seqs by ids actual = self.a1.subalignment(seqs_to_keep=['d1', 'd3']) expected = Alignment([self.d1, self.d3]) self.assertEqual(actual, expected) # keep seqs by indices actual = self.a1.subalignment(seqs_to_keep=[0, 2]) expected = Alignment([self.d1, self.d3]) self.assertEqual(actual, expected) # keep seqs by ids (invert) actual = self.a1.subalignment(seqs_to_keep=['d1', 'd3'], invert_seqs_to_keep=True) expected = Alignment([self.d2]) self.assertEqual(actual, expected) # keep seqs by indices (invert) actual = self.a1.subalignment(seqs_to_keep=[0, 2], invert_seqs_to_keep=True) expected = Alignment([self.d2]) self.assertEqual(actual, expected) # keep positions actual = self.a1.subalignment(positions_to_keep=[0, 2, 3]) d1 = DNASequence('.AC', id="d1") d2 = DNASequence('TAC', id="d2") d3 = DNASequence('.AC', id="d3") expected = Alignment([d1, d2, d3]) self.assertEqual(actual, expected) # keep positions (invert) actual = self.a1.subalignment(positions_to_keep=[0, 2, 3], invert_positions_to_keep=True) d1 = DNASequence('.C-GTTGG..', id="d1") d2 = DNASequence('TCGGT-GGCC', id="d2") d3 = DNASequence('-C-GTTGC--', id="d3") expected = Alignment([d1, d2, d3]) self.assertEqual(actual, expected) # keep seqs and positions actual = self.a1.subalignment(seqs_to_keep=[0, 2], positions_to_keep=[0, 2, 3]) d1 = DNASequence('.AC', id="d1") d3 = DNASequence('.AC', id="d3") expected = Alignment([d1, d3]) self.assertEqual(actual, expected) # keep seqs and positions (invert) actual = self.a1.subalignment(seqs_to_keep=[0, 2], positions_to_keep=[0, 2, 3], invert_seqs_to_keep=True, invert_positions_to_keep=True) d2 = DNASequence('TCGGT-GGCC', id="d2") expected = Alignment([d2]) self.assertEqual(actual, expected)
def setUp(self): """Setup for Fasta tests.""" self.strings = ['AAAA', 'CCCC', 'gggg', 'uuuu'] self.fasta_no_label = '>0\nAAAA\n>1\nCCCC\n>2\ngggg\n>3\nuuuu' self.fasta_with_label =\ '>1st\nAAAA\n>2nd\nCCCC\n>3rd\nGGGG\n>4th\nUUUU' self.fasta_with_label_lw2 =\ '>1st\nAA\nAA\n>2nd\nCC\nCC\n>3rd\nGG\nGG\n>4th\nUU\nUU' self.alignment_dict = { '1st': 'AAAA', '2nd': 'CCCC', '3rd': 'GGGG', '4th': 'UUUU' } self.sequence_objects_a = [ DNASequence('ACTCGAGATC', 'seq1'), DNASequence('GGCCT', 'seq2') ] self.sequence_objects_b = [ BiologicalSequence('ACTCGAGATC', 'seq1'), BiologicalSequence('GGCCT', 'seq2') ] seqs = [ DNASequence("ACC--G-GGTA..", id="seq1"), DNASequence("TCC--G-GGCA..", id="seqs2") ] self.alignment = Alignment(seqs)
def test_majority_consensus(self): """majority_consensus functions as expected """ d1 = DNASequence('TTT', id="d1") d2 = DNASequence('TT-', id="d2") d3 = DNASequence('TC-', id="d3") a1 = Alignment([d1, d2, d3]) self.assertEqual(a1.majority_consensus(), DNASequence('TT-')) d1 = DNASequence('T', id="d1") d2 = DNASequence('A', id="d2") a1 = Alignment([d1, d2]) self.assertTrue(a1.majority_consensus() in [DNASequence('T'), DNASequence('A')]) self.assertEqual(self.empty.majority_consensus(), '')
def test_is_valid(self): """is_valid functions as expected """ self.assertTrue(self.a1.is_valid()) self.assertTrue(self.a2.is_valid()) self.assertTrue(self.empty.is_valid()) # invalid because of length mismatch d1 = DNASequence('..ACC-GTTGG..', id="d1") d2 = DNASequence('TTACCGGT-GGC', id="d2") self.assertFalse(Alignment([d1, d2]).is_valid()) # invalid because of invalid charaters d1 = DNASequence('..ACC-GTXGG..', id="d1") d2 = DNASequence('TTACCGGT-GGCC', id="d2") self.assertFalse(Alignment([d1, d2]).is_valid())
def __call__(self, seq_path, result_path=None, log_path=None, failure_path=None): # load candidate sequences seq_file = open(seq_path, 'U') candidate_sequences = parse_fasta(seq_file) # load template sequences template_alignment = [] template_alignment_fp = self.Params['template_filepath'] for seq_id, seq in parse_fasta(open(template_alignment_fp)): # replace '.' characters with '-' characters template_alignment.append((seq_id, seq.replace('.', '-').upper())) template_alignment = Alignment.from_fasta_records(template_alignment, DNASequence, validate=True) # initialize_logger logger = NastLogger(log_path) # get function for pairwise alignment method pairwise_alignment_f = pairwise_alignment_methods[ self.Params['pairwise_alignment_method']] pynast_aligned, pynast_failed = pynast_seqs( candidate_sequences, template_alignment, min_pct=self.Params['min_pct'], min_len=self.Params['min_len'], align_unaligned_seqs_f=pairwise_alignment_f, logger=logger, temp_dir=get_qiime_temp_dir()) logger.record(str(self)) for i, seq in enumerate(pynast_failed): skb_seq = DNASequence(str(seq), id=seq.Name) pynast_failed[i] = skb_seq pynast_failed = SequenceCollection(pynast_failed) for i, seq in enumerate(pynast_aligned): skb_seq = DNASequence(str(seq), id=seq.Name) pynast_aligned[i] = skb_seq pynast_aligned = Alignment(pynast_aligned) if failure_path is not None: fail_file = open(failure_path, 'w') fail_file.write(pynast_failed.to_fasta()) fail_file.close() if result_path is not None: result_file = open(result_path, 'w') result_file.write(pynast_aligned.to_fasta()) result_file.close() return None else: return pynast_aligned
def setUp(self): self.d1 = DNASequence('..ACC-GTTGG..', id="d1") self.d2 = DNASequence('TTACCGGT-GGCC', id="d2") self.d3 = DNASequence('.-ACC-GTTGC--', id="d3") self.r1 = RNASequence('UUAU-', id="r1") self.r2 = RNASequence('ACGUU', id="r2") self.seqs1 = [self.d1, self.d2, self.d3] self.seqs2 = [self.r1, self.r2] self.seqs1_t = [('d1', '..ACC-GTTGG..'), ('d2', 'TTACCGGT-GGCC'), ('d3', '.-ACC-GTTGC--')] self.seqs2_t = [('r1', 'UUAU-'), ('r2', 'ACGUU')] self.a1 = Alignment(self.seqs1) self.a2 = Alignment(self.seqs2) self.empty = Alignment([])
def test_omit_gap_positions(self): """omitting gap positions functions as expected """ expected = self.a2 self.assertEqual(self.a2.omit_gap_positions(1.0), expected) self.assertEqual(self.a2.omit_gap_positions(0.51), expected) r1 = RNASequence('UUAU', id="r1") r2 = RNASequence('ACGU', id="r2") expected = Alignment([r1, r2]) self.assertEqual(self.a2.omit_gap_positions(0.49), expected) r1 = RNASequence('UUAU', id="r1") r2 = RNASequence('ACGU', id="r2") expected = Alignment([r1, r2]) self.assertEqual(self.a2.omit_gap_positions(0.0), expected) self.assertEqual(self.empty.omit_gap_positions(0.0), self.empty) self.assertEqual(self.empty.omit_gap_positions(0.49), self.empty) self.assertEqual(self.empty.omit_gap_positions(1.0), self.empty)
def test_call_pynast_test1_alt_min_pct(self): """PyNastAligner: returns no result when min_pct too high """ aligner = PyNastAligner({ 'template_filepath': self.pynast_test1_template_fp, 'min_len': 15, 'min_pct': 100.0}) actual_aln = aligner(self.pynast_test1_input_fp) expected_aln = Alignment([]) self.assertEqual(actual_aln, expected_aln)
def test_fasta_from_alignment_from_alignment(self): """should return correct fasta string for alignment object""" # alignment with a few sequences obs = fasta_from_alignment(self.alignment) self.assertEquals('>seq1\nACC--G-GGTA..\n>seqs2\nTCC--G-GGCA..', obs) # empty alginment obs = fasta_from_alignment(Alignment([])) self.assertEquals('', obs) # alignment with a few sequences obs = fasta_from_alignment(self.alignment, sort=False) self.assertEquals('>seq1\nACC--G-GGTA..\n>seqs2\nTCC--G-GGCA..', obs)
def test_omit_gap_sequences(self): """omitting gap sequences functions as expected """ expected = self.a2 self.assertEqual(self.a2.omit_gap_sequences(1.0), expected) self.assertEqual(self.a2.omit_gap_sequences(0.20), expected) expected = Alignment([self.r2]) self.assertEqual(self.a2.omit_gap_sequences(0.19), expected) self.assertEqual(self.empty.omit_gap_sequences(0.0), self.empty) self.assertEqual(self.empty.omit_gap_sequences(0.2), self.empty) self.assertEqual(self.empty.omit_gap_sequences(1.0), self.empty)
def test_to_phylip_map_labels(self): """to_phylip functions as expected with label mapping """ d1 = DNASequence('..ACC-GTTGG..', id="d1") d2 = DNASequence('TTACCGGT-GGCC', id="d2") d3 = DNASequence('.-ACC-GTTGC--', id="d3") a = Alignment([d1, d2, d3]) phylip_str, id_map = a.to_phylip(map_labels=True, label_prefix="s") self.assertEqual(id_map, {'s1': 'd1', 's3': 'd3', 's2': 'd2'}) expected = "\n".join([ "3 13", "s1 ..ACC-GTTGG..", "s2 TTACCGGT-GGCC", "s3 .-ACC-GTTGC--" ]) self.assertEqual(phylip_str, expected)
def test_to_phylip(self): """to_phylip functions as expected """ d1 = DNASequence('..ACC-GTTGG..', id="d1") d2 = DNASequence('TTACCGGT-GGCC', id="d2") d3 = DNASequence('.-ACC-GTTGC--', id="d3") a = Alignment([d1, d2, d3]) phylip_str, id_map = a.to_phylip(map_labels=False) self.assertEqual(id_map, {'d1': 'd1', 'd3': 'd3', 'd2': 'd2'}) expected = "\n".join([ "3 13", "d1 ..ACC-GTTGG..", "d2 TTACCGGT-GGCC", "d3 .-ACC-GTTGC--" ]) self.assertEqual(phylip_str, expected)
def test_ne(self): """inequality operator functions as expected """ self.assertFalse(self.s1 != self.s1) self.assertTrue(self.s1 != self.s2) # SequenceCollections with different number of sequences are not equal self.assertTrue(self.s1 != SequenceCollection([self.d1])) class FakeSequenceCollection(SequenceCollection): pass # SequenceCollections of different types are not equal self.assertTrue(self.s1 != FakeSequenceCollection([self.d1, self.d2])) self.assertTrue(self.s1 != Alignment([self.d1, self.d2])) # SequenceCollections with different sequences are not equal self.assertTrue(self.s1 != SequenceCollection([self.d1, self.r1]))
def test_init_validate(self): """initialization with validation functions as expected """ Alignment(self.seqs1, validate=True) # invalid DNA character invalid_seqs1 = [ self.d1, self.d2, self.d3, DNASequence('.-ACC-GTXGC--', id="i1") ] self.assertRaises(SequenceCollectionError, Alignment, invalid_seqs1, validate=True) # invalid lengths (they're not all equal) invalid_seqs2 = [ self.d1, self.d2, self.d3, DNASequence('.-ACC-GTGC--', id="i2") ] self.assertRaises(SequenceCollectionError, Alignment, invalid_seqs2, validate=True)