Exemplo n.º 1
0
    def test_unaligned_muscle(self):
        """Exercise ``seqstats.get_differences`` on unaligned records.

        Every call passes ``aligned = False`` and ``['muscle']`` as the
        aligner tool list. The test logs a warning and returns early when
        ``functions.which`` cannot locate ``muscle``.
        """
        muscle_exe = functions.which('muscle')
        if not muscle_exe:
            _LOG.warning('muscle not found... skipping tests.')
            return

        def differences(rec_a, rec_b, ignore_gaps):
            # A fresh tool list is built for every call.
            return seqstats.get_differences(
                    rec_a, rec_b,
                    aligned = False,
                    ignore_gaps = ignore_gaps,
                    aligner_tools = ['muscle'])

        gappy_a = SeqRecord(Seq('AC--GTNAC-TYATR'), id='1')
        gappy_b = SeqRecord(Seq('ACN-GTAAC--CATT'), id='2')
        # With their gap characters removed the two records read:
        #   'ACGTNACTYATR'
        #   'ACNGTAACCATT'
        observed, length = differences(gappy_a, gappy_b, ignore_gaps = False)
        expected = {
            3:  ('T', 'G'),
            6:  ('C', 'A'),
            7:  ('T', 'C'),
            11: ('R', 'T'),
        }
        self.assertEqual(observed, expected)
        self.assertEqual(length, 12)

        # Second pair: one record is a single base longer than the other.
        longer = SeqRecord(Seq('ATCCGT'), id='1')
        shorter = SeqRecord(Seq('ACCGT'), id='2')

        # Ignoring gaps: no differences, reported length 5.
        observed, length = differences(longer, shorter, ignore_gaps = True)
        self.assertEqual(observed, {})
        self.assertEqual(length, 5)

        # Counting gaps: the unmatched 'T' at index 1 pairs with a gap.
        observed, length = differences(longer, shorter, ignore_gaps = False)
        self.assertEqual(observed, {1: ('T', '-')})
        self.assertEqual(length, 6)
Exemplo n.º 2
0
    def test_unaligned_muscle(self):
        """Check ``seqstats.sample_distance_iter`` on unaligned sequences.

        Runs the iterator twice over ``self.seqs`` with ``sample_size = 2``,
        ``aligned = False``, ``ignore_gaps = True`` and muscle as the only
        aligner tool: once with raw distances and once per site. Each
        yielded distance is compared with ``self.expected`` and each run
        must yield exactly six comparisons.

        The test logs a warning and returns early when ``functions.which``
        cannot locate ``muscle``.
        """
        if not functions.which('muscle'):
            _LOG.warning('muscle not found... skipping tests.')
            return

        # Raw (not per-site) distances.
        distance_iter = seqstats.sample_distance_iter(
                seq_iter = self.seqs,
                sample_size = 2,
                per_site = False,
                aligned = False,
                ignore_gaps = True,
                aligner_tools = ['muscle'])
        # Count explicitly instead of reading the loop index after the
        # loop: an empty iterator must fail the assertion, not raise
        # NameError.
        n_compared = 0
        for seq1, seq2, d, drc in distance_iter:
            self.assertEqual(
                    self.expected[seq1.id][seq2.id],
                    d)
            n_compared += 1
        self.assertEqual(n_compared, 6)

        # Per-site distances.
        distance_iter = seqstats.sample_distance_iter(
                seq_iter = self.seqs,
                sample_size = 2,
                per_site = True,
                aligned = False,
                ignore_gaps = True,
                aligner_tools = ['muscle'])
        # The counter restarts here so that an empty second iterator cannot
        # pass on a value left over from the first loop.
        n_compared = 0
        for seq1, seq2, d, drc in distance_iter:
            # NOTE(review): 4.0 is presumably the number of compared sites
            # in the fixture sequences -- confirm against the fixture setup.
            self.assertAlmostEqual(
                    self.expected[seq1.id][seq2.id] / 4.0,
                    d)
            n_compared += 1
        self.assertEqual(n_compared, 6)
Exemplo n.º 3
0
    def test_unaligned_muscle(self):
        """Exercise ``seqstats.distance`` on unaligned records via muscle.

        Each sequence pair is measured both as a raw count and per site.
        The test logs a warning and returns early when ``functions.which``
        cannot locate ``muscle``.
        """
        muscle_exe = functions.which('muscle')
        if not muscle_exe:
            _LOG.warning('muscle not found... skipping tests.')
            return

        def muscle_distance(rec_a, rec_b, per_site, ignore_gaps):
            # Always unaligned input, with a fresh tool list per call.
            return seqstats.distance(
                    rec_a, rec_b,
                    per_site = per_site,
                    aligned = False,
                    ignore_gaps = ignore_gaps,
                    aligner_tools = ['muscle'])

        gappy_a = SeqRecord(Seq('AC--GTNAC-TYATR'), id='1')
        gappy_b = SeqRecord(Seq('ACN-GTAAC--CATT'), id='2')
        # With their gap characters removed the two records read:
        #   'ACGTNACTYATR'
        #   'ACNGTAACCATT'
        raw = muscle_distance(gappy_a, gappy_b,
                per_site = False, ignore_gaps = False)
        self.assertEqual(raw, 4)
        scaled = muscle_distance(gappy_a, gappy_b,
                per_site = True, ignore_gaps = False)
        self.assertAlmostEqual(scaled, 4 / 12.0)

        # Second pair: one record is a single base longer than the other.
        longer = SeqRecord(Seq('ATCCGT'), id='1')
        shorter = SeqRecord(Seq('ACCGT'), id='2')

        # Ignoring gaps: distance is zero both raw and per site.
        raw = muscle_distance(longer, shorter,
                per_site = False, ignore_gaps = True)
        self.assertEqual(raw, 0)
        scaled = muscle_distance(longer, shorter,
                per_site = True, ignore_gaps = True)
        self.assertEqual(scaled, 0.0)

        # Counting gaps: one difference, or one in six per site.
        raw = muscle_distance(longer, shorter,
                per_site = False, ignore_gaps = False)
        self.assertEqual(raw, 1)
        scaled = muscle_distance(longer, shorter,
                per_site = True, ignore_gaps = False)
        self.assertEqual(scaled, 1 / 6.0)
Exemplo n.º 4
0
 def test_rev_comp_gappy_muscle(self):
     """Summarize distances for the sequences at ``self.gappy_path``.

     Uses muscle with a sample size of 5 and no full alignment. The
     second returned value must be empty and the first must hold 31
     entries. The test logs a warning and returns early when
     ``functions.which`` cannot locate ``muscle``.
     """
     muscle_exe = functions.which('muscle')
     if not muscle_exe:
         _LOG.warning('muscle not found... skipping tests.')
         return
     # The iterator is kept on the instance under ``rc_seqs``.
     self.rc_seqs = dataio.get_buffered_seq_iter([self.gappy_path])
     options = {
         'sample_size': 5,
         'per_site': False,
         'aligned': False,
         'ignore_gaps': True,
         'do_full_alignment': False,
         'aligner_tools': ['muscle'],
         'full_aligner_tools': None,
     }
     summaries, problems = seqsum.summarize_distances(self.rc_seqs,
                                                      **options)
     # NOTE(review): the second value presumably lists reverse-complement
     # errors -- confirm against seqsum.summarize_distances.
     self.assertTrue(len(problems) < 1)
     self.assertEqual(len(summaries), 31)
Exemplo n.º 5
0
 def __init__(self, exe='muscle', out_path=None, **kwargs):
     """Set up an aligner instance bound to a muscle executable.

     Args:
         exe: Name or path handed to ``functions.which`` to locate the
             executable.
         out_path: Optional output path; passed through ``expand_path``
             when truthy, otherwise stored unchanged.
         **kwargs: Extra options stored on ``self.kwargs``. The keys
             ``input`` and ``out`` are rejected.

     Raises:
         errors.ExternalToolNotFoundError: ``functions.which`` returned a
             falsy value for ``exe``.
         ValueError: ``input`` or ``out`` was supplied as a keyword.
     """
     # Bump the class-wide counter first; it makes the instance name unique.
     cls = self.__class__
     cls.count += 1
     self.name = '-'.join([cls.__name__, str(self.count)])

     resolved = functions.which(exe)
     self.exe = resolved
     if not resolved:
         raise errors.ExternalToolNotFoundError(
             'Cannot find muscle executable')
     _LOG.debug('{0}: Using exe {1!r}'.format(self.name, resolved))

     self.kwargs = kwargs
     if any(reserved in kwargs for reserved in ('input', 'out')):
         raise ValueError('MuscleAligner does not accept keyword '
                          'arguments `input`/`out`.')

     self.out_path = expand_path(out_path) if out_path else out_path
     self.cmd = None
Exemplo n.º 6
0
 def __init__(self, exe='mafft', out_path=None, **kwargs):
     """Set up an aligner instance bound to a mafft executable.

     Args:
         exe: Name or path handed to ``functions.which`` to locate the
             executable.
         out_path: Optional output path, stored unchanged.
         **kwargs: Extra options stored on ``self.kwargs``; the key
             ``input`` is rejected. When no options are given,
             ``{'auto': True}`` is used.

     Raises:
         errors.ExternalToolNotFoundError: ``functions.which`` returned a
             falsy value for ``exe``.
         ValueError: ``input`` was supplied as a keyword.
     """
     # Bump the class-wide counter first; it makes the instance name unique.
     cls = self.__class__
     cls.count += 1
     self.name = '-'.join([cls.__name__, str(self.count)])

     resolved = functions.which(exe)
     self.exe = resolved
     if not resolved:
         raise errors.ExternalToolNotFoundError(
             'Cannot find mafft executable')
     _LOG.debug('{0}: Using exe {1!r}'.format(self.name, resolved))

     # Store the options before validating, then fall back to the default.
     self.kwargs = kwargs
     if 'input' in kwargs:
         raise ValueError('MafftAligner does not accept the keyword '
                          'argument `input`.')
     self.kwargs = kwargs or {'auto': True}

     self.out_path = out_path
     self.cmd = None
Exemplo n.º 7
0
 def __init__(self, exe = 'muscle', out_path = None, **kwargs):
     """Create a muscle-backed aligner.

     Args:
         exe: Executable name or path, resolved with ``functions.which``.
         out_path: Optional output path. A truthy value is run through
             ``expand_path``; a falsy one is stored as given.
         **kwargs: Additional options, kept on ``self.kwargs``. Neither
             ``input`` nor ``out`` may be among them.

     Raises:
         errors.ExternalToolNotFoundError: The executable lookup returned
             a falsy value.
         ValueError: ``input`` or ``out`` appears in ``kwargs``.
     """
     klass = self.__class__
     klass.count += 1
     # Instance name is the class name plus the running instance count.
     self.name = '-'.join((klass.__name__, str(self.count)))

     self.exe = functions.which(exe)
     if not self.exe:
         raise errors.ExternalToolNotFoundError(
                 'Cannot find muscle executable')
     _LOG.debug('{0}: Using exe {1!r}'.format(self.name, self.exe))

     self.kwargs = kwargs
     for reserved in ('input', 'out'):
         if reserved in kwargs:
             raise ValueError('MuscleAligner does not accept keyword '
                     'arguments `input`/`out`.')

     if out_path:
         self.out_path = expand_path(out_path)
     else:
         self.out_path = out_path
     self.cmd = None
Exemplo n.º 8
0
 def __init__(self, exe = 'mafft', out_path = None, **kwargs):
     """Create a mafft-backed aligner.

     Args:
         exe: Executable name or path, resolved with ``functions.which``.
         out_path: Optional output path, stored as given.
         **kwargs: Additional options, kept on ``self.kwargs``; ``input``
             may not be among them. With no options at all the instance
             uses ``{'auto': True}``.

     Raises:
         errors.ExternalToolNotFoundError: The executable lookup returned
             a falsy value.
         ValueError: ``input`` appears in ``kwargs``.
     """
     klass = self.__class__
     klass.count += 1
     # Instance name is the class name plus the running instance count.
     self.name = '-'.join((klass.__name__, str(self.count)))

     self.exe = functions.which(exe)
     if not self.exe:
         raise errors.ExternalToolNotFoundError(
                 'Cannot find mafft executable')
     _LOG.debug('{0}: Using exe {1!r}'.format(self.name, self.exe))

     self.kwargs = kwargs
     if 'input' in kwargs:
         raise ValueError('MafftAligner does not accept the keyword '
                 'argument `input`.')
     if len(kwargs) == 0:
         # No options given: use the ``auto`` default.
         self.kwargs = {'auto': True}

     self.out_path = out_path
     self.cmd = None
Exemplo n.º 9
0
 def test_full_alignment_muscle(self):
     """Summarize distances over ``self.seqs`` with a full muscle alignment.

     The second returned value must be an empty list. The keys of the
     first must match ``self.expected_means``, and each entry's
     ``maximum`` and ``mean`` are checked against the expected tables.
     The test logs a warning and returns early when ``functions.which``
     cannot locate ``muscle``.
     """
     muscle_exe = functions.which('muscle')
     if not muscle_exe:
         _LOG.warning('muscle not found... skipping tests.')
         return
     options = {
         'sample_size': 0,
         'per_site': False,
         'aligned': False,
         'ignore_gaps': True,
         'do_full_alignment': True,
         'aligner_tools': ['muscle'],
     }
     summaries, problems = seqsum.summarize_distances(self.seqs, **options)
     self.assertEqual(problems, [])
     self.assertEqual(sorted(summaries.keys()),
                      sorted(self.expected_means.keys()))
     for key in iterkeys(summaries):
         summary = summaries[key]
         self.assertEqual(summary.maximum, self.expected_maxs[key])
         self.assertAlmostEqual(summary.mean, self.expected_means[key])
Exemplo n.º 10
0
 def test_rev_comp_error_mafft_full(self):
     """Summarize distances for ``self.rc_path`` with a full mafft alignment.

     Expects eleven entries in the second returned value, each containing
     ``'Homo_sapiens'``, and twelve entries in the first. The test logs a
     warning and returns early when ``functions.which`` cannot locate
     ``mafft``.
     """
     mafft_exe = functions.which('mafft')
     if not mafft_exe:
         _LOG.warning('mafft not found... skipping tests.')
         return
     # The iterator is kept on the instance under ``rc_seqs``.
     self.rc_seqs = dataio.get_buffered_seq_iter([self.rc_path])
     options = {
         'sample_size': 0,
         'per_site': False,
         'aligned': False,
         'ignore_gaps': False,
         'do_full_alignment': True,
         'aligner_tools': ['mafft'],
         'full_aligner_tools': ['mafft'],
     }
     summaries, problems = seqsum.summarize_distances(self.rc_seqs,
                                                      **options)
     # NOTE(review): the second value presumably lists reverse-complement
     # errors -- confirm against seqsum.summarize_distances.
     self.assertEqual(len(problems), 11)
     for problem in problems:
         self.assertTrue('Homo_sapiens' in problem)
     self.assertEqual(len(summaries), 12)