Пример #1
0
 def setUp(self):
     """Prepare the paths used by the gbfetch script tests.

     Stores the script path, a scratch output file obtained from
     ``getTestFile`` and the expected FASTA and GenBank data files.
     """
     data_path = package_paths.data_path
     self.gbfetch = package_paths.scripts_path("gbfetch.py")
     self.mkTestDir()
     self.out = self.getTestFile('gbfetch_test.txt')
     self.expected_fasta = data_path('JF314863-JF314866.fasta')
     self.expected_gb = data_path('JF314863-JF314866.gb')
Пример #2
0
 def setUp(self):
     """Build three gapped records, expected values and fixture paths.

     ``expected_means`` and ``expected_maxs`` are keyed by record id;
     the statistic they describe is defined by the tests that use them.
     """
     data_path = package_paths.data_path
     id_and_residues = (
         ('1', 'CCA--CGTAA'),
         ('2', 'CCG--CGTAA'),
         ('3', 'CCA--TATAA'),
     )
     self.seqs = [
         SeqRecord(Seq(residues), id=record_id)
         for record_id, residues in id_and_residues
     ]
     self.expected_means = {'1': 1.5, '2': 2.0, '3': 2.5}
     self.expected_maxs = {'1': 2, '2': 3, '3': 3}
     self.rc_path = data_path('primates-rev-comp-error.fasta')
     self.gappy_path = data_path('melittobia-its1.fasta')
     self.rc_gappy_path = data_path('melittobia-its1-rev-comp-error.fasta')
Пример #3
0
 def test_copy(self):
     """A re-read record equals ``self.seq`` until its name is changed."""
     gb_path = package_paths.data_path('JF314862.gb')
     duplicate = SeqIO.read(
         gb_path, format='gb', alphabet=IUPAC.ambiguous_dna)
     are_equal = sequtils.sequences_are_equal(self.seq, duplicate)
     self.assertTrue(are_equal)
     # Altering only the name must be enough to break equality.
     duplicate.name = duplicate.name + 'a'
     are_equal = sequtils.sequences_are_equal(self.seq, duplicate)
     self.assertFalse(are_equal)
Пример #4
0
 def test_format_conversion_protein(self):
     """Convert each caenophidia file to every other non-GenBank format.

     For every input/output pair ``exe_seqaid`` is invoked with
     ``-d aa``; the input and output files are then parsed with the
     extended protein alphabet and compared as aligned data.
     """
     alphabet = IUPAC.extended_protein
     for filename in ('caenophidia.fasta', 'caenophidia.phylip',
                      'caenophidia.nexus'):
         in_ext = os.path.splitext(filename)[-1]
         # '.phylip' input is parsed as 'phylip-relaxed'; for the other
         # files the format name is the extension without its dot.
         in_format = ('phylip-relaxed' if in_ext == '.phylip'
                      else in_ext.replace('.', ''))
         in_path = package_paths.data_path(filename)
         for out_ext, out_format in iteritems(self.to_formats):
             # Skip same-extension conversions and GenBank output.
             if out_ext == in_ext or out_format == 'genbank':
                 continue
             out_name = filename.replace(in_ext, out_ext)
             out_path = self.getTestFile(out_name)
             _LOG.info('converting {0} to {1}'.format(filename, out_ext))
             self.exe_seqaid([in_path, out_path, '-d', 'aa'])
             parsed_in = SeqIO.parse(
                 in_path, format=in_format, alphabet=alphabet)
             parsed_out = SeqIO.parse(
                 out_path, format=out_format, alphabet=alphabet)
             self.assertSameSequenceData(
                 parsed_in, parsed_out, aligned=True)
Пример #5
0
 def test_copy(self):
     """Check equality with a fresh copy, then inequality after a rename."""
     source = package_paths.data_path('JF314862.gb')
     other = SeqIO.read(source,
                        format='gb',
                        alphabet=IUPAC.ambiguous_dna)
     self.assertTrue(sequtils.sequences_are_equal(self.seq, other))
     # Append a character to the name so the two records differ.
     other.name = other.name + 'a'
     self.assertFalse(sequtils.sequences_are_equal(self.seq, other))
Пример #6
0
 def test_basic(self):
     """Summaries for both primates files report a constant length of 898.

     Checks the 'global' summary (n == 24) and every remaining per-file
     summary for max == min == 898, mean 898.0 and variance 0.0.
     """
     nexus_path = package_paths.data_path('primates.nexus')
     fasta_path = package_paths.data_path('primates.fasta')

     seq_length = 898
     summaries = seqstats.get_seq_summaries_from_files(
         [nexus_path, fasta_path])
     overall = summaries.pop('global')
     self.assertEqual(overall.n, 24)
     self.assertEqual(overall.maximum, seq_length)
     self.assertEqual(overall.minimum, seq_length)
     self.assertAlmostEqual(overall.mean, 898.0)
     self.assertAlmostEqual(overall.variance, 0.0)
     # Remaining keys must name one of the two input files.
     file_suffixes = ('primates.nexus', 'primates.fasta')
     for key, summary in summaries.items():
         self.assertTrue(key.endswith(file_suffixes))
         self.assertEqual(summary.maximum, seq_length)
         self.assertEqual(summary.minimum, seq_length)
         self.assertAlmostEqual(summary.mean, 898.0)
         self.assertAlmostEqual(summary.variance, 0.0)
Пример #7
0
 def setUp(self):
     """Set up fixture paths, reference strings and a small FASTA file.

     Calls ``mkTestDir``, records the JF314862 GenBank and FASTA data
     paths, stores the reference sequence string and a one-record FASTA
     string, and writes that FASTA string to the file returned by
     ``getTestFile('small.fasta')``.
     """
     self.mkTestDir()
     self.gb_path = package_paths.data_path('JF314862.gb')
     self.fasta_path = package_paths.data_path('JF314862.fasta')
     # Adjacent literals are concatenated at compile time; this replaces
     # the backslash-continued '+' chain without changing the value.
     self.seq_str = (
         'CATCATCAACATCATCGTGCCCTGCGTGCTCATCTCCTTCGTGGCTGTGC'
         'TCGTCTACTTTCTGCCTGCCAAGGGTAACGCTGGCACCAGGCGGCTGTGG'
         'GACTGCCTGTGCCATAGGCGTGAAGAGGGCAGGCCATGTGGCTGGGCAGA'
         'GGGAGGGAAGTGGGGGACAGCCACCGCTGGGAGACTGGCACCTGGGCCCA'
         'GTGCCCGTCATTTCCCCATCACATGGGCTTGGGGACATGGAAGCCAGTCC'
         'TGTGGGAGCAGACAGACACTCCCGGCTGCCGTGTCAGTCCTTAGGGCTGG'
         'CTGGACTCTCTCTGCACAGCCTCCCACTGTCAGTCCCAGGACCATCCATG'
         'TCCTAGGCATGTCTAGGCAGAGCCAGGCCCTTTCCAGGTGCCCTGGGACC'
         'CCGTCTCACGTGTCGATCCCCTCACTCTCCACATCCTGGCAGCGGGTGGG'
         'CAGAAGTGCACCGTCTCCATCAATGTCC'
     )
     self.fasta_str = ">s1\nACGTGCTATCTATCGTATTTAG\n"
     self.small_fasta = self.getTestFile('small.fasta')
     # The context manager closes the handle even if write() raises,
     # which the previous manual open()/close() pair did not guarantee.
     with open(self.small_fasta, 'w') as out:
         out.write(self.fasta_str)
Пример #8
0
 def setUp(self):
     """Create fixture paths, reference strings and ``small.fasta``.

     After ``mkTestDir`` the JF314862 GenBank/FASTA data paths, the
     reference sequence string and a one-record FASTA string are stored
     on the instance; the FASTA string is then written to the test file
     obtained from ``getTestFile('small.fasta')``.
     """
     self.mkTestDir()
     self.gb_path = package_paths.data_path('JF314862.gb')
     self.fasta_path = package_paths.data_path('JF314862.fasta')
     # Implicit literal concatenation inside parentheses yields the same
     # string as the former backslash-continued '+' expression.
     self.seq_str = (
         'CATCATCAACATCATCGTGCCCTGCGTGCTCATCTCCTTCGTGGCTGTGC'
         'TCGTCTACTTTCTGCCTGCCAAGGGTAACGCTGGCACCAGGCGGCTGTGG'
         'GACTGCCTGTGCCATAGGCGTGAAGAGGGCAGGCCATGTGGCTGGGCAGA'
         'GGGAGGGAAGTGGGGGACAGCCACCGCTGGGAGACTGGCACCTGGGCCCA'
         'GTGCCCGTCATTTCCCCATCACATGGGCTTGGGGACATGGAAGCCAGTCC'
         'TGTGGGAGCAGACAGACACTCCCGGCTGCCGTGTCAGTCCTTAGGGCTGG'
         'CTGGACTCTCTCTGCACAGCCTCCCACTGTCAGTCCCAGGACCATCCATG'
         'TCCTAGGCATGTCTAGGCAGAGCCAGGCCCTTTCCAGGTGCCCTGGGACC'
         'CCGTCTCACGTGTCGATCCCCTCACTCTCCACATCCTGGCAGCGGGTGGG'
         'CAGAAGTGCACCGTCTCCATCAATGTCC'
     )
     self.fasta_str = ">s1\nACGTGCTATCTATCGTATTTAG\n"
     self.small_fasta = self.getTestFile('small.fasta')
     # Use a context manager so the file is closed on every exit path,
     # including an exception raised by write().
     with open(self.small_fasta, 'w') as handle:
         handle.write(self.fasta_str)
Пример #9
0
 def setUp(self):
     """Define GenBank ids, accessions and expected data-file paths.

     Sets a single id/accession pair, a four-element id list and
     accession list, a fifteen-element accession list, the comma-joined
     id string, and the paths of the matching FASTA/GenBank data files.
     """
     data_path = package_paths.data_path
     self.id = '354698774'
     self.acc = 'JF314862'
     self.singleton_fasta = data_path('JF314862.fasta')
     self.singleton_gb = data_path('JF314862.gb')
     self.id_list = ['354698776', '354698778', '354698780', '354698782']
     self.acc_list = ['JF314863', 'JF314864', 'JF314865', 'JF314866']
     self.long_acc_list = [
         'JF314862', 'JF314863', 'JF314864', 'JF314865', 'JF314866',
         'JF314867', 'JF314868', 'JF314869', 'JF314870', 'JF314871',
         'JF314872', 'JF314873', 'JF314874', 'JF314875', 'JF314876',
     ]
     self.ids = ','.join(self.id_list)
     self.multi_fasta = data_path('JF314863-JF314866.fasta')
     self.multi_gb = data_path('JF314863-JF314866.gb')
     self.long_multi_fasta = data_path('JF314862-JF314876.fasta')
     self.long_multi_gb = data_path('JF314862-JF314876.gb')
Пример #10
0
 def setUp(self):
     """Store ids, accession lists and data-file paths for the tests.

     ``ids`` is ``id_list`` joined with commas; the FASTA and GenBank
     paths are resolved through ``package_paths.data_path``.
     """
     locate = package_paths.data_path
     self.id = '354698774'
     self.acc = 'JF314862'
     self.singleton_fasta = locate('JF314862.fasta')
     self.singleton_gb = locate('JF314862.gb')
     gi_numbers = ['354698776', '354698778', '354698780', '354698782']
     self.id_list = gi_numbers
     self.acc_list = ['JF314863', 'JF314864', 'JF314865', 'JF314866']
     self.long_acc_list = [
         'JF314862', 'JF314863', 'JF314864',
         'JF314865', 'JF314866', 'JF314867',
         'JF314868', 'JF314869', 'JF314870',
         'JF314871', 'JF314872', 'JF314873',
         'JF314874', 'JF314875', 'JF314876',
     ]
     self.ids = ','.join(gi_numbers)
     self.multi_fasta = locate('JF314863-JF314866.fasta')
     self.multi_gb = locate('JF314863-JF314866.gb')
     self.long_multi_fasta = locate('JF314862-JF314876.fasta')
     self.long_multi_gb = locate('JF314862-JF314876.gb')
Пример #11
0
 def test_simble_gb_seq(self):
     """Digest the first GenBank record with ``RecognitionSeq('TAG', 3)``.

     Expects six ``Fragment`` objects with the listed lengths, in order.
     """
     recognition = RecognitionSeq('TAG', 3)
     gb_path = package_paths.data_path('JF314863-JF314866.gb')
     records = SeqIO.parse(
         gb_path, format='gb', alphabet=IUPAC.ambiguous_dna)
     first = next(records)
     self.assertEqual(first.name, 'JF314863')
     fragments = list(recognition.digest(first))
     self.assertEqual(len(fragments), 6)
     expected_lengths = [117, 172, 62, 10, 10, 102]
     # Both sequences hold six items here, so pairing them is
     # equivalent to walking them by index.
     for fragment, expected in zip(fragments, expected_lengths):
         self.assertIsInstance(fragment, Fragment)
         self.assertEqual(len(fragment), expected)
Пример #12
0
 def test_caenophidia(self):
     """Convert caenophidia protein data between every pair of formats.

     Each conversion must return 114, and the parsed input and output
     files must hold the same data.
     """
     ext_by_format = {
         'fasta': '.fasta',
         'phylip-relaxed': '.phylip',
         'nexus': '.nexus',
     }
     alphabet = IUPAC.extended_protein
     for src_format, src_ext in iteritems(ext_by_format):
         src = package_paths.data_path('caenophidia' + src_ext)
         for dest_format, dest_ext in iteritems(ext_by_format):
             dest = self.getTestFile('caenophidia' + dest_ext)
             converted = convert_format(
                 in_file=src,
                 in_format=src_format,
                 out_file=dest,
                 out_format=dest_format,
                 data_type='protein')
             self.assertEqual(converted, 114)
             self.assertSameData(
                 SeqIO.parse(src, format=src_format, alphabet=alphabet),
                 SeqIO.parse(dest, format=dest_format, alphabet=alphabet))
Пример #13
0
 def test_simble_gb_seq(self):
     """Check the ``DigestSummary`` of the first record digested with TAG/3.

     Verifies the summary's recognition sequence, molecule id, name,
     description and length, plus the expected length distribution.
     """
     recognition = RecognitionSeq('TAG', 3)
     gb_path = package_paths.data_path('JF314863-JF314866.gb')
     records = SeqIO.parse(
         gb_path, format='gb', alphabet=IUPAC.ambiguous_dna)
     record = next(records)
     self.assertEqual(record.name, 'JF314863')
     summary = DigestSummary(recognition, record)
     self.assertIsInstance(summary, DigestSummary)
     self.assertEqual(summary.recognition_seq, str(recognition.seq))
     self.assertEqual(summary.molecule_id, record.id)
     self.assertEqual(summary.molecule_name, record.name)
     self.assertEqual(summary.molecule_description, record.description)
     self.assertIsInstance(summary.length_distribution, dict)
     expected_distribution = {13: 2, 65: 1, 175: 1}
     self.assertEqual(summary.length_distribution, expected_distribution)
     self.assertEqual(summary.molecule_length, len(record))
Пример #14
0
 def test_limnonectes(self):
     """Convert limnonectes DNA data between every pair of formats.

     Each conversion must return 80, and the parsed input and output
     files must hold the same data.
     """
     ext_by_format = {
         'fasta': '.fasta',
         'phylip-relaxed': '.phylip',
         'nexus': '.nexus',
     }
     alphabet = IUPAC.ambiguous_dna
     for src_format, src_ext in iteritems(ext_by_format):
         src = package_paths.data_path('limnonectes' + src_ext)
         for dest_format, dest_ext in iteritems(ext_by_format):
             dest = self.getTestFile('limnonectes' + dest_ext)
             converted = convert_format(
                 in_file=src,
                 in_format=src_format,
                 out_file=dest,
                 out_format=dest_format,
                 data_type='dna')
             self.assertEqual(converted, 80)
             self.assertSameData(
                 SeqIO.parse(src, format=src_format, alphabet=alphabet),
                 SeqIO.parse(dest, format=dest_format, alphabet=alphabet))
Пример #15
0
 def test_extra_length(self):
     """Run seqdigest with ``-x 10`` and compare the per-key result files.

     One ``<key>.txt`` file under ``self.test_dir`` is registered and
     parsed for every key of ``expected``; the parsed results must equal
     ``expected``.
     """
     expected = {
         'JF314863': {23: 2, 75: 1, 185: 1},
         'JF314864': {23: 1, 75: 1, 190: 1},
         'JF314865': {23: 1, 85: 1, 188: 1},
         'JF314866': {23: 2, 75: 1, 185: 1},
         'combined': {23: 6, 75: 3, 85: 1, 185: 2, 188: 1, 190: 1},
     }
     recognition_seq = 'TAG'
     cut_site = '3'
     arguments = [
         '-s', recognition_seq,
         '-c', cut_site,
         '-g', '354698776,354698778',
         '-x', '10',
         package_paths.data_path('JF314865-JF314866.gb'),
     ]
     self.exe_seqdigest(arguments)
     results = {}
     for key in iterkeys(expected):
         file_name = ".".join((key, 'txt'))
         result_path = os.path.join(self.test_dir, file_name)
         self.appendTestFile(result_path)
         results[key] = self.parse_result_file(result_path)
     self.assertEqual(expected, results)
Пример #16
0
 def test_accessions(self):
     """Run seqdigest with ``-a`` accessions and compare the result files.

     One ``<key>.txt`` file under ``self.test_dir`` is registered and
     parsed for every key of ``expected``; the parsed results must equal
     ``expected``.
     """
     expected = {
         'JF314863': {13: 2, 65: 1, 175: 1},
         'JF314864': {13: 1, 65: 1, 180: 1},
         'JF314865': {13: 1, 75: 1, 178: 1},
         'JF314866': {13: 2, 65: 1, 175: 1},
         'combined': {13: 6, 65: 3, 75: 1, 175: 2, 178: 1, 180: 1},
     }
     recognition_seq = 'TAG'
     cut_site = '3'
     arguments = [
         '-s', recognition_seq,
         '-c', cut_site,
         '-a', 'JF314863,JF314864',
         package_paths.data_path('JF314865-JF314866.gb'),
     ]
     self.exe_seqdigest(arguments)
     results = {}
     for key in iterkeys(expected):
         file_name = ".".join((key, 'txt'))
         result_path = os.path.join(self.test_dir, file_name)
         self.appendTestFile(result_path)
         results[key] = self.parse_result_file(result_path)
     self.assertEqual(expected, results)
Пример #17
0
 def test_limnonectes(self):
     """Round-trip the limnonectes DNA files through all format pairs.

     ``convert_format`` must return 80 for each pair, and parsing the
     source and converted files must give the same data.
     """
     extensions = {'fasta': '.fasta', 'phylip-relaxed': '.phylip',
                   'nexus': '.nexus'}
     dna = IUPAC.ambiguous_dna
     for from_format, from_ext in iteritems(extensions):
         source = package_paths.data_path('limnonectes' + from_ext)
         for to_format, to_ext in iteritems(extensions):
             target = self.getTestFile('limnonectes' + to_ext)
             count = convert_format(in_file=source, in_format=from_format,
                                    out_file=target, out_format=to_format,
                                    data_type='dna')
             self.assertEqual(count, 80)
             original = SeqIO.parse(source, format=from_format,
                                    alphabet=dna)
             converted = SeqIO.parse(target, format=to_format,
                                     alphabet=dna)
             self.assertSameData(original, converted)
Пример #18
0
 def test_caenophidia(self):
     """Round-trip the caenophidia protein files through all format pairs.

     ``convert_format`` must return 114 for each pair, and parsing the
     source and converted files must give the same data.
     """
     extensions = {'fasta': '.fasta', 'phylip-relaxed': '.phylip',
                   'nexus': '.nexus'}
     protein = IUPAC.extended_protein
     for from_format, from_ext in iteritems(extensions):
         source = package_paths.data_path('caenophidia' + from_ext)
         for to_format, to_ext in iteritems(extensions):
             target = self.getTestFile('caenophidia' + to_ext)
             count = convert_format(in_file=source, in_format=from_format,
                                    out_file=target, out_format=to_format,
                                    data_type='protein')
             self.assertEqual(count, 114)
             original = SeqIO.parse(source, format=from_format,
                                    alphabet=protein)
             converted = SeqIO.parse(target, format=to_format,
                                     alphabet=protein)
             self.assertSameData(original, converted)
Пример #19
0
 def setUp(self):
     """Resolve the GenBank fixture path and load it with ``get_seq_dict``."""
     gb_path = package_paths.data_path('JF314863-JF314866.gb')
     self.gb_path = gb_path
     self.seqs = get_seq_dict(gb_path, format='gb', data_type='dna')
Пример #20
0
 def setUp(self):
     """Store four GenBank id strings and the multi-record GenBank path."""
     gi_numbers = ['354698776', '354698778', '354698780', '354698782']
     self.id_list = gi_numbers
     self.multi_gb = package_paths.data_path('JF314863-JF314866.gb')
Пример #21
0
 def setUp(self):
     """Record the gbfetch script path, an output file and expected data.

     ``mkTestDir`` is called before the output file is requested from
     ``getTestFile``.
     """
     locate = package_paths.data_path
     self.gbfetch = package_paths.scripts_path("gbfetch.py")
     self.mkTestDir()
     self.out = self.getTestFile('gbfetch_test.txt')
     self.expected_fasta = locate('JF314863-JF314866.fasta')
     self.expected_gb = locate('JF314863-JF314866.gb')
Пример #22
0
 def setUp(self):
     """Set fixture paths plus the expected record names and ids.

     Names are 'JF314863' through 'JF314866'; each id is the name with
     '.1' appended.
     """
     data_path = package_paths.data_path
     self.gb_path = data_path('JF314863-JF314866.gb')
     self.fasta_path = data_path('JF314863-JF314866.fasta')
     accession_numbers = range(314863, 314867)
     self.names = ['JF' + str(number) for number in accession_numbers]
     self.ids = [str(name) + '.1' for name in self.names]
Пример #23
0
 def setUp(self):
     """Read the JF314862 GenBank record into ``self.seq``."""
     gb_path = package_paths.data_path('JF314862.gb')
     self.seq = SeqIO.read(
         gb_path, format='gb', alphabet=IUPAC.ambiguous_dna)
Пример #24
0
 def setUp(self):
     """Store the GenBank path and the result of ``get_buffered_seq_iter``."""
     gb_path = package_paths.data_path('JF314863-JF314866.gb')
     self.gb_path = gb_path
     self.seqs = get_buffered_seq_iter(
         [gb_path], format='gb', data_type='dna')
Пример #25
0
 def setUp(self):
     """Load the single-record GenBank fixture as ``self.seq``."""
     source = package_paths.data_path('JF314862.gb')
     self.seq = SeqIO.read(source,
                           format='gb',
                           alphabet=IUPAC.ambiguous_dna)
Пример #26
0
 def setUp(self):
     """Keep the GenBank fixture path and its ``get_seq_dict`` result."""
     fixture = package_paths.data_path('JF314863-JF314866.gb')
     self.gb_path = fixture
     self.seqs = get_seq_dict(fixture, format='gb', data_type='dna')
Пример #27
0
 def setUp(self):
     """Store the partitions data-file path and a sample text line."""
     partitions_path = package_paths.data_path('primates.partitions.txt')
     self.path = partitions_path
     self.line = 'another line\n'
Пример #28
0
 def setUp(self):
     """Set ``self.path`` to the partitions file and ``self.line`` to a line."""
     locate = package_paths.data_path
     self.path = locate('primates.partitions.txt')
     self.line = 'another line\n'
Пример #29
0
 def setUp(self):
     """Resolve the GenBank fixture and pass it to ``get_buffered_seq_iter``."""
     fixture = package_paths.data_path('JF314863-JF314866.gb')
     self.gb_path = fixture
     paths = [fixture]
     self.seqs = get_buffered_seq_iter(paths, format='gb', data_type='dna')
Пример #30
0
 def setUp(self):
     """Store GenBank/FASTA fixture paths and expected names and ids.

     ``names`` holds 'JF' followed by each number in 314863..314866;
     ``ids`` holds each of those names followed by '.1'.
     """
     locate = package_paths.data_path
     self.gb_path = locate('JF314863-JF314866.gb')
     self.fasta_path = locate('JF314863-JF314866.fasta')
     numbers = range(314863, 314867)
     self.names = ['JF' + str(value) for value in numbers]
     self.ids = [str(entry) + '.1' for entry in self.names]
Пример #31
0
 def setUp(self):
     """Define the id list and the multi-record GenBank data path."""
     locate = package_paths.data_path
     self.id_list = ['354698776', '354698778', '354698780', '354698782']
     self.multi_gb = locate('JF314863-JF314866.gb')