Пример #1
0
 def test_join(self):
     """
     Test BioSeqs.join() method.

     After joining, the object must contain every sequence of both input
     files and one report entry per source, in the order they were added.
     """
     fasta_path = 'Fasta/f001.fasta'
     phylip_path = 'Phylip/f003.phylip'
     for path in (fasta_path, phylip_path):
         self.assertTrue(os.path.isfile(path))
     seq_db = BioSeqs.from_seqfile(fasta_path, 'fasta')
     seq_db.join(BioSeqs.from_seqfile(phylip_path, 'phylip'))
     # Check the sequence data against an independent parse of each file
     fasta_records = SeqIO.to_dict(SeqIO.parse(fasta_path, 'fasta'))
     phylip_records = SeqIO.to_dict(SeqIO.parse(phylip_path, 'phylip'))
     self.assertEqual(len(fasta_records) + len(phylip_records), len(seq_db))
     for records in (fasta_records, phylip_records):
         for name, record in viewitems(records):
             self.assertEqual(str(record.seq), str(seq_db.data[name].seq))
     # Check the report information; fields 1..3 of each entry are compared
     # with the expected source kind, location and file format
     expected_entries = (
         ('local', 'Tests/Fasta/f001.fasta', 'fasta'),
         ('local', 'Tests/Phylip/f003.phylip', 'phylip'),
     )
     for entry_idx, fields in enumerate(expected_entries):
         for field_idx, text in enumerate(fields, 1):
             self.assertIn(text, seq_db._report[entry_idx][field_idx])
Пример #2
0
 def test_join(self):
     """
     Test BioSeqs.join() method.

     Loads a FASTA and a PHYLIP file, joins the second object into the
     first, and verifies the merged sequences and the two report entries.
     """
     sources = (('Fasta/f001.fasta', 'fasta'),
                ('Phylip/f003.phylip', 'phylip'))
     for path, _ in sources:
         self.assertTrue(os.path.isfile(path))
     seq_db, extra_db = [BioSeqs.from_seqfile(path, fmt)
                         for path, fmt in sources]
     seq_db.join(extra_db)
     # Check the sequence data
     parsed = [SeqIO.to_dict(SeqIO.parse(path, fmt))
               for path, fmt in sources]
     self.assertEqual(len(parsed[0]) + len(parsed[1]), len(seq_db))
     for records in parsed:
         for seq_id, record in viewitems(records):
             expected_seq = str(record.seq)
             self.assertEqual(expected_seq, str(seq_db.data[seq_id].seq))
     # Check the report information (first entry: FASTA source)
     self.assertIn('local', seq_db._report[0][1])
     self.assertIn('Tests/Fasta/f001.fasta', seq_db._report[0][2])
     self.assertIn('fasta', seq_db._report[0][3])
     # Second entry: the joined PHYLIP source
     self.assertIn('local', seq_db._report[1][1])
     self.assertIn('Tests/Phylip/f003.phylip', seq_db._report[1][2])
     self.assertIn('phylip', seq_db._report[1][3])
Пример #3
0
 def test_seqfile_source(self):
     """
     Test BioSeqs.from_seqfile() and BioSeqs.write() methods.

     Writes the loaded FASTA data to 'tmp_test.gb', re-reads it as GenBank
     and compares the sequences, then validates every line of the report
     file 'tmp_test.rep'.
     """
     infile = 'Fasta/f001.fasta'
     self.assertTrue(os.path.isfile(infile))
     seq_db = BioSeqs.from_seqfile(infile, 'fasta')
     outfile = 'tmp_test.gb'
     outrepfile = 'tmp_test.rep'
     # Register both outputs up front so they are tracked for clean-up
     for tmp_path in (outfile, outrepfile):
         self.files_to_clean.add(tmp_path)
     seq_db.write(outfile)
     self.assertTrue(os.path.isfile(outfile))
     # Check the content of both sequence files
     original = SeqIO.to_dict(SeqIO.parse(infile, 'fasta'))
     written = SeqIO.to_dict(SeqIO.parse(outfile, 'gb'))
     self.assertEqual(len(original), len(written))
     for seq_id, record in viewitems(original):
         self.assertEqual(str(record.seq), str(written[seq_id].seq))
     # Check the content of the report file: every line must be the
     # sequence count, the history header, or a timestamped history entry
     history_entry = re.compile(
         r"""\d\d\d\d/\d\d/\d\d\ \d\d:\d\d:\d\d[ ]+
                                    local[ ]+.*Tests/Fasta/f001\.fasta
                                    [ ]+fasta""", re.VERBOSE)
     with open(outrepfile, 'r') as repfile:
         for line in repfile:
             recognised = ('Num. sequences: 50' in line
                           or 'History:' in line
                           or history_entry.match(line) is not None)
             self.assertTrue(recognised)
Пример #4
0
 def test_seqfile_source(self):
     """
     Test BioSeqs.from_seqfile() and BioSeqs.write() methods.

     The sequences written in GenBank format must match the FASTA input,
     and the report file must only contain the expected kinds of lines.
     """

     def is_expected_report_line(text):
         # A report line is valid when it is the sequence count, the
         # history header, or a dated entry for the local FASTA source.
         if 'Num. sequences: 50' in text:
             return True
         if 'History:' in text:
             return True
         return bool(re.match(r"""\d\d\d\d/\d\d/\d\d\ \d\d:\d\d:\d\d[ ]+
                                    local[ ]+.*Tests/Fasta/f001\.fasta
                                    [ ]+fasta""", text, re.VERBOSE))

     infile = 'Fasta/f001.fasta'
     self.assertTrue(os.path.isfile(infile))
     seq_db = BioSeqs.from_seqfile(infile, 'fasta')
     outfile = 'tmp_test.gb'
     outrepfile = 'tmp_test.rep'
     self.files_to_clean.add(outfile)
     self.files_to_clean.add(outrepfile)
     seq_db.write(outfile)
     self.assertTrue(os.path.isfile(outfile))
     # Check the content of both sequence files
     source_records = SeqIO.to_dict(SeqIO.parse(infile, 'fasta'))
     output_records = SeqIO.to_dict(SeqIO.parse(outfile, 'gb'))
     self.assertEqual(len(source_records), len(output_records))
     for name, record in viewitems(source_records):
         self.assertEqual(str(record.seq), str(output_records[name].seq))
     # Check the content of the report file
     with open(outrepfile, 'r') as repfile:
         report_lines = repfile.readlines()
         for report_line in report_lines:
             self.assertTrue(is_expected_report_line(report_line))
Пример #5
0
 def test_statistics(self):
     """
     Test BioSeqs.statistics() method.

     The result is unpacked as (number of sequences, mean, standard
     deviation, minimum, maximum) and compared with the values expected
     for 'Fasta/f001.fasta'.
     """
     infile = 'Fasta/f001.fasta'
     self.assertTrue(os.path.isfile(infile))
     seq_db = BioSeqs.from_seqfile(infile, 'fasta')
     nseqs, mean, stdev, minimum, maximum = seq_db.statistics()
     # Check the resultant values
     self.assertEqual(nseqs, len(seq_db))
     # Mean and standard deviation are floating-point results: compare them
     # to two decimal places instead of demanding bit-exact equality, which
     # would fail on harmless rounding differences.
     self.assertAlmostEqual(mean, 579.02, places=2)
     self.assertAlmostEqual(stdev, 0.14, places=2)
     self.assertEqual(minimum, 579)
     self.assertEqual(maximum, 580)
Пример #6
0
 def test_statistics(self):
     """
     Test BioSeqs.statistics() method.

     Loads 'Fasta/f001.fasta' and checks the five values returned by
     statistics(): sequence count, mean, standard deviation, minimum and
     maximum.
     """
     infile = 'Fasta/f001.fasta'
     self.assertTrue(os.path.isfile(infile))
     seq_db = BioSeqs.from_seqfile(infile, 'fasta')
     nseqs, mean, stdev, minimum, maximum = seq_db.statistics()
     # The count and the extremes are compared exactly
     self.assertEqual(nseqs, len(seq_db))
     self.assertEqual(minimum, 579)
     self.assertEqual(maximum, 580)
     # The mean and the deviation are floats, so they are compared with a
     # two-decimal tolerance rather than with exact equality
     self.assertAlmostEqual(mean, 579.02, places=2)
     self.assertAlmostEqual(stdev, 0.14, places=2)
Пример #7
0
 def test_entrez_source(self):
     """
     Test BioSeqs.from_entrez() method and len() property.

     Requests a fetch from the 'nuccore' database with max_fetch=10 and
     checks the number of sequences and the first report entry.
     """
     # NOTE(review): the organism term and the e-mail address below contain
     # asterisks and look masked -- confirm the intended values.
     query = ('"h**o sapiens"[porgn] AND mitochondrion[Filter] AND '
              'mRNA[Filter]')
     fetch_limit = 10
     seq_db = BioSeqs.from_entrez(
         entrez_db='nuccore', query=query, email='*****@*****.**',
         max_fetch=fetch_limit)
     # Check the number of sequences fetched
     self.assertEqual(len(seq_db), fetch_limit)
     # Check report information: fields 1..3 of the first entry must hold
     # the source kind, the database name and the query, respectively
     for position, expected in enumerate(('entrez', 'nuccore', query), 1):
         self.assertIn(expected, seq_db._report[0][position])
Пример #8
0
 def test_entrez_source(self):
     """
     Test BioSeqs.from_entrez() method and len() property.

     Builds a BioSeqs object from an Entrez query and verifies both its
     length and the contents of its first report entry.
     """
     # NOTE(review): the query's organism term and the e-mail look masked
     # with asterisks -- verify against the intended values.
     query = '"h**o sapiens"[porgn] AND mitochondrion[Filter] AND ' \
             'mRNA[Filter]'
     request = {
         'entrez_db': 'nuccore',
         'query': query,
         'email': '*****@*****.**',
         'max_fetch': 10,
     }
     seq_db = BioSeqs.from_entrez(**request)
     # Check the number of sequences fetched
     self.assertEqual(len(seq_db), 10)
     # Check report information
     report_entry = seq_db._report[0]
     self.assertIn('entrez', report_entry[1])
     self.assertIn('nuccore', report_entry[2])
     self.assertIn(query, report_entry[3])
Пример #9
0
 def test_update(self):
     """
     Test BioSeqs.update() method.

     Starts from a fetch limited to 10 sequences, calls update() and then
     checks that the object holds as many sequences as Entrez reports for
     the same query.
     """
     # NOTE(review): the organism term and the e-mail address contain
     # asterisks and look masked -- confirm the intended values.
     query = '"h**o sapiens"[porgn] AND mitochondrion[Filter] AND ' \
             'mRNA[Filter]'
     seq_db = BioSeqs.from_entrez(entrez_db='nuccore', email='*****@*****.**',
                                  query=query, max_fetch=10)
     # Check the number of sequences fetched
     self.assertEqual(len(seq_db), 10)
     # Update the database fetching all the available sequences
     seq_db.update('*****@*****.**')
     # Ask Entrez for the total number of matching records
     handle = Entrez.esearch(db='nuccore', term=query, rettype='count')
     try:
         num_seqs = int(Entrez.read(handle)['Count'])
     finally:
         # Always release the handle, even if parsing the reply fails
         handle.close()
     self.assertEqual(len(seq_db), num_seqs)
Пример #10
0
 def test_str(self):
     """
     Test str() property.

     The string form is split into lines and checked one by one: the DB
     summary, the sequence count, the history header and the first history
     entry.
     """
     infile = 'Fasta/f001.fasta'
     self.assertTrue(os.path.isfile(infile))
     seq_db = BioSeqs.from_seqfile(infile, 'fasta')
     # Check the output string
     lines = str(seq_db).split('\n')
     # First line: five 'clpA_<number>' identifiers followed by an ellipsis
     summary_match = re.match(r"""DB:\ \{clpA_\d+,\ clpA_\d+,\ clpA_\d+,
         \ clpA_\d+,\ clpA_\d+,\.\.\.\}""", lines[0], re.VERBOSE)
     self.assertTrue(bool(summary_match))
     self.assertEqual(lines[1], 'Num. sequences: 50')
     self.assertEqual(lines[2], 'History:')
     # Fourth line: timestamp, source kind, file location and format
     history_match = re.match(r"""\d\d\d\d/\d\d/\d\d\ \d\d:\d\d:\d\d[ ]+
         local[ ]+.*Tests/Fasta/f001\.fasta[ ]+fasta""", lines[3],
         re.VERBOSE)
     self.assertTrue(bool(history_match))
Пример #11
0
 def test_update(self):
     """
     Test BioSeqs.update() method.

     After update() the object is expected to contain every record that
     an Entrez count search returns for the original query.
     """
     # NOTE(review): the query's organism term and the e-mail look masked
     # with asterisks -- verify against the intended values.
     query = '"h**o sapiens"[porgn] AND mitochondrion[Filter] AND ' \
             'mRNA[Filter]'
     seq_db = BioSeqs.from_entrez(entrez_db='nuccore',
                                  email='*****@*****.**',
                                  query=query,
                                  max_fetch=10)
     # Check the number of sequences fetched
     self.assertEqual(len(seq_db), 10)
     # Update the database fetching all the available sequences
     seq_db.update('*****@*****.**')
     # Check the number of sequences fetched
     handle = Entrez.esearch(db='nuccore', term=query, rettype='count')
     try:
         search_result = Entrez.read(handle)
     finally:
         # Close the search handle whether or not the reply could be read
         handle.close()
     num_seqs = int(search_result['Count'])
     self.assertEqual(len(seq_db), num_seqs)
Пример #12
0
 def test_bioseqs_source(self):
     """
     Test BioSeqs.from_bioseqs() method and len() property.

     Loads 'BioSeqs/f001.gb' and compares the resulting object with an
     independent parse of the GenBank file and with the first lines of
     the accompanying report file 'BioSeqs/f001.rep'.
     """
     infile = 'BioSeqs/f001.gb'
     inrepfile = 'BioSeqs/f001.rep'
     self.assertTrue(os.path.isfile(infile))
     self.assertTrue(os.path.isfile(inrepfile))
     seq_db = BioSeqs.from_bioseqs(infile)
     # Check the content of the BioSeqs' object
     indict = SeqIO.to_dict(SeqIO.parse(infile, 'gb'))
     self.assertEqual(len(seq_db), len(indict))
     for key, value in viewitems(indict):
         self.assertEqual(str(value.seq), str(seq_db.data[key].seq))
     # Check the content of the BioSeqs' report
     with open(inrepfile, 'r') as repfile:
         count_line = repfile.readline().strip()  # Num. sequences: 50
         # Read the number after the last colon so the check does not
         # depend on the count having exactly two digits
         reported_count = int(count_line.rsplit(':', 1)[-1])
         self.assertEqual(len(seq_db), reported_count)
         repfile.readline()  # Skip the 'History:' header
         line = repfile.readline().strip()  # [First source information]
         source_info = line.split('    ')
         self.assertEqual(seq_db._report[0], tuple(source_info))
Пример #13
0
 def test_bioseqs_source(self):
     """
     Test BioSeqs.from_bioseqs() method and len() property.

     The object built from 'BioSeqs/f001.gb' must hold the same sequences
     as the GenBank file, and its first report entry must match the first
     history line of 'BioSeqs/f001.rep'.
     """
     infile = 'BioSeqs/f001.gb'
     inrepfile = 'BioSeqs/f001.rep'
     self.assertTrue(os.path.isfile(infile))
     self.assertTrue(os.path.isfile(inrepfile))
     seq_db = BioSeqs.from_bioseqs(infile)
     # Check the content of the BioSeqs' object
     indict = SeqIO.to_dict(SeqIO.parse(infile, 'gb'))
     self.assertEqual(len(seq_db), len(indict))
     for key, value in viewitems(indict):
         self.assertEqual(str(value.seq), str(seq_db.data[key].seq))
     # Check the content of the BioSeqs' report
     with open(inrepfile, 'r') as repfile:
         # First line, e.g. 'Num. sequences: 50': the count is whatever
         # follows the last colon, regardless of how many digits it has
         _, _, count_text = repfile.readline().strip().rpartition(':')
         self.assertEqual(len(seq_db), int(count_text))
         # Second line is the 'History:' header; it carries no data
         repfile.readline()
         # Third line holds the first source information, with fields
         # separated by four spaces
         source_info = repfile.readline().strip().split('    ')
         self.assertEqual(seq_db._report[0], tuple(source_info))
Пример #14
0
 def test_str(self):
     """
     Test str() property.

     Verifies the first four lines of str(seq_db): the DB summary, the
     number of sequences, the history header and the first history entry.
     """
     # Verbose-mode patterns: unescaped whitespace inside them is ignored
     summary_pattern = r"""DB:\ \{clpA_\d+,\ clpA_\d+,\ clpA_\d+,
         \ clpA_\d+,\ clpA_\d+,\.\.\.\}"""
     history_pattern = r"""\d\d\d\d/\d\d/\d\d\ \d\d:\d\d:\d\d[ ]+
         local[ ]+.*Tests/Fasta/f001\.fasta[ ]+fasta"""
     infile = 'Fasta/f001.fasta'
     self.assertTrue(os.path.isfile(infile))
     seq_db = BioSeqs.from_seqfile(infile, 'fasta')
     outstr = str(seq_db)
     # Check the output string
     lines = outstr.split('\n')
     self.assertIsNotNone(re.match(summary_pattern, lines[0], re.VERBOSE))
     self.assertEqual(lines[1], 'Num. sequences: 50')
     self.assertEqual(lines[2], 'History:')
     self.assertIsNotNone(re.match(history_pattern, lines[3], re.VERBOSE))