def test_alignment(self):
     """
     Testing default procedure for the genes method with alignment assistance
     for sequences without biological information (FASTA input instead of
     GENBANK).

     Without extra arguments the five records are expected under the single
     "unprocessable" key. With ``ref_seq`` and ``alignment_bin`` supplied the
     result is expected to hold 98 non-empty subsets, none of them named
     "unprocessable", each with a size that is a multiple of five.
     """
     infile = "Fasta/f006.fasta"
     informat = "fasta"
     # Check the input
     self.assertTrue(os.path.isfile(infile))
     self.assertEqual(len(list(SeqIO.parse(infile, informat))), 5)
     # Generate the gene clustering without metadata
     subset_dict = Cluster.get_subsets("genes", infile, informat)
     # Check the output
     self.assertEqual(len(subset_dict), 1)
     self.assertEqual(len(subset_dict["unprocessable"]), 5)
     # Generate the gene clustering with external metadata (from a reference
     # sequence)
     subset_dict = Cluster.get_subsets(
         "genes", infile, informat, ref_seq="rCRS", alignment_bin=mafft_exe
     )
     # Check the output
     self.assertEqual(len(subset_dict), 98)
     self.assertNotIn("unprocessable", subset_dict)
     for key, value in viewitems(subset_dict):
         size = len(value)
         # Name the offending subset and report its size, so that a failure
         # says more than "False is not true"
         self.assertNotEqual(size, 0, "subset %r is empty" % (key,))
         self.assertEqual(
             size % 5, 0,
             "subset %r holds %d entries, not a multiple of 5" % (key, size))
 def test_alignment(self):
     """
     Testing default procedure for the genes method with alignment assistance
     for sequences without biological information (FASTA input instead of
     GENBANK).

     Without extra arguments the five records are expected under the single
     'unprocessable' key. With ``ref_seq`` and ``alignment_bin`` supplied the
     result is expected to hold 98 non-empty subsets, none of them named
     'unprocessable', each with a size that is a multiple of five.
     """
     infile = 'Fasta/f006.fasta'
     informat = 'fasta'
     # Check the input
     self.assertTrue(os.path.isfile(infile))
     self.assertEqual(len(list(SeqIO.parse(infile, informat))), 5)
     # Generate the gene clustering without metadata
     subset_dict = Cluster.get_subsets('genes', infile, informat)
     # Check the output
     self.assertEqual(len(subset_dict), 1)
     self.assertEqual(len(subset_dict['unprocessable']), 5)
     # Generate the gene clustering with external metadata (from a reference
     # sequence)
     subset_dict = Cluster.get_subsets('genes',
                                       infile,
                                       informat,
                                       ref_seq='rCRS',
                                       alignment_bin=mafft_exe)
     # Check the output
     self.assertEqual(len(subset_dict), 98)
     self.assertNotIn('unprocessable', subset_dict)
     for key, value in viewitems(subset_dict):
         size = len(value)
         # Name the offending subset and report its size, so that a failure
         # says more than "False is not true"
         self.assertNotEqual(size, 0, 'subset %r is empty' % (key,))
         self.assertEqual(
             size % 5, 0,
             'subset %r holds %d entries, not a multiple of 5' % (key, size))
 def test_clustering(self):
     """
     Check the subset division produced by the PRD method.

     The 100-record FASTA input is divided using the Newick tree, a subset
     size of 25 and an overlapping of 4; the number of subsets and their
     sorted sizes are compared against fixed expectations.
     """
     fasta_path, fasta_format = "Fasta/f007.fasta", "fasta"
     tree_path, tree_format = "Newick/f007.newick", "newick"
     # Input sanity checks
     self.assertTrue(os.path.isfile(fasta_path))
     num_records = sum(1 for _ in SeqIO.parse(fasta_path, fasta_format))
     self.assertEqual(num_records, 100)
     self.assertTrue(os.path.isfile(tree_path))
     # Run the PRD division
     prd_options = dict(
         tree_file=tree_path,
         file_format=tree_format,
         subset_size=25,
         overlapping=4,
         binary=dcm3_exe,
     )
     subsets = Cluster.get_subsets("prd", fasta_path, fasta_format,
                                   **prd_options)
     # Compare the amount of subsets and their (ordered) sizes
     self.assertEqual(len(subsets), 17)
     sizes = sorted(len(members) for members in viewvalues(subsets))
     expected_sizes = [
         16, 17, 17, 18, 18, 19, 19, 19, 20, 20, 20, 20, 22, 22, 23, 24, 25,
     ]
     self.assertEqual(sizes, expected_sizes)
 def test_log_file(self):
     """
     Testing the genes method with the generation of a log file in a given
     path.

     The log path is handed to ``self.add_file_to_clean`` before the
     clustering runs (presumably so that it is removed afterwards -- confirm
     against the test base class). Once generated, the lines of the log are
     checked for one "> <feature>" entry per expected feature type.
     """
     infile = "Genbank/f006.genbank"
     informat = "genbank"
     logfile = "tmp_test.log"
     # Check the input
     self.assertTrue(os.path.isfile(infile))
     self.assertEqual(len(list(SeqIO.parse(infile, informat))), 5)
     self.assertFalse(os.path.isfile(logfile))
     # Register the very same path that is given to get_subsets, so the
     # cleaned file and the generated file cannot drift apart
     self.add_file_to_clean(logfile)
     # Generate the gene clustering
     subset_dict = Cluster.get_subsets("genes", infile, informat, log_file=logfile)
     # Check the clustering output
     self.assertEqual(len(subset_dict), 63)
     self.assertIn("unprocessable", subset_dict)
     self.assertEqual(len(subset_dict["unprocessable"]), 0)
     # Check the content of the log file
     self.assertTrue(os.path.isfile(logfile))
     with open(logfile, "r") as flog:
         content = flog.readlines()
     # The file is already closed here; assert on the lines kept in memory
     expected_features = [
         "> misc_feature\n", "> D-loop\n", "> rRNA\n", "> tRNA\n",
         "> CDS\n", "> gene\n",
     ]
     for feature in expected_features:
         self.assertIn(feature, content)
 def test_clustering(self):
     """
     Check the subset division produced by the PRD method.

     The 100-record FASTA input is divided using the Newick tree, a subset
     size of 25 and an overlapping of 4; the number of subsets and their
     sorted sizes are compared against fixed expectations.
     """
     fasta_path, fasta_format = 'Fasta/f007.fasta', 'fasta'
     tree_path, tree_format = 'Newick/f007.newick', 'newick'
     # Input sanity checks
     self.assertTrue(os.path.isfile(fasta_path))
     num_records = sum(1 for _ in SeqIO.parse(fasta_path, fasta_format))
     self.assertEqual(num_records, 100)
     self.assertTrue(os.path.isfile(tree_path))
     # Run the PRD division
     prd_options = dict(tree_file=tree_path,
                        file_format=tree_format,
                        subset_size=25,
                        overlapping=4,
                        binary=dcm3_exe)
     subsets = Cluster.get_subsets('prd', fasta_path, fasta_format,
                                   **prd_options)
     # Compare the amount of subsets and their (ordered) sizes
     self.assertEqual(len(subsets), 17)
     sizes = sorted(len(members) for members in viewvalues(subsets))
     expected_sizes = [
         16, 17, 17, 18, 18, 19, 19, 19, 20, 20, 20, 20, 22, 22, 23, 24, 25
     ]
     self.assertEqual(sizes, expected_sizes)
 def test_log_file(self):
     """
     Testing the genes method with the generation of a log file in a given
     path.

     The log path is handed to ``self.add_file_to_clean`` before the
     clustering runs (presumably so that it is removed afterwards -- confirm
     against the test base class). Once generated, the lines of the log are
     checked for one '> <feature>' entry per expected feature type.
     """
     infile = 'Genbank/f006.genbank'
     informat = 'genbank'
     logfile = 'tmp_test.log'
     # Check the input
     self.assertTrue(os.path.isfile(infile))
     self.assertEqual(len(list(SeqIO.parse(infile, informat))), 5)
     self.assertFalse(os.path.isfile(logfile))
     # Register the very same path that is given to get_subsets, so the
     # cleaned file and the generated file cannot drift apart
     self.add_file_to_clean(logfile)
     # Generate the gene clustering
     subset_dict = Cluster.get_subsets('genes',
                                       infile,
                                       informat,
                                       log_file=logfile)
     # Check the clustering output
     self.assertEqual(len(subset_dict), 63)
     self.assertIn('unprocessable', subset_dict)
     self.assertEqual(len(subset_dict['unprocessable']), 0)
     # Check the content of the log file
     self.assertTrue(os.path.isfile(logfile))
     with open(logfile, 'r') as flog:
         content = flog.readlines()
     # The file is already closed here; assert on the lines kept in memory
     expected_features = [
         '> misc_feature\n', '> D-loop\n', '> rRNA\n', '> tRNA\n',
         '> CDS\n', '> gene\n'
     ]
     for feature in expected_features:
         self.assertIn(feature, content)
 def test_clustering(self):
     """
     Check the subset division produced by the naive rows method.

     The 50-record FASTA input is divided with 5 as the extra positional
     argument; five subsets of ten entries each are expected.
     """
     fasta_path = "Fasta/f001.fasta"
     fasta_format = "fasta"
     # Input sanity checks
     self.assertTrue(os.path.isfile(fasta_path))
     num_records = sum(1 for _ in SeqIO.parse(fasta_path, fasta_format))
     self.assertEqual(num_records, 50)
     # Generate the subset division
     subsets = Cluster.get_subsets("rows", fasta_path, fasta_format, 5)
     # Every subset must hold the same amount of entries
     self.assertEqual(len(subsets), 5)
     for members in viewvalues(subsets):
         self.assertEqual(len(members), 10)
 def test_default(self):
     """
     Check the genes method with its default arguments on GenBank input.

     The five-record input is expected to yield 63 subsets, one of them
     being an empty "unprocessable" entry.
     """
     genbank_path = "Genbank/f006.genbank"
     genbank_format = "genbank"
     # Input sanity checks
     self.assertTrue(os.path.isfile(genbank_path))
     num_records = sum(1 for _ in SeqIO.parse(genbank_path, genbank_format))
     self.assertEqual(num_records, 5)
     # Generate the gene clustering
     subsets = Cluster.get_subsets("genes", genbank_path, genbank_format)
     # Inspect the result
     self.assertEqual(len(subsets), 63)
     self.assertIn("unprocessable", subsets)
     unprocessable = subsets["unprocessable"]
     self.assertEqual(len(unprocessable), 0)
 def test_clustering(self):
     """
     Check the subset division produced by the naive rows method.

     The 50-record FASTA input is divided with 5 as the extra positional
     argument; five subsets of ten entries each are expected.
     """
     fasta_path = 'Fasta/f001.fasta'
     fasta_format = 'fasta'
     # Input sanity checks
     self.assertTrue(os.path.isfile(fasta_path))
     num_records = sum(1 for _ in SeqIO.parse(fasta_path, fasta_format))
     self.assertEqual(num_records, 50)
     # Generate the subset division
     subsets = Cluster.get_subsets('rows', fasta_path, fasta_format, 5)
     # Every subset must hold the same amount of entries
     self.assertEqual(len(subsets), 5)
     for members in viewvalues(subsets):
         self.assertEqual(len(members), 10)
 def test_default(self):
     """
     Check the genes method with its default arguments on GenBank input.

     The five-record input is expected to yield 63 subsets, one of them
     being an empty 'unprocessable' entry.
     """
     genbank_path = 'Genbank/f006.genbank'
     genbank_format = 'genbank'
     # Input sanity checks
     self.assertTrue(os.path.isfile(genbank_path))
     num_records = sum(1 for _ in SeqIO.parse(genbank_path, genbank_format))
     self.assertEqual(num_records, 5)
     # Generate the gene clustering
     subsets = Cluster.get_subsets('genes', genbank_path, genbank_format)
     # Inspect the result
     self.assertEqual(len(subsets), 63)
     self.assertIn('unprocessable', subsets)
     unprocessable = subsets['unprocessable']
     self.assertEqual(len(unprocessable), 0)
 def test_feature_filter(self):
     """
     Check the genes method when restricted through ``feature_filter``.

     With ``["CDS"]`` as filter, 14 subsets are expected: the
     "unprocessable" one empty and every other one holding five entries.
     """
     genbank_path = "Genbank/f006.genbank"
     genbank_format = "genbank"
     # Input sanity checks
     self.assertTrue(os.path.isfile(genbank_path))
     num_records = sum(1 for _ in SeqIO.parse(genbank_path, genbank_format))
     self.assertEqual(num_records, 5)
     # Generate the gene clustering
     subsets = Cluster.get_subsets(
         "genes", genbank_path, genbank_format, feature_filter=["CDS"]
     )
     # Inspect the result
     self.assertEqual(len(subsets), 14)
     for name, members in viewitems(subsets):
         expected_size = 0 if name == "unprocessable" else 5
         self.assertEqual(len(members), expected_size)
 def test_feature_filter(self):
     """
     Check the genes method when restricted through ``feature_filter``.

     With ``['CDS']`` as filter, 14 subsets are expected: the
     'unprocessable' one empty and every other one holding five entries.
     """
     genbank_path = 'Genbank/f006.genbank'
     genbank_format = 'genbank'
     # Input sanity checks
     self.assertTrue(os.path.isfile(genbank_path))
     num_records = sum(1 for _ in SeqIO.parse(genbank_path, genbank_format))
     self.assertEqual(num_records, 5)
     # Generate the gene clustering
     subsets = Cluster.get_subsets('genes',
                                   genbank_path,
                                   genbank_format,
                                   feature_filter=['CDS'])
     # Inspect the result
     self.assertEqual(len(subsets), 14)
     for name, members in viewitems(subsets):
         expected_size = 0 if name == 'unprocessable' else 5
         self.assertEqual(len(members), expected_size)