def test_alignment(self):
    """
    Testing default procedure for the genes method with alignment assistance
    for sequences without biological information (FASTA input instead of
    GENBANK).

    The same five-record FASTA fixture is clustered twice. Without any
    extra arguments the result must hold a single "unprocessable" subset
    with all five records. With the "rCRS" reference sequence and the
    alignment binary ("mafft_exe") the result must hold 98 subsets, none
    of them named "unprocessable", none of them empty, and each with a
    size that is a multiple of 5.
    """
    infile = "Fasta/f006.fasta"
    informat = "fasta"
    # Check the input
    self.assertTrue(os.path.isfile(infile))
    self.assertEqual(len(list(SeqIO.parse(infile, informat))), 5)
    # Generate the gene clustering without metadata
    subset_dict = Cluster.get_subsets("genes", infile, informat)
    # Check the output
    self.assertEqual(len(subset_dict), 1)
    self.assertEqual(len(subset_dict["unprocessable"]), 5)
    # Generate the gene clustering with external metadata (from a reference
    # sequence)
    subset_dict = Cluster.get_subsets("genes", infile, informat,
                                      ref_seq="rCRS",
                                      alignment_bin=mafft_exe)
    # Check the output
    self.assertEqual(len(subset_dict), 98)
    self.assertNotIn("unprocessable", subset_dict)
    for key, value in viewitems(subset_dict):
        # Name the offending subset so a failure can be traced back to a
        # concrete key instead of an anonymous "False is not true".
        self.assertNotEqual(len(value), 0,
                            "subset %r is empty" % (key,))
        self.assertEqual(len(value) % 5, 0,
                         "subset %r has %d entries, not a multiple of 5"
                         % (key, len(value)))
def test_alignment(self):
    """
    Testing default procedure for the genes method with alignment assistance
    for sequences without biological information (FASTA input instead of
    GENBANK).

    The same five-record FASTA fixture is clustered twice. Without any
    extra arguments the result must hold a single 'unprocessable' subset
    with all five records. With the 'rCRS' reference sequence and the
    alignment binary ('mafft_exe') the result must hold 98 subsets, none
    of them named 'unprocessable', none of them empty, and each with a
    size that is a multiple of 5.
    """
    infile = 'Fasta/f006.fasta'
    informat = 'fasta'
    # Check the input
    self.assertTrue(os.path.isfile(infile))
    self.assertEqual(len(list(SeqIO.parse(infile, informat))), 5)
    # Generate the gene clustering without metadata
    subset_dict = Cluster.get_subsets('genes', infile, informat)
    # Check the output
    self.assertEqual(len(subset_dict), 1)
    self.assertEqual(len(subset_dict['unprocessable']), 5)
    # Generate the gene clustering with external metadata (from a reference
    # sequence)
    subset_dict = Cluster.get_subsets('genes', infile, informat,
                                      ref_seq='rCRS',
                                      alignment_bin=mafft_exe)
    # Check the output
    self.assertEqual(len(subset_dict), 98)
    self.assertNotIn('unprocessable', subset_dict)
    for key, value in viewitems(subset_dict):
        # Report which subset broke the expectation; a bare assertTrue on a
        # comparison would only say "False is not true".
        self.assertNotEqual(len(value), 0,
                            'subset %r is empty' % (key,))
        self.assertEqual(len(value) % 5, 0,
                         'subset %r has %d entries, not a multiple of 5'
                         % (key, len(value)))
def test_clustering(self):
    """
    Check the subset division produced by the PRD method.

    A 100-record FASTA fixture and its Newick tree are passed to
    Cluster.get_subsets() with subset_size=25 and overlapping=4; the
    result must contain 17 subsets whose sorted sizes match the expected
    list below.
    """
    seq_path = "Fasta/f007.fasta"
    seq_format = "fasta"
    tree_path = "Newick/f007.newick"
    tree_format = "newick"
    # Input sanity: both fixtures exist and the FASTA holds 100 records
    self.assertTrue(os.path.isfile(seq_path))
    records = list(SeqIO.parse(seq_path, seq_format))
    self.assertEqual(len(records), 100)
    self.assertTrue(os.path.isfile(tree_path))
    # Run the PRD division with the options gathered in one place
    prd_options = dict(
        tree_file=tree_path,
        file_format=tree_format,
        subset_size=25,
        overlapping=4,
        binary=dcm3_exe,
    )
    subsets = Cluster.get_subsets("prd", seq_path, seq_format, **prd_options)
    # Compare the number of subsets and their (order-independent) sizes
    self.assertEqual(len(subsets), 17)
    expected_sizes = [16, 17, 17, 18, 18, 19, 19, 19, 20, 20, 20, 20, 22,
                      22, 23, 24, 25]
    observed_sizes = sorted(len(members) for members in viewvalues(subsets))
    self.assertEqual(observed_sizes, expected_sizes)
def test_log_file(self):
    """
    Testing the genes method with the generation of a log file in a given
    path.

    Besides the usual clustering checks (63 subsets, with an empty
    "unprocessable" one), the log file must exist afterwards and contain
    one header line for each of the feature types listed below.
    """
    infile = "Genbank/f006.genbank"
    informat = "genbank"
    logfile = "tmp_test.log"
    # Check the input
    self.assertTrue(os.path.isfile(infile))
    self.assertEqual(len(list(SeqIO.parse(infile, informat))), 5)
    self.assertFalse(os.path.isfile(logfile))
    # Hand the very same path to the clean-up helper so the file written
    # below and the file scheduled for clean-up can never drift apart.
    # NOTE(review): add_file_to_clean() is defined outside this block;
    # presumably it removes the file once the test finishes -- confirm.
    self.add_file_to_clean(logfile)
    # Generate the gene clustering
    subset_dict = Cluster.get_subsets("genes", infile, informat,
                                      log_file=logfile)
    # Check the clustering output
    self.assertEqual(len(subset_dict), 63)
    self.assertIn("unprocessable", subset_dict)
    self.assertEqual(len(subset_dict["unprocessable"]), 0)
    # Check the content of the log file
    self.assertTrue(os.path.isfile(logfile))
    with open(logfile, "r") as flog:
        content = flog.readlines()
    # Each feature header must appear as a complete line (newline included)
    for feature in ["> misc_feature\n", "> D-loop\n", "> rRNA\n",
                    "> tRNA\n", "> CDS\n", "> gene\n"]:
        self.assertIn(feature, content,
                      "%r not found in %s" % (feature, logfile))
def test_clustering(self):
    """
    Verify the PRD method on the 100-sequence fixture.

    With subset_size=25 and overlapping=4 the call must return 17
    subsets; their sizes, once sorted, must equal the reference list.
    """
    fasta_file = 'Fasta/f007.fasta'
    fasta_format = 'fasta'
    newick_file = 'Newick/f007.newick'
    newick_format = 'newick'
    # The fixtures must be present and the FASTA must hold 100 records
    self.assertTrue(os.path.isfile(fasta_file))
    num_records = len(list(SeqIO.parse(fasta_file, fasta_format)))
    self.assertEqual(num_records, 100)
    self.assertTrue(os.path.isfile(newick_file))
    # Build the subset division
    division = Cluster.get_subsets(
        'prd',
        fasta_file,
        fasta_format,
        tree_file=newick_file,
        file_format=newick_format,
        subset_size=25,
        overlapping=4,
        binary=dcm3_exe,
    )
    # Validate the amount of subsets and the distribution of their sizes
    self.assertEqual(len(division), 17)
    sizes = sorted(map(len, viewvalues(division)))
    self.assertEqual(
        sizes,
        [16, 17, 17, 18, 18, 19, 19, 19, 20, 20, 20, 20, 22, 22, 23, 24, 25],
    )
def test_log_file(self):
    """
    Testing the genes method with the generation of a log file in a given
    path.

    Besides the usual clustering checks (63 subsets, with an empty
    'unprocessable' one), the log file must exist afterwards and contain
    one header line for each of the feature types listed below.
    """
    infile = 'Genbank/f006.genbank'
    informat = 'genbank'
    logfile = 'tmp_test.log'
    # Check the input
    self.assertTrue(os.path.isfile(infile))
    self.assertEqual(len(list(SeqIO.parse(infile, informat))), 5)
    self.assertFalse(os.path.isfile(logfile))
    # Reuse the variable rather than repeating the literal, so the path
    # that gets written and the path registered for clean-up stay in sync.
    # NOTE(review): add_file_to_clean() is defined outside this block;
    # presumably it removes the file once the test finishes -- confirm.
    self.add_file_to_clean(logfile)
    # Generate the gene clustering
    subset_dict = Cluster.get_subsets('genes', infile, informat,
                                      log_file=logfile)
    # Check the clustering output
    self.assertEqual(len(subset_dict), 63)
    self.assertIn('unprocessable', subset_dict)
    self.assertEqual(len(subset_dict['unprocessable']), 0)
    # Check the content of the log file
    self.assertTrue(os.path.isfile(logfile))
    with open(logfile, 'r') as flog:
        content = flog.readlines()
    # Each feature header must appear as a complete line (newline included)
    for feature in ['> misc_feature\n', '> D-loop\n', '> rRNA\n',
                    '> tRNA\n', '> CDS\n', '> gene\n']:
        self.assertIn(feature, content,
                      '%r not found in %s' % (feature, logfile))
def test_clustering(self):
    """
    Check the subsets produced by the naive rows method.

    The 50-record FASTA fixture is passed to Cluster.get_subsets() with
    the positional argument 5; the result must hold 5 subsets of 10
    entries each.
    """
    fasta_path = "Fasta/f001.fasta"
    fasta_format = "fasta"
    # Input sanity: the fixture exists and holds 50 records
    self.assertTrue(os.path.isfile(fasta_path))
    records = list(SeqIO.parse(fasta_path, fasta_format))
    self.assertEqual(len(records), 50)
    # Split the sequences with the rows method
    subsets = Cluster.get_subsets("rows", fasta_path, fasta_format, 5)
    # Five subsets are expected, all of the same size
    self.assertEqual(len(subsets), 5)
    for members in viewvalues(subsets):
        self.assertEqual(len(members), 10)
def test_default(self):
    """
    Check the genes method when called with its default options.

    For the five-record GenBank fixture the result must hold 63 subsets,
    including an "unprocessable" subset that is present but empty.
    """
    genbank_path = "Genbank/f006.genbank"
    genbank_format = "genbank"
    # Input sanity: the fixture exists and holds 5 records
    self.assertTrue(os.path.isfile(genbank_path))
    records = list(SeqIO.parse(genbank_path, genbank_format))
    self.assertEqual(len(records), 5)
    # Run the gene clustering
    subsets = Cluster.get_subsets("genes", genbank_path, genbank_format)
    # Validate the number of subsets and the "unprocessable" bucket
    self.assertEqual(len(subsets), 63)
    self.assertIn("unprocessable", subsets)
    unprocessable = subsets["unprocessable"]
    self.assertEqual(len(unprocessable), 0)
def test_clustering(self):
    """
    Verify the naive rows method on the 50-sequence fixture.

    Calling Cluster.get_subsets() with the positional argument 5 must
    return 5 subsets, each of them with 10 entries.
    """
    seq_file = 'Fasta/f001.fasta'
    seq_format = 'fasta'
    # The fixture must be present and hold 50 records
    self.assertTrue(os.path.isfile(seq_file))
    num_records = len(list(SeqIO.parse(seq_file, seq_format)))
    self.assertEqual(num_records, 50)
    # Build the subsets with the rows method
    division = Cluster.get_subsets('rows', seq_file, seq_format, 5)
    # Validate the amount of subsets and the size of each one
    self.assertEqual(len(division), 5)
    for group in viewvalues(division):
        self.assertEqual(len(group), 10)
def test_default(self):
    """
    Verify the default behaviour of the genes method.

    The five-record GenBank fixture must yield 63 subsets; the
    'unprocessable' subset has to exist and contain no entries.
    """
    source_file = 'Genbank/f006.genbank'
    source_format = 'genbank'
    # The fixture must be present and hold 5 records
    self.assertTrue(os.path.isfile(source_file))
    num_records = len(list(SeqIO.parse(source_file, source_format)))
    self.assertEqual(num_records, 5)
    # Build the gene clustering
    clusters = Cluster.get_subsets('genes', source_file, source_format)
    # Validate the result
    self.assertEqual(len(clusters), 63)
    self.assertIn('unprocessable', clusters)
    leftover = clusters['unprocessable']
    self.assertEqual(len(leftover), 0)
def test_feature_filter(self):
    """
    Check the genes method when a feature filter is supplied.

    With feature_filter=["CDS"] the five-record GenBank fixture must
    yield 14 subsets: "unprocessable" has to be empty and every other
    subset has to hold 5 entries.
    """
    genbank_path = "Genbank/f006.genbank"
    genbank_format = "genbank"
    # Input sanity: the fixture exists and holds 5 records
    self.assertTrue(os.path.isfile(genbank_path))
    records = list(SeqIO.parse(genbank_path, genbank_format))
    self.assertEqual(len(records), 5)
    # Run the gene clustering restricted to the requested feature type
    subsets = Cluster.get_subsets("genes", genbank_path, genbank_format,
                                  feature_filter=["CDS"])
    # Validate the number of subsets and the size of each one
    self.assertEqual(len(subsets), 14)
    for name, members in viewitems(subsets):
        expected_size = 0 if name == "unprocessable" else 5
        self.assertEqual(len(members), expected_size)
def test_feature_filter(self):
    """
    Verify the genes method combined with a feature filter.

    Restricting the run to feature_filter=['CDS'] must return 14
    subsets; the 'unprocessable' one must be empty and all the others
    must contain 5 entries.
    """
    source_file = 'Genbank/f006.genbank'
    source_format = 'genbank'
    # The fixture must be present and hold 5 records
    self.assertTrue(os.path.isfile(source_file))
    num_records = len(list(SeqIO.parse(source_file, source_format)))
    self.assertEqual(num_records, 5)
    # Build the gene clustering using only the filtered feature type
    clusters = Cluster.get_subsets('genes', source_file, source_format,
                                   feature_filter=['CDS'])
    # Validate the result
    self.assertEqual(len(clusters), 14)
    for label, group in viewitems(clusters):
        wanted = 5
        if label == 'unprocessable':
            wanted = 0
        self.assertEqual(len(group), wanted)