def test_get_preprocess_fasta_cmd_sff_run_prefix_match_error_1(self):
    """A mismatch between run prefixes and file names raises ValueError.

    Two extra filepaths ('1_new.sff' and '1_error.sff') are attached to
    raw data 3, every row of qiita.prep_1 gets the run_prefix
    'preprocess_test' and sample 1.SKB8.640193 gets 'new'. Building the
    command must then fail.
    """
    # NOTE(review): presumably the failure is triggered by '1_error.sff',
    # which has no corresponding run_prefix -- confirm against
    # _get_preprocess_fasta_cmd.
    setup_sql = (
        " INSERT INTO qiita.filepath (filepath_id, filepath, "
        "filepath_type_id, checksum, checksum_algorithm_id, "
        "data_directory_id) VALUES (19, '1_new.sff', 17, 852952723, 1, 5); "
        "INSERT INTO qiita.raw_filepath (raw_data_id , filepath_id) "
        "VALUES (3, 19); "
        "INSERT INTO qiita.filepath (filepath_id, filepath, "
        "filepath_type_id, checksum, checksum_algorithm_id, "
        "data_directory_id) VALUES (20, '1_error.sff', 17, 852952723, 1, 5); "
        "INSERT INTO qiita.raw_filepath (raw_data_id , filepath_id) "
        "VALUES (3, 20); "
        "UPDATE qiita.prep_1 SET run_prefix='preprocess_test'; "
        "UPDATE qiita.prep_1 SET run_prefix='new' "
        "WHERE sample_id = '1.SKB8.640193'; ")
    SQLConnectionHandler().execute(setup_sql)

    # Objects are created only after the database has been modified.
    rd = RawData(3)
    preprocess_params = Preprocessed454Params(1)
    pt = PrepTemplate(1)

    self.assertRaises(
        ValueError, _get_preprocess_fasta_cmd, rd, pt, preprocess_params)
def test_get_preprocess_fasta_cmd_sff_run_prefix_match_error_2(self):
    """Changing three samples' run prefixes makes the command fail.

    The run_prefix of three samples in ``self.sff_prep_template_rp`` is
    overwritten ('test1', 'test2' and 'error'), the template files are
    regenerated, and building the command is expected to raise
    ValueError.
    """
    template = self.sff_prep_template_rp
    new_prefixes = (
        ('1.SKB8.640193', 'test1'),
        ('1.SKD8.640184', 'test2'),
        ('1.SKB7.640196', 'error'),
    )
    for sample_id, run_prefix in new_prefixes:
        template[sample_id]['run_prefix'] = run_prefix

    template.generate_files()
    # Register every file of the template so that it gets cleaned up.
    self.files_to_remove.extend(
        [path for _, path in template.get_filepaths()])

    preprocess_params = Preprocessed454Params(1)
    self.assertRaises(
        ValueError, _get_preprocess_fasta_cmd,
        self.raw_data_rp, self.sff_prep_template_rp, preprocess_params)
def test_get_preprocess_fasta_cmd_sff_run_prefix(self):
    """Check the commands generated when samples have two run prefixes.

    The run_prefix of sample 1.SKM9.640192 is changed to 'test1' so that
    the prep template holds more than one run_prefix value; the command
    string is then split on '; ' and each command is checked for its
    executable name and its key argument.
    """
    # Need to alter the run_prefix of one sample so we can test the
    # multiple values
    conn_handler = SQLConnectionHandler()
    sql = ("UPDATE qiita.prep_1 SET run_prefix='test1' WHERE "
           "sample_id = '1.SKM9.640192'")
    conn_handler.execute(sql)

    raw_data = RawData(3)
    params = Preprocessed454Params(1)
    prep_template = PrepTemplate(1)

    obs_cmd, obs_output_dir = _get_preprocess_fasta_cmd(
        raw_data, prep_template, params)

    obs_cmds = obs_cmd.split('; ')
    # Assuming that test_get_preprocess_fasta_cmd_sff_no_run_prefix is
    # working, we only need to test which commands are being run and
    # that n is valid
    self.assertEqual(len(obs_cmds), 8)
    self.assertTrue(obs_cmds[0].startswith('process_sff.py'))
    self.assertTrue(obs_cmds[1].startswith('process_sff.py'))
    self.assertTrue(obs_cmds[2].startswith('split_libraries.py'))
    self.assertIn('-n 1', obs_cmds[2])
    self.assertTrue(obs_cmds[3].startswith('split_libraries.py'))
    self.assertIn('-n 800000', obs_cmds[3])
    self.assertTrue(obs_cmds[4].startswith('cat'))
    self.assertIn('split_library_log.txt', obs_cmds[4])
    self.assertTrue(obs_cmds[5].startswith('cat'))
    # assertIn, not assertTrue: assertTrue('seqs.fna', msg) can never
    # fail because a non-empty string is always truthy.
    self.assertIn('seqs.fna', obs_cmds[5])
    self.assertTrue(obs_cmds[6].startswith('cat'))
    self.assertIn('seqs_filtered.qual', obs_cmds[6])
def test_get_preprocess_fasta_cmd_sff_run_prefix_match(self):
    """Check the commands generated after adding a 'new.sff' file.

    A one-line 'new.sff' file is created in a temporary directory and
    attached to ``self.raw_data_rp``. The resulting command string is
    split on '; ' and must contain nine entries, whose executable names
    and key arguments are verified.
    """
    work_dir = mkdtemp()
    sff_path = join(work_dir, 'new.sff')
    with open(sff_path, 'w') as handle:
        handle.write('\n')
    self.files_to_remove.append(sff_path)
    self.dirs_to_remove.append(work_dir)
    self.raw_data_rp.add_filepaths([(sff_path, self.raw_sff_id)])

    preprocess_params = Preprocessed454Params(1)
    cmd_str, _ = _get_preprocess_fasta_cmd(
        self.raw_data_rp, self.sff_prep_template_rp, preprocess_params)
    commands = cmd_str.split('; ')

    # The exact command contents are covered by
    # test_get_preprocess_fasta_cmd_sff_no_run_prefix; here only the
    # executables, the value of n and the number of concatenated files
    # are verified.
    self.assertEqual(len(commands), 9)

    # (executable, required fragment, expected number of tokens)
    expectations = [
        ('process_sff.py', None, None),
        ('process_sff.py', None, None),
        ('process_sff.py', None, None),
        ('split_libraries.py', '-n 1', None),
        ('split_libraries.py', '-n 800000', None),
        ('cat', 'split_library_log.txt', None),
        ('cat', 'seqs.fna', 5),
        ('cat', 'seqs_filtered.qual', 5),
    ]
    for command, (executable, fragment, n_tokens) in zip(commands,
                                                         expectations):
        self.assertTrue(command.startswith(executable))
        if fragment is not None:
            self.assertIn(fragment, command)
        if n_tokens is not None:
            self.assertEqual(len(command.split(' ')), n_tokens)
def test_get_preprocess_fasta_cmd_sff_run_prefix_match_error_1(self):
    """A mismatch between run prefixes and file names raises ValueError.

    Two one-line files, 'new.sff' and 'error.sff', are created through
    ``self.path_builder`` and attached to ``self.raw_data_rp``. Building
    the command must then fail.
    """
    # NOTE(review): presumably 'error.sff' is the file without a
    # matching run_prefix -- confirm against the fixture set-up.
    created_paths = []
    for file_name in ('new.sff', 'error.sff'):
        path = self.path_builder(file_name)
        with open(path, 'w') as handle:
            handle.write('\n')
        self.files_to_remove.append(path)
        created_paths.append(path)

    self.raw_data_rp.add_filepaths(
        [(path, self.raw_sff_id) for path in created_paths])

    preprocess_params = Preprocessed454Params(1)
    self.assertRaises(
        ValueError, _get_preprocess_fasta_cmd,
        self.raw_data_rp, self.sff_prep_template_rp, preprocess_params)
def test_get_preprocess_fasta_cmd_sff_run_prefix_match_error_2(self):
    """Run prefixes 'test1', 'test2' and 'error' make the command fail.

    Every row of qiita.prep_1 gets the run_prefix 'test1'; sample
    1.SKB2.640194 is then set to 'test2' and sample 1.SKB8.640193 to
    'error'. Building the command is expected to raise ValueError.
    """
    setup_sql = (
        " UPDATE qiita.prep_1 SET run_prefix='test1'; "
        "UPDATE qiita.prep_1 SET run_prefix='test2' "
        "WHERE sample_id = '1.SKB2.640194'; "
        "UPDATE qiita.prep_1 SET run_prefix='error' "
        "WHERE sample_id = '1.SKB8.640193'; ")
    SQLConnectionHandler().execute(setup_sql)

    # Objects are created only after the database has been modified.
    rd = RawData(3)
    preprocess_params = Preprocessed454Params(1)
    pt = PrepTemplate(1)

    self.assertRaises(
        ValueError, _get_preprocess_fasta_cmd, rd, pt, preprocess_params)
def test_get_preprocess_fasta_cmd_sff(self):
    """Check the complete command built for the SFF files of raw data 3.

    The observed command string is split on '; ' and compared with the
    expected process_sff.py, split_libraries.py and
    convert_fastaqual_fastq.py invocations.
    """
    rd = RawData(3)
    preprocess_params = Preprocessed454Params(1)
    pt = PrepTemplate(1)
    cmd_str, out_dir = _get_preprocess_fasta_cmd(rd, pt, preprocess_params)

    raw_path = partial(join, self.db_dir, 'raw_data')
    sff_fps = [raw_path(name) for name in
               ('preprocess_test1.sff', 'preprocess_test2.sff')]

    # One process_sff.py call is expected per SFF file.
    exp_sff_cmds = [
        ' '.join(["process_sff.py", "-i %s" % sff_fp, "-o %s" % out_dir])
        for sff_fp in sff_fps]

    fna_fps = ','.join(
        [join(out_dir, name)
         for name in ("preprocess_test1.fna", "preprocess_test2.fna")])
    qual_fps = ','.join(
        [join(out_dir, name)
         for name in ("preprocess_test1.qual", "preprocess_test2.qual")])

    exp_split_head = ' '.join(["split_libraries.py", "-f %s" % fna_fps])
    exp_split_tail = ' '.join(
        ["-q %s" % qual_fps, "-d", "-o %s" % out_dir,
         preprocess_params.to_str()])
    exp_convert = ' '.join(
        ["convert_fastaqual_fastq.py",
         "-f %s/seqs.fna" % out_dir,
         "-q %s/seqs_filtered.qual" % out_dir,
         "-o %s" % out_dir,
         "-F"])

    commands = cmd_str.split('; ')

    # The path of the mapping file cannot be known in advance, so the
    # split_libraries.py command is cut around the '-m <path>' argument
    # and the text before and after it is compared separately.
    obs_split_head, after_flag = commands[2].split(' -m ', 1)
    obs_split_tail = after_flag.split(' ', 1)[1]

    self.assertEqual(commands[0], exp_sff_cmds[0])
    self.assertEqual(commands[1], exp_sff_cmds[1])
    self.assertEqual(obs_split_head, exp_split_head)
    self.assertEqual(obs_split_tail, exp_split_tail)
    self.assertEqual(commands[3], exp_convert)
def test_get_preprocess_fasta_cmd_sff_no_run_prefix(self):
    """Check the complete command built from the fixture raw data.

    Uses ``self.raw_data`` and ``self.sff_prep_template``; the expected
    file names are prefixed with the raw data id. The observed command
    string is split on '; ' and compared with the expected
    process_sff.py, split_libraries.py and convert_fastaqual_fastq.py
    invocations.
    """
    preprocess_params = Preprocessed454Params(1)
    cmd_str, out_dir = _get_preprocess_fasta_cmd(
        self.raw_data, self.sff_prep_template, preprocess_params)

    raw_path = partial(join, self.db_dir, 'raw_data')
    rd_id = self.raw_data.id
    sff_fps = [raw_path('%d_preprocess_test1.sff' % rd_id),
               raw_path('%d_preprocess_test2.sff' % rd_id)]

    # One process_sff.py call is expected per SFF file.
    exp_sff_cmds = [
        ' '.join(["process_sff.py", "-i %s" % sff_fp, "-o %s" % out_dir])
        for sff_fp in sff_fps]

    fna_fps = ','.join(
        [join(out_dir, "%s_preprocess_test1.fna" % rd_id),
         join(out_dir, "%s_preprocess_test2.fna" % rd_id)])
    qual_fps = ','.join(
        [join(out_dir, "%s_preprocess_test1.qual" % rd_id),
         join(out_dir, "%s_preprocess_test2.qual" % rd_id)])

    exp_split_head = ' '.join(["split_libraries.py", "-f %s" % fna_fps])
    exp_split_tail = ' '.join(
        ["-q %s" % qual_fps, "-d", "-o %s" % out_dir,
         preprocess_params.to_str()])
    exp_convert = ' '.join(
        ["convert_fastaqual_fastq.py",
         "-f %s/seqs.fna" % out_dir,
         "-q %s/seqs_filtered.qual" % out_dir,
         "-o %s" % out_dir,
         "-F"])

    commands = cmd_str.split('; ')

    # The path of the mapping file cannot be known in advance, so the
    # split_libraries.py command is cut around the '-m <path>' argument
    # and the text before and after it is compared separately.
    obs_split_head, after_flag = commands[2].split(' -m ', 1)
    obs_split_tail = after_flag.split(' ', 1)[1]

    self.assertEqual(commands[0], exp_sff_cmds[0])
    self.assertEqual(commands[1], exp_sff_cmds[1])
    self.assertEqual(obs_split_head, exp_split_head)
    self.assertEqual(obs_split_tail, exp_split_tail)
    self.assertEqual(commands[3], exp_convert)
def test_get_preprocess_fasta_cmd_sff_run_prefix_match(self):
    """Check the commands generated after adding a '1_new.sff' file.

    The filepath '1_new.sff' is attached to raw data 3, every row of
    qiita.prep_1 gets the run_prefix 'preprocess_test' and sample
    1.SKB8.640193 gets 'new'. The resulting command string is split on
    '; ' and must contain nine entries, whose executable names and key
    arguments are verified.
    """
    setup_sql = (
        " INSERT INTO qiita.filepath (filepath_id, filepath, "
        "filepath_type_id, checksum, checksum_algorithm_id, "
        "data_directory_id) VALUES (19, '1_new.sff', 17, 852952723, 1, 5); "
        "INSERT INTO qiita.raw_filepath (raw_data_id , filepath_id) "
        "VALUES (3, 19); "
        "UPDATE qiita.prep_1 SET run_prefix='preprocess_test'; "
        "UPDATE qiita.prep_1 SET run_prefix='new' "
        "WHERE sample_id = '1.SKB8.640193'; ")
    SQLConnectionHandler().execute(setup_sql)

    # Objects are created only after the database has been modified.
    rd = RawData(3)
    preprocess_params = Preprocessed454Params(1)
    pt = PrepTemplate(1)

    cmd_str, _ = _get_preprocess_fasta_cmd(rd, pt, preprocess_params)
    commands = cmd_str.split('; ')

    # The exact command contents are covered by
    # test_get_preprocess_fasta_cmd_sff_no_run_prefix; here only the
    # executables, the value of n and the number of concatenated files
    # are verified.
    self.assertEqual(len(commands), 9)

    # (executable, required fragment, expected number of tokens)
    expectations = [
        ('process_sff.py', None, None),
        ('process_sff.py', None, None),
        ('process_sff.py', None, None),
        ('split_libraries.py', '-n 1', None),
        ('split_libraries.py', '-n 800000', None),
        ('cat', 'split_library_log.txt', None),
        ('cat', 'seqs.fna', 5),
        ('cat', 'seqs_filtered.qual', 5),
    ]
    for command, (executable, fragment, n_tokens) in zip(commands,
                                                         expectations):
        self.assertTrue(command.startswith(executable))
        if fragment is not None:
            self.assertIn(fragment, command)
        if n_tokens is not None:
            self.assertEqual(len(command.split(' ')), n_tokens)
def test_get_preprocess_sff_gz_cmd(self):
    """Check that *.sff.gz files are handled correctly.

    The command built for ``self.raw_data_gz`` is split on '; ' and must
    consist of three commands: process_sff.py reading a .sff.gz file,
    split_libraries.py and convert_fastaqual_fastq.py.
    """
    params = Preprocessed454Params(1)
    obs_cmd, _ = _get_preprocess_fasta_cmd(
        self.raw_data_gz, self.sff_prep_template, params)

    obs_cmds = obs_cmd.split('; ')
    # Assuming that all the other tests pass, we only need to test the
    # gz file format.
    self.assertEqual(len(obs_cmds), 3)

    # Every piece of each pattern is a raw string so that '\s' reaches
    # the regex engine untouched (it is an invalid escape sequence in a
    # normal string literal), and literal dots are escaped so that they
    # cannot match arbitrary characters.
    # NOTE(review): assertRegexpMatches is kept for compatibility with
    # the unittest version this file targets; it is a deprecated alias
    # of assertRegex on Python 3.
    self.assertRegexpMatches(
        obs_cmds[0],
        r'process_sff\.py\s+.*'
        r'-i\s+.*\.sff\.gz\s+')
    self.assertRegexpMatches(
        obs_cmds[1],
        r'split_libraries\.py\s+.*'
        r'-f\s+.*\.fna\s+.*'
        r'-q\s+.*\.qual\s+')
    self.assertRegexpMatches(
        obs_cmds[2],
        r'convert_fastaqual_fastq\.py.*'
        r'-f\s+.*seqs\.fna\s+.*'
        r'-q\s+.*seqs_filtered\.qual\s+')
def test_get_preprocess_fasta_cmd_sff_run_prefix(self):
    """Check the commands generated for the run-prefix fixtures.

    The command built from ``self.raw_data_rp`` and
    ``self.sff_prep_template_rp`` is split on '; ' and must contain
    eight entries, whose executable names and key arguments are
    verified.
    """
    preprocess_params = Preprocessed454Params(1)
    cmd_str, _ = _get_preprocess_fasta_cmd(
        self.raw_data_rp, self.sff_prep_template_rp, preprocess_params)
    commands = cmd_str.split('; ')

    # The exact command contents are covered by
    # test_get_preprocess_fasta_cmd_sff_no_run_prefix; here only the
    # executables and the value of n are verified.
    self.assertEqual(len(commands), 8)

    # (executable, required fragment)
    expectations = [
        ('process_sff.py', None),
        ('process_sff.py', None),
        ('split_libraries.py', '-n 1'),
        ('split_libraries.py', '-n 800000'),
        ('cat', 'split_library_log.txt'),
        ('cat', 'seqs.fna'),
        ('cat', 'seqs_filtered.qual'),
    ]
    for command, (executable, fragment) in zip(commands, expectations):
        self.assertTrue(command.startswith(executable))
        if fragment is not None:
            self.assertIn(fragment, command)