def pick_closed_reference_otus(qclient, job_id, parameters, out_dir):
    """Run closed-reference OTU picking with the given parameters

    Parameters
    ----------
    qclient : tgp.qiita_client.QiitaClient
        The Qiita server client
    job_id : str
        The job id
    parameters : dict
        The parameter values to run OTU picking
    out_dir : str
        The path to the job's output directory

    Returns
    -------
    bool, list or None, str
        Whether the job completed successfully, the artifacts information
        (None on failure) and an error message (empty string on success)
    """
    qclient.update_job_step(job_id, "Step 1 of 4: Collecting information")
    artifact_id = parameters['input_data']
    fps_info = qclient.get("/qiita_db/artifacts/%s/filepaths/" % artifact_id)
    fps = fps_info['filepaths']

    reference_id = parameters['reference']
    ref_info = qclient.get("/qiita_db/references/%s/filepaths/" % reference_id)
    reference_fps = ref_info['filepaths']

    qclient.update_job_step(job_id, "Step 2 of 4: Generating command")
    command, pick_out = generate_pick_closed_reference_otus_cmd(
        fps, out_dir, parameters, reference_fps)

    qclient.update_job_step(job_id, "Step 3 of 4: Executing OTU picking")
    std_out, std_err, return_value = system_call(command)
    if return_value != 0:
        error_msg = ("Error running OTU picking:\nStd out: %s\nStd err: %s"
                     % (std_out, std_err))
        return False, None, error_msg

    qclient.update_job_step(job_id,
                            "Step 4 of 4: Generating tgz sortmerna folder")
    try:
        generate_sortmerna_tgz(pick_out)
    except Exception as e:
        # best-effort tgz generation: report the failure instead of crashing
        error_msg = ("Error while tgz failures:\nError: %s" % str(e))
        return False, None, error_msg

    artifacts_info = generate_artifact_info(pick_out)

    return True, artifacts_info, ""
def split_libraries_fastq(qclient, job_id, parameters, out_dir):
    """Run split libraries fastq with the given parameters

    Parameters
    ----------
    qclient : tgp.qiita_client.QiitaClient
        The Qiita server client
    job_id : str
        The job id
    parameters : dict
        The parameter values to run split libraries
    out_dir : str
        The path to the job's output directory

    Returns
    -------
    dict
        The results of the job

    Raises
    ------
    RuntimeError
        If there is an error running split_libraries_fastq.py
    """
    # Step 1 get the rest of the information need to run split libraries
    update_job_step(qclient, job_id, "Step 1 of 4: Collecting information")
    artifact_id = parameters['input_data']
    filepaths, mapping_file, atype = get_artifact_information(
        qclient, artifact_id)

    # Step 2 generate the split libraries fastq command
    update_job_step(qclient, job_id, "Step 2 of 4: Generating command")
    command, sl_out = generate_split_libraries_fastq_cmd(
        filepaths, mapping_file, atype, out_dir, parameters)

    # Step 3 execute split libraries
    update_job_step(
        qclient, job_id,
        "Step 3 of 4: Executing demultiplexing and quality control")
    std_out, std_err, return_value = system_call(command)
    if return_value != 0:
        raise RuntimeError(
            "Error processing files:\nStd output: %s\n Std error:%s"
            % (std_out, std_err))

    # Step 4 generate the demux file
    update_job_step(qclient, job_id, "Step 4 of 4: Generating demux file")
    generate_demux_file(sl_out)

    artifacts_info = generate_artifact_info(sl_out)

    return format_payload(True, artifacts_info=artifacts_info)
def test_system_call_error(self):
    """system_call reports the shell's failure for an unknown command."""
    obs_out, obs_err, obs_val = system_call("IHopeThisCommandDoesNotExist")
    self.assertEqual(obs_out, "")
    # assertIn gives a useful failure message, unlike assertTrue(x in y)
    self.assertIn("command not found", obs_err)
    # 127 is the shell's exit status for "command not found"
    self.assertEqual(obs_val, 127)
def test_system_call(self):
    """system_call returns stdout, stderr and exit status of a command."""
    stdout, stderr, exit_status = system_call("pwd")
    self.assertEqual(exit_status, 0)
    self.assertEqual(stderr, "")
    self.assertEqual(stdout, "%s\n" % getcwd())
def split_libraries(qclient, job_id, parameters, out_dir):
    """Run split libraries with the given parameters

    Parameters
    ----------
    qclient : tgp.qiita_client.QiitaClient
        The Qiita server client
    job_id : str
        The job id
    parameters : dict
        The parameter values to run split libraries
    out_dir : str
        The path to the job's output directory

    Returns
    -------
    dict
        The results of the job

    Raises
    ------
    NotImplementedError
        If one of the filepath types attached to the artifact is not
        recognized
    ValueError
        If the artifact has SFF and fasta files
        If the artifact has qual files but not fasta files
        If the artifact has fasta files but not quals
    RuntimeError
        If there is an error processing an sff file
        If there is an error running split_libraries.py
        If there is an error merging the results
    """
    # Step 1 get the rest of the information need to run split libraries
    update_job_step(qclient, job_id, "Step 1 of 4: Collecting information")
    artifact_id = parameters['input_data']
    filepaths, mapping_file, atype = get_artifact_information(
        qclient, artifact_id)

    # Step 2 generate the split libraries command
    update_job_step(qclient, job_id, "Step 2 of 4: preparing files")
    sffs = []
    seqs = []
    quals = []
    for fp, fp_type in filepaths:
        if fp_type == 'raw_sff':
            sffs.append(fp)
        elif fp_type == 'raw_fasta':
            seqs.append(fp)
        elif fp_type == 'raw_qual':
            quals.append(fp)
        elif fp_type == 'html_summary':
            # Ignore the HTML summary file (consistent with the sibling
            # implementation of this function)
            continue
        else:
            raise NotImplementedError("File type not supported %s" % fp_type)

    if seqs and sffs:
        raise ValueError('Cannot have SFF and raw fasta on the same artifact')
    elif quals and not seqs:
        raise ValueError('Cannot have just qual files on the artifact, you '
                         'also need raw fasta files')
    elif seqs and not quals:
        raise ValueError('It is not currently possible to process fasta '
                         'file(s) without qual file(s). This will be '
                         'supported in the future. You can track progress on '
                         'this by following: '
                         'https://github.com/biocore/qiita/issues/953')
    elif seqs:
        # sort so fasta/qual files are paired up positionally
        seqs = sorted(seqs)
        quals = sorted(quals)
    else:
        cmds, seqs, quals = generate_process_sff_commands(sffs, out_dir)
        len_cmds = len(cmds)
        for i, cmd in enumerate(cmds):
            update_job_step(
                qclient, job_id,
                "Step 2 of 4: preparing files (processing sff file %d of %d)"
                % (i + 1, len_cmds))  # i + 1: report 1-based progress
            std_out, std_err, return_value = system_call(cmd)
            if return_value != 0:
                raise RuntimeError(
                    "Error processing sff file:\nStd output: %s\n Std error:%s"
                    % (std_out, std_err))

    output_dir = join(out_dir, 'sl_out')

    commands, sl_outs = generate_split_libraries_cmd(seqs, quals, mapping_file,
                                                     output_dir, parameters)

    # Step 3 execute split libraries
    cmd_len = len(commands)
    for i, cmd in enumerate(commands):
        update_job_step(
            qclient, job_id,
            "Step 3 of 4: Executing demultiplexing and quality control "
            "(%d of %d)" % (i + 1, cmd_len))  # i + 1: 1-based progress
        std_out, std_err, return_value = system_call(cmd)
        if return_value != 0:
            raise RuntimeError(
                "Error running split libraries:\nStd output: %s\nStd error:%s"
                % (std_out, std_err))

    # Step 4 merging results
    if cmd_len > 1:
        update_job_step(qclient, job_id,
                        "Step 4 of 4: Merging results (concatenating files)")
        to_cat = ['split_library_log.txt', 'seqs.fna']
        if quals:
            to_cat.append('seqs_filtered.qual')
        for tc in to_cat:
            files = [join(x, tc) for x in sl_outs]
            cmd = "cat %s > %s" % (' '.join(files), join(output_dir, tc))
            std_out, std_err, return_value = system_call(cmd)
            if return_value != 0:
                raise RuntimeError(
                    "Error concatenating %s files:\nStd output: %s\n"
                    "Std error:%s" % (tc, std_out, std_err))

    if quals:
        update_job_step(
            qclient, job_id,
            "Step 4 of 4: Merging results (converting fastqual to fastq)")
        cmd = ("convert_fastaqual_fastq.py -f %s -q %s -o %s -F"
               % (join(output_dir, 'seqs.fna'),
                  join(output_dir, 'seqs_filtered.qual'),
                  output_dir))
        std_out, std_err, return_value = system_call(cmd)
        if return_value != 0:
            # include the command output, consistent with the other errors
            raise RuntimeError(
                "Error converting the fasta/qual files to fastq:\n"
                "Std output: %s\nStd error:%s" % (std_out, std_err))

    update_job_step(qclient, job_id,
                    "Step 4 of 4: Merging results (generating demux file)")
    generate_demux_file(output_dir)

    artifacts_info = generate_artifact_info(output_dir)

    return format_payload(True, artifacts_info=artifacts_info)
def split_libraries(qclient, job_id, parameters, out_dir):
    """Run split libraries with the given parameters

    Parameters
    ----------
    qclient : tgp.qiita_client.QiitaClient
        The Qiita server client
    job_id : str
        The job id
    parameters : dict
        The parameter values to run split libraries
    out_dir : str
        The path to the job's output directory

    Returns
    -------
    dict
        The results of the job

    Raises
    ------
    NotImplementedError
        If one of the filepath types attached to the artifact is not
        recognized
    ValueError
        If the artifact has SFF and fasta files
        If the artifact has qual files but not fasta files
        If the artifact has fasta files but not quals
    RuntimeError
        If there is an error processing an sff file
        If there is an error running split_libraries.py
        If there is an error merging the results
    """
    # Step 1 get the rest of the information need to run split libraries
    qclient.update_job_step(job_id, "Step 1 of 4: Collecting information")
    artifact_id = parameters['input_data']
    filepaths, mapping_file, atype = get_artifact_information(
        qclient, artifact_id)

    # Step 2 generate the split libraries command
    qclient.update_job_step(job_id, "Step 2 of 4: preparing files")
    sffs = []
    seqs = []
    quals = []
    for fp, fp_type in filepaths:
        if fp_type == 'raw_sff':
            sffs.append(fp)
        elif fp_type == 'raw_fasta':
            seqs.append(fp)
        elif fp_type == 'raw_qual':
            quals.append(fp)
        elif fp_type == 'html_summary':
            # Ignore the HTML summary file
            continue
        else:
            raise NotImplementedError("File type not supported %s" % fp_type)

    if seqs and sffs:
        raise ValueError('Cannot have SFF and raw fasta on the same artifact')
    elif quals and not seqs:
        raise ValueError('Cannot have just qual files on the artifact, you '
                         'also need raw fasta files')
    elif seqs and not quals:
        raise ValueError('It is not currently possible to process fasta '
                         'file(s) without qual file(s). This will be '
                         'supported in the future. You can track progress on '
                         'this by following: '
                         'https://github.com/biocore/qiita/issues/953')
    elif seqs:
        # sort so fasta/qual files are paired up positionally
        seqs = sorted(seqs)
        quals = sorted(quals)
    else:
        cmds, seqs, quals = generate_process_sff_commands(sffs, out_dir)
        len_cmds = len(cmds)
        for i, cmd in enumerate(cmds):
            qclient.update_job_step(
                job_id,
                "Step 2 of 4: preparing files (processing sff file %d of %d)"
                % (i + 1, len_cmds))  # i + 1: report 1-based progress
            std_out, std_err, return_value = system_call(cmd)
            if return_value != 0:
                raise RuntimeError(
                    "Error processing sff file:\nStd output: %s\n Std error:%s"
                    % (std_out, std_err))

    output_dir = join(out_dir, 'sl_out')

    commands, sl_outs = generate_split_libraries_cmd(
        seqs, quals, mapping_file, output_dir, parameters)

    # Step 3 execute split libraries
    cmd_len = len(commands)
    for i, cmd in enumerate(commands):
        qclient.update_job_step(
            job_id,
            "Step 3 of 4: Executing demultiplexing and quality control "
            "(%d of %d)" % (i + 1, cmd_len))  # i + 1: 1-based progress
        std_out, std_err, return_value = system_call(cmd)
        if return_value != 0:
            raise RuntimeError(
                "Error running split libraries:\nStd output: %s\nStd error:%s"
                % (std_out, std_err))

    # Step 4 merging results
    if cmd_len > 1:
        qclient.update_job_step(
            job_id, "Step 4 of 4: Merging results (concatenating files)")
        to_cat = ['split_library_log.txt', 'seqs.fna']
        if quals:
            to_cat.append('seqs_filtered.qual')
        for tc in to_cat:
            files = [join(x, tc) for x in sl_outs]
            cmd = "cat %s > %s" % (' '.join(files), join(output_dir, tc))
            std_out, std_err, return_value = system_call(cmd)
            if return_value != 0:
                raise RuntimeError(
                    "Error concatenating %s files:\nStd output: %s\n"
                    "Std error:%s" % (tc, std_out, std_err))

    if quals:
        qclient.update_job_step(
            job_id,
            "Step 4 of 4: Merging results (converting fastqual to fastq)")
        cmd = ("convert_fastaqual_fastq.py -f %s -q %s -o %s -F"
               % (join(output_dir, 'seqs.fna'),
                  join(output_dir, 'seqs_filtered.qual'),
                  output_dir))
        std_out, std_err, return_value = system_call(cmd)
        if return_value != 0:
            # include the command output, consistent with the other errors
            raise RuntimeError(
                "Error converting the fasta/qual files to fastq:\n"
                "Std output: %s\nStd error:%s" % (std_out, std_err))

    qclient.update_job_step(
        job_id, "Step 4 of 4: Merging results (generating demux file)")
    generate_demux_file(output_dir)

    artifacts_info = generate_artifact_info(output_dir)

    return format_payload(True, artifacts_info=artifacts_info)
def pick_closed_reference_otus(qclient, job_id, parameters, out_dir):
    """Run closed-reference OTU picking with the given parameters

    Parameters
    ----------
    qclient : tgp.qiita_client.QiitaClient
        The Qiita server client
    job_id : str
        The job id
    parameters : dict
        The parameter values to run OTU picking
    out_dir : str
        The path to the job's output directory

    Returns
    -------
    dict
        The results of the job

    Raises
    ------
    ValueError
        If there is any error gathering the information from the server
    """
    def _get_filepaths(endpoint, what):
        # Fetch a filepaths response from the server and validate it,
        # raising ValueError with a descriptive message on failure.
        info = qclient.get(endpoint)
        if not info or not info['success']:
            error_msg = "Could not get %s filepath information: %s" % (
                what,
                info['error'] if info else "could not connect with the server")
            raise ValueError(error_msg)
        return info['filepaths']

    update_job_step(qclient, job_id, "Step 1 of 3: Collecting information")
    artifact_id = parameters['input_data']
    fps = _get_filepaths(
        "/qiita_db/artifacts/%s/filepaths/" % artifact_id, "artifact")

    reference_id = parameters['reference']
    # the original message wrongly said "artifact" here; say "reference"
    reference_fps = _get_filepaths(
        "/qiita_db/references/%s/filepaths/" % reference_id, "reference")

    update_job_step(qclient, job_id, "Step 2 of 3: Generating command")
    command, pick_out = generate_pick_closed_reference_otus_cmd(
        fps, out_dir, parameters, reference_fps)

    update_job_step(qclient, job_id, "Step 3 of 3: Executing OTU picking")
    std_out, std_err, return_value = system_call(command)
    if return_value != 0:
        error_msg = ("Error running OTU picking:\nStd out: %s\nStd err: %s"
                     % (std_out, std_err))
        return format_payload(False, error_msg=error_msg)

    artifacts_info = generate_artifact_info(pick_out)

    return format_payload(True, artifacts_info=artifacts_info)