def manage_analysis(self):
    """Run the downstream analysis of the Blast results.

    Parses the Blast XML output into a SQLite DB, annotates it
    taxonomically, subsets it, and optionally builds a Krona report.

    Returns:
        [parser_out, annotated_output] on success, or [] when the
        analysis cannot be run (MetaCV mode, parse failure, tabular
        input, or unsupported format).
    """
    # MetaCV results cannot be analysed this way; a blastn run with
    # XML output (outfmt 5) is required.
    # BUGFIX: was `self.annotation_mode in 'metacv'` -- a substring
    # test that also matches 'meta', 'cv', '' etc.; equality is meant.
    if self.annotation_mode == 'metacv':
        print_verbose("For a detailed analysis blastn with XML output is needed")
        return []

    # test for blastrun with outfmt 5 mode
    if is_xml(self.blast_output):
        # create a SQLite DB from the xml file
        self.parse_to_db(to_string(self.blast_output), self.parsed_db_out)
        # test the exit code, because the next script needs the output as input
        # BUGFIX: was `self.exitcode is 0` (identity test on an int literal)
        if self.exitcode != 0:
            print_verbose("ERROR: XML file could not be parsed")
            return []

        # update input
        parser_out = absolute_path(update_reads(self.parsed_db_out,
                                                self.parser_name, 'db'))
        # raise step_number
        self.step_number += 1
        # BUGFIX: initialise so the return below cannot raise NameError
        # when the annotation step fails
        annotated_output = ''
        # create a new database with taxonomical annotations
        self.annotate_db(parser_out, self.annotated_db_out)
        # test the exit code, because the next script needs the output as input
        if self.exitcode == 0:
            # update input
            annotated_output = absolute_path(update_reads(self.annotated_db_out,
                                                          self.R_annotate_name, 'db'))
            # raise step_number
            self.step_number += 1
            # subset the taxonomical database for a better and
            # faster analysis after the pipline has finished
            self.subset_db(annotated_output, self.subseted_db_out, parser_out)
            # raise step_number
            self.step_number += 1
        else:
            print_verbose("ERROR: Annotated Database could not be subseted correctly")
        # create a pie chart of the blast data with Krona Webtools
        if self.krona:
            self.krona_report(self.blast_output, self.krona_report_out, parser_out)
        return [parser_out, annotated_output]
    # test for blast tabular output
    elif is_tabular(self.blast_output) and self.krona:
        self.krona_report(self.blast_output, self.krona_report_out, '')
        return []
    else:
        print_verbose("ERROR: Blast output file is not in xml or tabular format.\n" +
                      "Please use outfmt 5 or 6 for Blast run")
        return []
def manage_annotation(self):
    """Dispatch the annotation step to blastn, MetaCV, or both tools.

    Returns:
        [self.step_number, blastn_out, metacv_out] -- output paths are
        empty strings for tools that were not run.
    """
    blastn_out = ''
    metacv_out = ''
    mode = self.mode.lower()

    if mode == 'blastn':
        # only run when the executable exists and is runnable
        if is_executable(self.blastn_exe):
            # start annotation with blastn and record its output file
            self.blastn(self.blast_dir)
            blastn_out = update_reads(self.blast_dir, 'blastn',
                                      blast_output(self.outfmt).split('.')[1])
            self.step_number += 1
    elif mode == 'metacv':
        # only run when the executable exists and is runnable
        if is_executable(self.metacv_exe):
            # start annotation with metacv and record its output file
            self.metacv(self.metacv_dir)
            metacv_out = update_reads(self.metacv_dir, self.metacv_name, 'res')
            self.step_number += 1
    else:
        # run both tools; both executables must be available
        if is_executable(self.blastn_exe) and is_executable(self.metacv_exe):
            self.blastn(self.blast_dir)
            # test for ending and set the right blast output
            blastn_out = update_reads(self.blast_dir, 'blastn',
                                      blast_output(self.outfmt).split('.')[1])
            self.metacv(self.metacv_dir)
            metacv_out = update_reads(self.metacv_dir, self.metacv_name, 'res')
            self.step_number += 1

    return [self.step_number, blastn_out, metacv_out]
def manage_assembly(self):
    """Run read concatenation (flash), assembly (metavelvet), or both.

    Updates self.input with the product of the last completed step.

    Returns:
        [self.step_number, self.input, concatinated, assembled]
    """
    concatinated = ''
    assembled = ''
    # run the assembling functions when the module is initialized
    if self.mode.lower() == 'flash':
        # is executable existing and runnable?
        if is_executable(self.flash_exe):
            # start concatination and update the input for next step
            self.concatinate(self.concat_out)
            self.step_number += 1
            # merge the concatinated reads with non concatinated rest
            if self.merge_uncombined:
                self.input = merge_files(
                    [to_string(update_reads(self.concat_out, 'extendedFrags', 'fastq')),
                     to_string(update_reads(self.concat_out, 'out.notCombined', 'fastq'))],
                    self.concat_out, 'merged_concat', 'fastq')
            else:
                concatinated = update_reads(self.concat_out, 'extendedFrags', 'fastq')
                self.input = concatinated
    if self.mode.lower() == 'metavelvet':
        # is executable existing and runnable?
        # NOTE(review): the original tested velveth_exe twice and never
        # velvetg -- likely a copy-paste bug; confirm whether a
        # self.velvetg_exe attribute exists and should be checked here.
        if is_executable(self.velveth_exe) and is_executable(self.metavelvet_exe):
            # start assembly and update the input for next step
            self.assemble_reads(self.assembly_out)
            assembled = update_reads(self.assembly_out, 'meta-velvetg', 'fa')
            self.input = assembled
            self.step_number += 1
    if self.mode.lower() == 'both':
        #TODO: not working because of auto mode --> see logs
        # is executable existing and runnable?
        # NOTE(review): same duplicated velveth_exe check as above.
        if (is_executable(self.flash_exe) and is_executable(self.velveth_exe)
                and is_executable(self.metavelvet_exe)):
            # start processing and update the input for next step
            self.concatinate(self.concat_out)
            # BUGFIX: was update_reads(self.out, ...) -- inconsistent with
            # the flash branch, which reads from self.concat_out
            concatinated = update_reads(self.concat_out, 'extendedFrags', 'fastq')
            self.input = concatinated
            self.assemble_reads(self.assembly_out)
            assembled = update_reads(self.assembly_out, 'meta-velvetg', 'fa')
            self.step_number += 1
            self.input = assembled
    return [self.step_number, self.input, concatinated, assembled]
def manage_preprocessing(self):
    """Run the optional FastQC quality check and read trimming steps.

    Returns:
        [self.step_number, trimmed_reads] when trimming ran,
        otherwise [self.step_number].
    """
    # non-FastQ input cannot be quality-checked or trimmed -- disable both
    try:
        is_fastq(self.input)
    except FastQException:
        self.quality = False
        self.trim = False

    # quality check step (requires a runnable fastqc executable)
    if self.quality and is_executable(self.fastqc_exe):
        self.qualityCheck()
        # raise the step number for cmd output
        self.step_number += 1
        #self.files.set_quality_report()

    # trimming step (requires a runnable trim executable)
    if self.trim and is_executable(self.trim_exe):
        self.trim_and_filter()
        # raise the step number for cmd output
        self.step_number += 1
        return [self.step_number, update_reads(self.trim_dir, 'val', 'fq')]
    return [self.step_number]
def metacv(self, outputdir):
    """Annotate reads with MetaCV: classify, then res2table, then res2sum.

    Args:
        outputdir: directory that receives all MetaCV output files.

    Raises:
        MetaCVException: when the classify stage exits non-zero.
        MetaCVSumException: when res2table or res2sum exits non-zero.
    """
    # create a dir for output
    create_outputdir(outputdir)
    # select the input for metacv and convert it to a usable format
    if self.contigs is True:
        metacv_input = to_string(self.input)
    else:
        metacv_input = to_string(self.raw)
    # print actual informations about the step on stdout
    print_step(self.step_number, 'Annotation',
               'Annotate bacterial reads with MetaCV',
               '%s %s %s' % (self.metacv_seq, self.metacv_mode, self.metacv_orf))
    newline()
    # metacv has a maximum thread number of 64
    # parameter has to be adjusted
    threads = min(self.threads, 64)

    # stage 1: classify reads against the MetaCV database
    self._run_metacv_stage(
        '%s classify %s %s %s %s %s %s --threads=%s' %
        (self.metacv_exe, self.metacv_db, metacv_input, self.metacv_name,
         self.metacv_seq, self.metacv_mode, self.metacv_orf, threads),
        outputdir, 'metacv.classify.log', MetaCVException)

    # stage 2: analyse the classify results
    print_step(self.step_number, 'Annotation', 'Analyse the results of MetaCV',
               '%s %s %s' % (self.metacv_total_reads, self.metacv_min_qual,
                             self.metacv_taxon))
    newline()
    self._run_metacv_stage(
        '%s res2table %s %s %s %s %s %s --threads=%s' %
        (self.metacv_exe, self.metacv_db,
         to_string(update_reads(outputdir, 'metpipe', 'res')),
         self.metacv_name + '.res2table',
         self.metacv_total_reads, self.metacv_min_qual, self.metacv_taxon,
         threads),
        outputdir, 'metacv.res2table.log', MetaCVSumException)

    # stage 3: summarize the results
    print_step(self.step_number, 'Annotation', 'Summarize the results of MetaCV',
               self.metacv_min_qual)
    newline()
    # the workingdir must be specified to maintain the correct
    # order of output files
    self._run_metacv_stage(
        '%s res2sum %s %s %s %s' %
        (self.metacv_exe, self.metacv_db,
         to_string(update_reads(outputdir, 'metpipe', 'res')),
         self.metacv_name + '.res2sum', self.metacv_min_qual),
        outputdir, 'metacv.res2sum.log', MetaCVSumException)

    # print summary of the process after completion
    print_verbose('Annotation with MetaCV complete \n')
    print_running_time(self.time)
    newline()

def _run_metacv_stage(self, cmd, outputdir, logname, exc_cls):
    """Run one MetaCV subcommand, streaming its stderr to screen and log.

    Args:
        cmd: full command line string (split with shlex).
        outputdir: working directory for the subprocess.
        logname: log file name (created under self.logdir).
        exc_cls: exception class raised on a non-zero exit code.
    """
    logfile = open_logfile(self.logdir + logname)
    # start the MetaCV subcommand and wait until completion
    p = subprocess.Popen(shlex.split(cmd),
                         stderr=subprocess.PIPE,
                         stdout=subprocess.PIPE,
                         cwd=outputdir + os.sep)
    # during processing pipe the output and print it on screen.
    # BUGFIX: the original called p.stderr.readline() twice per
    # iteration, so the printed line and the logged line were
    # DIFFERENT lines -- every other line was lost from each sink.
    # Read once and send the same line to both.
    while p.poll() is None:
        line = p.stderr.readline()
        if self.verbose:
            print_verbose(line)
        else:
            print_compact(line.rstrip('\n'))
        logfile.write(line)
    # wait until process is finished
    p.wait()
    # BUGFIX: close the log handle (was leaked)
    logfile.close()
    if p.returncode:
        raise exc_cls(self.logdir + logname)
    # remove unused error logs
    remove_empty_logfile(self.logdir + logname)
def krona_report(self, input, output, parser_output):
    """Create a Krona HTML pie chart from Blast tabular or XML results.

    Args:
        input: Blast output file (tabular or XML).
        output: directory for the generated report.
        parser_output: SQLite DB from the XML parser ('' for tabular input).

    Raises:
        KronaFormatException: when input is neither tabular nor XML.
        KronaException: when the Krona import script exits non-zero.
    """
    # create a dir for output
    create_outputdir(output)
    # generate path and name for output file
    outfile = output + os.sep + self.krona_name + '.html'
    # test type of input file
    if is_tabular(input):
        # print actual informations about the step on stdout
        print_step(self.step_number, 'Analysis',
                   'Create Overview from tabular output', self.krona_parameter)
        newline()
        self._run_krona_import(input, outfile)
    elif is_xml(input) and is_db(parser_output):
        print_step(self.step_number, 'Analysis',
                   'Create Overview from XML output', self.krona_parameter)
        # convert the values from database to tabular format
        extract_tabular(to_string(parser_output), output)
        # set the new input
        input = update_reads(output, 'extracted_from_DB', 'tab')
        self._run_krona_import(input, outfile)
    elif not is_tabular(input) and not is_xml(input):
        # BUGFIX: was `or`, which is true for EVERY file (nothing is both
        # tabular and XML), raising a format error even for valid XML with
        # a bad parser DB and making the final else unreachable.
        # `and` means "neither supported format".
        raise KronaFormatException()
    else:
        # XML input whose parser DB is missing/invalid
        print_verbose('ERROR 25: Krona Report could not be generated, because of unknown reasons')
        sys.exit(1)

def _run_krona_import(self, input, outfile):
    """Run the Krona import script on a tabular file and log its output."""
    # start the Krona import script for Blast tabular output
    # pipe all output for stdout in a logfile
    p = subprocess.Popen(shlex.split('perl -l %s %s -o %s %s %s' %
                                     (self.perl_lib, self.krona_exe, outfile,
                                      self.krona_parameter, to_string(input))),
                         stdout=open_logfile(self.logdir + 'krona.log'),
                         stderr=open_logfile(self.logdir + 'krona.err.log'))
    # wait until process is complete
    p.wait()
    if p.returncode:
        raise KronaException(self.logdir + 'krona.err.log')
    # remove unused error logs
    remove_empty_logfile(self.logdir + 'krona.err.log')
    # print summary of the process after completion
    print_verbose('Creation of Krona Pie Chart complete \n')
    print_running_time(self.time)
    newline()