def subset_db(self, input, output, parser_out):
    """Run the subset executable once for every configured classifier.

    For each entry of ``self.R_subset_classifier`` a database file
    ``<output>/<classifier>.db`` is (re)created and the tool's stdout is
    written to ``<logdir><classifier>.log``.

    Args:
        input: annotated database(s); converted with ``to_string`` and
            passed to the tool as ``-i``.
        output: directory that receives the subset databases.
        parser_out: parser result(s); converted with ``to_string`` and
            passed to the tool as ``--blast``.

    Raises:
        SubsetDBException: carrying the log file path, as soon as one run
            exits with a non-zero return code.
    """
    # create a dir for output
    create_outputdir(output)

    # several classifiers may be configured, so the database is subsetted
    # once per classifier (the matching rank is looked up by position)
    for i, classifier in enumerate(self.R_subset_classifier):
        rank = self.R_subset_rank[i]
        log_path = self.logdir + classifier + '.log'

        # print actual informations about the step on stdout
        print_step(self.step_number,
                   'Analysis',
                   'Subset the database for %s' % (classifier),
                   '--bitscore %s --rank %s' % (self.R_subset_bitscore, rank))
        newline()

        # generate name for database file
        outfile = '%s%s%s%s' % (output, os.sep, classifier, '.db')
        logfile = open_logfile(log_path)
        try:
            # remove old databases with the same name
            if os.path.exists(outfile):
                os.remove(outfile)

            # start the process with the current classifier and the complete
            # output from the annotation step before
            p = subprocess.Popen(
                shlex.split('%s -i %s -o %s --classifier %s --bitscore %s --rank %s --taxon %s --blast %s'
                            % (self.R_subset_exe,
                               to_string(input),
                               outfile,
                               classifier,
                               self.R_subset_bitscore,
                               rank,
                               self.R_subset_taxon_db,
                               to_string(parser_out))),
                stdout=subprocess.PIPE)

            # Read each line exactly once and send it to every sink. Reading
            # until EOF (instead of polling the process) also keeps the
            # output that is still buffered when the process exits.
            while True:
                line = p.stdout.readline()
                if not line:
                    break
                if self.verbose:
                    print_verbose(line)
                logfile.write(line)

            # wait until process is complete
            p.wait()
        finally:
            # flush the log to disk before anybody is pointed at it
            logfile.close()

        if p.returncode:
            raise SubsetDBException(log_path)

    # print summary of the process after completion
    # NOTE(review): assumed to be printed once after all classifiers were
    # processed, not once per classifier -- confirm.
    print_verbose('Subsetting of annotated Blast database complete \n')
    print_running_time(self.time)
    newline()
def annotate_db(self, input, output):
    """Annotate taxonomical data to the blast database.

    Runs ``self.R_annotate_exe`` and writes its stdout to
    ``<logdir>annotation_of_db.log``. The database is written to
    ``<output>/<self.R_annotate_name>.db``; an existing file with that
    name is removed first. The exit code of the tool is stored in
    ``self.exitcode``.

    Args:
        input: database(s) to annotate; converted with ``to_string`` and
            passed to the tool as ``-i``.
        output: directory that receives the annotated database.

    Raises:
        AnnotateDBException: carrying the log file path, when the tool
            exits with a non-zero return code.
    """
    # create a dir for output
    create_outputdir(output)

    # generate filename for db
    outfile = output + os.sep + self.R_annotate_name + '.db'
    log_path = self.logdir + 'annotation_of_db.log'

    # open a logfile for annotation process
    logfile = open_logfile(log_path)
    try:
        # remove old databases with same name
        if os.path.exists(outfile):
            os.remove(outfile)

        # print actual informations about the step on stdout
        print_step(self.step_number,
                   'Analysis',
                   'Annotate taxonomical data to blast database',
                   self.R_annotate_parameter)
        newline()

        # start the annotation
        p = subprocess.Popen(
            shlex.split('%s -i %s -o %s %s --taxon %s'
                        % (self.R_annotate_exe,
                           to_string(input),
                           outfile,
                           self.R_annotate_parameter,
                           self.R_annotate_taxon_db)),
            stdout=subprocess.PIPE)

        # Read each line exactly once so that verbose output and the log
        # both receive it; reading until EOF keeps the tail of the output
        # that is still buffered when the process exits.
        while True:
            line = p.stdout.readline()
            if not line:
                break
            if self.verbose:
                print_verbose(line)
            logfile.write(line)

        # wait until process is complete
        p.wait()
    finally:
        logfile.close()

    # save the exit code for later function calls
    self.exitcode = p.returncode

    # raise Exception when an error occurs during processing
    if p.returncode:
        raise AnnotateDBException(log_path)

    # print summary of the process after completion
    print_verbose('Taxonomical annotation of blast database complete \n')
    print_running_time(self.time)
    newline()
def concatinate(self, outputdir):
    """Concatenate the reads in ``self.input`` with Flash.

    Flash's stdout is written to ``<logdir>concatination.log`` and shown
    on screen (full lines in verbose mode, compact otherwise); its stderr
    goes to ``<logdir>flash.err.log``.

    Args:
        outputdir: directory passed to Flash as ``-d``.

    Raises:
        FlashException: carrying the error log path, when Flash exits
            with a non-zero return code.
    """
    # create a dir for output
    create_outputdir(outputdir)

    # print actual informations about the step on stdout
    print_step(self.step_number,
               'Assembly',
               'Concatinate Reads',
               self.concat_parameter)
    newline()

    err_path = self.logdir + 'flash.err.log'

    # open the logfiles; errors are piped to an extra error logfile
    logfile = open_logfile(self.logdir + 'concatination.log')
    err_log = open_logfile(err_path)
    try:
        # start the program Flash with parameter from the conf file
        p = subprocess.Popen(
            shlex.split('%s -t %d -d %s %s %s'
                        % (self.flash_exe,
                           self.threads,
                           outputdir,
                           self.concat_parameter,
                           self.input)),
            stdout=subprocess.PIPE,
            stderr=err_log)

        # Read each line exactly once: the same line is printed and logged.
        # Reading until EOF keeps the output that is still buffered when
        # Flash exits.
        while True:
            line = p.stdout.readline()
            if not line:
                break
            if self.verbose:
                print_verbose(line)
            else:
                print_compact(line.rstrip('\n'))
            logfile.write(line)

        # wait until process is finished
        p.wait()
    finally:
        # close the handles so the logs are complete on disk before they
        # are inspected or removed
        err_log.close()
        logfile.close()

    if p.returncode:
        raise FlashException(err_path)

    # remove empty error logs
    remove_empty_logfile(err_path)

    # print summary of the process after completion
    newline()
    print_verbose('Concatination complete \n')
    print_running_time(self.time)
def parse_to_db(self, input, output):
    """Parse blast results into a database file.

    Runs ``self.parser_exe`` writing to ``<output>/<self.parser_name>.db``
    (an existing file with that name is removed first). The parser's
    stdout is shown on screen, its stderr goes to
    ``<logdir>parser.err.log``. The exit code is stored in
    ``self.exitcode``.

    Args:
        input: blast result file, appended to the parser command line.
        output: directory that receives the database.

    Raises:
        ParserException: carrying the error log path, when the parser
            exits with a non-zero return code.
    """
    # create a dir for output
    create_outputdir(output)

    # generate filename for db
    outfile = output + os.sep + self.parser_name + '.db'

    # remove old databases with same name
    if os.path.exists(outfile):
        os.remove(outfile)

    # print actual informations about the step on stdout
    print_step(self.step_number,
               'Analysis',
               'Parse database from blast results',
               self.parser_parameter)
    newline()

    err_path = self.logdir + 'parser.err.log'
    err_log = open_logfile(err_path)
    try:
        # start the parser
        p = subprocess.Popen(
            shlex.split('%s -o %s %s %s'
                        % (self.parser_exe,
                           outfile,
                           self.parser_parameter,
                           input)),
            stdout=subprocess.PIPE,
            stderr=err_log)

        # print information about the status; reading until EOF (instead
        # of polling the process) keeps the lines that are still buffered
        # when the parser exits
        while True:
            line = p.stdout.readline()
            if not line:
                break
            if self.verbose:
                print_verbose(line)
            else:
                print_compact(line.rstrip('\n'))

        # wait until process is complete
        p.wait()
    finally:
        # close the handle before the error log is inspected or removed
        err_log.close()

    # save the exit code for later function calls
    self.exitcode = p.returncode

    # raise Exception when an error occurs during processing
    if p.returncode:
        raise ParserException(err_path)

    # remove empty error logs
    remove_empty_logfile(err_path)

    # print summary of the process after completion
    print_verbose('Parsing of blast XML File complete \n')
    print_running_time(self.time)
    newline()
def blastn(self, outputdir):
    """Blast the sequences in ``self.input`` with blastn.

    fastq input is converted first and paired input is merged into one
    file; in both cases ``self.input`` is replaced by the result. blastn
    writes its result to ``<outputdir>/<blast_output(self.outfmt)>`` and
    its stderr to ``<logdir>blastn.err.log``.

    Args:
        outputdir: directory that receives the blastn result file.

    Raises:
        BlastnException: carrying the error log path, when blastn exits
            with a non-zero return code.
    """
    # create a dir for output
    create_outputdir(outputdir)

    # blastn can only run with fasta files, so input has to be converted
    if is_fastq(self.input):
        # print actual informations about the step on stdout
        print_step(self.step_number,
                   'Annotation',
                   'convert fastq files',
                   cut_path(self.input))
        newline()
        self.input = convert_fastq(self.input, self.blast_dir, self.converter_exe)

    # blastn can only annotate one file, so input has to be merged to one file
    # NOTE(review): assumed to be an independent check, not nested under the
    # fastq conversion above -- confirm.
    if is_paired(self.input):
        # print actual informations about the step on stdout
        print_step(self.step_number,
                   'Annotation',
                   'merging reads to on file',
                   cut_path(self.input))
        newline()
        self.input = merge_files(self.input, self.blast_dir, 'merged', 'fasta')

    # define the outputformat for the blastn results
    outfile = outputdir + os.sep + blast_output(self.outfmt)

    # print actual informations about the step on stdout
    print_step(self.step_number,
               'Annotation',
               'blast sequences against nt database',
               self.blast_parameter)
    newline()

    # a regular logfile is not required: only stderr is captured
    err_path = self.logdir + 'blastn.err.log'
    err_log = open_logfile(err_path)
    try:
        # start blastn and wait until completion
        p = subprocess.Popen(
            shlex.split('%s -db %s -query %s -out %s -num_threads %s %s '
                        % (self.blastn_exe,
                           self.blastn_db,
                           to_string(self.input),
                           outfile,
                           self.threads,
                           self.blast_parameter)),
            stderr=err_log)
        p.wait()
    finally:
        # close the handle before the error log is inspected or removed
        err_log.close()

    if p.returncode:
        raise BlastnException(err_path)

    # remove the temporary files: converted fastq files and the merged fasta files
    remove_file(outputdir + os.sep, 'converted', 'fasta')
    remove_file(outputdir + os.sep, 'merged', 'fasta')

    # remove unused error logs
    remove_empty_logfile(err_path)

    # print summary of the process after completion
    print_verbose('Annotation with blastn complete \n')
    print_running_time(self.time)
    newline()
def trim_and_filter(self):
    """Run quality based trimming and filtering on ``self.input``.

    Starts ``self.trim_exe`` with ``self.trim_parameter`` and
    ``self.trim_dir`` as output directory. After the run the tool's
    stderr is written to ``<logdir>trimming.log`` (kept open as
    ``self.logfile``) and, in verbose mode, printed as well. The tool's
    stdout is captured and discarded.

    Raises:
        TrimGaloreException: carrying the log file name, when the tool
            exits with a non-zero return code.
    """
    # create a dir for output
    create_outputdir(self.trim_dir)

    # print actual informations about the step on stdout
    print_step(self.step_number,
               'Preprocess',
               'quality based trimming and filtering',
               self.trim_parameter)
    newline()

    # open the log file
    self.logfile = open_logfile(self.logdir + 'trimming.log')

    # start trim_galore with the given parameter and specified output dir
    p = subprocess.Popen(
        shlex.split('%s %s -o %s %s'
                    % (self.trim_exe,
                       self.trim_parameter,
                       self.trim_dir,
                       to_string(self.input))),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE)

    # communicate() drains both pipes while waiting. A plain wait() can
    # deadlock as soon as the child fills one of the OS pipe buffers.
    _, err_output = p.communicate()

    # after processing write all generated output to the log file
    for line in err_output.splitlines(True):
        if self.verbose:
            # in verbose mode additionally print output to stdout
            print_verbose(line)
        self.logfile.write(line)

    # make sure the log is on disk before anybody is pointed at it
    self.logfile.flush()

    if p.returncode:
        raise TrimGaloreException(self.logfile.name)

    # print summary of the process after completion
    print_verbose('Trimming and filtering complete \n')
    print_running_time(self.time)
    newline()
def qualityCheck(self):
    """Run FastQC on ``self.input``.

    FastQC is started with ``self.threads`` threads, ``--extract`` and
    ``self.quality_dir`` as output directory. Its stderr is shown on
    screen (full lines in verbose mode, compact otherwise); its stdout
    is discarded.

    Raises:
        FastQCException: when FastQC exits with a non-zero return code.
    """
    # create a dir for output
    create_outputdir(self.quality_dir)

    # print actual informations about the step on stdout
    print_step(self.step_number,
               'Preprocess',
               'quality analysis',
               self.fastqc_parameter)
    newline()

    # stdout is never evaluated; sending it to the null device instead of
    # an unread pipe prevents the child from blocking on a full pipe buffer
    devnull = open(os.devnull, 'w')
    try:
        # run FastQC with the given parameter, in separate threads and
        # extract the output
        p = subprocess.Popen(
            shlex.split('%s -t %s -o %s --extract %s %s'
                        % (self.fastqc_exe,
                           self.threads,
                           self.quality_dir,
                           self.fastqc_parameter,
                           to_string(self.input))),
            stdout=devnull,
            stderr=subprocess.PIPE)

        # during processing print the output on screen; reading until EOF
        # keeps the lines that are still buffered when FastQC exits
        while True:
            line = p.stderr.readline()
            if not line:
                break
            if self.verbose:
                print_verbose(line)
            else:
                print_compact(line.rstrip('\n'))

        # wait until process is finished
        p.wait()
    finally:
        devnull.close()

    if p.returncode:
        raise FastQCException()

    # print summary of the process after completion
    print_verbose('Quality check complete for %s\n' % (self.input))
    print_running_time(self.time)
    newline()
def assemble_reads(self, outputdir):
    """Assemble ``self.input`` with velveth, velvetg and meta-velvetg.

    The three programs are run one after another on ``outputdir``. For
    each of them stdout is written to ``<logdir><tool>.log`` (and printed
    in verbose mode) and stderr to ``<logdir><tool>.err.log``.

    Args:
        outputdir: working directory handed to all three programs.

    Raises:
        VelvetHException, VelvetGException, MetaVelvetException: carrying
            the error log path of the respective program, when it exits
            with a non-zero return code. Later programs are not started.
    """

    def run_step(command, log_path, err_path, error_cls):
        """Run one assembly program, log its output and check its exit code."""
        log = open_logfile(log_path)
        # errors are piped to an extra error logfile
        err_log = open_logfile(err_path)
        try:
            p = subprocess.Popen(shlex.split(command),
                                 stdout=subprocess.PIPE,
                                 stderr=err_log)

            # Read each line exactly once so that verbose output and the
            # log both receive it; reading until EOF keeps the output that
            # is still buffered when the program exits.
            while True:
                line = p.stdout.readline()
                if not line:
                    break
                if self.verbose:
                    print_verbose(line)
                log.write(line)

            # wait until process is finished
            p.wait()
        finally:
            # close the handles before the logs are inspected or removed
            err_log.close()
            log.close()

        if p.returncode:
            raise error_cls(err_path)

        # remove empty error logs
        remove_empty_logfile(err_path)

    # create a dir for output
    create_outputdir(outputdir)

    # step 1: velveth with parameter from the conf file and automatic
    # detection of the input file format
    print_step(self.step_number,
               'Assembly',
               'Creating Hashmaps',
               self.velveth_parameter)
    newline()
    run_step('%s %s %s %s -fmtAuto %s' % (self.velveth_exe,
                                          outputdir,
                                          self.kmer,
                                          self.velveth_parameter,
                                          self.input),
             self.logdir + 'velveth.log',
             self.logdir + 'velveth.err.log',
             VelvetHException)

    # step 2: velvetg in the dir of velveth, with the parameter of the
    # conf file
    print_step(self.step_number,
               'Assembly',
               'Creating Graph',
               self.velvetg_parameter)
    newline()
    run_step('%s %s %s' % (self.velvetg_exe,
                           outputdir,
                           self.velvetg_parameter),
             self.logdir + 'velvetg.log',
             self.logdir + 'velvetg.err.log',
             VelvetGException)

    # step 3: meta-velvetg in the dir of velveth and velvetg, with the
    # parameter of the conf file
    print_step(self.step_number,
               'Assembly',
               'Metagenomic Assembly',
               self.metavelvet_parameter)
    newline()
    run_step('%s %s %s' % (self.metavelvet_exe,
                           outputdir,
                           self.metavelvet_parameter),
             self.logdir + 'metavelvet.log',
             self.logdir + 'metavelvet.err.log',
             MetaVelvetException)
    newline()

    # print summary of the process after completion
    print_verbose('Assembly complete \n')
    print_running_time(self.time)
    newline()
def metacv(self, outputdir):
    """Annotate reads with MetaCV and summarize the results.

    Runs ``classify``, ``res2table`` and ``res2sum`` of ``self.metacv_exe``
    one after another with ``outputdir`` as working directory. The stderr
    of each call is shown on screen (full lines in verbose mode, compact
    otherwise) and written to ``<logdir>metacv.<function>.log``; stdout
    is discarded.

    Args:
        outputdir: working directory of all three MetaCV calls.

    Raises:
        MetaCVException: carrying the log path, when ``classify`` exits
            with a non-zero return code.
        MetaCVSumException: carrying the log path, when ``res2table`` or
            ``res2sum`` exits with a non-zero return code.
    """

    def run_metacv(command, log_path, error_cls):
        """Run one MetaCV call, log its stderr and check its exit code."""
        log = open_logfile(log_path)
        # stdout is never evaluated; sending it to the null device instead
        # of an unread pipe prevents the child from blocking on a full
        # pipe buffer
        devnull = open(os.devnull, 'w')
        try:
            # the workingdir must be specified to maintain the correct
            # order of output files
            p = subprocess.Popen(shlex.split(command),
                                 stderr=subprocess.PIPE,
                                 stdout=devnull,
                                 cwd=outputdir + os.sep)

            # Read each line exactly once: the same line is printed and
            # logged. Reading until EOF keeps the output that is still
            # buffered when MetaCV exits.
            while True:
                line = p.stderr.readline()
                if not line:
                    break
                if self.verbose:
                    print_verbose(line)
                else:
                    print_compact(line.rstrip('\n'))
                log.write(line)

            # wait until process is finished
            p.wait()
        finally:
            # close the handles before the log is inspected or removed
            devnull.close()
            log.close()

        if p.returncode:
            raise error_cls(log_path)

        # remove unused error logs
        remove_empty_logfile(log_path)

    # create a dir for output
    create_outputdir(outputdir)

    # select the input for metacv and convert it in an usable format
    if self.contigs is True:
        reads = to_string(self.input)
    else:
        reads = to_string(self.raw)

    # print actual informations about the step on stdout
    print_step(self.step_number,
               'Annotation',
               'Annotate bacterial reads with MetaCV',
               '%s %s %s' % (self.metacv_seq,
                             self.metacv_mode,
                             self.metacv_orf))
    newline()

    # metacv has a maximum thread number of 64
    threads = min(self.threads, 64)

    # start MetaCV's classify function and wait until completion
    run_metacv('%s classify %s %s %s %s %s %s --threads=%s'
               % (self.metacv_exe,
                  self.metacv_db,
                  reads,
                  self.metacv_name,
                  self.metacv_seq,
                  self.metacv_mode,
                  self.metacv_orf,
                  threads),
               self.logdir + 'metacv.classify.log',
               MetaCVException)

    # print actual informations about the step on stdout
    print_step(self.step_number,
               'Annotation',
               'Analyse the results of MetaCV',
               '%s %s %s' % (self.metacv_total_reads,
                             self.metacv_min_qual,
                             self.metacv_taxon))
    newline()

    # start MetaCV's res2table function and wait until completion
    run_metacv('%s res2table %s %s %s %s %s %s --threads=%s'
               % (self.metacv_exe,
                  self.metacv_db,
                  to_string(update_reads(outputdir, 'metpipe', 'res')),
                  self.metacv_name + '.res2table',
                  self.metacv_total_reads,
                  self.metacv_min_qual,
                  self.metacv_taxon,
                  threads),
               self.logdir + 'metacv.res2table.log',
               MetaCVSumException)

    # print actual informations about the step on stdout
    print_step(self.step_number,
               'Annotation',
               'Summarize the results of MetaCV',
               self.metacv_min_qual)
    newline()

    # start MetaCV's res2sum function and wait until completion
    run_metacv('%s res2sum %s %s %s %s'
               % (self.metacv_exe,
                  self.metacv_db,
                  to_string(update_reads(outputdir, 'metpipe', 'res')),
                  self.metacv_name + '.res2sum',
                  self.metacv_min_qual),
               self.logdir + 'metacv.res2sum.log',
               MetaCVSumException)

    # print summary of the process after completion
    print_verbose('Annotation with MetaCV complete \n')
    print_running_time(self.time)
    newline()
def krona_report(self, input, output, parser_output):
    """Create a Krona HTML overview from blast results.

    Tabular input is handed to the Krona import script directly. XML
    input is accepted when ``parser_output`` is a database: its values
    are extracted to tabular format into ``output`` first and that file
    is used instead. The report is written to
    ``<output>/<self.krona_name>.html``; the script's stdout goes to
    ``<logdir>krona.log`` and its stderr to ``<logdir>krona.err.log``.

    Args:
        input: blast result(s), tabular or XML.
        output: directory that receives the report.
        parser_output: parser database, only used for XML input.

    Raises:
        KronaFormatException: when the input is neither tabular nor XML
            with a parser database.
        KronaException: carrying the error log path, when the import
            script exits with a non-zero return code.
    """
    # create a dir for output
    create_outputdir(output)

    # generate path and name for output file
    outfile = output + os.sep + self.krona_name + '.html'

    # test type of input file
    if is_tabular(input):
        # print actual informations about the step on stdout
        print_step(self.step_number,
                   'Analysis',
                   'Create Overview from tabular output',
                   self.krona_parameter)
        newline()
    elif is_xml(input) and is_db(parser_output):
        print_step(self.step_number,
                   'Analysis',
                   'Create Overview from XML output',
                   self.krona_parameter)
        # convert the values from database to tabular format
        extract_tabular(to_string(parser_output), output)
        # set the new input
        input = update_reads(output, 'extracted_from_DB', 'tab')
    else:
        # every remaining input is unsupported: it is not tabular, and it
        # is either not XML or has no parser database to extract from
        raise KronaFormatException()

    err_path = self.logdir + 'krona.err.log'

    # pipe all output of the import script into logfiles
    out_log = open_logfile(self.logdir + 'krona.log')
    err_log = open_logfile(err_path)
    try:
        # start the Krona import script for Blast tabular output
        # NOTE(review): perl's `-l` switch controls line-ending handling;
        # if self.perl_lib is a library directory, `-I` was probably
        # intended -- confirm before changing.
        p = subprocess.Popen(
            shlex.split('perl -l %s %s -o %s %s %s'
                        % (self.perl_lib,
                           self.krona_exe,
                           outfile,
                           self.krona_parameter,
                           to_string(input))),
            stdout=out_log,
            stderr=err_log)
        # wait until process is complete
        p.wait()
    finally:
        # close the handles before the logs are inspected or removed
        err_log.close()
        out_log.close()

    if p.returncode:
        raise KronaException(err_path)

    # remove unused error logs
    remove_empty_logfile(err_path)

    # print summary of the process after completion
    print_verbose('Creation of Krona Pie Chart complete \n')
    print_running_time(self.time)
    newline()