Example #1
 def subset_db(self, input, output, parser_out):
     
     # create a dir for output
     create_outputdir(output)
     
     # because several classifiers are possible, the database is subset in a loop,
     # so that every classifier can be processed
     for i in range(len(self.R_subset_classifier)):
         # print current information about the step on stdout
         print_step(self.step_number, 
                    'Analysis', 
                    'Subset the database for %s' % (self.R_subset_classifier[i]),
                     '--bitscore %s --rank %s' % (self.R_subset_bitscore,
                                                  self.R_subset_rank[i]))
         newline()
         # generate name for database file
         outfile = '%s%s%s%s' % (output, os.sep, self.R_subset_classifier[i], '.db') 
         logfile = open_logfile(self.logdir + self.R_subset_classifier[i] + '.log')
         
         # remove old databases with the same name
         if os.path.exists(outfile):
             os.remove(outfile)
             
         # start the process with classifier i and the complete output of the preceding annotation step
         p = subprocess.Popen(shlex.split('%s -i %s -o %s --classifier %s --bitscore %s --rank %s --taxon %s --blast %s' 
                                          % (self.R_subset_exe, 
                                             to_string(input), 
                                             outfile,
                                             self.R_subset_classifier[i],
                                             self.R_subset_bitscore,
                                             self.R_subset_rank[i],
                                             self.R_subset_taxon_db,
                                             to_string(parser_out))),
                              stdout = subprocess.PIPE)
         
         # while the process runs, read each line exactly once:
         # write it to the logfile and echo it in verbose mode
         while p.poll() is None:
             line = p.stdout.readline()
             if self.verbose:
                 print_verbose(line)
             logfile.write(line)
         # wait until process is complete
         p.wait()
         if p.returncode:
             raise SubsetDBException(self.logdir + self.R_subset_classifier[i] + '.log')
         
     # print summary of the process after completion
     print_verbose('Subsetting of annotated Blast database complete \n')
     print_running_time(self.time)
     newline()
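
A minimal, self-contained sketch of the read-once streaming pattern used above, built only on the standard library (the function name and its arguments are illustrative, not part of the pipeline):

 import shlex
 import subprocess
 import sys

 def stream_to_log(cmd, logpath, verbose=False):
     # run the command and capture its stdout line by line
     p = subprocess.Popen(shlex.split(cmd),
                          stdout=subprocess.PIPE,
                          universal_newlines=True)
     with open(logpath, 'w') as logfile:
         # read every line exactly once, so nothing is lost
         # between the console and the logfile
         for line in p.stdout:
             if verbose:
                 sys.stdout.write(line)
             logfile.write(line)
     # return the exit code for the caller to check
     return p.wait()

Reading each line once into a local variable is what keeps the console and the logfile in sync; calling readline() twice per iteration silently drops every other line.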
Example #2
 def __init__(self, threads, step_number, verbose, time, logdir, input, mode,
              flash_exe, concat_dir, concat_parameter, merge_uncombined, 
              velveth_exe, velvetg_exe, metavelvet_exe, assembly_out, kmer, 
              velveth_parameter, velvetg_parameter, metavelvet_parameter):
     
     # init general variables
     self.threads = threads
     self.step_number = step_number
     self.verbose = verbose
     self.time = time
     self.logdir = logdir
     self.input = to_string(input)
     self.mode = mode
      
     # init flash specific variables
     self.flash_exe = flash_exe
     self.concat_out = concat_dir 
     self.concat_parameter = concat_parameter
     self.merge_uncombined = merge_uncombined
     
     # init metavelvet specific variables
     self.velveth_exe = velveth_exe
     self.velvetg_exe = velvetg_exe
     self.metavelvet_exe = metavelvet_exe
     self.assembly_out = assembly_out
     self.kmer = kmer
     self.velveth_parameter = velveth_parameter
     self.velvetg_parameter = velvetg_parameter
     self.metavelvet_parameter = metavelvet_parameter
Example #3
 def blastn(self, outputdir):
         
     # create a dir for output
     create_outputdir(outputdir)
     
     # blastn can only run with fasta files, so input has to be converted
     if is_fastq(self.input):
         # print current information about the step on stdout
         print_step(self.step_number,
                    'Annotation', 
                    'convert fastq files',
                    cut_path(self.input))
         newline()
         self.input = convert_fastq(self.input, self.blast_dir, self.converter_exe)
     
     # blastn can only annotate one file, so paired input has to be merged into one file
     if is_paired(self.input):
         # print current information about the step on stdout
         print_step(self.step_number,
                    'Annotation',
                    'merging reads to one file',
                    cut_path(self.input))
         newline()
         self.input = merge_files(self.input, self.blast_dir, 'merged', 'fasta')
     
     # build the output file name from the chosen blastn output format
     outfile = outputdir + os.sep + blast_output(self.outfmt)
     
     # print current information about the step on stdout
     print_step(self.step_number,
                'Annotation',
                'blast sequences against nt database',
                self.blast_parameter)
     newline()
     # start blastn and wait until completion
     # a logfile is not required, because blastn has no log function and writes nothing to stdout
     p = subprocess.Popen(shlex.split('%s -db %s -query %s -out %s -num_threads %s %s ' % 
                                      (self.blastn_exe,
                                       self.blastn_db,
                                       to_string(self.input),
                                       outfile,
                                       self.threads, 
                                       self.blast_parameter)),
                          stderr = open_logfile(self.logdir + 'blastn.err.log'))
     # wait until process is complete
     p.wait()
     
     if p.returncode:
         raise BlastnException(self.logdir + 'blastn.err.log')
     else:
         # remove the temporary files: converted fastq files and the merged fasta files
         remove_file(outputdir + os.sep, 'converted', 'fasta')
         remove_file(outputdir + os.sep, 'merged', 'fasta')
         # remove unused error logs
         remove_empty_logfile(self.logdir + 'blastn.err.log')
     
         # print summary of the process after completion
         print_verbose('Annotation with blastn complete \n')
         print_running_time(self.time)
         newline()
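
The shlex.split-on-an-interpolated-string idiom above breaks as soon as a database or input path contains a space; passing the argv list directly avoids the quoting problem. A hedged sketch of the same call shape (run_blastn and its parameters are illustrative; only the blastn flags come from the example):

 import shlex
 import subprocess

 def run_blastn(exe, db, query, outfile, threads, extra_parameter=''):
     # an explicit argv list needs no shell quoting, so paths with
     # spaces survive; only the free-form extra parameters are split
     cmd = [exe, '-db', db, '-query', query, '-out', outfile,
            '-num_threads', str(threads)] + shlex.split(extra_parameter)
     return subprocess.call(cmd)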
Example #4
    def manage_assembly(self):
        
        concatinated = ''
        assembled = ''
        # run the assembling functions when the module is initialized
        if self.mode.lower() == 'flash':
            # does the executable exist and is it runnable?
            if is_executable(self.flash_exe):
                # start the concatenation and update the input for the next step
                self.concatinate(self.concat_out)
                self.step_number += 1
                # merge the concatenated reads with the non-concatenated rest
                if self.merge_uncombined:
                    self.input = merge_files([to_string(update_reads(self.concat_out, 'extendedFrags', 'fastq')),
                                              to_string(update_reads(self.concat_out, 'out.notCombined', 'fastq'))],
                                             self.concat_out,
                                             'merged_concat', 'fastq')
                else:
                    concatinated = update_reads(self.concat_out, 'extendedFrags', 'fastq')
                    self.input = concatinated
                

        if self.mode.lower() == 'metavelvet':
            # do the executables exist and are they runnable?
            if is_executable(self.velveth_exe) and is_executable(self.velvetg_exe) and is_executable(self.metavelvet_exe):
                # start the assembly and update the input for the next step
                self.assemble_reads(self.assembly_out)
                assembled = update_reads(self.assembly_out, 'meta-velvetg', 'fa')
                self.input = assembled
                self.step_number += 1
                
        if self.mode.lower() == 'both':
            # TODO: not working because of auto mode --> see logs

            # do the executables exist and are they runnable?
            if is_executable(self.flash_exe) and is_executable(self.velveth_exe) and is_executable(self.velvetg_exe) and is_executable(self.metavelvet_exe):
                # start processing and update the input for the next step
                self.concatinate(self.concat_out)
                concatinated = update_reads(self.concat_out, 'extendedFrags', 'fastq')
                self.input = concatinated
                self.assemble_reads(self.assembly_out)
                assembled = update_reads(self.assembly_out, 'meta-velvetg', 'fa')
                self.step_number += 1
                self.input = assembled
        return [self.step_number, self.input, concatinated, assembled]
Example #5
def apply_query(query, query_idx, endpoint):
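    # resolve the query object and the endpoint configuration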
    query_instance = get_query_instance(query)
    ep = endpoints[endpoint]
    print('----apply query----', ep, query_idx)
    responses = {}
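    # a remote endpoint is asked once over the network for this query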
    if ep[TYPE] == REMOTE_EP:
        qt = QueryTool(endpoint=ep[ENDPOINT],
                       mode=modes[ep[MODE]],
                       relax_num_ep=1,
                       disable_timeout=True)
        response, stat, errors = query_instance.ask_all(query_tool=qt,
                                                        start=query_idx,
                                                        end=query_idx + 1,
                                                        prefilter=False)
        if errors:
            print('errors:', errors)
        if response:
            responses = {
                'dummy':
                xmltodict.parse(to_string(response))['graphqueries_responses']
            }
    else:
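        # a local endpoint is asked once per related ttl document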
        if query_instance.related_docs:
            for ttl_file in query_instance.related_docs[query_idx]:
                qt = QueryTool(endpoint=ep[FOLDER_PATH] + ttl_file + '.ttl',
                               mode=modes[ep[MODE]],
                               relax_num_ep=1,
                               use_fuseki=ep[ENDPOINT],
                               disable_timeout=True)
                response, stat, errors = query_instance.ask_all(
                    query_tool=qt,
                    start=query_idx,
                    end=query_idx + 1,
                    root_doc=ttl_file,
                    prefilter=False)
                if errors:
                    print('errors:', errors)
                responses[ttl_file] = xmltodict.parse(
                    to_string(response))['graphqueries_responses']
    return responses
Example #6
 def manage_analysis(self):
     
    
     if self.annotation_mode == 'metacv':
         print_verbose("For a detailed analysis blastn with XML output is needed")
     else:
         # test for blastrun with outfmt 5 mode
         if is_xml(self.blast_output):
             # create a SQLite DB from the xml file
             self.parse_to_db(to_string(self.blast_output), self.parsed_db_out)
             # test the exit code, because the next script needs the output as input
             if self.exitcode == 0:
                 # update input 
                 parser_out = absolute_path(update_reads(self.parsed_db_out, 
                                                         self.parser_name, 
                                                         'db'))
                 # raise step_number
                 self.step_number += 1
                 # create a new database with taxonomical annotations
                 self.annotate_db(parser_out, self.annotated_db_out)
                 # test the exit code, because the next script needs the output as input
                 if self.exitcode == 0:
                     # update input
                     annotated_output = absolute_path(update_reads(self.annotated_db_out, 
                                                                   self.R_annotate_name, 
                                                                   'db'))
                     # raise step_number
                     self.step_number += 1
                     # subset the taxonomical database for a better and
                     # faster analysis after the pipeline has finished
                     self.subset_db(annotated_output, self.subseted_db_out, parser_out)
                     # raise step_number
                     self.step_number += 1
                 else:
                     # without a successfully annotated database there is nothing to subset
                     annotated_output = ''
                     print_verbose("ERROR: Annotated database could not be created correctly")
                 # create a pie chart of the blast data with Krona Webtools 
                 if self.krona:
                     self.krona_report(self.blast_output, self.krona_report_out, parser_out)
                     
                 return [parser_out, annotated_output]   
             else: 
                 print_verbose("ERROR: XML file could not be parsed")
         # test for blast tabular output
         elif is_tabular(self.blast_output) and self.krona:
             self.krona_report(self.blast_output, self.krona_report_out, '')
             return []
         else:
             print_verbose("ERROR: Blast output file is not in xml or tabular format.\n" +
                           "Please use outfmt 5 or 6 for Blast run")
             return []
Example #7
    def annotate_db(self, input, output):

        # create a dir for output
        create_outputdir(output)
        # generate filename for db
        outfile = output + os.sep + self.R_annotate_name + '.db'
        # open a logfile for annotation process
        logfile = open_logfile(self.logdir + 'annotation_of_db.log')
        
        # remove old databases with same name
        if os.path.exists(outfile):
            os.remove(outfile)
        
        # print current information about the step on stdout
        print_step(self.step_number, 'Analysis', 
                   'Annotate taxonomical data to blast database',
                   self.R_annotate_parameter)
        newline()
        # start the parser and wait until completion
        p = subprocess.Popen(shlex.split('%s -i %s -o %s %s --taxon %s' 
                                         % (self.R_annotate_exe, 
                                            to_string(input), 
                                            outfile, 
                                            self.R_annotate_parameter,
                                            self.R_annotate_taxon_db)),
                             stdout = subprocess.PIPE)
        
        # while the process runs, read each line exactly once:
        # write it to the logfile and echo it in verbose mode
        while p.poll() is None:
            line = p.stdout.readline()
            if self.verbose:
                print_verbose(line)
            logfile.write(line)
        # wait until process is complete
        p.wait()
        # save the exit code for later function calls 
        self.exitcode = p.returncode
        
        # raise Exception when an error occurs during processing
        if p.returncode:
            raise AnnotateDBException(self.logdir + 'annotation_of_db.log')
        else:
            # print summary of the process after completion
            print_verbose('Taxonomical annotation of blast database complete \n')
            print_running_time(self.time)
            newline()
Example #8
 def trim_and_filter(self):
     
     # create a dir for output
     create_outputdir(self.trim_dir)
     
     # print actual informations about the step on stdout
     print_step(self.step_number, 
                'Preprocess', 
                'quality based trimming and filtering',
                self.trim_parameter)
     newline()
     
     # open the log file
     self.logfile = open_logfile(self.logdir + 'trimming.log')
     
     # start trim_galore with the given parameter and specified output dir
     p = subprocess.Popen(shlex.split('%s %s -o %s %s' % 
                                      (self.trim_exe,
                                       self.trim_parameter,
                                       self.trim_dir,
                                       to_string(self.input))),
                         stdout = subprocess.PIPE,
                         stderr = subprocess.PIPE)
     # read stderr before waiting, so a full pipe buffer cannot block the child
     for line in p.stderr:
         if self.verbose:
             # in verbose mode additionally print the output to stdout
             print_verbose(line)
         self.logfile.write(line)
     # wait until the process is finished
     p.wait()
     if p.returncode:
         raise TrimGaloreException(self.logfile.name)
     else:
         # print summary of the process after completion
         print_verbose('Trimming and filtering complete \n')
         print_running_time(self.time)
         newline()
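
When both stdout and stderr are PIPEd, as above, reading a stream only after wait() can deadlock once the OS pipe buffer fills; communicate() is the general-purpose safe pattern. A minimal sketch under that assumption (run_and_log and its arguments are illustrative):

 import shlex
 import subprocess
 import sys

 def run_and_log(cmd, logpath, verbose=False):
     p = subprocess.Popen(shlex.split(cmd),
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE,
                          universal_newlines=True)
     # communicate() drains both streams concurrently, so a chatty
     # child process cannot block on a full pipe buffer
     out, err = p.communicate()
     with open(logpath, 'w') as logfile:
         logfile.write(err)
     if verbose:
         sys.stdout.write(err)
     return p.returncode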
Example #9
    def qualityCheck(self):
        
        # create a dir for output
        create_outputdir(self.quality_dir)
        
        # print current information about the step on stdout
        print_step(self.step_number, 
                   'Preprocess', 
                   'quality analysis',
                   self.fastqc_parameter)
        newline()
        
        # run FastQC with the given parameters, in separate threads, and extract the output

        p = subprocess.Popen(shlex.split('%s -t %s -o %s --extract %s %s' 
                                         % (self.fastqc_exe,
                                            self.threads,
                                            self.quality_dir, 
                                            self.fastqc_parameter,
                                            to_string(self.input))),
                             stdout = subprocess.PIPE,
                             stderr = subprocess.PIPE)
        
        # during processing pipe the output and print it on screen
        while p.poll() is None:
            if self.verbose:
                print_verbose(p.stderr.readline())
            else:
                print_compact(p.stderr.readline().rstrip('\n'))
        # wait until process is finished
        p.wait()
        
        if p.returncode:
            raise FastQCException()
        else:
            # print summary of the process after completion
            print_verbose('Quality check complete for %s\n' % (self.input))
            print_running_time(self.time)
            newline()
Example #10
 def metacv(self, outputdir):
     
     # create a dir for output
     create_outputdir(outputdir)
     
     # select the input for metacv and convert it into a usable format
     if self.contigs:
         input = to_string(self.input)
     else:
         input = to_string(self.raw)
         
     # print current information about the step on stdout
     print_step(self.step_number, 
                'Annotation', 
                'Annotate bacterial reads with MetaCV',
                 '%s %s %s' % (self.metacv_seq,
                               self.metacv_mode,
                               self.metacv_orf))
     newline()
     
     # metacv supports a maximum of 64 threads,
     # so the parameter has to be capped
     if self.threads > 64:
         threads = 64
     else:
         threads = self.threads
     classify = open_logfile(self.logdir + 'metacv.classify.log')
     # start MetaCV function and wait until completion
     p = subprocess.Popen(shlex.split('%s classify %s %s %s %s %s %s --threads=%s' % 
                                     (self.metacv_exe,
                                      self.metacv_db,
                                      input,
                                      self.metacv_name,
                                      self.metacv_seq, 
                                      self.metacv_mode, 
                                      self.metacv_orf,
                                      threads)),
                         stderr = subprocess.PIPE, 
                         stdout = subprocess.PIPE,
                         cwd = outputdir + os.sep)
     # while the process runs, read each stderr line exactly once:
     # print it on screen and write it to the logfile
     while p.poll() is None:
         line = p.stderr.readline()
         if self.verbose:
             print_verbose(line)
         else:
             print_compact(line.rstrip('\n'))
         classify.write(line)
     # wait until process is finished        
     p.wait()
     
     if p.returncode:
         raise MetaCVException(self.logdir + 'metacv.classify.log')
     else:
         # remove unused error logs
         remove_empty_logfile(self.logdir + 'metacv.classify.log')
         
         # print current information about the step on stdout
         print_step(self.step_number, 
                    'Annotation', 
                    'Analyse the results of MetaCV',
                    '%s %s %s' % (self.metacv_total_reads, 
                                  self.metacv_min_qual, 
                                  self.metacv_taxon))
         newline() 
         res2table = open_logfile(self.logdir + 'metacv.res2table.log')
         # start MetaCV's res2table function and wait until completion
         p = subprocess.Popen(shlex.split('%s res2table %s %s %s %s %s %s --threads=%s' % 
                                          (self.metacv_exe,
                                           self.metacv_db,
                                           to_string(update_reads(outputdir,'metpipe','res')),
                                           self.metacv_name + '.res2table',
                                           self.metacv_total_reads, 
                                           self.metacv_min_qual, 
                                           self.metacv_taxon,
                                           threads)),
                              stderr = subprocess.PIPE, 
                              stdout = subprocess.PIPE,
                              cwd = outputdir + os.sep)
         # while the process runs, read each stderr line exactly once:
         # print it on screen and write it to the logfile
         while p.poll() is None:
             line = p.stderr.readline()
             if self.verbose:
                 print_verbose(line)
             else:
                 print_compact(line.rstrip('\n'))
             res2table.write(line)
         # wait until process is finished
         p.wait()
     
         if p.returncode:
             raise MetaCVSumException(self.logdir + 'metacv.res2table.log')
         else:
             # remove unused error logs
             remove_empty_logfile(self.logdir + 'metacv.res2table.log')
         # print current information about the step on stdout
         print_step(self.step_number, 
                    'Annotation', 
                    'Summarize the results of MetaCV',
                    self.metacv_min_qual)
         newline()
         
         res2sum = open_logfile(self.logdir + 'metacv.res2sum.log')
         # start MetaCV's res2sum function and wait until completion
         # the workingdir must be specified to maintain the correct 
         # order of output files
         p = subprocess.Popen(shlex.split('%s res2sum %s %s %s %s' %
                                          (self.metacv_exe,
                                           self.metacv_db,
                                           to_string(update_reads(outputdir,'metpipe','res')),
                                           self.metacv_name + '.res2sum',
                                           self.metacv_min_qual)),
                              stderr = subprocess.PIPE, 
                              stdout = subprocess.PIPE,
                              cwd = outputdir + os.sep)
         # while the process runs, read each stderr line exactly once:
         # print it on screen and write it to the logfile
         while p.poll() is None:
             line = p.stderr.readline()
             if self.verbose:
                 print_verbose(line)
             else:
                 print_compact(line.rstrip('\n'))
             res2sum.write(line)
         # wait until process is finished
         p.wait()
     
         if p.returncode:
             raise MetaCVSumException(self.logdir + 'metacv.res2sum.log')
         else:
             # remove unused error logs
             remove_empty_logfile(self.logdir + 'metacv.res2sum.log')
     
         # print summary of the process after completion
         print_verbose('Annotation with MetaCV complete \n')
         print_running_time(self.time)
         newline()
Example #11
 def krona_report(self, input, output, parser_output):
     
     # create a dir for output
     create_outputdir(output)
     # generate path and name for output file
     outfile = output + os.sep + self.krona_name + '.html'
     
     # test type of input file
     if is_tabular(input):
         # print current information about the step on stdout
         print_step(self.step_number, 
                    'Analysis', 
                    'Create Overview from tabular output',
                    self.krona_parameter)
         newline()
         
         # start the Krona import script for Blast tabular output
         # pipe all output for stdout in a logfile
         p = subprocess.Popen(shlex.split('perl -l %s %s -o %s %s %s' 
                                          % (self.perl_lib,
                                             self.krona_exe,
                                             outfile,
                                             self.krona_parameter,
                                             to_string(input))),
                              stdout = open_logfile(self.logdir + 'krona.log'),
                              stderr = open_logfile(self.logdir + 'krona.err.log'))
         # wait until process is complete
         p.wait()
         if p.returncode:
             raise KronaException(self.logdir + 'krona.err.log')
         else:
             # remove unused error logs
             remove_empty_logfile(self.logdir + 'krona.err.log')
             # print summary of the process after completion
             print_verbose('Creation of Krona Pie Chart complete \n')
             print_running_time(self.time)
             newline()
         
     elif is_xml(input) and is_db(parser_output):
         print_step(self.step_number, 
                    'Analysis', 
                    'Create Overview from XML output',
                    self.krona_parameter)
         # convert the values from database to tabular format
         extract_tabular(to_string(parser_output), output)
         # set the new input
         input = update_reads(output, 'extracted_from_DB','tab')
         
         # start the Krona import script for Blast tabular output
         # pipe all output for stdout in a logfile
         p = subprocess.Popen(shlex.split('perl -l %s %s -o %s %s %s' 
                                          % (self.perl_lib,
                                             self.krona_exe,
                                             outfile,
                                             self.krona_parameter,
                                             to_string(input))),
                              stdout = open_logfile(self.logdir + 'krona.log'),
                              stderr = open_logfile(self.logdir + 'krona.err.log'))
         # wait until process is complete
         p.wait()
         if p.returncode:
             raise KronaException(self.logdir + 'krona.err.log')
         else:
             # remove unused error logs
             remove_empty_logfile(self.logdir + 'krona.err.log')
             # print summary of the process after completion
             print_verbose('Creation of Krona Pie Chart complete \n')
             print_running_time(self.time)
             newline()
         
     elif not is_tabular(input) and not is_xml(input):
         raise KronaFormatException()
     else:
         print_verbose('ERROR 25: Krona Report could not be generated for unknown reasons')
         sys.exit(1)
Example #12
 def dumps_json_report(self):
     return to_string({
         'kb_id': self.kb_id,
         'success': self.success,
         'failed': self.failed
     })
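
If a strictly valid JSON payload is wanted and to_string is only a generic stringifier, the standard json module is the safer choice; a minimal sketch with the same field names (an assumption about intent, not the project's own method):

 import json

 def dumps_json_report(self):
     # json.dumps guarantees well-formed JSON regardless of the
     # types stored in the report fields
     return json.dumps({
         'kb_id': self.kb_id,
         'success': self.success,
         'failed': self.failed
     })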
Example #13
def main(argv = None):

  # hardcode defaults
  RESULT_DIR = '%s%sresult' % (sys.path[0], os.sep)
  PARAM_FILE = '%s%sparameter.conf' % (sys.path[0], os.sep)
  STEPS = ['preprocessing', 'assembly', 'annotation', 'analysis']

  # Get the starting time
  starting_time = time.time()

  # setup Argument Parser for stdin arguments
  parser = argparse.ArgumentParser(add_help = True)

  # define arguments
  parser.add_argument('input', nargs = '+', action = 'store', 
                      help = 'single or paired input files in <fastq> format')
  parser.add_argument('--version', action = 'version', version = '%(prog)s 0.5')
  parser.add_argument('-v', dest = 'verbose', action = 'store_true', default = False,
                      help = 'more detailed output (default = False)')
  parser.add_argument('-t', dest = 'threads', type = int, action = 'store', 
                      default = multiprocessing.cpu_count() - 1,
                      help = 'number of threads to use (default = %d)' 
                      % (multiprocessing.cpu_count() - 1))
  parser.add_argument('-p', dest = 'param', action = 'store', default = PARAM_FILE,
                      help = 'use alternative config file (default = parameter.conf)')
  parser.add_argument('-s', dest = 'skip', action = 'store', default = '', 
                      choices = ['preprocessing', 'assembly', 'annotation','analysis'],
                      help = 'skip steps in the pipeline (default = None)')
  parser.add_argument('-o', dest = 'output', action = 'store', default = RESULT_DIR,
                      help = 'use alternative output folder')
  parser.add_argument('-a', dest = 'assembler', default = 'MetaVelvet', 
                      choices = ['metavelvet', 'flash','both'],
                      help = 'assembling program to use (default = MetaVelvet)')
  parser.add_argument('-c', dest = 'annotation', default = 'both',
                      choices = ['metacv', 'blastn', 'both'],
                      help = 'classifier to use for annotation (default = both)')     
  parser.add_argument('--use_contigs', dest = 'use_contigs', action = 'store_true',
                      default = False,
                      help = 'should MetaCV use assembled reads or raw reads (default = RAW)')
  parser.add_argument('--notrimming', dest = 'trim', action = 'store_false', default = True,
                      help = 'trim and filter input reads? (default = True)')
  parser.add_argument('--noquality', dest = 'quality', action = 'store_false', default = True,
                      help = 'create no quality report (default = True)')
  parser.add_argument('--noreport', dest = 'krona', action = 'store_false', default = True,
                      help = 'create no pie chart with the annotated taxonomical data (default = True)')
  parser.add_argument('--merge', dest = 'merge_uncombined', action = 'store_true', default = False,
                      help = 'merge concatenated reads with the uncombined ones (default = False)')

  args = parser.parse_args()
  # init the Pipeline
  RESULT_DIR = args.output if args.output else RESULT_DIR
  # check if the param file exists
  if os.path.isfile(args.param):
      PARAM_FILE = args.param
  else:
      if os.path.isfile(PARAM_FILE):
          sys.stderr.write('ERROR 3: Parameter file could not be found!\n')
          sys.stderr.write('Using the standard parameter file:\n%s\n\n' % (PARAM_FILE))
      else:
          raise ParamFileNotFound(args.param)

  # check if input exists
  if not all(os.path.isfile(file) for file in args.input):
      raise InputNotFound(to_string(args.input))

  if __name__ == '__main__':   

    # create outputdir and log folder
    create_outputdir(RESULT_DIR)
    create_outputdir(RESULT_DIR + os.sep +'log')

    # create the global settings object
    settings = General(args.threads, args.verbose, args.skip, starting_time, args.trim, 
                       args.quality, args.krona, args.use_contigs, args.merge_uncombined, args.assembler, 
                       args.annotation, 1)

    # setup the input, outputs and important files
    files = FileSettings(absolute_path(args.input), os.path.normpath(RESULT_DIR), PARAM_FILE)

    exe = Executables(PARAM_FILE)
    # get the all skipped steps
    skip = to_string(settings.get_skip())

    try:
      print "hello"
      # START the modules of Pipeline and wait until completion
      if skip == 'preprocessing':
          skip_msg(skip)
      else:
          # init the preprocessing module
          pre = Preprocess(settings.get_threads(), 
                           settings.get_step_number(),
                           settings.get_verbose(),
                           settings.get_actual_time(),
                           files.get_input(),
                           files.get_logdir(),
                           exe.get_FastQC(),
                           settings.get_quality(),
                           files.get_quality_dir(),
                           parse_parameter(FastQC_Parameter(PARAM_FILE)),
                           exe.get_TrimGalore(),
                           settings.get_trim(),
                           files.get_trim_dir(), 
                           parse_parameter(TrimGalore_Parameter(PARAM_FILE)))
          # run preprocessing functions
          results = pre.manage_preprocessing()
          # update pipeline variables with results
          settings.set_step_number(results[0])
          if len(results) > 1:
              files.set_input(absolute_path(results[1]))
              files.set_preprocessed_output(absolute_path(results[1]))

      if skip == 'assembly':
        skip_msg(skip)
      else:
        # init the assembly module 
        assembly = Assembly(settings.get_threads(), 
                            settings.get_step_number(),
                            settings.get_verbose(),
                            settings.get_actual_time(),
                            files.get_logdir(),
                            files.get_input(),
                            settings.get_assembler(),
                            exe.get_Flash(),
                            files.get_concat_dir(),
                            parse_parameter(FLASH_Parameter(PARAM_FILE)),
                            settings.get_merge_uncombined(),
                            exe.get_Velveth(),
                            exe.get_Velvetg(),
                            exe.get_MetaVelvet(),
                            files.get_assembly_dir(),
                            Velveth_Parameter(PARAM_FILE).get_kmer(PARAM_FILE),
                            parse_parameter(Velveth_Parameter(PARAM_FILE)),
                            parse_parameter(Velvetg_Parameter(PARAM_FILE)),
                            parse_parameter(MetaVelvet_Parameter(PARAM_FILE)))
        # run assembly functions
        results = assembly.manage_assembly()
        # update pipeline variables with results
        settings.set_step_number(results[0])
        files.set_input(absolute_path(results[1]))
        files.set_concatinated_output(absolute_path(results[2]))
        files.set_assembled_output(absolute_path(results[3]))
  
      if skip == 'annotation':
          skip_msg(skip)
      else:
          # init the annotation module
          anno = Annotation(settings.get_threads(), 
                            settings.get_step_number(),
                            settings.get_verbose(),
                            settings.get_actual_time(),
                            files.get_logdir(),
                            files.get_input(),
                            files.get_raw(),
                            settings.get_annotation(),
                            settings.get_use_contigs(),
                            exe.get_Blastn(),
                            exe.get_Blastn_DB(),
                            exe.get_Converter(),
                            files.get_blastn_dir(),
                            Blastn_Parameter(PARAM_FILE).outfmt,
                            parse_parameter(Blastn_Parameter(PARAM_FILE)),
                            exe.get_MetaCV(),
                            exe.get_MetaCV_DB(),
                            files.get_metacv_dir(),
                            MetaCV_Parameter(PARAM_FILE).get_seq(),
                            MetaCV_Parameter(PARAM_FILE).get_mode(),
                            MetaCV_Parameter(PARAM_FILE).get_orf(),
                            MetaCV_Parameter(PARAM_FILE).get_total_reads(),
                            MetaCV_Parameter(PARAM_FILE).get_min_qual(),
                            MetaCV_Parameter(PARAM_FILE).get_taxon(),
                            MetaCV_Parameter(PARAM_FILE).get_name())

          # run the annotation functions
          results = anno.manage_annotation()
          settings.set_step_number(results[0])
          files.set_blastn_output(absolute_path(results[1]))
          files.set_metacv_output(absolute_path(results[2]))
      
      if skip == 'analysis':
          skip_msg(skip)
      else:
          # init the analysis module
          analysis = Analysis(settings.get_threads(),
                              settings.get_step_number(),
                              settings.get_verbose(),
                              settings.get_actual_time(),
                              files.get_logdir(),
                              settings.get_annotation(),
                              files.get_output(),
                              files.get_parsed_db_dir(),
                              files.get_annotated_db_dir(),
                              files.get_subseted_db_dir(),
                              files.get_krona_report_dir(),
                              files.get_blastn_output(),
                              files.get_metacv_output(),
                              exe.get_Parser(), 
                              parse_parameter(blastParser_Parameter(PARAM_FILE)),
                              blastParser_Parameter(PARAM_FILE).get_name(),
                              exe.get_Annotate(),
                              parse_parameter(Rannotate_Parameter(PARAM_FILE)),
                              Rannotate_Parameter(PARAM_FILE).get_name(),
                              Rannotate_Parameter(PARAM_FILE).get_taxon_db(),
                              exe.get_Subset(),
                              subsetDB_Parameter(PARAM_FILE).get_bitscore(),
                              subsetDB_Parameter(PARAM_FILE).get_classifier(),
                              subsetDB_Parameter(PARAM_FILE).get_rank(),
                              subsetDB_Parameter(PARAM_FILE).get_taxon_db(),
                              exe.get_Krona_Blast(),
                              parse_parameter(Krona_Parameter(PARAM_FILE)),
                              Krona_Parameter(PARAM_FILE).get_name(),
                              settings.get_krona(),
                              exe.get_Perl_lib())
          # run the analysis function
          results = analysis.manage_analysis()
          files.set_parser_output(absolute_path(results[0]))
          files.set_annotated_output(absolute_path(results[1]))    
        
    except KeyboardInterrupt:
      sys.stdout.write('\nERROR 1: Operation cancelled by user!\n')
      sys.exit(1)

    # print ending message
    print_verbose('\nPIPELINE COMPLETE!\n\n')
    print_running_time(settings.get_actual_time())
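
One pitfall worth noting in the option definitions above: with action = 'store_true', argparse already supplies default = False, and a string default such as 'False' would be truthy. A short illustration (only the flag name is taken from the example):

  import argparse

  parser = argparse.ArgumentParser()
  # store_true implies default = False; a string default like 'False'
  # would make the unset flag truthy, because non-empty strings are truthy
  parser.add_argument('--use_contigs', dest = 'use_contigs', action = 'store_true',
                      help = 'use assembled contigs instead of raw reads')
  print(parser.parse_args([]).use_contigs)                  # False
  print(parser.parse_args(['--use_contigs']).use_contigs)   # True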
Example #14
def search_xml(file_path, target_qid, tag):
    # walk all elements with the given tag and return the first one
    # whose id attribute matches the requested query id
    root = ET.parse(file_path).getroot()
    for response in root.iter(tag):
        # attrib.get avoids a KeyError on elements without an id attribute
        if response.attrib.get('id') == target_qid:
            return xmltodict.parse(to_string(response))
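
A hedged usage sketch: assuming a file responses.xml that contains elements such as <response id="q42">...</response>, the lookup would read:

 # the file name, target id and tag below are illustrative
 result = search_xml('responses.xml', 'q42', 'response')
 if result is not None:
     print(result)

search_xml returns None implicitly when no element matches, so the caller should test the result before using it.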