Пример #1
0
 def blastn(self, outputdir):
     """Run blastn on the first input file and remember where the result is.

     The output directory Settings.output/outputdir is created via
     createOutputDir. If testForFQ reports fastq input, convertToFasta is
     called first. The path of the blastn result file is stored in
     Settings.blast_output and Settings.actual_time is reset afterwards.

     outputdir -- name of the sub directory of Settings.output to write to

     Always returns True; the exit status of blastn is not evaluated.
     """
     createOutputDir(Settings.output + os.sep + outputdir)

     # fastq files need conversion into fasta for blastn; several input
     # files are handed to testForFQ as one space separated string
     if len(Settings.input) > 1:
         probe = ' '.join(str(i) for i in Settings.input)
     else:
         probe = Settings.input[0]
     if testForFQ(probe):
         self.convertToFasta(outputdir)

     # announce the step on the console
     sys.stdout.write('\nStep:       Classify with Blastn \n')
     sys.stdout.write('Arguments: ' + ParamFileArguments(Blastn_Parameter()) + '\n')

     # outfmt 5 gets an .xml file name, every other format a .tab file name
     outfile = 'blastn.xml' if Blastn_Parameter().outfmt == 5 else 'blastn.tab'
     result_path = Settings.output + os.sep + outputdir + os.sep + outfile

     # run blastn and block until it has finished
     command = '%s -db %s -query %s -out %s -num_threads %s %s ' % (
         Settings.BLASTN, Settings.blastdb_nt, Settings.input[0], result_path,
         Settings.threads, ParamFileArguments(Blastn_Parameter()))
     blast_process = subprocess.Popen(shlex.split(command))
     blast_process.wait()

     # keep the location of the blastn results for later steps
     Settings.blast_output = result_path

     # report how long this step took and restart the step timer
     elapsed = getDHMS(time.time() - Settings.actual_time)
     sys.stdout.write('processed in: ' + elapsed + '\n')
     Settings.actual_time = time.time()

     return True
Пример #2
0
 def concat(self, outputdir):
     """Concatenate the reads with the tool configured as Settings.CONCAT.

     Two input files are passed as '-i' and '-j', a single input file as
     '-i' only. The tool's stdout is written to 'alignments.txt' in the
     output directory; its stderr goes to 'concat.log.txt' below
     Settings.logdir and, with Settings.verbose, to the console as well.
     Afterwards Settings.input is set to 'concat-contigs.fastq' inside
     the output directory and Settings.actual_time is reset.

     outputdir -- name of the sub directory of Settings.output to write to

     Always returns True; the exit status of the tool is not evaluated.
     """
     target_dir = Settings.output + os.sep + outputdir
     createOutputDir(target_dir)
     # update information on cmd
     sys.stdout.write('\nStep:       Concatenate the reads \n')
     sys.stdout.write('Arguments: ' + ParamFileArguments(Concat_Parameter()) + '\n')

     if len(Settings.input) > 1:
         # for paired end files
         read_args = '-i %s -j %s' % (str(Settings.input[0]), str(Settings.input[1]))
     else:
         # for single end files: pass the file name itself, str() of the
         # whole list would hand "['name']" to the tool
         read_args = '-i %s' % str(Settings.input[0])

     command = shlex.split('%s %s -o %s -t %s %s ' % (
         Settings.CONCAT, read_args, target_dir, Settings.threads,
         ParamFileArguments(Concat_Parameter())))

     # both files are closed again as soon as the tool has finished
     with open(Settings.logdir + 'concat.log.txt', 'w') as concatlog:
         with open(target_dir + os.sep + 'alignments.txt', 'w') as alignments:
             if Settings.verbose:
                 # echo stderr to the console while logging it
                 p = subprocess.Popen(command, stdout=alignments,
                                      stderr=subprocess.PIPE)
                 for line in p.stderr:
                     sys.stdout.write(line)
                     concatlog.write(line)
             else:
                 p = subprocess.Popen(command, stdout=alignments,
                                      stderr=concatlog)
             p.wait()

     # move files from the first level to the specified output folder
     moveFiles(Settings.output + os.sep, target_dir + os.sep, '.fastq')
     # update the Settings.input var for further processing
     Settings.input = [target_dir + os.sep + 'concat-contigs.fastq']

     # print out the processing time of this step
     sys.stdout.write('processed in: ' + getDHMS(time.time() - Settings.actual_time) + '\n')
     Settings.actual_time = time.time()

     return True
Пример #3
0
    def trimming(self, outputdir):
        """Quality trim and filter the input files with Settings.TRIMGALORE.

        The tool's stderr is written to 'trim.log.txt' below
        Settings.logdir and, with Settings.verbose, echoed to the console.
        Without Settings.verbose its stdout is discarded. Afterwards
        Settings.input is replaced by the files in the output directory
        whose name ends with '.fq' and contains 'val', and
        Settings.actual_time is reset.

        outputdir -- name of the sub directory of Settings.output to write to

        Always returns True; the exit status of the tool is not evaluated.
        Raises IndexError when fewer result files are found than needed.
        """
        target_dir = Settings.output + os.sep + outputdir
        createOutputDir(target_dir)
        # update information on cmd
        sys.stdout.write('\nStep:       Quality trimming and filtering \n')
        sys.stdout.write('Arguments: ' + ParamFileArguments(TrimGalore_Parameter()) + '\n\n')

        command = shlex.split('%s %s -o %s %s' % (
            Settings.TRIMGALORE, ParamFileArguments(TrimGalore_Parameter()),
            target_dir, ' '.join(str(i) for i in Settings.input)))

        # start TrimGalore and wait until task complete
        with open(Settings.logdir + 'trim.log.txt', 'w') as trimlog:
            if Settings.verbose:
                p = subprocess.Popen(command, stderr=subprocess.PIPE)
                for line in p.stderr:
                    sys.stdout.write(line)
                    trimlog.write(line)
                p.wait()
            else:
                # discard stdout through os.devnull: a PIPE that nobody
                # reads can fill up and block the child while we wait()
                with open(os.devnull, 'w') as devnull:
                    p = subprocess.Popen(command, stdout=devnull, stderr=trimlog)
                    p.wait()

        # search for the processed input files (one directory scan only)
        validated = [f for f in os.listdir(target_dir)
                     if f.endswith('.fq') and 'val' in f]

        # update input files in settings object
        if len(Settings.input) > 1:
            # NOTE(review): os.listdir returns names in arbitrary order, so
            # which mate ends up first is not guaranteed - confirm the
            # intended order and sort explicitly if it matters.
            Settings.input = [target_dir + os.sep + validated[1],
                              target_dir + os.sep + validated[0]]
        else:
            Settings.input = [target_dir + os.sep + validated[0]]

        # print out the processing time of this step
        sys.stdout.write('processed in: ' + getDHMS(time.time() - Settings.actual_time) + '\n')
        Settings.actual_time = time.time()

        return True
Пример #4
0
    def fastqc(self, outputdir):
        """Run Settings.FASTQC on all input files and collect its data files.

        Every file below the output directory whose name ends with
        '_data.txt' is appended to Settings.quality_report, and
        Settings.actual_time is reset afterwards.

        outputdir -- name of the sub directory of Settings.output to write to

        Always returns True; the exit status of the tool is not evaluated.
        """
        report_dir = Settings.output + os.sep + outputdir
        createOutputDir(report_dir)

        # announce the step on the console
        sys.stdout.write('\nStep:       Quality analysis with FastQC \n')
        sys.stdout.write('Arguments: ' + ParamFileArguments(FastQC_Parameter()) + '\n\n')

        # launch FastQC and block until it has finished
        command = '%s -t %s -o %s -q --extract %s %s' % (
            Settings.FASTQC, Settings.threads, report_dir,
            ParamFileArguments(FastQC_Parameter()),
            ' '.join(str(i) for i in Settings.input))
        fastqc_process = subprocess.Popen(shlex.split(command))
        fastqc_process.wait()

        # remember every '_data.txt' file of the report for log purposes
        for root, _subdirs, names in os.walk(report_dir + os.sep):
            Settings.quality_report.extend(
                os.path.join(root, name)
                for name in names if name.endswith('_data.txt'))

        # report how long this step took and restart the step timer
        elapsed = getDHMS(time.time() - Settings.actual_time)
        sys.stdout.write('processed in: ' + elapsed + '\n')
        Settings.actual_time = time.time()

        return True
Пример #5
0
    def assembly(self, outputdir):
        """Assemble the input with velveth, velvetg and MetaVelvet in turn.

        All three tools work on the directory Settings.output/outputdir.
        For each tool stdout is written to '<name>.log.txt' and stderr to
        '<name>.err.txt' below Settings.logdir; with Settings.verbose the
        stdout is echoed to the console as well. Afterwards Settings.input
        is set to 'meta-velvetg.contigs.fa' inside the assembly directory
        and Settings.actual_time is reset.

        outputdir -- name of the sub directory of Settings.output to work in

        Always returns True; the exit status of the tools is not evaluated.
        """
        assembly_dir = Settings.output + os.sep + outputdir

        def run_logged(command, logname):
            """Run command and wait; log stdout/stderr under logname."""
            arguments = shlex.split(command)
            # the with blocks close both log files once the tool is done
            with open(Settings.logdir + logname + '.log.txt', 'w') as log:
                with open(Settings.logdir + logname + '.err.txt', 'w') as err:
                    if Settings.verbose:
                        # echo stdout to the console while logging it
                        p = subprocess.Popen(arguments, stdout=subprocess.PIPE,
                                             stderr=err)
                        for line in p.stdout:
                            sys.stdout.write(line)
                            log.write(line)
                    else:
                        p = subprocess.Popen(arguments, stdout=log, stderr=err)
                    p.wait()

        createOutputDir(assembly_dir + os.sep)
        # update information on cmd
        sys.stdout.write('\nStep:       Creating Hastables \n')
        sys.stdout.write('Arguments: ' + ParamFileArguments(Velveth_Parameter()) + '\n\n')
        # start velveth and wait for completion
        run_logged('%s %s %s %s -fmtAuto %s ' % (
            Settings.VELVETH, assembly_dir, Settings.kmer,
            ParamFileArguments(Velveth_Parameter()),
            ' '.join(str(i) for i in Settings.input)), 'velveth')

        # update information on cmd
        sys.stdout.write('\nStep:       Create Graph for Assembly \n')
        sys.stdout.write('Arguments: ' + ParamFileArguments(Velvetg_Parameter()) + '\n\n')
        # start velvetg to create the graph for the metagenomic assembly
        run_logged('%s %s %s' % (
            Settings.VELVETG, assembly_dir,
            ParamFileArguments(Velvetg_Parameter())), 'velvetg')

        # update information on cmd
        sys.stdout.write('\nStep:       Search for metagenomic Contigs \n')
        sys.stdout.write('Arguments: ' + ParamFileArguments(MetaVelvet_Parameter()) + '\n')
        # directory and arguments are separated by a space in both modes;
        # without it the arguments become part of the directory path
        run_logged('%s %s %s' % (
            Settings.METAVELVET, assembly_dir,
            ParamFileArguments(MetaVelvet_Parameter())), 'meta-velvetg')

        # update Settings.input for further processing
        Settings.input = [assembly_dir + os.sep + 'meta-velvetg.contigs.fa']

        # print out the processing time of this step
        sys.stdout.write('processed in: ' + getDHMS(time.time() - Settings.actual_time) + '\n')
        Settings.actual_time = time.time()

        return True
Пример #6
0
 def metaCV(self, outputdir):
     """Classify the input with Settings.METACV and summarise the result.

     Runs the sub commands 'classify', 'res2table' and 'res2sum' one
     after another. After each run moveFiles is called to bring the
     produced files from sys.path[0] into the output directory, and the
     path of one result file is appended to Settings.metaCV_output.
     Without Settings.verbose the stderr of all three runs is collected
     in 'metacv.log' below Settings.logdir; with Settings.verbose only
     the stderr of 'classify' is logged (and echoed to the console).
     Settings.actual_time is reset at the end.

     outputdir -- name of the sub directory of Settings.output to write to

     Always returns True; the exit status of the tool is not evaluated.
     Raises IndexError when an expected result file is missing.
     """
     result_dir = Settings.output + os.sep + outputdir
     logfile = Settings.logdir + 'metacv.log'

     def find_result(suffix):
         """Return the normalized path of the first file ending with suffix."""
         name = [f for f in os.listdir(result_dir) if f.endswith(suffix)][0]
         return os.path.normpath(os.path.join(sys.path[0] + os.sep,
                                              result_dir + os.sep, str(name)))

     createOutputDir(result_dir)
     # update the information of cmd
     sys.stdout.write('\nStep:       Classify with MetaCV \n')
     sys.stdout.write('Arguments: ' + ParamFileArguments(MetaCV_Parameter()) + '\n')

     # start metaCV and wait until completion ATTENTION: need 32GB RAM
     classify = shlex.split('%s classify %s %s %s %s' % (
         Settings.METACV, Settings.metacv_db,
         ' '.join(str(i) for i in Settings.input), 'metpipe',
         ParamFileArguments(MetaCV_Parameter())))
     with open(logfile, 'w') as metacvlog:
         # stdout is discarded through os.devnull: a PIPE that nobody
         # reads can fill up and block the child while we wait()
         with open(os.devnull, 'w') as devnull:
             if Settings.verbose:
                 p = subprocess.Popen(classify, stderr=subprocess.PIPE,
                                      stdout=devnull)
                 for line in p.stderr:
                     sys.stdout.write(line)
                     metacvlog.write(line)
             else:
                 p = subprocess.Popen(classify, stderr=metacvlog,
                                      stdout=devnull)
             p.wait()

     # move all necessary files into metacv output folder
     moveFiles(sys.path[0] + os.sep, result_dir, '.csv')
     moveFiles(sys.path[0] + os.sep, result_dir, '.res')
     moveFiles(sys.path[0] + os.sep, result_dir, '.faa')
     # save the path to the metaCV result file
     Settings.metaCV_output.append(find_result('.res'))

     # update the information of cmd
     sys.stdout.write('\nStep:       Create summary of MetaCV results \n')
     # create a summary of the metacv run
     res2table = shlex.split('%s res2table %s %s %s --threads=%s' % (
         Settings.METACV, Settings.metacv_db, Settings.metaCV_output[0],
         'metpipe.res2table', Settings.threads))
     if Settings.verbose:
         subprocess.Popen(res2table).wait()
     else:
         # append, so the log of the classify run is not wiped
         with open(logfile, 'a') as metacvlog:
             subprocess.Popen(res2table, stderr=metacvlog).wait()

     # move the summary to the output folder
     moveFiles(sys.path[0] + os.sep, result_dir + os.sep, '.res2table')
     moveFiles(sys.path[0] + os.sep, result_dir + os.sep, '.fun_hist')
     moveFiles(sys.path[0] + os.sep, result_dir + os.sep, '.tax_hist')
     # save the path to the summary of metacv
     # NOTE(review): only the first match is stored and os.listdir order is
     # arbitrary, so this may be either the .fun_hist or the .tax_hist file
     Settings.metaCV_output.append(find_result(('.fun_hist', '.tax_hist')))

     # create a list of all found taxa in the metacv result
     res2sum = shlex.split('%s res2sum %s %s %s' % (
         Settings.METACV, Settings.metacv_db,
         Settings.metaCV_output[0], 'metpipe.res2sum'))
     if Settings.verbose:
         subprocess.Popen(res2sum).wait()
     else:
         # append, so the log of the earlier runs is not wiped
         with open(logfile, 'a') as metacvlog:
             subprocess.Popen(res2sum, stderr=metacvlog).wait()

     # move the list to the output folder
     moveFiles(sys.path[0] + os.sep, result_dir + os.sep, '.res2sum')
     Settings.metaCV_output.append(find_result('.res2sum'))

     # print out the processing time of this step
     sys.stdout.write('processed in: ' + getDHMS(time.time() - Settings.actual_time) + '\n')
     Settings.actual_time = time.time()

     return True
Пример #7
0
    print('param file %s is not readable' % (args.param))
    sys.exit()

# create the global settings object
settings = Settings(args.kmer, args.threads, PROGRAM_DIR, args.verbose,
                    args.skip, starting_time, args.input, args.output,
                    args.output + os.sep + 'log' + os.sep, args.param,
                    args.trim, args.quality, args.assembler, args.classify,
                    args.summary)

# fill the pipeline with tasks
queue = createTasks(settings, Programs())
# print the summary of the settings
consoleSummary(settings)

# working queue - run until queue is empty or an error occurred;
# a task signals failure by returning a false value
while queue:
    actualElement = queue.popleft()
    if not actualElement.getTask()(actualElement.getOutputDir()):
        sys.stderr.write(
            'ERROR!!! \nPlease check the log files for further information')
        # parenthesised form prints the same text under Python 2 and 3
        print('\nPIPELINE NOT COMPLETE')
        # non-zero exit status so callers can detect the failed run
        sys.exit(1)

sys.stdout.write('\nPIPELINE COMPLETE!\n\n')
sys.stdout.write('processed in ' +
                 getDHMS(time.time() - Settings.starting_time) + '\n')