def submit(self):
    """Put one initial Mapper job on ``self.q`` for every configured sample.

    Each job's parameters start as a shallow copy of ``self.config`` and are
    then overridden with the sample's working/finished directories, the
    sample name, and a reference pointing at ``I000_contigs.fasta`` inside
    the sample's working directory. Paired-end reads are copied only when
    both ``PE1`` and ``PE2`` are present; single-end reads when ``SE`` is.
    """
    logger.info("Submitting initial mapping runs.")
    samples = self.config['Samples']
    for sample in samples:
        sample_cfg = samples[sample]

        # Start from the global settings, then layer the per-sample values.
        params = {key: self.config[key] for key in self.config}
        working_dir = sample_cfg['working_dir']
        params['working_dir'] = working_dir
        params['finished_dir'] = sample_cfg['finished_dir']
        params['reference'] = os.path.join(working_dir, 'I000_contigs.fasta')
        params['sample'] = sample

        # Paired-end files are only meaningful as a pair.
        if 'PE1' in sample_cfg and 'PE2' in sample_cfg:
            for read_key in ('PE1', 'PE2'):
                params[read_key] = sample_cfg[read_key]
        if 'SE' in sample_cfg:
            params['SE'] = sample_cfg['SE']

        self.q.put(Mapper.to_job(params))
def start(self):
    """Send every target of this iteration to its next destination.

    For each folder in ``self.params['targets']`` the first token of the
    folder's ``finished`` file is read as the assembly status. The target is
    then handed to ``self.write_target`` with one of two output files:

    * ``<finished_dir>/contigs.fasta`` (opened for append) when the target
      is retired: the assembly was killed, the sample reached
      ``numcycles``, the read count grew by more than
      ``max_incorporation``, or no additional reads were incorporated.
    * ``<working_dir>/I<iteration>_contigs.fasta`` (opened for write, so
      truncated) when the target goes back for another round of mapping.

    The return values of the re-mapping ``write_target`` calls are summed;
    if the sum is positive a new Mapper job that uses the re-mapping file
    as its reference is passed to ``self.submit``.

    Both output files are managed by a ``with`` block so they are flushed
    and closed even when reading a status file or writing a target raises.

    Raises:
        KeyError: if a required entry is missing from ``self.params``.
        IOError/OSError: if an output or ``finished`` file cannot be opened.
        IndexError: if a ``finished`` file has an empty first line.
    """
    params = self.params
    sample = params['sample']
    logger.info("Sample: %s Starting finisher", sample)
    iteration = params['iteration']
    targets_written = 0

    finished_path = os.path.join(params['finished_dir'], 'contigs.fasta')
    remap_path = os.path.join(params['working_dir'],
                              'I%03d_contigs.fasta' % iteration)

    with open(finished_path, 'a') as fin_outf, \
            open(remap_path, 'w') as remap_outf:
        # Once the cycle limit is reached every target is final.
        sample_finished = iteration >= params['numcycles']

        for target_folder in params['targets']:
            # The last path component is the filesystem-safe target name.
            safe_target = target_folder.split("/")[-1]
            target = params['safe_targets'][safe_target]
            # Original author's note: readcounts is a counter, so looking
            # up an iteration that was never recorded does not raise.
            cur_reads = params['readcounts'][target][iteration]
            previous_reads = params['readcounts'][target][iteration - 1]

            status_path = os.path.join(target_folder, 'finished')
            with open(status_path, 'r') as finishedf:
                status = finishedf.readline().strip().split()[0]

            logger.info("Sample: %s target: %s finishing target..",
                        sample, target)
            logger.info(
                "Sample: %s target: %s iteration: %s Assembly reports status: %s.",
                sample, target, iteration, status)

            wants_read_mapping = status in ('assembly_failed',
                                            'map_against_reads')
            # Read counts can only be compared after the first iteration
            # and when neither count is zero (also guards the division).
            comparable = (iteration > 1 and cur_reads != 0
                          and previous_reads != 0)

            if status == 'assembly_killed':
                # Only the reads are written; a killed assembly has no contigs.
                self.write_target(target, target_folder, outf=fin_outf,
                                  finished=False, map_against_reads=False,
                                  killed=True)
            elif sample_finished:
                # Everything goes into the final file/folders.
                self.write_target(target, target_folder, outf=fin_outf,
                                  finished=True)
            elif (wants_read_mapping and cur_reads > previous_reads
                    and iteration < 3):
                # Only map against reads while mapping is still improving
                # and this has not gone on for multiple iterations.
                targets_written += self.write_target(
                    target, target_folder, outf=remap_outf, finished=False,
                    map_against_reads=True)
            # NOTE(review): with integer counts under Python 2 this ratio is
            # floor division - confirm that truncation is intended.
            elif (comparable and
                    cur_reads / previous_reads > params['max_incorporation']):
                logger.info(
                    "Sample %s target %s hit a repetitive region, no more mapping will be done",
                    sample, target)
                self.write_target(target, target_folder, outf=fin_outf,
                                  finished=True, status='Repeat')
            elif comparable and cur_reads <= previous_reads and iteration > 2:
                # The mapper gets a couple of extra iterations in case the
                # first mapping recruited many reads that did not assemble.
                logger.info(
                    "Sample %s target %s did not incorporate any more reads, no more mapping will be done",
                    sample, target)
                self.write_target(target, target_folder, outf=fin_outf,
                                  finished=True)
            else:
                # Nothing special: write the contigs out for re-mapping.
                targets_written += self.write_target(
                    target, target_folder, outf=remap_outf, finished=False)

    if targets_written > 0:
        # Imported here, as in the original, rather than at module level.
        from ARC.runners import Mapper
        # The new job gets every parameter except the per-iteration targets.
        mapper_params = {key: params[key] for key in params
                         if key != 'targets'}
        mapper_params['reference'] = remap_path
        self.submit(Mapper.to_job(mapper_params))
        logger.info("Sample: %s Added new mapper to queue: iteration %s",
                    sample, iteration)
    else:
        logger.info("Sample: %s Mapper not added to queue. Work finished.",
                    sample)
def start(self):
    """Send every target of this iteration to its next destination.

    For each folder in ``self.params['targets']`` the first token of the
    folder's ``finished`` file is read as the assembly status. The target is
    then handed to ``self.write_target`` with one of two output files:

    * ``<finished_dir>/contigs.fasta`` (opened for append) when the target
      is retired: the assembly was killed, the sample reached
      ``numcycles``, the read count grew by more than
      ``max_incorporation``, or no additional reads were incorporated.
    * ``<working_dir>/I<iteration>_contigs.fasta`` (opened for write, so
      truncated) when the target goes back for another round of mapping.

    The return values of the re-mapping ``write_target`` calls are summed;
    if the sum is positive a new Mapper job that uses the re-mapping file
    as its reference is passed to ``self.submit``.

    Both output files are managed by a ``with`` block so they are flushed
    and closed even when reading a status file or writing a target raises.

    Raises:
        KeyError: if a required entry is missing from ``self.params``.
        IOError/OSError: if an output or ``finished`` file cannot be opened.
        IndexError: if a ``finished`` file has an empty first line.
    """
    params = self.params
    sample = params['sample']
    logger.info("Sample: %s Starting finisher", sample)
    iteration = params['iteration']
    targets_written = 0

    finished_path = os.path.join(params['finished_dir'], 'contigs.fasta')
    remap_path = os.path.join(params['working_dir'],
                              'I%03d_contigs.fasta' % iteration)

    with open(finished_path, 'a') as fin_outf, \
            open(remap_path, 'w') as remap_outf:
        # Once the cycle limit is reached every target is final.
        sample_finished = iteration >= params['numcycles']

        for target_folder in params['targets']:
            # The last path component is the filesystem-safe target name.
            safe_target = target_folder.split("/")[-1]
            target = params['safe_targets'][safe_target]
            # Original author's note: readcounts is a counter, so looking
            # up an iteration that was never recorded does not raise.
            cur_reads = params['readcounts'][target][iteration]
            previous_reads = params['readcounts'][target][iteration - 1]

            status_path = os.path.join(target_folder, 'finished')
            with open(status_path, 'r') as finishedf:
                status = finishedf.readline().strip().split()[0]

            logger.info("Sample: %s target: %s finishing target..",
                        sample, target)
            logger.info(
                "Sample: %s target: %s iteration: %s Assembly reports status: %s.",
                sample, target, iteration, status)

            wants_read_mapping = status in ('assembly_failed',
                                            'map_against_reads')
            # Read counts can only be compared after the first iteration
            # and when neither count is zero (also guards the division).
            comparable = (iteration > 1 and cur_reads != 0
                          and previous_reads != 0)

            if status == 'assembly_killed':
                # Only the reads are written; a killed assembly has no contigs.
                self.write_target(target, target_folder, outf=fin_outf,
                                  finished=False, map_against_reads=False,
                                  killed=True)
            elif sample_finished:
                # Everything goes into the final file/folders.
                self.write_target(target, target_folder, outf=fin_outf,
                                  finished=True)
            elif (wants_read_mapping and cur_reads > previous_reads
                    and iteration < 3):
                # Only map against reads while mapping is still improving
                # and this has not gone on for multiple iterations.
                targets_written += self.write_target(
                    target, target_folder, outf=remap_outf, finished=False,
                    map_against_reads=True)
            # NOTE(review): with integer counts under Python 2 this ratio is
            # floor division - confirm that truncation is intended.
            elif (comparable and
                    cur_reads / previous_reads > params['max_incorporation']):
                logger.info(
                    "Sample %s target %s hit a repetitive region, no more mapping will be done",
                    sample, target)
                self.write_target(target, target_folder, outf=fin_outf,
                                  finished=True, status='Repeat')
            elif comparable and cur_reads <= previous_reads and iteration > 2:
                # The mapper gets a couple of extra iterations in case the
                # first mapping recruited many reads that did not assemble.
                logger.info(
                    "Sample %s target %s did not incorporate any more reads, no more mapping will be done",
                    sample, target)
                self.write_target(target, target_folder, outf=fin_outf,
                                  finished=True)
            else:
                # Nothing special: write the contigs out for re-mapping.
                targets_written += self.write_target(
                    target, target_folder, outf=remap_outf, finished=False)

    if targets_written > 0:
        # Imported here, as in the original, rather than at module level.
        from ARC.runners import Mapper
        # The new job gets every parameter except the per-iteration targets.
        mapper_params = {key: params[key] for key in params
                         if key != 'targets'}
        mapper_params['reference'] = remap_path
        self.submit(Mapper.to_job(mapper_params))
        logger.info("Sample: %s Added new mapper to queue: iteration %s",
                    sample, iteration)
    else:
        logger.info("Sample: %s Mapper not added to queue. Work finished.",
                    sample)