def _precommand_initiation(self, input_fp, output_dir, working_dir, params):
    """Prepare the reference data needed by the selected detection method.

    For 'blast_fragments' a blast database is built in working_dir from
    params['reference_seqs_fp'].  For 'ChimeraSlayer' the reference files
    are staged in working_dir and the blast database and the index file
    are built up front.  All files reported or created along the way are
    added to self.files_to_remove.

    input_fp: not used by this method
    output_dir: not used by this method
    working_dir: directory receiving the blast db (blast_fragments) or the
     copied/degapped reference files (ChimeraSlayer)
    params: dict of options, updated in place.  'chimera_detection_method'
     selects the branch; 'blast_db' is set for blast_fragments, while
     'aligned_reference_seqs_fp' and 'reference_seqs_fp' are repointed to
     the files in working_dir for ChimeraSlayer.

    Raises ValueError if the chimera detection method is not recognized.
    """
    method = params['chimera_detection_method']

    if method == 'blast_fragments':
        blast_db, db_files_to_remove = build_blast_db_from_fasta_path(
            params['reference_seqs_fp'], output_dir=working_dir)
        self.files_to_remove += db_files_to_remove
        params['blast_db'] = blast_db

    elif method == 'ChimeraSlayer':
        # Copy the reference files to working dir.
        # ChimeraSlayer creates an index file of the ref and
        # will crash without write permission in the ref seqs dir.
        aligned_reference_seqs_fp = params['aligned_reference_seqs_fp']
        _, new_ref_filename = split(aligned_reference_seqs_fp)
        copy(aligned_reference_seqs_fp, working_dir)
        aligned_reference_seqs_fp = working_dir + "/" + new_ref_filename

        self.files_to_remove.append(aligned_reference_seqs_fp)
        params['aligned_reference_seqs_fp'] = aligned_reference_seqs_fp

        # If given, also copy the unaligned ref db ...
        reference_seqs_fp = params['reference_seqs_fp']
        if reference_seqs_fp:
            _, new_ref_filename = split(reference_seqs_fp)
            copy(reference_seqs_fp, working_dir)
            reference_seqs_fp = working_dir + "/" + new_ref_filename
        else:
            # ... otherwise create it by degapping the aligned reference.
            # The handle is closed as soon as the degapped file is written.
            with open(aligned_reference_seqs_fp) as aligned_ref_f:
                reference_seqs_fp = write_degapped_fasta_to_file(
                    parse_fasta(aligned_ref_f), tmp_dir=working_dir)
        # Either way the file lives in working_dir: delete it afterwards.
        self.files_to_remove.append(reference_seqs_fp)
        params['reference_seqs_fp'] = reference_seqs_fp

        # Build blast db of reference, otherwise ChimeraSlayer will do it
        # and parallel jobs clash.
        _, db_files_to_remove = \
            build_blast_db_from_fasta_path(reference_seqs_fp)
        self.files_to_remove += db_files_to_remove

        # Make the index file globally.
        # Reason: ChimeraSlayer first checks to see if the index file is
        # there. If not it tries to create it. This can lead to a race
        # condition if several parallel jobs try to create it at the same
        # time.
        make_cidx_file(aligned_reference_seqs_fp)
        self.files_to_remove.append(aligned_reference_seqs_fp + ".cidx")

    else:
        raise ValueError("Unrecognized chimera detection method '%s'."
                         % params['chimera_detection_method'])
def _precommand_initiation(self, input_fp, output_dir, working_dir, params):
    """Set up reference files for the requested chimera detection method.

    'blast_fragments': build a blast database from
    params['reference_seqs_fp'] inside working_dir and store it in
    params['blast_db'].
    'ChimeraSlayer': copy the aligned reference (and the unaligned one, or
    a degapped version of the aligned one if no unaligned file is given)
    into working_dir, then build the blast database and the index file
    before any job starts.

    input_fp: not used by this method
    output_dir: not used by this method
    working_dir: directory in which the databases and copies are placed
    params: dict of options; modified in place ('blast_db',
     'aligned_reference_seqs_fp' and 'reference_seqs_fp' may be rewritten)

    Files reported or created here are appended to self.files_to_remove.
    Raises ValueError if the chimera detection method is not recognized.
    """
    detection_method = params['chimera_detection_method']

    if detection_method == 'blast_fragments':
        blast_db, db_files_to_remove = build_blast_db_from_fasta_path(
            params['reference_seqs_fp'], output_dir=working_dir)
        self.files_to_remove += db_files_to_remove
        params['blast_db'] = blast_db

    elif detection_method == 'ChimeraSlayer':
        # copy the reference files to working dir
        # ChimeraSlayer creates an index file of the ref and
        # will crash without write permission in the ref seqs dir
        aligned_reference_seqs_fp = params['aligned_reference_seqs_fp']
        _, new_ref_filename = split(aligned_reference_seqs_fp)
        copy(aligned_reference_seqs_fp, working_dir)
        aligned_reference_seqs_fp = working_dir + "/" + new_ref_filename

        self.files_to_remove.append(aligned_reference_seqs_fp)
        params['aligned_reference_seqs_fp'] = aligned_reference_seqs_fp

        # if given, also copy the unaligned ref db
        reference_seqs_fp = params['reference_seqs_fp']
        if reference_seqs_fp:
            _, new_ref_filename = split(reference_seqs_fp)
            copy(reference_seqs_fp, working_dir)
            reference_seqs_fp = working_dir + "/" + new_ref_filename
        else:
            # otherwise create it; use a context manager so the handle on
            # the aligned reference does not stay open
            with open(aligned_reference_seqs_fp) as aligned_ref_f:
                reference_seqs_fp = write_degapped_fasta_to_file(
                    MinimalFastaParser(aligned_ref_f),
                    tmp_dir=working_dir)
        # delete it afterwards
        self.files_to_remove.append(reference_seqs_fp)
        params['reference_seqs_fp'] = reference_seqs_fp

        # build blast db of reference, otherwise ChimeraSlayer will do it
        # and parallel jobs clash
        _, db_files_to_remove = \
            build_blast_db_from_fasta_path(reference_seqs_fp)
        self.files_to_remove += db_files_to_remove

        # make the index file globally
        # Reason: ChimeraSlayer first checks to see if the index file is
        # there. If not it tries to create it. This can lead to race
        # condition if several parallel jobs try to create it at the same
        # time.
        make_cidx_file(aligned_reference_seqs_fp)
        self.files_to_remove.append(aligned_reference_seqs_fp + ".cidx")

    else:
        raise ValueError("Unrecognized chimera detection method '%s'."
                         % params['chimera_detection_method'])
def get_chimeras_from_Nast_aligned(seqs_fp, ref_db_aligned_fp=None,
                                   ref_db_fasta_fp=None,
                                   HALT_EXEC=False, min_div_ratio=None,
                                   keep_intermediates=False):
    """Identify chimeras in seqs_fp using ChimeraSlayer.

    seqs_fp: a filepath with the seqs to check in the file
    ref_db_aligned_fp: fp to (pynast) aligned reference sequences
    ref_db_fasta_fp: same seqs as above, just unaligned. Will be computed
     on the fly if not provided.
    HALT_EXEC: passed through to the ChimeraSlayer app controller
    min_div_ratio: passed to ChimeraSlayer App (as '-R') when not None
    keep_intermediates: if False, the app's intermediate files and the
     degapped reference file generated here are removed before returning

    Returns the result of parse_CPS_file on the app's 'CPS' output.
    """
    files_to_remove = []

    # might come in as FilePath object with quotes
    seqs_fp = str(seqs_fp).strip('"')

    seqs_dir, new_seqs_fp = split(seqs_fp)

    # if fp is in current dir, we fake a dir change
    if seqs_dir == "":
        seqs_dir = "./"

    # Chimera Slayer puts some temp files in current dir and some in dir
    # of input file. Use exec_dir to change to dir of input file, so to
    # have all tmp files in one place.
    params = {'--query_NAST': new_seqs_fp,
              '--exec_dir': seqs_dir}

    # With neither reference given the default db is used, whose position
    # relative to the ChimeraSlayer binary is hardcoded.
    if ref_db_aligned_fp is not None or ref_db_fasta_fp is not None:
        # NOTE(review): ref_db_aligned_fp is required on this path, both
        # for degapping and for the abspath() call below.
        if not ref_db_fasta_fp:
            # make degapped reference file; close the aligned reference
            # as soon as it has been consumed
            with open(ref_db_aligned_fp) as aligned_ref_f:
                ref_db_fasta_fp = write_degapped_fasta_to_file(
                    MinimalFastaParser(aligned_ref_f))
            files_to_remove.append(ref_db_fasta_fp)
        # use user db
        params.update({'--db_NAST': abspath(ref_db_aligned_fp),
                       '--db_FASTA': abspath(ref_db_fasta_fp)})

    if min_div_ratio is not None:
        params.update({'-R': min_div_ratio})

    app = ChimeraSlayer(params=params, HALT_EXEC=HALT_EXEC)
    app_results = app()

    # TODO: app_results['CPS'] is a FilePath object in case of success.
    # There is no check for failure here yet (e.g. a missing output file).
    chimeras = parse_CPS_file((app_results['CPS']))

    if not keep_intermediates:
        app.remove_intermediate_files()
        remove_files(files_to_remove)

    return chimeras
def get_chimeras_from_Nast_aligned(seqs_fp, ref_db_aligned_fp=None,
                                   ref_db_fasta_fp=None,
                                   HALT_EXEC=False, min_div_ratio=None,
                                   keep_intermediates=False):
    """Run ChimeraSlayer on seqs_fp and return the parsed chimera report.

    seqs_fp: a filepath with the seqs to check in the file
    ref_db_aligned_fp: fp to (pynast) aligned reference sequences
    ref_db_fasta_fp: same seqs as above, just unaligned. Will be computed
     on the fly if not provided.
    HALT_EXEC: handed to the ChimeraSlayer app controller unchanged
    min_div_ratio: passed to ChimeraSlayer App via '-R' unless it is None
    keep_intermediates: when False, intermediate files of the app and the
     temporary degapped reference created here are deleted at the end

    Returns whatever parse_CPS_file yields for the app's 'CPS' result.
    """
    files_to_remove = []

    # might come in as FilePath object with quotes
    seqs_fp = str(seqs_fp).strip('"')

    seqs_dir, new_seqs_fp = split(seqs_fp)

    # if fp is in current dir, we fake a dir change
    if seqs_dir == "":
        seqs_dir = "./"

    # Chimera Slayer puts some temp files in current dir and some in dir
    # of input file. Use exec_dir to change to dir of input file, so to
    # have all tmp files in one place.
    params = {'--query_NAST': new_seqs_fp,
              '--exec_dir': seqs_dir}

    use_default_db = ref_db_aligned_fp is None and ref_db_fasta_fp is None
    if not use_default_db:
        # A user db was supplied. (The default db, used otherwise, sits at
        # a hardcoded position relative to the ChimeraSlayer binary.)
        # NOTE(review): the aligned reference must be provided here; it is
        # opened for degapping and passed to abspath() below.
        if not ref_db_fasta_fp:
            # make degapped reference file without leaking the handle
            with open(ref_db_aligned_fp) as aligned_ref_f:
                ref_db_fasta_fp = write_degapped_fasta_to_file(
                    MinimalFastaParser(aligned_ref_f))
            files_to_remove.append(ref_db_fasta_fp)
        params.update({'--db_NAST': abspath(ref_db_aligned_fp),
                       '--db_FASTA': abspath(ref_db_fasta_fp)})

    if min_div_ratio is not None:
        params.update({'-R': min_div_ratio})

    app = ChimeraSlayer(params=params, HALT_EXEC=HALT_EXEC)
    app_results = app()

    # TODO: app_results['CPS'] is a FilePath object in case of success.
    # Failure (e.g. no output file written) is not detected here yet.
    chimeras = parse_CPS_file((app_results['CPS']))

    if not keep_intermediates:
        app.remove_intermediate_files()
        remove_files(files_to_remove)

    return chimeras