def extract_reads_subsample(data_folder, adaID, fragment, n_reads, VERBOSE=0,
                            summary=True):
    '''Extract a subsample of reads from the initial sample premapped

    The reads are taken from the BAM file named by get_divided_filename and
    written to the BAM file named by get_mapped_filename (n_iter=1).

    Parameters:
       data_folder, adaID, fragment: forwarded to the filename helpers
       n_reads: subsample size forwarded to extract_mapped_reads_subsample
       VERBOSE: verbosity level forwarded to extract_mapped_reads_subsample
       summary: if true, append the number of reads written to the summary file

    Returns:
       None
    '''
    from hivwholeseq.utils.mapping import extract_mapped_reads_subsample

    bam_in = get_divided_filename(data_folder, adaID, fragment, type='bam')
    bam_out = get_mapped_filename(data_folder, adaID, fragment,
                                  n_iter=1, type='bam')

    n_written = extract_mapped_reads_subsample(bam_in, bam_out, n_reads,
                                               VERBOSE=VERBOSE)

    # Nothing more to do unless the caller wants a summary entry
    if not summary:
        return

    summary_fn = get_summary_fn(data_folder, adaID, fragment)
    with open(summary_fn, 'a') as f:
        f.write('\n')
        f.write('Subsample of reads copied: ' + str(n_written))
        f.write('\n')
def extract_reads_subsample(data_folder, adaID, fragment, n_reads, VERBOSE=0,
                            summary=True):
    '''Extract a subsample of reads from the initial sample premapped

    Source and destination are the BAM files given by get_divided_filename and
    get_mapped_filename (n_iter=1), respectively. If summary is true, the
    number of reads reported by extract_mapped_reads_subsample is appended to
    the summary file. Returns None.
    '''
    # NOTE(review): a function with this exact name is also defined right
    # before this one; being the later definition, this copy is the one bound
    # to the name. Consider keeping a single copy.
    from hivwholeseq.utils.mapping import extract_mapped_reads_subsample

    src_bam = get_divided_filename(data_folder, adaID, fragment, type='bam')
    dst_bam = get_mapped_filename(data_folder, adaID, fragment,
                                  n_iter=1, type='bam')

    n_written = extract_mapped_reads_subsample(src_bam, dst_bam, n_reads,
                                               VERBOSE=VERBOSE)

    if summary:
        # Blank line, report line, newline: same three writes, listed up front
        pieces = ['\n',
                  'Subsample of reads copied: '+str(n_written),
                  '\n']
        with open(get_summary_fn(data_folder, adaID, fragment), 'a') as f:
            for piece in pieces:
                f.write(piece)
def map_stampy_singlethread(sample, fragment, VERBOSE=0, n_pairs=-1,
                            summary=True, only_chunk=None, filtered=True):
    '''Map using stampy, single thread (no cluster queueing race conditions)

    Parameters:
       sample: object exposing .patient, .PCR, .regions_complete,
          .sequencing_run['folder'] and the keys 'patient sample' and 'adapter'
       fragment: fragment name; the first entry of sample.regions_complete
          that contains it is used as the specific fragment (e.g. F5 --> F5bi)
       VERBOSE: verbosity level; >= 2 also prints the stampy command line
       n_pairs: if > 0, map only a subsample of this size, extracted into a
          temporary BAM file that is deleted before returning
       summary: if true, append progress/failure lines to the summary file
       only_chunk: forwarded to the filename helpers; temporary mapping files
          are removed only when this is None
       filtered: forwarded to get_input_filename

    Raises:
       ValueError: if no specific fragment matches or the input file is missing
    '''
    # NOTE(review): `samplename` is never assigned in this function, so it has
    # to come from module scope -- TODO confirm it is defined there, or derive
    # it from `sample`.
    pname = sample.patient
    samplename_pat = sample['patient sample']
    data_folder = sample.sequencing_run['folder']
    adaID = sample['adapter']
    PCR = int(sample.PCR)

    if VERBOSE:
        print('Map via stampy (single thread): '+samplename+' '+fragment)

    if summary:
        summary_filename = get_map_initial_summary_filename(pname, samplename_pat,
                                                            samplename, fragment,
                                                            PCR=PCR)

    # Specific fragment (e.g. F5 --> F5bi)
    frags_spec = [reg for reg in sample.regions_complete if fragment in reg]
    if not frags_spec:
        if summary:
            with open(summary_filename, 'a') as f:
                f.write('Failed (specific fragment for '+fragment+' not found).\n')
        raise ValueError(samplename+': fragment '+fragment+' not found.')
    frag_spec = frags_spec[0]

    input_filename = get_input_filename(data_folder, adaID, frag_spec,
                                        type='bam', only_chunk=only_chunk,
                                        filtered=filtered)

    # NOTE: we introduced fragment nomenclature late, e.g. F3a. Check for that
    if fragment == 'F3' and not os.path.isfile(input_filename):
        input_filename = input_filename.replace('F3a', 'F3')

    # Check existance of input file, because stampy creates output anyway
    if not os.path.isfile(input_filename):
        if summary:
            with open(summary_filename, 'a') as f:
                f.write('Failed (input file for mapping not found).\n')
        raise ValueError(samplename+', fragment '+fragment+': input file not found.')

    input_filename_sub = None
    try:
        # Extract subsample of reads if requested
        if n_pairs > 0:
            from hivwholeseq.utils.mapping import extract_mapped_reads_subsample
            input_filename_sub = get_mapped_to_initial_filename(
                pname, samplename_pat, samplename, fragment,
                PCR=PCR, type='bam')[:-4]+'_unmapped.bam'
            extract_mapped_reads_subsample(input_filename, input_filename_sub,
                                           n_pairs, VERBOSE=VERBOSE)

        # Get output filename
        output_filename = get_mapped_to_initial_filename(
            pname, samplename_pat, samplename, fragment,
            PCR=PCR, type='sam', only_chunk=only_chunk)

        # Map
        call_list = [stampy_bin,
                     '-g', get_initial_index_filename(pname, fragment, ext=False),
                     '-h', get_initial_hash_filename(pname, fragment, ext=False),
                     '-o', output_filename,
                     '--overwrite',
                     '--substitutionrate='+subsrate,
                     '--gapopen', stampy_gapopen,
                     '--gapextend', stampy_gapextend]
        if stampy_sensitive:
            call_list.append('--sensitive')
        if n_pairs > 0:
            call_list.extend(['-M', input_filename_sub])
        else:
            call_list.extend(['-M', input_filename])

        # Build a real list: it is both printed and passed to the subprocess
        call_list = [str(arg) for arg in call_list]
        if VERBOSE >= 2:
            print(' '.join(call_list))

        # NOTE(review): the value returned by sp.call is discarded, so a
        # failed stampy run is not detected here -- consider checking it.
        sp.call(call_list)

        output_filename_bam = get_mapped_to_initial_filename(
            pname, samplename_pat, samplename, fragment,
            type='bam', PCR=PCR, only_chunk=only_chunk)
        convert_sam_to_bam(output_filename_bam)

        if summary:
            with open(summary_filename, 'a') as f:
                f.write('Stampy mapped (single thread).\n')

        if only_chunk is None:
            if VERBOSE >= 1:
                print('Remove temporary files: sample '+samplename)
            remove_mapped_init_tempfiles(pname, samplename_pat, samplename,
                                         fragment, PCR=PCR, VERBOSE=VERBOSE,
                                         only_chunk=only_chunk)

            # Only report the cleanup in the summary when it actually happened
            if summary:
                with open(summary_filename, 'a') as f:
                    f.write('Temp mapping files removed.\n')
                    f.write('\n')

    finally:
        # Delete the temporary subsample even if mapping failed half-way
        if (input_filename_sub is not None) and os.path.isfile(input_filename_sub):
            os.remove(input_filename_sub)