示例#1
0
def extract_reads_subsample(data_folder,
                            adaID,
                            fragment,
                            n_reads,
                            VERBOSE=0,
                            summary=True):
    '''Copy a subsample of reads from the divided BAM into the mapped BAM.

    The input path comes from get_divided_filename and the output path from
    get_mapped_filename (with n_iter=1), both for the given data_folder, adaID
    and fragment. The copying itself is delegated to
    hivwholeseq.utils.mapping.extract_mapped_reads_subsample.

    Parameters:
       data_folder, adaID, fragment: forwarded to the filename helpers to
           locate the input, output and summary files.
       n_reads: subsample size, forwarded to extract_mapped_reads_subsample.
       VERBOSE: verbosity level, forwarded to extract_mapped_reads_subsample.
       summary: if true, append the value returned by the extraction to the
           summary file.
    '''
    from hivwholeseq.utils.mapping import extract_mapped_reads_subsample

    # All three filename helpers share the same positional arguments
    location = (data_folder, adaID, fragment)
    source_bam = get_divided_filename(*location, type='bam')
    target_bam = get_mapped_filename(*location, n_iter=1, type='bam')

    n_copied = extract_mapped_reads_subsample(source_bam,
                                              target_bam,
                                              n_reads,
                                              VERBOSE=VERBOSE)

    if summary:
        # Blank line, report line, newline: appended to the existing summary
        report = ('\n',
                  'Subsample of reads copied: ' + str(n_copied),
                  '\n')
        with open(get_summary_fn(*location), 'a') as summary_file:
            summary_file.writelines(report)
def extract_reads_subsample(data_folder, adaID, fragment, n_reads, VERBOSE=0,
                            summary=True):
    '''Write a subsample of the divided reads to the first-iteration mapped file.

    Reads come from the BAM file named by get_divided_filename and are written
    to the BAM file named by get_mapped_filename with n_iter=1; the work is
    done by hivwholeseq.utils.mapping.extract_mapped_reads_subsample, which
    receives n_reads and VERBOSE unchanged.

    If summary is true, the value returned by that call is appended to the
    file named by get_summary_fn, surrounded by newlines.
    '''
    from hivwholeseq.utils.mapping import extract_mapped_reads_subsample

    bam_in = get_divided_filename(data_folder, adaID, fragment, type='bam')
    bam_out = get_mapped_filename(data_folder, adaID, fragment,
                                  n_iter=1, type='bam')
    count = extract_mapped_reads_subsample(bam_in, bam_out, n_reads,
                                           VERBOSE=VERBOSE)

    # Nothing more to do unless a summary entry was requested
    if not summary:
        return

    with open(get_summary_fn(data_folder, adaID, fragment), 'a') as handle:
        handle.write('\n' + 'Subsample of reads copied: '+str(count) + '\n')
def map_stampy_singlethread(sample, fragment, VERBOSE=0, n_pairs=-1,
                            summary=True, only_chunk=None, filtered=True):
    '''Map using stampy, single thread (no cluster queueing race conditions)

    Parameters:
       sample: sample record. This function reads sample.patient, sample.PCR,
           sample.regions_complete, sample.sequencing_run['folder'] and the
           items 'patient sample' and 'adapter'.
       fragment (str): fragment name, e.g. 'F5'. The first entry of
           sample.regions_complete that contains it (e.g. 'F5bi') is used to
           locate the input file.
       VERBOSE (int): any nonzero value announces the mapping, >= 1 announces
           the temp file removal, >= 2 also prints the stampy command line.
       n_pairs (int): if positive, a subsample is first extracted with
           extract_mapped_reads_subsample into a temporary BAM file and that
           file is mapped instead of the full input.
       summary (bool): append progress/failure lines to the summary file.
       only_chunk: forwarded to the filename helpers; temporary mapping files
           are removed only when it is None.
       filtered: forwarded to get_input_filename.

    Raises:
       ValueError: if no entry of sample.regions_complete contains fragment,
           or if the input file does not exist.
    '''
    pname = sample.patient
    samplename_pat = sample['patient sample']
    data_folder = sample.sequencing_run['folder']
    adaID = sample['adapter']
    PCR = int(sample.PCR)

    # NOTE(review): `samplename` is never bound inside this function, so it is
    # looked up as a module-level name at call time. Confirm the caller sets
    # it (or derive it from `sample`) before relying on this function.

    if VERBOSE:
        print('Map via stampy (single thread): '+samplename+' '+fragment)

    summary_filename = None
    if summary:
        summary_filename = get_map_initial_summary_filename(pname, samplename_pat,
                                                            samplename, fragment,
                                                            PCR=PCR)

    def log_summary(text):
        '''Append text to the summary file if summaries are enabled.'''
        if summary:
            with open(summary_filename, 'a') as f:
                f.write(text)

    # Specific fragment (e.g. F5 --> F5bi)
    frag_candidates = [region for region in sample.regions_complete
                       if fragment in region]
    if not frag_candidates:
        log_summary('Failed (specific fragment for '+fragment+' not found).\n')
        raise ValueError(samplename+': fragment '+fragment+' not found.')
    frag_spec = frag_candidates[0]

    input_filename = get_input_filename(data_folder, adaID, frag_spec, type='bam',
                                        only_chunk=only_chunk, filtered=filtered)

    # NOTE: we introduced fragment nomenclature late, e.g. F3a. Check for that
    if fragment == 'F3' and not os.path.isfile(input_filename):
        input_filename = input_filename.replace('F3a', 'F3')

    # Check existence of input file, because stampy creates output anyway
    if not os.path.isfile(input_filename):
        log_summary('Failed (input file for mapping not found).\n')
        raise ValueError(samplename+', fragment '+fragment+': input file not found.')

    # The subsample file is temporary: the try/finally makes sure it is deleted
    # even if mapping or conversion raises.
    input_filename_sub = None
    try:
        # Extract subsample of reads if requested
        if n_pairs > 0:
            from hivwholeseq.utils.mapping import extract_mapped_reads_subsample
            input_filename_sub = get_mapped_to_initial_filename(
                pname, samplename_pat, samplename, fragment,
                PCR=PCR, type='bam')[:-4]+'_unmapped.bam'
            extract_mapped_reads_subsample(input_filename,
                                           input_filename_sub,
                                           n_pairs, VERBOSE=VERBOSE)
            reads_filename = input_filename_sub
        else:
            reads_filename = input_filename

        # Get output filename
        output_filename = get_mapped_to_initial_filename(pname, samplename_pat,
                                                         samplename, fragment,
                                                         PCR=PCR,
                                                         type='sam',
                                                         only_chunk=only_chunk)

        # Map
        call_list = [stampy_bin,
                     '-g', get_initial_index_filename(pname, fragment, ext=False),
                     '-h', get_initial_hash_filename(pname, fragment, ext=False),
                     '-o', output_filename,
                     '--overwrite',
                     '--substitutionrate='+subsrate,
                     '--gapopen', stampy_gapopen,
                     '--gapextend', stampy_gapextend]
        if stampy_sensitive:
            call_list.append('--sensitive')
        call_list = call_list + ['-M', reads_filename]
        call_list = [str(arg) for arg in call_list]
        if VERBOSE >= 2:
            print(' '.join(call_list))
        # NOTE(review): the exit status of stampy is not checked here
        sp.call(call_list)

        output_filename_bam = get_mapped_to_initial_filename(pname, samplename_pat,
                                                             samplename, fragment,
                                                             type='bam',
                                                             PCR=PCR,
                                                             only_chunk=only_chunk)
        convert_sam_to_bam(output_filename_bam)

        log_summary('Stampy mapped (single thread).\n')

        if only_chunk is None:
            if VERBOSE >= 1:
                print('Remove temporary files: sample '+samplename)
            remove_mapped_init_tempfiles(pname, samplename_pat,
                                         samplename, fragment,
                                         PCR=PCR,
                                         VERBOSE=VERBOSE, only_chunk=only_chunk)
            # Only report the removal when it actually happened
            log_summary('Temp mapping files removed.\n')

        # Blank line closes this entry in the summary file
        log_summary('\n')

    finally:
        if (input_filename_sub is not None) and os.path.isfile(input_filename_sub):
            os.remove(input_filename_sub)