def make_consensus(data_folder, adaID, fragment, n_iter, qual_min=20, VERBOSE=0,
                   coverage_min=10, summary=True):
    '''Make consensus sequence from the mapped reads.

    The reference of this iteration is read from its FASTA file, allele
    counts and insertions are collected from the mapped reads file, and both
    are handed to build_consensus.

    Parameters:
        data_folder, adaID, fragment, n_iter: passed on to the helpers that
            return the reference, mapped reads and summary file names.
        qual_min: forwarded as qual_min to the allele counting function.
        VERBOSE: any true value prints a progress line; the value is also
            forwarded to build_consensus.
        coverage_min: forwarded as coverage_min to build_consensus.
        summary: if true, append a line to the summary file.

    Returns:
        (refseq, consensus_final): the reference record as returned by
        SeqIO.read and the result of build_consensus.

    NOTE: match_len_min is not a parameter; it must be defined outside this
    function (e.g. at module level).
    '''
    if VERBOSE:
        print('Build consensus: '+adaID+' '+fragment+' iteration '+str(n_iter))

    # Read reference: only the record itself and its length are used below
    reffilename = get_reference_filename(data_folder, adaID, fragment, n_iter)
    refseq = SeqIO.read(reffilename, 'fasta')

    # If the mapped reads file is missing, ask for a SAM -> BAM conversion
    bamfilename = get_mapped_filename(data_folder, adaID, fragment, n_iter)
    if not os.path.isfile(bamfilename):
        convert_sam_to_bam(bamfilename)

    counts, inserts = get_allele_counts_insertions_from_file_unfiltered(
        bamfilename,
        len(refseq),
        qual_min=qual_min,
        match_len_min=match_len_min)

    consensus_final = build_consensus(counts, inserts,
                                      coverage_min=coverage_min,
                                      VERBOSE=VERBOSE)

    if summary:
        with open(get_summary_fn(data_folder, adaID, fragment), 'a') as f:
            f.write('Consensus built for iteration '+str(n_iter))
            f.write('\n')

    return refseq, consensus_final
def make_index_and_hash(data_folder, adaID, fragment, n_iter, VERBOSE=0,
                        summary=True):
    '''Build the stampy index and hash files for one iteration.

    Two stampy commands are run one after the other through sp.call: the
    first (-G) is given the index file name and the reference file name, the
    second (-g / -H) is given the index and the hash file names. The exit
    codes of the commands are not inspected.

    Parameters:
        data_folder, adaID, fragment, n_iter: passed on to the helpers that
            return the index, hash, reference and summary file names.
        VERBOSE: any true value prints a progress line; 3 or more also prints
            each command line before running it.
        summary: if true, append a line to the summary file at the end.
    '''
    def run_command(command):
        # Echo the command at high verbosity, then execute it
        if VERBOSE >= 3:
            print(' '.join(command))
        sp.call(command)

    if VERBOSE:
        print('Build stampy hashes: '+adaID+' '+fragment+' iteration '+str(n_iter))

    # Step 1: genome index file
    run_command([
        stampy_bin,
        '--species="HIV adaID '+adaID+' fragment '+fragment+'"',
        '--overwrite',
        '-G', get_index_file(data_folder, adaID, fragment, n_iter, ext=False),
        get_reference_filename(data_folder, adaID, fragment, n_iter),
    ])

    # Step 2: hash file, built from the index of step 1
    run_command([
        stampy_bin,
        '--overwrite',
        '-g', get_index_file(data_folder, adaID, fragment, n_iter, ext=False),
        '-H', get_hash_file(data_folder, adaID, fragment, n_iter, ext=False),
    ])

    if not summary:
        return

    with open(get_summary_fn(data_folder, adaID, fragment), 'a') as summary_file:
        summary_file.write('Made index and hash files for iteration '+str(n_iter)+'\n')
def map_stampy(data_folder, adaID, fragment, n_iter, VERBOSE=0, summary=True):
    '''Map reads with stampy for one iteration.

    The BAM file of the previous iteration (n_iter - 1) is passed to stampy
    with -M and the output (-o) is the SAM file of iteration n_iter. Index
    and hash file names are those of iteration n_iter. The substitution rate,
    gap penalties and the sensitive switch are read from names defined
    outside this function (subsrate, stampy_gapopen, stampy_gapextend,
    stampy_sensitive). The exit code of stampy is not inspected.

    Parameters:
        data_folder, adaID, fragment, n_iter: passed on to the helpers that
            return the mapped reads, index, hash and summary file names.
        VERBOSE: any true value prints a progress line; 2 or more also prints
            the command line before running it.
        summary: if true, append a line to the summary file at the end.
    '''
    if VERBOSE:
        print('Map via stampy: '+adaID+' '+fragment+' iteration '+str(n_iter))

    # Reads come from the previous iteration, results go to the current one
    reads_bam = get_mapped_filename(data_folder, adaID, fragment,
                                    n_iter - 1, type='bam')
    mapped_sam = get_mapped_filename(data_folder, adaID, fragment,
                                     n_iter, type='sam')

    index_file = get_index_file(data_folder, adaID, fragment, n_iter, ext=False)
    hash_file = get_hash_file(data_folder, adaID, fragment, n_iter, ext=False)

    # Assemble the command; the reads file must come last, after all options
    command = [stampy_bin,
               '-g', index_file,
               '-h', hash_file,
               '-o', mapped_sam,
               '--overwrite',
               '--substitutionrate='+subsrate,
               '--gapopen', stampy_gapopen,
               '--gapextend', stampy_gapextend]
    if stampy_sensitive:
        command += ['--sensitive']
    command += ['-M', reads_bam]

    # Some options may not be strings: convert everything before joining
    command = [str(argument) for argument in command]
    if VERBOSE >= 2:
        print(' '.join(command))
    sp.call(command)

    if not summary:
        return

    with open(get_summary_fn(data_folder, adaID, fragment), 'a') as summary_file:
        summary_file.write('Mapped completed for iteration '+str(n_iter)+'\n')
示例#4
0
def extract_reads_subsample(data_folder, adaID, fragment, n_reads,
                            VERBOSE=0, summary=True):
    '''Extract a subsample of reads into the mapped reads file of iteration 1.

    The work is delegated to
    hivwholeseq.utils.mapping.extract_mapped_reads_subsample, which is given
    the divided BAM file name as input, the iteration-1 mapped BAM file name
    as output, n_reads and VERBOSE.

    Parameters:
        data_folder, adaID, fragment: passed on to the helpers that return
            the divided, mapped and summary file names.
        n_reads: forwarded to extract_mapped_reads_subsample.
        VERBOSE: forwarded to extract_mapped_reads_subsample.
        summary: if true, append the value returned by the extraction to the
            summary file.
    '''
    from hivwholeseq.utils.mapping import extract_mapped_reads_subsample

    source_bam = get_divided_filename(data_folder, adaID, fragment, type='bam')
    target_bam = get_mapped_filename(data_folder, adaID, fragment,
                                     n_iter=1, type='bam')

    n_written = extract_mapped_reads_subsample(source_bam, target_bam,
                                               n_reads, VERBOSE=VERBOSE)

    if not summary:
        return

    # Blank line first, then the report line
    report = ['\n', 'Subsample of reads copied: ' + str(n_written), '\n']
    with open(get_summary_fn(data_folder, adaID, fragment), 'a') as summary_file:
        summary_file.writelines(report)
def extract_reads_subsample(data_folder, adaID, fragment, n_reads, VERBOSE=0,
                            summary=True):
    '''Copy a subsample of reads from the divided file to the mapped file.

    Input is the divided BAM file, output is the mapped BAM file of
    iteration 1; the extraction itself is done by
    hivwholeseq.utils.mapping.extract_mapped_reads_subsample.

    Parameters:
        data_folder, adaID, fragment: passed on to the helpers that return
            the divided, mapped and summary file names.
        n_reads: forwarded to extract_mapped_reads_subsample.
        VERBOSE: forwarded to extract_mapped_reads_subsample.
        summary: if true, append the value returned by the extraction to the
            summary file.
    '''
    from hivwholeseq.utils.mapping import extract_mapped_reads_subsample

    n_written = extract_mapped_reads_subsample(
        get_divided_filename(data_folder, adaID, fragment, type='bam'),
        get_mapped_filename(data_folder, adaID, fragment, n_iter=1, type='bam'),
        n_reads,
        VERBOSE=VERBOSE)

    if not summary:
        return

    with open(get_summary_fn(data_folder, adaID, fragment), 'a') as summary_file:
        # A blank line separates this entry from what is already in the file
        summary_file.write('\n'+'Subsample of reads copied: '+str(n_written)+'\n')
示例#6
0
def make_index_and_hash(data_folder, adaID, fragment, n_iter,
                        VERBOSE=0, summary=True):
    '''Make the stampy index and hash files for one iteration.

    Runs stampy twice through sp.call: once with -G (index file name plus
    reference file name) and once with -g / -H (index and hash file names).
    The exit codes are not inspected.

    Parameters:
        data_folder, adaID, fragment, n_iter: passed on to the helpers that
            return the index, hash, reference and summary file names.
        VERBOSE: any true value prints a progress line; 3 or more also prints
            each command line before running it.
        summary: if true, append a line to the summary file at the end.
    '''
    if VERBOSE:
        print('Build stampy hashes: ' + adaID + ' ' + fragment +
              ' iteration ' + str(n_iter))

    show_commands = VERBOSE >= 3

    # 1. Genome index file
    index_command = [
        stampy_bin,
        '--species="HIV adaID ' + adaID + ' fragment ' + fragment + '"',
        '--overwrite',
        '-G', get_index_file(data_folder, adaID, fragment, n_iter, ext=False),
        get_reference_filename(data_folder, adaID, fragment, n_iter),
    ]
    if show_commands:
        print(' '.join(index_command))
    sp.call(index_command)

    # 2. Hash file, built from the index made above
    hash_command = [
        stampy_bin,
        '--overwrite',
        '-g', get_index_file(data_folder, adaID, fragment, n_iter, ext=False),
        '-H', get_hash_file(data_folder, adaID, fragment, n_iter, ext=False),
    ]
    if show_commands:
        print(' '.join(hash_command))
    sp.call(hash_command)

    if not summary:
        return

    with open(get_summary_fn(data_folder, adaID, fragment), 'a') as summary_file:
        summary_file.write('Made index and hash files for iteration ' +
                           str(n_iter) + '\n')
示例#7
0
def make_consensus(data_folder,
                   adaID,
                   fragment,
                   n_iter,
                   qual_min=20,
                   VERBOSE=0,
                   coverage_min=10,
                   summary=True):
    '''Make consensus sequence from the mapped reads.

    Reads the reference of this iteration from its FASTA file, collects
    allele counts and insertions from the mapped reads file and hands both
    to build_consensus.

    Parameters:
        data_folder, adaID, fragment, n_iter: passed on to the helpers that
            return the reference, mapped reads and summary file names.
        qual_min: forwarded as qual_min to the allele counting function.
        VERBOSE: any true value prints a progress line; the value is also
            forwarded to build_consensus.
        coverage_min: forwarded as coverage_min to build_consensus.
        summary: if true, append a line to the summary file.

    Returns:
        (refseq, consensus_final): the reference record as returned by
        SeqIO.read and the result of build_consensus.

    NOTE: match_len_min is not a parameter; it must be defined outside this
    function (e.g. at module level).
    '''
    if VERBOSE:
        print('Build consensus: ' + adaID + ' ' + fragment + ' iteration ' +
              str(n_iter))

    # Read reference: only the record itself and its length are used below
    reffilename = get_reference_filename(data_folder, adaID, fragment, n_iter)
    refseq = SeqIO.read(reffilename, 'fasta')

    # If the mapped reads file is missing, ask for a SAM -> BAM conversion
    bamfilename = get_mapped_filename(data_folder, adaID, fragment, n_iter)
    if not os.path.isfile(bamfilename):
        convert_sam_to_bam(bamfilename)

    counts, inserts = get_allele_counts_insertions_from_file_unfiltered(
        bamfilename,
        len(refseq),
        qual_min=qual_min,
        match_len_min=match_len_min)

    consensus_final = build_consensus(counts,
                                      inserts,
                                      coverage_min=coverage_min,
                                      VERBOSE=VERBOSE)

    if summary:
        with open(get_summary_fn(data_folder, adaID, fragment), 'a') as f:
            f.write('Consensus built for iteration ' + str(n_iter))
            f.write('\n')

    return refseq, consensus_final
示例#8
0
def map_stampy(data_folder, adaID, fragment, n_iter, VERBOSE=0, summary=True):
    '''Run stampy to map the reads for one iteration.

    Stampy reads the BAM file of iteration n_iter - 1 (-M) and writes the SAM
    file of iteration n_iter (-o), using the index and hash file names of
    iteration n_iter. subsrate, stampy_gapopen, stampy_gapextend and
    stampy_sensitive are read from outside this function. The exit code of
    stampy is not inspected.

    Parameters:
        data_folder, adaID, fragment, n_iter: passed on to the helpers that
            return the mapped reads, index, hash and summary file names.
        VERBOSE: any true value prints a progress line; 2 or more also prints
            the command line before running it.
        summary: if true, append a line to the summary file at the end.
    '''
    if VERBOSE:
        print('Map via stampy: ' + adaID + ' ' + fragment + ' iteration ' +
              str(n_iter))

    # Input and output files
    previous_bam = get_mapped_filename(data_folder, adaID, fragment,
                                       n_iter - 1, type='bam')
    current_sam = get_mapped_filename(data_folder, adaID, fragment,
                                      n_iter, type='sam')

    # Fixed options first
    options = [
        stampy_bin,
        '-g', get_index_file(data_folder, adaID, fragment, n_iter, ext=False),
        '-h', get_hash_file(data_folder, adaID, fragment, n_iter, ext=False),
        '-o', current_sam,
        '--overwrite',
        '--substitutionrate=' + subsrate,
        '--gapopen', stampy_gapopen,
        '--gapextend', stampy_gapextend,
    ]
    # Optional switch, then the reads to map as the last argument pair
    if stampy_sensitive:
        options.append('--sensitive')
    options.extend(['-M', previous_bam])

    # Some options may not be strings: convert everything before joining
    command = [str(option) for option in options]
    if VERBOSE >= 2:
        print(' '.join(command))
    sp.call(command)

    if not summary:
        return

    with open(get_summary_fn(data_folder, adaID, fragment), 'a') as summary_file:
        summary_file.write('Mapped completed for iteration ' + str(n_iter) + '\n')
                refseq, consensus = make_consensus(data_folder, adaID, fragment,
                                                   n_iter,
                                                   VERBOSE=VERBOSE, summary=summary)

                write_consensus_intermediate(data_folder, adaID, fragment, n_iter, consensus)

                match = check_new_old_consensi(refseq, consensus)

                # Start a new round if not converged
                if (not match) and (n_iter < iterations_max):
                    n_iter += 1
                    if VERBOSE:
                        print 'Starting again for iteration '+str(n_iter) 

                    if summary:
                        with open(get_summary_fn(data_folder, adaID, fragment), 'a') as f:
                            f.write('\n')
                            f.write('Starting new iteration '+str(n_iter))
                            f.write('\n')

                # or terminate
                else:
                    write_consensus_final(seq_run, adaID, fragment, consensus)
                    if VERBOSE:
                        if match:
                            print 'Consensus converged at iteration '+str(n_iter)+\
                                    ': adaID', adaID, fragment
                        else:
                            print 'Maximal number of iterations reached: adaID', \
                                    adaID, fragment
示例#10
0
                                                   VERBOSE=VERBOSE,
                                                   summary=summary)

                write_consensus_intermediate(data_folder, adaID, fragment,
                                             n_iter, consensus)

                match = check_new_old_consensi(refseq, consensus)

                # Start a new round if not converged
                if (not match) and (n_iter < iterations_max):
                    n_iter += 1
                    if VERBOSE:
                        print 'Starting again for iteration ' + str(n_iter)

                    if summary:
                        with open(get_summary_fn(data_folder, adaID, fragment),
                                  'a') as f:
                            f.write('\n')
                            f.write('Starting new iteration ' + str(n_iter))
                            f.write('\n')

                # or terminate
                else:
                    write_consensus_final(seq_run, adaID, fragment, consensus)
                    if VERBOSE:
                        if match:
                            print 'Consensus converged at iteration '+str(n_iter)+\
                                    ': adaID', adaID, fragment
                        else:
                            print 'Maximal number of iterations reached: adaID', \
                                    adaID, fragment