示例#1
0
def _thread_worker(aln_reader, chunk_feeder, platform, results_queue,
                   error_queue):
    """
    Worker loop that builds a consensus sequence for each contig region.

    Regions are taken from chunk_feeder until it returns None. Regions
    without alignments are skipped. For every other region a tuple
    (ctg_id, region start, sequence, aln_errors) is put on results_queue.

    Any exception is logged together with its traceback and put on
    error_queue instead of being propagated to the caller.
    """
    try:
        while True:
            region = chunk_feeder.get_chunk()
            if region is None:
                #the feeder has no more work for us
                break

            ctg_id = region.ctg_id
            start = region.start
            end = region.end

            reads = aln_reader.get_alignments(ctg_id, start, end)
            if len(reads) == 0:
                continue

            #switch to chunk-local coordinates, then keep uniform alignments
            reads = aln_reader.trim_and_transpose(reads, start, end)
            reads, _ = get_uniform_alignments(reads)

            profile, aln_errors = _contig_profile(reads, platform)
            results_queue.put((ctg_id, start, _flatten_profile(profile),
                               aln_errors))

    except Exception as exc:
        logger.error("Thread exception")
        logger.error(traceback.format_exc())
        error_queue.put(exc)
示例#2
0
def _thread_worker(aln_reader, chunk_feeder, contigs_info, err_mode,
                   results_queue, error_queue, bubbles_file,
                   bubbles_file_lock):
    """
    Worker loop that splits contig regions into bubbles. Will run in parallel.

    Regions are taken from chunk_feeder until it returns None. Regions
    without alignments are skipped. For every other region the alignment
    profile is computed and partitioned, the resulting bubbles are appended
    to the file at path bubbles_file (while holding bubbles_file_lock), and
    a statistics tuple
        (ctg_id, number of bubbles, num_long_bubbles, num_empty,
         num_long_branch, aln_errors, mean_cov)
    is put on results_queue.

    Any exception is logged together with its traceback and put on
    error_queue instead of being propagated. contigs_info is accepted
    but not used by this function.
    """
    try:
        while True:
            ctg_region = chunk_feeder.get_chunk()
            if ctg_region is None:
                break
            ctg_aln = aln_reader.get_alignments(ctg_region.ctg_id,
                                                ctg_region.start,
                                                ctg_region.end)
            ctg_id = ctg_region.ctg_id
            if len(ctg_aln) == 0:
                continue
            ref_seq = aln_reader.get_region_sequence(ctg_region.ctg_id,
                                                     ctg_region.start,
                                                     ctg_region.end)

            #since we are working with contig chunks,
            #transform alignment coordinates
            ctg_aln = aln_reader.trim_and_transpose(ctg_aln, ctg_region.start,
                                                    ctg_region.end)
            ctg_aln, mean_cov = get_uniform_alignments(ctg_aln)

            profile, aln_errors = _compute_profile(ctg_aln, ref_seq)
            partition, num_long_bubbles = _get_partition(profile, err_mode)
            ctg_bubbles = _get_bubble_seqs(ctg_aln, profile, partition, ctg_id)

            #when the mean coverage exceeds 90% of the configured maximum
            #read coverage, report the median depth from the reader instead
            #NOTE(review): presumably because the mean is capped by
            #alignment sampling near that limit - confirm
            coverage_cap = 0.9 * cfg.vals["max_read_coverage"]
            if mean_cov > coverage_cap:
                mean_cov = aln_reader.get_median_depth(ctg_region.ctg_id,
                                                       ctg_region.start,
                                                       ctg_region.end)

            ctg_bubbles, num_empty = _postprocess_bubbles(ctg_bubbles)
            ctg_bubbles, num_long_branch = _split_long_bubbles(ctg_bubbles)

            #transform coordinates back
            for b in ctg_bubbles:
                b.position += ctg_region.start

            #close the output file before releasing the lock, so that the
            #data is flushed and the handle does not leak
            with bubbles_file_lock:
                with open(bubbles_file, "a") as bubbles_out:
                    _output_bubbles(ctg_bubbles, bubbles_out)
            results_queue.put(
                (ctg_id, len(ctg_bubbles), num_long_bubbles, num_empty,
                 num_long_branch, aln_errors, mean_cov))

            #drop references to the large per-region objects
            del profile
            del ctg_bubbles

    except Exception as e:
        logger.error("Thread exception")
        logger.error(traceback.format_exc())
        error_queue.put(e)
示例#3
0
文件: consensus.py 项目: xjyx/Flye
def _contig_profile(alignment, platform, genome_len):
    """
    Builds a per-position profile of a contig from its alignments.

    Only the alignments returned by get_uniform_alignments are used.
    Returns a tuple (profile, aln_errors): profile is a list of genome_len
    Profile objects and aln_errors holds the err_rate of every alignment
    that was used. The platform argument is currently not used.
    """
    #leave the best uniform alignments
    uniform_alns = get_uniform_alignments(alignment, genome_len)

    aln_errors = []
    profile = [Profile() for _ in range(genome_len)]
    for aln in uniform_alns:
        aln_errors.append(aln.err_rate)

        #after gap shifting it is possible that
        #two gaps are aligned against each other
        qry_seq = shift_gaps(aln.trg_seq, aln.qry_seq)
        trg_seq = shift_gaps(qry_seq, aln.trg_seq)

        pos = aln.trg_start
        for trg_nuc, qry_nuc in zip(trg_seq, qry_seq):
            trg_is_gap = trg_nuc == "-"
            if trg_is_gap:
                #a target gap is attributed to the preceding position
                pos -= 1
            if pos >= genome_len:
                #positions past the end are shifted back by genome_len
                #NOTE(review): presumably for circular contigs - confirm
                pos -= genome_len

            column = profile[pos]
            if not trg_is_gap or qry_nuc == "-":
                column.nucl = trg_nuc
                column.matches[qry_nuc] += 1
            else:
                #query base inserted relative to the target
                column.insertions[aln.qry_id] += qry_nuc

            pos += 1

    return profile, aln_errors
示例#4
0
文件: bubbles.py 项目: xjyx/Flye
def _thread_worker(aln_reader, contigs_info, err_mode, results_queue,
                   error_queue, bubbles_file_handle, bubbles_file_lock):
    """
    Worker loop that splits contigs into bubbles. Will run in parallel.

    After aln_reader.init_reading(), chunks (ctg_id, alignments) are read
    until the reader reports EOF or returns a ctg_id of None. For every
    contig a statistics tuple
        (ctg_id, number of bubbles, num_long_bubbles, num_empty,
         num_long_branch, aln_errors, mean_cov)
    is put on results_queue and the bubbles are written to
    bubbles_file_handle while holding bubbles_file_lock.

    aln_reader.stop_reading() is called once reading was initialized,
    including when processing fails. Any exception is put on error_queue
    instead of being propagated.
    """
    try:
        aln_reader.init_reading()
        try:
            while not aln_reader.is_eof():
                ctg_id, ctg_aln = aln_reader.get_chunk()
                if ctg_id is None:
                    break

                ctg_len = contigs_info[ctg_id].length

                #get top uniform alignments
                ctg_aln = get_uniform_alignments(ctg_aln, ctg_len)

                profile, aln_errors = _compute_profile(ctg_aln, err_mode,
                                                       ctg_len)
                partition, num_long_bubbles = _get_partition(profile,
                                                             err_mode)
                ctg_bubbles = _get_bubble_seqs(ctg_aln, err_mode, profile,
                                               partition,
                                               contigs_info[ctg_id])

                #average number of branches per bubble; the +1 keeps the
                #division defined when there are no bubbles at all
                mean_cov = (sum(len(b.branches) for b in ctg_bubbles)
                            // (len(ctg_bubbles) + 1))
                ctg_bubbles, num_empty, num_long_branch = \
                                        _postprocess_bubbles(ctg_bubbles)
                results_queue.put(
                    (ctg_id, len(ctg_bubbles), num_long_bubbles, num_empty,
                     num_long_branch, aln_errors, mean_cov))
                with bubbles_file_lock:
                    _output_bubbles(ctg_bubbles, bubbles_file_handle)

                #drop references to the large per-contig objects
                del profile
                del ctg_bubbles
        finally:
            #shut the reader down even if processing a chunk failed
            aln_reader.stop_reading()

    except Exception as e:
        error_queue.put(e)