示例#1
0
def process_sequences(bwts, bns, pacseq, seq_reader, N, analyze_seqs=None):
  """Run the paired-end bwa steps over batches of up to N sequence pairs.

  Batches are obtained with ``read_seq_pairs(seq_reader, N)`` until an empty
  batch is returned. Each batch goes through, in order: ``cal_sa_reg_gap``
  (both sides of the pair), ``cal_pac_pos_pe``, ``paired_sw`` and
  ``refine_gapped`` (both sides). The insert-size values held in ``ii`` are
  written to stderr after ``cal_pac_pos_pe``.

  Args:
    bwts: forwarded to ``bwa.cal_sa_reg_gap`` and ``bwa.cal_pac_pos_pe``.
    bns: forwarded to ``bwa.paired_sw``, ``bwa.refine_gapped`` and to
      ``analyze_seqs``.
    pacseq: forwarded to ``bwa.paired_sw`` and ``bwa.refine_gapped``.
    seq_reader: source handed to ``read_seq_pairs``.
    N: batch size requested from ``read_seq_pairs``.
    analyze_seqs: optional callable invoked once per batch as
      ``analyze_seqs(gopt[0], bns, seq_pairs_read, bwsa)``. When it is
      ``None`` (the default) the batch is aligned and freed without any
      analysis step.
  """
  gopt = bwa.gap_init_opt()
  popt = bwa.pe_init_opt()

  ii = bwa.isize_info_t()
  last_ii = bwa.isize_info_t()
  # NOTE(review): presumably -1.0 marks "no previous estimate" -- confirm.
  last_ii.avg = -1.0

  while True:
    pairs = read_seq_pairs(seq_reader, N)
    seq_pairs_read = len(pairs)
    if seq_pairs_read == 0:
      break
    bwsa = bwa.build_bws_array(pairs)
    for side in (0, 1):
      bwa.cal_sa_reg_gap(0, bwts, seq_pairs_read, bwsa[side], gopt)
    bwa.cal_pac_pos_pe(bwts, seq_pairs_read, bwsa, ii, popt, gopt, last_ii)
    sys.stderr.write('ii: %r\n' % [ii.avg, ii.std, ii.low, ii.high])
    bwa.paired_sw(bns, pacseq, seq_pairs_read, bwsa, popt, ii)
    for side in (0, 1):
      bwa.refine_gapped(bns, seq_pairs_read, bwsa[side], pacseq)
    # The default analyze_seqs=None used to be called unconditionally,
    # which raised TypeError on the first batch.
    if analyze_seqs is not None:
      analyze_seqs(gopt[0], bns, seq_pairs_read, bwsa)
    # Free as many entries as this batch holds; a final batch may be
    # shorter than N, and every other bwa call here uses seq_pairs_read.
    for side in (0, 1):
      bwa.free_seq(seq_pairs_read, bwsa[side])
示例#2
0
def process_sequences(bwts, bns, pacseq, seq_reader, N, analyze_seqs=None):
    """Align paired sequences batch by batch, at most N pairs at a time.

    A batch is fetched with ``read_seq_pairs(seq_reader, N)``; an empty
    batch ends the loop. For every non-empty batch the bwa calls run in this
    order: ``cal_sa_reg_gap`` for each side of the pair, ``cal_pac_pos_pe``,
    ``paired_sw``, then ``refine_gapped`` for each side. After
    ``cal_pac_pos_pe`` the fields of ``ii`` (avg, std, low, high) are
    written to stderr.

    Args:
        bwts: passed through to ``bwa.cal_sa_reg_gap`` and
            ``bwa.cal_pac_pos_pe``.
        bns: passed through to ``bwa.paired_sw``, ``bwa.refine_gapped`` and
            ``analyze_seqs``.
        pacseq: passed through to ``bwa.paired_sw`` and
            ``bwa.refine_gapped``.
        seq_reader: source handed to ``read_seq_pairs``.
        N: number of pairs requested per batch.
        analyze_seqs: optional per-batch callback, called as
            ``analyze_seqs(gopt[0], bns, seq_pairs_read, bwsa)``. Skipped
            when ``None`` (the default).
    """
    gopt = bwa.gap_init_opt()
    popt = bwa.pe_init_opt()

    ii = bwa.isize_info_t()
    last_ii = bwa.isize_info_t()
    # NOTE(review): -1.0 looks like a "not yet estimated" marker -- confirm.
    last_ii.avg = -1.0

    while True:
        pairs = read_seq_pairs(seq_reader, N)
        seq_pairs_read = len(pairs)
        if seq_pairs_read == 0:
            break
        bwsa = bwa.build_bws_array(pairs)
        bwa.cal_sa_reg_gap(0, bwts, seq_pairs_read, bwsa[0], gopt)
        bwa.cal_sa_reg_gap(0, bwts, seq_pairs_read, bwsa[1], gopt)
        bwa.cal_pac_pos_pe(bwts, seq_pairs_read, bwsa, ii, popt, gopt,
                           last_ii)
        sys.stderr.write('ii: %r\n' % [ii.avg, ii.std, ii.low, ii.high])
        bwa.paired_sw(bns, pacseq, seq_pairs_read, bwsa, popt, ii)
        bwa.refine_gapped(bns, seq_pairs_read, bwsa[0], pacseq)
        bwa.refine_gapped(bns, seq_pairs_read, bwsa[1], pacseq)
        # Calling the default None raised TypeError; the callback is
        # optional, so only invoke it when one was supplied.
        if analyze_seqs is not None:
            analyze_seqs(gopt[0], bns, seq_pairs_read, bwsa)
        # Release exactly the entries built for this batch: the last batch
        # can hold fewer than N pairs, so N is not a safe count here.
        bwa.free_seq(seq_pairs_read, bwsa[0])
        bwa.free_seq(seq_pairs_read, bwsa[1])
示例#3
0
def run_bwa_py_sampe(refseq_fname, read_fname, mate_fname):
    """Run the paired-end bwa steps once over all pairs, printing memory info.

    All read/mate pairs are loaded into memory first, then the index and the
    reference are restored and the bwa steps run in order (``cal_sa_reg_gap``,
    ``cal_pac_pos_pe``, ``paired_sw``, ``refine_gapped``, ``analyze_hit``).
    ``print_meminfo`` is called with a label after each stage, including
    after each deallocation step at the end.

    Args:
        refseq_fname: name passed to ``bwa.restore_index`` and
            ``bwa.restore_reference``.
        read_fname: path of the reads file, parsed as 'fastq-illumina'.
        mate_fname: path of the mates file, parsed as 'fastq-illumina'.
    """
    # The pairs are fully materialised inside the with-block, so both files
    # can be closed as soon as reading is done instead of leaking the handles.
    with open(read_fname) as read_file, open(mate_fname) as mate_file:
        read_flow = Bio.SeqIO.parse(read_file, 'fastq-illumina')
        mate_flow = Bio.SeqIO.parse(mate_file, 'fastq-illumina')
        pairs = list(it.izip(read_flow, mate_flow))
    print_meminfo("AFTER READING PAIRS")
    bwsa = bwa.build_bws_array(pairs)
    print_meminfo("AFTER BUILDING BWSA")
    bwts = bwa.restore_index(refseq_fname)
    print_meminfo("AFTER RESTORING INDEX")
    bnsp, pacseq = bwa.restore_reference(refseq_fname)
    print_meminfo("AFTER RESTORING REFERENCE")
    gopt, popt = bwa.gap_init_opt(), bwa.pe_init_opt()
    ii, last_ii = bwa.isize_info_t(), bwa.isize_info_t()
    last_ii.avg = -1.0
    n_pairs = len(pairs)
    print_meminfo("AFTER INIT OPT & II")
    bwa.cal_sa_reg_gap(0, bwts, n_pairs, bwsa[0], gopt)
    bwa.cal_sa_reg_gap(0, bwts, n_pairs, bwsa[1], gopt)
    print_meminfo("AFTER CAL_SA_REG_GAP")
    bwa.cal_pac_pos_pe(bwts, n_pairs, bwsa, ii, popt, gopt, last_ii)
    print_meminfo("AFTER CAL_PAC_POS_PE")
    bwa.paired_sw(bnsp, pacseq, n_pairs, bwsa, popt, ii)
    print_meminfo("AFTER PAIRED_SW")
    bwa.refine_gapped(bnsp, n_pairs, bwsa[0], pacseq)
    bwa.refine_gapped(bnsp, n_pairs, bwsa[1], pacseq)
    print_meminfo("AFTER REFINE_GAPPED")
    # analyze_hit is run in both orientations of each pair; the results are
    # not used here, only the calls themselves matter for the memory report.
    for k in xrange(n_pairs):
        bwa.analyze_hit(gopt[0], bnsp, bwsa[0][k], bwsa[1][k])
        bwa.analyze_hit(gopt[0], bnsp, bwsa[1][k], bwsa[0][k])
    print_meminfo("AFTER ANALYZE_HIT")
    # deallocate seq & ref data
    for i in 0, 1:
        bwa.free_seq(n_pairs, bwsa[i])
        bwa.bwt_destroy(bwts[i])
    bwa.bns_destroy(bnsp)
    print_meminfo("AFTER DEALLOC")
    del pacseq
    n_unreachable = gc.collect()
    logging.debug("n_unreachable = %d", n_unreachable)
    print_meminfo("AFTER DEL PACSEQ")
    del pairs
    n_unreachable = gc.collect()
    logging.debug("n_unreachable = %d", n_unreachable)
    print_meminfo("AFTER DEL PAIRS")
示例#4
0
def run_bwa_py_sampe(refseq_fname, read_fname, mate_fname, seq_list_len=None):
    """Run the paired-end bwa steps chunk by chunk and report peak memory.

    Pairs are read lazily from the two FASTQ files and processed in chunks of
    at most ``seq_list_len`` pairs. For every chunk the index and reference
    are restored, the bwa steps are run (``cal_sa_reg_gap``,
    ``cal_pac_pos_pe``, ``paired_sw``, ``refine_gapped``, ``analyze_hit``),
    and everything is deallocated again. ``print_meminfo`` is called after
    each stage and both values it returns are recorded.

    Args:
        refseq_fname: name passed to ``bwa.restore_index`` and
            ``bwa.restore_reference``.
        read_fname: path of the reads file, parsed as "fastq-illumina".
        mate_fname: path of the mates file, parsed as "fastq-illumina".
        seq_list_len: maximum number of pairs per chunk. ``None`` (the
            default) puts all pairs in a single chunk, because it is used as
            the ``stop`` argument of ``itertools.islice``.

    Returns:
        A tuple ``(max_size, max_resident, failed_ii)``: the maxima of the
        two values returned by ``print_meminfo`` over all recorded stages,
        and the number of chunks for which ``ii.avg`` was negative after
        ``cal_pac_pos_pe``.

    Raises:
        ValueError: if the input files yield no pairs, since ``max()`` is
            then applied to an empty list.
    """
    size_list = []
    resident_list = []
    failed_ii = 0

    def _record(label):
        # Print memory info for this stage and keep both readings.
        size, resident = print_meminfo(label)
        size_list.append(size)
        resident_list.append(resident)

    # The pairs are consumed lazily, so both files must stay open for the
    # whole loop; the with-block closes them afterwards, even on error.
    with open(read_fname) as read_file, open(mate_fname) as mate_file:
        read_flow = Bio.SeqIO.parse(read_file, "fastq-illumina")
        mate_flow = Bio.SeqIO.parse(mate_file, "fastq-illumina")
        pairs_flow = it.izip(read_flow, mate_flow)

        while True:
            pairs = list(it.islice(pairs_flow, 0, seq_list_len))
            if not pairs:
                break
            _record("AFTER READING PAIRS")

            bwsa = bwa.build_bws_array(pairs)
            _record("AFTER BUILDING BWSA")

            bwts = bwa.restore_index(refseq_fname)
            _record("AFTER RESTORING INDEX")

            bnsp, pacseq = bwa.restore_reference(refseq_fname)
            _record("AFTER RESTORING REFERENCE")

            gopt, popt = bwa.gap_init_opt(), bwa.pe_init_opt()
            ii, last_ii = bwa.isize_info_t(), bwa.isize_info_t()
            last_ii.avg = -1.0
            n_pairs = len(pairs)
            _record("AFTER INIT OPT & II")

            bwa.cal_sa_reg_gap(0, bwts, n_pairs, bwsa[0], gopt)
            bwa.cal_sa_reg_gap(0, bwts, n_pairs, bwsa[1], gopt)
            _record("AFTER CAL_SA_REG_GAP")

            bwa.cal_pac_pos_pe(bwts, n_pairs, bwsa, ii, popt, gopt, last_ii)
            _record("AFTER CAL_PAC_POS_PE")

            # Count the chunks that end up with a negative ii.avg.
            if ii.avg < 0.0:
                failed_ii += 1

            bwa.paired_sw(bnsp, pacseq, n_pairs, bwsa, popt, ii)
            _record("AFTER PAIRED_SW")

            bwa.refine_gapped(bnsp, n_pairs, bwsa[0], pacseq)
            bwa.refine_gapped(bnsp, n_pairs, bwsa[1], pacseq)
            _record("AFTER REFINE_GAPPED")

            # Both orientations of each pair; the results are not used.
            for k in xrange(n_pairs):
                bwa.analyze_hit(gopt[0], bnsp, bwsa[0][k], bwsa[1][k])
                bwa.analyze_hit(gopt[0], bnsp, bwsa[1][k], bwsa[0][k])
            _record("AFTER ANALYZE_HIT")

            # deallocate seq & ref data
            for i in 0, 1:
                bwa.free_seq(n_pairs, bwsa[i])
                bwa.bwt_destroy(bwts[i])
            bwa.bns_destroy(bnsp)
            _record("AFTER DEALLOC")

            del pacseq
            n_unreachable = gc.collect()
            logging.debug("n_unreachable = %d", n_unreachable)
            _record("AFTER DEL PACSEQ")

            del pairs
            n_unreachable = gc.collect()
            logging.debug("n_unreachable = %d", n_unreachable)
            _record("AFTER DEL PAIRS")

    return max(size_list), max(resident_list), failed_ii
示例#5
0
def run_bwa_py_sampe(refseq_fname, read_fname, mate_fname, seq_list_len=None):
    """Process read/mate pairs in chunks through bwa, tracking memory usage.

    The two FASTQ files are zipped lazily into pairs and consumed
    ``seq_list_len`` pairs at a time. Each chunk restores the index and the
    reference, runs ``cal_sa_reg_gap``, ``cal_pac_pos_pe``, ``paired_sw``,
    ``refine_gapped`` and ``analyze_hit``, and then frees what it allocated.
    After every stage ``print_meminfo`` is called with a stage label and its
    two return values are stored.

    Args:
        refseq_fname: name given to ``bwa.restore_index`` and
            ``bwa.restore_reference``.
        read_fname: path of the reads file ('fastq-illumina' format).
        mate_fname: path of the mates file ('fastq-illumina' format).
        seq_list_len: chunk size in pairs; with ``None`` (the default)
            ``itertools.islice`` takes every remaining pair, so the whole
            input is one chunk.

    Returns:
        ``(max(size_list), max(resident_list), failed_ii)`` where the two
        lists hold the values returned by ``print_meminfo`` and
        ``failed_ii`` counts chunks whose ``ii.avg`` is below 0.0 after
        ``cal_pac_pos_pe``.

    Raises:
        ValueError: when no pair could be read at all (``max()`` of an empty
            list).
    """
    size_list = []
    resident_list = []
    failed_ii = 0

    def _track(stage):
        # One place for the bookkeeping previously pasted after every stage.
        size, resident = print_meminfo(stage)
        size_list.append(size)
        resident_list.append(resident)

    read_file = open(read_fname)
    try:
        mate_file = open(mate_fname)
        try:
            read_flow = Bio.SeqIO.parse(read_file, 'fastq-illumina')
            mate_flow = Bio.SeqIO.parse(mate_file, 'fastq-illumina')
            pairs_flow = it.izip(read_flow, mate_flow)

            while True:
                pairs = list(it.islice(pairs_flow, 0, seq_list_len))
                if not pairs:
                    break
                chunk_len = len(pairs)
                _track("AFTER READING PAIRS")

                bwsa = bwa.build_bws_array(pairs)
                _track("AFTER BUILDING BWSA")

                bwts = bwa.restore_index(refseq_fname)
                _track("AFTER RESTORING INDEX")

                bnsp, pacseq = bwa.restore_reference(refseq_fname)
                _track("AFTER RESTORING REFERENCE")

                gopt, popt = bwa.gap_init_opt(), bwa.pe_init_opt()
                ii, last_ii = bwa.isize_info_t(), bwa.isize_info_t()
                last_ii.avg = -1.0
                _track("AFTER INIT OPT & II")

                for side in (0, 1):
                    bwa.cal_sa_reg_gap(0, bwts, chunk_len, bwsa[side], gopt)
                _track("AFTER CAL_SA_REG_GAP")

                bwa.cal_pac_pos_pe(bwts, chunk_len, bwsa, ii, popt, gopt,
                                   last_ii)
                _track("AFTER CAL_PAC_POS_PE")

                # A negative ii.avg after cal_pac_pos_pe counts as a failure.
                if ii.avg < 0.0:
                    failed_ii += 1

                bwa.paired_sw(bnsp, pacseq, chunk_len, bwsa, popt, ii)
                _track("AFTER PAIRED_SW")

                for side in (0, 1):
                    bwa.refine_gapped(bnsp, chunk_len, bwsa[side], pacseq)
                _track("AFTER REFINE_GAPPED")

                # analyze_hit in both orientations; return values unused.
                for k in xrange(chunk_len):
                    bwa.analyze_hit(gopt[0], bnsp, bwsa[0][k], bwsa[1][k])
                    bwa.analyze_hit(gopt[0], bnsp, bwsa[1][k], bwsa[0][k])
                _track("AFTER ANALYZE_HIT")

                # deallocate seq & ref data
                for i in 0, 1:
                    bwa.free_seq(chunk_len, bwsa[i])
                    bwa.bwt_destroy(bwts[i])
                bwa.bns_destroy(bnsp)
                _track("AFTER DEALLOC")

                del pacseq
                n_unreachable = gc.collect()
                logging.debug("n_unreachable = %d", n_unreachable)
                _track("AFTER DEL PACSEQ")

                del pairs
                n_unreachable = gc.collect()
                logging.debug("n_unreachable = %d", n_unreachable)
                _track("AFTER DEL PAIRS")
        finally:
            # Pairs are read lazily, so the files are only closed here,
            # once the loop is done or has failed.
            mate_file.close()
    finally:
        read_file.close()

    return max(size_list), max(resident_list), failed_ii