# Fragment of a larger loop body: the enclosing loop (target of `continue`)
# and the body of the final `for` lie outside this excerpt.
# Statement order:
#   1. If `found` is false: print a notice when VERBOSE >= 1, then `continue`.
#   2. frac_dist = n_distant / n_good; the 1.0 factor forces float division
#      under Python 2. When VERBOSE >= 1, print either '< 1% of reads are
#      distant' (frac_dist < 0.01) or the fraction formatted as a percentage.
#   3. Get the consensus record from sample.get_consensus(fragment) and a file
#      name from get_mapped_filename(..., filtered=False), then call
#      fish_distant_reads, which returns the triple (ds, edges, seqs).
#   4. Build one random permutation of the indices 0..len(ds)-1 and apply the
#      same permutation to ds, edges and seqs, so the three stay aligned.
#      edges is converted with np.array before indexing; seqs is rebuilt as a
#      list; ds is indexed directly (presumably already a NumPy array -- confirm).
#   5. Iterate over the shuffled (ds, edges, seqs) triples with a running index.
# NOTE(review): indentation is lost on this line, so it is unclear whether
# steps 3-5 belong inside the `else:` branch (frac_dist >= 0.01) or follow the
# if/else unconditionally -- confirm against the original before reformatting.
# NOTE(review): no guard against n_good == 0 is visible here; with plain Python
# numbers the division would raise ZeroDivisionError -- confirm that the code
# setting `found` rules this out.
if not found: if VERBOSE >= 1: print 'not filtered (probably no HIV reads)' continue frac_dist = 1.0 * n_distant / n_good if frac_dist < 0.01: if VERBOSE >= 1: print '< 1% of reads are distant' else: if VERBOSE >= 1: print '{:3.0%}'.format(frac_dist), 'of reads are distant' consrec = sample.get_consensus(fragment) bamfilename = get_mapped_filename(data_folder, adaID, fragment, filtered=False) (ds, edges, seqs) = fish_distant_reads(bamfilename, consrec, VERBOSE=VERBOSE, min_mismatches=min_mismatches, max_mismatches=max_mismatches, maxseqs=maxseqs) indrandom = np.arange(len(ds)) np.random.shuffle(indrandom) ds = ds[indrandom] edges = np.array(edges)[indrandom] seqs = [seqs[i] for i in indrandom] for irp, (dpair, edgepair, seqpair) in enumerate(izip(ds, edges, seqs)): # NOTE: Take only the most distant read of a pair