# Fragment of a larger routine: `outdir` and `uniqueds` are bound before this
# excerpt, and the body of the final loop may continue past it.

# Paths under outdir: the location handed to the by-size reader/writer
# helpers, a marker file that is touched after the by-size write completes,
# and the file reported as "REF" in the log message below.
bysize_dir = os.path.join(outdir,'by_size/uni_len')
bysize_done = os.path.join(outdir,'by_size.done')
denovo_ref = os.path.join(outdir,'denovo.fa')

# Remove any existing reference file (presumably so it gets rebuilt later --
# TODO confirm against the rest of the routine).
if os.path.exists(denovo_ref):
    print >> sys.stderr, 'REMOVE REF: %s' % denovo_ref
    os.unlink(denovo_ref)

if os.path.exists(bysize_done):
    # Marker present: read back the by-size output instead of regenerating it.
    ofbysize = get_uniqued_by_size(bysize_dir)
else:
    # No marker: load every uniqued input into one mapping, then write it
    # out by size.
    all_quality = defaultdict(dict)
    for uniqued in uniqueds:
        load_uniqued(all_quality,uniqued,count_by_ind=True)
    print >> sys.stderr, 'LOAD COMPLETE. WRITE BY-SIZE.'
    ofbysize = write_uniqued_by_size(all_quality,bysize_dir)
    # Drop this name's reference to the combined mapping now that it has
    # been written.
    del all_quality
    # Create the marker so that later runs take the branch above.
    # NOTE(review): the path is interpolated into a shell command unquoted,
    # and `ret` is not checked anywhere in this excerpt.
    ret = os.system('touch %s' % bysize_done)

# Walk the keys of ofbysize in descending order (presumably sequence
# lengths, largest first -- verify against write_uniqued_by_size).
sizes = sorted(ofbysize.keys(),reverse=True)
for i in sizes:
    print >> sys.stderr, '\nSTART %s' % i
    uni = ofbysize[i]
    ufq = uniqued_to_fastq(uni)
    nreads = get_read_count(ufq)
mm = 0 for c1, c2 in zip(s1, s2): if c1 != c2: mm += 1 if mm > dist: return True return False if __name__ == "__main__": seqlen, dist, uniqueds, outfile = sys.argv[1:] all_quality = defaultdict(dict) for u in uniqueds.split(","): rtd_run.load_uniqued(all_quality, u, count_by_ind=True) seq_by_len = defaultdict(list) for k in all_quality.keys(): seq_by_len[len(k)].append(k) seqs = seq_by_len[int(seqlen)] seqs.sort() offby = {} for i, s in enumerate(seqs): offby[s] = [si for si in seqs if si != s and not si in offby.keys() and not reject_pair(s, si, int(dist))] print >>sys.stderr, "\r%s / %s" % (i, len(seqs)), open(outfile, "w").write(offby.__repr__())
# Fragment of a larger routine: `outdir` and `uniqueds` are bound before this
# excerpt, and the body of the final loop may continue past it.

# Paths under outdir: the location handed to the by-size reader/writer
# helpers, a marker file that is touched after the by-size write completes,
# and the file reported as "REF" in the log message below.
bysize_dir = os.path.join(outdir, 'by_size/uni_len')
bysize_done = os.path.join(outdir, 'by_size.done')
denovo_ref = os.path.join(outdir, 'denovo.fa')

# Remove any existing reference file (presumably so it gets rebuilt later --
# TODO confirm against the rest of the routine).
if os.path.exists(denovo_ref):
    print >> sys.stderr, 'REMOVE REF: %s' % denovo_ref
    os.unlink(denovo_ref)

if os.path.exists(bysize_done):
    # Marker present: read back the by-size output instead of regenerating it.
    ofbysize = get_uniqued_by_size(bysize_dir)
else:
    # No marker: load every uniqued input into one mapping, then write it
    # out by size.
    all_quality = defaultdict(dict)
    for uniqued in uniqueds:
        load_uniqued(all_quality, uniqued, count_by_ind=True)
    print >> sys.stderr, 'LOAD COMPLETE. WRITE BY-SIZE.'
    ofbysize = write_uniqued_by_size(all_quality, bysize_dir)
    # Drop this name's reference to the combined mapping now that it has
    # been written.
    del all_quality
    # Create the marker so that later runs take the branch above.
    # NOTE(review): the path is interpolated into a shell command unquoted,
    # and `ret` is not checked anywhere in this excerpt.
    ret = os.system('touch %s' % bysize_done)

# Walk the keys of ofbysize in descending order (presumably sequence
# lengths, largest first -- verify against write_uniqued_by_size).
sizes = sorted(ofbysize.keys(), reverse=True)
for i in sizes:
    print >> sys.stderr, '\nSTART %s' % i
    uni = ofbysize[i]
    ufq = uniqued_to_fastq(uni)
    nreads = get_read_count(ufq)