# For every read in the sample, test each k-mer against each entry of `bfs`
# and report the reads that had at least one miss.
#
# NOTE(review): the original indentation was lost. The report is assumed to
# run once per read, after all of its k-mers have been checked -- confirm.
# The old `counts` / `was_in` tally was already commented out in the
# original and is not reproduced here.
for read in screed.open(sample_filename):
    sequence = read.sequence
    misses = 0
    flags = []
    for start in range(len(sequence) - k + 1):
        kmer = sequence[start:start + k]
        for bf in bfs:
            # A k-mer is a hit when either it or its reverse complement
            # is in the entry.
            if kmer in bf or dna.reverse_complement(kmer) in bf:
                flags.append('1')
            else:
                misses += 1
                flags.append('0')
    # One character per (k-mer, entry) test: '1' for a hit, '0' for a miss.
    miss_str = ''.join(flags)
    if misses > 0:
        # Single-argument print(...) emits the same text as the print
        # statement used elsewhere in this script.
        print(misses)
        print(miss_str)
# Per-entry state machine for the current read; error_state[j] is one of:
#   0 : no error yet
#   1 : in the first error
#   2 : past the first error
#   3 : second error found
# An "error" here is a k-mer that is absent from bfs[j] on both strands.
error_state = [0] * len(bfs)
# error_len[j] counts the k-mers that make up the first error for bfs[j]
error_len = [0] * len(bfs)
for start in range(len(read.sequence) - k + 1):
    kmer = read.sequence[start:start + k]
    for idx, bf in enumerate(bfs):
        state = error_state[idx]
        # a second error was already found, so there is nothing left to check
        if state == 3:
            continue
        if (kmer in bf) or (dna.reverse_complement(kmer) in bf):
            # a hit while inside the first error closes that error
            if state == 1:
                error_state[idx] = 2
        elif state == 0:
            # first miss: enter the first error and start its length
            error_state[idx] = 1
            error_len[idx] += 1
        elif state == 1:
            # still inside the first error: extend its length
            error_len[idx] += 1
        elif state == 2:
            # a miss after the first error has closed: second error found
            error_state[idx] = 3
# NOTE(review): the next statements look like the tail of a loop that loads
# one entry of `bfs` per file. The loop header and the creation of `bf` are
# above this chunk, so re-nest these lines under that header when restoring
# the file's layout.
# NOTE(security): cPickle.loads can execute arbitrary code from a crafted
# file; only load dumps that come from a trusted source.
with open(filename, 'rb') as fp:
    data = zlib.decompress(fp.read())
bf.tables = cPickle.loads(data)
bfs.append(bf)

# buckets[j][b] is the number of reads whose fraction of k-mers found in
# bfs[j] fell into bucket b; each bucket covers `step` of the [0, 1] range.
num_buckets = options.num_buckets
buckets = [[0] * num_buckets for _ in bfs]
step = 1.0 / float(num_buckets)
# A fraction of exactly 1.0 would index one past the end, so clamp to the
# last bucket. This was a hard-coded 9, which is only right for 10 buckets.
last_bucket = num_buckets - 1

for read in screed.open(options.sample_filename):
    sequence = read.sequence
    num_kmers = len(sequence) - k + 1
    kmers_found = [0] * len(bfs)
    for start in range(num_kmers):
        kmer = sequence[start:start + k]
        for idx, bf in enumerate(bfs):
            # A k-mer counts as found when either strand is in the entry.
            if kmer in bf or dna.reverse_complement(kmer) in bf:
                kmers_found[idx] += 1

    # One output line per read: the raw counts, then one bucket per entry.
    fields = [str(kmers_found)]
    for idx, found in enumerate(kmers_found):
        if num_kmers > 0:
            fraction = float(found) / float(num_kmers)
        else:
            # The read is shorter than k, so it has no k-mers. Treat it as
            # fraction 0 instead of dividing by zero.
            fraction = 0.0
        bucket = min(last_bucket, int(fraction / float(step)))
        fields.append(str(bucket))
        buckets[idx][bucket] += 1
    # Joining with spaces reproduces the output of the chained
    # `print x,` statements followed by a bare `print`.
    print(" ".join(fields))

# Final histogram: one tab-separated row per entry of `bfs`.
for row in buckets:
    print("\t".join(map(str, row)))
#if str(was_in) in counts:
    #counts[str(was_in)] += 1
#else: