def query_library_bytes_shm(genome, sequence, shm_genome_bytes,
                            mismatches_threshold=5):
    """Compare one encoded sequence against an in-memory genome byte library.

    The first 20 characters of ``sequence`` are cut into five 4-character
    chunks, each chunk is translated to one byte through the module-level
    ``bytes_translation_dict`` (chunks missing from the table become 0), and
    the resulting 5-byte query is handed to ``bc.striding_8bit_comparison``
    together with the library and a preallocated uint32 output buffer.

    NOTE(review): this file defines ``query_library_bytes_shm`` a second time
    further down; if both definitions are at module level the later one is
    the one callers get. Consider removing one of them.

    Args:
        genome: Label for the library; only used in the printed summary.
        sequence: String to query; only ``sequence[0:20]`` is read.
        shm_genome_bytes: Library buffer passed straight to ``bc`` (presumably
            a uint8 numpy array living in shared memory -- TODO confirm).
        mismatches_threshold: Threshold forwarded unchanged as the fourth
            argument of ``bc.striding_8bit_comparison``. Defaults to 5, the
            value that used to be hard-coded.

    Returns:
        The first ``n_matches`` entries of the output buffer, where
        ``n_matches`` is the value returned by ``bc.striding_8bit_comparison``.
    """
    # Project-local extension module; kept as a function-scope import so that
    # importing this file does not require it.
    import bc

    library_bytes = shm_genome_bytes
    query_bytes = np.array(
        [bytes_translation_dict.get(sequence[j * 4:j * 4 + 4], 0)
         for j in range(5)],
        dtype=np.dtype("uint8"))

    # Floor division: an array shape must be an integer, and "/" would yield a
    # float under true division (Python 3 or "from __future__ import division").
    match_buffer = np.zeros((len(library_bytes) // len(query_bytes),),
                            dtype=np.dtype("uint32"))

    n_matches = bc.striding_8bit_comparison(
        library_bytes, query_bytes, match_buffer, mismatches_threshold)
    matches_list = match_buffer[:n_matches]

    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("GENOME: {0}".format(genome))
    # Two spaces on purpose: the old statement 'print "N MATCHES: ", n' printed
    # the label's trailing space followed by the separator space.
    print("N MATCHES:  {0}".format(len(matches_list)))
    return matches_list
def query_library_bytes(nlines):
    """Load a saved byte library and time one guide query against it.

    Steps, all reported on stdout:

    1. Load ``"<nlines>_bytes.npy"`` from ``RD_DATAROOT`` with ``np.load``.
    2. Parse the module-level ``ltests`` text into records. Records are
       separated by ``">"``; each one is an id line followed by a 20
       character guide and a 3 character ``nrg`` field.
    3. Encode the FIRST record's guide as five bytes (one per 4-character
       chunk) through ``bytes_translation_dict``.
    4. Run ``bc.striding_8bit_comparison`` with a mismatch threshold of 4
       and print the elapsed time, the match count and the first match.

    Args:
        nlines: Value formatted into the library file name.

    Returns:
        None. Results are only printed.

    Raises:
        AttributeError: If a non-empty record in ``ltests`` does not match the
            expected layout (the regex search returns ``None``).
        IndexError: If ``ltests`` contains no records.
        KeyError: If a guide chunk is missing from ``bytes_translation_dict``.
    """
    print("starting query, loading lib")
    library_bytes_path = os.path.join(
        RD_DATAROOT, "{0}_bytes.npy".format(nlines))
    # .npy is a binary format: open with "rb" so the bytes are not subjected
    # to text-mode decoding or newline translation before np.load sees them.
    with open(library_bytes_path, "rb") as lib_file:
        library_bytes = np.load(lib_file)

    print("loaded lib, setting up query bytes")
    # Compiled once (it used to be rebuilt for every record) and written as a
    # raw string so the regex escapes are not treated as string escapes.
    record_pattern = re.compile(
        r"(?P<id>.*)\n(?P<guide>\S{20})\s*(?P<nrg>\S{3})", re.M)
    tests = [
        record_pattern.search(record).groupdict()
        for record in ltests.strip().split(">")
        if record  # the text before the first ">" is an empty string
    ]

    # Only the first parsed record is actually queried.
    guide = tests[0]["guide"]
    query_bytes = np.array(
        [bytes_translation_dict[guide[j * 4:j * 4 + 4]] for j in range(5)],
        dtype=np.dtype("uint8"))
    mismatch_threshold = 4

    # Floor division: an array shape must be an integer, and "/" would yield a
    # float under true division (Python 3 or "from __future__ import division").
    match_buffer = np.zeros((len(library_bytes) // len(query_bytes),),
                            dtype=np.dtype("uint32"))

    print("running comparison")
    # Project-local extension module; kept as a function-scope import.
    import bc

    started = utcnow()
    n_matches = bc.striding_8bit_comparison(
        library_bytes, query_bytes, match_buffer, mismatch_threshold)
    compare_time = utcnow() - started

    elapsed_microsec = compare_time.seconds * 1e6 + compare_time.microseconds
    print("compared {0} matches in {1} ({2} microsec/ million)".format(
        len(library_bytes),
        compare_time,
        elapsed_microsec / (len(library_bytes) / 1e6)))

    matches_list = match_buffer[:n_matches]
    print("done comparing, computing NZ elts")
    print("python, n_matches: {0}".format(n_matches))
    # Indexing [0] on an empty result used to raise IndexError after all the
    # work was done; report the empty case instead.
    if len(matches_list) > 0:
        print("first match: {0}".format(matches_list[0]))
    else:
        print("first match: none")
def query_library_bytes_shm(genome, sequence, shm_genome_bytes,
                            mismatches_threshold=5):
    """Compare one encoded sequence against an in-memory genome byte library.

    The first 20 characters of ``sequence`` are cut into five 4-character
    chunks, each chunk is translated to one byte through the module-level
    ``bytes_translation_dict`` (chunks missing from the table become 0), and
    the resulting 5-byte query is handed to ``bc.striding_8bit_comparison``
    together with the library and a preallocated uint32 output buffer.

    NOTE(review): an earlier definition of ``query_library_bytes_shm`` exists
    higher up in this file; if both are at module level this later one is the
    one callers get. Consider removing the duplicate.

    Args:
        genome: Label for the library; only used in the printed summary.
        sequence: String to query; only ``sequence[0:20]`` is read.
        shm_genome_bytes: Library buffer passed straight to ``bc`` (presumably
            a uint8 numpy array living in shared memory -- TODO confirm).
        mismatches_threshold: Threshold forwarded unchanged as the fourth
            argument of ``bc.striding_8bit_comparison``. Defaults to 5, the
            value that used to be hard-coded.

    Returns:
        The first ``n_matches`` entries of the output buffer, where
        ``n_matches`` is the value returned by ``bc.striding_8bit_comparison``.
    """
    # Project-local extension module; kept as a function-scope import so that
    # importing this file does not require it.
    import bc

    library_bytes = shm_genome_bytes
    query_bytes = np.array(
        [bytes_translation_dict.get(sequence[j * 4:j * 4 + 4], 0)
         for j in range(5)],
        dtype=np.dtype("uint8"))

    # Floor division: an array shape must be an integer, and "/" would yield a
    # float under true division (Python 3 or "from __future__ import division").
    match_buffer = np.zeros((len(library_bytes) // len(query_bytes),),
                            dtype=np.dtype("uint32"))

    n_matches = bc.striding_8bit_comparison(
        library_bytes, query_bytes, match_buffer, mismatches_threshold)
    matches_list = match_buffer[:n_matches]

    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("GENOME: {0}".format(genome))
    # Two spaces on purpose: the old statement 'print "N MATCHES: ", n' printed
    # the label's trailing space followed by the separator space.
    print("N MATCHES:  {0}".format(len(matches_list)))
    return matches_list