Пример #1
0
def query_library_bytes_shm(genome, sequence, shm_genome_bytes,
                            mismatches_threshold=5):
    """Search a byte-packed library for entries close to ``sequence``.

    The first 20 characters of ``sequence`` are cut into five 4-character
    chunks, and each chunk is mapped to one ``uint8`` code through the
    module-level ``bytes_translation_dict``.  Chunks that are missing from
    that table (including short or empty chunks of a too-short sequence)
    are encoded as 0.  The packed query, the library and a zero-filled
    ``uint32`` result buffer are then handed to
    ``bc.striding_8bit_comparison``.

    Args:
        genome: Label of the library being searched; it is only used in
            the progress output.
        sequence: Query string; only its first 20 characters are read.
        shm_genome_bytes: Sized buffer holding the packed library
            (presumably a shared-memory uint8 array -- confirm with the
            caller).
        mismatches_threshold: Threshold forwarded unchanged to
            ``bc.striding_8bit_comparison``.  Defaults to 5, the value
            that was previously hard-coded.

    Returns:
        The leading ``n_matches`` entries of the result buffer, where
        ``n_matches`` is the value returned by the comparison routine.
    """
    # Kept function-local, as in the original, so that importing this
    # module does not require ``bc`` to be importable.
    import bc

    chunk_width = 4
    n_chunks = 5
    query_bytes = np.array(
        [bytes_translation_dict.get(
            sequence[i * chunk_width:(i + 1) * chunk_width], 0)
         for i in range(n_chunks)],
        dtype=np.dtype("uint8"))

    # One result slot per query-sized stride of the library.  Floor
    # division keeps the shape an integer on Python 3 as well as Python 2.
    match_buffer = np.zeros((len(shm_genome_bytes) // len(query_bytes),),
                            dtype=np.dtype("uint32"))

    n_matches = bc.striding_8bit_comparison(
        shm_genome_bytes, query_bytes, match_buffer, mismatches_threshold)

    matches_list = match_buffer[:n_matches]
    # Single-argument print calls produce the same output under the
    # Python 2 print statement and the Python 3 print function.
    print("GENOME: {0}".format(genome))
    print("N MATCHES:  {0}".format(len(matches_list)))

    return matches_list
Пример #2
0
def query_library_bytes(nlines):
    """Time one guide query against the on-disk byte library and print stats.

    Loads ``<RD_DATAROOT>/<nlines>_bytes.npy``, parses the module-level
    ``ltests`` text into entries, packs the 20-character guide of the first
    entry into five ``uint8`` codes via ``bytes_translation_dict``, runs
    ``bc.striding_8bit_comparison`` over the library and prints the elapsed
    time and the number of matches.

    Args:
        nlines: Value interpolated into the library file name.

    Returns:
        None.  All results are reported on standard output.

    Raises:
        ValueError: If an entry of ``ltests`` does not have the expected
            ``id`` / 20-character guide / 3-character nrg layout, or if
            ``ltests`` holds no entries at all.
        KeyError: If a 4-character chunk of the guide is missing from
            ``bytes_translation_dict``.
    """
    print("starting query, loading lib")
    library_path = os.path.join(RD_DATAROOT, "{0}_bytes.npy".format(nlines))

    # .npy is a binary format: text mode can corrupt the data on Windows
    # and fails outright on Python 3.
    with open(library_path, "rb") as handle:
        library_bytes = np.load(handle)

    print("loaded lib, setting up query bytes")
    # Compiled once instead of once per entry; raw string avoids the
    # invalid-escape warnings that "\S" triggers in a plain literal.
    entry_pattern = re.compile(
        r"(?P<id>.*)\n(?P<guide>\S{20})\s*(?P<nrg>\S{3})", re.M)
    tests = []
    for entry in ltests.strip().split(">"):
        if not entry:
            continue
        match = entry_pattern.search(entry)
        if match is None:
            raise ValueError("malformed test entry: {0!r}".format(entry))
        tests.append(match.groupdict())
    if not tests:
        raise ValueError("ltests contains no test entries")

    # Only the first parsed guide is queried.  The lookup is deliberately
    # strict here: an unknown chunk raises KeyError.
    guide = tests[0]["guide"]
    query_bytes = np.array(
        [bytes_translation_dict[guide[i * 4:i * 4 + 4]] for i in range(5)],
        dtype=np.dtype("uint8"))
    bits_mismatch_threshold = 4

    # One result slot per query-sized stride of the library.  Floor
    # division keeps the shape an integer on Python 3 as well as Python 2.
    match_buffer = np.zeros((len(library_bytes) // len(query_bytes),),
                            dtype=np.dtype("uint32"))

    print("running comparison")
    # Kept function-local, as in the original, so that importing this
    # module does not require ``bc`` to be importable.
    import bc

    # NOTE(review): assumes utcnow() returns a datetime so the difference
    # is a timedelta -- confirm against the module's imports.
    started = utcnow()
    n_matches = bc.striding_8bit_comparison(
        library_bytes, query_bytes, match_buffer, bits_mismatch_threshold)
    compare_time = utcnow() - started

    library_len = len(library_bytes)
    elapsed_us = compare_time.seconds * 1e6 + compare_time.microseconds
    # An empty library would otherwise divide by zero.
    if library_len:
        us_per_million = elapsed_us / float(library_len / 1e6)
    else:
        us_per_million = 0.0
    print("compared {0} matches in {1} ({2} microsec/ million)".format(
        library_len, compare_time, us_per_million))
    matches_list = match_buffer[:n_matches]

    print("done comparing, computing NZ elts")
    print("python, n_matches: {0}".format(n_matches))
    # Indexing element 0 of an empty result would raise IndexError.
    if len(matches_list):
        print("first match: {0}".format(matches_list[0]))
    else:
        print("first match: none")
Пример #3
0
def query_library_bytes_shm(genome, sequence, shm_genome_bytes,
                            mismatches_threshold=5):
    """Search a byte-packed library for entries close to ``sequence``.

    The first 20 characters of ``sequence`` are cut into five 4-character
    chunks, and each chunk is mapped to one ``uint8`` code through the
    module-level ``bytes_translation_dict``.  Chunks that are missing from
    that table (including short or empty chunks of a too-short sequence)
    are encoded as 0.  The packed query, the library and a zero-filled
    ``uint32`` result buffer are then handed to
    ``bc.striding_8bit_comparison``.

    Args:
        genome: Label of the library being searched; it is only used in
            the progress output.
        sequence: Query string; only its first 20 characters are read.
        shm_genome_bytes: Sized buffer holding the packed library
            (presumably a shared-memory uint8 array -- confirm with the
            caller).
        mismatches_threshold: Threshold forwarded unchanged to
            ``bc.striding_8bit_comparison``.  Defaults to 5, the value
            that was previously hard-coded.

    Returns:
        The leading ``n_matches`` entries of the result buffer, where
        ``n_matches`` is the value returned by the comparison routine.
    """
    # Kept function-local, as in the original, so that importing this
    # module does not require ``bc`` to be importable.
    import bc

    chunk_width = 4
    n_chunks = 5
    query_bytes = np.array(
        [
            bytes_translation_dict.get(
                sequence[i * chunk_width:(i + 1) * chunk_width], 0)
            for i in range(n_chunks)
        ],
        dtype=np.dtype("uint8"))

    # One result slot per query-sized stride of the library.  Floor
    # division keeps the shape an integer on Python 3 as well as Python 2.
    match_buffer = np.zeros((len(shm_genome_bytes) // len(query_bytes), ),
                            dtype=np.dtype("uint32"))

    n_matches = bc.striding_8bit_comparison(
        shm_genome_bytes, query_bytes, match_buffer, mismatches_threshold)

    matches_list = match_buffer[:n_matches]
    # Single-argument print calls produce the same output under the
    # Python 2 print statement and the Python 3 print function.
    print("GENOME: {0}".format(genome))
    print("N MATCHES:  {0}".format(len(matches_list)))

    return matches_list