Пример #1
0
def search_cuda_fast(search_filename):
    """Count matching lines in *search_filename* using a PyCUDA kernel.

    The file is streamed in large binary blocks; each block is scanned on
    the GPU only up to its last newline so no line is split across kernel
    launches, and the file position is rewound to that newline before the
    next read.

    Returns the total number of matched lines (int).
    """
    BLOCK_SIZE = 100000000  # bytes read from disk per chunk
    GRID_SIZE = 5000        # CUDA grid width; also the size of the match buffer

    block_search = cuda_block_search()

    patterns, pattern_starts = encode_strings(
        [bytes(string, 'utf-8') for string in searched_words()])
    patterns_gpu = gpuarray.to_gpu(patterns)
    pattern_starts_gpu = gpuarray.to_gpu(pattern_starts)

    # Per-CUDA-block match counters, summed on the device at the end.
    matches = np.zeros(GRID_SIZE,
                       dtype=np.int32)  # Must be higher than max id in kernel
    matches_gpu = gpuarray.to_gpu(matches)

    keyword_count = len(searched_words())
    # Integer division: the kernel expects an int byte count per thread;
    # '/' would produce a float.
    bytes_per_thread = np.int32(BLOCK_SIZE // GRID_SIZE)

    with open(search_filename, "rb") as search_file:
        while True:
            block = search_file.read(BLOCK_SIZE)
            if not block:
                break

            last_newline = block.rfind(b'\n')
            # np.frombuffer yields a proper uint8 array; np.array(bytes)
            # would create a 0-d object array that cannot go to the GPU.
            final_block_gpu = gpuarray.to_gpu(
                np.frombuffer(block, dtype=np.ubyte))

            if len(block) == BLOCK_SIZE and last_newline != -1:
                # Rewind so the partial trailing line is re-read next pass.
                search_file.seek(last_newline - len(block), 1)

            block_search(final_block_gpu,
                         np.int32(last_newline),
                         patterns_gpu,
                         pattern_starts_gpu,
                         np.int32(keyword_count),
                         bytes_per_thread,
                         matches_gpu,
                         block=(keyword_count, 1, 1),
                         grid=(GRID_SIZE, 1))

    result = pycuda.gpuarray.sum(matches_gpu).get()

    print("File done")

    return result
Пример #2
0
def search_file(search_filename):
    """Scan *search_filename* line by line and return the match count."""
    keywords = searched_words()
    with open(search_filename, "r") as handle:
        return sum(
            int(is_match(current_line, keywords))
            for current_line in tqdm.tqdm(handle))
Пример #3
0
def search_file(search_filename):
    """Regex-based scan of *search_filename*; returns the match count.

    All keywords are combined into one alternation pattern compiled once,
    outside the line loop.
    """
    keywords = searched_words()
    # re.escape keeps keywords containing regex metacharacters
    # (e.g. '.', '+', '(') from being interpreted as patterns.
    regex = re.compile(
        '|'.join(re.escape(keyword) for keyword in keywords), re.UNICODE)

    matches = 0
    with open(search_filename, "r") as search_file:
        for line in tqdm.tqdm(search_file):
            matches += int(is_match_regex(line, regex))

    return matches
Пример #4
0
def search_cuda_fast(search_filename):
    """Count matching lines in *search_filename* using a Numba CUDA kernel.

    The file is streamed in large binary blocks; each block is scanned on
    the GPU only up to its last newline so no line is split across kernel
    launches, and the file position is rewound to that newline before the
    next read.

    Returns the total number of matched lines (int).
    """
    BLOCK_SIZE = 100000000  # bytes read from disk per chunk
    GRID_SIZE = 5000        # CUDA grid width; also the size of the match buffer

    patterns, pattern_starts = encode_strings(
        [bytes(string, 'utf-8') for string in searched_words()])
    pattern_gpu = cuda.to_device(patterns)
    pattern_starts_gpu = cuda.to_device(pattern_starts)

    # Per-CUDA-block match counters, summed on the host at the end.
    matches = np.zeros(GRID_SIZE, dtype=np.int32)
    matches_gpu = cuda.to_device(matches)

    keyword_count = len(searched_words())

    # Loop-invariant scalar arguments: build their device arrays once,
    # not on every iteration.
    keyword_count_gpu = cuda.to_device(
        np.array([keyword_count], dtype=np.int32))
    # Integer division: the kernel expects an int byte count per thread;
    # '/' would produce a float that np.array then silently truncates.
    chunk_size_gpu = cuda.to_device(
        np.array([BLOCK_SIZE // GRID_SIZE], dtype=np.int32))

    with open(search_filename, "rb") as search_file:
        while True:
            block = search_file.read(BLOCK_SIZE)
            if not block:
                break

            last_newline = block.rfind(b'\n')
            final_block_gpu = cuda.to_device(
                np.frombuffer(block, dtype=np.ubyte))

            if len(block) == BLOCK_SIZE and last_newline != -1:
                # Rewind so the partial trailing line is re-read next pass.
                search_file.seek(last_newline - len(block), 1)

            block_search[GRID_SIZE, keyword_count](
                final_block_gpu,
                cuda.to_device(np.array([last_newline], dtype=np.int32)),
                pattern_gpu, pattern_starts_gpu,
                keyword_count_gpu,
                chunk_size_gpu, matches_gpu)

    result = matches_gpu.copy_to_host().sum()

    print("File done")

    return result
Пример #5
0
def search_file(search_filename):
    """Scan *search_filename* with a flashtext processor; return the match count."""
    processor = KeywordProcessor()
    for word in searched_words():
        processor.add_keyword(word)

    with open(search_filename, "r") as handle:
        return sum(
            int(is_match_flashtext(current_line, processor))
            for current_line in tqdm.tqdm(handle))
def searching_worker(text_queue, file_count):
    """Thread worker: drain batches from *text_queue* and count matching texts.

    Keeps polling until the module-global ``files_read`` counter reports
    that all *file_count* files have been fully read AND the queue has
    gone quiet (5 s without a batch).  The local tally is folded into the
    shared ``global_matches`` under ``lock`` exactly once, at the end.
    """
    global files_read
    global global_matches
    keywords = searched_words()
    matches = 0
    while files_read != file_count:  # if some files are still not read till the end, continue
        while True:
            try:
                batch = text_queue.get(True, 5.0)
            except queue.Empty:  # queue quiet for 5 s -> re-check outer condition
                break

            for text in batch:
                if is_match(text, keywords):
                    matches += 1

    with lock:
        global_matches += matches
Пример #7
0
def searching_process(text_queue, matches_shared, files_read, file_count):
    """Process worker: drain batches from *text_queue* and count matching texts.

    Runs until ``files_read.value`` (a shared counter) reports that all
    *file_count* files have been fully read AND the queue has gone quiet
    (5 s without a batch).  The local tally is added to *matches_shared*
    under its own lock exactly once, at the end.
    """
    keywords = searched_words()
    matches = 0
    while files_read.value != file_count:
        while True:
            try:
                batch = text_queue.get(True, 5.0)
            except queue.Empty:  # queue quiet for 5 s -> re-check outer condition
                break

            for text in batch:
                if is_match(text, keywords):
                    matches += 1

    with matches_shared.get_lock():
        matches_shared.value += matches

    print("Searching process finished")