示例#1
0
def _to_sorted_blocks(fin: io.BufferedIOBase, memory_size):
    """Split ``fin`` into sorted runs, each stored in its own temporary file.

    Repeatedly pulls values from ``fin`` via ``read_content(fin, memory_size)``,
    sorts them in memory and writes them to a fresh ``tmp_file()``. Iteration
    stops as soon as a read produces no values.

    Args:
        fin: binary input stream to read values from.
        memory_size: passed as the second argument to ``read_content``;
            presumably the maximum number of values loaded per run --
            TODO confirm against ``read_content``.

    Yields:
        The temporary file object of each run, already closed.
        NOTE(review): consumers presumably reopen it through its ``name``
        attribute -- confirm that ``tmp_file()`` keeps the file on disk after
        ``close()``.
    """
    while True:
        sorted_values = sorted(read_content(fin, memory_size))
        if not sorted_values:
            break

        f = tmp_file()
        try:
            write_content(f, sorted_values)
        finally:
            # Close the handle even when writing fails so it is not leaked.
            f.close()
        yield f
示例#2
0
def _merge_blocks(tmp_files, fout: io.BufferedIOBase, memory_size: int):
    """Merge the block files in ``tmp_files`` into ``fout`` with a k-way merge.

    Every entry of ``tmp_files`` is reopened for reading through its ``name``
    attribute, streamed with ``read_content`` and combined lazily by
    ``heapq.merge``; the merged stream is handed to ``write_content``.
    ``heapq.merge`` only produces sorted output when each input is already
    sorted, so every block file is expected to hold a sorted run.

    Args:
        tmp_files: sized collection of objects exposing a ``name`` attribute
            (path of a block file). It is not modified by this function.
        fout: binary output stream receiving the merged values.
        memory_size: budget from which the per-stream buffer sizes are derived.

    Raises:
        OSError: if a block file cannot be opened (for example
            ``FileNotFoundError`` when it no longer exists).
    """
    # The total buffer budget is 3 * memory_size, split into one share per
    # input file plus two shares for the (slightly larger) output buffer.
    # NOTE(review): this becomes 0 when len(tmp_files) + 2 > 3 * memory_size --
    # confirm how read_content/write_content treat a zero batch size.
    buffer_size = 3 * memory_size // (len(tmp_files) + 2)

    opened = []
    try:
        for block in tmp_files:
            # Read-only mode: a missing block file must fail loudly instead of
            # being silently created empty (which 'a+b' would do).
            opened.append(open(block.name, 'rb'))

        generators = [read_content(handle, batch_size=buffer_size) for handle in opened]
        write_content(fout, heapq.merge(*generators), batch_size=2 * buffer_size)
    finally:
        # Release every handle that was opened, even if the merge failed.
        for handle in opened:
            handle.close()
示例#3
0
def merge_sort_stupid(fin: io.BufferedIOBase, fout: io.BufferedIOBase, memory_size: int, left=0, count=None):
    """Sort ``count`` values of ``fin`` starting at ``left`` into ``fout``.

    Top-down external merge sort: a range that fits into ``memory_size`` is
    read, sorted in memory and written out directly. A larger range is split
    in two halves, each half is sorted recursively into its own temporary
    file, and the two files are merged into ``fout`` with ``heapq.merge``.

    Args:
        fin: binary input stream holding the values.
        fout: binary output stream; it is rewound to position 0 before use.
        memory_size: largest range (in values) that is sorted in memory; also
            used to derive the read/write batch sizes.
        left: start of the range, passed to ``go_to_pos``.
        count: number of values in the range; when ``None`` it is obtained
            from ``content_length(fin, preserve_pos=False)``.

    Raises:
        ValueError: if the range does not fit in memory and ``memory_size`` is
            smaller than 1; splitting could then never reach a range that
            fits, so the recursion would not terminate.
    """
    fout.seek(0)
    if count is None:
        count = content_length(fin, preserve_pos=False)

    if count <= memory_size:
        go_to_pos(fin, left)
        write_content(fout, sorted(read_content(fin, count=count)), batch_size=memory_size)
        return

    if memory_size < 1:
        # The right half always keeps at least as many values as it had when
        # count <= 1, so without this guard the recursion below is endless.
        raise ValueError('memory_size must be at least 1, got %r' % (memory_size,))

    half = count // 2
    with tmp_file() as left_f, tmp_file() as right_f:
        merge_sort_stupid(fin, left_f, memory_size, left, count=half)
        merge_sort_stupid(fin, right_f, memory_size, left + half, count=count - half)
        # Rewind both halves before streaming them into the merge.
        left_f.seek(0)
        right_f.seek(0)
        # NOTE(review): for memory_size == 1 the read batch size below is 0 --
        # confirm that read_content handles a zero batch size.
        write_content(fout, heapq.merge(read_content(left_f, batch_size=memory_size // 2),
                                        read_content(right_f, batch_size=memory_size // 2)),
                      batch_size=memory_size)
示例#4
0
 def _test_simple(self, values, sort_f, memory_size=None):
     """Run ``sort_f`` on ``values`` through temporary files and check the result.

     ``values`` are written to a temporary input file, which is rewound and
     passed to ``sort_f`` together with a temporary output file. Both files
     are then handed to ``self._check_sorted``.

     Args:
         values: values written to the input file with ``write_content``.
         sort_f: callable invoked as
             ``sort_f(input_file, output_file, memory_size=...)``.
         memory_size: memory budget for ``sort_f``; ``self._memory_size`` is
             used when it is ``None``.
     """
     with tmp_file() as src:
         with tmp_file() as dst:
             write_content(src, values)
             # Rewind so the sort starts reading from the first value.
             src.seek(0)
             if memory_size is None:
                 budget = self._memory_size
             else:
                 budget = memory_size
             sort_f(src, dst, memory_size=budget)
             self._check_sorted(src, dst)