def _to_sorted_blocks(fin: io.BufferedIOBase, memory_size: int):
    """Split ``fin`` into sorted runs, each stored in its own temp file.

    Repeatedly reads a chunk from ``fin`` via ``read_content(fin, memory_size)``,
    sorts it in memory and writes it to a fresh ``tmp_file()``.  Iteration
    stops at the first chunk that comes back empty.

    Args:
        fin: Input stream to consume.
        memory_size: Passed to ``read_content`` as its second positional
            argument (presumably the maximum number of values held in
            memory at once -- confirm against ``read_content``).

    Yields:
        The temp-file object of each sorted run, already closed.
    """
    while True:
        sorted_values = sorted(read_content(fin, memory_size))
        if not sorted_values:
            break
        f = tmp_file()
        try:
            write_content(f, sorted_values)
        finally:
            # Close even when the write fails so the handle is not leaked.
            f.close()
        yield f
def _merge_blocks(tmp_files, fout: io.BufferedIOBase, memory_size: int):
    """Merge already-sorted temp files into ``fout`` with a k-way merge.

    Every entry of ``tmp_files`` is reopened through its ``.name`` and the
    list slot is replaced in place by the reopened handle.  All reopened
    handles are closed before returning, including when an error occurs.

    Args:
        tmp_files: Mutable list of objects exposing ``.name``; each named
            file is expected to hold one sorted run.
        fout: Output stream that receives the merged values.
        memory_size: Memory budget used to size the read/write batches.
    """
    # Let's make the output buffer slightly larger: we can use 3 times
    # `memory_size` for buffers, shared between one `buffer_size` reader per
    # block and a single `2 * buffer_size` writer.
    # The floor division reaches 0 once there are many blocks; keep at least
    # 1 so every reader can make progress.
    # NOTE(review): assumes read_content needs a positive batch_size -- confirm.
    buffer_size = max(1, 3 * memory_size // (len(tmp_files) + 2))

    opened = []
    try:
        for i, f in enumerate(tmp_files):
            handle = open(f.name, 'a+b')
            opened.append(handle)
            # Append mode may position the stream at the end; rewind to read.
            handle.seek(0)
            tmp_files[i] = handle

        generators = [read_content(f, batch_size=buffer_size) for f in tmp_files]
        write_content(fout, heapq.merge(*generators), batch_size=2 * buffer_size)
    finally:
        # Close whatever was opened, even if opening or merging failed midway.
        for handle in opened:
            handle.close()
def merge_sort_stupid(fin: io.BufferedIOBase, fout: io.BufferedIOBase, memory_size: int, left=0, count=None):
    """Recursive top-down external merge sort of a slice of ``fin`` into ``fout``.

    A slice of at most ``memory_size`` items is sorted in memory.  A larger
    slice is halved, each half is sorted recursively into its own temp
    file, and the two temp files are merged into ``fout``.

    Args:
        fin: Input stream holding the values to sort.
        fout: Output stream; rewound to position 0 before anything is written.
        memory_size: Largest slice sorted directly in memory; also used to
            size the read/write batches.
        left: Position handed to ``go_to_pos`` as the start of the slice.
        count: Number of items in the slice; when ``None`` it is obtained
            from ``content_length(fin, preserve_pos=False)``.
    """
    fout.seek(0)
    if count is None:
        count = content_length(fin, preserve_pos=False)

    if count > memory_size:
        half = count // 2
        with tmp_file() as left_f, tmp_file() as right_f:
            # Sort each half into its own scratch file.
            merge_sort_stupid(fin, left_f, memory_size, left, count=half)
            merge_sort_stupid(fin, right_f, memory_size, left + half, count=count - half)

            # Rewind both scratch files before reading them back.
            left_f.seek(0)
            right_f.seek(0)

            run_batch = memory_size // 2
            merged = heapq.merge(
                read_content(left_f, batch_size=run_batch),
                read_content(right_f, batch_size=run_batch),
            )
            write_content(fout, merged, batch_size=memory_size)
        return

    # Base case: the slice fits in memory, so sort it directly.
    go_to_pos(fin, left)
    in_memory = sorted(read_content(fin, count=count))
    write_content(fout, in_memory, batch_size=memory_size)
def _test_simple(self, values, sort_f, memory_size=None):
    """Run ``sort_f`` on ``values`` through temp files and check the result.

    ``values`` are written to a temp input file, which is rewound and
    passed to ``sort_f`` together with a temp output file.  The pair is
    then handed to ``self._check_sorted``.

    Args:
        values: Values written to the input file.
        sort_f: Sort function called as
            ``sort_f(input_file, output_file, memory_size=...)``.
        memory_size: Memory budget for ``sort_f``; falls back to
            ``self._memory_size`` when ``None``.
    """
    with tmp_file() as input_file, tmp_file() as output_file:
        write_content(input_file, values)
        input_file.seek(0)

        if memory_size is None:
            budget = self._memory_size
        else:
            budget = memory_size

        sort_f(input_file, output_file, memory_size=budget)
        self._check_sorted(input_file, output_file)