def test_simple_int(self):
    """Check indexed records written in two Writer sessions on one path.

    The first session appends 1000 'train' and 500 'val' records; the second
    appends 100 more under each key.  Each payload is the record's position
    inside its own session, so the reads verify that, per index key, the
    second session's records are found right after the first session's.
    """
    # NOTE(review): another test in this file is also named test_simple_int;
    # if both live in the same class the later definition shadows this one.
    # Confirm against the enclosing class.
    sessions = (
        (('train', 1000), ('val', 500)),
        (('train', 100), ('val', 100)),
    )

    with tempfile.TemporaryDirectory() as temp_dir:
        path = os.path.join(temp_dir, 'loss')

        for session in sessions:
            writer = Writer(path, compression=None)
            for subset, count in session:
                for value in range(count):
                    writer.append_record(
                        str(value).encode(),
                        index={'subset': subset, 'subtask': 'domain'},
                    )
            writer.close()

        reader = Reader(path)
        # (subset, records from the first session, records from the second)
        for subset, first, second in (('train', 1000, 100), ('val', 500, 100)):
            # Positions below `first` hold the first session's payloads.
            for position in range(first):
                stored = reader.get(
                    position, {'subset': subset, 'subtask': 'domain'}
                )
                assert position == int(stored)
            # The second session restarted its payloads from zero.
            for position in range(first, first + second):
                stored = reader.get(
                    position, {'subset': subset, 'subtask': 'domain'}
                )
                assert position - first == int(stored)
        reader.close()
def test_uncommitted_read_on_closed(self):
    """Read with ``uncommitted_bucket_visible=True`` after the writer closed.

    Writes 1000 gzip-compressed records, closes the writer, then opens a
    Reader with ``uncommitted_bucket_visible=True`` and expects every record
    to be counted and readable in append order.
    """
    total = 1000
    payloads = [str(value).encode() for value in range(total)]

    with tempfile.TemporaryDirectory() as temp_dir:
        path = os.path.join(temp_dir, 'loss')

        writer = Writer(path, compression='gzip')
        for payload in payloads:
            writer.append_record(payload)
        writer.close()

        reader = Reader(path, uncommitted_bucket_visible=True)
        assert reader.get_records_num() == total
        for position in range(total):
            stored = reader.get(position)
            assert position == int(stored.decode())
        reader.close()
def test_simple_int(self):
    """Round-trip 1000 decimal-string records written with compression=None.

    Each record is the ASCII encoding of its own position; the test checks
    the record count and that every position decodes back to itself.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        path = os.path.join(temp_dir, 'loss')
        length = 1000

        writer = Writer(path, compression=None)
        for index in range(length):
            writer.append_record(str(index).encode())
        writer.close()

        reader = Reader(path)
        try:
            assert reader.get_records_num() == length
            for index in range(length):
                assert index == int(reader.get(index).decode())
        finally:
            # Release the reader before the temporary directory is removed,
            # including when an assertion above fails.
            reader.close()
def test_simple_binary(self):
    """Round-trip 5000 variable-length byte records written with compression=None.

    Record ``i`` is the decimal text of ``i`` repeated ``i`` times, so record
    0 is empty and later records grow steadily in size.
    """
    def make_entry(index):
        # str(0) * 0 == '', so the first record is an empty payload.
        return (str(index) * index).encode()

    with tempfile.TemporaryDirectory() as temp_dir:
        path = os.path.join(temp_dir, 'loss')
        length = 5000

        writer = Writer(path, compression=None)
        for index in range(length):
            writer.append_record(make_entry(index))
        writer.close()

        reader = Reader(path)
        try:
            assert reader.get_records_num() == length
            for index in range(length):
                assert make_entry(index) == reader.get(index)
        finally:
            # Release the reader before the temporary directory is removed,
            # including when an assertion above fails.
            reader.close()
def test_append_mode_binary(self):
    """Write 1000 records across five ``rewrite=False`` Writer sessions.

    Every session opens a new Writer on the same path and appends the next
    200 records; the test then expects all 1000 to be readable in order.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        path = os.path.join(temp_dir, 'loss')
        length = 1000
        chunks = 5
        chunk_len = length // chunks

        for chunk in range(chunks):
            writer = Writer(path, rewrite=False)
            start = chunk * chunk_len
            for index in range(start, start + chunk_len):
                writer.append_record(str(index).encode())
            writer.close()

        reader = Reader(path)
        try:
            assert reader.get_records_num() == length
            for index in range(length):
                assert str(index).encode() == reader.get(index)
        finally:
            # Release the reader before the temporary directory is removed,
            # including when an assertion above fails.
            reader.close()
def test_write_mode_binary(self):
    """Open a Writer with ``rewrite=True`` twice on the same path.

    The first session writes 500 placeholder records (``b'0'``); the second
    writes the decimal text of 500..999.  The test expects only the second
    session's 500 records to remain, starting at position 0.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        path = os.path.join(temp_dir, 'loss')
        length = 1000
        half = length // 2

        writer = Writer(path, rewrite=True)
        for _ in range(half):
            writer.append_record(b'0')
        writer.close()

        writer = Writer(path, rewrite=True)
        for index in range(half, length):
            writer.append_record(str(index).encode())
        writer.close()

        reader = Reader(path)
        try:
            assert reader.get_records_num() == half
            # Position p is expected to hold the value half + p.
            for position, value in enumerate(range(half, length)):
                assert str(value).encode() == reader.get(position)
        finally:
            # Release the reader before the temporary directory is removed,
            # including when an assertion above fails.
            reader.close()