def test_skip_length():
    """Skipping definition bytes must consume exactly one definitions block.

    For each row count, the byte length of the block produced by
    ``writer.make_definitions`` is compared with the stream position left
    behind by ``core.skip_definition_bytes`` after starting from offset 0.
    """
    # NOTE(review): the counts look chosen to sit either side of the points
    # where the encoded run length needs one more byte -- confirm.
    row_counts = (1, 63, 64, 64 * 127, 64 * 128, 63 * 128**2, 64 * 128**2)
    stream = NumpyIO(bytearray(2**21))
    for count in row_counts:
        encoded, _ = writer.make_definitions(np.zeros(count), True)
        # Rewind so that tell() afterwards measures only this skip.
        stream.seek(0, 0)
        core.skip_definition_bytes(stream, count)
        assert stream.tell() == len(encoded)
def test_skip_length():
    """Skipping definition bytes must advance ``loc`` by the block length.

    A minimal stand-in exposing only a ``loc`` attribute is handed to
    ``core.skip_definition_bytes``; afterwards ``loc`` has to equal the
    length of the block built by ``writer.make_definitions`` for the same
    number of rows.
    """

    class FakeCursor:
        # The class object itself (not an instance) is passed around, so
        # ``loc`` is read and reset as a class attribute.
        loc = 0

    row_counts = (1, 63, 64, 64 * 127, 64 * 128, 63 * 128**2, 64 * 128**2)
    for count in row_counts:
        encoded, _ = writer.make_definitions(np.zeros(count), True)
        FakeCursor.loc = 0  # start every measurement from offset zero
        core.skip_definition_bytes(FakeCursor, count)
        assert FakeCursor.loc == len(encoded)
def test_make_definitions_with_nulls():
    """Round-trip definition levels for data that contains nulls.

    Random series of ``True``/``None`` are encoded with
    ``writer.make_definitions`` and decoded again with
    ``encoding.read_rle_bit_packed_hybrid`` at bit width 1. Every decoded
    level must equal the corresponding entry of the not-null mask.
    """
    for _ in range(10):
        decoded_buf = np.empty(1000, dtype=np.int32)
        o = encoding.Numpy32(decoded_buf)
        data = pd.Series(
            np.random.choice([True, None], size=np.random.randint(1, 1000))
        )
        defs, _ = writer.make_definitions(data, False)
        # np.frombuffer replaces np.fromstring, whose binary mode is
        # deprecated in NumPy.
        i = encoding.Numpy8(np.frombuffer(defs, dtype=np.uint8))
        encoding.read_rle_bit_packed_hybrid(i, 1, length=None, o=o)
        # Only the first len(data) decoded values are compared.
        levels = o.so_far()[: len(data)]
        # .all() rather than .sum(): a single matching element must not be
        # enough for the test to pass.
        assert (levels == ~data.isnull()).all()
def test_make_definitions_with_nulls():
    """Round-trip definition levels for data that contains nulls.

    Random series of ``True``/``None`` are encoded with
    ``writer.make_definitions`` and decoded with
    ``cencoding.read_rle_bit_packed_hybrid`` at bit width 1 into an int32
    array exposed to the decoder as raw bytes. Every decoded level must
    equal the corresponding entry of the not-null mask.
    """
    for _ in range(10):
        out = np.empty(1000, dtype=np.int32)
        # The decoder writes through a uint8 view of the int32 array.
        o = cencoding.NumpyIO(out.view("uint8"))
        data = pd.Series(
            np.random.choice([True, None], size=np.random.randint(1, 1000))
        )
        defs, _ = writer.make_definitions(data, False)
        i = cencoding.NumpyIO(np.frombuffer(defs, dtype=np.uint8))
        cencoding.read_rle_bit_packed_hybrid(i, 1, length=0, o=o)
        # .all() rather than .sum(): a single matching element must not be
        # enough for the test to pass.
        assert (out[: len(data)] == ~data.isnull()).all()
def test_make_definitions_with_nulls():
    """Round-trip definition levels for data that contains nulls.

    Random series of ``True``/``None`` are encoded with
    ``writer.make_definitions`` and decoded again with
    ``encoding.read_rle_bit_packed_hybrid`` at bit width 1. Every decoded
    level must equal the corresponding entry of the not-null mask.
    """
    for _ in range(10):
        decoded_buf = np.empty(1000, dtype=np.int32)
        o = encoding.Numpy32(decoded_buf)
        data = pd.Series(
            np.random.choice([True, None], size=np.random.randint(1, 1000))
        )
        # Distinct names for the encoded bytes and the decoded levels; the
        # output buffer is no longer shadowed.
        defs, _ = writer.make_definitions(data, False)
        i = encoding.Numpy8(np.frombuffer(defs, dtype=np.uint8))
        encoding.read_rle_bit_packed_hybrid(i, 1, length=None, o=o)
        # Only the first len(data) decoded values are compared.
        levels = o.so_far()[: len(data)]
        # .all() rather than .sum(): a single matching element must not be
        # enough for the test to pass.
        assert (levels == ~data.isnull()).all()
def test_make_definitions_without_nulls():
    """Check size and content of definition levels for null-free data.

    An all-``True`` series of random length is encoded with
    ``writer.make_definitions``. The encoded size must be 4 bytes, plus the
    number of 7-bit groups needed for ``len(data) << 1``, plus 1 byte.
    Decoding with ``cencoding.read_rle_bit_packed_hybrid`` must then
    reproduce the not-null mask exactly.
    """
    for _ in range(100):
        out = np.empty(10000, dtype=np.int32)
        # The decoder writes through a uint8 view of the int32 array.
        o = cencoding.NumpyIO(out.view("uint8"))
        data = pd.Series([True] * np.random.randint(1, 10000))
        defs, _ = writer.make_definitions(data, True)

        # Count the 7-bit groups needed to hold len(data) << 1.
        remaining = len(data) << 1
        count_bytes = 1
        while remaining > 127:
            remaining >>= 7
            count_bytes += 1
        assert len(defs) == 4 + count_bytes + 1  # "length", num_count, value

        i = cencoding.NumpyIO(np.frombuffer(defs, dtype=np.uint8))
        cencoding.read_rle_bit_packed_hybrid(i, 1, length=0, o=o)
        # o.tell() is a byte offset; the comparison is over int32 items.
        decoded = out[: o.tell() // 4]
        # .all() rather than .sum(): a single matching element must not be
        # enough for the test to pass.
        assert (decoded == ~data.isnull()).all()
def test_make_definitions_without_nulls():
    """Check size and content of definition levels for null-free data.

    An all-``True`` series of random length is encoded with
    ``writer.make_definitions``. The encoded size must be 4 bytes, plus the
    number of 7-bit groups needed for ``len(data) << 1``, plus 1 byte.
    Decoding with ``encoding.read_rle_bit_packed_hybrid`` must then
    reproduce the not-null mask exactly.
    """
    for _ in range(100):
        decoded_buf = np.empty(10000, dtype=np.int32)
        o = encoding.Numpy32(decoded_buf)
        data = pd.Series([True] * np.random.randint(1, 10000))
        # Distinct names for the encoded bytes and the decoded levels; the
        # output buffer is no longer shadowed.
        defs, _ = writer.make_definitions(data, True)

        # Count the 7-bit groups needed to hold len(data) << 1.
        remaining = len(data) << 1
        count_bytes = 1
        while remaining > 127:
            remaining >>= 7
            count_bytes += 1
        assert len(defs) == 4 + count_bytes + 1  # "length", num_count, value

        i = encoding.Numpy8(np.frombuffer(defs, dtype=np.uint8))
        encoding.read_rle_bit_packed_hybrid(i, 1, length=None, o=o)
        levels = o.so_far()
        # .all() rather than .sum(): a single matching element must not be
        # enough for the test to pass.
        assert (levels == ~data.isnull()).all()
def test_make_definitions_without_nulls():
    """Check size and content of definition levels for null-free data.

    An all-``True`` series of random length is encoded with
    ``writer.make_definitions``. The encoded size must be 4 bytes, plus the
    number of 7-bit groups needed for ``len(data) << 1``, plus 1 byte.
    Decoding with ``encoding.read_rle_bit_packed_hybrid`` must then
    reproduce the not-null mask exactly.
    """
    for _ in range(100):
        decoded_buf = np.empty(10000, dtype=np.int32)
        o = encoding.Numpy32(decoded_buf)
        data = pd.Series([True] * np.random.randint(1, 10000))
        defs, _ = writer.make_definitions(data, True)

        # Count the 7-bit groups needed to hold len(data) << 1.
        remaining = len(data) << 1
        count_bytes = 1
        while remaining > 127:
            remaining >>= 7
            count_bytes += 1
        assert len(defs) == 4 + count_bytes + 1  # "length", num_count, value

        # np.frombuffer replaces np.fromstring, whose binary mode is
        # deprecated in NumPy.
        i = encoding.Numpy8(np.frombuffer(defs, dtype=np.uint8))
        encoding.read_rle_bit_packed_hybrid(i, 1, length=None, o=o)
        levels = o.so_far()
        # .all() rather than .sum(): a single matching element must not be
        # enough for the test to pass.
        assert (levels == ~data.isnull()).all()