def test_dump_load(dtype, byteorder):
    """Check ``dumps()`` byte for byte, then round-trip it through ``loads()``.

    Row lengths are written using ``dtype`` with its byte order set to
    ``byteorder``; the flat data is ``int8``.
    """
    dtype = np.dtype(dtype).newbyteorder(byteorder)
    flat = np.arange(5, dtype=np.int8)
    ragged = RaggedArray.from_lengths(flat, [2, 3, 0])

    endianness = "big" if _big_endian(dtype) else "little"

    def encode_length(length):
        # Encode a row length as ``dtype.itemsize`` bytes in ``endianness``.
        return int.to_bytes(length, dtype.itemsize, endianness)

    dumped = ragged.dumps(ldtype=dtype)
    expected = b"".join((
        encode_length(2), flat[0:2].tobytes(),
        encode_length(3), flat[2:5].tobytes(),
        encode_length(0), b"",
    ))  # yapf: disable
    # Compare as lists purely so that pytest's failure output is legible.
    assert list(dumped) == list(expected)

    from rockhopper._ragged_array import slug
    row_count = slug.dll.count_rows(ptr(dumped), len(dumped), _2_power(dtype),
                                    _big_endian(dtype), flat.itemsize)
    assert row_count == len(ragged)

    # One extra trailing byte is expected to be rejected.
    with pytest.raises(ValueError):
        RaggedArray.loads(dumped.tobytes() + b"\x01", dtype=ragged.dtype,
                          ldtype=dtype)

    parsed, consumed = RaggedArray.loads(dumped, dtype=ragged.dtype,
                                         ldtype=dtype)
    for attribute in ("starts", "ends", "flat"):
        assert np.array_equal(getattr(ragged, attribute),
                              getattr(parsed, attribute))
    assert consumed == len(dumped)
def test_implicit_bounds():
    """Construct from a single bounds list where each end is the next start."""
    flat = np.random.random(10)
    bounds = [0, 3, 8, 8, 10]
    expected_starts, expected_ends = bounds[:-1], bounds[1:]

    ragged = RaggedArray(flat, bounds)

    # The flat data must be used as-is rather than copied.
    assert ragged.flat is flat
    assert np.all(ragged.starts == expected_starts)
    assert np.all(ragged.ends == expected_ends)
    _test_get_row(ragged)

    as_float32 = RaggedArray(flat, bounds, dtype=np.float32)
    assert as_float32.dtype == np.float32
def test_3d():
    """Check attributes, ``repacked()`` and ``to_rectangular_arrays()`` for a
    ragged array whose items are themselves rows of 3 integers."""
    ragged = RaggedArray.from_nested([
        [[0, 1, 2], [3, 4, 5]],
        [[6, 7, 8], [9, 10, 11]],
        [[12, 13, 14], [15, 16, 17], [18, 19, 20]],
        [],
    ])
    expected_flat = np.arange(21).reshape((7, 3))
    assert np.array_equal(ragged.flat, expected_flat)
    assert len(ragged) == 4
    assert ragged.dtype == int
    assert ragged.itemshape == (3, )
    assert ragged.itemsize == 3 * ragged.dtype.itemsize
    assert ragged[-1].shape == (0, 3)

    # The input is already packed so repacking should yield an exact copy.
    repacked = ragged.repacked()
    for attribute in ("flat", "starts", "ends"):
        assert np.array_equal(getattr(ragged, attribute),
                              getattr(repacked, attribute))

    cuboidals = ragged.to_rectangular_arrays()
    assert len(cuboidals) == 3
    assert cuboidals[0].shape == (2, 2, 3)
    assert cuboidals[1].shape == (1, 3, 3)
    assert cuboidals[2].shape == (1, 0, 3)

    # Flattening each cuboid back to rows of 3 must recover the flat data.
    as_rows = [cuboid.reshape((-1, 3)) for cuboid in cuboidals]
    assert np.array_equal(np.concatenate(as_rows, axis=0), ragged.flat)
def test_byteswap(in_place):
    """``byteswap()`` swaps values; ``inplace`` decides whether it copies."""
    original = RaggedArray.from_nested(SIMPLE, dtype=np.uint16)
    swapped = original.byteswap(inplace=in_place)

    # Both the object identity and the underlying buffer follow ``in_place``.
    returned_same_object = original is swapped
    assert returned_same_object is in_place
    assert np.shares_memory(original.flat, swapped.flat) is in_place

    assert swapped.dtype == np.uint16
    assert swapped[0].tolist() == [0x0100, 0x0200]
def test_misc_exceptions():
    """A stepped column slice and an over-long index both raise."""
    ragged = RaggedArray.from_nested(NESTED)

    # Stepping through columns is not supported.
    with pytest.raises(RequestMeError, match="A stepped columns index"):
        ragged[2, ::2]

    # Three indices for an array that only accepts two.
    with pytest.raises(IndexError, match="Too many indices .* 2 but 3 "):
        ragged[0, 0, 0]
def test_from_nested(dtype):
    """``from_nested()`` keeps row count, row lengths, contents and dtype."""
    ragged = RaggedArray.from_nested(NESTED, dtype=dtype)

    assert len(ragged) == len(NESTED)

    expected_lengths = [len(row) for row in NESTED]
    assert np.array_equal(expected_lengths, ragged.ends - ragged.starts)

    assert all(
        np.array_equal(row, expected) for row, expected in zip(ragged, NESTED))

    # With no explicit dtype, the integer test data yields ``int``.
    expected_dtype = int if dtype is None else dtype
    assert ragged.dtype == expected_dtype
def test_overflow():
    """``dumps()`` must refuse a row-length dtype too small for a row."""
    row_lengths = [0, 150, 255, 256, 300]
    ragged = RaggedArray.from_lengths(np.arange(1000), row_lengths)

    # Row 3 (length 256) is the first that cannot fit in a uint8.
    expected_message = "Row 3 with length 256 is .* an uint8 integer."
    with pytest.raises(OverflowError, match=expected_message):
        ragged.dumps(ldtype=np.uint8)

    # This call is expected to succeed without raising.
    ragged.dumps(np.int16)
def test_from_ids(n, id_max):
    """Test ``sub_enumerate()``, ``RaggedArray.group_by()`` and
    ``RaggedArray.groups_by()`` using ``n`` random ids below ``id_max``."""
    ids = np.random.randint(0, id_max, n)

    # -- ``sub_enumerate()`` --

    counts, sub_ids = sub_enumerate(ids, id_max)
    # Turn per-id counts into the start offset of each id's group.
    starts = np.empty_like(counts)
    starts[0] = 0
    np.cumsum(counts[:-1], out=starts[1:])
    locations = starts[ids] + sub_ids
    # Every item should have been given its own distinct slot.
    assert np.all(np.sort(locations) == np.arange(len(ids)))

    # -- ``RaggedArray.group_by()`` and ``RaggedArray.groups_by()`` --

    # The grouped data is derived from the group number itself so that each
    # row can be validated simply by checking ``ragged[i] == f(i)`` where
    # ``f()`` is whatever function generated the data from ``ids``.

    # One ragged array holding ``sqrt(ids)``.
    roots = RaggedArray.group_by(np.sqrt(ids), ids, id_max)
    assert len(roots) == id_max

    # Three ragged arrays built in one go from ``ids``, ``ids * 2`` and
    # ``ids * 3``.
    datas = ids, ids * 2, ids * 3
    multiples = RaggedArray.groups_by(ids, *datas, id_max=id_max)
    times_1, times_2, times_3 = multiples
    assert len(times_1) == len(times_2) == len(times_3) == id_max

    # The same three datasets stacked into a single array of 3-item rows.
    stacked = RaggedArray.group_by(np.array(datas).T, ids, id_max)
    assert len(stacked) == id_max
    assert stacked.itemshape == (3, )

    # Validate the contents of every group.
    for group in range(id_max):
        assert np.all(roots[group] == np.sqrt(group))
        assert np.all(times_1[group] == group)
        assert np.all(times_2[group] == 2 * group)
        assert np.all(times_3[group] == 3 * group)
        assert np.all(
            stacked[group] == np.array([group, 2 * group, 3 * group]))
def test_check():
    """Invalid ``starts``/``ends`` combinations raise with precise messages."""
    # Each case: (exception type, message regex, bounds arguments).
    cases = [
        # Mismatched numbers of starts and ends.
        (ValueError, r".* lengths .* \(5\) .* \(6\) do not match",
         (np.arange(5), np.arange(6))),
        # A row which ends before it starts (explicit ends).
        (ValueError, r"Row 2, .* flat\[5\] .* flat\[3\], .* \(-2\)",
         ([0, 2, 5, 1], [1, 2, 3, 3])),
        # A row which ends before it starts (single bounds list).
        (ValueError, r"Row 1, .* flat\[5\] .* flat\[4\], .* \(-1\)",
         ([0, 5, 4, 6, 7], )),
        # Negative start.
        (IndexError, r"starts\[2\] = -2 < 0",
         ([0, 1, -2, -3, 4], [1, 2, 3, 4, 5])),
        # End beyond the flat data.
        (IndexError, r"ends\[3\] = 14 >= len\(flat\) = 10",
         ([0, 1, 2, 3, 4], [1, 2, 3, 14, 5])),
    ]
    for error, pattern, bounds in cases:
        with pytest.raises(error, match=pattern):
            RaggedArray(np.empty(10), *bounds)
def test_dump_byteorder():
    """The flat data's byte order must carry through to ``dumps()``."""
    ragged = RaggedArray.from_nested([[0x0109, 0x0208, 0x0307]],
                                     dtype=np.uint16)

    # The leading 3 is the row length, written as a single uint8.
    expectations = (
        (">", [3, 1, 9, 2, 8, 3, 7]),
        ("<", [3, 9, 1, 8, 2, 7, 3]),
    )
    for order, expected in expectations:
        converted = ragged.astype(ragged.dtype.newbyteorder(order))
        dumped = list(converted.dumps(ldtype=np.uint8))
        assert dumped == expected
def test_pickle():
    """A ragged array of strings survives a pickle round trip."""
    original = RaggedArray.from_nested([
        ["cake", "biscuits"],
        ["socks"],
        ["orange", "lemon", "pineapple"],
    ])

    restored = pickle.loads(pickle.dumps(original))

    for attribute in ("starts", "ends", "flat"):
        assert np.array_equal(getattr(original, attribute),
                              getattr(restored, attribute))
def test_repacked():
    """``repacked()`` keeps the rows but makes them contiguous and in order."""
    flat = np.random.random(10)
    # (start, end) per row: out of order, overlapping and with an empty row.
    row_bounds = [(2, 4), (5, 5), (3, 8), (8, 10)]
    starts, ends = zip(*row_bounds)
    ragged = RaggedArray(flat, starts, ends)

    packed = ragged.repacked()

    # Same rows as before...
    assert len(packed) == len(ragged)
    assert np.array_equal(packed.ends - packed.starts,
                          ragged.ends - ragged.starts)
    assert all(np.array_equal(old, new) for old, new in zip(ragged, packed))

    # ... but each row now starts where the previous one ended and together
    # they span the whole of the new flat array.
    assert np.array_equal(packed.starts[1:], packed.ends[:-1])
    assert packed.starts[0] == 0
    assert packed.ends[-1] == len(packed.flat)
def test_explicit_bounds():
    """Construct from separately specified ``starts`` and ``ends``."""
    flat = np.random.random(10)
    starts, ends = [2, 4, 4, 9], [4, 4, 8, 10]

    ragged = RaggedArray(flat, starts, ends)

    # The flat data must be used as-is rather than copied.
    assert ragged.flat is flat
    assert np.all(ragged.starts == starts)
    assert np.all(ragged.ends == ends)
    _test_get_row(ragged)

    converted = ragged.astype(np.float32)
    assert converted.dtype == np.float32
def test_3d():
    """Dump and reload a ragged array whose items are rows of 3 integers."""
    ragged = RaggedArray.from_nested([
        [[0, 1, 2], [3, 4, 5]],
        [[6, 7, 8], [9, 10, 11]],
        [[12, 13, 14], [15, 16, 17], [18, 19, 20]],
        [],
    ], dtype=np.intc)

    # Because the data dtype matches the row-length dtype (intc), the raw
    # output of ``dumps()`` can be read as one plain sequence of integers:
    # each row's item count followed by that row's values.
    expected = [
        2, 0, 1, 2, 3, 4, 5,
        2, 6, 7, 8, 9, 10, 11,
        3, 12, 13, 14, 15, 16, 17, 18, 19, 20,
        0,
    ]  # yapf: disable
    as_integers = np.frombuffer(ragged.dumps(), np.intc)
    assert as_integers.tolist() == expected

    parsed, _ = RaggedArray.loads(ragged.dumps(), dtype=np.dtype(np.intc) * 3)
    for attribute in ("starts", "ends", "flat"):
        assert np.array_equal(getattr(ragged, attribute),
                              getattr(parsed, attribute))
def test_3d():
    """Index a ragged array whose items are rows of 3 integers."""
    ragged = RaggedArray.from_nested([
        [[0, 1, 2], [3, 4, 5]],
        [[6, 7, 8], [9, 10, 11]],
        [[12, 13, 14], [15, 16, 17], [18, 19, 20]],
        [],
    ])

    # Scalar row and column indices, with and without an item index.
    assert ragged[2, 1].tolist() == [15, 16, 17]
    assert ragged[2, 1, 2] == 17

    # List indices.
    rows, columns = [2, 0], [1, 1]
    assert ragged[rows, columns].tolist() == [[15, 16, 17], [3, 4, 5]]
    assert ragged[rows, columns, [0, 1]].tolist() == [15, 4]

    with pytest.raises(RequestMeError, match="Returning ragged .* from >2D"):
        ragged[:3, :2, 0]
def test_slice_index():
    """Test ragged[slice, number]"""
    ragged = RaggedArray.from_nested(BIG_NESTED)

    # (index, expected result) pairs.
    cases = [
        (np.s_[:, 0], [0, 2, 3, 7, 8]),
        (np.s_[:, -1], [1, 2, 6, 7, 10]),
        (np.s_[:2, 0], [0, 2]),
        (np.s_[7:, 0], []),
        (np.s_[::2, 1], [1, 4, 9]),
        (np.s_[2::2, np.arange(2)], [[3, 4], [8, 9]]),
        (np.s_[2::2, np.arange(-1, 2)], [[6, 3, 4], [10, 8, 9]]),
    ]
    for index, expected in cases:
        assert ragged[index].tolist() == expected

    # A column index which is out of range for one of the selected rows.
    with pytest.raises(IndexError, match="Index -2 .* row 3 .* size 1"):
        ragged[2:, [0, -1, -2, 0]]
def test_rectangular(n):
    """Check :meth:`RaggedArray.to_rectangular_arrays()` against arrays of
    varying sizes.

    """
    np.random.seed(0)
    bounds = np.sort(np.random.randint(0, n, n))
    ragged = RaggedArray(np.arange(n), bounds)
    row_lengths = np.array([len(row) for row in ragged])

    blocks = ragged.to_rectangular_arrays()

    # Each block of shape ``(count, length)`` should account for the next
    # ``count`` rows, all of which must have ``length`` items.
    consumed = 0
    for block in blocks:
        count, length = block.shape
        assert np.all(row_lengths[consumed:consumed + count] == length)
        consumed += count

    if not len(ragged):
        assert len(blocks) == 0
        return
    assert np.array_equal(ragged.repacked().flat,
                          np.concatenate(blocks, axis=None))
def test_index_index():
    """Test ragged[number, number]"""
    ragged = RaggedArray.from_nested(NESTED)

    # Plain scalar lookups.
    assert ragged[0, 0] == 1
    assert ragged[0, 1] == 2
    assert ragged[(0, 1)] == 2
    assert ragged[1, -1] == 5
    assert ragged[-2, -3] == 1

    # Out of bounds lookups. The patterns verify that the correct numbers
    # are quoted in each error message. The first case is a regular NumPy
    # exception.
    out_of_bounds = [
        ((10, 0), "10 .* 2"),
        ((0, 4), "Index 4 .* row 0 .* size 3"),
        ((1, 3), "Index 3 .* row 1 .* size 2"),
        ((1, -3), "Index -3 .* row 1 .* size 2"),
    ]
    for index, pattern in out_of_bounds:
        with pytest.raises(IndexError, match=pattern):
            ragged[index]

    # Bulk scalar lookups.
    assert ragged[[1, 0], [1, 2]].tolist() == [5, 3]
    assert ragged[[0, 1, 0], [2, 0, 2]].tolist() == [3, 4, 3]
    with pytest.raises(IndexError, match="Index 2 .* row 1 .* size 2"):
        ragged[[0, 1, 0], [1, 2, 4]]

    # Mixtures of scalars and lists, plus 2D lists of indices.
    assert ragged[0, [1, 2, 0]].tolist() == [2, 3, 1]
    assert ragged[[1, 0], 0].tolist() == [4, 1]
    assert ragged[[[1], [0]], [[0, 1]]].tolist() == [[4, 5], [1, 2]]

    with pytest.raises(IndexError, match="Index 2 .* row 1 .* size 2"):
        ragged[[0, 1], 2]
    # This one is a regular NumPy exception.
    with pytest.raises(IndexError, match="2 .* axis 0 .* size 2"):
        ragged[[0, 2], 1]
def test_sorted_rectangular(n):
    """Check :meth:`RaggedArray.to_rectangular_arrays(reorder=True)`."""
    np.random.seed(0)
    bounds = np.sort(np.random.randint(0, n, n))
    ragged = RaggedArray(np.arange(n), bounds)

    args, blocks = ragged.to_rectangular_arrays(reorder=True)

    # There should be one block per distinct row length, shaped
    # ``(number_of_rows_of_length, row_length)``, in ascending order of
    # ``row_length``.
    block_shapes = [block.shape for block in blocks]
    length_counts = collections.Counter(len(row) for row in ragged)
    expected_shapes = [(count, length)
                       for length, count in sorted(length_counts.items())]
    assert expected_shapes == block_shapes

    if not len(args):
        # ``np.concatenate()`` needs at least one input so handle this apart.
        assert blocks == []
        return

    # The data may have been reordered but must otherwise be intact.
    assert np.array_equal(ragged[args].repacked().flat,
                          np.concatenate(blocks, axis=None))
def test_group_by_input_normalisation_and_type_checking():
    """``group_by()`` infers ``id_max`` when omitted and rejects invalid ids."""
    id_max = 20
    # Random ids, padded so that every value below ``id_max`` appears.
    random_ids = np.random.randint(0, id_max, 30)
    ids = np.append(random_ids, np.arange(id_max))
    np.random.shuffle(ids)
    data = np.random.random(ids.shape)

    explicit = RaggedArray.group_by(data, ids, id_max, check_ids=False)
    implicit = RaggedArray.group_by(data, ids)
    assert len(implicit) == len(explicit) == id_max
    assert np.all(implicit.starts == explicit.starts)

    # An ``id_max`` that is too small for the ids.
    with pytest.raises(IndexError):
        RaggedArray.group_by(data, ids, id_max - 1)
    # Shifting down makes the smallest id negative.
    with pytest.raises(IndexError):
        RaggedArray.group_by(data, ids - 1)

    # Shifting up leaves group 0 empty and pushes the rest along by one.
    shifted = RaggedArray.group_by(data, ids + 1)
    assert len(shifted) == id_max + 1
    assert len(shifted[0]) == 0
    assert np.all(shifted[1:].starts == explicit.starts)
def test_write_slice_index():
    """Test ragged[slice, number] = x"""
    ragged = RaggedArray.from_nested(BIG_NESTED)

    # Write a whole column from a list.
    first_column = [1, 2, 3, 4, 5]
    ragged[:, 0] = first_column
    assert ragged[:, 0].tolist() == first_column
    assert ragged[:2, 0].tolist() == [1, 2]

    # Write a scalar into part of a column.
    ragged[:2, -1] = 99
    assert ragged[0, -1] == ragged[1, -1] == 99

    # There is no row 7, so both of these are expected to do nothing.
    ragged[7:, 0] = []
    ragged[7:, 0] = 0

    ragged[::2, 1] = [100, 101, 102]
    assert ragged[::2, 1].tolist() == [100, 101, 102]

    ragged[0::2, np.arange(-1, 2)] = [
        [51, 52, 53],
        [54, 55, 56],
        [57, 58, 59],
    ]
    # ragged[0, 1] and ragged[0, -1] refer to the same cell, which is
    # therefore written twice and keeps the later value (53). Every other
    # cell simply holds what was put in.
    expected = [[53, 52, 53], [54, 55, 56], [57, 58, 59]]
    assert ragged[::2, [-1, 0, 1]].tolist() == expected
# Module preamble, the ``REPRs`` fixtures and ``test_repr``.
#
# ``pytestmark`` attaches ``pytest.mark.order(2)`` at module level.
# ``REPRs`` holds two ``RaggedArray.from_nested([...])`` source snippets: one
# with rows of integers and one with rows of 4-item float rows. Both include
# empty rows.
# ``test_repr`` is parametrized over ``REPRs``. It ``eval()``s each snippet
# and asserts that the result is a ``RaggedArray``. The snippets are literals
# defined here, not external input.
#
# NOTE(review): this line appears to have lost its original line breaks and
# indentation. Each ``"""\`` opener looks like it was a backslash-newline
# continuation - TODO confirm against the upstream file. The code is left
# untouched because the whitespace inside the string literals cannot be
# recovered from here.
import pytest from rockhopper import RaggedArray, ragged_array pytestmark = pytest.mark.order(2) REPRs = [ """\ RaggedArray.from_nested([ [0, 1, 2, 3, 4], [5], [ 6, 7, 8, 9, 10, 11, 12, 13, 14], [15], [], ])""", """\ RaggedArray.from_nested([ [], [[4., 5., 6., 7.]], [[ 8., 9., 10., 11.], [12., 13., 14., 15.], [16., 17., 18., 19.]], [], [], ])""" ] @pytest.mark.parametrize("repr_", REPRs) def test_repr(repr_): self = eval(repr_) assert isinstance(self, RaggedArray)
def test_long_repr(): self = ragged_array(np.arange(1 << 12)[:, np.newaxis]) assert repr(self) == """\
def test_corruption():
    """Malformed input must produce a deliberate :class:`ValueError` rather
    than a seg-fault."""
    # Three rows: [100, 101], [102] and an empty one.
    raw = np.array([2, 100, 101, 1, 102, 0], np.uint16).tobytes()

    # Truncated midway through the 1st row length.
    one_byte = raw[:1]
    with pytest.raises(ValueError, match="through a row"):
        RaggedArray.loads(one_byte, np.uint16, ldtype=np.uint16)
    with pytest.raises(ValueError, match="leaves -1 bytes for the flat data"):
        RaggedArray.loads(one_byte, np.uint16, ldtype=np.uint16, rows=1)
    empty, _ = RaggedArray.loads(one_byte, None, rows=0)
    assert len(empty) == 0

    # Truncated after the 1st row length but before that row's data.
    length_only = raw[:2]
    with pytest.raises(ValueError, match="through a row"):
        RaggedArray.loads(length_only, np.uint16, ldtype=np.uint16)
    # The same again but with the number of rows given.
    with pytest.raises(ValueError, match="Only 0 out of .* 1 rows were read."):
        RaggedArray.loads(length_only, np.uint16, ldtype=np.uint16, rows=1)

    # Exactly one complete row - this should load.
    one_row = raw[:6]
    RaggedArray.loads(one_row, ldtype=np.uint16, dtype=np.uint16)
    # Unless the caller asks for more rows than are present.
    with pytest.raises(ValueError, match="Only 1 out of .* 2 rows were read."):
        RaggedArray.loads(one_row, ldtype=np.uint16, dtype=np.uint16, rows=2)

    # The empty final row must not be dropped.
    ragged, consumed = RaggedArray.loads(raw, ldtype=np.uint16,
                                         dtype=np.uint16)
    assert len(ragged) == 3
    assert consumed == len(raw)
    RaggedArray.loads(raw, ldtype=np.uint16, dtype=np.uint16, rows=3)
def test_empty(ldtype):
    """Loading zero bytes gives an empty array and consumes nothing."""
    loaded = RaggedArray.loads(b"", None, ldtype=ldtype)
    ragged, consumed = loaded

    assert len(ragged) == 0
    assert len(ragged.flat) == 0
    assert consumed == 0
def test_from_lengths():
    """``from_lengths()`` turns row lengths into consecutive start/end pairs."""
    flat = np.arange(10)
    lengths = [2, 3, 0, 4]

    ragged = RaggedArray.from_lengths(flat, lengths)

    # The flat data must be used as-is rather than copied.
    assert ragged.flat is flat
    expected_starts, expected_ends = [0, 2, 5, 5], [2, 5, 5, 9]
    assert np.array_equal(ragged.starts, expected_starts)
    assert np.array_equal(ragged.ends, expected_ends)
def test_number_slice(rows, columns):
    """Test ragged[number, slice]"""
    ragged = RaggedArray.from_nested(BIG_NESTED)

    # Apply the same indexing to the plain nested lists, one row at a time.
    expected = [row[columns] for row in BIG_NESTED[rows]]

    result = ragged[rows, columns]
    assert result.tolist() == expected
def test_write_number_slice():
    """Assigning to ragged[slice, slice] raises ``RequestMeError``."""
    ragged = RaggedArray.from_nested(BIG_NESTED)

    with pytest.raises(RequestMeError):
        ragged[:, :2] = 1
def test_too_big():
    """A flat array of ``1 << 31`` items is rejected as not implemented."""
    # An empty structured dtype is used for the oversized array.
    empty_dtype = np.dtype([])
    flat = np.empty(1 << 31, empty_dtype)

    with pytest.raises(NotImplementedError, match="Flat lengths .*"):
        RaggedArray(flat, [])
def test_str(str_):
    """``str()`` of an array built from ``str_`` should reproduce ``str_``."""
    # Rewrite the whitespace separated ``str_`` as a Python list literal:
    # first drop any whitespace directly after an opening bracket, then turn
    # each remaining run of whitespace into a comma.
    without_padding = re.sub(r"\[\s+", "[", str_)
    as_literal = re.sub(r"\s+", ",", without_padding)
    nested = eval(as_literal)

    ragged = RaggedArray.from_nested(nested)
    assert str(ragged) == str_