Пример #1
0
def test_dump_load(dtype, byteorder):
    """Round-trip a small ragged array through ``dumps()`` and ``loads()``.

    Row lengths are serialised using ``dtype`` in the requested ``byteorder``
    whereas the flat data is ``int8``.
    """
    dtype = np.dtype(dtype).newbyteorder(byteorder)
    flat = np.arange(5, dtype=np.int8)
    ragged = RaggedArray.from_lengths(flat, [2, 3, 0])

    if _big_endian(dtype):
        endianness = "big"
    else:
        endianness = "little"

    def encode_length(length):
        # Serialise one row length using exactly ``dtype.itemsize`` bytes.
        return int.to_bytes(length, dtype.itemsize, endianness)

    dumped = ragged.dumps(ldtype=dtype)

    # The expected layout: each row's length followed by that row's raw data.
    pieces = [
        encode_length(2), flat[0:2].tobytes(),
        encode_length(3), flat[2:5].tobytes(),
        encode_length(0), b"",
    ]  # yapf: disable
    expected = b"".join(pieces)

    # Compare as lists purely so that pytest's failure output is readable.
    assert list(dumped) == list(expected)

    from rockhopper._ragged_array import slug
    row_count = slug.dll.count_rows(ptr(dumped), len(dumped),
                                    _2_power(dtype), _big_endian(dtype),
                                    flat.itemsize)
    assert row_count == len(ragged)

    # One extra trailing byte should make ``loads()`` raise.
    with pytest.raises(ValueError):
        RaggedArray.loads(dumped.tobytes() + b"\x01",
                          dtype=ragged.dtype,
                          ldtype=dtype)

    parsed, consumed = RaggedArray.loads(dumped,
                                         dtype=ragged.dtype,
                                         ldtype=dtype)
    for name in ("starts", "ends", "flat"):
        assert np.array_equal(getattr(ragged, name), getattr(parsed, name))
    assert consumed == len(dumped)
Пример #2
0
def test_implicit_bounds():
    """Construct a ragged array from one ``bounds`` list.

    The starts are expected to equal ``bounds[:-1]`` and the ends
    ``bounds[1:]``.
    """
    flat = np.random.random(10)
    bounds = [0, 3, 8, 8, 10]
    ragged = RaggedArray(flat, bounds)

    # The flat data must be used as is rather than copied.
    assert ragged.flat is flat

    expected_starts = bounds[:-1]
    expected_ends = bounds[1:]
    assert np.all(ragged.starts == expected_starts)
    assert np.all(ragged.ends == expected_ends)

    _test_get_row(ragged)

    as_float32 = RaggedArray(flat, bounds, dtype=np.float32)
    assert as_float32.dtype == np.float32
Пример #3
0
def test_3d():
    """Check the basic properties of a ragged array whose items are rows of 3.
    """
    nested = [
        [[0, 1, 2], [3, 4, 5]],
        [[6, 7, 8], [9, 10, 11]],
        [[12, 13, 14], [15, 16, 17], [18, 19, 20]],
        [],
    ]
    ragged = RaggedArray.from_nested(nested)

    assert np.array_equal(ragged.flat, np.arange(21).reshape((7, 3)))
    assert len(ragged) == 4
    assert ragged.dtype == int
    assert ragged.itemshape == (3, )
    assert ragged.itemsize == 3 * ragged.dtype.itemsize
    assert ragged[-1].shape == (0, 3)

    # The array is already packed so repacking should yield an exact copy.
    repacked = ragged.repacked()
    for name in ("flat", "starts", "ends"):
        assert np.array_equal(getattr(ragged, name), getattr(repacked, name))

    cuboidals = ragged.to_rectangular_arrays()
    expected_shapes = [(2, 2, 3), (1, 3, 3), (1, 0, 3)]
    assert [block.shape for block in cuboidals] == expected_shapes

    # Flattening the rectangular blocks again should recover the flat data.
    stacked = np.concatenate(
        [block.reshape((-1, 3)) for block in cuboidals], axis=0)
    assert np.array_equal(stacked, ragged.flat)
Пример #4
0
def test_byteswap(in_place):
    """Byte-swap a ``uint16`` ragged array, in place or not per ``in_place``.
    """
    original = RaggedArray.from_nested(SIMPLE, dtype=np.uint16)
    swapped = original.byteswap(inplace=in_place)

    # The same object and memory are reused if, and only if, ``in_place``.
    is_same_object = original is swapped
    assert is_same_object is in_place
    is_shared = np.shares_memory(original.flat, swapped.flat)
    assert is_shared is in_place

    assert swapped.dtype == np.uint16
    first_row = swapped[0].tolist()
    assert first_row == [0x0100, 0x0200]
Пример #5
0
def test_misc_exceptions():
    """Check the errors raised for a stepped column slice and for an index
    with too many components."""
    ragged = RaggedArray.from_nested(NESTED)

    stepped_message = "A stepped columns index"
    with pytest.raises(RequestMeError, match=stepped_message):
        ragged[2, ::2]

    too_many_message = "Too many indices .* 2 but 3 "
    with pytest.raises(IndexError, match=too_many_message):
        ragged[0, 0, 0]
Пример #6
0
def test_from_nested(dtype):
    """Build a ragged array from ``NESTED`` and compare it row by row."""
    ragged = RaggedArray.from_nested(NESTED, dtype=dtype)
    assert len(ragged) == len(NESTED)

    row_lengths = [len(row) for row in NESTED]
    assert np.array_equal(row_lengths, ragged.ends - ragged.starts)
    assert all(
        np.array_equal(row, target) for row, target in zip(ragged, NESTED))

    # With no dtype given, the dtype should compare equal to ``int``.
    expected_dtype = int if dtype is None else dtype
    assert ragged.dtype == expected_dtype
Пример #7
0
def test_overflow():
    """Check that dumps() refuses row lengths which do not fit in the
    requested row-length dtype."""
    lengths = [0, 150, 255, 256, 300]
    ragged = RaggedArray.from_lengths(np.arange(1000), lengths)

    message = "Row 3 with length 256 is .* an uint8 integer."
    with pytest.raises(OverflowError, match=message):
        ragged.dumps(ldtype=np.uint8)

    # A 16-bit length dtype is expected to work without raising.
    ragged.dumps(np.int16)
Пример #8
0
def test_from_ids(n, id_max):
    """Exercise ``sub_enumerate()`` and the grouping of data by integer ids."""
    ids = np.random.randint(0, id_max, n)

    # -- ``sub_enumerate()`` --
    counts, sub_ids = sub_enumerate(ids, id_max)
    # Accumulate the per-id counts into the offset where each id's items begin.
    offsets = np.empty_like(counts)
    offsets[0] = 0
    counts[:-1].cumsum(out=offsets[1:])
    slots = offsets[ids] + sub_ids
    # Each item should get a slot of its own so that, once sorted, the slots
    # are simply 0, 1, 2, ... up to the number of ids.
    assert np.all(np.sort(slots) == np.arange(len(ids)))

    # -- ``RaggedArray.group_by()`` and ``RaggedArray.groups_by()`` --
    # The data to be grouped is derived from the group number itself, meaning
    # that a grouped array can be validated just by checking that:
    #    ragged[i] == f(i)
    # where f() is whichever function produced the data from ``ids``.

    # A plain ragged array holding ``sqrt(ids)``.
    roots = RaggedArray.group_by(np.sqrt(ids), ids, id_max)
    assert len(roots) == id_max

    # Three ragged arrays made in one go from ``ids``, ``ids * 2`` and
    # ``ids * 3``.
    multiples = (ids, ids * 2, ids * 3)
    times_1, times_2, times_3 = RaggedArray.groups_by(ids,
                                                      *multiples,
                                                      id_max=id_max)
    assert len(times_1) == len(times_2) == len(times_3) == id_max

    # The same three columns again but as one array of 3-element items.
    stacked = RaggedArray.group_by(np.array(multiples).T, ids, id_max)
    assert len(stacked) == id_max
    assert stacked.itemshape == (3, )

    # Finally verify the contents of every group.
    for i in range(id_max):
        assert np.all(roots[i] == np.sqrt(i))

        for (factor, grouped) in ((1, times_1), (2, times_2), (3, times_3)):
            assert np.all(grouped[i] == factor * i)

        assert np.all(stacked[i] == np.array([i, 2 * i, 3 * i]))
Пример #9
0
def test_check():
    """Check the errors raised when constructing from invalid bounds."""
    # Each case is (expected exception, message pattern, bounds arguments).
    cases = [
        # Differing numbers of starts and ends.
        (ValueError, r".* lengths .* \(5\) .* \(6\) do not match",
         (np.arange(5), np.arange(6))),
        # A row whose explicit end comes before its start.
        (ValueError, r"Row 2, .* flat\[5\] .* flat\[3\], .* \(-2\)",
         ([0, 2, 5, 1], [1, 2, 3, 3])),
        # The same but using a single, non-monotonic bounds list.
        (ValueError, r"Row 1, .* flat\[5\] .* flat\[4\], .* \(-1\)",
         ([0, 5, 4, 6, 7], )),
        # A negative start.
        (IndexError, r"starts\[2\] = -2 < 0",
         ([0, 1, -2, -3, 4], [1, 2, 3, 4, 5])),
        # An end beyond the flat data.
        (IndexError, r"ends\[3\] = 14 >= len\(flat\) = 10",
         ([0, 1, 2, 3, 4], [1, 2, 3, 14, 5])),
    ]

    for (error, pattern, bounds) in cases:
        with pytest.raises(error, match=pattern):
            RaggedArray(np.empty(10), *bounds)
Пример #10
0
def test_dump_byteorder():
    """Check that the dumped bytes follow the byte order of the array's dtype.
    """
    ragged = RaggedArray.from_nested([[0x0109, 0x0208, 0x0307]],
                                     dtype=np.uint16)

    # Big endian: the high byte of each item comes first.
    big = ragged.astype(ragged.dtype.newbyteorder(">"))
    assert list(big.dumps(ldtype=np.uint8)) == [3, 1, 9, 2, 8, 3, 7]

    # Little endian: the low byte of each item comes first.
    little = ragged.astype(ragged.dtype.newbyteorder("<"))
    assert list(little.dumps(ldtype=np.uint8)) == [3, 9, 1, 8, 2, 7, 3]
Пример #11
0
def test_pickle():
    """A ragged array of strings should survive a pickle round trip."""
    words = [
        ["cake", "biscuits"],
        ["socks"],
        ["orange", "lemon", "pineapple"],
    ]
    ragged = RaggedArray.from_nested(words)

    pickled = pickle.dumps(ragged)
    copied = pickle.loads(pickled)

    for name in ("starts", "ends", "flat"):
        assert np.array_equal(getattr(ragged, name), getattr(copied, name))
Пример #12
0
def test_repacked():
    """Repack an array whose rows are out of order and overlapping."""
    flat = np.random.random(10)
    # Rows span flat[2:4], flat[5:5], flat[3:8] and flat[8:10].
    starts = (2, 5, 3, 8)
    ends = (4, 5, 8, 10)

    ragged = RaggedArray(flat, starts, ends)
    packed = ragged.repacked()

    # The rows themselves must be unchanged...
    assert len(packed) == len(ragged)
    old_lengths = ragged.ends - ragged.starts
    assert np.array_equal(packed.ends - packed.starts, old_lengths)
    assert all(np.array_equal(old, new) for old, new in zip(ragged, packed))

    # ... but now each row starts where the previous one ended, covering the
    # whole of the new flat array.
    assert np.array_equal(packed.starts[1:], packed.ends[:-1])
    assert packed.starts[0] == 0
    assert packed.ends[-1] == len(packed.flat)
Пример #13
0
def test_explicit_bounds():
    """Construct a ragged array from separate ``starts`` and ``ends`` lists."""
    flat = np.random.random(10)
    starts, ends = [2, 4, 4, 9], [4, 4, 8, 10]
    ragged = RaggedArray(flat, starts, ends)

    # The flat data must be used as is rather than copied.
    assert ragged.flat is flat
    assert np.all(ragged.starts == starts)
    assert np.all(ragged.ends == ends)

    _test_get_row(ragged)

    converted = ragged.astype(np.float32)
    assert converted.dtype == np.float32
Пример #14
0
def test_3d():
    """Dump and reload a ragged array whose items are rows of 3 ``intc``."""
    nested = [
        [[0, 1, 2], [3, 4, 5]],
        [[6, 7, 8], [9, 10, 11]],
        [[12, 13, 14], [15, 16, 17], [18, 19, 20]],
        [],
    ]
    ragged = RaggedArray.from_nested(nested, dtype=np.intc)

    # As the data has the same dtype as starts and ends (intc), the raw
    # output of ``dumps()`` may safely (and much more readably) be treated as
    # a sequence of integers: each row's length followed by its contents.
    expected = (
        [2, 0, 1, 2, 3, 4, 5]
        + [2, 6, 7, 8, 9, 10, 11]
        + [3, 12, 13, 14, 15, 16, 17, 18, 19, 20]
        + [0]
    )  # yapf: disable

    as_integers = np.frombuffer(ragged.dumps(), np.intc)
    assert as_integers.tolist() == expected

    item_dtype = np.dtype(np.intc) * 3
    parsed, _ = RaggedArray.loads(ragged.dumps(), dtype=item_dtype)
    for name in ("starts", "ends", "flat"):
        assert np.array_equal(getattr(ragged, name), getattr(parsed, name))
Пример #15
0
def test_3d():
    """Index a ragged array whose items are rows of 3 integers."""
    nested = [
        [[0, 1, 2], [3, 4, 5]],
        [[6, 7, 8], [9, 10, 11]],
        [[12, 13, 14], [15, 16, 17], [18, 19, 20]],
        [],
    ]
    ragged = RaggedArray.from_nested(nested)

    # Scalar indices.
    item = ragged[2, 1]
    assert item.tolist() == [15, 16, 17]
    assert ragged[2, 1, 2] == 17

    # Lists of indices.
    items = ragged[[2, 0], [1, 1]]
    assert items.tolist() == [[15, 16, 17], [3, 4, 5]]
    cells = ragged[[2, 0], [1, 1], [0, 1]]
    assert cells.tolist() == [15, 4]

    message = "Returning ragged .* from >2D"
    with pytest.raises(RequestMeError, match=message):
        ragged[:3, :2, 0]
Пример #16
0
def test_slice_index():
    """Test ragged[slice, number]"""
    ragged = RaggedArray.from_nested(BIG_NESTED)

    # The first and the last item of every row.
    firsts = ragged[:, 0]
    assert firsts.tolist() == [0, 2, 3, 7, 8]
    lasts = ragged[:, -1]
    assert lasts.tolist() == [1, 2, 6, 7, 10]

    # Partial, empty and stepped row slices.
    assert ragged[:2, 0].tolist() == [0, 2]
    assert ragged[7:, 0].tolist() == []
    assert ragged[::2, 1].tolist() == [1, 4, 9]

    # Row slices combined with an array of column numbers.
    leading = np.arange(2)
    assert ragged[2::2, leading].tolist() == [[3, 4], [8, 9]]
    wrapped = np.arange(-1, 2)
    assert ragged[2::2, wrapped].tolist() == [[6, 3, 4], [10, 8, 9]]

    message = "Index -2 .* row 3 .* size 1"
    with pytest.raises(IndexError, match=message):
        ragged[2:, [0, -1, -2, 0]]
Пример #17
0
def test_rectangular(n):
    """Test :meth:`RaggedArray.to_rectangular_arrays()` on arrays of different
    sizes.
    """
    np.random.seed(0)
    bounds = np.sort(np.random.randint(0, n, n))
    ragged = RaggedArray(np.arange(n), bounds)
    row_lengths = np.array([len(row) for row in ragged])

    blocks = ragged.to_rectangular_arrays()
    shapes = [block.shape for block in blocks]

    # Each block of shape (count, length) should correspond to ``count``
    # consecutive rows which are all ``length`` long.
    first = 0
    for (count, length) in shapes:
        last = first + count
        assert np.all(row_lengths[first:last] == length)
        first = last

    if not len(ragged):
        assert len(blocks) == 0
    else:
        flattened = np.concatenate(blocks, axis=None)
        assert np.array_equal(ragged.repacked().flat, flattened)
Пример #18
0
def test_index_index():
    """Test ragged[number, number]"""
    ragged = RaggedArray.from_nested(NESTED)

    # Regular scalar lookups as (index, expected value) pairs. Writing
    # ``ragged[a, b]`` passes the tuple ``(a, b)`` as the index.
    scalar_lookups = [
        ((0, 0), 1),
        ((0, 1), 2),
        ((0, 1), 2),
        ((1, -1), 5),
        ((-2, -3), 1),
    ]
    for (index, expected) in scalar_lookups:
        assert ragged[index] == expected

    # The various index out of bounds errors.
    # The error messages must quote the right numbers.
    scalar_errors = [
        # This first one is a regular NumPy exception.
        ("10 .* 2", (10, 0)),
        ("Index 4 .* row 0 .* size 3", (0, 4)),
        ("Index 3 .* row 1 .* size 2", (1, 3)),
        ("Index -3 .* row 1 .* size 2", (1, -3)),
    ]
    for (pattern, index) in scalar_errors:
        with pytest.raises(IndexError, match=pattern):
            ragged[index]

    # Bulk scalar lookups.
    bulk_lookups = [
        (([1, 0], [1, 2]), [5, 3]),
        (([0, 1, 0], [2, 0, 2]), [3, 4, 3]),
    ]
    for (index, expected) in bulk_lookups:
        assert ragged[index].tolist() == expected

    with pytest.raises(IndexError, match="Index 2 .* row 1 .* size 2"):
        ragged[[0, 1, 0], [1, 2, 4]]

    # Mixtures of scalars, lists and nested lists.
    mixed_lookups = [
        ((0, [1, 2, 0]), [2, 3, 1]),
        (([1, 0], 0), [4, 1]),
        (([[1], [0]], [[0, 1]]), [[4, 5], [1, 2]]),
    ]
    for (index, expected) in mixed_lookups:
        assert ragged[index].tolist() == expected

    mixed_errors = [
        ("Index 2 .* row 1 .* size 2", ([0, 1], 2)),
        # This last one is a regular NumPy exception.
        ("2 .* axis 0 .* size 2", ([0, 2], 1)),
    ]
    for (pattern, index) in mixed_errors:
        with pytest.raises(IndexError, match=pattern):
            ragged[index]
Пример #19
0
def test_sorted_rectangular(n):
    """Test :meth:`RaggedArray.to_rectangular_arrays(reorder=True)`."""
    np.random.seed(0)
    bounds = np.sort(np.random.randint(0, n, n))
    ragged = RaggedArray(np.arange(n), bounds)
    order, blocks = ragged.to_rectangular_arrays(reorder=True)

    # Each output block should have shape
    # ``(number_of_rows_of_length, row_length)`` and the blocks should come in
    # ascending order of ``row_length``. Derive those shapes independently by
    # counting how many rows there are of each length.
    shapes = [block.shape for block in blocks]
    length_counts = collections.Counter(len(row) for row in ragged)
    expected_shapes = [
        (count, length) for (length, count) in sorted(length_counts.items())
    ]
    assert expected_shapes == shapes

    if len(order):
        # Apart from being reordered, the flat data should be preserved.
        flattened = np.concatenate(blocks, axis=None)
        assert np.array_equal(ragged[order].repacked().flat, flattened)
    else:
        # ``np.concatenate()`` needs at least one input so it can't be used.
        assert blocks == []
Пример #20
0
def test_group_by_input_normalisation_and_type_checking():
    """Compare ``group_by()`` with and without an explicit ``id_max`` and
    check that out of range ids are rejected."""
    id_max = 20
    # Random ids, topped up so that every value occurs at least once.
    random_ids = np.random.randint(0, id_max, 30)
    ids = np.append(random_ids, np.arange(id_max))
    np.random.shuffle(ids)
    values = np.random.random(ids.shape)

    with_id_max = RaggedArray.group_by(values, ids, id_max, check_ids=False)
    inferred = RaggedArray.group_by(values, ids)

    assert len(inferred) == len(with_id_max) == id_max
    assert np.all(inferred.starts == with_id_max.starts)

    # Ids equal to ``id_max - 1`` are too big for a smaller ``id_max``.
    with pytest.raises(IndexError):
        RaggedArray.group_by(values, ids, id_max - 1)
    # Shifting the ids down makes the smallest one negative.
    with pytest.raises(IndexError):
        RaggedArray.group_by(values, ids - 1)

    # Shifting the ids up should just insert an empty row at the front.
    shifted = RaggedArray.group_by(values, ids + 1)
    assert len(shifted) == id_max + 1
    assert len(shifted[0]) == 0
    assert np.all(shifted[1:].starts == with_id_max.starts)
Пример #21
0
def test_write_slice_index():
    """Test ragged[slice, number] = x"""
    ragged = RaggedArray.from_nested(BIG_NESTED)

    # Overwrite the first item of every row.
    new_firsts = [1, 2, 3, 4, 5]
    ragged[:, 0] = new_firsts
    assert ragged[:, 0].tolist() == new_firsts
    assert ragged[:2, 0].tolist() == [1, 2]

    # Write one scalar to the last item of the first two rows.
    ragged[:2, -1] = 99
    assert ragged[0, -1] == ragged[1, -1] == 99

    # There is no row 7 so both of these should be no-ops.
    ragged[7:, 0] = []
    ragged[7:, 0] = 0

    new_seconds = [100, 101, 102]
    ragged[::2, 1] = new_seconds
    assert ragged[::2, 1].tolist() == new_seconds

    columns = np.arange(-1, 2)
    ragged[0::2, columns] = [[51, 52, 53], [54, 55, 56], [57, 58, 59]]
    # ragged[0, 1] and ragged[0, -1] refer to the same cell which therefore
    # gets written twice and keeps the later value (53). Every other cell
    # holds exactly what was written to it.
    expected = [[53, 52, 53], [54, 55, 56], [57, 58, 59]]
    assert ragged[::2, [-1, 0, 1]].tolist() == expected
Пример #22
0
import pytest

from rockhopper import RaggedArray, ragged_array

# Mark every test in this module with ``order(2)``.
# NOTE(review): ``order`` is presumably provided by a test ordering plugin
# such as pytest-order -- confirm against the project's test requirements.
pytestmark = pytest.mark.order(2)

# Source code representations of ragged arrays. Each string is passed to
# ``eval()`` by ``test_repr()`` which expects to get a ``RaggedArray`` back.
# The first holds rows of scalars and the second holds rows of 4-element
# items; both include empty rows.
REPRs = [
    """\
RaggedArray.from_nested([
    [0, 1, 2, 3, 4],
    [5],
    [ 6,  7,  8,  9, 10, 11, 12, 13, 14],
    [15],
    [],
])""", """\
RaggedArray.from_nested([
    [],
    [[4., 5., 6., 7.]],
    [[ 8.,  9., 10., 11.],
     [12., 13., 14., 15.],
     [16., 17., 18., 19.]],
    [],
    [],
])"""
]


@pytest.mark.parametrize("repr_", REPRs)
def test_repr(repr_):
    """Evaluating each string in ``REPRs`` should give a ``RaggedArray``."""
    # Only trusted, hard-coded strings from this module are evaluated here.
    evaluated = eval(repr_)
    assert isinstance(evaluated, RaggedArray)
Пример #23
0
def test_long_repr():
    self = ragged_array(np.arange(1 << 12)[:, np.newaxis])
    assert repr(self) == """\
Пример #24
0
def test_corruption():
    """Truncated input should raise a deliberate :class:`ValueError` instead
    of causing a seg-fault."""

    # Three rows: [100, 101], [102] and [], each preceded by its length.
    raw = np.array([2, 100, 101, 1, 102, 0], np.uint16).tobytes()

    # Stop halfway through the 1st row length.
    half_a_length = raw[:1]
    with pytest.raises(ValueError, match="through a row"):
        RaggedArray.loads(half_a_length, np.uint16, ldtype=np.uint16)
    with pytest.raises(ValueError, match="leaves -1 bytes for the flat data"):
        RaggedArray.loads(half_a_length, np.uint16, ldtype=np.uint16, rows=1)

    # Asking for zero rows should succeed and give an empty array.
    no_rows = RaggedArray.loads(half_a_length, None, rows=0)
    assert len(no_rows[0]) == 0

    # Stop after the 1st row length but before that row's data.
    length_only = raw[:2]
    with pytest.raises(ValueError, match="through a row"):
        RaggedArray.loads(length_only, np.uint16, ldtype=np.uint16)

    # The same again but with the number of rows given.
    with pytest.raises(ValueError, match="Only 0 out of .* 1 rows were read."):
        RaggedArray.loads(length_only, np.uint16, ldtype=np.uint16, rows=1)

    # Exactly one complete row - this should work.
    one_row = raw[:6]
    RaggedArray.loads(one_row, ldtype=np.uint16, dtype=np.uint16)

    # But not if the user expects more rows.
    with pytest.raises(ValueError, match="Only 1 out of .* 2 rows were read."):
        RaggedArray.loads(one_row, ldtype=np.uint16, dtype=np.uint16, rows=2)

    # Make sure that the empty last row is not dropped.
    ragged, consumed = RaggedArray.loads(raw,
                                         ldtype=np.uint16,
                                         dtype=np.uint16)
    assert len(ragged) == 3
    assert consumed == len(raw)
    RaggedArray.loads(raw, ldtype=np.uint16, dtype=np.uint16, rows=3)
Пример #25
0
def test_empty(ldtype):
    """Loading from no bytes at all should give an empty ragged array."""
    ragged, consumed = RaggedArray.loads(b"", None, ldtype=ldtype)

    # No rows, no flat data and nothing read from the input.
    assert len(ragged) == 0
    assert len(ragged.flat) == 0
    assert consumed == 0
Пример #26
0
def test_from_lengths():
    """Build a ragged array from a list of row lengths."""
    flat = np.arange(10)
    lengths = [2, 3, 0, 4]
    ragged = RaggedArray.from_lengths(flat, lengths)

    # The flat data must be used as is rather than copied.
    assert ragged.flat is flat

    # The lengths sum to 9 so the final item of ``flat`` is left unused.
    expected_starts = [0, 2, 5, 5]
    expected_ends = [2, 5, 5, 9]
    assert np.array_equal(ragged.starts, expected_starts)
    assert np.array_equal(ragged.ends, expected_ends)
Пример #27
0
def test_number_slice(rows, columns):
    """Compare ``ragged[rows, columns]`` to equivalent nested list indexing."""
    ragged = RaggedArray.from_nested(BIG_NESTED)

    # Apply ``columns`` to each of the selected rows of the plain nested list.
    selected_rows = BIG_NESTED[rows]
    expected = [row[columns] for row in selected_rows]

    assert ragged[rows, columns].tolist() == expected
Пример #28
0
def test_write_number_slice():
    """Assigning to ``ragged[:, :2]`` should raise ``RequestMeError``."""
    ragged = RaggedArray.from_nested(BIG_NESTED)

    with pytest.raises(RequestMeError):
        ragged[:, :2] = 1
Пример #29
0
def test_too_big():
    """A flat array of ``1 << 31`` items should be rejected."""
    # An empty structured dtype is used for the oversized flat array.
    empty_dtype = np.dtype([])
    oversized = np.empty(1 << 31, empty_dtype)

    with pytest.raises(NotImplementedError, match="Flat lengths .*"):
        RaggedArray(oversized, [])
Пример #30
0
def test_str(str_):
    """Rebuild a ragged array from ``str_`` and check that ``str()`` of it
    reproduces ``str_``."""
    # Turn the whitespace-delimited ``str_`` into a Python literal: strip any
    # whitespace following an opening bracket then replace all remaining runs
    # of whitespace with commas.
    without_padding = re.sub(r"\[\s+", "[", str_)
    literal = re.sub(r"\s+", ",", without_padding)
    nested = eval(literal)

    ragged = RaggedArray.from_nested(nested)
    assert str(ragged) == str_