def test_partitioned():
    """Round-trip a repartitioned array through arraysets and pickle.

    Exercises both ``partition_first`` settings, a single container that is
    filled by two separate ``to_arrayset`` calls, and pickling of the
    repartitioned array itself.
    """
    expected = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    array = awkward1.repartition(awkward1.Array(expected), 3)

    # Either key layout must reconstruct the same values.
    for partition_first in (True, False):
        form, container, num_partitions = awkward1.to_arrayset(
            array, partition_first=partition_first
        )
        restored = awkward1.from_arrayset(
            form, container, num_partitions, partition_first=partition_first
        )
        assert restored.tolist() == expected

    # Two independent arrays are written into one shared container, passing
    # 0 and 1 as the third argument (presumably the partition number), and
    # are then read back together as 2 partitions.
    container = {}
    first_half = awkward1.Array([1, 2, 3, 4, 5])
    second_half = awkward1.Array([6, 7, 8, 9, 10])
    form1, _, _ = awkward1.to_arrayset(first_half, container, 0)
    form2, _, _ = awkward1.to_arrayset(second_half, container, 1)
    assert form1 == form2
    combined = awkward1.from_arrayset(form1, container, 2)
    assert combined.tolist() == expected

    unpickled = pickle.loads(pickle.dumps(array, -1))
    assert unpickled.tolist() == expected
def test_emptyarray():
    """Empty and nested-empty lists survive arrayset and pickle round trips."""
    cases = ([], [[], [], []])

    # Arrayset round trip first, for every case.
    for expected in cases:
        arrayset = awkward1.to_arrayset(expected)
        restored = awkward1.from_arrayset(*arrayset)
        assert restored.tolist() == expected

    # Then the pickle round trip, for every case.
    for expected in cases:
        pickled = pickle.dumps(awkward1.Array(expected), -1)
        assert pickle.loads(pickled).tolist() == expected
def test_recordarray():
    """Tuple-like and named records survive arrayset and pickle round trips.

    Every result is converted with ``.tolist()`` before it is compared, as in
    the other tests of this file, so each assertion is a plain
    list-equality check on Python values rather than an ``==`` between an
    Awkward Array and a list.
    """
    tuples = [(1.1, [1]), (2.2, [1, 2]), (3.3, [1, 2, 3])]
    records = [
        {"x": 1.1, "y": [1]},
        {"x": 2.2, "y": [1, 2]},
        {"x": 3.3, "y": [1, 2, 3]},
    ]

    # Arrayset round trips.
    assert awkward1.from_arrayset(
        *awkward1.to_arrayset(tuples)
    ).tolist() == tuples
    assert awkward1.from_arrayset(
        *awkward1.to_arrayset(records)
    ).tolist() == records

    # Pickle round trips.
    assert pickle.loads(
        pickle.dumps(awkward1.Array(tuples), -1)
    ).tolist() == tuples
    assert pickle.loads(
        pickle.dumps(awkward1.Array(records), -1)
    ).tolist() == records
def test_longer_than_expected():
    """Lazy round trip of a record whose fields have different lengths.

    ``longitem`` holds 6 values while ``item1`` holds 4 and the list offsets
    only reach index 4; the lazily loaded result must still list correctly.
    """
    offsets = ak.layout.Index64([0, 2, 4])
    fields = {
        "item1": ak.layout.NumpyArray(np.arange(4)),
        "longitem": ak.layout.NumpyArray(np.arange(6)),
    }
    layout = ak.layout.ListOffsetArray64(offsets, ak.layout.RecordArray(fields))
    array = ak.Array(layout)

    arrayset = ak.to_arrayset(array)
    out = ak.from_arrayset(*arrayset, lazy=True, lazy_lengths=2)

    expected = [
        [{'item1': 0, 'longitem': 0}, {'item1': 1, 'longitem': 1}],
        [{'item1': 2, 'longitem': 2}, {'item1': 3, 'longitem': 3}],
    ]
    assert ak.to_list(out) == expected
def test_listoffsetarray():
    """Variable-length lists and strings survive the arrayset round trip.

    The nested-integer case is additionally checked through pickle.
    """
    cases = (
        [[1, 2, 3], [], [4, 5]],
        ["one", "two", "three", "four", "five"],
        [["one", "two", "three"], [], ["four", "five"]],
    )
    for expected in cases:
        restored = awkward1.from_arrayset(*awkward1.to_arrayset(expected))
        assert restored.tolist() == expected

    nested = [[1, 2, 3], [], [4, 5]]
    unpickled = pickle.loads(pickle.dumps(awkward1.Array(nested), -1))
    assert unpickled.tolist() == nested
def test_unmaskedarray():
    """An UnmaskedArray layout survives arrayset and pickle round trips."""
    expected = [1, 2, 3, 4, 5]
    content = awkward1.Array([1, 2, 3, 4, 5]).layout
    unmaskedarray = awkward1.layout.UnmaskedArray(content)

    arrayset = awkward1.to_arrayset(unmaskedarray)
    restored = awkward1.from_arrayset(*arrayset)
    assert restored.tolist() == expected

    pickled = pickle.dumps(awkward1.Array(unmaskedarray), -1)
    assert pickle.loads(pickled).tolist() == expected
def test_indexedoptionarray():
    """Data containing ``None`` survives arrayset and pickle round trips."""
    expected = [1, 2, 3, None, None, 5]

    restored = awkward1.from_arrayset(*awkward1.to_arrayset(expected))
    assert restored.tolist() == expected

    unpickled = pickle.loads(pickle.dumps(awkward1.Array(expected), -1))
    assert unpickled.tolist() == expected
def test_listarray():
    """A ListArray64 layout survives arrayset and pickle round trips.

    The ListArray64 is assembled from the starts, stops and content of the
    layout that ``awkward1.Array`` produces for the same nested list.
    """
    expected = [[1, 2, 3], [], [4, 5]]
    source = awkward1.Array([[1, 2, 3], [], [4, 5]]).layout
    listarray = awkward1.layout.ListArray64(
        source.starts, source.stops, source.content
    )

    restored = awkward1.from_arrayset(*awkward1.to_arrayset(listarray))
    assert restored.tolist() == expected

    unpickled = pickle.loads(pickle.dumps(awkward1.Array(listarray), -1))
    assert unpickled.tolist() == expected
def test_lazy():
    """A flat array read back with ``lazy=True`` lists its original values."""
    expected = [1, 2, 3, 4, 5]
    form, container, num_partitions = awkward1.to_arrayset(
        awkward1.Array(expected)
    )

    lazy_options = dict(lazy=True, lazy_lengths=5)
    lazy_array = awkward1.from_arrayset(
        form, container, num_partitions, **lazy_options
    )
    assert lazy_array.tolist() == expected
def test_regulararray():
    """A RegularArray of size 3 survives arrayset and pickle round trips."""
    flat = awkward1.Array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]).layout
    regulararray = awkward1.layout.RegularArray(flat, 3)
    expected = [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]

    arrayset = awkward1.to_arrayset(regulararray)
    assert awkward1.from_arrayset(*arrayset).tolist() == expected

    pickled = pickle.dumps(awkward1.Array(regulararray), -1)
    assert pickle.loads(pickled).tolist() == expected
def test_bytemaskedarray():
    """A ByteMaskedArray layout survives arrayset and pickle round trips.

    The mask is built with ``True`` as the third constructor argument, and
    the expected output keeps only the entries whose mask byte is set.
    """
    values = awkward1.Array([0.0, 1.1, 2.2, 3.3, 4.4]).layout
    mask_bytes = numpy.array(
        [False, True, True, False, False], dtype=numpy.int8
    )
    mask = awkward1.layout.Index8(mask_bytes)
    bytemaskedarray = awkward1.layout.ByteMaskedArray(mask, values, True)
    expected = [None, 1.1, 2.2, None, None]

    restored = awkward1.from_arrayset(*awkward1.to_arrayset(bytemaskedarray))
    assert restored.tolist() == expected

    unpickled = pickle.loads(pickle.dumps(awkward1.Array(bytemaskedarray), -1))
    assert unpickled.tolist() == expected
def test_indexedarray():
    """An IndexedArray64 layout survives arrayset and pickle round trips.

    The index ``[3, 1, 1, 4, 2]`` reorders and repeats entries of the content.
    """
    values = awkward1.Array([0.0, 1.1, 2.2, 3.3, 4.4]).layout
    positions = numpy.array([3, 1, 1, 4, 2], dtype=numpy.int64)
    index = awkward1.layout.Index64(positions)
    indexedarray = awkward1.layout.IndexedArray64(index, values)
    expected = [3.3, 1.1, 1.1, 4.4, 2.2]

    restored = awkward1.from_arrayset(*awkward1.to_arrayset(indexedarray))
    assert restored.tolist() == expected

    unpickled = pickle.loads(pickle.dumps(awkward1.Array(indexedarray), -1))
    assert unpickled.tolist() == expected
def test_lazy_partitioned():
    """A repartitioned array is read back lazily with per-partition lengths."""
    expected = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    array = awkward1.repartition(awkward1.Array(expected), 3)

    form, container, num_partitions = awkward1.to_arrayset(array)
    # Ten items repartitioned with the argument 3 yield four partitions.
    assert num_partitions == 4

    partition_lengths = [3, 3, 3, 1]
    lazy_array = awkward1.from_arrayset(
        form,
        container,
        num_partitions,
        lazy=True,
        lazy_lengths=partition_lengths,
    )
    assert lazy_array.tolist() == expected
def ak_to_zarr(arr, path):
    """Write the arrayset of *arr* into a Zarr group opened at *path*.

    The group is opened with ``mode='w'``. The form returned by
    ``ak.to_arrayset`` is stored as JSON under the ``schema`` attribute, and
    every entry of the returned container becomes a dataset named by its key.
    """
    group = zarr.open_group(path, mode='w')
    # NOTE(review): the third value returned by to_arrayset is not persisted.
    form, buffers, _unused = ak.to_arrayset(arr)
    group.attrs['schema'] = form.tojson()
    for name, buf in buffers.items():
        group.create_dataset(name, dtype=buf.dtype, data=buf, shape=buf.shape)
def test_lazy_arrayset():
    """Check which container keys a lazily loaded arrayset actually reads.

    The array is serialized into a ``Canary`` container (defined outside this
    block; presumably it records each access in ``.ops`` as
    ``(operation, key)`` tuples -- confirm against its definition). After each
    field access the test asserts the exact set of keys read from the
    container and the exact set of keys present in the lazy cache, then resets
    both before the next access.
    """
    array = ak.from_json(""" [ { "listcollection": [ {"item1": 1, "item2": 2}, {"item1": 2, "item2": 4}, {"item1": 3, "item2": 6} ], "collection": {"item1": 3, "item2": 4}, "singleton": 5, "listsingleton": [1, 2, 3], "unioncollection": {"item1": 3}, "masked": null }, { "listcollection": [ {"item1": 1, "item2": 2}, {"item1": 2, "item2": 4}, {"item1": 3, "item2": 6} ], "collection": {"item1": 3, "item2": 4}, "singleton": 5, "listsingleton": [1, 2, 3], "unioncollection": [{"item1": 2}], "masked": 4 }, { "listcollection": [ {"item1": 1, "item2": 2}, {"item1": 2, "item2": 4}, {"item1": 3, "item2": 6} ], "collection": {"item1": 3, "item2": 4}, "singleton": 5, "listsingleton": [1, 2, 3], "unioncollection": {"item1": 4}, "masked": 4 } ]""")
    canary = Canary()
    prefix = "kitty"
    form, container, npart = ak.to_arrayset(array, container=canary,
                                            prefix=prefix)
    # Serializing must not perform any "get" on the container.
    assert not any(op[0] == "get" for op in canary.ops)
    canary.ops = []
    cache = {}
    out = ak.from_arrayset(form, container, lazy=True, lazy_cache=cache,
                           lazy_lengths=3, prefix=prefix,
                           lazy_cache_key="hello")
    # Building the lazy array touches neither the container nor the cache.
    assert len(canary.ops) == 0
    assert len(cache) == 0
    # Asking for the length does not trigger any read either.
    assert len(out) == 3
    assert len(canary.ops) == 0
    assert len(cache) == 0
    # Counting list items reads only the offsets buffer of node1.
    assert ak.to_list(ak.num(out.listcollection)) == [3, 3, 3]
    assert set(canary.ops) == {('get', 'kitty-node1-offsets')}
    assert set(cache) == {'hello', 'hello-kitty-node1-virtual'}
    # Reset the recorded operations and the cache before the next access.
    canary.ops = []
    cache.clear()
    # Materializing the union field reads its tags, index and both branches.
    assert ak.to_list(out.unioncollection) == [
        {'item1': 3}, [{'item1': 2}], {'item1': 4}
    ]
    assert set(canary.ops) == {('get', 'kitty-node11-tags'),
                               ('get', 'kitty-node11-index'),
                               ('get', 'kitty-node14-offsets'),
                               ('get', 'kitty-node13'),
                               ('get', 'kitty-node16')}
    assert set(cache) == {
        'hello', 'hello-kitty-node11-virtual', 'hello-kitty-node13-virtual',
        'hello-kitty-node16-virtual'
    }
    canary.ops = []
    cache.clear()
    # Materializing the masked field reads its index and its content node.
    assert ak.to_list(out.masked) == [None, 4, 4]
    assert set(canary.ops) == {('get', 'kitty-node17-index'),
                               ('get', 'kitty-node18')}
    assert set(cache) == {'hello',
                          'hello-kitty-node17-virtual'}
    canary.ops = []
    cache.clear()
def test_unionarray():
    """Mixed lists and scalars survive arrayset and pickle round trips."""
    expected = [[1, 2, 3], [], 4, 5]

    arrayset = awkward1.to_arrayset(expected)
    restored = awkward1.from_arrayset(*arrayset)
    assert restored.tolist() == expected

    pickled = pickle.dumps(awkward1.Array(expected), -1)
    assert pickle.loads(pickled).tolist() == expected
def test_numpyarray():
    """A flat list of integers survives arrayset and pickle round trips."""
    expected = [1, 2, 3, 4, 5]

    arrayset = awkward1.to_arrayset(expected)
    restored = awkward1.from_arrayset(*arrayset)
    assert restored.tolist() == expected

    pickled = pickle.dumps(awkward1.Array(expected), -1)
    assert pickle.loads(pickled).tolist() == expected