def test_arrow_nomask():
    """Round-trip an option-type float array through Arrow and check its type.

    The full array (which ends in ``None``) and the slice that drops that
    trailing ``None`` are both expected to come back typed as ``?float64``.
    """
    array = awkward1.Array([1.1, 2.2, 3.3, 4.4, None])

    full_roundtrip = awkward1.from_arrow(awkward1.to_arrow(array))
    assert str(awkward1.type(full_roundtrip)) == "5 * ?float64"

    # The slice no longer contains a None, yet the expected type stays optional.
    sliced_roundtrip = awkward1.from_arrow(awkward1.to_arrow(array[:-1]))
    assert str(awkward1.type(sliced_roundtrip)) == "4 * ?float64"
def test_fromarrow():
    """Check that several layout kinds survive a to_arrow/from_arrow round trip.

    Every layout is converted to Arrow and back (as a low-level layout), and
    the result is compared with the original through ``awkward1.to_list``.
    """

    def roundtrips(layout):
        # True when layout -> Arrow -> layout gives the same Python lists.
        restored = awkward1.from_arrow(awkward1.to_arrow(layout), highlevel=False)
        return awkward1.to_list(restored) == awkward1.to_list(layout)

    # Flat boolean array.
    boolarray = awkward1.layout.NumpyArray(
        numpy.array([True, True, True, False, False, True, False, True, False, True]))
    assert roundtrips(boolarray)

    # Flat float array.
    content = awkward1.layout.NumpyArray(
        numpy.array([0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9, 10.10]))
    assert roundtrips(content)

    # Variable-length lists over the float array.
    offsets = awkward1.layout.Index64(numpy.array([0, 3, 3, 5, 6, 10, 10]))
    listoffsetarray = awkward1.layout.ListOffsetArray64(offsets, content)
    assert roundtrips(listoffsetarray)

    # Fixed-size (2) groups of those lists.
    regulararray = awkward1.layout.RegularArray(listoffsetarray, 2)
    assert roundtrips(regulararray)

    # Record with four fields, two of which share the same content.
    content1 = awkward1.layout.NumpyArray(numpy.array([1, 2, 3, 4, 5]))
    content2 = awkward1.layout.NumpyArray(
        numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]))
    # Rebound here but not referenced again in this test.
    offsets = awkward1.layout.Index32(numpy.array([0, 3, 3, 5, 6, 9]))
    recordarray = awkward1.layout.RecordArray(
        [content1, listoffsetarray, content2, content1],
        keys=["one", "chonks", "2", "wonky"])
    assert roundtrips(recordarray)

    # Union of lists-of-floats (tag 0) and strings (tag 1).
    content0 = awkward1.Array([[1.1, 2.2, 3.3], [], [4.4, 5.5]]).layout
    content = awkward1.Array(
        ["one", "two", "three", "four", "five", "six", "seven", "eight", "nine"]).layout
    tags = awkward1.layout.Index8(
        numpy.array([1, 1, 0, 0, 1, 0, 1, 1], dtype=numpy.int8))
    index = awkward1.layout.Index32(
        numpy.array([0, 1, 0, 1, 2, 2, 4, 3], dtype=numpy.int32))
    array = awkward1.layout.UnionArray8_32(tags, index, [content0, content])
    assert roundtrips(array)
Exemplo n.º 3
0
def array_to_fletcher_or_numpy(array):
    """Convert ``array`` to a fletcher array or, when possible, a NumPy array.

    The input is converted to Arrow with ``awkward1.to_arrow`` and wrapped in
    a ``fletcher.FletcherContinuousArray``. That wrapper is returned when the
    input has two or more dimensions, when the Arrow data contains nulls, or
    when the fletcher dtype string contains ``"list<"``. Otherwise the
    underlying layout (or its ``content`` attribute, if it has one) is
    returned as a NumPy array.

    Args:
        array: object exposing ``ndim`` and ``layout`` and accepted by
            ``awkward1.to_arrow`` (presumably a high-level awkward1 Array;
            confirm against callers).

    Returns:
        Either the ``FletcherContinuousArray`` or a ``numpy.ndarray``.
    """
    import fletcher

    arrow_array = awkward1.to_arrow(array)
    fletcher_array = fletcher.FletcherContinuousArray(arrow_array)

    if array.ndim >= 2 or fletcher_array.data.null_count > 0:
        return fletcher_array
    if "list<" in str(fletcher_array.dtype):
        return fletcher_array

    layout = array.layout
    if hasattr(layout, "content"):
        layout = layout.content
    # np.asarray copies only when it has to on every NumPy version, whereas
    # np.array(..., copy=False) raises under NumPy 2 if a copy is required.
    return np.asarray(layout)
def test_arrow_toarrow_string():
    """Round-trip strings and bytestrings (flat and nested) through Arrow.

    Strings must come back unchanged. For bytestrings the expected result
    depends on whether ``pyarrow.BinaryArray`` has ``from_buffers``: with it
    the values are expected as ``bytes``, without it as ``str``.
    """

    def roundtrip(layout):
        # layout -> Arrow -> low-level layout -> Python lists
        restored = awkward1.from_arrow(awkward1.to_arrow(layout), highlevel=False)
        return awkward1.to_list(restored)

    flat_text = awkward1.from_iter(["one", "two", "three"], highlevel=False)
    assert roundtrip(flat_text) == awkward1.to_list(flat_text)

    nested_text = awkward1.from_iter(
        [["one", "two", "three"], [], ["four", "five"]], highlevel=False)
    assert roundtrip(nested_text) == awkward1.to_list(nested_text)

    if hasattr(pyarrow.BinaryArray, 'from_buffers'):
        expected_flat = [b"one", b"two", b"three"]
        expected_nested = [[b"one", b"two", b"three"], [], [b"four", b"five"]]
    else:
        expected_flat = ["one", "two", "three"]
        expected_nested = [["one", "two", "three"], [], ["four", "five"]]

    flat_bytes = awkward1.from_iter([b"one", b"two", b"three"], highlevel=False)
    assert roundtrip(flat_bytes) == expected_flat

    nested_bytes = awkward1.from_iter(
        [[b"one", b"two", b"three"], [], [b"four", b"five"]], highlevel=False)
    assert roundtrip(nested_bytes) == expected_nested
Exemplo n.º 5
0
import zarr
import pickle
import base64
import pyarrow as pa
import awkward1 as ak

# Load the sample dataset and convert it to an Arrow array for the helpers
# below. The reader is spelled ``from_json`` to match the snake_case API used
# everywhere else here (``to_arrow``, ``from_arrow``, ``from_iter``).
# NOTE(review): the path is specific to one machine; adjust before running.
arr = ak.from_json("/Users/mdurant/Downloads/bikeroutes.json")
parr = ak.to_arrow(arr)


def pa_to_zarr(arr, path):
    """Store the raw buffers of a pyarrow array in a zarr group at ``path``.

    The group is opened in ``'w'`` mode. Each non-``None`` buffer of ``arr``
    is written as a ``uint8`` array named after its position in
    ``arr.buffers()``. Three group attributes are written alongside:

    * ``'none'``: one boolean per buffer, true where the buffer is ``None``;
    * ``'length'``: ``len(arr)``;
    * ``'type'``: the pickled ``arr.type``, base64-encoded as text.

    Args:
        arr: pyarrow array providing ``buffers()``, ``type`` and ``len()``.
        path: location passed to ``zarr.open_group``.
    """
    z = zarr.open_group(path, mode='w')
    buffers = arr.buffers()
    z.attrs['none'] = [b is None for b in buffers]
    z.attrs['length'] = len(arr)
    for i, buf in enumerate(buffers):
        if buf is None:
            continue
        # Fill the array that was just created; assigning through the group
        # itself (``z[:] = buf``) would leave array ``i`` uninitialised.
        dataset = z.empty(name=i, dtype='uint8', shape=(len(buf), ))
        dataset[:] = buf
    z.attrs['type'] = base64.b64encode(pickle.dumps(arr.type)).decode()


def zarr_to_pa(path):
    """Rebuild a pyarrow array from the zarr group found at ``path``.

    Reads the ``'none'``, ``'length'`` and ``'type'`` group attributes and
    the per-buffer arrays indexed by position, then reassembles the array
    with ``pa.Array.from_buffers``.

    Args:
        path: location passed to ``zarr.open_group`` (opened read-only).

    Returns:
        The array produced by ``pa.Array.from_buffers``.
    """
    z = zarr.open_group(path, mode='r')

    buffers = []
    for i, is_none in enumerate(z.attrs['none']):
        if is_none:
            buffers.append(None)
        else:
            buffers.append(pa.py_buffer(z[i][:]))

    # NOTE(review): pickle.loads runs arbitrary code from the stored
    # attribute; only open groups that come from a trusted source.
    encoded_type = z.attrs['type'].encode()
    typ = pickle.loads(base64.b64decode(encoded_type))
    return pa.Array.from_buffers(typ, length=z.attrs['length'], buffers=buffers)


def ak_to_zarr(arr, path):
def test_arrow_coverage100():
    """Exercise many ``to_arrow`` / ``from_arrow`` code paths in sequence.

    The test repeatedly rebinds ``a`` (and ``b``) to a new layout or Arrow
    array and asserts the converted values. It covers booleans, bytestrings,
    strings, un-parameterized uint8 lists, option types over regular arrays,
    dictionary-encoded Arrow input, byte-masked arrays, unions, and a chunked
    Arrow array.
    """
    # Flat booleans.
    a = awkward1.from_iter([True, True, False, False, True, False, True, False], highlevel=False)
    assert awkward1.to_arrow(a).to_pylist() == awkward1.to_list(a)

    # Bytestrings: plain, byte-masked (valid_when=False) and indexed-option.
    a = awkward1.layout.ListOffsetArray32(awkward1.layout.Index32(numpy.array([0, 5, 10], "i4")), awkward1.layout.NumpyArray(numpy.frombuffer(b"hellothere", "u1"), parameters={"__array__": "bytes"}), parameters={"__array__": "bytestring"})
    assert [x for x in awkward1.to_arrow(a)] == [b"hello", b"there"]

    a = awkward1.layout.ByteMaskedArray(awkward1.layout.Index8(numpy.array([False, True, False, False, True, True])), awkward1.layout.ListOffsetArray32(awkward1.layout.Index32(numpy.array([0, 5, 10, 15, 20, 25, 30], "i4")), awkward1.layout.NumpyArray(numpy.frombuffer(b"hellotherehellotherehellothere", "u1"), parameters={"__array__": "bytes"}), parameters={"__array__": "bytestring"}), valid_when=False)
    assert [x for x in awkward1.to_arrow(a)] == [b"hello", None, b"hello", b"there", None, None]

    a = awkward1.layout.ByteMaskedArray(awkward1.layout.Index8(numpy.array([False, True])), awkward1.layout.ListOffsetArray32(awkward1.layout.Index32(numpy.array([0, 5, 10], "i4")), awkward1.layout.NumpyArray(numpy.frombuffer(b"hellothere", "u1"), parameters={"__array__": "bytes"}), parameters={"__array__": "bytestring"}), valid_when=False)
    assert [x for x in awkward1.to_arrow(a)] == [b"hello", None]

    a = awkward1.layout.IndexedOptionArray32(awkward1.layout.Index32(numpy.array([-1, 1, -1, 0, 0, -1], "i4")), awkward1.layout.ListOffsetArray32(awkward1.layout.Index32(numpy.array([0, 5, 10], "i4")), awkward1.layout.NumpyArray(numpy.frombuffer(b"hellothere", "u1"), parameters={"__array__": "bytes"}), parameters={"__array__": "bytestring"}))
    assert [x for x in awkward1.to_arrow(a)] == [None, b"there", None, b"hello", b"hello", None]

    # Strings: same shapes as above with "chars"/"string" parameters, using
    # both 32-bit and 64-bit offsets.
    a = awkward1.layout.ListOffsetArray32(awkward1.layout.Index32(numpy.array([0, 5, 10], "i4")), awkward1.layout.NumpyArray(numpy.frombuffer(b"hellothere", "u1"), parameters={"__array__": "chars"}), parameters={"__array__": "string"})
    assert [x for x in awkward1.to_arrow(a)] == ["hello", "there"]

    a = awkward1.layout.ByteMaskedArray(awkward1.layout.Index8(numpy.array([False, True, False, False, True, True])), awkward1.layout.ListOffsetArray32(awkward1.layout.Index32(numpy.array([0, 5, 10, 15, 20, 25, 30], "i4")), awkward1.layout.NumpyArray(numpy.frombuffer(b"hellotherehellotherehellothere", "u1"), parameters={"__array__": "chars"}), parameters={"__array__": "string"}), valid_when=False)
    assert [x for x in awkward1.to_arrow(a)] == ["hello", None, "hello", "there", None, None]
    assert awkward1.to_list(awkward1.from_arrow(awkward1.to_arrow(a))) == ["hello", None, "hello", "there", None, None]

    a = awkward1.layout.ByteMaskedArray(awkward1.layout.Index8(numpy.array([False, True, False, False, True, True])), awkward1.layout.ListOffsetArray64(awkward1.layout.Index64(numpy.array([0, 5, 10, 15, 20, 25, 30], "i8")), awkward1.layout.NumpyArray(numpy.frombuffer(b"hellotherehellotherehellothere", "u1"), parameters={"__array__": "chars"}), parameters={"__array__": "string"}), valid_when=False)
    assert [x for x in awkward1.to_arrow(a)] == ["hello", None, "hello", "there", None, None]
    assert awkward1.to_list(awkward1.from_arrow(awkward1.to_arrow(a))) == ["hello", None, "hello", "there", None, None]

    a = awkward1.layout.ByteMaskedArray(awkward1.layout.Index8(numpy.array([False, True, False, False, True, True])), awkward1.layout.ListOffsetArray64(awkward1.layout.Index64(numpy.array([0, 5, 10, 15, 20, 25, 30], "i8")), awkward1.layout.NumpyArray(numpy.frombuffer(b"hellotherehellotherehellothere", "u1"), parameters={"__array__": "bytes"}), parameters={"__array__": "bytestring"}), valid_when=False)
    assert [x for x in awkward1.to_arrow(a)] == [b"hello", None, b"hello", b"there", None, None]
    assert awkward1.to_list(awkward1.from_arrow(awkward1.to_arrow(a))) == [b"hello", None, b"hello", b"there", None, None]

    a = awkward1.layout.ByteMaskedArray(awkward1.layout.Index8(numpy.array([False, True])), awkward1.layout.ListOffsetArray32(awkward1.layout.Index32(numpy.array([0, 5, 10], "i4")), awkward1.layout.NumpyArray(numpy.frombuffer(b"hellothere", "u1"), parameters={"__array__": "chars"}), parameters={"__array__": "string"}), valid_when=False)
    assert [x for x in awkward1.to_arrow(a)] == ["hello", None]

    a = awkward1.layout.IndexedOptionArray32(awkward1.layout.Index32(numpy.array([-1, 1, -1, 0, 0, -1], "i4")), awkward1.layout.ListOffsetArray32(awkward1.layout.Index32(numpy.array([0, 5, 10], "i4")), awkward1.layout.NumpyArray(numpy.frombuffer(b"hellothere", "u1"), parameters={"__array__": "chars"}), parameters={"__array__": "string"}))
    assert [x for x in awkward1.to_arrow(a)] == [None, "there", None, "hello", "hello", None]

    # Same byte data without string parameters: expected as lists of integers.
    a = awkward1.layout.ListOffsetArray32(awkward1.layout.Index32(numpy.array([0, 5, 10], "i4")), awkward1.layout.NumpyArray(numpy.frombuffer(b"hellothere", "u1")))
    assert [x for x in awkward1.to_arrow(a)] == [[104, 101, 108, 108, 111], [116, 104, 101, 114, 101]]

    a = awkward1.layout.ByteMaskedArray(awkward1.layout.Index8(numpy.array([False, True, False, False, True, True])), awkward1.layout.ListOffsetArray32(awkward1.layout.Index32(numpy.array([0, 5, 10, 15, 20, 25, 30], "i4")), awkward1.layout.NumpyArray(numpy.frombuffer(b"hellotherehellotherehellothere", "u1"))), valid_when=False)
    assert [x for x in awkward1.to_arrow(a)] == [[104, 101, 108, 108, 111], None, [104, 101, 108, 108, 111], [116, 104, 101, 114, 101], None, None]

    a = awkward1.layout.ByteMaskedArray(awkward1.layout.Index8(numpy.array([False, True])), awkward1.layout.ListOffsetArray32(awkward1.layout.Index32(numpy.array([0, 5, 10], "i4")), awkward1.layout.NumpyArray(numpy.frombuffer(b"hellothere", "u1"))), valid_when=False)
    assert [x for x in awkward1.to_arrow(a)] == [[104, 101, 108, 108, 111], None]

    a = awkward1.layout.IndexedOptionArray32(awkward1.layout.Index32(numpy.array([-1, 1, -1, 0, 0, -1], "i4")), awkward1.layout.ListOffsetArray32(awkward1.layout.Index32(numpy.array([0, 5, 10], "i4")), awkward1.layout.NumpyArray(numpy.frombuffer(b"hellothere", "u1"))))
    assert [x for x in awkward1.to_arrow(a)] == [None, [116, 104, 101, 114, 101], None, [104, 101, 108, 108, 111], [104, 101, 108, 108, 111], None]

    # Option types over a RegularArray of size 3 (32-bit and 64-bit indexes,
    # then an extra byte mask on top).
    a = awkward1.layout.IndexedOptionArray32(awkward1.layout.Index32(numpy.array([-1, 1, -1, 0, 0, -1], "i4")), awkward1.layout.RegularArray(awkward1.layout.NumpyArray(numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6])), 3))
    assert awkward1.to_arrow(a).to_pylist() == [None, [4.4, 5.5, 6.6], None, [1.1, 2.2, 3.3], [1.1, 2.2, 3.3], None]

    a = awkward1.layout.IndexedOptionArray32(awkward1.layout.Index32(numpy.array([-1, 1, -1, 0, 0, -1, 1, -1], "i4")), awkward1.layout.RegularArray(awkward1.layout.NumpyArray(numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6])), 3))
    assert awkward1.to_arrow(a).to_pylist() == [None, [4.4, 5.5, 6.6], None, [1.1, 2.2, 3.3], [1.1, 2.2, 3.3], None, [4.4, 5.5, 6.6], None]

    a = awkward1.layout.IndexedOptionArray64(awkward1.layout.Index64(numpy.array([-1, 1, -1, 0, 0, -1, 1, -1], "i8")), awkward1.layout.RegularArray(awkward1.layout.NumpyArray(numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6])), 3))
    assert awkward1.to_arrow(a).to_pylist() == [None, [4.4, 5.5, 6.6], None, [1.1, 2.2, 3.3], [1.1, 2.2, 3.3], None, [4.4, 5.5, 6.6], None]

    a = awkward1.layout.ByteMaskedArray(awkward1.layout.Index8(numpy.array([True, True, True, True, False, False])), awkward1.layout.IndexedOptionArray32(awkward1.layout.Index32(numpy.array([-1, 1, -1, 0, 0, -1], "i4")), awkward1.layout.RegularArray(awkward1.layout.NumpyArray(numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6])), 3)), valid_when=True)
    assert awkward1.to_arrow(a).to_pylist() == [None, [4.4, 5.5, 6.6], None, [1.1, 2.2, 3.3], None, None]

    # UnmaskedArray wrapper around a list array.
    a = awkward1.layout.UnmaskedArray(awkward1.layout.ListOffsetArray32(awkward1.layout.Index32(numpy.array([0, 5, 10], "i4")), awkward1.layout.NumpyArray(numpy.frombuffer(b"hellothere", "u1"))))
    assert [x for x in awkward1.to_arrow(a)] == [[104, 101, 108, 108, 111], [116, 104, 101, 114, 101]]

    # Dictionary-encoded Arrow input must come back as one of the indexed types.
    a = pyarrow.array(["one", "two", "three", "two", "two", "one", "three", "one"]).dictionary_encode()
    b = awkward1.from_arrow(a, highlevel=False)
    assert isinstance(b, awkward1._util.indexedtypes)
    assert awkward1.to_list(b) == ["one", "two", "three", "two", "two", "one", "three", "one"]

    # Missing values in lists, numbers and bytestrings.
    a = awkward1.Array([[1.1, 2.2, 3.3], [], None, [4.4, 5.5]])
    assert awkward1.to_list(awkward1.from_arrow(awkward1.to_arrow(a))) == [[1.1, 2.2, 3.3], [], None, [4.4, 5.5]]

    a = awkward1.layout.ByteMaskedArray(awkward1.layout.Index8(numpy.array([False, False, False, True, True, False, False])), awkward1.layout.NumpyArray(numpy.array([1.1, 2.2, 3.3, 999, 314, 4.4, 5.5])), valid_when=False)
    assert awkward1.to_arrow(a).to_pylist() == [1.1, 2.2, 3.3, None, None, 4.4, 5.5]

    a = awkward1.layout.ByteMaskedArray(awkward1.layout.Index8(numpy.array([False, False, False, True, True, False, False])), awkward1.from_iter([b"hello", b"", b"there", b"yuk", b"", b"o", b"hellothere"], highlevel=False), valid_when=False)
    assert awkward1.to_arrow(a).to_pylist() == [b"hello", b"", b"there", None, None, b"o", b"hellothere"]

    a = awkward1.layout.ByteMaskedArray(awkward1.layout.Index8([True, True, False, True]), awkward1.from_iter([[1.1, 2.2, 3.3], [], [999], [4.4, 5.5]], highlevel=False), valid_when=True)
    assert awkward1.to_list(awkward1.from_arrow(awkward1.to_arrow(a))) == [[1.1, 2.2, 3.3], [], None, [4.4, 5.5]]

    # Heterogeneous data (lists mixed with integers), then the same data
    # wrapped in a byte mask.
    a = awkward1.from_iter([[1, 2, 3], [], [4, 5], 999, 123], highlevel=False)
    assert awkward1.to_arrow(a).to_pylist() == [[1, 2, 3], [], [4, 5], 999, 123]
    assert awkward1.to_list(awkward1.from_arrow(awkward1.to_arrow(a))) == [[1, 2, 3], [], [4, 5], 999, 123]

    b = awkward1.layout.ByteMaskedArray(awkward1.layout.Index8(numpy.array([True, True, False, False, True])), a, valid_when=True)
    assert awkward1.to_arrow(b).to_pylist() == [[1, 2, 3], [], None, None, 123]
    assert awkward1.to_list(awkward1.from_arrow(awkward1.to_arrow(b))) == [[1, 2, 3], [], None, None, 123]

    # Union whose second content is empty: every tag selects content1.
    content1 = awkward1.from_iter([1.1, 2.2, 3.3, 4.4, 5.5], highlevel=False)
    content2 = awkward1.layout.NumpyArray(numpy.array([], dtype=numpy.int32))
    a = awkward1.layout.UnionArray8_32(awkward1.layout.Index8(numpy.array([0, 0, 0, 0, 0], "i1")), awkward1.layout.Index32(numpy.array([0, 1, 2, 3, 4], "i4")), [content1, content2])
    assert awkward1.to_list(a) == [1.1, 2.2, 3.3, 4.4, 5.5]
    assert awkward1.to_arrow(a).to_pylist() == [1.1, 2.2, 3.3, 4.4, 5.5]
    assert awkward1.to_list(awkward1.from_arrow(awkward1.to_arrow(a))) == [1.1, 2.2, 3.3, 4.4, 5.5]

    # Sparse Arrow union in which all tags are 0, so only the first child
    # contributes to the expected values.
    a = pyarrow.UnionArray.from_sparse(pyarrow.array([0, 0, 0, 0, 0], type=pyarrow.int8()), [pyarrow.array([0.0, 1.1, None, 3.3, 4.4]), pyarrow.array([True, None, False, True, False])])
    assert awkward1.to_list(awkward1.from_arrow(a, highlevel=False)) == [0.0, 1.1, None, 3.3, 4.4]

    # Sparse union assembled from raw buffers. The first buffer holds the
    # single byte 3 (0b00000011); the expected values below have only the
    # first two entries non-None.
    uniontype = pyarrow.union([pyarrow.field("0", pyarrow.list_(pyarrow.float64())),
                               pyarrow.field("1", pyarrow.float64())],
                              "sparse",
                              [0, 1])
    a = pyarrow.Array.from_buffers(
            uniontype,
            5,
            [pyarrow.py_buffer(numpy.array([3], "u1")),
             pyarrow.py_buffer(numpy.array([0, 1, 0, 1, 1], "i1")),
             None],
            children=[pyarrow.array([[0.0, 1.1, 2.2], [], [3.3, 4.4], [5.5], [6.6, 7.7, 8.8, 9.9]]),
                      pyarrow.array([0.0, 1.1, 2.2, 3.3, 4.4])])
    assert a.to_pylist() == [[0.0, 1.1, 2.2], 1.1, None, None, None]
    assert awkward1.to_list(awkward1.from_arrow(a)) == [[0.0, 1.1, 2.2], 1.1, None, None, None]

    # Chunked Arrow array with a single chunk.
    a = pyarrow.chunked_array([pyarrow.array([1.1, 2.2, 3.3, 4.4, 5.5])])
    assert awkward1.to_list(awkward1.from_arrow(a, highlevel=False)) == [1.1, 2.2, 3.3, 4.4, 5.5]
def test_toarrow():
    """Exercise ``awkward1.to_arrow`` on each kind of layout node.

    Covers bit- and byte-masked arrays, flat and two-dimensional NumPy
    arrays, empty arrays, list-offset / list / regular arrays, records,
    unions, indexed arrays and indexed-option arrays. Results are checked
    through ``to_pylist()`` and, where the test pins it, the Arrow class.
    """
    # Option types over a string array: bit mask, then byte mask.
    content = awkward1.Array(
        ["one", "two", "three", "four", "five", "six", "seven", "eight", "nine"]).layout
    bitmask = awkward1.layout.IndexU8(numpy.array([40, 34], dtype=numpy.uint8))
    array = awkward1.layout.BitMaskedArray(bitmask, content, False, 9, False)
    assert awkward1.to_arrow(array).to_pylist() == awkward1.to_list(array)

    # numpy.bool_ is used because the numpy.bool alias was deprecated in
    # NumPy 1.20 and removed in 1.24.
    bytemask = awkward1.layout.Index8(
        numpy.array([False, True, False], dtype=numpy.bool_))
    array = awkward1.layout.ByteMaskedArray(bytemask, content, True)
    assert awkward1.to_arrow(array).to_pylist() == awkward1.to_list(array)

    # One-dimensional NumPy array.
    array = awkward1.layout.NumpyArray(
        numpy.array([0.0, 1.1, 2.2, 3.3, 4.4, 5.5]))
    assert isinstance(awkward1.to_arrow(array),
                      (pyarrow.lib.Tensor, pyarrow.lib.Array))
    assert awkward1.to_arrow(array).to_pylist() == [
        0.0, 1.1, 2.2, 3.3, 4.4, 5.5]

    # Two-dimensional NumPy array: compared against an Arrow Tensor.
    array = awkward1.layout.NumpyArray(
        numpy.array([[0.0, 1.1], [2.2, 3.3], [4.4, 5.5]]))
    assert isinstance(awkward1.to_arrow(array),
                      (pyarrow.lib.Tensor, pyarrow.lib.Array))
    assert awkward1.to_arrow(array) == pyarrow.Tensor.from_numpy(
        numpy.array([[0.0, 1.1], [2.2, 3.3], [4.4, 5.5]]))

    # Empty array.
    array = awkward1.layout.EmptyArray()
    assert isinstance(awkward1.to_arrow(array), (pyarrow.lib.Array))
    assert awkward1.to_arrow(array).to_pylist() == []

    # ListOffsetArray with 64-bit offsets.
    content = awkward1.layout.NumpyArray(
        numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]))
    offsets = awkward1.layout.Index64(numpy.array([0, 3, 3, 5, 6, 9]))
    array = awkward1.layout.ListOffsetArray64(offsets, content)
    assert isinstance(awkward1.to_arrow(array), (pyarrow.LargeListArray))
    assert awkward1.to_arrow(array).to_pylist() == [[1.1, 2.2, 3.3], [], [
        4.4, 5.5], [6.6], [7.7, 8.8, 9.9]]

    # ListOffsetArray with unsigned 32-bit offsets.
    content = awkward1.layout.NumpyArray(
        numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]))
    offsets = awkward1.layout.IndexU32(numpy.array([0, 3, 3, 5, 6, 9]))
    array = awkward1.layout.ListOffsetArrayU32(offsets, content)
    assert isinstance(awkward1.to_arrow(array), (pyarrow.LargeListArray))
    assert awkward1.to_arrow(array).to_pylist() == [[1.1, 2.2, 3.3], [], [
        4.4, 5.5], [6.6], [7.7, 8.8, 9.9]]

    # Testing parameters
    content = awkward1.Array(
        ["one", "two", "three", "four", "five", "six", "seven", "eight", "nine"]).layout
    offsets = awkward1.layout.Index32(numpy.array([0, 3, 3, 5, 6, 9]))
    array = awkward1.layout.ListOffsetArray32(offsets, content)
    assert awkward1.to_arrow(array).to_pylist() == [['one', 'two', 'three'], [], [
        'four', 'five'], ['six'], ['seven', 'eight', 'nine']]

    # Nested lists: ListArray over RegularArray over ListOffsetArray.
    content = awkward1.layout.NumpyArray(
        numpy.array([0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9, 10.10]))
    offsets = awkward1.layout.Index64(numpy.array([0, 3, 3, 5, 6, 10, 10]))
    listoffsetarray = awkward1.layout.ListOffsetArray64(offsets, content)
    regulararray = awkward1.layout.RegularArray(listoffsetarray, 2)
    starts = awkward1.layout.Index64(numpy.array([0, 1]))
    stops = awkward1.layout.Index64(numpy.array([2, 3]))
    listarray = awkward1.layout.ListArray64(starts, stops, regulararray)

    assert isinstance(awkward1.to_arrow(listarray), (pyarrow.LargeListArray))
    assert awkward1.to_arrow(listarray).to_pylist() == [[[[0.0, 1.1, 2.2], []], [
        [3.3, 4.4], [5.5]]], [[[3.3, 4.4], [5.5]], [[6.6, 7.7, 8.8, 9.9], []]]]

    assert isinstance(awkward1.to_arrow(regulararray),
                      (pyarrow.LargeListArray))
    assert awkward1.to_arrow(regulararray).to_pylist() == [[[0.0, 1.1, 2.2], []], [
        [3.3, 4.4], [5.5]], [[6.6, 7.7, 8.8, 9.9], []]]

    # Record array.
    content1 = awkward1.layout.NumpyArray(numpy.array([1, 2, 3, 4, 5]))
    content2 = awkward1.layout.NumpyArray(
        numpy.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]))
    offsets = awkward1.layout.Index32(numpy.array([0, 3, 3, 5, 6, 9]))

    recordarray = awkward1.layout.RecordArray(
        [content1, listoffsetarray, content2, content1], keys=["one", "two", "2", "wonky"])

    assert isinstance(awkward1.to_arrow(recordarray), (pyarrow.StructArray))
    assert awkward1.to_arrow(recordarray).to_pylist() == [{'one': 1, 'two': [0.0, 1.1, 2.2], '2': 1.1, 'wonky': 1}, {'one': 2, 'two': [], '2': 2.2, 'wonky': 2}, {
        'one': 3, 'two': [3.3, 4.4], '2': 3.3, 'wonky': 3}, {'one': 4, 'two': [5.5], '2': 4.4, 'wonky': 4}, {'one': 5, 'two': [6.6, 7.7, 8.8, 9.9], '2': 5.5, 'wonky': 5}]

    # Union array: tag 0 selects content0 (lists), tag 1 selects content1
    # (the integers defined above).
    content0 = awkward1.Array([[1.1, 2.2, 3.3], [], [4.4, 5.5]]).layout
    content = awkward1.Array(
        ["one", "two", "three", "four", "five", "six", "seven", "eight", "nine"]).layout
    tags = awkward1.layout.Index8(
        numpy.array([1, 1, 0, 0, 1, 0, 1, 1], dtype=numpy.int8))
    index = awkward1.layout.Index32(
        numpy.array([0, 1, 0, 1, 2, 2, 4, 3], dtype=numpy.int32))
    unionarray = awkward1.layout.UnionArray8_32(
        tags, index, [content0, content1])

    assert isinstance(awkward1.to_arrow(unionarray), (pyarrow.UnionArray))
    assert awkward1.to_arrow(unionarray).to_pylist() == [
        1, 2, [1.1, 2.2, 3.3], [], 3, [4.4, 5.5], 5, 4]

    # Indexed array: expected to become an Arrow DictionaryArray.
    content = awkward1.layout.NumpyArray(
        numpy.array([0.0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8, 9.9]))
    index = awkward1.layout.Index32(
        numpy.array([0, 2, 4, 6, 8, 9, 7, 5], dtype=numpy.int64))
    indexedarray = awkward1.layout.IndexedArray32(index, content)

    assert isinstance(awkward1.to_arrow(indexedarray),
                      (pyarrow.DictionaryArray))
    assert awkward1.to_arrow(indexedarray).to_pylist() == [
        0.0, 2.2, 4.4, 6.6, 8.8, 9.9, 7.7, 5.5]

    # Byte masks layered over each of the layouts built above.
    bytemaskedarray = awkward1.layout.ByteMaskedArray(awkward1.layout.Index8(numpy.array(
        [True, True, False, False, False], dtype=numpy.int8)), listoffsetarray, True)

    assert awkward1.to_arrow(bytemaskedarray).to_pylist() == [
        [0.0, 1.1, 2.2], [], None, None, None]

    bytemaskedarray = awkward1.layout.ByteMaskedArray(awkward1.layout.Index8(
        numpy.array([True, False], dtype=numpy.int8)), listarray, True)
    assert awkward1.to_arrow(bytemaskedarray).to_pylist(
    ) == awkward1.to_list(bytemaskedarray)

    bytemaskedarray = awkward1.layout.ByteMaskedArray(awkward1.layout.Index8(
        numpy.array([True, False], dtype=numpy.int8)), recordarray, True)
    assert awkward1.to_arrow(bytemaskedarray).to_pylist(
    ) == awkward1.to_list(bytemaskedarray)

    bytemaskedarray = awkward1.layout.ByteMaskedArray(awkward1.layout.Index8(
        numpy.array([True, False, False], dtype=numpy.int8)), indexedarray, True)
    assert awkward1.to_arrow(bytemaskedarray).to_pylist() == awkward1.to_list(bytemaskedarray)

    bytemaskedarray = awkward1.layout.ByteMaskedArray(awkward1.layout.Index8(
        numpy.array([True, False, False], dtype=numpy.int8)), unionarray, True)
    assert awkward1.to_arrow(bytemaskedarray).to_pylist(
    ) == awkward1.to_list(bytemaskedarray)

    # Indexed-option array with several different negative index values.
    ioa = awkward1.layout.IndexedOptionArray32(
        awkward1.layout.Index32([-30, 19, 6, 7, -3, 21, 13, 22, 17, 9, -12, 16]),
        awkward1.layout.NumpyArray(numpy.array([
            5.2, 1.7, 6.7, -0.4, 4.0, 7.8, 3.8, 6.8, 4.2, 0.3, 4.6, 6.2,
            6.9, -0.7, 3.9, 1.6, 8.7, -0.7, 3.2, 4.3, 4.0, 5.8, 4.2, 7.0,
            5.6, 3.8])))
    assert awkward1.to_arrow(ioa).to_pylist() == awkward1.to_list(ioa)
Exemplo n.º 8
0
 def __arrow_array__(self):
     """Return ``self.data`` converted with ``ak.to_arrow``."""
     arrow_form = ak.to_arrow(self.data)
     return arrow_form