def test_categorical_from_arrow_ChunkedArray():
    """Check dictionary indices after ``ak.from_arrow`` on multi-batch tables.

    Two tables with a single dictionary-encoded "year" column are built: one
    repeats the same record batch three times, the other follows that batch
    with a batch whose dictionary holds only two of the three values.
    """

    def year_index(converted):
        # Pull the index buffer of the first field out as a plain list.
        return np.asarray(converted.layout.field(0).content.index).tolist()

    full_indices = [0, 1, 0, 1, 2, 0, 2]
    short_indices = [0, 1, 0, 1, 0]

    full_dictionary = pyarrow.array([2019, 2020, 2021])
    short_dictionary = pyarrow.array([2019, 2020])

    full_encoded = pyarrow.DictionaryArray.from_arrays(
        pyarrow.array(full_indices), full_dictionary)
    short_encoded = pyarrow.DictionaryArray.from_arrays(
        pyarrow.array(short_indices), short_dictionary)

    full_batch = pyarrow.RecordBatch.from_arrays([full_encoded], ["year"])
    short_batch = pyarrow.RecordBatch.from_arrays([short_encoded], ["year"])

    uniform_table = pyarrow.Table.from_batches(
        [full_batch, full_batch, full_batch])
    mixed_table = pyarrow.Table.from_batches([full_batch, short_batch])

    uniform = ak.from_arrow(uniform_table)
    mixed = ak.from_arrow(mixed_table)

    assert year_index(uniform) == full_indices * 3
    assert year_index(mixed) == full_indices + short_indices
def test_union_from_arrow():
    """Round-trip union-typed arrays through Arrow, with and without a None."""
    cases = (
        (
            [1.1, 2.2, [1, 2, 3], "hello"],
            "4 * union[float64, var * int64, string]",
        ),
        (
            [1.1, 2.2, None, [1, 2, 3], "hello"],
            "5 * union[?float64, option[var * int64], option[string]]",
        ),
    )
    for values, expected_type in cases:
        round_tripped = ak.from_arrow(ak.to_arrow(ak.Array(values)))
        assert str(round_tripped.type) == expected_type
        assert round_tripped.tolist() == values
def test_optional_categorical_from_arrow():
    """Check the "categorical" parameter for indices with and without a null."""
    # construct categorical array from option-typed DictionaryArray
    index_variants = (
        pyarrow.array([0, 1, 0, 1, 2, 0, 2]),
        pyarrow.array([0, 1, 0, 1, 2, None, 0, 2]),
    )
    dictionary = pyarrow.array([2019, 2020, 2021])

    for index in index_variants:
        encoded = pyarrow.DictionaryArray.from_arrays(index, dictionary)
        layout = ak.from_arrow(encoded).layout
        assert layout.parameter("__array__") == "categorical"
def test_to_table_2():
    """Round-trip a jagged array of optional records through an Arrow table."""
    first = [{"x": 0.0, "y": []}, {"x": 1.1, "y": [1]}, {"x": 2.2, "y": None}]
    last = [{"x": 3.3, "y": [1, 2, 3]}, None, {"x": 4.4, "y": [1, 2, 3, 4]}]
    source = ak.Array([first, [], last])

    expected_type = '3 * var * ?{"x": float64, "y": option[var * int64]}'
    assert str(source.type) == expected_type

    restored = ak.from_arrow(ak.to_arrow_table(source))
    assert str(restored.type) == str(source.type)
    assert restored.tolist() == source.tolist()
def test_to_arrow_table():
    """Check ``ak.to_arrow_table`` round trips with and without explode_records."""
    nested = ak.Array([[{"x": 1.1, "y": [1]}], [], [{"x": 2.2, "y": [1, 2]}]])
    exploded = ak.from_arrow(ak.to_arrow_table(nested, explode_records=True))
    assert exploded.tolist() == [
        {"x": [1.1], "y": [[1]]},
        {"x": [], "y": []},
        {"x": [2.2], "y": [[1, 2]]},
    ]

    flat = ak.Array([{"x": 1.1, "y": [1]}, {"x": 2.2, "y": [1, 2]}])
    restored = ak.from_arrow(ak.to_arrow_table(flat))
    assert restored.tolist() == [
        {"x": 1.1, "y": [1]},
        {"x": 2.2, "y": [1, 2]},
    ]
def test():
    """Check ``ak.from_arrow`` on tables whose "x" column is nulls or lists of nulls."""
    all_null = pyarrow.Table.from_pydict({"x": [None, None, None]})
    assert ak.from_arrow(all_null).tolist() == [
        {"x": None},
        {"x": None},
        {"x": None},
    ]

    null_lists = pyarrow.Table.from_pydict({"x": [[None, None], [], [None]]})
    assert ak.from_arrow(null_lists).tolist() == [
        {"x": [None, None]},
        {"x": []},
        {"x": [None]},
    ]
def test_list_from_arrow():
    """Round-trip variable-length float lists through Arrow.

    Covers four variants: no missing values, a missing item inside a list,
    a missing list, and both at once.
    """
    cases = (
        ([[1.1, 2.2, 3.3], [], [4.4, 5.5]], "3 * var * float64"),
        ([[1.1, 2.2, None], [], [4.4, 5.5]], "3 * var * ?float64"),
        ([[1.1, 2.2, 3.3], [], None, [4.4, 5.5]], "4 * option[var * float64]"),
        ([[1.1, 2.2, None], [], None, [4.4, 5.5]], "4 * option[var * ?float64]"),
    )
    for values, expected_type in cases:
        round_tripped = ak.from_arrow(ak.to_arrow(ak.Array(values)))
        assert str(round_tripped.type) == expected_type
        assert round_tripped.tolist() == values
# Example #8
# 0
def test():
    """Convert an Arrow struct of size-2 float lists and check values and type."""
    struct_type = pyarrow.struct([
        pyarrow.field(name, pyarrow.list_(pyarrow.float64(), 2))
        for name in ("x", "y")
    ])
    rows = [
        {"x": [1.1, 2.1], "y": [3.1, 4.1]},
        {"x": [1.2, 2.2], "y": [3.2, 4.2]},
        {"x": [1.3, 2.3], "y": [3.3, 4.3]},
    ]
    converted = ak.from_arrow(pyarrow.array(rows, type=struct_type))

    assert ak.to_list(converted) == rows
    expected_type = '3 * {"x": option[2 * ?float64], "y": option[2 * ?float64]}'
    assert str(converted.type) == expected_type
def test_from_buffers():
    """Rebuild an Arrow-derived array via to_buffers/from_buffers (lazy=True).

    The array goes through an Arrow table first; the last entry of the
    rebuilt array is then compared against the original data.
    """
    first = [{"x": 0.0, "y": []}, {"x": 1.1, "y": [1]}, {"x": 2.2, "y": None}]
    last = [{"x": 3.3, "y": [1, 2, 3]}, None, {"x": 4.4, "y": [1, 2, 3, 4]}]
    source = ak.Array([first, [], last])

    expected_type = '3 * var * ?{"x": float64, "y": option[var * int64]}'
    assert str(source.type) == expected_type

    from_table = ak.from_arrow(ak.to_arrow_table(source))
    form, length, container = ak.to_buffers(from_table)
    rebuilt = ak.from_buffers(form, length, container, lazy=True)

    assert rebuilt[2].tolist() == [
        {"x": 3.3, "y": [1, 2, 3]},
        None,
        {"x": 4.4, "y": [1, 2, 3, 4]},
    ]
def test_record_from_arrow():
    """Round-trip a RecordArray through Arrow, plain and under a byte mask.

    The record has fields "x" (floats), "y" (the same floats wrapped in an
    UnmaskedArray) and "z" (integers with one None). The second half wraps
    the record layout in a ByteMaskedArray that hides entry 1.
    """
    x_layout = ak.Array([1.1, 2.2, 3.3, 4.4, 5.5]).layout
    z_layout = ak.Array([1, 2, 3, None, 5]).layout

    record_layout = ak.layout.RecordArray(
        [x_layout, ak.layout.UnmaskedArray(x_layout), z_layout],
        ["x", "y", "z"],
    )
    plain = ak.Array(record_layout)
    expected_rows = [
        {"x": 1.1, "y": 1.1, "z": 1},
        {"x": 2.2, "y": 2.2, "z": 2},
        {"x": 3.3, "y": 3.3, "z": 3},
        {"x": 4.4, "y": 4.4, "z": None},
        {"x": 5.5, "y": 5.5, "z": 5},
    ]

    restored = ak.from_arrow(ak.to_arrow(plain))
    plain_type = '5 * {"x": float64, "y": ?float64, "z": ?int64}'
    assert str(restored.type) == plain_type
    assert restored.tolist() == expected_rows

    # With valid_when=False, the single True flag marks entry 1 as missing.
    mask = ak.layout.Index8(
        np.array([False, True, False, False, False], np.int8))
    masked = ak.Array(
        ak.layout.ByteMaskedArray(mask, plain.layout, valid_when=False))

    restored = ak.from_arrow(ak.to_arrow(masked))
    masked_type = '5 * ?{"x": float64, "y": ?float64, "z": ?int64}'
    assert str(restored.type) == masked_type
    assert restored.tolist() == expected_rows[:1] + [None] + expected_rows[2:]
def test_from_arrow():
    """Check ``ak.from_arrow`` on Arrow date, time, timestamp and duration arrays.

    Each case builds a two-element pyarrow array of the given Arrow type from
    Python ``datetime`` objects and compares ``tolist()`` of the converted
    array with NumPy datetime64/timedelta64 values.
    """
    dates = [datetime.datetime(2002, 1, 23), datetime.datetime(2019, 2, 20)]
    expected_dates = [
        np.datetime64("2002-01-23T00:00:00.000"),
        np.datetime64("2019-02-20T00:00:00.000"),
    ]

    times = [datetime.time(1, 0, 0), datetime.time(2, 30, 0)]
    expected_times = [
        np.datetime64("1970-01-01T01:00:00.000"),
        np.datetime64("1970-01-01T02:30:00.000"),
    ]

    spans = [datetime.timedelta(5), datetime.timedelta(10)]
    expected_spans = [
        np.timedelta64(5, "D"),
        np.timedelta64(10, "D"),
    ]

    # (python values, arrow type factory, factory arguments, expected result)
    cases = (
        (dates, pyarrow.date64, (), expected_dates),
        (dates, pyarrow.date32, (), expected_dates),
        (times, pyarrow.time64, ("us",), expected_times),
        (times, pyarrow.time64, ("ns",), expected_times),
        (times, pyarrow.time32, ("s",), expected_times),
        (times, pyarrow.time32, ("ms",), expected_times),
        (dates, pyarrow.timestamp, ("s",), expected_dates),
        (dates, pyarrow.timestamp, ("ms",), expected_dates),
        (dates, pyarrow.timestamp, ("us",), expected_dates),
        (dates, pyarrow.timestamp, ("ns",), expected_dates),
        (spans, pyarrow.duration, ("s",), expected_spans),
        (spans, pyarrow.duration, ("ms",), expected_spans),
        (spans, pyarrow.duration, ("us",), expected_spans),
        (spans, pyarrow.duration, ("ns",), expected_spans),
    )
    for values, make_type, type_args, expected in cases:
        # The Arrow type is created right before use, one case at a time.
        arrow_array = pyarrow.array(values, type=make_type(*type_args))
        array = ak.from_arrow(arrow_array)
        assert array.tolist() == expected
# Example #12
# 0
def test_arrow_nomask():
    """Check the Arrow round-trip type with and without the trailing None."""
    source = ak.Array([1.1, 2.2, 3.3, 4.4, None])

    whole = ak.from_arrow(ak.to_arrow(source))
    assert str(ak.type(whole)) == "5 * ?float64"

    # Dropping the last entry removes the only None from the data.
    trimmed = ak.from_arrow(ak.to_arrow(source[:-1]))
    assert str(ak.type(trimmed)) == "4 * ?float64"