Example #1
        def inner(cat):
            # Test #1: save and load Categorical
            fn = str(tmpdir.join(name(cat)))

            save_sds(fn, cat)
            cat2 = load_sds(fn)

            assert_save_load(cat2, cat)
            assert cat == cat2

            # Test #2: save and load Categorical from within Dataset
            ds = rt.Dataset({name(cat): cat})

            ds.save(fn)
            ds2 = rt.Dataset.load(fn)

            assert_save_load(ds2, ds)
            assert ds[name(cat)] == ds2[name(cat)]
Example #2
def column_arrays(draw) -> List[Union[np.ndarray, rt.FastArray]]:
    """Returns a list of numpy ndarray and riptide FastArray wrapped in columns for DataFrame strategies."""
    # todo add strategy to generate FastArray to the return list
    arr = draw(
        generate_array(
            shape=ndarray_shape_strategy(),
            dtype=ints_or_floats_dtypes(),
            include_invalid=False,
        ))
    # f_arr = rt.FastArray(arr)
    return [pdst.column(name(arr), elements=arr)]
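
# Sketch for the TODO above (an assumption, not part of the original strategy): also
# wrap the drawn ndarray in rt.FastArray and return a second column backed by it,
# mirroring the existing pdst.column(...) call. The name column_arrays_with_fastarray
# is hypothetical; generate_array, name, and the shape/dtype strategies are assumed
# to be the same helpers used above.
def column_arrays_with_fastarray(draw):
    arr = draw(
        generate_array(
            shape=ndarray_shape_strategy(),
            dtype=ints_or_floats_dtypes(),
            include_invalid=False,
        ))
    f_arr = rt.FastArray(arr)
    return [
        pdst.column(name(arr), elements=arr),
        pdst.column(name(f_arr), elements=f_arr),
    ]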
Example #3
    def test_save_load_array(self, arr, tmpdir):
        # Test #1: save and load of ndarray
        fn = str(tmpdir.join(name(arr)))
        save_sds(fn, arr)
        arr2 = load_sds(fn)

        assert_save_load(arr2, arr)
        assert_array_equal_(arr2, arr)

        # Test #2: save and load of FastArray derived from ndarray
        f_arr = rt.FA(arr)
        save_sds(fn, f_arr)
        f_arr2 = load_sds(fn)

        assert_array_equal_(f_arr2, f_arr)
Example #4
    def test_save_load_dataset_array(self, arr, tmpdir):
        # Test #1: save and load of ndarray within Dataset
        fn = str(tmpdir.join(name(arr)))

        ds = rt.Dataset({name(arr): arr})

        ds.save(fn)
        ds2 = rt.Dataset.load(fn)

        assert_save_load(ds2, ds)
        assert_array_equal_(ds2[name(arr)], ds[name(arr)])

        # Test #2: save and load of FastArray derived from ndarray within Dataset
        f_arr = rt.FA(arr)
        fn = str(tmpdir.join(name(f_arr)))

        ds = rt.Dataset({name(f_arr): f_arr})
        ds.save(fn)
        ds2 = rt.Dataset.load(fn)

        assert_save_load(ds2, ds)
        assert_array_equal_(ds[name(f_arr)], ds2[name(f_arr)])
Example #5
    def test_stack_save_load(self, dataframe, stack_count, tmpdir, stack):
        def assert_stack_equal(pds, ds, num_stack=1):
            assert id(pds) != id(
                ds
            ), f"Identity of saved {name(ds)} should be different from the loaded {name(ds)}."
            assert isinstance(pds, rt.PDataset), f"got type {type(pds)}"
            assert pds.shape == (
                num_stack * ds.shape[0],
                ds.shape[1],
            ), f"Shapes should be the same.\n{name(ds)}\n{repr(ds)}\n{name(pds)}\n{pds}"
            # TODO consider stacking
            # for f_arr1, f_arr2 in zip(pds.values(), ds.values()):
            #     assert_array_equal_(f_arr2._np, f_arr1._np)

        fn = str(tmpdir.join(name(dataframe)))

        ds = rt.Dataset(dataframe)
        save_sds(fn, ds)

        for i in range(stack_count):
            # expectations for empty input
            if i == 0:
                if stack:
                    with pytest.raises(ValueError):
                        _ = load_sds([fn] * i, stack=stack)
                else:
                    pds = load_sds([fn] * i, stack=stack)
                    assert pds is None, f"got type {type(pds)}"
                continue

            # expectations for n input files, where n is a positive integer
            pds = load_sds([fn] * i, stack=stack)

            if stack:
                assert_stack_equal(pds, ds, num_stack=i)
            else:
                # handle expectations for non-stacked load
                assert isinstance(pds, list), f"got type {type(pds)}"
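
# Minimal usage sketch of the stacking behavior exercised above (illustrative, not
# part of the original test); it assumes ds has already been saved to fn with
# save_sds and uses two copies of the file as an example.
pds = load_sds([fn, fn], stack=True)       # stacked load returns a single PDataset
assert isinstance(pds, rt.PDataset)
assert pds.shape == (2 * ds.shape[0], ds.shape[1])   # row count multiplied by the file count

loaded = load_sds([fn, fn], stack=False)   # non-stacked load returns the Datasets as a list
assert isinstance(loaded, list)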
Example #6
    def test_save_load_datasets(self, dataframe, tmpdir):
        # Generate a DataFrame covering all the dtypes and array types, then copy it
        # to build nested Datasets and sibling Datasets.

        # Test #1: save and load of DataFrame
        fn = str(tmpdir.join(name(dataframe)))
        # save_sds(fn, dataframe)
        # dataframe2 = load_sds(fn)
        # assert dataframe2 == dataframe
        # E TypeError: save_sds() can only save Structs, Datasets, or single arrays. Got <class 'pandas.core.frame.DataFrame'>
        # ..\rt_sds.py:470: TypeError

        # Test #2: save and load of Dataset created from DataFrame
        dataset = rt.Dataset(dataframe)

        save_sds(fn, dataset)
        dataset2 = load_sds(fn)

        assert_save_load(dataset2, dataset)
        for f_arr1, f_arr2 in zip(dataset.values(), dataset2.values()):
            assert_array_equal_(f_arr2._np, f_arr1._np)

        # Test #3: save and load nested Dataset within a Multiset
        # This also tests that shallow and deep copies that are saved and loaded from SDS
        # are both unique objects with the same size footprint.
        multiset = rt.Multiset()
        shallow_copy_name, deep_copy_name = "dataset_shallow_copy", "dataset_deep_copy"
        dataset_shallow_copy, dataset_deep_copy = (
            dataset.copy(deep=False),
            dataset.copy(deep=True),
        )
        multiset[shallow_copy_name], multiset[deep_copy_name] = (
            dataset_shallow_copy,
            dataset_deep_copy,
        )

        fn = str(tmpdir.join(name(multiset)))
        save_sds(fn, multiset)
        multiset2 = load_sds(fn)

        assert_save_load(multiset2, multiset)
        # Shallow copy assertions
        assert id(multiset[shallow_copy_name]) != id(
            multiset2[shallow_copy_name]
        ), "Identity of saved object should be different from the loaded object."
        for f_arr1, f_arr2 in zip(multiset[shallow_copy_name].values(),
                                  multiset2[shallow_copy_name].values()):
            # Compare the ndarray views so we don't need to consider Riptable invalid checks.
            # This test is concerned with ensuring the same data is loaded as saved.
            assert_save_load(f_arr2, f_arr1)
            assert_array_equal_(f_arr2._np, f_arr1._np)

        # Deep copy assertions
        assert id(multiset[deep_copy_name]) != id(
            multiset2[deep_copy_name]
        ), "Identity of saved object should be different from the loaded object."
        for f_arr1, f_arr2 in zip(multiset[deep_copy_name].values(),
                                  multiset2[deep_copy_name].values()):
            assert_save_load(f_arr2, f_arr1)
            assert_array_equal_(f_arr2._np, f_arr1._np)
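
# Minimal sketch of the copy calls relied on in Test #3 (an illustration, not taken
# from the original test), assuming Dataset.copy follows the usual convention:
# deep=False returns a new Dataset over the same column data, deep=True duplicates
# the columns; both copies are distinct objects from the source Dataset.
ds = rt.Dataset({"a": rt.FA([1, 2, 3])})
shallow, deep = ds.copy(deep=False), ds.copy(deep=True)
assert id(shallow) != id(ds) and id(deep) != id(ds)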