def test_conversion():
    """Check type detection and conversion across the supported containers.

    One seeded random matrix is wrapped as a numpy array, a pandas DataFrame
    and a scipy CSR matrix (plus 0/1 indicator variants and a 1-D column
    slice).  For every fixture the test verifies that ``get_type`` reports
    the expected ``DataTypes`` member and that each ``convert_to_*`` helper
    returns the container type it targets.  Finally it checks that converting
    the DataFrame and the sparse matrix back to an array reproduces the
    original values.
    """
    # NOTE(review): another ``test_conversion`` is defined further down in
    # this file and rebinds the name -- confirm this version is still meant
    # to be collected.
    np.random.seed(123)
    dense = np.random.randn(10, 3)

    def indicator():
        # Fresh integer 0/1 matrix flagging the strictly positive entries.
        return 1 * (dense > 0)

    fixtures = {
        "a1": (dense, DataTypes.NumpyArray),
        "a2": (indicator(), DataTypes.NumpyArray),
        "a3": (dense[:, 1], DataTypes.NumpyArray),
        "df1": (pd.DataFrame(dense, columns=["A", "B", "C"]), DataTypes.DataFrame),
        "df2": (pd.DataFrame(indicator(), columns=["a", "b", "c"]), DataTypes.DataFrame),
        "s1": (sparse.csr_matrix(dense), DataTypes.SparseArray),
        "s2": (sparse.csr_matrix(indicator()), DataTypes.SparseArray),
        # The SparseDataFrame fixtures ("dfs1" / "dfs2") are disabled here,
        # presumably because pd.SparseDataFrame is unavailable in recent
        # pandas -- confirm before re-enabling.
    }

    # Each converter paired with the container type it must produce.  The
    # convert_to_sparsedataframe check is disabled along with its fixtures.
    converters = (
        (convert_to_dataframe, DataTypes.DataFrame),
        (convert_to_array, DataTypes.NumpyArray),
        (convert_to_sparsearray, DataTypes.SparseArray),
    )

    for obj, expected_type in fixtures.values():
        assert get_type(obj) == expected_type
        for convert, target_type in converters:
            assert get_type(convert(obj)) == target_type

    # Round trip: DataFrame and sparse inputs must give back the dense values.
    reference = fixtures["a1"][0]
    for key in ("df1", "s1"):
        assert np.array_equal(convert_to_array(fixtures[key][0]), reference)
def test_conversion():
    """Check type detection and conversion across the supported containers.

    A seeded random matrix is exposed as numpy arrays, pandas DataFrames and
    scipy CSR matrices.  When ``_IS_PD1`` is truthy an extra DataFrame whose
    column ``"A"`` is categorical is added.  For every fixture the test
    asserts that:

    * ``get_type`` reports the expected ``DataTypes`` member;
    * ``convert_to_dataframe`` / ``convert_to_array`` /
      ``convert_to_sparsearray`` return the matching container type;
    * the array conversion has an integer or float dtype (numpy kind ``"i"``
      or ``"f"``), including for the categorical fixture.

    It finally checks that converting the DataFrame and the sparse matrix
    back to an array reproduces the original values.

    Every in-loop assertion carries the fixture key, so a failure names the
    offending input directly.
    """
    # NOTE(review): an earlier function with the same name exists in this
    # file; this definition rebinds it -- confirm the duplicate is intended.
    np.random.seed(123)
    array1 = np.random.randn(10, 3)

    all_objects = {
        "a1": (array1, DataTypes.NumpyArray),
        "a2": (1 * (array1 > 0), DataTypes.NumpyArray),
        "a3": (array1[:, 1], DataTypes.NumpyArray),
        "df1": (pd.DataFrame(array1, columns=["A", "B", "C"]), DataTypes.DataFrame),
        "df2": (pd.DataFrame(1 * (array1 > 0), columns=["a", "b", "c"]), DataTypes.DataFrame),
        "s1": (sparse.csr_matrix(array1), DataTypes.SparseArray),
        "s2": (sparse.csr_matrix(1 * (array1 > 0)), DataTypes.SparseArray),
        # SparseDataFrame fixtures are intentionally absent:
        # pd.SparseDataFrame was removed in pandas 1.0.
    }

    if _IS_PD1:
        # Categorical columns must still convert to a numeric array.
        df1_cat = all_objects["df1"][0].copy()
        df1_cat["A"] = df1_cat["A"].astype("category")
        all_objects["df1_cat"] = (df1_cat, DataTypes.DataFrame)

    for name, (obj, expected_type) in all_objects.items():
        assert get_type(obj) == expected_type, "{}: wrong detected type".format(name)

        converted = convert_to_dataframe(obj)
        assert get_type(converted) == DataTypes.DataFrame, "{}: convert_to_dataframe".format(name)

        converted = convert_to_array(obj)
        assert get_type(converted) == DataTypes.NumpyArray, "{}: convert_to_array".format(name)
        assert converted.dtype.kind in ("i", "f"), "{}: non-numeric dtype {}".format(
            name, converted.dtype
        )

        converted = convert_to_sparsearray(obj)
        assert get_type(converted) == DataTypes.SparseArray, "{}: convert_to_sparsearray".format(name)

    # Round trip: DataFrame and sparse inputs must give back the dense values.
    assert np.array_equal(convert_to_array(all_objects["df1"][0]), all_objects["a1"][0])
    assert np.array_equal(convert_to_array(all_objects["s1"][0]), all_objects["a1"][0])
def test_generic_hstack_sparse_and_category(with_cat, force_sparse):
    """Check ``generic_hstack`` on a DataFrame stacked with a sparse array.

    Parameters
    ----------
    with_cat : bool
        When truthy, column ``"a"`` of the DataFrame is cast to ``category``
        before stacking.
    force_sparse : bool
        When truthy, ``max_number_of_cells_for_non_sparse`` is set to 10,
        which is below the 10 x 4 cells of the stacked result, and the test
        expects a sparse array back.  Otherwise the limit is 1000010.

    NOTE(review): both arguments are presumably supplied by a pytest
    parametrization or fixtures that are not visible in this block.
    """
    # Seed so the random fixtures are reproducible between runs.
    np.random.seed(123)

    df = pd.DataFrame({"a": 10 + np.arange(10), "b": np.random.randn(10)})
    if with_cat:
        df["a"] = df["a"].astype("category")

    # randint's upper bound is exclusive: (0, 1) would only ever produce
    # zeros, leaving the sparse operand without a single stored value.
    xx = convert_to_sparsearray(np.random.randint(0, 2, size=(10, 2)))

    concat = generic_hstack(
        (df, xx),
        max_number_of_cells_for_non_sparse=10 + (1 - force_sparse) * 1000000,
    )

    # Rows are preserved, columns are the sum of both operands.
    assert concat.shape == (df.shape[0], df.shape[1] + xx.shape[1])

    if force_sparse:
        assert get_type(concat) == DataTypes.SparseArray
    elif with_cat:
        # Check the container first so a wrong type fails with a clear
        # assertion rather than an AttributeError on ``.dtypes``.
        assert isinstance(concat, pd.DataFrame)
        assert concat.dtypes["a"] == "category"