def test_check_array_pandas_dtype_object_conversion(): # test that data-frame like objects with dtype object # get converted X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=np.object) X_df = MockDataFrame(X) assert_equal(check_array(X_df).dtype.kind, "f") assert_equal(check_array(X_df, ensure_2d=False).dtype.kind, "f") # smoke-test against dataframes with column named "dtype" X_df.dtype = "Hans" assert_equal(check_array(X_df, ensure_2d=False).dtype.kind, "f")
def test_safe_indexing_mock_pandas(): X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) X_df = MockDataFrame(X) inds = np.array([1, 2]) X_df_indexed = safe_indexing(X_df, inds) X_indexed = safe_indexing(X_df, inds) assert_array_equal(np.array(X_df_indexed), X_indexed)
def test_check_array_on_mock_dataframe(): arr = np.array([[0.2, 0.7], [0.6, 0.5], [0.4, 0.1], [0.7, 0.2]]) mock_df = MockDataFrame(arr) checked_arr = check_array(mock_df) assert checked_arr.dtype == arr.dtype checked_arr = check_array(mock_df, dtype=np.float32) assert checked_arr.dtype == np.dtype(np.float32)
def train_test_split_mock_pandas(): # X mock dataframe X_df = MockDataFrame(X) X_train, X_test = train_test_split(X_df) assert_true(isinstance(X_train, MockDataFrame)) assert_true(isinstance(X_test, MockDataFrame)) X_train_arr, X_test_arr = train_test_split(X_df)
def test_clone_pandas_dataframe(): class DummyEstimator(BaseEstimator, TransformerMixin): """This is a dummy class for generating numerical features This feature extractor extracts numerical features from pandas data frame. Parameters ---------- df: pandas data frame The pandas data frame parameter. Notes ----- """ def __init__(self, df=None, scalar_param=1): self.df = df self.scalar_param = scalar_param def fit(self, X, y=None): pass def transform(self, X): pass # build and clone estimator d = np.arange(10) df = MockDataFrame(d) e = DummyEstimator(df, scalar_param=1) cloned_e = clone(e) # the test assert_true((e.df == cloned_e.df).values.all()) assert_equal(e.scalar_param, cloned_e.scalar_param)
def train_test_split_mock_pandas(): # X mock dataframe X_df = MockDataFrame(X) X_train, X_test = cval.train_test_split(X_df) assert_true(isinstance(X_train, MockDataFrame)) assert_true(isinstance(X_test, MockDataFrame)) X_train_arr, X_test_arr = cval.train_test_split(X_df, allow_lists=False) assert_true(isinstance(X_train_arr, np.ndarray)) assert_true(isinstance(X_test_arr, np.ndarray))
def train_test_split_mock_pandas(): # X mock dataframe X_df = MockDataFrame(X) X_train, X_test = cval.train_test_split(X_df) assert_true(isinstance(X_train, MockDataFrame)) assert_true(isinstance(X_test, MockDataFrame)) X_train_array, X_test_array = cval.train_test_split(X_df, force_arrays=True) assert_true(isinstance(X_train_array, np.ndarray)) assert_true(isinstance(X_test_array, np.ndarray))
def train_test_split_mock_pandas(): # X mock dataframe X_df = MockDataFrame(X) X_train, X_test = cval.train_test_split(X_df) assert_true(isinstance(X_train, MockDataFrame)) assert_true(isinstance(X_test, MockDataFrame)) with warnings.catch_warnings(record=True): # deprecated X_train_arr, X_test_arr = cval.train_test_split(X_df, allow_lists=False) assert_true(isinstance(X_train_arr, np.ndarray)) assert_true(isinstance(X_test_arr, np.ndarray))
def test_check_array_complex_data_error(): X = np.array([[1 + 2j, 3 + 4j, 5 + 7j], [2 + 3j, 4 + 5j, 6 + 7j]]) assert_raises_regex(ValueError, "Complex data not supported", check_array, X) # list of lists X = [[1 + 2j, 3 + 4j, 5 + 7j], [2 + 3j, 4 + 5j, 6 + 7j]] assert_raises_regex(ValueError, "Complex data not supported", check_array, X) # tuple of tuples X = ((1 + 2j, 3 + 4j, 5 + 7j), (2 + 3j, 4 + 5j, 6 + 7j)) assert_raises_regex(ValueError, "Complex data not supported", check_array, X) # list of np arrays X = [ np.array([1 + 2j, 3 + 4j, 5 + 7j]), np.array([2 + 3j, 4 + 5j, 6 + 7j]) ] assert_raises_regex(ValueError, "Complex data not supported", check_array, X) # tuple of np arrays X = (np.array([1 + 2j, 3 + 4j, 5 + 7j]), np.array([2 + 3j, 4 + 5j, 6 + 7j])) assert_raises_regex(ValueError, "Complex data not supported", check_array, X) # dataframe X = MockDataFrame( np.array([[1 + 2j, 3 + 4j, 5 + 7j], [2 + 3j, 4 + 5j, 6 + 7j]])) assert_raises_regex(ValueError, "Complex data not supported", check_array, X) # sparse matrix X = sp.coo_matrix([[0, 1 + 2j], [0, 0]]) assert_raises_regex(ValueError, "Complex data not supported", check_array, X)
def test_shuffle_dont_convert_to_array(): # Check that shuffle does not try to convert to numpy arrays with float # dtypes can let any indexable datastructure pass-through. a = ['a', 'b', 'c'] b = np.array(['a', 'b', 'c'], dtype=object) c = [1, 2, 3] d = MockDataFrame(np.array([['a', 0], ['b', 1], ['c', 2]], dtype=object)) e = sp.csc_matrix(np.arange(6).reshape(3, 2)) a_s, b_s, c_s, d_s, e_s = shuffle(a, b, c, d, e, random_state=0) assert_equal(a_s, ['c', 'b', 'a']) assert_equal(type(a_s), list) assert_array_equal(b_s, ['c', 'b', 'a']) assert_equal(b_s.dtype, object) assert_equal(c_s, [3, 2, 1]) assert_equal(type(c_s), list) assert_array_equal(d_s, np.array([['c', 2], ['b', 1], ['a', 0]], dtype=object)) assert_equal(type(d_s), MockDataFrame) assert_array_equal(e_s.toarray(), np.array([[4, 5], [2, 3], [0, 1]]))