def test_check_array_accept_sparse_type_exception():
    """Check the errors ``check_array`` raises for bad ``accept_sparse`` values."""
    csr_input = sp.csr_matrix([[1, 2], [3, 4]])

    # accept_sparse=False: sparse input must be rejected with a TypeError.
    dense_required_msg = ("A sparse tensor was passed, but dense data is required. "
                          "Use X.todense() to convert to a dense tensor.")
    assert_raise_message(TypeError, dense_required_msg,
                         check_array, csr_input, accept_sparse=False)

    # An empty list or an empty tuple is not an acceptable accept_sparse value.
    empty_container_msg = ("When providing 'accept_sparse' as a tuple or list, "
                           "it must contain at least one string value.")
    for empty in ([], ()):
        # The message has no placeholders, so .format() returns it unchanged.
        assert_raise_message(ValueError, empty_container_msg.format(empty),
                             check_array, csr_input, accept_sparse=empty)

    # A value of an unsupported kind (here the ``object`` type) is a ValueError.
    with pytest.raises(ValueError):
        check_array(csr_input, accept_sparse=object)
def test_check_array_pandas_dtype_object_conversion():
    """Dataframe-like wrappers around object-dtype data come back as floats."""
    object_tensor = mt.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
                             dtype=mt.object)
    frame_like = MockDataFrame(object_tensor)

    # Conversion must yield a floating dtype with and without the 2-D check.
    converted_kind = check_array(frame_like).dtype.kind
    assert converted_kind == "f"
    converted_kind = check_array(frame_like, ensure_2d=False).dtype.kind
    assert converted_kind == "f"

    # Smoke test: an attribute named "dtype" that is not a real dtype
    # (as with a dataframe column called "dtype") must not break conversion.
    frame_like.dtype = "Hans"
    converted_kind = check_array(frame_like, ensure_2d=False).dtype.kind
    assert converted_kind == "f"
def test_ordering():
    """Check that ``check_array`` enforces the requested memory order.

    A 'copy' made without 'order=K' would lose the ordering, so the check is
    repeated for both ``copy=True`` and ``copy=False``.
    """
    base = mt.ones((10, 5))
    # Try the tensor itself as well as its transpose.
    for source in (base, base.T):
        for copy in (True, False):
            c_ordered = check_array(source, order='C', copy=copy)
            assert c_ordered.flags['C_CONTIGUOUS'] is True

            f_ordered = check_array(source, order='F', copy=copy)
            assert f_ordered.flags['F_CONTIGUOUS'] is True

            if copy:
                # A requested copy must not be the input object itself.
                assert source is not f_ordered
def test_check_array_accept_sparse_no_exception():
    """With ``accept_sparse=True`` a CSR matrix is accepted and stays sparse."""
    csr_input = sp.csr_matrix([[1, 2], [3, 4]])

    checked = check_array(csr_input, accept_sparse=True)

    assert isinstance(checked, Tensor)
    assert checked.issparse() is True
def test_check_array_accept_sparse_no_exception(self):
    """With ``accept_sparse=True`` a CSR matrix is accepted and stays sparse."""
    csr_input = sp.csr_matrix([[1, 2], [3, 4]])

    checked = check_array(csr_input, accept_sparse=True)

    self.assertIsInstance(checked, Tensor)
    self.assertTrue(checked.issparse())
def test_check_array(setup):
    """Exercise ``check_array``: sparse rejection, dimensionality, dtype/order
    enforcement, alternative input formats, warnings and finiteness.

    ``setup`` is never referenced in the body; presumably it is a pytest
    fixture requested only for its side effects -- confirm against conftest.
    """
    nested = [[1, 2], [3, 4]]

    # Sparse input is rejected when accept_sparse is left at its default,
    # first as a scipy matrix and then wrapped in a tensor.
    sparse_matrix = sp.csr_matrix(nested)
    with pytest.raises(TypeError):
        check_array(sparse_matrix)
    sparse_tensor = mt.tensor(sp.csr_matrix(nested))
    with pytest.raises(TypeError):
        check_array(sparse_tensor)

    # ensure_2d=False lets a 1-D input through unchanged in rank.
    flat = check_array([0, 1, 2], ensure_2d=False)
    assert flat.ndim == 1

    # ensure_2d=True rejects 1-D input, whether a list or a tensor.
    assert_raise_message(ValueError,
                         'Expected 2D array, got 1D array instead',
                         check_array, [0, 1, 2], ensure_2d=True)
    assert_raise_message(ValueError,
                         'Expected 2D array, got 1D array instead',
                         check_array, mt.tensor([0, 1, 2]), ensure_2d=True)

    # ensure_2d=True rejects scalars as well.
    assert_raise_message(ValueError,
                         'Expected 2D array, got scalar array instead',
                         check_array, 10, ensure_2d=True)

    # A 3-D input is rejected unless allow_nd=True is passed.
    cube = mt.arange(8).reshape(2, 2, 2)
    with pytest.raises(ValueError):
        check_array(cube)
    check_array(cube, allow_nd=True)  # doesn't raise

    # dtype and order enforcement over every combination of
    # input layout/dtype, requested dtype, requested order and copy flag.
    c_layout = mt.arange(4).reshape(2, 2).copy("C")
    f_layout = c_layout.copy("F")
    as_int = c_layout.astype(mt.int)
    as_float = c_layout.astype(mt.float)
    inputs = [c_layout, f_layout, as_int, as_float]
    dtypes = [mt.int32, mt.int, mt.float, mt.float32, None, mt.bool, object]
    orders = ['C', 'F', None]
    copy_flags = [True, False]
    for given, dtype, order, copy in product(inputs, dtypes, orders,
                                             copy_flags):
        checked = check_array(given, dtype=dtype, order=order, copy=copy,
                              force_all_finite=False)

        # dtype=None means "keep the input dtype".
        if dtype is None:
            assert checked.dtype == given.dtype
        else:
            assert checked.dtype == dtype

        if order == 'C':
            assert checked.flags['C_CONTIGUOUS']
            assert not checked.flags['F_CONTIGUOUS']
        elif order == 'F':
            assert checked.flags['F_CONTIGUOUS']
            assert not checked.flags['C_CONTIGUOUS']

        if copy:
            assert given is not checked
        elif (given.dtype == checked.dtype and
              checked.flags['C_CONTIGUOUS'] == given.flags['C_CONTIGUOUS'] and
              checked.flags['F_CONTIGUOUS'] == given.flags['F_CONTIGUOUS']):
            # No copy is made when the input already satisfied the request.
            assert given is checked

    # Other input formats: nested lists are converted to tensors.
    dense = check_array([[1, 2], [3, 4]])
    assert isinstance(dense, Tensor)

    # Lists nested too deeply are rejected unless allow_nd=True.
    with pytest.raises(ValueError):
        check_array(cube.to_numpy().tolist())
    check_array(cube.to_numpy().tolist(), allow_nd=True)  # doesn't raise

    # Objects that merely wrap array data are converted too.
    wrapped = NotAnArray(dense.to_numpy())
    from_wrapped = check_array(wrapped)
    assert isinstance(from_wrapped, Tensor)

    # FutureWarning for string-like data combined with dtype="numeric".
    expected_warn_regex = r"converted to decimal numbers if dtype='numeric'"
    text_rows = [['11', '12'], ['13', 'xx']]
    text_inputs = [text_rows,
                   mt.array(text_rows, dtype='U'),
                   mt.array(text_rows, dtype='S')]
    for text_input in text_inputs:
        with pytest.warns(FutureWarning, match=expected_warn_regex):
            check_array(text_input, dtype="numeric")

    # The same FutureWarning for byte-like data combined with dtype="numeric".
    byte_rows = [[b'a', b'b'], [b'c', b'd']]
    byte_inputs = [byte_rows, mt.array(byte_rows, dtype='V1')]
    for byte_input in byte_inputs:
        with pytest.warns(FutureWarning, match=expected_warn_regex):
            check_array(byte_input, dtype="numeric")

    # Finiteness: the ValueError for NaN is expected inside this block, which
    # covers both the check_array call and the execute() call.
    with_nan = [[1.0, np.nan], [2.0, 3.0]]
    with pytest.raises(ValueError):
        check_array(with_nan).execute()
def test_check_array_from_dataframe():
    """A float-valued DataFrame passes through ``check_array`` as floating data."""
    frame = md.DataFrame({'a': [1.0, 2.0, 3.0]})
    checked = check_array(frame)
    assert checked.dtype.kind == 'f'
def test_check_array(self):
    """Exercise ``check_array``: sparse rejection, dimensionality, dtype/order
    enforcement, alternative input formats and deprecation warnings."""
    nested = [[1, 2], [3, 4]]

    # Sparse input is rejected when accept_sparse is left at its default,
    # first as a scipy matrix and then wrapped in a tensor.
    sparse_matrix = sp.csr_matrix(nested)
    with self.assertRaises(TypeError):
        check_array(sparse_matrix)
    sparse_tensor = mt.tensor(sp.csr_matrix(nested))
    with self.assertRaises(TypeError):
        check_array(sparse_tensor)

    # ensure_2d=False lets a 1-D input through unchanged in rank.
    flat = check_array([0, 1, 2], ensure_2d=False)
    self.assertEqual(flat.ndim, 1)

    # ensure_2d=True rejects 1-D input, whether a list or a tensor.
    assert_raise_message(ValueError,
                         'Expected 2D array, got 1D array instead',
                         check_array, [0, 1, 2], ensure_2d=True)
    assert_raise_message(ValueError,
                         'Expected 2D array, got 1D array instead',
                         check_array, mt.tensor([0, 1, 2]), ensure_2d=True)

    # ensure_2d=True rejects scalars as well.
    assert_raise_message(ValueError,
                         'Expected 2D array, got scalar array instead',
                         check_array, 10, ensure_2d=True)

    # A 3-D input is rejected unless allow_nd=True is passed.
    cube = mt.arange(8).reshape(2, 2, 2)
    with self.assertRaises(ValueError):
        check_array(cube)
    check_array(cube, allow_nd=True)  # doesn't raise

    # dtype and order enforcement over every combination of
    # input layout/dtype, requested dtype, requested order and copy flag.
    c_layout = mt.arange(4).reshape(2, 2).copy("C")
    f_layout = c_layout.copy("F")
    as_int = c_layout.astype(mt.int)
    as_float = c_layout.astype(mt.float)
    inputs = [c_layout, f_layout, as_int, as_float]
    dtypes = [mt.int32, mt.int, mt.float, mt.float32, None, mt.bool, object]
    orders = ['C', 'F', None]
    copy_flags = [True, False]
    for given, dtype, order, copy in product(inputs, dtypes, orders,
                                             copy_flags):
        checked = check_array(given, dtype=dtype, order=order, copy=copy)

        # dtype=None means "keep the input dtype".
        if dtype is None:
            self.assertEqual(checked.dtype, given.dtype)
        else:
            self.assertEqual(checked.dtype, dtype)

        if order == 'C':
            assert checked.flags['C_CONTIGUOUS']
            assert not checked.flags['F_CONTIGUOUS']
        elif order == 'F':
            assert checked.flags['F_CONTIGUOUS']
            assert not checked.flags['C_CONTIGUOUS']

        if copy:
            assert given is not checked
        elif (given.dtype == checked.dtype and
              checked.flags['C_CONTIGUOUS'] == given.flags['C_CONTIGUOUS'] and
              checked.flags['F_CONTIGUOUS'] == given.flags['F_CONTIGUOUS']):
            # No copy is made when the input already satisfied the request.
            assert given is checked

    # NOTE(review): the sparse-format checks below are disabled in the
    # original; presumably sparse format conversion is not supported yet --
    # confirm before re-enabling.
    #
    # # allowed sparse != None
    # X_csc = sp.csc_matrix(X_C)
    # X_coo = X_csc.tocoo()
    # X_dok = X_csc.todok()
    # X_int = X_csc.astype(mt.int)
    # X_float = X_csc.astype(mt.float)
    #
    # Xs = [X_csc, X_coo, X_dok, X_int, X_float]
    # accept_sparses = [['csr', 'coo'], ['coo', 'dok']]
    # for X, dtype, accept_sparse, copy in product(Xs, dtypes, accept_sparses,
    #                                              copys):
    #     with warnings.catch_warnings(record=True) as w:
    #         X_checked = check_array(X, dtype=dtype,
    #                                 accept_sparse=accept_sparse, copy=copy)
    #     if (dtype is object or sp.isspmatrix_dok(X)) and len(w):
    #         message = str(w[0].message)
    #         messages = ["object dtype is not supported by sparse matrices",
    #                     "Can't check dok sparse matrix for nan or inf."]
    #         assert message in messages
    #     else:
    #         self.assertEqual(len(w), 0)
    #     if dtype is not None:
    #         self.assertEqual(X_checked.dtype, dtype)
    #     else:
    #         self.assertEqual(X_checked.dtype, X.dtype)
    #     if X.format in accept_sparse:
    #         # no change if allowed
    #         self.assertEqual(X.format, X_checked.format)
    #     else:
    #         # got converted
    #         self.assertEqual(X_checked.format, accept_sparse[0])
    #     if copy:
    #         assert X is not X_checked
    #     else:
    #         # doesn't copy if it was already good
    #         if X.dtype == X_checked.dtype and X.format == X_checked.format:
    #             assert X is X_checked

    # Other input formats: nested lists are converted to tensors.
    dense = check_array([[1, 2], [3, 4]])
    assert isinstance(dense, Tensor)

    # Lists nested too deeply are rejected unless allow_nd=True.
    with self.assertRaises(ValueError):
        check_array(cube.execute().tolist())
    check_array(cube.execute().tolist(), allow_nd=True)  # doesn't raise

    # Objects that merely wrap array data are converted too.
    wrapped = NotAnArray(dense.execute())
    from_wrapped = check_array(wrapped)
    assert isinstance(from_wrapped, Tensor)

    # FutureWarning for string-like data combined with dtype="numeric".
    expected_warn_regex = r"converted to decimal numbers if dtype='numeric'"
    text_rows = [['11', '12'], ['13', 'xx']]
    text_inputs = [text_rows,
                   mt.array(text_rows, dtype='U'),
                   mt.array(text_rows, dtype='S')]
    for text_input in text_inputs:
        with pytest.warns(FutureWarning, match=expected_warn_regex):
            check_array(text_input, dtype="numeric")

    # The same FutureWarning for byte-like data combined with dtype="numeric".
    byte_rows = [[b'a', b'b'], [b'c', b'd']]
    byte_inputs = [byte_rows, mt.array(byte_rows, dtype='V1')]
    for byte_input in byte_inputs:
        with pytest.warns(FutureWarning, match=expected_warn_regex):
            check_array(byte_input, dtype="numeric")
def test_check_array_from_dataframe(self):
    """A float-valued DataFrame passes through ``check_array`` as floating data."""
    frame = md.DataFrame({'a': [1.0, 2.0, 3.0]})
    checked = check_array(frame)
    self.assertEqual(checked.dtype.kind, 'f')