Пример #1
0
def test_check_array_pandas_dtype_object_conversion():
    """Check that object-dtype dataframe-like input is converted to float."""
    # ``np.object`` was only an alias of the builtin ``object``; the alias was
    # deprecated in NumPy 1.20 and later removed, so use the builtin directly.
    X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=object)
    X_df = MockDataFrame(X)
    assert check_array(X_df).dtype.kind == "f"
    assert check_array(X_df, ensure_2d=False).dtype.kind == "f"
    # smoke-test against dataframes with column named "dtype"
    X_df.dtype = "Hans"
    assert check_array(X_df, ensure_2d=False).dtype.kind == "f"
Пример #2
0
def test_check_array_on_mock_dataframe():
    """Check the dtype check_array returns for a mock dataframe."""
    values = np.array([[0.2, 0.7], [0.6, 0.5], [0.4, 0.1], [0.7, 0.2]])
    frame = MockDataFrame(values)

    # Without an explicit dtype the result dtype must equal the input dtype.
    assert check_array(frame).dtype == values.dtype

    # With an explicit dtype the result must carry the requested dtype.
    as_float32 = check_array(frame, dtype=np.float32)
    assert as_float32.dtype == np.dtype(np.float32)
Пример #3
0
def test_clone_pandas_dataframe():
    """Check that cloning keeps a dataframe-like constructor parameter."""

    class DummyEstimator(TransformerMixin, BaseEstimator):
        """Minimal transformer that only stores its constructor arguments.

        Parameters
        ----------
        df : dataframe-like, default=None
            Stored unchanged on the instance.

        scalar_param : default=1
            Stored unchanged on the instance.
        """

        def __init__(self, df=None, scalar_param=1):
            self.scalar_param = scalar_param
            self.df = df

        def fit(self, X, y=None):
            # Intentionally a no-op.
            return None

        def transform(self, X):
            # Intentionally a no-op.
            return None

    # Build an estimator around a mock dataframe, then clone it.
    frame = MockDataFrame(np.arange(10))
    original = DummyEstimator(frame, scalar_param=1)
    duplicate = clone(original)

    # Both constructor parameters must compare equal after cloning.
    frames_match = (original.df == duplicate.df).values
    assert frames_match.all()
    assert original.scalar_param == duplicate.scalar_param
Пример #4
0
def test_shuffle_dont_convert_to_array():
    """Check that shuffle keeps the container type of each input.

    shuffle should not try to convert its inputs to numpy arrays with float
    dtypes; any indexable data structure should pass through.
    """
    letters_list = ['a', 'b', 'c']
    letters_array = np.array(['a', 'b', 'c'], dtype=object)
    numbers_list = [1, 2, 3]
    frame = MockDataFrame(
        np.array([['a', 0], ['b', 1], ['c', 2]], dtype=object))
    sparse = sp.csc_matrix(np.arange(6).reshape(3, 2))

    (shuffled_letters, shuffled_array, shuffled_numbers,
     shuffled_frame, shuffled_sparse) = shuffle(
        letters_list, letters_array, numbers_list, frame, sparse,
        random_state=0)

    # Plain lists come back as plain lists.
    assert shuffled_letters == ['c', 'b', 'a']
    assert type(shuffled_letters) is list

    # Object arrays keep their object dtype.
    assert_array_equal(shuffled_array, ['c', 'b', 'a'])
    assert shuffled_array.dtype == object

    assert shuffled_numbers == [3, 2, 1]
    assert type(shuffled_numbers) is list

    # The dataframe-like container is returned as the same type.
    expected_frame = np.array([['c', 2], ['b', 1], ['a', 0]], dtype=object)
    assert_array_equal(shuffled_frame, expected_frame)
    assert type(shuffled_frame) is MockDataFrame

    expected_dense = np.array([[4, 5], [2, 3], [0, 1]])
    assert_array_equal(shuffled_sparse.toarray(), expected_dense)
Пример #5
0
def test_check_array_complex_data_error():
    """Check that complex-valued input raises a ValueError."""
    pattern = "Complex data not supported"
    first = [1 + 2j, 3 + 4j, 5 + 7j]
    second = [2 + 3j, 4 + 5j, 6 + 7j]

    complex_inputs = [
        np.array([first, second]),                  # 2-D ndarray
        [first, second],                            # list of lists
        (tuple(first), tuple(second)),              # tuple of tuples
        [np.array(first), np.array(second)],        # list of np arrays
        (np.array(first), np.array(second)),        # tuple of np arrays
        MockDataFrame(np.array([first, second])),   # dataframe
        sp.coo_matrix([[0, 1 + 2j], [0, 0]]),       # sparse matrix
    ]
    for X in complex_inputs:
        with pytest.raises(ValueError, match=pattern):
            check_array(X)

    # target variable does not always go through check_array but should
    # never accept complex data either.
    y = np.array(first + second)
    with pytest.raises(ValueError, match=pattern):
        _check_y(y)
Пример #6
0
def test_check_array_complex_data_error():
    """Check that check_array raises a ValueError for complex input."""
    message = "Complex data not supported"
    row_a = [1 + 2j, 3 + 4j, 5 + 7j]
    row_b = [2 + 3j, 4 + 5j, 6 + 7j]

    # The same complex values wrapped in every container kind under test.
    containers = (
        np.array([row_a, row_b]),                   # 2-D ndarray
        [row_a, row_b],                             # list of lists
        (tuple(row_a), tuple(row_b)),               # tuple of tuples
        [np.array(row_a), np.array(row_b)],         # list of np arrays
        (np.array(row_a), np.array(row_b)),         # tuple of np arrays
        MockDataFrame(np.array([row_a, row_b])),    # dataframe
        sp.coo_matrix([[0, 1 + 2j], [0, 0]]),       # sparse matrix
    )
    for X in containers:
        assert_raises_regex(ValueError, message, check_array, X)
Пример #7
0
def test_check_array_complex_data_error():
    """Check that check_array raises a ValueError for complex input."""
    # NOTE(review): "area_data" in this pattern looks like the residue of a
    # bulk rename of "data" - confirm it matches the message that this
    # project's check_array actually raises.
    expected = "Complex area_data not supported"
    top = [1 + 2j, 3 + 4j, 5 + 7j]
    bottom = [2 + 3j, 4 + 5j, 6 + 7j]

    samples = [
        np.array([top, bottom]),                    # 2-D ndarray
        [top, bottom],                              # list of lists
        (tuple(top), tuple(bottom)),                # tuple of tuples
        [np.array(top), np.array(bottom)],          # list of np arrays
        (np.array(top), np.array(bottom)),          # tuple of np arrays
        MockDataFrame(np.array([top, bottom])),     # dataframe
        sp.coo_matrix([[0, 1 + 2j], [0, 0]]),       # sparse matrix
    ]
    for X in samples:
        with pytest.raises(ValueError, match=expected):
            check_array(X)