Exemplo n.º 1
0
def test_sgd_default(dtype, datatype):
    """Fit ``cumlSGD`` with default settings and check its predicted classes.

    Parameters
    ----------
    dtype
        Type the generated features and labels are cast to with ``astype``.
    datatype
        ``"dataframe"`` wraps the training/test features and training labels
        in cudf objects and expects a ``cudf.Series`` back from ``predict``;
        any other value keeps NumPy arrays and expects an ``np.ndarray``.
    """
    features, labels = make_blobs(
        n_samples=100, n_features=3, centers=2, random_state=0
    )
    features = features.astype(dtype)
    labels = labels.astype(dtype)

    # Default loss is squared_loss, so the two classes are relabelled -1 / 1.
    labels[labels == 0] = -1

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, train_size=0.8
    )

    use_cudf = datatype == "dataframe"
    if use_cudf:
        X_train = cudf.DataFrame(X_train)
        X_test = cudf.DataFrame(X_test)
        y_train = cudf.Series(y_train)

    model = cumlSGD()
    model.fit(X_train, y_train)
    cu_pred = model.predict(X_test)

    if use_cudf:
        assert isinstance(cu_pred, cudf.Series)
        cu_pred = cu_pred.to_numpy()
    else:
        assert isinstance(cu_pred, np.ndarray)

    # Adjust for squared loss: we don't need to test for high accuracy, just
    # that the loss function tended towards the expected classes. Both masks
    # are taken before writing; values matching neither mask stay untouched.
    below_zero = cu_pred < 0
    at_or_above_zero = cu_pred >= 0
    cu_pred[below_zero] = -1
    cu_pred[at_or_above_zero] = 1

    assert np.array_equal(cu_pred, y_test)
Exemplo n.º 2
0
def test_svd_default(datatype):
    """Smoke-test a default ``cumlSGD`` on a four-sample toy problem.

    The model is fitted on NumPy inputs of type ``datatype`` and the
    predictions for two unseen points are printed; nothing is asserted.

    Parameters
    ----------
    datatype
        dtype used for the training features, training labels and the
        prediction inputs.
    """
    train_points = [[-1, -1], [-2, -1], [1, 1], [2, 1]]
    train_labels = [1, 1, 2, 2]
    query_points = [[3.0, 5.0], [2.0, 5.0]]

    X_train = np.array(train_points, dtype=datatype)
    y_train = np.array(train_labels, dtype=datatype)
    X_test = np.array(query_points).astype(datatype)

    model = cumlSGD()
    model.fit(X_train, y_train)

    # The prediction object is converted through its ``to_array`` method
    # before being printed.
    predictions = model.predict(X_test)
    cu_pred = predictions.to_array()
    print("cuML predictions : ", cu_pred)
Exemplo n.º 3
0
def test_svd(datatype, lrate, input_type, penalty, loss, name):
    """Smoke-test ``cumlSGD`` on a named dataset; predictions are only printed.

    Note: despite its name, this test exercises ``cumlSGD``.

    Parameters
    ----------
    datatype
        dtype every NumPy input array is created with.
    lrate
        Forwarded to ``cumlSGD`` as ``learning_rate``.
    input_type
        ``'dataframe'`` converts the features to ``cudf.DataFrame`` (through
        pandas) and the labels to ``cudf.Series``; any other value keeps the
        NumPy arrays.
    penalty, loss
        Forwarded unchanged to ``cumlSGD``.
    name
        ``'blobs'`` or ``'iris'`` selects that dataset with its first 80% of
        rows used for training; any other value uses a four-sample toy set.
    """
    if name in ('blobs', 'iris'):
        if name == 'blobs':
            n_samples = 500000
            X, y = make_blobs(n_samples=n_samples,
                              n_features=1000,
                              random_state=0)
            train_rows = int(n_samples * 0.8)
        else:
            iris = datasets.load_iris()
            X, y = iris.data, iris.target
            train_rows = int(np.shape(X)[0] * 0.8)

        # Leading rows train the model; the remaining rows are predicted on.
        X_train = np.array(X[:train_rows], dtype=datatype)
        y_train = np.array(y[:train_rows], dtype=datatype)
        X_test = np.array(X[train_rows:], dtype=datatype)
    else:
        X_train = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]],
                           dtype=datatype)
        y_train = np.array([1, 1, 2, 2], dtype=datatype)
        X_test = np.array([[3.0, 5.0], [2.0, 5.0]]).astype(datatype)

    sgd_params = dict(learning_rate=lrate,
                      eta0=0.005,
                      epochs=2000,
                      fit_intercept=True,
                      batch_size=4096,
                      tol=0.0,
                      penalty=penalty,
                      loss=loss)
    cu_sgd = cumlSGD(**sgd_params)

    if input_type == 'dataframe':
        # Build one pandas column per feature, then hand the frames to cudf.
        label_frame = pd.DataFrame({'fea0': y_train})
        train_frame = pd.DataFrame(
            {'fea%d' % col: X_train[:, col]
             for col in range(X_train.shape[1])})
        test_frame = pd.DataFrame(
            {'fea%d' % col: X_test[:, col]
             for col in range(X_test.shape[1])})
        X_train = cudf.DataFrame.from_pandas(train_frame)
        X_test = cudf.DataFrame.from_pandas(test_frame)
        y_train = cudf.Series(label_frame.values[:, 0])

    cu_sgd.fit(X_train, y_train)
    cu_pred = cu_sgd.predict(X_test).to_array()
    print("cuML predictions : ", cu_pred)
Exemplo n.º 4
0
def test_sgd(dtype, lrate, penalty, loss, datatype):
    """Fit a configured ``cumlSGD`` on two blobs and check predicted classes.

    Parameters
    ----------
    dtype
        Type the generated features and labels are cast to with ``astype``.
    lrate
        Forwarded to ``cumlSGD`` as ``learning_rate``.
    penalty
        Forwarded unchanged to ``cumlSGD``.
    loss
        Forwarded to ``cumlSGD``. For ``"hinge"`` and ``"squared_loss"`` the
        class 0 labels are rewritten to -1. Predictions are thresholded at
        0.5 into 0/1 for ``"log"`` and at 0 into -1/1 for ``"squared_loss"``;
        for any other loss they are compared as returned.
    datatype
        ``"dataframe"`` wraps the training/test features and training labels
        in cudf objects and expects a ``cudf.Series`` back from ``predict``;
        any other value keeps NumPy arrays and expects an ``np.ndarray``.
    """
    features, labels = make_blobs(
        n_samples=100, n_features=3, centers=2, random_state=0
    )
    features = features.astype(dtype)
    labels = labels.astype(dtype)

    if loss in ("hinge", "squared_loss"):
        labels[labels == 0] = -1

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, train_size=0.8
    )

    use_cudf = datatype == "dataframe"
    if use_cudf:
        X_train = cudf.DataFrame(X_train)
        X_test = cudf.DataFrame(X_test)
        y_train = cudf.Series(y_train)

    sgd_params = dict(
        learning_rate=lrate,
        eta0=0.005,
        epochs=2000,
        fit_intercept=True,
        batch_size=4096,
        tol=0.0,
        penalty=penalty,
        loss=loss,
        power_t=0.4,
    )
    model = cumlSGD(**sgd_params)
    model.fit(X_train, y_train)
    cu_pred = model.predict(X_test)

    if use_cudf:
        assert isinstance(cu_pred, cudf.Series)
        cu_pred = cu_pred.to_numpy()
    else:
        assert isinstance(cu_pred, np.ndarray)

    # Snap raw outputs onto class labels: we don't need to test for high
    # accuracy, just that the loss function tended towards the expected
    # classes. Masks are computed before either write.
    if loss == "log":
        low = cu_pred < 0.5
        high = cu_pred >= 0.5
        cu_pred[low] = 0
        cu_pred[high] = 1
    elif loss == "squared_loss":
        low = cu_pred < 0
        high = cu_pred >= 0
        cu_pred[low] = -1
        cu_pred[high] = 1

    assert np.array_equal(cu_pred, y_test)
Exemplo n.º 5
0
def test_svd(datatype, lrate, input_type, penalty, loss):
    """Smoke-test ``cumlSGD`` on a four-sample toy problem; output is printed.

    Note: despite its name, this test exercises ``cumlSGD``.

    The cudf inputs are built from the same NumPy arrays used by the array
    path, so both input types train and predict on identical values and the
    labels keep the requested ``datatype``.

    Parameters
    ----------
    datatype
        dtype used for the features, labels and prediction inputs.
    lrate
        Forwarded to ``cumlSGD`` as ``learning_rate``.
    input_type
        ``'dataframe'`` converts the features and prediction inputs to
        ``cudf.DataFrame`` and the labels to ``cudf.Series``; any other value
        keeps the NumPy arrays.
    penalty, loss
        Forwarded unchanged to ``cumlSGD``.
    """
    X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]], dtype=datatype)
    y = np.array([1, 1, 2, 2], dtype=datatype)
    pred_data = np.array([[3.0, 5.0], [2.0, 5.0]]).astype(datatype)

    if input_type == 'dataframe':
        host_X, host_pred = X, pred_data
        X = cudf.DataFrame()
        pred_data = cudf.DataFrame()
        for idx, col_name in enumerate(('col1', 'col2')):
            # A column slice of a 2-D array is strided; copy it into a
            # contiguous buffer before handing it to cudf.
            X[col_name] = np.ascontiguousarray(host_X[:, idx])
            pred_data[col_name] = np.ascontiguousarray(host_pred[:, idx])
        # Keep the labels in the same dtype as the features.
        y = cudf.Series(y)

    cu_sgd = cumlSGD(learning_rate=lrate, eta0=0.005, epochs=2000,
                     fit_intercept=True, batch_size=2,
                     tol=0.0, penalty=penalty, loss=loss)
    cu_sgd.fit(X, y)
    cu_pred = cu_sgd.predict(pred_data).to_array()
    print("cuML predictions : ", cu_pred)
Exemplo n.º 6
0
def test_svd(datatype, lrate, penalty, loss, name):
    """Smoke-test ``cumlSGD`` on a named dataset; predictions are only printed.

    Note: despite its name, this test exercises ``cumlSGD``.

    Parameters
    ----------
    datatype
        dtype the features and labels are cast to / created with.
    lrate
        Forwarded to ``cumlSGD`` as ``learning_rate``.
    penalty, loss
        Forwarded unchanged to ``cumlSGD``.
    name
        ``'blobs'`` or ``'iris'`` selects that dataset, split with
        ``train_test_split(train_size=0.8)``; any other value uses a
        four-sample toy set.
    """
    if name in ('blobs', 'iris'):
        if name == 'blobs':
            X, y = make_blobs(n_samples=500000,
                              n_features=1000,
                              random_state=0)
        else:
            iris = datasets.load_iris()
            X, y = iris.data, iris.target

        X = X.astype(datatype)
        y = y.astype(datatype)
        # The held-out labels are not needed: predictions are only printed.
        X_train, X_test, y_train, _ = train_test_split(X, y, train_size=0.8)
    else:
        X_train = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]],
                           dtype=datatype)
        y_train = np.array([1, 1, 2, 2], dtype=datatype)
        X_test = np.array([[3.0, 5.0], [2.0, 5.0]]).astype(datatype)

    sgd_params = dict(learning_rate=lrate,
                      eta0=0.005,
                      epochs=2000,
                      fit_intercept=True,
                      batch_size=4096,
                      tol=0.0,
                      penalty=penalty,
                      loss=loss)
    cu_sgd = cumlSGD(**sgd_params)

    cu_sgd.fit(X_train, y_train)
    cu_pred = cu_sgd.predict(X_test).to_array()
    print("cuML predictions : ", cu_pred)