def test_cross_val_score_with_score_func_regression():
    X, y = make_regression(n_samples=30, n_features=20, n_informative=5,
                           random_state=0)
    reg = Ridge()

    # Default score of the Ridge regression estimator
    scores = cval.cross_val_score(reg, X, y, cv=5)
    assert_array_almost_equal(scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)

    # R2 score (aka. determination coefficient) - should be the
    # same as the default estimator score
    r2_scores = cval.cross_val_score(reg, X, y, scoring="r2", cv=5)
    assert_array_almost_equal(r2_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)

    # Mean squared error; this is a loss function, so "scores" are negative
    mse_scores = cval.cross_val_score(reg, X, y, cv=5,
                                      scoring="mean_squared_error")
    expected_mse = np.array([-763.07, -553.16, -274.38, -273.26, -1681.99])
    assert_array_almost_equal(mse_scores, expected_mse, 2)

    # Explained variance
    with warnings.catch_warnings(record=True):
        ev_scores = cval.cross_val_score(reg, X, y, cv=5,
                                         score_func=explained_variance_score)
    assert_array_almost_equal(ev_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)
Example #2
    def test_multi_predict(self):
        from sklearn.datasets import make_regression
        from sklearn.model_selection import train_test_split

        n = 1000
        X, y = make_regression(n, random_state=rng)
        X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                            random_state=123)
        dtrain = xgb.DMatrix(X_train, label=y_train)
        dtest = xgb.DMatrix(X_test)

        params = {}
        params["tree_method"] = "gpu_hist"

        params['predictor'] = "gpu_predictor"
        bst_gpu_predict = xgb.train(params, dtrain)

        params['predictor'] = "cpu_predictor"
        bst_cpu_predict = xgb.train(params, dtrain)

        predict0 = bst_gpu_predict.predict(dtest)
        predict1 = bst_gpu_predict.predict(dtest)
        cpu_predict = bst_cpu_predict.predict(dtest)

        assert np.allclose(predict0, predict1)
        assert np.allclose(predict0, cpu_predict)
def test_make_regression():
    X, y, c = make_regression(n_samples=100, n_features=10, n_informative=3,
                              effective_rank=5, coef=True, bias=0.0,
                              noise=1.0, random_state=0)

    assert_equal(X.shape, (100, 10), "X shape mismatch")
    assert_equal(y.shape, (100,), "y shape mismatch")
    assert_equal(c.shape, (10,), "coef shape mismatch")
    assert_equal(sum(c != 0.0), 3, "Unexpected number of informative features")

    # Test that y ~= np.dot(X, c) + bias + N(0, 1.0).
    assert_almost_equal(np.std(y - np.dot(X, c)), 1.0, decimal=1)

    # Test with small number of features.
    X, y = make_regression(n_samples=100, n_features=1)  # n_informative=3
    assert_equal(X.shape, (100, 1))
 def testParallelPen(self): 
     #Check if penalisation == inf when treeSize < gamma 
     numExamples = 100
     X, y = data.make_regression(numExamples) 
     learner = DecisionTreeLearner(pruneType="CART", maxDepth=10, minSplit=2)
     
     paramDict = {} 
     paramDict["setGamma"] = numpy.array(numpy.round(2**numpy.arange(1, 10, 0.5)-1), dtype=numpy.int)
     
     folds = 3
     alpha = 1.0
     Cvs = numpy.array([(folds-1)*alpha])
     
     idx = Sampling.crossValidation(folds, X.shape[0])
     
     resultsList = learner.parallelPen(X, y, idx, paramDict, Cvs)
     
     learner, trainErrors, currentPenalties = resultsList[0]
     
     learner.setGamma(2**10)
     treeSize = 0
     #Let's work out the size of the unpruned tree 
     for trainInds, testInds in idx: 
         trainX = X[trainInds, :]
         trainY = y[trainInds]
         
         learner.learnModel(trainX, trainY)
         treeSize += learner.tree.size 
     
     treeSize /= float(folds)         
     
     self.assertTrue(numpy.isinf(currentPenalties[paramDict["setGamma"]>treeSize]).all())      
     self.assertTrue(not numpy.isinf(currentPenalties[paramDict["setGamma"]<treeSize]).all())
def test_partial_dependence_helpers(est, method, target_feature):
    # Check that what is returned by _partial_dependence_brute or
    # _partial_dependence_recursion is equivalent to manually setting a target
    # feature to a given value, and computing the average prediction over all
    # samples.
    # This also checks that the brute and recursion methods give the same
    # output.

    X, y = make_regression(random_state=0)
    # The 'init' estimator for GBDT (here the average prediction) isn't taken
    # into account with the recursion method, for technical reasons. We set
    # the mean to 0 so that this 'bug' doesn't have any effect.
    y = y - y.mean()
    est.fit(X, y)

    # target feature will be set to .5 and then to 123
    features = np.array([target_feature], dtype=np.int32)
    grid = np.array([[.5],
                     [123]])

    if method == 'brute':
        pdp = _partial_dependence_brute(est, grid, features, X,
                                        response_method='auto')
    else:
        pdp = _partial_dependence_recursion(est, grid, features)

    mean_predictions = []
    for val in (.5, 123):
        X_ = X.copy()
        X_[:, target_feature] = val
        mean_predictions.append(est.predict(X_).mean())

    pdp = pdp[0]  # (shape is (1, 2) so make it (2,))
    assert_allclose(pdp, mean_predictions, atol=1e-3)
Example #6
 def __init__(self, n_samples, n_features, n_informative, normalize_y = False, normalize = True, centerdata = True,
              transformation=NullTransformation(), fit_intercept = True):
     self.n_samples = n_samples
     self.n_features = n_features
     X, Y = datasets.make_regression(n_samples=self.n_samples, n_features=self.n_features,
                                               n_informative=n_informative, shuffle=False, random_state=11)
     XTrain, XTest, YTrain, YTest = train_test_split(X, Y, test_size=0.33,random_state=0)
     self.XTrain_orig = XTrain
     self.XTest_orig = XTest
     self.YTrain_orig = YTrain
     self.YTest_orig = YTest
     if centerdata==True:
         self.XTrain, YTrain, X_mean, y_mean, X_std = center_data(XTrain, YTrain, fit_intercept=fit_intercept, normalize = normalize)
         self.XTest, YTest = self.center_test(XTest,YTest,X_mean,y_mean,X_std)
         if normalize_y:
             self.YTrain, self.YTest = self.normalize_labels(YTrain, YTest)
         else:
             self.YTrain = YTrain
             self.YTest = YTest
     else:
         self.XTrain = XTrain
         self.YTrain = YTrain
         self.XTest = XTest
         self.YTest = YTest
     self.transformation = transformation
def test_regression():

    X, y = make_regression(n_samples=1000,
                           n_features=5,
                           n_informative=2,
                           n_targets=1,
                           random_state=123,
                           shuffle=False)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=123)

    svm = SVR(kernel='rbf')
    svm.fit(X_train, y_train)

    imp_vals, imp_all = feature_importance_permutation(
        predict_method=svm.predict,
        X=X_test,
        y=y_test,
        metric='r2',
        num_rounds=1,
        seed=123)

    assert imp_vals.shape == (X_train.shape[1], )
    assert imp_all.shape == (X_train.shape[1], 1)
    assert imp_vals[0] > 0.2
    assert imp_vals[1] > 0.2
    assert sum(imp_vals[3:]) <= 0.01
Example #8
def test_regression_custom_mse():

    X, y = make_regression(n_samples=1000,
                           n_features=5,
                           n_informative=2,
                           n_targets=1,
                           random_state=123,
                           shuffle=False)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=123)

    svm = SVR(kernel='rbf', gamma='auto')
    svm.fit(X_train, y_train)

    imp_vals, imp_all = feature_importance_permutation(
        predict_method=svm.predict,
        X=X_test,
        y=y_test,
        metric=mean_squared_error,
        num_rounds=1,
        seed=123)

    norm_imp_vals = imp_vals / np.abs(imp_vals).max()

    assert imp_vals.shape == (X_train.shape[1], )
    assert imp_all.shape == (X_train.shape[1], 1)
    assert norm_imp_vals[0] == -1.
 def testRecursiveSetPrune(self): 
     numExamples = 1000
     X, y = data.make_regression(numExamples)  
     
     y = Standardiser().normaliseArray(y)
     
     numTrain = int(numpy.round(numExamples * 0.66))
     
     trainX = X[0:numTrain, :]
     trainY = y[0:numTrain]
     testX = X[numTrain:, :]
     testY = y[numTrain:]
     
     learner = DecisionTreeLearner()
     learner.learnModel(trainX, trainY)
     
     rootId = (0,)
     learner.tree.getVertex(rootId).setTestInds(numpy.arange(testX.shape[0]))
     learner.recursiveSetPrune(testX, testY, rootId)
     
     for vertexId in learner.tree.getAllVertexIds(): 
         tempY = testY[learner.tree.getVertex(vertexId).getTestInds()]
         predY = numpy.ones(tempY.shape[0])*learner.tree.getVertex(vertexId).getValue()
         error = numpy.sum((tempY-predY)**2)
         self.assertAlmostEquals(error, learner.tree.getVertex(vertexId).getTestError())
         
     #Check leaf indices form all indices 
     inds = numpy.array([])        
     
     for vertexId in learner.tree.leaves(): 
         inds = numpy.union1d(inds, learner.tree.getVertex(vertexId).getTestInds())
         
     nptst.assert_array_equal(inds, numpy.arange(testY.shape[0]))
Example #10
def regression_data():
    X, y = make_regression(
        1000, 20, n_informative=10, bias=0, random_state=0)
    X, y = X.astype(np.float32), y.astype(np.float32).reshape(-1, 1)
    Xt = StandardScaler().fit_transform(X)
    yt = StandardScaler().fit_transform(y)
    return Xt, yt
Example #11
def test_shuffle():
    # Test that the shuffle parameter affects the training process (it should)
    X, y = make_regression(n_samples=50, n_features=5, n_targets=1,
                           random_state=0)

    # The coefficients will be identical if both do or do not shuffle
    for shuffle in [True, False]:
        mlp1 = MLPRegressor(hidden_layer_sizes=1, max_iter=1, batch_size=1,
                            random_state=0, shuffle=shuffle)
        mlp2 = MLPRegressor(hidden_layer_sizes=1, max_iter=1, batch_size=1,
                            random_state=0, shuffle=shuffle)
        mlp1.fit(X, y)
        mlp2.fit(X, y)

        assert np.array_equal(mlp1.coefs_[0], mlp2.coefs_[0])

    # The coefficients will be slightly different if shuffle=True
    mlp1 = MLPRegressor(hidden_layer_sizes=1, max_iter=1, batch_size=1,
                        random_state=0, shuffle=True)
    mlp2 = MLPRegressor(hidden_layer_sizes=1, max_iter=1, batch_size=1,
                        random_state=0, shuffle=False)
    mlp1.fit(X, y)
    mlp2.fit(X, y)

    assert not np.array_equal(mlp1.coefs_[0], mlp2.coefs_[0])
def test_check_gcv_mode_error(mode):
    X, y = make_regression(n_samples=5, n_features=2)
    gcv = RidgeCV(gcv_mode=mode)
    with pytest.raises(ValueError, match="Unknown value for 'gcv_mode'"):
        gcv.fit(X, y)
    with pytest.raises(ValueError, match="Unknown value for 'gcv_mode'"):
        _check_gcv_mode(X, mode)
def test_check_gcv_mode_choice(sparse, mode, mode_n_greater_than_p,
                               mode_p_greater_than_n):
    X, _ = make_regression(n_samples=5, n_features=2)
    if sparse:
        X = sp.csr_matrix(X)
    assert _check_gcv_mode(X, mode) == mode_n_greater_than_p
    assert _check_gcv_mode(X.T, mode) == mode_p_greater_than_n
Example #14
def test_multioutput_regression():
    # Test that multi-output regression works as expected
    X, y = make_regression(n_samples=200, n_targets=5)
    mlp = MLPRegressor(solver='lbfgs', hidden_layer_sizes=50, max_iter=200,
                       random_state=1)
    mlp.fit(X, y)
    assert_greater(mlp.score(X, y), 0.9)
Example #15
    def test_levenberg_marquardt(self):
        dataset = datasets.make_regression(n_samples=50, n_features=2)
        data, target = dataset

        data_scaler = preprocessing.MinMaxScaler()
        target_scaler = preprocessing.MinMaxScaler()

        x_train, x_test, y_train, y_test = train_test_split(
            data_scaler.fit_transform(data),
            target_scaler.fit_transform(target.reshape(-1, 1)),
            train_size=0.85
        )

        lmnet = algorithms.LevenbergMarquardt(
            connection=[
                layers.Input(2),
                layers.Sigmoid(6),
                layers.Sigmoid(1),
            ],
            mu_update_factor=2,
            mu=0.1,
            verbose=False,
            show_epoch=1,
        )
        lmnet.train(x_train, y_train, epochs=4)
        error = lmnet.prediction_error(x_test, y_test)

        self.assertAlmostEqual(0.006, error, places=3)
Example #16
def regr_data():
    return make_regression(
        n_samples=2000,
        n_targets=1,
        n_informative=10,
        random_state=0,
    )
Example #17
def svm_example(n_samples = 10000, n_features = 100):
	from sklearn.svm import SVR
	from sklearn.datasets import make_regression

	X,Y = make_regression(n_samples, n_features)
	m = SVR()

	m.fit(X,Y)
Example #18
    def test_with_pandas_df(self):
        x, y = make_regression(random_state=561)
        df = pd.DataFrame(x)
        df['y'] = y

        m = ElasticNet(n_splits=3, random_state=123)
        m = m.fit(df.drop(['y'], axis=1), df.y)
        sanity_check_regression(m, x)
 def test_fit_continuous(self):
     """
     Should not allow any target type other than binary or multiclass
     """
     X, y = make_regression()
     with pytest.raises(YellowbrickValueError, match="does not support target type"):
         oz = PrecisionRecallCurve(LinearSVC())
         oz.fit(X, y)
def get_weights_regression(min_weight, max_weight):
    rng = np.random.RandomState(199)
    n = 10000
    sparsity = 0.25
    X, y = datasets.make_regression(n, random_state=rng)
    X = np.array([[np.nan if rng.uniform(0, 1) < sparsity else x for x in x_row] for x_row in X])
    w = np.array([rng.uniform(min_weight, max_weight) for i in range(n)])
    return X, y, w
Example #21
def test_multi_target_regression_one_target():
    # Test multi target regression raises
    X, y = datasets.make_regression(n_targets=1)
    X_train, y_train = X[:50], y[:50]
    X_test, y_test = X[50:], y[50:]

    rgr = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    assert_raises(ValueError, rgr.fit, X_train, y_train)
    def test_lasso_cdregressor_pickle(self):
        X, y = make_regression()
        est = _CDRegressor(fit_intercept=True)
        est.fit(X, y)

        buf = pickle.dumps(est)
        est2 = pickle.loads(buf)
        np.testing.assert_array_equal(est.coef_, est2.coef_)
def test_same_predictions_regression(seed, min_samples_leaf, n_samples,
                                     max_leaf_nodes):
    # Make sure sklearn has the same predictions as lightgbm for easy targets.
    #
    # In particular when the size of the trees are bound and the number of
    # samples is large enough, the structure of the prediction trees found by
    # LightGBM and sklearn should be exactly identical.
    #
    # Notes:
    # - Several candidate splits may have equal gains when the number of
    #   samples in a node is low (and because of float errors). Therefore the
    #   predictions on the test set might differ if the structure of the tree
    #   is not exactly the same. To avoid this issue we only compare the
    #   predictions on the test set when the number of samples is large enough
    #   and max_leaf_nodes is low enough.
    # - To ignore discrepancies caused by small differences in the binning
    #   strategy, the data is pre-binned if n_samples > 255.

    rng = np.random.RandomState(seed=seed)
    n_samples = n_samples
    max_iter = 1
    max_bins = 256

    X, y = make_regression(n_samples=n_samples, n_features=5,
                           n_informative=5, random_state=0)

    if n_samples > 255:
        # bin data and convert it to float32 so that the estimator doesn't
        # treat it as pre-binned
        X = _BinMapper(max_bins=max_bins).fit_transform(X).astype(np.float32)

    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=rng)

    est_sklearn = HistGradientBoostingRegressor(
        max_iter=max_iter,
        max_bins=max_bins,
        learning_rate=1,
        n_iter_no_change=None,
        min_samples_leaf=min_samples_leaf,
        max_leaf_nodes=max_leaf_nodes)
    est_lightgbm = get_equivalent_estimator(est_sklearn, lib='lightgbm')

    est_lightgbm.fit(X_train, y_train)
    est_sklearn.fit(X_train, y_train)

    # We need X to be treated as numerical data, not pre-binned data.
    X_train, X_test = X_train.astype(np.float32), X_test.astype(np.float32)

    pred_lightgbm = est_lightgbm.predict(X_train)
    pred_sklearn = est_sklearn.predict(X_train)
    # less than 1% of the predictions are different up to the 3rd decimal
    assert np.mean(abs(pred_lightgbm - pred_sklearn) > 1e-3) < .011

    if max_leaf_nodes < 10 and n_samples >= 1000:
        pred_lightgbm = est_lightgbm.predict(X_test)
        pred_sklearn = est_sklearn.predict(X_test)
        # less than 1% of the predictions are different up to the 4th decimal
        assert np.mean(abs(pred_lightgbm - pred_sklearn) > 1e-4) < .01
def test_multioutput_regression():
    """Test whether multi-output regression works as expected."""
    X, y = make_regression(n_samples=200, n_targets=5,
                           random_state=random_state)
    for activation in ACTIVATION_TYPES:
        elm = ELMRegressor(n_hidden=300, activation=activation,
                           random_state=random_state)
        elm.fit(X, y)
        assert_greater(elm.score(X, y), 0.95)
Example #25
    def test_score(self):
        """
        Assert the score method returns an R2 value
        """
        visualizer = AlphaSelection(RidgeCV())

        X, y = make_regression(random_state=352)
        visualizer.fit(X, y)
        assert visualizer.score(X, y) == pytest.approx(0.9999780266590336)
Example #26
    def test_dynamic_classes(self):
        test_classes = {
            algorithms.GradientDescent: {},
            algorithms.MinibatchGradientDescent: {'batch_size': 10},
            algorithms.Momentum: {'momentum': 0.5},
        }

        for algorithm_class, algorithm_params in test_classes.items():
            optimization_classes = [algorithms.WeightDecay,
                                    algorithms.SearchThenConverge]

            bpnet = algorithm_class(
                (3, 5, 1),
                addons=optimization_classes,
                verbose=False,
                **algorithm_params
            )
            data, target = datasets.make_regression(n_features=3, n_targets=1)

            data = preprocessing.MinMaxScaler().fit_transform(data)
            target_scaler = preprocessing.MinMaxScaler()
            target = target_scaler.fit_transform(target.reshape(-1, 1))

            with tempfile.NamedTemporaryFile() as temp:
                valid_class_name = bpnet.__class__.__name__
                dill.dump(bpnet, temp)
                temp.file.seek(0)

                restored_bpnet = dill.load(temp)
                restored_class_name = restored_bpnet.__class__.__name__
                temp.file.seek(0)

                self.assertEqual(valid_class_name, restored_class_name)
                self.assertEqual(optimization_classes,
                                 restored_bpnet.addons)

                bpnet.train(data, target, epochs=10)
                real_bpnet_error = bpnet.prediction_error(data, target)
                updated_input_weight = (
                    bpnet.input_layer.weight.get_value().copy()
                )

                dill.dump(bpnet, temp)
                temp.file.seek(0)

                restored_bpnet2 = dill.load(temp)
                temp.file.seek(0)
                restored_bpnet_error = restored_bpnet2.prediction_error(
                    data, target
                )

                np.testing.assert_array_equal(
                    updated_input_weight,
                    restored_bpnet2.input_layer.weight.get_value()
                )
                # The restored network must produce the same prediction error
                self.assertEqual(real_bpnet_error, restored_bpnet_error)
Example #27
 def setUp(self):
     super(QuickPropTestCase, self).setUp()
     data, target = datasets.make_regression(n_samples=1500, n_features=5,
                                             n_informative=5, n_targets=1,
                                             random_state=33)
     target = preprocessing.MinMaxScaler().fit_transform(target)
     self.data = cross_validation.train_test_split(data, target,
                                                   train_size=0.75)
     self.connection = (5, 10, 1)
def get_sparse():
    rng = np.random.RandomState(199)
    n = 5000
    sparsity = 0.75
    X, y = datasets.make_regression(n, random_state=rng)
    X = np.array([[0.0 if rng.uniform(0, 1) < sparsity else x for x in x_row] for x_row in X])
    from scipy import sparse
    X = sparse.csr_matrix(X)
    return X, y
Example #29
def test_multioutput_regression():
    """Test that multi-output regression works as expected"""
    X, y = make_regression(n_samples=200, n_targets=5)
    mlp = MultilayerPerceptronRegressor(algorithm='l-bfgs',
                                        hidden_layer_sizes=50,
                                        max_iter=200,
                                        random_state=1)
    mlp.fit(X, y)
    assert_greater(mlp.score(X, y), 0.9)
Example #30
def build_regression(with_preprocessor=False):
  """Basic array for testing when using a preprocessor"""
  X, y = shuffle(*make_regression(n_samples=100, n_features=5,
                                  random_state=SEED),
                 random_state=SEED)
  indices = shuffle(np.arange(X.shape[0]), random_state=SEED).astype(int)
  if with_preprocessor:
    return Dataset(indices, y[indices], X, indices)
  else:
    return Dataset(X[indices], y[indices], None, X[indices])
Example #31
def test_multi_target_regression_one_target():
    # Test multi target regression raises
    X, y = datasets.make_regression(n_targets=1)
    rgr = MultiOutputRegressor(GradientBoostingRegressor(random_state=0))
    assert_raises(ValueError, rgr.fit, X, y)
if __name__ == '__main__':
    from sklearn.datasets import make_classification, make_regression
    from sklearn.metrics import mean_squared_error, accuracy_score
    import time
    from sklearn.model_selection import train_test_split

    for i in range(50, 400, 10):
        T = 1000

        # X, y = make_classification(n_samples=T, n_classes=3, n_informative=4, n_features=18)
        #
        # X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3)
        #
        # t1 = time.time()
        #
        #
        # clf = XGBClassifier(n_estimators=55, max_depth=8,random_state=7).train(X_train,y_train,(X_test,y_test))

        # print('c', i, t2 - t1)
        # print(accuracy_score(y_test, clf.predict(X_test)))
        t2 = time.time()
        X, y = make_regression(n_samples=T)
        X_train, X_test, y_train, y_test = train_test_split(X, y)
        clf = XGBRegressor(n_estimators=2,
                           max_depth=30).train(X_train, y_train,
                                               (X_test, y_test))
        print(mean_squared_error(y, clf.predict(X)))
        t3 = time.time()
        print('R', i, t3 - t2)
from sklearn.datasets import make_regression
import matplotlib.pyplot as plt
import numpy as np

X, y= make_regression(n_samples=100, n_features=1, noise=0.4, bias=50)


def plotLine(theta0, theta1, X, y):
    max_x = np.max(X) + 100
    min_x = np.min(X) - 100


    xplot = np.linspace(min_x, max_x, 1000)
    yplot = theta0 + theta1 * xplot



    plt.plot(xplot, yplot, color='#58b970', label='Regression Line')

    plt.scatter(X,y)
    plt.axis([-10, 10, 0, 200])
    plt.show()



def hypothesis(theta0, theta1, x):
    return theta0 + (theta1*x) 

def cost(theta0, theta1, X, y):
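    # Hypothetical completion (the original snippet is cut off here): mean
    # squared error cost with the usual 1/(2m) convention, reusing the
    # hypothesis() helper defined above.
    preds = hypothesis(theta0, theta1, X.flatten())
    return np.sum((preds - y) ** 2) / (2 * len(y))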
from time import time
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import ShuffleSplit
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from my_library import print_gscv_score
from my_library import print_score
from my_library import yyplot

print(__doc__)

start = time()

X, y = make_regression(n_samples=100, n_features=2, n_informative=2)
scaler = MinMaxScaler()
scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

mod = SVR()

# search range
# range_c = 2**np.arange(  -5, 11, dtype=float)
# range_e = 2**np.arange( -10,  1, dtype=float)
# range_g = 2**np.arange( -20, 11, dtype=float)
# 196.29 seconds
range_c = 2**np.arange(-5, 11, dtype=float)
range_e = 2**np.arange(-10, 1, dtype=float)
Example #35
def test_make_reg():
    X, y = make_regression(1000)
    assert apply_toy_on(X, y)
Example #36
"""
Test the data module.
"""

import pytest
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from imblearn.pipeline import Pipeline
from ...preprocessing import FeatureSelector, RowSelector

X, y = make_regression(n_features=10)


@pytest.mark.parametrize('indices', [([0, 1, 2]), ([5]), (np.arange(0, 10))])
def test_feature_selector(indices):
    selector = FeatureSelector(indices=indices)
    X_t = selector.fit_transform(X, y)
    assert X_t.shape[0] == X.shape[0]
    assert X_t.shape[1] == len(indices)
    assert np.array_equal(X_t, X[:, indices])


def test_default_feature_selector():
    selector = FeatureSelector()
    X_t = selector.fit_transform(X, y)
    assert np.array_equal(X_t, X)


def test_feature_selector_pipeline_integration():
    pipeline = Pipeline([('selector', FeatureSelector(indices=[0, 2])),
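                         # Hypothetical completion (the original snippet is cut
                         # off here), assuming a plain LinearRegression step:
                         ('model', LinearRegression())])
    pipeline.fit(X, y)
    assert pipeline.predict(X).shape == y.shape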
Example #37
def compute_bench(samples_range, features_range):

    it = 0

    results = defaultdict(lambda: [])

    max_it = len(samples_range) * len(features_range)
    for n_samples in samples_range:
        for n_features in features_range:
            it += 1
            print('====================')
            print('Iteration %03d of %03d' % (it, max_it))
            print('====================')
            dataset_kwargs = {
                'n_samples': n_samples,
                'n_features': n_features,
                'n_informative': n_features // 10,
                'effective_rank': min(n_samples, n_features) / 10,
                #'effective_rank': None,
                'bias': 0.0,
            }
            print("n_samples: %d" % n_samples)
            print("n_features: %d" % n_features)
            X, y = make_regression(**dataset_kwargs)

            gc.collect()
            print("benchmarking lars_path (with Gram):", end='')
            sys.stdout.flush()
            tstart = time()
            G = np.dot(X.T, X)  # precomputed Gram matrix
            Xy = np.dot(X.T, y)
            lars_path_gram(Xy=Xy, Gram=G, n_samples=y.size, method='lasso')
            delta = time() - tstart
            print("%0.3fs" % delta)
            results['lars_path (with Gram)'].append(delta)

            gc.collect()
            print("benchmarking lars_path (without Gram):", end='')
            sys.stdout.flush()
            tstart = time()
            lars_path(X, y, method='lasso')
            delta = time() - tstart
            print("%0.3fs" % delta)
            results['lars_path (without Gram)'].append(delta)

            gc.collect()
            print("benchmarking lasso_path (with Gram):", end='')
            sys.stdout.flush()
            tstart = time()
            lasso_path(X, y, precompute=True)
            delta = time() - tstart
            print("%0.3fs" % delta)
            results['lasso_path (with Gram)'].append(delta)

            gc.collect()
            print("benchmarking lasso_path (without Gram):", end='')
            sys.stdout.flush()
            tstart = time()
            lasso_path(X, y, precompute=False)
            delta = time() - tstart
            print("%0.3fs" % delta)
            results['lasso_path (without Gram)'].append(delta)

    return results
Example #38
'''
    Linear regression
    Independent variable: continuous, dependent variable: continuous

    Regression analysis derives the linear-regression equation that minimizes
    the residual sum of squares over the data.
'''
import statsmodels.api as sm
from sklearn.datasets import make_regression
import numpy as np

np.random.seed(8)

# Method 1: use make_regression. No model is built; this is just a dataset for
# practicing linear regression.
x, y, coef = make_regression(n_samples=50, n_features=1, bias=100,
                             coef=True)  # sample count, feature, intercept, slope
print(x[:5])  # sample independent-variable values (e.g. x = -0.67283393)
print(y[:5])  # sample dependent-variable values (e.g. y = 99.40304252), produced by the model
print(coef)  # slope: 0.8872285585150852
# With y = w*x + b:  0.8872285585150852 * x + 100 = 99.40304252
yhat = 0.8872285585150852 * -0.67283393 + 100  # actual value
print('yhat :', yhat)

yhat = 0.8872285585150852 * 1.1395335 + 100
print('yhat :', yhat)

new_x = 0.5
pred_yhat = 0.8872285585150852 * new_x + 100
print('pred_yhat :', pred_yhat, '\n\n')  # linear-regression prediction for the new value new_x
# Because the predictions made from the existing data differ little from the
# actual values, the prediction for a new x can be considered trustworthy.

# Method 2: use LinearRegression(); an actual model is created
Example #39
from sklearn.exceptions import NotFittedError
from sklearn.compose import make_column_transformer

# To use this experimental feature, we need to explicitly ask for it:
from sklearn.experimental import enable_hist_gradient_boosting  # noqa
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.ensemble._hist_gradient_boosting.loss import _LOSSES
from sklearn.ensemble._hist_gradient_boosting.loss import LeastSquares
from sklearn.ensemble._hist_gradient_boosting.loss import BinaryCrossEntropy
from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower
from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper
from sklearn.utils import shuffle

X_classification, y_classification = make_classification(random_state=0)
X_regression, y_regression = make_regression(random_state=0)
X_multi_classification, y_multi_classification = make_classification(
    n_classes=3, n_informative=3, random_state=0)


def _make_dumb_dataset(n_samples):
    """Make a dumb dataset to test early stopping."""
    rng = np.random.RandomState(42)
    X_dumb = rng.randn(n_samples, 1)
    y_dumb = (X_dumb[:, 0] > 0).astype('int64')
    return X_dumb, y_dumb


@pytest.mark.parametrize(
    'GradientBoosting, X, y',
    [(HistGradientBoostingClassifier, X_classification, y_classification),
Example #40
3) Create the loss function and the optimizer
4) Create the training loop
   - forward pass: compute the prediction and the loss
   - backward pass: compute the gradients
   - update weights: adjust the model's weights
'''
# prepare the data

import torch
import torch.nn as nn
import numpy as np
from sklearn import datasets
import matplotlib.pyplot as plt

X_np, y_np = datasets.make_regression(n_samples=100,
                                      n_features=1,
                                      noise=20,
                                      random_state=1)

X = torch.from_numpy(X_np.astype(np.float32))
y = torch.from_numpy(y_np.astype(np.float32))

y = y.view(y.shape[0], 1)

# Create the model
n_sample, n_features = X.shape
model = nn.Linear(n_features, 1)

# Loss function
criterion = nn.MSELoss()

# Optimizer
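# The original snippet stops here. A minimal sketch (not from the source) of
# steps 3-4 from the outline above: an SGD optimizer and a basic training loop.
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Training loop
num_epochs = 100
for epoch in range(num_epochs):
    y_pred = model(X)            # forward pass: prediction
    loss = criterion(y_pred, y)  # compute the loss
    loss.backward()              # backward pass: compute gradients
    optimizer.step()             # update the weights
    optimizer.zero_grad()        # clear gradients for the next iteration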
from sklearn import datasets
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# make_regression data
X, y = datasets.make_regression(n_samples=100,
                                n_features=2,
                                n_informative=10,
                                n_targets=1,
                                bias=0.0,
                                effective_rank=None,
                                tail_strength=0.5,
                                noise=0.0,
                                shuffle=True,
                                coef=False,
                                random_state=None)

print('X = ')
print(X)
print('y = ')
print(y)

fig = plt.figure()
ax = Axes3D(fig)
ax.set_xlabel("X1")
ax.set_ylabel("X2")
ax.set_zlabel("X3")

ax.plot(X[:, 0], X[:, 1], y, marker="o", linestyle='None')
plt.show()
Example #42
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.datasets import make_regression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

X, y = make_regression(n_samples=100,
                       n_features=1,
                       n_informative=1,
                       effective_rank=1,
                       tail_strength=1,
                       noise=3,
                       random_state=1)

X_scal = StandardScaler().fit_transform(X)
y_scal = StandardScaler().fit_transform(y.reshape(-1, 1))

X_train, X_test, y_train, y_test = train_test_split(X_scal,
                                                    y_scal,
                                                    test_size=0.3,
                                                    random_state=1)

model = Sequential()
model.add(Dense(10, activation='linear', input_dim=X.shape[1]))
model.add(Dense(1, activation='linear'))
Example #43
# %% [markdown]
# ## LinReg with PyTorch and Gradient Descent
#
# Previously, we had to do some math to calculate the optimal $\hat\beta$.
# PyTorch calculates the gradients for us automatically (more on that later)
# and we can use some version of gradient descent to find our $\hat\beta$.

# %%
from sklearn.datasets import make_regression

n_features = 1
n_samples = 100

X, y = make_regression(
    n_samples=n_samples,
    n_features=n_features,
    noise=10,
)

dom_np = np.linspace(X.min(), X.max(), 20)
dom = torch.from_numpy(dom_np).unsqueeze(-1).float()

fig, ax = plt.subplots()
ax.plot(X, y, ".")

# %%
X = torch.from_numpy(X).float()
y = torch.from_numpy(y).float().unsqueeze(-1)
X.shape, y.shape

# %%
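# The original cell ends here; what follows is a hypothetical continuation (not
# from the source) showing one way the gradient-descent fit described above
# could look, assuming a plain SGD loop on a single weight and bias.
beta = torch.zeros(n_features, 1, requires_grad=True)
bias = torch.zeros(1, requires_grad=True)
lr = 0.1

for step in range(200):
    y_hat = X @ beta + bias           # forward pass
    loss = ((y_hat - y) ** 2).mean()  # mean squared error
    loss.backward()                   # gradients computed automatically
    with torch.no_grad():
        beta -= lr * beta.grad        # gradient-descent update
        bias -= lr * bias.grad
        beta.grad.zero_()
        bias.grad.zero_()

ax.plot(dom_np, (dom @ beta + bias).detach().numpy(), "-")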
from sklearn.tree.tests.test_tree import assert_is_subtree


# toy sample
X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
y = [-1, -1, -1, 1, 1, 1]


# (X, y), n_targets  <-- as expected in the output of partial_dep()
binary_classification_data = (make_classification(n_samples=50,
                                                  random_state=0), 1)
multiclass_classification_data = (make_classification(n_samples=50,
                                                      n_classes=3,
                                                      n_clusters_per_class=1,
                                                      random_state=0), 3)
regression_data = (make_regression(n_samples=50, random_state=0), 1)
multioutput_regression_data = (make_regression(n_samples=50, n_targets=2,
                                               random_state=0), 2)

# iris
iris = load_iris()


@pytest.mark.filterwarnings("ignore:A Bunch will be returned")
@pytest.mark.parametrize('Estimator, method, data', [
    (GradientBoostingClassifier, 'auto', binary_classification_data),
    (GradientBoostingClassifier, 'auto', multiclass_classification_data),
    (GradientBoostingClassifier, 'brute', binary_classification_data),
    (GradientBoostingClassifier, 'brute', multiclass_classification_data),
    (GradientBoostingRegressor, 'auto', regression_data),
    (GradientBoostingRegressor, 'brute', regression_data),
Example #45
def test_group_lasso_paspal():
    """Test function for the module."""
    from sklearn.datasets import make_regression
    X, y, coef = make_regression(n_features=10, coef=True, n_informative=5)

    group_lasso_overlap_paspal(X, y, np.ones(10), 0.1)
Example #46
-------------------------------------------------
   File Name: class
   Description: LightGBM regression example with early stopping (sklearn API)
   Email : [email protected]
   Date: 2018/3/20
"""
from lightgbm import LGBMRegressor
from sklearn.datasets import make_regression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV, train_test_split

# Load the data
data, target = make_regression(n_samples=1000,
                               n_features=10,
                               n_targets=1,
                               n_informative=8,
                               noise=0.1,
                               random_state=12,
                               bias=1.2)

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(data,
                                                    target,
                                                    test_size=0.2)

# Build the model
gbm = LGBMRegressor(objective='regression',
                    num_leaves=31,
                    learning_rate=0.05,
                    n_estimators=500)
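# The snippet is cut off here. A minimal completion sketch (not from the
# original): fit with an evaluation set for early stopping. Note that older
# LightGBM releases take early_stopping_rounds in fit(), while newer ones use
# callbacks=[lightgbm.early_stopping(20)] instead.
gbm.fit(X_train, y_train,
        eval_set=[(X_test, y_test)],
        eval_metric='l2',
        early_stopping_rounds=20)

y_pred = gbm.predict(X_test)
print('test MSE:', mean_squared_error(y_test, y_pred))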
Example #47
# -*- coding: utf-8 -*-
"""
Created on Thu Aug 17 11:24:21 2017

@author: arellave
"""

from sklearn.datasets import make_regression
import numpy as np

reg_data, reg_target = make_regression(n_samples=200,
                                       n_features=500,
                                       n_informative=10,
                                       noise=2)

from sklearn.linear_model import Lars
lars = Lars(n_nonzero_coefs=10)
lars.fit(reg_data, reg_target)

print(np.sum(lars.coef_ != 0))

train_n = 100
lars_12 = Lars(n_nonzero_coefs=12)
lars_12.fit(reg_data[:train_n], reg_target[:train_n])

lars_500 = Lars()
lars_500.fit(reg_data[:train_n], reg_target[:train_n])

#Printing squared error
print(
    np.mean(
# calculate akaike information criterion for a linear regression model
from math import log
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error


# calculate aic for regression
def calculate_aic(n, mse, num_params):
    aic = n * log(mse) + 2 * num_params
    return aic


# generate dataset
X, y = make_regression(n_samples=100, n_features=2, noise=0.1)
# define and fit the model on all data
model = LinearRegression()
model.fit(X, y)
# number of parameters
num_params = len(model.coef_) + 1
print('Number of parameters: %d' % (num_params))
# predict the training set
yhat = model.predict(X)
# calculate the error
mse = mean_squared_error(y, yhat)
print('MSE: %.3f' % mse)
# calculate the aic
aic = calculate_aic(len(y), mse, num_params)
print('AIC: %.3f' % aic)
Example #49
import numpy as np
from sklearn import datasets
import matplotlib.pyplot as plt

X, y = datasets.make_regression(
        n_samples=60,
        n_features=1,
        noise=10,
        random_state=42)

# put 1 for bias
X = np.concatenate([np.ones((60, 1)), X], -1)

X_train, y_train = X[:50], y[:50]
X_test, y_test = X[50:], y[50:]

# linear regression
# (one way to fill this in: ordinary least squares via np.linalg.lstsq)
w = np.linalg.lstsq(X_train, y_train, rcond=None)[0]

# compute test error
pred_test = np.dot(X_test, w)
test_err = ((y_test - pred_test)**2).mean()
print('MSE: %.4f' % (test_err))

# plot
X_ = np.linspace(X.min(), X.max(), 100)
X_ = np.concatenate([np.ones((100, 1)), X_[:,None]], -1)
plt.plot(X_[:,1], np.dot(X_, w), 'k')
plt.scatter(X_train[:,1], y_train, edgecolor='b', facecolor='white', label='train')
plt.scatter(X_test[:,1], y_test, edgecolor='r', facecolor='white', label='test')
Example #50
import numpy as np
from matplotlib import pyplot as plt

from sklearn import linear_model, datasets

n_samples = 1000
n_outliers = 50

X, y, coef = datasets.make_regression(n_samples=n_samples,
                                      n_features=1,
                                      n_informative=1,
                                      noise=10,
                                      coef=True,
                                      random_state=0)

# Add outlier data
np.random.seed(0)
X[:n_outliers] = 3 + 0.5 * np.random.normal(size=(n_outliers, 1))
y[:n_outliers] = -3 + 10 * np.random.normal(size=n_outliers)

# Fit line using all data
lr = linear_model.LinearRegression()
lr.fit(X, y)

# Robustly fit linear model with RANSAC algorithm
ransac = linear_model.RANSACRegressor()
ransac.fit(X, y)
inlier_mask = ransac.inlier_mask_
outlier_mask = np.logical_not(inlier_mask)

# Predict data of estimated models
feature_names_fruits = ['height', 'width', 'mass', 'color_score']
X_fruits = fruits[feature_names_fruits]
y_fruits = fruits['fruit_label']
target_names_fruits = ['apple', 'mandarin', 'orange', 'lemon']

X_fruits_2d = fruits[['height', 'width']]
y_fruits_2d = fruits['fruit_label']

# synthetic dataset for simple regression
from sklearn.datasets import make_regression

plt.figure()
plt.title('Sample regression problem with one input variable')
X_R1, y_R1 = make_regression(n_samples=100,
                             n_features=1,
                             n_informative=1,
                             bias=150.0,
                             noise=30,
                             random_state=0)
plt.scatter(X_R1, y_R1, marker='o', s=50)
plt.show()

# synthetic dataset for more complex regression
from sklearn.datasets import make_friedman1

plt.figure()
plt.title('Complex regression problem with one input variable')
X_F1, y_F1 = make_friedman1(n_samples=100, n_features=7, random_state=0)

plt.scatter(X_F1[:, 2], y_F1, marker='o', s=50)
plt.show()
            "font.serif": ["Times", "Palatino", "serif"]
        })


plt.rcParams["text.usetex"] = True
set_style()
random_state = 414
saving_fig = False  # set to True to save images

# dataset = "synthetic_unco"  # Fig a
dataset = "synthetic"  # Fig b

if dataset is "synthetic":
    n_samples, n_features = (500, 5000)
    X, y = make_regression(n_samples=n_samples,
                           n_features=n_features,
                           random_state=random_state)

if dataset is "synthetic_unco":
    n_samples, n_features = (30, 50)
    X, y = make_sparse_uncorrelated(n_samples=n_samples,
                                    n_features=n_features,
                                    random_state=random_state)

X = X.astype(float)
y = y.astype(float)
X = np.asfortranarray(X)
y = np.asfortranarray(y)

n_samples, n_features = X.shape
X = np.asfortranarray(X)
Example #53
from sklearn.datasets import make_classification, make_regression
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras import utils

# Creating Dataset
X, y = make_classification(n_samples=1000,
                           n_features=4,
                           n_informative=2,
                           n_redundant=0,
                           random_state=0,
                           shuffle=False)

X2, y2 = make_regression(n_samples=1000,
                         n_features=4,
                         n_informative=2,
                         random_state=0,
                         shuffle=False)
"""
Notes:
Neural Networks perform best when the data is normalized
"""

#-----------------------------------------------------------------------------#
#                          Neural Network: Binary Class                       #
#-----------------------------------------------------------------------------#

# Initializing Model
model = Sequential()

# Input Layer & Hidden Layer 1
Example #54
## Terminology
# Univariate means there's only one variable (aka feature)
# X is usually the independent variable and is often capitalized to denote such

## Typical dependencies
from sklearn.datasets import make_regression # imports the library needed to make a data sample
import matplotlib.pyplot as plt # needed for the scatter plots used below


## How to create a random dataset
from sklearn.datasets import make_regression # imports the library needed to make a data sample
X, y = make_regression(n_samples = 20, n_features=1, random_state=0, noise=4, bias=100.0)
    # X and y are the two variables.  Because X is capitalized, it is probably the Independent variable
    #n_samples is how many samples will be created in this dataset
    #n_features is the number of dependent variables.  (X is an independent variable and is not counted)
    # random_state is like random, but it seeds a number so that you can create the same dataset in the future.  This is useful in academia where you need the exact same dataset to ensure reproducibility
    #noise determines how far the points will be from the line.  The higher the noise, the further from the line the dots will be (aka variance)
    #bias is where the Y-intercept will be (at x= 0)
plt.scatter(X,y) # is a good way to visualize your dataset before usage.


## how to make an S-Curve dataset
from sklearn.datasets import make_s_curve
data, color = make_s_curve(100, random_state=0)
plt.scatter(data[:,0], color) # To see the data


## How to make a LinearRegression model against data
from sklearn.linear_model import LinearRegression
model = LinearRegression()
model.fit(X, y)  ## once we assign the Linear Regression function to the variable, we use it to fit the data
    # You can view variable associated with the fit.  Note the _ in the suffix.  That means it's only available after the model has been fit to the data
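# For example (an illustrative addition, not part of the original notes), the
# fitted slope and intercept can be read from those underscore attributes:
print(model.coef_, model.intercept_)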
Example #55
Created on Sun Jul 14 10:12:41 2019

@author: Snake
"""

from __future__ import print_function
from sklearn import datasets
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

loaded_data = datasets.load_boston()
data_X = loaded_data.data
data_y = loaded_data.target

model = LinearRegression()
model.fit(data_X, data_y)

print(model.predict(data_X[:4, :]))
print(data_y[:4])

X, y = datasets.make_regression(n_samples=100,
                                n_features=1,
                                n_targets=1,
                                noise=10)
model2 = LinearRegression()
model2.fit(X, y)
y_predict = model2.predict(X)

plt.scatter(X, y)
plt.plot(X, y_predict, color='r')
plt.show()
## 5. Generating Regression Data ##

from sklearn.datasets import make_regression
import pandas as pd
data = make_regression(n_samples=100, n_features=3, random_state=1)
features = pd.DataFrame(data[0])
labels = pd.Series(data[1])

## 6. Fitting A Linear Regression Neural Network ##

from sklearn.datasets import make_regression
import numpy as np
data = make_regression(n_samples=100, n_features=3, random_state=1)
features = pd.DataFrame(data[0])
labels = pd.Series(data[1])
features['bias'] = 1
from sklearn.linear_model import SGDRegressor


def train(features, labels):
    lr = SGDRegressor()
    lr.fit(features, labels)
    weights = lr.coef_
    return weights


def feedforward(features, weights):
    predictions = np.dot(features, weights.T)
    return predictions
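
# Illustrative usage of the two helpers above (not part of the original
# exercise): train the weights, then feed the features forward for predictions.
weights = train(features, labels)
predictions = feedforward(features, weights)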

Example #57
import numpy as np
import torch
import torch.nn as nn
from sklearn import datasets
import matplotlib.pyplot as plt
'''
1. Prepare Data
2. Create model
3. Loss and optimizer 
4. Training loop
'''

# Let's prepare regression dataset

X_numpy, Y_numpy = datasets.make_regression(n_samples=100,
                                            n_features=1,
                                            random_state=1,
                                            noise=20)
X = torch.from_numpy(X_numpy.astype(np.float32))
Y = torch.from_numpy(Y_numpy.astype(np.float32))
Y = Y.view(Y.shape[0], 1)

n_samples, n_features = X.shape

# Let's define the model
input_size = n_features
output_size = 1


class LinearRegression(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LinearRegression, self).__init__()
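        # The snippet is cut off here; the rest is a hypothetical completion
        # (not from the original source) finishing steps 2-4 of the outline
        # above: the linear layer, loss/optimizer, and training loop.
        self.lin = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        return self.lin(x)


model = LinearRegression(input_size, output_size)
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

for epoch in range(100):
    y_pred = model(X)            # forward pass
    loss = criterion(y_pred, Y)  # compute the loss
    loss.backward()              # backward pass: compute gradients
    optimizer.step()             # update weights
    optimizer.zero_grad()        # reset gradients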
Example #58
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

# Create a data set for analysis
x, y = make_regression(n_samples=500, n_features = 1, noise=25, random_state=0)

# Split the data set into testing and training data
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=0)

# Plot the data
sns.set_style("darkgrid")
sns.regplot(x_test, y_test, fit_reg=False)

# Remove ticks from the plot
plt.xticks([])
plt.yticks([])

plt.tight_layout()
plt.show()
Example #59
from __future__ import print_function
from threading import Thread, current_thread
from functools import wraps
import sys
import warnings
from sklearn.datasets import make_classification, make_regression
from .export_utils import expr_to_tree, generate_pipeline_code
from deap import creator
# generate a small data set for a new pipeline, in order to check if the pipeline
# has unsupported combinations in params
pretest_X, pretest_y = make_classification(n_samples=50,
                                           n_features=10,
                                           random_state=42)
pretest_X_reg, pretest_y_reg = make_regression(n_samples=50,
                                               n_features=10,
                                               random_state=42)


def convert_mins_to_secs(time_minute):
    """Convert time from minutes to seconds"""
    # time limit should be at least 1 second
    return max(int(time_minute * 60), 1)
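# Illustrative behaviour (not from the source): convert_mins_to_secs(0.005)
# returns 1 because of the one-second floor, while convert_mins_to_secs(2)
# returns 120.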


class TimedOutExc(RuntimeError):
    """
    Raised when a timeout happens
    """

Example #60
def test_regressor_chains():
    X_reg, y_reg = make_regression(random_state=112, n_targets=3, n_samples=5150)
    stream = DataStream(X_reg, y_reg)
    stream.prepare_for_use()
    estimator = SGDRegressor(random_state=112, max_iter=10)
    learner = RegressorChain(base_estimator=estimator, random_state=112)

    X, y = stream.next_sample(150)
    learner.partial_fit(X, y)

    cnt = 0
    max_samples = 5000
    predictions = []
    true_labels = []
    wait_samples = 100

    while cnt < max_samples:
        X, y = stream.next_sample()
        # Test every n samples
        if (cnt % wait_samples == 0) and (cnt != 0):
            predictions.append(list(learner.predict(X)[0]))
            true_labels.append(y[0])

        learner.partial_fit(X, y)
        cnt += 1

    expected_predictions = [[-21.932581119953333, 1265662295936.5574, 7.5406725414072326e+22],
                            [-97.17297744582125, 5438576501559.791, -1.1370581201037737e+24],
                            [-60.06308622605051, 26421144038311.047, 1.3207650552720094e+25],
                            [-285.32687352244847, 8881551118262.033, -1.1322856827798374e+24],
                            [-115.80322693771457, -24997431307818.508, 2.85747306174037e+24],
                            [-12.184193815918672, 3510562166726.0283, -4.8590562435597834e+23],
                            [-94.99008392491476, 4794062761133.606, -1.8849188211946465e+24],
                            [66.35576182871232, -8147485653396.883, -7.492944375995595e+23],
                            [-52.145505628056995, -1013810481101.9043, -4.5310283013446384e+23],
                            [16.715060622072958, 562391244392.6193, 3.3789644409962397e+22],
                            [96.32219400190282, -20397346086007.85, 1.558245298240083e+24],
                            [-281.8168065846582, 118681520215938.52, 4.815807486956294e+25],
                            [-135.62679760307105, 20260866750185.832, 1.605753540523006e+24],
                            [0.07932047636460954, -708539394047.3298, -3.61482684929158e+22],
                            [-292.1646176261883, -11162615183157.55, -8.674643964570704e+23],
                            [-176.92746747754094, -29231218161585.13, 1.411600743825668e+24],
                            [-348.0498644784687, -100615393132365.25, 9.759683002046948e+23],
                            [30.948974669258675, -1199287119275.6328, 2.0866927007519847e+23],
                            [214.0020659569134, -24437173206276.543, 9.450880718880671e+23],
                            [153.98931593720746, 32675842205528.723, -1.7246747286222668e+24],
                            [99.39074016354951, -11385065116243.611, 1.0770253102805811e+24],
                            [127.81660709796127, 16929726964275.697, 7.14820947257164e+24],
                            [40.45505653639006, -14311951591200.725, -9.33193290094133e+23],
                            [117.52219878440611, 17952367624051.36, 4.5651719663788677e+23],
                            [75.53942801239991, -9231543699137.594, 3.2317133158453914e+24],
                            [31.795193207760704, -4084783706153.4004, -4.188095047309216e+23],
                            [68.5318978502461, 5735810247065.921, 1.7284713503779943e+24],
                            [65.18438567482129, -13298743450357.943, -1.4367047198923567e+24],
                            [-116.63952028337805, -344127767223.9295, 2.3925104169428623e+22],
                            [-76.81599010889556, 8711205431447.733, -1.1575305916673031e+24],
                            [263.1077717649874, 32146618104196.434, -7.240279466740839e+24],
                            [-94.07597099457413, -8216681977657.527, 2.3785728690780553e+24],
                            [-175.78429788635424, -368856885004.46, -5.7200993095587195e+22],
                            [59.648477499483285, -1752783828320.242, 2.1429953624557326e+23],
                            [71.68447202426032, -27151271800666.492, 9.367463190825582e+24],
                            [-189.96629636835922, -27090727476080.18, -3.8659883994544866e+24],
                            [-240.7920206809074, 15406047062899.537, 2.0609123388035027e+24],
                            [-105.80996634043589, -1518636404558.1646, -1.4166487855869706e+23],
                            [-164.02527753963858, -61386039046571.125, -2.179071650432624e+25],
                            [52.451759456657975, -988509747123.6125, -7.334899319683594e+22],
                            [68.37044139814127, -7434200892467.581, -7.535677215142279e+23],
                            [164.9457843624521, -9474550940989.51, -1.3512944635293625e+24],
                            [189.34401690407307, -14349556896444.508, 1.0732760415617274e+24],
                            [0.8944005517286119, 463945767759.78735, -1.9938544157612443e+22],
                            [71.7856433565235, -9804063257174.584, 4.7874862540754335e+23],
                            [-5.450502769025279, 281585481223.33276, 2.1974700575843552e+22],
                            [248.00190755589915, -81874135462745.58, -2.6532557110860303e+25],
                            [-113.86249490223707, 2634310697909.643, 1.580428629322546e+23],
                            [-35.92856878407447, -5410985463428.589, 2.522168862637753e+23]]

    print(predictions)
    assert np.allclose(np.array(predictions), np.array(expected_predictions))
    assert type(learner.predict(X)) == np.ndarray