def UnequalMLPsEnsembleTest():
    """Fit an 'auto_large' EnsembleRegressor on Friedman #3 and save the results.

    Draws 200,200 samples from ``DatasetFactory.friedman3``, keeps 200 of them
    for training (``random_state=0``), fits the ensemble, and stores the
    output of ``ensemble.predict`` on both splits, the targets, and the first
    two moments of the held-out targets in a MATLAB ``.mat`` file.

    Returns:
        None. The only result is the file written by ``sio.savemat``.
    """
    # Function-scope import so this block does not depend on the file header.
    import os

    # Swap in DatasetFactory.friedman1 / friedman2 to run the other benchmarks.
    dataset = DatasetFactory.friedman3(n_samples=200200)
    Xtrain, X, ytrain, y = model_selection.train_test_split(dataset.data,
                                                            dataset.target,
                                                            random_state=0,
                                                            train_size=200)
    ensemble = EnsembleRegressor(type='auto_large')
    ensemble.fit(Xtrain,
                 ytrain,
                 samples_per_regressor=200,
                 regressor_overlap=200)
    Ztrain = ensemble.predict(Xtrain)
    Z = ensemble.predict(X)

    # The path used to be a single literal containing '\D', which is an
    # invalid escape sequence and only acts as a directory separator on
    # Windows. os.path.join yields the same string on Windows and a valid
    # sub-directory path elsewhere.
    # NOTE: the target directory is not created here.
    output_path = os.path.join('ManualEnsembleDatasets',
                               'DifferentRegressors_Friedman3.mat')

    sio.savemat(
        output_path, {
            'names': ensemble.regressor_labels,
            'Z': Z,
            'y': y,
            'Ztrain': Ztrain,
            'ytrain': ytrain,
            'samples_per_regressor': 200,
            'regressor_samples_overlap': 200,
            'Ey': np.mean(y),
            'Ey2': np.mean(y**2),
            'Description': 'Different Regressors (Friedman #3)'
        })
def RidgeRegressionEnsembleTest():
    """Compare a 'ridge' EnsembleRegressor with RidgeCV on Friedman #3.

    Draws 200,200 samples from ``DatasetFactory.friedman3``, keeps 200 of them
    for training (``random_state=0``), fits both the ensemble and a
    ``RidgeCV`` model on the training split, and saves their predictions on
    the held-out split to ``RidgeRegression_Friedman3_200k.mat``.

    Returns:
        None. The only result is the file written by ``sio.savemat``.
    """
    # Other benchmarks: DatasetFactory.friedman1 / DatasetFactory.friedman2.
    friedman = DatasetFactory.friedman3(n_samples=200200)
    split = model_selection.train_test_split(
        friedman.data, friedman.target, random_state=0, train_size=200)
    train_features, test_features, train_targets, test_targets = split

    ridge_ensemble = EnsembleRegressor(type='ridge')
    ridge_ensemble.fit(train_features, train_targets,
                       samples_per_regressor=200, regressor_overlap=200)

    # NOTE(review): the `normalize` argument is not accepted by recent
    # scikit-learn releases -- confirm the pinned version before upgrading.
    baseline = linear_model.RidgeCV(
        alphas=np.arange(.1, 1, .2), fit_intercept=True, normalize=True)
    baseline.fit(train_features, train_targets)
    baseline_prediction = baseline.predict(test_features)
    ensemble_prediction = ridge_ensemble.predict(test_features)

    # Training-split outputs are intentionally not stored for this test.
    results = {
        'names': ridge_ensemble.regressor_labels,
        'Z': ensemble_prediction,
        'y': test_targets,
        'y_RidgeCV': baseline_prediction,
        'samples_per_regressor': 200,
        'regressor_samples_overlap': 200,
        'Ey': np.mean(test_targets),
        'Ey2': np.mean(test_targets**2),
        'Description': 'Ridge Regression (Friedman #3)',
    }
    sio.savemat('RidgeRegression_Friedman3_200k.mat', results)
def UnequalMLPsEnsembleTest():
    """Fit an 'auto_large' EnsembleRegressor on Friedman #3 and save the results.

    Draws 200,200 samples from ``DatasetFactory.friedman3``, keeps 200 of them
    for training (``random_state=0``), fits the ensemble, and stores the
    output of ``ensemble.predict`` on both splits, the targets, and the first
    two moments of the held-out targets in a MATLAB ``.mat`` file.

    Returns:
        None. The only result is the file written by ``sio.savemat``.
    """
    # Function-scope import so this block does not depend on the file header.
    import os

    # Swap in DatasetFactory.friedman1 / friedman2 to run the other benchmarks.
    dataset = DatasetFactory.friedman3(n_samples=200200)
    # NOTE(review): sklearn.cross_validation was replaced by
    # sklearn.model_selection in later scikit-learn releases -- confirm the
    # pinned version before upgrading.
    Xtrain, X, ytrain, y = cross_validation.train_test_split(
        dataset.data, dataset.target, random_state=0, train_size=200)
    ensemble = EnsembleRegressor(type='auto_large')
    ensemble.fit(Xtrain, ytrain,
                 samples_per_regressor=200, regressor_overlap=200)
    Ztrain = ensemble.predict(Xtrain)
    Z = ensemble.predict(X)

    # The path used to be a single literal containing '\D', which is an
    # invalid escape sequence and only acts as a directory separator on
    # Windows. os.path.join yields the same string on Windows and a valid
    # sub-directory path elsewhere.
    # NOTE: the target directory is not created here.
    output_path = os.path.join('ManualEnsembleDatasets',
                               'DifferentRegressors_Friedman3.mat')

    sio.savemat(output_path, {
        'names': ensemble.regressor_labels,
        'Z': Z, 'y': y,
        'Ztrain': Ztrain, 'ytrain': ytrain,
        'samples_per_regressor': 200,
        'regressor_samples_overlap': 200,
        'Ey': np.mean(y),
        'Ey2': np.mean(y ** 2),
        'Description': 'Different Regressors (Friedman #3)'
    })
# Example #4
# 0
def main():
    """Run a 50-tree RandomForestRegressor on Friedman #3 and save the results.

    The forest's ``predict`` is rebound to ``forest_regressor_predict``, which
    (per the original author's note) saves the individual regressor outputs
    to the member variable ``all_y_hat``. Those outputs, the forest's own
    prediction and the held-out targets are written to
    ``RandomForestTest_Friedman3.mat``.

    Returns:
        None. The only result is the file written by ``sio.savemat``.
    """
    forest = RandomForestRegressor(n_estimators=50)
    # Bind the replacement as a method so it receives the forest as `self`.
    forest.predict = types.MethodType(forest_regressor_predict, forest)

    # Other benchmarks: DatasetFactory.friedman1 / DatasetFactory.friedman2.
    friedman = DatasetFactory.friedman3(n_samples=20200)
    train_features, test_features, train_targets, test_targets = (
        cross_validation.train_test_split(
            friedman.data, friedman.target, train_size=200, random_state=0))

    forest.fit(train_features, train_targets)
    forest_prediction = forest.predict(test_features)
    # all_y_hat is populated by the patched predict call just above.
    individual_outputs = np.array(forest.all_y_hat)

    # Training-split outputs are left out on purpose: the original note
    # states that combining decision trees is unsupervised by nature.
    results = {
        'names': str(forest.estimators_),
        'Z': individual_outputs,
        'y': test_targets,
        'y_RandomForest': forest_prediction,
        'samples_per_regressor': 200,
        'regressor_samples_overlap': 200,
        'Ey': np.mean(test_targets),
        'Ey2': np.mean(test_targets**2),
        'Description': 'Random Forest Ensemble Test (Friedman #3)',
    }
    sio.savemat('RandomForestTest_Friedman3.mat', results)
def RidgeRegressionEnsembleTest():
    """Compare a 'ridge' EnsembleRegressor with RidgeCV on Friedman #3.

    Draws 200,200 samples from ``DatasetFactory.friedman3``, keeps 200 of them
    for training (``random_state=0``), fits both the ensemble and a
    ``RidgeCV`` model on the training split, and saves their predictions on
    the held-out split to ``RidgeRegression_Friedman3_200k.mat``.

    Returns:
        None. The only result is the file written by ``sio.savemat``.
    """
    # Other benchmarks: DatasetFactory.friedman1 / DatasetFactory.friedman2.
    friedman = DatasetFactory.friedman3(n_samples=200200)
    train_features, test_features, train_targets, test_targets = (
        cross_validation.train_test_split(
            friedman.data, friedman.target, random_state=0, train_size=200))

    ridge_ensemble = EnsembleRegressor(type='ridge')
    ridge_ensemble.fit(
        train_features,
        train_targets,
        samples_per_regressor=200,
        regressor_overlap=200,
    )

    # NOTE(review): the `normalize` argument is not accepted by recent
    # scikit-learn releases -- confirm the pinned version before upgrading.
    baseline = linear_model.RidgeCV(
        alphas=np.arange(.1, 1, .2),
        fit_intercept=True,
        normalize=True,
    )
    baseline.fit(train_features, train_targets)
    baseline_prediction = baseline.predict(test_features)
    ensemble_prediction = ridge_ensemble.predict(test_features)

    # Training-split outputs are intentionally not stored for this test.
    results = {
        'names': ridge_ensemble.regressor_labels,
        'Z': ensemble_prediction,
        'y': test_targets,
        'y_RidgeCV': baseline_prediction,
        'samples_per_regressor': 200,
        'regressor_samples_overlap': 200,
        'Ey': np.mean(test_targets),
        'Ey2': np.mean(test_targets ** 2),
        'Description': 'Ridge Regression (Friedman #3)',
    }
    sio.savemat('RidgeRegression_Friedman3_200k.mat', results)
def submit_one(data, target, filename):
    """Wrap ``data`` and ``target`` in a Dataset and run make_large_ensemble.

    Args:
        data: Passed as the first argument to ``DatasetFactory.Dataset``.
        target: Passed as the second argument to ``DatasetFactory.Dataset``.
        filename: Forwarded unchanged to ``make_large_ensemble``.

    Returns:
        Whatever ``make_large_ensemble`` returns.
    """
    wrapped = DatasetFactory.Dataset(data, target)
    return make_large_ensemble(wrapped, filename)