Python GradientBoostingRegressor.predict示例，sklearn.ensemble.gradient_boosting.GradientBoostingRegressor.predict Python示例

示例#1

0

显示文件

文件： gbdt-regression.py 项目： wzxJayce/FATE

def main(param=""):
    """Fit a GBDT regressor on guest + host rows and score it on the guest rows.

    Args:
        param: Either a mapping providing the keys ``data_guest``,
            ``data_host``, ``data_test``, ``idx`` and ``label_name``, or a
            string that is handed to ``JobConfig.load_from_file``.

    Returns:
        A ``({}, result)`` tuple; ``result`` maps ``"mean_squared_error"`` and
        ``"mean_absolute_error"`` to the errors of the guest-row predictions.
    """
    # obtain config
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    data_guest = param["data_guest"]
    data_host = param["data_host"]
    data_test = param["data_test"]
    idx = param["idx"]
    label_name = param["label_name"]

    # prepare data
    df_guest = pd.read_csv(data_guest, index_col=idx)
    df_host = pd.read_csv(data_host, index_col=idx)
    # NOTE(review): the test frame is loaded but never used below; kept so a
    # missing/invalid test file still fails here as it did before.
    df_test = pd.read_csv(data_test, index_col=idx)

    # Training set: guest and host rows stacked vertically.
    df = pd.concat([df_guest, df_host], axis=0)
    y = df[label_name]
    X = df.drop(label_name, axis=1)

    # Evaluation set: the guest rows only (these are also part of the training set).
    X_guest = df_guest.drop(label_name, axis=1)
    y_guest = df_guest[label_name]

    clf = GradientBoostingRegressor(n_estimators=50)
    clf.fit(X, y)
    y_predict = clf.predict(X_guest)

    result = {
        "mean_squared_error": mean_squared_error(y_guest, y_predict),
        "mean_absolute_error": mean_absolute_error(y_guest, y_predict)
    }
    print(result)
    return {}, result

示例#2

0

显示文件

文件： test_booster.py 项目： modusdatascience/booster

def test_gradient_boosting_estimator_with_smooth_quantile_loss():
    """Compare a Booster using a smooth quantile loss against sklearn's quantile GBR.

    Synthetic log-normal targets are generated from a seeded random linear
    model; the Booster must raise NotFittedError before fitting and, once
    fitted, beat the sklearn reference on quantile loss and R^2 while covering
    roughly the requested quantile.
    """
    np.random.seed(0)
    n_samples, n_features = 15000, 10
    quantile = .8
    X = np.random.normal(size=(n_samples, n_features))
    beta = np.random.normal(size=n_features)
    y = np.random.lognormal(np.dot(X, beta))

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.33333333333333)
    smooth_loss = SmoothQuantileLossFunction(1, quantile, .0001)
    q_loss = QuantileLossFunction(1, quantile)
    base_estimator = BaggingRegressor(
        Earth(max_degree=2, verbose=False, use_fast=True, max_terms=10))
    stopper = stop_after_n_iterations_without_percent_improvement_over_threshold(3, .01)
    model = Booster(base_estimator, smooth_loss, n_estimators=150,
                    stopper=stopper, verbose=True)

    # Predicting before fit must fail.
    assert_raises(NotFittedError, model.predict, X_train)

    model.fit(X_train, y_train)
    booster_pred = model.predict(X_test)

    # Reference model: sklearn's own quantile-loss gradient boosting.
    reference = GradientBoostingRegressor(loss='quantile', alpha=quantile)
    reference.fit(X_train, y_train)
    reference_pred = reference.predict(X_test)

    assert_less(q_loss(y_test, booster_pred), q_loss(y_test, reference_pred))
    assert_greater(r2_score(y_test, booster_pred), r2_score(y_test, reference_pred))

    # Fraction of test targets at or below the prediction should be near the quantile.
    coverage = np.mean(y_test <= booster_pred)
    assert_less(np.abs(coverage - quantile), .05)
    assert_greater(model.score_, 0.)
    assert_approx_equal(model.score(X_train, y_train), model.score_)

示例#3

0

显示文件

def main(param=""):
    """Fit a GBDT regressor on the joined guest/host table and report training MAE.

    Args:
        param: A mapping with the keys ``data_guest``, ``data_host``, ``idx``
            and ``label_name``, or a string passed to
            ``JobConfig.load_from_file``.

    Returns:
        ``({}, result)`` where ``result`` holds ``"mean_absolute_error"``
        computed on the same rows the model was fitted on.
    """
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    guest_path, host_path = param["data_guest"], param["data_host"]
    index_column, target = param["idx"], param["label_name"]

    # Join host columns onto the guest rows via the shared index.
    guest_frame = pd.read_csv(guest_path, index_col=index_column)
    host_frame = pd.read_csv(host_path, index_col=index_column)
    joined = guest_frame.join(host_frame, rsuffix='host')
    labels = joined[target]
    features = joined.drop(target, axis=1)

    model = GradientBoostingRegressor(
        random_state=0, n_estimators=50, learning_rate=0.1)
    model.fit(features, labels)
    fitted_values = model.predict(features)

    result = {"mean_absolute_error": mean_absolute_error(labels, fitted_values)}
    print(result)
    return {}, result

示例#4

0

显示文件

文件： gbdt-regression.py 项目： zpskt/FATE

def main(config="../../config.yaml", param="./gbdt_config_reg.yaml"):
    """Fit a GBDT regressor on the joined guest/host table and report training MAE.

    Args:
        config: Either a string passed to ``JobConfig.load_from_file`` (the
            loaded object is then indexed with ``"data_base_dir"``) or an
            object exposing a ``data_base_dir`` attribute.
        param: A mapping with the keys ``data_guest``, ``data_host``, ``idx``
            and ``label_name``, or a string passed to
            ``JobConfig.load_from_file``.

    Returns:
        ``({}, result)`` where ``result`` holds ``"mean_absolute_error"``
        computed on the rows used for fitting.
    """
    if isinstance(param, str):
        param = JobConfig.load_from_file(param)

    guest_file = param["data_guest"]
    host_file = param["data_host"]
    index_column = param["idx"]
    target = param["label_name"]

    print('config is {}'.format(config))
    if not isinstance(config, str):
        data_base_dir = config.data_base_dir
    else:
        config = JobConfig.load_from_file(config)
        data_base_dir = config["data_base_dir"]
        print('data base dir is', data_base_dir)

    # Both data files are resolved relative to the configured base directory.
    guest_frame = pd.read_csv(os.path.join(data_base_dir, guest_file),
                              index_col=index_column)
    host_frame = pd.read_csv(os.path.join(data_base_dir, host_file),
                             index_col=index_column)
    joined = guest_frame.join(host_frame, rsuffix='host')
    labels = joined[target]
    features = joined.drop(target, axis=1)

    model = GradientBoostingRegressor(random_state=0, n_estimators=50)
    model.fit(features, labels)
    fitted_values = model.predict(features)

    result = {"mean_absolute_error": mean_absolute_error(labels, fitted_values)}
    print(result)
    return {}, result

示例#5

0

显示文件

文件： GDBT_ST.py 项目： emigmo/TC_CAINIAO

def GDBT_ST(trainFileName,testFilename):
    """Train one GBDT regressor per store key and collect clipped predictions.

    Both files are loaded with ``ld.LoadData_DATA_ST`` and indexed by the keys
    ``'1'``..``'5'``. For each row the first two fields are kept as
    identifiers, the remaining fields are converted to float, and the first
    two of those floats are skipped. In training rows the last float is the
    target.

    Args:
        trainFileName: Passed to ``ld.LoadData_DATA_ST`` for the training data.
        testFilename: Passed to ``ld.LoadData_DATA_ST`` for the test data.

    Returns:
        A list of ``[row[0], row[1], '<prediction>']`` entries, where the
        prediction is floored at 0 and formatted with four decimals.
    """
    trainData = ld.LoadData_DATA_ST(trainFileName)
    testData = ld.LoadData_DATA_ST(testFilename)

    res = []
    for store_key in ('1', '2', '3', '4', '5'):
        train_X, train_y = [], []
        for row in trainData[store_key]:
            values = [float(x) for x in row[2:]]
            train_X.append(values[2:-1])
            train_y.append(values[-1])

        test_X, items = [], []
        for row in testData[store_key]:
            items.append((row[0], row[1]))
            values = [float(x) for x in row[2:]]
            test_X.append(values[2:])

        clf = GradientBoostingRegressor(loss='lad', n_estimators=50, learning_rate=0.1, max_depth=3).\
                    fit(train_X,train_y)
        pred_y = clf.predict(test_X)

        # Pair identifiers with predictions directly; the previous index loop
        # reused ``i`` and shadowed the outer store loop variable.
        for (first_id, second_id), pred in zip(items, pred_y):
            res.append([first_id, second_id, '%.4f' % max(pred, 0)])
    return res

示例#6

0

显示文件

文件： gradient_boosting_regressor.py 项目： fagan2888/lale

class GradientBoostingRegressorImpl:
    """Thin wrapper that stores hyperparameters and delegates to ``SKLModel``."""

    def __init__(self,
                 loss='ls',
                 learning_rate=0.1,
                 n_estimators=100,
                 subsample=1.0,
                 criterion='friedman_mse',
                 min_samples_split=2,
                 min_samples_leaf=1,
                 min_weight_fraction_leaf=0.0,
                 max_depth=3,
                 min_impurity_decrease=0.0,
                 min_impurity_split=None,
                 init=None,
                 random_state=None,
                 max_features=None,
                 alpha=0.9,
                 verbose=0,
                 max_leaf_nodes=None,
                 warm_start=False,
                 presort='auto',
                 validation_fraction=0.1,
                 n_iter_no_change=None,
                 tol=0.0001):
        """Record every constructor argument under its own name in ``_hyperparams``."""
        self._hyperparams = dict(
            loss=loss,
            learning_rate=learning_rate,
            n_estimators=n_estimators,
            subsample=subsample,
            criterion=criterion,
            min_samples_split=min_samples_split,
            min_samples_leaf=min_samples_leaf,
            min_weight_fraction_leaf=min_weight_fraction_leaf,
            max_depth=max_depth,
            min_impurity_decrease=min_impurity_decrease,
            min_impurity_split=min_impurity_split,
            init=init,
            random_state=random_state,
            max_features=max_features,
            alpha=alpha,
            verbose=verbose,
            max_leaf_nodes=max_leaf_nodes,
            warm_start=warm_start,
            presort=presort,
            validation_fraction=validation_fraction,
            n_iter_no_change=n_iter_no_change,
            tol=tol,
        )

    def fit(self, X, y=None):
        """Build a fresh ``SKLModel`` from the stored hyperparameters and fit it.

        ``y`` is only forwarded when it is not ``None``. Returns ``self``.
        """
        model = SKLModel(**self._hyperparams)
        self._sklearn_model = model
        if y is None:
            model.fit(X)
        else:
            model.fit(X, y)
        return self

    def predict(self, X):
        """Return the wrapped model's predictions for ``X``."""
        fitted = self._sklearn_model
        return fitted.predict(X)

示例#7

0

显示文件

文件： linclassifer.py 项目： maniarathi/takethislifedata

def GradientBoosted(X_train, X_test, y_train, y_test):
    """Fit a default GradientBoostingRegressor and evaluate it on the test split.

    Returns:
        A ``(gb_score, gb_labels)`` tuple: the value of ``mod.score`` on the
        test split and the predictions for ``X_test``.
    """
    mod = GradientBoostingRegressor()
    mod.fit(X_train, y_train)
    # Parenthesised single-argument print produces the same output on Python 2
    # and is valid syntax on Python 3 (the bare statement form is not).
    print("Done training")
    gb_labels = mod.predict(X_test)
    print("Done testing")
    gb_score = mod.score(X_test, y_test)
    return gb_score, gb_labels

示例#8

0

显示文件

文件： GDBT_all.py 项目： emigmo/TC_CAINIAO

def GDBT_ALL_train(trainFileName,testFileName):
    """Fit a GBDT on the training file and pair predictions with the test targets.

    Returns:
        A list of ``[item, 'all', '<prediction>', '<target>']`` rows; both
        numbers are floored at 0 and formatted with two decimals.
    """
    train_X, train_y, _ = ld.loadData_all(trainFileName)
    test_X, test_y, items = ld.loadData_all(testFileName)
    model = GradientBoostingRegressor(
        loss='lad', n_estimators=40, learning_rate=0.1, max_depth=3)
    pred_y = model.fit(train_X, train_y).predict(test_X)
    return [
        [items[row], 'all',
         '%.2f' % max(pred_y[row], 0),
         '%.2f' % max(test_y[row], 0)]
        for row in range(len(test_X))
    ]

示例#9

0

显示文件

def GDBT_ALL(trainFileName, testFileName):
    """Fit a GBDT on the labelled training file and predict the evaluation file.

    Returns:
        A list of ``[item, 'all', '<prediction>']`` rows; each prediction is
        floored at 0 and formatted with four decimals.
    """
    train_X, train_y, _ = ld.LoadData_DATA_LABEL_ITEM(trainFileName)
    Eval_X, items = ld.LoadData_DATA_ITEM(testFileName)
    regressor = GradientBoostingRegressor(
        loss='lad', n_estimators=40, learning_rate=0.1, max_depth=3)
    fitted = regressor.fit(train_X, train_y)
    pred_y = fitted.predict(Eval_X)

    rows = []
    for idx in range(len(Eval_X)):
        clipped = max(pred_y[idx], 0)
        rows.append([items[idx], 'all', '%.4f' % clipped])
    return rows

示例#10

0

显示文件

def gradient_boosting(train, test, label):
    """Fit a Huber-loss gradient boosting regressor and predict ``test``.

    Args:
        train: Training features passed to ``fit`` and ``predict``.
        test: Features to predict.
        label: Training target exposing ``.values`` (a pandas object).

    Returns:
        ``np.exp`` of the model's predictions for ``test``.
        NOTE(review): the exponentiation suggests the target was
        log-transformed upstream - confirm with the caller.
    """
    gb = GradientBoostingRegressor(n_estimators=300,
                                   learning_rate=0.05,
                                   max_depth=3,
                                   max_features='sqrt',
                                   min_samples_leaf=15,
                                   min_samples_split=10,
                                   loss='huber')
    # ``as_matrix()`` was removed in pandas 1.0; ``.values`` yields the same
    # ndarray and exists on old and new pandas alike.
    gb.fit(train, label.values.ravel())

    # prediction on training data
    train_prediction = gb.predict(train)
    print("Gradient Boosting score on training set: ",
          rmse(label, train_prediction))

    test_prediction = gb.predict(test)
    return np.exp(test_prediction)

示例#11

0

显示文件

文件： GDBT_all.py 项目： emigmo/TC_CAINIAO

def GDBT_ALL(trainFileName,testFileName):
    """Train a GBDT on the labelled file and return formatted predictions.

    Returns:
        One ``[item, 'all', '<prediction>']`` entry per evaluation row, with
        the prediction floored at 0 and printed with four decimals.
    """
    train_X, train_y, _ = ld.LoadData_DATA_LABEL_ITEM(trainFileName)
    Eval_X, items = ld.LoadData_DATA_ITEM(testFileName)
    booster = GradientBoostingRegressor(
        loss='lad', n_estimators=40, learning_rate=0.1, max_depth=3)
    pred_y = booster.fit(train_X, train_y).predict(Eval_X)
    return [
        [items[pos], 'all', '%.4f' % max(pred_y[pos], 0)]
        for pos in range(len(Eval_X))
    ]

示例#12

0

显示文件

文件： test_argument_names.py 项目： fagan2888/sklearn2code

def test_argument_names():
    """Generated code must accept the DataFrame's column names as keyword arguments.

    A GBR is fitted on the Boston data, exported with ``sklearn2code`` using
    the frame's columns as argument names, executed as a module, and its
    ``predict`` output compared with the model's own predictions.
    """
    dataset = load_boston()
    features = DataFrame(dataset['data'], columns=dataset['feature_names'])
    target = dataset['target']
    model = GradientBoostingRegressor(verbose=True).fit(features, target)

    code = sklearn2code(model, ['predict'], numpy_flat,
                        argument_names=features.columns)
    generated = exec_module('boston_housing_module', code)

    expected = model.predict(features)
    # ``**features`` expands the frame into one keyword argument per column.
    actual = generated.predict(**features)
    assert_array_almost_equal(expected, actual)

示例#13

0

显示文件

文件： picassoai.py 项目： dejimarquis/PicassoAI

 def predict_using_local_model(self):
     """Fit a default GBR on the instance's training data and predict its test data.

     Prints the training-set score and the prediction wall time.

     Returns:
         The predictions for ``self.test_x`` cast to ``uint8``.
     """
     gbr = GradientBoostingRegressor()
     gbr.fit(self.train_x, self.train_y)
     # Score on the instance data the model was fitted on; the bare
     # ``train_x``/``train_y`` names used before are not defined in this method.
     print('Accuracy of gbr, on the training set: ' +
           str(gbr.score(self.train_x, self.train_y)))
     start_time = time.time()
     predictions = gbr.predict(self.test_x)
     predict_time = time.time() - start_time
     print('Prediction time for gbr is ' + str(predict_time) + '\n')
     predictions = predictions.astype('uint8')
     return predictions

示例#14

0

显示文件

文件： picassoai.py 项目： mateuszgembarzewski/PicassoAI

def prediction():
    """Fit a default GBR on the module-level training data and predict ``test_x``.

    Prints the training-set score, the prediction wall time and the
    predictions themselves.

    Returns:
        The predictions cast to ``uint8``.
    """
    global train_x, train_y, test_x
    model = GradientBoostingRegressor()
    model.fit(train_x, train_y)
    training_score = model.score(train_x, train_y)
    print('Accuracy of gbr, on the training set: ' + str(training_score))

    # Time only the predict call.
    started = time.time()
    raw_predictions = model.predict(test_x)
    elapsed = time.time() - started
    print('Prediction time for gbr is ' + str(elapsed) + '\n')

    as_uint8 = raw_predictions.astype('uint8')
    print(as_uint8)
    return as_uint8

示例#15

0

显示文件

def NonlinReg(coeff, regressor='GBR', features=4, interval=0, length=1):
    '''
    NonlinReg: Non-linear Regression Model

    Builds sliding windows of ``features`` consecutive values from
    ``coeff[0]``, fits the chosen regressor to predict the value ``interval``
    steps after each window, then extends the target sequence by ``length``
    rolling one-step predictions.

    coeff: Input sequence disposed by WT (Wavelet Transformation Function);
        only ``coeff[0]`` is used

    regressor: Non-linear regressor, 'GBR' (default) or 'SVR'

    features: Days used to predict, 4 default

    interval: Prediction lagging, 0 default

    length: Number of rolling predictions appended, 1 default

    Returns the training targets followed by the ``length`` predictions.

    Raises ValueError for an unknown ``regressor`` (previously this surfaced
    as an UnboundLocalError at the return statement).
    '''
    if regressor == 'GBR':
        model = GBR(learning_rate=0.1, n_estimators=80, max_depth=2)
    elif regressor == 'SVR':
        model = svm.SVR(kernel='rbf', C=100, gamma=3)
    else:
        raise ValueError(
            "unknown regressor %r; expected 'GBR' or 'SVR'" % (regressor,))

    series = coeff[0]
    X, Y = [], []
    for i in range(len(series)):
        if i + features + interval < len(series):
            X.append(series[i:i + features])
            Y.append(series[i + features + interval])
    X = np.array(X)
    Y = np.array(Y)

    model.fit(X, Y)

    # Work on copies so the fitted training arrays are not aliased.
    X_ = X.copy()
    Y_ = Y.copy()
    for _ in range(length):
        # Next window: drop the oldest value, append the lagged target.
        next_window = np.concatenate(
            (X_[-1][-features + 1:], Y_[[-interval - 1]]))
        X_ = np.concatenate((X_, np.array([next_window])))
        # Estimators expect a 2-D (n_samples, n_features) array, so the
        # single window is reshaped into one row before predicting.
        Y_ = np.concatenate((Y_, model.predict(X_[-1].reshape(1, -1))))

    return Y_

示例#16

0

显示文件

def GDBT_ALL_train(trainFileName, testFileName):
    """Fit a GBDT on the training file and report predictions next to test targets.

    Returns:
        A list of ``[item, 'all', '<prediction>', '<target>']`` rows, with
        both values floored at 0 and formatted with two decimals.
    """
    train_X, train_y, _ = ld.loadData_all(trainFileName)
    test_X, test_y, items = ld.loadData_all(testFileName)
    estimator = GradientBoostingRegressor(
        loss='lad', n_estimators=40, learning_rate=0.1, max_depth=3)
    pred_y = estimator.fit(train_X, train_y).predict(test_X)

    def _fmt(value):
        # Negative values are reported as zero.
        return '%.2f' % max(value, 0)

    rows = []
    for pos in range(len(test_X)):
        rows.append([items[pos], 'all', _fmt(pred_y[pos]), _fmt(test_y[pos])])
    return rows

示例#17

0

显示文件

文件： predict.py 项目： PandoraLS/python_toys

def main(train, test, filepath):
    """Walk-forward GBR forecast over a series, plotted against the actual values.

    ``train`` and ``test`` are multiplied by 96 to obtain the window sizes.
    For every test step the regressor is refitted on a window of
    ``TRAIN_SIZE`` integer positions and asked for the value at position
    ``TRAIN_SIZE + 1 + step``. Nothing is returned; the result is shown with
    matplotlib.
    """
    if not filepath:
        click.echo("need filepath")
        return

    X, Y = get_data(filepath)

    if not (train and test):
        click.echo("need train or test size")
        return

    train_size = 96 * int(train)
    test_size = 96 * int(test)

    # Everything after the initial training window is plotted as actual data.
    X_test = X[train_size:]
    Y_test = Y[train_size:]

    # Other estimators (SVR, decision tree, extra trees, XGBoost, random
    # forest) were tried here previously; GBR with stumps is the active one.
    clf = GradientBoostingRegressor(n_estimators=100, max_depth=1)

    predict_list = []
    for step in range(test_size):
        window_positions = [[pos] for pos in range(step, train_size + step)]
        clf.fit(window_positions, Y[step:train_size + step])
        query = np.array([train_size + 1 + step]).reshape(1, -1)
        predict_list.append(clf.predict(query))

    forecast_positions = list(range(train_size + 1, train_size + test_size + 1))
    plt.plot(forecast_positions,
             predict_list,
             linestyle='-',
             color='red',
             label='prediction model')
    plt.plot(X_test, Y_test, linestyle='-', color='blue', label='actual model')
    plt.legend(loc=1, prop={'size': 12})
    plt.show()

示例#18

0

显示文件

文件： GBDT.py 项目： liyumeng/AliRecommendProject

# Python 2 script fragment: fit a GBR on previously loaded data, score every
# row of the test file, and write the 7000 highest-scoring pairs to result.csv.
# NOTE(review): ``X_train`` and the initial ``label`` are defined above this fragment.

# Keep only the first element of each label entry as the training target.
label = [item[0] for item in label]
y_train = np.array(label)
est = GradientBoostingRegressor(n_estimators=150,
                                learning_rate=0.1,
                                max_depth=3,
                                random_state=0,
                                loss='ls',
                                verbose=1).fit(X_train, y_train)
print 'testing...'

reader = BinReader(
    ur'F:\AliRecommendHomeworkData\1212新版\test18.expand.norm.bin')
reader.open()
# Pre-size the result list; one (userid, itemid, score) tuple per input line.
result = [0] * reader.LineCount
for i in xrange(reader.LineCount):
    (x, userid, itemid, label) = reader.readline()
    # The first feature is overwritten with 1 before scoring.
    x[0] = 1
    y = est.predict([x])[0]
    result[i] = (userid, itemid, y)
    # Progress report every 10000 rows.
    if i % 10000 == 0:
        print '%d/%d' % (i, reader.LineCount)

# Highest predicted score first; keep the top 7000.
result.sort(key=lambda x: x[2], reverse=True)
result = result[:7000]

# Message below reads "writing output...".
print ur'正在输出...'
with open('result.csv', 'w') as f:
    for item in result:
        f.write('%d,%d\n' % (item[0], item[1]))
# Prints the lowest kept score ("threshold") and the total sample count.
print ur'阈值：', result[-1][2]
print ur'样本总数:', reader.LineCount

示例#19

0

显示文件

文件： features_to_select_volker.py 项目： Pold87/pikki-virus

    X_train,X_test, y_train, y_test, y_train_numMosquitos, y_test_numMosquitos = year_train_test_split(
        train_for_loo,
        'WnvPresent',
        year)      

    X_train.to_csv("data_per_year/" + str(year) + "X_train.csv", index=False)
    X_test.to_csv("data_per_year/" + str(year) + "X_test.csv", index=False)
    y_train.to_csv("data_per_year/" + str(year) + "y_train.csv", index=False)
    y_test.to_csv("data_per_year/" + str(year) + "y_test.csv", index=False)


    if predict_num_mosquitos:
        reg = GradientBoostingRegressor(n_estimators=40)

        reg.fit(X_train.drop(['NumMosquitos'], axis=1), y_train_numMosquitos.astype(float))
        predicted_mosquitos = reg.predict(X_test)
        X_test['NumMosquitos'] = predicted_mosquitos
        print("Accuracy is", metrics.r2_score(y_test_numMosquitos, predicted_mosquitos))

    clf.fit(X_train.drop(['NumMosquitos'], axis=1), y_train)

    y_pred = clf.predict_proba(X_test)[:, 1]
    # print(y_pred)

    # y_pred = clf.predict_proba(X_test) # For xgbwrapper best score: 57.2
    #         y_pred = clf.predict_proba(X_test)
    # y_pred = clf.predict(X_test)



    non_carriers_mask = (X_test.Species == species_encoder.transform('CULEX SALINARIUS')) |\

示例#20

0

显示文件

文件： DeepOLS&SecondStage.py 项目： xz2139/Machine-Learning-Project

    'n_estimators': [10, 50, 100, 200, 300],
    'max_depth': [2, 3, 4, 5],
    'max_features': ['auto', 'sqrt', 'log2']
}
# Grid-search ``gbr`` over ``parameters``; the last column of svd_train_df is
# used as the target and all preceding columns as features.
gbr_cv = GridSearchCV(gbr, parameters)
gbr_cv.fit(svd_train_df.iloc[:, :-1], svd_train_df.iloc[:, -1])

#best parameters
# (bare expression: only displays a value in an interactive session)
gbr_cv.best_params_

# Refit with hard-coded hyperparameters.
# NOTE(review): presumably copied from best_params_ above - confirm.
gbr_opt = GradientBoostingRegressor(max_depth=2,
                                    max_features='log2',
                                    n_estimators=10)
gbr_opt.fit(svd_train_df.iloc[:, :-1], svd_train_df.iloc[:, -1])

# Evaluate on the test frame using the same feature/target column split.
gbr_opt_tfidf_2gram_pred = gbr_opt.predict(svd_test_df.iloc[:, :-1])
gbr_opt_tfidf_2gram_mse = mean_squared_error(svd_test_df.iloc[:, -1],
                                             gbr_opt_tfidf_2gram_pred)
gbr_opt_tfidf_2gram_mae = mean_absolute_error(svd_test_df.iloc[:, -1],
                                              gbr_opt_tfidf_2gram_pred)

print('mean squared error is {}'.format(gbr_opt_tfidf_2gram_mse))
print('mean absolute error is {}'.format(gbr_opt_tfidf_2gram_mae))

### Look at R^2:

inference_data = pd.read_csv('bio_txt.csv')

true_df = inference_data[[
    'index', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11',
    '12', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23',

示例#21

0

显示文件

文件： GBDT.py 项目： 343829084/AliRecommendProject

from sklearn.ensemble.gradient_boosting import GradientBoostingRegressor

# Python 2 script: train a GBR on the binary training file, score every row of
# the test file, and write the 7000 highest-scoring pairs to result.csv.
(data,label,items) = BinReader.readData(ur'F:\AliRecommendHomeworkData\1212新版\train1217.expand.norm.bin') 

X_train = np.array(data)
# Keep only the first element of each label entry as the training target.
label = [item[0] for item in label]
y_train = np.array(label)
est = GradientBoostingRegressor(n_estimators=150, learning_rate=0.1,max_depth=3, random_state=0, loss='ls',verbose=1).fit(X_train, y_train)
print 'testing...'

reader = BinReader(ur'F:\AliRecommendHomeworkData\1212新版\test18.expand.norm.bin')
reader.open()
# Pre-size the result list; one (userid, itemid, score) tuple per input line.
result = [0] * reader.LineCount
for i in xrange(reader.LineCount):
    (x,userid,itemid,label) = reader.readline()
    # The first feature is overwritten with 1 before scoring.
    x[0] = 1
    y = est.predict([x])[0]
    result[i] = (userid,itemid,y)
    # Progress report every 10000 rows.
    if i % 10000 == 0:
        print '%d/%d' % (i,reader.LineCount)
    
# Highest predicted score first; keep the top 7000.
result.sort(key=lambda x:x[2],reverse=True)
result = result[:7000]


# Message below reads "writing output...".
print ur'正在输出...'
with open('result.csv','w') as f:
    for item in result:
        f.write('%d,%d\n' % (item[0],item[1]))
# Prints the lowest kept score ("threshold") and the total sample count.
print ur'阈值：',result[-1][2]
print ur'样本总数:',reader.LineCount

示例#22

0

显示文件

文件： crossvalidate.py 项目： Pold87/pikki-virus

        X_train.to_csv("data_per_year/" + str(year) + "X_train.csv",
                       index=False)
        X_test.to_csv("data_per_year/" + str(year) + "X_test.csv", index=False)
        y_train.to_csv("data_per_year/" + str(year) + "y_train.csv",
                       index=False)
        y_test.to_csv("data_per_year/" + str(year) + "y_test.csv", index=False)

        print(X_test.columns)

        if predict_num_mosquitos:

            reg = GradientBoostingRegressor(n_estimators=40)

            reg.fit(X_train.drop(['NumMosquitos'], axis=1),
                    y_train_numMosquitos.astype(float))
            predicted_mosquitos = reg.predict(X_test)
            X_test['NumMosquitos'] = predicted_mosquitos
            print("Accuracy is",
                  metrics.r2_score(y_test_numMosquitos, predicted_mosquitos))

        print(len(X_train))
        print(len(y_train))

        clf.fit(X_train, y_train)

        y_pred = clf.predict_proba(X_test)[:, 1]
        # print(y_pred)

        # y_pred = clf.predict_proba(X_test) # For xgbwrapper best score: 57.2
        #         y_pred = clf.predict_proba(X_test)
        # y_pred = clf.predict(X_test)

示例#23

0

显示文件

文件： GB_train.py 项目： rafaelc007/Kaggle-House-Prices-Advanced-Regression-Techniques

    # preprocess x_pred
    X_pred_ordinal_transf = preprocess_X(X_pred_ordinal)
    X_pred = np.concatenate((X_pred_numeric, X_pred_ordinal_transf.T), axis=1)
    return [X_data, y_data, X_pred, test_Ids]


# Script body: train a gradient boosting regressor on a log-scaled target,
# cross-validate it, and write the back-transformed predictions to a file.
X_data, y_data, X_pred, test_Ids = get_data()

# Model parameters
lr = 0.15  # learning rate
n_est = 200  # number of boosting stages

# log scaling the target
y_data = np.log(y_data)

# training a Gradient boosting regressor
model = GBR(learning_rate=lr, n_estimators=n_est, random_state=0)
model.fit(X_data, y_data)

# evaluate model

# 5-fold cross-validation on the training data, scored with negated MSE.
scored = cross_val_score(model,
                         X_data,
                         y=y_data,
                         cv=5,
                         scoring='neg_mean_squared_error',
                         n_jobs=2)
# Undo the log transform on the predictions and round to 2 decimals.
prices = np.round(np.exp(model.predict(X_pred)), 2)
Gen_output_file(test_Ids, prices)
print('scored {0}'.format(np.mean(scored)))

示例#24

0

显示文件

文件： boston_regression.py 项目： tuchengxiaoweidu/dust_emission_model

# Legend for the metric abbreviations.
print('mae \t mean_absolute_error')
print('mse \t mean_squared_error')
print('r2 \t coefficient of determination')
print(70 * '-')
# NOTE(review): this exit() stops the script here, so the plotting and
# prediction code below never runs - confirm whether that is intended.
exit()
# Visualize model performance
plt.figure()
x = np.arange(training_data_input.shape[0])
plt.plot(x, training_data_output, color='r', label='origin y')
color_list = ['k.', 'b.', 'go', 'yv', 'c*', 'm^']  # format strings, one per model
for i, pre_y in enumerate(pre_y_list):  # iterate over each regression model's predictions with its index
    plt.plot(x, pre_y_list[i], color_list[i], label=model_names[i])  # plot each model's predictions
plt.title('regression result comparison')  # title
plt.legend(loc='upper right')
plt.xlabel('test data number')
plt.ylabel('real and predicted values')
# plt.savefig("regression compare.jpg", dpi=500)
plt.show()

# Apply the model
print('regression prediction:')
print('predict data \t real data')
new_pre_y = model_gbr.predict(test_data_input)  # predict with the GBR model
model_gbr_score = model_gbr.score(test_data_input, test_data_output)
print("The score of model_gbr is : %f" % model_gbr_score)
for i in range(len(test_data_input)):
    print('  %.2f \t %0.2f' % (new_pre_y[i], test_data_output[i]))  # print predicted vs. real value for each data point

# if __name__ == "__main__":
#     svm_baseline()

示例#25

0

显示文件

class GradientBoostingRegressor:
    """Wrapper around scikit-learn's GradientBoostingRegressor.

    Hyperparameters are stored as given and coerced to their numeric types in
    ``fit``. ``get_cs`` describes the hyperparameter search space.
    """

    def __init__(self,
                 loss,
                 learning_rate,
                 n_estimators,
                 subsample,
                 min_samples_split,
                 min_samples_leaf,
                 min_weight_fraction_leaf,
                 max_depth,
                 criterion,
                 max_features,
                 max_leaf_nodes,
                 min_impurity_decrease,
                 random_state=None,
                 verbose=0,
                 **kwargs):
        """Store the hyperparameters; extra keyword arguments are accepted and ignored."""
        self.loss = loss
        self.learning_rate = learning_rate
        self.n_estimators = n_estimators
        self.subsample = subsample
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.min_weight_fraction_leaf = min_weight_fraction_leaf
        self.max_depth = max_depth
        self.criterion = criterion
        self.max_features = max_features
        self.max_leaf_nodes = max_leaf_nodes
        self.min_impurity_decrease = min_impurity_decrease
        self.random_state = random_state
        self.verbose = verbose
        self.estimator = None
        self.fully_fit_ = False
        self.time_limit = None

    def fit(self, X, y, sample_weight=None):
        """Coerce the stored hyperparameters, build the estimator and fit it.

        Args:
            X: Training features; ndarrays are made C-contiguous first.
            y: Training targets.
            sample_weight: Optional weights forwarded to the estimator's ``fit``.

        Returns:
            ``self``.
        """
        # Public import path; the private ``sklearn.ensemble.gradient_boosting``
        # module no longer exists in current scikit-learn releases.
        from sklearn.ensemble import GradientBoostingRegressor as GBR
        # Special fix for gradient boosting!
        if isinstance(X, np.ndarray):
            X = np.ascontiguousarray(X, dtype=X.dtype)

        self.learning_rate = float(self.learning_rate)
        self.n_estimators = int(self.n_estimators)
        self.subsample = float(self.subsample)
        self.min_samples_split = int(self.min_samples_split)
        self.min_samples_leaf = int(self.min_samples_leaf)
        self.min_weight_fraction_leaf = float(self.min_weight_fraction_leaf)
        # ``check_none`` decides whether the stored value stands for "no limit".
        if check_none(self.max_depth):
            self.max_depth = None
        else:
            self.max_depth = int(self.max_depth)
        self.max_features = float(self.max_features)
        if check_none(self.max_leaf_nodes):
            self.max_leaf_nodes = None
        else:
            self.max_leaf_nodes = int(self.max_leaf_nodes)
        self.min_impurity_decrease = float(self.min_impurity_decrease)
        self.verbose = int(self.verbose)

        self.estimator = GBR(
            loss=self.loss,
            learning_rate=self.learning_rate,
            n_estimators=self.n_estimators,
            subsample=self.subsample,
            min_samples_split=self.min_samples_split,
            min_samples_leaf=self.min_samples_leaf,
            min_weight_fraction_leaf=self.min_weight_fraction_leaf,
            max_depth=self.max_depth,
            criterion=self.criterion,
            max_features=self.max_features,
            max_leaf_nodes=self.max_leaf_nodes,
            # Previously stored and cast but never forwarded to the estimator.
            min_impurity_decrease=self.min_impurity_decrease,
            random_state=self.random_state,
            verbose=self.verbose,
            warm_start=True,
        )

        self.estimator.fit(X, y, sample_weight=sample_weight)

        return self

    def predict(self, X):
        """Return the fitted estimator's predictions for ``X``.

        Raises:
            NotImplementedError: If ``fit`` has not been called yet.
        """
        if self.estimator is None:
            raise NotImplementedError
        return self.estimator.predict(X)

    @staticmethod
    def get_cs():
        """Build and return the ConfigurationSpace of tunable hyperparameters."""
        cs = ConfigurationSpace()
        loss = CategoricalHyperparameter("loss", ['ls', 'lad'],
                                         default_value='ls')
        learning_rate = UniformFloatHyperparameter(name="learning_rate",
                                                   lower=0.01,
                                                   upper=1,
                                                   default_value=0.1,
                                                   log=True)
        n_estimators = UniformIntegerHyperparameter("n_estimators",
                                                    50,
                                                    500,
                                                    default_value=200)
        max_depth = UniformIntegerHyperparameter(name="max_depth",
                                                 lower=1,
                                                 upper=10,
                                                 default_value=3)
        criterion = CategoricalHyperparameter('criterion',
                                              ['friedman_mse', 'mse', 'mae'],
                                              default_value='friedman_mse')
        min_samples_split = UniformIntegerHyperparameter(
            name="min_samples_split", lower=2, upper=20, default_value=2)
        min_samples_leaf = UniformIntegerHyperparameter(
            name="min_samples_leaf", lower=1, upper=20, default_value=1)
        min_weight_fraction_leaf = UnParametrizedHyperparameter(
            "min_weight_fraction_leaf", 0.)
        subsample = UniformFloatHyperparameter(name="subsample",
                                               lower=0.1,
                                               upper=1.0,
                                               default_value=1.0)
        max_features = UniformFloatHyperparameter("max_features",
                                                  0.1,
                                                  1.0,
                                                  default_value=1)
        # The string "None" is turned into a real ``None`` by ``check_none`` in ``fit``.
        max_leaf_nodes = UnParametrizedHyperparameter(name="max_leaf_nodes",
                                                      value="None")
        min_impurity_decrease = UnParametrizedHyperparameter(
            name='min_impurity_decrease', value=0.0)
        cs.add_hyperparameters([
            loss, learning_rate, n_estimators, max_depth, criterion,
            min_samples_split, min_samples_leaf, min_weight_fraction_leaf,
            subsample, max_features, max_leaf_nodes, min_impurity_decrease
        ])

        return cs

示例#26

0

显示文件

文件： model_performance.py 项目： xz2139/Machine-Learning-Project

# Baseline 1: fit ``svrr`` on the training matrix and report MSE/MAE on the test set.
svrr.fit(tf_train_2gram, Y_train)

svrr_base_tfidf_2gram_pred = svrr.predict(tf_test_2gram)
svrr_tfidf_2gram_base_mse = mean_squared_error(Y_test,
                                               svrr_base_tfidf_2gram_pred)
svrr_tfidf_2gram_base_mae = mean_absolute_error(Y_test,
                                                svrr_base_tfidf_2gram_pred)

print('svrr mean squared error is {}'.format(svrr_tfidf_2gram_base_mse))
print('svrr mean absolute error is {}'.format(svrr_tfidf_2gram_base_mae))

# Gradient boosting accepts a sparse matrix in "fit" but (at the time of
# writing) not in "predict", hence the ``todense()`` call below.
# Baseline 2: gradient boosting regressor with default settings.

gbr = GradientBoostingRegressor()
gbr.fit(tf_train_2gram, Y_train)

gbr_base_tfidf_2gram_pred = gbr.predict(tf_test_2gram.todense())
gbr_tfidf_2gram_base_mse = mean_squared_error(Y_test,
                                              gbr_base_tfidf_2gram_pred)
gbr_tfidf_2gram_base_mae = mean_absolute_error(Y_test,
                                               gbr_base_tfidf_2gram_pred)

print('gbr mean squared error is {}'.format(gbr_tfidf_2gram_base_mse))
print('gbr mean absolute error is {}'.format(gbr_tfidf_2gram_base_mae))
# The bare string below is a no-op statement recording the experiment's conclusion.
'''from the experiment, we see that gradient boosting tree has the best performance, however, overall, the text feature doesn't predicte sentencing length very well according to mean absolute error and mean squared error.'''

#plt.scatter(Y_test,gbr_base_tfidf_2gram_pred)
#plt.show()
#savefig('GBR_predicted_VS_true.png')

示例#27

0

显示文件

def gbr(x_train, x_test, y_train):
    """Fit a gradient boosting regressor and plot its predictions for x_test.

    A ``GradientBoostingRegressor`` created with ``random_state=1`` is fitted
    on ``(x_train, y_train)``. Its predictions for ``x_test`` are passed to
    ``plt.plot`` and whatever ``plt.plot`` returns is printed. The function
    itself returns nothing.
    """
    # Train the regressor
    model = GradientBoostingRegressor(random_state=1)
    model.fit(x_train, y_train)

    # Predict on the held-out inputs, draw them, and print the plot handles
    predictions = model.predict(x_test)
    plotted = plt.plot(predictions)
    print(plotted)

示例#28

0

显示文件

    line_train_y.append(inv_y[i + windows_size - 1])
# Shape the flat training feature list into one row per window; each row has
# 2 * windows_size columns.
line_train_x = numpy.array(line_train_x).reshape(
    line_test_size - windows_size + 1, 2 * windows_size)
# line_train_x = scaler.fit_transform(line_train_x)
model.fit(line_train_x, line_train_y)

# Build the test features the same way: for every window, windows_size ARIMA
# predictions followed by windows_size values of inv_yhat (presumably the LSTM
# output, judging by the commented-out plot label below -- confirm).
line_test_x = []
for i in range(arima.test_size - line_test_size - windows_size + 1):
    for j in range(windows_size):
        line_test_x.append(arima.predictions[line_test_size + i + j][0])
    for j in range(windows_size):
        line_test_x.append(inv_yhat[line_test_size + i + j])
line_test_x = numpy.array(line_test_x).reshape(
    arima.test_size - line_test_size - windows_size + 1, 2 * windows_size)
# line_test_x = scaler.fit_transform(line_test_x)
dyn_combined = model.predict(line_test_x)

# Error metrics of the combined prediction against the observed values.
mse = mean_squared_error(inv_y[test_start:], dyn_combined)
# RMSE is the square root of the MSE just computed; no need to recompute it.
rmse = math.sqrt(mse)
mae = mean_absolute_error(inv_y[test_start:], dyn_combined)
mape = arima.mean_a_p_e(inv_y[test_start:], dyn_combined)
print('dyn combined Test MAE:%.3f MSE: %.3f RMSE:%.3f MAPE:%.3f' %
      (mae, mse, rmse, mape))

# Plot observed values against the combined prediction.
plt.figure()
plt.plot(inv_y[test_start:], '-', label="real flow")
# plt.plot(arima.predictions, 'x--', color='y', label="ARIMA")
# plt.plot(inv_yhat, 'x--', color='red', label="LSTM")
plt.plot(dyn_combined, '--', color='red', label="combined")
plt.legend(loc='upper right')
plt.xlabel("period(15-minute intervals)")

示例#29

0

显示文件

# gbr
# NOTE: the private module ``sklearn.ensemble.gradient_boosting`` is no longer
# importable in current scikit-learn; the public ``sklearn.ensemble`` path
# works on both old and new releases.
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt


model = GradientBoostingRegressor(n_estimators=10000)
X_train, X_test, y_train, y_test = train_test_split(state_data,action_data,test_size=0.33)

print(X_train.shape,X_test.shape,y_train.shape,y_test.shape)

# GBR can only predict a single target, so select the target column by index
target_index = 0
model.fit(X_train,y_train[:,target_index])
test_pre = model.predict(X_test)
# Scatter of true target values (x axis) against predictions (y axis)
plt.plot(y_test[:,target_index],test_pre,"ro")
plt.show()
# Print the importance of each feature
print(model.feature_importances_)
'''
importance分析结果:
对于油门和刹车这个target,importance为
速度标量       0.45935925  
速度x         0.01006275  
速度y         0.00771682  
加速度标量     0.3719029   
加速度x        0.05408772  
加速度y        0.00396671
车道线c0       0.01144348  
车道线c1       0.00659045

示例#30

0

显示文件

    6.58
],
                 [
                     0.7842, 0., 8.14, 0., 0.538, 5.99, 81.7, 4.2579, 4., 307.,
                     21., 386.75, 14.67
                 ],
                 [
                     0.80271, 0., 8.14, 0., 0.538, 5.456, 36.6, 3.7965, 4.,
                     307., 21., 288.99, 11.69
                 ],
                 [
                     0.7258, 0., 8.14, 0., 0.538, 5.727, 69.5, 3.7965, 4.,
                     307., 21., 390.95, 11.28
                 ]]  # 要预测的新数据集
for i, new_point in enumerate(new_point_set):  # iterate over every data point to predict
    # predict() expects 2-D input (n_samples, n_features), so wrap the single
    # sample in a list; the result is a length-1 array.
    new_pre_y = model_gbr.predict([new_point])  # predict with GBR
    # Index the single prediction so '%.2f' formats a scalar, not an array.
    print('predict for new point %d is:  %.2f' % (i + 1, new_pre_y[0])
          )  # print the prediction for each data point

####################################################################
# 4.3 分类分析
# 导入库
import numpy as np  # 导入numpy库
from sklearn.model_selection import train_test_split  # 数据分区库
from sklearn import tree  # 导入决策树库
from sklearn.metrics import accuracy_score, auc, confusion_matrix, f1_score, precision_score, recall_score, \
    roc_curve  # 导入指标库
import prettytable  # 导入表格库
import pydotplus  # 导入dot插件库
import matplotlib.pyplot as plt  # 导入图形展示库

示例#31

0

显示文件

文件： mart.py 项目： Veterun/dgbrt

        return sum((tree.predict(X) for tree in self.trees))

    def fit(self, X, y):
        """Grow the ensemble by fitting each new tree to the current residuals.

        Runs ``self.n_boosting_steps`` rounds. Every round builds a ``Node``
        from ``X`` and ``y - self.predict(X)``, fits it with
        ``max_tree_size=self.max_tree_size`` and appends it to ``self.trees``.
        """
        for _ in range(self.n_boosting_steps):
            # Residuals are recomputed each round because self.trees has grown.
            tree = Node(X, y - self.predict(X))
            tree.fit(max_tree_size=self.max_tree_size)
            self.trees.append(tree)

if __name__ == '__main__':
    # Compare scikit-learn's gradient boosting with the MART implementation
    # from this file on a train/test split of the Boston housing data.

    try:
        from sklearn.model_selection import train_test_split
    except ImportError:  # scikit-learn < 0.18 only ships cross_validation
        from sklearn.cross_validation import train_test_split
    from sklearn.metrics import mean_squared_error
    # NOTE(review): load_boston is no longer available in recent scikit-learn
    # releases; running this demo there needs a different dataset loader.
    from sklearn.datasets import load_boston

    boston = load_boston()
    X_train, X_test, y_train, y_test = train_test_split(boston.data,
                                                        boston.target,
                                                        test_size=0.33)

    from sklearn.ensemble import GradientBoostingRegressor
    sk_gbrt = GradientBoostingRegressor(n_estimators=20)
    sk_gbrt.fit(X_train, y_train)
    # %-formatting produces the same text on Python 2 and Python 3, unlike the
    # Python 2 only print statement.
    print("sklearn test MSE %s" % mean_squared_error(y_test, sk_gbrt.predict(X_test)))

    mart = MART(10, 15)
    mart.fit(X_train, y_train)
    print("mart test MSE %s" % mean_squared_error(y_test, mart.predict(X_test)))

示例#32

0

显示文件

文件： gradient_boosting.py 项目： williamy1996/AutoExpression

class GradientBoostingRegressor(IterativeComponentWithSampleWeight,
                                BaseRegressionModel):
    """Iteratively fitted wrapper around scikit-learn's gradient boosting regressor.

    The wrapped estimator is created lazily by :meth:`iterative_fit` with
    ``warm_start=True``, so repeated calls add boosting stages until
    ``n_estimators`` stages have been requested.
    """

    def __init__(self,
                 loss,
                 learning_rate,
                 n_estimators,
                 subsample,
                 min_samples_split,
                 min_samples_leaf,
                 min_weight_fraction_leaf,
                 max_depth,
                 criterion,
                 max_features,
                 max_leaf_nodes,
                 min_impurity_decrease,
                 random_state=None,
                 verbose=0):
        """Store the hyperparameters; the estimator itself is built on first fit.

        Values are stored as given and converted to their numeric types in
        :meth:`iterative_fit`.
        """
        self.loss = loss
        self.learning_rate = learning_rate
        self.n_estimators = n_estimators
        self.subsample = subsample
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.min_weight_fraction_leaf = min_weight_fraction_leaf
        self.max_depth = max_depth
        self.criterion = criterion
        self.max_features = max_features
        self.max_leaf_nodes = max_leaf_nodes
        self.min_impurity_decrease = min_impurity_decrease
        self.random_state = random_state
        self.verbose = verbose
        self.estimator = None
        self.fully_fit_ = False
        self.start_time = time.time()
        self.time_limit = None

    def iterative_fit(self, X, y, sample_weight=None, n_iter=1, refit=False):
        """Fit ``n_iter`` additional boosting stages and return ``self``.

        On the first call (or when ``refit`` is true) the hyperparameters are
        converted to their numeric types and a new warm-start estimator with
        ``n_iter`` stages is created. On later calls the estimator's stage
        count is increased by ``n_iter``, capped at ``self.n_estimators``.
        ``self.fully_fit_`` is set once the estimator's stage count reaches
        ``self.n_estimators``.
        """
        # Public import path; the private ``sklearn.ensemble.gradient_boosting``
        # module is no longer importable in current scikit-learn.
        from sklearn.ensemble import GradientBoostingRegressor as GBR
        # Special fix for gradient boosting!
        if isinstance(X, np.ndarray):
            X = np.ascontiguousarray(X, dtype=X.dtype)
        if refit:
            self.estimator = None

        if self.estimator is None:
            self.learning_rate = float(self.learning_rate)
            self.n_estimators = int(self.n_estimators)
            self.subsample = float(self.subsample)
            self.min_samples_split = int(self.min_samples_split)
            self.min_samples_leaf = int(self.min_samples_leaf)
            self.min_weight_fraction_leaf = float(
                self.min_weight_fraction_leaf)
            if check_none(self.max_depth):
                self.max_depth = None
            else:
                self.max_depth = int(self.max_depth)
            self.max_features = float(self.max_features)
            if check_none(self.max_leaf_nodes):
                self.max_leaf_nodes = None
            else:
                self.max_leaf_nodes = int(self.max_leaf_nodes)
            self.min_impurity_decrease = float(self.min_impurity_decrease)
            self.verbose = int(self.verbose)

            self.estimator = GBR(
                loss=self.loss,
                learning_rate=self.learning_rate,
                n_estimators=n_iter,
                subsample=self.subsample,
                min_samples_split=self.min_samples_split,
                min_samples_leaf=self.min_samples_leaf,
                min_weight_fraction_leaf=self.min_weight_fraction_leaf,
                max_depth=self.max_depth,
                criterion=self.criterion,
                max_features=self.max_features,
                max_leaf_nodes=self.max_leaf_nodes,
                # Previously converted above but never forwarded, so the
                # configured value had no effect on the estimator.
                min_impurity_decrease=self.min_impurity_decrease,
                random_state=self.random_state,
                verbose=self.verbose,
                warm_start=True,
            )

        else:
            self.estimator.n_estimators += n_iter
            self.estimator.n_estimators = min(self.estimator.n_estimators,
                                              self.n_estimators)

        self.estimator.fit(X, y, sample_weight=sample_weight)

        # Apparently this if is necessary
        if self.estimator.n_estimators >= self.n_estimators:
            self.fully_fit_ = True

        return self

    def configuration_fully_fitted(self):
        """Return True once the estimator holds at least ``n_estimators`` stages."""
        if self.estimator is None:
            return False
        return len(self.estimator.estimators_) >= self.n_estimators

    def predict(self, X):
        """Return the wrapped estimator's predictions for ``X``.

        Raises:
            NotImplementedError: if no estimator has been fitted yet.
        """
        if self.estimator is None:
            raise NotImplementedError
        return self.estimator.predict(X)

    @staticmethod
    def get_properties(dataset_properties=None):
        """Return the static capability description of this component."""
        return {
            'shortname': 'GB',
            'name': 'Gradient Boosting Regressor',
            'handles_regression': True,
            'handles_classification': False,
            'handles_multiclass': False,
            'handles_multilabel': False,
            'is_deterministic': True,
            'input': (DENSE, UNSIGNED_DATA),
            'output': (PREDICTIONS, )
        }

    @staticmethod
    def get_hyperparameter_search_space(dataset_properties=None,
                                        optimizer='smac'):
        """Return the hyperparameter search space for the given optimizer.

        For ``optimizer='smac'`` a ``ConfigurationSpace`` is returned; for
        ``optimizer='tpe'`` a dict of hyperopt expressions is returned. Any
        other value falls through and yields ``None``.
        """
        if optimizer == 'smac':
            cs = ConfigurationSpace()
            loss = CategoricalHyperparameter("loss", ['ls', 'lad'],
                                             default_value='ls')
            learning_rate = UniformFloatHyperparameter(name="learning_rate",
                                                       lower=0.01,
                                                       upper=1,
                                                       default_value=0.1,
                                                       log=True)
            n_estimators = UniformIntegerHyperparameter("n_estimators",
                                                        50,
                                                        500,
                                                        default_value=200)
            max_depth = UniformIntegerHyperparameter(name="max_depth",
                                                     lower=1,
                                                     upper=10,
                                                     default_value=3)
            criterion = CategoricalHyperparameter(
                'criterion', ['friedman_mse', 'mse', 'mae'],
                default_value='friedman_mse')
            min_samples_split = UniformIntegerHyperparameter(
                name="min_samples_split", lower=2, upper=20, default_value=2)
            min_samples_leaf = UniformIntegerHyperparameter(
                name="min_samples_leaf", lower=1, upper=20, default_value=1)
            min_weight_fraction_leaf = UnParametrizedHyperparameter(
                "min_weight_fraction_leaf", 0.)
            subsample = UniformFloatHyperparameter(name="subsample",
                                                   lower=0.1,
                                                   upper=1.0,
                                                   default_value=1.0)
            max_features = UniformFloatHyperparameter("max_features",
                                                      0.1,
                                                      1.0,
                                                      default_value=1)
            # The string "None" is turned into a real None via check_none()
            # in iterative_fit.
            max_leaf_nodes = UnParametrizedHyperparameter(
                name="max_leaf_nodes", value="None")
            min_impurity_decrease = UnParametrizedHyperparameter(
                name='min_impurity_decrease', value=0.0)
            cs.add_hyperparameters([
                loss, learning_rate, n_estimators, max_depth, criterion,
                min_samples_split, min_samples_leaf, min_weight_fraction_leaf,
                subsample, max_features, max_leaf_nodes, min_impurity_decrease
            ])

            return cs
        elif optimizer == 'tpe':
            from hyperopt import hp
            space = {
                'loss':
                hp.choice('gb_loss', ["ls", "lad"]),
                'learning_rate':
                hp.loguniform('gb_learning_rate', np.log(0.01), np.log(1)),
                # 'n_estimators': hp.randint('gb_n_estimators', 451) + 50,
                'n_estimators':
                hp.choice('gb_n_estimators', [100]),
                'max_depth':
                hp.randint('gb_max_depth', 8) + 1,
                'criterion':
                hp.choice('gb_criterion', ['friedman_mse', 'mse', 'mae']),
                'min_samples_split':
                hp.randint('gb_min_samples_split', 19) + 2,
                'min_samples_leaf':
                hp.randint('gb_min_samples_leaf', 20) + 1,
                'min_weight_fraction_leaf':
                hp.choice('gb_min_weight_fraction_leaf', [0]),
                'subsample':
                hp.uniform('gb_subsample', 0.1, 1),
                'max_features':
                hp.uniform('gb_max_features', 0.1, 1),
                'max_leaf_nodes':
                hp.choice('gb_max_leaf_nodes', [None]),
                'min_impurity_decrease':
                hp.choice('gb_min_impurity_decrease', [0])
            }

            return space

示例#33

0

显示文件

文件： prediction_test_refactored.py 项目： JefNtungila/202mais

ax.set_title('Heatmap of Methylation Values')

ax.set_xlabel("Samples sorted ascending by age")
ax.set_ylabel("19 selected cgp positions")
plt.savefig('heatmap19.jpg')
print(data19_ages)

# Trying GradientBoostingRegressor Learning
# Public import path; the private ``sklearn.ensemble.gradient_boosting`` module
# is no longer importable in current scikit-learn.
from sklearn.ensemble import GradientBoostingRegressor

#X_train, X_test, y_train, y_test = train_test_split(data_f, annotations["Age_Group"], test_size=0.10)
X_train, X_test, y_train, y_test = train_test_split(data19, data19_ages, test_size=0.10)

gb = GradientBoostingRegressor(loss='lad', learning_rate=0.03, n_estimators=300, max_features='log2', subsample = 0.6, min_samples_split=2, max_depth=4, verbose = 1, warm_start = True)
gb.fit(X_train, y_train)
y = gb.predict(X_test)

print(y.shape)
print(y_test.shape)

# Flatten the predictions and size the x axis from the data itself instead of
# hard-coding the test-set size (73), so a different split size still works.
df = pd.DataFrame({"Real":y_test, "Pred":y.ravel()})
df = df.sort_values(by=['Real'])
# Predictions (blue) and true values (red), ordered by true value
plt.scatter(x = range(len(df)), y = df.Pred, c = 'b')
plt.scatter(x = range(len(df)), y = df.Real, c = 'r')
plt.show()
# True value against prediction
plt.scatter(x = df.Real, y = df.Pred, c = 'b')

X_train, X_test, y_train, y_test = train_test_split(data19, data19_ages, test_size=0.10)

model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(512, activation='relu', input_shape=(19,)),

示例#34

0

显示文件

    6.58
],
                 [
                     0.7842, 0., 8.14, 0., 0.538, 5.99, 81.7, 4.2579, 4., 307.,
                     21., 386.75, 14.67
                 ],
                 [
                     0.80271, 0., 8.14, 0., 0.538, 5.456, 36.6, 3.7965, 4.,
                     307., 21., 288.99, 11.69
                 ],
                 [
                     0.7258, 0., 8.14, 0., 0.538, 5.727, 69.5, 3.7965, 4.,
                     307., 21., 390.95, 11.28
                 ]]  # 要预测的新数据集
for i, new_point in enumerate(new_point_set):  # iterate over every data point to predict
    # Reshape the single sample to (1, n_features) as predict() expects 2-D input.
    new_pre_y = model_gbr.predict(np.array(new_point).reshape(1,
                                                              -1))  # predict with GBR
    # predict() returns a length-1 array; index it so '%.2f' formats a scalar
    # rather than relying on the deprecated array-to-scalar conversion.
    print('predict for new point %d is:  %.2f' % (i + 1, new_pre_y[0])
          )  # print the prediction for each data point

####################################################################
# 4.3 分类分析
# 导入库
import numpy as np  # 导入numpy库
from sklearn.model_selection import train_test_split  # 数据分区库
from sklearn import tree  # 导入决策树库
from sklearn.metrics import accuracy_score, auc, confusion_matrix, f1_score, precision_score, recall_score, \
    roc_curve  # 导入指标库
import prettytable  # 导入表格库
import pydotplus  # 导入dot插件库
import matplotlib.pyplot as plt  # 导入图形展示库

示例#35

0

显示文件

文件： boston_regression_max_min.py 项目： tuchengxiaoweidu/dust_emission_model

         ss_y.inverse_transform(training_data_output),
         color='r',
         label='origin y')
color_list = ['k+', 'b.', 'go', 'cv', 'y*', 'm^']  # colour/marker format per model
for i, pre_y in enumerate(pre_y_list):  # index and prediction of each regression model
    plt.plot(x,
             ss_y.inverse_transform(pre_y.reshape(-1, 1)),
             color_list[i],
             label=model_names[i])  # draw each model's prediction curve
plt.title('Comparison of results by six regression model')  # title
plt.legend(loc='upper right')
plt.xlabel('Test data number')
plt.ylabel('Filter weight gain(g/h)')
plt.savefig("picture/all regression compare.jpg", dpi=500)
plt.show()

# Apply the model
print('regression prediction:')
new_pre_y = ss_y.inverse_transform(
    model_gbr.predict(test_data_input).reshape(-1, 1))  # predict with GBR

print('predict data \t real data')
test_y_output = ss_y.inverse_transform(test_data_output)
# Flatten both arrays so each formatted value is a scalar, not a length-1 row.
for predicted, actual in zip(new_pre_y.ravel(), test_y_output.ravel()):
    print('  %.2f \t %0.2f' %
          (predicted, actual))  # print predicted and real value for each data point
mse = mean_squared_error(test_y_output, new_pre_y)
print("The mse of model_gbr is : %f" % mse)
# if __name__ == "__main__":
#     svm_baseline()

示例#36

0

显示文件

mae_svm = mean_absolute_error(Y_test, y_pred_svm)

#RF Algorithm
from sklearn.ensemble import RandomForestRegressor
regressor_rf = RandomForestRegressor(n_estimators=20, random_state=0)
regressor_rf.fit(X_train, Y_train)
y_pred_rf = regressor_rf.predict(X_test)
rms_rf = sqrt(mean_squared_error(Y_test, y_pred_rf))
rsqrd_rf = r2_score(Y_test, y_pred_rf)
mae_rf = mean_absolute_error(Y_test, y_pred_rf)

#GB Algorithm
# Public import path; the private ``sklearn.ensemble.gradient_boosting`` module
# is no longer importable in current scikit-learn.
from sklearn.ensemble import GradientBoostingRegressor
# Least-squares loss is the default. It is left implicit because the spelling
# 'ls' is rejected by recent scikit-learn releases, while the default works on
# old and new releases alike.
regressor_gb = GradientBoostingRegressor(learning_rate=0.5,
                                         n_estimators=400)
regressor_gb.fit(X_train, Y_train)
y_pred_gb = regressor_gb.predict(X_test)
rms_gb = sqrt(mean_squared_error(Y_test, y_pred_gb))
rsqrd_gb = r2_score(Y_test, y_pred_gb)
mae_gb = mean_absolute_error(Y_test, y_pred_gb)

#Multiple Linear Regression
from sklearn.linear_model import LinearRegression
regressor_lr = LinearRegression()
regressor_lr.fit(X_train, Y_train)
y_pred_lr = regressor_lr.predict(X_test)
rms_lr = sqrt(mean_squared_error(Y_test, y_pred_lr))
rsqrd_lr = r2_score(Y_test, y_pred_lr)
mae_lr = mean_absolute_error(Y_test, y_pred_lr)

示例#37

0

显示文件

        'killsNorm', 'damageDealtNorm', 'boostsPerWalkDistance',
        'healsPerWalkDistance', 'healsAndBoostsPerWalkDistance',
        'killsPerWalkDistance'
    ]]

    return data


# Load the training data, keep a random 30% sample and derive the features.
train = pd.read_csv("train_V2.csv")

train = train.sample(frac=0.3)

train = preprocess(train)

# Feature columns and regression target.
X = train[[
    'totalDistance', 'killsCategories', 'playersJoined', 'killsNorm',
    'damageDealtNorm', 'boostsPerWalkDistance', 'healsPerWalkDistance',
    'healsAndBoostsPerWalkDistance', 'killsPerWalkDistance'
]]
y = train["winPlacePerc"]

X_train, X_test, y_train, y_test = train_test_split(X, y)

# Least-squares loss is the default. It is left implicit because the spelling
# 'ls' is rejected by recent scikit-learn releases, while the default works on
# old and new releases alike.
regressor = GradientBoostingRegressor(n_estimators=200,
                                      learning_rate=0.1,
                                      max_depth=1)
regressor.fit(X_train, y_train)
test_y_predicted = regressor.predict(X_test)
valiation(y_test, test_y_predicted)