Example #1
0
File: rf.py — Project: bwuu/Rossmann

# Random-forest baseline: move the target ('Sales') into column 0 so the
# target/feature split below is a simple column slice.
# NOTE(review): assumes `data` is a pandas DataFrame and `features` a list of
# column names defined by earlier preprocessing — confirm against caller.
targets = data['Sales'].values
targets = np.reshape(targets, [targets.shape[0], 1])
data = data[features].values
data = np.concatenate((targets, data), axis=1)
np.random.shuffle(data)

from sklearn.ensemble import RandomForestRegressor

# 90/10 train/test split on the shuffled rows.
offset = int(data.shape[0] * 0.9)
train = data[:offset, :]
test = data[offset:, :]

params = {
    'n_estimators': 50,
    'n_jobs': 1,
    'verbose': 2,
    'max_depth': 30,
    'max_features': 0.6,
}
forest = RandomForestRegressor(**params)
# Fit on log1p(sales); predictions are inverted with expm1 below.
forest.fit(train[:, 1:], np.log1p(train[:, 0]))

# WARNING: MAY DUMP A FEW GIGS OR MORE
# from sklearn.externals import joblib  # deprecated path: use `import joblib`
# joblib.dump(forest, '../saved_models/rf_001.model')
# print('wrote forest model to rf_001.model')

# Score on the held-out 10% with RMSPE (defined elsewhere in the project).
out = np.expm1(forest.predict(test[:, 1:]))
target = test[:, 0]
err = rmspe(target, out)

print(err)
print('done')

print(forest.feature_importances_)
Example #2
0
File: xgb.py — Project: bwuu/Rossmann
          "colsample_bytree": 0.7,
          #"silent": 1
          "seed": 1301
          }
num_boost_round = 1700
print("Train an XGBoost model")

# Hold out Aug 2014 as the validation set.  Sales stays in column 0 so that
# X[:, 0] is the target and X[:, 1:] are the features.
holdout = (data.Year == 2014) & (data.Month == 8)
X_train = data[['Sales'] + features][~holdout].values
X_valid = data[['Sales'] + features][holdout].values

#data = data[['Sales'] + features].values
#X_train, X_valid = train_test_split(data, test_size=0.012, random_state=10)

# Train on log1p(sales); invert with expm1 when scoring below.
y_train = np.log1p(X_train[:, 0])
y_valid = np.log1p(X_valid[:, 0])
dtrain = xgb.DMatrix(X_train[:, 1:], y_train)
dvalid = xgb.DMatrix(X_valid[:, 1:], y_valid)

# Early stopping watches the custom RMSPE metric on the validation fold.
watchlist = [(dtrain, 'train'), (dvalid, 'eval')]
gbm = xgb.train(params, dtrain, num_boost_round, evals=watchlist,
                early_stopping_rounds=120, feval=rmspe_xg, verbose_eval=True)

gbm.save_model('../saved_models/xgb_001.model')
print('model saved in xgb_001.model')

print("Validating")
yhat = gbm.predict(xgb.DMatrix(X_valid[:, 1:]))
error = rmspe(X_valid[:, 0], np.expm1(yhat))
print('RMSPE: {:.6f}'.format(error))