def __ensemble_test(model_type, X_train, X_test, y_train, y_test):
    """Fit the requested ensemble regressor and report its held-out score
    and feature importances."""
    if model_type.lower() == 'gbr':
        reg = GBR(n_estimators=100, random_state=1)
    elif model_type.lower() == 'rfr':
        reg = RFR(n_estimators=100, random_state=1)
    elif model_type.lower() == 'abr':
        reg = ABR(n_estimators=100, random_state=1)
    elif model_type.lower() == 'etr':
        reg = ETR(n_estimators=100, random_state=1)
    else:
        # Fail fast instead of hitting an UnboundLocalError below.
        raise ValueError('unknown ensemble type: {}'.format(model_type))
    reg.fit(X_train, y_train)
    return reg, reg.score(X_test, y_test), reg.feature_importances_
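# Minimal usage sketch for __ensemble_test, assuming it sits at module level
# and that GBR/RFR/ABR/ETR are the usual sklearn aliases
# (GradientBoostingRegressor, RandomForestRegressor, AdaBoostRegressor,
# ExtraTreesRegressor); the synthetic data below is illustrative only.
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

X_demo, y_demo = make_regression(n_samples=200, n_features=8, noise=0.1,
                                 random_state=1)
X_tr, X_te, y_tr, y_te = train_test_split(X_demo, y_demo, test_size=0.25,
                                          random_state=1)
reg_demo, r2_demo, imp_demo = __ensemble_test('gbr', X_tr, X_te, y_tr, y_te)
print('held-out R^2: {:.3f}'.format(r2_demo))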
def train(self,
          zone,
          num,
          hidden_layer_size=(4,),
          n_jobs=1,
          kernel='rbf',
          n_components=15,
          n_estimators=50,
          loss='linear',
          learning_rate=1.0,
          host='127.0.0.1'):
    f = fd(host)
    input_set = f.getTrainData(zone)
    x_train, x_test, y_train, y_test, scaler, pca = self.read_dataset(
        input_set, n_components)
    if num == 1:  # Linear Regression
        clf = LinearRegression(n_jobs=n_jobs)
        clf.fit(x_train, y_train)
        # storeObj(clf, zone, clf.score(x_test, y_test), 'Linear Regression')
        return clf, clf.score(x_test, y_test), 'Linear Regression', scaler, pca
    elif num == 2:  # Support vector regression (kernel defaults to 'rbf')
        clf = svm.SVR(kernel=kernel)
        clf.fit(x_train, y_train)
        # storeObj(clf, zone, clf.score(x_test, y_test), 'SVR' + ',' + kernel)
        return clf, clf.score(x_test, y_test), 'SVR' + kernel, scaler, pca
    elif num == 3:  # Neural network
        # sklearn's MLPRegressor takes the keyword hidden_layer_sizes and
        # expects a tuple, hence the (4,) default above.
        clf = mlpr(hidden_layer_sizes=hidden_layer_size)
        clf.fit(x_train, y_train)
        layer_desc = ''
        for i in hidden_layer_size:
            layer_desc += '-> {}'.format(i)
        # storeObj(clf, zone, clf.score(x_test, y_test), 'NeuralNet hidden layer size' + layer_desc)
        return clf, clf.score(
            x_test, y_test), 'NeuralNet hidden_size' + layer_desc, scaler, pca
    elif num == 4:  # Gradient Boosting Regressor
        clf = GBR(loss=loss,
                  n_estimators=n_estimators,
                  learning_rate=learning_rate)
        clf.fit(x_train, y_train)
        # storeObj(clf, zone, clf.score(x_test, y_test), 'Gradient Boosting Regressor')
        return clf, clf.score(
            x_test, y_test), 'Gradient Boosting Regressor', scaler, pca
    elif num == 5:  # AdaBoost Regressor
        clf = ABR()
        clf.fit(x_train, y_train)
        # storeObj(clf, zone, clf.score(x_test, y_test), 'AdaBoost Regressor')
        return clf, clf.score(x_test, y_test), 'AdaBoost Regressor', scaler, pca
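# Hypothetical call sketch for train (names are illustrative: it assumes the
# enclosing class provides read_dataset and that fd(host) is the project's
# data-feed client, so it is shown commented out rather than runnable):
# model, r2, name, scaler, pca = trainer.train('zone-1', num=3,
#                                              hidden_layer_size=(8, 4),
#                                              n_components=10)
# print('{} scored R^2 = {:.3f}'.format(name, r2))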
def __init__(self,
             base_estimator=None,
             n_estimators=50,
             learning_rate=1.0,
             random_state=None):
    self.base_estimator = base_estimator
    self.learning_rate = learning_rate
    self.random_state = random_state
    self.n_estimators = n_estimators
    self.model = ABR(n_estimators=self.n_estimators,
                     learning_rate=self.learning_rate,
                     base_estimator=self.base_estimator,
                     random_state=self.random_state)
def fit(self, X, Y, sample_weight=None):
    from sklearn.ensemble import AdaBoostRegressor as ABR
    from sklearn.tree import DecisionTreeRegressor

    # Hyperparameters may arrive as strings (e.g. from a configuration
    # space), so cast them before use.
    self.n_estimators = int(self.n_estimators)
    self.learning_rate = float(self.learning_rate)
    self.max_depth = int(self.max_depth)

    base_estimator = DecisionTreeRegressor(max_depth=self.max_depth)
    estimator = ABR(base_estimator=base_estimator,
                    n_estimators=self.n_estimators,
                    learning_rate=self.learning_rate,
                    random_state=self.random_state)
    estimator.fit(X, Y, sample_weight=sample_weight)
    self.estimator = estimator
    return self
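# Standalone sketch of the same AdaBoost-over-shallow-tree setup the wrapper
# builds, runnable outside the class; the synthetic data and hyperparameter
# values are illustrative assumptions. The base learner is passed positionally
# because the keyword was renamed from base_estimator to estimator in
# scikit-learn 1.2.
from sklearn.datasets import make_regression
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor

X_ada, y_ada = make_regression(n_samples=300, n_features=10, noise=0.2,
                               random_state=0)
ada = AdaBoostRegressor(DecisionTreeRegressor(max_depth=4),
                        n_estimators=50, learning_rate=1.0, random_state=0)
ada.fit(X_ada, y_ada)
print('training R^2: {:.3f}'.format(ada.score(X_ada, y_ada)))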
def main():
    ### Parsing and data pre-processing
    # Load the provided data.
    train_features_path = os.path.join(data_path, 'dengue_features_train.csv')
    train_labels_path = os.path.join(data_path, 'dengue_labels_train.csv')

    ### Pre-process the data.
    sj_train, iq_train = preprocess_data(train_features_path,
                                         labels_path=train_labels_path)
    #print(sj_train.describe())
    #print(iq_train.describe())

    ### Define the xgboost parameters.
    # Note: newer xgboost releases rename 'reg:linear' to 'reg:squarederror'
    # and drop the 'silent' flag.
    xgb_params = {
        'eta': 0.05,
        'max_depth': 5,
        'subsample': 0.7,
        'colsample_bytree': 0.7,
        'objective': 'reg:linear',
        'eval_metric': 'rmse',
        'silent': 1
    }
    num_boost_rounds = 1000

    ## Use K-fold splits to create cross-validation data.
    kf = KFold(n_splits=6)

    ## Stack by adding six columns ('negbi', 'gb', 'xgb', 'abr', 'etr', 'br')
    ## that hold each base model's out-of-fold training predictions.
    stack_cols = ['negbi', 'gb', 'xgb', 'abr', 'etr', 'br']
    sj_train = sj_train.assign(negbi=0, gb=0, xgb=0, abr=0, etr=0, br=0)

    loop = 1
    # kf.split yields positional train_index and val_index arrays.
    for train_index, val_index in kf.split(sj_train):
        X_train, X_val = sj_train.iloc[train_index], sj_train.iloc[val_index]

        ### (1) Negative binomial method
        sj_neg_model = get_best_model(X_train, X_val, 'sj')
        predictions_neg = sj_neg_model.predict(X_val).astype(int)
        # Shift the prediction manually (by four weeks for sj).
        for i in range(predictions_neg.shape[0] - 1, 3, -1):
            predictions_neg.iloc[i] = predictions_neg.iloc[i - 4]

        ### (2) Gradient boosting method
        sj_gb_model = gradient_boosting(X_train.drop(stack_cols, axis=1),
                                        X_val.drop(stack_cols, axis=1))
        predictions_gb = sj_gb_model.predict(
            X_val.drop(['total_cases'] + stack_cols, axis=1)).astype(int)

        ### (3) xgboost method
        dtrain = xgb.DMatrix(
            X_train.drop(['total_cases'] + stack_cols, axis=1),
            X_train['total_cases'])
        dval = xgb.DMatrix(X_val.drop(['total_cases'] + stack_cols, axis=1))
        sj_xgb_model = xgb.train(dict(xgb_params, silent=0),
                                 dtrain,
                                 num_boost_round=num_boost_rounds)
        predictions_xgb = sj_xgb_model.predict(dval).astype(int)

        ### (4) AdaBoost regressor method
        sj_abr_model = ABR(n_estimators=800,
                           learning_rate=0.08,
                           loss='linear',
                           random_state=0)
        sj_abr_model.fit(X_train.drop(['total_cases'] + stack_cols, axis=1),
                         X_train['total_cases'])
        predictions_abr = sj_abr_model.predict(
            X_val.drop(['total_cases'] + stack_cols, axis=1))

        ### (5) Extra-trees regressor method
        sj_etr_model = ETR(n_estimators=800,
                           max_depth=4,
                           random_state=0,
                           verbose=1)
        sj_etr_model.fit(X_train.drop(['total_cases'] + stack_cols, axis=1),
                         X_train['total_cases'])
        predictions_etr = sj_etr_model.predict(
            X_val.drop(['total_cases'] + stack_cols, axis=1))

        ### (6) Bagging regressor method
        sj_br_model = BR(n_estimators=800,
                         oob_score=False,
                         n_jobs=5,
                         random_state=0,
                         verbose=1)
        sj_br_model.fit(X_train.drop(['total_cases'] + stack_cols, axis=1),
                        X_train['total_cases'])
        predictions_br = sj_br_model.predict(
            X_val.drop(['total_cases'] + stack_cols, axis=1))

        ### Store the fold's results in sj_train
        ### (predictions_neg -> 'negbi', predictions_gb -> 'gb', ...).
        print("Adding the result of the predictions to sj training data({}/{})".
              format(loop, 6))
        for idx, index in enumerate(val_index):
            row = sj_train.index[index]
            sj_train.loc[row, 'negbi'] = predictions_neg.iloc[idx]
            sj_train.loc[row, 'gb'] = predictions_gb[idx]
            sj_train.loc[row, 'xgb'] = predictions_xgb[idx]
            sj_train.loc[row, 'abr'] = predictions_abr[idx]
            sj_train.loc[row, 'etr'] = predictions_etr[idx]
            sj_train.loc[row, 'br'] = predictions_br[idx]
        loop += 1

    ## Same stacking procedure for city iq.
    iq_train = iq_train.assign(negbi=0, gb=0, xgb=0, abr=0, etr=0, br=0)
    loop = 1
    for train_index, val_index in kf.split(iq_train):
        X_train, X_val = iq_train.iloc[train_index], iq_train.iloc[val_index]

        ### (1) Negative binomial method
        iq_neg_model = get_best_model(X_train, X_val, 'iq')
        predictions_neg = iq_neg_model.predict(X_val).astype(int)
        # Shift the prediction manually (by one week for iq).
        for i in range(predictions_neg.shape[0] - 1, 0, -1):
            predictions_neg.iloc[i] = predictions_neg.iloc[i - 1]

        ### (2) Gradient boosting method
        iq_gb_model = gradient_boosting(X_train.drop(stack_cols, axis=1),
                                        X_val.drop(stack_cols, axis=1))
        predictions_gb = iq_gb_model.predict(
            X_val.drop(['total_cases'] + stack_cols, axis=1)).astype(int)

        ### (3) xgboost method
        dtrain = xgb.DMatrix(
            X_train.drop(['total_cases'] + stack_cols, axis=1),
            X_train['total_cases'])
        dval = xgb.DMatrix(X_val.drop(['total_cases'] + stack_cols, axis=1))
        iq_xgb_model = xgb.train(dict(xgb_params, silent=0),
                                 dtrain,
                                 num_boost_round=num_boost_rounds)
        predictions_xgb = iq_xgb_model.predict(dval).astype(int)

        ### (4) AdaBoost regressor method
        iq_abr_model = ABR(n_estimators=800,
                           learning_rate=0.08,
                           loss='linear',
                           random_state=0)
        iq_abr_model.fit(X_train.drop(['total_cases'] + stack_cols, axis=1),
                         X_train['total_cases'])
        predictions_abr = iq_abr_model.predict(
            X_val.drop(['total_cases'] + stack_cols, axis=1))

        ### (5) Extra-trees regressor method
        iq_etr_model = ETR(n_estimators=800,
                           max_depth=4,
                           random_state=0,
                           verbose=1)
        iq_etr_model.fit(X_train.drop(['total_cases'] + stack_cols, axis=1),
                         X_train['total_cases'])
        predictions_etr = iq_etr_model.predict(
            X_val.drop(['total_cases'] + stack_cols, axis=1))

        ### (6) Bagging regressor method
        iq_br_model = BR(n_estimators=800,
                         oob_score=False,
                         n_jobs=5,
                         random_state=0,
                         verbose=1)
        iq_br_model.fit(X_train.drop(['total_cases'] + stack_cols, axis=1),
                        X_train['total_cases'])
        predictions_br = iq_br_model.predict(
            X_val.drop(['total_cases'] + stack_cols, axis=1))

        ### Store the fold's results in iq_train.
        print("Adding the result of the predictions to iq training data({}/{})".
              format(loop, 6))
        for idx, index in enumerate(val_index):
            row = iq_train.index[index]
            iq_train.loc[row, 'negbi'] = predictions_neg.iloc[idx]
            iq_train.loc[row, 'gb'] = predictions_gb[idx]
            iq_train.loc[row, 'xgb'] = predictions_xgb[idx]
            iq_train.loc[row, 'abr'] = predictions_abr[idx]
            iq_train.loc[row, 'etr'] = predictions_etr[idx]
            iq_train.loc[row, 'br'] = predictions_br[idx]
        loop += 1

    ### Each training frame now looks like
    ### [features, total_cases, negbi, gb, xgb, abr, etr, br].

    ## Read the testing data.
    test_features_path = os.path.join(data_path, 'dengue_features_test.csv')
    sj_test, iq_test = preprocess_data(test_features_path)

    ## As for training, add the six stacking columns to the test frames.
    ## The base models reused here are the ones fitted on the final fold.
    sj_test = sj_test.assign(negbi=0, gb=0, xgb=0, abr=0, etr=0, br=0)

    ## (1) Negative binomial prediction
    sj_predictions_neg = sj_neg_model.predict(sj_test).astype(int)
    for i in range(sj_predictions_neg.shape[0] - 1, 3, -1):
        sj_predictions_neg.iloc[i] = sj_predictions_neg.iloc[i - 4]
    ## (2) Gradient boosting prediction
    sj_predictions_gb = sj_gb_model.predict(
        sj_test.drop(stack_cols, axis=1)).astype(int)
    ## (3) xgboost prediction
    dtest = xgb.DMatrix(sj_test.drop(stack_cols, axis=1))
    sj_predictions_xgb = sj_xgb_model.predict(dtest).astype(int)
    ## (4) AdaBoost regressor prediction
    sj_predictions_abr = sj_abr_model.predict(
        sj_test.drop(stack_cols, axis=1)).astype(int)
    ## (5) Extra-trees regressor prediction
    sj_predictions_etr = sj_etr_model.predict(
        sj_test.drop(stack_cols, axis=1)).astype(int)
    ## (6) Bagging regressor prediction
    sj_predictions_br = sj_br_model.predict(
        sj_test.drop(stack_cols, axis=1)).astype(int)

    print("Adding predictions as features to sj testing data...")
    # Write each prediction into its corresponding column.
    for i in range(len(sj_test)):
        row = sj_test.index[i]
        sj_test.loc[row, 'negbi'] = sj_predictions_neg.iloc[i]
        sj_test.loc[row, 'gb'] = sj_predictions_gb[i]
        sj_test.loc[row, 'xgb'] = sj_predictions_xgb[i]
        sj_test.loc[row, 'abr'] = sj_predictions_abr[i]
        sj_test.loc[row, 'etr'] = sj_predictions_etr[i]
        sj_test.loc[row, 'br'] = sj_predictions_br[i]

    ## Same process for city iq.
    iq_test = iq_test.assign(negbi=0, gb=0, xgb=0, abr=0, etr=0, br=0)
    ## (1) Negative binomial prediction
    iq_predictions_neg = iq_neg_model.predict(iq_test).astype(int)
    for i in range(iq_predictions_neg.shape[0] - 1, 0, -1):
        iq_predictions_neg.iloc[i] = iq_predictions_neg.iloc[i - 1]
    ## (2) Gradient boosting prediction
    iq_predictions_gb = iq_gb_model.predict(
        iq_test.drop(stack_cols, axis=1)).astype(int)
    ## (3) xgboost prediction
    dtest = xgb.DMatrix(iq_test.drop(stack_cols, axis=1))
    iq_predictions_xgb = iq_xgb_model.predict(dtest).astype(int)
    ## (4) AdaBoost regressor prediction
    iq_predictions_abr = iq_abr_model.predict(
        iq_test.drop(stack_cols, axis=1)).astype(int)
    ## (5) Extra-trees regressor prediction
    iq_predictions_etr = iq_etr_model.predict(
        iq_test.drop(stack_cols, axis=1)).astype(int)
    ## (6) Bagging regressor prediction
    iq_predictions_br = iq_br_model.predict(
        iq_test.drop(stack_cols, axis=1)).astype(int)

    print("Adding predictions as features to iq testing data...")
    for i in range(len(iq_test)):
        row = iq_test.index[i]
        iq_test.loc[row, 'negbi'] = iq_predictions_neg.iloc[i]
        iq_test.loc[row, 'gb'] = iq_predictions_gb[i]
        iq_test.loc[row, 'xgb'] = iq_predictions_xgb[i]
        iq_test.loc[row, 'abr'] = iq_predictions_abr[i]
        iq_test.loc[row, 'etr'] = iq_predictions_etr[i]
        iq_test.loc[row, 'br'] = iq_predictions_br[i]

    ## Use the new columns to fit a linear regression meta-model.
    print("Building linear regression model...")
    # The linear regression trains on
    # X = [features, negbi, gb, xgb, abr, etr, br], y = total_cases.
    sj_lr = LR()
    sj_lr.fit(sj_train.drop('total_cases', axis=1), sj_train['total_cases'])
    iq_lr = LR()
    iq_lr.fit(iq_train.drop('total_cases', axis=1), iq_train['total_cases'])

    # Report the k-fold validation error (mean absolute error).
    sj_score = []
    for train_index, val_index in kf.split(sj_train):
        X_train, X_val = sj_train.iloc[train_index], sj_train.iloc[val_index]
        train_predict = np.array(
            sj_lr.predict(X_val.drop('total_cases', axis=1))).astype(int)
        sj_score.append(eval_measures.meanabs(train_predict,
                                              X_val.total_cases))
    print("Mean of {} cross validation of sj_score is {} (+/- {})".format(
        kf.get_n_splits(sj_train), np.mean(sj_score), np.std(sj_score)))

    iq_score = []
    for train_index, val_index in kf.split(iq_train):
        X_train, X_val = iq_train.iloc[train_index], iq_train.iloc[val_index]
        train_predict = np.array(
            iq_lr.predict(X_val.drop('total_cases', axis=1))).astype(int)
        iq_score.append(eval_measures.meanabs(train_predict,
                                              X_val.total_cases))
    print("Mean of {} cross validation of iq_score is {} (+/- {})".format(
        kf.get_n_splits(iq_train), np.mean(iq_score), np.std(iq_score)))

    ## Use the meta-models sj_lr and iq_lr trained above on the testing data.
    print("Predicting testing data...")
    sj_predictions = np.array(sj_lr.predict(sj_test)).astype(int)
    iq_predictions = np.array(iq_lr.predict(iq_test)).astype(int)

    print("Creating submit file...")
    ## Use submission_format.csv as a template to write the answer.
    sample_path = os.path.join(data_path, 'submission_format.csv')
    submission = pd.read_csv(sample_path, index_col=[0, 1, 2])
    submission.total_cases = np.concatenate([sj_predictions, iq_predictions])
    submission.to_csv("./data/stacking_6_less_feature.csv")
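# Minimal, self-contained sketch of the out-of-fold stacking pattern that
# main() applies above, on synthetic data; the base models, fold count, and
# feature shapes are illustrative assumptions, not the competition setup.
import numpy as np
from sklearn.datasets import make_regression
from sklearn.ensemble import AdaBoostRegressor, BaggingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold

X, y = make_regression(n_samples=240, n_features=6, noise=0.3, random_state=0)
base_models = [AdaBoostRegressor(random_state=0),
               BaggingRegressor(random_state=0)]
oof = np.zeros((len(X), len(base_models)))  # out-of-fold predictions

# Each base model predicts only the rows it was not trained on, so the
# meta-model never sees a base prediction made on its own training data.
for tr_idx, val_idx in KFold(n_splits=6).split(X):
    for j, model in enumerate(base_models):
        model.fit(X[tr_idx], y[tr_idx])
        oof[val_idx, j] = model.predict(X[val_idx])

# The meta-model sees the original features plus each base model's
# out-of-fold prediction, mirroring the [features, negbi, gb, ...] layout.
meta = LinearRegression().fit(np.hstack([X, oof]), y)
print('meta-model R^2: {:.3f}'.format(meta.score(np.hstack([X, oof]), y)))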