def forward_selection(self, x_train, y_train, x_test, folds, cols):
    """Greedy forward feature selection over the candidate columns ``cols``.

    Establishes a baseline CV gini with every candidate column removed,
    then in each round trials re-adding each remaining candidate one at a
    time and permanently re-adds the single column that most improves the
    score. Stops as soon as a full round yields no improvement.

    Side effects: mutates ``x_train``/``x_test`` (columns dropped/added),
    mutates ``cols`` (chosen columns removed), and appends to
    ``self.cols`` / ``self.scores``.
    """
    cv = Cross_Validate(None, n_splits=self.n_splits,
                        len_trn=x_train.shape[0], len_tst=x_test.shape[0],
                        clf=-1, params=self.params, max_round=self.max_round)
    # Keep copies of the candidate columns so they can be re-added later,
    # then drop them all to form the baseline feature set.
    x_train_cols = x_train[cols]
    x_test_cols = x_test[cols]
    x_train.drop(cols, axis=1, inplace=True)
    x_test.drop(cols, axis=1, inplace=True)
    # Baseline score without any of the candidate columns.
    cv.cross_validate_xgb(x_train, y_train, x_test, folds)
    self.current_best = cv.trn_gini
    self.scores.append(self.current_best)
    for i in range(len(cols)):
        print("Round %i" % (i + 1))
        # BUG FIX: original mixed a Python 2 print statement
        # ("print x_train.shape") into the call -- SyntaxError on Python 3.
        print("Shape of train", x_train.shape)
        for col in cols:
            # Trial: temporarily add this candidate column back.
            x_train = pd.concat([x_train, x_train_cols[col]], axis=1)
            x_test = pd.concat([x_test, x_test_cols[col]], axis=1)
            cv.cross_validate_xgb(x_train, y_train, x_test, folds)
            if cv.trn_gini > self.current_best:
                self.current_best = cv.trn_gini
                self.col_temp = col
            # BUG FIX: drop by column label; the original passed the Series
            # object (x_train_cols[col]) to drop(), which is not a label.
            x_train.drop(col, axis=1, inplace=True)
            x_test.drop(col, axis=1, inplace=True)
        if self.col_temp != 0:
            # Commit the best column found this round.
            cols.remove(self.col_temp)
            x_train = pd.concat([x_train, x_train_cols[self.col_temp]], axis=1)
            x_test = pd.concat([x_test, x_test_cols[self.col_temp]], axis=1)
            self.cols.append(self.col_temp)
            self.scores.append(self.current_best)
            self.col_temp = 0
        else:
            # No candidate improved the score this round -- stop.
            break
def xgb03(x_train, y_train, x_test, folds, max_round, n_splits=5):
    """Run XGBoost cross-validation with the model-03 configuration.

    Applies feature_engineering_3 to the data, cross-validates an XGBoost
    classifier with a fixed hyper-parameter set, and returns the training
    gini plus the out-of-fold / test predictions and feature importances.
    """
    params = {
        'max_depth': 4,
        'objective': "binary:logistic",
        'eta': 0.025,  # learning rate
        'subsample': 0.9,
        'min_child_weight': 100,
        'colsample_bytree': 0.7,
        'gamma': 0.60,
        'n_jobs': -1,
        'reg_alpha': 4,
        # 'reg_lambda': 5,
        'silent': 1,
    }
    # Additional processing of data
    x_train, x_test = feature_engineering_3(x_train, x_test, y_train)
    # Cross Validate
    cv = Cross_Validate(xgb03.__name__, n_splits, x_train.shape[0],
                        x_test.shape[0], -1, params, max_round)
    cv.cross_validate_xgb(x_train, y_train, x_test, folds, verbose_eval=100)
    return cv.trn_gini, cv.y_trn, cv.y_tst, cv.fscore
def tune_seq(self, x_train, y_train, x_test, folds, verbose_eval=False):
    """Tune parameters sequentially (greedy, one parameter at a time).

    For each key in ``self.params_dict``, cross-validates every candidate
    value, records each trial (params + score) as a row of ``self.sframe``,
    and commits the best-scoring value into ``self.params`` before moving
    on to the next parameter.
    """
    print("\ntuning starts...")
    for key in self.params_dict:
        for item in self.params_dict[key]:
            print('Tuning for parameter %s with value %f' % (key, item))
            # BUG FIX: copy instead of aliasing. The original bound
            # params_temp directly to self.params, so the trial value and
            # the bookkeeping 'score' key below were written into
            # self.params itself, corrupting the base parameter set.
            self.params_temp = self.params.copy()
            self.params_temp.update({key: item})
            cv = Cross_Validate(None, n_splits=self.n_splits,
                                len_trn=x_train.shape[0],
                                len_tst=x_test.shape[0], clf=-1,
                                params=self.params_temp,
                                max_round=self.max_round)
            cv.cross_validate_xgb(x_train, y_train, x_test, folds,
                                  verbose_eval)
            self.params_temp.update({'score': cv.trn_gini})
            # Record this trial's params + score as a new column; the frame
            # is transposed to rows once tuning finishes.
            self.sframe = pd.concat([
                self.sframe,
                pd.Series(list(self.params_temp.values()),
                          index=list(self.params_temp.keys()))
            ], axis=1)
            if cv.trn_gini > self.max_score:
                self.max_item = item
                self.max_score = cv.trn_gini
        # Commit the best value found for this parameter, then reset the
        # trackers for the next parameter.
        self.params.update({key: self.max_item})
        self.max_item = 0
        self.max_score = 0
    self.sframe = self.sframe.transpose().reset_index()