def predict(self, X, output_margin=False, ntree_limit=0):
    """
    Predict ranking scores for X.

    X must use the same layout as in fit: the first feature is the group
    indicator, the remaining columns are the actual features.
    """
    # Split the group-indicator column off the feature matrix and recover
    # the per-group sizes.
    sizes, _, X_features, _, _ = _preprare_data_in_groups(X)
    test_dmatrix = DMatrix(X_features, missing=self.missing)
    test_dmatrix.set_group(sizes)
    rank_values = self.get_booster().predict(test_dmatrix,
                                              output_margin=output_margin,
                                              ntree_limit=ntree_limit)
    return rank_values
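# Usage sketch for predict (hedged: `ranker` and `X_test` are illustrative
# names, not defined in this module; X_test must use the same layout as in
# fit, with the group indicator in the first column):
#
#     scores = ranker.predict(X_test)    # one relevance score per row
#     order = scores.argsort()[::-1]     # descending order; slice per group
#                                        # to rank items within each group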
def fit(self, X, y, sample_weight=None, eval_set=None, eval_metric=None,
        early_stopping_rounds=None, verbose=True, xgb_model=None):
    """
    Fit the gradient boosting model.

    Parameters
    ----------
    X : array_like
        Feature matrix with the first feature containing a group indicator
    y : array_like
        Labels
    sample_weight : array_like
        Instance weights
    eval_set : list, optional
        A list of (X, y) tuple pairs to use as a validation set for
        early stopping
    eval_metric : str, callable, optional
        If a str, should be a built-in evaluation metric to use. See
        doc/parameter.md. If callable, a custom evaluation metric. The call
        signature is func(y_predicted, y_true) where y_true will be a
        DMatrix object such that you may need to call the get_label method.
        It must return a (str, value) pair where the str is a name for the
        evaluation and value is the value of the evaluation function. This
        objective is always minimized.
    early_stopping_rounds : int
        Activates early stopping. Validation error needs to decrease at
        least every <early_stopping_rounds> round(s) to continue training.
        Requires at least one item in evals. If there's more than one, will
        use the last. Returns the model from the last iteration (not the
        best one). If early stopping occurs, the model will have three
        additional fields: bst.best_score, bst.best_iteration and
        bst.best_ntree_limit. (Use bst.best_ntree_limit to get the correct
        value if num_parallel_tree and/or num_class appears in the
        parameters.)
    verbose : bool
        If `verbose` and an evaluation set is used, writes the evaluation
        metric measured on the validation set to stderr.
    xgb_model : str
        File name of a stored xgb model or a 'Booster' instance. Xgb model
        to be loaded before training (allows training continuation).
    """
    X, y = check_X_y(X, y, accept_sparse=False, y_numeric=True)
    # Sort the samples by group and split off the group-indicator column.
    sizes, _, X_features, y, _ = _preprare_data_in_groups(X, y)

    params = self.get_xgb_params()

    if callable(self.objective):
        obj = _objective_decorator(self.objective)
        # Dummy objective; not used when a custom objective is given.
        params["objective"] = "binary:logistic"
    else:
        obj = None

    evals_result = {}
    feval = eval_metric if callable(eval_metric) else None
    if eval_metric is not None:
        if callable(eval_metric):
            eval_metric = None
        else:
            params.update({'eval_metric': eval_metric})

    if sample_weight is not None:
        train_dmatrix = DMatrix(X_features, label=y, weight=sample_weight,
                                missing=self.missing)
    else:
        train_dmatrix = DMatrix(X_features, label=y, missing=self.missing)
    train_dmatrix.set_group(sizes)

    # Convert each (X, y) pair in eval_set into a grouped DMatrix so that
    # eval_metric reporting and early stopping can use the validation data.
    evals = ()
    if eval_set is not None:
        evals = []
        for i, (eval_X, eval_y) in enumerate(eval_set):
            eval_sizes, _, eval_features, eval_labels, _ = \
                _preprare_data_in_groups(eval_X, eval_y)
            eval_dmatrix = DMatrix(eval_features, label=eval_labels,
                                   missing=self.missing)
            eval_dmatrix.set_group(eval_sizes)
            evals.append((eval_dmatrix, 'validation_{}'.format(i)))

    self._Booster = train(params, train_dmatrix,
                          self.n_estimators, evals=evals,
                          early_stopping_rounds=early_stopping_rounds,
                          evals_result=evals_result, obj=obj, feval=feval,
                          verbose_eval=verbose, xgb_model=xgb_model)

    if evals_result:
        for val in evals_result.items():
            evals_result_key = list(val[1].keys())[0]
            evals_result[val[0]][evals_result_key] = val[1][evals_result_key]
        self.evals_result = evals_result

    if early_stopping_rounds is not None:
        self.best_score = self._Booster.best_score
        self.best_iteration = self._Booster.best_iteration
        self.best_ntree_limit = self._Booster.best_ntree_limit
    return self
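# Usage sketch for fit (hedged: the class name XGBRanker and all variable
# names below are illustrative assumptions). The group indicator occupies
# column 0 of X; eval_set pairs follow the same convention:
#
#     X_train = np.column_stack([group_ids, features])
#     ranker = XGBRanker(n_estimators=100)
#     ranker.fit(X_train, y_train,
#                eval_set=[(X_valid, y_valid)],
#                eval_metric='ndcg', early_stopping_rounds=10)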
def _dmat_init(data, labels, **params):
    # Build a DMatrix whose rows are grouped according to the group indicator
    # stored in the first column of `data`; extra keyword args go to DMatrix.
    sizes, _, X_features, y, _ = _preprare_data_in_groups(data, labels)
    ret = DMatrix(X_features, y, **params)
    ret.set_group(sizes)
    return ret
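# Minimal end-to-end sketch of the grouped-DMatrix helper above, runnable as a
# script. Assumptions: this block sits at module level, `_preprare_data_in_groups`
# accepts (X, y) with the group indicator in column 0 of X, and the synthetic
# data below is purely illustrative.
if __name__ == "__main__":
    import numpy as np

    rng = np.random.RandomState(0)
    n_groups, group_size = 5, 20

    # Column 0 is the group (query) id, the remaining columns are features.
    group_ids = np.repeat(np.arange(n_groups), group_size)
    features = rng.rand(n_groups * group_size, 4)
    X_demo = np.column_stack([group_ids, features])
    y_demo = rng.randint(0, 3, size=n_groups * group_size)  # graded relevance

    demo_dmatrix = _dmat_init(X_demo, y_demo, missing=np.nan)
    print("rows:", demo_dmatrix.num_row(), "features:", demo_dmatrix.num_col())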