def lgb_compatible_f1_score(y_hat: np.ndarray, data: lgb.Dataset) -> Tuple[str, float, bool]:
    """F1 evaluation metric in the ``(name, value, is_higher_better)`` form.

    Args:
        y_hat: Predicted scores; they are rounded to the nearest integer
            before scoring.
        data: Dataset whose labels (cast to ``int``) serve as ground truth.

    Returns:
        The tuple ``("f1_score", <f1 value>, True)``.
    """
    ground_truth = data.get_label().astype(int)
    # scikit's f1 doesn't work with probabilities, so score hard labels instead.
    hard_predictions = np.round(y_hat).astype(int)
    score = f1_score(ground_truth, hard_predictions)
    return "f1_score", score, True
def lgb_f1_loss_multiclass(preds: np.ndarray, train_data: lgb.Dataset, clip: float = 1e-5):
    """Custom multiclass loss aimed at optimizing f1.

    Args:
        preds: Flat score array; it is reshaped column-major to
            ``(n_samples, -1)`` before use.
        train_data: Dataset providing the integer class labels.
        clip: Probabilities are clipped to ``[clip, 1 - clip]``.

    Returns:
        ``(grad, hess)``, both flattened column-major back to 1-D.
    """
    labels = train_data.get_label().astype(np.int32)
    n_samples = labels.shape[0]

    # Class probabilities, kept strictly inside (0, 1).
    probs = np.clip(
        softmax_ax1(preds.reshape((n_samples, -1), order='F')), clip, 1 - clip)

    # One-hot encoding of the labels, same shape as the probabilities.
    one_hot = np.zeros_like(probs)
    np.add.at(one_hot, (np.arange(n_samples), labels), 1)

    grad = (probs - one_hot) * probs
    # The last factor is floored at 1e-3 so the hessian stays positive.
    hess = (1 - probs) * probs * np.clip((2 * probs - one_hot), 1e-3, np.inf)

    flat_shape = (-1, )
    return grad.reshape(flat_shape, order='F'), hess.reshape(flat_shape, order='F')
def lgb_f1_loss_multiclass(
        preds: np.ndarray,
        train_data: lgb.Dataset,
        clip: float = 1e-5) -> Tuple[np.ndarray, np.ndarray]:
    """Custom loss for optimizing f1.

    Args:
        preds: Predictions as a flat array (reshaped column-major to
            ``(n_samples, -1)`` internally).
        train_data: Dataset in LightGBM format; supplies the labels.
        clip: Clipping constant; probabilities are kept in ``[clip, 1 - clip]``.

    Returns:
        Gradient, hessian (both 1-D, column-major flattened).
    """
    y_true = train_data.get_label().astype(np.int32)
    row_count = y_true.shape[0]
    scores = preds.reshape((row_count, -1), order="F")

    # softmax, then clip away exact 0 / 1
    proba = np.clip(softmax_ax1(scores), clip, 1 - clip)

    # one-hot encode the labels
    target_ohe = np.zeros_like(proba)
    np.add.at(target_ohe, (np.arange(row_count), y_true), 1)

    gradient = (proba - target_ohe) * proba
    hessian = (1 - proba) * proba * np.clip((2 * proba - target_ohe), 1e-3, np.inf)

    # flatten back to the layout the scores arrived in
    return (gradient.reshape((-1, ), order="F"),
            hessian.reshape((-1, ), order="F"))
def _evaluate(self, scores: np.ndarray, clases: lgb.Dataset) -> Tuple[str, int, bool]:
    """Score ``scores`` against the labels and weights stored in ``clases``.

    The work is delegated to ``self._evaluar_funcion_ganancia``, which receives
    the scores, the labels, the weights and ``self.prob_corte``.

    Returns:
        The name and value produced by the helper, followed by ``True``.
    """
    metric_name, metric_value = self._evaluar_funcion_ganancia(
        scores,
        clases.get_label(),
        clases.get_weight(),
        self.prob_corte,
    )
    return metric_name, metric_value, True
def Dist_Objective(predt: np.ndarray, data: lgb.Dataset):
    """A customized objective function to train each distributional parameter
    using custom gradient and hessian.

    Gradients and hessians are obtained by automatic differentiation of the
    negative Gaussian log-likelihood (NaN-ignoring sum) with respect to the
    location and scale predictions.

    Args:
        predt: Raw predictions; reshaped column-major to
            ``(n_obs, Gaussian.n_dist_param())``.
        data: Dataset providing the labels and, optionally, sample weights.

    Returns:
        ``(grad, hess)``, each flattened column-major to 1-D.
    """
    target = torch.tensor(data.get_label())

    # When num_class != 0, preds has shape (n_obs, n_dist_param).
    # Each element in a row represents a raw prediction (leaf weight, hasn't gone
    # through response function yet).
    predt = predt.reshape(-1, Gaussian.n_dist_param(), order="F")

    preds_location = Gaussian.param_dict()["location"](predt[:, 0])
    preds_location = torch.tensor(preds_location, requires_grad=True)

    preds_scale = Gaussian.param_dict()["scale"](predt[:, 1])
    preds_scale = torch.tensor(preds_scale, requires_grad=True)

    # Weights. The check must be `is None`: `array == None` is evaluated
    # element-wise, and using that result in `if` raises ValueError as soon as
    # real weights (more than one element) are supplied.
    weights = data.get_weight()
    if weights is None:
        # Use 1 as weight if no weights are specified
        weights = np.ones_like(target, dtype=float)

    # Initialize Gradient and Hessian Matrices
    grad = np.zeros(shape=(len(target), Gaussian.n_dist_param()))
    hess = np.zeros(shape=(len(target), Gaussian.n_dist_param()))

    # Specify Metric for Auto Derivation
    dGaussian = Normal(preds_location, preds_scale)
    autograd_metric = -dGaussian.log_prob(target).nansum()

    # Location
    grad[:, 0] = stabilize_derivative(
        auto_grad(metric=autograd_metric, parameter=preds_location, n=1) * weights,
        Gaussian.stabilize)
    hess[:, 0] = stabilize_derivative(
        auto_grad(metric=autograd_metric, parameter=preds_location, n=2) * weights,
        Gaussian.stabilize)

    # Scale
    grad[:, 1] = stabilize_derivative(
        auto_grad(metric=autograd_metric, parameter=preds_scale, n=1) * weights,
        Gaussian.stabilize)
    hess[:, 1] = stabilize_derivative(
        auto_grad(metric=autograd_metric, parameter=preds_scale, n=2) * weights,
        Gaussian.stabilize)

    # Reshaping
    grad = grad.ravel(order="F")
    hess = hess.ravel(order="F")

    return grad, hess
def _make_validation_labels_purchase_only(valid_ds: lgb.Dataset):
    """Rewrite the labels of ``valid_ds`` so only purchase labels stay non-zero.

    The dataset is constructed first, then every label different from
    ``_PURCHASE_LABEL`` is set to ``0.0`` and the modified labels are written
    back with ``set_label``. Before the rewrite, two counts are logged: the
    non-purchase labels that are also not ``_NOTHING_LABEL``, and all
    non-purchase labels.
    """
    valid_ds.construct()
    labels = np.array(valid_ds.get_label())

    non_purchase = labels != _PURCHASE_LABEL
    non_purchase_interaction = np.logical_and(non_purchase, labels != _NOTHING_LABEL)

    for message in (
            f"Number of non-purchase interactions in valid: {non_purchase_interaction.sum()}",
            f"Number of total non-purchases in valid: {non_purchase.sum()}",
    ):
        logging.info(message)

    labels[non_purchase] = 0.0
    valid_ds.set_label(labels)
def lgb_mape(preds: np.ndarray, lgb_train: Dataset) -> Tuple[str, float, bool]:
    """
    Mean absolute percentage error (MAPE) metric for evaluation in lightgbm.

    Rows whose label is exactly zero are excluded, since the relative error is
    undefined there. The error of each remaining row is
    ``|label - pred| / |label|``, so negative labels contribute a positive
    error instead of cancelling out positive ones.

    Args:
        preds: Array of predictions
        lgb_train: LightGBM Dataset

    Returns:
        Tuple of error name (str), error (float) and the is-higher-better flag
        (always False). The error is NaN when every label is zero.
    """
    labels = lgb_train.get_label()
    mask = labels != 0
    if not np.any(mask):
        # No usable rows; the mean of an empty selection is NaN.
        return "mape", float("nan"), False

    # Zero labels are dropped by the mask below; silence the warnings their
    # division would otherwise emit.
    with np.errstate(divide="ignore", invalid="ignore"):
        relative_error = np.fabs((labels - preds) / labels)
    return "mape", float(relative_error[mask].mean()), False
def Dist_Objective(predt: np.ndarray, data: lgb.Dataset):
    """A customized objective function to train each distributional parameter
    using custom gradient and hessian.

    Gradients and hessians come from the closed-form helpers on ``Gaussian``
    (``gradient_location``, ``hessian_location``, ``gradient_scale``,
    ``hessian_scale``).

    Args:
        predt: Raw predictions; reshaped column-major to
            ``(n_obs, Gaussian.n_dist_param())``.
        data: Dataset providing the labels and, optionally, sample weights.

    Returns:
        ``(grad, hess)``, each flattened column-major to 1-D.
    """
    target = data.get_label()

    # When num_class != 0, preds has shape (n_obs, n_dist_param).
    # Each element in a row represents a raw prediction (leaf weight, hasn't gone
    # through response function yet).
    predt = predt.reshape(-1, Gaussian.n_dist_param(), order="F")
    preds_location = Gaussian.param_dict()["location"](predt[:, 0])
    preds_scale = Gaussian.param_dict()["scale"](predt[:, 1])

    # Weights. The check must be `is None`: `array == None` is evaluated
    # element-wise, and using that result in `if` raises ValueError as soon as
    # real weights (more than one element) are supplied.
    weights = data.get_weight()
    if weights is None:
        # Use 1 as weight if no weights are specified
        weights = np.ones_like(target, dtype=float)

    # Initialize Gradient and Hessian Matrices
    grad = np.zeros(shape=(len(target), Gaussian.n_dist_param()))
    hess = np.zeros(shape=(len(target), Gaussian.n_dist_param()))

    # Location
    grad[:, 0] = Gaussian.gradient_location(y=target,
                                            location=preds_location,
                                            scale=preds_scale,
                                            weights=weights)
    hess[:, 0] = Gaussian.hessian_location(scale=preds_scale, weights=weights)

    # Scale
    grad[:, 1] = Gaussian.gradient_scale(y=target,
                                         location=preds_location,
                                         scale=preds_scale,
                                         weights=weights)
    hess[:, 1] = Gaussian.hessian_scale(scale=preds_scale, weights=weights)

    # Reshaping
    grad = grad.ravel(order="F")
    hess = hess.ravel(order="F")

    return grad, hess
def lgb_pr_auc(preds: np.ndarray, lgb_train: Dataset) -> Tuple[str, float, bool]:
    """
    Area under the precision-recall curve of the predictions, for use as a
    lightgbm evaluation metric.

    Args:
        preds: Array of predictions
        lgb_train: LightGBM Dataset holding the labels

    Returns:
        Tuple ``("pr_auc", <area under the curve>, True)``
    """
    precision_values, recall_values, _thresholds = precision_recall_curve(
        lgb_train.get_label(), preds)
    # Recall is the x-axis and precision the y-axis of the curve.
    area = auc(recall_values, precision_values)
    return "pr_auc", area, True
def top2_accuray_lgb(
    predt: np.ndarray,
    data: lgb.Dataset,
    threshold: float = 0.5,
) -> Tuple[str, float, bool]:
    """Top-2 accuracy for a fixed layout of 31 classes.

    ``predt`` is reshaped to ``(31, len(predt) / 31)``, so each column holds
    the 31 scores of one sample. A sample counts as a hit when its label equals
    the index of the highest or the second-highest score in its column.

    Args:
        predt: Flat score array whose length is a multiple of 31.
        data: Dataset providing the labels through ``get_label()``.
        threshold: Accepted for signature compatibility; not used.

    Returns:
        ``('top2_accuray', <fraction of hits>, True)``.
    """
    n_classes = 31
    n_samples = int(len(predt) / n_classes)
    score_matrix = predt.reshape(n_classes, n_samples)

    labels = data.get_label()

    # Class indices per column, best score first.
    ranking = score_matrix.argsort(axis=0)[::-1, :]
    best_class = ranking[0, :]
    second_class = ranking[1, :]

    hits = (labels == best_class) | (labels == second_class)
    # eval_name, eval_result, is_higher_better
    return 'top2_accuray', float(hits.mean()), True
def Dist_Metric(predt: np.ndarray, data: lgb.Dataset):
    """A customized evaluation metric that evaluates the predictions using the
    negative log-likelihood.

    Returns:
        ``("NegLogLikelihood", <NaN-ignoring sum of -log pdf>, False)``.
    """
    is_higher_better = False
    observed = data.get_label()

    # Using a custom objective function, the custom metric receives raw
    # predictions which need to be transformed with the corresponding response
    # function.
    raw = predt.reshape(-1, Gaussian.n_dist_param(), order="F")
    location = Gaussian.param_dict()["location"](raw[:, 0])
    scale = Gaussian.param_dict()["scale"](raw[:, 1])

    log_density = norm.logpdf(x=observed, loc=location, scale=scale)
    neg_log_likelihood = -np.nansum(log_density)

    return "NegLogLikelihood", neg_log_likelihood, is_higher_better
def corr_sharpe_lgb(
    time_id_fold,
    y_pred: np.array,
    dtrain: lgb.Dataset,
) -> Tuple[str, float, bool]:
    """
    Pearson correlation coefficient metric (sharpe variant).

    Builds a frame with the columns ``time_id``, ``y_pred`` and ``y_true`` and
    passes it to ``calculate_corr(..., sharpe=True)``; the first element of
    that result is reported.

    Returns:
        ``('pearson_corr_sharpe', <value>, True)``
    """
    columns = {
        'time_id': time_id_fold,
        'y_pred': y_pred,
        'y_true': dtrain.get_label(),
    }
    corr_result = calculate_corr(pd.DataFrame(columns), sharpe=True)
    return 'pearson_corr_sharpe', corr_result[0], True
def lgb_mape_exp(preds: np.ndarray, lgb_train: Dataset) -> Tuple[str, float, bool]:
    """
    Mean absolute percentage error (MAPE) metric for evaluation in lightgbm.

    NOTE: This will exponentiate the predictions first (the labels are used
    as they are), for the case where the model predicts on a log scale.

    Rows whose label is exactly zero are excluded, since the relative error is
    undefined there. The error of each remaining row is
    ``|label - exp(pred)| / |label|``, so negative labels contribute a positive
    error instead of cancelling out positive ones.

    Args:
        preds: Array of predictions
        lgb_train: LightGBM Dataset

    Returns:
        Tuple of error name (str), error (float) and the is-higher-better flag
        (always False). The error is NaN when every label is zero.
    """
    labels = lgb_train.get_label()
    mask = labels != 0
    if not np.any(mask):
        # No usable rows; the mean of an empty selection is NaN.
        return "mape_exp", float("nan"), False

    # Zero labels are dropped by the mask below; silence the warnings their
    # division would otherwise emit.
    with np.errstate(divide="ignore", invalid="ignore"):
        relative_error = np.fabs((labels - np.exp(preds)) / labels)
    return "mape_exp", float(relative_error[mask].mean()), False
def Dist_Objective(predt: np.ndarray, data: lgb.Dataset):
    """A customized objective function to train each distributional parameter
    using custom gradient and hessian.

    One gradient/hessian column is produced per entry of
    ``Expectile.expectiles`` using ``Expectile.gradient_expectile`` and
    ``Expectile.hessian_expectile``.

    Args:
        predt: Raw predictions; reshaped column-major to
            ``(n_obs, Expectile.n_dist_param())``.
        data: Dataset providing the labels and, optionally, sample weights.

    Returns:
        ``(grad, hess)``, each flattened column-major to 1-D.
    """
    target = data.get_label()

    # When num_class != 0, preds has shape (n_obs, n_dist_param).
    # Each element in a row represents a raw prediction (leaf weight, hasn't gone
    # through response function yet).
    preds_expectile = predt.reshape(-1, Expectile.n_dist_param(), order="F")

    # Weights. The check must be `is None`: `array == None` is evaluated
    # element-wise, and using that result in `if` raises ValueError as soon as
    # real weights (more than one element) are supplied.
    weights = data.get_weight()
    if weights is None:
        # Use 1 as weight if no weights are specified
        weights = np.ones_like(target, dtype=float)

    # Initialize Gradient and Hessian Matrices
    grad = np.zeros(shape=(len(target), len(Expectile.expectiles)))
    hess = np.zeros(shape=(len(target), len(Expectile.expectiles)))

    for i, tau in enumerate(Expectile.expectiles):
        grad[:, i] = Expectile.gradient_expectile(y=target,
                                                  expectile=preds_expectile[:, i],
                                                  tau=tau,
                                                  weights=weights)
        hess[:, i] = Expectile.hessian_expectile(y=target,
                                                 expectile=preds_expectile[:, i],
                                                 tau=tau,
                                                 weights=weights)

    # Reshaping
    grad = grad.ravel(order="F")
    hess = hess.ravel(order="F")

    return grad, hess
def Dist_Metric(predt: np.ndarray, data: lgb.Dataset):
    """A customized evaluation metric that evaluates the predictions using the
    negative log-likelihood.

    Returns:
        ``("NegLogLikelihood", <value rounded to 5 decimals>, False)``; the
        value is the NaN-ignoring sum of the negative Gaussian log-probabilities
        of the labels.
    """
    observed = torch.tensor(data.get_label())
    is_higher_better = False

    # Using a custom objective function, the custom metric receives raw
    # predictions which need to be transformed with the corresponding response
    # function.
    raw = predt.reshape(-1, Gaussian.n_dist_param(), order="F")
    location = torch.tensor(
        Gaussian.param_dict()["location"](raw[:, 0]), requires_grad=True)
    scale = torch.tensor(
        Gaussian.param_dict()["scale"](raw[:, 1]), requires_grad=True)

    neg_log_likelihood = -Normal(location, scale).log_prob(observed).nansum()
    # Detach from the autograd graph before handing the value to numpy.
    rounded = np.round(neg_log_likelihood.detach().numpy(), 5)

    return "NegLogLikelihood", rounded, is_higher_better
def __call__(self, pred: np.ndarray, dtrain: lgb.Dataset) -> Tuple[str, float, bool]:
    """Evaluate ``self.metric_func`` on predictions against the dataset labels.

    Args:
        pred: Predictions; reshaped column-major to ``(n_labels, -1)`` when
            their first dimension differs from the number of labels.
        dtrain: Dataset providing labels and (possibly ``None``) weights.

    Returns:
        ``('Opt metric', <metric value>, self.greater_is_better)``.
    """
    label = dtrain.get_label()
    weights = dtrain.get_weight()
    # A length mismatch means `pred` arrived flattened; restore one row per label.
    # NOTE(review): the int32 cast is assumed to belong to this branch only;
    # the original layout was ambiguous - confirm.
    if label.shape[0] != pred.shape[0]:
        pred = pred.reshape((label.shape[0], -1), order='F')
        label = label.astype(np.int32)
    # Transform predictions with `self.bw_func` before scoring.
    pred = self.bw_func(pred)
    # for weighted case
    try:
        val = self.metric_func(label, pred, sample_weight=weights)
    except TypeError:
        # Retry without weights when the call above raises TypeError
        # (presumably the metric does not accept `sample_weight`).
        val = self.metric_func(label, pred)
    # TODO: what if grouped case
    return 'Opt metric', val, self.greater_is_better
def Dist_Metric(predt: np.ndarray, data: lgb.Dataset):
    """A customized evaluation metric for the expectile predictions.

    ``Expectile.expectile_loss`` is evaluated once per entry of
    ``Expectile.expectiles`` and the NaN-ignoring mean over all of those
    losses is reported.

    Returns:
        ``("NegLogLikelihood", <mean loss>, False)``.
    """
    target = data.get_label()
    is_higher_better = False

    # Using a custom objective function, the custom metric receives raw
    # predictions which need to be transformed with the corresponding response
    # function.
    preds_expectile = predt.reshape(-1, Expectile.n_dist_param(), order="F")

    per_expectile_loss = [
        Expectile.expectile_loss(y=target,
                                 expectile=preds_expectile[:, column],
                                 tau=Expectile.expectiles[column])
        for column in range(len(Expectile.expectiles))
    ]

    return "NegLogLikelihood", np.nanmean(per_expectile_loss), is_higher_better
def rmse(y_pred: np.ndarray, data: lgb.Dataset) -> Tuple[str, float, bool]:
    """Root of the mean squared error between the dataset labels and ``y_pred``.

    Returns:
        ``(name, result, is_higher_better)`` with ``is_higher_better=False``.
    """
    # NOTE(review): the value is a plain RMSE but is reported under the name
    # "rmsle" - presumably the labels are already log-transformed; confirm.
    squared_error = mean_squared_error(data.get_label(), y_pred)
    root = squared_error**0.5
    return "rmsle", root, False
def MacroF1MetricRegression(preds: np.ndarray, dtrain: Dataset):
    """Macro-averaged F1 for regression outputs treated as class indices.

    Predictions are clipped to ``[0, 10]``, rounded and cast to ``int16``
    before being compared with the dataset labels.

    Returns:
        ``('MacroF1Metric', <score>, True)``.
    """
    labels = dtrain.get_label()
    clipped = np.clip(preds, 0, 10)
    predicted_classes = np.round(clipped).astype(np.int16)
    macro_score = f1_score(labels, predicted_classes, average='macro')
    return ('MacroF1Metric', macro_score, True)
def macro_f1(pred: np.array, data: lgb.Dataset):
    """Macro F1 evaluation metric computed by ``calc_f1``.

    ``pred`` is reshaped to ``(-1, N)`` and transposed, giving one row per
    label, i.e. (N, num_class).

    Returns:
        ``('macro_f1', <score>, True)``; True means "higher is better".
    """
    labels = data.get_label()
    per_sample_scores = np.transpose(pred.reshape(-1, len(labels)))
    return 'macro_f1', calc_f1(labels, per_sample_scores), True
def my_logistic_obj(predt: np.ndarray,
                    data: lgb.Dataset) -> Tuple[np.ndarray, np.ndarray]:
    """Class-weighted custom objective for a fixed layout of 31 classes.

    ``predt`` is reshaped to ``(31, n_samples)``, transposed to one row per
    sample and passed through ``softmax``. Gradient and hessian are assembled
    with the chain rule from the true-class probability, then each sample's
    row is multiplied by the weight listed for its label.

    Returns:
        grad : first derivative, flattened class-major
        hess : second derivative, flattened class-major
    """
    # Per-label multipliers applied to both gradient and hessian rows.
    class_weights = {
        4.0: 0.7590094259401983,
        12.0: 0.8095400631514924,
        13.0: 0.8225826394000231,
        14.0: 0.8549528042689977,
        30.0: 0.8549528042689977,
        11.0: 0.8790811910945177,
        0.0: 0.8797399286456303,
        6.0: 0.8981458256286846,
        19.0: 0.91289269651962,
        8.0: 0.9193468859159446,
        23.0: 0.9257599332783142,
        9.0: 0.9466869195345441,
        28.0: 0.9616129930949363,
        5.0: 0.9636161932936556,
        22.0: 0.9670600916375024,
        27.0: 0.9896031798239419,
        20.0: 0.998140820116188,
        15.0: 1.0347287996927568,
        10.0: 1.0440428983274967,
        17.0: 1.0568262071050216,
        2.0: 1.060630426576832,
        24.0: 1.0737709894542427,
        7.0: 1.083904634845265,
        21.0: 1.0942847373031122,
        16.0: 1.0961254610380533,
        1.0: 1.098000827862746,
        26.0: 1.1133105616315027,
        3.0: 1.199290359855132,
        25.0: 1.2044053703071544,
        18.0: 1.227637388888927,
        29.0: 1.2703169414985653
    }
    class_num = 31
    data_size = int(len(predt) / class_num)

    labels = data.get_label().astype(int)
    label_col = labels.reshape(-1, 1)
    row_idx = np.arange(0, data_size).reshape(-1, 1)

    # One row per sample.
    # NOTE(review): assumes `softmax` normalises each row - confirm.
    probs = softmax(np.transpose(predt.reshape(class_num, data_size)))

    # Probability assigned to the true class of every sample, shape (n, 1).
    # Fancy indexing returns a copy, and `probs` is never modified below.
    p_true = probs[row_idx, label_col]

    # grad: d(p_true)/d(score) combined with d(loss)/d(p_true) = -1 / p_true
    dy_dx = probs.copy()
    dy_dx *= -p_true
    dy_dx[row_idx, label_col] += p_true
    df_dy = np.tile(-1 / p_true, (1, 31))
    grad = df_dy * dy_dx

    # hess
    d2y_dx2 = probs * (2 * probs - 1)
    d2y_dx2 *= p_true
    d2y_dx2[row_idx, label_col] -= p_true
    d2f_dxdy = (df_dy**2) * dy_dx
    hess = df_dy * d2y_dx2 + d2f_dxdy * dy_dx

    # Scale the rows of each class by its weight.
    for label_value, weight in class_weights.items():
        selected = labels == label_value
        grad[selected] *= weight
        hess[selected] *= weight

    # Back to the flat class-major layout the scores arrived in.
    grad = np.transpose(grad).flatten()
    hess = np.transpose(hess).flatten()
    return grad, hess
def f_relative_error(y_true: np.ndarray, dtrain: lgb.Dataset):
    """Mean absolute relative difference between ``y_true`` and the dataset labels.

    The difference is divided by ``y_true``, i.e. by the first argument.

    Returns:
        ``('RelativeError', <mean of |(y_true - labels) / y_true|>, False)``.
    """
    # NOTE(review): if this is used as a LightGBM eval function, the first
    # argument holds predictions, so the error would be relative to the
    # predictions rather than to the labels - confirm against the caller.
    stored_labels = dtrain.get_label()
    relative_difference = np.abs((y_true - stored_labels) / y_true)
    return ('RelativeError', np.mean(relative_difference), False)
def lgb_rmsle_score(preds: np.ndarray, dval: lgb.Dataset):
    """Root mean squared log error after exponentiating labels and predictions.

    Both the dataset labels and ``preds`` are passed through ``np.exp`` before
    ``mean_squared_log_error`` is applied.

    Returns:
        ``('rmsle', <score>, False)``.
    """
    actual = np.exp(dval.get_label())
    predicted = np.exp(preds)
    squared_log_error = mean_squared_log_error(actual, predicted)
    return 'rmsle', np.sqrt(squared_log_error), False
def train(params: Dict[str, Any],
          dtrain: lgb.Dataset,
          dist,
          num_boost_round: int = 100,
          valid_sets: Optional[List[lgb.Dataset]] = None,
          valid_names: Optional[List[str]] = None,
          init_model: Optional[Union[str, Path, lgb.Booster]] = None,
          feature_name: Union[List[str], str] = 'auto',
          categorical_feature: Union[List[str], List[int], str] = 'auto',
          keep_training_booster: bool = False,
          callbacks: Optional[List[Callable]] = None) -> lgb.Booster:
    """Train a LightGBMLSS model with given parameters.

    The keys ``num_class``, ``metric``, ``objective``, ``random_seed`` and
    ``verbose`` are always overridden for training. The override is applied to
    a copy, so the ``params`` dict passed by the caller is left untouched.

    As a side effect, ``dist.start_values`` is set and the init score of
    ``dtrain`` is replaced with those start values.

    Parameters
    ----------
    params : dict
        Parameters for training.
    dtrain : Dataset
        Data to be trained on.
    dist : lightgbmlss.distributions class.
        Specifies distributional assumption.
    num_boost_round : int, optional (default=100)
        Number of boosting iterations.
    valid_sets : list of Dataset, or None, optional (default=None)
        List of data to be evaluated on during training.
    valid_names : list of str, or None, optional (default=None)
        Names of ``valid_sets``.
    init_model : str, pathlib.Path, Booster or None, optional (default=None)
        Filename of LightGBM model or Booster instance used for continue training.
    feature_name : list of str, or 'auto', optional (default="auto")
        Feature names.
        If 'auto' and data is pandas DataFrame, data columns names are used.
    categorical_feature : list of str or int, or 'auto', optional (default="auto")
        Categorical features.
        If list of int, interpreted as indices.
        If list of str, interpreted as feature names (need to specify ``feature_name`` as well).
        If 'auto' and data is pandas DataFrame, pandas unordered categorical columns are used.
        All values in categorical features should be less than int32 max value (2147483647).
        Large values could be memory consuming. Consider using consecutive integers starting from zero.
        All negative values in categorical features will be treated as missing values.
        The output cannot be monotonically constrained with respect to a categorical feature.
    keep_training_booster : bool, optional (default=False)
        Whether the returned Booster will be used to keep training.
        If False, the returned value will be converted into _InnerPredictor before returning.
        This means you won't be able to use ``eval``, ``eval_train`` or ``eval_valid`` methods of the returned Booster.
        When your model is very large and cause the memory error,
        you can try to set this param to ``True`` to avoid the model conversion performed during the internal call of ``model_to_string``.
        You can still use _InnerPredictor as ``init_model`` for future continue training.
    callbacks : list of callable, or None, optional (default=None)
        List of callback functions that are applied at each iteration.
        See Callbacks in Python API for more information.

    Returns
    -------
    booster : Booster
        The trained Booster model.
    """
    params_adj = {
        "num_class": dist.n_dist_param(),
        "metric": "None",
        "objective": "None",
        "random_seed": 123,
        "verbose": -1
    }
    # Merge into a fresh dict so the caller's `params` is not modified;
    # the adjusted keys take precedence over user-supplied ones.
    params = dict(params)
    params.update(params_adj)

    # Set init_score as starting point for each distributional parameter.
    labels = dtrain.get_label()
    dist.start_values = dist.initialize(labels)
    # One column per distributional parameter, flattened column-major below.
    init_score = np.ones(shape=(labels.shape[0], 1)) * dist.start_values
    dtrain.set_init_score(init_score.ravel(order="F"))

    bstLSS_train = lgb.train(params,
                             dtrain,
                             num_boost_round=num_boost_round,
                             fobj=dist.Dist_Objective,
                             feval=dist.Dist_Metric,
                             valid_sets=valid_sets,
                             valid_names=valid_names,
                             init_model=init_model,
                             feature_name=feature_name,
                             categorical_feature=categorical_feature,
                             keep_training_booster=keep_training_booster,
                             callbacks=callbacks)
    return bstLSS_train
def rmsle(y_pred: np.ndarray, data: lgb.Dataset) -> Tuple[str, float, bool]:
    """Root mean squared log error on values mapped back with ``expm1``.

    Both ``y_pred`` and the dataset labels are passed through ``np.expm1``
    before ``mean_squared_log_error`` is applied.

    Returns:
        ``(name, result, is_higher_better)`` with ``is_higher_better=False``.
    """
    predicted = np.expm1(y_pred)
    actual = np.expm1(data.get_label())
    squared_log_error = mean_squared_log_error(actual, predicted)
    return "rmsle", squared_log_error**0.5, False