def _get_gradient_hess(self, y, y_pred):
    """
    Compute first- and second-order derivative information of the loss.
    :param y: ground-truth values
    :param y_pred: predicted values (raw scores)
    :return: (gradient, hessian)
    """
    if self.loss == 'squarederror':
        return y_pred - y, np.ones_like(y)
    elif self.loss == 'logistic':
        # log loss on the raw score: grad = sigmoid(y_pred) - y
        pred = utils.sigmoid(y_pred)
        return pred - y, pred * (1 - pred)
    elif self.loss == 'poisson':
        # Poisson deviance with log link
        return np.exp(y_pred) - y, np.exp(y_pred)
    elif self.loss == 'gamma':
        # gamma deviance with log link
        return 1.0 - y * np.exp(-1.0 * y_pred), y * np.exp(-1.0 * y_pred)
    elif self.loss == 'tweedie':
        if self.p == 1:  # p = 1 degenerates to Poisson
            return np.exp(y_pred) - y, np.exp(y_pred)
        elif self.p == 2:  # p = 2 degenerates to gamma
            return 1.0 - y * np.exp(-1.0 * y_pred), y * np.exp(-1.0 * y_pred)
        else:
            return (np.exp(y_pred * (2.0 - self.p)) - y * np.exp(y_pred * (1.0 - self.p)),
                    (2.0 - self.p) * np.exp(y_pred * (2.0 - self.p))
                    - (1.0 - self.p) * y * np.exp(y_pred * (1.0 - self.p)))
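# A minimal standalone sketch (it uses none of the class state) checking the
# squared-error branch above against central finite differences: for the loss
# 0.5 * (y_pred - y)^2, the gradient is y_pred - y and the hessian is 1.
# `half_squared_error` is a hypothetical local helper for illustration only.
if __name__ == "__main__":
    import numpy as np

    def half_squared_error(y, y_pred):
        return 0.5 * (y_pred - y) ** 2

    y = np.array([1.0, 2.0, 3.0])
    y_pred = np.array([1.5, 1.0, 2.5])
    eps = 1e-4
    grad_fd = (half_squared_error(y, y_pred + eps)
               - half_squared_error(y, y_pred - eps)) / (2 * eps)
    hess_fd = (half_squared_error(y, y_pred + eps)
               - 2 * half_squared_error(y, y_pred)
               + half_squared_error(y, y_pred - eps)) / eps ** 2
    assert np.allclose(grad_fd, y_pred - y, atol=1e-6)
    assert np.allclose(hess_fd, np.ones_like(y), atol=1e-4)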
def predict(self, X):
    """
    :param X: features
    :return: predictions on the scale of the objective
    """
    # normalize
    if self.normal:
        X = (X - self.xmin) / self.xmax
    # reorder columns so that same-field features are contiguous
    X = X[:, self.replace_ind]
    # keep only the features that take part in pairwise interactions
    X_ = X[:, self.positive_ind:]
    n_sample, n_feature = X_.shape
    pol = np.zeros(n_sample)
    for i in range(0, n_feature - 1):
        for j in range(i + 1, n_feature):
            pol += X_[:, i] * X_[:, j] * np.dot(
                self.V[i, self.fields[self.positive_ind + j]],
                self.V[j, self.fields[self.positive_ind + i]])
    linear_rst = np.c_[np.ones(n_sample), X] @ self.w.reshape(-1) + pol
    if self.objective == "squarederror":
        return linear_rst
    elif self.objective in ["poisson", "gamma", "tweedie"]:
        return np.exp(linear_rst)
    else:
        return utils.sigmoid(linear_rst) > 0.5
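# A standalone sketch (local arrays only, nothing from the class) of the FFM
# pairwise term computed above: each feature pair (i, j) interacts through
# the dot product <V[i, field(j)], V[j, field(i)]>, so every feature keeps
# one latent vector per field. Array names here are illustrative only.
if __name__ == "__main__":
    import numpy as np

    rng = np.random.default_rng(0)
    n, k = 3, 2                  # 3 interacting features, latent dim 2
    fields = [0, 0, 1]           # features 0 and 1 share a field
    V = rng.random((n, 2, k))    # shape: (feature, field, latent dim)
    x = rng.random((5, n))       # 5 samples

    pol = np.zeros(5)
    for i in range(n - 1):
        for j in range(i + 1, n):
            pol += x[:, i] * x[:, j] * np.dot(V[i, fields[j]], V[j, fields[i]])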
def predict_proba(self, X):
    """
    :param X: features
    :return: class probabilities, columns [P(y=0), P(y=1)]
    """
    if self.normal:
        X = (X - self.xmin) / self.xmax
    n_sample, n_feature = X.shape
    # pairwise term via the O(n*k) identity:
    # sum_{i<j} x_i x_j <V_i, V_j> = 0.5 * sum_f [(XV)_f^2 - (X^2)(V^2)_f]
    X_V = X @ self.V
    X_V_2 = X_V * X_V
    X_2_V_2 = (X * X) @ (self.V * self.V)
    pol = 0.5 * np.sum(X_V_2 - X_2_V_2, axis=1)
    linear_rst = np.c_[np.ones(n_sample), X] @ self.w.reshape(-1) + pol
    pos_proba = utils.sigmoid(linear_rst)
    return np.c_[1.0 - pos_proba, pos_proba]
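# A standalone sketch (local arrays only, nothing from the class) verifying
# the O(n*k) identity the vectorized pairwise term above relies on: the naive
# double loop over feature pairs and the squared-sum trick agree numerically.
if __name__ == "__main__":
    import numpy as np

    rng = np.random.default_rng(0)
    X = rng.random((4, 6))
    V = rng.random((6, 3))

    naive = np.zeros(X.shape[0])
    for i in range(X.shape[1] - 1):
        for j in range(i + 1, X.shape[1]):
            naive += X[:, i] * X[:, j] * np.dot(V[i], V[j])

    X_V = X @ V
    fast = 0.5 * np.sum(X_V * X_V - (X * X) @ (V * V), axis=1)
    assert np.allclose(naive, fast)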
def predict(self, X):
    """
    :param X: features
    :return: predictions on the scale of the objective
    """
    if self.normal:
        X = (X - self.xmin) / self.xmax
    n_sample, n_feature = X.shape
    X_V = X @ self.V
    X_V_2 = X_V * X_V
    X_2_V_2 = (X * X) @ (self.V * self.V)
    pol = 0.5 * np.sum(X_V_2 - X_2_V_2, axis=1)
    linear_rst = np.c_[np.ones(n_sample), X] @ self.w.reshape(-1) + pol
    if self.objective == "squarederror":
        return linear_rst
    elif self.objective in ["poisson", "gamma", "tweedie"]:
        return np.exp(linear_rst)
    else:
        return utils.sigmoid(linear_rst) > 0.5
def predict_proba(self, X):
    """
    Used for the logistic objective.
    :param X: features
    :return: class probabilities, columns [P(y=0), P(y=1)]
    """
    # normalize
    if self.normal:
        X = (X - self.xmin) / self.xmax
    # reorder columns so that same-field features are contiguous
    X = X[:, self.replace_ind]
    # keep only the features that take part in pairwise interactions
    X_ = X[:, self.positive_ind:]
    n_sample, n_feature = X_.shape
    pol = np.zeros(n_sample)
    for i in range(0, n_feature - 1):
        for j in range(i + 1, n_feature):
            pol += X_[:, i] * X_[:, j] * np.dot(
                self.V[i, self.fields[self.positive_ind + j]],
                self.V[j, self.fields[self.positive_ind + i]])
    pos_proba = utils.sigmoid(np.c_[np.ones(n_sample), X] @ self.w.reshape(-1) + pol)
    return np.c_[1.0 - pos_proba, pos_proba]
def fit(self, X, y, eval_set=None, show_log=True):
    X_o = X.copy()
    # normalization
    if self.normal:
        self.xmin = X.min(axis=0)
        self.xmax = X.max(axis=0) + 1e-8
        X = (X - self.xmin) / self.xmax
    n_sample, n_feature = X.shape
    x_y = np.c_[np.ones(n_sample), X, y]
    # record losses
    train_losses = []
    eval_losses = []
    # optionally raise the learning rate to at least 1 / n_feature
    if self.adjust_lr:
        self.lr = max(self.lr, 1 / n_feature)
    # initialize parameters
    self.w = np.random.random((n_feature + 1, 1)) * 1e-3
    self.V = np.random.random((n_feature, self.hidden_dim)) * 1e-3
    if self.solver == 'adam':
        # cache first- and second-moment estimates of the gradients
        w_1 = np.zeros_like(self.w)
        V_1 = np.zeros_like(self.V)
        w_2 = np.zeros_like(self.w)
        V_2 = np.zeros_like(self.V)
    # validation-set bookkeeping for early stopping
    best_eval_value = np.power(2., 1023)
    eval_count = 0
    stop_training = False
    # update parameters
    count = 0
    for epoch in range(self.epochs):
        np.random.shuffle(x_y)
        for index in range(x_y.shape[0] // self.batch_size):
            count += 1
            batch_x_y = x_y[self.batch_size * index:self.batch_size * (index + 1)]
            batch_x = batch_x_y[:, :-1]
            batch_y = batch_x_y[:, -1:]
            # first factor of the chain rule: dL/d(raw score)
            if self.objective == "squarederror":
                y_x_t = self._y(batch_x).reshape((-1, 1)) - batch_y
            elif self.objective == "poisson":
                y_x_t = np.exp(self._y(batch_x).reshape((-1, 1))) - batch_y
            elif self.objective == "gamma":
                y_x_t = 1.0 - batch_y * np.exp(-1.0 * self._y(batch_x).reshape((-1, 1)))
            elif self.objective == 'tweedie':
                if self.tweedie_p == 1:
                    y_x_t = np.exp(self._y(batch_x).reshape((-1, 1))) - batch_y
                elif self.tweedie_p == 2:
                    y_x_t = 1.0 - batch_y * np.exp(-1.0 * self._y(batch_x).reshape((-1, 1)))
                else:
                    y_x_t = np.exp(self._y(batch_x).reshape((-1, 1)) * (2.0 - self.tweedie_p)) \
                            - batch_y * np.exp(self._y(batch_x).reshape((-1, 1)) * (1.0 - self.tweedie_p))
            else:
                # binary classification
                y_x_t = utils.sigmoid(self._y(batch_x).reshape((-1, 1))) - batch_y
            # update w
            w_reg = self.lamb * self.w + self.alpha * np.where(self.w > 0, 1, 0)
            w_reg[0, 0] = 0.0  # do not regularize the bias
            w_grad = (np.sum(y_x_t * batch_x, axis=0) / self.batch_size).reshape((-1, 1)) + w_reg
            if self.solver == 'sgd':
                self.w = self.w - self.lr * w_grad
            elif self.solver == 'adam':
                w_1 = self.rho_1 * w_1 + (1 - self.rho_1) * w_grad
                w_2 = self.rho_2 * w_2 + (1 - self.rho_2) * w_grad * w_grad
                w_1_ = w_1 / (1 - np.power(self.rho_1, count))
                w_2_ = w_2 / (1 - np.power(self.rho_2, count))
                self.w = self.w - (self.lr * w_1_) / (np.sqrt(w_2_) + 1e-8)
            # update V
            batch_x_ = batch_x[:, 1:]
            V_X = batch_x_ @ self.V
            X_2 = batch_x_ * batch_x_
            # updating single elements (i, f) one by one is a bit slow:
            # for i in range(self.V.shape[0]):
            #     for f in range(self.V.shape[1]):
            #         if self.solver == "sgd":
            #             self.V[i, f] -= self.lr * (
            #                 np.sum(y_x_t.reshape(-1) * (batch_x_[:, i] * V_X[:, f] - self.V[i, f] * X_2[:, i]))
            #                 / self.batch_size + self.lamb * self.V[i, f] + self.alpha * (self.V[i, f] > 0))
            #         elif self.solver == "adam":
            #             v_reg = self.lamb * self.V[i, f] + self.alpha * (self.V[i, f] > 0)
            #             v_grad = np.sum(y_x_t.reshape(-1) * (
            #                 batch_x_[:, i] * V_X[:, f] - self.V[i, f] * X_2[:, i])) / self.batch_size + v_reg
            #             V_1[i, f] = self.rho_1 * V_1[i, f] + (1 - self.rho_1) * v_grad
            #             V_2[i, f] = self.rho_2 * V_2[i, f] + (1 - self.rho_2) * v_grad * v_grad
            #             v_1_ = V_1[i, f] / (1 - np.power(self.rho_1, count))
            #             v_2_ = V_2[i, f] / (1 - np.power(self.rho_2, count))
            #             self.V[i, f] = self.V[i, f] - (self.lr * v_1_) / (np.sqrt(v_2_) + 1e-8)
            # update one latent dimension at a time instead
            for f in range(self.V.shape[1]):
                V_reg = self.lamb * self.V[:, f] + self.alpha * (self.V[:, f] > 0)
                V_grad = np.sum(y_x_t * (batch_x_ * V_X[:, f].reshape((-1, 1))
                                         - X_2 * self.V[:, f]),
                                axis=0) / self.batch_size + V_reg
                if self.solver == 'sgd':
                    self.V[:, f] = self.V[:, f] - self.lr * V_grad
                elif self.solver == 'adam':
                    V_1[:, f] = self.rho_1 * V_1[:, f] + (1 - self.rho_1) * V_grad
                    V_2[:, f] = self.rho_2 * V_2[:, f] + (1 - self.rho_2) * V_grad * V_grad
                    V_1_ = V_1[:, f] / (1 - np.power(self.rho_1, count))
                    V_2_ = V_2[:, f] / (1 - np.power(self.rho_2, count))
                    self.V[:, f] = self.V[:, f] - (self.lr * V_1_) / (np.sqrt(V_2_) + 1e-8)
            # compute eval loss
            eval_loss = None
            if eval_set is not None:
                eval_x, eval_y = eval_set
                eval_loss = np.std(eval_y - self.predict(eval_x))
                eval_losses.append(eval_loss)
            # optionally log progress
            if show_log:
                train_loss = np.std(y - self.predict(X_o))
                print("epoch:", epoch + 1, "/", self.epochs,
                      ",samples:", (index + 1) * self.batch_size, "/", n_sample,
                      ",train loss:", train_loss, ",eval loss:", eval_loss)
                train_losses.append(train_loss)
            # early stopping
            if eval_loss is not None and self.early_stopping_rounds is not None:
                if eval_loss < best_eval_value:
                    eval_count = 0
                    best_eval_value = eval_loss
                else:
                    eval_count += 1
                if eval_count >= self.early_stopping_rounds:
                    print("---------------early_stopping-----------------------------")
                    stop_training = True
                    break
        if stop_training:
            break
    return train_losses, eval_losses
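# A hypothetical usage sketch for the trainer above. The class name `FM` and
# its constructor arguments are assumptions inferred from the attributes used
# in fit(); only the fit/predict signatures come from this file.
if __name__ == "__main__":
    import numpy as np

    rng = np.random.default_rng(0)
    X = rng.random((200, 5))
    y = X @ np.array([0.5, -0.2, 0.1, 0.3, -0.4]) + 0.01 * rng.random(200)

    model = FM(objective="squarederror")  # assumed constructor
    train_losses, eval_losses = model.fit(
        X[:160], y[:160], eval_set=(X[160:], y[160:]), show_log=True)
    y_hat = model.predict(X[160:])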
def fit(self, X, y, eval_set=None, show_log=False, fields=None):
    """
    :param X:
    :param y:
    :param eval_set:
    :param show_log:
    :param fields: when None, the model degenerates to a plain FM
    :return: (train_losses, eval_losses)
    """
    X_o = X.copy()
    # normalization
    if self.normal:
        self.xmin = X.min(axis=0)
        self.xmax = X.max(axis=0) + 1e-7
        X = (X - self.xmin) / self.xmax
    n_sample, n_feature = X.shape
    # handle fields: a negative field id marks a linear-only feature
    if fields is None:
        self.replace_ind = list(range(0, n_feature))
        self.positive_ind = 0
        self.fields = [0] * n_feature
        self.field_num = 1
    else:
        self.replace_ind = np.argsort(fields).tolist()
        self.positive_ind = np.sum([1 if item < 0 else 0 for item in fields])
        self.fields = sorted(fields)
        self.field_num = len(set(self.fields[self.positive_ind:]))
    # reorder X so that same-field columns are contiguous
    X = X[:, self.replace_ind]
    x_y = np.c_[np.ones(n_sample), X, y]
    # record losses
    train_losses = []
    eval_losses = []
    # optionally raise the learning rate to at least 1 / n_feature
    if self.adjust_lr:
        self.lr = max(self.lr, 1 / n_feature)
    # initialize parameters
    self.w = np.random.random((n_feature + 1, 1)) * 1e-3
    self.V = np.random.random((n_feature - self.positive_ind, self.field_num, self.hidden_dim)) * 1e-3
    if self.solver == 'adam':
        # cache first- and second-moment estimates of the gradients
        w_1 = np.zeros_like(self.w)
        V_1 = np.zeros_like(self.V)
        w_2 = np.zeros_like(self.w)
        V_2 = np.zeros_like(self.V)
    # validation-set bookkeeping for early stopping
    best_eval_value = np.power(2., 1023)
    eval_count = 0
    stop_training = False
    # update parameters
    count = 0
    for epoch in range(self.epochs):
        np.random.shuffle(x_y)
        for index in range(x_y.shape[0] // self.batch_size):
            count += 1
            batch_x_y = x_y[self.batch_size * index:self.batch_size * (index + 1)]
            batch_x = batch_x_y[:, :-1]
            batch_y = batch_x_y[:, -1:]
            # first factor of the chain rule: dL/d(raw score)
            if self.objective == "squarederror":
                y_x_t = self._y(batch_x).reshape((-1, 1)) - batch_y
            elif self.objective == "poisson":
                y_x_t = np.exp(self._y(batch_x).reshape((-1, 1))) - batch_y
            elif self.objective == "gamma":
                y_x_t = 1.0 - batch_y * np.exp(-1.0 * self._y(batch_x).reshape((-1, 1)))
            elif self.objective == 'tweedie':
                if self.tweedie_p == 1:
                    y_x_t = np.exp(self._y(batch_x).reshape((-1, 1))) - batch_y
                elif self.tweedie_p == 2:
                    y_x_t = 1.0 - batch_y * np.exp(-1.0 * self._y(batch_x).reshape((-1, 1)))
                else:
                    y_x_t = np.exp(self._y(batch_x).reshape((-1, 1)) * (2.0 - self.tweedie_p)) \
                            - batch_y * np.exp(self._y(batch_x).reshape((-1, 1)) * (1.0 - self.tweedie_p))
            else:
                # binary classification
                y_x_t = utils.sigmoid(self._y(batch_x).reshape((-1, 1))) - batch_y
            # update w
            w_reg = self.lamb * self.w + self.alpha * np.where(self.w > 0, 1, 0)
            w_reg[0, 0] = 0.0  # do not regularize the bias
            w_grad = (np.sum(y_x_t * batch_x, axis=0) / self.batch_size).reshape((-1, 1)) + w_reg
            if self.solver == 'sgd':
                self.w = self.w - self.lr * w_grad
            elif self.solver == 'adam':
                w_1 = self.rho_1 * w_1 + (1 - self.rho_1) * w_grad
                w_2 = self.rho_2 * w_2 + (1 - self.rho_2) * w_grad * w_grad
                w_1_ = w_1 / (1 - np.power(self.rho_1, count))
                w_2_ = w_2 / (1 - np.power(self.rho_2, count))
                self.w = self.w - (self.lr * w_1_) / (np.sqrt(w_2_) + 1e-8)
            # update V: drop the bias column and the linear-only features
            batch_x_ = batch_x[:, 1 + self.positive_ind:]
            # element-wise updates over feature pairs and latent dims
            for i in range(0, batch_x_.shape[1] - 1):
                for j in range(i + 1, batch_x_.shape[1]):
                    f_i = self.fields[self.positive_ind + i]
                    f_j = self.fields[self.positive_ind + j]
                    for k in range(0, self.hidden_dim):
                        v_reg_l = self.lamb * self.V[i, f_j, k] + self.alpha * (self.V[i, f_j, k] > 0)
                        v_grad_l = np.sum(y_x_t.reshape(-1) * batch_x_[:, i] * batch_x_[:, j]
                                          * self.V[j, f_i, k]) / self.batch_size + v_reg_l
                        v_reg_r = self.lamb * self.V[j, f_i, k] + self.alpha * (self.V[j, f_i, k] > 0)
                        v_grad_r = np.sum(y_x_t.reshape(-1) * batch_x_[:, i] * batch_x_[:, j]
                                          * self.V[i, f_j, k]) / self.batch_size + v_reg_r
                        if self.solver == "sgd":
                            self.V[i, f_j, k] -= self.lr * v_grad_l
                            self.V[j, f_i, k] -= self.lr * v_grad_r
                        elif self.solver == "adam":
                            V_1[i, f_j, k] = self.rho_1 * V_1[i, f_j, k] + (1 - self.rho_1) * v_grad_l
                            V_2[i, f_j, k] = self.rho_2 * V_2[i, f_j, k] + (1 - self.rho_2) * v_grad_l * v_grad_l
                            v_1_l = V_1[i, f_j, k] / (1 - np.power(self.rho_1, count))
                            v_2_l = V_2[i, f_j, k] / (1 - np.power(self.rho_2, count))
                            V_1[j, f_i, k] = self.rho_1 * V_1[j, f_i, k] + (1 - self.rho_1) * v_grad_r
                            V_2[j, f_i, k] = self.rho_2 * V_2[j, f_i, k] + (1 - self.rho_2) * v_grad_r * v_grad_r
                            v_1_r = V_1[j, f_i, k] / (1 - np.power(self.rho_1, count))
                            v_2_r = V_2[j, f_i, k] / (1 - np.power(self.rho_2, count))
                            self.V[i, f_j, k] -= (self.lr * v_1_l) / (np.sqrt(v_2_l) + 1e-8)
                            self.V[j, f_i, k] -= (self.lr * v_1_r) / (np.sqrt(v_2_r) + 1e-8)
            # compute eval loss
            eval_loss = None
            if eval_set is not None:
                eval_x, eval_y = eval_set
                if self.objective == 'logistic':
                    eval_loss = np.mean(eval_y != self.predict(eval_x))
                else:
                    eval_loss = np.std(eval_y - self.predict(eval_x))
                eval_losses.append(eval_loss)
            # optionally log progress
            if show_log:
                if self.objective == 'logistic':
                    train_loss = np.mean(y != self.predict(X_o))
                else:
                    train_loss = np.std(y - self.predict(X_o))
                print("epoch:", epoch + 1, "/", self.epochs,
                      ",samples:", (index + 1) * self.batch_size, "/", n_sample,
                      ",train loss:", train_loss, ",eval loss:", eval_loss)
                train_losses.append(train_loss)
            # early stopping
            if eval_loss is not None and self.early_stopping_rounds is not None:
                if eval_loss < best_eval_value:
                    eval_count = 0
                    best_eval_value = eval_loss
                else:
                    eval_count += 1
                if eval_count >= self.early_stopping_rounds:
                    print("---------------early_stopping-----------------------------")
                    stop_training = True
                    break
        if stop_training:
            break
    return train_losses, eval_losses
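# A small standalone sketch of the field bookkeeping done at the top of
# fit(): negative field ids mark linear-only features, argsort groups
# same-field columns together, and field_num counts the distinct
# non-negative fields. The example values below are illustrative only.
if __name__ == "__main__":
    import numpy as np

    fields = [1, -1, 0, 0]                          # feature 1 is linear-only
    replace_ind = np.argsort(fields).tolist()       # e.g. [1, 2, 3, 0] (tie order may vary)
    positive_ind = sum(1 for f in fields if f < 0)  # 1 linear-only feature
    sorted_fields = sorted(fields)                  # [-1, 0, 0, 1]
    field_num = len(set(sorted_fields[positive_ind:]))  # 2 interacting fields

    assert positive_ind == 1 and field_num == 2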