def run(self, XY, wc=None):
    """A NIPALS implementation for sparse PLS regression.

    Parameters
    ----------
    XY : List of two numpy arrays. XY[0] is n-by-p and XY[1] is n-by-q. The
            independent and dependent variables.

    wc : List of numpy arrays. The start vectors.

    Returns
    -------
    w : Numpy array, p-by-1. The weight vector of X.

    c : Numpy array, q-by-1. The weight vector of Y.
    """
    X = XY[0]
    Y = XY[1]

    n, p = X.shape

    l1_1 = penalties.L1(l=self.l[0])
    l1_2 = penalties.L1(l=self.l[1])

    if wc is not None:
        w_new = wc[0]
    else:
        maxi = np.argmax(np.sum(Y ** 2, axis=0))
        u = Y[:, [maxi]]
        w_new = np.dot(X.T, u)
        w_new *= 1.0 / maths.norm(w_new)

    for i in range(self.max_iter):
        w = w_new

        c = np.dot(Y.T, np.dot(X, w))
        if self.penalise_y:
            c = l1_2.prox(c)
            normc = maths.norm(c)
            if normc > consts.TOLERANCE:
                c *= 1.0 / normc

        w_new = np.dot(X.T, np.dot(Y, c))
        w_new = l1_1.prox(w_new)
        normw = maths.norm(w_new)
        if normw > consts.TOLERANCE:
            w_new *= 1.0 / normw

        if maths.norm(w_new - w) / maths.norm(w) < self.eps:
            break

    self.num_iter = i

    # t = np.dot(X, w)
    # tt = np.dot(t.T, t)[0, 0]
    # c = np.dot(Y.T, t)
    # if tt > consts.TOLERANCE:
    #     c /= tt

    return w_new, c
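
# A minimal, self-contained sketch of the same sparse NIPALS iteration on
# synthetic data, using plain NumPy only. It is not part of the class above:
# the helper name `_demo_sparse_nipals`, the synthetic X/Y and the explicit
# soft-thresholding function are illustrative assumptions, with the latter
# standing in for penalties.L1(...).prox.
def _demo_sparse_nipals(l1_w=0.1, l1_c=0.1, max_iter=100, eps=5e-8):
    import numpy as np

    rng = np.random.RandomState(42)
    X = rng.randn(50, 10)
    Y = rng.randn(50, 3)

    def soft_threshold(v, l):
        # Proximal operator of the L1 norm.
        return np.sign(v) * np.maximum(0.0, np.abs(v) - l)

    # Start from the column of Y with the largest sum of squares.
    u = Y[:, [np.argmax(np.sum(Y ** 2, axis=0))]]
    w = np.dot(X.T, u)
    w /= np.linalg.norm(w)

    for _ in range(max_iter):
        w_old = w
        # Update c (Y weights): penalise, then normalise.
        c = soft_threshold(np.dot(Y.T, np.dot(X, w)), l1_c)
        if np.linalg.norm(c) > 0.0:
            c /= np.linalg.norm(c)
        # Update w (X weights): penalise, then normalise.
        w = soft_threshold(np.dot(X.T, np.dot(Y, c)), l1_w)
        if np.linalg.norm(w) > 0.0:
            w /= np.linalg.norm(w)
        if np.linalg.norm(w - w_old) < eps * np.linalg.norm(w_old):
            break

    return w, c
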
def run(self, X, y, beta=None):
    """Find the minimiser of the associated function, starting at beta.

    Parameters
    ----------
    X : Numpy array, shape n-by-p. The matrix X with independent variables.

    y : Numpy array, shape n-by-1. The response variable y.

    beta : Numpy array. Optional starting point.
    """
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, False)
    if self.info_requested(Info.time):
        t = []
    if self.info_requested(Info.fvalue):
        f = []
    if self.info_requested(Info.converged):
        self.info_set(Info.converged, False)

    n, p = X.shape

    if beta is None:
        beta = self.start_vector.get_weights(p)
    else:
        beta = beta.copy()

    function = functions.CombinedFunction()
    function.add_loss(functions.losses.LinearRegression(X, y, mean=False))
    function.add_prox(penalties.L1(l=self.l))

    xTx = np.sum(X ** 2.0, axis=0)
    if self.mean:
        xTx *= 1.0 / float(n)

    for i in range(1, self.max_iter + 1):

        if self.info_requested(Info.time):
            tm = utils.time_cpu()

        # The update has an error that propagates. This resets the
        # approximation. We may not need to do this at every iteration.
        y_Xbeta = y - np.dot(X, beta)

        betaold = beta.copy()
        for j in range(p):

            xj = X[:, [j]]
            betaj = beta[j, 0]

            if xTx[j] < consts.TOLERANCE:  # Avoid division-by-zero.
                bj = 0.0
            else:
                bj = np.dot(xj.T, y_Xbeta + xj * betaj)[0, 0]
                if self.mean:
                    bj /= float(n)

                if j < self.penalty_start:
                    bj = bj / xTx[j]
                else:
                    # Soft thresholding.
                    bj = np.sign(bj) \
                        * max(0.0, (abs(bj) - self.l) / xTx[j])

            y_Xbeta -= xj * (bj - betaj)  # Update X.beta.
            beta[j] = bj  # Save result.

        if self.info_requested(Info.time):
            t.append(utils.time_cpu() - tm)
        if self.info_requested(Info.fvalue):
            f_ = self._f(y_Xbeta, y, beta)
            f.append(f_)

        # print "err:", maths.norm(beta - betaold)
        if maths.norm(beta - betaold) < self.eps \
                and i >= self.min_iter:

            if self.info_requested(Info.converged):
                self.info_set(Info.converged, True)

            # print "iterations: ", i
            break

    self.num_iter = i

    if self.info_requested(Info.num_iter):
        self.info_set(Info.num_iter, i)
    if self.info_requested(Info.time):
        self.info_set(Info.time, t)
    if self.info_requested(Info.fvalue):
        self.info_set(Info.fvalue, f)
    if self.info_requested(Info.ok):
        self.info_set(Info.ok, True)

    return beta
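
# A minimal, self-contained sketch of the same coordinate-descent lasso
# update on synthetic data, using plain NumPy only. It is independent of the
# class above: the helper name `_demo_lasso_cd`, the synthetic data and the
# unscaled soft-thresholding step are illustrative assumptions (no
# penalty_start offset, no mean scaling, no Info bookkeeping).
def _demo_lasso_cd(l=0.1, max_iter=100, eps=5e-8):
    import numpy as np

    rng = np.random.RandomState(0)
    n, p = 50, 10
    X = rng.randn(n, p)
    beta_true = np.zeros((p, 1))
    beta_true[:3] = 1.0
    y = np.dot(X, beta_true) + 0.01 * rng.randn(n, 1)

    beta = np.zeros((p, 1))
    xTx = np.sum(X ** 2.0, axis=0)
    for _ in range(max_iter):
        beta_old = beta.copy()
        residual = y - np.dot(X, beta)  # Reset the residual each sweep.
        for j in range(p):
            xj = X[:, [j]]
            # Correlation of column j with the partial residual
            # (residual with coordinate j added back in).
            bj = np.dot(xj.T, residual + xj * beta[j, 0])[0, 0]
            # Soft thresholding, scaled by the squared column norm.
            bj = np.sign(bj) * max(0.0, abs(bj) - l) / xTx[j]
            residual -= xj * (bj - beta[j, 0])  # Keep the residual in sync.
            beta[j] = bj
        if np.linalg.norm(beta - beta_old) < eps:
            break

    return beta
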