def _update_eta(self, gx0, gx1, ggx0, d):
    """Recompute ``self.eta`` from the gradients at two consecutive iterates.

    :param gx0: gradient at the previous iterate
    :param gx1: gradient at the new iterate
    :param ggx0: Hessian at the previous iterate
    :param d: step that was applied to the previous iterate
    :return: None, the new value is stored in ``self.eta``
    """
    if self.eta_method == "1":
        # How badly the linear model gx0 + ggx0 . d predicted the new
        # gradient, relative to the size of the old gradient.
        model_error = gx1 - gx0 - np.dot(ggx0, d)
        self.eta = vector_2norm(model_error) / vector_2norm(gx0)
        return

    # delta \in (0, 1], alpha \in (1, 2]
    # (presumably delta is the 0.5 factor and alpha the 1.5 exponent)
    gradient_ratio = vector_2norm(gx1) / vector_2norm(gx0)
    self.eta = 0.5 * gradient_ratio ** 1.5
def _get_descent_direction(self, f, g, gg, x0, epsilon):
    """Return a search direction built from ``modify_cholesky_fraction(gg(x0))``.

    When the gradient norm exceeds ``epsilon`` the direction solves
    ``(L D L^T) d = -g(x0)``.  Otherwise the diagonal of
    ``2 * gg(x0) - L D L^T`` is inspected: if its smallest entry is positive
    a zero vector is returned, else a direction is obtained from
    ``L^T d = e_index`` and oriented so that ``g(x0)^T d <= 0``.

    :param f: objective function (not used by this method)
    :param g: callable returning the gradient at a point
    :param gg: callable returning the Hessian at a point
    :param x0: current point; indexed as a 2-D column (``y[index][0]``)
    :param epsilon: gradient-norm threshold selecting between the two cases
    :return: the search direction, same shape as ``x0`` in the second case
    """
    # Evaluate the Hessian and gradient once; the original re-called both.
    hessian = gg(x0)
    L, D = modify_cholesky_fraction(hessian)
    gradient = g(x0)
    ldlt = np.dot(np.dot(L, D), L.T)

    if vector_2norm(gradient) > epsilon:
        # Solve the linear system directly rather than forming inv(L D L^T).
        return -np.linalg.solve(ldlt, gradient)

    psi = np.diag(2 * hessian - ldlt)
    index = np.argmin(psi)
    if psi[index] > 0:
        return np.zeros(x0.shape)

    y = np.zeros(x0.shape)
    y[index][0] = 1
    d = ut_equation(L.T, y)
    # NOTE(review): the sign test uses an independent solve of the same
    # system, exactly as the original did; `d` and `dd` should agree if
    # ut_equation solves L^T d = y - confirm and drop one of them.
    dd = np.linalg.solve(L.T, y)
    if np.dot(gradient.T, dd) > 0:
        d = -dd
    return d
def _cauchy(self, fx, gx, ggx):
    """
    :Note Cauchy point method to solve the sub-problem: a step along the
          negative gradient whose length is at most ``self.delta``
    :param fx: f(x) (not used by this method)
    :param gx: g(x), np.ndarray of shape (N, 1)
    :param ggx: gg(x), np.ndarray of shape (N, N)
    :return: d: the descent direction, np.ndarray of shape (N, 1)
    """
    grad_norm = vector_2norm(gx)
    if grad_norm == 0:
        # A zero gradient would divide by zero below and yield NaNs;
        # the only sensible step is no step at all.
        return np.zeros(gx.shape)

    # gx^T ggx gx is a (1, 1) array for column vectors.  Reduce it to a plain
    # scalar: np.min([1, <(1, 1) array>]) needs a ragged array, which recent
    # NumPy releases reject with a ValueError.
    curvature = np.asarray(np.dot(gx.T, np.dot(ggx, gx))).item()

    eta = 1
    if curvature > 0:
        # Positive curvature along -gx: stop at the model minimiser if it
        # lies inside the region of radius self.delta.
        eta = min(1, grad_norm ** 3 / (self.delta * curvature))
    return -1 * eta * self.delta / grad_norm * gx
def compute(self, f, g, gg, x0):
    """Iterate from ``x0`` until ``_convergence`` or ``_maximum_loop`` says stop.

    Each pass asks ``_get_descent_direction`` for a step, tries it at most
    twice (shrinking it by ``get_theta`` when the gradient norm does not drop
    enough), refreshes ``self.eta`` and then tests the stopping rules.

    :param f: callable returning the objective value at a point
    :param g: callable returning the gradient at a point
    :param gg: callable returning the Hessian at a point
    :param x0: starting point
    :return: the last iterate that was evaluated
    """
    self.iter_num = 0
    fx1, gx1, ggx1 = f(x0), g(x0), gg(x0)

    while True:
        gx0, ggx0 = gx1, ggx1
        # Called with (Hessian, gradient) as in the original.
        d = self._get_descent_direction(ggx0, gx0)

        # get suitable step length
        eta = self.eta
        gx0_norm = vector_2norm(gx0)
        for i in range(2):
            # Remember the step that x1/gx1 belong to; `d` may be shrunk
            # once more after the last attempt without being applied.
            step = d
            x1 = x0 + step
            fx1, gx1, ggx1 = f(x1), g(x1), gg(x1)
            theta = self.get_theta(gx0, ggx0, gx1, ggx1, d)
            # gx1 is already g(x0 + d); no need to evaluate the gradient again.
            if vector_2norm(gx1) <= (1 - self.t * (1 - eta)) * gx0_norm:
                break
            d = theta * d
            print(i)
            eta = 1 - theta * (1 - eta)

        # Pass the step actually taken so it is consistent with gx1.
        self._update_eta(gx0, gx1, ggx0, step)

        if self._convergence(gx1, x1) is True or self._maximum_loop() is True:
            print("iter_num is{}, f(x) is{}".format(self.iter_num, fx1))
            break
        x0 = x1
        self._iter_increment()
    return x1
def _r_i(self, x, i):
    """Return the ``i``-th residual term evaluated at ``x``.

    For ``i == self.n`` the term is ``||x||^2 - 0.25``; for every other index
    it is ``sqrt(self.eta) * (x[i][0] - 1)``.

    :param x: point, indexed as ``x[i][0]``
    :param i: index of the residual term
    :return: value of the residual term
    """
    if i == self.n:
        return vector_2norm(x) ** 2 - 0.25
    scale = np.sqrt(self.eta)
    return scale * (x[i][0] - 1)
def _convergence(self, gx, x):
    """Tell whether the gradient is small enough to stop.

    The gradient norm is compared with ``self.max_error`` scaled by
    ``max(1, ||x||)``.

    :param gx: gradient at the current point
    :param x: current point
    :return: True when the gradient norm is below the scaled tolerance
    """
    grad_norm = vector_2norm(gx.flatten())
    scale = np.max([1, vector_2norm(x.flatten())])
    return bool(grad_norm < self.max_error * scale)
# Search-direction routine built on bunch_parlett_fraction(gg(x0)) -> (L, D, P).
#   * If the smallest eigenvalue of D exceeds 1e-8, it returns
#     -inv(gg(x0)) . g(x0).
#   * Otherwise one of two constructions is used, selected through self.d_tag:
#       - a vector `a` filled from the non-positive 1x1 pivots / 2x2 blocks of
#         D, passed through ut_equation(L.T, a), multiplied by P, and negated
#         when g(x0)^T d > 0 (sets self.d_tag = 1);
#       - a matrix `Dpp` holding reciprocals of the positive pivots, used as
#         d = -(lp . Dpp . lp^T) . g(x0) with lp = P . inv(L)^T
#         (sets self.d_tag = 0).
#   * If the norm of the resulting d is below 1e-8, it returns +/-10 times the
#     last row of the third factor of np.linalg.svd(L D L^T), with the sign
#     chosen so that its product with g(x0) is not positive.
# A 2x2 block is detected by |D[m][m + 1]| > 1e-15.
# NOTE(review): the whole definition is collapsed onto one physical line, so
# the nesting of the `self.d_tag` branches (in particular whether
# `if self.d_tag == 0:` sits inside the `elif`) cannot be recovered from this
# text - restore the original indentation before changing any logic.
# NOTE(review): `a[m:m + 2][0] = ...` addresses only the first row of the
# two-row slice, and `Dpp[m:m + 2][m:m+2] = ...` slices rows twice instead of
# selecting a 2x2 sub-block; both assignments sit inside a bare `except:` that
# only prints `index`, so a failure there is silent.  `eigvec` is computed but
# columns of `tmp` are used instead - confirm intent.
def _get_descent_direction(self, f, g, gg, x0): L, D, P = bunch_parlett_fraction(gg(x0)) min_eigval = np.min(np.linalg.eigvals(D)) """ print("D is {}".format(D)) print("L is {}\n" " P is {}".format(L, P)) """ # all eigenvalue is positive if min_eigval > 1e-8: return -np.dot(np.linalg.inv(gg(x0)), g(x0)) # has negative eigenvalue elif min_eigval < -1e-8: self.d_tag = 1 if self.d_tag == 0: # construct the a # a = np.zeros(x0.shape) m = 0 while m != D.shape[0]: if m == D.shape[0] - 1: a[m][0] = 1 if D[m][m] <= 0 else 0 break if np.abs(D[m][m + 1]) > 1e-15: # 2x2 block tmp = D[m:m + 2, m:m + 2] eigval, eigvec = np.linalg.eig(tmp) index = np.argmin(eigval) if eigval[index] > 0: raise ValueError("negative value") try: a[m:m + 2][0] = tmp[:, index] / (np.sqrt(np.sum(tmp[:, index] ** 2))) except: print(index) m = m + 2 else: if D[m][m] <= 0: a[m][0] = 1 m = m + 1 a = ut_equation(L.T, a) d = np.dot(P, a) if np.dot(g(x0).T, d) > 0: d = -d self.d_tag = 1 else: # construct the positive D m = 0 Dpp = np.zeros(D.shape) while m != D.shape[0]: if m == D.shape[0] - 1: Dpp[m][m] = 1/D[m][m] if D[m][m] > 0 else 0 break if np.abs(D[m][m + 1]) > 1e-15: # 2x2 block tmp = D[m:m + 2, m:m + 2] eigval, eigvec = np.linalg.eig(tmp) index = np.argmax(eigval) if eigval[index] <= 0: raise ValueError("negative value") try: Dpp[m:m + 2][m:m+2] = 1/eigval[index] * np.dot(tmp[:, index], tmp[:, index].T) except: print(index) m = m + 2 else: # 1x1 block if D[m][m] > 0: Dpp[m][m] = 1/D[m][m] m = m + 1 self.d_tag = 0 lp = np.dot(P, np.linalg.inv(L).T) d = -np.dot(np.dot(lp, Dpp), lp.T) d = np.dot(d, g(x0)) if vector_2norm(d) < 1e-8: # if d equals to zero # compute LDL d = 0, _, _, v = np.linalg.svd(np.dot(np.dot(L, D), L.T)) print(np.dot(gg(x0), v[-1, :].reshape(-1, 1))) if np.dot(v[-1, :], g(x0)) > 0: return -v[-1, :].reshape(-1, 1)*10 else: return v[-1, :].reshape(-1, 1)*10 return d
def _convergence(self, gx, x):
    """Check the stopping rule ``||gx|| < max_error * max(1, ||x||)``.

    :param gx: gradient at the current point
    :param x: current point
    :return: True if the rule holds, False otherwise
    """
    gradient_size = vector_2norm(gx.flatten())
    tolerance = self.max_error * np.max([1, vector_2norm(x.flatten())])
    if gradient_size < tolerance:
        return True
    return False
def _subspace(self, fx, gx, ggx):
    """
    :Note two-dimension subspace method to solve the sub-problem.  The step
          is a combination of ``gx`` and a (shifted) Newton vector whose
          weights come from the smallest real root of a quartic in ``v``.
          When the smallest Hessian eigenvalue lies within 1e-5 of zero, or
          the quartic has no real root, the Cauchy point is returned instead.
    :param fx: f(x), only forwarded to ``_cauchy``
    :param gx: g(x), np.ndarray of shape (N, 1)
    :param ggx: gg(x), np.ndarray of shape (N, N)
    :return: d: the descent direction, np.ndarray of shape (N, 1)
    """

    def smallest_real_root(coefficients):
        """Return the smallest real root of the polynomial, or None."""
        roots = np.sort(np.roots(np.array(coefficients).flatten()))
        for root in roots:
            if np.imag(root) == 0:
                return np.real(root)
        return None

    delta = self.delta
    min_eigval = np.min(np.linalg.eigvals(ggx))

    if min_eigval < -1e-5:
        # Indefinite Hessian: shift the spectrum so the smallest eigenvalue
        # becomes -0.5 * min_eigval > 0, then solve instead of inverting.
        shifted = ggx - 1.5 * min_eigval * np.eye(ggx.shape[0])
        w = np.linalg.solve(shifted, gx)
        hess_gx = np.dot(ggx, gx)

        a = vector_2norm(gx) ** 2
        b = np.dot(gx.T, w)
        c = vector_2norm(w) ** 2
        d = np.dot(gx.T, hess_gx)
        e = np.dot(hess_gx.T, w)
        f = np.dot(np.dot(ggx, w).T, w)
        p = a * c - b ** 2
        q = e * b - a * f
        m = 4 * b * e - 2 * a * f - 2 * c * d
        r = d * f - e ** 2
        n = a * e - b * d

        # 0 = q_4 v^4 + q_3 v^3 + q_2 v^2 + q_1 v + q_0
        # (coefficients reproduced unchanged from the original code)
        v = smallest_real_root([
            16 * p ** 2 * delta ** 2,
            8 * m * p * delta ** 2,
            (8 * p * r + m ** 2) * delta ** 2 - 4 * a * p ** 2,
            2 * m * r * delta ** 2 - 4 * (a * p * q + b * n * p),
            delta ** 2 * r ** 2 - (a * q ** 2 + 2 * b * n * q + c * n ** 2),
        ])
        if v is None:
            # No real root: the original carried the whole complex root
            # array forward and returned a meaningless non-vector.
            return self._cauchy(fx, gx, ggx)
        t = 4 * p * v ** 2 + m * v + n
        return 1 / t * ((2 * v * p + q) * gx + n * w)

    if min_eigval > 1e-5:
        # Positive definite Hessian: use the Newton vector ggx^{-1} gx,
        # computed once by a linear solve.
        newton = np.linalg.solve(ggx, gx)

        a = vector_2norm(gx) ** 2
        b = np.dot(newton.T, gx)
        c = vector_2norm(newton) ** 2
        d = np.dot(np.dot(ggx, gx).T, gx)
        m = a * c - b * b
        n = a * b - c * d
        q = a * a - b * d

        # 0 = q_4 v^4 + q_3 v^3 + q_2 v^2 + q_1 v + q_0
        # (coefficients reproduced unchanged from the original code)
        v = smallest_real_root([
            16 * m ** 2 * delta ** 2,
            16 * m * n * delta ** 2,
            4 * n ** 2 * delta ** 2 - 8 * m * q * delta ** 2 - 4 * a * m ** 2,
            4 * b * q * m - 4 * n * q * delta ** 2,
            (delta ** 2 - c) * q ** 2,
        ])
        if v is None:
            return self._cauchy(fx, gx, ggx)
        t = 4 * m * v ** 2 + 2 * n * v - q
        return (2 * v * m * gx + q * newton) / t

    # Smallest eigenvalue within 1e-5 of zero: fall back to the Cauchy point.
    return self._cauchy(fx, gx, ggx)