def update_beta(self, tr_x, tr_y):
    s_beta = []
    y_beta = []
    not_converge = True
    old_loss = [self.loss_func(tr_x, tr_y)]
    d_beta = 0
    while not_converge:
        #print "Update Beta"
        grad = self.gradient(tr_x, tr_y, 'beta')
        d_beta = self.l_bfgs_d(grad, s_beta, y_beta)
        #Normalize the descent direction: a very large magnitude
        #makes the line search problematic
        d_beta = d_beta / max(np.linalg.norm(d_beta), 1)
        #Line search along -d_beta
        step_size_beta = line_search_armijo(ls.ls_loss_func, xk=self.beta, pk=-d_beta,
                                            gfk=grad, old_fval=old_loss[-1],
                                            args=(self, tr_x, tr_y, 'beta'), alpha0=1)
        #print "Step Size:%f, Func Called:%d" % (step_size_beta[0], step_size_beta[1])
        step_size_beta = step_size_beta[0]
        if step_size_beta is None:
            #Line search cannot find a step size any more
            break
        beta_t = self.beta - step_size_beta * d_beta
        #Zero out entries whose sign flipped during the update
        beta_t[np.multiply(beta_t, self.beta) < 0] = 0
        s_last = beta_t - self.beta
        self.beta = beta_t
        grad_t = self.gradient(tr_x, tr_y, 'beta')
        y_last = grad_t - grad
        #Only save the (s, y) pair if the curvature condition s^T y > 0 holds
        if np.dot(s_last, y_last) > 0:
            s_beta = self.update_save(s_beta, s_last)
            y_beta = self.update_save(y_beta, y_last)
        old_loss.append(self.loss_func(tr_x, tr_y))
        #print "Objective Function Upd Beta:%f " % old_loss[-1]
        #Stop when the average decrease over the last 10 iterations drops below tolerance
        if len(old_loss) > 10:
            if (old_loss[-11] - old_loss[-1]) / 10.0 < self.opt_param['tol'] * old_loss[-1]:
                not_converge = False
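# Illustrative sketch (an assumption, not the project's actual implementation):
# `l_bfgs_d` used above is assumed to return an approximation of H^{-1} * grad via
# the standard L-BFGS two-loop recursion over the saved (s, y) curvature pairs.
# A minimal version, assuming each history entry is a 1-D numpy array and relying
# on the module's existing numpy import (np), could look like this:
def _l_bfgs_two_loop_sketch(grad, s_hist, y_hist):
    """Approximate inverse-Hessian-vector product H^{-1} * grad (two-loop recursion)."""
    q = grad.copy()
    alphas, rhos = [], []
    #First loop: newest curvature pair to oldest
    for s, y in zip(reversed(s_hist), reversed(y_hist)):
        rho = 1.0 / np.dot(y, s)
        alpha = rho * np.dot(s, q)
        q = q - alpha * y
        alphas.append(alpha)
        rhos.append(rho)
    #Initial Hessian scaling gamma = s^T y / y^T y from the newest pair
    if s_hist:
        gamma = np.dot(s_hist[-1], y_hist[-1]) / np.dot(y_hist[-1], y_hist[-1])
    else:
        gamma = 1.0  #no history yet: falls back to a plain gradient direction
    r = gamma * q
    #Second loop: oldest curvature pair to newest
    for (s, y), alpha, rho in zip(zip(s_hist, y_hist), reversed(alphas), reversed(rhos)):
        b = rho * np.dot(y, r)
        r = r + s * (alpha - b)
    return r
# With an empty history this returns the gradient itself, which matches the first
# iteration of update_beta / update_a where s_* and y_* are still empty lists.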
def update_a(self, tr_x, tr_y):
    s_a = []
    y_a = []
    not_converge = True
    old_loss = [self.loss_func(tr_x, tr_y)]
    while not_converge:
        #Temporarily save the last column of a
        a_last = self.a[:, -1]
        #print "Update A"
        ###L-BFGS to update ak###
        grad = self.gradient(tr_x, tr_y, 'a')
        d_a = self.l_bfgs_d(grad, s_a, y_a)
        d_a = d_a / max(np.linalg.norm(d_a), 1)
        step_size_a = line_search_armijo(ls.ls_loss_func, xk=a_last, pk=-d_a,
                                         gfk=grad, old_fval=old_loss[-1],
                                         args=(self, tr_x, tr_y, 'a'), alpha0=1)
        #print "Step Size:%f, Func Called:%d" % (step_size_a[0], step_size_a[1])
        step_size_a = step_size_a[0]
        if step_size_a is None:
            break
        a_t = a_last - step_size_a * d_a
        #Zero out entries whose sign flipped during the update
        a_t[np.multiply(a_t, a_last) < 0] = 0
        s_last = a_t - a_last
        self.a[:, -1] = a_t
        grad_t = self.gradient(tr_x, tr_y, 'a')
        y_last = grad_t - grad
        #Only save the (s, y) pair if the curvature condition s^T y > 0 holds
        if np.dot(s_last, y_last) > 0:
            s_a = self.update_save(s_a, s_last)
            y_a = self.update_save(y_a, y_last)
        old_loss.append(self.loss_func(tr_x, tr_y))
        #print "Objective Function Upd Alpha:%f , Sparsity:%f" % (old_loss[-1], np.mean(self.a == 0))
        #Stop when the average decrease over the last 10 iterations drops below tolerance
        if len(old_loss) > 10:
            if (old_loss[-11] - old_loss[-1]) / 10.0 < self.opt_param['tol'] * old_loss[-1]:
                #or np.dot(s_a[-1], y_a[-1]) == 0:
                not_converge = False
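# Illustrative sketch (an assumption, not the project's code): `update_save` used in
# update_beta and update_a is assumed to keep a bounded history of curvature vectors,
# discarding the oldest entry once the limited-memory size is exceeded. The memory
# size of 10 below is only an example value.
def _update_save_sketch(history, new_item, max_pairs=10):
    """Append the newest s or y vector and keep at most `max_pairs` entries."""
    history.append(new_item)
    if len(history) > max_pairs:
        history.pop(0)  #drop the oldest entry (limited-memory behaviour)
    return history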