def exercise_two_loop_recursion(fgh):
  """Exercise bfgs.hg_two_loop_recursion against explicit h_update products.

  Starting from the identity as hk0, takes three damped steps along the
  two-loop-recursion direction and checks after each step that the
  recursion result equals the explicitly accumulated inverse-Hessian
  approximation applied to the current gradient.

  fgh: object providing g(x) -> gradient (project type; assumed to also
       provide f and h elsewhere -- only g is used here).
  """
  x = flex.double([3.0, -4.0])
  g = fgh.g(x)
  h0 = flex.double([[1,0],[0,1]])
  h = h0
  memory = []
  # With empty memory the recursion must reduce to hk0 * gk.
  hg = bfgs.hg_two_loop_recursion(memory, hk0=h0, gk=g)
  assert approx_equal(hg, h.matrix_multiply(g))
  # Step factors as in the original hand-unrolled sequence.
  # NOTE: written as float literals; the bare 1/3, 1/5, 3/8 would be
  # integer divisions (all zero) under Python 2 classic division.
  for step in [1/3., 1/5., 3/8.]:
    x_new = x - step * hg
    g_new = fgh.g(x_new)
    # Explicitly chained BFGS inverse-Hessian update...
    h = bfgs.h_update(hk=h, sk=x_new-x, yk=g_new-g)
    # ...must agree with the two-loop recursion over the stored pairs
    # (always seeded with the initial hk0 = h0).
    memory.append(bfgs.memory_element(s=x_new-x, y=g_new-g))
    hg = bfgs.hg_two_loop_recursion(memory, hk0=h0, gk=g_new)
    assert approx_equal(hg, h.matrix_multiply(g_new))
    x, g = x_new, g_new
def callback_after_step(O, minimizer):
  """Minimizer step callback: cross-checks BFGS update formulas.

  Using the previous and current iterates it verifies that
  b_update/h_update stay mutual inverses, that the updated b remains
  positive definite, and that the two-loop recursion over the
  accumulated memory reproduces the explicitly chained h updates.
  Finally it records the new (s, y) pair and advances prev_x.
  """
  x_prev = O.prev_x
  x_curr = O.x
  g_prev = O.fgh.g(x_prev)
  g_curr = O.fgh.g(x_curr)
  s = x_curr - x_prev
  y = g_curr - g_prev
  def verify_pair(b_mat, h_mat):
    # Updating h and b from a consistent pair must keep them inverses.
    h_next = bfgs.h_update(h_mat, s, y)
    b_next = bfgs.b_update(b_mat, s, y)
    assert approx_equal(matrix.sqr(h_next).inverse(), b_next)
    eigen = eigensystem.real_symmetric(b_next)
    assert eigen.values().all_gt(0)
    assert bfgs.h0_scaling(sk=s, yk=y) > 0
  # Identity as the starting matrix (its own inverse).
  b_mat = flex.double([[1,0],[0,1]])
  verify_pair(b_mat, b_mat)
  # Exact Hessian as the starting matrix, only if positive definite.
  b_mat = O.fgh.h(x_prev)
  if (eigensystem.real_symmetric(b_mat).values().all_gt(0)):
    h_mat = b_mat.deep_copy()
    h_mat.matrix_inversion_in_place()
    verify_pair(b_mat, h_mat)
  # Two-loop recursion vs. explicitly chained h updates over the memory.
  h0 = flex.double([[0.9,0.1],[-0.2,0.8]])
  hg_recursion = bfgs.hg_two_loop_recursion(memory=O.memory, hk0=h0, gk=g_curr)
  h_explicit = h0
  for elem in O.memory:
    h_explicit = bfgs.h_update(hk=h_explicit, sk=elem.s, yk=elem.y)
  assert approx_equal(hg_recursion, h_explicit.matrix_multiply(g_curr))
  # Record this step's curvature pair and advance the previous iterate.
  O.memory.append(bfgs.memory_element(s=s, y=y))
  O.prev_x = O.x.deep_copy()
def build_bfgs_memory(O, active_infos):
  """Build a list of BFGS memory elements from consecutive iterate infos.

  Each adjacent pair (k, l) contributes a memory_element with
  s = l.x - k.x and y = l.grads - k.grads.  Returns None as soon as any
  element has rho is None (non-positive curvature), otherwise the list.
  """
  elements = []
  for prev, curr in zip(active_infos, active_infos[1:]):
    element = bfgs.memory_element(s=curr.x-prev.x, y=curr.grads-prev.grads)
    if (element.rho is None):
      return None
    elements.append(element)
  return elements
def update_dests_using_bfgs_formula(O, dests):
  """Overwrite selected entries of dests with L-BFGS two-loop estimates.

  Walks the iterate history backwards to find a run of iterates whose
  approx_quads selections stay (nearly) consistent, builds an L-BFGS
  memory restricted to that selection, and replaces dests[ix] for
  selected parameters whose estimated shift is within the dynamic
  shift limit scaled by params.bfgs_estimate_limit_factor.

  Status attributes set on O:
    aq_sel_size / aq_n_used = -2  feature disabled or selection too small
    aq_n_used = -1                selection found but update aborted
    aq_n_used >= 0                number of dests entries replaced

  dests is modified in place.
  """
  O.aq_sel_size = -2
  O.aq_n_used = -2
  # Feature disabled via parameters.
  if (O.params.bfgs_estimate_limit_factor <= 0):
    return
  aq_sel = flex.size_t()
  aq_sel_size_start = 0
  iinfo_active = []
  # Scan iterates newest to oldest, intersecting their approx_quads
  # selections; stop once the intersection shrinks below 90% of the
  # starting size (but keep at least two iterates).
  for iinfo in xrange(len(O.xfgc_infos)-1,-1,-1):
    info = O.xfgc_infos[iinfo]
    if (info.is_iterate):
      if (aq_sel_size_start == 0):
        # Newest iterate seeds the selection.
        aq_sel = info.approx_quads
        aq_sel_size_start = aq_sel.size()
        if (aq_sel_size_start < 2):
          return
      else:
        next_aq_sel = aq_sel.intersection(other=info.approx_quads)
        if (    next_aq_sel.size() < aq_sel_size_start * 0.9
            and len(iinfo_active) > 1):
          break
        aq_sel = next_aq_sel
      iinfo_active.append(iinfo)
  iinfo_active.sort()
  O.aq_sel_size = aq_sel.size()
  if (len(iinfo_active) < 2 or O.aq_sel_size < 2):
    return
  O.aq_n_used = -1
  # The newest iterate must be the last history entry.
  assert iinfo_active[-1] == len(O.xfgc_infos)-1
  curvs = O.xfgc_infos[iinfo_active[-1]].curvs.select(aq_sel)
  assert curvs.all_gt(0)
  # Diagonal initial inverse Hessian from the latest curvatures.
  hk0 = 1 / curvs
  memory = []
  # Build memory pairs from consecutive active iterates, restricted to
  # the common selection; abort on any Wolfe/curvature failure.
  for iinfo in iinfo_active[:-1]:
    k = O.xfgc_infos[iinfo]
    l = O.xfgc_infos[iinfo+1]
    xk = k.x.select(aq_sel)
    xl = l.x.select(aq_sel)
    gk = k.grads.select(aq_sel)
    gl = l.grads.select(aq_sel)
    m = bfgs.memory_element(s=xl-xk, y=gl-gk)
    gks = gk.dot(m.s)
    gls = gl.dot(m.s)
    wolfe_curv_cond = (gls >= 0.9 * gks)
      # Nocedal & Wright (1999) Equation 3.7b
      # reformulated using sk instead of pk
    if (not wolfe_curv_cond):
      return
    if (m.rho is None):
      print "Warning: rho <= 0"
      return
    memory.append(m)
  # L-BFGS search direction (negated two-loop recursion result).
  aq_dests = -bfgs.hg_two_loop_recursion(
    memory=memory, hk0=hk0,
    gk=O.xfgc_infos[-1].grads.select(aq_sel))
  O.aq_n_used = 0
  # Accept each estimated shift only if within the scaled dynamic limit.
  for aq_dest,ix in zip(aq_dests, aq_sel):
    dsl = O.dynamic_shift_limits[ix]
    limit = dsl.pair(x=O.x[ix]).get(grad=O.grads[ix])
    if (abs(aq_dest) <= O.params.bfgs_estimate_limit_factor * limit):
      dests[ix] = aq_dest
      O.aq_n_used += 1