import warnings

import numpy as np
import scipy.optimize


def find_optimum(sumLogPi=0,
                 sumLogPiActiveVec=0,
                 sumLogPiRemVec=0,
                 nDoc=0, gamma=1.0, alpha=1.0, kappa=0.0,
                 startAlphaLogPi=0.0,
                 initrho=None, initomega=None,
                 scaleVector=None,
                 approx_grad=False,
                 factr=1.0e5,
                 **kwargs):
    ''' Run gradient optimization to estimate best parameters rho, omega

    Returns
    --------
    rhoomega : 1D array, length 2*K
    f : scalar value of minimization objective
    Info : dict

    Raises
    --------
    ValueError on an overflow, any NaN, or failure to converge
    '''
    if sumLogPi is not None:
        if sumLogPi.ndim > 1:
            sumLogPi = np.squeeze(np.asarray(sumLogPi, dtype=np.float64))
        assert sumLogPi.ndim == 1
        K = sumLogPi.size - 1
    else:
        assert sumLogPiActiveVec.ndim == 1
        assert sumLogPiActiveVec.shape == sumLogPiRemVec.shape
        K = sumLogPiActiveVec.size

    # Determine initial value
    if initrho is None:
        initrho = create_initrho(K)
    initrho = forceRhoInBounds(initrho)
    if initomega is None:
        initomega = create_initomega(K, nDoc, gamma)
    initomega = forceOmegaInBounds(initomega)
    assert initrho.size == K
    assert initomega.size == K

    # Initialize rescaling vector
    if scaleVector is None:
        scaleVector = np.hstack([np.ones(K), np.ones(K)])

    # Create init vector in unconstrained space
    initrhoomega = np.hstack([initrho, initomega])
    initc = rhoomega2c(initrhoomega, scaleVector=scaleVector)

    # Define objective function (unconstrained!)
    objArgs = dict(sumLogPi=sumLogPi,
                   sumLogPiActiveVec=sumLogPiActiveVec,
                   sumLogPiRemVec=sumLogPiRemVec,
                   startAlphaLogPi=startAlphaLogPi,
                   nDoc=nDoc, gamma=gamma, alpha=alpha, kappa=kappa,
                   approx_grad=approx_grad,
                   scaleVector=scaleVector)

    def c_objFunc(c):
        return objFunc_unconstrained(c, **objArgs)

    # Run optimization, raising special error on any overflow or NaN issues
    with warnings.catch_warnings():
        warnings.filterwarnings('error', category=RuntimeWarning,
                                message='overflow')
        try:
            chat, fhat, Info = scipy.optimize.fmin_l_bfgs_b(
                c_objFunc, initc,
                disp=None,
                approx_grad=approx_grad,
                factr=factr,
                **kwargs)
        except RuntimeWarning:
            raise ValueError("FAILURE: overflow!")
        except AssertionError:
            raise ValueError("FAILURE: NaN/Inf detected!")

    # Raise error on abnormal warnings (like bad line search)
    if Info['warnflag'] > 1:
        raise ValueError("FAILURE: " + Info['task'])

    # Convert final answer back to rhoomega (safely)
    Info['init'] = initrhoomega
    rhoomega = c2rhoomega(chat, scaleVector=scaleVector, returnSingleVector=1)
    rhoomega[:K] = forceRhoInBounds(rhoomega[:K])
    return rhoomega, fhat, Info
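
# Usage sketch for the routine above. The sufficient statistics here are
# invented for illustration, and helpers such as create_initrho, rhoomega2c,
# and objFunc_unconstrained are assumed to be defined elsewhere in this
# module.
def _demo_find_optimum_from_sumLogPi():
    K = 5
    nDoc = 100
    # sumLogPi aggregates E[log pi_dk] over documents for the K active
    # topics plus one entry for the leftover mass, hence length K + 1.
    # These values are made up purely to show the calling convention.
    sumLogPi = -float(nDoc) * np.linspace(1.0, 6.0, K + 1)
    rhoomega, f, Info = find_optimum(
        sumLogPi=sumLogPi,
        nDoc=nDoc,
        gamma=1.0,
        alpha=0.5,
        factr=1.0e7)  # looser convergence tolerance than the 1.0e5 default
    rho, omega = rhoomega[:K], rhoomega[K:]
    return rho, omega, f, Info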

def find_optimum(
        initrho=None,
        initomega=None,
        do_grad_rho=1,
        do_grad_omega=1,
        approx_grad=0,
        nDoc=None,
        sumLogPiActiveVec=None,
        sumLogPiRemVec=None,
        sumLogPiRem=None,
        alpha=1.0,
        gamma=1.0,
        factr=100.0,
        Log=None,
        **kwargs):
    ''' Estimate optimal rho and omega via gradient descent on ELBO objective.

    Returns
    --------
    rho : 1D array, length K
    omega : 1D array, length K
    f : scalar value of minimization objective
    Info : dict

    Raises
    --------
    ValueError on an overflow, any NaN, or failure to converge.

    Examples
    --------
    When no documents exist, we recover the prior parameters

    >>> r_opt, o_opt, f_opt, Info = find_optimum(
    ...     nDoc=0,
    ...     sumLogPiActiveVec=np.zeros(3),
    ...     sumLogPiRemVec=np.zeros(3),
    ...     alpha=0.5, gamma=1.0)
    >>> print r_opt
    [ 0.5  0.5  0.5]
    >>> print o_opt
    [ 2.  2.  2.]

    We can optimize for just rho by turning do_grad_omega off.
    This fixes omega at its initial value, but optimizes rho.

    >>> r_opt, o_opt, f_opt, Info = find_optimum(
    ...     do_grad_omega=0,
    ...     nDoc=10,
    ...     sumLogPiActiveVec=np.asarray([-2., -4., -6.]),
    ...     sumLogPiRemVec=np.asarray([0, 0, -20.]),
    ...     alpha=0.5,
    ...     gamma=5.0)
    >>> print o_opt
    [ 46.  36.  26.]
    >>> np.allclose(o_opt, Info['initomega'])
    True

    We can optimize for just omega by turning do_grad_rho off.
    This fixes rho at its initial value, but optimizes omega.

    >>> r_opt2, o_opt2, f_opt2, Info = find_optimum(
    ...     do_grad_rho=0,
    ...     initrho=r_opt,
    ...     nDoc=10,
    ...     sumLogPiActiveVec=np.asarray([-2., -4., -6.]),
    ...     sumLogPiRemVec=np.asarray([0, 0, -20.]),
    ...     alpha=0.5,
    ...     gamma=5.0)
    >>> np.allclose(r_opt, r_opt2)
    True
    >>> np.allclose(o_opt2, o_opt, atol=10, rtol=0)
    True
    '''
    assert sumLogPiActiveVec.ndim == 1
    K = sumLogPiActiveVec.size
    if sumLogPiRem is not None:
        sumLogPiRemVec = np.zeros(K)
        sumLogPiRemVec[-1] = sumLogPiRem
    assert sumLogPiActiveVec.shape == sumLogPiRemVec.shape

    if nDoc > 0:
        maxOmegaVal = 1000.0 * (nDoc * (K + 1) + gamma)
    else:
        maxOmegaVal = 1000.0 * (K + 1 + gamma)

    # Determine initial values for rho, omega
    if initrho is None:
        initrho = make_initrho(K, nDoc, gamma)
    initrho = forceRhoInBounds(initrho)
    if initomega is None:
        initomega = make_initomega(K, nDoc, gamma)
    initomega = forceOmegaInBounds(initomega, maxOmegaVal=0.5 * maxOmegaVal)
    assert initrho.size == K
    assert initomega.size == K

    # Define keyword args for the objective function
    objFuncKwargs = dict(
        sumLogPiActiveVec=sumLogPiActiveVec,
        sumLogPiRemVec=sumLogPiRemVec,
        nDoc=nDoc,
        gamma=gamma,
        alpha=alpha,
        approx_grad=approx_grad,
        do_grad_rho=do_grad_rho,
        do_grad_omega=do_grad_omega,
        initrho=initrho,
        initomega=initomega)

    # Transform initial rho/omega into unconstrained vector c
    if do_grad_rho and do_grad_omega:
        rhoomega_init = np.hstack([initrho, initomega])
        c_init = rhoomega2c(rhoomega_init)
    elif do_grad_rho:
        c_init = rho2c(initrho)
        objFuncKwargs['omega'] = initomega
    else:
        c_init = omega2c(initomega)
        objFuncKwargs['rho'] = initrho

    # Define the objective function (in unconstrained space)
    def objFunc(c):
        return negL_c(c, **objFuncKwargs)

    # Define keyword args for the optimization package (fmin_l_bfgs_b)
    fminKwargs = dict(
        factr=factr,
        approx_grad=approx_grad,
        disp=None,
    )
    fminPossibleKwargs = set(
        scipy.optimize.fmin_l_bfgs_b.__code__.co_varnames)
    for key in kwargs:
        if key in fminPossibleKwargs:
            fminKwargs[key] = kwargs[key]

    # Run optimization, raising special error on any overflow or NaN issues
    with warnings.catch_warnings():
        warnings.filterwarnings('error')
        try:
            c_opt, f_opt, Info = scipy.optimize.fmin_l_bfgs_b(
                objFunc, c_init, **fminKwargs)
        except RuntimeWarning as e:
            # Any warnings are probably related to overflow.
            # Raise them as errors! We don't want a result with overflow.
            raise ValueError("FAILURE: " + str(e))
        except AssertionError as e:
            # Any assertions that failed mean that rho/omega or some other
            # derived quantity reached a very bad place numerically.
            # Raise an error!
            raise ValueError("FAILURE: NaN/Inf detected!")

    # Raise error on abnormal optimization warnings (like bad line search)
    if Info['warnflag'] > 1:
        raise ValueError("FAILURE: " + Info['task'])

    # Convert final answer back to rhoomega (safely)
    Info['initrho'] = initrho
    Info['initomega'] = initomega
    if do_grad_rho and do_grad_omega:
        rho_opt, omega_opt = c2rhoomega(c_opt)
    elif do_grad_rho:
        rho_opt = c2rho(c_opt)
        omega_opt = initomega
    else:
        omega_opt = c2omega(c_opt)
        rho_opt = initrho
    Info['estrho'] = rho_opt
    Info['estomega'] = omega_opt

    rho_safe = forceRhoInBounds(rho_opt)
    omega_safe = forceOmegaInBounds(
        omega_opt, maxOmegaVal=maxOmegaVal, Log=Log)

    objFuncKwargs['approx_grad'] = 1.0
    with warnings.catch_warnings():
        warnings.filterwarnings('error')
        objFuncKwargs['rho'] = initrho
        objFuncKwargs['omega'] = initomega
        f_init = negL_rhoomega(**objFuncKwargs)
    with warnings.catch_warnings():
        warnings.filterwarnings('error')
        objFuncKwargs['rho'] = rho_safe
        objFuncKwargs['omega'] = omega_safe
        f_safe = negL_rhoomega(**objFuncKwargs)

    if not np.allclose(rho_safe, rho_opt):
        if Log:
            Log.error('rho_opt_CHANGED_TO_LIE_IN_BOUNDS')
        Info['rho_opt_CHANGED_TO_LIE_IN_BOUNDS'] = 1
    if not np.allclose(omega_safe, omega_opt):
        if Log:
            Log.error('omega_opt_CHANGED_TO_LIE_IN_BOUNDS')
        Info['omega_opt_CHANGED_TO_LIE_IN_BOUNDS'] = 1

    if f_safe < f_init:
        return rho_safe, omega_safe, f_safe, Info
    else:
        return initrho, initomega, f_init, Info
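
# Usage sketch for the routine above, showing that extra keyword arguments
# matching fmin_l_bfgs_b's signature (e.g. pgtol, maxiter) are forwarded to
# the optimizer while unrelated ones are silently dropped. The statistic
# values below are invented for illustration.
def _demo_find_optimum_forward_lbfgs_kwargs():
    rho, omega, f, Info = find_optimum(
        nDoc=50,
        sumLogPiActiveVec=np.asarray([-50., -100., -150.]),
        sumLogPiRemVec=np.asarray([0., 0., -300.]),
        alpha=0.5,
        gamma=1.0,
        pgtol=1e-8,    # forwarded to scipy.optimize.fmin_l_bfgs_b
        maxiter=500,   # forwarded to scipy.optimize.fmin_l_bfgs_b
        seed=42)       # not an L-BFGS-B kwarg, so ignored
    return rho, omega, f, Info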

def find_optimum(sumLogVd=0, sumLog1mVd=0, nDoc=0, gamma=1.0, alpha=1.0,
                 inituhat=None,
                 approx_grad=False, factr=1.0e5, **kwargs):
    ''' Run gradient optimization to estimate best stick-breaking parameters uhat

    Returns
    --------
    uhat : 1D array, length K
    f : scalar value of minimization objective
    Info : dict

    Raises
    --------
    ValueError on an overflow, any NaN, or failure to converge
    '''
    if sumLogVd.ndim > 1:
        sumLogVd = np.squeeze(np.asarray(sumLogVd, dtype=np.float64))
        sumLog1mVd = np.squeeze(np.asarray(sumLog1mVd, dtype=np.float64))
    assert sumLogVd.ndim == 1
    K = sumLogVd.size

    ## Determine initial value
    if inituhat is None:
        inituhat = create_inituhat(K)
    inituhat = forceRhoInBounds(inituhat)
    assert inituhat.size == K
    initc = uhat2c(inituhat)

    ## Define objective function (unconstrained!)
    objArgs = dict(sumLogVd=sumLogVd,
                   sumLog1mVd=sumLog1mVd,
                   nDoc=nDoc, gamma=gamma, alpha=alpha,
                   approx_grad=approx_grad)
    c_objFunc = lambda c: objFunc_unconstrained(c, **objArgs)

    ## Run optimization and catch any overflow or NaN issues
    with warnings.catch_warnings():
        warnings.filterwarnings('error', category=RuntimeWarning,
                                message='overflow')
        try:
            chat, fhat, Info = scipy.optimize.fmin_l_bfgs_b(
                c_objFunc, initc,
                disp=None,
                approx_grad=approx_grad,
                factr=factr,
                **kwargs)
        except RuntimeWarning:
            raise ValueError("FAILURE: overflow!")
        except AssertionError:
            raise ValueError("FAILURE: NaN/Inf detected!")

    if Info['warnflag'] > 1:
        raise ValueError("FAILURE: " + Info['task'])

    Info['init'] = inituhat
    uhat = c2uhat(chat)
    uhat = forceRhoInBounds(uhat)
    return uhat, fhat, Info
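
# Usage sketch for the routine above. The per-document stick-breaking
# statistics are invented for illustration; helpers such as create_inituhat,
# uhat2c, c2uhat, and objFunc_unconstrained are assumed to be defined
# elsewhere in this module.
def _demo_find_optimum_uhat():
    K, nDoc = 4, 20
    # sumLogVd / sumLog1mVd aggregate E[log v_dk] and E[log(1 - v_dk)]
    # over documents; real values come from suff stats, these are made up.
    sumLogVd = -0.5 * nDoc * np.ones(K)
    sumLog1mVd = -1.5 * nDoc * np.ones(K)
    uhat, f, Info = find_optimum(
        sumLogVd=sumLogVd,
        sumLog1mVd=sumLog1mVd,
        nDoc=nDoc,
        gamma=1.0,
        alpha=0.5)
    assert uhat.shape == (K,)
    return uhat, f, Info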