Example #1
import warnings

import numpy as np
import scipy.optimize

# Helper routines such as make_initrho, forceRhoInBounds, negL_c, and the
# rho/omega transform functions are assumed to be defined elsewhere in the
# module.

def find_optimum(
        initrho=None, initomega=None,
        do_grad_rho=1, do_grad_omega=1, approx_grad=0,
        nDoc=None, sumLogPiActiveVec=None,
        sumLogPiRemVec=None, sumLogPiRem=None,
        alpha=1.0, gamma=1.0,
        factr=100.0,
        Log=None,
        **kwargs):
    ''' Estimate optimal rho and omega via gradient descent on ELBO objective.

    Returns
    --------
    rho : 1D array, length K
    omega : 1D array, length K
    f : scalar value of minimization objective
    Info : dict

    Raises
    --------
    ValueError on an overflow, any NaN, or failure to converge.

    Examples
    --------
    When no documents exist, we recover the prior parameters:
    >>> r_opt, o_opt, f_opt, Info = find_optimum(
    ...     nDoc=0,
    ...     sumLogPiActiveVec=np.zeros(3),
    ...     sumLogPiRemVec=np.zeros(3),
    ...     alpha=0.5, gamma=1.0)
    >>> print(r_opt)
    [ 0.5  0.5  0.5]
    >>> print(o_opt)
    [ 2.  2.  2.]
    
    We can optimize for just rho by turning do_grad_omega off.
    This fixes omega at its initial value, but optimizes rho.
    >>> r_opt, o_opt, f_opt, Info = find_optimum(
    ...     do_grad_omega=0,
    ...     nDoc=10,
    ...     sumLogPiActiveVec=np.asarray([-2., -4., -6.]),
    ...     sumLogPiRemVec=np.asarray([0, 0, -20.]),
    ...     alpha=0.5,
    ...     gamma=5.0)
    >>> print(o_opt)
    [ 46.  36.  26.]
    >>> np.allclose(o_opt, Info['initomega'])
    True

    We can optimize for just omega by turning do_grad_rho off.
    This fixes rho at its initial value, but optimizes omega.
    >>> r_opt2, o_opt2, f_opt2, Info = find_optimum(
    ...     do_grad_rho=0,
    ...     initrho=r_opt,
    ...     nDoc=10,
    ...     sumLogPiActiveVec=np.asarray([-2., -4., -6.]),
    ...     sumLogPiRemVec=np.asarray([0, 0, -20.]),
    ...     alpha=0.5,
    ...     gamma=5.0)
    >>> np.allclose(r_opt, r_opt2)
    True
    >>> np.allclose(o_opt2, o_opt, atol=10, rtol=0)
    True
    '''
    assert sumLogPiActiveVec.ndim == 1
    K = sumLogPiActiveVec.size
    if sumLogPiRem is not None:
        sumLogPiRemVec = np.zeros(K)
        sumLogPiRemVec[-1] = sumLogPiRem
    assert sumLogPiActiveVec.shape == sumLogPiRemVec.shape

    if nDoc > 0:
        maxOmegaVal = 1000.0 * (nDoc * (K+1) + gamma)
    else:
        maxOmegaVal = 1000.0 * (K + 1 + gamma)
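    # maxOmegaVal is a generous cap, far above omega's expected posterior
    # scale; it is used below to keep omega in a numerically safe range.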

    # Determine initial values for rho, omega
    if initrho is None:
        initrho = make_initrho(K, nDoc, gamma)
    initrho = forceRhoInBounds(initrho)
    if initomega is None:
        initomega = make_initomega(K, nDoc, gamma)
    initomega = forceOmegaInBounds(initomega, maxOmegaVal=0.5*maxOmegaVal)
    assert initrho.size == K
    assert initomega.size == K

    # Define keyword args for the objective function
    objFuncKwargs = dict(
        sumLogPiActiveVec=sumLogPiActiveVec,
        sumLogPiRemVec=sumLogPiRemVec,
        nDoc=nDoc,
        gamma=gamma,
        alpha=alpha,
        approx_grad=approx_grad,
        do_grad_rho=do_grad_rho,
        do_grad_omega=do_grad_omega,
        initrho=initrho,
        initomega=initomega)
    # Transform initial rho/omega into unconstrained vector c
    if do_grad_rho and do_grad_omega:
        rhoomega_init = np.hstack([initrho, initomega])
        c_init = rhoomega2c(rhoomega_init)
    elif do_grad_rho:
        c_init = rho2c(initrho)
        objFuncKwargs['omega'] = initomega
    else:
        c_init = omega2c(initomega)
        objFuncKwargs['rho'] = initrho
    # Define the objective function (in unconstrained space)
    def objFunc(c):
        return negL_c(c, **objFuncKwargs)

    # Define keyword args for the optimization package (fmin_l_bfgs_b)
    fminKwargs = dict(
        factr=factr,
        approx_grad=approx_grad,
        disp=None,
        )
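    # Forward only the extra kwargs that fmin_l_bfgs_b actually accepts
    # (e.g. maxiter, pgtol); anything else is silently dropped.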
    fminPossibleKwargs = set(scipy.optimize.fmin_l_bfgs_b.__code__.co_varnames)
    for key in kwargs:
        if key in fminPossibleKwargs:
            fminKwargs[key] = kwargs[key]
    # Run optimization, raising special error on any overflow or NaN issues
    with warnings.catch_warnings():
        warnings.filterwarnings('error')
        try:
            c_opt, f_opt, Info = scipy.optimize.fmin_l_bfgs_b(
                objFunc, c_init, **fminKwargs)
        except RuntimeWarning as e:
            # Any warnings are probably related to overflow.
            # Raise them as errors! We don't want a result with overflow.
            raise ValueError("FAILURE: " + str(e))
        except AssertionError as e:
            # Any assertions that failed mean that
            # rho/omega or some other derived quantity
            # reached a very bad place numerically. Raise an error!
            raise ValueError("FAILURE: NaN/Inf detected!")
    # Raise error on abnormal optimization warnings (like bad line search)
    if Info['warnflag'] > 1:
        raise ValueError("FAILURE: " + str(Info['task']))

    # Convert final answer back to rhoomega (safely)
    Info['initrho'] = initrho
    Info['initomega'] = initomega
    if do_grad_rho and do_grad_omega:
        rho_opt, omega_opt = c2rhoomega(c_opt)
    elif do_grad_rho:
        rho_opt = c2rho(c_opt)
        omega_opt = initomega
    else:
        omega_opt = c2omega(c_opt)
        rho_opt = initrho

    Info['estrho'] = rho_opt
    Info['estomega'] = omega_opt
    rho_safe = forceRhoInBounds(rho_opt)
    omega_safe = forceOmegaInBounds(
        omega_opt, maxOmegaVal=maxOmegaVal, Log=Log)
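    # Re-evaluate the objective as a value only (approx_grad skips the
    # analytic gradient) so the initial and optimized solutions can be
    # compared directly.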
    objFuncKwargs['approx_grad'] = 1.0

    with warnings.catch_warnings():
        warnings.filterwarnings('error')
        objFuncKwargs['rho'] = initrho
        objFuncKwargs['omega'] = initomega
        f_init = negL_rhoomega(**objFuncKwargs)

    with warnings.catch_warnings():
        warnings.filterwarnings('error')
        objFuncKwargs['rho'] = rho_safe
        objFuncKwargs['omega'] = omega_safe
        f_safe = negL_rhoomega(**objFuncKwargs)
    if not np.allclose(rho_safe, rho_opt):
        if Log:
            Log.error('rho_opt_CHANGED_TO_LIE_IN_BOUNDS')
        Info['rho_opt_CHANGED_TO_LIE_IN_BOUNDS'] = 1
    if not np.allclose(omega_safe, omega_opt):
        if Log:
            Log.error('omega_opt_CHANGED_TO_LIE_IN_BOUNDS')
        Info['omega_opt_CHANGED_TO_LIE_IN_BOUNDS'] = 1
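    # Return whichever solution scores better under the objective: the
    # bound-corrected optimum, or the initial point if optimization
    # (or the bound correction) made things worse.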
    if f_safe < f_init:
        return rho_safe, omega_safe, f_safe, Info
    else:
        return initrho, initomega, f_init, Info
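
The transform helpers rho2c, omega2c, rhoomega2c and their inverses are
not shown above. Below is a minimal sketch of the usual reparameterization,
assuming rho is constrained to (0, 1) and omega to (0, inf), so that a
logit/log change of variables makes the search space unconstrained. The
names match the calls in Example #1 (Example #2's variants additionally
take a scaleVector), but the exact transforms and the EPS guard are
assumptions, not the verified implementation:

import numpy as np

EPS = 1e-13  # hypothetical guard against log(0)

def rho2c(rho):
    # Logit transform: maps (0, 1) to the real line.
    rho = np.clip(rho, EPS, 1.0 - EPS)
    return np.log(rho) - np.log1p(-rho)

def c2rho(c):
    # Sigmoid maps the real line back to (0, 1).
    return 1.0 / (1.0 + np.exp(-c))

def omega2c(omega):
    # Log transform: maps (0, inf) to the real line.
    return np.log(np.maximum(omega, EPS))

def c2omega(c):
    return np.exp(c)

def rhoomega2c(rhoomega):
    # Stacked vector [rho, omega] of length 2K -> unconstrained c.
    K = rhoomega.size // 2
    return np.hstack([rho2c(rhoomega[:K]), omega2c(rhoomega[K:])])

def c2rhoomega(c):
    # Unconstrained c -> (rho, omega) tuple, as unpacked in Example #1.
    K = c.size // 2
    return c2rho(c[:K]), c2omega(c[K:])

With this convention, c2rhoomega(rhoomega2c(np.hstack([rho, omega])))
recovers (rho, omega) up to clipping at the bounds, and fmin_l_bfgs_b can
search over c without any box constraints.
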
Example #2
def find_optimum(sumLogPi=None,
                 sumLogPiActiveVec=None,
                 sumLogPiRemVec=None,
                 nDoc=0,
                 gamma=1.0,
                 alpha=1.0,
                 kappa=0.0,
                 startAlphaLogPi=0.0,
                 initrho=None,
                 initomega=None,
                 scaleVector=None,
                 approx_grad=False,
                 factr=1.0e5,
                 **kwargs):
    ''' Run gradient optimization to estimate the best parameters rho and omega.

    Returns
    --------
    rhoomega : 1D array, length 2*K
    f : scalar value of minimization objective
    Info : dict

    Raises
    --------
    ValueError on an overflow, any NaN, or failure to converge.
    '''
    if sumLogPi is not None:
        sumLogPi = np.squeeze(np.asarray(sumLogPi, dtype=np.float64))
        assert sumLogPi.ndim == 1
        K = sumLogPi.size - 1
    else:
        assert sumLogPiActiveVec.ndim == 1
        assert sumLogPiActiveVec.shape == sumLogPiRemVec.shape
        K = sumLogPiActiveVec.size

    # Determine initial value
    if initrho is None:
        initrho = create_initrho(K)
    initrho = forceRhoInBounds(initrho)
    if initomega is None:
        initomega = create_initomega(K, nDoc, gamma)
    initomega = forceOmegaInBounds(initomega)
    assert initrho.size == K
    assert initomega.size == K

    # Initialize rescaling vector
    if scaleVector is None:
        scaleVector = np.hstack([np.ones(K), np.ones(K)])

    # Create init vector in unconstrained space
    initrhoomega = np.hstack([initrho, initomega])
    initc = rhoomega2c(initrhoomega, scaleVector=scaleVector)

    # Define objective function (unconstrained!)
    objArgs = dict(sumLogPi=sumLogPi,
                   sumLogPiActiveVec=sumLogPiActiveVec,
                   sumLogPiRemVec=sumLogPiRemVec,
                   startAlphaLogPi=startAlphaLogPi,
                   nDoc=nDoc,
                   gamma=gamma,
                   alpha=alpha,
                   kappa=kappa,
                   approx_grad=approx_grad,
                   scaleVector=scaleVector)

    def c_objFunc(c):
        return objFunc_unconstrained(c, **objArgs)

    # Run optimization, raising special error on any overflow or NaN issues
    with warnings.catch_warnings():
        warnings.filterwarnings('error',
                                category=RuntimeWarning,
                                message='overflow')
        try:
            chat, fhat, Info = scipy.optimize.fmin_l_bfgs_b(
                c_objFunc,
                initc,
                disp=None,
                approx_grad=approx_grad,
                factr=factr,
                **kwargs)
        except RuntimeWarning:
            raise ValueError("FAILURE: overflow!")
        except AssertionError:
            raise ValueError("FAILURE: NaN/Inf detected!")
    # Raise error on abnormal warnings (like bad line search)
    if Info['warnflag'] > 1:
        raise ValueError("FAILURE: " + str(Info['task']))

    # Convert final answer back to rhoomega (safely)
    Info['init'] = initrhoomega
    rhoomega = c2rhoomega(chat, scaleVector=scaleVector, returnSingleVector=1)
    rhoomega[:K] = forceRhoInBounds(rhoomega[:K])
    return rhoomega, fhat, Info
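
A usage sketch for this variant, assuming the helper routines above are in
scope. The input values are hypothetical, and the K+1 length of sumLogPi
(one trailing entry for the leftover mass) follows from the
K = sumLogPi.size - 1 line:

import numpy as np

sumLogPi = np.asarray([-2.0, -4.0, -6.0, -20.0])  # K=3 active + remainder
rhoomega, f, Info = find_optimum(
    sumLogPi=sumLogPi,
    nDoc=5,
    gamma=5.0,
    alpha=0.5)
K = sumLogPi.size - 1
rho, omega = rhoomega[:K], rhoomega[K:]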