Example #1
0
    def learn(self,
              obseqs,
              tol=1e-6,
              miniter=25,
              maxiter=100,
              method='baumwelch'):
        """Iteratively re-estimate this model's parameters from observation
        sequences until the likelihood converges or the iteration budget runs out.

        Parameters
        ----------
        obseqs : observation sequences passed through to the estimator
            (`do_baumwelch` / `do_brand`); exact type determined by those methods.
        tol : float, optional
            Relative-error tolerance handed to `check_converged` (default 1e-6).
        miniter : int, optional
            Minimum number of iterations before convergence may be declared
            (default 25).
        maxiter : int, optional
            Hard cap on the number of iterations (default 100).
        method : str, optional
            Parameter-estimation algorithm: 'baumwelch' or 'brand'.

        Raises
        ------
        RuntimeError
            If `method` is neither 'baumwelch' nor 'brand'.
        """
        # Start from the worst possible likelihood so the first real value
        # always registers as an improvement.  NOTE(review): `inf` must be in
        # scope at module level (presumably from numpy) — confirm.
        likelihood = -inf
        
        iteration = 0        
        while True:
            print "Iteration:", iteration
            print "Current model:", str(self)

            # budget exhausted ?
            if maxiter <= iteration:
                print "Model did not converge after %d iterations (tolerance was set to %s)."%(iteration,tol)
                break

            # learn new model: dispatch to the requested estimator; each returns
            # a dict with at least 'likelihood', 'transition', 'emission', 'pi'
            # (inferred from the .get() calls below — confirm against the
            # do_baumwelch / do_brand implementations).
            if method == 'baumwelch':
                result = self.do_baumwelch(obseqs)
            elif method == 'brand':
                result = self.do_brand(obseqs)
            else:
                raise RuntimeError, "Unsupported parameter-estimation method: %s"%method

            print "Model likelihood:", result.get('likelihood')

            # converged ?
            # A likelihood of exactly 0 is treated as the global optimum —
            # presumably this is a log-likelihood, so 0 means probability 1.
            # TODO(review): confirm; also note the break happens BEFORE the
            # set_transition/set_emission/set_pi calls below, so the parameters
            # in `result` are discarded here — verify that is intended.
            if result.get('likelihood') == 0 and miniter <= iteration:
                print "Model converged (to global optimum) after %d iterations."%iteration
                break

            # update model likelihood
            converged, increased, relative_error = check_converged(likelihood, result.get('likelihood'), tol=tol)
            likelihood = result.get('likelihood')
            if method == 'baumwelch':
                # Baum-Welch guarantees monotone non-decreasing likelihood.
                assert increased # if this fails, then somethx is terribly wrong with the do_baumwelch code!!!

            print "Relative error in model likelihood over last iteration:", relative_error
            print 

            # converged ?
            # As above, convergence is declared before applying the latest
            # re-estimated parameters — the model keeps the previous iteration's
            # parameters while reporting the new likelihood.  TODO(review):
            # confirm this ordering is deliberate.
            if converged and miniter <= iteration:
                print "Model converged after %d iterations (tolerance was set to %s)."%(iteration,tol)
                break

            # update model
            self.set_transition(result.get('transition'))
            self.set_emission(result.get('emission'))
            self.set_pi(result.get('pi'))

            # proceed with next iteration
            iteration += 1
Example #2
0
def entropic_reestimate(omega, theta=None, Z=1, maxiter=100, tol=1e-7, verbose=False):
    """
    Re-estimates a statistic parameter vector entropically [1]_.
    
    Parameters
    ----------
    omega : array_like 
        Evidence vector
    theta : array_like, optional
        Parameter vector to be re-estimated under given evidence and learning rate (default None)
    Z : {-1, 0, +1}, optional
        -1: Algorithm reduces to traditional MLE (e.g the Baum-Welch)

        0: ?

        +1: Algorithm will seek maximum structure
    maxiter : int, optional
        Maximum number of iterations of Fixed-point loop (default 100)
    verbose : bool, optional
        Display verbose output (default off)

    Returns
    -------
    theta_hat : array_like
        Learned parameter vector
    Z : float
        Final Learning rate
    _lambda : float
        Limiting value of Lagrange multiplier

    Examples
    --------
    >>> from entropy_map import entropic_reestimate
    >>> omega = [1, 2]
    >>> theta = [0.50023755, 0.49976245]
    >>> theta_hat, final_Z, _lambda = entropic_reestimate(omega, theta, Z=1, tol=1e-6)
    >>> theta_hat
    array([ 0.33116253,  0.66883747])
    >>> final_Z
    0.041828014112488016
    >>> _lambda
    -3.0152672618320637

    References
    ----------
    .. [1] Matthiew Brand, "Pattern learning via entropy maximization"

    """

    def _debug(msg=''):
        if verbose:
            print msg

    # XXX TODO: handle Z = 0 case
    assert Z != 0 

    # if no initial theta specified, start with uniform candidate
    if theta is None:
        theta = almost_uniform_vector(len(omega))

    # all arrays must be numpy-like
    omega = array(omega, dtype='float64')
    theta = array(theta, dtype='float64')

    # XXX TODO: trim-off any evidence which 'relatively close to 0' (since such evidence can't justify anything!) 
    informative_indices = nonzero(minimum(omega, theta) > _EPSILON)
    _omega = omega[informative_indices]
    _theta = theta[informative_indices]

    # prepare initial _lambda which will ensure that Lambert's W is real-valued
    if Z > 0:
        critical_lambda = min(-Z*(2 + log(_omega/Z)))
        _lambda = critical_lambda - 1 # or anything less than the critical value above
    elif Z < 0:
        #  make an educated guess
        _lambda = -mean(Z*(log(_theta) + 1) + _omega/_theta)
    assert all(-_omega*exp(1+_lambda/Z)/Z > -1/e), -_omega*exp(1+_lambda/Z)/Z 
    
    # Fixed-point loop
    _theta_hat = _theta
    iteration = 0
    converged = False
    _debug("entropy_map: starting Fixed-point loop ..\n")
    _debug("Initial model: %s"%_theta)
    _debug("Initial lambda: %s"%_lambda)
    _debug("Initila learning rate (Z): %s"%Z)
    while not converged:
        # exhausted ?
        if maxiter <= iteration:
            break

        # if necessary, re-scale learning rate (Z) so that exp(1 + _lambda/Z) is not 'too small'
        if _lambda < 0:
            if Z > 0:
                new_Z = -_lambda/_BEAR
            elif Z < 0:
                new_Z = _lambda/_BEAR
            if new_Z != Z:
                Z = new_Z
                _debug("N.B:- We'll re-scale learning rate (Z) to %s to prevent Lambert's W function from vanishing."%(Z))

        # prepare argument (vector) for Lambert's W function
        z = -_omega*exp(1 + _lambda/Z)/Z
        assert all(isreal(z)) 
        if any(z < -1/e):
            _debug("Lambert's W: argument z = %s out of range (-1/e, +inf)"%z)
            break

        # compute Lambert's W function at z
        if Z <= 0:
            g = W(z, k=0)
        else:
            g = W(z, k=-1)
        assert all(isreal(g))
        g = real(g)
        
        # check against division by zero (btw we re-scaled Z to prevent this)
        # assert all(g != 0)
        assert all(abs(g) > _EPSILON)

        # re-estimate _theta
        _theta_hat = (-_omega/Z)/g 
        assert all(_theta_hat >= 0)

        # normalize the approximated _theta_hat parameter
        _theta_hat = normalize_probabilities(_theta_hat)

        # re-estimate _lambda
        _lambda_hat = -(Z*(log(_theta_hat[0]) + 1) + _omega[0]/_theta_hat[0]) # [0] or any other index [i]

        # check whether _lambda values have convergede
        converged, _, relative_error = check_converged(_lambda, _lambda_hat, tol=tol)

        # verbose for debugging, etc.
        _debug("Iteration: %d"%iteration)
        _debug('Current parameter estimate:\n%s'%_theta)
        _debug('lambda: %s'%_lambda)
        _debug("Relative error in lambda over last iteration: %s"%relative_error)
        _debug("Learning rate (Z): %s"%Z)

        # update _lambda and _theta
        _lambda = _lambda_hat
        _theta = _theta_hat

        # goto next iteration
        iteration += 1

        _debug('\n')

    _debug("Done.")
    _debug('Final parameter estimate:\n%s'%_theta)
    _debug('lambda: %s'%_lambda)
    _debug("Relative error in lambda over last iteration: %s"%relative_error)
    _debug("Learning rate (Z): %s"%Z)

    # converged ?
    if converged:
        _debug("entropic_reestimate: loop converged after %d iterations (tolerance was set to %s)"%(iteration,tol))
    else:
        _debug("entropic_reestimate: loop did not converge after %d iterations (tolerance was set to %s)"\
            %(maxiter,tol))

    # render results
    theta_hat = 0*theta
    theta_hat[informative_indices] = _theta_hat
    return theta_hat, Z, _lambda