Example #1
    def __init__(self, nstates=None, nsymbols=None, transition=None, emission=None, pi=None):
        # the number of states must be recoverable from nstates, transition or pi,
        # and the number of symbols from nsymbols or emission
        assert not ((nstates is None and transition is None and pi is None) or (nsymbols is None and emission is None))

        if transition is not None:
            assert transition.ndim == 2
            if nstates is not None:
                assert nstates == transition.shape[0]
            else:
                nstates = transition.shape[0]
            if emission is not None:
                assert transition.shape[0] == emission.shape[0]
            if pi is not None:
                assert transition.shape[0] == pi.size
            
        if emission is not None:
            assert emission.ndim == 2
            if nsymbols is not None:
                assert nsymbols == emission.shape[1]
            else:
                nsymbols = emission.shape[1]
            if nstates is not None:
                assert nstates == emission.shape[0]
            else:
                nstates = emission.shape[0]
            if pi is not None:
                assert emission.shape[0] == pi.size

        if pi is not None:
            assert pi.ndim == 1
            if nstates is not None:
                assert nstates == pi.size
            else:
                nstates = pi.size

        # fall back to almost-uniform defaults for anything left unspecified
        if transition is None:
            transition = almost_uniform_matrix(nstates)

        if emission is None:
            emission = almost_uniform_matrix(nstates, nsymbols)

        if pi is None:
            pi = almost_uniform_vector(nstates)

        self.set_transition(transition)
        self.set_emission(emission)
        self.set_pi(pi)
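
A minimal usage sketch of the constructor above. The enclosing class name is not shown in this snippet, so HMM below is an assumed name, and the matrices are arbitrary illustrations; anything left out falls back to the almost-uniform defaults built in __init__.

import numpy as np

# Hypothetical class name; only the __init__ shown above is assumed.
A = np.array([[0.9, 0.1],
              [0.2, 0.8]])             # 2-state transition matrix
model = HMM(nsymbols=3, transition=A)   # emission and pi default to almost-uniform

# All parameters given explicitly; nstates/nsymbols are inferred and cross-checked.
B = np.array([[0.5, 0.4, 0.1],
              [0.1, 0.3, 0.6]])         # 2 states x 3 symbols
pi = np.array([0.6, 0.4])
model = HMM(transition=A, emission=B, pi=pi)
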
Example #2
def entropic_reestimate(omega, theta=None, Z=1, maxiter=100, tol=1e-7, verbose=False):
    """
    Re-estimates a statistical parameter vector entropically [1]_.
    
    Parameters
    ----------
    omega : array_like 
        Evidence vector
    theta : array_like, optional
        Initial parameter vector to be re-estimated from the given evidence and learning rate; if None, an almost-uniform vector is used (default None)
    Z : {-1, 0, +1}, optional
        -1: Algorithm reduces to traditional MLE (e.g. the Baum-Welch update)

        0: not handled yet (the implementation asserts Z != 0)

        +1: Algorithm will seek maximum structure
    maxiter : int, optional
        Maximum number of iterations of the fixed-point loop (default 100)
    tol : float, optional
        Relative tolerance used to test convergence of the Lagrange multiplier (default 1e-7)
    verbose : bool, optional
        Display verbose output (default off)

    Returns
    -------
    theta_hat : array_like
        Learned parameter vector
    Z : float
        Final learning rate
    _lambda : float
        Limiting value of Lagrange multiplier

    Examples
    --------
    >>> from entropy_map import entropic_reestimate
    >>> omega = [1, 2]
    >>> theta = [0.50023755, 0.49976245]
    >>> theta_hat, final_Z, _lambda = entropic_reestimate(omega, theta, Z=1, tol=1e-6)
    >>> theta_hat
    array([ 0.33116253,  0.66883747])
    >>> final_Z
    0.041828014112488016
    >>> _lambda
    -3.0152672618320637

    References
    ----------
    .. [1] Matthew Brand, "Pattern learning via entropy maximization"

    """

    def _debug(msg=''):
        if verbose:
            print(msg)

    # XXX TODO: handle Z = 0 case
    assert Z != 0 

    # if no initial theta specified, start with uniform candidate
    if theta is None:
        theta = almost_uniform_vector(len(omega))

    # all arrays must be numpy-like
    omega = array(omega, dtype='float64')
    theta = array(theta, dtype='float64')

    # XXX TODO: trim off any evidence which is 'relatively close to 0' (since such evidence can't justify anything!)
    informative_indices = nonzero(minimum(omega, theta) > _EPSILON)
    _omega = omega[informative_indices]
    _theta = theta[informative_indices]

    # prepare initial _lambda which will ensure that Lambert's W is real-valued
    if Z > 0:
        critical_lambda = min(-Z*(2 + log(_omega/Z)))
        _lambda = critical_lambda - 1 # or anything less than the critical value above
    elif Z < 0:
        #  make an educated guess
        _lambda = -mean(Z*(log(_theta) + 1) + _omega/_theta)
    assert all(-_omega*exp(1+_lambda/Z)/Z > -1/e), -_omega*exp(1+_lambda/Z)/Z 
    
    # Fixed-point loop
    _theta_hat = _theta
    iteration = 0
    converged = False
    _debug("entropy_map: starting Fixed-point loop ..\n")
    _debug("Initial model: %s"%_theta)
    _debug("Initial lambda: %s"%_lambda)
    _debug("Initila learning rate (Z): %s"%Z)
    while not converged:
        # exhausted ?
        if maxiter <= iteration:
            break

        # if necessary, re-scale learning rate (Z) so that exp(1 + _lambda/Z) is not 'too small'
        if _lambda < 0:
            if Z > 0:
                new_Z = -_lambda/_BEAR
            elif Z < 0:
                new_Z = _lambda/_BEAR
            if new_Z != Z:
                Z = new_Z
                _debug("N.B:- We'll re-scale learning rate (Z) to %s to prevent Lambert's W function from vanishing."%(Z))

        # prepare argument (vector) for Lambert's W function
        z = -_omega*exp(1 + _lambda/Z)/Z
        assert all(isreal(z)) 
        if any(z < -1/e):
            _debug("Lambert's W: argument z = %s out of range (-1/e, +inf)"%z)
            break

        # compute Lambert's W function at z
        if Z <= 0:
            g = W(z, k=0)
        else:
            g = W(z, k=-1)
        assert all(isreal(g))
        g = real(g)
        
        # check against division by zero (btw we re-scaled Z to prevent this)
        # assert all(g != 0)
        assert all(abs(g) > _EPSILON)

        # re-estimate _theta
        _theta_hat = (-_omega/Z)/g 
        assert all(_theta_hat >= 0)

        # normalize the approximated _theta_hat parameter
        _theta_hat = normalize_probabilities(_theta_hat)

        # re-estimate _lambda
        _lambda_hat = -(Z*(log(_theta_hat[0]) + 1) + _omega[0]/_theta_hat[0]) # [0] or any other index [i]

        # check whether the _lambda values have converged
        converged, _, relative_error = check_converged(_lambda, _lambda_hat, tol=tol)

        # verbose for debugging, etc.
        _debug("Iteration: %d"%iteration)
        _debug('Current parameter estimate:\n%s'%_theta)
        _debug('lambda: %s'%_lambda)
        _debug("Relative error in lambda over last iteration: %s"%relative_error)
        _debug("Learning rate (Z): %s"%Z)

        # update _lambda and _theta
        _lambda = _lambda_hat
        _theta = _theta_hat

        # goto next iteration
        iteration += 1

        _debug('\n')

    _debug("Done.")
    _debug('Final parameter estimate:\n%s'%_theta)
    _debug('lambda: %s'%_lambda)
    _debug("Relative error in lambda over last iteration: %s"%relative_error)
    _debug("Learning rate (Z): %s"%Z)

    # converged ?
    if converged:
        _debug("entropic_reestimate: loop converged after %d iterations (tolerance was set to %s)"%(iteration,tol))
    else:
        _debug("entropic_reestimate: loop did not converge after %d iterations (tolerance was set to %s)"\
            %(maxiter,tol))

    # render results
    theta_hat = 0*theta
    theta_hat[informative_indices] = _theta_hat
    return theta_hat, Z, _lambda
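
A short sketch contrasting the two documented learning-rate regimes. It assumes only what the doctest above already assumes (entropic_reestimate importable from entropy_map); the evidence values are arbitrary and no output is shown.

from entropy_map import entropic_reestimate

omega = [5.0, 3.0, 1.0]   # evidence vector, e.g. expected counts from an E-step

# Z = -1: per the docstring, the re-estimate reduces to a traditional MLE-style update.
theta_mle, Z_mle, lambda_mle = entropic_reestimate(omega, Z=-1, tol=1e-6)

# Z = +1: per the docstring, the re-estimate seeks maximum structure,
# driving weakly supported components towards zero.
theta_sparse, Z_sparse, lambda_sparse = entropic_reestimate(omega, Z=+1, tol=1e-6)

print(theta_mle)
print(theta_sparse)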