Example #1
0
def XDGMM(X, Xcov, n_components, n_iter=100, tol=1E-5, Nthreads=1, R=None, 
          init_n_iter=10, w=None, model=None, fixed_means=None,
          aligned_covs=None, verbose=False):
    """
    Extreme Deconvolution.

    Fit an extreme deconvolution (XD) mixture-of-Gaussians model to noisy
    data via EM, following Bovy et al., arXiv 0905.2979.

    Parameters
    ----------
    X : array_like
        Input data. shape = (n_samples, n_features)
    Xcov : array_like
        Full covariance of each input sample.
        shape = (n_samples, n_features, n_features)
    n_components : integer
        number of gaussian components to fit to the data
    n_iter : integer (optional)
        number of EM iterations to perform (default=100)
    tol : float (optional)
        stopping criterion for EM iterations (default=1E-5); iteration
        halts once the log-likelihood improves by less than ``tol``
    Nthreads : integer (optional)
        number of threads, forwarded to ``xd_model``
    R : array_like (optional)
        (TODO: not implemented)
        Transformation matrix from underlying to observed data.  If
        unspecified, then it is assumed to be the identity matrix.
        Passing a value raises NotImplementedError.
    init_n_iter : integer (optional)
        number of plain-GMM iterations used for the initial guess
    w : float or array_like (optional)
        if float - w * np.eye is added to V
        if vector - np.diag(w) is added to V
        if array - w is added to V
    model : xd_model (optional)
        pre-built model to continue fitting; a fresh one is created
        from the other arguments when None
    fixed_means, aligned_covs : optional
        constraints forwarded to ``xd_model`` / ``constrained_GMM``
    verbose : boolean (optional)
        print per-iteration timing and log-likelihood

    Returns
    -------
    model : xd_model
        the fitted model (``mu``, ``alpha``, ``V``, plus ``logL`` /
        ``prev_logL`` bookkeeping)

    Notes
    -----
    This implementation follows Bovy et al. arXiv 0905.2979
    """
    # Fail fast on the unimplemented option before building a model.
    if R is not None:
        raise NotImplementedError("mixing matrix R is not yet implemented")

    if model is None:
        model = xd_model(X.shape, n_components, n_iter, tol, w, Nthreads,
                         fixed_means, aligned_covs, verbose)

    X = np.asarray(X)
    Xcov = np.asarray(Xcov)

    # assume full covariances of data
    assert Xcov.shape == (model.n_samples, model.n_features, model.n_features)

    # initialize components via a few steps of GMM
    # this doesn't take into account errors, but is a fast first-guess
    if model.V is None:
        t0 = time()
        # BUGFIX: use short-circuit 'and' rather than bitwise '&' for the
        # logical conjunction of two boolean tests.
        if (fixed_means is None) and (aligned_covs is None):
            gmm = GMM(model.n_components, n_iter=init_n_iter,
                      covariance_type='full').fit(X)
        else:
            gmm = constrained_GMM(X, model.n_components, init_n_iter,
                                  fixed_means, aligned_covs)
        model.mu = gmm.means_
        model.alpha = gmm.weights_
        model.V = gmm.covars_
        model.initial_logL = model.logLikelihood(X, Xcov)
        if model.verbose:
            # Modernized Python 2 print statements to print() calls
            # (PEP 3105); also fixes the 'Initalization' typo.
            print('Initialization done in %.2g sec' % (time() - t0))
            print('Initial Log Likelihood: %s' % model.initial_logL)

    logL = -np.inf
    for i in range(model.n_iter):
        t0 = time()
        model = _EMstep(model, X, Xcov)
        logL_next = model.logLikelihood(X, Xcov)
        t1 = time()

        if model.verbose:
            print("%i: log(L) = %.5g" % (i + 1, logL_next))
            print("    (%.2g sec)" % (t1 - t0))

        model.prev_logL = logL
        model.logL = logL_next

        # Converged: improvement in log-likelihood fell below tol.
        if logL_next < logL + model.tol:
            break
        logL = logL_next

    return model
Example #2
0
def XDGMM(X,
          Xcov,
          n_components,
          n_iter=100,
          tol=1E-5,
          Nthreads=1,
          R=None,
          init_n_iter=10,
          w=None,
          model=None,
          fixed_means=None,
          aligned_covs=None,
          verbose=False):
    """
    Extreme Deconvolution.

    Fit an extreme deconvolution (XD) Gaussian-mixture model to data with
    per-sample measurement covariances, using EM.

    Parameters
    ----------
    X : array_like
        Input data. shape = (n_samples, n_features)
    Xcov : array_like
        Covariance of input data (full, per sample).
        shape = (n_samples, n_features, n_features)
    n_components : integer
        number of gaussian components to fit to the data
    n_iter : integer (optional)
        number of EM iterations to perform (default=100)
    tol : float (optional)
        stopping criterion for EM iterations (default=1E-5)
    Nthreads : integer (optional)
        thread count, passed through to ``xd_model``
    R : array_like (optional)
        (TODO: not implemented)
        Transformation matrix from underlying to observed data.  If
        unspecified, then it is assumed to be the identity matrix.
        Supplying a value raises NotImplementedError.
    init_n_iter : integer (optional)
        iterations of the error-free GMM used for initialization
    w : float or array_like (optional)
        if float - w * np.eye is added to V
        if vector - np.diag(w) is added to V
        if array - w is added to V
    model : xd_model (optional)
        existing model to continue fitting (built fresh when None)
    fixed_means, aligned_covs : optional
        constraints passed to ``xd_model`` / ``constrained_GMM``
    verbose : boolean (optional)
        report timing and log-likelihood each iteration

    Returns
    -------
    model : xd_model
        the fitted model

    Notes
    -----
    This implementation follows Bovy et al. arXiv 0905.2979
    """
    # Reject the unimplemented mixing matrix up front.
    if R is not None:
        raise NotImplementedError("mixing matrix R is not yet implemented")

    if model is None:
        model = xd_model(X.shape, n_components, n_iter, tol, w, Nthreads,
                         fixed_means, aligned_covs, verbose)

    X = np.asarray(X)
    Xcov = np.asarray(Xcov)

    # assume full covariances of data
    assert Xcov.shape == (model.n_samples, model.n_features, model.n_features)

    # initialize components via a few steps of GMM
    # this doesn't take into account errors, but is a fast first-guess
    if model.V is None:
        t0 = time()
        # BUGFIX: logical conjunction should short-circuit with 'and',
        # not use the bitwise '&' operator.
        if (fixed_means is None) and (aligned_covs is None):
            gmm = GMM(model.n_components,
                      n_iter=init_n_iter,
                      covariance_type='full').fit(X)
        else:
            gmm = constrained_GMM(X, model.n_components, init_n_iter,
                                  fixed_means, aligned_covs)
        model.mu = gmm.means_
        model.alpha = gmm.weights_
        model.V = gmm.covars_
        model.initial_logL = model.logLikelihood(X, Xcov)
        if model.verbose:
            # Python 2 print statements converted to print() calls
            # (PEP 3105); 'Initalization' typo corrected.
            print('Initialization done in %.2g sec' % (time() - t0))
            print('Initial Log Likelihood: %s' % model.initial_logL)

    logL = -np.inf
    for i in range(model.n_iter):
        t0 = time()
        model = _EMstep(model, X, Xcov)
        logL_next = model.logLikelihood(X, Xcov)
        t1 = time()

        if model.verbose:
            print("%i: log(L) = %.5g" % (i + 1, logL_next))
            print("    (%.2g sec)" % (t1 - t0))

        model.prev_logL = logL
        model.logL = logL_next

        # Stop once the per-iteration gain is smaller than tol.
        if logL_next < logL + model.tol:
            break
        logL = logL_next

    return model