Exemplo n.º 1
0
def _shift_to_positive(X):
    """Return ``X`` shifted so that every entry is strictly positive.

    If any entry is <= 0, the global minimum is subtracted and 1e-8 is
    added, which makes the smallest entry exactly 1e-8.  Otherwise ``X`` is
    returned unchanged.  The input array is never modified in place.
    """
    if np.any(X <= 0):
        return X - X.min() + 1e-8
    return X


def cluster(X, K, divergence, debug=False):
    """Cluster the rows of ``X`` with K-means, then refine with EM.

    Parameters
    ----------
    X : array
        Data matrix; rows are summed along axis 1 for the 'KL'
        normalization, so it is presumably (n_samples, n_features) --
        TODO confirm against callers.
    K : int
        Number of clusters.
    divergence : str
        Which divergence K-means uses:

        * ``'KL'`` -- ``distributions.KL``; the data is shifted to be
          strictly positive if needed, then every row is rescaled to
          sum to 5.
        * ``'IS'`` -- ``distributions.ItakuraSaito``; the data is shifted
          to be strictly positive if needed (no row normalization).
        * ``'EU'`` -- ``distributions.SquareDistance``; the data is used
          as is.
    debug : bool, optional
        Forwarded to ``kmeans.kmeans_best_of_n``.  When true, the function
        also stops at a ``Tracer()()`` breakpoint just before returning.

    Returns
    -------
    dict
        ``'kmeans'``: the assignments returned by K-means;
        ``'EM'``: ``np.argmax(tau_em, axis=1)``, i.e. the index of the
        largest entry of each row of the first value returned by
        ``em.em``.

    Raises
    ------
    ValueError
        If ``divergence`` is not one of ``'KL'``, ``'IS'``, ``'EU'``.
        (Previously this printed a message and called ``sys.exit(0)``,
        which terminated the whole process with a success status.)
    """
    if divergence == 'KL':
        dist_cls = distributions.KL
        # The original comment notes this shift is needed "for MFCCs",
        # which can contain non-positive values.
        X = _shift_to_positive(X)
        # Rescale each row so that it sums to 5.
        X = 5. * X / X.sum(1)[:, nax]
    elif divergence == 'IS':
        dist_cls = distributions.ItakuraSaito
        X = _shift_to_positive(X)
    elif divergence == 'EU':
        dist_cls = distributions.SquareDistance
    else:
        raise ValueError(
            "Wrong divergence: {!r} (expected 'KL', 'IS' or 'EU')".format(
                divergence))

    # K-means (best of 10 trials) supplies both one of the returned
    # labelings and the initial centroids for EM.
    assignments, centroids, _ = kmeans.kmeans_best_of_n(
        X, K, n_trials=10, dist_cls=dist_cls, debug=debug)

    # Only the first return value of EM is needed here.
    # NOTE: HMM smoothing / Viterbi decoding used to be run after this
    # step but were already disabled (commented out) in this function.
    tau_em, _obs_distr, _pi, _ll_train, _ll_test = em.em(
        X, centroids, n_iter=10)

    if debug:
        # Interactive breakpoint; previously unconditional, which blocked
        # every non-interactive run of this function.
        Tracer()()

    return {
        'kmeans': assignments,
        'EM': np.argmax(tau_em, axis=1),
    }
Exemplo n.º 2
0
            ll_test.append(log_likelihood(lalpha_test, lbeta_test))
        if monitor:
            monitor_vals.append(monitor(A, obs_distr))

    Tracer()()
    return seq, A, obs_distr, ll_test, monitor_vals


if __name__ == '__main__':
    # Script entry point: fit a K-component Gaussian mixture with EM on the
    # training file, then run alpha_beta / smoothing on the test file using
    # a hand-fixed matrix A instead of a learned one.
    X = np.loadtxt('EMGaussian.data')
    Xtest = np.loadtxt('EMGaussian.test')
    K = 4

    # Run simple EM (no HMM)
    iterations = 40
    # K-means (best of 5 trials) provides the initial assignments and centers.
    assignments, centers, _ = kmeans.kmeans_best_of_n(X, K, n_trials=5)
    # Re-wrap each K-means center as a Gaussian with the same mean and an
    # identity covariance; np.eye(2) hard-codes 2-dimensional data.
    new_centers = [distributions.Gaussian(c.mean, np.eye(2)) \
                for c in centers]
    # em.em also receives the test set and returns train and test
    # log-likelihood values alongside tau, obs_distr and pi.
    tau, obs_distr, pi, gmm_ll_train, gmm_ll_test = \
            em.em(X, new_centers, assignments, n_iter=iterations, Xtest=Xtest)

    # example with fixed parameters
    # A is K x K with 0.5 on the diagonal and 1/6 everywhere else, so each
    # row sums to 1 for K = 4 (0.5 + 3 * 1/6).  Presumably the HMM transition
    # matrix -- the 1/6 constant would need to change if K does.
    A = 1. / 6 * np.ones((K, K))
    A[np.diag(np.ones(K)) == 1] = 0.5

    # NOTE(review): lalpha / lbeta look like log-domain forward / backward
    # messages (log_p is exponentiated below) -- confirm in alpha_beta.
    lalpha, lbeta = alpha_beta(Xtest, pi, A, obs_distr)
    log_p = smoothing(lalpha, lbeta)
    p = np.exp(log_p)

    def plot_traj(p):
        plt.figure()
Exemplo n.º 3
0
    # algos to run
    algs = map(int, options.algos.split(','))

    # number of clusters
    K = options.k

    ass_plots = []
    seqs = {}
    results = {}  # data/results obtained, indexed by algorithm

    # K-means
    if options.init == 'kmeans' or algos.kmeans in algs:
        t = time.time()
        assignments, centroids, dists = \
                kmeans.kmeans_best_of_n(X, K, n_trials=4, dist_cls=distributions.KL)
        print 'K-means: {}s'.format(time.time() - t)
        results[algos.kmeans] = {
            'seq': assignments,
            'centroids': centroids,
        }
        seqs[algos.kmeans] = assignments

    # EM
    if options.init == 'em' or algos.em in algs:
        iterations = 10
        t = time.time()
        tau_em, obs_distr, pi, em_ll_train, _ = em.em(X,
                                                      centroids,
                                                      n_iter=options.n_iter)
        print 'EM: {}s'.format(time.time() - t)
Exemplo n.º 4
0
        for j in range(K):
            x, y = np.arange(-10., 10., 0.04), np.arange(-15., 15., 0.04)
            xx, yy = np.meshgrid(x, y)
            sx = np.sqrt(sigmas[j][0,0])
            sy = np.sqrt(sigmas[j][1,1])
            sxy = sigmas[j][1,0]
            z = bivariate_normal(xx, yy, sx, sy, means[j,0], means[j,1], sxy)
            cs = plt.contour(xx, yy, z, [0.01])

if __name__ == '__main__':
    # Script entry point: run EM twice on the same data from the same
    # K-means initialization -- first with the K-means centers as-is
    # (titled "proportional to identity" below), then with
    # distributions.Gaussian components -- and plot each result.
    X = np.loadtxt('EMGaussian.data')
    Xtest = np.loadtxt('EMGaussian.test')
    K = 4
    iterations = 40

    # K-means (best of 5 trials) provides the initial assignments and centers.
    assignments, centers, _ = kmeans.kmeans_best_of_n(X, K, n_trials=5)
    # Start every component from a unit variance parameter.
    for k in range(K):
        centers[k].sigma2 = 1.

    # Isotropic
    tau, obs_distr, pi, ll_train_iso, ll_test_iso = \
            em(X, centers, assignments, n_iter=iterations, Xtest=Xtest)
    plot_em(X, tau, obs_distr, contours=True)
    plt.title('EM with covariance matrices proportional to identity')

    # General
    # Each Gaussian starts from the center's mean with covariance
    # sigma2 * I; np.eye(2) hard-codes 2-dimensional data.
    # NOTE(review): c.sigma2 is 1. here unless the em call above updated
    # the `centers` objects in place -- not visible from this block.
    new_centers = [distributions.Gaussian(c.mean, c.sigma2*np.eye(2)) \
                for c in centers]
    # tau, obs_distr and pi are overwritten by this second run; only the
    # log-likelihood results keep separate names (_iso vs _gen).
    tau, obs_distr, pi, ll_train_gen, ll_test_gen = \
            em(X, new_centers, assignments, n_iter=iterations, Xtest=Xtest)
    plot_em(X, tau, obs_distr, contours=True)