Example #1
 def fit(self):
     l = len(self.samples)
     print "Data Size: ", l
     self.samples = numpy.asarray(self.samples)
     print "start fitting..."
     self.means, self.covars, self.weights, self.logl = pypr_GMM.em_gm(
         self.samples, K=self.n_comp, max_iter=self.n_iter, verbose=True, diag_add=1e-9
     )
Example #2
 def fit(self):
     l = len(self.samples)
     print("Data Size: ", l)
     self.samples = numpy.asarray(self.samples)
     print("start fitting...")
     self.means, self.covars, self.weights, self.logl = pypr_GMM.em_gm(
         self.samples, K=self.n_comp, max_iter=self.n_iter, verbose=True, diag_add=1e-9
     )
Example #3
    def fit(self, data):
        '''
        Fit a GMM to the data; fall back to a single Gaussian when k == 1.
        '''
        if self.k > 1:
            self.mu, self.sigma, self.pi, ll = gmm.em_gm(np.array(data),
                                                         K=self.k)
        else:
            self.mu = [np.mean(data, axis=0)]
            self.sigma = [np.cov(data, rowvar=0)]
            self.pi = np.array([1.0])

        self.updateInvSigma()

        return self
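A hedged usage sketch for the method above; the wrapper class name GMMModel and its constructor are assumptions for illustration, not part of the example:

import numpy as np

# GMMModel is a hypothetical class exposing the fit() method from Example #3.
data = np.random.randn(500, 2)      # 500 two-dimensional samples
model = GMMModel(k=3).fit(data)     # fit() returns self, so calls chain
print(model.mu, model.pi)           # fitted component means and mixing weights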
Example #4
                                        mGMM.bic(dataset),
                                        mGMM.score_samples(dataset)[0].sum()))
############## best: 12 components and full covariance
import pypr.clustering.gmm as gmm


def iter_plot(cen_lst, cov_lst, itr):
    # For plotting EM progress
    if itr % 2 == 0:
        for i in range(len(cen_lst)):
            x, y = gmm.gauss_ellipse_2d(cen_lst[i], cov_lst[i])
            plt.plot(x, y, 'k', linewidth=0.5)


cen_lst, cov_lst, p_k, logL = gmm.em_gm(dataset,
                                        K=9,
                                        max_iter=400,
                                        verbose=True)
print "Log likelihood (how well the data fits the model) = ", logL

self_centroids = [
    np.array([77, 75]),
    np.array([335, 75]),
    np.array([693, 75]),
    np.array([77, 350]),
    np.array([335, 350]),
    np.array([693, 350]),
    np.array([77, 741]),
    np.array([335, 741]),
    np.array([693, 741])
]
Example #5
              and {} components:
              aic = {}
              bic = {}
              likelihood = {}""".format(c, n, mGMM.aic(dataset), mGMM.bic(dataset),
                mGMM.score_samples(dataset)[0].sum()))        
############## best: 12 components and full covariance
import pypr.clustering.gmm as gmm

def iter_plot(cen_lst, cov_lst, itr):
    # For plotting EM progress
    if itr % 2 == 0:
        for i in range(len(cen_lst)):
            x, y = gmm.gauss_ellipse_2d(cen_lst[i], cov_lst[i])
            plt.plot(x, y, 'k', linewidth=0.5)

cen_lst, cov_lst, p_k, logL = gmm.em_gm(dataset, K=9, max_iter=400, verbose=True)
print("Log likelihood (how well the data fits the model) =", logL)

self_centroids = [np.array([77, 75]),
                  np.array([335, 75]),
                  np.array([693, 75]),  
                  np.array([77, 350]),
                  np.array([335, 350]),
                  np.array([693, 350]),
                  np.array([77, 741]),
                  np.array([335, 741]),
                  np.array([693, 741])]

cen_lst, cov_lst, p_k, logL = gmm.em_gm(dataset, K=9, max_iter=400, verbose=True,
                                        init_kw={'cluster_init': 'custom',
                                                 'cntr_lst': self_centroids})
print("Log likelihood (how well the data fits the model) =", logL)
# Covariance matrices
ccov = [array([[1, 0.4], [0.4, 1]]), diag((1, 2)), diag((0.4, 0.1))]

T = gmm.sample_gaussian_mixture(centroids, ccov, mc, samples=500)
V = gmm.sample_gaussian_mixture(centroids, ccov, mc, samples=500)
plot(T[:,0], T[:,1], '.')

# Expectation-Maximization of Mixture of Gaussians
Krange = range(1, 20 + 1)
runs = 1
meanLogL_train = np.zeros((len(Krange), runs))
meanLogL_valid = np.zeros((len(Krange), runs))
for K in Krange:
    print "Clustering for K = ", K; sys.stdout.flush()
    for r in range(runs):
        cen_lst, cov_lst, p_k, logL = gmm.em_gm(T, K=K, max_iter=100)
        meanLogL_train[K-1, r] = logL
        meanLogL_valid[K-1, r] = gmm.gm_log_likelihood(V, cen_lst, cov_lst, p_k)

fig1 = figure()
subplot(1, 2, 1)
for r in range(runs):
    plot(Krange, meanLogL_train[:, r], 'g:', label='Training')
    plot(Krange, meanLogL_valid[:, r], 'b-', label='Validation')
legend(loc='lower right')
xlabel('Number of clusters')
ylabel('log likelihood')

bic = np.zeros(len(Krange))
# We should train with ALL data here
X = np.concatenate((T, V), axis=0)
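The snippet ends after allocating the BIC table; a plausible continuation, reusing the stattest.bic_gmm call from Example #11 (the import path for stattest is a guess about pypr's layout):

import pypr.stattest as stattest  # import path is an assumption

# Refit on all the data for each K and score with BIC (argument order
# copied from Example #11: bic_gmm(logL, N, D, K)).
D = X.shape[1]
for K in Krange:
    cen_lst, cov_lst, p_k, logL = gmm.em_gm(X, K=K, max_iter=100)
    bic[K - 1] = stattest.bic_gmm(logL, X.shape[0], D, K)

subplot(1, 2, 2)
plot(Krange, bic)
xlabel('Number of clusters')
ylabel('BIC score')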
Example #7
            x, y = gmm.gauss_ellipse_2d(cen_lst[i], cov_lst[i])
            plot(x, y, 'k', linewidth=0.5)


seed(1)
mc = [0.4, 0.4, 0.2]  # Mixing coefficients
centroids = [array([0, 0]), array([3, 3]), array([0, 4])]
ccov = [array([[1, 0.4], [0.4, 1]]), diag((1, 2)), diag((0.4, 0.1))]

# Generate samples from the gaussian mixture model
X = gmm.sample_gaussian_mixture(centroids, ccov, mc, samples=1000)
fig1 = figure()
plot(X[:, 0], X[:, 1], '.')

# Expectation-Maximization of Mixture of Gaussians
cen_lst, cov_lst, p_k, logL = gmm.em_gm(X, K=3, max_iter=400,
                                        verbose=True, iter_call=iter_plot)
print("Log likelihood (how well the data fits the model) =", logL)

# Plot the cluster ellipses
for i in range(len(cen_lst)):
    x1, x2 = gmm.gauss_ellipse_2d(cen_lst[i], cov_lst[i])
    plot(x1, x2, 'k', linewidth=2)
title("")
xlabel(r'$x_1$')
ylabel(r'$x_2$')

# Now we will find the conditional distribution of x given y
fig2 = figure()
ax1 = subplot(111)
plot(X[:, 0], X[:, 1], ',')
y = -1.0
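The example is cut off here; the conditioning step it builds toward would look like this (call signature taken from the commented-out gmm.cond_dist line in Example #10):

# Conditional mixture of x1 given x2 = y; np.nan marks the free dimension.
con_cen, con_cov, new_p_k = gmm.cond_dist(np.array([np.nan, y]),
                                          cen_lst, cov_lst, p_k)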
Example #8
def EM_wall_mixture_model(points,
                          K=3,
                          MAX_ITER=30,
                          init_from_gmm=True,
                          force_connected=True,
                          debug_output=None):
    """ initialization """
    N = points.shape[0]
    # init lines
    L = []
    if init_from_gmm:
        cen_lst, cov_lst, p_k, logL = gmm.em_gm(points, K=K)
        for cen, cov in zip(cen_lst, cov_lst):
            L.append(Line.fromEllipse(cen, cov))
    else:
        for k in range(K):
            L.append(Line.getRandom(points))
    # init pi
    pi_k = 1. / K * ones(K)
    """ run algorithm """
    oldgamma = zeros((N, K))
    for iter_num in range(MAX_ITER):
        """ E-step """
        pi_times_prob = zeros((N, K))
        gamma = zeros((N, K))
        for n in range(N):
            for k in range(K):
                pi_times_prob[n, k] = pi_k[k] * L[k].Prob(points[n])
            # normalized version of pi_times_prob is gamma
            gamma[n, :] = pi_times_prob[n, :] / pi_times_prob[n, :].sum()
        """ debug output """
        if debug_output:
            debug_output(L, gamma, iter_num)
        """ M-step (general) """
        N_k = gamma.sum(0)
        pi_k = N_k / N_k.sum()
        """ M-step (gaussian mixture with inf primary eigenval) """
        eOptimizer = EOptimizer(L, gamma, pi_k, N_k, points, pi_times_prob)
        for k in range(K):
            """ get mean """
            mu = 1 / N_k[k] * sum(gamma[n, k] * points[n] for n in range(N))
            """ get cov matrix """
            x = [array([points[n] - mu]).T for n in range(N)]
            cov = 1 / N_k[k] * sum(gamma[n, k] * x[n] * x[n].T
                                   for n in range(N))
            # re-initialize M, alpha and sigma from the ellipse
            L[k].updateFromEllipse(mu, cov)
            """ get e """
            eOptimizer.setK(k)
            L[k].e = eOptimizer.optimize()
        """ force that all lines are connected """
        for l in L:
            l.connectToNearestLines(L)
        """ check sanity of lines """
        for k in range(K):
            if L[k].inBadCondition():
                #print "L[%d] in bad condition" % k
                L[k] = Line.getRandom(points)
            for kk in range(k):
                if L[k].almostEqualTo(L[kk]):
                    #print "L[%d] almost equal to L[%d]" % (k, kk)
                    L[k] = Line.getRandom(points)
                    break
        """ remove 2 lines that are on same real line """
        DTHETA = 20 * pi / 180  # margin
        for k, l in enumerate(L):
            for kk, line in ([kk, ll] for kk, ll in enumerate(L) if k != kk):
                if l.isConnectedTo(line) or l.isAlmostConnectedTo(line, L):
                    theta = l.getAngleWith(line)
                    if pi / 2 - abs(theta - pi / 2) < DTHETA:
                        # remove line with least responsibility
                        del_k = k if N_k[kk] > N_k[k] else kk
                        L[del_k] = Line.getRandom(points)
        """ remove crossing lines """
        for k, l in enumerate(L):
            for kk, line in ([kk, ll] for kk, ll in enumerate(L) if k != kk):
                # if l and line are connected, they can't cross
                if l.isConnectedTo(line):
                    continue
                X = l.getIntersectionWith(line)
                # if X lies on line and l, this is an intersection
                if line.hasPoint(X) and l.hasPoint(X):
                    # remove line with least responsibility
                    del_k = k if N_k[kk] > N_k[k] else kk
                    L[del_k] = Line.getRandom(points)
        """ check stop cond """
        if norm(oldgamma - gamma) < .05:
            break
        oldgamma = gamma
    """ debug output """
    if debug_output:
        debug_output(L, gamma)
    """ calc probablility P[{Lk} | X] ~ P[X | {Lk}] * P[{Lk}] """
    totalLogProb = sum(log(pi_times_prob.sum(1)))
    logProbLk = 0
    anglecounter = 0
    ## add prob of theta ~ N(pi/2, sigma_theta)
    sigma_theta = .01 * pi / 2
    for k, l in enumerate(L):
        lines = l.getAllConnectedLinesFrom(L[k + 1:])
        for line in lines:
            theta = l.getAngleWith(line)
            dtheta = abs(theta - pi / 2)
            logProbLk += -dtheta**2 / (2 * sigma_theta**2) - log(
                sqrt(2 * pi) * sigma_theta)
            anglecounter += 1
    logProbLk /= anglecounter if anglecounter else 1
    ## in case of crossing: Prob = 0
    for k, l in enumerate(L):
        for line in (ll for kk, ll in enumerate(L) if k != kk):
            X = l.getIntersectionWith(line)
            # if l and line are connected, no extra prob is needed
            if l.isConnectedTo(line):
                continue
            # if X lies on line and l, this is an intersection
            if line.hasPoint(X) and l.hasPoint(X):
                logProbLk += float('-inf')
                break
    ## add prob for unattached but near line (extension has to cross)
    ##         ~ N(THRESHOLD_E/d | 0, sigma_unatt) * N(theta | pi/2, sigma_theta)
    sigma_unatt = 0.05
    for k, l in enumerate(L):
        for line in (ll for kk, ll in enumerate(L) if k != kk):
            # if l and line are connected, no extra prob is needed
            if l.isConnectedTo(line):
                continue
            for E in line.E():
                # if E is near l, add probabilities as described above
                d = l.diff(E) + .001  # d should never be zero
                if d < THRESHOLD_E:
                    logProbLk += -(THRESHOLD_E / d)**2 / (2 * sigma_unatt**2)
                    theta = l.getAngleWith(line)
                    logProbLk += -(theta - pi / 2)**2 / (2 * sigma_theta**2)
                    break

    logProbLkX = totalLogProb + logProbLk

    return L, logProbLkX
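A hedged usage sketch for the function above, assuming the points array and the debug_output callback from Example #9 are in scope:

# Fit K wall segments to the 2-D point cloud and report the model score.
lines, log_prob = EM_wall_mixture_model(points, K=4, MAX_ITER=30,
                                        debug_output=debug_output)
print("log P[{Lk} | X] (up to a constant) =", log_prob)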
Example #9
""" wall mixture model """
from points import points, N
from wmmlib import *
from pylab import *
import pylab as pl
from copy import copy
import time
import pypr.clustering.gmm as gmm  # download from http://pypr.sourceforge.net/
import argparse
""" settings """
K = 4
print(N)
""" plot function """
gmm_cen_lst, gmm_cov_lst, p_k, logL = gmm.em_gm(points, K=K)


def debug_output(L, gamma=None, iter_num=None):
    """ check final """
    if gamma is not None:
        final = iter_num is None
        if final: ioff()
        #else: return
    """ text output """
    if gamma is not None:
        print("iteration %d..." % iter_num if not final else "\nFINAL")
    """ init vars """
    COLORS = "bykgrcm"
    if gamma is None:
        gamma = zeros((N, K))
    """ plot init """
    clf()
Example #10
def plotData(X, c):
    plot(X[:, 0], X[:, 1], '.', c=c, alpha=0.2)

# create data

#seed(1)

fig1 = figure()

X = generateData(70)
plotData(X, 'b')
cen_lst_1, cov_lst_1, p_k_1, logL_1 = gmm.em_gm(X, K=3, max_iter=400,
                                                verbose=True, iter_call=None)
plotCenters(cen_lst_1, cov_lst_1, 'b', 0)


X = generateData(70)
plotData(X, 'y')
cen_lst_2, cov_lst_2, p_k_2, logL_2 = gmm.em_gm(X, K=3, max_iter=400,
                                                verbose=True, iter_call=None)
plotCenters(cen_lst_2, cov_lst_2, 'y', 3)



# get conditional distribution
#y = 0.3
#(con_cen, con_cov, new_p_k) = gmm.cond_dist(np.array([y,np.nan]), cen_lst, cov_lst, p_k)
Example #11
sigma_scl = 0.1
X = np.zeros((samples_pr_cluster * K_orig, D))
for k in range(K_orig):
    mu = np.random.randn(D)
    sigma = np.eye(D) * sigma_scl
    cen_lst.append(mu)
    cov_lst.append(sigma)
mc = np.ones(K_orig) / K_orig  # All clusters equally probable

# Sample from the mixture:
N = 1000
X = gmm.sample_gaussian_mixture(cen_lst, cov_lst, mc, samples=N)

K_range = range(2, 10)
runs = 10
bic_table = np.zeros((len(K_range), runs))
for K_idx, K in enumerate(K_range):
    print "Clustering for K=%d" % K
    for i in range(runs):
        cluster_init_kw = {"cluster_init": "sample", "max_init_iter": 5, "cov_init": "var", "verbose": True}
        cen_lst, cov_lst, p_k, logL = gmm.em_gm(
            X, K=K, max_iter=1000, delta_stop=1e-2, init_kw=cluster_init_kw, verbose=True, max_tries=10
        )
        bic = stattest.bic_gmm(logL, N, D, K)
        bic_table[K_idx, i] = bic

plot(K_range, bic_table)
xlabel("K")
ylabel("BIC score")
title("True K=%d, dim=%d") % (K_orig, D)
Example #12
def EM_wall_mixture_model(points, K = 3, MAX_ITER = 30, init_from_gmm=True, force_connected=True, debug_output = None):
    """ initialization """
    N = points.shape[0]
    # init lines
    L = []
    if init_from_gmm:
        cen_lst, cov_lst, p_k, logL = gmm.em_gm(points, K = K)
        for cen, cov in zip(cen_lst, cov_lst):
            L.append(Line.fromEllipse(cen, cov))
    else:
        for k in range(K):
            L.append(Line.getRandom(points))
    # init pi
    pi_k = 1./K * ones(K)
    
    """ run algorithm """
    oldgamma = zeros((N, K))
    for iter_num in range(MAX_ITER):
        """ E-step """
        pi_times_prob = zeros((N, K))
        gamma = zeros((N, K))
        for n in range(N):
            for k in range(K):
                pi_times_prob[n,k] = pi_k[k] * L[k].Prob(points[n])
            # normalized version of pi_times_prob is gamma
            gamma[n,:] = pi_times_prob[n,:] / pi_times_prob[n,:].sum()

        """ debug output """
        if debug_output:
            debug_output(L, gamma, iter_num)
        
        """ M-step (general) """
        N_k = gamma.sum(0)
        pi_k = N_k / N_k.sum()
        
        """ M-step (gaussian mixture with inf primary eigenval) """
        eOptimizer = EOptimizer(L, gamma, pi_k, N_k, points, pi_times_prob)
        for k in range(K):
            """ get mean """
            mu = 1/N_k[k] * sum(gamma[n,k]*points[n] for n in range(N))
            """ get cov matrix """
            x = [array([points[n]-mu]).T for n in range(N)]
            cov = 1/N_k[k] * sum(gamma[n,k]*x[n]*x[n].T for n in range(N))
            # re-initialize M, alpha and sigma from the ellipse
            L[k].updateFromEllipse(mu, cov)
            """ get e """
            eOptimizer.setK(k)
            L[k].e = eOptimizer.optimize()
        """ force that all lines are connected """
        for l in L:
            l.connectToNearestLines(L)
        
        """ check sanity of lines """
        for k in range(K):
            if L[k].inBadCondition():
                #print "L[%d] in bad condition" % k
                L[k] = Line.getRandom(points)
            for kk in range(k):
                if L[k].almostEqualTo(L[kk]):
                    #print "L[%d] almost equal to L[%d]" % (k, kk)
                    L[k] = Line.getRandom(points)
                    break
        
        """ remove 2 lines that are on same real line """
        DTHETA = 20 * pi/180 # margin
        for k, l in enumerate(L):
            for kk, line in ([kk, ll] for kk, ll in enumerate(L) if k != kk):
                if l.isConnectedTo(line) or l.isAlmostConnectedTo(line, L):
                    theta = l.getAngleWith(line)
                    if pi/2 - abs(theta - pi/2) < DTHETA:
                        # remove line with least responsibility
                        del_k = k if N_k[kk] > N_k[k] else kk
                        L[del_k] = Line.getRandom(points)
        
        """ remove crossing lines """
        for k, l in enumerate(L):
            for kk, line in ([kk, ll] for kk, ll in enumerate(L) if k != kk):
                # if l and line are connected, they can't cross
                if l.isConnectedTo(line):
                    continue
                X = l.getIntersectionWith(line)
                # if X lies on line and l, this is an intersection
                if line.hasPoint(X) and l.hasPoint(X):
                    # remove line with least responsibility
                    del_k = k if N_k[kk] > N_k[k] else kk
                    L[del_k] = Line.getRandom(points)
        
        """ check stop cond """
        if norm(oldgamma - gamma) < .05:
            break
        oldgamma = gamma
    
    """ debug output """
    if debug_output:
        debug_output(L, gamma)
    
    """ calc probablility P[{Lk} | X] ~ P[X | {Lk}] * P[{Lk}] """
    totalLogProb = sum(log(pi_times_prob.sum(1)))
    logProbLk = 0
    anglecounter = 0
    ## add prob of theta ~ N(pi/2, sigma_theta)
    sigma_theta = .01 * pi/2
    for k, l in enumerate(L):
        lines = l.getAllConnectedLinesFrom(L[k+1:])
        for line in lines:
            theta = l.getAngleWith(line)
            dtheta = abs(theta - pi/2)
            logProbLk += - dtheta**2 / (2*sigma_theta**2) - log(sqrt(2*pi)*sigma_theta)
            anglecounter += 1
    logProbLk /= anglecounter if anglecounter else 1
    ## in case of crossing: Prob = 0
    for k, l in enumerate(L):
        for line in (ll for kk, ll in enumerate(L) if k != kk):
            X = l.getIntersectionWith(line)
            # if l and line are connected, no extra prob is needed
            if l.isConnectedTo(line):
                continue
            # if X lies on line and l, this is an intersection
            if line.hasPoint(X) and l.hasPoint(X):
                logProbLk += float('-inf')
                break
    ## add prob for unattached but near line (extension has to cross)
    ##         ~ N(THRESHOLD_E/d | 0, sigma_unatt) * N(theta | pi/2, sigma_theta)
    sigma_unatt = 0.05
    for k, l in enumerate(L):
        for line in (ll for kk, ll in enumerate(L) if k != kk):
            # if l and line are connected, no extra prob is needed
            if l.isConnectedTo(line):
                continue
            for E in line.E():
                # if E is near l, add probabilities as described above
                d = l.diff(E) + .001 # d should never be zero
                if d < THRESHOLD_E:
                    logProbLk += - (THRESHOLD_E / d)**2 / (2 * sigma_unatt**2)
                    theta = l.getAngleWith(line)
                    logProbLk += - (theta - pi/2)**2 / (2*sigma_theta**2)
                    break
    
    logProbLkX = totalLogProb + logProbLk
    
    return L, logProbLkX
Example #13
File: gmm.py Project: tadeze/ADMV
    from util.common import metric
    df = pd.read_csv(
        '/home/tadeze/projects/missingvalue/datasets/anomaly/yeast/fullsamples/yeast_1.csv'
    )
    train_data = df.iloc[:, 1:].to_numpy().astype(np.float64)
    # train_lbl = df.iloc[:, 0]
    train_lbl = (df.iloc[:, 0] == "anomaly").astype(int)
    gmms = GaussianMixture(n_components=3)
    gmms.fit(train_data)
    score = -gmms.score_samples(train_data)
    print(gmms.get_params(False))
    print(len(score))
    print(metric(train_lbl, score))
    from pypr.clustering import gmm

    cen_lst, cov_lst, p_k, logL = gmm.em_gm(train_data, max_iter=100, K=3)
    score = [
        -gmm.gm_log_likelihood(
            train_data[i, :], center_list=cen_lst, cov_list=cov_lst, p_k=p_k)
        for i in range(0, train_data.shape[0])
    ]
    #print(score)
    print(metric(train_lbl, score))

    # Marginalize the mixture onto the selected features
    #m_cen_lst, m_cov_lst, m_p_k
    featur_inc = [0, 4]
    marginalize = gmm.marg_dist(featur_inc, cen_lst, cov_lst, p_k)
    score2 = score = [
        -gmm.gm_log_likelihood(train_data[i, featur_inc],
                               center_list=marginalize[0],
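The snippet is truncated mid-call; a hedged completion of the marginalised scoring, assuming marg_dist returns (center_list, cov_list, p_k) as the commented names m_cen_lst, m_cov_lst, m_p_k suggest:

    # Score each point under the marginalised mixture, mirroring the
    # full-dimensional scoring loop above.
    m_cen_lst, m_cov_lst, m_p_k = marginalize
    score2 = [
        -gmm.gm_log_likelihood(train_data[i, featur_inc],
                               center_list=m_cen_lst,
                               cov_list=m_cov_lst,
                               p_k=m_p_k)
        for i in range(train_data.shape[0])
    ]
    print(metric(train_lbl, score2))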
Example #14

seed(1)
mc = [0.4, 0.4, 0.2]  # Mixing coefficients
centroids = [array([0, 0]), array([3, 3]), array([0, 4])]
ccov = [array([[1, 0.4], [0.4, 1]]), diag((1, 2)), diag((0.4, 0.1))]

# Generate samples from the gaussian mixture model
X = gmm.sample_gaussian_mixture(centroids, ccov, mc, samples=1000)
fig1 = figure()
plot(X[:, 0], X[:, 1], '.')

# Expectation-Maximization of Mixture of Gaussians
cen_lst, cov_lst, p_k, logL = gmm.em_gm(X,
                                        K=3,
                                        max_iter=400,
                                        verbose=True,
                                        iter_call=None)
print "Log likelihood (how well the data fits the model) = ", logL

# Plot the cluster ellipses
for i in range(len(cen_lst)):
    x1, x2 = gmm.gauss_ellipse_2d(cen_lst[i], cov_lst[i])
    plot(x1, x2, 'k', linewidth=2)
title("")
xlabel(r'$x_1$')
ylabel(r'$x_2$')

# Now we will find the conditional distribution of x given y
fig2 = figure()
ax1 = subplot(111)
Example #15
""" wall mixture model """
from points import points, N
from wmmlib import *
from pylab import *
import pylab as pl
from copy import copy
import time
import pypr.clustering.gmm as gmm  # download from http://pypr.sourceforge.net/
import argparse

""" settings """
K = 4
print(N)

""" plot function """
gmm_cen_lst, gmm_cov_lst, p_k, logL = gmm.em_gm(points, K=K)


def debug_output(L, gamma=None, iter_num=None):

    """ check final """
    if gamma is not None:
        final = iter_num is None
        if final:
            ioff()
        # else: return

    """ text output """
    if gamma is not None:
        print("iteration %d..." % iter_num if not final else "\nFINAL")
Example #16
sigma_scl = 0.1
X = np.zeros((samples_pr_cluster * K_orig, D))
for k in range(K_orig):
    mu = np.random.randn(D)
    sigma = np.eye(D) * sigma_scl
    cen_lst.append(mu)
    cov_lst.append(sigma)
mc = np.ones(K_orig) / K_orig  # All clusters equally probable

# Sample from the mixture:
N = 1000
X = gmm.sample_gaussian_mixture(cen_lst, cov_lst, mc, samples=N)

K_range = list(range(2, 10))
runs = 10
bic_table = np.zeros((len(K_range), runs))
for K_idx, K in enumerate(K_range):
    print("Clustering for K=%d" % K)
    for i in range(runs):
        cluster_init_kw = {'cluster_init': 'sample', 'max_init_iter': 5,
                           'cov_init': 'var', 'verbose': True}
        cen_lst, cov_lst, p_k, logL = gmm.em_gm(X, K=K, max_iter=1000,
                                                delta_stop=1e-2, init_kw=cluster_init_kw,
                                                verbose=True, max_tries=10)
        bic = stattest.bic_gmm(logL, N, D, K)
        bic_table[K_idx, i] = bic

plot(K_range, bic_table)
xlabel('K')
ylabel('BIC score')
title('True K=%d, dim=%d' % (K_orig, D))
Example #17
        for i in range(len(cen_lst)):
            x, y = gmm.gauss_ellipse_2d(cen_lst[i], cov_lst[i])
            plot(x, y, 'k', linewidth=0.5)
    
seed(1)
mc = [0.4, 0.4, 0.2]  # Mixing coefficients
centroids = [array([0, 0]), array([3, 3]), array([0, 4])]
ccov = [array([[1, 0.4], [0.4, 1]]), diag((1, 2)), diag((0.4, 0.1))]

# Generate samples from the gaussian mixture model
X = gmm.sample_gaussian_mixture(centroids, ccov, mc, samples=1000)
fig1 = figure()
plot(X[:,0], X[:,1], '.')

# Expectation-Maximization of Mixture of Gaussians
cen_lst, cov_lst, p_k, logL = gmm.em_gm(X, K=3, max_iter=400,
                                        verbose=True, iter_call=iter_plot)
print("Log likelihood (how well the data fits the model) =", logL)

# Plot the cluster ellipses
for i in range(len(cen_lst)):
    x1, x2 = gmm.gauss_ellipse_2d(cen_lst[i], cov_lst[i])
    plot(x1, x2, 'k', linewidth=2)
title(""); xlabel(r'$x_1$'); ylabel(r'$x_2$')

# Now we will find the conditional distribution of x given y
fig2 = figure()
ax1 = subplot(111)
plot(X[:,0], X[:,1], ',')
y = -1.0
axhline(y)
x1plt = np.linspace(axis()[0], axis()[1], 200)
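The listing ends mid-example; a hedged continuation that conditions the mixture on x2 = y (same cond_dist signature as sketched after Example #7) and evaluates the resulting 1-D density with plain numpy, so no further pypr helpers are assumed:

# Condition on x2 = y, then evaluate the 1-D conditional density by hand.
con_cen, con_cov, new_p_k = gmm.cond_dist(np.array([np.nan, y]),
                                          cen_lst, cov_lst, p_k)
pdf = np.zeros_like(x1plt)
for cen, cov, w in zip(con_cen, con_cov, new_p_k):
    m = float(np.ravel(cen)[0])   # scalar mean of this 1-D component
    v = float(np.ravel(cov)[0])   # scalar variance of this 1-D component
    pdf += w * np.exp(-(x1plt - m) ** 2 / (2 * v)) / np.sqrt(2 * np.pi * v)
plot(x1plt, pdf, 'r', linewidth=2)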