Example No. 1
def test_seeds(X, K):
    print("\n############## KMEANS K=" + str(K) + " ###############")

    # k-means cost for five random initializations
    for seed in range(5):
        mixture, post = common.init(X, K, seed)
        cost = kmeans.run(X, mixture, post)[2]
        print("K=" + str(K) + " seed=" + str(seed) + " : cost=" + str(cost))

    # EM log-likelihood from the same initializations
    for seed in range(5):
        mixture, post = common.init(X, K, seed)
        likelihood = naive_em.run(X, mixture, post)[2]
        print("K=" + str(K) + " seed=" + str(seed) + " : likelihood=" + str(likelihood))
Example No. 2
def run_kmeans(X, plot=False):
    """ My solution:
    for i in range(len(K)):
        for j in range(len(seed)):
            mixture, post = common.init(X, K[i], seed[j])
            mixture, post, cost = kmeans.run(X, mixture, post)
            print("K = {}, seed = {}, cost = {}".format(K[i], seed[j], cost))
            if plot:
                common.plot(X, mixture, post, "K={}, seed={}".format(K[i], seed[j]))
    """
    # Instructor's solution:
    for K in range(1, 5):
        min_cost = None
        best_seed = None
        for seed in range(0, 5):
            mixture, post = common.init(X, K, seed)
            mixture, post, cost = kmeans.run(X, mixture, post)
            if min_cost is None or cost < min_cost:
                min_cost = cost
                best_seed = seed

        mixture, post = common.init(X, K, best_seed)
        mixture, post, cost = kmeans.run(X, mixture, post)
        title = "K-means for K=, seed=, cost=".format(K, best_seed, min_cost)
        print(title)
        common.plot(X, mixture, post, title)
Example No. 3
def run_kmean(X):
    for K in [1, 2, 3, 4]:
        cost_list = []
        for seed in range(5):
            mixture, post = common.init(X, K, seed)
            mixture, post, cost = kmeans.run(X, mixture, post)
            cost_list.append(cost)
            #common.plot(X, mixture, post, "{} means with seed {}".format(K, seed))
        print("The cost of {} clusters is".format(K), min(cost_list))
        # Re-run with the best seed and plot the result
        best_seed = int(np.argmin(cost_list))
        mixture, post = common.init(X, K, best_seed)
        mixture, post, cost = kmeans.run(X, mixture, post)
        common.plot(X, mixture, post, "{} means with seed {}".format(K, best_seed))
    return "Done"
Example No. 4
File: main.py Project: SYYoung/MIT
def run_kmeans():
    for K in range(1, 5):
        min_cost = None
        best_seed = None
        for seed in range(0, 5):
            mixture, post = common.init(X, K, seed)
            mixture, post, cost = kmeans.run(X, mixture, post)
            if min_cost is None or cost < min_cost:
                min_cost = cost
                best_seed = seed

        mixture, post = common.init(X, K, best_seed)
        mixture, post, cost = kmeans.run(X, mixture, post)
        title = "K-means for K={}, seed={} , cost= {}".format(K, best_seed, min_cost)
        common.plot(X, mixture, post, title)
Example No. 5
def run(self):
    """ Main method that drives Spectral Co-Clustering. """
    self.nfeatures = self.A.shape[0]
    self.ndocs = self.A.shape[1]
    self.logger.debug("Word-by-document matrix A has dim:(%d,%d)",
                      self.nfeatures, self.ndocs)
    self.logger.debug("Generating normalized adjacency matrix, A_n")
    self.gen_An()
    self.logger.debug("Finding SVD of An")
    un, s, vnt = spla.svd(self.An.todense())
    self.logger.debug('Shape of un (%d,%d)', un.shape[0], un.shape[1])
    vn = vnt.T
    self.logger.debug('Shape of vn (%d,%d)', vn.shape[0], vn.shape[1])
    self.logger.debug("Generating Z matrix")
    self.get_Z(un, vn)
    data = (self.Z.T).tocsc()
    # Use a distinct name so the instance does not shadow the imported
    # kmeans module
    km = kmeans.KMeans(data, self.k, self.n, self.delta, self.rc,
                       self.cl, self.verbose)
    result = km.run()
    self.centroids = result['centroids']
    self.centroid_dict = result['centroiddict']
    self.clusters = result['clusters']
    self.cluster_dict = self._get_cluster_dict()
    self.logger.debug('Number of co-clusters produced: %d',
                      len(self.clusters))
    return {'centroids': self.centroids,
            'centroiddict': self.centroid_dict,
            'clusters': self.clusters,
            'clusterdict': self.cluster_dict}
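The normalization hidden behind `self.gen_An()` is not shown in this example. Below is a minimal sketch of the usual spectral co-clustering normalization (Dhillon-style); the standalone helper name `gen_An` and its signature are illustrative, not the project's actual API:

import numpy as np
import scipy.sparse as sps

def gen_An(A):
    # An = D1^(-1/2) * A * D2^(-1/2), where D1 and D2 hold the
    # row (word) and column (document) degree sums of A.
    d1 = np.asarray(A.sum(axis=1)).ravel()
    d2 = np.asarray(A.sum(axis=0)).ravel()
    D1 = sps.diags(1.0 / np.sqrt(np.maximum(d1, 1e-12)))
    D2 = sps.diags(1.0 / np.sqrt(np.maximum(d2, 1e-12)))
    return D1 @ A @ D2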
Example No. 7
import arff
import kmeans

def flores_clustering_data_set_run():
    data_set = arff.load(open('./data/flores_clustering.arff'))
    k = 3
    results = kmeans.run(data_set['data'], k)
    print("Centroids")
    print(results[0])
    print("Clusters")
    print(results[1])
    print("Resolved in " + str(results[2]) + " iterations")
Example No. 8
import numpy as np
import kmeans

def clusteredThreeD():
    m = 200  # sample size
    n = 3  # number of features
    K = 4  # number of clusters
    X = np.zeros((0, n))
    centersOfMass = np.random.uniform(0, 100, (K, n))
    for center in centersOfMass:
        stdDev = 12
        samples = np.random.normal(center, stdDev, (int(m / K), n))
        X = np.append(X, samples, axis=0)
    clusterings = kmeans.run(X, K)
Example No. 9
def test_kmeans():
    for k in [1, 2, 3, 4]:
        para_list = []
        for seed in [0, 1, 2, 3, 4]:
            gm, post = common.init(X, k, seed)
            mixture, p, cost = kmeans.run(X, gm, post)
            para_list.append((mixture, p, cost))
        # Keep the run with the lowest cost (k-means cost is minimized)
        best_para = min(para_list, key=lambda x: x[2])
        common.plot(X, best_para[0], best_para[1],
                    'Kmeans on toy data with {k}'.format(k=k))
    return best_para[0], best_para[1]
Example No. 10
def execute(trial=False):
    startTime = datetime.datetime.now()

    # Setup and connect to mongo
    client = dml.pymongo.MongoClient()
    repo = client.repo
    repo.authenticate(team_name, team_name)

    # Get all data needed
    accidents_data = [doc for doc in repo[accidents_collection].find()]

    if trial:
        # take 200 random records if in trial mode
        accidents_data = random.sample(accidents_data, 200)

    # list of all coordinate tuples
    P = [(doc['location']['coordinates'][0], doc['location']['coordinates'][1]) for doc in accidents_data]

    # Compute min and max coordinates
    minX = accidents_data[0]['location']['coordinates'][0]
    maxX = accidents_data[0]['location']['coordinates'][0]
    minY = accidents_data[0]['location']['coordinates'][1]
    maxY = accidents_data[0]['location']['coordinates'][1]

    for doc in accidents_data:
        if doc['location']['coordinates'][0] < minX:
            minX = doc['location']['coordinates'][0]
        if doc['location']['coordinates'][0] > maxX:
            maxX = doc['location']['coordinates'][0]
        if doc['location']['coordinates'][1] < minY:
            minY = doc['location']['coordinates'][1]
        if doc['location']['coordinates'][1] > maxY:
            maxY = doc['location']['coordinates'][1]

    # starting point: the bounding-box corners serve as initial centroids
    M = [(minX, minY), (maxX, maxY)]

    # Run the algorithm
    clusters = kmeans.run(M, P)
    print("Final clusters:", clusters)

    # Save results to DB
    repo.dropPermanent(clusters_collection)
    repo.createPermanent(clusters_collection)
    doc = [{"loc": [x, y]} for (x, y) in clusters]
    repo[clusters_collection].insert_many(doc)

    # Wrap up
    repo.logout()
    endTime = datetime.datetime.now()
    return {"start": startTime, "end": endTime}
Example No. 11
# Instantiate list to hold evaluation metrics over different values of k
precision = []
max_precision = []
min_precision = []
std_precision = []
recall = []
max_recall = []
min_recall = []
std_recall = []
fscore = []
max_fscore = []
min_fscore = []
std_fscore = []
RI = []
max_RI = []
min_RI = []
std_RI = []
epoch = []
max_epoch = []
min_epoch = []

# Train our classifier for each value of k (see the sketch of the
# implied loop after this snippet)

print("Running algorithm with k = " + str(k) + "\n")

# Run k-Means algorithm
precisions, recalls, fscores, ris, epochs = kmeans.run(train_data, classes, 4,
                                                       n_runs,
                                                       distance_measure)
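This snippet is truncated: the comment promises a loop over k, but only a single `print` and `kmeans.run` call survive. A hedged sketch of the implied loop, assuming a hypothetical candidate list `ks` and the same `kmeans.run` signature; `train_data`, `classes`, `n_runs`, and `distance_measure` are taken to be defined elsewhere in the original file:

import numpy as np

for k in ks:
    print("Running algorithm with k = " + str(k) + "\n")
    precisions, recalls, fscores, ris, epochs = kmeans.run(
        train_data, classes, k, n_runs, distance_measure)

    # Aggregate each metric over the runs for this k
    precision.append(np.mean(precisions))
    max_precision.append(np.max(precisions))
    min_precision.append(np.min(precisions))
    std_precision.append(np.std(precisions))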
Example No. 12
#%%
import numpy as np
import kmeans
import common
import naive_em
import em

#%%
X = np.loadtxt("toy_data.txt")
for K in range(1,5):
    for seed in range(0,5):

        title = "K=" + str(K) + ", seed=" + str(seed)

        M, P = common.init(X, K, seed)
        mixture, post, cost = kmeans.run(X, M, P)
        print(title, cost)

#common.plot(X, M, P, title)

# %%
Example No. 13
#23 : 101928.961581
#24 : 105803.434798
#25 : 106071.097392
#26 : 108282.084023
#27 : 105008.134663
#28 : 105096.342568
#29 : 102076.680087
#30 : 106594.176483

#From this data it is clear that after k=5 there is no significant gain
#for increasing k, so k=5 is probably the best fit for our data.
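The elbow reasoning above is easier to see on a plot. A minimal sketch, assuming `ks` and `costs` hold the cluster counts and best costs produced by a script like the one below (matplotlib is not used in the original):

import matplotlib.pyplot as plt

def plot_elbow(ks, costs):
    # Best cost vs. k; the curve flattens near k=5 for the data above,
    # which is the 'elbow' that suggests a good cluster count.
    plt.plot(ks, costs, marker="o")
    plt.xlabel("k")
    plt.ylabel("best cost over iterations")
    plt.title("k-means elbow plot")
    plt.show()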

import kmeans
import sys

try:
    iterations = int(sys.argv[1])
    numk = int(sys.argv[2]) + 1
except (ValueError, IndexError):
    print("Invalid input, usage: python testkmeans.py [#iters] [#ks]", file=sys.stderr)
    sys.exit(1)

# Track the best (lowest) cost seen for each k
ks = [sys.maxsize] * numk
for k in range(1, numk):
    for i in range(iterations):
        newk = kmeans.run(k, True)
        if newk < ks[k]:
            ks[k] = newk
for i, k in enumerate(ks[1:], 1):
    print(i, ":", k)
Example No. 14
import kmeans

X = []

# Skip the first four lines and every odd-indexed line of the embedding file
with open('models/starspace.txt') as file:
    for i, line in enumerate(file):
        should_continue = i < 4 or i % 2 != 0
        if should_continue:
            continue

        vector = [float(chunk) for chunk in line.split()]
        X.append(vector)

kmeans.run(X)
Example No. 15
                c='#00CED1')
    plt.scatter(y2[:, 0], y2[:, 1], c='#00CED1', linewidths=line3)
    plt.legend(('points', 'centers', 'membership grade'))
    plt.title('u of No.2 center')

    plt.subplot(133)
    plt.plot(x, fcm_distance[1:], c='black')
    plt.title('Cumulative distance')
    name = 'fig' + str(m)

    plt.savefig(name)
    plt.show()


n_samples = 50
#centerbox= [(-5,0),(5,0)]
#point,_ = make_blobs(n_samples=100, n_features=2, cluster_std=1.6,center_box=centerbox, shuffle=False, random_state=42)
point = np.zeros((n_samples, 2))
for i in range(25):
    point[i][0] = random.randint(0, 45)         # first 25 points in [0, 45]
    point[i + 25][0] = random.randint(55, 100)  # remaining 25 in [55, 100]

run(2, point)
run(3, point)
run(4, point)
run(5, point)
run(10, point)
run(100, point)

kmeans.run(2, point)
Example No. 16
import json

from kmeans import Point, run

if __name__ == "__main__":
    sortedPoints = lambda ps: sorted(ps, key=lambda p: (p.x, p.y))
    with open("../points.json") as f:
        points = [Point(x[0], x[1]) for x in json.loads(f.read())]
    result = run(points, 10)
    for k in sortedPoints(result.keys()):
        print("==\n# %s #" % k)
        print('\n'.join("  " + str(p) for p in sortedPoints(result[k])))
Example No. 17
import numpy as np
import kmeans
import common
import naive_em
import em

X = np.loadtxt("toy_data.txt")

######### Section 2: K-means ############
print("******* Section 2 *******\n ")
K = [1, 2, 3, 4]
seeds = [0, 1, 2, 3, 4]

costs_kMeans = [0, 0, 0, 0, 0]

for k in range(len(K)):
    for i in range(len(seeds)):
        _, _, costs_kMeans[i] = kmeans.run(X, *common.init(X, K[k], seeds[i]))

    print("----- Clusters", k + 1, " -----")
    print("Lowest cost: ", np.min(costs_kMeans))
    print("Best seed: ", np.argmin(costs_kMeans))

print("******* End of section 2 *******\n ")

######### Section 4: Comparing K-means and EM ############
print("******* Section 4 *******\n ")
costs_EM = [0, 0, 0, 0, 0]
mixtures_EM = [0, 0, 0, 0, 0]  # Mixtures for best seed
bic = [0., 0., 0., 0.]  # BIC for best cluster

for k in range(len(K)):
    for i in range(len(seeds)):
Example No. 18
import numpy as np
import kmeans

def uniformTwoD():
    m = 100  # sample size
    n = 2  # number of features
    K = 3  # number of clusters
    X = np.random.uniform(0, 100, (m, n))
    clusterings = kmeans.run(X, K)
Example No. 19
    plt.show()


X = np.loadtxt("toy_data.txt")
K = [1, 2, 3, 4]
# TODO: Your code here
costs = []
loglikelihoods = []
bics = []
for k in K:
    cost_seeds_ = []
    log_likelihood_ = []
    bic_ = []
    for seed in range(4):
        gauss_mixture, post = common.init(X=X, K=k, seed=seed)
        gauss_mixture_kmeans, post_kmeans, cost = kmeans.run(
            X=X, mixture=gauss_mixture, post=post)
        #print('for k =',k, "and seed=",seed, end=" ")
        #print("cost=",cost)
        gauss_mixture_em, post_em, loglikelihood = naive_em.run(
            X, gauss_mixture, post)
        bic_.append(common.bic(X, gauss_mixture_em, loglikelihood))
        log_likelihood_.append(loglikelihood)
        cost_seeds_.append(cost)
#        plot_points(X,post_kmeans,
#                    title="kmeans with k:"+str(k)+" seed:"+str(seed))
#        plot_points(X,post_em,
#                    title="em with k:"+str(k)+" seed:"+str(seed))
    bics.append(bic_)
    costs.append(cost_seeds_)
    loglikelihoods.append(log_likelihood_)
Example No. 20
try:
	import kmeans
	import common
	import naive_em
	import em
except ModuleNotFoundError:
	import FromLinearModelsToDeepLearning.unit_4.netflix.kmeans as kmeans
	import FromLinearModelsToDeepLearning.unit_4.netflix.common as common
	import FromLinearModelsToDeepLearning.unit_4.netflix.naive_em as naive_em
	import FromLinearModelsToDeepLearning.unit_4.netflix.em as em

X = np.loadtxt(r'C:\Users\sam\Documents\Trainings\FromLinearModelsToDeepLearning\FromLinearModelsToDeepLearning\unit_4\netflix\toy_data.txt')

seeds = [0,1,2,3,4]
mixture, post = common.init(X, 4, 0)
mixture, post, cost = kmeans.run(X,mixture, post )
ks = [1,2,3,4]

from collections import namedtuple
results = namedtuple('results', 'k seed cost')
costs =[]
for k in ks:
	for seed in seeds:
		mixture, post = common.init(X, k, seed)
		mixture, post, cost = kmeans.run(X, mixture, post)
		r = results(k,seed,cost)
		costs.append(r)
		print(r)

def get_best_cost_for_k(costs, k):
	best_cost = float('inf')
Example No. 21
bestseed_EM = [0, 0, 0, 0]

#Mixture for Best Seed for Algo
mixture_kmeans = [0, 0, 0, 0, 0]
mixture_EM = [0, 0, 0, 0, 0]

# Posterior probs. for best seeds
post_kmeans = [0, 0, 0, 0, 0]
post_EM = [0, 0, 0, 0, 0]

# BIC score of cluster
bic = [0., 0., 0., 0.]

for k in range(len(K)):
    for i in range(len(seeds)):
        mixture_kmeans[i], post_kmeans[i], cost_kmeans[i] = kmeans.run(
            X, *common.init(X, K[k], seeds[i]))
        mixture_EM[i], post_EM[i], cost_EM[i] = naive_em.run(
            X, *common.init(X, K[k], seeds[i]))

    print("=============== Clusters:", k + 1, "======================")
    print("Lowest cost using kMeans is:", np.min(cost_kmeans))
    print("Lowest cost using EM is:", np.max(cost_EM))

    #Save best seed for plotting
    bestseed_kmeans[k] = np.argmin(cost_kmeans)
    bestseed_EM[k] = np.argmax(cost_EM)

    common.plot(X,
                mixture_kmeans[bestseed_kmeans[k]],
                post_kmeans[bestseed_kmeans[k]],
                title="kmeans")
Example No. 22
def k_means_function(X, K, seed):
    mixture, post = common.init(X, K, seed)
    mixture, post, cost = kmeans.run(X, mixture, post)
    return mixture, post, cost
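A hypothetical usage of this wrapper, assuming `numpy`, the course's `common`/`kmeans` modules, and the toy dataset used in the other examples:

import numpy as np

X = np.loadtxt("toy_data.txt")
# Pick the seed with the lowest k-means cost for K=3 (illustrative only)
best_mixture, best_post, best_cost = min(
    (k_means_function(X, 3, seed) for seed in range(5)),
    key=lambda result: result[2])
print("best cost:", best_cost)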
Example No. 23
import numpy as np
import kmeans
import common
import naive_em
import em

X = np.loadtxt("toy_data.txt")

Ks = [1, 2, 3, 4]
seeds = [0, 1, 2, 3, 4]
BICs = np.empty(len(Ks))

for i, K in enumerate(Ks):
    k_best_mix, k_best_post, k_best_cost = None, None, np.inf
    em_best_mix, em_best_post, em_best_ll = None, None, -np.inf
    for seed in seeds:
        init_mix, init_post = common.init(X, K, seed)
        k_mix, k_post, k_cost = kmeans.run(X, init_mix, init_post)
        em_mix, em_post, em_ll = naive_em.run(X, init_mix, init_post)
        if k_cost < k_best_cost:
            k_best_mix, k_best_post, k_best_cost = k_mix, k_post, k_cost
        if em_ll > em_best_ll:
            em_best_mix, em_best_post, em_best_ll = em_mix, em_post, em_ll
    BICs[i] = common.bic(X, em_best_mix, em_best_ll)
    common.plot(X, k_best_mix, k_best_post, "K-means K={}".format(K))
    common.plot(X, em_best_mix, em_best_post, "EM K={}".format(K))

print("BICs: ", BICs)
print("Best BIC: ", np.max(BICs))
print("Best K: ", Ks[np.argmax(BICs)])

X = np.loadtxt("netflix_incomplete.txt")
Example No. 24
import numpy as np
import kmeans
import common
import naive_em
import em

X = np.loadtxt("toy_data.txt")

# TODO: Your code here
for i in range(1, 5):
    costs = []
    for j in range(5):
        mixture, post = common.init(X, i, j)
        _, _, cost = kmeans.run(X, mixture, post)
        costs.append(cost)
        common.plot(X, mixture, post, 'test')
    print(min(costs))
Example No. 25
import numpy as np
import kmeans
import common
import naive_em
import em

X = np.loadtxt("datas/toy_data.txt")

K = [1, 2, 3, 4]
seeds = [0, 1, 2, 3, 4]

for k in K:
    KM_best_mixture, KM_best_post, KM_best_cost = None, None, np.inf
    EM_best_mixture, EM_best_post, EM_best_logvrais = None, None, -np.inf
    for seed in seeds:
        init_mixture, init_post = common.init(X, k, seed)
        # K-means model
        KM_mixture, KM_post, KM_cost = kmeans.run(X, init_mixture, init_post)
        if KM_cost < KM_best_cost:
            KM_best_mixture, KM_best_post, KM_best_cost = KM_mixture, KM_post, KM_cost
        # EM model
        EM_mixture, EM_post, EM_logvrais = naive_em.run(X, init_mixture, init_post)
        if EM_logvrais > EM_best_logvrais:
            EM_best_mixture, EM_best_post, EM_best_logvrais = EM_mixture, EM_post, EM_logvrais
    common.plot(X, KM_best_mixture, KM_best_post, f"K-means K={k}")
    common.plot(X, EM_best_mixture, EM_best_post, f"EM K={k}")
Example No. 26
import clean
import get
import kmeans
import merge
import reset
import zoning

exec(open('../pymongo_dm.py').read())

# connect to DBMS

print("Connecting to the DBMS...")

client = pymongo.MongoClient()
repo   = client.repo
repo.authenticate('djmcc_jasper', 'djmcc_jasper')

# execute scripts

reset.run(repo)
get.run(repo)
clean.run(repo)
merge.run(repo)
kmeans.run(repo)
zoning.run(repo)

# disconnect from the DBMS

print("Disconnecting from the DBMS...")

repo.logout()

# EOF
Example No. 27
mixtures_kMeans = [0, 0, 0, 0, 0]
mixtures_EM = [0, 0, 0, 0, 0]

# Posterior probs. for best seeds
posts_kMeans = [0, 0, 0, 0, 0]
posts_EM = [0, 0, 0, 0, 0]

# BIC score of cluster
bic = [0., 0., 0., 0.]

for k in range(len(K)):
    for i in range(len(seeds)):

        # Run kMeans
        mixtures_kMeans[i], posts_kMeans[i], costs_kMeans[i] = \
        kmeans.run(X, *common.init(X, K[k], seeds[i]))

        # Run Naive EM
        mixtures_EM[i], posts_EM[i], costs_EM[i] = \
        naive_em.run(X, *common.init(X, K[k], seeds[i]))

    # Print lowest cost
    print("=============== Clusters:", k + 1, "======================")
    print("Lowest cost using kMeans is:", np.min(costs_kMeans))
    print("Highest log likelihood using EM is:", np.max(costs_EM))

    # Save best seed for plotting
    best_seed_kMeans[k] = np.argmin(costs_kMeans)
    best_seed_EM[k] = np.argmax(costs_EM)

    # Plot kMeans and EM results
Example No. 28
import numpy as np
import kmeans
import common
import naive_em
import em
from scipy.stats import multivariate_normal

X = np.loadtxt("toy_data.txt")
Ks = [1, 2, 3, 4]
seeds = [0, 1, 2, 3, 4]

# =============================================================================
# 2. K-means
# =============================================================================

for K in Ks:
    for seed in seeds:
        mixture, post = common.init(X, K, seed=seed)  # Initialize K-means
        mixture, post, cost = kmeans.run(X, mixture, post)  # K-means
        common.plot(X, mixture, post, [K, seed])  # Plot initialization
        print(cost)

# =============================================================================
# 3. Expectation–maximization algorithm
# =============================================================================


def test_2dgaussian_pdf(X, mu, var):
    y1 = naive_em.pdf_2dgaussian(X, mu, var)
    y2 = multivariate_normal.pdf(X, mean=mu.reshape(2, ), cov=var[0])
    # Compare element-wise within a small tolerance
    return np.all(np.abs(y1 - y2) < 1e-6)


# 2dgaussian
Example No. 29
print('\n----- K-Means Algorithm -----\n')

seeds = [0, 1, 2, 3, 4]
K = [1, 2, 3, 4]

for k in K:
    mixtures = []
    posts = []
    costs = np.empty(len(seeds))

    for i, seed in enumerate(seeds):
        # initialize mixture model with random points
        mixture, post = common.init(X, K=k, seed=seed)

        # run k-means
        mixture, post, cost = kmeans.run(X, mixture=mixture, post=post)

        mixtures.append(mixture)
        posts.append(post)
        costs[i] = cost

    best_seed = np.argmin(costs)
    cost = costs[best_seed]
    mixture = mixtures[best_seed]
    post = posts[best_seed]

    print(f'K={k}', f'Best seed: {best_seed}', f'Cost: {cost}')
    #common.plot(X, mixture, post, title=f"K-Means, K={k}")


# -----------------------------------
import numpy as np
import kmeans
import common
import naive_em
import em

X = np.loadtxt("toy_data.txt")

# TODO: Your code here
K = np.array([1, 2, 3, 4])
seeds = np.array([0, 1, 2, 3, 4])
for i in seeds:
    mixture, post = common.init(X, K[3], i)
    mixture, post, cost = kmeans.run(X, mixture, post)
#    common.plot(X, mixture, post)