Exemplo n.º 1
0
def kmeans(d, points=3, axis=0, iter=20):
    """Cluster the vectors of ``d`` into ``points`` groups with k-means.

    Centroids are seeded with a k-means++ style weighted draw and then
    refined for ``iter`` rounds of assign-then-recompute.

    Args:
        d: Object exposing ``tovecs(axis)``; the result is used as a mapping
            of key -> vector (``.items()``, ``.values()`` and indexing).
        points: Number of centroids / clusters to produce.
        axis: Passed through unchanged to ``d.tovecs``.
        iter: Number of refinement rounds. (The name shadows the builtin but
            is kept because callers may pass it by keyword.)

    Returns:
        A ``(centroids, clusters)`` tuple. ``clusters[i]`` holds the vectors
        that were assigned to centroid ``i`` during the last assignment pass;
        with ``iter == 0`` every cluster is empty.

    Raises:
        IndexError: If ``d.tovecs(axis)`` yields no vectors.
    """
    def init_centroids(dok_vecs):
        """Choose the starting centroids from the vectors in ``dok_vecs``."""
        # 1a. Seed with the first stored vector. list(...) keeps this working
        #     when values() returns a view instead of a list.
        # NOTE(review): the original comment says "uniformly at random", but
        # the code has always taken the first value -- confirm intent.
        centroids = [list(dok_vecs.values())[0]]

        # 1b. Choose each further center c_i = x' with probability
        #     proportional to D(x')^2, the value dok.squaredistance reports
        #     for x' and its nearest already-chosen center.
        # 1c. Repeat 1b until `points` centers exist.
        for _ in range(1, points):
            # Weights are rebuilt every round: reusing the list across rounds
            # would leave stale distances from earlier rounds in the draw.
            items = []
            for key, vec in dok_vecs.items():
                # No filtering of the distances: dropping zeros would leave an
                # empty list (and a failing min) for an already-chosen center.
                nearest = min(dok.squaredistance(vec, c) for c in centroids)
                items.append((key, nearest))
            centroids.append(dok_vecs[dok.weighted_choice_bisect_compile(items)()])
        return centroids

    dok_vecs = d.tovecs(axis)
    centroids = init_centroids(dok_vecs)

    # Defined up front so iter == 0 returns empty clusters instead of failing
    # with UnboundLocalError at the return statement.
    clusters = [[] for _ in range(points)]

    # Update the centroids.
    for _ in range(iter):
        clusters = [[] for _ in range(points)]
        for vec in dok_vecs.values():
            # min() over (distance, index) pairs: equal distances resolve to
            # the lower centroid index.
            nearest = min(
                (dok.squaredistance(vec, centroids[idx]), idx)
                for idx in range(points)
            )[1]
            clusters[nearest].append(vec)
        for idx in range(points):
            # Clusters with fewer than two members keep their old centroid.
            # NOTE(review): this also skips single-member clusters -- confirm
            # that `> 1` rather than `> 0` is intended.
            if len(clusters[idx]) > 1:
                centroids[idx] = dok.mean(clusters[idx])
    return centroids, clusters
Exemplo n.º 2
0
 def init_centroids(dok_vecs):
     """Choose the starting centroids from the vectors in ``dok_vecs``.

     ``dok_vecs`` is used as a mapping of key -> vector. The number of
     centroids comes from ``points``, which is read from the enclosing scope.

     Returns:
         A list of vectors taken from ``dok_vecs``.

     Raises:
         IndexError: If ``dok_vecs`` holds no vectors.
     """
     # 1a. Seed with the first stored vector. list(...) keeps this working
     #     when values() returns a view instead of a list.
     # NOTE(review): the original comment says "uniformly at random", but the
     # code has always taken the first value -- confirm intent.
     centroids = [list(dok_vecs.values())[0]]

     # 1b. Choose each further center c_i = x' with probability proportional
     #     to D(x')^2, the value dok.squaredistance reports for x' and its
     #     nearest already-chosen center.
     # 1c. Repeat 1b until `points` centers exist.
     for _ in range(1, points):
         # Weights are rebuilt every round: reusing the list across rounds
         # would leave stale distances from earlier rounds in the draw.
         items = []
         for key, vec in dok_vecs.items():
             # No filtering of the distances: dropping zeros would leave an
             # empty list (and a failing min) for an already-chosen center.
             nearest = min(dok.squaredistance(vec, c) for c in centroids)
             items.append((key, nearest))
         centroids.append(dok_vecs[dok.weighted_choice_bisect_compile(items)()])
     return centroids