Пример #1
0
        def func(alpha, nClusters=nClusters):
            """Return the negated relative score change between two k-means runs.

            ``alpha`` is split into two vectors of ``self.nVars`` entries each;
            they become the two columns of a matrix that ``self.Z`` is
            multiplied by.  The resulting sample is clustered twice and the
            scaled relative difference of the two scores is returned with its
            sign flipped (presumably so a minimiser can maximise it -- TODO
            confirm against the caller).
            """
            # Turn each half of the flat parameter vector into a column.
            first_axis = alpha[:self.nVars].reshape((self.nVars, 1))
            second_axis = alpha[self.nVars:].reshape((self.nVars, 1))
            projection = numpy.concatenate((first_axis, second_axis), axis=1)

            projected = orange.ExampleTable(numpy.dot(self.Z, projection))

            # NOTE(review): both runs use the same number of centroids, so any
            # difference between the scores presumably comes only from the
            # initialisation.  The formula resembles Hartigan's index, which
            # compares k with k + 1 clusters -- confirm whether the second run
            # was meant to use nClusters + 1.
            km1 = orngClustering.KMeans(projected, centroids=nClusters)
            km2 = orngClustering.KMeans(projected, centroids=nClusters)

            scale = self.totalSize - nClusters - 1
            score = scale * (km1.score - km2.score) / (km2.score)
            return -score
Пример #2
0
 def runOptimization(self):
     """Run k-means for every k in the configured range and keep the best run.

     One ``orngClustering.KMeans`` run is made for each cluster count from
     ``self.optimizationFrom`` to ``self.optimizationTo`` (inclusive).  The
     ``(k, run)`` pairs are stored in ``self.optimizationRun`` and the pair
     with the best score in ``self.bestRun``; results are then shown and
     sent.  If there are fewer unique data instances than
     ``self.optimizationTo`` an error is reported and nothing is run.

     Any exception raised while optimizing is reported through
     ``self.error`` and re-raised.
     """
     unique_count = len(set(self.data))
     if self.optimizationTo > unique_count:
         self.error("Not enough unique data instances (%d) for given number of clusters (%d)." % \
                    (unique_count, self.optimizationTo))
         return

     random.seed(0)
     try:
         self.progressBarInit()
         Ks = list(range(self.optimizationFrom, self.optimizationTo + 1))
         n_runs = len(Ks)
         scoring = self.scoringMethods[self.scoring][1]

         def progress_reporter(run_index):
             # The offset must be the run's position in Ks, not the cluster
             # count k itself; using k made the bar start too high and hit
             # 100% before the last runs.  Binding run_index here also keeps
             # each callback tied to its own run.
             def report(val):
                 # assumes progressEstimate returns a percentage for the
                 # current run -- TODO confirm
                 estimate = self.progressEstimate(val)
                 self.progressBarSet(min(estimate / n_runs + run_index * 100.0 / n_runs, 100.0))
             return report

         runs = []
         for run_index, k in enumerate(Ks):
             kmeans = orngClustering.KMeans(
                 self.data,
                 centroids=k,
                 minscorechange=0,
                 nstart=self.restarts,
                 initialization=self.initializations[self.initializationType][1],
                 distance=self.distanceMeasures[self.distanceMeasure][1],
                 scoring=scoring,
                 inner_callback=progress_reporter(run_index),
             )
             runs.append((k, kmeans))
         self.optimizationRun = runs
         self.progressBarFinished()

         # Scoring functions flagged with ``minimize`` are better when lower.
         pick_best = min if getattr(scoring, "minimize", False) else max
         self.bestRun = pick_best(self.optimizationRun, key=lambda k_run: k_run[1].score)
         self.showResults()
         self.sendData()
     except Exception as ex:
         # NOTE(review): the progress bar is not finished on this path --
         # confirm whether progressBarFinished() should be called here.
         self.error(0, "An error occured while running optimization. Reason: " + str(ex))
         raise
Пример #3
0
    def runOptimization(self):
        """Run k-means for every k in the configured range and keep the best run.

        One ``orngClustering.KMeans`` run (with ``self.restarts`` restarts) is
        made for each cluster count from ``self.optimizationFrom`` to
        ``self.optimizationTo`` (inclusive), updating the progress bar from
        the inner and outer callbacks.  The ``(k, run)`` pairs are stored in
        ``self.optimizationRun`` and the best-scoring pair in
        ``self.bestRun``; results are then shown and sent.  If there are fewer
        unique data instances than ``self.optimizationTo`` an error is
        reported and nothing is run.

        Any exception raised while optimizing is reported through
        ``self.error`` and re-raised.
        """
        unique_count = len(set(self.data))
        if self.optimizationTo > unique_count:
            self.error("Not enough unique data instances (%d) for given number of clusters (%d)." % \
                       (unique_count, self.optimizationTo))
            return

        random.seed(0)
        try:
            self.progressBarInit()
            Ks = range(self.optimizationFrom, self.optimizationTo + 1)
            outer_callback_count = len(Ks) * self.restarts
            # Mutable cell so the nested callbacks can update the counter
            # without ``nonlocal`` (keeps the code valid on Python 2).
            outer_callback_state = {"restart": 0}

            # The callbacks do not depend on k, so they are defined once
            # instead of being rebuilt on every loop iteration.
            def outer_progress(km):
                outer_callback_state["restart"] += 1
                self.progressBarSet(100.0 *
                                    outer_callback_state["restart"] /
                                    outer_callback_count)

            def inner_progress(km):
                estimate = self.progressEstimate(km)
                self.progressBarSet(min(estimate / outer_callback_count + \
                                        outer_callback_state["restart"] * \
                                        100.0 / outer_callback_count,
                                        100.0))

            scoring = self.scoringMethods[self.scoring][1]
            optimizationRun = []
            for k in Ks:
                kmeans = orngClustering.KMeans(
                    self.data,
                    centroids=k,
                    minscorechange=0,
                    nstart=self.restarts,
                    initialization=self.initializations[
                        self.initializationType][1],
                    distance=self.distanceMeasures[self.distanceMeasure][1],
                    scoring=scoring,
                    outer_callback=outer_progress,
                    inner_callback=inner_progress)
                optimizationRun.append((k, kmeans))

                # presumably KMeans does not invoke outer_callback when there
                # is a single restart, so the counter is advanced by hand --
                # TODO confirm
                if self.restarts == 1:
                    outer_progress(None)

            self.optimizationRun = optimizationRun
            self.progressBarFinished()

            # Scoring functions flagged with ``minimize`` are better when
            # lower.  The key avoids tuple-parameter unpacking, which Python 3
            # removed (PEP 3113).
            pick_best = min if getattr(scoring, "minimize", False) else max
            self.bestRun = pick_best(self.optimizationRun,
                                     key=lambda k_run: k_run[1].score)
            self.showResults()
            self.sendData()
        except Exception as ex:
            # NOTE(review): the progress bar is not finished on this path --
            # confirm whether progressBarFinished() should be called here.
            self.error(
                0, "An error occured while running optimization. Reason: " +
                str(ex))
            raise
Пример #4
0
 def func(alpha, nClusters=nClusters):
     """Return the negated silhouette score of a k-means run on projected data.

     ``alpha`` is split into two vectors of ``self.nVars`` entries each;
     they become the two columns of a matrix that ``self.Z`` is multiplied
     by.  The resulting sample is clustered into ``nClusters`` clusters and
     the silhouette score is returned with its sign flipped (presumably so
     a minimiser can maximise it -- TODO confirm against the caller).
     """
     # Turn each half of the flat parameter vector into a column.
     first_axis = alpha[:self.nVars].reshape((self.nVars, 1))
     second_axis = alpha[self.nVars:].reshape((self.nVars, 1))
     projection = numpy.concatenate((first_axis, second_axis), axis=1)

     projected = orange.ExampleTable(numpy.dot(self.Z, projection))
     clustering = orngClustering.KMeans(projected, centroids=nClusters)
     return -orngClustering.score_silhouette(clustering)
Пример #5
0
    def cluster(self):
        """Cluster ``self.data`` into ``self.K`` clusters and send the result.

        Reports an error and returns without clustering when there are fewer
        unique data instances than requested clusters.  Otherwise a
        ``KMeans`` object is configured from the widget settings, stored in
        ``self.km`` and applied to the data between progress-bar init and
        finish calls.
        """
        unique_count = len(set(self.data))
        if unique_count < self.K:
            self.error("Not enough unique data instances (%d) for given number of clusters (%d)." % \
                       (unique_count, self.K))
            return

        # Fixed seed before the k-means object is built and run.
        random.seed(0)

        init_method = self.initializations[self.initializationType][1]
        distance = self.distanceMeasures[self.distanceMeasure][1]
        scoring = self.scoringMethods[self.scoring][1]

        # No data is passed here; the run is started by the call below.
        self.km = orngClustering.KMeans(centroids=self.K,
                                        minscorechange=0,
                                        nstart=self.restarts,
                                        initialization=init_method,
                                        distance=distance,
                                        scoring=scoring,
                                        inner_callback=self.clusterCallback)

        self.progressBarInit()
        self.km(self.data)
        self.sendData()
        self.progressBarFinished()
Пример #6
0
import orange
import orngClustering
import random

# Compare how many iterations k-means reports for three initialization
# methods on several data sets.
data_names = ["iris.tab", "housing.tab", "vehicle.tab"]
data_sets = [orange.ExampleTable(name) for name in data_names]

# Single-argument parenthesised print behaves the same on Python 2 and 3.
print("%10s %3s %3s %3s" % ("", "Rnd", "Div", "HC"))
for data, name in zip(data_sets, data_names):
    # Re-seed for every data set so each starts from the same random state.
    random.seed(42)
    km_random = orngClustering.KMeans(data, centroids=3)
    km_diversity = orngClustering.KMeans(
        data, centroids=3,
        initialization=orngClustering.kmeans_init_diversity)
    km_hc = orngClustering.KMeans(
        data, centroids=3,
        initialization=orngClustering.KMeans_init_hierarchicalClustering(n=100))
    print("%10s %3d %3d %3d" % (name, km_random.iteration,
                                km_diversity.iteration, km_hc.iteration))
Пример #7
0
import orange
import orngClustering

import random
# Seed the generator before clustering.
random.seed(42)

data = orange.ExampleTable("iris")
km = orngClustering.KMeans(data, 3)
# Show the last ten entries of the clusters list (presumably one cluster
# index per data instance -- TODO confirm).  Parenthesised single-argument
# print works on both Python 2 and Python 3.
print(km.clusters[-10:])

Пример #8
0
import orange
import orngClustering
import random

# Pick the number of clusters (2..9) with the highest silhouette score and
# plot the silhouette of the winning clustering.
data = orange.ExampleTable("iris")
# data = orange.ExampleTable("lung-cancer")

# Start from "no best yet" instead of a score of 0: a silhouette score need
# not be positive, and with the old initial value best_km could stay unbound
# and the final plot call would raise NameError.
best_km = None
bestscore = None
for k in range(2, 10):
    # Same seed for every k.
    random.seed(42)
    km = orngClustering.KMeans(
        data,
        k,
        initialization=orngClustering.KMeans_init_hierarchicalClustering(n=50),
        nstart=10)
    score = orngClustering.score_silhouette(km)
    print("%d: %.3f" % (k, score))
    if bestscore is None or score > bestscore:
        best_km = km
        bestscore = score

orngClustering.plot_silhouette(best_km, filename='tmp.png')
Пример #9
0
import orange
import orngClustering

# Print the silhouette score of diversity-initialized k-means for k = 2..4,
# then plot the silhouette for k = 3.
data = orange.ExampleTable("voting")
# data = orange.ExampleTable("iris")
for k in range(2, 5):
    km = orngClustering.KMeans(
        data, k, initialization=orngClustering.kmeans_init_diversity)
    score = orngClustering.score_silhouette(km)
    # Same text as the Python 2 statement ``print k, score``, but also valid
    # on Python 3.
    print("%s %s" % (k, score))

km = orngClustering.KMeans(data,
                           3,
                           initialization=orngClustering.kmeans_init_diversity)
orngClustering.plot_silhouette(km, "kmeans-silhouette.png")
import orange
import orngClustering

import random
random.seed(42)


def callback(km):
    """Print the iteration number, change count and score of ``km``.

    Intended to be passed as ``inner_callback`` to ``orngClustering.KMeans``.
    """
    # Parenthesised single-argument print: same output on Python 2, and no
    # SyntaxError on Python 3.
    print("Iteration: %d, changes: %d, score: %.4f" % (km.iteration,
                                                       km.nchanges, km.score))


# Cluster the iris data into three clusters, reporting progress through
# ``callback``.
data = orange.ExampleTable("iris")
km = orngClustering.KMeans(
    data,
    3,
    minscorechange=0,
    inner_callback=callback,
)
Пример #11
0
    xc = [float(d[attx]) for d in km.centroids]
    yc = [float(d[atty]) for d in km.centroids]
    pylab.scatter(xc, yc, marker="x", c="k", s=200)

    pylab.xlabel(attx)
    pylab.ylabel(atty)
    if title:
        pylab.title(title)
    pylab.savefig("%s-%03d.png" % (filename, km.iteration))
    pylab.close()


def in_callback(km):
    """Print the state of ``km`` and save a scatter plot for this iteration.

    Intended to be passed as ``inner_callback`` to ``orngClustering.KMeans``;
    the plot is drawn from the module-level ``data`` on the "petal width" and
    "petal length" attributes.
    """
    # Parenthesised single-argument print: same output on Python 2, and no
    # SyntaxError on Python 3.
    print("Iteration: %d, changes: %d, score: %8.6f" % (km.iteration,
                                                        km.nchanges, km.score))
    plot_scatter(data,
                 km,
                 "petal width",
                 "petal length",
                 title="Iteration %d" % km.iteration)


# Cluster the iris data into three clusters for at most ten iterations;
# ``in_callback`` prints and plots the state as the run progresses.
data = orange.ExampleTable("iris")
random.seed(42)
km = orngClustering.KMeans(
    data,
    3,
    minscorechange=0,
    maxiters=10,
    inner_callback=in_callback,
)