예제 #1
0
def kmeansTest(k=2, n=20, verbose=False):
    """Run a small k-means demo on two generated sample groups.

    Seeds ``random`` with 0, builds two groups of ``n`` samples each through
    ``util.genDistribution`` (the second group uses an x mean shifted by 3
    and a y mean shifted by 1), plots all samples as a single cluster, runs
    ``kmeans`` on them, plots the resulting clusters, and prints each one.

    Args:
        k: number of clusters requested from ``kmeans``.
        n: number of samples passed to ``util.genDistribution`` per group.
        verbose: forwarded to ``kmeans`` and to the second ``plot_cluster``
            call.
    """
    # Fixed seed so repeated runs use the same random sequence.
    random.seed(0)

    x_mean, x_sd = 3, 1
    y_mean, y_sd = 5, 1

    # The last argument looks like a name prefix for the samples -- confirm
    # against util.genDistribution.
    first_group = util.genDistribution(x_mean, x_sd, y_mean, y_sd, n, '1.')
    second_group = util.genDistribution(
        x_mean + 3, x_sd, y_mean + 1, y_sd, n, '2.')
    combined = first_group + second_group

    print("before clustering")
    # plot_cluster takes a list of clusters, so wrap everything in one.
    everything = cluster.Cluster(combined)
    util.plot_cluster([everything])

    print("after clustering")
    clusters = kmeans(combined, k, verbose)
    # NOTE(review): verbose is passed positionally here; confirm it lands on
    # the intended plot_cluster parameter.
    util.plot_cluster(clusters, verbose)

    print('Final result')
    for found in clusters:
        print('', found)
예제 #2
0
    #### Implement the centroid updating function here!
    def update(self, samples):
        """Swap this cluster's samples for ``samples``.

        The work is delegated to ``helper.update``, and whatever it returns
        is handed back unchanged.

        Returns:
            How much the centroid has changed, as reported by
            ``helper.update``.
        """
        centroid_shift = helper.update(self, samples)
        return centroid_shift

    def __str__(self):
        """Describe the cluster: its centroid features and sorted sample names.

        The names come from ``getName()`` on each entry of ``self.samples``
        and are listed in sorted order, separated by ``', '``.
        """
        member_names = sorted(member.getName() for member in self.samples)
        header = ('Cluster with centroid '
                  + str(self.centroid.getFeatures())
                  + ' contains:\n  ')
        listing = ''.join(name + ', ' for name in member_names)
        # Every name carries a trailing ', '; slicing off the last two
        # characters removes the final separator.
        return (header + listing)[:-2]


if __name__ == "__main__":
    # Build a cluster from a default-generated sample set, report its
    # centroid, and plot it.
    initial_samples = util.genDistribution()
    demo_cluster = Cluster(initial_samples)
    print(demo_cluster.centroid)
    print("cluster center: ", demo_cluster.centroid.features)
    util.plot_cluster([demo_cluster])

    # Hand the cluster a different sample set and report how far the
    # centroid moved as a result.
    replacement_samples = util.genDistribution(1, 1, 1, 1, 30)
    shift = demo_cluster.update(replacement_samples)
    print("center moved: ", shift)
    # plot_cluster takes a list of clusters, hence the one-element list.
    util.plot_cluster([demo_cluster])
예제 #3
0
        all_cluster.append(cl.Cluster(tmp_data))
        data += tmp_data

    def onclick(event):
        """Classify the clicked location with k-NN and draw it on the figure.

        Builds a new sample at the click position, runs ``knn`` on it against
        ``data`` with ``K`` neighbours, appends it to ``data``, and scatters
        it with the marker and colour selected by its label's position in
        ``LABELS``.

        Args:
            event: matplotlib mouse event; ``xdata``/``ydata`` hold the click
                position in data coordinates.
        """
        # Matplotlib reports xdata/ydata as None when the click lands outside
        # the axes; there is no data-space point to classify in that case.
        if event.xdata is None or event.ydata is None:
            return

        # Creating a new point and finding the k nearest neighbours
        # (knn presumably assigns the label read back below -- confirm).
        new = sample.Sample('', [event.xdata, event.ydata], '')
        knn(new, data, K)

        # draw the new point
        data.append(new)
        features = new.getFeatures()
        label = new.getLabel()
        # One index selects both the marker and the colour for this label.
        style_idx = LABELS.index(label)
        pylab.scatter([features[0]],
                      [features[1]],
                      label=label,
                      marker=util.make_cmarkers()[style_idx],
                      color=util.make_cmap()[style_idx])
        pylab.draw()

    # start plotting
    fig = pylab.figure()
    cid = fig.canvas.mpl_connect('button_press_event', onclick)
    util.plot_cluster(all_cluster, centroid=False)
    pylab.show()

    # new_pt = sample.Sample('', [0.2, 0.3], '')
    # knn(new_pt, data, K)
    #
    # data.append(new_pt)
    # print("\nafter....")
    # util.plotSamples(data)
    # plt.show()
    pass
    """
    fill the array allSamples to hold the samples, each sample
    takes two attributes of an iris instance
    """
    for line in data:
        content = line.strip().split(",")
        d = sample.Sample('', [float(content[1]), float(content[3])])
        allSamples.append(d)
    # ---- end of your code --- #

    verbose = False
    k = 3
    print("before clustering")
    unclustered = [kmeans.Cluster(allSamples)]
    util.plot_cluster(unclustered)

    clusters = unclustered
    print("after clustering")
    # IMPLEMENTATION: apply k means to cluster the samples
    # ---- start your code ---- #
    pass
    clusters = kmeans.kmeans(allSamples, 3, verbose)

    # ---- end of your code --- #

    util.plot_cluster(clusters, verbose)
    """ bonus """
    normalized_allSamples = allSamples
    print("after normalizing")