Exemplo n.º 1
0
def tuneTsneBisecting(createOutput):
    """Embed the digits data with t-SNE and cluster it with bisecting K-Means.

    The digits matrix returned by ``common.readDigitsFile`` is projected to
    two dimensions with t-SNE and then clustered ``numTrials`` times into ten
    clusters labelled 1..10.  The trial with the lowest
    ``finalClusterErrorTotal`` is remembered as the best one.

    Args:
        createOutput: When truthy, the cluster assignments of the best trial
            are passed to ``common.writeResultsFile``.

    Returns:
        None. Progress and results are reported with ``print``.
    """
    numTrials = 5
    # Kept in a local so the final report can state which perplexity was used.
    perplexity = 35

    X = common.readDigitsFile()
    print("Shape of Digits file  = " + str(X.shape))
    print("Starting t-SNE.")
    # random_state is fixed, so the embedding is the same for every trial.
    # NOTE(review): `n_iter` was renamed `max_iter` in newer scikit-learn
    # releases - confirm against the installed version.
    tsne = TSNE(n_components=2,
                init='random',
                random_state=0,
                perplexity=perplexity,
                learning_rate=400,
                n_iter=2000)
    X_new = tsne.fit_transform(X)
    print("Finished t-SNE, and got X_new. Shape = " + str(X_new.shape))

    distanceFunc = common.euclideanDistanceFunction
    centerFunc = common.digitsDataCenterFunction
    clusterLabels = np.arange(1, 11, dtype=np.int8)
    bestAssignments = None
    bestErrorTotal = None
    for z in range(numTrials):
        # The third constructor argument (3) is forwarded as-is; its meaning
        # is defined by kmclustering.BisectingKMeansClusteringModel.
        model = kmclustering.BisectingKMeansClusteringModel(
            distanceFunc, centerFunc, 3)
        model.runBisectingKMeansClustering(X_new, clusterLabels,
                                           kminit.initKMeansSampling)

        print("====== Done with Trial # " + str(z + 1) + " / " +
              str(numTrials) + ", Error Total = " +
              str(model.finalClusterErrorTotal) + " ======")

        # Keep the trial with the smallest total error seen so far.
        if bestErrorTotal is None or model.finalClusterErrorTotal < bestErrorTotal:
            bestErrorTotal = model.finalClusterErrorTotal
            bestAssignments = model.finalClusterAssignments

    # The original message printed "P=." with the perplexity value missing.
    print("Finished K-Means with t-SNE, P=" + str(perplexity) +
          ". Best Error Total = " + str(bestErrorTotal))
    if createOutput:
        common.writeResultsFile(bestAssignments)
Exemplo n.º 2
0
def chartClusterErrorVsClusterCount():
    """Print the average K-Means error for a range of cluster counts.

    The digits matrix from ``common.readDigitsFile`` is divided by 255.0
    (presumably scaling 8-bit pixel values to [0, 1] - confirm against the
    data file), embedded into two dimensions with t-SNE, and then clustered
    with basic K-Means for K = 2, 4, ..., 20.  Each K is run five times and
    the mean of ``finalClusterErrorTotal`` over those runs is printed at the
    end.

    Returns:
        None. All results are reported with ``print``.
    """
    digits = common.readDigitsFile()
    digits = digits / 255.0
    print(f"Shape of Digits file  = {digits.shape!s}")
    print("Starting t-SNE.")
    embedding = TSNE(
        n_components=2,
        init='random',
        random_state=0,
        perplexity=100,
        learning_rate=400,
        n_iter=2000,
    ).fit_transform(digits)
    print(f"Finished t-SNE, and got X_new. Shape = {embedding.shape!s}")

    measureDistance = common.euclideanDistanceFunction
    computeCenter = common.digitsDataCenterFunction
    trialCount = 5
    candidateSizes = list(range(2, 21, 2))
    meanErrorBySize = {}

    for k in candidateSizes:
        labels = np.arange(1, k + 1, dtype=np.int8)
        trialErrors = []

        for trialNumber in range(1, trialCount + 1):
            model = kmclustering.BasicKMeansClusteringModel(
                measureDistance, computeCenter, 100)
            model.runBasicKMeansClustering(embedding, labels,
                                           kminit.initKMeansSampling)

            trialError = model.finalClusterErrorTotal
            print(f"====== Done with Trial # {trialNumber!s} / {trialCount!s}"
                  f" for K = {k!s}, Error Total = {trialError!s} ======")
            trialErrors.append(trialError)

        # sum() starts from 0 and adds left to right, like a running total.
        meanErrorBySize[k] = sum(trialErrors) / trialCount

    print("Done with K-Means.")

    for k in candidateSizes:
        print(f"For K = {k!s}     Error is:      {meanErrorBySize[k]!s}")
Exemplo n.º 3
0
def submission02(createOutput, numTrials):
    """Cluster the normalised digits data with basic K-Means, keeping the best run.

    The digits matrix from ``common.readDigitsFile`` is normalised with
    ``normalize(X, norm='l2', axis=0)`` and clustered ``numTrials`` times into
    ten clusters labelled 1..10.  The trial with the lowest
    ``finalClusterErrorTotal`` is remembered as the best one.

    Args:
        createOutput: When truthy, the assignments of the best trial are
            passed to ``common.writeResultsFile``.
        numTrials: Number of independent K-Means runs.  If it is zero or
            negative no trial runs and no results file is written.

    Returns:
        None. Progress and results are reported with ``print``.
    """
    X = common.readDigitsFile()
    print("Shape of Digits file  = " + str(X.shape))

    # NOTE(review): this PCA -> t-SNE embedding is only used for the shape
    # print below; the clustering itself runs on X_normalized.  Confirm
    # whether X_new was meant to be the clustering input.
    pca = PCA(n_components=50)
    X_temp = pca.fit_transform(X)
    tsne = TSNE(n_components=2, init='pca')
    X_new = tsne.fit_transform(X_temp)
    print("Got X_new. Shape = " + str(X_new.shape))

    # Presumably sklearn.preprocessing.normalize, where axis=0 normalises
    # each feature column rather than each sample row - TODO confirm import.
    X_normalized = normalize(X, norm='l2', axis=0)
    distanceFunc = common.euclideanDistanceFunction
    centerFunc = common.digitsDataCenterFunction
    clusterLabels = np.arange(1, 11, dtype=np.int8)
    bestAssignments = None
    bestErrorTotal = None
    for z in range(numTrials):
        # The third constructor argument (55) is forwarded as-is; its meaning
        # is defined by kmclustering.BasicKMeansClusteringModel.
        model = kmclustering.BasicKMeansClusteringModel(
            distanceFunc, centerFunc, 55)
        model.runBasicKMeansClustering(X_normalized, clusterLabels,
                                       kminit.initKMeansSampling)

        print("====== Done with Trial # " + str(z + 1) + " / " +
              str(numTrials) + ", Error Total = " +
              str(model.finalClusterErrorTotal) + " ======")

        # Keep the trial with the smallest total error seen so far.
        if bestErrorTotal is None or model.finalClusterErrorTotal < bestErrorTotal:
            bestErrorTotal = model.finalClusterErrorTotal
            bestAssignments = model.finalClusterAssignments

    print("Finished K-Means. Best Error Total = " + str(bestErrorTotal))
    if createOutput:
        if bestAssignments is None:
            # No trial produced assignments (e.g. numTrials <= 0); handing
            # None to the results writer would not produce a usable file.
            print("No cluster assignments available (numTrials = " +
                  str(numTrials) + "); skipping results file.")
        else:
            common.writeResultsFile(bestAssignments)