Exemplo n.º 1
0
def main():
    """Run the clustering experiment on a subset of 'digits-embedding.csv'.

    Steps, in order:
      1. Read the data file via ``MyKmeans.readData``.
      2. Build a subset for the numbers 2, 4, 6 and 7 with ``createSubset``.
      3. Visualize that subset with ``visualizeData``.
      4. For each K in [2, 4, 8, 16], call ``iterClustering`` with 5 repeated
         trials and obtain the average Silhouette Coefficient (SC) for that K.
      5. Plot the averaged SC values against K with ``plotAvgSC``.
    """
    # Load the full dataset through a MyKmeans instance
    km = MyKmeans()
    parsedData = km.readData('digits-embedding.csv')

    # Restrict the data to the numbers of interest
    sub_nums = [2, 4, 6, 7]
    subset = createSubset(parsedData, sub_nums)

    #--- The Tasks ---#
    # (1) Show the subset using its 2D features
    visualizeData(subset, sub_nums)

    # (2) One averaged SC per K; each value comes from 5 trials, and 50 is
    # forwarded as iterClustering's iterCount
    # NOTE(review): 50 is presumably the per-trial iteration cap -- confirm
    # against MyKmeans.cluster
    K = [2, 4, 8, 16]
    avg_scores = [iterClustering(subset, parsedData, 5, 50, k) for k in K]

    # Plot average SC as a function of K
    plotAvgSC(K, avg_scores)
Exemplo n.º 2
0
def calAvgSC(clusters, parsedData):
    """Return the mean Silhouette Coefficient (SC) across clustering trials.

    Args:
        clusters: Indexable collection exposing ``shape``; ``clusters[i]`` is
            handed to ``MyKmeans.calculateSC`` as the result of trial ``i``.
        parsedData: Forwarded unchanged to ``MyKmeans.calculateSC``.

    Returns:
        The sum of the per-trial SC values divided by ``clusters.shape[0]``.
    """
    scorer = MyKmeans()
    n_trials = clusters.shape[0]
    # Score every trial by index and accumulate the total
    total = sum(
        scorer.calculateSC(clusters[i], parsedData) for i in range(n_trials)
    )
    return total / n_trials
Exemplo n.º 3
0
def iterClustering(subset, parsedData, iterNum, iterCount, K):
    """Cluster ``subset`` repeatedly and return the average SC of the trials.

    Args:
        subset: Data handed to ``MyKmeans.cluster`` on every trial.
        parsedData: Forwarded to ``calAvgSC`` for scoring.
        iterNum: Number of independent clustering trials to run.
        iterCount: Passed as the second argument of ``MyKmeans.cluster``.
        K: Passed as the third argument of ``MyKmeans.cluster``.

    Returns:
        The value returned by ``calAvgSC`` for the collected trial results.
    """
    km = MyKmeans()
    # A fresh empty list is supplied as the last argument on every trial
    trials = [km.cluster(subset, iterCount, K, []) for _ in range(iterNum)]
    return calAvgSC(np.array(trials), parsedData)
def main():
    # Defining an object for the class MyKmeans
    km = MyKmeans()
    # Parsing the data file and acquiring a dataframe
    parsedData = km.readData('digits-embedding.csv')
    # Defining the numbers you need within the datafile to create a subset
    sub_nums = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    # Creating the subset
    subset = createSubset(parsedData, sub_nums)

    #--- The Tasks ---#
    # (1) Visualizing the images based on the 2D features
    visualizeData(subset, sub_nums)