示例#1
0
文件: main.py 项目: wnsgur4322/CS-434
def apply_kmeans(do_pca, x_train, y_train, x_test, y_test, kmeans_max_iter, kmeans_max_k):
    """Run k-means for k = 1 .. kmeans_max_k - 1 and plot repeat-averaged results.

    Every k is fitted six times on ``x_train``; the per-iteration SSE curve,
    the minimum SSE and the purity are each averaged over those six fits
    before being plotted.

    Args:
        do_pca: Value formatted with ``%d`` into the plot file names.
        x_train: Training samples passed to ``KMeans.fit`` / ``get_purity``.
        y_train: Training labels passed to ``KMeans.get_purity``.
        x_test: Unused; kept so existing callers keep working.
        y_test: Unused; kept so existing callers keep working.
        kmeans_max_iter: Second constructor argument of ``KMeans``.
        kmeans_max_k: Exclusive upper bound on the number of clusters tried.

    Returns:
        None. Results are printed and handed to the plotting helpers.
    """
    print('kmeans\n')

    # The original loop was `range(1, 7)`, i.e. six passes over all k.
    num_repeats = 6
    k_values = range(1, kmeans_max_k)

    # Running totals, one slot per k (slot index == k - 1).
    sse_curve_sums = [None] * len(k_values)
    min_sse_sums = [0.0] * len(k_values)
    purity_sums = [0.0] * len(k_values)

    for _ in range(num_repeats):
        for slot, k in enumerate(k_values):
            kmeans = KMeans(k, kmeans_max_iter)
            # assumes fit() returns one SSE value per iteration -- TODO confirm
            sse_vs_iter = kmeans.fit(x_train)

            if sse_curve_sums[slot] is None:
                sse_curve_sums[slot] = list(sse_vs_iter)
            else:
                # Element-wise accumulation. zip() stops at the shorter
                # curve, so a fit that returns fewer iterations truncates
                # the averaged curve instead of raising IndexError.
                sse_curve_sums[slot] = [
                    total + value
                    for total, value in zip(sse_curve_sums[slot], sse_vs_iter)
                ]

            purity_sums[slot] += kmeans.get_purity(x_train, y_train)
            min_sse_sums[slot] += min(sse_vs_iter)

    train_sses_vs_iter = [
        [total / num_repeats for total in curve] for curve in sse_curve_sums
    ]
    train_sses_vs_k = [total / num_repeats for total in min_sse_sums]
    train_purities_vs_k = [total / num_repeats for total in purity_sums]

    for purity in train_purities_vs_k:
        print("Purity: ", purity)

    plot_y_vs_x_list(train_sses_vs_iter, x_label='iter', y_label='sse',
                     save_path='plot_sse_vs_k_subplots_%d'%do_pca)
    plot_y_vs_x(train_sses_vs_k, x_label='k', y_label='sse',
                save_path='plot_sse_vs_k_%d'%do_pca)
    plot_y_vs_x(train_purities_vs_k, x_label='k', y_label='purities',
                save_path='plot_purity_vs_k_%d'%do_pca)
def apply_kmeans(do_pca, x_train, y_train, kmeans_max_iter, kmeans_max_k):
    """Fit k-means with six clusters five times and plot the SSE curves.

    Two plots are produced: the individual per-iteration SSE curve of every
    run, and the per-iteration SSE averaged over the five runs.

    Args:
        do_pca: Value formatted with ``%d`` into the plot file names.
        x_train: Training samples passed to ``KMeans.fit``.
        y_train: Unused here; kept for interface compatibility.
        kmeans_max_iter: Second constructor argument of ``KMeans``.
        kmeans_max_k: Unused here; the cluster count is fixed at 6.

    Returns:
        None. Results are handed to the plotting helpers.
    """
    print('kmeans\n')
    num_runs = 5
    num_clusters = 6

    train_sses_vs_iter = []
    sse_totals = None
    for _ in range(num_runs):
        kmeans = KMeans(num_clusters, kmeans_max_iter)
        # assumes fit() returns one SSE value per iteration -- TODO confirm
        sse_vs_iter = kmeans.fit(x_train)
        train_sses_vs_iter.append(sse_vs_iter)

        if sse_totals is None:
            sse_totals = list(sse_vs_iter)
        else:
            # zip() truncates to the shorter curve, so runs of different
            # length cannot trigger an IndexError.
            sse_totals = [
                total + value for total, value in zip(sse_totals, sse_vs_iter)
            ]

    # Divide the accumulated sums so the plotted curve is a true average.
    avg_list = [total / num_runs for total in sse_totals]

    plot_y_vs_x_list(train_sses_vs_iter,
                     x_label='iter',
                     y_label='sse',
                     save_path='plot_sse_vs_k_subplots_%d' % do_pca)
    plot_y_vs_x(avg_list,
                x_label='iterations',
                y_label='sse',
                save_path='plot_sse_vs_iter_%d' % do_pca)
示例#3
0
文件: main.py 项目: diedricj/CS434_A4
def apply_kmeans(do_pca, x_train, y_train, kmeans_max_iter, kmeans_max_k):
    """Fit k-means once per k in 1 .. kmeans_max_k - 1 and plot the results.

    For each k the SSE curve returned by ``KMeans.fit``, its minimum and the
    training purity are recorded; progress and elapsed wall-clock time are
    printed before every fit.

    Args:
        do_pca: Value formatted with ``%d`` into the plot file names.
        x_train: Training samples passed to ``KMeans.fit`` / ``get_purity``.
        y_train: Training labels passed to ``KMeans.get_purity``.
        kmeans_max_iter: Second constructor argument of ``KMeans``.
        kmeans_max_k: Exclusive upper bound on the number of clusters tried.
    """
    print('kmeans\n')

    sse_curves = []
    lowest_sses = []
    purities = []

    began_at = time.time()
    for n_clusters in range(1, kmeans_max_k):
        elapsed_text = "\telapsed time: %.2f" % (time.time() - began_at)
        print("On step k =", n_clusters, "of", kmeans_max_k, elapsed_text, "s")

        model = KMeans(n_clusters, kmeans_max_iter)
        curve = model.fit(x_train)
        sse_curves.append(curve)
        purities.append(model.get_purity(x_train, y_train))
        lowest_sses.append(min(curve))

    # One figure with a curve per k, then the two summary plots against k.
    plot_y_vs_x_list(sse_curves, x_label='iter', y_label='sse',
                     save_path='plot_sse_vs_k_subplots_%d' % do_pca)
    plot_y_vs_x(lowest_sses, x_label='k', y_label='sse',
                save_path='plot_sse_vs_k_%d' % do_pca)
    plot_y_vs_x(purities, x_label='k', y_label='purities',
                save_path='plot_purity_vs_k_%d' % do_pca)
示例#4
0
def apply_kmeans(do_pca, x_train, y_train, x_test, y_test, kmeans_max_iter,
                 kmeans_max_k):
    """Fit k-means once per k in 1 .. kmeans_max_k - 1 and plot the results.

    Args:
        do_pca: Value formatted with ``%d`` into the plot file names.
        x_train: Training samples passed to ``KMeans.fit`` / ``get_purity``.
        y_train: Training labels passed to ``KMeans.get_purity``.
        x_test: Not used by this function.
        y_test: Not used by this function.
        kmeans_max_iter: Second constructor argument of ``KMeans``.
        kmeans_max_k: Exclusive upper bound on the number of clusters tried.
    """
    print('kmeans\n')

    # One (sse_curve, purity, min_sse) record per value of k, in order.
    records = []
    for n_clusters in range(1, kmeans_max_k):
        model = KMeans(n_clusters, kmeans_max_iter)
        curve = model.fit(x_train)
        purity = model.get_purity(x_train, y_train)
        records.append((curve, purity, min(curve)))

    train_sses_vs_iter = [record[0] for record in records]
    train_purities_vs_k = [record[1] for record in records]
    train_sses_vs_k = [record[2] for record in records]

    suffix_args = do_pca
    plot_y_vs_x_list(train_sses_vs_iter, x_label='iter', y_label='sse',
                     save_path='plot_sse_vs_k_subplots_%d' % suffix_args)
    plot_y_vs_x(train_sses_vs_k, x_label='k', y_label='sse',
                save_path='plot_sse_vs_k_%d' % suffix_args)
    plot_y_vs_x(train_purities_vs_k, x_label='k', y_label='purities',
                save_path='plot_purity_vs_k_%d' % suffix_args)
示例#5
0
def apply_kmeans3(do_pca, x_train, y_train, kmeans_max_iter, kmeans_max_k):
    """Plot the purity averaged over five fits for each k in 1 .. 10.

    The five purities of every k are printed, followed by the list of
    per-k averages and the largest average.

    Args:
        do_pca: Value formatted with ``%d`` into the plot file name.
        x_train: Training samples passed to ``KMeans.fit`` / ``get_purity``.
        y_train: Training labels passed to ``KMeans.get_purity``.
        kmeans_max_iter: Second constructor argument of ``KMeans``.
        kmeans_max_k: Not used; the range of k is fixed at 1 .. 10.
    """
    print('kmeans\n')

    # Gathered for every fit but not consumed by anything below.
    all_sse_curves = []
    all_min_sses = []

    mean_purities = []
    for n_clusters in range(1, 11):
        print('k:', n_clusters)

        purities_for_k = []
        for _ in range(5):
            model = KMeans(n_clusters, kmeans_max_iter)
            curve = model.fit(x_train)
            all_sse_curves.append(curve)
            purities_for_k.append(model.get_purity(x_train, y_train))
            all_min_sses.append(min(curve))

        print(purities_for_k)
        mean_purities.append(sum(purities_for_k) / len(purities_for_k))

    print(mean_purities)
    print('max purity', max(mean_purities))
    plot_y_vs_x(mean_purities, x_label='k', y_label='purities',
                save_path='plot_purity_vs_k_%d' % do_pca)
示例#6
0
def apply_kmeans1(do_pca, x_train, y_train, kmeans_max_iter, kmeans_max_k):
    """Fit k-means with six clusters five times and plot the mean SSE curve.

    The mean is taken per iteration index across the five runs; the number
    of iteration indices is taken from the first run's curve.

    Args:
        do_pca: Value formatted with ``%d`` into the plot file name.
        x_train: Training samples passed to ``KMeans.fit`` / ``get_purity``.
        y_train: Training labels passed to ``KMeans.get_purity``.
        kmeans_max_iter: Second constructor argument of ``KMeans``.
        kmeans_max_k: Not used; the cluster count is fixed at 6.
    """
    print('kmeans\n')

    sse_curves = []
    # Purity and minimum SSE are recorded per run but not used afterwards.
    purities = []
    min_sses = []
    for _ in range(5):
        model = KMeans(6, kmeans_max_iter)
        curve = model.fit(x_train)
        sse_curves.append(curve)
        purities.append(model.get_purity(x_train, y_train))
        min_sses.append(min(curve))

    # Column-wise mean over the five runs.
    mean_curve = [
        sum(sse_curves[row][col] for row in range(5)) / 5
        for col in range(len(sse_curves[0]))
    ]
    # The list-plotting helper is given a list of curves, so wrap the one curve.
    result = [mean_curve]

    print(result)

    plot_y_vs_x_list(result, x_label='iter', y_label='sse',
                     save_path='plot_sse_vs_k_subplots_%d' % do_pca)
示例#7
0
def apply_kmeans(do_pca, x_train, y_train, kmeans_max_iter, kmeans_max_k):
    """Run k-means five times per k and plot the trial-averaged results.

    For each k in 1 .. kmeans_max_k - 1 the per-iteration SSE curve and the
    purity are averaged over five fits to reduce the effect of the random
    start; the minimum of the averaged curve is used as the SSE for that k.

    Args:
        do_pca: Value formatted with ``%d`` into the plot file names.
        x_train: Training samples passed to ``KMeans.fit`` / ``get_purity``.
        y_train: Training labels passed to ``KMeans.get_purity``.
        kmeans_max_iter: Second constructor argument of ``KMeans``.
        kmeans_max_k: Exclusive upper bound on the number of clusters tried.

    Returns:
        None. Results are handed to the plotting helpers.
    """
    print('kmeans\n')
    num_trials = 5

    train_sses_vs_iter = []
    train_sses_vs_k = []
    train_purities_vs_k = []

    for k in range(1, kmeans_max_k):
        sse_totals = None
        purity_total = 0.

        # do five tests to reduce effect of random start
        for _ in range(num_trials):
            kmeans = KMeans(k, kmeans_max_iter)
            sse = kmeans.fit(x_train)

            # `is None` rather than `== None`: comparing a NumPy array with
            # `==` is element-wise and cannot be used as a plain condition.
            if sse_totals is None:
                # Copy so the object returned by fit() is never mutated.
                sse_totals = list(sse)
            else:
                # zip() truncates to the shorter curve, keeping every
                # remaining slot a sum over all trials so far.
                sse_totals = [
                    total + value for total, value in zip(sse_totals, sse)
                ]

            purity_total += kmeans.get_purity(x_train, y_train)

        avg_sses = [total / num_trials for total in sse_totals]

        train_sses_vs_iter.append(avg_sses)
        train_purities_vs_k.append(purity_total / num_trials)
        train_sses_vs_k.append(min(avg_sses))

    plot_y_vs_x_list(train_sses_vs_iter,
                     x_label='iter',
                     y_label='sse',
                     save_path='plot_sse_vs_k_subplots_%d' % do_pca)
    plot_y_vs_x(train_sses_vs_k,
                x_label='k',
                y_label='sse',
                save_path='plot_sse_vs_k_%d' % do_pca)
    plot_y_vs_x(train_purities_vs_k,
                x_label='k',
                y_label='purities',
                save_path='plot_purity_vs_k_%d' % do_pca)
def apply_kmeans_3(do_pca, x_train, y_train, kmeans_max_iter, kmeans_max_k):
    """Plot the purity averaged over five fits for each k in 1 .. 10.

    Args:
        do_pca: Value formatted with ``%d`` into the plot file name.
        x_train: Training samples passed to ``KMeans.fit`` / ``get_purity``.
        y_train: Training labels passed to ``KMeans.get_purity``.
        kmeans_max_iter: Second constructor argument of ``KMeans``.
        kmeans_max_k: Not used; the range of k is fixed at 1 .. 10.

    Returns:
        None. The averages are handed to ``plot_y_vs_x``.
    """
    runs_per_k = 5
    averg_list = []

    for k in range(1, 11):
        # Start a fresh list for every k so the average covers only this
        # k's runs, not the runs of all smaller k as well.
        purities_for_k = []
        for _ in range(runs_per_k):
            kmeans = KMeans(k, kmeans_max_iter)
            kmeans.fit(x_train)
            purities_for_k.append(kmeans.get_purity(x_train, y_train))
        averg_list.append(sum(purities_for_k) / len(purities_for_k))

    # plot the average purity
    plot_y_vs_x(averg_list,
                x_label='k',
                y_label='purities',
                save_path='plot_purity_vs_k_%d' % do_pca)
def apply_kmeans_2(do_pca, x_train, y_train, kmeans_max_iter, kmeans_max_k):
    """Plot the minimum SSE averaged over five fits for each k in 1 .. 10.

    Args:
        do_pca: Value formatted with ``%d`` into the plot file name.
        x_train: Training samples passed to ``KMeans.fit``.
        y_train: Not used by this function.
        kmeans_max_iter: Second constructor argument of ``KMeans``.
        kmeans_max_k: Not used; the range of k is fixed at 1 .. 10.

    Returns:
        None. The averages are handed to ``plot_y_vs_x``.
    """
    print('kmeans\n')
    runs_per_k = 5
    avg_me = []

    for k in range(1, 11):
        # Start a fresh list for every k so the average covers only this
        # k's runs, not the runs of all smaller k as well.
        min_sses_for_k = []
        for _ in range(runs_per_k):
            kmeans = KMeans(k, kmeans_max_iter)
            sse_vs_iter = kmeans.fit(x_train)
            min_sses_for_k.append(min(sse_vs_iter))
        avg_me.append(sum(min_sses_for_k) / len(min_sses_for_k))

    plot_y_vs_x(avg_me,
                x_label='k',
                y_label='sse',
                save_path='plot_sse_vs_k_%d' % do_pca)
示例#10
0
文件: main.py 项目: lunng/CS434_A4
def apply_kmeans_avg(x_train, y_train, kmeans_max_iter, k, iterations=5):
    """Average k-means results for a single ``k`` over several fits.

    Args:
        x_train: Training samples passed to ``KMeans.fit`` / ``get_purity``.
        y_train: Training labels passed to ``KMeans.get_purity``.
        kmeans_max_iter: Second constructor argument of ``KMeans``.
        k: Number of clusters handed to ``KMeans``.
        iterations: How many independent fits to average over.

    Returns:
        A 3-tuple ``(mean_sse_curve, mean_min_sse, mean_purity)`` where the
        first item is a plain list and the other two are the summed values
        divided by ``iterations``.
    """
    curve_total = None
    min_sse_total = 0
    purity_total = 0
    print("")
    for run_idx in range(iterations):
        print("On step ", run_idx + 1, "of", iterations, "for k =", k)
        model = KMeans(k, kmeans_max_iter)
        curve = np.array(model.fit(x_train))

        # The accumulator is sized lazily from the first curve seen.
        if curve_total is None:
            curve_total = np.zeros(len(curve))
        curve_total += curve

        purity_total += model.get_purity(x_train, y_train)
        min_sse_total += curve.min()

    mean_curve = (curve_total / iterations).tolist()
    return mean_curve, min_sse_total / iterations, purity_total / iterations
def apply_kmeans(do_pca, x_train, y_train, kmeans_max_iter, kmeans_max_k):
    """Run k-means five times per k and plot the run-averaged results.

    For each k in 1 .. kmeans_max_k - 1 the minimum SSE, the purity and the
    per-iteration SSE curve are averaged over five fits. The per-k averages
    are printed and then plotted.

    Args:
        do_pca: Value formatted with ``%d`` into the plot file names.
        x_train: Training samples passed to ``KMeans.fit`` / ``get_purity``.
        y_train: Training labels passed to ``KMeans.get_purity``.
        kmeans_max_iter: Second constructor argument of ``KMeans``.
        kmeans_max_k: Exclusive upper bound on the number of clusters tried.

    Returns:
        None. Results are printed and handed to the plotting helpers.
    """
    runs_per_k = 5

    train_sses_vs_iter = []
    train_sses_vs_k = []
    train_purities_vs_k = []

    for k in range(1, kmeans_max_k):
        # Totals are reset for every k; carrying them over would mix the
        # previous k's average into this k's result.
        sses_sum = 0
        purities_sum = 0
        curve_totals = None

        for _ in range(runs_per_k):
            kmeans = KMeans(k, kmeans_max_iter)
            # assumes fit() returns one SSE value per iteration -- TODO confirm
            sse_vs_iter = kmeans.fit(x_train)
            sses_sum += min(sse_vs_iter)
            purities_sum += kmeans.get_purity(x_train, y_train)

            if curve_totals is None:
                curve_totals = list(sse_vs_iter)
            else:
                # zip() truncates to the shorter curve if lengths differ.
                curve_totals = [
                    total + value
                    for total, value in zip(curve_totals, sse_vs_iter)
                ]

        print(k)
        # Populate the per-iteration curves so the subplot figure has data.
        train_sses_vs_iter.append(
            [total / runs_per_k for total in curve_totals])
        train_sses_vs_k.append(sses_sum / runs_per_k)
        train_purities_vs_k.append(purities_sum / runs_per_k)

    print(train_sses_vs_k)
    print(train_purities_vs_k)

    plot_y_vs_x_list(train_sses_vs_iter,
                     x_label='iter',
                     y_label='sse',
                     save_path='plot_sse_vs_k_subplots_%d' % do_pca)
    plot_y_vs_x(train_sses_vs_k,
                x_label='k',
                y_label='sse',
                save_path='plot_sse_vs_k_%d' % do_pca)
    plot_y_vs_x(train_purities_vs_k,
                x_label='k',
                y_label='purities',
                save_path='plot_purity_vs_k_%d' % do_pca)