Exemplo n.º 1
0
def main():
    """Benchmark K-means or K-means++ over a range of K for several rounds.

    Prompts for the algorithm (1 or 2), the number of rounds ``N`` and the
    inclusive K range ``K1 K2``.  For every round and every K a cluster
    object is built, its centroids are initialised, and ``fitting()`` /
    ``updateCentroid()`` are repeated until ``fitting()`` reports zero
    changes or the iteration counter exceeds 50.  The elapsed time, the
    silhouette score and the last entry of ``sum_of_square_error`` are
    collected per round, each run is written with ``outputResult`` and the
    collected series are passed to ``outputPlot`` at the end.

    An unknown algorithm choice is reported and the function returns before
    any previous output is deleted.  Any exception raised while clustering or
    writing output is printed (type, file name, line number, message) rather
    than propagated.
    """
    timeData = []
    silhouette_scoreData = []
    sum_of_square_errorData = []

    sourceData = inputDataset()
    choose = int(
        input('Determine the clustering algorithm, 1) K-means, 2) K-means++:'))
    N = int(input('Determine running time:'))
    K1, K2 = map(
        int,
        input('Determine K range, (ex1:2 10, K=2~10; ex2:3 3, K=3):').split())

    # Without this guard an unknown choice left ``k_cluster`` unbound and only
    # surfaced as an opaque UnboundLocalError after the old output was gone.
    if choose not in (1, 2):
        print('Unknown clustering algorithm: ' + str(choose) +
              ', expected 1 or 2.')
        return

    try:
        deletePreviousOutputFile()
        for n_th in range(N):
            round_times = []
            round_silhouettes = []
            round_errors = []
            timeData.append(round_times)
            silhouette_scoreData.append(round_silhouettes)
            sum_of_square_errorData.append(round_errors)
            for K in range(K1, K2 + 1, 1):
                if choose == 1:
                    k_cluster = K_means(sourceData, K)
                else:
                    k_cluster = K_meansPP(sourceData, K)

                start = time.time()
                k_cluster.initCentroid()  # Step 1.
                iterations = 0
                while True:
                    countChangingCluster = k_cluster.fitting()  # Step 2.
                    k_cluster.updateCentroid()  # Step 3.
                    if iterations > 50 or countChangingCluster == 0:
                        break
                    iterations += 1

                end = time.time()
                elapsed = end - start

                print('Round: ' + str(n_th + 1) + ', K: ' + str(K) + ' done.')

                # The score is the costly part of the bookkeeping; compute it
                # once and reuse it for both the series and the result file.
                silhouette = metrics.silhouette_score(sourceData,
                                                      k_cluster.belongCluster)

                round_times.append(elapsed)
                round_silhouettes.append(silhouette)
                round_errors.append(k_cluster.sum_of_square_error[-1])
                outputResult(K, iterations, elapsed, silhouette,
                             k_cluster.sum_of_square_error)
        outputPlot(K1, K2, numpy.array(timeData),
                   numpy.array(silhouette_scoreData),
                   numpy.array(sum_of_square_errorData))

    except Exception as e:
        # Best-effort reporting: show where it failed and why, then return.
        exc_tb = e.__traceback__
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        print(type(e), fname, exc_tb.tb_lineno, e)
Exemplo n.º 2
0
def img_seg(img_a, k_points, max_gen):
    """Rebuild an image from the result of ``KM.k_means`` on its pixels.

    ``img_a`` is flattened to one row per pixel (rows * cols, channels) and
    passed to ``KM.k_means`` together with ``k_points`` and ``max_gen``.  A
    new pixel array is accumulated from the returned pair, reshaped to the
    shape of ``img_a`` and returned as ``uint8``.

    NOTE(review): presumably ``result[0]`` holds one centroid per row and
    ``result[1][i]`` is a pixel-shaped array that is zero outside cluster
    ``i`` -- confirm against ``KM.k_means``.
    """
    n_rows, n_cols, n_channels = (img_a.shape[0], img_a.shape[1],
                                  img_a.shape[2])
    pixels = np.reshape(img_a, (n_rows * n_cols, n_channels))

    result = KM.k_means(pixels, k_points, max_gen)

    rebuilt = np.zeros(pixels.shape).astype(np.int64)

    for idx in range(result[0].shape[0]):
        members = result[1][idx]
        # x / x is 1 where x is non-zero and NaN where x is zero;
        # nan_to_num turns the NaNs into 0, leaving a 0/1 mask.
        mask = np.nan_to_num(members / members).astype(np.int64)
        rebuilt += mask * result[0][idx].astype(np.int64)

    return np.reshape(rebuilt, img_a.shape).astype(np.uint8)
Exemplo n.º 3
0
def compare_algorithms(data, k):
    """Check that the own K-means and scikit-learn agree on every instance.

    The own implementation is run first; scikit-learn is then fitted from
    the same initial centroids.  For each instance the centroid chosen by
    scikit-learn is compared with the centroid chosen by the own
    implementation, and an ``AssertionError`` is raised on the first
    mismatch.  Progress is written to stdout.
    """
    print(
        f"Compare scikit and own implentation for k={k} on {len(data)} instances... ",
        end="",
        flush=True,
    )
    own = K_means(k, m=2, max_iterations=300, verbose=False)
    own.run(data)

    # Seed scikit-learn with the exact starting centroids of the own run so
    # both algorithms begin from the same state.
    reference = scikit_KMeans(n_clusters=k,
                              init=np.array(own.initial_centroids),
                              max_iter=300,
                              tol=0,
                              n_init=1)
    reference.fit(data)

    for idx in range(len(data)):
        point = data[idx]
        predicted = reference.predict([point])
        sk_centroid = reference.cluster_centers_[predicted][0]
        own_centroid = own.centroids[own.closest_centroid(idx)[0]]
        assert np.allclose(
            sk_centroid, own_centroid
        ), f"FAIL, instance {point} has different centroids! {sk_centroid} (scikit) and {own_centroid} (own algorithm)"
    print("✔")
Exemplo n.º 4
0
if __name__ == "__main__":
    # Interactive mode so the figure can be redrawn between clustering steps.
    plt.ion()
    plt.clf()

    def plot_k_means(k_means):
        """Wait for a button press, then redraw centroids and instances."""
        plt.waitforbuttonpress()
        plt.clf()
        palette = ["r", "g", "c", "b", "k", "y"] * 10

        for cluster_id in k_means.instances_by_cluster:
            member_color = palette[cluster_id]
            center = k_means.centroids[cluster_id]
            plt.scatter(center[0], center[1], marker="X", s=50)
            for member_id in k_means.instances_by_cluster[cluster_id]:
                point = k_means.instances[member_id]
                plt.scatter(point[0], point[1], color=member_color, s=30)
        plt.draw()

    def plot(k_means, cycle):
        """Callback for ``K_means.run``; ``cycle`` is accepted but unused."""
        plot_k_means(k_means)

    # execute k-means clustering, redrawing after both phases of each cycle
    k_means = K_means(k=4, m=2, init_strategy=3)
    k_means.run(data,
                after_centroid_calculation=plot,
                after_cluster_membership=plot)

    # Final state, then hand control to the matplotlib window.
    plot_k_means(k_means)
    plt.show()
Exemplo n.º 5
0
                       color=get_color(k),
                       marker="o",
                       label=f"source $X_{k}$")
        elif N == 3:
            ax.scatter(X[N_ranges[k]:N_ranges[k + 1], 0],
                       X[N_ranges[k]:N_ranges[k + 1], 1],
                       X[N_ranges[k]:N_ranges[k + 1], 2],
                       color=get_color(k),
                       marker="o",
                       label=f"source $X_{k}$")
    plt.grid(True)
    ax.legend(loc="best", ncol=1, scatterpoints=1, numpoints=1)
    ax.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
    plt.show()

# Cluster the untransformed data first.
r = K_means(X, K=tK, v=1, title_str="K-means of original data")

# A[i, j] = exp(-||X[i] - X[j]||^2 / (2 * sigma^2)), with a zero diagonal.
A = np.empty([M, M])
for i in range(M):
    for j in range(M):
        A[i, j] = np.exp(-np.sum((X[i, :] - X[j, :])**2) / (2.0 * sigma**2))
np.fill_diagonal(A, 0)

# Diagonal matrix of the row sums of A, built twice (loop and matrix
# product) so the two results can be compared.
D = np.zeros([M, M])
for i in range(M):
    D[i, i] = np.sum(A[i, :])
D2 = np.diag(A.dot(np.ones(M)))
# The third positional argument of np.allclose is rtol, so eps acts as the
# relative tolerance here.
if not np.allclose(D, D2, eps):
    print("D - D2 =\n", D - D2)
Exemplo n.º 6
0
def run_kmeans(data, k):
    """Print ``k`` and run a fresh ``K_means`` model on ``data``.

    The model is built with ``m=2``, at most 300 iterations and verbose
    output disabled.  Nothing is returned.
    """
    print("K", k)
    model = K_means(k, m=2, max_iterations=300, verbose=False)
    model.run(data)
Exemplo n.º 7
0
from k_means import K_means
import numpy as np
import matplotlib.pyplot as plt

# Random 2-D sample (the clustering itself is not limited to 2-D data).
dataset = np.random.rand(100, 2).tolist()
K = 5
k_mean = K_means(dataset, K)
n_iter = 100  # iteration cap, checked as ``i > n_iter`` in the loop
i = 0
done = False
centroids = k_mean.begin()  # initial centroids
clusters = k_mean.find_dist(centroids)  # initial clusters

while not done:
    old_clusters = clusters
    centroids = k_mean.find_new_centroids(old_clusters)
    clusters = k_mean.find_dist(centroids)
    done = k_mean.cluster_change(old_clusters, clusters) or i > n_iter
    i += 1

# Append the cluster index as a third column to every point, then stack all
# clusters into one array so the index can drive the colour map.
labelled_clusters = []
for i in range(K):
    cluster_points = np.array(clusters[i])
    cluster_labels = i * np.ones((len(clusters[i]), 1))
    labelled_clusters.append(np.column_stack((cluster_points, cluster_labels)))
plot_dataset = np.concatenate(labelled_clusters)

centroid_array = np.array(centroids)
plt.scatter(plot_dataset[:, 0], plot_dataset[:, 1], c=plot_dataset[:, 2],
            s=50, cmap='viridis')
plt.scatter(centroid_array[:, 0], centroid_array[:, 1], c='black', s=200,
            alpha=0.5)
Exemplo n.º 8
0
# Both files are read as comma-separated tables; the file names are appended
# directly to the current working directory.
train_data = np.genfromtxt(os.getcwd() + train_file_name, delimiter=',')
test_data = np.genfromtxt(os.getcwd() + test_file_name, delimiter=',')

# The last column is kept apart as the label, the rest is the model input.
p_train_data, train_label = train_data[:, :-1], train_data[:, -1]
size_of_input = len(p_train_data[0])

p_test_data, test_label = test_data[:, :-1], test_data[:, -1]

for k in k_list:
    # Train ``repeat`` independent models and remember each one with its MSE.
    model_table = []
    for i in range(repeat):
        print("Repeat: ", i + 1)
        model = K_means(k, range_of_input, size_of_input)
        model.train(p_train_data)
        model_table.append((model.mse(p_train_data), model))

    # Index of the first run with the smallest MSE (0 when there are no runs).
    best_run = min(range(len(model_table)),
                   key=lambda run: model_table[run][0],
                   default=0)

    print("<< k set to: {} >>".format(k))
    print("Best run: ", best_run)
    best_mse, best_model = model_table[best_run]
    print("Average mean-square-error: ", best_mse)
    print("Mean-square-separation: ", best_model.mss())
    print("Mean entropy: ",
          best_model.m_entropy(number_of_classes, train_label))
cv2.createTrackbar('scale', 'image', 1, 100, f)

# Main loop: grab a frame, shrink it by the 'scale' trackbar, segment it with
# k-means, enlarge the result back and show it until 'q' is pressed.
while True:
    # Capture frame-by-frame
    ret, frame = cap.read()
    if not ret:
        # No frame was delivered; ``frame`` is unusable in that case, so
        # leave the loop and let the capture be released below.
        break

    # +1 keeps the divisor at least 1 when the trackbar sits at 0.
    scale = cv2.getTrackbarPos('scale', 'image') + 1

    # Never ask for a zero-sized image, which cv2.resize rejects.
    small_width = max(1, int(frame.shape[1] * (1 / scale)))
    small_height = max(1, int(frame.shape[0] * (1 / scale)))
    img_array = cv2.resize(frame,
                           dsize=(small_width, small_height),
                           interpolation=cv2.INTER_AREA)

    # Our operations on the frame come here
    k = cv2.getTrackbarPos('k points', 'image')
    kp = KM.make_k_points(k, 3, 0, 255)
    gray = IS.img_seg(img_array, kp,
                      cv2.getTrackbarPos('max iteration', 'image'))

    gray = cv2.resize(gray,
                      dsize=(int(gray.shape[1] * scale),
                             int(gray.shape[0] * scale)),
                      interpolation=cv2.INTER_AREA)

    # Display the resulting frame
    cv2.imshow('image', gray)
    if cv2.waitKey(50) & 0xFF == ord('q'):
        break

# When everything done, release the capture
cap.release()
Exemplo n.º 10
0
import pickle
import numpy as np
import pandas as pd

from k_means import K_means
from cluster_eval import *
from sklearn import datasets
from sklearn.cluster import KMeans

## Iris run ###
iris = datasets.load_iris()
X = iris.data
Y = iris.target

Y_df = pd.DataFrame(Y)

# NOTE(review): the label says "k12" while 14 is passed here and to K_means
# below -- confirm which value is intended.
give_external_eval(
    Y,
    X,
    14,
    2.0,
    print_message="iris_run_m2_k12",
    iris=True,
)

k_means = K_means(k=14, m=2)
k_means.run(X)
give_tendency_eval(k_means)
best_run = table_class_vs_cluster(Y, k_means, iris=True)

# Sum of every cell in the first three columns of the table.  A single
# array-level sum also works when the table has no rows, where the nested
# built-in sum() raised TypeError.
total_nbr_instances = best_run.iloc[:, :3].values.sum()
# A bare expression is only echoed in a REPL/notebook; print it so a script
# run shows the value too.
print(total_nbr_instances)