def output_autoencoder_performance(autoencoder_state, autoencoder_connections, check_data):
    """Log the autoencoder's average reconstruction error and optionally show samples.

    Every row of the check data is written into the first entry of the
    neuron-state container, the network is advanced with
    ``update_autoencoder`` and the last entry of the container is compared
    against the matching label row.  The averaged error is appended to the
    module-level ``output_file``.  When the module-level flag
    ``display_autoencoder_images`` is truthy, ten randomly drawn inputs and
    their reconstructions are displayed with ``mnist.visualize``.

    Args:
        autoencoder_state: Indexable whose element 0 is the neuron-state
            container; it is mutated in place (element 0 of that container
            is overwritten with each input).
        autoencoder_connections: Passed through unchanged to
            ``update_autoencoder``.
        check_data: Indexable with the input array at position 0 and the
            target array at position 1.  The input array must expose
            ``shape``.

    Raises:
        ZeroDivisionError: If the input array has zero rows.
    """
    data = check_data[0]
    labels = check_data[1]
    neuron_states = autoencoder_state[0]
    num_samples = data.shape[0]

    total_error = 0.0
    for i in range(num_samples):
        neuron_states[0] = data[i]
        update_autoencoder(autoencoder_state, autoencoder_connections)
        total_error += np.linalg.norm(neuron_states[-1] - labels[i])

    # NOTE: the value written is the mean of the (unsquared) norms of the
    # residuals; the "mean squared error" wording is kept so the log format
    # stays unchanged for anything that parses it.
    # The context manager guarantees the file is closed even if write() fails.
    with open(output_file, 'a') as output:
        output.write('{} mean squared error.\n\n'.format(total_error / float(num_samples)))

    # Show some pictures!
    if display_autoencoder_images:
        num_examples = 10
        # Sampling is with replacement, so the same row may appear twice.
        random_indices = np.random.randint(0, num_samples, num_examples)
        inputs = np.copy(data[random_indices])
        outputs = np.ndarray((num_examples, data.shape[1]))
        for slot, sample_index in enumerate(random_indices):
            neuron_states[0] = data[sample_index]
            update_autoencoder(autoencoder_state, autoencoder_connections)
            outputs[slot] = np.copy(neuron_states[-1])

        input_viewable = denormalize(inputs)
        output_viewable = denormalize(outputs)
        # Inputs first, reconstructions after, in a single visualisation call.
        mnist.visualize(np.concatenate((input_viewable, output_viewable)))
def visualize_digits(assignment, data_matrix):
    """Display the rows of *data_matrix* grouped by their assigned cluster.

    Args:
        assignment: Indexable giving, for each row position ``i`` of
            *data_matrix*, the group index (0-9) that row belongs to.
        data_matrix: Sequence of samples; sample ``i`` is placed in group
            ``assignment[i]``.

    Raises:
        IndexError: If *assignment* is shorter than *data_matrix* or holds an
            index outside the ten available groups.
    """
    groups = [[] for _ in range(10)]
    for i, sample in enumerate(data_matrix):
        groups[assignment[i]].append(sample)

    for digit, members in enumerate(groups):
        # Single-argument print(...) emits the same text under the Python 2
        # print statement and the Python 3 print function.
        print("Printing for digit {}".format(digit))
        mnist.visualize(np.array(members))
def main():
    """
    DO NOT TOUCH THIS FUNCTION. IT IS USED FOR COMPUTER EVALUATION OF YOUR CODE

    Runs four clustering experiments on 500 MNIST training samples and
    visualizes the resulting centres after each one:

    1. ``kmeans`` seeded with the first ten samples.
    2. ``kmeans`` seeded with the first sample found for each label 0-9.
    3. ``kmedoids`` on the pairwise Euclidean distance matrix, seeded with
       ``list(range(10))``.
    4. ``kmedoids`` seeded with the index of the first sample of each label.
    """
    # Identification string printed first, padded with tabs.
    results = my_info() + "\t\t"
    print results + "\t\t"

    X, Y = mnist.read_mnist_training_data(500)

    # --- k-means, seeded with the first ten rows of X ---
    centriods = X[:10]
    cm, c = kmeans(X, centriods)
    mnist.visualize(cm)
    # Disabled per-cluster view (each cluster with its mean prepended):
    # for mean, cluster in zip(cm, c):
    #     mnist.visualize(np.insert(cluster, 0, mean, axis=0))

    # --- k-means, seeded with the first row whose label equals i, for i in 0..9 ---
    centriods_unique = np.array([X[np.where(Y == i)[0][0]] for i in range(10)])
    cm, c = kmeans(X, centriods_unique)
    mnist.visualize(cm)
    # Disabled per-cluster view:
    # for mean, cluster in zip(cm, c):
    #     mnist.visualize(np.insert(cluster, 0, mean, axis=0))

    # --- k-medoids on the full pairwise Euclidean distance matrix ---
    distances = distance.cdist(X, X, "euclidean")
    # presumably the second argument is the list of initial medoid row indices
    # -- confirm against kmedoids
    medoids_idx, clusters = kmedoids(distances, list(range(10)))
    # The returned medoid entries are used as row indices into X.
    medoids = np.array([X[int(i)] for i in medoids_idx])
    # `c` is only consumed by the disabled loop below.
    # NOTE(review): clusters usually differ in size, making this a ragged
    # array; recent NumPy releases may reject that without dtype=object -- confirm.
    c = np.array([X[clusters == i] for i in range(10)])
    mnist.visualize(medoids)
    # Disabled per-cluster view (NOTE(review): still zips `cm` from the
    # k-means run rather than `medoids` -- looks stale):
    # for mean, cluster in zip(cm, c):
    #     mnist.visualize(np.insert(cluster, 0, mean, axis=0))

    # --- k-medoids, seeded with the index of the first sample of each label ---
    mediod_idx = [np.where(Y == i)[0][0] for i in range(10)]
    medoids_idx, clusters = kmedoids(distances, mediod_idx)
    medoids = np.array([X[int(i)] for i in medoids_idx])
    c = np.array([X[clusters == i] for i in range(10)])
    mnist.visualize(medoids)
def _show_clustering(centers_label, centers, assignment, samples):
    """Print *centers_label*, display *centers*, then display every cluster."""
    print(centers_label)
    mnist.visualize(centers)
    print("Clusters")
    visualize_digits(assignment, samples)


def main():
    """Run k-means and k-medoids on the first 500 samples, two seedings each.

    Each algorithm is run once seeded with ``X[0:10]`` and once seeded with
    ``distinct_means``: a copy of ``X[0:10]`` in which row ``d`` is replaced
    by the first sample in the working set whose label in ``y`` is ``d``.
    After every run the centres and the per-cluster members are displayed.

    Reads the module-level ``X`` (samples) and ``y`` (labels).
    """
    # All print calls take a single argument so they emit identical text
    # under the Python 2 print statement and the Python 3 print function.
    Xin = X[0:500]

    print("=== k-means ===")
    assignment1, cluster_means1 = kmeans(Xin, X[0:10])
    print("= First iteration")
    _show_clustering("Cluster means", cluster_means1, assignment1, Xin)

    print("= Second iteration")
    # Start from the first ten rows and overwrite row d with the first sample
    # labelled d; rows for labels never encountered keep their initial value.
    distinct_means = X[0:10].copy()
    digit_set = set()
    for i, sample in enumerate(Xin):
        digit = y[i]
        if digit not in digit_set:
            digit_set.add(digit)
            distinct_means[digit] = sample
            if len(digit_set) == 10:
                # Every label has a representative; stop scanning.
                break
    print(digit_set)
    assignment1, cluster_means1 = kmeans(Xin, distinct_means)
    _show_clustering("Cluster means", cluster_means1, assignment1, Xin)

    print("=== k-medoids ===")
    dissimilarity_matrix = compute_dissimilarity_matrix(Xin)
    print("= First iteration")
    assignment2, cluster_medoids1 = kmedoids(dissimilarity_matrix, X[0:10])
    _show_clustering("Cluster medoids", cluster_medoids1, assignment2, Xin)

    print("= Second iteration")
    assignment2, cluster_medoids1 = kmedoids(dissimilarity_matrix, distinct_means)
    _show_clustering("Cluster medoids", cluster_medoids1, assignment2, Xin)
def sanity_check():
    """Print the labels of 100 random samples as a 10x10 grid and show the samples.

    Draws 100 distinct indices from ``range(5000)``, prints the matching
    entries of the module-level ``y`` (reshaped to 10x10 and formatted by
    ``bmatrix``) and displays the matching rows of the module-level ``X``.
    """
    # assumes X and y hold at least 5000 entries -- TODO confirm
    indices = np.random.choice(5000, 100, replace=False)
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print(bmatrix(y[indices].reshape(10, 10)))
    mnist.visualize(X[indices])
new_mediod_indices[i] = new_mediod_i changed = (new_mediod_indices != mediod_indices).any() mediod_indices = np.copy(new_mediod_indices) return new_mediod_indices, cluster_indices X, Y = mnist_load_show.read_mnist_training_data(SAMPLE_SIZE) first_ten = X[:10] # select first instance of each label first_label_instance = np.array([X[np.where(Y == i)[0][0]] for i in range(10)]) cluster_means, clusters = k_means(X, first_ten) mnist_load_show.visualize(cluster_means) for mean, cluster in zip(cluster_means, clusters): mnist_load_show.visualize(np.insert(cluster, 0, mean, axis=0)) cluster_means, clusters = k_means(X, first_label_instance) mnist_load_show.visualize(cluster_means) for mean, cluster in zip(cluster_means, clusters): mnist_load_show.visualize(np.insert(cluster, 0, mean, axis=0)) distances = distance.cdist(X, X, 'euclidean') cluster_medoids_indices, clusters_indices = k_medoids(distances, list(range(10))) cluster_medoids = np.array([X[int(i)] for i in cluster_medoids_indices]) clusters = np.array([X[clusters_indices == i] for i in range(10)]) mnist_load_show.visualize(cluster_medoids) for mediod, cluster in zip(cluster_medoids, clusters):
new_mediod_indices[i] = new_mediod_i changed = (new_mediod_indices != mediod_indices).any() mediod_indices = np.copy(new_mediod_indices) return new_mediod_indices, cluster_indices X, Y = mnist_load_show.read_mnist_training_data(SAMPLE_SIZE) first_ten = X[:10] # select first instance of each label first_label_instance = np.array([ X[np.where(Y == i)[0][0]] for i in range(10) ]) cluster_means, clusters = k_means(X, first_ten) mnist_load_show.visualize(cluster_means) for mean, cluster in zip(cluster_means, clusters): mnist_load_show.visualize(np.insert(cluster, 0, mean, axis=0)) cluster_means, clusters = k_means(X, first_label_instance) mnist_load_show.visualize(cluster_means) for mean, cluster in zip(cluster_means, clusters): mnist_load_show.visualize(np.insert(cluster, 0, mean, axis=0)) distances = distance.cdist(X, X, 'euclidean') cluster_medoids_indices, clusters_indices = k_medoids(distances, list(range(10))) cluster_medoids = np.array([X[int(i)] for i in cluster_medoids_indices]) clusters = np.array([ X[clusters_indices == i] for i in range(10) ]) mnist_load_show.visualize(cluster_medoids) for mediod, cluster in zip(cluster_medoids, clusters): mnist_load_show.visualize(np.insert(cluster, 0, mediod, axis=0))
def verify(n, xs, ys):
    """Print the label and display the sample for *n* randomly chosen positions.

    Args:
        n: Number of distinct positions to draw.
        xs: Indexable of samples; ``xs[i]`` is passed to ``mnist.visualize``.
        ys: Indexable of labels; ``ys[i]`` is printed before its sample.

    Raises:
        ValueError: If *n* exceeds ``len(xs)`` (raised by ``random.sample``).
    """
    for i in random.sample(range(len(xs)), n):
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print(ys[i])
        mnist.visualize(xs[i])
def verify(n, xs, ys):
    """Spot-check *n* random (label, sample) pairs.

    Positions are drawn without repetition from ``range(len(xs))``; for each
    one the label ``ys[i]`` is printed and the sample ``xs[i]`` is displayed
    with ``mnist.visualize``.

    Args:
        n: How many positions to sample.
        xs: Indexable of samples.
        ys: Indexable of labels, indexed by the same positions as *xs*.

    Raises:
        ValueError: If *n* is larger than ``len(xs)`` (from ``random.sample``).
    """
    chosen_positions = random.sample(range(len(xs)), n)
    for position in chosen_positions:
        # print(...) with one argument yields the same output on Python 2
        # (print statement) and Python 3 (print function).
        print(ys[position])
        mnist.visualize(xs[position])