def plot_log_decision_boundary(xtrain, ytrain, xtest, ytest, a_xlim=-5,
                               bxlim=5,
                               title="Logistic regresison decision boundary"):
    """
    Fit logistic regression on the train set and draw its decision boundary
    on top of the test points, coloured by their true class.

    Parameters:
        xtrain, ytrain: forwarded unchanged to ``logistic_reg``; the first
            element of its return value is used as the weights ``w``.
        xtest: 2D array; columns 0 and 1 are the scatter coordinates.
        ytest: array compared against 0 and 1 to split ``xtest`` by class.
        a_xlim, bxlim: offsets added to column 1 of ``xtest`` before taking
            the minimum / maximum that delimit the plotted line.
        title: plot title, also passed as ``filename`` to
            ``ids.plot_details`` (saving is disabled here).
    """
    # Fit on the training data; the second returned value is not needed
    weights, _unused = logistic_reg(xtrain, ytrain)

    # End points of the boundary segment along the horizontal axis.
    # NOTE(review): the range is taken from column 1 of xtest while the
    # scatter below puts column 0 on the x-axis - confirm this is intended.
    column = xtest[:, 1]
    x_values = [np.min(column + a_xlim), np.max(column + bxlim)]

    # Boundary w0 + w1 * x + w2 * y = 0 solved for y
    scale = -1. / weights[2]
    linear_part = np.dot(weights[1], x_values) + weights[0]
    y_values = np.dot(scale, linear_part)

    # Scatter the test points, one colour per true class
    for value, name, colour in ((0, "Class 0", "C0"), (1, "Class 1", "red")):
        points = xtest[ytest == value]
        plt.scatter(points[:, 0], points[:, 1], label=name, color=colour,
                    ec="black", zorder=2)

    # Boundary line goes above the points (higher zorder)
    plt.plot(x_values, y_values, label='Decision Boundary', color="green",
             zorder=3)

    # Title, legend and axes styling are delegated to the project helper
    ids.plot_details(title=title, ax_equal=True, bg=True, legend=True,
                     leg_loc="upper left", save=False, filename=title)
def plot_gradient_details(x1, learningrate, steps_to_plot, plot_tan):
    """
    Mark the last gradient descent point on the current figure (hopefully
    the minimum of the function), then add the legend and plot details.

    Parameters:
        x1: last value reached; it is drawn at ``(x1, f(x1))`` only when it
            lies in the interval [-2, 2], otherwise a message is printed.
        learningrate: learning rate, used in the message and in the title.
        steps_to_plot: number of plotted iterations; selects the legend
            layout and appears in the message and in the title.
        plot_tan: when equal to ``False`` the legend goes to "upper center".
    """
    out_of_range_message = """\nCan't plot last point, its value is out of the plotting coordinates: > Learningrate = %.4f > Steps to plot = %d > Value = %f """

    # Draw the last point only if it falls inside the plotted interval
    if -2 <= x1 <= 2:
        plt.scatter(x1, f(x1), zorder=3, s=80, marker="X", color="yellow",
                    ec="black", label="Last iteration")
    else:
        print(out_of_range_message % (learningrate, steps_to_plot, x1))

    # Few entries fit in a single column; more are spread over three
    few_steps = steps_to_plot <= 5
    ncol = 1 if few_steps else 3
    loc = "lower right" if few_steps else "lower center"

    # NOTE(review): reconstructed as applying regardless of the step count;
    # confirm against the original indentation of this check.
    if plot_tan == False:  # equality test kept on purpose
        loc = "upper center"

    # Legend and remaining plot details
    title = "GD, {} iterations plotted, {} learning rate".format(
        steps_to_plot, learningrate)
    plt.legend(loc=loc, ncol=ncol, shadow=True)
    ids.plot_details(title=title, bg=True, save=False, filename=title)
def easy_plot_classes(class0, class1, title="Classes scatter plot",
                      save=False):
    """
    Scatter two groups of points on the current figure and add plot details.

    Parameters:
        class0, class1: 2D arrays; columns 0 and 1 are used as coordinates.
            ``class0`` keeps the default colour, ``class1`` is drawn in red.
        title: plot title, also passed as ``filename`` to
            ``ids.plot_details``.
        save: forwarded to ``ids.plot_details``.
    """
    # Styling shared by both groups
    marker_style = dict(ec="black", zorder=3)

    plt.scatter(class0[:, 0], class0[:, 1], label="Class 0", **marker_style)
    plt.scatter(class1[:, 0], class1[:, 1], color="red", label="Class 1",
                **marker_style)

    ids.plot_details(title=title, ax_equal=True, bg=True, legend=True,
                     leg_loc="upper left", save=save, filename=title)
def plot_image(centroid, n_cluster, dim, shape=(28, 28)):
    """
    Plot a flattened centroid as an image.

    Parameters:
        centroid: array exposing ``reshape`` whose number of elements
            matches ``shape``.
        n_cluster: zero-based cluster index; the title shows
            ``n_cluster + 1``.
        dim: number of components reported in the title.
        shape: (rows, columns) of the image. Defaults to (28, 28), which is
            the size that was previously hard-coded.
    """
    image = centroid.reshape(shape)

    # Only the real part is drawn, so complex-valued input can be shown
    plt.imshow(image.real, cmap='viridis')

    title = "Cluster %d image (after MDS to %d pcs)" % ((n_cluster + 1), dim)
    ids.plot_details(title=title, xlabel="X-axis (pixels)",
                     ylabel="Y-axis (pixels)", save=False, filename=title)
def plot_projected_clusters(data, clusters_indexes, centroids=None,
                            title="Projection of dataset", labels=None,
                            leg_loc="upper left", save=False,
                            filename="projected_data"):
    """
    Given a dataset and a list of indexes, plot the 2D projection of the
    data divided by the provided groups (classes or clusters). Also plot
    the centroids if provided.

    Parameters:
        data: dataset passed to ``ids.mds`` (and to
            ``ids.project_centroids`` when centroids are given).
        clusters_indexes: sequence with one entry per group, each entry
            being the row indexes of that group in the projected data.
        centroids: optional centroids; when not ``None`` they are projected
            with ``ids.project_centroids`` and drawn as yellow crosses.
        title, leg_loc, save, filename: forwarded to ``ids.plot_details``.
        labels: optional sequence; when not ``None`` group ``n`` is named
            "Class <labels[n]>", otherwise "Cluster <n + 1>".
    """
    # Identity checks on purpose: `x != None` on a NumPy array compares
    # element-wise, and using that result as a condition raises ValueError.
    has_centroids = centroids is not None
    has_labels = labels is not None

    # Project the data once, then select the rows of every group
    data_2d = ids.mds(data)
    clusters = [data_2d[indexes, :] for indexes in clusters_indexes]

    # Obtain centroids in 2D
    if has_centroids:
        centroids_2d = ids.project_centroids(data, centroids)

    last = len(clusters) - 1
    for n, group in enumerate(clusters):
        # Plot clusters/classes
        if has_labels:
            label = "Class %d" % (labels[n])
        else:
            label = "Cluster %d" % (n + 1)
        plt.scatter(group[:, 0], group[:, 1], label=label, ec="black",
                    zorder=3)

        if has_centroids:
            # Only the last centroid carries a label, so the legend shows a
            # single "Centroids" entry
            centroid_label = "Centroids" if n == last else None
            plt.scatter(centroids_2d[n][0], centroids_2d[n][1], marker="X",
                        color="yellow", ec="black", s=120, zorder=4,
                        label=centroid_label)

    # Add plot details
    ids.plot_details(title=title, xlabel="PC1", ylabel="PC2", ax_equal=True,
                     bg=True, legend=True, leg_loc=leg_loc, save=save,
                     filename=filename)
## Clustering
print("Clustering, no prior MDS:")

# Obtain centroids and clusters indexes
centroids = ids.kmeans_fit(xdigit, 3)
clusters_indexes = ids.kmeans_clustering(xdigit, centroids, indexes_only=True)

# Plot 3 cluster centers as images
for n, centroid in enumerate(centroids):
    # Plot the image (represented by 28 x 28 pixels)
    centroid = centroid.reshape((28, 28))
    plt.imshow(centroid, cmap='viridis')
    title = "Cluster %d image (no prior MDS)" % (n + 1)
    ids.plot_details(title=title, xlabel="X-axis (pixels)",
                     ylabel="Y-axis (pixels)", save=False, filename=title)


# Count the proportion of 1s, 7s and 9s in each cluster
def get_labels_proportion(clusters_indexes, true_y):
    """
    Print, for every cluster, the percentage of its members whose true
    label is 1, 7 and 9.

    Parameters:
        clusters_indexes: sequence with one entry per cluster, each entry
            holding the indexes of the cluster members in ``true_y``.
        true_y: array of true labels, indexed with each cluster's indexes.

    An empty cluster is reported with 0.00% for every label instead of
    raising ZeroDivisionError.
    """
    labels = (1, 7, 9)
    for n, cl_indexes in enumerate(clusters_indexes):
        cluster_labels = true_y[cl_indexes]
        size = len(cl_indexes)

        print("\t> Cluster %d:" % (n + 1))
        for label in labels:
            # Guard the division: a cluster may have no members
            if size:
                proportion = sum(cluster_labels == label) / size
            else:
                proportion = 0.0
            print("\t\t Proportion of %d = %.2f%%" % (label, proportion * 100))