def plot_log_decision_boundary(xtrain, ytrain, xtest, ytest,
                               a_xlim = -5, bxlim = 5,
                               title = "Logistic regresison decision boundary"):
    """
    Fit a logistic regression on the train set and plot its decision
    boundary on top of the test points coloured by their true class.

    Parameters:
        xtrain, ytrain: data passed unchanged to logistic_reg to
                        obtain the weights w.
        xtest:          2D array; column 0 is drawn on the x-axis and
                        column 1 on the y-axis.
        ytest:          true labels of xtest; rows with label 0 and 1
                        are drawn as "Class 0" and "Class 1".
        a_xlim, bxlim:  offsets added to the smallest / largest x
                        value of xtest to set where the boundary line
                        starts and ends.
        title:          plot title, also forwarded as filename to
                        ids.plot_details.

    The weights are used as w[0] + w[1]*x + w[2]*y = 0, so w[2] must
    be non-zero for the line to be drawn.
    """
    # Get the logistic regression parameters (second return value is unused)
    w, _ = logistic_reg(xtrain, ytrain)
    # End points of the boundary along the x-axis. The range is taken from
    # column 0 because that is the feature plotted on the x-axis below.
    x_values = np.array([np.min(xtest[:, 0]) + a_xlim,
                         np.max(xtest[:, 0]) + bxlim])
    # Solve w[0] + w[1]*x + w[2]*y = 0 for y at both end points
    y_values = -(w[1] * x_values + w[0]) / w[2]
    # Split the test points by their true class
    class0 = xtest[ytest == 0]
    class1 = xtest[ytest == 1]
    # Plot points
    plt.scatter(class0[:, 0], class0[:, 1], label = "Class 0", color = "C0", ec = "black", zorder = 2)
    plt.scatter(class1[:, 0], class1[:, 1], label = "Class 1", color = "red", ec = "black", zorder = 2)
    # Plot decision boundary above the points
    plt.plot(x_values, y_values, label = 'Decision Boundary', color = "green", zorder = 3)
    # Add plot details
    ids.plot_details(title = title,
                     ax_equal = True,
                     bg = True,
                     legend = True,
                     leg_loc = "upper left",
                     save = False,
                     filename = title)
def plot_gradient_details(x1, learningrate, steps_to_plot, plot_tan):
    """
    Mark the last gradient descent point on the current plot and
    finish the figure (legend, title, background).

    The marker is drawn at (x1, f(x1)) only when x1 lies in [-2, 2];
    otherwise a message with the run settings is printed instead.
    The legend layout depends on steps_to_plot and on plot_tan.
    """
    in_view = -2 <= x1 <= 2
    if not in_view:
        # Nothing to draw: report why the point is missing
        print("""\nCan't plot last point, its value is out of the plotting coordinates:
              > Learningrate = %.4f
              > Steps to plot = %d
              > Value = %f
              """ % (learningrate, steps_to_plot, x1))
    else:
        plt.scatter(x1, f(x1),
                    marker="X", s=80, color="yellow", ec="black",
                    zorder=3, label="Last iteration")

    # Few entries fit in a single column; many are spread over three
    few_steps = steps_to_plot <= 5
    legend_cols = 1 if few_steps else 3
    legend_loc = "lower right" if few_steps else "lower center"
    # Explicit comparison with False is kept on purpose (None is not False)
    if plot_tan == False:
        legend_loc = "upper center"

    heading = f"GD, {steps_to_plot} iterations plotted, {learningrate} learning rate"
    plt.legend(loc=legend_loc, ncol=legend_cols, shadow=True)
    ids.plot_details(title=heading, bg=True, save=False, filename=heading)
def easy_plot_classes(class0, class1, title = "Classes scatter plot", save = False):
    """
    Scatter two groups of 2D points on the same axes.

    Column 0 of each array goes on the x-axis and column 1 on the
    y-axis. class0 uses the default colour, class1 is drawn in red.
    title is also forwarded as filename to ids.plot_details, together
    with save.
    """
    marker_style = dict(ec="black", zorder=3)
    plt.scatter(class0[:, 0], class0[:, 1], label="Class 0", **marker_style)
    plt.scatter(class1[:, 0], class1[:, 1], label="Class 1", color="red", **marker_style)
    ids.plot_details(
        title=title,
        ax_equal=True,
        bg=True,
        legend=True,
        leg_loc="upper left",
        save=save,
        filename=title,
    )
def plot_image(centroid, n_cluster, dim):
    """
    Show a centroid as a 28 by 28 pixels image.

    centroid is reshaped to (28, 28) and only its real part is drawn.
    n_cluster (0-based) and dim are used only to build the title.
    """
    pixels = centroid.reshape((28, 28)).real
    plt.imshow(pixels, cmap="viridis")
    heading = "Cluster %d image (after MDS to %d pcs)" % (n_cluster + 1, dim)
    ids.plot_details(
        title=heading,
        xlabel="X-axis (pixels)",
        ylabel="Y-axis (pixels)",
        save=False,
        filename=heading,
    )
def plot_projected_clusters(data, 
                            clusters_indexes,
                            centroids = None,
                            title = "Projection of dataset",
                            labels = None,
                            leg_loc = "upper left",
                            save = False,
                            filename = "projected_data"):
    """
    Given a dataset and a list of indexes, plot the 2D projection of
    the data split into the provided groups (classes or clusters).
    Also plot the centroids if provided.

    Parameters:
        data:             dataset handed to ids.mds for the projection.
        clusters_indexes: one entry per group, each used to select the
                          rows of the projected data in that group.
        centroids:        optional centroids, projected with
                          ids.project_centroids; one is drawn per group.
        title:            plot title.
        labels:           optional per-group numbers; when given the
                          legend reads "Class <label>", otherwise
                          "Cluster <n>" (1-based).
        leg_loc, save, filename: forwarded to ids.plot_details.
    """
    # Identity checks instead of "!= None": with numpy arrays the comparison
    # is elementwise and cannot be used as an if-condition.
    has_centroids = centroids is not None
    has_labels = labels is not None
    # Project the data (ids.mds output is indexed as rows x 2 columns below)
    data_2d = ids.mds(data)
    # Rows of the projected data belonging to each cluster/class
    clusters = [data_2d[indexes, :] for indexes in clusters_indexes]
    # Obtain centroids in 2D
    if has_centroids:
        centroids_2D = ids.project_centroids(data, centroids)
    # Plot clusters/classes and centroids
    last = len(clusters) - 1
    for n, group in enumerate(clusters):
        if has_labels:
            label = "Class %d" % (labels[n])
        else:
            label = "Cluster %d" % (n + 1)
        plt.scatter(group[:, 0], group[:, 1], label = label, ec = "black", zorder = 3)
        if has_centroids:
            # Label only the last centroid so the legend shows a single
            # "Centroids" entry
            centroid_label = "Centroids" if n == last else None
            plt.scatter(centroids_2D[n][0], centroids_2D[n][1],
                        marker = "X", color = "yellow", ec = "black",
                        s = 120, zorder = 4, label = centroid_label)
    # Add plot details
    ids.plot_details(title = title,
                     xlabel = "PC1",
                     ylabel = "PC2",
                     ax_equal = True,
                     bg = True,
                     legend = True,
                     leg_loc = leg_loc,
                     save = save,
                     filename = filename)
## Clustering
print("Clustering, no prior MDS:")
# Obtain centroids and clusters indexes directly on xdigit
# (the argument 3 is presumably the number of clusters - confirm in ids.kmeans_fit)
centroids = ids.kmeans_fit(xdigit, 3)                                          
clusters_indexes = ids.kmeans_clustering(xdigit, centroids, indexes_only=True)  
                                                                               
# Plot each cluster center as an image
for n, centroid in enumerate(centroids):
    # Plot the image (represented by 28 x 28 pixels)
    centroid = centroid.reshape((28, 28))
    plt.imshow(centroid, cmap = 'viridis')  
    # Clusters are numbered from 1 in the title; the title is also passed as filename
    title = "Cluster %d image (no prior MDS)" % (n+1)
    ids.plot_details(title = title,
                     xlabel = "X-axis (pixels)",
                     ylabel = "Y-axis (pixels)",
                     save = False,
                     filename = title)
    
# Count the proportion of 1s, 7s and 9s in each cluster
def get_labels_proportion(clusters_indexes, true_y, labels=(1, 7, 9)):
    """
    Print, for each cluster, the percentage of its members that carry
    each of the given labels.

    Parameters:
        clusters_indexes: one entry per cluster, each holding the
                          indexes of true_y that belong to the cluster.
        true_y:           true labels of the samples (array-like).
        labels:           labels to report; defaults to 1, 7 and 9.

    An empty cluster is reported with 0.00% for every label instead of
    raising a division by zero.
    """
    true_y = np.asarray(true_y)
    for n, cl_indexes in enumerate(clusters_indexes):
        size = len(cl_indexes)
        print("\t> Cluster %d:" % (n + 1))
        # Skip indexing for empty clusters: there is nothing to select
        members = true_y[cl_indexes] if size else None
        for label in labels:
            if size:
                proportion = np.count_nonzero(members == label) / size
            else:
                proportion = 0.0
            print("\t\t  Proportion of %s = %.2f%%" % (label, proportion * 100))