Exemplo n.º 1
0
def plot_regularization_path(alphas,
                             coefs,
                             features_labels,
                             n_features_labels=None,
                             legend_size='medium'):
    """
    Plot ``coefs`` against ``alphas`` on a log-scaled x axis and show the figure.

    :param alphas: x values handed to the axes ``plot`` call.
    :param coefs: y values handed to the axes ``plot`` call.
    :param features_labels: entries used for the legend.
    :param n_features_labels: when not None, only the first
        ``n_features_labels`` entries of ``features_labels`` are listed
        in the legend; otherwise all of them are.
    :param legend_size: forwarded to the legend as ``fontsize``.
    :return: None; the figure is displayed with ``plt.show()``.
    """
    plt.figure(figsize=(12, 6))
    axes = plt.gca()
    axes.plot(alphas, coefs)
    axes.set_xscale('log')
    axes.set_xlabel('Alpha')
    axes.set_ylabel('Coefficients')

    # Decide which labels end up in the legend.
    if n_features_labels is None:
        legend_entries = features_labels
    else:
        legend_entries = features_labels[:n_features_labels]

    axes.legend(legend_entries, loc='upper right', fontsize=legend_size)
    plt.show()
Exemplo n.º 2
0
 def plot_dendrogram(self, plot_size=(10, 25), title_pad=20, xlabel_pad=20, orient='left', leaf_text_size=16,
                     save_as_img=False, filename='cah', file_type='jpg'):
     """
     Draw the dendrogram for ``self.Z`` and display it.

     The value returned by ``dendrogram`` is stored on
     ``self.dendrogram_data``.

     :param plot_size: figure size passed to ``plt.figure``.
     :param title_pad: padding applied to the figure title.
     :param xlabel_pad: padding applied to the x-axis label.
     :param orient: forwarded to ``dendrogram`` as ``orientation``.
     :param leaf_text_size: forwarded to ``dendrogram`` as ``leaf_font_size``.
     :param save_as_img: when true, the figure is also written to
         ``<filename>.<file_type>`` before being shown.
     :param filename: base name of the saved image.
     :param file_type: extension of the saved image.
     """
     plt.figure(figsize=plot_size)
     plt.title('Hierarchical Clustering Dendrogram', pad=title_pad)
     plt.xlabel('distance', labelpad=xlabel_pad)

     dendrogram_options = {
         'labels': self.categories,
         'orientation': orient,
         'leaf_font_size': leaf_text_size,
     }
     self.dendrogram_data = dendrogram(self.Z, **dendrogram_options)

     if save_as_img:
         output_path = f'{filename}.{file_type}'
         plt.tight_layout()
         plt.savefig(output_path)
     plt.show()
Exemplo n.º 3
0
def plot_confusion_matrix(cm, class_names):
    """
    Returns a matplotlib figure containing the plotted confusion matrix.

    The raw counts drive the cell colours; each cell is annotated with the
    row-normalized value rounded to two decimals. A row whose counts sum to
    zero is annotated with 0.0 instead of NaN.

    Args:
        cm (array, shape = [n, n]): a confusion matrix of integer classes
        class_names (array, shape = [n]): String names of the integer classes

    Returns:
        The matplotlib figure the matrix was drawn on.
    """
    # Accept nested lists as well as arrays.
    cm = np.asarray(cm)

    size = len(class_names)
    figure = plt.figure(figsize=(size, size))
    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title("Confusion matrix")
    plt.colorbar()
    tick_marks = np.arange(len(class_names))
    plt.xticks(tick_marks, class_names, rotation=45)
    plt.yticks(tick_marks, class_names)

    # Compute the labels from the normalized confusion matrix. Rows with no
    # samples would divide 0 by 0; dividing by 1 instead keeps them at 0.
    row_totals = cm.sum(axis=1)[:, np.newaxis].astype('float')
    row_totals[row_totals == 0] = 1.0
    labels = np.around(cm.astype('float') / row_totals, decimals=2)

    # Use white text if squares are dark; otherwise black.
    threshold = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        color = "white" if cm[i, j] > threshold else "black"
        plt.text(j, i, labels[i, j], horizontalalignment="center", color=color)

    # Set the axis labels before tightening so the layout accounts for them.
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()
    return figure
Exemplo n.º 4
0
def correlation_matrix(df,
                       as_chart=True,
                       precision=2,
                       title=None,
                       rotate=90,
                       save_as_img=False,
                       size=(16, 12)):
    """
    Show or return the correlation matrix of ``df`` (computed by ``df.corr()``).

    :param df: object exposing ``corr()``; a pandas DataFrame is assumed.
    :param as_chart: when true, draw a seaborn heatmap of the lower triangle
        and show it; when false, return a styled table instead.
    :param precision: number of decimals used for the displayed values.
    :param title: chart title; defaults to
        'Pearson Correlation of Features'.
    :param rotate: rotation applied to the x tick labels.
    :param save_as_img: when true (chart mode only), the figure is also
        saved as 'corr_matrix.jpg'.
    :param size: figure size passed to ``plt.figure``.
    :return: None in chart mode; the styled correlation table otherwise.
    """
    corr = df.corr()

    if not as_chart:
        styled = corr.style.background_gradient(cmap='coolwarm')
        # ``set_precision`` was dropped from recent pandas; fall back to it
        # only when ``format`` does not know the ``precision`` keyword.
        try:
            return styled.format(precision=precision)
        except TypeError:
            return styled.set_precision(precision)

    colormap = plt.cm.RdBu
    plt.figure(figsize=size)
    if title is None:
        title = 'Pearson Correlation of Features'
    plt.title(title, y=1.05, size=15, pad=20)

    # Hide the upper triangle (diagonal included). The builtin ``bool`` is
    # used because the ``np.bool`` alias no longer exists in recent NumPy.
    mask = np.triu(np.ones_like(corr, dtype=bool))
    ax = sns.heatmap(corr,
                     linewidths=0.5,
                     vmax=1.0,
                     square=True,
                     cmap=colormap,
                     linecolor='white',
                     annot=True,
                     mask=mask,
                     cbar_kws={"shrink": .5},
                     fmt='.{}f'.format(precision))

    # Crop the fully masked last column and first row. The limits follow the
    # size of ``corr`` itself, which may have fewer columns than ``df``.
    n_features = corr.shape[1]
    ax.set_xlim(0, n_features - 1)
    ax.set_ylim(n_features, 1)
    plt.xticks(rotation=rotate)
    if save_as_img:
        plt.tight_layout()
        plt.savefig('corr_matrix.jpg')
    plt.show()
Exemplo n.º 5
0
    def plot_3d_factorial_plan(self,
                               x_comp=1,
                               y_comp=2,
                               z_comp=3,
                               cat_colors=None):
        """
        Show a 3D scatter plot of three columns of ``self.components_table``.

        The columns read are ``'F<x_comp>'``, ``'F<y_comp>'`` and
        ``'F<z_comp>'``. The axes limits are set to the min/max of each
        column, and three blue segments mark the coordinate axes through
        the origin.

        :param x_comp: component number plotted on the x axis.
        :param y_comp: component number plotted on the y axis.
        :param z_comp: component number plotted on the z axis.
        :param cat_colors: values passed to ``scatter`` as ``c``; defaults
            to ``self.cat_colors``.
        """
        cat_colors = self.cat_colors if cat_colors is None else cat_colors
        result = self.components_table
        my_dpi = 96
        fig = plt.figure(figsize=(480 / my_dpi, 480 / my_dpi), dpi=my_dpi)
        ax = fig.add_subplot(111, projection='3d')
        axes_3d_comp = [x_comp, y_comp, z_comp]
        x_comp_label, y_comp_label, z_comp_label = [
            'F{}'.format(n) for n in axes_3d_comp
        ]
        # Components axes limits
        xmin, xmax = (min(result[x_comp_label]), max(result[x_comp_label]))
        ymin, ymax = (min(result[y_comp_label]), max(result[y_comp_label]))
        zmin, zmax = (min(result[z_comp_label]), max(result[z_comp_label]))
        ax.set_xlim(xmin, xmax)
        ax.set_ylim(ymin, ymax)
        ax.set_zlim(zmin, zmax)
        # Components axes coordinates
        xaxis = [(xmin, xmax), (0, 0), (0, 0)]
        yaxis = [(0, 0), (ymin, ymax), (0, 0)]
        zaxis = [(0, 0), (0, 0), (zmin, zmax)]
        # Plot components axes
        for a in [xaxis, yaxis, zaxis]:
            ax.plot(a[0], a[1], a[2], 'b')
        # Label each axis with the component actually plotted on it; with
        # the default arguments this yields "PC1", "PC2" and "PC3".
        ax.set_xlabel("PC{}".format(x_comp))
        ax.set_ylabel("PC{}".format(y_comp))
        ax.set_zlabel("PC{}".format(z_comp))
        fig.tight_layout()
        # Plot
        ax.scatter(result[x_comp_label],
                   result[y_comp_label],
                   result[z_comp_label],
                   c=cat_colors,
                   cmap="Set2_r",
                   s=60)
        plt.title("3D PCA")
        plt.show()
Exemplo n.º 6
0
 def plot(self, plot_size=(16, 10), cluster_labels=None, c=10):
     """
     Scatter ``self.df_embedded`` in 2D or 3D, optionally coloured by cluster.

     When ``cluster_labels`` is given it is stored in ``self.df_embedded``
     under its capitalized ``name``, points are coloured per cluster and one
     centroid (mean of the remaining columns) is drawn per label value.

     :param plot_size: figure size passed to ``plt.figure``.
     :param cluster_labels: optional labels exposing ``.name``; assumed to
         be a pandas Series aligned with ``self.df_embedded`` -- TODO confirm.
     :param c: palette size used for the 2D plot when no labels are given.
     :raises ValueError: if ``self.n_comp`` is neither 2 nor 3.
     """
     # Fail before touching self.df_embedded when nothing can be drawn.
     if self.n_comp not in (2, 3):
         raise ValueError(
             'plot() supports 2 or 3 components, got {}'.format(self.n_comp))

     has_clusters = cluster_labels is not None
     cluster_title = None
     n_colors = c
     centroids_df = None
     if has_clusters:
         cluster_title = cluster_labels.name.capitalize()
         self.df_embedded[cluster_title] = cluster_labels
         label_column = self.df_embedded[cluster_title]
         n_colors = len(label_column.unique())
         # Centroids are taken over the actual label values rather than
         # assuming the labels are exactly 1..n_colors; the label column is
         # left out of the mean so that non-numeric labels also work.
         coordinates = self.df_embedded.drop(columns=[cluster_title])
         centroids_series = [
             coordinates[label_column == value].mean()
             for value in label_column.dropna().unique()
         ]
         centroids_df = pd.concat(centroids_series, axis=1).T

     # 2D plot
     if self.n_comp == 2:
         plt.figure(figsize=plot_size)
         # Plot clusters
         ax1 = sns.scatterplot(
             x="1d",
             y="2d",
             hue=cluster_title,
             palette=sns.color_palette("hls", n_colors),
             data=self.df_embedded,
             legend="full",
             alpha=0.3)
         if has_clusters:
             # Plot centroids
             ax1.scatter(centroids_df.iloc[:, 0],
                         centroids_df.iloc[:, 1],
                         c='b',
                         s=50,
                         ec='black')
     # 3D plot
     else:
         fig = plt.figure(figsize=plot_size)
         # add_subplot attaches the 3D axes to the figure; a bare
         # Axes3D(fig) is not added automatically on recent Matplotlib.
         ax = fig.add_subplot(111, projection='3d')
         if has_clusters:
             # Integer code per cluster so that any label dtype can be
             # colour-mapped, without overwriting the stored labels.
             point_colors, _ = pd.factorize(self.df_embedded[cluster_title],
                                            sort=True)
             colormap = 'viridis'
         else:
             point_colors, colormap = 'b', None
         ax.scatter(self.df_embedded.iloc[:, 0],
                    self.df_embedded.iloc[:, 1],
                    self.df_embedded.iloc[:, 2],
                    c=point_colors,
                    cmap=colormap,
                    marker='o')
         if has_clusters:
             # Plot centroids
             ax.scatter(centroids_df.iloc[:, 0],
                        centroids_df.iloc[:, 1],
                        centroids_df.iloc[:, 2],
                        c='r',
                        s=50)
Exemplo n.º 7
0
 def plot_factorial_planes(self,
                           n_plan=None,
                           X_projected=None,
                           labels=None,
                           alpha=1,
                           illustrative_var=None,
                           illustrative_var_title=None,
                           save_as_img=False,
                           plot_size=(10, 8)):
     """
     Scatter the projected points on successive pairs of axes (F1/F2, F3/F4, ...).

     One figure is produced per pair ``(d1, d1 + 1)`` for ``d1`` in
     ``range(0, n_plan, 2)``, skipping pairs whose second axis is not below
     ``self.n_comp``.

     :param n_plan: upper bound of the axis range to cover; defaults to
         ``self.default_factorial_plan_nb``.
     :param X_projected: 2D array of projected coordinates; defaults to
         ``self.X_projected``.
     :param labels: optional per-point text labels, indexed by row.
     :param alpha: opacity of the scatter points.
     :param illustrative_var: optional per-point values; points are drawn
         as one series per distinct value, with a legend.
     :param illustrative_var_title: title of that legend.
     :param save_as_img: when true, each figure is also saved as
         'factorial_plan_<k>.jpg'.
     :param plot_size: figure size passed to ``plt.figure``.
     """
     X_projected = self.X_projected if X_projected is None else X_projected
     factorial_plan_nb = self.default_factorial_plan_nb if n_plan is None else n_plan
     # Convert once instead of on every plane.
     if illustrative_var is not None:
         illustrative_var = np.array(illustrative_var)

     axis_ranks = [(x, x + 1) for x in range(0, factorial_plan_nb, 2)]
     for d1, d2 in axis_ranks:
         if d2 >= self.n_comp:
             continue
         plt.figure(figsize=plot_size)
         # Display data points
         if illustrative_var is None:
             plt.scatter(X_projected[:, d1],
                         X_projected[:, d2],
                         alpha=alpha)
         else:
             for value in np.unique(illustrative_var):
                 selected = illustrative_var == value
                 plt.scatter(X_projected[selected, d1],
                             X_projected[selected, d2],
                             alpha=alpha,
                             label=value)
             plt.legend(title=illustrative_var_title)
         # Display data points labels
         if labels is not None:
             for i, (x, y) in enumerate(X_projected[:, [d1, d2]]):
                 plt.text(x,
                          y,
                          labels[i],
                          fontsize='12',
                          ha='center',
                          va='bottom')
         # Fix factorial plan limits: symmetric, with a 10% margin
         boundary = np.max(np.abs(X_projected[:, [d1, d2]])) * 1.1
         plt.xlim([-boundary, boundary])
         plt.ylim([-boundary, boundary])
         # Display horizontal & vertical lines across the whole plane; they
         # follow the limits so they still reach the edges for large data.
         plt.plot([-boundary, boundary], [0, 0], color='grey', ls='--')
         plt.plot([0, 0], [-boundary, boundary], color='grey', ls='--')
         # Axes labels with % explained variance
         plt.xlabel('F{} ({}%)'.format(d1 + 1,
                                       round(100 * self.evr[d1], 1)),
                    labelpad=20)
         plt.ylabel('F{} ({}%)'.format(d2 + 1,
                                       round(100 * self.evr[d2], 1)),
                    labelpad=20)
         plt.title("Projection des individus (sur F{} et F{})".format(
             d1 + 1, d2 + 1),
                   pad=20)
         if save_as_img:
             plt.tight_layout()
             plt.savefig(
                 'factorial_plan_{}.jpg'.format(1 if d1 == 0 else d1))
         plt.show(block=False)