示例#1
0
    def pngTable(self,
                 df: DataFrame,
                 colwidth_factor: float = 0.20,
                 fontsize: int = 12,
                 formatFloats: bool = True,
                 save: bool = False,
                 in_folder: PosixPath = None):
        '''
        Display a table on screen or save it as a png file.
        Uses matplotlib => pandas plotting table.

        parameters:
            df: dataframe or pivot table to render; it is not modified
            colwidth_factor: float, default 0.20, width used for every column
            fontsize: int, default 12
            formatFloats: bool, default True, renders float cells as strings
                with thousands separators and two decimals (e.g. 1,234.50)
            save: bool, default False; when True the figure is written to
                "table-<core.now_prefix()>.png" instead of being shown
            in_folder: posixpath, default None, folder to save the png file;
                when it is not a PosixPath or does not exist,
                core.folder.Downloads is used instead

        returns:
            None; the table is either shown or written to disk
        '''
        # Fall back to the default folder when no usable folder was supplied.
        if not isinstance(in_folder, PosixPath) or not in_folder.exists():
            in_folder = core.folder.Downloads

        # file name and path
        path = in_folder.joinpath(f"table-{core.now_prefix()}.png")

        # format floats - two digits
        if formatFloats:
            # applymap returns a NEW frame, so the result must be rebound;
            # rebinding the local name also leaves the caller's df untouched.
            df = df.applymap(lambda x: '{:,.2f}'.format(x)
                             if isinstance(x, float) else x)

        # Figure size scales with the number of columns and rows.
        fig, ax = plt.subplots(figsize=(1.9 * df.shape[1],
                                        0.3 * df.shape[0]))
        try:
            ax.xaxis.set_visible(False)  # hide the x axis
            ax.yaxis.set_visible(False)  # hide the y axis
            ax.set_frame_on(False)  # no visible frame around the table

            tabla = pd.plotting.table(ax,
                                      df,
                                      loc='upper left',
                                      colWidths=[colwidth_factor] *
                                      len(df.columns))
            tabla.auto_set_font_size(False)  # allow a manual font size
            tabla.set_fontsize(fontsize)  # larger fonts need larger colWidths
            tabla.scale(1.2, 1.2)  # enlarge the table cells

            if save:
                # bbox_inches="tight" keeps the table from being cut off, see
                # https://stackoverflow.com/questions/56328353/matplotlib-savefig-cuts-off-pyplot-table
                plt.savefig(fname=path, bbox_inches="tight", pad_inches=1)
                # Report the folder actually used, which is not necessarily
                # the Downloads folder.
                print(f"saved in {in_folder} as {path.name}")
            else:
                plt.show()  # show the result
        finally:
            # Always release the figure, even when saving or showing fails.
            plt.close(fig)
示例#2
0
def generate_and_save_report(
    y_pred: list,
    y_true: list,
    corpus_name: str,
    algorithm_name: str,
    fold: int,
) -> None:
    """Write the evaluation reports for one fold into ``./reports``.

    Three files are produced, all named
    ``./reports/<corpus_name>-<algorithm_name>-fold-<fold>`` plus an extension:

    * ``.xlsx`` - the classification report as a spreadsheet,
    * ``.txt``  - one text section per class, built by
      ``get_pos_class_report_text`` from the TP/TN/FP/FN counts and the
      per-class accuracy ``(TP + TN) / (TP + TN + FP + FN)``,
    * ``.svg``  - an annotated heatmap of the confusion matrix.

    Args:
        y_pred: Predicted labels.
        y_true: Reference (gold) labels, aligned with ``y_pred``.
        corpus_name: Corpus identifier used in the output file names.
        algorithm_name: Algorithm identifier used in the output file names
            and passed on to ``get_pos_class_report_text``.
        fold: Fold number used in the output file names.
    """
    report_basename = "./reports/{}-{}-fold-{}".format(
        corpus_name, algorithm_name, fold)

    # Pass the vectors by keyword: the reference labels come first in
    # classification_report, and swapping them swaps precision and recall.
    report = DataFrame(
        classification_report(
            y_true=y_true,
            y_pred=y_pred,
            zero_division=0,
            output_dict=True,
        ))

    # Save the report to Excel
    report.to_excel(report_basename + ".xlsx")

    # Build the confusion matrix used for the per-class report and the plot
    cm = ConfusionMatrix(
        actual_vector=y_true,
        predict_vector=y_pred,
    )

    # position() yields, per class, the collections of TP/TN/FP/FN items;
    # only how many there are is needed here.
    position_counts = DataFrame(cm.position()).applymap(len)

    # NOTE(review): this sorts the matrix's own class list in place, exactly
    # as the original code did; to_array() below may rely on that order -
    # confirm before replacing it with sorted().
    pos_classes = cm.classes
    pos_classes.sort()

    report_sections = []
    for pos_class in pos_classes:
        class_counts = position_counts[pos_class]
        tp = class_counts["TP"]
        tn = class_counts["TN"]
        fp = class_counts["FP"]
        fn = class_counts["FN"]
        # Parenthesise the numerator: without it only tn is divided.
        accuracy = (tp + tn) / (tp + tn + fp + fn)

        report_sections.append(
            get_pos_class_report_text(algorithm_name, report, pos_class, tp,
                                      tn, fp, fn, accuracy))

    # Save the report text
    with open(report_basename + ".txt", "w") as report_filehandle:
        report_filehandle.write("".join(report_sections))

    confusion_matrix_df = DataFrame(cm.to_array())

    # DataFrame.sum() adds up each column, so these totals belong to the
    # heatmap's x axis (one per column).
    column_sums = confusion_matrix_df.sum().to_list()

    y_labels = pos_classes
    x_labels = [
        "{} ({})".format(label, total)
        for label, total in zip(pos_classes, column_sums)
    ]

    # Each cell shows the column class, the row class and the cell count.
    n_rows, n_cols = confusion_matrix_df.shape
    annotations = [[
        f'''{pos_classes[col]}\n{pos_classes[row]}\n{confusion_matrix_df.iat[row, col]}'''
        for col in range(n_cols)
    ] for row in range(n_rows)]

    confusion_matrix_heatmap = sns.heatmap(
        confusion_matrix_df,
        annot=annotations,
        xticklabels=x_labels,
        yticklabels=y_labels,
        cmap="Greens",
        linewidths=0.1,
        linecolor="black",
        fmt='',  # annotations are already strings
        cbar=False,
    )

    fig = confusion_matrix_heatmap.get_figure()
    fig.savefig(report_basename + ".svg", bbox_inches='tight')
    # Clear the current figure so the next call does not draw over this one.
    plt.clf()