def run_classifier(
        files: Tuple[str, str, str],
        method: str,
        method_class: Base,
        model_file: str,
        lower_case: bool) -> None:
    """Run one classifier and save its normalized confusion matrix plot.

    ``method_class`` is asked to predict from the train and test files,
    its ``accuracy`` method is called on the prediction result, and a
    confusion matrix built from the result's ``'truth'`` and ``'pred'``
    entries is written to ``Plots/<method>.png``.

    Args:
        files: Train, dev and test filenames, in that order. The dev
            filename is unpacked but not used here.
        method: Name of the method; used for the plot title and filename.
        method_class: Object exposing ``predict(train, test, lower_case)``
            and ``accuracy(result)``.
        model_file: Not used by this function.
        lower_case: Forwarded unchanged to ``method_class.predict``.
    """
    train_path, _dev_path, test_path = files
    predictions = method_class.predict(train_path, test_path, lower_case)
    method_class.accuracy(predictions)

    # The output directory has to exist before the figure is saved.
    make_dirs("Plots")
    figure, axes = plot_confusion_matrix(
        predictions['truth'], predictions['pred'], normalize=True)
    plot_title = "Normalized Confusion Matrix: {}".format(method.title())
    axes.set_title(plot_title)
    figure.tight_layout()
    output_path = "Plots/{}.png".format(method)
    figure.savefig(output_path)
Exemplo n.º 2
0
                                if not os.path.exists(directory):
                                    os.makedirs(directory)
                                    os.makedirs(directory + "/normalized")
                                    os.makedirs(directory + "/unnormalized")
                                model.save(directory + "/" + file_name +
                                           ".model")
                                i += 1

                                # print(model.evaluate(x_test, y_test, batch_size=batch_size))
                                (cnf_mat, acc) = gue.manual_verification_100(
                                    model, (x_test, y_test), batch_size=b_size)

                                plt.figure(figsize=(10, 10), dpi=100)
                                plotter.plot_confusion_matrix(
                                    cnf_mat,
                                    classes=range(16),
                                    normalize=True,
                                    title='Normalized confusion matrix')

                                plt.savefig(directory + "/normalized/" +
                                            file_name + "_normalized_" +
                                            str(acc) + ".png")
                                plt.figure(figsize=(10, 10), dpi=100)
                                plotter.plot_confusion_matrix(
                                    cnf_mat.astype(int),
                                    classes=range(16),
                                    normalize=False,
                                    title='Non-Normalized confusion matrix')

                                plt.savefig(directory + "/unnormalized/" +
                                            file_name + "_" + str(acc) +
def run(args):
    """Run the cluster-based classification and save its plots and logs.

    Loads the configuration and both publication sets, classifies them
    with the strict and doublon converters, then writes one confusion
    matrix plot and one log per converter plus a combined F1-score plot.

    Parameters
    ----------
    args : ArgumentParser
        The arguments of the command typed by the user. Only
        ``args.CONFIG`` is read here; it names the JSON file loaded
        from ``config/``.
    """
    global CONFIG
    config_path = "config/{0}.json".format(args.CONFIG)
    CONFIG = exh.load_json(config_path)

    print("Loading publications")
    dida_path = FILENAME_TEMPLATE.format(CONFIG['DIDA_DOCS'])
    dida_data = exh.load_json(dida_path)
    notdida_path = FILENAME_TEMPLATE.format(CONFIG['NOTDIDA_DOCS'])
    notdida_data = exh.load_json(notdida_path)

    # DIDA publications come first; y_true below follows the same order.
    docs = [deepcopy(dida_data), deepcopy(notdida_data)]
    display.display_ok("Loading publications done")

    data_directory = '/'.join((DIRECTORY, CONFIG['ALL_CLUSTERS_DIRECTORY']))
    ndw = exh.load_json(data_directory + "/ndw.json")
    w = exh.load_json(data_directory + "/W.json")

    # Real labels of each publication: 1 for DIDA, 0 for Not-DIDA.
    n_dida, n_notdida = len(dida_data), len(notdida_data)
    y_true = np.append(np.ones(n_dida), np.zeros(n_notdida))
    strict_result, doublon_result = classification(
        docs, ndw, w, data_directory, y_true)

    # Same reporting for both converters: confusion matrix plot, then log.
    reports = (
        ("strict_", "strict", strict_result),
        ("doublon_", "doublon", doublon_result),
    )
    for prefix, label, outcome in reports:
        plt.plot_confusion_matrix(outcome, n_dida, n_notdida, prefix,
                                  "n_clusters", "Number of clusters",
                                  DIRECTORY, step=1000)
        exh.save_to_log(outcome, label, "n_clusters",
                        LOG_FILENAME.format(label))

    scores = [outcome['score'] for outcome in (strict_result, doublon_result)]
    classifiers_names = ["Strict converter", "Doublon converter"]

    # The x-axis values are taken from the strict result.
    plt.plot_lines(strict_result['n_clusters'], scores, classifiers_names,
                   FSCORE_FILENAME, "Number of clusters", "F1-score",
                   step=1000)
Exemplo n.º 4
0
def run(args):
    """Train the three threshold-based classifiers and save their results.

    Loads the configuration and both publication sets, trains the strict,
    split weighted and weighted classifiers in that order, and for each
    one writes a confusion matrix plot and a log. A combined F1-score
    plot is written at the end.

    Parameters
    ----------
    args : ArgumentParser
        The arguments of the command typed by the user. Only
        ``args.CONFIG`` is read here; it names the JSON file loaded
        from ``config/``.
    """
    global CONFIG
    CONFIG = exh.load_json("config/{0}.json".format(args.CONFIG))

    exh.create_directory(DIRECTORY)

    print("Loading publications")
    dida_path = FILENAME_TEMPLATE.format(CONFIG['DIDA_DOCS'])
    dida_data = exh.load_json(dida_path)
    notdida_path = FILENAME_TEMPLATE.format(CONFIG['NOTDIDA_DOCS'])
    notdida_data = exh.load_json(notdida_path)
    display.display_ok("Loading publications done")

    csv_files = csv_filenames(CONFIG['NGRAMS'])

    # Real labels of each publication: 1 for DIDA, 0 for Not-DIDA.
    n_dida, n_notdida = len(dida_data), len(notdida_data)
    y_true = np.append(np.ones(n_dida), np.zeros(n_notdida))

    # Single dataset in the same order as y_true: DIDA first.
    data = deepcopy(dida_data)
    data.extend(deepcopy(notdida_data))

    scores = []
    classifiers_names = []

    def evaluate(classifier, tag, title):
        """Train one classifier, save its plot and log, return its results."""
        print("{0} training".format(title))
        # Each classifier trains on its own copy of the data.
        outcome = train(classifier, deepcopy(data), csv_files, y_true)
        plt.plot_confusion_matrix(outcome, n_dida, n_notdida,
                                  "{0}_".format(tag), "threshold",
                                  "Threshold", DIRECTORY)
        scores.append(outcome['score'])
        exh.save_to_log(outcome, tag, "threshold", LOG_FILENAME.format(tag))
        classifiers_names.append(title)
        display.display_ok("{0} training done".format(title))
        return outcome

    evaluate(StrictClassifier, "strict", "Strict Classifier")
    evaluate(SplitWeightedClassifier, "splitweighted",
             "Split Weighted Classifier")
    last_results = evaluate(WeightedClassifier, "weighted",
                            "Weighted Classifier")

    # The x-axis values are taken from the last classifier trained.
    plt.plot_lines(last_results['threshold'], scores, classifiers_names,
                   FSCORE_FILENAME, "Threshold", "F1-score")
    display.display_info("Results saved in {0}".format(DIRECTORY))