예제 #1
0
def plot(method):
    """
    Plots the per-epoch scores of every embedding of 'method' in one figure, saves it and shows it.

    One line is drawn per embedding in data[method] (ordered with sorted_by_suffix), using the
    'metric' values of name_of_classifier for the first num_epochs epochs. The figure is written to
    config.RESULT_DIR/<method>/ under a file name derived from the figure title.

    Reads and increments the module-level 'fig_number' counter.

    :param method: Method whose results are plotted (key into the module-level 'data' dict)
    """
    global fig_number

    x = list(range(1, 1 + num_epochs))
    colors = config.COLORS
    line_style = '-'

    fig = plt.figure(fig_number, (10, 6))
    fig_number += 1
    fig_title = method + " " + config.EMBEDDINGS_KEY + " " + metric_pretty
    # The canvas-level set_window_title was deprecated in matplotlib 3.4 and
    # later removed; the figure manager is the supported place to set it.
    fig.canvas.manager.set_window_title(fig_title)

    ax = plt.subplot(111)

    embeddings = sorted_by_suffix(data[method].keys())

    # Embedding legend entries (one colored patch per embedding)
    color_patches = []
    for i, embedding in enumerate(embeddings):
        # Each embedding gets a color; colors repeat if there are more embeddings than colors
        color = colors[i % len(colors)]
        epoch_results = data[method][embedding][
            name_of_classifier][:num_epochs]
        scores = [epoch_result[metric] for epoch_result in epoch_results]
        ax.plot(x, scores, line_style, color=color)
        color_patches.append(mpatches.Patch(color=color, label=embedding))

    plt.xticks(x)
    plt.xlabel('Epoch')
    plt.ylabel('Score')

    # Hide every other epoch label, starting with the first one
    for label in ax.xaxis.get_ticklabels()[::2]:
        label.set_visible(False)

    # Shrink current axis by 20% so the legend fits to the right of it
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])

    ax.legend(handles=color_patches,
              loc='upper right',
              bbox_to_anchor=(1.3, 1.0))
    save_path = path.join(config.RESULT_DIR, method,
                          fig_title.replace(' ', '_'))
    print("Saving to", save_path)
    plt.savefig(save_path)
    # Only block once the counter has moved past number_of_figures
    plt.show(block=fig_number == number_of_figures + 1)
예제 #2
0
 def load_results(self, method, embedding, classifier, results_dir):
     """
     Loads results for all epochs of a method + embedding + classifier combination.

     Every sub-directory of 'results_dir' is treated as one epoch and visited in sorted_by_suffix
     order. Inside it, the file named after the lower-cased classifier is parsed as
     whitespace-separated "<metric> <value>" lines; blank lines are skipped. The resulting
     { metric: value } dict is handed to add_epoch_results.

     :param method:      Method the results belong to
     :param embedding:   Embedding the results belong to
     :param classifier:  Classifier name; its lower-cased form is the results file name
     :param results_dir: Directory containing one sub-directory per epoch
     """
     epoch_dirs = sorted_by_suffix([d for d in listdir(results_dir)
                                    if path.isdir(path.join(results_dir, d))])
     for epoch_dir in epoch_dirs:
         file_path = path.join(results_dir, epoch_dir, classifier.lower())
         epoch_results = {}
         with open(file_path) as f:
             for line in f:
                 fields = line.split()
                 if not fields:
                     # Blank line (e.g. an empty line at the end of the file): nothing to parse
                     continue
                 epoch_results[fields[0]] = float(fields[1])
         self.add_epoch_results(method, embedding, classifier, epoch_results)
예제 #3
0
def print_table(method):
    """
    Prints a LaTeX table corresponding to combined_plot for each 'method' (binary, ternary, agg_ternary)
    PS! Works only for one classifier!

    Columns are the embeddings of data[method] in sorted_by_suffix order, rows are the epochs
    1..num_epochs. Each cell is the 'metric' value of name_of_classifier passed through
    round(value, specificity). Every cell equal to the highest value in the table is set in bold.

    :param method: Method
    """
    print("% " + method + ": " + ", ".join(data[method].keys()))

    num_columns = len(data[method]) + 2
    column_setup = "{" + col_align + " " + col_align + "*{" + str(
        num_columns - 2) + "}{|" + col_align + "}}"

    prefix = "\\begin{table}[H]\n\t\\centering\n\t\\begin{tabular}" + column_setup + "\n"

    # Create header
    header = "\\multicolumn{" + str(
        num_columns) + "}{" + col_align + "}{" + bold(
            config.EMBEDDINGS_KEY) + "} \\\\\n"
    # Materialized once: the same column order is used for the header and for every row
    embeddings = list(sorted_by_suffix(data[method].keys()))
    header_values = "& & " + " & ".join(
        [embedding.split("-")[-1] for embedding in embeddings]) + " \\\\\n"

    hline = "\\hhline{~*{" + str(num_columns - 1) + "}{|-}}\n"

    epochs_header = "\\parbox[t]{2mm}{\\multirow{" + str(
        num_epochs) + "}{*}{\\rotatebox[origin=c]{90}{\\textbf{Epochs}}}}\n"

    table = prefix + header + header_values + hline + epochs_header

    # Round every score up front so the maximum is known before any cell is rendered
    rows = [
        [round(data[method][embedding][name_of_classifier][epoch][metric],
               specificity)
         for embedding in embeddings]
        for epoch in range(num_epochs)
    ]
    max_value = max((value for row in rows for value in row), default=None)

    for epoch_number, row in enumerate(rows, start=1):
        cells = [str(epoch_number)]
        for value in row:
            text = str(value)
            # Bold is decided per cell, so epoch numbers, header text and values that merely
            # contain the maximum as a substring are never touched
            cells.append(bold(text) if value == max_value else text)
        table += "& " + " & ".join(cells) + " \\\\\n"

    caption = metric_pretty + " scores for " + method + " method on " + config.EMBEDDINGS_KEY + " embeddings."
    caption = caption.replace("_", "\\_")
    postfix = "\t\\end{tabular}\n\t\\caption{" + caption + "}\n\\end{table}\n"
    table += postfix

    print(table)
예제 #4
0
    def __init__(self, methods: List[str], embeddings: List[str], classifiers: List[str], num_epochs: int):
        """
        Builds a ResultsData instance and immediately loads the stored results for every requested
        method / embedding / classifier combination.

        :param methods:     List of methods to collect results from ("binary", "ternary", "agg_ternary", ...)
        :param embeddings:  List of embeddings/params to collect results from ("TextBlob", "AFINN", "windowsize=1", ...).
                            If it compares equal to the string "all", every directory found under
                            config.RESULT_DIR/<method> is used instead.
        :param classifiers: Names of classifiers to collect results from ("SVM c=1", "lexicon classifier", ...)
        :param num_epochs:  The minimum number of epochs a method+embedding combination must have been trained for
                            (only stored on the instance here).
        """
        # Layout: { method: { embedding: { classifier_name: [ { metric: value } ] } } }
        self.data = {}

        self.methods = sorted(methods)
        self.embeddings = sorted_by_suffix(embeddings)
        self.classifiers = sorted(classifiers)
        self.num_epochs = num_epochs

        # Walk every method and pull in the results stored on disk
        for method in methods:
            if embeddings == "all":
                # Discover the embeddings from the directories present for this method
                method_root = path.join(config.RESULT_DIR, method)
                candidates = []
                for entry in sorted_by_suffix(os.listdir(method_root)):
                    if os.path.isdir(os.path.join(method_root, entry)):
                        candidates.append(entry)
            else:
                candidates = embeddings

            for embedding in candidates:
                relative_dir = path.join(method, embedding)
                results_dir = path.join(config.RESULT_DIR, relative_dir)

                # Combinations rejected by filter_tweet are not loaded
                if self.filter_tweet(method, embedding, results_dir):
                    for classifier in sorted(classifiers):
                        self.load_results(method, embedding, classifier, results_dir)
예제 #5
0
def test_all_epochs(embeddings_dir, results_dir):
    """
    Runs train_and_test once for every entry of 'embeddings_dir'.

    The entries are visited in sorted_by_suffix order (presumably by numeric suffix, so that '7'
    comes before '18'). For each entry the train_and_test module is reconfigured and its main() is
    called; results go to a directory of the same name under 'results_dir'.

    :param embeddings_dir: Directory holding one embeddings file per epoch
    :param results_dir:    Directory under which the per-file results directories are placed
    """
    for file_name in sorted_by_suffix(listdir(embeddings_dir)):
        logger.info(file_name)

        # Build the per-file settings first ...
        fresh_classifiers = classifiers()
        embedding_path = path.join(embeddings_dir, file_name)
        output_dir = path.join(results_dir, file_name)

        # ... then push them onto the train_and_test module
        train_and_test.classifiers = fresh_classifiers
        train_and_test.baselines = []
        train_and_test.embedding_file = embedding_path
        train_and_test.verbose = -2
        train_and_test.quiet = True
        train_and_test.results_dir = output_dir

        train_and_test.main()
예제 #6
0
def get_dir_names(prefix):
    """
    Returns the names of the directories directly inside RESULTS_DIR that start with 'prefix'.

    :param prefix: Leading string a directory name must have to be included
    :return: The matching directory names, ordered by sorted_by_suffix
    """
    matches = []
    for entry in listdir(RESULTS_DIR):
        if not path.isdir(path.join(RESULTS_DIR, entry)):
            continue
        if entry.startswith(prefix):
            matches.append(entry)
    return sorted_by_suffix(matches)