def plot_curves(plot_file_name, curve_names, mrs, homogeneity_scores, completeness_scores, number_of_embeddings):
    """
    Plots all specified curves and saves the plot into a file.

    Curves are ordered by ascending minimum MR (ties broken by curve name) before plotting, so
    the legend lists the best curve first. The figure is written twice through get_result_png:
    once in the default format and once with format='svg'.

    :param plot_file_name: String value of save file name
    :param curve_names: Sequence of names used in legend to describe each curve
    :param mrs: 2D Matrix, each row describes one dataset of misclassification rates for a curve
    :param homogeneity_scores: 2D Matrix, each row describes one dataset of homogeneity scores for a curve
    :param completeness_scores: 2D Matrix, each row describes one dataset of completeness scores for a curve
    :param number_of_embeddings: sequence of integers, each integer describes how many embeddings is in this curve
    """
    logger = get_logger('analysis', logging.INFO)
    logger.info('Plot results')
    logger.info(plot_file_name)

    min_mrs = [np.min(mr) for mr in mrs]

    # Sort on (min MR, name) only. Sorting the raw tuples would fall through to comparing the
    # score rows whenever both of those tie, which raises for numpy arrays.
    rows = sorted(
        zip(min_mrs, curve_names, mrs, homogeneity_scores, completeness_scores, number_of_embeddings),
        key=lambda row: (row[0], row[1]))

    if rows:
        min_mrs, curve_names, mrs, homogeneity_scores, completeness_scores, number_of_embeddings = \
            (list(column) for column in zip(*rows))
    else:
        # Nothing to plot: zip(*[]) yields no columns, so unpacking it would fail.
        min_mrs, curve_names, mrs, homogeneity_scores, completeness_scores, number_of_embeddings = \
            ([] for _ in range(6))

    # How many lines to plot
    number_of_lines = len(curve_names)

    # Get various colors needed to plot
    color_map = plt.get_cmap('gist_rainbow')
    colors = [color_map(i) for i in np.linspace(0, 1, number_of_lines)]

    # Define number of figures
    fig1 = plt.figure(1)
    fig1.set_size_inches(16, 8)

    # Define Plots
    mr_plot = plt.subplot2grid((2, 3), (0, 0), colspan=2)
    mr_plot.set_ylabel('MR')
    mr_plot.set_xlabel('number of clusters')
    mr_plot.set_ylim([-0.02, 1.02])

    completeness_scores_plot = add_cluster_subplot(fig1, 234, 'completeness_scores')
    homogeneity_scores_plot = add_cluster_subplot(fig1, 235, 'homogeneity_scores')

    # Define curves and their values
    curves = [[mr_plot, mrs],
              [homogeneity_scores_plot, homogeneity_scores],
              [completeness_scores_plot, completeness_scores]]

    # Plot all curves
    for index, curve_name in enumerate(curve_names):
        label = curve_name + '\n min MR: ' + str(min_mrs[index])
        color = colors[index]
        # x axis counts down from the number of embeddings to 1
        number_of_clusters = np.arange(number_of_embeddings[index], 0, -1)

        for plot, value in curves:
            plot.plot(number_of_clusters, value[index], color=color, label=label)

    # Add legend and save the plot
    fig1.legend()
    fig1.savefig(get_result_png(plot_file_name))
    fig1.savefig(get_result_png(plot_file_name + '.svg'), format='svg')
def plot_curves(plot_file_name, curve_names, mrs, homogeneity_scores, completeness_scores, number_of_embeddings):
    """
    Draws every curve onto figure 1 and saves that figure through get_result_png.

    The figure holds a wide MR panel on top and a completeness and a homogeneity panel below.
    For each curve the minimum MR value is additionally written as text at x=0 on the MR panel.

    :param plot_file_name: String value of save file name
    :param curve_names: Sequence of legend labels, one per curve
    :param mrs: per-curve misclassification rates, indexed by curve
    :param homogeneity_scores: per-curve homogeneity scores, indexed by curve
    :param completeness_scores: per-curve completeness scores, indexed by curve
    :param number_of_embeddings: per-curve integer n; the x values of that curve are n, n-1, ..., 2
    """
    get_logger('analysis', logging.INFO).info('Plot results')

    # One color per curve, spread evenly over the colormap
    line_count = len(curve_names)
    palette = plt.get_cmap('gist_rainbow')
    colors = [palette(fraction) for fraction in np.linspace(0, 1, line_count)]

    figure = plt.figure(1)
    figure.set_size_inches(32, 24)

    # Top row: MR panel spanning both columns
    mr_axes = plt.subplot2grid((2, 2), (0, 0), colspan=2)
    mr_axes.set_title('MR')
    mr_axes.set_xlabel('number of clusters')
    mr_axes.axis([0, 80, 0, 1])

    # Bottom row: the two score panels
    completeness_axes = add_cluster_subplot(figure, 223, 'completeness_scores')
    homogeneity_axes = add_cluster_subplot(figure, 224, 'homogeneity_scores')

    # Each panel paired with the data set it displays
    panels = (
        (mr_axes, mrs),
        (homogeneity_axes, homogeneity_scores),
        (completeness_axes, completeness_scores),
    )

    for line_no, curve_label in enumerate(curve_names):
        line_color = colors[line_no]
        cluster_counts = np.arange(number_of_embeddings[line_no], 1, -1)

        for axes, series in panels:
            axes.plot(cluster_counts, series[line_no], color=line_color, label=curve_label)

        lowest_mr = np.min(mrs[line_no])
        mr_axes.annotate(str(lowest_mr), xy=(0, lowest_mr))

    # Add legend and save the plot
    figure.legend()
    figure.savefig(get_result_png(plot_file_name))
Exemplo n.º 3
0
def plot_curves(plot_file_name, loaded_dict, loaded_dict2, loaded_dict3,
                loaded_dict4):
    """
    Groups the values of loaded_dict by embedding tag and plots the 128/256/512 groups.

    Every key of loaded_dict is matched (substring containment, first match wins) against the
    tags 64_0..64_7, 128_0..128_3, 256_0, 256_1 and 512; the value is collected under the
    matching tag. Each group is sorted ascending, printed, and the seven 128/256/512 groups are
    plotted against the fixed cluster counts 1..40.

    :param plot_file_name: String value of save file name
    :param loaded_dict: mapping whose keys contain one of the group tags and whose values are MR values
    :param loaded_dict2: not used by this function
    :param loaded_dict3: not used by this function
    :param loaded_dict4: not used by this function
    """
    logger = get_logger('analysis', logging.INFO)
    logger.info('Plot results')

    num_clusters = list(range(1, 41))
    print(num_clusters)

    # Tags in match-priority order; the slices below rely on this exact ordering.
    group_tags = (
        "64_0", "64_1", "64_2", "64_3", "64_4", "64_5", "64_6", "64_7",
        "128_0", "128_1", "128_2", "128_3",
        "256_0", "256_1",
        "512",
    )
    grouped = {tag: [] for tag in group_tags}

    for key in loaded_dict:
        print(key)

    for key in loaded_dict:
        matching_tag = next((tag for tag in group_tags if tag in key), None)
        if matching_tag is not None:
            grouped[matching_tag].append(loaded_dict[key])

    # NOTE(review): values are sorted on their own and then plotted against 1..40, so the
    # pairing between a value and its cluster count depends on this sort - confirm intended.
    for tag in group_tags:
        grouped[tag].sort()

    # Print the groups family by family, each of the first three families followed by "\n"
    for family in (group_tags[:8], group_tags[8:12], group_tags[12:14]):
        for tag in family:
            print(grouped[tag])
        print("\n")

    print(grouped["512"])

    x_low = int(min(num_clusters)) - 0.1
    x_high = int(max(num_clusters)) + 0.1

    # Get various colors needed to plot
    palette = plt.get_cmap('gist_rainbow')
    colors = [palette(position) for position in np.linspace(0, 2, 15)]

    # Define number of figures
    fig1 = plt.figure(1)
    fig1.set_size_inches(16, 8)

    plt.title('Embeddings Plot for hierarchical MR ')
    plt.axis([x_low, x_high, -0.02, 0.5])
    plt.xlabel('Number of clusters')
    plt.ylabel("MR")

    # Only the 128/256/512 groups are drawn; the 64_* groups are printed above but not plotted.
    plotted_tags = group_tags[8:]
    for tag, color in zip(plotted_tags, colors):
        print("plotting " + tag)
        plt.plot(num_clusters, grouped[tag], color=color, label=tag)

    # Add legend and save the plot
    fig1.legend()
    fig1.show()
    fig1.savefig(get_result_png(plot_file_name))
    print("Plot File saved in " + get_result_png(plot_file_name))
    fig1.savefig(get_result_png(plot_file_name + '.svg'), format='svg')
Exemplo n.º 4
0
def plot_curves(plot_file_name, curve_names, mrs, homogeneity_scores, completeness_scores, number_of_embeddings,
                loaded_dict, loaded_dict2, loaded_dict3):
    """
    Plots the MR of three clustering algorithms against the number of clusters and saves the plot.

    The x axis is built from the keys of loaded_dict (converted to int, in the mapping's
    iteration order); the three curves are the values of loaded_dict, loaded_dict2 and
    loaded_dict3 in their own iteration order. The completeness and homogeneity subplots are
    created but nothing is drawn into them.

    :param plot_file_name: String value of save file name
    :param curve_names: Sequence of names; only its length is used, as a lower bound for the palette size
    :param mrs: not read by this function
    :param homogeneity_scores: not read by this function
    :param completeness_scores: not read by this function
    :param number_of_embeddings: not read by this function
    :param loaded_dict: mapping cluster count -> MR for agglomerative hierarchical clustering
    :param loaded_dict2: mapping whose values are the MR values for k-means clustering
    :param loaded_dict3: mapping whose values are the MR values for dominant sets clustering
    :raises ValueError: if loaded_dict is empty
    """
    logger = get_logger('analysis', logging.INFO)
    logger.info('Plot results')
    logger.info(plot_file_name)

    hierach_mr = [loaded_dict[key] for key in loaded_dict]
    print("\n")
    print("Hierachial MR")
    print(hierach_mr)

    kmeans_mr = [loaded_dict2[key] for key in loaded_dict2]
    print("\n")
    print("Kmeans MR")
    print(kmeans_mr)

    ds_mr = [loaded_dict3[key] for key in loaded_dict3]
    print("\n")
    print("DS MR")
    print(ds_mr)

    ks = [int(key) for key in loaded_dict]
    if not ks:
        raise ValueError("loaded_dict is empty; there are no cluster counts to plot")
    minc = min(ks)
    maxc = max(ks)
    print("\n")
    print("Cluster Count")
    print(ks)
    print("Minimum Cluster : " + str(minc))
    print("Maximum CLuster : " + str(maxc) + "\n")

    algorithms = ["Agglomerative_Hierachial_Clustering",
                  "K_Means_Clustering",
                  "DominantSets_Clustering"]
    series = [hierach_mr, kmeans_mr, ds_mr]

    # The palette must cover every plotted algorithm. Sizing it by curve_names alone raised
    # IndexError when fewer than three names were passed; max() keeps the colors unchanged
    # for callers that already passed three or more.
    number_of_colors = max(len(curve_names), len(algorithms))
    color_map = plt.get_cmap('gist_rainbow')
    colors = [color_map(i) for i in np.linspace(0, 1, number_of_colors)]

    # Define number of figures
    fig1 = plt.figure(1)
    fig1.set_size_inches(16, 8)

    # Define Plots
    mr_plot = plt.subplot2grid((2, 3), (0, 0), colspan=2)
    mr_plot.set_ylabel('MR')
    mr_plot.set_xlabel('Number of clusters')
    mr_plot.grid(True)
    mr_plot.axis([minc, maxc, -0.02, 1.2])

    # Created for layout parity with the other plots; no data is drawn into them here.
    add_cluster_subplot(fig1, 234, 'completeness_scores')
    add_cluster_subplot(fig1, 235, 'homogeneity_scores')

    # Plot all curves
    for label, color, values in zip(algorithms, colors, series):
        print(values)
        mr_plot.plot(ks, values, color=color, label=label)

    # Add legend and save the plot
    fig1.legend()
    fig1.savefig(get_result_png(plot_file_name))
    print("Plot File saved in " + get_result_png(plot_file_name))
    fig1.savefig(get_result_png(plot_file_name + '.svg'), format='svg')
Exemplo n.º 5
0
def _plot_curves(plot_file_name, curve_names, metric_sets,
                 number_of_embeddings):
    """
    Plots all specified curves and saves the plot into a file.

    One subplot is created per entry of the module-level metric_names, laid out row by row in
    a 3x2 grid. Only the last plot_width values of every curve are drawn; plot_width and the
    figure size are read from the 'common' section of config.cfg. The caller's metric_sets is
    left untouched.

    :param plot_file_name: String value of save file name
    :param curve_names: Sequence of names used in legend to describe each curve
    :param metric_sets: A list of 2D matrices (one per metric, in metric_names order), each row
                        of a metrics 2D matrix describes one dataset for a curve
    :param number_of_embeddings: not read by this function
    """
    logger = get_logger('analysis', logging.INFO)
    logger.info('Plot results')

    config = load_config(None, join(get_common(), 'config.cfg'))
    plot_width = config.getint('common', 'plot_width')
    fig_width = config.getint('common', 'fig_width')
    fig_height = config.getint('common', 'fig_height')

    # Keep only the last plot_width results of every curve. Build new lists instead of
    # assigning back into metric_sets so the caller's data is not silently truncated.
    sliced_sets = []
    for metric_set in metric_sets:
        sliced_set = []
        for results in metric_set:
            tail = results[-plot_width:]
            print(len(tail))
            sliced_set.append(tail)
        sliced_sets.append(sliced_set)

    # Best value per metric and curve: the maximum when the metric's entry in
    # metric_min_values is 0, otherwise the minimum.
    best_results = []
    for m, min_value in enumerate(metric_min_values):
        pick_best = np.max if min_value == 0 else np.min
        best_results.append([pick_best(results) for results in sliced_sets[m]])

    # Lines are deliberately not sorted: with several metrics there is no single sort key.

    # How many lines to plot
    number_of_lines = len(curve_names)

    # Get various colors needed to plot
    color_map = plt.get_cmap('gist_rainbow')
    colors = [color_map(i) for i in np.linspace(0, 1, number_of_lines)]

    # Set fontsize for all plots
    plt.rcParams.update({'font.size': 12})

    # Define number of figures
    fig1 = plt.figure(figsize=(fig_width, fig_height))

    # Define Plots: fill the grid row by row, one subplot per metric. For four metrics this
    # gives (0, 0), (0, 1), (1, 0), (1, 1); the bottom row is where the legend is anchored.
    plot_grid = (3, 2)
    grid_columns = plot_grid[1]
    plots = [
        _add_cluster_subplot(plot_grid, divmod(m, grid_columns), metric_name, 1)
        for m, metric_name in enumerate(metric_names)
    ]

    # Set the vertical space between subplot rows
    plt.subplots_adjust(hspace=0.3)

    # Pair every subplot with the (sliced) metric set it displays
    curves = list(zip(plots, sliced_sets))

    # Plot all curves
    for index in range(number_of_lines):
        label = curve_names[index]
        for m, metric_name in enumerate(metric_names):
            label = label + '\n {} {}: {}'.format(
                'Max' if metric_min_values[m] == 0 else 'Min', metric_name,
                str(best_results[m][index]))
        color = colors[index]

        for plot, value in curves:
            # Count down to 1 from the curve's own length, so a curve shorter than
            # plot_width still lines up with its x values.
            number_of_clusters = np.arange(len(value[index]), 0, -1)
            plot.plot(number_of_clusters,
                      value[index],
                      color=color,
                      label=label)

    # Add legend and save the plot
    fig1.legend(loc='upper center', bbox_to_anchor=(0.5, 0.33), ncol=4)
    fig1.savefig(get_result_png(plot_file_name))
    fig1.savefig(get_result_png(plot_file_name + '.svg'), format='svg')
Exemplo n.º 6
0
def plot_curves(plot_file_name, loaded_dict, loaded_dict2, loaded_dict3):
    """
    Plots the dominant sets MR against the number of clusters and saves the plot into a file.

    The plotted points come from loaded_dict3: its keys are converted to int and used as
    cluster counts, its values as MR. Points are sorted by cluster count as (count, MR) pairs
    so every MR value stays attached to its own cluster count. The values of loaded_dict and
    loaded_dict2 are only printed for inspection.

    :param plot_file_name: String value of save file name
    :param loaded_dict: mapping whose values are the MR values for agglomerative hierarchical clustering
    :param loaded_dict2: mapping whose values are the MR values for k-means clustering
    :param loaded_dict3: mapping cluster count -> MR for dominant sets clustering
    :raises ValueError: if loaded_dict3 is empty
    """
    logger = get_logger('analysis', logging.INFO)
    logger.info('Plot results')

    # Diagnostic listings only: each list is printed in ascending value order.
    num_clusters = list(loaded_dict)
    print("\n")
    print("Hierachial MR")
    print(sorted(loaded_dict[key] for key in loaded_dict))

    print("\n")
    print("Kmeans MR")
    print(sorted(loaded_dict2[key] for key in loaded_dict2))

    print("\n")
    print("DS MR")
    print(sorted(loaded_dict3[key] for key in loaded_dict3))

    print(num_clusters)

    # Sort (cluster count, MR) pairs together. Sorting the counts and the MR values
    # independently would break the pairing and always draw a monotonically rising curve.
    ds_points = sorted(((int(key), loaded_dict3[key]) for key in loaded_dict3),
                       key=lambda point: point[0])
    if not ds_points:
        raise ValueError("loaded_dict3 is empty; there is nothing to plot")
    ks = [count for count, _ in ds_points]
    ds_mr = [mr for _, mr in ds_points]

    minc = min(ks)
    maxc = max(ks)
    print("\n")
    print("Cluster Count")
    print(ks)
    print("Minimum Cluster : " + str(minc))
    print("Maximum CLuster : " + str(maxc) + "\n")

    # Get various colors needed to plot
    color_map = plt.get_cmap('gist_rainbow')
    colors = [color_map(i) for i in np.linspace(0, 1, 5)]

    # Define number of figures
    fig1 = plt.figure(1)
    fig1.set_size_inches(16, 8)

    plt.title("Plot for DominantSets Clustering for pairwise_lstm")
    plt.axis([(minc - 0.1), (maxc + 0.1), -0.02, 0.2])
    plt.xlabel('Number of clusters')
    plt.ylabel("MR")

    # Position (within the plotted curve) of the largest MR value
    xpos = ds_mr.index(max(ds_mr))
    print("xpos " + str(xpos))

    # Only the dominant sets curve is drawn, matching the plot title.
    label = "DominantSets_Clustering"
    print("plotting " + label)
    plt.plot(ks, ds_mr, color=colors[0], label=label)

    # Add legend and save the plot
    fig1.legend()
    fig1.show()
    fig1.savefig(get_result_png(plot_file_name))
    print("Plot File saved in " + get_result_png(plot_file_name))
    fig1.savefig(get_result_png(plot_file_name + '.svg'), format='svg')