def compare_per_nt(file1, file2, y_label='New'):
    """Draw a stacked bar chart comparing two per-non-terminal result files.

    Each file is read with ``read_jsons`` and must unpack into exactly two
    values: the non-terminal labels and the per-label numbers. The values
    from ``file1`` are drawn as base bars; the difference
    ``file2 - file1`` is stacked on top of them, coloured green where it is
    non-negative and red where it is negative.

    NOTE(review): SOURCE contains another ``compare_per_nt`` definition; if
    both live in the same module, the later one replaces the earlier one.

    :param file1: path of the baseline results, passed to ``read_jsons``
    :param file2: path of the results to compare against the baseline
    :param y_label: label of the y axis
    :raises AssertionError: if the two files carry different label lists
    """
    nt1, base_values = read_jsons(file1)
    nt2, new_values = read_jsons(file2)
    assert nt1 == nt2

    nt = nt1
    base_values = np.array(base_values)
    delta = np.array(new_values) - base_values
    positions = np.arange(len(nt))

    # Baseline bars, all in the neutral colour.
    for rect in plt.bar(positions, base_values):
        rect.set_facecolor(COLOR_BASE)

    # Differences are stacked on top of the baseline bars.
    delta_bars = plt.bar(positions, delta, bottom=base_values)
    for pos, rect in enumerate(delta_bars):
        rect.set_facecolor(COLOR_GREEN if delta[pos] >= 0 else COLOR_RED)

    # Proxy handles so the legend shows one entry per colour.
    legend_handles = [
        Line2D([0], [0], color=colour, lw=4)
        for colour in (COLOR_BASE, COLOR_GREEN, COLOR_RED)
    ]
    plt.legend(legend_handles, ['База', 'Улучшение', 'Ухудшение'])

    add_nt_x_ticks(nt)
    plt.ylabel(y_label, fontsize=12)
    plt.show()
def compare_per_nt_diff_only(file1, file2, y_label='New'):
    """Draw only the per-non-terminal difference between two result files.

    Each file is read with ``read_jsons`` and must unpack into exactly two
    values: the non-terminal labels and the per-label numbers. The bar
    heights are ``(file2 - file1) * 100``; bars are green where the
    difference is non-negative and red where it is negative.

    The x tick labels do not come from the input files: they are loaded via
    ``read_json`` from
    ``data/ast/non_terminals_plot_modified_attention.json`` and only their
    count is checked against the labels found in ``file1``.

    :param file1: path of the baseline results, passed to ``read_jsons``
    :param file2: path of the results to compare against the baseline
    :param y_label: label of the y axis
    :raises AssertionError: if the files' label lists differ, or if the
        number of plot labels does not match the number of file labels
    """
    nt1, base_values = read_jsons(file1)
    nt2, new_values = read_jsons(file2)
    assert nt1 == nt2

    nt = read_json('data/ast/non_terminals_plot_modified_attention.json')
    assert len(nt1) == len(nt)

    delta = np.array(new_values) - np.array(base_values)
    positions = np.arange(len(nt))

    # width=1 makes neighbouring bars touch.
    bars = plt.bar(positions, delta * 100, width=1)
    for pos, rect in enumerate(bars):
        rect.set_facecolor(COLOR_GREEN if delta[pos] >= 0 else COLOR_RED)

    # Proxy handles so the legend shows one entry per colour.
    legend_handles = [
        Line2D([0], [0], color=colour, lw=4)
        for colour in (COLOR_GREEN, COLOR_RED)
    ]
    plt.legend(legend_handles, ['Улучшение', 'Ухудшение'], prop={'size': 16})
    plt.grid(True)

    add_nt_x_ticks(nt)
    plt.ylabel(y_label, fontsize=14)
    plt.show()
def compare_per_nt(file1, file2, y_label):
    """Plot the per-non-terminal difference between two result files as a line.

    Each file is read with ``read_jsons`` and must unpack into exactly two
    values: the non-terminal labels and the per-label numbers. The plotted
    curve is ``(file2 - file1) * 100`` with the labels used as x ticks.

    NOTE(review): SOURCE contains another ``compare_per_nt`` definition; if
    both live in the same module, the later one replaces the earlier one.

    :param file1: path of the baseline results, passed to ``read_jsons``
    :param file2: path of the results to compare against the baseline
    :param y_label: label of the y axis
    :raises AssertionError: if the two files carry different label lists
    """
    labels, base_values = read_jsons(file1)
    other_labels, new_values = read_jsons(file2)
    assert labels == other_labels

    positions = np.arange(len(labels))
    delta = np.array(new_values) - np.array(base_values)

    # Small rotated tick labels, right-aligned to their tick.
    plt.xticks(
        positions,
        labels,
        rotation=30,
        horizontalalignment='right',
        fontsize=5,
    )
    plt.ylabel(y_label)
    plt.grid(True)
    plt.plot(positions, delta * 100)
    plt.show()
def print_tree_heights_stats_from_file(tree_heights_file):
    """Print tree-height statistics for the first record of a file.

    Everything ``read_jsons`` produces for the file is materialised, and only
    the first item is handed to ``print_tree_heights_stats``.

    :param tree_heights_file: path passed to ``read_jsons``
    :raises IndexError: if ``read_jsons`` produces no items
    """
    records = list(read_jsons(tree_heights_file))
    tree_heights = records[0]
    print_tree_heights_stats(tree_heights)
def plot_percentile_from_file(file, x_label, y_label):
    """Draw a percentile plot from the first record of a file.

    The first item produced by ``read_jsons`` is converted to plot
    coordinates by ``get_percentile_plot`` and rendered with ``draw_plot``.

    :param file: path passed to ``read_jsons``
    :param x_label: label of the x axis, forwarded to ``draw_plot``
    :param y_label: label of the y axis, forwarded to ``draw_plot``
    :raises IndexError: if ``read_jsons`` produces no items
    """
    records = list(read_jsons(file))
    xs, ys = get_percentile_plot(records[0])
    draw_plot(xs, ys, x_label=x_label, y_label=y_label)
def nearest(x, y, vocab, word):
    """Print the vocabulary entries closest to ``word`` in the (x, y) plane.

    ``x[i]`` and ``y[i]`` are taken as the coordinates of ``vocab[i]``.
    Euclidean distances from the point of ``word`` to every point are
    computed, and the labels of the ten closest points (the word itself
    included, at distance zero) are printed after a header line.

    :param x: indexable sequence of x coordinates
    :param y: indexable sequence of y coordinates, indexed like ``x``
    :param vocab: indexable sequence of labels, indexed like ``x``
    :param word: label to search neighbours for
    :raises Exception: if ``word`` does not occur in ``vocab``
    """
    occurrences = [pos for pos, token in enumerate(vocab) if token == word]
    if not occurrences:
        raise Exception('No such word in vocabulary: {}'.format(word))
    # When the word is duplicated in the vocabulary, the last occurrence wins.
    target = occurrences[-1]

    anchor = np.array([x[target], y[target]])
    ranked = [
        (pos, np.linalg.norm(np.array([x[pos], y[pos]]) - anchor))
        for pos in range(len(x))
    ]
    # list.sort is stable, so points at equal distance keep their index order.
    ranked.sort(key=lambda entry: entry[1])

    print('Nearest to {}:'.format(vocab[target]))
    for pos, _distance in ranked[:10]:
        print(vocab[pos])


if __name__ == '__main__':
    # Script entry point: load the embeddings and the non-terminal
    # vocabulary (first item produced by read_jsons), add the 'EOF' entry
    # and hand both to tsne_plot.
    emb = Embeddings(
        vector_file='/Users/zerogerc/Documents/diploma/GloVe/vectors.txt',
        embeddings_size=5,
    )
    vocab = list(read_jsons('data/ast/non_terminals.json'))[0]
    vocab.append('EOF')
    tsne_plot(emb, vocab)
def draw_per_nt_plot(file, y_label='Per NT accuracy'):
    """Draw a per-non-terminal plot from a results file.

    The file is read with ``read_jsons`` and must unpack into exactly two
    values: the non-terminal labels and the per-label numbers. The numbers
    are wrapped in a ``Plot`` and drawn by ``draw_per_nt_plot_inner``.

    :param file: path passed to ``read_jsons``
    :param y_label: label of the y axis
    """
    labels, values = read_jsons(file)
    series = Plot(data=values)
    draw_per_nt_plot_inner(labels, series, y_label=y_label)