# NOTE(review): the plotting statements below are the tail of a routine whose
# header lies before this chunk (presumably analysis_data -- confirm). They are
# reproduced unchanged, one statement per line; re-indent them to that
# routine's body level when the preceding part of the file is restored.
# Each *_l name is iterated as (a, b) pairs: `a` supplies the bar positions and
# `b` the bar heights.
plt.bar([a for a, b in wrong_np_num_l], [b for a, b in wrong_np_num_l])
plt.title('Wrong NP Num', fontsize=10)

plt.subplot(2, 3, 4)
plt.bar([a for a, b in np_dis_l], [b for a, b in np_dis_l])
plt.title('NP Dis', fontsize=10)

plt.subplot(2, 3, 5)
plt.bar([a for a, b in correct_np_dis_l], [b for a, b in correct_np_dis_l])
plt.title('Correct NP Dis', fontsize=10)

plt.subplot(2, 3, 6)
plt.bar([a for a, b in wrong_np_dis_l], [b for a, b in wrong_np_dis_l])
plt.title('Wrong NP Dis', fontsize=10)

# Write the finished figure to disk.
plt.savefig('results/data_analysis.png')


if __name__ == '__main__':
    # Script entry point: runs the two generate_* steps, builds and pickles
    # the training DataGenerator, then runs analysis_data on it.
    # print(...) with a single argument prints the same text on Python 2 as
    # the print statement, and also parses on Python 3.

    # build data from raw OntoNotes data
    print('Processing')
    generate_vector_data()
    generate_input_data()

    # split training data into dev and train, saved in ./data/train_data
    print('Dividing')
    train_generator = DataGenerator("train", args.batch_size)
    train_generator.devide()  # (sic) method name as spelled on DataGenerator

    # `with` closes the file even if pickling raises, so a failed dump no
    # longer leaks the handle the way the manual file()/close() pair did.
    with open("./data/train_data", 'wb') as save_f:
        cPickle.dump(train_generator, save_f,
                     protocol=cPickle.HIGHEST_PROTOCOL)

    print('Analysing')
    analysis_data(train_generator)