def genrate_gold_gui_data(corpus_dir, doc_id, data_file):
    """Export per-document GUI JSON files from a JSON-lines data file.

    Each record read from ``data_file`` is wrapped in a ``Document`` and
    three JSON files are written for it, all named after ``doc.doc_id``:

    * ``<corpus_dir>/span/<doc_id>.json``   from ``doc.get_visualize_data()``
    * ``<corpus_dir>/detail/<doc_id>.json`` from ``doc.get_surface_data()``
    * ``<corpus_dir>/coref/<doc_id>.json``  from ``doc.get_cluster_data()``

    After all records are processed, the collected document ids are written
    to ``<corpus_dir>/doc_ids.json`` and ``update_corpus_ids(doc_id)`` is
    called.

    Args:
        corpus_dir: Directory prefix under which all output files are written.
        doc_id: Identifier forwarded to ``update_corpus_ids``; it is not used
            for the per-document file names (those use ``doc.doc_id``).
        data_file: Path of the JSON-lines input, opened with
            ``jsonlines.open``.

    Raises:
        SystemExit: If ``check_duplicate_dir(corpus_dir)`` returns a truthy
            value. ``sys.exit()`` is called without an argument, so the
            process exit status is 0 and nothing is written.
    """
    # The reader is used as a context manager so the underlying file is
    # closed on every path, including the early exit below and any exception
    # raised while processing a record.
    with jsonlines.open(data_file) as data_reader:
        # Abort before writing anything when the duplicate check fires.
        # NOTE(review): exiting silently with status 0 may hide the problem
        # from callers/scripts -- confirm this is intended.
        if check_duplicate_dir(corpus_dir):
            sys.exit()

        doc_ids = []
        for doc_dict in data_reader.iter():
            doc = Document(doc_dict)
            doc_ids.append(doc.doc_id)

            # doc data
            doc_data_file = "%s/span/%s.json" % (corpus_dir, doc.doc_id)
            save_to_json(doc_data_file, doc.get_visualize_data())

            # surface data
            surface_data_file = "%s/detail/%s.json" % (corpus_dir, doc.doc_id)
            save_to_json(surface_data_file, doc.get_surface_data())

            # cluster data
            cluster_data_file = "%s/coref/%s.json" % (corpus_dir, doc.doc_id)
            save_to_json(cluster_data_file, doc.get_cluster_data())

    # doc ids and corpus ids.
    doc_ids_file = "%s/doc_ids.json" % corpus_dir
    save_to_json(doc_ids_file, doc_ids)
    update_corpus_ids(doc_id)