if __name__ == "__main__": fg = FilterGolden() dd = DatasetDexter() wd = WikipediaDataset() dexter_json_doc_list = dd.get_dexter_dataset( FileLocations.get_dropbox_dexter_path(), 'saliency-dataset.json') golden_saliency_by_entid_by_docid = dd.get_golden_saliency_by_entid_by_docid( dexter_json_doc_list, wd) #check which are still valid wikititle_by_id = wd.get_wikititle_by_id() not_found_count = 0 count = 0 multiple_wid_count = 0 for docid in golden_saliency_by_entid_by_docid.keys(): for entity_id in golden_saliency_by_entid_by_docid[docid].keys(): n_entity_id = wd.get_wikititle_id_from_id(entity_id) wikititle1 = '' wikititle2 = '' if entity_id in wikititle_by_id: wikititle1 = wikititle_by_id[entity_id] if n_entity_id in wikititle_by_id: wikititle2 = wikititle_by_id[n_entity_id]
    # These two prints reference locals (salient_list, not_salient_list) and appear to be
    # the tail of a function defined above (presumably show_doc_info).
    print('not_salient_list:' + str(not_salient_list))
    print('salient_list:' + str(salient_list))


if __name__ == "__main__":
    filename = FileLocations.get_dropbox_intermediate_path() + 'sel.pickle'
    build_model = False

    # smb = SelModelBuilder()
    # if build_model:
    #     sentiment_processor = smb.train_and_save_model(filename)
    # else:
    #     sentiment_processor = SentimentProcessor()
    #     sentiment_processor.load_model(filename)

    dd = DatasetDexter()
    wikipediaDataset = WikipediaDataset()

    document_list = dd.get_dexter_dataset(path=FileLocations.get_dropbox_dexter_path())
    spotter = GoldenSpotter(document_list, wikipediaDataset)

    golden_saliency_by_entid_by_docid = dd.get_golden_saliency_by_entid_by_docid(
        document_list, wikipediaDataset)

    wikititle_by_id = wikipediaDataset.get_wikititle_by_id()

    show_doc_info(2)
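
# Hedged sketch (not the author's implementation) of what a helper like show_doc_info
# presumably does, judging from the two prints above: split one document's golden
# entities into salient / not-salient lists and print them. The salience threshold and
# the reliance on the module-level dictionaries built above are assumptions.
def show_doc_info_sketch(docid, salient_threshold=1):
    salient_list = []
    not_salient_list = []
    for entity_id, salience in golden_saliency_by_entid_by_docid[docid].items():
        title = wikititle_by_id.get(entity_id, '')
        if salience >= salient_threshold:
            salient_list.append((entity_id, title))
        else:
            not_salient_list.append((entity_id, title))
    print('not_salient_list:' + str(not_salient_list))
    print('salient_list:' + str(salient_list))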