Example #1
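# NOTE: the listings below assume the project-local classes they use
# (FilterGolden, DatasetDexter, WikipediaDataset, FileLocations, GoldenSpotter)
# are already imported; the exact module paths are project-specific and
# therefore not shown here.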

if __name__ == "__main__":
    fg = FilterGolden()

    dd = DatasetDexter()
    wd = WikipediaDataset()

    dexter_json_doc_list = dd.get_dexter_dataset(
        FileLocations.get_dropbox_dexter_path(), 'saliency-dataset.json')
    golden_saliency_by_entid_by_docid = dd.get_golden_saliency_by_entid_by_docid(
        dexter_json_doc_list, wd)

    # Check which golden entity ids are still valid: map each id to its
    # current wikititle id and look both up in the wikititle index.
    wikititle_by_id = wd.get_wikititle_by_id()
    not_found_count = 0
    count = 0
    multiple_wid_count = 0
    salient_list = []
    not_salient_list = []

    for docid, saliency_by_entid in golden_saliency_by_entid_by_docid.items():
        for entity_id, saliency in saliency_by_entid.items():
            count += 1

            # ids may have been remapped to a new wikititle id
            n_entity_id = wd.get_wikititle_id_from_id(entity_id)
            if n_entity_id != entity_id:
                multiple_wid_count += 1

            wikititle1 = ''
            wikititle2 = ''
            if entity_id in wikititle_by_id:
                wikititle1 = wikititle_by_id[entity_id]
            if n_entity_id in wikititle_by_id:
                wikititle2 = wikititle_by_id[n_entity_id]
            if wikititle1 == '' and wikititle2 == '':
                not_found_count += 1

            # Bucket entity ids by golden saliency; the >= 2.0 threshold for
            # "salient" is an assumption about the annotation scale.
            if saliency >= 2.0:
                salient_list.append(entity_id)
            else:
                not_salient_list.append(entity_id)

    print('checked %d entity ids: %d not found, %d remapped'
          % (count, not_found_count, multiple_wid_count))
    print('not_salient_list:' + str(not_salient_list))
    print('salient_list:' + str(salient_list))
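
For reference, a minimal sketch of the nested structure the loops above iterate over: get_golden_saliency_by_entid_by_docid returns a dict keyed by docid, whose values are dicts of entity id to golden saliency score. The concrete ids and scores below are invented for illustration only.

golden = {
    1: {3434750: 3.0, 9317: 1.0},
    2: {30680: 2.0},
}
for docid, saliency_by_entid in golden.items():
    for entity_id, saliency in saliency_by_entid.items():
        print(docid, entity_id, saliency)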


if __name__ == "__main__":

    filename = FileLocations.get_dropbox_intermediate_path() + 'sel.pickle'
    build_model = False
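    # When build_model is True, the block below (left commented out in the
    # original) would train and save the sentiment model; otherwise it would
    # load the previously saved model from 'sel.pickle'.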

    # smb = SelModelBuilder()

    # if build_model:
    #     sentiment_processor = smb.train_and_save_model(filename)
    # else:
    #     sentiment_processor = SentimentProcessor()
    #     sentiment_processor.load_model(filename)

    dd = DatasetDexter()
    wd = WikipediaDataset()
    document_list = dd.get_dexter_dataset(
        path=FileLocations.get_dropbox_dexter_path())
    spotter = GoldenSpotter(document_list, wd)

    golden_saliency_by_entid_by_docid = dd.get_golden_saliency_by_entid_by_docid(
        document_list, wd)

    wikititle_by_id = wd.get_wikititle_by_id()
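
    # show_doc_info is not defined in this listing. Below is a hypothetical
    # sketch of such a helper (an assumption, not the project's actual code):
    # it prints each golden entity of a document with its wikititle and
    # saliency, using only the objects built above.
    def show_doc_info(docid):
        for entity_id, saliency in golden_saliency_by_entid_by_docid.get(docid, {}).items():
            wikititle = wikititle_by_id.get(entity_id, '<unknown>')
            print('docid=%s entity_id=%s wikititle=%s saliency=%s'
                  % (docid, entity_id, wikititle, saliency))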

    show_doc_info(2)