# NOTE(review): this chunk arrived whitespace-mangled (one physical line);
# reformatted below with tokens unchanged.
#
# Tail of an (unseen) evaluation function: converts model predictions to
# label strings, attaches each predicted label to its feature row, and
# returns accuracy plus the annotated instances.  The enclosing `def`
# (which binds y_pred, features, instances, correct, total, ix_to_label)
# is above this chunk.

    # assumes y_pred is a torch tensor of predicted label indices -- TODO confirm
    pred_numpy = (y_pred.data).cpu().numpy()
    y_pred_labels = [ix_to_label[ix] for ix in pred_numpy]
    assert len(y_pred_labels) == len(
        features), 'y_pred_labels and features have different lengths'
    for i, pred_label in enumerate(y_pred_labels):
        # presumably slot 5 of each feature row stores the predicted label
        # -- verify against the feature-row layout used by the caller
        features[i][5] = pred_label
        instances.append(features[i])
    acc = 100.0 * correct / total
    return acc, instances


if __name__ == "__main__":
    # Script entry: parse the NCBI train/dev/test corpora, load entity
    # abbreviations and the concept dictionary, then build the word/char
    # alphabets from dictionary + corpus vocabulary.
    traindocuments = parserNcbiTxtFile_simple(opt.train_file)
    devdocuments = parserNcbiTxtFile_simple(opt.dev_file)
    testdocuments = parserNcbiTxtFile_simple(opt.test_file)
    entityAbbres = loadAbbreviations(opt.abbre_file)
    preprocessMentions(traindocuments, devdocuments, testdocuments,
                       entityAbbres)
    # NOTE(review): `dict` shadows the builtin name; kept byte-identical
    # here because downstream code in this file may rely on it.
    dict = load_dict(opt.dict_file)
    meshlabels, meshlabel_to_ix, dict_words = utils.parser_dict(dict)
    corpus_words = utils.parser_corpus(traindocuments, devdocuments,
                                       testdocuments)
    word_to_ix, all_words, char_to_ix = utils.generate_word_alphabet(
        corpus_words, dict_words)
    # TRUNCATED: the suite of this `if` continues beyond the visible chunk
    if opt.random_emb:
# NOTE(review): this chunk arrived whitespace-mangled (one physical line);
# reformatted below with tokens unchanged.
#
# Tail of an (unseen) function that builds the entity-document list; the
# enclosing `def` is above this chunk.
    return entity_docs


if __name__ == '__main__':
    # Script entry: load NER-tagged entity documents, align each one with
    # the matching NCBI test document by doc_name to copy over its
    # title/abstract, then write the combined documents out.
    ner_path = "/home/lyx/workspace/Dnorm_ncbi/ncbi_test_plain_ner"
    output_path_doc = "./sample_data/ncbi_test_ner_evalNorm"
    output_path_entity = "/home/lyx/workspace/Dnorm_ncbi/ncbi_test_plain_ner_entities"
    ncbi_ner_path = "/home/lyx/workspace/Dnorm_ncbi/output/analysis_ncbi.txt"
    # entity_docs = load_entity_doc(ner_path)
    entity_docs = load_entity_doc(ncbi_ner_path)
    test_documents = parserNcbiTxtFile_simple(opt.test_file)
    for i in range(len(entity_docs)):
        isfind = False
        for test_doc in test_documents:
            if entity_docs[i].doc_name == test_doc.doc_name:
                isfind = True
                entity_docs[i].title = test_doc.title
                # NOTE: the attribute really is spelled `abstractt` in this
                # codebase; do not "fix" the spelling.
                entity_docs[i].abstractt = test_doc.abstractt
                break
        if not isfind:
            # report entity docs with no matching test document by name
            print(entity_docs[i].doc_name)
    outputDocuments_title_abstract_entity(output_path_doc, entity_docs)
    # outputDocuments_ner_entities(output_path_entity, entity_docs)
    print('end')