from data.cnews_loader import read_vocab, read_category, batch_iter, process_file, clean_wds, get_dic
from data.cnews_loader import build_vocab, build_vocab_words, loadWord2Vec, expand_abbr, txt_proc
from data.cnews_loader import read_category_textual, read_category_intuitive
import run_rnn as rnn

# Load the annotation dictionaries from XML through get_dic: the train and
# test ground truth, plus two annotation files found under
# perl_classifier/output (textual and intuitive).
# NOTE(review): the structure returned by get_dic is not visible here; the
# code below only shows that it is iterated by key and indexed with that key.
train_dic = get_dic('data/Obesity_data/train_groundtruth.xml')
test_dic = get_dic('data/Obesity_data/test_groundtruth.xml')
test_dic_text_rule = get_dic(
    'perl_classifier/output/system_textual_annotation.xml')
test_dic_int_rule = get_dic(
    'perl_classifier/output/system_intuitive_annotation.xml')

# Read Word Vectors
word_vector_file = 'data/mimic3_pp100.txt'
vocab, embd, word_vector_map = loadWord2Vec(word_vector_file)
# The embedding width is taken from the length of the first loaded vector.
embedding_dim = len(embd[0])
#embeddings = np.asarray(embd)

# Store the three values returned by read_category() as attributes of the
# run_rnn module, so code inside that module can read them.
rnn.categories, rnn.cat_to_id, rnn.id_to_cat = read_category()

# Start an XML document whose root element is <diseaseset>.
# NOTE(review): `Dom` is not imported in the visible lines -- presumably
# xml.dom.minidom bound to `Dom` elsewhere in the file; confirm.
doc = Dom.Document()
root_node = doc.createElement("diseaseset")
doc.appendChild(root_node)

# For every key of train_dic, create a <diseases source=KEY> element.
# test_dic is indexed with the same key, so it must contain every key that
# train_dic has.
# NOTE(review): the nesting shown here was reconstructed from data flow (the
# source was collapsed onto one line) -- confirm against the original file.
for key in train_dic:
    train_sub_dic = train_dic[key]
    test_sub_dic = test_dic[key]
    source_node = doc.createElement("diseases")
    source_node.setAttribute("source", key)
    # NOTE(review): the body of this inner loop lies outside the visible
    # source; only its header is reproduced here.
    for sub_key in train_sub_dic:
# Close the file handle that was opened earlier in the script (the open call
# is outside the visible source).
f.close()

# Copy `lines` into the corpus list in a single call; list() accepts any
# iterable and preserves order, which is what the previous manual
# append-per-item loop did.
corpus = list(lines)
print(len(corpus))

# Load the annotation dictionaries from XML through get_dic: the train and
# test ground truth, plus two files found under perl_classifier/output.
# NOTE(review): the structure returned by get_dic is not visible here; the
# code below only shows that train_dic is iterated by key and indexed with it.
train_dic = get_dic('data/Obesity_data/train_groundtruth.xml')
test_dic = get_dic('data/Obesity_data/test_groundtruth.xml')
test_dic_text_rule = get_dic('perl_classifier/output/prod_134_0_2.xml')
test_dic_int_rule = get_dic('perl_classifier/output/prod_134_0_5.xml')

# Read CUI Vectors
entity_vector_file = 'data/DeVine_etal_200.txt'
entity_vocab, entity_embd, entity_vector_map = loadWord2Vec(entity_vector_file)
# The embedding width is taken from the length of the first loaded vector.
entity_embedding_dim = len(entity_embd[0])

# Read Word Vectors
word_vector_file = 'data/mimic3_pp200.txt'
word_vocab, word_embd, word_vector_map = loadWord2Vec(word_vector_file)
word_embedding_dim = len(word_embd[0])

# Store the three values returned by read_category() as attributes of the
# `cnn` module object, so code inside that module can read them.
cnn.categories, cnn.cat_to_id, cnn.id_to_cat = read_category()

# Start an XML document whose root element is <diseaseset>.
# NOTE(review): `Dom` and `cnn` are not imported in the visible lines --
# presumably imported earlier in the file; confirm.
doc = Dom.Document()
root_node = doc.createElement("diseaseset")
doc.appendChild(root_node)

# Walk the top-level keys of the training annotations.
# NOTE(review): the visible source ends here; this loop's body most likely
# continues beyond the last visible statement.
for key in train_dic:
    train_sub_dic = train_dic[key]