Пример #1
0
from data.cnews_loader import read_vocab, read_category, batch_iter, process_file, clean_wds, get_dic
from data.cnews_loader import build_vocab, build_vocab_words, loadWord2Vec, expand_abbr, txt_proc
from data.cnews_loader import read_category_textual, read_category_intuitive
import run_rnn as rnn

# Load the annotation dictionaries for the train and test splits from the
# Obesity data XML files (ground truth, judging by the file names). The loop
# below iterates train_dic by key and indexes test_dic with the same key, so
# both are expected to share their top-level keys.
train_dic = get_dic('data/Obesity_data/train_groundtruth.xml')
test_dic = get_dic('data/Obesity_data/test_groundtruth.xml')

# Annotations read from the perl_classifier output directory. The variable and
# file names suggest rule-based "textual" and "intuitive" system judgments for
# the test split -- NOTE(review): confirm against the classifier's docs.
test_dic_text_rule = get_dic(
    'perl_classifier/output/system_textual_annotation.xml')
test_dic_int_rule = get_dic(
    'perl_classifier/output/system_intuitive_annotation.xml')

# Read Word Vectors
word_vector_file = 'data/mimic3_pp100.txt'
vocab, embd, word_vector_map = loadWord2Vec(word_vector_file)
# Embedding width is taken from the first entry of embd; this assumes embd is
# non-empty and that every entry has the same length -- TODO confirm.
embedding_dim = len(embd[0])
# Disabled conversion of embd to an array, kept from the original script.
#embeddings = np.asarray(embd)
# Store the three values returned by read_category() as attributes on the
# run_rnn module (imported as rnn) so that module sees the same categories.
rnn.categories, rnn.cat_to_id, rnn.id_to_cat = read_category()

# Create the output XML document with a single <diseaseset> root element.
# NOTE(review): Dom is not imported in the visible part of this file;
# presumably it is xml.dom.minidom -- verify.
doc = Dom.Document()
root_node = doc.createElement("diseaseset")
doc.appendChild(root_node)

for key in train_dic:
    train_sub_dic = train_dic[key]
    test_sub_dic = test_dic[key]
    source_node = doc.createElement("diseases")
    source_node.setAttribute("source", key)

    for sub_key in train_sub_dic:
Пример #2
0
# Close the handle f, which was opened before the visible part of this script.
f.close()

# Copy every item of lines (populated outside the visible part) into corpus
# and report how many items were collected.
corpus = []
for line in lines:
    corpus.append(line)
print(len(corpus))

# Load the annotation dictionaries for the train and test splits from the
# Obesity data XML files (ground truth, judging by the file names).
train_dic = get_dic('data/Obesity_data/train_groundtruth.xml')
test_dic = get_dic('data/Obesity_data/test_groundtruth.xml')

# Annotations read from two perl_classifier output files. The variable names
# suggest rule-based "textual" and "intuitive" judgments; the meaning of the
# prod_134_0_2 / prod_134_0_5 file names cannot be determined from here --
# NOTE(review): confirm which run produced them.
test_dic_text_rule = get_dic('perl_classifier/output/prod_134_0_2.xml')
test_dic_int_rule = get_dic('perl_classifier/output/prod_134_0_5.xml')

# Read CUI Vectors
entity_vector_file = 'data/DeVine_etal_200.txt'
entity_vocab, entity_embd, entity_vector_map = loadWord2Vec(entity_vector_file)
# Width is taken from the first entry; assumes entity_embd is non-empty and
# uniformly sized -- TODO confirm.
entity_embedding_dim = len(entity_embd[0])
# Disabled conversion kept from the original script. It refers to `embd`,
# which is not assigned anywhere in the visible part of this script.
#embeddings = np.asarray(embd)
# Read Word Vectors
word_vector_file = 'data/mimic3_pp200.txt'
word_vocab, word_embd, word_vector_map = loadWord2Vec(word_vector_file)
# Same first-entry assumption as for the CUI vectors above.
word_embedding_dim = len(word_embd[0])
# Disabled conversion kept from the original script (see the note on the
# identical line above about the `embd` name).
#embeddings = np.asarray(embd)
# Store the three values returned by read_category() as attributes on cnn.
# NOTE(review): cnn is bound outside the visible part of this script;
# presumably it is a CNN training module -- verify.
cnn.categories, cnn.cat_to_id, cnn.id_to_cat = read_category()

# Create the output XML document with a single <diseaseset> root element.
# NOTE(review): Dom is not imported in the visible part of this file;
# presumably it is xml.dom.minidom -- verify.
doc = Dom.Document()
root_node = doc.createElement("diseaseset")
doc.appendChild(root_node)

for key in train_dic:
    train_sub_dic = train_dic[key]