'formatter': 'standard', 'class': 'logging.StreamHandler', }, }, 'loggers': { '': { 'handlers': ['default'], 'level': 'INFO', 'propagate': True } } }) # word embedding vector_model, vocabulary, inversed_vocabulary = vectorizer.prepare_embedding_vocab( config['embedding']['emb_file'], binary=True, limit=config.getint('embedding', 'limit')) pretrained = vectorizer.load_pretrained_word_embeddings( vocabulary, vector_model) # MEDIC dictionary dictionary = load.Terminology() # dictionary of entries, key = canonical id, value = named tuple in the form of # MEDIC_ENTRY(DiseaseID='MESH:D005671', DiseaseName='Fused Teeth', # AllDiseaseIDs=('MESH:D005671',), AllNames=('Fused Teeth', 'Teeth, Fused') dictionary.loaded = load.load( os.path.normpath(config['terminology']['dict_file']), 'MEDIC') def concept_obj(conf, dictionary, order=None): concept_ids = [] # list of all concept ids
'formatter': 'standard', 'class': 'logging.StreamHandler', }, }, 'loggers': { '': { 'handlers': ['default'], 'level': 'INFO', 'propagate': True } } }) # word embedding vector_model, vocabulary, inversed_vocabulary = vectorizer.prepare_embedding_vocab( '/home/lhchan/disease_normalization/data/pubmed2018_w2v_400D/pubmed2018_w2v_400D.bin', binary=True, limit=1000000) pretrained = vectorizer.load_pretrained_word_embeddings( vocabulary, vector_model) # MEDIC dictionary dictionary = load.Terminology() # dictionary of entries, key = canonical id, value = named tuple in the form of # MEDIC_ENTRY(DiseaseID='MESH:D005671', DiseaseName='Fused Teeth', # AllDiseaseIDs=('MESH:D005671',), AllNames=('Fused Teeth', 'Teeth, Fused') dictionary.loaded = load.load(config['terminology']['dict_file'], 'MEDIC') concept_ids = [] # list of all concept ids concept_all_ids = [] # list of (lists of all concept ids with alt IDs) concept_names = [] # list of all names, same length as concept_ids concept_map = {} # names as keys, ids as concepts