'formatter': 'standard',
            'class': 'logging.StreamHandler',
        },
    },
    'loggers': {
        '': {
            'handlers': ['default'],
            'level': 'INFO',
            'propagate': True
        }
    }
})

# word embedding
# Build the embedding model and its vocabulary lookups from the file named by
# config['embedding']['emb_file']; the integer option 'limit' of the
# 'embedding' config section is forwarded as `limit`.
# NOTE(review): `binary=True` / `limit` presumably follow the word2vec-loader
# convention (binary file format, cap on the number of vectors read) --
# confirm in vectorizer.prepare_embedding_vocab.
vector_model, vocabulary, inversed_vocabulary = vectorizer.prepare_embedding_vocab(
    config['embedding']['emb_file'],
    binary=True,
    limit=config.getint('embedding', 'limit'))
# Derive the pretrained embedding data from the vocabulary and the model
# returned above.
pretrained = vectorizer.load_pretrained_word_embeddings(
    vocabulary, vector_model)

# MEDIC dictionary
dictionary = load.Terminology()
# dictionary of entries, key = canonical id, value = named tuple in the form of
#   MEDIC_ENTRY(DiseaseID='MESH:D005671', DiseaseName='Fused Teeth',
#   AllDiseaseIDs=('MESH:D005671',), AllNames=('Fused Teeth', 'Teeth, Fused'))
# The configured path is normalized with os.path.normpath before loading.
# NOTE(review): the 'MEDIC' argument presumably selects the terminology
# format -- confirm in load.load.
dictionary.loaded = load.load(
    os.path.normpath(config['terminology']['dict_file']), 'MEDIC')


def concept_obj(conf, dictionary, order=None):
    concept_ids = []  # list of all concept ids
Пример #2
0
            'formatter': 'standard',
            'class': 'logging.StreamHandler',
        },
    },
    'loggers': {
        '': {
            'handlers': ['default'],
            'level': 'INFO',
            'propagate': True
        }
    }
})

# word embedding
# Build the embedding model and its vocabulary lookups from a fixed file path.
# NOTE(review): the embedding path is a hard-coded, user-specific absolute
# path and `limit` is a hard-coded constant, while the dictionary path below
# is read from `config`. Consider moving both into the config file so the
# script runs on other machines.
# NOTE(review): `binary=True` / `limit` presumably follow the word2vec-loader
# convention (binary file format, cap on the number of vectors read) --
# confirm in vectorizer.prepare_embedding_vocab.
vector_model, vocabulary, inversed_vocabulary = vectorizer.prepare_embedding_vocab(
    '/home/lhchan/disease_normalization/data/pubmed2018_w2v_400D/pubmed2018_w2v_400D.bin',
    binary=True,
    limit=1000000)
# Derive the pretrained embedding data from the vocabulary and the model
# returned above.
pretrained = vectorizer.load_pretrained_word_embeddings(
    vocabulary, vector_model)

# MEDIC dictionary
dictionary = load.Terminology()
# dictionary of entries, key = canonical id, value = named tuple in the form of
#   MEDIC_ENTRY(DiseaseID='MESH:D005671', DiseaseName='Fused Teeth',
#   AllDiseaseIDs=('MESH:D005671',), AllNames=('Fused Teeth', 'Teeth, Fused'))
# NOTE(review): the 'MEDIC' argument presumably selects the terminology
# format -- confirm in load.load.
dictionary.loaded = load.load(config['terminology']['dict_file'], 'MEDIC')

# Module-level containers for concept data; every one starts out empty.
#   concept_ids      -- list of all concept ids
#   concept_all_ids  -- list of (lists of all concept ids with alt IDs)
#   concept_names    -- list of all names, same length as concept_ids
#   concept_map      -- dict with names as keys and concept ids as values
#                       (per the original note; verify against the code
#                       that fills it)
concept_ids, concept_all_ids, concept_names, concept_map = [], [], [], {}