Example No. 1
    def test_concept_similarity(self):
        cdb = CDB(config=self.config)
        np.random.seed(11)
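        # Build 500 dummy concepts, cycling through 10 type IDs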
        for i in range(500):
            cui = "C" + str(i)
            type_ids = {'T-' + str(i%10)}
            cdb.add_concept(cui=cui, names=prepare_name('Name: ' + str(i), self.maker.nlp, {}, self.config), ontologies=set(),
                            name_status='P', type_ids=type_ids, description='', full_build=True)

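            # Attach a random 300-dimensional vector for each configured context size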
            vectors = {}
            for cntx_type in self.config.linking['context_vector_sizes']:
                vectors[cntx_type] = np.random.rand(300)
            cdb.update_context_vector(cui, vectors, negative=False)
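        # The ten most similar concepts to C200 (long context, type T-0) should be found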
        res = cdb.most_similar('C200', 'long', type_id_filter=['T-0'], min_cnt=1, topn=10, force_build=True)
        assert len(res) == 10
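This snippet (and the ones below) omits its imports. A minimal setup sketch, assuming MedCAT's usual module layout (exact paths can differ between versions), would be roughly:

import numpy as np

from medcat.cdb import CDB
from medcat.config import Config
from medcat.preprocessing.cleaners import prepare_name

Here self.config and self.maker are assumed to be test fixtures: a Config instance and a CDBMaker whose nlp attribute provides the spaCy pipeline passed to prepare_name.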
Example No. 2
    def test_for_linker(self):
        self.config = Config()
        self.config.general['log_level'] = logging.DEBUG
        cdb = CDB(config=self.config)

        # Add a couple of names; 'Movar' is deliberately mapped to two different CUIs
        cdb.add_names(cui='S-229004',
                      names=prepare_name('Movar', self.nlp, {}, self.config))
        cdb.add_names(cui='S-229004',
                      names=prepare_name('Movar viruses', self.nlp, {},
                                         self.config))
        cdb.add_names(cui='S-229005',
                      names=prepare_name('CDB', self.nlp, {}, self.config))
        cdb.add_names(cui='S-2290045',
                      names=prepare_name('Movar', self.nlp, {}, self.config))
        # Check
        #assert cdb.cui2names == {'S-229004': {'movar', 'movarvirus', 'movarviruses'}, 'S-229005': {'cdb'}, 'S-2290045': {'movar'}}

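        # Give every concept a random 300-dimensional vector for each context size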
        cuis = list(cdb.cui2names.keys())
        for cui in cuis[0:50]:
            vectors = {
                'short': np.random.rand(300),
                'long': np.random.rand(300),
                'medium': np.random.rand(300)
            }
            cdb.update_context_vector(cui, vectors, negative=False)

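        # Build a context model over the CDB and vocab, then train it and score a similarity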
        d = self.nlp(self.text)
        vocab = Vocab.load(self.vocab_path)
        cm = ContextModel(cdb, vocab, self.config)
        cm.train_using_negative_sampling('S-229004')
        self.config.linking['train_count_threshold'] = 0

        cm.train('S-229004', d._.ents[1], d)

        cm.similarity('S-229004', d._.ents[1], d)

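        # Resolve the ambiguous name 'movar' between its two candidate CUIs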
        cm.disambiguate(['S-2290045', 'S-229004'], d._.ents[1], 'movar', d)
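On top of the common imports sketched after Example No. 1, this example also relies on logging, Vocab and ContextModel, plus test fixtures (self.nlp, self.text and self.vocab_path: a spaCy pipeline, an input text and the path to a saved vocabulary). Assuming the same MedCAT layout (the ContextModel module path in particular is an assumption), the extra imports would be roughly:

import logging

from medcat.vocab import Vocab
from medcat.linking.vector_context_model import ContextModel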
Example No. 3
cdb = CDB(config=config)
np.random.seed(11)
for i in range(500):
    cui = "C" + str(i)
    type_ids = {'T-' + str(i % 10)}
    cdb.add_concept(cui=cui,
                    names=prepare_name('Name: ' + str(i), maker.nlp, {},
                                       config),
                    ontologies=set(),
                    name_status='P',
                    type_ids=type_ids,
                    description='',
                    full_build=True)

    vectors = {}
    for cntx_type in config.linking['context_vector_sizes']:
        vectors[cntx_type] = np.random.rand(300)
    cdb.update_context_vector(cui, vectors, negative=False)
res = cdb.most_similar('C200',
                       'long',
                       type_id_filter=['T-0'],
                       min_cnt=1,
                       topn=10,
                       force_build=True)
assert len(res) == 10

# Test training reset
cdb.reset_training()
assert len(cdb.cui2context_vectors['C0']) == 0
assert cdb.cui2count_train['C0'] == 0
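The closing assertions spell out what reset_training is expected to do here: the learned context vectors for 'C0' are emptied and its training count drops back to zero, while the 'C0' entry itself remains in the CDB.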