Пример #1
0
 def test(self):
     """Check the value SharedTokensFeature writes for a partial name overlap.

     The mention 'Nueva York' is scored against an entity whose names are
     'NYC' and 'New York'; the first slot of the feature vector is
     expected to be approximately 0.5.
     """
     features = FeatureVector()
     nyc = Entity('1', EntityType.GPE, 'New York City', EntityOrigin.GEO)
     nyc.names = {'NYC', 'New York'}
     mention = Mention('Nueva York', 'doc1', (), (), EntityType.GPE)
     chain = MentionChain([mention])
     # No document is supplied (third argument is None).
     SharedTokensFeature().extract(chain, nyc, None, features)
     # NOTE(review): 0.5 presumably reflects one shared token out of two
     # ('York') -- confirm against SharedTokensFeature.
     self.assertAlmostEqual(0.5, features.data[0])
Пример #2
0
 def test_match(self):
     """Check that ExactMatchFeature produces a truthy first value.

     The only mention reads 'Nueva York' but carries the translation
     'new york', while the entity's names include 'New York'.
     """
     features = FeatureVector()
     new_york = Entity('1', EntityType.GPE, 'New York', EntityOrigin.GEO)
     new_york.names = {'New York', 'New York City', 'NYC'}
     mention = Mention('Nueva York', 'doc1', (), (), EntityType.GPE)
     chain = MentionChain([mention])
     # Set the translation through the chain, after it has been built.
     # NOTE(review): the match presumably relies on the translated string
     # and ignores case -- confirm against ExactMatchFeature.
     first_mention = chain.mentions[0]
     first_mention.translate_string = 'new york'
     ExactMatchFeature().extract(chain, new_york, None, features)
     self.assertTrue(features.data[0])
Пример #3
0
 def test_positive(self):
     """Check that WikipediaFeature produces a truthy first value.

     The entity is created with a single English Wikipedia URL for
     'New_York_City', and the mention text is 'New York City'.
     """
     features = FeatureVector()
     wiki_urls = ['http://en.wikipedia.org/wiki/New_York_City']
     new_york = Entity(
         '1', EntityType.GPE, 'New York', EntityOrigin.GEO, urls=wiki_urls)
     new_york.names = {'New York', 'New York City', 'NYC'}
     mention = Mention('New York City', 'doc1', (), (), EntityType.GPE)
     chain = MentionChain([mention])
     # No document is supplied (third argument is None).
     WikipediaFeature().extract(chain, new_york, None, features)
     self.assertTrue(features.data[0])
Пример #4
0
 def test_one_match(self):
     """Check that ExactNameResolver picks the one candidate that matches.

     A mocked document holds a single chain for 'John Smith' with two
     candidates. After resolving, the chain and both candidates must still
     be present, and the chain's entity must be the first candidate.
     """
     doc = unittest.mock.Mock()
     doc.doc_id = 'doc1'
     john_mention = Mention('John Smith', 'doc1', (4, 8), (), EntityType.PER)
     chain = MentionChain([john_mention])
     doc.mention_chains = [chain]

     # NOTE(review): entity1 presumably matches via its 'john smith' name,
     # compared case-insensitively -- confirm against ExactNameResolver.
     entity1 = Entity('122', EntityType.PER, 'John', EntityOrigin.WLL)
     entity1.names = {'John', 'J. Smith', 'john smith'}
     other = Entity('123', EntityType.PER, 'Not John', EntityOrigin.WLL)
     chain.candidates = [entity1, other]

     ExactNameResolver().resolve(doc)

     # Read the results back from the document, not from the local chain.
     self.assertEqual(1, len(doc.mention_chains))
     resolved = doc.mention_chains[0]
     self.assertEqual(2, len(resolved.candidates))
     self.assertEqual(entity1, resolved.entity)