def test(self):
    """SharedTokensFeature writes ~0.5 into slot 0 for 'Nueva York' vs. NYC.

    The entity is named 'New York City' with alternate names 'NYC' and
    'New York'; the chain holds a single GPE mention 'Nueva York'.
    """
    features = FeatureVector()

    nyc = Entity('1', EntityType.GPE, 'New York City', EntityOrigin.GEO)
    nyc.names = {'NYC', 'New York'}

    spanish_mention = Mention('Nueva York', 'doc1', (), (), EntityType.GPE)
    chain = MentionChain([spanish_mention])

    extractor = SharedTokensFeature()
    # The third argument (document) is not supplied for this feature.
    extractor.extract(chain, nyc, None, features)

    # NOTE(review): presumably 0.5 because one of the two mention tokens
    # ('York') also occurs in the entity names -- confirm against
    # SharedTokensFeature.
    self.assertAlmostEqual(0.5, features.data[0])
def test_match(self):
    """ExactMatchFeature yields a truthy slot 0 once the mention is translated.

    The mention text is 'Nueva York', but its ``translate_string`` is set to
    'new york' before extraction; the entity's names include 'New York'.
    """
    features = FeatureVector()

    new_york = Entity('1', EntityType.GPE, 'New York', EntityOrigin.GEO)
    new_york.names = {'New York', 'New York City', 'NYC'}

    chain = MentionChain(
        [Mention('Nueva York', 'doc1', (), (), EntityType.GPE)])
    # Set the translation on the mention as stored by the chain.
    first_mention = chain.mentions[0]
    first_mention.translate_string = 'new york'

    extractor = ExactMatchFeature()
    extractor.extract(chain, new_york, None, features)

    self.assertTrue(features.data[0])
def test_positive(self):
    """WikipediaFeature yields a truthy slot 0 for an entity with a wiki URL.

    The entity carries the English Wikipedia URL for New York City and the
    chain holds a single GPE mention 'New York City'.
    """
    features = FeatureVector()

    wiki_urls = ['http://en.wikipedia.org/wiki/New_York_City']
    new_york = Entity(
        '1', EntityType.GPE, 'New York', EntityOrigin.GEO, urls=wiki_urls)
    new_york.names = {'New York', 'New York City', 'NYC'}

    city_mention = Mention('New York City', 'doc1', (), (), EntityType.GPE)
    chain = MentionChain([city_mention])

    extractor = WikipediaFeature()
    extractor.extract(chain, new_york, None, features)

    self.assertTrue(features.data[0])
def test_one_match(self):
    """ExactNameResolver picks the candidate whose names match the mention.

    A mocked document holds one chain with the PER mention 'John Smith' and
    two candidates; only the first candidate's names include 'john smith'.
    After resolving, the chain count and candidate list size are unchanged
    and the chain's ``entity`` is the first candidate.
    """
    doc = unittest.mock.Mock()
    doc.doc_id = 'doc1'

    john_mention = Mention('John Smith', 'doc1', (4, 8), (), EntityType.PER)
    doc.mention_chains = [MentionChain([john_mention])]

    entity1 = Entity('122', EntityType.PER, 'John', EntityOrigin.WLL)
    entity1.names = {'John', 'J. Smith', 'john smith'}
    other = Entity('123', EntityType.PER, 'Not John', EntityOrigin.WLL)
    doc.mention_chains[0].candidates = [entity1, other]

    resolver = ExactNameResolver()
    resolver.resolve(doc)

    # Re-read the chains from the document rather than reusing a local
    # reference, so whatever the resolver left on the doc is what is checked.
    chains = doc.mention_chains
    self.assertEqual(1, len(chains))
    resolved_chain = chains[0]
    self.assertEqual(2, len(resolved_chain.candidates))
    self.assertEqual(entity1, resolved_chain.entity)