예제 #1
0
def get_test_gwn(db_path=TEST_DB):
    """Return a GWNSQL object for ``db_path``, seeding it when the file is new.

    When ``db_path`` is not an existing file, or exists with a size of zero
    bytes, the synsets read from ``MOCKUP_SYNSETS_DATA`` are inserted before
    the object is returned. Otherwise the object is returned untouched.

    Args:
        db_path: Path passed to ``GWNSQL``; defaults to ``TEST_DB``.

    Returns:
        The ``GWNSQL`` instance created for ``db_path``.
    """
    db = GWNSQL(db_path)
    has_content = os.path.isfile(db_path) and os.path.getsize(db_path) != 0
    if has_content:
        return db
    # Missing or empty file: load the dummy synsets and store them.
    mockup = GWordnetXML()
    mockup.read(MOCKUP_SYNSETS_DATA)
    db.insert_synsets(mockup.synsets)
    return db
예제 #2
0
 def test_setup_insert_stuff(self):
     """Insert three mockup synsets into ':memory:' and verify what is stored.

     Checks the number of synset rows, the exact set of tagged sensekeys,
     and the number of sensekey rows, all through one shared context.
     """
     expected_tags = {
         'not%4:02:00::', 'be_born%2:30:00::', 'christian_era%1:28:00::',
         'christ%1:18:00::', 'date%1:28:04::',
         'musical_accompaniment%1:10:00::', 'a_cappella%4:02:00::',
         'ad%4:02:00::', 'ce%4:02:00::',
     }
     db = GWNSQL(":memory:")
     source = GWordnetXML()
     source.read(MOCKUP_SYNSETS_DATA)
     with db.ctx() as ctx:
         loaded = list(source.synsets)
         # One synset through the single-insert API, two through the bulk API.
         head, tail = loaded[0], loaded[1:3]
         db.insert_synset(head, ctx=ctx)
         db.insert_synsets(tail, ctx=ctx)
         # Read everything back through the same context.
         stored_synsets = ctx.synset.select()
         self.assertEqual(len(stored_synsets), 3)
         # Every tagged sensekey must be present.
         self.assertEqual(db.tagged_sensekeys(ctx=ctx), expected_tags)
         # Sensekey rows.
         stored_keys = ctx.sensekey.select()
         self.assertEqual(len(stored_keys), 7)
예제 #3
0
 def test_all_api(self):
     """Exercise the GWNSQL lookup API against a ':memory:' database.

     Covers get_synset/get_synsets, get_by_key/get_by_keys, sk2sid and
     search (with and without a POS filter), and asserts that the
     hypernym-style queries raise WordnetException.
     """
     def assert_fully_populated(synset):
         # The synset itself, its definition, examples and aux gloss
         # must all be truthy.
         self.assertTrue(synset)
         self.assertTrue(synset.definition)
         self.assertTrue(synset.examples)
         self.assertTrue(synset.get_aux())

     gwn = GWNSQL(':memory:')
     with gwn.ctx() as ctx:
         setup_ram_gwn(gwn, ctx)
         with self.assertRaises(WordnetException):
             gwn.get_synset('00001740-n', ctx=ctx)
         id_rows = ctx.synset.select(columns=('ID',))
         self.assertEqual(len(id_rows), 219)

         # get_synset() and get_synsets()
         assert_fully_populated(gwn.get_synset('00008007-r', ctx=ctx))
         wanted_ids = ('a01179767', 'n03095965', 'r00001837')
         for found in gwn.get_synsets(wanted_ids, ctx=ctx):
             self.assertTrue(found.ID)
             self.assertTrue(found.definition)
             self.assertTrue(found.keys)

         # lookup by a single sensekey
         assert_fully_populated(gwn.get_by_key('wholly%4:02:00::', ctx=ctx))

         # lookup by several sensekeys
         wanted_keys = ('divine%3:00:02:heavenly:00', 'wholly%4:02:00::')
         for found in gwn.get_by_keys(wanted_keys, ctx=ctx):
             self.assertTrue(found.definition)
             self.assertTrue(found.keys)
             self.assertTrue(found.examples)

         # sensekey -> synset ID
         self.assertEqual(gwn.sk2sid('wholly%4:02:00::', ctx=ctx), 'r00008007')

         # search by lemma
         lemma = 'automatically'
         hits = gwn.search(lemma=lemma, ctx=ctx)
         self.assertTrue(hits)
         for hit in hits:
             self.assertTrue(hit.keys)
             self.assertTrue(hit.definition)
             self.assertIn(lemma, hit.lemmas)
         # restricting to POS 'v' must yield nothing
         verb_hits = gwn.search(lemma=lemma, pos='v', ctx=ctx)
         self.assertFalse(verb_hits)

         # hypernyms, hyponyms, hypehypo are not supported
         for relation in ('hypernyms', 'hyponyms', 'hypehypo'):
             with self.assertRaises(WordnetException):
                 getattr(gwn, relation)('r00008007', ctx=ctx)
예제 #4
0
 def test_match_surface(self):
     """Check that synset 'v02681795' can match its glosses to raw surface text.

     Raw strings come from "data/fixed_surface.tab" when that file has a
     row for the synset ID; otherwise the synset's original text is split
     on whitespace. Each raw string is paired with a gloss and the gloss
     tokens are imported into a ``ttl.Sentence`` before the final
     ``match_surface`` assertion.
     """
     fixed = CSV.read("data/fixed_surface.tab")
     # First column is the synset ID, remaining columns are the raw strings.
     raws_map = {x[0]: x[1:] for x in fixed if x}
     gwn = GWNSQL(YLConfig.GWN30_DB)
     sid = 'v02681795'
     ss = gwn.get_synset(sid)
     raws = raws_map[sid] if sid in raws_map else ss.get_orig().split()
     print("raws: {}".format(raws))
     print("glosses: {}".format([(x.text(), x.cat) for x in ss.glosses]))
     for r, g in zip(raws, ss.glosses):
         tokens = [t.text for t in g]
         # Drop trailing ';' tokens. The emptiness check prevents an
         # IndexError when a gloss has no tokens or only ';' tokens.
         while tokens and tokens[-1] == ';':
             tokens.pop()
         sent = ttl.Sentence(r)
         sent.import_tokens(tokens)
         print("{} --- {}".format(r, tokens))
     self.assertTrue(ss.match_surface(raws=raws))
     for g in ss.glosses:
         print(g.items, g.surface)
예제 #5
0
 def test_single_match(self):
     """Find the raw chunk matching the definition of 'r00008007' and split it.

     Each whitespace-separated chunk of the synset's original text is
     tried against the definition tokens. The first chunk that imports
     successfully and contains "(" is split in place before every "(",
     and the search stops. The before/after lists are printed.
     """
     gwn = GWNSQL(YLConfig.GWN30_DB)
     ss = gwn.get_synset('r00008007')
     raws = ss.get_orig().split()
     d = ss.get_def()
     for idx, r in enumerate(raws):
         sent = ttl.Sentence(r)
         try:
             tokens = [i.text for i in d.items]
             sent.import_tokens(tokens)
         except Exception:
             # Best effort: this chunk does not match the definition, try
             # the next one. Catching Exception rather than using a bare
             # except lets KeyboardInterrupt and SystemExit propagate.
             continue
         # found the def raw
         if "(" in r:
             new_part = r.replace("(", ";(").split(";")
             # Replace the single chunk with its pieces, in order. The
             # loop exits right after, so mutating the list is safe.
             raws[idx:idx + 1] = new_part
             break
     print("Before:", ss.get_orig().split())
     print("After:", raws)
예제 #6
0
 def test_setup_insert_stuff(self):
     """Create a GWNSQL at TEST_DB_SETUP from scratch and verify three inserted synsets.

     Any existing file at ``TEST_DB_SETUP`` is removed first. The test then
     checks the synset count, the exact set of tagged sensekeys, and the
     total sensekey count.
     """
     expected_tags = {
         'not%4:02:00::', 'be_born%2:30:00::',
         'christian_era%1:28:00::', 'christ%1:18:00::',
         'date%1:28:04::', 'musical_accompaniment%1:10:00::',
         'a_cappella%4:02:00::', 'ad%4:02:00::', 'ce%4:02:00::',
     }
     # Start from a clean slate.
     if os.path.isfile(TEST_DB_SETUP):
         os.unlink(TEST_DB_SETUP)
     db = GWNSQL(TEST_DB_SETUP)
     source = GWordnetXML()
     source.read(MOCKUP_SYNSETS_DATA)
     # One synset through the single-insert API, two through the bulk API.
     db.insert_synset(source.synsets[0])
     db.insert_synsets(source.synsets[1:3])
     self.assertIsNotNone(db)
     # Read the synsets back.
     stored = db.all_synsets()
     self.assertEqual(len(stored), 3)
     # Every tagged sensekey must be present.
     tagged = db.get_all_sensekeys_tagged()
     self.assertEqual(tagged, expected_tags)
     # All sensekeys.
     all_keys = db.get_all_sensekeys()
     self.assertEqual(len(all_keys), 7)
예제 #7
0
 def test_shallow_search(self):
     """Searching 'dog' with ``deep_select=False`` must return a truthy result."""
     wordnet = GWNSQL(YLConfig.GWN30_DB)
     hits = wordnet.search('dog', deep_select=False)
     self.assertTrue(hits)