Пример #1
0
 def get_all_sensekeys_tagged(self):
     '''Collect the distinct `sk` values stored in the sensetag table.

     Returns:
         set: every `sk` value found, with duplicates collapsed.
     '''
     with Execution(self.schema) as exe:
         # Only the `sk` column is requested from the table.
         rows = exe.schema.sensetag.select(columns=['sk'])
         return {row.sk for row in rows}
Пример #2
0
 def get_synset_def(self, sid_str, lang='eng'):
     '''Look up the sdef row stored for a synset in one language.

     Args:
         sid_str: synset ID, parsed with SynsetID.from_string.
         lang: value matched against the `lang` column (default 'eng').

     Returns:
         The `_2` field of the matching sdef row, or None when no row
         matches.
     '''
     synset_id = SynsetID.from_string(sid_str)
     with Execution(self.schema) as exe:
         rows = exe.schema.sdef.select(where='synset=? and lang=?',
                                       values=[synset_id.to_canonical(), lang])
         # Sanity check: zero or one row per (synset, lang) pair.
         assert len(rows) in (0, 1)
         return rows[0]._2 if rows else None
Пример #3
0
 def get_synset_by_sk(self, sensekey):
     '''Fetch the single synset that a sensekey points to.

     Args:
         sensekey: value matched against the `sensekey` column of the
             sensekey table.

     Returns:
         The only synset produced by results_to_synsets for the matching rows.

     Raises:
         Exception: when the lookup does not yield exactly one synset.
     '''
     with Execution(self.schema) as exe:
         rows = exe.schema.synset.select(
             where='id IN (SELECT sid FROM sensekey where sensekey=?)',
             values=[sensekey])
         matches = self.results_to_synsets(rows, exe) if rows else None
         if matches and len(matches) == 1:
             return matches[0]
     raise Exception("Could not find any synset with provided key {}".format(sensekey))
Пример #4
0
 def get_synset_by_sks(self, sensekeys):
     '''Fetch the synsets that own at least one of the given sensekeys.

     Args:
         sensekeys: iterable of sensekey values. It is materialised once, so
             generators and sets are accepted as well as lists and tuples.

     Returns:
         The value returned by results_to_synsets for the matching rows, or
         an empty SynsetCollection when no sensekey is given or nothing
         matches.
     '''
     synsets = SynsetCollection()
     keys = list(sensekeys)
     if not keys:
         # Nothing to look up; this also avoids emitting an empty `IN ()`
         # list, which not every SQL engine accepts.
         return synsets
     with Execution(self.schema) as exe:
         # One positional placeholder per sensekey keeps the query parameterised.
         placeholders = ','.join(['?'] * len(keys))
         where = 'id IN (SELECT sid FROM sensekey where sensekey IN (%s))' % placeholders
         results = exe.schema.synset.select(where=where, values=keys)
         if results:
             return self.results_to_synsets(results, exe, synsets)
     return synsets
Пример #5
0
 def get_senseinfo_by_sk(self, sk):
     '''Return the wss row for a sensekey, memoised in sk_cache.

     Args:
         sk: value matched against the `sensekey` column.

     Returns:
         The cached entry when `sk` is already in sk_cache; otherwise the
         result of select_single (columns pos, synsetid, sensekey), which is
         stored in the cache before being returned.
     '''
     cache = self.sk_cache
     if sk in cache:
         return cache[sk]
     info = None
     with Execution(self.schema) as exe:
         info = exe.schema.wss.select_single(
             where='sensekey=?',
             values=[sk],
             columns=['pos', 'synsetid', 'sensekey'])
     cache[sk] = info
     return info
Пример #6
0
 def get_tagcount(self, sid):
     '''Return the summed `tagcount` of all wss rows for a synset.

     Args:
         sid: value matched against the `synsetid` column.

     Returns:
         The cached total when `sid` is already in tagcount_cache; otherwise
         the sum over the matching rows (0 when there are none), which is
         stored in the cache before being returned.
     '''
     cache = self.tagcount_cache
     if sid in cache:
         return cache[sid]
     with Execution(self.schema) as exe:
         rows = exe.schema.wss.select(where='synsetid=?', values=[sid], columns=['tagcount'])
     total = sum(row.tagcount for row in rows)
     cache[sid] = total
     return total
Пример #7
0
 def get_synsets_by_ids(self, synsetids):
     '''Fetch the synsets whose IDs appear in `synsetids`.

     Args:
         synsetids: iterable of synset IDs; each one is parsed with
             SynsetID.from_string, converted with to_gwnsql() and stringified
             before being bound to the query.

     Returns:
         The value returned by results_to_synsets for the matching rows, or
         an empty SynsetCollection when no ID is given or nothing matches.
     '''
     sids = [str(SynsetID.from_string(x).to_gwnsql()) for x in synsetids]
     synsets = SynsetCollection()
     if not sids:
         # Nothing to look up; this also avoids emitting an empty `IN ()`
         # list, which not every SQL engine accepts.
         return synsets
     with Execution(self.schema) as exe:
         # One positional placeholder per ID keeps the query parameterised.
         wherecon = 'id IN (%s)' % (','.join(['?'] * len(sids)))
         results = exe.schema.synset.select(where=wherecon, values=sids)
         if results:
             return self.results_to_synsets(results, exe, synsets)
     return synsets
Пример #8
0
 def all_synsets(self, synsets=None, deep_select=True):
     '''Load every row of the synset table.

     Args:
         synsets: optional collection passed on to results_to_synsets; a new
             SynsetCollection is created when it is omitted. (Previously this
             argument was silently discarded.)
         deep_select: when True (default) the rows are converted with
             results_to_synsets; when False the raw rows are returned.

     Returns:
         The value returned by results_to_synsets, or the raw rows when
         deep_select is False. When the table yields no rows, `synsets`
         itself is returned.
     '''
     if synsets is None:
         synsets = SynsetCollection()
     with Execution(self.schema) as exe:
         results = exe.schema.synset.select()
         if not results:
             return synsets
         if not deep_select:
             return results
         return self.results_to_synsets(results, exe, synsets)
Пример #9
0
 def get_senseinfo_by_sid(self, synsetid):
     '''Return the wss row for a synset, memoised in sid_cache.

     Args:
         synsetid: synset ID; normalised with ensure_sid before use.

     Returns:
         The cached entry when the normalised ID is already in sid_cache;
         otherwise the result of select_single (columns pos, synsetid,
         sensekey, definition, tagcount), which is stored in the cache
         before being returned.
     '''
     key = self.ensure_sid(synsetid)
     cache = self.sid_cache
     if key in cache:
         return cache[key]
     info = None
     with Execution(self.schema) as exe:
         info = exe.schema.wss.select_single(
             where='synsetid=?',
             values=[key],
             columns=['pos', 'synsetid', 'sensekey', 'definition', 'tagcount'])
     cache[key] = info
     return info
Пример #10
0
    def get_synset_by_id(self, synsetid):
        '''Fetch one synset by its ID.

        Args:
            synsetid: synset ID, parsed with SynsetID.from_string.

        Returns:
            The synset when the lookup yields exactly one, otherwise None.
        '''
        sid = SynsetID.from_string(synsetid)
        with Execution(self.schema) as exe:
            rows = exe.schema.synset.select(where='id=?', values=[sid.to_gwnsql()])
            if not rows:
                return None
            found = self.results_to_synsets(rows, exe)
            return found[0] if len(found) == 1 else None
Пример #11
0
 def get_glossitems_text(self, synsetid):
     '''Return lightweight GlossItem objects for every gloss of a synset.

     Only the id, lemma, pos and text columns are loaded; every other
     GlossItem field is left as None.

     Args:
         synsetid: synset ID, parsed with SynsetID.from_string and converted
             with to_gwnsql().

     Returns:
         list: one GlossItem per glossitem row whose gloss belongs to the
         synset, with `text` taken from the selected `text` column.
     '''
     sid = SynsetID.from_string(synsetid).to_gwnsql()
     where = 'gid IN (SELECT id FROM gloss WHERE sid = ?)'
     with Execution(self.schema) as exe:
         results = exe.schema.glossitem.select(where=where, values=[sid],
                                               columns=['id', 'lemma', 'pos', 'text'])
         # The `text` column is fetched above, so hand it to the item
         # instead of discarding it.
         return [GlossItem(gloss=None, tag=None, lemma=item.lemma, pos=item.pos, cat=None,
                           coll=None, rdf=None, origid=None, sep=None, text=item.text,
                           itemid=item.id)
                 for item in results]
Пример #12
0
 def get_hypehypo(self, sid):
     '''Get all hypernyms and hyponyms of a given synset.

     Args:
         sid: synset ID; stringified and parsed with SynsetID.from_string.

     Returns:
         The hypehypo_cache entry for the synset. On a cache miss the entry
         is first filled with the sss rows whose `ssynsetid` matches and
         whose `linkid` is in the fixed list used by the query.
     '''
     key = SynsetID.from_string(str(sid))
     cache = self.hypehypo_cache
     if key in cache:
         return cache[key]
     links = None
     with Execution(self.schema) as exe:
         links = exe.schema.sss.select(
             where='ssynsetid = ? and linkid in (1,2,3,4, 11,12,13,14,15,16,40,50,81)',
             values=[key.to_wnsql()],
             columns=['linkid', 'dpos', 'dsynsetid', 'dsensekey', 'dwordid'])
     for link in links:
         cache[key].add(link)
     return cache[key]
Пример #13
0
 def get_synsets_by_term(self, term, pos=None, synsets=None, sid_only=False):
     '''Find the synsets that contain a term, compared case-insensitively.

     Args:
         term: the term to search for; lower-cased before matching against
             lower(term) in the term table.
         pos: optional value matched against the `pos` column; no filter is
             applied when it is falsy.
         synsets: optional collection passed on to results_to_synsets; a new
             SynsetCollection is created when it is omitted. (Previously this
             argument was silently discarded.)
         sid_only: when True the raw synset rows are returned instead of
             being converted with results_to_synsets.

     Returns:
         The raw rows (sid_only=True) or the value returned by
         results_to_synsets. When nothing matches, `synsets` itself is
         returned.
     '''
     if synsets is None:
         synsets = SynsetCollection()
     term_filter = 'id IN (SELECT sid FROM term where lower(term)=?)'
     if pos:
         where = 'pos = ? AND ' + term_filter
         values = [pos, term.lower()]
     else:
         where = term_filter
         values = [term.lower()]
     with Execution(self.schema) as exe:
         results = exe.schema.synset.select(where=where, values=values)
         if not results:
             return synsets
         if sid_only:
             return results
         return self.results_to_synsets(results, exe, synsets)
Пример #14
0
 def get_synset_by_sk(self, sk):
     '''Build a Synset from the wss rows that match a sensekey.

     Args:
         sk: value matched against the `sensekey` column of wss.

     Returns:
         A Synset created from the first row's synsetid and definition, with
         the lemma and sensekey of every row added, the tagcounts
         accumulated, and the `sample` values from ex (ordered by sampleid)
         appended to `exes`. None when no row matches.
     '''
     with Execution(self.schema) as exe:
         rows = exe.schema.wss.select(where='sensekey=?', values=(sk,))
         if rows is None or len(rows) == 0:
             return None
         first = rows[0]
         synset = Synset(first.synsetid)
         synset.definition = first.definition
         for row in rows:
             synset.add_lemma(row.lemma)
             synset.add_key(row.sensekey)
             synset.tagcount += row.tagcount
         # Attach the samples stored for the synset.
         samples = exe.schema.ex.select(where='synsetid=?', values=[first.synsetid],
                                        orderby='sampleid')
         for sample_row in samples:
             synset.exes.append(sample_row.sample)
         return synset
Пример #15
0
 def get_synsets_by_lemma(self, lemma):
     '''Build one Synset per wss row whose lemma matches exactly.

     Args:
         lemma: value matched against the `lemma` column of wss.

     Returns:
         SynsetCollection: one Synset per matching row, each carrying that
         row's definition, lemma, sensekey and tagcount plus the `sample`
         values from ex ordered by sampleid. Empty when nothing matches.
     '''
     with Execution(self.schema) as exe:
         rows = exe.schema.wss.select(where='lemma=?', values=(lemma,))
         collection = SynsetCollection()
         if rows is None or len(rows) == 0:
             return collection
         for row in rows:
             synset = Synset(row.synsetid)
             synset.definition = row.definition
             synset.add_lemma(row.lemma)
             synset.add_key(row.sensekey)
             synset.tagcount = row.tagcount
             # One extra query per row to attach the samples of its synset.
             samples = exe.schema.ex.select(where='synsetid=?', values=[row.synsetid],
                                            orderby='sampleid')
             for sample_row in samples:
                 synset.exes.append(sample_row.sample)
             collection.add(synset)
         return collection
Пример #16
0
 def insert_synsets(self, synsets):
     ''' Store synsets with related information (sensekeys, terms, gloss, etc.)

     For each synset this inserts one synset row, followed by its term,
     sensekey, gloss_raw, gloss, glossitem and sensetag rows. A single
     commit is issued after all synsets have been processed.

     Args:
         synsets: iterable of synset objects exposing `sid`, `lemmas`,
             `keys`, `raw_glosses` and `glosses`.

     Side effects:
         Sets `gloss.gid` and `item.itemid` on the passed-in objects to the
         values returned by `SELECT last_insert_rowid()`.
     '''
     with Execution(self.schema) as exe:
         # synset: one row per synset (id, offset, pos)
         for synset in synsets:
             sid = synset.sid.to_gwnsql()
             exe.schema.synset.insert([sid, synset.sid.offset, synset.sid.pos])
             # term: one row per lemma
             for term in synset.lemmas:
                 exe.schema.term.insert([sid, term])
             # sensekey: one row per key
             for sk in synset.keys:
                 exe.schema.sensekey.insert([sid, sk])
             # gloss_raw: one row per raw gloss
             for gloss_raw in synset.raw_glosses:
                 exe.schema.gloss_raw.insert([sid, gloss_raw.cat, gloss_raw.gloss])
             # gloss; DB: id origid sid cat | OBJ: gid origid cat
             for gloss in synset.glosses:
                 exe.schema.gloss.insert([gloss.origid, sid, gloss.cat])
                 # Must run right after the insert above: the new row's ID is
                 # needed as `gid` by the glossitem and sensetag rows below.
                 gloss.gid = exe.ds.execute('SELECT last_insert_rowid()').fetchone()[0]
                 # glossitem;
                 # OBJ | gloss, order, tag, lemma, pos, cat, coll, rdf, origid, sep, text
                 # DB  | id ord gid tag lemma pos cat coll rdf sep text origid
                 for item in gloss.items:
                     exe.schema.glossitem.insert([item.order, gloss.gid, item.tag, item.lemma, item.pos, item.cat, item.coll, item.rdf, item.sep, item.text, item.origid])
                     # Record the new row's ID; the sensetag rows read it
                     # through tag.item.itemid.
                     item.itemid = exe.ds.execute('SELECT last_insert_rowid()').fetchone()[0]
                 # sensetag: inserted after the items so tag.item.itemid is set
                 for tag in gloss.tags:
                     # OBJ: tagid cat, tag, glob, glemma, gid, coll, origid, sid, sk, lemma
                     # DB: id cat tag glob glob_lemma glob_id coll sid gid sk origid lemma itemid
                     # NOTE(review): going by the column list above, the sid
                     # position receives '' rather than the synset's sid -
                     # confirm this is intended.
                     exe.schema.sensetag.insert([tag.cat, tag.tag, tag.glob, tag.glemma,
                                                 tag.glob_id, tag.coll, '', gloss.gid, tag.sk,
                                                 tag.origid, tag.lemma, tag.item.itemid])
         # Single commit once every synset has been written.
         exe.ds.commit()
     pass
Пример #17
0
 def cache_tagcounts(self):
     '''Add the `tagcount` of every wss row to tagcount_cache.

     Counts are accumulated per `synsetid` on top of whatever the cache
     already holds for that key.
     '''
     with Execution(self.schema) as exe:
         rows = exe.schema.wss.select(columns=['synsetid', 'tagcount'])
     for row in rows:
         self.tagcount_cache[row.synsetid] += row.tagcount
Пример #18
0
 def cache_all_hypehypo(self):
     '''Preload hypehypo_cache with every row of sss.

     Rows are grouped under their `ssynsetid` value and each row is stored
     whole.
     '''
     # NOTE(review): entries are keyed by the raw `ssynsetid` column value -
     # confirm that lookups against hypehypo_cache use the same key type.
     with Execution(self.schema) as exe:
         results = exe.schema.sss.select(columns=['linkid', 'dpos', 'dsynsetid', 'dsensekey', 'dwordid', 'ssynsetid'])
         for result in results:
             # add() stores the row itself; update() would iterate over the
             # row and scatter its individual column values into the set.
             self.hypehypo_cache[result.ssynsetid].add(result)
Пример #19
0
 def get_all_sensekeys(self):
     '''Return every row of the sensekey table, unfiltered.'''
     with Execution(self.schema) as exe:
         return exe.schema.sensekey.select()
Пример #20
0
 def get_examples_by_sid(self, synsetid):
     '''Return the ex rows of a synset, ordered by `sampleid`.

     Args:
         synsetid: synset ID; normalised with ensure_sid before use.
     '''
     normalised = self.ensure_sid(synsetid)
     with Execution(self.schema) as exe:
         examples = exe.schema.ex.select(
             where='synsetid=?',
             values=[normalised],
             orderby='sampleid')
     return examples
Пример #21
0
 def get_all_sensekeys(self):
     '''Return the pos, synsetid and sensekey columns of every wss row.'''
     wanted = ['pos', 'synsetid', 'sensekey']
     with Execution(self.schema) as exe:
         return exe.schema.wss.select(columns=wanted)
Пример #22
0
 def cache_all_sensekey(self):
     '''Fill sk_cache with every wss row, keyed by the row's sensekey.

     Each cached row carries the pos, synsetid and sensekey columns. An
     existing entry for the same sensekey is overwritten.
     '''
     with Execution(self.schema) as exe:
         rows = exe.schema.wss.select(columns=['pos', 'synsetid', 'sensekey'])
         for row in rows:
             self.sk_cache[row.sensekey] = row
Пример #23
0
 def get_sensetags(self, synsetid):
     '''Return the sensetag rows attached to the glosses of a synset.

     Args:
         synsetid: synset ID, parsed with SynsetID.from_string and converted
             with to_gwnsql().

     Returns:
         The selected rows, limited to the id, lemma and sk columns.
     '''
     gwn_sid = SynsetID.from_string(synsetid).to_gwnsql()
     gloss_filter = 'gid IN (SELECT id FROM gloss WHERE sid = ?)'
     with Execution(self.schema) as exe:
         return exe.schema.sensetag.select(where=gloss_filter,
                                           values=[gwn_sid],
                                           columns=['id', 'lemma', 'sk'])
Пример #24
0
 def get_all_synsets(self):
     '''Return the synsetid, lemma, sensekey and tagcount columns of every wss row.'''
     with Execution(self.schema) as exe:
         rows = exe.schema.wss.select(
             columns=['synsetid', 'lemma', 'sensekey', 'tagcount'])
         return rows
Пример #25
0
 def get_all_synsets(self):
     '''Return every row of ss, unfiltered.'''
     with Execution(self.schema) as exe:
         rows = exe.schema.ss.select()
         return rows