def get_all_sensekeys_tagged(self):
    ''' Collect the distinct ``sk`` values found in the sensetag table.

    Returns:
        A set containing the ``sk`` attribute of every selected sensetag row.
    '''
    with Execution(self.schema) as exe:
        # only the sk column is requested from sensetag
        rows = exe.schema.sensetag.select(columns=['sk'])
        return {row.sk for row in rows}
def get_synset_def(self, sid_str, lang='eng'):
    ''' Look up the sdef row stored for a synset in one language.

    Arguments:
        sid_str -- synset ID string, parsed with SynsetID.from_string
        lang    -- value matched against the ``lang`` column (default 'eng')

    Returns:
        The ``_2`` attribute of the single matching row, or None when no row
        matches. The assertion fails if more than one row is selected.
    '''
    synset_id = SynsetID.from_string(sid_str)
    with Execution(self.schema) as exe:
        rows = exe.schema.sdef.select(
            where='synset=? and lang=?',
            values=[synset_id.to_canonical(), lang])
        # at most one row is expected per (synset, lang) pair
        assert len(rows) in (0, 1)
        return rows[0]._2 if rows else None
def get_synset_by_sk(self, sensekey):
    ''' Fetch the one synset that owns the given sensekey.

    Arguments:
        sensekey -- value matched against the ``sensekey`` column of the sensekey table

    Returns:
        The only element produced by results_to_synsets for the matching rows.

    Raises:
        Exception -- when the lookup does not yield exactly one synset
    '''
    with Execution(self.schema) as exe:
        rows = exe.schema.synset.select(
            where='id IN (SELECT sid FROM sensekey where sensekey=?)',
            values=[sensekey])
        # rows are only expanded into synset objects when something was selected
        found = self.results_to_synsets(rows, exe) if rows else None
        if found and len(found) == 1:
            return found[0]
    raise Exception("Could not find any synset with provided key {}".format(sensekey))
def get_synset_by_sks(self, sensekeys):
    ''' Fetch the synsets that own any of the given sensekeys.

    Arguments:
        sensekeys -- sized collection of sensekey values, passed as the query values

    Returns:
        The result of results_to_synsets for the selected rows, or an empty
        SynsetCollection when the select yields nothing.
    '''
    collection = SynsetCollection()
    with Execution(self.schema) as exe:
        # one '?' placeholder per sensekey, comma separated
        placeholders = ','.join('?' * len(sensekeys))
        where = 'id IN (SELECT sid FROM sensekey where sensekey IN (%s))' % placeholders
        rows = exe.schema.synset.select(where=where, values=sensekeys)
        if not rows:
            return collection
        return self.results_to_synsets(rows, exe, collection)
def get_senseinfo_by_sk(self, sk):
    ''' Return the wss row (pos, synsetid, sensekey) for a sensekey, memoised in sk_cache.

    Arguments:
        sk -- sensekey used both as the cache key and as the query value

    Returns:
        The cached entry when ``sk`` is already in ``self.sk_cache``; otherwise
        whatever select_single returns, which is stored in the cache first.
    '''
    if sk not in self.sk_cache:
        with Execution(self.schema) as exe:
            info = exe.schema.wss.select_single(
                where='sensekey=?',
                values=[sk],
                columns=['pos', 'synsetid', 'sensekey'])
        # the lookup result is cached as-is, whatever select_single returned
        self.sk_cache[sk] = info
        return info
    return self.sk_cache[sk]
def get_tagcount(self, sid):
    ''' Return the summed ``tagcount`` of all wss rows of a synset, memoised in tagcount_cache.

    Arguments:
        sid -- value matched against the ``synsetid`` column; also the cache key

    Returns:
        The cached total when present, otherwise the sum of ``tagcount`` over
        the selected rows (0 when there are none), which is cached before returning.
    '''
    if sid not in self.tagcount_cache:
        with Execution(self.schema) as exe:
            rows = exe.schema.wss.select(
                where='synsetid=?',
                values=[sid],
                columns=['tagcount'])
            total = sum(row.tagcount for row in rows)
        self.tagcount_cache[sid] = total
        return total
    return self.tagcount_cache[sid]
def get_synsets_by_ids(self, synsetids):
    ''' Fetch the synsets whose ids are listed in ``synsetids``.

    Arguments:
        synsetids -- iterable of synset ID strings; each is parsed with
                     SynsetID.from_string and converted with to_gwnsql()

    Returns:
        The result of results_to_synsets for the selected rows, or an empty
        SynsetCollection when the select yields nothing.
    '''
    gwn_ids = [str(SynsetID.from_string(raw_id).to_gwnsql()) for raw_id in synsetids]
    collection = SynsetCollection()
    with Execution(self.schema) as exe:
        # one '?' placeholder per id, comma separated
        condition = 'id IN (%s)' % ','.join('?' * len(gwn_ids))
        rows = exe.schema.synset.select(where=condition, values=gwn_ids)
        if not rows:
            return collection
        return self.results_to_synsets(rows, exe, collection)
def all_synsets(self, synsets=None, deep_select=True):
    ''' Select every row of the synset table.

    Arguments:
        synsets     -- optional collection handed to results_to_synsets; a new
                       SynsetCollection is created only when this is None
        deep_select -- when true the rows are expanded with results_to_synsets,
                       when false the selected rows are returned as they are

    Returns:
        The result of results_to_synsets (deep_select), the selected rows
        (not deep_select), or the collection itself when nothing was selected.
    '''
    # honour a caller-supplied collection instead of always replacing it
    if synsets is None:
        synsets = SynsetCollection()
    with Execution(self.schema) as exe:
        results = exe.schema.synset.select()
        if not results:
            return synsets
        if deep_select:
            return self.results_to_synsets(results, exe, synsets)
        return results
def get_senseinfo_by_sid(self, synsetid):
    ''' Return one wss row for a synset, memoised in sid_cache.

    Arguments:
        synsetid -- synset identifier, normalised with self.ensure_sid before use

    Returns:
        The cached entry for the normalised id when present; otherwise whatever
        select_single returns for the columns pos, synsetid, sensekey,
        definition and tagcount, which is cached before returning.
    '''
    sid = self.ensure_sid(synsetid)
    if sid not in self.sid_cache:
        with Execution(self.schema) as exe:
            info = exe.schema.wss.select_single(
                where='synsetid=?',
                values=[sid],
                columns=['pos', 'synsetid', 'sensekey', 'definition', 'tagcount'])
        # the lookup result is cached as-is, whatever select_single returned
        self.sid_cache[sid] = info
        return info
    return self.sid_cache[sid]
def get_synset_by_id(self, synsetid):
    ''' Fetch a single synset by its ID.

    Arguments:
        synsetid -- synset ID string, parsed with SynsetID.from_string

    Returns:
        The only element produced by results_to_synsets, or None when nothing
        was selected or the expansion did not yield exactly one synset.
    '''
    # parsing guarantees we work with a SynsetID instance from here on
    sid = SynsetID.from_string(synsetid)
    with Execution(self.schema) as exe:
        rows = exe.schema.synset.select(where='id=?', values=[sid.to_gwnsql()])
        if not rows:
            return None
        matches = self.results_to_synsets(rows, exe)
        return matches[0] if len(matches) == 1 else None
def get_glossitems_text(self, synsetid):
    ''' Build lightweight GlossItem objects for all gloss items of a synset.

    Arguments:
        synsetid -- synset ID string, parsed with SynsetID.from_string and
                    converted with to_gwnsql()

    Returns:
        A list of GlossItem objects carrying only itemid, lemma, pos and text
        from the selected glossitem rows; every other field is None.
    '''
    sid = SynsetID.from_string(synsetid).to_gwnsql()
    with Execution(self.schema) as exe:
        where = 'gid IN (SELECT id FROM gloss WHERE sid = ?)'
        results = exe.schema.glossitem.select(
            where=where,
            values=[sid],
            columns=['id', 'lemma', 'pos', 'text'])
        # the text column is selected above, so pass it on instead of dropping it
        return [GlossItem(gloss=None, tag=None, lemma=item.lemma, pos=item.pos,
                          cat=None, coll=None, rdf=None, origid=None, sep=None,
                          text=item.text, itemid=item.id)
                for item in results]
def get_hypehypo(self, sid):
    ''' Get all hypernyms and hyponyms of a given synset

    Arguments:
        sid -- synset identifier; its str() form is parsed with SynsetID.from_string

    Returns:
        The entry of ``self.hypehypo_cache`` for the parsed id. On a cache miss
        the matching sss rows are added to that entry first.
    '''
    key = SynsetID.from_string(str(sid))
    if key in self.hypehypo_cache:
        return self.hypehypo_cache[key]
    with Execution(self.schema) as exe:
        rows = exe.schema.sss.select(
            where='ssynsetid = ? and linkid in (1,2,3,4, 11,12,13,14,15,16,40,50,81)',
            values=[key.to_wnsql()],
            columns=['linkid', 'dpos', 'dsynsetid', 'dsensekey', 'dwordid'])
        for row in rows:
            self.hypehypo_cache[key].add(row)
    # NOTE(review): indexing a missing key here presumes the cache creates
    # entries on access (e.g. a defaultdict of sets) -- confirm where it is built
    return self.hypehypo_cache[key]
def get_synsets_by_term(self, term, pos=None, synsets=None, sid_only=False):
    ''' Find the synsets that contain a term, compared case-insensitively.

    Arguments:
        term     -- term to look for; lower-cased before being matched against lower(term)
        pos      -- optional value matched against the ``pos`` column; ignored when falsy
        synsets  -- optional collection handed to results_to_synsets; a new
                    SynsetCollection is created only when this is None
        sid_only -- when true the selected synset rows are returned as they are

    Returns:
        The selected rows (sid_only), the result of results_to_synsets
        (otherwise), or the collection itself when nothing was selected.
    '''
    # honour a caller-supplied collection instead of always replacing it
    if synsets is None:
        synsets = SynsetCollection()
    if pos:
        where = 'pos = ? AND id IN (SELECT sid FROM term where lower(term)=?)'
        values = [pos, term.lower()]
    else:
        where = 'id IN (SELECT sid FROM term where lower(term)=?)'
        values = [term.lower()]
    with Execution(self.schema) as exe:
        results = exe.schema.synset.select(where=where, values=values)
        if not results:
            return synsets
        if sid_only:
            return results
        return self.results_to_synsets(results, exe, synsets)
def get_synset_by_sk(self, sk):
    ''' Build a Synset object from the wss rows matching a sensekey.

    Arguments:
        sk -- value matched against the ``sensekey`` column of wss

    Returns:
        A Synset created from the first row's synsetid and definition, with the
        lemma, sensekey and tagcount of every row merged in and its examples
        appended in sampleid order; None when no row matches.
    '''
    with Execution(self.schema) as exe:
        rows = exe.schema.wss.select(where='sensekey=?', values=(sk,))
        if rows is None or len(rows) == 0:
            return None
        first = rows[0]
        synset = Synset(first.synsetid)
        synset.definition = first.definition
        for row in rows:
            synset.add_lemma(row.lemma)
            synset.add_key(row.sensekey)
            synset.tagcount += row.tagcount
        # examples are looked up by the synsetid of the first row
        examples = exe.schema.ex.select(
            where='synsetid=?',
            values=[first.synsetid],
            orderby='sampleid')
        for example in examples:
            synset.exes.append(example.sample)
        return synset
def get_synsets_by_lemma(self, lemma):
    ''' Build one Synset per wss row whose lemma equals ``lemma``.

    Arguments:
        lemma -- value matched against the ``lemma`` column of wss

    Returns:
        A SynsetCollection with a Synset for each selected row (definition,
        lemma, sensekey, tagcount and examples filled in); empty when no row matches.
    '''
    with Execution(self.schema) as exe:
        rows = exe.schema.wss.select(where='lemma=?', values=(lemma,))
        collection = SynsetCollection()
        if rows is None or len(rows) == 0:
            return collection
        for row in rows:
            synset = Synset(row.synsetid)
            synset.definition = row.definition
            synset.add_lemma(row.lemma)
            synset.add_key(row.sensekey)
            synset.tagcount = row.tagcount
            # examples are fetched per row, ordered by sampleid
            examples = exe.schema.ex.select(
                where='synsetid=?',
                values=[row.synsetid],
                orderby='sampleid')
            for example in examples:
                synset.exes.append(example.sample)
            collection.add(synset)
        return collection
def insert_synsets(self, synsets):
    ''' Store synsets with related information (sensekeys, terms, gloss, etc.)

    Arguments:
        synsets -- iterable of synset objects; each must expose sid, lemmas,
                   keys, raw_glosses and glosses as read below

    Side effects:
        Rows are inserted into the synset, term, sensekey, gloss_raw, gloss,
        glossitem and sensetag tables. ``gloss.gid`` and ``item.itemid`` are
        overwritten on the passed-in objects with the value returned by
        ``SELECT last_insert_rowid()`` right after each insert.
    '''
    with Execution(self.schema) as exe:
        # synset;
        for synset in synsets:
            sid = synset.sid.to_gwnsql()
            exe.schema.synset.insert([sid, synset.sid.offset, synset.sid.pos])
            # term;
            for term in synset.lemmas:
                exe.schema.term.insert([sid, term])
            # sensekey;
            for sk in synset.keys:
                exe.schema.sensekey.insert([sid, sk])
            # gloss_raw;
            for gloss_raw in synset.raw_glosses:
                exe.schema.gloss_raw.insert([sid, gloss_raw.cat, gloss_raw.gloss])
            # gloss; DB: id origid sid cat | OBJ: gid origid cat
            for gloss in synset.glosses:
                exe.schema.gloss.insert([gloss.origid, sid, gloss.cat])
                # read back the row id of the gloss just inserted; the item and
                # tag inserts below reference it
                gloss.gid = exe.ds.execute('SELECT last_insert_rowid()').fetchone()[0]
                # glossitem;
                # OBJ | gloss, order, tag, lemma, pos, cat, coll, rdf, origid, sep, text
                # DB | id ord gid tag lemma pos cat coll rdf sep text origid
                for item in gloss.items:
                    exe.schema.glossitem.insert([item.order, gloss.gid, item.tag, item.lemma, item.pos, item.cat, item.coll, item.rdf, item.sep, item.text, item.origid])
                    # items must receive their ids before the sensetag loop,
                    # which reads tag.item.itemid
                    item.itemid = exe.ds.execute('SELECT last_insert_rowid()').fetchone()[0]
                # sensetag;
                for tag in gloss.tags:
                    # OBJ: tagid cat, tag, glob, glemma, gid, coll, origid, sid, sk, lemma
                    # DB: id cat tag glob glob_lemma glob_id coll sid gid sk origid lemma itemid
                    # NOTE(review): going by the DB column list above, the '' below
                    # lands in the sid column -- confirm this is intended
                    exe.schema.sensetag.insert([tag.cat, tag.tag, tag.glob, tag.glemma, tag.glob_id, tag.coll, '', gloss.gid, tag.sk, tag.origid, tag.lemma, tag.item.itemid])
        # persist the inserts made through this Execution
        exe.ds.commit()
        pass
def cache_tagcounts(self):
    ''' Accumulate the ``tagcount`` of every wss row into tagcount_cache, keyed by synsetid.

    The values are added onto whatever the cache already holds for a key.
    '''
    with Execution(self.schema) as exe:
        rows = exe.schema.wss.select(columns=['synsetid', 'tagcount'])
        for row in rows:
            # NOTE(review): += on a possibly missing key presumes the cache
            # supplies a default (e.g. a defaultdict) -- confirm where it is built
            self.tagcount_cache[row.synsetid] += row.tagcount
def cache_all_hypehypo(self):
    ''' Preload hypehypo_cache with every row of the sss table.

    Each selected row is stored as a whole in the cache entry of its
    ``ssynsetid``, mirroring how get_hypehypo fills the same cache.
    '''
    with Execution(self.schema) as exe:
        results = exe.schema.sss.select(
            columns=['linkid', 'dpos', 'dsynsetid', 'dsensekey', 'dwordid', 'ssynsetid'])
        for result in results:
            # add() keeps the row itself; set.update() treats its argument as
            # an iterable of elements and would not store the row as one item
            # NOTE(review): entries are keyed by the raw ssynsetid here, while
            # get_hypehypo looks them up by SynsetID -- confirm the keys match
            self.hypehypo_cache[result.ssynsetid].add(result)
def get_all_sensekeys(self):
    ''' Return the result of an unfiltered select on the sensekey table. '''
    with Execution(self.schema) as exe:
        # no where clause and no column restriction
        return exe.schema.sensekey.select()
def get_examples_by_sid(self, synsetid):
    ''' Return the ex rows of a synset, ordered by sampleid.

    Arguments:
        synsetid -- synset identifier, normalised with self.ensure_sid before use
    '''
    normalized_sid = self.ensure_sid(synsetid)
    with Execution(self.schema) as exe:
        return exe.schema.ex.select(
            where='synsetid=?',
            values=[normalized_sid],
            orderby='sampleid')
def get_all_sensekeys(self):
    ''' Return the pos, synsetid and sensekey columns of every wss row. '''
    with Execution(self.schema) as exe:
        return exe.schema.wss.select(columns=['pos', 'synsetid', 'sensekey'])
def cache_all_sensekey(self):
    ''' Fill sk_cache with every wss row (pos, synsetid, sensekey), keyed by its sensekey.

    An existing cache entry for the same sensekey is overwritten.
    '''
    with Execution(self.schema) as exe:
        rows = exe.schema.wss.select(columns=['pos', 'synsetid', 'sensekey'])
        for row in rows:
            self.sk_cache[row.sensekey] = row
def get_sensetags(self, synsetid):
    ''' Return the id, lemma and sk columns of the sensetag rows attached to a synset's glosses.

    Arguments:
        synsetid -- synset ID string, parsed with SynsetID.from_string and
                    converted with to_gwnsql()
    '''
    gwn_sid = SynsetID.from_string(synsetid).to_gwnsql()
    with Execution(self.schema) as exe:
        # sensetags are reached through the gloss rows of the synset
        return exe.schema.sensetag.select(
            where='gid IN (SELECT id FROM gloss WHERE sid = ?)',
            values=[gwn_sid],
            columns=['id', 'lemma', 'sk'])
def get_all_synsets(self):
    ''' Return the synsetid, lemma, sensekey and tagcount columns of every wss row. '''
    with Execution(self.schema) as exe:
        rows = exe.schema.wss.select(
            columns=['synsetid', 'lemma', 'sensekey', 'tagcount'])
        return rows
def get_all_synsets(self):
    ''' Return the result of an unfiltered select on the ss table. '''
    with Execution(self.schema) as exe:
        rows = exe.schema.ss.select()
        return rows