def hyponyms(self, synsetid, lang='eng', deep_select=True, ctx=None):
    """Return the synsets linked to ``synsetid`` through a ``hypo`` link.

    Args:
        synsetid: Synset identifier; normalised with ``self.ensure_sid``.
        lang: Language forwarded to ``self.get_synsets`` when
            ``deep_select`` is true.
        deep_select: When true, delegate to ``self.get_synsets`` to load the
            linked synsets. When false, build bare ``Synset`` objects from
            the linked synset IDs only.
        ctx: Data-access context exposing a ``synlink`` table. It is
            dereferenced unconditionally, so it must not be ``None`` when
            this body runs (presumably injected by a wrapper that is not
            visible here -- TODO confirm).

    Returns:
        The result of ``self.get_synsets`` when ``deep_select`` is true,
        otherwise a list of ``Synset`` objects.
    """
    sid = self.ensure_sid(synsetid)
    links = ctx.synlink.select("synset1=? and link='hypo'", (sid,), columns=('synset2',))
    if not deep_select:
        # Each row exposes the target ID as ``synset2``; hand that value to
        # Synset rather than the row object itself.
        return [Synset(link.synset2) for link in links]
    return self.get_synsets(synsetids=(link.synset2 for link in links), lang=lang, ctx=ctx)
def search(self, lemma, pos=None, deep_select=True, ignore_case=True, synsets=None, ctx=None, **kwargs):
    """Find synsets having a term that matches ``lemma`` via SQL ``LIKE``.

    Args:
        lemma: Term pattern. When it contains ``%`` or ``_`` it is passed
            through ``escape_like`` and the query gains ``ESCAPE '@'``.
        pos: Optional part-of-speech; adds a ``pos = ?`` condition when truthy.
        deep_select: When true, hand the matching rows to
            ``self.results_to_synsets``; otherwise wrap the bare IDs.
        ignore_case: When true, compare ``lower(term)`` against the
            lower-cased lemma.
        synsets: Forwarded to ``self.results_to_synsets`` (deep select only).
        ctx: Data-access context exposing a ``synset`` table.
        **kwargs: Accepted but not used by this body.

    Returns:
        The result of ``self.results_to_synsets`` for a deep select, otherwise
        a ``SynsetCollection`` of ID-only ``Synset`` objects.
    """
    has_wildcard = '%' in lemma or '_' in lemma
    if has_wildcard:
        lemma = escape_like(lemma)
    operator = " LIKE ? ESCAPE '@'" if has_wildcard else ' LIKE ? '

    # Only the compared column and the bound value depend on case sensitivity
    column = 'lower(term)' if ignore_case else 'term'
    conditions = ['ID IN (SELECT sid FROM term WHERE {} {})'.format(column, operator)]
    params = [lemma.lower() if ignore_case else lemma]

    if pos:
        conditions.append('pos = ?')
        params.append(pos)

    # Fetch the IDs of the matching synsets
    matches = ctx.synset.select(' AND '.join(conditions), params, columns=('ID', ))
    if not deep_select:
        return SynsetCollection(synsets=(Synset(match.ID) for match in matches))
    return self.results_to_synsets(matches, ctx=ctx, synsets=synsets)
def get_synset(self, synsetid, lang='eng', ctx=None):
    """Load one synset with its lemmas, definitions and examples for ``lang``.

    Args:
        synsetid: Synset identifier; normalised with ``self.ensure_sid``.
        lang: Language used to filter words, definitions and examples.
        ctx: Data-access context exposing ``synset``, ``word``, ``sdef`` and
            ``sex`` tables.

    Returns:
        A populated ``Synset``, or ``None`` when ``ctx.synset.by_id`` yields
        no row for the given ID.
    """
    sid = self.ensure_sid(synsetid)
    res = ctx.synset.by_id(sid)
    if res is None:
        # Unknown ID: report "not found" instead of failing on ``res.synset``
        return None
    synset = Synset(res.synset, lang=lang)
    # Lemmas: words attached to this synset through the sense table
    words = ctx.word.select('wordid in (SELECT wordid FROM sense WHERE synset=?) and lang=?', (sid, lang))
    synset.lemmas.extend(w.lemma for w in words)
    # Definitions: the text is read from the row's ``_2`` field
    # (presumably the definition column -- confirm against the schema)
    for row in ctx.sdef.select("synset=? AND lang=?", (sid, lang)):
        synset.definitions.append(row._2)
    # Examples: read from the ``_2`` field in the same way
    examples = ctx.sex.select('synset=? and lang=?', (sid, lang))
    synset.examples.extend([e._2 for e in examples])
    return synset
def get_synsets_by_lemma(self, lemma):
    """Collect one ``Synset`` per ``wss`` row whose lemma equals ``lemma``.

    Each synset carries the row's definition, lemma, sense key and tag count,
    plus its example sentences ordered by ``sampleid``.

    Args:
        lemma: Exact lemma to look up.

    Returns:
        A ``SynsetCollection``; empty when no row matches.
    """
    with Execution(self.schema) as exe:
        matches = exe.schema.wss.select(where='lemma=?', values=(lemma,))
        found = SynsetCollection()
        if matches is None or len(matches) == 0:
            return found
        for match in matches:
            synset = Synset(match.synsetid)
            synset.definition = match.definition
            synset.add_lemma(match.lemma)
            synset.add_key(match.sensekey)
            synset.tagcount = match.tagcount
            # Examples are fetched per row, in sample order
            samples = exe.schema.ex.select(where='synsetid=?', values=[match.synsetid], orderby='sampleid')
            for sample in samples:
                synset.exes.append(sample.sample)
            found.add(synset)
        return found
def get_synset_by_sk(self, sk):
    """Build the ``Synset`` that owns the sense key ``sk``.

    The synset ID and definition come from the first matching ``wss`` row;
    lemmas, sense keys and tag counts are accumulated over every matching
    row, and examples are appended in ``sampleid`` order.

    Args:
        sk: Sense key to look up.

    Returns:
        The populated ``Synset``, or ``None`` when no row matches.
    """
    with Execution(self.schema) as exe:
        matches = exe.schema.wss.select(where='sensekey=?', values=(sk,))
        if matches is None or len(matches) == 0:
            return None
        first = matches[0]
        synset = Synset(first.synsetid)
        synset.definition = first.definition
        for match in matches:
            synset.add_lemma(match.lemma)
            synset.add_key(match.sensekey)
            synset.tagcount += match.tagcount
        # Examples belong to the synset, so they are fetched once
        samples = exe.schema.ex.select(where='synsetid=?', values=[first.synsetid], orderby='sampleid')
        for sample in samples:
            synset.exes.append(sample.sample)
        return synset
def all_senses(self):
    """Return a map from lemma to its ``Synset`` entries, cached per class.

    The map is built once from ``wordsXsensesXsynsets`` and stored in
    ``WordnetSQL.sense_map_cache``; later calls return that cached object as
    long as it is truthy. Each lemma's list follows the query order
    (``lemma, pos, tagcount DESC``).

    Returns:
        dict mapping each lemma to a list of ``Synset`` objects built from
        the row's synset ID, tag count and lemma.
    """
    if WordnetSQL.sense_map_cache:
        return WordnetSQL.sense_map_cache
    _query = ("SELECT lemma, pos, synsetid, sensekey, definition, tagcount "
              "FROM wordsXsensesXsynsets "
              "ORDER BY lemma, pos, tagcount DESC;")
    conn = self.get_conn()
    try:
        rows = conn.cursor().execute(_query).fetchall()
    finally:
        # Release the connection even when the query fails
        conn.close()
    # Build the lemma map; pos, sensekey and definition are selected but unused
    lemma_map = {}
    for lemma, _pos, synsetid, _sensekey, _definition, tagcount in rows:
        lemma_map.setdefault(lemma, []).append(Synset(synsetid, tagcount=tagcount, lemma=lemma))
    WordnetSQL.sense_map_cache = lemma_map
    return lemma_map
def search_senses(self, lemma_list, pos=None, a_conn=None):
    """Return ``Synset`` entries for every sense of any lemma in ``lemma_list``.

    Results are memoised in ``WordnetSQL.lemma_list_cache``. The cache key
    includes ``pos`` when one is given, so a lookup filtered by one
    part-of-speech is never answered with results cached for another.
    Calls without ``pos`` keep the plain joined-string key.

    Args:
        lemma_list: Sequence of lemma strings; an empty (or ``None``) value
            yields an empty list without touching the cache or database.
        pos: Optional part-of-speech filter.
        a_conn: Optional open connection to reuse. When it is not supplied a
            connection is obtained from ``self.get_conn()`` and closed before
            returning.

    Returns:
        list of ``Synset`` objects (the same list object that is cached).
    """
    if not lemma_list:
        return list()
    CACHE_JOIN_TOKEN = '|\t' * 12
    cache_key = CACHE_JOIN_TOKEN.join(lemma_list)
    if pos:
        # A tuple can never collide with the string keys of pos-less lookups
        cache_key = (cache_key, pos)
    cache = WordnetSQL.lemma_list_cache
    if cache_key in cache:
        return cache[cache_key]
    # One "lemma=?" placeholder per requested lemma
    _query = """SELECT lemma, pos, synsetid, sensekey, definition, tagcount FROM wordsXsensesXsynsets WHERE (%s) """ % ' or '.join(["lemma=?"] * len(lemma_list))
    _args = list(lemma_list)
    if pos:
        _query += " and pos = ?"
        _args.append(pos)
    conn = a_conn if a_conn else self.get_conn()
    try:
        rows = conn.cursor().execute(_query, _args).fetchall()
    finally:
        # Only close connections opened here; the caller owns a_conn
        if not a_conn:
            conn.close()
    senses = [
        Synset(synsetid, tagcount=tagcount, lemma=lemma)
        for lemma, _pos, synsetid, _sensekey, _definition, tagcount in rows
    ]
    cache[cache_key] = senses
    return senses
def get_synset(self, synsetid, ctx=None, **kwargs):
    """Load one synset with its lemmas, sense keys, tag count and examples.

    Args:
        synsetid: Synset identifier; normalised with ``self.ensure_sid``.
        ctx: Data-access context exposing ``ss``, ``wordsense`` and ``ex``
            tables.
        **kwargs: Accepted but not used by this body.

    Returns:
        The populated ``Synset``, or ``None`` when ``ctx.ss.by_id`` finds
        nothing for the ID.
    """
    sid = self.ensure_sid(synsetid)
    info = ctx.ss.by_id(sid)
    if info is None:
        return None

    synset = Synset(info.synsetid)
    synset.definition = info.definition

    # Every word sense contributes a lemma, a sense key and its tag count
    senses = ctx.wordsense.select('synsetid=?', (sid, ), columns=('lemma', 'sensekey', 'tagcount'))
    for sense in senses:
        synset.add_lemma(sense.lemma)
        synset.add_key(sense.sensekey)
        synset.tagcount += sense.tagcount

    # Example sentences, in sample order
    samples = ctx.ex.select(where='synsetid=?', values=[sid], orderby='sampleid')
    for sample in samples:
        synset.examples.append(sample.sample)
    return synset