def fetch_rate_id(ili_ids, u=None):
    """Return the ratings stored for a collection of ili ids.

    Args:
        ili_ids: iterable of ili ids to look up (list, tuple, set, ...).
        u: optional username; when truthy only the ratings given by that
            user are returned.

    Returns:
        A ``dd(list)`` mapping ``ili_id -> [(rating, u, t), ...]``.
    """
    rating = dd(list)

    # Materialise once: the ids are used both to build the placeholders and
    # as bound parameters, and must be a list so the username can be appended
    # (the previous ``ili_ids + [u]`` failed for tuples and sets).
    ili_ids = list(ili_ids)
    if not ili_ids:
        # Nothing can match an empty IN () list; skip the round trip.
        return rating

    placeholders = ",".join("?" for _ in ili_ids)
    sql = """SELECT id, ili_id, rating, u, t FROM ili_rating WHERE ili_id in ({})""".format(placeholders)
    params = ili_ids
    if u:
        sql += """ AND u = ?"""
        params = ili_ids + [u]

    for r in query_omw(sql, params):
        rating[r['ili_id']].append((r['rating'], r['u'], r['t']))
    return rating
def fetch_sense(s_id):
    """Return information about the sense ``s_id``.

    Returns:
        ``[lemma, pos_id, freq, w_id, ss_id, ili_id]`` for the sense, where
        ``freq`` is 0 unless a truthy frequency value is stored for it, or
        an empty list when no row is found for ``s_id``.
    """
    sense = []
    for r in query_omw(
            (" SELECT lemma, w_id, canon, ss_id, pos_id, ili_id FROM ( "
             "SELECT lemma, w_id, canon, ss_id FROM ( "
             "SELECT w_id, canon, ss_id FROM ( "
             "SELECT ss_id, w_id FROM s WHERE id=? "
             ") as sense JOIN w ON w_id = w.id "
             ") as word JOIN f ON canon = f.id "
             ") as thing JOIN ss on ss.id=ss_id "),
            (s_id,)):
        sense = [r['lemma'], r['pos_id'], 0,
                 r['w_id'], r['ss_id'], r['ili_id']]

    if not sense:
        # Unknown sense: there is no slot to store a frequency in, so do not
        # run the second query (assigning sense[2] would raise IndexError).
        return sense

    ### NOTE hard-coding frequency type smt_id=1
    for r in query_omw("""SELECT sml_id as freq FROM sm WHERE s_id=? and smt_id=1""", (s_id,)):
        if r['freq']:
            sense[2] = r['freq']
    return sense
def fetch_ili(ili_ids=None):
    """Fetch ili entries, either a selection or the whole table.

    Args:
        ili_ids: ids to restrict the lookup to.  When falsy (``None`` or
            empty) every row of ``ili`` is returned.

    Returns:
        A pair ``(ili, ili_defs)`` where
        ``ili[id] = (kind, def, src, src_key, status, superseded_by_id, t)``
        (kind, src and status are resolved through the mappings returned by
        ``fetch_kind()``, ``fetch_src()`` and ``fetch_status()``) and
        ``ili_defs[def] = id``.
    """
    src_id = fetch_src()
    kind_id = fetch_kind()
    status_id = fetch_status()

    if ili_ids:
        marks = ",".join("?" for _ in ili_ids)
        rows = query_omw("""SELECT * FROM ili WHERE id in (%s) """ % marks, ili_ids)
    else:
        rows = query_omw("""SELECT * FROM ili """)

    ili = {}
    ili_defs = {}
    for row in rows:
        ili[row['id']] = (
            kind_id[row['kind_id']],
            row['def'],
            src_id[row['origin_src_id']],
            row['src_key'],
            status_id[row['status_id']],
            row['superseded_by_id'],
            row['t'],
        )
        ili_defs[row['def']] = row['id']
    return ili, ili_defs
def fetch_core():
    """Return the core synsets.

    Returns:
        A pair of sets ``(core_ss, core_ili)`` holding the ``ss_id`` and
        ``x1`` values of every ``ssxl`` row attached to the resource whose
        code is ``'core'``.  Both sets are empty when no such resource exists.
    """
    core_ss, core_ili = set(), set()

    resource = query_omw('select id from resource where code = ?', ('core',), one=True)
    if not resource:
        return core_ss, core_ili

    core_rid = resource['id']
    for link in query_omw("""SELECT ss_id, x1 FROM ssxl WHERE resource_id=?""", (core_rid,)):
        core_ss.add(link['ss_id'])
        core_ili.add(link['x1'])
    return core_ss, core_ili
def fetch_def_by_ssid_lang_text(ss_id, lang_id, d):
    """Find the id of a definition by synset, language and exact text.

    Returns:
        The ``id`` of the first ``def`` row matching all three values, or
        ``None`` when nothing matches.
    """
    matches = query_omw(
        (" SELECT id, ss_id, lang_id, def FROM def "
         "WHERE ss_id = ? AND lang_id = ? AND def = ?"),
        [ss_id, lang_id, d])
    return next((row['id'] for row in matches), None)
def fetch_pos():
    """Load the part-of-speech table into lookup dictionaries.

    Returns:
        A nested ``dd`` with three indexes:
        ``['id'][id] = tag``, ``['tag'][tag] = id`` and ``['def'][id] = def``.
    """
    table = dd(lambda: dd())
    for row in query_omw("""SELECT id, tag, def FROM pos"""):
        ident, tag = row['id'], row['tag']
        table['id'][ident] = tag
        table['tag'][tag] = ident
        table['def'][ident] = row['def']
    return table
def f_src_id_by_proj_id_ver(proj_id, version):
    """Return the id of the source with this project id and version.

    Returns:
        The ``id`` of the first matching ``src`` row, or ``None`` when there
        is no match.
    """
    found = query_omw(
        """SELECT id, proj_id, version FROM src WHERE proj_id = ? AND version = ?""",
        [proj_id, version])
    for row in found:
        # Only the first match is of interest.
        return row['id']
    return None
def f_ss_id_by_ili_id(ili_id):
    """Return the list of synset ids whose ``ili_id`` equals ``ili_id``.

    The list is empty when no synset is linked to that ili id.
    """
    rows = query_omw("""SELECT id FROM ss WHERE ili_id = ?""", [ili_id])
    return [row['id'] for row in rows]
def fetch_ssexe_by_ssid_lang_text(ss_id, lang_id, e):
    """Find the id of a synset example by synset, language and exact text.

    Returns:
        The ``id`` of the first ``ssexe`` row matching all three values, or
        ``None`` when nothing matches.
    """
    matches = query_omw(
        (" SELECT id, ss_id, lang_id, ssexe FROM ssexe "
         "WHERE ss_id = ? AND lang_id = ? AND ssexe = ?"),
        [ss_id, lang_id, e])
    return next((row['id'] for row in matches), None)
def fetch_labels(lang_id, sss):
    """Return ``{ss_id: label}`` for the synsets in ``sss`` in one language.

    Args:
        lang_id: id of the language the labels should be in.
        sss: collection of synset ids.

    Synsets without a label row for ``lang_id`` are absent from the result.
    """
    sql = """SELECT ss_id, label FROM label WHERE lang_id = ? AND ss_id in (%s)""" % l2q(sss)
    params = [lang_id] + list(sss)
    return {row['ss_id']: row['label'] for row in query_omw(sql, params)}
def f_src_id_by_proj_ver(proj, version):
    """Return the id of the source for a project code and version.

    Args:
        proj: project code (matched against ``proj.code``).
        version: source version (matched against ``src.version``).

    Returns:
        The ``src.id`` of the first match, or ``None`` when there is none.
    """
    found = query_omw(
        ("SELECT src.id FROM src JOIN proj ON src.proj_id=proj.id "
         "WHERE proj.code= ? AND src.version = ?"),
        [proj, version])
    return next((row['id'] for row in found), None)
def fetch_src_for_ss_id(s_ids):
    """Collect the source records of each synset id in ``s_ids``.

    Returns:
        A ``dd(list)`` with
        ``result[ss_id] = [(src_id, src_key, conf), ...]``.
    """
    by_synset = dd(list)
    sql = """SELECT ss_id, src_key, src_id, conf FROM ss_src WHERE ss_id in (%s)""" % qs(s_ids)
    for row in query_omw(sql, s_ids):
        record = (row['src_id'], row['src_key'], row['conf'])
        by_synset[row['ss_id']].append(record)
    return by_synset
def fetch_sense_links(s_ids):
    """Return the outgoing sense-level links of a list of senses.

    Returns:
        A nested ``dd`` with
        ``slinks[s_id_from][srel_id] = [s_id_to, ...]``.
    """
    sql = """ SELECT s1_id, srel_id, s2_id FROM slink WHERE s1_id in ({})""".format(l2q(s_ids))
    outgoing = dd(lambda: dd(list))
    for row in query_omw(sql, s_ids):
        targets = outgoing[row['s1_id']][row['srel_id']]
        targets.append(row['s2_id'])
    return outgoing
def f_ili_ss_id_map():
    """Build a two-way mapping between ili ids and synset ids.

    Returns:
        A nested ``dd`` with
        ``mapping['ili'][ili_id] = [(ss_id, pos_id), ...]`` (one ili id may
        be linked from several synsets) and
        ``mapping['ss'][ss_id] = ili_id`` (a synset has a single ili id).
    """
    mapping = dd(lambda: dd(list))
    for row in query_omw("""SELECT id, ili_id, pos_id FROM ss"""):
        ss_id, ili_id = row['id'], row['ili_id']
        mapping['ili'][ili_id].append((ss_id, row['pos_id']))
        mapping['ss'][ss_id] = ili_id
    return mapping
def fetch_defs_by_sense(s_ids):
    """Return the definitions attached to the synsets of the given senses.

    Returns:
        A ``dd`` with ``defs[s_id][lang_id] = def``.  If several definitions
        exist for the same sense and language, the last row returned wins.
    """
    ### FIXME: find the sense level definition when defined
    sql = (" SELECT s_id, lang_id, def FROM "
           "(SELECT id AS s_id, ss_id FROM s WHERE id IN ({})) as sense "
           "JOIN def ON sense.ss_id = def.ss_id").format(l2q(s_ids))
    by_sense = dd(lambda: dict())
    for row in query_omw(sql, s_ids):
        by_sense[row['s_id']][row['lang_id']] = row['def']
    return by_sense
def fetch_ssrel():
    """Load the synset-level relations, indexed by id and by name.

    Returns:
        A nested ``dd`` such that, for example,
        ``ssrel['id'][1] = ('agent', 'the undertaker of an action')`` and
        ``ssrel['rel']['agent'] = (1, 'the undertaker of an action')``.
    """
    relations = dd(lambda: dd())
    for row in query_omw("""SELECT id, rel, def FROM ssrel"""):
        rel_id, name, definition = row['id'], row['rel'], row['def']
        relations['id'][rel_id] = (name, definition)
        relations['rel'][name] = (rel_id, definition)
    return relations
def fetch_srel():
    """Load the sense-level relations, indexed by id and by name.

    Returns:
        A nested ``dd`` such that, for example,
        ``srel['id'][1] = ('antonym', 'a sense with the opposite meaning')``
        and
        ``srel['rel']['antonym'] = (1, 'a sense with the opposite meaning')``.
    """
    relations = dd(lambda: dd())
    for row in query_omw("""SELECT id, rel, def FROM srel"""):
        rel_id, name, definition = row['id'], row['rel'], row['def']
        relations['id'][rel_id] = (name, definition)
        relations['rel'][name] = (rel_id, definition)
    return relations
def f_sslink_id_by_ss1_rel_ss2(ss1, rel, ss2):
    """Return the id of the synset link ``ss1 -rel-> ss2``, if any.

    Returns:
        The ``id`` of the first matching ``sslink`` row, or ``None`` when
        the link does not exist.
    """
    found = query_omw(
        """SELECT id FROM sslink WHERE ss1_id = ? AND ssrel_id = ? AND ss2_id = ?""",
        [ss1, rel, ss2])
    for row in found:
        # Only the first match is of interest.
        return row['id']
    return None
def fetch_langs():
    """Load the languages together with their names.

    Returns:
        A pair ``(lang_id, lang_code)`` of nested ``dd`` objects:

        * ``lang_id[id]['bcp47']`` and ``lang_id[id]['iso639']`` hold the
          codes of the language, and ``lang_id[id][in_lang_id]`` holds its
          name as given in the language ``in_lang_id``;
        * ``lang_code['code'][code] = id`` for both the bcp47 and the iso639
          code.
    """
    by_id = dd(lambda: dd())
    by_code = dd(lambda: dd())
    rows = query_omw(
        ("SELECT id, bcp47, iso639, in_lang_id, name "
         "FROM lang JOIN lang_name ON id = lang_id"))
    for row in rows:
        ident = row['id']
        entry = by_id[ident]
        entry['bcp47'] = row['bcp47']
        entry['iso639'] = row['iso639']
        entry[row['in_lang_id']] = row['name']

        codes = by_code['code']
        codes[row['bcp47']] = ident
        codes[row['iso639']] = ident
    return by_id, by_code
def fetch_ili_status(status):
    """Fetch every ili entry whose ``status_id`` equals ``status``.

    Returns:
        A dict with
        ``ili[id] = (kind, def, src, src_key, status, superseded_by_id, t)``,
        where kind, src and status are resolved through the mappings
        returned by ``fetch_kind()``, ``fetch_src()`` and ``fetch_status()``.
    """
    src_id = fetch_src()
    kind_id = fetch_kind()
    status_id = fetch_status()

    rows = query_omw("""SELECT * FROM ili WHERE status_id = ?""", [status])
    return {
        row['id']: (
            kind_id[row['kind_id']],
            row['def'],
            src_id[row['origin_src_id']],
            row['src_key'],
            status_id[row['status_id']],
            row['superseded_by_id'],
            row['t'],
        )
        for row in rows
    }
def fetch_forms(w_id):
    """Return the lemmas of all forms linked to the word ``w_id``.

    FIXME: should include meta data
    """
    rows = query_omw(
        (" SELECT lemma, id as f_id FROM "
         "(SELECT f_id FROM wf_link WHERE w_id = ?) "
         "JOIN f on f.id=f_id"),
        (w_id,))
    return [row['lemma'] for row in rows]
def fetch_src_id_stats(src_id):
    """Compute summary counts for one source.

    Every aggregate column carries an explicit ``AS`` alias: the name of an
    un-aliased result column (e.g. ``count(distinct s.id)``) is not
    guaranteed by SQLite, so reading rows by that text is fragile.

    Args:
        src_id: id of the source to summarise.

    Returns:
        A ``dd(int)`` with the keys ``'synsets'``, ``'senses'``, ``'forms'``,
        ``'words'``, ``'in_ili'``, ``'def'`` and ``'ssexe'``, plus ``'core'``
        when a resource with code ``'core'`` exists.
    """
    src_id_stats = dd(int)

    for r in query_omw("""
            SELECT count(distinct s.ss_id) AS n_synsets,
                   count(distinct s.id) AS n_senses
            FROM s JOIN s_src ON s.id=s_src.s_id
            WHERE s_src.src_id=?""", [src_id]):
        src_id_stats['synsets'] = r['n_synsets']
        src_id_stats['senses'] = r['n_senses']

    for r in query_omw("""
            SELECT count(distinct w_id) AS n_words,
                   count(distinct f_id) AS n_forms
            FROM wf_link
            WHERE src_id=?""", [src_id]):
        src_id_stats['forms'] = r['n_forms']
        src_id_stats['words'] = r['n_words']

    # Core synsets can only be counted when the 'core' resource is present.
    cid = query_omw('select id from resource where code = ?', ('core',), one=True)
    if cid:
        core_id = cid['id']
        for r in query_omw("""
                SELECT count(distinct ss.id) AS n_core
                FROM ss JOIN ss_src ON ss.id=ss_src.ss_id
                JOIN ssxl ON ssxl.ss_id=ss.id
                WHERE ss_src.src_id = ?
                AND ssxl.resource_id = ?""", [src_id, core_id]):
            src_id_stats['core'] = r['n_core']

    ## synsets that are used in a sense and linked to an ili
    for r in query_omw("""
            SELECT count(distinct id) AS n_in_ili
            FROM ss
            WHERE ss.ili_id is not NULL
            AND id IN (SELECT s.ss_id FROM s
                       WHERE s.id IN (SELECT s_id FROM s_src
                                      WHERE s_src.src_id=?))""", [src_id]):
        src_id_stats['in_ili'] = r['n_in_ili']

    ### Definitions
    for r in query_omw("""
            SELECT count(distinct ss_id) AS n_def
            FROM def
            WHERE id IN (SELECT def_id FROM def_src
                         WHERE src_id =?)""", [src_id]):
        src_id_stats['def'] = r['n_def']

    ### Examples
    for r in query_omw("""
            SELECT count(distinct ss_id) AS n_ssexe
            FROM ssexe
            WHERE id in (SELECT ssexe_id FROM ssexe_src
                         WHERE src_id =?)""", [src_id]):
        src_id_stats['ssexe'] = r['n_ssexe']

    return src_id_stats
def fetch_sense_labels(s_ids):
    """Return the canonical-form lemma of each sense in ``s_ids``.

    Returns:
        A dict ``slabel[s_id] = lemma`` (``s_id`` is the id of the sense),
        e.g. ``slabel[127262] = 'driver'``.
    """
    sql = ("SELECT lemma, s_id, canon FROM ( "
           "SELECT w_id, canon, s_id FROM ( "
           "SELECT id as s_id, w_id FROM s WHERE id in ({}) "
           ") as sense JOIN w ON w_id = w.id "
           ") as word JOIN f ON canon = f.id").format(l2q(s_ids))
    return {row['s_id']: row['lemma'] for row in query_omw(sql, s_ids)}
def fetch_comment_id(ili_ids, u=None):
    """Return the comments stored for a collection of ili ids.

    Args:
        ili_ids: iterable of ili ids to look up (list, tuple, set, ...).
        u: optional username; when truthy only the comments written by that
            user are returned.

    Returns:
        A ``dd(list)`` mapping ``ili_id -> [(com, u, t), ...]``.
    """
    comments = dd(list)

    # Materialise once: the ids are used both to build the placeholders and
    # as bound parameters, and must be a list so the username can be appended
    # (the previous ``ili_ids + [u]`` failed for tuples and sets).
    ili_ids = list(ili_ids)
    if not ili_ids:
        # Nothing can match an empty IN () list; skip the round trip.
        return comments

    placeholders = ",".join("?" for _ in ili_ids)
    sql = """SELECT id, ili_id, com, u, t FROM ili_com WHERE ili_id in ({})""".format(placeholders)
    params = ili_ids
    if u:
        sql += """ AND u = ?"""
        params = ili_ids + [u]

    for r in query_omw(sql, params):
        comments[r['ili_id']].append((r['com'], r['u'], r['t']))
    return comments
def fetch_ss_basic(synset_list):
    """Fetch the basic data of a collection of synsets.

    Args:
        synset_list: iterable of synset ids.

    Returns:
        A 5-tuple ``(ss, senses, defs, exes, links)``:

        * ``ss[ss_id] = (ili_id, pos_id)``
        * ``senses[ss_id][lang_id] = [(s_id, lemma, freq), ...]`` sorted by
          descending ``freq`` (0 when no ``smt_id=1`` value is stored)
        * ``defs[ss_id][lang_id] = [def, ...]``
        * ``exes[ss_id][lang_id] = [ssexe, ...]``
        * ``links[ss1_id][ssrel_id] = [ss2_id, ...]``
    """
    ids = list(synset_list)
    marks = ",".join("?" for _ in ids)

    # Synset core data.
    ss = {}
    for row in query_omw(""" SELECT id, ili_id, pos_id FROM ss WHERE id in (%s) """ % marks, ids):
        ss[row['id']] = (row['ili_id'], row['pos_id'])

    # Senses: collect the rows first, the frequencies are looked up in bulk.
    sense_rows = []
    sense_ids = []
    sense_sql = (" SELECT lang_id, lemma, w_id, canon, ss_id, s_id FROM ( "
                 "SELECT w_id, canon, ss_id, s_id FROM ( "
                 "SELECT id as s_id, ss_id, w_id FROM s WHERE ss_id in (%s)) as sense "
                 "JOIN w ON w_id = w.id ) as word "
                 "JOIN f ON canon = f.id ") % marks
    for row in query_omw(sense_sql, ids):
        sense_rows.append((row['ss_id'], row['lang_id'], row['s_id'], row['lemma']))
        sense_ids.append(row['s_id'])

    freq_of = dd(int)
    freq_sql = """SELECT s_id, sml_id as freq FROM sm WHERE s_id in (%s) and smt_id=1""" % l2q(sense_ids)
    for row in query_omw(freq_sql, sense_ids):
        freq_of[row['s_id']] = row['freq']

    senses = dd(lambda: dd(list))
    for ss_id, lang_id, s_id, lemma in sense_rows:
        senses[ss_id][lang_id].append((s_id, lemma, freq_of[s_id]))
    for per_lang in senses.values():
        for entries in per_lang.values():
            # Most frequent sense first.
            entries.sort(key=lambda item: item[2], reverse=True)

    # Definitions.
    defs = dd(lambda: dd(list))
    for row in query_omw(""" SELECT ss_id, lang_id, def FROM def WHERE ss_id in (%s) """ % marks, ids):
        defs[row['ss_id']][row['lang_id']].append(row['def'])

    # Examples.
    exes = dd(lambda: dd(list))
    for row in query_omw(""" SELECT ss_id, lang_id, ssexe FROM ssexe WHERE ss_id in (%s) """ % marks, ids):
        exes[row['ss_id']][row['lang_id']].append(row['ssexe'])

    # Outgoing synset links.
    links = dd(lambda: dd(list))
    for row in query_omw(""" SELECT ss1_id, ssrel_id, ss2_id FROM sslink WHERE ss1_id in (%s) """ % marks, ids):
        links[row['ss1_id']][row['ssrel_id']].append(row['ss2_id'])

    return ss, senses, defs, exes, links
def fetch_ssrel_stats(src_id):
    """Count the synset links contributed by one source, per relation.

    The aggregate column carries an explicit ``AS`` alias: the name of an
    un-aliased result column (``count(ssrel_id)``) is not guaranteed by
    SQLite, so reading rows by that text is fragile.

    Args:
        src_id: id of the source to summarise.

    Returns:
        A ``dd(int)`` mapping each relation name to its number of links,
        plus ``'TOTAL'`` (all links) and ``'CONSTITUATIVE'`` (links whose
        relation is in the constitutive list below).
    """
    constitutive = ['instance_hyponym', 'instance_hypernym',
                    'hypernym', 'hyponym',
                    'synonym', 'antonym',
                    'mero_part', 'holo_part',
                    'mero_member', 'holo_member',
                    'mero_substance', 'holo_substance']

    src_ssrel_stats = dd(int)
    ssrl = fetch_ssrel()

    for r in query_omw("""
            SELECT ssrel_id, count(ssrel_id) AS n_links
            FROM sslink JOIN sslink_src
            ON sslink.id=sslink_src.sslink_id
            WHERE sslink_src.src_id=?
            GROUP BY ssrel_id""", [src_id]):
        # ssrl['id'][id] is a (rel, def) pair; only the name is needed here.
        rel_name = ssrl['id'][r['ssrel_id']][0]
        n_links = r['n_links']

        src_ssrel_stats[rel_name] = n_links
        src_ssrel_stats['TOTAL'] += n_links
        if rel_name in constitutive:
            # Key spelling is kept as-is: callers look it up by this name.
            src_ssrel_stats['CONSTITUATIVE'] += n_links

    return src_ssrel_stats
def updateLabels():
    """Rebuild the ``label`` table from the current senses.

    To be run after a new wordnet is uploaded, so that concept labels for
    its language are created and visible as concept names.

    For every synset and every language that has at least one sense
    anywhere, the label is the best lemma of that synset in that language.
    When the synset has no sense in that language, the best lemma of the
    first language (in ascending ``lang_id`` order) that has one is used.
    All existing labels are deleted before the new ones are inserted.

    Returns:
        True, always.
    """
    # Sense frequencies, keyed by sense id (0 when unknown).
    sfreq = dd(int)
    f = query_omw("""SELECT id FROM smt WHERE tag='freq'""")
    if f:
        # Bind the id as a one-element tuple.  Passing ``str(id)`` made the
        # driver treat every character as a separate parameter, which fails
        # as soon as the id has more than one digit.
        for r in query_omw("""SELECT s_id, sml_id FROM sm WHERE smt_id=?""",
                           (f[0]['id'],)):
            sfreq[r['s_id']] = r['sml_id']

    senses = dd(lambda: dd(list))  # senses[ss_id][lang_id] = [(s_id, lemma, freq), ...]
    forms = dd(lambda: dd(int))    # forms[lang_id][lemma] = number of senses using it
    langs = set()
    for r in query_omw(
            ("SELECT s_id, ss_id, lemma, lang_id FROM "
             "(SELECT w_id, canon, ss_id, s_id FROM "
             "(SELECT id as s_id, ss_id, w_id FROM s) "
             "JOIN w ON w_id = w.id ) "
             "JOIN f ON canon = f.id")):
        senses[r['ss_id']][r['lang_id']].append(
            (r['s_id'], r['lemma'], sfreq[r['s_id']]))
        forms[r['lang_id']][r['lemma']] += 1
        langs.add(r['lang_id'])

    # Rank the candidate lemmas of each synset/language: best one first.
    for by_lang in senses.values():
        for lang, entries in by_lang.items():
            lang_forms = forms[lang]
            entries.sort(key=lambda x: (-x[2],            # sense freq (freq is good)
                                        lang_forms[x[1]],  # uniqueness (freq is bad)
                                        len(x[1]),         # length (short is good)
                                        x[1]))             # lemma (so it is the same)

    # Make the labels.
    lgs = sorted(langs)
    values = []
    for ss, by_lang in senses.items():
        # Fallback for languages without a sense of their own: the best lemma
        # of the first language, in ascending id order, that has one
        # (presumably English, if its id is the lowest - TODO confirm).
        fallback = next(
            (by_lang[lx][0][1] for lx in lgs if by_lang.get(lx)),
            "?????")
        for l in lgs:
            entries = by_lang.get(l)
            values.append((ss, l, entries[0][1] if entries else fallback))

    # Write the labels (delete old ones first).
    write_omw("""DELETE FROM label""")
    blk_write_omw("""INSERT INTO label(ss_id, lang_id, label, u) VALUES (?,?,?,"omw")""", values)
    return True
def fetch_all_sssrels_by_s_rel_trgt():
    """Index every sense-to-synset link by source sense, relation and target.

    Returns:
        A nested ``dd`` with ``ssslinks[s_id][(srel_id, ss_id)] = id``.
    """
    index = dd(lambda: dd())
    for row in query_omw("""SELECT id, s_id, srel_id, ss_id FROM ssslink"""):
        key = (row['srel_id'], row['ss_id'])
        index[row['s_id']][key] = row['id']
    return index
def fetch_src_meta():
    """Load the metadata of all sources.

    Returns:
        A nested ``dd`` with ``src_meta[src_id][attr] = val``.  If an
        attribute occurs more than once for a source, the last row wins.
    """
    meta = dd(lambda: dd(str))
    for row in query_omw("""SELECT src_id, attr, val, u, t FROM src_meta"""):
        per_source = meta[row['src_id']]
        per_source[row['attr']] = row['val']
    return meta
def fetch_all_forms_by_lang_pos_lemma():
    """Index every form by language, part of speech and lemma.

    Returns:
        A nested ``dd`` with ``forms[lang_id][(pos_id, lemma)] = id``.
    """
    index = dd(lambda: dd())
    for row in query_omw("""SELECT id, lang_id, pos_id, lemma FROM f"""):
        key = (row['pos_id'], row['lemma'])
        index[row['lang_id']][key] = row['id']
    return index