def do_rdngs(self, elems, entr, fmap):
    """Create reading objects from *elems* and store them on *entr*.

    Each element must contain a ``reb`` child whose text is the reading.
    For every non-duplicate reading a ``jdb.Rdng`` is built, then its
    ``re_inf``, ``re_pri``, ``re_restr``/``re_nokanji`` and ``audio``
    children are handed to the corresponding ``do_*`` helpers.

    Args:
        elems: Iterable of reading elements, or ``None`` (nothing is done).
        entr: Entry object.  Its ``_kanj`` attribute (default ``[]``) is
            passed to ``do_restr``; ``_rdng`` is assigned the new list when
            at least one reading was produced.
        fmap: Mapping indexed by the value ``parse_freq`` returns; item
            ``[0]`` of each value is the list that collects readings
            carrying that frequency tag.

    Returns:
        None.  Results are delivered by mutating *entr* and *fmap*.
    """
    if elems is None:
        return
    kanjs = getattr(entr, '_kanj', [])
    rdngs = []
    dupchk = {}
    # NOTE(review): the ordinal comes from the element position, so a
    # skipped duplicate leaves a gap in the ``rdng`` numbers -- confirm
    # that this is intended / renumbered by the caller.
    for num, elem in enumerate(elems, start=1):
        txt = elem.find('reb').text
        # presumably jdb.unique() records txt in dupchk and reports whether
        # it was seen before -- verify against jdb.
        if not jdb.unique(txt, dupchk):
            self.warn("Duplicate reb text: '%s'" % txt)
            continue
        # A non-kana reading only produces a warning; it is still kept.
        if not jdb.jstr_reb(txt):
            self.warn("reb text '%s' not kana." % txt)
        rdng = jdb.Rdng(rdng=num, txt=txt)
        self.do_kws(elem.findall('re_inf'), rdng, '_inf', 'RINF')
        for x in elem.findall('re_pri'):
            freqtuple = self.parse_freq(x.text, "re_pri")
            if not freqtuple:
                continue
            rlist = fmap[freqtuple][0]
            if not jdb.isin(rdng, rlist):
                rlist.append(rdng)
            else:
                self.freq_warn("Duplicate", rdng, None, x.text)
        nokanji = elem.find('re_nokanji')
        self.do_restr(elem.findall('re_restr'), rdng, kanjs, 'restr', nokanji)
        self.do_audio(elem.findall("audio"), rdng, jdb.Rdngsnd)
        rdngs.append(rdng)
    # Leave entr untouched when no reading survived.
    if rdngs:
        entr._rdng = rdngs
def do_gloss(self, elems, sens, xlit=False, xlang=None):
    """Create gloss objects from *elems* and append them to ``sens._gloss``.

    Args:
        elems: Iterable of gloss elements.  The ``xml:lang`` attribute
            selects the language (``'eng'`` when absent); the element text
            is the gloss.
        sens: Sense object.  ``_gloss`` is created if missing and extended
            with the regular glosses followed by the literal glosses.
        xlit: When true and the text contains ``'lit:'``, the text is passed
            to ``extract_lit`` which returns the remaining text and a
            sequence of literal strings; each literal becomes a gloss with
            ginf ``'lit'``.
        xlang: Optional container of language ids.  When truthy, regular
            glosses whose language id is not in it are dropped.  Literal
            glosses are not filtered by it.

    Returns:
        None.  Results are delivered by mutating *sens*.
    """
    XKW = self.XKW
    glosses = []
    lits = []
    dupchk = {}
    for elem in elems:
        lng = elem.get('{http://www.w3.org/XML/1998/namespace}lang')
        try:
            lang = XKW.LANG[lng].id if lng else XKW.LANG['eng'].id
        except KeyError:
            self.warn("Invalid gloss lang attribute: '%s'" % lng)
            continue
        txt = elem.text
        # Non-latin text only produces a warning; the gloss is still kept.
        if not jdb.jstr_gloss(txt):
            self.warn("gloss text '%s' not latin characters." % txt)
        lit = []
        # Guard against a missing text (None) before the substring test,
        # which would otherwise raise TypeError.
        if xlit and txt and 'lit:' in txt:
            txt, lit = extract_lit(txt)
        if not jdb.unique((lang, txt), dupchk):
            self.warn("Duplicate lang/text in gloss '%s'/'%s'" % (lng, txt))
            continue
        # txt can be empty here (e.g. after the literal part was removed).
        if txt and (not xlang or lang in xlang):
            glosses.append(
                jdb.Gloss(lang=lang, ginf=XKW.GINF['equ'].id, txt=txt))
        if lit:
            lits.extend(
                jdb.Gloss(lang=lang, ginf=XKW.GINF['lit'].id, txt=x)
                for x in lit)
    if glosses or lits:
        if not hasattr(sens, '_gloss'):
            sens._gloss = []
        sens._gloss.extend(glosses + lits)
def do_kanjs(self, elems, entr, fmap):
    """Create kanji objects from *elems* and store them on *entr*.

    Each element must contain a ``keb`` child whose text is the kanji
    form.  For every non-duplicate text a ``jdb.Kanj`` is built (numbered
    by element position, starting at 1), its ``ke_inf`` children are
    handed to ``do_kws`` and its ``ke_pri`` children are registered in
    *fmap*.

    Args:
        elems: Iterable of kanji elements, or ``None`` (nothing is done).
        entr: Entry object; ``_kanj`` is assigned the new list when at
            least one kanji was produced.
        fmap: Mapping indexed by the value ``parse_freq`` returns; item
            ``[1]`` of each value is the list that collects kanji carrying
            that frequency tag.

    Returns:
        None.  Results are delivered by mutating *entr* and *fmap*.
    """
    if elems is None:
        return

    collected = []
    seen = {}
    for position, node in enumerate(elems, 1):
        text = node.find('keb').text
        if not jdb.unique(text, seen):
            self.warn("Duplicate keb text: '%s'" % text)
            continue
        # A non-kanji text only produces a warning; it is still kept.
        if not jdb.jstr_keb(text):
            self.warn("keb text '%s' not kanji." % text)

        kanj = jdb.Kanj(kanj=position, txt=text)
        self.do_kws(node.findall('ke_inf'), kanj, '_inf', 'KINF')

        for pri in node.findall('ke_pri'):
            freqtuple = self.parse_freq(pri.text, "ke_pri")
            if not freqtuple:
                continue
            bucket = fmap[freqtuple][1]
            if jdb.isin(kanj, bucket):
                self.freq_warn("Duplicate", None, kanj, pri.text)
            else:
                bucket.append(kanj)

        collected.append(kanj)

    # Leave entr untouched when no kanji survived.
    if collected:
        entr._kanj = collected