def variant(x):
    """Build a ``jdb.Cinf`` record from XML element *x*.

    The element's ``var_type`` attribute selects the CINF keyword id and
    the element's text becomes the record's value.
    """
    # var_type spellings that differ from the names used in the
    # database kw* tables.
    renamed = {'njecd': 'halpern_njecd', 'oneill': 'oneill_names'}
    var_type = x.get('var_type')
    var_type = renamed.get(var_type, var_type)
    # 'ucs' gets keyword id 0 directly; everything else is looked up.
    kw = 0 if var_type == 'ucs' else KW.CINF[Xml2db.CINF.get(var_type, var_type)].id
    return jdb.Cinf(kw=kw, value=x.text)
def radical(x, c):
    """Record the radical number carried by XML element *x* on *c*.

    *x* is expected to have exactly one attribute, ``rad_type``; a warning
    is issued otherwise.  A ``classical`` radical is stored as ``c.bushu``;
    a ``nelson_c`` radical is appended to ``c._cinf`` as a ``jdb.Cinf``
    record.  Any other radical type only produces a warning.

    Raises:
        IndexError: if *x* has no attributes at all.
        ValueError, TypeError: from ``int()`` when the element text of a
            recognised radical type is not an integer string.
    """
    cinf = c._cinf
    attrs = list(x.items())
    if len(attrs) != 1:
        warn('Expected only one rad_value attribute')
    rad_attr, rad_type = attrs[0]
    # Messages are formatted here, before calling warn(), the same way the
    # other warn() calls in this file do it.
    if rad_attr != 'rad_type':
        warn('Unexpected rad_value attribute: %s' % rad_attr)
    if rad_type == 'classical':
        c.bushu = int(x.text)
    elif rad_type == 'nelson_c':
        cinf.append(jdb.Cinf(kw=KW.CINF_nelson_rad, value=int(x.text)))
    else:
        warn("Unknown radical attribute value: %s=\"%s\"" % (rad_attr, rad_type))
def codepoint(x, c, chtxt):
    """Process the codepoint value in XML element *x* for character *c*.

    *x* is expected to have exactly one attribute, ``cp_type``; a warning
    is issued otherwise.  A ``ucs`` value is not stored: its text is parsed
    as hexadecimal and compared with ``jdb.uord(chtxt)``, and a mismatch is
    reported with a warning.  Any other codepoint type is appended to
    ``c._cinf`` as a ``jdb.Cinf`` record holding the element text.

    Raises:
        IndexError: if *x* has no attributes at all.
        ValueError: if a ``ucs`` value is not valid hexadecimal.
        KeyError: if the codepoint type has no CINF keyword.
    """
    cinf = c._cinf
    attrs = list(x.items())
    if len(attrs) != 1:
        warn('Expected only one cp_value attribute')
    cp_attr, cp_type = attrs[0]
    if cp_attr != 'cp_type':
        # Format the offending attribute name into the message; passing it
        # as a separate argument left it without a placeholder.
        warn('Unexpected cp_value attribute: %s' % cp_attr)
    if cp_type == 'ucs':
        xml_value = int(x.text, 16)
        char_value = jdb.uord(chtxt)
        if xml_value != char_value:
            warn("xml codepoint ucs value '%s' doesnt match character %s (0x%x)."
                 % (x.text, chtxt, char_value))
    else:
        kw = KW.CINF[Xml2db.CINF.get(cp_type, cp_type)].id
        cinf.append(jdb.Cinf(kw=kw, value=x.text))
def rmgroup(rmg, langs=None):
    """Extract readings, meanings and reading-derived cinf records from *rmg*.

    Args:
        rmg: element whose ``reading`` and ``meaning`` children are parsed.
        langs: optional collection of language keyword ids.  When given
            and non-empty, only meanings in those languages are kept.

    Returns:
        A ``(rdngs, glosses, cinf)`` tuple of lists holding ``jdb.Rdng``,
        ``jdb.Gloss`` and ``jdb.Cinf`` objects respectively.

    Raises:
        KeyError: if a reading has an unrecognised (or missing) type, or
            if a keyword lookup fails.
    """
    rdngs = []
    glosses = []
    cinf = []

    seen = set()
    for x in rmg.findall('reading'):
        rtype = None
        rstat = None
        for aname, aval in x.items():
            # Both 'r_type' and 'on_type' set the reading type; whichever
            # attribute is iterated last wins.
            if aname in ('r_type', 'on_type'):
                rtype = aval
            elif aname == 'r_status':
                rstat = aval

        if rtype in ('pinyin', 'korean_r', 'korean_h', 'vietnam'):
            # These readings are stored as cinf records, not as readings.
            if (rtype, x.text) in seen:
                warn("Duplicate reading ignored: %s, %s" % (rtype, x.text))
                continue
            seen.add((rtype, x.text))
            cinf.append(jdb.Cinf(kw=KW.CINF[rtype].id, value=x.text))
        elif rtype == 'ja_on' or rtype == 'ja_kun':
            if x.text in seen:
                warn('Duplicate reading ignored: %s' % x.text)
                continue
            seen.add(x.text)
            rdng = jdb.Rdng(txt=x.text, _inf=[])
            # Tag the reading with its type.  This must use rtype, not the
            # attribute loop's leftover variable, which holds whatever
            # attribute happened to come last (e.g. r_status).
            rdng._inf.append(
                jdb.Rinf(kw=KW.RINF[Xml2db.RINF.get(rtype, rtype)].id))
            if rstat:
                rdng._inf.append(
                    jdb.Rinf(kw=KW.RINF[Xml2db.RINF.get(rstat, rstat)].id))
            rdngs.append(rdng)
        else:
            raise KeyError('Unkown r_type attribute: %s' % rtype)

    # Meanings are de-duplicated independently of readings.
    seen = set()
    for x in rmg.findall('meaning'):
        lang = x.get('m_lang', 'en')
        langkw = KW.LANG[Xml2db.LANG.get(lang, lang)].id
        if (lang, x.text) in seen:
            warn("Duplicate lang,meaning pair ignored: %s:%s" % (lang, x.text))
            continue
        seen.add((lang, x.text))
        if not langs or langkw in langs:
            glosses.append(jdb.Gloss(txt=x.text, lang=langkw, ginf=1))
    return rdngs, glosses, cinf
def dicnum(dic_number, cinf):
    """Append a ``jdb.Cinf`` record to *cinf* for each ``dic_ref`` child.

    The ``dr_type`` attribute selects the CINF keyword.  When an ``m_vol``
    attribute is present the stored value is "<m_vol>.<m_page>.<text>";
    otherwise it is the element text.  References with an unknown keyword
    and repeated (keyword, value) pairs are skipped with a warning.
    """
    seen = set()
    for ref in dic_number.findall('dic_ref'):
        ref_type = ref.get('dr_type')
        volume = ref.get('m_vol')
        if volume:
            value = "%s.%s.%s" % (volume, ref.get('m_page'), ref.text)
        else:
            value = ref.text

        name = Xml2db.CINF.get(ref_type, ref_type)
        try:
            kw = KW.CINF[name].id
        except KeyError:
            warn('Unknown CINF keyword: "%s"' % name)
            continue

        entry = (kw, value)
        if entry in seen:
            warn('Duplicate dr_type,value pair ignored: %s, %s' % (ref_type, value))
            continue
        seen.add(entry)
        cinf.append(jdb.Cinf(kw=kw, value=value))
def qcode(query_code, cinf):
    """Append a ``jdb.Cinf`` record to *cinf* for each ``q_code`` child.

    The ``qc_type`` attribute selects the CINF keyword and the element text
    is the value; the ``skip_misclass`` attribute (default '') is stored as
    the record's ``mctype``.  Repeated (keyword, value) pairs are skipped
    with a warning.  A warning is also issued when a misclassification
    code was seen but no skip code without ``skip_misclass`` was.

    Raises:
        KeyError: if ``skip_misclass`` appears on an element whose
            ``qc_type`` is not "skip", or if the keyword lookup fails.
    """
    seen = set()
    has_misclass = False
    has_plain_skip = False
    for elem in query_code.findall('q_code'):
        code_type = elem.get('qc_type')
        code = elem.text
        kw = KW.CINF[Xml2db.CINF.get(code_type, code_type)].id
        misclass = elem.get('skip_misclass', '')

        pair = (kw, code)
        if pair in seen:
            warn('Duplicate qc_type,value pair ignored: %s,%s' % (code_type, code))
            continue
        seen.add(pair)

        if misclass:
            if code_type != "skip":
                raise KeyError("'skip_misclass' attr on non-skip element")
            has_misclass = True
        elif code_type == 'skip':
            has_plain_skip = True
        cinf.append(jdb.Cinf(kw=kw, value=code, mctype=misclass))

    if has_misclass and not has_plain_skip:
        warn("Has skip_misclass but no skip")
def strokes(x, n, c):
    """Store the stroke count given by the text of element *x* on *c*.

    When *n* is 0 the count is assigned to ``c.strokes``; for any other
    *n* it is appended to ``c._cinf`` as a ``jdb.Cinf`` record.
    """
    extra = c._cinf
    if n == 0:
        c.strokes = int(x.text)
        return
    extra.append(jdb.Cinf(kw=KW.CINF_strokes, value=int(x.text)))