def codepoint(x, c, chtxt):
    """Process one <cp_value> element belonging to a character.

    x      -- The cp_value element.  It is expected to carry exactly one
              attribute, named 'cp_type'; its text is the codepoint value.
    c      -- Character object; new items are appended to its `_cinf` list.
    chtxt  -- The character's literal text.

    A 'ucs' codepoint is not stored: it is only compared (as a hex number)
    against the codepoint of `chtxt`, and a warning is issued on mismatch.
    Any other codepoint type is appended to `c._cinf` as a jdb.Cinf item.
    """
    cinf = c._cinf
    attrs = list(x.items())
    if len(attrs) != 1:
        warn('Expected only one cp_value attribute')
    if not attrs:
        # No attribute means there is no codepoint type to act on.  The
        # problem has been reported above, so skip the element instead of
        # failing with an IndexError.
        return
    cp_attr, cp_type = attrs[0]
    if cp_attr != 'cp_type':
        # A single preformatted message, matching the other warn() calls.
        warn('Unexpected cp_value attribute: %s' % cp_attr)

    if cp_type == 'ucs':
        if int(x.text, 16) != jdb.uord(chtxt):
            warn("xml codepoint ucs value '%s' doesnt match character %s (0x%x)."
                 % (x.text, chtxt, jdb.uord(chtxt)))
    else:
        # Xml2db.CINF translates the xml type name into the key used in
        # KW.CINF; type names missing from that table are used unchanged.
        kw_name = Xml2db.CINF.get(cp_type, cp_type)
        cinf.append(jdb.Cinf(kw=KW.CINF[kw_name].id, value=x.text))
def chr(c):
    """Format a character object as a list of text lines.

    c -- Object with a `chr` attribute (the character text) and optional
         numeric attributes `strokes`, `bushu`, `freq`, `grade`, `jlpt`.

    Returns a list of strings: a "Character N:" header line, followed (only
    if at least one of the optional attributes is set and truthy) by one
    line listing those attributes, comma separated.

    NOTE: the function name shadows the builtin chr() within this module;
    it is kept because callers depend on it.
    """
    fmt = ["Character %d:" % jdb.uord(c.chr)]

    # (label, attribute) pairs in output order.  "radical" reports the
    # `bushu` attribute; it previously printed `strokes` by mistake.
    fields = (
        ("strokes", "strokes"),
        ("radical", "bushu"),
        ("freq", "freq"),
        ("grade", "grade"),
        ("jlpt", "jlpt"),
    )
    a = []
    for label, attr in fields:
        value = getattr(c, attr, None)
        if value:
            a.append("%s: %d" % (label, value))

    if a:
        fmt.append(" " + ', '.join(a))
    return fmt
def do_chr(elem, srcid, langs):
    """Process a <character> element and return a jdb.Entr object for it.

    elem   -- The <character> element, already parsed by the xml
              ElementTree parser.
    srcid  -- Value stored in the entry's `src` attribute.
    langs  -- Passed through unchanged to reading_meaning().

    Side effect: sets the module global `Char` to the character's literal
    text so that warn() can identify the character in its messages.

    An AttributeError results if the element has no <literal> child.
    """
    global Char

    chtxt = elem.find('literal').text
    Char = chtxt        # For warning messages created by warn().
    c = jdb.Chr(chr=chtxt, _cinf=[])
    e = jdb.Entr(src=srcid, stat=KW.STAT_A, seq=jdb.uord(chtxt), unap=False,
                 chr=c, _kanj=[jdb.Kanj(txt=chtxt)],
                 _rdng=[], _sens=[], _krslv=[])

    for x in elem.findall('codepoint/cp_value'):
        codepoint(x, c, chtxt)
    for x in elem.findall('radical/rad_value'):
        radical(x, c)

    # <freq> and <grade> are optional, integer-valued children of <misc>.
    # A missing or empty element is skipped via an explicit check rather
    # than a bare "except:", which would also hide unrelated errors.
    for name in ('freq', 'grade'):
        node = elem.find('misc/' + name)
        text = node.text if node is not None else None
        if not text:
            continue
        if getattr(c, name) is not None:
            warn('Duplicate "%s" element ignored: %s' % (name, text))
        else:
            setattr(c, name, int(text))

    for n, x in enumerate(elem.findall('misc/stroke_count')):
        strokes(x, n, c)

    # Multiple radical names are joined with U+3001 (ideographic comma).
    rn = '\u3001'.join([x.text for x in elem.findall('misc/rad_name')])
    if rn:
        c.radname = rn

    for x in elem.findall('reading_meaning'):
        reading_meaning(x, e._rdng, e._sens, c._cinf, langs)

    x = elem.find('dic_number')
    if x is not None:
        dicnum(x, c._cinf)
    x = elem.find('query_code')
    if x is not None:
        qcode(x, c._cinf)

    for x in elem.findall('misc/variant'):
        e._krslv.append(variant(x))

    x = elem.find('misc/jlpt')
    if x is not None:
        jlptnum(x, c)
    return e
def ucshex(s):
    """Return the codepoints of the items in `s` as space-separated hex.

    Each item obtained by iterating over `s` is converted with jdb.uord()
    and formatted as uppercase hexadecimal, zero-padded to at least four
    digits.  An empty `s` yields an empty string.
    """
    hex_codes = []
    for item in s:
        codepoint_value = jdb.uord(item)
        hex_codes.append("%0.4X" % codepoint_value)
    return ' '.join(hex_codes)