print "Parsing the tab ... ", list = open("../resources/vocab_jlpt_n3_cum.xml") #list = open("../resources/vocab_jlpt_n3.xml") parser = make_parser() vocab_list = HtmlTabHandler() parser.setContentHandler(vocab_list) parser.parse(list) print "done." print "Creating Vocab List ... ", kanjiList = [] for entry in vocab_list.list[1:]: if len(entry) == 4: kanjiList.append(Kanji(literal=entry[0], reading = entry[1], meaning = entry[2].split(','), jlpt = 3)) elif len(entry) == 3: kanjiList.append(Kanji(literal=entry[0], meaning = entry[1].split(','), jlpt = 3)) else: pass print "done." for entry in kanjiList: entry.show() print "Number of entries: ", len(kanjiList) print "Copy list list to XML File ...", kl2xml = KanjiListToXmlFile("../resources/JLPT_N3_VOC_CUM.xml") #kl2xml = KanjiListToXmlFile("../resources/JLPT_N3_VOC.xml") kl2xml.kanjiListToXml(kanjiList) print "done."
@author: zermelozf ''' from xml.sax import make_parser from Kanjidic2SaxHandlers import ListKanji from KanjiListSaxWriters import KanjiListToXmlFile if __name__ == '__main__': ''' This script uses the ListKanji class to create a customized list of Kanji from Kanjidic2. The list is then saved to an XML file. You need to put the Kanjidic2 file available at http://www.csse.monash.edu.au/~jwb/kanjidic2/ in the resources directory. ''' #Create a customized list of JLPT3 Kanji dict = open("../resources/kanjidic2") print "Creating the list ...", parser = make_parser() jlpt = ListKanji('jlpt', 3) parser.setContentHandler(jlpt) parser.parse(dict) sortedKanjiList = jlpt.organizeListBy('freq') print "done." for kanji in sortedKanjiList: kanji.show() print "JLPT 3 contains", len(jlpt.kanjiList), "Kanji." #Save the list to an XML file print "Copy JLPT3 list to XML File ...", kl2xml = KanjiListToXmlFile("../resources/JLPT3.xml") kl2xml.kanjiListToXml(sortedKanjiList) print "done."