def process_character(char_ele: Element):
    """Process a character element."""
    # Literal
    literal = char_ele.findtext('literal')
    kanji, _created = Kanji.get_or_create(literal=literal)

    # Codepoint
    get_kanji_codepoints(char_ele, kanji)

    # Radical
    get_kanji_radicals(char_ele, kanji)

    # Misc
    # - Grade
    grade = char_ele.findtext('misc/grade')
    kanji.grade = int(grade) if grade else None

    # - Stroke Count
    kanji.stroke_count = get_kanji_stroke_count(char_ele, kanji)

    # - Variants
    get_kanji_variants(char_ele, kanji)

    # - Frequency
    frequency = char_ele.findtext('misc/freq')
    kanji.frequency = int(frequency) if frequency else None

    # - Radical Name
    kanji.radical_name = char_ele.findtext('misc/rad_name')

    # - JLPT Old
    jlpt_old = char_ele.findtext('misc/jlpt')
    kanji.jlpt_old = int(jlpt_old) if jlpt_old else None

    # Dictionary Number
    get_kanji_dict_numbers(char_ele, kanji)

    # Query Code
    get_kanji_query_code(char_ele, kanji)

    # Readings
    # - Normal Readings
    get_kanji_normal_readings(char_ele, kanji)

    # - Nanori
    get_kanji_nanori(char_ele, kanji)

    # Meanings
    get_kanji_meanings(char_ele, kanji)

    # Save
    kanji.save()
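# A minimal sketch of how process_character might be driven over a KANJIDIC2
# file. The streaming iterparse approach and the example path are assumptions,
# not necessarily how the original loader is wired up.
import xml.etree.ElementTree as ET


def load_kanjidic(path: str) -> None:
    """Stream the dictionary and process every <character> entry."""
    for _event, element in ET.iterparse(path):
        if element.tag == 'character':
            process_character(element)
            element.clear()  # free the parsed subtree once it has been saved

# Example (path is hypothetical): load_kanjidic('data/kanjidic2.xml')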
def on_get(self, _req: Request, resp: Response, literal: str):
    """Handles GET requests."""
    kanji: Optional[Kanji] = Kanji.get_or_none(literal=literal)
    if kanji is None:
        raise falcon.HTTPNotFound()

    # --- Return
    resp.media = kanji.get_formatted_response()
def on_get(self, req: Request, resp: Response):
    """Handles GET requests."""
    query = Kanji.select()

    # Query params arrive as strings; convert them before handing them to peewee
    limit = req.get_param_as_int('limit', required=True)
    offset = req.get_param_as_int('offset', required=True)
    query = query.limit(limit).offset(offset)

    response = []
    for k in query:
        response.append(get_kanji_response(k))

    resp.media = response
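# Hypothetical wiring for the two GET handlers above. The class names and
# route paths are assumptions made for illustration; only falcon.App and
# add_route are standard Falcon API.
import falcon


class KanjiResource:
    """Would hold the single-kanji on_get(self, _req, resp, literal) above."""


class KanjiListResource:
    """Would hold the paginated on_get(self, req, resp) above."""


app = falcon.App()
app.add_route('/kanji', KanjiListResource())
app.add_route('/kanji/{literal}', KanjiResource())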
def load_anki_data(kanji_list):
    """Build the set of kanji we actually know, based on the Anki cards."""
    kanji_list = set(kanji_list)

    # Find out which kanji we actually have cards for
    expected = set()
    for kanji in Kanji.all():
        if kanji.suspended:
            continue
        expected.add(kanji.kanji)

    # Kanji words also contribute to the whitelist
    actual = set()
    for word in Counter.all() + KanjiWord.all():
        if word.suspended:
            continue
        # Add all the kanji in the word
        for kanji in word.kanji:
            # Make sure we only add kanji
            if kana.is_kana(kanji):
                continue
            actual.add(kanji)

    extra = load_extra(settings.EXTRA_DICT_KANJI)

    # Find which kanji we have no cards for
    missing = actual - expected
    if len(missing):
        message("Missing Kanji Found", ' '.join(missing))

    # Notify the user of any kanji that don't have examples (no kanji-words)
    no_example = expected - actual
    if len(no_example):
        message("Kanji with no Examples", ' '.join(no_example))

    # Notify the user of any kanji that aren't in our dictionary
    unknown = (expected | actual) - (kanji_list | extra)
    if len(unknown):
        message("Unknown Kanji, not in Dict:", ' '.join(unknown))

    # Now we finally make our known kanji list
    known = (expected | actual)
    return known
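# load_extra is called above but not shown in this excerpt. A plausible sketch,
# assuming extra_dict.json simply holds a JSON list of kanji literals; the real
# helper and file format may differ.
import json


def load_extra(path):
    """Return the set of extra kanji literals stored at `path`."""
    with open(path) as handle:
        return set(json.load(handle))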
# if kana.is_kana(reading['base']):
#     raise AnkiModel.Error(u"Kana mismatch: %s word(%s) reading(%s)" % (
#         reading['base'], word.kanji, word.reading
#     ))
# else:
#     raise AnkiModel.Error(u"Kanji not found, but in use: %s word(%s)" % (
#         reading['base'], word.kanji
#     ))
# if reading['reading'] not in kanji.readings and kanji.kanji != '々':
#     print '%s(%s) word(%s)' % (
#         kanji.kanji,
#         reading['reading'],
#         word.kanji,
#     )

    for kanji in Kanji.all():
        for reading in kanji._readings:
            possible = map(unicode, reading.get_all())

            # See if we can find the reading in our kanji words list
            found = False
            for use in mapping[kanji.kanji]:
                if use in possible:
                    found = True

            # Ignore any that are used
            if found:
                continue

            # See if it's been ignored
            string = "(kanji: %s) %s" % (kanji.kanji, reading)
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
from models.kanji import Kanji
from models.kanji_word import KanjiWord
from utf8_helper import force_UTF8

import kana
import settings

if __name__ == '__main__':
    force_UTF8()

    # First we need to read our whitelist
    whitelist = set()
    for kanji in Kanji.all():
        if kanji.suspended:
            continue
        whitelist.add(kanji.kanji)

    # Now we filter out any KanjiWords that use other kanji
    for kanji_word in KanjiWord.all():
        fine = True
        for kanji in kanji_word.kanji:
            # Kana characters are skipped; only kanji are checked against the whitelist
            if not kana.is_kana(kanji) and kanji not in whitelist:
                fine = False
        if fine:
            kanji_word.mark_suspended(False)
        'words': [],
    }
    data[key]['words'].append((word, readings))


if __name__ == '__main__':
    force_UTF8()

    missing = {}

    # Now we need to find if all the readings are found
    for word in KanjiWord.all():
        for reading in word.kanji_readings:
            try:
                kanji = Kanji.find(reading['base'])
            except KeyError:
                if kana.is_kana(reading['base']) and reading['base'] != u'ヶ':
                    raise AnkiModel.Error(u"Kana mismatch: %s word(%s) reading(%s)" % (
                        reading['base'], word.kanji, word.reading
                    ))
                else:
                    # Make sure not to do the rest of the work
                    # otherwise you'll use the previous kanji
                    continue
                    # raise AnkiModel.Error(u"Kanji not found, but in use: %s word(%s)" % (
                    #     reading['base'], word.kanji
                    # ))

            # Now that we have the kanji, check if this reading is used
            if kanji.kanji == '々':
    out = parser.parse_args(args)
    return out


if __name__ == '__main__':
    force_UTF8()
    args = parse()

    # Find all the kanji that are in the deck
    all_kanji = set()
    for word in KanjiWord.all():
        for kanji in word.kanji:
            all_kanji.add(kanji)
    for kanji in Kanji.all():
        all_kanji.add(kanji.kanji)

    # Count which kanji the input data has
    data = Counter(unicode(sys.stdin.read()))
    for char, count in data.most_common():
        # we don't want kana
        if kana.is_kana(char):
            del data[char]
        # Nor do we want kanji we know
        if char in all_kanji:
            del data[char]
        # Nor any non-kanji chars
        if not kana.is_kanji(char):
            del data[char]
# Path for the ignore unused readings file
UNUSED_READINGS = 'unused.json'

# Path for the jlpt kanji file
JLPT_PATH = 'jlpt_kanji.json'

# Path for Kanji dictionary
FULL_KANJI_DICT = os.path.join(DATA, 'kanjidic2.xml')
KANJI_DICT = os.path.join(DATA, 'kanjidic2_common.xml')
# KANJI_DICT = os.path.join(DATA, 'kanjidic2.xml')

# Html Output templates
DATA_HEADER = os.path.join(DATA, 'header.html')
DATA_CSS = os.path.join(DATA, 'main.css')

EXTRA_DICT_KANJI = os.path.join(DATA, 'extra_dict.json')

# Now we setup all the models
from models.anki import AnkiModel
from models.kanji_word import KanjiWord
from models.kanji import Kanji

AnkiModel.setup(path=os.path.join(ANKI_PATH, ANKI_USER, ANKI_DB))
KanjiWord.setup(path=os.path.join(DATA, COMPLEX_READINGS))
Kanji.setup(path=os.path.join(DATA, UNUSED_READINGS))

import jlpt
jlpt.setup_jlpt(path=os.path.join(DATA, JLPT_PATH))