def main(args=None):
    """Print a random sample of kanji words whose tags pass the tag filter.

    args: optional argument list handed to parse(); None means parse() falls
    back to sys.argv.
    """
    args = parse(args)
    word_filter = create_filter(args.tags)
    # Materialize the filtered words as a list: random.sample() requires a
    # sequence (a lazy filter object breaks on Python 3), and a comprehension
    # is clearer than filter() + lambda anyway.
    words = [word for word in KanjiWord.all() if word_filter(word.tags)]
    sample = random.sample(words, args.count)
    for word in sample:
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print(word.kanji)
def load_anki_data(kanji_list):
    """Cross-check the deck's kanji against the dictionary kanji list.

    Reports, via message(): kanji used inside words that have no kanji card,
    kanji cards that have no example words, and kanji in use that neither the
    dictionary list nor the extra dictionary knows about.

    Returns the set of all known kanji (card kanji plus kanji seen in words).
    """
    kanji_list = set(kanji_list)

    # Kanji we actually have (non-suspended) cards for.
    expected = set()
    for card in Kanji.all():
        if not card.suspended:
            expected.add(card.kanji)

    # Kanji appearing inside non-suspended counter / kanji-word cards.
    actual = set()
    for word in Counter.all() + KanjiWord.all():
        if word.suspended:
            continue
        for ch in word.kanji:
            # Only real kanji belong here; skip any kana in the word.
            if not kana.is_kana(ch):
                actual.add(ch)

    extra = load_extra(settings.EXTRA_DICT_KANJI)

    # Kanji used in words but lacking a dedicated kanji card.
    missing = actual - expected
    if missing:
        message("Missing Kanji Found", ' '.join(missing))

    # Kanji cards with no example words at all.
    no_example = expected - actual
    if no_example:
        message("Kanji with no Examples", ' '.join(no_example))

    # Kanji in use that the dictionary (plus extras) does not cover.
    unknown = (expected | actual) - (kanji_list | extra)
    if unknown:
        message("Unknown Kanji, not in Dict:", ' '.join(unknown))

    # Everything we know about, from cards or from word usage.
    return expected | actual
from models.kanji_word import KanjiWord
from utf8_helper import force_UTF8
import kana
from collections import defaultdict
from itertools import islice
import settings

if __name__ == '__main__':
    force_UTF8()

    # Collect every recorded reading for every kanji base character,
    # keyed by the base kanji.
    mapping = defaultdict(list)
    for kanji_word in KanjiWord.all():
        for entry in kanji_word.kanji_readings:
            mapping[entry['base']].append(entry['reading'])

    # Now remove any that are used
    # try:
    #     kanji = Kanji.find(reading['base'])
    # except KeyError:
    #     if kana.is_kana(reading['base']):
    #         raise AnkiModel.Error(u"Kana mismatch: %s word(%s) reading(%s)" % (
    #             reading['base'], word.kanji, word.reading
    #         ))
    #     else:
    #         raise AnkiModel.Error(u"Kanji not found, but in use: %s word(%s)" % (
    #             reading['base'], word.kanji
    #         ))
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
from models.kanji import Kanji
from models.kanji_word import KanjiWord
from utf8_helper import force_UTF8
import kana
import settings

if __name__ == '__main__':
    force_UTF8()

    # First we need to read our whitelist: every kanji that has a
    # non-suspended card.
    whitelist = set()
    for kanji in Kanji.all():
        if kanji.suspended:
            continue
        whitelist.add(kanji.kanji)

    # Now we un-suspend only the KanjiWords whose every character is either
    # kana or a whitelisted kanji.
    for kanji_word in KanjiWord.all():
        fine = True
        for kanji in kanji_word.kanji:
            # BUG FIX: the original tested `kana.is_kana(kanji) and ...`,
            # which rejected any word containing kana (kana are never in the
            # kanji whitelist). Kana must be skipped — only non-kana
            # characters need to be in the whitelist (matches the kana guard
            # used by load_anki_data elsewhere in this project).
            if not kana.is_kana(kanji) and kanji not in whitelist:
                fine = False
                break  # one unknown kanji is enough to disqualify the word
        if fine:
            kanji_word.mark_suspended(False)
type=int, default=10, help='The number of words to display') out = parser.parse_args(args) return out if __name__ == '__main__': force_UTF8() args = parse() # Find all the kanji that are in the deck all_kanji = set() for word in KanjiWord.all(): for kanji in word.kanji: all_kanji.add(kanji) for kanji in Kanji.all(): all_kanji.add(kanji) # Count which kanji the input data has data = Counter(unicode(sys.stdin.read())) for char, count in data.most_common(): # we don't want kana if kana.is_kana(char): del data[char] # Nor do we want kanji we know if char in all_kanji: del data[char] # Nor any non-kanji chars
# Path for the ignore unused readings file UNUSED_READINGS = 'unused.json' # Path for the jlpt kanji file JLPT_PATH = 'jlpt_kanji.json' # Path for Kanji dictionary FULL_KANJI_DICT = os.path.join(DATA, 'kanjidic2.xml') KANJI_DICT = os.path.join(DATA, 'kanjidic2_common.xml') # KANJI_DICT = os.path.join(DATA, 'kanjidic2.xml') # Html Output templates DATA_HEADER = os.path.join(DATA, 'header.html') DATA_CSS = os.path.join(DATA, 'main.css') EXTRA_DICT_KANJI = os.path.join(DATA, 'extra_dict.json') # Now we setup all the models from models.anki import AnkiModel from models.kanji_word import KanjiWord from models.kanji import Kanji AnkiModel.setup(path=os.path.join(ANKI_PATH, ANKI_USER, ANKI_DB)) KanjiWord.setup(path=os.path.join(DATA, COMPLEX_READINGS)) Kanji.setup(path=os.path.join(DATA, UNUSED_READINGS)) import jlpt jlpt.setup_jlpt(path=os.path.join(DATA, JLPT_PATH))
# Path for Kanji dictionary FULL_KANJI_DICT = os.path.join(DATA, 'kanjidic2.xml') KANJI_DICT = os.path.join(DATA, 'kanjidic2_common.xml') # KANJI_DICT = os.path.join(DATA, 'kanjidic2.xml') # Html Output templates DATA_HEADER = os.path.join(DATA, 'header.html') DATA_CSS = os.path.join(DATA, 'main.css') EXTRA_DICT_KANJI = os.path.join(DATA, 'extra_dict.json') # Now we setup all the models from models.anki import AnkiModel from models.kanji_word import KanjiWord from models.kanji import Kanji AnkiModel.setup(path=os.path.join(ANKI_PATH, ANKI_USER, ANKI_DB)) KanjiWord.setup(path=os.path.join(DATA, COMPLEX_READINGS)) Kanji.setup(path=os.path.join(DATA, UNUSED_READINGS)) import jlpt jlpt.setup_jlpt(path=os.path.join(DATA, JLPT_PATH))