Пример #1
0
def process_character(char_ele: Element):
    """Populate and save the Kanji record described by ``char_ele``.

    The record is fetched (or created) with ``Kanji.get_or_create`` using the
    text of the element's ``literal`` child. Its scalar fields are then filled
    from the ``misc/*`` children, the ``get_kanji_*`` helpers are each called
    with ``(char_ele, kanji)``, and finally the record is saved.
    """

    def optional_int(path):
        # Missing or empty text becomes None; anything else must parse as int
        text = char_ele.findtext(path)
        return int(text) if text else None

    kanji, _ = Kanji.get_or_create(literal=char_ele.findtext('literal'))

    # Codepoints and radicals
    get_kanji_codepoints(char_ele, kanji)
    get_kanji_radicals(char_ele, kanji)

    # Misc section (kept in the same order as the fields appear below)
    kanji.grade = optional_int('misc/grade')
    kanji.stroke_count = get_kanji_stroke_count(char_ele, kanji)
    get_kanji_variants(char_ele, kanji)
    kanji.frequency = optional_int('misc/freq')
    kanji.radical_name = char_ele.findtext('misc/rad_name')
    kanji.jlpt_old = optional_int('misc/jlpt')

    # Dictionary numbers, query codes, readings (normal + nanori), meanings
    remaining_steps = (
        get_kanji_dict_numbers,
        get_kanji_query_code,
        get_kanji_normal_readings,
        get_kanji_nanori,
        get_kanji_meanings,
    )
    for step in remaining_steps:
        step(char_ele, kanji)

    kanji.save()
Пример #2
0
    def on_get(self, _req: Request, resp: Response, literal: str):
        """Respond to a GET request with the kanji matching ``literal``.

        Looks the kanji up with ``Kanji.get_or_none`` and stores its
        formatted response in ``resp.media``.

        Raises:
            falcon.HTTPNotFound: if no kanji with that literal exists.
        """
        found: Optional[Kanji] = Kanji.get_or_none(literal=literal)

        if found is not None:
            resp.media = found.get_formatted_response()
            return

        raise falcon.HTTPNotFound()
Пример #3
0
    def on_get(self, req: Request, resp: Response):
        """Handle GET requests by returning one page of kanji.

        Query parameters:
            limit: maximum number of kanji to return (integer, required).
            offset: number of kanji to skip first (integer, required).

        Both parameters are parsed with ``req.get_param_as_int`` so that a
        missing or non-integer value is reported to the client as an HTTP 400
        error, rather than surfacing as an unhandled ``KeyError`` or passing
        an unvalidated string on to the query.

        ``resp.media`` is set to a list with one ``get_kanji_response`` result
        per selected kanji.
        """
        limit = req.get_param_as_int('limit', required=True)
        offset = req.get_param_as_int('offset', required=True)

        query = Kanji.select().limit(limit).offset(offset)

        resp.media = [get_kanji_response(k) for k in query]
Пример #4
0
def load_anki_data(kanji_list):
    """Collect the known kanji and report inconsistencies between the sets.

    ``kanji_list`` is the collection of kanji found in the dictionary; it is
    combined with the extras loaded from ``settings.EXTRA_DICT_KANJI``.

    Three notices may be emitted through ``message``:
      * kanji used in words but lacking a card of their own,
      * kanji with a card but not used in any word,
      * kanji absent from both the dictionary and the extras.

    Returns the set of every kanji that has a non-suspended card or appears
    in a non-suspended Counter / KanjiWord entry.
    """
    def report(title, chars):
        # Only speak up when there is something to list
        if chars:
            message(title, ' '.join(chars))

    dictionary = set(kanji_list)

    # Kanji that have a (non-suspended) card of their own
    with_card = {card.kanji for card in Kanji.all() if not card.suspended}

    # Kanji appearing in (non-suspended) counters and kanji words; kana
    # characters inside those words are not kanji and are left out
    in_words = set()
    for entry in Counter.all() + KanjiWord.all():
        if not entry.suspended:
            in_words.update(
                char for char in entry.kanji if not kana.is_kana(char)
            )

    extra = load_extra(settings.EXTRA_DICT_KANJI)

    known = with_card | in_words

    report("Missing Kanji Found", in_words - with_card)
    report("Kanji with no Examples", with_card - in_words)
    report("Unknown Kanji, not in Dict:", known - (dictionary | extra))

    return known
Пример #5
0
def load_anki_data(kanji_list):
    """Return the set of known kanji, warning about any mismatches found.

    A kanji is "known" when it has a non-suspended Kanji card or when it
    occurs in a non-suspended Counter or KanjiWord. ``kanji_list`` holds the
    dictionary's kanji and is only used (together with the extras loaded from
    ``settings.EXTRA_DICT_KANJI``) to detect kanji the dictionary lacks.
    Warnings are sent through ``message``.
    """
    kanji_list = set(kanji_list)

    # Cards: every kanji we actually have an active card for
    carded = set()
    for card in Kanji.all():
        if not card.suspended:
            carded.add(card.kanji)

    # Words: every kanji used by an active counter or kanji word
    used = set()
    for word in Counter.all() + KanjiWord.all():
        if word.suspended:
            continue
        for char in word.kanji:
            # Kana inside a word is not a kanji, so it is not recorded
            if not kana.is_kana(char):
                used.add(char)

    extra = load_extra(settings.EXTRA_DICT_KANJI)

    # Used in a word, but there is no card for it
    without_card = used - carded
    if without_card:
        message("Missing Kanji Found", ' '.join(without_card))

    # Has a card, but no word demonstrates it
    without_example = carded - used
    if without_example:
        message("Kanji with no Examples", ' '.join(without_example))

    # Everything we know about, whichever way we learned of it
    known = carded | used

    # Known to us, yet missing from the dictionary and the extras
    not_in_dict = known - (kanji_list | extra)
    if not_in_dict:
        message("Unknown Kanji, not in Dict:", ' '.join(not_in_dict))

    return known
Пример #6
0
    #     if kana.is_kana(reading['base']):
    #         raise AnkiModel.Error(u"Kana mismatch: %s word(%s) reading(%s)" % (
    #             reading['base'], word.kanji, word.reading
    #         ))
    #     else:
    #         raise AnkiModel.Error(u"Kanji not found, but in use: %s word(%s)" % (
    #             reading['base'], word.kanji
    #         ))
    # if reading['reading'] not in kanji.readings and kanji.kanji != '々':
    #     print '%s(%s) word(%s)' % (
    #         kanji.kanji,
    #         reading['reading'],
    #         word.kanji,
    #     )

    for kanji in Kanji.all():
        for reading in kanji._readings:
            possible = map(unicode, reading.get_all())

            # See if we can find the reading in our kanji words list
            found = False
            for use in mapping[kanji.kanji]:
                if use in possible:
                    found = True

            # Ignore any that are used
            if found:
                continue

            # See if its been ignored
            string = "(kanji: %s) %s" % (kanji.kanji, reading)
Пример #7
0
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
from models.kanji import Kanji
from models.kanji_word import KanjiWord
from utf8_helper import force_UTF8
import kana

import settings


if __name__ == '__main__':
    # Un-suspend every KanjiWord that is written only with kana and with
    # kanji that have a non-suspended Kanji card.
    force_UTF8()

    # First we need to read our whitelist: the kanji of all active cards
    whitelist = {card.kanji for card in Kanji.all() if not card.suspended}

    # Now we filter out any KanjiWords that use other kanji
    for kanji_word in KanjiWord.all():
        # A character is acceptable when it is kana or a whitelisted kanji.
        # (The previous check rejected kana and let unknown kanji through,
        # which is the opposite of what the whitelist is for.)
        fine = all(
            kana.is_kana(char) or char in whitelist
            for char in kanji_word.kanji
        )

        if fine:
            kanji_word.mark_suspended(False)

Пример #8
0
            'words': [],
        }

    data[key]['words'].append((word, readings))


if __name__ == '__main__':
    force_UTF8()

    missing = {}

    # Now we need to find if all the readings are found
    for word in KanjiWord.all():
        for reading in word.kanji_readings:
            try:
                kanji = Kanji.find(reading['base'])
            except KeyError:
                if kana.is_kana(reading['base']) and reading['base'] != u'ヶ':
                    raise AnkiModel.Error(u"Kana mismatch: %s word(%s) reading(%s)" % (
                        reading['base'], word.kanji, word.reading
                    ))
                else:
                    # Make sure not to do the rest of the work
                    # otherwise you'll use the previous kanji
                    continue
                    # raise AnkiModel.Error(u"Kanji not found, but in use: %s word(%s)" % (
                    #     reading['base'], word.kanji
                    # ))

            # Now that we have the kanji, check if this reading is used
            if kanji.kanji == '々':
Пример #9
0
    out = parser.parse_args(args)
    return out


if __name__ == '__main__':
    # Count the characters read from stdin, keeping only kanji that are not
    # already present in the deck.
    force_UTF8()

    args = parse()

    # Find all the kanji that are in the deck
    all_kanji = set()
    for word in KanjiWord.all():
        all_kanji.update(word.kanji)
    for kanji in Kanji.all():
        # Store the character itself rather than the Kanji model object, so
        # the membership test below (which checks plain characters) can match
        all_kanji.add(kanji.kanji)

    # Count which kanji the input data has
    data = Counter(unicode(sys.stdin.read()))

    # Iterate over a snapshot of the keys because entries are deleted as we go
    for char in list(data):
        # We don't want kana, kanji we already know, or any non-kanji chars
        if (kana.is_kana(char)
                or char in all_kanji
                or not kana.is_kanji(char)):
            del data[char]
Пример #10
0
# File name of the ignored unused readings file
# (joined with DATA below when it is handed to Kanji.setup)
UNUSED_READINGS = 'unused.json'

# File name of the JLPT kanji file
# (joined with DATA below when it is handed to jlpt.setup_jlpt)
JLPT_PATH = 'jlpt_kanji.json'

# Paths for the Kanji dictionaries. KANJI_DICT currently points at the
# "common" file; the commented-out line switches it to the full file.
FULL_KANJI_DICT = os.path.join(DATA, 'kanjidic2.xml')
KANJI_DICT = os.path.join(DATA, 'kanjidic2_common.xml')
# KANJI_DICT = os.path.join(DATA, 'kanjidic2.xml')

# HTML output templates
DATA_HEADER = os.path.join(DATA, 'header.html')
DATA_CSS = os.path.join(DATA, 'main.css')

# Path for the extra dictionary kanji JSON file
EXTRA_DICT_KANJI = os.path.join(DATA, 'extra_dict.json')

# Now we set up all the models.
# NOTE(review): these imports sit below the constants, presumably because the
# model modules need the settings above to exist first -- confirm before
# moving them to the top of the file.
from models.anki import AnkiModel
from models.kanji_word import KanjiWord
from models.kanji import Kanji

# Hand each model the path of the file it is set up from
AnkiModel.setup(path=os.path.join(ANKI_PATH, ANKI_USER, ANKI_DB))
KanjiWord.setup(path=os.path.join(DATA, COMPLEX_READINGS))
Kanji.setup(path=os.path.join(DATA, UNUSED_READINGS))

# jlpt is imported only after the models above have been set up
import jlpt

# Hand the jlpt module the path of the JLPT kanji file
jlpt.setup_jlpt(path=os.path.join(DATA, JLPT_PATH))
Пример #11
0
# Paths for the Kanji dictionaries. KANJI_DICT currently points at the
# "common" file; the commented-out line switches it to the full file.
FULL_KANJI_DICT = os.path.join(DATA, 'kanjidic2.xml')
KANJI_DICT      = os.path.join(DATA, 'kanjidic2_common.xml')
# KANJI_DICT = os.path.join(DATA, 'kanjidic2.xml')

# HTML output templates
DATA_HEADER = os.path.join(DATA, 'header.html')
DATA_CSS    = os.path.join(DATA, 'main.css')


# Path for the extra dictionary kanji JSON file
EXTRA_DICT_KANJI = os.path.join(DATA, 'extra_dict.json')






# Now we set up all the models.
# NOTE(review): these imports sit below the constants, presumably because the
# model modules need the settings above to exist first -- confirm before
# moving them to the top of the file.
from models.anki import AnkiModel
from models.kanji_word import KanjiWord
from models.kanji import Kanji

# Hand each model the path of the file it is set up from
AnkiModel.setup(path=os.path.join(ANKI_PATH, ANKI_USER, ANKI_DB))
KanjiWord.setup(path=os.path.join(DATA, COMPLEX_READINGS))
Kanji.setup(path=os.path.join(DATA, UNUSED_READINGS))

# jlpt is imported only after the models above have been set up
import jlpt

# Hand the jlpt module the path of the JLPT kanji file
jlpt.setup_jlpt(path=os.path.join(DATA, JLPT_PATH))
Пример #12
0
    #     if kana.is_kana(reading['base']):
    #         raise AnkiModel.Error(u"Kana mismatch: %s word(%s) reading(%s)" % (
    #             reading['base'], word.kanji, word.reading
    #         ))
    #     else:
    #         raise AnkiModel.Error(u"Kanji not found, but in use: %s word(%s)" % (
    #             reading['base'], word.kanji
    #         ))
    # if reading['reading'] not in kanji.readings and kanji.kanji != '々':
    #     print '%s(%s) word(%s)' % (
    #         kanji.kanji,
    #         reading['reading'],
    #         word.kanji,
    #     )

    for kanji in Kanji.all():
        for reading in kanji._readings:
            possible = map(unicode, reading.get_all())

            # See if we can find the reading in our kanji words list
            found = False
            for use in mapping[kanji.kanji]:
                if use in possible:
                    found = True

            # Ignore any that are used
            if found:
                continue

            # See if its been ignored
            string = "(kanji: %s) %s" % (kanji.kanji, reading)
Пример #13
0
            'words': [],
        }

    data[key]['words'].append((word, readings))


if __name__ == '__main__':
    force_UTF8()

    missing = {}

    # Now we need to find if all the readings are found
    for word in KanjiWord.all():
        for reading in word.kanji_readings:
            try:
                kanji = Kanji.find(reading['base'])
            except KeyError:
                if kana.is_kana(reading['base']) and reading['base'] != u'ヶ':
                    raise AnkiModel.Error(
                        u"Kana mismatch: %s word(%s) reading(%s)" %
                        (reading['base'], word.kanji, word.reading))
                else:
                    # Make sure not to do the rest of the work
                    # otherwise you'll use the previous kanji
                    continue
                    # raise AnkiModel.Error(u"Kanji not found, but in use: %s word(%s)" % (
                    #     reading['base'], word.kanji
                    # ))

            # Now that we have the kanji, check if this reading is used
            if kanji.kanji == '々':