示例#1
0
    def get_sentences(self, kanji, items_per_page, page_num, allowed_kanji = None):
        """
        Return one page of the stored sentences that contain `kanji`.

        Matching sentences are ordered by the length of their text, shortest
        first, and then sliced into pages of `items_per_page` entries.

        Parameters:
            kanji          -- a single-character unicode string holding the
                              kanji to look for.
            items_per_page -- number of entries per page.
            page_num       -- zero-based index of the page to return.
            allowed_kanji  -- optional collection of kanji. When given, only
                              sentences whose kanji all belong to it (or are
                              `kanji` itself) are returned. The collection
                              is never modified.

        Returns a list with the entries of self.sentences for that page; the
        list is empty when the page lies past the last result.

        Raises ValueError when `kanji` is not a one-character unicode kanji.
        """
        # Check the type first so that a non-string argument (e.g. None) is
        # reported as a ValueError instead of failing inside len().
        if not isinstance(kanji, unicode) or len(kanji) != 1 \
                or not Restructurer.is_kanji(kanji):
            raise ValueError("Invalid value for 'kanji' parameter")

        # Work on a private copy: the searched kanji is always allowed, but
        # the caller's collection must not be altered.
        allowed = None
        if allowed_kanji is not None:
            allowed = set(allowed_kanji)
            allowed.add(kanji)

        try:
            query_ids = self.cache[kanji]
        except KeyError:
            # The cache only ever stores the unfiltered result for a kanji, so
            # an entry stays valid whatever `allowed_kanji` later calls pass.
            # v[1] != '' filters bad data (TODO: sanitize bad data)
            matches = [ (k, len(v[0])) for (k, v) in self.sentences.items()
                        if kanji in v[0] and v[1] != '' ]
            matches.sort(key = lambda pair: pair[1])  # shortest sentence first
            query_ids = [ pair[0] for pair in matches ]  # keep only the ids
            self.cache[kanji] = query_ids

        if allowed is not None:
            # Filtering keeps the relative order, so the result is the same
            # as sorting an already filtered list.
            query_ids = [ sid for sid in query_ids
                          if self.all_allowed(self.sentences[sid][0], allowed) ]

        # Slice the ids before the lookup so only the requested page is built.
        init_idx = page_num * items_per_page
        page_ids = query_ids[init_idx : init_idx + items_per_page]
        return [ self.sentences[sid] for sid in page_ids ]
示例#2
0
 def all_allowed(self, sentence, allowed):
     """
     Tell whether every kanji character of `sentence` is in `allowed`.

     Characters for which Restructurer.is_kanji() is false are ignored, so a
     sentence without any kanji yields True.
     """
     for char in sentence:
         # Stop at the first kanji that is missing from the allowed set.
         if Restructurer.is_kanji(char) and char not in allowed:
             return False
     return True
示例#3
0
文件: common.py 项目: brAzzi64/manabu
def KanjiIterator(string):
    """
    Generate, in order, the characters of `string` that are kanji.

    A character is yielded when Restructurer.is_kanji() is true for it; all
    other characters are skipped.
    """
    for char in string:
        if not Restructurer.is_kanji(char):
            continue
        yield char