示例#1
0
    def __init__(self):
        """Load ``vocab_category.txt`` into ``self.entries``, keyed by vocab.

        A line holding at least three tab-separated fields is read as a
        ``vocab / reading / english`` entry. Any other line becomes the
        current category (stripped and lower-cased) and applies to the
        entries that follow it. A vocab that shows up under several
        categories keeps one entry whose ``'categories'`` list collects each
        distinct category once.

        After loading, the lookup-related attributes are reset and
        ``self._do_categories()`` is called.
        """
        super().__init__()
        self.entries = dict()
        # Placeholder attached to entries that appear before any category line.
        category = NotImplemented  # type: str

        # The third field stops at a tab or newline, and end-of-string is also
        # accepted as a terminator, so a final line without a trailing newline
        # is still parsed as an entry rather than mistaken for a category.
        entry_pattern = re.compile(
            r'([^\t]*)\t([^\t]*)\t([^\t\n]*)(?:[\t\n]|$)')

        with open(database_path('vocab_category.txt')) as f:
            for row in f:
                match_obj = entry_pattern.match(row)
                if not match_obj:
                    # Not an entry line: it names (or, if blank, clears)
                    # the current category.
                    category = row.strip().lower()
                    continue

                vocab, reading, english = match_obj.groups()
                entry = self.entries.get(vocab)
                if entry is None:
                    self.entries[vocab] = {
                        'vocab': vocab,
                        'reading': reading,
                        'english': english,
                        'categories': [category]
                    }
                elif category not in entry['categories']:
                    entry['categories'].append(category)

        self._lookup = dict()
        self._lookup_categories = []
        self._categories = dict()
        self._category = ''
        self._do_categories()
示例#2
0
 def __init__(self):
     """Build ``self.entries`` from ``hanzi_variant.tsv``.

     Each row after the first maps its second tab-separated column to its
     third-from-last column.
     """
     self.entries = dict()
     mapping = self.entries
     with open(database_path('hanzi_variant.tsv')) as source:
         # Discard the first line (presumably a header row).
         next(source)
         for line in source:
             columns = line.split('\t')
             key, value = columns[1], columns[-3]
             mapping[key] = value
示例#3
0
 def __init__(self):
     """Group the first column of ``hsk_vocab.tsv`` by a level column.

     The third-from-last tab-separated column is used as the grouping key.
     Only rows where that column ends in a digit are kept; each kept row
     contributes its first column to the list stored under the key.
     """
     self.entries = dict()
     with open(database_path('hsk_vocab.tsv')) as source:
         for line in source:
             columns = line.split('\t')
             level_name = columns[-3]
             # Rows whose level column does not end in a digit are ignored.
             if not level_name[-1].isdigit():
                 continue
             if level_name not in self.entries:
                 self.entries[level_name] = []
             self.entries[level_name].append(columns[0])
示例#4
0
 def __init__(self):
     """Read ``hanzi_level.txt`` into a list of ``(text, category)`` tuples.

     A line starting with a digit sets the current category to the stripped
     line. Every other line is appended to ``self.entries`` as its stripped
     text paired with the current category (``None`` until one is seen).
     """
     self.entries = []
     current = None
     with open(database_path('hanzi_level.txt')) as source:
         for line in source:
             text = line.strip()
             if line[0].isdigit():
                 current = text
                 continue
             self.entries.append((text, current))
示例#5
0
 def __init__(self):
     """Load sentence pairs from ``SpoonFed.tsv`` into ``self.entries``.

     Each row after the first yields a dict whose ``'Chinese'`` value is the
     third tab-separated column and whose ``'English'`` value is the first.
     """
     self.entries = []
     with open(database_path('SpoonFed.tsv')) as source:
         # Discard the first line (presumably a header row).
         next(source)
         for line in source:
             columns = line.split('\t')
             pair = dict()
             pair['Chinese'] = columns[2]
             pair['English'] = columns[0]
             self.entries.append(pair)
示例#6
0
 def __init__(self):
     """Read ``hanzi_level.txt`` into ``self.levels``.

     A line beginning with ``<digits>-<digits> <name>`` sets the current
     category to ``<name>``. Every other line is appended as a
     ``[stripped line, current category]`` pair.
     """
     super().__init__()
     self.levels = []
     # Placeholder paired with lines that precede the first header line.
     current = NotImplemented
     with open(database_path('hanzi_level.txt')) as source:
         for line in source:
             header = re.match(r'(\d+)-(\d+) (\S+)', line)
             if header is None:
                 self.levels.append([line.strip(), current])
             else:
                 # Only the trailing name is kept; the numeric range is unused.
                 current = header.group(3)
示例#7
0
    def __init__(self):
        """Load the tab-separated file ``self.filename`` into ``self.entries``.

        The first line supplies the column names. Each later row becomes a
        dict of column name to cell text, where the text is what
        BeautifulSoup's ``html.parser`` extracts from the cell. Rows are keyed
        by the raw (unparsed) cell at ``self.index_column``.
        """
        super().__init__()
        self.entries = dict()
        with open(database_path(self.filename), encoding=self.encoding) as source:
            header = source.readline().strip().split('\t')
            for line in source:
                cells = line.strip().split('\t')
                plain_cells = []
                for cell in cells:
                    plain_cells.append(BeautifulSoup(cell, "html.parser").text)
                record = dict(zip(header, plain_cells))
                self.entries[cells[self.index_column]] = record

        self._lookup = []
        self._lookup_params = []
示例#8
0
    def __init__(self):
        """Load ``hsk_vocab.tsv`` into ``self.entries``.

        The first line supplies the column names. Each later row becomes a
        dict of column name to cell value and is appended to the list stored
        under the row's first column.
        """
        super().__init__()
        self.entries = dict()
        with open(database_path('hsk_vocab.tsv')) as source:
            header = source.readline().strip().split('\t')
            for line in source:
                fields = line.strip().split('\t')
                record = dict(zip(header, fields))
                # Rows are indexed by the first column only; additional
                # indexing by the second column is intentionally not done.
                self.entries.setdefault(fields[0], []).append(record)

        self._lookup = []
        self._lookup_params = []
示例#9
0
 def __init__(self):
     """Group the vocab in ``vocab_category.txt`` by category.

     A line without any tab starts a new category named by the stripped
     line. Every other line contributes its first tab-separated field to
     the current category's list. A category is stored in ``self.entries``
     only when both its name and its list are non-empty.
     """
     self.entries = dict()
     with open(database_path('vocab_category.txt')) as f:
         category = None
         vocab_list = None
         for row in f:
             contents = row.split('\t')
             if len(contents) == 1:
                 # A new header closes the previous category.
                 if category and vocab_list:
                     self.entries[category] = vocab_list
                 category = contents[0].strip()
                 vocab_list = []
             else:
                 vocab_list.append(contents[0])

     # The last category has no following header line to close it, so store
     # it here; otherwise it would be dropped at end of file.
     if category and vocab_list:
         self.entries[category] = vocab_list
示例#10
0
 def __init__(self):
     """Load ``cedict_ts.u8`` into ``self.dictionary``.

     Lines of the form ``<trad> <simp> [<reading>] /<english>/`` are parsed
     into a dict with ``'traditional'``, ``'simplified'``, ``'reading'`` and
     ``'english'`` keys. The entry is appended to the list stored under the
     simplified form and, when it differs, the same dict object is also
     appended under the traditional form. Lines that do not match the
     pattern are skipped.
     """
     super().__init__()
     self.dictionary = dict()
     # The trailing newline is optional so the final line is still parsed
     # when the file does not end with a newline.
     # NOTE(review): ``\w+`` rejects headwords containing punctuation, so
     # such lines are skipped - confirm whether that is intended.
     entry_pattern = re.compile(r'(\w+) (\w+) \[(.+)\] /(.+)/\n?')
     with open(database_path('cedict_ts.u8'), encoding='utf8') as f:
         # Iterate the file lazily instead of materializing every line.
         for row in f:
             result = entry_pattern.fullmatch(row)
             if result is None:
                 continue
             trad, simp, pinyin, eng = result.groups()
             entry = {
                 'traditional': trad,
                 'simplified': simp,
                 'reading': pinyin,
                 'english': eng
             }
             self.dictionary.setdefault(simp, []).append(entry)
             if trad != simp:
                 self.dictionary.setdefault(trad, []).append(entry)