def __init__(self):
    super().__init__()
    self.entries = dict()
    category = NotImplemented  # type: str
    # vocab_category.txt interleaves bare category lines with
    # vocab<TAB>reading<TAB>english rows; group each vocab under the most
    # recently seen category.
    with open(database_path('vocab_category.txt')) as f:
        for row in f:
            match_obj = re.match(r'([^\t]*)\t([^\t]*)\t([^\t]*)[\t\n]', row)
            if not match_obj:
                category = row.strip().lower()
                if not category:
                    continue
            else:
                vocab, reading, english = match_obj.groups()
                if vocab not in self.entries:
                    self.entries[vocab] = {
                        'vocab': vocab,
                        'reading': reading,
                        'english': english,
                        'categories': [category]
                    }
                elif category not in self.entries[vocab]['categories']:
                    self.entries[vocab]['categories'].append(category)
    self._lookup = dict()
    self._lookup_categories = []
    self._categories = dict()
    self._category = ''
    self._do_categories()
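
# Hedged sketch (assumption): the vocab_category.txt layout implied by the
# regex above -- a bare category name on its own line, followed by rows of
# vocab<TAB>reading<TAB>english. The sample rows here are invented.
import re

_SAMPLE_VOCAB_CATEGORY = 'Food\n苹果\tpíng guǒ\tapple\n米饭\tmǐ fàn\tcooked rice\n'

def _demo_vocab_category_rows():
    for row in _SAMPLE_VOCAB_CATEGORY.splitlines(keepends=True):
        match_obj = re.match(r'([^\t]*)\t([^\t]*)\t([^\t]*)[\t\n]', row)
        if match_obj:
            print('entry:', match_obj.groups())      # e.g. ('苹果', 'píng guǒ', 'apple')
        else:
            print('category:', row.strip().lower())  # e.g. 'food'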
def __init__(self):
    self.entries = dict()
    with open(database_path('hanzi_variant.tsv')) as f:
        next(f)  # skip the header row
        for row in f:
            contents = row.split('\t')
            self.entries[contents[1]] = contents[-3]
def __init__(self):
    self.entries = dict()
    with open(database_path('hsk_vocab.tsv')) as f:
        for row in f:
            contents = row.split('\t')
            level = contents[-3][-1]
            if level.isdigit():  # only keep rows whose level column ends in a digit
                self.entries.setdefault(contents[-3], []).append(contents[0])
def __init__(self):
    self.entries = []
    with open(database_path('hanzi_level.txt')) as f:
        category = None
        for row in f:
            if row[0].isdigit():  # e.g. a '1-100 ...' range header
                category = row.strip()
            else:
                self.entries.append((row.strip(), category))
def __init__(self):
    self.entries = []
    with open(database_path('SpoonFed.tsv')) as f:
        next(f)
        for row in f:
            contents = row.split('\t')
            self.entries.append({
                'Chinese': contents[2],
                'English': contents[0]
            })
def __init__(self):
    super().__init__()
    self.levels = []
    category = NotImplemented  # type: str
    with open(database_path('hanzi_level.txt')) as f:
        for row in f:
            # Range headers look like '1-100 <category>'; every other row
            # is a single hanzi belonging to the current category.
            match_obj = re.match(r'(\d+)-(\d+) (\S+)', row)
            if match_obj is not None:
                _, _, category = match_obj.groups()
            else:
                self.levels.append([row.strip(), category])
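
# Hedged sketch (assumption): the hanzi_level.txt layout implied by the two
# parsers above -- a '1-100 <category>' range header, then one hanzi per
# line until the next header. The sample data here is invented.
import re

_SAMPLE_HANZI_LEVEL = '1-100 Beginner\n的\n一\n'

def _demo_hanzi_level_rows():
    levels = []
    category = None
    for row in _SAMPLE_HANZI_LEVEL.splitlines():
        match_obj = re.match(r'(\d+)-(\d+) (\S+)', row)
        if match_obj is not None:
            _, _, category = match_obj.groups()
        else:
            levels.append([row.strip(), category])
    print(levels)  # [['的', 'Beginner'], ['一', 'Beginner']]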
def __init__(self):
    super().__init__()
    self.entries = dict()
    with open(database_path(self.filename), encoding=self.encoding) as f:
        keys = f.readline().strip().split('\t')  # header row gives the column names
        for row in f:
            values = row.strip().split('\t')
            # Strip any HTML markup from each cell, then key the record
            # by the configured index column.
            self.entries[values[self.index_column]] = \
                dict(zip(keys, [BeautifulSoup(value, "html.parser").text
                                for value in values]))
    self._lookup = []
    self._lookup_params = []
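
# Hedged sketch (assumption): the generic loader above expects a subclass to
# supply self.filename, self.encoding and self.index_column as class
# attributes. 'TsvTableBase' and 'HanziVariantTable' are hypothetical names
# illustrating that contract, not classes from the source.
class TsvTableBase:
    filename = NotImplemented     # type: str  # name of the TSV to load
    encoding = 'utf8'
    index_column = 0              # which column keys self.entries

class HanziVariantTable(TsvTableBase):
    filename = 'hanzi_variant.tsv'
    index_column = 1              # key entries by the second column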
def __init__(self):
    super().__init__()
    self.entries = dict()
    with open(database_path('hsk_vocab.tsv')) as f:
        keys = f.readline().strip().split('\t')
        for row in f:
            item = row.strip().split('\t')
            self.entries.setdefault(item[0], []).append(dict(zip(keys, item)))
            # if item[1] and item[1] != item[0]:
            #     self.entries.setdefault(item[1], []).append(dict(zip(keys, item)))
    self._lookup = []
    self._lookup_params = []
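
# Worked example of the setdefault/zip pattern above on one invented
# hsk_vocab.tsv row; the real column names in that file are an assumption.
def _demo_hsk_row():
    keys = ['simplified', 'traditional', 'pinyin', 'english', 'level']
    item = ['爱', '愛', 'ài', 'to love', 'HSK1']
    entries = {}
    entries.setdefault(item[0], []).append(dict(zip(keys, item)))
    print(entries)  # {'爱': [{'simplified': '爱', 'traditional': '愛', ...}]}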
def __init__(self):
    self.entries = dict()
    with open(database_path('vocab_category.txt')) as f:
        category = None
        vocab_list = None
        for row in f:
            contents = row.split('\t')
            if len(contents) == 1:  # a bare line starts a new category
                if category and vocab_list:
                    self.entries[category] = vocab_list
                category = contents[0].strip()
                vocab_list = []
            else:
                vocab_list.append(contents[0])
        # Flush the final category, which has no following header to commit it.
        if category and vocab_list:
            self.entries[category] = vocab_list
def __init__(self):
    super().__init__()
    self.dictionary = dict()
    with open(database_path('cedict_ts.u8'), encoding='utf8') as f:
        for row in f:
            # CC-CEDICT lines look like 'trad simp [pinyin] /gloss/.../';
            # comment lines starting with '#' simply fail the fullmatch.
            result = re.fullmatch(r'(\w+) (\w+) \[(.+)\] /(.+)/\n', row)
            if result is not None:
                trad, simp, pinyin, eng = result.groups()
                self.dictionary.setdefault(simp, [])
                self.dictionary.setdefault(trad, [])
                self.dictionary[simp].append({
                    'traditional': trad,
                    'simplified': simp,
                    'reading': pinyin,
                    'english': eng
                })
                if trad != simp:
                    # Share the same entry dict under the traditional key.
                    self.dictionary[trad].append(self.dictionary[simp][-1])
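
# Worked example: what the fullmatch above extracts from one line in
# CC-CEDICT's public 'trad simp [pinyin] /gloss/' format (this particular
# entry is illustrative).
import re

def _demo_cedict_row():
    row = '中國 中国 [Zhong1 guo2] /China/\n'
    result = re.fullmatch(r'(\w+) (\w+) \[(.+)\] /(.+)/\n', row)
    print(result.groups())  # ('中國', '中国', 'Zhong1 guo2', 'China')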