def read_transformations(paradigm_codes_data):
    """
    Build paradigm and transformation lookup tables from coded paradigms.

    Arguments:
    ----------
    paradigm_codes_data: iterable of (code, descr, count) triples, e.g.
        (36, 1+е+2+ь#1+е+2+ь#1+2+и#1+2+я#1+2+ей#1+2+ю#1+2+ям#1+е+2+ь#1+2+и#1+2+ем#1+2+ями#1+2+е#1+2+ях, 12);
        ``descr`` is handed unchanged to ``descr_to_transforms``

    Returns:
    ----------
    A 3-tuple ``(paradigm_tables, transformation_tables,
    transformations_by_strings)`` where

    paradigm_tables is the tuple
        paradigms_by_codes: OrderedDict, (paradigm code, description),
        codes_by_paradigms: dict, (description, paradigm code),
        counts_by_code: dict, (paradigm code, count);
    transformation_tables is the tuple
        transformations_list: list of distinct transformations in order of
            first appearance; codes start at 1 while the list is 0-based,
            so the transformation with code ``c`` sits at index ``c - 1``,
        transformation_codes: dict, (transformation, transformation code),
        transformation_counts: dict, (transformation code, summed count of
            the paradigms it occurs in, once per occurrence),
        transformations_by_paradigms: dict, (paradigm code, list of codes
            of the transformations returned for that paradigm);
    transformations_by_strings: defaultdict(list), maps
        ``transformation.trans[0]`` to the codes of the transformations
        sharing that value.
    """
    # First extract the transformations of every paradigm and number them,
    # then store everything in the containers returned to the caller.
    paradigms_by_codes = OrderedDict()
    codes_by_paradigms = {}
    counts_by_code = {}
    transformations_list = []
    transformation_codes = {}
    counts_by_trans_code = defaultdict(int)
    transformations_by_paradigms = {}
    next_code = 1
    for code, descr, count in paradigm_codes_data:
        paradigms_by_codes[code] = descr
        codes_by_paradigms[descr] = code
        counts_by_code[code] = count
        # Collect the codes in the same pass that assigns them, so the
        # helper's result is consumed once (safe even for a one-shot
        # iterator).
        local_codes = []
        for trans in descr_to_transforms(descr):
            trans_code = transformation_codes.get(trans)
            if trans_code is None:
                trans_code = transformation_codes[trans] = next_code
                next_code += 1
                transformations_list.append(trans)
            counts_by_trans_code[trans_code] += count
            local_codes.append(trans_code)
        transformations_by_paradigms[code] = local_codes
    # Callers receive a plain dict, not a defaultdict.
    transformation_counts = dict(counts_by_trans_code)
    transformations_by_strings = defaultdict(list)
    for transformation, trans_code in transformation_codes.items():
        substr = transformation.trans[0]
        transformations_by_strings[substr].append(trans_code)
    return ((paradigms_by_codes, codes_by_paradigms, counts_by_code),
            (transformations_list, transformation_codes,
             transformation_counts, transformations_by_paradigms),
            transformations_by_strings)
def read_transformations_(paradigms_by_codes, paradigm_counts=None):
    """
    Number the transformations occurring in the given paradigms.

    Arguments:
    ----------
    paradigms_by_codes: dict,
        mapping (paradigm code, paradigm description); each description
        is handed unchanged to ``descr_to_transforms``
    paradigm_counts: dict or None (optional, default=None),
        mapping (paradigm code, count); a code missing from the mapping
        counts as 0, and None means every paradigm counts as 0

    Returns:
    ----------
    A 2-tuple ``(transformation_tables, transformations_by_strings)``
    where transformation_tables is the tuple

    transformations_list: list,
        distinct transformations in order of first appearance, preceded
        by a None placeholder at index 0 so that the transformation with
        code ``c`` sits at index ``c`` (codes start at 1)
    transformation_codes: dict,
        mapping (transformation, transformation code)
    transformation_counts: defaultdict(int),
        mapping (transformation code, summed count of the paradigms it
        occurs in, once per occurrence)
    transformations_by_paradigms: dict,
        mapping (paradigm code, list of codes of the transformations
        returned for that paradigm)

    and transformations_by_strings is a defaultdict(list) mapping
    ``transformation.trans[0]`` to the codes of the transformations
    sharing that value.
    """
    if paradigm_counts is None:
        paradigm_counts = {}
    transformations_list = [None]
    transformation_codes = {}
    transformation_counts = defaultdict(int)
    transformations_by_paradigms = {}
    next_code = 1
    for code, descr in paradigms_by_codes.items():
        # A paradigm without a recorded count contributes 0, matching
        # what the None default does for every paradigm.
        try:
            paradigm_count = paradigm_counts[code]
        except KeyError:
            paradigm_count = 0
        # Collect the codes in the same pass that assigns them, so the
        # helper's result is consumed once (safe even for a one-shot
        # iterator).
        local_codes = []
        for trans in descr_to_transforms(descr):
            trans_code = transformation_codes.get(trans)
            if trans_code is None:
                trans_code = transformation_codes[trans] = next_code
                next_code += 1
                transformations_list.append(trans)
            transformation_counts[trans_code] += paradigm_count
            local_codes.append(trans_code)
        transformations_by_paradigms[code] = local_codes
    transformations_by_strings = defaultdict(list)
    for transformation, trans_code in transformation_codes.items():
        substr = transformation.trans[0]
        transformations_by_strings[substr].append(trans_code)
    return ((transformations_list, transformation_codes,
             transformation_counts, transformations_by_paradigms),
            transformations_by_strings)
def read_transformations(paradigm_codes_data):
    """
    Build paradigm and transformation lookup tables from coded paradigms.

    Arguments:
    ----------
    paradigm_codes_data: iterable of (code, descr, count) triples, e.g.
        (36, 1+е+2+ь#1+е+2+ь#1+2+и#1+2+я#1+2+ей#1+2+ю#1+2+ям#1+е+2+ь#1+2+и#1+2+ем#1+2+ями#1+2+е#1+2+ях, 12);
        ``descr`` is handed unchanged to ``descr_to_transforms``

    Returns:
    ----------
    A 3-tuple ``(paradigm_tables, transformation_tables,
    transformations_by_strings)`` where

    paradigm_tables is the tuple
        paradigms_by_codes: OrderedDict, (paradigm code, description),
        codes_by_paradigms: dict, (description, paradigm code),
        counts_by_code: dict, (paradigm code, count);
    transformation_tables is the tuple
        transformations_list: list of distinct transformations in order of
            first appearance; codes start at 1 while the list is 0-based,
            so the transformation with code ``c`` sits at index ``c - 1``,
        transformation_codes: dict, (transformation, transformation code),
        transformation_counts: dict, (transformation code, summed count of
            the paradigms it occurs in, once per occurrence),
        transformations_by_paradigms: dict, (paradigm code, list of codes
            of the transformations returned for that paradigm);
    transformations_by_strings: defaultdict(list), maps
        ``transformation.trans[0]`` to the codes of the transformations
        sharing that value.
    """
    # First extract the transformations of every paradigm and number them,
    # then store everything in the containers returned to the caller.
    paradigms_by_codes = OrderedDict()
    codes_by_paradigms = {}
    counts_by_code = {}
    transformations_list = []
    transformation_codes = {}
    counts_by_trans_code = defaultdict(int)
    transformations_by_paradigms = {}
    next_code = 1
    for code, descr, count in paradigm_codes_data:
        paradigms_by_codes[code] = descr
        codes_by_paradigms[descr] = code
        counts_by_code[code] = count
        # Collect the codes in the same pass that assigns them, so the
        # helper's result is consumed once (safe even for a one-shot
        # iterator).
        local_codes = []
        for trans in descr_to_transforms(descr):
            trans_code = transformation_codes.get(trans)
            if trans_code is None:
                trans_code = transformation_codes[trans] = next_code
                next_code += 1
                transformations_list.append(trans)
            counts_by_trans_code[trans_code] += count
            local_codes.append(trans_code)
        transformations_by_paradigms[code] = local_codes
    # Callers receive a plain dict, not a defaultdict.
    transformation_counts = dict(counts_by_trans_code)
    transformations_by_strings = defaultdict(list)
    for transformation, trans_code in transformation_codes.items():
        substr = transformation.trans[0]
        transformations_by_strings[substr].append(trans_code)
    return ((paradigms_by_codes, codes_by_paradigms, counts_by_code),
            (transformations_list, transformation_codes,
             transformation_counts, transformations_by_paradigms),
            transformations_by_strings)
def read_transformations_(paradigms_by_codes, paradigm_counts=None):
    """
    Number the transformations occurring in the given paradigms.

    Arguments:
    ----------
    paradigms_by_codes: dict,
        mapping (paradigm code, paradigm description); each description
        is handed unchanged to ``descr_to_transforms``
    paradigm_counts: dict or None (optional, default=None),
        mapping (paradigm code, count); a code missing from the mapping
        counts as 0, and None means every paradigm counts as 0

    Returns:
    ----------
    A 2-tuple ``(transformation_tables, transformations_by_strings)``
    where transformation_tables is the tuple

    transformations_list: list,
        distinct transformations in order of first appearance, preceded
        by a None placeholder at index 0 so that the transformation with
        code ``c`` sits at index ``c`` (codes start at 1)
    transformation_codes: dict,
        mapping (transformation, transformation code)
    transformation_counts: defaultdict(int),
        mapping (transformation code, summed count of the paradigms it
        occurs in, once per occurrence)
    transformations_by_paradigms: dict,
        mapping (paradigm code, list of codes of the transformations
        returned for that paradigm)

    and transformations_by_strings is a defaultdict(list) mapping
    ``transformation.trans[0]`` to the codes of the transformations
    sharing that value.
    """
    if paradigm_counts is None:
        paradigm_counts = {}
    transformations_list = [None]
    transformation_codes = {}
    transformation_counts = defaultdict(int)
    transformations_by_paradigms = {}
    next_code = 1
    for code, descr in paradigms_by_codes.items():
        # A paradigm without a recorded count contributes 0, matching
        # what the None default does for every paradigm.
        try:
            paradigm_count = paradigm_counts[code]
        except KeyError:
            paradigm_count = 0
        # Collect the codes in the same pass that assigns them, so the
        # helper's result is consumed once (safe even for a one-shot
        # iterator).
        local_codes = []
        for trans in descr_to_transforms(descr):
            trans_code = transformation_codes.get(trans)
            if trans_code is None:
                trans_code = transformation_codes[trans] = next_code
                next_code += 1
                transformations_list.append(trans)
            transformation_counts[trans_code] += paradigm_count
            local_codes.append(trans_code)
        transformations_by_paradigms[code] = local_codes
    transformations_by_strings = defaultdict(list)
    for transformation, trans_code in transformation_codes.items():
        substr = transformation.trans[0]
        transformations_by_strings[substr].append(trans_code)
    return ((transformations_list, transformation_codes,
             transformation_counts, transformations_by_paradigms),
            transformations_by_strings)