示例#1
0
def read_transformations(paradigm_codes_data):
    """
    Build paradigm and transformation lookup tables from paradigm records.

    Takes an iterable of paradigm records of the form
    (36, 1+е+2+ь#1+е+2+ь#1+2+и#1+2+я#1+2+ей#1+2+ю#1+2+ям#1+е+2+ь#1+2+и#1+2+ем#1+2+ями#1+2+е#1+2+ях, 12)
    i.e. ``(code, descr, count)`` triples, and derives the transformation
    encoding from them.

    Arguments:
    ----------
    paradigm_codes_data: iterable of (code, descr, count) triples

    Returns:
    ----------
    A 3-tuple:
      * (paradigms_by_codes, codes_by_paradigms, counts_by_code):
        code -> descr (OrderedDict), descr -> code, code -> count;
      * (transformations_list, transformation_codes,
         transformation_counts, transformations_by_paradigms):
        transformations in order of first appearance, transformation -> code,
        code -> accumulated count, paradigm code -> list of transformation
        codes;
      * transformations_by_strings: defaultdict(list) mapping the first
        element of a transformation's ``trans`` attribute to the codes of
        the transformations that share it.

    Transformation codes start at 1 while ``transformations_list`` starts
    empty, so ``transformations_list[i]`` holds the transformation whose
    code is ``i + 1``.
    """
    paradigms_by_codes = OrderedDict()
    codes_by_paradigms = {}
    counts_by_code = {}
    ordered_transforms = []
    code_of = {}
    # Counts are first accumulated per transformation object and re-keyed
    # by transformation code once every record has been processed.
    raw_counts = defaultdict(int)
    transformations_by_paradigms = {}

    for code, descr, count in paradigm_codes_data:
        paradigms_by_codes[code] = descr
        codes_by_paradigms[descr] = code
        counts_by_code[code] = count
        # descr_to_transforms is defined outside this block; presumably it
        # splits a paradigm description into its local transformations.
        found = descr_to_transforms(descr)
        for item in found:
            if item not in code_of:
                # The list is 0-based and codes are 1-based, so the length
                # after appending is exactly the next free code.
                ordered_transforms.append(item)
                code_of[item] = len(ordered_transforms)
            raw_counts[item] += count
        transformations_by_paradigms[code] = [code_of[item] for item in found]

    counts_by_transformation_code = {}
    for item, total in raw_counts.items():
        counts_by_transformation_code[code_of[item]] = total

    transformations_by_strings = defaultdict(list)
    for item, item_code in code_of.items():
        transformations_by_strings[item.trans[0]].append(item_code)

    paradigm_tables = (paradigms_by_codes, codes_by_paradigms, counts_by_code)
    transformation_tables = (
        ordered_transforms,
        code_of,
        counts_by_transformation_code,
        transformations_by_paradigms,
    )
    return paradigm_tables, transformation_tables, transformations_by_strings
示例#2
0
def read_transformations_(paradigms_by_codes, paradigm_counts=None):
    """
    Derive the transformation encoding from a code -> paradigm mapping.

    Arguments:
    ----------
    paradigms_by_codes: dict, mapping of the form (paradigm code, paradigm
        description); it is iterated with ``.items()``
    paradigm_counts: dict or None (optional, default=None), mapping of the
        form (paradigm code, count); when None, every paradigm counts as 0

    Returns:
    ----------
    A pair ``((transformations_list, transformation_codes,
    transformation_counts, transformations_by_paradigms),
    transformations_by_strings)`` where

    transformations_list: list, transformations in order of first appearance,
        with ``None`` at index 0 so that a transformation's code equals its
        index in the list
    transformation_codes: dict, mapping (transformation, transformation code)
    transformation_counts: defaultdict(int), mapping (transformation code,
        accumulated count)
    transformations_by_paradigms: dict, mapping (paradigm code, codes of the
        local transformations it contains)
    transformations_by_strings: defaultdict(list), mapping the first element
        of a transformation's ``trans`` attribute to the codes of the
        transformations that share it
    """
    if paradigm_counts is None:
        paradigm_counts = defaultdict(int)

    # Index 0 is a placeholder: codes are handed out starting from 1.
    ordered_transforms = [None]
    code_of = {}
    counts_by_transformation_code = defaultdict(int)
    transformations_by_paradigms = {}

    for code, descr in paradigms_by_codes.items():
        weight = paradigm_counts[code]
        # descr_to_transforms is defined outside this block; presumably it
        # splits a paradigm description into its local transformations.
        found = descr_to_transforms(descr)
        for item in found:
            if item in code_of:
                item_code = code_of[item]
            else:
                # After appending, the new element's index is the next code.
                ordered_transforms.append(item)
                item_code = code_of[item] = len(ordered_transforms) - 1
            counts_by_transformation_code[item_code] += weight
        transformations_by_paradigms[code] = [code_of[item] for item in found]

    transformations_by_strings = defaultdict(list)
    for item, item_code in code_of.items():
        transformations_by_strings[item.trans[0]].append(item_code)

    transformation_tables = (
        ordered_transforms,
        code_of,
        counts_by_transformation_code,
        transformations_by_paradigms,
    )
    return transformation_tables, transformations_by_strings
示例#3
0
def read_transformations(paradigm_codes_data):
    """
    Build paradigm and transformation lookup tables from paradigm records.

    Takes an iterable of paradigm records of the form
    (36, 1+е+2+ь#1+е+2+ь#1+2+и#1+2+я#1+2+ей#1+2+ю#1+2+ям#1+е+2+ь#1+2+и#1+2+ем#1+2+ями#1+2+е#1+2+ях, 12)
    i.e. ``(code, descr, count)`` triples, and derives the transformation
    encoding from them.

    Arguments:
    ----------
    paradigm_codes_data: iterable of (code, descr, count) triples

    Returns:
    ----------
    A 3-tuple:
      * (paradigms_by_codes, codes_by_paradigms, counts_by_code):
        code -> descr (OrderedDict), descr -> code, code -> count;
      * (transformations_list, transformation_codes,
         transformation_counts, transformations_by_paradigms):
        transformations in order of first appearance, transformation -> code,
        code -> accumulated count, paradigm code -> list of transformation
        codes;
      * transformations_by_strings: defaultdict(list) mapping the first
        element of a transformation's ``trans`` attribute to the codes of
        the transformations that share it.

    Transformation codes start at 1 while ``transformations_list`` starts
    empty, so ``transformations_list[i]`` holds the transformation whose
    code is ``i + 1``.
    """
    paradigms_by_codes = OrderedDict()
    codes_by_paradigms = {}
    counts_by_code = {}
    ordered_transforms = []
    code_of = {}
    # Counts are first accumulated per transformation object and re-keyed
    # by transformation code once every record has been processed.
    raw_counts = defaultdict(int)
    transformations_by_paradigms = {}

    for code, descr, count in paradigm_codes_data:
        paradigms_by_codes[code] = descr
        codes_by_paradigms[descr] = code
        counts_by_code[code] = count
        # descr_to_transforms is defined outside this block; presumably it
        # splits a paradigm description into its local transformations.
        found = descr_to_transforms(descr)
        for item in found:
            if item not in code_of:
                # The list is 0-based and codes are 1-based, so the length
                # after appending is exactly the next free code.
                ordered_transforms.append(item)
                code_of[item] = len(ordered_transforms)
            raw_counts[item] += count
        transformations_by_paradigms[code] = [code_of[item] for item in found]

    counts_by_transformation_code = {}
    for item, total in raw_counts.items():
        counts_by_transformation_code[code_of[item]] = total

    transformations_by_strings = defaultdict(list)
    for item, item_code in code_of.items():
        transformations_by_strings[item.trans[0]].append(item_code)

    paradigm_tables = (paradigms_by_codes, codes_by_paradigms, counts_by_code)
    transformation_tables = (
        ordered_transforms,
        code_of,
        counts_by_transformation_code,
        transformations_by_paradigms,
    )
    return paradigm_tables, transformation_tables, transformations_by_strings
示例#4
0
def read_transformations_(paradigms_by_codes, paradigm_counts=None):
    """
    Derive the transformation encoding from a code -> paradigm mapping.

    Arguments:
    ----------
    paradigms_by_codes: dict, mapping of the form (paradigm code, paradigm
        description); it is iterated with ``.items()``
    paradigm_counts: dict or None (optional, default=None), mapping of the
        form (paradigm code, count); when None, every paradigm counts as 0

    Returns:
    ----------
    A pair ``((transformations_list, transformation_codes,
    transformation_counts, transformations_by_paradigms),
    transformations_by_strings)`` where

    transformations_list: list, transformations in order of first appearance,
        with ``None`` at index 0 so that a transformation's code equals its
        index in the list
    transformation_codes: dict, mapping (transformation, transformation code)
    transformation_counts: defaultdict(int), mapping (transformation code,
        accumulated count)
    transformations_by_paradigms: dict, mapping (paradigm code, codes of the
        local transformations it contains)
    transformations_by_strings: defaultdict(list), mapping the first element
        of a transformation's ``trans`` attribute to the codes of the
        transformations that share it
    """
    if paradigm_counts is None:
        paradigm_counts = defaultdict(int)

    # Index 0 is a placeholder: codes are handed out starting from 1.
    ordered_transforms = [None]
    code_of = {}
    counts_by_transformation_code = defaultdict(int)
    transformations_by_paradigms = {}

    for code, descr in paradigms_by_codes.items():
        weight = paradigm_counts[code]
        # descr_to_transforms is defined outside this block; presumably it
        # splits a paradigm description into its local transformations.
        found = descr_to_transforms(descr)
        for item in found:
            if item in code_of:
                item_code = code_of[item]
            else:
                # After appending, the new element's index is the next code.
                ordered_transforms.append(item)
                item_code = code_of[item] = len(ordered_transforms) - 1
            counts_by_transformation_code[item_code] += weight
        transformations_by_paradigms[code] = [code_of[item] for item in found]

    transformations_by_strings = defaultdict(list)
    for item, item_code in code_of.items():
        transformations_by_strings[item.trans[0]].append(item_code)

    transformation_tables = (
        ordered_transforms,
        code_of,
        counts_by_transformation_code,
        transformations_by_paradigms,
    )
    return transformation_tables, transformations_by_strings