Пример #1
0
def get_bs_abbreviation_homonyms():
    """Group BS title forms that are homonyms of the listed abbreviations.

    A title form matches an abbreviation when its parsed name, with every
    '*' removed, equals the lowercased parsed name of the abbreviation.
    For each abbreviation with at least one match the output receives the
    abbreviation line, its matching title forms and an empty separator
    line. Matching title forms are also printed.
    """
    abbreviation_bs = get_string_list_from_file('out/Аббревиатура. БС.txt')

    word_forms_bases = read_src_bs('src_dict/БС 13.03.21.txt')
    title_forms = [str(x.title_word_form) for x in word_forms_bases]

    # Parse every title form once and index it by its star-free name,
    # instead of re-parsing the whole dictionary for each abbreviation.
    forms_by_name = {}
    for title_form in title_forms:
        form_name = get_bs_title_word_form(title_form).name.replace('*', '')
        forms_by_name.setdefault(form_name, []).append(title_form)

    bs_abbreviation_homonyms = []

    for abbreviation in abbreviation_bs:
        abbreviation_name = get_bs_title_word_form(abbreviation).name
        title_form_list = forms_by_name.get(abbreviation_name.lower(), [])
        if not title_form_list:
            continue

        for title_form in title_form_list:
            print(title_form)

        bs_abbreviation_homonyms.append(abbreviation)
        bs_abbreviation_homonyms += title_form_list
        bs_abbreviation_homonyms.append('')

    # [:-1] drops the separator that follows the last group.
    save_list_to_file(bs_abbreviation_homonyms[:-1],
                      'out/Аббревиатура. БС. Омонимы.txt')
Пример #2
0
def get_capital_letter_bs():
    """Group BS title forms that are homonyms of capitalised words.

    A title form matches a capitalised word when its parsed name, with
    every '*' removed, equals the lowercased parsed name of that word.
    For each word with at least one match the output receives the word
    line, its matching title forms and an empty separator line. Matching
    title forms are also printed.
    """
    capital_letter_bs = list(
        get_string_list_from_file('out/Большая буква. БС.txt'))

    word_forms_bases = read_src_bs('src_dict/БС 13.03.21.txt')
    title_forms = [str(x.title_word_form) for x in word_forms_bases]

    # Parse every title form once and index it by its star-free name,
    # instead of re-parsing the whole dictionary for each capitalised word.
    forms_by_name = {}
    for title_form in title_forms:
        form_name = get_bs_title_word_form(title_form).name.replace('*', '')
        forms_by_name.setdefault(form_name, []).append(title_form)

    capital_letter_bs_homonyms = []

    for capital_word in capital_letter_bs:
        capital_word_name = get_bs_title_word_form(capital_word).name
        title_form_list = forms_by_name.get(capital_word_name.lower(), [])
        if not title_form_list:
            continue

        for title_form in title_form_list:
            print(title_form)

        capital_letter_bs_homonyms.append(capital_word)
        capital_letter_bs_homonyms += title_form_list
        capital_letter_bs_homonyms.append('')

    # [:-1] drops the separator that follows the last group.
    save_list_to_file(capital_letter_bs_homonyms[:-1],
                      'out/Большая буква. БС. Омонимы.txt')
Пример #3
0
def ordinary_words_bs():
    """Save the BS title forms that are not on any exclusion list.

    The exclusion lists are the multi-root BS words, the BS homonyms and
    the remaining repetitions that match BS (the latter re-rendered through
    ``get_bs_title_word_form``). The surviving title forms keep their
    dictionary order.
    """
    word_forms_bases = read_src_bs('src_dict/БС 02.03.21.txt')
    bs_word_forms = [str(x.title_word_form) for x in word_forms_bases]

    # A set makes every membership test O(1) instead of a list scan.
    exclusion_set = set()

    # Multi-root BS words
    exclusion_set.update(
        get_string_list_from_file('src_dict/Многокорневые слова БС.txt'))

    # BS homonyms
    exclusion_set.update(get_string_list_from_file('src_dict/Омонимы БС.txt'))

    # Remaining repetitions that match BS
    remaining_repetitions = get_string_list_from_file(
        'src_dict/Повторы ост. совпадает с БС.txt')
    exclusion_set.update(
        str(get_bs_title_word_form(x)) for x in remaining_repetitions)

    # Ordinary BS words
    ordinary_words_bs_list = [
        bs_str_form for bs_str_form in bs_word_forms
        if bs_str_form not in exclusion_set
    ]

    save_list_to_file(ordinary_words_bs_list, 'out/Обычные слова БС.txt')
Пример #4
0
def compare_homonyms_spec_note():
    """Split BS homonyms with special notes by whether BG has the same entry.

    Each BG homonym is reduced to a comparison string built from its name,
    idf, info and its special note (the note is itself parsed as a socket
    form after '< ' is removed). Each BS homonym is reduced to name, idf,
    info and its note with '.* < ' removed. BS lines whose comparison
    string occurs among the BG ones go to the "matching" output file, the
    rest to the "not matching" one.
    """
    homonyms_bg = get_string_list_from_file('src_dict/Омонимы БГ.txt')

    # Set of BG comparison strings: membership is all that is needed.
    homonyms_bg_str_form = set()

    for homonyms in homonyms_bg:
        socket_form = get_socket_word_form(homonyms)

        # The special note references another word; parse it so that it
        # can be rendered in the same normalised field order.
        spec_note_socket_form = get_socket_word_form(
            socket_form.spec_note.replace('< ', ''))
        spec_note = ' '.join(
            filter(None, [
                spec_note_socket_form.invisible,
                spec_note_socket_form.name,
                spec_note_socket_form.root_index,
                spec_note_socket_form.idf,
                ' '.join(spec_note_socket_form.info),
                spec_note_socket_form.note,
            ]))

        homonyms_bg_str_form.add(' '.join(
            filter(None, [
                socket_form.name,
                socket_form.idf,
                ' '.join(socket_form.info),
                spec_note,
            ])))

    homonyms_spec_note = get_string_list_from_file(
        'src_dict/О-мы БС спец. прим. не совпадают с Повторами.txt')

    homonyms_spec_note_relevant = []
    homonyms_spec_note_not_relevant = []

    for homonym in homonyms_spec_note:
        title_form = get_bs_title_word_form(homonym)
        print(title_form.name, title_form.note)
        string_form = ' '.join(
            filter(None, [
                title_form.name,
                title_form.idf,
                ' '.join(title_form.info),
                title_form.note.replace('.* < ', ''),
            ]))
        if string_form in homonyms_bg_str_form:
            homonyms_spec_note_relevant.append(homonym)
        else:
            homonyms_spec_note_not_relevant.append(homonym)

    save_list_to_file(homonyms_spec_note_relevant,
                      'out/О-мы БС спец. прим. совпадают с О-мами БГ.txt')
    save_list_to_file(homonyms_spec_note_not_relevant,
                      'out/О-мы БС спец. прим. не совпадают с О-мами БГ.txt')
Пример #5
0
def get_remaining_repetitions():
    """Save the in-group repetitions that match no known BS homonym.

    BS homonyms come from two files: entries from the special-note file are
    keyed by name, idf and info only; entries from the plain file also
    include the note with '.*' removed. Repetitions are keyed by name, idf,
    info and the note with '*' removed. Repetition keys absent from the
    homonym keys are written to 'out/Повторы ост.txt' in input order.
    """
    # Only membership is tested against the homonyms, so a set suffices.
    homonyms = set()

    for line in get_string_list_from_file(
            'src_dict/О-мы БС спец. прим. совпадают с Повторами.txt'):
        bs_form = get_bs_title_word_form(line)
        homonyms.add(' '.join(filter(None, [
            bs_form.name,
            bs_form.idf,
            ' '.join(bs_form.info),
        ])))

    for line in get_string_list_from_file(
            'src_dict/О-мы БС совпадают с Повторами.txt'):
        bs_form = get_bs_title_word_form(line)
        homonyms.add(' '.join(filter(None, [
            bs_form.name,
            bs_form.idf,
            ' '.join(bs_form.info),
            bs_form.note.replace('.*', '').strip()
        ])))

    replays_in_groups = []
    for line in get_string_list_from_file(
            'out/Повторы в группах (без повторов).txt'):
        # Parse each line once rather than once per field.
        socket_form = get_socket_word_form(line)
        replays_in_groups.append(' '.join(filter(None, [
            socket_form.name,
            socket_form.idf,
            ' '.join(socket_form.info),
            socket_form.note.replace('*', '').strip(),
        ])))

    remaining_repetitions = [
        replay for replay in replays_in_groups if replay not in homonyms
    ]

    save_list_to_file(remaining_repetitions, 'out/Повторы ост.txt')
Пример #6
0
def get_capital_letter_bs():
    """Collect BS title forms that are homonyms of capitalised words.

    A title form matches a capitalised word when its parsed name, with
    every '*' removed, equals the lowercased parsed name of that word.
    All matches are printed, then saved sorted by their text with '*'
    removed.
    """
    capital_letter_bs = list(
        get_string_list_from_file('src_dict/Большая буква. БС.txt'))

    word_forms_bases = read_src_bs('src_dict/БС 09.03.21.txt')
    title_forms = [str(x.title_word_form) for x in word_forms_bases]

    # Parse every title form once and index it by its star-free name,
    # instead of re-parsing the whole dictionary for each capitalised word.
    forms_by_name = {}
    for title_form in title_forms:
        form_name = get_bs_title_word_form(title_form).name.replace('*', '')
        forms_by_name.setdefault(form_name, []).append(title_form)

    capital_letter_bs_homonyms = []

    for capital_word in capital_letter_bs:
        capital_word_name = get_bs_title_word_form(capital_word).name
        for title_form in forms_by_name.get(capital_word_name.lower(), []):
            print(title_form)
            capital_letter_bs_homonyms.append(title_form)

    save_list_to_file(
        sorted(capital_letter_bs_homonyms, key=lambda x: x.replace('*', '')),
        'out/Большая буква. БС. Омонимы.txt')
Пример #7
0
def get_bs_abbreviation_homonyms():
    """Collect BS title forms that are homonyms of the listed abbreviations.

    A title form matches an abbreviation when its parsed name, with every
    '*' removed, equals the lowercased parsed name of the abbreviation.
    All matches are printed, then saved sorted by their text with '*'
    removed.
    """
    abbreviation_bs = get_string_list_from_file(
        'src_dict/Аббревиатура. БС.txt')

    word_forms_bases = read_src_bs('src_dict/БС 09.03.21.txt')
    title_forms = [str(x.title_word_form) for x in word_forms_bases]

    # Parse every title form once and index it by its star-free name,
    # instead of re-parsing the whole dictionary for each abbreviation.
    forms_by_name = {}
    for title_form in title_forms:
        form_name = get_bs_title_word_form(title_form).name.replace('*', '')
        forms_by_name.setdefault(form_name, []).append(title_form)

    bs_abbreviation_homonyms = []

    for abbreviation in abbreviation_bs:
        abbreviation_name = get_bs_title_word_form(abbreviation).name
        for title_form in forms_by_name.get(abbreviation_name.lower(), []):
            print(title_form)
            bs_abbreviation_homonyms.append(title_form)

    save_list_to_file(
        sorted(bs_abbreviation_homonyms, key=lambda x: x.replace('*', '')),
        'out/Аббревиатура. БС. Омонимы.txt')
Пример #8
0
def check_g58():
    """Report entries of the Г58 list whose name has no expected ending.

    Every entry whose parsed name does not end with one of the listed
    suffixes is printed and collected; the collected entries are saved in
    sorted order.
    """
    endings = ('греть', 'мять', 'оть', 'ыть', 'пеленать')

    parsed_forms = [
        get_bs_title_word_form(line)
        for line in get_string_list_from_file('src_dict/Г58 ещё.txt')
    ]

    mismatched = []
    for form in parsed_forms:
        if form.name.endswith(endings):
            continue
        print(form)
        mismatched.append(str(form))

    mismatched.sort()
    save_list_to_file(mismatched, 'out/Г58 ещё изм.txt')
Пример #9
0
def check_socket_bs():
    """Print multi-root socket forms whose name is missing from the BS list.

    Each CSV row is a mapping; every non-empty value except the one under
    the first key is parsed as a socket form and printed when its name does
    not occur among the names parsed from the multi-root BS file.
    """
    multi_root_words = get_dicts_from_csv_file('out/Многокорневые слова.csv')
    # A set of names: only membership is tested, once per CSV cell.
    multi_root_bs_names = {
        get_bs_title_word_form(x).name
        for x in get_string_list_from_file('out/Многокорневые слова БС.txt')
    }

    for multi_root_word in multi_root_words:
        # The first column is skipped; the remaining columns hold the forms.
        for root_index_key in list(multi_root_word)[1:]:
            cell = multi_root_word[root_index_key]
            if not cell:
                continue
            socket_form = get_socket_word_form(cell)
            if socket_form.name not in multi_root_bs_names:
                print(socket_form)
Пример #10
0
def check_g21():
    """Report entries of the Г21 list whose name has no expected ending.

    Every entry whose parsed name does not end with one of the listed
    suffixes is printed and collected; the collected entries are saved in
    sorted order.
    """
    endings = ('верещать', 'верещаться', 'дышать', 'дышаться', 'кишеть',
               'кишеться', 'пищать', 'пищаться', 'слышать', 'слышаться',
               'трещать', 'трещаться', 'ршать', 'ршаться', 'жить', 'житься',
               'чить', 'читься', 'шить', 'шиться', 'щить', 'щиться')

    parsed_forms = [
        get_bs_title_word_form(line)
        for line in get_string_list_from_file('src_dict/Г21 ещё.txt')
    ]

    mismatched = []
    for form in parsed_forms:
        if form.name.endswith(endings):
            continue
        print(form)
        mismatched.append(str(form))

    mismatched.sort()
    save_list_to_file(mismatched, 'out/Г21 ещё изм.txt')
Пример #11
0
def check_g15():
    """Report entries of the Г15 list whose name has no expected ending.

    Every entry whose parsed name does not end with one of the listed
    suffixes is printed and collected; the collected entries are saved in
    sorted order.
    """
    endings = ('бормотать', 'бормотаться', 'лепетать', 'лепетаться', 'плакать',
               'плакаться', 'прятать', 'прятаться', 'скакать', 'скакаться',
               'топтать', 'топтаться', 'шептать', 'шептаться', 'щебетать',
               'щебетаться', 'готать', 'готаться', 'котать', 'котаться',
               'потать', 'потаться', 'хотать', 'хотаться', 'хтать', 'хтаться')

    parsed_forms = [
        get_bs_title_word_form(line)
        for line in get_string_list_from_file('src_dict/Г15 ещё.txt')
    ]

    mismatched = []
    for form in parsed_forms:
        if form.name.endswith(endings):
            continue
        print(form)
        mismatched.append(str(form))

    mismatched.sort()
    save_list_to_file(mismatched, 'out/Г15 ещё изм.txt')
Пример #12
0
def check_g6():
    """Report entries of the Г6 list whose name has no expected ending.

    Every entry whose parsed name does not end with one of the listed
    suffixes is printed and collected; the collected entries are saved in
    sorted order.
    """
    endings = ('нуть', 'нуться', 'зиждить', 'зиждиться', 'врать', 'враться',
               'жрать', 'жраться', 'рвать', 'рваться', 'реветь', 'реветься',
               'ржать', 'ржаться', 'сосать', 'сосаться', 'срать', 'сраться',
               'стонать', 'стонаться', 'ткать', 'ткаться', 'шибить',
               'шибиться')

    parsed_forms = [
        get_bs_title_word_form(line)
        for line in get_string_list_from_file('src_dict/Г6 ещё.txt')
    ]

    mismatched = []
    for form in parsed_forms:
        if form.name.endswith(endings):
            continue
        print(form)
        mismatched.append(str(form))

    mismatched.sort()
    save_list_to_file(mismatched, 'out/Г6 ещё изм.txt')
Пример #13
0
def compare_homonyms():
    """Split BS homonyms by whether BG contains the same entry.

    BG homonyms are keyed by name, idf, info and note. BS homonyms are
    keyed by name, idf, info and the note with '.* ' replaced by '* '.
    BS lines whose key occurs among the BG keys go to the "matching"
    output file, the rest to the "not matching" one.
    """
    homonyms_bg = get_string_list_from_file(
        'src_dict/Омонимы БГ без этим. примечаний.txt')

    # Set of BG comparison strings: only membership is tested below.
    homonyms_bg_str_form = set()

    for homonyms in homonyms_bg:
        socket_form = get_socket_word_form(homonyms)
        homonyms_bg_str_form.add(' '.join(filter(
            None, [
                socket_form.name,
                socket_form.idf,
                ' '.join(socket_form.info),
                socket_form.note
            ]
        )))

    homonyms_bs = get_string_list_from_file(
        'src_dict/О-мы БС не совпадают с Повторами.txt')

    homonyms_bs_relevant = []
    homonyms_bs_not_relevant = []

    for homonym in homonyms_bs:
        title_form = get_bs_title_word_form(homonym)
        string_form = ' '.join(filter(
            None,
            [
                title_form.name,
                title_form.idf,
                ' '.join(title_form.info),
                # BS notes start with '.* ' where BG notes start with '* '.
                title_form.note.replace('.* ', '* '),
            ]
        ))
        if string_form in homonyms_bg_str_form:
            homonyms_bs_relevant.append(homonym)
        else:
            homonyms_bs_not_relevant.append(homonym)

    save_list_to_file(homonyms_bs_relevant,
                      'out/О-мы БС совпадают с О-мами БГ.txt')
    save_list_to_file(homonyms_bs_not_relevant,
                      'out/О-мы БС не совпадают с О-мами БГ.txt')
Пример #14
0
def remove_strings():
    """Remove multi-root BS entries from the matching-repetitions list.

    Multi-root BS lines are parsed and re-rendered as "name idf info note"
    (empty fields dropped). The repetition lines not equal to any of these
    strings are de-duplicated, sorted and saved.
    """
    relevant_repetitions = get_string_list_from_file(
        'src_dict/Повторы ост. совпадает с БС.txt')

    multi_root_lines = list(get_string_list_from_file(
        'src_dict/Многокорневые слова БС.txt'))
    parsed_multi_root = [get_bs_title_word_form(x) for x in multi_root_lines]

    multi_root_keys = set()
    for form in parsed_multi_root:
        fields = [form.name, form.idf, ' '.join(form.info), form.note]
        multi_root_keys.add(' '.join(filter(None, fields)))

    remaining = [
        line for line in set(relevant_repetitions)
        if line not in multi_root_keys
    ]
    remaining.sort()

    save_list_to_file(remaining, 'out/Повторы ост. совпадает с БС.txt')
Пример #15
0
def check_presence():
    """Classify the words of every file in 'out/lst' against the BS data.

    For each file the words are split into three groups:

    * present among the BS homonym names;
    * otherwise found among the BS title-form names (with '*' removed and
      surrounding whitespace stripped) - every matching title form is
      printed and recorded;
    * otherwise absent.

    Each non-empty group is saved, sorted, to its own output file named
    after the input file's stem.
    """
    homonyms = {
        get_bs_title_word_form(x).name
        for x in get_string_list_from_file('src_dict/Омонимы БС.txt')
    }

    word_forms_bases = list(read_src_bs('src_dict/БС 16.03.21 изм.txt'))

    # Index the title forms by the same normalised name that is used for
    # the presence test. Comparing against the raw name (which may carry
    # '*') would silently lose words that are known to be present.
    forms_by_name = {}
    for group in word_forms_bases:
        bs_name = group.title_word_form.name.replace('*', '').strip()
        forms_by_name.setdefault(bs_name, []).append(group.title_word_form)

    for file_path in Path('out/lst').glob('*'):
        file_stem = file_path.stem

        presence = []  # 28a. the word is present in the BS homonyms file
        nouns = []  # 29a. the word is found
        absent = []  # 29b. the word is NOT found

        words = get_string_list_from_file(file_path, encoding='cp1251')
        for word in words:
            print(word)
            if word in homonyms:
                presence.append(word)
            elif word in forms_by_name:
                for title_word_form in forms_by_name[word]:
                    print(title_word_form)
                    nouns.append(str(title_word_form))
            else:
                absent.append(word)

        if presence:
            save_list_to_file(
                sorted(presence), f'out/homonyms/{file_stem} омонимы.txt')

        if nouns:
            save_list_to_file(sorted(nouns), f'out/C/С{file_stem}.txt')

        if absent:
            save_list_to_file(
                sorted(absent), f'out/absent/{file_stem} отсутствует.txt')
Пример #16
0
def compare_replays_in_groups():
    """Split in-group repetition lines by whether they match a BS homonym.

    BS homonyms are keyed by name, idf, info and the note with '.*'
    removed. The repetitions file is split into blank-line separated
    groups; the first line of each group and lines starting with '!' are
    skipped. Every other line is keyed by name, idf, info and the note
    with '*' removed, and sorted into the matching or non-matching output.
    """
    homonyms_bs = get_string_list_from_file('src_dict/Омонимы БС.txt')

    # Set of BS comparison strings: only membership is tested below.
    bs_str_forms = set()

    for homonym in homonyms_bs:
        title_form = get_bs_title_word_form(homonym)
        bs_str_forms.add(' '.join(
            filter(None, [
                title_form.name, title_form.idf, ' '.join(title_form.info),
                title_form.note.replace('.*', '').strip()
            ])))

    relevant = []
    not_relevant = []

    with open('src_dict/Повторы в группах.txt', encoding='utf-8') as f_in:
        groups = (x.strip() for x in f_in.read().split('\n\n'))
        for group in groups:
            for line in group.split('\n')[1:]:
                if line.startswith('!'):
                    continue
                word_form = get_socket_word_form(line)
                line_form = ' '.join(
                    filter(None, [
                        word_form.name,
                        word_form.idf,
                        ' '.join(word_form.info),
                        word_form.note.replace('*', '').strip(),
                    ]))
                if line_form in bs_str_forms:
                    relevant.append(line)
                else:
                    not_relevant.append(line)

    save_list_to_file(
        sorted(relevant, key=lambda x: x.replace('*', '').strip().lower()),
        'out/19.1 Совпадающие.txt')

    save_list_to_file(
        sorted(not_relevant, key=lambda x: x.replace('*', '').strip().lower()),
        'out/19.1 Не совпадающие.txt')
Пример #17
0
def check_socket_bs():
    """Save multi-root socket forms whose name is missing from the BS list.

    Each CSV row is a mapping; every non-empty value except the one under
    the first key is parsed as a socket form. Forms whose name does not
    occur among the names parsed from the multi-root BS file are printed
    and written to 'out/Не найденные в БС.txt'.
    """
    multi_root_words = get_dicts_from_csv_file('out/Многокорневые слова.csv')
    # A set of names: only membership is tested, once per CSV cell.
    multi_root_bs_names = {
        get_bs_title_word_form(x).name
        for x in get_string_list_from_file('out/Многокорневые слова БС.txt')
    }

    not_found_in_bs = []

    for multi_root_word in multi_root_words:
        # The first column is skipped; the remaining columns hold the forms.
        for root_index_key in list(multi_root_word)[1:]:
            cell = multi_root_word[root_index_key]
            if not cell:
                continue
            socket_form = get_socket_word_form(cell)
            if socket_form.name not in multi_root_bs_names:
                print(socket_form)
                not_found_in_bs.append(str(socket_form))

    save_list_to_file(not_found_in_bs, 'out/Не найденные в БС.txt')
Пример #18
0
def check_g50_51():
    """Report entries of the Г50/Г51 list whose name has no expected ending.

    Every entry whose parsed name does not end with one of the listed
    suffixes is printed and collected; the collected entries are saved in
    sorted order.
    """
    endings = ('выкнуть', 'выкнуться', 'гибнуть', 'гибнуться', 'глохнуть',
               'глохнуться', 'грязнуть', 'грязнуться', 'дрыхнуть',
               'дрыхнуться', 'дрябнуть', 'дрябнуться', 'зябнуть', 'зябнуться',
               'киснуть', 'киснуться', 'крепнуть', 'крепнуться', 'липнуть',
               'липнуться', 'мерзнуть', 'мерзнуться', 'мокнуть', 'мокнуться',
               'мякнуть', 'мякнуться', 'пухнуть', 'пухнуться', 'сохнуть',
               'сохнуться', 'чахнуть', 'чахнуться')

    parsed_forms = [
        get_bs_title_word_form(line)
        for line in get_string_list_from_file('src_dict/Г50, Г51 ещё.txt')
    ]

    mismatched = []
    for form in parsed_forms:
        if form.name.endswith(endings):
            continue
        print(form)
        mismatched.append(str(form))

    mismatched.sort()
    save_list_to_file(mismatched, 'out/Г50, Г51 ещё изм.txt')
Пример #19
0
def check_g53_56():
    """Report entries of the Г53-Г56 list that satisfy none of the rules.

    An entry is accepted (and therefore not reported) when its info
    contains 'неп' or 'б', when its name ends with one of ``endings``, or
    when its info contains 'нес' and its name ends with one of
    ``nes_endings``. All other entries are printed and saved in sorted
    order.
    """
    endings = ('шел', 'авать', 'ивать', 'увать', 'ывать', 'вевать', 'мевать',
               'певать', 'ревать', 'севать', 'тевать', 'щевать', 'длевать',
               'тлевать', 'одолевать', 'одолевать', 'разевать')

    nes_endings = ('водить', 'возить', 'носить', 'ходить')

    parsed_forms = [
        get_bs_title_word_form(line)
        for line in get_string_list_from_file(
            'src_dict/Г53, Г54, Г55, Г56 ещё.txt')
    ]

    rejected = []
    for form in parsed_forms:
        # Guard clauses in the same order as the accepted conditions.
        if 'неп' in form.info or 'б' in form.info:
            continue
        if form.name.endswith(endings):
            continue
        if 'нес' in form.info and form.name.endswith(nes_endings):
            continue
        print(form)
        rejected.append(str(form))

    rejected.sort()
    save_list_to_file(rejected, 'out/Г53, Г54, Г55, Г56 ещё изм.txt')
Пример #20
0
def compare_dicts():
    """Compare the ordinary BG and BS word lists and save the differences.

    Every line is reduced to a comparison key of name, idf, info and the
    note with a fixed-length prefix cut off (2 characters for BG socket
    forms, 3 for BS title forms). BG lines whose key occurs in both lists
    are saved as matching lines; the remaining BG lines and the BS lines
    whose key is not shared are saved as unique to their dictionary.
    """
    def comparison_key(form, note_prefix_len):
        # Render the fields in a fixed order, dropping empty ones.
        return ' '.join(filter(None, [
            form.name,
            form.idf,
            ' '.join(form.info),
            form.note[note_prefix_len:],
        ]))

    ordinary_words_bg_list = list(
        get_string_list_from_file('out/Обычные слова БГ.txt'))
    ordinary_words_bs_list = list(
        get_string_list_from_file('out/Обычные слова БС.txt'))

    # Keys are computed once per line and reused for the classification.
    bg_compare_forms = [
        comparison_key(get_socket_word_form(x), 2)
        for x in ordinary_words_bg_list
    ]
    bs_compare_forms = [
        comparison_key(get_bs_title_word_form(x), 3)
        for x in ordinary_words_bs_list
    ]

    intersection = set(bg_compare_forms) & set(bs_compare_forms)

    matching_lines = []
    bg_unique = []

    for bg_string, compare_form in zip(ordinary_words_bg_list,
                                       bg_compare_forms):
        if compare_form in intersection:
            matching_lines.append(bg_string)
        else:
            bg_unique.append(bg_string)

    # BS lines without a BG counterpart belong to bs_unique. Appending the
    # bs_unique list itself to matching_lines would leave bs_unique empty
    # and corrupt the matching output.
    bs_unique = [
        bs_string
        for bs_string, compare_form in zip(ordinary_words_bs_list,
                                           bs_compare_forms)
        if compare_form not in intersection
    ]

    save_list_to_file(matching_lines, 'out/Строки совпадают.txt')
    save_list_to_file(bg_unique, 'out/Уникальные строки БГ.txt')
    save_list_to_file(bs_unique, 'out/Уникальные строки БС.txt')
Пример #21
0
def get_diff_lists():
    """Save the matching and unique word forms of the BS and BG word lists.

    BS lines are keyed by name, idf, info and the note with '.*' removed;
    BG lines by name, idf, info and the note without its first two
    characters. The keys are then parsed again and the resulting word-form
    objects are sorted into "matching", "unique to BS" and "unique to BG"
    outputs.
    """
    def bs_key(form):
        return ' '.join(filter(None, [
            form.name,
            form.idf,
            ' '.join(form.info),
            form.note.replace('.*', '').strip(),
        ]))

    def bg_key(form):
        return ' '.join(filter(None, [
            form.name,
            form.idf,
            ' '.join(form.info),
            form.note[2:],
        ]))

    ordinary_words_bs_list = [
        bs_key(get_bs_title_word_form(x))
        for x in get_string_list_from_file('out/Обычные слова БС.txt')
    ]
    ordinary_words_bg_list = [
        bg_key(get_socket_word_form(x))
        for x in get_string_list_from_file('out/Обычные слова БГ.txt')
    ]

    # Keep these as sets: they are only used for membership tests.
    bs_keys = set(ordinary_words_bs_list)
    bg_keys = set(ordinary_words_bg_list)
    matching_src = bs_keys & bg_keys
    unique_src_bs = bs_keys - bg_keys
    unique_src_bg = bg_keys - bs_keys

    matching_forms = []
    unique_bs_forms = []
    unique_bg_forms = []

    # NOTE(review): the loops below parse the comparison keys, not the
    # original file lines, so the saved forms are built from the normalised
    # text. This mirrors the existing behaviour; confirm it is intended.
    for word in ordinary_words_bs_list:
        title_form = get_bs_title_word_form(word)
        str_form = bs_key(title_form)
        if str_form in matching_src:
            matching_forms.append(title_form)
        elif str_form in unique_src_bs:
            unique_bs_forms.append(title_form)

    for word in ordinary_words_bg_list:
        socket_form = get_socket_word_form(word)
        if bg_key(socket_form) in unique_src_bg:
            unique_bg_forms.append(socket_form)

    save_list_to_file(matching_forms, 'out/Строки совпадают.txt')
    save_list_to_file(unique_bs_forms, 'out/Уникальные строки БС.txt')
    save_list_to_file(unique_bg_forms, 'out/Уникальные строки БГ.txt')
Пример #22
0
def check_presence():
    """Classify the words of every file in 'out/lst' against the BS data.

    Words and dictionary entries are compared by a key made of the name
    followed by those info items that occur in ``IDENTIFIERS``. For an
    input line the key is the text before the first '.*' with whitespace
    runs collapsed. Each line is put into one of three groups:

    * present among the BS homonym keys;
    * otherwise found among the BS title-form keys - every matching title
      form is recorded;
    * otherwise absent.

    Each non-empty group is saved, sorted case-insensitively with '*'
    ignored, to its own output file named after the input file's stem.
    """
    def identifier_key(form):
        # Name plus only those info items that are known identifiers.
        return ' '.join(
            filter(None, [
                form.name,
                ' '.join([y for y in form.info if y in IDENTIFIERS]),
            ]))

    def sort_key(line):
        return line.replace('*', '').strip().lower()

    homonym_forms = [
        get_bs_title_word_form(x)
        for x in get_string_list_from_file('src_dict/Омонимы БС.txt')
    ]
    homonyms = {identifier_key(x) for x in homonym_forms}

    word_forms_bases = list(read_src_bs('src_dict/БС 27.03.21.txt'))

    # Index the title forms by key once, instead of recomputing the key of
    # every dictionary entry for every input word.
    forms_by_key = {}
    for group in word_forms_bases:
        forms_by_key.setdefault(
            identifier_key(group.title_word_form), []).append(
                str(group.title_word_form))

    for file_path in Path('out/lst').glob('*'):
        file_stem = file_path.stem

        presence = []  # 28a. the word is present in the BS homonyms file
        nouns = []  # 29a. the word is found
        absent = []  # 29b. the word is NOT found

        words = get_string_list_from_file(file_path)
        for word in words:
            # Drop the note part and collapse runs of whitespace.
            clear_word = ' '.join(word.split('.*')[0].split())
            print(word)
            if clear_word in homonyms:
                presence.append(word)
            elif clear_word in forms_by_key:
                nouns.extend(forms_by_key[clear_word])
            else:
                absent.append(word)

        if presence:
            save_list_to_file(
                sorted(presence, key=sort_key),
                f'out/homonyms/{file_stem} омонимы.txt')

        if nouns:
            save_list_to_file(
                sorted(nouns, key=sort_key),
                f'out/verbs/Г{file_stem}.txt')

        if absent:
            save_list_to_file(
                sorted(absent, key=sort_key),
                f'out/absent/{file_stem} отсутствует.txt')
Пример #23
0
def get_homonyms_bs():
    """Split BS homonyms by whether they match an in-group repetition.

    The repetitions file is split into blank-line separated groups whose
    first line is skipped. Lines starting with '!' set the current
    sub-group title; every other line yields two keys: name, idf, info
    plus the sub-group title name, and name, idf, info plus its own note
    with '*' removed.

    BS homonyms whose note starts with '.* <' are compared against the
    first kind of key, all others against the second kind. Four output
    files receive the matching and non-matching lines of each category.
    """
    homonyms_bs = get_string_list_from_file('src_dict/Омонимы БС.txt')

    # Sets: only membership is tested against these keys.
    replays_in_groups_spec_note = set()  # (".* <")
    replays_in_groups = set()  # (".*")

    with open('src_dict/Повторы в группах.txt', encoding='utf-8') as f_in:
        groups = (x.strip() for x in f_in.read().split('\n\n'))
        for group in groups:
            for line in group.split('\n')[1:]:
                if line.startswith('!'):
                    # NOTE(review): the title stays in effect until the
                    # next '!' line, including across groups, and is
                    # unbound if a word line comes before any '!' line.
                    title_name_sub_group = get_socket_word_form(
                        line.replace('!', '').strip()).name
                    continue

                word_form = get_socket_word_form(line)
                replays_in_groups_spec_note.add(' '.join(
                    filter(None, [
                        word_form.name,
                        word_form.idf,
                        ' '.join(word_form.info),
                        title_name_sub_group,
                    ])))
                replays_in_groups.add(' '.join(
                    filter(None, [
                        word_form.name,
                        word_form.idf,
                        ' '.join(word_form.info),
                        word_form.note.replace('*', '').strip(),
                    ])))

    homonyms_spec_note_relevant = []
    homonyms_spec_note_not_relevant = []
    homonyms_relevant = []
    homonyms_not_relevant = []

    for homonym in homonyms_bs:
        title_form = get_bs_title_word_form(homonym)

        # entries with the special note (".* <")
        if title_form.note.startswith('.* <'):
            string_form = ' '.join(
                filter(None, [
                    title_form.name, title_form.idf, ' '.join(title_form.info),
                    title_form.note.replace('.* <', '').strip()
                ]))
            if string_form in replays_in_groups_spec_note:
                homonyms_spec_note_relevant.append(homonym)
            else:
                homonyms_spec_note_not_relevant.append(homonym)

        # entries WITHOUT the special note (".* <")
        else:
            string_form = ' '.join(
                filter(None, [
                    title_form.name, title_form.idf, ' '.join(title_form.info),
                    title_form.note.replace('.*', '').strip()
                ]))
            if string_form in replays_in_groups:
                homonyms_relevant.append(homonym)
            else:
                homonyms_not_relevant.append(homonym)

    save_list_to_file(homonyms_spec_note_relevant,
                      'out/О-мы БС спец. прим. совпадают с Повторами.txt')
    save_list_to_file(homonyms_spec_note_not_relevant,
                      'out/О-мы БС спец. прим. не совпадают с Повторами.txt')
    save_list_to_file(homonyms_relevant,
                      'out/О-мы БС совпадают с Повторами.txt')
    save_list_to_file(homonyms_not_relevant,
                      'out/О-мы БС не совпадают с Повторами.txt')
Пример #24
0
def get_homonyms_bs():
    """Split BS homonyms by whether they occur in the homonymous-words list.

    Each BS homonym is keyed by name, idf, info and a normalised note:
    '.* <' becomes '<' for notes that start with '.* <', otherwise '.*'
    becomes '*'. An empty note contributes nothing to the key. Homonyms
    with a special note ('.* <') and all other homonyms are reported
    separately, each as a matching and a non-matching output file.
    """
    # A set: only membership is tested against this list.
    homonymous_repetitions = set(
        get_string_list_from_file(
            'src_dict/Слова, омонимичные повторам в группе.txt'))
    homonyms_bs = get_string_list_from_file('src_dict/Омонимы БС.txt')

    homonyms_spec_note_relevant = []
    homonyms_spec_note_not_relevant = []
    homonyms_relevant = []
    homonyms_not_relevant = []

    for homonym in homonyms_bs:
        title_form = get_bs_title_word_form(homonym)

        # entries with the special note (".* <")
        has_spec_note = title_form.note.startswith('.* <')
        if has_spec_note:
            note = title_form.note.replace('.* <', '<').strip()
        else:
            # Also covers an empty note, which yields an empty string
            # and is dropped from the key by filter(None, ...).
            note = title_form.note.replace('.*', '*').strip()

        string_form = ' '.join(
            filter(None, [
                title_form.name,
                title_form.idf,
                ' '.join(title_form.info),
                note,
            ]))
        is_relevant = string_form in homonymous_repetitions

        if has_spec_note:
            if is_relevant:
                homonyms_spec_note_relevant.append(homonym)
            else:
                homonyms_spec_note_not_relevant.append(homonym)
        elif is_relevant:
            homonyms_relevant.append(homonym)
        else:
            homonyms_not_relevant.append(homonym)

    save_list_to_file(
        homonyms_spec_note_relevant,
        'out/О-мы БС спец. прим. совпадают с Слова, омонимичные повторам.txt')
    save_list_to_file(
        homonyms_spec_note_not_relevant,
        'out/О-мы БС спец. прим. не совпадают с Слова, омонимичные повторам.txt'
    )
    save_list_to_file(
        homonyms_relevant,
        'out/О-мы БС совпадают с Слова, омонимичные повторам.txt')
    save_list_to_file(
        homonyms_not_relevant,
        'out/О-мы БС не совпадают с Слова, омонимичные повторам.txt')