Пример #1
0
 def Parser(self, txt):
     """Read a text file and append the syllables of its words to ``verbs_collection``.

     The file content is lower-cased, the punctuation characters
     ``, ' [ ] . ( ) ; : -`` are removed, and the remaining text is split
     on whitespace.  Every resulting word is passed to
     ``rusyllab.split_words`` and the returned pieces are appended to the
     external ``verbs_collection`` list.

     Args:
         txt: Path of the text file to read.
     """
     # One translate() pass deletes the same characters the chained
     # replace() calls used to remove one by one.
     strip_punctuation = str.maketrans('', '', ",'[].();:-")

     # The context manager closes the file even if reading fails; the
     # previous explicit close() was skipped whenever an exception occurred.
     with open(txt) as f:
         words = f.read().lower().translate(strip_punctuation).split()

     for word in words:
         # Each word is whitespace-free, so it is passed as a one-item list.
         verbs_collection.extend(rusyllab.split_words([word]))
Пример #2
0
def count_of_syllables():
    """Count how often each syllable occurs in ``comments/clean_comments.txt``.

    Every line of the file is stripped, lower-cased and split on whitespace;
    the resulting words are passed to ``rusyllab.split_words`` and each
    returned item is tallied.

    Returns:
        dict: Mapping of each item returned by ``rusyllab.split_words`` to
        the number of times it was seen across the whole file.
    """
    counts = Counter()
    # The context manager guarantees the file is closed; the handle used to
    # be left open for the lifetime of the process.
    with open("comments/clean_comments.txt", "r") as file:
        for line in file:
            words = line.strip().lower().split()
            counts.update(rusyllab.split_words(words))
    return dict(counts)
Пример #3
0
def get_rhyme_ending(word):
    """Return the tail of ``word`` starting at its stressed syllable.

    ``accent.put_stress`` is asked to annotate the word; the position of the
    first apostrophe in its result is used to locate the stressed letter
    (presumably the apostrophe follows the stressed vowel - verify against
    the ``accent`` module).  That letter is upper-cased so the syllable that
    contains it can be recognised after syllabification.

    Args:
        word: A single word.

    Returns:
        ``word`` unchanged when no apostrophe is found; otherwise the
        lower-cased concatenation of the syllables from the first one that
        is not entirely lower-case to the end.  ``None`` if no such
        syllable exists.
    """
    apostrophe_at = accent.put_stress(word).find('\'')
    if apostrophe_at == -1:
        return word

    # Mark the letter immediately before the apostrophe with upper case.
    marked_index = apostrophe_at - 1
    letters = list(word)
    letters[marked_index] = letters[marked_index].upper()

    syllables = rusyllab.split_words([''.join(letters)])
    for start, syllable in enumerate(syllables):
        if not syllable.islower():
            return ''.join(syllables[start:]).lower()
    return None
Пример #4
0
 def Parser_input(self):
     """Syllabify the text of ``self.plainTextEdit`` into ``verbs_collection``.

     The widget's plain text is lower-cased, the punctuation characters
     ``, ' [ ] . ( ) ; : -`` are removed and the rest is split on
     whitespace.  Each word is passed to ``rusyllab.split_words`` and the
     returned pieces are appended to the external ``verbs_collection`` list.
     """
     text = self.plainTextEdit.toPlainText().lower()
     for mark in (',', "'", "[", "]", ".", "(", ")", ";", ":", "-"):
         text = text.replace(mark, "")

     for word in text.split():
         pieces = rusyllab.split_words(word.split())
         verbs_collection.extend(pieces)
Пример #5
0
def check(word):
    """Validate ``word`` and dispatch it to the matching handler.

    The checks run in this order:

    1. more than one whitespace-separated token -> fixed refusal message;
    2. a single character contained in ``cons`` -> fixed short reply;
    3. the literal ``"/start"`` command -> greeting;
    4. 20 characters or more -> fixed "slow down" message;
    5. otherwise the word is split with ``rusyllab.split_words`` and handed
       to ``check_cons`` when the first character of the first syllable is
       in ``cons``, or to ``check_vow`` when it is not.

    Args:
        word: The raw input string.

    Returns:
        A fixed reply string, or whatever the selected handler returns.
    """
    if len(word.split()) > 1:
        return "Слишком много слов"
    if len(word) == 1 and word in cons:
        return 'чё'
    if word == "/start":
        return "Здаров, епт"
    if len(word) >= 20:
        return "Браток, помедленней"

    syllables = rusyllab.split_words([word])
    first_letter = syllables[0][0]
    if first_letter in cons:
        return check_cons(syllables, word)
    return check_vow(syllables, word)
Пример #6
0
def check_cons(syllables, word):
    """Blank out consonants of the first syllable, then defer to ``check_vow``.

    The letters of the first syllable are scanned in order.  For every
    letter found in ``cons`` the slot at the running consonant count is
    blanked and the count is incremented.  Scanning stops at the first
    letter that is in ``conc`` or ``vow``.  If three or more consonants have
    been counted and there is only one syllable, a prefixed form of the
    whole word is returned immediately.

    Args:
        syllables: Syllables of the word; the first one is processed.
        word: The original word, forwarded unchanged to ``check_vow``.

    Returns:
        The prefixed word for the short-circuit case, otherwise the result
        of ``check_vow`` on the re-syllabified remainder.
    """
    letters = list(syllables[0])
    blanked = 0
    for letter in letters:
        if letter in cons:
            # Slots are blanked by consonant count, not by loop position.
            letters[blanked] = ""
            blanked += 1
        if blanked >= 3 and len(syllables) < 2:
            return "хуе" + "".join(syllables)
        if letter in conc or letter in vow:
            break

    remainder = "".join(letters) + "".join(syllables[1:])
    resplit = rusyllab.split_words([remainder])
    return check_vow(resplit, word)
Пример #7
0
def answer2pieces(answer_str, max_answer_len):
    """Convert an answer string into a sequence of pieces.

    The splitting scheme is selected by the external
    ``answer_representation`` setting:

    * ``'chars'`` - the string wrapped in ``BEG_CHAR``/``END_CHAR`` is
      handed to ``rpad_chars`` together with ``max_answer_len``;
    * ``'syllables'`` - the whitespace-separated words are split with
      ``rusyllab.split_words``;
    * ``'sentencepiece'`` - the string is encoded with
      ``spm_encoder.EncodeAsPieces``.

    For the two list-based schemes the pieces are wrapped in
    ``BEG_CHAR``/``END_CHAR`` and right-padded with ``PAD_CHAR`` up to
    ``max_answer_len``; longer sequences are returned as they are.

    Args:
        answer_str: The answer text.
        max_answer_len: Target length used for padding.

    Returns:
        The result of ``rpad_chars`` for ``'chars'``, otherwise a list of
        pieces.

    Raises:
        NotImplementedError: For any other ``answer_representation`` value.
    """
    if answer_representation == 'chars':
        # character-level split
        return rpad_chars(BEG_CHAR + answer_str + END_CHAR, max_answer_len)

    if answer_representation == 'syllables':
        # syllable-level split
        pieces = rusyllab.split_words(answer_str.split())
    elif answer_representation == 'sentencepiece':
        pieces = spm_encoder.EncodeAsPieces(answer_str)
    else:
        raise NotImplementedError()

    wrapped = [BEG_CHAR] + pieces + [END_CHAR]
    shortfall = max_answer_len - len(wrapped)
    if shortfall > 0:
        wrapped = wrapped + [PAD_CHAR] * shortfall
    return wrapped
    def tokenize(self,
                 text,
                 use_preproc=False,
                 use_stem=False,
                 use_lemm=False,
                 check_length=True,
                 check_stopwords=True):
        """Split ``text`` into syllables, optionally preprocessing it first.

        Args:
            text: The input string.
            use_preproc: When true, ``text`` is first run through
                ``self.preprocessor.preproc`` and the first element of its
                result is syllabified instead of the raw text.
            use_stem: Forwarded to the preprocessor.
            use_lemm: Forwarded to the preprocessor.
            check_length: Forwarded to the preprocessor.
            check_stopwords: Forwarded to the preprocessor.

        Returns:
            list: The items returned by ``rusyllab.split_words`` for the
            whitespace-separated words, with single-space items removed.
        """
        if use_preproc:
            prepared, _ = self.preprocessor.preproc(
                text,
                use_lemm=use_lemm,
                use_stem=use_stem,
                check_stopwords=check_stopwords,
                check_length=check_length)
        else:
            prepared = text

        pieces = rusyllab.split_words(prepared.split())
        return [piece for piece in pieces if piece != ' ']
Пример #9
0
def split_word(text):
    """Return the syllables of ``text`` as produced by ``rusyllab.split_words``.

    The text is stripped, lower-cased and split on whitespace before being
    handed to the syllabifier.

    Args:
        text: The input string (one or more words).

    Returns:
        The value returned by ``rusyllab.split_words`` for the word list.
    """
    words = text.strip().lower().split()
    return rusyllab.split_words(words)