def get_attachment(self, word: TextPunctuation) -> dict:
    """Describe how *word* attaches, together with its text and signs.

    The word's text is used as a key into ``self.get_data().zero_syll_words``;
    the value found there is returned unchanged under ``'attachment'``.

    Args:
        word: Object exposing ``get_text()`` and ``get_punctuation()``.

    Returns:
        A new dict with the keys ``'attachment'``, ``'text'`` and ``'sign'``
        (inserted in that order).

    Raises:
        Whatever the ``zero_syll_words`` lookup raises when the text is not
        present (``KeyError`` if it is a plain dict -- not visible here).
    """
    token_text = word.get_text()
    token_sign = word.get_punctuation()
    rule = self.get_data().zero_syll_words[token_text]
    return {
        'attachment': rule,
        'text': token_text,
        'sign': token_sign,
    }
def clean(self, words: list) -> list:
    """Normalise the first of two adjacent tokens and merge one-letter words.

    ``words[0]`` (the current token) is scanned symbol by symbol together
    with its per-symbol signs; ``words[1]`` (the following token) is only
    inspected, except when the current token is merged into it.

    The current token is rejected -- ``[None, foll]`` is returned -- when:

    * its text is longer than one character and entirely upper-case;
    * a ``'.'`` is met while the following token is a ``TextPunctuation``
      whose text is not title-cased;
    * a ``constants.PUNCT`` symbol from ``dashes`` is met before any symbol
      has been kept;
    * a ``constants.HYPHEN`` symbol is the last symbol of the text;
    * a ``constants.PUNCT`` symbol outside ``hyphen_dashes`` is followed by
      any symbol whose sign is not ``constants.PUNCT``;
    * a lower-cased symbol is not in ``self.get_data().letters``.

    Symbols that are silently skipped: ``constants.PUNCT`` symbols from
    ``punctuation_to_erase`` seen before anything was kept, and
    ``constants.HYPHEN`` symbols that are not in final position.

    After the scan, a one-character neighbour for which
    ``self.is_zero_syll`` is true may be merged:

    * following token marked ``'to_preceding'``: its lower-cased text and its
      signs are appended to the cleaned token and the second slot of the
      result becomes ``None``;
    * cleaned current token marked ``'to_following'``: the following token is
      mutated in place (text and signs prefixed) and ``[None, foll]`` is
      returned.

    Args:
        words: Sequence whose first two items are the current and the
            following token. The current token must expose ``get_text()``
            and ``get_punctuation()``.

    Returns:
        A two-item list ``[cleaned_or_None, following_or_None]``.

    Raises:
        IndexError: if ``words`` is a list with fewer than two items.
    """
    curr, foll = words[0], words[1]
    raw_text = curr.get_text()

    if len(raw_text) != 1 and raw_text.isupper():
        return [None, foll]

    raw_signs = curr.get_punctuation()
    kept_text, kept_signs = [], []

    for i, sym in enumerate(raw_text):
        sign = raw_signs[i]

        if (sym == '.' and isinstance(foll, TextPunctuation)
                and not foll.get_text().istitle()):
            return [None, foll]

        # Leading punctuation: either dropped or a reason to reject.
        if not kept_text and sign == constants.PUNCT:
            if sym in punctuation_to_erase:
                continue
            if sym in dashes:
                return [None, foll]

        if sign == constants.HYPHEN:
            if i + 1 == len(raw_text):
                return [None, foll]
            continue

        if sign == constants.PUNCT and sym not in hyphen_dashes:
            # Everything after this symbol must be punctuation as well;
            # if so, the token ends here with what has been kept so far.
            if any(raw_signs[j] != constants.PUNCT
                   for j in range(i + 1, len(raw_text))):
                return [None, foll]
            return [TextPunctuation(''.join(kept_text), kept_signs), foll]

        sym_low = sym.lower()
        if sym_low not in self.get_data().letters:
            return [None, foll]
        kept_text.append(sym_low)
        kept_signs.append(sign)

    curr = TextPunctuation(''.join(kept_text), kept_signs)

    if isinstance(foll, TextPunctuation):
        if len(foll.get_text()) == 1:
            foll_low = TextPunctuation(foll.get_text().lower(),
                                       foll.get_punctuation())
            if self.is_zero_syll(foll_low.get_text()):
                attach = self.get_attachment(foll_low)
                if attach['attachment'] == 'to_preceding':
                    kept_text.append(foll_low.get_text())
                    # extend, not append: the signs must stay a flat
                    # sequence parallel to the text (the 'to_following'
                    # branch below concatenates them the same way).
                    kept_signs.extend(foll_low.get_punctuation())
                    foll = None
        elif len(curr.get_text()) == 1 and self.is_zero_syll(curr.get_text()):
            attach = self.get_attachment(curr)
            if attach['attachment'] == 'to_following':
                # The current token is absorbed into the following one,
                # which is modified in place.
                foll.set_text(curr.get_text() + foll.get_text())
                foll.set_punctuation(curr.get_punctuation()
                                     + foll.get_punctuation())
                return [None, foll]

    return [TextPunctuation(''.join(kept_text), kept_signs), foll]