Exemplo n.º 1
0
 def handle_text(self, line_delimiter="\n"):
     """Convert ``self.text`` into formatted output text and return it.

     The field delimiter is detected with ``helpers.find_delimiter`` and,
     when that returns a falsy value, with
     ``helpers.find_delimiter_euristic``; the result is stored on
     ``self.delimiter``. The text is then split on *line_delimiter* and
     each line accepted by ``helpers.is_string_good`` is normalised with
     ``helpers.handle_line``, split on the delimiter into its first two
     fields and rendered through the template returned by
     ``helpers.handle_words_or_word``.

     Lines whose split raises ``ValueError`` (e.g. fewer than two fields)
     are logged and skipped; lines that render to an empty string are
     dropped as well.

     Args:
         line_delimiter: Separator used to split ``self.text`` into lines.

     Returns:
         The rendered lines, each terminated by ``"\\n"``. The same value
         is stored on ``self.handled_text``.
     """
     delimiter = helpers.find_delimiter(self.text)
     if not delimiter:
         # Fall back to the heuristic detector when the primary one fails.
         delimiter = helpers.find_delimiter_euristic(self.text)
     self.delimiter = delimiter

     # Collect rendered lines and join once instead of growing a string
     # with repeated concatenation inside the loop.
     result_lines = []
     for line in self.text.split(line_delimiter):
         # The output is built here.
         if not helpers.is_string_good(line, self.delimiter):
             continue
         line = helpers.handle_line(line)
         words = self.split_string_to_words(line)
         try:
             # Only the first two fields are used; extra fields are ignored.
             eng_word, rus_word = line.split(self.delimiter)[0:2]
         except ValueError:
             logging.error("problem with delimiter: {} in line:{}".format(self.delimiter, line))
             continue
         if helpers.is_english(rus_word) or helpers.is_russian(eng_word):
             # The two fields appear to be in the opposite order: swap them.
             eng_word, rus_word = rus_word, eng_word
         # NOTE(review): the template presumably contains {eng}, {rus} and
         # {tab} placeholders -- confirm against helpers.handle_words_or_word.
         result_line = helpers.handle_words_or_word(words).format(eng=eng_word, rus=rus_word, tab="\t")
         if result_line:
             result_lines.append(result_line)

     result_text = "".join(result_line + "\n" for result_line in result_lines)
     self.handled_text = result_text
     return result_text
Exemplo n.º 2
0
 def split_string_to_words(self, line):
     """Return the matches found in *line*, excluding popular words.

     A line rejected by ``helpers.is_string_good`` (checked against
     ``self.delimiter``) yields an empty list. Otherwise every match of
     ``self.left_part_regexp.findall(line)`` that is not contained in
     ``self.set_of_popular_words`` is returned, in match order.
     """
     if not helpers.is_string_good(line, self.delimiter):
         return []

     # NOTE(review): left_part_regexp is presumably a compiled pattern
     # covering the left-hand field of the line -- confirm where it is set.
     kept = []
     for candidate in self.left_part_regexp.findall(line):
         if candidate in self.set_of_popular_words:
             continue
         kept.append(candidate)
     return kept