def create_non_spaced_string(self, text, diff_tuples, size_filo, search_range_filo):
    """Rebuild ``text`` while dropping characters flagged by ``diff_tuples``.

    The text is padded and pushed character by character through a
    ``Ranged_Filo`` window. After each push the current middle items of the
    window are compared with the entries of ``diff_tuples``; when they match
    an entry, the character in the middle of the window is left out of the
    result.

    Args:
        text: The text to process.
        diff_tuples: Mutable sequence of window tuples whose middle character
            must be dropped. NOTE: it is modified in place - the first entry
            matching the current window is overwritten with the string
            ``"done"`` so that it is not matched a second time.
        size_filo: Size passed to ``Ranged_Filo``; also passed to
            ``Random.append_pad_values`` as the pad amount.
        search_range_filo: Search range passed to ``Ranged_Filo``.

    Returns:
        str: The concatenation of all middle characters that were neither
        padding, ``None``, nor part of a matched diff tuple.
    """
    PADDING_CHAR = '¦'

    # Pad values because of filos (presumably so every real character gets to
    # pass through the middle position of the window - TODO confirm).
    text_padded = Random.append_pad_values(text, size_filo, PADDING_CHAR)

    current_chars_filo = Ranged_Filo(size_filo, search_range_filo, True)
    filo_mid_index = current_chars_filo.get_middle_index()

    kept_chars = []
    for char in text_padded:
        current_chars_filo.push(char)

        current_tuple = current_chars_filo.get_middle_items(True, True)
        current_middle_char = current_tuple[filo_mid_index]

        # If the current window matches a diff tuple, its middle char must
        # not end up in the final string.
        its_a_diff_tuple = False
        for diff_tuple_index, diff_tuple in enumerate(diff_tuples):
            if current_tuple == diff_tuple:
                # Mark this tuple as corrected so it only matches once.
                diff_tuples[diff_tuple_index] = "done"
                its_a_diff_tuple = True
                break

        if its_a_diff_tuple or current_middle_char is None:
            continue

        # Compare by value, not identity: identity of equal strings is an
        # interpreter implementation detail.
        if current_middle_char != PADDING_CHAR:
            kept_chars.append(current_middle_char)

    return "".join(kept_chars)
def add_linebreaks(self, previous_line, current_line, previous_line_index, sd_line_index, line_heigth_info):
    """Generate the linebreaks that fill the vertical gap between two lines.

    The gap between the bottom of ``previous_line`` and the top of
    ``current_line`` is divided by the line distance stored for the current
    line; the rounded quotient is the number of pad values requested.

    Args:
        previous_line: Object with a 4-item ``coordinates`` attribute
            ``(x_start, y_start, x_stop, y_stop)``, or ``None`` when there is
            no previous line.
        current_line: Object with a 4-item ``coordinates`` attribute.
        previous_line_index: Unused by this method.
        sd_line_index: Index into ``line_heigth_info`` for the current line.
        line_heigth_info: Indexable collection whose items provide
            ``get_line_distance()``.

    Returns:
        The result of ``Random.append_pad_values("", n, "\\n")`` (presumably
        a string of ``n`` newlines - TODO confirm) when the rounded count
        ``n`` is positive; otherwise ``None``. ``None`` is also returned when
        there is no previous line, when the lines touch or overlap
        vertically, or when the line distance is zero.
    """
    MODE = 'TAKE_CURRENT_LINE_DIST'

    if previous_line is None:
        return None

    # Value comparison: `is` on string literals depends on interning.
    if MODE == 'TAKE_CURRENT_LINE_DIST':
        MARGIN = 0  # tolerance margin
        current_lh_info = line_heigth_info[sd_line_index]

        # Only the vertical extent matters here: bottom of the previous line
        # and top of the current one.
        _, _, _, yp_stop = previous_line.coordinates
        _, yc_start, _, _ = current_line.coordinates

        y_dist = yc_start - yp_stop
        if y_dist <= 0:
            return None

        line_distance = current_lh_info.get_line_distance()
        if line_distance == 0:
            # No usable line distance: avoid a ZeroDivisionError and add no
            # linebreaks.
            return None

        y_times = (y_dist + MARGIN) / line_distance
        y_times_absolute = TypeCasts.round_to_int(y_times)
        if y_times_absolute > 0:
            return Random.append_pad_values("", y_times_absolute, "\n")
        return None

    self.cpr.print("Undefined case reached shouldn't happen")
    return None
def compare_ocr_strings_hamming(ocr_string1, ocr_string2, pad_difference=True):
    """Return the Hamming distance between two OCR strings.

    Args:
        ocr_string1: First string to compare.
        ocr_string2: Second string to compare.
        pad_difference: When exactly ``True``, the shorter string is extended
            through ``Random.append_pad_values`` by the length difference
            before the comparison, so that both inputs have equal length
            (assuming the helper appends one pad value per count - TODO
            confirm). Any other value skips the padding.

    Returns:
        Whatever ``distpkg.hamming`` returns for the (possibly padded)
        strings.
    """
    if pad_difference is True:
        # Positive gap: first string is longer; negative: second is longer.
        length_gap = len(ocr_string1) - len(ocr_string2)
        if length_gap > 0:
            ocr_string2 = Random.append_pad_values(ocr_string2, length_gap)
        elif length_gap < 0:
            ocr_string1 = Random.append_pad_values(ocr_string1, -length_gap)

    return distpkg.hamming(ocr_string1, ocr_string2)