Пример #1
0
 def get_datetime(self, line: str):
     """
     Parse the first digit run of a line as a DDMMYYYY date.

     The line is passed through ``clean_string`` with
     ``self.ALLOWED_CHARS_ALPHANUM`` and then scanned for runs of one to
     eight digits. ``None`` is returned unless at least two runs are found
     and the first one is eight digits long; in that case the first run is
     parsed with the ``%d%m%Y`` format.
     """
     sanitized = clean_string(line, charset=self.ALLOWED_CHARS_ALPHANUM)
     digit_runs = re.findall(r"\d{1,8}", sanitized)

     # A single digit run is not enough: two or more must be present.
     if len(digit_runs) < 2:
         return None

     candidate = digit_runs[0]
     if not candidate.isdigit() or len(candidate) < 8:
         return None

     return datetime.strptime(candidate, '%d%m%Y')
Пример #2
0
    def _is_words_and_pay_line(self, first_desire_word: str,
                               line: str) -> bool:
        """
        Check whether ``line`` has a "word + amount" shape and whether its
        leading word resembles ``first_desire_word``.

        The line goes through ``_remove_spaces_and_to_lower`` and then
        ``clean_string`` with a charset of lower-case letters, digits and
        the dot before any pattern is applied.
        """
        normalized = clean_string(
            self._remove_spaces_and_to_lower(line),
            charset="abcdefghijklmnopqrstuvwxyz0123456789.")

        # The body must contain letters followed by either an amount with
        # two decimals or a run of at least three digits.
        has_word_then_amount = (
            re.search(r"[a-z]+\d+\.\d{2}", normalized)
            or re.search(r"[a-z]+\d{3,}", normalized))
        if not has_word_then_amount:
            return False

        # Reject lines that are only letters, only digits, or only a
        # decimal number.
        is_single_token = (
            re.search(r"^[a-z]+$", normalized)
            or re.search(r"^\d+$", normalized)
            or re.search(r"^\d+\.\d+$", normalized))
        if is_single_token:
            return False

        # Whatever precedes the amount is the candidate word; the amount
        # pattern used for the split depends on whether a dot is present.
        amount_pattern = r"\d+\.\d{2}" if "." in normalized else r"\d{3,}"
        leading_word = re.split(amount_pattern, normalized)[0]
        if not leading_word:
            return False

        return (
            self._is_it_similar(leading_word, first_desire_word)
            or self._is_it_phonetic_similar(leading_word, first_desire_word)
        )
Пример #3
0
 def clean_line(self, line: str, charset: str = None) -> str:
     """
     Upper-case ``line`` and pass it through ``clean_string``.

     When ``charset`` is empty or ``None``, the upper-case ASCII letters
     plus the digits 0-9 are used as the charset.
     """
     if not charset:
         charset = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
     uppercased = line.upper()
     return clean_string(uppercased, charset=charset)
Пример #4
0
    def get_amount_format(self, line):
        """
        Format the first number found in ``line`` with a dot before its
        last two digits.

        The line is passed through ``clean_string`` with
        ``self.ALLOWED_CHARS_ALPHANUM`` and then handed to
        ``self.get_numbers``; only the first value it returns is used.

        Returns:
            The formatted string (for example ``"1234"`` -> ``"12.34"``),
            or ``None`` when ``get_numbers`` returns nothing.
        """
        value = clean_string(line, charset=self.ALLOWED_CHARS_ALPHANUM)
        amount = self.get_numbers(value)

        if not amount:
            return None

        # Pad to at least two characters so a one-digit value such as "5"
        # becomes ".05". Without the padding the split index would be -1
        # and the result would be ".5", ten times too large.
        amount_str = str(amount[0]).zfill(2)
        return amount_str[:-2] + "." + amount_str[-2:]
Пример #5
0
 def get_next_line(self, list_data: list):
     """
     Pop the first entry of ``list_data`` and wrap it in a ``Line``.

     The entry's ``"uuid"`` and ``"text"`` values are read with ``.get``;
     the ``Line`` also carries the text passed through ``clean_string``
     (with ``self.ALLOWED_CHARS_ALPHANUM``) and upper-cased, plus the
     length of the raw text. ``list_data`` is mutated by the pop.

     When ``list_data`` is empty, a ``Line`` with ``uuid="-1"`` is returned.
     """
     if not list_data:
         return Line(uuid="-1")

     entry = list_data.pop(0)
     raw_text = entry.get("text")
     line_uuid = entry.get("uuid")
     cleaned_upper = clean_string(
         raw_text, charset=self.ALLOWED_CHARS_ALPHANUM).upper()
     return Line(
         uuid=line_uuid,
         text=raw_text,
         text_clean=cleaned_upper,
         text_len=len(raw_text),
     )
Пример #6
0
    def get_date(self, line: str) -> "datetime | None":
        """
        Get the date value of FECHA 26/11/2020 09:35:53

        The upper-cased line is passed through ``clean_string`` with the
        charset ``0123456789 :-/+.`` and split on ``-``, ``/``, whitespace
        and ``+`` to obtain the date pieces. The time pieces come from
        splitting the output of ``self._translate_line`` on ``:`` and ``.``;
        the last three pieces are read as hour, minute and second.

        Returns:
            A ``datetime`` built from the parsed pieces, or ``None`` when
            the line is empty or any piece is missing or not a valid number.
        """
        if line == "":
            return None

        cleaned_line = clean_string(line.upper(), charset="0123456789 :-/+.")
        date = re.split(r'-|/|\s|\+', cleaned_line)
        # NOTE(review): _translate_line is not visible here; the time
        # parsing below relies on whatever it does to the cleaned text.
        cleaned_line = self._translate_line(cleaned_line)
        time_items = re.split(r":|\.", cleaned_line)

        try:
            if len(date) >= 4:
                # Four or more pieces: the last piece is not part of the
                # date, so year/month/day are the three pieces before it.
                year_token, month_token, day_token = (
                    date[-2], date[-3], date[-4])
            else:
                # Fewer pieces: the year is in the last piece.
                year_token, month_token, day_token = (
                    date[-1], date[-2], date[-3])

            # Take the first 4-digit (or, failing that, 2-digit) run as year.
            year = re.findall(r"\d{4}|\d{2}", year_token)[0]
            return datetime(
                year=int(year),
                month=int(month_token),
                day=int(day_token),
                hour=int(time_items[-3]),
                minute=int(time_items[-2]),
                second=int(time_items[-1]),
            )
        except (ValueError, IndexError):
            # Missing pieces, non-numeric pieces and out-of-range values
            # all mean the line could not be parsed.
            return None
Пример #7
0
 def get_quantity_without_text(self, text: str, number_zero: int):
     """
     Pass ``text`` through ``clean_string`` with the charset
     ``0123456789. `` and hand the result, together with ``number_zero``,
     to ``self.format_quantity``, returning whatever that returns.
     """
     numeric_text = clean_string(text, "0123456789. ")
     formatted = self.format_quantity(numeric_text, number_zero)
     return formatted
Пример #8
0
def clean_headers(headers):
    """
    Return a new list in which every header has been passed through
    ``clean_string`` and then ``title_to_snake``.

    All headers are cleaned first; the conversion runs as a second pass.
    """
    cleaned = list(map(clean_string, headers))
    return list(map(title_to_snake, cleaned))