Пример #1
0
    def scrape_url(
        self,
        url,
        parser='html.parser',
        tag_to_find='p',
        sent_delimiter='。',
        timeout=None,
    ):
        """Download a page and split the text of the selected tags into sentences.

        :param url: address of the page to fetch with ``requests.get``
        :param parser: parser name handed to ``BeautifulSoup``
        :param tag_to_find: name of the tag whose text is extracted
        :param sent_delimiter: string on which each tag's text is split;
            defaults to the ideographic full stop used previously
        :param timeout: forwarded to ``requests.get``; ``None`` (default)
            keeps the previous behaviour of waiting without a limit
        :return: list of trimmed sentences in document order, or ``None`` when
            any exception occurred (the exception is logged, not re-raised)
        """
        try:
            resp = requests.get(url=url, timeout=timeout)
            soup = BeautifulSoup(resp.content, parser)
            sents = []
            for cont in soup.find_all(tag_to_find):
                txt = StringUtils.trim(cont.get_text())
                # Empty fragments are dropped before trimming
                sent_list = [
                    StringUtils.trim(s) for s in txt.split(sent_delimiter) if s
                ]
                sents.extend(sent_list)
                Log.debug('Split "' + str(txt) + '" into:' + str(sent_list))
            return sents
        except Exception as ex:
            Log.error(
                str(self.__class__) + ' ' +
                str(getframeinfo(currentframe()).lineno) +
                ': Error scraping url "' + str(url) + '", exception: ' +
                str(ex))
            # Make the failure result explicit for callers
            return None
Пример #2
0
 def verify_totp_style(
         self,
         # We test for <tolerance_secs> back
         tolerance_secs=30):
     """Check the stored test challenge against time-based challenge responses.

     A challenge response is computed for every second from "now" back to
     ``tolerance_secs - 1`` seconds ago and compared with the test challenge.

     :param tolerance_secs: number of one-second steps into the past to try
     :return: the truthy comparison result on the first match, otherwise
         ``False``; ``False`` is also returned when any exception occurs
         (the exception is logged)
     """
     now = datetime.now()
     try:
         for i in range(tolerance_secs):
             t_test = now - timedelta(seconds=i)
             Log.debugdebug(
                 str(self.__class__) + ' ' +
                 str(getframeinfo(currentframe()).lineno) + ': Trying ' +
                 str(t_test.strftime('%Y-%m-%d %H:%M:%S')))
             test_challenge_calc = AccessTokenSharedsecretChallenge.create_totp_style_challenge_response(
                 shared_secret=self.shared_secret,
                 datetime_val=t_test,
                 algo_hash=self.algo_hash)
             res = self.__compare_test_challenge(
                 test_challenge_calc=test_challenge_calc)
             if res == True:
                 return res
         return False
     except Exception as ex:
         # The shared secret is deliberately left out of the log message so
         # that credentials never end up in log files.
         Log.error(
             str(self.__class__) + ' ' +
             str(getframeinfo(currentframe()).lineno) +
             ': Exception verifying totp style test challenge "' +
             str(self.test_challenge) + '": ' + str(ex))
         return False
Пример #3
0
    def __pre_process_training_data(
            self
    ):
        """Fetch the training data if needed, validate its type and filter it.

        When ``is_training_data_ready`` is false the data is fetched from
        ``training_data_source.fetch_data()``. The data must be exactly a
        ``tdm.TrainingDataModel``; if ``y_id`` is set, the data is filtered
        down to that label.

        :raises Exception: when fetching fails or the data has the wrong type
        """
        if not self.is_training_data_ready:
            try:
                self.training_data = self.training_data_source.fetch_data()
            except Exception as fetch_err:
                where = str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                errmsg = (
                    where
                    + ': Exception calling external object type "'
                    + str(type(self.training_data_source))
                    + '" method fetch_data(), exception msg: '
                    + str(fetch_err)
                )
                Log.error(errmsg)
                raise Exception(errmsg)

        data_type = type(self.training_data)
        if data_type is not tdm.TrainingDataModel:
            where = str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
            raise Exception(
                where + ': "' + str(self.identifier_string)
                + '": Wrong training data type "' + str(data_type) + '".'
            )

        # Nothing more to do unless a single y/label ID was requested
        if self.y_id is None:
            return

        # Train a single y/label ID only, regardless of train mode
        self.training_data.filter_by_y_id(y_id=self.y_id)
        return
Пример #4
0
 def send(self, user, password, recipients_list, message):
     """Optionally log in, send one message and close the server connection.

     :param user: login name, also used as the sender address
     :param password: login password; ``None`` or ``''`` skips the login step
     :param recipients_list: recipient addresses passed to ``sendmail``
     :param message: message passed to ``sendmail`` as ``msg``
     :raises Exception: wrapping any error raised while logging in, sending
         or closing; the original exception is chained as the cause
     """
     try:
         try:
             if password not in [None, '']:
                 self.server.login(user=user, password=password)
                 Log.important(
                     str(self.__class__) + ' ' +
                     str(getframeinfo(currentframe()).lineno) +
                     ': Login for user "' + str(user) + '" successful.')
             else:
                 # If no password passed in, no need to do login
                 Log.warning(
                     str(self.__class__) + ' ' +
                     str(getframeinfo(currentframe()).lineno) +
                     ': Not doing login for user "' + str(user) +
                     '", no password given "' + str(password) + '"')
             self.server.sendmail(from_addr=user,
                                  to_addrs=recipients_list,
                                  msg=message)
             Log.important(
                 str(self.__class__) + ' ' +
                 str(getframeinfo(currentframe()).lineno) + ': Message from ' +
                 str(user) + ' to ' + str(recipients_list) +
                 ' sent successfully. Closing server..')
         finally:
             # Release the connection on failure too, not only after a
             # successful send.
             self.server.close()
         Log.info(
             str(self.__class__) + ' ' +
             str(getframeinfo(currentframe()).lineno) + ': Mail server "' +
             str(self.mail_server_url) + '" closed')
     except Exception as ex:
         errmsg = str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)\
                  + ': Exception sending mail from ' + str(user) + ' to ' + str(recipients_list)\
                  + '. Got exception ' + str(ex) + '.'
         Log.error(errmsg)
         raise Exception(errmsg) from ex
Пример #5
0
    def decode(self, ciphertext):
        """Decrypt a base64-encoded AES ciphertext and return it as text.

        Supports ``AES.MODE_EAX`` and ``AES.MODE_CBC``. In CBC mode the last
        decrypted byte is read as the number of padding bytes to strip. The
        EAX branch only calls ``decrypt()``; no tag is checked in this method.

        :param ciphertext: base64 text, encoded with ``self.text_encoding``
            before being base64-decoded
        :return: decrypted bytes decoded with ``STR_ENCODING``
        :raises Exception: wrapping any failure, including an unsupported mode
        """
        try:
            if self.cipher_mode == AES.MODE_EAX:
                cipher = AES.new(key=self.key,
                                 mode=self.cipher_mode,
                                 nonce=self.nonce)
                cipherbytes = b64decode(ciphertext.encode(self.text_encoding))
                data = cipher.decrypt(cipherbytes)
            elif self.cipher_mode == AES.MODE_CBC:
                cipher = AES.new(key=self.key,
                                 mode=self.cipher_mode,
                                 iv=self.nonce)
                cipherbytes = b64decode(ciphertext.encode(self.text_encoding))
                data = cipher.decrypt(cipherbytes)
                # Report the real remainder modulo 16; the previous
                # expression (len % 128 / 8) did not match its label.
                Log.debugdebug(
                    str(self.__class__) + ' ' +
                    str(getframeinfo(currentframe()).lineno) +
                    ': Decrypted data length = ' + str(len(data)) +
                    ', modulo 16 = ' + str(len(data) % 16))
                # Remove last x bytes encoded in the padded bytes
                data = data[:-data[-1]]
            else:
                raise Exception(
                    str(self.__class__) + ' ' +
                    str(getframeinfo(currentframe()).lineno) +
                    ': Unsupported mode "' + str(self.cipher_mode) + '".')

            return str(data, encoding=STR_ENCODING)
        except Exception as ex:
            errmsg = str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno) \
                    + ': Error decoding data "' + str(ciphertext) + '" using AES ". Exception: ' + str(ex)
            Log.error(errmsg)
            raise Exception(errmsg)
Пример #6
0
    def prepare_message(from_addr, to_addrs_list, subject, text, files=None):
        """Build a multipart email with optional attachments and serialize it.

        :param from_addr: value of the ``From`` header
        :param to_addrs_list: recipients, joined with ``SendMail.COMMASPACE``
        :param subject: value of the ``Subject`` header
        :param text: body text attached as ``MIMEText``
        :param files: optional list of file paths; only the paths accepted by
            the attachment validity/size check are attached
        :return: the message as a string
        :raises Exception: wrapping any error raised while building the message
        """
        try:
            email = MIMEMultipart()
            email['From'] = from_addr
            email['To'] = SendMail.COMMASPACE.join(to_addrs_list)
            email['Date'] = formatdate(localtime=True)
            email['Subject'] = subject
            email.attach(MIMEText(text))

            approved = SendMail.__attach_file_check_validity_and_size(
                files_attachment_list=files,
                max_total_files_size=SendMail.MAX_TOTAL_FILES_SIZE_MB_EMAIL_ATTCH)
            if not approved:
                approved = []

            for attachment_path in approved:
                file_name = os.path.basename(attachment_path)
                with open(attachment_path, "rb") as handle:
                    payload = handle.read()
                # The file is closed by now; only the bytes are needed
                part = MIMEApplication(payload, Name=file_name)
                part['Content-Disposition'] = 'attachment; filename="%s"' % file_name
                email.attach(part)
            return email.as_string()
        except Exception as build_err:
            errmsg = str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno)\
                     + ': Error creating email message: ' + str(build_err)
            Log.error(errmsg)
            raise Exception(errmsg)
Пример #7
0
 def get_model_file_prefix(dir_path_model, model_name, identifier_string,
                           is_partial_training):
     """Return the model path prefix, creating it as a directory if required.

     The path is ``dir_path_model + '/' + model_name + '.' + identifier_string``.
     For partial training the path is used as a directory and is created with
     ``os.mkdir`` when it does not exist yet; otherwise it is only a prefix.

     :return: the path prefix / directory path
     :raises Exception: when the directory cannot be created
     """
     target = dir_path_model + '/' + model_name + '.' + identifier_string

     if not is_partial_training:
         Log.important(
             str(__name__) + ' ' +
             str(getframeinfo(currentframe()).lineno) +
             ': Using path prefix "' + str(target) + '"')
         return target

     # Partial training: the prefix must be an existing directory
     if os.path.isdir(target):
         return target

     Log.important(
         str(__name__) + ' ' +
         str(getframeinfo(currentframe()).lineno) + ': Path "' +
         str(target) +
         '" does not exist. Trying to create this directory...')
     try:
         os.mkdir(path=target)
         Log.important(
             str(__name__) + ' ' +
             str(getframeinfo(currentframe()).lineno) + ': Path "' +
             str(target) + '" successfully created.')
     except Exception as mkdir_err:
         errmsg = (
             str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno)
             + ': Error creating directory "' + str(target)
             + '". Exception ' + str(mkdir_err) + '.'
         )
         Log.error(errmsg)
         raise Exception(errmsg)
     return target
Пример #8
0
    def read_text_file(
        filepath,
        encoding='utf-8',
        throw_exception=False,
    ):
        """Read a text file and return its lines, line endings included.

        :param filepath: path of the file to read
        :param encoding: text encoding used to open the file
        :param throw_exception: when true, a failure to open the file raises;
            otherwise the error is logged and an empty list is returned
        :return: list of lines exactly as yielded by the file object
        :raises Exception: when the file cannot be opened and
            ``throw_exception`` is true
        """
        try:
            fh = open(filepath, 'r', encoding=encoding)
        except IOError as e:
            errmsg = str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno) \
                     + ': Cannot open file [' + str(filepath) + ']. ' + str(e)
            Log.error(errmsg)
            if throw_exception:
                raise Exception(errmsg)
            return []

        # The context manager closes the handle even when reading raises
        # (for example on a decoding error), which previously leaked it.
        with fh:
            return list(fh)
Пример #9
0
    def process_common_words(self, word_split_token=' '):
        """Normalize ``self.raw_words`` and derive ``self.common_words`` from it.

        The raw text is trimmed, the characters matched by the pattern below
        are replaced by ``word_split_token``, and the result is lowercased.
        It is then split on the token, empty items are dropped, the result of
        ``add_word_stems()`` (if truthy) is prepended, and the words are
        stored de-duplicated and sorted.

        :param word_split_token: separator used both as replacement and as
            split delimiter
        :raises Exception: wrapping any failure in either stage
        """
        try:
            self.raw_words = StringUtils.trim(self.raw_words)
            self.raw_words = re.sub(
                '[\xa0\t\n\r]', word_split_token, self.raw_words
            ).lower()
        except Exception as raw_err:
            errmsg = str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno)\
                     + ': Error processing raw words. Exception: ' + str(raw_err)
            Log.error(errmsg)
            raise Exception(errmsg)

        try:
            # Remove None, '', {}, etc.
            self.common_words = [
                token for token in self.raw_words.split(word_split_token) if token
            ]

            stems = self.add_word_stems()
            if stems:
                self.common_words = stems + self.common_words

            unique_words = set(self.common_words)
            self.common_words = sorted(unique_words)
            Log.info(
                str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno)
                + ': Loaded ' + str(len(self.common_words))
                + ' common words of lang "' + str(self.lang) + '".'
            )
        except Exception as words_err:
            errmsg = str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno)\
                     + ': Error processing common words. Exception: ' + str(words_err)
            Log.error(errmsg)
            raise Exception(errmsg)

        return
Пример #10
0
 def import_form_fields(
         list_json,
         mex_form_model
 ):
     """Build form fields from JSON field objects and their mex expressions.

     Each JSON object receives its trimmed mex expression under
     ``ffld.FormField.KEY_MEX_EXPR`` (the object is modified in place) and is
     then imported via ``ffld.FormField.import_form_field``.

     :param list_json: JSON field objects
     :param mex_form_model: mex expressions, one per field, in the same order
     :return: list of imported form fields
     :raises Exception: when the two lists differ in length or a field fails
         to import
     """
     field_count = len(list_json)
     if field_count != len(mex_form_model):
         raise Exception(
             str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno)
             + ': List of fields must be same length with mex expr list.'
             + ' Fields: ' + str(list_json) + ', Mex Expr List: ' + str(mex_form_model)
         )

     imported = []
     for position in range(field_count):
         field_json = list_json[position]
         mex_expr = StringUtils.trim(mex_form_model[position])
         field_json[ffld.FormField.KEY_MEX_EXPR] = mex_expr
         try:
             field = ffld.FormField.import_form_field(json_obj=field_json)
             imported.append(field)
         except Exception as field_err:
             errmsg = (
                 str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno)
                 + ': Error importing field: ' + str(field_json)
                 + '. Exception: ' + str(field_err)
             )
             Log.error(errmsg)
             raise Exception(errmsg)
     return imported
Пример #11
0
    def __recognize_file(self):
        """Transcribe ``self.audio_file`` with the configured recognition engine.

        The file is first converted through ``AudioUtils().convert_format``
        when its extension (text after the last dot, lowercased) is not
        ``wav``. Errors raised by the conversion or while opening the audio
        propagate to the caller.

        :return: the recognized text, or ``None`` when recognition raised
            (the exception is logged, not re-raised)
        """
        extension = re.sub(pattern='(.*[.])([a-zA-Z0-9]+$)',
                           repl='\\2',
                           string=self.audio_file).lower()
        wav_path = self.audio_file

        if extension != 'wav':
            Log.important(
                str(self.__class__) + ' ' +
                str(getframeinfo(currentframe()).lineno) + ': Converting "' +
                str(self.audio_file) + '" to wav format..')
            wav_path = AudioUtils().convert_format(filepath=self.audio_file)

        # Recognizer instance that performs the speech recognition
        recognizer = sr.Recognizer()

        # Read the audio file as the source and capture its audio
        with sr.AudioFile(wav_path) as source:
            audio_text = recognizer.listen(source)

            # The recognize_*() calls raise when the API is unreachable,
            # hence the exception handling around them
            try:
                if self.engine == SpeechRecognition.ENGINE_GOOGLE:
                    text = recognizer.recognize_google(audio_text,
                                                       language=self.lang)
                elif self.engine == SpeechRecognition.ENGINE_GOOGLE_CLOUD:
                    text = recognizer.recognize_google_cloud(
                        audio_text,
                        credentials_json=self.auth_info,
                        language=self.lang)
                elif self.engine == SpeechRecognition.ENGINE_BING:
                    text = recognizer.recognize_bing(audio_text,
                                                     key=self.auth_info,
                                                     language=self.lang)
                else:
                    raise Exception(
                        str(self.__class__) + ' ' +
                        str(getframeinfo(currentframe()).lineno) +
                        ': Unsuported engine "' + str(self.engine) + '".')

                Log.info(
                    str(self.__class__) + ' ' +
                    str(getframeinfo(currentframe()).lineno) +
                    ': Converting audio transcripts into text ...')
                Log.important(
                    str(self.__class__) + ' ' +
                    str(getframeinfo(currentframe()).lineno) +
                    ': Recognized "' + str(self.lang) + '" text "' +
                    str(text) + '" from audio file "' + str(self.audio_file) +
                    '"')
                return text
            except Exception as recog_err:
                Log.error(
                    str(self.__class__) + ' ' +
                    str(getframeinfo(currentframe()).lineno) +
                    ': Exception converting audio transcript from "' +
                    str(self.audio_file) + '": ' + str(recog_err))
Пример #12
0
 def convert_to_simplified_chinese(self, text):
     """Convert ``text`` with ``hzc.HanziConv.toSimplified``.

     :param text: text to convert
     :return: the converted text, or the unchanged input when the conversion
         raises (the exception is logged)
     """
     try:
         return hzc.HanziConv.toSimplified(text)
     except Exception as conv_err:
         where = str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
         Log.error(
             where + ': Error converting "' + str(text)
             + '" to simplified chinese. Exception ' + str(conv_err) + '.'
         )
     # Fall back to the original text on failure
     return text
Пример #13
0
    def preprocess_training_data(self):
        """Fetch, preprocess and validate the training data.

        When ``is_training_data_ready`` is false, the data source returns a
        preprocessed DataFrame plus embedding parameters, which are converted
        into a training data model. The result must be exactly a
        ``tdm.TrainingDataModel``; if ``y_id`` is set, the data is filtered
        down to that label.

        :raises Exception: when fetching/converting fails or the training data
            has the wrong type
        """
        if not self.is_training_data_ready:
            try:
                # The external interface must pass back 2 values: a DataFrame
                # of preprocessed training data and the embedding layer params
                fetched = self.training_data_source.fetch_and_preprocess_data()
                self.df_training_data_pp, self.embedding_params = fetched

                summary = (
                    ': Successfully preprocessed training data. Max label val = '
                    + str(self.embedding_params.max_label_val)
                    + ', max sentence length = '
                    + str(self.embedding_params.max_sent_len)
                    + ', vocabulary size = '
                    + str(self.embedding_params.vocab_size)
                    + ', x one hot dict: '
                    + str(self.embedding_params.x_one_hot_dict)
                )
                Log.important(
                    str(self.__class__) + ' '
                    + str(getframeinfo(currentframe()).lineno) + summary)

                self.training_data = TextTrainer.convert_preprocessed_text_to_training_data_model(
                    model_name=self.model_name,
                    training_dataframe=self.df_training_data_pp,
                    embedding_x=self.embedding_params.x,
                    embedding_y=self.embedding_params.y,
                    embedding_x_one_hot_dict=self.embedding_params.x_one_hot_dict,
                    embedding_y_one_hot_dict=self.embedding_params.y_one_hot_dict,
                    word_freq_model=self.word_freq_model,
                )
            except Exception as prep_err:
                where = str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                errmsg = (
                    where
                    + ': Exception calling external object type "'
                    + str(type(self.training_data_source))
                    + '" method fetch_and_preprocess_data(), exception msg: '
                    + str(prep_err)
                )
                Log.error(errmsg)
                raise Exception(errmsg)

        data_type = type(self.training_data)
        if data_type is not tdm.TrainingDataModel:
            raise Exception(
                str(self.__class__) + ' ' +
                str(getframeinfo(currentframe()).lineno) + ': "' +
                str(self.identifier_string) + '": Wrong training data type "' +
                str(data_type) + '".')

        # Nothing more to do unless a single y/label ID was requested
        if self.y_id is None:
            return

        # Train a single y/label ID only, regardless of train mode
        self.training_data.filter_by_y_id(y_id=self.y_id)
        return
Пример #14
0
 def __init__(self, noun_case_endings=NOUN_PARTICLES, verb_case_endings=()):
     """Initialize the base class and check that the jamo library imports.

     :param noun_case_endings: forwarded to the parent constructor
     :param verb_case_endings: forwarded to the parent constructor
     :raises Exception: when importing ``jamo`` fails
     """
     super().__init__(
         noun_case_endings=noun_case_endings,
         verb_case_endings=verb_case_endings,
     )
     try:
         # Split Hangul (한글) syllables into letters (자모)
         # https://github.com/JDongian/python-jamo, https://python-jamo.readthedocs.io/en/latest/
         from jamo import h2j, j2hcj
     except Exception as import_err:
         where = str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno)
         errmsg = where + ': Error importing jamo library: ' + str(import_err)
         Log.error(errmsg)
         raise Exception(errmsg)
     return
Пример #15
0
    def __send_email(self, text_subject, text_msg, files, ignore_limit):
        """Send (or fake-send) one alert email, honouring the hourly limit.

        The message is prepared first; errors from that step propagate. The
        per-hour counter is reset when the current hour differs from the
        stored one. Errors raised while sending are logged, not re-raised.

        :param text_subject: email subject
        :param text_msg: email body text
        :param files: attachment file paths handed to ``prepare_message``
        :param ignore_limit: when true, the hourly limit is not enforced
        """
        email_msg = SendMail.prepare_message(
            from_addr=self.from_addr,
            to_addrs_list=self.alert_recipients,
            subject=text_subject,
            text=text_msg,
            files=files)
        try:
            # Start a fresh count whenever a new hour has begun
            if datetime.now().hour != self.current_hour:
                self.current_hour = datetime.now().hour
                self.emails_sent_this_hour = 0

            if ignore_limit:
                Log.info(
                    str(self.__class__) + ' ' +
                    str(getframeinfo(currentframe()).lineno) +
                    ': Ignoring send limit of ' + str(self.limit_per_hour) +
                    ' per hour.')
            elif self.emails_sent_this_hour >= self.limit_per_hour:
                Log.warning(
                    str(self.__class__) + ' ' +
                    str(getframeinfo(currentframe()).lineno) +
                    ': Send email alert limit ' +
                    str(self.limit_per_hour) +
                    ' per hour hit. Not sending subject: "' +
                    str(text_subject) + '", message: ' + str(text_msg))
                return

            if not self.fake_send:
                mailer = SendMail(mode=self.mail_mode,
                                  mail_server_url=self.mail_server_url,
                                  mail_server_port=self.mail_server_port)
                mailer.send(user=self.from_addr,
                            password=self.password,
                            recipients_list=self.alert_recipients,
                            message=email_msg)
            else:
                print('Fake send email from "' + str(self.from_addr) +
                      '" to: ' + str(self.alert_recipients) + ' Message:\n\r' +
                      str(email_msg))
            self.emails_sent_this_hour += 1
        except Exception as mail_err:
            Log.error(
                str(self.__class__) + ' ' +
                str(getframeinfo(currentframe()).lineno) +
                ': Error sending email: ' + str(mail_err) +
                '. Could not send message: ' + str(email_msg))
Пример #16
0
 def encode(
         self,
         # bytes format
         data):
     """Encrypt ``data`` with AES in the configured mode.

     In EAX mode the data is encrypted and a tag is produced. In CBC mode the
     data is first padded with 1..block-size bytes, each holding the padding
     length, so the last byte tells the decoder how many bytes to strip.

     :param data: plaintext as bytes
     :return: ``AES_Encrypt.EncryptRetClass`` holding base64 text of the
         ciphertext, the nonce and (EAX only) the tag
     :raises Exception: wrapping any failure, including an unsupported mode
     """
     try:
         if self.cipher_mode == AES.MODE_EAX:
             cipher = AES.new(key=self.key,
                              mode=self.cipher_mode,
                              nonce=self.nonce)
             cipherbytes, tag = cipher.encrypt_and_digest(data)
             return AES_Encrypt.EncryptRetClass(
                 cipher_mode=self.cipher_mode_str,
                 ciphertext_b64=b64encode(cipherbytes).decode(
                     self.text_encoding),
                 plaintext_b64=None,
                 tag_b64=b64encode(tag).decode(self.text_encoding),
                 nonce_b64=b64encode(self.nonce).decode(self.text_encoding))
         elif self.cipher_mode == AES.MODE_CBC:
             # 1-16, make sure not 0, other wise last byte will not be block length
             length = AES_Encrypt.DEFAULT_BLOCK_SIZE_AES_CBC - (
                 len(data) % AES_Encrypt.DEFAULT_BLOCK_SIZE_AES_CBC)
             # Pad with the raw byte value of the padding length so that the
             # decoder can read data[-1] directly. Building the byte without a
             # text encoding keeps it a single byte whatever STR_ENCODING is.
             data += bytes([length]) * length
             Log.debugdebug(
                 str(self.__class__) + ' ' +
                 str(getframeinfo(currentframe()).lineno) +
                 ': Padded length = ' + str(length))
             cipher = AES.new(key=self.key,
                              mode=self.cipher_mode,
                              iv=self.nonce)
             cipherbytes = cipher.encrypt(data)
             return AES_Encrypt.EncryptRetClass(
                 cipher_mode=self.cipher_mode_str,
                 ciphertext_b64=b64encode(cipherbytes).decode(
                     self.text_encoding),
                 plaintext_b64=None,
                 tag_b64=None,
                 nonce_b64=b64encode(self.nonce).decode(self.text_encoding))
         else:
             raise Exception(
                 str(self.__class__) + ' ' +
                 str(getframeinfo(currentframe()).lineno) +
                 ': Unsupported mode "' + str(self.cipher_mode) + '".')
     except Exception as ex:
         # Only the size of the plaintext is logged, never its content
         data_len = len(data) if hasattr(data, '__len__') else 'unknown'
         errmsg = str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno) \
                 + ': Error encoding data of length ' + str(data_len) \
                 + ' using AES. Exception: ' + str(ex)
         Log.error(errmsg)
         raise Exception(errmsg)
Пример #17
0
 def verify(self):
     """Check the stored test challenge against the computed challenge string.

     The expected value is computed from the shared secret, the challenge and
     the hash algorithm, then compared with the test challenge.

     :return: the comparison result; ``False`` when any exception occurs
         (the exception is logged)
     """
     try:
         test_challenge_calc = AccessTokenSharedsecretChallenge.create_test_challenge_string(
             shared_secret=self.shared_secret,
             challenge_string=self.challenge,
             algo_hash=self.algo_hash)
         return self.__compare_test_challenge(
             test_challenge_calc=test_challenge_calc)
     except Exception as ex:
         # The shared secret is deliberately left out of the log message so
         # that credentials never end up in log files.
         Log.error(
             str(self.__class__) + ' ' +
             str(getframeinfo(currentframe()).lineno) +
             ': Exception verifying challenge "' + str(self.challenge) +
             '": ' + str(ex))
         return False
Пример #18
0
 def stop_model_thread(self):
     """Signal the model's background job to stop.

     Sets ``self.model.stoprequest``. Any exception raised while doing so is
     logged and not re-raised.
     """
     # Kill any background jobs
     try:
         # A ' ' separates class and line number, as in the other log calls
         Log.info(
             str(self.__class__) + ' ' +
             str(getframeinfo(currentframe()).lineno) + ': "' +
             str(self.identifier_string) +
             '" Stopping model background job..')
         self.model.stoprequest.set()
     except Exception as ex:
         Log.error(
             str(self.__class__) + ' ' +
             str(getframeinfo(currentframe()).lineno) + ': "' +
             str(self.identifier_string) +
             '" Stop model background job exception: ' + str(ex))
Пример #19
0
 def verify_totp_otp(self, valid_window=1):
     """Verify the stored test challenge as a TOTP code using ``pyotp``.

     The shared secret is converted to a string and padded with copies of its
     own last character until its length is a multiple of 8.

     :param valid_window: forwarded to ``pyotp.TOTP.verify``
     :return: the verification result; ``False`` when any exception occurs
         (the exception is logged)
     """
     try:
         import pyotp
         secret = str(self.shared_secret)
         # Number of characters missing to reach the next multiple of 8
         missing = (8 - len(secret) % 8) % 8
         padded_secret = secret + secret[-1] * missing
         return pyotp.TOTP(padded_secret).verify(
             otp=self.test_challenge,
             valid_window=valid_window)
     except Exception as otp_err:
         where = str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
         Log.error(
             where + ': Error TOTP authentication, exception: ' + str(otp_err))
         return False
Пример #20
0
 def segment_ko_ja(
         self,
         text,
         return_array_of_split_words = False
 ):
     """Segment Japanese or Korean text into words with an external library.

     Japanese uses ``nagisa.tagging``; Korean uses ``self.kkma.pos`` after
     calling ``self.warn_korean()``.

     :param text: text to segment
     :param return_array_of_split_words: when true the list of words is
         returned; otherwise the words are joined with the language's word
         separator
     :return: list of words or the joined string
     :raises Exception: wrapping any failure, including an unsupported language
     """
     try:
         if self.lang in [lf.LangFeatures.LANG_JA]:
             words_postags = nagisa.tagging(text)
             txt_sym_tok = words_postags.words
             lang_label = 'Japanese'
         elif self.lang in [lf.LangFeatures.LANG_KO]:
             self.warn_korean()
             words_postags = self.kkma.pos(
                 phrase = text
             )
             # Each item is indexed as (word, POS tag); only words are kept
             txt_sym_tok = [wp[0] for wp in words_postags]
             lang_label = 'Korean'
         else:
             raise Exception(
                 str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                 + ': No external library supported for language "' + str(self.lang) + '"'
             )

         # Logging and result building are shared by both languages
         Log.debug(
             str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
             + ': ' + lang_label + ' segmentation "' + str(txt_sym_tok)
             + '", word & POS tags: ' + str(words_postags)
         )
         if return_array_of_split_words:
             return txt_sym_tok
         return BasicPreprocessor.get_word_separator(lang=self.lang).join(txt_sym_tok)
     except Exception as ex:
         errmsg = str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno) \
                  + ': Error segmenting lang "' + str(self.lang) + '", text "' + str(text) \
                  + '", exception: ' + str(ex)
         Log.error(errmsg)
         raise Exception(errmsg)
Пример #21
0
 def __init__(self, lang=LangFeatures.LANG_EN):
     """Store the mapped language code and download the NLTK comtrans corpus.

     SSL checking is disabled through ``Ssl.disable_ssl_check()`` before the
     download is attempted.

     :param lang: language code, mapped with
         ``LangFeatures.map_to_lang_code_iso639_1``
     :raises Exception: when the download raises or returns a falsy value
     """
     self.lang = LangFeatures.map_to_lang_code_iso639_1(lang_code=lang)
     Ssl.disable_ssl_check()
     try:
         downloaded = nltk.download(Corpora.NLTK_COMTRANS)
         if not downloaded:
             raise Exception('Download "' + str(Corpora.NLTK_COMTRANS) +
                             '" returned False')
         Log.info(
             str(self.__class__) + ' ' +
             str(getframeinfo(currentframe()).lineno) +
             ': NLTK download of "' + Corpora.NLTK_COMTRANS + '" OK.')
     except Exception as download_err:
         where = str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
         errmsg = (
             where
             + ': NLTK download of "' + str(Corpora.NLTK_COMTRANS) + '" exception: '
             + str(download_err) + '.'
         )
         Log.error(errmsg)
         raise Exception(errmsg)
     return
Пример #22
0
    def __attach_file_check_validity_and_size(
            files_attachment_list,
            max_total_files_size=MAX_TOTAL_FILES_SIZE_MB_EMAIL_ATTCH):
        """Select the attachment paths that exist and fit in the size budget.

        Paths are processed in order. A path that is not an existing file is
        skipped. A file is accepted only when its size (MB, rounded to two
        decimals) plus the running total stays below ``max_total_files_size``.

        :param files_attachment_list: candidate file paths, or ``None``
        :param max_total_files_size: cumulative size limit in MB
        :return: list of accepted paths (empty when the input is ``None``)
        """
        if files_attachment_list is None:
            return []

        accepted = []
        total_mb = 0.0

        for candidate in files_attachment_list:
            if not os.path.isfile(candidate):
                Log.error('File <' + str(__name__) + '> line ' +
                          str(getframeinfo(currentframe()).lineno) +
                          ': Invalid attachment file "' + str(candidate) +
                          '", not attaching to email')
                continue

            Log.info('File <' + str(__name__) + '> line ' +
                     str(getframeinfo(currentframe()).lineno) +
                     ': Attachment file path "' + str(candidate) + '" OK')

            size_mb = round(os.path.getsize(candidate) / (1024 * 1024), 2)
            # Total that would result from accepting this file
            projected_mb = size_mb + total_mb

            if projected_mb >= max_total_files_size:
                Log.warning('File <' + str(__name__) + '> line ' +
                            str(getframeinfo(currentframe()).lineno) +
                            ': File "' + str(candidate) + '" too big ' +
                            str(size_mb) + 'MB. Cumulative = ' +
                            str(projected_mb) +
                            ' Not attaching to email')
                continue

            accepted.append(candidate)
            total_mb = projected_mb
            Log.info('File <' + str(__name__) + '> line ' +
                     str(getframeinfo(currentframe()).lineno) +
                     ': Appended file "' + str(candidate) +
                     '" as email attachment size ' + str(size_mb) +
                     'MB, total cumulative ' + str(total_mb) + 'MB')
        return accepted
Пример #23
0
    def __recognize_mic(self):
        """Record from the microphone and transcribe it with Google recognition.

        :return: the recognized text, or ``None`` when recognition raised
            (the exception is logged, not re-raised)
        """
        # Recognizer instance that performs the speech recognition
        recognizer = sr.Recognizer()

        with sr.Microphone() as mic:
            print('Start talking')
            captured = recognizer.listen(mic)
            print('Done')
            try:
                # Google speech recognition
                text = recognizer.recognize_google(captured, language=self.lang)
                where = str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                Log.important(
                    where + ': Recognized "' + str(self.lang) + '" text "' +
                    str(text) + '" from mic "' + str(self.audio_file) + '"')
                return text
            except Exception as recog_err:
                where = str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                Log.error(where + ': Exception: ' + str(recog_err))
Пример #24
0
 def hash(string, algo=ALGO_SHA1):
     """
     Return the hexadecimal digest of a string under the chosen hash algorithm.

     :param string: text to hash; it is encoded with Hash.STR_ENCODING first
     :param algo:   one of Hash.ALGO_SHA1, Hash.ALGO_SHA256, Hash.ALGO_SHA512,
                    Hash.ALGO_SHA3_256 or Hash.ALGO_SHA3_512
     :return:       the hex digest string, or None when hashing failed
                    (unsupported algo, un-encodable input); the failure is
                    logged through Log.error and never raised to the caller
     """
     try:
         # Encoding happens inside the try so that a bad input (e.g. None)
         # is reported the same way as every other failure of this function.
         str_encode = string.encode(encoding=Hash.STR_ENCODING)

         # (algo constant, hashlib constructor) pairs, checked in order
         supported_algos = (
             (Hash.ALGO_SHA1, hashlib.sha1),
             (Hash.ALGO_SHA256, hashlib.sha256),
             (Hash.ALGO_SHA512, hashlib.sha512),
             (Hash.ALGO_SHA3_256, hashlib.sha3_256),
             (Hash.ALGO_SHA3_512, hashlib.sha3_512),
         )
         for known_algo, hash_func in supported_algos:
             if algo == known_algo:
                 return hash_func(str_encode).hexdigest()
         raise Exception('Unsupported hash algo "' + str(algo) + '".')
     except Exception as ex:
         # Imported locally so this block does not rely on a module-level import
         from inspect import currentframe, getframeinfo
         # NOTE(review): the input string is written to the error log in clear
         # text; confirm this is acceptable if secrets are ever hashed here.
         errmsg = str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno) \
                  + ': Error hashing string "' + str(string) + '" using algo "' + str(algo)\
                  + '". Exception: ' + str(ex)
         Log.error(errmsg)
         return None
Пример #25
0
    def retrieve_corpora(self, corpora_name, max_len=20):
        """
        Load aligned sentence pairs from the comtrans corpus and filter them.

        :param corpora_name: name passed to `comtrans.aligned_sents()`
        :param max_len:      value forwarded as `max_len` to
                             `self.filter_pair_sentence_length()`; defaults to
                             20, the value that was previously hard-coded
        :return: tuple (sentences_l1, sentences_l2) of equal length, taken from
                 the `.words` and `.mots` attributes of each aligned sentence
        :raises Exception: if the aligned sentences cannot be retrieved
        """
        try:
            als = comtrans.aligned_sents(corpora_name)
        except Exception as ex:
            errmsg = str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno) \
                     + ': Comtrans download of corpora "' + str(corpora_name) + '" exception: ' \
                     + str(ex) + '.'
            Log.error(errmsg)
            # Keep the original exception attached as the explicit cause
            raise Exception(errmsg) from ex

        sentences_l1 = [sent.words for sent in als]
        sentences_l2 = [sent.mots for sent in als]
        Log.info('Sentences length = ' + str(len(sentences_l1)))

        # Filter length
        (sentences_l1, sentences_l2) = self.filter_pair_sentence_length(
            sentences_arr_l1=sentences_l1,
            sentences_arr_l2=sentences_l2,
            max_len=max_len)
        Log.info('Sentences length after filtering = ' +
                 str(len(sentences_l1)))
        # Internal invariant: both sides must stay aligned pair-for-pair
        assert len(sentences_l1) == len(sentences_l2)
        return (sentences_l1, sentences_l2)
Пример #26
0
 def convert_datetime_to_number(
         x,
         datetime_format,
         round_to_integer = False,
         # If relative date is given, total days from this date is returned
         relative_date = None
 ):
     """
     Convert a datetime, or a string representing one, to a number.

     :param x:                a string to be parsed with `datetime_format`,
                              or any other object, which is handed unchanged
                              to `DataPreprocessor.date_to_number()`
     :param datetime_format:  `strptime` format used when `x` is a string
     :param round_to_integer: forwarded to `DataPreprocessor.date_to_number()`
     :param relative_date:    forwarded to `DataPreprocessor.date_to_number()`
     :return: the value returned by `DataPreprocessor.date_to_number()`, or 0
              if parsing/conversion raised (the error is logged, not raised).
              NOTE(review): 0 may be indistinguishable from a valid result.
     """
     try:
         # isinstance (not an exact type check) so that str subclasses,
         # e.g. numpy.str_ values coming out of arrays, are parsed as well
         if isinstance(x, str):
             dtime = datetime.strptime(str(x), datetime_format)
         else:
             dtime = x
         dtime_no = DataPreprocessor.date_to_number(
             x                = dtime,
             round_to_integer = round_to_integer,
             relative_date    = relative_date
         )
         return dtime_no
     except Exception as ex:
         Log.error(
             str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno)
             + ': Error converting "' + str(x) + '" type "' + str(type(x)) + '": ' + str(ex)
         )
         return 0
Пример #27
0
    def persist_model_to_storage(
            self,
            network = None
    ):
        """
        Save the network, the one-hot dictionaries and the "last updated" marker.

        :param network: network object to save; when None, `self.network` is
                        saved instead
        :raises Exception: wrapping any error raised while saving
        """
        try:
            # Honor the explicitly passed network, fall back to the stored one
            network_to_save = self.network if network is None else network

            #
            # From Python 3.8 onwards the model path is expected to be a
            # directory (see original note on Keras behavior), so create it
            # if it is missing.
            #
            if sys.version_info >= (3, 8):
                if not os.path.isdir(self.fpath_model):
                    Log.important(
                        str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                        + ': Path "' + str(self.fpath_model) + '" does not exist. Trying to create this directory...'
                    )
                    # makedirs also creates missing parents and tolerates the
                    # directory being created concurrently
                    os.makedirs(self.fpath_model, exist_ok=True)

            network_to_save.save(self.fpath_model)
            Log.important(
                str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                + ': Saved network to file/directory "' + str(self.fpath_model) + '".'
            )

            if self.x_one_hot_dict:
                ModelInterface.safe_dataframe_write(
                    df            = pd.DataFrame({
                        'code': list(self.x_one_hot_dict.keys()),
                        'word': list(self.x_one_hot_dict.values()),
                    }),
                    name_df       = 'x_one_hot_dict',
                    include_index = True,
                    index_label   = 'INDEX',
                    filepath      = self.fpath_model_x_one_hot,
                    log_training  = self.logs_training
                )

            if self.y_one_hot_dict:
                ModelInterface.safe_dataframe_write(
                    df            = pd.DataFrame({
                        'code': list(self.y_one_hot_dict.keys()),
                        'label':  list(self.y_one_hot_dict.values()),
                    }),
                    name_df       = 'y_one_hot_dict',
                    include_index = True,
                    index_label   = 'INDEX',
                    filepath      = self.fpath_model_y_one_hot,
                    log_training  = self.logs_training
                )

            # To allow applications to check if model updated
            # It is important to do it last (and fast), after everything is done
            ModelInterface.safe_file_write(
                dict_obj      = {'timenow': str(datetime.now())},
                name_dict_obj = 'model last updated time',
                filepath      = self.fpath_updated_file,
                write_as_json = False,
                log_training  = self.logs_training
            )

            return
        except Exception as ex_save:
            errmsg = str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno) \
                     + ': Error saving model "' + str(self.identifier_string) + '": ' + str(ex_save)
            Log.error(errmsg)
            raise Exception(errmsg) from ex_save
Пример #28
0
    def load_model_parameters(
            self
    ):
        """
        Load the network and its one-hot dictionaries from storage.

        Holds `self.mutex_training` for the duration of the load. On success
        sets `self.network`, `self.x_one_hot_dict` (plus its inverse),
        `self.y_one_hot_dict`, `self.model_loaded` and
        `self.model_updated_time`. A missing or unreadable one-hot dictionary
        file is not fatal: the corresponding attribute is set to None.

        :raises Exception: if the "last updated" file is missing or the model
                           cannot be loaded
        """
        # Acquire outside the try so that "finally" only ever releases a lock
        # that was really acquired
        self.mutex_training.acquire()
        try:
            # First check the existence of the files
            if not os.path.isfile(self.fpath_updated_file):
                errmsg = str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno) \
                         + ': Last update file "' + str(self.fpath_updated_file) \
                         + '" for model "' + str(self.identifier_string) + '" not found!'
                Log.error(errmsg)
                raise Exception(errmsg)

            self.network = load_model(self.fpath_model)

            try:
                df_x_one_hot_dict = pd.read_csv(
                    filepath_or_buffer = self.fpath_model_x_one_hot,
                    sep       = ',',
                    index_col = 'INDEX'
                )
                self.x_one_hot_dict = {code:word for code,word in df_x_one_hot_dict.values}
                # Form the inverse for convenience of transforming user input
                self.x_one_hot_dict_inverse = {word: code for code, word in self.x_one_hot_dict.items()}
                Log.important(
                    str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno) \
                    + ': Model "' + str(self.identifier_string)
                    + '" x one hot dict loaded: ' + str(self.x_one_hot_dict)
                )
            except Exception as ex_x_one_hot_dict:
                # Best effort only: the model can exist without this dictionary
                Log.important(
                    str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno) \
                    + ': Model "' + str(self.identifier_string) + '" no x_one_hot_dict. ' + str(ex_x_one_hot_dict)
                )
                self.x_one_hot_dict = None

            try:
                df_y_one_hot_dict = pd.read_csv(
                    filepath_or_buffer = self.fpath_model_y_one_hot,
                    sep       = ',',
                    index_col = 'INDEX'
                )
                self.y_one_hot_dict = {code:lbl for code,lbl in df_y_one_hot_dict.values}
                Log.important(
                    str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno) \
                    + ': Model "' + str(self.identifier_string)
                    + '" y one hot dict loaded: ' + str(self.y_one_hot_dict)
                )
            except Exception as ex_y_one_hot_dict:
                # Best effort only: the model can exist without this dictionary
                Log.important(
                    str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno) \
                    + ': Model "' + str(self.identifier_string) + '" no y_one_hot_dict. ' + str(ex_y_one_hot_dict)
                )
                self.y_one_hot_dict = None

            self.model_loaded = True
            # Modification time of the marker file is used as the model time
            self.model_updated_time = os.path.getmtime(self.fpath_updated_file)

            Log.important(
                str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno) \
                + ': Model "' + str(self.identifier_string) + '" trained at "' + str(self.model_updated_time)
                + '" successfully loaded.'
            )
        except Exception as ex:
            errmsg = str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)\
                     + ': Model "' + str(self.identifier_string)\
                     + '" failed to load from file "' + str(self.fpath_model)\
                     + '". Got exception ' + str(ex) + '.'
            Log.error(errmsg)
            raise Exception(errmsg) from ex
        finally:
            self.mutex_training.release()
Пример #29
0
    def train(
            self,
            write_model_to_storage = True,
            write_training_data_to_storage = False,
            # Option to train a single y ID/label
            y_id = None,
            # To keep training logs here for caller's reference
            log_list_to_populate = None,
            # # Transform train labels to categorical or not
            # convert_train_labels_to_categorical = True
    ):
        """
        Build, compile and fit the network on `self.training_data`.

        Holds `self.mutex_training` while training. `self.model_loaded` is
        False during training and set back to True only on success.

        :param write_model_to_storage: when True, call
               `self.persist_model_to_storage()` after fitting
        :param write_training_data_to_storage: when True, call
               `self.persist_training_data_to_storage()` after fitting
        :param y_id: accepted for interface compatibility; not used by this
               method body
        :param log_list_to_populate: list that receives training log lines;
               when it is not a list, a fresh list is used
        :raises Exception: if there is no training data, `self.model_params`
               is not a `nwdesign.NetworkDesign`, or any training step fails
        """
        if self.training_data is None:
            raise Exception(
                str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                + ': Cannot train without training data for identifier "' + str(self.identifier_string) + '"'
            )

        # isinstance so that subclasses of NetworkDesign are accepted too
        if not isinstance(self.model_params, nwdesign.NetworkDesign):
            raise Exception(
                str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                + ': Cannot train without network for identifier "'
                + str(self.identifier_string) + '". Got wrong type "' + str(type(self.model_params)) + '"'
            )

        self.mutex_training.acquire()
        try:
            self.model_loaded = False

            # Set up the training log list before anything can fail, so that
            # the error handler below never writes into a stale list
            if isinstance(log_list_to_populate, list):
                self.logs_training = log_list_to_populate
            else:
                self.logs_training = []

            Log.info(
                str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                + ': Training for data, x shape ' + str(self.training_data.get_x().shape)
                + ', train labels with shape ' + str(self.training_data.get_y().shape)
            )

            x = self.training_data.get_x().copy()
            y = self.training_data.get_y().copy()
            self.x_one_hot_dict = self.training_data.get_x_one_hot_dict()
            # Form the inverse for convenience of transforming user input
            if isinstance(self.x_one_hot_dict, dict):
                self.x_one_hot_dict_inverse = {word:code for code,word in self.x_one_hot_dict.items()}
            self.y_one_hot_dict = self.training_data.get_y_one_hot_dict()
            # Convert labels to categorical one-hot encoding
            train_labels_categorical = to_categorical(y)

            n_labels = len(set(y.tolist()))
            Log.info(
                str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                + ': Total unique labels = ' + str(n_labels) + '.',
                log_list = self.logs_training
            )

            try:
                self.network_layer_config = self.model_params.get_network_config()
                Log.info(
                    str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                    + ': Start creating network layers from config: ' + str(self.network_layer_config)
                )
                network = self.model_params.get_network()
                Log.info(
                    str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                    + ': Successfully created network layers from config: ' + str(self.network_layer_config)
                )
            except Exception as ex_layers:
                errmsg = str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno) \
                         + ': Error creating network layers for config: ' + str(self.network_layer_config) \
                         +'. Exception: ' + str(ex_layers)
                Log.error(
                    s = errmsg,
                    log_list = self.logs_training
                )
                raise Exception(errmsg) from ex_layers

            try:
                Log.important(
                    str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno) \
                    + ': Start compiling network "' + str(self.identifier_string) + '"..'
                )
                network.compile(
                    optimizer = self.train_optimizer,
                    loss      = self.train_loss,
                    metrics   = self.evaluate_metrics
                )
            except Exception as ex_compile:
                errmsg = str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno) \
                         + ': Error compiling network for config: ' + str(self.network_layer_config) \
                         +'. Exception: ' + str(ex_compile)
                Log.error(errmsg)
                raise Exception(errmsg) from ex_compile

            # Log model summary
            network.summary(print_fn=Log.info)

            Log.info(
                str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno)
                + ': Categorical Train label shape "' + str(train_labels_categorical.shape)
                + '":\n\r' + str(train_labels_categorical)
            )

            try:
                Log.important(
                    str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno) \
                    + ': Start fitting network "' + str(self.identifier_string) + '"..'
                )

                # Use the raw labels unless the network design asks for the
                # categorical (one-hot) form
                train_labels = y
                if self.model_params.require_label_to_categorical:
                    train_labels = train_labels_categorical

                # batch_size is only passed on when explicitly configured
                fit_kwargs = {'epochs': self.train_epochs}
                if self.train_batch_size is not None:
                    fit_kwargs['batch_size'] = self.train_batch_size
                network.fit(x, train_labels, **fit_kwargs)

                Log.important(
                    str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno) \
                    + ': Successfully fitted network "' + str(self.identifier_string) + '"..'
                )
            except Exception as ex_fit:
                errmsg = str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno) \
                         + ': Error training/fitting network for config: ' + str(self.network_layer_config) \
                         +'. Exception: ' + str(ex_fit)
                Log.error(errmsg)
                raise Exception(errmsg) from ex_fit

            self.network = network

            if write_model_to_storage:
                self.persist_model_to_storage(network=network)
            if write_training_data_to_storage:
                self.persist_training_data_to_storage(td=self.training_data)

            self.model_loaded = True
        except Exception as ex_train:
            errmsg = str(self.__class__) + ' ' + str(getframeinfo(currentframe()).lineno) \
                     + ': Train error for identifier "' + str(self.identifier_string)\
                     + '". Exception: ' + str(ex_train)
            Log.error(
                s = errmsg,
                log_list = self.logs_training
            )
            raise Exception(errmsg) from ex_train
        finally:
            self.mutex_training.release()
        return
Пример #30
0
    def safe_file_write(dict_obj,
                        filepath,
                        name_dict_obj=None,
                        write_as_json=False,
                        log_training=None,
                        file_encoding='utf-8'):
        """
        Write a dictionary to `filepath` via a temporary file, keeping a backup.

        The content is first written to `<filepath>.tmp`. If `filepath` already
        exists it is moved to the path given by
        `ModelInterface.get_backup_filepath()`, and the tmp file is then moved
        into place.

        :param dict_obj:      dictionary to write
        :param filepath:      final destination file
        :param name_dict_obj: descriptive name used in log messages only
        :param write_as_json: when True dump the dictionary as JSON, otherwise
                              write one line per dictionary value
        :param log_training:  optional log list passed on to the Log calls
        :param file_encoding: text encoding of the written file
        :raises Exception: if the tmp file cannot be written, or the
                           backup/rename step fails
        """
        #
        # Write to tmp file first
        #
        filepath_tmp = str(filepath) + '.tmp'
        # We backup the previous model file just in case
        filepath_old = ModelInterface.get_backup_filepath(filepath=filepath)

        try:
            # "with" guarantees the handle is closed even if writing fails
            with open(file=filepath_tmp, mode='w', encoding=file_encoding) as f:
                if write_as_json:
                    json.dump(dict_obj, f, indent=2)
                else:
                    for value in dict_obj.values():
                        # NOTE(review): '\n\r' (not '\r\n') is kept as-is so the
                        # file format does not change; confirm it is intended.
                        f.write(str(value) + '\n\r')
            Log.important(str(__name__) + ' ' +
                          str(getframeinfo(currentframe()).lineno) +
                          ': TMP File: Saved "' + str(name_dict_obj) +
                          '" with ' + str(len(dict_obj)) + ' lines,' +
                          ' filepath "' + str(filepath_tmp) + '"',
                          log_list=log_training)
        except Exception as ex:
            errmsg =\
                str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno)\
                + ': TMP File: Could not create tmp "' + str(name_dict_obj)\
                + '" file "' + str(filepath_tmp) + '". ' + str(ex)
            Log.error(s=errmsg, log_list=log_training)
            raise Exception(errmsg) from ex

        #
        # TODO Now try to read it back
        #

        time.sleep(0.2)
        #
        # Finally rename the .tmp file
        #
        try:
            # If old model file exists, backup the file.
            # os.replace overwrites an existing destination on every platform,
            # whereas os.rename fails on Windows when the backup already exists.
            if os.path.isfile(filepath):
                os.replace(src=filepath, dst=filepath_old)
                # Only report a backup when one was actually made
                Log.important(str(__name__) + ' ' +
                              str(getframeinfo(currentframe()).lineno) +
                              ': BACKUP File: Successfully backed up old model "' +
                              str(name_dict_obj) + '" to filepath "' +
                              str(filepath_old) + '"',
                              log_list=log_training)
            os.replace(src=filepath_tmp, dst=filepath)
            Log.important(str(__name__) + ' ' +
                          str(getframeinfo(currentframe()).lineno) +
                          ': REAL File: Saved "' + str(name_dict_obj) +
                          '" with ' + str(len(dict_obj)) + ' lines,' +
                          ' filepath "' + str(filepath) + '"',
                          log_list=log_training)
        except Exception as ex:
            errmsg =\
                str(__name__) + ' ' + str(getframeinfo(currentframe()).lineno)\
                + ': REAL File: For object "' + str(name_dict_obj)\
                + '" could not rename tmp file "' + str(filepath_tmp)\
                + '" to file "' + str(filepath)\
                + '". ' + str(ex)
            Log.error(s=errmsg, log_list=log_training)
            raise Exception(errmsg) from ex