示例#1
0
 def train(self, train_dataset):
     """Train the classification model in mini-batches.

     Iterates over chunks of (sentence, intent-id) pairs, embeds the
     sentences, one-hot encodes the intent labels, and fits the model on
     each chunk, logging per-batch loss/accuracy.  Training stops early
     once both training and validation loss drop below the configured
     ``lossThresholdToStopTraining`` (only when that threshold is > 0).

     :param train_dataset: dict with 'trainX' (list of sentences) and
         'trainY' (integer intent id per sentence).
     """
     train_data = du.chunks(
         train_dataset['trainX'], self.__config['batchSize'], train_dataset['trainY'])
     n_batches = math.ceil(
         len(train_dataset['trainX'])/self.__config['batchSize'])
     self.__logger('Start training classification model!')
     m = self.__model
     intents = self.__dataset_params["intents"]
     num_classes = len(intents)
     for idx, t_chunk in enumerate(train_data):
         x = self.__embeddings_model.embed(t_chunk[0])
         y = to_categorical(np.array(t_chunk[1], dtype=np.int32), num_classes)
         self.__logger(f'Training batch {idx+1}.')
         m.fit(
             x=x,
             y=y,
             # NOTE: batch_size deliberately omitted — each chunk is
             # already one batch (see the NER train variant's comment).
             shuffle=True,
             epochs=self.__config['epochs'],
             verbose=0,
             validation_split=self.__config['trainingValidationSplit'],
         )
         self.__logger(
             f'Trained {m.history.epoch[-1]+1} epochs on batch {idx + 1} of {n_batches}')
         self.__logger(
             f'Training Loss: {m.history.history["loss"][-1]} | Training Accuracy: {m.history.history["acc"][-1]}')
         self.__logger(
             f'Validation Loss: {m.history.history["val_loss"][-1]} | Validation Accuracy: {m.history.history["val_acc"][-1]}')
         self.__logger(
             '==================================================================================================')
         # Early-stop when BOTH losses are under the (positive) threshold.
         threshold = self.__config["lossThresholdToStopTraining"]
         if (threshold > 0 and
                 m.history.history["loss"][-1] < threshold and
                 m.history.history["val_loss"][-1] < threshold):
             self.__logger(
                 f'Enough accuracy reached! Ending training after batch {idx + 1} of {n_batches}')
             self.__logger(
                 '==================================================================================================')
             break
示例#2
0
 def test(self, test_examples, results_handler=None):
     """Evaluate the classification model on a labelled test set.

     :param test_examples: dict with 'testX' (list of sentences) and
         'testY' (integer intent id per sentence).
     :param results_handler: optional callable
         ``(x, y, predictions, stats)`` that scores one batch and
         updates ``stats`` in place; defaults to the instance's
         default results logger.
     :return: stats dict with 'correct', 'wrong' and 'lowConfidence'
         counters filled in by the handler.
     """
     chunks = du.chunks(
         test_examples['testX'], self.__config['batchSize'], test_examples['testY'])
     # `is not None` (identity), not `!= None` (equality) — PEP 8.
     handler = results_handler if results_handler is not None else self.__default_results_logger
     stats = {'correct': 0, 'wrong': 0, 'lowConfidence': 0}
     for t_chunk in chunks:
         x = t_chunk[0]  # sentences
         y = t_chunk[1]  # intents code per sentence
         predictions = self.predict(x)
         handler(x, y, predictions, stats)
     return stats
 def train(self, train_dataset):
     """Train the classification model in mini-batches.

     Embeds each chunk of sentences, one-hot encodes the intent labels
     and fits the model chunk by chunk, logging per-batch metrics.
     Stops early once both training and validation loss drop below the
     configured ``lossThresholdToStopTraining`` (only when > 0).

     :param train_dataset: dict with 'trainX' (list of sentences) and
         'trainY' (integer intent id per sentence).
     """
     train_data = du.chunks(train_dataset['trainX'],
                            self.__config['batchSize'],
                            train_dataset['trainY'])
     n_batches = math.ceil(
         len(train_dataset['trainX']) / self.__config['batchSize'])
     # Plain string — no placeholders, so no f-string needed (F541).
     self.__logger('Start training classification model!')
     m = self.__model
     intents = self.__dataset_params["intents"]
     num_classes = len(intents)
     for idx, t_chunk in enumerate(train_data):
         x = self.__embeddings_model.embed(t_chunk[0])
         y = to_categorical(np.array(t_chunk[1], dtype=np.int32),
                            num_classes)
         self.__logger(f'Training batch {idx+1}.')
         m.fit(
             x=x,
             y=y,
             # NOTE: batch_size deliberately omitted — each chunk is
             # already one batch.
             shuffle=True,
             epochs=self.__config['epochs'],
             verbose=0,
             validation_split=self.__config['trainingValidationSplit'],
         )
         self.__logger(
             f'Trained {m.history.epoch[-1]+1} epochs on batch {idx + 1} of {n_batches}'
         )
         self.__logger(
             f'Training Loss: {m.history.history["loss"][-1]} | Training Accuracy: {m.history.history["acc"][-1]}'
         )
         self.__logger(
             f'Validation Loss: {m.history.history["val_loss"][-1]} | Validation Accuracy: {m.history.history["val_acc"][-1]}'
         )
         self.__logger(
             '=================================================================================================='
         )
         # Early-stop when BOTH losses are under the (positive) threshold.
         threshold = self.__config["lossThresholdToStopTraining"]
         if (threshold > 0
                 and m.history.history["loss"][-1] < threshold
                 and m.history.history["val_loss"][-1] < threshold):
             self.__logger(
                 f'Enough accuracy reached! Ending training after batch {idx + 1} of {n_batches}'
             )
             self.__logger(
                 '=================================================================================================='
             )
             break
示例#4
0
 def test(self, test_examples, results_handler=None):
     """Evaluate the slot-tagging (NER) model on a labelled test set.

     Feeds the gold intent of every sentence (with confidence 1) to the
     slot predictor and hands the per-word predicted slot indices to the
     results handler.

     :param test_examples: dict with 'testX' (sentences), 'testY'
         (integer intent id per sentence) and 'testY2' (encoded slot id
         per sentence word).
     :param results_handler: optional callable
         ``(x, y2, preds, stats)`` that scores one batch and updates
         ``stats`` in place; defaults to the instance's default
         results logger.
     :return: stats dict with 'correct' and 'wrong' counters filled in
         by the handler.
     """
     # `is not None` (identity), not `!= None` (equality) — PEP 8.
     handler = results_handler if results_handler is not None else self.__default_results_logger
     chunks = du.chunks(test_examples['testX'], self.__config['batchSize'],
                        test_examples['testY'], test_examples['testY2'])
     stats = {'correct': 0, 'wrong': 0}
     for t_chunk in chunks:
         test_x = t_chunk[0]  # sentences
         test_y = t_chunk[1]  # intents code per sentence
         test_y2 = t_chunk[2]  # slots encoded per sentence word
         # Gold intents are supplied to the slot model as if predicted
         # with full confidence.
         p_intent = [{
             'confidence': 1,
             'intent': self.__dataset_params['intents'][test_y[sentence_id]],
             'sentence': sentence,
         } for sentence_id, sentence in enumerate(test_x)]
         predictions = self.raw_prediction(test_x, p_intent)
         # Keep only the highest-scoring slot index per word.
         preds = [[word['highestIndex'] for word in sentence_preds]
                  for sentence_preds in predictions]
         handler(test_x, test_y2, preds, stats)
     return stats
示例#5
0
 def train(self, train_dataset):
     """Train the NER (slot-tagging) model in mini-batches.

     For each chunk: one-hot encodes the gold intent labels, embeds the
     sentences at word and word-character level, pads and one-hot
     encodes the per-word slot ids, then fits the model on the three
     inputs.  Stops early once both training and validation loss drop
     below the configured ``lossThresholdToStopTraining`` (only when
     that threshold is > 0).

     :param train_dataset: dict with 'trainX' (sentences), 'trainY'
         (integer intent id per sentence) and 'trainY2' (encoded slot
         id per sentence word).
     """
     chunks = du.chunks(train_dataset['trainX'], self.__config['batchSize'],
                        train_dataset['trainY'], train_dataset['trainY2'])
     self.__logger(
         f'Start training NER model! (attention enabled: {self.__config["addAttention"]})'
     )
     m = self.__model
     # len() of the mapping directly — .keys() was redundant.
     num_slot_types = len(self.__dataset_params["slotsToId"])
     n_batches = math.ceil(
         len(train_dataset['trainX']) / self.__config['batchSize'])
     # Loop-invariant: hoist the fixed sentence length out of the loop.
     max_words = self.__dataset_params['maxWordsPerSentence']
     for idx, t_chunk in enumerate(chunks):
         train_x_chunks = t_chunk[0]  # sentences
         train_y_chunks = t_chunk[1]  # intents code per sentence
         train_y2_chunks = t_chunk[2]  # slots encoded per sentence word
         intent_labels = to_categorical(
             np.array(train_y_chunks, dtype=np.int32),
             len(self.__dataset_params['intents']))
         embedded_sentence_words = self.__embeddings_model.embed(
             train_x_chunks)
         embedded_sentence_word_chars = self.__embeddings_model.embed_by_word_characters(
             train_x_chunks)
         # Zero-pad every sentence's slot-id sequence to the fixed
         # sentence length, then one-hot encode each sequence.
         y2_sentences = []
         for words_slot_id in train_y2_chunks:
             slot_ids = np.array(words_slot_id, dtype=np.int32)
             pad_width = max_words - len(words_slot_id)
             padded_slot_ids = np.pad(slot_ids, [[0, pad_width]],
                                      mode='constant')
             y2_sentences.append(
                 to_categorical(padded_slot_ids, num_slot_types))
         slot_tags = np.stack(y2_sentences)
         m.fit(
             x=[
                 intent_labels, embedded_sentence_words,
                 embedded_sentence_word_chars
             ],
             y=slot_tags,
             shuffle=True,
             # batch_size=self.__config['batchSize'], # IMPORTANT: adding batch size here makes the optimization bad
             epochs=self.__config['epochs'],
             verbose=0,
             validation_split=self.__config['trainingValidationSplit'],
         )
         self.__logger(
             f'Trained {m.history.epoch[-1]+1} epochs on batch {idx + 1} of {n_batches}'
         )
         self.__logger(
             f'Training Loss: {m.history.history["loss"][-1]} | Training Accuracy: {m.history.history["acc"][-1]}'
         )
         self.__logger(
             f'Validation Loss: {m.history.history["val_loss"][-1]} | Validation Accuracy: {m.history.history["val_acc"][-1]}'
         )
         self.__logger(
             '=================================================================================================='
         )
         # Early-stop when BOTH losses are under the (positive) threshold.
         threshold = self.__config["lossThresholdToStopTraining"]
         if (threshold > 0
                 and m.history.history["loss"][-1] < threshold
                 and m.history.history["val_loss"][-1] < threshold):
             self.__logger(
                 f'Enough accuracy reached! Ending training after batch {idx + 1} of {n_batches}'
             )
             self.__logger(
                 '=================================================================================================='
             )
             break