def train_model(self, max_epoch=30):
    """Train ``self.model`` for *max_epoch* epochs, evaluating on the test
    set after every epoch.

    Args:
        max_epoch: number of training epochs to run (default 30).

    Side effects:
        Appends per-epoch precision/recall/micro-F1 lines (and, every 5
        epochs, the running-best scores) to ``../log/<self.name>.txt``.
    """
    evaluator = Evaluator(true_labels=self.test_labels,
                          sentences=self.test_sentence_words_input,
                          position_mt=self.test_position_mt,
                          position_me=self.test_position_ma,
                          correction_factor=self.correction_factor)
    # `with` guarantees the log handle is closed even if fit/predict/get_f1
    # raises mid-training (the original only closed it on the success path).
    with open("../log/" + self.name + ".txt", 'a+', encoding='utf-8') as log:
        for i in range(max_epoch):
            self.model.fit(
                {
                    'sentence_word': self.train_sentence_words_input,
                    # 'sentence_entity_type': self.train_sentence_entity_inputs,
                    'position_t': self.train_position_t,
                    'position_a': self.train_position_a,
                },
                self.train_labels,
                epochs=1,
                batch_size=256,
                verbose=1)
            print("# -- test set --- #")
            results = self.model.predict(
                {
                    'sentence_word': self.test_sentence_words_input,
                    # 'sentence_entity_type': self.test_sentence_entity_inputs,
                    'position_t': self.test_position_t,
                    'position_a': self.test_position_a,
                },
                batch_size=128,
                verbose=0)
            print("--------------epoch " + str(i + 1) + " ---------------------")
            macro_f1, micro_F1, p, r = evaluator.get_f1(predictions=results,
                                                        epoch=i + 1)
            log.write("epoch: " + str(i + 1) + " " + str(p) + " " +
                      str(r) + " " + str(micro_F1) + "\n")
            if (i + 1) % 5 == 0:
                # Report the running-best scores every 5 epochs; each line
                # goes to stdout and to the log (deduplicates the original's
                # four print/log.write pairs, same output byte-for-byte).
                summary = [
                    "current max macro_F1 score: " + str(evaluator.max_macro_F1 * 100),
                    "max macro_F1 is gained in epoch " + str(evaluator.max_macro_F1_epoch),
                    "current max micro_F1 score: " + str(evaluator.max_micro_F1 * 100),
                    "max micro_F1 is gained in epoch " + str(evaluator.max_micro_F1_epoch),
                ]
                for line in summary:
                    print(line)
                    log.write(line + "\n")
            print(
                "------------------------------------------------------------")
# Top-level training-script fragment: converts position features, pads aspect
# indices, then trains/evaluates the model 5 times for up to 80 epochs each.
position_ids = example_reader.get_position_ids(max_len=82)
# Convert raw positions to ids in place for both splits (same id table).
example_reader.convert_position(position_inputs=train_positions, position_ids=position_ids)
example_reader.convert_position(position_inputs=test_positions, position_ids=position_ids)
# Pad aspect token-index sequences to a fixed length of 9.
train_aspects = example_reader.pad_aspect_index(train_aspect_text_inputs.tolist(), max_length=9)
test_aspects = example_reader.pad_aspect_index(test_aspect_text_inputs.tolist(), max_length=9)
# ---------------------------------------------------------------------
# Repeat the whole train/evaluate cycle 5 times (fresh model each run).
for i in range(5):
    model = m.build_model(max_len=82,
                          aspect_max_len=9,
                          embedding_matrix=embedding_matrix,
                          position_embedding_matrix=position_matrix,
                          class_num=3,
                          num_words=4582)  # 5144 4582 // # 1523 // 1172
    # Fresh evaluator per run so best-F1 tracking starts from scratch.
    evaluator = Evaluator(true_labels=test_true_labels,
                          sentences=test_sentence_inputs,
                          aspects=test_aspect_text_inputs)
    epoch = 1
    while epoch <= 80:
        # One epoch of training, then predict on the test split.
        model = m.train_model(sentence_inputs=train_sentence_inputs,
                              position_inputs=train_positions,
                              aspect_input=train_aspects,
                              labels=train_aspect_labels,
                              model=model)
        results = m.get_predict(sentence_inputs=test_sentence_inputs,
                                position_inputs=test_positions,
                                aspect_input=test_aspects,
                                model=model)
        print("\n--------------epoch " + str(epoch) + " ---------------------")
        F, acc = evaluator.get_macro_f1(predictions=results, epoch=epoch)
        if epoch % 5 == 0:
            print("current max F1 score: " + str(evaluator.max_F1))
        # NOTE(review): no `epoch += 1` is visible in this chunk — if the
        # file really ends the loop body here it never terminates. Confirm
        # the increment exists in the full source.
def train_model(self, max_epoch=30):
    """Train ``self.model`` for up to *max_epoch* epochs.

    When ``self.use_development_set`` is true, the model is also evaluated
    on the dev split each epoch and training stops early if the dev
    evaluator signals it by returning a negative f1.

    Args:
        max_epoch: maximum number of training epochs (default 30).

    Side effects:
        Appends one ``epoch:{} p:{} r:{} f:{}`` line per completed epoch to
        ``../log/<self.name>.txt``.
    """
    e1 = Evaluator(true_labels=self.test_labels,
                   sentences=self.test_word_inputs,
                   index_ids=self.index_ids,
                   val=False)
    if self.use_development_set:
        e2 = Evaluator(true_labels=self.dev_labels,
                       sentences=self.dev_word_inputs,
                       index_ids=self.index_ids)
    # `with` guarantees the log handle is closed even if fit/predict raises
    # mid-training (the original only closed it on the success path).
    with open("../log/" + self.name + ".txt", 'a+', encoding='utf-8') as log:
        for i in range(max_epoch):
            print("====== epoch " + str(i + 1) + " ======")
            # -------- if use development set, the early stop will be introduced.
            if self.use_development_set:
                self.model.fit(
                    {
                        'sentence_input': self.train_word_inputs,
                        'entity_type_input': self.train_entity_inputs
                    },
                    self.train_labels,
                    epochs=1,
                    batch_size=32,
                    validation_data=([
                        self.dev_word_inputs, self.dev_entity_inputs
                    ], self.dev_labels),
                    verbose=2)
                print("# -- develop set --- #")
                results = self.model.predict(
                    {
                        'sentence_input': self.dev_word_inputs,
                        'entity_type_input': self.dev_entity_inputs
                    },
                    batch_size=64,
                    verbose=0)
                results = e2.get_true_label(label=results)
                results = e2.process_bie(sen_label=results)
                f1, _, _ = e2.get_true_prf(results, epoch=i + 1)
                # Negative f1 is the evaluator's early-stop signal — the
                # test evaluation for this epoch is skipped too.
                if f1 < 0:
                    break
            else:
                self.model.fit(
                    {
                        'sentence_input': self.train_word_inputs,
                        'entity_type_input': self.train_entity_inputs
                    },
                    self.train_labels,
                    epochs=1,
                    batch_size=32,
                    verbose=2)
            print("# -- test set --- #")
            results = self.model.predict(
                {
                    'sentence_input': self.test_word_inputs,
                    'entity_type_input': self.test_entity_inputs
                },
                batch_size=64,
                verbose=0)
            # Decode predictions to labels and repair B/I/E tag sequences
            # before scoring.
            results = e1.get_true_label(label=results)
            results = e1.process_bie(sen_label=results)
            f1, p1, r1 = e1.get_true_prf(results, epoch=i + 1)
            log.write("epoch:{} p:{} r:{} f:{}\n".format(i + 1, p1, r1, f1))