def eva_termination(self, model): """ 检查是否终止训练,基于验证集 :param model: 模型 :return: 是否终止训练 Check if or not to stop training, based on validation set :param model: model :return: if or not to stop training """ metric = self.metrics[0] valid = self.valid_results # 如果已经训练超过20轮,且评价指标越小越好,且评价已经连续五轮非减 # If has been trained for over 20 rounds, and evaluation measure is the smaller the better, and the measure has been non-desceasing for five rounds if len( valid ) > 20 and metric in utils.LOWER_METRIC_LIST and utils.strictly_increasing( valid[-5:]): return True # 如果已经训练超过20轮,且评价指标越大越好,且评价已经连续五轮非增 # If has been trained for over 20 rounds, and evaluation measure is the larger the better, and the measure has been non-increasing for five rounds elif len( valid ) > 20 and metric not in utils.LOWER_METRIC_LIST and utils.strictly_decreasing( valid[-5:]): return True # 训练好结果离当前已经20轮以上了 # It has been more than 20 rounds from the best result elif len(valid) - valid.index(utils.best_result(metric, valid)) > 20: return True return False
def eva_termination(self, model): """ 检查是否终止训练,基于验证集 :param model: 模型 :return: 是否终止训练 """ metric = self.metrics[0] valid = self.valid_results # 如果已经训练超过20轮,且评价指标越小越好,且评价已经连续五轮非减 if len(valid) > 20 and metric in utils.LOWER_METRIC_LIST and utils.strictly_increasing(valid[-5:]): return True # 如果已经训练超过20轮,且评价指标越大越好,且评价已经连续五轮非增 elif len(valid) > 20 and metric not in utils.LOWER_METRIC_LIST and utils.strictly_decreasing(valid[-5:]): return True # 训练好结果离当前已经20轮以上了 elif len(valid) - valid.index(utils.best_result(metric, valid)) > 20: return True return False
def train(self, model, data_processor):
    """
    Train the model
    :param model: model
    :param data_processor: DataProcessor instance
    :return:
    """
    # Get training, validation and test data; epoch=-1 means no shuffle
    train_data = data_processor.get_train_data(epoch=-1, model=model)
    validation_data = data_processor.get_validation_data(model=model)
    test_data = data_processor.get_test_data(model=model) \
        if data_processor.unlabel_test == 0 else None
    self._check_time(start=True)  # record the start time

    # Model performance before training
    init_train = self.evaluate(model, train_data, data_processor) \
        if train_data is not None else [-1.0] * len(self.metrics)
    init_valid = self.evaluate(model, validation_data, data_processor) \
        if validation_data is not None else [-1.0] * len(self.metrics)
    init_test = self.evaluate(model, test_data, data_processor) \
        if test_data is not None and data_processor.unlabel_test == 0 \
        else [-1.0] * len(self.metrics)
    logging.info("Init: \t train= %s validation= %s test= %s [%.1f s] " % (
        utils.format_metric(init_train), utils.format_metric(init_valid),
        utils.format_metric(init_test), self._check_time()) + ','.join(self.metrics))

    try:
        for epoch in range(self.epoch):
            self._check_time()
            # Regenerate the training data every epoch, since shuffling (and
            # negative sampling for top-n recommendation) happens per epoch
            epoch_train_data = data_processor.get_train_data(epoch=epoch, model=model)
            train_predictions, last_batch, mean_loss, mean_loss_l2 = \
                self.fit(model, epoch_train_data, data_processor, epoch=epoch)

            # Check intermediate results of the model
            if self.check_epoch > 0 and (epoch == 1 or epoch % self.check_epoch == 0):
                last_batch['mean_loss'] = mean_loss
                last_batch['mean_loss_l2'] = mean_loss_l2
                self.check(model, last_batch)
            training_time = self._check_time()

            # Evaluate the model
            train_result = [mean_loss] + model.evaluate_method(
                train_predictions, train_data, metrics=['rmse'])
            valid_result = self.evaluate(model, validation_data, data_processor) \
                if validation_data is not None else [-1.0] * len(self.metrics)
            test_result = self.evaluate(model, test_data, data_processor) \
                if test_data is not None and data_processor.unlabel_test == 0 \
                else [-1.0] * len(self.metrics)
            testing_time = self._check_time()

            self.train_results.append(train_result)
            self.valid_results.append(valid_result)
            self.test_results.append(test_result)

            # Log the current performance
            logging.info("Epoch %5d [%.1f s]\t train= %s validation= %s test= %s [%.1f s] " % (
                epoch + 1, training_time, utils.format_metric(train_result),
                utils.format_metric(valid_result), utils.format_metric(test_result),
                testing_time) + ','.join(self.metrics))

            # Save the model if the current validation result is the best so far
            if utils.best_result(self.metrics[0], self.valid_results) == self.valid_results[-1]:
                model.save_model()
                # model.save_model(
                #     model_path='../model/variable_tsne_logic_epoch/variable_tsne_logic_epoch_%d.pt' % (epoch + 1))
            # Check whether to stop training, based on the validation set
            if self.eva_termination(model) and self.early_stop == 1:
                logging.info("Early stop at %d based on validation result." % (epoch + 1))
                break
    except KeyboardInterrupt:
        logging.info("Early stop manually")
        save_here = input("Save here? (1/0) (default 0):")
        if str(save_here).lower().startswith('1'):
            model.save_model()

    # Find the best validation result across iterations
    best_valid_score = utils.best_result(self.metrics[0], self.valid_results)
    best_epoch = self.valid_results.index(best_valid_score)
    logging.info("Best Iter(validation)= %5d\t train= %s valid= %s test= %s [%.1f s] " % (
        best_epoch + 1, utils.format_metric(self.train_results[best_epoch]),
        utils.format_metric(self.valid_results[best_epoch]),
        utils.format_metric(self.test_results[best_epoch]),
        self.time[1] - self.time[0]) + ','.join(self.metrics))
    best_test_score = utils.best_result(self.metrics[0], self.test_results)
    best_epoch = self.test_results.index(best_test_score)
    logging.info("Best Iter(test)= %5d\t train= %s valid= %s test= %s [%.1f s] " % (
        best_epoch + 1, utils.format_metric(self.train_results[best_epoch]),
        utils.format_metric(self.valid_results[best_epoch]),
        utils.format_metric(self.test_results[best_epoch]),
        self.time[1] - self.time[0]) + ','.join(self.metrics))
    model.load_model()
def train(self, model, train_data, validation_data=None, test_data=None, data_processor=None):
    assert train_data is not None
    if model.sess is None:
        self._build_sess(model)
    if data_processor is None:
        data_processor = BaseDataProcessor()
    self._check_time(start=True)

    # Model performance before training
    init_train = self.evaluate(model, train_data) \
        if train_data is not None else [-1.0] * len(self.metrics)
    init_valid = self.evaluate(model, validation_data) \
        if validation_data is not None else [-1.0] * len(self.metrics)
    init_test = self.evaluate(model, test_data) \
        if test_data is not None else [-1.0] * len(self.metrics)
    logging.info("Init: \t train= %s validation= %s test= %s [%.1f s] " % (
        utils.format_metric(init_train), utils.format_metric(init_valid),
        utils.format_metric(init_test), self._check_time()) + ','.join(self.metrics))

    try:
        for epoch in range(self.epoch):
            gc.collect()
            self._check_time()
            # Work on a per-epoch copy so epoch_process_train can shuffle or
            # resample without touching the original training data
            epoch_train_data = copy.deepcopy(train_data)
            epoch_train_data = data_processor.epoch_process_train(epoch_train_data, epoch=epoch + 1)
            if self.check_epoch > 0 and (epoch == 1 or epoch % self.check_epoch == 0):
                self.check(model, epoch_train_data)
            self.fit(model, epoch_train_data, epoch=epoch + 1)
            del epoch_train_data
            training_time = self._check_time()

            # Evaluate on all splits
            train_result = self.evaluate(model, train_data) \
                if train_data is not None else [-1.0] * len(self.metrics)
            valid_result = self.evaluate(model, validation_data) \
                if validation_data is not None else [-1.0] * len(self.metrics)
            test_result = self.evaluate(model, test_data) \
                if test_data is not None else [-1.0] * len(self.metrics)
            testing_time = self._check_time()

            self.train_results.append(train_result)
            self.valid_results.append(valid_result)
            self.test_results.append(test_result)
            logging.info("Epoch %5d [%.1f s]\t train= %s validation= %s test= %s [%.1f s] " % (
                epoch + 1, training_time, utils.format_metric(train_result),
                utils.format_metric(valid_result), utils.format_metric(test_result),
                testing_time) + ','.join(self.metrics))

            # Save the model if the current validation result is the best so far
            if utils.best_result(self.metrics[0], self.valid_results) == self.valid_results[-1]:
                self.save_model(model)
            if utils.eva_termination(self.metrics[0], self.valid_results):
                logging.info("Early stop at %d based on validation result." % (epoch + 1))
                break
    except KeyboardInterrupt:
        logging.info("Early stop manually")
        save_here = input("Save here? (1/0) (default 0):")
        if str(save_here).lower().startswith('1'):
            self.save_model(model)

    # Find the best validation result across iterations
    best_valid_score = utils.best_result(self.metrics[0], self.valid_results)
    best_epoch = self.valid_results.index(best_valid_score)
    logging.info("Best Iter(validation)= %5d\t train= %s valid= %s test= %s [%.1f s] " % (
        best_epoch + 1, utils.format_metric(self.train_results[best_epoch]),
        utils.format_metric(self.valid_results[best_epoch]),
        utils.format_metric(self.test_results[best_epoch]),
        self.time[1] - self.time[0]) + ','.join(self.metrics))
    best_test_score = utils.best_result(self.metrics[0], self.test_results)
    best_epoch = self.test_results.index(best_test_score)
    logging.info("Best Iter(test)= %5d\t train= %s valid= %s test= %s [%.1f s] " % (
        best_epoch + 1, utils.format_metric(self.train_results[best_epoch]),
        utils.format_metric(self.valid_results[best_epoch]),
        utils.format_metric(self.test_results[best_epoch]),
        self.time[1] - self.time[0]) + ','.join(self.metrics))
    self.load_model(model)
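# A minimal sketch of the BaseDataProcessor fallback used above when no
# data_processor is supplied. This is an assumption: the epoch hook is taken
# to shuffle the per-epoch copy of the training data, and the column layout
# (a dict of equally long numpy arrays) is illustrative, not confirmed.
import numpy as np


class BaseDataProcessor(object):
    def epoch_process_train(self, train_data, epoch):
        # Apply one shared permutation per epoch so all columns stay aligned
        keys = list(train_data.keys())
        perm = np.random.permutation(len(train_data[keys[0]]))
        for k in keys:
            train_data[k] = train_data[k][perm]
        return train_data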