Example #1
    def eva_termination(self, model):
        """
        Check whether to stop training, based on the validation set
        :param model: the model
        :return: True if training should stop, else False
        """
        metric = self.metrics[0]
        valid = self.valid_results
        # Trained for more than 20 epochs, the metric is lower-is-better,
        # and it has been strictly increasing for the last five epochs
        if len(valid) > 20 and metric in utils.LOWER_METRIC_LIST and utils.strictly_increasing(valid[-5:]):
            return True
        # Trained for more than 20 epochs, the metric is higher-is-better,
        # and it has been strictly decreasing for the last five epochs
        elif len(valid) > 20 and metric not in utils.LOWER_METRIC_LIST and utils.strictly_decreasing(valid[-5:]):
            return True
        # More than 20 epochs have passed since the best validation result
        elif len(valid) - valid.index(utils.best_result(metric, valid)) > 20:
            return True
        return False
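
This method leans on several helpers from utils whose implementations are not shown in the listing. Below is a minimal sketch of what the function assumes; the project's real implementations may differ, and the contents of LOWER_METRIC_LIST are invented for illustration.

LOWER_METRIC_LIST = ['rmse', 'mae']  # assumed: metrics where lower values are better

def strictly_increasing(l):
    # True iff every value is larger than the one before it
    return all(x < y for x, y in zip(l, l[1:]))

def strictly_decreasing(l):
    # True iff every value is smaller than the one before it
    return all(x > y for x, y in zip(l, l[1:]))

def best_result(metric, results_list):
    # The best result is the minimum for lower-is-better metrics,
    # otherwise the maximum
    if metric in LOWER_METRIC_LIST:
        return min(results_list)
    return max(results_list)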
Example #2
File: BaseRunner.py  Project: evison/NCR
    def eva_termination(self, model):
        """
        Check whether to stop training, based on the validation set
        :param model: the model
        :return: True if training should stop, else False
        """
        metric = self.metrics[0]
        valid = self.valid_results
        # Trained for more than 20 epochs, the metric is lower-is-better, and it has been strictly increasing for the last five epochs
        if len(valid) > 20 and metric in utils.LOWER_METRIC_LIST and utils.strictly_increasing(valid[-5:]):
            return True
        # Trained for more than 20 epochs, the metric is higher-is-better, and it has been strictly decreasing for the last five epochs
        elif len(valid) > 20 and metric not in utils.LOWER_METRIC_LIST and utils.strictly_decreasing(valid[-5:]):
            return True
        # More than 20 epochs have passed since the best validation result
        elif len(valid) - valid.index(utils.best_result(metric, valid)) > 20:
            return True
        return False
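
As a quick illustration of the stopping rule, using the strictly_increasing sketch above with invented numbers for a lower-is-better metric:

# Invented RMSE history: 22 epochs, with the last five strictly increasing
valid = [0.95] * 17 + [0.90, 0.91, 0.92, 0.93, 0.94]
should_stop = len(valid) > 20 and strictly_increasing(valid[-5:])
print(should_stop)  # True: the metric worsened five epochs in a row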
Example #3
    def train(self, model, data_processor):
        """
        训练模型
        :param model: 模型
        :param data_processor: DataProcessor实例
        :return:
        """

        # Get the train, validation, and test data; epoch=-1 means no shuffle
        train_data = data_processor.get_train_data(epoch=-1, model=model)
        validation_data = data_processor.get_validation_data(model=model)
        test_data = data_processor.get_test_data(
            model=model) if data_processor.unlabel_test == 0 else None
        self._check_time(start=True)  # record the start time

        # Model performance before training
        init_train = self.evaluate(model, train_data, data_processor) \
            if train_data is not None else [-1.0] * len(self.metrics)
        init_valid = self.evaluate(model, validation_data, data_processor) \
            if validation_data is not None else [-1.0] * len(self.metrics)
        init_test = self.evaluate(model, test_data, data_processor) \
            if test_data is not None and data_processor.unlabel_test == 0 else [-1.0] * len(self.metrics)
        logging.info(
            "Init: \t train= %s validation= %s test= %s [%.1f s] " %
            (utils.format_metric(init_train), utils.format_metric(init_valid),
             utils.format_metric(init_test), self._check_time()) +
            ','.join(self.metrics))

        try:
            for epoch in range(self.epoch):
                self._check_time()
                # Re-fetch the training data each epoch, since shuffling (and, for top-n recommendation, negative resampling) happens per epoch
                epoch_train_data = data_processor.get_train_data(epoch=epoch,
                                                                 model=model)
                train_predictions, last_batch, mean_loss, mean_loss_l2 = \
                    self.fit(model, epoch_train_data, data_processor, epoch=epoch)

                # Check intermediate model results
                if self.check_epoch > 0 and (epoch == 1
                                             or epoch % self.check_epoch == 0):
                    last_batch['mean_loss'] = mean_loss
                    last_batch['mean_loss_l2'] = mean_loss_l2
                    self.check(model, last_batch)
                training_time = self._check_time()

                # Evaluate model performance
                train_result = [mean_loss] + model.evaluate_method(
                    train_predictions, train_data, metrics=['rmse'])
                valid_result = self.evaluate(model, validation_data, data_processor) \
                    if validation_data is not None else [-1.0] * len(self.metrics)
                test_result = self.evaluate(model, test_data, data_processor) \
                    if test_data is not None and data_processor.unlabel_test == 0 else [-1.0] * len(self.metrics)
                testing_time = self._check_time()

                self.train_results.append(train_result)
                self.valid_results.append(valid_result)
                self.test_results.append(test_result)

                # Log current model performance
                logging.info(
                    "Epoch %5d [%.1f s]\t train= %s validation= %s test= %s [%.1f s] "
                    % (epoch + 1, training_time,
                       utils.format_metric(train_result),
                       utils.format_metric(valid_result),
                       utils.format_metric(test_result), testing_time) +
                    ','.join(self.metrics))

                # Save the model if the current validation result is the best so far
                if utils.best_result(self.metrics[0], self.valid_results) == self.valid_results[-1]:
                    model.save_model()
                # model.save_model(
                #     model_path='../model/variable_tsne_logic_epoch/variable_tsne_logic_epoch_%d.pt' % (epoch + 1))
                # Check whether to stop training, based on the validation set
                if self.eva_termination(model) and self.early_stop == 1:
                    logging.info(
                        "Early stop at %d based on validation result." %
                        (epoch + 1))
                    break
        except KeyboardInterrupt:
            logging.info("Early stop manually")
            save_here = input("Save here? (1/0) (default 0):")
            if str(save_here).lower().startswith('1'):
                model.save_model()

        # Find the best validation result across iterations
        best_valid_score = utils.best_result(self.metrics[0],
                                             self.valid_results)
        best_epoch = self.valid_results.index(best_valid_score)
        logging.info(
            "Best Iter(validation)= %5d\t train= %s valid= %s test= %s [%.1f s] "
            % (best_epoch + 1,
               utils.format_metric(self.train_results[best_epoch]),
               utils.format_metric(self.valid_results[best_epoch]),
               utils.format_metric(self.test_results[best_epoch]),
               self.time[1] - self.time[0]) + ','.join(self.metrics))
        best_test_score = utils.best_result(self.metrics[0], self.test_results)
        best_epoch = self.test_results.index(best_test_score)
        logging.info(
            "Best Iter(test)= %5d\t train= %s valid= %s test= %s [%.1f s] " %
            (best_epoch + 1,
             utils.format_metric(self.train_results[best_epoch]),
             utils.format_metric(self.valid_results[best_epoch]),
             utils.format_metric(self.test_results[best_epoch]),
             self.time[1] - self.time[0]) + ','.join(self.metrics))
        model.load_model()
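
The train loop above repeatedly calls self._check_time() to measure how long each phase took. A plausible sketch of that helper follows; the real BaseRunner implementation may differ (assumption). self.time holds [start_time, last_checkpoint], which also makes the final logging's self.time[1] - self.time[0] the total elapsed time.

import time

class RunnerTimerSketch(object):
    def __init__(self):
        self.time = None

    def _check_time(self, start=False):
        # With start=True (or on first use), initialize both slots to now
        # and return the start timestamp; otherwise return the seconds
        # elapsed since the previous check and advance the checkpoint.
        if start or self.time is None:
            self.time = [time.time()] * 2
            return self.time[0]
        elapsed = time.time() - self.time[1]
        self.time[1] = time.time()
        return elapsed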
Example #4
    def train(self, model, train_data, validation_data=None, test_data=None, data_processor=None):
        assert train_data is not None
        if model.sess is None:
            self._build_sess(model)
        if data_processor is None:
            data_processor = BaseDataProcessor()
        self._check_time(start=True)

        init_train = self.evaluate(model, train_data) \
            if train_data is not None else [-1.0] * len(self.metrics)
        init_valid = self.evaluate(model, validation_data) \
            if validation_data is not None else [-1.0] * len(self.metrics)
        init_test = self.evaluate(model, test_data) \
            if test_data is not None else [-1.0] * len(self.metrics)
        logging.info("Init: \t train= %s validation= %s test= %s [%.1f s] " % (
            utils.format_metric(init_train), utils.format_metric(init_valid), utils.format_metric(init_test),
            self._check_time()) + ','.join(self.metrics))

        try:
            for epoch in range(self.epoch):
                gc.collect()
                self._check_time()
                epoch_train_data = copy.deepcopy(train_data)
                epoch_train_data = data_processor.epoch_process_train(epoch_train_data, epoch=epoch + 1)
                if self.check_epoch > 0 and (epoch == 1 or epoch % self.check_epoch == 0):
                    self.check(model, epoch_train_data)
                self.fit(model, epoch_train_data, epoch=epoch + 1)
                del epoch_train_data
                training_time = self._check_time()

                # Evaluate model performance on train/validation/test
                train_result = self.evaluate(model, train_data) \
                    if train_data is not None else [-1.0] * len(self.metrics)
                valid_result = self.evaluate(model, validation_data) \
                    if validation_data is not None else [-1.0] * len(self.metrics)
                test_result = self.evaluate(model, test_data) \
                    if test_data is not None else [-1.0] * len(self.metrics)
                testing_time = self._check_time()

                self.train_results.append(train_result)
                self.valid_results.append(valid_result)
                self.test_results.append(test_result)

                logging.info("Epoch %5d [%.1f s]\t train= %s validation= %s test= %s [%.1f s] "
                             % (epoch + 1, training_time, utils.format_metric(train_result),
                                utils.format_metric(valid_result), utils.format_metric(test_result),
                                testing_time) + ','.join(self.metrics))

                if utils.best_result(self.metrics[0], self.valid_results) == self.valid_results[-1]:
                    self.save_model(model)
                if utils.eva_termination(self.metrics[0], self.valid_results):
                    logging.info("Early stop at %d based on validation result." % (epoch + 1))
                    break
        except KeyboardInterrupt:
            logging.info("Early stop manually")
            save_here = input("Save here? (1/0) (default 0):")
            if str(save_here).lower().startswith('1'):
                self.save_model(model)

        # Find the best validation result across iterations
        best_valid_score = utils.best_result(self.metrics[0], self.valid_results)
        best_epoch = self.valid_results.index(best_valid_score)
        logging.info("Best Iter(validation)= %5d\t train= %s valid= %s test= %s [%.1f s] "
                     % (best_epoch + 1,
                        utils.format_metric(self.train_results[best_epoch]),
                        utils.format_metric(self.valid_results[best_epoch]),
                        utils.format_metric(self.test_results[best_epoch]),
                        self.time[1] - self.time[0]) + ','.join(self.metrics))
        best_test_score = utils.best_result(self.metrics[0], self.test_results)
        best_epoch = self.test_results.index(best_test_score)
        logging.info("Best Iter(test)= %5d\t train= %s valid= %s test= %s [%.1f s] "
                     % (best_epoch + 1,
                        utils.format_metric(self.train_results[best_epoch]),
                        utils.format_metric(self.valid_results[best_epoch]),
                        utils.format_metric(self.test_results[best_epoch]),
                        self.time[1] - self.time[0]) + ','.join(self.metrics))
        self.load_model(model)
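
Unlike Examples #1 and #2, this variant delegates the stopping test to a free function, utils.eva_termination(metric, valid_results). A sketch consistent with the method shown earlier, reusing the helper sketches after Example #1 (this is an assumption, not the project's verified code):

def eva_termination(metric, valid_results):
    # Stop once the validation metric has moved in the wrong direction
    # for five straight epochs, or the best result is over 20 epochs old
    if len(valid_results) > 20 and metric in LOWER_METRIC_LIST \
            and strictly_increasing(valid_results[-5:]):
        return True
    if len(valid_results) > 20 and metric not in LOWER_METRIC_LIST \
            and strictly_decreasing(valid_results[-5:]):
        return True
    if len(valid_results) - valid_results.index(best_result(metric, valid_results)) > 20:
        return True
    return False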