Example #1
    def run(self):
        self.logger.info('args = %s', self.cfg)
        # Setup Metrics
        self.metric = SegmentationMetric(self.n_classes)
        self.loss_meter = average_meter()
        run_start = time.time()
        # Set up results folder
        if not os.path.exists(self.save_image_path):
            os.makedirs(self.save_image_path)

        # if len(self.valid_queue) != 0:
        #     self.logger.info('Begin valid set evaluation')
        #     self.test(self.valid_queue, split='val', desc='promise12')
        if len(self.test_queue) != 0:
            self.logger.info('Begin test set evaluation')
            self.test(self.test_queue,
                      split='test',
                      desc='../predictions/nerve_rst')
        self.logger.info('Evaluation done!')
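All of the examples on this page instantiate average_meter and SegmentationMetric objects and later call reset() on them, but neither helper is defined here. The class below is only a minimal sketch of the kind of running-average tracker these snippets appear to assume; the constructor and reset() match the call sites, while the update() method and the fields are assumptions, not the project's actual implementation:

class average_meter:
    """Minimal running-average tracker, sketched from how the examples use it."""

    def __init__(self):
        self.reset()

    def reset(self):
        # clear the accumulated sum, count and current average
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, value, n=1):
        # fold in a new (batch) value, weighted by the batch size n
        self.sum += value * n
        self.count += n
        self.avg = self.sum / self.count
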
Example #2
    def run(self):
        self.logger.info('args = %s', self.cfg)
        # Setup Metrics
        self.metric_train = SegmentationMetric(self.n_classes)
        self.metric_val = SegmentationMetric(self.n_classes)
        self.metric_test = SegmentationMetric(self.n_classes)
        self.val_loss_meter = average_meter()
        self.test_loss_meter = average_meter()
        self.train_loss_meter = average_meter()
        self.train_dice_coeff_meter = average_meter()
        self.val_dice_coeff_meter = average_meter()
        self.patience = 0
        self.save_best = True
        run_start = time.time()

        # Set up results folder
        if not os.path.exists(self.save_image_path):
            os.makedirs(self.save_image_path)

        for epoch in range(self.start_epoch, self.cfg['training']['epoch']):
            self.epoch = epoch

            self.scheduler.step()

            self.logger.info('=> Epoch {}, lr {}'.format(
                self.epoch,
                self.scheduler.get_lr()[-1]))

            # train and search the model
            self.train()

            # valid the model
            self.val()

            self.logger.info('current best loss {}, pixAcc {}, mIoU {}'.format(
                self.best_loss,
                self.best_pixAcc,
                self.best_mIoU,
            ))

            if self.show_dice_coeff:
                self.logger.info('current best DSC {}'.format(
                    self.best_dice_coeff))

            if self.save_best:
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'dur_time': self.dur_time + time.time() - run_start,
                        'model_state': self.model.state_dict(),
                        'model_optimizer': self.model_optimizer.state_dict(),
                        'best_pixAcc': self.best_pixAcc,
                        'best_mIoU': self.best_mIoU,
                        'best_dice_coeff': self.best_dice_coeff,
                        'best_loss': self.best_loss,
                    }, True, self.save_path)
                self.logger.info(
                    'save checkpoint (epoch %d) in %s  dur_time: %s', epoch,
                    self.save_path,
                    calc_time(self.dur_time + time.time() - run_start))
                self.save_best = False

            # if self.patience == self.cfg['training']['max_patience'] or epoch == self.cfg['training']['epoch']-1:
            if epoch == self.cfg['training']['epoch'] - 1:
                # load best model weights
                # self._check_resume(os.path.join(self.save_path, 'checkpoint.pth.tar'))
                # # Test
                # if len(self.test_queue) > 0:
                #     self.logger.info('Training ends \n Test')
                #     self.test()
                # else:
                #     self.logger.info('Training ends!')
                self.logger.info('Early stopping')
                break
            else:
                self.logger.info('current patience: {}'.format(self.patience))

            self.val_loss_meter.reset()
            self.train_loss_meter.reset()
            self.train_dice_coeff_meter.reset()
            self.val_dice_coeff_meter.reset()
            self.metric_train.reset()
            self.metric_val.reset()
            self.logger.info('cost time: {}'.format(
                calc_time(self.dur_time + time.time() - run_start)))

        # export scalar data to JSON for external processing
        self.writer.export_scalars_to_json(self.save_tbx_log +
                                           "/all_scalars.json")
        self.writer.close()
        self.logger.info('cost time: {}'.format(
            calc_time(self.dur_time + time.time() - run_start)))
        self.logger.info('log dir in : {}'.format(self.save_path))
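
Example #2 persists its training state through a save_checkpoint(state, is_best, save_path) helper and formats elapsed time with calc_time; neither function is shown on this page. Below is a minimal sketch of what such a checkpoint helper commonly looks like, assuming the usual torch.save plus best-copy pattern (the filenames, the shutil copy and the resume snippet are assumptions, not this project's code):

import os
import shutil

import torch


def save_checkpoint(state, is_best, save_path, filename='checkpoint.pth.tar'):
    # Hypothetical helper: serialize the state dict, and keep a separate copy
    # whenever the caller flags it as the best checkpoint so far.
    path = os.path.join(save_path, filename)
    torch.save(state, path)
    if is_best:
        shutil.copyfile(path, os.path.join(save_path, 'model_best.pth.tar'))


# Resuming from such a checkpoint would then look roughly like:
#   checkpoint = torch.load(path)
#   model.load_state_dict(checkpoint['model_state'])
#   model_optimizer.load_state_dict(checkpoint['model_optimizer'])
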
Example #3
    def run(self):
        self.logger.info('args = {}'.format(self.cfg))
        # Setup Metrics
        self.metric_train = SegmentationMetric(self.n_classes)
        self.metric_val = SegmentationMetric(self.n_classes)
        self.train_loss_meter = average_meter()
        self.val_loss_meter = average_meter()
        run_start = time.time()
        for epoch in range(self.start_epoch, self.cfg['searching']['epoch']):
            self.epoch = epoch

            # update scheduler
            self.scheduler.step()
            self.logger.info('epoch %d / %d lr %e', self.epoch,
                             self.cfg['searching']['epoch'], self.scheduler.get_lr()[-1])

            # get genotype
            genotype = self.model.genotype()
            self.logger.info('genotype = %s', genotype)
            print('alpha normal_down:', F.softmax(self.model.alphas_normal_down, dim=-1))
            print('alpha down:', F.softmax(self.model.alphas_down, dim=-1))
            print('alpha normal_up:', F.softmax(self.model.alphas_normal_up, dim=-1))
            print('alpha up:', F.softmax(self.model.alphas_up, dim=-1))

            # the search can be unstable early on, so only start checking genotype stability once the architecture parameters are being updated
            if self.epoch >= self.cfg['searching']['alpha_begin']:
                # check whether the genotype has changed
                if self.geno_type == genotype:
                    self.cur_count += 1
                else:
                    self.cur_count = 0
                    self.geno_type = genotype

                self.logger.info('cur_count = {}'.format(self.cur_count))

                if self.cur_count >= self.cfg['searching']['max_patience']:
                    self.logger.info('Reached max patience!\nbest genotype {}'.format(genotype))
                    break

            # train and search the model
            self.train()

            # valid the model
            self.infer()

            save_checkpoint({
                'epoch': epoch + 1,
                'dur_time': self.dur_time + time.time() - run_start,
                'cur_count': self.cur_count,
                'geno_type': self.geno_type,
                'model_state': self.model.state_dict(),
                'arch_optimizer': self.arch_optimizer.state_dict(),
                'model_optimizer': self.model_optimizer.state_dict(),
                'alphas_dict': self.model.alphas_dict(),
                'scheduler': self.scheduler.state_dict()
            }, False, self.save_path)
            self.logger.info('save checkpoint (epoch %d) in %s  dur_time: %s',
                             epoch, self.save_path,
                             calc_time(self.dur_time + time.time() - run_start))

            self.metric_train.reset()
            self.metric_val.reset()
            self.val_loss_meter.reset()
            self.train_loss_meter.reset()

        # export scalar data to JSON for external processing
        self.writer.export_scalars_to_json(self.save_tbx_log + "/all_scalars.json")
        self.writer.close()
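
Example #3 stops the architecture search once the decoded genotype has stayed unchanged for max_patience consecutive epochs. Pulled out of the trainer class, that stability check reduces to a small counter like the one below (the class and method names are illustrative, not part of the project):

class GenotypeStability:
    """Illustrative counter behind the genotype-patience early stop above."""

    def __init__(self, max_patience):
        self.max_patience = max_patience
        self.last_genotype = None
        self.count = 0

    def should_stop(self, genotype):
        # extend the streak while the decoded architecture stays the same,
        # otherwise restart it from the new genotype
        if genotype == self.last_genotype:
            self.count += 1
        else:
            self.count = 0
            self.last_genotype = genotype
        return self.count >= self.max_patience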