Example #1

# Shared imports for all three snippets (import paths are assumed;
# older projects may use `from tensorboardX import SummaryWriter`).
import torch
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter
# `get_time` is assumed to be a project utility returning a timestamp string.

def _train_stage(self):
        self.model.train()
        running_loss = 0.
        running_acc = 0.
        running_loss_cls = 0.
        running_loss_ft = 0.
        # Create the TensorBoard writer once, up front, instead of lazily
        # on the first epoch via an is_first flag.
        self.writer = SummaryWriter(self.conf.log_path)
        for e in range(self.start_epoch, self.conf.epochs):
            print('epoch {} started'.format(e))
            print("lr: ", self.schedule_lr.get_lr())

            for sample, ft_sample, target in tqdm(iter(self.train_loader)):
                imgs = [sample, ft_sample]
                labels = target

                loss, acc, loss_cls, loss_ft = self._train_batch_data(
                    imgs, labels)
                running_loss_cls += loss_cls
                running_loss_ft += loss_ft
                running_loss += loss
                running_acc += acc

                self.step += 1

                if self.step % self.board_loss_every == 0 and self.step != 0:
                    loss_board = running_loss / self.board_loss_every
                    self.writer.add_scalar('Training/Loss', loss_board,
                                           self.step)
                    acc_board = running_acc / self.board_loss_every
                    self.writer.add_scalar('Training/Acc', acc_board,
                                           self.step)
                    lr = self.optimizer.param_groups[0]['lr']
                    self.writer.add_scalar('Training/Learning_rate', lr,
                                           self.step)
                    loss_cls_board = running_loss_cls / self.board_loss_every
                    self.writer.add_scalar('Training/Loss_cls', loss_cls_board,
                                           self.step)
                    loss_ft_board = running_loss_ft / self.board_loss_every
                    self.writer.add_scalar('Training/Loss_ft', loss_ft_board,
                                           self.step)

                    running_loss = 0.
                    running_acc = 0.
                    running_loss_cls = 0.
                    running_loss_ft = 0.
                if self.step % self.save_every == 0 and self.step != 0:
                    time_stamp = get_time()
                    self._save_state(time_stamp, extra=self.conf.job_name)
            self.schedule_lr.step()

        time_stamp = get_time()
        self._save_state(time_stamp, extra=self.conf.job_name)
        self.writer.close()
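
All three examples delegate the per-batch work to a helper (`_train_batch_data` here, `_load_batch_data` in Example #3) that is not shown. The sketch below is a guess at the contract Example #1 relies on; the criterion attributes, the 0.5/0.5 loss weighting, `_get_accuracy`, and `conf.device` are assumptions, not the original code.

def _train_batch_data(self, imgs, labels):
    # Hypothetical sketch: forward the RGB crop, combine a classification
    # loss with a regression loss against the ft_sample target, and take
    # one optimizer step. Returns the four scalars the caller accumulates.
    self.optimizer.zero_grad()
    labels = labels.to(self.conf.device)
    embeddings, feature_map = self.model(imgs[0].to(self.conf.device))

    loss_cls = self.cls_criterion(embeddings, labels)          # assumed attr
    loss_ft = self.ft_criterion(feature_map,
                                imgs[1].to(self.conf.device))  # assumed attr
    loss = 0.5 * loss_cls + 0.5 * loss_ft                      # assumed weights

    acc = self._get_accuracy(embeddings, labels)[0]            # assumed helper
    loss.backward()
    self.optimizer.step()
    return loss.item(), acc, loss_cls.item(), loss_ft.item()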
Example #2

    def _train_stage(self):
        val_loss = None
        train_loss = None
        step = 0       # per-batch global step for TensorBoard
        val_step = 0   # separate counter for validation batches
        self.writer = SummaryWriter(self.conf.log_path)
        for e in range(self.start_epoch, self.conf.epochs):
            print('epoch {} started'.format(e))
            print("lr: ", self.schedule_lr.get_lr())

            self.model.train()
            for sample, ft_sample, target in tqdm(iter(self.train_loader)):
                imgs = [sample, ft_sample]
                labels = target

                loss, acc = self._train_batch_data(imgs, labels, True)
                step += 1
                # Note: checkpointing on every new per-batch minimum is
                # noisy; a single batch loss is a weak "best" signal.
                if train_loss is None or loss < train_loss:
                    train_loss = loss
                    time_stamp = get_time()
                    self._save_state(str(time_stamp) + "_train",
                                     extra=self.conf.job_name)
                    print("\nBest train loss", train_loss)
                # Pass the global step explicitly; without it every point is
                # logged at the same step and the TensorBoard curves collapse.
                self.writer.add_scalar('Training/Loss', loss, step)
                print('\nTraining/Loss', loss)
                self.writer.add_scalar('Training/Acc', acc.item(), step)
                print('Training/Acc', acc.item())
                lr = self.optimizer.param_groups[0]['lr']
                self.writer.add_scalar('Training/Learning_rate', lr, step)
                print('Training/Learning_rate', lr)

            self.schedule_lr.step()

            self.model.eval()
            # Assumes the False flag makes _train_batch_data skip the
            # backward/optimizer step, so no_grad is safe here.
            with torch.no_grad():
                for sample, ft_sample, target in tqdm(iter(self.val_loader)):
                    imgs = [sample, ft_sample]
                    labels = target

                    loss, acc = self._train_batch_data(imgs, labels, False)
                    val_step += 1
                    if val_loss is None or loss < val_loss:
                        val_loss = loss
                        time_stamp = get_time()
                        self._save_state(str(time_stamp) + "_val",
                                         extra=self.conf.job_name)
                        print("\nBest val loss", val_loss)

                    self.writer.add_scalar('Valid/Loss', loss, val_step)
                    print('\nValid/Loss', loss)
                    self.writer.add_scalar('Valid/Acc', acc.item(), val_step)
                    print('Valid/Acc', acc.item())

        self.writer.close()
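
`_save_state` is also not shown. Below is a minimal sketch of what it plausibly does; the '{timestamp}_{extra}' file naming and the `conf.model_path` directory are assumptions, not the original project's layout.

import os

def _save_state(self, time_stamp, extra=None):
    # Hypothetical sketch: persist model and optimizer weights under a
    # timestamped name so the "_train"/"_val" best checkpoints coexist.
    torch.save(self.model.state_dict(),
               os.path.join(self.conf.model_path,
                            '{}_{}_model.pth'.format(time_stamp, extra)))
    torch.save(self.optimizer.state_dict(),
               os.path.join(self.conf.model_path,
                            '{}_{}_optimizer.pth'.format(time_stamp, extra)))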
Example #3
    def _train_eval_stage(self):

        running_loss = 0.
        running_acc = 0.
        running_loss_cls = 0.
        running_loss_ft = 0.
        self.total_iter = len(self.train_loader) * self.conf.epochs
        self.writer = SummaryWriter(self.conf.log_path, self.conf.time)
        for e in range(self.start_epoch, self.conf.epochs):
            self.model.train()
            is_eval = False  # renamed from `eval`, which shadows the builtin
            print('epoch {} started'.format(e))
            print("lr: ", self.schedule_lr.optimizer.param_groups[0]['lr'])
            self.ratio = self.step / self.total_iter

            for sample, ft_sample, target in tqdm(iter(self.train_loader)):
                imgs = [sample, ft_sample]
                labels = target
                loss, acc, loss_cls, loss_ft = self._load_batch_data(
                    imgs, labels, is_eval)
                running_loss_cls += loss_cls
                running_loss_ft += loss_ft
                running_loss += loss
                running_acc += acc

                self.step += 1
                self.optimizer.step()
                self.schedule_lr.step_iter(self.step)
                if self.step % self.board_loss_every == 0 and self.step != 0:
                    loss_board = running_loss / self.board_loss_every
                    self.writer.add_scalar('Training/Loss', loss_board,
                                           self.step)
                    acc_board = running_acc / self.board_loss_every
                    self.writer.add_scalar('Training/Acc', acc_board,
                                           self.step)
                    lr = self.optimizer.param_groups[0]['lr']
                    self.writer.add_scalar('Training/Learning_rate', lr,
                                           self.step)
                    loss_cls_board = running_loss_cls / self.board_loss_every
                    self.writer.add_scalar('Training/Loss_cls', loss_cls_board,
                                           self.step)
                    loss_ft_board = running_loss_ft / self.board_loss_every
                    self.writer.add_scalar('Training/Loss_ft', loss_ft_board,
                                           self.step)
                    print(
                        "Step:{}  Training/lr:{:.4f}  Loss:{:.2f}  "
                        "Loss_cls:{:.2f}  Loss_ft:{:.4f}  Acc:{:.2f}".format(
                            self.step, lr, loss_board, loss_cls_board,
                            loss_ft_board, acc_board))

                    running_loss = 0.
                    running_acc = 0.
                    running_loss_cls = 0.
                    running_loss_ft = 0.

            torch.save(self.model.state_dict(),
                       self.conf.model_path + "/epoch_{}.pth".format(e))

            # `e % 1 == 0` in the original is always true: evaluate every epoch.
            is_eval = True
            self.model.eval()
            total_val_iter = 0
            eval_loss = 0.
            eval_acc = 0.
            eval_loss_cls = 0.
            eval_loss_ft = 0.
            with torch.no_grad():
                for sample, ft_sample, target in tqdm(
                        iter(self.eval_loader)):
                    total_val_iter += 1
                    imgs = [sample, ft_sample]
                    labels = target
                    loss, acc, loss_cls, loss_ft = self._load_batch_data(
                        imgs, labels, is_eval)
                    eval_loss_cls += loss_cls
                    eval_loss_ft += loss_ft
                    eval_loss += loss
                    eval_acc += acc

                loss_board = eval_loss / total_val_iter
                self.writer.add_scalar('Eval/Loss', loss_board, self.step)

                acc_board = eval_acc / total_val_iter
                self.writer.add_scalar('Eval/Acc', acc_board, self.step)

                loss_cls_board = eval_loss_cls / total_val_iter
                self.writer.add_scalar('Eval/Loss_cls', loss_cls_board,
                                       self.step)
                loss_ft_board = eval_loss_ft / total_val_iter
                self.writer.add_scalar('Eval/Loss_ft', loss_ft_board,
                                       self.step)
                # Do not close the writer here: closing it inside the epoch
                # loop (as the original did) breaks logging in later epochs.
            self.schedule_lr.step_epoch()

        self.writer.close()
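
Example #3 drives its scheduler through a non-standard `step_iter`/`step_epoch` pair instead of the stock PyTorch `step()`. The wrapper below is one way that interface could be implemented, assuming linear per-iteration warmup plus milestone decay per epoch; it is a sketch, not the original class.

class WarmupStepLR:
    # Hypothetical wrapper: linear warmup handled per iteration via
    # step_iter(), multiplicative decay handled per epoch via step_epoch().
    def __init__(self, optimizer, warmup_iters, base_lr, gamma=0.1,
                 milestones=(10, 15, 22)):
        self.optimizer = optimizer
        self.warmup_iters = warmup_iters
        self.base_lr = base_lr
        self.gamma = gamma
        self.milestones = set(milestones)
        self.epoch = 0

    def step_iter(self, step):
        # Linear warmup for the first warmup_iters optimizer steps.
        if step <= self.warmup_iters:
            lr = self.base_lr * step / self.warmup_iters
            for group in self.optimizer.param_groups:
                group['lr'] = lr

    def step_epoch(self):
        # Multiplicative decay at fixed epoch milestones.
        self.epoch += 1
        if self.epoch in self.milestones:
            for group in self.optimizer.param_groups:
                group['lr'] *= self.gamma

Splitting the two calls keeps warmup at batch granularity while leaving the coarse decay schedule on epoch boundaries, which matches how Example #3 calls `step_iter(self.step)` inside the batch loop and `step_epoch()` once per epoch.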