Example no. 1
    def resume(self, checkpoint_dir, hyperparameters):
        # Load generators
        last_model_name = get_model_list(checkpoint_dir, 'gen')
        state_dict = torch.load(last_model_name)
        self.gen_a.load_state_dict(state_dict['a'])
        self.gen_b.load_state_dict(state_dict['b'])
        iterations = int(last_model_name[-11:-3])
        # Load discriminators
        last_model_name = get_model_list(checkpoint_dir, 'dis')
        state_dict = torch.load(last_model_name)
        self.dis_a.load_state_dict(state_dict['a'])
        self.dis_b.load_state_dict(state_dict['b'])
        # Load segmentor
        last_model_name = get_model_list(checkpoint_dir, 'seg')
        state_dict = torch.load(last_model_name)
        self.seg.load_state_dict(state_dict)

        # Load optimizers
        state_dict = torch.load(os.path.join(checkpoint_dir, 'opt.pt'))
        self.dis_opt.load_state_dict(state_dict['dis'])
        self.gen_opt.load_state_dict(state_dict['gen'])

        state_dict = torch.load(os.path.join(checkpoint_dir, 'opt_seg.pt'))
        self.seg_opt.load_state_dict(state_dict)

        # Reinitialize schedulers
        self.dis_scheduler = get_scheduler(self.dis_opt, hyperparameters['lr_policy'], hyperparameters, iterations)
        self.gen_scheduler = get_scheduler(self.gen_opt, hyperparameters['lr_policy'], hyperparameters, iterations)
        self.seg_scheduler = get_scheduler(self.seg_opt, 'constant', None, iterations)

        print('Resume from iteration %d' % iterations)
        return iterations
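
For reference, a minimal sketch of a get_scheduler helper that would be compatible with the calls above; the signature, the 'constant'/'step' policies, and the 'step_size'/'gamma' hyperparameter keys are assumptions inferred from the call sites, not the project's actual implementation:

import torch


def get_scheduler(optimizer, lr_policy, hyperparameters=None, iterations=-1):
    # 'constant' (or no policy) keeps the learning rate fixed, so no scheduler is returned
    if lr_policy is None or lr_policy == 'constant':
        return None
    if lr_policy == 'step':
        # last_epoch=iterations lets a resumed run continue from the saved iteration
        # (PyTorch then expects 'initial_lr' in the optimizer's param groups)
        return torch.optim.lr_scheduler.StepLR(optimizer,
                                               step_size=hyperparameters['step_size'],
                                               gamma=hyperparameters['gamma'],
                                               last_epoch=iterations)
    raise NotImplementedError('learning rate policy [%s] is not implemented' % lr_policy)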
Example no. 2
    def init(self):
        opt = self.args
        if not os.path.exists(opt.saved_dir):
            os.makedirs(opt.saved_dir)
        self.fake_A_pool = ImagePool(
            opt.pool_size
        )  # create image buffer to store previously generated images
        self.fake_B_pool = ImagePool(opt.pool_size)
        self.crit_cycle = torch.nn.L1Loss()
        self.crit_idt = torch.nn.L1Loss()
        self.crit_gan = GANLoss(opt.gan_mode).cuda()
        self.cam_loss = CAMLoss()
        self.optim_G = torch.optim.Adam(itertools.chain(
            self.model.G_A.parameters(), self.model.G_B.parameters()),
                                        lr=opt.lr,
                                        betas=(opt.beta1, 0.999))
        self.optim_D = torch.optim.Adam(
            itertools.chain(self.model.D_A.parameters(),
                            self.model.D_B.parameters()),
            lr=opt.lr,
            betas=(opt.beta1, 0.999))  # default: 0.5
        self.optimizers = [self.optim_G, self.optim_D]

        self.schedulers = [
            get_scheduler(optimizer, self.args)
            for optimizer in self.optimizers
        ]
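
A hedged usage sketch of how the schedulers built in init() are typically advanced once per epoch; the run_training and train_one_epoch names below are hypothetical, not part of the original code:

def run_training(trainer, train_one_epoch, n_epochs):
    # trainer is assumed to expose the schedulers list created in init()
    for epoch in range(n_epochs):
        train_one_epoch(trainer, epoch)      # forward/backward passes and optimizer steps
        for scheduler in trainer.schedulers:
            scheduler.step()                 # advance both learning-rate schedules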
Example no. 3
    def setup(self, opt):
        """Load and print networks"""
        if opt.phase == 'train':
            self.schedulers = utils.get_scheduler(self.optimizer, opt)
        if opt.phase in ['sample', 'interpolate'] or opt.resume:
            load_suffix = 'iter_%d' % opt.load_iter if opt.load_iter > 0 else opt.epoch
            self.load_denoise_model(load_suffix)
        self.print_networks(opt.verbose)
Example no. 4
def get_poseaug_model(args, dataset):
    """
    return PoseAug augmentor and discriminator
    and corresponding optimizer and scheduler
    """
    # Create model: G and D
    print("==> Creating model...")
    device = torch.device("cuda")
    num_joints = dataset.skeleton().num_joints()

    # generator for PoseAug
    model_G = PoseGenerator(args, num_joints * 3).to(device)
    model_G.apply(init_weights)
    print("==> Total parameters: {:.2f}M".format(sum(p.numel() for p in model_G.parameters()) / 1000000.0))

    # discriminator for 3D
    model_d3d = Pos3dDiscriminator(num_joints).to(device)
    model_d3d.apply(init_weights)
    print("==> Total parameters: {:.2f}M".format(sum(p.numel() for p in model_d3d.parameters()) / 1000000.0))

    # discriminator for 2D
    model_d2d = Pos2dDiscriminator(num_joints).to(device)
    model_d2d.apply(init_weights)
    print("==> Total parameters: {:.2f}M".format(sum(p.numel() for p in model_d2d.parameters()) / 1000000.0))

    # prepare optimizer
    g_optimizer = torch.optim.Adam(model_G.parameters(), lr=args.lr_g)
    d3d_optimizer = torch.optim.Adam(model_d3d.parameters(), lr=args.lr_d)
    d2d_optimizer = torch.optim.Adam(model_d2d.parameters(), lr=args.lr_d)

    # prepare scheduler
    g_lr_scheduler = get_scheduler(g_optimizer, policy='lambda', nepoch_fix=0, nepoch=args.epochs)
    d3d_lr_scheduler = get_scheduler(d3d_optimizer, policy='lambda', nepoch_fix=0, nepoch=args.epochs)
    d2d_lr_scheduler = get_scheduler(d2d_optimizer, policy='lambda', nepoch_fix=0, nepoch=args.epochs)

    return {
        'model_G': model_G,
        'model_d3d': model_d3d,
        'model_d2d': model_d2d,
        'optimizer_G': g_optimizer,
        'optimizer_d3d': d3d_optimizer,
        'optimizer_d2d': d2d_optimizer,
        'scheduler_G': g_lr_scheduler,
        'scheduler_d3d': d3d_lr_scheduler,
        'scheduler_d2d': d2d_lr_scheduler,
    }
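
The 'lambda' policy requested above is commonly implemented with torch.optim.lr_scheduler.LambdaLR. Below is a hedged sketch under the assumption that the learning rate is held fixed for nepoch_fix epochs and then decayed linearly towards zero over the remaining epochs; it may differ from the exact schedule used here:

import torch


def get_scheduler(optimizer, policy='lambda', nepoch_fix=0, nepoch=100):
    if policy == 'lambda':
        def lambda_rule(epoch):
            # multiplicative factor applied to each param group's base learning rate
            return 1.0 - max(0, epoch - nepoch_fix) / float(nepoch - nepoch_fix + 1)
        return torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_rule)
    raise NotImplementedError('policy [%s] is not implemented' % policy)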
Example no. 5
    def __init__(self, hyperparameters, opts):
        super(MUNIT_Trainer, self).__init__()
        lr = hyperparameters['lr']
        self.opts = opts

        # Initiate the networks
        self.gen_a = AdaINGen(hyperparameters['input_dim_a'], hyperparameters['gen'])  # auto-encoder for domain a
        self.gen_b = AdaINGen(hyperparameters['input_dim_b'], hyperparameters['gen'])  # auto-encoder for domain b
        self.dis_a = MsImageDis(hyperparameters['input_dim_a'], hyperparameters['dis'])  # discriminator for domain a
        self.dis_b = MsImageDis(hyperparameters['input_dim_b'], hyperparameters['dis'])  # discriminator for domain b
        self.seg = segmentor(num_classes=2, channels=hyperparameters['input_dim_b'], hyperpars=hyperparameters['seg'])

        self.instancenorm = nn.InstanceNorm2d(512, affine=False)
        self.style_dim = hyperparameters['gen']['style_dim']

        # fix the noise used in sampling
        display_size = int(hyperparameters['display_size'])
        self.s_a = torch.randn(display_size, self.style_dim, 1, 1).cuda()
        self.s_b = torch.randn(display_size, self.style_dim, 1, 1).cuda()

        # Setup the optimizers
        beta1 = hyperparameters['beta1']
        beta2 = hyperparameters['beta2']
        dis_params = list(self.dis_a.parameters()) + list(self.dis_b.parameters())
        gen_params = list(self.gen_a.parameters()) + list(self.gen_b.parameters())
        self.dis_opt = torch.optim.Adam([p for p in dis_params if p.requires_grad],
                                        lr=lr, betas=(beta1, beta2), weight_decay=hyperparameters['weight_decay'])
        self.gen_opt = torch.optim.Adam([p for p in gen_params if p.requires_grad],
                                        lr=lr, betas=(beta1, beta2), weight_decay=hyperparameters['weight_decay'])
        self.seg_opt = torch.optim.SGD(self.seg.parameters(), lr=0.01, momentum=0.9, weight_decay=1e-4)

        self.dis_scheduler = get_scheduler(self.dis_opt, hyperparameters['lr_policy'], hyperparameters=hyperparameters)
        self.gen_scheduler = get_scheduler(self.gen_opt, hyperparameters['lr_policy'], hyperparameters=hyperparameters)
        self.seg_scheduler = get_scheduler(self.seg_opt, 'constant', hyperparameters=None)

        # Network weight initialization
        self.apply(weights_init(hyperparameters['init']))
        self.dis_a.apply(weights_init('gaussian'))
        self.dis_b.apply(weights_init('gaussian'))

        self.criterion_seg = DiceLoss(ignore_index=hyperparameters['seg']['ignore_index'])
Example no. 6
def train(**kwargs):
    opt._parse(kwargs)
    train_writer = None
    value_writer = None
    if opt.vis:
        train_writer = SummaryWriter(
            log_dir='./runs/train_' +
            datetime.now().strftime('%y%m%d-%H-%M-%S'))
        value_writer = SummaryWriter(
            log_dir='./runs/val_' + datetime.now().strftime('%y%m%d-%H-%M-%S'))
    previous_loss = 1e10  # loss from the previous epoch
    best_precision = 0  # best accuracy so far
    start_epoch = 0
    lr = opt.lr
    perf_scores_history = []  # performance score history
    # step1: criterion and optimizer
    # 1. Hinge loss: mainly used in support vector machines (SVM)
    # 2. Cross-entropy loss (softmax loss): used in logistic regression and softmax classification
    # 3. Square loss: mainly used in ordinary least squares (OLS)
    # 4. Exponential loss: mainly used in the AdaBoost ensemble learning algorithm
    # 5. Other losses (e.g. 0-1 loss, absolute-value loss)
    criterion = t.nn.CrossEntropyLoss().to(opt.device)  # loss function
    # step2: meters
    train_losses = AverageMeter()  # loss meter
    train_top1 = AverageMeter()  # top-1 meter
    train_top5 = AverageMeter()  # top-5 meter
    pylogger = PythonLogger(msglogger)
    # step3: configure model
    model = getattr(models, opt.model)()  # build the network architecture
    compression_scheduler = distiller.CompressionScheduler(model)
    optimizer = model.get_optimizer(lr, opt.weight_decay)  # optimizer
    if opt.load_model_path:
        # # load all tensors onto the CPU
        # t.load(opt.load_model_path, map_location=lambda storage, loc: storage)
        # t.load(opt.load_model_path, map_location='cpu')
        # # load all tensors onto GPU 1
        # t.load(opt.load_model_path, map_location=lambda storage, loc: storage.cuda(1))
        # # move tensors from GPU 1 to GPU 0
        # t.load(opt.load_model_path, map_location={'cuda:1': 'cuda:0'})
        checkpoint = t.load(opt.load_model_path)
        start_epoch = checkpoint["epoch"]
        # compression_scheduler.load_state_dict(checkpoint['compression_scheduler'], False)
        best_precision = checkpoint["best_precision"]
        model.load_state_dict(checkpoint["state_dict"])
        optimizer = checkpoint['optimizer']
    model.to(opt.device)  # move the model to the GPU

    if opt.compress:
        compression_scheduler = distiller.file_config(
            model, optimizer, opt.compress, compression_scheduler)  # load the model pruning schedule
        model.to(opt.device)
    # learning rate scheduler
    lr_scheduler = get_scheduler(optimizer, opt)
    # step4: data_image
    train_data = DatasetFromFilename(opt.data_root, flag='train')  # training set
    val_data = DatasetFromFilename(opt.data_root, flag='test')  # validation set
    train_dataloader = DataLoader(train_data,
                                  opt.batch_size,
                                  shuffle=True,
                                  num_workers=opt.num_workers)  # training set loader
    val_dataloader = DataLoader(val_data,
                                opt.batch_size,
                                shuffle=True,
                                num_workers=opt.num_workers)  # validation set loader
    # train
    for epoch in range(start_epoch, opt.max_epoch):
        model.train()
        if opt.pruning:
            compression_scheduler.on_epoch_begin(epoch)  # pruning: on epoch begin
        train_losses.reset()  # reset meter
        train_top1.reset()  # reset meter
        # print('training set size', len(train_dataloader))
        total_samples = len(train_dataloader.sampler)
        steps_per_epoch = math.ceil(total_samples / opt.batch_size)
        train_progressor = ProgressBar(mode="Train  ",
                                       epoch=epoch,
                                       total_epoch=opt.max_epoch,
                                       model_name=opt.model,
                                       lr=lr,
                                       total=len(train_dataloader))
        lr = lr_scheduler.get_lr()[0]
        for ii, (data, labels, img_path, tag) in enumerate(train_dataloader):
            if not check_date(img_path, tag, msglogger): return
            if opt.pruning:
                compression_scheduler.on_minibatch_begin(
                    epoch, ii, steps_per_epoch, optimizer)  # pruning: on minibatch begin
            train_progressor.current = ii + 1  # current position in the training set
            # train model
            input = data.to(opt.device)
            target = labels.to(opt.device)
            if train_writer:
                grid = make_grid(
                    (input.data.cpu() * 0.225 + 0.45).clamp(min=0, max=1))
                train_writer.add_image('train_images', grid,
                                       ii * (epoch + 1))  # training images
            score = model(input)  # network output
            # compute the loss
            loss = criterion(score, target)
            if opt.pruning:
                # Before running the backward phase, we allow the scheduler to modify the loss
                # (e.g. add regularization loss)
                agg_loss = compression_scheduler.before_backward_pass(
                    epoch,
                    ii,
                    steps_per_epoch,
                    loss,
                    optimizer=optimizer,
                    return_loss_components=True)  # loss adjusted by the pruning scheduler
                loss = agg_loss.overall_loss
            train_losses.update(loss.item(), input.size(0))
            # loss = criterion(score[0], target)  # compute the loss for the Inception3 network
            optimizer.zero_grad()  # zero the parameter gradients
            loss.backward()  # backpropagation
            optimizer.step()  # update the parameters

            if opt.pruning:
                compression_scheduler.on_minibatch_end(epoch, ii,
                                                       steps_per_epoch,
                                                       optimizer)  # pruning: on minibatch end

            precision1_train, precision5_train = accuracy(
                score, target, topk=(1, 5))  # top-1 and top-5 accuracy

            # writer.add_graph(model, input)
            # precision1_train, precision2_train = accuracy(score[0], target, topk=(1, 2))  # Inception3 network
            train_losses.update(loss.item(), input.size(0))
            train_top1.update(precision1_train[0].item(), input.size(0))
            train_top5.update(precision5_train[0].item(), input.size(0))
            train_progressor.current_loss = train_losses.avg
            train_progressor.current_top1 = train_top1.avg
            train_progressor.current_top5 = train_top5.avg
            train_progressor()  # print progress
            if ii % opt.print_freq == 0:
                if train_writer:
                    train_writer.add_scalar('loss', train_losses.avg,
                                            ii * (epoch + 1))  # training loss
                    train_writer.add_text(
                        'top1', 'train accuracy top1 %s' % train_top1.avg,
                        ii * (epoch + 1))  # top-1 accuracy text
                    train_writer.add_scalars(
                        'accuracy', {
                            'top1': train_top1.avg,
                            'top5': train_top5.avg,
                            'loss': train_losses.avg
                        }, ii * (epoch + 1))
        # train_progressor.done()  # save the training results as txt
        # validate and visualize
        if opt.pruning:
            distiller.log_weights_sparsity(model, epoch,
                                           loggers=[pylogger])  # log model sparsity after pruning
            compression_scheduler.on_epoch_end(epoch, optimizer)  # pruning: on epoch end
        val_loss, val_top1, val_top5 = val(model, criterion, val_dataloader,
                                           epoch, value_writer, lr)  # validate the model
        sparsity = distiller.model_sparsity(model)
        perf_scores_history.append(
            distiller.MutableNamedTuple(
                {
                    'sparsity': sparsity,
                    'top1': val_top1,
                    'top5': val_top5,
                    'epoch': epoch + 1,
                    'lr': lr,
                    'loss': val_loss
                }, ))
        # keep the performance score history sorted from best to worst
        # sparsity is the primary sort key, then top1, top5 and epoch
        perf_scores_history.sort(key=operator.attrgetter(
            'sparsity', 'top1', 'top5', 'epoch'),
                                 reverse=True)
        for score in perf_scores_history[:1]:
            msglogger.info(
                '==> Best [Top1: %.3f   Top5: %.3f   Sparsity: %.2f on epoch: %d   Lr: %f   Loss: %f]',
                score.top1, score.top5, score.sparsity, score.epoch, lr,
                score.loss)

        best_precision = max(perf_scores_history[0].top1,
                             best_precision)  # best top-1 accuracy
        is_best = epoch + 1 == perf_scores_history[
            0].epoch  # the current epoch is the best epoch
        if is_best:
            model.save({
                "epoch":
                epoch + 1,
                "model_name":
                opt.model,
                "state_dict":
                model.state_dict(),
                "best_precision":
                best_precision,
                "optimizer":
                optimizer,
                "valid_loss": [val_loss, val_top1, val_top5],
                'compression_scheduler':
                compression_scheduler.state_dict(),
            })  # save the model
        # update learning rate
        lr_scheduler.step(epoch)  # update the learning rate
        # if the training loss is larger than last time, lower the learning rate
        # if train_losses.val > previous_loss:
        #     lr = lr * opt.lr_decay
        #     # when the loss exceeds the previous loss, reduce the learning rate
        #     for param_group in optimizer.param_groups:
        #         param_group['lr'] = lr
        #
        # previous_loss = train_losses.val
    t.cuda.empty_cache()  # free unused cached GPU memory
Example no. 7
def main(args):
    print('==> Using settings {}'.format(args))
    device = torch.device("cuda")

    print('==> Loading dataset...')
    data_dict = data_preparation(args)

    print("==> Creating PoseNet model...")
    model_pos = model_pos_preparation(args, data_dict['dataset'], device)
    model_pos_eval = model_pos_preparation(args, data_dict['dataset'],
                                           device)  # used for evaluation only
    # prepare optimizer for posenet
    posenet_optimizer = torch.optim.Adam(model_pos.parameters(), lr=args.lr_p)
    posenet_lr_scheduler = get_scheduler(posenet_optimizer,
                                         policy='lambda',
                                         nepoch_fix=0,
                                         nepoch=args.epochs)

    print("==> Creating PoseAug model...")
    poseaug_dict = get_poseaug_model(args, data_dict['dataset'])

    # loss function
    criterion = nn.MSELoss(reduction='mean').to(device)

    # GAN trick: data buffer for fake data
    fake_3d_sample = Sample_from_Pool()
    fake_2d_sample = Sample_from_Pool()

    args.checkpoint = path.join(
        args.checkpoint, args.posenet_name, args.keypoints,
        datetime.datetime.now().isoformat() + '_' + args.note)
    os.makedirs(args.checkpoint, exist_ok=True)
    print('==> Making checkpoint dir: {}'.format(args.checkpoint))

    logger = Logger(os.path.join(args.checkpoint, 'log.txt'), args)
    logger.record_args(str(model_pos))
    logger.set_names([
        'epoch', 'lr', 'error_h36m_p1', 'error_h36m_p2', 'error_3dhp_p1',
        'error_3dhp_p2'
    ])

    # Init monitor for network training
    #########################################################
    summary = Summary(args.checkpoint)
    writer = summary.create_summary()

    ##########################################################
    # start training
    ##########################################################
    start_epoch = 0
    dhpp1_best = None
    s911p1_best = None

    for _ in range(start_epoch, args.epochs):

        if summary.epoch == 0:
            # evaluate the pre-trained model at epoch 0.
            h36m_p1, h36m_p2, dhp_p1, dhp_p2 = evaluate_posenet(args,
                                                                data_dict,
                                                                model_pos,
                                                                model_pos_eval,
                                                                device,
                                                                summary,
                                                                writer,
                                                                tag='_fake')
            h36m_p1, h36m_p2, dhp_p1, dhp_p2 = evaluate_posenet(args,
                                                                data_dict,
                                                                model_pos,
                                                                model_pos_eval,
                                                                device,
                                                                summary,
                                                                writer,
                                                                tag='_real')
            summary.summary_epoch_update()

        # update train loader
        dataloader_update(args=args, data_dict=data_dict, device=device)

        # Train for one epoch
        train_gan(args, poseaug_dict, data_dict, model_pos, criterion,
                  fake_3d_sample, fake_2d_sample, summary, writer)

        if summary.epoch > args.warmup:
            train_posenet(model_pos, data_dict['train_fake2d3d_loader'],
                          posenet_optimizer, criterion, device)
            h36m_p1, h36m_p2, dhp_p1, dhp_p2 = evaluate_posenet(args,
                                                                data_dict,
                                                                model_pos,
                                                                model_pos_eval,
                                                                device,
                                                                summary,
                                                                writer,
                                                                tag='_fake')

            train_posenet(model_pos, data_dict['train_det2d3d_loader'],
                          posenet_optimizer, criterion, device)
            h36m_p1, h36m_p2, dhp_p1, dhp_p2 = evaluate_posenet(args,
                                                                data_dict,
                                                                model_pos,
                                                                model_pos_eval,
                                                                device,
                                                                summary,
                                                                writer,
                                                                tag='_real')
        # Update learning rates
        ########################
        poseaug_dict['scheduler_G'].step()
        poseaug_dict['scheduler_d3d'].step()
        poseaug_dict['scheduler_d2d'].step()
        posenet_lr_scheduler.step()
        lr_now = posenet_optimizer.param_groups[0]['lr']
        print('\nEpoch: %d | LR: %.8f' % (summary.epoch, lr_now))

        # Update log file
        logger.append(
            [summary.epoch, lr_now, h36m_p1, h36m_p2, dhp_p1, dhp_p2])

        # Update checkpoint
        if dhpp1_best is None or dhpp1_best > dhp_p1:
            dhpp1_best = dhp_p1
            logger.record_args(
                "==> Saving checkpoint at epoch '{}', with dhp_p1 {}".format(
                    summary.epoch, dhpp1_best))
            save_ckpt(
                {
                    'epoch': summary.epoch,
                    'model_pos': model_pos.state_dict()
                },
                args.checkpoint,
                suffix='best_dhp_p1')

        if s911p1_best is None or s911p1_best > h36m_p1:
            s911p1_best = h36m_p1
            logger.record_args(
                "==> Saving checkpoint at epoch '{}', with s911p1 {}".format(
                    summary.epoch, s911p1_best))
            save_ckpt(
                {
                    'epoch': summary.epoch,
                    'model_pos': model_pos.state_dict()
                },
                args.checkpoint,
                suffix='best_h36m_p1')

        summary.summary_epoch_update()

    writer.close()
    logger.close()
Example no. 8
    def train(self):
        previous_loss = 1e10  # loss from the previous epoch
        lr = self.opt.lr
        perf_scores_history = []
        pylogger = PythonLogger(msglogger)
        self.train_load_model()
        self.load_compress()
        self.create_write()
        lr_scheduler = get_scheduler(self.optimizer, self.opt)
        for epoch in range(self.start_epoch, self.opt.max_epoch):
            self.model.train()
            self.load_data()
            if self.opt.pruning:
                self.compression_scheduler.on_epoch_begin(epoch)  # pruning: on epoch begin
            self.train_losses.reset()  # reset meter
            self.train_top1.reset()  # reset meter
            # print('training set size', len(train_dataloader))
            total_samples = len(self.train_dataloader.sampler)
            steps_per_epoch = math.ceil(total_samples / self.opt.batch_size)
            train_progressor = ProgressBar(mode="Train  ",
                                           epoch=epoch,
                                           total_epoch=self.opt.max_epoch,
                                           model_name=self.opt.model,
                                           total=len(self.train_dataloader))
            lr = lr_scheduler.get_lr()[0]
            for ii, (data, labels,
                     img_path) in enumerate(self.train_dataloader):
                if self.opt.pruning:
                    self.compression_scheduler.on_minibatch_begin(
                        epoch, ii, steps_per_epoch,
                        self.optimizer)  # pruning: on minibatch begin
                train_progressor.current = ii + 1  # current position in the training set
                # train model
                input = data.to(self.opt.device)
                target = labels.to(self.opt.device)
                score = self.model(input)  # network output
                loss = self.criterion(score, target)  # compute the loss
                if self.opt.pruning:
                    # Before running the backward phase, we allow the scheduler to modify the loss
                    # (e.g. add regularization loss)
                    agg_loss = self.compression_scheduler.before_backward_pass(
                        epoch,
                        ii,
                        steps_per_epoch,
                        loss,
                        optimizer=self.optimizer,
                        return_loss_components=True)  # loss adjusted by the pruning scheduler
                    loss = agg_loss.overall_loss
                self.train_losses.update(loss.item(), input.size(0))
                # loss = criterion(score[0], target)  # compute the loss for the Inception3 network
                self.optimizer.zero_grad()  # zero the parameter gradients
                loss.backward()  # backpropagation
                self.optimizer.step()  # update the parameters

                if self.opt.pruning:
                    self.compression_scheduler.on_minibatch_end(
                        epoch, ii, steps_per_epoch,
                        self.optimizer)  # pruning: on minibatch end

                precision1_train, precision5_train = accuracy(
                    score, target, topk=(1, 5))  # top-1 and top-5 accuracy

                # precision1_train, precision2_train = accuracy(score[0], target, topk=(1, 2))  # Inception3 network
                self.train_losses.update(loss.item(), input.size(0))
                self.train_top1.update(precision1_train[0].item(),
                                       input.size(0))
                self.train_top5.update(precision5_train[0].item(),
                                       input.size(0))
                train_progressor.current_loss = self.train_losses.avg
                train_progressor.current_top1 = self.train_top1.avg
                train_progressor.current_top5 = self.train_top5.avg
                train_progressor()  # print progress
                if (ii + 1) % self.opt.print_freq == 0:
                    self.visualization_train(input, ii, epoch)
            if self.opt.pruning:
                distiller.log_weights_sparsity(self.model,
                                               epoch,
                                               loggers=[pylogger])  # log model sparsity after pruning
                self.compression_scheduler.on_epoch_end(
                    epoch, self.optimizer)  # pruning: on epoch end
            val_loss, val_top1, val_top5 = val(self.model, self.criterion,
                                               self.val_dataloader, epoch,
                                               self.value_writer)  # validate the model
            sparsity = distiller.model_sparsity(self.model)
            perf_scores_history.append(
                distiller.MutableNamedTuple(
                    {
                        'sparsity': sparsity,
                        'top1': val_top1,
                        'top5': val_top5,
                        'epoch': epoch + 1,
                        'lr': lr,
                        'loss': val_loss
                    }, ))
            # keep the performance score history sorted from best to worst
            # sparsity is the primary sort key, then top1, top5 and epoch
            perf_scores_history.sort(key=operator.attrgetter(
                'sparsity', 'top1', 'top5', 'epoch'),
                                     reverse=True)
            for score in perf_scores_history[:1]:
                msglogger.info(
                    '==> Best [Top1: %.3f   Top5: %.3f   Sparsity: %.2f on epoch: %d   Lr: %f   Loss: %f]',
                    score.top1, score.top5, score.sparsity, score.epoch, lr,
                    score.loss)

            is_best = epoch + 1 == perf_scores_history[
                0].epoch  # the current epoch is the best epoch
            self.best_precision = max(perf_scores_history[0].top1,
                                      self.best_precision)  # best top-1 accuracy
            if is_best:
                self.train_save_model(epoch, val_loss, val_top1, val_top5)
            # update learning rate
            lr = lr_scheduler.get_lr()[0]
            # # if the training loss is larger than last time, lower the learning rate
            # if self.train_losses.val > previous_loss:
            #     lr = lr * self.opt.lr_decay
            #     # when the loss exceeds the previous loss, reduce the learning rate
            #     for param_group in self.optimizer.param_groups:
            #         param_group['lr'] = lr
            #
            # previous_loss = self.train_losses.val
            t.cuda.empty_cache()
Example no. 9
    model = get_model(cfg, [l1_cls_num, l2_cls_num], device, logger)
    if cfg.TRAIN_STAGE == 2:
        last_stage_weight_path = os.path.join(model_dir, 'best_model_stage1.pth')
        load_weight(model, last_stage_weight_path)
        model.module.freeze_backbone()
        model.module.freeze_classifer(0)
    elif cfg.TRAIN_STAGE == 1:
        last_stage_weight_path = os.path.join(args.pretrained_path)
        load_weight(model, last_stage_weight_path)
        model.module.freeze_backbone()
        model.module.freeze_classifer(1)

    # load_pretrained_weight(model, args.pretrained_path)
    combiner = Combiner(cfg, device)
    optimizer = get_optimizer(cfg, model)
    scheduler = get_scheduler(cfg, optimizer)
    # ----- END MODEL BUILDER -----

    trainLoader = DataLoader(
        train_set,
        batch_size=cfg.TRAIN.BATCH_SIZE,
        shuffle=cfg.TRAIN.SHUFFLE,
        num_workers=cfg.TRAIN.NUM_WORKERS,
        pin_memory=cfg.PIN_MEMORY,
        drop_last=True
    )

    validLoader = DataLoader(
        valid_set,
        batch_size=cfg.TEST.BATCH_SIZE,
        shuffle=False,
    )
Example no. 10
    def __init__(self, opt):
        super(DDPModel, self).__init__()

        self.gpu_ids = opt.gpu_ids
        self.device = torch.device(
            'cuda:{}'.format(opt.gpu_ids[0])) if opt.gpu_ids else torch.device(
                'cpu')  # get device name: CPU or GPU
        self.save_dir = os.path.join(
            opt.checkpoints_dir,
            opt.name)  # save all the checkpoints to save_dir
        if not os.path.exists(self.save_dir):
            os.makedirs(self.save_dir)
            print('Directory created: %s' % self.save_dir)

        # define schedule
        if opt.beta_schedule == 'cosine':
            """ Cosine Schedule
                @inproceedings{
                    anonymous2021improved,
                    title={Improved Denoising Diffusion Probabilistic Models},
                    author={Anonymous},
                    booktitle={Submitted to International Conference on Learning Representations},
                    year={2021},
                    url={https://openreview.net/forum?id=-NEXDKk8gZ},
                    note={under review}
                }
            """
            s = 0.008
            x = np.linspace(0, opt.num_timesteps - 1, opt.num_timesteps - 1)
            alphas_cumprod = np.cos(
                ((x / opt.num_timesteps) + s) / (1 + s) * np.pi * 0.5)**2
            alphas_cumprod = alphas_cumprod / alphas_cumprod[0]
            alphas_cumprod[alphas_cumprod > 0.999999] = 0.999999
            alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1])
            betas = np.clip(1 - (alphas_cumprod / alphas_cumprod_prev),
                            a_min=0,
                            a_max=0.999)
            alphas = 1. - betas
        else:
            betas = self.get_beta_schedule(opt.beta_schedule, opt.beta_start,
                                           opt.beta_end, opt.num_timesteps)
            alphas = 1. - betas
            alphas_cumprod = np.cumprod(alphas, axis=0)
            alphas_cumprod_prev = np.append(1., alphas_cumprod[:-1])
        assert alphas_cumprod_prev.shape == betas.shape

        assert isinstance(
            betas, np.ndarray) and (betas >= 0).all() and (betas <= 1).all()
        timesteps, = betas.shape
        self.num_timesteps = int(timesteps)

        self.betas = torch.tensor(betas)
        self.alphas_cumprod = torch.tensor(alphas_cumprod)
        self.alphas_cumprod_prev = torch.tensor(alphas_cumprod_prev)

        # calculations for diffusion q(x_t | x_{t-1}) and others
        self.sqrt_alphas_cumprod = torch.tensor(np.sqrt(alphas_cumprod),
                                                dtype=torch.float32,
                                                device=self.device)
        self.sqrt_one_minus_alphas_cumprod = torch.tensor(
            np.sqrt(1. - alphas_cumprod),
            dtype=torch.float32,
            device=self.device)
        self.log_one_minus_alphas_cumprod = torch.tensor(
            np.log(1. - alphas_cumprod),
            dtype=torch.float32,
            device=self.device)
        self.sqrt_recip_alphas_cumprod = torch.tensor(np.sqrt(1. /
                                                              alphas_cumprod),
                                                      dtype=torch.float32,
                                                      device=self.device)
        self.sqrt_recipm1_alphas_cumprod = torch.tensor(
            np.sqrt(1. / alphas_cumprod - 1),
            dtype=torch.float32,
            device=self.device)

        # calculations for posterior q(x_{t-1} | x_t, x_0)
        posterior_variance = betas * (1. - alphas_cumprod_prev) / (
            1. - alphas_cumprod)
        # above: equal to 1. / (1. / (1. - alpha_cumprod_tm1) + alpha_t / beta_t)
        self.posterior_variance = torch.tensor(posterior_variance,
                                               dtype=torch.float32,
                                               device=self.device)
        # below: log calculation clipped because the posterior variance is 0 at the beginning of the diffusion chain
        self.posterior_log_variance_clipped = torch.tensor(np.log(
            np.maximum(posterior_variance, 1e-20)),
                                                           dtype=torch.float32,
                                                           device=self.device)
        self.posterior_mean_coef1 = torch.tensor(
            betas * np.sqrt(alphas_cumprod_prev) / (1. - alphas_cumprod),
            dtype=torch.float32,
            device=self.device)
        self.posterior_mean_coef2 = torch.tensor(
            (1. - alphas_cumprod_prev) * np.sqrt(alphas) /
            (1. - alphas_cumprod),
            dtype=torch.float32,
            device=self.device)

        # setup denoise model
        model = []
        if opt.block_size != 1:
            model += [utils.SpaceToDepth(opt.block_size)]
        model += [
            unet.Unet(opt.input_nc,
                      opt.input_nc,
                      num_middles=1,
                      ngf=opt.ngf,
                      norm=opt.norm,
                      activation=opt.activation,
                      use_dropout=opt.dropout,
                      use_attention=opt.attention,
                      device=self.device)
        ]
        if opt.block_size != 1:
            model += [utils.SpaceToDepth(opt.block_size)]
        self.denoise_model = utils.init_net(nn.Sequential(*model),
                                            opt.init_type, opt.init_gain,
                                            opt.gpu_ids)

        if opt.phase == 'train':
            # setup optimizer, visualizer, and learning rate scheduler
            self.optimizer = torch.optim.Adam(self.denoise_model.parameters(),
                                              lr=opt.lr,
                                              betas=(opt.beta1, 0.999))
            if 'mse' in opt.loss_type:
                self.loss_criteria = nn.MSELoss()
            elif 'l1' in opt.loss_type:
                self.loss_criteria = nn.L1Loss()
            else:
                raise NotImplementedError(opt.loss_type)
            # set prediction function
            if 'noisepred' in opt.loss_type:
                self.pred_fn = DDPModel._noisepred
            else:
                raise NotImplementedError(opt.loss_type)
            self.loss_type = opt.loss_type
            self.visualizer = visualizer.Visualizer(opt)
            self.scheduler = utils.get_scheduler(self.optimizer, opt)
            self.lr_policy = opt.lr_policy
        else:
            self.image_size = (opt.batch_size, opt.input_nc, opt.load_size,
                               opt.load_size)
            self.denoise_model.train(False)
            # set prediction function
            if 'noisepred' in opt.loss_type:
                self.pred_fn = self.predict_start_from_noise
            else:
                raise NotImplementedError(opt.loss_type)

        if opt.phase == 'interpolate':
            self.mix_rate = opt.mix_rate
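
For the non-cosine branch above, get_beta_schedule usually returns a precomputed ramp of noise variances. A minimal sketch assuming only a 'linear' schedule is supported (the actual repository may implement more variants):

import numpy as np


def get_beta_schedule(beta_schedule, beta_start, beta_end, num_timesteps):
    if beta_schedule == 'linear':
        # evenly spaced betas from beta_start to beta_end, one per diffusion timestep
        return np.linspace(beta_start, beta_end, num_timesteps, dtype=np.float64)
    raise NotImplementedError('beta schedule [%s] is not implemented' % beta_schedule)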