Пример #1
0
def set_running_statistics(model, data_loader, distributed=False):
    """Recompute BatchNorm running statistics for `model` from `data_loader`.

    Works on a deep copy of the model: every BatchNorm2d forward is replaced
    by a closure that measures the batch mean/variance, accumulates them into
    a meter, and normalizes with those batch statistics.  Afterwards the
    accumulated averages are written back into the ORIGINAL model's
    running_mean / running_var buffers (only the leading `feature_dim`
    channels, which supports BN layers wider than the active sub-network).

    Args:
        model: network whose BN running buffers are updated in place.
        data_loader: iterable of mini-batches; each batch is a dict with an
            'image' tensor (see the forward loop below).
        distributed: if True, accumulate statistics with DistributedTensor
            (aggregated across workers) instead of a local AverageMeter.
    """
    bn_mean = {}
    bn_var = {}

    # Forward passes run on a copy so the original model's buffers and
    # forward methods stay untouched.
    forward_model = copy.deepcopy(model)
    for name, m in forward_model.named_modules():
        if isinstance(m, nn.BatchNorm2d):
            if distributed:
                bn_mean[name] = DistributedTensor(name + '#mean')
                bn_var[name] = DistributedTensor(name + '#var')
            else:
                bn_mean[name] = AverageMeter()
                bn_var[name] = AverageMeter()

            def new_forward(bn, mean_est, var_est):
                # Factory closure: binds this BN module and its two meters.
                def lambda_forward(x):
                    batch_mean = x.mean(0, keepdim=True).mean(
                        2, keepdim=True).mean(3, keepdim=True)  # 1, C, 1, 1
                    # Biased variance: mean of squared deviations per channel.
                    batch_var = (x - batch_mean) * (x - batch_mean)
                    batch_var = batch_var.mean(0, keepdim=True).mean(
                        2, keepdim=True).mean(3, keepdim=True)

                    batch_mean = torch.squeeze(batch_mean)
                    batch_var = torch.squeeze(batch_var)

                    # Weight each batch's contribution by its sample count.
                    mean_est.update(batch_mean.data, x.size(0))
                    var_est.update(batch_var.data, x.size(0))

                    # bn forward using calculated mean & var
                    _feature_dim = batch_mean.size(0)
                    return F.batch_norm(
                        x,
                        batch_mean,
                        batch_var,
                        bn.weight[:_feature_dim],
                        bn.bias[:_feature_dim],
                        False,
                        0.0,
                        bn.eps,
                    )

                return lambda_forward

            m.forward = new_forward(m, bn_mean[name], bn_var[name])

    with torch.no_grad():
        DynamicBatchNorm2d.SET_RUNNING_STATISTICS = True
        for images in data_loader:
            images = images['image'].to(get_net_device(forward_model))
            forward_model(images)
        DynamicBatchNorm2d.SET_RUNNING_STATISTICS = False

    # Write the accumulated statistics back into the original model.
    for name, m in model.named_modules():
        if name in bn_mean and bn_mean[name].count > 0:
            feature_dim = bn_mean[name].avg.size(0)
            assert isinstance(m, nn.BatchNorm2d)
            m.running_mean.data[:feature_dim].copy_(bn_mean[name].avg)
            m.running_var.data[:feature_dim].copy_(bn_var[name].avg)
Пример #2
0
def validate(net, path, image_size, data_loader, batch_size=100, device='cuda:0'):
    """Evaluate `net` on `data_loader` at `image_size`; return top-1 accuracy.

    The loader's dataset transform is replaced with the standard ImageNet
    evaluation pipeline (resize to image_size/0.875, center crop, normalize).

    Args:
        net: model to evaluate (wrapped in DataParallel on CUDA devices).
        path: unused here; kept for call-site compatibility.
        image_size: square crop size fed to the network.
        data_loader: DataLoader yielding (image, label) batches.
        batch_size: unused here; kept for call-site compatibility.
        device: torch device string, e.g. 'cuda:0' or 'cpu'.

    Returns:
        Average top-1 accuracy over the whole loader.
    """
    if 'cuda' in device:
        net = torch.nn.DataParallel(net).to(device)
    else:
        net = net.to(device)

    data_loader.dataset.transform = transforms.Compose([
        transforms.Resize(int(math.ceil(image_size / 0.875))),
        transforms.CenterCrop(image_size),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]
        ),
    ])

    cudnn.benchmark = True
    criterion = nn.CrossEntropyLoss().to(device)

    # FIX: dropped a redundant second `net = net.to(device)` — the model was
    # already moved to `device` above.
    net.eval()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    with torch.no_grad():
        with tqdm(total=len(data_loader), desc='Validate') as t:
            for images, labels in data_loader:
                images, labels = images.to(device), labels.to(device)
                # compute output
                output = net(images)

                # FIX: the loss was never computed, so `criterion` was
                # unused and the reported 'loss' stayed at the meter's
                # initial value.
                # NOTE(review): the previous code disabled this with a
                # remark about "abnormal labels"; if labels can fall
                # outside [0, n_classes), re-guard here before the loss.
                loss = criterion(output, labels)

                # measure accuracy and record loss
                acc1, acc5 = accuracy(output, labels, topk=(1, 5))

                losses.update(loss.item(), images.size(0))
                top1.update(acc1[0].item(), images.size(0))
                top5.update(acc5[0].item(), images.size(0))
                t.set_postfix({
                    'loss': losses.avg,
                    'top1': top1.avg,
                    'top5': top5.avg,
                    'img_size': images.size(2),
                })
                t.update(1)

    print('Results: loss=%.5f,\t top1=%.1f,\t top5=%.1f' % (losses.avg, top1.avg, top5.avg))
    return top1.avg
Пример #3
0
    def validate(self,
                 epoch=0,
                 is_test=False,
                 run_str='',
                 net=None,
                 data_loader=None,
                 no_logs=False,
                 train_mode=False):
        """Run one evaluation pass and return (average loss, metric values).

        Defaults to self.net and the test/valid loader chosen by `is_test`;
        `train_mode` keeps BN/dropout in training behaviour during the pass.
        """
        net = self.net if net is None else net
        if not isinstance(net, nn.DataParallel):
            net = nn.DataParallel(net)

        if data_loader is None:
            data_loader = (self.run_config.test_loader
                           if is_test else self.run_config.valid_loader)

        net.train() if train_mode else net.eval()

        losses = AverageMeter()
        metric_dict = self.get_metric_dict()

        desc = 'Validate Epoch #{} {}'.format(epoch + 1, run_str)
        with torch.no_grad():
            with tqdm(total=len(data_loader), desc=desc,
                      disable=no_logs) as t:
                for images, labels in data_loader:
                    images = images.to(self.device)
                    labels = labels.to(self.device)

                    # forward only — gradients are disabled above
                    output = net(images)
                    loss = self.test_criterion(output, labels)
                    self.update_metric(metric_dict, output, labels)
                    losses.update(loss.item(), images.size(0))

                    postfix = {'loss': losses.avg}
                    postfix.update(
                        self.get_metric_vals(metric_dict, return_dict=True))
                    postfix['img_size'] = images.size(2)
                    t.set_postfix(postfix)
                    t.update(1)
        return losses.avg, self.get_metric_vals(metric_dict)
Пример #4
0
    def eval_one_epoch(self, args, epoch, warmup_epochs=0, warmup_lr=0):
        """One pass over the valid loader with a train-mode net and backward().

        Despite the name, self.net is set to train() mode and
        loss.backward() is called per batch while zero_grad and the
        optimizer step remain commented out — so gradients accumulate over
        the entire pass without any weight update.  Presumably this is for
        gradient inspection/accumulation; TODO confirm intent.

        Args:
            args: unused here; kept for signature parity with train_one_epoch.
            epoch: epoch index, forwarded to MyRandomResizedCrop.
            warmup_epochs: unused here; kept for signature parity.
            warmup_lr: unused here; kept for signature parity.

        Returns:
            (average loss, metric values) over the valid loader.
        """
        # switch to train mode
        self.net.train()
        MyRandomResizedCrop.EPOCH = epoch  # required by elastic resolution
        # nBatch = len(self.run_config.train_loader)

        losses = AverageMeter()
        metric_dict = self.get_metric_dict()
        data_time = AverageMeter()

        # with tqdm(total=nBatch, desc='{} Train Epoch #{}'.format(self.run_config.dataset, epoch + 1)) as t:
        end = time.time()
        for i, (images, labels) in enumerate(self.run_config.valid_loader):
            # for i, (images, labels) in enumerate(self.run_config.train_loader):
            #     if i >= 2:
            #         break
            MyRandomResizedCrop.BATCH = i
            data_time.update(time.time() - end)
            images, labels = images.to(self.device), labels.to(self.device)
            target = labels
            # compute output
            output = self.net(images)
            loss = self.train_criterion(output, labels)
            loss_type = 'ce'
            # compute gradient and do SGD step
            # self.model.zero_grad()  # or self.optimizer.zero_grad()
            # NOTE(review): backward without zero_grad/step — gradients pile
            # up across batches; weights are never updated here.
            loss.backward()
            # self.get_grads()
            # self.optimizer.step()
            # measure accuracy and record loss
            losses.update(loss.item(), images.size(0))
            self.update_metric(metric_dict, output, target)
            # t.set_postfix({
            #     'loss': losses.avg,
            #     **self.get_metric_vals(metric_dict, return_dict=True),
            #     'img_size': images.size(2),
            #     'loss_type': loss_type,
            #     'data_time': data_time.avg,
            # })
            # t.update(1)
            end = time.time()

            # self.get_grads()

        return losses.avg, self.get_metric_vals(metric_dict)
Пример #5
0
def train_one_epoch(run_manager, args, epoch, warmup_epochs=0, warmup_lr=0):
    """Train the dynamic (elastic) super-network for one epoch.

    Each iteration samples `args.dynamic_batch_size` random sub-networks,
    forwards the 2x- or 4x-downsampled input matching the sampled pixel
    depth through each, and accumulates their gradients before a single
    optimizer step.  The objective is super-resolution (target is the
    full-resolution `images`), optionally combined with knowledge
    distillation from `args.teacher_model`.

    Args:
        run_manager: provides .net, .optimizer, .train_criterion and
            .run_config (loaders and LR schedules).
        args: namespace with kd_ratio, kd_type, dynamic_batch_size,
            independent_distributed_sampling and (if kd_ratio > 0)
            teacher_model.
        epoch: zero-based epoch index.
        warmup_epochs: number of LR-warmup epochs (0 disables warmup).
        warmup_lr: starting LR of the warmup schedule.

    Returns:
        (average loss, average PSNR) over the epoch.
    """
    dynamic_net = run_manager.net
    if isinstance(dynamic_net, nn.DataParallel):
        dynamic_net = dynamic_net.module

    # switch to train mode
    dynamic_net.train()
    # To freeze BN while overfitting a single architecture, put BN modules
    # in eval() mode here (and optionally stop grads on weight/bias):
    # for m in dynamic_net.modules():
    #     if isinstance(m, nn.BatchNorm2d):
    #         m.eval()

    nBatch = len(run_manager.run_config.train_loader)

    data_time = AverageMeter()
    losses = AverageMeter()
    psnr_averagemeter = AverageMeter()

    with tqdm(total=nBatch, desc='Train Epoch #{}'.format(epoch + 1)) as t:
        end = time.time()
        for i, mini_batch in enumerate(run_manager.run_config.train_loader):
            images = mini_batch['image']
            # 2x and 4x downsampled variants of the batch.
            x2_down_images = mini_batch['2x_down_image']
            x4_down_images = mini_batch['4x_down_image']
            data_time.update(time.time() - end)
            # Per-iteration learning-rate schedule.
            if epoch < warmup_epochs:
                new_lr = run_manager.run_config.warmup_adjust_learning_rate(
                    run_manager.optimizer,
                    warmup_epochs * nBatch,
                    nBatch,
                    epoch,
                    i,
                    warmup_lr,
                )
            else:
                new_lr = run_manager.run_config.adjust_learning_rate(
                    run_manager.optimizer, epoch - warmup_epochs, i, nBatch)

            images = images.cuda()
            x2_down_images = x2_down_images.cuda()
            x4_down_images = x4_down_images.cuda()

            # soft target for knowledge distillation
            if args.kd_ratio > 0:
                args.teacher_model.train()
                with torch.no_grad():
                    soft_logits = args.teacher_model(images).detach()
                    soft_label = F.softmax(soft_logits, dim=1)

            # clear gradients
            run_manager.optimizer.zero_grad()

            loss_of_subnets, psnr_of_subnets = [], []
            subnet_str = ''
            for _ in range(args.dynamic_batch_size):

                # Deterministic seed per (iteration, subnet index) unless
                # independent per-process sampling is requested.
                if args.independent_distributed_sampling:
                    subnet_seed = os.getpid() + time.time()
                else:
                    subnet_seed = int('%d%.3d%.3d' %
                                      (epoch * nBatch + i, _, 0))
                random.seed(subnet_seed)

                # Random sampling; for single-architecture overfitting use a
                # fixed configuration via set_active_subnet instead:
                subnet_settings = dynamic_net.sample_active_subnet()
                # dynamic_net.set_active_subnet(ks=7, e=3, d=2, pixel_d=1)

                subnet_str += '%d: ' % _ + ','.join([
                    '%s_%s' %
                    (key, '%.1f' %
                     subset_mean(val, 0) if isinstance(val, list) else val)
                    for key, val in subnet_settings.items()
                ]) + ' || '

                # Feed the input matching the sampled pixel depth.
                if subnet_settings['pixel_d'][0] == 1:
                    output = run_manager.net(x2_down_images)
                elif subnet_settings['pixel_d'][0] == 2:
                    output = run_manager.net(x4_down_images)
                else:
                    # FIX: previously fell through with `output` unbound,
                    # surfacing later as an opaque UnboundLocalError.
                    raise ValueError('unexpected pixel_d setting: %s' %
                                     subnet_settings['pixel_d'])

                if args.kd_ratio == 0:
                    loss = run_manager.train_criterion(output, images)
                    loss_type = 'mse'
                else:
                    if args.kd_type == 'ce':
                        kd_loss = cross_entropy_loss_with_soft_target(
                            output, soft_label)
                    else:
                        kd_loss = F.mse_loss(output, soft_logits)
                    # FIX: the hard-loss target was the undefined name
                    # `labels` (NameError whenever kd_ratio > 0); the target
                    # in this SR loop is the full-resolution `images`.
                    loss = args.kd_ratio * kd_loss + run_manager.train_criterion(
                        output, images)
                    loss = loss * (2 / (args.kd_ratio + 1))
                    loss_type = '%.1fkd-%s & ce' % (args.kd_ratio,
                                                    args.kd_type)

                # record PSNR on the Y channel for this sub-network
                psnr_current = psnr(rgb2y(tensor2img_np(output)),
                                    rgb2y(tensor2img_np(images)))
                loss_of_subnets.append(loss)
                psnr_of_subnets.append(psnr_current)

                # accumulate gradients over all sampled sub-networks
                loss.backward()
            run_manager.optimizer.step()

            losses.update(list_mean(loss_of_subnets), images.size(0))
            psnr_averagemeter.update(list_mean(psnr_of_subnets),
                                     images.size(0))

            t.set_postfix({
                'loss': losses.avg.item(),
                'psnr': psnr_averagemeter.avg,
                'R': images.size(2),
                'lr': new_lr,
                'loss_type': loss_type,
                'seed': str(subnet_seed),
                'str': subnet_str,
                'data_time': data_time.avg,
            })
            t.update(1)
            end = time.time()
    return losses.avg.item(), psnr_averagemeter.avg
Пример #6
0
        datasets.ImageFolder(root=os.path.join(imagenet_data_path, 'val'),
                             # transform=build_val_transform(224)
                             ),
        batch_size=1,  # test batch size
        shuffle=True,
        num_workers=16,  # number of workers for the data loader
        pin_memory=True,
        drop_last=False,
    )
    print('The ImageNet dataloader is ready.')
else:
    print(
        'Since GPU is not found in the environment, we skip all scripts related to ImageNet evaluation.'
    )

gpu_ava_delay = AverageMeter()
cpu_ava_delay = AverageMeter()
resolution = [160, 176, 192, 208, 224]
onnxpath = './onnxs/tmp2.onnx'
csv_f = open('./dataset/tmep.csv', 'w', encoding='utf-8', newline='')
csv_writer = csv.writer(csv_f)
csv_writer.writerow([
    'arch_config',
    'gpu latency',
    'cpu latency',
])

archmanager = myArchManager()
# configs = getconfig(archmanager.depths,archmanager.resolutions)
configs = []
for item in csv.reader(
Пример #7
0
    def train_one_epoch(self, args, epoch, warmup_epochs=0, warmup_lr=0):
        """Train `self.net` for one epoch on the train loader.

        Supports learning-rate warmup, optional mixup augmentation, and
        optional knowledge distillation from `args.teacher_model`.

        Args:
            args: namespace providing teacher_model, kd_type and kd_ratio.
            epoch: zero-based epoch index.
            warmup_epochs: epochs of LR warmup (0 disables warmup).
            warmup_lr: starting LR of the warmup schedule.

        Returns:
            (average loss, metric values) accumulated over the epoch.
        """
        # switch to train mode
        self.net.train()
        MyRandomResizedCrop.EPOCH = epoch  # required by elastic resolution

        nBatch = len(self.run_config.train_loader)

        losses = AverageMeter()
        metric_dict = self.get_metric_dict()
        data_time = AverageMeter()

        with tqdm(total=nBatch,
                  desc='{} Train Epoch #{}'.format(self.run_config.dataset,
                                                   epoch + 1)) as t:
            end = time.time()
            for i, (images, labels) in enumerate(self.run_config.train_loader):
                MyRandomResizedCrop.BATCH = i
                data_time.update(time.time() - end)
                # Per-iteration learning-rate schedule.
                if epoch < warmup_epochs:
                    new_lr = self.run_config.warmup_adjust_learning_rate(
                        self.optimizer,
                        warmup_epochs * nBatch,
                        nBatch,
                        epoch,
                        i,
                        warmup_lr,
                    )
                else:
                    new_lr = self.run_config.adjust_learning_rate(
                        self.optimizer, epoch - warmup_epochs, i, nBatch)

                images, labels = images.to(self.device), labels.to(self.device)
                # Metrics below use the pre-mixup labels (`target`); the
                # loss uses the (possibly mixed) `labels`.
                target = labels
                if isinstance(self.run_config.mixup_alpha, float):
                    # transform data
                    lam = random.betavariate(self.run_config.mixup_alpha,
                                             self.run_config.mixup_alpha)
                    images = mix_images(images, lam)
                    labels = mix_labels(
                        labels, lam, self.run_config.data_provider.n_classes,
                        self.run_config.label_smoothing)

                # soft target
                # NOTE(review): the teacher is put in train() mode before
                # the distillation forward; eval() is the more common
                # choice — confirm this is intentional.
                if args.teacher_model is not None:
                    args.teacher_model.train()
                    with torch.no_grad():
                        soft_logits = args.teacher_model(images).detach()
                        soft_label = F.softmax(soft_logits, dim=1)

                # compute output
                output = self.net(images)
                loss = self.train_criterion(output, labels)

                if args.teacher_model is None:
                    loss_type = 'ce'
                else:
                    if args.kd_type == 'ce':
                        kd_loss = cross_entropy_loss_with_soft_target(
                            output, soft_label)
                    else:
                        kd_loss = F.mse_loss(output, soft_logits)
                    loss = args.kd_ratio * kd_loss + loss
                    loss_type = '%.1fkd+ce' % args.kd_ratio

                # compute gradient and do SGD step
                self.net.zero_grad()  # or self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

                # measure accuracy and record loss
                losses.update(loss.item(), images.size(0))
                self.update_metric(metric_dict, output, target)

                t.set_postfix({
                    'loss':
                    losses.avg,
                    **self.get_metric_vals(metric_dict, return_dict=True),
                    'img_size':
                    images.size(2),
                    'lr':
                    new_lr,
                    'loss_type':
                    loss_type,
                    'data_time':
                    data_time.avg,
                })
                t.update(1)
                end = time.time()
        return losses.avg, self.get_metric_vals(metric_dict)
Пример #8
0
 def get_metric_dict(self):
     """Return fresh top-1/top-5 accuracy meters for a new epoch."""
     meters = {}
     for key in ('top1', 'top5'):
         meters[key] = AverageMeter()
     return meters
Пример #9
0
    def train_one_epoch(self, args, epoch, warmup_epochs=0, warmup_lr=0):
        """Train the super-resolution network for one epoch.

        BatchNorm layers are kept in eval() mode (frozen running stats).
        The 2x-downsampled input is fed to the network and the loss is the
        train criterion against the full-resolution `images`, optionally
        combined with knowledge distillation from `args.teacher_model`.

        Args:
            args: namespace providing teacher_model, kd_type and kd_ratio.
            epoch: zero-based epoch index.
            warmup_epochs: epochs of LR warmup (0 disables warmup).
            warmup_lr: starting LR of the warmup schedule.

        Returns:
            (average loss, average PSNR) over the epoch.
        """
        # switch to train mode
        self.net.train()
        # Freeze BatchNorm: BN stays in eval mode so stored running
        # mean/var are used (helps when overfitting a single architecture).
        for m in self.net.modules():
            if isinstance(m, nn.BatchNorm2d):
                # Use running mean/var instead of batch statistics.
                m.eval()
                # To also freeze the BN affine parameters, uncomment:
                # m.weight.requires_grad = False
                # m.bias.requires_grad = False

        nBatch = len(self.run_config.train_loader)

        losses = AverageMeter()
        psnr_averagemeter = AverageMeter()
        data_time = AverageMeter()

        with tqdm(total=nBatch, desc='Train Epoch #{}'.format(epoch + 1)) as t:
            end = time.time()
            for i, mini_batch in enumerate(self.run_config.train_loader):
                images = mini_batch['image']
                # Select between the 2x and 4x downsampled inputs here.
                x2_down_images = mini_batch['2x_down_image']
                # x4_down_images = mini_batch['4x_down_image']
                data_time.update(time.time() - end)
                # Per-iteration learning-rate schedule.
                if epoch < warmup_epochs:
                    new_lr = self.run_config.warmup_adjust_learning_rate(
                        self.optimizer,
                        warmup_epochs * nBatch,
                        nBatch,
                        epoch,
                        i,
                        warmup_lr,
                    )
                else:
                    new_lr = self.run_config.adjust_learning_rate(
                        self.optimizer, epoch - warmup_epochs, i, nBatch)

                images = images.to(self.device)
                x2_down_images = x2_down_images.to(self.device)
                # x4_down_images = x4_down_images.to(self.device)
                target = images

                # soft target for knowledge distillation
                if args.teacher_model is not None:
                    args.teacher_model.train()
                    with torch.no_grad():
                        soft_logits = args.teacher_model(images).detach()
                        # FIX: this assignment was commented out but
                        # soft_label is required below when
                        # args.kd_type == 'ce' (NameError before).
                        soft_label = F.softmax(soft_logits, dim=1)

                # compute output
                if isinstance(self.network, torchvision.models.Inception3):
                    output, aux_outputs = self.net(images)
                    # FIX: both criteria previously referenced the undefined
                    # name `labels`; the regression target here is `target`
                    # (the full-resolution images).
                    loss1 = self.train_criterion(output, target)
                    loss2 = self.train_criterion(aux_outputs, target)
                    loss = loss1 + 0.4 * loss2
                else:
                    output = self.net(x2_down_images)
                    # output = self.net(x4_down_images)
                    loss = self.train_criterion(output, images)

                if args.teacher_model is None:
                    loss_type = 'mse'
                else:
                    if args.kd_type == 'ce':
                        kd_loss = cross_entropy_loss_with_soft_target(
                            output, soft_label)
                    else:
                        kd_loss = F.mse_loss(output, soft_logits)
                    loss = args.kd_ratio * kd_loss + loss
                    loss_type = '%.1fkd-%s & mse' % (args.kd_ratio,
                                                     args.kd_type)

                # compute gradient and do SGD step
                self.net.zero_grad()  # or self.optimizer.zero_grad()
                if self.mix_prec is not None:
                    # apex mixed precision: scale the loss before backward.
                    from apex import amp
                    with amp.scale_loss(loss, self.optimizer) as scaled_loss:
                        scaled_loss.backward()
                else:
                    loss.backward()
                self.optimizer.step()

                # record loss and PSNR (computed on the Y channel)
                psnr_current = psnr(rgb2y(tensor2img_np(output)),
                                    rgb2y(tensor2img_np(images)))
                losses.update(loss.item(), images.size(0))
                psnr_averagemeter.update(psnr_current, images.size(0))

                t.set_postfix({
                    'loss': losses.avg,
                    'psnr': psnr_averagemeter.avg,
                    'img_size': images.size(2),
                    'lr': new_lr,
                    'loss_type': loss_type,
                    'data_time': data_time.avg,
                })
                t.update(1)
                end = time.time()
        return losses.avg, psnr_averagemeter.avg
    def train_one_epoch(self, args, epoch, warmup_epochs=5, warmup_lr=0):
        """Distributed variant of the one-epoch training loop.

        Uses DistributedMetric meters; the sampler's epoch is set so
        shuffling differs per epoch, and the progress bar only shows on the
        root worker.

        Args:
            args: namespace providing teacher_model, kd_type and kd_ratio.
            epoch: zero-based epoch index.
            warmup_epochs: epochs of LR warmup (default 5 here).
            warmup_lr: starting LR of the warmup schedule.

        Returns:
            (average loss, top-1, top-5) as Python floats.
        """
        self.net.train()
        self.run_config.train_loader.sampler.set_epoch(epoch)
        MyRandomResizedCrop.EPOCH = epoch

        nBatch = len(self.run_config.train_loader)

        losses = DistributedMetric('train_loss')
        top1 = DistributedMetric('train_top1')
        top5 = DistributedMetric('train_top5')
        data_time = AverageMeter()

        with tqdm(total=nBatch,
                  desc='Train Epoch #{}'.format(epoch + 1),
                  disable=not self.is_root) as t:
            end = time.time()
            for i, (images, labels) in enumerate(self.run_config.train_loader):
                data_time.update(time.time() - end)
                # Per-iteration learning-rate schedule.
                if epoch < warmup_epochs:
                    new_lr = self.run_config.warmup_adjust_learning_rate(
                        self.optimizer, warmup_epochs * nBatch, nBatch, epoch, i, warmup_lr,
                    )
                else:
                    new_lr = self.run_config.adjust_learning_rate(self.optimizer, epoch - warmup_epochs, i, nBatch)

                images, labels = images.cuda(), labels.cuda()
                target = labels

                # soft target
                # NOTE(review): teacher runs in train() mode during the
                # distillation forward — confirm eval() was not intended.
                if args.teacher_model is not None:
                    args.teacher_model.train()
                    with torch.no_grad():
                        soft_logits = args.teacher_model(images).detach()
                        soft_label = F.softmax(soft_logits, dim=1)

                # compute output
                output = self.net(images)

                if args.teacher_model is None:
                    loss = self.train_criterion(output, labels)
                    loss_type = 'ce'
                else:
                    if args.kd_type == 'ce':
                        kd_loss = cross_entropy_loss_with_soft_target(output, soft_label)
                    else:
                        kd_loss = F.mse_loss(output, soft_logits)
                    loss = args.kd_ratio * kd_loss + self.train_criterion(output, labels)
                    loss_type = '%.1fkd-%s & ce' % (args.kd_ratio, args.kd_type)

                # update
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

                # measure accuracy and record loss
                # DistributedMetric.update receives the tensors themselves
                # (not .item()) so values can be reduced across workers.
                acc1, acc5 = accuracy(output, target, topk=(1, 5))
                losses.update(loss, images.size(0))
                top1.update(acc1[0], images.size(0))
                top5.update(acc5[0], images.size(0))

                t.set_postfix({
                    'loss': losses.avg.item(),
                    'top1': top1.avg.item(),
                    'top5': top5.avg.item(),
                    'img_size': images.size(2),
                    'lr': new_lr,
                    'loss_type': loss_type,
                    'data_time': data_time.avg,
                })
                t.update(1)
                end = time.time()

        return losses.avg.item(), top1.avg.item(), top5.avg.item()
Пример #11
0
    def train_one_epoch(self, args, epoch, warmup_epochs=0, warmup_lr=0):
        """Train `self.net` for one epoch.

        Handles Inception3's auxiliary head, optional apex mixed precision
        (self.mix_prec), and optional knowledge distillation.

        Args:
            args: namespace providing teacher_model, kd_type and kd_ratio.
            epoch: zero-based epoch index.
            warmup_epochs: epochs of LR warmup (0 disables warmup).
            warmup_lr: starting LR of the warmup schedule.

        Returns:
            (average loss, top-1, top-5) over the epoch.
        """
        # switch to train mode
        self.net.train()

        nBatch = len(self.run_config.train_loader)

        losses = AverageMeter()
        top1 = AverageMeter()
        top5 = AverageMeter()
        data_time = AverageMeter()

        with tqdm(total=nBatch,
                  desc='Train Epoch #{}'.format(epoch + 1)) as t:
            end = time.time()
            for i, (images, labels) in enumerate(self.run_config.train_loader):
                data_time.update(time.time() - end)
                # Per-iteration learning-rate schedule.
                if epoch < warmup_epochs:
                    new_lr = self.run_config.warmup_adjust_learning_rate(
                        self.optimizer, warmup_epochs * nBatch, nBatch, epoch, i, warmup_lr,
                    )
                else:
                    new_lr = self.run_config.adjust_learning_rate(self.optimizer, epoch - warmup_epochs, i, nBatch)

                images, labels = images.to(self.device), labels.to(self.device)
                target = labels

                # soft target
                # NOTE(review): teacher runs in train() mode here — confirm
                # eval() was not intended for distillation.
                if args.teacher_model is not None:
                    args.teacher_model.train()
                    with torch.no_grad():
                        soft_logits = args.teacher_model(images).detach()
                        soft_label = F.softmax(soft_logits, dim=1)

                # compute output
                if isinstance(self.network, torchvision.models.Inception3):
                    # Inception3 returns (main, aux) logits in train mode;
                    # the auxiliary loss is weighted by 0.4.
                    output, aux_outputs = self.net(images)
                    loss1 = self.train_criterion(output, labels)
                    loss2 = self.train_criterion(aux_outputs, labels)
                    loss = loss1 + 0.4 * loss2
                else:
                    output = self.net(images)
                    loss = self.train_criterion(output, labels)

                if args.teacher_model is None:
                    loss_type = 'ce'
                else:
                    if args.kd_type == 'ce':
                        kd_loss = cross_entropy_loss_with_soft_target(output, soft_label)
                    else:
                        kd_loss = F.mse_loss(output, soft_logits)
                    loss = args.kd_ratio * kd_loss + loss
                    loss_type = '%.1fkd-%s & ce' % (args.kd_ratio, args.kd_type)

                # compute gradient and do SGD step
                self.net.zero_grad()  # or self.optimizer.zero_grad()
                if self.mix_prec is not None:
                    # apex mixed precision: scale the loss before backward.
                    from apex import amp
                    with amp.scale_loss(loss, self.optimizer) as scaled_loss:
                        scaled_loss.backward()
                else:
                    loss.backward()
                self.optimizer.step()

                # measure accuracy and record loss
                acc1, acc5 = accuracy(output, target, topk=(1, 5))
                losses.update(loss.item(), images.size(0))
                top1.update(acc1[0].item(), images.size(0))
                top5.update(acc5[0].item(), images.size(0))

                t.set_postfix({
                    'loss': losses.avg,
                    'top1': top1.avg,
                    'top5': top5.avg,
                    'img_size': images.size(2),
                    'lr': new_lr,
                    'loss_type': loss_type,
                    'data_time': data_time.avg,
                })
                t.update(1)
                end = time.time()
        return losses.avg, top1.avg, top5.avg
Пример #12
0
    def validate(self, epoch=0, is_test=True, run_str='', net=None, data_loader=None, no_logs=False):
        """Evaluate `net` (default self.net) and return (loss, top1, top5) averages."""
        net = self.net if net is None else net
        if not isinstance(net, nn.DataParallel):
            net = nn.DataParallel(net)

        if data_loader is None:
            data_loader = (self.run_config.test_loader if is_test
                           else self.run_config.valid_loader)

        net.eval()

        loss_meter = AverageMeter()
        top1_meter = AverageMeter()
        top5_meter = AverageMeter()

        desc = 'Validate Epoch #{} {}'.format(epoch + 1, run_str)
        with torch.no_grad():
            with tqdm(total=len(data_loader), desc=desc,
                      disable=no_logs) as t:
                for images, labels in data_loader:
                    images = images.to(self.device)
                    labels = labels.to(self.device)

                    # forward only — gradients are disabled above
                    output = net(images)
                    loss = self.test_criterion(output, labels)
                    acc1, acc5 = accuracy(output, labels, topk=(1, 5))

                    n = images.size(0)
                    loss_meter.update(loss.item(), n)
                    top1_meter.update(acc1[0].item(), n)
                    top5_meter.update(acc5[0].item(), n)
                    t.set_postfix({
                        'loss': loss_meter.avg,
                        'top1': top1_meter.avg,
                        'top5': top5_meter.avg,
                        'img_size': images.size(2),
                    })
                    t.update(1)
        return loss_meter.avg, top1_meter.avg, top5_meter.avg
Пример #13
0
    def train_one_epoch(self, args, epoch, warmup_epochs=5, warmup_lr=0):
        """Run one training epoch over ``self.run_config.train_loader``.

        Args:
            args: unused in the active code path (the knowledge-distillation
                path that used it is commented out below).
            epoch: zero-based epoch index.
            warmup_epochs: number of epochs using the warm-up LR schedule.
            warmup_lr: starting learning rate during warm-up.

        Returns:
            ``losses.avg`` — NOTE(review): ``losses`` is never updated in
            this body (its update call is commented out), so the returned
            average reflects an empty meter; confirm this is intended.
        """
        self.net.train()
        # self.run_config.train_loader.sampler.set_epoch(epoch)  # required by distributed sampler
        MyRandomResizedCrop.EPOCH = epoch  # required by elastic resolution

        nBatch = len(self.run_config.train_loader)

        # losses = DistributedMetric('train_loss')
        losses = AverageMeter()
        metric_dict = self.get_metric_dict()
        data_time = AverageMeter()

        with tqdm(total=nBatch,
                  desc='Train Epoch #{}'.format(epoch + 1),
                  disable=not self.is_root) as t:
            end = time.time()
            for i, (images, labels) in enumerate(self.run_config.train_loader):
                MyRandomResizedCrop.BATCH = i
                data_time.update(time.time() - end)
                if epoch < warmup_epochs:
                    new_lr = self.run_config.warmup_adjust_learning_rate(
                        self.optimizer,
                        warmup_epochs * nBatch,
                        nBatch,
                        epoch,
                        i,
                        warmup_lr,
                    )
                else:
                    new_lr = self.run_config.adjust_learning_rate(
                        self.optimizer, epoch - warmup_epochs, i, nBatch)

                # images, labels = images.cuda(), labels.cuda()
                target = labels
                if isinstance(self.run_config.mixup_alpha, float):
                    # transform data
                    # deterministic per-(batch, epoch) seed so mixup is reproducible
                    random.seed(int('%d%.3d' % (i, epoch)))
                    lam = random.betavariate(self.run_config.mixup_alpha,
                                             self.run_config.mixup_alpha)
                    images = mix_images(images, lam)
                    labels = mix_labels(
                        labels, lam, self.run_config.data_provider.n_classes,
                        self.run_config.label_smoothing)

                # soft target
                # if args.teacher_model is not None:
                #     args.teacher_model.train()
                #     with torch.no_grad():
                #         soft_logits = args.teacher_model(images).detach()
                #         soft_label = F.softmax(soft_logits, dim=1)

                # compute output
                output = self.net(images)

                # if args.teacher_model is None:
                loss = self.train_criterion(output, labels)
                loss_type = 'ce'
                # else:
                #     if args.kd_type == 'ce':
                #         kd_loss = cross_entropy_loss_with_soft_target(output, soft_label)
                #     else:
                #         kd_loss = F.mse_loss(output, soft_logits)
                #     loss = args.kd_ratio * kd_loss + self.train_criterion(output, labels)
                #     loss_type = '%.1fkd+ce' % args.kd_ratio

                # update
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

                # measure accuracy and record loss
                # losses.update(loss, images.size(0))
                # self.update_metric(metric_dict, output, target)  # todo: update_metric uses horovod; either change it or stop calling this method

                # t.set_postfix({
                #     'loss': losses.avg.item(),
                #     **self.get_metric_vals(metric_dict, return_dict=True),
                #     'img_size': images.size(2),
                #     'lr': new_lr,
                #     'loss_type': loss_type,
                #     'data_time': data_time.avg,
                # })
                # t.update(1)
                # end = time.time()
                # NOTE(review): exits after the first batch — looks like a debugging leftover; confirm before real training
                break

        # return losses.avg.item()#, self.get_metric_vals(metric_dict)
        return losses.avg  # , self.get_metric_vals(metric_dict)
def train_one_epoch(run_manager, args, epoch, warmup_epochs=0, warmup_lr=0):
    """Train the dynamic (once-for-all style) supernet for one epoch.

    For each batch, ``args.dynamic_batch_size`` sub-networks are sampled and
    their losses back-propagated before a single optimizer step, i.e.
    gradients are accumulated across the sampled subnets.

    Args:
        run_manager: owns the network, optimizer, run_config and criteria.
        args: reads ``kd_ratio``, ``kd_type``, ``teacher_model``,
            ``dynamic_batch_size``, ``independent_distributed_sampling``.
        epoch: zero-based epoch index.
        warmup_epochs: number of epochs using the warm-up LR schedule.
        warmup_lr: starting learning rate during warm-up.

    Returns:
        (avg_loss, avg_top1, avg_top5) as Python floats.
    """
    dynamic_net = run_manager.net

    # switch to train mode
    dynamic_net.train()
    # keep the distributed sampler's shuffling in sync across workers
    run_manager.run_config.train_loader.sampler.set_epoch(epoch)
    MyRandomResizedCrop.EPOCH = epoch

    nBatch = len(run_manager.run_config.train_loader)

    data_time = AverageMeter()
    losses = DistributedMetric('train_loss')
    top1 = DistributedMetric('train_top1')
    top5 = DistributedMetric('train_top5')

    with tqdm(total=nBatch,
              desc='Train Epoch #{}'.format(epoch + 1),
              disable=not run_manager.is_root) as t:
        end = time.time()
        for i, (images,
                labels) in enumerate(run_manager.run_config.train_loader):
            data_time.update(time.time() - end)
            if epoch < warmup_epochs:
                new_lr = run_manager.run_config.warmup_adjust_learning_rate(
                    run_manager.optimizer,
                    warmup_epochs * nBatch,
                    nBatch,
                    epoch,
                    i,
                    warmup_lr,
                )
            else:
                new_lr = run_manager.run_config.adjust_learning_rate(
                    run_manager.optimizer, epoch - warmup_epochs, i, nBatch)

            images, labels = images.cuda(), labels.cuda()
            target = labels

            # soft target: precompute the teacher's distribution once per batch
            if args.kd_ratio > 0:
                args.teacher_model.train()
                with torch.no_grad():
                    soft_logits = args.teacher_model(images).detach()
                    soft_label = F.softmax(soft_logits, dim=1)

            # clear gradients
            run_manager.optimizer.zero_grad()

            loss_of_subnets, acc1_of_subnets, acc5_of_subnets = [], [], []
            # compute output
            subnet_str = ''
            for _ in range(args.dynamic_batch_size):

                # set random seed before sampling
                if args.independent_distributed_sampling:
                    # per-process, time-varying seed: each worker samples its own subnet
                    subnet_seed = os.getpid() + time.time()
                else:
                    # deterministic seed shared by workers for this (batch, subnet) pair
                    subnet_seed = int('%d%.3d%.3d' %
                                      (epoch * nBatch + i, _, 0))
                random.seed(subnet_seed)
                subnet_settings = dynamic_net.sample_active_subnet()
                subnet_str += '%d: ' % _ + ','.join([
                    '%s_%s' %
                    (key, '%.1f' %
                     subset_mean(val, 0) if isinstance(val, list) else val)
                    for key, val in subnet_settings.items()
                ]) + ' || '

                output = run_manager.net(images)
                if args.kd_ratio == 0:
                    loss = run_manager.train_criterion(output, labels)
                    loss_type = 'ce'
                else:
                    if args.kd_type == 'ce':
                        kd_loss = cross_entropy_loss_with_soft_target(
                            output, soft_label)
                    else:
                        kd_loss = F.mse_loss(output, soft_logits)
                    loss = args.kd_ratio * kd_loss + run_manager.train_criterion(
                        output, labels)
                    # rescale so the combined loss magnitude stays comparable to plain CE
                    loss = loss * (2 / (args.kd_ratio + 1))
                    loss_type = '%.1fkd-%s & ce' % (args.kd_ratio,
                                                    args.kd_type)

                # measure accuracy and record loss
                acc1, acc5 = accuracy(output, target, topk=(1, 5))
                loss_of_subnets.append(loss)
                acc1_of_subnets.append(acc1[0])
                acc5_of_subnets.append(acc5[0])

                # accumulate gradients; the single step below applies them all
                loss.backward()
            run_manager.optimizer.step()

            losses.update(list_mean(loss_of_subnets), images.size(0))
            top1.update(list_mean(acc1_of_subnets), images.size(0))
            top5.update(list_mean(acc5_of_subnets), images.size(0))

            t.set_postfix({
                'loss': losses.avg.item(),
                'top1': top1.avg.item(),
                'top5': top5.avg.item(),
                'R': images.size(2),
                'lr': new_lr,
                'loss_type': loss_type,
                'seed': str(subnet_seed),
                'str': subnet_str,
                'data_time': data_time.avg,
            })
            t.update(1)
            end = time.time()
    return losses.avg.item(), top1.avg.item(), top5.avg.item()
Пример #15
0
    def train_one_epoch(self, args, epoch, warmup_epochs=0, warmup_lr=0):
        """Train for one epoch on a TPU core via torch_xla.

        Args:
            args: unused in the active code path (teacher/KD path is
                commented out).
            epoch: zero-based epoch index.
            warmup_epochs: number of epochs using the warm-up LR schedule.
            warmup_lr: starting learning rate during warm-up.

        Returns:
            (avg_loss, metric_values) where ``metric_values`` comes from
            ``self.get_metric_vals(metric_dict)``.
        """
        # switch to train mode
        self.net.train()
        MyRandomResizedCrop.EPOCH = epoch  # required by elastic resolution

        nBatch = len(self.run_config.train_loader)

        losses = AverageMeter()
        metric_dict = self.get_metric_dict()
        data_time = AverageMeter()

        with tqdm(total=nBatch,
                  desc='{} Train Epoch #{}'.format(self.run_config.dataset, epoch + 1)) as t:
            end = time.time()
            # wrap the loader so batches are fed to this XLA device
            para_loader = pl.ParallelLoader(self.run_config.train_loader, [self.device])
            para_loader = para_loader.per_device_loader(self.device)
            for i, (images, labels) in enumerate(para_loader):
                MyRandomResizedCrop.BATCH = i
                data_time.update(time.time() - end)
                if epoch < warmup_epochs:
                    new_lr = self.run_config.warmup_adjust_learning_rate(
                        self.optimizer, warmup_epochs * nBatch, nBatch, epoch, i, warmup_lr,
                    )
                else:
                    new_lr = self.run_config.adjust_learning_rate(self.optimizer, epoch - warmup_epochs, i, nBatch)
                # scale the LR linearly with the number of XLA replicas
                new_lr *= xm.xrt_world_size()

                target = labels
                if isinstance(self.run_config.mixup_alpha, float):
                    # transform data
                    lam = random.betavariate(self.run_config.mixup_alpha, self.run_config.mixup_alpha)
                    images = mix_images(images, lam)
                    labels = mix_labels(
                        labels, lam, self.run_config.data_provider.n_classes, self.run_config.label_smoothing
                    )
                images = images.to(self.device)
                labels = labels.to(self.device)

                # compute output
                output = self.net(images)
                loss = self.train_criterion(output, labels)

                # if args.teacher_model is None:
                loss_type = 'ce'

                # compute gradient and do SGD step
                self.net.zero_grad()  # or self.optimizer.zero_grad()
                loss.backward()
                # self.optimizer.step()
                # steps the optimizer with cross-replica gradient reduction
                xm.optimizer_step(self.optimizer)

                # measure accuracy and record loss
                losses.update(loss.item(), images.size(0))
                self.update_metric(metric_dict, output, target)

                t.set_postfix({
                    'loss': losses.avg,
                    **self.get_metric_vals(metric_dict, return_dict=True),
                    'img_size': images.size(2),
                    'lr': new_lr,
                    'loss_type': loss_type,
                    'data_time': data_time.avg,
                })
                t.update(1)
                end = time.time()
        return losses.avg, self.get_metric_vals(metric_dict)
Пример #16
0
                        0.224,
                        0.225]),
            ])),
    batch_size=args.batch_size,
    shuffle=True,
    num_workers=args.workers,
    pin_memory=True,
    drop_last=False,
)

# wrap for multi-GPU data parallelism and move to GPU
net = torch.nn.DataParallel(net).cuda()
cudnn.benchmark = True  # let cuDNN pick the fastest algorithms for fixed input shapes
criterion = nn.CrossEntropyLoss().cuda()

# evaluation-only pass: running averages of loss and top-1/top-5 accuracy
net.eval()
losses = AverageMeter()
top1 = AverageMeter()
top5 = AverageMeter()

with torch.no_grad():
    with tqdm(total=len(data_loader), desc='Validate') as t:
        for i, (images, labels) in enumerate(data_loader):
            images, labels = images.cuda(), labels.cuda()
            # compute output
            output = net(images)
            loss = criterion(output, labels)
            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, labels, topk=(1, 5))

            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0].item(), images.size(0))
            # NOTE(review): snippet appears truncated here — top5 is created
            # above but never updated, and the progress bar is never advanced.
Пример #17
0
    def validate(self,
                 epoch=0,
                 is_test=True,
                 run_str='',
                 net=None,
                 data_loader=None,
                 no_logs=False,
                 tensorboard_logging=False):
        """Validate a super-resolution network.

        Feeds 2x-downsampled frames through ``net`` and compares the output
        against the full-resolution ground truth, reporting the test loss
        and PSNR computed on the Y (luma) channel.

        Args:
            epoch: zero-based epoch index (display only).
            is_test: pick ``test_loader`` when True, else ``valid_loader``.
            run_str: extra text appended to the progress-bar description.
            net: network to evaluate; defaults to ``self.net``.
            data_loader: overrides the loader chosen via ``is_test``.
            no_logs: disable the tqdm progress bar.
            tensorboard_logging: also write per-batch PSNR via tensorboardX.

        Returns:
            (avg_loss, avg_psnr) over the whole loader.
        """
        if tensorboard_logging:
            # for tensorboardX; delete when per-frame logging for sequential video is not needed
            from tensorboardX import SummaryWriter
            writer = SummaryWriter(
                './runs/sr_teacher_bn_mse_bolt'
            )  # log directory can be changed here as needed

        if net is None:
            net = self.net
        if not isinstance(net, nn.DataParallel):
            net = nn.DataParallel(net)

        if data_loader is None:
            if is_test:
                data_loader = self.run_config.test_loader
            else:
                data_loader = self.run_config.valid_loader

        net.eval()

        losses = AverageMeter()
        # top1 = AverageMeter()
        # top5 = AverageMeter()
        psnr_averagemeter = AverageMeter()

        with torch.no_grad():
            with tqdm(total=len(data_loader),
                      desc='Validate Epoch #{} {}'.format(epoch + 1, run_str),
                      disable=no_logs) as t:
                for i, mini_batch in enumerate(data_loader):
                    images = mini_batch['image']
                    # choose 2x or 4x downscaling here
                    x2_down_images = mini_batch['2x_down_image']
                    # x4_down_images = mini_batch['4x_down_image']
                    images = images.to(self.device)
                    # choose 2x or 4x downscaling here
                    x2_down_images = x2_down_images.to(self.device)
                    # x4_down_images = x4_down_images.to(self.device)
                    # compute output
                    # choose 2x or 4x downscaling here
                    output = net(x2_down_images)
                    # output = net(x4_down_images)
                    loss = self.test_criterion(output, images)
                    # measure accuracy and record loss
                    # PSNR on the Y channel of the SR output vs. the HR ground truth
                    psnr_current = psnr(
                        rgb2y(tensor2img_np(output)),
                        rgb2y(tensor2img_np(images)))  # HR Comparison
                    # import PIL  # LR Comparison
                    # import torchvision.transforms as transforms  # LR Comparison
                    # output = output.cpu().data[0, :, :, :]  # LR Comparison
                    # output = transforms.ToPILImage()(output)  # LR Comparison
                    # output = output.resize((int(output.size[0]/2), int(output.size[1]/2)), resample=PIL.Image.BICUBIC)  # LR Comparison
                    # output.save('zssr.png')  # LR Comparison FOR VALIDATE BICUBIC_DOWN
                    # output = transforms.ToTensor()(output)  # LR Comparison
                    # psnr_current = psnr(rgb2y(tensor2img_np(output)), rgb2y(tensor2img_np(x2_down_images)))  # LR Comparison

                    if tensorboard_logging:
                        writer.add_scalars(
                            'metric', {'psnr': psnr_current}, i
                        )  # for tensorboardX; delete when not logging sequential video

                    losses.update(loss.item(), images.size(0))
                    # top1.update(acc1[0].item(), images.size(0))
                    # top5.update(acc5[0].item(), images.size(0))
                    psnr_averagemeter.update(psnr_current, images.size(0))
                    t.set_postfix({
                        'loss': losses.avg,
                        # 'top1': top1.avg,
                        # 'top5': top5.avg,
                        'psnr': psnr_averagemeter.avg,
                        'img_size': images.size(2),
                    })
                    t.update(1)

        if tensorboard_logging:
            writer.close(
            )  # for tensorboardX; delete when not logging sequential video

        return losses.avg, psnr_averagemeter.avg