Example #1
def test(args):
    # output folder
    outdir = 'outdir'
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    # data transforms
    input_transform = transform.Compose([
        transform.ToTensor(),
        transform.Normalize([.485, .456, .406], [.229, .224, .225])
    ])
    # dataset
    if args.eval:
        testset = get_dataset(args.dataset,
                              split='val',
                              mode='testval',
                              transform=input_transform)
    elif args.test_val:
        testset = get_dataset(args.dataset,
                              split='val',
                              mode='test',
                              transform=input_transform)
    else:
        testset = get_dataset(args.dataset,
                              split='test',
                              mode='test',
                              transform=input_transform)
    # dataloader
    loader_kwargs = {'num_workers': args.workers, 'pin_memory': True} \
        if args.cuda else {}
    test_data = data.DataLoader(testset,
                                batch_size=args.test_batch_size,
                                drop_last=False,
                                shuffle=False,
                                collate_fn=test_batchify_fn,
                                **loader_kwargs)
    # model
    pretrained = args.resume is None and args.verify is None
    if args.model_zoo is not None:
        model = get_model(args.model_zoo, pretrained=pretrained)
        model.base_size = args.base_size
        model.crop_size = args.crop_size
    else:
        # my
        model_kwargs = {}
        if args.choice_indices is not None:
            assert 'alone_resnest50' in args.backbone
            model_kwargs['choice_indices'] = args.choice_indices
        #
        model = get_segmentation_model(
            args.model,
            dataset=args.dataset,
            backbone=args.backbone,
            aux=args.aux,
            se_loss=args.se_loss,
            norm_layer=torch.nn.BatchNorm2d if args.acc_bn else SyncBatchNorm,
            base_size=args.base_size,
            crop_size=args.crop_size,
            **model_kwargs)

    # resuming checkpoint
    if args.verify is not None and os.path.isfile(args.verify):
        print("=> loading checkpoint '{}'".format(args.verify))
        model.load_state_dict(torch.load(args.verify, map_location='cpu'))
    elif args.resume is not None and os.path.isfile(args.resume):
        checkpoint = torch.load(args.resume, map_location='cpu')
        # pass strict=False to load_state_dict if an older checkpoint has mismatched keys
        model.load_state_dict(checkpoint['state_dict'])
        print("=> loaded checkpoint '{}'".format(args.resume))
    elif not pretrained:
        raise RuntimeError("=> no checkpoint found")

    print(model)
    if args.acc_bn:
        from encoding.utils.precise_bn import update_bn_stats
        data_kwargs = {
            'transform': input_transform,
            'base_size': args.base_size,
            'crop_size': args.crop_size
        }
        trainset = get_dataset(args.dataset,
                               split=args.train_split,
                               mode='train',
                               **data_kwargs)
        trainloader = data.DataLoader(ReturnFirstClosure(trainset),
                                      batch_size=args.batch_size,
                                      drop_last=True,
                                      shuffle=True,
                                      **loader_kwargs)
        print('Resetting BN statistics')
        #model.apply(reset_bn_statistics)
        model.cuda()
        update_bn_stats(model, trainloader)

    if args.export:
        torch.save(model.state_dict(), args.export + '.pth')
        return

    scales = [0.75, 1.0, 1.25, 1.5, 1.75, 2.0, 2.25] if args.dataset == 'citys' else \
            [0.5, 0.75, 1.0, 1.25, 1.5, 1.75]#, 2.0
    evaluator = MultiEvalModule(model, testset.num_class, scales=scales).cuda()
    evaluator.eval()
    metric = utils.SegmentationMetric(testset.num_class)

    tbar = tqdm(test_data)
    for i, (image, dst) in enumerate(tbar):
        if args.eval:
            with torch.no_grad():
                predicts = evaluator.parallel_forward(image)
                metric.update(dst, predicts)
                pixAcc, mIoU = metric.get()
                tbar.set_description('pixAcc: %.4f, mIoU: %.4f' %
                                     (pixAcc, mIoU))
        else:
            with torch.no_grad():
                outputs = evaluator.parallel_forward(image)
                predicts = [
                    testset.make_pred(torch.max(output, 1)[1].cpu().numpy())
                    for output in outputs
                ]
            for predict, impath in zip(predicts, dst):
                mask = utils.get_mask_pallete(predict, args.dataset)
                outname = os.path.splitext(impath)[0] + '.png'
                mask.save(os.path.join(outdir, outname))

    if args.eval:
        print('pixAcc: %.4f, mIoU: %.4f' % (pixAcc, mIoU))
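Note: the precise-BN step above relies on update_bn_stats from encoding.utils.precise_bn. As a rough, self-contained illustration of the idea (not that exact implementation), the sketch below resets every BatchNorm layer's running statistics and re-estimates them with forward passes in train mode; the tiny model and fake loader are placeholders.

import torch
import torch.nn as nn

def recompute_bn_stats(model, loader, num_batches=200):
    # reset running mean/var and switch to a cumulative moving average
    bn_types = (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.SyncBatchNorm)
    for m in model.modules():
        if isinstance(m, bn_types):
            m.reset_running_stats()
            m.momentum = None
    was_training = model.training
    model.train()
    with torch.no_grad():
        for i, (images, _) in enumerate(loader):
            if i >= num_batches:
                break
            model(images)  # forward passes update the running statistics
    model.train(was_training)

if __name__ == '__main__':
    net = nn.Sequential(nn.Conv2d(3, 8, 3, padding=1), nn.BatchNorm2d(8), nn.ReLU())
    fake_loader = [(torch.randn(4, 3, 32, 32), None) for _ in range(10)]
    recompute_bn_stats(net, fake_loader, num_batches=10)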
Example #2
    def __init__(self, args):
        self.args = args
        # data transforms
        input_transform = transform.Compose([
            transform.ToTensor(),
            transform.Normalize([.485, .456, .406], [.229, .224, .225])
        ])
        # dataset
        data_kwargs = {
            'transform': input_transform,
            'base_size': args.base_size,
            'crop_size': args.crop_size
        }

        trainset_1 = get_dataset(
            'pascal_voc',
            root=os.path.expanduser('/fast/users/a1675776/data/encoding/data'),
            split='train',
            mode='train',
            **data_kwargs)

        trainset_2 = get_dataset(
            'pascal_aug',
            root=os.path.expanduser('/fast/users/a1675776/data/encoding/data'),
            split='train',
            mode='train',
            **data_kwargs)
        testset = get_dataset(
            'pascal_voc',
            root=os.path.expanduser('/fast/users/a1675776/data/encoding/data'),
            split='val',
            mode='val',
            **data_kwargs)

        concatenate_trainset = torch.utils.data.ConcatDataset(
            [trainset_1, trainset_2])
        # dataloader
        kwargs = {'num_workers': args.workers, 'pin_memory': True} \
            if args.cuda else {}
        self.trainloader = data.DataLoader(concatenate_trainset,
                                           batch_size=args.batch_size,
                                           drop_last=True,
                                           shuffle=True,
                                           **kwargs)
        self.valloader = data.DataLoader(testset,
                                         batch_size=args.batch_size,
                                         drop_last=False,
                                         shuffle=False,
                                         **kwargs)

        self.nclass = trainset_1.num_class
        # model
        model = get_segmentation_model(args.model,
                                       dataset=args.dataset,
                                       backbone=args.backbone,
                                       aux=args.aux,
                                       se_loss=args.se_loss,
                                       norm_layer=SyncBatchNorm,
                                       base_size=args.base_size,
                                       crop_size=args.crop_size)
        #       print(model)
        # optimizer using different LR
        params_list = [
            {
                'params': model.pretrained.parameters(),
                'lr': args.lr
            },
        ]
        if hasattr(model, 'head'):
            params_list.append({
                'params': model.head.parameters(),
                'lr': args.lr * 10
            })
        if hasattr(model, 'auxlayer'):
            params_list.append({
                'params': model.auxlayer.parameters(),
                'lr': args.lr * 10
            })
        optimizer = torch.optim.Adam(params_list,
                                     lr=args.lr,
                                     weight_decay=args.weight_decay)
        # criterions
        self.criterion = SegmentationLosses(se_loss=args.se_loss,
                                            aux=args.aux,
                                            nclass=self.nclass,
                                            se_weight=args.se_weight,
                                            aux_weight=args.aux_weight)
        self.model, self.optimizer = model, optimizer
        # using cuda
        if args.cuda:
            self.model = DataParallelModel(self.model).cuda()
            self.criterion = DataParallelCriterion(self.criterion).cuda()
        # resuming checkpoint
        if args.resume is not None:
            if not os.path.isfile(args.resume):
                raise RuntimeError("=> no checkpoint found at '{}'".format(
                    args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            if args.cuda:
                self.model.module.load_state_dict(checkpoint['state_dict'])
            else:
                self.model.load_state_dict(checkpoint['state_dict'])
            if not args.ft:
                self.optimizer.load_state_dict(checkpoint['optimizer'])
            self.best_pred = checkpoint['best_pred']
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        # clear start epoch if fine-tuning
        if args.ft:
            args.start_epoch = 0
        # lr scheduler
        self.scheduler = utils.LR_Scheduler(args.lr_scheduler, args.lr,
                                            args.epochs, len(self.trainloader))
        self.best_pred = 0.0
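Note: the params_list above gives the backbone (model.pretrained) the base learning rate and the head / auxiliary layers ten times that. A minimal, self-contained sketch of this per-parameter-group pattern follows (the same idea applies to the Adam optimizer used above); the two conv layers are stand-ins for model.pretrained and model.head, and adjust_lr only illustrates how a schedule such as the LR_Scheduler used here would need to rescale each group separately.

import torch
import torch.nn as nn

backbone = nn.Conv2d(3, 16, 3, padding=1)   # stand-in for model.pretrained
head = nn.Conv2d(16, 21, 1)                 # stand-in for model.head

base_lr = 0.001
optimizer = torch.optim.SGD(
    [{'params': backbone.parameters(), 'lr': base_lr},
     {'params': head.parameters(), 'lr': base_lr * 10}],
    lr=base_lr, momentum=0.9, weight_decay=1e-4)

def adjust_lr(optimizer, scale):
    # a poly/step schedule would compute scale from the current iteration
    optimizer.param_groups[0]['lr'] = base_lr * scale
    optimizer.param_groups[1]['lr'] = base_lr * 10 * scale

adjust_lr(optimizer, 0.5)
print([g['lr'] for g in optimizer.param_groups])   # [0.0005, 0.005]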
Example #3
    def __init__(self, args):
        self.args = args
        # data transforms
        input_transform = transform.Compose([
            transform.ToTensor(),
            transform.Normalize([.485, .456, .406], [.229, .224, .225])
        ])
        # dataset
        data_kwargs = {
            'transform': input_transform,
            'base_size': args.base_size,
            'crop_size': args.crop_size
        }
        trainset = get_dataset(args.dataset,
                               split=args.train_split,
                               mode='train',
                               **data_kwargs)
        testset = get_dataset(args.dataset,
                              split='val',
                              mode='val',
                              **data_kwargs)

        self.train_sampler = torch.utils.data.distributed.DistributedSampler(
            trainset)
        self.val_sampler = torch.utils.data.distributed.DistributedSampler(
            testset)
        # dataloader
        kwargs = {'num_workers': args.workers, 'pin_memory': True} \
            if args.cuda else {}
        #self.trainloader = data.DataLoader(trainset, batch_size=args.batch_size,
        #                                   drop_last=True, shuffle=True, **kwargs)
        #collate_fn=test_batchify_fn,
        self.trainloader = data.DataLoader(trainset,
                                           batch_size=args.batch_size //
                                           args.world_size,
                                           drop_last=True,
                                           shuffle=False,
                                           sampler=self.train_sampler,
                                           **kwargs)
        #self.valloader = data.DataLoader(testset, batch_size=args.batch_size,
        self.valloader = data.DataLoader(testset,
                                         batch_size=args.test_batch_size //
                                         args.world_size,
                                         drop_last=False,
                                         shuffle=False,
                                         sampler=self.val_sampler,
                                         **kwargs)
        self.nclass = trainset.num_class
        #Norm_method = nn.SyncBatchNorm
        #Norm_method = nn.BatchNorm2d(momentum=0.01)
        Norm_method = nn.BatchNorm2d
        # model
        model = get_segmentation_model(args.model,
                                       dataset=args.dataset,
                                       backbone=args.backbone,
                                       aux=args.aux,
                                       multi_grid=args.multi_grid,
                                       se_loss=args.se_loss,
                                       norm_layer=Norm_method,
                                       lateral=args.lateral,
                                       root=args.backbone_path,
                                       base_size=args.base_size,
                                       crop_size=args.crop_size)
        if self.args.rank == 0:
            print(model)

        # optimizer using different LR
        params_list = [
            {
                'params': model.pretrained.parameters(),
                'lr': args.lr
            },
        ]
        if hasattr(model, 'head'):
            params_list.append({
                'params': model.head.parameters(),
                'lr': args.lr * 10
            })
        if hasattr(model, 'auxlayer'):
            params_list.append({
                'params': model.auxlayer.parameters(),
                'lr': args.lr * 10
            })
        optimizer = torch.optim.SGD(params_list,
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
        self.optimizer = optimizer

        #self.model = model
        # criterions
        self.criterion = SegmentationLosses(se_loss=args.se_loss,
                                            aux=args.aux,
                                            nclass=self.nclass,
                                            se_weight=args.se_weight,
                                            aux_weight=args.aux_weight)

        device = torch.device('cuda:{}'.format(args.local_rank))

        self.device = device
        # using cuda
        if args.cuda:
            #self.model = DataParallelModel(self.model).cuda()
            #self.model = self.model.cuda()
            sync_bn_model = FullModel(model, self.criterion)
            #self.model.cuda()
            #broadcast_params(self.model)
            #num_gpus = torch.cuda.device_count()
            #local_rank = args.local_rank % num_gpus
            #local_rank = args.local_rank
            #process_group = torch.distributed.new_group([args.local_rank])
            #process_group = torch.distributed.new_group([args.rank])
            #sync_bn_model = torch.nn.utils.convert_sync_batchnorm(self.model, process_group)
            sync_bn_model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(
                sync_bn_model)
            sync_bn_model = sync_bn_model.to(device)
            #self.model = torch.nn.parallel.DistributedDataParallel(self.model, device_ids=[args.local_rank], output_device=args.local_rank)
            self.model = torch.nn.parallel.DistributedDataParallel(
                sync_bn_model,
                device_ids=[args.local_rank],
                output_device=args.local_rank,
                find_unused_parameters=True)
            #self.criterion = DataParallelCriterion(self.criterion).cuda()
            #self.criterion = self.criterion.cuda()
            dist.barrier()

        # resuming checkpoint
        #if args.resume is not None and self.args.rank == 0:
        if args.resume is not None:
            if not os.path.isfile(args.resume):
                raise RuntimeError("=> no checkpoint found at '{}'".format(
                    args.resume))
            checkpoint = torch.load(args.resume)
            old_state_dict = checkpoint['state_dict']
            new_state_dict = dict()
            for k, v in old_state_dict.items():
                # keys are copied unchanged; the commented line would strip a 'module.' prefix
                #new_state_dict[k[len('module.'):]] = v
                new_state_dict[k] = v

            args.start_epoch = checkpoint['epoch']
            if args.cuda:
                #self.model.module.load_state_dict(checkpoint['state_dict'])
                #self.model.load_state_dict(checkpoint['state_dict'])
                self.model.load_state_dict(new_state_dict)
            else:
                #self.model.load_state_dict(checkpoint['state_dict'])
                self.model.load_state_dict(new_state_dict)
            if not args.ft:
                self.optimizer.load_state_dict(checkpoint['optimizer'])
                for state in self.optimizer.state.values():
                    for k, v in state.items():
                        if isinstance(v, torch.Tensor):
                            state[k] = v.cuda()

            self.best_pred = checkpoint['best_pred']
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))

        # clear start epoch if fine-tuning
        if args.ft:
            args.start_epoch = 0
        # lr scheduler
        self.scheduler = utils.LR_Scheduler(args.lr_scheduler,
                                            args.lr,
                                            args.epochs,
                                            len(self.trainloader),
                                            local_rank=self.args.rank)
        print('len(trainloader): %d' % len(self.trainloader))

        self.best_pred = 0.0
        #for sumaryWriter
        self.track_loss = 0.0
        self.track_pixAcc = 0.0
        self.track_mIoU = 0.0
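Note: the trainer above converts BatchNorm layers with torch.nn.SyncBatchNorm.convert_sync_batchnorm and then wraps the model in DistributedDataParallel. The standalone sketch below shows that pattern in a single process (world_size 1, gloo backend, CPU only) so it runs anywhere; the real script does this once per GPU process with the nccl backend and device_ids=[local_rank].

import os
import torch
import torch.distributed as dist
import torch.nn as nn

def main():
    os.environ.setdefault('MASTER_ADDR', '127.0.0.1')
    os.environ.setdefault('MASTER_PORT', '29500')
    dist.init_process_group('gloo', rank=0, world_size=1)

    model = nn.Sequential(nn.Conv2d(3, 8, 3, padding=1),
                          nn.BatchNorm2d(8), nn.ReLU())
    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)   # BatchNorm2d -> SyncBatchNorm
    ddp_model = nn.parallel.DistributedDataParallel(model)   # CPU/gloo here; GPU + nccl in practice
    print(ddp_model.module)                                  # shows the converted layers

    dist.destroy_process_group()

if __name__ == '__main__':
    main()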
Example #4
def main_worker(gpu, ngpus_per_node, args):
    global best_pred
    args.gpu = gpu
    args.rank = args.rank * ngpus_per_node + gpu
    print('rank: {} / {}'.format(args.rank, args.world_size))
    dist.init_process_group(backend=args.dist_backend,
                            init_method=args.dist_url,
                            world_size=args.world_size,
                            rank=args.rank)
    torch.cuda.set_device(args.gpu)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    cudnn.benchmark = True
    # data transforms
    input_transform = transform.Compose([
        transform.ToTensor(),
        transform.Normalize([.485, .456, .406], [.229, .224, .225])
    ])
    # dataset
    data_kwargs = {
        'transform': input_transform,
        'base_size': args.base_size,
        'crop_size': args.crop_size
    }
    trainset = get_dataset(args.dataset,
                           split=args.train_split,
                           mode='train',
                           **data_kwargs)
    valset = get_dataset(args.dataset, split='val', mode='val', **data_kwargs)
    train_sampler = torch.utils.data.distributed.DistributedSampler(trainset)
    val_sampler = torch.utils.data.distributed.DistributedSampler(
        valset, shuffle=False)
    # dataloader
    loader_kwargs = {
        'batch_size': args.batch_size,
        'num_workers': args.workers,
        'pin_memory': True
    }
    trainloader = data.DataLoader(trainset,
                                  sampler=train_sampler,
                                  drop_last=True,
                                  **loader_kwargs)
    valloader = data.DataLoader(valset, sampler=val_sampler, **loader_kwargs)
    nclass = trainset.num_class
    # model
    model_kwargs = {}
    if args.rectify:
        model_kwargs['rectified_conv'] = True
        model_kwargs['rectify_avg'] = args.rectify_avg
    model = get_segmentation_model(args.model,
                                   dataset=args.dataset,
                                   backbone=args.backbone,
                                   aux=args.aux,
                                   se_loss=args.se_loss,
                                   norm_layer=DistSyncBatchNorm,
                                   base_size=args.base_size,
                                   crop_size=args.crop_size,
                                   **model_kwargs)
    if args.gpu == 0:
        print(model)
    # optimizer using different LR
    params_list = [
        {
            'params': model.pretrained.parameters(),
            'lr': args.lr
        },
    ]
    if hasattr(model, 'head'):
        params_list.append({
            'params': model.head.parameters(),
            'lr': args.lr * 10
        })
    if hasattr(model, 'auxlayer'):
        params_list.append({
            'params': model.auxlayer.parameters(),
            'lr': args.lr * 10
        })
    optimizer = torch.optim.SGD(params_list,
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    # optimizer = torch.optim.Adam(params_list,
    #                             lr=args.lr,
    #                             # momentum=args.momentum,
    #                             weight_decay=args.weight_decay)
    # criterions
    criterion = SegmentationLosses(se_loss=args.se_loss,
                                   aux=args.aux,
                                   nclass=nclass,
                                   se_weight=args.se_weight,
                                   aux_weight=args.aux_weight)
    # distributed data parallel
    model.cuda(args.gpu)
    criterion.cuda(args.gpu)
    model = DistributedDataParallel(model, device_ids=[args.gpu])
    metric = utils.SegmentationMetric(nclass=nclass)

    # resuming checkpoint
    if args.resume is not None:
        if not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'".format(
                args.resume))

        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        model.module.load_state_dict(checkpoint['state_dict'])
        '''
        checkpoint = torch.load(args.resume, map_location='cpu')
        args.start_epoch = checkpoint['epoch']
        model.module.load_state_dict(checkpoint['state_dict'])
        model.cuda()
        '''
        if not args.ft:
            optimizer.load_state_dict(checkpoint['optimizer'])
        best_pred = checkpoint['best_pred']
        print("=> loaded checkpoint '{}' (epoch {})".format(
            args.resume, checkpoint['epoch']))
    # clear start epoch if fine-tuning
    if args.ft:
        args.start_epoch = 0

    # lr scheduler
    scheduler = utils.LR_Scheduler_Head(args.lr_scheduler, args.lr,
                                        args.epochs, len(trainloader))
    # train_losses = [2.855, 2.513, 2.275, 2.128, 2.001, 1.875, 1.855, 1.916, 1.987, 1.915, 1.952]
    train_losses = []

    def training(epoch):
        train_sampler.set_epoch(epoch)
        global best_pred
        train_loss = 0.0
        model.train()
        tic = time.time()
        for i, (image, target) in enumerate(trainloader):
            scheduler(optimizer, i, epoch, best_pred)
            optimizer.zero_grad()
            outputs = model(image)
            target = target.cuda(args.gpu)
            loss = criterion(*outputs, target)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            if i % 100 == 0 and args.gpu == 0:
                iter_per_sec = 100.0 / (
                    time.time() - tic) if i != 0 else 1.0 / (time.time() - tic)
                tic = time.time()
                print('Epoch: {}, Iter: {}, Speed: {:.3f} iter/sec, Train loss: {:.3f}'. \
                      format(epoch, i, iter_per_sec, train_loss / (i + 1)))
        train_losses.append(train_loss / len(trainloader))
        if epoch > 1:
            if train_losses[epoch] < train_losses[epoch - 1]:
                utils.save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'state_dict': model.module.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'best_pred': new_preds[(epoch - 1) // 10],
                    },
                    args,
                    False,
                    filename='checkpoint_train.pth.tar')
        plt.plot(train_losses)
        plt.xlabel('Epoch')
        plt.ylabel('Train_loss')
        plt.title('Train_Loss')
        plt.grid()
        plt.savefig('./loss_fig/train_losses.pdf')
        plt.savefig('./loss_fig/train_losses.svg')
        plt.close()

    # p_m = [(0.3, 0.05), (0.23, 0.54)]
    # new_preds = [0.175, 0.392]
    p_m = []
    new_preds = []

    def validation(epoch):
        # Fast test during the training using single-crop only
        global best_pred
        is_best = False
        model.eval()
        metric.reset()

        for i, (image, target) in enumerate(valloader):
            with torch.no_grad():
                pred = model(image)[0]
                target = target.cuda(args.gpu)
                metric.update(target, pred)

            if i % 100 == 0:
                all_metrics = metric.get_all()
                all_metrics = utils.torch_dist_sum(args.gpu, *all_metrics)
                pixAcc, mIoU = utils.get_pixacc_miou(*all_metrics)
                if args.gpu == 0:
                    print('pixAcc: %.3f, mIoU1: %.3f' % (pixAcc, mIoU))

        all_metrics = metric.get_all()
        all_metrics = utils.torch_dist_sum(args.gpu, *all_metrics)
        pixAcc, mIoU = utils.get_pixacc_miou(*all_metrics)
        if args.gpu == 0:
            print('pixAcc: %.3f, mIoU2: %.3f' % (pixAcc, mIoU))

            p_m.append((pixAcc, mIoU))
            plt.plot(p_m)
            plt.xlabel('10 Epoch')
            plt.ylabel('pixAcc, mIoU')
            plt.title('pixAcc, mIoU')
            plt.grid()
            plt.legend(('pixAcc', 'mIoU'))

            plt.savefig('./loss_fig/pixAcc_mIoU.pdf')
            plt.savefig('./loss_fig/pixAcc_mIoU.svg')
            plt.close()

            if args.eval: return
            new_pred = (pixAcc + mIoU) / 2
            new_preds.append(new_pred)

            plt.plot(new_preds)
            plt.xlabel('10 Epoch')
            plt.ylabel('new_prediction')
            plt.title('new_prediction')
            plt.grid()
            plt.savefig('./loss_fig/new_prediction.pdf')
            plt.savefig('./loss_fig/new_prediction.svg')
            plt.close()

            if new_pred > best_pred:
                is_best = True
                best_pred = new_pred
            utils.save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.module.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'best_pred': best_pred,
                },
                args,
                is_best,
                filename='checkpoint_train_{}.pth.tar'.format(epoch + 1))

    if args.export:
        if args.gpu == 0:
            torch.save(model.module.state_dict(), args.export + '.pth')
        return

    if args.eval:
        validation(args.start_epoch)
        return

    if args.gpu == 0:
        print('Starting Epoch:', args.start_epoch)
        print('Total Epochs:', args.epochs)

    for epoch in range(args.start_epoch, args.epochs):
        tic = time.time()
        training(epoch)
        if epoch % 10 == 0 or epoch == args.epochs - 1:
            validation(epoch)
        elapsed = time.time() - tic
        if args.gpu == 0:
            print(f'Epoch: {epoch}, Time cost: {elapsed}')

    validation(epoch)
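Note: training() above calls train_sampler.set_epoch(epoch) at the start of every epoch. The standalone sketch below (single process, world_size 1, gloo backend) shows why: DistributedSampler seeds its shuffle with the epoch number, so without set_epoch every epoch would replay the same ordering.

import os
import torch
import torch.distributed as dist
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data.distributed import DistributedSampler

os.environ.setdefault('MASTER_ADDR', '127.0.0.1')
os.environ.setdefault('MASTER_PORT', '29501')
dist.init_process_group('gloo', rank=0, world_size=1)

dataset = TensorDataset(torch.arange(8))
sampler = DistributedSampler(dataset)              # shuffle=True by default
loader = DataLoader(dataset, batch_size=4, sampler=sampler)

for epoch in range(2):
    sampler.set_epoch(epoch)                       # reseed the shuffle for this epoch
    order = [batch[0].tolist() for batch in loader]
    print('epoch', epoch, 'order', order)

dist.destroy_process_group()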
Example #5
 def __init__(self, args):
     self.args = args
     # data transforms
     input_transform = transform.Compose([
         transform.ToTensor(),
         transform.Normalize([.485, .456, .406], [.229, .224, .225])
     ])
     # dataset
     data_kwargs = {
         'transform': input_transform,
         'base_size': args.base_size,
         'crop_size': args.crop_size
     }
     trainset = get_dataset(args.dataset,
                            split=args.train_split,
                            mode='train',
                            **data_kwargs)
     valset = get_dataset(
         args.dataset,
         split='val',
         mode='ms_val' if args.multi_scale_eval else 'fast_val',
         **data_kwargs)
     # dataloader
     kwargs = {'num_workers': args.workers, 'pin_memory': True}
     self.trainloader = data.DataLoader(trainset,
                                        batch_size=args.batch_size,
                                        drop_last=True,
                                        shuffle=True,
                                        **kwargs)
     if self.args.multi_scale_eval:
         kwargs['collate_fn'] = test_batchify_fn
     self.valloader = data.DataLoader(valset,
                                      batch_size=args.test_batch_size,
                                      drop_last=False,
                                      shuffle=False,
                                      **kwargs)
     self.nclass = trainset.num_class
     # model
     if args.norm_layer == 'bn':
         norm_layer = BatchNorm2d
     elif args.norm_layer == 'sync_bn':
         assert args.multi_gpu, "SyncBatchNorm can only be used when multi GPUs are available!"
         norm_layer = SyncBatchNorm
     else:
         raise ValueError('Invalid norm_layer {}'.format(args.norm_layer))
     model = get_segmentation_model(
         args.model,
         dataset=args.dataset,
         backbone=args.backbone,
         aux=args.aux,
         se_loss=args.se_loss,
         norm_layer=norm_layer,
         base_size=args.base_size,
         crop_size=args.crop_size,
         multi_grid=True,
         multi_dilation=[2, 4, 8],
         only_pam=True,
     )
     print(model)
     # optimizer using different LR
     params_list = [
         {
             'params': model.pretrained.parameters(),
             'lr': args.lr
         },
     ]
     if hasattr(model, 'head'):
         params_list.append({
             'params': model.head.parameters(),
             'lr': args.lr
         })
     if hasattr(model, 'auxlayer'):
         params_list.append({
             'params': model.auxlayer.parameters(),
             'lr': args.lr
         })
     optimizer = torch.optim.SGD(params_list,
                                 lr=args.lr,
                                 momentum=args.momentum,
                                 weight_decay=args.weight_decay)
     # criterions
     self.criterion = SegmentationMultiLosses()
     self.model, self.optimizer = model, optimizer
     # using cuda
     if args.multi_gpu:
         self.model = DataParallelModel(self.model).cuda()
         self.criterion = DataParallelCriterion(self.criterion).cuda()
     else:
         self.model = self.model.cuda()
         self.criterion = self.criterion.cuda()
     self.single_device_model = self.model.module if self.args.multi_gpu else self.model
     # resuming checkpoint
     if args.resume is not None:
         if not os.path.isfile(args.resume):
             raise RuntimeError("=> no checkpoint found at '{}'".format(
                 args.resume))
         checkpoint = torch.load(args.resume)
         args.start_epoch = checkpoint['epoch']
         self.single_device_model.load_state_dict(checkpoint['state_dict'])
         if not args.ft and not (args.only_val or args.only_vis
                                 or args.only_infer):
             self.optimizer.load_state_dict(checkpoint['optimizer'])
         self.best_pred = checkpoint['best_pred']
         print("=> loaded checkpoint '{}' (epoch {}), best_pred {}".format(
             args.resume, checkpoint['epoch'], checkpoint['best_pred']))
     # clear start epoch if fine-tuning
     if args.ft:
         args.start_epoch = 0
     # lr scheduler
     self.lr_scheduler = torch.optim.lr_scheduler.ExponentialLR(
         optimizer, 0.6)
     self.best_pred = 0.0
Example #6
def main_worker(gpu, ngpus_per_node, args):
    global best_pred
    args.gpu = gpu
    args.rank = args.rank * ngpus_per_node + gpu
    print('rank: {} / {}'.format(args.rank, args.world_size))
    dist.init_process_group(backend=args.dist_backend,
                            init_method=args.dist_url,
                            world_size=args.world_size,
                            rank=args.rank)
    torch.cuda.set_device(args.gpu)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    cudnn.benchmark = True
    # data transforms
    input_transform = transform.Compose([
        transform.ToTensor(),
        transform.Normalize([.485, .456, .406], [.229, .224, .225])
    ])
    # dataset
    data_kwargs = {
        'transform': input_transform,
        'base_size': args.base_size,
        'crop_size': args.crop_size
    }
    trainset = get_dataset(args.dataset,
                           split=args.train_split,
                           mode='train',
                           **data_kwargs)
    valset = get_dataset(args.dataset, split='val', mode='val', **data_kwargs)
    train_sampler = torch.utils.data.distributed.DistributedSampler(trainset)
    val_sampler = torch.utils.data.distributed.DistributedSampler(
        valset, shuffle=False)
    # dataloader
    loader_kwargs = {
        'batch_size': args.batch_size,
        'num_workers': args.workers,
        'pin_memory': True
    }
    trainloader = data.DataLoader(trainset,
                                  sampler=train_sampler,
                                  drop_last=True,
                                  **loader_kwargs)
    valloader = data.DataLoader(valset, sampler=val_sampler, **loader_kwargs)
    nclass = trainset.num_class
    # model
    model_kwargs = {}
    if args.rectify:
        model_kwargs['rectified_conv'] = True
        model_kwargs['rectify_avg'] = args.rectify_avg
    model = get_segmentation_model(args.model,
                                   dataset=args.dataset,
                                   backbone=args.backbone,
                                   aux=args.aux,
                                   se_loss=args.se_loss,
                                   norm_layer=DistSyncBatchNorm,
                                   base_size=args.base_size,
                                   crop_size=args.crop_size,
                                   **model_kwargs)
    if args.gpu == 0:
        print(model)
    # optimizer using different LR
    params_list = [
        {
            'params': model.pretrained.parameters(),
            'lr': args.lr
        },
    ]
    if hasattr(model, 'head'):
        params_list.append({
            'params': model.head.parameters(),
            'lr': args.lr * 10
        })
    if hasattr(model, 'auxlayer'):
        params_list.append({
            'params': model.auxlayer.parameters(),
            'lr': args.lr * 10
        })
    optimizer = torch.optim.SGD(params_list,
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    # criterions
    criterion = SegmentationLosses(se_loss=args.se_loss,
                                   aux=args.aux,
                                   nclass=nclass,
                                   se_weight=args.se_weight,
                                   aux_weight=args.aux_weight)
    # distributed data parallel
    model.cuda(args.gpu)
    criterion.cuda(args.gpu)
    model = DistributedDataParallel(model, device_ids=[args.gpu])
    metric = utils.SegmentationMetric(nclass=nclass)

    # resuming checkpoint
    if args.resume is not None:
        if not os.path.isfile(args.resume):
            raise RuntimeError("=> no checkpoint found at '{}'".format(
                args.resume))
        checkpoint = torch.load(args.resume)
        args.start_epoch = checkpoint['epoch']
        model.module.load_state_dict(checkpoint['state_dict'])
        if not args.ft:
            optimizer.load_state_dict(checkpoint['optimizer'])
        best_pred = checkpoint['best_pred']
        print("=> loaded checkpoint '{}' (epoch {})".format(
            args.resume, checkpoint['epoch']))
    # clear start epoch if fine-tuning
    if args.ft:
        args.start_epoch = 0

    # lr scheduler
    scheduler = utils.LR_Scheduler_Head(args.lr_scheduler, args.lr,
                                        args.epochs, len(trainloader))

    def training(epoch):
        global best_pred
        train_loss = 0.0
        model.train()
        tic = time.time()
        for i, (image, target) in enumerate(trainloader):
            scheduler(optimizer, i, epoch, best_pred)
            optimizer.zero_grad()
            outputs = model(image)
            target = target.cuda(args.gpu)
            loss = criterion(*outputs, target)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            if i % 100 == 0 and args.gpu == 0:
                iter_per_sec = 100.0 / (
                    time.time() - tic) if i != 0 else 1.0 / (time.time() - tic)
                tic = time.time()
                print('Epoch: {}, Iter: {}, Speed: {:.3f} iter/sec, Train loss: {:.3f}'. \
                      format(epoch, i, iter_per_sec, train_loss / (i + 1)))

    def validation(epoch):
        # Fast test during the training using single-crop only
        global best_pred
        is_best = False
        model.eval()
        metric.reset()

        for i, (image, target) in enumerate(valloader):
            with torch.no_grad():
                #correct, labeled, inter, union = eval_batch(model, image, target)
                pred = model(image)[0]
                target = target.cuda(args.gpu)
                metric.update(target, pred)

            pixAcc, mIoU = metric.get()
            if i % 100 == 0 and args.gpu == 0:
                print('pixAcc: %.3f, mIoU: %.3f' % (pixAcc, mIoU))

        if args.gpu == 0:
            pixAcc, mIoU = torch_dist_avg(args.gpu, pixAcc, mIoU)
            print('pixAcc: %.3f, mIoU: %.3f' % (pixAcc, mIoU))

            new_pred = (pixAcc + mIoU) / 2
            if new_pred > best_pred:
                is_best = True
                best_pred = new_pred
            utils.save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': model.module.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'best_pred': best_pred,
                }, args, is_best)

    if args.gpu == 0:
        print('Starting Epoch:', args.start_epoch)
        print('Total Epochs:', args.epochs)

    for epoch in range(args.start_epoch, args.epochs):
        tic = time.time()
        training(epoch)
        if epoch % 10 == 0:
            validation(epoch)
        elapsed = time.time() - tic
        if args.gpu == 0:
            print(f'Epoch: {epoch}, Time cost: {elapsed}')

    validation(epoch)
Example #7
    def __init__(self, args):
        self.args = args
        args.log_name = str(args.checkname)
        self.logger = utils.create_logger(args.log_root, args.log_name)
        # data transforms
        input_transform = None
        # dataset
        data_kwargs = {
            'transform': input_transform,
            'base_size': args.base_size,
            'crop_size': args.crop_size
        }
        trainset = get_dataset(args.dataset,
                               split='train',
                               mode='train',
                               **data_kwargs)
        testset = get_dataset(args.dataset,
                              split='val',
                              mode='val',
                              **data_kwargs)
        # dataloader
        kwargs = {'num_workers': args.workers, 'pin_memory': True} \
            if args.cuda else {}
        self.trainloader = data.DataLoader(trainset,
                                           batch_size=args.batch_size,
                                           drop_last=True,
                                           shuffle=True,
                                           **kwargs)
        self.valloader = data.DataLoader(testset,
                                         batch_size=args.batch_size,
                                         drop_last=False,
                                         shuffle=False,
                                         **kwargs)
        self.nclass = trainset.num_class
        # model
        model = get_segmentation_model(args.model,
                                       dataset=args.dataset,
                                       backbone=args.backbone,
                                       aux=args.aux,
                                       se_loss=args.se_loss,
                                       norm_layer=BatchNorm2d,
                                       base_size=args.base_size,
                                       crop_size=args.crop_size)
        #print(model)
        self.logger.info(model)
        # optimizer using different LR
        params_list = [
            {
                'params': model.pretrained.parameters(),
                'lr': args.lr
            },
        ]
        if hasattr(model, 'head'):
            params_list.append({
                'params': model.head.parameters(),
                'lr': args.lr * 10
            })
        if hasattr(model, 'auxlayer'):
            params_list.append({
                'params': model.auxlayer.parameters(),
                'lr': args.lr * 10
            })
        optimizer = torch.optim.SGD(params_list,
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
        self.criterion = SegmentationMultiLosses(nclass=self.nclass)
        #self.criterion = SegmentationLosses(se_loss=args.se_loss, aux=args.aux,nclass=self.nclass)

        self.model, self.optimizer = model, optimizer
        # using cuda
        if args.cuda:
            self.model = torch.nn.DataParallel(self.model).cuda()
            self.criterion = torch.nn.DataParallel(self.criterion).cuda()
        # finetune from a trained model
        if args.ft:
            args.start_epoch = 0
            checkpoint = torch.load(args.ft_resume)
            if args.cuda:
                self.model.module.load_state_dict(checkpoint, strict=False)
            else:
                self.model.load_state_dict(checkpoint, strict=False)
            self.logger.info("=> loaded checkpoint '{}' (epoch {})".format(
                args.ft_resume, args.start_epoch))
        # resuming checkpoint
        if args.resume:
            if not os.path.isfile(args.resume):
                raise RuntimeError("=> no checkpoint found at '{}'".format(
                    args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            if args.cuda:
                self.model.module.load_state_dict(checkpoint['state_dict'])
            else:
                self.model.load_state_dict(checkpoint['state_dict'])
            if not args.ft:
                self.optimizer.load_state_dict(checkpoint['optimizer'])
            self.best_pred = checkpoint['best_pred']
            self.logger.info("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        # lr scheduler
        self.scheduler = utils.LR_Scheduler(
            args.lr_scheduler,
            args.lr,
            args.epochs,
            len(self.trainloader),  # logger=self.logger,
            lr_step=args.lr_step)
        self.best_pred = 0.0
Example #8
    # dataset
<<<<<<< HEAD
    # if args.eval:
    #     testset = get_segmentation_dataset(args.dataset, split='val', mode='testval',
    #                                        transform=input_transform)
    # elif args.test_val:
    #     testset = get_segmentation_dataset(args.dataset, split='val', mode='test',
    #                                        transform=input_transform)
    # else:
    #     testset = get_segmentation_dataset(args.dataset, split='test', mode='test',
    #                                        transform=input_transform)
    data_kwargs = {'transform': get_inference_augmentation(), 'base_size': args.base_size}
    testset = get_dataset(args.dataset,
                          root=DATA_DIR,
                          source=['sample_submission.csv'],
                          split=['test_1801.npy'],
                          mode='test',
                          **data_kwargs)

=======
    if args.eval:
        testset = get_dataset(args.dataset, split='val', mode='testval',
                              transform=input_transform)
    elif args.test_val:
        testset = get_dataset(args.dataset, split='val', mode='test',
                              transform=input_transform)
    else:
        testset = get_dataset(args.dataset, split='test', mode='test',
                              transform=input_transform)
>>>>>>> upstream/master
    # dataloader
Example #9
def visualize(args):
    directory = "runs/%s/%s/%s/vis/results" % (args.dataset, args.model,
                                               args.checkname)
    print("visualize directory : ", directory)
    if not os.path.exists(directory):
        os.makedirs(directory)
    # Get the model
    model = get_segmentation_model(args.model,
                                   dataset=args.dataset,
                                   backbone=args.backbone,
                                   aux=args.aux,
                                   se_loss=args.se_loss,
                                   norm_layer=SyncBatchNorm,
                                   base_size=args.base_size,
                                   crop_size=args.crop_size)
    model = model.cuda()

    # resuming checkpoint
    if args.resume is None or not os.path.isfile(args.resume):
        raise RuntimeError("=> no checkpoint found at '{}'".format(
            args.resume))
    checkpoint = torch.load(args.resume)
    # pass strict=False to load_state_dict if an older checkpoint has mismatched keys
    model.load_state_dict(checkpoint['state_dict'])
    # for key in checkpoint['state_dict']:
    #     print(key)
    print("=> loaded checkpoint '{}' (epoch {})".format(
        args.resume, checkpoint['epoch']))
    model.eval()

    # Prepare the image
    input_transform = transform.Compose([
        transform.ToTensor(),
        transform.Normalize([.485, .456, .406], [.229, .224, .225])
    ])

    kwargs = {'num_workers': args.workers, 'pin_memory': True} \
        if args.cuda else {}

    data_kwargs = {
        'transform': input_transform,
        'base_size': args.base_size,
        'crop_size': args.crop_size,
        'root': args.data_root
    }

    testset = get_dataset(args.dataset, split='val', mode='val', **data_kwargs)

    testloader = data.DataLoader(testset,
                                 batch_size=1,
                                 drop_last=False,
                                 shuffle=False,
                                 **kwargs)

    avg = [.485, .456, .406]
    std = [.229, .224, .225]
    # visualize

    cnt = 0
    for i, (image, dst) in enumerate(tqdm(testloader)):
        if cnt == 100:
            break
        prob = np.random.rand(1)[0]
        if prob > 0.2:
            continue

        image = image.cuda()
        output = model.evaluate(image)
        pred = torch.max(output, 1)[1].cpu().numpy() + 1
        mask = encoding.utils.get_mask_pallete(pred, 'pascal_context')

        dst = dst.numpy() + 1
        gt = encoding.utils.get_mask_pallete(dst, 'pascal_context')

        im = image.cpu().numpy().squeeze().transpose(1, 2, 0)
        im = im * std + avg
        im = im * 255
        im = im.astype('uint8')
        im = Image.fromarray(im)

        target = Image.new('RGB', (480 * 3 + 20, 480), color=(255, 255, 255))
        target.paste(im, (0, 0))
        target.paste(gt, (490, 0))
        target.paste(mask, (980, 0))
        target.save('{}/{}.png'.format(directory, str(i)))
        cnt += 1
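Note: the loop above undoes transform.Normalize by hand (im * std + avg, then scaling to 0-255) before pasting the input next to its ground truth and prediction. A tiny self-contained version of that un-normalization, with clipping added for safety, might look like this:

import numpy as np
import torch
from PIL import Image

mean = np.array([.485, .456, .406])
std = np.array([.229, .224, .225])

tensor = torch.randn(3, 64, 64)             # stand-in for one normalized CHW image tensor

im = tensor.numpy().transpose(1, 2, 0)      # CHW -> HWC
im = (im * std + mean) * 255                # undo Normalize, scale back to 0-255
im = np.clip(im, 0, 255).astype('uint8')
img = Image.fromarray(im)                   # ready to paste into a composite or save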
Example #10
def visualize_attn(args):
    directory = "runs/%s/%s/%s/vis/attn" % (args.dataset, args.model,
                                            args.checkname)
    print("visualize directory : ", directory)
    if not os.path.exists(directory):
        os.makedirs(directory)
    # Get the model
    model = get_segmentation_model(args.model,
                                   dataset=args.dataset,
                                   backbone=args.backbone,
                                   aux=args.aux,
                                   se_loss=args.se_loss,
                                   norm_layer=SyncBatchNorm,
                                   base_size=args.base_size,
                                   crop_size=args.crop_size)
    model = model.cuda()
    # print(model)
    # print("=================================")
    # resuming checkpoint
    if args.resume is None or not os.path.isfile(args.resume):
        raise RuntimeError("=> no checkpoint found at '{}'".format(
            args.resume))
    checkpoint = torch.load(args.resume)
    # pass strict=False to load_state_dict if an older checkpoint has mismatched keys
    model.load_state_dict(checkpoint['state_dict'])
    # for key in checkpoint['state_dict']:
    #     print(key)
    print("=> loaded checkpoint '{}' (epoch {})".format(
        args.resume, checkpoint['epoch']))
    model.eval()

    # Prepare the image
    input_transform = transform.Compose([
        transform.ToTensor(),
        transform.Normalize([.485, .456, .406], [.229, .224, .225])
    ])

    kwargs = {'num_workers': args.workers, 'pin_memory': True} \
        if args.cuda else {}

    data_kwargs = {
        'transform': input_transform,
        'base_size': args.base_size,
        'crop_size': args.crop_size,
        'root': args.data_root
    }

    testset = get_dataset(args.dataset, split='val', mode='val', **data_kwargs)

    testloader = data.DataLoader(testset,
                                 batch_size=1,
                                 drop_last=False,
                                 shuffle=False,
                                 **kwargs)

    avg = [.485, .456, .406]
    std = [.229, .224, .225]
    # visualize

    cnt = 0
    for i, (image, dst) in enumerate(tqdm(testloader)):
        if cnt == 100:
            break
        prob = np.random.rand(1)[0]
        if prob > 0.2:
            continue

        image = image.cuda()
        output = model.evaluate(image)
        cnt += 1
        print("{}/100".format(cnt))
Example #11
        self.dice_scores = {'train':[], 'val':[]}
        self.iou_scores = {'train':[], 'val':[]}
        self.best_loss = float("inf")

        # # data transforms
        # input_transform = transform.Compose([
        #     transform.ToTensor(),
        #     transform.Normalize([.485, .456, .406], [.229, .224, .225])])

        # dataset
        data_kwargs = {'transform': get_training_augmentation(), 'base_size': args.base_size,
                       'crop_size': args.crop_size}
<<<<<<< HEAD
        trainset = get_dataset(args.dataset,
                               root=DATA_DIR,
                               source=['train.csv'],
                               split=['train_#2fold_11940.npy'],
                               mode='train',
                               **data_kwargs)
        testset = get_dataset(args.dataset,
                              root=DATA_DIR,
                              source=['train.csv'],
                              split=['valid_#2fold_628.npy'],
                              mode ='val',
                              **data_kwargs)
=======
        trainset = get_dataset(args.dataset, split=args.train_split, mode='train', **data_kwargs)
        testset = get_dataset(args.dataset, split='val', mode ='val', **data_kwargs)
>>>>>>> upstream/master
        # dataloader
        # kwargs = {'num_workers': args.workers, 'pin_memory': True} \
        #     if args.cuda else {}